From a1c187eef3ba08076aedb5154929f7eda8d1b424 Mon Sep 17 00:00:00 2001
From: Aleksey Kladov
Date: Wed, 12 Aug 2020 18:26:51 +0200
Subject: Rename ra_syntax -> syntax

---
 crates/ra_syntax/src/parsing/lexer.rs             | 244 ------------
 crates/ra_syntax/src/parsing/reparsing.rs         | 455 ----------------------
 crates/ra_syntax/src/parsing/text_token_source.rs |  84 ----
 crates/ra_syntax/src/parsing/text_tree_sink.rs    | 183 ---------
 4 files changed, 966 deletions(-)
 delete mode 100644 crates/ra_syntax/src/parsing/lexer.rs
 delete mode 100644 crates/ra_syntax/src/parsing/reparsing.rs
 delete mode 100644 crates/ra_syntax/src/parsing/text_token_source.rs
 delete mode 100644 crates/ra_syntax/src/parsing/text_tree_sink.rs

(limited to 'crates/ra_syntax/src/parsing')

diff --git a/crates/ra_syntax/src/parsing/lexer.rs b/crates/ra_syntax/src/parsing/lexer.rs
deleted file mode 100644
index fa3be1016..000000000
--- a/crates/ra_syntax/src/parsing/lexer.rs
+++ /dev/null
@@ -1,244 +0,0 @@
-//! Lexer analyzes raw input string and produces lexemes (tokens).
-//! It is just a bridge to `rustc_lexer`.
-
-use rustc_lexer::{LiteralKind as LK, RawStrError};
-
-use std::convert::TryInto;
-
-use crate::{
-    SyntaxError,
-    SyntaxKind::{self, *},
-    TextRange, TextSize, T,
-};
-
-/// A token of Rust source.
-#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
-pub struct Token {
-    /// The kind of token.
-    pub kind: SyntaxKind,
-    /// The length of the token.
-    pub len: TextSize,
-}
-
-/// Break a string up into its component tokens.
-/// Beware that it checks for a shebang first, and the shebang's length
-/// contributes to the resulting tokens' offsets.
-pub fn tokenize(text: &str) -> (Vec<Token>, Vec<SyntaxError>) {
-    // non-empty string is a precondition of `rustc_lexer::strip_shebang()`.
-    if text.is_empty() {
-        return Default::default();
-    }
-
-    let mut tokens = Vec::new();
-    let mut errors = Vec::new();
-
-    let mut offset = match rustc_lexer::strip_shebang(text) {
-        Some(shebang_len) => {
-            tokens.push(Token { kind: SHEBANG, len: shebang_len.try_into().unwrap() });
-            shebang_len
-        }
-        None => 0,
-    };
-
-    let text_without_shebang = &text[offset..];
-
-    for rustc_token in rustc_lexer::tokenize(text_without_shebang) {
-        let token_len: TextSize = rustc_token.len.try_into().unwrap();
-        let token_range = TextRange::at(offset.try_into().unwrap(), token_len);
-
-        let (syntax_kind, err_message) =
-            rustc_token_kind_to_syntax_kind(&rustc_token.kind, &text[token_range]);
-
-        tokens.push(Token { kind: syntax_kind, len: token_len });
-
-        if let Some(err_message) = err_message {
-            errors.push(SyntaxError::new(err_message, token_range));
-        }
-
-        offset += rustc_token.len;
-    }
-
-    (tokens, errors)
-}
-
-/// Returns `SyntaxKind` and `Option<SyntaxError>` of the first token
-/// encountered at the beginning of the string.
-///
-/// Returns `None` if the string contains zero *or two or more* tokens.
-/// The token is malformed if the returned error is not `None`.
-///
-/// Beware that unescape errors are not checked at tokenization time.
-pub fn lex_single_syntax_kind(text: &str) -> Option<(SyntaxKind, Option<SyntaxError>)> {
-    lex_first_token(text)
-        .filter(|(token, _)| token.len == TextSize::of(text))
-        .map(|(token, error)| (token.kind, error))
-}
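
[Aside: illustrative sketch, not part of the patch.] A minimal usage example
for the lexing entry points above, assuming the pre-rename `ra_syntax` crate,
which re-exported `tokenize`, `Token` and the `lex_single_*` helpers from its
root:

    use ra_syntax::{tokenize, SyntaxKind};

    fn main() {
        // The shebang becomes a single SHEBANG token whose length shifts
        // the offsets of every token that follows it.
        let (tokens, errors) = tokenize("#!/usr/bin/env rust\nfn main() {}");
        assert!(errors.is_empty());
        assert_eq!(tokens[0].kind, SyntaxKind::SHEBANG);

        // An unterminated string still produces a STRING token, but also
        // pushes a SyntaxError describing the missing `"`.
        let (tokens, errors) = tokenize(r#""unclosed"#);
        assert_eq!(tokens[0].kind, SyntaxKind::STRING);
        assert_eq!(errors.len(), 1);
    }
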
-
-/// The same as `lex_single_syntax_kind()` but returns only `SyntaxKind` and
-/// returns `None` if any tokenization error occurred.
-///
-/// Beware that unescape errors are not checked at tokenization time.
-pub fn lex_single_valid_syntax_kind(text: &str) -> Option<SyntaxKind> {
-    lex_first_token(text)
-        .filter(|(token, error)| error.is_none() && token.len == TextSize::of(text))
-        .map(|(token, _error)| token.kind)
-}
-
-/// Returns `SyntaxKind` and `Option<SyntaxError>` of the first token
-/// encountered at the beginning of the string.
-///
-/// Returns `None` if the string contains zero tokens.
-/// The token is malformed if the returned error is not `None`.
-///
-/// Beware that unescape errors are not checked at tokenization time.
-fn lex_first_token(text: &str) -> Option<(Token, Option<SyntaxError>)> {
-    // non-empty string is a precondition of `rustc_lexer::first_token()`.
-    if text.is_empty() {
-        return None;
-    }
-
-    let rustc_token = rustc_lexer::first_token(text);
-    let (syntax_kind, err_message) = rustc_token_kind_to_syntax_kind(&rustc_token.kind, text);
-
-    let token = Token { kind: syntax_kind, len: rustc_token.len.try_into().unwrap() };
-    let optional_error = err_message
-        .map(|err_message| SyntaxError::new(err_message, TextRange::up_to(TextSize::of(text))));
-
-    Some((token, optional_error))
-}
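
[Aside: illustrative sketch, not part of the patch.] How the strict and
lenient single-token helpers differ, under the same re-export assumption as
above:

    use ra_syntax::{lex_single_syntax_kind, lex_single_valid_syntax_kind, SyntaxKind};

    fn main() {
        // `"abc` is one malformed STRING token: the lenient helper returns
        // the kind together with the error...
        let (kind, err) = lex_single_syntax_kind("\"abc").unwrap();
        assert_eq!(kind, SyntaxKind::STRING);
        assert!(err.is_some());

        // ...while the strict helper rejects any token that lexed with an
        // error, and both reject inputs of more than one token.
        assert_eq!(lex_single_valid_syntax_kind("\"abc"), None);
        assert_eq!(lex_single_valid_syntax_kind("foo bar"), None);
    }
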
-
-/// Returns `SyntaxKind` and an optional tokenize error message.
-fn rustc_token_kind_to_syntax_kind(
-    rustc_token_kind: &rustc_lexer::TokenKind,
-    token_text: &str,
-) -> (SyntaxKind, Option<&'static str>) {
-    // A note on an intended tradeoff:
-    // We drop some useful information here (see patterns with double dots `..`).
-    // Storing that info in `SyntaxKind` is not possible due to its layout
-    // requirement of being a `u16`, which comes from `rowan::SyntaxKind`.
-
-    let syntax_kind = {
-        match rustc_token_kind {
-            rustc_lexer::TokenKind::LineComment => COMMENT,
-
-            rustc_lexer::TokenKind::BlockComment { terminated: true } => COMMENT,
-            rustc_lexer::TokenKind::BlockComment { terminated: false } => {
-                return (
-                    COMMENT,
-                    Some("Missing trailing `*/` symbols to terminate the block comment"),
-                );
-            }
-
-            rustc_lexer::TokenKind::Whitespace => WHITESPACE,
-
-            rustc_lexer::TokenKind::Ident => {
-                if token_text == "_" {
-                    UNDERSCORE
-                } else {
-                    SyntaxKind::from_keyword(token_text).unwrap_or(IDENT)
-                }
-            }
-
-            rustc_lexer::TokenKind::RawIdent => IDENT,
-            rustc_lexer::TokenKind::Literal { kind, .. } => return match_literal_kind(&kind),
-
-            rustc_lexer::TokenKind::Lifetime { starts_with_number: false } => LIFETIME,
-            rustc_lexer::TokenKind::Lifetime { starts_with_number: true } => {
-                return (LIFETIME, Some("Lifetime name cannot start with a number"))
-            }
-
-            rustc_lexer::TokenKind::Semi => T![;],
-            rustc_lexer::TokenKind::Comma => T![,],
-            rustc_lexer::TokenKind::Dot => T![.],
-            rustc_lexer::TokenKind::OpenParen => T!['('],
-            rustc_lexer::TokenKind::CloseParen => T![')'],
-            rustc_lexer::TokenKind::OpenBrace => T!['{'],
-            rustc_lexer::TokenKind::CloseBrace => T!['}'],
-            rustc_lexer::TokenKind::OpenBracket => T!['['],
-            rustc_lexer::TokenKind::CloseBracket => T![']'],
-            rustc_lexer::TokenKind::At => T![@],
-            rustc_lexer::TokenKind::Pound => T![#],
-            rustc_lexer::TokenKind::Tilde => T![~],
-            rustc_lexer::TokenKind::Question => T![?],
-            rustc_lexer::TokenKind::Colon => T![:],
-            rustc_lexer::TokenKind::Dollar => T![$],
-            rustc_lexer::TokenKind::Eq => T![=],
-            rustc_lexer::TokenKind::Not => T![!],
-            rustc_lexer::TokenKind::Lt => T![<],
-            rustc_lexer::TokenKind::Gt => T![>],
-            rustc_lexer::TokenKind::Minus => T![-],
-            rustc_lexer::TokenKind::And => T![&],
-            rustc_lexer::TokenKind::Or => T![|],
-            rustc_lexer::TokenKind::Plus => T![+],
-            rustc_lexer::TokenKind::Star => T![*],
-            rustc_lexer::TokenKind::Slash => T![/],
-            rustc_lexer::TokenKind::Caret => T![^],
-            rustc_lexer::TokenKind::Percent => T![%],
-            rustc_lexer::TokenKind::Unknown => ERROR,
-        }
-    };
-
-    return (syntax_kind, None);
-
-    fn match_literal_kind(kind: &rustc_lexer::LiteralKind) -> (SyntaxKind, Option<&'static str>) {
-        #[rustfmt::skip]
-        let syntax_kind = match *kind {
-            LK::Int { empty_int: false, .. } => INT_NUMBER,
-            LK::Int { empty_int: true, .. } => {
-                return (INT_NUMBER, Some("Missing digits after the integer base prefix"))
-            }
-
-            LK::Float { empty_exponent: false, .. } => FLOAT_NUMBER,
-            LK::Float { empty_exponent: true, .. } => {
-                return (FLOAT_NUMBER, Some("Missing digits after the exponent symbol"))
-            }
-
-            LK::Char { terminated: true } => CHAR,
-            LK::Char { terminated: false } => {
-                return (CHAR, Some("Missing trailing `'` symbol to terminate the character literal"))
-            }
-
-            LK::Byte { terminated: true } => BYTE,
-            LK::Byte { terminated: false } => {
-                return (BYTE, Some("Missing trailing `'` symbol to terminate the byte literal"))
-            }
-
-            LK::Str { terminated: true } => STRING,
-            LK::Str { terminated: false } => {
-                return (STRING, Some("Missing trailing `\"` symbol to terminate the string literal"))
-            }
-
-            LK::ByteStr { terminated: true } => BYTE_STRING,
-            LK::ByteStr { terminated: false } => {
-                return (BYTE_STRING, Some("Missing trailing `\"` symbol to terminate the byte string literal"))
-            }
-
-            LK::RawStr { err, .. } => match err {
-                None => RAW_STRING,
-                Some(RawStrError::InvalidStarter { .. }) => return (RAW_STRING, Some("Missing `\"` symbol after `#` symbols to begin the raw string literal")),
-                Some(RawStrError::NoTerminator { expected, found, .. }) => if expected == found {
-                    return (RAW_STRING, Some("Missing trailing `\"` to terminate the raw string literal"))
-                } else {
-                    return (RAW_STRING, Some("Missing trailing `\"` with `#` symbols to terminate the raw string literal"))
-                },
-                Some(RawStrError::TooManyDelimiters { .. }) => return (RAW_STRING, Some("Too many `#` symbols: raw strings may be delimited by up to 65535 `#` symbols")),
-            },
-            LK::RawByteStr { err, .. } => match err {
-                None => RAW_BYTE_STRING,
-                Some(RawStrError::InvalidStarter { .. }) => return (RAW_BYTE_STRING, Some("Missing `\"` symbol after `#` symbols to begin the raw byte string literal")),
-                Some(RawStrError::NoTerminator { expected, found, .. }) => if expected == found {
-                    return (RAW_BYTE_STRING, Some("Missing trailing `\"` to terminate the raw byte string literal"))
-                } else {
-                    return (RAW_BYTE_STRING, Some("Missing trailing `\"` with `#` symbols to terminate the raw byte string literal"))
-                },
-                Some(RawStrError::TooManyDelimiters { .. }) => return (RAW_BYTE_STRING, Some("Too many `#` symbols: raw byte strings may be delimited by up to 65535 `#` symbols")),
-            },
-        };
-
-        (syntax_kind, None)
-    }
-}
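
[Aside: illustrative sketch, not part of the patch.] How the `Ident` and
`RawIdent` arms above surface through the public helpers (same re-export
assumption as before):

    use ra_syntax::{lex_single_valid_syntax_kind, SyntaxKind};

    fn main() {
        // Keywords are recognized via `SyntaxKind::from_keyword`...
        assert_eq!(lex_single_valid_syntax_kind("fn"), Some(SyntaxKind::FN_KW));
        // ...`_` gets its own kind...
        assert_eq!(lex_single_valid_syntax_kind("_"), Some(SyntaxKind::UNDERSCORE));
        // ...and raw identifiers collapse to plain IDENT.
        assert_eq!(lex_single_valid_syntax_kind("r#foo"), Some(SyntaxKind::IDENT));
    }
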
diff --git a/crates/ra_syntax/src/parsing/reparsing.rs b/crates/ra_syntax/src/parsing/reparsing.rs
deleted file mode 100644
index 4149f856a..000000000
--- a/crates/ra_syntax/src/parsing/reparsing.rs
+++ /dev/null
@@ -1,455 +0,0 @@
-//! Implementation of incremental re-parsing.
-//!
-//! We use two simple strategies for this:
-//! - if the edit modifies only a single token (like changing an identifier's
-//!   letter), we replace only this token.
-//! - otherwise, we search for the nearest `{}` block which contains the edit
-//!   and try to parse only this block.
-
-use parser::Reparser;
-use text_edit::Indel;
-
-use crate::{
-    algo,
-    parsing::{
-        lexer::{lex_single_syntax_kind, tokenize, Token},
-        text_token_source::TextTokenSource,
-        text_tree_sink::TextTreeSink,
-    },
-    syntax_node::{GreenNode, GreenToken, NodeOrToken, SyntaxElement, SyntaxNode},
-    SyntaxError,
-    SyntaxKind::*,
-    TextRange, TextSize, T,
-};
-
-pub(crate) fn incremental_reparse(
-    node: &SyntaxNode,
-    edit: &Indel,
-    errors: Vec<SyntaxError>,
-) -> Option<(GreenNode, Vec<SyntaxError>, TextRange)> {
-    if let Some((green, new_errors, old_range)) = reparse_token(node, &edit) {
-        return Some((green, merge_errors(errors, new_errors, old_range, edit), old_range));
-    }
-
-    if let Some((green, new_errors, old_range)) = reparse_block(node, &edit) {
-        return Some((green, merge_errors(errors, new_errors, old_range, edit), old_range));
-    }
-    None
-}
-
-fn reparse_token<'node>(
-    root: &'node SyntaxNode,
-    edit: &Indel,
-) -> Option<(GreenNode, Vec<SyntaxError>, TextRange)> {
-    let prev_token = algo::find_covering_element(root, edit.delete).as_token()?.clone();
-    let prev_token_kind = prev_token.kind();
-    match prev_token_kind {
-        WHITESPACE | COMMENT | IDENT | STRING | RAW_STRING => {
-            if prev_token_kind == WHITESPACE || prev_token_kind == COMMENT {
-                // removing a newline may extend the previous token
-                let deleted_range = edit.delete - prev_token.text_range().start();
-                if prev_token.text()[deleted_range].contains('\n') {
-                    return None;
-                }
-            }
-
-            let mut new_text = get_text_after_edit(prev_token.clone().into(), &edit);
-            let (new_token_kind, new_err) = lex_single_syntax_kind(&new_text)?;
-
-            if new_token_kind != prev_token_kind
-                || (new_token_kind == IDENT && is_contextual_kw(&new_text))
-            {
-                return None;
-            }
-
-            // Check that the edited token is not a part of a bigger token.
-            // E.g. if the user removes `ruh` from the source code `bruh"str"`,
-            // `b` no longer remains an identifier, but becomes a part of a
-            // byte string literal.
-            if let Some(next_char) = root.text().char_at(prev_token.text_range().end()) {
-                new_text.push(next_char);
-                let token_with_next_char = lex_single_syntax_kind(&new_text);
-                if let Some((_kind, _error)) = token_with_next_char {
-                    return None;
-                }
-                new_text.pop();
-            }
-
-            let new_token =
-                GreenToken::new(rowan::SyntaxKind(prev_token_kind.into()), new_text.into());
-            Some((
-                prev_token.replace_with(new_token),
-                new_err.into_iter().collect(),
-                prev_token.text_range(),
-            ))
-        }
-        _ => None,
-    }
-}
-
-fn reparse_block<'node>(
-    root: &'node SyntaxNode,
-    edit: &Indel,
-) -> Option<(GreenNode, Vec<SyntaxError>, TextRange)> {
-    let (node, reparser) = find_reparsable_node(root, edit.delete)?;
-    let text = get_text_after_edit(node.clone().into(), edit);
-
-    let (tokens, new_lexer_errors) = tokenize(&text);
-    if !is_balanced(&tokens) {
-        return None;
-    }
-
-    let mut token_source = TextTokenSource::new(&text, &tokens);
-    let mut tree_sink = TextTreeSink::new(&text, &tokens);
-    reparser.parse(&mut token_source, &mut tree_sink);
-
-    let (green, mut new_parser_errors) = tree_sink.finish();
-    new_parser_errors.extend(new_lexer_errors);
-
-    Some((node.replace_with(green), new_parser_errors, node.text_range()))
-}
-
-fn get_text_after_edit(element: SyntaxElement, edit: &Indel) -> String {
-    let edit = Indel::replace(edit.delete - element.text_range().start(), edit.insert.clone());
-
-    let mut text = match element {
-        NodeOrToken::Token(token) => token.text().to_string(),
-        NodeOrToken::Node(node) => node.text().to_string(),
-    };
-    edit.apply(&mut text);
-    text
-}
-
-fn is_contextual_kw(text: &str) -> bool {
-    matches!(text, "auto" | "default" | "union")
-}
-
-fn find_reparsable_node(node: &SyntaxNode, range: TextRange) -> Option<(SyntaxNode, Reparser)> {
-    let node = algo::find_covering_element(node, range);
-
-    let mut ancestors = match node {
-        NodeOrToken::Token(it) => it.parent().ancestors(),
-        NodeOrToken::Node(it) => it.ancestors(),
-    };
-    ancestors.find_map(|node| {
-        let first_child = node.first_child_or_token().map(|it| it.kind());
-        let parent = node.parent().map(|it| it.kind());
-        Reparser::for_node(node.kind(), first_child, parent).map(|r| (node, r))
-    })
-}
-
-fn is_balanced(tokens: &[Token]) -> bool {
-    if tokens.is_empty()
-        || tokens.first().unwrap().kind != T!['{']
-        || tokens.last().unwrap().kind != T!['}']
-    {
-        return false;
-    }
-    let mut balance = 0usize;
-    for t in &tokens[1..tokens.len() - 1] {
-        match t.kind {
-            T!['{'] => balance += 1,
-            T!['}'] => {
-                balance = match balance.checked_sub(1) {
-                    Some(b) => b,
-                    None => return false,
-                }
-            }
-            _ => (),
-        }
-    }
-    balance == 0
-}
-
-fn merge_errors(
-    old_errors: Vec<SyntaxError>,
-    new_errors: Vec<SyntaxError>,
-    range_before_reparse: TextRange,
-    edit: &Indel,
-) -> Vec<SyntaxError> {
-    let mut res = Vec::new();
-
-    for old_err in old_errors {
-        let old_err_range = old_err.range();
-        if old_err_range.end() <= range_before_reparse.start() {
-            res.push(old_err);
-        } else if old_err_range.start() >= range_before_reparse.end() {
-            let inserted_len = TextSize::of(&edit.insert);
-            res.push(old_err.with_range((old_err_range + inserted_len) - edit.delete.len()));
-            // Note: extra parens are intentional to prevent uint underflow, HWAB (here was a bug)
-        }
-    }
-    res.extend(new_errors.into_iter().map(|new_err| {
-        // fighting borrow checker with a variable ;)
-        let offseted_range = new_err.range() + range_before_reparse.start();
-        new_err.with_range(offseted_range)
-    }));
-    res
}
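
[Aside: illustrative sketch, not part of the patch.] A worked example of the
offset arithmetic in `merge_errors`: suppose the reparsed range was 10 bytes
long and the edit inserted 13 bytes of new text; an old error located
entirely after that range shifts by +3. The parenthesization adds first, so
the intermediate value cannot underflow even when the edit shrinks the text:

    use ra_syntax::{TextRange, TextSize};

    fn main() {
        let old_err_range = TextRange::new(25.into(), 30.into());
        let inserted_len = TextSize::from(13u32);
        let deleted_len = TextSize::from(10u32);
        // (25..30 + 13) - 10 == 28..33
        assert_eq!(
            (old_err_range + inserted_len) - deleted_len,
            TextRange::new(28.into(), 33.into()),
        );
    }
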
-
-#[cfg(test)]
-mod tests {
-    use test_utils::{assert_eq_text, extract_range};
-
-    use super::*;
-    use crate::{AstNode, Parse, SourceFile};
-
-    fn do_check(before: &str, replace_with: &str, reparsed_len: u32) {
-        let (range, before) = extract_range(before);
-        let edit = Indel::replace(range, replace_with.to_owned());
-        let after = {
-            let mut after = before.clone();
-            edit.apply(&mut after);
-            after
-        };
-
-        let fully_reparsed = SourceFile::parse(&after);
-        let incrementally_reparsed: Parse<SourceFile> = {
-            let before = SourceFile::parse(&before);
-            let (green, new_errors, range) =
-                incremental_reparse(before.tree().syntax(), &edit, before.errors.to_vec()).unwrap();
-            assert_eq!(range.len(), reparsed_len.into(), "reparsed fragment has wrong length");
-            Parse::new(green, new_errors)
-        };
-
-        assert_eq_text!(
-            &format!("{:#?}", fully_reparsed.tree().syntax()),
-            &format!("{:#?}", incrementally_reparsed.tree().syntax()),
-        );
-        assert_eq!(fully_reparsed.errors(), incrementally_reparsed.errors());
-    }
-
-    #[test] // FIXME: some tests here actually test token reparsing
-    fn reparse_block_tests() {
-        do_check(
-            r"
-fn foo() {
-    let x = foo + <|>bar<|>
-}
-",
-            "baz",
-            3,
-        );
-        do_check(
-            r"
-fn foo() {
-    let x = foo<|> + bar<|>
-}
-",
-            "baz",
-            25,
-        );
-        do_check(
-            r"
-struct Foo {
-    f: foo<|><|>
-}
-",
-            ",\n g: (),",
-            14,
-        );
-        do_check(
-            r"
-fn foo {
-    let;
-    1 + 1;
-    <|>92<|>;
-}
-",
-            "62",
-            31, // FIXME: reparse only int literal here
-        );
-        do_check(
-            r"
-mod foo {
-    fn <|><|>
-}
-",
-            "bar",
-            11,
-        );
-
-        do_check(
-            r"
-trait Foo {
-    type <|>Foo<|>;
-}
-",
-            "Output",
-            3,
-        );
-        do_check(
-            r"
-impl IntoIterator for Foo {
-    f<|><|>
-}
-",
-            "n next(",
-            9,
-        );
-        do_check(r"use a::b::{foo,<|>,bar<|>};", "baz", 10);
-        do_check(
-            r"
-pub enum A {
-    Foo<|><|>
-}
-",
-            "\nBar;\n",
-            11,
-        );
-        do_check(
-            r"
-foo!{a, b<|><|> d}
-",
-            ", c[3]",
-            8,
-        );
-        do_check(
-            r"
-fn foo() {
-    vec![<|><|>]
-}
-",
-            "123",
-            14,
-        );
-        do_check(
-            r"
-extern {
-    fn<|>;<|>
-}
-",
-            " exit(code: c_int)",
-            11,
-        );
-    }
-
-    #[test]
-    fn reparse_token_tests() {
-        do_check(
-            r"<|><|>
-fn foo() -> i32 { 1 }
-",
-            "\n\n\n \n",
-            1,
-        );
-        do_check(
-            r"
-fn foo() -> <|><|> {}
-",
-            " \n",
-            2,
-        );
-        do_check(
-            r"
-fn <|>foo<|>() -> i32 { 1 }
-",
-            "bar",
-            3,
-        );
-        do_check(
-            r"
-fn foo<|><|>foo() { }
-",
-            "bar",
-            6,
-        );
-        do_check(
-            r"
-fn foo /* <|><|> */ () {}
-",
-            "some comment",
-            6,
-        );
-        do_check(
-            r"
-fn baz <|><|> () {}
-",
-            " \t\t\n\n",
-            2,
-        );
-        do_check(
-            r"
-fn baz <|><|> () {}
-",
-            " \t\t\n\n",
-            2,
-        );
-        do_check(
-            r"
-/// foo <|><|>omment
-mod { }
-",
-            "c",
-            14,
-        );
-        do_check(
-            r#"
-fn -> &str { "Hello<|><|>" }
-"#,
-            ", world",
-            7,
-        );
-        do_check(
-            r#"
-fn -> &str { // "Hello<|><|>"
-"#,
-            ", world",
-            10,
-        );
-        do_check(
-            r##"
-fn -> &str { r#"Hello<|><|>"#
-"##,
-            ", world",
-            10,
-        );
-        do_check(
-            r"
-#[derive(<|>Copy<|>)]
-enum Foo {
-
-}
-",
-            "Clone",
-            4,
-        );
-    }
-
-    #[test]
-    fn reparse_str_token_with_error_unchanged() {
-        do_check(r#""<|>Unclosed<|> string literal"#, "Still unclosed", 24);
-    }
-
-    #[test]
-    fn reparse_str_token_with_error_fixed() {
-        do_check(r#""unterinated<|><|>"#, "\"", 12);
-    }
-
-    #[test]
-    fn reparse_block_with_error_in_middle_unchanged() {
-        do_check(
-            r#"fn main() {
-                if {}
-                32 + 4<|><|>
-                return
-                if {}
-            }"#,
-            "23",
-            105,
-        )
-    }
-
-    #[test]
-    fn reparse_block_with_error_in_middle_fixed() {
-        do_check(
-            r#"fn main() {
-                if {}
-                32 + 4<|><|>
-                return
-                if {}
-            }"#,
-            ";",
-            105,
-        )
-    }
-}
diff --git a/crates/ra_syntax/src/parsing/text_token_source.rs b/crates/ra_syntax/src/parsing/text_token_source.rs
deleted file mode 100644
index df866dc2b..000000000
--- a/crates/ra_syntax/src/parsing/text_token_source.rs
+++ /dev/null
@@ -1,84 +0,0 @@
-//! See `TextTokenSource` docs.
-
-use parser::TokenSource;
-
-use crate::{parsing::lexer::Token, SyntaxKind::EOF, TextRange, TextSize};
-
-/// Implementation of `parser::TokenSource` that takes tokens from source code text.
-pub(crate) struct TextTokenSource<'t> {
-    text: &'t str,
-    /// token and its start position (non-whitespace/comment tokens)
-    /// ```non-rust
-    ///   struct Foo;
-    ///   ^------^--^-
-    ///   |      |  \________
-    ///   |      \____       \
-    ///   |           \      |
-    ///   (struct, 0) (Foo, 7) (;, 10)
-    /// ```
-    /// `[(struct, 0), (Foo, 7), (;, 10)]`
-    token_offset_pairs: Vec<(Token, TextSize)>,
-
-    /// Current token and position
-    curr: (parser::Token, usize),
-}
-
-impl<'t> TokenSource for TextTokenSource<'t> {
-    fn current(&self) -> parser::Token {
-        self.curr.0
-    }
-
-    fn lookahead_nth(&self, n: usize) -> parser::Token {
-        mk_token(self.curr.1 + n, &self.token_offset_pairs)
-    }
-
-    fn bump(&mut self) {
-        if self.curr.0.kind == EOF {
-            return;
-        }
-
-        let pos = self.curr.1 + 1;
-        self.curr = (mk_token(pos, &self.token_offset_pairs), pos);
-    }
-
-    fn is_keyword(&self, kw: &str) -> bool {
-        self.token_offset_pairs
-            .get(self.curr.1)
-            .map(|(token, offset)| &self.text[TextRange::at(*offset, token.len)] == kw)
-            .unwrap_or(false)
-    }
-}
-
-fn mk_token(pos: usize, token_offset_pairs: &[(Token, TextSize)]) -> parser::Token {
-    let (kind, is_jointed_to_next) = match token_offset_pairs.get(pos) {
-        Some((token, offset)) => (
-            token.kind,
-            token_offset_pairs
-                .get(pos + 1)
-                .map(|(_, next_offset)| offset + token.len == *next_offset)
-                .unwrap_or(false),
-        ),
-        None => (EOF, false),
-    };
-    parser::Token { kind, is_jointed_to_next }
-}
-
-impl<'t> TextTokenSource<'t> {
-    /// Generate input from tokens (excluding comments and whitespace).
-    pub fn new(text: &'t str, raw_tokens: &'t [Token]) -> TextTokenSource<'t> {
-        let token_offset_pairs: Vec<_> = raw_tokens
-            .iter()
-            .filter_map({
-                let mut len = 0.into();
-                move |token| {
-                    let pair = if token.kind.is_trivia() { None } else { Some((*token, len)) };
-                    len += token.len;
-                    pair
-                }
-            })
-            .collect();
-
-        let first = mk_token(0, &token_offset_pairs);
-        TextTokenSource { text, token_offset_pairs, curr: (first, 0) }
-    }
-}
diff --git a/crates/ra_syntax/src/parsing/text_tree_sink.rs b/crates/ra_syntax/src/parsing/text_tree_sink.rs
deleted file mode 100644
index c1b5f246d..000000000
--- a/crates/ra_syntax/src/parsing/text_tree_sink.rs
+++ /dev/null
@@ -1,183 +0,0 @@
-//! FIXME: write short doc here
-
-use std::mem;
-
-use parser::{ParseError, TreeSink};
-
-use crate::{
-    parsing::Token,
-    syntax_node::GreenNode,
-    SmolStr, SyntaxError,
-    SyntaxKind::{self, *},
-    SyntaxTreeBuilder, TextRange, TextSize,
-};
-
-/// Bridges the parser with our specific syntax tree representation.
-///
-/// `TextTreeSink` also handles attachment of trivia (whitespace) to nodes.
-pub(crate) struct TextTreeSink<'a> {
-    text: &'a str,
-    tokens: &'a [Token],
-    text_pos: TextSize,
-    token_pos: usize,
-    state: State,
-    inner: SyntaxTreeBuilder,
-}
-
-enum State {
-    PendingStart,
-    Normal,
-    PendingFinish,
-}
-
-impl<'a> TreeSink for TextTreeSink<'a> {
-    fn token(&mut self, kind: SyntaxKind, n_tokens: u8) {
-        match mem::replace(&mut self.state, State::Normal) {
-            State::PendingStart => unreachable!(),
-            State::PendingFinish => self.inner.finish_node(),
-            State::Normal => (),
-        }
-        self.eat_trivias();
-        let n_tokens = n_tokens as usize;
-        let len = self.tokens[self.token_pos..self.token_pos + n_tokens]
-            .iter()
-            .map(|it| it.len)
-            .sum::<TextSize>();
-        self.do_token(kind, len, n_tokens);
-    }
-
-    fn start_node(&mut self, kind: SyntaxKind) {
-        match mem::replace(&mut self.state, State::Normal) {
-            State::PendingStart => {
-                self.inner.start_node(kind);
-                // No need to attach trivias to previous node: there is no
-                // previous node.
-                return;
-            }
-            State::PendingFinish => self.inner.finish_node(),
-            State::Normal => (),
-        }
-
-        let n_trivias =
-            self.tokens[self.token_pos..].iter().take_while(|it| it.kind.is_trivia()).count();
-        let leading_trivias = &self.tokens[self.token_pos..self.token_pos + n_trivias];
-        let mut trivia_end =
-            self.text_pos + leading_trivias.iter().map(|it| it.len).sum::<TextSize>();
-
-        let n_attached_trivias = {
-            let leading_trivias = leading_trivias.iter().rev().map(|it| {
-                let next_end = trivia_end - it.len;
-                let range = TextRange::new(next_end, trivia_end);
-                trivia_end = next_end;
-                (it.kind, &self.text[range])
-            });
-            n_attached_trivias(kind, leading_trivias)
-        };
-        self.eat_n_trivias(n_trivias - n_attached_trivias);
-        self.inner.start_node(kind);
-        self.eat_n_trivias(n_attached_trivias);
-    }
-
-    fn finish_node(&mut self) {
-        match mem::replace(&mut self.state, State::PendingFinish) {
-            State::PendingStart => unreachable!(),
-            State::PendingFinish => self.inner.finish_node(),
-            State::Normal => (),
-        }
-    }
-
-    fn error(&mut self, error: ParseError) {
-        self.inner.error(error, self.text_pos)
-    }
-}
-
-impl<'a> TextTreeSink<'a> {
-    pub(super) fn new(text: &'a str, tokens: &'a [Token]) -> Self {
-        Self {
-            text,
-            tokens,
-            text_pos: 0.into(),
-            token_pos: 0,
-            state: State::PendingStart,
-            inner: SyntaxTreeBuilder::default(),
-        }
-    }
-
-    pub(super) fn finish(mut self) -> (GreenNode, Vec<SyntaxError>) {
-        match mem::replace(&mut self.state, State::Normal) {
-            State::PendingFinish => {
-                self.eat_trivias();
-                self.inner.finish_node()
-            }
-            State::PendingStart | State::Normal => unreachable!(),
-        }
-
-        self.inner.finish_raw()
-    }
-
-    fn eat_trivias(&mut self) {
-        while let Some(&token) = self.tokens.get(self.token_pos) {
-            if !token.kind.is_trivia() {
-                break;
-            }
-            self.do_token(token.kind, token.len, 1);
-        }
-    }
-
-    fn eat_n_trivias(&mut self, n: usize) {
-        for _ in 0..n {
-            let token = self.tokens[self.token_pos];
-            assert!(token.kind.is_trivia());
-            self.do_token(token.kind, token.len, 1);
-        }
-    }
-
-    fn do_token(&mut self, kind: SyntaxKind, len: TextSize, n_tokens: usize) {
-        let range = TextRange::at(self.text_pos, len);
-        let text: SmolStr = self.text[range].into();
-        self.text_pos += len;
-        self.token_pos += n_tokens;
-        self.inner.token(kind, text);
-    }
-}
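
[Aside: illustrative sketch, not part of the patch.] What the trivia
attachment in `start_node` buys, as implemented by `n_attached_trivias`
below: a doc comment directly above an item belongs *inside* that item's
node, so for

    /// Does a thing.
    fn thing() {}

the resulting tree nests the comment under the FN node, roughly:

    FN
      COMMENT "/// Does a thing."
      WHITESPACE "\n"
      FN_KW "fn"
      ...

A plain `//` comment separated from the item by a blank line stays with the
parent node, while a `///` doc comment is attached even across one.
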
-
-fn n_attached_trivias<'a>(
-    kind: SyntaxKind,
-    trivias: impl Iterator<Item = (SyntaxKind, &'a str)>,
-) -> usize {
-    match kind {
-        MACRO_CALL | CONST | TYPE_ALIAS | STRUCT | ENUM | VARIANT | FN | TRAIT | MODULE
-        | RECORD_FIELD | STATIC => {
-            let mut res = 0;
-            let mut trivias = trivias.enumerate().peekable();
-
-            while let Some((i, (kind, text))) = trivias.next() {
-                match kind {
-                    WHITESPACE => {
-                        if text.contains("\n\n") {
-                            // we check whether the next token is a doc-comment
-                            // and skip the whitespace in this case
-                            if let Some((peek_kind, peek_text)) =
-                                trivias.peek().map(|(_, pair)| pair)
-                            {
-                                if *peek_kind == COMMENT
-                                    && peek_text.starts_with("///")
-                                    && !peek_text.starts_with("////")
-                                {
-                                    continue;
-                                }
-                            }
-                            break;
-                        }
-                    }
-                    COMMENT => {
-                        res = i + 1;
-                    }
-                    _ => (),
-                }
-            }
-            res
-        }
-        _ => 0,
-    }
-}
--
cgit v1.2.3
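
[Aside: illustrative sketch, not part of the patch.] How the pieces deleted
here fit together: the crate-internal driver in the `parsing` module (not
shown in this diff) wires the lexer, the token source and the tree sink into
one pipeline, roughly:

    // Sketch of the driver, reconstructed from the APIs above; the exact
    // function (`parse_text`) lives outside this diff.
    fn parse_text(text: &str) -> (GreenNode, Vec<SyntaxError>) {
        let (tokens, lexer_errors) = tokenize(text);
        let mut token_source = TextTokenSource::new(text, &tokens);
        let mut tree_sink = TextTreeSink::new(text, &tokens);
        parser::parse(&mut token_source, &mut tree_sink);
        let (tree, mut parser_errors) = tree_sink.finish();
        parser_errors.extend(lexer_errors);
        (tree, parser_errors)
    }
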