From 0c81b9deeed81bfb2cf8142af9d748317d5d71a1 Mon Sep 17 00:00:00 2001
From: Aleksey Kladov
Date: Wed, 20 Feb 2019 21:50:07 +0300
Subject: route parsing via TokenSource trait

---
 crates/ra_syntax/src/parsing/parser_api.rs        |  4 +-
 crates/ra_syntax/src/parsing/parser_impl.rs       | 50 ++++++++--------
 crates/ra_syntax/src/parsing/parser_impl/input.rs | 69 ++++++++++-------------
 3 files changed, 59 insertions(+), 64 deletions(-)
(limited to 'crates/ra_syntax/src')

diff --git a/crates/ra_syntax/src/parsing/parser_api.rs b/crates/ra_syntax/src/parsing/parser_api.rs
index 781c407de..813ae494c 100644
--- a/crates/ra_syntax/src/parsing/parser_api.rs
+++ b/crates/ra_syntax/src/parsing/parser_api.rs
@@ -17,7 +17,9 @@ use crate::{
 /// tree, but rather a flat stream of events of the form
 /// "start expression, consume number literal,
 /// finish expression". See `Event` docs for more.
-pub(crate) struct Parser<'t>(pub(super) ParserImpl<'t>);
+pub(crate) struct Parser<'t>(
+    pub(super) ParserImpl<crate::parsing::parser_impl::input::ParserInput<'t>>,
+);
 
 impl<'t> Parser<'t> {
     /// Returns the kind of the current token.
diff --git a/crates/ra_syntax/src/parsing/parser_impl.rs b/crates/ra_syntax/src/parsing/parser_impl.rs
index 02baed76b..c0d2b6ec1 100644
--- a/crates/ra_syntax/src/parsing/parser_impl.rs
+++ b/crates/ra_syntax/src/parsing/parser_impl.rs
@@ -1,5 +1,5 @@
 mod event;
-mod input;
+pub(crate) mod input;
 
 use std::cell::Cell;
 
@@ -11,7 +11,7 @@ use crate::{
         parser_api::Parser,
         parser_impl::{
             event::{Event, EventProcessor},
-            input::{InputPosition, ParserInput},
+            input::InputPosition,
         },
     },
 };
@@ -39,6 +39,12 @@ pub(super) trait TreeSink {
     fn finish(self) -> Self::Tree;
 }
 
+pub(super) trait TokenSource {
+    fn token_kind(&self, pos: InputPosition) -> SyntaxKind;
+    fn is_token_joint_to_next(&self, pos: InputPosition) -> bool;
+    fn is_keyword(&self, pos: InputPosition, kw: &str) -> bool;
+}
+
 /// Parse a sequence of tokens into the representative node tree
 pub(super) fn parse_with<S: TreeSink>(
     sink: S,
@@ -48,7 +54,7 @@ pub(super) fn parse_with<S: TreeSink>(
 ) -> S::Tree {
     let mut events = {
         let input = input::ParserInput::new(text, tokens);
-        let parser_impl = ParserImpl::new(&input);
+        let parser_impl = ParserImpl::new(input);
         let mut parser_api = Parser(parser_impl);
         parser(&mut parser_api);
         parser_api.0.into_events()
@@ -59,17 +65,17 @@
 /// Implementation details of `Parser`, extracted
 /// to a separate struct in order not to pollute
 /// the public API of the `Parser`.
-pub(super) struct ParserImpl<'t> {
-    parser_input: &'t ParserInput<'t>,
+pub(super) struct ParserImpl<S: TokenSource> {
+    token_source: S,
     pos: InputPosition,
     events: Vec<Event>,
     steps: Cell<u32>,
 }
 
-impl<'t> ParserImpl<'t> {
-    fn new(inp: &'t ParserInput<'t>) -> ParserImpl<'t> {
+impl<S: TokenSource> ParserImpl<S> {
+    fn new(token_source: S) -> ParserImpl<S> {
         ParserImpl {
-            parser_input: inp,
+            token_source,
             pos: InputPosition::new(),
             events: Vec::new(),
             steps: Cell::new(0),
@@ -82,11 +88,9 @@ impl<'t> ParserImpl<'t> {
     }
 
     pub(super) fn current2(&self) -> Option<(SyntaxKind, SyntaxKind)> {
-        let c1 = self.parser_input.kind(self.pos);
-        let c2 = self.parser_input.kind(self.pos + 1);
-        if self.parser_input.token_start_at(self.pos + 1)
-            == self.parser_input.token_start_at(self.pos) + self.parser_input.token_len(self.pos)
-        {
+        let c1 = self.token_source.token_kind(self.pos);
+        let c2 = self.token_source.token_kind(self.pos + 1);
+        if self.token_source.is_token_joint_to_next(self.pos) {
             Some((c1, c2))
         } else {
             None
@@ -94,14 +98,11 @@ impl<'t> ParserImpl<'t> {
     }
 
     pub(super) fn current3(&self) -> Option<(SyntaxKind, SyntaxKind, SyntaxKind)> {
-        let c1 = self.parser_input.kind(self.pos);
-        let c2 = self.parser_input.kind(self.pos + 1);
-        let c3 = self.parser_input.kind(self.pos + 2);
-        if self.parser_input.token_start_at(self.pos + 1)
-            == self.parser_input.token_start_at(self.pos) + self.parser_input.token_len(self.pos)
-            && self.parser_input.token_start_at(self.pos + 2)
-                == self.parser_input.token_start_at(self.pos + 1)
-                    + self.parser_input.token_len(self.pos + 1)
+        let c1 = self.token_source.token_kind(self.pos);
+        let c2 = self.token_source.token_kind(self.pos + 1);
+        let c3 = self.token_source.token_kind(self.pos + 2);
+        if self.token_source.is_token_joint_to_next(self.pos)
+            && self.token_source.is_token_joint_to_next(self.pos + 1)
         {
             Some((c1, c2, c3))
         } else {
@@ -114,12 +115,11 @@ impl<'t> ParserImpl<'t> {
         let steps = self.steps.get();
         assert!(steps <= 10_000_000, "the parser seems stuck");
         self.steps.set(steps + 1);
-
-        self.parser_input.kind(self.pos + n)
+        self.token_source.token_kind(self.pos + n)
     }
 
-    pub(super) fn at_kw(&self, t: &str) -> bool {
-        self.parser_input.token_text(self.pos) == t
+    pub(super) fn at_kw(&self, kw: &str) -> bool {
+        self.token_source.is_keyword(self.pos, kw)
     }
 
     /// Start parsing right behind the last event.
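
Aside: the hunks above make ParserImpl generic over its token source instead of tying it to a borrowed ParserInput, so anything that can report the kind of the token at a position and whether that token is glued to the next one can drive the parser. The sketch below illustrates the pattern; ToyKind, ToyTokenSource, ToyTokens, ToyParser and at_shr are made-up stand-ins rather than ra_syntax types, and only the shape of the trait mirrors the TokenSource introduced above.

// Illustrative sketch only: ToyKind, ToyTokenSource, ToyTokens and ToyParser are
// made-up stand-ins, not ra_syntax types. The point is the shape of the
// abstraction: the parser core sees tokens only through a small trait, so the
// lexer-backed input and any future token stream can plug in interchangeably.

#[derive(Copy, Clone, PartialEq, Eq)]
enum ToyKind {
    RAngle, // `>`
    Eof,
}

trait ToyTokenSource {
    fn token_kind(&self, pos: usize) -> ToyKind;
    fn is_token_joint_to_next(&self, pos: usize) -> bool;
}

// A flat, pre-lexed token list: (kind, joint-to-next) pairs.
struct ToyTokens {
    tokens: Vec<(ToyKind, bool)>,
}

impl ToyTokenSource for ToyTokens {
    fn token_kind(&self, pos: usize) -> ToyKind {
        self.tokens.get(pos).map(|t| t.0).unwrap_or(ToyKind::Eof)
    }
    fn is_token_joint_to_next(&self, pos: usize) -> bool {
        self.tokens.get(pos).map(|t| t.1).unwrap_or(true)
    }
}

// A parser core generic over its source, mirroring `ParserImpl<S>`.
struct ToyParser<S> {
    source: S,
    pos: usize,
}

impl<S: ToyTokenSource> ToyParser<S> {
    // Two adjacent `>` tokens may be glued into `>>` only if they are joint.
    fn at_shr(&self) -> bool {
        self.source.token_kind(self.pos) == ToyKind::RAngle
            && self.source.is_token_joint_to_next(self.pos)
            && self.source.token_kind(self.pos + 1) == ToyKind::RAngle
    }
}

fn main() {
    let tokens = ToyTokens { tokens: vec![(ToyKind::RAngle, true), (ToyKind::RAngle, false)] };
    let parser = ToyParser { source: tokens, pos: 0 };
    assert!(parser.at_shr());
}

One payoff of the indirection is that a future token source, for example one backed by macro-expanded token trees rather than lexed text, only has to implement the trait; the parser core itself stays unchanged.
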
diff --git a/crates/ra_syntax/src/parsing/parser_impl/input.rs b/crates/ra_syntax/src/parsing/parser_impl/input.rs
index 275d94918..8ebbd3825 100644
--- a/crates/ra_syntax/src/parsing/parser_impl/input.rs
+++ b/crates/ra_syntax/src/parsing/parser_impl/input.rs
@@ -1,10 +1,40 @@
 use crate::{
     SyntaxKind, SyntaxKind::EOF, TextRange, TextUnit,
-    parsing::lexer::Token,
+    parsing::{
+        parser_impl::TokenSource,
+        lexer::Token,
+    },
 };
 
 use std::ops::{Add, AddAssign};
 
+impl<'t> TokenSource for ParserInput<'t> {
+    fn token_kind(&self, pos: InputPosition) -> SyntaxKind {
+        let idx = pos.0 as usize;
+        if !(idx < self.tokens.len()) {
+            return EOF;
+        }
+        self.tokens[idx].kind
+    }
+    fn is_token_joint_to_next(&self, pos: InputPosition) -> bool {
+        let idx_curr = pos.0 as usize;
+        let idx_next = pos.0 as usize + 1;
+        if !(idx_next < self.tokens.len()) {
+            return true;
+        }
+        self.start_offsets[idx_curr] + self.tokens[idx_curr].len == self.start_offsets[idx_next]
+    }
+    fn is_keyword(&self, pos: InputPosition, kw: &str) -> bool {
+        let idx = pos.0 as usize;
+        if !(idx < self.tokens.len()) {
+            return false;
+        }
+        let range = TextRange::offset_len(self.start_offsets[idx], self.tokens[idx].len);
+
+        self.text[range] == *kw
+    }
+}
+
 pub(crate) struct ParserInput<'t> {
     text: &'t str,
     /// start position of each token(expect whitespace and comment)
@@ -41,43 +71,6 @@ impl<'t> ParserInput<'t> {
 
         ParserInput { text, start_offsets, tokens }
     }
-
-    /// Get the syntax kind of token at given input position.
-    pub fn kind(&self, pos: InputPosition) -> SyntaxKind {
-        let idx = pos.0 as usize;
-        if !(idx < self.tokens.len()) {
-            return EOF;
-        }
-        self.tokens[idx].kind
-    }
-
-    /// Get the length of a token at given input position.
-    pub fn token_len(&self, pos: InputPosition) -> TextUnit {
-        let idx = pos.0 as usize;
-        if !(idx < self.tokens.len()) {
-            return 0.into();
-        }
-        self.tokens[idx].len
-    }
-
-    /// Get the start position of a taken at given input position.
-    pub fn token_start_at(&self, pos: InputPosition) -> TextUnit {
-        let idx = pos.0 as usize;
-        if !(idx < self.tokens.len()) {
-            return 0.into();
-        }
-        self.start_offsets[idx]
-    }
-
-    /// Get the raw text of a token at given input position.
-    pub fn token_text(&self, pos: InputPosition) -> &'t str {
-        let idx = pos.0 as usize;
-        if !(idx < self.tokens.len()) {
-            return "";
-        }
-        let range = TextRange::offset_len(self.start_offsets[idx], self.tokens[idx].len);
-        &self.text[range]
-    }
 }
 
 #[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq)]
--
cgit v1.2.3
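
Aside: is_token_joint_to_next in the hunk above reports whether the token at pos ends exactly where the next token starts, i.e. whether no whitespace or comment lay between them in the original text; current2/current3 in parser_impl.rs rely on it before treating two or three adjacent tokens (such as two > tokens forming >>) as one unit. The sketch below reproduces the same offset arithmetic with plain u32 offsets and a hypothetical joint_to_next helper standing in for TextUnit and the ParserInput tables.

// Illustrative sketch only: plain u32 offsets and a hypothetical `joint_to_next`
// helper stand in for TextUnit and the ParserInput offset/length tables.
// Token `i` is "joint" to token `i + 1` when start[i] + len[i] == start[i + 1],
// i.e. no trivia (whitespace, comments) was stripped out between them.
fn joint_to_next(starts: &[u32], lens: &[u32], i: usize) -> bool {
    match starts.get(i + 1) {
        None => true, // past the last token: treated as joint, as in the patch
        Some(&next_start) => starts[i] + lens[i] == next_start,
    }
}

fn main() {
    // Source text "a >> b", lexed with trivia stripped: `a`, `>`, `>`, `b`
    let starts = [0u32, 2, 3, 5];
    let lens = [1u32, 1, 1, 1];
    assert!(!joint_to_next(&starts, &lens, 0)); // `a` and `>` are separated by a space
    assert!(joint_to_next(&starts, &lens, 1)); // the two `>` are adjacent and can form `>>`
    assert!(!joint_to_next(&starts, &lens, 2)); // `>` and `b` are separated by a space
}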