From ea7b569e1b133b6c19ef60c9cb2b2fd6b79847da Mon Sep 17 00:00:00 2001 From: csmoe Date: Mon, 31 Dec 2018 20:53:43 +0800 Subject: docing parser methods --- crates/ra_syntax/src/parser_api.rs | 4 ++-- crates/ra_syntax/src/parser_impl.rs | 15 +++++++++++++-- crates/ra_syntax/src/parser_impl/event.rs | 12 ++++++------ crates/ra_syntax/src/yellow/builder.rs | 4 ++-- 4 files changed, 23 insertions(+), 12 deletions(-) diff --git a/crates/ra_syntax/src/parser_api.rs b/crates/ra_syntax/src/parser_api.rs index 02421def1..0f740963d 100644 --- a/crates/ra_syntax/src/parser_api.rs +++ b/crates/ra_syntax/src/parser_api.rs @@ -61,7 +61,7 @@ impl<'t> Parser<'t> { Marker::new(self.0.start()) } - /// Advances the parser by one token. + /// Advances the parser by one token unconditionally. pub(crate) fn bump(&mut self) { self.0.bump(); } @@ -91,7 +91,7 @@ impl<'t> Parser<'t> { self.0.error(message.into()) } - /// Consume the next token if it is `kind`. + /// Consume the next token if `kind` matches. pub(crate) fn eat(&mut self, kind: SyntaxKind) -> bool { if !self.at(kind) { return false; diff --git a/crates/ra_syntax/src/parser_impl.rs b/crates/ra_syntax/src/parser_impl.rs index cb6e370ac..d4032a6d9 100644 --- a/crates/ra_syntax/src/parser_impl.rs +++ b/crates/ra_syntax/src/parser_impl.rs @@ -22,10 +22,21 @@ use crate::SyntaxKind::{self, EOF, TOMBSTONE}; pub(crate) trait Sink { type Tree; + /// Adds new leaf to the current branch. fn leaf(&mut self, kind: SyntaxKind, text: SmolStr); - fn start_internal(&mut self, kind: SyntaxKind); - fn finish_internal(&mut self); + + /// Start new branch and make it current. + fn start_branch(&mut self, kind: SyntaxKind); + + /// Finish current branch and restore previous + /// branch as current. + fn finish_branch(&mut self); + fn error(&mut self, error: SyntaxError); + + /// Complete tree building. Make sure that + /// `start_branch` and `finish_branch` calls + /// are paired! 
fn finish(self) -> Self::Tree; } diff --git a/crates/ra_syntax/src/parser_impl/event.rs b/crates/ra_syntax/src/parser_impl/event.rs index 3d8b062d5..d6299b5e3 100644 --- a/crates/ra_syntax/src/parser_impl/event.rs +++ b/crates/ra_syntax/src/parser_impl/event.rs @@ -154,7 +154,7 @@ impl<'a, S: Sink> EventProcessor<'a, S> { self.finish(last); } Event::Token { kind, n_raw_tokens } => { - self.eat_ws(); + self.eat_trivias(); let n_raw_tokens = n_raw_tokens as usize; let len = self.tokens[self.token_pos..self.token_pos + n_raw_tokens] .iter() @@ -173,7 +173,7 @@ impl<'a, S: Sink> EventProcessor<'a, S> { fn start(&mut self, kind: SyntaxKind) { if kind == SOURCE_FILE { - self.sink.start_internal(kind); + self.sink.start_branch(kind); return; } let n_trivias = self.tokens[self.token_pos..] @@ -194,18 +194,18 @@ impl<'a, S: Sink> EventProcessor<'a, S> { n_attached_trivias(kind, leading_trivias) }; self.eat_n_trivias(n_trivias - n_attached_trivias); - self.sink.start_internal(kind); + self.sink.start_branch(kind); self.eat_n_trivias(n_attached_trivias); } fn finish(&mut self, last: bool) { if last { - self.eat_ws() + self.eat_trivias() } - self.sink.finish_internal(); + self.sink.finish_branch(); } - fn eat_ws(&mut self) { + fn eat_trivias(&mut self) { while let Some(&token) = self.tokens.get(self.token_pos) { if !token.kind.is_trivia() { break; diff --git a/crates/ra_syntax/src/yellow/builder.rs b/crates/ra_syntax/src/yellow/builder.rs index 9fcebfb93..37ae6329b 100644 --- a/crates/ra_syntax/src/yellow/builder.rs +++ b/crates/ra_syntax/src/yellow/builder.rs @@ -26,11 +26,11 @@ impl Sink for GreenBuilder { self.inner.leaf(kind, text); } - fn start_internal(&mut self, kind: SyntaxKind) { + fn start_branch(&mut self, kind: SyntaxKind) { self.inner.start_internal(kind) } - fn finish_internal(&mut self) { + fn finish_branch(&mut self) { self.inner.finish_internal(); } -- cgit v1.2.3 From b01e707dba7810c3d28c82a84dec9064cc01d3c8 Mon Sep 17 00:00:00 2001 From: csmoe Date: Mon, 31 
Dec 2018 21:30:37 +0800 Subject: doc parser input --- crates/ra_syntax/src/parser_impl.rs | 13 +++++++------ crates/ra_syntax/src/parser_impl/input.rs | 27 +++++++++++++++++++++++---- 2 files changed, 30 insertions(+), 10 deletions(-) diff --git a/crates/ra_syntax/src/parser_impl.rs b/crates/ra_syntax/src/parser_impl.rs index d4032a6d9..ce321aecb 100644 --- a/crates/ra_syntax/src/parser_impl.rs +++ b/crates/ra_syntax/src/parser_impl.rs @@ -64,7 +64,6 @@ pub(crate) fn parse_with( /// the public API of the `Parser`. pub(crate) struct ParserImpl<'t> { inp: &'t ParserInput<'t>, - pos: InputPosition, events: Vec, steps: Cell, @@ -74,7 +73,6 @@ impl<'t> ParserImpl<'t> { pub(crate) fn new(inp: &'t ParserInput<'t>) -> ParserImpl<'t> { ParserImpl { inp, - pos: InputPosition::new(), events: Vec::new(), steps: Cell::new(0), @@ -89,7 +87,9 @@ impl<'t> ParserImpl<'t> { pub(super) fn next2(&self) -> Option<(SyntaxKind, SyntaxKind)> { let c1 = self.inp.kind(self.pos); let c2 = self.inp.kind(self.pos + 1); - if self.inp.start(self.pos + 1) == self.inp.start(self.pos) + self.inp.len(self.pos) { + if self.inp.token_start_at(self.pos + 1) + == self.inp.token_start_at(self.pos) + self.inp.len(self.pos) + { Some((c1, c2)) } else { None @@ -100,9 +100,10 @@ impl<'t> ParserImpl<'t> { let c1 = self.inp.kind(self.pos); let c2 = self.inp.kind(self.pos + 1); let c3 = self.inp.kind(self.pos + 2); - if self.inp.start(self.pos + 1) == self.inp.start(self.pos) + self.inp.len(self.pos) - && self.inp.start(self.pos + 2) - == self.inp.start(self.pos + 1) + self.inp.len(self.pos + 1) + if self.inp.token_start_at(self.pos + 1) + == self.inp.token_start_at(self.pos) + self.inp.len(self.pos) + && self.inp.token_start_at(self.pos + 2) + == self.inp.token_start_at(self.pos + 1) + self.inp.len(self.pos + 1) { Some((c1, c2, c3)) } else { diff --git a/crates/ra_syntax/src/parser_impl/input.rs b/crates/ra_syntax/src/parser_impl/input.rs index ac6d900d8..083a7aa15 100644 --- 
a/crates/ra_syntax/src/parser_impl/input.rs +++ b/crates/ra_syntax/src/parser_impl/input.rs @@ -4,11 +4,26 @@ use std::ops::{Add, AddAssign}; pub(crate) struct ParserInput<'t> { text: &'t str, + /// start position of each token(except whitespace and comment) + /// ```non-rust + /// struct Foo; + /// ^------^--- + /// | | ^- + /// 0 7 10 + /// ``` + /// (token, start_offset): `[(struct, 0), (Foo, 7), (;, 10)]` start_offsets: Vec, - tokens: Vec, // non-whitespace tokens + /// non-whitespace/comment tokens + /// ```non-rust + /// struct Foo {} + /// ^^^^^^ ^^^ ^^ + /// ``` + /// tokens: `[struct, Foo, {, }]` + tokens: Vec, } impl<'t> ParserInput<'t> { + /// Generate input from tokens(except comment and whitespace). pub fn new(text: &'t str, raw_tokens: &'t [Token]) -> ParserInput<'t> { let mut tokens = Vec::new(); let mut start_offsets = Vec::new(); @@ -28,6 +43,7 @@ impl<'t> ParserInput<'t> { } } + /// Get the syntax kind of token at given input position. pub fn kind(&self, pos: InputPosition) -> SyntaxKind { let idx = pos.0 as usize; if !(idx < self.tokens.len()) { @@ -36,7 +52,8 @@ impl<'t> ParserInput<'t> { self.tokens[idx].kind } - pub fn len(&self, pos: InputPosition) -> TextUnit { + /// Get the length of a token at given input position. + pub fn token_len(&self, pos: InputPosition) -> TextUnit { let idx = pos.0 as usize; if !(idx < self.tokens.len()) { return 0.into(); @@ -44,7 +61,8 @@ impl<'t> ParserInput<'t> { self.tokens[idx].len } - pub fn start(&self, pos: InputPosition) -> TextUnit { + /// Get the start position of a token at given input position. + pub fn token_start_at(&self, pos: InputPosition) -> TextUnit { let idx = pos.0 as usize; if !(idx < self.tokens.len()) { return 0.into(); @@ -52,7 +70,8 @@ impl<'t> ParserInput<'t> { self.start_offsets[idx] } - pub fn text(&self, pos: InputPosition) -> &'t str { + /// Get the raw text of a toen at given input position. 
+ pub fn token_text(&self, pos: InputPosition) -> &'t str { let idx = pos.0 as usize; if !(idx < self.tokens.len()) { return ""; -- cgit v1.2.3 From df591a1e48a876653f1f48ed595d1754470d116f Mon Sep 17 00:00:00 2001 From: csmoe Date: Tue, 1 Jan 2019 16:09:51 +0800 Subject: doc parsing events --- crates/ra_syntax/src/parser_api.rs | 17 ++++++++-- crates/ra_syntax/src/parser_impl.rs | 55 ++++++++++++++++--------------- crates/ra_syntax/src/parser_impl/event.rs | 40 ++++++++++++++-------- crates/ra_syntax/src/parser_impl/input.rs | 2 +- 4 files changed, 71 insertions(+), 43 deletions(-) diff --git a/crates/ra_syntax/src/parser_api.rs b/crates/ra_syntax/src/parser_api.rs index 0f740963d..3487aef85 100644 --- a/crates/ra_syntax/src/parser_api.rs +++ b/crates/ra_syntax/src/parser_api.rs @@ -142,11 +142,13 @@ impl Marker { } } - /// Finishes the syntax tree node and assigns `kind` to it. + /// Finishes the syntax tree node and assigns `kind` to it, + /// and creates a `CompletedMarker` for possible future + /// operations like `.precede()` to deal with forward_parent. pub(crate) fn complete(mut self, p: &mut Parser, kind: SyntaxKind) -> CompletedMarker { self.bomb.defuse(); p.0.complete(self.pos, kind); - CompletedMarker(self.pos, kind) + CompletedMarker::new(self.pos, kind) } /// Abandons the syntax tree node. All its children @@ -160,13 +162,22 @@ impl Marker { pub(crate) struct CompletedMarker(u32, SyntaxKind); impl CompletedMarker { - /// This one is tricky :-) + fn new(pos: u32, kind: SyntaxKind) -> Self { + CompletedMarker(pos, kind) + } + /// This method allows to create a new node which starts /// *before* the current one. That is, parser could start /// node `A`, then complete it, and then after parsing the /// whole `A`, decide that it should have started some node /// `B` before starting `A`. `precede` allows to do exactly /// that. See also docs about `forward_parent` in `Event::Start`. 
+ /// + /// Given completed events `[START, FINISH]` and its corresponding + /// `CompletedMarker(pos: 0, _)`. + /// Append a new `START` events as `[START, FINISH, NEWSTART]`, + /// then mark `NEWSTART` as `START`'s parent with saving its relative + /// distance to `NEWSTART` into forward_parent(=2 in this case); pub(crate) fn precede(self, p: &mut Parser) -> Marker { Marker::new(p.0.precede(self.0)) } diff --git a/crates/ra_syntax/src/parser_impl.rs b/crates/ra_syntax/src/parser_impl.rs index ce321aecb..01a51cd8d 100644 --- a/crates/ra_syntax/src/parser_impl.rs +++ b/crates/ra_syntax/src/parser_impl.rs @@ -63,7 +63,7 @@ pub(crate) fn parse_with( /// to a separate struct in order not to pollute /// the public API of the `Parser`. pub(crate) struct ParserImpl<'t> { - inp: &'t ParserInput<'t>, + parser_input: &'t ParserInput<'t>, pos: InputPosition, events: Vec, steps: Cell, @@ -72,7 +72,7 @@ pub(crate) struct ParserImpl<'t> { impl<'t> ParserImpl<'t> { pub(crate) fn new(inp: &'t ParserInput<'t>) -> ParserImpl<'t> { ParserImpl { - inp, + parser_input: inp, pos: InputPosition::new(), events: Vec::new(), steps: Cell::new(0), @@ -85,10 +85,10 @@ impl<'t> ParserImpl<'t> { } pub(super) fn next2(&self) -> Option<(SyntaxKind, SyntaxKind)> { - let c1 = self.inp.kind(self.pos); - let c2 = self.inp.kind(self.pos + 1); - if self.inp.token_start_at(self.pos + 1) - == self.inp.token_start_at(self.pos) + self.inp.len(self.pos) + let c1 = self.parser_input.kind(self.pos); + let c2 = self.parser_input.kind(self.pos + 1); + if self.parser_input.token_start_at(self.pos + 1) + == self.parser_input.token_start_at(self.pos) + self.parser_input.token_len(self.pos) { Some((c1, c2)) } else { @@ -97,13 +97,14 @@ impl<'t> ParserImpl<'t> { } pub(super) fn next3(&self) -> Option<(SyntaxKind, SyntaxKind, SyntaxKind)> { - let c1 = self.inp.kind(self.pos); - let c2 = self.inp.kind(self.pos + 1); - let c3 = self.inp.kind(self.pos + 2); - if self.inp.token_start_at(self.pos + 1) - == 
self.inp.token_start_at(self.pos) + self.inp.len(self.pos) - && self.inp.token_start_at(self.pos + 2) - == self.inp.token_start_at(self.pos + 1) + self.inp.len(self.pos + 1) + let c1 = self.parser_input.kind(self.pos); + let c2 = self.parser_input.kind(self.pos + 1); + let c3 = self.parser_input.kind(self.pos + 2); + if self.parser_input.token_start_at(self.pos + 1) + == self.parser_input.token_start_at(self.pos) + self.parser_input.token_len(self.pos) + && self.parser_input.token_start_at(self.pos + 2) + == self.parser_input.token_start_at(self.pos + 1) + + self.parser_input.token_len(self.pos + 1) { Some((c1, c2, c3)) } else { @@ -111,29 +112,27 @@ impl<'t> ParserImpl<'t> { } } + /// Get the syntax kind of the nth token. pub(super) fn nth(&self, n: u32) -> SyntaxKind { let steps = self.steps.get(); - if steps > 10_000_000 { - panic!("the parser seems stuck"); - } + assert!(steps <= 10_000_000, "the parser seems stuck"); self.steps.set(steps + 1); - self.inp.kind(self.pos + n) + self.parser_input.kind(self.pos + n) } pub(super) fn at_kw(&self, t: &str) -> bool { - self.inp.text(self.pos) == t + self.parser_input.token_text(self.pos) == t } + /// Start parsing right behind the last event. pub(super) fn start(&mut self) -> u32 { let pos = self.events.len() as u32; - self.event(Event::Start { - kind: TOMBSTONE, - forward_parent: None, - }); + self.push_event(Event::tombstone()); pos } + /// Advances the parser by one token unconditionally. pub(super) fn bump(&mut self) { let kind = self.nth(0); if kind == EOF { @@ -156,15 +155,17 @@ impl<'t> ParserImpl<'t> { fn do_bump(&mut self, kind: SyntaxKind, n_raw_tokens: u8) { self.pos += u32::from(n_raw_tokens); - self.event(Event::Token { kind, n_raw_tokens }); + self.push_event(Event::Token { kind, n_raw_tokens }); } + /// Append one Error event to the back of events. 
pub(super) fn error(&mut self, msg: String) { - self.event(Event::Error { + self.push_event(Event::Error { msg: ParseError(msg), }) } + /// Complete an event with appending a `Finish` event. pub(super) fn complete(&mut self, pos: u32, kind: SyntaxKind) { match self.events[pos as usize] { Event::Start { @@ -174,9 +175,10 @@ impl<'t> ParserImpl<'t> { } _ => unreachable!(), } - self.event(Event::Finish); + self.push_event(Event::Finish); } + /// Ignore the dummy `Start` event. pub(super) fn abandon(&mut self, pos: u32) { let idx = pos as usize; if idx == self.events.len() - 1 { @@ -190,6 +192,7 @@ impl<'t> ParserImpl<'t> { } } + /// Save the relative distance of a completed event to its forward_parent. pub(super) fn precede(&mut self, pos: u32) -> u32 { let new_pos = self.start(); match self.events[pos as usize] { @@ -204,7 +207,7 @@ impl<'t> ParserImpl<'t> { new_pos } - fn event(&mut self, event: Event) { + fn push_event(&mut self, event: Event) { self.events.push(event) } } diff --git a/crates/ra_syntax/src/parser_impl/event.rs b/crates/ra_syntax/src/parser_impl/event.rs index d6299b5e3..835b2c78d 100644 --- a/crates/ra_syntax/src/parser_impl/event.rs +++ b/crates/ra_syntax/src/parser_impl/event.rs @@ -36,7 +36,7 @@ pub(crate) enum Event { /// /// For left-recursive syntactic constructs, the parser produces /// a child node before it sees a parent. `forward_parent` - /// exists to allow to tweak parent-child relationships. + /// saves the position of current event's parent. /// /// Consider this path /// @@ -84,6 +84,15 @@ pub(crate) enum Event { }, } +impl Event { + pub(crate) fn tombstone() -> Self { + Event::Start { + kind: TOMBSTONE, + forward_parent: None, + } + } +} + pub(super) struct EventProcessor<'a, S: Sink> { sink: S, text_pos: TextUnit, @@ -110,17 +119,12 @@ impl<'a, S: Sink> EventProcessor<'a, S> { } } + /// Generate the syntax tree with the control of events. 
pub(super) fn process(mut self) -> S { - fn tombstone() -> Event { - Event::Start { - kind: TOMBSTONE, - forward_parent: None, - } - } let mut forward_parents = Vec::new(); for i in 0..self.events.len() { - match mem::replace(&mut self.events[i], tombstone()) { + match mem::replace(&mut self.events[i], Event::tombstone()) { Event::Start { kind: TOMBSTONE, .. } => (), @@ -129,12 +133,18 @@ impl<'a, S: Sink> EventProcessor<'a, S> { kind, forward_parent, } => { + // For events[A, B, C], B is A's forward_parent, C is B's forward_parent, + // in the normal control flow, the parent-child relation: `A -> B -> C`, + // while with the magic forward_parent, it writes: `C <- B <- A`. + + // append `A` into parents. forward_parents.push(kind); let mut idx = i; let mut fp = forward_parent; while let Some(fwd) = fp { idx += fwd as usize; - fp = match mem::replace(&mut self.events[idx], tombstone()) { + // append `A`'s forward_parent `B` + fp = match mem::replace(&mut self.events[idx], Event::tombstone()) { Event::Start { kind, forward_parent, @@ -144,14 +154,16 @@ impl<'a, S: Sink> EventProcessor<'a, S> { } _ => unreachable!(), }; + // append `B`'s forward_parent `C` in the next stage. } + for kind in forward_parents.drain(..).rev() { self.start(kind); } } Event::Finish => { - let last = i == self.events.len() - 1; - self.finish(last); + let is_last = i == self.events.len() - 1; + self.finish(is_last); } Event::Token { kind, n_raw_tokens } => { self.eat_trivias(); @@ -171,6 +183,7 @@ impl<'a, S: Sink> EventProcessor<'a, S> { self.sink } + /// Add the node into syntax tree but discard the comments/whitespaces. 
fn start(&mut self, kind: SyntaxKind) { if kind == SOURCE_FILE { self.sink.start_branch(kind); @@ -198,8 +211,8 @@ impl<'a, S: Sink> EventProcessor<'a, S> { self.eat_n_trivias(n_attached_trivias); } - fn finish(&mut self, last: bool) { - if last { + fn finish(&mut self, is_last: bool) { + if is_last { self.eat_trivias() } self.sink.finish_branch(); @@ -235,6 +248,7 @@ fn n_attached_trivias<'a>( kind: SyntaxKind, trivias: impl Iterator, ) -> usize { + // FIXME: parse attached trivias of CONST_DEF/TYPE_DEF match kind { STRUCT_DEF | ENUM_DEF | FN_DEF | TRAIT_DEF | MODULE => { let mut res = 0; diff --git a/crates/ra_syntax/src/parser_impl/input.rs b/crates/ra_syntax/src/parser_impl/input.rs index 083a7aa15..7fde5b3ab 100644 --- a/crates/ra_syntax/src/parser_impl/input.rs +++ b/crates/ra_syntax/src/parser_impl/input.rs @@ -70,7 +70,7 @@ impl<'t> ParserInput<'t> { self.start_offsets[idx] } - /// Get the raw text of a toen at given input position. + /// Get the raw text of a token at given input position. 
pub fn token_text(&self, pos: InputPosition) -> &'t str { let idx = pos.0 as usize; if !(idx < self.tokens.len()) { -- cgit v1.2.3 From 58139c558aa085588264ba659b8483a036c1da0e Mon Sep 17 00:00:00 2001 From: csmoe Date: Fri, 4 Jan 2019 12:22:57 +0800 Subject: consume trivias for type/const def --- crates/ra_syntax/src/parser_impl/event.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/crates/ra_syntax/src/parser_impl/event.rs b/crates/ra_syntax/src/parser_impl/event.rs index 835b2c78d..73dd6e02b 100644 --- a/crates/ra_syntax/src/parser_impl/event.rs +++ b/crates/ra_syntax/src/parser_impl/event.rs @@ -248,9 +248,8 @@ fn n_attached_trivias<'a>( kind: SyntaxKind, trivias: impl Iterator, ) -> usize { - // FIXME: parse attached trivias of CONST_DEF/TYPE_DEF match kind { - STRUCT_DEF | ENUM_DEF | FN_DEF | TRAIT_DEF | MODULE => { + CONST_DEF | TYPE_DEF | STRUCT_DEF | ENUM_DEF | FN_DEF | TRAIT_DEF | MODULE => { let mut res = 0; for (i, (kind, text)) in trivias.enumerate() { match kind { -- cgit v1.2.3