From 50a02eb3593591a02677e1b56f24d7ff0459b9d0 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Wed, 12 Aug 2020 17:06:49 +0200 Subject: Rename ra_parser -> parser --- crates/parser/src/lib.rs | 149 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 149 insertions(+) create mode 100644 crates/parser/src/lib.rs (limited to 'crates/parser/src/lib.rs') diff --git a/crates/parser/src/lib.rs b/crates/parser/src/lib.rs new file mode 100644 index 000000000..eeb8ad66b --- /dev/null +++ b/crates/parser/src/lib.rs @@ -0,0 +1,149 @@ +//! The Rust parser. +//! +//! The parser doesn't know about concrete representation of tokens and syntax +//! trees. Abstract `TokenSource` and `TreeSink` traits are used instead. As a +//! consequence, this crates does not contain a lexer. +//! +//! The `Parser` struct from the `parser` module is a cursor into the sequence +//! of tokens. Parsing routines use `Parser` to inspect current state and +//! advance the parsing. +//! +//! The actual parsing happens in the `grammar` module. +//! +//! Tests for this crate live in `ra_syntax` crate. + +#[macro_use] +mod token_set; +#[macro_use] +mod syntax_kind; +mod event; +mod parser; +mod grammar; + +pub(crate) use token_set::TokenSet; + +pub use syntax_kind::SyntaxKind; + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct ParseError(pub Box); + +/// `TokenSource` abstracts the source of the tokens parser operates on. +/// +/// Hopefully this will allow us to treat text and token trees in the same way! +pub trait TokenSource { + fn current(&self) -> Token; + + /// Lookahead n token + fn lookahead_nth(&self, n: usize) -> Token; + + /// bump cursor to next token + fn bump(&mut self); + + /// Is the current token a specified keyword? + fn is_keyword(&self, kw: &str) -> bool; +} + +/// `Token` abstracts the cursor of `TokenSource` operates on. +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub struct Token { + /// What is the current token? + pub kind: SyntaxKind, + + /// Is the current token joined to the next one (`> >` vs `>>`). + pub is_jointed_to_next: bool, +} + +/// `TreeSink` abstracts details of a particular syntax tree implementation. +pub trait TreeSink { + /// Adds new token to the current branch. + fn token(&mut self, kind: SyntaxKind, n_tokens: u8); + + /// Start new branch and make it current. + fn start_node(&mut self, kind: SyntaxKind); + + /// Finish current branch and restore previous + /// branch as current. + fn finish_node(&mut self); + + fn error(&mut self, error: ParseError); +} + +fn parse_from_tokens(token_source: &mut dyn TokenSource, tree_sink: &mut dyn TreeSink, f: F) +where + F: FnOnce(&mut parser::Parser), +{ + let mut p = parser::Parser::new(token_source); + f(&mut p); + let events = p.finish(); + event::process(tree_sink, events); +} + +/// Parse given tokens into the given sink as a rust file. +pub fn parse(token_source: &mut dyn TokenSource, tree_sink: &mut dyn TreeSink) { + parse_from_tokens(token_source, tree_sink, grammar::root); +} + +#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)] +pub enum FragmentKind { + Path, + Expr, + Statement, + Type, + Pattern, + Item, + Block, + Visibility, + MetaItem, + + // These kinds are used when parsing the result of expansion + // FIXME: use separate fragment kinds for macro inputs and outputs? + Items, + Statements, +} + +pub fn parse_fragment( + token_source: &mut dyn TokenSource, + tree_sink: &mut dyn TreeSink, + fragment_kind: FragmentKind, +) { + let parser: fn(&'_ mut parser::Parser) = match fragment_kind { + FragmentKind::Path => grammar::fragments::path, + FragmentKind::Expr => grammar::fragments::expr, + FragmentKind::Type => grammar::fragments::type_, + FragmentKind::Pattern => grammar::fragments::pattern, + FragmentKind::Item => grammar::fragments::item, + FragmentKind::Block => grammar::fragments::block_expr, + FragmentKind::Visibility => grammar::fragments::opt_visibility, + FragmentKind::MetaItem => grammar::fragments::meta_item, + FragmentKind::Statement => grammar::fragments::stmt, + FragmentKind::Items => grammar::fragments::macro_items, + FragmentKind::Statements => grammar::fragments::macro_stmts, + }; + parse_from_tokens(token_source, tree_sink, parser) +} + +/// A parsing function for a specific braced-block. +pub struct Reparser(fn(&mut parser::Parser)); + +impl Reparser { + /// If the node is a braced block, return the corresponding `Reparser`. + pub fn for_node( + node: SyntaxKind, + first_child: Option, + parent: Option, + ) -> Option { + grammar::reparser(node, first_child, parent).map(Reparser) + } + + /// Re-parse given tokens using this `Reparser`. + /// + /// Tokens must start with `{`, end with `}` and form a valid brace + /// sequence. + pub fn parse(self, token_source: &mut dyn TokenSource, tree_sink: &mut dyn TreeSink) { + let Reparser(r) = self; + let mut p = parser::Parser::new(token_source); + r(&mut p); + let events = p.finish(); + event::process(tree_sink, events); + } +} -- cgit v1.2.3