From 415c891d641fa305e7ddbbbcc78db990dd5d3564 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sun, 29 Jul 2018 15:16:07 +0300 Subject: Reorganize --- src/lexer/comments.rs | 3 +- src/lexer/mod.rs | 50 ++++--- src/lexer/numbers.rs | 3 +- src/lexer/strings.rs | 3 +- src/lib.rs | 37 +++-- src/parser/event.rs | 33 ++++- src/parser/grammar/mod.rs | 13 +- src/parser/input.rs | 10 +- src/parser/mod.rs | 26 ++-- src/parser/parser/imp.rs | 3 +- src/parser/parser/mod.rs | 3 +- src/syntax_kinds.rs | 326 ------------------------------------------ src/syntax_kinds/generated.rs | 326 ++++++++++++++++++++++++++++++++++++++++++ src/syntax_kinds/mod.rs | 27 ++++ src/tree/file_builder.rs | 87 ----------- src/tree/mod.rs | 27 ---- src/yellow/builder.rs | 68 +++++++++ src/yellow/mod.rs | 4 +- src/yellow/syntax.rs | 10 +- tests/parser.rs | 5 +- tools/src/bin/gen.rs | 18 +-- tools/src/bin/parse.rs | 5 +- 22 files changed, 550 insertions(+), 537 deletions(-) delete mode 100644 src/syntax_kinds.rs create mode 100644 src/syntax_kinds/generated.rs create mode 100644 src/syntax_kinds/mod.rs delete mode 100644 src/tree/file_builder.rs delete mode 100644 src/tree/mod.rs create mode 100644 src/yellow/builder.rs diff --git a/src/lexer/comments.rs b/src/lexer/comments.rs index d1e958817..01acb6515 100644 --- a/src/lexer/comments.rs +++ b/src/lexer/comments.rs @@ -1,7 +1,6 @@ use lexer::ptr::Ptr; -use SyntaxKind; -use syntax_kinds::*; +use SyntaxKind::{self, *}; pub(crate) fn scan_shebang(ptr: &mut Ptr) -> bool { if ptr.next_is('!') && ptr.nnext_is('/') { diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs index 65a994327..69cab5b57 100644 --- a/src/lexer/mod.rs +++ b/src/lexer/mod.rs @@ -1,21 +1,32 @@ -use {SyntaxKind, Token}; -use syntax_kinds::*; - mod ptr; -use self::ptr::Ptr; - +mod comments; +mod strings; +mod numbers; mod classes; -use self::classes::*; -mod numbers; -use self::numbers::scan_number; +use { + TextUnit, + SyntaxKind::{self, *}, +}; -mod strings; -use self::strings::{is_string_literal_start, scan_byte_char_or_string, scan_char, scan_raw_string, - scan_string}; +use self::{ + ptr::Ptr, + classes::*, + numbers::scan_number, + strings::{ + is_string_literal_start, scan_byte_char_or_string, scan_char, + scan_raw_string, scan_string}, + comments::{scan_comment, scan_shebang}, +}; -mod comments; -use self::comments::{scan_comment, scan_shebang}; +/// A token of Rust source. +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Token { + /// The kind of token. + pub kind: SyntaxKind, + /// The length of the token. + pub len: TextUnit, +} /// Break a string up into its component tokens pub fn tokenize(text: &str) -> Vec { @@ -29,6 +40,7 @@ pub fn tokenize(text: &str) -> Vec { } acc } + /// Get the next token from a string pub fn next_token(text: &str) -> Token { assert!(!text.is_empty()); @@ -109,7 +121,7 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind { DOTDOT } _ => DOT, - } + }; } ':' => { return match ptr.next() { @@ -118,7 +130,7 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind { COLONCOLON } _ => COLON, - } + }; } '=' => { return match ptr.next() { @@ -131,7 +143,7 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind { FAT_ARROW } _ => EQ, - } + }; } '!' => { return match ptr.next() { @@ -140,7 +152,7 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind { NEQ } _ => EXCL, - } + }; } '-' => { return if ptr.next_is('>') { @@ -148,7 +160,7 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind { THIN_ARROW } else { MINUS - } + }; } // If the character is an ident start not followed by another single @@ -202,7 +214,7 @@ fn scan_ident(c: char, ptr: &mut Ptr) -> SyntaxKind { return if c == '_' { UNDERSCORE } else { IDENT }; } ptr.bump_while(is_ident_continue); - if let Some(kind) = ident_to_keyword(ptr.current_token_text()) { + if let Some(kind) = SyntaxKind::from_keyword(ptr.current_token_text()) { return kind; } IDENT diff --git a/src/lexer/numbers.rs b/src/lexer/numbers.rs index 95e42246f..38eac9212 100644 --- a/src/lexer/numbers.rs +++ b/src/lexer/numbers.rs @@ -1,8 +1,7 @@ use lexer::ptr::Ptr; use lexer::classes::*; -use SyntaxKind; -use syntax_kinds::*; +use SyntaxKind::{self, *}; pub(crate) fn scan_number(c: char, ptr: &mut Ptr) -> SyntaxKind { if c == '0' { diff --git a/src/lexer/strings.rs b/src/lexer/strings.rs index 00a84ec85..e3704fbb3 100644 --- a/src/lexer/strings.rs +++ b/src/lexer/strings.rs @@ -1,5 +1,4 @@ -use SyntaxKind; -use syntax_kinds::*; +use SyntaxKind::{self, *}; use lexer::ptr::Ptr; diff --git a/src/lib.rs b/src/lib.rs index 619ad62e5..4260e22e7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -19,27 +19,36 @@ extern crate unicode_xid; extern crate text_unit; -mod tree; mod lexer; mod parser; mod yellow; +mod syntax_kinds; + +pub use { + text_unit::{TextRange, TextUnit}, + syntax_kinds::SyntaxKind, + yellow::{SyntaxNode}, + lexer::{tokenize, Token}, +}; + +pub(crate) use { + yellow::SyntaxError +}; + +pub fn parse(text: String) -> SyntaxNode { + let tokens = tokenize(&text); + parser::parse::(text, &tokens) +} -pub mod syntax_kinds; -pub use text_unit::{TextRange, TextUnit}; -pub use tree::{SyntaxKind, Token}; -pub(crate) use tree::{Sink, GreenBuilder}; -pub use lexer::{next_token, tokenize}; -pub use yellow::SyntaxNode; -pub(crate) use yellow::SError; -pub use parser::{parse_green}; /// Utilities for simple uses of the parser. pub mod utils { - use std::fmt::Write; + use std::{ + fmt::Write, + collections::BTreeSet + }; - use {SyntaxNode}; - use std::collections::BTreeSet; - use SError; + use {SyntaxNode, SyntaxError}; /// Parse a file and create a string representation of the resulting parse tree. pub fn dump_tree_green(syntax: &SyntaxNode) -> String { @@ -48,7 +57,7 @@ pub mod utils { go(syntax, &mut result, 0, &mut errors); return result; - fn go(node: &SyntaxNode, buff: &mut String, level: usize, errors: &mut BTreeSet) { + fn go(node: &SyntaxNode, buff: &mut String, level: usize, errors: &mut BTreeSet) { buff.push_str(&String::from(" ").repeat(level)); write!(buff, "{:?}\n", node).unwrap(); let my_errors: Vec<_> = errors.iter().filter(|e| e.offset == node.range().start()) diff --git a/src/parser/event.rs b/src/parser/event.rs index 83039c664..a8d503b3d 100644 --- a/src/parser/event.rs +++ b/src/parser/event.rs @@ -1,8 +1,29 @@ +//! This module provides a way to construct a `File`. +//! It is intended to be completely decoupled from the +//! parser, so as to allow to evolve the tree representation +//! and the parser algorithm independently. +//! +//! The `Sink` trait is the bridge between the parser and the +//! tree builder: the parser produces a stream of events like +//! `start node`, `finish node`, and `FileBuilder` converts +//! this stream to a real tree. use { - Sink, SyntaxKind, Token, - syntax_kinds::TOMBSTONE, + TextUnit, + SyntaxKind::{self, TOMBSTONE}, + lexer::Token, }; -use super::is_insignificant; + +pub(crate) trait Sink { + type Tree; + + fn new(text: String) -> Self; + + fn leaf(&mut self, kind: SyntaxKind, len: TextUnit); + fn start_internal(&mut self, kind: SyntaxKind); + fn finish_internal(&mut self); + fn error(&mut self, err: String); + fn finish(self) -> Self::Tree; +} /// `Parser` produces a flat list of `Event`s. /// They are converted to a tree-structure in @@ -67,7 +88,7 @@ pub(crate) enum Event { }, } -pub(super) fn process(builder: &mut Sink, tokens: &[Token], events: Vec) { +pub(super) fn process(builder: &mut impl Sink, tokens: &[Token], events: Vec) { let mut idx = 0; let mut holes = Vec::new(); @@ -111,7 +132,7 @@ pub(super) fn process(builder: &mut Sink, tokens: &[Token], events: Vec) &Event::Finish => { while idx < tokens.len() { let token = tokens[idx]; - if is_insignificant(token.kind) { + if token.kind.is_trivia() { idx += 1; builder.leaf(token.kind, token.len); } else { @@ -128,7 +149,7 @@ pub(super) fn process(builder: &mut Sink, tokens: &[Token], events: Vec) // this should be done in a sensible manner instead loop { let token = tokens[idx]; - if !is_insignificant(token.kind) { + if !token.kind.is_trivia() { break; } builder.leaf(token.kind, token.len); diff --git a/src/parser/grammar/mod.rs b/src/parser/grammar/mod.rs index 23216452f..085e62d56 100644 --- a/src/parser/grammar/mod.rs +++ b/src/parser/grammar/mod.rs @@ -21,11 +21,6 @@ //! After adding a new inline-test, run `cargo collect-tests` to extract //! it as a standalone text-fixture into `tests/data/parser/inline`, and //! run `cargo test` once to create the "gold" value. -use parser::parser::Parser; -use parser::token_set::TokenSet; -use SyntaxKind; -use syntax_kinds::*; - mod items; mod attributes; mod expressions; @@ -34,6 +29,14 @@ mod patterns; mod paths; mod type_params; +use { + SyntaxKind::{self, *}, + parser::{ + parser::Parser, + token_set::TokenSet + } +}; + pub(crate) fn file(p: &mut Parser) { let file = p.start(); p.eat(SHEBANG); diff --git a/src/parser/input.rs b/src/parser/input.rs index 9b400b959..052981fbc 100644 --- a/src/parser/input.rs +++ b/src/parser/input.rs @@ -1,6 +1,8 @@ -use {SyntaxKind, TextRange, TextUnit, Token}; -use syntax_kinds::EOF; -use super::is_insignificant; +use { + SyntaxKind, TextRange, TextUnit, + SyntaxKind::EOF, + lexer::Token, +}; use std::ops::{Add, AddAssign}; @@ -16,7 +18,7 @@ impl<'t> ParserInput<'t> { let mut start_offsets = Vec::new(); let mut len = 0.into(); for &token in raw_tokens.iter() { - if !is_insignificant(token.kind) { + if !token.kind.is_trivia() { tokens.push(token); start_offsets.push(len); } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index b7d5e5832..e72ab05af 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -5,18 +5,16 @@ mod input; mod event; mod grammar; -use std::sync::Arc; use { - Token, - yellow::SyntaxNode, - syntax_kinds::* + lexer::Token, + parser::event::{process} }; -use GreenBuilder; -use parser::event::process; + +pub(crate) use self::event::Sink; /// Parse a sequence of tokens into the representative node tree -pub fn parse_green(text: String, tokens: &[Token]) -> SyntaxNode { +pub(crate) fn parse(text: String, tokens: &[Token]) -> S::Tree { let events = { let input = input::ParserInput::new(&text, tokens); let parser_impl = parser::imp::ParserImpl::new(&input); @@ -24,15 +22,7 @@ pub fn parse_green(text: String, tokens: &[Token]) -> SyntaxNode { grammar::file(&mut parser); parser.0.into_events() }; - let mut builder = GreenBuilder::new(text); - process(&mut builder, tokens, events); - let (green, errors) = builder.finish(); - SyntaxNode::new(Arc::new(green), errors) -} - -fn is_insignificant(kind: SyntaxKind) -> bool { - match kind { - WHITESPACE | COMMENT => true, - _ => false, - } + let mut sink = S::new(text); + process(&mut sink, tokens, events); + sink.finish() } diff --git a/src/parser/parser/imp.rs b/src/parser/parser/imp.rs index f2641c388..38237ac06 100644 --- a/src/parser/parser/imp.rs +++ b/src/parser/parser/imp.rs @@ -1,8 +1,7 @@ use parser::input::{InputPosition, ParserInput}; use parser::event::Event; -use SyntaxKind; -use syntax_kinds::{EOF, TOMBSTONE}; +use SyntaxKind::{self, EOF, TOMBSTONE}; /// Implementation details of `Parser`, extracted /// to a separate struct in order not to pollute diff --git a/src/parser/parser/mod.rs b/src/parser/parser/mod.rs index 58f820738..0930ff9e4 100644 --- a/src/parser/parser/mod.rs +++ b/src/parser/parser/mod.rs @@ -1,5 +1,4 @@ -use SyntaxKind; -use syntax_kinds::ERROR; +use SyntaxKind::{self, ERROR}; pub(super) mod imp; use self::imp::ParserImpl; diff --git a/src/syntax_kinds.rs b/src/syntax_kinds.rs deleted file mode 100644 index 1cc29bb61..000000000 --- a/src/syntax_kinds.rs +++ /dev/null @@ -1,326 +0,0 @@ -#![allow(bad_style, missing_docs, unreachable_pub)] -#![cfg_attr(rustfmt, rustfmt_skip)] -//! Generated from grammar.ron -use tree::SyntaxInfo; - -/// The kind of syntax node, e.g. `IDENT`, `USE_KW`, or `STRUCT_DEF`. -#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub enum SyntaxKind { - ERROR, - IDENT, - UNDERSCORE, - WHITESPACE, - INT_NUMBER, - FLOAT_NUMBER, - SEMI, - COMMA, - DOT, - DOTDOT, - DOTDOTDOT, - DOTDOTEQ, - L_PAREN, - R_PAREN, - L_CURLY, - R_CURLY, - L_BRACK, - R_BRACK, - L_ANGLE, - R_ANGLE, - AT, - POUND, - TILDE, - QUESTION, - COLON, - COLONCOLON, - DOLLAR, - EQ, - EQEQ, - FAT_ARROW, - NEQ, - EXCL, - LIFETIME, - CHAR, - BYTE, - STRING, - RAW_STRING, - BYTE_STRING, - RAW_BYTE_STRING, - PLUS, - MINUS, - STAR, - SLASH, - CARET, - PERCENT, - AMPERSAND, - PIPE, - THIN_ARROW, - COMMENT, - DOC_COMMENT, - SHEBANG, - USE_KW, - FN_KW, - STRUCT_KW, - ENUM_KW, - TRAIT_KW, - IMPL_KW, - TRUE_KW, - FALSE_KW, - AS_KW, - EXTERN_KW, - CRATE_KW, - MOD_KW, - PUB_KW, - SELF_KW, - SUPER_KW, - IN_KW, - WHERE_KW, - FOR_KW, - LOOP_KW, - WHILE_KW, - IF_KW, - MATCH_KW, - CONST_KW, - STATIC_KW, - MUT_KW, - UNSAFE_KW, - TYPE_KW, - REF_KW, - LET_KW, - AUTO_KW, - DEFAULT_KW, - UNION_KW, - FILE, - STRUCT_ITEM, - ENUM_ITEM, - FN_ITEM, - EXTERN_CRATE_ITEM, - MOD_ITEM, - USE_ITEM, - STATIC_ITEM, - CONST_ITEM, - TRAIT_ITEM, - IMPL_ITEM, - TYPE_ITEM, - PAREN_TYPE, - TUPLE_TYPE, - NEVER_TYPE, - PATH_TYPE, - POINTER_TYPE, - ARRAY_TYPE, - SLICE_TYPE, - REFERENCE_TYPE, - PLACEHOLDER_TYPE, - FN_POINTER_TYPE, - FOR_TYPE, - REF_PAT, - BIND_PAT, - PLACEHOLDER_PAT, - TUPLE_EXPR, - EXTERN_BLOCK, - ENUM_VARIANT, - NAMED_FIELD, - POS_FIELD, - ATTR, - META_ITEM, - USE_TREE, - PATH, - PATH_SEGMENT, - LITERAL, - ALIAS, - VISIBILITY, - TYPE_PARAM_LIST, - WHERE_CLAUSE, - LIFETIME_PARAM, - TYPE_PARAM, - ABI, - NAME, - NAME_REF, - VALUE_PARAMETER, - BLOCK, - LET_STMT, - - // Technical SyntaxKinds: they appear temporally during parsing, - // but never end up in the final tree - #[doc(hidden)] - TOMBSTONE, - #[doc(hidden)] - EOF, -} -pub(crate) use self::SyntaxKind::*; - -impl SyntaxKind { - pub(crate) fn info(self) -> &'static SyntaxInfo { - match self { - ERROR => &SyntaxInfo { name: "ERROR" }, - IDENT => &SyntaxInfo { name: "IDENT" }, - UNDERSCORE => &SyntaxInfo { name: "UNDERSCORE" }, - WHITESPACE => &SyntaxInfo { name: "WHITESPACE" }, - INT_NUMBER => &SyntaxInfo { name: "INT_NUMBER" }, - FLOAT_NUMBER => &SyntaxInfo { name: "FLOAT_NUMBER" }, - SEMI => &SyntaxInfo { name: "SEMI" }, - COMMA => &SyntaxInfo { name: "COMMA" }, - DOT => &SyntaxInfo { name: "DOT" }, - DOTDOT => &SyntaxInfo { name: "DOTDOT" }, - DOTDOTDOT => &SyntaxInfo { name: "DOTDOTDOT" }, - DOTDOTEQ => &SyntaxInfo { name: "DOTDOTEQ" }, - L_PAREN => &SyntaxInfo { name: "L_PAREN" }, - R_PAREN => &SyntaxInfo { name: "R_PAREN" }, - L_CURLY => &SyntaxInfo { name: "L_CURLY" }, - R_CURLY => &SyntaxInfo { name: "R_CURLY" }, - L_BRACK => &SyntaxInfo { name: "L_BRACK" }, - R_BRACK => &SyntaxInfo { name: "R_BRACK" }, - L_ANGLE => &SyntaxInfo { name: "L_ANGLE" }, - R_ANGLE => &SyntaxInfo { name: "R_ANGLE" }, - AT => &SyntaxInfo { name: "AT" }, - POUND => &SyntaxInfo { name: "POUND" }, - TILDE => &SyntaxInfo { name: "TILDE" }, - QUESTION => &SyntaxInfo { name: "QUESTION" }, - COLON => &SyntaxInfo { name: "COLON" }, - COLONCOLON => &SyntaxInfo { name: "COLONCOLON" }, - DOLLAR => &SyntaxInfo { name: "DOLLAR" }, - EQ => &SyntaxInfo { name: "EQ" }, - EQEQ => &SyntaxInfo { name: "EQEQ" }, - FAT_ARROW => &SyntaxInfo { name: "FAT_ARROW" }, - NEQ => &SyntaxInfo { name: "NEQ" }, - EXCL => &SyntaxInfo { name: "EXCL" }, - LIFETIME => &SyntaxInfo { name: "LIFETIME" }, - CHAR => &SyntaxInfo { name: "CHAR" }, - BYTE => &SyntaxInfo { name: "BYTE" }, - STRING => &SyntaxInfo { name: "STRING" }, - RAW_STRING => &SyntaxInfo { name: "RAW_STRING" }, - BYTE_STRING => &SyntaxInfo { name: "BYTE_STRING" }, - RAW_BYTE_STRING => &SyntaxInfo { name: "RAW_BYTE_STRING" }, - PLUS => &SyntaxInfo { name: "PLUS" }, - MINUS => &SyntaxInfo { name: "MINUS" }, - STAR => &SyntaxInfo { name: "STAR" }, - SLASH => &SyntaxInfo { name: "SLASH" }, - CARET => &SyntaxInfo { name: "CARET" }, - PERCENT => &SyntaxInfo { name: "PERCENT" }, - AMPERSAND => &SyntaxInfo { name: "AMPERSAND" }, - PIPE => &SyntaxInfo { name: "PIPE" }, - THIN_ARROW => &SyntaxInfo { name: "THIN_ARROW" }, - COMMENT => &SyntaxInfo { name: "COMMENT" }, - DOC_COMMENT => &SyntaxInfo { name: "DOC_COMMENT" }, - SHEBANG => &SyntaxInfo { name: "SHEBANG" }, - USE_KW => &SyntaxInfo { name: "USE_KW" }, - FN_KW => &SyntaxInfo { name: "FN_KW" }, - STRUCT_KW => &SyntaxInfo { name: "STRUCT_KW" }, - ENUM_KW => &SyntaxInfo { name: "ENUM_KW" }, - TRAIT_KW => &SyntaxInfo { name: "TRAIT_KW" }, - IMPL_KW => &SyntaxInfo { name: "IMPL_KW" }, - TRUE_KW => &SyntaxInfo { name: "TRUE_KW" }, - FALSE_KW => &SyntaxInfo { name: "FALSE_KW" }, - AS_KW => &SyntaxInfo { name: "AS_KW" }, - EXTERN_KW => &SyntaxInfo { name: "EXTERN_KW" }, - CRATE_KW => &SyntaxInfo { name: "CRATE_KW" }, - MOD_KW => &SyntaxInfo { name: "MOD_KW" }, - PUB_KW => &SyntaxInfo { name: "PUB_KW" }, - SELF_KW => &SyntaxInfo { name: "SELF_KW" }, - SUPER_KW => &SyntaxInfo { name: "SUPER_KW" }, - IN_KW => &SyntaxInfo { name: "IN_KW" }, - WHERE_KW => &SyntaxInfo { name: "WHERE_KW" }, - FOR_KW => &SyntaxInfo { name: "FOR_KW" }, - LOOP_KW => &SyntaxInfo { name: "LOOP_KW" }, - WHILE_KW => &SyntaxInfo { name: "WHILE_KW" }, - IF_KW => &SyntaxInfo { name: "IF_KW" }, - MATCH_KW => &SyntaxInfo { name: "MATCH_KW" }, - CONST_KW => &SyntaxInfo { name: "CONST_KW" }, - STATIC_KW => &SyntaxInfo { name: "STATIC_KW" }, - MUT_KW => &SyntaxInfo { name: "MUT_KW" }, - UNSAFE_KW => &SyntaxInfo { name: "UNSAFE_KW" }, - TYPE_KW => &SyntaxInfo { name: "TYPE_KW" }, - REF_KW => &SyntaxInfo { name: "REF_KW" }, - LET_KW => &SyntaxInfo { name: "LET_KW" }, - AUTO_KW => &SyntaxInfo { name: "AUTO_KW" }, - DEFAULT_KW => &SyntaxInfo { name: "DEFAULT_KW" }, - UNION_KW => &SyntaxInfo { name: "UNION_KW" }, - FILE => &SyntaxInfo { name: "FILE" }, - STRUCT_ITEM => &SyntaxInfo { name: "STRUCT_ITEM" }, - ENUM_ITEM => &SyntaxInfo { name: "ENUM_ITEM" }, - FN_ITEM => &SyntaxInfo { name: "FN_ITEM" }, - EXTERN_CRATE_ITEM => &SyntaxInfo { name: "EXTERN_CRATE_ITEM" }, - MOD_ITEM => &SyntaxInfo { name: "MOD_ITEM" }, - USE_ITEM => &SyntaxInfo { name: "USE_ITEM" }, - STATIC_ITEM => &SyntaxInfo { name: "STATIC_ITEM" }, - CONST_ITEM => &SyntaxInfo { name: "CONST_ITEM" }, - TRAIT_ITEM => &SyntaxInfo { name: "TRAIT_ITEM" }, - IMPL_ITEM => &SyntaxInfo { name: "IMPL_ITEM" }, - TYPE_ITEM => &SyntaxInfo { name: "TYPE_ITEM" }, - PAREN_TYPE => &SyntaxInfo { name: "PAREN_TYPE" }, - TUPLE_TYPE => &SyntaxInfo { name: "TUPLE_TYPE" }, - NEVER_TYPE => &SyntaxInfo { name: "NEVER_TYPE" }, - PATH_TYPE => &SyntaxInfo { name: "PATH_TYPE" }, - POINTER_TYPE => &SyntaxInfo { name: "POINTER_TYPE" }, - ARRAY_TYPE => &SyntaxInfo { name: "ARRAY_TYPE" }, - SLICE_TYPE => &SyntaxInfo { name: "SLICE_TYPE" }, - REFERENCE_TYPE => &SyntaxInfo { name: "REFERENCE_TYPE" }, - PLACEHOLDER_TYPE => &SyntaxInfo { name: "PLACEHOLDER_TYPE" }, - FN_POINTER_TYPE => &SyntaxInfo { name: "FN_POINTER_TYPE" }, - FOR_TYPE => &SyntaxInfo { name: "FOR_TYPE" }, - REF_PAT => &SyntaxInfo { name: "REF_PAT" }, - BIND_PAT => &SyntaxInfo { name: "BIND_PAT" }, - PLACEHOLDER_PAT => &SyntaxInfo { name: "PLACEHOLDER_PAT" }, - TUPLE_EXPR => &SyntaxInfo { name: "TUPLE_EXPR" }, - EXTERN_BLOCK => &SyntaxInfo { name: "EXTERN_BLOCK" }, - ENUM_VARIANT => &SyntaxInfo { name: "ENUM_VARIANT" }, - NAMED_FIELD => &SyntaxInfo { name: "NAMED_FIELD" }, - POS_FIELD => &SyntaxInfo { name: "POS_FIELD" }, - ATTR => &SyntaxInfo { name: "ATTR" }, - META_ITEM => &SyntaxInfo { name: "META_ITEM" }, - USE_TREE => &SyntaxInfo { name: "USE_TREE" }, - PATH => &SyntaxInfo { name: "PATH" }, - PATH_SEGMENT => &SyntaxInfo { name: "PATH_SEGMENT" }, - LITERAL => &SyntaxInfo { name: "LITERAL" }, - ALIAS => &SyntaxInfo { name: "ALIAS" }, - VISIBILITY => &SyntaxInfo { name: "VISIBILITY" }, - TYPE_PARAM_LIST => &SyntaxInfo { name: "TYPE_PARAM_LIST" }, - WHERE_CLAUSE => &SyntaxInfo { name: "WHERE_CLAUSE" }, - LIFETIME_PARAM => &SyntaxInfo { name: "LIFETIME_PARAM" }, - TYPE_PARAM => &SyntaxInfo { name: "TYPE_PARAM" }, - ABI => &SyntaxInfo { name: "ABI" }, - NAME => &SyntaxInfo { name: "NAME" }, - NAME_REF => &SyntaxInfo { name: "NAME_REF" }, - VALUE_PARAMETER => &SyntaxInfo { name: "VALUE_PARAMETER" }, - BLOCK => &SyntaxInfo { name: "BLOCK" }, - LET_STMT => &SyntaxInfo { name: "LET_STMT" }, - - TOMBSTONE => &SyntaxInfo { name: "TOMBSTONE" }, - EOF => &SyntaxInfo { name: "EOF" }, - } - } -} - -pub(crate) fn ident_to_keyword(ident: &str) -> Option { - match ident { - "use" => Some(USE_KW), - "fn" => Some(FN_KW), - "struct" => Some(STRUCT_KW), - "enum" => Some(ENUM_KW), - "trait" => Some(TRAIT_KW), - "impl" => Some(IMPL_KW), - "true" => Some(TRUE_KW), - "false" => Some(FALSE_KW), - "as" => Some(AS_KW), - "extern" => Some(EXTERN_KW), - "crate" => Some(CRATE_KW), - "mod" => Some(MOD_KW), - "pub" => Some(PUB_KW), - "self" => Some(SELF_KW), - "super" => Some(SUPER_KW), - "in" => Some(IN_KW), - "where" => Some(WHERE_KW), - "for" => Some(FOR_KW), - "loop" => Some(LOOP_KW), - "while" => Some(WHILE_KW), - "if" => Some(IF_KW), - "match" => Some(MATCH_KW), - "const" => Some(CONST_KW), - "static" => Some(STATIC_KW), - "mut" => Some(MUT_KW), - "unsafe" => Some(UNSAFE_KW), - "type" => Some(TYPE_KW), - "ref" => Some(REF_KW), - "let" => Some(LET_KW), - _ => None, - } -} diff --git a/src/syntax_kinds/generated.rs b/src/syntax_kinds/generated.rs new file mode 100644 index 000000000..d332fd02e --- /dev/null +++ b/src/syntax_kinds/generated.rs @@ -0,0 +1,326 @@ +#![allow(bad_style, missing_docs, unreachable_pub)] +#![cfg_attr(rustfmt, rustfmt_skip)] +//! Generated from grammar.ron +use super::SyntaxInfo; + +/// The kind of syntax node, e.g. `IDENT`, `USE_KW`, or `STRUCT_DEF`. +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum SyntaxKind { + ERROR, + IDENT, + UNDERSCORE, + WHITESPACE, + INT_NUMBER, + FLOAT_NUMBER, + SEMI, + COMMA, + DOT, + DOTDOT, + DOTDOTDOT, + DOTDOTEQ, + L_PAREN, + R_PAREN, + L_CURLY, + R_CURLY, + L_BRACK, + R_BRACK, + L_ANGLE, + R_ANGLE, + AT, + POUND, + TILDE, + QUESTION, + COLON, + COLONCOLON, + DOLLAR, + EQ, + EQEQ, + FAT_ARROW, + NEQ, + EXCL, + LIFETIME, + CHAR, + BYTE, + STRING, + RAW_STRING, + BYTE_STRING, + RAW_BYTE_STRING, + PLUS, + MINUS, + STAR, + SLASH, + CARET, + PERCENT, + AMPERSAND, + PIPE, + THIN_ARROW, + COMMENT, + DOC_COMMENT, + SHEBANG, + USE_KW, + FN_KW, + STRUCT_KW, + ENUM_KW, + TRAIT_KW, + IMPL_KW, + TRUE_KW, + FALSE_KW, + AS_KW, + EXTERN_KW, + CRATE_KW, + MOD_KW, + PUB_KW, + SELF_KW, + SUPER_KW, + IN_KW, + WHERE_KW, + FOR_KW, + LOOP_KW, + WHILE_KW, + IF_KW, + MATCH_KW, + CONST_KW, + STATIC_KW, + MUT_KW, + UNSAFE_KW, + TYPE_KW, + REF_KW, + LET_KW, + AUTO_KW, + DEFAULT_KW, + UNION_KW, + FILE, + STRUCT_ITEM, + ENUM_ITEM, + FN_ITEM, + EXTERN_CRATE_ITEM, + MOD_ITEM, + USE_ITEM, + STATIC_ITEM, + CONST_ITEM, + TRAIT_ITEM, + IMPL_ITEM, + TYPE_ITEM, + PAREN_TYPE, + TUPLE_TYPE, + NEVER_TYPE, + PATH_TYPE, + POINTER_TYPE, + ARRAY_TYPE, + SLICE_TYPE, + REFERENCE_TYPE, + PLACEHOLDER_TYPE, + FN_POINTER_TYPE, + FOR_TYPE, + REF_PAT, + BIND_PAT, + PLACEHOLDER_PAT, + TUPLE_EXPR, + EXTERN_BLOCK, + ENUM_VARIANT, + NAMED_FIELD, + POS_FIELD, + ATTR, + META_ITEM, + USE_TREE, + PATH, + PATH_SEGMENT, + LITERAL, + ALIAS, + VISIBILITY, + TYPE_PARAM_LIST, + WHERE_CLAUSE, + LIFETIME_PARAM, + TYPE_PARAM, + ABI, + NAME, + NAME_REF, + VALUE_PARAMETER, + BLOCK, + LET_STMT, + + // Technical SyntaxKinds: they appear temporally during parsing, + // but never end up in the final tree + #[doc(hidden)] + TOMBSTONE, + #[doc(hidden)] + EOF, +} +pub(crate) use self::SyntaxKind::*; + +impl SyntaxKind { + pub(crate) fn info(self) -> &'static SyntaxInfo { + match self { + ERROR => &SyntaxInfo { name: "ERROR" }, + IDENT => &SyntaxInfo { name: "IDENT" }, + UNDERSCORE => &SyntaxInfo { name: "UNDERSCORE" }, + WHITESPACE => &SyntaxInfo { name: "WHITESPACE" }, + INT_NUMBER => &SyntaxInfo { name: "INT_NUMBER" }, + FLOAT_NUMBER => &SyntaxInfo { name: "FLOAT_NUMBER" }, + SEMI => &SyntaxInfo { name: "SEMI" }, + COMMA => &SyntaxInfo { name: "COMMA" }, + DOT => &SyntaxInfo { name: "DOT" }, + DOTDOT => &SyntaxInfo { name: "DOTDOT" }, + DOTDOTDOT => &SyntaxInfo { name: "DOTDOTDOT" }, + DOTDOTEQ => &SyntaxInfo { name: "DOTDOTEQ" }, + L_PAREN => &SyntaxInfo { name: "L_PAREN" }, + R_PAREN => &SyntaxInfo { name: "R_PAREN" }, + L_CURLY => &SyntaxInfo { name: "L_CURLY" }, + R_CURLY => &SyntaxInfo { name: "R_CURLY" }, + L_BRACK => &SyntaxInfo { name: "L_BRACK" }, + R_BRACK => &SyntaxInfo { name: "R_BRACK" }, + L_ANGLE => &SyntaxInfo { name: "L_ANGLE" }, + R_ANGLE => &SyntaxInfo { name: "R_ANGLE" }, + AT => &SyntaxInfo { name: "AT" }, + POUND => &SyntaxInfo { name: "POUND" }, + TILDE => &SyntaxInfo { name: "TILDE" }, + QUESTION => &SyntaxInfo { name: "QUESTION" }, + COLON => &SyntaxInfo { name: "COLON" }, + COLONCOLON => &SyntaxInfo { name: "COLONCOLON" }, + DOLLAR => &SyntaxInfo { name: "DOLLAR" }, + EQ => &SyntaxInfo { name: "EQ" }, + EQEQ => &SyntaxInfo { name: "EQEQ" }, + FAT_ARROW => &SyntaxInfo { name: "FAT_ARROW" }, + NEQ => &SyntaxInfo { name: "NEQ" }, + EXCL => &SyntaxInfo { name: "EXCL" }, + LIFETIME => &SyntaxInfo { name: "LIFETIME" }, + CHAR => &SyntaxInfo { name: "CHAR" }, + BYTE => &SyntaxInfo { name: "BYTE" }, + STRING => &SyntaxInfo { name: "STRING" }, + RAW_STRING => &SyntaxInfo { name: "RAW_STRING" }, + BYTE_STRING => &SyntaxInfo { name: "BYTE_STRING" }, + RAW_BYTE_STRING => &SyntaxInfo { name: "RAW_BYTE_STRING" }, + PLUS => &SyntaxInfo { name: "PLUS" }, + MINUS => &SyntaxInfo { name: "MINUS" }, + STAR => &SyntaxInfo { name: "STAR" }, + SLASH => &SyntaxInfo { name: "SLASH" }, + CARET => &SyntaxInfo { name: "CARET" }, + PERCENT => &SyntaxInfo { name: "PERCENT" }, + AMPERSAND => &SyntaxInfo { name: "AMPERSAND" }, + PIPE => &SyntaxInfo { name: "PIPE" }, + THIN_ARROW => &SyntaxInfo { name: "THIN_ARROW" }, + COMMENT => &SyntaxInfo { name: "COMMENT" }, + DOC_COMMENT => &SyntaxInfo { name: "DOC_COMMENT" }, + SHEBANG => &SyntaxInfo { name: "SHEBANG" }, + USE_KW => &SyntaxInfo { name: "USE_KW" }, + FN_KW => &SyntaxInfo { name: "FN_KW" }, + STRUCT_KW => &SyntaxInfo { name: "STRUCT_KW" }, + ENUM_KW => &SyntaxInfo { name: "ENUM_KW" }, + TRAIT_KW => &SyntaxInfo { name: "TRAIT_KW" }, + IMPL_KW => &SyntaxInfo { name: "IMPL_KW" }, + TRUE_KW => &SyntaxInfo { name: "TRUE_KW" }, + FALSE_KW => &SyntaxInfo { name: "FALSE_KW" }, + AS_KW => &SyntaxInfo { name: "AS_KW" }, + EXTERN_KW => &SyntaxInfo { name: "EXTERN_KW" }, + CRATE_KW => &SyntaxInfo { name: "CRATE_KW" }, + MOD_KW => &SyntaxInfo { name: "MOD_KW" }, + PUB_KW => &SyntaxInfo { name: "PUB_KW" }, + SELF_KW => &SyntaxInfo { name: "SELF_KW" }, + SUPER_KW => &SyntaxInfo { name: "SUPER_KW" }, + IN_KW => &SyntaxInfo { name: "IN_KW" }, + WHERE_KW => &SyntaxInfo { name: "WHERE_KW" }, + FOR_KW => &SyntaxInfo { name: "FOR_KW" }, + LOOP_KW => &SyntaxInfo { name: "LOOP_KW" }, + WHILE_KW => &SyntaxInfo { name: "WHILE_KW" }, + IF_KW => &SyntaxInfo { name: "IF_KW" }, + MATCH_KW => &SyntaxInfo { name: "MATCH_KW" }, + CONST_KW => &SyntaxInfo { name: "CONST_KW" }, + STATIC_KW => &SyntaxInfo { name: "STATIC_KW" }, + MUT_KW => &SyntaxInfo { name: "MUT_KW" }, + UNSAFE_KW => &SyntaxInfo { name: "UNSAFE_KW" }, + TYPE_KW => &SyntaxInfo { name: "TYPE_KW" }, + REF_KW => &SyntaxInfo { name: "REF_KW" }, + LET_KW => &SyntaxInfo { name: "LET_KW" }, + AUTO_KW => &SyntaxInfo { name: "AUTO_KW" }, + DEFAULT_KW => &SyntaxInfo { name: "DEFAULT_KW" }, + UNION_KW => &SyntaxInfo { name: "UNION_KW" }, + FILE => &SyntaxInfo { name: "FILE" }, + STRUCT_ITEM => &SyntaxInfo { name: "STRUCT_ITEM" }, + ENUM_ITEM => &SyntaxInfo { name: "ENUM_ITEM" }, + FN_ITEM => &SyntaxInfo { name: "FN_ITEM" }, + EXTERN_CRATE_ITEM => &SyntaxInfo { name: "EXTERN_CRATE_ITEM" }, + MOD_ITEM => &SyntaxInfo { name: "MOD_ITEM" }, + USE_ITEM => &SyntaxInfo { name: "USE_ITEM" }, + STATIC_ITEM => &SyntaxInfo { name: "STATIC_ITEM" }, + CONST_ITEM => &SyntaxInfo { name: "CONST_ITEM" }, + TRAIT_ITEM => &SyntaxInfo { name: "TRAIT_ITEM" }, + IMPL_ITEM => &SyntaxInfo { name: "IMPL_ITEM" }, + TYPE_ITEM => &SyntaxInfo { name: "TYPE_ITEM" }, + PAREN_TYPE => &SyntaxInfo { name: "PAREN_TYPE" }, + TUPLE_TYPE => &SyntaxInfo { name: "TUPLE_TYPE" }, + NEVER_TYPE => &SyntaxInfo { name: "NEVER_TYPE" }, + PATH_TYPE => &SyntaxInfo { name: "PATH_TYPE" }, + POINTER_TYPE => &SyntaxInfo { name: "POINTER_TYPE" }, + ARRAY_TYPE => &SyntaxInfo { name: "ARRAY_TYPE" }, + SLICE_TYPE => &SyntaxInfo { name: "SLICE_TYPE" }, + REFERENCE_TYPE => &SyntaxInfo { name: "REFERENCE_TYPE" }, + PLACEHOLDER_TYPE => &SyntaxInfo { name: "PLACEHOLDER_TYPE" }, + FN_POINTER_TYPE => &SyntaxInfo { name: "FN_POINTER_TYPE" }, + FOR_TYPE => &SyntaxInfo { name: "FOR_TYPE" }, + REF_PAT => &SyntaxInfo { name: "REF_PAT" }, + BIND_PAT => &SyntaxInfo { name: "BIND_PAT" }, + PLACEHOLDER_PAT => &SyntaxInfo { name: "PLACEHOLDER_PAT" }, + TUPLE_EXPR => &SyntaxInfo { name: "TUPLE_EXPR" }, + EXTERN_BLOCK => &SyntaxInfo { name: "EXTERN_BLOCK" }, + ENUM_VARIANT => &SyntaxInfo { name: "ENUM_VARIANT" }, + NAMED_FIELD => &SyntaxInfo { name: "NAMED_FIELD" }, + POS_FIELD => &SyntaxInfo { name: "POS_FIELD" }, + ATTR => &SyntaxInfo { name: "ATTR" }, + META_ITEM => &SyntaxInfo { name: "META_ITEM" }, + USE_TREE => &SyntaxInfo { name: "USE_TREE" }, + PATH => &SyntaxInfo { name: "PATH" }, + PATH_SEGMENT => &SyntaxInfo { name: "PATH_SEGMENT" }, + LITERAL => &SyntaxInfo { name: "LITERAL" }, + ALIAS => &SyntaxInfo { name: "ALIAS" }, + VISIBILITY => &SyntaxInfo { name: "VISIBILITY" }, + TYPE_PARAM_LIST => &SyntaxInfo { name: "TYPE_PARAM_LIST" }, + WHERE_CLAUSE => &SyntaxInfo { name: "WHERE_CLAUSE" }, + LIFETIME_PARAM => &SyntaxInfo { name: "LIFETIME_PARAM" }, + TYPE_PARAM => &SyntaxInfo { name: "TYPE_PARAM" }, + ABI => &SyntaxInfo { name: "ABI" }, + NAME => &SyntaxInfo { name: "NAME" }, + NAME_REF => &SyntaxInfo { name: "NAME_REF" }, + VALUE_PARAMETER => &SyntaxInfo { name: "VALUE_PARAMETER" }, + BLOCK => &SyntaxInfo { name: "BLOCK" }, + LET_STMT => &SyntaxInfo { name: "LET_STMT" }, + + TOMBSTONE => &SyntaxInfo { name: "TOMBSTONE" }, + EOF => &SyntaxInfo { name: "EOF" }, + } + } + pub(crate) fn from_keyword(ident: &str) -> Option { + match ident { + "use" => Some(USE_KW), + "fn" => Some(FN_KW), + "struct" => Some(STRUCT_KW), + "enum" => Some(ENUM_KW), + "trait" => Some(TRAIT_KW), + "impl" => Some(IMPL_KW), + "true" => Some(TRUE_KW), + "false" => Some(FALSE_KW), + "as" => Some(AS_KW), + "extern" => Some(EXTERN_KW), + "crate" => Some(CRATE_KW), + "mod" => Some(MOD_KW), + "pub" => Some(PUB_KW), + "self" => Some(SELF_KW), + "super" => Some(SUPER_KW), + "in" => Some(IN_KW), + "where" => Some(WHERE_KW), + "for" => Some(FOR_KW), + "loop" => Some(LOOP_KW), + "while" => Some(WHILE_KW), + "if" => Some(IF_KW), + "match" => Some(MATCH_KW), + "const" => Some(CONST_KW), + "static" => Some(STATIC_KW), + "mut" => Some(MUT_KW), + "unsafe" => Some(UNSAFE_KW), + "type" => Some(TYPE_KW), + "ref" => Some(REF_KW), + "let" => Some(LET_KW), + _ => None, + } + } +} + diff --git a/src/syntax_kinds/mod.rs b/src/syntax_kinds/mod.rs new file mode 100644 index 000000000..a8e9bfe29 --- /dev/null +++ b/src/syntax_kinds/mod.rs @@ -0,0 +1,27 @@ +mod generated; + +use std::fmt; +use ::{SyntaxKind::*}; + +pub use self::generated::SyntaxKind; + +impl fmt::Debug for SyntaxKind { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let name = self.info().name; + f.write_str(name) + } +} + +pub(crate) struct SyntaxInfo { + pub name: &'static str, +} + + +impl SyntaxKind { + pub(crate) fn is_trivia(self: SyntaxKind) -> bool { + match self { + WHITESPACE | COMMENT | DOC_COMMENT => true, + _ => false, + } + } +} diff --git a/src/tree/file_builder.rs b/src/tree/file_builder.rs deleted file mode 100644 index f5d1751f9..000000000 --- a/src/tree/file_builder.rs +++ /dev/null @@ -1,87 +0,0 @@ -//! This module provides a way to construct a `File`. -//! It is intended to be completely decoupled from the -//! parser, so as to allow to evolve the tree representation -//! and the parser algorithm independently. -//! -//! The `Sink` trait is the bridge between the parser and the -//! tree builder: the parser produces a stream of events like -//! `start node`, `finish node`, and `FileBuilder` converts -//! this stream to a real tree. -use std::sync::Arc; -use { - SyntaxKind, TextRange, TextUnit, - yellow::GreenNode -}; -use SError; - -pub(crate) trait Sink { - fn leaf(&mut self, kind: SyntaxKind, len: TextUnit); - fn start_internal(&mut self, kind: SyntaxKind); - fn finish_internal(&mut self); - fn error(&mut self, err: String); -} - -pub(crate) struct GreenBuilder { - text: String, - stack: Vec, - pos: TextUnit, - root: Option, - errors: Vec, -} - -impl GreenBuilder { - pub(crate) fn new(text: String) -> GreenBuilder { - GreenBuilder { - text, - stack: Vec::new(), - pos: 0.into(), - root: None, - errors: Vec::new(), - } - } - - pub(crate) fn finish(self) -> (GreenNode, Vec) { - (self.root.unwrap(), self.errors) - } -} - -impl Sink for GreenBuilder { - fn leaf(&mut self, kind: SyntaxKind, len: TextUnit) { - let range = TextRange::offset_len(self.pos, len); - self.pos += len; - let text = self.text[range].to_owned(); - let parent = self.stack.last_mut().unwrap(); - if kind.is_trivia() { - parent.push_trivia(kind, text); - } else { - let node = GreenNode::new_leaf(kind, text); - parent.push_child(Arc::new(node)); - } - } - - fn start_internal(&mut self, kind: SyntaxKind) { - self.stack.push(GreenNode::new_branch(kind)) - } - - fn finish_internal(&mut self) { - let node = self.stack.pop().unwrap(); - if let Some(parent) = self.stack.last_mut() { - parent.push_child(Arc::new(node)) - } else { - self.root = Some(node); - } - } - - fn error(&mut self, message: String) { - self.errors.push(SError { message, offset: self.pos }) - } -} -impl SyntaxKind { - fn is_trivia(self) -> bool { - match self { - SyntaxKind::WHITESPACE | SyntaxKind::DOC_COMMENT | SyntaxKind::COMMENT => true, - _ => false - } - } -} - diff --git a/src/tree/mod.rs b/src/tree/mod.rs deleted file mode 100644 index efba82825..000000000 --- a/src/tree/mod.rs +++ /dev/null @@ -1,27 +0,0 @@ -mod file_builder; - -use ::{TextUnit}; -use std::{fmt}; -pub(crate) use self::file_builder::{Sink, GreenBuilder}; - -pub use syntax_kinds::SyntaxKind; - -impl fmt::Debug for SyntaxKind { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let name = self.info().name; - f.write_str(name) - } -} - -pub(crate) struct SyntaxInfo { - pub name: &'static str, -} - -/// A token of Rust source. -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub struct Token { - /// The kind of token. - pub kind: SyntaxKind, - /// The length of the token. - pub len: TextUnit, -} diff --git a/src/yellow/builder.rs b/src/yellow/builder.rs new file mode 100644 index 000000000..346d561cd --- /dev/null +++ b/src/yellow/builder.rs @@ -0,0 +1,68 @@ +use std::sync::Arc; +use { + SyntaxKind, TextRange, TextUnit, + yellow::{SyntaxNode, GreenNode, SyntaxError}, + parser::Sink +}; + +pub(crate) struct GreenBuilder { + text: String, + stack: Vec, + pos: TextUnit, + root: Option, + errors: Vec, +} + +impl GreenBuilder { + +} + +impl Sink for GreenBuilder { + type Tree = SyntaxNode; + + fn new(text: String) -> Self { + GreenBuilder { + text, + stack: Vec::new(), + pos: 0.into(), + root: None, + errors: Vec::new(), + } + } + + fn leaf(&mut self, kind: SyntaxKind, len: TextUnit) { + let range = TextRange::offset_len(self.pos, len); + self.pos += len; + let text = self.text[range].to_owned(); + let parent = self.stack.last_mut().unwrap(); + if kind.is_trivia() { + parent.push_trivia(kind, text); + } else { + let node = GreenNode::new_leaf(kind, text); + parent.push_child(Arc::new(node)); + } + } + + fn start_internal(&mut self, kind: SyntaxKind) { + self.stack.push(GreenNode::new_branch(kind)) + } + + fn finish_internal(&mut self) { + let node = self.stack.pop().unwrap(); + if let Some(parent) = self.stack.last_mut() { + parent.push_child(Arc::new(node)) + } else { + self.root = Some(node); + } + } + + fn error(&mut self, message: String) { + self.errors.push(SyntaxError { message, offset: self.pos }) + } + + fn finish(self) -> SyntaxNode { + SyntaxNode::new(Arc::new(self.root.unwrap()), self.errors) + } +} + + diff --git a/src/yellow/mod.rs b/src/yellow/mod.rs index 88d88e226..9e64d042f 100644 --- a/src/yellow/mod.rs +++ b/src/yellow/mod.rs @@ -1,6 +1,7 @@ mod green; mod red; mod syntax; +mod builder; use std::{ sync::{Arc, Weak}, @@ -9,7 +10,8 @@ use std::{ pub(crate) use self::{ green::{GreenNode, TextLen}, red::RedNode, - syntax::SError, + syntax::SyntaxError, + builder::GreenBuilder, }; pub use self::syntax::SyntaxNode; diff --git a/src/yellow/syntax.rs b/src/yellow/syntax.rs index 7b1a05cd9..78fa5bf95 100644 --- a/src/yellow/syntax.rs +++ b/src/yellow/syntax.rs @@ -4,7 +4,8 @@ use std::{ }; use { - TextRange, TextUnit, SyntaxKind, + TextRange, TextUnit, + SyntaxKind::{self, *}, yellow::{Ptr, RedNode, GreenNode, TextLen}, }; @@ -18,17 +19,17 @@ pub struct SyntaxNode { #[derive(Clone)] pub struct SyntaxRoot { red: Arc, - pub(crate) errors: Arc>, + pub(crate) errors: Arc>, } #[derive(Debug, Clone, PartialEq, Eq, Hash, Ord, PartialOrd)] -pub(crate) struct SError { +pub(crate) struct SyntaxError { pub(crate) message: String, pub(crate) offset: TextUnit, } impl SyntaxNode { - pub(crate) fn new(root: Arc, errors: Vec) -> SyntaxNode { + pub(crate) fn new(root: Arc, errors: Vec) -> SyntaxNode { let root = Arc::new(RedNode::new_root(root)); let red = Ptr::new(&root); let root = SyntaxRoot { red: root, errors: Arc::new(errors) }; @@ -123,7 +124,6 @@ impl fmt::Debug for SyntaxNode { } fn has_short_text(kind: SyntaxKind) -> bool { - use syntax_kinds::*; match kind { IDENT | LIFETIME => true, _ => false, diff --git a/tests/parser.rs b/tests/parser.rs index 1b86fe55a..3b6670cb0 100644 --- a/tests/parser.rs +++ b/tests/parser.rs @@ -1,15 +1,14 @@ extern crate libsyntax2; extern crate testutils; -use libsyntax2::{tokenize, parse_green}; +use libsyntax2::{parse}; use libsyntax2::utils::{dump_tree_green}; use testutils::dir_tests; #[test] fn parser_tests() { dir_tests(&["parser/inline", "parser/ok", "parser/err"], |text| { - let tokens = tokenize(text); - let file = parse_green(text.to_string(), &tokens); + let file = parse(text.to_string()); dump_tree_green(&file) }) } diff --git a/tools/src/bin/gen.rs b/tools/src/bin/gen.rs index 7cb164316..e772922ba 100644 --- a/tools/src/bin/gen.rs +++ b/tools/src/bin/gen.rs @@ -36,7 +36,7 @@ impl Grammar { acc.push_str("#![allow(bad_style, missing_docs, unreachable_pub)]\n"); acc.push_str("#![cfg_attr(rustfmt, rustfmt_skip)]\n"); acc.push_str("//! Generated from grammar.ron\n"); - acc.push_str("use tree::SyntaxInfo;\n"); + acc.push_str("use super::SyntaxInfo;\n"); acc.push_str("\n"); let syntax_kinds: Vec = self.tokens @@ -82,19 +82,19 @@ impl Grammar { acc.push_str(" EOF => &SyntaxInfo { name: \"EOF\" },\n"); acc.push_str(" }\n"); acc.push_str(" }\n"); - acc.push_str("}\n"); - acc.push_str("\n"); - // fn ident_to_keyword - acc.push_str("pub(crate) fn ident_to_keyword(ident: &str) -> Option {\n"); - acc.push_str(" match ident {\n"); + // fn from_keyword + acc.push_str(" pub(crate) fn from_keyword(ident: &str) -> Option {\n"); + acc.push_str(" match ident {\n"); // NB: no contextual_keywords here! for kw in self.keywords.iter() { - write!(acc, " {:?} => Some({}),\n", kw, kw_token(kw)).unwrap(); + write!(acc, " {:?} => Some({}),\n", kw, kw_token(kw)).unwrap(); } - acc.push_str(" _ => None,\n"); + acc.push_str(" _ => None,\n"); + acc.push_str(" }\n"); acc.push_str(" }\n"); acc.push_str("}\n"); + acc.push_str("\n"); acc } } @@ -104,7 +104,7 @@ fn grammar_file() -> PathBuf { } fn generated_file() -> PathBuf { - base_dir().join("src/syntax_kinds.rs") + base_dir().join("src/syntax_kinds/generated.rs") } fn scream(word: &str) -> String { diff --git a/tools/src/bin/parse.rs b/tools/src/bin/parse.rs index 5e4dc261f..cb3414711 100644 --- a/tools/src/bin/parse.rs +++ b/tools/src/bin/parse.rs @@ -2,13 +2,12 @@ extern crate libsyntax2; use std::io::Read; -use libsyntax2::{parse_green, tokenize}; +use libsyntax2::{parse}; use libsyntax2::utils::dump_tree_green; fn main() { let text = read_input(); - let tokens = tokenize(&text); - let file = parse_green(text, &tokens); + let file = parse(text); let tree = dump_tree_green(&file); println!("{}", tree); } -- cgit v1.2.3