From 3b42ddae601fbd73f672e82028e04c3abdf1252d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adolfo=20Ochagav=C3=ADa?= Date: Sun, 4 Nov 2018 16:45:22 +0100 Subject: Introduce SyntaxErrorKind and TextRange in SyntaxError --- crates/ra_syntax/src/parser_impl/event.rs | 11 +++- crates/ra_syntax/src/parser_impl/mod.rs | 12 +++- crates/ra_syntax/src/reparsing.rs | 12 ++-- crates/ra_syntax/src/string_lexing/mod.rs | 10 --- crates/ra_syntax/src/utils.rs | 8 +-- crates/ra_syntax/src/validation.rs | 99 ++++++++++++++++++++++------- crates/ra_syntax/src/yellow/builder.rs | 11 ++-- crates/ra_syntax/src/yellow/mod.rs | 10 +-- crates/ra_syntax/src/yellow/syntax_error.rs | 42 ++++++++++++ 9 files changed, 153 insertions(+), 62 deletions(-) create mode 100644 crates/ra_syntax/src/yellow/syntax_error.rs (limited to 'crates/ra_syntax') diff --git a/crates/ra_syntax/src/parser_impl/event.rs b/crates/ra_syntax/src/parser_impl/event.rs index 79fa21389..ced09bcff 100644 --- a/crates/ra_syntax/src/parser_impl/event.rs +++ b/crates/ra_syntax/src/parser_impl/event.rs @@ -13,6 +13,10 @@ use crate::{ SmolStr, SyntaxKind::{self, *}, TextRange, TextUnit, + yellow::syntax_error::{ + ParseError, + SyntaxErrorKind, + }, }; use std::mem; @@ -75,7 +79,7 @@ pub(crate) enum Event { }, Error { - msg: String, + msg: ParseError, }, } @@ -157,7 +161,10 @@ impl<'a, S: Sink> EventProcessor<'a, S> { .sum::(); self.leaf(kind, len, n_raw_tokens); } - Event::Error { msg } => self.sink.error(msg, self.text_pos), + Event::Error { msg } => self.sink.error( + SyntaxErrorKind::ParseError(msg), + TextRange::offset_len(self.text_pos, 1.into()), + ), } } self.sink diff --git a/crates/ra_syntax/src/parser_impl/mod.rs b/crates/ra_syntax/src/parser_impl/mod.rs index 2b026d61e..ade25770b 100644 --- a/crates/ra_syntax/src/parser_impl/mod.rs +++ b/crates/ra_syntax/src/parser_impl/mod.rs @@ -10,7 +10,11 @@ use crate::{ event::{Event, EventProcessor}, input::{InputPosition, ParserInput}, }, - SmolStr, TextUnit, + SmolStr, TextRange, + yellow::syntax_error::{ + ParseError, + SyntaxErrorKind, + }, }; use crate::SyntaxKind::{self, EOF, TOMBSTONE}; @@ -21,7 +25,7 @@ pub(crate) trait Sink { fn leaf(&mut self, kind: SyntaxKind, text: SmolStr); fn start_internal(&mut self, kind: SyntaxKind); fn finish_internal(&mut self); - fn error(&mut self, message: String, offset: TextUnit); + fn error(&mut self, kind: SyntaxErrorKind, offset: TextRange); fn finish(self) -> Self::Tree; } @@ -144,7 +148,9 @@ impl<'t> ParserImpl<'t> { } pub(super) fn error(&mut self, msg: String) { - self.event(Event::Error { msg }) + self.event(Event::Error { + msg: ParseError(msg), + }) } pub(super) fn complete(&mut self, pos: u32, kind: SyntaxKind) { diff --git a/crates/ra_syntax/src/reparsing.rs b/crates/ra_syntax/src/reparsing.rs index b3b51b3e4..9f5baf1ef 100644 --- a/crates/ra_syntax/src/reparsing.rs +++ b/crates/ra_syntax/src/reparsing.rs @@ -165,19 +165,19 @@ fn merge_errors( ) -> Vec { let mut res = Vec::new(); for e in old_errors { - if e.offset <= old_node.range().start() { + if e.range.start() <= old_node.range().start() { res.push(e) - } else if e.offset >= old_node.range().end() { + } else if e.range.start() >= old_node.range().end() { res.push(SyntaxError { - msg: e.msg, - offset: e.offset + TextUnit::of_str(&edit.insert) - edit.delete.len(), + kind: e.kind, + range: e.range + TextUnit::of_str(&edit.insert) - edit.delete.len(), }) } } for e in new_errors { res.push(SyntaxError { - msg: e.msg, - offset: e.offset + old_node.range().start(), + kind: e.kind, + range: e.range + old_node.range().start(), }) } res diff --git a/crates/ra_syntax/src/string_lexing/mod.rs b/crates/ra_syntax/src/string_lexing/mod.rs index 6b52c62c3..f0812ff28 100644 --- a/crates/ra_syntax/src/string_lexing/mod.rs +++ b/crates/ra_syntax/src/string_lexing/mod.rs @@ -100,10 +100,6 @@ impl<'a> Parser<'a> { // Char parsing methods fn parse_unicode_escape(&mut self, start: TextUnit) -> CharComponent { - // Note: validation of UnicodeEscape will be done elsewhere: - // * Only hex digits or underscores allowed - // * Max 6 chars - // * Within allowed range (must be at most 10FFFF) match self.peek() { Some('{') => { self.advance(); @@ -127,9 +123,6 @@ impl<'a> Parser<'a> { } fn parse_ascii_code_escape(&mut self, start: TextUnit) -> CharComponent { - // Note: validation of AsciiCodeEscape will be done elsewhere: - // * First digit is octal - // * Second digit is hex let code_start = self.get_pos(); while let Some(next) = self.peek() { if next == '\'' || (self.get_pos() - code_start == 2.into()) { @@ -144,9 +137,6 @@ impl<'a> Parser<'a> { } fn parse_escape(&mut self, start: TextUnit) -> CharComponent { - // Note: validation of AsciiEscape will be done elsewhere: - // * The escape sequence is non-empty - // * The escape sequence is valid if self.peek().is_none() { return CharComponent::new(TextRange::from_to(start, start), AsciiEscape); } diff --git a/crates/ra_syntax/src/utils.rs b/crates/ra_syntax/src/utils.rs index 00f00139a..f55568d94 100644 --- a/crates/ra_syntax/src/utils.rs +++ b/crates/ra_syntax/src/utils.rs @@ -4,7 +4,7 @@ use std::fmt::Write; /// Parse a file and create a string representation of the resulting parse tree. pub fn dump_tree(syntax: SyntaxNodeRef) -> String { let mut errors: Vec<_> = syntax.root_data().to_vec(); - errors.sort_by_key(|e| e.offset); + errors.sort_by_key(|e| e.range.start()); let mut err_pos = 0; let mut level = 0; let mut buf = String::new(); @@ -23,9 +23,9 @@ pub fn dump_tree(syntax: SyntaxNodeRef) -> String { writeln!(buf, "{:?}", node).unwrap(); if node.first_child().is_none() { let off = node.range().end(); - while err_pos < errors.len() && errors[err_pos].offset <= off { + while err_pos < errors.len() && errors[err_pos].range.start() <= off { indent!(); - writeln!(buf, "err: `{}`", errors[err_pos].msg).unwrap(); + writeln!(buf, "err: `{}`", errors[err_pos].kind).unwrap(); err_pos += 1; } } @@ -37,7 +37,7 @@ pub fn dump_tree(syntax: SyntaxNodeRef) -> String { assert_eq!(level, 0); for err in errors[err_pos..].iter() { - writeln!(buf, "err: `{}`", err.msg).unwrap(); + writeln!(buf, "err: `{}`", err.kind).unwrap(); } buf diff --git a/crates/ra_syntax/src/validation.rs b/crates/ra_syntax/src/validation.rs index 03d98eff4..06e6e7505 100644 --- a/crates/ra_syntax/src/validation.rs +++ b/crates/ra_syntax/src/validation.rs @@ -1,40 +1,93 @@ use crate::{ + algo::visit::{visitor_ctx, VisitorCtx}, ast::{self, AstNode}, File, - string_lexing, + string_lexing::{self, CharComponentKind}, yellow::{ SyntaxError, + SyntaxErrorKind::*, }, }; pub(crate) fn validate(file: &File) -> Vec { let mut errors = Vec::new(); - for d in file.root.borrowed().descendants() { - if let Some(c) = ast::Char::cast(d) { - let components = &mut string_lexing::parse_char_literal(c.text()); - let len = components.count(); + for node in file.root.borrowed().descendants() { + let _ = visitor_ctx(&mut errors) + .visit::(validate_char) + .accept(node); + } + errors +} - if !components.has_closing_quote { - errors.push(SyntaxError { - msg: "Unclosed char literal".to_string(), - offset: d.range().start(), - }); - } +fn validate_char(node: ast::Char, errors: &mut Vec) { + let mut components = string_lexing::parse_char_literal(node.text()); + let mut len = 0; + for component in &mut components { + len += 1; - if len == 0 { - errors.push(SyntaxError { - msg: "Empty char literal".to_string(), - offset: d.range().start(), - }); + // Validate escapes + let text = &node.text()[component.range]; + let range = component.range + node.syntax().range().start(); + use self::CharComponentKind::*; + match component.kind { + AsciiEscape => { + if text.len() == 1 { + // Escape sequence consists only of leading `\` + errors.push(SyntaxError { + kind: EmptyAsciiEscape, + range: range, + }); + } else { + let escape_code = text.chars().skip(1).next().unwrap(); + if !is_ascii_escape(escape_code) { + errors.push(SyntaxError { + kind: InvalidAsciiEscape, + range: range, + }); + } + } } - - if len > 1 { - errors.push(SyntaxError { - msg: "Character literal should be only one character long".to_string(), - offset: d.range().start(), - }); + AsciiCodeEscape => { + // TODO: + // * First digit is octal + // * Second digit is hex + } + UnicodeEscape => { + // TODO: + // * Only hex digits or underscores allowed + // * Max 6 chars + // * Within allowed range (must be at most 10FFFF) } + // Code points are always valid + CodePoint => (), } } - errors + + if !components.has_closing_quote { + errors.push(SyntaxError { + kind: UnclosedChar, + range: node.syntax().range(), + }); + } + + if len == 0 { + errors.push(SyntaxError { + kind: EmptyChar, + range: node.syntax().range(), + }); + } + + if len > 1 { + errors.push(SyntaxError { + kind: LongChar, + range: node.syntax().range(), + }); + } +} + +fn is_ascii_escape(code: char) -> bool { + match code { + '\'' | '"' | 'n' | 'r' | 't' | '0' => true, + _ => false, + } } diff --git a/crates/ra_syntax/src/yellow/builder.rs b/crates/ra_syntax/src/yellow/builder.rs index d64053409..dbe2df125 100644 --- a/crates/ra_syntax/src/yellow/builder.rs +++ b/crates/ra_syntax/src/yellow/builder.rs @@ -1,7 +1,7 @@ use crate::{ parser_impl::Sink, - yellow::{GreenNode, RaTypes, SyntaxError}, - SmolStr, SyntaxKind, TextUnit, + yellow::{GreenNode, RaTypes, SyntaxError, SyntaxErrorKind}, + SmolStr, SyntaxKind, TextRange, }; use rowan::GreenNodeBuilder; @@ -34,11 +34,8 @@ impl Sink for GreenBuilder { self.inner.finish_internal(); } - fn error(&mut self, message: String, offset: TextUnit) { - let error = SyntaxError { - msg: message, - offset, - }; + fn error(&mut self, kind: SyntaxErrorKind, range: TextRange) { + let error = SyntaxError { kind, range }; self.errors.push(error) } diff --git a/crates/ra_syntax/src/yellow/mod.rs b/crates/ra_syntax/src/yellow/mod.rs index 650917214..fd2b5bd33 100644 --- a/crates/ra_syntax/src/yellow/mod.rs +++ b/crates/ra_syntax/src/yellow/mod.rs @@ -1,8 +1,9 @@ mod builder; +pub mod syntax_error; mod syntax_text; use self::syntax_text::SyntaxText; -use crate::{SmolStr, SyntaxKind, TextRange, TextUnit}; +use crate::{SmolStr, SyntaxKind, TextRange}; use rowan::Types; use std::{ fmt, @@ -10,6 +11,7 @@ use std::{ }; pub(crate) use self::builder::GreenBuilder; +pub use self::syntax_error::{SyntaxError, SyntaxErrorKind}; pub use rowan::{TreeRoot, WalkEvent}; #[derive(Debug, Clone, Copy)] @@ -24,12 +26,6 @@ pub type RefRoot<'a> = ::rowan::RefRoot<'a, RaTypes>; pub type GreenNode = ::rowan::GreenNode; -#[derive(Debug, Clone, PartialEq, Eq, Hash, Ord, PartialOrd)] -pub struct SyntaxError { - pub msg: String, - pub offset: TextUnit, -} - #[derive(Clone, Copy)] pub struct SyntaxNode = OwnedRoot>(::rowan::SyntaxNode); pub type SyntaxNodeRef<'a> = SyntaxNode>; diff --git a/crates/ra_syntax/src/yellow/syntax_error.rs b/crates/ra_syntax/src/yellow/syntax_error.rs new file mode 100644 index 000000000..e8c818dc6 --- /dev/null +++ b/crates/ra_syntax/src/yellow/syntax_error.rs @@ -0,0 +1,42 @@ +use std::fmt; + +use crate::TextRange; + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct SyntaxError { + pub kind: SyntaxErrorKind, + pub range: TextRange, +} + +impl SyntaxError { + pub fn new(kind: SyntaxErrorKind, range: TextRange) -> SyntaxError { + SyntaxError { kind, range } + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum SyntaxErrorKind { + ParseError(ParseError), + EmptyChar, + UnclosedChar, + LongChar, + EmptyAsciiEscape, + InvalidAsciiEscape, +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct ParseError(pub String); + +impl fmt::Display for SyntaxErrorKind { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + use self::SyntaxErrorKind::*; + match self { + EmptyAsciiEscape => write!(f, "Empty escape sequence"), + InvalidAsciiEscape => write!(f, "Invalid escape sequence"), + EmptyChar => write!(f, "Empty char literal"), + UnclosedChar => write!(f, "Unclosed char literal"), + LongChar => write!(f, "Char literal should be one character long"), + ParseError(msg) => write!(f, "{}", msg.0), + } + } +} -- cgit v1.2.3