From 9ce4db545efba697f20ab5cecbefc0589c7146ca Mon Sep 17 00:00:00 2001
From: Aleksey Kladov
Date: Sun, 31 Dec 2017 17:54:33 +0300
Subject: Parser: groundwork

---
 src/lexer/mod.rs  | 11 +++++++++
 src/lib.rs        |  4 +--
 src/parser/mod.rs |  5 ++++
 src/text.rs       | 59 ++++++++++++++++++++++++++++++++++++++++++++
 src/tree.rs       | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++++--
 5 files changed, 149 insertions(+), 4 deletions(-)
 create mode 100644 src/parser/mod.rs

(limited to 'src')

diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs
index f46746bee..7c4259763 100644
--- a/src/lexer/mod.rs
+++ b/src/lexer/mod.rs
@@ -16,6 +16,17 @@ use self::strings::{is_string_literal_start, scan_char, scan_byte_char_or_string
 mod comments;
 use self::comments::{scan_shebang, scan_comment};
 
+pub fn tokenize(text: &str) -> Vec<Token> {
+    let mut text = text;
+    let mut acc = Vec::new();
+    while !text.is_empty() {
+        let token = next_token(text);
+        acc.push(token);
+        let len: u32 = token.len.into();
+        text = &text[len as usize..];
+    }
+    acc
+}
 pub fn next_token(text: &str) -> Token {
     assert!(!text.is_empty());
     let mut ptr = Ptr::new(text);
diff --git a/src/lib.rs b/src/lib.rs
index 3b9dbc8f7..82213e2b3 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -5,6 +5,6 @@ mod tree;
 mod lexer;
 
 pub mod syntax_kinds;
-pub use text::TextUnit;
+pub use text::{TextUnit, TextRange};
 pub use tree::{SyntaxKind, Token};
-pub use lexer::next_token;
+pub use lexer::{next_token, tokenize};
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
new file mode 100644
index 000000000..da902e2b7
--- /dev/null
+++ b/src/parser/mod.rs
@@ -0,0 +1,5 @@
+use {Token, File};
+
+pub fn parse(tokens: &[Token]) -> File {
+    unimplemented!()
+}
\ No newline at end of file
diff --git a/src/text.rs b/src/text.rs
index c3ef1ac8e..ee0dc8398 100644
--- a/src/text.rs
+++ b/src/text.rs
@@ -56,4 +56,63 @@ impl ops::SubAssign<TextUnit> for TextUnit {
     fn sub_assign(&mut self, rhs: TextUnit) {
         self.0 -= rhs.0
     }
+}
+
+
+#[derive(Clone, Copy, PartialEq, Eq)]
+pub struct TextRange {
+    start: TextUnit,
+    end: TextUnit,
+}
+
+impl fmt::Debug for TextRange {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        <Self as fmt::Display>::fmt(self, f)
+    }
+}
+
+impl fmt::Display for TextRange {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "[{}; {})", self.start(), self.end())
+    }
+}
+
+
+impl TextRange {
+    pub fn empty() -> TextRange {
+        TextRange::from_to(TextUnit::new(0), TextUnit::new(0))
+    }
+
+    pub fn from_to(from: TextUnit, to: TextUnit) -> TextRange {
+        assert!(from <= to, "Invalid text range [{}; {})", from, to);
+        TextRange { start: from, end: to }
+    }
+
+    pub fn from_len(from: TextUnit, len: TextUnit) -> TextRange {
+        TextRange::from_to(from, from + len)
+    }
+
+    pub fn start(&self) -> TextUnit {
+        self.start
+    }
+
+    pub fn end(&self) -> TextUnit {
+        self.end
+    }
+
+    pub fn len(&self) -> TextUnit {
+        self.end - self.start
+    }
+
+    pub fn is_empty(&self) -> bool {
+        self.start() == self.end()
+    }
+}
+
+impl ops::Index<TextRange> for str {
+    type Output = str;
+
+    fn index(&self, index: TextRange) -> &str {
+        &self[index.start().0 as usize..index.end().0 as usize]
+    }
 }
\ No newline at end of file
diff --git a/src/tree.rs b/src/tree.rs
index 0924f38d0..2ac25e795 100644
--- a/src/tree.rs
+++ b/src/tree.rs
@@ -1,4 +1,4 @@
-use text::{TextUnit};
+use text::{TextUnit, TextRange};
 use syntax_kinds::syntax_info;
 
 use std::fmt;
@@ -28,4 +28,74 @@ pub(crate) struct SyntaxInfo {
 pub struct Token {
     pub kind: SyntaxKind,
     pub len: TextUnit,
-}
\ No newline at end of file
+}
+
+pub struct File {
+    text: String,
+    nodes: Vec<NodeData>,
+}
+
+impl File {
+    pub fn root<'f>(&'f self) -> Node<'f> {
+        assert!(!self.nodes.is_empty());
+        Node { file: self, idx: NodeIdx(0) }
+    }
+}
+
+#[derive(Clone, Copy)]
+pub struct Node<'f> {
+    file: &'f File,
+    idx: NodeIdx,
+}
+
+impl<'f> Node<'f> {
+    pub fn kind(&self) -> SyntaxKind {
+        self.data().kind
+    }
+
+    pub fn text(&self) -> &'f str {
+        let range = self.data().range;
+        &self.file.text.as_str()[range]
+    }
+
+    pub fn parent(&self) -> Option<Node<'f>> {
+        self.as_node(self.data().parent)
+    }
+
+    pub fn children(&self) -> Children<'f> {
+        Children { next: self.as_node(self.data().first_child) }
+    }
+
+    fn data(&self) -> &'f NodeData {
+        &self.file.nodes[self.idx.0 as usize]
+    }
+
+    fn as_node(&self, idx: Option<NodeIdx>) -> Option<Node<'f>> {
+        idx.map(|idx| Node { file: self.file, idx })
+    }
+}
+
+pub struct Children<'f> {
+    next: Option<Node<'f>>,
+}
+
+impl<'f> Iterator for Children<'f> {
+    type Item = Node<'f>;
+
+    fn next(&mut self) -> Option<Node<'f>> {
+        let next = self.next;
+        self.next = next.and_then(|node| node.as_node(node.data().next_sibling));
+        next
+    }
+}
+
+#[derive(Clone, Copy)]
+struct NodeIdx(u32);
+
+struct NodeData {
+    kind: SyntaxKind,
+    range: TextRange,
+    parent: Option<NodeIdx>,
+    first_child: Option<NodeIdx>,
+    next_sibling: Option<NodeIdx>,
+}
-- 
cgit v1.2.3