From 5e1e8ed34a46738dda507a4a0f4e73065be74e57 Mon Sep 17 00:00:00 2001
From: Aleksey Kladov
Date: Fri, 29 Dec 2017 00:56:36 +0300
Subject: Lexer scaffold

---
 src/lexer.rs        | 10 ++++++++++
 src/lib.rs          | 15 ++++++++-------
 src/syntax_kinds.rs | 16 ++++++++++++++++
 src/text.rs         | 32 ++++++++++++++++++++++++++++++++
 src/tree.rs         | 31 +++++++++++++++++++++++++++++++
 tests/lexer.rs      | 24 ++++++++++++++++++++----
 6 files changed, 117 insertions(+), 11 deletions(-)
 create mode 100644 src/lexer.rs
 create mode 100644 src/syntax_kinds.rs
 create mode 100644 src/text.rs
 create mode 100644 src/tree.rs

diff --git a/src/lexer.rs b/src/lexer.rs
new file mode 100644
index 000000000..cda9fe2b2
--- /dev/null
+++ b/src/lexer.rs
@@ -0,0 +1,10 @@
+use {Token, TextUnit};
+use syntax_kinds::*;
+
+pub fn next_token(text: &str) -> Token {
+    let c = text.chars().next().unwrap();
+    Token {
+        kind: IDENT,
+        len: TextUnit::len_of_char(c),
+    }
+}
\ No newline at end of file
diff --git a/src/lib.rs b/src/lib.rs
index 31e1bb209..4385c0325 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,7 +1,8 @@
-#[cfg(test)]
-mod tests {
-    #[test]
-    fn it_works() {
-        assert_eq!(2 + 2, 4);
-    }
-}
+mod text;
+mod tree;
+mod lexer;
+
+pub mod syntax_kinds;
+pub use text::TextUnit;
+pub use tree::{SyntaxKind, Token};
+pub use lexer::next_token;
diff --git a/src/syntax_kinds.rs b/src/syntax_kinds.rs
new file mode 100644
index 000000000..18574b7f5
--- /dev/null
+++ b/src/syntax_kinds.rs
@@ -0,0 +1,16 @@
+use tree::{SyntaxKind, SyntaxInfo};
+
+pub const IDENT: SyntaxKind = SyntaxKind(1);
+pub const WHITESPACE: SyntaxKind = SyntaxKind(2);
+
+
+static IDENT_INFO: SyntaxInfo = SyntaxInfo {
+    name: "IDENT",
+};
+
+pub(crate) fn syntax_info(kind: SyntaxKind) -> &'static SyntaxInfo {
+    match kind {
+        IDENT => &IDENT_INFO,
+        _ => unreachable!(),
+    }
+}
\ No newline at end of file
diff --git a/src/text.rs b/src/text.rs
new file mode 100644
index 000000000..5297275ed
--- /dev/null
+++ b/src/text.rs
@@ -0,0 +1,32 @@
+use std::fmt;
+
+#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct TextUnit(u32);
+
+impl TextUnit {
+    pub fn len_of_char(c: char) -> TextUnit {
+        TextUnit(c.len_utf8() as u32)
+    }
+
+    pub fn new(val: u32) -> TextUnit {
+        TextUnit(val)
+    }
+}
+
+impl fmt::Debug for TextUnit {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        <Self as fmt::Display>::fmt(self, f)
+    }
+}
+
+impl fmt::Display for TextUnit {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        self.0.fmt(f)
+    }
+}
+
+impl From<TextUnit> for u32 {
+    fn from(tu: TextUnit) -> u32 {
+        tu.0
+    }
+}
diff --git a/src/tree.rs b/src/tree.rs
new file mode 100644
index 000000000..0924f38d0
--- /dev/null
+++ b/src/tree.rs
@@ -0,0 +1,31 @@
+use text::{TextUnit};
+use syntax_kinds::syntax_info;
+
+use std::fmt;
+
+#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct SyntaxKind(pub(crate) u32);
+
+impl SyntaxKind {
+    fn info(self) -> &'static SyntaxInfo {
+        syntax_info(self)
+    }
+}
+
+impl fmt::Debug for SyntaxKind {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let name = self.info().name;
+        f.write_str(name)
+    }
+}
+
+
+pub(crate) struct SyntaxInfo {
+    pub name: &'static str,
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct Token {
+    pub kind: SyntaxKind,
+    pub len: TextUnit,
+}
\ No newline at end of file
diff --git a/tests/lexer.rs b/tests/lexer.rs
index de76f0a15..a27e7c395 100644
--- a/tests/lexer.rs
+++ b/tests/lexer.rs
@@ -1,9 +1,13 @@
 extern crate file;
 #[macro_use(assert_diff)]
 extern crate difference;
+extern crate libsyntax2;
 
 use std::path::{PathBuf, Path};
 use std::fs::read_dir;
+use std::fmt::Write;
+
+use libsyntax2::{Token, next_token};
 
 #[test]
 fn lexer_tests() {
@@ -46,10 +50,22 @@ fn lexer_test_case(path: &Path) {
     )
 }
 
-fn tokenize(text: &str) -> Vec<()> {
-    Vec::new()
+fn tokenize(text: &str) -> Vec<Token> {
+    let mut text = text;
+    let mut acc = Vec::new();
+    while !text.is_empty() {
+        let token = next_token(text);
+        acc.push(token);
+        let len: u32 = token.len.into();
+        text = &text[len as usize..];
+    }
+    acc
}
 
-fn dump_tokens(tokens: &[()]) -> String {
-    "IDENT 5\nKEYWORD 1\nIDENT 5\n".to_string()
+fn dump_tokens(tokens: &[Token]) -> String {
+    let mut acc = String::new();
+    for token in tokens {
+        write!(acc, "{:?} {}\n", token.kind, token.len).unwrap()
+    }
+    acc
 }
\ No newline at end of file
--
cgit v1.2.3
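As a rough usage sketch of the API this patch introduces (the `main` driver and the sample string are illustrative additions; `next_token`, `Token`, `TextUnit`, and the `libsyntax2` crate name come from the patch itself), the lexer scaffold can be driven the same way the test's `tokenize` helper drives it:

    // Sketch only: walks a string with the scaffold lexer from this patch.
    extern crate libsyntax2;

    use libsyntax2::{Token, next_token};

    fn main() {
        let mut text = "hello world";
        while !text.is_empty() {
            // next_token looks at the first char and returns a Token for it.
            let token: Token = next_token(text);
            // TextUnit converts to u32 via the From<TextUnit> impl in text.rs.
            let len: u32 = token.len.into();
            // SyntaxKind prints via its Debug impl, TextUnit via Display.
            println!("{:?} {}", token.kind, token.len);
            text = &text[len as usize..];
        }
    }

At this stage every character (including whitespace) comes back as a one-character IDENT token, so the loop simply steps through the input one char at a time; the per-kind lexing rules are left for later commits.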