From ae79d5a8b0166d43d3ff48aa593db6038b40410b Mon Sep 17 00:00:00 2001
From: Akshay <nerdy@peppe.rs>
Date: Fri, 19 Mar 2021 21:57:34 +0530
Subject: begin work on scripting lisp

---
 src/lisp/error.rs  |   4 +
 src/lisp/lex.rs    | 303 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/lisp/mod.rs    |  46 ++++++++
 src/lisp/number.rs |  70 +++++++++++++
 4 files changed, 423 insertions(+)
 create mode 100644 src/lisp/error.rs
 create mode 100644 src/lisp/lex.rs
 create mode 100644 src/lisp/mod.rs
 create mode 100644 src/lisp/number.rs

(limited to 'src/lisp')

diff --git a/src/lisp/error.rs b/src/lisp/error.rs
new file mode 100644
index 0000000..cde1e31
--- /dev/null
+++ b/src/lisp/error.rs
@@ -0,0 +1,4 @@
+#[derive(Clone, Copy, Debug, PartialEq)]
+pub enum LispError {
+    ParseError, // returned by the lexer when input cannot be turned into a token
+}
diff --git a/src/lisp/lex.rs b/src/lisp/lex.rs
new file mode 100644
index 0000000..a1bea5f
--- /dev/null
+++ b/src/lisp/lex.rs
@@ -0,0 +1,303 @@
+use std::{fmt, str::CharIndices};
+
+use super::error::LispError;
+
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum Token<'a> {
+    LeftParen,        // `(`
+    RightParen,       // `)`
+    Comment(&'a str), // not produced by `Lexer::next_token`, which skips comments
+    Float(&'a str),   // number text containing a `.`
+    Integer(&'a str), // number text without a `.`
+    Char(&'a str),    // not produced by the lexer in this file
+    String(&'a str),  // string literal text, surrounding quotes included
+    Name(&'a str),    // run of identifier characters such as `foo`, `+` or `#t`
+    Keyword(&'a str), // not produced by the lexer in this file
+    BackQuote,        // the backquote character
+    Comma,            // `,`
+    CommaAt,          // `,@`
+    Quote,            // `'`
+    End,              // no tokens remain
+}
+
+impl<'a> fmt::Display for Token<'a> { // prints the token's kind; payload text is never shown
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.write_str(match *self {
+            Token::LeftParen => "(",
+            Token::RightParen => ")",
+            Token::Comment(_) => "comment",
+            Token::Float(_) => "float",
+            Token::Integer(_) => "integer",
+            Token::Char(_) => "char",
+            Token::String(_) => "string",
+            Token::Name(_) => "name",
+            Token::Keyword(_) => "keyword",
+            Token::BackQuote => "`",
+            Token::Comma => ",",
+            Token::CommaAt => ",@",
+            Token::Quote => "'",
+            Token::End => "EOF",
+        })
+    }
+}
+
+#[derive(Clone, Copy, Debug, PartialEq)]
+pub struct Span {
+    low: u32,  // position where the token starts
+    high: u32, // position just past the token's end (`low` plus the token size)
+}
+
+impl Span {
+    /// Builds a zero-width span located at `pos`.
+    ///
+    /// The lexer uses it for the end-of-input token.
+    fn empty(pos: u32) -> Self {
+        Span { low: pos, high: pos }
+    }
+}
+
+pub struct Lexer<'src> {
+    input: &'src str, // text still to be lexed; shrinks as tokens are returned
+    cur_pos: u32,     // how far lexing has advanced, counted from the original start
+    offset: u32,      // supplied to `new`; not read anywhere in this file yet
+}
+
+impl<'a> Lexer<'a> {
+    /// Creates a lexer positioned at the start of `input`.
+    ///
+    /// `offset` is stored as given; nothing in this impl reads it yet.
+    pub fn new(input: &'a str, offset: u32) -> Self {
+        Self { input, cur_pos: 0, offset }
+    }
+
+    /// Skips whitespace and `;` comments, then returns the next token with its span.
+    ///
+    /// Yields `Token::End` with an empty span once no token remains.
+    ///
+    /// # Errors
+    /// `LispError::ParseError` on an unexpected character or a malformed literal.
+    pub fn next_token(&mut self) -> Result<(Span, Token<'a>), LispError> {
+        let mut chars = self.input.char_indices();
+
+        while let Some((ind, chr)) = chars.next() {
+            let low = self.cur_pos;
+            let rest = &self.input[ind..];
+            let (size, token) = match chr {
+                '(' => (1, Token::LeftParen),
+                ')' => (1, Token::RightParen),
+                '\'' => (1, Token::Quote),
+                '`' => (1, Token::BackQuote),
+                ',' if rest[1..].starts_with('@') => (2, Token::CommaAt),
+                ',' => (1, Token::Comma),
+                // A `-` only starts a number when a digit follows; otherwise it is
+                // an ordinary name, e.g. the operator in `(- 1 2)`.
+                '-' if !rest[1..].starts_with(|c: char| c.is_ascii_digit()) => parse_name(rest)?,
+                '-' | '0'..='9' => parse_number(rest)?,
+                '"' => parse_string(rest)?,
+                ';' => {
+                    self.cur_pos += consume_comment(ind, &mut chars) as u32;
+                    continue;
+                }
+                _ if is_ident(chr) => parse_name(rest)?,
+                ch if ch.is_whitespace() => {
+                    self.cur_pos += ch.len_utf8() as u32;
+                    continue;
+                }
+                ch => {
+                    eprintln!("some unexpected character: {}", ch);
+                    return Err(LispError::ParseError);
+                }
+            };
+            self.cur_pos += size as u32;
+            self.input = &self.input[ind + size..];
+            let high = low + size as u32;
+            return Ok((Span { low, high }, token));
+        }
+        self.input = &self.input[..0];
+        Ok((Span::empty(self.cur_pos), Token::End))
+    }
+}
+
+/// Lexes a number at the start of `input`.
+///
+/// Accepts an optional leading `-`, ASCII digits and at most one `.`; the
+/// literal ends at end of input or at the first non-identifier character.
+///
+/// Returns the literal's length in bytes and a `Token::Float` when a `.` was
+/// seen, otherwise a `Token::Integer`. The token text includes the sign.
+///
+/// # Errors
+/// `LispError::ParseError` on a second `.`, on any other identifier character
+/// (as in `12abc`), or when the input holds no digit at all.
+fn parse_number<'a>(input: &'a str) -> Result<(usize, Token<'a>), LispError> {
+    // The sign stays inside the token text, so `size` is always relative to `input`.
+    let body = input.strip_prefix('-').unwrap_or(input);
+    let mut dot = false;
+    let mut digits = false;
+    let mut size = input.len() - body.len();
+
+    for chr in body.chars() {
+        if chr.is_ascii_digit() {
+            digits = true;
+        } else if chr == '.' && !dot {
+            dot = true;
+        } else if is_ident(chr) {
+            return Err(LispError::ParseError);
+        } else {
+            break;
+        }
+        // Only ASCII digits and `.` reach this point, one byte each.
+        size += 1;
+    }
+    if !digits {
+        return Err(LispError::ParseError);
+    }
+    let text = &input[..size];
+    Ok((size, if dot { Token::Float(text) } else { Token::Integer(text) }))
+}
+
+/// Lexes a double-quoted string literal; `input` must begin with the opening `"`.
+///
+/// A backslash escapes the character after it, so `\"` does not end the literal.
+/// Returns the byte length and a `Token::String` whose text keeps both quotes.
+///
+/// # Errors
+/// `LispError::ParseError` if the closing quote is missing.
+fn parse_string<'a>(input: &'a str) -> Result<(usize, Token<'a>), LispError> {
+    let mut chars = input.char_indices().skip(1);
+    while let Some((ind, chr)) = chars.next() {
+        if chr == '\\' {
+            let _ = chars.next(); // skip the escaped character
+        } else if chr == '"' {
+            return Ok((ind + 1, Token::String(&input[..=ind])));
+        }
+    }
+    Err(LispError::ParseError)
+}
+
+/// Reports whether `ch` may appear inside a name.
+///
+/// Alphanumeric characters qualify, as does a fixed set of ASCII symbols.
+fn is_ident(ch: char) -> bool {
+    // Punctuation accepted in names in addition to letters and digits.
+    const SYMBOLS: &str = "!$%&*+-./<=>?^_|#";
+    ch.is_alphanumeric() || SYMBOLS.contains(ch)
+}
+
+/// Consumes the rest of a `;` comment from `chars`, up to and including the newline.
+///
+/// `start` is the byte index of the `;`, which the caller has already taken from `chars`.
+/// Returns the comment's length in bytes, counting the `;` and the newline if present.
+fn consume_comment(start: usize, chars: &mut CharIndices) -> usize {
+    let rest = chars.as_str(); // everything after the `;`
+    match chars.find(|&(_, ch)| ch == '\n') {
+        Some((newline, _)) => newline - start + 1,
+        // No newline: the comment is all of `rest`; byte length keeps multi-byte characters whole.
+        None => rest.len() + 1,
+    }
+}
+
+/// Lexes a name: the longest prefix of `input` made of identifier characters.
+///
+/// Returns the prefix's byte length and a `Token::Name` over it; never fails.
+fn parse_name<'a>(input: &'a str) -> Result<(usize, Token<'a>), LispError> {
+    let not_ident = |c: char| !is_ident(c);
+    let end = input.find(not_ident).unwrap_or(input.len());
+    Ok((end, Token::Name(&input[..end])))
+}
+
+#[cfg(test)]
+mod tests { // unit tests for the literal parsers and for `Lexer::next_token`
+    use super::*;
+
+    fn sp(low: u32, high: u32) -> Span { // shorthand for an expected span
+        Span { low, high }
+    }
+
+    fn tokens(input: &str) -> Vec<(Span, Token)> { // lexes all of `input`, panicking on a lexer error
+        let mut lexer = Lexer::new(input, 0);
+        let mut tokens = Vec::new();
+        loop {
+            match lexer.next_token().unwrap() {
+                (_, Token::End) => break, // the End marker itself is not collected
+                t => tokens.push(t),
+            }
+        }
+        tokens
+    }
+
+    #[test]
+    fn string_parsing() {
+        let input = r#""hello there""#;
+        let parsed = parse_string(input).unwrap();
+        assert_eq!(parsed.0, 13); // both quotes are part of the reported size
+        assert_eq!(parsed.1, Token::String(r#""hello there""#));
+    }
+
+    #[test]
+    fn integer_parsing() {
+        let input = "12345";
+        let parsed = parse_number(input).unwrap();
+        assert_eq!(parsed.0, 5);
+        assert_eq!(parsed.1, Token::Integer("12345"));
+    }
+
+    #[test]
+    fn float_parsing() {
+        let input = "12.345";
+        let parsed = parse_number(input).unwrap();
+        assert_eq!(parsed.0, 6);
+        assert_eq!(parsed.1, Token::Float("12.345"));
+    }
+
+    #[test]
+    fn lexer() { // spans are checked as well, so skipped whitespace must advance the position
+        assert_eq!(
+            tokens("1 2 3"),
+            [
+                (sp(0, 1), Token::Integer("1")),
+                (sp(2, 3), Token::Integer("2")),
+                (sp(4, 5), Token::Integer("3"))
+            ]
+        );
+
+        assert_eq!(
+            tokens("1 foo 3"),
+            [
+                (sp(0, 1), Token::Integer("1")),
+                (sp(2, 5), Token::Name("foo")),
+                (sp(6, 7), Token::Integer("3"))
+            ]
+        );
+
+        assert_eq!(tokens("foo"), [(sp(0, 3), Token::Name("foo")),]);
+        assert_eq!(tokens("#t"), [(sp(0, 2), Token::Name("#t")),]);
+
+        assert_eq!(
+            tokens("1 \"foo\" 3"),
+            [
+                (sp(0, 1), Token::Integer("1")),
+                (sp(2, 7), Token::String(r#""foo""#)),
+                (sp(8, 9), Token::Integer("3"))
+            ]
+        );
+
+        assert_eq!(
+            tokens("(* 1 (+ 2 3))"),
+            [
+                (sp(0, 1), Token::LeftParen),
+                (sp(1, 2), Token::Name("*")),
+                (sp(3, 4), Token::Integer("1")),
+                (sp(5, 6), Token::LeftParen),
+                (sp(6, 7), Token::Name("+")),
+                (sp(8, 9), Token::Integer("2")),
+                (sp(10, 11), Token::Integer("3")),
+                (sp(11, 12), Token::RightParen),
+                (sp(12, 13), Token::RightParen),
+            ]
+        );
+
+        assert_eq!(tokens("; foo"), []); // a comment on its own yields no tokens
+        assert_eq!(tokens("1; foo"), [(sp(0, 1), Token::Integer("1"))]);
+    }
+}
diff --git a/src/lisp/mod.rs b/src/lisp/mod.rs
new file mode 100644
index 0000000..5d8965f
--- /dev/null
+++ b/src/lisp/mod.rs
@@ -0,0 +1,46 @@
+use std::fmt;
+
+use number::LispNumber;
+
+mod error;
+mod lex;
+mod number;
+
+#[derive(PartialEq, Debug)]
+pub enum LispExpr {
+    Number(LispNumber),     // integer or float value
+    List(Vec<LispExpr>),    // ordered sub-expressions
+    StringLit(String),      // string literal contents
+    BoolLit(bool),          // displayed as `#t` / `#f`
+    Ident(String),          // identifier name
+    Function(LispFunction), // displayed as `<#procedure>`
+}
+
+impl fmt::Display for LispExpr {
+    /// Writes the expression in source-like form.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            LispExpr::Number(n) => write!(f, "{}", n),
+            LispExpr::List(l) => {
+                // Parenthesise and space-separate items so nested lists stay distinguishable.
+                write!(f, "(")?;
+                for (i, expr) in l.iter().enumerate() {
+                    if i > 0 {
+                        write!(f, " ")?;
+                    }
+                    write!(f, "{}", expr)?;
+                }
+                write!(f, ")")
+            }
+            LispExpr::StringLit(s) => write!(f, "{:?}", s), // `{:?}` adds quotes and escapes
+            LispExpr::BoolLit(b) => write!(f, "{}", if *b { "#t" } else { "#f" }),
+            LispExpr::Ident(s) => write!(f, "{}", s),
+            LispExpr::Function(_) => write!(f, "<#procedure>"),
+        }
+    }
+}
+
+pub type Environment = Vec<(String, LispExpr)>; // presumably (name, value) bindings; unused in this patch - TODO confirm
+
+#[derive(Debug, PartialEq)] // placeholder for callable values; it has no fields yet
+pub struct LispFunction {} // `pub` because the public `LispExpr::Function` variant exposes it
diff --git a/src/lisp/number.rs b/src/lisp/number.rs
new file mode 100644
index 0000000..ddadbe6
--- /dev/null
+++ b/src/lisp/number.rs
@@ -0,0 +1,70 @@
+use std::{
+    fmt::*,
+    ops::{Add, Div, Mul, Sub},
+};
+
+#[derive(Clone, Copy, Debug)] // `PartialEq` is hand-written so integers and floats compare by value
+pub enum LispNumber {
+    Integer(i64), // signed 64-bit integer
+    Float(f64),   // 64-bit float
+}
+
+impl Add for LispNumber {
+    type Output = Self;
+    fn add(self, rhs: Self) -> Self::Output { // a float operand or an i64 overflow yields a float
+        use LispNumber::*;
+        match (self, rhs) {
+            (Integer(a), Integer(b)) => a.checked_add(b).map_or(Float(a as f64 + b as f64), Integer),
+            (Float(a), Integer(b)) => Float(a + b as f64),
+            (Integer(a), Float(b)) => Float(a as f64 + b),
+            (Float(a), Float(b)) => Float(a + b),
+        }
+    }
+}
+
+impl Sub for LispNumber {
+    type Output = Self;
+    fn sub(self, rhs: Self) -> Self::Output { // a float operand or an i64 overflow yields a float
+        use LispNumber::*;
+        match (self, rhs) {
+            (Integer(a), Integer(b)) => a.checked_sub(b).map_or(Float(a as f64 - b as f64), Integer),
+            (Float(a), Integer(b)) => Float(a - b as f64),
+            (Integer(a), Float(b)) => Float(a as f64 - b),
+            (Float(a), Float(b)) => Float(a - b),
+        }
+    }
+}
+
+impl Mul for LispNumber {
+    type Output = Self;
+    fn mul(self, rhs: Self) -> Self::Output { // a float operand or an i64 overflow yields a float
+        use LispNumber::*;
+        match (self, rhs) {
+            (Integer(a), Integer(b)) => a.checked_mul(b).map_or(Float(a as f64 * b as f64), Integer),
+            (Float(a), Integer(b)) => Float(a * b as f64),
+            (Integer(a), Float(b)) => Float(a as f64 * b),
+            (Float(a), Float(b)) => Float(a * b),
+        }
+    }
+}
+
+impl PartialEq for LispNumber {
+    /// Compares by numeric value; an integer is converted to `f64` when compared with a float.
+    fn eq(&self, other: &Self) -> bool {
+        use LispNumber::*;
+        match (*self, *other) {
+            (Integer(a), Integer(b)) => a == b,
+            (Float(a), Float(b)) => a == b,
+            (Float(x), Integer(n)) | (Integer(n), Float(x)) => x == n as f64,
+        }
+    }
+}
+
+impl Display for LispNumber {
+    fn fmt(&self, f: &mut Formatter<'_>) -> Result { // prints the payload with default formatting
+        match *self {
+            Self::Integer(int) => write!(f, "{}", int),
+            Self::Float(float) => write!(f, "{}", float),
+        }
+    }
+}
-- 
cgit v1.2.3