aboutsummaryrefslogtreecommitdiff
path: root/src/lex.rs
blob: 701009ae738e734642aa137aadbeb897a5c334a2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
use crate::utils::FromStaticStr;

/// A single lexed stanza of the source text.
///
/// Every variant borrows its payload directly from the `'static` input, so a
/// `Stanza` never owns any text of its own.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Stanza {
    /// An entry line: the trimmed first line of a stanza whose first line is
    /// made up solely of uppercase letters, ASCII whitespace and the
    /// characters `-;'.`.
    Entry(&'static str),
    /// A definition: a stanza starting with `Defn`, or a numbered stanza
    /// (`1. ...`), with the leading marker removed.
    Defn(&'static str),
    /// A note: a stanza starting with `Note`.
    Note(&'static str),
    /// A synonym list: a stanza starting with `Syn`.
    Synonym(&'static str),
    /// A numbered bullet. Not currently produced by the lexer in this file,
    /// which maps numbered stanzas to [`Stanza::Defn`] instead.
    Bullet(&'static str),
    /// A lettered sub-bullet such as `(a)`. Not currently produced by the
    /// lexer in this file.
    SubBullet(&'static str),
}

impl Stanza {
    /// Punctuation allowed next to uppercase letters in an entry line.
    const ENTRY_PUNCTUATION: &'static str = "-;'.";

    /// Returns `true` when `s` looks like an entry line.
    ///
    /// Every character must be an uppercase letter, ASCII whitespace or one of
    /// `-;'.`, and at least one uppercase letter must be present. The second
    /// condition keeps empty, whitespace-only and punctuation-only lines from
    /// being accepted merely because `all` holds vacuously for them.
    fn is_entry(s: &str) -> bool {
        let allowed = |c: char| {
            c.is_uppercase() || c.is_ascii_whitespace() || Self::ENTRY_PUNCTUATION.contains(c)
        };
        s.chars().any(char::is_uppercase) && s.chars().all(allowed)
    }

    /// Returns `true` when `s` starts with the `Defn` marker.
    fn is_defn(s: &str) -> bool {
        s.starts_with("Defn")
    }

    /// Returns `true` when `s` starts with the `Note` marker.
    fn is_note(s: &str) -> bool {
        s.starts_with("Note")
    }

    /// Returns `true` when `s` starts with the `Syn` marker.
    fn is_synonym(s: &str) -> bool {
        s.starts_with("Syn")
    }

    /// Returns `true` when `s` starts with a numbered-bullet marker, i.e. one
    /// or more ASCII digits immediately followed by the first `.` in `s`
    /// (`"1. foo"`, `"12.bar"`).
    ///
    /// A line with no `.` or with nothing in front of its first `.` is not a
    /// bullet. Only ASCII digits count, which matches the digits the lexer
    /// strips from a bullet's text.
    fn is_bullet(s: &str) -> bool {
        s.find('.')
            // `find` returns a char-boundary index, so slicing cannot panic.
            .map(|dot| &s[..dot])
            .map_or(false, |number| {
                !number.is_empty() && number.bytes().all(|b| b.is_ascii_digit())
            })
    }

    /// Returns `true` when `s` starts with a lettered sub-bullet marker: `(`,
    /// exactly one alphabetic character, then `)` — for example `"(a) ..."`.
    ///
    /// Not currently called by the lexer in this file.
    fn is_sub_bullet(s: &str) -> bool {
        let mut chars = s.chars();
        matches!(
            (chars.next(), chars.next(), chars.next()),
            (Some('('), Some(letter), Some(')')) if letter.is_alphabetic()
        )
    }
}

/// Error returned when a stanza cannot be classified.
///
/// Implements `Debug`, `Display` and `std::error::Error` so that results
/// carrying it can be unwrapped, printed and propagated like any other error.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct StanzaLexError {
    /// Human-readable description of the failure; the lexer includes the text
    /// of the offending stanza in it.
    pub data: String,
}

impl std::fmt::Display for StanzaLexError {
    /// Writes the stored description verbatim.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.data)
    }
}

impl std::error::Error for StanzaLexError {}

impl FromStaticStr for Stanza {
    type Err = StanzaLexError;
    fn from_str(s: &'static str) -> Result<Self, Self::Err> {
        let mut lines = s.split("\n");
        if let Some(first_line) = lines.next() {
            if !first_line.is_empty() {
                if Stanza::is_entry(first_line) {
                    Ok(Self::Entry(first_line.trim()))
                } else if Stanza::is_defn(first_line) {
                    Ok(Self::Defn(s.strip_prefix("Defn: ").unwrap_or(s).trim()))
                } else if Stanza::is_note(first_line) {
                    Ok(Self::Note(s.strip_prefix("Note: ").unwrap_or(s).trim()))
                } else if Stanza::is_synonym(first_line) {
                    Ok(Self::Synonym(s.strip_prefix("Syn.").unwrap_or(s)))
                } else if Stanza::is_bullet(first_line) {
                    Ok(Self::Defn(
                        s.trim_start_matches(|c| "0123456789. ".contains(c)),
                    ))
                // } else if Stanza::is_sub_bullet(first_line) {
                //     Ok(Self::SubBullet(s))
                } else {
                    Err(Self::Err {
                        data: format!("weird stanza: {}", s),
                    })
                }
            } else {
                Err(Self::Err {
                    data: format!("empty first line: {}", s),
                })
            }
        } else {
            Err(Self::Err {
                data: format!("empty stanza: {}", s),
            })
        }
    }
}

pub fn lex(src: &'static str) -> impl Iterator<Item = Result<Stanza, StanzaLexError>> {
    src.split("\n\n").map(Stanza::from_str)
}