aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/consts.rs32
-rw-r--r--src/lex.rs33
-rw-r--r--src/lib.rs83
-rw-r--r--src/main.rs107
-rw-r--r--src/parse.rs200
-rw-r--r--src/utils.rs5
6 files changed, 317 insertions, 143 deletions
diff --git a/src/consts.rs b/src/consts.rs
index 446c341..c606a95 100644
--- a/src/consts.rs
+++ b/src/consts.rs
@@ -1,31 +1 @@
1pub const SRC: &str = include_str!("../assets/en.txt"); pub const SRC: &str = include_str!("../../assets/en.txt");
2// pub const SRC: &str = r"A
3// A (named a in the English, and most commonly ä in other languages).
4//
5// Defn: The first letter of the English and of many other alphabets.
6// The capital A of the alphabets of Middle and Western Europe, as also
7// the small letter (a), besides the forms in Italic, black letter,
8// etc., are all descended from the old Latin A, which was borrowed from
9// the Greek Alpha, of the same form; and this was made from the first
10// letter (Aleph, and itself from the Egyptian origin. The Aleph was a
11// consonant letter, with a guttural breath sound that was not an
12// element of Greek articulation; and the Greeks took it to represent
13// their vowel Alpha with the ä sound, the Phoenician alphabet having no
14// vowel symbols. This letter, in English, is used for several different
15// vowel sounds. See Guide to pronunciation, §§ 43-74. The regular long
16// a, as in fate, etc., is a comparatively modern sound, and has taken
17// the place of what, till about the early part of the 17th century, was
18// a sound of the quality of ä (as in far).
19//
20// 2. (Mus.)
21//
22// Defn: The name of the sixth tone in the model major scale (that in
23// C), or the first tone of the minor scale, which is named after it the
24// scale in A minor. The second string of the violin is tuned to the A
25// in the treble staff.
26// -- A sharp (A#) is the name of a musical tone intermediate between A
27// and B.
28// -- A flat (A) is the name of a tone intermediate between A and G.";
29//
30//
31//
diff --git a/src/lex.rs b/src/lex.rs
index 0f9a535..701009a 100644
--- a/src/lex.rs
+++ b/src/lex.rs
@@ -1,5 +1,6 @@
1use crate::utils::FromStaticStr; 1use crate::utils::FromStaticStr;
2 2
3#[derive(Debug)]
3pub enum Stanza { 4pub enum Stanza {
4 Entry(&'static str), 5 Entry(&'static str),
5 Defn(&'static str), 6 Defn(&'static str),
@@ -11,7 +12,8 @@ pub enum Stanza {
11 12
12impl Stanza { 13impl Stanza {
13 fn is_entry(s: &str) -> bool { 14 fn is_entry(s: &str) -> bool {
14 s.chars().all(|c| c.is_uppercase() || c.is_ascii_whitespace() || "-;'.".contains(c)) 15 s.chars()
16 .all(|c| c.is_uppercase() || c.is_ascii_whitespace() || "-;'.".contains(c))
15 } 17 }
16 18
17 fn is_defn(s: &str) -> bool { 19 fn is_defn(s: &str) -> bool {
@@ -27,7 +29,9 @@ impl Stanza {
27 } 29 }
28 30
29 fn is_bullet(s: &str) -> bool { 31 fn is_bullet(s: &str) -> bool {
30 s.find('.').map(|idx| s[..idx].chars().all(char::is_numeric)).unwrap_or_default() 32 s.find('.')
33 .map(|idx| s[..idx].chars().all(char::is_numeric))
34 .unwrap_or_default()
31 } 35 }
32 36
33 fn is_sub_bullet(s: &str) -> bool { 37 fn is_sub_bullet(s: &str) -> bool {
@@ -49,21 +53,23 @@ impl FromStaticStr for Stanza {
49 if let Some(first_line) = lines.next() { 53 if let Some(first_line) = lines.next() {
50 if !first_line.is_empty() { 54 if !first_line.is_empty() {
51 if Stanza::is_entry(first_line) { 55 if Stanza::is_entry(first_line) {
52 Ok(Self::Entry(s)) 56 Ok(Self::Entry(first_line.trim()))
53 } else if Stanza::is_defn(first_line) { 57 } else if Stanza::is_defn(first_line) {
54 Ok(Self::Defn(s)) 58 Ok(Self::Defn(s.strip_prefix("Defn: ").unwrap_or(s).trim()))
55 } else if Stanza::is_note(first_line) { 59 } else if Stanza::is_note(first_line) {
56 Ok(Self::Note(s)) 60 Ok(Self::Note(s.strip_prefix("Note: ").unwrap_or(s).trim()))
57 } else if Stanza::is_synonym(first_line) { 61 } else if Stanza::is_synonym(first_line) {
58 Ok(Self::Synonym(s)) 62 Ok(Self::Synonym(s.strip_prefix("Syn.").unwrap_or(s)))
59 } else if Stanza::is_bullet(first_line) { 63 } else if Stanza::is_bullet(first_line) {
60 Ok(Self::Bullet(s)) 64 Ok(Self::Defn(
61 } else if Stanza::is_sub_bullet(first_line) { 65 s.trim_start_matches(|c| "0123456789. ".contains(c)),
62 Ok(Self::SubBullet(s)) 66 ))
67 // } else if Stanza::is_sub_bullet(first_line) {
68 // Ok(Self::SubBullet(s))
63 } else { 69 } else {
64 Err(Self::Err { 70 Err(Self::Err {
65 data: format!("weird stanza: {}", s), 71 data: format!("weird stanza: {}", s),
66 }) 72 })
67 } 73 }
68 } else { 74 } else {
69 Err(Self::Err { 75 Err(Self::Err {
@@ -78,3 +84,6 @@ impl FromStaticStr for Stanza {
78 } 84 }
79} 85}
80 86
87pub fn lex(src: &'static str) -> impl Iterator<Item = Result<Stanza, StanzaLexError>> {
88 src.split("\n\n").map(Stanza::from_str)
89}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..1324f70
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,83 @@
1pub mod consts;
2pub mod lex;
3pub mod parse;
4mod utils;
5
6use std::fmt;
7
8use radix_trie::{Trie, TrieCommon};
9
10pub struct Dict {
11 inner: Trie<DictKey, DictValue>,
12}
13
14impl Dict {
15 fn new() -> Self {
16 Self { inner: Trie::new() }
17 }
18
19 fn insert(&mut self, entry: DictKey, value: DictValue) {
20 self.inner.map_with_default(
21 entry,
22 |dict_value| {
23 // TODO: this only merges defns, not notes/syns
24 for v in value.defn.iter() {
25 dict_value.defn.push(v);
26 }
27 },
28 value.clone(),
29 );
30 }
31
32 pub fn search<'dict, 'search>(
33 &'dict self,
34 search_term: &'search str,
35 ) -> SearchResults<'dict, 'search> {
36 self.inner
37 .subtrie(search_term)
38 .map_or(SearchResults::Empty, |subtrie| {
39 SearchResults::Hit(subtrie.iter())
40 })
41 }
42}
43
44pub enum SearchResults<'dict, 'search> {
45 Empty,
46 Hit(radix_trie::iter::Iter<'dict, &'search str, DictValue>),
47}
48
49impl<'dict, 'search> SearchResults<'dict, 'search> {
50 // mutable ref here to advance the iterator present in Self::Hit
51 pub fn print(&mut self) {
52 match self {
53 Self::Hit(results) => {
54 while let Some((key, value)) = results.next() {
55 if value.defn.len() > 1 {
56 for (def, idx) in value.defn.iter().zip(1..) {
57 println!("{}({}) {}", key, idx, def.replace('\n', " "));
58 }
59 } else {
60 println!("{} {}", key, value.defn[0].replace('\n', " "));
61 }
62
63 // if let Some(note) = value.note {
64 // print!("\t{}", note);
65 // }
66 // if let Some(synonym) = value.synonym {
67 // print!("\t{}", synonym);
68 // }
69 }
70 }
71 Self::Empty => (),
72 }
73 }
74}
75
/// Headword under which a [`DictValue`] is stored.
type DictKey = &'static str;

/// Everything recorded for a single headword.
#[derive(Debug, Clone)]
pub struct DictValue {
    /// Definitions, in the order they were added.
    defn: Vec<&'static str>,
    /// Optional note attached to the headword.
    note: Option<&'static str>,
    /// Optional synonym text attached to the headword.
    synonym: Option<&'static str>,
}
diff --git a/src/main.rs b/src/main.rs
index e6d997d..9def90c 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,101 +1,14 @@
1mod consts; 1use dict::{consts::SRC, lex, parse::ParseState};
2mod utils;
3mod lex;
4
5use consts::SRC;
6use lex::{Stanza, StanzaLexError};
7use utils::FromStaticStr;
8 2
9fn main() { 3fn main() {
10 let mut count = 0; 4 let Some(search_term) = std::env::args().skip(1).next() else {
11 let mut parse_state = ParseState::Ready; 5 eprintln!("usage: dict <search-term>");
12 let mut current_entry = EntryBuilder::new(); 6 return;
13 let mut dict = Dictionary {
14 entries: vec![],
15 }; 7 };
16 8 lex::lex(SRC)
17 for l in SRC.split("\n\n") { 9 .filter_map(Result::ok)
18 count += 1; 10 .fold(ParseState::new(), ParseState::advance)
19 let stanza = match Stanza::from_str(l) { 11 .finish()
20 Ok(s) => { 12 .search(search_term.to_ascii_uppercase().as_str())
21 println!("{count} ok"); 13 .print()
22 s
23 },
24 Err(StanzaLexError { data }) => {
25 eprintln!("stanza err: {data}\n\n");
26 continue;
27 },
28 };
29 match stanza {
30 Stanza::Entry(s) if parse_state == ParseState::Ready => {
31 current_entry.set_name(s);
32 parse_state = ParseState::InEntry;
33 }
34 Stanza::Defn(d) if parse_state == ParseState::InEntry => {
35 current_entry.set_defn(d);
36
37 match current_entry.build() {
38 Ok(e) => dict.entries.push(e),
39 Err(_) => eprintln!("failed to build entry"),
40 }
41
42 parse_state = ParseState::Ready;
43 }
44 _ => ()
45 }
46 }
47 dbg!(dict.entries.iter().find(|entry| entry.name.to_ascii_lowercase().starts_with("discursive")));
48}
49
50#[derive(PartialEq, Eq, PartialOrd, Ord)]
51enum ParseState {
52 Ready,
53 InEntry
54}
55
56struct Dictionary {
57 entries: Vec<Entry>
58}
59
60#[derive(Debug)]
61struct Entry {
62 name: &'static str,
63 defn: Option<&'static str>,
64 note: Option<&'static str>,
65 synonym: Option<&'static str>,
66}
67
68#[derive(Default)]
69struct EntryBuilder {
70 name: Option<&'static str>,
71 defn: Option<&'static str>,
72 note: Option<&'static str>,
73 synonym: Option<&'static str>,
74}
75
76enum EntryBuilderError {
77 MissingField(&'static str)
78}
79
80impl EntryBuilder {
81 fn new() -> Self {
82 Self::default()
83 }
84
85 fn set_name(&mut self, name: &'static str) {
86 self.name = Some(name);
87 }
88
89 fn set_defn(&mut self, defn: &'static str) {
90 self.defn = Some(defn);
91 }
92
93 fn build(&self) -> Result<Entry, EntryBuilderError> {
94 Ok(Entry {
95 name: self.name.ok_or(EntryBuilderError::MissingField("name"))?,
96 defn: self.defn,
97 note: self.note,
98 synonym: self.synonym,
99 })
100 }
101} 14}
diff --git a/src/parse.rs b/src/parse.rs
new file mode 100644
index 0000000..5b613ca
--- /dev/null
+++ b/src/parse.rs
@@ -0,0 +1,200 @@
1use crate::{lex::Stanza, Dict};
2
3pub struct ParseState {
4 dict: Dict,
5 status: Status,
6 current_entry: EntryBuilder,
7 errors: Vec<ParseError>,
8}
9
10#[derive(Debug)]
11enum ParseError {
12 Build(EntryBuilderError),
13 UndefinedState(Status, EntryBuilder, Stanza),
14}
15
16impl ParseState {
17 pub fn new() -> Self {
18 Self {
19 dict: Dict::new(),
20 status: Status::Start,
21 current_entry: EntryBuilder::new(),
22 errors: Vec::new(),
23 }
24 }
25
26 pub fn advance(mut self, stanza: Stanza) -> Self {
27 match (self.status, stanza) {
28 (Status::Start, Stanza::Entry(e)) => {
29 self.current_entry.set_name(e);
30 self.status = Status::ContainsName;
31 }
32 (Status::ContainsName, Stanza::Defn(d)) => {
33 self.current_entry.push_defn(d);
34 self.status = Status::ContainsOneDefn;
35 }
36 (Status::ContainsOneDefn | Status::ContainsMulDefn, Stanza::Defn(d)) => {
37 self.current_entry.push_defn(d);
38 self.status = Status::ContainsMulDefn;
39 }
40 (
41 Status::ContainsOneDefn | Status::ContainsMulDefn | Status::ContainsSynonym,
42 Stanza::Note(n),
43 ) => {
44 self.current_entry.set_note(n);
45 self.status = Status::ContainsNote;
46 }
47 (
48 Status::ContainsOneDefn | Status::ContainsMulDefn | Status::ContainsNote,
49 Stanza::Synonym(s),
50 ) => {
51 self.current_entry.set_synonym(s);
52 self.status = Status::ContainsSynonym;
53 }
54 (
55 Status::ContainsOneDefn
56 | Status::ContainsMulDefn
57 | Status::ContainsNote
58 | Status::ContainsSynonym,
59 Stanza::Entry(e),
60 ) => {
61 // flush the current entry
62 match self.current_entry.build() {
63 Ok(entry) => self.dict.insert(entry.name, entry.into()),
64 Err(b) => self.register_error(ParseError::Build(b)),
65 };
66
67 // begin with the new one
68 self.current_entry.clear();
69 self.current_entry.set_name(e);
70 self.status = Status::ContainsName;
71 }
72 (Status::ContainsName, Stanza::Entry(e)) => {
73 // dump unfinished entry and enter new entry
74 self.current_entry.clear();
75 self.current_entry.set_name(e);
76 self.status = Status::ContainsName;
77 }
78 (_, new_entry) => {
79 // any other states means our parser is entering undefined teritorry
80 // register an error if we have anything in current_entry
81 self.register_undefined_state_error(new_entry);
82 // and set the status to Start and fast forward to the next entry
83 self.current_entry.clear();
84 self.status = Status::Start;
85 }
86 }
87 self
88 }
89
90 fn register_error(&mut self, error: ParseError) {
91 self.errors.push(error)
92 }
93
94 fn register_undefined_state_error(&mut self, new_entry: Stanza) {
95 self.register_error(ParseError::UndefinedState(
96 self.status,
97 self.current_entry.clone(),
98 new_entry,
99 ));
100 }
101
102 pub fn finish(self) -> Dict {
103 self.dict
104 }
105
106 pub fn dump(&self) {
107 for err in &self.errors {
108 eprintln!("{err:?}");
109 }
110 }
111}
112
/// What the entry under construction already contains.
///
/// There is no dedicated state for a mangled stanza: the parser falls back to
/// [`Status::Start`] and skips until the next entry occurs.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy)]
enum Status {
    /// Nothing collected yet; ready to accept a new entry.
    Start,
    /// The headword is known; ready to accept a definition.
    ContainsName,
    /// Exactly one definition collected; can accept further definitions, a note
    /// or a synonym, or be flushed by the next entry.
    ContainsOneDefn,
    /// Several definitions collected; can accept further definitions, a note or
    /// a synonym, or be flushed by the next entry.
    ContainsMulDefn,
    /// A note was collected; can accept a synonym or be flushed by the next
    /// entry.
    ContainsNote,
    /// A synonym was collected; can accept a note or be flushed by the next
    /// entry.
    ContainsSynonym,
}
129
/// A completed dictionary entry as produced by `EntryBuilder::build`.
#[derive(Debug, Clone)]
struct Entry {
    /// Headword of the entry.
    name: &'static str,
    /// Definitions collected for the headword.
    defn: Vec<&'static str>,
    /// Optional note.
    note: Option<&'static str>,
    /// Optional synonym text.
    synonym: Option<&'static str>,
}
137
138impl From<Entry> for crate::DictValue {
139 fn from(entry: Entry) -> Self {
140 Self {
141 defn: entry.defn,
142 note: entry.note,
143 synonym: entry.synonym,
144 }
145 }
146}
147
/// Incrementally collected parts of an entry; every part starts out absent.
#[derive(Debug, Default, Clone)]
struct EntryBuilder {
    /// Headword, once seen.
    name: Option<&'static str>,
    /// Definitions collected so far.
    defn: Vec<&'static str>,
    /// Note, once seen.
    note: Option<&'static str>,
    /// Synonym text, once seen.
    synonym: Option<&'static str>,
}
155
/// Reasons why an `EntryBuilder` cannot produce an entry.
#[derive(Debug)]
enum EntryBuilderError {
    /// A required part is absent; carries the name of that part.
    MissingField(&'static str),
}
160
161impl EntryBuilder {
162 fn new() -> Self {
163 Self::default()
164 }
165
166 fn clear(&mut self) {
167 *self = Self::default();
168 }
169
170 fn set_name(&mut self, name: &'static str) {
171 self.name = Some(name);
172 }
173
174 fn push_defn(&mut self, defn: &'static str) {
175 self.defn.push(defn);
176 }
177
178 fn set_note(&mut self, note: &'static str) {
179 self.note = Some(note);
180 }
181
182 fn set_synonym(&mut self, synonym: &'static str) {
183 self.synonym = Some(synonym);
184 }
185
186 fn build(&self) -> Result<Entry, EntryBuilderError> {
187 let name = self.name.ok_or(EntryBuilderError::MissingField("name"))?;
188 let defn = if self.defn.is_empty() {
189 return Err(EntryBuilderError::MissingField("defn"));
190 } else {
191 self.defn.clone()
192 };
193 Ok(Entry {
194 name,
195 defn,
196 note: self.note,
197 synonym: self.synonym,
198 })
199 }
200}
diff --git a/src/utils.rs b/src/utils.rs
index 23fff7e..c53e564 100644
--- a/src/utils.rs
+++ b/src/utils.rs
@@ -1,7 +1,6 @@
1
/// Fallible construction from a string slice that lives for the whole program.
///
/// Has the same shape as [`std::str::FromStr`], except that the input must be
/// `'static`, which lets an implementor keep sub-slices of the input in the
/// value it returns.
pub trait FromStaticStr {
    /// Error returned when the input cannot be converted.
    type Err;

    /// Attempts to build `Self` from `s`.
    fn from_str(s: &'static str) -> Result<Self, Self::Err>
    where
        Self: Sized;
}
7