diff options
author | Akshay <[email protected]> | 2023-03-30 17:39:49 +0100 |
---|---|---|
committer | Akshay <[email protected]> | 2023-03-30 17:39:49 +0100 |
commit | d315cce8e99ec6d96695bea708ae315028f3db66 (patch) | |
tree | c4cdf0331788cb1ec742d2dd39d6325d0557f06a |
init
-rw-r--r-- | .gitignore | 2 | ||||
-rw-r--r-- | Cargo.lock | 7 | ||||
-rw-r--r-- | Cargo.toml | 8 | ||||
-rw-r--r-- | flake.lock | 24 | ||||
-rw-r--r-- | flake.nix | 27 | ||||
-rw-r--r-- | src/consts.rs | 31 | ||||
-rw-r--r-- | src/lex.rs | 80 | ||||
-rw-r--r-- | src/main.rs | 101 | ||||
-rw-r--r-- | src/utils.rs | 7 |
9 files changed, 287 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2d5df85 --- /dev/null +++ b/.gitignore | |||
@@ -0,0 +1,2 @@ | |||
1 | /target | ||
2 | .direnv | ||
diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..06c3f88 --- /dev/null +++ b/Cargo.lock | |||
@@ -0,0 +1,7 @@ | |||
1 | # This file is automatically @generated by Cargo. | ||
2 | # It is not intended for manual editing. | ||
3 | version = 3 | ||
4 | |||
5 | [[package]] | ||
6 | name = "dict" | ||
7 | version = "0.1.0" | ||
diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..d2c0dc4 --- /dev/null +++ b/Cargo.toml | |||
@@ -0,0 +1,8 @@ | |||
1 | [package] | ||
2 | name = "dict" | ||
3 | version = "0.1.0" | ||
4 | edition = "2021" | ||
5 | |||
6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html | ||
7 | |||
8 | [dependencies] | ||
diff --git a/flake.lock b/flake.lock new file mode 100644 index 0000000..f231e8c --- /dev/null +++ b/flake.lock | |||
@@ -0,0 +1,24 @@ | |||
1 | { | ||
2 | "nodes": { | ||
3 | "nixpkgs": { | ||
4 | "locked": { | ||
5 | "lastModified": 1677852945, | ||
6 | "narHash": "sha256-liiVJjkBTuBTAkRW3hrI8MbPD2ImYzwUpa7kvteiKhM=", | ||
7 | "path": "/nix/store/cgfz9cycn82cwhvpaskq80bfw0k711gq-source", | ||
8 | "rev": "f5ffd5787786dde3a8bf648c7a1b5f78c4e01abb", | ||
9 | "type": "path" | ||
10 | }, | ||
11 | "original": { | ||
12 | "id": "nixpkgs", | ||
13 | "type": "indirect" | ||
14 | } | ||
15 | }, | ||
16 | "root": { | ||
17 | "inputs": { | ||
18 | "nixpkgs": "nixpkgs" | ||
19 | } | ||
20 | } | ||
21 | }, | ||
22 | "root": "root", | ||
23 | "version": 7 | ||
24 | } | ||
diff --git a/flake.nix b/flake.nix new file mode 100644 index 0000000..a615408 --- /dev/null +++ b/flake.nix | |||
@@ -0,0 +1,27 @@ | |||
{
  description = "A very basic flake";

  outputs = { self, nixpkgs }:
    let
      # Platforms for which a development shell is generated.
      supportedSystems = [ "x86_64-linux" ];

      # Maps a per-system function over `supportedSystems`, producing an
      # attribute set keyed by system name.
      forAllSystems = nixpkgs.lib.genAttrs supportedSystems;

      # nixpkgs imported once for each supported system.
      nixpkgsFor = forAllSystems (system: import nixpkgs { inherit system; });
    in
    {
      # Development shell providing the Rust toolchain and helper tools.
      devShell = forAllSystems (system:
        let
          pkgs = nixpkgsFor.${system};
        in
        pkgs.mkShell {
          nativeBuildInputs = with pkgs; [
            rustc
            cargo
            rust-analyzer
            cargo-watch
          ];
          RUST_BACKTRACE = 1;
        }
      );
    };
}
diff --git a/src/consts.rs b/src/consts.rs new file mode 100644 index 0000000..446c341 --- /dev/null +++ b/src/consts.rs | |||
@@ -0,0 +1,31 @@ | |||
1 | pub const SRC: &str = include_str!("../assets/en.txt"); | ||
2 | // pub const SRC: &str = r"A | ||
3 | // A (named a in the English, and most commonly ä in other languages). | ||
4 | // | ||
5 | // Defn: The first letter of the English and of many other alphabets. | ||
6 | // The capital A of the alphabets of Middle and Western Europe, as also | ||
7 | // the small letter (a), besides the forms in Italic, black letter, | ||
8 | // etc., are all descended from the old Latin A, which was borrowed from | ||
9 | // the Greek Alpha, of the same form; and this was made from the first | ||
10 | // letter (Aleph, and itself from the Egyptian origin. The Aleph was a | ||
11 | // consonant letter, with a guttural breath sound that was not an | ||
12 | // element of Greek articulation; and the Greeks took it to represent | ||
13 | // their vowel Alpha with the ä sound, the Phoenician alphabet having no | ||
14 | // vowel symbols. This letter, in English, is used for several different | ||
15 | // vowel sounds. See Guide to pronunciation, §§ 43-74. The regular long | ||
16 | // a, as in fate, etc., is a comparatively modern sound, and has taken | ||
17 | // the place of what, till about the early part of the 17th century, was | ||
18 | // a sound of the quality of ä (as in far). | ||
19 | // | ||
20 | // 2. (Mus.) | ||
21 | // | ||
22 | // Defn: The name of the sixth tone in the model major scale (that in | ||
23 | // C), or the first tone of the minor scale, which is named after it the | ||
24 | // scale in A minor. The second string of the violin is tuned to the A | ||
25 | // in the treble staff. | ||
26 | // -- A sharp (A#) is the name of a musical tone intermediate between A | ||
27 | // and B. | ||
28 | // -- A flat (A) is the name of a tone intermediate between A and G."; | ||
29 | // | ||
30 | // | ||
31 | // | ||
diff --git a/src/lex.rs b/src/lex.rs new file mode 100644 index 0000000..0f9a535 --- /dev/null +++ b/src/lex.rs | |||
@@ -0,0 +1,80 @@ | |||
1 | use crate::utils::FromStaticStr; | ||
2 | |||
/// A chunk of dictionary source text, classified by the shape of its first line.
///
/// Every variant carries the complete, unmodified text of the stanza.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Stanza {
    /// First line is a headword: uppercase letters, ASCII whitespace and `-;'.` only.
    Entry(&'static str),
    /// First line starts with `Defn`.
    Defn(&'static str),
    /// First line starts with `Note`.
    Note(&'static str),
    /// First line starts with `Syn`.
    Synonym(&'static str),
    /// First line starts with a numeric label followed by a period, e.g. `2.`.
    Bullet(&'static str),
    /// First line starts with a parenthesised single letter, e.g. `(a)`.
    SubBullet(&'static str),
}

impl Stanza {
    /// Returns `true` when `s` looks like a headword line.
    ///
    /// The line must contain at least one uppercase letter and otherwise consist
    /// only of uppercase letters, ASCII whitespace and the characters `-;'.`.
    /// Requiring an uppercase letter keeps empty, blank or punctuation-only lines
    /// from being classified as headwords.
    fn is_entry(s: &str) -> bool {
        s.chars().any(char::is_uppercase)
            && s.chars()
                .all(|c| c.is_uppercase() || c.is_ascii_whitespace() || "-;'.".contains(c))
    }

    /// Returns `true` when `s` starts with `Defn`.
    fn is_defn(s: &str) -> bool {
        s.starts_with("Defn")
    }

    /// Returns `true` when `s` starts with `Note`.
    fn is_note(s: &str) -> bool {
        s.starts_with("Note")
    }

    /// Returns `true` when `s` starts with `Syn`.
    fn is_synonym(s: &str) -> bool {
        s.starts_with("Syn")
    }

    /// Returns `true` when the text before the first `.` in `s` is a non-empty
    /// run of numeric characters, e.g. `2. (Mus.)`.
    ///
    /// The non-empty check matters: `all` is vacuously true on an empty prefix,
    /// which would otherwise classify any line starting with `.` as a bullet.
    fn is_bullet(s: &str) -> bool {
        s.split_once('.')
            .map(|(label, _)| !label.is_empty() && label.chars().all(char::is_numeric))
            .unwrap_or_default()
    }

    /// Returns `true` when `s` starts with `(`, one alphabetic character, then `)`.
    fn is_sub_bullet(s: &str) -> bool {
        let mut chars = s.chars();
        matches!(
            (chars.next(), chars.next(), chars.next()),
            (Some('('), Some(letter), Some(')')) if letter.is_alphabetic()
        )
    }
}
40 | |||
/// Error produced when a stanza cannot be classified.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct StanzaLexError {
    /// Human-readable description of the failure.
    pub data: String,
}

impl std::fmt::Display for StanzaLexError {
    /// Writes the stored description verbatim.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.data)
    }
}

impl std::error::Error for StanzaLexError {}
44 | |||
45 | impl FromStaticStr for Stanza { | ||
46 | type Err = StanzaLexError; | ||
47 | fn from_str(s: &'static str) -> Result<Self, Self::Err> { | ||
48 | let mut lines = s.split("\n"); | ||
49 | if let Some(first_line) = lines.next() { | ||
50 | if !first_line.is_empty() { | ||
51 | if Stanza::is_entry(first_line) { | ||
52 | Ok(Self::Entry(s)) | ||
53 | } else if Stanza::is_defn(first_line) { | ||
54 | Ok(Self::Defn(s)) | ||
55 | } else if Stanza::is_note(first_line) { | ||
56 | Ok(Self::Note(s)) | ||
57 | } else if Stanza::is_synonym(first_line) { | ||
58 | Ok(Self::Synonym(s)) | ||
59 | } else if Stanza::is_bullet(first_line) { | ||
60 | Ok(Self::Bullet(s)) | ||
61 | } else if Stanza::is_sub_bullet(first_line) { | ||
62 | Ok(Self::SubBullet(s)) | ||
63 | } else { | ||
64 | Err(Self::Err { | ||
65 | data: format!("weird stanza: {}", s), | ||
66 | }) | ||
67 | } | ||
68 | } else { | ||
69 | Err(Self::Err { | ||
70 | data: format!("empty first line: {}", s), | ||
71 | }) | ||
72 | } | ||
73 | } else { | ||
74 | Err(Self::Err { | ||
75 | data: format!("empty stanza: {}", s), | ||
76 | }) | ||
77 | } | ||
78 | } | ||
79 | } | ||
80 | |||
diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..e6d997d --- /dev/null +++ b/src/main.rs | |||
@@ -0,0 +1,101 @@ | |||
1 | mod consts; | ||
2 | mod utils; | ||
3 | mod lex; | ||
4 | |||
5 | use consts::SRC; | ||
6 | use lex::{Stanza, StanzaLexError}; | ||
7 | use utils::FromStaticStr; | ||
8 | |||
9 | fn main() { | ||
10 | let mut count = 0; | ||
11 | let mut parse_state = ParseState::Ready; | ||
12 | let mut current_entry = EntryBuilder::new(); | ||
13 | let mut dict = Dictionary { | ||
14 | entries: vec![], | ||
15 | }; | ||
16 | |||
17 | for l in SRC.split("\n\n") { | ||
18 | count += 1; | ||
19 | let stanza = match Stanza::from_str(l) { | ||
20 | Ok(s) => { | ||
21 | println!("{count} ok"); | ||
22 | s | ||
23 | }, | ||
24 | Err(StanzaLexError { data }) => { | ||
25 | eprintln!("stanza err: {data}\n\n"); | ||
26 | continue; | ||
27 | }, | ||
28 | }; | ||
29 | match stanza { | ||
30 | Stanza::Entry(s) if parse_state == ParseState::Ready => { | ||
31 | current_entry.set_name(s); | ||
32 | parse_state = ParseState::InEntry; | ||
33 | } | ||
34 | Stanza::Defn(d) if parse_state == ParseState::InEntry => { | ||
35 | current_entry.set_defn(d); | ||
36 | |||
37 | match current_entry.build() { | ||
38 | Ok(e) => dict.entries.push(e), | ||
39 | Err(_) => eprintln!("failed to build entry"), | ||
40 | } | ||
41 | |||
42 | parse_state = ParseState::Ready; | ||
43 | } | ||
44 | _ => () | ||
45 | } | ||
46 | } | ||
47 | dbg!(dict.entries.iter().find(|entry| entry.name.to_ascii_lowercase().starts_with("discursive"))); | ||
48 | } | ||
49 | |||
/// Position of the stanza-pairing state machine.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
enum ParseState {
    /// Waiting for the next headword.
    Ready,
    /// A headword has been seen; waiting for its definition.
    InEntry,
}
55 | |||
56 | struct Dictionary { | ||
57 | entries: Vec<Entry> | ||
58 | } | ||
59 | |||
/// A single dictionary entry made of borrowed slices of static text.
#[derive(Debug, Clone, PartialEq, Eq)]
struct Entry {
    /// Headword text of the entry.
    name: &'static str,
    /// Definition text, if one was recorded.
    defn: Option<&'static str>,
    /// Note text, if one was recorded.
    note: Option<&'static str>,
    /// Synonym text, if one was recorded.
    synonym: Option<&'static str>,
}
67 | |||
/// Accumulates the optional pieces of an entry before it is built.
///
/// All fields start out as `None` via `Default`.
#[derive(Debug, Clone, Default)]
struct EntryBuilder {
    /// Headword text; must be set before an entry can be built.
    name: Option<&'static str>,
    /// Definition text, if any.
    defn: Option<&'static str>,
    /// Note text, if any.
    note: Option<&'static str>,
    /// Synonym text, if any.
    synonym: Option<&'static str>,
}
75 | |||
/// Reasons an entry cannot be built.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum EntryBuilderError {
    /// A required field was never set; carries the field's name.
    MissingField(&'static str),
}

impl std::fmt::Display for EntryBuilderError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::MissingField(field) => write!(f, "missing field: {}", field),
        }
    }
}

impl std::error::Error for EntryBuilderError {}
79 | |||
80 | impl EntryBuilder { | ||
81 | fn new() -> Self { | ||
82 | Self::default() | ||
83 | } | ||
84 | |||
85 | fn set_name(&mut self, name: &'static str) { | ||
86 | self.name = Some(name); | ||
87 | } | ||
88 | |||
89 | fn set_defn(&mut self, defn: &'static str) { | ||
90 | self.defn = Some(defn); | ||
91 | } | ||
92 | |||
93 | fn build(&self) -> Result<Entry, EntryBuilderError> { | ||
94 | Ok(Entry { | ||
95 | name: self.name.ok_or(EntryBuilderError::MissingField("name"))?, | ||
96 | defn: self.defn, | ||
97 | note: self.note, | ||
98 | synonym: self.synonym, | ||
99 | }) | ||
100 | } | ||
101 | } | ||
diff --git a/src/utils.rs b/src/utils.rs new file mode 100644 index 0000000..23fff7e --- /dev/null +++ b/src/utils.rs | |||
@@ -0,0 +1,7 @@ | |||
1 | |||
/// Parsing from a `&'static str`, so that the parsed value may keep borrowing
/// from the input for the `'static` lifetime.
pub trait FromStaticStr {
    /// Error returned when parsing fails.
    type Err;

    /// Attempts to build `Self` from `s`.
    fn from_str(s: &'static str) -> Result<Self, Self::Err>
    where
        Self: Sized;
}
7 | |||