aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAkshay <[email protected]>2023-03-30 17:39:49 +0100
committerAkshay <[email protected]>2023-03-30 17:39:49 +0100
commitd315cce8e99ec6d96695bea708ae315028f3db66 (patch)
treec4cdf0331788cb1ec742d2dd39d6325d0557f06a
init
-rw-r--r--.gitignore2
-rw-r--r--Cargo.lock7
-rw-r--r--Cargo.toml8
-rw-r--r--flake.lock24
-rw-r--r--flake.nix27
-rw-r--r--src/consts.rs31
-rw-r--r--src/lex.rs80
-rw-r--r--src/main.rs101
-rw-r--r--src/utils.rs7
9 files changed, 287 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..2d5df85
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
1/target
2.direnv
diff --git a/Cargo.lock b/Cargo.lock
new file mode 100644
index 0000000..06c3f88
--- /dev/null
+++ b/Cargo.lock
@@ -0,0 +1,7 @@
1# This file is automatically @generated by Cargo.
2# It is not intended for manual editing.
3version = 3
4
5[[package]]
6name = "dict"
7version = "0.1.0"
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..d2c0dc4
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,8 @@
1[package]
2name = "dict"
3version = "0.1.0"
4edition = "2021"
5
6# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
7
8[dependencies]
diff --git a/flake.lock b/flake.lock
new file mode 100644
index 0000000..f231e8c
--- /dev/null
+++ b/flake.lock
@@ -0,0 +1,24 @@
1{
2 "nodes": {
3 "nixpkgs": {
4 "locked": {
5 "lastModified": 1677852945,
6 "narHash": "sha256-liiVJjkBTuBTAkRW3hrI8MbPD2ImYzwUpa7kvteiKhM=",
7 "path": "/nix/store/cgfz9cycn82cwhvpaskq80bfw0k711gq-source",
8 "rev": "f5ffd5787786dde3a8bf648c7a1b5f78c4e01abb",
9 "type": "path"
10 },
11 "original": {
12 "id": "nixpkgs",
13 "type": "indirect"
14 }
15 },
16 "root": {
17 "inputs": {
18 "nixpkgs": "nixpkgs"
19 }
20 }
21 },
22 "root": "root",
23 "version": 7
24}
diff --git a/flake.nix b/flake.nix
new file mode 100644
index 0000000..a615408
--- /dev/null
+++ b/flake.nix
@@ -0,0 +1,27 @@
1{
2 description = "A very basic flake";
3
4 outputs = { self, nixpkgs }:
5 let
6 supportedSystems = [ "x86_64-linux" ];
7 forAllSystems = nixpkgs.lib.genAttrs supportedSystems;
8 nixpkgsFor = forAllSystems (system: import nixpkgs { inherit system; });
9 in
10 {
11
12 devShell = forAllSystems (system:
13 let
14 pkgs = nixpkgsFor."${system}";
15 in
16 pkgs.mkShell {
17 nativeBuildInputs = [
18 pkgs.rustc
19 pkgs.cargo
20 pkgs.rust-analyzer
21 pkgs.cargo-watch
22 ];
23 RUST_BACKTRACE = 1;
24 }
25 );
26 };
27}
diff --git a/src/consts.rs b/src/consts.rs
new file mode 100644
index 0000000..446c341
--- /dev/null
+++ b/src/consts.rs
@@ -0,0 +1,31 @@
1pub const SRC: &str = include_str!("../assets/en.txt");
2// pub const SRC: &str = r"A
3// A (named a in the English, and most commonly ä in other languages).
4//
5// Defn: The first letter of the English and of many other alphabets.
6// The capital A of the alphabets of Middle and Western Europe, as also
7// the small letter (a), besides the forms in Italic, black letter,
8// etc., are all descended from the old Latin A, which was borrowed from
9// the Greek Alpha, of the same form; and this was made from the first
10// letter (Aleph, and itself from the Egyptian origin. The Aleph was a
11// consonant letter, with a guttural breath sound that was not an
12// element of Greek articulation; and the Greeks took it to represent
13// their vowel Alpha with the ä sound, the Phoenician alphabet having no
14// vowel symbols. This letter, in English, is used for several different
15// vowel sounds. See Guide to pronunciation, §§ 43-74. The regular long
16// a, as in fate, etc., is a comparatively modern sound, and has taken
17// the place of what, till about the early part of the 17th century, was
18// a sound of the quality of ä (as in far).
19//
20// 2. (Mus.)
21//
22// Defn: The name of the sixth tone in the model major scale (that in
23// C), or the first tone of the minor scale, which is named after it the
24// scale in A minor. The second string of the violin is tuned to the A
25// in the treble staff.
26// -- A sharp (A#) is the name of a musical tone intermediate between A
27// and B.
28// -- A flat (A) is the name of a tone intermediate between A and G.";
29//
30//
31//
diff --git a/src/lex.rs b/src/lex.rs
new file mode 100644
index 0000000..0f9a535
--- /dev/null
+++ b/src/lex.rs
@@ -0,0 +1,80 @@
1use crate::utils::FromStaticStr;
2
/// A blank-line-delimited chunk of the dictionary text, classified by the
/// shape of its first line.
///
/// Every variant carries the complete, unmodified text of the stanza.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Stanza {
    /// First line satisfies [`Stanza::is_entry`] (a headword line).
    Entry(&'static str),
    /// First line starts with `Defn`.
    Defn(&'static str),
    /// First line starts with `Note`.
    Note(&'static str),
    /// First line starts with `Syn`.
    Synonym(&'static str),
    /// First line starts with one or more numeric characters followed by `.`.
    Bullet(&'static str),
    /// First line starts with a single alphabetic character in parentheses,
    /// e.g. `(a)`.
    SubBullet(&'static str),
}

impl Stanza {
    /// Returns `true` when every character of `s` is an uppercase letter,
    /// ASCII whitespace, or one of `-`, `;`, `'`, `.`.
    ///
    /// An empty string trivially satisfies this.
    fn is_entry(s: &str) -> bool {
        s.chars()
            .all(|c| c.is_uppercase() || c.is_ascii_whitespace() || "-;'.".contains(c))
    }

    /// Returns `true` when `s` starts with `Defn`.
    fn is_defn(s: &str) -> bool {
        s.starts_with("Defn")
    }

    /// Returns `true` when `s` starts with `Note`.
    fn is_note(s: &str) -> bool {
        s.starts_with("Note")
    }

    /// Returns `true` when `s` starts with `Syn`.
    fn is_synonym(s: &str) -> bool {
        s.starts_with("Syn")
    }

    /// Returns `true` when the text before the first `.` in `s` is a
    /// non-empty run of numeric characters (e.g. `2. (Mus.)`).
    fn is_bullet(s: &str) -> bool {
        s.split_once('.').map_or(false, |(prefix, _)| {
            // `all` is vacuously true on an empty prefix, so a line starting
            // with `.` must be rejected explicitly.
            !prefix.is_empty() && prefix.chars().all(char::is_numeric)
        })
    }

    /// Returns `true` when `s` starts with `(`, one alphabetic character,
    /// then `)`.
    fn is_sub_bullet(s: &str) -> bool {
        let mut chars = s.chars();
        matches!(
            (chars.next(), chars.next(), chars.next()),
            (Some('('), Some(letter), Some(')')) if letter.is_alphabetic()
        )
    }
}
40
/// Error describing a stanza that could not be classified.
///
/// Implements [`std::fmt::Display`] and [`std::error::Error`] so it can be
/// printed, unwrapped, or boxed like any other error.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct StanzaLexError {
    /// Human-readable description of the failure.
    pub data: String,
}

impl std::fmt::Display for StanzaLexError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.data)
    }
}

impl std::error::Error for StanzaLexError {}
44
45impl FromStaticStr for Stanza {
46 type Err = StanzaLexError;
47 fn from_str(s: &'static str) -> Result<Self, Self::Err> {
48 let mut lines = s.split("\n");
49 if let Some(first_line) = lines.next() {
50 if !first_line.is_empty() {
51 if Stanza::is_entry(first_line) {
52 Ok(Self::Entry(s))
53 } else if Stanza::is_defn(first_line) {
54 Ok(Self::Defn(s))
55 } else if Stanza::is_note(first_line) {
56 Ok(Self::Note(s))
57 } else if Stanza::is_synonym(first_line) {
58 Ok(Self::Synonym(s))
59 } else if Stanza::is_bullet(first_line) {
60 Ok(Self::Bullet(s))
61 } else if Stanza::is_sub_bullet(first_line) {
62 Ok(Self::SubBullet(s))
63 } else {
64 Err(Self::Err {
65 data: format!("weird stanza: {}", s),
66 })
67 }
68 } else {
69 Err(Self::Err {
70 data: format!("empty first line: {}", s),
71 })
72 }
73 } else {
74 Err(Self::Err {
75 data: format!("empty stanza: {}", s),
76 })
77 }
78 }
79}
80
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..e6d997d
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,101 @@
1mod consts;
2mod utils;
3mod lex;
4
5use consts::SRC;
6use lex::{Stanza, StanzaLexError};
7use utils::FromStaticStr;
8
9fn main() {
10 let mut count = 0;
11 let mut parse_state = ParseState::Ready;
12 let mut current_entry = EntryBuilder::new();
13 let mut dict = Dictionary {
14 entries: vec![],
15 };
16
17 for l in SRC.split("\n\n") {
18 count += 1;
19 let stanza = match Stanza::from_str(l) {
20 Ok(s) => {
21 println!("{count} ok");
22 s
23 },
24 Err(StanzaLexError { data }) => {
25 eprintln!("stanza err: {data}\n\n");
26 continue;
27 },
28 };
29 match stanza {
30 Stanza::Entry(s) if parse_state == ParseState::Ready => {
31 current_entry.set_name(s);
32 parse_state = ParseState::InEntry;
33 }
34 Stanza::Defn(d) if parse_state == ParseState::InEntry => {
35 current_entry.set_defn(d);
36
37 match current_entry.build() {
38 Ok(e) => dict.entries.push(e),
39 Err(_) => eprintln!("failed to build entry"),
40 }
41
42 parse_state = ParseState::Ready;
43 }
44 _ => ()
45 }
46 }
47 dbg!(dict.entries.iter().find(|entry| entry.name.to_ascii_lowercase().starts_with("discursive")));
48}
49
/// Where the stanza loop in `main` currently stands.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
enum ParseState {
    /// No entry is pending; the next entry stanza starts one.
    Ready,
    /// An entry name has been recorded and a definition stanza is awaited.
    InEntry,
}
55
56struct Dictionary {
57 entries: Vec<Entry>
58}
59
/// A finished dictionary entry; each field holds the raw stanza text it was
/// built from.
#[derive(Debug, Clone, PartialEq, Eq)]
struct Entry {
    /// Text of the entry stanza.
    name: &'static str,
    /// Text of the definition stanza, if one was recorded.
    defn: Option<&'static str>,
    /// Text of the note stanza, if one was recorded.
    note: Option<&'static str>,
    /// Text of the synonym stanza, if one was recorded.
    synonym: Option<&'static str>,
}

/// Accumulates the parts of an [`Entry`] as stanzas are encountered.
#[derive(Debug, Default, Clone)]
struct EntryBuilder {
    name: Option<&'static str>,
    defn: Option<&'static str>,
    note: Option<&'static str>,
    synonym: Option<&'static str>,
}

/// Reasons [`EntryBuilder::build`] can fail.
#[derive(Debug, Clone, PartialEq, Eq)]
enum EntryBuilderError {
    /// A required field, identified by name, was never set.
    MissingField(&'static str),
}

impl std::fmt::Display for EntryBuilderError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::MissingField(field) => write!(f, "missing field: {field}"),
        }
    }
}

impl std::error::Error for EntryBuilderError {}

impl EntryBuilder {
    /// Creates a builder with no fields set.
    fn new() -> Self {
        Self::default()
    }

    /// Records the entry name, replacing any previous one.
    fn set_name(&mut self, name: &'static str) {
        self.name = Some(name);
    }

    /// Records the definition, replacing any previous one.
    fn set_defn(&mut self, defn: &'static str) {
        self.defn = Some(defn);
    }

    /// Produces an [`Entry`] from the fields set so far.
    ///
    /// The builder is left untouched, so it can be reused; values persist
    /// until overwritten.
    ///
    /// # Errors
    ///
    /// Returns [`EntryBuilderError::MissingField`] with `"name"` when no name
    /// has been set. All other fields are optional.
    fn build(&self) -> Result<Entry, EntryBuilderError> {
        let name = self.name.ok_or(EntryBuilderError::MissingField("name"))?;
        Ok(Entry {
            name,
            defn: self.defn,
            note: self.note,
            synonym: self.synonym,
        })
    }
}
diff --git a/src/utils.rs b/src/utils.rs
new file mode 100644
index 0000000..23fff7e
--- /dev/null
+++ b/src/utils.rs
@@ -0,0 +1,7 @@
1
/// Fallible construction of a value from a string slice with `'static`
/// lifetime, so the result may keep borrowing from the input.
pub trait FromStaticStr {
    /// Error returned when the input cannot be converted.
    type Err;

    /// Attempts to build `Self` from `s`.
    fn from_str(s: &'static str) -> Result<Self, Self::Err>
    where
        Self: Sized;
}
7