From d315cce8e99ec6d96695bea708ae315028f3db66 Mon Sep 17 00:00:00 2001 From: Akshay Date: Thu, 30 Mar 2023 22:09:49 +0530 Subject: init --- .gitignore | 2 ++ Cargo.lock | 7 ++++ Cargo.toml | 8 +++++ flake.lock | 24 ++++++++++++++ flake.nix | 27 ++++++++++++++++ src/consts.rs | 31 ++++++++++++++++++ src/lex.rs | 80 ++++++++++++++++++++++++++++++++++++++++++++++ src/main.rs | 101 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/utils.rs | 7 ++++ 9 files changed, 287 insertions(+) create mode 100644 .gitignore create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 flake.lock create mode 100644 flake.nix create mode 100644 src/consts.rs create mode 100644 src/lex.rs create mode 100644 src/main.rs create mode 100644 src/utils.rs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2d5df85 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +/target +.direnv diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..06c3f88 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,7 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "dict" +version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..d2c0dc4 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "dict" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] diff --git a/flake.lock b/flake.lock new file mode 100644 index 0000000..f231e8c --- /dev/null +++ b/flake.lock @@ -0,0 +1,24 @@ +{ + "nodes": { + "nixpkgs": { + "locked": { + "lastModified": 1677852945, + "narHash": "sha256-liiVJjkBTuBTAkRW3hrI8MbPD2ImYzwUpa7kvteiKhM=", + "path": "/nix/store/cgfz9cycn82cwhvpaskq80bfw0k711gq-source", + "rev": "f5ffd5787786dde3a8bf648c7a1b5f78c4e01abb", + "type": "path" + }, + "original": { + "id": "nixpkgs", + "type": "indirect" + } + }, + "root": { + "inputs": { + "nixpkgs": "nixpkgs" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix new file mode 100644 index 0000000..a615408 --- /dev/null +++ b/flake.nix @@ -0,0 +1,27 @@ +{ + description = "A very basic flake"; + + outputs = { self, nixpkgs }: + let + supportedSystems = [ "x86_64-linux" ]; + forAllSystems = nixpkgs.lib.genAttrs supportedSystems; + nixpkgsFor = forAllSystems (system: import nixpkgs { inherit system; }); + in + { + + devShell = forAllSystems (system: + let + pkgs = nixpkgsFor."${system}"; + in + pkgs.mkShell { + nativeBuildInputs = [ + pkgs.rustc + pkgs.cargo + pkgs.rust-analyzer + pkgs.cargo-watch + ]; + RUST_BACKTRACE = 1; + } + ); + }; +} diff --git a/src/consts.rs b/src/consts.rs new file mode 100644 index 0000000..446c341 --- /dev/null +++ b/src/consts.rs @@ -0,0 +1,31 @@ +pub const SRC: &str = include_str!("../assets/en.txt"); +// pub const SRC: &str = r"A +// A (named a in the English, and most commonly ä in other languages). +// +// Defn: The first letter of the English and of many other alphabets. 
+// The capital A of the alphabets of Middle and Western Europe, as also +// the small letter (a), besides the forms in Italic, black letter, +// etc., are all descended from the old Latin A, which was borrowed from +// the Greek Alpha, of the same form; and this was made from the first +// letter (Aleph, and itself from the Egyptian origin. The Aleph was a +// consonant letter, with a guttural breath sound that was not an +// element of Greek articulation; and the Greeks took it to represent +// their vowel Alpha with the ä sound, the Phoenician alphabet having no +// vowel symbols. This letter, in English, is used for several different +// vowel sounds. See Guide to pronunciation, §§ 43-74. The regular long +// a, as in fate, etc., is a comparatively modern sound, and has taken +// the place of what, till about the early part of the 17th century, was +// a sound of the quality of ä (as in far). +// +// 2. (Mus.) +// +// Defn: The name of the sixth tone in the model major scale (that in +// C), or the first tone of the minor scale, which is named after it the +// scale in A minor. The second string of the violin is tuned to the A +// in the treble staff. +// -- A sharp (A#) is the name of a musical tone intermediate between A +// and B. 
+// -- A flat (A) is the name of a tone intermediate between A and G.";
+//
+//
+//
diff --git a/src/lex.rs b/src/lex.rs
new file mode 100644
index 0000000..0f9a535
--- /dev/null
+++ b/src/lex.rs
@@ -0,0 +1,125 @@
+//! Stanza-level lexer for the dictionary source text.
+//!
+//! A stanza is classified by the shape of its first line only.
+
+use crate::utils::FromStaticStr;
+
+/// One chunk of the dictionary source, tagged by the kind of its first line.
+///
+/// Every variant carries the complete stanza text, not just the first line.
+#[derive(Debug)]
+pub enum Stanza {
+    /// First line holds only upper-case letters, ASCII whitespace and `-;'.`.
+    Entry(&'static str),
+    /// First line starts with `Defn`.
+    Defn(&'static str),
+    /// First line starts with `Note`.
+    Note(&'static str),
+    /// First line starts with `Syn`.
+    Synonym(&'static str),
+    /// First line starts with digits followed by a dot, e.g. `2. (Mus.)`.
+    Bullet(&'static str),
+    /// First line starts with a parenthesised letter, e.g. `(a)`.
+    SubBullet(&'static str),
+}
+
+impl Stanza {
+    /// Returns true when every character of `s` is an upper-case letter, ASCII
+    /// whitespace or one of `-`, `;`, `'`, `.`.
+    ///
+    /// Vacuously true for an empty string; `from_str` rejects an empty first
+    /// line before this check runs.
+    fn is_entry(s: &str) -> bool {
+        s.chars().all(|c| {
+            c.is_uppercase() || c.is_ascii_whitespace() || matches!(c, '-' | ';' | '\'' | '.')
+        })
+    }
+
+    /// Returns true when `s` starts with `Defn`.
+    fn is_defn(s: &str) -> bool {
+        s.starts_with("Defn")
+    }
+
+    /// Returns true when `s` starts with `Note`.
+    fn is_note(s: &str) -> bool {
+        s.starts_with("Note")
+    }
+
+    /// Returns true when `s` starts with `Syn`.
+    fn is_synonym(s: &str) -> bool {
+        s.starts_with("Syn")
+    }
+
+    /// Returns true when the text before the first `.` is a non-empty run of
+    /// numeric characters.
+    ///
+    /// The non-empty requirement matters: `all` is vacuously true on an empty
+    /// prefix, which would otherwise classify a line such as `.5` as a bullet.
+    fn is_bullet(s: &str) -> bool {
+        s.split_once('.').map_or(false, |(prefix, _)| {
+            !prefix.is_empty() && prefix.chars().all(char::is_numeric)
+        })
+    }
+
+    /// Returns true when `s` starts with `(`, an alphabetic character and `)`.
+    fn is_sub_bullet(s: &str) -> bool {
+        let mut chars = s.chars();
+        chars.next() == Some('(')
+            && chars.next().map_or(false, char::is_alphabetic)
+            && chars.next() == Some(')')
+    }
+}
+
+/// Error returned when a stanza cannot be classified.
+#[derive(Debug)]
+pub struct StanzaLexError {
+    /// Human-readable reason followed by the offending stanza text.
+    pub data: String,
+}
+
+impl FromStaticStr for Stanza {
+    type Err = StanzaLexError;
+
+    /// Classifies `s` by its first line.
+    ///
+    /// The checks run in a fixed order (entry, defn, note, synonym, bullet,
+    /// sub-bullet) and the first match wins.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`StanzaLexError`] when `s` is empty, when its first line is
+    /// empty, or when the first line matches none of the known shapes.
+    fn from_str(s: &'static str) -> Result<Self, Self::Err> {
+        if s.is_empty() {
+            return Err(StanzaLexError {
+                data: format!("empty stanza: {s}"),
+            });
+        }
+
+        // `split` always yields at least one item, so the fallback is never hit.
+        let first_line = s.split('\n').next().unwrap_or_default();
+        if first_line.is_empty() {
+            return Err(StanzaLexError {
+                data: format!("empty first line: {s}"),
+            });
+        }
+
+        if Stanza::is_entry(first_line) {
+            Ok(Self::Entry(s))
+        } else if Stanza::is_defn(first_line) {
+            Ok(Self::Defn(s))
+        } else if Stanza::is_note(first_line) {
+            Ok(Self::Note(s))
+        } else if Stanza::is_synonym(first_line) {
+            Ok(Self::Synonym(s))
+        } else if Stanza::is_bullet(first_line) {
+            Ok(Self::Bullet(s))
+        } else if Stanza::is_sub_bullet(first_line) {
+            Ok(Self::SubBullet(s))
+        } else {
+            Err(StanzaLexError {
+                data: format!("weird stanza: {s}"),
+            })
+        }
+    }
+}
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..e6d997d
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,133 @@
+mod consts;
+mod utils;
+mod lex;
+
+use consts::SRC;
+use lex::{Stanza, StanzaLexError};
+use utils::FromStaticStr;
+
+/// Lexes every blank-line-separated stanza of `SRC`, pairs each headword with
+/// the first definition that follows it, and debug-prints the first entry
+/// whose name starts with "discursive" (ASCII case-insensitive).
+fn main() {
+    let mut parse_state = ParseState::Ready;
+    let mut current_entry = EntryBuilder::new();
+    let mut dict = Dictionary { entries: vec![] };
+
+    for (idx, l) in SRC.split("\n\n").enumerate() {
+        // Stanza numbers in the log are 1-based.
+        let count = idx + 1;
+        let stanza = match Stanza::from_str(l) {
+            Ok(s) => {
+                println!("{count} ok");
+                s
+            }
+            Err(StanzaLexError { data }) => {
+                eprintln!("stanza err: {data}\n\n");
+                continue;
+            }
+        };
+        match stanza {
+            // A headword always opens a fresh entry. A previous headword that
+            // never received a definition is discarded here, so its name
+            // cannot be attached to the next headword's definition.
+            Stanza::Entry(s) => {
+                current_entry = EntryBuilder::new();
+                current_entry.set_name(s);
+                parse_state = ParseState::InEntry;
+            }
+            Stanza::Defn(d) if parse_state == ParseState::InEntry => {
+                current_entry.set_defn(d);
+
+                match current_entry.build() {
+                    Ok(e) => dict.entries.push(e),
+                    Err(_) => eprintln!("failed to build entry"),
+                }
+
+                // Only the first definition after a headword is recorded.
+                current_entry = EntryBuilder::new();
+                parse_state = ParseState::Ready;
+            }
+            // A definition with no open headword, and stanza kinds that are
+            // not collected yet, are skipped.
+            Stanza::Defn(_)
+            | Stanza::Note(_)
+            | Stanza::Synonym(_)
+            | Stanza::Bullet(_)
+            | Stanza::SubBullet(_) => {}
+        }
+    }
+    dbg!(dict
+        .entries
+        .iter()
+        .find(|entry| entry.name.to_ascii_lowercase().starts_with("discursive")));
+}
+
+/// Parser position relative to the entry currently being assembled.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
+enum ParseState {
+    /// No headword is open.
+    Ready,
+    /// A headword has been seen and no definition has been recorded for it yet.
+    InEntry,
+}
+
+/// All entries collected from the source, in the order they were built.
+struct Dictionary {
+    entries: Vec<Entry>,
+}
+
+/// A finished dictionary entry; every text field is a `&'static str`.
+#[derive(Debug)]
+struct Entry {
+    name: &'static str,
+    defn: Option<&'static str>,
+    note: Option<&'static str>,
+    synonym: Option<&'static str>,
+}
+
+/// Accumulates the parts of an [`Entry`] as stanzas arrive.
+#[derive(Default)]
+struct EntryBuilder {
+    name: Option<&'static str>,
+    defn: Option<&'static str>,
+    note: Option<&'static str>,
+    synonym: Option<&'static str>,
+}
+
+/// Reason an [`EntryBuilder`] could not produce an [`Entry`].
+enum EntryBuilderError {
+    /// The named required field was never set.
+    MissingField(&'static str),
+}
+
+impl EntryBuilder {
+    /// Creates a builder with every field unset.
+    fn new() -> Self {
+        Self::default()
+    }
+
+    /// Sets the headword.
+    fn set_name(&mut self, name: &'static str) {
+        self.name = Some(name);
+    }
+
+    /// Sets the definition.
+    fn set_defn(&mut self, defn: &'static str) {
+        self.defn = Some(defn);
+    }
+
+    /// Builds an [`Entry`] from the fields set so far.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`EntryBuilderError::MissingField`] when no name has been set.
+    fn build(&self) -> Result<Entry, EntryBuilderError> {
+        Ok(Entry {
+            name: self.name.ok_or(EntryBuilderError::MissingField("name"))?,
+            defn: self.defn,
+            note: self.note,
+            synonym: self.synonym,
+        })
+    }
+}
diff --git a/src/utils.rs b/src/utils.rs
new file mode 100644
index 0000000..23fff7e
--- /dev/null
+++ b/src/utils.rs
@@ -0,0 +1,11 @@
+/// Counterpart of `std::str::FromStr` for `&'static str` input, so the parsed
+/// value can keep borrowing from the source text.
+pub trait FromStaticStr {
+    /// Error produced when parsing fails.
+    type Err;
+
+    /// Parses `s` into `Self`.
+    fn from_str(s: &'static str) -> Result<Self, Self::Err>
+    where
+        Self: Sized;
+}
-- 
cgit v1.2.3