From 08475a690ccc26ab5fd5e809a8cd3a19fcf5aff4 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Thu, 9 Aug 2018 02:26:22 +0300 Subject: Simplify string optimizations --- src/smol_str.rs | 33 +++++++++++--- src/syntax_kinds/generated.rs | 88 -------------------------------------- src/syntax_kinds/generated.rs.tera | 13 ------ src/yellow/green.rs | 67 ++++++++--------------------- 4 files changed, 46 insertions(+), 155 deletions(-) diff --git a/src/smol_str.rs b/src/smol_str.rs index 2a330c343..abf69dce7 100644 --- a/src/smol_str.rs +++ b/src/smol_str.rs @@ -1,6 +1,7 @@ use std::{sync::Arc}; const INLINE_CAP: usize = 22; +const WS_TAG: u8 = (INLINE_CAP + 1) as u8; #[derive(Clone, Debug)] pub(crate) enum SmolStr { @@ -17,18 +18,34 @@ impl SmolStr { if len <= INLINE_CAP { let mut buf = [0; INLINE_CAP]; buf[..len].copy_from_slice(text.as_bytes()); - SmolStr::Inline { len: len as u8, buf } - } else { - SmolStr::Heap( - text.to_string().into_boxed_str().into() - ) + return SmolStr::Inline { len: len as u8, buf }; } + + let newlines = text.bytes().take_while(|&b| b == b'\n').count(); + let spaces = text[newlines..].bytes().take_while(|&b| b == b' ').count(); + if newlines + spaces == len && newlines <= N_NEWLINES && spaces <= N_SPACES { + let mut buf = [0; INLINE_CAP]; + buf[0] = newlines as u8; + buf[1] = spaces as u8; + return SmolStr::Inline { len: WS_TAG, buf }; + } + + SmolStr::Heap( + text.to_string().into_boxed_str().into() + ) } pub fn as_str(&self) -> &str { match self { SmolStr::Heap(data) => &*data, SmolStr::Inline { len, buf } => { + if *len == WS_TAG { + let newlines = buf[0] as usize; + let spaces = buf[1] as usize; + assert!(newlines <= N_NEWLINES && spaces <= N_SPACES); + return &WS[N_NEWLINES - newlines..N_NEWLINES + spaces] + } + let len = *len as usize; let buf = &buf[..len]; unsafe { ::std::str::from_utf8_unchecked(buf) } @@ -37,6 +54,12 @@ impl SmolStr { } } +const N_NEWLINES: usize = 32; +const N_SPACES: usize = 128; +const WS: &str = + "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n "; + + #[cfg(test)] mod tests { use super::*; diff --git a/src/syntax_kinds/generated.rs b/src/syntax_kinds/generated.rs index 655ec4ea5..84f90c593 100644 --- a/src/syntax_kinds/generated.rs +++ b/src/syntax_kinds/generated.rs @@ -504,93 +504,5 @@ impl SyntaxKind { }; Some(tok) } - - pub(crate) fn static_text(self) -> Option<&'static str> { - let tok = match self { - SEMI => ";", - COMMA => ",", - L_PAREN => "(", - R_PAREN => ")", - L_CURLY => "{", - R_CURLY => "}", - L_BRACK => "[", - R_BRACK => "]", - L_ANGLE => "<", - R_ANGLE => ">", - AT => "@", - POUND => "#", - TILDE => "~", - QUESTION => "?", - DOLLAR => "$", - AMP => "&", - PIPE => "|", - PLUS => "+", - STAR => "*", - SLASH => "/", - CARET => "^", - PERCENT => "%", - DOT => ".", - DOTDOT => "..", - DOTDOTDOT => "...", - DOTDOTEQ => "..=", - COLON => ":", - COLONCOLON => "::", - EQ => "=", - EQEQ => "==", - FAT_ARROW => "=>", - EXCL => "!", - NEQ => "!=", - MINUS => "-", - THIN_ARROW => "->", - LTEQ => "<=", - GTEQ => ">=", - PLUSEQ => "+=", - MINUSEQ => "-=", - AMPAMP => "&&", - PIPEPIPE => "||", - SHL => "<<", - SHR => ">>", - SHLEQ => "<<=", - SHREQ => ">>=", - - USE_KW => "use", - FN_KW => "fn", - STRUCT_KW => "struct", - ENUM_KW => "enum", - TRAIT_KW => "trait", - IMPL_KW => "impl", - TRUE_KW => "true", - FALSE_KW => "false", - AS_KW => "as", - EXTERN_KW => "extern", - CRATE_KW => "crate", - MOD_KW => "mod", - PUB_KW => "pub", - SELF_KW => "self", - SUPER_KW => "super", - IN_KW => "in", - WHERE_KW => "where", - FOR_KW => "for", - LOOP_KW => "loop", - WHILE_KW => "while", - IF_KW => "if", - ELSE_KW => "else", - MATCH_KW => "match", - CONST_KW => "const", - STATIC_KW => "static", - MUT_KW => "mut", - UNSAFE_KW => "unsafe", - TYPE_KW => "type", - REF_KW => "ref", - LET_KW => "let", - MOVE_KW => "move", - RETURN_KW => "return", - AUTO_KW => "auto", - DEFAULT_KW => "default", - UNION_KW => "union", - _ => return None, - }; - Some(tok) - } } diff --git a/src/syntax_kinds/generated.rs.tera b/src/syntax_kinds/generated.rs.tera index 197f720e1..90618721a 100644 --- a/src/syntax_kinds/generated.rs.tera +++ b/src/syntax_kinds/generated.rs.tera @@ -64,19 +64,6 @@ impl SyntaxKind { let tok = match c { {%- for t in single_byte_tokens %} '{{t.0}}' => {{t.1}}, -{%- endfor %} - _ => return None, - }; - Some(tok) - } - - pub(crate) fn static_text(self) -> Option<&'static str> { - let tok = match self { -{%- for t in concat(a=single_byte_tokens, b=multi_byte_tokens) %} - {{t.1}} => "{{t.0}}", -{%- endfor %} -{% for kw in concat(a=keywords, b=contextual_keywords) %} - {{kw | upper}}_KW => "{{kw}}", {%- endfor %} _ => return None, }; diff --git a/src/yellow/green.rs b/src/yellow/green.rs index 57579a8f2..2d19c252b 100644 --- a/src/yellow/green.rs +++ b/src/yellow/green.rs @@ -1,8 +1,7 @@ use std::sync::Arc; use { + SyntaxKind, TextUnit, smol_str::SmolStr, - SyntaxKind::{self, *}, - TextUnit, }; #[derive(Clone, Debug)] @@ -91,59 +90,23 @@ impl GreenBranch { } #[derive(Clone, Debug)] -pub(crate) enum GreenLeaf { - Whitespace { - newlines: u8, - spaces: u8, - }, - Token { - kind: SyntaxKind, - text: Option, - }, +pub(crate) struct GreenLeaf { + kind: SyntaxKind, + text: SmolStr, } impl GreenLeaf { fn new(kind: SyntaxKind, text: &str) -> Self { - if kind == WHITESPACE { - let newlines = text.bytes().take_while(|&b| b == b'\n').count(); - let spaces = text[newlines..].bytes().take_while(|&b| b == b' ').count(); - if newlines + spaces == text.len() && newlines <= N_NEWLINES && spaces <= N_SPACES { - return GreenLeaf::Whitespace { - newlines: newlines as u8, - spaces: spaces as u8, - }; - } - } - let text = match SyntaxKind::static_text(kind) { - Some(t) => { - debug_assert_eq!(t, text); - None - } - None => Some(SmolStr::new(text)), - }; - GreenLeaf::Token { kind, text } + let text = SmolStr::new(text); + GreenLeaf { kind, text } } pub(crate) fn kind(&self) -> SyntaxKind { - match self { - GreenLeaf::Whitespace { .. } => WHITESPACE, - GreenLeaf::Token { kind, .. } => *kind, - } + self.kind } pub(crate) fn text(&self) -> &str { - match self { - &GreenLeaf::Whitespace { newlines, spaces } => { - let newlines = newlines as usize; - let spaces = spaces as usize; - assert!(newlines <= N_NEWLINES && spaces <= N_SPACES); - &WS[N_NEWLINES - newlines..N_NEWLINES + spaces] - } - GreenLeaf::Token { kind, text } => match text { - None => kind.static_text().unwrap(), - Some(t) => t.as_str(), - }, - } + self.text.as_str() } pub(crate) fn text_len(&self) -> TextUnit { @@ -151,7 +114,13 @@ impl GreenLeaf { } } -const N_NEWLINES: usize = 16; -const N_SPACES: usize = 64; -const WS: &str = - "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n "; + +#[test] +fn test_sizes() { + use std::mem::size_of; + + println!("GreenNode = {}", size_of::()); + println!("GreenLeaf = {}", size_of::()); + println!("SyntaxKind = {}", size_of::()); + println!("SmolStr = {}", size_of::()); +} -- cgit v1.2.3