diff options
author | Aleksey Kladov <[email protected]> | 2018-08-09 00:26:22 +0100 |
---|---|---|
committer | Aleksey Kladov <[email protected]> | 2018-08-09 00:26:22 +0100 |
commit | 08475a690ccc26ab5fd5e809a8cd3a19fcf5aff4 (patch) | |
tree | 81a7395c814dc97ada0e1065896c73e3b1bb2cee | |
parent | 7974c6b1a0b2d07c5a337c89a70a51e01f629778 (diff) |
Simplify string optimizations
-rw-r--r-- | src/smol_str.rs | 33 | ||||
-rw-r--r-- | src/syntax_kinds/generated.rs | 88 | ||||
-rw-r--r-- | src/syntax_kinds/generated.rs.tera | 13 | ||||
-rw-r--r-- | src/yellow/green.rs | 67 |
4 files changed, 46 insertions, 155 deletions
diff --git a/src/smol_str.rs b/src/smol_str.rs index 2a330c343..abf69dce7 100644 --- a/src/smol_str.rs +++ b/src/smol_str.rs | |||
@@ -1,6 +1,7 @@ | |||
1 | use std::{sync::Arc}; | 1 | use std::{sync::Arc}; |
2 | 2 | ||
3 | const INLINE_CAP: usize = 22; | 3 | const INLINE_CAP: usize = 22; |
4 | const WS_TAG: u8 = (INLINE_CAP + 1) as u8; | ||
4 | 5 | ||
5 | #[derive(Clone, Debug)] | 6 | #[derive(Clone, Debug)] |
6 | pub(crate) enum SmolStr { | 7 | pub(crate) enum SmolStr { |
@@ -17,18 +18,34 @@ impl SmolStr { | |||
17 | if len <= INLINE_CAP { | 18 | if len <= INLINE_CAP { |
18 | let mut buf = [0; INLINE_CAP]; | 19 | let mut buf = [0; INLINE_CAP]; |
19 | buf[..len].copy_from_slice(text.as_bytes()); | 20 | buf[..len].copy_from_slice(text.as_bytes()); |
20 | SmolStr::Inline { len: len as u8, buf } | 21 | return SmolStr::Inline { len: len as u8, buf }; |
21 | } else { | ||
22 | SmolStr::Heap( | ||
23 | text.to_string().into_boxed_str().into() | ||
24 | ) | ||
25 | } | 22 | } |
23 | |||
24 | let newlines = text.bytes().take_while(|&b| b == b'\n').count(); | ||
25 | let spaces = text[newlines..].bytes().take_while(|&b| b == b' ').count(); | ||
26 | if newlines + spaces == len && newlines <= N_NEWLINES && spaces <= N_SPACES { | ||
27 | let mut buf = [0; INLINE_CAP]; | ||
28 | buf[0] = newlines as u8; | ||
29 | buf[1] = spaces as u8; | ||
30 | return SmolStr::Inline { len: WS_TAG, buf }; | ||
31 | } | ||
32 | |||
33 | SmolStr::Heap( | ||
34 | text.to_string().into_boxed_str().into() | ||
35 | ) | ||
26 | } | 36 | } |
27 | 37 | ||
28 | pub fn as_str(&self) -> &str { | 38 | pub fn as_str(&self) -> &str { |
29 | match self { | 39 | match self { |
30 | SmolStr::Heap(data) => &*data, | 40 | SmolStr::Heap(data) => &*data, |
31 | SmolStr::Inline { len, buf } => { | 41 | SmolStr::Inline { len, buf } => { |
42 | if *len == WS_TAG { | ||
43 | let newlines = buf[0] as usize; | ||
44 | let spaces = buf[1] as usize; | ||
45 | assert!(newlines <= N_NEWLINES && spaces <= N_SPACES); | ||
46 | return &WS[N_NEWLINES - newlines..N_NEWLINES + spaces] | ||
47 | } | ||
48 | |||
32 | let len = *len as usize; | 49 | let len = *len as usize; |
33 | let buf = &buf[..len]; | 50 | let buf = &buf[..len]; |
34 | unsafe { ::std::str::from_utf8_unchecked(buf) } | 51 | unsafe { ::std::str::from_utf8_unchecked(buf) } |
@@ -37,6 +54,12 @@ impl SmolStr { | |||
37 | } | 54 | } |
38 | } | 55 | } |
39 | 56 | ||
57 | const N_NEWLINES: usize = 32; | ||
58 | const N_SPACES: usize = 128; | ||
59 | const WS: &str = | ||
60 | "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n "; | ||
61 | |||
62 | |||
40 | #[cfg(test)] | 63 | #[cfg(test)] |
41 | mod tests { | 64 | mod tests { |
42 | use super::*; | 65 | use super::*; |
diff --git a/src/syntax_kinds/generated.rs b/src/syntax_kinds/generated.rs index 655ec4ea5..84f90c593 100644 --- a/src/syntax_kinds/generated.rs +++ b/src/syntax_kinds/generated.rs | |||
@@ -504,93 +504,5 @@ impl SyntaxKind { | |||
504 | }; | 504 | }; |
505 | Some(tok) | 505 | Some(tok) |
506 | } | 506 | } |
507 | |||
508 | pub(crate) fn static_text(self) -> Option<&'static str> { | ||
509 | let tok = match self { | ||
510 | SEMI => ";", | ||
511 | COMMA => ",", | ||
512 | L_PAREN => "(", | ||
513 | R_PAREN => ")", | ||
514 | L_CURLY => "{", | ||
515 | R_CURLY => "}", | ||
516 | L_BRACK => "[", | ||
517 | R_BRACK => "]", | ||
518 | L_ANGLE => "<", | ||
519 | R_ANGLE => ">", | ||
520 | AT => "@", | ||
521 | POUND => "#", | ||
522 | TILDE => "~", | ||
523 | QUESTION => "?", | ||
524 | DOLLAR => "$", | ||
525 | AMP => "&", | ||
526 | PIPE => "|", | ||
527 | PLUS => "+", | ||
528 | STAR => "*", | ||
529 | SLASH => "/", | ||
530 | CARET => "^", | ||
531 | PERCENT => "%", | ||
532 | DOT => ".", | ||
533 | DOTDOT => "..", | ||
534 | DOTDOTDOT => "...", | ||
535 | DOTDOTEQ => "..=", | ||
536 | COLON => ":", | ||
537 | COLONCOLON => "::", | ||
538 | EQ => "=", | ||
539 | EQEQ => "==", | ||
540 | FAT_ARROW => "=>", | ||
541 | EXCL => "!", | ||
542 | NEQ => "!=", | ||
543 | MINUS => "-", | ||
544 | THIN_ARROW => "->", | ||
545 | LTEQ => "<=", | ||
546 | GTEQ => ">=", | ||
547 | PLUSEQ => "+=", | ||
548 | MINUSEQ => "-=", | ||
549 | AMPAMP => "&&", | ||
550 | PIPEPIPE => "||", | ||
551 | SHL => "<<", | ||
552 | SHR => ">>", | ||
553 | SHLEQ => "<<=", | ||
554 | SHREQ => ">>=", | ||
555 | |||
556 | USE_KW => "use", | ||
557 | FN_KW => "fn", | ||
558 | STRUCT_KW => "struct", | ||
559 | ENUM_KW => "enum", | ||
560 | TRAIT_KW => "trait", | ||
561 | IMPL_KW => "impl", | ||
562 | TRUE_KW => "true", | ||
563 | FALSE_KW => "false", | ||
564 | AS_KW => "as", | ||
565 | EXTERN_KW => "extern", | ||
566 | CRATE_KW => "crate", | ||
567 | MOD_KW => "mod", | ||
568 | PUB_KW => "pub", | ||
569 | SELF_KW => "self", | ||
570 | SUPER_KW => "super", | ||
571 | IN_KW => "in", | ||
572 | WHERE_KW => "where", | ||
573 | FOR_KW => "for", | ||
574 | LOOP_KW => "loop", | ||
575 | WHILE_KW => "while", | ||
576 | IF_KW => "if", | ||
577 | ELSE_KW => "else", | ||
578 | MATCH_KW => "match", | ||
579 | CONST_KW => "const", | ||
580 | STATIC_KW => "static", | ||
581 | MUT_KW => "mut", | ||
582 | UNSAFE_KW => "unsafe", | ||
583 | TYPE_KW => "type", | ||
584 | REF_KW => "ref", | ||
585 | LET_KW => "let", | ||
586 | MOVE_KW => "move", | ||
587 | RETURN_KW => "return", | ||
588 | AUTO_KW => "auto", | ||
589 | DEFAULT_KW => "default", | ||
590 | UNION_KW => "union", | ||
591 | _ => return None, | ||
592 | }; | ||
593 | Some(tok) | ||
594 | } | ||
595 | } | 507 | } |
596 | 508 | ||
diff --git a/src/syntax_kinds/generated.rs.tera b/src/syntax_kinds/generated.rs.tera index 197f720e1..90618721a 100644 --- a/src/syntax_kinds/generated.rs.tera +++ b/src/syntax_kinds/generated.rs.tera | |||
@@ -69,18 +69,5 @@ impl SyntaxKind { | |||
69 | }; | 69 | }; |
70 | Some(tok) | 70 | Some(tok) |
71 | } | 71 | } |
72 | |||
73 | pub(crate) fn static_text(self) -> Option<&'static str> { | ||
74 | let tok = match self { | ||
75 | {%- for t in concat(a=single_byte_tokens, b=multi_byte_tokens) %} | ||
76 | {{t.1}} => "{{t.0}}", | ||
77 | {%- endfor %} | ||
78 | {% for kw in concat(a=keywords, b=contextual_keywords) %} | ||
79 | {{kw | upper}}_KW => "{{kw}}", | ||
80 | {%- endfor %} | ||
81 | _ => return None, | ||
82 | }; | ||
83 | Some(tok) | ||
84 | } | ||
85 | } | 72 | } |
86 | 73 | ||
diff --git a/src/yellow/green.rs b/src/yellow/green.rs index 57579a8f2..2d19c252b 100644 --- a/src/yellow/green.rs +++ b/src/yellow/green.rs | |||
@@ -1,8 +1,7 @@ | |||
1 | use std::sync::Arc; | 1 | use std::sync::Arc; |
2 | use { | 2 | use { |
3 | SyntaxKind, TextUnit, | ||
3 | smol_str::SmolStr, | 4 | smol_str::SmolStr, |
4 | SyntaxKind::{self, *}, | ||
5 | TextUnit, | ||
6 | }; | 5 | }; |
7 | 6 | ||
8 | #[derive(Clone, Debug)] | 7 | #[derive(Clone, Debug)] |
@@ -91,59 +90,23 @@ impl GreenBranch { | |||
91 | } | 90 | } |
92 | 91 | ||
93 | #[derive(Clone, Debug)] | 92 | #[derive(Clone, Debug)] |
94 | pub(crate) enum GreenLeaf { | 93 | pub(crate) struct GreenLeaf { |
95 | Whitespace { | 94 | kind: SyntaxKind, |
96 | newlines: u8, | 95 | text: SmolStr, |
97 | spaces: u8, | ||
98 | }, | ||
99 | Token { | ||
100 | kind: SyntaxKind, | ||
101 | text: Option<SmolStr>, | ||
102 | }, | ||
103 | } | 96 | } |
104 | 97 | ||
105 | impl GreenLeaf { | 98 | impl GreenLeaf { |
106 | fn new(kind: SyntaxKind, text: &str) -> Self { | 99 | fn new(kind: SyntaxKind, text: &str) -> Self { |
107 | if kind == WHITESPACE { | 100 | let text = SmolStr::new(text); |
108 | let newlines = text.bytes().take_while(|&b| b == b'\n').count(); | 101 | GreenLeaf { kind, text } |
109 | let spaces = text[newlines..].bytes().take_while(|&b| b == b' ').count(); | ||
110 | if newlines + spaces == text.len() && newlines <= N_NEWLINES && spaces <= N_SPACES { | ||
111 | return GreenLeaf::Whitespace { | ||
112 | newlines: newlines as u8, | ||
113 | spaces: spaces as u8, | ||
114 | }; | ||
115 | } | ||
116 | } | ||
117 | let text = match SyntaxKind::static_text(kind) { | ||
118 | Some(t) => { | ||
119 | debug_assert_eq!(t, text); | ||
120 | None | ||
121 | } | ||
122 | None => Some(SmolStr::new(text)), | ||
123 | }; | ||
124 | GreenLeaf::Token { kind, text } | ||
125 | } | 102 | } |
126 | 103 | ||
127 | pub(crate) fn kind(&self) -> SyntaxKind { | 104 | pub(crate) fn kind(&self) -> SyntaxKind { |
128 | match self { | 105 | self.kind |
129 | GreenLeaf::Whitespace { .. } => WHITESPACE, | ||
130 | GreenLeaf::Token { kind, .. } => *kind, | ||
131 | } | ||
132 | } | 106 | } |
133 | 107 | ||
134 | pub(crate) fn text(&self) -> &str { | 108 | pub(crate) fn text(&self) -> &str { |
135 | match self { | 109 | self.text.as_str() |
136 | &GreenLeaf::Whitespace { newlines, spaces } => { | ||
137 | let newlines = newlines as usize; | ||
138 | let spaces = spaces as usize; | ||
139 | assert!(newlines <= N_NEWLINES && spaces <= N_SPACES); | ||
140 | &WS[N_NEWLINES - newlines..N_NEWLINES + spaces] | ||
141 | } | ||
142 | GreenLeaf::Token { kind, text } => match text { | ||
143 | None => kind.static_text().unwrap(), | ||
144 | Some(t) => t.as_str(), | ||
145 | }, | ||
146 | } | ||
147 | } | 110 | } |
148 | 111 | ||
149 | pub(crate) fn text_len(&self) -> TextUnit { | 112 | pub(crate) fn text_len(&self) -> TextUnit { |
@@ -151,7 +114,13 @@ impl GreenLeaf { | |||
151 | } | 114 | } |
152 | } | 115 | } |
153 | 116 | ||
154 | const N_NEWLINES: usize = 16; | 117 | |
155 | const N_SPACES: usize = 64; | 118 | #[test] |
156 | const WS: &str = | 119 | fn test_sizes() { |
157 | "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n "; | 120 | use std::mem::size_of; |
121 | |||
122 | println!("GreenNode = {}", size_of::<GreenNode>()); | ||
123 | println!("GreenLeaf = {}", size_of::<GreenLeaf>()); | ||
124 | println!("SyntaxKind = {}", size_of::<SyntaxKind>()); | ||
125 | println!("SmolStr = {}", size_of::<SmolStr>()); | ||
126 | } | ||