From f1ffd14922d2dc885ba6441ca8380f7d4cb75269 Mon Sep 17 00:00:00 2001 From: Edwin Cheng Date: Tue, 5 Jan 2021 00:11:56 +0800 Subject: Reduce string copying --- crates/mbe/src/syntax_bridge.rs | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) (limited to 'crates/mbe/src') diff --git a/crates/mbe/src/syntax_bridge.rs b/crates/mbe/src/syntax_bridge.rs index 265c0d63d..423a4934e 100644 --- a/crates/mbe/src/syntax_bridge.rs +++ b/crates/mbe/src/syntax_bridge.rs @@ -414,7 +414,7 @@ trait TokenConvertor { fn id_alloc(&mut self) -> &mut TokenIdAlloc; } -impl<'a> SrcToken for (RawToken, &'a str) { +impl<'a> SrcToken for (&'a RawToken, &'a str) { fn kind(&self) -> SyntaxKind { self.0.kind } @@ -431,7 +431,7 @@ impl<'a> SrcToken for (RawToken, &'a str) { impl RawConvertor<'_> {} impl<'a> TokenConvertor for RawConvertor<'a> { - type Token = (RawToken, &'a str); + type Token = (&'a RawToken, &'a str); fn convert_doc_comment(&self, token: &Self::Token) -> Option> { convert_doc_comment(&doc_comment(token.1)) @@ -442,11 +442,11 @@ impl<'a> TokenConvertor for RawConvertor<'a> { let range = TextRange::at(self.offset, token.len); self.offset += token.len; - Some(((*token, &self.text[range]), range)) + Some(((token, &self.text[range]), range)) } fn peek(&self) -> Option { - let token = self.inner.as_slice().get(0).cloned(); + let token = self.inner.as_slice().get(0); token.map(|it| { let range = TextRange::at(self.offset, it.len); @@ -601,17 +601,16 @@ impl<'a> TtTreeSink<'a> { } } -fn delim_to_str(d: Option, closing: bool) -> SmolStr { +fn delim_to_str(d: Option, closing: bool) -> &'static str { let texts = match d { Some(tt::DelimiterKind::Parenthesis) => "()", Some(tt::DelimiterKind::Brace) => "{}", Some(tt::DelimiterKind::Bracket) => "[]", - None => return "".into(), + None => return "", }; let idx = closing as usize; - let text = &texts[idx..texts.len() - (1 - idx)]; - text.into() + &texts[idx..texts.len() - (1 - idx)] } impl<'a> TreeSink for TtTreeSink<'a> { @@ -626,22 +625,25 @@ impl<'a> TreeSink for TtTreeSink<'a> { let mut last = self.cursor; for _ in 0..n_tokens { + let tmp_str: SmolStr; if self.cursor.eof() { break; } last = self.cursor; - let text: SmolStr = match self.cursor.token_tree() { + let text: &str = match self.cursor.token_tree() { Some(tt::TokenTree::Leaf(leaf)) => { // Mark the range if needed let (text, id) = match leaf { - tt::Leaf::Ident(ident) => (ident.text.clone(), ident.id), + tt::Leaf::Ident(ident) => (&ident.text, ident.id), tt::Leaf::Punct(punct) => { assert!(punct.char.is_ascii()); let char = &(punct.char as u8); - let text = std::str::from_utf8(std::slice::from_ref(char)).unwrap(); - (SmolStr::new_inline(text), punct.id) + tmp_str = SmolStr::new_inline( + std::str::from_utf8(std::slice::from_ref(char)).unwrap(), + ); + (&tmp_str, punct.id) } - tt::Leaf::Literal(lit) => (lit.text.clone(), lit.id), + tt::Leaf::Literal(lit) => (&lit.text, lit.id), }; let range = TextRange::at(self.text_pos, TextSize::of(text.as_str())); self.token_map.insert(id, range); @@ -672,7 +674,7 @@ impl<'a> TreeSink for TtTreeSink<'a> { } }; self.buf += &text; - self.text_pos += TextSize::of(text.as_str()); + self.text_pos += TextSize::of(text); } let text = SmolStr::new(self.buf.as_str()); -- cgit v1.2.3 From af3d75ad2e760dc885f54e6179543718ef8f141f Mon Sep 17 00:00:00 2001 From: Edwin Cheng Date: Tue, 5 Jan 2021 00:22:42 +0800 Subject: Refactor TokenBuffer for reduc cloning --- crates/mbe/src/mbe_expander/matcher.rs | 6 +++--- crates/mbe/src/subtree_source.rs | 10 ++++++---- crates/mbe/src/syntax_bridge.rs | 21 +++++++++------------ 3 files changed, 18 insertions(+), 19 deletions(-) (limited to 'crates/mbe/src') diff --git a/crates/mbe/src/mbe_expander/matcher.rs b/crates/mbe/src/mbe_expander/matcher.rs index ab5f87c48..fdc8844ce 100644 --- a/crates/mbe/src/mbe_expander/matcher.rs +++ b/crates/mbe/src/mbe_expander/matcher.rs @@ -309,7 +309,7 @@ impl<'a> TtIter<'a> { } } - let buffer = TokenBuffer::new(&self.inner.as_slice()); + let buffer = TokenBuffer::from_tokens(&self.inner.as_slice()); let mut src = SubtreeTokenSource::new(&buffer); let mut sink = OffsetTokenSink { cursor: buffer.begin(), error: false }; @@ -336,11 +336,11 @@ impl<'a> TtIter<'a> { err = Some(err!("no tokens consumed")); } let res = match res.len() { - 1 => Some(res[0].clone()), + 1 => Some(res[0].cloned()), 0 => None, _ => Some(tt::TokenTree::Subtree(tt::Subtree { delimiter: None, - token_trees: res.into_iter().cloned().collect(), + token_trees: res.into_iter().map(|it| it.cloned()).collect(), })), }; ExpandResult { value: res, err } diff --git a/crates/mbe/src/subtree_source.rs b/crates/mbe/src/subtree_source.rs index d10d4b70e..36d6f1038 100644 --- a/crates/mbe/src/subtree_source.rs +++ b/crates/mbe/src/subtree_source.rs @@ -53,10 +53,12 @@ impl<'a> SubtreeTokenSource<'a> { fn is_lifetime(c: Cursor) -> Option<(Cursor, SmolStr)> { let tkn = c.token_tree(); - if let Some(tt::TokenTree::Leaf(tt::Leaf::Punct(punct))) = tkn { + if let Some(tt::buffer::TokenTreeRef::Leaf(tt::Leaf::Punct(punct), _)) = tkn { if punct.char == '\'' { let next = c.bump(); - if let Some(tt::TokenTree::Leaf(tt::Leaf::Ident(ident))) = next.token_tree() { + if let Some(tt::buffer::TokenTreeRef::Leaf(tt::Leaf::Ident(ident), _)) = + next.token_tree() + { let res_cursor = next.bump(); let text = SmolStr::new("'".to_string() + &ident.to_string()); @@ -94,11 +96,11 @@ impl<'a> SubtreeTokenSource<'a> { } match cursor.token_tree() { - Some(tt::TokenTree::Leaf(leaf)) => { + Some(tt::buffer::TokenTreeRef::Leaf(leaf, _)) => { cached.push(Some(convert_leaf(&leaf))); self.cached_cursor.set(cursor.bump()); } - Some(tt::TokenTree::Subtree(subtree)) => { + Some(tt::buffer::TokenTreeRef::Subtree(subtree, _)) => { self.cached_cursor.set(cursor.subtree().unwrap()); cached.push(Some(convert_delim(subtree.delimiter_kind(), false))); } diff --git a/crates/mbe/src/syntax_bridge.rs b/crates/mbe/src/syntax_bridge.rs index 423a4934e..671036e1c 100644 --- a/crates/mbe/src/syntax_bridge.rs +++ b/crates/mbe/src/syntax_bridge.rs @@ -70,15 +70,12 @@ pub fn token_tree_to_syntax_node( tt: &tt::Subtree, fragment_kind: FragmentKind, ) -> Result<(Parse, TokenMap), ExpandError> { - let tmp; - let tokens = match tt { - tt::Subtree { delimiter: None, token_trees } => token_trees.as_slice(), - _ => { - tmp = [tt.clone().into()]; - &tmp[..] + let buffer = match tt { + tt::Subtree { delimiter: None, token_trees } => { + TokenBuffer::from_tokens(token_trees.as_slice()) } + _ => TokenBuffer::from_subtree(tt), }; - let buffer = TokenBuffer::new(&tokens); let mut token_source = SubtreeTokenSource::new(&buffer); let mut tree_sink = TtTreeSink::new(buffer.begin()); parser::parse_fragment(&mut token_source, &mut tree_sink, fragment_kind); @@ -631,7 +628,7 @@ impl<'a> TreeSink for TtTreeSink<'a> { } last = self.cursor; let text: &str = match self.cursor.token_tree() { - Some(tt::TokenTree::Leaf(leaf)) => { + Some(tt::buffer::TokenTreeRef::Leaf(leaf, _)) => { // Mark the range if needed let (text, id) = match leaf { tt::Leaf::Ident(ident) => (&ident.text, ident.id), @@ -650,7 +647,7 @@ impl<'a> TreeSink for TtTreeSink<'a> { self.cursor = self.cursor.bump(); text } - Some(tt::TokenTree::Subtree(subtree)) => { + Some(tt::buffer::TokenTreeRef::Subtree(subtree, _)) => { self.cursor = self.cursor.subtree().unwrap(); if let Some(id) = subtree.delimiter.map(|it| it.id) { self.open_delims.insert(id, self.text_pos); @@ -684,8 +681,8 @@ impl<'a> TreeSink for TtTreeSink<'a> { // Add whitespace between adjoint puncts let next = last.bump(); if let ( - Some(tt::TokenTree::Leaf(tt::Leaf::Punct(curr))), - Some(tt::TokenTree::Leaf(tt::Leaf::Punct(_))), + Some(tt::buffer::TokenTreeRef::Leaf(tt::Leaf::Punct(curr), _)), + Some(tt::buffer::TokenTreeRef::Leaf(tt::Leaf::Punct(_), _)), ) = (last.token_tree(), next.token_tree()) { // Note: We always assume the semi-colon would be the last token in @@ -744,7 +741,7 @@ mod tests { ) .expand_tt("literals!(foo);"); let tts = &[expansion.into()]; - let buffer = tt::buffer::TokenBuffer::new(tts); + let buffer = tt::buffer::TokenBuffer::from_tokens(tts); let mut tt_src = SubtreeTokenSource::new(&buffer); let mut tokens = vec![]; while tt_src.current().kind != EOF { -- cgit v1.2.3 From d387bfdc4abc47ff62f1dffe7d41ce2bb11e7a00 Mon Sep 17 00:00:00 2001 From: Edwin Cheng Date: Tue, 5 Jan 2021 01:50:34 +0800 Subject: Simplify SubtreeTokenSource --- crates/mbe/src/subtree_source.rs | 148 ++++++++++++++++----------------------- 1 file changed, 60 insertions(+), 88 deletions(-) (limited to 'crates/mbe/src') diff --git a/crates/mbe/src/subtree_source.rs b/crates/mbe/src/subtree_source.rs index 36d6f1038..d7433bd35 100644 --- a/crates/mbe/src/subtree_source.rs +++ b/crates/mbe/src/subtree_source.rs @@ -1,131 +1,104 @@ //! FIXME: write short doc here use parser::{Token, TokenSource}; -use std::cell::{Cell, Ref, RefCell}; use syntax::{lex_single_syntax_kind, SmolStr, SyntaxKind, SyntaxKind::*, T}; -use tt::buffer::{Cursor, TokenBuffer}; +use tt::buffer::TokenBuffer; #[derive(Debug, Clone, Eq, PartialEq)] struct TtToken { - kind: SyntaxKind, - is_joint_to_next: bool, + tt: Token, text: SmolStr, } -pub(crate) struct SubtreeTokenSource<'a> { - cached_cursor: Cell>, - cached: RefCell>>, +pub(crate) struct SubtreeTokenSource { + cached: Vec, curr: (Token, usize), } -impl<'a> SubtreeTokenSource<'a> { +impl<'a> SubtreeTokenSource { // Helper function used in test #[cfg(test)] pub(crate) fn text(&self) -> SmolStr { - match *self.get(self.curr.1) { + match self.cached.get(self.curr.1) { Some(ref tt) => tt.text.clone(), _ => SmolStr::new(""), } } } -impl<'a> SubtreeTokenSource<'a> { - pub(crate) fn new(buffer: &'a TokenBuffer) -> SubtreeTokenSource<'a> { - let cursor = buffer.begin(); +impl<'a> SubtreeTokenSource { + pub(crate) fn new(buffer: &TokenBuffer) -> SubtreeTokenSource { + let mut current = buffer.begin(); + let mut cached = Vec::with_capacity(100); - let mut res = SubtreeTokenSource { - curr: (Token { kind: EOF, is_jointed_to_next: false }, 0), - cached_cursor: Cell::new(cursor), - cached: RefCell::new(Vec::with_capacity(10)), - }; - res.curr = (res.mk_token(0), 0); - res - } + while !current.eof() { + let cursor = current; + let tt = cursor.token_tree(); - fn mk_token(&self, pos: usize) -> Token { - match *self.get(pos) { - Some(ref tt) => Token { kind: tt.kind, is_jointed_to_next: tt.is_joint_to_next }, - None => Token { kind: EOF, is_jointed_to_next: false }, - } - } - - fn get(&self, pos: usize) -> Ref> { - fn is_lifetime(c: Cursor) -> Option<(Cursor, SmolStr)> { - let tkn = c.token_tree(); - - if let Some(tt::buffer::TokenTreeRef::Leaf(tt::Leaf::Punct(punct), _)) = tkn { + // Check if it is lifetime + if let Some(tt::buffer::TokenTreeRef::Leaf(tt::Leaf::Punct(punct), _)) = tt { if punct.char == '\'' { - let next = c.bump(); + let next = cursor.bump(); if let Some(tt::buffer::TokenTreeRef::Leaf(tt::Leaf::Ident(ident), _)) = next.token_tree() { - let res_cursor = next.bump(); - let text = SmolStr::new("'".to_string() + &ident.to_string()); - - return Some((res_cursor, text)); + let text = SmolStr::new("'".to_string() + &ident.text); + cached.push(TtToken { + tt: Token { kind: LIFETIME_IDENT, is_jointed_to_next: false }, + text, + }); + current = next.bump(); + continue; } else { panic!("Next token must be ident : {:#?}", next.token_tree()); } } } - None - } - - if pos < self.cached.borrow().len() { - return Ref::map(self.cached.borrow(), |c| &c[pos]); - } - - { - let mut cached = self.cached.borrow_mut(); - while pos >= cached.len() { - let cursor = self.cached_cursor.get(); - if cursor.eof() { - cached.push(None); - continue; + current = match tt { + Some(tt::buffer::TokenTreeRef::Leaf(leaf, _)) => { + cached.push(convert_leaf(&leaf)); + cursor.bump() } - - if let Some((curr, text)) = is_lifetime(cursor) { - cached.push(Some(TtToken { - kind: LIFETIME_IDENT, - is_joint_to_next: false, - text, - })); - self.cached_cursor.set(curr); - continue; + Some(tt::buffer::TokenTreeRef::Subtree(subtree, _)) => { + cached.push(convert_delim(subtree.delimiter_kind(), false)); + cursor.subtree().unwrap() } - - match cursor.token_tree() { - Some(tt::buffer::TokenTreeRef::Leaf(leaf, _)) => { - cached.push(Some(convert_leaf(&leaf))); - self.cached_cursor.set(cursor.bump()); - } - Some(tt::buffer::TokenTreeRef::Subtree(subtree, _)) => { - self.cached_cursor.set(cursor.subtree().unwrap()); - cached.push(Some(convert_delim(subtree.delimiter_kind(), false))); - } - None => { - if let Some(subtree) = cursor.end() { - cached.push(Some(convert_delim(subtree.delimiter_kind(), true))); - self.cached_cursor.set(cursor.bump()); - } + None => { + if let Some(subtree) = cursor.end() { + cached.push(convert_delim(subtree.delimiter_kind(), true)); + cursor.bump() + } else { + continue; } } - } + }; } - Ref::map(self.cached.borrow(), |c| &c[pos]) + let mut res = SubtreeTokenSource { + curr: (Token { kind: EOF, is_jointed_to_next: false }, 0), + cached, + }; + res.curr = (res.token(0), 0); + res + } + + fn token(&self, pos: usize) -> Token { + match self.cached.get(pos) { + Some(it) => it.tt, + None => Token { kind: EOF, is_jointed_to_next: false }, + } } } -impl<'a> TokenSource for SubtreeTokenSource<'a> { +impl<'a> TokenSource for SubtreeTokenSource { fn current(&self) -> Token { self.curr.0 } /// Lookahead n token fn lookahead_nth(&self, n: usize) -> Token { - self.mk_token(self.curr.1 + n) + self.token(self.curr.1 + n) } /// bump cursor to next token @@ -133,13 +106,12 @@ impl<'a> TokenSource for SubtreeTokenSource<'a> { if self.current().kind == EOF { return; } - - self.curr = (self.mk_token(self.curr.1 + 1), self.curr.1 + 1); + self.curr = (self.token(self.curr.1 + 1), self.curr.1 + 1); } /// Is the current token a specified keyword? fn is_keyword(&self, kw: &str) -> bool { - match *self.get(self.curr.1) { + match self.cached.get(self.curr.1) { Some(ref t) => t.text == *kw, _ => false, } @@ -157,7 +129,7 @@ fn convert_delim(d: Option, closing: bool) -> TtToken { let idx = closing as usize; let kind = kinds[idx]; let text = if !texts.is_empty() { &texts[idx..texts.len() - (1 - idx)] } else { "" }; - TtToken { kind, is_joint_to_next: false, text: SmolStr::new(text) } + TtToken { tt: Token { kind, is_jointed_to_next: false }, text: SmolStr::new(text) } } fn convert_literal(l: &tt::Literal) -> TtToken { @@ -171,7 +143,7 @@ fn convert_literal(l: &tt::Literal) -> TtToken { }) .unwrap_or_else(|| panic!("Fail to convert given literal {:#?}", &l)); - TtToken { kind, is_joint_to_next: false, text: l.text.clone() } + TtToken { tt: Token { kind, is_jointed_to_next: false }, text: l.text.clone() } } fn convert_ident(ident: &tt::Ident) -> TtToken { @@ -182,7 +154,7 @@ fn convert_ident(ident: &tt::Ident) -> TtToken { _ => SyntaxKind::from_keyword(ident.text.as_str()).unwrap_or(IDENT), }; - TtToken { kind, is_joint_to_next: false, text: ident.text.clone() } + TtToken { tt: Token { kind, is_jointed_to_next: false }, text: ident.text.clone() } } fn convert_punct(p: tt::Punct) -> TtToken { @@ -196,7 +168,7 @@ fn convert_punct(p: tt::Punct) -> TtToken { let s: &str = p.char.encode_utf8(&mut buf); SmolStr::new(s) }; - TtToken { kind, is_joint_to_next: p.spacing == tt::Spacing::Joint, text } + TtToken { tt: Token { kind, is_jointed_to_next: p.spacing == tt::Spacing::Joint }, text } } fn convert_leaf(leaf: &tt::Leaf) -> TtToken { @@ -210,6 +182,7 @@ fn convert_leaf(leaf: &tt::Leaf) -> TtToken { #[cfg(test)] mod tests { use super::{convert_literal, TtToken}; + use parser::Token; use syntax::{SmolStr, SyntaxKind}; #[test] @@ -220,8 +193,7 @@ mod tests { text: SmolStr::new("-42.0") }), TtToken { - kind: SyntaxKind::FLOAT_NUMBER, - is_joint_to_next: false, + tt: Token { kind: SyntaxKind::FLOAT_NUMBER, is_jointed_to_next: false }, text: SmolStr::new("-42.0") } ); -- cgit v1.2.3