diff options
Diffstat (limited to 'crates/libsyntax2/src')
43 files changed, 5438 insertions, 0 deletions
diff --git a/crates/libsyntax2/src/algo/mod.rs b/crates/libsyntax2/src/algo/mod.rs new file mode 100644 index 000000000..d2de70fd4 --- /dev/null +++ b/crates/libsyntax2/src/algo/mod.rs | |||
@@ -0,0 +1,123 @@ | |||
1 | pub mod walk; | ||
2 | |||
3 | use {SyntaxNodeRef, TextUnit, TextRange}; | ||
4 | |||
/// Returns the leaf node(s) of `node`'s subtree located at `offset`.
///
/// When `offset` falls exactly on the boundary between two leaves, both
/// are returned as `LeafAtOffset::Between`; an empty tree yields `None`.
///
/// Panics if `offset` lies outside `node.range()`.
pub fn find_leaf_at_offset(node: SyntaxNodeRef, offset: TextUnit) -> LeafAtOffset {
    let range = node.range();
    assert!(
        contains_offset_nonstrict(range, offset),
        "Bad offset: range {:?} offset {:?}", range, offset
    );
    if range.is_empty() {
        return LeafAtOffset::None;
    }

    if node.is_leaf() {
        return LeafAtOffset::Single(node);
    }

    // Because the containment check is end-inclusive, at most two
    // non-empty children can contain `offset`: one ending at it and one
    // starting at it — hence the `assert!` below.
    let mut children = node.children()
        .filter(|child| {
            let child_range = child.range();
            !child_range.is_empty() && contains_offset_nonstrict(child_range, offset)
        });

    let left = children.next().unwrap();
    let right = children.next();
    assert!(children.next().is_none());
    return if let Some(right) = right {
        // Offset is on a boundary: recurse into both sides; each side must
        // resolve to a single leaf.
        match (find_leaf_at_offset(left, offset), find_leaf_at_offset(right, offset)) {
            (LeafAtOffset::Single(left), LeafAtOffset::Single(right)) =>
                LeafAtOffset::Between(left, right),
            _ => unreachable!()
        }
    } else {
        find_leaf_at_offset(left, offset)
    };
}
38 | |||
/// Result of [`find_leaf_at_offset`]: zero, one, or two adjacent leaves.
#[derive(Clone, Copy, Debug)]
pub enum LeafAtOffset<'a> {
    /// The tree is empty; no leaf covers the offset.
    None,
    /// Exactly one leaf contains the offset.
    Single(SyntaxNodeRef<'a>),
    /// The offset sits on the boundary between two leaves: (left, right).
    Between(SyntaxNodeRef<'a>, SyntaxNodeRef<'a>)
}
45 | |||
46 | impl<'a> LeafAtOffset<'a> { | ||
47 | pub fn right_biased(self) -> Option<SyntaxNodeRef<'a>> { | ||
48 | match self { | ||
49 | LeafAtOffset::None => None, | ||
50 | LeafAtOffset::Single(node) => Some(node), | ||
51 | LeafAtOffset::Between(_, right) => Some(right) | ||
52 | } | ||
53 | } | ||
54 | |||
55 | pub fn left_biased(self) -> Option<SyntaxNodeRef<'a>> { | ||
56 | match self { | ||
57 | LeafAtOffset::None => None, | ||
58 | LeafAtOffset::Single(node) => Some(node), | ||
59 | LeafAtOffset::Between(left, _) => Some(left) | ||
60 | } | ||
61 | } | ||
62 | } | ||
63 | |||
64 | impl<'f> Iterator for LeafAtOffset<'f> { | ||
65 | type Item = SyntaxNodeRef<'f>; | ||
66 | |||
67 | fn next(&mut self) -> Option<SyntaxNodeRef<'f>> { | ||
68 | match *self { | ||
69 | LeafAtOffset::None => None, | ||
70 | LeafAtOffset::Single(node) => { *self = LeafAtOffset::None; Some(node) } | ||
71 | LeafAtOffset::Between(left, right) => { *self = LeafAtOffset::Single(right); Some(left) } | ||
72 | } | ||
73 | } | ||
74 | } | ||
75 | |||
76 | |||
/// Returns the smallest single node whose range contains all of `range`.
///
/// The assert fires if `range` is not inside `root.range()`.
pub fn find_covering_node(root: SyntaxNodeRef, range: TextRange) -> SyntaxNodeRef {
    assert!(is_subrange(root.range(), range));
    // Locate the leaves at both endpoints of the range; if either lookup
    // fails (empty tree), fall back to the root itself.
    let (left, right) = match (
        find_leaf_at_offset(root, range.start()).right_biased(),
        find_leaf_at_offset(root, range.end()).left_biased()
    ) {
        (Some(l), Some(r)) => (l, r),
        _ => return root
    };

    // The lowest common ancestor of the boundary leaves covers `range`.
    common_ancestor(left, right)
}
89 | |||
90 | fn common_ancestor<'a>(n1: SyntaxNodeRef<'a>, n2: SyntaxNodeRef<'a>) -> SyntaxNodeRef<'a> { | ||
91 | for p in ancestors(n1) { | ||
92 | if ancestors(n2).any(|a| a == p) { | ||
93 | return p; | ||
94 | } | ||
95 | } | ||
96 | panic!("Can't find common ancestor of {:?} and {:?}", n1, n2) | ||
97 | } | ||
98 | |||
/// Iterates `node` itself followed by each of its parents, root last.
pub fn ancestors<'a>(node: SyntaxNodeRef<'a>) -> impl Iterator<Item=SyntaxNodeRef<'a>> {
    Ancestors(Some(node))
}

/// Iterator state for [`ancestors`]: the next node to yield, if any.
#[derive(Debug)]
struct Ancestors<'a>(Option<SyntaxNodeRef<'a>>);
105 | |||
106 | impl<'a> Iterator for Ancestors<'a> { | ||
107 | type Item = SyntaxNodeRef<'a>; | ||
108 | |||
109 | fn next(&mut self) -> Option<Self::Item> { | ||
110 | self.0.take().map(|n| { | ||
111 | self.0 = n.parent(); | ||
112 | n | ||
113 | }) | ||
114 | } | ||
115 | } | ||
116 | |||
117 | fn contains_offset_nonstrict(range: TextRange, offset: TextUnit) -> bool { | ||
118 | range.start() <= offset && offset <= range.end() | ||
119 | } | ||
120 | |||
121 | fn is_subrange(range: TextRange, subrange: TextRange) -> bool { | ||
122 | range.start() <= subrange.start() && subrange.end() <= range.end() | ||
123 | } | ||
diff --git a/crates/libsyntax2/src/algo/search.rs b/crates/libsyntax2/src/algo/search.rs new file mode 100644 index 000000000..46404f537 --- /dev/null +++ b/crates/libsyntax2/src/algo/search.rs | |||
@@ -0,0 +1,136 @@ | |||
1 | use {Node, NodeType, TextUnit, TextRange}; | ||
2 | use ::visitor::{visitor, process_subtree_bottom_up}; | ||
3 | |||
4 | pub fn child_of_type(node: Node, ty: NodeType) -> Option<Node> { | ||
5 | node.children().find(|n| n.ty() == ty) | ||
6 | } | ||
7 | |||
/// Iterates the direct children of `node` that have type `ty`.
pub fn children_of_type<'f>(node: Node<'f>, ty: NodeType) -> Box<Iterator<Item=Node<'f>> + 'f> {
    Box::new(node.children().filter(move |n| n.ty() == ty))
}
11 | |||
/// Iterates every node in `node`'s subtree, children before their parent
/// (post-order); `node` itself is yielded last.
pub fn subtree<'f>(node: Node<'f>) -> Box<Iterator<Item=Node<'f>> + 'f> {
    Box::new(node.children().flat_map(subtree).chain(::std::iter::once(node)))
}
15 | |||
/// Collects every node of type `ty` in `node`'s subtree via a bottom-up
/// visitor pass.
pub fn descendants_of_type<'f>(node: Node<'f>, ty: NodeType) -> Vec<Node<'f>> {
    process_subtree_bottom_up(
        node,
        visitor(Vec::new())
            .visit_nodes(&[ty], |node, nodes| nodes.push(node))
    )
}
23 | |||
24 | pub fn child_of_type_exn(node: Node, ty: NodeType) -> Node { | ||
25 | child_of_type(node, ty).unwrap_or_else(|| { | ||
26 | panic!("No child of type {:?} for {:?}\ | ||
27 | ----\ | ||
28 | {}\ | ||
29 | ----", ty, node.ty(), node.text()) | ||
30 | }) | ||
31 | } | ||
32 | |||
33 | |||
/// Iterates `node` itself followed by each of its parents, root last.
pub fn ancestors(node: Node) -> Ancestors {
    Ancestors(Some(node))
}

/// Iterator over a node's ancestor chain, starting with the node itself.
pub struct Ancestors<'f>(Option<Node<'f>>);

impl<'f> Iterator for Ancestors<'f> {
    type Item = Node<'f>;

    fn next(&mut self) -> Option<Self::Item> {
        // Yield the stored node and advance the state to its parent;
        // the root's missing parent terminates the iteration.
        let current = self.0;
        self.0 = current.and_then(|n| n.parent());
        current
    }
}
49 | |||
/// True for childless nodes whose text range is non-empty; zero-width
/// childless nodes do not count as leaves.
pub fn is_leaf(node: Node) -> bool {
    node.children().next().is_none() && !node.range().is_empty()
}
53 | |||
54 | |||
/// Direction for sibling navigation.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
pub enum Direction {
    Left, Right
}

/// The previous/next sibling of `node`, or `None` when `node` is at the
/// corresponding edge of its parent (or has no parent).
pub fn sibling(node: Node, dir: Direction) -> Option<Node> {
    let (parent, idx) = child_position(node)?;
    let idx = match dir {
        // `checked_sub` turns "already leftmost" into `None` via `?`.
        Direction::Left => idx.checked_sub(1)?,
        Direction::Right => idx + 1,
    };
    parent.children().nth(idx)
}
68 | |||
/// Typed (AST-level) counterparts of the raw node searches above.
pub mod ast {
    use {Node, AstNode, TextUnit, AstChildren};
    use visitor::{visitor, process_subtree_bottom_up};
    // NOTE(review): `find_leaf_at_offset` / `LeafAtOffset` are imported
    // from the parent module but are not defined in this file's visible
    // code — confirm they exist in `super`.
    use super::{ancestors, find_leaf_at_offset, LeafAtOffset};

    /// Innermost ancestor of `node` (including `node` itself) that wraps
    /// into `T`.
    pub fn ancestor<'f, T: AstNode<'f>>(node: Node<'f>) -> Option<T> {
        ancestors(node)
            .filter_map(T::wrap)
            .next()
    }

    /// Like [`ancestor`], but panics when no ancestor matches.
    pub fn ancestor_exn<'f, T: AstNode<'f>>(node: Node<'f>) -> T {
        ancestor(node).unwrap()
    }

    /// Typed view over `node`'s direct children of AST type `N`.
    pub fn children_of_type<'f, N: AstNode<'f>>(node: Node<'f>) -> AstChildren<N> {
        AstChildren::new(node.children())
    }

    /// Collects every descendant of `node` that wraps into `N`, bottom-up.
    pub fn descendants_of_type<'f, N: AstNode<'f>>(node: Node<'f>) -> Vec<N> {
        process_subtree_bottom_up(
            node,
            visitor(Vec::new())
                .visit::<N, _>(|node, acc| acc.push(node))
        )
    }

    /// Innermost `T` covering `offset`; when the offset lies between two
    /// leaves, the left leaf's ancestors win.
    pub fn node_at_offset<'f, T: AstNode<'f>>(node: Node<'f>, offset: TextUnit) -> Option<T> {
        match find_leaf_at_offset(node, offset) {
            LeafAtOffset::None => None,
            LeafAtOffset::Single(node) => ancestor(node),
            LeafAtOffset::Between(left, right) => ancestor(left).or_else(|| ancestor(right)),
        }
    }
}
104 | |||
pub mod traversal {
    use {Node};

    /// Calls `f` on every node of `node`'s subtree, children before their
    /// parent (post-order).
    pub fn bottom_up<'f, F: FnMut(Node<'f>)>(node: Node<'f>, mut f: F)
    {
        go(node, &mut f);

        // Recursive worker takes `f` by reference so the closure is not
        // moved on every recursive call.
        fn go<'f, F: FnMut(Node<'f>)>(node: Node<'f>, f: &mut F) {
            for child in node.children() {
                go(child, f)
            }
            f(node);
        }
    }
}
120 | |||
121 | fn child_position(child: Node) -> Option<(Node, usize)> { | ||
122 | child.parent() | ||
123 | .map(|parent| { | ||
124 | (parent, parent.children().position(|n| n == child).unwrap()) | ||
125 | }) | ||
126 | } | ||
127 | |||
/// Lowest common ancestor of two nodes from the same tree.
///
/// Panics when the nodes share no ancestor (i.e. come from different
/// trees).
fn common_ancestor<'f>(n1: Node<'f>, n2: Node<'f>) -> Node<'f> {
    // For each ancestor of `n1` (innermost first), check membership in
    // `n2`'s ancestor chain.
    for p in ancestors(n1) {
        if ancestors(n2).any(|a| a == p) {
            return p;
        }
    }
    panic!("Can't find common ancestor of {:?} and {:?}", n1, n2)
}
136 | |||
diff --git a/crates/libsyntax2/src/algo/walk.rs b/crates/libsyntax2/src/algo/walk.rs new file mode 100644 index 000000000..a50ec2a09 --- /dev/null +++ b/crates/libsyntax2/src/algo/walk.rs | |||
@@ -0,0 +1,45 @@ | |||
1 | use SyntaxNodeRef; | ||
2 | |||
/// Depth-first pre-order traversal of `root`'s subtree: each node is
/// yielded on its `Enter` event; `Exit` events are discarded.
pub fn preorder<'a>(root: SyntaxNodeRef<'a>) -> impl Iterator<Item = SyntaxNodeRef<'a>> {
    walk(root).filter_map(|event| match event {
        WalkEvent::Enter(node) => Some(node),
        WalkEvent::Exit(_) => None,
    })
}
9 | |||
/// One step of a depth-first walk: a node is `Enter`ed before its
/// children and `Exit`ed after them.
#[derive(Debug, Copy, Clone)]
pub enum WalkEvent<'a> {
    Enter(SyntaxNodeRef<'a>),
    Exit(SyntaxNodeRef<'a>),
}
15 | |||
/// Full depth-first traversal of `root`'s subtree, yielding an `Enter`
/// event before each node's children and an `Exit` event after them.
pub fn walk<'a>(root: SyntaxNodeRef<'a>) -> impl Iterator<Item = WalkEvent<'a>> {
    let mut done = false;
    // `unfold` keeps the *next* event as state: emit the current one,
    // then compute its successor.
    ::itertools::unfold(WalkEvent::Enter(root), move |pos| {
        if done {
            return None;
        }
        let res = *pos;
        *pos = match *pos {
            // Entering: descend to the first child, or exit immediately
            // if the node is childless.
            WalkEvent::Enter(node) => match node.first_child() {
                Some(child) => WalkEvent::Enter(child),
                None => WalkEvent::Exit(node),
            },
            WalkEvent::Exit(node) => {
                if node == root {
                    // Exiting the root ends the traversal on the next call.
                    done = true;
                    WalkEvent::Exit(node)
                } else {
                    // Move to the next sibling, or bubble the exit up to
                    // the parent.
                    match node.next_sibling() {
                        Some(sibling) => WalkEvent::Enter(sibling),
                        None => match node.parent() {
                            Some(node) => WalkEvent::Exit(node),
                            // NOTE(review): unreachable while the walk stays
                            // inside `root`'s subtree (the root case is
                            // handled above); the arm mirrors the `Some` arm
                            // deliberately.
                            None => WalkEvent::Exit(node),
                        },
                    }
                }
            }
        };
        Some(res)
    })
}
diff --git a/crates/libsyntax2/src/ast/generated.rs b/crates/libsyntax2/src/ast/generated.rs new file mode 100644 index 000000000..2f813050a --- /dev/null +++ b/crates/libsyntax2/src/ast/generated.rs | |||
@@ -0,0 +1,54 @@ | |||
1 | use std::sync::Arc; | ||
2 | use { | ||
3 | SyntaxNode, SyntaxRoot, TreeRoot, AstNode, | ||
4 | SyntaxKind::*, | ||
5 | }; | ||
6 | |||
7 | |||
/// Typed wrapper for a `FILE` syntax node.
///
/// NOTE(review): this file looks machine-generated from
/// `generated.rs.tera`; prefer regenerating it over editing by hand.
#[derive(Debug, Clone, Copy)]
pub struct File<R: TreeRoot = Arc<SyntaxRoot>> {
    syntax: SyntaxNode<R>,
}

impl<R: TreeRoot> AstNode<R> for File<R> {
    // Succeeds only for nodes of kind FILE.
    fn cast(syntax: SyntaxNode<R>) -> Option<Self> {
        match syntax.kind() {
            FILE => Some(File { syntax }),
            _ => None,
        }
    }
    fn syntax(&self) -> &SyntaxNode<R> { &self.syntax }
}


/// Typed wrapper for a `FUNCTION` syntax node.
#[derive(Debug, Clone, Copy)]
pub struct Function<R: TreeRoot = Arc<SyntaxRoot>> {
    syntax: SyntaxNode<R>,
}

impl<R: TreeRoot> AstNode<R> for Function<R> {
    // Succeeds only for nodes of kind FUNCTION.
    fn cast(syntax: SyntaxNode<R>) -> Option<Self> {
        match syntax.kind() {
            FUNCTION => Some(Function { syntax }),
            _ => None,
        }
    }
    fn syntax(&self) -> &SyntaxNode<R> { &self.syntax }
}


/// Typed wrapper for a `NAME` syntax node.
#[derive(Debug, Clone, Copy)]
pub struct Name<R: TreeRoot = Arc<SyntaxRoot>> {
    syntax: SyntaxNode<R>,
}

impl<R: TreeRoot> AstNode<R> for Name<R> {
    // Succeeds only for nodes of kind NAME.
    fn cast(syntax: SyntaxNode<R>) -> Option<Self> {
        match syntax.kind() {
            NAME => Some(Name { syntax }),
            _ => None,
        }
    }
    fn syntax(&self) -> &SyntaxNode<R> { &self.syntax }
}
54 | |||
diff --git a/crates/libsyntax2/src/ast/generated.rs.tera b/crates/libsyntax2/src/ast/generated.rs.tera new file mode 100644 index 000000000..242837801 --- /dev/null +++ b/crates/libsyntax2/src/ast/generated.rs.tera | |||
@@ -0,0 +1,22 @@ | |||
use std::sync::Arc;
use {
    SyntaxNode, SyntaxRoot, TreeRoot, AstNode,
    SyntaxKind::*,
};
{#- Emits one newtype wrapper plus an AstNode impl for every entry in the
    `ast` list from grammar.ron. `Name` is the node kind run through the
    `camel` filter (CamelCase type name). Tera comments are stripped at
    render time. -#}
{% for node in ast %}
{% set Name = node.kind | camel %}
#[derive(Debug, Clone, Copy)]
pub struct {{ Name }}<R: TreeRoot = Arc<SyntaxRoot>> {
    syntax: SyntaxNode<R>,
}

impl<R: TreeRoot> AstNode<R> for {{ Name }}<R> {
    fn cast(syntax: SyntaxNode<R>) -> Option<Self> {
        match syntax.kind() {
            {{ node.kind }} => Some({{ Name }} { syntax }),
            _ => None,
        }
    }
    fn syntax(&self) -> &SyntaxNode<R> { &self.syntax }
}
{% endfor %}
diff --git a/crates/libsyntax2/src/ast/mod.rs b/crates/libsyntax2/src/ast/mod.rs new file mode 100644 index 000000000..eeb7ae6f6 --- /dev/null +++ b/crates/libsyntax2/src/ast/mod.rs | |||
@@ -0,0 +1,74 @@ | |||
1 | mod generated; | ||
2 | |||
3 | use std::sync::Arc; | ||
4 | use { | ||
5 | SyntaxNode, SyntaxRoot, TreeRoot, SyntaxError, | ||
6 | SyntaxKind::*, | ||
7 | }; | ||
8 | pub use self::generated::*; | ||
9 | |||
/// Common interface of typed AST wrappers: a fallible downcast from a
/// raw syntax node, plus access back to the underlying node.
pub trait AstNode<R: TreeRoot>: Sized {
    /// Returns `Some` only when `syntax` has the matching kind.
    fn cast(syntax: SyntaxNode<R>) -> Option<Self>;
    /// The untyped node this wrapper was built from.
    fn syntax(&self) -> &SyntaxNode<R>;
}
14 | |||
impl File<Arc<SyntaxRoot>> {
    /// Parses `text` into a `File`. The root node produced by `::parse`
    /// is cast to FILE; the `unwrap` relies on that cast succeeding.
    pub fn parse(text: &str) -> Self {
        File::cast(::parse(text)).unwrap()
    }
}
20 | |||
impl<R: TreeRoot> File<R> {
    /// Clones out the syntax errors recorded on the tree's root.
    pub fn errors(&self) -> Vec<SyntaxError> {
        self.syntax().root.errors.clone()
    }

    /// Iterates the file's direct FUNCTION children, in source order.
    pub fn functions<'a>(&'a self) -> impl Iterator<Item = Function<R>> + 'a {
        self.syntax()
            .children()
            .filter_map(Function::cast)
    }
}
32 | |||
impl<R: TreeRoot> Function<R> {
    /// The first NAME child of the function node, i.e. its name.
    pub fn name(&self) -> Option<Name<R>> {
        self.syntax()
            .children()
            .filter_map(Name::cast)
            .next()
    }

    /// True if any attribute on the function consists of a single bare
    /// identifier equal to `atom` — e.g. `#[test]` for `atom == "test"`.
    pub fn has_atom_attr(&self, atom: &str) -> bool {
        self.syntax()
            .children()
            .filter(|node| node.kind() == ATTR)
            .any(|attr| {
                // The attribute must contain exactly one META_ITEM...
                let mut metas = attr.children().filter(|node| node.kind() == META_ITEM);
                let meta = match metas.next() {
                    None => return false,
                    Some(meta) => {
                        if metas.next().is_some() {
                            return false;
                        }
                        meta
                    }
                };
                // ...whose only child is an IDENT with matching text.
                let mut children = meta.children();
                match children.next() {
                    None => false,
                    Some(child) => {
                        if children.next().is_some() {
                            return false;
                        }
                        child.kind() == IDENT && child.text() == atom
                    }
                }
            })
    }
}
69 | |||
impl<R: TreeRoot> Name<R> {
    /// The identifier text of this name node, as an owned string.
    pub fn text(&self) -> String {
        self.syntax().text()
    }
}
diff --git a/crates/libsyntax2/src/grammar.ron b/crates/libsyntax2/src/grammar.ron new file mode 100644 index 000000000..bcc79843a --- /dev/null +++ b/crates/libsyntax2/src/grammar.ron | |||
@@ -0,0 +1,227 @@ | |||
1 | Grammar( | ||
2 | single_byte_tokens: [ | ||
3 | [";", "SEMI"], | ||
4 | [",", "COMMA"], | ||
5 | ["(", "L_PAREN"], | ||
6 | [")", "R_PAREN"], | ||
7 | ["{", "L_CURLY"], | ||
8 | ["}", "R_CURLY"], | ||
9 | ["[", "L_BRACK"], | ||
10 | ["]", "R_BRACK"], | ||
11 | ["<", "L_ANGLE"], | ||
12 | [">", "R_ANGLE"], | ||
13 | ["@", "AT"], | ||
14 | ["#", "POUND"], | ||
15 | ["~", "TILDE"], | ||
16 | ["?", "QUESTION"], | ||
17 | ["$", "DOLLAR"], | ||
18 | ["&", "AMP"], | ||
19 | ["|", "PIPE"], | ||
20 | ["+", "PLUS"], | ||
21 | ["*", "STAR"], | ||
22 | ["/", "SLASH"], | ||
23 | ["^", "CARET"], | ||
24 | ["%", "PERCENT"], | ||
25 | ], | ||
26 | multi_byte_tokens: [ | ||
27 | [".", "DOT"], | ||
28 | ["..", "DOTDOT"], | ||
29 | ["...", "DOTDOTDOT"], | ||
30 | ["..=", "DOTDOTEQ"], | ||
31 | [":", "COLON"], | ||
32 | ["::", "COLONCOLON"], | ||
33 | ["=", "EQ"], | ||
34 | ["==", "EQEQ"], | ||
35 | ["=>", "FAT_ARROW"], | ||
36 | ["!", "EXCL"], | ||
37 | ["!=", "NEQ"], | ||
38 | ["-", "MINUS"], | ||
39 | ["->", "THIN_ARROW"], | ||
40 | ["<=", "LTEQ"], | ||
41 | [">=", "GTEQ"], | ||
42 | ["+=", "PLUSEQ"], | ||
43 | ["-=", "MINUSEQ"], | ||
44 | ["&&", "AMPAMP"], | ||
45 | ["||", "PIPEPIPE"], | ||
46 | ["<<", "SHL"], | ||
47 | [">>", "SHR"], | ||
48 | ["<<=", "SHLEQ"], | ||
49 | [">>=", "SHREQ"], | ||
50 | ], | ||
51 | keywords: [ | ||
52 | "use", | ||
53 | "fn", | ||
54 | "struct", | ||
55 | "enum", | ||
56 | "trait", | ||
57 | "impl", | ||
58 | "true", | ||
59 | "false", | ||
60 | "as", | ||
61 | "extern", | ||
62 | "crate", | ||
63 | "mod", | ||
64 | "pub", | ||
65 | "self", | ||
66 | "super", | ||
67 | "in", | ||
68 | "where", | ||
69 | "for", | ||
70 | "loop", | ||
71 | "while", | ||
72 | "if", | ||
73 | "else", | ||
74 | "match", | ||
75 | "const", | ||
76 | "static", | ||
77 | "mut", | ||
78 | "unsafe", | ||
79 | "type", | ||
80 | "ref", | ||
81 | "let", | ||
82 | "move", | ||
83 | "return", | ||
84 | ], | ||
85 | contextual_keywords: [ | ||
86 | "auto", | ||
87 | "default", | ||
88 | "union", | ||
89 | ], | ||
90 | tokens: [ | ||
91 | "ERROR", | ||
92 | "IDENT", | ||
93 | "UNDERSCORE", | ||
94 | "WHITESPACE", | ||
95 | "INT_NUMBER", | ||
96 | "FLOAT_NUMBER", | ||
97 | "LIFETIME", | ||
98 | "CHAR", | ||
99 | "BYTE", | ||
100 | "STRING", | ||
101 | "RAW_STRING", | ||
102 | "BYTE_STRING", | ||
103 | "RAW_BYTE_STRING", | ||
104 | "COMMENT", | ||
105 | "DOC_COMMENT", | ||
106 | "SHEBANG", | ||
107 | ], | ||
108 | nodes: [ | ||
109 | "FILE", | ||
110 | |||
111 | "STRUCT_ITEM", | ||
112 | "ENUM_ITEM", | ||
113 | "FUNCTION", | ||
114 | "EXTERN_CRATE_ITEM", | ||
115 | "MOD_ITEM", | ||
116 | "USE_ITEM", | ||
117 | "STATIC_ITEM", | ||
118 | "CONST_ITEM", | ||
119 | "TRAIT_ITEM", | ||
120 | "IMPL_ITEM", | ||
121 | "TYPE_ITEM", | ||
122 | "MACRO_CALL", | ||
123 | "TOKEN_TREE", | ||
124 | |||
125 | "PAREN_TYPE", | ||
126 | "TUPLE_TYPE", | ||
127 | "NEVER_TYPE", | ||
128 | "PATH_TYPE", | ||
129 | "POINTER_TYPE", | ||
130 | "ARRAY_TYPE", | ||
131 | "SLICE_TYPE", | ||
132 | "REFERENCE_TYPE", | ||
133 | "PLACEHOLDER_TYPE", | ||
134 | "FN_POINTER_TYPE", | ||
135 | "FOR_TYPE", | ||
136 | "IMPL_TRAIT_TYPE", | ||
137 | |||
138 | "REF_PAT", | ||
139 | "BIND_PAT", | ||
140 | "PLACEHOLDER_PAT", | ||
141 | "PATH_PAT", | ||
142 | "STRUCT_PAT", | ||
143 | "TUPLE_STRUCT_PAT", | ||
144 | "TUPLE_PAT", | ||
145 | "SLICE_PAT", | ||
146 | "RANGE_PAT", | ||
147 | |||
148 | // atoms | ||
149 | "TUPLE_EXPR", | ||
150 | "ARRAY_EXPR", | ||
151 | "PAREN_EXPR", | ||
152 | "PATH_EXPR", | ||
153 | "LAMBDA_EXPR", | ||
154 | "IF_EXPR", | ||
155 | "WHILE_EXPR", | ||
156 | "LOOP_EXPR", | ||
157 | "FOR_EXPR", | ||
158 | "BLOCK_EXPR", | ||
159 | "RETURN_EXPR", | ||
160 | "MATCH_EXPR", | ||
161 | "MATCH_ARM", | ||
162 | "MATCH_GUARD", | ||
163 | "STRUCT_LIT", | ||
164 | "STRUCT_LIT_FIELD", | ||
165 | |||
166 | // postfix | ||
167 | "CALL_EXPR", | ||
168 | "INDEX_EXPR", | ||
169 | "METHOD_CALL_EXPR", | ||
170 | "FIELD_EXPR", | ||
171 | "TRY_EXPR", | ||
172 | "CAST_EXPR", | ||
173 | |||
174 | // unary | ||
175 | "REF_EXPR", | ||
176 | "PREFIX_EXPR", | ||
177 | |||
178 | "RANGE_EXPR", // just weird | ||
179 | "BIN_EXPR", | ||
180 | |||
181 | |||
182 | "EXTERN_BLOCK_EXPR", | ||
183 | "ENUM_VARIANT", | ||
184 | "NAMED_FIELD", | ||
185 | "POS_FIELD", | ||
186 | "ATTR", | ||
187 | "META_ITEM", // not an item actually | ||
188 | "USE_TREE", | ||
189 | "PATH", | ||
190 | "PATH_SEGMENT", | ||
191 | "LITERAL", | ||
192 | "ALIAS", | ||
193 | "VISIBILITY", | ||
194 | "WHERE_CLAUSE", | ||
195 | "WHERE_PRED", | ||
196 | "ABI", | ||
197 | "NAME", | ||
198 | "NAME_REF", | ||
199 | |||
200 | "LET_STMT", | ||
201 | "EXPR_STMT", | ||
202 | |||
203 | "TYPE_PARAM_LIST", | ||
204 | "LIFETIME_PARAM", | ||
205 | "TYPE_PARAM", | ||
206 | "TYPE_ARG_LIST", | ||
207 | "LIFETIME_ARG", | ||
208 | "TYPE_ARG", | ||
209 | "ASSOC_TYPE_ARG", | ||
210 | |||
211 | "PARAM_LIST", | ||
212 | "PARAM", | ||
213 | "SELF_PARAM", | ||
214 | "ARG_LIST", | ||
215 | ], | ||
216 | ast: [ | ||
217 | ( | ||
218 | kind: "FILE" | ||
219 | ), | ||
220 | ( | ||
221 | kind: "FUNCTION" | ||
222 | ), | ||
223 | ( | ||
224 | kind: "NAME" | ||
225 | ), | ||
226 | ] | ||
227 | ) | ||
diff --git a/crates/libsyntax2/src/grammar/attributes.rs b/crates/libsyntax2/src/grammar/attributes.rs new file mode 100644 index 000000000..c411d4d7f --- /dev/null +++ b/crates/libsyntax2/src/grammar/attributes.rs | |||
@@ -0,0 +1,79 @@ | |||
1 | use super::*; | ||
2 | |||
/// Parses consecutive inner attributes (`#![...]`).
pub(super) fn inner_attributes(p: &mut Parser) {
    // `#` must be immediately followed by `!` to count as inner.
    while p.current() == POUND && p.nth(1) == EXCL {
        attribute(p, true)
    }
}
8 | |||
/// Parses consecutive outer attributes (`#[...]`).
pub(super) fn outer_attributes(p: &mut Parser) {
    while p.at(POUND) {
        attribute(p, false)
    }
}
14 | |||
/// Parses one attribute; `inner` selects the `#![...]` form over `#[...]`.
/// Callers guarantee the leading `#` (and `!` when `inner`) are present —
/// hence the asserts.
fn attribute(p: &mut Parser, inner: bool) {
    let attr = p.start();
    assert!(p.at(POUND));
    p.bump();

    if inner {
        assert!(p.at(EXCL));
        p.bump();
    }

    // Only parse the meta item when the opening `[` is actually there;
    // `expect` reports the error otherwise.
    if p.expect(L_BRACK) {
        meta_item(p);
        p.expect(R_BRACK);
    }
    attr.complete(p, ATTR);
}
31 | |||
/// Parses a meta item: `name`, `name = literal`, or `name(args)`.
fn meta_item(p: &mut Parser) {
    if p.at(IDENT) {
        let meta_item = p.start();
        p.bump();
        match p.current() {
            // `name = literal`
            EQ => {
                p.bump();
                if expressions::literal(p).is_none() {
                    p.error("expected literal");
                }
            }
            // `name(args)`
            L_PAREN => meta_item_arg_list(p),
            // bare `name`
            _ => (),
        }
        meta_item.complete(p, META_ITEM);
    } else {
        p.error("expected attribute value");
    }
}
51 | |||
/// Parses the parenthesised argument list of a meta item: a comma-
/// separated mix of nested meta items and literals.
fn meta_item_arg_list(p: &mut Parser) {
    assert!(p.at(L_PAREN));
    p.bump();
    loop {
        match p.current() {
            EOF | R_PAREN => break,
            IDENT => meta_item(p),
            c => if expressions::literal(p).is_none() {
                let message = "expected attribute";

                // If an item starts here the attribute is evidently
                // unterminated: report and bail so the item parses
                // normally.
                if items::ITEM_FIRST.contains(c) {
                    p.error(message);
                    return;
                }

                // Otherwise wrap the stray token in an ERROR node and
                // keep scanning the list.
                let err = p.start();
                p.error(message);
                p.bump();
                err.complete(p, ERROR);
                continue;
            },
        }
        if !p.at(R_PAREN) {
            p.expect(COMMA);
        }
    }
    p.expect(R_PAREN);
}
diff --git a/crates/libsyntax2/src/grammar/expressions/atom.rs b/crates/libsyntax2/src/grammar/expressions/atom.rs new file mode 100644 index 000000000..af9f47c5e --- /dev/null +++ b/crates/libsyntax2/src/grammar/expressions/atom.rs | |||
@@ -0,0 +1,348 @@ | |||
1 | use super::*; | ||
2 | |||
3 | // test expr_literals | ||
4 | // fn foo() { | ||
5 | // let _ = true; | ||
6 | // let _ = false; | ||
7 | // let _ = 1; | ||
8 | // let _ = 2.0; | ||
9 | // let _ = b'a'; | ||
10 | // let _ = 'b'; | ||
11 | // let _ = "c"; | ||
12 | // let _ = r"d"; | ||
13 | // let _ = b"e"; | ||
14 | // let _ = br"f"; | ||
15 | // } | ||
const LITERAL_FIRST: TokenSet =
    token_set![TRUE_KW, FALSE_KW, INT_NUMBER, FLOAT_NUMBER, BYTE, CHAR,
               STRING, RAW_STRING, BYTE_STRING, RAW_BYTE_STRING];

pub(crate) fn literal(p: &mut Parser) -> Option<CompletedMarker> {
    // `None` tells the caller to try other expression forms.
    if !LITERAL_FIRST.contains(p.current()) {
        return None;
    }
    // A literal is always a single token, wrapped in a LITERAL node.
    let m = p.start();
    p.bump();
    Some(m.complete(p, LITERAL))
}
28 | |||
// Tokens that can begin an atom expression: every literal starter plus
// the keywords/delimiters dispatched in `atom_expr` (path starts enter
// via IDENT / SELF_KW / SUPER_KW / COLONCOLON).
pub(super) const ATOM_EXPR_FIRST: TokenSet =
    token_set_union![
        LITERAL_FIRST,
        token_set![L_PAREN, PIPE, MOVE_KW, IF_KW, WHILE_KW, MATCH_KW, UNSAFE_KW, L_CURLY, RETURN_KW,
                   IDENT, SELF_KW, SUPER_KW, COLONCOLON ],
    ];
35 | |||
36 | pub(super) fn atom_expr(p: &mut Parser, r: Restrictions) -> Option<CompletedMarker> { | ||
37 | match literal(p) { | ||
38 | Some(m) => return Some(m), | ||
39 | None => (), | ||
40 | } | ||
41 | if paths::is_path_start(p) { | ||
42 | return Some(path_expr(p, r)); | ||
43 | } | ||
44 | let la = p.nth(1); | ||
45 | let done = match p.current() { | ||
46 | L_PAREN => tuple_expr(p), | ||
47 | L_BRACK => array_expr(p), | ||
48 | PIPE => lambda_expr(p), | ||
49 | MOVE_KW if la == PIPE => lambda_expr(p), | ||
50 | IF_KW => if_expr(p), | ||
51 | WHILE_KW => while_expr(p), | ||
52 | LOOP_KW => loop_expr(p), | ||
53 | FOR_KW => for_expr(p), | ||
54 | MATCH_KW => match_expr(p), | ||
55 | UNSAFE_KW if la == L_CURLY => block_expr(p), | ||
56 | L_CURLY => block_expr(p), | ||
57 | RETURN_KW => return_expr(p), | ||
58 | _ => { | ||
59 | p.err_and_bump("expected expression"); | ||
60 | return None; | ||
61 | } | ||
62 | }; | ||
63 | Some(done) | ||
64 | } | ||
65 | |||
66 | // test tuple_expr | ||
67 | // fn foo() { | ||
68 | // (); | ||
69 | // (1); | ||
70 | // (1,); | ||
71 | // } | ||
fn tuple_expr(p: &mut Parser) -> CompletedMarker {
    assert!(p.at(L_PAREN));
    let m = p.start();
    p.expect(L_PAREN);

    // Track what we saw to disambiguate: `(e)` is a parenthesised
    // expression, while `()` and `(e,)` are tuples.
    let mut saw_comma = false;
    let mut saw_expr = false;
    while !p.at(EOF) && !p.at(R_PAREN) {
        saw_expr = true;
        expr(p);
        if !p.at(R_PAREN) {
            saw_comma = true;
            p.expect(COMMA);
        }
    }
    p.expect(R_PAREN);
    m.complete(p, if saw_expr && !saw_comma { PAREN_EXPR } else { TUPLE_EXPR })
}
90 | |||
91 | // test array_expr | ||
92 | // fn foo() { | ||
93 | // []; | ||
94 | // [1]; | ||
95 | // [1, 2,]; | ||
96 | // [1; 2]; | ||
97 | // } | ||
fn array_expr(p: &mut Parser) -> CompletedMarker {
    assert!(p.at(L_BRACK));
    let m = p.start();
    p.bump();
    // Empty array: `[]`.
    if p.eat(R_BRACK) {
        return m.complete(p, ARRAY_EXPR);
    }
    // First element; a following SEMI selects the `[expr; len]` repeat form.
    expr(p);
    if p.eat(SEMI) {
        expr(p);
        p.expect(R_BRACK);
        return m.complete(p, ARRAY_EXPR);
    }
    // Comma-separated list form; trailing comma allowed.
    while !p.at(EOF) && !p.at(R_BRACK) {
        p.expect(COMMA);
        if !p.at(R_BRACK) {
            expr(p);
        }
    }
    p.expect(R_BRACK);
    m.complete(p, ARRAY_EXPR)
}
120 | |||
121 | // test lambda_expr | ||
122 | // fn foo() { | ||
123 | // || (); | ||
124 | // || -> i32 { 92 }; | ||
125 | // |x| x; | ||
126 | // move |x: i32,| x; | ||
127 | // } | ||
fn lambda_expr(p: &mut Parser) -> CompletedMarker {
    assert!(p.at(PIPE) || (p.at(MOVE_KW) && p.nth(1) == PIPE));
    let m = p.start();
    p.eat(MOVE_KW);
    params::param_list_opt_types(p);
    // With an explicit `-> Type` return, the body must be a block;
    // otherwise any expression is accepted.
    if fn_ret_type(p) {
        block(p);
    } else {
        expr(p);
    }
    m.complete(p, LAMBDA_EXPR)
}
140 | |||
141 | // test if_expr | ||
142 | // fn foo() { | ||
143 | // if true {}; | ||
144 | // if true {} else {}; | ||
145 | // if true {} else if false {} else {}; | ||
146 | // if S {}; | ||
147 | // } | ||
fn if_expr(p: &mut Parser) -> CompletedMarker {
    assert!(p.at(IF_KW));
    let m = p.start();
    p.bump();
    cond(p);
    block(p);
    if p.at(ELSE_KW) {
        p.bump();
        // `else if` recurses; a plain `else` takes a block.
        if p.at(IF_KW) {
            if_expr(p);
        } else {
            block(p);
        }
    }
    m.complete(p, IF_EXPR)
}
164 | |||
165 | // test while_expr | ||
166 | // fn foo() { | ||
167 | // while true {}; | ||
168 | // while let Some(x) = it.next() {}; | ||
169 | // } | ||
fn while_expr(p: &mut Parser) -> CompletedMarker {
    assert!(p.at(WHILE_KW));
    let m = p.start();
    p.bump();
    // `cond` also handles the `while let` form.
    cond(p);
    block(p);
    m.complete(p, WHILE_EXPR)
}
178 | |||
179 | // test loop_expr | ||
180 | // fn foo() { | ||
181 | // loop {}; | ||
182 | // } | ||
fn loop_expr(p: &mut Parser) -> CompletedMarker {
    assert!(p.at(LOOP_KW));
    let m = p.start();
    p.bump();
    // `loop` takes no condition — just the body block.
    block(p);
    m.complete(p, LOOP_EXPR)
}
190 | |||
191 | // test for_expr | ||
192 | // fn foo() { | ||
193 | // for x in [] {}; | ||
194 | // } | ||
fn for_expr(p: &mut Parser) -> CompletedMarker {
    assert!(p.at(FOR_KW));
    let m = p.start();
    p.bump();
    // `for <pattern> in <iterable> <block>`; struct literals are
    // forbidden in the iterable to keep `{` unambiguous.
    patterns::pattern(p);
    p.expect(IN_KW);
    expr_no_struct(p);
    block(p);
    m.complete(p, FOR_EXPR)
}
205 | |||
206 | // test cond | ||
207 | // fn foo() { if let Some(_) = None {} } | ||
fn cond(p: &mut Parser) {
    // Optional `let <pattern> =` prefix handles `if let` / `while let`.
    if p.eat(LET_KW) {
        patterns::pattern(p);
        p.expect(EQ);
    }
    // Struct literals are forbidden so `if S {}` parses the `{}` as the
    // body, not a literal.
    expr_no_struct(p)
}
215 | |||
216 | // test match_expr | ||
217 | // fn foo() { | ||
218 | // match () { }; | ||
219 | // match S {}; | ||
220 | // } | ||
fn match_expr(p: &mut Parser) -> CompletedMarker {
    assert!(p.at(MATCH_KW));
    let m = p.start();
    p.bump();
    expr_no_struct(p); // scrutinee: struct literals forbidden (`match S {}`)
    // `eat`, not `expect`: a missing `{` is tolerated without an error here.
    p.eat(L_CURLY);
    while !p.at(EOF) && !p.at(R_CURLY) {
        // test match_arms_commas
        // fn foo() {
        //     match () {
        //         _ => (),
        //         _ => {}
        //         _ => ()
        //     }
        // }
        if match_arm(p).is_block() {
            p.eat(COMMA);
        } else if !p.at(R_CURLY) {
            p.expect(COMMA);
        }
    }
    p.expect(R_CURLY);
    m.complete(p, MATCH_EXPR)
}
245 | |||
246 | // test match_arm | ||
247 | // fn foo() { | ||
248 | // match () { | ||
249 | // _ => (), | ||
250 | // X | Y if Z => (), | ||
251 | // }; | ||
252 | // } | ||
fn match_arm(p: &mut Parser) -> BlockLike {
    let m = p.start();
    // One or more `|`-separated patterns.
    loop {
        patterns::pattern(p);
        if !p.eat(PIPE) {
            break;
        }
    }
    // Optional `if` guard.
    if p.eat(IF_KW) {
        expr_no_struct(p);
    }
    p.expect(FAT_ARROW);
    // Return whether the arm body was block-like, so the caller can
    // decide whether a trailing comma is required.
    let ret = expr_stmt(p);
    m.complete(p, MATCH_ARM);
    ret
}
269 | |||
270 | // test block_expr | ||
271 | // fn foo() { | ||
272 | // {}; | ||
273 | // unsafe {}; | ||
274 | // } | ||
pub(super) fn block_expr(p: &mut Parser) -> CompletedMarker {
    assert!(p.at(L_CURLY) || p.at(UNSAFE_KW) && p.nth(1) == L_CURLY);
    let m = p.start();
    p.eat(UNSAFE_KW);
    p.bump(); // the `{`
    // Each iteration parses one statement: a let-binding, an item, or an
    // expression (possibly an expression statement).
    while !p.at(EOF) && !p.at(R_CURLY) {
        match p.current() {
            LET_KW => let_stmt(p),
            _ => {
                // test block_items
                // fn a() { fn b() {} }
                let m = p.start();
                match items::maybe_item(p) {
                    items::MaybeItem::Item(kind) => {
                        m.complete(p, kind);
                    }
                    items::MaybeItem::Modifiers => {
                        m.abandon(p);
                        p.error("expected an item");
                    }
                    // test pub_expr
                    // fn foo() { pub 92; } //FIXME
                    items::MaybeItem::None => {
                        let is_blocklike = expressions::expr_stmt(p) == BlockLike::Block;
                        if p.eat(SEMI) || (is_blocklike && !p.at(R_CURLY)) {
                            m.complete(p, EXPR_STMT);
                        } else {
                            m.abandon(p);
                        }
                    }
                }
            }
        }
    }
    p.expect(R_CURLY);
    m.complete(p, BLOCK_EXPR)
}
312 | |||
// test let_stmt
314 | // fn foo() { | ||
315 | // let a; | ||
316 | // let b: i32; | ||
317 | // let c = 92; | ||
318 | // let d: i32 = 92; | ||
319 | // } | ||
fn let_stmt(p: &mut Parser) {
    assert!(p.at(LET_KW));
    let m = p.start();
    p.bump();
    patterns::pattern(p);
    // Optional `: Type` ascription.
    if p.at(COLON) {
        types::ascription(p);
    }
    // Optional `= initializer`.
    if p.eat(EQ) {
        expressions::expr(p);
    }
    p.expect(SEMI);
    m.complete(p, LET_STMT);
}
334 | |||
335 | // test return_expr | ||
336 | // fn foo() { | ||
337 | // return; | ||
338 | // return 92; | ||
339 | // } | ||
fn return_expr(p: &mut Parser) -> CompletedMarker {
    assert!(p.at(RETURN_KW));
    let m = p.start();
    p.bump();
    // `return` optionally carries a value.
    if EXPR_FIRST.contains(p.current()) {
        expr(p);
    }
    m.complete(p, RETURN_EXPR)
}
diff --git a/crates/libsyntax2/src/grammar/expressions/mod.rs b/crates/libsyntax2/src/grammar/expressions/mod.rs new file mode 100644 index 000000000..dcbb1e2a8 --- /dev/null +++ b/crates/libsyntax2/src/grammar/expressions/mod.rs | |||
@@ -0,0 +1,379 @@ | |||
1 | mod atom; | ||
2 | |||
3 | use super::*; | ||
4 | pub(super) use self::atom::literal; | ||
5 | |||
/// Tokens that can begin an expression — identical to the first set of
/// a (possibly prefixed) left-hand side.
const EXPR_FIRST: TokenSet = LHS_FIRST;
7 | |||
8 | pub(super) fn expr(p: &mut Parser) -> BlockLike { | ||
9 | let r = Restrictions { forbid_structs: false, prefer_stmt: false }; | ||
10 | expr_bp(p, r, 1) | ||
11 | } | ||
12 | |||
13 | pub(super) fn expr_stmt(p: &mut Parser) -> BlockLike { | ||
14 | let r = Restrictions { forbid_structs: false, prefer_stmt: true }; | ||
15 | expr_bp(p, r, 1) | ||
16 | } | ||
17 | |||
18 | fn expr_no_struct(p: &mut Parser) { | ||
19 | let r = Restrictions { forbid_structs: true, prefer_stmt: false }; | ||
20 | expr_bp(p, r, 1); | ||
21 | } | ||
22 | |||
23 | // test block | ||
24 | // fn a() {} | ||
25 | // fn b() { let _ = 1; } | ||
26 | // fn c() { 1; 2; } | ||
27 | // fn d() { 1; 2 } | ||
28 | pub(super) fn block(p: &mut Parser) { | ||
29 | if !p.at(L_CURLY) { | ||
30 | p.error("expected block"); | ||
31 | return; | ||
32 | } | ||
33 | atom::block_expr(p); | ||
34 | } | ||
35 | |||
/// Context restrictions threaded through expression parsing.
#[derive(Clone, Copy)]
struct Restrictions {
    // When true, a bare `{` after a path must not be parsed as a struct
    // literal (set inside `if`/`while`/`match` headers — see
    // `expr_no_struct`).
    forbid_structs: bool,
    // When true, the expression is in statement position: a block-like
    // expression ends the statement without a trailing `;`.
    prefer_stmt: bool,
}
41 | |||
/// How to consume the binary operator at the current position.
enum Op {
    // Single-token operator: one `bump`.
    Simple,
    // Multi-token operator (e.g. `+=`, `<<`): glue the given number of
    // adjacent tokens into one token of the given kind.
    Composite(SyntaxKind, u8),
}
46 | |||
/// Returns the binding power of the operator at the current position and
/// how to consume it. Binding power 0 means "not a binary operator".
///
/// The multi-token checks below must run longest-first (`<<=` before `<<`
/// before `<`), so the order of these `if`s is load-bearing.
fn current_op(p: &Parser) -> (u8, Op) {
    if p.at_compound2(PLUS, EQ) {
        return (1, Op::Composite(PLUSEQ, 2));
    }
    if p.at_compound2(MINUS, EQ) {
        return (1, Op::Composite(MINUSEQ, 2));
    }
    if p.at_compound3(L_ANGLE, L_ANGLE, EQ) {
        return (1, Op::Composite(SHLEQ, 3));
    }
    if p.at_compound3(R_ANGLE, R_ANGLE, EQ) {
        return (1, Op::Composite(SHREQ, 3));
    }
    if p.at_compound2(PIPE, PIPE) {
        return (3, Op::Composite(PIPEPIPE, 2));
    }
    if p.at_compound2(AMP, AMP) {
        return (4, Op::Composite(AMPAMP, 2));
    }
    if p.at_compound2(L_ANGLE, EQ) {
        return (5, Op::Composite(LTEQ, 2));
    }
    if p.at_compound2(R_ANGLE, EQ) {
        return (5, Op::Composite(GTEQ, 2));
    }
    if p.at_compound2(L_ANGLE, L_ANGLE) {
        return (9, Op::Composite(SHL, 2));
    }
    if p.at_compound2(R_ANGLE, R_ANGLE) {
        return (9, Op::Composite(SHR, 2));
    }

    // Single-token operators, lowest binding power (assignment, range)
    // to highest (multiplicative).
    let bp = match p.current() {
        EQ => 1,
        DOTDOT => 2,
        EQEQ | NEQ | L_ANGLE | R_ANGLE => 5,
        PIPE => 6,
        CARET => 7,
        AMP => 8,
        MINUS | PLUS => 10,
        STAR | SLASH | PERCENT => 11,
        _ => 0,
    };
    (bp, Op::Simple)
}
92 | |||
// Parses expression with binding power of at least bp.
//
// Classic Pratt parsing: `lhs` produces a prefix/atom expression, then the
// loop wraps it in binary-operator nodes while the operator's power allows.
fn expr_bp(p: &mut Parser, r: Restrictions, bp: u8) -> BlockLike {
    let mut lhs = match lhs(p, r) {
        Some(lhs) => {
            // test stmt_bin_expr_ambiguity
            // fn foo() {
            // let _ = {1} & 2;
            // {1} &2;
            // }
            // In statement position a block-like lhs terminates the
            // statement, so `{1} &2` parses as two statements.
            if r.prefer_stmt && is_block(lhs.kind()) {
                return BlockLike::Block;
            }
            lhs
        }
        None => return BlockLike::NotBlock,
    };

    loop {
        // `..` shares the machinery but produces RANGE_EXPR, not BIN_EXPR.
        let is_range = p.current() == DOTDOT;
        let (op_bp, op) = current_op(p);
        if op_bp < bp {
            break;
        }
        let m = lhs.precede(p);
        match op {
            Op::Simple => p.bump(),
            Op::Composite(kind, n) => {
                // Join `n` adjacent tokens into one composite operator token.
                p.bump_compound(kind, n);
            }
        }
        // `op_bp + 1` makes operators left-associative.
        // NOTE(review): this also left-associates `=` (bp 1), whereas
        // assignment is right-associative in Rust — confirm intent.
        expr_bp(p, r, op_bp + 1);
        lhs = m.complete(p, if is_range { RANGE_EXPR } else { BIN_EXPR });
    }
    BlockLike::NotBlock
}
128 | |||
// test no_semi_after_block
// fn foo() {
// if true {}
// loop {}
// match () {}
// while true {}
// for _ in () {}
// {}
// {}
// }
/// Returns true for "block-like" expression kinds — those that may stand
/// as statements without a trailing `;`.
fn is_block(kind: SyntaxKind) -> bool {
    match kind {
        IF_EXPR | WHILE_EXPR | FOR_EXPR | LOOP_EXPR | MATCH_EXPR | BLOCK_EXPR => true,
        _ => false,
    }
}
145 | |||
/// First-token set of a left-hand-side expression: the prefix operators
/// plus everything that can start an atom expression.
const LHS_FIRST: TokenSet =
    token_set_union![
        token_set![AMP, STAR, EXCL, DOTDOT, MINUS],
        atom::ATOM_EXPR_FIRST,
    ];
151 | |||
/// Parses a prefix (unary) expression, or delegates to `atom_expr` and
/// then attaches postfix operators. Returns `None` when nothing
/// expression-like starts here.
fn lhs(p: &mut Parser, r: Restrictions) -> Option<CompletedMarker> {
    let m;
    let kind = match p.current() {
        // test ref_expr
        // fn foo() {
        // let _ = &1;
        // let _ = &mut &f();
        // }
        AMP => {
            m = p.start();
            p.bump();
            p.eat(MUT_KW);
            REF_EXPR
        }
        // test unary_expr
        // fn foo() {
        // **&1;
        // !!true;
        // --1;
        // }
        STAR | EXCL | MINUS => {
            m = p.start();
            p.bump();
            PREFIX_EXPR
        }
        // Prefix range `..expr`: binds like the binary `..` (power 2).
        DOTDOT => {
            m = p.start();
            p.bump();
            expr_bp(p, r, 2);
            return Some(m.complete(p, RANGE_EXPR));
        }
        _ => {
            let lhs = atom::atom_expr(p, r)?;
            return Some(postfix_expr(p, r, lhs));
        }
    };
    // Operand of a prefix operator: bind as tightly as possible.
    expr_bp(p, r, 255);
    Some(m.complete(p, kind))
}
191 | |||
/// Folds postfix operators — `()`, `[]`, `.field`, `.method()`, trailing
/// `..`, `?`, `as` — onto an already-parsed expression.
fn postfix_expr(p: &mut Parser, r: Restrictions, mut lhs: CompletedMarker) -> CompletedMarker {
    // In statement position a block-like expression may not be called or
    // indexed directly (`{1} (2)` is two statements); once any postfix
    // op has applied, calls become legal again.
    let mut allow_calls = !r.prefer_stmt || !is_block(lhs.kind());
    loop {
        lhs = match p.current() {
            // test stmt_postfix_expr_ambiguity
            // fn foo() {
            // match () {
            // _ => {}
            // () => {}
            // [] => {}
            // }
            // }
            L_PAREN if allow_calls => call_expr(p, lhs),
            L_BRACK if allow_calls => index_expr(p, lhs),
            // `.ident` followed by `(` or `::` is a method call,
            // otherwise a named-field access.
            DOT if p.nth(1) == IDENT => if p.nth(2) == L_PAREN || p.nth(2) == COLONCOLON {
                method_call_expr(p, lhs)
            } else {
                field_expr(p, lhs)
            },
            // `.0` — tuple-field access.
            DOT if p.nth(1) == INT_NUMBER => field_expr(p, lhs),
            // test postfix_range
            // fn foo() { let x = 1..; }
            DOTDOT if !EXPR_FIRST.contains(p.nth(1)) => {
                let m = lhs.precede(p);
                p.bump();
                m.complete(p, RANGE_EXPR)
            }
            QUESTION => try_expr(p, lhs),
            AS_KW => cast_expr(p, lhs),
            _ => break,
        };
        allow_calls = true
    }
    lhs
}
227 | |||
228 | // test call_expr | ||
229 | // fn foo() { | ||
230 | // let _ = f(); | ||
231 | // let _ = f()(1)(1, 2,); | ||
232 | // } | ||
233 | fn call_expr(p: &mut Parser, lhs: CompletedMarker) -> CompletedMarker { | ||
234 | assert!(p.at(L_PAREN)); | ||
235 | let m = lhs.precede(p); | ||
236 | arg_list(p); | ||
237 | m.complete(p, CALL_EXPR) | ||
238 | } | ||
239 | |||
240 | // test index_expr | ||
241 | // fn foo() { | ||
242 | // x[1][2]; | ||
243 | // } | ||
244 | fn index_expr(p: &mut Parser, lhs: CompletedMarker) -> CompletedMarker { | ||
245 | assert!(p.at(L_BRACK)); | ||
246 | let m = lhs.precede(p); | ||
247 | p.bump(); | ||
248 | expr(p); | ||
249 | p.expect(R_BRACK); | ||
250 | m.complete(p, INDEX_EXPR) | ||
251 | } | ||
252 | |||
// test method_call_expr
// fn foo() {
// x.foo();
// y.bar::<T>(1, 2,);
// }
/// Parses `.name[::<args>](args)`. The `.` IDENT (`(` | `::`) lookahead
/// is guaranteed by the caller (`postfix_expr`).
fn method_call_expr(p: &mut Parser, lhs: CompletedMarker) -> CompletedMarker {
    assert!(
        p.at(DOT) && p.nth(1) == IDENT
        && (p.nth(2) == L_PAREN || p.nth(2) == COLONCOLON)
    );
    let m = lhs.precede(p);
    p.bump(); // `.`
    name_ref(p);
    // Optional turbofish type arguments.
    // NOTE(review): the bool flag presumably means "`::` required before
    // `<`" — confirm against `type_args::type_arg_list`.
    type_args::type_arg_list(p, true);
    arg_list(p);
    m.complete(p, METHOD_CALL_EXPR)
}
270 | |||
// test field_expr
// fn foo() {
// x.foo;
// x.0.bar;
// }
/// Parses `.name` (named field) or `.0` (tuple field). Lookahead is
/// guaranteed by the caller.
fn field_expr(p: &mut Parser, lhs: CompletedMarker) -> CompletedMarker {
    assert!(p.at(DOT) && (p.nth(1) == IDENT || p.nth(1) == INT_NUMBER));
    let m = lhs.precede(p);
    p.bump(); // `.`
    if p.at(IDENT) {
        name_ref(p)
    } else {
        // Tuple index stays a raw INT_NUMBER token.
        p.bump()
    }
    m.complete(p, FIELD_EXPR)
}
287 | |||
288 | // test try_expr | ||
289 | // fn foo() { | ||
290 | // x?; | ||
291 | // } | ||
292 | fn try_expr(p: &mut Parser, lhs: CompletedMarker) -> CompletedMarker { | ||
293 | assert!(p.at(QUESTION)); | ||
294 | let m = lhs.precede(p); | ||
295 | p.bump(); | ||
296 | m.complete(p, TRY_EXPR) | ||
297 | } | ||
298 | |||
299 | // test cast_expr | ||
300 | // fn foo() { | ||
301 | // 82 as i32; | ||
302 | // } | ||
303 | fn cast_expr(p: &mut Parser, lhs: CompletedMarker) -> CompletedMarker { | ||
304 | assert!(p.at(AS_KW)); | ||
305 | let m = lhs.precede(p); | ||
306 | p.bump(); | ||
307 | types::type_(p); | ||
308 | m.complete(p, CAST_EXPR) | ||
309 | } | ||
310 | |||
311 | fn arg_list(p: &mut Parser) { | ||
312 | assert!(p.at(L_PAREN)); | ||
313 | let m = p.start(); | ||
314 | p.bump(); | ||
315 | while !p.at(R_PAREN) && !p.at(EOF) { | ||
316 | expr(p); | ||
317 | if !p.at(R_PAREN) && !p.expect(COMMA) { | ||
318 | break; | ||
319 | } | ||
320 | } | ||
321 | p.eat(R_PAREN); | ||
322 | m.complete(p, ARG_LIST); | ||
323 | } | ||
324 | |||
// test path_expr
// fn foo() {
// let _ = a;
// let _ = a::b;
// let _ = ::a::<b>;
// let _ = format!();
// }
/// Parses an expression that starts with a path: a plain path expression,
/// a struct literal (unless the context forbids one), or a macro call.
fn path_expr(p: &mut Parser, r: Restrictions) -> CompletedMarker {
    assert!(paths::is_path_start(p));
    let m = p.start();
    paths::expr_path(p);
    match p.current() {
        // `S { .. }` — struct literal, unless `{` must start a block here
        // (e.g. inside an `if` condition).
        L_CURLY if !r.forbid_structs => {
            struct_lit(p);
            m.complete(p, STRUCT_LIT)
        }
        // `path!...` — macro call.
        EXCL => {
            items::macro_call_after_excl(p);
            m.complete(p, MACRO_CALL)
        }
        _ => m.complete(p, PATH_EXPR)
    }
}
348 | |||
// test struct_lit
// fn foo() {
// S {};
// S { x, y: 32, };
// S { x, y: 32, ..Default::default() };
// }
/// Parses the `{ field: expr, ..base }` body of a struct literal; the
/// path was already consumed by the caller (`path_expr`).
fn struct_lit(p: &mut Parser) {
    assert!(p.at(L_CURLY));
    p.bump();
    while !p.at(EOF) && !p.at(R_CURLY) {
        match p.current() {
            IDENT => {
                let m = p.start();
                name_ref(p);
                // `x` alone is field shorthand; `x: expr` is explicit.
                if p.eat(COLON) {
                    expr(p);
                }
                m.complete(p, STRUCT_LIT_FIELD);
            }
            // `..base` — functional-update base expression.
            DOTDOT => {
                p.bump();
                expr(p);
            }
            _ => p.err_and_bump("expected identifier"),
        }
        if !p.at(R_CURLY) {
            p.expect(COMMA);
        }
    }
    p.expect(R_CURLY);
}
diff --git a/crates/libsyntax2/src/grammar/items/consts.rs b/crates/libsyntax2/src/grammar/items/consts.rs new file mode 100644 index 000000000..b11949b49 --- /dev/null +++ b/crates/libsyntax2/src/grammar/items/consts.rs | |||
@@ -0,0 +1,21 @@ | |||
use super::*;

/// Parses a `static` item; thin wrapper over `const_or_static`.
pub(super) fn static_item(p: &mut Parser) {
    const_or_static(p, STATIC_KW)
}

/// Parses a `const` item; thin wrapper over `const_or_static`.
pub(super) fn const_item(p: &mut Parser) {
    const_or_static(p, CONST_KW)
}
10 | |||
/// Shared tail of `const` and `static` items:
/// `[mut] name : type [= expr] ;`. The caller tells us which keyword we
/// are sitting on.
fn const_or_static(p: &mut Parser, kw: SyntaxKind) {
    assert!(p.at(kw));
    p.bump();
    p.eat(MUT_KW); // TODO: validator to forbid const mut
    name(p);
    types::ascription(p);
    if p.eat(EQ) {
        expressions::expr(p);
    }
    p.expect(SEMI);
}
diff --git a/crates/libsyntax2/src/grammar/items/mod.rs b/crates/libsyntax2/src/grammar/items/mod.rs new file mode 100644 index 000000000..3bf906f85 --- /dev/null +++ b/crates/libsyntax2/src/grammar/items/mod.rs | |||
@@ -0,0 +1,332 @@ | |||
1 | use super::*; | ||
2 | |||
3 | mod consts; | ||
4 | mod structs; | ||
5 | mod traits; | ||
6 | mod use_item; | ||
7 | |||
// test mod_contents
// fn foo() {}
// macro_rules! foo {}
// foo::bar!();
// super::baz! {}
// struct S;
/// Parses the items of a module body (or of a whole file when
/// `stop_on_r_curly` is false), after consuming any inner attributes.
pub(super) fn mod_contents(p: &mut Parser, stop_on_r_curly: bool) {
    attributes::inner_attributes(p);
    while !p.at(EOF) && !(stop_on_r_curly && p.at(R_CURLY)) {
        item_or_macro(p, stop_on_r_curly)
    }
}
20 | |||
/// Parses one item, or recovers: input that starts like a path becomes a
/// macro call, a stray `{` becomes an error block, and anything else is
/// reported as a single errored token.
pub(super) fn item_or_macro(p: &mut Parser, stop_on_r_curly: bool) {
    let m = p.start();
    match maybe_item(p) {
        MaybeItem::Item(kind) => {
            m.complete(p, kind);
        }
        MaybeItem::None => {
            if paths::is_path_start(p) {
                // Not an item but path-like: `foo::bar!( ... )`.
                match macro_call(p) {
                    BlockLike::Block => (),
                    BlockLike::NotBlock => {
                        // Non-block macro calls need a trailing `;`.
                        p.expect(SEMI);
                    }
                }
                m.complete(p, MACRO_CALL);
            } else {
                m.abandon(p);
                if p.at(L_CURLY) {
                    error_block(p, "expected an item");
                } else if !p.at(EOF) && !(stop_on_r_curly && p.at(R_CURLY)) {
                    // Skip one token so the caller's loop makes progress.
                    p.err_and_bump("expected an item");
                } else {
                    p.error("expected an item");
                }
            }
        }
        MaybeItem::Modifiers => {
            // Modifiers (e.g. `unsafe`, `const`) with no item after them.
            p.error("expected fn, trait or impl");
            m.complete(p, ERROR);
        }
    }
}
53 | |||
/// Tokens that may begin an item.
/// NOTE(review): this set omits several starters handled by `maybe_item`
/// (`trait`, `impl`, `const`, `static`, `type`, `unsafe`) — confirm at
/// the call sites whether that is intentional.
pub(super) const ITEM_FIRST: TokenSet =
    token_set![EXTERN_KW, MOD_KW, USE_KW, STRUCT_KW, ENUM_KW, FN_KW, PUB_KW, POUND];
56 | |||
/// Outcome of `maybe_item`.
pub(super) enum MaybeItem {
    // Nothing item-like (and no modifiers) was recognized.
    None,
    // A full item was parsed; the payload is its node kind.
    Item(SyntaxKind),
    // Only modifier keywords were consumed — an error state.
    Modifiers,
}
62 | |||
/// Tries to parse an item. Consumes attributes and visibility first, then
/// either a modifier-free item, or modifiers followed by `fn`/`trait`/
/// `impl`. See `MaybeItem` for the three possible outcomes.
pub(super) fn maybe_item(p: &mut Parser) -> MaybeItem {
    attributes::outer_attributes(p);
    visibility(p);
    // Items that never carry `const`/`unsafe`/`extern` modifiers.
    if let Some(kind) = items_without_modifiers(p) {
        return MaybeItem::Item(kind);
    }

    let mut has_mods = false;
    // modifiers
    has_mods |= p.eat(CONST_KW);

    // test unsafe_block_in_mod
    // fn foo(){} unsafe { } fn bar(){}
    // `unsafe {` is an (erroneous) unsafe block, not a modifier.
    if p.at(UNSAFE_KW) && p.nth(1) != L_CURLY {
        p.eat(UNSAFE_KW);
        has_mods = true;
    }
    if p.at(EXTERN_KW) {
        has_mods = true;
        abi(p);
    }
    // `auto` and `default` are contextual keywords: remap the IDENT only
    // when followed by `trait` / `impl` respectively.
    if p.at(IDENT) && p.at_contextual_kw("auto") && p.nth(1) == TRAIT_KW {
        p.bump_remap(AUTO_KW);
        has_mods = true;
    }
    if p.at(IDENT) && p.at_contextual_kw("default") && p.nth(1) == IMPL_KW {
        p.bump_remap(DEFAULT_KW);
        has_mods = true;
    }

    // items
    let kind = match p.current() {
        // test extern_fn
        // extern fn foo() {}

        // test const_fn
        // const fn foo() {}

        // test const_unsafe_fn
        // const unsafe fn foo() {}

        // test unsafe_extern_fn
        // unsafe extern "C" fn foo() {}

        // test unsafe_fn
        // unsafe fn foo() {}
        FN_KW => {
            function(p);
            FUNCTION
        }

        // test unsafe_trait
        // unsafe trait T {}

        // test auto_trait
        // auto trait T {}

        // test unsafe_auto_trait
        // unsafe auto trait T {}
        TRAIT_KW => {
            traits::trait_item(p);
            TRAIT_ITEM
        }

        // test unsafe_impl
        // unsafe impl Foo {}

        // test default_impl
        // default impl Foo {}

        // test unsafe_default_impl
        // unsafe default impl Foo {}
        IMPL_KW => {
            traits::impl_item(p);
            IMPL_ITEM
        }
        _ => return if has_mods {
            MaybeItem::Modifiers
        } else {
            MaybeItem::None
        }
    };

    MaybeItem::Item(kind)
}
148 | |||
/// Parses items that take no `const`/`unsafe`/`extern` modifiers
/// (`use`, `mod`, `struct`, `enum`, ...). Returns the node kind on
/// success; on `None`, nothing item-specific was consumed.
fn items_without_modifiers(p: &mut Parser) -> Option<SyntaxKind> {
    let la = p.nth(1);
    let kind = match p.current() {
        // test extern_crate
        // extern crate foo;
        EXTERN_KW if la == CRATE_KW => {
            extern_crate_item(p);
            EXTERN_CRATE_ITEM
        }
        TYPE_KW => {
            type_item(p);
            TYPE_ITEM
        }
        MOD_KW => {
            mod_item(p);
            MOD_ITEM
        }
        STRUCT_KW => {
            structs::struct_item(p);
            // `struct S {};` — eat the superfluous `;` with a hint.
            if p.at(SEMI) {
                p.err_and_bump(
                    "expected item, found `;`\n\
                     consider removing this semicolon"
                );
            }
            STRUCT_ITEM
        }
        ENUM_KW => {
            structs::enum_item(p);
            ENUM_ITEM
        }
        USE_KW => {
            use_item::use_item(p);
            USE_ITEM
        }
        // Only `const NAME` / `const mut NAME` — a lone `const` is a
        // modifier (e.g. `const fn`) handled by `maybe_item`.
        CONST_KW if (la == IDENT || la == MUT_KW) => {
            consts::const_item(p);
            CONST_ITEM
        }
        STATIC_KW => {
            consts::static_item(p);
            STATIC_ITEM
        }
        // test extern_block
        // extern {}
        EXTERN_KW if la == L_CURLY || ((la == STRING || la == RAW_STRING) && p.nth(2) == L_CURLY) => {
            abi(p);
            extern_block(p);
            EXTERN_BLOCK_EXPR
        }
        _ => return None,
    };
    Some(kind)
}
203 | |||
204 | fn extern_crate_item(p: &mut Parser) { | ||
205 | assert!(p.at(EXTERN_KW)); | ||
206 | p.bump(); | ||
207 | assert!(p.at(CRATE_KW)); | ||
208 | p.bump(); | ||
209 | name(p); | ||
210 | alias(p); | ||
211 | p.expect(SEMI); | ||
212 | } | ||
213 | |||
/// Parses an `extern { }` block body (the `extern ["abi"]` prefix was
/// consumed by the caller).
/// NOTE(review): foreign items inside the braces are not parsed — the
/// body is expected to be empty. Presumably a placeholder; confirm.
fn extern_block(p: &mut Parser) {
    assert!(p.at(L_CURLY));
    p.bump();
    p.expect(R_CURLY);
}
219 | |||
/// Parses a function after its modifiers: name, generic params, value
/// params, return type, where-clause, then either a body block or `;`
/// (for trait method declarations).
fn function(p: &mut Parser) {
    assert!(p.at(FN_KW));
    p.bump();

    name(p);
    // test function_type_params
    // fn foo<T: Clone + Copy>(){}
    type_params::type_param_list(p);

    if p.at(L_PAREN) {
        params::param_list(p);
    } else {
        p.error("expected function arguments");
    }
    // test function_ret_type
    // fn foo() {}
    // fn bar() -> () {}
    fn_ret_type(p);

    // test function_where_clause
    // fn foo<T>() where T: Copy {}
    type_params::where_clause(p);

    // test fn_decl
    // trait T { fn foo(); }
    if !p.eat(SEMI) {
        expressions::block(p);
    }
}
249 | |||
// test type_item
// type Foo = Bar;
/// Parses a type alias / associated type:
/// `type Name<..> [: bounds] [where ..] [= type] ;`.
fn type_item(p: &mut Parser) {
    assert!(p.at(TYPE_KW));
    p.bump();

    name(p);

    // test type_item_type_params
    // type Result<T> = ();
    type_params::type_param_list(p);

    // Bounds are only meaningful for associated types in traits.
    if p.at(COLON) {
        type_params::bounds(p);
    }

    // test type_item_where_clause
    // type Foo where Foo: Copy = ();
    type_params::where_clause(p);

    // `= type` is absent for bare declarations (`type A;` in traits).
    if p.eat(EQ) {
        types::type_(p);
    }
    p.expect(SEMI);
}
275 | |||
276 | fn mod_item(p: &mut Parser) { | ||
277 | assert!(p.at(MOD_KW)); | ||
278 | p.bump(); | ||
279 | |||
280 | name(p); | ||
281 | if !p.eat(SEMI) { | ||
282 | if p.expect(L_CURLY) { | ||
283 | mod_contents(p, true); | ||
284 | p.expect(R_CURLY); | ||
285 | } | ||
286 | } | ||
287 | } | ||
288 | |||
/// Parses a free-standing macro call: `path ! <token-tree>`.
fn macro_call(p: &mut Parser) -> BlockLike {
    assert!(paths::is_path_start(p));
    paths::use_path(p);
    macro_call_after_excl(p)
}
294 | |||
295 | pub(super) fn macro_call_after_excl(p: &mut Parser) -> BlockLike { | ||
296 | p.expect(EXCL); | ||
297 | p.eat(IDENT); | ||
298 | let flavor = match p.current() { | ||
299 | L_CURLY => { | ||
300 | token_tree(p); | ||
301 | BlockLike::Block | ||
302 | } | ||
303 | L_PAREN | L_BRACK => { | ||
304 | token_tree(p); | ||
305 | BlockLike::NotBlock | ||
306 | } | ||
307 | _ => { | ||
308 | p.error("expected `{`, `[`, `(`"); | ||
309 | BlockLike::NotBlock | ||
310 | }, | ||
311 | }; | ||
312 | |||
313 | flavor | ||
314 | } | ||
315 | |||
316 | fn token_tree(p: &mut Parser) { | ||
317 | let closing_paren_kind = match p.current() { | ||
318 | L_CURLY => R_CURLY, | ||
319 | L_PAREN => R_PAREN, | ||
320 | L_BRACK => R_BRACK, | ||
321 | _ => unreachable!(), | ||
322 | }; | ||
323 | p.bump(); | ||
324 | while !p.at(EOF) && !p.at(closing_paren_kind) { | ||
325 | match p.current() { | ||
326 | L_CURLY | L_PAREN | L_BRACK => token_tree(p), | ||
327 | R_CURLY | R_PAREN | R_BRACK => p.err_and_bump("unmatched brace"), | ||
328 | _ => p.bump() | ||
329 | } | ||
330 | }; | ||
331 | p.expect(closing_paren_kind); | ||
332 | } | ||
diff --git a/crates/libsyntax2/src/grammar/items/structs.rs b/crates/libsyntax2/src/grammar/items/structs.rs new file mode 100644 index 000000000..67616eaad --- /dev/null +++ b/crates/libsyntax2/src/grammar/items/structs.rs | |||
@@ -0,0 +1,116 @@ | |||
1 | use super::*; | ||
2 | |||
/// Parses a struct item after its modifiers: unit (`struct S;`),
/// record (`struct S { .. }`), or tuple (`struct S(..);`) form, with
/// optional generic parameters and where-clause.
pub(super) fn struct_item(p: &mut Parser) {
    assert!(p.at(STRUCT_KW));
    p.bump();

    name(p);
    type_params::type_param_list(p);
    match p.current() {
        // A where-clause before the body: only `;` or a record body may
        // follow (tuple structs put `where` after the field list).
        WHERE_KW => {
            type_params::where_clause(p);
            match p.current() {
                SEMI => {
                    p.bump();
                    return;
                }
                L_CURLY => named_fields(p),
                _ => {
                    //TODO: special case `(` error message
                    p.error("expected `;` or `{`");
                    return;
                }
            }
        }
        SEMI => {
            p.bump();
            return;
        }
        L_CURLY => named_fields(p),
        L_PAREN => {
            pos_fields(p);
            // NOTE(review): `struct S(u32) where ...;` is not handled
            // here — confirm whether the omission is intentional.
            p.expect(SEMI);
        }
        _ => {
            p.error("expected `;`, `{`, or `(`");
            return;
        }
    }
}
40 | |||
/// Parses `enum Name<..> [where ..] { Variant, ... }` where each variant
/// may carry record fields, tuple fields, or an `= expr` discriminant.
pub(super) fn enum_item(p: &mut Parser) {
    assert!(p.at(ENUM_KW));
    p.bump();
    name(p);
    type_params::type_param_list(p);
    type_params::where_clause(p);
    if p.expect(L_CURLY) {
        while !p.at(EOF) && !p.at(R_CURLY) {
            let var = p.start();
            attributes::outer_attributes(p);
            if p.at(IDENT) {
                name(p);
                match p.current() {
                    L_CURLY => named_fields(p),
                    L_PAREN => pos_fields(p),
                    // `Variant = expr` — explicit discriminant.
                    EQ => {
                        p.bump();
                        expressions::expr(p);
                    }
                    _ => (),
                }
                var.complete(p, ENUM_VARIANT);
            } else {
                var.abandon(p);
                // Skip one token so the loop makes progress.
                p.err_and_bump("expected enum variant");
            }
            if !p.at(R_CURLY) {
                p.expect(COMMA);
            }
        }
        p.expect(R_CURLY);
    }
}
74 | |||
/// Parses a record field list `{ [vis] name: type, ... }`, shared by
/// structs and enum variants.
fn named_fields(p: &mut Parser) {
    assert!(p.at(L_CURLY));
    p.bump();
    while !p.at(R_CURLY) && !p.at(EOF) {
        named_field(p);
        if !p.at(R_CURLY) {
            p.expect(COMMA);
        }
    }
    p.expect(R_CURLY);

    // One `[vis] name: type` field; on error, bumps a token so the
    // enclosing loop makes progress.
    fn named_field(p: &mut Parser) {
        let field = p.start();
        visibility(p);
        if p.at(IDENT) {
            name(p);
            p.expect(COLON);
            types::type_(p);
            field.complete(p, NAMED_FIELD);
        } else {
            field.abandon(p);
            p.err_and_bump("expected field declaration");
        }
    }
}
100 | |||
101 | fn pos_fields(p: &mut Parser) { | ||
102 | if !p.expect(L_PAREN) { | ||
103 | return; | ||
104 | } | ||
105 | while !p.at(R_PAREN) && !p.at(EOF) { | ||
106 | let pos_field = p.start(); | ||
107 | visibility(p); | ||
108 | types::type_(p); | ||
109 | pos_field.complete(p, POS_FIELD); | ||
110 | |||
111 | if !p.at(R_PAREN) { | ||
112 | p.expect(COMMA); | ||
113 | } | ||
114 | } | ||
115 | p.expect(R_PAREN); | ||
116 | } | ||
diff --git a/crates/libsyntax2/src/grammar/items/traits.rs b/crates/libsyntax2/src/grammar/items/traits.rs new file mode 100644 index 000000000..0b9fb2b0b --- /dev/null +++ b/crates/libsyntax2/src/grammar/items/traits.rs | |||
@@ -0,0 +1,87 @@ | |||
1 | use super::*; | ||
2 | |||
// test trait_item
// trait T<U>: Hash + Clone where U: Copy {}
/// Parses `trait Name<..> [: bounds] [where ..] { items }`. Modifiers
/// (`unsafe`, `auto`) were consumed upstream; `trait` must be current.
pub(super) fn trait_item(p: &mut Parser) {
    assert!(p.at(TRAIT_KW));
    p.bump();
    name(p);
    type_params::type_param_list(p);
    if p.at(COLON) {
        type_params::bounds(p);
    }
    type_params::where_clause(p);
    p.expect(L_CURLY);
    // test trait_item_items
    // impl F {
    // type A: Clone;
    // const B: i32;
    // fn foo() {}
    // fn bar(&self);
    // }

    // NOTE(review): the fixture above says `impl F` although it lists
    // trait-style items — presumably copy-pasted from the impl test;
    // confirm the snippet matches this test's intent.
    while !p.at(EOF) && !p.at(R_CURLY) {
        item_or_macro(p, true);
    }
    p.expect(R_CURLY);
}
27 | |||
// test impl_item
// impl Foo {}
/// Parses `impl [<..>] [!]Type [for Type] [where ..] { items }`.
/// Modifiers (`unsafe`, `default`) were consumed upstream.
pub(super) fn impl_item(p: &mut Parser) {
    assert!(p.at(IMPL_KW));
    p.bump();
    // `<` may start either generics or a qualified path; disambiguate.
    if choose_type_params_over_qpath(p) {
        type_params::type_param_list(p);
    }

    // TODO: never type
    // impl ! {}

    // test impl_item_neg
    // impl !Send for X {}
    p.eat(EXCL);
    types::type_(p);
    // `for Type` only appears in trait impls.
    if p.eat(FOR_KW) {
        types::type_(p);
    }
    type_params::where_clause(p);
    p.expect(L_CURLY);

    // test impl_item_items
    // impl F {
    // type A = i32;
    // const B: i32 = 92;
    // fn foo() {}
    // fn bar(&self) {}
    // }
    while !p.at(EOF) && !p.at(R_CURLY) {
        item_or_macro(p, true);
    }
    p.expect(R_CURLY);
}
62 | |||
/// Decides whether a `<` directly after `impl` starts generic parameters
/// (true) or a qualified path (false). Pure lookahead — consumes nothing.
fn choose_type_params_over_qpath(p: &Parser) -> bool {
    // There's an ambiguity between generic parameters and qualified paths in impls.
    // If we see `<` it may start both, so we have to inspect some following tokens.
    // The following combinations can only start generics,
    // but not qualified paths (with one exception):
    //     `<` `>` - empty generic parameters
    //     `<` `#` - generic parameters with attributes
    //     `<` (LIFETIME|IDENT) `>` - single generic parameter
    //     `<` (LIFETIME|IDENT) `,` - first generic parameter in a list
    //     `<` (LIFETIME|IDENT) `:` - generic parameter with bounds
    //     `<` (LIFETIME|IDENT) `=` - generic parameter with a default
    // The only truly ambiguous case is
    //     `<` IDENT `>` `::` IDENT ...
    // we disambiguate it in favor of generics (`impl<T> ::absolute::Path<T> { ... }`)
    // because this is what almost always expected in practice, qualified paths in impls
    // (`impl <Type>::AssocTy { ... }`) aren't even allowed by type checker at the moment.
    if !p.at(L_ANGLE) {
        return false;
    }
    if p.nth(1) == POUND || p.nth(1) == R_ANGLE {
        return true;
    }
    (p.nth(1) == LIFETIME || p.nth(1) == IDENT)
        && (p.nth(2) == R_ANGLE || p.nth(2) == COMMA || p.nth(2) == COLON || p.nth(2) == EQ)
}
diff --git a/crates/libsyntax2/src/grammar/items/use_item.rs b/crates/libsyntax2/src/grammar/items/use_item.rs new file mode 100644 index 000000000..a3f7f0da8 --- /dev/null +++ b/crates/libsyntax2/src/grammar/items/use_item.rs | |||
@@ -0,0 +1,66 @@ | |||
1 | use super::*; | ||
2 | |||
/// Parses a `use` item: `use <tree> ;`.
pub(super) fn use_item(p: &mut Parser) {
    assert!(p.at(USE_KW));
    p.bump();
    use_tree(p);
    p.expect(SEMI);
}
9 | |||
10 | fn use_tree(p: &mut Parser) { | ||
11 | let la = p.nth(1); | ||
12 | let m = p.start(); | ||
13 | match (p.current(), la) { | ||
14 | (STAR, _) => p.bump(), | ||
15 | (COLONCOLON, STAR) => { | ||
16 | p.bump(); | ||
17 | p.bump(); | ||
18 | } | ||
19 | (L_CURLY, _) | (COLONCOLON, L_CURLY) => { | ||
20 | if p.at(COLONCOLON) { | ||
21 | p.bump(); | ||
22 | } | ||
23 | nested_trees(p); | ||
24 | } | ||
25 | _ if paths::is_path_start(p) => { | ||
26 | paths::use_path(p); | ||
27 | match p.current() { | ||
28 | AS_KW => { | ||
29 | alias(p); | ||
30 | } | ||
31 | COLONCOLON => { | ||
32 | p.bump(); | ||
33 | match p.current() { | ||
34 | STAR => { | ||
35 | p.bump(); | ||
36 | } | ||
37 | L_CURLY => nested_trees(p), | ||
38 | _ => { | ||
39 | // is this unreachable? | ||
40 | p.error("expected `{` or `*`"); | ||
41 | } | ||
42 | } | ||
43 | } | ||
44 | _ => (), | ||
45 | } | ||
46 | } | ||
47 | _ => { | ||
48 | m.abandon(p); | ||
49 | p.err_and_bump("expected one of `*`, `::`, `{`, `self`, `super`, `indent`"); | ||
50 | return; | ||
51 | } | ||
52 | } | ||
53 | m.complete(p, USE_TREE); | ||
54 | } | ||
55 | |||
56 | fn nested_trees(p: &mut Parser) { | ||
57 | assert!(p.at(L_CURLY)); | ||
58 | p.bump(); | ||
59 | while !p.at(EOF) && !p.at(R_CURLY) { | ||
60 | use_tree(p); | ||
61 | if !p.at(R_CURLY) { | ||
62 | p.expect(COMMA); | ||
63 | } | ||
64 | } | ||
65 | p.expect(R_CURLY); | ||
66 | } | ||
diff --git a/crates/libsyntax2/src/grammar/mod.rs b/crates/libsyntax2/src/grammar/mod.rs new file mode 100644 index 000000000..e1329044d --- /dev/null +++ b/crates/libsyntax2/src/grammar/mod.rs | |||
@@ -0,0 +1,161 @@ | |||
1 | //! This is the actual "grammar" of the Rust language. | ||
2 | //! | ||
3 | //! Each function in this module and its children corresponds | ||
4 | //! to a production of the format grammar. Submodules roughly | ||
5 | //! correspond to different *areas* of the grammar. By convention, | ||
6 | //! each submodule starts with `use super::*` import and exports | ||
7 | //! "public" productions via `pub(super)`. | ||
8 | //! | ||
9 | //! See docs for `Parser` to learn about API, available to the grammar, | ||
10 | //! and see docs for `Event` to learn how this actually manages to | ||
11 | //! produce parse trees. | ||
12 | //! | ||
13 | //! Code in this module also contains inline tests, which start with | ||
14 | //! `// test name-of-the-test` comment and look like this: | ||
15 | //! | ||
16 | //! ``` | ||
17 | //! // test function_with_zero_parameters | ||
18 | //! // fn foo() {} | ||
19 | //! ``` | ||
20 | //! | ||
21 | //! After adding a new inline-test, run `cargo collect-tests` to extract | ||
22 | //! it as a standalone text-fixture into `tests/data/parser/inline`, and | ||
23 | //! run `cargo test` once to create the "gold" value. | ||
24 | mod attributes; | ||
25 | mod expressions; | ||
26 | mod items; | ||
27 | mod params; | ||
28 | mod paths; | ||
29 | mod patterns; | ||
30 | mod type_args; | ||
31 | mod type_params; | ||
32 | mod types; | ||
33 | |||
34 | use { | ||
35 | parser_api::{CompletedMarker, Parser, TokenSet}, | ||
36 | SyntaxKind::{self, *}, | ||
37 | }; | ||
38 | |||
// Entry point of the grammar: parses a whole source file into a FILE
// node, consuming an optional leading shebang first.
pub(crate) fn file(p: &mut Parser) {
    let file = p.start();
    p.eat(SHEBANG);
    items::mod_contents(p, false);
    file.complete(p, FILE);
}
45 | |||
46 | |||
/// Whether an expression ends with a block (`{ ... }`), which affects
/// e.g. whether a trailing `;` is required after it.
#[derive(Clone, Copy, PartialEq, Eq)]
enum BlockLike {
    Block,
    NotBlock,
}

impl BlockLike {
    /// Returns `true` for the `Block` variant.
    fn is_block(self) -> bool {
        matches!(self, BlockLike::Block)
    }
}
56 | |||
// Parses an optional visibility modifier: `pub`, `pub(crate)`,
// `pub(self)`, `pub(super)`, `pub(in path)`, or bare `crate`.
// Produces a VISIBILITY node when present; does nothing otherwise.
fn visibility(p: &mut Parser) {
    match p.current() {
        PUB_KW => {
            let m = p.start();
            p.bump();
            if p.at(L_PAREN) {
                match p.nth(1) {
                    // test crate_visibility
                    // pub(crate) struct S;
                    // pub(self) struct S;
                    // pub(self) struct S;
                    // pub(self) struct S;
                    // NOTE(review): the three `pub(self)` lines above look like a
                    // copy-paste slip — presumably `pub(super)` was intended for
                    // one of them; confirm against the collected test fixture.
                    CRATE_KW | SELF_KW | SUPER_KW => {
                        // e.g. `pub(crate)` — bump the keyword and close the paren.
                        p.bump();
                        p.bump();
                        p.expect(R_PAREN);
                    }
                    IN_KW => {
                        // `pub(in some::path)`
                        p.bump();
                        p.bump();
                        paths::use_path(p);
                        p.expect(R_PAREN);
                    }
                    // `pub` followed by `(` that is not a restriction, e.g. a
                    // tuple struct `pub (a, b)`: leave the paren for the caller.
                    _ => (),
                }
            }
            m.complete(p, VISIBILITY);
        }
        // test crate_keyword_vis
        // crate fn main() { }
        CRATE_KW => {
            let m = p.start();
            p.bump();
            m.complete(p, VISIBILITY);
        }
        _ => (),
    }
}
// Parses an optional `as name` alias. Always returns `true` for now.
fn alias(p: &mut Parser) -> bool {
    if p.at(AS_KW) {
        let alias = p.start();
        p.bump();
        name(p);
        alias.complete(p, ALIAS);
    }
    true //FIXME: return false if there are errors
}
104 | |||
// Parses an ABI specifier: `extern` optionally followed by an ABI
// string, e.g. `extern "C"`. Caller must be at `extern`.
fn abi(p: &mut Parser) {
    assert!(p.at(EXTERN_KW));
    let abi = p.start();
    p.bump();
    match p.current() {
        STRING | RAW_STRING => p.bump(),
        _ => (),
    }
    abi.complete(p, ABI);
}
115 | |||
// Parses an optional return type `-> T`. Returns whether one was seen.
fn fn_ret_type(p: &mut Parser) -> bool {
    if p.at(THIN_ARROW) {
        p.bump();
        types::type_(p);
        true
    } else {
        false
    }
}
125 | |||
// Parses a defining occurrence of an identifier into a NAME node;
// reports an error (without consuming) if the current token is not one.
fn name(p: &mut Parser) {
    if p.at(IDENT) {
        let m = p.start();
        p.bump();
        m.complete(p, NAME);
    } else {
        p.error("expected a name");
    }
}
135 | |||
// Parses a referencing occurrence of an identifier into a NAME_REF
// node; reports an error (without consuming) otherwise.
fn name_ref(p: &mut Parser) {
    if p.at(IDENT) {
        let m = p.start();
        p.bump();
        m.complete(p, NAME_REF);
    } else {
        p.error("expected identifier");
    }
}
145 | |||
// Error recovery: swallows a whole `{ ... }` block (tracking nested
// braces) into an ERROR node, emitting `message` once at the start.
// Caller must be at `{`.
fn error_block(p: &mut Parser, message: &str) {
    assert!(p.at(L_CURLY));
    let err = p.start();
    p.error(message);
    p.bump();
    // Brace-nesting depth; we stop when the opening `{` is matched.
    let mut level: u32 = 1;
    while level > 0 && !p.at(EOF) {
        match p.current() {
            L_CURLY => level += 1,
            R_CURLY => level -= 1,
            _ => (),
        }
        p.bump();
    }
    err.complete(p, ERROR);
}
diff --git a/crates/libsyntax2/src/grammar/params.rs b/crates/libsyntax2/src/grammar/params.rs new file mode 100644 index 000000000..32e905cb2 --- /dev/null +++ b/crates/libsyntax2/src/grammar/params.rs | |||
@@ -0,0 +1,116 @@ | |||
1 | use super::*; | ||
2 | |||
// test param_list
// fn a() {}
// fn b(x: i32) {}
// fn c(x: i32, ) {}
// fn d(x: i32, y: ()) {}
// Parses an ordinary fn parameter list: parenthesized, patterns and
// types both required.
pub(super) fn param_list(p: &mut Parser) {
    list_(p, Flavor::Normal)
}
11 | |||
// test param_list_opt_patterns
// fn foo<F: FnMut(&mut Foo<'a>)>(){}
// Parses a parameter list where patterns may be omitted, as in
// `Fn`-trait sugar.
pub(super) fn param_list_opt_patterns(p: &mut Parser) {
    list_(p, Flavor::OptionalPattern)
}
17 | |||
// Parses a `|...|` closure parameter list, where type ascriptions are
// optional.
pub(super) fn param_list_opt_types(p: &mut Parser) {
    list_(p, Flavor::OptionalType)
}
21 | |||
/// Which parts of a parameter are mandatory in the list being parsed.
#[derive(Clone, Copy, Eq, PartialEq)]
enum Flavor {
    OptionalType,
    OptionalPattern,
    Normal,
}

impl Flavor {
    /// Type ascriptions are mandatory for every flavor except closures
    /// (`OptionalType`).
    fn type_required(self) -> bool {
        match self {
            Flavor::OptionalType => false,
            Flavor::OptionalPattern | Flavor::Normal => true,
        }
    }
}
37 | |||
// Shared worker for all parameter-list flavors. Paren-delimited lists
// (type required) may start with a self-parameter; closure lists are
// pipe-delimited.
fn list_(p: &mut Parser, flavor: Flavor) {
    // Delimiters depend on flavor: `( ... )` for fn-like lists,
    // `| ... |` for closures.
    let (bra, ket) = if flavor.type_required() {
        (L_PAREN, R_PAREN)
    } else {
        (PIPE, PIPE)
    };
    assert!(p.at(bra));
    let m = p.start();
    p.bump();
    if flavor.type_required() {
        self_param(p);
    }
    while !p.at(EOF) && !p.at(ket) {
        value_parameter(p, flavor);
        if !p.at(ket) {
            p.expect(COMMA);
        }
    }
    p.expect(ket);
    m.complete(p, PARAM_LIST);
}
59 | |||
// Parses one (non-self) parameter, honoring the list's flavor.
fn value_parameter(p: &mut Parser, flavor: Flavor) {
    let m = p.start();
    match flavor {
        Flavor::OptionalType | Flavor::Normal => {
            patterns::pattern(p);
            // Closures may omit the type; fn lists require it.
            if p.at(COLON) || flavor.type_required() {
                types::ascription(p)
            }
        },
        // test value_parameters_no_patterns
        // type F = Box<Fn(a: i32, &b: &i32, &mut c: &i32, ())>;
        Flavor::OptionalPattern => {
            // Disambiguate `pattern: type` from a bare type by peeking for
            // one of the shapes `ident:`, `&ident:`, `&mut ident:`.
            let la0 = p.current();
            let la1 = p.nth(1);
            let la2 = p.nth(2);
            let la3 = p.nth(3);
            if la0 == IDENT && la1 == COLON
                || la0 == AMP && la1 == IDENT && la2 == COLON
                || la0 == AMP && la1 == MUT_KW && la2 == IDENT && la3 == COLON {
                patterns::pattern(p);
                types::ascription(p);
            } else {
                types::type_(p);
            }
        },
    }
    m.complete(p, PARAM);
}
88 | |||
// test self_param
// impl S {
//     fn a(self) {}
//     fn b(&self,) {}
//     fn c(&'a self,) {}
//     fn d(&'a mut self, x: i32) {}
// }
// Parses an optional leading self-parameter (`self`, `&self`,
// `&mut self`, `&'a self`, `&'a mut self`) and the comma after it.
fn self_param(p: &mut Parser) {
    let la1 = p.nth(1);
    let la2 = p.nth(2);
    let la3 = p.nth(3);
    // Number of tokens the recognized self form occupies; bail out with
    // no node if the lookahead doesn't match any form.
    let n_toks = match (p.current(), la1, la2, la3) {
        (SELF_KW, _, _, _) => 1,
        (AMP, SELF_KW, _, _) => 2,
        (AMP, MUT_KW, SELF_KW, _) => 3,
        (AMP, LIFETIME, SELF_KW, _) => 3,
        (AMP, LIFETIME, MUT_KW, SELF_KW) => 4,
        _ => return,
    };
    let m = p.start();
    for _ in 0..n_toks {
        p.bump();
    }
    m.complete(p, SELF_PARAM);
    // A comma is required unless `self` was the last parameter.
    if !p.at(R_PAREN) {
        p.expect(COMMA);
    }
}
diff --git a/crates/libsyntax2/src/grammar/paths.rs b/crates/libsyntax2/src/grammar/paths.rs new file mode 100644 index 000000000..c277e2a6b --- /dev/null +++ b/crates/libsyntax2/src/grammar/paths.rs | |||
@@ -0,0 +1,86 @@ | |||
1 | use super::*; | ||
2 | |||
// Returns whether the current token can begin a path
// (`foo`, `self`, `super`, or a leading `::`).
pub(super) fn is_path_start(p: &Parser) -> bool {
    match p.current() {
        IDENT | SELF_KW | SUPER_KW | COLONCOLON => true,
        _ => false,
    }
}
9 | |||
// Parses a path in use-item position (no generic args).
pub(super) fn use_path(p: &mut Parser) {
    path(p, Mode::Use)
}

// Parses a path in type position (`Foo<T>` needs no `::` before `<`).
pub(super) fn type_path(p: &mut Parser) {
    path(p, Mode::Type)
}

// Parses a path in expression position (generic args need turbofish
// `::<...>`).
pub(super) fn expr_path(p: &mut Parser) {
    path(p, Mode::Expr)
}

// Grammatical context of a path; controls how generic arguments are
// parsed (see `path_generic_args`).
#[derive(Clone, Copy, Eq, PartialEq)]
enum Mode {
    Use,
    Type,
    Expr,
}
28 | |||
// Parses a possibly-qualified path, building a left-nested tree of
// PATH nodes by repeatedly wrapping the qualifier with `precede`.
// Stops before `::*` / `::{` so use-tree parsing can take over.
fn path(p: &mut Parser, mode: Mode) {
    if !is_path_start(p) {
        return;
    }
    let path = p.start();
    path_segment(p, mode, true);
    let mut qual = path.complete(p, PATH);
    loop {
        // `::` followed by `*` or `{` belongs to the enclosing use tree,
        // not to this path.
        let use_tree = match p.nth(1) {
            STAR | L_CURLY => true,
            _ => false,
        };
        if p.at(COLONCOLON) && !use_tree {
            // Wrap the path parsed so far as the qualifier of a new,
            // larger PATH node.
            let path = qual.precede(p);
            p.bump();
            path_segment(p, mode, false);
            let path = path.complete(p, PATH);
            qual = path;
        } else {
            break;
        }
    }
}
52 | |||
// Parses one path segment. Only the first segment may carry a leading
// `::`; `self`/`super` segments never take generic arguments.
fn path_segment(p: &mut Parser, mode: Mode, first: bool) {
    let segment = p.start();
    if first {
        p.eat(COLONCOLON);
    }
    match p.current() {
        IDENT => {
            name_ref(p);
            path_generic_args(p, mode);
        }
        SELF_KW | SUPER_KW => p.bump(),
        _ => {
            p.error("expected identifier");
        }
    };
    segment.complete(p, PATH_SEGMENT);
}
70 | |||
// Parses the generic-argument part of a segment depending on context:
// none in use paths, `<...>` or `Fn(...) -> T` sugar in type paths,
// turbofish-only (`::<...>`) in expression paths.
fn path_generic_args(p: &mut Parser, mode: Mode) {
    match mode {
        Mode::Use => return,
        Mode::Type => {
            // test path_fn_trait_args
            // type F = Box<Fn(x: i32) -> ()>;
            if p.at(L_PAREN) {
                params::param_list_opt_patterns(p);
                fn_ret_type(p);
            } else {
                type_args::type_arg_list(p, false)
            }
        },
        Mode::Expr => type_args::type_arg_list(p, true),
    }
}
diff --git a/crates/libsyntax2/src/grammar/patterns.rs b/crates/libsyntax2/src/grammar/patterns.rs new file mode 100644 index 000000000..436f3b26d --- /dev/null +++ b/crates/libsyntax2/src/grammar/patterns.rs | |||
@@ -0,0 +1,204 @@ | |||
1 | use super::*; | ||
2 | |||
// Parses a pattern: an atom pattern, optionally extended into a range
// pattern by `...`.
pub(super) fn pattern(p: &mut Parser) {
    if let Some(lhs) = atom_pat(p) {
        // test range_pat
        // fn main() {
        //     match 92 { 0 ... 100 => () }
        // }
        if p.at(DOTDOTDOT) {
            // Wrap the already-parsed atom as the left end of a RANGE_PAT.
            let m = lhs.precede(p);
            p.bump();
            atom_pat(p);
            m.complete(p, RANGE_PAT);
        }
    }
}
17 | |||
// Parses a single non-range pattern. Returns `None` only on error
// recovery (unexpected token is reported and bumped).
fn atom_pat(p: &mut Parser) -> Option<CompletedMarker> {
    let la0 = p.nth(0);
    let la1 = p.nth(1);
    // A lone identifier is a binding unless followed by `::`, `(` or `{`
    // (which make it a path / tuple-struct / struct pattern).
    if la0 == REF_KW || la0 == MUT_KW
        || (la0 == IDENT && !(la1 == COLONCOLON || la1 == L_PAREN || la1 == L_CURLY)) {
        return Some(bind_pat(p, true));
    }
    if paths::is_path_start(p) {
        return Some(path_pat(p));
    }

    // test literal_pattern
    // fn main() {
    //     match () {
    //         92   => (),
    //         'c'  => (),
    //         "hello" => (),
    //     }
    // }
    match expressions::literal(p) {
        Some(m) => return Some(m),
        None => (),
    }

    let m = match la0 {
        UNDERSCORE => placeholder_pat(p),
        AMP => ref_pat(p),
        L_PAREN => tuple_pat(p),
        L_BRACK => slice_pat(p),
        _ => {
            p.err_and_bump("expected pattern");
            return None;
        }
    };
    Some(m)
}
54 | |||
// test path_part
// fn foo() {
//     let foo::Bar = ();
//     let ::Bar = ();
//     let Bar { .. } = ();
//     let Bar(..) = ();
// }
// Parses a path-based pattern; the node kind depends on what follows
// the path: `(` -> tuple struct, `{` -> struct, otherwise plain path.
fn path_pat(p: &mut Parser) -> CompletedMarker {
    let m = p.start();
    paths::expr_path(p);
    let kind = match p.current() {
        L_PAREN => {
            tuple_pat_fields(p);
            TUPLE_STRUCT_PAT
        }
        L_CURLY => {
            struct_pat_fields(p);
            STRUCT_PAT
        }
        _ => PATH_PAT
    };
    m.complete(p, kind)
}
78 | |||
// test tuple_pat_fields
// fn foo() {
//     let S() = ();
//     let S(_) = ();
//     let S(_,) = ();
//     let S(_, .. , x) = ();
// }
// Parses the `( ... )` field list of tuple / tuple-struct patterns;
// `..` is allowed as a field. Caller must be at `(`.
fn tuple_pat_fields(p: &mut Parser) {
    assert!(p.at(L_PAREN));
    p.bump();
    while !p.at(EOF) && !p.at(R_PAREN) {
        match p.current() {
            DOTDOT => p.bump(),
            _ => pattern(p),
        }
        if !p.at(R_PAREN) {
            p.expect(COMMA);
        }
    }
    p.expect(R_PAREN);
}
100 | |||
// test struct_pat_fields
// fn foo() {
//     let S {} = ();
//     let S { f, ref mut g } = ();
//     let S { h: _, ..} = ();
//     let S { h: _, } = ();
// }
// Parses the `{ ... }` field list of struct patterns: `..`,
// `field: pat`, or shorthand bindings. Caller must be at `{`.
fn struct_pat_fields(p: &mut Parser) {
    assert!(p.at(L_CURLY));
    p.bump();
    while !p.at(EOF) && !p.at(R_CURLY) {
        match p.current() {
            DOTDOT => p.bump(),
            // `field: pattern` — consume name and colon, then the pattern.
            IDENT if p.nth(1) == COLON => {
                p.bump();
                p.bump();
                pattern(p);
            }
            // Shorthand `f` / `ref mut g`; `@` sub-patterns are not
            // allowed here, hence `with_at = false`.
            _ => {
                bind_pat(p, false);
            }
        }
        if !p.at(R_CURLY) {
            p.expect(COMMA);
        }
    }
    p.expect(R_CURLY);
}
129 | |||
// test placeholder_pat
// fn main() { let _ = (); }
// Parses the `_` wildcard pattern. Caller must be at `_`.
fn placeholder_pat(p: &mut Parser) -> CompletedMarker {
    assert!(p.at(UNDERSCORE));
    let m = p.start();
    p.bump();
    m.complete(p, PLACEHOLDER_PAT)
}
138 | |||
// test ref_pat
// fn main() {
//     let &a = ();
//     let &mut b = ();
// }
// Parses `&pat` / `&mut pat`. Caller must be at `&`.
fn ref_pat(p: &mut Parser) -> CompletedMarker {
    assert!(p.at(AMP));
    let m = p.start();
    p.bump();
    p.eat(MUT_KW);
    pattern(p);
    m.complete(p, REF_PAT)
}
152 | |||
// test tuple_pat
// fn main() {
//     let (a, b, ..) = ();
// }
// Parses a tuple pattern; delegates the `( ... )` contents to
// `tuple_pat_fields`. Caller must be at `(`.
fn tuple_pat(p: &mut Parser) -> CompletedMarker {
    assert!(p.at(L_PAREN));
    let m = p.start();
    tuple_pat_fields(p);
    m.complete(p, TUPLE_PAT)
}
163 | |||
// test slice_pat
// fn main() {
//     let [a, b, ..] = [];
// }
// Parses a slice pattern `[ ... ]`, allowing `..` among the elements.
// Caller must be at `[`.
fn slice_pat(p: &mut Parser) -> CompletedMarker {
    assert!(p.at(L_BRACK));
    let m = p.start();
    p.bump();
    while !p.at(EOF) && !p.at(R_BRACK) {
        match p.current() {
            DOTDOT => p.bump(),
            _ => pattern(p),
        }
        if !p.at(R_BRACK) {
            p.expect(COMMA);
        }
    }
    p.expect(R_BRACK);

    m.complete(p, SLICE_PAT)
}
185 | |||
// test bind_pat
// fn main() {
//     let a = ();
//     let mut b = ();
//     let ref c = ();
//     let ref mut d = ();
//     let e @ _ = ();
//     let ref mut f @ g @ _ = ();
// }
// Parses a binding pattern: optional `ref` / `mut`, a name, and —
// when `with_at` is set (not inside struct-pattern shorthand) — an
// optional `@ sub-pattern`.
fn bind_pat(p: &mut Parser, with_at: bool) -> CompletedMarker {
    let m = p.start();
    p.eat(REF_KW);
    p.eat(MUT_KW);
    name(p);
    if with_at && p.eat(AT) {
        pattern(p);
    }
    m.complete(p, BIND_PAT)
}
diff --git a/crates/libsyntax2/src/grammar/type_args.rs b/crates/libsyntax2/src/grammar/type_args.rs new file mode 100644 index 000000000..5b960f10b --- /dev/null +++ b/crates/libsyntax2/src/grammar/type_args.rs | |||
@@ -0,0 +1,48 @@ | |||
1 | use super::*; | ||
2 | |||
// Parses an optional generic-argument list `<...>`. In expression
// position (`colon_colon_required`) only the turbofish `::<...>` form
// is accepted; in type position a bare `<...>` also works. Does
// nothing if no list starts here.
pub(super) fn type_arg_list(p: &mut Parser, colon_colon_required: bool) {
    let m;
    match (colon_colon_required, p.nth(0), p.nth(1)) {
        (_, COLONCOLON, L_ANGLE) => {
            // Turbofish: consume `::` and `<`.
            m = p.start();
            p.bump();
            p.bump();
        }
        (false, L_ANGLE, _) => {
            m = p.start();
            p.bump();
        }
        _ => return,
    };

    while !p.at(EOF) && !p.at(R_ANGLE) {
        type_arg(p);
        // Stop on a missing comma to avoid looping on junk tokens.
        if !p.at(R_ANGLE) && !p.expect(COMMA) {
            break;
        }
    }
    p.expect(R_ANGLE);
    m.complete(p, TYPE_ARG_LIST);
}
27 | |||
// test type_arg
// type A = B<'static, i32, Item=u64>
// Parses one generic argument: a lifetime, an associated-type binding
// `Name = Type`, or a plain type.
fn type_arg(p: &mut Parser) {
    let m = p.start();
    match p.current() {
        LIFETIME => {
            p.bump();
            m.complete(p, LIFETIME_ARG);
        }
        // `Item = u64` — identifier directly followed by `=`.
        IDENT if p.nth(1) == EQ => {
            name_ref(p);
            p.bump();
            types::type_(p);
            m.complete(p, ASSOC_TYPE_ARG);
        }
        _ => {
            types::type_(p);
            m.complete(p, TYPE_ARG);
        }
    }
}
diff --git a/crates/libsyntax2/src/grammar/type_params.rs b/crates/libsyntax2/src/grammar/type_params.rs new file mode 100644 index 000000000..0a3e8fd07 --- /dev/null +++ b/crates/libsyntax2/src/grammar/type_params.rs | |||
@@ -0,0 +1,127 @@ | |||
1 | use super::*; | ||
2 | |||
// Parses an optional generic-parameter list `<...>` containing
// lifetime and type parameters. Does nothing when not at `<`.
pub(super) fn type_param_list(p: &mut Parser) {
    if !p.at(L_ANGLE) {
        return;
    }
    let m = p.start();
    p.bump();

    while !p.at(EOF) && !p.at(R_ANGLE) {
        match p.current() {
            LIFETIME => lifetime_param(p),
            IDENT => type_param(p),
            _ => p.err_and_bump("expected type parameter"),
        }
        // Stop on a missing comma to avoid spinning on junk tokens.
        if !p.at(R_ANGLE) && !p.expect(COMMA) {
            break;
        }
    }
    p.expect(R_ANGLE);
    m.complete(p, TYPE_PARAM_LIST);

    // Parses `'a` optionally followed by `: 'b + 'c` bounds.
    fn lifetime_param(p: &mut Parser) {
        assert!(p.at(LIFETIME));
        let m = p.start();
        p.bump();
        if p.at(COLON) {
            lifetime_bounds(p);
        }
        m.complete(p, LIFETIME_PARAM);
    }

    // Parses `T`, optionally with `: bounds` and a `= default` type.
    fn type_param(p: &mut Parser) {
        assert!(p.at(IDENT));
        let m = p.start();
        name(p);
        if p.at(COLON) {
            bounds(p);
        }
        // test type_param_default
        // struct S<T = i32>;
        if p.at(EQ) {
            p.bump();
            types::type_(p)
        }
        m.complete(p, TYPE_PARAM);
    }
}
49 | |||
// test type_param_bounds
// struct S<T: 'a + ?Sized + (Copy)>;
// Parses `: bound + bound + ...`. Caller must be at `:`.
pub(super) fn bounds(p: &mut Parser) {
    assert!(p.at(COLON));
    p.bump();
    bounds_without_colon(p);
}
57 | |||
// Parses `: 'a + 'b + ...` (lifetimes only). Caller must be at `:`.
fn lifetime_bounds(p: &mut Parser) {
    assert!(p.at(COLON));
    p.bump();
    while p.at(LIFETIME) {
        p.bump();
        // Each further bound must be introduced by `+`.
        if !p.eat(PLUS) {
            break;
        }
    }
}
68 | |||
// Parses a `+`-separated bound list without the leading colon. Each
// bound is an optionally parenthesized, optionally `?`-prefixed
// lifetime or trait path.
pub(super) fn bounds_without_colon(p: &mut Parser) {
    loop {
        let has_paren = p.eat(L_PAREN);
        p.eat(QUESTION);
        if p.at(FOR_KW) {
            //TODO: higher-ranked bounds (`for<'a> ...`) are not handled yet.
        }
        if p.at(LIFETIME) {
            p.bump();
        } else if paths::is_path_start(p) {
            paths::type_path(p);
        } else {
            break;
        }
        if has_paren {
            p.expect(R_PAREN);
        }
        if !p.eat(PLUS) {
            break;
        }
    }
}
91 | |||
// test where_clause
// fn foo()
// where
//    'a: 'b + 'c,
//    T: Clone + Copy + 'static,
//    Iterator::Item: 'a,
// {}
// Parses an optional `where` clause; predicates run until a token that
// cannot start one (neither a path nor a lifetime).
pub(super) fn where_clause(p: &mut Parser) {
    if !p.at(WHERE_KW) {
        return;
    }
    let m = p.start();
    p.bump();
    loop {
        if !(paths::is_path_start(p) || p.current() == LIFETIME) {
            break
        }
        where_predicate(p);
        // The comma is optional before the body `{` or a trailing `;`.
        if p.current() != L_CURLY && p.current() != SEMI {
            p.expect(COMMA);
        }
    }
    m.complete(p, WHERE_CLAUSE);
}
116 | |||
// Parses one where-predicate: `'a: 'b + ...` or `Type: Bound + ...`.
fn where_predicate(p: &mut Parser) {
    let m = p.start();
    if p.at(LIFETIME) {
        p.eat(LIFETIME);
        lifetime_bounds(p)
    } else {
        types::path_type(p);
        bounds(p);
    }
    m.complete(p, WHERE_PRED);
}
diff --git a/crates/libsyntax2/src/grammar/types.rs b/crates/libsyntax2/src/grammar/types.rs new file mode 100644 index 000000000..0d8c6bfba --- /dev/null +++ b/crates/libsyntax2/src/grammar/types.rs | |||
@@ -0,0 +1,212 @@ | |||
1 | use super::*; | ||
2 | |||
3 | pub(super) fn type_(p: &mut Parser) { | ||
4 | match p.current() { | ||
5 | L_PAREN => paren_or_tuple_type(p), | ||
6 | EXCL => never_type(p), | ||
7 | STAR => pointer_type(p), | ||
8 | L_BRACK => array_or_slice_type(p), | ||
9 | AMP => reference_type(p), | ||
10 | UNDERSCORE => placeholder_type(p), | ||
11 | FN_KW | UNSAFE_KW | EXTERN_KW => fn_pointer_type(p), | ||
12 | FOR_KW => for_type(p), | ||
13 | IMPL_KW => impl_trait_type(p), | ||
14 | _ if paths::is_path_start(p) => path_type(p), | ||
15 | _ => { | ||
16 | p.error("expected type"); | ||
17 | } | ||
18 | } | ||
19 | } | ||
20 | |||
// Parses a type ascription `: T` (colon required).
pub(super) fn ascription(p: &mut Parser) {
    p.expect(COLON);
    type_(p)
}
25 | |||
// Parses a type in positions where a trailing `+ bounds` is not
// allowed (after `&`, `*`). Currently just delegates to `type_`;
// presumably a stricter version is intended — TODO confirm.
fn type_no_plus(p: &mut Parser) {
    type_(p);
}
// Parses `( ... )` in type position, deciding between a parenthesized
// type and a tuple type: exactly one type with no trailing comma is
// PAREN_TYPE; anything else (including `()` and `(T,)`) is TUPLE_TYPE.
fn paren_or_tuple_type(p: &mut Parser) {
    assert!(p.at(L_PAREN));
    let m = p.start();
    p.bump();
    let mut n_types: u32 = 0;
    let mut trailing_comma: bool = false;
    while !p.at(EOF) && !p.at(R_PAREN) {
        n_types += 1;
        type_(p);
        if p.eat(COMMA) {
            trailing_comma = true;
        } else {
            trailing_comma = false;
            break;
        }
    }
    p.expect(R_PAREN);

    let kind = if n_types == 1 && !trailing_comma {
        // test paren_type
        // type T = (i32);
        PAREN_TYPE
    } else {
        // test unit_type
        // type T = ();

        // test singleton_tuple_type
        // type T = (i32,);
        TUPLE_TYPE
    };
    m.complete(p, kind);
}
62 | |||
// test never_type
// type Never = !;
// Parses the never type `!`. Caller must be at `!`.
fn never_type(p: &mut Parser) {
    assert!(p.at(EXCL));
    let m = p.start();
    p.bump();
    m.complete(p, NEVER_TYPE);
}
71 | |||
// Parses a raw-pointer type `*mut T` / `*const T`. A missing
// mutability keyword is an error but parsing continues with the
// pointee type. Caller must be at `*`.
fn pointer_type(p: &mut Parser) {
    assert!(p.at(STAR));
    let m = p.start();
    p.bump();

    match p.current() {
        // test pointer_type_mut
        // type M = *mut ();
        // type C = *mut ();
        // NOTE(review): the second inline-test line is presumably meant to
        // be `*const ()` (matching the `C` name) — confirm against the
        // collected fixture before changing.
        MUT_KW | CONST_KW => p.bump(),
        _ => {
            // test pointer_type_no_mutability
            // type T = *();
            p.error(
                "expected mut or const in raw pointer type \
                 (use `*mut T` or `*const T` as appropriate)",
            );
        }
    };

    type_no_plus(p);
    m.complete(p, POINTER_TYPE);
}
95 | |||
// Parses `[T]` (slice) or `[T; len]` (array); a missing `;`/`]` after
// the element type is reported and the node falls back to SLICE_TYPE.
// Caller must be at `[`.
fn array_or_slice_type(p: &mut Parser) {
    assert!(p.at(L_BRACK));
    let m = p.start();
    p.bump();

    type_(p);
    let kind = match p.current() {
        // test slice_type
        // type T = [()];
        R_BRACK => {
            p.bump();
            SLICE_TYPE
        }

        // test array_type
        // type T = [(); 92];
        SEMI => {
            p.bump();
            expressions::expr(p);
            p.expect(R_BRACK);
            ARRAY_TYPE
        }
        // test array_type_missing_semi
        // type T = [() 92];
        _ => {
            p.error("expected `;` or `]`");
            SLICE_TYPE
        }
    };
    m.complete(p, kind);
}
127 | |||
// test reference_type
// type A = &();
// type B = &'static ();
// type C = &mut ();
// Parses a reference type: `&`, optional lifetime, optional `mut`,
// then the referent type.
fn reference_type(p: &mut Parser) {
    assert!(p.at(AMP));
    let m = p.start();
    p.bump();
    p.eat(LIFETIME);
    p.eat(MUT_KW);
    type_no_plus(p);
    m.complete(p, REFERENCE_TYPE);
}
141 | |||
// test placeholder_type
// type Placeholder = _;
// Parses the inference placeholder type `_`. Caller must be at `_`.
fn placeholder_type(p: &mut Parser) {
    assert!(p.at(UNDERSCORE));
    let m = p.start();
    p.bump();
    m.complete(p, PLACEHOLDER_TYPE);
}
150 | |||
// test fn_pointer_type
// type A = fn();
// type B = unsafe fn();
// type C = unsafe extern "C" fn();
// Parses a function-pointer type: optional `unsafe`, optional ABI,
// mandatory `fn`, parameter list, optional return type. The marker is
// abandoned if `fn` is missing (already-bumped tokens attach to the
// parent node).
fn fn_pointer_type(p: &mut Parser) {
    let m = p.start();
    p.eat(UNSAFE_KW);
    if p.at(EXTERN_KW) {
        abi(p);
    }
    // test fn_pointer_type_missing_fn
    // type F = unsafe ();
    if !p.eat(FN_KW) {
        m.abandon(p);
        p.error("expected `fn`");
        return;
    }

    params::param_list_opt_patterns(p);
    // test fn_pointer_type_with_ret
    // type F = fn() -> ();
    fn_ret_type(p);
    m.complete(p, FN_POINTER_TYPE);
}
175 | |||
// test for_type
// type A = for<'a> fn() -> ();
// Parses a higher-ranked type `for<...> Type`. Caller must be at `for`.
fn for_type(p: &mut Parser) {
    assert!(p.at(FOR_KW));
    let m = p.start();
    p.bump();
    type_params::type_param_list(p);
    type_(p);
    m.complete(p, FOR_TYPE);
}
186 | |||
// test impl_trait_type
// type A = impl Iterator<Item=Foo<'a>> + 'a;
// Parses `impl Bound + ...`. Caller must be at `impl`.
fn impl_trait_type(p: &mut Parser) {
    assert!(p.at(IMPL_KW));
    let m = p.start();
    p.bump();
    type_params::bounds_without_colon(p);
    m.complete(p, IMPL_TRAIT_TYPE);
}
196 | |||
// test path_type
// type A = Foo;
// type B = ::Foo;
// type C = self::Foo;
// type D = super::Foo;
// Parses a path type, optionally followed by `+ bounds` (trait-object
// sugar). Caller must already be at a path start.
pub(super) fn path_type(p: &mut Parser) {
    assert!(paths::is_path_start(p));
    let m = p.start();
    paths::type_path(p);
    // test path_type_with_bounds
    // fn foo() -> Box<T + 'f> {}
    if p.eat(PLUS) {
        type_params::bounds_without_colon(p);
    }
    m.complete(p, PATH_TYPE);
}
diff --git a/crates/libsyntax2/src/lexer/classes.rs b/crates/libsyntax2/src/lexer/classes.rs new file mode 100644 index 000000000..4235d2648 --- /dev/null +++ b/crates/libsyntax2/src/lexer/classes.rs | |||
@@ -0,0 +1,26 @@ | |||
1 | use unicode_xid::UnicodeXID; | ||
2 | |||
3 | pub fn is_ident_start(c: char) -> bool { | ||
4 | (c >= 'a' && c <= 'z') | ||
5 | || (c >= 'A' && c <= 'Z') | ||
6 | || c == '_' | ||
7 | || (c > '\x7f' && UnicodeXID::is_xid_start(c)) | ||
8 | } | ||
9 | |||
10 | pub fn is_ident_continue(c: char) -> bool { | ||
11 | (c >= 'a' && c <= 'z') | ||
12 | || (c >= 'A' && c <= 'Z') | ||
13 | || (c >= '0' && c <= '9') | ||
14 | || c == '_' | ||
15 | || (c > '\x7f' && UnicodeXID::is_xid_continue(c)) | ||
16 | } | ||
17 | |||
/// Returns whether `c` is whitespace per `char::is_whitespace`.
pub fn is_whitespace(c: char) -> bool {
    //FIXME: use is_pattern_whitespace
    //https://github.com/behnam/rust-unic/issues/192
    char::is_whitespace(c)
}
23 | |||
/// Returns whether `c` is an ASCII decimal digit (`0`-`9`).
pub fn is_dec_digit(c: char) -> bool {
    c.is_ascii_digit()
}
diff --git a/crates/libsyntax2/src/lexer/comments.rs b/crates/libsyntax2/src/lexer/comments.rs new file mode 100644 index 000000000..01acb6515 --- /dev/null +++ b/crates/libsyntax2/src/lexer/comments.rs | |||
@@ -0,0 +1,57 @@ | |||
1 | use lexer::ptr::Ptr; | ||
2 | |||
3 | use SyntaxKind::{self, *}; | ||
4 | |||
// Called after a leading `#`: if `!/` follows, consumes the rest of
// the shebang line and returns true; otherwise consumes nothing.
pub(crate) fn scan_shebang(ptr: &mut Ptr) -> bool {
    if ptr.next_is('!') && ptr.nnext_is('/') {
        ptr.bump();
        ptr.bump();
        bump_until_eol(ptr);
        true
    } else {
        false
    }
}
15 | |||
// Called after a leading `/`: if `*` follows, consumes a (possibly
// nested) block comment and returns COMMENT; an unterminated comment
// is consumed to EOF. Returns None when not at a block comment.
fn scan_block_comment(ptr: &mut Ptr) -> Option<SyntaxKind> {
    if ptr.next_is('*') {
        ptr.bump();
        // Nesting depth: `/*` opens, `*/` closes; Rust block comments nest.
        let mut depth: u32 = 1;
        while depth > 0 {
            if ptr.next_is('*') && ptr.nnext_is('/') {
                depth -= 1;
                ptr.bump();
                ptr.bump();
            } else if ptr.next_is('/') && ptr.nnext_is('*') {
                depth += 1;
                ptr.bump();
                ptr.bump();
            } else if ptr.bump().is_none() {
                // EOF inside the comment — stop without error here.
                break;
            }
        }
        Some(COMMENT)
    } else {
        None
    }
}
38 | |||
// Called after a leading `/`: recognizes `//` line comments and
// delegates `/*` block comments; returns None when the `/` starts
// neither.
pub(crate) fn scan_comment(ptr: &mut Ptr) -> Option<SyntaxKind> {
    if ptr.next_is('/') {
        bump_until_eol(ptr);
        Some(COMMENT)
    } else {
        scan_block_comment(ptr)
    }
}
47 | |||
// Consumes characters up to (but not including) the next line ending —
// `\n` or the `\r` of a `\r\n` pair — or to EOF.
fn bump_until_eol(ptr: &mut Ptr) {
    loop {
        // `&&` binds tighter than `||`: stop at `\n`, or at `\r` only
        // when it is followed by `\n` (a lone `\r` is consumed).
        if ptr.next_is('\n') || ptr.next_is('\r') && ptr.nnext_is('\n') {
            return;
        }
        if ptr.bump().is_none() {
            break;
        }
    }
}
diff --git a/crates/libsyntax2/src/lexer/mod.rs b/crates/libsyntax2/src/lexer/mod.rs new file mode 100644 index 000000000..f8fdc41ac --- /dev/null +++ b/crates/libsyntax2/src/lexer/mod.rs | |||
@@ -0,0 +1,209 @@ | |||
1 | mod classes; | ||
2 | mod comments; | ||
3 | mod numbers; | ||
4 | mod ptr; | ||
5 | mod strings; | ||
6 | |||
7 | use { | ||
8 | SyntaxKind::{self, *}, | ||
9 | TextUnit, | ||
10 | }; | ||
11 | |||
12 | use self::{ | ||
13 | classes::*, | ||
14 | comments::{scan_comment, scan_shebang}, | ||
15 | numbers::scan_number, | ||
16 | ptr::Ptr, | ||
17 | strings::{ | ||
18 | is_string_literal_start, scan_byte_char_or_string, scan_char, scan_raw_string, scan_string, | ||
19 | }, | ||
20 | }; | ||
21 | |||
/// A token of Rust source.
///
/// A token is just a kind tag plus a length; it does not carry its text
/// or its position — those are recovered from the source string and the
/// running offset by the callers (see `tokenize`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Token {
    /// The kind of token.
    pub kind: SyntaxKind,
    /// The length of the token.
    pub len: TextUnit,
}
30 | |||
31 | /// Break a string up into its component tokens | ||
32 | pub fn tokenize(text: &str) -> Vec<Token> { | ||
33 | let mut text = text; | ||
34 | let mut acc = Vec::new(); | ||
35 | while !text.is_empty() { | ||
36 | let token = next_token(text); | ||
37 | acc.push(token); | ||
38 | let len: u32 = token.len.into(); | ||
39 | text = &text[len as usize..]; | ||
40 | } | ||
41 | acc | ||
42 | } | ||
43 | |||
44 | /// Get the next token from a string | ||
45 | pub fn next_token(text: &str) -> Token { | ||
46 | assert!(!text.is_empty()); | ||
47 | let mut ptr = Ptr::new(text); | ||
48 | let c = ptr.bump().unwrap(); | ||
49 | let kind = next_token_inner(c, &mut ptr); | ||
50 | let len = ptr.into_len(); | ||
51 | Token { kind, len } | ||
52 | } | ||
53 | |||
/// Dispatches on the first character `c` (already consumed by the
/// caller) and scans the remainder of a single token, returning its
/// kind. Falls through to `ERROR` for unrecognized characters.
fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind {
    if is_whitespace(c) {
        ptr.bump_while(is_whitespace);
        return WHITESPACE;
    }

    match c {
        // `#!/...` shebang; a bare `#` falls through to the
        // single-char token table below.
        '#' => if scan_shebang(ptr) {
            return SHEBANG;
        },
        // `//` or `/*`; a bare `/` falls through below.
        '/' => if let Some(kind) = scan_comment(ptr) {
            return kind;
        },
        _ => (),
    }

    // `r` and `b` can start either identifiers or (raw/byte) literals;
    // only lex an identifier when no literal can start here.
    let ident_start = is_ident_start(c) && !is_string_literal_start(c, ptr.next(), ptr.nnext());
    if ident_start {
        return scan_ident(c, ptr);
    }

    if is_dec_digit(c) {
        let kind = scan_number(c, ptr);
        scan_literal_suffix(ptr);
        return kind;
    }

    // One-byte tokens.
    if let Some(kind) = SyntaxKind::from_char(c) {
        return kind;
    }

    match c {
        // Multi-byte tokens.
        '.' => {
            // Longest match first: `...`, `..=`, `..`, then `.`.
            return match (ptr.next(), ptr.nnext()) {
                (Some('.'), Some('.')) => {
                    ptr.bump();
                    ptr.bump();
                    DOTDOTDOT
                }
                (Some('.'), Some('=')) => {
                    ptr.bump();
                    ptr.bump();
                    DOTDOTEQ
                }
                (Some('.'), _) => {
                    ptr.bump();
                    DOTDOT
                }
                _ => DOT,
            };
        }
        ':' => {
            return match ptr.next() {
                Some(':') => {
                    ptr.bump();
                    COLONCOLON
                }
                _ => COLON,
            };
        }
        '=' => {
            return match ptr.next() {
                Some('=') => {
                    ptr.bump();
                    EQEQ
                }
                Some('>') => {
                    ptr.bump();
                    FAT_ARROW
                }
                _ => EQ,
            };
        }
        '!' => {
            return match ptr.next() {
                Some('=') => {
                    ptr.bump();
                    NEQ
                }
                _ => EXCL,
            };
        }
        '-' => {
            return if ptr.next_is('>') {
                ptr.bump();
                THIN_ARROW
            } else {
                MINUS
            };
        }

        // If the character is an ident start not followed by another single
        // quote, then this is a lifetime name:
        '\'' => {
            return if ptr.next_is_p(is_ident_start) && !ptr.nnext_is('\'') {
                ptr.bump();
                while ptr.next_is_p(is_ident_continue) {
                    ptr.bump();
                }
                // lifetimes shouldn't end with a single quote
                // if we find one, then this is an invalid character literal
                if ptr.next_is('\'') {
                    ptr.bump();
                    return CHAR; // TODO: error reporting
                }
                LIFETIME
            } else {
                scan_char(ptr);
                scan_literal_suffix(ptr);
                CHAR
            };
        }
        // `b` here is always a byte/byte-string start: the ident check
        // above already ruled out plain identifiers beginning with `b`.
        'b' => {
            let kind = scan_byte_char_or_string(ptr);
            scan_literal_suffix(ptr);
            return kind;
        }
        '"' => {
            scan_string(ptr);
            scan_literal_suffix(ptr);
            return STRING;
        }
        // Likewise, `r` here is always a raw-string start.
        'r' => {
            scan_raw_string(ptr);
            scan_literal_suffix(ptr);
            return RAW_STRING;
        }
        _ => (),
    }
    ERROR
}
187 | |||
188 | fn scan_ident(c: char, ptr: &mut Ptr) -> SyntaxKind { | ||
189 | let is_single_letter = match ptr.next() { | ||
190 | None => true, | ||
191 | Some(c) if !is_ident_continue(c) => true, | ||
192 | _ => false, | ||
193 | }; | ||
194 | if is_single_letter { | ||
195 | return if c == '_' { UNDERSCORE } else { IDENT }; | ||
196 | } | ||
197 | ptr.bump_while(is_ident_continue); | ||
198 | if let Some(kind) = SyntaxKind::from_keyword(ptr.current_token_text()) { | ||
199 | return kind; | ||
200 | } | ||
201 | IDENT | ||
202 | } | ||
203 | |||
/// Consumes a literal suffix, e.g. the `u32` in `1u32`.
///
/// NOTE(review): `bump_while(is_ident_continue)` runs even when the
/// first character was not an ident start, so trailing continue-only
/// characters are swallowed into the literal too — presumably
/// intentional for error recovery; confirm.
fn scan_literal_suffix(ptr: &mut Ptr) {
    if ptr.next_is_p(is_ident_start) {
        ptr.bump();
    }
    ptr.bump_while(is_ident_continue);
}
diff --git a/crates/libsyntax2/src/lexer/numbers.rs b/crates/libsyntax2/src/lexer/numbers.rs new file mode 100644 index 000000000..5c4641a2d --- /dev/null +++ b/crates/libsyntax2/src/lexer/numbers.rs | |||
@@ -0,0 +1,67 @@ | |||
1 | use lexer::classes::*; | ||
2 | use lexer::ptr::Ptr; | ||
3 | |||
4 | use SyntaxKind::{self, *}; | ||
5 | |||
/// Scans a numeric literal whose first digit `c` was already consumed;
/// returns INT_NUMBER or FLOAT_NUMBER.
pub(crate) fn scan_number(c: char, ptr: &mut Ptr) -> SyntaxKind {
    if c == '0' {
        match ptr.next().unwrap_or('\0') {
            // Binary/octal prefix. NOTE(review): scan_digits still
            // accepts any decimal digit here, so e.g. `0b777` lexes as
            // one INT_NUMBER — presumably validated later; confirm.
            'b' | 'o' => {
                ptr.bump();
                scan_digits(ptr, false);
            }
            // Hex prefix.
            'x' => {
                ptr.bump();
                scan_digits(ptr, true);
            }
            // A leading `0` followed by more digit-ish characters.
            // NOTE(review): allow_hex=true for a decimal literal looks
            // like deliberate over-acceptance for error recovery; confirm.
            '0'...'9' | '_' | '.' | 'e' | 'E' => {
                scan_digits(ptr, true);
            }
            // A lone `0`.
            _ => return INT_NUMBER,
        }
    } else {
        scan_digits(ptr, false);
    }

    // might be a float, but don't be greedy if this is actually an
    // integer literal followed by field/method access or a range pattern
    // (`0..2` and `12.foo()`)
    if ptr.next_is('.') && !(ptr.nnext_is('.') || ptr.nnext_is_p(is_ident_start)) {
        // might have stuff after the ., and if it does, it needs to start
        // with a number
        ptr.bump();
        scan_digits(ptr, false);
        scan_float_exponent(ptr);
        return FLOAT_NUMBER;
    }
    // it might be a float if it has an exponent
    if ptr.next_is('e') || ptr.next_is('E') {
        scan_float_exponent(ptr);
        return FLOAT_NUMBER;
    }
    INT_NUMBER
}
44 | |||
45 | fn scan_digits(ptr: &mut Ptr, allow_hex: bool) { | ||
46 | while let Some(c) = ptr.next() { | ||
47 | match c { | ||
48 | '_' | '0'...'9' => { | ||
49 | ptr.bump(); | ||
50 | } | ||
51 | 'a'...'f' | 'A'...'F' if allow_hex => { | ||
52 | ptr.bump(); | ||
53 | } | ||
54 | _ => return, | ||
55 | } | ||
56 | } | ||
57 | } | ||
58 | |||
59 | fn scan_float_exponent(ptr: &mut Ptr) { | ||
60 | if ptr.next_is('e') || ptr.next_is('E') { | ||
61 | ptr.bump(); | ||
62 | if ptr.next_is('-') || ptr.next_is('+') { | ||
63 | ptr.bump(); | ||
64 | } | ||
65 | scan_digits(ptr, false); | ||
66 | } | ||
67 | } | ||
diff --git a/crates/libsyntax2/src/lexer/ptr.rs b/crates/libsyntax2/src/lexer/ptr.rs new file mode 100644 index 000000000..d1391fd5f --- /dev/null +++ b/crates/libsyntax2/src/lexer/ptr.rs | |||
@@ -0,0 +1,74 @@ | |||
1 | use TextUnit; | ||
2 | |||
3 | use std::str::Chars; | ||
4 | |||
5 | pub(crate) struct Ptr<'s> { | ||
6 | text: &'s str, | ||
7 | len: TextUnit, | ||
8 | } | ||
9 | |||
10 | impl<'s> Ptr<'s> { | ||
11 | pub fn new(text: &'s str) -> Ptr<'s> { | ||
12 | Ptr { | ||
13 | text, | ||
14 | len: 0.into(), | ||
15 | } | ||
16 | } | ||
17 | |||
18 | pub fn into_len(self) -> TextUnit { | ||
19 | self.len | ||
20 | } | ||
21 | |||
22 | pub fn next(&self) -> Option<char> { | ||
23 | self.chars().next() | ||
24 | } | ||
25 | |||
26 | pub fn nnext(&self) -> Option<char> { | ||
27 | let mut chars = self.chars(); | ||
28 | chars.next()?; | ||
29 | chars.next() | ||
30 | } | ||
31 | |||
32 | pub fn next_is(&self, c: char) -> bool { | ||
33 | self.next() == Some(c) | ||
34 | } | ||
35 | |||
36 | pub fn nnext_is(&self, c: char) -> bool { | ||
37 | self.nnext() == Some(c) | ||
38 | } | ||
39 | |||
40 | pub fn next_is_p<P: Fn(char) -> bool>(&self, p: P) -> bool { | ||
41 | self.next().map(p) == Some(true) | ||
42 | } | ||
43 | |||
44 | pub fn nnext_is_p<P: Fn(char) -> bool>(&self, p: P) -> bool { | ||
45 | self.nnext().map(p) == Some(true) | ||
46 | } | ||
47 | |||
48 | pub fn bump(&mut self) -> Option<char> { | ||
49 | let ch = self.chars().next()?; | ||
50 | self.len += TextUnit::of_char(ch); | ||
51 | Some(ch) | ||
52 | } | ||
53 | |||
54 | pub fn bump_while<F: Fn(char) -> bool>(&mut self, pred: F) { | ||
55 | loop { | ||
56 | match self.next() { | ||
57 | Some(c) if pred(c) => { | ||
58 | self.bump(); | ||
59 | } | ||
60 | _ => return, | ||
61 | } | ||
62 | } | ||
63 | } | ||
64 | |||
65 | pub fn current_token_text(&self) -> &str { | ||
66 | let len: u32 = self.len.into(); | ||
67 | &self.text[..len as usize] | ||
68 | } | ||
69 | |||
70 | fn chars(&self) -> Chars { | ||
71 | let len: u32 = self.len.into(); | ||
72 | self.text[len as usize..].chars() | ||
73 | } | ||
74 | } | ||
diff --git a/crates/libsyntax2/src/lexer/strings.rs b/crates/libsyntax2/src/lexer/strings.rs new file mode 100644 index 000000000..e3704fbb3 --- /dev/null +++ b/crates/libsyntax2/src/lexer/strings.rs | |||
@@ -0,0 +1,106 @@ | |||
1 | use SyntaxKind::{self, *}; | ||
2 | |||
3 | use lexer::ptr::Ptr; | ||
4 | |||
/// Decides whether `c` (with the two characters of lookahead `c1`,
/// `c2`) opens a string-like literal: `r"`, `r#`, `b"`, `b'`,
/// `br"`, or `br#`.
pub(crate) fn is_string_literal_start(c: char, c1: Option<char>, c2: Option<char>) -> bool {
    match c {
        'r' => c1 == Some('"') || c1 == Some('#'),
        'b' => match c1 {
            Some('"') | Some('\'') => true,
            Some('r') => c2 == Some('"') || c2 == Some('#'),
            _ => false,
        },
        _ => false,
    }
}
16 | |||
/// Scans the rest of a `char` literal; the opening `'` was already
/// consumed by the caller.
///
/// NOTE(review): the first bump below takes the payload char (or the
/// `\` of an escape), and `scan_char_or_byte` bumps one more char —
/// which for a simple literal like `'a'` is in fact the closing quote,
/// so the trailing `next_is('\'')` check fails and we return having
/// consumed exactly `a'`. This works for one-char payloads and
/// two-char escapes but not longer escapes (`'\u{..}'`) — confirm
/// this is covered by the FIXME in scan_char_or_byte.
pub(crate) fn scan_char(ptr: &mut Ptr) {
    if ptr.bump().is_none() {
        return; // TODO: error reporting is upper in the stack
    }
    scan_char_or_byte(ptr);
    if !ptr.next_is('\'') {
        return; // TODO: error reporting
    }
    ptr.bump();
}
27 | |||
28 | pub(crate) fn scan_byte_char_or_string(ptr: &mut Ptr) -> SyntaxKind { | ||
29 | // unwrapping and not-exhaustive match are ok | ||
30 | // because of string_literal_start | ||
31 | let c = ptr.bump().unwrap(); | ||
32 | match c { | ||
33 | '\'' => { | ||
34 | scan_byte(ptr); | ||
35 | BYTE | ||
36 | } | ||
37 | '"' => { | ||
38 | scan_byte_string(ptr); | ||
39 | BYTE_STRING | ||
40 | } | ||
41 | 'r' => { | ||
42 | scan_raw_byte_string(ptr); | ||
43 | RAW_BYTE_STRING | ||
44 | } | ||
45 | _ => unreachable!(), | ||
46 | } | ||
47 | } | ||
48 | |||
49 | pub(crate) fn scan_string(ptr: &mut Ptr) { | ||
50 | while let Some(c) = ptr.bump() { | ||
51 | if c == '"' { | ||
52 | return; | ||
53 | } | ||
54 | } | ||
55 | } | ||
56 | |||
57 | pub(crate) fn scan_raw_string(ptr: &mut Ptr) { | ||
58 | if !ptr.next_is('"') { | ||
59 | return; | ||
60 | } | ||
61 | ptr.bump(); | ||
62 | |||
63 | while let Some(c) = ptr.bump() { | ||
64 | if c == '"' { | ||
65 | return; | ||
66 | } | ||
67 | } | ||
68 | } | ||
69 | |||
70 | fn scan_byte(ptr: &mut Ptr) { | ||
71 | if ptr.next_is('\'') { | ||
72 | ptr.bump(); | ||
73 | return; | ||
74 | } | ||
75 | ptr.bump(); | ||
76 | if ptr.next_is('\'') { | ||
77 | ptr.bump(); | ||
78 | return; | ||
79 | } | ||
80 | } | ||
81 | |||
82 | fn scan_byte_string(ptr: &mut Ptr) { | ||
83 | while let Some(c) = ptr.bump() { | ||
84 | if c == '"' { | ||
85 | return; | ||
86 | } | ||
87 | } | ||
88 | } | ||
89 | |||
90 | fn scan_raw_byte_string(ptr: &mut Ptr) { | ||
91 | if !ptr.next_is('"') { | ||
92 | return; | ||
93 | } | ||
94 | ptr.bump(); | ||
95 | |||
96 | while let Some(c) = ptr.bump() { | ||
97 | if c == '"' { | ||
98 | return; | ||
99 | } | ||
100 | } | ||
101 | } | ||
102 | |||
/// Consumes one character of a char/byte literal body.
/// For a simple literal like `'a'` this is, in practice, the closing
/// quote (the payload was bumped by the caller); for an escape like
/// `'\n'` it is the character after the backslash.
fn scan_char_or_byte(ptr: &mut Ptr) {
    //FIXME: deal with escape sequences (multi-char escapes like \u{..})
    ptr.bump();
}
diff --git a/crates/libsyntax2/src/lib.rs b/crates/libsyntax2/src/lib.rs new file mode 100644 index 000000000..ca33618a0 --- /dev/null +++ b/crates/libsyntax2/src/lib.rs | |||
@@ -0,0 +1,55 @@ | |||
1 | //! An experimental implementation of [Rust RFC#2256 libsyntax2.0][rfc#2256]. | ||
2 | //! | ||
3 | //! The intent is to be an IDE-ready parser, i.e. one that offers | ||
4 | //! | ||
5 | //! - easy and fast incremental re-parsing, | ||
6 | //! - graceful handling of errors, and | ||
7 | //! - maintains all information in the source file. | ||
8 | //! | ||
9 | //! For more information, see [the RFC][rfc#2256], or [the working draft][RFC.md]. | ||
10 | //! | ||
11 | //! [rfc#2256]: <https://github.com/rust-lang/rfcs/pull/2256> | ||
12 | //! [RFC.md]: <https://github.com/matklad/libsyntax2/blob/master/docs/RFC.md> | ||
13 | |||
14 | #![forbid( | ||
15 | missing_debug_implementations, | ||
16 | unconditional_recursion, | ||
17 | future_incompatible | ||
18 | )] | ||
19 | #![deny(bad_style, missing_docs)] | ||
20 | #![allow(missing_docs)] | ||
21 | //#![warn(unreachable_pub)] // rust-lang/rust#47816 | ||
22 | |||
23 | extern crate itertools; | ||
24 | extern crate text_unit; | ||
25 | extern crate unicode_xid; | ||
26 | extern crate drop_bomb; | ||
27 | extern crate parking_lot; | ||
28 | |||
29 | pub mod algo; | ||
30 | pub mod ast; | ||
31 | mod lexer; | ||
32 | #[macro_use] | ||
33 | mod parser_api; | ||
34 | mod grammar; | ||
35 | mod parser_impl; | ||
36 | |||
37 | mod syntax_kinds; | ||
38 | mod smol_str; | ||
39 | mod yellow; | ||
40 | /// Utilities for simple uses of the parser. | ||
41 | pub mod utils; | ||
42 | |||
43 | pub use { | ||
44 | ast::{AstNode, File}, | ||
45 | lexer::{tokenize, Token}, | ||
46 | syntax_kinds::SyntaxKind, | ||
47 | text_unit::{TextRange, TextUnit}, | ||
48 | yellow::{SyntaxNode, SyntaxNodeRef, SyntaxRoot, TreeRoot, SyntaxError}, | ||
49 | }; | ||
50 | |||
51 | |||
52 | pub fn parse(text: &str) -> SyntaxNode { | ||
53 | let tokens = tokenize(&text); | ||
54 | parser_impl::parse::<yellow::GreenBuilder>(text, &tokens) | ||
55 | } | ||
diff --git a/crates/libsyntax2/src/parser_api.rs b/crates/libsyntax2/src/parser_api.rs new file mode 100644 index 000000000..c78c6e43a --- /dev/null +++ b/crates/libsyntax2/src/parser_api.rs | |||
@@ -0,0 +1,195 @@ | |||
1 | use { | ||
2 | parser_impl::ParserImpl, | ||
3 | SyntaxKind::{self, ERROR}, | ||
4 | drop_bomb::DropBomb, | ||
5 | }; | ||
6 | |||
7 | #[derive(Clone, Copy)] | ||
8 | pub(crate) struct TokenSet(pub(crate) u128); | ||
9 | |||
10 | fn mask(kind: SyntaxKind) -> u128 { | ||
11 | 1u128 << (kind as usize) | ||
12 | } | ||
13 | |||
14 | impl TokenSet { | ||
15 | pub fn contains(&self, kind: SyntaxKind) -> bool { | ||
16 | self.0 & mask(kind) != 0 | ||
17 | } | ||
18 | } | ||
19 | |||
// Builds a `TokenSet` literal from a list of `SyntaxKind` names,
// e.g. `token_set![EOF, SHEBANG]`; the second arm accepts a
// trailing comma by delegating to the first.
#[macro_export]
macro_rules! token_set {
    ($($t:ident),*) => { TokenSet($(1u128 << ($t as usize))|*) };
    ($($t:ident),* ,) => { token_set!($($t),*) };
}
25 | |||
// Unions several existing `TokenSet` expressions into one;
// the second arm accepts a trailing comma.
#[macro_export]
macro_rules! token_set_union {
    ($($ts:expr),*) => { TokenSet($($ts.0)|*) };
    ($($ts:expr),* ,) => { token_set_union!($($ts),*) };
}
31 | |||
/// Smoke test: membership reflects exactly the kinds listed in the
/// `token_set!` invocation (here also exercising the trailing comma).
#[test]
fn token_set_works_for_tokens() {
    use SyntaxKind::*;
    let ts = token_set![EOF, SHEBANG,];
    assert!(ts.contains(EOF));
    assert!(ts.contains(SHEBANG));
    assert!(!ts.contains(PLUS));
}
40 | |||
41 | /// `Parser` struct provides the low-level API for | ||
42 | /// navigating through the stream of tokens and | ||
43 | /// constructing the parse tree. The actual parsing | ||
44 | /// happens in the `grammar` module. | ||
45 | /// | ||
46 | /// However, the result of this `Parser` is not a real | ||
47 | /// tree, but rather a flat stream of events of the form | ||
48 | /// "start expression, consume number literal, | ||
49 | /// finish expression". See `Event` docs for more. | ||
50 | pub(crate) struct Parser<'t>(pub(super) ParserImpl<'t>); | ||
51 | |||
52 | impl<'t> Parser<'t> { | ||
53 | /// Returns the kind of the current token. | ||
54 | /// If parser has already reached the end of input, | ||
55 | /// the special `EOF` kind is returned. | ||
56 | pub(crate) fn current(&self) -> SyntaxKind { | ||
57 | self.nth(0) | ||
58 | } | ||
59 | |||
60 | /// Lookahead operation: returns the kind of the next nth | ||
61 | /// token. | ||
62 | pub(crate) fn nth(&self, n: u32) -> SyntaxKind { | ||
63 | self.0.nth(n) | ||
64 | } | ||
65 | |||
66 | /// Checks if the current token is `kind`. | ||
67 | pub(crate) fn at(&self, kind: SyntaxKind) -> bool { | ||
68 | self.current() == kind | ||
69 | } | ||
70 | |||
71 | pub(crate) fn at_compound2(&self, c1: SyntaxKind, c2: SyntaxKind) -> bool { | ||
72 | self.0.at_compound2(c1, c2) | ||
73 | } | ||
74 | |||
75 | pub(crate) fn at_compound3(&self, c1: SyntaxKind, c2: SyntaxKind, c3: SyntaxKind) -> bool { | ||
76 | self.0.at_compound3(c1, c2, c3) | ||
77 | } | ||
78 | |||
79 | /// Checks if the current token is contextual keyword with text `t`. | ||
80 | pub(crate) fn at_contextual_kw(&self, t: &str) -> bool { | ||
81 | self.0.at_kw(t) | ||
82 | } | ||
83 | |||
84 | /// Starts a new node in the syntax tree. All nodes and tokens | ||
85 | /// consumed between the `start` and the corresponding `Marker::complete` | ||
86 | /// belong to the same node. | ||
87 | pub(crate) fn start(&mut self) -> Marker { | ||
88 | Marker::new(self.0.start()) | ||
89 | } | ||
90 | |||
91 | /// Advances the parser by one token. | ||
92 | pub(crate) fn bump(&mut self) { | ||
93 | self.0.bump(); | ||
94 | } | ||
95 | |||
96 | /// Advances the parser by one token, remapping its kind. | ||
97 | /// This is useful to create contextual keywords from | ||
98 | /// identifiers. For example, the lexer creates an `union` | ||
99 | /// *identifier* token, but the parser remaps it to the | ||
100 | /// `union` keyword, and keyword is what ends up in the | ||
101 | /// final tree. | ||
102 | pub(crate) fn bump_remap(&mut self, kind: SyntaxKind) { | ||
103 | self.0.bump_remap(kind); | ||
104 | } | ||
105 | |||
106 | /// Advances the parser by `n` tokens, remapping its kind. | ||
107 | /// This is useful to create compound tokens from parts. For | ||
108 | /// example, an `<<` token is two consecutive remapped `<` tokens | ||
109 | pub(crate) fn bump_compound(&mut self, kind: SyntaxKind, n: u8) { | ||
110 | self.0.bump_compound(kind, n); | ||
111 | } | ||
112 | |||
113 | /// Emit error with the `message` | ||
114 | /// TODO: this should be much more fancy and support | ||
115 | /// structured errors with spans and notes, like rustc | ||
116 | /// does. | ||
117 | pub(crate) fn error<T: Into<String>>(&mut self, message: T) { | ||
118 | self.0.error(message.into()) | ||
119 | } | ||
120 | |||
121 | /// Consume the next token if it is `kind`. | ||
122 | pub(crate) fn eat(&mut self, kind: SyntaxKind) -> bool { | ||
123 | if !self.at(kind) { | ||
124 | return false; | ||
125 | } | ||
126 | self.bump(); | ||
127 | true | ||
128 | } | ||
129 | |||
130 | /// Consume the next token if it is `kind` or emit an error | ||
131 | /// otherwise. | ||
132 | pub(crate) fn expect(&mut self, kind: SyntaxKind) -> bool { | ||
133 | if self.eat(kind) { | ||
134 | return true; | ||
135 | } | ||
136 | self.error(format!("expected {:?}", kind)); | ||
137 | false | ||
138 | } | ||
139 | |||
140 | /// Create an error node and consume the next token. | ||
141 | pub(crate) fn err_and_bump(&mut self, message: &str) { | ||
142 | let m = self.start(); | ||
143 | self.error(message); | ||
144 | self.bump(); | ||
145 | m.complete(self, ERROR); | ||
146 | } | ||
147 | } | ||
148 | |||
149 | /// See `Parser::start`. | ||
150 | pub(crate) struct Marker { | ||
151 | pos: u32, | ||
152 | bomb: DropBomb, | ||
153 | } | ||
154 | |||
155 | impl Marker { | ||
156 | fn new(pos: u32) -> Marker { | ||
157 | Marker { | ||
158 | pos, | ||
159 | bomb: DropBomb::new("Marker must be either completed or abandoned"), | ||
160 | } | ||
161 | } | ||
162 | |||
163 | /// Finishes the syntax tree node and assigns `kind` to it. | ||
164 | pub(crate) fn complete(mut self, p: &mut Parser, kind: SyntaxKind) -> CompletedMarker { | ||
165 | self.bomb.defuse(); | ||
166 | p.0.complete(self.pos, kind); | ||
167 | CompletedMarker(self.pos, kind) | ||
168 | } | ||
169 | |||
170 | /// Abandons the syntax tree node. All its children | ||
171 | /// are attached to its parent instead. | ||
172 | pub(crate) fn abandon(mut self, p: &mut Parser) { | ||
173 | self.bomb.defuse(); | ||
174 | p.0.abandon(self.pos); | ||
175 | } | ||
176 | } | ||
177 | |||
178 | pub(crate) struct CompletedMarker(u32, SyntaxKind); | ||
179 | |||
180 | impl CompletedMarker { | ||
181 | /// This one is tricky :-) | ||
182 | /// This method allows to create a new node which starts | ||
183 | /// *before* the current one. That is, parser could start | ||
184 | /// node `A`, then complete it, and then after parsing the | ||
185 | /// whole `A`, decide that it should have started some node | ||
186 | /// `B` before starting `A`. `precede` allows to do exactly | ||
187 | /// that. See also docs about `forward_parent` in `Event::Start`. | ||
188 | pub(crate) fn precede(self, p: &mut Parser) -> Marker { | ||
189 | Marker::new(p.0.precede(self.0)) | ||
190 | } | ||
191 | |||
192 | pub(crate) fn kind(&self) -> SyntaxKind { | ||
193 | self.1 | ||
194 | } | ||
195 | } | ||
diff --git a/crates/libsyntax2/src/parser_impl/event.rs b/crates/libsyntax2/src/parser_impl/event.rs new file mode 100644 index 000000000..9fd56b996 --- /dev/null +++ b/crates/libsyntax2/src/parser_impl/event.rs | |||
@@ -0,0 +1,154 @@ | |||
1 | //! This module provides a way to construct a `File`. | ||
2 | //! It is intended to be completely decoupled from the | ||
3 | //! parser, so as to allow to evolve the tree representation | ||
4 | //! and the parser algorithm independently. | ||
5 | //! | ||
6 | //! The `Sink` trait is the bridge between the parser and the | ||
7 | //! tree builder: the parser produces a stream of events like | ||
8 | //! `start node`, `finish node`, and `FileBuilder` converts | ||
9 | //! this stream to a real tree. | ||
10 | use std::mem; | ||
11 | use { | ||
12 | lexer::Token, | ||
13 | parser_impl::Sink, | ||
14 | SyntaxKind::{self, TOMBSTONE}, | ||
15 | }; | ||
16 | |||
17 | |||
/// `Parser` produces a flat list of `Event`s.
/// They are converted to a tree-structure in
/// a separate pass, via `TreeBuilder`.
#[derive(Debug)]
pub(crate) enum Event {
    /// This event signifies the start of the node.
    /// It should be either abandoned (in which case the
    /// `kind` is `TOMBSTONE`, and the event is ignored),
    /// or completed via a `Finish` event.
    ///
    /// All tokens between a `Start` and a `Finish` would
    /// become the children of the respective node.
    ///
    /// For left-recursive syntactic constructs, the parser produces
    /// a child node before it sees a parent. `forward_parent`
    /// exists to allow to tweak parent-child relationships.
    ///
    /// Consider this path
    ///
    /// foo::bar
    ///
    /// The events for it would look like this:
    ///
    ///
    /// START(PATH) IDENT('foo') FINISH START(PATH) COLONCOLON IDENT('bar') FINISH
    ///       |                          /\
    ///       |                          |
    ///       +------forward-parent------+
    ///
    /// And the tree would look like this
    ///
    ///    +--PATH---------+
    ///    |   |           |
    ///    |   |           |
    ///    |  '::'       'bar'
    ///    |
    ///   PATH
    ///    |
    ///   'foo'
    ///
    /// See also `CompletedMarker::precede`.
    Start {
        kind: SyntaxKind,
        /// Offset (in events, relative to this one) of the `Start`
        /// that should open *before* this node, if any.
        forward_parent: Option<u32>,
    },

    /// Complete the previous `Start` event
    Finish,

    /// Produce a single leaf-element.
    /// `n_raw_tokens` is used to glue complex contextual tokens.
    /// For example, lexer tokenizes `>>` as `>`, `>`, and
    /// `n_raw_tokens = 2` is used to produce a single `>>`.
    Token {
        kind: SyntaxKind,
        n_raw_tokens: u8,
    },

    /// Record an error message at the current position.
    Error {
        msg: String,
    },
}
80 | |||
81 | |||
82 | pub(super) fn process<'a, S: Sink<'a>>(builder: &mut S, tokens: &[Token], mut events: Vec<Event>) { | ||
83 | fn tombstone() -> Event { | ||
84 | Event::Start { kind: TOMBSTONE, forward_parent: None } | ||
85 | } | ||
86 | let eat_ws = |idx: &mut usize, builder: &mut S| { | ||
87 | while let Some(token) = tokens.get(*idx) { | ||
88 | if !token.kind.is_trivia() { | ||
89 | break; | ||
90 | } | ||
91 | builder.leaf(token.kind, token.len); | ||
92 | *idx += 1 | ||
93 | } | ||
94 | }; | ||
95 | |||
96 | let events: &mut [Event] = &mut events; | ||
97 | let mut depth = 0; | ||
98 | let mut forward_parents = Vec::new(); | ||
99 | let mut next_tok_idx = 0; | ||
100 | for i in 0..events.len() { | ||
101 | match mem::replace(&mut events[i], tombstone()) { | ||
102 | Event::Start { | ||
103 | kind: TOMBSTONE, .. | ||
104 | } => (), | ||
105 | |||
106 | Event::Start { kind, forward_parent } => { | ||
107 | forward_parents.push(kind); | ||
108 | let mut idx = i; | ||
109 | let mut fp = forward_parent; | ||
110 | while let Some(fwd) = fp { | ||
111 | idx += fwd as usize; | ||
112 | fp = match mem::replace(&mut events[idx], tombstone()) { | ||
113 | Event::Start { | ||
114 | kind, | ||
115 | forward_parent, | ||
116 | } => { | ||
117 | forward_parents.push(kind); | ||
118 | forward_parent | ||
119 | }, | ||
120 | _ => unreachable!(), | ||
121 | }; | ||
122 | } | ||
123 | for kind in forward_parents.drain(..).rev() { | ||
124 | if depth > 0 { | ||
125 | eat_ws(&mut next_tok_idx, builder); | ||
126 | } | ||
127 | depth += 1; | ||
128 | builder.start_internal(kind); | ||
129 | } | ||
130 | } | ||
131 | Event::Finish => { | ||
132 | depth -= 1; | ||
133 | if depth == 0 { | ||
134 | eat_ws(&mut next_tok_idx, builder); | ||
135 | } | ||
136 | |||
137 | builder.finish_internal(); | ||
138 | } | ||
139 | Event::Token { | ||
140 | kind, | ||
141 | mut n_raw_tokens, | ||
142 | } => { | ||
143 | eat_ws(&mut next_tok_idx, builder); | ||
144 | let mut len = 0.into(); | ||
145 | for _ in 0..n_raw_tokens { | ||
146 | len += tokens[next_tok_idx].len; | ||
147 | next_tok_idx += 1; | ||
148 | } | ||
149 | builder.leaf(kind, len); | ||
150 | } | ||
151 | Event::Error { msg } => builder.error(msg), | ||
152 | } | ||
153 | } | ||
154 | } | ||
diff --git a/crates/libsyntax2/src/parser_impl/input.rs b/crates/libsyntax2/src/parser_impl/input.rs new file mode 100644 index 000000000..c0fe4d488 --- /dev/null +++ b/crates/libsyntax2/src/parser_impl/input.rs | |||
@@ -0,0 +1,86 @@ | |||
1 | use {lexer::Token, SyntaxKind, SyntaxKind::EOF, TextRange, TextUnit}; | ||
2 | |||
3 | use std::ops::{Add, AddAssign}; | ||
4 | |||
5 | pub(crate) struct ParserInput<'t> { | ||
6 | text: &'t str, | ||
7 | start_offsets: Vec<TextUnit>, | ||
8 | tokens: Vec<Token>, // non-whitespace tokens | ||
9 | } | ||
10 | |||
11 | impl<'t> ParserInput<'t> { | ||
12 | pub fn new(text: &'t str, raw_tokens: &'t [Token]) -> ParserInput<'t> { | ||
13 | let mut tokens = Vec::new(); | ||
14 | let mut start_offsets = Vec::new(); | ||
15 | let mut len = 0.into(); | ||
16 | for &token in raw_tokens.iter() { | ||
17 | if !token.kind.is_trivia() { | ||
18 | tokens.push(token); | ||
19 | start_offsets.push(len); | ||
20 | } | ||
21 | len += token.len; | ||
22 | } | ||
23 | |||
24 | ParserInput { | ||
25 | text, | ||
26 | start_offsets, | ||
27 | tokens, | ||
28 | } | ||
29 | } | ||
30 | |||
31 | pub fn kind(&self, pos: InputPosition) -> SyntaxKind { | ||
32 | let idx = pos.0 as usize; | ||
33 | if !(idx < self.tokens.len()) { | ||
34 | return EOF; | ||
35 | } | ||
36 | self.tokens[idx].kind | ||
37 | } | ||
38 | |||
39 | pub fn len(&self, pos: InputPosition) -> TextUnit { | ||
40 | let idx = pos.0 as usize; | ||
41 | if !(idx < self.tokens.len()) { | ||
42 | return 0.into(); | ||
43 | } | ||
44 | self.tokens[idx].len | ||
45 | } | ||
46 | |||
47 | pub fn start(&self, pos: InputPosition) -> TextUnit { | ||
48 | let idx = pos.0 as usize; | ||
49 | if !(idx < self.tokens.len()) { | ||
50 | return 0.into(); | ||
51 | } | ||
52 | self.start_offsets[idx] | ||
53 | } | ||
54 | |||
55 | pub fn text(&self, pos: InputPosition) -> &'t str { | ||
56 | let idx = pos.0 as usize; | ||
57 | if !(idx < self.tokens.len()) { | ||
58 | return ""; | ||
59 | } | ||
60 | let range = TextRange::offset_len(self.start_offsets[idx], self.tokens[idx].len); | ||
61 | &self.text[range] | ||
62 | } | ||
63 | } | ||
64 | |||
/// A position in the trivia-free token stream, measured in tokens.
#[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq)]
pub(crate) struct InputPosition(u32);

impl InputPosition {
    /// Position of the very first token.
    pub fn new() -> Self {
        InputPosition(0)
    }
}

/// Advances the position by `rhs` tokens.
impl Add<u32> for InputPosition {
    type Output = InputPosition;

    fn add(self, rhs: u32) -> InputPosition {
        InputPosition(self.0 + rhs)
    }
}

/// In-place version of `+`.
impl AddAssign<u32> for InputPosition {
    fn add_assign(&mut self, rhs: u32) {
        self.0 += rhs
    }
}
diff --git a/crates/libsyntax2/src/parser_impl/mod.rs b/crates/libsyntax2/src/parser_impl/mod.rs new file mode 100644 index 000000000..06c16cdb4 --- /dev/null +++ b/crates/libsyntax2/src/parser_impl/mod.rs | |||
@@ -0,0 +1,170 @@ | |||
1 | mod event; | ||
2 | mod input; | ||
3 | |||
4 | use { | ||
5 | grammar, | ||
6 | lexer::Token, | ||
7 | parser_api::Parser, | ||
8 | parser_impl::{ | ||
9 | event::{process, Event}, | ||
10 | input::{InputPosition, ParserInput}, | ||
11 | }, | ||
12 | TextUnit, | ||
13 | }; | ||
14 | |||
15 | use SyntaxKind::{self, EOF, TOMBSTONE}; | ||
16 | |||
/// Receiver of parse results: the parser feeds a stream of leaves,
/// branch open/close calls, and errors into a `Sink`, which builds
/// whatever concrete tree representation it likes.
pub(crate) trait Sink<'a> {
    /// The final tree produced once all events have been fed in.
    type Tree;

    /// Creates a sink over the original source text.
    fn new(text: &'a str) -> Self;

    /// Adds a leaf (token) node of `kind` covering the next `len` bytes.
    fn leaf(&mut self, kind: SyntaxKind, len: TextUnit);
    /// Opens an internal (branch) node; subsequent nodes are its
    /// children until the matching `finish_internal`.
    fn start_internal(&mut self, kind: SyntaxKind);
    /// Closes the most recently opened internal node.
    fn finish_internal(&mut self);
    /// Records a syntax error at the current position.
    fn error(&mut self, err: String);
    /// Consumes the sink, yielding the completed tree.
    fn finish(self) -> Self::Tree;
}
28 | |||
29 | /// Parse a sequence of tokens into the representative node tree | ||
30 | pub(crate) fn parse<'a, S: Sink<'a>>(text: &'a str, tokens: &[Token]) -> S::Tree { | ||
31 | let events = { | ||
32 | let input = input::ParserInput::new(text, tokens); | ||
33 | let parser_impl = ParserImpl::new(&input); | ||
34 | let mut parser_api = Parser(parser_impl); | ||
35 | grammar::file(&mut parser_api); | ||
36 | parser_api.0.into_events() | ||
37 | }; | ||
38 | let mut sink = S::new(text); | ||
39 | process(&mut sink, tokens, events); | ||
40 | sink.finish() | ||
41 | } | ||
42 | |||
/// Implementation details of `Parser`, extracted
/// to a separate struct in order not to pollute
/// the public API of the `Parser`.
pub(crate) struct ParserImpl<'t> {
    // The tokenized input being parsed.
    inp: &'t ParserInput<'t>,

    // Current position in the token stream.
    pos: InputPosition,
    // Flat log of parse events; turned into a tree by `process`.
    events: Vec<Event>,
}
52 | |||
impl<'t> ParserImpl<'t> {
    /// Creates a parser positioned at the start of `inp`, with an empty
    /// event log.
    pub(crate) fn new(inp: &'t ParserInput<'t>) -> ParserImpl<'t> {
        ParserImpl {
            inp,

            pos: InputPosition::new(),
            events: Vec::new(),
        }
    }

    /// Consumes the parser, returning the accumulated events.
    /// The grammar must have consumed the whole input by this point.
    pub(crate) fn into_events(self) -> Vec<Event> {
        assert_eq!(self.nth(0), EOF);
        self.events
    }

    /// Checks that the next two tokens are `c1`, `c2` AND are textually
    /// adjacent (no whitespace between them), so that they may be
    /// treated as a single compound token (e.g. two `>`s as `>>`).
    pub(super) fn at_compound2(&self, c1: SyntaxKind, c2: SyntaxKind) -> bool {
        self.inp.kind(self.pos) == c1 && self.inp.kind(self.pos + 1) == c2
            && self.inp.start(self.pos + 1) == self.inp.start(self.pos) + self.inp.len(self.pos)
    }

    /// Like `at_compound2`, but for three adjacent tokens.
    pub(super) fn at_compound3(&self, c1: SyntaxKind, c2: SyntaxKind, c3: SyntaxKind) -> bool {
        self.inp.kind(self.pos) == c1 && self.inp.kind(self.pos + 1) == c2 && self.inp.kind(self.pos + 2) == c3
            && self.inp.start(self.pos + 1) == self.inp.start(self.pos) + self.inp.len(self.pos)
            && self.inp.start(self.pos + 2) == self.inp.start(self.pos + 1) + self.inp.len(self.pos + 1)
    }

    /// Returns the kind of the token `n` positions ahead (0 = current).
    /// `EOF` past the end (see `ParserInput::kind`).
    pub(super) fn nth(&self, n: u32) -> SyntaxKind {
        self.inp.kind(self.pos + n)
    }

    /// Checks whether the current token's text is exactly `t`;
    /// presumably used for contextual keywords (`union`, `auto`, ...)
    /// which lex as plain identifiers — confirm against grammar callers.
    pub(super) fn at_kw(&self, t: &str) -> bool {
        self.inp.text(self.pos) == t
    }

    /// Starts a new node, emitting a placeholder `TOMBSTONE` start
    /// event. Returns the event's index, to be passed later to
    /// `complete`, `abandon`, or `precede`.
    pub(super) fn start(&mut self) -> u32 {
        let pos = self.events.len() as u32;
        self.event(Event::Start {
            kind: TOMBSTONE,
            forward_parent: None,
        });
        pos
    }

    /// Consumes the current token as-is; a no-op at end of input.
    pub(super) fn bump(&mut self) {
        let kind = self.nth(0);
        if kind == EOF {
            return;
        }
        self.do_bump(kind, 1);
    }

    /// Consumes the current token but records it under a different
    /// `kind` (e.g. remapping an `IDENT` to a contextual keyword).
    pub(super) fn bump_remap(&mut self, kind: SyntaxKind) {
        if self.nth(0) == EOF {
            // TODO: panic!?
            return;
        }
        self.do_bump(kind, 1);
    }

    /// Consumes `n` raw tokens, recording them as a single token of
    /// `kind` (used together with `at_compound2`/`at_compound3`).
    pub(super) fn bump_compound(&mut self, kind: SyntaxKind, n: u8) {
        self.do_bump(kind, n);
    }

    // Shared tail of all `bump*` methods: advance the input position,
    // then record the token event.
    fn do_bump(&mut self, kind: SyntaxKind, n_raw_tokens: u8) {
        self.pos += u32::from(n_raw_tokens);
        self.event(Event::Token {
            kind,
            n_raw_tokens,
        });
    }

    /// Records a non-fatal parse error at the current position.
    pub(super) fn error(&mut self, msg: String) {
        self.event(Event::Error { msg })
    }

    /// Finishes the node started at event index `pos`: retroactively
    /// replaces its placeholder kind and emits the matching `Finish`.
    pub(super) fn complete(&mut self, pos: u32, kind: SyntaxKind) {
        match self.events[pos as usize] {
            Event::Start {
                kind: ref mut slot, ..
            } => {
                *slot = kind;
            }
            _ => unreachable!(),
        }
        self.event(Event::Finish);
    }

    /// Cancels the node started at event index `pos`. If it is the most
    /// recent event it is popped outright; otherwise the `TOMBSTONE`
    /// start is left in place — presumably skipped by `process` when
    /// the tree is built (TODO confirm against `event::process`).
    pub(super) fn abandon(&mut self, pos: u32) {
        let idx = pos as usize;
        if idx == self.events.len() - 1 {
            match self.events.pop() {
                Some(Event::Start {
                    kind: TOMBSTONE,
                    forward_parent: None,
                }) => (),
                _ => unreachable!(),
            }
        }
    }

    /// Wraps the node started at `pos` in a new parent node started
    /// here. The child records the parent's position as a relative
    /// `forward_parent` offset; returns the new parent's event index.
    pub(super) fn precede(&mut self, pos: u32) -> u32 {
        let new_pos = self.start();
        match self.events[pos as usize] {
            Event::Start {
                ref mut forward_parent,
                ..
            } => {
                *forward_parent = Some(new_pos - pos);
            }
            _ => unreachable!(),
        }
        new_pos
    }

    // Appends one event to the log.
    fn event(&mut self, event: Event) {
        self.events.push(event)
    }
}
diff --git a/crates/libsyntax2/src/smol_str.rs b/crates/libsyntax2/src/smol_str.rs new file mode 100644 index 000000000..abf69dce7 --- /dev/null +++ b/crates/libsyntax2/src/smol_str.rs | |||
@@ -0,0 +1,83 @@ | |||
1 | use std::{sync::Arc}; | ||
2 | |||
// Maximum number of bytes stored inline; chosen so that, together with
// the discriminant and the `len` byte, `SmolStr` stays at 24 bytes
// (see the `smol_str_is_smol` test below).
const INLINE_CAP: usize = 22;
// Sentinel value for `len` — out of range for real inline lengths —
// marking the whitespace-run representation (see `SmolStr::new`).
const WS_TAG: u8 = (INLINE_CAP + 1) as u8;
5 | |||
/// A string type optimized for small strings: anything up to
/// `INLINE_CAP` bytes — and common whitespace runs — is stored inline,
/// without a heap allocation.
#[derive(Clone, Debug)]
pub(crate) enum SmolStr {
    // Fallback for long strings: a cheaply clonable shared allocation.
    Heap(Arc<str>),
    // Inline storage. `len` is either the number of valid bytes in
    // `buf`, or the out-of-band `WS_TAG` marker, in which case
    // `buf[0]`/`buf[1]` hold newline/space counts instead.
    Inline {
        len: u8,
        buf: [u8; INLINE_CAP],
    },
}
14 | |||
impl SmolStr {
    /// Creates a `SmolStr`, storing `text` inline when it is short
    /// enough or is a recognized whitespace run; otherwise on the heap.
    pub fn new(text: &str) -> SmolStr {
        let len = text.len();
        if len <= INLINE_CAP {
            let mut buf = [0; INLINE_CAP];
            buf[..len].copy_from_slice(text.as_bytes());
            return SmolStr::Inline { len: len as u8, buf };
        }

        // Long whitespace (newlines followed by indentation spaces) is
        // common in source text; encode it as (newline count, space
        // count) and tag the inline buffer with WS_TAG.
        let newlines = text.bytes().take_while(|&b| b == b'\n').count();
        let spaces = text[newlines..].bytes().take_while(|&b| b == b' ').count();
        if newlines + spaces == len && newlines <= N_NEWLINES && spaces <= N_SPACES {
            let mut buf = [0; INLINE_CAP];
            buf[0] = newlines as u8;
            buf[1] = spaces as u8;
            return SmolStr::Inline { len: WS_TAG, buf };
        }

        SmolStr::Heap(
            text.to_string().into_boxed_str().into()
        )
    }

    /// Returns the string contents; whitespace runs are served as a
    /// subslice of the static `WS` table.
    pub fn as_str(&self) -> &str {
        match self {
            SmolStr::Heap(data) => &*data,
            SmolStr::Inline { len, buf } => {
                if *len == WS_TAG {
                    let newlines = buf[0] as usize;
                    let spaces = buf[1] as usize;
                    assert!(newlines <= N_NEWLINES && spaces <= N_SPACES);
                    // WS is N_NEWLINES newlines followed by N_SPACES
                    // spaces, so this slice is exactly the encoded run.
                    return &WS[N_NEWLINES - newlines..N_NEWLINES + spaces]
                }

                let len = *len as usize;
                let buf = &buf[..len];
                // SAFETY: `buf[..len]` was copied verbatim from a `&str`
                // in `new`, so it is valid UTF-8.
                unsafe { ::std::str::from_utf8_unchecked(buf) }
            }
        }
    }
}
56 | |||
// Bounds of the whitespace-run encoding: at most 32 newlines followed
// by at most 128 spaces, matching the layout of the `WS` table below.
const N_NEWLINES: usize = 32;
const N_SPACES: usize = 128;
// N_NEWLINES newlines followed by N_SPACES spaces; whitespace runs are
// returned as subslices of this table.
const WS: &str =
    "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n                                                                                                                                ";
61 | |||
62 | |||
#[cfg(test)]
mod tests {
    use super::*;

    // Guards the size invariant the inline representation exists for:
    // SmolStr must stay at three words (24 bytes) on 64-bit targets.
    #[test]
    #[cfg(target_pointer_width = "64")]
    fn smol_str_is_smol() {
        assert_eq!(::std::mem::size_of::<SmolStr>(), 8 + 8 + 8)
    }

    // Grows a string from empty through inline and heap sizes,
    // checking that construction round-trips at every length.
    #[test]
    fn test_round_trip() {
        let mut text = String::new();
        for n in 0..256 {
            let smol = SmolStr::new(&text);
            assert_eq!(smol.as_str(), text.as_str());
            text.push_str(&n.to_string());
        }
    }
}
83 | |||
diff --git a/crates/libsyntax2/src/syntax_kinds/generated.rs b/crates/libsyntax2/src/syntax_kinds/generated.rs new file mode 100644 index 000000000..de2807ba6 --- /dev/null +++ b/crates/libsyntax2/src/syntax_kinds/generated.rs | |||
@@ -0,0 +1,508 @@ | |||
1 | #![allow(bad_style, missing_docs, unreachable_pub)] | ||
2 | #![cfg_attr(rustfmt, rustfmt_skip)] | ||
3 | use super::SyntaxInfo; | ||
4 | |||
/// The kind of syntax node, e.g. `IDENT`, `USE_KW`, or `STRUCT_DEF`.
// NOTE(review): this file appears to be generated from the sibling
// `generated.rs.tera` template — regenerate rather than hand-editing.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum SyntaxKind {
    // Technical SyntaxKinds: they appear temporally during parsing,
    // but never end up in the final tree
    #[doc(hidden)]
    TOMBSTONE,
    #[doc(hidden)]
    EOF,
    // Punctuation and operators:
    SEMI,
    COMMA,
    L_PAREN,
    R_PAREN,
    L_CURLY,
    R_CURLY,
    L_BRACK,
    R_BRACK,
    L_ANGLE,
    R_ANGLE,
    AT,
    POUND,
    TILDE,
    QUESTION,
    DOLLAR,
    AMP,
    PIPE,
    PLUS,
    STAR,
    SLASH,
    CARET,
    PERCENT,
    DOT,
    DOTDOT,
    DOTDOTDOT,
    DOTDOTEQ,
    COLON,
    COLONCOLON,
    EQ,
    EQEQ,
    FAT_ARROW,
    EXCL,
    NEQ,
    MINUS,
    THIN_ARROW,
    LTEQ,
    GTEQ,
    PLUSEQ,
    MINUSEQ,
    AMPAMP,
    PIPEPIPE,
    SHL,
    SHR,
    SHLEQ,
    SHREQ,
    // Keywords (including contextual ones like `union`):
    USE_KW,
    FN_KW,
    STRUCT_KW,
    ENUM_KW,
    TRAIT_KW,
    IMPL_KW,
    TRUE_KW,
    FALSE_KW,
    AS_KW,
    EXTERN_KW,
    CRATE_KW,
    MOD_KW,
    PUB_KW,
    SELF_KW,
    SUPER_KW,
    IN_KW,
    WHERE_KW,
    FOR_KW,
    LOOP_KW,
    WHILE_KW,
    IF_KW,
    ELSE_KW,
    MATCH_KW,
    CONST_KW,
    STATIC_KW,
    MUT_KW,
    UNSAFE_KW,
    TYPE_KW,
    REF_KW,
    LET_KW,
    MOVE_KW,
    RETURN_KW,
    AUTO_KW,
    DEFAULT_KW,
    UNION_KW,
    // Other lexer-produced tokens:
    ERROR,
    IDENT,
    UNDERSCORE,
    WHITESPACE,
    INT_NUMBER,
    FLOAT_NUMBER,
    LIFETIME,
    CHAR,
    BYTE,
    STRING,
    RAW_STRING,
    BYTE_STRING,
    RAW_BYTE_STRING,
    COMMENT,
    DOC_COMMENT,
    SHEBANG,
    // Composite (parser-produced) nodes:
    FILE,
    STRUCT_ITEM,
    ENUM_ITEM,
    FUNCTION,
    EXTERN_CRATE_ITEM,
    MOD_ITEM,
    USE_ITEM,
    STATIC_ITEM,
    CONST_ITEM,
    TRAIT_ITEM,
    IMPL_ITEM,
    TYPE_ITEM,
    MACRO_CALL,
    TOKEN_TREE,
    PAREN_TYPE,
    TUPLE_TYPE,
    NEVER_TYPE,
    PATH_TYPE,
    POINTER_TYPE,
    ARRAY_TYPE,
    SLICE_TYPE,
    REFERENCE_TYPE,
    PLACEHOLDER_TYPE,
    FN_POINTER_TYPE,
    FOR_TYPE,
    IMPL_TRAIT_TYPE,
    REF_PAT,
    BIND_PAT,
    PLACEHOLDER_PAT,
    PATH_PAT,
    STRUCT_PAT,
    TUPLE_STRUCT_PAT,
    TUPLE_PAT,
    SLICE_PAT,
    RANGE_PAT,
    TUPLE_EXPR,
    ARRAY_EXPR,
    PAREN_EXPR,
    PATH_EXPR,
    LAMBDA_EXPR,
    IF_EXPR,
    WHILE_EXPR,
    LOOP_EXPR,
    FOR_EXPR,
    BLOCK_EXPR,
    RETURN_EXPR,
    MATCH_EXPR,
    MATCH_ARM,
    MATCH_GUARD,
    STRUCT_LIT,
    STRUCT_LIT_FIELD,
    CALL_EXPR,
    INDEX_EXPR,
    METHOD_CALL_EXPR,
    FIELD_EXPR,
    TRY_EXPR,
    CAST_EXPR,
    REF_EXPR,
    PREFIX_EXPR,
    RANGE_EXPR,
    BIN_EXPR,
    EXTERN_BLOCK_EXPR,
    ENUM_VARIANT,
    NAMED_FIELD,
    POS_FIELD,
    ATTR,
    META_ITEM,
    USE_TREE,
    PATH,
    PATH_SEGMENT,
    LITERAL,
    ALIAS,
    VISIBILITY,
    WHERE_CLAUSE,
    WHERE_PRED,
    ABI,
    NAME,
    NAME_REF,
    LET_STMT,
    EXPR_STMT,
    TYPE_PARAM_LIST,
    LIFETIME_PARAM,
    TYPE_PARAM,
    TYPE_ARG_LIST,
    LIFETIME_ARG,
    TYPE_ARG,
    ASSOC_TYPE_ARG,
    PARAM_LIST,
    PARAM,
    SELF_PARAM,
    ARG_LIST,
}
202 | use self::SyntaxKind::*; | ||
203 | |||
impl SyntaxKind {
    /// Returns `true` for keyword kinds (including contextual keywords
    /// such as `auto`, `default`, and `union`).
    pub fn is_keyword(self) -> bool {
        match self {
            | USE_KW
            | FN_KW
            | STRUCT_KW
            | ENUM_KW
            | TRAIT_KW
            | IMPL_KW
            | TRUE_KW
            | FALSE_KW
            | AS_KW
            | EXTERN_KW
            | CRATE_KW
            | MOD_KW
            | PUB_KW
            | SELF_KW
            | SUPER_KW
            | IN_KW
            | WHERE_KW
            | FOR_KW
            | LOOP_KW
            | WHILE_KW
            | IF_KW
            | ELSE_KW
            | MATCH_KW
            | CONST_KW
            | STATIC_KW
            | MUT_KW
            | UNSAFE_KW
            | TYPE_KW
            | REF_KW
            | LET_KW
            | MOVE_KW
            | RETURN_KW
            | AUTO_KW
            | DEFAULT_KW
            | UNION_KW
                => true,
            _ => false
        }
    }

    /// Returns the static metadata (currently just the canonical name)
    /// for this kind; used by the `Debug` impl in `syntax_kinds/mod.rs`.
    pub(crate) fn info(self) -> &'static SyntaxInfo {
        match self {
            SEMI => &SyntaxInfo { name: "SEMI" },
            COMMA => &SyntaxInfo { name: "COMMA" },
            L_PAREN => &SyntaxInfo { name: "L_PAREN" },
            R_PAREN => &SyntaxInfo { name: "R_PAREN" },
            L_CURLY => &SyntaxInfo { name: "L_CURLY" },
            R_CURLY => &SyntaxInfo { name: "R_CURLY" },
            L_BRACK => &SyntaxInfo { name: "L_BRACK" },
            R_BRACK => &SyntaxInfo { name: "R_BRACK" },
            L_ANGLE => &SyntaxInfo { name: "L_ANGLE" },
            R_ANGLE => &SyntaxInfo { name: "R_ANGLE" },
            AT => &SyntaxInfo { name: "AT" },
            POUND => &SyntaxInfo { name: "POUND" },
            TILDE => &SyntaxInfo { name: "TILDE" },
            QUESTION => &SyntaxInfo { name: "QUESTION" },
            DOLLAR => &SyntaxInfo { name: "DOLLAR" },
            AMP => &SyntaxInfo { name: "AMP" },
            PIPE => &SyntaxInfo { name: "PIPE" },
            PLUS => &SyntaxInfo { name: "PLUS" },
            STAR => &SyntaxInfo { name: "STAR" },
            SLASH => &SyntaxInfo { name: "SLASH" },
            CARET => &SyntaxInfo { name: "CARET" },
            PERCENT => &SyntaxInfo { name: "PERCENT" },
            DOT => &SyntaxInfo { name: "DOT" },
            DOTDOT => &SyntaxInfo { name: "DOTDOT" },
            DOTDOTDOT => &SyntaxInfo { name: "DOTDOTDOT" },
            DOTDOTEQ => &SyntaxInfo { name: "DOTDOTEQ" },
            COLON => &SyntaxInfo { name: "COLON" },
            COLONCOLON => &SyntaxInfo { name: "COLONCOLON" },
            EQ => &SyntaxInfo { name: "EQ" },
            EQEQ => &SyntaxInfo { name: "EQEQ" },
            FAT_ARROW => &SyntaxInfo { name: "FAT_ARROW" },
            EXCL => &SyntaxInfo { name: "EXCL" },
            NEQ => &SyntaxInfo { name: "NEQ" },
            MINUS => &SyntaxInfo { name: "MINUS" },
            THIN_ARROW => &SyntaxInfo { name: "THIN_ARROW" },
            LTEQ => &SyntaxInfo { name: "LTEQ" },
            GTEQ => &SyntaxInfo { name: "GTEQ" },
            PLUSEQ => &SyntaxInfo { name: "PLUSEQ" },
            MINUSEQ => &SyntaxInfo { name: "MINUSEQ" },
            AMPAMP => &SyntaxInfo { name: "AMPAMP" },
            PIPEPIPE => &SyntaxInfo { name: "PIPEPIPE" },
            SHL => &SyntaxInfo { name: "SHL" },
            SHR => &SyntaxInfo { name: "SHR" },
            SHLEQ => &SyntaxInfo { name: "SHLEQ" },
            SHREQ => &SyntaxInfo { name: "SHREQ" },
            USE_KW => &SyntaxInfo { name: "USE_KW" },
            FN_KW => &SyntaxInfo { name: "FN_KW" },
            STRUCT_KW => &SyntaxInfo { name: "STRUCT_KW" },
            ENUM_KW => &SyntaxInfo { name: "ENUM_KW" },
            TRAIT_KW => &SyntaxInfo { name: "TRAIT_KW" },
            IMPL_KW => &SyntaxInfo { name: "IMPL_KW" },
            TRUE_KW => &SyntaxInfo { name: "TRUE_KW" },
            FALSE_KW => &SyntaxInfo { name: "FALSE_KW" },
            AS_KW => &SyntaxInfo { name: "AS_KW" },
            EXTERN_KW => &SyntaxInfo { name: "EXTERN_KW" },
            CRATE_KW => &SyntaxInfo { name: "CRATE_KW" },
            MOD_KW => &SyntaxInfo { name: "MOD_KW" },
            PUB_KW => &SyntaxInfo { name: "PUB_KW" },
            SELF_KW => &SyntaxInfo { name: "SELF_KW" },
            SUPER_KW => &SyntaxInfo { name: "SUPER_KW" },
            IN_KW => &SyntaxInfo { name: "IN_KW" },
            WHERE_KW => &SyntaxInfo { name: "WHERE_KW" },
            FOR_KW => &SyntaxInfo { name: "FOR_KW" },
            LOOP_KW => &SyntaxInfo { name: "LOOP_KW" },
            WHILE_KW => &SyntaxInfo { name: "WHILE_KW" },
            IF_KW => &SyntaxInfo { name: "IF_KW" },
            ELSE_KW => &SyntaxInfo { name: "ELSE_KW" },
            MATCH_KW => &SyntaxInfo { name: "MATCH_KW" },
            CONST_KW => &SyntaxInfo { name: "CONST_KW" },
            STATIC_KW => &SyntaxInfo { name: "STATIC_KW" },
            MUT_KW => &SyntaxInfo { name: "MUT_KW" },
            UNSAFE_KW => &SyntaxInfo { name: "UNSAFE_KW" },
            TYPE_KW => &SyntaxInfo { name: "TYPE_KW" },
            REF_KW => &SyntaxInfo { name: "REF_KW" },
            LET_KW => &SyntaxInfo { name: "LET_KW" },
            MOVE_KW => &SyntaxInfo { name: "MOVE_KW" },
            RETURN_KW => &SyntaxInfo { name: "RETURN_KW" },
            AUTO_KW => &SyntaxInfo { name: "AUTO_KW" },
            DEFAULT_KW => &SyntaxInfo { name: "DEFAULT_KW" },
            UNION_KW => &SyntaxInfo { name: "UNION_KW" },
            ERROR => &SyntaxInfo { name: "ERROR" },
            IDENT => &SyntaxInfo { name: "IDENT" },
            UNDERSCORE => &SyntaxInfo { name: "UNDERSCORE" },
            WHITESPACE => &SyntaxInfo { name: "WHITESPACE" },
            INT_NUMBER => &SyntaxInfo { name: "INT_NUMBER" },
            FLOAT_NUMBER => &SyntaxInfo { name: "FLOAT_NUMBER" },
            LIFETIME => &SyntaxInfo { name: "LIFETIME" },
            CHAR => &SyntaxInfo { name: "CHAR" },
            BYTE => &SyntaxInfo { name: "BYTE" },
            STRING => &SyntaxInfo { name: "STRING" },
            RAW_STRING => &SyntaxInfo { name: "RAW_STRING" },
            BYTE_STRING => &SyntaxInfo { name: "BYTE_STRING" },
            RAW_BYTE_STRING => &SyntaxInfo { name: "RAW_BYTE_STRING" },
            COMMENT => &SyntaxInfo { name: "COMMENT" },
            DOC_COMMENT => &SyntaxInfo { name: "DOC_COMMENT" },
            SHEBANG => &SyntaxInfo { name: "SHEBANG" },
            FILE => &SyntaxInfo { name: "FILE" },
            STRUCT_ITEM => &SyntaxInfo { name: "STRUCT_ITEM" },
            ENUM_ITEM => &SyntaxInfo { name: "ENUM_ITEM" },
            FUNCTION => &SyntaxInfo { name: "FUNCTION" },
            EXTERN_CRATE_ITEM => &SyntaxInfo { name: "EXTERN_CRATE_ITEM" },
            MOD_ITEM => &SyntaxInfo { name: "MOD_ITEM" },
            USE_ITEM => &SyntaxInfo { name: "USE_ITEM" },
            STATIC_ITEM => &SyntaxInfo { name: "STATIC_ITEM" },
            CONST_ITEM => &SyntaxInfo { name: "CONST_ITEM" },
            TRAIT_ITEM => &SyntaxInfo { name: "TRAIT_ITEM" },
            IMPL_ITEM => &SyntaxInfo { name: "IMPL_ITEM" },
            TYPE_ITEM => &SyntaxInfo { name: "TYPE_ITEM" },
            MACRO_CALL => &SyntaxInfo { name: "MACRO_CALL" },
            TOKEN_TREE => &SyntaxInfo { name: "TOKEN_TREE" },
            PAREN_TYPE => &SyntaxInfo { name: "PAREN_TYPE" },
            TUPLE_TYPE => &SyntaxInfo { name: "TUPLE_TYPE" },
            NEVER_TYPE => &SyntaxInfo { name: "NEVER_TYPE" },
            PATH_TYPE => &SyntaxInfo { name: "PATH_TYPE" },
            POINTER_TYPE => &SyntaxInfo { name: "POINTER_TYPE" },
            ARRAY_TYPE => &SyntaxInfo { name: "ARRAY_TYPE" },
            SLICE_TYPE => &SyntaxInfo { name: "SLICE_TYPE" },
            REFERENCE_TYPE => &SyntaxInfo { name: "REFERENCE_TYPE" },
            PLACEHOLDER_TYPE => &SyntaxInfo { name: "PLACEHOLDER_TYPE" },
            FN_POINTER_TYPE => &SyntaxInfo { name: "FN_POINTER_TYPE" },
            FOR_TYPE => &SyntaxInfo { name: "FOR_TYPE" },
            IMPL_TRAIT_TYPE => &SyntaxInfo { name: "IMPL_TRAIT_TYPE" },
            REF_PAT => &SyntaxInfo { name: "REF_PAT" },
            BIND_PAT => &SyntaxInfo { name: "BIND_PAT" },
            PLACEHOLDER_PAT => &SyntaxInfo { name: "PLACEHOLDER_PAT" },
            PATH_PAT => &SyntaxInfo { name: "PATH_PAT" },
            STRUCT_PAT => &SyntaxInfo { name: "STRUCT_PAT" },
            TUPLE_STRUCT_PAT => &SyntaxInfo { name: "TUPLE_STRUCT_PAT" },
            TUPLE_PAT => &SyntaxInfo { name: "TUPLE_PAT" },
            SLICE_PAT => &SyntaxInfo { name: "SLICE_PAT" },
            RANGE_PAT => &SyntaxInfo { name: "RANGE_PAT" },
            TUPLE_EXPR => &SyntaxInfo { name: "TUPLE_EXPR" },
            ARRAY_EXPR => &SyntaxInfo { name: "ARRAY_EXPR" },
            PAREN_EXPR => &SyntaxInfo { name: "PAREN_EXPR" },
            PATH_EXPR => &SyntaxInfo { name: "PATH_EXPR" },
            LAMBDA_EXPR => &SyntaxInfo { name: "LAMBDA_EXPR" },
            IF_EXPR => &SyntaxInfo { name: "IF_EXPR" },
            WHILE_EXPR => &SyntaxInfo { name: "WHILE_EXPR" },
            LOOP_EXPR => &SyntaxInfo { name: "LOOP_EXPR" },
            FOR_EXPR => &SyntaxInfo { name: "FOR_EXPR" },
            BLOCK_EXPR => &SyntaxInfo { name: "BLOCK_EXPR" },
            RETURN_EXPR => &SyntaxInfo { name: "RETURN_EXPR" },
            MATCH_EXPR => &SyntaxInfo { name: "MATCH_EXPR" },
            MATCH_ARM => &SyntaxInfo { name: "MATCH_ARM" },
            MATCH_GUARD => &SyntaxInfo { name: "MATCH_GUARD" },
            STRUCT_LIT => &SyntaxInfo { name: "STRUCT_LIT" },
            STRUCT_LIT_FIELD => &SyntaxInfo { name: "STRUCT_LIT_FIELD" },
            CALL_EXPR => &SyntaxInfo { name: "CALL_EXPR" },
            INDEX_EXPR => &SyntaxInfo { name: "INDEX_EXPR" },
            METHOD_CALL_EXPR => &SyntaxInfo { name: "METHOD_CALL_EXPR" },
            FIELD_EXPR => &SyntaxInfo { name: "FIELD_EXPR" },
            TRY_EXPR => &SyntaxInfo { name: "TRY_EXPR" },
            CAST_EXPR => &SyntaxInfo { name: "CAST_EXPR" },
            REF_EXPR => &SyntaxInfo { name: "REF_EXPR" },
            PREFIX_EXPR => &SyntaxInfo { name: "PREFIX_EXPR" },
            RANGE_EXPR => &SyntaxInfo { name: "RANGE_EXPR" },
            BIN_EXPR => &SyntaxInfo { name: "BIN_EXPR" },
            EXTERN_BLOCK_EXPR => &SyntaxInfo { name: "EXTERN_BLOCK_EXPR" },
            ENUM_VARIANT => &SyntaxInfo { name: "ENUM_VARIANT" },
            NAMED_FIELD => &SyntaxInfo { name: "NAMED_FIELD" },
            POS_FIELD => &SyntaxInfo { name: "POS_FIELD" },
            ATTR => &SyntaxInfo { name: "ATTR" },
            META_ITEM => &SyntaxInfo { name: "META_ITEM" },
            USE_TREE => &SyntaxInfo { name: "USE_TREE" },
            PATH => &SyntaxInfo { name: "PATH" },
            PATH_SEGMENT => &SyntaxInfo { name: "PATH_SEGMENT" },
            LITERAL => &SyntaxInfo { name: "LITERAL" },
            ALIAS => &SyntaxInfo { name: "ALIAS" },
            VISIBILITY => &SyntaxInfo { name: "VISIBILITY" },
            WHERE_CLAUSE => &SyntaxInfo { name: "WHERE_CLAUSE" },
            WHERE_PRED => &SyntaxInfo { name: "WHERE_PRED" },
            ABI => &SyntaxInfo { name: "ABI" },
            NAME => &SyntaxInfo { name: "NAME" },
            NAME_REF => &SyntaxInfo { name: "NAME_REF" },
            LET_STMT => &SyntaxInfo { name: "LET_STMT" },
            EXPR_STMT => &SyntaxInfo { name: "EXPR_STMT" },
            TYPE_PARAM_LIST => &SyntaxInfo { name: "TYPE_PARAM_LIST" },
            LIFETIME_PARAM => &SyntaxInfo { name: "LIFETIME_PARAM" },
            TYPE_PARAM => &SyntaxInfo { name: "TYPE_PARAM" },
            TYPE_ARG_LIST => &SyntaxInfo { name: "TYPE_ARG_LIST" },
            LIFETIME_ARG => &SyntaxInfo { name: "LIFETIME_ARG" },
            TYPE_ARG => &SyntaxInfo { name: "TYPE_ARG" },
            ASSOC_TYPE_ARG => &SyntaxInfo { name: "ASSOC_TYPE_ARG" },
            PARAM_LIST => &SyntaxInfo { name: "PARAM_LIST" },
            PARAM => &SyntaxInfo { name: "PARAM" },
            SELF_PARAM => &SyntaxInfo { name: "SELF_PARAM" },
            ARG_LIST => &SyntaxInfo { name: "ARG_LIST" },
            TOMBSTONE => &SyntaxInfo { name: "TOMBSTONE" },
            EOF => &SyntaxInfo { name: "EOF" },
        }
    }
    /// Maps a strict keyword's text to its kind; contextual keywords
    /// (`auto`, `default`, `union`) deliberately return `None` here.
    pub(crate) fn from_keyword(ident: &str) -> Option<SyntaxKind> {
        let kw = match ident {
            "use" => USE_KW,
            "fn" => FN_KW,
            "struct" => STRUCT_KW,
            "enum" => ENUM_KW,
            "trait" => TRAIT_KW,
            "impl" => IMPL_KW,
            "true" => TRUE_KW,
            "false" => FALSE_KW,
            "as" => AS_KW,
            "extern" => EXTERN_KW,
            "crate" => CRATE_KW,
            "mod" => MOD_KW,
            "pub" => PUB_KW,
            "self" => SELF_KW,
            "super" => SUPER_KW,
            "in" => IN_KW,
            "where" => WHERE_KW,
            "for" => FOR_KW,
            "loop" => LOOP_KW,
            "while" => WHILE_KW,
            "if" => IF_KW,
            "else" => ELSE_KW,
            "match" => MATCH_KW,
            "const" => CONST_KW,
            "static" => STATIC_KW,
            "mut" => MUT_KW,
            "unsafe" => UNSAFE_KW,
            "type" => TYPE_KW,
            "ref" => REF_KW,
            "let" => LET_KW,
            "move" => MOVE_KW,
            "return" => RETURN_KW,
            _ => return None,
        };
        Some(kw)
    }

    /// Maps a single-byte punctuation character to its token kind.
    pub(crate) fn from_char(c: char) -> Option<SyntaxKind> {
        let tok = match c {
            ';' => SEMI,
            ',' => COMMA,
            '(' => L_PAREN,
            ')' => R_PAREN,
            '{' => L_CURLY,
            '}' => R_CURLY,
            '[' => L_BRACK,
            ']' => R_BRACK,
            '<' => L_ANGLE,
            '>' => R_ANGLE,
            '@' => AT,
            '#' => POUND,
            '~' => TILDE,
            '?' => QUESTION,
            '$' => DOLLAR,
            '&' => AMP,
            '|' => PIPE,
            '+' => PLUS,
            '*' => STAR,
            '/' => SLASH,
            '^' => CARET,
            '%' => PERCENT,
            _ => return None,
        };
        Some(tok)
    }
}
508 | |||
diff --git a/crates/libsyntax2/src/syntax_kinds/generated.rs.tera b/crates/libsyntax2/src/syntax_kinds/generated.rs.tera new file mode 100644 index 000000000..90618721a --- /dev/null +++ b/crates/libsyntax2/src/syntax_kinds/generated.rs.tera | |||
@@ -0,0 +1,73 @@ | |||
1 | #![allow(bad_style, missing_docs, unreachable_pub)] | ||
2 | #![cfg_attr(rustfmt, rustfmt_skip)] | ||
3 | use super::SyntaxInfo; | ||
4 | |||
5 | /// The kind of syntax node, e.g. `IDENT`, `USE_KW`, or `STRUCT_DEF`. | ||
6 | #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] | ||
7 | pub enum SyntaxKind { | ||
8 | // Technical SyntaxKinds: they appear temporally during parsing, | ||
9 | // but never end up in the final tree | ||
10 | #[doc(hidden)] | ||
11 | TOMBSTONE, | ||
12 | #[doc(hidden)] | ||
13 | EOF, | ||
14 | |||
15 | {%- for t in concat(a=single_byte_tokens, b=multi_byte_tokens) %} | ||
16 | {{t.1}}, | ||
17 | {%- endfor -%} | ||
18 | {% for kw in concat(a=keywords, b=contextual_keywords) %} | ||
19 | {{kw | upper}}_KW, | ||
20 | {%- endfor -%} | ||
21 | {% for t in concat(a=tokens, b=nodes) %} | ||
22 | {{t}}, | ||
23 | {%- endfor %} | ||
24 | } | ||
25 | use self::SyntaxKind::*; | ||
26 | |||
27 | impl SyntaxKind { | ||
28 | pub fn is_keyword(self) -> bool { | ||
29 | match self { | ||
30 | {%- for kw in concat(a=keywords, b=contextual_keywords) %} | ||
31 | | {{kw | upper}}_KW | ||
32 | {%- endfor %} | ||
33 | => true, | ||
34 | _ => false | ||
35 | } | ||
36 | } | ||
37 | |||
38 | pub(crate) fn info(self) -> &'static SyntaxInfo { | ||
39 | match self { | ||
40 | {%- for t in concat(a=single_byte_tokens, b=multi_byte_tokens) %} | ||
41 | {{t.1}} => &SyntaxInfo { name: "{{t.1}}" }, | ||
42 | {%- endfor -%} | ||
43 | {% for kw in concat(a=keywords, b=contextual_keywords) %} | ||
44 | {{kw | upper}}_KW => &SyntaxInfo { name: "{{kw | upper}}_KW" }, | ||
45 | {%- endfor -%} | ||
46 | {% for t in concat(a=tokens, b=nodes) %} | ||
47 | {{t}} => &SyntaxInfo { name: "{{t}}" }, | ||
48 | {%- endfor %} | ||
49 | TOMBSTONE => &SyntaxInfo { name: "TOMBSTONE" }, | ||
50 | EOF => &SyntaxInfo { name: "EOF" }, | ||
51 | } | ||
52 | } | ||
53 | pub(crate) fn from_keyword(ident: &str) -> Option<SyntaxKind> { | ||
54 | let kw = match ident { | ||
55 | {%- for kw in keywords %} | ||
56 | "{{kw}}" => {{kw | upper}}_KW, | ||
57 | {%- endfor %} | ||
58 | _ => return None, | ||
59 | }; | ||
60 | Some(kw) | ||
61 | } | ||
62 | |||
63 | pub(crate) fn from_char(c: char) -> Option<SyntaxKind> { | ||
64 | let tok = match c { | ||
65 | {%- for t in single_byte_tokens %} | ||
66 | '{{t.0}}' => {{t.1}}, | ||
67 | {%- endfor %} | ||
68 | _ => return None, | ||
69 | }; | ||
70 | Some(tok) | ||
71 | } | ||
72 | } | ||
73 | |||
diff --git a/crates/libsyntax2/src/syntax_kinds/mod.rs b/crates/libsyntax2/src/syntax_kinds/mod.rs new file mode 100644 index 000000000..ed4fa5d4d --- /dev/null +++ b/crates/libsyntax2/src/syntax_kinds/mod.rs | |||
@@ -0,0 +1,26 @@ | |||
1 | mod generated; | ||
2 | |||
3 | use std::fmt; | ||
4 | use SyntaxKind::*; | ||
5 | |||
6 | pub use self::generated::SyntaxKind; | ||
7 | |||
8 | impl fmt::Debug for SyntaxKind { | ||
9 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||
10 | let name = self.info().name; | ||
11 | f.write_str(name) | ||
12 | } | ||
13 | } | ||
14 | |||
/// Static per-kind metadata; currently just the kind's canonical name,
/// used for `Debug` output and tree dumps.
pub(crate) struct SyntaxInfo {
    pub name: &'static str,
}
18 | |||
19 | impl SyntaxKind { | ||
20 | pub(crate) fn is_trivia(self) -> bool { | ||
21 | match self { | ||
22 | WHITESPACE | COMMENT | DOC_COMMENT => true, | ||
23 | _ => false, | ||
24 | } | ||
25 | } | ||
26 | } | ||
diff --git a/crates/libsyntax2/src/utils.rs b/crates/libsyntax2/src/utils.rs new file mode 100644 index 000000000..1fbb872a5 --- /dev/null +++ b/crates/libsyntax2/src/utils.rs | |||
@@ -0,0 +1,48 @@ | |||
1 | use std::fmt::Write; | ||
2 | use { | ||
3 | algo::walk::{walk, WalkEvent}, | ||
4 | SyntaxNode, | ||
5 | }; | ||
6 | |||
/// Parse a file and create a string representation of the resulting parse tree.
pub fn dump_tree(syntax: &SyntaxNode) -> String {
    let syntax = syntax.as_ref();
    // Errors sorted by offset so they can be interleaved with the
    // nodes they fall inside of.
    let mut errors: Vec<_> = syntax.root.errors.iter().cloned().collect();
    errors.sort_by_key(|e| e.offset);
    let mut err_pos = 0;
    let mut level = 0;
    let mut buf = String::new();
    // A macro (rather than a closure) because it appends to `buf`,
    // which is also written to directly in the loop below.
    macro_rules! indent {
        () => {
            for _ in 0..level {
                buf.push_str(" ");
            }
        };
    }

    for event in walk(syntax) {
        match event {
            WalkEvent::Enter(node) => {
                indent!();
                writeln!(buf, "{:?}", node).unwrap();
                // Errors are printed under the leaf (childless) node
                // whose end they do not pass.
                if node.first_child().is_none() {
                    let off = node.range().end();
                    while err_pos < errors.len() && errors[err_pos].offset <= off {
                        indent!();
                        writeln!(buf, "err: `{}`", errors[err_pos].msg).unwrap();
                        err_pos += 1;
                    }
                }
                level += 1;
            }
            WalkEvent::Exit(_) => level -= 1,
        }
    }

    // Every Enter must have been balanced by an Exit.
    assert_eq!(level, 0);
    // Any errors past the last leaf are appended at the end.
    for err in errors[err_pos..].iter() {
        writeln!(buf, "err: `{}`", err.msg).unwrap();
    }

    return buf;
}
diff --git a/crates/libsyntax2/src/yellow/builder.rs b/crates/libsyntax2/src/yellow/builder.rs new file mode 100644 index 000000000..5e94e5055 --- /dev/null +++ b/crates/libsyntax2/src/yellow/builder.rs | |||
@@ -0,0 +1,65 @@ | |||
1 | use { | ||
2 | parser_impl::Sink, | ||
3 | yellow::{GreenNode, SyntaxError, SyntaxNode, SyntaxRoot}, | ||
4 | SyntaxKind, TextRange, TextUnit, | ||
5 | }; | ||
6 | |||
/// A `Sink` implementation that assembles the immutable "green" tree
/// bottom-up from the parser's event stream.
pub(crate) struct GreenBuilder<'a> {
    // The full input text; leaf nodes copy their text out of it.
    text: &'a str,
    // Stack of currently-open internal nodes: (kind, index into `children`
    // where that node's children begin).
    parents: Vec<(SyntaxKind, usize)>,
    // Flat buffer of finished nodes; the children of the innermost open
    // internal node sit at the tail until it is finished.
    children: Vec<GreenNode>,
    // Current absolute offset into `text`, advanced by each leaf.
    pos: TextUnit,
    // Syntax errors reported so far, tagged with the offset at report time.
    errors: Vec<SyntaxError>,
}
14 | |||
15 | impl<'a> Sink<'a> for GreenBuilder<'a> { | ||
16 | type Tree = SyntaxNode; | ||
17 | |||
18 | fn new(text: &'a str) -> Self { | ||
19 | GreenBuilder { | ||
20 | text, | ||
21 | parents: Vec::new(), | ||
22 | children: Vec::new(), | ||
23 | pos: 0.into(), | ||
24 | errors: Vec::new(), | ||
25 | } | ||
26 | } | ||
27 | |||
28 | fn leaf(&mut self, kind: SyntaxKind, len: TextUnit) { | ||
29 | let range = TextRange::offset_len(self.pos, len); | ||
30 | self.pos += len; | ||
31 | let text = &self.text[range]; | ||
32 | self.children.push( | ||
33 | GreenNode::new_leaf(kind, text) | ||
34 | ); | ||
35 | } | ||
36 | |||
37 | fn start_internal(&mut self, kind: SyntaxKind) { | ||
38 | let len = self.children.len(); | ||
39 | self.parents.push((kind, len)); | ||
40 | } | ||
41 | |||
42 | fn finish_internal(&mut self) { | ||
43 | let (kind, first_child) = self.parents.pop().unwrap(); | ||
44 | let children: Vec<_> = self.children | ||
45 | .drain(first_child..) | ||
46 | .collect(); | ||
47 | self.children.push( | ||
48 | GreenNode::new_branch(kind, children.into_boxed_slice()) | ||
49 | ); | ||
50 | } | ||
51 | |||
52 | fn error(&mut self, message: String) { | ||
53 | self.errors.push(SyntaxError { | ||
54 | msg: message, | ||
55 | offset: self.pos, | ||
56 | }) | ||
57 | } | ||
58 | |||
59 | fn finish(mut self) -> SyntaxNode { | ||
60 | assert_eq!(self.children.len(), 1); | ||
61 | let root = self.children.pop().unwrap(); | ||
62 | let root = SyntaxRoot::new(root, self.errors); | ||
63 | SyntaxNode::new_owned(root) | ||
64 | } | ||
65 | } | ||
diff --git a/crates/libsyntax2/src/yellow/green.rs b/crates/libsyntax2/src/yellow/green.rs new file mode 100644 index 000000000..f505b26d7 --- /dev/null +++ b/crates/libsyntax2/src/yellow/green.rs | |||
@@ -0,0 +1,95 @@ | |||
1 | use std::sync::Arc; | ||
2 | use { | ||
3 | SyntaxKind, TextUnit, | ||
4 | smol_str::SmolStr, | ||
5 | }; | ||
6 | |||
/// The "green" half of the red/green tree: an immutable, position-independent
/// syntax node. Cloning is cheap — leaves hold a small-string, branches a
/// refcounted pointer.
#[derive(Clone, Debug)]
pub(crate) enum GreenNode {
    /// A token: its kind plus the token text.
    Leaf {
        kind: SyntaxKind,
        text: SmolStr,
    },
    /// An interior node; shared via `Arc` so subtrees can be reused.
    Branch(Arc<GreenBranch>),
}
15 | |||
16 | impl GreenNode { | ||
17 | pub(crate) fn new_leaf(kind: SyntaxKind, text: &str) -> GreenNode { | ||
18 | GreenNode::Leaf { kind, text: SmolStr::new(text) } | ||
19 | } | ||
20 | |||
21 | pub(crate) fn new_branch(kind: SyntaxKind, children: Box<[GreenNode]>) -> GreenNode { | ||
22 | GreenNode::Branch(Arc::new(GreenBranch::new(kind, children))) | ||
23 | } | ||
24 | |||
25 | pub fn kind(&self) -> SyntaxKind { | ||
26 | match self { | ||
27 | GreenNode::Leaf { kind, .. } => *kind, | ||
28 | GreenNode::Branch(b) => b.kind(), | ||
29 | } | ||
30 | } | ||
31 | |||
32 | pub fn text_len(&self) -> TextUnit { | ||
33 | match self { | ||
34 | GreenNode::Leaf { text, ..} => TextUnit::of_str(text.as_str()), | ||
35 | GreenNode::Branch(b) => b.text_len(), | ||
36 | } | ||
37 | } | ||
38 | |||
39 | pub fn children(&self) -> &[GreenNode] { | ||
40 | match self { | ||
41 | GreenNode::Leaf { .. } => &[], | ||
42 | GreenNode::Branch(b) => b.children(), | ||
43 | } | ||
44 | } | ||
45 | |||
46 | pub fn text(&self) -> String { | ||
47 | let mut buff = String::new(); | ||
48 | go(self, &mut buff); | ||
49 | return buff; | ||
50 | fn go(node: &GreenNode, buff: &mut String) { | ||
51 | match node { | ||
52 | GreenNode::Leaf { text, .. } => buff.push_str(text.as_str()), | ||
53 | GreenNode::Branch(b) => b.children().iter().for_each(|child| go(child, buff)), | ||
54 | } | ||
55 | } | ||
56 | } | ||
57 | } | ||
58 | |||
/// Payload of an interior green node: its kind, the child nodes, and a
/// cached total text length.
#[derive(Clone, Debug)]
pub(crate) struct GreenBranch {
    // Invariant: equals the sum of the children's text lengths
    // (established once in `new`).
    text_len: TextUnit,
    kind: SyntaxKind,
    children: Box<[GreenNode]>,
}

impl GreenBranch {
    /// Builds a branch, precomputing `text_len` from the children so that
    /// `text_len()` is O(1) afterwards.
    fn new(kind: SyntaxKind, children: Box<[GreenNode]>) -> GreenBranch {
        let text_len = children.iter().map(|x| x.text_len()).sum::<TextUnit>();
        GreenBranch {
            text_len,
            kind,
            children,
        }
    }

    pub fn kind(&self) -> SyntaxKind {
        self.kind
    }

    /// Total length of text covered by this subtree (cached, O(1)).
    pub fn text_len(&self) -> TextUnit {
        self.text_len
    }

    pub fn children(&self) -> &[GreenNode] {
        &*self.children
    }
}
88 | |||
// Not an assertion — just prints type sizes so a developer running the test
// with `--nocapture` can eyeball memory layout.
#[test]
fn test_sizes() {
    use std::mem::size_of;
    let branch = size_of::<GreenBranch>();
    let node = size_of::<GreenNode>();
    let smol = size_of::<SmolStr>();
    println!("GreenBranch = {}", branch);
    println!("GreenNode = {}", node);
    println!("SmolStr = {}", smol);
}
diff --git a/crates/libsyntax2/src/yellow/mod.rs b/crates/libsyntax2/src/yellow/mod.rs new file mode 100644 index 000000000..ff3bb221b --- /dev/null +++ b/crates/libsyntax2/src/yellow/mod.rs | |||
@@ -0,0 +1,62 @@ | |||
1 | mod builder; | ||
2 | mod green; | ||
3 | mod red; | ||
4 | mod syntax; | ||
5 | |||
6 | use std::{ | ||
7 | ops::Deref, | ||
8 | sync::Arc, | ||
9 | ptr, | ||
10 | }; | ||
11 | pub use self::syntax::{SyntaxNode, SyntaxNodeRef, SyntaxError}; | ||
12 | pub(crate) use self::{ | ||
13 | builder::GreenBuilder, | ||
14 | green::GreenNode, | ||
15 | red::RedNode, | ||
16 | }; | ||
17 | |||
/// Anything that can act as the owner of a syntax tree: it must deref to the
/// `SyntaxRoot` and be cheaply clonable and shareable across threads.
pub trait TreeRoot: Deref<Target=SyntaxRoot> + Clone + Send + Sync {}

/// Owns the lazily-built "red" tree plus all syntax errors of one parse.
#[derive(Debug)]
pub struct SyntaxRoot {
    // Root of the red (position-aware) tree. Red child nodes hold raw
    // pointers back into this structure, so the root must stay alive (and
    // in place) while any node handle exists.
    red: RedNode,
    pub(crate) errors: Vec<SyntaxError>,
}

// Owned handle: keeps the tree alive via refcounting.
impl TreeRoot for Arc<SyntaxRoot> {}

// Borrowed handle: tied to the lifetime of the root.
impl<'a> TreeRoot for &'a SyntaxRoot {}

impl SyntaxRoot {
    /// Wraps a freshly built green tree (and its errors) into a root,
    /// creating the red root node for it.
    pub(crate) fn new(green: GreenNode, errors: Vec<SyntaxError>) -> SyntaxRoot {
        SyntaxRoot {
            red: RedNode::new_root(green),
            errors,
        }
    }
}
38 | |||
/// A raw, non-owning pointer to a `RedNode`. It is only dereferenced via
/// `get`, which demands a live `TreeRoot` — the owner of the pointee — so
/// the access cannot dangle.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub(crate) struct RedPtr(ptr::NonNull<RedNode>);

// SAFETY: the pointer is only dereferenced through `get`, which ties the
// resulting reference to a borrow of the owning `TreeRoot`; `NonNull` alone
// is what blocks the auto-impls here.
unsafe impl Send for RedPtr {}

unsafe impl Sync for RedPtr {}

impl RedPtr {
    fn new(red: &RedNode) -> RedPtr {
        RedPtr(red.into())
    }

    /// Dereferences the pointer.
    ///
    /// # Safety
    /// The caller must pass a `_root` that (transitively) owns the pointed-to
    /// `RedNode`; the borrow of `_root` then guarantees the node outlives the
    /// returned reference.
    unsafe fn get<'a>(self, _root: &'a impl TreeRoot) -> &'a RedNode {
        &*self.0.as_ptr()
    }
}
55 | |||
// Purely a compile-time check: instantiating `check::<T>` proves that `T`
// implements both `Send` and `Sync`.
#[test]
fn assert_send_sync() {
    fn check<T: Send + Sync>() {}
    check::<GreenNode>();
    check::<RedNode>();
    check::<SyntaxNode>();
}
diff --git a/crates/libsyntax2/src/yellow/red.rs b/crates/libsyntax2/src/yellow/red.rs new file mode 100644 index 000000000..13ad44c65 --- /dev/null +++ b/crates/libsyntax2/src/yellow/red.rs | |||
@@ -0,0 +1,94 @@ | |||
1 | use parking_lot::RwLock; | ||
2 | use {yellow::{GreenNode, RedPtr}, TextUnit}; | ||
3 | |||
/// The "red" half of the red/green tree: a position-aware wrapper around a
/// `GreenNode`. Red children are materialized lazily, on first access.
#[derive(Debug)]
pub(crate) struct RedNode {
    green: GreenNode,
    // `None` for the root node.
    parent: Option<ParentData>,
    // One slot per green child, filled in on demand by `get_child`; the
    // lock makes concurrent child lookups safe.
    children: RwLock<Box<[Option<RedNode>]>>,
}

/// Back-link data carried by every non-root red node.
#[derive(Debug)]
struct ParentData {
    parent: RedPtr,
    // Absolute offset of this node's text in the source file.
    start_offset: TextUnit,
    index_in_parent: usize,
}
17 | |||
impl RedNode {
    pub fn new_root(green: GreenNode) -> RedNode {
        RedNode::new(green, None)
    }

    fn new_child(
        green: GreenNode,
        parent: RedPtr,
        start_offset: TextUnit,
        index_in_parent: usize,
    ) -> RedNode {
        let parent_data = ParentData {
            parent,
            start_offset,
            index_in_parent,
        };
        RedNode::new(green, Some(parent_data))
    }

    fn new(green: GreenNode, parent: Option<ParentData>) -> RedNode {
        // Allocate one empty slot per green child; the red children
        // themselves are only built on demand in `get_child`.
        let n_children = green.children().len();
        let children = (0..n_children)
            .map(|_| None)
            .collect::<Vec<_>>()
            .into_boxed_slice();
        RedNode {
            green,
            parent,
            children: RwLock::new(children),
        }
    }

    pub(crate) fn green(&self) -> &GreenNode {
        &self.green
    }

    /// Absolute offset of this node's text; the root starts at 0.
    pub(crate) fn start_offset(&self) -> TextUnit {
        match &self.parent {
            None => 0.into(),
            Some(p) => p.start_offset,
        }
    }

    pub(crate) fn n_children(&self) -> usize {
        self.green.children().len()
    }

    /// Returns a pointer to the `idx`-th red child, materializing it on
    /// first access, or `None` if `idx` is out of bounds.
    pub(crate) fn get_child(&self, idx: usize) -> Option<RedPtr> {
        if idx >= self.n_children() {
            return None;
        }
        // Fast path: the child was already created — only the read lock
        // is needed.
        match &self.children.read()[idx] {
            Some(child) => return Some(RedPtr::new(child)),
            None => (),
        };
        // Slow path: compute the child's start offset from the green
        // siblings to its left, build the node, then take the write lock.
        let green_children = self.green.children();
        let start_offset = self.start_offset()
            + green_children[..idx]
                .iter()
                .map(|x| x.text_len())
                .sum::<TextUnit>();
        let child =
            RedNode::new_child(green_children[idx].clone(), RedPtr::new(self), start_offset, idx);
        let mut children = self.children.write();
        // Another thread may have filled the slot between dropping the read
        // lock and acquiring the write lock; keep the existing node in that
        // case so previously handed-out pointers stay valid.
        if children[idx].is_none() {
            children[idx] = Some(child)
        }
        Some(RedPtr::new(children[idx].as_ref().unwrap()))
    }

    /// Pointer to the parent node, or `None` for the root.
    pub(crate) fn parent(&self) -> Option<RedPtr> {
        Some(self.parent.as_ref()?.parent)
    }
    /// This node's index among its parent's children; `None` for the root.
    pub(crate) fn index_in_parent(&self) -> Option<usize> {
        Some(self.parent.as_ref()?.index_in_parent)
    }
}
diff --git a/crates/libsyntax2/src/yellow/syntax.rs b/crates/libsyntax2/src/yellow/syntax.rs new file mode 100644 index 000000000..6e33310f1 --- /dev/null +++ b/crates/libsyntax2/src/yellow/syntax.rs | |||
@@ -0,0 +1,122 @@ | |||
1 | use std::{fmt, sync::Arc}; | ||
2 | |||
3 | use { | ||
4 | yellow::{RedNode, TreeRoot, SyntaxRoot, RedPtr}, | ||
5 | SyntaxKind::{self, *}, | ||
6 | TextRange, TextUnit, | ||
7 | }; | ||
8 | |||
9 | |||
/// A handle to a node of the syntax tree, generic over how the tree is
/// owned: `R = Arc<SyntaxRoot>` is an owned handle, `R = &SyntaxRoot` a
/// borrowed one (see `SyntaxNodeRef`).
#[derive(Clone, Copy)]
pub struct SyntaxNode<R: TreeRoot = Arc<SyntaxRoot>> {
    pub(crate) root: R,
    // Guaranteed to not dangle, because `root` holds a
    // strong reference to red's ancestor
    red: RedPtr,
}

// SAFETY: `red` is only dereferenced while `root` is alive, and `root`
// is `Send + Sync` by the `TreeRoot` bound; the raw pointer inside
// `RedPtr` is what blocks the auto-impls.
unsafe impl<R: TreeRoot> Send for SyntaxNode<R> {}
unsafe impl<R: TreeRoot> Sync for SyntaxNode<R> {}

// Nodes compare by identity (the same red node), regardless of how each
// side owns the tree.
impl<R1: TreeRoot, R2: TreeRoot> PartialEq<SyntaxNode<R1>> for SyntaxNode<R2> {
    fn eq(&self, other: &SyntaxNode<R1>) -> bool {
        self.red == other.red
    }
}

impl<R: TreeRoot> Eq for SyntaxNode<R> {}

/// A borrowed node handle: `Copy`, tied to the root's lifetime.
pub type SyntaxNodeRef<'a> = SyntaxNode<&'a SyntaxRoot>;

/// A parse error, tagged with the offset it was reported at.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Ord, PartialOrd)]
pub struct SyntaxError {
    pub msg: String,
    pub offset: TextUnit,
}
36 | |||
impl SyntaxNode<Arc<SyntaxRoot>> {
    /// Takes ownership of a freshly built root, returning the owned handle
    /// to its top-level node.
    pub(crate) fn new_owned(root: SyntaxRoot) -> Self {
        let root = Arc::new(root);
        // Pointing into the Arc is sound: the heap allocation's address is
        // stable, and the `root` field keeps it alive as long as this node
        // (or any node cloned from it) exists.
        let red = RedPtr::new(&root.red);
        SyntaxNode { root, red }
    }
}
44 | |||
impl<R: TreeRoot> SyntaxNode<R> {
    /// Reborrows this node as a lifetime-bound `SyntaxNodeRef`.
    pub fn as_ref<'a>(&'a self) -> SyntaxNode<&'a SyntaxRoot> {
        SyntaxNode {
            root: &*self.root,
            red: self.red,
        }
    }

    pub fn kind(&self) -> SyntaxKind {
        self.red().green().kind()
    }

    /// Absolute text range covered by this node.
    pub fn range(&self) -> TextRange {
        let red = self.red();
        TextRange::offset_len(red.start_offset(), red.green().text_len())
    }

    /// The node's text, concatenated from its leaves (allocates).
    pub fn text(&self) -> String {
        self.red().green().text()
    }

    /// Iterator over the direct children, in source order.
    pub fn children<'a>(&'a self) -> impl Iterator<Item = SyntaxNode<R>> + 'a {
        let red = self.red();
        let n_children = red.n_children();
        (0..n_children).map(move |i| SyntaxNode {
            root: self.root.clone(),
            // unwrap is fine: `i` is always in-bounds here.
            red: red.get_child(i).unwrap(),
        })
    }

    /// The parent node, or `None` for the root.
    pub fn parent(&self) -> Option<SyntaxNode<R>> {
        let parent = self.red().parent()?;
        Some(SyntaxNode {
            root: self.root.clone(),
            red: parent,
        })
    }

    pub fn first_child(&self) -> Option<SyntaxNode<R>> {
        self.children().next()
    }

    /// The next sibling, or `None` for the root or a last child.
    pub fn next_sibling(&self) -> Option<SyntaxNode<R>> {
        let red = self.red();
        let parent = self.parent()?;
        let next_sibling_idx = red.index_in_parent()? + 1;
        // `get_child` returns `None` when this is already the last child.
        let sibling_red = parent.red().get_child(next_sibling_idx)?;
        Some(SyntaxNode {
            root: self.root.clone(),
            red: sibling_red,
        })
    }

    /// A node is considered a leaf iff it has no children.
    pub fn is_leaf(&self) -> bool {
        self.first_child().is_none()
    }

    fn red(&self) -> &RedNode {
        // SAFETY: `self.root` (transitively) owns the node `self.red` points
        // to, so the pointer is valid for as long as `self` is borrowed.
        unsafe { self.red.get(&self.root) }
    }
}
106 | |||
107 | impl<R: TreeRoot> fmt::Debug for SyntaxNode<R> { | ||
108 | fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { | ||
109 | write!(fmt, "{:?}@{:?}", self.kind(), self.range())?; | ||
110 | if has_short_text(self.kind()) { | ||
111 | write!(fmt, " \"{}\"", self.text())?; | ||
112 | } | ||
113 | Ok(()) | ||
114 | } | ||
115 | } | ||
116 | |||
117 | fn has_short_text(kind: SyntaxKind) -> bool { | ||
118 | match kind { | ||
119 | IDENT | LIFETIME | INT_NUMBER | FLOAT_NUMBER => true, | ||
120 | _ => false, | ||
121 | } | ||
122 | } | ||