diff options
author | Zac Pullar-Strecker <[email protected]> | 2020-08-24 10:19:53 +0100 |
---|---|---|
committer | Zac Pullar-Strecker <[email protected]> | 2020-08-24 10:20:13 +0100 |
commit | 7bbca7a1b3f9293d2f5cc5745199bc5f8396f2f0 (patch) | |
tree | bdb47765991cb973b2cd5481a088fac636bd326c /crates/parser/src/lib.rs | |
parent | ca464650eeaca6195891199a93f4f76cf3e7e697 (diff) | |
parent | e65d48d1fb3d4d91d9dc1148a7a836ff5c9a3c87 (diff) |
Merge remote-tracking branch 'upstream/master' into 503-hover-doc-links
Diffstat (limited to 'crates/parser/src/lib.rs')
-rw-r--r-- | crates/parser/src/lib.rs | 149 |
1 files changed, 149 insertions, 0 deletions
diff --git a/crates/parser/src/lib.rs b/crates/parser/src/lib.rs new file mode 100644 index 000000000..41e62116f --- /dev/null +++ b/crates/parser/src/lib.rs | |||
@@ -0,0 +1,149 @@ | |||
1 | //! The Rust parser. | ||
2 | //! | ||
3 | //! The parser doesn't know about concrete representation of tokens and syntax | ||
4 | //! trees. Abstract `TokenSource` and `TreeSink` traits are used instead. As a | ||
5 | //! consequence, this crate does not contain a lexer. | |
6 | //! | ||
7 | //! The `Parser` struct from the `parser` module is a cursor into the sequence | ||
8 | //! of tokens. Parsing routines use `Parser` to inspect current state and | ||
9 | //! advance the parsing. | ||
10 | //! | ||
11 | //! The actual parsing happens in the `grammar` module. | ||
12 | //! | ||
13 | //! Tests for this crate live in the `syntax` crate. | |
14 | |||
15 | #[macro_use] | ||
16 | mod token_set; | ||
17 | #[macro_use] | ||
18 | mod syntax_kind; | ||
19 | mod event; | ||
20 | mod parser; | ||
21 | mod grammar; | ||
22 | |||
23 | pub(crate) use token_set::TokenSet; | ||
24 | |||
25 | pub use syntax_kind::SyntaxKind; | ||
26 | |||
/// A parse error, wrapping its payload as a boxed `String`.
///
/// Values of this type are handed to `TreeSink::error`.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub struct ParseError(pub Box<String>);
29 | |||
30 | /// `TokenSource` abstracts the source of the tokens parser operates on. | ||
31 | /// | ||
32 | /// Hopefully this will allow us to treat text and token trees in the same way! | ||
33 | pub trait TokenSource { | ||
34 | fn current(&self) -> Token; | ||
35 | |||
36 | /// Lookahead n token | ||
37 | fn lookahead_nth(&self, n: usize) -> Token; | ||
38 | |||
39 | /// bump cursor to next token | ||
40 | fn bump(&mut self); | ||
41 | |||
42 | /// Is the current token a specified keyword? | ||
43 | fn is_keyword(&self, kw: &str) -> bool; | ||
44 | } | ||
45 | |||
46 | /// `Token` abstracts the cursor of `TokenSource` operates on. | ||
47 | #[derive(Debug, Copy, Clone, Eq, PartialEq)] | ||
48 | pub struct Token { | ||
49 | /// What is the current token? | ||
50 | pub kind: SyntaxKind, | ||
51 | |||
52 | /// Is the current token joined to the next one (`> >` vs `>>`). | ||
53 | pub is_jointed_to_next: bool, | ||
54 | } | ||
55 | |||
56 | /// `TreeSink` abstracts details of a particular syntax tree implementation. | ||
57 | pub trait TreeSink { | ||
58 | /// Adds new token to the current branch. | ||
59 | fn token(&mut self, kind: SyntaxKind, n_tokens: u8); | ||
60 | |||
61 | /// Start new branch and make it current. | ||
62 | fn start_node(&mut self, kind: SyntaxKind); | ||
63 | |||
64 | /// Finish current branch and restore previous | ||
65 | /// branch as current. | ||
66 | fn finish_node(&mut self); | ||
67 | |||
68 | fn error(&mut self, error: ParseError); | ||
69 | } | ||
70 | |||
71 | fn parse_from_tokens<F>(token_source: &mut dyn TokenSource, tree_sink: &mut dyn TreeSink, f: F) | ||
72 | where | ||
73 | F: FnOnce(&mut parser::Parser), | ||
74 | { | ||
75 | let mut p = parser::Parser::new(token_source); | ||
76 | f(&mut p); | ||
77 | let events = p.finish(); | ||
78 | event::process(tree_sink, events); | ||
79 | } | ||
80 | |||
81 | /// Parse given tokens into the given sink as a rust file. | ||
82 | pub fn parse(token_source: &mut dyn TokenSource, tree_sink: &mut dyn TreeSink) { | ||
83 | parse_from_tokens(token_source, tree_sink, grammar::root); | ||
84 | } | ||
85 | |||
/// Selects which grammar entry point `parse_fragment` runs.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum FragmentKind {
    Path,
    Expr,
    Statement,
    Type,
    Pattern,
    Item,
    Block,
    Visibility,
    MetaItem,

    // The two kinds below are for parsing the output of an expansion.
    // FIXME: use separate fragment kinds for macro inputs and outputs?
    Items,
    Statements,
}
103 | |||
104 | pub fn parse_fragment( | ||
105 | token_source: &mut dyn TokenSource, | ||
106 | tree_sink: &mut dyn TreeSink, | ||
107 | fragment_kind: FragmentKind, | ||
108 | ) { | ||
109 | let parser: fn(&'_ mut parser::Parser) = match fragment_kind { | ||
110 | FragmentKind::Path => grammar::fragments::path, | ||
111 | FragmentKind::Expr => grammar::fragments::expr, | ||
112 | FragmentKind::Type => grammar::fragments::type_, | ||
113 | FragmentKind::Pattern => grammar::fragments::pattern, | ||
114 | FragmentKind::Item => grammar::fragments::item, | ||
115 | FragmentKind::Block => grammar::fragments::block_expr, | ||
116 | FragmentKind::Visibility => grammar::fragments::opt_visibility, | ||
117 | FragmentKind::MetaItem => grammar::fragments::meta_item, | ||
118 | FragmentKind::Statement => grammar::fragments::stmt, | ||
119 | FragmentKind::Items => grammar::fragments::macro_items, | ||
120 | FragmentKind::Statements => grammar::fragments::macro_stmts, | ||
121 | }; | ||
122 | parse_from_tokens(token_source, tree_sink, parser) | ||
123 | } | ||
124 | |||
125 | /// A parsing function for a specific braced-block. | ||
126 | pub struct Reparser(fn(&mut parser::Parser)); | ||
127 | |||
128 | impl Reparser { | ||
129 | /// If the node is a braced block, return the corresponding `Reparser`. | ||
130 | pub fn for_node( | ||
131 | node: SyntaxKind, | ||
132 | first_child: Option<SyntaxKind>, | ||
133 | parent: Option<SyntaxKind>, | ||
134 | ) -> Option<Reparser> { | ||
135 | grammar::reparser(node, first_child, parent).map(Reparser) | ||
136 | } | ||
137 | |||
138 | /// Re-parse given tokens using this `Reparser`. | ||
139 | /// | ||
140 | /// Tokens must start with `{`, end with `}` and form a valid brace | ||
141 | /// sequence. | ||
142 | pub fn parse(self, token_source: &mut dyn TokenSource, tree_sink: &mut dyn TreeSink) { | ||
143 | let Reparser(r) = self; | ||
144 | let mut p = parser::Parser::new(token_source); | ||
145 | r(&mut p); | ||
146 | let events = p.finish(); | ||
147 | event::process(tree_sink, events); | ||
148 | } | ||
149 | } | ||