From e4d4adb060fb01542cf036002285018bead1f7b8 Mon Sep 17 00:00:00 2001 From: Akshay Date: Sun, 4 Feb 2024 22:28:55 +0000 Subject: finish rust stag defs --- stag/src/main.rs | 72 ++++++-- stag/src/stag.scm | 538 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 579 insertions(+), 31 deletions(-) diff --git a/stag/src/main.rs b/stag/src/main.rs index eae6fe9..1945a03 100644 --- a/stag/src/main.rs +++ b/stag/src/main.rs @@ -1,5 +1,3 @@ -use std::collections::VecDeque; - use serde::Deserialize; use serde::Serialize; use tree_sitter::Parser; @@ -19,11 +17,14 @@ use petgraph::{graph::NodeIndex, visit::EdgeRef, Direction, Graph}; fn main() { let scopes = std::fs::read_to_string("src/stag.scm").unwrap(); - let src = r#"fn main() { - let a = 3; - let b = 4; - a + b -} + let src = r#" + fn main() { + let x = 2; + let a = 5; + if let _ = z { + a[x]; + } + } "#; let mut parser = Parser::new(); @@ -55,6 +56,15 @@ fn main() { sg = build_scope_graph(graph, sg); println!("{:#?}", sg); + + for e in sg + .graph + .raw_edges() + .iter() + .filter(|e| e.weight == EdgeKind::RefToDef) + { + println!("{:?} -> {:?}", e.source(), e.target()); + } } fn range_to_value(value: &tree_sitter::Range) -> Value { @@ -111,11 +121,12 @@ fn is_ref(node: &tree_sitter_graph::graph::GraphNode) -> bool { pub struct ScopeShorthand; +#[allow(unused_must_use)] impl Function for ScopeShorthand { fn call( &self, graph: &mut tree_sitter_graph::graph::Graph, - source: &str, + _source: &str, parameters: &mut dyn tree_sitter_graph::functions::Parameters, ) -> Result { let target_range = parameters.param()?; @@ -140,6 +151,7 @@ impl Function for ScopeShorthand { pub struct DefShorthand; +#[allow(unused_must_use)] impl Function for DefShorthand { fn call( &self, @@ -149,6 +161,10 @@ impl Function for DefShorthand { ) -> Result { let target_node = parameters.param()?.into_syntax_node_ref()?; let ts_node = graph[target_node]; + let symbol = parameters + .param() + .and_then(|p| p.as_str().map(ToOwned::to_owned)) + .ok(); parameters.finish()?; let graph_node = graph.add_graph_node(); @@ -158,6 +174,12 @@ impl Function for DefShorthand { graph[graph_node] .attributes .add::(Identifier::from("scope"), "local".into()); + + if let Some(s) = symbol { + graph[graph_node] + .attributes + .add::(Identifier::from("symbol"), s.into()); + } graph[graph_node].attributes.add::( Identifier::from("text"), source[ts_node.byte_range()].to_string().into(), @@ -178,6 +200,7 @@ impl Function for DefShorthand { pub struct RefShortHand; +#[allow(unused_must_use)] impl Function for RefShortHand { fn call( &self, @@ -220,17 +243,29 @@ impl Function for CoverRanges { _source: &str, parameters: &mut dyn tree_sitter_graph::functions::Parameters, ) -> Result { - let node_a = parameters.param()?.into_syntax_node_ref()?; - let node_b = parameters.param()?.into_syntax_node_ref()?; - let ts_node_a = graph[node_a]; - let ts_node_b = graph[node_b]; - - let mut range = cover(ts_node_a.range(), ts_node_b.range()); - while let Ok(param) = parameters.param() { - range = cover(range, graph[param.into_syntax_node_ref()?].range()) - } + let p1 = parameters.param()?; + let p2 = parameters.param()?; + + match (p1.is_null(), p2.is_null()) { + (true, true) => panic!("all nulls"), + (false, true) => return Ok(range_to_value(&graph[p1.into_syntax_node_ref()?].range())), + (true, false) => return Ok(range_to_value(&graph[p2.into_syntax_node_ref()?].range())), + (false, false) => { + let node_a = p1.into_syntax_node_ref()?; + let node_b = p2.into_syntax_node_ref()?; + let ts_node_a = graph[node_a]; + let ts_node_b = graph[node_b]; + + let mut range = cover(ts_node_a.range(), ts_node_b.range()); + while let Ok(param) = parameters.param() { + if !param.is_null() { + range = cover(range, graph[param.into_syntax_node_ref()?].range()) + } + } - Ok(range_to_value(&range)) + Ok(range_to_value(&range)) + } + } } } @@ -739,7 +774,6 @@ fn build_scope_graph( mut scope_graph: ScopeGraph, ) -> ScopeGraph { let nodes = tsg.iter_nodes().collect::>(); - // insert scopes first for node in nodes .iter() .map(|node_ref| &tsg[*node_ref]) diff --git a/stag/src/stag.scm b/stag/src/stag.scm index b6271b8..dc56e74 100644 --- a/stag/src/stag.scm +++ b/stag/src/stag.scm @@ -1,36 +1,550 @@ -[ - (block) +[(block) (declaration_list) (impl_item) - (struct_item) - (enum_item) - (union_item) - (type_item) - (trait_item) + + ;; let expressions create scopes (if_expression [(let_condition) (let_chain)]) - ] @cap + +;; each match arm can bind variables with +;; patterns, without creating a block scope; +;; +;; match _ { +;; (a, b) => a, +;; } +;; +;; The bindings for a, b are constrained to +;; the match arm. + (match_arm) + +;; loop labels are defs that are available only +;; within the scope they create: +;; +;; 'outer: loop { +;; let x = 2; +;; }; +;; let y = 2; +;; +;; Produces a scope graph like so: +;; +;; { +;; defs: [ y ], +;; scopes: [ +;; { +;; defs: [ 'outer ], +;; scopes: [ +;; { +;; defs: [ x ] +;; } +;; ] +;; } +;; ] +;; } +;; + (loop_expression) + (for_expression) + (while_expression)] @cap { (scope (range @cap)) } + (function_item + (identifier) @i (parameters) @params (block) @body) { + (def @i "function") (scope (cover @params @body)) } -(let_declaration - pattern: (identifier) @cap) +;; impl items can define types and lifetimes: +;; +;; impl<'a, T> Trait for Struct { .. } +;; +;; in order to constrain those to the impl block, +;; we add a local scope here: +[(struct_item (type_identifier) @i (type_parameters)? @t body: (_) @b) + (union_item (type_identifier) @i (type_parameters)? @t body: (_) @b) + (enum_item (type_identifier) @i (type_parameters)? @t body: (_) @b) + (type_item (type_identifier) @i (type_parameters)? @t type: (_) @b) + (trait_item (type_identifier) @i (type_parameters)? @t body: (_) @b)] +{ + (def @i) + (scope (cover @t @b)) + (scope (range @b)) +} + + +;; DEFS +;; ---- + +;; let x = ...; +(let_declaration pattern: (identifier) @cap) +{ + (def @cap "variable") +} + +;; if let x = ...; +;; while let x = ...; +(let_condition (identifier) @cap . "=") +{ + (def @cap "variable") +} + +;; let (a, b, ...) = ..; +;; if let (a, b, ...) = {} +;; while let (a, b, ...) = {} +;; match _ { (a, b) => { .. } } +(tuple_pattern (identifier) @cap) +{ + (def @cap "variable") +} + +;; Some(a) +(tuple_struct_pattern + type: (_) + (identifier) @cap) +{ + (def @cap "variable") +} + +;; let S { field: a } = ..; +(struct_pattern + (field_pattern + (identifier) @cap)) +{ + (def @cap "variable") +} + + +[ + ;; (mut x: T) + (mut_pattern (identifier) @i) + + ;; (ref x: T) + (ref_pattern (identifier) @i) + + ;; const x = ...; + (const_item (identifier) @i) + + ;; static x = ...; + (static_item (identifier) @i)] +{ + (def @i "variable") +} + +;; fn _(x: _) +(parameters + (parameter + pattern: (identifier) @cap)) +{ + (def @cap) +} + +;; fn _(self) +(parameters + (self_parameter + (self) @cap)) +{ + (def @cap) +} + +;; type parameters +(type_parameters + (type_identifier) @cap) +{ + (def @cap) +} +(type_parameters + (lifetime) @cap) +{ + (def @cap) +} +(constrained_type_parameter + left: (type_identifier) @cap) +{ + (def @cap) +} + +;; |x| { ... } +;; no type +(closure_parameters (identifier) @cap) +{ + (def @cap) +} + +;; |x: T| { ... } +;; with type +(closure_parameters + (parameter + (identifier) @cap)) +{ + (def @cap) +} + +;; 'outer: loop { .. } +(loop_expression + (loop_label) @cap) +{ + (def @cap) +} + +;; `for` exprs create two defs: a label (if any) and the +;; loop variable +(for_expression . (identifier) @cap) +{ + (def @cap) +} +(for_expression (loop_label) @cap) { (def @cap) } +;; 'label: while cond { .. } +(while_expression + (loop_label) @cap) +{ + (def @cap) +} -(binary_expression (identifier) @c) { - (ref @c) +;; struct and union fields +(field_declaration_list + (field_declaration + (field_identifier) @cap)) +{ + (def @cap) } +;; enum variants +(enum_variant_list + (enum_variant + (identifier) @cap)) +{ + (def @cap) +} + +;; mod x; +(mod_item (identifier) @cap) +{ + (def @cap) +} + + +;; IMPORTS +;; ------- + +;; use item; +(use_declaration + (identifier) @cap) +{ + (def @cap) +} + +;; use path as item; +(use_as_clause + alias: (identifier) @cap) +{ + (def @cap) +} + +;; use path::item; +(use_declaration + (scoped_identifier + name: (identifier) @cap)) +{ + (def @cap) +} + +;; use module::{member1, member2, member3}; +(use_list + (identifier) @cap) +{ + (def @cap) +} + +(use_list + (scoped_identifier + name: (identifier) @cap)) +{ + (def @cap) +} + + +;; REFS +;; ---- + +[ + ;; !x + (unary_expression (identifier) @cap) + ;; &x + (reference_expression (identifier) @cap) + + ;; (x) + (parenthesized_expression (identifier) @cap) + + ;; x? + (try_expression (identifier) @cap) + + ;; a = b + (assignment_expression (identifier) @cap) + + ;; a op b + (binary_expression (identifier) @cap) + + ;; a op= b + (compound_assignment_expr (identifier) @cap) + + ;; a as b + (type_cast_expression (identifier) @cap) + + ;; a() + (call_expression (identifier) @cap) + + ;; return a + (return_expression (identifier) @cap) + + ;; break a + (break_expression (identifier) @cap) + + ;; break 'label + (break_expression (loop_label) @cap) + + ;; continue 'label; + (continue_expression (loop_label) @cap) + + ;; yield x; + (yield_expression (identifier) @cap) + + ;; await a + (await_expression (identifier) @cap) + + ;; (a, b) + (tuple_expression (identifier) @cap) + + ;; a[] + (index_expression (identifier) @cap) + + ;; ident; + (expression_statement (identifier) @cap) + + ;; a..b + (range_expression (identifier) @cap) + + ;; [ident; N] + (array_expression (identifier) @cap) + + ;; path::to::item + ;; + ;; `path` is a ref + (scoped_identifier + path: (identifier) @cap) + + ;; rhs of let decls + (let_declaration + value: (identifier) @cap) + + ;; type T = [T; N] + ;; + ;; N is a ident ref + (array_type + length: (identifier) @cap) + + ;; S { _ } + (struct_expression + (type_identifier) @cap) + + ;; S { a } + (struct_expression + (field_initializer_list + (shorthand_field_initializer + (identifier) @cap))) + + ;; S { a: value } + (struct_expression + (field_initializer_list + (field_initializer + (identifier) @cap))) + + ;; S { ..a } + (struct_expression + (field_initializer_list + (base_field_initializer + (identifier) @cap))) + + ;; if a {} + (if_expression (identifier) @cap) + + ;; for pattern in value {} + ;; + ;; `value` is a ref + (for_expression + value: (identifier) @cap) + + ;; while a {} + (while_expression (identifier) @cap) + + ;; match a + (match_expression (identifier) @cap) + + ;; match _ { + ;; pattern => a, + ;; } + ;; + ;; this `a` is somehow not any expression form + (match_arm (identifier) @cap) + + ;; a.b + ;; + ;; `b` is ignored + (field_expression + (identifier) @cap) + + ;; { stmt; foo } + (block + (identifier) @cap) + + ;; arguments to method calls or function calls + (arguments + (identifier) @cap) + + ;; impl S { .. } + (impl_item (type_identifier) @cap) + + ;; where T: ... + (where_predicate + left: (type_identifier) @cap) + + ;; trait bounds + (trait_bounds + (type_identifier) @cap) + (trait_bounds + (lifetime) @cap) + + ;; idents in macros + (token_tree + (identifier) @cap) + + + ;; types + + ;; (T, U) + (tuple_type + (type_identifier) @cap) + + ;; &T + (reference_type + (type_identifier) @cap) + + ;; &'a T + (reference_type + (lifetime) @cap) + + ;; &'a self + (self_parameter + (lifetime) @cap) + + ;; *mut T + ;; *const T + (pointer_type + (type_identifier) @cap) + + ;; A<_> + (generic_type + (type_identifier) @cap) + + ;; _ + (type_arguments + (type_identifier) @cap) + (type_arguments + (lifetime) @cap) + + ;; T + ;; + ;; U is ignored + ;; V is a ref + (type_binding + name: (_) + type: (type_identifier) @cap) + + ;; [T] + (array_type + (type_identifier) @cap) + + ;; type T = U; + ;; + ;; T is a def + ;; U is a ref + (type_item + name: (_) + type: (type_identifier) @cap) + + (function_item + return_type: (type_identifier) @cap) + + ;; type refs in params + ;; + ;; fn _(_: T) + (parameters + (parameter + type: (type_identifier) @cap)) + + ;; dyn T + (dynamic_type + (type_identifier) @cap) + + ;; ::call() + (bracketed_type + (type_identifier) @cap) + + ;; T as Trait + (qualified_type + (type_identifier) @cap) + + ;; module::T + ;; + ;; `module` is a def + ;; `T` is a ref + (scoped_type_identifier + path: (identifier) @cap) + + ;; struct _ { field: Type } + ;; `Type` is a ref + (field_declaration + name: (_) + type: (type_identifier) @cap) + + ;; Self::foo() + ;; + ;; `foo` can be resolved + (call_expression + (scoped_identifier + (identifier) @_self_type + (identifier) @cap) + (#match? @_self_type "Self")) + + ;; self.foo() + ;; + ;; `foo` can be resolved + (call_expression + (field_expression + (self) + (field_identifier) @cap)) + + ;; if let _ = a {} + ;; + ;; the ident following the `=` is a ref + ;; the ident preceding the `=` is a def + ;; while let _ = a {} + (let_condition + "=" + . + (identifier) @cap) +] +{ + (ref @cap) +} + + -- cgit v1.2.3