From be60a9f66d1c01b15ddc3a56bca0416ce8dee0fd Mon Sep 17 00:00:00 2001 From: Akshay Date: Tue, 16 Jan 2024 21:18:52 +0000 Subject: begin work on stag --- stag/Cargo.toml | 16 + stag/src/main.rs | 810 +++++++++++++++++++++++++++++++++++++++++++++++++ stag/src/scopes.scm | 463 ++++++++++++++++++++++++++++ stag/src/stag.scm | 36 +++ stag/src/text_range.rs | 121 ++++++++ 5 files changed, 1446 insertions(+) create mode 100644 stag/Cargo.toml create mode 100644 stag/src/main.rs create mode 100644 stag/src/scopes.scm create mode 100644 stag/src/stag.scm create mode 100644 stag/src/text_range.rs (limited to 'stag') diff --git a/stag/Cargo.toml b/stag/Cargo.toml new file mode 100644 index 0000000..24f07e0 --- /dev/null +++ b/stag/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "stag" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +petgraph = { version = "0.6.4", default-features = false, features = ["serde-1"] } +serde = {version = "1.0.188", features = ["derive"]} +once_cell = "1.19.0" +thiserror = "1.0.56" +tree-sitter-graph = { path = "../tree-sitter-graph/"} +tree-sitter-rust = "0.20.4" +tree-sitter = "0.20.10" + diff --git a/stag/src/main.rs b/stag/src/main.rs new file mode 100644 index 0000000..eae6fe9 --- /dev/null +++ b/stag/src/main.rs @@ -0,0 +1,810 @@ +use std::collections::VecDeque; + +use serde::Deserialize; +use serde::Serialize; +use tree_sitter::Parser; +use tree_sitter_graph::ast::File; +use tree_sitter_graph::functions::{Function, Functions}; +use tree_sitter_graph::graph::Value; +use tree_sitter_graph::ExecutionConfig; +use tree_sitter_graph::ExecutionError; +use tree_sitter_graph::Identifier; +use tree_sitter_graph::NoCancellation; +use tree_sitter_graph::Variables; + +mod text_range; +use text_range::TextRange; + +use petgraph::{graph::NodeIndex, visit::EdgeRef, Direction, Graph}; + +fn main() { + let scopes = std::fs::read_to_string("src/stag.scm").unwrap(); + let src = r#"fn main() { + let a = 3; + let b = 4; + a + b +} + "#; + + let mut parser = Parser::new(); + let language = tree_sitter_rust::language(); + parser.set_language(language).unwrap(); + let tree = parser.parse(src.as_bytes(), None).unwrap(); + + let file = File::from_str(language, &scopes).unwrap(); + + let mut functions = Functions::stdlib(); + functions.add(Identifier::from("scope"), ScopeShorthand); + functions.add(Identifier::from("def"), DefShorthand); + functions.add(Identifier::from("ref"), RefShortHand); + functions.add(Identifier::from("cover"), CoverRanges); + functions.add(Identifier::from("range"), NodeRange); + + let mut globals = Variables::new(); + globals + .add(Identifier::from("filename"), "test.rs".into()) + .map_err(|_| ExecutionError::DuplicateVariable("filename".into())) + .unwrap(); + + let config = ExecutionConfig::new(&functions, &globals); + + let graph = file.execute(&tree, &src, &config, &NoCancellation).unwrap(); + + let mut sg = ScopeGraph::new(tree.root_node().range().into()); + + sg = build_scope_graph(graph, sg); + + println!("{:#?}", sg); +} + +fn range_to_value(value: &tree_sitter::Range) -> Value { + let start = Value::from(vec![ + Value::from(value.start_byte as u32), + Value::from(value.start_point.row as u32), + Value::from(value.start_point.column as u32), + ]); + let end = Value::from(vec![ + Value::from(value.end_byte as u32), + Value::from(value.end_point.row as u32), + Value::from(value.end_point.column as u32), + ]); + Value::from(vec![start, end]) +} + +fn value_to_range(value: &Value) -> tree_sitter::Range { + let range = value.as_list().unwrap(); + let start = range[0].as_list().unwrap(); + let end = range[1].as_list().unwrap(); + tree_sitter::Range { + start_byte: start[0].as_integer().unwrap() as usize, + start_point: tree_sitter::Point { + row: start[1].as_integer().unwrap() as usize, + column: start[2].as_integer().unwrap() as usize, + }, + end_byte: end[0].as_integer().unwrap() as usize, + end_point: tree_sitter::Point { + row: end[1].as_integer().unwrap() as usize, + column: end[2].as_integer().unwrap() as usize, + }, + } +} + +fn is_string_attr(node: &tree_sitter_graph::graph::GraphNode, key: &str, value: &str) -> bool { + matches!(node.attributes.get(&Identifier::from(key)).and_then(|v| v.as_str().ok()), Some(v) if v == value) +} + +fn is_scope(node: &tree_sitter_graph::graph::GraphNode) -> bool { + is_string_attr(node, "kind", "scope") +} + +fn is_import(node: &tree_sitter_graph::graph::GraphNode) -> bool { + is_string_attr(node, "kind", "import") +} + +fn is_def(node: &tree_sitter_graph::graph::GraphNode) -> bool { + is_string_attr(node, "kind", "def") +} + +fn is_ref(node: &tree_sitter_graph::graph::GraphNode) -> bool { + is_string_attr(node, "kind", "ref") +} + +pub struct ScopeShorthand; + +impl Function for ScopeShorthand { + fn call( + &self, + graph: &mut tree_sitter_graph::graph::Graph, + source: &str, + parameters: &mut dyn tree_sitter_graph::functions::Parameters, + ) -> Result { + let target_range = parameters.param()?; + if target_range.as_list().is_err() { + return Err(ExecutionError::ExpectedList(format!( + "`scope` expects list" + ))); + } + parameters.finish()?; + + let graph_node = graph.add_graph_node(); + graph[graph_node] + .attributes + .add::(Identifier::from("kind"), "scope".into()); + graph[graph_node] + .attributes + .add(Identifier::from("range"), target_range); + + Ok(tree_sitter_graph::graph::Value::GraphNode(graph_node)) + } +} + +pub struct DefShorthand; + +impl Function for DefShorthand { + fn call( + &self, + graph: &mut tree_sitter_graph::graph::Graph, + source: &str, + parameters: &mut dyn tree_sitter_graph::functions::Parameters, + ) -> Result { + let target_node = parameters.param()?.into_syntax_node_ref()?; + let ts_node = graph[target_node]; + parameters.finish()?; + + let graph_node = graph.add_graph_node(); + graph[graph_node] + .attributes + .add::(Identifier::from("kind"), "def".into()); + graph[graph_node] + .attributes + .add::(Identifier::from("scope"), "local".into()); + graph[graph_node].attributes.add::( + Identifier::from("text"), + source[ts_node.byte_range()].to_string().into(), + ); + graph[graph_node] + .attributes + .add(Identifier::from("range"), range_to_value(&ts_node.range())); + graph[graph_node] + .attributes + .add::( + Identifier::from("target"), + target_node.into(), + ); + + Ok(tree_sitter_graph::graph::Value::GraphNode(graph_node)) + } +} + +pub struct RefShortHand; + +impl Function for RefShortHand { + fn call( + &self, + graph: &mut tree_sitter_graph::graph::Graph, + source: &str, + parameters: &mut dyn tree_sitter_graph::functions::Parameters, + ) -> Result { + let target_node = parameters.param()?.into_syntax_node_ref()?; + let ts_node = graph[target_node]; + parameters.finish()?; + + let graph_node = graph.add_graph_node(); + graph[graph_node] + .attributes + .add::(Identifier::from("kind"), "ref".into()); + graph[graph_node].attributes.add::( + Identifier::from("text"), + source[ts_node.byte_range()].to_string().into(), + ); + graph[graph_node] + .attributes + .add(Identifier::from("range"), range_to_value(&ts_node.range())); + graph[graph_node] + .attributes + .add::( + Identifier::from("target"), + target_node.into(), + ); + + Ok(tree_sitter_graph::graph::Value::GraphNode(graph_node)) + } +} + +pub struct CoverRanges; + +impl Function for CoverRanges { + fn call( + &self, + graph: &mut tree_sitter_graph::graph::Graph, + _source: &str, + parameters: &mut dyn tree_sitter_graph::functions::Parameters, + ) -> Result { + let node_a = parameters.param()?.into_syntax_node_ref()?; + let node_b = parameters.param()?.into_syntax_node_ref()?; + let ts_node_a = graph[node_a]; + let ts_node_b = graph[node_b]; + + let mut range = cover(ts_node_a.range(), ts_node_b.range()); + while let Ok(param) = parameters.param() { + range = cover(range, graph[param.into_syntax_node_ref()?].range()) + } + + Ok(range_to_value(&range)) + } +} + +fn cover(a: tree_sitter::Range, b: tree_sitter::Range) -> tree_sitter::Range { + let (start_byte, start_point) = if a.start_point < b.start_point { + (a.start_byte, a.start_point) + } else { + (b.start_byte, b.start_point) + }; + let (end_byte, end_point) = if a.end_point > b.end_point { + (a.end_byte, a.end_point) + } else { + (b.end_byte, b.end_point) + }; + tree_sitter::Range { + start_byte, + start_point, + end_byte, + end_point, + } +} + +pub struct NodeRange; + +impl Function for NodeRange { + fn call( + &self, + graph: &mut tree_sitter_graph::graph::Graph, + _source: &str, + parameters: &mut dyn tree_sitter_graph::functions::Parameters, + ) -> Result { + let target_node = parameters.param()?.into_syntax_node_ref()?; + let ts_node = graph[target_node]; + Ok(range_to_value(&ts_node.range())) + } +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)] +pub struct LocalDef { + pub range: TextRange, + pub text: String, + pub symbol: Option, +} + +impl LocalDef { + /// Initialize a new definition + pub fn new(range: TextRange, text: String, symbol: Option) -> Self { + Self { + range, + text, + symbol, + } + } + + pub fn name<'a>(&self, buffer: &'a [u8]) -> &'a [u8] { + &buffer[self.range.start.byte..self.range.end.byte] + } +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)] +pub struct LocalImport { + pub range: TextRange, + pub text: String, +} + +impl LocalImport { + /// Initialize a new import + pub fn new(range: TextRange, text: String) -> Self { + Self { range, text } + } + + pub fn name<'a>(&self, buffer: &'a [u8]) -> &'a [u8] { + &buffer[self.range.start.byte..self.range.end.byte] + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Reference { + pub range: TextRange, + pub text: String, + pub symbol: Option, +} + +impl Reference { + /// Initialize a new reference + pub fn new(range: TextRange, text: String, symbol: Option) -> Self { + Self { + range, + text, + symbol, + } + } +} + +#[derive(Debug, Clone, Copy, Serialize, Deserialize)] +pub struct LocalScope { + pub range: TextRange, +} + +impl LocalScope { + pub fn new(range: TextRange) -> Self { + Self { range } + } +} + +pub struct ScopeStack<'a> { + pub scope_graph: &'a ScopeGraph, + pub start: Option>, +} + +impl<'a> Iterator for ScopeStack<'a> { + type Item = NodeIndex; + fn next(&mut self) -> Option { + if let Some(start) = self.start { + let parent = self + .scope_graph + .graph + .edges_directed(start, Direction::Outgoing) + .find(|edge| *edge.weight() == EdgeKind::ScopeToScope) + .map(|edge| edge.target()); + let original = start; + self.start = parent; + Some(original) + } else { + None + } + } +} + +/// The type of a node in the ScopeGraph +#[derive(Serialize, Deserialize, Debug, Clone)] +pub enum NodeKind { + /// A scope node + Scope(LocalScope), + + /// A definition node + Def(LocalDef), + + /// An import node + Import(LocalImport), + + /// A reference node + Ref(Reference), +} + +impl NodeKind { + /// Construct a scope node from a range + pub fn scope(range: TextRange) -> Self { + Self::Scope(LocalScope::new(range)) + } + + /// Produce the range spanned by this node + pub fn range(&self) -> TextRange { + match self { + Self::Scope(l) => l.range, + Self::Def(d) => d.range, + Self::Ref(r) => r.range, + Self::Import(i) => i.range, + } + } +} + +/// Describes the relation between two nodes in the ScopeGraph +#[derive(Serialize, Deserialize, PartialEq, Eq, Copy, Clone, Debug)] +pub enum EdgeKind { + /// The edge weight from a nested scope to its parent scope + ScopeToScope, + + /// The edge weight from a definition to its definition scope + DefToScope, + + /// The edge weight from an import to its definition scope + ImportToScope, + + /// The edge weight from a reference to its definition + RefToDef, + + /// The edge weight from a reference to its import + RefToImport, +} + +/// A graph representation of scopes and names in a single syntax tree +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct ScopeGraph { + /// The raw graph + pub graph: Graph, + + // Graphs do not have the concept of a `root`, but lexical scopes follow the syntax + // tree, and as a result, have a "root" node. The root_idx points to a scope node that + // encompasses the entire file: the file-global scope. + root_idx: NodeIndex, +} + +impl ScopeGraph { + pub fn new(range: TextRange) -> Self { + let mut graph = Graph::new(); + let root_idx = graph.add_node(NodeKind::scope(range)); + Self { graph, root_idx } + } + + pub fn get_node(&self, node_idx: NodeIndex) -> Option<&NodeKind> { + self.graph.node_weight(node_idx) + } + + /// Insert a local scope into the scope-graph + pub fn insert_local_scope(&mut self, new: LocalScope) { + if let Some(parent_scope) = self.scope_by_range(new.range, self.root_idx) { + let new_scope = NodeKind::Scope(new); + let new_idx = self.graph.add_node(new_scope); + self.graph + .add_edge(new_idx, parent_scope, EdgeKind::ScopeToScope); + } + } + + /// Insert a def into the scope-graph + pub fn insert_local_def(&mut self, new: LocalDef) { + if let Some(defining_scope) = self.scope_by_range(new.range, self.root_idx) { + let new_def = NodeKind::Def(new); + let new_idx = self.graph.add_node(new_def); + self.graph + .add_edge(new_idx, defining_scope, EdgeKind::DefToScope); + } + } + + /// Insert a def into the scope-graph, at the parent scope of the defining scope + pub fn insert_hoisted_def(&mut self, new: LocalDef) { + if let Some(defining_scope) = self.scope_by_range(new.range, self.root_idx) { + let new_def = NodeKind::Def(new); + let new_idx = self.graph.add_node(new_def); + + // if the parent scope exists, insert this def there, if not, + // insert into the defining scope + let target_scope = self.parent_scope(defining_scope).unwrap_or(defining_scope); + + self.graph + .add_edge(new_idx, target_scope, EdgeKind::DefToScope); + } + } + + /// Insert a def into the scope-graph, at the root scope + pub fn insert_global_def(&mut self, new: LocalDef) { + let new_def = NodeKind::Def(new); + let new_idx = self.graph.add_node(new_def); + self.graph + .add_edge(new_idx, self.root_idx, EdgeKind::DefToScope); + } + + /// Insert an import into the scope-graph + pub fn insert_local_import(&mut self, new: LocalImport) { + if let Some(defining_scope) = self.scope_by_range(new.range, self.root_idx) { + let new_imp = NodeKind::Import(new); + let new_idx = self.graph.add_node(new_imp); + self.graph + .add_edge(new_idx, defining_scope, EdgeKind::ImportToScope); + } + } + + /// Insert a ref into the scope-graph + pub fn insert_ref(&mut self, new: Reference) { + let mut possible_defs = vec![]; + let mut possible_imports = vec![]; + if let Some(local_scope_idx) = self.scope_by_range(new.range, self.root_idx) { + // traverse the scopes from the current-scope to the root-scope + for scope in self.scope_stack(local_scope_idx) { + // find candidate definitions in each scope + for local_def in self + .graph + .edges_directed(scope, Direction::Incoming) + .filter(|edge| *edge.weight() == EdgeKind::DefToScope) + .map(|edge| edge.source()) + { + if let NodeKind::Def(def) = &self.graph[local_def] { + if new.text == def.text { + match (def.symbol.as_ref(), new.symbol.as_ref()) { + // both contain symbols, but they don't belong to the same namepspace + (Some(d), Some(r)) if d != r => {} + + // in all other cases, form an edge from the ref to def. + // an empty symbol belongs to all namespaces: + // * (None, None) + // * (None, Some(_)) + // * (Some(_), None) + // * (Some(_), Some(_)) if def.namespace == ref.namespace + _ => { + possible_defs.push(local_def); + } + }; + } + } + } + + // find candidate imports in each scope + for local_import in self + .graph + .edges_directed(scope, Direction::Incoming) + .filter(|edge| *edge.weight() == EdgeKind::ImportToScope) + .map(|edge| edge.source()) + { + if let NodeKind::Import(import) = &self.graph[local_import] { + if new.text == import.text { + possible_imports.push(local_import); + } + } + } + } + } + + if !possible_defs.is_empty() || !possible_imports.is_empty() { + let new_ref = NodeKind::Ref(new); + let ref_idx = self.graph.add_node(new_ref); + for def_idx in possible_defs { + self.graph.add_edge(ref_idx, def_idx, EdgeKind::RefToDef); + } + for imp_idx in possible_imports { + self.graph.add_edge(ref_idx, imp_idx, EdgeKind::RefToImport); + } + } + } + + fn scope_stack(&self, start: NodeIndex) -> ScopeStack<'_> { + ScopeStack { + scope_graph: self, + start: Some(start), + } + } + + // The smallest scope that encompasses `range`. Start at `start` and narrow down if possible. + fn scope_by_range(&self, range: TextRange, start: NodeIndex) -> Option { + let target_range = self.graph[start].range(); + if target_range.contains(&range) { + let mut child_scopes = self + .graph + .edges_directed(start, Direction::Incoming) + .filter(|edge| *edge.weight() == EdgeKind::ScopeToScope) + .map(|edge| edge.source()) + .collect::>(); + + child_scopes.sort_by_key(|s| self.graph[*s].range()); + let target_child_scope = child_scopes.binary_search_by(|x| { + if self.graph[*x].range().contains(&range) { + std::cmp::Ordering::Equal + } else { + self.graph[*x].range().cmp(&range) + } + }); + + if let Some(t) = target_child_scope + .ok() + .and_then(|idx| child_scopes.get(idx)) + .and_then(|s| self.scope_by_range(range, *s)) + { + return Some(t); + } else { + return Some(start); + } + } + None + } + + // Produce the parent scope of a given scope + fn parent_scope(&self, start: NodeIndex) -> Option { + if matches!(self.graph[start], NodeKind::Scope(_)) { + return self + .graph + .edges_directed(start, Direction::Outgoing) + .filter(|edge| *edge.weight() == EdgeKind::ScopeToScope) + .map(|edge| edge.target()) + .next(); + } + None + } + + /// Produce a list of interesting ranges: ranges of defs and refs + pub fn hoverable_ranges(&self) -> Box + '_> { + let iterator = + self.graph + .node_indices() + .filter_map(|node_idx| match &self.graph[node_idx] { + NodeKind::Scope(_) => None, + NodeKind::Def(d) => Some(d.range), + NodeKind::Ref(r) => Some(r.range), + NodeKind::Import(i) => Some(i.range), + }); + Box::new(iterator) + } + + /// Produce possible definitions for a reference + pub fn definitions( + &self, + reference_node: NodeIndex, + ) -> Box + '_> { + let iterator = self + .graph + .edges_directed(reference_node, Direction::Outgoing) + .filter(|edge| *edge.weight() == EdgeKind::RefToDef) + .map(|edge| edge.target()); + Box::new(iterator) + } + + /// Produce possible imports for a reference + pub fn imports(&self, reference_node: NodeIndex) -> Box + '_> { + let iterator = self + .graph + .edges_directed(reference_node, Direction::Outgoing) + .filter(|edge| *edge.weight() == EdgeKind::RefToImport) + .map(|edge| edge.target()); + Box::new(iterator) + } + + /// Produce possible references for a definition/import node + pub fn references( + &self, + definition_node: NodeIndex, + ) -> Box + '_> { + let iterator = self + .graph + .edges_directed(definition_node, Direction::Incoming) + .filter(|edge| { + *edge.weight() == EdgeKind::RefToDef || *edge.weight() == EdgeKind::RefToImport + }) + .map(|edge| edge.source()); + Box::new(iterator) + } + + pub fn node_by_range(&self, start_byte: usize, end_byte: usize) -> Option { + self.graph + .node_indices() + .filter(|&idx| self.is_definition(idx) || self.is_reference(idx) || self.is_import(idx)) + .find(|&idx| { + let node = self.graph[idx].range(); + start_byte >= node.start.byte && end_byte <= node.end.byte + }) + } + + /// The "value" of a definition is loosely characterized as + /// + /// - the body of a function block + /// - the body of a class + /// - the parameters list defining generic types + /// - the RHS of a value + /// + /// The heuristic used here is + /// - the smallest scope-node that encompasses the definition_node + /// - or the largest scope-node on the same line as the to the definition_node + pub fn value_of_definition(&self, def_idx: NodeIndex) -> Option { + let smallest_scope_node = self + .scope_by_range(self.graph[def_idx].range(), self.root_idx) + .filter(|&idx| { + self.graph[idx].range().start.line == self.graph[def_idx].range().start.line + }); + let largest_adjacent_node = self + .graph + .node_indices() + .filter(|&idx| match self.graph[idx] { + NodeKind::Scope(scope) => { + scope.range.start.line == self.graph[def_idx].range().start.line + } + _ => false, + }) + .max_by_key(|idx| self.graph[*idx].range().size()); + + smallest_scope_node.or(largest_adjacent_node) + } + + pub fn node_by_position(&self, line: usize, column: usize) -> Option { + self.graph + .node_indices() + .filter(|&idx| self.is_definition(idx) || self.is_reference(idx)) + .find(|&idx| { + let node = self.graph[idx].range(); + node.start.line == line + && node.end.line == line + && node.start.column <= column + && node.end.column >= column + }) + } + + #[cfg(test)] + pub fn find_node_by_name(&self, src: &[u8], name: &[u8]) -> Option { + self.graph.node_indices().find(|idx| { + matches!( + &self.graph[*idx], + NodeKind::Def(d) if d.name(src) == name) + }) + } + + pub fn is_definition(&self, node_idx: NodeIndex) -> bool { + matches!(self.graph[node_idx], NodeKind::Def(_)) + } + + pub fn is_reference(&self, node_idx: NodeIndex) -> bool { + matches!(self.graph[node_idx], NodeKind::Ref(_)) + } + + pub fn is_scope(&self, node_idx: NodeIndex) -> bool { + matches!(self.graph[node_idx], NodeKind::Scope(_)) + } + + pub fn is_import(&self, node_idx: NodeIndex) -> bool { + matches!(self.graph[node_idx], NodeKind::Import(_)) + } +} + +fn build_scope_graph( + tsg: tree_sitter_graph::graph::Graph, + mut scope_graph: ScopeGraph, +) -> ScopeGraph { + let nodes = tsg.iter_nodes().collect::>(); + // insert scopes first + for node in nodes + .iter() + .map(|node_ref| &tsg[*node_ref]) + .filter(|node| is_scope(node)) + { + let range = value_to_range(node.attributes.get(&Identifier::from("range")).unwrap()); + let scope = LocalScope::new(range.into()); + scope_graph.insert_local_scope(scope); + } + + for node in nodes + .iter() + .map(|node_ref| &tsg[*node_ref]) + .filter(|node| is_import(node)) + { + let range = value_to_range(node.attributes.get(&Identifier::from("range")).unwrap()); + let text = node + .attributes + .get(&Identifier::from("text")) + .and_then(|id| id.clone().into_string().ok()) + .expect("import without text"); + let import = LocalImport::new(range.into(), text); + scope_graph.insert_local_import(import); + } + + for node in nodes + .iter() + .map(|node_ref| &tsg[*node_ref]) + .filter(|node| is_def(node)) + { + let range = value_to_range(node.attributes.get(&Identifier::from("range")).unwrap()); + let symbol = node + .attributes + .get(&Identifier::from("symbol")) + .and_then(|id| id.clone().into_string().ok()); + let text = node + .attributes + .get(&Identifier::from("text")) + .and_then(|id| id.clone().into_string().ok()) + .expect("def without text"); + let local_def = LocalDef::new(range.into(), text, symbol); + + // TODO: fix scoping here + scope_graph.insert_local_def(local_def); + } + + for node in nodes + .iter() + .map(|node_ref| &tsg[*node_ref]) + .filter(|node| is_ref(node)) + { + let range = value_to_range(node.attributes.get(&Identifier::from("range")).unwrap()); + let symbol = node + .attributes + .get(&Identifier::from("symbol")) + .and_then(|id| id.clone().into_string().ok()); + let text = node + .attributes + .get(&Identifier::from("text")) + .and_then(|id| id.clone().into_string().ok()) + .expect("ref without text"); + let ref_ = Reference::new(range.into(), text, symbol); + + scope_graph.insert_ref(ref_); + } + + scope_graph +} diff --git a/stag/src/scopes.scm b/stag/src/scopes.scm new file mode 100644 index 0000000..8a5c385 --- /dev/null +++ b/stag/src/scopes.scm @@ -0,0 +1,463 @@ +;; see tree-sitter-rust/src/grammar.json for an exhaustive list of productions + +;; scopes +(block) @local.scope ; { ... } +(function_item) @local.scope +(declaration_list) @local.scope ; mod { ... } + +;; impl items can define types and lifetimes: +;; +;; impl<'a, T> Trait for Struct { .. } +;; +;; in order to constrain those to the impl block, +;; we add a local scope here: +(impl_item) @local.scope +(struct_item) @local.scope +(enum_item) @local.scope +(union_item) @local.scope +(type_item) @local.scope +(trait_item) @local.scope + +;; let expressions create scopes +(if_expression + [(let_condition) + (let_chain)]) @local.scope + +;; each match arm can bind variables with +;; patterns, without creating a block scope; +;; +;; match _ { +;; (a, b) => a, +;; } +;; +;; The bindings for a, b are constrained to +;; the match arm. +(match_arm) @local.scope + +;; loop labels are defs that are available only +;; within the scope they create: +;; +;; 'outer: loop { +;; let x = 2; +;; }; +;; let y = 2; +;; +;; Produces a scope graph like so: +;; +;; { +;; defs: [ y ], +;; scopes: [ +;; { +;; defs: [ 'outer ], +;; scopes: [ +;; { +;; defs: [ x ] +;; } +;; ] +;; } +;; ] +;; } +;; +(loop_expression) @local.scope +(for_expression) @local.scope +(while_expression) @local.scope + + +;; defs + +;; let x = ...; +(let_declaration + pattern: (identifier) @local.definition.variable) + +;; if let x = ...; +;; while let x = ...; +(let_condition + . + (identifier) @local.definition.variable) + +;; let (a, b, ...) = ..; +;; if let (a, b, ...) = {} +;; while let (a, b, ...) = {} +;; match _ { (a, b) => { .. } } +(tuple_pattern (identifier) @local.definition.variable) + +;; Some(a) +(tuple_struct_pattern + type: (_) + (identifier) @local.definition.variable) + +;; let S { field: a } = ..; +(struct_pattern + (field_pattern + (identifier) @local.definition.variable)) + +;; let S { a, b } = ..; +(struct_pattern + (field_pattern + (shorthand_field_identifier) @local.definition.variable)) + +;; (mut x: T) +(mut_pattern (identifier) @local.definition.variable) + +;; (ref x: T) +(ref_pattern (identifier) @local.definition.variable) + +;; const x = ...; +(const_item (identifier) @local.definition.const) + +;; static x = ...; +(static_item (identifier) @local.definition.const) + +;; fn _(x: _) +(parameters + (parameter + pattern: (identifier) @local.definition.variable)) +;; fn _(self) +(parameters + (self_parameter + (self) @local.definition.variable)) + +;; type parameters +(type_parameters + (type_identifier) @local.definition.typedef) +(type_parameters + (lifetime) @local.definition.lifetime) +(constrained_type_parameter + left: (type_identifier) @local.definition.typedef) + +;; |x| { ... } +;; no type +(closure_parameters (identifier) @local.definition.variable) + +;; |x: T| { ... } +;; with type +(closure_parameters + (parameter + (identifier) @local.definition.variable)) + +;;fn x(..) +(function_item (identifier) @hoist.definition.function) + +;; 'outer: loop { .. } +(loop_expression + (loop_label) @local.definition.label) + +;; `for` exprs create two defs: a label (if any) and the +;; loop variable +(for_expression . (identifier) @local.definition.variable) +(for_expression (loop_label) @local.definition.label) + +;; 'label: while cond { .. } +(while_expression + (loop_label) @local.definition.label) + +;; type definitions +(struct_item (type_identifier) @hoist.definition.struct) +(enum_item (type_identifier) @hoist.definition.enum) +(union_item (type_identifier) @hoist.definition.union) +(type_item . (type_identifier) @hoist.definition.typedef) +(trait_item (type_identifier) @hoist.definition.interface) + +;; struct and union fields +(field_declaration_list + (field_declaration + (field_identifier) @local.definition.field)) + +;; enum variants +(enum_variant_list + (enum_variant + (identifier) @local.definition.enumerator)) + +;; mod x; +(mod_item (identifier) @local.definition.module) + +;; use statements + +;; use item; +(use_declaration + (identifier) @local.import) + +;; use path as item; +(use_as_clause + alias: (identifier) @local.import) + +;; use path::item; +(use_declaration + (scoped_identifier + name: (identifier) @local.import)) + +;; use module::{member1, member2, member3}; +(use_list + (identifier) @local.import) +(use_list + (scoped_identifier + name: (identifier) @local.import)) + + +;; refs + +;; !x +(unary_expression (identifier) @local.reference) + +;; &x +(reference_expression (identifier) @local.reference) + +;; (x) +(parenthesized_expression (identifier) @local.reference) + +;; x? +(try_expression (identifier) @local.reference) + +;; a = b +(assignment_expression (identifier) @local.reference) + +;; a op b +(binary_expression (identifier) @local.reference) + +;; a op= b +(compound_assignment_expr (identifier) @local.reference) + +;; a as b +(type_cast_expression (identifier) @local.reference) + +;; a() +(call_expression (identifier) @local.reference) + +;; Self::foo() +;; +;; `foo` can be resolved +(call_expression + (scoped_identifier + (identifier) @_self_type + (identifier) @local.reference) + (#match? @_self_type "Self")) + +;; self.foo() +;; +;; `foo` can be resolved +(call_expression + (field_expression + (self) + (field_identifier) @local.reference)) + +;; return a +(return_expression (identifier) @local.reference) + +;; break a +(break_expression (identifier) @local.reference) + +;; break 'label +(break_expression (loop_label) @local.reference) + +;; continue 'label; +(continue_expression (loop_label) @local.reference) + +;; yield x; +(yield_expression (identifier) @local.reference) + +;; await a +(await_expression (identifier) @local.reference) + +;; (a, b) +(tuple_expression (identifier) @local.reference) + +;; a[] +(index_expression (identifier) @local.reference) + +;; ident; +(expression_statement (identifier) @local.reference) + +;; a..b +(range_expression (identifier) @local.reference) + +;; [ident; N] +(array_expression (identifier) @local.reference) + +;; path::to::item +;; +;; `path` is a ref +(scoped_identifier + path: (identifier) @local.reference) + +;; rhs of let decls +(let_declaration + value: (identifier) @local.reference) + +;; type T = [T; N] +;; +;; N is a ident ref +(array_type + length: (identifier) @local.reference) + +;; S { _ } +(struct_expression + (type_identifier) @local.reference) + +;; S { a } +(struct_expression + (field_initializer_list + (shorthand_field_initializer + (identifier) @local.reference))) + +;; S { a: value } +(struct_expression + (field_initializer_list + (field_initializer + (identifier) @local.reference))) + +;; S { ..a } +(struct_expression + (field_initializer_list + (base_field_initializer + (identifier) @local.reference))) + +;; if a {} +(if_expression (identifier) @local.reference) + +;; for pattern in value {} +;; +;; `value` is a ref +(for_expression + value: (identifier) @local.reference) + +;; while a {} +(while_expression (identifier) @local.reference) + +;; if let _ = a {} +;; +;; the ident following the `=` is a ref +;; the ident preceding the `=` is a def +;; while let _ = a {} +(let_condition + "=" + (identifier) @local.reference) + + +;; match a +(match_expression (identifier) @local.reference) + +;; match _ { +;; pattern => a, +;; } +;; +;; this `a` is somehow not any expression form +(match_arm (identifier) @local.reference) + +;; a.b +;; +;; `b` is ignored +(field_expression + (identifier) @local.reference) + +;; { stmt; foo } +(block + (identifier) @local.reference) + +;; arguments to method calls or function calls +(arguments + (identifier) @local.reference) + +;; impl S { .. } +(impl_item (type_identifier) @local.reference) + +;; where T: ... +(where_predicate + left: (type_identifier) @local.reference) + +;; trait bounds +(trait_bounds + (type_identifier) @local.reference) +(trait_bounds + (lifetime) @local.reference) + +;; idents in macros +(token_tree + (identifier) @local.reference) + +;; types + +;; (T, U) +(tuple_type + (type_identifier) @local.reference) + +;; &T +(reference_type + (type_identifier) @local.reference) + +;; &'a T +(reference_type + (lifetime) @local.reference) + +;; &'a self +(self_parameter + (lifetime) @local.reference) + +;; *mut T +;; *const T +(pointer_type + (type_identifier) @local.reference) + +;; A<_> +(generic_type + (type_identifier) @local.reference) + +;; _ +(type_arguments + (type_identifier) @local.reference) +(type_arguments + (lifetime) @local.reference) + +;; T +;; +;; U is ignored +;; V is a ref +(type_binding + name: (_) + type: (type_identifier) @local.reference) + +;; [T] +(array_type + (type_identifier) @local.reference) + +;; type T = U; +;; +;; T is a def +;; U is a ref +(type_item + name: (_) + type: (type_identifier) @local.reference) + +(function_item + return_type: (type_identifier) @local.reference) + +;; type refs in params +;; +;; fn _(_: T) +(parameters + (parameter + type: (type_identifier) @local.reference)) + +;; dyn T +(dynamic_type + (type_identifier) @local.reference) + +;; ::call() +(bracketed_type + (type_identifier) @local.reference) + +;; T as Trait +(qualified_type + (type_identifier) @local.reference) + +;; module::T +;; +;; `module` is a def +;; `T` is a ref +(scoped_type_identifier + path: (identifier) @local.reference) + +;; struct _ { field: Type } +;; `Type` is a ref + (field_declaration + name: (_) + type: (type_identifier) @local.reference) diff --git a/stag/src/stag.scm b/stag/src/stag.scm new file mode 100644 index 0000000..b6271b8 --- /dev/null +++ b/stag/src/stag.scm @@ -0,0 +1,36 @@ +[ + (block) + (declaration_list) + (impl_item) + (struct_item) + (enum_item) + (union_item) + (type_item) + (trait_item) + (if_expression + [(let_condition) + (let_chain)]) + ] @cap +{ + (scope (range @cap)) +} + +(function_item + (parameters) @params + (block) @body) +{ + (scope (cover @params @body)) +} + + +(let_declaration + pattern: (identifier) @cap) +{ + (def @cap) +} + + +(binary_expression (identifier) @c) { + (ref @c) +} + diff --git a/stag/src/text_range.rs b/stag/src/text_range.rs new file mode 100644 index 0000000..5b1ec67 --- /dev/null +++ b/stag/src/text_range.rs @@ -0,0 +1,121 @@ +use std::cmp::{Ord, Ordering}; + +use serde::{Deserialize, Serialize}; + +/// A singular position in a text document +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] +pub struct Point { + /// The byte index + pub byte: usize, + + /// 0-indexed line number + pub line: usize, + + /// Position within the line + pub column: usize, +} + +impl PartialOrd for Point { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for Point { + fn cmp(&self, other: &Self) -> Ordering { + self.byte.cmp(&other.byte) + } +} + +impl Point { + pub fn new(byte: usize, line: usize, column: usize) -> Self { + Self { byte, line, column } + } + + pub fn from_byte(byte: usize, line_end_indices: &[u32]) -> Self { + let line = line_end_indices + .iter() + .position(|&line_end_byte| (line_end_byte as usize) > byte) + .unwrap_or(0); + + let column = line + .checked_sub(1) + .and_then(|idx| line_end_indices.get(idx)) + .map(|&prev_line_end| byte.saturating_sub(prev_line_end as usize)) + .unwrap_or(byte); + + Self::new(byte, line, column) + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] +pub struct TextRange { + pub start: Point, + pub end: Point, +} + +impl PartialOrd for TextRange { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for TextRange { + fn cmp(&self, other: &Self) -> Ordering { + let compare_start_byte = self.start.byte.cmp(&other.start.byte); + let compare_size = self.size().cmp(&other.size()); + + compare_start_byte.then(compare_size) + } +} + +impl TextRange { + pub fn new(start: Point, end: Point) -> Self { + assert!(start <= end); + Self { start, end } + } + + pub fn contains(&self, other: &TextRange) -> bool { + // (self.start ... [other.start ... other.end] ... self.end) + self.start <= other.start && other.end <= self.end + } + + #[allow(unused)] + pub fn contains_strict(&self, other: TextRange) -> bool { + // (self.start ... (other.start ... other.end) ... self.end) + self.start < other.start && other.end <= self.end + } + + pub fn size(&self) -> usize { + self.end.byte.saturating_sub(self.start.byte) + } + + pub fn from_byte_range(range: std::ops::Range, line_end_indices: &[u32]) -> Self { + let start = Point::from_byte(range.start, line_end_indices); + let end = Point::from_byte(range.end, line_end_indices); + Self::new(start, end) + } +} + +impl From for TextRange { + fn from(r: tree_sitter::Range) -> Self { + Self { + start: Point { + byte: r.start_byte, + line: r.start_point.row, + column: r.start_point.column, + }, + end: Point { + byte: r.end_byte, + line: r.end_point.row, + column: r.end_point.column, + }, + } + } +} + +impl From for std::ops::Range { + fn from(r: TextRange) -> std::ops::Range { + r.start.byte..r.end.byte + } +} -- cgit v1.2.3