From 188b1848f5f9bc439bed9f4b333a9f160b18a210 Mon Sep 17 00:00:00 2001 From: Kurt Lawrence Date: Sun, 31 Jul 2022 12:01:33 +1000 Subject: [PATCH] Variable sealing rework (#154) * add tests and trial a few options as a fix * made the change to sealing which asserts if an op adds variables or not and it is passing pretty much all tests, very promising! * all tests are passing, solution is viable * document variable items * add release note --- RELEASE.md | 2 + ogma/src/eng/arg.rs | 49 ++-- ogma/src/eng/blk.rs | 40 +++- ogma/src/eng/comp/infer.rs | 29 ++- ogma/src/eng/comp/mod.rs | 55 +++-- ogma/src/eng/comp/params.rs | 16 +- ogma/src/eng/graphs/locals_graph.rs | 255 +++++++++++++-------- ogma/src/eng/mod.rs | 32 ++- ogma/src/eng/var.rs | 2 +- ogma/src/lang/impls/intrinsics/morphism.rs | 12 +- ogma/src/lang/impls/intrinsics/pipeline.rs | 4 + ogma/tests/commands/pipeline.rs | 35 +++ 12 files changed, 349 insertions(+), 182 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index 2f6973ee..9f15d485 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -14,6 +14,8 @@ - Fix _locals graph needs updating_ bug (https://github.com/kdr-aus/ogma/pull/148) - Provide more verbose parsing errors (https://github.com/kdr-aus/ogma/pull/151) - Fix CPU spinning with completion prompt open (https://github.com/kdr-aus/ogma/pull/152) +- Fix an uncommon variable shadowing bug by reworking the variable sealing system + (https://github.com/kdr-aus/ogma/pull/154) **✨ Other Updates** - `ogma` crate API documentation is now published at https://kdr-aus.github.io/ogma/ogma/ (https://github.com/kdr-aus/ogma/commit/cf5cc7979c399b609e2e0605ffe176e70e474ac2) diff --git a/ogma/src/eng/arg.rs b/ogma/src/eng/arg.rs index f0a289c6..8ce1bb5d 100644 --- a/ogma/src/eng/arg.rs +++ b/ogma/src/eng/arg.rs @@ -13,14 +13,14 @@ pub struct ArgBuilder<'a> { compiler: &'a Compiler<'a>, blk_in_ty: Option, - chgs: Chgs<'a>, + chgs: &'a mut Chgs, } impl<'a> ArgBuilder<'a> { pub(super) fn new( node: ArgNode, compiler: &'a Compiler<'a>, - chgs: Chgs<'a>, + chgs: &'a mut Chgs, blk_in_ty: Option, ) -> Box { // see if the input and/or output types are known @@ -42,7 +42,7 @@ impl<'a> ArgBuilder<'a> { fn follow_local_arg_ptr(mut self: Box) -> Box { let new_node = self.compiler.ag[self.node.idx()] .var() - .and_then(|t| self.compiler.lg.get(self.node.idx(), t.str())) + .and_then(|t| self.compiler.lg.get_opt(self.node.idx(), t.str()).1) .and_then(|l| match l { Local::Ptr { to, tag: _ } => Some(to), _ => None, @@ -66,6 +66,20 @@ impl<'a> ArgBuilder<'a> { self.node } + /// Flag that this argument does not require it's parent op to be sealed for variable + /// resolution. + /// + /// This method is only required when variables are being introduced, see + /// [`Block::assert_adds_vars`] for more information and usage practices. + pub fn decouple_op_seal(self: Box) -> Box { + let arg = self.node(); + let op = arg.op(self.compiler.ag()); + self.chgs + .chgs + .push(locals_graph::Chg::BreakEdge { op, arg }.into()); + self + } + /// Assert that this argument will be supplied an input of type `ty`. /// /// > Since the **blocks's** input type is used often, and trying to pass this through is @@ -99,6 +113,7 @@ impl<'a> ArgBuilder<'a> { "expecting TypesSet to not be empty or a single set" ); self.chgs + .chgs .push(tygraph::Chg::KnownInput(self.node.idx(), ty).into()); Err(Error::unknown_arg_input_type(self.tag())) } @@ -132,6 +147,7 @@ impl<'a> ArgBuilder<'a> { "expecting TypesSet to not be empty or a single set" ); self.chgs + .chgs .push(tygraph::Chg::ObligeOutput(self.node.idx(), ty).into()); Err(Error::unknown_arg_output_type(self.tag())) } @@ -244,7 +260,7 @@ impl<'a> ArgBuilder<'a> { Ok(Hold::Expr(stack)) } Var(tag) => lg - .get_checked(node.idx(), tag.str(), tag) + .get(node.idx(), tag.str(), tag) .and_then(|local| match local { Local::Var(var) => Ok(Hold::Var(var.clone())), Local::Ptr { .. } => { @@ -271,24 +287,13 @@ impl<'a> ArgBuilder<'a> { /// - `Ok(Some(true))`: The variable exists in the locals, /// - `Err(_)`: The variable does not exist in the locals. pub fn assert_var_exists(&self) -> Result> { - let Compiler { ag, lg, .. } = self.compiler; - - match &ag[self.node.idx()] { - astgraph::AstNode::Var(tag) => { - lg.get(self.node.idx(), tag.str()) - // Ok(Some(true)) if variable exists - .map(|_| true) - // if NOT sealed, return Ok(Some(false)) -- lg should be updated - // NOTE we check the parent op for seal, not the argument itself. - .or_else(|| { - let sealed = lg.sealed(self.node.op(ag).idx()); - (!sealed).then(|| false) - }) - // Error with a HARD error - .ok_or_else(|| Error::var_not_found(tag)) - .map(Some) - } - _ => Ok(None), // not a variable + match self.node.var(self.compiler.ag()) { + Some(var) => match self.compiler.lg.get_opt(self.node.idx(), var.str()) { + (true, Some(_)) => Ok(Some(true)), + (true, None) => Err(Error::var_not_found(var)), + (false, _) => Ok(Some(false)), + }, + None => Ok(None), // not a variable } } } diff --git a/ogma/src/eng/blk.rs b/ogma/src/eng/blk.rs index 1e7287e8..0d101a08 100644 --- a/ogma/src/eng/blk.rs +++ b/ogma/src/eng/blk.rs @@ -66,6 +66,7 @@ impl<'a> Block<'a> { } self.chgs + .chgs .push(graphs::tygraph::Chg::KnownOutput(self.node.idx(), ty).into()); } @@ -73,7 +74,7 @@ impl<'a> Block<'a> { /// /// Note that this will not affect already resolved nodes, only inferred nodes. pub fn insert_anon_type_into_compiler(&mut self, ty: Type) { - self.chgs.push(graphs::tygraph::Chg::AnonTy(ty).into()); + self.chgs.chgs.push(graphs::tygraph::Chg::AnonTy(ty).into()); } /// Gets the flag that matches a given name. @@ -102,6 +103,39 @@ impl<'a> Block<'a> { self.args.get(0).copied() } + /// Assert that this operation will be adding variables. + /// + /// This should be called early on to flag to the compiler that it cannot eagerly seal this + /// node. + /// Usually, `retain_op_coupling: false`, unless one or more arguments cannot be considered + /// sealed until this operation is (see the `fold` intrinsic as an example). + /// If `retain_op_coupling: true`, any arguments that **do not require** the op to be + /// considered sealed should flag this with [`ArgBuilder::decouple_op_seal`]. + /// + /// Note that this function should be paired with a subsequent [`Block::assert_vars_added`] + /// call once all required variables have been added. + pub fn assert_adds_vars(&mut self, retain_op_coupling: bool) { + self.chgs.adds_vars = true; + if !retain_op_coupling { + let op = self.node; + self.chgs.chgs.extend( + self.args + .iter() + .map(|&arg| locals_graph::Chg::BreakEdge { op, arg }.into()), + ); + } + } + + /// Asserts that all the variables have been added, allowing the compiler to seal this node. + /// + /// This should be paired with a [`Block::assert_adds_vars`] call, and invoked _after_ the + /// variables have been added. + pub fn assert_vars_added(&mut self) { + self.chgs + .chgs + .push(locals_graph::Chg::Seal(self.node).into()); + } + /// Assert this argument is a variable and construct a reference to it. /// /// If the block does not contain a reference to an up-to-date locals, and error is returned. @@ -124,7 +158,7 @@ impl<'a> Block<'a> { Some(next) => lg .new_var(next.idx(), Str::new(var.str()), ty, var.clone()) .map_err(|chg| { - chgs.push(chg.into()); + chgs.chgs.push(chg.into()); Error::update_locals_graph(var) }), None => Ok(Variable::noop(var.clone(), ty)), @@ -168,7 +202,7 @@ impl<'a> Block<'a> { .lg .new_var(arg.idx(), name, ty, tag.clone()) .map_err(|chg| { - self.chgs.push(chg.into()); + self.chgs.chgs.push(chg.into()); Error::update_locals_graph(tag) }) } diff --git a/ogma/src/eng/comp/infer.rs b/ogma/src/eng/comp/infer.rs index 44c87dec..80c0b965 100644 --- a/ogma/src/eng/comp/infer.rs +++ b/ogma/src/eng/comp/infer.rs @@ -35,7 +35,7 @@ impl<'d> Compiler<'d> { (self.lg.sealed(n) || !self.ag.detect_var(OpNode(n))) }); - let mut chgs = Vec::new(); + let mut chgs = Vec::default(); for op in infer_nodes.into_iter().map(OpNode) { trial_inferred_types(op, self, &mut chgs); @@ -143,8 +143,8 @@ enum Error { /// Tries to compile the `op` with each type in the inferred types set. /// Pushes `RemoveInput` if failed. -fn trial_inferred_types(op: OpNode, compiler: &Compiler, chgs: Chgs) { - let sink1 = &mut Default::default(); +fn trial_inferred_types(op: OpNode, compiler: &Compiler, chgs: &mut Vec) { + let mut sink1 = Default::default(); let set = compiler.tg[op.idx()] .input @@ -155,10 +155,19 @@ fn trial_inferred_types(op: OpNode, compiler: &Compiler, chgs: Chgs) { .iter() .cloned() .filter(|ty| { - let infer_output = &mut false; - let compiled = compiler - .compile_block(op, ty.clone(), sink1, infer_output) - .is_ok(); + let mut chgs = Chgs { + chgs: std::mem::take(&mut sink1), + ..Chgs::default() + }; + + let compiled = compiler.compile_block(op, ty.clone(), &mut chgs).is_ok(); + + let Chgs { + chgs, + infer_output, + adds_vars: _, + } = chgs; + sink1 = chgs; let lg_chg_req = sink1.drain(..).any(|chg| chg.is_lg_chg()); @@ -167,7 +176,7 @@ fn trial_inferred_types(op: OpNode, compiler: &Compiler, chgs: Chgs) { // we keep in if infer_output is true, since we need more information about types // before we can compile this block, and removing input inferences will just reduce it // to an empty set. - !compiled && !*infer_output && !lg_chg_req + !compiled && !infer_output && !lg_chg_req }) .map(|ty| RemoveInput(op.into(), ty)) .map(Into::into); @@ -316,7 +325,7 @@ impl<'a> Block<'a> { let Block { node: opnode, compiler: Compiler { ag, tg, .. }, - infer_output, + chgs: Chgs { infer_output, .. }, .. } = self; @@ -340,7 +349,7 @@ impl<'a> Block<'a> { .cloned(); if ret.is_none() { - **infer_output = true; + *infer_output = true; } ret diff --git a/ogma/src/eng/comp/mod.rs b/ogma/src/eng/comp/mod.rs index 6d49e945..24f2747a 100644 --- a/ogma/src/eng/comp/mod.rs +++ b/ogma/src/eng/comp/mod.rs @@ -270,7 +270,7 @@ impl<'d> Compiler<'d> { } // map in if has a Locals - let local = match self.lg.get(node, var_tag.str()) { + let local = match self.lg.get_opt(node, var_tag.str()).1 { Some(l) => l, None => continue, }; @@ -356,7 +356,11 @@ impl<'d> Compiler<'d> { let mut goto_resolve = false; let mut err = None; - let mut chgs = Vec::new(); + let mut chgs = Chgs { + chgs: Vec::new(), + infer_output: false, + adds_vars: false, + }; for node in nodes { let in_ty = self.tg[node.idx()] @@ -365,9 +369,10 @@ impl<'d> Compiler<'d> { .cloned() .expect("known input at this point"); - let mut infer_output = false; + chgs.infer_output = false; + chgs.adds_vars = false; - match self.compile_block(node, in_ty, &mut chgs, &mut infer_output) { + match self.compile_block(node, in_ty, &mut chgs) { Ok(step) => { goto_resolve = true; let out_ty = step.out_ty.clone(); @@ -381,15 +386,12 @@ impl<'d> Compiler<'d> { // since this compilation was successful, the output type is known! // the TG is updated with this information - chgs.push(tygraph::Chg::KnownOutput(node.idx(), out_ty).into()); - - // we also seal off the op node in the locals graph, this op is not going to - // alter it's dependent locals maps - self.lg.seal_node(node.idx(), &self.ag); + chgs.chgs + .push(tygraph::Chg::KnownOutput(node.idx(), out_ty).into()); } Err(mut e) => { // only set the infer output if tygraph is showing that it has multiple options - if infer_output { + if chgs.infer_output { let _unknown = self.tg[node.idx()].output.is_multiple(); debug_assert!( _unknown, @@ -418,9 +420,13 @@ impl<'d> Compiler<'d> { err = Some(e); } } + + if !chgs.adds_vars { + chgs.chgs.push(locals_graph::Chg::Seal(node).into()); + } } - let chgd = self.apply_graph_chgs(chgs.into_iter())?; + let chgd = self.apply_graph_chgs(chgs.chgs.into_iter())?; (goto_resolve | chgd) .then(|| ()) @@ -428,13 +434,7 @@ impl<'d> Compiler<'d> { } /// Try to compile `opnode` into an evaluation [`Step`] given the input type (`in_ty`). - pub fn compile_block( - &self, - opnode: OpNode, - in_ty: Type, - chgs: Chgs, - infer_output: &mut bool, - ) -> Result { + pub fn compile_block(&self, opnode: OpNode, in_ty: Type, chgs: &mut Chgs) -> Result { let cmd_node = self.ag.get_impl(opnode, &in_ty).ok_or_else(|| { Error::op_not_found( self.ag[opnode.idx()].tag(), @@ -446,7 +446,7 @@ impl<'d> Compiler<'d> { match &self.ag[cmd_node.idx()] { AstNode::Intrinsic { op: _op, intrinsic } => { - let block = Block::construct(self, cmd_node, in_ty, chgs, infer_output); + let block = Block::construct(self, cmd_node, in_ty, chgs); intrinsic(block) } AstNode::Def { expr: _, params: _ } => { @@ -473,7 +473,8 @@ impl<'d> Compiler<'d> { None => { // there is not, but we can add to the TG that this sub-expression will // have input of `in_ty`. - chgs.push(tygraph::Chg::KnownInput(expr.idx(), in_ty).into()); + chgs.chgs + .push(tygraph::Chg::KnownInput(expr.idx(), in_ty).into()); Err(Error::incomplete_expr_compilation( self.ag[expr.idx()].tag(), @@ -509,7 +510,7 @@ impl<'d> Compiler<'d> { &self, def: DefNode, in_ty: &Type, - chgs: Chgs, + chgs: &mut Chgs, ) -> Result> { let Compiler { ag, @@ -601,13 +602,7 @@ impl<'a> Compiler<'a> { } impl<'a> Block<'a> { - fn construct( - compiler: &'a Compiler, - node: CmdNode, - in_ty: Type, - chgs: Chgs<'a>, - output_infer: &'a mut bool, - ) -> Self { + fn construct(compiler: &'a Compiler, node: CmdNode, in_ty: Type, chgs: &'a mut Chgs) -> Self { let opnode = node.parent(&compiler.ag); let mut flags = compiler.ag.get_flags(node); let mut args = compiler.ag.get_args(node); @@ -623,7 +618,6 @@ impl<'a> Block<'a> { args, args_count: 0, chgs, - infer_output: output_infer, #[cfg(debug_assertions)] output_ty: None, } @@ -744,6 +738,9 @@ mod tests { fn compilation_test_07() { // initial variable testing compile("let $x | \\ $x").unwrap(); + + eprintln!("TEST #2 ------- \\ 3 | let $a | = $a"); + compile("\\ 3 | let $a | = $a").unwrap(); } #[test] diff --git a/ogma/src/eng/comp/params.rs b/ogma/src/eng/comp/params.rs index 8d89bd6b..d3afb203 100644 --- a/ogma/src/eng/comp/params.rs +++ b/ogma/src/eng/comp/params.rs @@ -31,11 +31,11 @@ impl<'d> Compiler<'d> { .filter(|d| !self.callsite_params.contains_key(&d.index())) .collect::>(); - let chgs = &mut Vec::new(); + let mut chgs = Chgs::default(); let mut chgd = false; for def in defs { - match map_def_params_into_variables(self, def, chgs)? { + match map_def_params_into_variables(self, def, &mut chgs)? { LocalInjection::LgChange => (), // continue, LocalInjection::Success { callsite_params } => { chgd = true; @@ -50,7 +50,7 @@ impl<'d> Compiler<'d> { // seal off the def's expr node // no more changes should occur since we have had succcess building. - self.lg.seal_node(def.expr(&self.ag).idx(), &self.ag); + self.lg.seal(def.expr(&self.ag)); } LocalInjection::UnknownReturnTy(_) => { // NOTE what to do about unknown return arguments!? @@ -59,7 +59,7 @@ impl<'d> Compiler<'d> { } } - let chgd2 = self.apply_graph_chgs(chgs.drain(..))?; + let chgd2 = self.apply_graph_chgs(chgs.chgs.into_iter())?; Ok(chgd || chgd2) } } @@ -80,7 +80,7 @@ pub struct CallsiteParam { pub(super) fn map_def_params_into_variables( compiler: &Compiler, defnode: DefNode, - chgs: Chgs, + chgs: &mut Chgs, ) -> Result { let Compiler { ag, tg, defs, .. } = compiler; @@ -131,7 +131,7 @@ pub(super) fn map_def_params_into_variables( if param.ty.is_expr() { // the parameter is lazy if let Err(chg) = map_lazy_param(compiler, argnode, defnode, param) { - chgs.push(chg.into()); + chgs.chgs.push(chg.into()); lg_chg = true; } } else { @@ -179,7 +179,7 @@ fn map_callsite_param( defnode: DefNode, arg_idx: u8, param: &Parameter, - chgs: Chgs, + chgs: &mut Chgs, ) -> Result, LocalInjection>> { let Compiler { ag, lg, .. } = compiler; @@ -228,7 +228,7 @@ fn map_callsite_param( var, arg_idx, }) - .map_err(|chg| chgs.push(chg.into())) + .map_err(|chg| chgs.chgs.push(chg.into())) .ok())) } diff --git a/ogma/src/eng/graphs/locals_graph.rs b/ogma/src/eng/graphs/locals_graph.rs index 7fc2feea..9aee015c 100644 --- a/ogma/src/eng/graphs/locals_graph.rs +++ b/ogma/src/eng/graphs/locals_graph.rs @@ -2,6 +2,15 @@ use super::*; use astgraph::AstGraph; use petgraph::prelude::*; +type Map = HashMap; + +#[derive(Copy, Clone, Debug)] +enum E { + Flow, + Seal, +} + +/// A index pointer into the locals stack. #[derive(Debug, Copy, Clone)] struct Local { local: u32, @@ -10,12 +19,13 @@ struct Local { #[derive(Default, Debug, Clone)] struct Vars { - map: HashMap, - /// Marker indicating that this map will not have any more variables introduced into it. + /// A map of all the available variables. + map: Map, + /// Flags if this node is sealed. sealed: bool, } -type LocalsGraphInner = petgraph::stable_graph::StableGraph; +type LocalsGraphInner = petgraph::stable_graph::StableGraph; #[derive(Debug)] pub enum Chg { @@ -32,6 +42,12 @@ pub enum Chg { tag: Tag, defined_at: NodeIndex, }, + /// Seal the op node, indicating no further variable additions will be made. + Seal(OpNode), + /// Removes a connection between the op and arg. + /// This is useful for ops such as `let` which might need it's arguments to be able to compile + /// without `let` op needing to be 'sealed'. + BreakEdge { op: OpNode, arg: ArgNode }, } /// A graph structure which tracks variables, allowing for lexical scoping and shadowing. @@ -46,24 +62,29 @@ pub struct LocalsGraph { locals: Vec, } +impl E { + fn is_flow(&self) -> bool { + matches!(self, E::Flow) + } + + fn is_seal(&self) -> bool { + matches!(self, E::Seal) + } +} + impl LocalsGraph { /// Builds a locals graph based off the ast graph. pub fn build(ast_graph: &AstGraph) -> Self { - let mut graph = ast_graph.map(|_, _| Default::default(), |_, _| ()); + let mut graph = ast_graph.map(|_, _| Default::default(), |_, _| E::Seal); graph.clear_edges(); // remove all the edges - let mut this = Self { + Self { graph, count: 0, locals: Vec::new(), } - .init_edges(ast_graph); - - // seal initial node since no variables will be introduced into them - this.seal_node(0.into(), ast_graph); - - this + .init_edges(ast_graph) } /// Crucially, no edges are provided between def boundaries. @@ -73,22 +94,45 @@ impl LocalsGraph { let g = &mut self.graph; // blocks flow between each other in expressions + // all ops are indeterminate to add for op in ag.op_nodes() { if let Some(next) = op.next(ag) { - g.update_edge(op.idx(), next.idx(), ()); + g.update_edge(op.idx(), next.idx(), E::Flow); + g.update_edge(next.idx(), op.idx(), E::Seal); } } // each op flows into each of its arguments + // sealing checks the _prev_ argument, or the expr for the root + // it also defaults to the op, but this can be broken + // all args assert that they are sealed for arg in ag.arg_nodes() { - g.update_edge(arg.op(ag).idx(), arg.idx(), ()); + g[arg.idx()].sealed = true; + + let op = arg.op(ag); + + g.update_edge(op.idx(), arg.idx(), E::Flow); + g.update_edge(arg.idx(), op.idx(), E::Seal); + + match op.prev(ag) { + Some(prev) => g.update_edge(arg.idx(), prev.idx(), E::Seal), + None => g.update_edge(arg.idx(), op.parent(ag).idx(), E::Seal), + }; } // each expr flows into its first op + // sealing flows in opposite direction + // expressions are indeterminate to add, since sub-expressions of defs may have parameters + // that need to finalise. the only node which would be sealed is the root node for expr in ag.expr_nodes() { - g.update_edge(expr.idx(), expr.first_op(ag).idx(), ()); + let fop = expr.first_op(ag).idx(); + g.update_edge(expr.idx(), fop, E::Flow); + g.update_edge(fop, expr.idx(), E::Seal); } + // seal the root expression node + self.seal(ExprNode(0.into())); + self } @@ -126,28 +170,40 @@ impl LocalsGraph { self.count } - /// Fetch a variable if it is found on the node's locals map. - pub fn get(&self, node: NodeIndex, name: &str) -> Option<&eng::Local> { + /// Get a local `name` at `node`. + /// + /// Note that this does not check for sealing, that should be done before using this reference. + fn get_(&self, node: NodeIndex, name: &str) -> Option<&eng::Local> { self.graph[node] .map .get(name) - .map(|l| l.local as usize) - .map(|i| &self.locals[i]) + .map(|l| &self.locals[l.local as usize]) } - /// Much like `get`, but runs checks to assert if `name` is definitely not found, returning a - /// hard error. + /// Gets the local `name` at `node`. /// - /// If `name` is not found but there could be updates to the graph which introduce `name`, then - /// a soft error is returned. - pub fn get_checked(&self, node: NodeIndex, name: &str, tag: &Tag) -> Result<&eng::Local> { - self.get(node, name).ok_or_else(|| { - if self.sealed(node) { - Error::var_not_found(tag) - } else { - Error::update_locals_graph(tag) - } - }) + /// If the locals graph cannot provide a concrete answer, [`Error::update_locals_graph`] is + /// returned. + pub fn get(&self, node: NodeIndex, name: &str, tag: &Tag) -> Result<&eng::Local> { + self.sealed(node) + .then(|| { + self.get_(node, name) + .ok_or_else(|| Error::var_not_found(tag)) + }) + .ok_or_else(|| Error::update_locals_graph(tag)) + .and_then(|x| x) + } + + /// Gets the local `name` at `node`. + /// + /// If the locals graph cannot provide a concrete answer because there are unsealed nodes in + /// lexical scope, `(false, None)` is returned. + /// Otherwise `(true, Option)` is returned. + pub fn get_opt(&self, node: NodeIndex, name: &str) -> (bool, Option<&eng::Local>) { + match self.sealed(node) { + true => (true, self.get_(node, name)), + false => (false, None), + } } /// A new variable should be added. @@ -242,13 +298,41 @@ impl LocalsGraph { self.flow(defined_at); // propogate change true // graph altered } + Chg::Seal(op) => { + let a = &mut self.graph[op.idx()]; + if a.sealed { + false + } else { + a.sealed = true; + true + } + } + Chg::BreakEdge { op, arg } => { + if let Some(e) = self.graph.find_edge(arg.idx(), op.idx()) { + self.graph.remove_edge(e); + true + } else { + false + } + } } } + fn neighbours_flow(&self, n: NodeIndex) -> impl Iterator + '_ { + self.graph + .edges(n) + .filter_map(|e| e.weight().is_flow().then(|| e.target())) + } + + fn neighbours_seal(&self, n: NodeIndex) -> impl Iterator + '_ { + self.graph + .edges(n) + .filter_map(|e| e.weight().is_seal().then(|| e.target())) + } + fn flow(&mut self, from: NodeIndex) { let mut stack = self - .graph - .neighbors(from) + .neighbours_flow(from) .map(|to| (from, to)) .collect::>(); @@ -257,32 +341,17 @@ impl LocalsGraph { // to is now update, flow from there let from = to; - stack.extend(self.graph.neighbors(from).map(|to| (from, to))); + stack.extend(self.neighbours_flow(from).map(|to| (from, to))); } } - fn accrete(&mut self, from: NodeIndex, to_idx: NodeIndex) { + fn accrete(&mut self, from: NodeIndex, to: NodeIndex) { // to get around lifetime issues, we swap out the `to` map to mutate it, // then swap it back in once we are done - let mut to = std::mem::take(&mut self.graph[to_idx].map); - let from = &self.graph[from].map; - - for (k, v) in from { - // if to does not contain this key, add it in! - if !to.contains_key(k) { - to.insert(k.clone(), *v); - } else { - let tov = to.get_mut(k).expect("will exist"); - // if the to variable was defined at to, it would be more recent. - // if not, update the map local here to be from - if tov.defined != to_idx { - *tov = *v; - } - } - } - + let to_ = std::mem::take(&mut self.graph[to].map); + let to_ = accrete(&self.graph[from].map, to_, to); // reinstate the map - self.graph[to_idx].map = to; + self.graph[to].map = to_; } fn add_new_var(&mut self, name: Str, ty: Type, tag: Tag, defined: NodeIndex) { @@ -317,54 +386,27 @@ impl LocalsGraph { .insert(name, Local { local, defined }); } - /// Seal an op or expr node, flagging that there will not be any more variables introduced. - /// An op's successful compilation would seal it. - pub fn seal_node(&mut self, node: NodeIndex, ag: &AstGraph) { - if ag[node].expr().is_some() { - // if sealing an expression seal itself - // NOTE we do NOT seal op's as themselves - self.graph[node].sealed = true; - } - - let mut stack = self.graph.neighbors(node).collect::>(); - - while let Some(n) = stack.pop() { - // if already sealed, it would have already gone through this process, so do not keep - // walking down. - if self.graph[n].sealed { - continue; - } - - self.graph[n].sealed = true; // seal flow targets + /// Checks that this node is sealed and no variables will be introduced along the path to root. + pub fn sealed(&self, node: NodeIndex) -> bool { + let mut q = std::collections::VecDeque::new(); + q.push_back(node); - // if flow target is an expression, seal the expr's first op as well - if let Some(e) = ag[n].expr().map(|_| ExprNode(n)) { - stack.push(e.first_op(ag).idx()); + while let Some(node) = q.pop_front() { + if !self.graph[node].sealed { + return false; } - // if the flow target is an Op (ie next block), seal the **argument** nodes - // NOTE: this is done since an Op should not have variable definition power that would - // influence the op's arguments... - if ag[n].op().is_some() { - stack.extend( - self.graph - .neighbors(n) - // do not filter the next op though - .filter(|&n| ag[n].op().is_none()), - ); - } + q.extend(self.neighbours_seal(node)); } + + true } - /// Returns if this node will not be updated, sealing the variable set. + /// Mark this expression node as one which will not introduce variables. /// - /// This not only looks at itself, but the parent paths, since an unsealed node in the chain - /// might introduce a variable which propagates along. - pub fn sealed(&self, node: NodeIndex) -> bool { - // self is sealed, - self.graph[node].sealed - // and its ancestors are sealed (notice the recursion) - && self.graph.neighbors_directed(node, Incoming).all(|n| self.sealed(n)) + /// This is meant for definition sub-expressions for when the parameters have been finalised. + pub fn seal(&mut self, expr: ExprNode) { + self.graph[expr.idx()].sealed = true; } } @@ -424,7 +466,7 @@ impl LocalsGraph { vars, } in xs { - writeln!(s, r#" [{},"{}",{},"{}"]"#, n.index(), tag, sealed, vars)?; + writeln!(s, r#" [{},"{tag}",{sealed},"{vars}"]"#, n.index())?; } writeln!(s, "]")?; @@ -442,12 +484,33 @@ impl LocalsGraph { "{}: {}{}", n.index(), ag[n].tag(), - if w.sealed { "
sealed" } else { "" } + match w.sealed { + true => "
sealed", + false => "", + } ) }, - |_, s| write!(s, " "), + |n, s| write!(s, "{n:?}",), )?; Ok(()) } } + +fn accrete(from: &Map, mut to: Map, to_idx: NodeIndex) -> Map { + for (k, v) in from { + // if to does not contain this key, add it in! + if !to.contains_key(k) { + to.insert(k.clone(), *v); + } else { + let tov = to.get_mut(k).expect("will exist"); + // if the to variable was defined at to, it would be more recent. + // if not, update the map local here to be from + if tov.defined != to_idx { + *tov = *v; + } + } + } + + to +} diff --git a/ogma/src/eng/mod.rs b/ogma/src/eng/mod.rs index ce347e59..46572b3f 100644 --- a/ogma/src/eng/mod.rs +++ b/ogma/src/eng/mod.rs @@ -24,8 +24,6 @@ pub(crate) use self::{ pub use self::comp::{compile, FullCompilation}; -type Chgs<'a> = &'a mut Vec; - // ###### COMPILER ############################################################# /// Ogma expression compiler. #[derive(Clone)] @@ -87,6 +85,21 @@ pub struct Block<'a> { /// Only 255 arguments are supported. args_count: u8, + chgs: &'a mut Chgs, + + /// Carry information about an asserted output type. + /// Check this against upon finalisation to ensure it matches. + /// Only available and checked in debug builds. + #[cfg(debug_assertions)] + output_ty: Option, +} + +/// Compiler changes to apply. +/// +/// These changes are kept separate to the compiler state and are applied given the blocks partial +/// compilation. +#[derive(Default)] +pub struct Chgs { /// A list of changes to be made to the type graph. /// /// This is stored as a mutable reference since the block is usually passed by value to @@ -94,16 +107,11 @@ pub struct Block<'a> { /// Any items here are actioned by the compiler to update the type graph, providing more /// information to conduct the type inferencing. /// This allows for block compilation to fail but the updates still be applied. - chgs: &'a mut Vec, - + chgs: Vec, /// Flag that this block's output should be inferred if getting to output inferencing phase. - infer_output: &'a mut bool, - - /// Carry information about an asserted output type. - /// Check this against upon finalisation to ensure it matches. - /// Only available and checked in debug builds. - #[cfg(debug_assertions)] - output_ty: Option, + infer_output: bool, + /// Flag that this op would introduce variables. + adds_vars: bool, } // ###### STEP ################################################################# @@ -135,7 +143,7 @@ mod tests { // Although block sizing is large, it would not really be a hot spot, and the cost of // refactoring somewhat outweighs any benefit, without doing any proper profiling to // support it. - assert_eq!(size_of::(), 96 + size_of::>()); // `output_ty` is only on debug builds + assert_eq!(size_of::(), 88 + size_of::>()); // `output_ty` is only on debug builds assert_eq!(size_of::(), 32); } } diff --git a/ogma/src/eng/var.rs b/ogma/src/eng/var.rs index d663a002..23781edb 100644 --- a/ogma/src/eng/var.rs +++ b/ogma/src/eng/var.rs @@ -1,6 +1,6 @@ use super::*; -use ::libs::divvy::Str; use graphs::ArgNode; +use libs::divvy::Str; use std::sync::Arc; // ###### VARIABLE ############################################################# diff --git a/ogma/src/lang/impls/intrinsics/morphism.rs b/ogma/src/lang/impls/intrinsics/morphism.rs index 669acfe6..6eeccb55 100644 --- a/ogma/src/lang/impls/intrinsics/morphism.rs +++ b/ogma/src/lang/impls/intrinsics/morphism.rs @@ -541,13 +541,19 @@ the variable $row is available to query the table row" } fn fold_table_intrinsic(mut blk: Block) -> Result { + blk.assert_adds_vars(true); blk.assert_input(&Ty::Tab)?; - let seed = blk.next_arg()?.supplied(Type::Nil)?.concrete()?; + let seed = blk + .next_arg()? + .decouple_op_seal() + .supplied(Type::Nil)? + .concrete()?; let out_ty = seed.out_ty().clone(); blk.assert_output(out_ty.clone()); let row_var = blk.inject_manual_var_next_arg("row", Ty::TabRow)?; + blk.assert_vars_added(); let acc_expr = blk .next_arg()? .supplied(out_ty.clone())? // accumulator supplies seed type @@ -864,8 +870,11 @@ struct MapTable { impl MapTable { fn map(mut blk: Block) -> Result { + blk.assert_adds_vars(true); + let colarg = blk .next_arg()? + .decouple_op_seal() .supplied(Ty::Nil)? .returns(Ty::Str)? .concrete()?; @@ -876,6 +885,7 @@ impl MapTable { .flatten(); let row_var = blk.inject_manual_var_next_arg("row", Ty::TabRow)?; + blk.assert_vars_added(); let transformation = blk.next_arg()?; let transformation = match (force_flag, ty_flag) { diff --git a/ogma/src/lang/impls/intrinsics/pipeline.rs b/ogma/src/lang/impls/intrinsics/pipeline.rs index 42127ef8..c5da8107 100644 --- a/ogma/src/lang/impls/intrinsics/pipeline.rs +++ b/ogma/src/lang/impls/intrinsics/pipeline.rs @@ -382,6 +382,8 @@ variables are scoped to within the expression they are defined" } fn let_intrinsic(mut blk: Block) -> Result { + blk.assert_adds_vars(false); + type Binding = (eng::Variable, eng::Argument); // detect if the trailing argument is a command (expr) node @@ -434,6 +436,8 @@ fn let_intrinsic(mut blk: Block) -> Result { None }; + blk.assert_vars_added(); + fn bind_vars(bindings: &[Binding], value: &Value, cx: &mut Context) -> Result<()> { for (var, e) in bindings { let v = e.resolve(|| value.clone(), cx)?; diff --git a/ogma/tests/commands/pipeline.rs b/ogma/tests/commands/pipeline.rs index ccc9c97f..06924afe 100644 --- a/ogma/tests/commands/pipeline.rs +++ b/ogma/tests/commands/pipeline.rs @@ -777,6 +777,41 @@ fn not_enough_let_params() { ); } +#[test] +fn variable_shadowing_soundness_01() { + let defs = &Definitions::new(); + let code = "ls | let $x | + { let 'hdr' $x | Table $x }"; + let x = process_w_nil(code, defs); + + if let Err(e) = &x { + println!("{e}"); + } + + assert!(matches!(x, Ok(Value::Tab(_)))); + + let code = r#"ls | let $x | map size {:Num let $row.type:Str $x | Table $x }"#; + let x = process_w_nil(code, defs); + + if let Err(e) = &x { + println!("{e}"); + } + + assert!(matches!(x, Ok(Value::Tab(_)))); +} + +#[test] +fn variable_shadowing_soundness_02() { + let defs = &Definitions::new(); + let code = r#"ls | let $x | grp type | map value {:Table \ $x | let $row.key:Str $x | fold {Table $x} append-row $row.size:Num }"#; + let x = process_w_nil(code, defs); + + if let Err(e) = &x { + println!("{e}"); + } + + assert!(matches!(x, Ok(Value::Tab(_)))); +} + // ------ Nth ------------------------------------------------------------------ #[test] fn nth_help_msg() {