diff --git a/CHANGELOG.md b/CHANGELOG.md index 6c15aaa681..b9bbce6259 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,10 @@ ## Cairo-VM Changelog #### Upcoming Changes +* feat: Add support for cairo1 run with segment arena validation. + * Refactored the runner CASM code generation to use a higher-level builder. + * Added segment merging of the dictionary segments. + * Added validation of the generated segment arena in cairo1 run. * feat: Implement `extend_additional_data` for `BuiltinRunner`[#1726](https://github.com/lambdaclass/cairo-vm/pull/1726) diff --git a/cairo1-run/src/cairo_run.rs b/cairo1-run/src/cairo_run.rs index 3cbb4076b4..b3b9e8f20a 100644 --- a/cairo1-run/src/cairo_run.rs +++ b/cairo1-run/src/cairo_run.rs @@ -1,5 +1,11 @@ use cairo_lang_casm::{ - casm, casm_extend, hints::Hint, inline::CasmContext, instructions::Instruction, + builder::{CasmBuilder, Var}, + casm, casm_build_extend, + cell_expression::CellExpression, + deref, deref_or_immediate, + hints::Hint, + inline::CasmContext, + instructions::{Instruction, InstructionBody}, }; use cairo_lang_sierra::{ extensions::{ @@ -14,7 +20,7 @@ use cairo_lang_sierra::{ starknet::syscalls::SystemType, ConcreteType, NamedType, }, - ids::ConcreteTypeId, + ids::{ConcreteTypeId, GenericTypeId}, program::{Function, GenericArg, Program as SierraProgram}, program_registry::ProgramRegistry, }; @@ -348,97 +354,117 @@ fn create_entry_code( // The builtins in the formatting expected by the runner. let (builtins, builtin_offset) = get_function_builtins(&signature.param_types, copy_to_output_builtin); - let mut ctx = casm! {}; + let mut ctx = CasmBuilder::default(); + // Getting a variable pointing to the location of each builtin. + let mut builtin_vars = + HashMap::::from_iter(builtin_offset.iter().map(|(id, offset)| { + ( + id.clone(), + ctx.add_var(CellExpression::Deref(deref!([fp - offset]))), + ) + })); + // Getting a variable for the location output builtin if required. 
+ let output_ptr = copy_to_output_builtin.then(|| { + let offset: i16 = 2 + builtins.len().into_or_panic::(); + ctx.add_var(CellExpression::Deref(deref!([fp - offset]))) + }); + let got_segment_arena = signature.param_types.iter().any(|ty| { + get_info(sierra_program_registry, ty) + .map(|x| x.long_id.generic_id == SegmentArenaType::ID) + .unwrap_or_default() + }); + if got_segment_arena { + // Allocating local vars to save the builtins for the validations. + for _ in 0..builtins.len() { + casm_build_extend!(ctx, tempvar _local;); + } + casm_build_extend!(ctx, ap += builtins.len();); + } // Load all vecs to memory. // Load all array args content to memory. let mut array_args_data = vec![]; - let mut ap_offset: i16 = 0; for arg in config.args { let FuncArg::Array(values) = arg else { continue; }; - array_args_data.push(ap_offset); - casm_extend! {ctx, - %{ memory[ap + 0] = segments.add() %} + casm_build_extend! {ctx, + tempvar arr; + hint AllocSegment {} into {dst: arr}; ap += 1; - } + }; + array_args_data.push(arr); for (i, v) in values.iter().enumerate() { - let arr_at = (i + 1) as i16; - casm_extend! {ctx, - [ap + 0] = (v.to_bigint()); - [ap + 0] = [[ap - arr_at] + (i as i16)], ap++; + casm_build_extend! {ctx, + const cvalue = v.to_bigint(); + tempvar value = cvalue; + assert value = arr[i.to_i16().unwrap()]; }; } - ap_offset += (1 + values.len()) as i16; } - let mut array_args_data_iter = array_args_data.iter(); - let after_arrays_data_offset = ap_offset; + let mut array_args_data_iter = array_args_data.into_iter(); let mut arg_iter = config.args.iter().enumerate(); let mut param_index = 0; let mut expected_arguments_size = 0; - if signature.param_types.iter().any(|ty| { - get_info(sierra_program_registry, ty) - .map(|x| x.long_id.generic_id == SegmentArenaType::ID) - .unwrap_or_default() - }) { - casm_extend! {ctx, - // SegmentArena segment. - %{ memory[ap + 0] = segments.add() %} - // Infos segment. 
- %{ memory[ap + 1] = segments.add() %} - ap += 2; - [ap + 0] = 0, ap++; + if got_segment_arena { + // Allocating the segment arena and initializing it. + casm_build_extend! {ctx, + tempvar segment_arena; + tempvar infos; + hint AllocSegment {} into {dst: segment_arena}; + hint AllocSegment {} into {dst: infos}; + const czero = 0; + tempvar zero = czero; // Write Infos segment, n_constructed (0), and n_destructed (0) to the segment. - [ap - 2] = [[ap - 3]]; - [ap - 1] = [[ap - 3] + 1]; - [ap - 1] = [[ap - 3] + 2]; + assert infos = *(segment_arena++); + assert zero = *(segment_arena++); + assert zero = *(segment_arena++); } - ap_offset += 3; - } + // Adding the segment arena to the builtins var map. + builtin_vars.insert(SegmentArenaType::ID, segment_arena); + }; for ty in &signature.param_types { let info = get_info(sierra_program_registry, ty) .ok_or_else(|| Error::NoInfoForType(ty.clone()))?; let generic_ty = &info.long_id.generic_id; - if let Some(offset) = builtin_offset.get(generic_ty) { - casm_extend!(ctx, [ap + 0] = [fp - *offset], ap++;); - ap_offset += 1; + if let Some(var) = builtin_vars.get(generic_ty).cloned() { + casm_build_extend!(ctx, tempvar _builtin = var;); } else if generic_ty == &SystemType::ID { - casm_extend! {ctx, - %{ memory[ap + 0] = segments.add() %} + casm_build_extend! {ctx, + tempvar system; + hint AllocSegment {} into {dst: system}; ap += 1; - } - ap_offset += 1; + }; } else if generic_ty == &GasBuiltinType::ID { - casm_extend!(ctx, [ap + 0] = initial_gas, ap++;); - ap_offset += 1; - } else if generic_ty == &SegmentArenaType::ID { - let offset = -ap_offset + after_arrays_data_offset; - casm_extend!(ctx, [ap + 0] = [ap + offset] + 3, ap++;); - ap_offset += 1; + casm_build_extend! 
{ctx, + const initial_gas = initial_gas; + tempvar _gas = initial_gas; + }; } else { let ty_size = type_sizes[ty]; - let param_ap_offset_end = ap_offset + ty_size; + let mut param_accum_size = 0; expected_arguments_size += ty_size; - while ap_offset < param_ap_offset_end { + while param_accum_size < ty_size { let Some((arg_index, arg)) = arg_iter.next() else { break; }; match arg { FuncArg::Single(value) => { - casm_extend! {ctx, - [ap + 0] = (value.to_bigint()), ap++; - } - ap_offset += 1; + casm_build_extend! {ctx, + const value = value.to_bigint(); + tempvar _value = value; + }; + param_accum_size += 1; } FuncArg::Array(values) => { - let offset = -ap_offset + array_args_data_iter.next().unwrap(); - casm_extend! {ctx, - [ap + 0] = [ap + (offset)], ap++; - [ap + 0] = [ap - 1] + (values.len()), ap++; - } - ap_offset += 2; - if ap_offset > param_ap_offset_end { + let var = array_args_data_iter.next().unwrap(); + casm_build_extend! {ctx, + const length = values.len(); + tempvar start = var; + tempvar end = var + length; + }; + param_accum_size += 2; + if param_accum_size > ty_size { return Err(Error::ArgumentUnaligned { param_index, arg_index, @@ -465,7 +491,7 @@ fn create_entry_code( }); } - let before_final_call = ctx.current_code_offset; + casm_build_extend!(ctx, let () = call FUNCTION;); let return_type_id = signature .ret_types @@ -475,98 +501,128 @@ fn create_entry_code( .get(return_type_id) .cloned() .ok_or_else(|| Error::NoTypeSizeForId(return_type_id.clone()))?; - let builtin_count: i16 = builtins.len().into_or_panic(); - let builtin_locations: Vec = builtins - .iter() - .enumerate() - .map(|(i, name)| { - let fp_loc = i.into_or_panic::() - builtin_count - 2; - let generic = match name { - BuiltinName::range_check => RangeCheckType::ID, - BuiltinName::pedersen => PedersenType::ID, - BuiltinName::bitwise => BitwiseType::ID, - BuiltinName::ec_op => EcOpType::ID, - BuiltinName::poseidon => PoseidonType::ID, - BuiltinName::segment_arena => SegmentArenaType::ID, 
- BuiltinName::keccak - | BuiltinName::ecdsa - | BuiltinName::output - | BuiltinName::range_check96 - | BuiltinName::add_mod - | BuiltinName::mul_mod => return fp_loc, - }; - signature - .ret_types - .iter() - .position(|ty| { - sierra_program_registry - .get_type(ty) - .unwrap() - .info() - .long_id - .generic_id - == generic - }) - .map(|i| (signature.ret_types.len() - i).into_or_panic()) - .unwrap_or(fp_loc) - }) - .collect(); - if copy_to_output_builtin { - assert!( - builtins.iter().contains(&BuiltinName::output), - "Output builtin is required for proof mode or append_return_values" - ); + let mut offset: i16 = 0; + for ty in signature.ret_types.iter().rev() { + let info = get_info(sierra_program_registry, ty) + .ok_or_else(|| Error::NoInfoForType(ty.clone()))?; + offset += type_sizes[ty]; + let generic_ty = &info.long_id.generic_id; + let Some(var) = builtin_vars.get_mut(generic_ty) else { + continue; + }; + *var = ctx.add_var(CellExpression::Deref(deref!([ap - offset]))); } - let final_call_size = - // The call. - 2 - // The copying of the return values to the output segment. - + if copy_to_output_builtin { return_type_size.into_or_panic::() + 1 } else { 0 } - // Rewriting the builtins to top of the stack. - + builtins.len() - // The return or infinite loop. - + if config.proof_mode { 2 } else { 1 }; - let offset = final_call_size - + casm_program.debug_info.sierra_statement_info[func.entry_point.0].code_offset; - - casm_extend!(ctx, call rel offset;); - if copy_to_output_builtin { - let Some(output_builtin_idx) = builtins.iter().position(|b| b == &BuiltinName::output) - else { - panic!("Output builtin is required for proof mode or append_return_values."); - }; - let output_fp_offset: i16 = builtin_locations[output_builtin_idx]; - for (i, j) in (1..return_type_size + 1).rev().enumerate() { - casm_extend! 
{ctx, - // [ap -j] is where each return value is located in memory - // [[fp + output_fp_offet] + 0] is the base of the output segment - [ap - j] = [[fp + output_fp_offset] + i as i16]; - }; + let output_ptr = output_ptr.unwrap(); + let outputs = (1..(return_type_size + 1)) + .rev() + .map(|i| ctx.add_var(CellExpression::Deref(deref!([ap - i])))) + .collect_vec(); + for output in outputs { + casm_build_extend!(ctx, assert output = *(output_ptr++);); } } - let mut ret_builtin_offset = return_type_size - 1; - for (builtin, location) in builtins.iter().zip(builtin_locations) { - if builtin == &BuiltinName::output && copy_to_output_builtin { - casm_extend!(ctx, [ap + 0] = [fp + location] + return_type_size, ap++;); - } else if location < 0 { - casm_extend!(ctx, [ap + 0] = [fp + location], ap++;); - } else { - casm_extend!(ctx, [ap + 0] = [ap - (ret_builtin_offset + location)], ap++;); + // Helper to get a variable for a given builtin. + // Fails for builtins that will never be present. + let get_var = |name: &BuiltinName| match name { + BuiltinName::output => output_ptr.unwrap(), + BuiltinName::range_check => builtin_vars[&RangeCheckType::ID], + BuiltinName::pedersen => builtin_vars[&PedersenType::ID], + BuiltinName::bitwise => builtin_vars[&BitwiseType::ID], + BuiltinName::ec_op => builtin_vars[&EcOpType::ID], + BuiltinName::poseidon => builtin_vars[&PoseidonType::ID], + BuiltinName::segment_arena => builtin_vars[&SegmentArenaType::ID], + BuiltinName::keccak + | BuiltinName::ecdsa + | BuiltinName::range_check96 + | BuiltinName::add_mod + | BuiltinName::mul_mod => unreachable!(), + }; + if copy_to_output_builtin && got_segment_arena { + // Copying the final builtins into a local variables. 
+ for (i, builtin) in builtins.iter().enumerate() { + let var = get_var(builtin); + let local = ctx.add_var(CellExpression::Deref(deref!([fp + i.to_i16().unwrap()]))); + casm_build_extend!(ctx, assert local = var;); + } + let segment_arena_ptr = get_var(&BuiltinName::segment_arena); + // Validating the segment arena's segments are one after the other. + casm_build_extend! {ctx, + tempvar n_segments = segment_arena_ptr[-2]; + tempvar n_finalized = segment_arena_ptr[-1]; + assert n_segments = n_finalized; + jump STILL_LEFT_PRE if n_segments != 0; + rescope{}; + jump DONE_VALIDATION; + STILL_LEFT_PRE: + const one = 1; + tempvar infos = segment_arena_ptr[-3]; + tempvar remaining_segments = n_segments - one; + rescope{infos = infos, remaining_segments = remaining_segments}; + LOOP_START: + jump STILL_LEFT_LOOP if remaining_segments != 0; + rescope{}; + jump DONE_VALIDATION; + STILL_LEFT_LOOP: + const one = 1; + const three = 3; + tempvar prev_end = infos[1]; + tempvar curr_start = infos[3]; + assert curr_start = prev_end + one; + tempvar next_infos = infos + three; + tempvar next_remaining_segments = remaining_segments - one; + rescope{infos = next_infos, remaining_segments = next_remaining_segments}; + #{ steps = 0; } + jump LOOP_START; + DONE_VALIDATION: + }; + // Copying the final builtins from locals into the top of the stack. + for i in 0..builtins.len().to_i16().unwrap() { + let local = ctx.add_var(CellExpression::Deref(deref!([fp + i]))); + casm_build_extend!(ctx, tempvar _r = local;); + } + } else { + // Writing the final builtins into the top of the stack. + for builtin in &builtins { + let var = get_var(builtin); + casm_build_extend!(ctx, tempvar _r = var;); } - ret_builtin_offset += 1; } if config.proof_mode { - casm_extend!(ctx, jmp rel 0;); + casm_build_extend! {ctx, + INFINITE_LOOP: + // To enable the merge of the branches. 
+ #{ steps = 0; } + jump INFINITE_LOOP; + }; } else { - casm_extend!(ctx, ret;); + casm_build_extend!(ctx, ret;); } - - assert_eq!(before_final_call + final_call_size, ctx.current_code_offset); - - Ok((ctx, builtins)) + let result = ctx.build(["FUNCTION"]); + let [call_inst] = result.branches[0].1.as_slice() else { + panic!("Expected a single relocation"); + }; + let mut instructions = result.instructions; + let instruction_sizes = instructions.iter().map(|inst| inst.body.op_size()); + let prev_call_size: usize = instruction_sizes.clone().take(*call_inst).sum(); + let post_call_size: usize = instruction_sizes.skip(*call_inst).sum(); + let InstructionBody::Call(inst) = &mut instructions[*call_inst].body else { + panic!("Expected call instruction"); + }; + inst.target = deref_or_immediate!( + post_call_size + + casm_program.debug_info.sierra_statement_info[func.entry_point.0].code_offset + ); + Ok(( + CasmContext { + instructions, + current_code_offset: prev_call_size + post_call_size, + current_hints: vec![], + }, + builtins, + )) } fn get_info<'a>( diff --git a/cairo_programs/cairo-1-programs/dict_with_struct.cairo b/cairo_programs/cairo-1-programs/dict_with_struct.cairo index e24df874c2..9f24aade67 100644 --- a/cairo_programs/cairo-1-programs/dict_with_struct.cairo +++ b/cairo_programs/cairo-1-programs/dict_with_struct.cairo @@ -7,7 +7,7 @@ struct FP16x16 { sign: bool } -fn main() -> Felt252Dict> { +fn main() -> SquashedFelt252Dict> { // Create the dictionary let mut d: Felt252Dict> = Default::default(); @@ -20,5 +20,5 @@ fn main() -> Felt252Dict> { d.insert(1, nullable_from_box(box_a)); d.insert(2, nullable_from_box(box_b)); - d + d.squash() } diff --git a/cairo_programs/cairo-1-programs/felt_dict.cairo b/cairo_programs/cairo-1-programs/felt_dict.cairo index 9934d4dcc9..cd935c317e 100644 --- a/cairo_programs/cairo-1-programs/felt_dict.cairo +++ b/cairo_programs/cairo-1-programs/felt_dict.cairo @@ -1,6 +1,6 @@ use core::nullable::{nullable_from_box, 
match_nullable, FromNullableResult}; -fn main() -> Felt252Dict>> { +fn main() -> SquashedFelt252Dict>> { // Create the dictionary let mut d: Felt252Dict>> = Default::default(); @@ -11,5 +11,5 @@ fn main() -> Felt252Dict>> { // Insert it as a `Span` d.insert(66675, nullable_from_box(BoxTrait::new(a.span()))); d.insert(66676, nullable_from_box(BoxTrait::new(b.span()))); - d + d.squash() } diff --git a/cairo_programs/cairo-1-programs/nullable_box_vec.cairo b/cairo_programs/cairo-1-programs/nullable_box_vec.cairo index 37b2b8042a..f051270e2d 100644 --- a/cairo_programs/cairo-1-programs/nullable_box_vec.cairo +++ b/cairo_programs/cairo-1-programs/nullable_box_vec.cairo @@ -1,5 +1,5 @@ struct NullableVec { - items: Felt252Dict>>, + items: SquashedFelt252Dict>>, len: usize, } @@ -13,7 +13,7 @@ fn main() -> NullableVec { // Return NullableVec NullableVec { - items: d, + items: d.squash(), len: 3, } } diff --git a/vm/src/hint_processor/cairo_1_hint_processor/dict_manager.rs b/vm/src/hint_processor/cairo_1_hint_processor/dict_manager.rs index 354e332f03..87f646d3dd 100644 --- a/vm/src/hint_processor/cairo_1_hint_processor/dict_manager.rs +++ b/vm/src/hint_processor/cairo_1_hint_processor/dict_manager.rs @@ -12,23 +12,28 @@ use crate::{types::relocatable::Relocatable, vm::vm_core::VirtualMachine}; pub struct DictTrackerExecScope { /// The data of the dictionary. data: HashMap, - /// The index of the dictionary in the dict_infos segment. - idx: usize, + /// The start of the segment of the dictionary. + start: Relocatable, + /// The start of the next segment in the segment arena, if finalized. + next_start: Option, } /// Helper object to allocate, track and destruct all dictionaries in the run. #[derive(Default)] pub struct DictManagerExecScope { /// Maps between a segment index and the DictTrackerExecScope associated with it. - trackers: HashMap, + segment_to_tracker: HashMap, + /// The actual trackers of the dictionaries, in the order of allocation. 
+ trackers: Vec, } impl DictTrackerExecScope { - /// Creates a new tracker placed in index `idx` in the dict_infos segment. - pub fn new(idx: usize) -> Self { + /// Creates a new tracker starting at `start`. + pub fn new(start: Relocatable) -> Self { Self { data: HashMap::default(), - idx, + start, + next_start: None, } } } @@ -38,45 +43,75 @@ impl DictManagerExecScope { /// Allocates a new segment for a new dictionary and return the start of the segment. pub fn new_default_dict(&mut self, vm: &mut VirtualMachine) -> Result { - let dict_segment = vm.add_memory_segment(); - if self - .trackers - .insert( - dict_segment.segment_index, - DictTrackerExecScope::new(self.trackers.len()), - ) - .is_some() - { - return Err(HintError::CustomHint( - "Segment index already in use.".to_string().into_boxed_str(), - )); - } - + let dict_segment = match self.trackers.last() { + // This is the first dict - a totally new segment is required. + None => vm.add_memory_segment(), + // New dict segment should be appended to the last segment. + // Appending by a temporary segment, if the last segment is not finalized. + Some(last) => last + .next_start + .unwrap_or_else(|| vm.add_temporary_segment()), + }; + let tracker = DictTrackerExecScope::new(dict_segment); + // Not checking if overriding - since overriding is allowed. + self.segment_to_tracker + .insert(dict_segment.segment_index, self.trackers.len()); + + self.trackers.push(tracker); Ok(dict_segment) } - /// Returns a reference for a dict tracker corresponding to a given pointer to a dict segment. - fn get_dict_tracker(&self, dict_end: Relocatable) -> Result<&DictTrackerExecScope, HintError> { - self.trackers.get(&dict_end.segment_index).ok_or_else(|| { - HintError::CustomHint( - "The given value does not point to a known dictionary." - .to_string() - .into_boxed_str(), - ) - }) - } - /// Returns a mut reference for a dict tracker corresponding to a given pointer to a dict /// segment. 
fn get_dict_tracker_mut(&mut self, dict_end: Relocatable) -> &mut DictTrackerExecScope { - self.trackers - .get_mut(&dict_end.segment_index) - .expect("The given value does not point to a known dictionary.") + let idx = self + .get_dict_infos_index(dict_end) + .expect("The given value does not point to a known dictionary."); + &mut self.trackers[idx] } /// Returns the index of the dict tracker corresponding to a given pointer to a dict segment. pub fn get_dict_infos_index(&self, dict_end: Relocatable) -> Result { - Ok(self.get_dict_tracker(dict_end)?.idx) + Ok(*self + .segment_to_tracker + .get(&dict_end.segment_index) + .ok_or_else(|| { + HintError::CustomHint( + "The given value does not point to a known dictionary." + .to_string() + .into_boxed_str(), + ) + })?) + } + + /// Finalizes a segment of a dictionary. + pub fn finalize_segment( + &mut self, + vm: &mut VirtualMachine, + dict_end: Relocatable, + ) -> Result<(), HintError> { + let tracker_idx = self.get_dict_infos_index(dict_end).unwrap(); + let tracker = &mut self.trackers[tracker_idx]; + let next_start = (dict_end + 1u32).unwrap(); + if let Some(prev) = tracker.next_start { + return Err(HintError::CustomHint( + format!( + "The segment is already finalized. \ + Attempting to override next start {prev}, with: {next_start}.", + ) + .into_boxed_str(), + )); + } + tracker.next_start = Some(next_start); + if let Some(next) = self.trackers.get(tracker_idx + 1) { + // Merging the next temporary segment with the closed segment. + vm.add_relocation_rule(next.start, next_start).unwrap(); + // Updating the segment to point to tracker the next segment points to. + let next_tracker_idx = self.segment_to_tracker[&next.start.segment_index]; + self.segment_to_tracker + .insert(dict_end.segment_index, next_tracker_idx); + } + Ok(()) } /// Inserts a value to the dict tracker corresponding to a given pointer to a dict segment. 
@@ -96,7 +131,10 @@ impl DictManagerExecScope { dict_end: Relocatable, key: &Felt252, ) -> Option { - self.get_dict_tracker(dict_end).ok()?.data.get(key).cloned() + self.trackers[self.get_dict_infos_index(dict_end).ok()?] + .data + .get(key) + .cloned() } } diff --git a/vm/src/hint_processor/cairo_1_hint_processor/hint_processor.rs b/vm/src/hint_processor/cairo_1_hint_processor/hint_processor.rs index 864c6277d1..1cfc100939 100644 --- a/vm/src/hint_processor/cairo_1_hint_processor/hint_processor.rs +++ b/vm/src/hint_processor/cairo_1_hint_processor/hint_processor.rs @@ -392,7 +392,9 @@ impl Cairo1HintProcessor { let dict_infos_index = dict_manager_exec_scope.get_dict_infos_index(dict_address)?; vm.insert_value(cell_ref_to_relocatable(dict_index, vm)?, dict_infos_index) - .map_err(HintError::from) + .map_err(HintError::from)?; + // The hint is only for dictionary finalization, so can be called. + dict_manager_exec_scope.finalize_segment(vm, dict_address) } #[allow(clippy::too_many_arguments)]