diff --git a/CHANGELOG.md b/CHANGELOG.md index af95f6fd5..8a36eebbd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Refactor accessed lists as sorted linked lists ([#30](https://github.com/0xPolygonZero/zk_evm/pull/30)) - Change visibility of `compact` mod ([#57](https://github.com/0xPolygonZero/zk_evm/pull/57)) - Fix block padding without withdrawals ([#63](https://github.com/0xPolygonZero/zk_evm/pull/63)) +- Change position of empty node encoding in RLP segment ([#62](https://github.com/0xPolygonZero/zk_evm/pull/62)) ## [0.1.0] - 2024-02-21 * Initial release. diff --git a/evm_arithmetization/src/cpu/kernel/asm/core/create_receipt.asm b/evm_arithmetization/src/cpu/kernel/asm/core/create_receipt.asm index 60e926473..bc6959483 100644 --- a/evm_arithmetization/src/cpu/kernel/asm/core/create_receipt.asm +++ b/evm_arithmetization/src/cpu/kernel/asm/core/create_receipt.asm @@ -54,8 +54,8 @@ process_receipt_after_bloom: // Now we can write the receipt in MPT_TRIE_DATA. %get_trie_data_size // stack: receipt_ptr, payload_len, status, new_cum_gas, txn_nb, new_cum_gas, txn_nb, num_nibbles, retdest - // Write transaction type if necessary. RLP_RAW contains, at index 0, the current transaction type. - PUSH @SEGMENT_RLP_RAW // ctx == virt == 0 + // Write transaction type if necessary. The address INITIAL_TXN_RLP_ADDR contains the current transaction type. 
+ PUSH @INITIAL_TXN_RLP_ADDR MLOAD_GENERAL // stack: first_txn_byte, receipt_ptr, payload_len, status, new_cum_gas, txn_nb, new_cum_gas, txn_nb, num_nibbles, retdest DUP1 %eq_const(1) %jumpi(receipt_nonzero_type) diff --git a/evm_arithmetization/src/cpu/kernel/asm/main.asm b/evm_arithmetization/src/cpu/kernel/asm/main.asm index c69ae2636..14bb38b6d 100644 --- a/evm_arithmetization/src/cpu/kernel/asm/main.asm +++ b/evm_arithmetization/src/cpu/kernel/asm/main.asm @@ -15,8 +15,9 @@ global main: // Initialize accessed addresses and storage keys lists %init_access_lists - // Initialize the RLP DATA pointer to its initial position (ctx == virt == 0, segment = RLP) - PUSH @SEGMENT_RLP_RAW + // Initialize the RLP DATA pointer to its initial position, + // skipping over the preinitialized empty node. + PUSH @INITIAL_TXN_RLP_ADDR %mstore_global_metadata(@GLOBAL_METADATA_RLP_DATA_SIZE) // Encode constant nodes diff --git a/evm_arithmetization/src/cpu/kernel/asm/mpt/hash/hash.asm b/evm_arithmetization/src/cpu/kernel/asm/mpt/hash/hash.asm index 9acde9ce7..e2c460d1a 100644 --- a/evm_arithmetization/src/cpu/kernel/asm/mpt/hash/hash.asm +++ b/evm_arithmetization/src/cpu/kernel/asm/mpt/hash/hash.asm @@ -119,7 +119,7 @@ encode_node: global encode_node_empty: // stack: node_type, node_payload_ptr, encode_value, cur_len, retdest %pop3 - %stack (cur_len, retdest) -> (retdest, @ENCODED_EMPTY_NODE_POS, 1, cur_len) + %stack (cur_len, retdest) -> (retdest, @ENCODED_EMPTY_NODE_ADDR, 1, cur_len) JUMP global encode_node_branch: diff --git a/evm_arithmetization/src/cpu/kernel/asm/mpt/util.asm b/evm_arithmetization/src/cpu/kernel/asm/mpt/util.asm index 9829494c2..092710bc8 100644 --- a/evm_arithmetization/src/cpu/kernel/asm/mpt/util.asm +++ b/evm_arithmetization/src/cpu/kernel/asm/mpt/util.asm @@ -11,7 +11,7 @@ %endmacro %macro initialize_rlp_segment - PUSH @ENCODED_EMPTY_NODE_POS + PUSH @ENCODED_EMPTY_NODE_ADDR PUSH 0x80 MSTORE_GENERAL %endmacro diff --git 
a/evm_arithmetization/src/cpu/kernel/asm/rlp/read_to_memory.asm b/evm_arithmetization/src/cpu/kernel/asm/rlp/read_to_memory.asm index 8070fd0be..7c44d61d9 100644 --- a/evm_arithmetization/src/cpu/kernel/asm/rlp/read_to_memory.asm +++ b/evm_arithmetization/src/cpu/kernel/asm/rlp/read_to_memory.asm @@ -8,10 +8,10 @@ global read_rlp_to_memory: // stack: retdest PROVER_INPUT(rlp) // Read the RLP blob length from the prover tape. // stack: len, retdest - PUSH @SEGMENT_RLP_RAW + PUSH @INITIAL_TXN_RLP_ADDR %build_kernel_address - PUSH @SEGMENT_RLP_RAW // ctx == virt == 0 + PUSH @INITIAL_TXN_RLP_ADDR // stack: addr, final_addr, retdest read_rlp_to_memory_loop: // stack: addr, final_addr, retdest @@ -31,7 +31,7 @@ read_rlp_to_memory_loop: read_rlp_to_memory_finish: // stack: addr, final_addr, retdest // we recover the offset here - PUSH @SEGMENT_RLP_RAW // ctx == virt == 0 + PUSH @INITIAL_TXN_RLP_ADDR DUP3 SUB // stack: pos, addr, final_addr, retdest %stack(pos, addr, final_addr, retdest) -> (retdest, pos) diff --git a/evm_arithmetization/src/cpu/kernel/asm/transactions/router.asm b/evm_arithmetization/src/cpu/kernel/asm/transactions/router.asm index edabfbc43..10506d328 100644 --- a/evm_arithmetization/src/cpu/kernel/asm/transactions/router.asm +++ b/evm_arithmetization/src/cpu/kernel/asm/transactions/router.asm @@ -18,14 +18,14 @@ read_txn_from_memory: // Type 0 (legacy) transactions have no such prefix, but their RLP will have a // first byte >= 0xc0, so there is no overlap. 
- PUSH @SEGMENT_RLP_RAW // ctx == virt == 0 + PUSH @INITIAL_TXN_RLP_ADDR MLOAD_GENERAL %eq_const(1) // stack: first_byte == 1, retdest %jumpi(process_type_1_txn) // stack: retdest - PUSH @SEGMENT_RLP_RAW // ctx == virt == 0 + PUSH @INITIAL_TXN_RLP_ADDR MLOAD_GENERAL %eq_const(2) // stack: first_byte == 2, retdest @@ -47,15 +47,14 @@ global update_txn_trie: // stack: txn_rlp_len, value_ptr, txn_counter, num_nibbles, ret_dest DUP2 %increment // stack: rlp_start=value_ptr+1, txn_rlp_len, value_ptr, txn_counter, num_nibbles, retdest - // and now copy txn_rlp to the new block %stack (rlp_start, txn_rlp_len, value_ptr, txn_counter, num_nibbles) -> ( - @SEGMENT_RLP_RAW, // src addr. ctx == virt == 0 - rlp_start, @SEGMENT_TRIE_DATA, // swapped dest addr, ctx == 0 + @SEGMENT_TRIE_DATA, rlp_start, // dest addr, ctx == 0 + @INITIAL_TXN_RLP_ADDR, // src addr txn_rlp_len, // mcpy len txn_rlp_len, rlp_start, txn_counter, num_nibbles, value_ptr) - SWAP2 %build_kernel_address + %build_kernel_address // stack: DST, SRC, txn_rlp_len, txn_rlp_len, rlp_start, txn_counter, num_nibbles, value_ptr %memcpy_bytes ADD diff --git a/evm_arithmetization/src/cpu/kernel/asm/transactions/type_0.asm b/evm_arithmetization/src/cpu/kernel/asm/transactions/type_0.asm index a3f3bb0d2..12d105b8a 100644 --- a/evm_arithmetization/src/cpu/kernel/asm/transactions/type_0.asm +++ b/evm_arithmetization/src/cpu/kernel/asm/transactions/type_0.asm @@ -13,7 +13,7 @@ global process_type_0_txn: // stack: retdest - PUSH @SEGMENT_RLP_RAW // ctx == virt == 0 + PUSH @INITIAL_TXN_RLP_ADDR // stack: rlp_addr, retdest %decode_rlp_list_len // We don't actually need the length. 
diff --git a/evm_arithmetization/src/cpu/kernel/asm/transactions/type_1.asm b/evm_arithmetization/src/cpu/kernel/asm/transactions/type_1.asm index e64a4aee0..f8a7a556e 100644 --- a/evm_arithmetization/src/cpu/kernel/asm/transactions/type_1.asm +++ b/evm_arithmetization/src/cpu/kernel/asm/transactions/type_1.asm @@ -10,7 +10,7 @@ global process_type_1_txn: // stack: retdest // Initial rlp address offset of 1 (skipping over the 0x01 byte) PUSH 1 - PUSH @SEGMENT_RLP_RAW + PUSH @INITIAL_TXN_RLP_ADDR %build_kernel_address // stack: rlp_addr, retdest %decode_rlp_list_len diff --git a/evm_arithmetization/src/cpu/kernel/asm/transactions/type_2.asm b/evm_arithmetization/src/cpu/kernel/asm/transactions/type_2.asm index 5074c5795..41bdfd4ed 100644 --- a/evm_arithmetization/src/cpu/kernel/asm/transactions/type_2.asm +++ b/evm_arithmetization/src/cpu/kernel/asm/transactions/type_2.asm @@ -11,7 +11,7 @@ global process_type_2_txn: // stack: retdest // Initial rlp address offset of 1 (skipping over the 0x02 byte) PUSH 1 - PUSH @SEGMENT_RLP_RAW + PUSH @INITIAL_TXN_RLP_ADDR %build_kernel_address // stack: rlp_addr, retdest %decode_rlp_list_len diff --git a/evm_arithmetization/src/cpu/kernel/constants/global_metadata.rs b/evm_arithmetization/src/cpu/kernel/constants/global_metadata.rs index bd492771f..4baa18f48 100644 --- a/evm_arithmetization/src/cpu/kernel/constants/global_metadata.rs +++ b/evm_arithmetization/src/cpu/kernel/constants/global_metadata.rs @@ -18,9 +18,9 @@ pub(crate) enum GlobalMetadata { /// The size of the `TrieData` segment, in bytes. In other words, the next /// address available for appending additional trie data. TrieDataSize, - /// The size of the `TrieData` segment, in bytes, represented as a whole + /// The size of the `RLP` segment, in bytes, represented as a whole /// address. In other words, the next address available for appending - /// additional trie data. + /// additional RLP data. 
RlpDataSize, /// A pointer to the root of the state trie within the `TrieData` buffer. StateTrieRoot, diff --git a/evm_arithmetization/src/cpu/kernel/constants/mod.rs b/evm_arithmetization/src/cpu/kernel/constants/mod.rs index 8aea84883..ffe3f6666 100644 --- a/evm_arithmetization/src/cpu/kernel/constants/mod.rs +++ b/evm_arithmetization/src/cpu/kernel/constants/mod.rs @@ -89,23 +89,31 @@ pub(crate) fn evm_constants() -> HashMap { c } -const MISC_CONSTANTS: [(&str, [u8; 32]); 3] = [ +const MISC_CONSTANTS: [(&str, [u8; 32]); 4] = [ // Base for limbs used in bignum arithmetic. ( "BIGNUM_LIMB_BASE", hex!("0000000000000000000000000000000100000000000000000000000000000000"), ), - // Position in SEGMENT_RLP_RAW where the empty node encoding is stored. It is - // equal to u32::MAX + @SEGMENT_RLP_RAW so that all rlp pointers are much smaller than that. + // Address where the empty node encoding is stored. + // It is at the offset 0 within SEGMENT_RLP_RAW. + // *Note*: Changing this will break some tests. ( - "ENCODED_EMPTY_NODE_POS", - hex!("0000000000000000000000000000000000000000000000000000000CFFFFFFFF"), + "ENCODED_EMPTY_NODE_ADDR", + hex!("0000000000000000000000000000000000000000000000000000000c00000000"), ), // 0x10000 = 2^16 bytes, much larger than any RLP blob the EVM could possibly create. ( "MAX_RLP_BLOB_SIZE", hex!("0000000000000000000000000000000000000000000000000000000000010000"), ), + // Address where the txn RLP encoding starts. + // It is at the offset 1 within SEGMENT_RLP_RAW. + // *Note*: Changing this will break some tests. 
+ ( + "INITIAL_TXN_RLP_ADDR", + hex!("0000000000000000000000000000000000000000000000000000000c00000001"), + ), ]; const HASH_CONSTANTS: [(&str, [u8; 32]); 2] = [ diff --git a/evm_arithmetization/src/cpu/kernel/interpreter.rs b/evm_arithmetization/src/cpu/kernel/interpreter.rs index c354a90fa..bd50d9745 100644 --- a/evm_arithmetization/src/cpu/kernel/interpreter.rs +++ b/evm_arithmetization/src/cpu/kernel/interpreter.rs @@ -627,6 +627,18 @@ impl<'a, F: Field> Interpreter<'a, F> { memory.into_iter().map(U256::from).collect(); } + pub(crate) fn extend_memory_segment(&mut self, segment: Segment, memory: &[U256]) { + self.generation_state.memory.contexts[0].segments[segment.unscale()] + .content + .extend(memory); + } + + pub(crate) fn extend_memory_segment_bytes(&mut self, segment: Segment, memory: Vec) { + self.generation_state.memory.contexts[0].segments[segment.unscale()] + .content + .extend(memory.into_iter().map(U256::from).collect::>()); + } + pub(crate) fn set_rlp_memory(&mut self, rlp: Vec) { self.set_memory_segment_bytes(Segment::RlpRaw, rlp) } @@ -1504,12 +1516,11 @@ impl<'a, F: Field> Interpreter<'a, F> { self.generation_state.registers.context = context; } - /// Writes the encoding of 0 to position @ENCODED_EMPTY_NODE_POS. + /// Writes the encoding of 0 at @ENCODED_EMPTY_NODE_ADDR. 
pub(crate) fn initialize_rlp_segment(&mut self) { - self.generation_state.memory.set( - MemoryAddress::new(0, Segment::RlpRaw, 0xFFFFFFFF), - 128.into(), - ) + self.generation_state + .memory + .set(MemoryAddress::new(0, Segment::RlpRaw, 0), 0x80.into()) } } diff --git a/evm_arithmetization/src/cpu/kernel/tests/mpt/hex_prefix.rs b/evm_arithmetization/src/cpu/kernel/tests/mpt/hex_prefix.rs index 37077e402..bc94f91bd 100644 --- a/evm_arithmetization/src/cpu/kernel/tests/mpt/hex_prefix.rs +++ b/evm_arithmetization/src/cpu/kernel/tests/mpt/hex_prefix.rs @@ -81,8 +81,9 @@ fn hex_prefix_odd_terminated_tiny() -> Result<()> { assert_eq!( interpreter.get_rlp_memory(), vec![ - // Since rlp_pos = 2, we skipped over the first two bytes. - 0, + // The first two values of the RLP segment are the hardcoded 0x80 for an empty + // node, and 0 (i.e. unset). + 0x80, 0, // No length prefix; this tiny string is its own RLP encoding. (2 + 1) * 16 + 0xA, diff --git a/evm_arithmetization/src/cpu/kernel/tests/receipt.rs b/evm_arithmetization/src/cpu/kernel/tests/receipt.rs index ac28eeebb..ede0c63a5 100644 --- a/evm_arithmetization/src/cpu/kernel/tests/receipt.rs +++ b/evm_arithmetization/src/cpu/kernel/tests/receipt.rs @@ -200,7 +200,7 @@ fn test_receipt_encoding() -> Result<()> { interpreter.run()?; let rlp_pos = interpreter.pop().expect("The stack should not be empty"); - let rlp_read: Vec = interpreter.get_rlp_memory(); + let rlp_read: &[u8] = &interpreter.get_rlp_memory()[1..]; // skip empty_node assert_eq!(rlp_pos.as_usize(), expected_rlp.len()); for i in 0..rlp_read.len() { @@ -513,7 +513,8 @@ fn test_mpt_insert_receipt() -> Result<()> { // Set memory. 
interpreter.generation_state.registers.program_counter = mpt_insert; interpreter.set_memory_segment(Segment::TrieData, cur_trie_data.clone()); - interpreter.set_global_metadata_field(GlobalMetadata::TrieDataSize, cur_trie_data.len().into()); + let trie_data_len = cur_trie_data.len().into(); + interpreter.set_global_metadata_field(GlobalMetadata::TrieDataSize, trie_data_len); interpreter.run()?; // Finally, check that the hashes correspond. diff --git a/evm_arithmetization/src/cpu/kernel/tests/rlp/encode.rs b/evm_arithmetization/src/cpu/kernel/tests/rlp/encode.rs index 75464235b..8734a4e5c 100644 --- a/evm_arithmetization/src/cpu/kernel/tests/rlp/encode.rs +++ b/evm_arithmetization/src/cpu/kernel/tests/rlp/encode.rs @@ -19,7 +19,11 @@ fn test_encode_rlp_scalar_small() -> Result<()> { interpreter.run()?; let expected_stack = vec![pos + U256::from(1)]; // pos' = pos + rlp_len = 2 + 1 - let expected_rlp = vec![0, 0, 42]; + + // The first two values of the RLP segment are the hardcoded 0x80 for an empty + // node, and 0 (i.e. unset). + let expected_rlp = vec![0x80, 0, 42]; + assert_eq!(interpreter.stack(), expected_stack); assert_eq!(interpreter.get_rlp_memory(), expected_rlp); @@ -39,7 +43,11 @@ fn test_encode_rlp_scalar_medium() -> Result<()> { interpreter.run()?; let expected_stack = vec![pos + U256::from(4)]; // pos' = pos + rlp_len = 2 + 4 - let expected_rlp = vec![0, 0, 0x80 + 3, 0x01, 0x23, 0x45]; + + // The first two values of the RLP segment are the hardcoded 0x80 for an empty + // node, and 0 (i.e. unset). 
+ let expected_rlp = vec![0x80, 0, 0x80 + 3, 0x01, 0x23, 0x45]; + assert_eq!(interpreter.stack(), expected_stack); assert_eq!(interpreter.get_rlp_memory(), expected_rlp); diff --git a/evm_arithmetization/src/cpu/kernel/tests/transaction_parsing/parse_type_0_txn.rs b/evm_arithmetization/src/cpu/kernel/tests/transaction_parsing/parse_type_0_txn.rs index 2aaf9ba4a..06b0b89cf 100644 --- a/evm_arithmetization/src/cpu/kernel/tests/transaction_parsing/parse_type_0_txn.rs +++ b/evm_arithmetization/src/cpu/kernel/tests/transaction_parsing/parse_type_0_txn.rs @@ -7,6 +7,7 @@ use NormalizedTxnField::*; use crate::cpu::kernel::aggregator::KERNEL; use crate::cpu::kernel::constants::txn_fields::NormalizedTxnField; use crate::cpu::kernel::interpreter::Interpreter; +use crate::memory::segments::Segment; #[test] fn process_type_0_txn() -> Result<()> { @@ -38,7 +39,7 @@ fn process_type_0_txn() -> Result<()> { // 4c0883a69102937d6231471b5dbb6204fe5129617082792ae468d01a3f362318')) // signed_txn = unsigned_txn.as_signed_transaction(sk) // rlp.encode(signed_txn).hex() - interpreter.set_rlp_memory(hex!("f861050a8255f0940000000000000000000000000000000000000000648242421ca07c5c61ed975ebd286f6b027b8c504842e50a47d318e1e801719dd744fe93e6c6a01e7b5119b57dd54e175ff2f055c91f3ab1b53eba0b2c184f347cdff0e745aca2").to_vec()); + interpreter.extend_memory_segment_bytes(Segment::RlpRaw, hex!("f861050a8255f0940000000000000000000000000000000000000000648242421ca07c5c61ed975ebd286f6b027b8c504842e50a47d318e1e801719dd744fe93e6c6a01e7b5119b57dd54e175ff2f055c91f3ab1b53eba0b2c184f347cdff0e745aca2").to_vec()); interpreter.run()?; diff --git a/evm_arithmetization/src/memory/segments.rs b/evm_arithmetization/src/memory/segments.rs index 8cec2e360..4dea2eb6e 100644 --- a/evm_arithmetization/src/memory/segments.rs +++ b/evm_arithmetization/src/memory/segments.rs @@ -202,17 +202,4 @@ impl Segment { Segment::BlockHashes => 256, } } - - pub(crate) fn constant(&self, virt: usize) -> Option { - match self { - 
Segment::RlpRaw => { - if virt == 0xFFFFFFFF { - Some(U256::from(0x80)) - } else { - None - } - } - _ => None, - } - } } diff --git a/evm_arithmetization/src/witness/memory.rs b/evm_arithmetization/src/witness/memory.rs index 4da4203c0..a70f22afa 100644 --- a/evm_arithmetization/src/witness/memory.rs +++ b/evm_arithmetization/src/witness/memory.rs @@ -192,10 +192,6 @@ impl MemoryState { let segment = Segment::all()[address.segment]; - if let Some(constant) = Segment::constant(&segment, address.virt) { - return constant; - } - let val = self.contexts[address.context].segments[address.segment].get(address.virt); assert!( val.bits() <= segment.bit_range(), @@ -214,14 +210,6 @@ impl MemoryState { let segment = Segment::all()[address.segment]; - if let Some(constant) = Segment::constant(&segment, address.virt) { - assert!( - constant == val, - "Attempting to set constant {} to incorrect value", - address.virt - ); - return; - } assert!( val.bits() <= segment.bit_range(), "Value {} exceeds {:?} range of {} bits",