0xPolygonZero · Nashtare · Jul 9, 2024 · Jul 9, 2024 · Jul 9, 2024 · Jul 9, 2024
@@ -82,7 +82,7 @@ \subsection{Range-checks}
 \subsubsection{What to range-check?}
 One can note that every element that ever appears on the stack has been pushed. Therefore, enforcing a range-check on pushed elements is enough to range-check all elements on the stack. Similarly, all elements in memory must have been written prior, and therefore it is enough to range-check memory writes. However, range-checking the PUSH and MSTORE opcodes is not sufficient.
 \begin{enumerate}
-  \item Pushes and memory writes for ``MSTORE\_32BYTES'' are range-checked in ``BytePackingStark''.
+  \item Pushes and memory writes for ``MSTORE\_32BYTES'' are range-checked in ``BytePackingStark'', except PUSH operations happening in privileged mode. See \ref{push_general_view}.
   \item Syscalls, exceptions and prover inputs are range-checked in ``ArithmeticStark''.
   \item The inputs and outputs of binary and ternary arithmetic operations are range-checked in ``ArithmeticStark''.
   \item The inputs' bits of logic operations are checked to be either 1 or 0 in ``LogicStark''. Since ``LogicStark'' only deals with bitwise operations, this is enough to have range-checked outputs as well.

@@ -70,4 +70,6 @@ \subsubsection{CPU columns}
     \item  \texttt{Stack}: \texttt{stack\_inv}, \texttt{stack\_inv\_aux} and \texttt{stack\_inv\_aux\_2} are used by popping-only (resp. pushing-only) instructions to check if the stack is empty after (resp. was empty
 before) the instruction. \texttt{stack\_len\_bounds\_ aux} is used to check that the stack doesn't overflow in user mode. We use the last four columns to prevent conflicts with the other general columns.
 See \ref{stackhandling} for more details.
+    \label{push_general_view}
+    \item  \texttt{Push}: \texttt{is\_not\_kernel} is used to skip range-checking the output of a PUSH operation when we are in privileged mode, as the kernel code is known and trusted.
 \end{itemize}
@@ -175,8 +175,7 @@ impl<F: RichField + Extendable<D>, const D: usize> BytePackingStark<F, D> {
         ops: Vec<BytePackingOp>,
         min_rows: usize,
     ) -> Vec<[F; NUM_COLUMNS]> {
-        let base_len: usize = ops.iter().map(|op| usize::from(!op.bytes.is_empty())).sum();
-        let num_rows = core::cmp::max(base_len.max(BYTE_RANGE_MAX), min_rows).next_power_of_two();
+        let num_rows = core::cmp::max(ops.len().max(BYTE_RANGE_MAX), min_rows).next_power_of_two();
         let mut rows = Vec::with_capacity(num_rows);
 
         for op in ops {

@@ -14,6 +14,7 @@ pub(crate) union CpuGeneralColumnsView<T: Copy> {
     jumps: CpuJumpsView<T>,
     shift: CpuShiftView<T>,
     stack: CpuStackView<T>,
+    push: CpuPushView<T>,
 }
 
 impl<T: Copy> CpuGeneralColumnsView<T> {
@@ -78,6 +79,18 @@ impl<T: Copy> CpuGeneralColumnsView<T> {
     pub(crate) fn stack_mut(&mut self) -> &mut CpuStackView<T> {
         unsafe { &mut self.stack }
     }
+
+    /// View of the columns required for the push operation.
+    /// SAFETY: Each view is a valid interpretation of the underlying array.
+    pub(crate) fn push(&self) -> &CpuPushView<T> {
+        unsafe { &self.push }
+    }
+
+    /// Mutable view of the columns required for the push operation.
+    /// SAFETY: Each view is a valid interpretation of the underlying array.
+    pub(crate) fn push_mut(&mut self) -> &mut CpuPushView<T> {
+        unsafe { &mut self.push }
+    }
 }
 
 impl<T: Copy + PartialEq> PartialEq<Self> for CpuGeneralColumnsView<T> {
@@ -180,6 +193,17 @@ pub(crate) struct CpuStackView<T: Copy> {
     pub(crate) stack_len_bounds_aux: T,
 }
 
+/// View of the first `CpuGeneralColumn` storing the negation of
+/// `is_kernel_mode` flag, to filter out `PUSH` instructions from being
+/// range-checked when happening in the KERNEL context.
+#[repr(C)]
+#[derive(Copy, Clone)]
+pub(crate) struct CpuPushView<T: Copy> {
+    /// Product of `push_prover_input` with the negated `is_kernel_mode` flag.
+    pub(crate) is_not_kernel: T,
+    /// Reserve the unused columns.
+    _padding_columns: [T; NUM_SHARED_COLUMNS - 1],
+}
 /// The number of columns shared by all views of [`CpuGeneralColumnsView`].
 /// This is defined in terms of the largest view in order to determine the
 /// number of padding columns to add to each field without creating a cycle
@@ -195,3 +219,4 @@ const_assert!(size_of::<CpuLogicView<u8>>() == NUM_SHARED_COLUMNS);
 const_assert!(size_of::<CpuJumpsView<u8>>() == NUM_SHARED_COLUMNS);
 const_assert!(size_of::<CpuShiftView<u8>>() == NUM_SHARED_COLUMNS);
 const_assert!(size_of::<CpuStackView<u8>>() == NUM_SHARED_COLUMNS);
+const_assert!(size_of::<CpuPushView<u8>>() == NUM_SHARED_COLUMNS);
@@ -78,6 +78,13 @@ pub(crate) fn eval_packed_generic<P: PackedField>(
     );
     yield_constr.constraint_transition(is_prover_input * (lv.is_kernel_mode - nv.is_kernel_mode));
 
+    // Check the helper value in the general columns. We do not need to enforce that
+    // it is set to 0 if the operation is a `PROVER_INPUT`, as the latter is a
+    // kernel-only instruction. This is enforced in the `decode` module.
+    yield_constr.constraint(
+        lv.op.push_prover_input * ((lv.is_kernel_mode + lv.general.push().is_not_kernel) - P::ONES),
+    );
+
     // If a non-CPU cycle row is followed by a CPU cycle row, then:
     //  - the `program_counter` of the CPU cycle row is `main` (the entry point of
     //    our kernel),
@@ -140,6 +147,16 @@ pub(crate) fn eval_ext_circuit<F: RichField + Extendable<D>, const D: usize>(
         yield_constr.constraint_transition(builder, kernel_constr);
     }
 
+    // Check the helper value in the general columns. We do not need to enforce that
+    // it is set to 0 if the operation is a `PROVER_INPUT`, as the latter is a
+    // kernel-only instruction. This is enforced in the `decode` module.
+    {
+        let temp = builder.add_extension(lv.is_kernel_mode, lv.general.push().is_not_kernel);
+        let constr =
+            builder.mul_sub_extension(lv.op.push_prover_input, temp, lv.op.push_prover_input);
+        yield_constr.constraint(builder, constr);
+    }
+
     // If a non-CPU cycle row is followed by a CPU cycle row, then:
     //  - the `program_counter` of the CPU cycle row is `main` (the entry point of
     //    our kernel),

@@ -263,9 +263,11 @@ pub(crate) fn ctl_data_byte_packing_push<F: Field>() -> Vec<Column<F>> {
 
 /// CTL filter for the `PUSH` operation.
 pub(crate) fn ctl_filter_byte_packing_push<F: Field>() -> Filter<F> {
-    let bit_col = Column::single(COL_MAP.opcode_bits[5]);
     Filter::new(
-        vec![(Column::single(COL_MAP.op.push_prover_input), bit_col)],
+        vec![(
+            Column::single(COL_MAP.general.push().is_not_kernel),
+            Column::single(COL_MAP.op.push_prover_input),
+        )],
         vec![],
     )
 }

@@ -3,6 +3,7 @@ use itertools::Itertools;
 use keccak_hash::keccak;
 use plonky2::field::types::Field;
 
+use super::state::KERNEL_CONTEXT;
 use super::transition::Transition;
 use super::util::{
     byte_packing_log, byte_unpacking_log, mem_read_with_log, mem_write_log,
@@ -385,7 +386,13 @@ pub(crate) fn generate_push<F: Field, T: Transition<F>>(
     let val = U256::from_big_endian(&bytes);
     push_with_write(state, &mut row, val)?;
 
-    byte_packing_log(state, base_address, bytes);
+    // This is necessary to filter out PUSH instructions from the BytePackingStark
+    // CTl when happening in the KERNEL context.
+    row.general.push_mut().is_not_kernel = F::ONE - row.is_kernel_mode;
+
+    if code_context != KERNEL_CONTEXT {
+        byte_packing_log(state, base_address, bytes);
+    }
 
     state.push_cpu(row);
 

@@ -2,7 +2,7 @@ use ethereum_types::U256;
 
 use crate::cpu::kernel::aggregator::KERNEL;
 
-const KERNEL_CONTEXT: usize = 0;
+pub(crate) const KERNEL_CONTEXT: usize = 0;
 
 #[derive(Clone, Copy, Debug, Eq, PartialEq)]
 pub struct RegistersState {

@@ -65,11 +65,7 @@ impl<T: Copy> Traces<T> {
                     Operation::RangeCheckOperation { .. } => 1,
                 })
                 .sum(),
-            byte_packing_len: self
-                .byte_packing_ops
-                .iter()
-                .map(|op| usize::from(!op.bytes.is_empty()))
-                .sum(),
+            byte_packing_len: self.byte_packing_ops.len(),
             cpu_len: self.cpu.len(),
             keccak_len: self.keccak_inputs.len() * keccak::keccak_stark::NUM_ROUNDS,
             keccak_sponge_len: self

@@ -344,6 +344,11 @@ pub(crate) fn byte_packing_log<F: Field, T: Transition<F>>(
     base_address: MemoryAddress,
     bytes: Vec<u8>,
 ) {
+    if bytes.is_empty() {
+        // No-op
+        return;
+    }
+
     let clock = state.get_clock();
 
     let mut address = base_address;

@@ -52,18 +52,28 @@ if [[ $TEST_ONLY == "test_only" ]]; then
     export MEMORY_CIRCUIT_SIZE="17..18"
 else
     if [[ $INPUT_FILE == *"witness_b19240705"* ]]; then
-      # These sizes are configured specifically for block 19240705. Don't use this in other scenarios
+        # These sizes are configured specifically for block 19240705. Don't use this in other scenarios.
         echo "Using specific circuit sizes for witness_b19240705.json"
         export ARITHMETIC_CIRCUIT_SIZE="16..19"
-        export BYTE_PACKING_CIRCUIT_SIZE="16..19"
+        export BYTE_PACKING_CIRCUIT_SIZE="11..15"
         export CPU_CIRCUIT_SIZE="18..21"
         export KECCAK_CIRCUIT_SIZE="15..18"
         export KECCAK_SPONGE_CIRCUIT_SIZE="10..13"
         export LOGIC_CIRCUIT_SIZE="13..17"
         export MEMORY_CIRCUIT_SIZE="20..23"
+    elif [[ $INPUT_FILE == *"witness_b2_b7"* ]]; then
+        # These sizes are configured specifically for custom small blocks. Don't use this in other scenarios.
+        echo "Using specific circuit sizes for witness_b2_b7.json"
+        export ARITHMETIC_CIRCUIT_SIZE="16..17"
+        export BYTE_PACKING_CIRCUIT_SIZE="9..11"
+        export CPU_CIRCUIT_SIZE="16..17"
+        export KECCAK_CIRCUIT_SIZE="14..15"
+        export KECCAK_SPONGE_CIRCUIT_SIZE="9..10"
+        export LOGIC_CIRCUIT_SIZE="12..13"
+        export MEMORY_CIRCUIT_SIZE="18..19"
     else
         export ARITHMETIC_CIRCUIT_SIZE="16..23"
-        export BYTE_PACKING_CIRCUIT_SIZE="9..21"
+        export BYTE_PACKING_CIRCUIT_SIZE="8..21"
         export CPU_CIRCUIT_SIZE="12..25"
         export KECCAK_CIRCUIT_SIZE="14..20"
         export KECCAK_SPONGE_CIRCUIT_SIZE="9..15"