diff --git a/.cargo/config.toml b/.cargo/config.toml index 526c1c9b4..ae425ea9c 100644 --- a/.cargo/config.toml +++ b/.cargo/config.toml @@ -13,3 +13,6 @@ color = 'auto' [target.x86_64-unknown-linux-gnu] linker = "clang" rustflags = ["-Z", "linker-features=-lld", "-C", "target-cpu=native"] #, "-C", "link-arg=-fuse-ld=/usr/bin/mold", "-C", "debuginfo=2"] + +[alias] +xtask = ["run", "--package=xtask", "--"] diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index cc95b3be3..d0b588152 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1,9 +1,9 @@ -* @muursh @Nashtare -/evm_arithmetization/ @wborgeaud @muursh @Nashtare @LindaGuiga -/zero/ @muursh @Nashtare @atanmarko -/smt_trie/ @0xaatif @muursh @Nashtare -/mpt_trie/ @0xaatif @Nashtare @muursh -/trace_decoder/ @0xaatif @muursh @Nashtare -.github/ @0xaatif @atanmarko @muursh @Nashtare +* @Nashtare @paulgoleary +/evm_arithmetization/ @wborgeaud @Nashtare @LindaGuiga +/zero/ @Nashtare @atanmarko @0xaatif +/smt_trie/ @0xaatif @Nashtare +/mpt_trie/ @0xaatif @Nashtare +/trace_decoder/ @0xaatif @Nashtare +.github/ @0xaatif @atanmarko @Nashtare /vscode-extension @0xaatif /evm_arithmetization/src/bin/lsp-server.rs @0xaatif diff --git a/.github/actions/rust/action.yml b/.github/actions/rust/action.yml index 7df769868..373f93cf0 100644 --- a/.github/actions/rust/action.yml +++ b/.github/actions/rust/action.yml @@ -1,10 +1,12 @@ +--- # Common CI steps + name: "Rust" description: | Common steps for CI - See + runs: - using: composite + using: composite steps: - uses: actions-rust-lang/setup-rust-toolchain@v1 - uses: Swatinem/rust-cache@v2 diff --git a/.github/labeler.yml b/.github/labeler.yml index baacbe1fa..04941664b 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -1,27 +1,29 @@ +--- # Github labeler configuration file + # Add 'crate: mpt_trie' label to any changes within 'mpt_trie' folder. 'crate: mpt_trie': -- changed-files: - - any-glob-to-any-file: mpt_trie/** + - changed-files: + - any-glob-to-any-file: mpt_trie/** # Add 'crate: evm_arithmetization' label to any changes within 'evm_arithmetization' folder. 'crate: evm_arithmetization': -- changed-files: - - any-glob-to-any-file: evm_arithmetization/** + - changed-files: + - any-glob-to-any-file: evm_arithmetization/** # Add 'crate: trace_decoder' label to any changes within 'trace_decoder' folder. 'crate: trace_decoder': -- changed-files: - - any-glob-to-any-file: trace_decoder/** + - changed-files: + - any-glob-to-any-file: trace_decoder/** # Add 'crate: zero_bin' label to any changes within 'zero' folder. 'crate: zero_bin': -- changed-files: - - any-glob-to-any-file: zero/** + - changed-files: + - any-glob-to-any-file: zero/** # Add 'specs' label to any changes within 'docs' or `book` folder. 'specs': -- changed-files: - - any-glob-to-any-file: ['docs/**', 'book/**'] + - changed-files: + - any-glob-to-any-file: ['docs/**', 'book/**'] # Add 'crate: common' label to any changes within 'common' folder. 
'crate: common': diff --git a/.github/workflows/audit.yml b/.github/workflows/audit.yml index ee1716de7..dd124c0be 100644 --- a/.github/workflows/audit.yml +++ b/.github/workflows/audit.yml @@ -1,8 +1,11 @@ +--- # Rust cargo audit workflow + name: Security audit on: push: - paths: + paths: - '**/Cargo.toml' + jobs: security_audit: runs-on: ubuntu-latest diff --git a/.github/workflows/book.yml b/.github/workflows/book.yml index c5df23222..b920d424f 100644 --- a/.github/workflows/book.yml +++ b/.github/workflows/book.yml @@ -1,3 +1,5 @@ +--- # MD Book generation and deployment workflow + name: zkEVM mdbook on: diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 030731bdd..9a45da874 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,3 +1,5 @@ +--- # Workflow with multiple jobs to test different parts of the project + name: Continuous Integration on: @@ -22,7 +24,6 @@ jobs: name: Test mpt_trie runs-on: ubuntu-latest timeout-minutes: 30 - if: ${{ ! contains(toJSON(github.event.commits.*.message), '[skip-ci]') }} steps: - name: Checkout sources uses: actions/checkout@v4 @@ -37,7 +38,11 @@ jobs: - name: Test in mpt_trie subdirectory run: cargo test --manifest-path mpt_trie/Cargo.toml env: - RUSTFLAGS: -Copt-level=3 -Cdebug-assertions -Coverflow-checks=y -Cdebuginfo=0 + RUSTFLAGS: > + -Copt-level=3 + -Cdebug-assertions + -Coverflow-checks=y + -Cdebuginfo=0 RUST_LOG: 1 CARGO_INCREMENTAL: 1 RUST_BACKTRACE: 1 @@ -50,8 +55,6 @@ jobs: RUST_LOG: info CARGO_INCREMENTAL: 1 RUST_BACKTRACE: 1 - - if: ${{ ! contains(toJSON(github.event.commits.*.message), '[skip-ci]') }} steps: - name: Checkout sources uses: actions/checkout@v4 @@ -63,7 +66,7 @@ jobs: with: cache-on-failure: true - - name: build # build separately so test logs are actually nice + - name: build # build separately so test logs are actually nice run: cargo build --tests --manifest-path trace_decoder/Cargo.toml - name: test @@ -73,7 +76,6 @@ jobs: name: Test evm_arithmetization runs-on: ubuntu-latest timeout-minutes: 20 - if: ${{ ! contains(toJSON(github.event.commits.*.message), '[skip-ci]') }} steps: - name: Checkout sources uses: actions/checkout@v4 @@ -97,7 +99,6 @@ jobs: name: Test zero_bin runs-on: ubuntu-latest timeout-minutes: 10 - if: ${{ ! contains(toJSON(github.event.commits.*.message), '[skip-ci]') }} steps: - name: Checkout sources uses: actions/checkout@v4 @@ -112,7 +113,6 @@ jobs: - name: Test in zero_bin subdirectory run: | cargo test --manifest-path zero/Cargo.toml - env: RUSTFLAGS: -Copt-level=3 -Cdebug-assertions -Coverflow-checks=y -Cdebuginfo=0 RUST_LOG: 1 @@ -123,7 +123,6 @@ jobs: name: Test zk_evm_proc_macro runs-on: ubuntu-latest timeout-minutes: 5 - if: ${{ ! 
contains(toJSON(github.event.commits.*.message), '[skip-ci]') }} steps: - name: Checkout sources uses: actions/checkout@v4 diff --git a/.github/workflows/docker_build.yml b/.github/workflows/docker_build.yml index 1d24cc8b7..e797213aa 100644 --- a/.github/workflows/docker_build.yml +++ b/.github/workflows/docker_build.yml @@ -1,3 +1,5 @@ +--- # Workflow for building and running docker images as regression test + name: Docker Build & Run on: diff --git a/.github/workflows/docker_build_push.yml b/.github/workflows/docker_build_push.yml index 37b89fa92..4aed557df 100644 --- a/.github/workflows/docker_build_push.yml +++ b/.github/workflows/docker_build_push.yml @@ -1,3 +1,5 @@ +--- # Workflow for building and deploying docker images + name: Docker Build & Push on: diff --git a/.github/workflows/jerigon-native.yml b/.github/workflows/jerigon-native.yml index 29a380c3a..ccfbdc005 100644 --- a/.github/workflows/jerigon-native.yml +++ b/.github/workflows/jerigon-native.yml @@ -1,3 +1,5 @@ +--- # Run and populate blockchain with transactions and generate proofs using native tracer + name: Jerigon Integration on: @@ -19,7 +21,6 @@ jobs: name: Native tracer proof generation runs-on: zero-ci timeout-minutes: 30 - if: "! contains(toJSON(github.event.commits.*.message), '[skip-ci]')" steps: - name: Checkout sources uses: actions/checkout@v4 @@ -52,16 +53,17 @@ jobs: run: | echo "deb [trusted=yes] https://apt.fury.io/kurtosis-tech/ /" | sudo tee /etc/apt/sources.list.d/kurtosis.list sudo apt update - sudo apt install kurtosis-cli + sudo apt install kurtosis-cli=1.3.1 - #It is much easier to use cast tool in scripts so install foundry + # It is much easier to use cast tool in scripts so install foundry - name: Install Foundry uses: foundry-rs/foundry-toolchain@v1 - name: Run cancun test network run: | docker pull ghcr.io/0xpolygonzero/erigon:feat-zero - kurtosis run --enclave cancun-testnet github.com/ethpandaops/ethereum-package@4.0.0 --args-file jerigon-test-network/network_params.yml + kurtosis run --enclave cancun-testnet github.com/ethpandaops/ethereum-package@4.0.0 \ + --args-file jerigon-test-network/network_params.yml - name: Generate blocks with transactions run: | @@ -74,14 +76,14 @@ jobs: run: | ETH_RPC_URL="$(kurtosis port print cancun-testnet el-2-erigon-lighthouse ws-rpc)" ulimit -n 8192 - OUTPUT_TO_TERMINAL=true ./scripts/prove_rpc.sh 0x1 0xf $ETH_RPC_URL native true 3000 100 test_only + OUTPUT_TO_TERMINAL=true ./scripts/prove_rpc.sh 1 15 $ETH_RPC_URL native 0 3000 100 test_only echo "Proving blocks in test_only mode finished" - name: Run prove blocks with native tracer in real mode run: | ETH_RPC_URL="$(kurtosis port print cancun-testnet el-2-erigon-lighthouse ws-rpc)" rm -rf proofs/* circuits/* ./proofs.json test.out verify.out leader.out - OUTPUT_TO_TERMINAL=true RUN_VERIFICATION=true ./scripts/prove_rpc.sh 0x4 0x7 $ETH_RPC_URL native true 3000 100 + OUTPUT_TO_TERMINAL=true RUN_VERIFICATION=true ./scripts/prove_rpc.sh 4 7 $ETH_RPC_URL native 3 3000 100 echo "Proving blocks in real mode finished" - name: Shut down network diff --git a/.github/workflows/jerigon-zero.yml b/.github/workflows/jerigon-zero.yml index 216b32f8c..01e580438 100644 --- a/.github/workflows/jerigon-zero.yml +++ b/.github/workflows/jerigon-zero.yml @@ -1,3 +1,5 @@ +--- # Run and populate blockchain with transactions and generate proofs using zero tracer + name: Jerigon Integration on: @@ -10,7 +12,6 @@ on: branches: - "**" - env: CARGO_TERM_COLOR: always REGISTRY: ghcr.io @@ -20,22 +21,21 @@ jobs: name: Zero tracer 
proof generation runs-on: zero-ci timeout-minutes: 30 - if: "! contains(toJSON(github.event.commits.*.message), '[skip-ci]')" steps: - name: Checkout sources uses: actions/checkout@v4 - name: Checkout test-jerigon-network sources - uses: actions/checkout@v4 + uses: actions/checkout@v4 with: repository: 0xPolygonZero/jerigon-test-network - ref: 'feat/kurtosis-network' + ref: "feat/kurtosis-network" path: jerigon-test-network - uses: actions-rust-lang/setup-rust-toolchain@v1 - + - name: Set up QEMU - uses: docker/setup-qemu-action@v3 + uses: docker/setup-qemu-action@v3 - name: Login to GitHub Container Registry uses: docker/login-action@v2 @@ -53,43 +53,40 @@ jobs: run: | echo "deb [trusted=yes] https://apt.fury.io/kurtosis-tech/ /" | sudo tee /etc/apt/sources.list.d/kurtosis.list sudo apt update - sudo apt install kurtosis-cli + sudo apt install kurtosis-cli=1.3.1 - #It is much easier to use cast tool in scripts so install foundry + # It is much easier to use cast tool in scripts so install foundry - name: Install Foundry - uses: foundry-rs/foundry-toolchain@v1 + uses: foundry-rs/foundry-toolchain@v1 - name: Run cancun test network run: | docker pull ghcr.io/0xpolygonzero/erigon:feat-zero - kurtosis run --enclave cancun-testnet github.com/ethpandaops/ethereum-package@4.0.0 --args-file jerigon-test-network/network_params.yml + kurtosis run --enclave cancun-testnet github.com/ethpandaops/ethereum-package@4.0.0 \ + --args-file jerigon-test-network/network_params.yml - name: Generate blocks with transactions run: | - ETH_RPC_URL="$(kurtosis port print cancun-testnet el-2-erigon-lighthouse ws-rpc)" - cast rpc eth_blockNumber --rpc-url $ETH_RPC_URL - cd jerigon-test-network && set -a && source .env && set +a - bash ./tests/generate_transactions.sh - + ETH_RPC_URL="$(kurtosis port print cancun-testnet el-2-erigon-lighthouse ws-rpc)" + cast rpc eth_blockNumber --rpc-url $ETH_RPC_URL + cd jerigon-test-network && set -a && source .env && set +a + bash ./tests/generate_transactions.sh + - name: Run prove blocks with zero tracer in test_only mode run: | ETH_RPC_URL="$(kurtosis port print cancun-testnet el-2-erigon-lighthouse ws-rpc)" ulimit -n 8192 - OUTPUT_TO_TERMINAL=true ./scripts/prove_rpc.sh 0x1 0xf $ETH_RPC_URL jerigon true 3000 100 test_only + OUTPUT_TO_TERMINAL=true ./scripts/prove_rpc.sh 1 15 $ETH_RPC_URL jerigon 0 3000 100 test_only echo "Proving blocks in test_only mode finished" - - name: Run prove blocks with zero tracer in real mode run: | ETH_RPC_URL="$(kurtosis port print cancun-testnet el-2-erigon-lighthouse ws-rpc)" rm -rf proofs/* circuits/* ./proofs.json test.out verify.out leader.out - OUTPUT_TO_TERMINAL=true RUN_VERIFICATION=true ./scripts/prove_rpc.sh 0x2 0x5 $ETH_RPC_URL jerigon true 3000 100 + OUTPUT_TO_TERMINAL=true RUN_VERIFICATION=true ./scripts/prove_rpc.sh 2 5 $ETH_RPC_URL jerigon 1 3000 100 echo "Proving blocks in real mode finished" - + - name: Shut down network run: | kurtosis enclave rm -f cancun-testnet kurtosis engine stop - - - diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml index ee79f0f56..9792f107b 100644 --- a/.github/workflows/labeler.yml +++ b/.github/workflows/labeler.yml @@ -1,6 +1,8 @@ +--- # Add labels to the PR + name: "Pull Request Labeler" on: -- pull_request_target + - pull_request_target jobs: labeler: @@ -10,7 +12,7 @@ jobs: runs-on: ubuntu-latest timeout-minutes: 10 steps: - - uses: actions/labeler@v5 - with: - # Allow to remove labels that are no longer relevant when new changes are pushed. 
- sync-labels: true + - uses: actions/labeler@v5 + with: + # Allow removing labels that are no longer relevant when new changes are pushed. + sync-labels: true
diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 13089b3d0..e22228b1d 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml
@@ -1,3 +1,5 @@ +--- # Rust lint related checks + name: lint on:
@@ -66,3 +68,13 @@ jobs: with: tool: taplo-cli - run: taplo fmt --check + outdated: + runs-on: ubuntu-latest + timeout-minutes: 5 + steps: + - uses: actions/checkout@v3 + - uses: ./.github/actions/rust + - uses: taiki-e/install-action@v2 + with: + tool: cargo-outdated + - run: cargo xtask outdated
diff --git a/.github/workflows/pr_checking.yml b/.github/workflows/pr_checking.yml index 497a17208..adc6d81bc 100644 --- a/.github/workflows/pr_checking.yml +++ b/.github/workflows/pr_checking.yml
@@ -1,3 +1,5 @@ +--- # Spam PR checking workflow + name: PR check on:
@@ -14,7 +16,10 @@ jobs: steps: - name: Set up keywords id: setup_keywords - run: echo "RESTRICTED_KEYWORDS=$(echo '${{ secrets.RESTRICTED_KEYWORDS }}' | jq -r '.[]' | tr '\n' ' ')" >> $GITHUB_ENV + run: > + echo "RESTRICTED_KEYWORDS=$(echo '${{ secrets.RESTRICTED_KEYWORDS }}' + | jq -r '.[]' + | tr '\n' ' ')" >> $GITHUB_ENV - name: Check for spam PR id: check
@@ -22,7 +27,6 @@ jobs: # Initialize variables to track spam presence title_is_spam=false description_is_spam=false - # Check title for spam for keyword in $RESTRICTED_KEYWORDS; do if echo "${{ github.event.pull_request.title }}" | grep -i -q "$keyword"; then
diff --git a/.github/workflows/shellcheck.yml b/.github/workflows/shellcheck.yml new file mode 100644 index 000000000..e7b826555 --- /dev/null +++ b/.github/workflows/shellcheck.yml
@@ -0,0 +1,26 @@ +--- # Workflow to run shellcheck on all shell scripts + +name: Shellcheck +on: + pull_request: + paths: + - '**/*.sh' + - 'scripts/**' + workflow_dispatch: + branches: + - "**" + +jobs: + shellcheck_scripts: + runs-on: ubuntu-22.04 + timeout-minutes: 10 + steps: + - uses: actions/checkout@v4 + + - name: Install shellcheck + run: sudo apt-get install shellcheck=0.8.0-2 + + - name: Run shellcheck + run: | + cd scripts + shellcheck ./*.sh
diff --git a/.github/workflows/yamllint.yml b/.github/workflows/yamllint.yml new file mode 100644 index 000000000..0c9910d31 --- /dev/null +++ b/.github/workflows/yamllint.yml
@@ -0,0 +1,26 @@ +--- # Run yamllint on all YAML files in the repository + +name: Yamllint +'on': + pull_request: + paths: + - '**/*.yml' + - '.github/**' + workflow_dispatch: + branches: + - '**' + +jobs: + yamllint-check: + runs-on: ubuntu-22.04 + timeout-minutes: 10 + steps: + - uses: actions/checkout@v4 + - name: Install yamllint + run: sudo apt-get install yamllint=1.26.3-1 + - name: Run yamllint + run: > + yamllint + --format github + -d "{extends: default, rules: {line-length: {max: 120}, truthy: {check-keys: false}}}" + .github
diff --git a/Cargo.lock b/Cargo.lock index 94c89138c..14eca4d65 100644 --- a/Cargo.lock +++ b/Cargo.lock
@@ -1834,6 +1834,12 @@ version = "0.15.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b" +[[package]] +name = "downcast" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1435fa1053d8b2fbbe9be7e97eca7f33d37b28409959813daefc1446a14247f1" + [[package]] name = "dunce" version = "1.0.5"
@@ -2190,6 +2196,12 @@ dependencies = [ "percent-encoding", ] +[[package]]
+name = "fragile" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c2141d6d6c8512188a7891b4b01590a45f6dac67afb4f255c4124dbb86d4eaa" + [[package]] name = "funty" version = "2.0.0" @@ -2859,6 +2871,29 @@ dependencies = [ "waker-fn", ] +[[package]] +name = "lazy-regex" +version = "3.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d8e41c97e6bc7ecb552016274b99fbb5d035e8de288c582d9b933af6677bfda" +dependencies = [ + "lazy-regex-proc_macros", + "once_cell", + "regex", +] + +[[package]] +name = "lazy-regex-proc_macros" +version = "3.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76e1d8b05d672c53cb9c7b920bbba8783845ae4f0b076e02a3db1d02c81b4163" +dependencies = [ + "proc-macro2", + "quote", + "regex", + "syn 2.0.77", +] + [[package]] name = "lazy_static" version = "1.5.0" @@ -3058,6 +3093,32 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "mockall" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4c28b3fb6d753d28c20e826cd46ee611fda1cf3cde03a443a974043247c065a" +dependencies = [ + "cfg-if", + "downcast", + "fragile", + "mockall_derive", + "predicates", + "predicates-tree", +] + +[[package]] +name = "mockall_derive" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "341014e7f530314e9a1fdbc7400b244efea7122662c96bfa248c31da5bfb2020" +dependencies = [ + "cfg-if", + "proc-macro2", + "quote", + "syn 2.0.77", +] + [[package]] name = "mpt_trie" version = "0.4.1" @@ -3790,6 +3851,32 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "predicates" +version = "3.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e9086cc7640c29a356d1a29fd134380bee9d8f79a17410aa76e7ad295f42c97" +dependencies = [ + "anstyle", + "predicates-core", +] + +[[package]] +name = "predicates-core" +version = "1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae8177bee8e75d6846599c6b9ff679ed51e882816914eec639944d7c9aa11931" + +[[package]] +name = "predicates-tree" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41b740d195ed3166cd147c8047ec98db0e22ec019eb8eeb76d343b795304fb13" +dependencies = [ + "predicates-core", + "termtree", +] + [[package]] name = "pretty_env_logger" version = "0.5.0" @@ -4816,6 +4903,12 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "termtree" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76" + [[package]] name = "text-size" version = "1.1.1" @@ -5810,6 +5903,16 @@ dependencies = [ "time", ] +[[package]] +name = "xtask" +version = "0.0.0" +dependencies = [ + "anyhow", + "clap", + "serde", + "serde_json", +] + [[package]] name = "yansi" version = "1.0.1" @@ -5839,7 +5942,9 @@ dependencies = [ "itertools 0.13.0", "jemallocator", "keccak-hash 0.10.0", + "lazy-regex", "lru", + "mockall", "mpt_trie", "num-traits", "once_cell", @@ -5847,7 +5952,6 @@ dependencies = [ "plonky2", "plonky2_maybe_rayon 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", "primitive-types 0.12.2", - "regex", "rlp", "ruint", "serde", diff --git a/Cargo.toml b/Cargo.toml index 2027eb8bf..094b53a12 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,6 +5,7 @@ members = [ "evm_arithmetization", "mpt_trie", "proc_macro", + "scripts", "smt_trie", "trace_decoder", 
"zero", diff --git a/scripts/Cargo.toml b/scripts/Cargo.toml new file mode 100644 index 000000000..d4328f96c --- /dev/null +++ b/scripts/Cargo.toml @@ -0,0 +1,23 @@ +[package] +name = "xtask" +version = "0.0.0" +edition.workspace = true +license.workspace = true +repository.workspace = true +homepage.workspace = true +keywords.workspace = true +categories.workspace = true +publish = false + +[dependencies] +anyhow.workspace = true +clap = { workspace = true, features = ["derive"] } +serde = { workspace = true, features = ["derive"] } +serde_json.workspace = true + +[lints] +workspace = true + +[[bin]] +name = "xtask" +path = "xtask.rs" diff --git a/scripts/prove_rpc.sh b/scripts/prove_rpc.sh index 49848fdfe..d56d202fb 100755 --- a/scripts/prove_rpc.sh +++ b/scripts/prove_rpc.sh @@ -1,11 +1,11 @@ #!/bin/bash # Args: -# 1 --> Start block idx -# 2 --> End block index (inclusive) +# 1 --> Start block (number in decimal or block hash with prefix 0x). E.g. `1234` or `0x1d5e7a08dd1f4ce7fa52afe7f4960d78e82e508c874838dee594d5300b8df625`. +# 2 --> End block (number or hash, inclusive). Same format as start block. # 3 --> Rpc endpoint:port (eg. http://35.246.1.96:8545) # 4 --> Rpc type (eg. jerigon / native) -# 5 --> Ignore previous proofs (boolean) +# 5 --> Checkpoint block (number or hash). If argument is missing, start block predecessor will be used. # 6 --> Backoff in milliseconds (optional [default: 0]) # 7 --> Number of retries (optional [default: 0]) # 8 --> Test run only flag `test_only` (optional) @@ -38,13 +38,12 @@ REPO_ROOT=$(git rev-parse --show-toplevel) PROOF_OUTPUT_DIR="${REPO_ROOT}/proofs" OUT_LOG_PATH="${PROOF_OUTPUT_DIR}/b$1_$2.log" ALWAYS_WRITE_LOGS=0 # Change this to `1` if you always want logs to be written. -TOT_BLOCKS=$(($2-$1+1)) START_BLOCK=$1 END_BLOCK=$2 NODE_RPC_URL=$3 NODE_RPC_TYPE=$4 -IGNORE_PREVIOUS_PROOFS=$5 +CHECKPOINT_BLOCK=$5 BACKOFF=${6:-0} RETRIES=${7:-0} @@ -56,41 +55,28 @@ RUN_VERIFICATION="${RUN_VERIFICATION:-false}" # Recommended soft file handle limit. Will warn if it is set lower. RECOMMENDED_FILE_HANDLE_LIMIT=8192 -mkdir -p $PROOF_OUTPUT_DIR +mkdir -p "$PROOF_OUTPUT_DIR" -if $IGNORE_PREVIOUS_PROOFS ; then - # Set checkpoint height to previous block number for the first block in range - prev_proof_num=$(($1-1)) - PREV_PROOF_EXTRA_ARG="--checkpoint-block-number ${prev_proof_num}" +# Handle checkpoint block arg +if [ -n "$CHECKPOINT_BLOCK" ] ; then + # Checkpoint block provided, pass it to the prover as a flag + PREV_PROOF_EXTRA_ARG="--checkpoint-block $CHECKPOINT_BLOCK" else + # Checkpoint block not provided, but is required for hash-based start block + if [[ $START_BLOCK == 0x* ]]; then + echo "Checkpoint block is required when specifying blocks by hash" + exit 1 + fi + + # Checkpoint block not provided, deduce proof starting point from the start block if [[ $1 -gt 1 ]]; then prev_proof_num=$(($1-1)) PREV_PROOF_EXTRA_ARG="-f ${PROOF_OUTPUT_DIR}/b${prev_proof_num}.zkproof" fi fi -# Convert hex to decimal parameters -if [[ $START_BLOCK == 0x* ]]; then - START_BLOCK=$((16#${START_BLOCK#"0x"})) -fi -if [[ $END_BLOCK == 0x* ]]; then - END_BLOCK=$((16#${END_BLOCK#"0x"})) -fi - -# Define block interval -if [ $END_BLOCK == '-' ]; then - # Follow from the start block to the end of the chain - BLOCK_INTERVAL=$START_BLOCK.. 
-elif [ $START_BLOCK == $END_BLOCK ]; then - # Single block - BLOCK_INTERVAL=$START_BLOCK -else - # Block range - BLOCK_INTERVAL=$START_BLOCK..=$END_BLOCK -fi - # Print out a warning if we're using `native` and our file descriptor limit is too low. Don't bother if we can't find `ulimit`. -if [ $(command -v ulimit) ] && [ $NODE_RPC_TYPE == "native" ] +if [ "$(command -v ulimit)" ] && [ "$NODE_RPC_TYPE" == "native" ] then file_desc_limit=$(ulimit -n)
@@ -108,49 +94,77 @@ fi # other non-proving code. if [[ $8 == "test_only" ]]; then # test only run - echo "Proving blocks ${BLOCK_INTERVAL} in a test_only mode now... (Total: ${TOT_BLOCKS})" - command='cargo r --release --package zero --bin leader -- --test-only --runtime in-memory --load-strategy on-demand --proof-output-dir $PROOF_OUTPUT_DIR --block-batch-size $BLOCK_BATCH_SIZE rpc --rpc-type "$NODE_RPC_TYPE" --rpc-url "$NODE_RPC_URL" --block-interval $BLOCK_INTERVAL $PREV_PROOF_EXTRA_ARG --backoff "$BACKOFF" --max-retries "$RETRIES" ' + echo "Proving blocks from ($START_BLOCK) to ($END_BLOCK)" + command="cargo r --release --package zero --bin leader -- \ +--test-only \ +--runtime in-memory \ +--load-strategy on-demand \ +--proof-output-dir $PROOF_OUTPUT_DIR \ +--block-batch-size $BLOCK_BATCH_SIZE \ +rpc \ +--rpc-type $NODE_RPC_TYPE \ +--rpc-url $NODE_RPC_URL \ +--start-block $START_BLOCK \ +--end-block $END_BLOCK \ +--backoff $BACKOFF \ +--max-retries $RETRIES \ +$PREV_PROOF_EXTRA_ARG" + if [ "$OUTPUT_TO_TERMINAL" = true ]; then - eval $command + eval "$command" retVal=$? echo -e "Proof witness generation finished with result: $retVal" exit $retVal else - eval $command > $OUT_LOG_PATH 2>&1 - if grep -q 'All proof witnesses have been generated successfully.' $OUT_LOG_PATH; then + eval "$command" > "$OUT_LOG_PATH" 2>&1 + if grep -q 'All proof witnesses have been generated successfully.' "$OUT_LOG_PATH"; then echo -e "Success - Note this was just a test, not a proof" # Remove the log on success if we don't want to keep it. if [ $ALWAYS_WRITE_LOGS -ne 1 ]; then - rm $OUT_LOG_PATH + rm "$OUT_LOG_PATH" fi exit else - echo "Failed to create proof witnesses. See ${OUT_LOG_PATH} for more details." + echo "Failed to create proof witnesses. See $OUT_LOG_PATH for more details." exit 1 fi fi else # normal run - echo "Proving blocks ${BLOCK_INTERVAL} now... (Total: ${TOT_BLOCKS})" - command='cargo r --release --package zero --bin leader -- --runtime in-memory --load-strategy on-demand --proof-output-dir $PROOF_OUTPUT_DIR --block-batch-size $BLOCK_BATCH_SIZE rpc --rpc-type "$NODE_RPC_TYPE" --rpc-url "$3" --block-interval $BLOCK_INTERVAL $PREV_PROOF_EXTRA_ARG --backoff "$BACKOFF" --max-retries "$RETRIES" ' + echo "Proving blocks from ($START_BLOCK) to ($END_BLOCK)" + command="cargo r --release --package zero --bin leader -- \ +--runtime in-memory \ +--load-strategy on-demand \ +--proof-output-dir $PROOF_OUTPUT_DIR \ +--block-batch-size $BLOCK_BATCH_SIZE \ +rpc \ +--rpc-type $NODE_RPC_TYPE \ +--rpc-url $3 \ +--start-block $START_BLOCK \ +--end-block $END_BLOCK \ +--backoff $BACKOFF \ +--max-retries $RETRIES \ +$PREV_PROOF_EXTRA_ARG " + if [ "$OUTPUT_TO_TERMINAL" = true ]; then - eval $command + eval "$command" echo -e "Proof generation finished with result: $?" else - eval $command > $OUT_LOG_PATH 2>&1 + eval "$command" > "$OUT_LOG_PATH" 2>&1 retVal=$? if [ $retVal -ne 0 ]; then # Some error occurred, display the logs and exit. - cat $OUT_LOG_PATH - echo "Block ${i} errored. See ${OUT_LOG_PATH} for more details."
+ cat "$OUT_LOG_PATH" + echo "Error occurred. See $OUT_LOG_PATH for more details." exit $retVal else # Remove the log on success if we don't want to keep it. if [ $ALWAYS_WRITE_LOGS -ne 1 ]; then - rm $OUT_LOG_PATH + rm "$OUT_LOG_PATH" fi fi - echo "Successfully generated ${TOT_BLOCKS} proofs!" + proof_count=$(grep -c 'INFO zero::prover: Proving block \d' < "$OUT_LOG_PATH") + echo "Successfully generated $proof_count proofs!" fi fi @@ -160,15 +174,15 @@ if [ "$RUN_VERIFICATION" = true ]; then echo "Running the verification for the last proof..." proof_file_name=$PROOF_OUTPUT_DIR/b$END_BLOCK.zkproof - echo "Verifying the proof of the latest block in the interval:" $proof_file_name - cargo r --release --package zero --bin verifier -- -f $proof_file_name > $PROOF_OUTPUT_DIR/verify.out 2>&1 + echo "Verifying the proof of the latest block in the interval:" "$proof_file_name" + cargo r --release --package zero --bin verifier -- -f "$proof_file_name" > "$PROOF_OUTPUT_DIR/verify.out" 2>&1 - if grep -q 'All proofs verified successfully!' $PROOF_OUTPUT_DIR/verify.out; then + if grep -q 'All proofs verified successfully!' "$PROOF_OUTPUT_DIR/verify.out"; then echo "$proof_file_name verified successfully!"; - rm $PROOF_OUTPUT_DIR/verify.out + rm "$PROOF_OUTPUT_DIR/verify.out" else # Some error occurred with verification, display the logs and exit. - cat $PROOF_OUTPUT_DIR/verify.out + cat "$PROOF_OUTPUT_DIR/verify.out" echo "There was an issue with proof verification. See $PROOF_OUTPUT_DIR/verify.out for more details."; exit 1 fi diff --git a/scripts/prove_stdio.sh b/scripts/prove_stdio.sh index 1ef3ee90b..4fd5d0718 100755 --- a/scripts/prove_stdio.sh +++ b/scripts/prove_stdio.sh @@ -103,21 +103,30 @@ fi # proof. This is useful for quickly testing decoding and all of the # other non-proving code. if [[ $TEST_ONLY == "test_only" ]]; then - nice -19 cargo run --release --package zero --bin leader -- --test-only --runtime in-memory --load-strategy on-demand --block-batch-size $BLOCK_BATCH_SIZE --proof-output-dir $PROOF_OUTPUT_DIR --batch-size $BATCH_SIZE --save-inputs-on-error stdio < $INPUT_FILE |& tee &> $TEST_OUT_PATH - if grep -q 'All proof witnesses have been generated successfully.' $TEST_OUT_PATH; then + nice -19 cargo run --quiet --release --package zero --bin leader -- \ + --test-only \ + --runtime in-memory \ + --load-strategy on-demand \ + --block-batch-size "$BLOCK_BATCH_SIZE" \ + --proof-output-dir "$PROOF_OUTPUT_DIR" \ + --batch-size "$BATCH_SIZE" \ + --save-inputs-on-error stdio + stdio < "$INPUT_FILE" |& tee &> "$TEST_OUT_PATH" + + if grep -q 'All proof witnesses have been generated successfully.' "$TEST_OUT_PATH"; then echo -e "\n\nSuccess - Note this was just a test, not a proof" #rm $TEST_OUT_PATH exit 0 - elif grep -q 'Attempted to collapse an extension node' $TEST_OUT_PATH; then - echo "ERROR: Attempted to collapse an extension node. See $TEST_OUT_PATH for more details." - rm $TEST_OUT_PATH + elif grep -q 'Attempted to collapse an extension node' "$TEST_OUT_PATH"; then + echo "ERROR: Attempted to collapse an extension node. See "$TEST_OUT_PATH" for more details." + rm "$TEST_OUT_PATH" exit 4 - elif grep -q 'SIMW == RPCW ? false' $TEST_OUT_PATH; then - echo "ERROR: SIMW == RPCW ? false. See $TEST_OUT_PATH for more details." + elif grep -q 'SIMW == RPCW ? false' "$TEST_OUT_PATH"; then + echo "ERROR: SIMW == RPCW ? false. See "$TEST_OUT_PATH" for more details." 
exit 5 - elif grep -q 'Proving task finished with error' $TEST_OUT_PATH; then + elif grep -q 'Proving task finished with error' "$TEST_OUT_PATH"; then # Some error occurred, display the logs and exit. - echo "ERROR: Proving task finished with error. See $TEST_OUT_PATH for more details." + echo "ERROR: Proving task finished with error. See ${TEST_OUT_PATH} for more details." exit 1 else echo -e "\n\nUndecided. Proving process has stopped but verdict is undecided. See $TEST_OUT_PATH for more details."
@@ -129,45 +138,43 @@ cargo build --release --jobs "$num_procs" start_time=$(date +%s%N) -nice -19 "${REPO_ROOT}/target/release/leader" --runtime in-memory --load-strategy on-demand -n 1 --block-batch-size $BLOCK_BATCH_SIZE \ - --proof-output-dir $PROOF_OUTPUT_DIR stdio < $INPUT_FILE |& tee $OUTPUT_LOG +nice -19 "${REPO_ROOT}/target/release/leader" --runtime in-memory \ + --load-strategy on-demand -n 1 \ + --block-batch-size "$BLOCK_BATCH_SIZE" \ + --proof-output-dir "$PROOF_OUTPUT_DIR" stdio < "$INPUT_FILE" |& tee "$OUTPUT_LOG" end_time=$(date +%s%N) -cat $OUTPUT_LOG | grep "Successfully wrote to disk proof file " | awk '{print $NF}' | tee $PROOFS_FILE_LIST +grep "Successfully wrote to disk proof file " "$OUTPUT_LOG" | awk '{print $NF}' | tee "$PROOFS_FILE_LIST" if [ ! -s "$PROOFS_FILE_LIST" ]; then # Some error occurred, display the logs and exit. - cat $OUTPUT_LOG + cat "$OUTPUT_LOG" echo "Proof list not generated, some error happened. For more details check the log file $OUTPUT_LOG" exit 1 fi -cat $PROOFS_FILE_LIST | while read proof_file; +while read -r proof_file; do echo "Verifying proof file $proof_file" verify_file=$PROOF_OUTPUT_DIR/verify_$(basename $proof_file).out - nice -19 "${REPO_ROOT}/target/release/verifier" -f $proof_file | tee $verify_file + nice -19 "${REPO_ROOT}/target/release/verifier" -f $proof_file | tee "$verify_file" if grep -q 'All proofs verified successfully!' $verify_file; then echo "Proof verification for file $proof_file successful"; - rm $verify_file # we keep the generated proof for potential reuse + rm "$verify_file" # we keep the generated proof for potential reuse else # Some error occurred with verification, display the logs and exit. - cat $verify_file + cat "$verify_file" echo "There was an issue with proof verification. See $verify_file for more details."; exit 1 fi -done +done < "$PROOFS_FILE_LIST" duration_ns=$((end_time - start_time)) duration_sec=$(echo "$duration_ns / 1000000000" | bc -l) echo "Success!" -echo "Proving duration:" $duration_sec " seconds" +echo "Proving duration: $duration_sec seconds" echo "Note, this duration is inclusive of circuit handling and overall process initialization"; # Clean up in case of success -rm $OUTPUT_LOG - - - - +rm "$OUTPUT_LOG"
diff --git a/scripts/xtask.rs b/scripts/xtask.rs new file mode 100644 index 000000000..c60770e28 --- /dev/null +++ b/scripts/xtask.rs
@@ -0,0 +1,69 @@ +//! General purpose scripts for development + +use std::process::{Command, Stdio}; + +use anyhow::{ensure, Context as _}; +use clap::Parser; +use serde::Deserialize; + +#[derive(Parser)] +enum Args { + /// Run `cargo-outdated`, printing warnings compatible with GitHub's CI. + /// + /// If a direct dependency listed in our Cargo.lock is behind the latest + /// available on crates-io, a warning will be emitted. + /// + /// Note that we only warn on our _direct_ dependencies, + /// not the entire supply chain.
+ Outdated, +} + +#[derive(Deserialize)] +struct Outdated<'a> { + crate_name: &'a str, + dependencies: Vec<Dependency<'a>>, +} + +#[derive(Deserialize)] +struct Dependency<'a> { + name: &'a str, + project: &'a str, + latest: &'a str, +} + +fn main() -> anyhow::Result<()> { + match Args::parse() { + Args::Outdated => { + let output = Command::new("cargo") + .args(["outdated", "--root-deps-only", "--format=json"]) + .stderr(Stdio::inherit()) + .stdout(Stdio::piped()) + .output() + .context("couldn't exec `cargo`")?; + ensure!( + output.status.success(), + "command failed with {}", + output.status + ); + for Outdated { + crate_name, + dependencies, + } in serde_json::Deserializer::from_slice(&output.stdout) + .into_iter::<Outdated>() + .collect::<Result<Vec<_>, _>>() + .context("failed to parse output from `cargo outdated`")? + { + for Dependency { + name, + project, + latest, + } in dependencies + { + // https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/workflow-commands-for-github-actions#setting-a-warning-message + println!("::warning title=outdated-dependency::dependency {name} of crate {crate_name} is at version {project}, but the latest is {latest}") + } + } + } + } + Ok(()) +}
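For reference, the `Outdated` and `Dependency` structs above bind to the JSON that `cargo outdated --format=json` prints, one document per workspace crate, which is why `serde_json::Deserializer::into_iter` is used. A minimal, self-contained sketch of that mapping follows; the sample JSON is invented for illustration and only exercises the fields the structs declare (any extra fields in real output are ignored by serde's default behavior):

    use serde::Deserialize;

    #[derive(Deserialize)]
    struct Outdated<'a> {
        crate_name: &'a str,
        dependencies: Vec<Dependency<'a>>,
    }

    #[derive(Deserialize)]
    struct Dependency<'a> {
        name: &'a str,
        project: &'a str, // version currently in Cargo.lock
        latest: &'a str,  // newest version on crates.io
    }

    fn main() {
        // Hypothetical sample of one output document, not captured from a real run.
        let sample =
            r#"{"crate_name":"zero","dependencies":[{"name":"clap","project":"4.5.0","latest":"4.5.20"}]}"#;
        let out: Outdated = serde_json::from_str(sample).expect("sample is valid JSON");
        for dep in out.dependencies {
            // Same GitHub Actions workflow-command format that xtask.rs emits.
            println!(
                "::warning title=outdated-dependency::dependency {} of crate {} is at version {}, but the latest is {}",
                dep.name, out.crate_name, dep.project, dep.latest
            );
        }
    }

Because the fields are `&'a str`, deserialization borrows from the input buffer (serde auto-borrows `&str` fields), which is why the real tool keeps `output.stdout` alive while iterating.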
diff --git a/trace_decoder/src/core.rs b/trace_decoder/src/core.rs index ab4c08a02..f8e870760 100644 --- a/trace_decoder/src/core.rs +++ b/trace_decoder/src/core.rs
@@ -13,9 +13,9 @@ use alloy::{ use alloy_compat::Compat as _; use anyhow::{anyhow, bail, ensure, Context as _}; use either::Either; -use ethereum_types::{Address, H160, U256}; +use ethereum_types::{Address, BigEndianHash as _, H160, U256}; use evm_arithmetization::{ - generation::{mpt::AccountRlp, TrieInputs}, + generation::TrieInputs, jumpdest::JumpDestTableWitness, proof::{BlockMetadata, TrieRoots}, GenerationInputs,
@@ -29,10 +29,11 @@ use zk_evm_common::gwei_to_wei; use crate::{ observer::{DummyObserver, Observer}, - tries::StateSmt, + world::Type2World, }; use crate::{ - tries::{MptKey, ReceiptTrie, StateMpt, StateTrie, StorageTrie, TransactionTrie}, + tries::{MptKey, ReceiptTrie, StateMpt, StorageTrie, TransactionTrie}, + world::{Type1World, World}, BlockLevelData, BlockTrace, BlockTraceTriePreImages, CombinedPreImages, ContractCodeUsage, OtherBlockData, SeparateStorageTriesPreImage, SeparateTriePreImage, SeparateTriePreImages, TxnInfo, TxnMeta, TxnTrace,
@@ -72,7 +73,7 @@ pub fn entrypoint( trace: BlockTrace, other: OtherBlockData, batch_size_hint: usize, - observer: &mut impl Observer<StateMpt>, + observer: &mut impl Observer<Type1World>, wire_disposition: WireDisposition, ) -> anyhow::Result<Vec<GenerationInputs>> { ensure!(batch_size_hint != 0);
@@ -87,7 +88,7 @@ pub fn entrypoint( BlockTraceTriePreImages::Separate(_) => FatalMissingCode(true), BlockTraceTriePreImages::Combined(_) => FatalMissingCode(false), }; - let (state, storage, mut code) = start(trie_pre_images, wire_disposition)?; + let (world, mut code) = start(trie_pre_images, wire_disposition)?; code.extend(code_db.clone());
@@ -108,11 +109,10 @@ pub fn entrypoint( *amt = gwei_to_wei(*amt) } - let batches = match state { - Either::Left(mpt) => Either::Left( middle( - mpt, - storage, + let batches = match world { + Either::Left(type1world) => Either::Left( middle( + type1world, batch(txn_info, batch_size_hint), &mut code, &b_meta,
@@ -124,11 +124,10 @@ pub fn entrypoint( .into_iter() .map(|it| it.map(Either::Left)), ), - Either::Right(smt) => { Either::Right( middle( - smt, - storage, + Either::Right(type2world) => { Either::Right( middle( + type2world, batch(txn_info, batch_size_hint), &mut code, &b_meta,
@@ -154,67 +153,69 @@ pub fn entrypoint( byte_code, before: IntraBlockTries { - state, - storage, + world, transaction, receipt, }, after, withdrawals, jumpdest_tables, -}| GenerationInputs { - txn_number_before: first_txn_ix.into(), - gas_used_before: running_gas_used.into(), - gas_used_after: { - running_gas_used += gas_used; - running_gas_used.into() - }, - signed_txns: byte_code.clone().into_iter().map(Into::into).collect(), - withdrawals, - ger_data, - tries: TrieInputs { - state_trie: match state { - Either::Left(mpt) => mpt.into(), - Either::Right(_) => todo!("evm_arithmetization accepts an SMT"), +}| { + let (state, storage) = world + .expect_left("TODO(0xaatif): evm_arithmetization accepts an SMT") + .into_state_and_storage(); + GenerationInputs { + txn_number_before: first_txn_ix.into(), + gas_used_before: running_gas_used.into(), + gas_used_after: { + running_gas_used += gas_used; + running_gas_used.into() }, - transactions_trie: transaction.into(), - receipts_trie: receipt.into(), - storage_tries: storage.into_iter().map(|(k, v)| (k, v.into())).collect(), - }, - trie_roots_after: after, - checkpoint_state_trie_root, - checkpoint_consolidated_hash, - contract_code: { - let init_codes = - byte_code - .iter() - .filter_map(|nonempty_txn_bytes| -> Option<Vec<u8>> { - let tx_envelope = - TxEnvelope::decode(&mut &nonempty_txn_bytes[..]).unwrap(); - match tx_envelope.to() { - TxKind::Create => Some(tx_envelope.input().to_vec()), - TxKind::Call(_address) => None, - } - }); - let mut result = Hash2Code::default(); - result.extend(init_codes); - result.extend(contract_code); - result.extend(code_db.clone()); - result.into_hashmap() - }, - block_metadata: b_meta.clone(), - block_hashes: b_hashes.clone(), - burn_addr, - jumpdest_table: { - // TODO(einar-polygon): - // Note that this causes any batch containing just a single `None` to collapse - // into a `None`, which causing failover to simulating jumpdest analysis for the - // whole batch. There is an optimization opportunity here. - jumpdest_tables - .into_iter() - .collect::<Option<Vec<_>>>() - .map(|jdt| JumpDestTableWitness::merge(jdt.iter()).0) - }, + signed_txns: byte_code.clone().into_iter().map(Into::into).collect(), + withdrawals, + ger_data, + tries: TrieInputs { + state_trie: state.into(), + transactions_trie: transaction.into(), + receipts_trie: receipt.into(), + storage_tries: storage.into_iter().map(|(k, v)| (k, v.into())).collect(), + }, + trie_roots_after: after, + checkpoint_state_trie_root, + checkpoint_consolidated_hash, + contract_code: { + let init_codes = + byte_code + .iter() + .filter_map(|nonempty_txn_bytes| -> Option<Vec<u8>> { + let tx_envelope = + TxEnvelope::decode(&mut &nonempty_txn_bytes[..]).unwrap(); + match tx_envelope.to() { + TxKind::Create => Some(tx_envelope.input().to_vec()), + TxKind::Call(_address) => None, + } + }); + let mut result = Hash2Code::default(); + result.extend(init_codes); + result.extend(contract_code); + result.extend(code_db.clone()); + result.into_hashmap() + }, + block_metadata: b_meta.clone(), + block_hashes: b_hashes.clone(), + burn_addr, + jumpdest_table: { + // TODO(einar-polygon): + // Note that this causes any batch containing just a single `None` to + // collapse into a `None`, which causes failover to + // simulating jumpdest analysis for the whole batch. + // There is an optimization opportunity here.
+ jumpdest_tables + .into_iter() + .collect::<Option<Vec<_>>>() + .map(|jdt| JumpDestTableWitness::merge(jdt.iter()).0) + }, + } }, ) .collect())
@@ -229,11 +230,7 @@ fn start( pre_images: BlockTraceTriePreImages, wire_disposition: WireDisposition, -) -> anyhow::Result<( - Either<StateMpt, StateSmt>, - BTreeMap<H256, StorageTrie>, - Hash2Code, -)> { +) -> anyhow::Result<(Either<Type1World, Type2World>, Hash2Code)> { Ok(match pre_images { // TODO(0xaatif): https://github.com/0xPolygonZero/zk_evm/issues/401 // refactor our convoluted input types BlockTraceTriePreImages::Separate(SeparateTriePreImages { state: SeparateTriePreImage::Direct(state), storage: SeparateStorageTriesPreImage::MultipleTries(storage), }) => { - let state = state.items().try_fold( - StateMpt::default(), - |mut acc, (nibbles, hash_or_val)| { - let path = MptKey::from_nibbles(nibbles); - match hash_or_val { - mpt_trie::trie_ops::ValOrHash::Val(bytes) => { - #[expect(deprecated)] // this is MPT specific - acc.insert_by_hashed_address( - path.into_hash() - .context("invalid path length in direct state trie")?, - rlp::decode(&bytes) - .context("invalid AccountRlp in direct state trie")?, - )?; - } - mpt_trie::trie_ops::ValOrHash::Hash(h) => { - acc.insert_hash_by_key(path, h)?; - } - }; - anyhow::Ok(acc) - }, - )?; + let state = + state + .items() + .try_fold(StateMpt::new(), |mut acc, (nibbles, hash_or_val)| { + let path = MptKey::from_nibbles(nibbles); + match hash_or_val { + mpt_trie::trie_ops::ValOrHash::Val(bytes) => { + acc.insert( + path.into_hash() + .context("invalid path length in direct state trie")?, + rlp::decode(&bytes) + .context("invalid AccountRlp in direct state trie")?, + )?; + } + mpt_trie::trie_ops::ValOrHash::Hash(h) => { + acc.insert_hash(path, h)?; + } + }; + anyhow::Ok(acc) + })?; let storage = storage .into_iter() .map(|(k, SeparateTriePreImage::Direct(v))| {
@@ -281,12 +277,15 @@ fn start( .map(|v| (k, v)) }) .collect::<anyhow::Result<_>>()?; - (Either::Left(state), storage, Hash2Code::new()) + ( + Either::Left(Type1World::new(state, storage)?), + Hash2Code::new(), + ) } BlockTraceTriePreImages::Combined(CombinedPreImages { compact }) => { let instructions = crate::wire::parse(&compact) .context("couldn't parse instructions from binary format")?; - let (state, storage, code) = match wire_disposition { + match wire_disposition { WireDisposition::Type1 => { let crate::type1::Frontend { state, storage, code, } = crate::type1::frontend(instructions)?; ( - Either::Left(state), - storage, + Either::Left(Type1World::new(state, storage)?), Hash2Code::from_iter(code.into_iter().map(NonEmpty::into_vec)), ) } WireDisposition::Type2 => { - let crate::type2::Frontend { trie, code } = + let crate::type2::Frontend { world: trie, code } = crate::type2::frontend(instructions)?; ( Either::Right(trie), - BTreeMap::new(), Hash2Code::from_iter(code.into_iter().map(NonEmpty::into_vec)), ) } - }; - (state, storage, code) + } } }) }
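Note that both `start` arms now funnel the separate state and storage tries through `Type1World::new(state, storage)?`. The constructor itself is not part of this diff, but the fallible signature suggests it takes over the storage-root consistency check that `middle` used to run inline (the "Initialise the storage tries" loop deleted later in this diff). A sketch of that constructor under that assumption; the struct layout is invented for illustration and may differ from the real `world` module:

    use std::collections::BTreeMap;

    use anyhow::ensure;
    use ethereum_types::H256;

    use crate::tries::{StateMpt, StorageTrie};

    // Assumed shape, not the actual definition.
    pub struct Type1World {
        state: StateMpt,
        storage: BTreeMap<H256, StorageTrie>,
    }

    impl Type1World {
        pub fn new(state: StateMpt, storage: BTreeMap<H256, StorageTrie>) -> anyhow::Result<Self> {
            // Mirrors the invariant from the loop removed from `middle`:
            // each account's recorded storage root must match the root of
            // the storage trie supplied for that (hashed) address.
            for (haddr, acct) in state.iter() {
                if let Some(trie) = storage.get(&haddr) {
                    ensure!(
                        trie.root() == acct.storage_root,
                        "inconsistent initial storage for hashed address {haddr:x}"
                    );
                }
            }
            Ok(Self { state, storage })
        }
    }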
@@ -413,24 +409,21 @@ impl<T> Batch<T> { /// [`evm_arithmetization::generation::TrieInputs`], /// generic over state trie representation. #[derive(Debug)] -pub struct IntraBlockTries<StateTrieT> { - pub state: StateTrieT, - pub storage: BTreeMap<H256, StorageTrie>, +pub struct IntraBlockTries<WorldT> { + pub world: WorldT, pub transaction: TransactionTrie, pub receipt: ReceiptTrie, } impl<T> IntraBlockTries<T> { - fn map<U>(self, mut f: impl FnMut(T) -> U) -> IntraBlockTries<U> { + fn map<U>(self, f: impl FnOnce(T) -> U) -> IntraBlockTries<U> { let Self { - state, - storage, + world, transaction, receipt, } = self; IntraBlockTries { - state: f(state), - storage, + world: f(world), transaction, receipt, } }
@@ -446,11 +439,9 @@ pub struct FatalMissingCode(pub bool); /// Does the main work mentioned in the [module documentation](super). #[allow(clippy::too_many_arguments)] -fn middle<StateTrieT: StateTrie + Clone>( +fn middle<WorldT: World + Clone>( // state at the beginning of the block - mut state_trie: StateTrieT, - // storage at the beginning of the block - mut storage_tries: BTreeMap<H256, StorageTrie>, + mut world: WorldT, // None represents a dummy transaction that should not increment the transaction index // all batches SHOULD not be empty batches: Vec<Vec<Option<TxnInfo>>>, code: &mut Hash2Code, b_meta: &BlockMetadata, ger_data: Option<(H256, H256)>, mut withdrawals: Vec<(Address, U256)>, fatal_missing_code: FatalMissingCode, // called with the untrimmed tries after each batch - observer: &mut impl Observer<StateTrieT>, -) -> anyhow::Result<Vec<Batch<StateTrieT>>> + observer: &mut impl Observer<WorldT>, +) -> anyhow::Result<Vec<Batch<WorldT>>> where - StateTrieT::Key: Ord + From<Address>,
+ WorldT::SubtriePath: Ord + From<Address>,
{ - // Initialise the storage tries. - for (haddr, acct) in state_trie.iter() { - let storage = storage_tries.entry(haddr).or_insert({ - let mut it = StorageTrie::default(); - it.insert_hash(MptKey::default(), acct.storage_root) - .expect("empty trie insert cannot fail"); - it - }); - ensure!( - storage.root() == acct.storage_root, - "inconsistent initial storage for hashed address {haddr:x}" - ) - } - // These are the per-block tries. let mut transaction_trie = TransactionTrie::new(); let mut receipt_trie = ReceiptTrie::new();
@@ -496,26 +473,24 @@ where let mut batch_contract_code = BTreeSet::from([vec![]]); // always include empty code let mut before = IntraBlockTries { - state: state_trie.clone(), + world: world.clone(), transaction: transaction_trie.clone(), receipt: receipt_trie.clone(), - storage: storage_tries.clone(), }; // We want to mask the TrieInputs above, // but won't know the bounds until after the loop below, // so store that information here. let mut storage_masks = BTreeMap::<_, BTreeSet<MptKey>>::new(); - let mut state_mask = BTreeSet::<StateTrieT::Key>::new(); + let mut state_mask = BTreeSet::<WorldT::SubtriePath>::new(); if txn_ix == 0 { do_pre_execution( block, ger_data, - &mut storage_tries, &mut storage_masks, &mut state_mask, - &mut state_trie, + &mut world, )?; }
@@ -568,28 +543,12 @@ where .map_err(|e| anyhow!("{e:?}")) .context(format!("couldn't decode receipt in txn {tx_hash:x}"))?; - let (mut acct, born) = state_trie - .get_by_address(addr) - .map(|acct| (acct, false)) - .unwrap_or((AccountRlp::default(), true)); + let born = !world.contains(addr)?; if born { // Empty accounts cannot have non-empty storage, // so we can safely insert a default trie. - storage_tries.insert(keccak_hash::keccak(addr), StorageTrie::default()); - } - - if born || just_access { - state_trie - .clone() - .insert_by_address(addr, acct) - .context(format!( - "couldn't reach state of {} address {addr:x} in txn {tx_hash:x}", - match born { - true => "created", - false => "accessed", - } - ))?; + world.create_storage(addr)? } let do_writes = !just_access
@@ -609,71 +568,60 @@ where ); if do_writes { - acct.balance = balance.unwrap_or(acct.balance); - acct.nonce = nonce.unwrap_or(acct.nonce); - acct.code_hash = code_usage - .map(|it| match it { + if let Some(new) = balance { + world.update_balance(addr, |it| *it = new)? + } + if let Some(new) = nonce { + world.update_nonce(addr, |it| *it = new)? + } + if let Some(usage) = code_usage { + match usage { ContractCodeUsage::Read(hash) => { // TODO(Nashtare): https://github.com/0xPolygonZero/zk_evm/issues/700 - // This is a bug in the zero tracer, which shouldn't be giving us - // this read at all. Workaround for now. + // This is a bug in the zero tracer, + // which shouldn't be giving us this read at all. + // Workaround for now. + // The fix should involve removing the `Either` + // below. match (fatal_missing_code, code.get(hash)) { (FatalMissingCode(true), None) => { bail!("no code for hash {hash:x}") } (_, Some(byte_code)) => { + world.set_code(addr, Either::Left(&byte_code))?; batch_contract_code.insert(byte_code); } - (_, None) => { - log::warn!("no code for {hash:x}") - } + (_, None) => world.set_code(addr, Either::Right(hash))?, } - - anyhow::Ok(hash) } ContractCodeUsage::Write(bytes) => { code.insert(bytes.clone()); - let hash = keccak_hash::keccak(&bytes); + world.set_code(addr, Either::Left(&bytes))?; batch_contract_code.insert(bytes); - Ok(hash) } - }) - .transpose()?
- .unwrap_or(acct.code_hash); - - if !storage_written.is_empty() { - let storage = match born { - true => storage_tries.entry(keccak_hash::keccak(addr)).or_default(), - false => storage_tries.get_mut(&keccak_hash::keccak(addr)).context( - format!( - "missing storage trie for address {addr:x} in txn {tx_hash:x}" - ), - )?, }; + } + if !storage_written.is_empty() { for (k, v) in storage_written { - let slot = MptKey::from_hash(keccak_hash::keccak(k)); match v.is_zero() { // this is actually a delete - true => storage_mask.extend(storage.reporting_remove(slot)?), - false => { - storage.insert(slot, rlp::encode(&v).to_vec())?; - } + true => storage_mask + .extend(world.reporting_destroy_slot(addr, k.into_uint())?), + false => world.store_int(addr, k.into_uint(), v)?, } } - acct.storage_root = storage.root(); } - state_trie.insert_by_address(addr, acct)?; - state_mask.insert(<StateTrieT::Key>::from(addr)); + state_mask.insert(<WorldT::SubtriePath>::from(addr)); } else { // Simple state access - state_mask.insert(<StateTrieT::Key>::from(addr)); + state_mask.insert(<WorldT::SubtriePath>::from(addr)); } if self_destructed { - storage_tries.remove(&keccak_hash::keccak(addr)); - state_mask.extend(state_trie.reporting_remove(addr)?) + world.destroy_storage(addr)?; + state_mask.extend(world.reporting_destroy(addr)?) } }
@@ -693,41 +641,22 @@ where withdrawals: match loop_ix == loop_len { true => { for (addr, amt) in &withdrawals { - state_mask.insert(<StateTrieT::Key>::from(*addr)); - let mut acct = state_trie - .get_by_address(*addr) - .context(format!("missing address {addr:x} for withdrawal"))?; - acct.balance += *amt; - state_trie - .insert_by_address(*addr, acct) - // TODO(0xaatif): https://github.com/0xPolygonZero/zk_evm/issues/275 - // Add an entry API - .expect("insert must succeed with the same key as a successful `get`"); + state_mask.insert(<WorldT::SubtriePath>::from(*addr)); + world.update_balance(*addr, |it| *it += *amt)?; } mem::take(&mut withdrawals) } false => vec![], }, before: { - before.state.mask(state_mask)?; + before.world.mask(state_mask)?; before.receipt.mask(batch_first_txn_ix..txn_ix)?; before.transaction.mask(batch_first_txn_ix..txn_ix)?; - - let keep = storage_masks - .keys() - .map(keccak_hash::keccak) - .collect::<BTreeSet<_>>(); - before.storage.retain(|haddr, _| keep.contains(haddr)); - - for (addr, mask) in storage_masks { - if let Some(it) = before.storage.get_mut(&keccak_hash::keccak(addr)) { - it.mask(mask)? - } // else must have self-destructed - } + before.world.mask_storage(storage_masks)?; before }, after: TrieRoots { - state_root: state_trie.root(), + state_root: world.root(), transactions_root: transaction_trie.root(), receipts_root: receipt_trie.root(), },
@@ -737,8 +666,7 @@ where observer.collect_tries( block.block_number, batch_index, - &state_trie, - &storage_tries, + &world, &transaction_trie, &receipt_trie, )
@@ -748,38 +676,29 @@ } /// Performs all the pre-txn execution rules of the targeted network. -fn do_pre_execution<StateTrieT: StateTrie + Clone>( +fn do_pre_execution<WorldT: World>( block: &BlockMetadata, ger_data: Option<(H256, H256)>, - storage: &mut BTreeMap<H256, StorageTrie>, trim_storage: &mut BTreeMap<Address, BTreeSet<MptKey>>, - trim_state: &mut BTreeSet<StateTrieT::Key>, - state_trie: &mut StateTrieT, + trim_state: &mut BTreeSet<WorldT::SubtriePath>, + world: &mut WorldT, ) -> anyhow::Result<()> where - StateTrieT::Key: From<Address>
+ Ord, + WorldT::SubtriePath: From<Address>
+ Ord, { // Ethereum mainnet: EIP-4788 if cfg!(feature = "eth_mainnet") { return do_beacon_hook( block.block_timestamp, - storage, trim_storage, block.parent_beacon_block_root, trim_state, - state_trie, + world, ); } if cfg!(feature = "cdk_erigon") { - return do_scalable_hook( - block, - ger_data, - storage, - trim_storage, - trim_state, - state_trie, - ); + return do_scalable_hook(block, ger_data, trim_storage, trim_state, world); } Ok(()) }
@@ -790,37 +709,30 @@ where /// /// This is Polygon-CDK-specific, and runs at the start of the block, /// before any transactions (as per the Etrog specification). -fn do_scalable_hook<StateTrieT: StateTrie + Clone>( +fn do_scalable_hook<WorldT: World>( block: &BlockMetadata, ger_data: Option<(H256, H256)>, - storage: &mut BTreeMap<H256, StorageTrie>, trim_storage: &mut BTreeMap<Address, BTreeSet<MptKey>>, - trim_state: &mut BTreeSet<StateTrieT::Key>, - state_trie: &mut StateTrieT, + trim_state: &mut BTreeSet<WorldT::SubtriePath>, + world: &mut WorldT, ) -> anyhow::Result<()> where - StateTrieT::Key: From<Address>
+ Ord, + WorldT::SubtriePath: From<Address>
+ Ord, { use evm_arithmetization::testing_utils::{ - ADDRESS_SCALABLE_L2, ADDRESS_SCALABLE_L2_ADDRESS_HASHED, GLOBAL_EXIT_ROOT_ADDRESS, - GLOBAL_EXIT_ROOT_ADDRESS_HASHED, GLOBAL_EXIT_ROOT_STORAGE_POS, LAST_BLOCK_STORAGE_POS, - STATE_ROOT_STORAGE_POS, TIMESTAMP_STORAGE_POS, + ADDRESS_SCALABLE_L2, GLOBAL_EXIT_ROOT_ADDRESS, GLOBAL_EXIT_ROOT_STORAGE_POS, + LAST_BLOCK_STORAGE_POS, STATE_ROOT_STORAGE_POS, TIMESTAMP_STORAGE_POS, }; if block.block_number.is_zero() { return Err(anyhow!("Attempted to prove the Genesis block!")); } - let scalable_storage = storage - .get_mut(&ADDRESS_SCALABLE_L2_ADDRESS_HASHED) - .context("missing scalable contract storage trie")?; let scalable_trim = trim_storage.entry(ADDRESS_SCALABLE_L2).or_default(); - let timestamp_slot_key = MptKey::from_slot_position(U256::from(TIMESTAMP_STORAGE_POS.1)); + let timestamp = world + .load_int(ADDRESS_SCALABLE_L2, U256::from(TIMESTAMP_STORAGE_POS.1)) + .unwrap_or_default(); - let timestamp = scalable_storage - .get(&timestamp_slot_key) - .map(rlp::decode::<U256>) - .unwrap_or(Ok(0.into()))?; let timestamp = core::cmp::max(timestamp, block.block_timestamp); // Store block number and largest timestamp
@@ -831,38 +743,31 @@ where for (ix, u) in [ (U256::from(LAST_BLOCK_STORAGE_POS.1), block.block_number), (U256::from(TIMESTAMP_STORAGE_POS.1), timestamp), ] { let slot = MptKey::from_slot_position(ix); - // These values are never 0. - scalable_storage.insert(slot, alloy::rlp::encode(u.compat()))?; + ensure!(!u.is_zero()); + world.store_int(ADDRESS_SCALABLE_L2, ix, u)?; scalable_trim.insert(slot); } // Store previous block root hash - let prev_block_root_hash = state_trie.root(); + let prev_block_root_hash = world.root(); let mut arr = [0; 64]; (block.block_number - 1).to_big_endian(&mut arr[0..32]); U256::from(STATE_ROOT_STORAGE_POS.1).to_big_endian(&mut arr[32..64]); let slot = MptKey::from_hash(keccak_hash::keccak(arr)); - scalable_storage.insert(slot, alloy::rlp::encode(prev_block_root_hash.compat()))?; + world.store_hash( + ADDRESS_SCALABLE_L2, + keccak_hash::keccak(arr), + prev_block_root_hash, + )?; + scalable_trim.insert(slot); - trim_state.insert(<StateTrieT::Key>::from(ADDRESS_SCALABLE_L2)); - let mut scalable_acct = state_trie - .get_by_address(ADDRESS_SCALABLE_L2) - .context("missing scalable contract address")?; - scalable_acct.storage_root = scalable_storage.root(); - state_trie - .insert_by_address(ADDRESS_SCALABLE_L2, scalable_acct) - // TODO(0xaatif): https://github.com/0xPolygonZero/zk_evm/issues/275 - // Add an entry API - .expect("insert must succeed with the same key as a successful `get`"); + trim_state.insert(<WorldT::SubtriePath>::from(ADDRESS_SCALABLE_L2)); // Update GER contract's storage if necessary if let Some((root, l1blockhash)) = ger_data { - let ger_storage = storage - .get_mut(&GLOBAL_EXIT_ROOT_ADDRESS_HASHED) - .context("missing GER contract storage trie")?; let ger_trim = trim_storage.entry(GLOBAL_EXIT_ROOT_ADDRESS).or_default(); let mut arr = [0; 64];
@@ -870,19 +775,14 @@ where U256::from(GLOBAL_EXIT_ROOT_STORAGE_POS.1).to_big_endian(&mut arr[32..64]); let slot = MptKey::from_hash(keccak_hash::keccak(arr)); - ger_storage.insert(slot, alloy::rlp::encode(l1blockhash.compat()))?; + world.store_hash( + GLOBAL_EXIT_ROOT_ADDRESS, + keccak_hash::keccak(arr), + l1blockhash, + )?; ger_trim.insert(slot); - trim_state.insert(<StateTrieT::Key>::from(GLOBAL_EXIT_ROOT_ADDRESS)); - let mut ger_acct = state_trie - .get_by_address(GLOBAL_EXIT_ROOT_ADDRESS) - .context("missing GER contract address")?; - ger_acct.storage_root = ger_storage.root(); - state_trie - .insert_by_address(GLOBAL_EXIT_ROOT_ADDRESS, ger_acct) - // TODO(0xaatif): https://github.com/0xPolygonZero/zk_evm/issues/275 - // Add an entry API - .expect("insert must succeed with the same key as a successful `get`"); + trim_state.insert(<WorldT::SubtriePath>::from(GLOBAL_EXIT_ROOT_ADDRESS)); } Ok(()) }
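Both writes in `do_scalable_hook` build their storage keys with the standard Solidity mapping-slot rule: keccak256 of the 32-byte big-endian key concatenated with the 32-byte big-endian slot index, which is exactly the 64-byte `arr` buffer pattern above. A self-contained sketch of that computation; the key and slot-index values below are hypothetical, chosen only to illustrate the layout:

    use ethereum_types::{H256, U256};

    /// Solidity mapping slot: keccak256(key ++ slot_index), both 32-byte big-endian.
    fn mapping_slot(key: U256, slot_index: U256) -> H256 {
        let mut buf = [0u8; 64];
        key.to_big_endian(&mut buf[0..32]);
        slot_index.to_big_endian(&mut buf[32..64]);
        keccak_hash::keccak(buf)
    }

    fn main() {
        // E.g. the previous block's state root is stored under
        // keccak(block_number - 1 ++ STATE_ROOT_STORAGE_POS) in the scalable contract.
        // Hypothetical inputs:
        let slot = mapping_slot(U256::from(41), U256::from(1));
        println!("{slot:x}");
    }

The hook then feeds the same hash both into `MptKey::from_hash` (to record the trimmed slot) and into `world.store_hash` (to perform the write).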
- .expect("insert must succeed with the same key as a successful `get`"); + trim_state.insert(::from(GLOBAL_EXIT_ROOT_ADDRESS)); } Ok(()) @@ -893,26 +793,22 @@ where /// /// This is Cancun-specific, and runs at the start of the block, /// before any transactions (as per the EIP). -fn do_beacon_hook( +fn do_beacon_hook( block_timestamp: U256, - storage: &mut BTreeMap, trim_storage: &mut BTreeMap>, parent_beacon_block_root: H256, - trim_state: &mut BTreeSet, - state_trie: &mut StateTrieT, + trim_state: &mut BTreeSet, + world: &mut WorldT, ) -> anyhow::Result<()> where - StateTrieT::Key: From
+ Ord, + WorldT::SubtriePath: From<Address>
+ Ord, { use evm_arithmetization::testing_utils::{ - BEACON_ROOTS_CONTRACT_ADDRESS, BEACON_ROOTS_CONTRACT_ADDRESS_HASHED, HISTORY_BUFFER_LENGTH, + BEACON_ROOTS_CONTRACT_ADDRESS, HISTORY_BUFFER_LENGTH, }; let timestamp_idx = block_timestamp % HISTORY_BUFFER_LENGTH.value; let root_idx = timestamp_idx + HISTORY_BUFFER_LENGTH.value; - let beacon_storage = storage - .get_mut(&BEACON_ROOTS_CONTRACT_ADDRESS_HASHED) - .context("missing beacon contract storage trie")?; let beacon_trim = trim_storage .entry(BEACON_ROOTS_CONTRACT_ADDRESS) .or_default();
@@ -928,23 +824,16 @@ where beacon_trim.insert(slot); match u.is_zero() { - true => beacon_trim.extend(beacon_storage.reporting_remove(slot)?), + true => { + beacon_trim.extend(world.reporting_destroy_slot(BEACON_ROOTS_CONTRACT_ADDRESS, ix)?) + } false => { - beacon_storage.insert(slot, alloy::rlp::encode(u.compat()))?; + world.store_int(BEACON_ROOTS_CONTRACT_ADDRESS, ix, u)?; beacon_trim.insert(slot); } } } - trim_state.insert(<StateTrieT::Key>::from(BEACON_ROOTS_CONTRACT_ADDRESS)); - let mut beacon_acct = state_trie - .get_by_address(BEACON_ROOTS_CONTRACT_ADDRESS) - .context("missing beacon contract address")?; - beacon_acct.storage_root = beacon_storage.root(); - state_trie - .insert_by_address(BEACON_ROOTS_CONTRACT_ADDRESS, beacon_acct) - // TODO(0xaatif): https://github.com/0xPolygonZero/zk_evm/issues/275 - // Add an entry API - .expect("insert must succeed with the same key as a successful `get`"); + trim_state.insert(<WorldT::SubtriePath>::from(BEACON_ROOTS_CONTRACT_ADDRESS)); Ok(()) }
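For intuition on `timestamp_idx` and `root_idx` in `do_beacon_hook`: EIP-4788 keeps timestamps and parent beacon block roots in two ring buffers of `HISTORY_BUFFER_LENGTH` slots each, with the root living exactly one buffer-length above the timestamp slot. A worked sketch, assuming the EIP's buffer length of 8191 (the constant's value is not shown in this diff):

    use ethereum_types::U256;

    fn main() {
        // 8191 per EIP-4788; assumed here, the diff only shows HISTORY_BUFFER_LENGTH.value.
        let history_buffer_length = U256::from(8191u64);
        let block_timestamp = U256::from(1_700_000_000u64); // hypothetical

        let timestamp_idx = block_timestamp % history_buffer_length;
        let root_idx = timestamp_idx + history_buffer_length;

        // The hook writes the timestamp at `timestamp_idx` and the
        // parent beacon block root at `root_idx`; both always fall
        // inside the contract's 2 * 8191 reserved slots.
        assert!(root_idx < history_buffer_length * 2);
        println!("timestamp slot = {timestamp_idx}, root slot = {root_idx}");
    }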
+/// +/// The core of this library is agnostic over the (combined) +/// state and storage representation - see [`world::World`] for more. const _DEVELOPER_DOCS: () = (); mod interface; @@ -60,6 +67,7 @@ mod tries; mod type1; mod type2; mod wire; +mod world; pub use core::{entrypoint, is_precompile, WireDisposition}; diff --git a/trace_decoder/src/observer.rs b/trace_decoder/src/observer.rs index f9811e87c..428cac086 100644 --- a/trace_decoder/src/observer.rs +++ b/trace_decoder/src/observer.rs @@ -1,15 +1,14 @@ -use std::collections::BTreeMap; use std::marker::PhantomData; -use ethereum_types::{H256, U256}; +use ethereum_types::U256; use crate::core::IntraBlockTries; -use crate::tries::{ReceiptTrie, StorageTrie, TransactionTrie}; +use crate::tries::{ReceiptTrie, TransactionTrie}; /// Observer API for the trace decoder. /// Observer is used to collect various debugging and metadata info /// from the trace decoder run. -pub trait Observer { +pub trait Observer { /// Collect tries after the transaction/batch execution. /// /// Passing the arguments one by one through reference, because @@ -19,8 +18,7 @@ pub trait Observer { &mut self, block: U256, batch: usize, - state_trie: &StateTrieT, - storage: &BTreeMap, + state_trie: &WorldT, transaction_trie: &TransactionTrie, receipt_trie: &ReceiptTrie, ); @@ -55,13 +53,12 @@ impl TriesObserver { } } -impl Observer for TriesObserver { +impl Observer for TriesObserver { fn collect_tries( &mut self, block: U256, batch: usize, - state_trie: &StateTrieT, - storage: &BTreeMap, + state_trie: &WorldT, transaction_trie: &TransactionTrie, receipt_trie: &ReceiptTrie, ) { @@ -69,8 +66,7 @@ impl Observer for TriesObserver { block, batch, tries: IntraBlockTries { - state: state_trie.clone(), - storage: storage.clone(), + world: state_trie.clone(), transaction: transaction_trie.clone(), receipt: receipt_trie.clone(), }, @@ -99,13 +95,12 @@ impl DummyObserver { } } -impl Observer for DummyObserver { +impl Observer for DummyObserver { fn collect_tries( &mut self, _block: U256, _batch: usize, - _state_trie: &StateTrieT, - _storage: &BTreeMap, + _state_trie: &WorldT, _transaction_trie: &TransactionTrie, _receipt_trie: &ReceiptTrie, ) { diff --git a/trace_decoder/src/tries.rs b/trace_decoder/src/tries.rs index 91add4d98..7da8d2cfa 100644 --- a/trace_decoder/src/tries.rs +++ b/trace_decoder/src/tries.rs @@ -1,103 +1,18 @@ -//! Principled trie types and abstractions used in this library. +//! Principled trie types used in this library. use core::fmt; -use std::{cmp, collections::BTreeMap, marker::PhantomData}; +use std::cmp; use anyhow::ensure; use bitvec::{array::BitArray, slice::BitSlice}; use copyvec::CopyVec; -use ethereum_types::{Address, BigEndianHash as _, H256, U256}; +use ethereum_types::{Address, H256, U256}; use evm_arithmetization::generation::mpt::AccountRlp; use mpt_trie::partial_trie::{HashedPartialTrie, Node, OnOrphanedHashNode, PartialTrie as _}; use u4::{AsNibbles, U4}; -/// See . -/// -/// Portions of the trie may be _hashed out_: see [`Self::insert_hash`]. -#[derive(Debug, Clone, PartialEq, Eq)] -struct TypedMpt { - inner: HashedPartialTrie, - _ty: PhantomData T>, -} - -impl TypedMpt { - const PANIC_MSG: &str = "T encoding/decoding should round-trip,\ - and only encoded `T`s are ever inserted"; - fn new() -> Self { - Self { - inner: HashedPartialTrie::new(Node::Empty), - _ty: PhantomData, - } - } - /// Insert a node which represents an out-of-band sub-trie. - /// - /// See [module documentation](super) for more. 
- fn insert_hash(&mut self, key: MptKey, hash: H256) -> anyhow::Result<()> { - self.inner.insert(key.into_nibbles(), hash)?; - Ok(()) - } - /// Returns [`Err`] if the `key` crosses into a part of the trie that - /// is hashed out. - fn insert(&mut self, key: MptKey, value: T) -> anyhow::Result<()> - where - T: rlp::Encodable + rlp::Decodable, - { - self.inner - .insert(key.into_nibbles(), rlp::encode(&value).to_vec())?; - Ok(()) - } - /// Note that this returns [`None`] if `key` crosses into a part of the - /// trie that is hashed out. - /// - /// # Panics - /// - If [`rlp::decode`]-ing for `T` doesn't round-trip. - fn get(&self, key: MptKey) -> Option - where - T: rlp::Decodable, - { - let bytes = self.inner.get(key.into_nibbles())?; - Some(rlp::decode(bytes).expect(Self::PANIC_MSG)) - } - const fn as_hashed_partial_trie(&self) -> &HashedPartialTrie { - &self.inner - } - fn as_mut_hashed_partial_trie_unchecked(&mut self) -> &mut HashedPartialTrie { - &mut self.inner - } - fn root(&self) -> H256 { - self.inner.hash() - } - /// Note that this returns owned paths and items. - fn iter(&self) -> impl Iterator + '_ - where - T: rlp::Decodable, - { - self.inner.keys().filter_map(|nib| { - let path = MptKey::from_nibbles(nib); - Some((path, self.get(path)?)) - }) - } -} - -impl Default for TypedMpt { - fn default() -> Self { - Self::new() - } -} - -impl<'a, T> IntoIterator for &'a TypedMpt -where - T: rlp::Decodable, -{ - type Item = (MptKey, T); - type IntoIter = Box + 'a>; - fn into_iter(self) -> Self::IntoIter { - Box::new(self.iter()) - } -} - /// Bounded sequence of [`U4`], -/// used as a key for [`TypedMpt`]. +/// used as a key for [MPT](HashedPartialTrie) types in this module. /// /// Semantically equivalent to [`mpt_trie::nibbles::Nibbles`]. #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Default)] @@ -116,17 +31,6 @@ impl MptKey { pub fn new(components: impl IntoIterator) -> anyhow::Result { Ok(MptKey(CopyVec::try_from_iter(components)?)) } - pub fn into_hash_left_padded(mut self) -> H256 { - for _ in 0..self.0.spare_capacity_mut().len() { - self.0.insert(0, U4::Dec00) - } - let mut packed = [0u8; 32]; - AsNibbles(&mut packed).pack_from_slice(&self.0); - H256::from_slice(&packed) - } - pub fn from_address(address: Address) -> Self { - Self::from_hash(keccak_hash::keccak(address)) - } pub fn from_slot_position(pos: U256) -> Self { let mut bytes = [0; 32]; pos.to_big_endian(&mut bytes); @@ -189,7 +93,7 @@ fn mpt_key_into_hash() { } /// Bounded sequence of bits, -/// used as a key for [`StateSmt`]. +/// used as a key for SMT tries. /// /// Semantically equivalent to [`smt_trie::bits::Bits`]. #[derive(Clone, Copy)] @@ -366,246 +270,90 @@ impl From for HashedPartialTrie { } } -/// TODO(0xaatif): document this after refactoring is done -pub trait StateTrie { - type Key; - fn insert_by_address(&mut self, address: Address, account: AccountRlp) -> anyhow::Result<()>; - fn get_by_address(&self, address: Address) -> Option; - fn reporting_remove(&mut self, address: Address) -> anyhow::Result>; - /// _Hash out_ parts of the trie that aren't in `addresses`. - fn mask(&mut self, address: impl IntoIterator) -> anyhow::Result<()>; - fn iter(&self) -> impl Iterator + '_; - fn root(&self) -> H256; -} - /// Global, [`Address`] `->` [`AccountRlp`]. /// /// See -#[derive(Debug, Clone, Default)] +#[derive(Debug, Clone)] pub struct StateMpt { - typed: TypedMpt, + /// Values are always [`rlp`]-encoded [`AccountRlp`], + /// inserted at [256 bits](MptKey::from_hash). 
+ inner: HashedPartialTrie, +} + +impl Default for StateMpt { + fn default() -> Self { + Self::new() + } +} + +#[track_caller] +fn assert_rlp_account(bytes: impl AsRef<[u8]>) -> AccountRlp { + rlp::decode(bytes.as_ref()).expect("invalid RLP in StateMPT") } impl StateMpt { - pub fn new(strategy: OnOrphanedHashNode) -> Self { + pub fn new() -> Self { Self { - typed: TypedMpt { - inner: HashedPartialTrie::new_with_strategy(Node::Empty, strategy), - _ty: PhantomData, - }, + inner: HashedPartialTrie::new_with_strategy( + Node::Empty, + // This frontend is intended to be used with our custom `zeroTracer`, + // which covers branch-to-extension collapse edge cases. + OnOrphanedHashNode::CollapseToExtension, + ), } } - /// Insert a _hashed out_ part of the trie - pub fn insert_hash_by_key(&mut self, key: MptKey, hash: H256) -> anyhow::Result<()> { - self.typed.insert_hash(key, hash) - } - #[deprecated = "prefer operations on `Address` where possible, as SMT support requires this"] - pub fn insert_by_hashed_address( - &mut self, - key: H256, - account: AccountRlp, - ) -> anyhow::Result<()> { - self.typed.insert(MptKey::from_hash(key), account) + pub fn as_hashed_partial_trie(&self) -> &HashedPartialTrie { + &self.inner } - pub fn iter(&self) -> impl Iterator + '_ { - self.typed - .iter() - .map(|(key, rlp)| (key.into_hash().expect("key is always H256"), rlp)) + /// Insert a _hashed out_ part of the trie + pub fn insert_hash(&mut self, key: MptKey, hash: H256) -> anyhow::Result<()> { + Ok(self.inner.insert(key.into_nibbles(), hash)?) } - pub fn as_hashed_partial_trie(&self) -> &mpt_trie::partial_trie::HashedPartialTrie { - self.typed.as_hashed_partial_trie() + pub fn insert(&mut self, key: H256, account: AccountRlp) -> anyhow::Result<()> { + Ok(self.inner.insert( + MptKey::from_hash(key).into_nibbles(), + rlp::encode(&account).to_vec(), + )?) 
} -} - -impl StateTrie for StateMpt { - type Key = MptKey; - fn insert_by_address(&mut self, address: Address, account: AccountRlp) -> anyhow::Result<()> { - #[expect(deprecated)] - self.insert_by_hashed_address(keccak_hash::keccak(address), account) + pub fn get(&self, key: H256) -> Option { + self.inner + .get(MptKey::from_hash(key).into_nibbles()) + .map(assert_rlp_account) } - fn get_by_address(&self, address: Address) -> Option { - self.typed - .get(MptKey::from_hash(keccak_hash::keccak(address))) + pub fn root(&self) -> H256 { + self.inner.hash() } - /// Delete the account at `address`, returning any remaining branch on - /// collapse - fn reporting_remove(&mut self, address: Address) -> anyhow::Result> { + pub fn reporting_remove(&mut self, address: Address) -> anyhow::Result> { delete_node_and_report_remaining_key_if_branch_collapsed( - self.typed.as_mut_hashed_partial_trie_unchecked(), - MptKey::from_address(address), + &mut self.inner, + MptKey::from_hash(keccak_hash::keccak(address)), ) } - fn mask(&mut self, addresses: impl IntoIterator) -> anyhow::Result<()> { - let inner = mpt_trie::trie_subsets::create_trie_subset( - self.typed.as_hashed_partial_trie(), + pub fn mask(&mut self, addresses: impl IntoIterator) -> anyhow::Result<()> { + let new = mpt_trie::trie_subsets::create_trie_subset( + &self.inner, addresses.into_iter().map(MptKey::into_nibbles), )?; - self.typed = TypedMpt { - inner, - _ty: PhantomData, - }; + self.inner = new; Ok(()) } - fn iter(&self) -> impl Iterator + '_ { - self.typed - .iter() - .map(|(key, rlp)| (key.into_hash().expect("key is always H256"), rlp)) - } - fn root(&self) -> H256 { - self.typed.root() + pub fn iter(&self) -> impl Iterator + '_ { + self.inner.items().filter_map(|(key, rlp)| match rlp { + mpt_trie::trie_ops::ValOrHash::Val(vec) => Some(( + MptKey::from_nibbles(key).into_hash().expect("bad depth"), + assert_rlp_account(vec), + )), + mpt_trie::trie_ops::ValOrHash::Hash(_) => None, + }) } } impl From for HashedPartialTrie { - fn from(value: StateMpt) -> Self { - let StateMpt { - typed: TypedMpt { inner, _ty }, - } = value; + fn from(StateMpt { inner }: StateMpt) -> Self { inner } } -// TODO(0xaatif): https://github.com/0xPolygonZero/zk_evm/issues/706 -// We're covering for [`smt_trie`] in a couple of ways: -// - insertion operations aren't fallible, they just panic. -// - it documents a requirement that `set_hash` is called before `set`. 
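The removed comment above records two [`smt_trie`] quirks that still hold: writes panic rather than return errors, and hashed-out subtries must be installed with `set_hash` before any `set` calls. A minimal sketch of the safe call order, assuming the crate's `MemoryDb` backend (as `Type2World::as_smt` uses later in this diff):

```rust
// Sketch only: the call order smt_trie expects. `addr` and `balance`
// are made-up inputs; error handling is omitted because these
// operations panic rather than fail.
use ethereum_types::{Address, U256};
use smt_trie::{db::MemoryDb, keys::key_balance, smt::Smt};

fn build_smt(addr: Address, balance: U256) -> Smt<MemoryDb> {
    let mut smt = Smt::<MemoryDb>::default();
    // 1. Install any hashed-out subtries first, via `smt.set_hash(..)`.
    //    (None in this tiny example.)
    // 2. Only then write concrete values.
    smt.set(key_balance(addr), balance);
    smt
}
```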
-#[derive(Clone, Debug)] -pub struct StateSmt { - address2state: BTreeMap, - hashed_out: BTreeMap, -} - -impl StateTrie for StateSmt { - type Key = SmtKey; - fn insert_by_address(&mut self, address: Address, account: AccountRlp) -> anyhow::Result<()> { - self.address2state.insert(address, account); - Ok(()) - } - fn get_by_address(&self, address: Address) -> Option { - self.address2state.get(&address).copied() - } - fn reporting_remove(&mut self, address: Address) -> anyhow::Result> { - self.address2state.remove(&address); - Ok(None) - } - fn mask(&mut self, address: impl IntoIterator) -> anyhow::Result<()> { - let _ = address; - Ok(()) - } - fn iter(&self) -> impl Iterator + '_ { - self.address2state - .iter() - .map(|(addr, acct)| (keccak_hash::keccak(addr), *acct)) - } - fn root(&self) -> H256 { - conv_hash::smt2eth(self.as_smt().root) - } -} - -impl StateSmt { - pub(crate) fn new_unchecked( - address2state: BTreeMap, - hashed_out: BTreeMap, - ) -> Self { - Self { - address2state, - hashed_out, - } - } - - fn as_smt(&self) -> smt_trie::smt::Smt { - let Self { - address2state, - hashed_out, - } = self; - let mut smt = smt_trie::smt::Smt::::default(); - for (k, v) in hashed_out { - smt.set_hash(k.into_smt_bits(), conv_hash::eth2smt(*v)); - } - for ( - addr, - AccountRlp { - nonce, - balance, - storage_root, - code_hash, - }, - ) in address2state - { - smt.set(smt_trie::keys::key_nonce(*addr), *nonce); - smt.set(smt_trie::keys::key_balance(*addr), *balance); - smt.set(smt_trie::keys::key_code(*addr), code_hash.into_uint()); - smt.set( - // TODO(0xaatif): https://github.com/0xPolygonZero/zk_evm/issues/707 - // combined abstraction for state and storage - smt_trie::keys::key_storage(*addr, U256::zero()), - storage_root.into_uint(), - ); - } - smt - } -} - -mod conv_hash { - //! We [`u64::to_le_bytes`] because: - //! - Reference go code just puns the bytes: - //! - It's better to fix the endianness for correctness. - //! - Most (consumer) CPUs are little-endian. - - use std::array; - - use ethereum_types::H256; - use itertools::Itertools as _; - use plonky2::{ - field::{ - goldilocks_field::GoldilocksField, - types::{Field as _, PrimeField64}, - }, - hash::hash_types::HashOut, - }; - - /// # Panics - /// - On certain inputs if `debug_assertions` are enabled. See - /// [`GoldilocksField::from_canonical_u64`] for more. - pub fn eth2smt(H256(bytes): H256) -> smt_trie::smt::HashOut { - let mut bytes = bytes.into_iter(); - // (no unsafe, no unstable) - let ret = HashOut { - elements: array::from_fn(|_ix| { - let (a, b, c, d, e, f, g, h) = bytes.next_tuple().unwrap(); - GoldilocksField::from_canonical_u64(u64::from_le_bytes([a, b, c, d, e, f, g, h])) - }), - }; - assert_eq!(bytes.len(), 0); - ret - } - pub fn smt2eth(HashOut { elements }: smt_trie::smt::HashOut) -> H256 { - H256( - build_array::ArrayBuilder::from_iter( - elements - .iter() - .map(GoldilocksField::to_canonical_u64) - .flat_map(u64::to_le_bytes), - ) - .build_exact() - .unwrap(), - ) - } - - #[test] - fn test() { - use plonky2::field::types::Field64 as _; - let mut max = std::iter::repeat(GoldilocksField::ORDER - 1).flat_map(u64::to_le_bytes); - for h in [ - H256::zero(), - H256(array::from_fn(|ix| ix as u8)), - H256(array::from_fn(|_| max.next().unwrap())), - ] { - assert_eq!(smt2eth(eth2smt(h)), h); - } - } -} - /// Global, per-account. 
/// /// See @@ -622,10 +370,9 @@ impl StorageTrie { pub fn get(&mut self, key: &MptKey) -> Option<&[u8]> { self.untyped.get(key.into_nibbles()) } - pub fn insert(&mut self, key: MptKey, value: Vec) -> anyhow::Result>> { - let prev = self.get(&key).map(Vec::from); + pub fn insert(&mut self, key: MptKey, value: Vec) -> anyhow::Result<()> { self.untyped.insert(key.into_nibbles(), value)?; - Ok(prev) + Ok(()) } pub fn insert_hash(&mut self, key: MptKey, hash: H256) -> anyhow::Result<()> { self.untyped.insert(key.into_nibbles(), hash)?; diff --git a/trace_decoder/src/type1.rs b/trace_decoder/src/type1.rs index c44beaec7..c982a1ab3 100644 --- a/trace_decoder/src/type1.rs +++ b/trace_decoder/src/type1.rs @@ -15,25 +15,13 @@ use u4::U4; use crate::tries::{MptKey, StateMpt, StorageTrie}; use crate::wire::{Instruction, SmtLeaf}; -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Default)] pub struct Frontend { pub state: StateMpt, pub code: BTreeSet>>, pub storage: BTreeMap, } -impl Default for Frontend { - // This frontend is intended to be used with our custom `zeroTracer`, - // which covers branch-to-extension collapse edge cases. - fn default() -> Self { - Self { - state: StateMpt::new(OnOrphanedHashNode::CollapseToExtension), - code: BTreeSet::new(), - storage: BTreeMap::new(), - } - } -} - pub fn frontend(instructions: impl IntoIterator) -> anyhow::Result { let executions = execute(instructions)?; ensure!( @@ -66,7 +54,7 @@ fn visit( Node::Hash(Hash { raw_hash }) => { frontend .state - .insert_hash_by_key(MptKey::new(path.iter().copied())?, raw_hash.into())?; + .insert_hash(MptKey::new(path.iter().copied())?, raw_hash.into())?; } Node::Leaf(Leaf { key, value }) => { let path = MptKey::new(path.iter().copied().chain(key))? @@ -105,8 +93,7 @@ fn visit( } }, }; - #[expect(deprecated)] // this is MPT-specific code - frontend.state.insert_by_hashed_address(path, account)?; + frontend.state.insert(path, account)?; } } } @@ -379,8 +366,6 @@ fn finish_stack(v: &mut Vec) -> anyhow::Result { #[test] fn test_tries() { - use crate::tries::StateTrie as _; - for (ix, case) in serde_json::from_str::>(include_str!("cases/zero_jerigon.json")) .unwrap() @@ -393,7 +378,7 @@ fn test_tries() { assert_eq!(case.expected_state_root, frontend.state.root()); for (haddr, acct) in frontend.state.iter() { - if acct.storage_root != StateMpt::default().root() { + if acct.storage_root != StorageTrie::default().root() { assert!(frontend.storage.contains_key(&haddr)) } } diff --git a/trace_decoder/src/type2.rs b/trace_decoder/src/type2.rs index 44d13e89a..845260d47 100644 --- a/trace_decoder/src/type2.rs +++ b/trace_decoder/src/type2.rs @@ -5,29 +5,19 @@ use std::collections::{BTreeMap, HashSet}; use anyhow::{bail, ensure, Context as _}; use ethereum_types::{Address, U256}; -use evm_arithmetization::generation::mpt::AccountRlp; use itertools::EitherOrBoth; use keccak_hash::H256; use nunny::NonEmpty; use stackstack::Stack; use crate::{ - tries::{SmtKey, StateSmt}, + tries::SmtKey, wire::{Instruction, SmtLeaf, SmtLeafType}, + world::{Type2Entry, Type2World}, }; -/// Combination of all the [`SmtLeaf::node_type`]s -#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default)] -pub struct CollatedLeaf { - pub balance: Option, - pub nonce: Option, - pub code: Option, - pub code_length: Option, - pub storage: BTreeMap, -} - pub struct Frontend { - pub trie: StateSmt, + pub world: Type2World, pub code: HashSet>>, } @@ -36,13 +26,14 @@ pub struct Frontend { /// NOT call this function on untrusted inputs. 
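Given the warning above, typical in-crate usage of this frontend mirrors the rewritten `test_tries` at the bottom of this file. A condensed sketch (the `state_root_of` helper is hypothetical, and the `unwrap`s assume trusted input, exactly as the test does):

```rust
// Sketch: lowering trusted type-2 wire bytes to a state root.
// Mirrors the rewritten test below; `state_root_of` is illustrative.
use crate::world::World as _;

fn state_root_of(bytes: &[u8]) -> ethereum_types::H256 {
    let instructions = crate::wire::parse(bytes).unwrap();
    let mut frontend = frontend(instructions).unwrap();
    frontend.world.root()
}
```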
pub fn frontend(instructions: impl IntoIterator) -> anyhow::Result { let (node, code) = fold(instructions).context("couldn't fold smt from instructions")?; - let trie = node2trie(node).context("couldn't construct trie and collation from folded node")?; - Ok(Frontend { trie, code }) + let world = + node2world(node).context("couldn't construct trie and collation from folded node")?; + Ok(Frontend { world, code }) } /// Node in a binary (SMT) tree. /// -/// This is an intermediary type on the way to [`StateSmt`]. +/// This is an intermediary type on the way to [`Type2World`]. enum Node { Branch(EitherOrBoth>), Hash([u8; 32]), @@ -113,45 +104,16 @@ fn fold1(instructions: impl IntoIterator) -> anyhow::Result< } } -fn node2trie(node: Node) -> anyhow::Result { +fn node2world(node: Node) -> anyhow::Result { let mut hashes = BTreeMap::new(); let mut leaves = BTreeMap::new(); visit(&mut hashes, &mut leaves, Stack::new(), node)?; - Ok(StateSmt::new_unchecked( - leaves - .into_iter() - .map( - |( - addr, - CollatedLeaf { - balance, - nonce, - // TODO(0xaatif): https://github.com/0xPolygonZero/zk_evm/issues/707 - // we shouldn't ignore these fields - code: _, - code_length: _, - storage: _, - }, - )| { - ( - addr, - AccountRlp { - nonce: nonce.unwrap_or_default(), - balance: balance.unwrap_or_default(), - storage_root: H256::zero(), - code_hash: H256::zero(), - }, - ) - }, - ) - .collect(), - hashes, - )) + Ok(Type2World::new_unchecked(leaves, hashes)) } fn visit( hashes: &mut BTreeMap, - leaves: &mut BTreeMap, + leaves: &mut BTreeMap, path: Stack, node: Node, ) -> anyhow::Result<()> { @@ -213,65 +175,7 @@ fn visit( #[test] fn test_tries() { - type Smt = smt_trie::smt::Smt; - use ethereum_types::BigEndianHash as _; - use plonky2::field::types::{Field, Field64 as _}; - - // TODO(0xaatif): https://github.com/0xPolygonZero/zk_evm/issues/707 - // this logic should live in StateSmt, but we need to - // - abstract over state and storage tries - // - parameterize the account types - // we preserve this code as a tested record of how it _should_ - // be done. 
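The record preserved just below boils down to one point: a type-2 world stores one SMT slot per account field (balance, nonce, code hash, code length, and each storage slot), rather than one RLP-encoded blob per account as an MPT does. A condensed sketch of that layout, using `smt_trie`'s key functions with made-up account values:

```rust
// Sketch: the per-field SMT layout used by type-2 worlds.
// Each account field lives at its own key, derived from the address.
use ethereum_types::{Address, U256};
use smt_trie::{
    db::MemoryDb,
    keys::{key_balance, key_code_length, key_nonce, key_storage},
    smt::Smt,
};

fn main() {
    let addr = Address::repeat_byte(0x42); // made-up address
    let mut smt = Smt::<MemoryDb>::default();

    smt.set(key_balance(addr), U256::from(1_000_000u64));
    smt.set(key_nonce(addr), U256::from(7u64));
    smt.set(key_code_length(addr), U256::zero());
    // Storage slots hang off the same address, keyed per slot.
    smt.set(key_storage(addr, U256::zero()), U256::from(0xdeadbeefu64));

    println!("root = {}", smt_trie::utils::hashout2u(smt.root));
}
```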
- fn node2trie(node: Node) -> anyhow::Result { - let mut trie = Smt::default(); - let mut hashes = BTreeMap::new(); - let mut leaves = BTreeMap::new(); - visit(&mut hashes, &mut leaves, Stack::new(), node)?; - for (key, hash) in hashes { - trie.set_hash( - key.into_smt_bits(), - smt_trie::smt::HashOut { - elements: { - let ethereum_types::U256(arr) = hash.into_uint(); - for u in arr { - ensure!(u < smt_trie::smt::F::ORDER); - } - arr.map(smt_trie::smt::F::from_canonical_u64) - }, - }, - ); - } - for ( - addr, - CollatedLeaf { - balance, - nonce, - code, - code_length, - storage, - }, - ) in leaves - { - use smt_trie::keys::{key_balance, key_code, key_code_length, key_nonce, key_storage}; - - for (value, key_fn) in [ - (balance, key_balance as fn(_) -> _), - (nonce, key_nonce), - (code, key_code), - (code_length, key_code_length), - ] { - if let Some(value) = value { - trie.set(key_fn(addr), value); - } - } - for (slot, value) in storage { - trie.set(key_storage(addr, slot), value); - } - } - Ok(trie) - } - + use crate::world::World as _; for (ix, case) in serde_json::from_str::>(include_str!("cases/hermez_cdk_erigon.json")) .unwrap() @@ -279,13 +183,7 @@ fn test_tries() { .enumerate() { println!("case {}", ix); - let instructions = crate::wire::parse(&case.bytes).unwrap(); - let (node, _code) = fold(instructions).unwrap(); - let trie = node2trie(node).unwrap(); - assert_eq!(case.expected_state_root, { - let mut it = [0; 32]; - smt_trie::utils::hashout2u(trie.root).to_big_endian(&mut it); - ethereum_types::H256(it) - }); + let mut frontend = frontend(crate::wire::parse(&case.bytes).unwrap()).unwrap(); + assert_eq!(case.expected_state_root, frontend.world.root()); } } diff --git a/trace_decoder/src/world.rs b/trace_decoder/src/world.rs new file mode 100644 index 000000000..fa68854e4 --- /dev/null +++ b/trace_decoder/src/world.rs @@ -0,0 +1,420 @@ +use std::collections::{BTreeMap, BTreeSet}; + +use alloy_compat::Compat as _; +use anyhow::{ensure, Context as _}; +use either::Either; +use ethereum_types::{Address, BigEndianHash as _, U256}; +use keccak_hash::H256; + +use crate::tries::{MptKey, SmtKey, StateMpt, StorageTrie}; + +/// The [core](crate::core) of this crate is agnostic over state and storage +/// representations. +/// +/// This is the common interface to those data structures. +/// See also [crate::_DEVELOPER_DOCS]. +pub(crate) trait World { + /// (State) subtries may be _hashed out. + /// This type is a key which may identify a subtrie. + type SubtriePath; + + ////////////////////// + /// Account operations + ////////////////////// + + /// Whether the state contains an account at the given address. + /// + /// `false` is not necessarily definitive - the address may belong to a + /// _hashed out_ subtrie. + fn contains(&mut self, address: Address) -> anyhow::Result; + + /// Update the balance for the account at the given address. + /// + /// Creates a new account at `address` if it does not exist. + fn update_balance(&mut self, address: Address, f: impl FnOnce(&mut U256)) + -> anyhow::Result<()>; + + /// Update the nonce for the account at the given address. + /// + /// Creates a new account at `address` if it does not exist. + fn update_nonce(&mut self, address: Address, f: impl FnOnce(&mut U256)) -> anyhow::Result<()>; + + /// Update the code for the account at the given address. + /// + /// Creates a new account at `address` if it does not exist. 
+    fn set_code(&mut self, address: Address, code: Either<&[u8], H256>) -> anyhow::Result<()>;
+
+    /// The [core](crate::core) of this crate tracks required subtries for
+    /// proving.
+    ///
+    /// In case of a state delete, it may be that certain parts of the subtrie
+    /// must be retained. If so, it will be returned as [`Some`].
+    fn reporting_destroy(&mut self, address: Address) -> anyhow::Result<Option<Self::SubtriePath>>;
+
+    //////////////////////
+    /// Storage operations
+    //////////////////////
+
+    /// Create storage for the account at the given address.
+    ///
+    /// It may not be an error if the address already exists.
+    fn create_storage(&mut self, address: Address) -> anyhow::Result<()>;
+
+    /// Destroy storage for the given address' account.
+    fn destroy_storage(&mut self, address: Address) -> anyhow::Result<()>;
+
+    /// Store an integer for the given account at the given `slot`.
+    fn store_int(&mut self, address: Address, slot: U256, value: U256) -> anyhow::Result<()>;
+    /// Store a 256-bit `value` for the given account at the given hashed slot.
+    fn store_hash(&mut self, address: Address, hash: H256, value: H256) -> anyhow::Result<()>;
+
+    /// Load an integer from the given account at the given `slot`.
+    fn load_int(&mut self, address: Address, slot: U256) -> anyhow::Result<U256>;
+
+    /// Delete the given slot from the given account's storage.
+    ///
+    /// In case of a delete, it may be that certain parts of the subtrie
+    /// must be retained. If so, it will be returned as [`Some`].
+    fn reporting_destroy_slot(
+        &mut self,
+        address: Address,
+        slot: U256,
+    ) -> anyhow::Result<Option<MptKey>>;
+    /// _Hash out_ parts of the storage tries that aren't in `masks`.
+    fn mask_storage(&mut self, masks: BTreeMap<Address, BTreeSet<MptKey>>) -> anyhow::Result<()>;
+
+    ////////////////////
+    /// Other operations
+    ////////////////////
+
+    /// _Hash out_ parts of the (state) trie that aren't in `paths`.
+    fn mask(&mut self, paths: impl IntoIterator<Item = Self::SubtriePath>) -> anyhow::Result<()>;
+
+    /// Return the root hash that identifies the world.
+    fn root(&mut self) -> H256;
+}
+
+#[derive(Clone, Debug)]
+pub struct Type1World {
+    state: StateMpt,
+    /// Writes to storage should be reconciled with
+    /// [`storage_root`](evm_arithmetization::generation::mpt::AccountRlp)s.
+    storage: BTreeMap<H256, StorageTrie>,
+}
+
+impl Type1World {
+    pub fn new(state: StateMpt, mut storage: BTreeMap<H256, StorageTrie>) -> anyhow::Result<Self> {
+        // Initialise the storage tries.
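+        // Invariant: each account's `storage_root` must equal the root of its
+        // storage trie. Accounts whose storage was not sent over the wire get
+        // a fresh trie seeded with their `storage_root` as a hashed-out node
+        // at the empty key, so the `ensure!` below holds for them trivially.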
+ for (haddr, acct) in state.iter() { + let storage = storage.entry(haddr).or_insert_with(|| { + let mut it = StorageTrie::default(); + it.insert_hash(MptKey::default(), acct.storage_root) + .expect("empty trie insert cannot fail"); + it + }); + ensure!( + storage.root() == acct.storage_root, + "inconsistent initial storage for hashed address {haddr}" + ) + } + Ok(Self { state, storage }) + } + pub fn state_trie(&self) -> &mpt_trie::partial_trie::HashedPartialTrie { + self.state.as_hashed_partial_trie() + } + pub fn into_state_and_storage(self) -> (StateMpt, BTreeMap) { + let Self { state, storage } = self; + (state, storage) + } + fn get_storage_mut(&mut self, address: Address) -> anyhow::Result<&mut StorageTrie> { + self.storage + .get_mut(&keccak_hash::keccak(address)) + .context("no such storage") + } + fn on_storage( + &mut self, + address: Address, + f: impl FnOnce(&mut StorageTrie) -> anyhow::Result, + ) -> anyhow::Result { + let mut acct = self + .state + .get(keccak_hash::keccak(address)) + .context("no such account")?; + let storage = self.get_storage_mut(address)?; + let ret = f(storage)?; + acct.storage_root = storage.root(); + self.state.insert(keccak_hash::keccak(address), acct)?; + Ok(ret) + } +} + +impl World for Type1World { + type SubtriePath = MptKey; + fn contains(&mut self, address: Address) -> anyhow::Result { + Ok(self.state.get(keccak_hash::keccak(address)).is_some()) + } + fn update_balance( + &mut self, + address: Address, + f: impl FnOnce(&mut U256), + ) -> anyhow::Result<()> { + let key = keccak_hash::keccak(address); + let mut acct = self.state.get(key).unwrap_or_default(); + f(&mut acct.balance); + self.state.insert(key, acct) + } + fn update_nonce(&mut self, address: Address, f: impl FnOnce(&mut U256)) -> anyhow::Result<()> { + let key = keccak_hash::keccak(address); + let mut acct = self.state.get(key).unwrap_or_default(); + f(&mut acct.nonce); + self.state.insert(key, acct) + } + fn set_code(&mut self, address: Address, code: Either<&[u8], H256>) -> anyhow::Result<()> { + let key = keccak_hash::keccak(address); + let mut acct = self.state.get(key).unwrap_or_default(); + acct.code_hash = code.right_or_else(keccak_hash::keccak); + self.state.insert(key, acct) + } + fn reporting_destroy(&mut self, address: Address) -> anyhow::Result> { + self.state.reporting_remove(address) + } + fn mask( + &mut self, + addresses: impl IntoIterator, + ) -> anyhow::Result<()> { + self.state.mask(addresses) + } + fn root(&mut self) -> H256 { + self.state.root() + } + fn create_storage(&mut self, address: Address) -> anyhow::Result<()> { + let _clobbered = self + .storage + .insert(keccak_hash::keccak(address), StorageTrie::default()); + // ensure!(_clobbered.is_none()); // TODO(0xaatif): fails our tests + Ok(()) + } + fn destroy_storage(&mut self, address: Address) -> anyhow::Result<()> { + let removed = self.storage.remove(&keccak_hash::keccak(address)); + ensure!(removed.is_some()); + Ok(()) + } + + fn store_int(&mut self, address: Address, slot: U256, value: U256) -> anyhow::Result<()> { + self.on_storage(address, |it| { + it.insert( + MptKey::from_slot_position(slot), + alloy::rlp::encode(value.compat()), + ) + }) + } + + fn store_hash(&mut self, address: Address, hash: H256, value: H256) -> anyhow::Result<()> { + self.on_storage(address, |it| { + it.insert(MptKey::from_hash(hash), alloy::rlp::encode(value.compat())) + }) + } + + fn load_int(&mut self, address: Address, slot: U256) -> anyhow::Result { + let bytes = self + .get_storage_mut(address)? 
+ .get(&MptKey::from_slot_position(slot)) + .context(format!("no storage at slot {slot} for address {address:x}"))?; + Ok(rlp::decode(bytes)?) + } + + fn reporting_destroy_slot( + &mut self, + address: Address, + slot: U256, + ) -> anyhow::Result> { + self.on_storage(address, |it| { + it.reporting_remove(MptKey::from_slot_position(slot)) + }) + } + + fn mask_storage(&mut self, masks: BTreeMap>) -> anyhow::Result<()> { + let keep = masks + .keys() + .map(keccak_hash::keccak) + .collect::>(); + self.storage.retain(|haddr, _| keep.contains(haddr)); + for (addr, mask) in masks { + if let Some(it) = self.storage.get_mut(&keccak_hash::keccak(addr)) { + it.mask(mask)? + } + } + Ok(()) + } +} + +impl World for Type2World { + type SubtriePath = SmtKey; + fn contains(&mut self, address: Address) -> anyhow::Result { + Ok(self.accounts.contains_key(&address)) + } + fn update_balance( + &mut self, + address: Address, + f: impl FnOnce(&mut U256), + ) -> anyhow::Result<()> { + let acct = self.accounts.entry(address).or_default(); + f(acct.balance.get_or_insert(Default::default())); + Ok(()) + } + fn update_nonce(&mut self, address: Address, f: impl FnOnce(&mut U256)) -> anyhow::Result<()> { + let acct = self.accounts.entry(address).or_default(); + f(acct.nonce.get_or_insert(Default::default())); + Ok(()) + } + fn set_code(&mut self, address: Address, code: Either<&[u8], H256>) -> anyhow::Result<()> { + let acct = self.accounts.entry(address).or_default(); + match code { + Either::Left(bytes) => { + acct.code = Some(keccak_hash::keccak(bytes).into_uint()); + acct.code_length = Some(U256::from(bytes.len())) + } + Either::Right(hash) => acct.code = Some(hash.into_uint()), + }; + Ok(()) + } + fn reporting_destroy(&mut self, address: Address) -> anyhow::Result> { + self.accounts.remove(&address); + Ok(None) + } + fn create_storage(&mut self, address: Address) -> anyhow::Result<()> { + let _ = address; + Ok(()) + } + fn destroy_storage(&mut self, address: Address) -> anyhow::Result<()> { + self.accounts + .entry(address) + .and_modify(|it| it.storage.clear()); + Ok(()) + } + fn store_int(&mut self, address: Address, slot: U256, value: U256) -> anyhow::Result<()> { + self.accounts + .entry(address) + .or_default() + .storage + .insert(slot, value); + Ok(()) + } + fn store_hash(&mut self, address: Address, hash: H256, value: H256) -> anyhow::Result<()> { + self.accounts + .entry(address) + .or_default() + .storage + .insert(hash.into_uint(), value.into_uint()); + Ok(()) + } + fn load_int(&mut self, address: Address, slot: U256) -> anyhow::Result { + Ok(self + .accounts + .get(&address) + .context("no account")? + .storage + .get(&slot) + .copied() + .unwrap_or_default()) + } + fn reporting_destroy_slot( + &mut self, + address: Address, + slot: U256, + ) -> anyhow::Result> { + self.accounts.entry(address).and_modify(|it| { + it.storage.remove(&slot); + }); + Ok(None) + } + fn mask_storage(&mut self, masks: BTreeMap>) -> anyhow::Result<()> { + let _ = masks; + Ok(()) + } + fn mask(&mut self, paths: impl IntoIterator) -> anyhow::Result<()> { + let _ = paths; + Ok(()) + } + fn root(&mut self) -> H256 { + let mut it = [0; 32]; + smt_trie::utils::hashout2u(self.as_smt().root).to_big_endian(&mut it); + H256(it) + } +} + +// Having optional fields here is an odd decision, +// but without the distinction, +// the wire tests fail. +// This may be a bug in the SMT library. 
+#[derive(Default, Clone, Debug)]
+pub struct Type2Entry {
+    pub balance: Option<U256>,
+    pub nonce: Option<U256>,
+    pub code: Option<U256>,
+    pub code_length: Option<U256>,
+    pub storage: BTreeMap<U256, U256>,
+}
+
+// This is a buffered version of a type-2 world: writes accumulate in
+// `accounts`, and are only lowered into an `smt_trie` when a root is
+// required (see `as_smt`).
+#[derive(Clone, Debug)]
+pub struct Type2World {
+    accounts: BTreeMap<Address, Type2Entry>,
+    hashed_out: BTreeMap<SmtKey, H256>,
+}
+
+impl Type2World {
+    /// # Panics
+    /// - On untrusted inputs: .
+    pub fn as_smt(&self) -> smt_trie::smt::Smt<smt_trie::db::MemoryDb> {
+        let mut smt = smt_trie::smt::Smt::<smt_trie::db::MemoryDb>::default();
+
+        for (key, hash) in &self.hashed_out {
+            smt.set_hash(
+                key.into_smt_bits(),
+                smt_trie::smt::HashOut {
+                    elements: {
+                        let ethereum_types::U256(arr) = hash.into_uint();
+                        arr.map(plonky2::field::goldilocks_field::GoldilocksField)
+                    },
+                },
+            );
+        }
+        for (
+            addr,
+            Type2Entry {
+                balance,
+                nonce,
+                code,
+                code_length,
+                storage,
+            },
+        ) in self.accounts.iter()
+        {
+            use smt_trie::keys::{key_balance, key_code, key_code_length, key_nonce, key_storage};
+
+            for (value, key_fn) in [
+                (balance, key_balance as fn(_) -> _),
+                (nonce, key_nonce),
+                (code, key_code),
+                (code_length, key_code_length),
+            ] {
+                if let Some(value) = value {
+                    smt.set(key_fn(*addr), *value);
+                }
+            }
+            for (slot, value) in storage {
+                smt.set(key_storage(*addr, *slot), *value);
+            }
+        }
+        smt
+    }
+
+    pub fn new_unchecked(
+        accounts: BTreeMap<Address, Type2Entry>,
+        hashed_out: BTreeMap<SmtKey, H256>,
+    ) -> Self {
+        Self {
+            accounts,
+            hashed_out,
+        }
+    }
+}
diff --git a/zero/Cargo.toml b/zero/Cargo.toml
index 362125e22..fd8cba2b0 100644
--- a/zero/Cargo.toml
+++ b/zero/Cargo.toml
@@ -28,6 +28,7 @@ hashbrown.workspace = true
 hex.workspace = true
 itertools.workspace = true
 keccak-hash.workspace = true
+lazy-regex = "3.3.0"
 lru.workspace = true
 mpt_trie.workspace = true
 num-traits.workspace = true
@@ -35,7 +36,6 @@ once_cell.workspace = true
 paladin-core.workspace = true
 plonky2.workspace = true
 plonky2_maybe_rayon.workspace = true
-regex = "1.5.4"
 rlp.workspace = true
 ruint = { workspace = true, features = ["num-traits", "primitive-types"] }
 serde.workspace = true
@@ -53,11 +53,13 @@ zk_evm_common.workspace = true

 [target.'cfg(not(target_env = "msvc"))'.dependencies]
 jemallocator = "0.5.4"
-
 [build-dependencies]
 anyhow.workspace = true
 vergen-git2 = { version = "1.0.0", features = ["build"] }

+[dev-dependencies]
+mockall = "0.13.0"
+
 [features]
 default = ["eth_mainnet"]
diff --git a/zero/README.md b/zero/README.md
index 936a73c91..e320b8d62 100644
--- a/zero/README.md
+++ b/zero/README.md
@@ -425,13 +425,13 @@ For testing proof generation for blocks, the `testing` branch should be used.

 If you want to generate a full block proof, you can use `tools/prove_rpc.sh`:

 ```sh
-./prove_rpc.sh
+./prove_rpc.sh
 ```

 Which may look like this:

 ```sh
-./prove_rpc.sh 17 18 http://127.0.0.1:8545 jerigon false
+./prove_rpc.sh 17 18 http://127.0.0.1:8545 jerigon
 ```

 Which will attempt to generate proofs for blocks `17` & `18` consecutively and incorporate the previous block proof during generation.
@@ -439,7 +439,7 @@ Which will attempt to generate proofs for blocks `17` & `18` consecutively and i

 A few other notes:

 - Proving blocks is very resource intensive in terms of both CPU and memory. You can also only generate the witness for a block instead (see [Generating Witnesses Only](#generating-witnesses-only)) to significantly reduce the CPU and memory requirements.
-- Because incorporating the previous block proof requires a chain of proofs back to the last checkpoint height, you can also disable this requirement by passing `true` for `` (which internally just sets the current checkpoint height to the previous block height).
+- Because incorporating the previous block proof requires a chain of proofs back to the last checkpoint height, you must specify a ``. The above example omits this argument, which causes the command to treat block `16` as the checkpoint.
 - When proving multiple blocks concurrently, one may need to increase the system resource usage limit because of the number of RPC connections opened simultaneously, in particular when running a native tracer. For Linux systems, it is recommended to set `ulimit` to 8192.

 ### Generating Witnesses Only
@@ -447,13 +447,13 @@ A few other notes:

 If you want to test a block without the high CPU & memory requirements that come with creating a full proof, you can instead generate only the witness using `tools/prove_rpc.sh` in the `test_only` mode:

 ```sh
-./prove_rpc.sh test_only
+./prove_rpc.sh test_only
 ```

 Filled in:

 ```sh
-./prove_rpc.sh 18299898 18299899 http://34.89.57.138:8545 jerigon true 0 0 test_only
+./prove_rpc.sh 18299898 18299899 http://34.89.57.138:8545 jerigon 18299897 0 0 test_only
 ```

 Finally, note that both of these testing scripts force proof generation to be sequential by allowing only one worker. Because of this, the runs are not a realistic representation of performance, but they make the debugging logs much easier to follow.
diff --git a/zero/src/bin/leader.rs b/zero/src/bin/leader.rs
index 973a180d4..88d9ee60a 100644
--- a/zero/src/bin/leader.rs
+++ b/zero/src/bin/leader.rs
@@ -5,12 +5,12 @@ use std::sync::Arc;
 use anyhow::Result;
 use clap::Parser;
 use cli::Command;
-use client::RpcParams;
 use paladin::config::Config;
 use paladin::runtime::Runtime;
 use tracing::info;
 use zero::env::load_dotenvy_vars_if_present;
 use zero::prover::{ProofRuntime, ProverConfig};
+use zero::rpc::retry::build_http_retry_provider;
 use zero::{
     block_interval::BlockInterval, prover_state::persistence::set_circuit_cache_dir_env_if_not_set,
 };
@@ -104,35 +104,45 @@ async fn main() -> Result<()> {
             rpc_url,
             rpc_type,
             jumpdest_src,
-            block_interval,
-            checkpoint_block_number,
+            checkpoint_block,
             previous_proof,
             block_time,
+            start_block,
+            end_block,
             backoff,
             max_retries,
             timeout,
         } => {
+            // Construct the provider.
             let previous_proof = get_previous_proof(previous_proof)?;
-            let block_interval = BlockInterval::new(&block_interval)?;
+            let retry_provider = build_http_retry_provider(rpc_url.clone(), backoff, max_retries)?;
+            let cached_provider = Arc::new(zero::provider::CachedProvider::new(
+                retry_provider,
+                rpc_type,
+            ));
+
+            // Construct the block interval.
+            let block_interval =
+                BlockInterval::new(cached_provider.clone(), start_block, end_block).await?;
+
+            // Convert the checkpoint block to a block number.
+            let checkpoint_block_number =
+                BlockInterval::block_to_num(cached_provider.clone(), checkpoint_block).await?;
+
+            // Prove the block interval.
info!("Proving interval {block_interval}"); client_main( proof_runtime, - RpcParams { - rpc_url, - rpc_type, - backoff, - max_retries, - block_time, - jumpdest_src, - timeout, - }, + cached_provider, + block_time, block_interval, LeaderConfig { checkpoint_block_number, previous_proof, prover_config, }, + jumpdest_src, + timeout, ) .await?; } diff --git a/zero/src/bin/leader/cli.rs b/zero/src/bin/leader/cli.rs index d21af25a4..bbefee9f4 100644 --- a/zero/src/bin/leader/cli.rs +++ b/zero/src/bin/leader/cli.rs @@ -1,6 +1,7 @@ use std::path::PathBuf; use std::time::Duration; +use alloy::eips::BlockId; use alloy::transports::http::reqwest::Url; use clap::{Parser, Subcommand, ValueEnum, ValueHint}; use zero::parsing::parse_duration; @@ -47,6 +48,7 @@ pub enum WorkerRunMode { Default, } +#[allow(clippy::large_enum_variant)] #[derive(Subcommand)] pub(crate) enum Command { /// Deletes all the previously cached circuits. @@ -73,12 +75,17 @@ pub(crate) enum Command { required = false )] jumpdest_src: JumpdestSrc, - /// The block interval for which to generate a proof. - #[arg(long, short = 'i')] - block_interval: String, - /// The checkpoint block number. - #[arg(short, long, default_value_t = 0)] - checkpoint_block_number: u64, + /// The start of the block range to prove (inclusive). + #[arg(long, short = 's')] + start_block: BlockId, + /// The end of the block range to prove (inclusive). + /// If not provided, leader will work in dynamic mode from `start_block` + /// following head of the blockchain. + #[arg(long, short = 'e')] + end_block: Option, + /// The checkpoint block. + #[arg(short, long, default_value = "0")] + checkpoint_block: BlockId, /// The previous proof output. #[arg(long, short = 'f', value_hint = ValueHint::FilePath)] previous_proof: Option, diff --git a/zero/src/bin/leader/client.rs b/zero/src/bin/leader/client.rs index 0b4c3b95b..619d1c35a 100644 --- a/zero/src/bin/leader/client.rs +++ b/zero/src/bin/leader/client.rs @@ -1,8 +1,9 @@ use std::sync::Arc; use std::time::Duration; +use alloy::providers::Provider; use alloy::rpc::types::{BlockId, BlockNumberOrTag}; -use alloy::transports::http::reqwest::Url; +use alloy::transports::Transport; use anyhow::{anyhow, Result}; use tokio::sync::mpsc; use tracing::info; @@ -10,22 +11,11 @@ use zero::block_interval::{BlockInterval, BlockIntervalStream}; use zero::pre_checks::check_previous_proof_and_checkpoint; use zero::proof_types::GeneratedBlockProof; use zero::prover::{self, BlockProverInput, ProverConfig}; +use zero::provider::CachedProvider; use zero::rpc::{self, JumpdestSrc}; -use zero::rpc::{retry::build_http_retry_provider, RpcType}; use crate::ProofRuntime; -#[derive(Debug)] -pub struct RpcParams { - pub rpc_url: Url, - pub rpc_type: RpcType, - pub backoff: u64, - pub max_retries: u32, - pub block_time: u64, - pub jumpdest_src: JumpdestSrc, - pub timeout: Duration, -} - #[derive(Debug)] pub struct LeaderConfig { pub checkpoint_block_number: u64, @@ -34,24 +24,23 @@ pub struct LeaderConfig { } /// The main function for the client. 
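The signature rewritten below swaps the old `RpcParams` bag for a shared, pre-built provider, so `client_main` no longer knows how connections are made. The enabling pattern is ordinary generic injection; here is a self-contained sketch with hypothetical names (`BlockSource`, `follow_chain`, and `Stub` are not real `zero` APIs):

```rust
// Sketch of the injection pattern behind the new `client_main` signature.
// All names here are illustrative, not the real zero/alloy APIs.
use std::sync::Arc;

trait BlockSource {
    fn latest_block_number(&self) -> u64;
}

// Generic over the source: production passes the cached retry-provider
// built in `leader.rs`; tests can pass a stub or a mockall mock.
fn follow_chain<S: BlockSource>(source: Arc<S>) -> u64 {
    source.latest_block_number()
}

struct Stub;
impl BlockSource for Stub {
    fn latest_block_number(&self) -> u64 {
        12_345
    }
}

fn main() {
    assert_eq!(follow_chain(Arc::new(Stub)), 12_345);
}
```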
-pub(crate) async fn client_main( +pub(crate) async fn client_main( proof_runtime: Arc, - rpc_params: RpcParams, + cached_provider: Arc>, + block_time: u64, block_interval: BlockInterval, mut leader_config: LeaderConfig, -) -> Result<()> { + jumpdest_src: JumpdestSrc, + timeout: Duration, +) -> Result<()> +where + ProviderT: Provider + 'static, + TransportT: Transport + Clone, +{ use futures::StreamExt; let test_only = leader_config.prover_config.test_only; - let cached_provider = Arc::new(zero::provider::CachedProvider::new( - build_http_retry_provider( - rpc_params.rpc_url.clone(), - rpc_params.backoff, - rpc_params.max_retries, - )?, - )); - if !test_only { // For actual proof runs, perform a sanity check on the provided inputs. check_previous_proof_and_checkpoint( @@ -79,7 +68,7 @@ pub(crate) async fn client_main( let mut block_interval_stream: BlockIntervalStream = match block_interval { block_interval @ BlockInterval::FollowFrom { .. } => { block_interval - .into_unbounded_stream(cached_provider.clone(), rpc_params.block_time) + .into_unbounded_stream(cached_provider.clone(), block_time) .await? } _ => block_interval.into_bounded_stream()?, @@ -95,9 +84,8 @@ pub(crate) async fn client_main( cached_provider.clone(), block_id, leader_config.checkpoint_block_number, - rpc_params.rpc_type, - rpc_params.jumpdest_src, - rpc_params.timeout, + jumpdest_src, + timeout, ) .await?; block_tx diff --git a/zero/src/bin/rpc.rs b/zero/src/bin/rpc.rs index fa9d9eab1..689d13d33 100644 --- a/zero/src/bin/rpc.rs +++ b/zero/src/bin/rpc.rs @@ -28,7 +28,6 @@ struct FetchParams { pub start_block: u64, pub end_block: u64, pub checkpoint_block_number: Option, - pub rpc_type: RpcType, pub jumpdest_src: JumpdestSrc, pub timeout: Duration, } @@ -118,7 +117,6 @@ where cached_provider.clone(), block_id, checkpoint_block_number, - params.rpc_type, params.jumpdest_src, params.timeout, ) @@ -132,11 +130,12 @@ where impl Cli { /// Execute the cli command. pub async fn execute(self) -> anyhow::Result<()> { - let cached_provider = Arc::new(CachedProvider::new(build_http_retry_provider( + let retry_provider = build_http_retry_provider( self.config.rpc_url.clone(), self.config.backoff, self.config.max_retries, - )?)); + )?; + let cached_provider = Arc::new(CachedProvider::new(retry_provider, self.config.rpc_type)); match self.command { Command::Fetch { @@ -148,7 +147,6 @@ impl Cli { start_block, end_block, checkpoint_block_number, - rpc_type: self.config.rpc_type, jumpdest_src: self.config.jumpdest_src, timeout: self.config.timeout, }; @@ -176,7 +174,6 @@ impl Cli { start_block: block_number, end_block: block_number, checkpoint_block_number: None, - rpc_type: self.config.rpc_type, jumpdest_src: self.config.jumpdest_src, timeout: self.config.timeout, }; diff --git a/zero/src/bin/trie_diff.rs b/zero/src/bin/trie_diff.rs index c211cc528..480441486 100644 --- a/zero/src/bin/trie_diff.rs +++ b/zero/src/bin/trie_diff.rs @@ -20,9 +20,9 @@ use anyhow::Result; use clap::{Parser, ValueHint}; use evm_arithmetization::generation::DebugOutputTries; use futures::{future, TryStreamExt}; +use lazy_regex::regex_captures; use paladin::directive::{Directive, IndexedStream}; use paladin::runtime::Runtime; -use regex::Regex; use trace_decoder::observer::TriesObserver; use tracing::{error, info}; use zero::ops::register; @@ -132,10 +132,11 @@ async fn main() -> Result<()> { { // Try to parse block and batch index from error message. 
                let error_message = e2.to_string();
-                let re = Regex::new(r"block:(\d+) batch:(\d+)")?;
-                if let Some(cap) = re.captures(&error_message) {
-                    let block_number: u64 = cap[1].parse()?;
-                    let batch_index: usize = cap[2].parse()?;
+                // `regex_captures!` yields the full match first, hence the leading `_`.
+                if let Some((_, block_number, batch_index)) =
+                    regex_captures!(r"block:(\d+) batch:(\d+)", error_message.as_str())
+                {
+                    let block_number: u64 = block_number.parse()?;
+                    let batch_index: usize = batch_index.parse()?;

                     let prover_tries =
                         zero::debug_utils::load_tries_from_disk(block_number, batch_index)?;

@@ -147,8 +148,8 @@ async fn main() -> Result<()> {
                         &DebugOutputTries {
                             state_trie: observer.data[prover_tries.batch_index]
                                 .tries
-                                .state
-                                .as_hashed_partial_trie()
+                                .world
+                                .state_trie()
                                 .clone(),
                             transaction_trie: observer.data[prover_tries.batch_index]
                                 .tries
diff --git a/zero/src/block_interval.rs b/zero/src/block_interval.rs
index e424076e0..27d03fe3c 100644
--- a/zero/src/block_interval.rs
+++ b/zero/src/block_interval.rs
@@ -1,16 +1,52 @@
 use std::pin::Pin;
 use std::sync::Arc;
+use std::{future::Future, ops::Range};

-use alloy::primitives::B256;
-use alloy::rpc::types::eth::BlockId;
-use alloy::{hex, providers::Provider, transports::Transport};
+use alloy::rpc::types::{eth::BlockId, Block};
 use anyhow::{anyhow, Result};
 use async_stream::try_stream;
 use futures::Stream;
+#[cfg(test)]
+use mockall::automock;
 use tracing::info;

-use crate::parsing;
-use crate::provider::CachedProvider;
+#[cfg_attr(test, automock)]
+pub trait BlockIntervalProvider<T> {
+    fn get_block_by_id(
+        &self,
+        block_id: BlockId,
+    ) -> impl Future<Output = anyhow::Result<Option<Block>>> + Send;
+
+    fn latest_block_number(&self) -> impl Future<Output = anyhow::Result<u64>> + Send;
+}
+
+#[cfg(not(test))]
+mod block_interval_provider_impl {
+    use alloy::providers::Provider;
+    use alloy::rpc::types::BlockTransactionsKind;
+    use alloy::transports::Transport;
+
+    use super::{Block, BlockId, BlockIntervalProvider};
+
+    /// Implements the [`BlockIntervalProvider`] trait for [`Provider`].
+    impl<T, P> BlockIntervalProvider<T> for P
+    where
+        T: Transport + Clone,
+        P: Provider<T>,
+    {
+        /// Retrieves block without transaction contents from the provider.
+        async fn get_block_by_id(&self, block_id: BlockId) -> anyhow::Result<Option<Block>> {
+            Ok(self
+                .get_block(block_id, BlockTransactionsKind::Hashes)
+                .await?)
+        }
+
+        /// Retrieves the latest block number from the provider.
+        async fn latest_block_number(&self) -> anyhow::Result<u64> {
+            Ok(self.get_block_number().await?)
+        }
+    }
+}

 /// The async stream of block numbers.
 /// The second bool flag indicates if the element is last in the interval.
@@ -20,9 +56,9 @@ pub type BlockIntervalStream = Pin<Box<dyn Stream<Item = Result<(u64, bool), an
 pub enum BlockInterval {
-    SingleBlockId(BlockId),
+    SingleBlockId(u64),
+    Range(Range<u64>),
     // Dynamic interval from the start block to the latest network block
     FollowFrom {
         // Interval starting block number
@@ -31,64 +67,43 @@ }
 impl BlockInterval {
-    /// Create a new block interval
+    /// Creates a new block interval.
     ///
-    /// A valid block range is of the form:
-    /// * `block_number` for a single block number
-    /// * `lhs..rhs`, `lhs..=rhs` as an exclusive/inclusive range
-    /// * `lhs..` for a range starting from `lhs` to the chain tip. `lhs..=`
-    ///   is also valid format.
+    /// If `end_block` is `None`, the interval is unbounded and will follow
+    /// from `start_block`. If `start_block == end_block`, the interval is a
+    /// single block. Otherwise, the interval is an inclusive range from
+    /// `start_block` to `end_block`.
/// - /// # Example - /// - /// ```rust - /// # use alloy::rpc::types::eth::BlockId; - /// # use zero::block_interval::BlockInterval; - /// assert_eq!(BlockInterval::new("0..10").unwrap(), BlockInterval::Range(0..10)); - /// assert_eq!(BlockInterval::new("0..=10").unwrap(), BlockInterval::Range(0..11)); - /// assert_eq!(BlockInterval::new("32141").unwrap(), BlockInterval::SingleBlockId(BlockId::Number(32141.into()))); - /// assert_eq!(BlockInterval::new("100..").unwrap(), BlockInterval::FollowFrom{start_block: 100}); - /// ``` - pub fn new(s: &str) -> anyhow::Result { - if (s.starts_with("0x") && s.len() == 66) || s.len() == 64 { - // Try to parse hash - let hash = s - .parse::() - .map_err(|_| anyhow!("invalid block hash '{s}'"))?; - return Ok(BlockInterval::SingleBlockId(BlockId::Hash(hash.into()))); - } - - // First we parse for inclusive range and then for exclusive range, - // because both separators start with `..` - if let Ok(range) = parsing::parse_range_inclusive(s) { - Ok(BlockInterval::Range(range)) - } else if let Ok(range) = parsing::parse_range_exclusive(s) { - Ok(BlockInterval::Range(range)) - } - // Now we look for the follow from range - else if s.contains("..") { - let mut split = s.trim().split("..").filter(|s| *s != "=" && !s.is_empty()); + /// end_block is always treated as inclusive because it may have been + /// specified as a block hash. + pub async fn new( + provider: Arc>, + start_block: BlockId, + end_block: Option, + ) -> Result { + // Ensure the start block is a valid block number. + let start_block_num = Self::block_to_num(provider.clone(), start_block).await?; - // Any other character after `..` or `..=` is invalid - if split.clone().count() > 1 { - return Err(anyhow!("invalid block interval range '{s}'")); + // Create the block interval. + match end_block { + // Start and end are the same. + Some(end_block) if end_block == start_block => { + Ok(BlockInterval::SingleBlockId(start_block_num)) } - let num = split - .next() - .map(|num| { - num.parse::() - .map_err(|_| anyhow!("invalid block number '{num}'")) - }) - .ok_or(anyhow!("invalid block interval range '{s}'"))??; - return Ok(BlockInterval::FollowFrom { start_block: num }); - } - // Only single block number is left to try to parse - else { - let num: u64 = s - .trim() - .parse() - .map_err(|_| anyhow!("invalid block interval range '{s}'"))?; - return Ok(BlockInterval::SingleBlockId(BlockId::Number(num.into()))); + // Bounded range provided. + Some(end_block) => { + let end_block_num = Self::block_to_num(provider.clone(), end_block).await?; + if end_block_num <= start_block_num { + return Err(anyhow!( + "invalid block interval range ({start_block_num}..{end_block_num})" + )); + } + Ok(BlockInterval::Range(start_block_num..end_block_num + 1)) + } + // Unbounded range provided. + None => Ok(BlockInterval::FollowFrom { + start_block: start_block_num, + }), } } @@ -96,10 +111,7 @@ impl BlockInterval { /// second bool flag indicates if the element is last in the interval. 
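For orientation, here is how downstream code consumes the streams produced below: each item is a `(block_number, is_last)` pair, so a consumer can trigger end-of-interval work without knowing the interval's shape. A condensed sketch modeled on the `can_into_bounded_stream` test at the bottom of this file (`drain` is a hypothetical helper):

```rust
// Sketch: draining a BlockIntervalStream. Each item is
// (block_number, is_last); errors surface through the Result.
use futures::StreamExt;
use zero::block_interval::BlockIntervalStream;

async fn drain(mut stream: BlockIntervalStream) -> anyhow::Result<()> {
    while let Some(item) = stream.next().await {
        let (block_num, is_last) = item?;
        println!("proving block {block_num}");
        if is_last {
            println!("block {block_num} closes the interval");
        }
    }
    Ok(())
}
```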
pub fn into_bounded_stream(self) -> Result { match self { - BlockInterval::SingleBlockId(BlockId::Number(num)) => { - let num = num - .as_number() - .ok_or(anyhow!("invalid block number '{num}'"))?; + BlockInterval::SingleBlockId(num) => { let range = (num..num + 1).map(|it| Ok((it, true))).collect::>(); Ok(Box::pin(futures::stream::iter(range))) @@ -110,42 +122,33 @@ impl BlockInterval { range.last_mut().map(|it| it.as_mut().map(|it| it.1 = true)); Ok(Box::pin(futures::stream::iter(range))) } - _ => Err(anyhow!( + BlockInterval::FollowFrom { .. } => Err(anyhow!( "could not create bounded stream from unbounded follow-from interval", )), } } + /// Returns the start block number of the interval. pub fn get_start_block(&self) -> Result { match self { - BlockInterval::SingleBlockId(BlockId::Number(num)) => { - let num_value = num - .as_number() - .ok_or_else(|| anyhow!("invalid block number '{num}'"))?; - Ok(num_value) // Return the valid block number - } + BlockInterval::SingleBlockId(num) => Ok(*num), BlockInterval::Range(range) => Ok(range.start), BlockInterval::FollowFrom { start_block, .. } => Ok(*start_block), - _ => Err(anyhow!("Unknown BlockInterval variant")), // Handle unknown variants } } /// Convert the block interval into an unbounded async stream of block /// numbers. Query the blockchain node for the latest block number. - pub async fn into_unbounded_stream( + pub async fn into_unbounded_stream( self, - cached_provider: Arc>, + provider: Arc + 'static>, block_time: u64, - ) -> Result - where - ProviderT: Provider + 'static, - TransportT: Transport + Clone, - { + ) -> Result { match self { BlockInterval::FollowFrom { start_block } => Ok(Box::pin(try_stream! { let mut current = start_block; loop { - let last_block_number = cached_provider.get_provider().await?.get_block_number().await.map_err(|e: alloy::transports::RpcError<_>| { + let last_block_number = provider.latest_block_number().await.map_err(|e| { anyhow!("could not retrieve latest block number from the provider: {e}") })?; @@ -166,15 +169,40 @@ impl BlockInterval { )), } } + + /// Converts a [`BlockId`] into a block number by querying the provider. + pub async fn block_to_num( + provider: Arc>, + block: BlockId, + ) -> Result { + let block_num = match block { + // Number already provided + BlockId::Number(num) => num + .as_number() + .ok_or_else(|| anyhow!("invalid block number '{num}'"))?, + + // Hash provided, query the provider for the block number. + BlockId::Hash(hash) => { + let block = provider + .get_block_by_id(BlockId::Hash(hash)) + .await + .map_err(|e| { + anyhow!("could not retrieve block number by hash from the provider: {e}") + })?; + block + .ok_or(anyhow!("block not found {hash}"))? 
+ .header + .number + } + }; + Ok(block_num) + } } impl std::fmt::Display for BlockInterval { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { match self { - BlockInterval::SingleBlockId(block_id) => match block_id { - BlockId::Number(it) => f.write_fmt(format_args!("{}", it)), - BlockId::Hash(it) => f.write_fmt(format_args!("0x{}", &hex::encode(it.block_hash))), - }, + BlockInterval::SingleBlockId(num) => f.write_fmt(format_args!("{}", num)), BlockInterval::Range(range) => { write!(f, "{}..{}", range.start, range.end) } @@ -185,92 +213,174 @@ impl std::fmt::Display for BlockInterval { } } -impl std::str::FromStr for BlockInterval { - type Err = anyhow::Error; - - fn from_str(s: &str) -> Result { - BlockInterval::new(s) - } -} - #[cfg(test)] mod test { use alloy::primitives::B256; + use alloy::rpc::types::{Block, Header, Transaction}; + use alloy::transports::BoxTransport; + use mockall::predicate::*; + use MockBlockIntervalProvider; use super::*; - #[test] - fn can_create_block_interval_from_exclusive_range() { + type Mocker = MockBlockIntervalProvider; + + #[tokio::test] + async fn can_create_block_interval_from_inclusive_range() { assert_eq!( - BlockInterval::new("0..10").unwrap(), - BlockInterval::Range(0..10) + BlockInterval::new( + Arc::new(Mocker::new()), + BlockId::from(0), + Some(BlockId::from(10)) + ) + .await + .unwrap(), + BlockInterval::Range(0..11) ); } - #[test] - fn can_create_block_interval_from_inclusive_range() { + #[tokio::test] + async fn can_create_follow_from_block_interval() { assert_eq!( - BlockInterval::new("0..=10").unwrap(), - BlockInterval::Range(0..11) + BlockInterval::new(Arc::new(Mocker::new()), BlockId::from(100), None) + .await + .unwrap(), + BlockInterval::FollowFrom { start_block: 100 } ); } - #[test] - fn can_create_follow_from_block_interval() { + #[tokio::test] + async fn can_create_single_block_interval() { assert_eq!( - BlockInterval::new("100..").unwrap(), - BlockInterval::FollowFrom { start_block: 100 } + BlockInterval::new( + Arc::new(Mocker::new()), + BlockId::from(123415131), + Some(BlockId::from(123415131)) + ) + .await + .unwrap(), + BlockInterval::SingleBlockId(123415131) ); } - #[test] - fn can_create_single_block_interval() { + #[tokio::test] + async fn cannot_create_invalid_range() { assert_eq!( - BlockInterval::new("123415131").unwrap(), - BlockInterval::SingleBlockId(BlockId::Number(123415131.into())) + BlockInterval::new( + Arc::new(Mocker::new()), + BlockId::from(123415131), + Some(BlockId::from(0)) + ) + .await + .unwrap_err() + .to_string(), + anyhow!("invalid block interval range (123415131..0)").to_string() ); } - #[test] - fn new_interval_proper_single_block_error() { + #[tokio::test] + async fn can_create_single_block_interval_from_hash() { + // Mock the block for single block interval. + let mut mock = Mocker::new(); + let block_id = BlockId::Hash( + "0xb51ceca7ba912779ed6721d2b93849758af0d2354683170fb71dead6e439e6cb" + .parse::() + .unwrap() + .into(), + ); + mock_block(&mut mock, block_id, 12345); + + // Create the interval. + let mock = Arc::new(mock); assert_eq!( - BlockInterval::new("113A").err().unwrap().to_string(), - "invalid block interval range '113A'" + BlockInterval::new(mock, block_id, Some(block_id)) + .await + .unwrap(), + BlockInterval::SingleBlockId(12345) ); } - #[test] - fn new_interval_proper_range_error() { + #[tokio::test] + async fn can_create_block_interval_from_inclusive_hash_range() { + // Mock the blocks for the range. 
+        let mut mock = Mocker::new();
+        let start_block_id = BlockId::Hash(
+            "0xb51ceca7ba912779ed6721d2b93849758af0d2354683170fb71dead6e439e6cb"
+                .parse::<B256>()
+                .unwrap()
+                .into(),
+        );
+        mock_block(&mut mock, start_block_id, 12345);
+        let end_block_id = BlockId::Hash(
+            "0x351ceca7ba912779ed6721d2b93849758af0d2354683170fb71dead6e439e6cb"
+                .parse::<B256>()
+                .unwrap()
+                .into(),
+        );
+        mock_block(&mut mock, end_block_id, 12355);
+
+        // Create the interval.
+        let mock = Arc::new(mock);
         assert_eq!(
-            BlockInterval::new("111...156").err().unwrap().to_string(),
-            "invalid block interval range '111...156'"
+            BlockInterval::new(mock, start_block_id, Some(end_block_id))
+                .await
+                .unwrap(),
+            BlockInterval::Range(12345..12356)
         );
     }

-    #[test]
-    fn new_interval_parse_block_hash() {
+    #[tokio::test]
+    async fn can_create_follow_from_block_interval_hash() {
+        // Mock a block for range to start from.
+        let start_block_id = BlockId::Hash(
+            "0xb51ceca7ba912779ed6721d2b93849758af0d2354683170fb71dead6e439e6cb"
+                .parse::<B256>()
+                .unwrap()
+                .into(),
+        );
+        let mut mock = Mocker::new();
+        mock_block(&mut mock, start_block_id, 12345);
+
+        // Create the interval.
+        let mock = Arc::new(mock);
         assert_eq!(
-            BlockInterval::new(
-                "0xb51ceca7ba912779ed6721d2b93849758af0d2354683170fb71dead6e439e6cb"
-            )
-            .unwrap(),
-            BlockInterval::SingleBlockId(BlockId::Hash(
-                "0xb51ceca7ba912779ed6721d2b93849758af0d2354683170fb71dead6e439e6cb"
-                    .parse::<B256>()
-                    .unwrap()
-                    .into()
-            ))
-        )
+            BlockInterval::new(mock, start_block_id, None)
+                .await
+                .unwrap(),
+            BlockInterval::FollowFrom { start_block: 12345 }
+        );
+    }
+
+    /// Configures the mock to expect a query for a block by id and return
+    /// the expected block number.
+    fn mock_block(
+        mock: &mut MockBlockIntervalProvider<BoxTransport>,
+        query_id: BlockId,
+        resulting_block_num: u64,
+    ) {
+        let mut block: Block = Block::default();
+        block.header.number = resulting_block_num;
+        mock.expect_get_block_by_id()
+            .with(eq(query_id))
+            .returning(move |_| {
+                let block = block.clone();
+                Box::pin(async move { Ok(Some(block)) })
+            });
     }

     #[tokio::test]
     async fn can_into_bounded_stream() {
         use futures::StreamExt;
         let mut result = Vec::new();
-        let mut stream = BlockInterval::new("1..10")
-            .unwrap()
-            .into_bounded_stream()
-            .unwrap();
+        let mut stream = BlockInterval::new(
+            Arc::new(Mocker::new()),
+            BlockId::from(1),
+            Some(BlockId::from(9)),
+        )
+        .await
+        .unwrap()
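+        // A bounded interval streams (block_number, is_last) pairs, flagging the final block.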
+        .into_bounded_stream()
+        .unwrap();
         while let Some(val) = stream.next().await {
             result.push(val.unwrap());
         }
@@ -281,13 +391,4 @@ mod test {
         expected.last_mut().unwrap().1 = true;
         assert_eq!(result, expected);
     }
-
-    #[test]
-    fn can_create_from_string() {
-        use std::str::FromStr;
-        assert_eq!(
-            &format!("{}", BlockInterval::from_str("0..10").unwrap()),
-            "0..10"
-        );
-    }
 }
diff --git a/zero/src/parsing.rs b/zero/src/parsing.rs
index d22634968..19c49ba83 100644
--- a/zero/src/parsing.rs
+++ b/zero/src/parsing.rs
@@ -40,19 +40,6 @@ where
     parse_range_gen(s, "..", false)
 }

-/// Parse an inclusive range from a string.
-///
-/// A valid range is of the form `lhs..=rhs`, where `lhs` and `rhs` are numbers.
-pub(crate) fn parse_range_inclusive<NumberT>(
-    s: &str,
-) -> Result<Range<NumberT>, RangeParseError<NumberT>>
-where
-    NumberT: Display + FromStr + From<u8> + Add<Output = NumberT>,
-    NumberT::Err: Display,
-{
-    parse_range_gen(s, "..=", true)
-}
-
 pub(crate) fn parse_range_gen<NumberT, SeparatorT>(
     s: &str,
     separator: SeparatorT,
@@ -99,11 +86,6 @@ mod test {
         assert_eq!(parse_range_exclusive::<u64>("0..10"), Ok(0..10));
     }

-    #[test]
-    fn it_parses_inclusive_ranges() {
-        assert_eq!(parse_range_inclusive::<u64>("0..=10"), Ok(0..11));
-    }
-
     #[test]
     fn it_handles_missing_lhs() {
         assert_eq!(
diff --git a/zero/src/provider.rs b/zero/src/provider.rs
index 876cb270c..a2168bbb7 100644
--- a/zero/src/provider.rs
+++ b/zero/src/provider.rs
@@ -2,14 +2,27 @@ use std::ops::{Deref, DerefMut};
 use std::sync::Arc;

 use alloy::primitives::BlockHash;
+use alloy::providers::RootProvider;
 use alloy::rpc::types::{Block, BlockId, BlockTransactionsKind};
 use alloy::{providers::Provider, transports::Transport};
 use anyhow::Context;
 use tokio::sync::{Mutex, Semaphore, SemaphorePermit};

+use crate::rpc::RpcType;
+
 const CACHE_SIZE: usize = 1024;
 const MAX_NUMBER_OF_PARALLEL_REQUESTS: usize = 128;

+impl<T, P> Provider<T> for CachedProvider<P, T>
+where
+    T: Transport + Clone,
+    P: Provider<T>,
+{
+    fn root(&self) -> &RootProvider<T> {
+        self.provider.root()
+    }
+}
+
 /// Wrapper around alloy provider to cache blocks and other
 /// frequently used data.
 pub struct CachedProvider<ProviderT, TransportT> {
@@ -22,6 +35,7 @@ pub struct CachedProvider<ProviderT, TransportT> {
     blocks_by_number: Arc<Mutex<lru::LruCache<u64, Block>>>,
     blocks_by_hash: Arc<Mutex<lru::LruCache<BlockHash, u64>>>,
     _phantom: std::marker::PhantomData<TransportT>,
+    pub rpc_type: RpcType,
 }

 pub struct ProviderGuard<'a, ProviderT> {
@@ -48,7 +62,7 @@ where
     ProviderT: Provider<TransportT>,
     TransportT: Transport + Clone,
 {
-    pub fn new(provider: ProviderT) -> Self {
+    pub fn new(provider: ProviderT, rpc_type: RpcType) -> Self {
         Self {
             provider: provider.into(),
             semaphore: Arc::new(Semaphore::new(MAX_NUMBER_OF_PARALLEL_REQUESTS)),
@@ -58,6 +72,7 @@ where
             blocks_by_hash: Arc::new(Mutex::new(lru::LruCache::new(
                 std::num::NonZero::new(CACHE_SIZE).unwrap(),
             ))),
+            rpc_type,
             _phantom: std::marker::PhantomData,
         }
     }
diff --git a/zero/src/rpc/jerigon.rs b/zero/src/rpc/jerigon.rs
index 6c9f08e5e..9156fb937 100644
--- a/zero/src/rpc/jerigon.rs
+++ b/zero/src/rpc/jerigon.rs
@@ -60,9 +60,10 @@ where
         .raw_request::<_, String>("eth_getWitness".into(), vec![target_block_id])
         .await?;

-    let block = cached_provider
+    let block: Block = cached_provider
         .get_block(target_block_id, BlockTransactionsKind::Full)
-        .await?;
+        .await?
+        .context("no block")?;

     let block_jumpdest_table_witnesses: Vec<Option<JumpDestTableWitness>> = match jumpdest_src {
         JumpdestSrc::ProverSimulation => vec![None; tx_results.len()],
diff --git a/zero/src/rpc/mod.rs b/zero/src/rpc/mod.rs
index 1fa63089d..ebee7bb07 100644
--- a/zero/src/rpc/mod.rs
+++ b/zero/src/rpc/mod.rs
@@ -54,7 +54,6 @@ pub async fn block_prover_input(
     cached_provider: Arc<CachedProvider<ProviderT, TransportT>>,
     block_id: BlockId,
     checkpoint_block_number: u64,
-    rpc_type: RpcType,
     jumpdest_src: JumpdestSrc,
     fetch_timeout: Duration,
 ) -> Result<BlockProverInput>
@@ -62,7 +61,7 @@ where
     ProviderT: Provider<TransportT>,
     TransportT: Transport + Clone,
 {
-    match rpc_type {
+    match cached_provider.rpc_type {
         RpcType::Jerigon => {
             jerigon::block_prover_input(
                 cached_provider,
@@ -130,8 +129,8 @@ where
             async move {
                 let block = cached_provider
                     .get_block((block_num as u64).into(), BlockTransactionsKind::Hashes)
-                    .await
-                    .context("couldn't get block")?;
+                    .await?
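+                    // `get_block` now returns an `Option`, so a missing block is an explicit error.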
+                    .ok_or(anyhow!("block not found {block_num}"))?;
                 anyhow::Ok([
                     (block.header.hash, Some(block_num)),
                     (block.header.parent_hash, previous_block_number),
@@ -237,8 +236,8 @@ where
 {
     let target_block = cached_provider
         .get_block(target_block_id, BlockTransactionsKind::Hashes)
-        .await?;
-    let target_block_number = target_block.header.number;
+        .await?
+        .ok_or(anyhow!("target block not found {}", target_block_id))?;
     let chain_id = cached_provider.get_provider().await?.get_chain_id().await?;

     // Grab interval checkpoint block state trie
@@ -248,11 +247,15 @@ where
             BlockTransactionsKind::Hashes,
         )
         .await?
+        .ok_or(anyhow!(
+            "checkpoint block not found {}",
+            checkpoint_block_number
+        ))?
         .header
         .state_root;

     let prev_hashes =
-        fetch_previous_block_hashes(cached_provider.clone(), target_block_number).await?;
+        fetch_previous_block_hashes(cached_provider.clone(), target_block.header.number).await?;
     let checkpoint_prev_hashes =
         fetch_previous_block_hashes(cached_provider, checkpoint_block_number + 1) // include the checkpoint block
             .await?
@@ -263,7 +266,7 @@ where
         b_meta: BlockMetadata {
             block_beneficiary: target_block.header.miner.compat(),
             block_timestamp: target_block.header.timestamp.into(),
-            block_number: target_block_number.into(),
+            block_number: target_block.header.number.into(),
             block_difficulty: target_block.header.difficulty.into(),
             block_random: target_block
                 .header
diff --git a/zero/src/rpc/native/mod.rs b/zero/src/rpc/native/mod.rs
index 5fa2ff0aa..dda71a85e 100644
--- a/zero/src/rpc/native/mod.rs
+++ b/zero/src/rpc/native/mod.rs
@@ -55,7 +55,8 @@ where
 {
     let block = cached_provider
         .get_block(block_number, BlockTransactionsKind::Full)
-        .await?;
+        .await?
+        .ok_or(anyhow::anyhow!("block not found {}", block_number))?;

     let (code_db, txn_info) = txn::process_transactions(
         &block,
diff --git a/zero/src/rpc/native/state.rs b/zero/src/rpc/native/state.rs
index 3c37e8cbc..b5b82106a 100644
--- a/zero/src/rpc/native/state.rs
+++ b/zero/src/rpc/native/state.rs
@@ -35,6 +35,7 @@ where
     let prev_state_root = cached_provider
         .get_block((block_number - 1).into(), BlockTransactionsKind::Hashes)
         .await?
+        .ok_or(anyhow::anyhow!("block not found {}", block_number - 1))?
         .header
         .state_root;