pulp-platform
diff --git a/‎.dockerignore
Lines changed: 2 additions & 0 deletions b/‎.dockerignore
Lines changed: 2 additions & 0 deletions
diff --git a/‎.github/workflows/ci.yml
Lines changed: 3 additions & 1 deletion b/‎.github/workflows/ci.yml
Lines changed: 3 additions & 1 deletion
diff --git a/‎.github/workflows/gitlab-ci.yaml
Lines changed: 3 additions & 3 deletions b/‎.github/workflows/gitlab-ci.yaml
Lines changed: 3 additions & 3 deletions
diff --git a/‎.github/workflows/lint.yml
Lines changed: 0 additions & 15 deletions b/‎.github/workflows/lint.yml
Lines changed: 0 additions & 15 deletions
diff --git a/‎.gitignore
Lines changed: 3 additions & 1 deletion b/‎.gitignore
Lines changed: 3 additions & 1 deletion
diff --git a/‎.gitlab-ci.yml
Lines changed: 20 additions & 10 deletions b/‎.gitlab-ci.yml
Lines changed: 20 additions & 10 deletions
diff --git a/‎.gitmodules
Lines changed: 1 addition & 1 deletion b/‎.gitmodules
Lines changed: 1 addition & 1 deletion
diff --git a/‎Bender.yml
Lines changed: 12 additions & 0 deletions b/‎Bender.yml
Lines changed: 12 additions & 0 deletions
diff --git a/‎Makefile
Lines changed: 8 additions & 6 deletions b/‎Makefile
Lines changed: 8 additions & 6 deletions
diff --git a/‎README.md
Lines changed: 42 additions & 7 deletions b/‎README.md
Lines changed: 42 additions & 7 deletions
diff --git a/‎docs/ug/tutorial.md
Lines changed: 92 additions & 6 deletions b/‎docs/ug/tutorial.md
Lines changed: 92 additions & 6 deletions
diff --git a/‎hw/snitch/src/riscv_instr.sv
Lines changed: 4 additions & 0 deletions b/‎hw/snitch/src/riscv_instr.sv
Lines changed: 4 additions & 0 deletions
@@ -1,4 +1,6 @@
 *
 !sw/**/*.py
 !util/**/*.py
+!nonfree/util/*
+!target/snitch_cluster/util/*.py
 !pyproject.toml
@@ -8,6 +8,9 @@
 name: ci
 on: [push]
 
+env:
+  FORCE_COLOR: 1
+
 jobs:
 
   ##########################
@@ -147,7 +150,6 @@ jobs:
         working-directory: target/snitch_cluster
         run: |
           ./util/run.py sw/run.yaml --simulator verilator -j
-          ./util/run.py sw/fdiv.yaml --simulator verilator -j
       - name: Annotate traces
         working-directory: target/snitch_cluster
         run: |
 
@@ -11,12 +11,12 @@ jobs:
   gitlab-ci:
     name: Internal Gitlab CI
     runs-on: ubuntu-22.04
+    # Skip on forks due to missing secrets.
+    if: >
+      github.repository == 'pulp-platform/snitch_cluster'
     steps:
       - name: Check Gitlab CI
         uses: pulp-platform/pulp-actions/gitlab-ci@v2.1.0
-        # Skip on forks due to missing secrets.
-        if: >
-          github.repository == 'pulp-platform/snitch_cluster'
         with:
           domain: iis-git.ee.ethz.ch
           repo: github-mirror/snitch_cluster
 
@@ -121,18 +121,3 @@ jobs:
       - uses: DoozyX/clang-format-lint-action@v0.18.1
         with:
           clangFormatVersion: 10
-
-  ######################
-  # Lint Editor Config #
-  ######################
-  # Detect trailing whitespaces, missing new lines and wrong file encodings.
-  editorconfig-lint:
-    name: Lint Editorconfig
-    runs-on: ubuntu-latest
-    if: >
-      github.event_name != 'pull_request' ||
-      github.event.pull_request.head.repo.full_name != github.repository
-    steps:
-      - uses: actions/checkout@v3
-      - uses: editorconfig-checker/action-editorconfig-checker@main
-      - run: editorconfig-checker
@@ -1,5 +1,7 @@
 # Nonfree repository
-/nonfree/
+/nonfree/**
+!/nonfree/util
+!/nonfree/util/.gitignore
 
 # Dependency files
 /.bender/
 
@@ -129,16 +129,6 @@ snitch-cluster-vsim:
     # Run additional, more extensive tests
     - cd sw/apps/blas/gemm/test && ./test.sh && cd -
     - cd sw/apps/dnn/transpose/test && ./test.sh && cd -
-
-# Tests requiring hardware FDIV unit
-snitch-cluster-fdiv-vsim:
-  needs: [setup]
-  script:
-    - cd target/snitch_cluster
-    - make CFG_OVERRIDE=cfg/fdiv.json sw
-    - make bin/snitch_cluster.vsim
-    - ./util/run.py sw/fdiv.yaml --simulator vsim -j --run-dir runs/vsim
-    # Run additional, more extensive tests
     - cd sw/apps/dnn/flashattention_2/test && ./test.sh && cd -
 
 # Test OmegaNet TCDM interconnect
@@ -159,6 +149,26 @@ snitch-cluster-mchan-vsim:
     - make bin/snitch_cluster.vsim
     - ./util/run.py sw/dma_mchan.yaml --simulator vsim -j --run-dir runs/vsim
 
+# Tests requiring a larger FREP sequencer
+snitch-cluster-frep-xl-vsim:
+  needs: [setup]
+  script:
+    - cd target/snitch_cluster
+    - make CFG_OVERRIDE=cfg/frep_xl.json sw
+    - make bin/snitch_cluster.vsim
+    - ./util/run.py sw/frep_xl.yaml --simulator vsim -j --run-dir runs/vsim
+
+# COPIFT and scalar chaining experiments
+snitch-cluster-copift-sc-vsim:
+  needs: [setup]
+  script:
+    - cd target/snitch_cluster
+    - make CFG_OVERRIDE=experiments/copift/cfg.json bin/snitch_cluster.vsim
+    - cd experiments/copift
+    - ./experiments.py experiments.yaml sw run perf -j
+    - cd ../../experiments/chaining
+    - ./experiments.py experiments.yaml sw run perf -j
+
 ############
 # Non-free #
 ############
 
@@ -1,6 +1,6 @@
 [submodule "sw/deps/riscv-opcodes"]
 	path = sw/deps/riscv-opcodes
-	url = https://github.com/pulp-platform/riscv-opcodes.git
+	url = git@github.com:pulp-platform/riscv-opcodes.git
 [submodule "sw/deps/printf"]
 	path = sw/deps/printf
 	url = https://github.com/mpaland/printf.git
@@ -35,6 +35,7 @@ export_include_dirs:
   - hw/tcdm_interface/include
   - hw/snitch/include
   - hw/snitch_ssr/include
+  - target/snitch_cluster/generated
 
 sources:
   # reqrsp_interface
@@ -186,9 +187,20 @@ sources:
 
   # target/snitch_cluster
   - target: snitch_cluster_wrapper
+    files:
+      - target/snitch_cluster/generated/snitch_cluster_pkg.sv
+  - target: all(snitch_cluster_wrapper, not(postlayout))
     files:
       - target/snitch_cluster/generated/snitch_cluster_wrapper.sv
+  - target: all(snitch_cluster_wrapper, postlayout)
+    files:
+      - nonfree/gf12/fusion/runs/0/out/15/snitch_cluster_wrapper.v
   - target: all(snitch_cluster_wrapper, any(simulation, verilator))
     files:
       - target/snitch_cluster/test/vip_snitch_cluster.sv
       - target/snitch_cluster/test/testharness.sv
+
+  - target: gf12
+    files:
+      - nonfree/gf12/mems/tc_sram.sv
+      - nonfree/gf12/sourcecode/tc_clk.sv
@@ -27,21 +27,23 @@ ROOT = $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
 ############
 
 NONFREE_REMOTE ?= git@iis-git.ee.ethz.ch:pulp-restricted/snitch-cluster-nonfree.git
-NONFREE_COMMIT ?= 35cdb5b03778d3ec52e6d8fa0856ee789489b25a
+NONFREE_COMMIT ?= synth
 NONFREE_DIR = $(ROOT)/nonfree
 
 all: nonfree
 clean: clean-nonfree
 .PHONY: nonfree clean-nonfree
 
-nonfree: $(NONFREE_DIR)
-
-$(NONFREE_DIR):
-	git clone $(NONFREE_REMOTE) $(NONFREE_DIR)
-	cd $(NONFREE_DIR) && git checkout $(NONFREE_COMMIT)
+nonfree:
+	cd $(NONFREE_DIR) && \
+	git init && \
+	git remote add origin $(NONFREE_REMOTE) && \
+	git fetch origin && \
+	git checkout $(NONFREE_COMMIT) -f
 
 clean-nonfree:
 	rm -rf $(NONFREE_DIR)
+	mkdir -p $(NONFREE_DIR)/util && touch $(NONFREE_DIR)/util/.gitignore
 
 -include $(NONFREE_DIR)/Makefile
 
 
@@ -165,19 +165,54 @@ If you use the Snitch cluster or its extensions in your work, you can cite us:
 </details>
 
 <details>
-<summary><b><a href="https://doi.org/10.48550/arXiv.2404.05303">SARIS: Accelerating Stencil Computations on Energy-Efficient RISC-V Compute Clusters with Indirect Stream Registers</a></b></summary>
+<summary><b><a href="https://doi.org/10.1145/3649329.3658494">SARIS: Accelerating Stencil Computations on Energy-Efficient RISC-V Compute Clusters with Indirect Stream Registers</a></b></summary>
 <p>
 
 ```
-@misc{scheffler2024saris,
-  title={SARIS: Accelerating Stencil Computations on Energy-Efficient
-         RISC-V Compute Clusters with Indirect Stream Registers},
+@INPROCEEDINGS{scheffler2024saris,
   author={Paul Scheffler and Luca Colagrande and Luca Benini},
+  title={SARIS: Accelerating Stencil Computations on Energy-Efficient RISC-V Compute Clusters with Indirect Stream Registers},
+  booktitle = {Proceedings of the 61st ACM/IEEE Design Automation Conference},
   year={2024},
-  eprint={2404.05303},
+  doi = {10.1145/3649329.3658494}
+}
+```
+
+</p>
+</details>
+
+<details>
+<summary><b><a href="https://arxiv.org/abs/2503.20590">Dual-Issue Execution of Mixed Integer and Floating-Point Workloads on Energy-Efficient In-Order RISC-V Cores</a></b></summary>
+<p>
+
+```
+@misc{colagrande2025copift,
+  title={Dual-Issue Execution of Mixed Integer and Floating-Point Workloads on Energy-Efficient In-Order RISC-V Cores},
+  author={Luca Colagrande and Luca Benini},
+  year={2025},
+  eprint={2503.20590},
+  archivePrefix={arXiv},
+  primaryClass={cs.AR},
+  url={https://arxiv.org/abs/2503.20590}
+}
+```
+
+</p>
+</details>
+
+<details>
+<summary><b><a href="https://arxiv.org/abs/2503.20609">Late Breaking Results: A RISC-V ISA Extension for Chaining in Scalar Processors</a></b></summary>
+<p>
+
+```
+@misc{colagrande2025chaining,
+  title={Late Breaking Results: A RISC-V ISA Extension for Chaining in Scalar Processors},
+  author={Luca Colagrande and Jayanth Jonnalagadda and Luca Benini},
+  year={2025},
+  eprint={2503.20609},
   archivePrefix={arXiv},
-  primaryClass={cs.MS},
-  url={https://arxiv.org/abs/2404.05303}
+  primaryClass={cs.AR},
+  url={https://arxiv.org/abs/2503.20609}
 }
 ```
 
 
@@ -47,20 +47,20 @@ These commands compile the RTL sources respectively in `work-vlt`, `work-vsim` a
 
 ## Configuring the hardware
 
-The Snitch cluster RTL sources are partly automatically generated from a configuration file provided in `.hjson` format. Several RTL files are templated and use the `.hjson` configuration file as input to fill in the template. An example is [snitch_cluster_wrapper.sv.tpl](https://github.com/pulp-platform/{{ repo }}/blob/{{ branch }}/hw/snitch_cluster/src/snitch_cluster_wrapper.sv.tpl).
+The Snitch cluster RTL sources are partly automatically generated from a configuration file provided in [JSON5](https://json5.org/) format. Several RTL files are templated and use the `.json` configuration file as input to fill in the template. An example is [snitch_cluster_wrapper.sv.tpl](https://github.com/pulp-platform/{{ repo }}/blob/{{ branch }}/hw/snitch_cluster/src/snitch_cluster_wrapper.sv.tpl).
 
-In the [`cfg`](https://github.com/pulp-platform/{{ repo }}/blob/{{ branch }}/target/snitch_cluster/cfg) folder, different configurations are provided. The [`cfg/default.hjson`](https://github.com/pulp-platform/{{ repo }}/blob/{{ branch }}/target/snitch_cluster/cfg/default.hjson) configuration instantiates 8 compute cores + 1 DMA core in the cluster.
+In the [`cfg`](https://github.com/pulp-platform/{{ repo }}/blob/{{ branch }}/target/snitch_cluster/cfg) folder, different configurations are provided. The [`cfg/default.json`](https://github.com/pulp-platform/{{ repo }}/blob/{{ branch }}/target/snitch_cluster/cfg/default.json) configuration instantiates 8 compute cores + 1 DMA core in the cluster.
 
 The command you previously executed automatically generated the RTL sources from the templates, and it implicitly used the default configuration file. In this configuration the FPU is not equipped with a floating-point divide and square-root unit.
-To override the default configuration file, e.g. to use the configuration with FDIV/FSQRT unit, define the following variable when you invoke `make`:
+To override the default configuration file, e.g. to use the omega TCDM interconnect, define the following variable when you invoke `make`:
 ```shell
-make CFG_OVERRIDE=cfg/fdiv.hjson bin/snitch_cluster.vlt
+make CFG_OVERRIDE=cfg/omega.json bin/snitch_cluster.vlt
 ```
 
 If you want to use a custom configuration, just point `CFG_OVERRIDE` to the path of your configuration file.
 
 !!! tip
-    When you override the configuration file on the `make` command-line, the configuration is stored in the `cfg/lru.hjson` file. Successive invocations of `make` will automatically pick up the `cfg/lru.hjson` file. You can therefore omit the `CFG_OVERRIDE` definition in successive commands unless you want to override the least-recently used configuration.
+    When you override the configuration file on the `make` command-line, the configuration is stored in the `cfg/lru.json` file. Successive invocations of `make` will automatically pick up the `cfg/lru.json` file. You can therefore omit the `CFG_OVERRIDE` definition in successive commands unless you want to override the least-recently used configuration.
 
 ## Building the software
 
@@ -132,7 +132,7 @@ bin/snitch_cluster.vsim.gui sw/apps/blas/axpy/build/axpy.elf
 
 ## Debugging and benchmarking
 
-When you run a simulation, every core logs all the instructions it executes in a trace file. The traces are located in the `logs` folder within the simulation directory. Every trace is identified by a hart ID, that is a unique ID for every _hardware thread (hart)_ in a RISC-V system (and since all our cores have a single thread that is a unique ID per core).
+When you run a simulation, every core logs all the instructions it executes in a trace file. The traces are located in the `logs` folder within the _simulation directory_. Every trace is identified by a hart ID, that is a unique ID for every _hardware thread (hart)_ in a RISC-V system (and since all our cores have a single thread that is a unique ID per core).
 
 The simulation dumps the traces in a non-human-readable format with `.dasm` extension. To convert these to a human-readable form run:
 
@@ -328,3 +328,89 @@ As you may have noticed, there is a good deal of code which is independent of th
 It is thus preferable to develop the data generation scripts and Snitch kernels in a shared location, from which multiple platforms can take and include the code. The `sw` directory in the root of this repository was created with this goal in mind. For the AXPY example, shared sources are hosted under the `sw/blas/axpy` directory.
 
 We recommend that you follow this approach also in your own developments for as much of the code which can be reused.
+
+## Implementing the hardware
+
+If you make changes to the hardware, you probably also want to physically implement it to estimate the PPA impact of your modifications. As the physical implementation flow involves proprietary tools licensed under non-disclosure agreements, our physical implementation flow is contained in a separate private git repository. If you are an IIS user, with access to our Gitlab server and IIS machines, you may follow the next instructions to replicate our implementation flow.
+
+Firstly, we need to clone all the sources for the physical flow. The following command takes care of everything for you:
+```shell
+make nonfree
+```
+
+Behind the scenes, it will clone the `snitch-cluster-nonfree` repo under the `nonfree` folder. Let's move into this folder:
+
+```shell
+cd nonfree
+```
+
+Here, you will find a Makefile with a series of convenience targets to launch our flow up to a certain stage: may it be elaboration (`elab`), synthesis (`synth`) or place-and-route (`pnr`). If you can wait long enough you may also launch the entire flow to produce a final optimized post-layout netlist:
+
+```shell
+make post-layout-netlist
+```
+
+This may take as long as a day, or more, depending on your machine's performance. If you previously launched the flow up to a certain stage, you can resume it from that point without restarting from scratch. Just specify the `FIRST_STAGE` flag with the name of the stage you want to start from, e.g.:
+
+```shell
+make FIRST_STAGE=synth-init-opto post-layout-netlist
+```
+
+You will find reports and output files produced by the flow in the `nonfree/gf12/fusion/runs/0/` folder, respectively in the `reports` and `out` subdirectories, separated into individual subdirectories for every stage in the flow. These are all you should need to derive area and timing numbers for your design.
+
+## Running a physical simulation
+
+Once your design is physically implemented, you want to also verify that it works as intended.
+Assuming you used the previous command to get a final optimized post-layout netlist, you can directly build a simulation model out of it. Head back to the main repository, in the `target/snitch_cluster` folder, and build the simulation model with the following flag:
+
+```shell
+make clean-vsim
+make PL_SIM=1 bin/snitch_cluster.vsim
+```
+
+This resembles the commands you've previously seen in section [Building the hardware](#building-the-hardware). In fact, all testbench components are the same, we simply use the added flag to tell [Bender](https://github.com/pulp-platform/bender) to reference the physical netlist in place of the source RTL as a DUT during compilation.
+The `Bender.yml` file in the root of the repository automatically references the final netlist in our flow, but you could replace that with a netlist from an intermediate stage if you do not intend to run the whole flow.
+
+!!! note
+    Make does not track changes in the flags passed to it, so it does not know that it has to update the RTL source list for compilation. To ensure that it is updated, we can delete the compilation script, which was implicitly generated when you last built the simulation model. The first command above achieves this, by deleting all artifacts from the last build with QuestaSim.
+
+Running a physical simulation is then no different from running a functional simulation, so you may continue using the commands introduced in section [Running a simulation](#running-a-simulation).
+
+## Power estimation
+
+During physical implementation, the tools are able to independently generate area and timing numbers. For a complete PPA analysis, you will want to include power estimates as well.
+
+Power numbers are extremely dependent on the switching activity in your circuit, which in turn depends on the stimuli you feed in to your DUT, so you are in charge of providing this information to the tools. The switching activity is typically recorded in the form of a [VCD](https://en.wikipedia.org/wiki/Value_change_dump) file, and can be generated by most RTL simulators.
+
+To do so, set the `VCD_DUMP` flag when building the physical simulation model:
+```shell
+make PL_SIM=1 VCD_DUMP=1 DEBUG=ON bin/snitch_cluster.vsim
+``` 
+
+!!! danger
+    When using QuestaSim for VCD generation, you must build the model with the `DEBUG=ON` flag, to ensure that all nets are preserved during compilation, preventing them from being optimized away. This guarantees that the VCD file contains switching activity for all nets in your circuit. 
+
+When you run a simulation, the simulator will now automatically create a `vcd` subdirectory within the _simulation directory_, where a VCD file is generated.
+
+Most often you are not interested in estimating the power of an entire simulation, but only of a specific section, e.g. while executing a part of a kernel computation.
+You can pass start and end times for VCD recording to the simulation as environment variables:
+
+```shell
+VCD_START=127ns VCD_END=8898ns bin/snitch_cluster.vsim sw/apps/blas/axpy/build/axpy.elf
+```
+
+!!! note
+    Variable assignments must preceed the executable in a shell command to be interpreted as environment variable assignments. Note that environment variables set this way only persist for the current command.
+
+A benefit of RTL simulations is that they are cycle-accurate. You can thus use them as a reference to find the start and end times of interest with the help of the simulation traces (unavailable during physical simulation), and directly apply these to the physical simulation.
+
+With a VCD file at your disposal, you can now estimate the power consumption of your circuit. In the non-free repository, run the following command:
+```shell
+make SIM_DIR=<path_to_simulation_directory> power
+```
+You need to point the command to the _simulation directory_ in which the VCD dump was generated, for it to find the VCD file.
+
+!!! note
+    Since the actual simulation command is run in a different directory, you need to point to the _simulation directory_ using an absolute path.
+
+Once the command terminates, you will find power reports in the `nonfree/gf12/synopsys/reports` folder, from which you can extract relevant power numbers.
@@ -334,6 +334,9 @@ package riscv_instr;
   localparam logic [31:0] SCFGWI             = 32'b?????????????????010000000101011;
   localparam logic [31:0] SCFGR              = 32'b0000000?????00001001?????0101011;
   localparam logic [31:0] SCFGW              = 32'b0000000??????????010000010101011;
+  localparam logic [31:0] FLT_D_COPIFT       = 32'b1010001??????????001?????0101011;
+  localparam logic [31:0] FCVT_D_W_COPIFT    = 32'b110100100000?????????????0101011;
+  localparam logic [31:0] FCVT_D_WU_COPIFT   = 32'b110100100001?????????????0101011;
   localparam logic [31:0] FLH                = 32'b?????????????????001?????0000111;
   localparam logic [31:0] FSH                = 32'b?????????????????001?????0100111;
   localparam logic [31:0] FMADD_H            = 32'b?????10??????????????????1000011;
@@ -1134,6 +1137,7 @@ package riscv_instr;
   localparam logic [11:0] CSR_SSR = 12'h7c0;
   localparam logic [11:0] CSR_FPMODE = 12'h7c1;
   localparam logic [11:0] CSR_BARRIER = 12'h7c2;
+  localparam logic [11:0] CSR_SC = 12'h7c3;
   localparam logic [11:0] CSR_HTIMEDELTAH = 12'h615;
   localparam logic [11:0] CSR_CYCLEH = 12'hc80;
   localparam logic [11:0] CSR_TIMEH = 12'hc81;
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,6 @@`
`1`	`1`	`*`
`2`	`2`	`!sw/*/.py`
`3`	`3`	`!util/*/.py`
	`4`	`+!nonfree/util/*`
	`5`	`+!target/snitch_cluster/util/*.py`
`4`	`6`	`!pyproject.toml`