Skip to content

Commit c176a57

Browse files
collucaBlueonics
andcommitted
treewide: Add COPIFT and scalar chaining paper contributions (#219)
* README.md: Update SARIS citation to peer-reviewed article * docs: Change all .hjson references to .json * sw: Streamline PRNG API * util/sim: Reuse code to get simulations from a list object * target/snitch_cluster: Extend Python package * trace: Format `tstart`, `tend` and `cycles` as integer decimals * cfg: Increment `num_int_outstanding_loads` to 4 * snRuntime: Clean up SSR and add first ISSR API * sw: Accelerate doitgen and GeLU tests * treewide: Remove legacy `editorconfig` * ci: Show internal Gitlab CI as skipped * ci: Enable colors in Github CI * treewide: Add physical implementation flow * treewide: Split cluster wrapper and package * treewide: Add post-layout simulation flow * clustergen: Improve error messages on template rendering * treewide: Use new Snitch LLVM toolchain * treewide: Run fdiv tests on default hardware with `-mno-fdiv` * treewide: Separate tests requiring larger FREP sequencer Prepare for changes in PI estimation test, which will require larger FREP sequencer. * treewide: Add COPIFT extensions * treewide: Test COPIFT extensions * treewide: Add COPIFT experiments * treewide: Add scalar chaining extension * treewide: Add scalar chaining experiments --------- Co-authored-by: Blueonics <jln925@live.com>
1 parent 85cd473 commit c176a57

File tree

126 files changed

+8652
-585
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

126 files changed

+8652
-585
lines changed

.dockerignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
*
22
!sw/**/*.py
33
!util/**/*.py
4+
!nonfree/util/*
5+
!target/snitch_cluster/util/*.py
46
!pyproject.toml

.github/workflows/ci.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@
88
name: ci
99
on: [push]
1010

11+
env:
12+
FORCE_COLOR: 1
13+
1114
jobs:
1215

1316
##########################
@@ -147,7 +150,6 @@ jobs:
147150
working-directory: target/snitch_cluster
148151
run: |
149152
./util/run.py sw/run.yaml --simulator verilator -j
150-
./util/run.py sw/fdiv.yaml --simulator verilator -j
151153
- name: Annotate traces
152154
working-directory: target/snitch_cluster
153155
run: |

.github/workflows/gitlab-ci.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,12 @@ jobs:
1111
gitlab-ci:
1212
name: Internal Gitlab CI
1313
runs-on: ubuntu-22.04
14+
# Skip on forks due to missing secrets.
15+
if: >
16+
github.repository == 'pulp-platform/snitch_cluster'
1417
steps:
1518
- name: Check Gitlab CI
1619
uses: pulp-platform/pulp-actions/gitlab-ci@v2.1.0
17-
# Skip on forks due to missing secrets.
18-
if: >
19-
github.repository == 'pulp-platform/snitch_cluster'
2020
with:
2121
domain: iis-git.ee.ethz.ch
2222
repo: github-mirror/snitch_cluster

.github/workflows/lint.yml

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -121,18 +121,3 @@ jobs:
121121
- uses: DoozyX/clang-format-lint-action@v0.18.1
122122
with:
123123
clangFormatVersion: 10
124-
125-
######################
126-
# Lint Editor Config #
127-
######################
128-
# Detect trailing whitespaces, missing new lines and wrong file encodings.
129-
editorconfig-lint:
130-
name: Lint Editorconfig
131-
runs-on: ubuntu-latest
132-
if: >
133-
github.event_name != 'pull_request' ||
134-
github.event.pull_request.head.repo.full_name != github.repository
135-
steps:
136-
- uses: actions/checkout@v3
137-
- uses: editorconfig-checker/action-editorconfig-checker@main
138-
- run: editorconfig-checker

.gitignore

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
# Nonfree repository
2-
/nonfree/
2+
/nonfree/**
3+
!/nonfree/util
4+
!/nonfree/util/.gitignore
35

46
# Dependency files
57
/.bender/

.gitlab-ci.yml

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -129,16 +129,6 @@ snitch-cluster-vsim:
129129
# Run additional, more extensive tests
130130
- cd sw/apps/blas/gemm/test && ./test.sh && cd -
131131
- cd sw/apps/dnn/transpose/test && ./test.sh && cd -
132-
133-
# Tests requiring hardware FDIV unit
134-
snitch-cluster-fdiv-vsim:
135-
needs: [setup]
136-
script:
137-
- cd target/snitch_cluster
138-
- make CFG_OVERRIDE=cfg/fdiv.json sw
139-
- make bin/snitch_cluster.vsim
140-
- ./util/run.py sw/fdiv.yaml --simulator vsim -j --run-dir runs/vsim
141-
# Run additional, more extensive tests
142132
- cd sw/apps/dnn/flashattention_2/test && ./test.sh && cd -
143133

144134
# Test OmegaNet TCDM interconnect
@@ -159,6 +149,26 @@ snitch-cluster-mchan-vsim:
159149
- make bin/snitch_cluster.vsim
160150
- ./util/run.py sw/dma_mchan.yaml --simulator vsim -j --run-dir runs/vsim
161151

152+
# Tests requiring a larger FREP sequencer
153+
snitch-cluster-frep-xl-vsim:
154+
needs: [setup]
155+
script:
156+
- cd target/snitch_cluster
157+
- make CFG_OVERRIDE=cfg/frep_xl.json sw
158+
- make bin/snitch_cluster.vsim
159+
- ./util/run.py sw/frep_xl.yaml --simulator vsim -j --run-dir runs/vsim
160+
161+
# COPIFT and scalar chaining experiments
162+
snitch-cluster-copift-sc-vsim:
163+
needs: [setup]
164+
script:
165+
- cd target/snitch_cluster
166+
- make CFG_OVERRIDE=experiments/copift/cfg.json bin/snitch_cluster.vsim
167+
- cd experiments/copift
168+
- ./experiments.py experiments.yaml sw run perf -j
169+
- cd ../../experiments/chaining
170+
- ./experiments.py experiments.yaml sw run perf -j
171+
162172
############
163173
# Non-free #
164174
############

.gitmodules

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[submodule "sw/deps/riscv-opcodes"]
22
path = sw/deps/riscv-opcodes
3-
url = https://github.com/pulp-platform/riscv-opcodes.git
3+
url = git@github.com:pulp-platform/riscv-opcodes.git
44
[submodule "sw/deps/printf"]
55
path = sw/deps/printf
66
url = https://github.com/mpaland/printf.git

Bender.yml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ export_include_dirs:
3535
- hw/tcdm_interface/include
3636
- hw/snitch/include
3737
- hw/snitch_ssr/include
38+
- target/snitch_cluster/generated
3839

3940
sources:
4041
# reqrsp_interface
@@ -186,9 +187,20 @@ sources:
186187

187188
# target/snitch_cluster
188189
- target: snitch_cluster_wrapper
190+
files:
191+
- target/snitch_cluster/generated/snitch_cluster_pkg.sv
192+
- target: all(snitch_cluster_wrapper, not(postlayout))
189193
files:
190194
- target/snitch_cluster/generated/snitch_cluster_wrapper.sv
195+
- target: all(snitch_cluster_wrapper, postlayout)
196+
files:
197+
- nonfree/gf12/fusion/runs/0/out/15/snitch_cluster_wrapper.v
191198
- target: all(snitch_cluster_wrapper, any(simulation, verilator))
192199
files:
193200
- target/snitch_cluster/test/vip_snitch_cluster.sv
194201
- target/snitch_cluster/test/testharness.sv
202+
203+
- target: gf12
204+
files:
205+
- nonfree/gf12/mems/tc_sram.sv
206+
- nonfree/gf12/sourcecode/tc_clk.sv

Makefile

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -27,21 +27,23 @@ ROOT = $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
2727
############
2828

2929
NONFREE_REMOTE ?= git@iis-git.ee.ethz.ch:pulp-restricted/snitch-cluster-nonfree.git
30-
NONFREE_COMMIT ?= 35cdb5b03778d3ec52e6d8fa0856ee789489b25a
30+
NONFREE_COMMIT ?= synth
3131
NONFREE_DIR = $(ROOT)/nonfree
3232

3333
all: nonfree
3434
clean: clean-nonfree
3535
.PHONY: nonfree clean-nonfree
3636

37-
nonfree: $(NONFREE_DIR)
38-
39-
$(NONFREE_DIR):
40-
git clone $(NONFREE_REMOTE) $(NONFREE_DIR)
41-
cd $(NONFREE_DIR) && git checkout $(NONFREE_COMMIT)
37+
nonfree:
38+
cd $(NONFREE_DIR) && \
39+
git init && \
40+
git remote add origin $(NONFREE_REMOTE) && \
41+
git fetch origin && \
42+
git checkout $(NONFREE_COMMIT) -f
4243

4344
clean-nonfree:
4445
rm -rf $(NONFREE_DIR)
46+
mkdir -p $(NONFREE_DIR)/util && touch $(NONFREE_DIR)/util/.gitignore
4547

4648
-include $(NONFREE_DIR)/Makefile
4749

README.md

Lines changed: 42 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -165,19 +165,54 @@ If you use the Snitch cluster or its extensions in your work, you can cite us:
165165
</details>
166166

167167
<details>
168-
<summary><b><a href="https://doi.org/10.48550/arXiv.2404.05303">SARIS: Accelerating Stencil Computations on Energy-Efficient RISC-V Compute Clusters with Indirect Stream Registers</a></b></summary>
168+
<summary><b><a href="https://doi.org/10.1145/3649329.3658494">SARIS: Accelerating Stencil Computations on Energy-Efficient RISC-V Compute Clusters with Indirect Stream Registers</a></b></summary>
169169
<p>
170170

171171
```
172-
@misc{scheffler2024saris,
173-
title={SARIS: Accelerating Stencil Computations on Energy-Efficient
174-
RISC-V Compute Clusters with Indirect Stream Registers},
172+
@INPROCEEDINGS{scheffler2024saris,
175173
author={Paul Scheffler and Luca Colagrande and Luca Benini},
174+
title={SARIS: Accelerating Stencil Computations on Energy-Efficient RISC-V Compute Clusters with Indirect Stream Registers},
175+
booktitle = {Proceedings of the 61st ACM/IEEE Design Automation Conference},
176176
year={2024},
177-
eprint={2404.05303},
177+
doi = {10.1145/3649329.3658494}
178+
}
179+
```
180+
181+
</p>
182+
</details>
183+
184+
<details>
185+
<summary><b><a href="https://arxiv.org/abs/2503.20590">Dual-Issue Execution of Mixed Integer and Floating-Point Workloads on Energy-Efficient In-Order RISC-V Cores</a></b></summary>
186+
<p>
187+
188+
```
189+
@misc{colagrande2025copift,
190+
title={Dual-Issue Execution of Mixed Integer and Floating-Point Workloads on Energy-Efficient In-Order RISC-V Cores},
191+
author={Luca Colagrande and Luca Benini},
192+
year={2025},
193+
eprint={2503.20590},
194+
archivePrefix={arXiv},
195+
primaryClass={cs.AR},
196+
url={https://arxiv.org/abs/2503.20590}
197+
}
198+
```
199+
200+
</p>
201+
</details>
202+
203+
<details>
204+
<summary><b><a href="https://arxiv.org/abs/2503.20609">Late Breaking Results: A RISC-V ISA Extension for Chaining in Scalar Processors</a></b></summary>
205+
<p>
206+
207+
```
208+
@misc{colagrande2025chaining,
209+
title={Late Breaking Results: A RISC-V ISA Extension for Chaining in Scalar Processors},
210+
author={Luca Colagrande and Jayanth Jonnalagadda and Luca Benini},
211+
year={2025},
212+
eprint={2503.20609},
178213
archivePrefix={arXiv},
179-
primaryClass={cs.MS},
180-
url={https://arxiv.org/abs/2404.05303}
214+
primaryClass={cs.AR},
215+
url={https://arxiv.org/abs/2503.20609}
181216
}
182217
```
183218

docs/ug/tutorial.md

Lines changed: 92 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -47,20 +47,20 @@ These commands compile the RTL sources respectively in `work-vlt`, `work-vsim` a
4747

4848
## Configuring the hardware
4949

50-
The Snitch cluster RTL sources are partly automatically generated from a configuration file provided in `.hjson` format. Several RTL files are templated and use the `.hjson` configuration file as input to fill in the template. An example is [snitch_cluster_wrapper.sv.tpl](https://github.com/pulp-platform/{{ repo }}/blob/{{ branch }}/hw/snitch_cluster/src/snitch_cluster_wrapper.sv.tpl).
50+
The Snitch cluster RTL sources are partly automatically generated from a configuration file provided in [JSON5](https://json5.org/) format. Several RTL files are templated and use the `.json` configuration file as input to fill in the template. An example is [snitch_cluster_wrapper.sv.tpl](https://github.com/pulp-platform/{{ repo }}/blob/{{ branch }}/hw/snitch_cluster/src/snitch_cluster_wrapper.sv.tpl).
5151

52-
In the [`cfg`](https://github.com/pulp-platform/{{ repo }}/blob/{{ branch }}/target/snitch_cluster/cfg) folder, different configurations are provided. The [`cfg/default.hjson`](https://github.com/pulp-platform/{{ repo }}/blob/{{ branch }}/target/snitch_cluster/cfg/default.hjson) configuration instantiates 8 compute cores + 1 DMA core in the cluster.
52+
In the [`cfg`](https://github.com/pulp-platform/{{ repo }}/blob/{{ branch }}/target/snitch_cluster/cfg) folder, different configurations are provided. The [`cfg/default.json`](https://github.com/pulp-platform/{{ repo }}/blob/{{ branch }}/target/snitch_cluster/cfg/default.json) configuration instantiates 8 compute cores + 1 DMA core in the cluster.
5353

5454
The command you previously executed automatically generated the RTL sources from the templates, and it implicitly used the default configuration file. In this configuration the FPU is not equipped with a floating-point divide and square-root unit.
55-
To override the default configuration file, e.g. to use the configuration with FDIV/FSQRT unit, define the following variable when you invoke `make`:
55+
To override the default configuration file, e.g. to use the omega TCDM interconnect, define the following variable when you invoke `make`:
5656
```shell
57-
make CFG_OVERRIDE=cfg/fdiv.hjson bin/snitch_cluster.vlt
57+
make CFG_OVERRIDE=cfg/omega.json bin/snitch_cluster.vlt
5858
```
5959

6060
If you want to use a custom configuration, just point `CFG_OVERRIDE` to the path of your configuration file.
6161

6262
!!! tip
63-
When you override the configuration file on the `make` command-line, the configuration is stored in the `cfg/lru.hjson` file. Successive invocations of `make` will automatically pick up the `cfg/lru.hjson` file. You can therefore omit the `CFG_OVERRIDE` definition in successive commands unless you want to override the least-recently used configuration.
63+
When you override the configuration file on the `make` command-line, the configuration is stored in the `cfg/lru.json` file. Successive invocations of `make` will automatically pick up the `cfg/lru.json` file. You can therefore omit the `CFG_OVERRIDE` definition in successive commands unless you want to override the most recently used configuration.
6464

6565
## Building the software
6666

@@ -132,7 +132,7 @@ bin/snitch_cluster.vsim.gui sw/apps/blas/axpy/build/axpy.elf
132132

133133
## Debugging and benchmarking
134134

135-
When you run a simulation, every core logs all the instructions it executes in a trace file. The traces are located in the `logs` folder within the simulation directory. Every trace is identified by a hart ID, that is a unique ID for every _hardware thread (hart)_ in a RISC-V system (and since all our cores have a single thread that is a unique ID per core).
135+
When you run a simulation, every core logs all the instructions it executes in a trace file. The traces are located in the `logs` folder within the _simulation directory_. Every trace is identified by a hart ID, that is a unique ID for every _hardware thread (hart)_ in a RISC-V system (and since all our cores have a single thread that is a unique ID per core).
136136

137137
The simulation dumps the traces in a non-human-readable format with `.dasm` extension. To convert these to a human-readable form run:
138138

@@ -328,3 +328,89 @@ As you may have noticed, there is a good deal of code which is independent of th
328328
It is thus preferable to develop the data generation scripts and Snitch kernels in a shared location, from which multiple platforms can take and include the code. The `sw` directory in the root of this repository was created with this goal in mind. For the AXPY example, shared sources are hosted under the `sw/blas/axpy` directory.
329329

330330
We recommend that you follow this approach also in your own developments for as much of the code which can be reused.
331+
332+
## Implementing the hardware
333+
334+
If you make changes to the hardware, you probably also want to physically implement it to estimate the PPA impact of your modifications. As the physical implementation flow involves proprietary tools licensed under non-disclosure agreements, our physical implementation flow is contained in a separate private git repository. If you are an IIS user, with access to our Gitlab server and IIS machines, you may follow the next instructions to replicate our implementation flow.
335+
336+
Firstly, we need to clone all the sources for the physical flow. The following command takes care of everything for you:
337+
```shell
338+
make nonfree
339+
```
340+
341+
Behind the scenes, it will clone the `snitch-cluster-nonfree` repo under the `nonfree` folder. Let's move into this folder:
342+
343+
```shell
344+
cd nonfree
345+
```
346+
347+
Here, you will find a Makefile with a series of convenience targets to launch our flow up to a certain stage, be it elaboration (`elab`), synthesis (`synth`) or place-and-route (`pnr`). If you can wait long enough you may also launch the entire flow to produce a final optimized post-layout netlist:
348+
349+
```shell
350+
make post-layout-netlist
351+
```
352+
353+
This may take as long as a day, or more, depending on your machine's performance. If you previously launched the flow up to a certain stage, you can resume it from that point without restarting from scratch. Just specify the `FIRST_STAGE` flag with the name of the stage you want to start from, e.g.:
354+
355+
```shell
356+
make FIRST_STAGE=synth-init-opto post-layout-netlist
357+
```
358+
359+
You will find reports and output files produced by the flow in the `nonfree/gf12/fusion/runs/0/` folder, respectively in the `reports` and `out` subdirectories, separated into individual subdirectories for every stage in the flow. These are all you should need to derive area and timing numbers for your design.
360+
361+
## Running a physical simulation
362+
363+
Once your design is physically implemented, you want to also verify that it works as intended.
364+
Assuming you used the previous command to get a final optimized post-layout netlist, you can directly build a simulation model out of it. Head back to the main repository, in the `target/snitch_cluster` folder, and build the simulation model with the following flag:
365+
366+
```shell
367+
make clean-vsim
368+
make PL_SIM=1 bin/snitch_cluster.vsim
369+
```
370+
371+
This resembles the commands you've previously seen in section [Building the hardware](#building-the-hardware). In fact, all testbench components are the same, we simply use the added flag to tell [Bender](https://github.com/pulp-platform/bender) to reference the physical netlist in place of the source RTL as a DUT during compilation.
372+
The `Bender.yml` file in the root of the repository automatically references the final netlist in our flow, but you could replace that with a netlist from an intermediate stage if you do not intend to run the whole flow.
373+
374+
!!! note
375+
Make does not track changes in the flags passed to it, so it does not know that it has to update the RTL source list for compilation. To ensure that it is updated, we can delete the compilation script, which was implicitly generated when you last built the simulation model. The first command above achieves this, by deleting all artifacts from the last build with QuestaSim.
376+
377+
Running a physical simulation is then no different from running a functional simulation, so you may continue using the commands introduced in section [Running a simulation](#running-a-simulation).
378+
379+
## Power estimation
380+
381+
During physical implementation, the tools are able to independently generate area and timing numbers. For a complete PPA analysis, you will want to include power estimates as well.
382+
383+
Power numbers are extremely dependent on the switching activity in your circuit, which in turn depends on the stimuli you feed into your DUT, so you are in charge of providing this information to the tools. The switching activity is typically recorded in the form of a [VCD](https://en.wikipedia.org/wiki/Value_change_dump) file, and can be generated by most RTL simulators.
384+
385+
To do so, set the `VCD_DUMP` flag when building the physical simulation model:
386+
```shell
387+
make PL_SIM=1 VCD_DUMP=1 DEBUG=ON bin/snitch_cluster.vsim
388+
```
389+
390+
!!! danger
391+
When using QuestaSim for VCD generation, you must build the model with the `DEBUG=ON` flag, to ensure that all nets are preserved during compilation, preventing them from being optimized away. This guarantees that the VCD file contains switching activity for all nets in your circuit.
392+
393+
When you run a simulation, the simulator will now automatically create a `vcd` subdirectory within the _simulation directory_, where a VCD file is generated.
394+
395+
Most often you are not interested in estimating the power of an entire simulation, but only of a specific section, e.g. while executing a part of a kernel computation.
396+
You can pass start and end times for VCD recording to the simulation as environment variables:
397+
398+
```shell
399+
VCD_START=127ns VCD_END=8898ns bin/snitch_cluster.vsim sw/apps/blas/axpy/build/axpy.elf
400+
```
401+
402+
!!! note
403+
Variable assignments must precede the executable in a shell command to be interpreted as environment variable assignments. Note that environment variables set this way only persist for the current command.
404+
405+
A benefit of RTL simulations is that they are cycle-accurate. You can thus use them as a reference to find the start and end times of interest with the help of the simulation traces (unavailable during physical simulation), and directly apply these to the physical simulation.
406+
407+
With a VCD file at your disposal, you can now estimate the power consumption of your circuit. In the non-free repository, run the following command:
408+
```shell
409+
make SIM_DIR=<path_to_simulation_directory> power
410+
```
411+
You need to point the command to the _simulation directory_ in which the VCD dump was generated, for it to find the VCD file.
412+
413+
!!! note
414+
Since the actual simulation command is run in a different directory, you need to point to the _simulation directory_ using an absolute path.
415+
416+
Once the command terminates, you will find power reports in the `nonfree/gf12/synopsys/reports` folder, from which you can extract relevant power numbers.

hw/snitch/src/riscv_instr.sv

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -334,6 +334,9 @@ package riscv_instr;
334334
localparam logic [31:0] SCFGWI = 32'b?????????????????010000000101011;
335335
localparam logic [31:0] SCFGR = 32'b0000000?????00001001?????0101011;
336336
localparam logic [31:0] SCFGW = 32'b0000000??????????010000010101011;
337+
localparam logic [31:0] FLT_D_COPIFT = 32'b1010001??????????001?????0101011;
338+
localparam logic [31:0] FCVT_D_W_COPIFT = 32'b110100100000?????????????0101011;
339+
localparam logic [31:0] FCVT_D_WU_COPIFT = 32'b110100100001?????????????0101011;
337340
localparam logic [31:0] FLH = 32'b?????????????????001?????0000111;
338341
localparam logic [31:0] FSH = 32'b?????????????????001?????0100111;
339342
localparam logic [31:0] FMADD_H = 32'b?????10??????????????????1000011;
@@ -1134,6 +1137,7 @@ package riscv_instr;
11341137
localparam logic [11:0] CSR_SSR = 12'h7c0;
11351138
localparam logic [11:0] CSR_FPMODE = 12'h7c1;
11361139
localparam logic [11:0] CSR_BARRIER = 12'h7c2;
1140+
localparam logic [11:0] CSR_SC = 12'h7c3;
11371141
localparam logic [11:0] CSR_HTIMEDELTAH = 12'h615;
11381142
localparam logic [11:0] CSR_CYCLEH = 12'hc80;
11391143
localparam logic [11:0] CSR_TIMEH = 12'hc81;

0 commit comments

Comments
 (0)