Skip to content

Commit a721154

Browse files
authored
Merge pull request #3588 from chipsalliance/tlb_port
Generalize tlb_port | improve NBDcache performance
2 parents 6d00957 + b50ad58 commit a721154

File tree

9 files changed

+181
-170
lines changed

9 files changed

+181
-170
lines changed

Diff for: src/main/resources/vsrc/RoccBlackBox.v

+147 -146
Original file line number | Diff line number | Diff line change
@@ -15,153 +15,154 @@ module RoccBlackBox
1515
fLen = 64,
1616
FPConstants_FLAGS_SZ = 5)
1717
( input clock,
18-
input reset,
19-
output rocc_cmd_ready,
20-
input rocc_cmd_valid,
21-
input [6:0] rocc_cmd_bits_inst_funct,
22-
input [4:0] rocc_cmd_bits_inst_rs2,
23-
input [4:0] rocc_cmd_bits_inst_rs1,
24-
input rocc_cmd_bits_inst_xd,
25-
input rocc_cmd_bits_inst_xs1,
26-
input rocc_cmd_bits_inst_xs2,
27-
input [4:0] rocc_cmd_bits_inst_rd,
28-
input [6:0] rocc_cmd_bits_inst_opcode,
29-
input [xLen-1:0] rocc_cmd_bits_rs1,
30-
input [xLen-1:0] rocc_cmd_bits_rs2,
31-
input rocc_cmd_bits_status_debug,
32-
input rocc_cmd_bits_status_cease,
33-
input rocc_cmd_bits_status_wfi,
34-
input [31:0] rocc_cmd_bits_status_isa,
35-
input [PRV_SZ-1:0] rocc_cmd_bits_status_dprv,
36-
input rocc_cmd_bits_status_dv,
37-
input [PRV_SZ-1:0] rocc_cmd_bits_status_prv,
38-
input rocc_cmd_bits_status_v,
39-
input rocc_cmd_bits_status_sd,
40-
input [22:0] rocc_cmd_bits_status_zero2,
41-
input rocc_cmd_bits_status_mpv,
42-
input rocc_cmd_bits_status_gva,
43-
input rocc_cmd_bits_status_mbe,
44-
input rocc_cmd_bits_status_sbe,
45-
input [1:0] rocc_cmd_bits_status_sxl,
46-
input [1:0] rocc_cmd_bits_status_uxl,
47-
input rocc_cmd_bits_status_sd_rv32,
48-
input [7:0] rocc_cmd_bits_status_zero1,
49-
input rocc_cmd_bits_status_tsr,
50-
input rocc_cmd_bits_status_tw,
51-
input rocc_cmd_bits_status_tvm,
52-
input rocc_cmd_bits_status_mxr,
53-
input rocc_cmd_bits_status_sum,
54-
input rocc_cmd_bits_status_mprv,
55-
input [1:0] rocc_cmd_bits_status_xs,
56-
input [1:0] rocc_cmd_bits_status_fs,
57-
input [1:0] rocc_cmd_bits_status_vs,
58-
input [1:0] rocc_cmd_bits_status_mpp,
59-
input [0:0] rocc_cmd_bits_status_spp,
60-
input rocc_cmd_bits_status_mpie,
61-
input rocc_cmd_bits_status_ube,
62-
input rocc_cmd_bits_status_spie,
63-
input rocc_cmd_bits_status_upie,
64-
input rocc_cmd_bits_status_mie,
65-
input rocc_cmd_bits_status_hie,
66-
input rocc_cmd_bits_status_sie,
67-
input rocc_cmd_bits_status_uie,
68-
input rocc_resp_ready,
69-
output rocc_resp_valid,
70-
output [4:0] rocc_resp_bits_rd,
71-
output [xLen-1:0] rocc_resp_bits_data,
72-
input rocc_mem_req_ready,
73-
output rocc_mem_req_valid,
74-
output [coreMaxAddrBits-1:0] rocc_mem_req_bits_addr,
75-
output [dcacheReqTagBits-1:0] rocc_mem_req_bits_tag,
76-
output [M_SZ-1:0] rocc_mem_req_bits_cmd,
18+
input reset,
19+
output rocc_cmd_ready,
20+
input rocc_cmd_valid,
21+
input [6:0] rocc_cmd_bits_inst_funct,
22+
input [4:0] rocc_cmd_bits_inst_rs2,
23+
input [4:0] rocc_cmd_bits_inst_rs1,
24+
input rocc_cmd_bits_inst_xd,
25+
input rocc_cmd_bits_inst_xs1,
26+
input rocc_cmd_bits_inst_xs2,
27+
input [4:0] rocc_cmd_bits_inst_rd,
28+
input [6:0] rocc_cmd_bits_inst_opcode,
29+
input [xLen-1:0] rocc_cmd_bits_rs1,
30+
input [xLen-1:0] rocc_cmd_bits_rs2,
31+
input rocc_cmd_bits_status_debug,
32+
input rocc_cmd_bits_status_cease,
33+
input rocc_cmd_bits_status_wfi,
34+
input [31:0] rocc_cmd_bits_status_isa,
35+
input [PRV_SZ-1:0] rocc_cmd_bits_status_dprv,
36+
input rocc_cmd_bits_status_dv,
37+
input [PRV_SZ-1:0] rocc_cmd_bits_status_prv,
38+
input rocc_cmd_bits_status_v,
39+
input rocc_cmd_bits_status_sd,
40+
input [22:0] rocc_cmd_bits_status_zero2,
41+
input rocc_cmd_bits_status_mpv,
42+
input rocc_cmd_bits_status_gva,
43+
input rocc_cmd_bits_status_mbe,
44+
input rocc_cmd_bits_status_sbe,
45+
input [1:0] rocc_cmd_bits_status_sxl,
46+
input [1:0] rocc_cmd_bits_status_uxl,
47+
input rocc_cmd_bits_status_sd_rv32,
48+
input [7:0] rocc_cmd_bits_status_zero1,
49+
input rocc_cmd_bits_status_tsr,
50+
input rocc_cmd_bits_status_tw,
51+
input rocc_cmd_bits_status_tvm,
52+
input rocc_cmd_bits_status_mxr,
53+
input rocc_cmd_bits_status_sum,
54+
input rocc_cmd_bits_status_mprv,
55+
input [1:0] rocc_cmd_bits_status_xs,
56+
input [1:0] rocc_cmd_bits_status_fs,
57+
input [1:0] rocc_cmd_bits_status_vs,
58+
input [1:0] rocc_cmd_bits_status_mpp,
59+
input [0:0] rocc_cmd_bits_status_spp,
60+
input rocc_cmd_bits_status_mpie,
61+
input rocc_cmd_bits_status_ube,
62+
input rocc_cmd_bits_status_spie,
63+
input rocc_cmd_bits_status_upie,
64+
input rocc_cmd_bits_status_mie,
65+
input rocc_cmd_bits_status_hie,
66+
input rocc_cmd_bits_status_sie,
67+
input rocc_cmd_bits_status_uie,
68+
input rocc_resp_ready,
69+
output rocc_resp_valid,
70+
output [4:0] rocc_resp_bits_rd,
71+
output [xLen-1:0] rocc_resp_bits_data,
72+
input rocc_mem_req_ready,
73+
output rocc_mem_req_valid,
74+
output [coreMaxAddrBits-1:0] rocc_mem_req_bits_addr,
75+
output [dcacheReqTagBits-1:0] rocc_mem_req_bits_tag,
76+
output [M_SZ-1:0] rocc_mem_req_bits_cmd,
7777
output [mem_req_bits_size_width-1:0] rocc_mem_req_bits_size,
78-
output rocc_mem_req_bits_signed,
79-
output rocc_mem_req_bits_phys,
80-
output rocc_mem_req_bits_no_alloc,
81-
output rocc_mem_req_bits_no_xcpt,
82-
output [1:0] rocc_mem_req_bits_dprv,
83-
output rocc_mem_req_bits_dv,
84-
output [coreDataBits-1:0] rocc_mem_req_bits_data,
85-
output [coreDataBytes-1:0] rocc_mem_req_bits_mask,
86-
output rocc_mem_s1_kill,
87-
output [coreDataBits-1:0] rocc_mem_s1_data_data,
88-
output [coreDataBytes-1:0] rocc_mem_s1_data_mask,
89-
input rocc_mem_s2_nack,
90-
input rocc_mem_s2_nack_cause_raw,
91-
output rocc_mem_s2_kill,
92-
input rocc_mem_s2_uncached,
93-
input [paddrBits-1:0] rocc_mem_s2_paddr,
94-
input [vaddrBitsExtended-1:0] rocc_mem_s2_gpa,
95-
input rocc_mem_s2_gpa_is_pte,
96-
input rocc_mem_resp_valid,
97-
input [coreMaxAddrBits-1:0] rocc_mem_resp_bits_addr,
98-
input [dcacheReqTagBits-1:0] rocc_mem_resp_bits_tag,
99-
input [M_SZ-1:0] rocc_mem_resp_bits_cmd,
100-
input [mem_req_bits_size_width-1:0] rocc_mem_resp_bits_size,
101-
input rocc_mem_resp_bits_signed,
102-
input [coreDataBits-1:0] rocc_mem_resp_bits_data,
103-
input [coreDataBytes-1:0] rocc_mem_resp_bits_mask,
104-
input rocc_mem_resp_bits_replay,
105-
input rocc_mem_resp_bits_has_data,
106-
input [coreDataBits-1:0] rocc_mem_resp_bits_data_word_bypass,
107-
input [coreDataBits-1:0] rocc_mem_resp_bits_data_raw,
108-
input [coreDataBits-1:0] rocc_mem_resp_bits_store_data,
109-
input [1:0] rocc_mem_resp_bits_dprv,
110-
input rocc_mem_resp_bits_dv,
111-
input rocc_mem_replay_next,
112-
input rocc_mem_s2_xcpt_ma_ld,
113-
input rocc_mem_s2_xcpt_ma_st,
114-
input rocc_mem_s2_xcpt_pf_ld,
115-
input rocc_mem_s2_xcpt_pf_st,
116-
input rocc_mem_s2_xcpt_gf_ld,
117-
input rocc_mem_s2_xcpt_gf_st,
118-
input rocc_mem_s2_xcpt_ae_ld,
119-
input rocc_mem_s2_xcpt_ae_st,
120-
input rocc_mem_ordered,
121-
input rocc_mem_perf_acquire,
122-
input rocc_mem_perf_release,
123-
input rocc_mem_perf_grant,
124-
input rocc_mem_perf_tlbMiss,
125-
input rocc_mem_perf_blocked,
126-
input rocc_mem_perf_canAcceptStoreThenLoad,
127-
input rocc_mem_perf_canAcceptStoreThenRMW,
128-
input rocc_mem_perf_canAcceptLoadThenLoad,
129-
input rocc_mem_perf_storeBufferEmptyAfterLoad,
130-
input rocc_mem_perf_storeBufferEmptyAfterStore,
131-
output rocc_mem_keep_clock_enabled,
132-
input rocc_mem_clock_enabled,
133-
output rocc_busy,
134-
output rocc_interrupt,
135-
input rocc_exception,
136-
input rocc_fpu_req_ready,
137-
output rocc_fpu_req_valid,
138-
output rocc_fpu_req_bits_ldst,
139-
output rocc_fpu_req_bits_wen,
140-
output rocc_fpu_req_bits_ren1,
141-
output rocc_fpu_req_bits_ren2,
142-
output rocc_fpu_req_bits_ren3,
143-
output rocc_fpu_req_bits_swap12,
144-
output rocc_fpu_req_bits_swap23,
145-
output [1:0] rocc_fpu_req_bits_typeTagIn,
146-
output [1:0] rocc_fpu_req_bits_typeTagOut,
147-
output rocc_fpu_req_bits_fromint,
148-
output rocc_fpu_req_bits_toint,
149-
output rocc_fpu_req_bits_fastpipe,
150-
output rocc_fpu_req_bits_fma,
151-
output rocc_fpu_req_bits_div,
152-
output rocc_fpu_req_bits_sqrt,
153-
output rocc_fpu_req_bits_wflags,
154-
output [FPConstants_RM_SZ-1:0] rocc_fpu_req_bits_rm,
155-
output [1:0] rocc_fpu_req_bits_fmaCmd,
156-
output [1:0] rocc_fpu_req_bits_typ,
157-
output [1:0] rocc_fpu_req_bits_fmt,
158-
output [fLen:0] rocc_fpu_req_bits_in1,
159-
output [fLen:0] rocc_fpu_req_bits_in2,
160-
output [fLen:0] rocc_fpu_req_bits_in3,
161-
output rocc_fpu_resp_ready,
162-
input rocc_fpu_resp_valid,
163-
input [fLen:0] rocc_fpu_resp_bits_data,
164-
input [FPConstants_FLAGS_SZ-1:0] rocc_fpu_resp_bits_exc );
78+
output rocc_mem_req_bits_signed,
79+
output rocc_mem_req_bits_phys,
80+
output rocc_mem_req_bits_no_alloc,
81+
output rocc_mem_req_bits_no_xcpt,
82+
output rocc_mem_req_bits_no_resp,
83+
output [1:0] rocc_mem_req_bits_dprv,
84+
output rocc_mem_req_bits_dv,
85+
output [coreDataBits-1:0] rocc_mem_req_bits_data,
86+
output [coreDataBytes-1:0] rocc_mem_req_bits_mask,
87+
output rocc_mem_s1_kill,
88+
output [coreDataBits-1:0] rocc_mem_s1_data_data,
89+
output [coreDataBytes-1:0] rocc_mem_s1_data_mask,
90+
input rocc_mem_s2_nack,
91+
input rocc_mem_s2_nack_cause_raw,
92+
output rocc_mem_s2_kill,
93+
input rocc_mem_s2_uncached,
94+
input [paddrBits-1:0] rocc_mem_s2_paddr,
95+
input [vaddrBitsExtended-1:0] rocc_mem_s2_gpa,
96+
input rocc_mem_s2_gpa_is_pte,
97+
input rocc_mem_resp_valid,
98+
input [coreMaxAddrBits-1:0] rocc_mem_resp_bits_addr,
99+
input [dcacheReqTagBits-1:0] rocc_mem_resp_bits_tag,
100+
input [M_SZ-1:0] rocc_mem_resp_bits_cmd,
101+
input [mem_req_bits_size_width-1:0] rocc_mem_resp_bits_size,
102+
input rocc_mem_resp_bits_signed,
103+
input [coreDataBits-1:0] rocc_mem_resp_bits_data,
104+
input [coreDataBytes-1:0] rocc_mem_resp_bits_mask,
105+
input rocc_mem_resp_bits_replay,
106+
input rocc_mem_resp_bits_has_data,
107+
input [coreDataBits-1:0] rocc_mem_resp_bits_data_word_bypass,
108+
input [coreDataBits-1:0] rocc_mem_resp_bits_data_raw,
109+
input [coreDataBits-1:0] rocc_mem_resp_bits_store_data,
110+
input [1:0] rocc_mem_resp_bits_dprv,
111+
input rocc_mem_resp_bits_dv,
112+
input rocc_mem_replay_next,
113+
input rocc_mem_s2_xcpt_ma_ld,
114+
input rocc_mem_s2_xcpt_ma_st,
115+
input rocc_mem_s2_xcpt_pf_ld,
116+
input rocc_mem_s2_xcpt_pf_st,
117+
input rocc_mem_s2_xcpt_gf_ld,
118+
input rocc_mem_s2_xcpt_gf_st,
119+
input rocc_mem_s2_xcpt_ae_ld,
120+
input rocc_mem_s2_xcpt_ae_st,
121+
input rocc_mem_ordered,
122+
input rocc_mem_perf_acquire,
123+
input rocc_mem_perf_release,
124+
input rocc_mem_perf_grant,
125+
input rocc_mem_perf_tlbMiss,
126+
input rocc_mem_perf_blocked,
127+
input rocc_mem_perf_canAcceptStoreThenLoad,
128+
input rocc_mem_perf_canAcceptStoreThenRMW,
129+
input rocc_mem_perf_canAcceptLoadThenLoad,
130+
input rocc_mem_perf_storeBufferEmptyAfterLoad,
131+
input rocc_mem_perf_storeBufferEmptyAfterStore,
132+
output rocc_mem_keep_clock_enabled,
133+
input rocc_mem_clock_enabled,
134+
output rocc_busy,
135+
output rocc_interrupt,
136+
input rocc_exception,
137+
input rocc_fpu_req_ready,
138+
output rocc_fpu_req_valid,
139+
output rocc_fpu_req_bits_ldst,
140+
output rocc_fpu_req_bits_wen,
141+
output rocc_fpu_req_bits_ren1,
142+
output rocc_fpu_req_bits_ren2,
143+
output rocc_fpu_req_bits_ren3,
144+
output rocc_fpu_req_bits_swap12,
145+
output rocc_fpu_req_bits_swap23,
146+
output [1:0] rocc_fpu_req_bits_typeTagIn,
147+
output [1:0] rocc_fpu_req_bits_typeTagOut,
148+
output rocc_fpu_req_bits_fromint,
149+
output rocc_fpu_req_bits_toint,
150+
output rocc_fpu_req_bits_fastpipe,
151+
output rocc_fpu_req_bits_fma,
152+
output rocc_fpu_req_bits_div,
153+
output rocc_fpu_req_bits_sqrt,
154+
output rocc_fpu_req_bits_wflags,
155+
output [FPConstants_RM_SZ-1:0] rocc_fpu_req_bits_rm,
156+
output [1:0] rocc_fpu_req_bits_fmaCmd,
157+
output [1:0] rocc_fpu_req_bits_typ,
158+
output [1:0] rocc_fpu_req_bits_fmt,
159+
output [fLen:0] rocc_fpu_req_bits_in1,
160+
output [fLen:0] rocc_fpu_req_bits_in2,
161+
output [fLen:0] rocc_fpu_req_bits_in3,
162+
output rocc_fpu_resp_ready,
163+
input rocc_fpu_resp_valid,
164+
input [fLen:0] rocc_fpu_resp_bits_data,
165+
input [FPConstants_FLAGS_SZ-1:0] rocc_fpu_resp_bits_exc );
165166

166167
assign rocc_cmd_ready = 1'b1;
167168

Diff for: src/main/scala/groundtest/Tile.scala

+1 -4
Original file line number | Diff line number | Diff line change
@@ -43,10 +43,7 @@ abstract class GroundTestTile(
4343
dcacheOpt.foreach { m =>
4444
m.hartIdSinkNodeOpt.foreach { _ := hartIdNexusNode }
4545
InModuleBody {
46-
m.module match {
47-
case module: DCacheModule => module.tlb_port := DontCare
48-
case other => other
49-
}
46+
m.module.io.tlb_port := DontCare
5047
}
5148
}
5249

Diff for: src/main/scala/rocket/DCache.scala

+8 -10
Original file line number | Diff line number | Diff line change
@@ -91,8 +91,6 @@ class DCacheTLBPort(implicit p: Parameters) extends CoreBundle()(p) {
9191
}
9292

9393
class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
94-
val tlb_port = IO(new DCacheTLBPort)
95-
9694
val tECC = cacheParams.tagCode
9795
val dECC = cacheParams.dataCode
9896
require(subWordBits % eccBits == 0, "subWordBits must be a multiple of eccBits")
@@ -179,7 +177,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
179177
val s1_nack = WireDefault(false.B)
180178
val s1_valid_masked = s1_valid && !io.cpu.s1_kill
181179
val s1_valid_not_nacked = s1_valid && !s1_nack
182-
val s1_tlb_req_valid = RegNext(tlb_port.req.fire, false.B)
180+
val s1_tlb_req_valid = RegNext(io.tlb_port.req.fire, false.B)
183181
val s2_tlb_req_valid = RegNext(s1_tlb_req_valid, false.B)
184182
val s0_clk_en = metaArb.io.out.valid && !metaArb.io.out.bits.write
185183

@@ -190,16 +188,16 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
190188
val s1_req = RegEnable(s0_req, s0_clk_en)
191189
val s1_vaddr = Cat(s1_req.idx.getOrElse(s1_req.addr) >> tagLSB, s1_req.addr(tagLSB-1, 0))
192190

193-
val s0_tlb_req = WireInit(tlb_port.req.bits)
194-
when (!tlb_port.req.fire) {
191+
val s0_tlb_req = WireInit(io.tlb_port.req.bits)
192+
when (!io.tlb_port.req.fire) {
195193
s0_tlb_req.passthrough := s0_req.phys
196194
s0_tlb_req.vaddr := s0_req.addr
197195
s0_tlb_req.size := s0_req.size
198196
s0_tlb_req.cmd := s0_req.cmd
199197
s0_tlb_req.prv := s0_req.dprv
200198
s0_tlb_req.v := s0_req.dv
201199
}
202-
val s1_tlb_req = RegEnable(s0_tlb_req, s0_clk_en || tlb_port.req.valid)
200+
val s1_tlb_req = RegEnable(s0_tlb_req, s0_clk_en || io.tlb_port.req.valid)
203201

204202
val s1_read = isRead(s1_req.cmd)
205203
val s1_write = isWrite(s1_req.cmd)
@@ -263,7 +261,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
263261
// address translation
264262
val s1_cmd_uses_tlb = s1_readwrite || s1_flush_line || s1_req.cmd === M_WOK
265263
io.ptw <> tlb.io.ptw
266-
tlb.io.kill := io.cpu.s2_kill || s2_tlb_req_valid && tlb_port.s2_kill
264+
tlb.io.kill := io.cpu.s2_kill || s2_tlb_req_valid && io.tlb_port.s2_kill
267265
tlb.io.req.valid := s1_tlb_req_valid || s1_valid && !io.cpu.s1_kill && s1_cmd_uses_tlb
268266
tlb.io.req.bits := s1_tlb_req
269267
when (!tlb.io.req.ready && !tlb.io.ptw.resp.valid && !io.cpu.req.bits.phys) { io.cpu.req.ready := false.B }
@@ -277,8 +275,8 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
277275
tlb.io.sfence.bits.hv := s1_req.cmd === M_HFENCEV
278276
tlb.io.sfence.bits.hg := s1_req.cmd === M_HFENCEG
279277

280-
tlb_port.req.ready := clock_en_reg
281-
tlb_port.s1_resp := tlb.io.resp
278+
io.tlb_port.req.ready := clock_en_reg
279+
io.tlb_port.s1_resp := tlb.io.resp
282280
when (s1_tlb_req_valid && s1_valid && !(s1_req.phys && s1_req.no_xcpt)) { s1_nack := true.B }
283281

284282
pma_checker.io <> DontCare
@@ -1056,7 +1054,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
10561054
metaArb.io.out.valid || // subsumes resetting || flushing
10571055
s1_probe || s2_probe ||
10581056
s1_valid || s2_valid ||
1059-
tlb_port.req.valid ||
1057+
io.tlb_port.req.valid ||
10601058
s1_tlb_req_valid || s2_tlb_req_valid ||
10611059
pstore1_held || pstore2_valid ||
10621060
release_state =/= s_ready ||

0 commit comments

Comments
 (0)