"""DCache

based on Anton Blanchard microwatt dcache.vhdl

"""

from enum import Enum, unique

from nmigen import (Module, Signal, Elaboratable, Cat, Repl, Const,
                    ResetSignal)
from nmigen.cli import main
from nmutil.iocontrol import RecordObject
from nmigen.utils import log2_int

from experiment.mem_types import (LoadStore1ToDCacheType,
                                  DCacheToLoadStore1Type,
                                  MMUToDCacheType,
                                  DCacheToMMUType)

from experiment.wb_types import (WB_ADDR_BITS, WB_DATA_BITS, WB_SEL_BITS,
                                 WBAddrType, WBDataType, WBSelType,
                                 WBMasterOut, WBSlaveOut,
                                 WBMasterOutVector, WBSlaveOutVector,
                                 WBIOMasterOut, WBIOSlaveOut)


# Record for storing permission, attribute, etc. bits from a PTE
class PermAttr(RecordObject):
    def __init__(self):
        super().__init__()
        self.reference = Signal()
        self.changed = Signal()
        self.nocache = Signal()
        self.priv = Signal()
        self.rd_perm = Signal()
        self.wr_perm = Signal()


def extract_perm_attr(pte):
    pa = PermAttr()
    pa.reference = pte[8]
    pa.changed = pte[7]
    pa.nocache = pte[5]
    pa.priv = pte[3]
    pa.rd_perm = pte[2]
    pa.wr_perm = pte[1]
    return pa

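# Example usage (a sketch): the fields filled in by extract_perm_attr()
# are plain slices of the PTE, so a caller copies them into a PermAttr
# record combinationally, as tlb_search() below does:
#
#     comb += perm_attr.eq(extract_perm_attr(pte))
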

# Type of operation on a "valid" input
@unique
class Op(Enum):
    OP_NONE = 0
    OP_BAD = 1           # NC cache hit, TLB miss, prot/RC failure
    OP_STCX_FAIL = 2     # conditional store w/o reservation
    OP_LOAD_HIT = 3      # Cache hit on load
    OP_LOAD_MISS = 4     # Load missing cache
    OP_LOAD_NC = 5       # Non-cachable load
    OP_STORE_HIT = 6     # Store hitting cache
    OP_STORE_MISS = 7    # Store missing cache


# Cache state machine
@unique
class State(Enum):
    IDLE = 0             # Normal load hit processing
    RELOAD_WAIT_ACK = 1  # Cache reload wait ack
    STORE_WAIT_ACK = 2   # Store wait ack
    NC_LOAD_WAIT_ACK = 3 # Non-cachable load wait ack


# Dcache operations:
#
# In order to make timing, we use the BRAMs with
# an output buffer, which means that the BRAM
# output is delayed by an extra cycle.
#
# Thus, the dcache has a 2-stage internal pipeline
# for cache hits with no stalls.
#
# All other operations are handled via stalling
# in the first stage.
#
# The second stage can thus complete a hit at the same
# time as the first stage emits a stall for a complex op.
#
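# A minimal, self-contained nmigen sketch (an illustration only, not part
# of the dcache) of why an output buffer on a BRAM read port costs an
# extra cycle, which is what creates the 2-stage hit pipeline described
# above: the Memory read port is synchronous (1 cycle) and the extra
# sync register on its output adds the second.
from nmigen import Memory

class _BufferedReadExample(Elaboratable):
    def __init__(self):
        self.addr = Signal(4)
        self.data = Signal(8)  # valid 2 cycles after addr is presented

    def elaborate(self, platform):
        m = Module()
        mem = Memory(width=8, depth=16)
        m.submodules.rp = rp = mem.read_port()  # sync read: 1 cycle
        m.d.comb += rp.addr.eq(self.addr)
        m.d.sync += self.data.eq(rp.data)       # output buffer: +1 cycle
        return m
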
# Stage 0 register, basically contains just the latched request
class RegStage0(RecordObject):
    def __init__(self):
        super().__init__()
        self.req = LoadStore1ToDCacheType()
        self.tlbie = Signal()
        self.doall = Signal()
        self.tlbld = Signal()
        self.mmu_req = Signal() # indicates source of request


class MemAccessRequest(RecordObject):
    def __init__(self):
        super().__init__()
        self.op = Signal(Op)
        self.valid = Signal()
        self.dcbz = Signal()
        self.real_addr = Signal(REAL_ADDR_BITS)
        self.data = Signal(64)
        self.byte_sel = Signal(8)
        self.hit_way = Signal(WAY_BITS)
        self.same_tag = Signal()
        self.mmu_req = Signal()


# First stage register, contains state for stage 1 of load hits
# and for the state machine used by all other operations
class RegStage1(RecordObject):
    def __init__(self):
        super().__init__()
        # Info about the request
        self.full = Signal()    # have uncompleted request
        self.mmu_req = Signal() # request is from MMU
        self.req = MemAccessRequest()

        # Cache hit state
        self.hit_way = Signal(WAY_BITS)
        self.hit_load_valid = Signal()
        self.hit_index = Signal(NUM_LINES)
        self.cache_hit = Signal()

        # TLB hit state
        self.tlb_hit = Signal()
        self.tlb_hit_way = Signal(TLB_NUM_WAYS)
        self.tlb_hit_index = Signal(TLB_SET_SIZE)

        # 2-stage data buffer for data forwarded from writes to reads
        self.forward_data1 = Signal(64)
        self.forward_data2 = Signal(64)
        self.forward_sel1 = Signal(8)
        self.forward_valid1 = Signal()
        self.forward_way1 = Signal(WAY_BITS)
        self.forward_row1 = Signal(BRAM_ROWS)
        self.use_forward1 = Signal()
        self.forward_sel = Signal(8)

        # Cache miss state (reload state machine)
        self.state = Signal(State)
        self.dcbz = Signal()
        self.write_bram = Signal()
        self.write_tag = Signal()
        self.slow_valid = Signal()
        self.wb = WBMasterOut()
        self.reload_tag = Signal(TAG_BITS)
        self.store_way = Signal(WAY_BITS)
        self.store_row = Signal(BRAM_ROWS)
        self.store_index = Signal(NUM_LINES)
        self.end_row_ix = Signal(ROW_LINE_BITS)
        self.rows_valid = RowPerLineValidArray()
        self.acks_pending = Signal(3)
        self.inc_acks = Signal()
        self.dec_acks = Signal()

        # Signals to complete (possibly with error)
        self.ls_valid = Signal()
        self.ls_error = Signal()
        self.mmu_done = Signal()
        self.mmu_error = Signal()
        self.cache_paradox = Signal()

        # Signal to complete a failed stcx.
        self.stcx_fail = Signal()


# Reservation information
class Reservation(RecordObject):
    def __init__(self):
        super().__init__()
        self.valid = Signal()
        # TODO LINE_OFF_BITS is 6
        self.addr = Signal(64 - LINE_OFF_BITS) # r0.req.addr[LINE_OFF_BITS:64]


# Set associative dcache write-through
#
# TODO (in no specific order):
#
# * See list in icache.vhdl
# * Complete load misses on the cycle when WB data comes instead of
#   at the end of line (this requires dealing with requests coming in
#   while not idle...)
class DCache(Elaboratable):
    def __init__(self):
        # TODO: make these parameters of DCache at some point
        self.LINE_SIZE = 64    # Line size in bytes
        self.NUM_LINES = 32    # Number of lines in a set
        self.NUM_WAYS = 4      # Number of ways
        self.TLB_SET_SIZE = 64 # L1 DTLB entries per set
        self.TLB_NUM_WAYS = 2  # L1 DTLB number of sets
        self.TLB_LG_PGSZ = 12  # L1 DTLB log_2(page_size)
        self.LOG_LENGTH = 0    # Non-zero to enable log data collection
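
        # (sketch) derived constants that the rest of this file assumes,
        # following the microwatt dcache.vhdl conventions; with the
        # values above:
        #   ROW_SIZE     = WB_DATA_BITS // 8        # bytes per BRAM row
        #   ROW_PER_LINE = LINE_SIZE // ROW_SIZE    # BRAM rows per line
        #   BRAM_ROWS    = NUM_LINES * ROW_PER_LINE # rows per way
        #   WAY_BITS     = log2_int(NUM_WAYS)
        #   TLB_SET_BITS = log2_int(TLB_SET_SIZE)
        #   TLB_WAY_BITS = log2_int(TLB_NUM_WAYS)
        #   TLB_EA_TAG_BITS = 64 - (TLB_LG_PGSZ + TLB_SET_BITS)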

        self.d_in = LoadStore1ToDCacheType()
        self.d_out = DCacheToLoadStore1Type()

        self.m_in = MMUToDCacheType()
        self.m_out = DCacheToMMUType()

        self.stall_out = Signal()

        self.wb_out = WBMasterOut()
        self.wb_in = WBSlaveOut()

        self.log_out = Signal(20)

    # Latch the request in r0.req as long as we're not stalling
    def stage_0(self, m, r0, r1, r0_full, d_in, m_in):
        comb = m.d.comb
        sync = m.d.sync

        r = RegStage0()

        # TODO, this goes in unit tests and formal proofs
        # assert ~(d_in.valid & m_in.valid),
        # "request collision loadstore vs MMU"
        with m.If(~(d_in.valid & m_in.valid)):
            #sync += Display("request collision loadstore vs MMU")
            pass

        # r is a VHDL variable, so assign it combinationally
        with m.If(m_in.valid):
            comb += r.req.valid.eq(1)
            comb += r.req.load.eq(~(m_in.tlbie | m_in.tlbld))
            comb += r.req.dcbz.eq(0)
            comb += r.req.nc.eq(0)
            comb += r.req.reserve.eq(0)
            comb += r.req.virt_mode.eq(1)
            comb += r.req.priv_mode.eq(1)
            comb += r.req.addr.eq(m_in.addr)
            comb += r.req.data.eq(m_in.pte)
            comb += r.req.byte_sel.eq(-1) # Const -1 sets all bits to 1
            comb += r.tlbie.eq(m_in.tlbie)
            comb += r.doall.eq(m_in.doall)
            comb += r.tlbld.eq(m_in.tlbld)
            comb += r.mmu_req.eq(1)
        with m.Else():
            comb += r.req.eq(d_in)
            comb += r.tlbie.eq(0)
            comb += r.doall.eq(0)
            comb += r.tlbld.eq(0)
            comb += r.mmu_req.eq(0)
        with m.If(~(r1.full & r0_full)):
            sync += r0.eq(r)
            sync += r0_full.eq(r.req.valid)

    # TLB
    # Operates in the second cycle on the request latched in r0.req.
    # TLB updates write the entry at the end of the second cycle.
    def tlb_read(self, m, m_in, d_in, r0_stall, tlb_valid_way,
                 tlb_tag_way, tlb_pte_way, dtlb_valid_bits,
                 dtlb_tags, dtlb_ptes):

        comb = m.d.comb
        sync = m.d.sync

        index = Signal(TLB_SET_BITS)
        addrbits = Signal(TLB_SET_BITS)

        amin = TLB_LG_PGSZ
        amax = TLB_LG_PGSZ + TLB_SET_BITS

        with m.If(m_in.valid):
            comb += addrbits.eq(m_in.addr[amin : amax])
        with m.Else():
            comb += addrbits.eq(d_in.addr[amin : amax])
        comb += index.eq(addrbits)

        # If we have any op and the previous op isn't finished,
        # then keep the same output for next cycle.
        with m.If(~r0_stall):
            sync += tlb_valid_way.eq(dtlb_valid_bits[index])
            sync += tlb_tag_way.eq(dtlb_tags[index])
            sync += tlb_pte_way.eq(dtlb_ptes[index])

    # Generate TLB PLRUs
    def maybe_tlb_plrus(self, m, r1, tlb_plru_victim):
        comb = m.d.comb
        sync = m.d.sync

        if TLB_NUM_WAYS > 1: # elaboration-time "generate"
            for i in range(TLB_SET_SIZE):
                # TLB PLRU interface
                tlb_plru = PLRU(TLB_WAY_BITS)
                setattr(m.submodules, "tlb_plru_%d" % i, tlb_plru)
                tlb_plru_acc = Signal(TLB_WAY_BITS)
                tlb_plru_acc_en = Signal()
                tlb_plru_out = Signal(TLB_WAY_BITS)

                comb += tlb_plru.acc.eq(tlb_plru_acc)
                comb += tlb_plru.acc_en.eq(tlb_plru_acc_en)
                comb += tlb_plru_out.eq(tlb_plru.lru)

                # PLRU interface
                with m.If(r1.tlb_hit_index == i):
                    comb += tlb_plru_acc_en.eq(r1.tlb_hit)
                with m.Else():
                    comb += tlb_plru_acc_en.eq(0)
                comb += tlb_plru_acc.eq(r1.tlb_hit_way)

                comb += tlb_plru_victim[i].eq(tlb_plru_out)

    def tlb_search(self, m, tlb_req_index, r0, r0_valid, tlb_valid_way,
                   tlb_tag_way, tlb_pte_way, pte, tlb_hit, tlb_hit_way,
                   valid_ra, perm_attr, ra):

        comb = m.d.comb
        sync = m.d.sync

        hitway = Signal(TLB_WAY_BITS)
        hit = Signal()
        eatag = Signal(TLB_EA_TAG_BITS)

        TLB_LG_END = TLB_LG_PGSZ + TLB_SET_BITS
        comb += tlb_req_index.eq(r0.req.addr[TLB_LG_PGSZ : TLB_LG_END])
        comb += eatag.eq(r0.req.addr[TLB_LG_END : 64])

        for i in range(TLB_NUM_WAYS):
            with m.If(tlb_valid_way[i]
                      & (read_tlb_tag(i, tlb_tag_way) == eatag)):
                comb += hitway.eq(i)
                comb += hit.eq(1)

        comb += tlb_hit.eq(hit & r0_valid)
        comb += tlb_hit_way.eq(hitway)

        with m.If(tlb_hit):
            comb += pte.eq(read_tlb_pte(hitway, tlb_pte_way))
        with m.Else():
            comb += pte.eq(0)
        comb += valid_ra.eq(tlb_hit | ~r0.req.virt_mode)
        with m.If(r0.req.virt_mode):
            comb += ra.eq(Cat(Const(0, ROW_OFF_BITS),
                              r0.req.addr[ROW_OFF_BITS:TLB_LG_PGSZ],
                              pte[TLB_LG_PGSZ:REAL_ADDR_BITS]))
            comb += perm_attr.eq(extract_perm_attr(pte))
        with m.Else():
            comb += ra.eq(Cat(Const(0, ROW_OFF_BITS),
                              r0.req.addr[ROW_OFF_BITS:REAL_ADDR_BITS]))

            comb += perm_attr.reference.eq(1)
            comb += perm_attr.changed.eq(1)
            comb += perm_attr.priv.eq(1)
            comb += perm_attr.nocache.eq(0)
            comb += perm_attr.rd_perm.eq(1)
            comb += perm_attr.wr_perm.eq(1)

    def tlb_update(self, m, r0_valid, r0, dtlb_valid_bits, tlb_req_index,
                   tlb_hit_way, tlb_hit, tlb_plru_victim, tlb_tag_way,
                   dtlb_tags, tlb_pte_way, dtlb_ptes):

        comb = m.d.comb
        sync = m.d.sync

        # variable tlbie : std_ulogic;
        # variable tlbwe : std_ulogic;
        # variable repl_way : tlb_way_t;
        # variable eatag : tlb_tag_t;
        # variable tagset : tlb_way_tags_t;
        # variable pteset : tlb_way_ptes_t;
        #type tlb_tags_t is array(tlb_index_t) of tlb_way_tags_t;
        # --> Array([Signal(log(way_tags length)) for i in range(number of tlbs)])

        tlbie = Signal()
        tlbwe = Signal()
        repl_way = Signal(TLB_WAY_BITS)
        eatag = Signal(TLB_EA_TAG_BITS)
        tagset = TLBWayTags()
        pteset = TLBWayPtes()

        # begin
        # if rising_edge(clk) then
        # tlbie := r0_valid and r0.tlbie;
        # tlbwe := r0_valid and r0.tlbld;
        comb += tlbie.eq(r0_valid & r0.tlbie)
        comb += tlbwe.eq(r0_valid & r0.tlbld)

        with m.If(tlbie & r0.doall):
            # clear all valid bits at once
            for i in range(TLB_SET_SIZE):
                sync += dtlb_valid_bits[i].eq(0)

        with m.Elif(tlbie):
            with m.If(tlb_hit):
                sync += dtlb_valid_bits[tlb_req_index][tlb_hit_way].eq(0)
        with m.Elif(tlbwe):
            with m.If(tlb_hit):
                comb += repl_way.eq(tlb_hit_way)
            with m.Else():
                comb += repl_way.eq(tlb_plru_victim[tlb_req_index])
            comb += eatag.eq(r0.req.addr[TLB_LG_PGSZ + TLB_SET_BITS:64])
            comb += tagset.eq(tlb_tag_way)
            sync += write_tlb_tag(repl_way, tagset, eatag)
            sync += dtlb_tags[tlb_req_index].eq(tagset)
            comb += pteset.eq(tlb_pte_way)
            sync += write_tlb_pte(repl_way, pteset, r0.req.data)
            sync += dtlb_ptes[tlb_req_index].eq(pteset)
            sync += dtlb_valid_bits[tlb_req_index][repl_way].eq(1)

    # -- Generate PLRUs
    # maybe_plrus: if NUM_WAYS > 1 generate
    # Generate PLRUs
    def maybe_plrus(self, m, r1, plru_victim):

        comb = m.d.comb
        sync = m.d.sync

        # begin
        # TODO learn translation of generate into nmigen @lkcl
        # plrus: for i in 0 to NUM_LINES-1 generate
        for i in range(NUM_LINES):
            # -- PLRU interface
            # signal plru_acc : std_ulogic_vector(WAY_BITS-1 downto 0);
            # signal plru_acc_en : std_ulogic;
            # signal plru_out : std_ulogic_vector(WAY_BITS-1 downto 0);
            plru = PLRU(WAY_BITS)
            setattr(m.submodules, "plru_%d" % i, plru)
            plru_acc = Signal(WAY_BITS)
            plru_acc_en = Signal()
            plru_out = Signal(WAY_BITS)

            # begin
            # TODO learn translation of entity, generic map, port map in
            # nmigen @lkcl
            # plru : entity work.plru
            # generic map (
            #     BITS => WAY_BITS
            # )
            # port map (
            #     clk => clk,
            #     rst => rst,
            #     acc => plru_acc,
            #     acc_en => plru_acc_en,
            #     lru => plru_out
            # );
            comb += plru.acc.eq(plru_acc)
            comb += plru.acc_en.eq(plru_acc_en)
            comb += plru_out.eq(plru.lru)

            # process(all)
            # begin
            # -- PLRU interface
            # if r1.hit_index = i then
            # PLRU interface
            with m.If(r1.hit_index == i):
                # plru_acc_en <= r1.cache_hit;
                comb += plru_acc_en.eq(r1.cache_hit)
            # else
            with m.Else():
                # plru_acc_en <= '0';
                comb += plru_acc_en.eq(0)
            # end if;
            # plru_acc <= std_ulogic_vector(to_unsigned(
            #     r1.hit_way, WAY_BITS
            # ));
            # plru_victim(i) <= plru_out;
            comb += plru_acc.eq(r1.hit_way)
            comb += plru_victim[i].eq(plru_out)
            # end process;
        # end generate;
        # end generate;

    # -- Cache tag RAM read port
    # cache_tag_read : process(clk)
    # Cache tag RAM read port
    def cache_tag_read(self, m, r0_stall, req_index, m_in, d_in,
                       cache_tag_set, cache_tags):

        comb = m.d.comb
        sync = m.d.sync

        # variable index : index_t;
        index = Signal(NUM_LINES)

        # begin
        # if rising_edge(clk) then
        # if r0_stall = '1' then
        with m.If(r0_stall):
            # index := req_index;
            comb += index.eq(req_index)

        # elsif m_in.valid = '1' then
        with m.Elif(m_in.valid):
            # index := get_index(m_in.addr);
            comb += index.eq(get_index(m_in.addr))

        # else
        with m.Else():
            # index := get_index(d_in.addr);
            comb += index.eq(get_index(d_in.addr))
        # end if;
        # cache_tag_set <= cache_tags(index);
        sync += cache_tag_set.eq(cache_tags[index])
        # end if;
        # end process;
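
    # (sketch) the address-decomposition helpers assumed throughout this
    # file, following the microwatt dcache.vhdl layout
    # |tag|index|row|offset| (SET_SIZE_BITS = LINE_OFF_BITS + INDEX_BITS
    # as in the VHDL; the slices show the intended meaning):
    #   get_index(addr) -> addr[LINE_OFF_BITS : SET_SIZE_BITS]
    #   get_row(addr)   -> addr[ROW_OFF_BITS  : SET_SIZE_BITS]
    #   get_tag(addr)   -> addr[SET_SIZE_BITS : REAL_ADDR_BITS]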

    # Cache request parsing and hit detection
    def dcache_request(self, m, r0, ra, req_index, req_row, req_tag,
                       r0_valid, r1, cache_valid_bits, replace_way,
                       use_forward1_next, use_forward2_next,
                       req_hit_way, plru_victim, rc_ok, perm_attr,
                       valid_ra, perm_ok, access_ok, req_op, req_go,
                       tlb_pte_way, tlb_valid_way, tlb_hit, tlb_hit_way,
                       cache_tag_set, cancel_store, req_same_tag,
                       r0_stall, m_in, early_req_row, d_in):

        comb = m.d.comb
        sync = m.d.sync

        # variable is_hit : std_ulogic;
        # variable hit_way : way_t;
        # variable op : op_t;
        # variable opsel : std_ulogic_vector(2 downto 0);
        # variable go : std_ulogic;
        # variable nc : std_ulogic;
        # variable s_hit : std_ulogic;
        # variable s_tag : cache_tag_t;
        # variable s_pte : tlb_pte_t;
        # variable s_ra : std_ulogic_vector(
        #     REAL_ADDR_BITS - 1 downto 0
        # );
        # variable hit_set : std_ulogic_vector(
        #     TLB_NUM_WAYS - 1 downto 0
        # );
        # variable hit_way_set : hit_way_set_t;
        # variable rel_matches : std_ulogic_vector(
        #     TLB_NUM_WAYS - 1 downto 0
        # );
        is_hit = Signal()
        hit_way = Signal(WAY_BITS)
        op = Signal(Op)
        opsel = Signal(3)
        go = Signal()
        nc = Signal()
        s_hit = Signal()
        s_tag = Signal(TAG_BITS)
        s_pte = Signal(TLB_PTE_BITS)
        s_ra = Signal(REAL_ADDR_BITS)
        hit_set = Signal(TLB_NUM_WAYS)
        hit_way_set = HitWaySet()
        rel_matches = Signal(TLB_NUM_WAYS)
        rel_match = Signal()

        # begin
        # -- Extract line, row and tag from request
        # req_index <= get_index(r0.req.addr);
        # req_row <= get_row(r0.req.addr);
        # req_tag <= get_tag(ra);
        #
        # go := r0_valid and not (r0.tlbie or r0.tlbld)
        # and not r1.ls_error;
        # Extract line, row and tag from request
        comb += req_index.eq(get_index(r0.req.addr))
        comb += req_row.eq(get_row(r0.req.addr))
        comb += req_tag.eq(get_tag(ra))

        comb += go.eq(r0_valid & ~(r0.tlbie | r0.tlbld) & ~r1.ls_error)

        # hit_way := 0;
        # is_hit := '0';
        # rel_match := '0';
        # Test if pending request is a hit on any way
        # In order to make timing in virtual mode,
        # when we are using the TLB, we compare each
        # way with each of the real addresses from each way of
        # the TLB, and then decide later which match to use.

        # if r0.req.virt_mode = '1' then
        with m.If(r0.req.virt_mode):
            # rel_matches := (others => '0');
            comb += rel_matches.eq(0)
            # for j in tlb_way_t loop
            for j in range(TLB_NUM_WAYS):
                # hit_way_set(j) := 0;
                # s_hit := '0';
                # s_pte := read_tlb_pte(j, tlb_pte_way);
                # s_ra := s_pte(REAL_ADDR_BITS - 1 downto TLB_LG_PGSZ)
                #         & r0.req.addr(TLB_LG_PGSZ - 1 downto 0);
                # s_tag := get_tag(s_ra);
                comb += hit_way_set[j].eq(0)
                comb += s_hit.eq(0)
                comb += s_pte.eq(read_tlb_pte(j, tlb_pte_way))
                comb += s_ra.eq(Cat(
                            r0.req.addr[0:TLB_LG_PGSZ],
                            s_pte[TLB_LG_PGSZ:REAL_ADDR_BITS]
                        ))
                comb += s_tag.eq(get_tag(s_ra))

                # for i in way_t loop
                for i in range(NUM_WAYS):
                    # if go = '1' and cache_valids(req_index)(i) = '1'
                    # and read_tag(i, cache_tag_set) = s_tag
                    # and tlb_valid_way(j) = '1' then
                    with m.If(go & cache_valid_bits[req_index][i] &
                              (read_tag(i, cache_tag_set) == s_tag)
                              & tlb_valid_way[j]):
                        # hit_way_set(j) := i;
                        # s_hit := '1';
                        comb += hit_way_set[j].eq(i)
                        comb += s_hit.eq(1)
                    # end if;
                # end loop;
                # hit_set(j) := s_hit;
                comb += hit_set[j].eq(s_hit)
                # if s_tag = r1.reload_tag then
                with m.If(s_tag == r1.reload_tag):
                    # rel_matches(j) := '1';
                    comb += rel_matches[j].eq(1)
                # end if;
            # end loop;
            # if tlb_hit = '1' then
            with m.If(tlb_hit):
                # is_hit := hit_set(tlb_hit_way);
                # hit_way := hit_way_set(tlb_hit_way);
                # rel_match := rel_matches(tlb_hit_way);
                comb += is_hit.eq(hit_set[tlb_hit_way])
                comb += hit_way.eq(hit_way_set[tlb_hit_way])
                comb += rel_match.eq(rel_matches[tlb_hit_way])
            # end if;
        # else
        with m.Else():
            # s_tag := get_tag(r0.req.addr);
            comb += s_tag.eq(get_tag(r0.req.addr))
            # for i in way_t loop
            for i in range(NUM_WAYS):
                # if go = '1' and cache_valids(req_index)(i) = '1' and
                #    read_tag(i, cache_tag_set) = s_tag then
                with m.If(go & cache_valid_bits[req_index][i] &
                          (read_tag(i, cache_tag_set) == s_tag)):
                    # hit_way := i;
                    # is_hit := '1';
                    comb += hit_way.eq(i)
                    comb += is_hit.eq(1)
                # end if;
            # end loop;
            # if s_tag = r1.reload_tag then
            with m.If(s_tag == r1.reload_tag):
                # rel_match := '1';
                comb += rel_match.eq(1)
            # end if;
        # end if;
        # req_same_tag <= rel_match;
        comb += req_same_tag.eq(rel_match)

        # if r1.state = RELOAD_WAIT_ACK and req_index = r1.store_index
        # and rel_match = '1' then
        # See if the request matches the line currently being reloaded
        with m.If((r1.state == State.RELOAD_WAIT_ACK) &
                  (req_index == r1.store_index) & rel_match):
            # For a store, consider this a hit even if the row isn't
            # valid since it will be by the time we perform the store.
            # For a load, check the appropriate row valid bit.
            # is_hit :=
            #     not r0.req.load
            #     or r1.rows_valid(req_row mod ROW_PER_LINE);
            # hit_way := replace_way;
            comb += is_hit.eq(~r0.req.load
                              | r1.rows_valid[req_row % ROW_PER_LINE]
                             )
            comb += hit_way.eq(replace_way)
        # end if;

        # -- Whether to use forwarded data for a load or not
        # Whether to use forwarded data for a load or not
        # use_forward1_next <= '0';
        comb += use_forward1_next.eq(0)
        # if get_row(r1.req.real_addr) = req_row
        # and r1.req.hit_way = hit_way then
        with m.If((get_row(r1.req.real_addr) == req_row)
                  & (r1.req.hit_way == hit_way)):
            # Only need to consider r1.write_bram here, since if we
            # are writing refill data here, then we don't have a
            # cache hit this cycle on the line being refilled.
            # (There is the possibility that the load following the
            # load miss that started the refill could be to the old
            # contents of the victim line, since it is a couple of
            # cycles after the refill starts before we see the updated
            # cache tag. In that case we don't use the bypass.)
            # use_forward1_next <= r1.write_bram;
            comb += use_forward1_next.eq(r1.write_bram)
        # end if;
        # use_forward2_next <= '0';
        comb += use_forward2_next.eq(0)
        # if r1.forward_row1 = req_row
        # and r1.forward_way1 = hit_way then
        with m.If((r1.forward_row1 == req_row)
                  & (r1.forward_way1 == hit_way)):
            # use_forward2_next <= r1.forward_valid1;
            comb += use_forward2_next.eq(r1.forward_valid1)
        # end if;

        # The way that matched on a hit
        # req_hit_way <= hit_way;
        comb += req_hit_way.eq(hit_way)

        # The way to replace on a miss
        # if r1.write_tag = '1' then
        with m.If(r1.write_tag):
            # replace_way <= to_integer(unsigned(
            #     plru_victim(r1.store_index)
            # ));
            comb += replace_way.eq(plru_victim[r1.store_index])
        # else
        with m.Else():
            # replace_way <= r1.store_way;
            comb += replace_way.eq(r1.store_way)
        # end if;

        # work out whether we have permission for this access
        # NB we don't yet implement AMR, thus no KUAP
        # rc_ok <= perm_attr.reference and
        #          (r0.req.load or perm_attr.changed);
        # perm_ok <= (r0.req.priv_mode or not perm_attr.priv) and
        #            (perm_attr.wr_perm or (r0.req.load
        #            and perm_attr.rd_perm));
        # access_ok <= valid_ra and perm_ok and rc_ok;
        comb += rc_ok.eq(
                    perm_attr.reference
                    & (r0.req.load | perm_attr.changed)
                )
        comb += perm_ok.eq((r0.req.priv_mode | ~perm_attr.priv)
                           & (perm_attr.wr_perm
                              | (r0.req.load & perm_attr.rd_perm))
                          )
        comb += access_ok.eq(valid_ra & perm_ok & rc_ok)
        # nc := r0.req.nc or perm_attr.nocache;
        # op := OP_NONE;
        # Combine the request and cache hit status to decide what
        # operation needs to be done
        comb += nc.eq(r0.req.nc | perm_attr.nocache)
        comb += op.eq(Op.OP_NONE)
        # if go = '1' then
        with m.If(go):
            # if access_ok = '0' then
            with m.If(~access_ok):
                # op := OP_BAD;
                comb += op.eq(Op.OP_BAD)
            # elsif cancel_store = '1' then
            with m.Elif(cancel_store):
                # op := OP_STCX_FAIL;
                comb += op.eq(Op.OP_STCX_FAIL)
            # else
            with m.Else():
                # opsel := r0.req.load & nc & is_hit;
                comb += opsel.eq(Cat(is_hit, nc, r0.req.load))
                # case opsel is
                with m.Switch(opsel):
                    # when "101" => op := OP_LOAD_HIT;
                    # when "100" => op := OP_LOAD_MISS;
                    # when "110" => op := OP_LOAD_NC;
                    # when "001" => op := OP_STORE_HIT;
                    # when "000" => op := OP_STORE_MISS;
                    # when "010" => op := OP_STORE_MISS;
                    # when "011" => op := OP_BAD;
                    # when "111" => op := OP_BAD;
                    # when others => op := OP_NONE;
                    with m.Case(0b101):
                        comb += op.eq(Op.OP_LOAD_HIT)

                    with m.Case(0b100):
                        comb += op.eq(Op.OP_LOAD_MISS)

                    with m.Case(0b110):
                        comb += op.eq(Op.OP_LOAD_NC)

                    with m.Case(0b001):
                        comb += op.eq(Op.OP_STORE_HIT)

                    with m.Case(0b000):
                        comb += op.eq(Op.OP_STORE_MISS)

                    with m.Case(0b010):
                        comb += op.eq(Op.OP_STORE_MISS)

                    with m.Case(0b011):
                        comb += op.eq(Op.OP_BAD)

                    with m.Case(0b111):
                        comb += op.eq(Op.OP_BAD)

                    with m.Default():
                        comb += op.eq(Op.OP_NONE)
        # end case;
        # end if;
        # end if;
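
        # For reference, the opsel decode above as a table
        # (opsel = Cat(is_hit, nc, load): bit 2 = load, bit 1 = nc,
        # bit 0 = is_hit):
        #   101 -> OP_LOAD_HIT        100 -> OP_LOAD_MISS
        #   110 -> OP_LOAD_NC         001 -> OP_STORE_HIT
        #   000 -> OP_STORE_MISS      010 -> OP_STORE_MISS
        #   011 -> OP_BAD (NC store hit)  111 -> OP_BAD (NC load hit)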
        # req_op <= op;
        # req_go <= go;
        comb += req_op.eq(op)
        comb += req_go.eq(go)

        # Version of the row number that is valid one cycle earlier
        # in the cases where we need to read the cache data BRAM.
        # If we're stalling then we need to keep reading the last
        # row requested.
        # if r0_stall = '0' then
        with m.If(~r0_stall):
            # if m_in.valid = '1' then
            with m.If(m_in.valid):
                # early_req_row <= get_row(m_in.addr);
                comb += early_req_row.eq(get_row(m_in.addr))
            # else
            with m.Else():
                # early_req_row <= get_row(d_in.addr);
                comb += early_req_row.eq(get_row(d_in.addr))
            # end if;
        # else
        with m.Else():
            # early_req_row <= req_row;
            comb += early_req_row.eq(req_row)
        # end if;
        # end process;

    # Handle load-with-reservation and store-conditional instructions
    def reservation_comb(self, m, cancel_store, set_rsrv, clear_rsrv,
                         r0_valid, r0, reservation):

        comb = m.d.comb
        sync = m.d.sync

        # begin
        # cancel_store <= '0';
        # set_rsrv <= '0';
        # clear_rsrv <= '0';
        # if r0_valid = '1' and r0.req.reserve = '1' then
        with m.If(r0_valid & r0.req.reserve):

            # -- XXX generate alignment interrupt if address
            # -- is not aligned XXX or if r0.req.nc = '1'
            # if r0.req.load = '1' then
            # XXX generate alignment interrupt if address
            # is not aligned XXX or if r0.req.nc = '1'
            with m.If(r0.req.load):
                # -- load with reservation
                # set_rsrv <= '1';
                # load with reservation
                comb += set_rsrv.eq(1)
            # else
            with m.Else():
                # -- store conditional
                # clear_rsrv <= '1';
                # store conditional
                comb += clear_rsrv.eq(1)
                # if reservation.valid = '0' or r0.req.addr(63
                # downto LINE_OFF_BITS) /= reservation.addr then
                with m.If(~reservation.valid
                          | (r0.req.addr[LINE_OFF_BITS:64] !=
                             reservation.addr)):
                    # cancel_store <= '1';
                    comb += cancel_store.eq(1)
                # end if;
            # end if;
        # end if;
        # end process;

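    # A plain-Python sketch (illustration only) of the cancel condition
    # above: a store-conditional is cancelled unless a valid reservation
    # covers the same cache line (the address bits above LINE_OFF_BITS).
    @staticmethod
    def _stcx_would_cancel(rsrv_valid, rsrv_addr, store_addr,
                           line_off_bits=6):
        # compare line addresses (bits above the line offset)
        return (not rsrv_valid) or \
               ((store_addr >> line_off_bits) != rsrv_addr)
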
    def reservation_reg(self, m, r0_valid, access_ok, set_rsrv, clear_rsrv,
                        reservation, r0):

        comb = m.d.comb
        sync = m.d.sync

        # begin
        # if rising_edge(clk) then
        # if rst = '1' then
        # reservation.valid <= '0';
        # TODO understand how resets work in nmigen
        # (the sync domain's reset clears reservation.valid for us)
        # elsif r0_valid = '1' and access_ok = '1' then
        with m.If(r0_valid & access_ok):
            # if clear_rsrv = '1' then
            with m.If(clear_rsrv):
                # reservation.valid <= '0';
                sync += reservation.valid.eq(0)
            # elsif set_rsrv = '1' then
            with m.Elif(set_rsrv):
                # reservation.valid <= '1';
                # reservation.addr <=
                #     r0.req.addr(63 downto LINE_OFF_BITS);
                sync += reservation.valid.eq(1)
                sync += reservation.addr.eq(
                            r0.req.addr[LINE_OFF_BITS:64]
                        )
            # end if;
        # end if;
        # end if;
        # end process;

    # Return data for loads & completion control logic
    def writeback_control(self, m, r1, cache_out, d_out, m_out):

        comb = m.d.comb
        sync = m.d.sync

        # variable data_out : std_ulogic_vector(63 downto 0);
        # variable data_fwd : std_ulogic_vector(63 downto 0);
        # variable j : integer;
        data_out = Signal(64)
        data_fwd = Signal(64)

        # begin
        # -- Use the bypass if are reading the row that was
        # -- written 1 or 2 cycles ago, including for the
        # -- slow_valid = 1 case (i.e. completing a load
        # -- miss or a non-cacheable load).
        # if r1.use_forward1 = '1' then
        # Use the bypass if we are reading the row that was
        # written 1 or 2 cycles ago, including for the
        # slow_valid = 1 case (i.e. completing a load
        # miss or a non-cacheable load).
        with m.If(r1.use_forward1):
            # data_fwd := r1.forward_data1;
            comb += data_fwd.eq(r1.forward_data1)
        # else
        with m.Else():
            # data_fwd := r1.forward_data2;
            comb += data_fwd.eq(r1.forward_data2)
        # end if;

        # data_out := cache_out(r1.hit_way);
        comb += data_out.eq(cache_out[r1.hit_way])

        # for i in 0 to 7 loop
        for i in range(8):
            # j := i * 8;
            j = i * 8

            # if r1.forward_sel(i) = '1' then
            with m.If(r1.forward_sel[i]):
                # data_out(j + 7 downto j) := data_fwd(j + 7 downto j);
                comb += data_out[j:j+8].eq(data_fwd[j:j+8])
            # end if;
        # end loop;

        # d_out.valid <= r1.ls_valid;
        # d_out.data <= data_out;
        # d_out.store_done <= not r1.stcx_fail;
        # d_out.error <= r1.ls_error;
        # d_out.cache_paradox <= r1.cache_paradox;
        comb += d_out.valid.eq(r1.ls_valid)
        comb += d_out.data.eq(data_out)
        comb += d_out.store_done.eq(~r1.stcx_fail)
        comb += d_out.error.eq(r1.ls_error)
        comb += d_out.cache_paradox.eq(r1.cache_paradox)

        # -- Outputs to MMU
        # m_out.done <= r1.mmu_done;
        # m_out.err <= r1.mmu_error;
        # m_out.data <= data_out;
        comb += m_out.done.eq(r1.mmu_done)
        comb += m_out.err.eq(r1.mmu_error)
        comb += m_out.data.eq(data_out)

        # -- We have a valid load or store hit or we just completed
        # -- a slow op such as a load miss, a NC load or a store
        # --
        # -- Note: the load hit is delayed by one cycle. However it
        # -- can still not collide with r.slow_valid (well unless I
        # -- miscalculated) because slow_valid can only be set on a
        # -- subsequent request and not on its first cycle (the state
        # -- machine must have advanced), which makes slow_valid
        # -- at least 2 cycles from the previous hit_load_valid.
        #
        # -- Sanity: Only one of these must be set in any given cycle
        # assert (r1.slow_valid and r1.stcx_fail) /= '1'
        # report "unexpected slow_valid collision with stcx_fail"
        # severity FAILURE;
        # assert ((r1.slow_valid or r1.stcx_fail) and r1.hit_load_valid)
        # /= '1' report "unexpected hit_load_delayed collision with
        # slow_valid" severity FAILURE;
        # We have a valid load or store hit or we just completed
        # a slow op such as a load miss, a NC load or a store
        #
        # Note: the load hit is delayed by one cycle. However it
        # can still not collide with r.slow_valid (well unless I
        # miscalculated) because slow_valid can only be set on a
        # subsequent request and not on its first cycle (the state
        # machine must have advanced), which makes slow_valid
        # at least 2 cycles from the previous hit_load_valid.

        # Sanity: Only one of these must be set in any given cycle
        # TODO: like the stage_0 assert, these belong in unit tests or
        # formal proofs; a Python assert on nmigen Signals would only
        # run at elaboration time:
        # assert (r1.slow_valid & r1.stcx_fail) != 1, \
        #     "unexpected slow_valid collision with stcx_fail"
        # assert ((r1.slow_valid | r1.stcx_fail) & r1.hit_load_valid) != 1, \
        #     "unexpected hit_load_delayed collision with slow_valid"

        # if r1.mmu_req = '0' then
        with m.If(~r1.mmu_req):
            # -- Request came from loadstore1...
            # -- Load hit case is the standard path
            # if r1.hit_load_valid = '1' then
            # Request came from loadstore1...
            # Load hit case is the standard path
            with m.If(r1.hit_load_valid):
                # report
                # "completing load hit data=" & to_hstring(data_out);
                print(f"completing load hit data={data_out}")
            # end if;

            # -- error cases complete without stalling
            # if r1.ls_error = '1' then
            # error cases complete without stalling
            with m.If(r1.ls_error):
                # report "completing ld/st with error";
                print("completing ld/st with error")
            # end if;

            # -- Slow ops (load miss, NC, stores)
            # if r1.slow_valid = '1' then
            # Slow ops (load miss, NC, stores)
            with m.If(r1.slow_valid):
                # report
                # "completing store or load miss data="
                # & to_hstring(data_out);
                print(f"completing store or load miss data={data_out}")
            # end if;

        # else
        with m.Else():
            # -- Request came from MMU
            # if r1.hit_load_valid = '1' then
            # Request came from MMU
            with m.If(r1.hit_load_valid):
                # report "completing load hit to MMU, data="
                # & to_hstring(m_out.data);
                print(f"completing load hit to MMU, data={m_out.data}")
            # end if;
            #
            # -- error cases complete without stalling
            # if r1.mmu_error = '1' then
            # report "completing MMU ld with error";
            # error cases complete without stalling
            with m.If(r1.mmu_error):
                print("completing MMU ld with error")
            # end if;
            #
            # -- Slow ops (i.e. load miss)
            # if r1.slow_valid = '1' then
            # Slow ops (i.e. load miss)
            with m.If(r1.slow_valid):
                # report "completing MMU load miss, data="
                # & to_hstring(m_out.data);
                print(f"completing MMU load miss, data={m_out.data}")
            # end if;
        # end if;
        # end process;

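    # (illustration only) the byte-lane merge in writeback_control above,
    # written as plain Python on 64-bit integers, to show which byte
    # lanes forward_sel selects from the forwarded data:
    @staticmethod
    def _merge_bytes_example(data_out, data_fwd, forward_sel):
        res = 0
        for i in range(8):
            # pick the forwarded byte where the select bit is set
            src = data_fwd if (forward_sel >> i) & 1 else data_out
            res |= src & (0xff << (8 * i))
        return res
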
    # -- Generate a cache RAM for each way. This handles the normal
    # -- reads, writes from reloads and the special store-hit update
    # -- path as well.
    # --
    # -- Note: the BRAMs have an extra read buffer, meaning the output
    # -- is pipelined an extra cycle. This differs from the
    # -- icache. The writeback logic needs to take that into
    # -- account by using 1-cycle delayed signals for load hits.
    # --
    # rams: for i in 0 to NUM_WAYS-1 generate
    # Generate a cache RAM for each way. This handles the normal
    # reads, writes from reloads and the special store-hit update
    # path as well.
    #
    # Note: the BRAMs have an extra read buffer, meaning the output
    # is pipelined an extra cycle. This differs from the
    # icache. The writeback logic needs to take that into
    # account by using 1-cycle delayed signals for load hits.
    def rams(self, m, r1, early_req_row, cache_out, replace_way, wb_in):

        comb = m.d.comb
        sync = m.d.sync

        for i in range(NUM_WAYS):
            # signal do_read : std_ulogic;
            # signal rd_addr : std_ulogic_vector(ROW_BITS-1 downto 0);
            # signal do_write : std_ulogic;
            # signal wr_addr : std_ulogic_vector(ROW_BITS-1 downto 0);
            # signal wr_data :
            # std_ulogic_vector(wishbone_data_bits-1 downto 0);
            # signal wr_sel : std_ulogic_vector(ROW_SIZE-1 downto 0);
            # signal wr_sel_m : std_ulogic_vector(ROW_SIZE-1 downto 0);
            # signal dout : cache_row_t;
            do_read = Signal()
            rd_addr = Signal(ROW_BITS)
            do_write = Signal()
            wr_addr = Signal(ROW_BITS)
            wr_data = Signal(WB_DATA_BITS)
            wr_sel = Signal(ROW_SIZE)
            wr_sel_m = Signal(ROW_SIZE)
            _d_out = Signal(WB_DATA_BITS) # "dout" in the VHDL

            # begin
            # way: entity work.cache_ram
            # generic map (
            #     ROW_BITS => ROW_BITS,
            #     WIDTH => wishbone_data_bits,
            #     ADD_BUF => true
            # )
            # port map (
            #     clk => clk,
            #     rd_en => do_read,
            #     rd_addr => rd_addr,
            #     rd_data => dout,
            #     wr_sel => wr_sel_m,
            #     wr_addr => wr_addr,
            #     wr_data => wr_data
            # );
            # process(all)
            way = CacheRam(ROW_BITS, WB_DATA_BITS, True)
            setattr(m.submodules, "cacheram_%d" % i, way)
            comb += way.rd_en.eq(do_read)
            comb += way.rd_addr.eq(rd_addr)
            comb += _d_out.eq(way.rd_data)
            comb += way.wr_sel.eq(wr_sel_m)
            comb += way.wr_addr.eq(wr_addr)
            comb += way.wr_data.eq(wr_data)

            # begin
            # -- Cache hit reads
            # do_read <= '1';
            # rd_addr <=
            # std_ulogic_vector(to_unsigned(early_req_row, ROW_BITS));
            # cache_out(i) <= dout;
            # Cache hit reads
            comb += do_read.eq(1)
            comb += rd_addr.eq(early_req_row)
            comb += cache_out[i].eq(_d_out)

            # -- Write mux:
            # --
            # -- Defaults to wishbone read responses (cache refill)
            # --
            # -- For timing, the mux on wr_data/sel/addr is not
            # -- dependent on anything other than the current state.
            # Write mux:
            #
            # Defaults to wishbone read responses (cache refill)
            #
            # For timing, the mux on wr_data/sel/addr is not
            # dependent on anything other than the current state.
            # wr_sel_m <= (others => '0');
            comb += wr_sel_m.eq(0)

            # do_write <= '0';
            comb += do_write.eq(0)
            # if r1.write_bram = '1' then
            with m.If(r1.write_bram):
                # -- Write store data to BRAM. This happens one
                # -- cycle after the store is in r0.
                # Write store data to BRAM. This happens one
                # cycle after the store is in r0.
                # wr_data <= r1.req.data;
                # wr_sel <= r1.req.byte_sel;
                # wr_addr <= std_ulogic_vector(to_unsigned(
                #     get_row(r1.req.real_addr), ROW_BITS
                # ));
                comb += wr_data.eq(r1.req.data)
                comb += wr_sel.eq(r1.req.byte_sel)
                comb += wr_addr.eq(get_row(r1.req.real_addr))

                # if i = r1.req.hit_way then
                with m.If(i == r1.req.hit_way):
                    # do_write <= '1';
                    comb += do_write.eq(1)
                # end if;
            # else
            with m.Else():
                # -- Otherwise, we might be doing a reload or a DCBZ
                # if r1.dcbz = '1' then
                # Otherwise, we might be doing a reload or a DCBZ
                with m.If(r1.dcbz):
                    # wr_data <= (others => '0');
                    comb += wr_data.eq(0)
                # else
                with m.Else():
                    # wr_data <= wishbone_in.dat;
                    comb += wr_data.eq(wb_in.dat)
                # end if;

                # wr_addr <= std_ulogic_vector(to_unsigned(
                #     r1.store_row, ROW_BITS
                # ));
                # wr_sel <= (others => '1');
                comb += wr_addr.eq(r1.store_row)
                comb += wr_sel.eq(-1) # all ones

                # if r1.state = RELOAD_WAIT_ACK and
                # wishbone_in.ack = '1' and replace_way = i then
                with m.If((r1.state == State.RELOAD_WAIT_ACK)
                          & wb_in.ack & (replace_way == i)):
                    # do_write <= '1';
                    comb += do_write.eq(1)
                # end if;
            # end if;

            # -- Mask write selects with do_write since BRAM
            # -- doesn't have a global write-enable
            # if do_write = '1' then
            # Mask write selects with do_write since BRAM
            # doesn't have a global write-enable
            with m.If(do_write):
                # wr_sel_m <= wr_sel;
                comb += wr_sel_m.eq(wr_sel)
            # end if;
            # end process;
            # end generate;

    # Cache hit synchronous machine for the easy case.
    # This handles load hits.
    # It also handles error cases (TLB miss, cache paradox)
    def dcache_fast_hit(self, m, req_op, r0_valid, r0, r1,
                        req_hit_way, req_index, req_tag, access_ok,
                        valid_ra, rc_ok, perm_ok,
                        tlb_hit, tlb_hit_way, tlb_req_index):

        comb = m.d.comb
        sync = m.d.sync

        # begin
        # if rising_edge(clk) then
        # if req_op /= OP_NONE then
        with m.If(req_op != Op.OP_NONE):
            # report "op:" & op_t'image(req_op) &
            # " addr:" & to_hstring(r0.req.addr) &
            # " nc:" & std_ulogic'image(r0.req.nc) &
            # " idx:" & integer'image(req_index) &
            # " tag:" & to_hstring(req_tag) &
            # " way: " & integer'image(req_hit_way);
            print(f"op:{req_op} addr:{r0.req.addr} nc:{r0.req.nc} "
                  f"idx:{req_index} tag:{req_tag} way:{req_hit_way}"
                 )
        # end if;
        # if r0_valid = '1' then
        with m.If(r0_valid):
            # r1.mmu_req <= r0.mmu_req;
            sync += r1.mmu_req.eq(r0.mmu_req)
        # end if;

        # -- Fast path for load/store hits.
        # -- Set signals for the writeback controls.
        # r1.hit_way <= req_hit_way;
        # r1.hit_index <= req_index;
        # Fast path for load/store hits.
        # Set signals for the writeback controls.
        sync += r1.hit_way.eq(req_hit_way)
        sync += r1.hit_index.eq(req_index)

        # if req_op = OP_LOAD_HIT then
        with m.If(req_op == Op.OP_LOAD_HIT):
            # r1.hit_load_valid <= '1';
            sync += r1.hit_load_valid.eq(1)

        # else
        with m.Else():
            # r1.hit_load_valid <= '0';
            sync += r1.hit_load_valid.eq(0)
        # end if;

        # if req_op = OP_LOAD_HIT or req_op = OP_STORE_HIT then
        with m.If((req_op == Op.OP_LOAD_HIT)
                  | (req_op == Op.OP_STORE_HIT)):
            # r1.cache_hit <= '1';
            sync += r1.cache_hit.eq(1)
        # else
        with m.Else():
            # r1.cache_hit <= '0';
            sync += r1.cache_hit.eq(0)
        # end if;

        # if req_op = OP_BAD then
        with m.If(req_op == Op.OP_BAD):
            # report "Signalling ld/st error valid_ra=" &
            # std_ulogic'image(valid_ra) & " rc_ok=" &
            # std_ulogic'image(rc_ok) & " perm_ok=" &
            # std_ulogic'image(perm_ok);
            print(f"Signalling ld/st error valid_ra={valid_ra} "
                  f"rc_ok={rc_ok} perm_ok={perm_ok}")

            # r1.ls_error <= not r0.mmu_req;
            # r1.mmu_error <= r0.mmu_req;
            # r1.cache_paradox <= access_ok;
            sync += r1.ls_error.eq(~r0.mmu_req)
            sync += r1.mmu_error.eq(r0.mmu_req)
            sync += r1.cache_paradox.eq(access_ok)

        # else
        with m.Else():
            # r1.ls_error <= '0';
            # r1.mmu_error <= '0';
            # r1.cache_paradox <= '0';
            sync += r1.ls_error.eq(0)
            sync += r1.mmu_error.eq(0)
            sync += r1.cache_paradox.eq(0)
        # end if;

        # if req_op = OP_STCX_FAIL then
        with m.If(req_op == Op.OP_STCX_FAIL):
            # r1.stcx_fail <= '1';
            sync += r1.stcx_fail.eq(1)

        # else
        with m.Else():
            # r1.stcx_fail <= '0';
            sync += r1.stcx_fail.eq(0)
        # end if;

        # -- Record TLB hit information for updating TLB PLRU
        # r1.tlb_hit <= tlb_hit;
        # r1.tlb_hit_way <= tlb_hit_way;
        # r1.tlb_hit_index <= tlb_req_index;
        # Record TLB hit information for updating TLB PLRU
        sync += r1.tlb_hit.eq(tlb_hit)
        sync += r1.tlb_hit_way.eq(tlb_hit_way)
        sync += r1.tlb_hit_index.eq(tlb_req_index)
        # end if;
        # end process;

    # Memory accesses are handled by this state machine:
    #
    #   * Cache load miss/reload (in conjunction with "rams")
    #   * Load hits for non-cachable forms
    #   * Stores (the collision case is handled in "rams")
    #
    # All wishbone requests generation is done here.
    # This machine operates at stage 1.
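    #
    # In summary, the transitions implemented below are:
    #   IDLE            -> RELOAD_WAIT_ACK  (load miss, or dcbz store)
    #   IDLE            -> NC_LOAD_WAIT_ACK (non-cacheable load)
    #   IDLE            -> STORE_WAIT_ACK   (store, non-dcbz)
    #   RELOAD_WAIT_ACK -> IDLE             (when the last row is acked)
    # (the remaining *_WAIT_ACK completions follow in the later cases)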
    def dcache_slow(self, m, r1, use_forward1_next, use_forward2_next,
                    cache_valid_bits, r0, r0_valid, req_op, cache_tag,
                    req_go, ra, wb_in, replace_way, req_hit_way,
                    req_same_tag):

        comb = m.d.comb
        sync = m.d.sync

        # variable stbs_done : boolean;
        # variable req : mem_access_request_t;
        # variable acks : unsigned(2 downto 0);
        # (VHDL variables: assigned combinationally below)
        stbs_done = Signal()
        req = MemAccessRequest()
        acks = Signal(3)

        # begin
        # if rising_edge(clk) then
        # r1.use_forward1 <= use_forward1_next;
        # r1.forward_sel <= (others => '0');
        sync += r1.use_forward1.eq(use_forward1_next)
        sync += r1.forward_sel.eq(0)

        # if use_forward1_next = '1' then
        with m.If(use_forward1_next):
            # r1.forward_sel <= r1.req.byte_sel;
            sync += r1.forward_sel.eq(r1.req.byte_sel)

        # elsif use_forward2_next = '1' then
        with m.Elif(use_forward2_next):
            # r1.forward_sel <= r1.forward_sel1;
            sync += r1.forward_sel.eq(r1.forward_sel1)
        # end if;

        # r1.forward_data2 <= r1.forward_data1;
        sync += r1.forward_data2.eq(r1.forward_data1)

        # if r1.write_bram = '1' then
        with m.If(r1.write_bram):
            # r1.forward_data1 <= r1.req.data;
            # r1.forward_sel1 <= r1.req.byte_sel;
            # r1.forward_way1 <= r1.req.hit_way;
            # r1.forward_row1 <= get_row(r1.req.real_addr);
            # r1.forward_valid1 <= '1';
            sync += r1.forward_data1.eq(r1.req.data)
            sync += r1.forward_sel1.eq(r1.req.byte_sel)
            sync += r1.forward_way1.eq(r1.req.hit_way)
            sync += r1.forward_row1.eq(get_row(r1.req.real_addr))
            sync += r1.forward_valid1.eq(1)
        # else
        with m.Else():

            # if r1.dcbz = '1' then
            with m.If(r1.dcbz):
                # r1.forward_data1 <= (others => '0');
                sync += r1.forward_data1.eq(0)

            # else
            with m.Else():
                # r1.forward_data1 <= wishbone_in.dat;
                sync += r1.forward_data1.eq(wb_in.dat)
            # end if;

            # r1.forward_sel1 <= (others => '1');
            # r1.forward_way1 <= replace_way;
            # r1.forward_row1 <= r1.store_row;
            # r1.forward_valid1 <= '0';
            sync += r1.forward_sel1.eq(-1) # all ones
            sync += r1.forward_way1.eq(replace_way)
            sync += r1.forward_row1.eq(r1.store_row)
            sync += r1.forward_valid1.eq(0)
        # end if;

        # -- On reset, clear all valid bits to force misses
        # if rst = '1' then
        # On reset, clear all valid bits to force misses
        # TODO: nmigen's per-domain synchronous reset would normally
        # handle this; an explicit ResetSignal() keeps the VHDL
        # if/else structure for now
        with m.If(ResetSignal()):
            # for i in index_t loop
            for i in range(NUM_LINES):
                # cache_valids(i) <= (others => '0');
                sync += cache_valid_bits[i].eq(0)
            # end loop;

            # r1.state <= IDLE;
            # r1.full <= '0';
            # r1.slow_valid <= '0';
            # r1.wb.cyc <= '0';
            # r1.wb.stb <= '0';
            # r1.ls_valid <= '0';
            # r1.mmu_done <= '0';
            sync += r1.state.eq(State.IDLE)
            sync += r1.full.eq(0)
            sync += r1.slow_valid.eq(0)
            sync += r1.wb.cyc.eq(0)
            sync += r1.wb.stb.eq(0)
            sync += r1.ls_valid.eq(0)
            sync += r1.mmu_done.eq(0)

            # -- Not useful normally but helps avoiding
            # -- tons of sim warnings
            # Not useful normally but helps avoiding
            # tons of sim warnings
            # r1.wb.adr <= (others => '0');
            sync += r1.wb.adr.eq(0)
        # else
        with m.Else():
            # -- One cycle pulses reset
            # r1.slow_valid <= '0';
            # r1.write_bram <= '0';
            # r1.inc_acks <= '0';
            # r1.dec_acks <= '0';
            #
            # r1.ls_valid <= '0';
            # -- complete tlbies and TLB loads in the third cycle
            # r1.mmu_done <= r0_valid and (r0.tlbie or r0.tlbld);
            # One cycle pulses reset
            sync += r1.slow_valid.eq(0)
            sync += r1.write_bram.eq(0)
            sync += r1.inc_acks.eq(0)
            sync += r1.dec_acks.eq(0)

            sync += r1.ls_valid.eq(0)
            # complete tlbies and TLB loads in the third cycle
            sync += r1.mmu_done.eq(r0_valid & (r0.tlbie | r0.tlbld))

            # if req_op = OP_LOAD_HIT or req_op = OP_STCX_FAIL then
            with m.If((req_op == Op.OP_LOAD_HIT)
                      | (req_op == Op.OP_STCX_FAIL)):
                # if r0.mmu_req = '0' then
                with m.If(~r0.mmu_req):
                    # r1.ls_valid <= '1';
                    sync += r1.ls_valid.eq(1)
                # else
                with m.Else():
                    # r1.mmu_done <= '1';
                    sync += r1.mmu_done.eq(1)
                # end if;
            # end if;

            # if r1.write_tag = '1' then
            with m.If(r1.write_tag):
                # -- Store new tag in selected way
                # for i in 0 to NUM_WAYS-1 loop
                # Store new tag in selected way
                for i in range(NUM_WAYS):
                    # if i = replace_way then
                    with m.If(i == replace_way):
                        # cache_tags(r1.store_index)(
                        #     (i + 1) * TAG_WIDTH - 1
                        #     downto i * TAG_WIDTH
                        # ) <=
                        # (TAG_WIDTH - 1 downto TAG_BITS => '0')
                        # & r1.reload_tag;
                        sync += cache_tag[
                                    r1.store_index
                                ][i * TAG_WIDTH:(i + 1) * TAG_WIDTH].eq(
                                    # zero-extended to TAG_WIDTH by .eq()
                                    r1.reload_tag
                                )
                    # end if;
                # end loop;
                # r1.store_way <= replace_way;
                # r1.write_tag <= '0';
                sync += r1.store_way.eq(replace_way)
                sync += r1.write_tag.eq(0)
            # end if;

            # -- Take request from r1.req if there is one there,
            # -- else from req_op, ra, etc.
            # if r1.full = '1' then
            # Take request from r1.req if there is one there,
            # else from req_op, ra, etc.
            # (req is a VHDL variable, so assign it combinationally)
            with m.If(r1.full):
                # req := r1.req;
                comb += req.eq(r1.req)

            # else
            with m.Else():
                # req.op := req_op;
                # req.valid := req_go;
                # req.mmu_req := r0.mmu_req;
                # req.dcbz := r0.req.dcbz;
                # req.real_addr := ra;
                comb += req.op.eq(req_op)
                comb += req.valid.eq(req_go)
                comb += req.mmu_req.eq(r0.mmu_req)
                comb += req.dcbz.eq(r0.req.dcbz)
                comb += req.real_addr.eq(ra)

                # -- Force data to 0 for dcbz
                # if r0.req.dcbz = '0' then
                with m.If(~r0.req.dcbz):
                    # req.data := r0.req.data;
                    comb += req.data.eq(r0.req.data)

                # else
                with m.Else():
                    # req.data := (others => '0');
                    comb += req.data.eq(0)
                # end if;

                # -- Select all bytes for dcbz
                # -- and for cacheable loads
                # if r0.req.dcbz = '1'
                # or (r0.req.load = '1' and r0.req.nc = '0') then
                # Select all bytes for dcbz
                # and for cacheable loads
                with m.If(r0.req.dcbz | (r0.req.load & ~r0.req.nc)):
                    # req.byte_sel := (others => '1');
                    comb += req.byte_sel.eq(-1) # all ones

                # else
                with m.Else():
                    # req.byte_sel := r0.req.byte_sel;
                    comb += req.byte_sel.eq(r0.req.byte_sel)
                # end if;

                # req.hit_way := req_hit_way;
                # req.same_tag := req_same_tag;
                comb += req.hit_way.eq(req_hit_way)
                comb += req.same_tag.eq(req_same_tag)

                # -- Store the incoming request from r0,
                # -- if it is a slow request
                # -- Note that r1.full = 1 implies req_op = OP_NONE
                # if req_op = OP_LOAD_MISS or req_op = OP_LOAD_NC
                # or req_op = OP_STORE_MISS
                # or req_op = OP_STORE_HIT then
                # Store the incoming request from r0,
                # if it is a slow request
                # Note that r1.full = 1 implies req_op = OP_NONE
                with m.If((req_op == Op.OP_LOAD_MISS)
                          | (req_op == Op.OP_LOAD_NC)
                          | (req_op == Op.OP_STORE_MISS)
                          | (req_op == Op.OP_STORE_HIT)):
                    # r1.req <= req;
                    # r1.full <= '1';
                    sync += r1.req.eq(req)
                    sync += r1.full.eq(1)
                # end if;
            # end if;
            #
            # -- Main state machine
            # case r1.state is
            # Main state machine
            with m.Switch(r1.state):

                # when IDLE =>
                with m.Case(State.IDLE):
                    # r1.wb.adr <= req.real_addr(
                    #     r1.wb.adr'left downto 0
                    # );
                    # r1.wb.sel <= req.byte_sel;
                    # r1.wb.dat <= req.data;
                    # r1.dcbz <= req.dcbz;
                    #
                    # -- Keep track of our index and way
                    # -- for subsequent stores.
                    # r1.store_index <= get_index(req.real_addr);
                    # r1.store_row <= get_row(req.real_addr);
                    # r1.end_row_ix <=
                    #     get_row_of_line(get_row(req.real_addr)) - 1;
                    # r1.reload_tag <= get_tag(req.real_addr);
                    # r1.req.same_tag <= '1';
                    sync += r1.wb.adr.eq(req.real_addr[0:r1.wb.adr.width])
                    sync += r1.wb.sel.eq(req.byte_sel)
                    sync += r1.wb.dat.eq(req.data)
                    sync += r1.dcbz.eq(req.dcbz)

                    # Keep track of our index and way
                    # for subsequent stores.
                    sync += r1.store_index.eq(get_index(req.real_addr))
                    sync += r1.store_row.eq(get_row(req.real_addr))
                    sync += r1.end_row_ix.eq(
                                get_row_of_line(get_row(req.real_addr)) - 1
                            )
                    sync += r1.reload_tag.eq(get_tag(req.real_addr))
                    sync += r1.req.same_tag.eq(1)

                    # if req.op = OP_STORE_HIT then
                    with m.If(req.op == Op.OP_STORE_HIT):
                        # r1.store_way <= req.hit_way;
                        sync += r1.store_way.eq(req.hit_way)
                    # end if;

                    # -- Reset per-row valid bits,
                    # -- ready for handling OP_LOAD_MISS
                    # for i in 0 to ROW_PER_LINE - 1 loop
                    # Reset per-row valid bits,
                    # ready for handling OP_LOAD_MISS
                    for i in range(ROW_PER_LINE):
                        # r1.rows_valid(i) <= '0';
                        sync += r1.rows_valid[i].eq(0)
                    # end loop;

                    # case req.op is
                    with m.Switch(req.op):
                        # when OP_LOAD_HIT =>
                        with m.Case(Op.OP_LOAD_HIT):
                            # -- stay in IDLE state
                            # stay in IDLE state
                            pass

                        # when OP_LOAD_MISS =>
                        with m.Case(Op.OP_LOAD_MISS):
                            # -- Normal load cache miss,
                            # -- start the reload machine
                            # report "cache miss real addr:" &
                            # to_hstring(req.real_addr) & " idx:" &
                            # integer'image(get_index(req.real_addr)) &
                            # " tag:" & to_hstring(get_tag(req.real_addr));
                            # Normal load cache miss,
                            # start the reload machine
                            print(f"cache miss real addr:"
                                  f"{req.real_addr}"
                                  f" idx:{get_index(req.real_addr)}"
                                  f" tag:{get_tag(req.real_addr)}")

                            # -- Start the wishbone cycle
                            # r1.wb.we <= '0';
                            # r1.wb.cyc <= '1';
                            # r1.wb.stb <= '1';
                            # Start the wishbone cycle
                            sync += r1.wb.we.eq(0)
                            sync += r1.wb.cyc.eq(1)
                            sync += r1.wb.stb.eq(1)

                            # -- Track that we had one request sent
                            # r1.state <= RELOAD_WAIT_ACK;
                            # r1.write_tag <= '1';
                            # Track that we had one request sent
                            sync += r1.state.eq(State.RELOAD_WAIT_ACK)
                            sync += r1.write_tag.eq(1)

                        # when OP_LOAD_NC =>
                        with m.Case(Op.OP_LOAD_NC):
                            # r1.wb.cyc <= '1';
                            # r1.wb.stb <= '1';
                            # r1.wb.we <= '0';
                            # r1.state <= NC_LOAD_WAIT_ACK;
                            sync += r1.wb.cyc.eq(1)
                            sync += r1.wb.stb.eq(1)
                            sync += r1.wb.we.eq(0)
                            sync += r1.state.eq(State.NC_LOAD_WAIT_ACK)

                        # when OP_STORE_HIT | OP_STORE_MISS =>
                        with m.Case(Op.OP_STORE_HIT,
                                    Op.OP_STORE_MISS):
                            # if req.dcbz = '0' then
                            with m.If(~req.dcbz):
                                # r1.state <= STORE_WAIT_ACK;
                                # r1.acks_pending <= to_unsigned(1, 3);
                                # r1.full <= '0';
                                # r1.slow_valid <= '1';
                                sync += r1.state.eq(
                                            State.STORE_WAIT_ACK
                                        )
                                sync += r1.acks_pending.eq(1)
                                sync += r1.full.eq(0)
                                sync += r1.slow_valid.eq(1)

                                # if req.mmu_req = '0' then
                                with m.If(~req.mmu_req):
                                    # r1.ls_valid <= '1';
                                    sync += r1.ls_valid.eq(1)
                                # else
                                with m.Else():
                                    # r1.mmu_done <= '1';
                                    sync += r1.mmu_done.eq(1)
                                # end if;

                                # if req.op = OP_STORE_HIT then
                                with m.If(req.op == Op.OP_STORE_HIT):
                                    # r1.write_bram <= '1';
                                    sync += r1.write_bram.eq(1)
                                # end if;

                            # else
                            with m.Else():
                                # -- dcbz is handled much like a load
                                # -- miss except that we are writing
                                # -- to memory instead of reading
                                # r1.state <= RELOAD_WAIT_ACK;
                                # dcbz is handled much like a load
                                # miss except that we are writing
                                # to memory instead of reading
                                sync += r1.state.eq(State.RELOAD_WAIT_ACK)

                                # if req.op = OP_STORE_MISS then
                                with m.If(req.op == Op.OP_STORE_MISS):
                                    # r1.write_tag <= '1';
                                    sync += r1.write_tag.eq(1)
                                # end if;
                            # end if;

                            # r1.wb.we <= '1';
                            # r1.wb.cyc <= '1';
                            # r1.wb.stb <= '1';
                            sync += r1.wb.we.eq(1)
                            sync += r1.wb.cyc.eq(1)
                            sync += r1.wb.stb.eq(1)

                        # -- OP_NONE and OP_BAD do nothing
                        # -- OP_BAD & OP_STCX_FAIL were handled above already
                        # when OP_NONE =>
                        # when OP_BAD =>
                        # when OP_STCX_FAIL =>
                        # OP_NONE and OP_BAD do nothing
                        # OP_BAD & OP_STCX_FAIL were
                        # handled above already
                        with m.Case(Op.OP_NONE):
                            pass

                        with m.Case(Op.OP_BAD):
                            pass

                        with m.Case(Op.OP_STCX_FAIL):
                            pass
                    # end case;

1747 # when RELOAD_WAIT_ACK =>
1748 with m.Case(State.RELOAD_WAIT_ACK):
1749 # -- Requests are all sent if stb is 0
1750 # Requests are all sent if stb is 0
1751 sync += stbs_done.eq(~r1.wb.stb)
1752 # stbs_done := r1.wb.stb = '0';
1753
1754 # -- If we are still sending requests,
1755 # -- was one accepted?
1756 # if wishbone_in.stall = '0' and not stbs_done then
1757 # If we are still sending requests,
1758 # was one accepted?
1759 with m.If(~wb_in.stall & ~stbs_done):
1760 # -- That was the last word ? We are done sending.
1761 # -- Clear stb and set stbs_done so we can handle
1762 # -- an eventual last ack on the same cycle.
1763 # if is_last_row_addr(
1764 # r1.wb.adr, r1.end_row_ix
1765 # ) then
1766 # That was the last word?
1767 # We are done sending.
1768 # Clear stb and set stbs_done
1769 # so we can handle an eventual
1770 # last ack on the same cycle.
1771 with m.If(is_last_row_addr(
1772 r1.wb.adr, r1.end_row_ix)):
1773 # r1.wb.stb <= '0';
1774 # stbs_done := true;
1775 sync += r1.wb.stb.eq(0)
1776 sync += stbs_done.eq(0)
1777 # end if;
1778
1779 # -- Calculate the next row address
1780 # r1.wb.adr <= next_row_addr(r1.wb.adr);
1781 # Calculate the next row address
1782 sync += r1.wb.adr.eq(next_row_addr(r1.wb.adr))
1783 # end if;
1784
1785 # -- Incoming acks processing
1786 # r1.forward_valid1 <= wishbone_in.ack;
1787 # Incoming acks processing
1788 sync += r1.forward_valid1.eq(wb_in.ack)
1789
1790 # if wishbone_in.ack = '1' then
1791 with m.If(wb_in.ack):
1792 # r1.rows_valid(
1793 # r1.store_row mod ROW_PER_LINE
1794 # ) <= '1';
1795 sync += r1.rows_valid[
1796 r1.store_row % ROW_PER_LINE
1797 ].eq(1)
1798
1799 # -- If this is the data we were looking for,
1800 # -- we can complete the request next cycle.
1801 # -- Compare the whole address in case the
1802 # -- request in r1.req is not the one that
1803 # -- started this refill.
1804 # if r1.full = '1' and r1.req.same_tag = '1'
1805 # and ((r1.dcbz = '1' and r1.req.dcbz = '1')
1806 # or (r1.dcbz = '0' and r1.req.op = OP_LOAD_MISS))
1807 # and r1.store_row = get_row(r1.req.real_addr) then
1808 # If this is the data we were looking for,
1809 # we can complete the request next cycle.
1810 # Compare the whole address in case the
1811 # request in r1.req is not the one that
1812 # started this refill.
1813 with m.If(r1.full & r1.req.same_tag &
1814 ((r1.dcbz & r1.req.dcbz)
1815 (~r1.dcbz &
1816 r1.req.op == Op.OP_LOAD_MISS)
1817 ) &
1818 r1.store_row
1819 == get_row(r1.req.real_addr):
1820 # r1.full <= '0';
1821 # r1.slow_valid <= '1';
1822 sync += r1.full.eq(0)
1823 sync += r1.slow_valid.eq(1)
1824
1825 # if r1.mmu_req = '0' then
1826 with m.If(~r1.mmu_req):
1827 # r1.ls_valid <= '1';
1828 sync += r1.ls_valid.eq(1)
1829 # else
1830 with m.Else():
1831 # r1.mmu_done <= '1';
1832 sync += r1.mmu_done.eq(1)
1833 # end if;
1834 # r1.forward_sel <= (others => '1');
1835 # r1.use_forward1 <= '1';
sync += r1.forward_sel.eq(~0) # (others => '1'): all bits set
1837 sync += r1.use_forward1.eq(1)
1838 # end if;
1839
1840 # -- Check for completion
1841 # if stbs_done and is_last_row(r1.store_row,
1842 # r1.end_row_ix) then
1843 # Check for completion
1844 with m.If(stbs_done &
1845 is_last_row(r1.store_row,
1846 r1.end_row_ix)):
1847
1848 # -- Complete wishbone cycle
1849 # r1.wb.cyc <= '0';
1850 # Complete wishbone cycle
1851 sync += r1.wb.cyc.eq(0)
1852
1853 # -- Cache line is now valid
1854 # cache_valids(r1.store_index)(
1855 # r1.store_way
1856 # ) <= '1';
1857 # Cache line is now valid
1858 sync += cache_valid_bits[
1859 r1.store_index
1860 ][r1.store_way].eq(1)
1861
1862 # r1.state <= IDLE;
1863 sync += r1.state.eq(State.IDLE)
1864 # end if;
1865
1866 # -- Increment store row counter
1867 # r1.store_row <= next_row(r1.store_row);
1868 # Increment store row counter
1869 sync += r1.store_row.eq(next_row(
1870 r1.store_row
1871 ))
1872 # end if;
1873
1874 # when STORE_WAIT_ACK =>
1875 with m.Case(State.STORE_WAIT_ACK):
1876 # stbs_done := r1.wb.stb = '0';
1877 # acks := r1.acks_pending;
1878 sync += stbs_done.eq(~r1.wb.stb)
1879 sync += acks.eq(r1.acks_pending)
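# note: same caveat as stbs_done above: acks is a VHDL process
# variable, and the increments/decrements and comparisons below
# are meant to see the *updated* value within this cycle, which
# a sync Signal does not provide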
1880
1881 # if r1.inc_acks /= r1.dec_acks then
1882 with m.If(r1.inc_acks != r1.dec_acks):
1883
1884 # if r1.inc_acks = '1' then
1885 with m.If(r1.inc_acks):
1886 # acks := acks + 1;
1887 sync += acks.eq(acks + 1)
1888
1889 # else
1890 with m.Else():
1891 # acks := acks - 1;
1892 sync += acks.eq(acks - 1)
1893 # end if;
1894 # end if;
1895
1896 # r1.acks_pending <= acks;
1897 sync += r1.acks_pending.eq(acks)
1898
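# note on the pipelined wishbone handshake used here: cyc frames
# the whole transfer, stb presents one request, stall high means
# the slave has not accepted it yet, and each ack retires one
# previously accepted request; acks_pending counts stores still
# in flight (a 3-bit counter in the original VHDL, hence the
# acks < 7 bound below to avoid overflow)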
1899 # -- Clear stb when slave accepted request
1900 # if wishbone_in.stall = '0' then
1901 # Clear stb when slave accepted request
1902 with m.If(~wb_in.stall):
1903 # -- See if there is another store waiting
1904 # -- to be done which is in the same real page.
1905 # if req.valid = '1' then
1906 # See if there is another store waiting
1907 # to be done which is in the same real page.
1908 with m.If(req.valid):
1909 # r1.wb.adr(
1910 # SET_SIZE_BITS - 1 downto 0
1911 # ) <= req.real_addr(
1912 # SET_SIZE_BITS - 1 downto 0
1913 # );
1914 # r1.wb.dat <= req.data;
1915 # r1.wb.sel <= req.byte_sel;
sync += r1.wb.adr[0:SET_SIZE_BITS].eq(
req.real_addr[0:SET_SIZE_BITS]
)
sync += r1.wb.dat.eq(req.data)
sync += r1.wb.sel.eq(req.byte_sel)
1919 # end if;
1920
1921 # if acks < 7 and req.same_tag = '1'
1922 # and (req.op = OP_STORE_MISS
1923 # or req.op = OP_STORE_HIT) then
with m.If((acks < 7) & req.same_tag &
((req.op == Op.OP_STORE_MISS) |
(req.op == Op.OP_STORE_HIT))):
1927 # r1.wb.stb <= '1';
1928 # stbs_done := false;
1929 sync += r1.wb.stb.eq(1)
1930 sync += stbs_done.eq(0)
1931
1932 # if req.op = OP_STORE_HIT then
1933 with m.If(req.op == Op.OP_STORE_HIT):
1934 # r1.write_bram <= '1';
1935 sync += r1.write_bram.eq(1)
1936 # end if;
1937 # r1.full <= '0';
1938 # r1.slow_valid <= '1';
1939 sync += r1.full.eq(0)
1940 sync += r1.slow_valid.eq(1)
1941
1942 # -- Store requests never come from the MMU
1943 # r1.ls_valid <= '1';
1944 # stbs_done := false;
1945 # r1.inc_acks <= '1';
# Store requests never come from the MMU
1947 sync += r1.ls_valid.eq(1)
1948 sync += stbs_done.eq(0)
1949 sync += r1.inc_acks.eq(1)
1950 # else
1951 with m.Else():
1952 # r1.wb.stb <= '0';
1953 # stbs_done := true;
1954 sync += r1.wb.stb.eq(0)
1955 sync += stbs_done.eq(1)
1956 # end if;
1957 # end if;
1958
1959 # -- Got ack ? See if complete.
1960 # if wishbone_in.ack = '1' then
1961 # Got ack ? See if complete.
1962 with m.If(wb_in.ack):
1963 # if stbs_done and acks = 1 then
with m.If(stbs_done & (acks == 1)):
1965 # r1.state <= IDLE;
1966 # r1.wb.cyc <= '0';
1967 # r1.wb.stb <= '0';
1968 sync += r1.state.eq(State.IDLE)
1969 sync += r1.wb.cyc.eq(0)
1970 sync += r1.wb.stb.eq(0)
1971 # end if;
1972 # r1.dec_acks <= '1';
1973 sync += r1.dec_acks.eq(1)
1974 # end if;
1975
1976 # when NC_LOAD_WAIT_ACK =>
1977 with m.Case(State.NC_LOAD_WAIT_ACK):
1978 # -- Clear stb when slave accepted request
1979 # if wishbone_in.stall = '0' then
1980 # Clear stb when slave accepted request
1981 with m.If(~wb_in.stall):
1982 # r1.wb.stb <= '0';
1983 sync += r1.wb.stb.eq(0)
1984 # end if;
1985
1986 # -- Got ack ? complete.
1987 # if wishbone_in.ack = '1' then
1988 # Got ack ? complete.
1989 with m.If(wb_in.ack):
1990 # r1.state <= IDLE;
1991 # r1.full <= '0';
1992 # r1.slow_valid <= '1';
1993 sync += r1.state.eq(State.IDLE)
1994 sync += r1.full.eq(0)
1995 sync += r1.slow_valid.eq(1)
1996
1997 # if r1.mmu_req = '0' then
1998 with m.If(~r1.mmu_req):
1999 # r1.ls_valid <= '1';
2000 sync += r1.ls_valid.eq(1)
2001
2002 # else
2003 with m.Else():
2004 # r1.mmu_done <= '1';
2005 sync += r1.mmu_done.eq(1)
2006 # end if;
2007
2008 # r1.forward_sel <= (others => '1');
2009 # r1.use_forward1 <= '1';
2010 # r1.wb.cyc <= '0';
2011 # r1.wb.stb <= '0';
sync += r1.forward_sel.eq(~0) # (others => '1'): all bits set
2013 sync += r1.use_forward1.eq(1)
2014 sync += r1.wb.cyc.eq(0)
2015 sync += r1.wb.stb.eq(0)
2016 # end if;
2017 # end case;
2018 # end if;
2019 # end if;
2020 # end process;
2021
2022 # dc_log: if LOG_LENGTH > 0 generate
# note: VHDL "generate" is conditional elaboration, which maps
# to a plain Python "if LOG_LENGTH > 0:" around the call to this
# function at elaborate time (see the sketch after this section)
def dcache_log(self, m, r1, valid_ra, tlb_hit_way, stall_out,
d_out, wb_in, log_out, req_op):
2026
2027 comb = m.d.comb
2028 sync = m.d.sync
2029
2030 # signal log_data : std_ulogic_vector(19 downto 0);
2031 log_data = Signal(20)
2034
2035 # begin
2036 # dcache_log: process(clk)
2037 # begin
2038 # if rising_edge(clk) then
2039 # log_data <= r1.wb.adr(5 downto 3) &
2040 # wishbone_in.stall &
2041 # wishbone_in.ack &
2042 # r1.wb.stb & r1.wb.cyc &
2043 # d_out.error &
2044 # d_out.valid &
2045 # std_ulogic_vector(
2046 # to_unsigned(op_t'pos(req_op), 3)) &
2047 # stall_out &
2048 # std_ulogic_vector(
2049 # to_unsigned(tlb_hit_way, 3)) &
2050 # valid_ra &
2051 # std_ulogic_vector(
2052 # to_unsigned(state_t'pos(r1.state), 3));
# note: Cat() packs LSB-first, so the field order here is the
# reverse of the MSB-first VHDL "&" concatenation above. The
# VHDL forces each enum/way field to 3 bits; here the natural
# signal widths are used, so the packing may differ slightly
# if a width is not exactly 3.
sync += log_data.eq(Cat(
r1.state, valid_ra, tlb_hit_way,
stall_out, req_op, d_out.valid, d_out.error,
r1.wb.cyc, r1.wb.stb, wb_in.ack, wb_in.stall,
r1.wb.adr[3:6]
))
2059 # end if;
2060 # end process;
2061 # log_out <= log_data;
# (a concurrent VHDL assignment maps to a comb assignment)
comb += log_out.eq(log_data)
2064 # end generate;
2065 # end;
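# a minimal sketch of the "generate" equivalent, assuming this is
# called from elaborate() with the signals defined there:
#
#     if LOG_LENGTH > 0:
#         log_out = Signal(20)
#         self.dcache_log(m, r1, valid_ra, tlb_hit_way,
#                         stall_out, d_out, wb_in, log_out, req_op)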
2066
2067 def elaborate(self, platform):
2068 LINE_SIZE = self.LINE_SIZE
2069 NUM_LINES = self.NUM_LINES
2070 NUM_WAYS = self.NUM_WAYS
2071 TLB_SET_SIZE = self.TLB_SET_SIZE
2072 TLB_NUM_WAYS = self.TLB_NUM_WAYS
2073 TLB_LG_PGSZ = self.TLB_LG_PGSZ
2074 LOG_LENGTH = self.LOG_LENGTH
2075
# BRAM organisation: We never access more than
# wishbone_data_bits at a time so to save
# resources we make the array only that wide, and
# use consecutive indices to make a cache "line"
#
# ROW_SIZE is the width in bytes of the BRAM
# (based on WB, so 64-bits)
ROW_SIZE = WB_DATA_BITS // 8
2084
# ROW_PER_LINE is the number of rows (wishbone
# transactions) in a line
2087 ROW_PER_LINE = LINE_SIZE // ROW_SIZE
2088
2089 # BRAM_ROWS is the number of rows in BRAM needed
2090 # to represent the full dcache
2091 BRAM_ROWS = NUM_LINES * ROW_PER_LINE
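# e.g. with a 64-bit wishbone and the 32-lines-of-64-bytes
# example used below: ROW_SIZE = 64 // 8 = 8 bytes,
# ROW_PER_LINE = 64 // 8 = 8, BRAM_ROWS = 32 * 8 = 256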
2092
2093
2094 # Bit fields counts in the address
2095
2096 # REAL_ADDR_BITS is the number of real address
2097 # bits that we store
2098 REAL_ADDR_BITS = 56
2099
2100 # ROW_BITS is the number of bits to select a row
2101 ROW_BITS = log2_int(BRAM_ROWS)
2102
2103 # ROW_LINE_BITS is the number of bits to select
2104 # a row within a line
2105 ROW_LINE_BITS = log2_int(ROW_PER_LINE)
2106
2107 # LINE_OFF_BITS is the number of bits for
2108 # the offset in a cache line
2109 LINE_OFF_BITS = log2_int(LINE_SIZE)
2110
2111 # ROW_OFF_BITS is the number of bits for
2112 # the offset in a row
2113 ROW_OFF_BITS = log2_int(ROW_SIZE)
2114
# INDEX_BITS is the number of bits to
# select a cache line
2117 INDEX_BITS = log2_int(NUM_LINES)
2118
2119 # SET_SIZE_BITS is the log base 2 of the set size
2120 SET_SIZE_BITS = LINE_OFF_BITS + INDEX_BITS
2121
2122 # TAG_BITS is the number of bits of
2123 # the tag part of the address
2124 TAG_BITS = REAL_ADDR_BITS - SET_SIZE_BITS
2125
2126 # TAG_WIDTH is the width in bits of each way of the tag RAM
2127 TAG_WIDTH = TAG_BITS + 7 - ((TAG_BITS + 7) % 8)
2128
2129 # WAY_BITS is the number of bits to select a way
2130 WAY_BITS = log2_int(NUM_WAYS)
2131
2132 # Example of layout for 32 lines of 64 bytes:
2133 #
# ..  tag    |index|  line  |
# ..         |   row   |    |
# ..         |     |---|    | ROW_LINE_BITS  (3)
# ..         |     |--- - --| LINE_OFF_BITS  (6)
# ..         |         |- --| ROW_OFF_BITS   (3)
# ..         |----- ---|    | ROW_BITS       (8)
# ..         |-----|        | INDEX_BITS     (5)
# .. --------|              | TAG_BITS       (45)
2142
2143 TAG_RAM_WIDTH = TAG_WIDTH * NUM_WAYS
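# a sanity sketch for the example numbers in the diagram above,
# using the locals computed so far; only meaningful for the
# 32-lines-of-64-bytes configuration (2 ways assumed here for
# TAG_RAM_WIDTH), hence the guard
if (NUM_LINES, LINE_SIZE, NUM_WAYS) == (32, 64, 2):
assert (ROW_LINE_BITS, LINE_OFF_BITS, ROW_OFF_BITS) == (3, 6, 3)
assert (ROW_BITS, INDEX_BITS, TAG_BITS) == (8, 5, 45)
assert (TAG_WIDTH, TAG_RAM_WIDTH) == (48, 96)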
2144
2145 def CacheTagArray():
2146 return Array(CacheTagSet() for x in range(NUM_LINES))
2147
2148 def CacheValidBitsArray():
2149 return Array(CacheWayValidBits() for x in range(NUM_LINES))
2150
2151 def RowPerLineValidArray():
2152 return Array(Signal() for x in range(ROW_PER_LINE))
2153
2154 # Storage. Hopefully "cache_rows" is a BRAM, the rest is LUTs
2155 cache_tags = CacheTagArray()
2156 cache_tag_set = Signal(TAG_RAM_WIDTH)
2157 cache_valid_bits = CacheValidBitsArray()
2158
2159 # TODO attribute ram_style : string;
2160 # TODO attribute ram_style of cache_tags : signal is "distributed";
2161
2162 # L1 TLB
2163 TLB_SET_BITS = log2_int(TLB_SET_SIZE)
2164 TLB_WAY_BITS = log2_int(TLB_NUM_WAYS)
2165 TLB_EA_TAG_BITS = 64 - (TLB_LG_PGSZ + TLB_SET_BITS)
2166 TLB_TAG_WAY_BITS = TLB_NUM_WAYS * TLB_EA_TAG_BITS
2167 TLB_PTE_BITS = 64
TLB_PTE_WAY_BITS = TLB_NUM_WAYS * TLB_PTE_BITS
2169
2170 def TLBValidBitsArray():
2171 return Array(
2172 Signal(TLB_NUM_WAYS) for x in range(TLB_SET_SIZE)
2173 )
2174
2175 def TLBTagsArray():
2176 return Array(
2177 Signal(TLB_TAG_WAY_BITS) for x in range (TLB_SET_SIZE)
2178 )
2179
2180 def TLBPtesArray():
2181 return Array(
2182 Signal(TLB_PTE_WAY_BITS) for x in range(TLB_SET_SIZE)
2183 )
2184
2185 def HitWaySet():
2186 return Array(Signal(NUM_WAYS) for x in range(TLB_NUM_WAYS))
2187
2188 """note: these are passed to nmigen.hdl.Memory as "attributes".
2189 don't know how, just that they are.
2190 """
2191 dtlb_valid_bits = TLBValidBitsArray()
2192 dtlb_tags = TLBTagsArray()
2193 dtlb_ptes = TLBPtesArray()
2194 # TODO attribute ram_style of
2195 # dtlb_tags : signal is "distributed";
2196 # TODO attribute ram_style of
2197 # dtlb_ptes : signal is "distributed";
2198
2199 r0 = RegStage0()
2200 r0_full = Signal()
2201
2202 r1 = RegStage1()
2203
2204 reservation = Reservation()
2205
2206 # Async signals on incoming request
req_index = Signal(INDEX_BITS)
req_row = Signal(ROW_BITS)
req_hit_way = Signal(WAY_BITS)
req_tag = Signal(TAG_BITS)
req_op = Signal(Op)
2212 req_data = Signal(64)
2213 req_same_tag = Signal()
2214 req_go = Signal()
2215
early_req_row = Signal(ROW_BITS)
2217
2218 cancel_store = Signal()
2219 set_rsrv = Signal()
2220 clear_rsrv = Signal()
2221
2222 r0_valid = Signal()
2223 r0_stall = Signal()
2224
2225 use_forward1_next = Signal()
2226 use_forward2_next = Signal()
2227
2228 # Cache RAM interface
2229 def CacheRamOut():
2230 return Array(Signal(WB_DATA_BITS) for x in range(NUM_WAYS))
2231
2232 cache_out = CacheRamOut()
2233
2234 # PLRU output interface
2235 def PLRUOut():
return Array(Signal(WAY_BITS) for x in range(NUM_LINES))
2237
2238 plru_victim = PLRUOut()
2239 replace_way = Signal(WAY_BITS)
2240
2241 # Wishbone read/write/cache write formatting signals
2242 bus_sel = Signal(8)
2243
2244 # TLB signals
2245 tlb_tag_way = Signal(TLB_TAG_WAY_BITS)
2246 tlb_pte_way = Signal(TLB_PTE_WAY_BITS)
2247 tlb_valid_way = Signal(TLB_NUM_WAYS)
tlb_req_index = Signal(TLB_SET_BITS)
tlb_hit = Signal()
tlb_hit_way = Signal(TLB_WAY_BITS)
2251 pte = Signal(TLB_PTE_BITS)
2252 ra = Signal(REAL_ADDR_BITS)
2253 valid_ra = Signal()
2254 perm_attr = PermAttr()
2255 rc_ok = Signal()
2256 perm_ok = Signal()
2257 access_ok = Signal()
2258
2259 # TLB PLRU output interface
2260 def TLBPLRUOut():
2261 return Array(
2262 Signal(TLB_WAY_BITS) for x in range(TLB_SET_SIZE)
2263 )
2264
2265 tlb_plru_victim = TLBPLRUOut()
2266
2267 # Helper functions to decode incoming requests
2268 #
2269 # Return the cache line index (tag index) for an address
2270 def get_index(addr):
2271 return addr[LINE_OFF_BITS:SET_SIZE_BITS]
2272
2273 # Return the cache row index (data memory) for an address
2274 def get_row(addr):
2275 return addr[ROW_OFF_BITS:SET_SIZE_BITS]
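# e.g. for the 32x64 example geometry: addr[0:3] is the byte
# within a row, addr[3:6] the row within the line, addr[6:11]
# the line index, and addr[11:56] the tag; get_index() is then
# addr[6:11] and get_row() is addr[3:11]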
2276
2277 # Return the index of a row within a line
2278 def get_row_of_line(row):
return row[0:ROW_LINE_BITS]
2282
2283 # Returns whether this is the last row of a line
2284 def is_last_row_addr(addr, last):
2285 return addr[ROW_OFF_BITS:LINE_OFF_BITS] == last
2286
2287 # Returns whether this is the last row of a line
2288 def is_last_row(row, last):
2289 return get_row_of_line(row) == last
2290
2291 # Return the address of the next row in the current cache line
2292 def next_row_addr(addr):
# Is there no simpler way in VHDL to
# generate that 3 bits adder ?
# (in nmigen: slice out the row-index field, add one, and
# re-concatenate, so the adder is only ROW_LINE_BITS wide)
row_idx = addr[ROW_OFF_BITS:LINE_OFF_BITS] + 1
return Cat(addr[0:ROW_OFF_BITS],
row_idx[0:ROW_LINE_BITS],
addr[LINE_OFF_BITS:])
2302
2303 # Return the next row in the current cache line. We use a
2304 # dedicated function in order to limit the size of the
2305 # generated adder to be only the bits within a cache line
2306 # (3 bits with default settings)
def next_row(row):
# slice out the row-within-line field, add one (wrapping),
# and re-concatenate: only a ROW_LINE_BITS-wide adder
row_idx = row[0:ROW_LINE_BITS] + 1
return Cat(row_idx[0:ROW_LINE_BITS], row[ROW_LINE_BITS:])
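# a pure-Python reference model of the wrapping row increment
# above, as a sketch (3-bit row-within-line field assumed, i.e.
# the example geometry): the increment stays within the line
def _next_row_model(row, rlb=3):
line, mask = row >> rlb, (1 << rlb) - 1
return (line << rlb) | ((row + 1) & mask)
assert _next_row_model(7) == 0 # wraps back within line 0
assert _next_row_model(15) == 8 # wraps back within line 1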
2316
2317 # Get the tag value from the address
2318 def get_tag(addr):
2319 return addr[SET_SIZE_BITS:REAL_ADDR_BITS]
2320
2321 # Read a tag from a tag memory row
2322 def read_tag(way, tagset):
return tagset[way * TAG_WIDTH:way * TAG_WIDTH + TAG_BITS]
2324
2325 # Read a TLB tag from a TLB tag memory row
2326 def read_tlb_tag(way, tags):
j = way * TLB_EA_TAG_BITS
return tags[j:j + TLB_EA_TAG_BITS]
2331
2332 # Write a TLB tag to a TLB tag memory row
def write_tlb_tag(way, tags, tag):
j = way * TLB_EA_TAG_BITS
return tags[j:j + TLB_EA_TAG_BITS].eq(tag)
2338
2339 # Read a PTE from a TLB PTE memory row
2340 def read_tlb_pte(way, ptes):
j = way * TLB_PTE_BITS
return ptes[j:j + TLB_PTE_BITS]
2345
def write_tlb_pte(way, ptes, newpte):
j = way * TLB_PTE_BITS
return ptes[j:j + TLB_PTE_BITS].eq(newpte)
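# layout note: each TLB set packs its ways side by side in one
# row, so e.g. with TLB_NUM_WAYS = 2 and TLB_PTE_BITS = 64 a
# ptes row is 128 bits and way 1's PTE lives in bits [64:128];
# the helpers above just slice at way * width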
2351
2352 assert (LINE_SIZE % ROW_SIZE) == 0 "LINE_SIZE not " \
2353 "multiple of ROW_SIZE"
2354
2355 assert (LINE_SIZE % 2) == 0 "LINE_SIZE not power of 2"
2356
2357 assert (NUM_LINES % 2) == 0 "NUM_LINES not power of 2"
2358
2359 assert (ROW_PER_LINE % 2) == 0 "ROW_PER_LINE not" \
2360 "power of 2"
2361
2362 assert ROW_BITS == (INDEX_BITS + ROW_LINE_BITS) \
2363 "geometry bits don't add up"
2364
2365 assert (LINE_OFF_BITS = ROW_OFF_BITS + ROW_LINEBITS) \
2366 "geometry bits don't add up"
2367
2368 assert REAL_ADDR_BITS == (TAG_BITS + INDEX_BITS \
2369 + LINE_OFF_BITS) "geometry bits don't add up"
2370
2371 assert REAL_ADDR_BITS == (TAG_BITS + ROW_BITS + ROW_OFF_BITS) \
2372 "geometry bits don't add up"
2373
2374 assert 64 == wishbone_data_bits "Can't yet handle a" \
2375 "wishbone width that isn't 64-bits"
2376
2377 assert SET_SIZE_BITS <= TLB_LG_PGSZ "Set indexed by" \
2378 "virtual address"
2379
2380 # we don't yet handle collisions between loadstore1 requests
2381 # and MMU requests
2382 comb += m_out.stall.eq(0)
2383
2384 # Hold off the request in r0 when r1 has an uncompleted request
2385 comb += r0_stall.eq(r0_full & r1.full)
2386 comb += r0_valid.eq(r0_full & ~r1.full)
2387 comb += stall_out.eq(r0_stall)
2388
2389 # Wire up wishbone request latch out of stage 1
2390 comb += wishbone_out.eq(r1.wb)
2391
2392
2393
2394 # dcache_tb.vhdl
2395 #
2396 # entity dcache_tb is
2397 # end dcache_tb;
2398 #
2399 # architecture behave of dcache_tb is
2400 # signal clk : std_ulogic;
2401 # signal rst : std_ulogic;
2402 #
2403 # signal d_in : Loadstore1ToDcacheType;
2404 # signal d_out : DcacheToLoadstore1Type;
2405 #
2406 # signal m_in : MmuToDcacheType;
2407 # signal m_out : DcacheToMmuType;
2408 #
2409 # signal wb_bram_in : wishbone_master_out;
2410 # signal wb_bram_out : wishbone_slave_out;
2411 #
2412 # constant clk_period : time := 10 ns;
2413 # begin
2414 # dcache0: entity work.dcache
2415 # generic map(
2416 #
2417 # LINE_SIZE => 64,
2418 # NUM_LINES => 4
2419 # )
2420 # port map(
2421 # clk => clk,
2422 # rst => rst,
2423 # d_in => d_in,
2424 # d_out => d_out,
2425 # m_in => m_in,
2426 # m_out => m_out,
2427 # wishbone_out => wb_bram_in,
2428 # wishbone_in => wb_bram_out
2429 # );
2430 #
2431 # -- BRAM Memory slave
2432 # bram0: entity work.wishbone_bram_wrapper
2433 # generic map(
2434 # MEMORY_SIZE => 1024,
2435 # RAM_INIT_FILE => "icache_test.bin"
2436 # )
2437 # port map(
2438 # clk => clk,
2439 # rst => rst,
2440 # wishbone_in => wb_bram_in,
2441 # wishbone_out => wb_bram_out
2442 # );
2443 #
2444 # clk_process: process
2445 # begin
2446 # clk <= '0';
2447 # wait for clk_period/2;
2448 # clk <= '1';
2449 # wait for clk_period/2;
2450 # end process;
2451 #
2452 # rst_process: process
2453 # begin
2454 # rst <= '1';
2455 # wait for 2*clk_period;
2456 # rst <= '0';
2457 # wait;
2458 # end process;
2459 #
2460 # stim: process
2461 # begin
2462 # -- Clear stuff
2463 # d_in.valid <= '0';
2464 # d_in.load <= '0';
2465 # d_in.nc <= '0';
2466 # d_in.addr <= (others => '0');
2467 # d_in.data <= (others => '0');
2468 # m_in.valid <= '0';
2469 # m_in.addr <= (others => '0');
2470 # m_in.pte <= (others => '0');
2471 #
2472 # wait for 4*clk_period;
2473 # wait until rising_edge(clk);
2474 #
2475 # -- Cacheable read of address 4
2476 # d_in.load <= '1';
2477 # d_in.nc <= '0';
2478 # d_in.addr <= x"0000000000000004";
2479 # d_in.valid <= '1';
2480 # wait until rising_edge(clk);
2481 # d_in.valid <= '0';
2482 #
2483 # wait until rising_edge(clk) and d_out.valid = '1';
2484 # assert d_out.data = x"0000000100000000"
2485 # report "data @" & to_hstring(d_in.addr) &
2486 # "=" & to_hstring(d_out.data) &
2487 # " expected 0000000100000000"
2488 # severity failure;
2489 # -- wait for clk_period;
2490 #
2491 # -- Cacheable read of address 30
2492 # d_in.load <= '1';
2493 # d_in.nc <= '0';
2494 # d_in.addr <= x"0000000000000030";
2495 # d_in.valid <= '1';
2496 # wait until rising_edge(clk);
2497 # d_in.valid <= '0';
2498 #
2499 # wait until rising_edge(clk) and d_out.valid = '1';
2500 # assert d_out.data = x"0000000D0000000C"
2501 # report "data @" & to_hstring(d_in.addr) &
2502 # "=" & to_hstring(d_out.data) &
2503 # " expected 0000000D0000000C"
2504 # severity failure;
2505 #
2506 # -- Non-cacheable read of address 100
2507 # d_in.load <= '1';
2508 # d_in.nc <= '1';
2509 # d_in.addr <= x"0000000000000100";
2510 # d_in.valid <= '1';
2511 # wait until rising_edge(clk);
2512 # d_in.valid <= '0';
2513 # wait until rising_edge(clk) and d_out.valid = '1';
2514 # assert d_out.data = x"0000004100000040"
2515 # report "data @" & to_hstring(d_in.addr) &
2516 # "=" & to_hstring(d_out.data) &
2517 # " expected 0000004100000040"
2518 # severity failure;
2519 #
2520 # wait until rising_edge(clk);
2521 # wait until rising_edge(clk);
2522 # wait until rising_edge(clk);
2523 # wait until rising_edge(clk);
2524 #
2525 # std.env.finish;
2526 # end process;
2527 # end;
2528 def dcache_sim(dut):
2529 # clear stuff
2530 yield dut.d_in.valid.eq(0)
2531 yield dut.d_in.load.eq(0)
2532 yield dut.d_in.nc.eq(0)
yield dut.d_in.addr.eq(0)
2534 yield dut.d_in.data.eq(0)
2535 yield dut.m_in.valid.eq(0)
2536 yield dut.m_in.addr.eq(0)
2537 yield dut.m_in.pte.eq(0)
2538 # wait 4 * clk_period
2539 yield
2540 yield
2541 yield
2542 yield
2543 # wait_until rising_edge(clk)
2544 yield
2545 # Cacheable read of address 4
2546 yield dut.d_in.load.eq(1)
2547 yield dut.d_in.nc.eq(0)
yield dut.d_in.addr.eq(0x0000000000000004)
2549 yield dut.d_in.valid.eq(1)
2550 # wait-until rising_edge(clk)
2551 yield
2552 yield dut.d_in.valid.eq(0)
2553 yield
2554 while not (yield dut.d_out.valid):
2555 yield
2556 assert dut.d_out.data == Const(0x0000000100000000, 64) f"data @" \
2557 f"{dut.d_in.addr}={dut.d_in.data} expected 0000000100000000" \
2558 " -!- severity failure"
2559
2560
2561 # Cacheable read of address 30
2562 yield dut.d_in.load.eq(1)
2563 yield dut.d_in.nc.eq(0)
yield dut.d_in.addr.eq(0x0000000000000030)
2565 yield dut.d_in.valid.eq(1)
2566 yield
2567 yield dut.d_in.valid.eq(0)
2568 yield
2569 while not (yield dut.d_out.valid):
2570 yield
2571 assert dut.d_out.data == Const(0x0000000D0000000C, 64) f"data @" \
2572 f"{dut.d_in.addr}={dut.d_out.data} expected 0000000D0000000C" \
2573 f"-!- severity failure"
2574
2575 # Non-cacheable read of address 100
2576 yield dut.d_in.load.eq(1)
2577 yield dut.d_in.nc.eq(1)
yield dut.d_in.addr.eq(0x0000000000000100)
2579 yield dut.d_in.valid.eq(1)
2580 yield
2581 yield dut.d_in.valid.eq(0)
2582 yield
2583 while not (yield dut.d_out.valid):
2584 yield
2585 assert dut.d_out.data == Const(0x0000004100000040, 64) f"data @" \
2586 f"{dut.d_in.addr}={dut.d_out.data} expected 0000004100000040" \
2587 f"-!- severity failure"
2588
2589 yield
2590 yield
2591 yield
2592 yield
2593
2594
def test_dcache():
# rtlil and run_simulation are not imported at the top of this
# file yet; these import paths are the usual nmigen ones and
# are an assumption here
from nmigen.back import rtlil
from nmigen.compat.sim import run_simulation

dut = DCache()
vl = rtlil.convert(dut, ports=[])
with open("test_dcache.il", "w") as f:
f.write(vl)

run_simulation(dut, dcache_sim(dut), vcd_name='test_dcache.vcd')
2602
2603 if __name__ == '__main__':
2604 test_dcache()
2605