1 """DCache
2
3 based on Anton Blanchard microwatt dcache.vhdl
4
5 """
6
7 from enum import Enum, unique
8
9 from nmigen import Module, Signal, Elaboratable, Cat, Repl, Array, Const
10 try:
11 from nmigen.hdl.ast import Display
12 except ImportError:
13 def Display(*args):
14 return []
15
16 from nmigen.cli import main
17 from nmutil.iocontrol import RecordObject
18 from nmutil.util import wrap
19 from nmigen.utils import log2_int
20 from soc.experiment.mem_types import (LoadStore1ToDCacheType,
21 DCacheToLoadStore1Type,
22 MMUToDCacheType,
23 DCacheToMMUType)
24
25 from soc.experiment.wb_types import (WB_ADDR_BITS, WB_DATA_BITS, WB_SEL_BITS,
26 WBAddrType, WBDataType, WBSelType,
27 WBMasterOut, WBSlaveOut,
28 WBMasterOutVector, WBSlaveOutVector,
29 WBIOMasterOut, WBIOSlaveOut)
30
31 from soc.experiment.cache_ram import CacheRam
32 from soc.experiment.plru import PLRU
33
34 # for test
35 from nmigen_soc.wishbone.sram import SRAM
36 from nmigen import Memory
37 from nmigen.cli import rtlil
38 if True:
39 from nmigen.back.pysim import Simulator, Delay, Settle
40 else:
41 from nmigen.sim.cxxsim import Simulator, Delay, Settle
42
43
44 # TODO: make these parameters of DCache at some point
45 LINE_SIZE = 64 # Line size in bytes
46 NUM_LINES = 16 # Number of lines in a set
47 NUM_WAYS = 4 # Number of ways
48 TLB_SET_SIZE = 64 # L1 DTLB entries per set
49 TLB_NUM_WAYS = 2 # L1 DTLB number of sets
50 TLB_LG_PGSZ = 12 # L1 DTLB log_2(page_size)
51 LOG_LENGTH = 0 # Non-zero to enable log data collection
52
53 # BRAM organisation: We never access more than
54 # WB_DATA_BITS at a time so, to save resources,
55 # we make the array only that wide, and use
56 # consecutive indices to make a cache "line"
57 #
58 # ROW_SIZE is the width in bytes of the BRAM
59 # (based on WB, so 64-bits)
60 ROW_SIZE = WB_DATA_BITS // 8
61
62 # ROW_PER_LINE is the number of rows (wishbone
63 # transactions) in a line
64 ROW_PER_LINE = LINE_SIZE // ROW_SIZE
65
66 # BRAM_ROWS is the number of rows in BRAM needed
67 # to represent the full dcache
68 BRAM_ROWS = NUM_LINES * ROW_PER_LINE
69
70
71 # Bit fields counts in the address
72
73 # REAL_ADDR_BITS is the number of real address
74 # bits that we store
75 REAL_ADDR_BITS = 56
76
77 # ROW_BITS is the number of bits to select a row
78 ROW_BITS = log2_int(BRAM_ROWS)
79
80 # ROW_LINE_BITS is the number of bits to select
81 # a row within a line
82 ROW_LINE_BITS = log2_int(ROW_PER_LINE)
83
84 # LINE_OFF_BITS is the number of bits for
85 # the offset in a cache line
86 LINE_OFF_BITS = log2_int(LINE_SIZE)
87
88 # ROW_OFF_BITS is the number of bits for
89 # the offset in a row
90 ROW_OFF_BITS = log2_int(ROW_SIZE)
91
92 # INDEX_BITS is the number of bits to
93 # select a cache line
94 INDEX_BITS = log2_int(NUM_LINES)
95
96 # SET_SIZE_BITS is the log base 2 of the set size
97 SET_SIZE_BITS = LINE_OFF_BITS + INDEX_BITS
98
99 # TAG_BITS is the number of bits of
100 # the tag part of the address
101 TAG_BITS = REAL_ADDR_BITS - SET_SIZE_BITS
102
103 # TAG_WIDTH is the width in bits of each way of the tag RAM
104 TAG_WIDTH = TAG_BITS + 7 - ((TAG_BITS + 7) % 8)
105
106 # WAY_BITS is the number of bits to select a way
107 WAY_BITS = log2_int(NUM_WAYS)
108
109 # Example of layout for 32 lines of 64 bytes:
110 #
111 # .. tag |index| line |
112 # .. | row | |
113 # .. | |---| | ROW_LINE_BITS (3)
114 # .. | |--- - --| LINE_OFF_BITS (6)
115 # .. | |- --| ROW_OFF_BITS (3)
116 # .. |----- ---| | ROW_BITS (8)
117 # .. |-----| | INDEX_BITS (5)
118 # .. --------| | TAG_BITS (45)
119
120 TAG_RAM_WIDTH = TAG_WIDTH * NUM_WAYS
121
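# Worked example (illustrative note, not in the original microwatt source):
# with the default geometry above (LINE_SIZE=64, NUM_LINES=16, NUM_WAYS=4,
# WB_DATA_BITS=64, REAL_ADDR_BITS=56) the derived constants come out as:
#   ROW_SIZE = 8, ROW_PER_LINE = 8, BRAM_ROWS = 128
#   ROW_BITS = 7, ROW_LINE_BITS = 3, ROW_OFF_BITS = 3
#   LINE_OFF_BITS = 6, INDEX_BITS = 4, SET_SIZE_BITS = 10
#   TAG_BITS = 46, TAG_WIDTH = 48 (rounded up to a whole byte)
#   WAY_BITS = 2, TAG_RAM_WIDTH = 192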
122 def CacheTagArray():
123 return Array(Signal(TAG_RAM_WIDTH, name="cachetag_%d" % x) \
124 for x in range(NUM_LINES))
125
126 def CacheValidBitsArray(): # one valid bit per way, for each line
127 return Array(Signal(NUM_WAYS, name="cachevalid_%d" % x) \
128 for x in range(NUM_LINES))
129
130 def RowPerLineValidArray():
131 return Array(Signal(name="rows_valid%d" % x) \
132 for x in range(ROW_PER_LINE))
133
134 # L1 TLB
135 TLB_SET_BITS = log2_int(TLB_SET_SIZE)
136 TLB_WAY_BITS = log2_int(TLB_NUM_WAYS)
137 TLB_EA_TAG_BITS = 64 - (TLB_LG_PGSZ + TLB_SET_BITS)
138 TLB_TAG_WAY_BITS = TLB_NUM_WAYS * TLB_EA_TAG_BITS
139 TLB_PTE_BITS = 64
140 TLB_PTE_WAY_BITS = TLB_NUM_WAYS * TLB_PTE_BITS
141
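# With the TLB defaults above (TLB_SET_SIZE=64, TLB_NUM_WAYS=2,
# TLB_LG_PGSZ=12) these work out to: TLB_SET_BITS=6, TLB_WAY_BITS=1,
# TLB_EA_TAG_BITS=46, TLB_TAG_WAY_BITS=92, TLB_PTE_WAY_BITS=128.
# (purely illustrative note, computed from the constants above)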
142 assert (LINE_SIZE % ROW_SIZE) == 0, "LINE_SIZE not multiple of ROW_SIZE"
143 assert (LINE_SIZE & (LINE_SIZE - 1)) == 0, "LINE_SIZE not power of 2"
144 assert (NUM_LINES & (NUM_LINES - 1)) == 0, "NUM_LINES not power of 2"
145 assert (ROW_PER_LINE & (ROW_PER_LINE - 1)) == 0, "ROW_PER_LINE not power of 2"
146 assert ROW_BITS == (INDEX_BITS + ROW_LINE_BITS), "geometry bits don't add up"
147 assert (LINE_OFF_BITS == ROW_OFF_BITS + ROW_LINE_BITS), \
148 "geometry bits don't add up"
149 assert REAL_ADDR_BITS == (TAG_BITS + INDEX_BITS + LINE_OFF_BITS), \
150 "geometry bits don't add up"
151 assert REAL_ADDR_BITS == (TAG_BITS + ROW_BITS + ROW_OFF_BITS), \
152 "geometry bits don't add up"
153 assert 64 == WB_DATA_BITS, "Can't yet handle wb width that isn't 64-bits"
154 assert SET_SIZE_BITS <= TLB_LG_PGSZ, "Set indexed by virtual address"
155
156
157 def TLBValidBitsArray():
158 return Array(Signal(TLB_NUM_WAYS) for x in range(TLB_SET_SIZE))
159
160 def TLBTagEAArray():
161 return Array(Signal(TLB_EA_TAG_BITS) for x in range (TLB_NUM_WAYS))
162
163 def TLBTagsArray():
164 return Array(Signal(TLB_TAG_WAY_BITS) for x in range (TLB_SET_SIZE))
165
166 def TLBPtesArray():
167 return Array(Signal(TLB_PTE_WAY_BITS) for x in range(TLB_SET_SIZE))
168
169 def HitWaySet():
170 return Array(Signal(WAY_BITS, name="hitway_%d" % x) \
171 for x in range(TLB_NUM_WAYS))
172
173 # Cache RAM interface
174 def CacheRamOut():
175 return Array(Signal(WB_DATA_BITS, name="cache_out%d" % x) \
176 for x in range(NUM_WAYS))
177
178 # PLRU output interface
179 def PLRUOut():
180 return Array(Signal(WAY_BITS) for x in range(NUM_LINES))
181
182 # TLB PLRU output interface
183 def TLBPLRUOut():
184 return Array(Signal(TLB_WAY_BITS) for x in range(TLB_SET_SIZE))
185
186 # Helper functions to decode incoming requests
187 #
188 # Return the cache line index (tag index) for an address
189 def get_index(addr):
190 return addr[LINE_OFF_BITS:SET_SIZE_BITS]
191
192 # Return the cache row index (data memory) for an address
193 def get_row(addr):
194 return addr[ROW_OFF_BITS:SET_SIZE_BITS]
195
196 # Return the index of a row within a line
197 def get_row_of_line(row):
198 return row[:ROW_LINE_BITS]
199
200 # Returns whether this is the last row of a line
201 def is_last_row_addr(addr, last):
202 return addr[ROW_OFF_BITS:LINE_OFF_BITS] == last
203
204 # Returns whether this is the last row of a line
205 def is_last_row(row, last):
206 return get_row_of_line(row) == last
207
208 # Return the next row in the current cache line. We use a
209 # dedicated function in order to limit the size of the
210 # generated adder to be only the bits within a cache line
211 # (3 bits with default settings)
212 def next_row(row):
213 row_v = row[0:ROW_LINE_BITS] + 1
214 return Cat(row_v[:ROW_LINE_BITS], row[ROW_LINE_BITS:])
215
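# Illustrative example (assuming ROW_LINE_BITS=3): next_row(0b0100111)
# wraps the low 3 bits from 7 back to 0 and leaves the upper row bits
# alone, returning 0b0100000, i.e. the increment never carries out of
# the cache line.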
216 # Get the tag value from the address
217 def get_tag(addr):
218 return addr[SET_SIZE_BITS:REAL_ADDR_BITS]
219
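# Illustrative decode (default geometry, address 0x530 as used by the
# testbench below): get_index(0x530) = 4, get_row(0x530) = 38,
# get_row_of_line(38) = 6, get_tag(0x530) = 1.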
220 # Read a tag from a tag memory row
221 def read_tag(way, tagset):
222 return tagset.word_select(way, TAG_WIDTH)[:TAG_BITS]
223
224 # Read a TLB tag from a TLB tag memory row
225 def read_tlb_tag(way, tags):
226 return tags.word_select(way, TLB_EA_TAG_BITS)
227
228 # Write a TLB tag to a TLB tag memory row
229 def write_tlb_tag(way, tags, tag):
230 return read_tlb_tag(way, tags).eq(tag)
231
232 # Read a PTE from a TLB PTE memory row
233 def read_tlb_pte(way, ptes):
234 return ptes.word_select(way, TLB_PTE_BITS)
235
236 def write_tlb_pte(way, ptes, newpte):
237 return read_tlb_pte(way, ptes).eq(newpte)
238
239
240 # Record for storing permission, attribute, etc. bits from a PTE
241 class PermAttr(RecordObject):
242 def __init__(self, name=None):
243 super().__init__(name=name)
244 self.reference = Signal()
245 self.changed = Signal()
246 self.nocache = Signal()
247 self.priv = Signal()
248 self.rd_perm = Signal()
249 self.wr_perm = Signal()
250
251
252 def extract_perm_attr(pte):
253 pa = PermAttr()
254 pa.reference = pte[8]
255 pa.changed = pte[7]
256 pa.nocache = pte[5]
257 pa.priv = pte[3]
258 pa.rd_perm = pte[2]
259 pa.wr_perm = pte[1]
260 return pa
261
262
263 # Type of operation on a "valid" input
264 @unique
265 class Op(Enum):
266 OP_NONE = 0
267 OP_BAD = 1 # NC cache hit, TLB miss, prot/RC failure
268 OP_STCX_FAIL = 2 # conditional store w/o reservation
269 OP_LOAD_HIT = 3 # Cache hit on load
270 OP_LOAD_MISS = 4 # Load missing cache
271 OP_LOAD_NC = 5 # Non-cachable load
272 OP_STORE_HIT = 6 # Store hitting cache
273 OP_STORE_MISS = 7 # Store missing cache
274
275
276 # Cache state machine
277 @unique
278 class State(Enum):
279 IDLE = 0 # Normal load hit processing
280 RELOAD_WAIT_ACK = 1 # Cache reload wait ack
281 STORE_WAIT_ACK = 2 # Store wait ack
282 NC_LOAD_WAIT_ACK = 3 # Non-cachable load wait ack
283
284
285 # Dcache operations:
286 #
287 # In order to make timing, we use the BRAMs with
288 # an output buffer, which means that the BRAM
289 # output is delayed by an extra cycle.
290 #
291 # Thus, the dcache has a 2-stage internal pipeline
292 # for cache hits with no stalls.
293 #
294 # All other operations are handled via stalling
295 # in the first stage.
296 #
297 # The second stage can thus complete a hit at the same
298 # time as the first stage emits a stall for a complex op.
299 #
300 # Stage 0 register, basically contains just the latched request
301
302 class RegStage0(RecordObject):
303 def __init__(self, name=None):
304 super().__init__(name=name)
305 self.req = LoadStore1ToDCacheType(name="lsmem")
306 self.tlbie = Signal()
307 self.doall = Signal()
308 self.tlbld = Signal()
309 self.mmu_req = Signal() # indicates source of request
310
311
312 class MemAccessRequest(RecordObject):
313 def __init__(self, name=None):
314 super().__init__(name=name)
315 self.op = Signal(Op)
316 self.valid = Signal()
317 self.dcbz = Signal()
318 self.real_addr = Signal(REAL_ADDR_BITS)
319 self.data = Signal(64)
320 self.byte_sel = Signal(8)
321 self.hit_way = Signal(WAY_BITS)
322 self.same_tag = Signal()
323 self.mmu_req = Signal()
324
325
326 # First stage register, contains state for stage 1 of load hits
327 # and for the state machine used by all other operations
328 class RegStage1(RecordObject):
329 def __init__(self, name=None):
330 super().__init__(name=name)
331 # Info about the request
332 self.full = Signal() # have uncompleted request
333 self.mmu_req = Signal() # request is from MMU
334 self.req = MemAccessRequest(name="reqmem")
335
336 # Cache hit state
337 self.hit_way = Signal(WAY_BITS)
338 self.hit_load_valid = Signal()
339 self.hit_index = Signal(INDEX_BITS)
340 self.cache_hit = Signal()
341
342 # TLB hit state
343 self.tlb_hit = Signal()
344 self.tlb_hit_way = Signal(TLB_NUM_WAYS)
345 self.tlb_hit_index = Signal(TLB_WAY_BITS)
346
347 # 2-stage data buffer for data forwarded from writes to reads
348 self.forward_data1 = Signal(64)
349 self.forward_data2 = Signal(64)
350 self.forward_sel1 = Signal(8)
351 self.forward_valid1 = Signal()
352 self.forward_way1 = Signal(WAY_BITS)
353 self.forward_row1 = Signal(ROW_BITS)
354 self.use_forward1 = Signal()
355 self.forward_sel = Signal(8)
356
357 # Cache miss state (reload state machine)
358 self.state = Signal(State)
359 self.dcbz = Signal()
360 self.write_bram = Signal()
361 self.write_tag = Signal()
362 self.slow_valid = Signal()
363 self.wb = WBMasterOut()
364 self.reload_tag = Signal(TAG_BITS)
365 self.store_way = Signal(WAY_BITS)
366 self.store_row = Signal(ROW_BITS)
367 self.store_index = Signal(INDEX_BITS)
368 self.end_row_ix = Signal(ROW_LINE_BITS)
369 self.rows_valid = RowPerLineValidArray()
370 self.acks_pending = Signal(3)
371 self.inc_acks = Signal()
372 self.dec_acks = Signal()
373
374 # Signals to complete (possibly with error)
375 self.ls_valid = Signal()
376 self.ls_error = Signal()
377 self.mmu_done = Signal()
378 self.mmu_error = Signal()
379 self.cache_paradox = Signal()
380
381 # Signal to complete a failed stcx.
382 self.stcx_fail = Signal()
383
384
385 # Reservation information
386 class Reservation(RecordObject):
387 def __init__(self):
388 super().__init__()
389 self.valid = Signal()
390 self.addr = Signal(64-LINE_OFF_BITS)
391
392
393 class DTLBUpdate(Elaboratable):
394 def __init__(self):
395 self.tlbie = Signal()
396 self.tlbwe = Signal()
397 self.doall = Signal()
398 self.updated = Signal()
399 self.v_updated = Signal()
400 self.tlb_hit = Signal()
401 self.tlb_req_index = Signal(TLB_SET_BITS)
402
403 self.tlb_hit_way = Signal(TLB_WAY_BITS)
404 self.tlb_tag_way = Signal(TLB_TAG_WAY_BITS)
405 self.tlb_pte_way = Signal(TLB_PTE_WAY_BITS)
406 self.repl_way = Signal(TLB_WAY_BITS)
407 self.eatag = Signal(TLB_EA_TAG_BITS)
408 self.pte_data = Signal(TLB_PTE_BITS)
409
410 self.dv = Signal(TLB_NUM_WAYS) # current valid bits for the set
411 
412 self.tb_out = Signal(TLB_TAG_WAY_BITS) # tag write data
413 self.pb_out = Signal(TLB_PTE_WAY_BITS) # pte write data
414 self.db_out = Signal(TLB_NUM_WAYS) # valid-bit write data
415
416 def elaborate(self, platform):
417 m = Module()
418 comb = m.d.comb
419 sync = m.d.sync
420
421 tagset = Signal(TLB_TAG_WAY_BITS)
422 pteset = Signal(TLB_PTE_WAY_BITS)
423
424 tb_out, pb_out, db_out = self.tb_out, self.pb_out, self.db_out
425
426 with m.If(self.tlbie & self.doall):
427 pass # clear all back in parent
428 with m.Elif(self.tlbie):
429 with m.If(self.tlb_hit):
430 comb += db_out.eq(self.dv)
431 comb += db_out.bit_select(self.tlb_hit_way, 1).eq(0) # invalidate hit way
432 comb += self.v_updated.eq(1)
433
434 with m.Elif(self.tlbwe):
435
436 comb += tagset.eq(self.tlb_tag_way)
437 comb += write_tlb_tag(self.repl_way, tagset, self.eatag)
438 comb += tb_out.eq(tagset)
439
440 comb += pteset.eq(self.tlb_pte_way)
441 comb += write_tlb_pte(self.repl_way, pteset, self.pte_data)
442 comb += pb_out.eq(pteset)
443
444 comb += [db_out.eq(self.dv), db_out.bit_select(self.repl_way, 1).eq(1)] # keep other ways valid
445
446 comb += self.updated.eq(1)
447 comb += self.v_updated.eq(1)
448
449 return m
450
461
462 class DCachePendingHit(Elaboratable):
463
464 def __init__(self, tlb_pte_way, tlb_valid_way, tlb_hit_way,
465 cache_valid_idx, cache_tag_set,
466 req_addr,
467 hit_set):
468
469 self.go = Signal()
470 self.virt_mode = Signal()
471 self.is_hit = Signal()
472 self.tlb_hit = Signal()
473 self.hit_way = Signal(WAY_BITS)
474 self.rel_match = Signal()
475 self.req_index = Signal(INDEX_BITS)
476 self.reload_tag = Signal(TAG_BITS)
477
478 self.tlb_hit_way = tlb_hit_way
479 self.tlb_pte_way = tlb_pte_way
480 self.tlb_valid_way = tlb_valid_way
481 self.cache_valid_idx = cache_valid_idx
482 self.cache_tag_set = cache_tag_set
483 self.req_addr = req_addr
484 self.hit_set = hit_set
485
486 def elaborate(self, platform):
487 m = Module()
488 comb = m.d.comb
489 sync = m.d.sync
490
491 go = self.go
492 virt_mode = self.virt_mode
493 is_hit = self.is_hit
494 tlb_pte_way = self.tlb_pte_way
495 tlb_valid_way = self.tlb_valid_way
496 cache_valid_idx = self.cache_valid_idx
497 cache_tag_set = self.cache_tag_set
498 req_addr = self.req_addr
499 tlb_hit_way = self.tlb_hit_way
500 tlb_hit = self.tlb_hit
501 hit_set = self.hit_set
502 hit_way = self.hit_way
503 rel_match = self.rel_match
504 req_index = self.req_index
505 reload_tag = self.reload_tag
506
507 rel_matches = Array(Signal(name="rel_matches_%d" % i) \
508 for i in range(TLB_NUM_WAYS))
509 hit_way_set = HitWaySet()
510
511 # Test if pending request is a hit on any way
512 # In order to make timing in virtual mode,
513 # when we are using the TLB, we compare each
514 # way with each of the real addresses from each way of
515 # the TLB, and then decide later which match to use.
516
517 with m.If(virt_mode):
518 for j in range(TLB_NUM_WAYS):
519 s_tag = Signal(TAG_BITS, name="s_tag%d" % j)
520 s_hit = Signal()
521 s_pte = Signal(TLB_PTE_BITS)
522 s_ra = Signal(REAL_ADDR_BITS)
523 comb += s_pte.eq(read_tlb_pte(j, tlb_pte_way))
524 comb += s_ra.eq(Cat(req_addr[0:TLB_LG_PGSZ],
525 s_pte[TLB_LG_PGSZ:REAL_ADDR_BITS]))
526 comb += s_tag.eq(get_tag(s_ra))
527
528 for i in range(NUM_WAYS):
529 is_tag_hit = Signal(name="is_tag_hit_%d_%d" % (j, i))
530 comb += is_tag_hit.eq(go & cache_valid_idx[i] &
531 (read_tag(i, cache_tag_set) == s_tag)
532 & tlb_valid_way[j])
533 with m.If(is_tag_hit):
534 comb += hit_way_set[j].eq(i)
535 comb += s_hit.eq(1)
536 comb += hit_set[j].eq(s_hit)
537 with m.If(s_tag == reload_tag):
538 comb += rel_matches[j].eq(1)
539 with m.If(tlb_hit):
540 comb += is_hit.eq(hit_set[tlb_hit_way])
541 comb += hit_way.eq(hit_way_set[tlb_hit_way])
542 comb += rel_match.eq(rel_matches[tlb_hit_way])
543 with m.Else():
544 s_tag = Signal(TAG_BITS)
545 comb += s_tag.eq(get_tag(req_addr))
546 for i in range(NUM_WAYS):
547 is_tag_hit = Signal(name="is_tag_hit_%d" % i)
548 comb += is_tag_hit.eq(go & cache_valid_idx[i] &
549 (read_tag(i, cache_tag_set) == s_tag))
550 with m.If(is_tag_hit):
551 comb += hit_way.eq(i)
552 comb += is_hit.eq(1)
553 with m.If(s_tag == reload_tag):
554 comb += rel_match.eq(1)
555
556 return m
557
558
559 class DCache(Elaboratable):
560 """Set associative dcache write-through
561 TODO (in no specific order):
562 * See list in icache.vhdl
563 * Complete load misses on the cycle when WB data comes instead of
564 at the end of line (this requires dealing with requests coming in
565 while not idle...)
566 """
567 def __init__(self):
568 self.d_in = LoadStore1ToDCacheType("d_in")
569 self.d_out = DCacheToLoadStore1Type("d_out")
570
571 self.m_in = MMUToDCacheType("m_in")
572 self.m_out = DCacheToMMUType("m_out")
573
574 self.stall_out = Signal()
575
576 self.wb_out = WBMasterOut()
577 self.wb_in = WBSlaveOut()
578
579 self.log_out = Signal(20)
580
581 def stage_0(self, m, r0, r1, r0_full):
582 """Latch the request in r0.req as long as we're not stalling
583 """
584 comb = m.d.comb
585 sync = m.d.sync
586 d_in, d_out, m_in = self.d_in, self.d_out, self.m_in
587
588 r = RegStage0("stage0")
589
590 # TODO, this goes in unit tests and formal proofs
591 with m.If(~(d_in.valid & m_in.valid)):
592 #sync += Display("request collision loadstore vs MMU")
593 pass
594
595 with m.If(m_in.valid):
596 sync += r.req.valid.eq(1)
597 sync += r.req.load.eq(~(m_in.tlbie | m_in.tlbld))
598 sync += r.req.dcbz.eq(0)
599 sync += r.req.nc.eq(0)
600 sync += r.req.reserve.eq(0)
601 sync += r.req.virt_mode.eq(1)
602 sync += r.req.priv_mode.eq(1)
603 sync += r.req.addr.eq(m_in.addr)
604 sync += r.req.data.eq(m_in.pte)
605 sync += r.req.byte_sel.eq(~0) # Const -1 sets all to 0b111....
606 sync += r.tlbie.eq(m_in.tlbie)
607 sync += r.doall.eq(m_in.doall)
608 sync += r.tlbld.eq(m_in.tlbld)
609 sync += r.mmu_req.eq(1)
610 with m.Else():
611 sync += r.req.eq(d_in)
612 sync += r.tlbie.eq(0)
613 sync += r.doall.eq(0)
614 sync += r.tlbld.eq(0)
615 sync += r.mmu_req.eq(0)
616 with m.If(~(r1.full & r0_full)):
617 sync += r0.eq(r)
618 sync += r0_full.eq(r.req.valid)
619
620 def tlb_read(self, m, r0_stall, tlb_valid_way,
621 tlb_tag_way, tlb_pte_way, dtlb_valid_bits,
622 dtlb_tags, dtlb_ptes):
623 """TLB
624 Operates in the second cycle on the request latched in r0.req.
625 TLB updates write the entry at the end of the second cycle.
626 """
627 comb = m.d.comb
628 sync = m.d.sync
629 m_in, d_in = self.m_in, self.d_in
630
631 index = Signal(TLB_SET_BITS)
632 addrbits = Signal(TLB_SET_BITS)
633
634 amin = TLB_LG_PGSZ
635 amax = TLB_LG_PGSZ + TLB_SET_BITS
636
637 with m.If(m_in.valid):
638 comb += addrbits.eq(m_in.addr[amin : amax])
639 with m.Else():
640 comb += addrbits.eq(d_in.addr[amin : amax])
641 comb += index.eq(addrbits)
642
643 # If we have any op and the previous op isn't finished,
644 # then keep the same output for next cycle.
645 with m.If(~r0_stall):
646 sync += tlb_valid_way.eq(dtlb_valid_bits[index])
647 sync += tlb_tag_way.eq(dtlb_tags[index])
648 sync += tlb_pte_way.eq(dtlb_ptes[index])
649
650 def maybe_tlb_plrus(self, m, r1, tlb_plru_victim):
651 """Generate TLB PLRUs
652 """
653 comb = m.d.comb
654 sync = m.d.sync
655
656 if TLB_NUM_WAYS == 0:
657 return
658 for i in range(TLB_SET_SIZE):
659 # TLB PLRU interface
660 tlb_plru = PLRU(TLB_WAY_BITS)
661 setattr(m.submodules, "maybe_plru_%d" % i, tlb_plru)
662 tlb_plru_acc_en = Signal()
663
664 comb += tlb_plru_acc_en.eq(r1.tlb_hit & (r1.tlb_hit_index == i))
665 comb += tlb_plru.acc_en.eq(tlb_plru_acc_en)
666 comb += tlb_plru.acc.eq(r1.tlb_hit_way)
667 comb += tlb_plru_victim[i].eq(tlb_plru.lru_o)
668
669 def tlb_search(self, m, tlb_req_index, r0, r0_valid,
670 tlb_valid_way, tlb_tag_way, tlb_hit_way,
671 tlb_pte_way, pte, tlb_hit, valid_ra, perm_attr, ra):
672
673 comb = m.d.comb
674 sync = m.d.sync
675
676 hitway = Signal(TLB_WAY_BITS)
677 hit = Signal()
678 eatag = Signal(TLB_EA_TAG_BITS)
679
680 TLB_LG_END = TLB_LG_PGSZ + TLB_SET_BITS
681 comb += tlb_req_index.eq(r0.req.addr[TLB_LG_PGSZ : TLB_LG_END])
682 comb += eatag.eq(r0.req.addr[TLB_LG_END : 64 ])
683
684 for i in range(TLB_NUM_WAYS):
685 is_tag_hit = Signal()
686 comb += is_tag_hit.eq(tlb_valid_way[i]
687 & (read_tlb_tag(i, tlb_tag_way) == eatag))
688 with m.If(is_tag_hit):
689 comb += hitway.eq(i)
690 comb += hit.eq(1)
691
692 comb += tlb_hit.eq(hit & r0_valid)
693 comb += tlb_hit_way.eq(hitway)
694
695 with m.If(tlb_hit):
696 comb += pte.eq(read_tlb_pte(hitway, tlb_pte_way))
697 with m.Else():
698 comb += pte.eq(0)
699 comb += valid_ra.eq(tlb_hit | ~r0.req.virt_mode)
700 with m.If(r0.req.virt_mode):
701 comb += ra.eq(Cat(Const(0, ROW_OFF_BITS),
702 r0.req.addr[ROW_OFF_BITS:TLB_LG_PGSZ],
703 pte[TLB_LG_PGSZ:REAL_ADDR_BITS]))
704 comb += perm_attr.eq(extract_perm_attr(pte))
705 with m.Else():
706 comb += ra.eq(Cat(Const(0, ROW_OFF_BITS),
707 r0.req.addr[ROW_OFF_BITS:REAL_ADDR_BITS]))
708
709 comb += perm_attr.reference.eq(1)
710 comb += perm_attr.changed.eq(1)
711 comb += perm_attr.nocache.eq(0)
712 comb += perm_attr.priv.eq(1)
713 comb += perm_attr.rd_perm.eq(1)
714 comb += perm_attr.wr_perm.eq(1)
715
716 def tlb_update(self, m, r0_valid, r0, dtlb_valid_bits, tlb_req_index,
717 tlb_hit_way, tlb_hit, tlb_plru_victim, tlb_tag_way,
718 dtlb_tags, tlb_pte_way, dtlb_ptes):
719
720 comb = m.d.comb
721 sync = m.d.sync
722
723 tlbie = Signal()
724 tlbwe = Signal()
725
726 comb += tlbie.eq(r0_valid & r0.tlbie)
727 comb += tlbwe.eq(r0_valid & r0.tlbld)
728
729 m.submodules.tlb_update = d = DTLBUpdate()
730 with m.If(tlbie & r0.doall):
731 # clear all valid bits at once
732 for i in range(TLB_SET_SIZE):
733 sync += dtlb_valid_bits[i].eq(0)
734 with m.If(d.updated):
735 sync += dtlb_tags[tlb_req_index].eq(d.tb_out)
736 sync += dtlb_ptes[tlb_req_index].eq(d.pb_out)
737 with m.If(d.v_updated):
738 sync += dtlb_valid_bits[tlb_req_index].eq(d.db_out)
739
740 comb += d.dv.eq(dtlb_valid_bits[tlb_req_index])
741
742 comb += d.tlbie.eq(tlbie)
743 comb += d.tlbwe.eq(tlbwe)
744 comb += d.doall.eq(r0.doall)
745 comb += d.tlb_hit.eq(tlb_hit)
746 comb += d.tlb_hit_way.eq(tlb_hit_way)
747 comb += d.tlb_tag_way.eq(tlb_tag_way)
748 comb += d.tlb_pte_way.eq(tlb_pte_way)
749 comb += d.tlb_req_index.eq(tlb_req_index)
750
751 with m.If(tlb_hit):
752 comb += d.repl_way.eq(tlb_hit_way)
753 with m.Else():
754 comb += d.repl_way.eq(tlb_plru_victim[tlb_req_index])
755 comb += d.eatag.eq(r0.req.addr[TLB_LG_PGSZ + TLB_SET_BITS:64])
756 comb += d.pte_data.eq(r0.req.data)
757
758 def maybe_plrus(self, m, r1, plru_victim):
759 """Generate PLRUs
760 """
761 comb = m.d.comb
762 sync = m.d.sync
763
764 if NUM_WAYS == 0:
765 return
766
767 for i in range(NUM_LINES):
768 # PLRU interface
769 plru = PLRU(WAY_BITS)
770 setattr(m.submodules, "plru%d" % i, plru)
771 plru_acc_en = Signal()
772
773 comb += plru_acc_en.eq(r1.cache_hit & (r1.hit_index == i))
774 comb += plru.acc_en.eq(plru_acc_en)
775 comb += plru.acc.eq(r1.hit_way)
776 comb += plru_victim[i].eq(plru.lru_o)
777
778 def cache_tag_read(self, m, r0_stall, req_index, cache_tag_set, cache_tags):
779 """Cache tag RAM read port
780 """
781 comb = m.d.comb
782 sync = m.d.sync
783 m_in, d_in = self.m_in, self.d_in
784
785 index = Signal(INDEX_BITS)
786
787 with m.If(r0_stall):
788 comb += index.eq(req_index)
789 with m.Elif(m_in.valid):
790 comb += index.eq(get_index(m_in.addr))
791 with m.Else():
792 comb += index.eq(get_index(d_in.addr))
793 sync += cache_tag_set.eq(cache_tags[index])
794
795 def dcache_request(self, m, r0, ra, req_index, req_row, req_tag,
796 r0_valid, r1, cache_valid_bits, replace_way,
797 use_forward1_next, use_forward2_next,
798 req_hit_way, plru_victim, rc_ok, perm_attr,
799 valid_ra, perm_ok, access_ok, req_op, req_go,
800 tlb_pte_way,
801 tlb_hit, tlb_hit_way, tlb_valid_way, cache_tag_set,
802 cancel_store, req_same_tag, r0_stall, early_req_row):
803 """Cache request parsing and hit detection
804 """
805
806 comb = m.d.comb
807 sync = m.d.sync
808 m_in, d_in = self.m_in, self.d_in
809
810 is_hit = Signal()
811 hit_way = Signal(WAY_BITS)
812 op = Signal(Op)
813 opsel = Signal(3)
814 go = Signal()
815 nc = Signal()
816 hit_set = Array(Signal(name="hit_set_%d" % i) \
817 for i in range(TLB_NUM_WAYS))
818 cache_valid_idx = Signal(NUM_WAYS) # one valid bit per way
819
820 # Extract line, row and tag from request
821 comb += req_index.eq(get_index(r0.req.addr))
822 comb += req_row.eq(get_row(r0.req.addr))
823 comb += req_tag.eq(get_tag(ra))
824
825 comb += go.eq(r0_valid & ~(r0.tlbie | r0.tlbld) & ~r1.ls_error)
826 comb += cache_valid_idx.eq(cache_valid_bits[req_index])
827
828 m.submodules.dcache_pend = dc = DCachePendingHit(tlb_pte_way,
829 tlb_valid_way, tlb_hit_way,
830 cache_valid_idx, cache_tag_set,
831 r0.req.addr,
832 hit_set)
833
834 comb += dc.tlb_hit.eq(tlb_hit)
835 comb += dc.reload_tag.eq(r1.reload_tag)
836 comb += dc.virt_mode.eq(r0.req.virt_mode)
837 comb += dc.go.eq(go)
838 comb += dc.req_index.eq(req_index)
839 comb += is_hit.eq(dc.is_hit)
840 comb += hit_way.eq(dc.hit_way)
841 comb += req_same_tag.eq(dc.rel_match)
842
843 # See if the request matches the line currently being reloaded
844 with m.If((r1.state == State.RELOAD_WAIT_ACK) &
845 (req_index == r1.store_index) & req_same_tag):
846 # For a store, consider this a hit even if the row isn't
847 # valid since it will be by the time we perform the store.
848 # For a load, check the appropriate row valid bit.
849 valid = r1.rows_valid[req_row % ROW_PER_LINE]
850 comb += is_hit.eq(~r0.req.load | valid)
851 comb += hit_way.eq(replace_way)
852
853 # Whether to use forwarded data for a load or not
854 with m.If((get_row(r1.req.real_addr) == req_row) &
855 (r1.req.hit_way == hit_way)):
856 # Only need to consider r1.write_bram here, since if we
857 # are writing refill data here, then we don't have a
858 # cache hit this cycle on the line being refilled.
859 # (There is the possibility that the load following the
860 # load miss that started the refill could be to the old
861 # contents of the victim line, since it is a couple of
862 # cycles after the refill starts before we see the updated
863 # cache tag. In that case we don't use the bypass.)
864 comb += use_forward1_next.eq(r1.write_bram)
865 with m.If((r1.forward_row1 == req_row) & (r1.forward_way1 == hit_way)):
866 comb += use_forward2_next.eq(r1.forward_valid1)
867
868 # The way that matched on a hit
869 comb += req_hit_way.eq(hit_way)
870
871 # The way to replace on a miss
872 with m.If(r1.write_tag):
873 comb += replace_way.eq(plru_victim[r1.store_index])
874 with m.Else():
875 comb += replace_way.eq(r1.store_way)
876
877 # work out whether we have permission for this access
878 # NB we don't yet implement AMR, thus no KUAP
879 comb += rc_ok.eq(perm_attr.reference
880 & (r0.req.load | perm_attr.changed)
881 )
882 comb += perm_ok.eq((r0.req.priv_mode | ~perm_attr.priv) &
883 (perm_attr.wr_perm |
884 (r0.req.load & perm_attr.rd_perm)))
885 comb += access_ok.eq(valid_ra & perm_ok & rc_ok)
886 # Combine the request and cache hit status to decide what
887 # operation needs to be done
888 comb += nc.eq(r0.req.nc | perm_attr.nocache)
889 comb += op.eq(Op.OP_NONE)
890 with m.If(go):
891 with m.If(~access_ok):
892 comb += op.eq(Op.OP_BAD)
893 with m.Elif(cancel_store):
894 comb += op.eq(Op.OP_STCX_FAIL)
895 with m.Else():
896 comb += opsel.eq(Cat(is_hit, nc, r0.req.load))
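# opsel encoding (from the Cat() above): bit 0 = is_hit, bit 1 = nc,
# bit 2 = load; e.g. 0b101 is a cacheable load hit and 0b100 a
# cacheable load miss (explanatory comment, not in the original)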
897 with m.Switch(opsel):
898 with m.Case(0b101):
899 comb += op.eq(Op.OP_LOAD_HIT)
900 with m.Case(0b100):
901 comb += op.eq(Op.OP_LOAD_MISS)
902 with m.Case(0b110):
903 comb += op.eq(Op.OP_LOAD_NC)
904 with m.Case(0b001):
905 comb += op.eq(Op.OP_STORE_HIT)
906 with m.Case(0b000):
907 comb += op.eq(Op.OP_STORE_MISS)
908 with m.Case(0b010):
909 comb += op.eq(Op.OP_STORE_MISS)
910 with m.Case(0b011):
911 comb += op.eq(Op.OP_BAD)
912 with m.Case(0b111):
913 comb += op.eq(Op.OP_BAD)
914 with m.Default():
915 comb += op.eq(Op.OP_NONE)
916 comb += req_op.eq(op)
917 comb += req_go.eq(go)
918
919 # Version of the row number that is valid one cycle earlier
920 # in the cases where we need to read the cache data BRAM.
921 # If we're stalling then we need to keep reading the last
922 # row requested.
923 with m.If(~r0_stall):
924 with m.If(m_in.valid):
925 comb += early_req_row.eq(get_row(m_in.addr))
926 with m.Else():
927 comb += early_req_row.eq(get_row(d_in.addr))
928 with m.Else():
929 comb += early_req_row.eq(req_row)
930
931 def reservation_comb(self, m, cancel_store, set_rsrv, clear_rsrv,
932 r0_valid, r0, reservation):
933 """Handle load-with-reservation and store-conditional instructions
934 """
935 comb = m.d.comb
936 sync = m.d.sync
937
938 with m.If(r0_valid & r0.req.reserve):
939
940 # XXX generate alignment interrupt if address
941 # is not aligned XXX or if r0.req.nc = '1'
942 with m.If(r0.req.load):
943 comb += set_rsrv.eq(1) # load with reservation
944 with m.Else():
945 comb += clear_rsrv.eq(1) # store conditional
946 with m.If(~reservation.valid | (r0.req.addr[LINE_OFF_BITS:64] != reservation.addr)):
947 comb += cancel_store.eq(1)
948
949 def reservation_reg(self, m, r0_valid, access_ok, set_rsrv, clear_rsrv,
950 reservation, r0):
951
952 comb = m.d.comb
953 sync = m.d.sync
954
955 with m.If(r0_valid & access_ok):
956 with m.If(clear_rsrv):
957 sync += reservation.valid.eq(0)
958 with m.Elif(set_rsrv):
959 sync += reservation.valid.eq(1)
960 sync += reservation.addr.eq(r0.req.addr[LINE_OFF_BITS:64])
961
962 def writeback_control(self, m, r1, cache_out):
963 """Return data for loads & completion control logic
964 """
965 comb = m.d.comb
966 sync = m.d.sync
967 d_out, m_out = self.d_out, self.m_out
968
969 data_out = Signal(64)
970 data_fwd = Signal(64)
971
972 # Use the bypass if are reading the row that was
973 # written 1 or 2 cycles ago, including for the
974 # slow_valid = 1 case (i.e. completing a load
975 # miss or a non-cacheable load).
976 with m.If(r1.use_forward1):
977 comb += data_fwd.eq(r1.forward_data1)
978 with m.Else():
979 comb += data_fwd.eq(r1.forward_data2)
980
981 comb += data_out.eq(cache_out[r1.hit_way])
982
983 for i in range(8):
984 with m.If(r1.forward_sel[i]):
985 dsel = data_fwd.word_select(i, 8)
986 comb += data_out.word_select(i, 8).eq(dsel)
987
988 comb += d_out.valid.eq(r1.ls_valid)
989 comb += d_out.data.eq(data_out)
990 comb += d_out.store_done.eq(~r1.stcx_fail)
991 comb += d_out.error.eq(r1.ls_error)
992 comb += d_out.cache_paradox.eq(r1.cache_paradox)
993
994 # Outputs to MMU
995 comb += m_out.done.eq(r1.mmu_done)
996 comb += m_out.err.eq(r1.mmu_error)
997 comb += m_out.data.eq(data_out)
998
999 # We have a valid load or store hit or we just completed
1000 # a slow op such as a load miss, a NC load or a store
1001 #
1002 # Note: the load hit is delayed by one cycle. However it
1003 # can still not collide with r.slow_valid (well unless I
1004 # miscalculated) because slow_valid can only be set on a
1005 # subsequent request and not on its first cycle (the state
1006 # machine must have advanced), which makes slow_valid
1007 # at least 2 cycles from the previous hit_load_valid.
1008
1009 # Sanity: Only one of these must be set in any given cycle
1010
1011 if False: # TODO: need Display to get this to work
1012 assert (r1.slow_valid & r1.stcx_fail) != 1, \
1013 "unexpected slow_valid collision with stcx_fail"
1014
1015 assert ((r1.slow_valid | r1.stcx_fail) | r1.hit_load_valid) != 1, \
1016 "unexpected hit_load_delayed collision with slow_valid"
1017
1018 with m.If(~r1.mmu_req):
1019 # Request came from loadstore1...
1020 # Load hit case is the standard path
1021 with m.If(r1.hit_load_valid):
1022 sync += Display("completing load hit data=%x", data_out)
1023
1024 # error cases complete without stalling
1025 with m.If(r1.ls_error):
1026 sync += Display("completing ld/st with error")
1027
1028 # Slow ops (load miss, NC, stores)
1029 with m.If(r1.slow_valid):
1030 sync += Display("completing store or load miss data=%x",
1031 data_out)
1032
1033 with m.Else():
1034 # Request came from MMU
1035 with m.If(r1.hit_load_valid):
1036 sync += Display("completing load hit to MMU, data=%x",
1037 m_out.data)
1038 # error cases complete without stalling
1039 with m.If(r1.mmu_error):
1040 sync += Display("completing MMU ld with error")
1041
1042 # Slow ops (i.e. load miss)
1043 with m.If(r1.slow_valid):
1044 sync += Display("completing MMU load miss, data=%x",
1045 m_out.data)
1046
1047 def rams(self, m, r1, early_req_row, cache_out, replace_way):
1048 """rams
1049 Generate a cache RAM for each way. This handles the normal
1050 reads, writes from reloads and the special store-hit update
1051 path as well.
1052
1053 Note: the BRAMs have an extra read buffer, meaning the output
1054 is pipelined an extra cycle. This differs from the
1055 icache. The writeback logic needs to take that into
1056 account by using 1-cycle delayed signals for load hits.
1057 """
1058 comb = m.d.comb
1059 wb_in = self.wb_in
1060
1061 for i in range(NUM_WAYS):
1062 do_read = Signal(name="do_rd%d" % i)
1063 rd_addr = Signal(ROW_BITS)
1064 do_write = Signal(name="do_wr%d" % i)
1065 wr_addr = Signal(ROW_BITS)
1066 wr_data = Signal(WB_DATA_BITS)
1067 wr_sel = Signal(ROW_SIZE)
1068 wr_sel_m = Signal(ROW_SIZE)
1069 _d_out = Signal(WB_DATA_BITS, name="dout_%d" % i)
1070
1071 way = CacheRam(ROW_BITS, WB_DATA_BITS, True)
1072 setattr(m.submodules, "cacheram_%d" % i, way)
1073
1074 comb += way.rd_en.eq(do_read)
1075 comb += way.rd_addr.eq(rd_addr)
1076 comb += _d_out.eq(way.rd_data_o)
1077 comb += way.wr_sel.eq(wr_sel_m)
1078 comb += way.wr_addr.eq(wr_addr)
1079 comb += way.wr_data.eq(wr_data)
1080
1081 # Cache hit reads
1082 comb += do_read.eq(1)
1083 comb += rd_addr.eq(early_req_row[:ROW_BITS])
1084 comb += cache_out[i].eq(_d_out)
1085
1086 # Write mux:
1087 #
1088 # Defaults to wishbone read responses (cache refill)
1089 #
1090 # For timing, the mux on wr_data/sel/addr is not
1091 # dependent on anything other than the current state.
1092
1093 with m.If(r1.write_bram):
1094 # Write store data to BRAM. This happens one
1095 # cycle after the store is in r0.
1096 comb += wr_data.eq(r1.req.data)
1097 comb += wr_sel.eq(r1.req.byte_sel)
1098 comb += wr_addr.eq(get_row(r1.req.real_addr))
1099
1100 with m.If(i == r1.req.hit_way):
1101 comb += do_write.eq(1)
1102 with m.Else():
1103 # Otherwise, we might be doing a reload or a DCBZ
1104 with m.If(r1.dcbz):
1105 comb += wr_data.eq(0)
1106 with m.Else():
1107 comb += wr_data.eq(wb_in.dat)
1108 comb += wr_addr.eq(r1.store_row)
1109 comb += wr_sel.eq(~0) # all 1s
1110
1111 with m.If((r1.state == State.RELOAD_WAIT_ACK)
1112 & wb_in.ack & (replace_way == i)):
1113 comb += do_write.eq(1)
1114
1115 # Mask write selects with do_write since BRAM
1116 # doesn't have a global write-enable
1117 with m.If(do_write):
1118 comb += wr_sel_m.eq(wr_sel)
1119
1120 # Cache hit synchronous machine for the easy case.
1121 # This handles load hits.
1122 # It also handles error cases (TLB miss, cache paradox)
1123 def dcache_fast_hit(self, m, req_op, r0_valid, r0, r1,
1124 req_hit_way, req_index, req_tag, access_ok,
1125 tlb_hit, tlb_hit_way, tlb_req_index):
1126
1127 comb = m.d.comb
1128 sync = m.d.sync
1129
1130 with m.If(req_op != Op.OP_NONE):
1131 sync += Display("op:%d addr:%x nc: %d idx: %x tag: %x way: %x",
1132 req_op, r0.req.addr, r0.req.nc,
1133 req_index, req_tag, req_hit_way)
1134
1135 with m.If(r0_valid):
1136 sync += r1.mmu_req.eq(r0.mmu_req)
1137
1138 # Fast path for load/store hits.
1139 # Set signals for the writeback controls.
1140 sync += r1.hit_way.eq(req_hit_way)
1141 sync += r1.hit_index.eq(req_index)
1142
1143 with m.If(req_op == Op.OP_LOAD_HIT):
1144 sync += r1.hit_load_valid.eq(1)
1145 with m.Else():
1146 sync += r1.hit_load_valid.eq(0)
1147
1148 with m.If((req_op == Op.OP_LOAD_HIT) | (req_op == Op.OP_STORE_HIT)):
1149 sync += r1.cache_hit.eq(1)
1150 with m.Else():
1151 sync += r1.cache_hit.eq(0)
1152
1153 with m.If(req_op == Op.OP_BAD):
1154 # Display(f"Signalling ld/st error valid_ra={valid_ra}"
1155 # f"rc_ok={rc_ok} perm_ok={perm_ok}"
1156 sync += r1.ls_error.eq(~r0.mmu_req)
1157 sync += r1.mmu_error.eq(r0.mmu_req)
1158 sync += r1.cache_paradox.eq(access_ok)
1159
1160 with m.Else():
1161 sync += r1.ls_error.eq(0)
1162 sync += r1.mmu_error.eq(0)
1163 sync += r1.cache_paradox.eq(0)
1164
1165 with m.If(req_op == Op.OP_STCX_FAIL):
1166 sync += r1.stcx_fail.eq(1)
1167 with m.Else():
1168 sync += r1.stcx_fail.eq(0)
1169
1170 # Record TLB hit information for updating TLB PLRU
1171 sync += r1.tlb_hit.eq(tlb_hit)
1172 sync += r1.tlb_hit_way.eq(tlb_hit_way)
1173 sync += r1.tlb_hit_index.eq(tlb_req_index)
1174
1175 # Memory accesses are handled by this state machine:
1176 #
1177 # * Cache load miss/reload (in conjunction with "rams")
1178 # * Load hits for non-cachable forms
1179 # * Stores (the collision case is handled in "rams")
1180 #
1181 # All wishbone requests generation is done here.
1182 # This machine operates at stage 1.
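# Summary of the state transitions implemented below:
#   IDLE -> RELOAD_WAIT_ACK on a load miss or a dcbz store
#   IDLE -> STORE_WAIT_ACK on a (non-dcbz) store hit or miss
#   IDLE -> NC_LOAD_WAIT_ACK on a non-cacheable load
# each returns to IDLE once the corresponding wishbone cycle completes.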
1183 def dcache_slow(self, m, r1, use_forward1_next, use_forward2_next,
1184 cache_valid_bits, r0, replace_way,
1185 req_hit_way, req_same_tag,
1186 r0_valid, req_op, cache_tags, req_go, ra):
1187
1188 comb = m.d.comb
1189 sync = m.d.sync
1190 wb_in = self.wb_in
1191
1192 req = MemAccessRequest("mreq_ds")
1193 acks = Signal(3)
1194 adjust_acks = Signal(3)
1195
1196 sync += r1.use_forward1.eq(use_forward1_next)
1197 sync += r1.forward_sel.eq(0)
1198
1199 with m.If(use_forward1_next):
1200 sync += r1.forward_sel.eq(r1.req.byte_sel)
1201 with m.Elif(use_forward2_next):
1202 sync += r1.forward_sel.eq(r1.forward_sel1)
1203
1204 sync += r1.forward_data2.eq(r1.forward_data1)
1205 with m.If(r1.write_bram):
1206 sync += r1.forward_data1.eq(r1.req.data)
1207 sync += r1.forward_sel1.eq(r1.req.byte_sel)
1208 sync += r1.forward_way1.eq(r1.req.hit_way)
1209 sync += r1.forward_row1.eq(get_row(r1.req.real_addr))
1210 sync += r1.forward_valid1.eq(1)
1211 with m.Else():
1212 with m.If(r1.dcbz):
1213 sync += r1.forward_data1.eq(0)
1214 with m.Else():
1215 sync += r1.forward_data1.eq(wb_in.dat)
1216 sync += r1.forward_sel1.eq(~0) # all 1s
1217 sync += r1.forward_way1.eq(replace_way)
1218 sync += r1.forward_row1.eq(r1.store_row)
1219 sync += r1.forward_valid1.eq(0)
1220
1221 # One cycle pulses reset
1222 sync += r1.slow_valid.eq(0)
1223 sync += r1.write_bram.eq(0)
1224 sync += r1.inc_acks.eq(0)
1225 sync += r1.dec_acks.eq(0)
1226
1227 sync += r1.ls_valid.eq(0)
1228 # complete tlbies and TLB loads in the third cycle
1229 sync += r1.mmu_done.eq(r0_valid & (r0.tlbie | r0.tlbld))
1230
1231 with m.If((req_op == Op.OP_LOAD_HIT)
1232 | (req_op == Op.OP_STCX_FAIL)):
1233 with m.If(~r0.mmu_req):
1234 sync += r1.ls_valid.eq(1)
1235 with m.Else():
1236 sync += r1.mmu_done.eq(1)
1237
1238 with m.If(r1.write_tag):
1239 # Store new tag in selected way
1240 for i in range(NUM_WAYS):
1241 with m.If(i == replace_way):
1242 ct = Signal(TAG_RAM_WIDTH)
1243 comb += ct.eq(cache_tags[r1.store_index])
1244 comb += ct.word_select(i, TAG_WIDTH).eq(r1.reload_tag)
1245 sync += cache_tags[r1.store_index].eq(ct)
1246 sync += r1.store_way.eq(replace_way)
1247 sync += r1.write_tag.eq(0)
1248
1249 # Take request from r1.req if there is one there,
1250 # else from req_op, ra, etc.
1251 with m.If(r1.full):
1252 comb += req.eq(r1.req)
1253 with m.Else():
1254 comb += req.op.eq(req_op)
1255 comb += req.valid.eq(req_go)
1256 comb += req.mmu_req.eq(r0.mmu_req)
1257 comb += req.dcbz.eq(r0.req.dcbz)
1258 comb += req.real_addr.eq(ra)
1259
1260 with m.If(~r0.req.dcbz):
1261 comb += req.data.eq(r0.req.data)
1262 with m.Else():
1263 comb += req.data.eq(0)
1264
1265 # Select all bytes for dcbz
1266 # and for cacheable loads
1267 with m.If(r0.req.dcbz | (r0.req.load & ~r0.req.nc)):
1268 comb += req.byte_sel.eq(~0) # all 1s
1269 with m.Else():
1270 comb += req.byte_sel.eq(r0.req.byte_sel)
1271 comb += req.hit_way.eq(req_hit_way)
1272 comb += req.same_tag.eq(req_same_tag)
1273
1274 # Store the incoming request from r0,
1275 # if it is a slow request
1276 # Note that r1.full = 1 implies req_op = OP_NONE
1277 with m.If((req_op == Op.OP_LOAD_MISS)
1278 | (req_op == Op.OP_LOAD_NC)
1279 | (req_op == Op.OP_STORE_MISS)
1280 | (req_op == Op.OP_STORE_HIT)):
1281 sync += r1.req.eq(req)
1282 sync += r1.full.eq(1)
1283
1284 # Main state machine
1285 with m.Switch(r1.state):
1286
1287 with m.Case(State.IDLE):
1288 # XXX check 'left downto. probably means len(r1.wb.adr)
1289 # r1.wb.adr <= req.real_addr(
1290 # r1.wb.adr'left downto 0
1291 # );
1292 sync += r1.wb.adr.eq(req.real_addr)
1293 sync += r1.wb.sel.eq(req.byte_sel)
1294 sync += r1.wb.dat.eq(req.data)
1295 sync += r1.dcbz.eq(req.dcbz)
1296
1297 # Keep track of our index and way
1298 # for subsequent stores.
1299 sync += r1.store_index.eq(get_index(req.real_addr))
1300 sync += r1.store_row.eq(get_row(req.real_addr))
1301 sync += r1.end_row_ix.eq(
1302 get_row_of_line(get_row(req.real_addr))
1303 )
1304 sync += r1.reload_tag.eq(get_tag(req.real_addr))
1305 sync += r1.req.same_tag.eq(1)
1306
1307 with m.If(req.op == Op.OP_STORE_HIT):
1308 sync += r1.store_way.eq(req.hit_way)
1309
1310 # Reset per-row valid bits,
1311 # ready for handling OP_LOAD_MISS
1312 for i in range(ROW_PER_LINE):
1313 sync += r1.rows_valid[i].eq(0)
1314
1315 with m.If(req_op != Op.OP_NONE):
1316 sync += Display("cache op %d", req.op)
1317
1318 with m.Switch(req.op):
1319 with m.Case(Op.OP_LOAD_HIT):
1320 # stay in IDLE state
1321 pass
1322
1323 with m.Case(Op.OP_LOAD_MISS):
1324 #Display(f"cache miss real addr:" \
1325 # f"{req_real_addr}" \
1326 # f" idx:{get_index(req_real_addr)}" \
1327 # f" tag:{get_tag(req.real_addr)}")
1328 pass
1329
1330 # Start the wishbone cycle
1331 sync += r1.wb.we.eq(0)
1332 sync += r1.wb.cyc.eq(1)
1333 sync += r1.wb.stb.eq(1)
1334
1335 # Track that we had one request sent
1336 sync += r1.state.eq(State.RELOAD_WAIT_ACK)
1337 sync += r1.write_tag.eq(1)
1338
1339 with m.Case(Op.OP_LOAD_NC):
1340 sync += r1.wb.cyc.eq(1)
1341 sync += r1.wb.stb.eq(1)
1342 sync += r1.wb.we.eq(0)
1343 sync += r1.state.eq(State.NC_LOAD_WAIT_ACK)
1344
1345 with m.Case(Op.OP_STORE_HIT, Op.OP_STORE_MISS):
1346 with m.If(~req.dcbz):
1347 sync += r1.state.eq(State.STORE_WAIT_ACK)
1348 sync += r1.acks_pending.eq(1)
1349 sync += r1.full.eq(0)
1350 sync += r1.slow_valid.eq(1)
1351
1352 with m.If(~req.mmu_req):
1353 sync += r1.ls_valid.eq(1)
1354 with m.Else():
1355 sync += r1.mmu_done.eq(1)
1356
1357 with m.If(req.op == Op.OP_STORE_HIT):
1358 sync += r1.write_bram.eq(1)
1359 with m.Else():
1360 # dcbz is handled much like a load miss except
1361 # that we are writing to memory instead of reading
1362 sync += r1.state.eq(State.RELOAD_WAIT_ACK)
1363
1364 with m.If(req.op == Op.OP_STORE_MISS):
1365 sync += r1.write_tag.eq(1)
1366
1367 sync += r1.wb.we.eq(1)
1368 sync += r1.wb.cyc.eq(1)
1369 sync += r1.wb.stb.eq(1)
1370
1371 # OP_NONE and OP_BAD do nothing
1372 # OP_BAD & OP_STCX_FAIL were
1373 # handled above already
1374 with m.Case(Op.OP_NONE):
1375 pass
1376 with m.Case(Op.OP_BAD):
1377 pass
1378 with m.Case(Op.OP_STCX_FAIL):
1379 pass
1380
1381 with m.Case(State.RELOAD_WAIT_ACK):
1382 ld_stbs_done = Signal()
1383 # Requests are all sent if stb is 0
1384 comb += ld_stbs_done.eq(~r1.wb.stb)
1385
1386 with m.If((~wb_in.stall) & r1.wb.stb):
1387 # That was the last word?
1388 # We are done sending.
1389 # Clear stb and set ld_stbs_done
1390 # so we can handle an eventual
1391 # last ack on the same cycle.
1392 with m.If(is_last_row_addr(r1.wb.adr, r1.end_row_ix)):
1393 sync += r1.wb.stb.eq(0)
1394 comb += ld_stbs_done.eq(1)
1395
1396 # Calculate the next row address in the current cache line
1397 rarange = r1.wb.adr[ROW_OFF_BITS : LINE_OFF_BITS]
1398 sync += rarange.eq(rarange + 1)
1399
1400 # Incoming acks processing
1401 sync += r1.forward_valid1.eq(wb_in.ack)
1402 with m.If(wb_in.ack):
1403 sync += r1.rows_valid[r1.store_row % ROW_PER_LINE].eq(1)
1404
1405 # If this is the data we were looking for,
1406 # we can complete the request next cycle.
1407 # Compare the whole address in case the
1408 # request in r1.req is not the one that
1409 # started this refill.
1410 with m.If(r1.full & r1.req.same_tag &
1411 ((r1.dcbz & r1.req.dcbz) |
1412 (~r1.dcbz & (r1.req.op == Op.OP_LOAD_MISS))) &
1413 (r1.store_row == get_row(r1.req.real_addr))):
1414 sync += r1.full.eq(0)
1415 sync += r1.slow_valid.eq(1)
1416 with m.If(~r1.mmu_req):
1417 sync += r1.ls_valid.eq(1)
1418 with m.Else():
1419 sync += r1.mmu_done.eq(1)
1420 sync += r1.forward_sel.eq(~0) # all 1s
1421 sync += r1.use_forward1.eq(1)
1422
1423 # Check for completion
1424 with m.If(ld_stbs_done & is_last_row(r1.store_row,
1425 r1.end_row_ix)):
1426 # Complete wishbone cycle
1427 sync += r1.wb.cyc.eq(0)
1428
1429 # Cache line is now valid
1430 cv = Signal(NUM_WAYS) # per-way valid bits for this line
1431 comb += cv.eq(cache_valid_bits[r1.store_index])
1432 comb += cv.bit_select(r1.store_way, 1).eq(1)
1433 sync += cache_valid_bits[r1.store_index].eq(cv)
1434 sync += r1.state.eq(State.IDLE)
1435
1436 # Increment store row counter
1437 sync += r1.store_row.eq(next_row(r1.store_row))
1438
1439 with m.Case(State.STORE_WAIT_ACK):
1440 st_stbs_done = Signal()
1441 comb += st_stbs_done.eq(~r1.wb.stb)
1442 comb += acks.eq(r1.acks_pending)
1443
1444 with m.If(r1.inc_acks != r1.dec_acks):
1445 with m.If(r1.inc_acks):
1446 comb += adjust_acks.eq(acks + 1)
1447 with m.Else():
1448 comb += adjust_acks.eq(acks - 1)
1449 with m.Else():
1450 comb += adjust_acks.eq(acks)
1451
1452 sync += r1.acks_pending.eq(adjust_acks)
1453
1454 # Clear stb when slave accepted request
1455 with m.If(~wb_in.stall):
1456 # See if there is another store waiting
1457 # to be done which is in the same real page.
1458 with m.If(req.valid):
1459 ra = req.real_addr[0:SET_SIZE_BITS]
1460 sync += r1.wb.adr[0:SET_SIZE_BITS].eq(ra)
1461 sync += r1.wb.dat.eq(req.data)
1462 sync += r1.wb.sel.eq(req.byte_sel)
1463
1464 with m.Elif((adjust_acks < 7) & req.same_tag &
1465 ((req.op == Op.OP_STORE_MISS)
1466 | (req.op == Op.OP_STORE_HIT))):
1467 sync += r1.wb.stb.eq(1)
1468 comb += st_stbs_done.eq(0)
1469
1470 with m.If(req.op == Op.OP_STORE_HIT):
1471 sync += r1.write_bram.eq(1)
1472 sync += r1.full.eq(0)
1473 sync += r1.slow_valid.eq(1)
1474
1475 # Store requests never come from the MMU
1476 sync += r1.ls_valid.eq(1)
1477 comb += st_stbs_done.eq(0)
1478 sync += r1.inc_acks.eq(1)
1479 with m.Else():
1480 sync += r1.wb.stb.eq(0)
1481 comb += st_stbs_done.eq(1)
1482
1483 # Got ack ? See if complete.
1484 with m.If(wb_in.ack):
1485 with m.If(st_stbs_done & (adjust_acks == 1)):
1486 sync += r1.state.eq(State.IDLE)
1487 sync += r1.wb.cyc.eq(0)
1488 sync += r1.wb.stb.eq(0)
1489 sync += r1.dec_acks.eq(1)
1490
1491 with m.Case(State.NC_LOAD_WAIT_ACK):
1492 # Clear stb when slave accepted request
1493 with m.If(~wb_in.stall):
1494 sync += r1.wb.stb.eq(0)
1495
1496 # Got ack ? complete.
1497 with m.If(wb_in.ack):
1498 sync += r1.state.eq(State.IDLE)
1499 sync += r1.full.eq(0)
1500 sync += r1.slow_valid.eq(1)
1501
1502 with m.If(~r1.mmu_req):
1503 sync += r1.ls_valid.eq(1)
1504 with m.Else():
1505 sync += r1.mmu_done.eq(1)
1506
1507 sync += r1.forward_sel.eq(~0) # all 1s
1508 sync += r1.use_forward1.eq(1)
1509 sync += r1.wb.cyc.eq(0)
1510 sync += r1.wb.stb.eq(0)
1511
1512 def dcache_log(self, m, r1, valid_ra, tlb_hit_way, stall_out):
1513
1514 sync = m.d.sync
1515 d_out, wb_in, log_out = self.d_out, self.wb_in, self.log_out
1516
1517 sync += log_out.eq(Cat(r1.state[:3], valid_ra, tlb_hit_way[:3],
1518 stall_out, req_op[:3], d_out.valid, d_out.error,
1519 r1.wb.cyc, r1.wb.stb, wb_in.ack, wb_in.stall,
1520 r1.wb.adr[3:6]))
1521
1522 def elaborate(self, platform):
1523
1524 m = Module()
1525 comb = m.d.comb
1526
1527 # Storage. Hopefully "cache_rows" is a BRAM, the rest is LUTs
1528 cache_tags = CacheTagArray()
1529 cache_tag_set = Signal(TAG_RAM_WIDTH)
1530 cache_valid_bits = CacheValidBitsArray()
1531
1532 # TODO attribute ram_style : string;
1533 # TODO attribute ram_style of cache_tags : signal is "distributed";
1534
1535 """note: these are passed to nmigen.hdl.Memory as "attributes".
1536 don't know how, just that they are.
1537 """
1538 dtlb_valid_bits = TLBValidBitsArray()
1539 dtlb_tags = TLBTagsArray()
1540 dtlb_ptes = TLBPtesArray()
1541 # TODO attribute ram_style of
1542 # dtlb_tags : signal is "distributed";
1543 # TODO attribute ram_style of
1544 # dtlb_ptes : signal is "distributed";
1545
1546 r0 = RegStage0("r0")
1547 r0_full = Signal()
1548
1549 r1 = RegStage1("r1")
1550
1551 reservation = Reservation()
1552
1553 # Async signals on incoming request
1554 req_index = Signal(INDEX_BITS)
1555 req_row = Signal(ROW_BITS)
1556 req_hit_way = Signal(WAY_BITS)
1557 req_tag = Signal(TAG_BITS)
1558 req_op = Signal(Op)
1559 req_data = Signal(64)
1560 req_same_tag = Signal()
1561 req_go = Signal()
1562
1563 early_req_row = Signal(ROW_BITS)
1564
1565 cancel_store = Signal()
1566 set_rsrv = Signal()
1567 clear_rsrv = Signal()
1568
1569 r0_valid = Signal()
1570 r0_stall = Signal()
1571
1572 use_forward1_next = Signal()
1573 use_forward2_next = Signal()
1574
1575 cache_out = CacheRamOut()
1576
1577 plru_victim = PLRUOut()
1578 replace_way = Signal(WAY_BITS)
1579
1580 # Wishbone read/write/cache write formatting signals
1581 bus_sel = Signal(8)
1582
1583 # TLB signals
1584 tlb_tag_way = Signal(TLB_TAG_WAY_BITS)
1585 tlb_pte_way = Signal(TLB_PTE_WAY_BITS)
1586 tlb_valid_way = Signal(TLB_NUM_WAYS)
1587 tlb_req_index = Signal(TLB_SET_BITS)
1588 tlb_hit = Signal()
1589 tlb_hit_way = Signal(TLB_WAY_BITS)
1590 pte = Signal(TLB_PTE_BITS)
1591 ra = Signal(REAL_ADDR_BITS)
1592 valid_ra = Signal()
1593 perm_attr = PermAttr("dc_perms")
1594 rc_ok = Signal()
1595 perm_ok = Signal()
1596 access_ok = Signal()
1597
1598 tlb_plru_victim = TLBPLRUOut()
1599
1600 # we don't yet handle collisions between loadstore1 requests
1601 # and MMU requests
1602 comb += self.m_out.stall.eq(0)
1603
1604 # Hold off the request in r0 when r1 has an uncompleted request
1605 comb += r0_stall.eq(r0_full & r1.full)
1606 comb += r0_valid.eq(r0_full & ~r1.full)
1607 comb += self.stall_out.eq(r0_stall)
1608
1609 # Wire up wishbone request latch out of stage 1
1610 comb += self.wb_out.eq(r1.wb)
1611
1612 # call sub-functions putting everything together, using shared
1613 # signals established above
1614 self.stage_0(m, r0, r1, r0_full)
1615 self.tlb_read(m, r0_stall, tlb_valid_way,
1616 tlb_tag_way, tlb_pte_way, dtlb_valid_bits,
1617 dtlb_tags, dtlb_ptes)
1618 self.tlb_search(m, tlb_req_index, r0, r0_valid,
1619 tlb_valid_way, tlb_tag_way, tlb_hit_way,
1620 tlb_pte_way, pte, tlb_hit, valid_ra, perm_attr, ra)
1621 self.tlb_update(m, r0_valid, r0, dtlb_valid_bits, tlb_req_index,
1622 tlb_hit_way, tlb_hit, tlb_plru_victim, tlb_tag_way,
1623 dtlb_tags, tlb_pte_way, dtlb_ptes)
1624 self.maybe_plrus(m, r1, plru_victim)
1625 self.maybe_tlb_plrus(m, r1, tlb_plru_victim)
1626 self.cache_tag_read(m, r0_stall, req_index, cache_tag_set, cache_tags)
1627 self.dcache_request(m, r0, ra, req_index, req_row, req_tag,
1628 r0_valid, r1, cache_valid_bits, replace_way,
1629 use_forward1_next, use_forward2_next,
1630 req_hit_way, plru_victim, rc_ok, perm_attr,
1631 valid_ra, perm_ok, access_ok, req_op, req_go,
1632 tlb_pte_way,
1633 tlb_hit, tlb_hit_way, tlb_valid_way, cache_tag_set,
1634 cancel_store, req_same_tag, r0_stall, early_req_row)
1635 self.reservation_comb(m, cancel_store, set_rsrv, clear_rsrv,
1636 r0_valid, r0, reservation)
1637 self.reservation_reg(m, r0_valid, access_ok, set_rsrv, clear_rsrv,
1638 reservation, r0)
1639 self.writeback_control(m, r1, cache_out)
1640 self.rams(m, r1, early_req_row, cache_out, replace_way)
1641 self.dcache_fast_hit(m, req_op, r0_valid, r0, r1,
1642 req_hit_way, req_index, req_tag, access_ok,
1643 tlb_hit, tlb_hit_way, tlb_req_index)
1644 self.dcache_slow(m, r1, use_forward1_next, use_forward2_next,
1645 cache_valid_bits, r0, replace_way,
1646 req_hit_way, req_same_tag,
1647 r0_valid, req_op, cache_tags, req_go, ra)
1648 #self.dcache_log(m, r1, valid_ra, tlb_hit_way, stall_out)
1649
1650 return m
1651
1652 def dcache_load(dut, addr, nc=0):
1653 yield dut.d_in.load.eq(1)
1654 yield dut.d_in.nc.eq(nc)
1655 yield dut.d_in.addr.eq(addr)
1656 yield dut.d_in.valid.eq(1)
1657 yield
1658 yield dut.d_in.valid.eq(0)
1659 yield
1660 while not (yield dut.d_out.valid):
1661 yield
1662 data = yield dut.d_out.data
1663 return data
1664
1665
1666 def dcache_store(dut, addr, data, nc=0):
1667 yield dut.d_in.load.eq(0)
1668 yield dut.d_in.nc.eq(nc)
1669 yield dut.d_in.data.eq(data)
1670 yield dut.d_in.byte_sel.eq(~0)
1671 yield dut.d_in.addr.eq(addr)
1672 yield dut.d_in.valid.eq(1)
1673 yield
1674 yield dut.d_in.valid.eq(0)
1675 yield dut.d_in.byte_sel.eq(0)
1676 yield
1677 while not (yield dut.d_out.valid):
1678 yield
1679
1680
1681 def dcache_sim(dut):
1682 # clear stuff
1683 yield dut.d_in.valid.eq(0)
1684 yield dut.d_in.load.eq(0)
1685 yield dut.d_in.priv_mode.eq(1)
1686 yield dut.d_in.nc.eq(0)
1687 yield dut.d_in.addr.eq(0)
1688 yield dut.d_in.data.eq(0)
1689 yield dut.m_in.valid.eq(0)
1690 yield dut.m_in.addr.eq(0)
1691 yield dut.m_in.pte.eq(0)
1692 # wait 4 * clk_period
1693 yield
1694 yield
1695 yield
1696 yield
1697
1698 # Cacheable read of address 4
1699 data = yield from dcache_load(dut, 0x4)
1700 addr = yield dut.d_in.addr
1701 assert data == 0x0000000100000000, \
1702 "data @%x=%x expected 0x0000000100000000" % (addr, data)
1703 
1704 # Cacheable read of address 0x530
1705 data = yield from dcache_load(dut, 0x530)
1706 addr = yield dut.d_in.addr
1707 assert data == 0x0000004D0000004C, \
1708 "data @%x=%x expected 0x0000004D0000004C" % (addr, data)
1709 
1710 # 2nd Cacheable read of address 0x530
1711 data = yield from dcache_load(dut, 0x530)
1712 addr = yield dut.d_in.addr
1713 assert data == 0x0000004D0000004C, \
1714 "data @%x=%x expected 0x0000004D0000004C" % (addr, data)
1715 
1716 # Non-cacheable read of address 0x100
1717 data = yield from dcache_load(dut, 0x100, nc=1)
1718 addr = yield dut.d_in.addr
1719 assert data == 0x0000004100000040, \
1720 "data @%x=%x expected 0x0000004100000040" % (addr, data)
1721 
1722 # Store at address 0x530
1723 yield from dcache_store(dut, 0x530, 0x121)
1724 
1725 # Second store at address 0x530
1726 yield from dcache_store(dut, 0x530, 0x12345678)
1727 
1728 # 3rd Cacheable read of address 0x530
1729 data = yield from dcache_load(dut, 0x530)
1730 addr = yield dut.d_in.addr
1731 assert data == 0x12345678, \
1732 "data @%x=%x expected 0x12345678" % (addr, data)
1733
1734 yield
1735 yield
1736 yield
1737 yield
1738
1739
1740 def test_dcache():
1741 dut = DCache()
1742 vl = rtlil.convert(dut, ports=[])
1743 with open("test_dcache.il", "w") as f:
1744 f.write(vl)
1745
1746 mem = []
1747 for i in range(0,128):
1748 mem.append((i*2)| ((i*2+1)<<32))
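# each 64-bit word i holds 2*i in the low half and 2*i+1 in the high
# half; the 7-bit word address on the SRAM bus truncates (wraps) the
# 128-word (1KiB) memory, so e.g. address 0x530 maps to word 38 and
# reads back 0x0000004D0000004C as checked in dcache_sim above.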
1749 memory = Memory(width=64, depth=16*8, init=mem)
1750 sram = SRAM(memory=memory, granularity=8)
1751
1752 m = Module()
1753 m.submodules.dcache = dut
1754 m.submodules.sram = sram
1755
1756 m.d.comb += sram.bus.cyc.eq(dut.wb_out.cyc)
1757 m.d.comb += sram.bus.stb.eq(dut.wb_out.stb)
1758 m.d.comb += sram.bus.we.eq(dut.wb_out.we)
1759 m.d.comb += sram.bus.sel.eq(dut.wb_out.sel)
1760 m.d.comb += sram.bus.adr.eq(dut.wb_out.adr[3:])
1761 m.d.comb += sram.bus.dat_w.eq(dut.wb_out.dat)
1762
1763 m.d.comb += dut.wb_in.ack.eq(sram.bus.ack)
1764 m.d.comb += dut.wb_in.dat.eq(sram.bus.dat_r)
1765
1766 # nmigen Simulation
1767 sim = Simulator(m)
1768 sim.add_clock(1e-6)
1769
1770 sim.add_sync_process(wrap(dcache_sim(dut)))
1771 with sim.write_vcd('test_dcache.vcd'):
1772 sim.run()
1773
1774 if __name__ == '__main__':
1775 test_dcache()
1776