1 """DCache
2
3 based on Anton Blanchard microwatt dcache.vhdl
4
5 """
6
7 from enum import Enum, unique
8
9 from nmigen import Module, Signal, Elaboratable, Cat, Repl, Array, Const
10 from nmigen.cli import main
11 from nmutil.iocontrol import RecordObject
12 from nmigen.utils import log2_int
13 from nmigen.cli import rtlil
14
15
16 from soc.experiment.mem_types import (LoadStore1ToDCacheType,
17 DCacheToLoadStore1Type,
18 MMUToDCacheType,
19 DCacheToMMUType)
20
21 from soc.experiment.wb_types import (WB_ADDR_BITS, WB_DATA_BITS, WB_SEL_BITS,
22 WBAddrType, WBDataType, WBSelType,
23 WBMasterOut, WBSlaveOut,
24 WBMasterOutVector, WBSlaveOutVector,
25 WBIOMasterOut, WBIOSlaveOut)
26
27 from soc.experiment.cache_ram import CacheRam
28 from soc.experiment.plru import PLRU
29
30
31 # TODO: make these parameters of DCache at some point
32 LINE_SIZE = 64 # Line size in bytes
33 NUM_LINES = 32 # Number of lines in a set
34 NUM_WAYS = 4 # Number of ways
35 TLB_SET_SIZE = 64 # L1 DTLB number of sets (entries per way)
36 TLB_NUM_WAYS = 2 # L1 DTLB number of ways (associativity)
37 TLB_LG_PGSZ = 12 # L1 DTLB log_2(page_size)
38 LOG_LENGTH = 0 # Non-zero to enable log data collection
39
40 # BRAM organisation: We never access more than
41 # WB_DATA_BITS at a time so, to save
42 # resources, we make the array only that wide, and
43 # use consecutive indices to make a cache "line"
44 #
45 # ROW_SIZE is the width in bytes of the BRAM
46 # (based on WB, so 64-bits)
47 ROW_SIZE = WB_DATA_BITS // 8
48
49 # ROW_PER_LINE is the number of row (wishbone
50 # transactions) in a line
51 ROW_PER_LINE = LINE_SIZE // ROW_SIZE
52
53 # BRAM_ROWS is the number of rows in BRAM needed
54 # to represent the full dcache
55 BRAM_ROWS = NUM_LINES * ROW_PER_LINE
56
57
58 # Bit fields counts in the address
59
60 # REAL_ADDR_BITS is the number of real address
61 # bits that we store
62 REAL_ADDR_BITS = 56
63
64 # ROW_BITS is the number of bits to select a row
65 ROW_BITS = log2_int(BRAM_ROWS)
66
67 # ROW_LINE_BITS is the number of bits to select
68 # a row within a line
69 ROW_LINE_BITS = log2_int(ROW_PER_LINE)
70
71 # LINE_OFF_BITS is the number of bits for
72 # the offset in a cache line
73 LINE_OFF_BITS = log2_int(LINE_SIZE)
74
75 # ROW_OFF_BITS is the number of bits for
76 # the offset in a row
77 ROW_OFF_BITS = log2_int(ROW_SIZE)
78
79 # INDEX_BITS is the number of bits to
80 # select a cache line
81 INDEX_BITS = log2_int(NUM_LINES)
82
83 # SET_SIZE_BITS is the log base 2 of the set size
84 SET_SIZE_BITS = LINE_OFF_BITS + INDEX_BITS
85
86 # TAG_BITS is the number of bits of
87 # the tag part of the address
88 TAG_BITS = REAL_ADDR_BITS - SET_SIZE_BITS
89
90 # TAG_WIDTH is the width in bits of each way of the tag RAM
91 TAG_WIDTH = TAG_BITS + 7 - ((TAG_BITS + 7) % 8)
92
93 # WAY_BITS is the number of bits to select a way
94 WAY_BITS = log2_int(NUM_WAYS)
95
96 # Example of layout for 32 lines of 64 bytes:
97 #
98 # .. tag |index| line |
99 # .. | row | |
100 # .. | |---| | ROW_LINE_BITS (3)
101 # .. | |--- - --| LINE_OFF_BITS (6)
102 # .. | |- --| ROW_OFF_BITS (3)
103 # .. |----- ---| | ROW_BITS (8)
104 # .. |-----| | INDEX_BITS (5)
105 # .. --------| | TAG_BITS (45)
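#
# Worked example with the default values above (LINE_SIZE=64, NUM_LINES=32,
# NUM_WAYS=4, WB_DATA_BITS=64), as a quick sanity-check of the constants:
#
#   ROW_SIZE      = 64 // 8    = 8 bytes per BRAM row
#   ROW_PER_LINE  = 64 // 8    = 8 rows per cache line
#   BRAM_ROWS     = 32 * 8     = 256 rows
#   ROW_BITS      = log2(256)  = 8     ROW_LINE_BITS = log2(8) = 3
#   LINE_OFF_BITS = log2(64)   = 6     ROW_OFF_BITS  = log2(8) = 3
#   INDEX_BITS    = log2(32)   = 5     SET_SIZE_BITS = 6 + 5   = 11
#   TAG_BITS      = 56 - 11    = 45    TAG_WIDTH (byte-rounded) = 48
#
# so a 56-bit real address decomposes as addr[0:6] = offset within the
# line, addr[6:11] = line index, addr[11:56] = tag, with addr[3:11] being
# the BRAM row number.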
106
107 TAG_RAM_WIDTH = TAG_WIDTH * NUM_WAYS
108
109 def CacheTagArray():
110 return Array(Signal(TAG_RAM_WIDTH) for x in range(NUM_LINES))
111
112 def CacheValidBitsArray():
113 return Array(Signal(NUM_WAYS) for x in range(NUM_LINES)) # one valid bit per way
114
115 def RowPerLineValidArray():
116 return Array(Signal() for x in range(ROW_PER_LINE))
117
118 # L1 TLB
119 TLB_SET_BITS = log2_int(TLB_SET_SIZE)
120 TLB_WAY_BITS = log2_int(TLB_NUM_WAYS)
121 TLB_EA_TAG_BITS = 64 - (TLB_LG_PGSZ + TLB_SET_BITS)
122 TLB_TAG_WAY_BITS = TLB_NUM_WAYS * TLB_EA_TAG_BITS
123 TLB_PTE_BITS = 64
124 TLB_PTE_WAY_BITS = TLB_NUM_WAYS * TLB_PTE_BITS
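# With the defaults above (TLB_SET_SIZE=64 sets, TLB_NUM_WAYS=2,
# TLB_LG_PGSZ=12) this works out to TLB_SET_BITS=6, TLB_WAY_BITS=1,
# TLB_EA_TAG_BITS = 64 - (12 + 6) = 46, TLB_TAG_WAY_BITS = 2 * 46 = 92 and
# TLB_PTE_WAY_BITS = 2 * 64 = 128: an effective address splits as
# EA[0:12] = page offset, EA[12:18] = TLB set index, EA[18:64] = TLB tag.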
125
126 assert (LINE_SIZE % ROW_SIZE) == 0, "LINE_SIZE not multiple of ROW_SIZE"
127 assert (LINE_SIZE & (LINE_SIZE - 1)) == 0, "LINE_SIZE not power of 2"
128 assert (NUM_LINES & (NUM_LINES - 1)) == 0, "NUM_LINES not power of 2"
129 assert (ROW_PER_LINE & (ROW_PER_LINE - 1)) == 0, "ROW_PER_LINE not power of 2"
130 assert ROW_BITS == (INDEX_BITS + ROW_LINE_BITS), "geometry bits don't add up"
131 assert (LINE_OFF_BITS == ROW_OFF_BITS + ROW_LINE_BITS), \
132 "geometry bits don't add up"
133 assert REAL_ADDR_BITS == (TAG_BITS + INDEX_BITS + LINE_OFF_BITS), \
134 "geometry bits don't add up"
135 assert REAL_ADDR_BITS == (TAG_BITS + ROW_BITS + ROW_OFF_BITS), \
136 "geometry bits don't add up"
137 assert 64 == WB_DATA_BITS, "Can't yet handle wb width that isn't 64-bits"
138 assert SET_SIZE_BITS <= TLB_LG_PGSZ, "Set indexed by virtual address"
139
140
141 def TLBValidBitsArray():
142 return Array(Signal(TLB_NUM_WAYS) for x in range(TLB_SET_SIZE))
143
144 def TLBTagEAArray():
145 return Array(Signal(TLB_EA_TAG_BITS) for x in range (TLB_NUM_WAYS))
146
147 def TLBTagsArray():
148 return Array(Signal(TLB_TAG_WAY_BITS) for x in range (TLB_SET_SIZE))
149
150 def TLBPtesArray():
151 return Array(Signal(TLB_PTE_WAY_BITS) for x in range(TLB_SET_SIZE))
152
153 def HitWaySet():
154 return Array(Signal(NUM_WAYS) for x in range(TLB_NUM_WAYS))
155
156 # Cache RAM interface
157 def CacheRamOut():
158 return Array(Signal(WB_DATA_BITS) for x in range(NUM_WAYS))
159
160 # PLRU output interface
161 def PLRUOut():
162 return Array(Signal(WAY_BITS) for x in range(NUM_LINES))
163
164 # TLB PLRU output interface
165 def TLBPLRUOut():
166 return Array(Signal(TLB_WAY_BITS) for x in range(TLB_SET_SIZE))
167
168 # Helper functions to decode incoming requests
169 #
170 # Return the cache line index (tag index) for an address
171 def get_index(addr):
172 return addr[LINE_OFF_BITS:SET_SIZE_BITS]
173
174 # Return the cache row index (data memory) for an address
175 def get_row(addr):
176 return addr[ROW_OFF_BITS:SET_SIZE_BITS]
177
178 # Return the index of a row within a line
179 def get_row_of_line(row):
180 return row[:ROW_LINE_BITS]
181
182 # Returns whether this is the last row of a line
183 def is_last_row_addr(addr, last):
184 return addr[ROW_OFF_BITS:LINE_OFF_BITS] == last
185
186 # Returns whether this is the last row of a line
187 def is_last_row(row, last):
188 return get_row_of_line(row) == last
189
190 # Return the next row in the current cache line. We use a
191 # dedicated function in order to limit the size of the
192 # generated adder to be only the bits within a cache line
193 # (3 bits with default settings)
194 def next_row(row):
195 row_v = row[0:ROW_LINE_BITS] + 1
196 return Cat(row_v[:ROW_LINE_BITS], row[ROW_LINE_BITS:])
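
# Example: with ROW_LINE_BITS = 3 the increment wraps within the line, e.g.
# next_row(0b10101_111) == 0b10101_000 - the upper (line-select) bits of
# the row number pass through unchanged, so only a 3-bit adder is generated.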
197
198 # Get the tag value from the address
199 def get_tag(addr):
200 return addr[SET_SIZE_BITS:REAL_ADDR_BITS]
201
202 # Read a tag from a tag memory row
203 def read_tag(way, tagset):
204 return tagset.word_select(way, TAG_WIDTH)[:TAG_BITS]
205
206 # Read a TLB tag from a TLB tag memory row
207 def read_tlb_tag(way, tags):
208 return tags.word_select(way, TLB_EA_TAG_BITS)
209
210 # Write a TLB tag to a TLB tag memory row
211 def write_tlb_tag(way, tags, tag):
212 return read_tlb_tag(way, tags).eq(tag)
213
214 # Read a PTE from a TLB PTE memory row
215 def read_tlb_pte(way, ptes):
216 return ptes.word_select(way, TLB_PTE_BITS)
217
218 def write_tlb_pte(way, ptes, newpte):
219 return read_tlb_pte(way, ptes).eq(newpte)
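
# Note: write_tlb_tag / write_tlb_pte return an Assign statement; the caller
# chooses the domain by adding the result to m.d.comb or m.d.sync.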
220
221
222 # Record for storing permission, attribute, etc. bits from a PTE
223 class PermAttr(RecordObject):
224 def __init__(self):
225 super().__init__()
226 self.reference = Signal()
227 self.changed = Signal()
228 self.nocache = Signal()
229 self.priv = Signal()
230 self.rd_perm = Signal()
231 self.wr_perm = Signal()
232
233
234 def extract_perm_attr(pte):
235 pa = PermAttr()
236 pa.reference = pte[8]
237 pa.changed = pte[7]
238 pa.nocache = pte[5]
239 pa.priv = pte[3]
240 pa.rd_perm = pte[2]
241 pa.wr_perm = pte[1]
242 return pa
243
244
245 # Type of operation on a "valid" input
246 @unique
247 class Op(Enum):
248 OP_NONE = 0
249 OP_BAD = 1 # NC cache hit, TLB miss, prot/RC failure
250 OP_STCX_FAIL = 2 # conditional store w/o reservation
251 OP_LOAD_HIT = 3 # Cache hit on load
252 OP_LOAD_MISS = 4 # Load missing cache
253 OP_LOAD_NC = 5 # Non-cachable load
254 OP_STORE_HIT = 6 # Store hitting cache
255 OP_STORE_MISS = 7 # Store missing cache
256
257
258 # Cache state machine
259 @unique
260 class State(Enum):
261 IDLE = 0 # Normal load hit processing
262 RELOAD_WAIT_ACK = 1 # Cache reload wait ack
263 STORE_WAIT_ACK = 2 # Store wait ack
264 NC_LOAD_WAIT_ACK = 3 # Non-cachable load wait ack
265
266
267 # Dcache operations:
268 #
269 # In order to make timing, we use the BRAMs with
270 # an output buffer, which means that the BRAM
271 # output is delayed by an extra cycle.
272 #
273 # Thus, the dcache has a 2-stage internal pipeline
274 # for cache hits with no stalls.
275 #
276 # All other operations are handled via stalling
277 # in the first stage.
278 #
279 # The second stage can thus complete a hit at the same
280 # time as the first stage emits a stall for a complex op.
281 #
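# A rough cycle-by-cycle sketch of a load hit, approximate and only to
# illustrate the two-stage behaviour described above (exact boundaries
# follow from the BRAM output buffering):
#
#   cycle 0: request arrives on d_in and is latched into r0 (stage 0);
#            the data BRAM read is started from the incoming address
#   cycle 1: TLB lookup and cache tag comparison, hit detection (stage 1)
#   cycle 2: the BRAM output buffer delivers the data and d_out.valid
#            is asserted
#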
282 # Stage 0 register, basically contains just the latched request
283
284 class RegStage0(RecordObject):
285 def __init__(self):
286 super().__init__()
287 self.req = LoadStore1ToDCacheType()
288 self.tlbie = Signal()
289 self.doall = Signal()
290 self.tlbld = Signal()
291 self.mmu_req = Signal() # indicates source of request
292
293
294 class MemAccessRequest(RecordObject):
295 def __init__(self):
296 super().__init__()
297 self.op = Signal(Op)
298 self.valid = Signal()
299 self.dcbz = Signal()
300 self.real_addr = Signal(REAL_ADDR_BITS)
301 self.data = Signal(64)
302 self.byte_sel = Signal(8)
303 self.hit_way = Signal(WAY_BITS)
304 self.same_tag = Signal()
305 self.mmu_req = Signal()
306
307
308 # First stage register, contains state for stage 1 of load hits
309 # and for the state machine used by all other operations
310 class RegStage1(RecordObject):
311 def __init__(self):
312 super().__init__()
313 # Info about the request
314 self.full = Signal() # have uncompleted request
315 self.mmu_req = Signal() # request is from MMU
316 self.req = MemAccessRequest()
317
318 # Cache hit state
319 self.hit_way = Signal(WAY_BITS)
320 self.hit_load_valid = Signal()
321 self.hit_index = Signal(INDEX_BITS)
322 self.cache_hit = Signal()
323
324 # TLB hit state
325 self.tlb_hit = Signal()
326 self.tlb_hit_way = Signal(TLB_WAY_BITS)
327 self.tlb_hit_index = Signal(TLB_SET_BITS)
328
329 # 2-stage data buffer for data forwarded from writes to reads
330 self.forward_data1 = Signal(64)
331 self.forward_data2 = Signal(64)
332 self.forward_sel1 = Signal(8)
333 self.forward_valid1 = Signal()
334 self.forward_way1 = Signal(WAY_BITS)
335 self.forward_row1 = Signal(ROW_BITS)
336 self.use_forward1 = Signal()
337 self.forward_sel = Signal(8)
338
339 # Cache miss state (reload state machine)
340 self.state = Signal(State)
341 self.dcbz = Signal()
342 self.write_bram = Signal()
343 self.write_tag = Signal()
344 self.slow_valid = Signal()
345 self.wb = WBMasterOut()
346 self.reload_tag = Signal(TAG_BITS)
347 self.store_way = Signal(WAY_BITS)
348 self.store_row = Signal(ROW_BITS)
349 self.store_index = Signal(INDEX_BITS)
350 self.end_row_ix = Signal(ROW_LINE_BITS)
351 self.rows_valid = RowPerLineValidArray()
352 self.acks_pending = Signal(3)
353 self.inc_acks = Signal()
354 self.dec_acks = Signal()
355
356 # Signals to complete (possibly with error)
357 self.ls_valid = Signal()
358 self.ls_error = Signal()
359 self.mmu_done = Signal()
360 self.mmu_error = Signal()
361 self.cache_paradox = Signal()
362
363 # Signal to complete a failed stcx.
364 self.stcx_fail = Signal()
365
366
367 # Reservation information
368 class Reservation(RecordObject):
369 def __init__(self):
370 super().__init__()
371 self.valid = Signal()
372 self.addr = Signal(64-LINE_OFF_BITS)
373
374
375 class DTLBUpdate(Elaboratable):
376 def __init__(self):
377 self.tlbie = Signal()
378 self.tlbwe = Signal()
379 self.doall = Signal()
380 self.updated = Signal()
381 self.v_updated = Signal()
382 self.tlb_hit = Signal()
383 self.tlb_req_index = Signal(TLB_SET_BITS)
384
385 self.tlb_hit_way = Signal(TLB_WAY_BITS)
386 self.tlb_tag_way = Signal(TLB_TAG_WAY_BITS)
387 self.tlb_pte_way = Signal(TLB_PTE_WAY_BITS)
388 self.repl_way = Signal(TLB_WAY_BITS)
389 self.eatag = Signal(TLB_EA_TAG_BITS)
390 self.pte_data = Signal(TLB_PTE_BITS)
391
392 self.dv = Signal(TLB_NUM_WAYS) # per-way valid bits of the selected TLB set
393
394 self.tb_out = Signal(TLB_TAG_WAY_BITS)
395 self.pb_out = Signal(TLB_PTE_WAY_BITS)
396 self.db_out = Signal(TLB_NUM_WAYS)
397
398 def elaborate(self, platform):
399 m = Module()
400 comb = m.d.comb
401 sync = m.d.sync
402
403 tagset = Signal(TLB_TAG_WAY_BITS)
404 pteset = Signal(TLB_PTE_WAY_BITS)
405
406 tb_out, pb_out, db_out = self.tb_out, self.pb_out, self.db_out
407
408 with m.If(self.tlbie & self.doall):
409 pass # clear all back in parent
410 with m.Elif(self.tlbie):
411 with m.If(self.tlb_hit):
412 comb += db_out.eq(self.dv)
413 comb += db_out.bit_select(self.tlb_hit_way, 1).eq(1)
414 comb += self.v_updated.eq(1)
415
416 with m.Elif(self.tlbwe):
417
418 comb += tagset.eq(self.tlb_tag_way)
419 comb += write_tlb_tag(self.repl_way, tagset, self.eatag)
420 comb += tb_out.eq(tagset)
421
422 comb += pteset.eq(self.tlb_pte_way)
423 comb += write_tlb_pte(self.repl_way, pteset, self.pte_data)
424 comb += pb_out.eq(pteset)
425
426 comb += db_out.bit_select(self.repl_way, 1).eq(1)
427
428 comb += self.updated.eq(1)
429 comb += self.v_updated.eq(1)
430
431 return m
432
443
444 class DCachePendingHit(Elaboratable):
445
446 def __init__(self, tlb_pte_way, tlb_valid_way, tlb_hit_way,
447 cache_valid_idx, cache_tag_set,
448 req_addr,
449 hit_set):
450
451 self.go = Signal()
452 self.virt_mode = Signal()
453 self.is_hit = Signal()
454 self.tlb_hit = Signal()
455 self.hit_way = Signal(WAY_BITS)
456 self.rel_match = Signal()
457 self.req_index = Signal(INDEX_BITS)
458 self.reload_tag = Signal(TAG_BITS)
459
460 self.tlb_hit_way = tlb_hit_way
461 self.tlb_pte_way = tlb_pte_way
462 self.tlb_valid_way = tlb_valid_way
463 self.cache_valid_idx = cache_valid_idx
464 self.cache_tag_set = cache_tag_set
465 self.req_addr = req_addr
466 self.hit_set = hit_set
467
468 def elaborate(self, platform):
469 m = Module()
470 comb = m.d.comb
471 sync = m.d.sync
472
473 go = self.go
474 virt_mode = self.virt_mode
475 is_hit = self.is_hit
476 tlb_pte_way = self.tlb_pte_way
477 tlb_valid_way = self.tlb_valid_way
478 cache_valid_idx = self.cache_valid_idx
479 cache_tag_set = self.cache_tag_set
480 req_addr = self.req_addr
481 tlb_hit_way = self.tlb_hit_way
482 tlb_hit = self.tlb_hit
483 hit_set = self.hit_set
484 hit_way = self.hit_way
485 rel_match = self.rel_match
486 req_index = self.req_index
487 reload_tag = self.reload_tag
488
489 rel_matches = Array(Signal() for i in range(TLB_NUM_WAYS))
490 hit_way_set = HitWaySet()
491
492 # Test if pending request is a hit on any way
493 # In order to make timing in virtual mode,
494 # when we are using the TLB, we compare each
495 # way with each of the real addresses from each way of
496 # the TLB, and then decide later which match to use.
497
498 with m.If(virt_mode):
499 for j in range(TLB_NUM_WAYS):
500 s_tag = Signal(TAG_BITS, name="s_tag%d" % j)
501 s_hit = Signal()
502 s_pte = Signal(TLB_PTE_BITS)
503 s_ra = Signal(REAL_ADDR_BITS)
504 comb += s_pte.eq(read_tlb_pte(j, tlb_pte_way))
505 comb += s_ra.eq(Cat(req_addr[0:TLB_LG_PGSZ],
506 s_pte[TLB_LG_PGSZ:REAL_ADDR_BITS]))
507 comb += s_tag.eq(get_tag(s_ra))
508
509 for i in range(NUM_WAYS):
510 is_tag_hit = Signal()
511 comb += is_tag_hit.eq(go & cache_valid_idx[i] &
512 (read_tag(i, cache_tag_set) == s_tag)
513 & tlb_valid_way[j])
514 with m.If(is_tag_hit):
515 comb += hit_way_set[j].eq(i)
516 comb += s_hit.eq(1)
517 comb += hit_set[j].eq(s_hit)
518 with m.If(s_tag == reload_tag):
519 comb += rel_matches[j].eq(1)
520 with m.If(tlb_hit):
521 comb += is_hit.eq(hit_set[tlb_hit_way])
522 comb += hit_way.eq(hit_way_set[tlb_hit_way])
523 comb += rel_match.eq(rel_matches[tlb_hit_way])
524 with m.Else():
525 s_tag = Signal(TAG_BITS)
526 comb += s_tag.eq(get_tag(req_addr))
527 for i in range(NUM_WAYS):
528 is_tag_hit = Signal()
529 comb += is_tag_hit.eq(go & cache_valid_idx[i] &
530 (read_tag(i, cache_tag_set) == s_tag))
531 with m.If(is_tag_hit):
532 comb += hit_way.eq(i)
533 comb += is_hit.eq(1)
534 with m.If(s_tag == reload_tag):
535 comb += rel_match.eq(1)
536
537 return m
538
539
540 class DCache(Elaboratable):
541 """Set associative dcache write-through
542 TODO (in no specific order):
543 * See list in icache.vhdl
544 * Complete load misses on the cycle when WB data comes instead of
545 at the end of line (this requires dealing with requests coming in
546 while not idle...)
547 """
548 def __init__(self):
549 self.d_in = LoadStore1ToDCacheType()
550 self.d_out = DCacheToLoadStore1Type()
551
552 self.m_in = MMUToDCacheType()
553 self.m_out = DCacheToMMUType()
554
555 self.stall_out = Signal()
556
557 self.wb_out = WBMasterOut()
558 self.wb_in = WBSlaveOut()
559
560 self.log_out = Signal(20)
561
562 def stage_0(self, m, r0, r1, r0_full):
563 """Latch the request in r0.req as long as we're not stalling
564 """
565 comb = m.d.comb
566 sync = m.d.sync
567 d_in, d_out, m_in = self.d_in, self.d_out, self.m_in
568
569 r = RegStage0()
570
571 # TODO, this goes in unit tests and formal proofs
572 with m.If(d_in.valid & m_in.valid):
573 #sync += Display("request collision loadstore vs MMU")
574 pass
575
576 with m.If(m_in.valid):
577 sync += r.req.valid.eq(1)
578 sync += r.req.load.eq(~(m_in.tlbie | m_in.tlbld))
579 sync += r.req.dcbz.eq(0)
580 sync += r.req.nc.eq(0)
581 sync += r.req.reserve.eq(0)
582 sync += r.req.virt_mode.eq(1)
583 sync += r.req.priv_mode.eq(1)
584 sync += r.req.addr.eq(m_in.addr)
585 sync += r.req.data.eq(m_in.pte)
586 sync += r.req.byte_sel.eq(~0) # Const -1 sets all to 0b111....
587 sync += r.tlbie.eq(m_in.tlbie)
588 sync += r.doall.eq(m_in.doall)
589 sync += r.tlbld.eq(m_in.tlbld)
590 sync += r.mmu_req.eq(1)
591 with m.Else():
592 sync += r.req.eq(d_in)
593 sync += r.tlbie.eq(0)
594 sync += r.doall.eq(0)
595 sync += r.tlbld.eq(0)
596 sync += r.mmu_req.eq(0)
597 with m.If(~(r1.full & r0_full)):
598 sync += r0.eq(r)
599 sync += r0_full.eq(r.req.valid)
600
601 def tlb_read(self, m, r0_stall, tlb_valid_way,
602 tlb_tag_way, tlb_pte_way, dtlb_valid_bits,
603 dtlb_tags, dtlb_ptes):
604 """TLB
605 Operates in the second cycle on the request latched in r0.req.
606 TLB updates write the entry at the end of the second cycle.
607 """
608 comb = m.d.comb
609 sync = m.d.sync
610 m_in, d_in = self.m_in, self.d_in
611
612 index = Signal(TLB_SET_BITS)
613 addrbits = Signal(TLB_SET_BITS)
614
615 amin = TLB_LG_PGSZ
616 amax = TLB_LG_PGSZ + TLB_SET_BITS
617
618 with m.If(m_in.valid):
619 comb += addrbits.eq(m_in.addr[amin : amax])
620 with m.Else():
621 comb += addrbits.eq(d_in.addr[amin : amax])
622 comb += index.eq(addrbits)
623
624 # If we have any op and the previous op isn't finished,
625 # then keep the same output for next cycle.
626 with m.If(~r0_stall):
627 sync += tlb_valid_way.eq(dtlb_valid_bits[index])
628 sync += tlb_tag_way.eq(dtlb_tags[index])
629 sync += tlb_pte_way.eq(dtlb_ptes[index])
630
631 def maybe_tlb_plrus(self, m, r1, tlb_plru_victim):
632 """Generate TLB PLRUs
633 """
634 comb = m.d.comb
635 sync = m.d.sync
636
637 if TLB_NUM_WAYS > 1:
638 for i in range(TLB_SET_SIZE):
639 # TLB PLRU interface
640 tlb_plru = PLRU(TLB_WAY_BITS)
641 setattr(m.submodules, "maybe_plru_%d" % i, tlb_plru)
642 tlb_plru_acc = Signal(TLB_WAY_BITS)
643 tlb_plru_acc_en = Signal()
644 tlb_plru_out = Signal(TLB_WAY_BITS)
645 
646 comb += tlb_plru.acc.eq(tlb_plru_acc)
647 comb += tlb_plru.acc_en.eq(tlb_plru_acc_en)
648 comb += tlb_plru_out.eq(tlb_plru.lru_o)
649 
650 # PLRU interface: count only hits to this TLB set as accesses
651 with m.If(r1.tlb_hit_index == i):
652 comb += tlb_plru_acc_en.eq(r1.tlb_hit)
653 with m.Else():
654 comb += tlb_plru_acc_en.eq(0)
655 comb += tlb_plru_acc.eq(r1.tlb_hit_way)
656 
657 comb += tlb_plru_victim[i].eq(tlb_plru_out)
658
659 def tlb_search(self, m, tlb_req_index, r0, r0_valid,
660 tlb_valid_way, tlb_tag_way, tlb_hit_way,
661 tlb_pte_way, pte, tlb_hit, valid_ra, perm_attr, ra):
662
663 comb = m.d.comb
664 sync = m.d.sync
665
666 hitway = Signal(TLB_WAY_BITS)
667 hit = Signal()
668 eatag = Signal(TLB_EA_TAG_BITS)
669
670 TLB_LG_END = TLB_LG_PGSZ + TLB_SET_BITS
671 comb += tlb_req_index.eq(r0.req.addr[TLB_LG_PGSZ : TLB_LG_END])
672 comb += eatag.eq(r0.req.addr[TLB_LG_END : 64 ])
673
674 for i in range(TLB_NUM_WAYS):
675 is_tag_hit = Signal()
676 comb += is_tag_hit.eq(tlb_valid_way[i]
677 & (read_tlb_tag(i, tlb_tag_way) == eatag))
678 with m.If(is_tag_hit):
679 comb += hitway.eq(i)
680 comb += hit.eq(1)
681
682 comb += tlb_hit.eq(hit & r0_valid)
683 comb += tlb_hit_way.eq(hitway)
684
685 with m.If(tlb_hit):
686 comb += pte.eq(read_tlb_pte(hitway, tlb_pte_way))
687 with m.Else():
688 comb += pte.eq(0)
689 comb += valid_ra.eq(tlb_hit | ~r0.req.virt_mode)
690 with m.If(r0.req.virt_mode):
691 comb += ra.eq(Cat(Const(0, ROW_OFF_BITS),
692 r0.req.addr[ROW_OFF_BITS:TLB_LG_PGSZ],
693 pte[TLB_LG_PGSZ:REAL_ADDR_BITS]))
694 comb += perm_attr.eq(extract_perm_attr(pte))
695 with m.Else():
696 comb += ra.eq(Cat(Const(0, ROW_OFF_BITS),
697 r0.req.addr[ROW_OFF_BITS:REAL_ADDR_BITS]))
698
699 comb += perm_attr.reference.eq(1)
700 comb += perm_attr.changed.eq(1)
701 comb += perm_attr.priv.eq(1)
702 comb += perm_attr.nocache.eq(0)
703 comb += perm_attr.rd_perm.eq(1)
704 comb += perm_attr.wr_perm.eq(1)
705
706 def tlb_update(self, m, r0_valid, r0, dtlb_valid_bits, tlb_req_index,
707 tlb_hit_way, tlb_hit, tlb_plru_victim, tlb_tag_way,
708 dtlb_tags, tlb_pte_way, dtlb_ptes):
709
710 comb = m.d.comb
711 sync = m.d.sync
712
713 tlbie = Signal()
714 tlbwe = Signal()
715
716 comb += tlbie.eq(r0_valid & r0.tlbie)
717 comb += tlbwe.eq(r0_valid & r0.tlbld)
718
719 m.submodules.tlb_update = d = DTLBUpdate()
720 with m.If(tlbie & r0.doall):
721 # clear all valid bits at once
722 for i in range(TLB_SET_SIZE):
723 sync += dtlb_valid_bits[i].eq(0)
724 with m.If(d.updated):
725 sync += dtlb_tags[tlb_req_index].eq(d.tb_out)
726 sync += dtlb_ptes[tlb_req_index].eq(d.pb_out)
727 with m.If(d.v_updated):
728 sync += dtlb_valid_bits[tlb_req_index].eq(d.db_out)
729
730 comb += d.dv.eq(dtlb_valid_bits[tlb_req_index])
731
732 comb += d.tlbie.eq(tlbie)
733 comb += d.tlbwe.eq(tlbwe)
734 comb += d.doall.eq(r0.doall)
735 comb += d.tlb_hit.eq(tlb_hit)
736 comb += d.tlb_hit_way.eq(tlb_hit_way)
737 comb += d.tlb_tag_way.eq(tlb_tag_way)
738 comb += d.tlb_pte_way.eq(tlb_pte_way)
739 comb += d.tlb_req_index.eq(tlb_req_index)
740
741 with m.If(tlb_hit):
742 comb += d.repl_way.eq(tlb_hit_way)
743 with m.Else():
744 comb += d.repl_way.eq(tlb_plru_victim[tlb_req_index])
745 comb += d.eatag.eq(r0.req.addr[TLB_LG_PGSZ + TLB_SET_BITS:64])
746 comb += d.pte_data.eq(r0.req.data)
747
748 def maybe_plrus(self, m, r1, plru_victim):
749 """Generate PLRUs
750 """
751 comb = m.d.comb
752 sync = m.d.sync
753
754 for i in range(NUM_LINES):
755 # PLRU interface
756 plru = PLRU(WAY_BITS)
757 setattr(m.submodules, "plru%d" % i, plru)
758 plru_acc = Signal(WAY_BITS)
759 plru_acc_en = Signal()
760 plru_out = Signal(WAY_BITS)
761
762 comb += plru.acc.eq(plru_acc)
763 comb += plru.acc_en.eq(plru_acc_en)
764 comb += plru_out.eq(plru.lru_o)
765
766 with m.If(r1.hit_index == i):
767 comb += plru_acc_en.eq(r1.cache_hit)
768
769 comb += plru_acc.eq(r1.hit_way)
770 comb += plru_victim[i].eq(plru_out)
771
772 def cache_tag_read(self, m, r0_stall, req_index, cache_tag_set, cache_tags):
773 """Cache tag RAM read port
774 """
775 comb = m.d.comb
776 sync = m.d.sync
777 m_in, d_in = self.m_in, self.d_in
778
779 index = Signal(INDEX_BITS)
780
781 with m.If(r0_stall):
782 comb += index.eq(req_index)
783 with m.Elif(m_in.valid):
784 comb += index.eq(get_index(m_in.addr))
785 with m.Else():
786 comb += index.eq(get_index(d_in.addr))
787 sync += cache_tag_set.eq(cache_tags[index])
788
789 def dcache_request(self, m, r0, ra, req_index, req_row, req_tag,
790 r0_valid, r1, cache_valid_bits, replace_way,
791 use_forward1_next, use_forward2_next,
792 req_hit_way, plru_victim, rc_ok, perm_attr,
793 valid_ra, perm_ok, access_ok, req_op, req_go,
794 tlb_pte_way,
795 tlb_hit, tlb_hit_way, tlb_valid_way, cache_tag_set,
796 cancel_store, req_same_tag, r0_stall, early_req_row):
797 """Cache request parsing and hit detection
798 """
799
800 comb = m.d.comb
801 sync = m.d.sync
802 m_in, d_in = self.m_in, self.d_in
803
804 is_hit = Signal()
805 hit_way = Signal(WAY_BITS)
806 op = Signal(Op)
807 opsel = Signal(3)
808 go = Signal()
809 nc = Signal()
810 hit_set = Array(Signal() for i in range(TLB_NUM_WAYS))
811 cache_valid_idx = Signal(NUM_WAYS)
812
813 # Extract line, row and tag from request
814 comb += req_index.eq(get_index(r0.req.addr))
815 comb += req_row.eq(get_row(r0.req.addr))
816 comb += req_tag.eq(get_tag(ra))
817
818 comb += go.eq(r0_valid & ~(r0.tlbie | r0.tlbld) & ~r1.ls_error)
819 comb += cache_valid_idx.eq(cache_valid_bits[req_index])
820
821 m.submodules.dcache_pend = dc = DCachePendingHit(tlb_pte_way,
822 tlb_valid_way, tlb_hit_way,
823 cache_valid_idx, cache_tag_set,
824 r0.req.addr,
825 hit_set)
826
827 comb += dc.tlb_hit.eq(tlb_hit)
828 comb += dc.reload_tag.eq(r1.reload_tag)
829 comb += dc.virt_mode.eq(r0.req.virt_mode)
830 comb += dc.go.eq(go)
831 comb += dc.req_index.eq(req_index)
832 comb += is_hit.eq(dc.is_hit)
833 comb += hit_way.eq(dc.hit_way)
834 comb += req_same_tag.eq(dc.rel_match)
835
836 # See if the request matches the line currently being reloaded
837 with m.If((r1.state == State.RELOAD_WAIT_ACK) &
838 (req_index == r1.store_index) & req_same_tag):
839 # For a store, consider this a hit even if the row isn't
840 # valid since it will be by the time we perform the store.
841 # For a load, check the appropriate row valid bit.
842 valid = r1.rows_valid[req_row % ROW_PER_LINE]
843 comb += is_hit.eq(~r0.req.load | valid)
844 comb += hit_way.eq(replace_way)
845
846 # Whether to use forwarded data for a load or not
847 comb += use_forward1_next.eq(0)
848 with m.If((get_row(r1.req.real_addr) == req_row) &
849 (r1.req.hit_way == hit_way)):
850 # Only need to consider r1.write_bram here, since if we
851 # are writing refill data here, then we don't have a
852 # cache hit this cycle on the line being refilled.
853 # (There is the possibility that the load following the
854 # load miss that started the refill could be to the old
855 # contents of the victim line, since it is a couple of
856 # cycles after the refill starts before we see the updated
857 # cache tag. In that case we don't use the bypass.)
858 comb += use_forward1_next.eq(r1.write_bram)
859 comb += use_forward2_next.eq(0)
860 with m.If((r1.forward_row1 == req_row) & (r1.forward_way1 == hit_way)):
861 comb += use_forward2_next.eq(r1.forward_valid1)
862
863 # The way that matched on a hit
864 comb += req_hit_way.eq(hit_way)
865
866 # The way to replace on a miss
867 with m.If(r1.write_tag):
868 comb += replace_way.eq(plru_victim[r1.store_index])
869 with m.Else():
870 comb += replace_way.eq(r1.store_way)
871
872 # work out whether we have permission for this access
873 # NB we don't yet implement AMR, thus no KUAP
874 comb += rc_ok.eq(perm_attr.reference
875 & (r0.req.load | perm_attr.changed)
876 )
877 comb += perm_ok.eq((r0.req.priv_mode | ~perm_attr.priv)
878 & (perm_attr.wr_perm
879 | (r0.req.load & perm_attr.rd_perm))
880 )
881 comb += access_ok.eq(valid_ra & perm_ok & rc_ok)
882 # Combine the request and cache hit status to decide what
883 # operation needs to be done
884 comb += nc.eq(r0.req.nc | perm_attr.nocache)
885 comb += op.eq(Op.OP_NONE)
886 with m.If(go):
887 with m.If(~access_ok):
888 comb += op.eq(Op.OP_BAD)
889 with m.Elif(cancel_store):
890 comb += op.eq(Op.OP_STCX_FAIL)
891 with m.Else():
892 comb += opsel.eq(Cat(is_hit, nc, r0.req.load))
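# opsel is Cat(is_hit, nc, load): bit 2 = load, bit 1 = nc,
# bit 0 = is_hit, so e.g. 0b101 is a cacheable load hit and
# 0b100 a cacheable load miss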
893 with m.Switch(opsel):
894 with m.Case(0b101):
895 comb += op.eq(Op.OP_LOAD_HIT)
896 with m.Case(0b100):
897 comb += op.eq(Op.OP_LOAD_MISS)
898 with m.Case(0b110):
899 comb += op.eq(Op.OP_LOAD_NC)
900 with m.Case(0b001):
901 comb += op.eq(Op.OP_STORE_HIT)
902 with m.Case(0b000):
903 comb += op.eq(Op.OP_STORE_MISS)
904 with m.Case(0b010):
905 comb += op.eq(Op.OP_STORE_MISS)
906 with m.Case(0b011):
907 comb += op.eq(Op.OP_BAD)
908 with m.Case(0b111):
909 comb += op.eq(Op.OP_BAD)
910 with m.Default():
911 comb += op.eq(Op.OP_NONE)
912 comb += req_op.eq(op)
913 comb += req_go.eq(go)
914
915 # Version of the row number that is valid one cycle earlier
916 # in the cases where we need to read the cache data BRAM.
917 # If we're stalling then we need to keep reading the last
918 # row requested.
919 with m.If(~r0_stall):
920 with m.If(m_in.valid):
921 comb += early_req_row.eq(get_row(m_in.addr))
922 with m.Else():
923 comb += early_req_row.eq(get_row(d_in.addr))
924 with m.Else():
925 comb += early_req_row.eq(req_row)
926
927 def reservation_comb(self, m, cancel_store, set_rsrv, clear_rsrv,
928 r0_valid, r0, reservation):
929 """Handle load-with-reservation and store-conditional instructions
930 """
931 comb = m.d.comb
932 sync = m.d.sync
933
934 with m.If(r0_valid & r0.req.reserve):
935
936 # XXX generate alignment interrupt if address
937 # is not aligned XXX or if r0.req.nc = '1'
938 with m.If(r0.req.load):
939 comb += set_rsrv.eq(1) # load with reservation
940 with m.Else():
941 comb += clear_rsrv.eq(1) # store conditional
942 with m.If(~reservation.valid | (r0.req.addr[LINE_OFF_BITS:64] != reservation.addr)):
943 comb += cancel_store.eq(1)
944
945 def reservation_reg(self, m, r0_valid, access_ok, set_rsrv, clear_rsrv,
946 reservation, r0):
947
948 comb = m.d.comb
949 sync = m.d.sync
950
951 with m.If(r0_valid & access_ok):
952 with m.If(clear_rsrv):
953 sync += reservation.valid.eq(0)
954 with m.Elif(set_rsrv):
955 sync += reservation.valid.eq(1)
956 sync += reservation.addr.eq(r0.req.addr[LINE_OFF_BITS:64])
957
958 def writeback_control(self, m, r1, cache_out):
959 """Return data for loads & completion control logic
960 """
961 comb = m.d.comb
962 sync = m.d.sync
963 d_out, m_out = self.d_out, self.m_out
964
965 data_out = Signal(64)
966 data_fwd = Signal(64)
967
968 # Use the bypass if are reading the row that was
969 # written 1 or 2 cycles ago, including for the
970 # slow_valid = 1 case (i.e. completing a load
971 # miss or a non-cacheable load).
972 with m.If(r1.use_forward1):
973 comb += data_fwd.eq(r1.forward_data1)
974 with m.Else():
975 comb += data_fwd.eq(r1.forward_data2)
976
977 comb += data_out.eq(cache_out[r1.hit_way])
978
979 for i in range(8):
980 with m.If(r1.forward_sel[i]):
981 dsel = data_fwd.word_select(i, 8)
982 comb += data_out.word_select(i, 8).eq(dsel)
983
984 comb += d_out.valid.eq(r1.ls_valid)
985 comb += d_out.data.eq(data_out)
986 comb += d_out.store_done.eq(~r1.stcx_fail)
987 comb += d_out.error.eq(r1.ls_error)
988 comb += d_out.cache_paradox.eq(r1.cache_paradox)
989
990 # Outputs to MMU
991 comb += m_out.done.eq(r1.mmu_done)
992 comb += m_out.err.eq(r1.mmu_error)
993 comb += m_out.data.eq(data_out)
994
995 # We have a valid load or store hit or we just completed
996 # a slow op such as a load miss, a NC load or a store
997 #
998 # Note: the load hit is delayed by one cycle. However it
999 # can still not collide with r.slow_valid (well unless I
1000 # miscalculated) because slow_valid can only be set on a
1001 # subsequent request and not on its first cycle (the state
1002 # machine must have advanced), which makes slow_valid
1003 # at least 2 cycles from the previous hit_load_valid.
1004
1005 # Sanity: Only one of these must be set in any given cycle
1006
1007 if False: # TODO: need Display to get this to work
1008 assert (r1.slow_valid & r1.stcx_fail) != 1, \
1009 "unexpected slow_valid collision with stcx_fail"
1010
1011 assert ((r1.slow_valid | r1.stcx_fail) | r1.hit_load_valid) != 1, \
1012 "unexpected hit_load_delayed collision with slow_valid"
1013
1014 with m.If(~r1.mmu_req):
1015 # Request came from loadstore1...
1016 # Load hit case is the standard path
1017 with m.If(r1.hit_load_valid):
1018 #Display(f"completing load hit data={data_out}")
1019 pass
1020
1021 # error cases complete without stalling
1022 with m.If(r1.ls_error):
1023 # Display("completing ld/st with error")
1024 pass
1025
1026 # Slow ops (load miss, NC, stores)
1027 with m.If(r1.slow_valid):
1028 #Display(f"completing store or load miss data={data_out}")
1029 pass
1030
1031 with m.Else():
1032 # Request came from MMU
1033 with m.If(r1.hit_load_valid):
1034 # Display(f"completing load hit to MMU, data={m_out.data}")
1035 pass
1036 # error cases complete without stalling
1037 with m.If(r1.mmu_error):
1038 #Display("completing MMU ld with error")
1039 pass
1040
1041 # Slow ops (i.e. load miss)
1042 with m.If(r1.slow_valid):
1043 #Display("completing MMU load miss, data={m_out.data}")
1044 pass
1045
1046 def rams(self, m, r1, early_req_row, cache_out, replace_way):
1047 """rams
1048 Generate a cache RAM for each way. This handles the normal
1049 reads, writes from reloads and the special store-hit update
1050 path as well.
1051
1052 Note: the BRAMs have an extra read buffer, meaning the output
1053 is pipelined an extra cycle. This differs from the
1054 icache. The writeback logic needs to take that into
1055 account by using 1-cycle delayed signals for load hits.
1056 """
1057 comb = m.d.comb
1058 wb_in = self.wb_in
1059
1060 for i in range(NUM_WAYS):
1061 do_read = Signal()
1062 rd_addr = Signal(ROW_BITS)
1063 do_write = Signal()
1064 wr_addr = Signal(ROW_BITS)
1065 wr_data = Signal(WB_DATA_BITS)
1066 wr_sel = Signal(ROW_SIZE)
1067 wr_sel_m = Signal(ROW_SIZE)
1068 _d_out = Signal(WB_DATA_BITS)
1069
1070 way = CacheRam(ROW_BITS, WB_DATA_BITS, True)
1071 setattr(m.submodules, "cacheram_%d" % i, way)
1072
1073 comb += way.rd_en.eq(do_read)
1074 comb += way.rd_addr.eq(rd_addr)
1075 comb += _d_out.eq(way.rd_data_o)
1076 comb += way.wr_sel.eq(wr_sel_m)
1077 comb += way.wr_addr.eq(wr_addr)
1078 comb += way.wr_data.eq(wr_data)
1079
1080 # Cache hit reads
1081 comb += do_read.eq(1)
1082 comb += rd_addr.eq(early_req_row)
1083 comb += cache_out[i].eq(_d_out)
1084
1085 # Write mux:
1086 #
1087 # Defaults to wishbone read responses (cache refill)
1088 #
1089 # For timing, the mux on wr_data/sel/addr is not
1090 # dependent on anything other than the current state.
1091
1092 with m.If(r1.write_bram):
1093 # Write store data to BRAM. This happens one
1094 # cycle after the store is in r0.
1095 comb += wr_data.eq(r1.req.data)
1096 comb += wr_sel.eq(r1.req.byte_sel)
1097 comb += wr_addr.eq(get_row(r1.req.real_addr))
1098
1099 with m.If(i == r1.req.hit_way):
1100 comb += do_write.eq(1)
1101 with m.Else():
1102 # Otherwise, we might be doing a reload or a DCBZ
1103 with m.If(r1.dcbz):
1104 comb += wr_data.eq(0)
1105 with m.Else():
1106 comb += wr_data.eq(wb_in.dat)
1107 comb += wr_addr.eq(r1.store_row)
1108 comb += wr_sel.eq(~0) # all 1s
1109
1110 with m.If((r1.state == State.RELOAD_WAIT_ACK)
1111 & wb_in.ack & (replace_way == i)):
1112 comb += do_write.eq(1)
1113
1114 # Mask write selects with do_write since BRAM
1115 # doesn't have a global write-enable
1116 with m.If(do_write):
1117 comb += wr_sel_m.eq(wr_sel)
1118
1119 # Cache hit synchronous machine for the easy case.
1120 # This handles load hits.
1121 # It also handles error cases (TLB miss, cache paradox)
1122 def dcache_fast_hit(self, m, req_op, r0_valid, r0, r1,
1123 req_hit_way, req_index, access_ok,
1124 tlb_hit, tlb_hit_way, tlb_req_index):
1125
1126 comb = m.d.comb
1127 sync = m.d.sync
1128
1129 with m.If(req_op != Op.OP_NONE):
1130 #Display(f"op:{req_op} addr:{r0.req.addr} nc: {r0.req.nc}" \
1131 # f"idx:{req_index} tag:{req_tag} way: {req_hit_way}"
1132 # )
1133 pass
1134
1135 with m.If(r0_valid):
1136 sync += r1.mmu_req.eq(r0.mmu_req)
1137
1138 # Fast path for load/store hits.
1139 # Set signals for the writeback controls.
1140 sync += r1.hit_way.eq(req_hit_way)
1141 sync += r1.hit_index.eq(req_index)
1142
1143 with m.If(req_op == Op.OP_LOAD_HIT):
1144 sync += r1.hit_load_valid.eq(1)
1145 with m.Else():
1146 sync += r1.hit_load_valid.eq(0)
1147
1148 with m.If((req_op == Op.OP_LOAD_HIT) | (req_op == Op.OP_STORE_HIT)):
1149 sync += r1.cache_hit.eq(1)
1150 with m.Else():
1151 sync += r1.cache_hit.eq(0)
1152
1153 with m.If(req_op == Op.OP_BAD):
1154 # Display(f"Signalling ld/st error valid_ra={valid_ra}"
1155 # f"rc_ok={rc_ok} perm_ok={perm_ok}"
1156 sync += r1.ls_error.eq(~r0.mmu_req)
1157 sync += r1.mmu_error.eq(r0.mmu_req)
1158 sync += r1.cache_paradox.eq(access_ok)
1159
1160 with m.Else():
1161 sync += r1.ls_error.eq(0)
1162 sync += r1.mmu_error.eq(0)
1163 sync += r1.cache_paradox.eq(0)
1164
1165 with m.If(req_op == Op.OP_STCX_FAIL):
1166 sync += r1.stcx_fail.eq(1)
1167 with m.Else():
1168 sync += r1.stcx_fail.eq(0)
1169
1170 # Record TLB hit information for updating TLB PLRU
1171 sync += r1.tlb_hit.eq(tlb_hit)
1172 sync += r1.tlb_hit_way.eq(tlb_hit_way)
1173 sync += r1.tlb_hit_index.eq(tlb_req_index)
1174
1175 # Memory accesses are handled by this state machine:
1176 #
1177 # * Cache load miss/reload (in conjunction with "rams")
1178 # * Load hits for non-cachable forms
1179 # * Stores (the collision case is handled in "rams")
1180 #
1181 # All wishbone requests generation is done here.
1182 # This machine operates at stage 1.
1183 def dcache_slow(self, m, r1, use_forward1_next, use_forward2_next,
1184 cache_valid_bits, r0, replace_way,
1185 req_hit_way, req_same_tag,
1186 r0_valid, req_op, cache_tag, req_go, ra):
1187
1188 comb = m.d.comb
1189 sync = m.d.sync
1190 wb_in = self.wb_in
1191
1192 req = MemAccessRequest()
1193 acks = Signal(3)
1194 adjust_acks = Signal(3)
1195 stbs_done = Signal()
1196
1197 sync += r1.use_forward1.eq(use_forward1_next)
1198 sync += r1.forward_sel.eq(0)
1199
1200 with m.If(use_forward1_next):
1201 sync += r1.forward_sel.eq(r1.req.byte_sel)
1202 with m.Elif(use_forward2_next):
1203 sync += r1.forward_sel.eq(r1.forward_sel1)
1204
1205 sync += r1.forward_data2.eq(r1.forward_data1)
1206 with m.If(r1.write_bram):
1207 sync += r1.forward_data1.eq(r1.req.data)
1208 sync += r1.forward_sel1.eq(r1.req.byte_sel)
1209 sync += r1.forward_way1.eq(r1.req.hit_way)
1210 sync += r1.forward_row1.eq(get_row(r1.req.real_addr))
1211 sync += r1.forward_valid1.eq(1)
1212 with m.Else():
1213 with m.If(r1.dcbz):
1214 sync += r1.forward_data1.eq(0)
1215 with m.Else():
1216 sync += r1.forward_data1.eq(wb_in.dat)
1217 sync += r1.forward_sel1.eq(~0) # all 1s
1218 sync += r1.forward_way1.eq(replace_way)
1219 sync += r1.forward_row1.eq(r1.store_row)
1220 sync += r1.forward_valid1.eq(0)
1221
1222 # One cycle pulses reset
1223 sync += r1.slow_valid.eq(0)
1224 sync += r1.write_bram.eq(0)
1225 sync += r1.inc_acks.eq(0)
1226 sync += r1.dec_acks.eq(0)
1227
1228 sync += r1.ls_valid.eq(0)
1229 # complete tlbies and TLB loads in the third cycle
1230 sync += r1.mmu_done.eq(r0_valid & (r0.tlbie | r0.tlbld))
1231
1232 with m.If((req_op == Op.OP_LOAD_HIT)
1233 | (req_op == Op.OP_STCX_FAIL)):
1234 with m.If(~r0.mmu_req):
1235 sync += r1.ls_valid.eq(1)
1236 with m.Else():
1237 sync += r1.mmu_done.eq(1)
1238
1239 with m.If(r1.write_tag):
1240 # Store new tag in selected way
1241 for i in range(NUM_WAYS):
1242 with m.If(i == replace_way):
1243 ct = Signal(TAG_RAM_WIDTH)
1244 comb += ct.eq(cache_tag[r1.store_index])
1245 comb += ct.word_select(i, TAG_WIDTH).eq(r1.reload_tag)
1246 sync += cache_tag[r1.store_index].eq(ct)
1247 sync += r1.store_way.eq(replace_way)
1248 sync += r1.write_tag.eq(0)
1249
1250 # Take request from r1.req if there is one there,
1251 # else from req_op, ra, etc.
1252 with m.If(r1.full):
1253 comb += req.eq(r1.req)
1254 with m.Else():
1255 comb += req.op.eq(req_op)
1256 comb += req.valid.eq(req_go)
1257 comb += req.mmu_req.eq(r0.mmu_req)
1258 comb += req.dcbz.eq(r0.req.dcbz)
1259 comb += req.real_addr.eq(ra)
1260
1261 with m.If(~r0.req.dcbz):
1262 comb += req.data.eq(r0.req.data)
1263 with m.Else():
1264 comb += req.data.eq(0)
1265
1266 # Select all bytes for dcbz
1267 # and for cacheable loads
1268 with m.If(r0.req.dcbz | (r0.req.load & ~r0.req.nc)):
1269 comb += req.byte_sel.eq(~0) # all 1s
1270 with m.Else():
1271 comb += req.byte_sel.eq(r0.req.byte_sel)
1272 comb += req.hit_way.eq(req_hit_way)
1273 comb += req.same_tag.eq(req_same_tag)
1274
1275 # Store the incoming request from r0,
1276 # if it is a slow request
1277 # Note that r1.full = 1 implies req_op = OP_NONE
1278 with m.If((req_op == Op.OP_LOAD_MISS)
1279 | (req_op == Op.OP_LOAD_NC)
1280 | (req_op == Op.OP_STORE_MISS)
1281 | (req_op == Op.OP_STORE_HIT)):
1282 sync += r1.req.eq(req)
1283 sync += r1.full.eq(1)
1284
1285 # Main state machine
1286 with m.Switch(r1.state):
1287
1288 with m.Case(State.IDLE):
1289 # XXX check 'left downto. probably means len(r1.wb.adr)
1290 # r1.wb.adr <= req.real_addr(
1291 # r1.wb.adr'left downto 0
1292 # );
1293 sync += r1.wb.adr.eq(req.real_addr)
1294 sync += r1.wb.sel.eq(req.byte_sel)
1295 sync += r1.wb.dat.eq(req.data)
1296 sync += r1.dcbz.eq(req.dcbz)
1297
1298 # Keep track of our index and way
1299 # for subsequent stores.
1300 sync += r1.store_index.eq(get_index(req.real_addr))
1301 sync += r1.store_row.eq(get_row(req.real_addr))
1302 sync += r1.end_row_ix.eq(
1303 get_row_of_line(get_row(req.real_addr))
1304 )
1305 sync += r1.reload_tag.eq(get_tag(req.real_addr))
1306 sync += r1.req.same_tag.eq(1)
1307
1308 with m.If(req.op == Op.OP_STORE_HIT):
1309 sync += r1.store_way.eq(req.hit_way)
1310
1311 # Reset per-row valid bits,
1312 # ready for handling OP_LOAD_MISS
1313 for i in range(ROW_PER_LINE):
1314 sync += r1.rows_valid[i].eq(0)
1315
1316 with m.Switch(req.op):
1317 with m.Case(Op.OP_LOAD_HIT):
1318 # stay in IDLE state
1319 pass
1320
1321 with m.Case(Op.OP_LOAD_MISS):
1322 #Display(f"cache miss real addr:" \
1323 # f"{req_real_addr}" \
1324 # f" idx:{get_index(req_real_addr)}" \
1325 # f" tag:{get_tag(req.real_addr)}")
1326 pass
1327
1328 # Start the wishbone cycle
1329 sync += r1.wb.we.eq(0)
1330 sync += r1.wb.cyc.eq(1)
1331 sync += r1.wb.stb.eq(1)
1332
1333 # Track that we had one request sent
1334 sync += r1.state.eq(State.RELOAD_WAIT_ACK)
1335 sync += r1.write_tag.eq(1)
1336
1337 with m.Case(Op.OP_LOAD_NC):
1338 sync += r1.wb.cyc.eq(1)
1339 sync += r1.wb.stb.eq(1)
1340 sync += r1.wb.we.eq(0)
1341 sync += r1.state.eq(State.NC_LOAD_WAIT_ACK)
1342
1343 with m.Case(Op.OP_STORE_HIT, Op.OP_STORE_MISS):
1344 with m.If(~req.dcbz):
1345 sync += r1.state.eq(State.STORE_WAIT_ACK)
1346 sync += r1.acks_pending.eq(1)
1347 sync += r1.full.eq(0)
1348 sync += r1.slow_valid.eq(1)
1349
1350 with m.If(~req.mmu_req):
1351 sync += r1.ls_valid.eq(1)
1352 with m.Else():
1353 sync += r1.mmu_done.eq(1)
1354
1355 with m.If(req.op == Op.OP_STORE_HIT):
1356 sync += r1.write_bram.eq(1)
1357 with m.Else():
1358 sync += r1.state.eq(State.RELOAD_WAIT_ACK)
1359
1360 with m.If(req.op == Op.OP_STORE_MISS):
1361 sync += r1.write_tag.eq(1)
1362
1363 sync += r1.wb.we.eq(1)
1364 sync += r1.wb.cyc.eq(1)
1365 sync += r1.wb.stb.eq(1)
1366
1367 # OP_NONE and OP_BAD do nothing
1368 # OP_BAD & OP_STCX_FAIL were
1369 # handled above already
1370 with m.Case(Op.OP_NONE):
1371 pass
1372 with m.Case(Op.OP_BAD):
1373 pass
1374 with m.Case(Op.OP_STCX_FAIL):
1375 pass
1376
1377 with m.Case(State.RELOAD_WAIT_ACK):
1378 # Requests are all sent if stb is 0
1379 comb += stbs_done.eq(~r1.wb.stb)
1380
1381 with m.If(~wb_in.stall & ~stbs_done):
1382 # That was the last word?
1383 # We are done sending.
1384 # Clear stb and set stbs_done
1385 # so we can handle an eventual
1386 # last ack on the same cycle.
1387 with m.If(is_last_row_addr(
1388 r1.wb.adr, r1.end_row_ix)):
1389 sync += r1.wb.stb.eq(0)
1390 comb += stbs_done.eq(0)
1391
1392 # Calculate the next row address in the current cache line
1393 rarange = r1.wb.adr[ROW_OFF_BITS : LINE_OFF_BITS]
1394 sync += rarange.eq(rarange + 1)
1395
1396 # Incoming acks processing
1397 sync += r1.forward_valid1.eq(wb_in.ack)
1398 with m.If(wb_in.ack):
1399 # XXX needs an Array bit-accessor here
1400 sync += r1.rows_valid[r1.store_row % ROW_PER_LINE].eq(1)
1401
1402 # If this is the data we were looking for,
1403 # we can complete the request next cycle.
1404 # Compare the whole address in case the
1405 # request in r1.req is not the one that
1406 # started this refill.
1407 with m.If(r1.full & r1.req.same_tag &
1408 ((r1.dcbz & r1.req.dcbz) |
1409 (~r1.dcbz & (r1.req.op == Op.OP_LOAD_MISS))) &
1410 (r1.store_row == get_row(r1.req.real_addr))):
1411 sync += r1.full.eq(0)
1412 sync += r1.slow_valid.eq(1)
1413 with m.If(~r1.mmu_req):
1414 sync += r1.ls_valid.eq(1)
1415 with m.Else():
1416 sync += r1.mmu_done.eq(1)
1417 sync += r1.forward_sel.eq(~0) # all 1s
1418 sync += r1.use_forward1.eq(1)
1419
1420 # Check for completion
1421 with m.If(stbs_done & is_last_row(r1.store_row,
1422 r1.end_row_ix)):
1423 # Complete wishbone cycle
1424 sync += r1.wb.cyc.eq(0)
1425
1426 # Cache line is now valid
1427 cv = Signal(NUM_WAYS)
1428 comb += cv.eq(cache_valid_bits[r1.store_index])
1429 comb += cv.bit_select(r1.store_way, 1).eq(1)
sync += cache_valid_bits[r1.store_index].eq(cv)
1430 sync += r1.state.eq(State.IDLE)
1431
1432 # Increment store row counter
1433 sync += r1.store_row.eq(next_row(r1.store_row))
1434
1435 with m.Case(State.STORE_WAIT_ACK):
1436 comb += stbs_done.eq(~r1.wb.stb)
1437 comb += acks.eq(r1.acks_pending)
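# acks_pending counts wishbone stores that have been sent but not
# yet acknowledged: inc_acks is set below when a further store is
# issued, dec_acks on each incoming ack, and the cycle is closed
# once stbs_done and only one ack remains outstanding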
1438
1439 with m.If(r1.inc_acks != r1.dec_acks):
1440 with m.If(r1.inc_acks):
1441 comb += adjust_acks.eq(acks + 1)
1442 with m.Else():
1443 comb += adjust_acks.eq(acks - 1)
1444 with m.Else():
1445 comb += adjust_acks.eq(acks)
1446
1447 sync += r1.acks_pending.eq(adjust_acks)
1448
1449 # Clear stb when slave accepted request
1450 with m.If(~wb_in.stall):
1451 # See if there is another store waiting
1452 # to be done which is in the same real page.
1453 with m.If(req.valid):
1454 ra = req.real_addr[0:SET_SIZE_BITS]
1455 sync += r1.wb.adr[0:SET_SIZE_BITS].eq(ra)
1456 sync += r1.wb.dat.eq(req.data)
1457 sync += r1.wb.sel.eq(req.byte_sel)
1458
1459 with m.Elif((adjust_acks < 7) & req.same_tag &
1460 ((req.op == Op.OP_STORE_MISS)
1461 | (req.op == Op.OP_STORE_HIT))):
1462 sync += r1.wb.stb.eq(1)
1463 comb += stbs_done.eq(0)
1464
1465 with m.If(req.op == Op.OP_STORE_HIT):
1466 sync += r1.write_bram.eq(1)
1467 sync += r1.full.eq(0)
1468 sync += r1.slow_valid.eq(1)
1469
1470 # Store requests never come from the MMU
1471 sync += r1.ls_valid.eq(1)
1472 comb += stbs_done.eq(0)
1473 sync += r1.inc_acks.eq(1)
1474 with m.Else():
1475 sync += r1.wb.stb.eq(0)
1476 comb += stbs_done.eq(1)
1477
1478 # Got ack ? See if complete.
1479 with m.If(wb_in.ack):
1480 with m.If(stbs_done & (adjust_acks == 1)):
1481 sync += r1.state.eq(State.IDLE)
1482 sync += r1.wb.cyc.eq(0)
1483 sync += r1.wb.stb.eq(0)
1484 sync += r1.dec_acks.eq(1)
1485
1486 with m.Case(State.NC_LOAD_WAIT_ACK):
1487 # Clear stb when slave accepted request
1488 with m.If(~wb_in.stall):
1489 sync += r1.wb.stb.eq(0)
1490
1491 # Got ack ? complete.
1492 with m.If(wb_in.ack):
1493 sync += r1.state.eq(State.IDLE)
1494 sync += r1.full.eq(0)
1495 sync += r1.slow_valid.eq(1)
1496
1497 with m.If(~r1.mmu_req):
1498 sync += r1.ls_valid.eq(1)
1499 with m.Else():
1500 sync += r1.mmu_done.eq(1)
1501
1502 sync += r1.forward_sel.eq(~0) # all 1s
1503 sync += r1.use_forward1.eq(1)
1504 sync += r1.wb.cyc.eq(0)
1505 sync += r1.wb.stb.eq(0)
1506
1507 def dcache_log(self, m, r1, valid_ra, tlb_hit_way, req_op, stall_out):
1508
1509 sync = m.d.sync
1510 d_out, wb_in, log_out = self.d_out, self.wb_in, self.log_out
1511
1512 sync += log_out.eq(Cat(r1.state[:3], valid_ra, tlb_hit_way[:3],
1513 stall_out, req_op[:3], d_out.valid, d_out.error,
1514 r1.wb.cyc, r1.wb.stb, wb_in.ack, wb_in.stall,
1515 r1.wb.adr[3:6]))
1516
1517 def elaborate(self, platform):
1518
1519 m = Module()
1520 comb = m.d.comb
1521
1522 # Storage. Hopefully "cache_rows" is a BRAM, the rest is LUTs
1523 cache_tags = CacheTagArray()
1524 cache_tag_set = Signal(TAG_RAM_WIDTH)
1525 cache_valid_bits = CacheValidBitsArray()
1526
1527 # TODO attribute ram_style : string;
1528 # TODO attribute ram_style of cache_tags : signal is "distributed";
1529
1530 """note: these are passed to nmigen.hdl.Memory as "attributes".
1531 don't know how, just that they are.
1532 """
1533 dtlb_valid_bits = TLBValidBitsArray()
1534 dtlb_tags = TLBTagsArray()
1535 dtlb_ptes = TLBPtesArray()
1536 # TODO attribute ram_style of
1537 # dtlb_tags : signal is "distributed";
1538 # TODO attribute ram_style of
1539 # dtlb_ptes : signal is "distributed";
1540
1541 r0 = RegStage0()
1542 r0_full = Signal()
1543
1544 r1 = RegStage1()
1545
1546 reservation = Reservation()
1547
1548 # Async signals on incoming request
1549 req_index = Signal(INDEX_BITS)
1550 req_row = Signal(ROW_BITS)
1551 req_hit_way = Signal(WAY_BITS)
1552 req_tag = Signal(TAG_BITS)
1553 req_op = Signal(Op)
1554 req_data = Signal(64)
1555 req_same_tag = Signal()
1556 req_go = Signal()
1557
1558 early_req_row = Signal(ROW_BITS)
1559
1560 cancel_store = Signal()
1561 set_rsrv = Signal()
1562 clear_rsrv = Signal()
1563
1564 r0_valid = Signal()
1565 r0_stall = Signal()
1566
1567 use_forward1_next = Signal()
1568 use_forward2_next = Signal()
1569
1570 cache_out = CacheRamOut()
1571
1572 plru_victim = PLRUOut()
1573 replace_way = Signal(WAY_BITS)
1574
1575 # Wishbone read/write/cache write formatting signals
1576 bus_sel = Signal(8)
1577
1578 # TLB signals
1579 tlb_tag_way = Signal(TLB_TAG_WAY_BITS)
1580 tlb_pte_way = Signal(TLB_PTE_WAY_BITS)
1581 tlb_valid_way = Signal(TLB_NUM_WAYS)
1582 tlb_req_index = Signal(TLB_SET_BITS)
1583 tlb_hit = Signal()
1584 tlb_hit_way = Signal(TLB_WAY_BITS)
1585 pte = Signal(TLB_PTE_BITS)
1586 ra = Signal(REAL_ADDR_BITS)
1587 valid_ra = Signal()
1588 perm_attr = PermAttr()
1589 rc_ok = Signal()
1590 perm_ok = Signal()
1591 access_ok = Signal()
1592
1593 tlb_plru_victim = TLBPLRUOut()
1594
1595 # we don't yet handle collisions between loadstore1 requests
1596 # and MMU requests
1597 comb += self.m_out.stall.eq(0)
1598
1599 # Hold off the request in r0 when r1 has an uncompleted request
1600 comb += r0_stall.eq(r0_full & r1.full)
1601 comb += r0_valid.eq(r0_full & ~r1.full)
1602 comb += self.stall_out.eq(r0_stall)
1603
1604 # Wire up wishbone request latch out of stage 1
1605 comb += self.wb_out.eq(r1.wb)
1606
1607 # call sub-functions putting everything together, using shared
1608 # signals established above
1609 self.stage_0(m, r0, r1, r0_full)
1610 self.tlb_read(m, r0_stall, tlb_valid_way,
1611 tlb_tag_way, tlb_pte_way, dtlb_valid_bits,
1612 dtlb_tags, dtlb_ptes)
1613 self.tlb_search(m, tlb_req_index, r0, r0_valid,
1614 tlb_valid_way, tlb_tag_way, tlb_hit_way,
1615 tlb_pte_way, pte, tlb_hit, valid_ra, perm_attr, ra)
1616 self.tlb_update(m, r0_valid, r0, dtlb_valid_bits, tlb_req_index,
1617 tlb_hit_way, tlb_hit, tlb_plru_victim, tlb_tag_way,
1618 dtlb_tags, tlb_pte_way, dtlb_ptes)
1619 self.maybe_plrus(m, r1, plru_victim)
self.maybe_tlb_plrus(m, r1, tlb_plru_victim)
1620 self.cache_tag_read(m, r0_stall, req_index, cache_tag_set, cache_tags)
1621 self.dcache_request(m, r0, ra, req_index, req_row, req_tag,
1622 r0_valid, r1, cache_valid_bits, replace_way,
1623 use_forward1_next, use_forward2_next,
1624 req_hit_way, plru_victim, rc_ok, perm_attr,
1625 valid_ra, perm_ok, access_ok, req_op, req_go,
1626 tlb_pte_way,
1627 tlb_hit, tlb_hit_way, tlb_valid_way, cache_tag_set,
1628 cancel_store, req_same_tag, r0_stall, early_req_row)
1629 self.reservation_comb(m, cancel_store, set_rsrv, clear_rsrv,
1630 r0_valid, r0, reservation)
1631 self.reservation_reg(m, r0_valid, access_ok, set_rsrv, clear_rsrv,
1632 reservation, r0)
1633 self.writeback_control(m, r1, cache_out)
1634 self.rams(m, r1, early_req_row, cache_out, replace_way)
1635 self.dcache_fast_hit(m, req_op, r0_valid, r0, r1,
1636 req_hit_way, req_index, access_ok,
1637 tlb_hit, tlb_hit_way, tlb_req_index)
1638 self.dcache_slow(m, r1, use_forward1_next, use_forward2_next,
1639 cache_valid_bits, r0, replace_way,
1640 req_hit_way, req_same_tag,
1641 r0_valid, req_op, cache_tags, req_go, ra)
1642 #self.dcache_log(m, r1, valid_ra, tlb_hit_way, req_op, self.stall_out)
1643
1644 return m
1645
1646
1647 # dcache_tb.vhdl
1648 #
1649 # entity dcache_tb is
1650 # end dcache_tb;
1651 #
1652 # architecture behave of dcache_tb is
1653 # signal clk : std_ulogic;
1654 # signal rst : std_ulogic;
1655 #
1656 # signal d_in : Loadstore1ToDcacheType;
1657 # signal d_out : DcacheToLoadstore1Type;
1658 #
1659 # signal m_in : MmuToDcacheType;
1660 # signal m_out : DcacheToMmuType;
1661 #
1662 # signal wb_bram_in : wishbone_master_out;
1663 # signal wb_bram_out : wishbone_slave_out;
1664 #
1665 # constant clk_period : time := 10 ns;
1666 # begin
1667 # dcache0: entity work.dcache
1668 # generic map(
1669 #
1670 # LINE_SIZE => 64,
1671 # NUM_LINES => 4
1672 # )
1673 # port map(
1674 # clk => clk,
1675 # rst => rst,
1676 # d_in => d_in,
1677 # d_out => d_out,
1678 # m_in => m_in,
1679 # m_out => m_out,
1680 # wishbone_out => wb_bram_in,
1681 # wishbone_in => wb_bram_out
1682 # );
1683 #
1684 # -- BRAM Memory slave
1685 # bram0: entity work.wishbone_bram_wrapper
1686 # generic map(
1687 # MEMORY_SIZE => 1024,
1688 # RAM_INIT_FILE => "icache_test.bin"
1689 # )
1690 # port map(
1691 # clk => clk,
1692 # rst => rst,
1693 # wishbone_in => wb_bram_in,
1694 # wishbone_out => wb_bram_out
1695 # );
1696 #
1697 # clk_process: process
1698 # begin
1699 # clk <= '0';
1700 # wait for clk_period/2;
1701 # clk <= '1';
1702 # wait for clk_period/2;
1703 # end process;
1704 #
1705 # rst_process: process
1706 # begin
1707 # rst <= '1';
1708 # wait for 2*clk_period;
1709 # rst <= '0';
1710 # wait;
1711 # end process;
1712 #
1713 # stim: process
1714 # begin
1715 # -- Clear stuff
1716 # d_in.valid <= '0';
1717 # d_in.load <= '0';
1718 # d_in.nc <= '0';
1719 # d_in.addr <= (others => '0');
1720 # d_in.data <= (others => '0');
1721 # m_in.valid <= '0';
1722 # m_in.addr <= (others => '0');
1723 # m_in.pte <= (others => '0');
1724 #
1725 # wait for 4*clk_period;
1726 # wait until rising_edge(clk);
1727 #
1728 # -- Cacheable read of address 4
1729 # d_in.load <= '1';
1730 # d_in.nc <= '0';
1731 # d_in.addr <= x"0000000000000004";
1732 # d_in.valid <= '1';
1733 # wait until rising_edge(clk);
1734 # d_in.valid <= '0';
1735 #
1736 # wait until rising_edge(clk) and d_out.valid = '1';
1737 # assert d_out.data = x"0000000100000000"
1738 # report "data @" & to_hstring(d_in.addr) &
1739 # "=" & to_hstring(d_out.data) &
1740 # " expected 0000000100000000"
1741 # severity failure;
1742 # -- wait for clk_period;
1743 #
1744 # -- Cacheable read of address 30
1745 # d_in.load <= '1';
1746 # d_in.nc <= '0';
1747 # d_in.addr <= x"0000000000000030";
1748 # d_in.valid <= '1';
1749 # wait until rising_edge(clk);
1750 # d_in.valid <= '0';
1751 #
1752 # wait until rising_edge(clk) and d_out.valid = '1';
1753 # assert d_out.data = x"0000000D0000000C"
1754 # report "data @" & to_hstring(d_in.addr) &
1755 # "=" & to_hstring(d_out.data) &
1756 # " expected 0000000D0000000C"
1757 # severity failure;
1758 #
1759 # -- Non-cacheable read of address 100
1760 # d_in.load <= '1';
1761 # d_in.nc <= '1';
1762 # d_in.addr <= x"0000000000000100";
1763 # d_in.valid <= '1';
1764 # wait until rising_edge(clk);
1765 # d_in.valid <= '0';
1766 # wait until rising_edge(clk) and d_out.valid = '1';
1767 # assert d_out.data = x"0000004100000040"
1768 # report "data @" & to_hstring(d_in.addr) &
1769 # "=" & to_hstring(d_out.data) &
1770 # " expected 0000004100000040"
1771 # severity failure;
1772 #
1773 # wait until rising_edge(clk);
1774 # wait until rising_edge(clk);
1775 # wait until rising_edge(clk);
1776 # wait until rising_edge(clk);
1777 #
1778 # std.env.finish;
1779 # end process;
1780 # end;
1781 def dcache_sim(dut):
1782 # clear stuff
1783 yield dut.d_in.valid.eq(0)
1784 yield dut.d_in.load.eq(0)
1785 yield dut.d_in.nc.eq(0)
1786 yield dut.d_in.addr.eq(0)
1787 yield dut.d_in.data.eq(0)
1788 yield dut.m_in.valid.eq(0)
1789 yield dut.m_in.addr.eq(0)
1790 yield dut.m_in.pte.eq(0)
1791 # wait 4 * clk_period
1792 yield
1793 yield
1794 yield
1795 yield
1796 # wait_until rising_edge(clk)
1797 yield
1798 # Cacheable read of address 4
1799 yield dut.d_in.load.eq(1)
1800 yield dut.d_in.nc.eq(0)
1801 yield dut.d_in.addr.eq(Const(0x0000000000000004, 64))
1802 yield dut.d_in.valid.eq(1)
1803 # wait-until rising_edge(clk)
1804 yield
1805 yield dut.d_in.valid.eq(0)
1806 yield
1807 while not (yield dut.d_out.valid):
1808 yield
1809 data = yield dut.d_out.data
1810 assert data == 0x0000000100000000, f"data @ 0x04 = {data:x} expected 0000000100000000"
1811
1812
1813 # Cacheable read of address 30
1814 yield dut.d_in.load.eq(1)
1815 yield dut.d_in.nc.eq(0)
1816 yield dut.d_in.addr.eq(Const(0x0000000000000030, 64))
1817 yield dut.d_in.valid.eq(1)
1818 yield
1819 yield dut.d_in.valid.eq(0)
1820 yield
1821 while not (yield dut.d_out.valid):
1822 yield
1823 data = yield dut.d_out.data
1824 assert data == 0x0000000D0000000C, f"data @ 0x30 = {data:x} expected 0000000D0000000C"
1825
1826 # Non-cacheable read of address 100
1827 yield dut.d_in.load.eq(1)
1828 yield dut.d_in.nc.eq(1)
1829 yield dut.d_in.addr.eq(Const(0x0000000000000100, 64))
1830 yield dut.d_in.valid.eq(1)
1831 yield
1832 yield dut.d_in.valid.eq(0)
1833 yield
1834 while not (yield dut.d_out.valid):
1835 yield
1836 data = yield dut.d_out.data
1837 assert data == 0x0000004100000040, f"data @ 0x100 = {data:x} expected 0000004100000040"
1838
1839 yield
1840 yield
1841 yield
1842 yield
1843
1844
1845 def test_dcache():
1846 dut = DCache()
1847 vl = rtlil.convert(dut, ports=[])
1848 with open("test_dcache.il", "w") as f:
1849 f.write(vl)
1850
1851 #run_simulation(dut, dcache_sim(dut), vcd_name='test_dcache.vcd')
1852
1853 if __name__ == '__main__':
1854 test_dcache()
1855