ba16779b579ad0a737c581807d62ab3a7b95ad44
3 based on Anton Blanchard microwatt dcache.vhdl
7 from enum
import Enum
, unique
9 from nmigen
import Module
, Signal
, Elaboratable
, Cat
, Repl
, Array
, Const
10 from nmigen
.cli
import main
11 from nmutil
.iocontrol
import RecordObject
12 from nmigen
.utils
import log2_int
13 from nmigen
.cli
import rtlil
16 from soc
.experiment
.mem_types
import (LoadStore1ToDCacheType
,
17 DCacheToLoadStore1Type
,
21 from soc
.experiment
.wb_types
import (WB_ADDR_BITS
, WB_DATA_BITS
, WB_SEL_BITS
,
22 WBAddrType
, WBDataType
, WBSelType
,
23 WBMasterOut
, WBSlaveOut
,
24 WBMasterOutVector
, WBSlaveOutVector
,
25 WBIOMasterOut
, WBIOSlaveOut
)
27 from soc
.experiment
.cache_ram
import CacheRam
28 from soc
.experiment
.plru
import PLRU
# TODO: make these parameters of DCache at some point
LINE_SIZE    = 64    # Line size in bytes
NUM_LINES    = 32    # Number of lines in a set
NUM_WAYS     = 4     # Number of ways
TLB_SET_SIZE = 64    # L1 DTLB entries per set
TLB_NUM_WAYS = 2     # L1 DTLB number of sets
TLB_LG_PGSZ  = 12    # L1 DTLB log_2(page_size)
LOG_LENGTH   = 0     # Non-zero to enable log data collection

# BRAM organisation: We never access more than WB_DATA_BITS
# at a time so to save resources we make the array only that
# wide, and use consecutive indices for to make a cache "line"

# ROW_SIZE is the width in bytes of the BRAM (based on WB, so 64-bits)
ROW_SIZE = WB_DATA_BITS // 8

# ROW_PER_LINE is the number of row (wishbone transactions) in a line
ROW_PER_LINE = LINE_SIZE // ROW_SIZE

# BRAM_ROWS is the number of rows in BRAM
# needed to represent the full dcache
BRAM_ROWS = NUM_LINES * ROW_PER_LINE

# Bit fields counts in the address
#
# NOTE(review): REAL_ADDR_BITS (the number of real address bits) is
# referenced below but its definition is missing from this view --
# confirm it is defined earlier in the file.

# ROW_BITS is the number of bits to select a row
ROW_BITS = log2_int(BRAM_ROWS)

# ROW_LINE_BITS is the number of bits to select a row within a line
ROW_LINE_BITS = log2_int(ROW_PER_LINE)

# LINE_OFF_BITS is the number of bits for the offset in a cache line
LINE_OFF_BITS = log2_int(LINE_SIZE)

# ROW_OFF_BITS is the number of bits for the offset in a row
ROW_OFF_BITS = log2_int(ROW_SIZE)

# INDEX_BITS is the number of bits to select a cache line
INDEX_BITS = log2_int(NUM_LINES)

# SET_SIZE_BITS is the log base 2 of the set size
SET_SIZE_BITS = LINE_OFF_BITS + INDEX_BITS

# TAG_BITS is the number of bits of the tag part of the address
TAG_BITS = REAL_ADDR_BITS - SET_SIZE_BITS

# TAG_WIDTH is the width in bits of each way of the tag RAM,
# i.e. TAG_BITS rounded up to the next multiple of 8
TAG_WIDTH = TAG_BITS + 7 - ((TAG_BITS + 7) % 8)

# WAY_BITS is the number of bits to select a way
WAY_BITS = log2_int(NUM_WAYS)

# Example of layout for 32 lines of 64 bytes:
#
# ..  tag    |index| line  |
# ..         |     |---|   | ROW_LINE_BITS  (3)
# ..         |     |--- - --| LINE_OFF_BITS (6)
# ..         |         |- --| ROW_OFF_BITS  (3)
# ..         |----- ---|   | ROW_BITS       (8)
# ..         |-----|       | INDEX_BITS     (5)
# .. --------|             | TAG_BITS       (45)

# Width of one full tag-RAM row: one TAG_WIDTH slot per way
TAG_RAM_WIDTH = TAG_WIDTH * NUM_WAYS
110 return Array(Signal(TAG_RAM_WIDTH
) for x
in range(NUM_LINES
))
def CacheValidBitsArray():
    """Valid-bit storage: one Signal per cache line.

    NOTE(review): each entry is INDEX_BITS wide, yet it is indexed
    per-way elsewhere in this file -- looks like NUM_WAYS would be
    the intended width; confirm before changing (cache_valid_idx is
    also declared INDEX_BITS wide, so both would have to move).
    """
    valids = [Signal(INDEX_BITS) for _ in range(NUM_LINES)]
    return Array(valids)
def RowPerLineValidArray():
    """One single-bit valid Signal per row of a cache line."""
    bits = [Signal() for _ in range(ROW_PER_LINE)]
    return Array(bits)
119 TLB_SET_BITS
= log2_int(TLB_SET_SIZE
)
120 TLB_WAY_BITS
= log2_int(TLB_NUM_WAYS
)
121 TLB_EA_TAG_BITS
= 64 - (TLB_LG_PGSZ
+ TLB_SET_BITS
)
122 TLB_TAG_WAY_BITS
= TLB_NUM_WAYS
* TLB_EA_TAG_BITS
124 TLB_PTE_WAY_BITS
= TLB_NUM_WAYS
* TLB_PTE_BITS
;
126 assert (LINE_SIZE
% ROW_SIZE
) == 0, "LINE_SIZE not multiple of ROW_SIZE"
127 assert (LINE_SIZE
% 2) == 0, "LINE_SIZE not power of 2"
128 assert (NUM_LINES
% 2) == 0, "NUM_LINES not power of 2"
129 assert (ROW_PER_LINE
% 2) == 0, "ROW_PER_LINE not power of 2"
130 assert ROW_BITS
== (INDEX_BITS
+ ROW_LINE_BITS
), "geometry bits don't add up"
131 assert (LINE_OFF_BITS
== ROW_OFF_BITS
+ ROW_LINE_BITS
), \
132 "geometry bits don't add up"
133 assert REAL_ADDR_BITS
== (TAG_BITS
+ INDEX_BITS
+ LINE_OFF_BITS
), \
134 "geometry bits don't add up"
135 assert REAL_ADDR_BITS
== (TAG_BITS
+ ROW_BITS
+ ROW_OFF_BITS
), \
136 "geometry bits don't add up"
137 assert 64 == WB_DATA_BITS
, "Can't yet handle wb width that isn't 64-bits"
138 assert SET_SIZE_BITS
<= TLB_LG_PGSZ
, "Set indexed by virtual address"
def TLBValidBitsArray():
    """Per-set TLB valid bits: a TLB_NUM_WAYS-wide Signal per TLB set."""
    return Array([Signal(TLB_NUM_WAYS) for _ in range(TLB_SET_SIZE)])
145 return Array(Signal(TLB_EA_TAG_BITS
) for x
in range (TLB_NUM_WAYS
))
148 return Array(Signal(TLB_TAG_WAY_BITS
) for x
in range (TLB_SET_SIZE
))
151 return Array(Signal(TLB_PTE_WAY_BITS
) for x
in range(TLB_SET_SIZE
))
154 return Array(Signal(NUM_WAYS
) for x
in range(TLB_NUM_WAYS
))
156 # Cache RAM interface
158 return Array(Signal(WB_DATA_BITS
) for x
in range(NUM_WAYS
))
160 # PLRU output interface
162 return Array(Signal(WAY_BITS
) for x
in range(NUM_LINES
))
164 # TLB PLRU output interface
166 return Array(Signal(TLB_WAY_BITS
) for x
in range(TLB_SET_SIZE
))
168 # Helper functions to decode incoming requests
170 # Return the cache line index (tag index) for an address
172 return addr
[LINE_OFF_BITS
:SET_SIZE_BITS
]
174 # Return the cache row index (data memory) for an address
176 return addr
[ROW_OFF_BITS
:SET_SIZE_BITS
]
# Return the index of a row within a line
def get_row_of_line(row):
    """Extract the within-line row index (low ROW_LINE_BITS bits of row)."""
    line_row = row[0:ROW_LINE_BITS]
    return line_row
# Returns whether this is the last row of a line
def is_last_row_addr(addr, last):
    """Compare the row-within-line field of an address against last."""
    row_idx = addr[ROW_OFF_BITS:LINE_OFF_BITS]
    return row_idx == last
# Returns whether this is the last row of a line
def is_last_row(row, last):
    """True when row's within-line index equals last."""
    idx = get_row_of_line(row)
    return idx == last
190 # Return the next row in the current cache line. We use a
191 # dedicated function in order to limit the size of the
192 # generated adder to be only the bits within a cache line
193 # (3 bits with default settings)
195 row_v
= row
[0:ROW_LINE_BITS
] + 1
196 return Cat(row_v
[:ROW_LINE_BITS
], row
[ROW_LINE_BITS
:])
198 # Get the tag value from the address
200 return addr
[SET_SIZE_BITS
:REAL_ADDR_BITS
]
# Read a tag from a tag memory row
def read_tag(way, tagset):
    """Extract the TAG_BITS-wide tag of the given way from a tag-RAM row.

    Each way occupies a byte-aligned TAG_WIDTH slot; only the low
    TAG_BITS of the slot carry the tag.
    """
    slot = tagset.word_select(way, TAG_WIDTH)
    return slot[0:TAG_BITS]
# Read a TLB tag from a TLB tag memory row
def read_tlb_tag(way, tags):
    """Select the TLB_EA_TAG_BITS-wide EA tag of the given way."""
    return tags.word_select(way, TLB_EA_TAG_BITS)
# Write a TLB tag to a TLB tag memory row
def write_tlb_tag(way, tags, tag):
    """Return an assignment updating the given way's EA tag in a TLB row."""
    dest = read_tlb_tag(way, tags)
    return dest.eq(tag)
# Read a PTE from a TLB PTE memory row
def read_tlb_pte(way, ptes):
    """Select the TLB_PTE_BITS-wide PTE of the given way."""
    return ptes.word_select(way, TLB_PTE_BITS)
def write_tlb_pte(way, ptes, newpte):
    """Return an assignment updating the given way's PTE in a TLB PTE row."""
    dest = read_tlb_pte(way, ptes)
    return dest.eq(newpte)
222 # Record for storing permission, attribute, etc. bits from a PTE
223 class PermAttr(RecordObject
):
226 self
.reference
= Signal()
227 self
.changed
= Signal()
228 self
.nocache
= Signal()
230 self
.rd_perm
= Signal()
231 self
.wr_perm
= Signal()
234 def extract_perm_attr(pte
):
236 pa
.reference
= pte
[8]
245 # Type of operation on a "valid" input
249 OP_BAD
= 1 # NC cache hit, TLB miss, prot/RC failure
250 OP_STCX_FAIL
= 2 # conditional store w/o reservation
251 OP_LOAD_HIT
= 3 # Cache hit on load
252 OP_LOAD_MISS
= 4 # Load missing cache
253 OP_LOAD_NC
= 5 # Non-cachable load
254 OP_STORE_HIT
= 6 # Store hitting cache
255 OP_STORE_MISS
= 7 # Store missing cache
258 # Cache state machine
261 IDLE
= 0 # Normal load hit processing
262 RELOAD_WAIT_ACK
= 1 # Cache reload wait ack
263 STORE_WAIT_ACK
= 2 # Store wait ack
264 NC_LOAD_WAIT_ACK
= 3 # Non-cachable load wait ack
269 # In order to make timing, we use the BRAMs with
270 # an output buffer, which means that the BRAM
271 # output is delayed by an extra cycle.
273 # Thus, the dcache has a 2-stage internal pipeline
274 # for cache hits with no stalls.
276 # All other operations are handled via stalling
277 # in the first stage.
279 # The second stage can thus complete a hit at the same
280 # time as the first stage emits a stall for a complex op.
282 # Stage 0 register, basically contains just the latched request
284 class RegStage0(RecordObject
):
287 self
.req
= LoadStore1ToDCacheType()
288 self
.tlbie
= Signal()
289 self
.doall
= Signal()
290 self
.tlbld
= Signal()
291 self
.mmu_req
= Signal() # indicates source of request
294 class MemAccessRequest(RecordObject
):
298 self
.valid
= Signal()
300 self
.real_addr
= Signal(REAL_ADDR_BITS
)
301 self
.data
= Signal(64)
302 self
.byte_sel
= Signal(8)
303 self
.hit_way
= Signal(WAY_BITS
)
304 self
.same_tag
= Signal()
305 self
.mmu_req
= Signal()
308 # First stage register, contains state for stage 1 of load hits
309 # and for the state machine used by all other operations
310 class RegStage1(RecordObject
):
313 # Info about the request
314 self
.full
= Signal() # have uncompleted request
315 self
.mmu_req
= Signal() # request is from MMU
316 self
.req
= MemAccessRequest()
319 self
.hit_way
= Signal(WAY_BITS
)
320 self
.hit_load_valid
= Signal()
321 self
.hit_index
= Signal(NUM_LINES
)
322 self
.cache_hit
= Signal()
325 self
.tlb_hit
= Signal()
326 self
.tlb_hit_way
= Signal(TLB_NUM_WAYS
)
327 self
.tlb_hit_index
= Signal(TLB_WAY_BITS
)
329 # 2-stage data buffer for data forwarded from writes to reads
330 self
.forward_data1
= Signal(64)
331 self
.forward_data2
= Signal(64)
332 self
.forward_sel1
= Signal(8)
333 self
.forward_valid1
= Signal()
334 self
.forward_way1
= Signal(WAY_BITS
)
335 self
.forward_row1
= Signal(ROW_BITS
)
336 self
.use_forward1
= Signal()
337 self
.forward_sel
= Signal(8)
339 # Cache miss state (reload state machine)
340 self
.state
= Signal(State
)
342 self
.write_bram
= Signal()
343 self
.write_tag
= Signal()
344 self
.slow_valid
= Signal()
345 self
.wb
= WBMasterOut()
346 self
.reload_tag
= Signal(TAG_BITS
)
347 self
.store_way
= Signal(WAY_BITS
)
348 self
.store_row
= Signal(ROW_BITS
)
349 self
.store_index
= Signal(INDEX_BITS
)
350 self
.end_row_ix
= Signal(log2_int(ROW_LINE_BITS
, False))
351 self
.rows_valid
= RowPerLineValidArray()
352 self
.acks_pending
= Signal(3)
353 self
.inc_acks
= Signal()
354 self
.dec_acks
= Signal()
356 # Signals to complete (possibly with error)
357 self
.ls_valid
= Signal()
358 self
.ls_error
= Signal()
359 self
.mmu_done
= Signal()
360 self
.mmu_error
= Signal()
361 self
.cache_paradox
= Signal()
363 # Signal to complete a failed stcx.
364 self
.stcx_fail
= Signal()
367 # Reservation information
368 class Reservation(RecordObject
):
371 self
.valid
= Signal()
372 self
.addr
= Signal(64-LINE_OFF_BITS
)
375 class DTLBUpdate(Elaboratable
):
377 self
.tlbie
= Signal()
378 self
.tlbwe
= Signal()
379 self
.doall
= Signal()
380 self
.updated
= Signal()
381 self
.v_updated
= Signal()
382 self
.tlb_hit
= Signal()
383 self
.tlb_req_index
= Signal(TLB_SET_BITS
)
385 self
.tlb_hit_way
= Signal(TLB_WAY_BITS
)
386 self
.tlb_tag_way
= Signal(TLB_TAG_WAY_BITS
)
387 self
.tlb_pte_way
= Signal(TLB_PTE_WAY_BITS
)
388 self
.repl_way
= Signal(TLB_WAY_BITS
)
389 self
.eatag
= Signal(TLB_EA_TAG_BITS
)
390 self
.pte_data
= Signal(TLB_PTE_BITS
)
392 self
.dv
= Signal(TLB_PTE_WAY_BITS
)
394 self
.tb_out
= Signal(TLB_TAG_WAY_BITS
)
395 self
.pb_out
= Signal(TLB_NUM_WAYS
)
396 self
.db_out
= Signal(TLB_PTE_WAY_BITS
)
398 def elaborate(self
, platform
):
403 tagset
= Signal(TLB_TAG_WAY_BITS
)
404 pteset
= Signal(TLB_PTE_WAY_BITS
)
406 tb_out
, pb_out
, db_out
= self
.tb_out
, self
.pb_out
, self
.db_out
408 with m
.If(self
.tlbie
& self
.doall
):
409 pass # clear all back in parent
410 with m
.Elif(self
.tlbie
):
411 with m
.If(self
.tlb_hit
):
412 comb
+= db_out
.eq(self
.dv
)
413 comb
+= db_out
.bit_select(self
.tlb_hit_way
, 1).eq(1)
414 comb
+= self
.v_updated
.eq(1)
416 with m
.Elif(self
.tlbwe
):
418 comb
+= tagset
.eq(self
.tlb_tag_way
)
419 comb
+= write_tlb_tag(self
.repl_way
, tagset
, self
.eatag
)
420 comb
+= tb_out
.eq(tagset
)
422 comb
+= pteset
.eq(self
.tlb_pte_way
)
423 comb
+= write_tlb_pte(self
.repl_way
, pteset
, self
.pte_data
)
424 comb
+= pb_out
.eq(pteset
)
426 comb
+= db_out
.bit_select(self
.repl_way
, 1).eq(1)
428 comb
+= self
.updated
.eq(1)
429 comb
+= self
.v_updated
.eq(1)
433 def dcache_request(self
, m
, r0
, ra
, req_index
, req_row
, req_tag
,
434 r0_valid
, r1
, cache_valid_bits
, replace_way
,
435 use_forward1_next
, use_forward2_next
,
436 req_hit_way
, plru_victim
, rc_ok
, perm_attr
,
437 valid_ra
, perm_ok
, access_ok
, req_op
, req_go
,
439 tlb_hit
, tlb_hit_way
, tlb_valid_way
, cache_tag_set
,
440 cancel_store
, req_same_tag
, r0_stall
, early_req_row
):
441 """Cache request parsing and hit detection
444 class DCachePendingHit(Elaboratable
):
446 def __init__(self
, tlb_pte_way
, tlb_valid_way
, tlb_hit_way
,
447 cache_valid_idx
, cache_tag_set
,
452 self
.virt_mode
= Signal()
453 self
.is_hit
= Signal()
454 self
.tlb_hit
= Signal()
455 self
.hit_way
= Signal(WAY_BITS
)
456 self
.rel_match
= Signal()
457 self
.req_index
= Signal(INDEX_BITS
)
458 self
.reload_tag
= Signal(TAG_BITS
)
460 self
.tlb_hit_way
= tlb_hit_way
461 self
.tlb_pte_way
= tlb_pte_way
462 self
.tlb_valid_way
= tlb_valid_way
463 self
.cache_valid_idx
= cache_valid_idx
464 self
.cache_tag_set
= cache_tag_set
465 self
.req_addr
= req_addr
466 self
.hit_set
= hit_set
468 def elaborate(self
, platform
):
474 virt_mode
= self
.virt_mode
476 tlb_pte_way
= self
.tlb_pte_way
477 tlb_valid_way
= self
.tlb_valid_way
478 cache_valid_idx
= self
.cache_valid_idx
479 cache_tag_set
= self
.cache_tag_set
480 req_addr
= self
.req_addr
481 tlb_hit_way
= self
.tlb_hit_way
482 tlb_hit
= self
.tlb_hit
483 hit_set
= self
.hit_set
484 hit_way
= self
.hit_way
485 rel_match
= self
.rel_match
486 req_index
= self
.req_index
487 reload_tag
= self
.reload_tag
489 rel_matches
= Array(Signal() for i
in range(TLB_NUM_WAYS
))
490 hit_way_set
= HitWaySet()
492 # Test if pending request is a hit on any way
493 # In order to make timing in virtual mode,
494 # when we are using the TLB, we compare each
495 # way with each of the real addresses from each way of
496 # the TLB, and then decide later which match to use.
498 with m
.If(virt_mode
):
499 for j
in range(TLB_NUM_WAYS
):
500 s_tag
= Signal(TAG_BITS
, name
="s_tag%d" % j
)
502 s_pte
= Signal(TLB_PTE_BITS
)
503 s_ra
= Signal(REAL_ADDR_BITS
)
504 comb
+= s_pte
.eq(read_tlb_pte(j
, tlb_pte_way
))
505 comb
+= s_ra
.eq(Cat(req_addr
[0:TLB_LG_PGSZ
],
506 s_pte
[TLB_LG_PGSZ
:REAL_ADDR_BITS
]))
507 comb
+= s_tag
.eq(get_tag(s_ra
))
509 for i
in range(NUM_WAYS
):
510 is_tag_hit
= Signal()
511 comb
+= is_tag_hit
.eq(go
& cache_valid_idx
[i
] &
512 (read_tag(i
, cache_tag_set
) == s_tag
)
514 with m
.If(is_tag_hit
):
515 comb
+= hit_way_set
[j
].eq(i
)
517 comb
+= hit_set
[j
].eq(s_hit
)
518 with m
.If(s_tag
== reload_tag
):
519 comb
+= rel_matches
[j
].eq(1)
521 comb
+= is_hit
.eq(hit_set
[tlb_hit_way
])
522 comb
+= hit_way
.eq(hit_way_set
[tlb_hit_way
])
523 comb
+= rel_match
.eq(rel_matches
[tlb_hit_way
])
525 s_tag
= Signal(TAG_BITS
)
526 comb
+= s_tag
.eq(get_tag(req_addr
))
527 for i
in range(NUM_WAYS
):
528 is_tag_hit
= Signal()
529 comb
+= is_tag_hit
.eq(go
& cache_valid_idx
[i
] &
530 read_tag(i
, cache_tag_set
) == s_tag
)
531 with m
.If(is_tag_hit
):
532 comb
+= hit_way
.eq(i
)
534 with m
.If(s_tag
== reload_tag
):
535 comb
+= rel_match
.eq(1)
540 class DCache(Elaboratable
):
541 """Set associative dcache write-through
542 TODO (in no specific order):
543 * See list in icache.vhdl
544 * Complete load misses on the cycle when WB data comes instead of
545 at the end of line (this requires dealing with requests coming in
549 self
.d_in
= LoadStore1ToDCacheType()
550 self
.d_out
= DCacheToLoadStore1Type()
552 self
.m_in
= MMUToDCacheType()
553 self
.m_out
= DCacheToMMUType()
555 self
.stall_out
= Signal()
557 self
.wb_out
= WBMasterOut()
558 self
.wb_in
= WBSlaveOut()
560 self
.log_out
= Signal(20)
562 def stage_0(self
, m
, r0
, r1
, r0_full
):
563 """Latch the request in r0.req as long as we're not stalling
567 d_in
, d_out
, m_in
= self
.d_in
, self
.d_out
, self
.m_in
571 # TODO, this goes in unit tests and formal proofs
572 with m
.If(~
(d_in
.valid
& m_in
.valid
)):
573 #sync += Display("request collision loadstore vs MMU")
576 with m
.If(m_in
.valid
):
577 sync
+= r
.req
.valid
.eq(1)
578 sync
+= r
.req
.load
.eq(~
(m_in
.tlbie | m_in
.tlbld
))
579 sync
+= r
.req
.dcbz
.eq(0)
580 sync
+= r
.req
.nc
.eq(0)
581 sync
+= r
.req
.reserve
.eq(0)
582 sync
+= r
.req
.virt_mode
.eq(1)
583 sync
+= r
.req
.priv_mode
.eq(1)
584 sync
+= r
.req
.addr
.eq(m_in
.addr
)
585 sync
+= r
.req
.data
.eq(m_in
.pte
)
586 sync
+= r
.req
.byte_sel
.eq(~
0) # Const -1 sets all to 0b111....
587 sync
+= r
.tlbie
.eq(m_in
.tlbie
)
588 sync
+= r
.doall
.eq(m_in
.doall
)
589 sync
+= r
.tlbld
.eq(m_in
.tlbld
)
590 sync
+= r
.mmu_req
.eq(1)
592 sync
+= r
.req
.eq(d_in
)
593 sync
+= r
.tlbie
.eq(0)
594 sync
+= r
.doall
.eq(0)
595 sync
+= r
.tlbld
.eq(0)
596 sync
+= r
.mmu_req
.eq(0)
597 with m
.If(~
(r1
.full
& r0_full
)):
599 sync
+= r0_full
.eq(r
.req
.valid
)
601 def tlb_read(self
, m
, r0_stall
, tlb_valid_way
,
602 tlb_tag_way
, tlb_pte_way
, dtlb_valid_bits
,
603 dtlb_tags
, dtlb_ptes
):
605 Operates in the second cycle on the request latched in r0.req.
606 TLB updates write the entry at the end of the second cycle.
610 m_in
, d_in
= self
.m_in
, self
.d_in
612 index
= Signal(TLB_SET_BITS
)
613 addrbits
= Signal(TLB_SET_BITS
)
616 amax
= TLB_LG_PGSZ
+ TLB_SET_BITS
618 with m
.If(m_in
.valid
):
619 comb
+= addrbits
.eq(m_in
.addr
[amin
: amax
])
621 comb
+= addrbits
.eq(d_in
.addr
[amin
: amax
])
622 comb
+= index
.eq(addrbits
)
624 # If we have any op and the previous op isn't finished,
625 # then keep the same output for next cycle.
626 with m
.If(~r0_stall
):
627 sync
+= tlb_valid_way
.eq(dtlb_valid_bits
[index
])
628 sync
+= tlb_tag_way
.eq(dtlb_tags
[index
])
629 sync
+= tlb_pte_way
.eq(dtlb_ptes
[index
])
631 def maybe_tlb_plrus(self
, m
, r1
, tlb_plru_victim
, acc
, acc_en
, lru
):
632 """Generate TLB PLRUs
637 with m
.If(TLB_NUM_WAYS
> 1):
638 for i
in range(TLB_SET_SIZE
):
640 tlb_plru
= PLRU(WAY_BITS
)
641 setattr(m
.submodules
, "maybe_plru_%d" % i
, tlb_plru
)
642 tlb_plru_acc
= Signal(TLB_WAY_BITS
)
643 tlb_plru_acc_en
= Signal()
644 tlb_plru_out
= Signal(TLB_WAY_BITS
)
646 comb
+= tlb_plru
.acc
.eq(tlb_plru_acc
)
647 comb
+= tlb_plru
.acc_en
.eq(tlb_plru_acc_en
)
648 comb
+= tlb_plru
.lru
.eq(tlb_plru_out
)
651 with m
.If(r1
.tlb_hit_index
== i
):
652 comb
+= tlb_plru
.acc_en
.eq(r1
.tlb_hit
)
654 comb
+= tlb_plru
.acc_en
.eq(0)
655 comb
+= tlb_plru
.acc
.eq(r1
.tlb_hit_way
)
657 comb
+= tlb_plru_victim
[i
].eq(tlb_plru
.lru
)
659 def tlb_search(self
, m
, tlb_req_index
, r0
, r0_valid
,
660 tlb_valid_way
, tlb_tag_way
, tlb_hit_way
,
661 tlb_pte_way
, pte
, tlb_hit
, valid_ra
, perm_attr
, ra
):
666 hitway
= Signal(TLB_WAY_BITS
)
668 eatag
= Signal(TLB_EA_TAG_BITS
)
670 TLB_LG_END
= TLB_LG_PGSZ
+ TLB_SET_BITS
671 comb
+= tlb_req_index
.eq(r0
.req
.addr
[TLB_LG_PGSZ
: TLB_LG_END
])
672 comb
+= eatag
.eq(r0
.req
.addr
[TLB_LG_END
: 64 ])
674 for i
in range(TLB_NUM_WAYS
):
675 is_tag_hit
= Signal()
676 comb
+= is_tag_hit
.eq(tlb_valid_way
[i
]
677 & read_tlb_tag(i
, tlb_tag_way
) == eatag
)
678 with m
.If(is_tag_hit
):
682 comb
+= tlb_hit
.eq(hit
& r0_valid
)
683 comb
+= tlb_hit_way
.eq(hitway
)
686 comb
+= pte
.eq(read_tlb_pte(hitway
, tlb_pte_way
))
689 comb
+= valid_ra
.eq(tlb_hit | ~r0
.req
.virt_mode
)
690 with m
.If(r0
.req
.virt_mode
):
691 comb
+= ra
.eq(Cat(Const(0, ROW_OFF_BITS
),
692 r0
.req
.addr
[ROW_OFF_BITS
:TLB_LG_PGSZ
],
693 pte
[TLB_LG_PGSZ
:REAL_ADDR_BITS
]))
694 comb
+= perm_attr
.eq(extract_perm_attr(pte
))
696 comb
+= ra
.eq(Cat(Const(0, ROW_OFF_BITS
),
697 r0
.req
.addr
[ROW_OFF_BITS
:REAL_ADDR_BITS
]))
699 comb
+= perm_attr
.reference
.eq(1)
700 comb
+= perm_attr
.changed
.eq(1)
701 comb
+= perm_attr
.priv
.eq(1)
702 comb
+= perm_attr
.nocache
.eq(0)
703 comb
+= perm_attr
.rd_perm
.eq(1)
704 comb
+= perm_attr
.wr_perm
.eq(1)
706 def tlb_update(self
, m
, r0_valid
, r0
, dtlb_valid_bits
, tlb_req_index
,
707 tlb_hit_way
, tlb_hit
, tlb_plru_victim
, tlb_tag_way
,
708 dtlb_tags
, tlb_pte_way
, dtlb_ptes
):
716 comb
+= tlbie
.eq(r0_valid
& r0
.tlbie
)
717 comb
+= tlbwe
.eq(r0_valid
& r0
.tlbld
)
719 m
.submodules
.tlb_update
= d
= DTLBUpdate()
720 with m
.If(tlbie
& r0
.doall
):
721 # clear all valid bits at once
722 for i
in range(TLB_SET_SIZE
):
723 sync
+= dtlb_valid_bits
[i
].eq(0)
724 with m
.If(d
.updated
):
725 sync
+= dtlb_tags
[tlb_req_index
].eq(d
.tb_out
)
726 sync
+= dtlb_ptes
[tlb_req_index
].eq(d
.pb_out
)
727 with m
.If(d
.v_updated
):
728 sync
+= dtlb_valid_bits
[tlb_req_index
].eq(d
.db_out
)
730 comb
+= d
.dv
.eq(dtlb_valid_bits
[tlb_req_index
])
732 comb
+= d
.tlbie
.eq(tlbie
)
733 comb
+= d
.tlbwe
.eq(tlbwe
)
734 comb
+= d
.doall
.eq(r0
.doall
)
735 comb
+= d
.tlb_hit
.eq(tlb_hit
)
736 comb
+= d
.tlb_hit_way
.eq(tlb_hit_way
)
737 comb
+= d
.tlb_tag_way
.eq(tlb_tag_way
)
738 comb
+= d
.tlb_pte_way
.eq(tlb_pte_way
)
739 comb
+= d
.tlb_req_index
.eq(tlb_req_index
)
742 comb
+= d
.repl_way
.eq(tlb_hit_way
)
744 comb
+= d
.repl_way
.eq(tlb_plru_victim
[tlb_req_index
])
745 comb
+= d
.eatag
.eq(r0
.req
.addr
[TLB_LG_PGSZ
+ TLB_SET_BITS
:64])
746 comb
+= d
.pte_data
.eq(r0
.req
.data
)
748 def maybe_plrus(self
, m
, r1
, plru_victim
):
754 for i
in range(NUM_LINES
):
756 plru
= PLRU(WAY_BITS
)
757 setattr(m
.submodules
, "plru%d" % i
, plru
)
758 plru_acc
= Signal(WAY_BITS
)
759 plru_acc_en
= Signal()
760 plru_out
= Signal(WAY_BITS
)
762 comb
+= plru
.acc
.eq(plru_acc
)
763 comb
+= plru
.acc_en
.eq(plru_acc_en
)
764 comb
+= plru_out
.eq(plru
.lru_o
)
766 with m
.If(r1
.hit_index
== i
):
767 comb
+= plru_acc_en
.eq(r1
.cache_hit
)
769 comb
+= plru_acc
.eq(r1
.hit_way
)
770 comb
+= plru_victim
[i
].eq(plru_out
)
772 def cache_tag_read(self
, m
, r0_stall
, req_index
, cache_tag_set
, cache_tags
):
773 """Cache tag RAM read port
777 m_in
, d_in
= self
.m_in
, self
.d_in
779 index
= Signal(INDEX_BITS
)
782 comb
+= index
.eq(req_index
)
783 with m
.Elif(m_in
.valid
):
784 comb
+= index
.eq(get_index(m_in
.addr
))
786 comb
+= index
.eq(get_index(d_in
.addr
))
787 sync
+= cache_tag_set
.eq(cache_tags
[index
])
789 def dcache_request(self
, m
, r0
, ra
, req_index
, req_row
, req_tag
,
790 r0_valid
, r1
, cache_valid_bits
, replace_way
,
791 use_forward1_next
, use_forward2_next
,
792 req_hit_way
, plru_victim
, rc_ok
, perm_attr
,
793 valid_ra
, perm_ok
, access_ok
, req_op
, req_go
,
795 tlb_hit
, tlb_hit_way
, tlb_valid_way
, cache_tag_set
,
796 cancel_store
, req_same_tag
, r0_stall
, early_req_row
):
797 """Cache request parsing and hit detection
802 m_in
, d_in
= self
.m_in
, self
.d_in
805 hit_way
= Signal(WAY_BITS
)
810 hit_set
= Array(Signal() for i
in range(TLB_NUM_WAYS
))
811 cache_valid_idx
= Signal(INDEX_BITS
)
813 # Extract line, row and tag from request
814 comb
+= req_index
.eq(get_index(r0
.req
.addr
))
815 comb
+= req_row
.eq(get_row(r0
.req
.addr
))
816 comb
+= req_tag
.eq(get_tag(ra
))
818 comb
+= go
.eq(r0_valid
& ~
(r0
.tlbie | r0
.tlbld
) & ~r1
.ls_error
)
819 comb
+= cache_valid_idx
.eq(cache_valid_bits
[req_index
])
821 m
.submodules
.dcache_pend
= dc
= DCachePendingHit(tlb_pte_way
,
822 tlb_valid_way
, tlb_hit_way
,
823 cache_valid_idx
, cache_tag_set
,
827 comb
+= dc
.tlb_hit
.eq(tlb_hit
)
828 comb
+= dc
.reload_tag
.eq(r1
.reload_tag
)
829 comb
+= dc
.virt_mode
.eq(r0
.req
.virt_mode
)
831 comb
+= dc
.req_index
.eq(req_index
)
832 comb
+= is_hit
.eq(dc
.is_hit
)
833 comb
+= hit_way
.eq(dc
.hit_way
)
834 comb
+= req_same_tag
.eq(dc
.rel_match
)
836 # See if the request matches the line currently being reloaded
837 with m
.If((r1
.state
== State
.RELOAD_WAIT_ACK
) &
838 (req_index
== r1
.store_index
) & req_same_tag
):
839 # For a store, consider this a hit even if the row isn't
840 # valid since it will be by the time we perform the store.
841 # For a load, check the appropriate row valid bit.
842 valid
= r1
.rows_valid
[req_row
% ROW_PER_LINE
]
843 comb
+= is_hit
.eq(~r0
.req
.load | valid
)
844 comb
+= hit_way
.eq(replace_way
)
846 # Whether to use forwarded data for a load or not
847 comb
+= use_forward1_next
.eq(0)
848 with m
.If((get_row(r1
.req
.real_addr
) == req_row
) &
849 (r1
.req
.hit_way
== hit_way
)):
850 # Only need to consider r1.write_bram here, since if we
851 # are writing refill data here, then we don't have a
852 # cache hit this cycle on the line being refilled.
853 # (There is the possibility that the load following the
854 # load miss that started the refill could be to the old
855 # contents of the victim line, since it is a couple of
856 # cycles after the refill starts before we see the updated
857 # cache tag. In that case we don't use the bypass.)
858 comb
+= use_forward1_next
.eq(r1
.write_bram
)
859 comb
+= use_forward2_next
.eq(0)
860 with m
.If((r1
.forward_row1
== req_row
) & (r1
.forward_way1
== hit_way
)):
861 comb
+= use_forward2_next
.eq(r1
.forward_valid1
)
863 # The way that matched on a hit
864 comb
+= req_hit_way
.eq(hit_way
)
866 # The way to replace on a miss
867 with m
.If(r1
.write_tag
):
868 replace_way
.eq(plru_victim
[r1
.store_index
])
870 comb
+= replace_way
.eq(r1
.store_way
)
872 # work out whether we have permission for this access
873 # NB we don't yet implement AMR, thus no KUAP
874 comb
+= rc_ok
.eq(perm_attr
.reference
875 & (r0
.req
.load | perm_attr
.changed
)
877 comb
+= perm_ok
.eq((r0
.req
.priv_mode | ~perm_attr
.priv
)
879 |
(r0
.req
.load
& perm_attr
.rd_perm
)
881 comb
+= access_ok
.eq(valid_ra
& perm_ok
& rc_ok
)
882 # Combine the request and cache hit status to decide what
883 # operation needs to be done
884 comb
+= nc
.eq(r0
.req
.nc | perm_attr
.nocache
)
885 comb
+= op
.eq(Op
.OP_NONE
)
887 with m
.If(~access_ok
):
888 comb
+= op
.eq(Op
.OP_BAD
)
889 with m
.Elif(cancel_store
):
890 comb
+= op
.eq(Op
.OP_STCX_FAIL
)
892 comb
+= opsel
.eq(Cat(is_hit
, nc
, r0
.req
.load
))
893 with m
.Switch(opsel
):
895 comb
+= op
.eq(Op
.OP_LOAD_HIT
)
897 comb
+= op
.eq(Op
.OP_LOAD_MISS
)
899 comb
+= op
.eq(Op
.OP_LOAD_NC
)
901 comb
+= op
.eq(Op
.OP_STORE_HIT
)
903 comb
+= op
.eq(Op
.OP_STORE_MISS
)
905 comb
+= op
.eq(Op
.OP_STORE_MISS
)
907 comb
+= op
.eq(Op
.OP_BAD
)
909 comb
+= op
.eq(Op
.OP_BAD
)
911 comb
+= op
.eq(Op
.OP_NONE
)
912 comb
+= req_op
.eq(op
)
913 comb
+= req_go
.eq(go
)
915 # Version of the row number that is valid one cycle earlier
916 # in the cases where we need to read the cache data BRAM.
917 # If we're stalling then we need to keep reading the last
919 with m
.If(~r0_stall
):
920 with m
.If(m_in
.valid
):
921 comb
+= early_req_row
.eq(get_row(m_in
.addr
))
923 comb
+= early_req_row
.eq(get_row(d_in
.addr
))
925 comb
+= early_req_row
.eq(req_row
)
927 def reservation_comb(self
, m
, cancel_store
, set_rsrv
, clear_rsrv
,
928 r0_valid
, r0
, reservation
):
929 """Handle load-with-reservation and store-conditional instructions
934 with m
.If(r0_valid
& r0
.req
.reserve
):
936 # XXX generate alignment interrupt if address
937 # is not aligned XXX or if r0.req.nc = '1'
938 with m
.If(r0
.req
.load
):
939 comb
+= set_rsrv
.eq(1) # load with reservation
941 comb
+= clear_rsrv
.eq(1) # store conditional
942 with m
.If(~reservation
.valid | r0
.req
.addr
[LINE_OFF_BITS
:64]):
943 comb
+= cancel_store
.eq(1)
945 def reservation_reg(self
, m
, r0_valid
, access_ok
, set_rsrv
, clear_rsrv
,
951 with m
.If(r0_valid
& access_ok
):
952 with m
.If(clear_rsrv
):
953 sync
+= reservation
.valid
.eq(0)
954 with m
.Elif(set_rsrv
):
955 sync
+= reservation
.valid
.eq(1)
956 sync
+= reservation
.addr
.eq(r0
.req
.addr
[LINE_OFF_BITS
:64])
958 def writeback_control(self
, m
, r1
, cache_out
):
959 """Return data for loads & completion control logic
963 d_out
, m_out
= self
.d_out
, self
.m_out
965 data_out
= Signal(64)
966 data_fwd
= Signal(64)
968 # Use the bypass if are reading the row that was
969 # written 1 or 2 cycles ago, including for the
970 # slow_valid = 1 case (i.e. completing a load
971 # miss or a non-cacheable load).
972 with m
.If(r1
.use_forward1
):
973 comb
+= data_fwd
.eq(r1
.forward_data1
)
975 comb
+= data_fwd
.eq(r1
.forward_data2
)
977 comb
+= data_out
.eq(cache_out
[r1
.hit_way
])
980 with m
.If(r1
.forward_sel
[i
]):
981 dsel
= data_fwd
.word_select(i
, 8)
982 comb
+= data_out
.word_select(i
, 8).eq(dsel
)
984 comb
+= d_out
.valid
.eq(r1
.ls_valid
)
985 comb
+= d_out
.data
.eq(data_out
)
986 comb
+= d_out
.store_done
.eq(~r1
.stcx_fail
)
987 comb
+= d_out
.error
.eq(r1
.ls_error
)
988 comb
+= d_out
.cache_paradox
.eq(r1
.cache_paradox
)
991 comb
+= m_out
.done
.eq(r1
.mmu_done
)
992 comb
+= m_out
.err
.eq(r1
.mmu_error
)
993 comb
+= m_out
.data
.eq(data_out
)
995 # We have a valid load or store hit or we just completed
996 # a slow op such as a load miss, a NC load or a store
998 # Note: the load hit is delayed by one cycle. However it
999 # can still not collide with r.slow_valid (well unless I
1000 # miscalculated) because slow_valid can only be set on a
1001 # subsequent request and not on its first cycle (the state
1002 # machine must have advanced), which makes slow_valid
1003 # at least 2 cycles from the previous hit_load_valid.
1005 # Sanity: Only one of these must be set in any given cycle
1007 if False: # TODO: need Display to get this to work
1008 assert (r1
.slow_valid
& r1
.stcx_fail
) != 1, \
1009 "unexpected slow_valid collision with stcx_fail"
1011 assert ((r1
.slow_valid | r1
.stcx_fail
) | r1
.hit_load_valid
) != 1, \
1012 "unexpected hit_load_delayed collision with slow_valid"
1014 with m
.If(~r1
.mmu_req
):
1015 # Request came from loadstore1...
1016 # Load hit case is the standard path
1017 with m
.If(r1
.hit_load_valid
):
1018 #Display(f"completing load hit data={data_out}")
1021 # error cases complete without stalling
1022 with m
.If(r1
.ls_error
):
1023 # Display("completing ld/st with error")
1026 # Slow ops (load miss, NC, stores)
1027 with m
.If(r1
.slow_valid
):
1028 #Display(f"completing store or load miss data={data_out}")
1032 # Request came from MMU
1033 with m
.If(r1
.hit_load_valid
):
1034 # Display(f"completing load hit to MMU, data={m_out.data}")
1036 # error cases complete without stalling
1037 with m
.If(r1
.mmu_error
):
1038 #Display("combpleting MMU ld with error")
1041 # Slow ops (i.e. load miss)
1042 with m
.If(r1
.slow_valid
):
1043 #Display("completing MMU load miss, data={m_out.data}")
def rams(self, m, r1, early_req_row, cache_out, replace_way):
    """Generate a cache RAM for each way.

    Handles the normal reads, writes from reloads and the special
    store-hit update path.

    Note: the BRAMs have an extra read buffer, meaning the output
    is pipelined an extra cycle.  This differs from the icache.
    The writeback logic needs to take that into account by using
    1-cycle delayed signals for load hits.
    """
    comb = m.d.comb
    wb_in = self.wb_in

    for i in range(NUM_WAYS):
        # per-way control/data signals (one BRAM per way)
        do_read  = Signal(name="do_rd_%d" % i)
        rd_addr  = Signal(ROW_BITS)
        do_write = Signal(name="do_wr_%d" % i)
        wr_addr  = Signal(ROW_BITS)
        wr_data  = Signal(WB_DATA_BITS)
        wr_sel   = Signal(ROW_SIZE)
        wr_sel_m = Signal(ROW_SIZE)
        _d_out   = Signal(WB_DATA_BITS)

        way = CacheRam(ROW_BITS, WB_DATA_BITS, True)
        setattr(m.submodules, "cacheram_%d" % i, way)

        comb += way.rd_en.eq(do_read)
        comb += way.rd_addr.eq(rd_addr)
        comb += _d_out.eq(way.rd_data_o)
        comb += way.wr_sel.eq(wr_sel_m)
        comb += way.wr_addr.eq(wr_addr)
        comb += way.wr_data.eq(wr_data)

        # Cache hit reads: always read, at the early (stage-0) row
        comb += do_read.eq(1)
        comb += rd_addr.eq(early_req_row)
        comb += cache_out[i].eq(_d_out)

        # Write mux:
        #
        # Defaults to wishbone read responses (cache refill)
        #
        # For timing, the mux on wr_data/sel/addr is not
        # dependent on anything other than the current state.
        with m.If(r1.write_bram):
            # Write store data to BRAM.  This happens one
            # cycle after the store is in r0.
            comb += wr_data.eq(r1.req.data)
            comb += wr_sel.eq(r1.req.byte_sel)
            comb += wr_addr.eq(get_row(r1.req.real_addr))

            with m.If(i == r1.req.hit_way):
                comb += do_write.eq(1)
        with m.Else():
            # Otherwise, we might be doing a reload or a DCBZ
            with m.If(r1.dcbz):
                comb += wr_data.eq(0)
            with m.Else():
                comb += wr_data.eq(wb_in.dat)
            comb += wr_addr.eq(r1.store_row)
            comb += wr_sel.eq(~0) # all 1s

            # accept the wishbone ack only into the way being replaced
            with m.If((r1.state == State.RELOAD_WAIT_ACK)
                      & wb_in.ack & (replace_way == i)):
                comb += do_write.eq(1)

        # Mask write selects with do_write since BRAM
        # doesn't have a global write-enable
        with m.If(do_write):
            comb += wr_sel_m.eq(wr_sel)
1119 # Cache hit synchronous machine for the easy case.
1120 # This handles load hits.
1121 # It also handles error cases (TLB miss, cache paradox)
def dcache_fast_hit(self, m, req_op, r0_valid, r0, r1,
                    req_hit_way, req_index, access_ok,
                    tlb_hit, tlb_hit_way, tlb_req_index):
    """Cache hit synchronous machine for the easy case.

    Handles load hits.  Also handles error cases
    (TLB miss, cache paradox).
    """
    comb = m.d.comb
    sync = m.d.sync

    with m.If(req_op != Op.OP_NONE):
        #Display(f"op:{req_op} addr:{r0.req.addr} nc: {r0.req.nc}" \
        #      f"idx:{req_index} tag:{req_tag} way: {req_hit_way}"
        #     )
        pass

    with m.If(r0_valid):
        sync += r1.mmu_req.eq(r0.mmu_req)

    # Fast path for load/store hits.
    # Set signals for the writeback controls.
    sync += r1.hit_way.eq(req_hit_way)
    sync += r1.hit_index.eq(req_index)

    with m.If(req_op == Op.OP_LOAD_HIT):
        sync += r1.hit_load_valid.eq(1)
    with m.Else():
        sync += r1.hit_load_valid.eq(0)

    with m.If((req_op == Op.OP_LOAD_HIT) | (req_op == Op.OP_STORE_HIT)):
        sync += r1.cache_hit.eq(1)
    with m.Else():
        sync += r1.cache_hit.eq(0)

    with m.If(req_op == Op.OP_BAD):
        # Display(f"Signalling ld/st error valid_ra={valid_ra}"
        #      f"rc_ok={rc_ok} perm_ok={perm_ok}"
        # route the error to loadstore1 or the MMU depending on
        # who issued the request
        sync += r1.ls_error.eq(~r0.mmu_req)
        sync += r1.mmu_error.eq(r0.mmu_req)
        sync += r1.cache_paradox.eq(access_ok)
    with m.Else():
        sync += r1.ls_error.eq(0)
        sync += r1.mmu_error.eq(0)
        sync += r1.cache_paradox.eq(0)

    with m.If(req_op == Op.OP_STCX_FAIL):
        sync += r1.stcx_fail.eq(1)
    with m.Else():
        sync += r1.stcx_fail.eq(0)

    # Record TLB hit information for updating TLB PLRU
    sync += r1.tlb_hit.eq(tlb_hit)
    sync += r1.tlb_hit_way.eq(tlb_hit_way)
    sync += r1.tlb_hit_index.eq(tlb_req_index)
1175 # Memory accesses are handled by this state machine:
1177 # * Cache load miss/reload (in conjunction with "rams")
1178 # * Load hits for non-cachable forms
1179 # * Stores (the collision case is handled in "rams")
1181 # All wishbone requests generation is done here.
1182 # This machine operates at stage 1.
def dcache_slow(self, m, r1, use_forward1_next, use_forward2_next,
                cache_valid_bits, r0, replace_way,
                req_hit_way, req_same_tag,
                r0_valid, req_op, cache_tag, req_go, ra):
    """Memory accesses are handled by this state machine:

    * Cache load miss/reload (in conjunction with "rams")
    * Load hits for non-cachable forms
    * Stores (the collision case is handled in "rams")

    All wishbone request generation is done here.
    This machine operates at stage 1.
    """
    comb = m.d.comb
    sync = m.d.sync
    wb_in = self.wb_in

    req         = MemAccessRequest()
    acks        = Signal(3)
    adjust_acks = Signal(3)
    stbs_done   = Signal()

    sync += r1.use_forward1.eq(use_forward1_next)
    sync += r1.forward_sel.eq(0)

    with m.If(use_forward1_next):
        sync += r1.forward_sel.eq(r1.req.byte_sel)
    with m.Elif(use_forward2_next):
        sync += r1.forward_sel.eq(r1.forward_sel1)

    sync += r1.forward_data2.eq(r1.forward_data1)
    with m.If(r1.write_bram):
        sync += r1.forward_data1.eq(r1.req.data)
        sync += r1.forward_sel1.eq(r1.req.byte_sel)
        sync += r1.forward_way1.eq(r1.req.hit_way)
        sync += r1.forward_row1.eq(get_row(r1.req.real_addr))
        sync += r1.forward_valid1.eq(1)
    with m.Else():
        with m.If(r1.dcbz):
            sync += r1.forward_data1.eq(0)
        with m.Else():
            sync += r1.forward_data1.eq(wb_in.dat)
        sync += r1.forward_sel1.eq(~0) # all 1s
        sync += r1.forward_way1.eq(replace_way)
        sync += r1.forward_row1.eq(r1.store_row)
        sync += r1.forward_valid1.eq(0)

    # One cycle pulses reset
    sync += r1.slow_valid.eq(0)
    sync += r1.write_bram.eq(0)
    sync += r1.inc_acks.eq(0)
    sync += r1.dec_acks.eq(0)

    sync += r1.ls_valid.eq(0)
    # complete tlbies and TLB loads in the third cycle
    sync += r1.mmu_done.eq(r0_valid & (r0.tlbie | r0.tlbld))

    with m.If((req_op == Op.OP_LOAD_HIT)
              | (req_op == Op.OP_STCX_FAIL)):
        with m.If(~r0.mmu_req):
            sync += r1.ls_valid.eq(1)
        with m.Else():
            sync += r1.mmu_done.eq(1)

    with m.If(r1.write_tag):
        # Store new tag in selected way
        for i in range(NUM_WAYS):
            with m.If(i == replace_way):
                ct = Signal(TAG_RAM_WIDTH)
                comb += ct.eq(cache_tag[r1.store_index])
                comb += ct.word_select(i, TAG_WIDTH).eq(r1.reload_tag)
                sync += cache_tag[r1.store_index].eq(ct)
        sync += r1.store_way.eq(replace_way)
        sync += r1.write_tag.eq(0)

    # Take request from r1.req if there is one there,
    # else from req_op, ra, etc.
    with m.If(r1.full):
        comb += req.eq(r1.req)
    with m.Else():
        comb += req.op.eq(req_op)
        comb += req.valid.eq(req_go)
        comb += req.mmu_req.eq(r0.mmu_req)
        comb += req.dcbz.eq(r0.req.dcbz)
        comb += req.real_addr.eq(ra)

        with m.If(~r0.req.dcbz):
            comb += req.data.eq(r0.req.data)
        with m.Else():
            comb += req.data.eq(0)

        # Select all bytes for dcbz
        # and for cacheable loads
        with m.If(r0.req.dcbz | (r0.req.load & ~r0.req.nc)):
            comb += req.byte_sel.eq(~0) # all 1s
        with m.Else():
            comb += req.byte_sel.eq(r0.req.byte_sel)
        comb += req.hit_way.eq(req_hit_way)
        comb += req.same_tag.eq(req_same_tag)

    # Store the incoming request from r0,
    # if it is a slow request
    # Note that r1.full = 1 implies req_op = OP_NONE
    with m.If((req_op == Op.OP_LOAD_MISS)
              | (req_op == Op.OP_LOAD_NC)
              | (req_op == Op.OP_STORE_MISS)
              | (req_op == Op.OP_STORE_HIT)):
        sync += r1.req.eq(req)
        sync += r1.full.eq(1)

    # Main state machine
    with m.Switch(r1.state):

        with m.Case(State.IDLE):
            # XXX check 'left downto. probably means len(r1.wb.adr)
            # r1.wb.adr <= req.real_addr(
            #               r1.wb.adr'left downto 0
            #              );
            sync += r1.wb.adr.eq(req.real_addr)
            sync += r1.wb.sel.eq(req.byte_sel)
            sync += r1.wb.dat.eq(req.data)
            sync += r1.dcbz.eq(req.dcbz)

            # Keep track of our index and way
            # for subsequent stores.
            sync += r1.store_index.eq(get_index(req.real_addr))
            sync += r1.store_row.eq(get_row(req.real_addr))
            sync += r1.end_row_ix.eq(
                     get_row_of_line(get_row(req.real_addr))
                    )
            sync += r1.reload_tag.eq(get_tag(req.real_addr))
            sync += r1.req.same_tag.eq(1)

            with m.If(req.op == Op.OP_STORE_HIT):
                sync += r1.store_way.eq(req.hit_way)

            # Reset per-row valid bits,
            # ready for handling OP_LOAD_MISS
            for i in range(ROW_PER_LINE):
                sync += r1.rows_valid[i].eq(0)

            with m.Switch(req.op):
                with m.Case(Op.OP_LOAD_HIT):
                    # stay in IDLE state
                    pass

                with m.Case(Op.OP_LOAD_MISS):
                    #Display(f"cache miss real addr:" \
                    #      f"{req_real_addr}" \
                    #      f" idx:{get_index(req_real_addr)}" \
                    #      f" tag:{get_tag(req.real_addr)}")
                    pass

                    # Start the wishbone cycle
                    sync += r1.wb.we.eq(0)
                    sync += r1.wb.cyc.eq(1)
                    sync += r1.wb.stb.eq(1)

                    # Track that we had one request sent
                    sync += r1.state.eq(State.RELOAD_WAIT_ACK)
                    sync += r1.write_tag.eq(1)

                with m.Case(Op.OP_LOAD_NC):
                    sync += r1.wb.cyc.eq(1)
                    sync += r1.wb.stb.eq(1)
                    sync += r1.wb.we.eq(0)
                    sync += r1.state.eq(State.NC_LOAD_WAIT_ACK)

                with m.Case(Op.OP_STORE_HIT, Op.OP_STORE_MISS):
                    with m.If(~req.dcbz):
                        sync += r1.state.eq(State.STORE_WAIT_ACK)
                        sync += r1.acks_pending.eq(1)
                        sync += r1.full.eq(0)
                        sync += r1.slow_valid.eq(1)

                        with m.If(~req.mmu_req):
                            sync += r1.ls_valid.eq(1)
                        with m.Else():
                            sync += r1.mmu_done.eq(1)

                        with m.If(req.op == Op.OP_STORE_HIT):
                            sync += r1.write_bram.eq(1)
                    with m.Else():
                        # dcbz is handled much like a load miss except
                        # that we are writing to memory instead of reading
                        sync += r1.state.eq(State.RELOAD_WAIT_ACK)

                        with m.If(req.op == Op.OP_STORE_MISS):
                            sync += r1.write_tag.eq(1)

                    sync += r1.wb.we.eq(1)
                    sync += r1.wb.cyc.eq(1)
                    sync += r1.wb.stb.eq(1)

                # OP_NONE and OP_BAD do nothing
                # OP_BAD & OP_STCX_FAIL were
                # handled above already
                with m.Case(Op.OP_NONE):
                    pass
                with m.Case(Op.OP_BAD):
                    pass
                with m.Case(Op.OP_STCX_FAIL):
                    pass

        with m.Case(State.RELOAD_WAIT_ACK):
            # Requests are all sent if stb is 0
            comb += stbs_done.eq(~r1.wb.stb)

            with m.If(~wb_in.stall & ~stbs_done):
                # That was the last word?
                # We are done sending.
                # Clear stb and set stbs_done
                # so we can handle an eventual
                # last ack on the same cycle.
                with m.If(is_last_row_addr(
                          r1.wb.adr, r1.end_row_ix)):
                    sync += r1.wb.stb.eq(0)
                    comb += stbs_done.eq(0)

                # Calculate the next row address in the current cache line
                rarange = r1.wb.adr[ROW_OFF_BITS : LINE_OFF_BITS]
                sync += rarange.eq(rarange + 1)

            # Incoming acks processing
            sync += r1.forward_valid1.eq(wb_in.ack)
            with m.If(wb_in.ack):
                # XXX needs an Array bit-accessor here
                sync += r1.rows_valid[r1.store_row % ROW_PER_LINE].eq(1)

                # If this is the data we were looking for,
                # we can complete the request next cycle.
                # Compare the whole address in case the
                # request in r1.req is not the one that
                # started this refill.
                with m.If(r1.full & r1.req.same_tag &
                          ((r1.dcbz & r1.req.dcbz) |
                           (~r1.dcbz & (r1.req.op == Op.OP_LOAD_MISS))) &
                          (r1.store_row == get_row(r1.req.real_addr))):
                    sync += r1.full.eq(0)
                    sync += r1.slow_valid.eq(1)
                    with m.If(~r1.mmu_req):
                        sync += r1.ls_valid.eq(1)
                    with m.Else():
                        sync += r1.mmu_done.eq(1)
                    sync += r1.forward_sel.eq(~0) # all 1s
                    sync += r1.use_forward1.eq(1)

                # Check for completion
                with m.If(stbs_done & is_last_row(r1.store_row,
                                                  r1.end_row_ix)):
                    # Complete wishbone cycle
                    sync += r1.wb.cyc.eq(0)

                    # Cache line is now valid
                    cv = Signal(INDEX_BITS)
                    sync += cv.eq(cache_valid_bits[r1.store_index])
                    sync += cv.bit_select(r1.store_way, 1).eq(1)
                    sync += r1.state.eq(State.IDLE)

                # Increment store row counter
                sync += r1.store_row.eq(next_row(r1.store_row))

        with m.Case(State.STORE_WAIT_ACK):
            comb += stbs_done.eq(~r1.wb.stb)
            comb += acks.eq(r1.acks_pending)

            with m.If(r1.inc_acks != r1.dec_acks):
                with m.If(r1.inc_acks):
                    comb += adjust_acks.eq(acks + 1)
                with m.Else():
                    comb += adjust_acks.eq(acks - 1)
            with m.Else():
                comb += adjust_acks.eq(acks)

            sync += r1.acks_pending.eq(adjust_acks)

            # Clear stb when slave accepted request
            with m.If(~wb_in.stall):
                # See if there is another store waiting
                # to be done which is in the same real page.
                with m.If(req.valid):
                    _ra = req.real_addr[0:SET_SIZE_BITS]
                    sync += r1.wb.adr[0:SET_SIZE_BITS].eq(_ra)
                    sync += r1.wb.dat.eq(req.data)
                    sync += r1.wb.sel.eq(req.byte_sel)

                # pipeline up to 7 acks in flight to the same line
                with m.Elif((adjust_acks < 7) & req.same_tag &
                            ((req.op == Op.OP_STORE_MISS)
                             | (req.op == Op.OP_STORE_HIT))):
                    sync += r1.wb.stb.eq(1)
                    comb += stbs_done.eq(0)

                    with m.If(req.op == Op.OP_STORE_HIT):
                        sync += r1.write_bram.eq(1)
                    sync += r1.full.eq(0)
                    sync += r1.slow_valid.eq(1)

                    # Store requests never come from the MMU
                    sync += r1.ls_valid.eq(1)
                    comb += stbs_done.eq(0)
                    sync += r1.inc_acks.eq(1)
                with m.Else():
                    sync += r1.wb.stb.eq(0)
                    comb += stbs_done.eq(1)

            # Got ack ? See if complete.
            with m.If(wb_in.ack):
                with m.If(stbs_done & (adjust_acks == 1)):
                    sync += r1.state.eq(State.IDLE)
                    sync += r1.wb.cyc.eq(0)
                    sync += r1.wb.stb.eq(0)
                sync += r1.dec_acks.eq(1)

        with m.Case(State.NC_LOAD_WAIT_ACK):
            # Clear stb when slave accepted request
            with m.If(~wb_in.stall):
                sync += r1.wb.stb.eq(0)

            # Got ack ? complete.
            with m.If(wb_in.ack):
                sync += r1.state.eq(State.IDLE)
                sync += r1.full.eq(0)
                sync += r1.slow_valid.eq(1)

                with m.If(~r1.mmu_req):
                    sync += r1.ls_valid.eq(1)
                with m.Else():
                    sync += r1.mmu_done.eq(1)

                sync += r1.forward_sel.eq(~0) # all 1s
                sync += r1.use_forward1.eq(1)
                sync += r1.wb.cyc.eq(0)
                sync += r1.wb.stb.eq(0)
def dcache_log(self, m, r1, valid_ra, tlb_hit_way, stall_out):
    """Pack internal state into the debug log output register.

    NOTE(review): ``req_op`` is referenced below but is neither a
    parameter nor otherwise in scope here; the sole call site in
    elaborate() is commented out, so this has never executed.
    Confirm the intended signal before re-enabling.
    """
    sync = m.d.sync
    d_out, wb_in, log_out = self.d_out, self.wb_in, self.log_out

    # low three bits of state/way/op are enough to distinguish them
    sync += log_out.eq(Cat(r1.state[:3], valid_ra, tlb_hit_way[:3],
                           stall_out, req_op[:3], d_out.valid, d_out.error,
                           r1.wb.cyc, r1.wb.stb, wb_in.ack, wb_in.stall,
                           r1.wb.adr[3:6]))  # tail reconstructed — confirm
def elaborate(self, platform):
    """Build the dcache: storage, TLB, stage registers and the
    sub-function state machines, wired together through shared
    signals declared here.
    """
    m = Module()
    comb = m.d.comb

    # Storage. Hopefully "cache_rows" is a BRAM, the rest is LUTs
    cache_tags       = CacheTagArray()
    cache_tag_set    = Signal(TAG_RAM_WIDTH)
    cache_valid_bits = CacheValidBitsArray()

    # TODO attribute ram_style : string;
    # TODO attribute ram_style of cache_tags : signal is "distributed";

    # note: these are passed to nmigen.hdl.Memory as "attributes".
    # don't know how, just that they are.
    dtlb_valid_bits = TLBValidBitsArray()
    dtlb_tags       = TLBTagsArray()
    dtlb_ptes       = TLBPtesArray()
    # TODO attribute ram_style of
    #  dtlb_tags : signal is "distributed";
    # TODO attribute ram_style of
    #  dtlb_ptes : signal is "distributed";

    # stage-0 and stage-1 pipeline registers
    r0      = RegStage0()
    r0_full = Signal()

    r1 = RegStage1()

    reservation = Reservation()

    # Async signals on incoming request
    req_index    = Signal(INDEX_BITS)
    req_row      = Signal(ROW_BITS)
    req_hit_way  = Signal(WAY_BITS)
    req_tag      = Signal(TAG_BITS)
    req_op       = Signal(Op)
    req_data     = Signal(64)
    req_same_tag = Signal()
    req_go       = Signal()

    early_req_row = Signal(ROW_BITS)

    cancel_store = Signal()
    set_rsrv     = Signal()
    clear_rsrv   = Signal()

    r0_valid = Signal()
    r0_stall = Signal()

    use_forward1_next = Signal()
    use_forward2_next = Signal()

    cache_out = CacheRamOut()

    plru_victim = PLRUOut()
    replace_way = Signal(WAY_BITS)

    # Wishbone read/write/cache write formatting signals

    # TLB signals
    tlb_tag_way   = Signal(TLB_TAG_WAY_BITS)
    tlb_pte_way   = Signal(TLB_PTE_WAY_BITS)
    tlb_valid_way = Signal(TLB_NUM_WAYS)
    tlb_req_index = Signal(TLB_SET_BITS)
    tlb_hit       = Signal()
    tlb_hit_way   = Signal(TLB_WAY_BITS)
    pte           = Signal(TLB_PTE_BITS)
    ra            = Signal(REAL_ADDR_BITS)
    valid_ra      = Signal()
    perm_attr     = PermAttr()
    rc_ok         = Signal()
    perm_ok       = Signal()
    access_ok     = Signal()

    tlb_plru_victim = TLBPLRUOut()

    # we don't yet handle collisions between loadstore1 requests
    # and MMU requests
    comb += self.m_out.stall.eq(0)

    # Hold off the request in r0 when r1 has an uncompleted request
    comb += r0_stall.eq(r0_full & r1.full)
    comb += r0_valid.eq(r0_full & ~r1.full)
    comb += self.stall_out.eq(r0_stall)

    # Wire up wishbone request latch out of stage 1
    comb += self.wb_out.eq(r1.wb)

    # call sub-functions putting everything together, using shared
    # signals established above
    self.stage_0(m, r0, r1, r0_full)
    self.tlb_read(m, r0_stall, tlb_valid_way,
                  tlb_tag_way, tlb_pte_way, dtlb_valid_bits,
                  dtlb_tags, dtlb_ptes)
    self.tlb_search(m, tlb_req_index, r0, r0_valid,
                    tlb_valid_way, tlb_tag_way, tlb_hit_way,
                    tlb_pte_way, pte, tlb_hit, valid_ra, perm_attr, ra)
    self.tlb_update(m, r0_valid, r0, dtlb_valid_bits, tlb_req_index,
                    tlb_hit_way, tlb_hit, tlb_plru_victim, tlb_tag_way,
                    dtlb_tags, tlb_pte_way, dtlb_ptes)
    self.maybe_plrus(m, r1, plru_victim)
    self.cache_tag_read(m, r0_stall, req_index, cache_tag_set, cache_tags)
    self.dcache_request(m, r0, ra, req_index, req_row, req_tag,
                        r0_valid, r1, cache_valid_bits, replace_way,
                        use_forward1_next, use_forward2_next,
                        req_hit_way, plru_victim, rc_ok, perm_attr,
                        valid_ra, perm_ok, access_ok, req_op, req_go,
                        tlb_hit, tlb_hit_way, tlb_valid_way, cache_tag_set,
                        cancel_store, req_same_tag, r0_stall, early_req_row)
    self.reservation_comb(m, cancel_store, set_rsrv, clear_rsrv,
                          r0_valid, r0, reservation)
    self.reservation_reg(m, r0_valid, access_ok, set_rsrv, clear_rsrv,
                         reservation, r0)
    self.writeback_control(m, r1, cache_out)
    self.rams(m, r1, early_req_row, cache_out, replace_way)
    self.dcache_fast_hit(m, req_op, r0_valid, r0, r1,
                         req_hit_way, req_index, access_ok,
                         tlb_hit, tlb_hit_way, tlb_req_index)
    self.dcache_slow(m, r1, use_forward1_next, use_forward2_next,
                     cache_valid_bits, r0, replace_way,
                     req_hit_way, req_same_tag,
                     r0_valid, req_op, cache_tags, req_go, ra)
    #self.dcache_log(m, r1, valid_ra, tlb_hit_way, stall_out)

    return m
1649 # entity dcache_tb is
1652 # architecture behave of dcache_tb is
1653 # signal clk : std_ulogic;
1654 # signal rst : std_ulogic;
1656 # signal d_in : Loadstore1ToDcacheType;
1657 # signal d_out : DcacheToLoadstore1Type;
1659 # signal m_in : MmuToDcacheType;
1660 # signal m_out : DcacheToMmuType;
1662 # signal wb_bram_in : wishbone_master_out;
1663 # signal wb_bram_out : wishbone_slave_out;
1665 # constant clk_period : time := 10 ns;
1667 # dcache0: entity work.dcache
1680 # wishbone_out => wb_bram_in,
1681 # wishbone_in => wb_bram_out
1684 # -- BRAM Memory slave
1685 # bram0: entity work.wishbone_bram_wrapper
1687 # MEMORY_SIZE => 1024,
1688 # RAM_INIT_FILE => "icache_test.bin"
1693 # wishbone_in => wb_bram_in,
1694 # wishbone_out => wb_bram_out
1697 # clk_process: process
1700 # wait for clk_period/2;
1702 # wait for clk_period/2;
1705 # rst_process: process
1708 # wait for 2*clk_period;
1716 # d_in.valid <= '0';
1719 # d_in.addr <= (others => '0');
1720 # d_in.data <= (others => '0');
1721 # m_in.valid <= '0';
1722 # m_in.addr <= (others => '0');
1723 # m_in.pte <= (others => '0');
1725 # wait for 4*clk_period;
1726 # wait until rising_edge(clk);
1728 # -- Cacheable read of address 4
1731 # d_in.addr <= x"0000000000000004";
1732 # d_in.valid <= '1';
1733 # wait until rising_edge(clk);
1734 # d_in.valid <= '0';
1736 # wait until rising_edge(clk) and d_out.valid = '1';
1737 # assert d_out.data = x"0000000100000000"
1738 # report "data @" & to_hstring(d_in.addr) &
1739 # "=" & to_hstring(d_out.data) &
1740 # " expected 0000000100000000"
1742 # -- wait for clk_period;
1744 # -- Cacheable read of address 30
1747 # d_in.addr <= x"0000000000000030";
1748 # d_in.valid <= '1';
1749 # wait until rising_edge(clk);
1750 # d_in.valid <= '0';
1752 # wait until rising_edge(clk) and d_out.valid = '1';
1753 # assert d_out.data = x"0000000D0000000C"
1754 # report "data @" & to_hstring(d_in.addr) &
1755 # "=" & to_hstring(d_out.data) &
1756 # " expected 0000000D0000000C"
1759 # -- Non-cacheable read of address 100
1762 # d_in.addr <= x"0000000000000100";
1763 # d_in.valid <= '1';
1764 # wait until rising_edge(clk);
1765 # d_in.valid <= '0';
1766 # wait until rising_edge(clk) and d_out.valid = '1';
1767 # assert d_out.data = x"0000004100000040"
1768 # report "data @" & to_hstring(d_in.addr) &
1769 # "=" & to_hstring(d_out.data) &
1770 # " expected 0000004100000040"
1773 # wait until rising_edge(clk);
1774 # wait until rising_edge(clk);
1775 # wait until rising_edge(clk);
1776 # wait until rising_edge(clk);
def dcache_sim(dut):
    """Simulation stimulus: three reads (two cacheable, one NC),
    checking the data returned against the test RAM contents.
    """
    # clear stuff out first
    yield dut.d_in.valid.eq(0)
    yield dut.d_in.load.eq(0)
    yield dut.d_in.nc.eq(0)
    yield dut.d_in.addr.eq(0)    # was misspelled "adrr"
    yield dut.d_in.data.eq(0)
    yield dut.m_in.valid.eq(0)
    yield dut.m_in.addr.eq(0)
    yield dut.m_in.pte.eq(0)
    # wait 4 * clk_period
    yield
    yield
    yield
    yield
    # wait_until rising_edge(clk)

    # Cacheable read of address 4
    yield dut.d_in.load.eq(1)
    yield dut.d_in.nc.eq(0)
    yield dut.d_in.addr.eq(Const(0x0000000000000004, 64))
    yield dut.d_in.valid.eq(1)
    # wait-until rising_edge(clk)
    yield
    yield dut.d_in.valid.eq(0)
    yield
    while not (yield dut.d_out.valid):
        yield
    # NB: must *yield* the signal to read its simulated value;
    # comparing the Signal object itself can never succeed
    data = yield dut.d_out.data
    assert data == 0x0000000100000000, \
        f"data @4={data:x} expected 0000000100000000"

    # Cacheable read of address 30
    yield dut.d_in.load.eq(1)
    yield dut.d_in.nc.eq(0)
    yield dut.d_in.addr.eq(Const(0x0000000000000030, 64))
    yield dut.d_in.valid.eq(1)
    yield
    yield dut.d_in.valid.eq(0)
    yield
    while not (yield dut.d_out.valid):
        yield
    data = yield dut.d_out.data
    assert data == 0x0000000D0000000C, \
        f"data @30={data:x} expected 0000000D0000000C"

    # Non-cacheable read of address 100
    yield dut.d_in.load.eq(1)
    yield dut.d_in.nc.eq(1)
    yield dut.d_in.addr.eq(Const(0x0000000000000100, 64))
    yield dut.d_in.valid.eq(1)
    yield
    yield dut.d_in.valid.eq(0)
    yield
    while not (yield dut.d_out.valid):
        yield
    data = yield dut.d_out.data
    assert data == 0x0000004100000040, \
        f"data @100={data:x} expected 0000004100000040"
def test_dcache():
    # convert the design to RTLIL for inspection / synthesis
    dut = DCache()
    vl = rtlil.convert(dut, ports=[])
    with open("test_dcache.il", "w") as f:
        f.write(vl)

    #run_simulation(dut, dcache_sim(), vcd_name='test_dcache.vcd')

if __name__ == '__main__':
    test_dcache()