1 """DCache
2
3 based on Anton Blanchard microwatt dcache.vhdl
4
5 """
6
7 from enum import Enum, unique
8
9 from nmigen import Module, Signal, Elaboratable, Cat, Repl, Array, Const
10 from nmigen.cli import main
11 from nmutil.iocontrol import RecordObject
12 from nmigen.utils import log2_int
13 from nmigen.cli import rtlil
14
15
16 from soc.experiment.mem_types import (LoadStore1ToDCacheType,
17 DCacheToLoadStore1Type,
18 MMUToDCacheType,
19 DCacheToMMUType)
20
21 from soc.experiment.wb_types import (WB_ADDR_BITS, WB_DATA_BITS, WB_SEL_BITS,
22 WBAddrType, WBDataType, WBSelType,
23 WBMasterOut, WBSlaveOut,
24 WBMasterOutVector, WBSlaveOutVector,
25 WBIOMasterOut, WBIOSlaveOut)
26
27 from soc.experiment.cache_ram import CacheRam
28 from soc.experiment.plru import PLRU
29
30
# TODO: make these parameters of DCache at some point
LINE_SIZE = 64    # Line size in bytes
NUM_LINES = 32    # Number of lines in a set
NUM_WAYS = 4      # Number of ways
TLB_SET_SIZE = 64 # L1 DTLB entries per set
TLB_NUM_WAYS = 2  # L1 DTLB number of ways
TLB_LG_PGSZ = 12  # L1 DTLB log_2(page_size)
LOG_LENGTH = 0    # Non-zero to enable log data collection

# BRAM organisation: We never access more than
# WB_DATA_BITS at a time so to save
# resources we make the array only that wide, and
# use consecutive indices to make a cache "line"
#
# ROW_SIZE is the width in bytes of the BRAM
# (based on WB, so 64-bits)
ROW_SIZE = WB_DATA_BITS // 8

# ROW_PER_LINE is the number of rows (wishbone
# transactions) in a line
ROW_PER_LINE = LINE_SIZE // ROW_SIZE

# BRAM_ROWS is the number of rows in BRAM needed
# to represent the full dcache
BRAM_ROWS = NUM_LINES * ROW_PER_LINE
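
# Worked example with the default geometry above:
#   ROW_SIZE     = 64 // 8 = 8   bytes per BRAM row
#   ROW_PER_LINE = 64 // 8 = 8   rows (wishbone transfers) per line
#   BRAM_ROWS    = 32 * 8  = 256 BRAM rows for the whole dcache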


# Bit fields counts in the address

# REAL_ADDR_BITS is the number of real address
# bits that we store
REAL_ADDR_BITS = 56

# ROW_BITS is the number of bits to select a row
ROW_BITS = log2_int(BRAM_ROWS)

# ROW_LINE_BITS is the number of bits to select
# a row within a line
ROW_LINE_BITS = log2_int(ROW_PER_LINE)

# LINE_OFF_BITS is the number of bits for
# the offset in a cache line
LINE_OFF_BITS = log2_int(LINE_SIZE)

# ROW_OFF_BITS is the number of bits for
# the offset in a row
ROW_OFF_BITS = log2_int(ROW_SIZE)

# INDEX_BITS is the number of bits to
# select a cache line
INDEX_BITS = log2_int(NUM_LINES)

# SET_SIZE_BITS is the log base 2 of the set size
SET_SIZE_BITS = LINE_OFF_BITS + INDEX_BITS

# TAG_BITS is the number of bits of
# the tag part of the address
TAG_BITS = REAL_ADDR_BITS - SET_SIZE_BITS

# TAG_WIDTH is the width in bits of each way of the tag RAM
TAG_WIDTH = TAG_BITS + 7 - ((TAG_BITS + 7) % 8)
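# e.g. with TAG_BITS = 45: (45 + 7) % 8 = 4, so TAG_WIDTH = 45 + 7 - 4 = 48,
# i.e. each way's tag is padded up to the next multiple of 8 bits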

# WAY_BITS is the number of bits to select a way
WAY_BITS = log2_int(NUM_WAYS)

# Example of layout for 32 lines of 64 bytes:
#
# ..  tag    |index|  line  |
# ..         |   row   |    |
# ..         |     |---|    | ROW_LINE_BITS  (3)
# ..         |     |--- - --| LINE_OFF_BITS (6)
# ..         |         |- --| ROW_OFF_BITS  (3)
# ..         |----- ---|    | ROW_BITS      (8)
# ..         |-----|        | INDEX_BITS    (5)
# .. --------|              | TAG_BITS      (45)

TAG_RAM_WIDTH = TAG_WIDTH * NUM_WAYS

def CacheTagArray():
    return Array(Signal(TAG_RAM_WIDTH) for x in range(NUM_LINES))

def CacheValidBitsArray():
    # one valid bit per way, for each cache line
    return Array(Signal(NUM_WAYS) for x in range(NUM_LINES))

def RowPerLineValidArray():
    return Array(Signal() for x in range(ROW_PER_LINE))

# L1 TLB
TLB_SET_BITS = log2_int(TLB_SET_SIZE)
TLB_WAY_BITS = log2_int(TLB_NUM_WAYS)
TLB_EA_TAG_BITS = 64 - (TLB_LG_PGSZ + TLB_SET_BITS)
TLB_TAG_WAY_BITS = TLB_NUM_WAYS * TLB_EA_TAG_BITS
TLB_PTE_BITS = 64
TLB_PTE_WAY_BITS = TLB_NUM_WAYS * TLB_PTE_BITS
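
# With the defaults above:
#   TLB_SET_BITS     = log2_int(64)   = 6
#   TLB_WAY_BITS     = log2_int(2)    = 1
#   TLB_EA_TAG_BITS  = 64 - (12 + 6)  = 46
#   TLB_TAG_WAY_BITS = 2 * 46         = 92
#   TLB_PTE_WAY_BITS = 2 * 64         = 128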

assert (LINE_SIZE % ROW_SIZE) == 0, "LINE_SIZE not multiple of ROW_SIZE"
assert (LINE_SIZE & (LINE_SIZE - 1)) == 0, "LINE_SIZE not power of 2"
assert (NUM_LINES & (NUM_LINES - 1)) == 0, "NUM_LINES not power of 2"
assert (ROW_PER_LINE & (ROW_PER_LINE - 1)) == 0, \
        "ROW_PER_LINE not power of 2"
assert ROW_BITS == (INDEX_BITS + ROW_LINE_BITS), "geometry bits don't add up"
assert (LINE_OFF_BITS == ROW_OFF_BITS + ROW_LINE_BITS), \
        "geometry bits don't add up"
assert REAL_ADDR_BITS == (TAG_BITS + INDEX_BITS + LINE_OFF_BITS), \
        "geometry bits don't add up"
assert REAL_ADDR_BITS == (TAG_BITS + ROW_BITS + ROW_OFF_BITS), \
        "geometry bits don't add up"
assert 64 == WB_DATA_BITS, "Can't yet handle wb width that isn't 64-bits"
assert SET_SIZE_BITS <= TLB_LG_PGSZ, "Set indexed by virtual address"


def TLBValidBitsArray():
    return Array(Signal(TLB_NUM_WAYS) for x in range(TLB_SET_SIZE))

def TLBTagsArray():
    return Array(Signal(TLB_TAG_WAY_BITS) for x in range(TLB_SET_SIZE))

def TLBPtesArray():
    return Array(Signal(TLB_PTE_WAY_BITS) for x in range(TLB_SET_SIZE))

def HitWaySet():
    # one way-number entry per TLB way
    return Array(Signal(WAY_BITS) for x in range(TLB_NUM_WAYS))

# Cache RAM interface
def CacheRamOut():
    return Array(Signal(WB_DATA_BITS) for x in range(NUM_WAYS))

# PLRU output interface
def PLRUOut():
    return Array(Signal(WAY_BITS) for x in range(NUM_LINES))

# TLB PLRU output interface
def TLBPLRUOut():
    return Array(Signal(TLB_WAY_BITS) for x in range(TLB_SET_SIZE))

# Helper functions to decode incoming requests
#
# Return the cache line index (tag index) for an address
def get_index(addr):
    return addr[LINE_OFF_BITS:SET_SIZE_BITS]

# Return the cache row index (data memory) for an address
def get_row(addr):
    return addr[ROW_OFF_BITS:SET_SIZE_BITS]

# Return the index of a row within a line
def get_row_of_line(row):
    return row[:ROW_LINE_BITS]

# Return whether this address is in the last row of a line
def is_last_row_addr(addr, last):
    return addr[ROW_OFF_BITS:LINE_OFF_BITS] == last

# Return whether this is the last row of a line
def is_last_row(row, last):
    return get_row_of_line(row) == last

# Return the next row in the current cache line. We use a
# dedicated function in order to limit the size of the
# generated adder to be only the bits within a cache line
# (3 bits with default settings)
def next_row(row):
    row_v = row[0:ROW_LINE_BITS] + 1
    return Cat(row_v[:ROW_LINE_BITS], row[ROW_LINE_BITS:])

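# Worked example of next_row: with ROW_LINE_BITS = 3, a row at the end
# of a line wraps around to the start of the same line, and the upper
# (line-select) bits are left untouched:
#   next_row(0b10110_111) == 0b10110_000
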
# Get the tag value from the address
def get_tag(addr):
    return addr[SET_SIZE_BITS:REAL_ADDR_BITS]

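# Worked example of the decode helpers, using the default geometry
# (ROW_OFF_BITS=3, LINE_OFF_BITS=6, SET_SIZE_BITS=11) on the
# hypothetical real address 0x3FC0; numerically this is equivalent to:
#   get_index(0x3FC0) = (0x3FC0 >> 6) & 0x1F = 31   (cache line index)
#   get_row(0x3FC0)   = (0x3FC0 >> 3) & 0xFF = 248  (BRAM row number)
#   get_tag(0x3FC0)   =  0x3FC0 >> 11        = 7    (tag)
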
# Read a tag from a tag memory row
def read_tag(way, tagset):
    return tagset[way * TAG_WIDTH:way * TAG_WIDTH + TAG_BITS]

# Read a TLB tag from a TLB tag memory row
def read_tlb_tag(way, tags):
    return tags.word_select(way, TLB_EA_TAG_BITS)

# Write a TLB tag to a TLB tag memory row
def write_tlb_tag(way, tags, tag):
    return read_tlb_tag(way, tags).eq(tag)

# Read a PTE from a TLB PTE memory row
def read_tlb_pte(way, ptes):
    return ptes.word_select(way, TLB_PTE_BITS)

# Write a PTE to a TLB PTE memory row
def write_tlb_pte(way, ptes, newpte):
    return read_tlb_pte(way, ptes).eq(newpte)


# Record for storing permission, attribute, etc. bits from a PTE
class PermAttr(RecordObject):
    def __init__(self):
        super().__init__()
        self.reference = Signal()
        self.changed = Signal()
        self.nocache = Signal()
        self.priv = Signal()
        self.rd_perm = Signal()
        self.wr_perm = Signal()


def extract_perm_attr(pte):
    pa = PermAttr()
    pa.reference = pte[8]
    pa.changed = pte[7]
    pa.nocache = pte[5]
    pa.priv = pte[3]
    pa.rd_perm = pte[2]
    pa.wr_perm = pte[1]
    return pa


# Type of operation on a "valid" input
@unique
class Op(Enum):
    OP_NONE = 0
    OP_BAD = 1           # NC cache hit, TLB miss, prot/RC failure
    OP_STCX_FAIL = 2     # conditional store w/o reservation
    OP_LOAD_HIT = 3      # Cache hit on load
    OP_LOAD_MISS = 4     # Load missing cache
    OP_LOAD_NC = 5       # Non-cachable load
    OP_STORE_HIT = 6     # Store hitting cache
    OP_STORE_MISS = 7    # Store missing cache


# Cache state machine
@unique
class State(Enum):
    IDLE = 0             # Normal load hit processing
    RELOAD_WAIT_ACK = 1  # Cache reload wait ack
    STORE_WAIT_ACK = 2   # Store wait ack
    NC_LOAD_WAIT_ACK = 3 # Non-cachable load wait ack


# Dcache operations:
#
# In order to make timing, we use the BRAMs with
# an output buffer, which means that the BRAM
# output is delayed by an extra cycle.
#
# Thus, the dcache has a 2-stage internal pipeline
# for cache hits with no stalls.
#
# All other operations are handled via stalling
# in the first stage.
#
# The second stage can thus complete a hit at the same
# time as the first stage emits a stall for a complex op.
#
# Stage 0 register, basically contains just the latched request

class RegStage0(RecordObject):
    def __init__(self):
        super().__init__()
        self.req = LoadStore1ToDCacheType()
        self.tlbie = Signal()
        self.doall = Signal()
        self.tlbld = Signal()
        self.mmu_req = Signal() # indicates source of request


class MemAccessRequest(RecordObject):
    def __init__(self):
        super().__init__()
        self.op = Signal(Op)
        self.valid = Signal()
        self.dcbz = Signal()
        self.real_addr = Signal(REAL_ADDR_BITS)
        self.data = Signal(64)
        self.byte_sel = Signal(8)
        self.hit_way = Signal(WAY_BITS)
        self.same_tag = Signal()
        self.mmu_req = Signal()


# First stage register, contains state for stage 1 of load hits
# and for the state machine used by all other operations
class RegStage1(RecordObject):
    def __init__(self):
        super().__init__()
        # Info about the request
        self.full = Signal()    # have uncompleted request
        self.mmu_req = Signal() # request is from MMU
        self.req = MemAccessRequest()

        # Cache hit state
        self.hit_way = Signal(WAY_BITS)
        self.hit_load_valid = Signal()
        self.hit_index = Signal(INDEX_BITS)
        self.cache_hit = Signal()

        # TLB hit state
        self.tlb_hit = Signal()
        self.tlb_hit_way = Signal(TLB_WAY_BITS)
        self.tlb_hit_index = Signal(TLB_SET_BITS)

        # 2-stage data buffer for data forwarded from writes to reads
        self.forward_data1 = Signal(64)
        self.forward_data2 = Signal(64)
        self.forward_sel1 = Signal(8)
        self.forward_valid1 = Signal()
        self.forward_way1 = Signal(WAY_BITS)
        self.forward_row1 = Signal(ROW_BITS)
        self.use_forward1 = Signal()
        self.forward_sel = Signal(8)

        # Cache miss state (reload state machine)
        self.state = Signal(State)
        self.dcbz = Signal()
        self.write_bram = Signal()
        self.write_tag = Signal()
        self.slow_valid = Signal()
        self.wb = WBMasterOut()
        self.reload_tag = Signal(TAG_BITS)
        self.store_way = Signal(WAY_BITS)
        self.store_row = Signal(ROW_BITS)
        self.store_index = Signal(INDEX_BITS)
        self.end_row_ix = Signal(ROW_LINE_BITS)
        self.rows_valid = RowPerLineValidArray()
        self.acks_pending = Signal(3)
        self.inc_acks = Signal()
        self.dec_acks = Signal()

        # Signals to complete (possibly with error)
        self.ls_valid = Signal()
        self.ls_error = Signal()
        self.mmu_done = Signal()
        self.mmu_error = Signal()
        self.cache_paradox = Signal()

        # Signal to complete a failed stcx.
        self.stcx_fail = Signal()


# Reservation information
class Reservation(RecordObject):
    def __init__(self):
        super().__init__()
        self.valid = Signal()
        self.addr = Signal(64 - LINE_OFF_BITS)


class DCache(Elaboratable):
    """Set associative dcache write-through
    TODO (in no specific order):
    * See list in icache.vhdl
    * Complete load misses on the cycle when WB data comes instead of
      at the end of line (this requires dealing with requests coming in
      while not idle...)
    """
    def __init__(self):
        self.d_in = LoadStore1ToDCacheType()
        self.d_out = DCacheToLoadStore1Type()

        self.m_in = MMUToDCacheType()
        self.m_out = DCacheToMMUType()

        self.stall_out = Signal()

        self.wb_out = WBMasterOut()
        self.wb_in = WBSlaveOut()

        self.log_out = Signal(20)

    def stage_0(self, m, r0, r1, r0_full):
        """Latch the request in r0.req as long as we're not stalling
        """
        comb = m.d.comb
        sync = m.d.sync
        d_in, d_out, m_in = self.d_in, self.d_out, self.m_in

        r = RegStage0()

        # TODO, this goes in unit tests and formal proofs
        # (a simultaneous request from loadstore1 and the MMU
        #  is a collision and is not yet handled)
        with m.If(d_in.valid & m_in.valid):
            #sync += Display("request collision loadstore vs MMU")
            pass

        with m.If(m_in.valid):
            sync += r.req.valid.eq(1)
            sync += r.req.load.eq(~(m_in.tlbie | m_in.tlbld))
            sync += r.req.dcbz.eq(0)
            sync += r.req.nc.eq(0)
            sync += r.req.reserve.eq(0)
            sync += r.req.virt_mode.eq(1)
            sync += r.req.priv_mode.eq(1)
            sync += r.req.addr.eq(m_in.addr)
            sync += r.req.data.eq(m_in.pte)
            sync += r.req.byte_sel.eq(~0) # Const -1 sets all to 0b111....
            sync += r.tlbie.eq(m_in.tlbie)
            sync += r.doall.eq(m_in.doall)
            sync += r.tlbld.eq(m_in.tlbld)
            sync += r.mmu_req.eq(1)
        with m.Else():
            sync += r.req.eq(d_in)
            sync += r.tlbie.eq(0)
            sync += r.doall.eq(0)
            sync += r.tlbld.eq(0)
            sync += r.mmu_req.eq(0)
        with m.If(~(r1.full & r0_full)):
            sync += r0.eq(r)
            sync += r0_full.eq(r.req.valid)

    def tlb_read(self, m, r0_stall, tlb_valid_way,
                 tlb_tag_way, tlb_pte_way, dtlb_valid_bits,
                 dtlb_tags, dtlb_ptes):
        """TLB
        Operates in the second cycle on the request latched in r0.req.
        TLB updates write the entry at the end of the second cycle.
        """
        comb = m.d.comb
        sync = m.d.sync
        m_in, d_in = self.m_in, self.d_in

        index = Signal(TLB_SET_BITS)
        addrbits = Signal(TLB_SET_BITS)

        amin = TLB_LG_PGSZ
        amax = TLB_LG_PGSZ + TLB_SET_BITS

        with m.If(m_in.valid):
            comb += addrbits.eq(m_in.addr[amin : amax])
        with m.Else():
            comb += addrbits.eq(d_in.addr[amin : amax])
        comb += index.eq(addrbits)

        # If we have any op and the previous op isn't finished,
        # then keep the same output for next cycle.
        with m.If(~r0_stall):
            sync += tlb_valid_way.eq(dtlb_valid_bits[index])
            sync += tlb_tag_way.eq(dtlb_tags[index])
            sync += tlb_pte_way.eq(dtlb_ptes[index])

    def maybe_tlb_plrus(self, m, r1, tlb_plru_victim):
        """Generate TLB PLRUs
        """
        comb = m.d.comb
        sync = m.d.sync

        # elaboration-time (Python) condition, not an HDL one:
        # with a single TLB way there is nothing to arbitrate
        if TLB_NUM_WAYS > 1:
            for i in range(TLB_SET_SIZE):
                # TLB PLRU interface
                tlb_plru = PLRU(TLB_WAY_BITS)
                setattr(m.submodules, "maybe_plru_%d" % i, tlb_plru)
                tlb_plru_acc = Signal(TLB_WAY_BITS)
                tlb_plru_acc_en = Signal()
                tlb_plru_out = Signal(TLB_WAY_BITS)

                # PLRU interface: touch the accessed way on a TLB hit
                # in this set
                with m.If(r1.tlb_hit_index == i):
                    comb += tlb_plru_acc_en.eq(r1.tlb_hit)
                with m.Else():
                    comb += tlb_plru_acc_en.eq(0)
                comb += tlb_plru_acc.eq(r1.tlb_hit_way)

                comb += tlb_plru.acc.eq(tlb_plru_acc)
                comb += tlb_plru.acc_en.eq(tlb_plru_acc_en)
                comb += tlb_plru_out.eq(tlb_plru.lru_o)

                comb += tlb_plru_victim[i].eq(tlb_plru_out)

    def tlb_search(self, m, tlb_req_index, r0, r0_valid,
                   tlb_valid_way, tlb_tag_way, tlb_hit_way,
                   tlb_pte_way, pte, tlb_hit, valid_ra, perm_attr, ra):

        comb = m.d.comb
        sync = m.d.sync

        hitway = Signal(TLB_WAY_BITS)
        hit = Signal()
        eatag = Signal(TLB_EA_TAG_BITS)

        TLB_LG_END = TLB_LG_PGSZ + TLB_SET_BITS
        comb += tlb_req_index.eq(r0.req.addr[TLB_LG_PGSZ : TLB_LG_END])
        comb += eatag.eq(r0.req.addr[TLB_LG_END : 64])

        for i in range(TLB_NUM_WAYS):
            # note: brackets are needed here: & binds tighter than ==
            with m.If(tlb_valid_way[i]
                      & (read_tlb_tag(i, tlb_tag_way) == eatag)):
                comb += hitway.eq(i)
                comb += hit.eq(1)

        comb += tlb_hit.eq(hit & r0_valid)
        comb += tlb_hit_way.eq(hitway)

        with m.If(tlb_hit):
            comb += pte.eq(read_tlb_pte(hitway, tlb_pte_way))
        with m.Else():
            comb += pte.eq(0)
        comb += valid_ra.eq(tlb_hit | ~r0.req.virt_mode)
        with m.If(r0.req.virt_mode):
            comb += ra.eq(Cat(Const(0, ROW_OFF_BITS),
                              r0.req.addr[ROW_OFF_BITS:TLB_LG_PGSZ],
                              pte[TLB_LG_PGSZ:REAL_ADDR_BITS]))
            comb += perm_attr.eq(extract_perm_attr(pte))
        with m.Else():
            comb += ra.eq(Cat(Const(0, ROW_OFF_BITS),
                              r0.req.addr[ROW_OFF_BITS:REAL_ADDR_BITS]))

            comb += perm_attr.reference.eq(1)
            comb += perm_attr.changed.eq(1)
            comb += perm_attr.priv.eq(1)
            comb += perm_attr.nocache.eq(0)
            comb += perm_attr.rd_perm.eq(1)
            comb += perm_attr.wr_perm.eq(1)

    def tlb_update(self, m, r0_valid, r0, dtlb_valid_bits, tlb_req_index,
                   tlb_hit_way, tlb_hit, tlb_plru_victim, tlb_tag_way,
                   dtlb_tags, tlb_pte_way, dtlb_ptes):

        comb = m.d.comb
        sync = m.d.sync

        tlbie = Signal()
        tlbwe = Signal()
        repl_way = Signal(TLB_WAY_BITS)
        eatag = Signal(TLB_EA_TAG_BITS)
        tagset = Signal(TLB_TAG_WAY_BITS)
        pteset = Signal(TLB_PTE_WAY_BITS)

        comb += tlbie.eq(r0_valid & r0.tlbie)
        comb += tlbwe.eq(r0_valid & r0.tlbld)

        with m.If(tlbie & r0.doall):
            # clear all valid bits at once
            for i in range(TLB_SET_SIZE):
                sync += dtlb_valid_bits[i].eq(0)
        with m.Elif(tlbie):
            with m.If(tlb_hit):
                sync += dtlb_valid_bits[tlb_req_index].bit_select(
                            tlb_hit_way, 1).eq(0)
        with m.Elif(tlbwe):
            with m.If(tlb_hit):
                comb += repl_way.eq(tlb_hit_way)
            with m.Else():
                comb += repl_way.eq(tlb_plru_victim[tlb_req_index])
            comb += eatag.eq(r0.req.addr[TLB_LG_PGSZ + TLB_SET_BITS:64])
            # build the updated tag and PTE rows combinatorially,
            # then write them back on the clock edge
            comb += tagset.eq(tlb_tag_way)
            comb += write_tlb_tag(repl_way, tagset, eatag)
            sync += dtlb_tags[tlb_req_index].eq(tagset)
            comb += pteset.eq(tlb_pte_way)
            comb += write_tlb_pte(repl_way, pteset, r0.req.data)
            sync += dtlb_ptes[tlb_req_index].eq(pteset)
            sync += dtlb_valid_bits[tlb_req_index].bit_select(
                        repl_way, 1).eq(1)

    def maybe_plrus(self, m, r1, plru_victim):
        """Generate PLRUs
        """
        comb = m.d.comb
        sync = m.d.sync

        for i in range(NUM_LINES):
            # PLRU interface
            plru = PLRU(WAY_BITS)
            setattr(m.submodules, "plru%d" % i, plru)
            plru_acc = Signal(WAY_BITS)
            plru_acc_en = Signal()
            plru_out = Signal(WAY_BITS)

            comb += plru.acc.eq(plru_acc)
            comb += plru.acc_en.eq(plru_acc_en)
            comb += plru_out.eq(plru.lru_o)

            with m.If(r1.hit_index == i):
                comb += plru_acc_en.eq(r1.cache_hit)

            comb += plru_acc.eq(r1.hit_way)
            comb += plru_victim[i].eq(plru_out)

    def cache_tag_read(self, m, r0_stall, req_index, cache_tag_set,
                       cache_tags):
        """Cache tag RAM read port
        """
        comb = m.d.comb
        sync = m.d.sync
        m_in, d_in = self.m_in, self.d_in

        index = Signal(INDEX_BITS)

        with m.If(r0_stall):
            comb += index.eq(req_index)
        with m.Elif(m_in.valid):
            comb += index.eq(get_index(m_in.addr))
        with m.Else():
            comb += index.eq(get_index(d_in.addr))
        sync += cache_tag_set.eq(cache_tags[index])

    def dcache_request(self, m, r0, ra, req_index, req_row, req_tag,
                       r0_valid, r1, cache_valid_bits, replace_way,
                       use_forward1_next, use_forward2_next,
                       req_hit_way, plru_victim, rc_ok, perm_attr,
                       valid_ra, perm_ok, access_ok, req_op, req_go,
                       tlb_pte_way,
                       tlb_hit, tlb_hit_way, tlb_valid_way, cache_tag_set,
                       cancel_store, req_same_tag, r0_stall, early_req_row):
        """Cache request parsing and hit detection
        """

        comb = m.d.comb
        sync = m.d.sync
        m_in, d_in = self.m_in, self.d_in

        is_hit = Signal()
        hit_way = Signal(WAY_BITS)
        op = Signal(Op)
        opsel = Signal(3)
        go = Signal()
        nc = Signal()
        hit_set = Signal(TLB_NUM_WAYS)
        hit_way_set = HitWaySet()
        rel_matches = Signal(TLB_NUM_WAYS)
        rel_match = Signal()

        # Extract line, row and tag from request
        comb += req_index.eq(get_index(r0.req.addr))
        comb += req_row.eq(get_row(r0.req.addr))
        comb += req_tag.eq(get_tag(ra))

        comb += go.eq(r0_valid & ~(r0.tlbie | r0.tlbld) & ~r1.ls_error)

        # Test if pending request is a hit on any way.
        # In order to make timing in virtual mode,
        # when we are using the TLB, we compare each
        # way with each of the real addresses from each way of
        # the TLB, and then decide later which match to use.

        with m.If(r0.req.virt_mode):
            comb += rel_matches.eq(0)
            for j in range(TLB_NUM_WAYS):
                # per-TLB-way temporaries: each way needs its own
                # comparison signals
                s_hit = Signal()
                s_pte = Signal(TLB_PTE_BITS)
                s_ra = Signal(REAL_ADDR_BITS)
                s_tag = Signal(TAG_BITS)

                comb += s_pte.eq(read_tlb_pte(j, tlb_pte_way))
                comb += s_ra.eq(Cat(r0.req.addr[0:TLB_LG_PGSZ],
                                    s_pte[TLB_LG_PGSZ:REAL_ADDR_BITS]))
                comb += s_tag.eq(get_tag(s_ra))

                for i in range(NUM_WAYS):
                    with m.If(go & cache_valid_bits[req_index][i] &
                              (read_tag(i, cache_tag_set) == s_tag)
                              & tlb_valid_way[j]):
                        comb += hit_way_set[j].eq(i)
                        comb += s_hit.eq(1)
                comb += hit_set[j].eq(s_hit)
                with m.If(s_tag == r1.reload_tag):
                    comb += rel_matches[j].eq(1)
            with m.If(tlb_hit):
                comb += is_hit.eq(hit_set.bit_select(tlb_hit_way, 1))
                comb += hit_way.eq(hit_way_set[tlb_hit_way])
                comb += rel_match.eq(rel_matches.bit_select(tlb_hit_way, 1))
        with m.Else():
            s_tag = Signal(TAG_BITS)
            comb += s_tag.eq(get_tag(r0.req.addr))
            for i in range(NUM_WAYS):
                # note: brackets are needed here: & binds tighter than ==
                with m.If(go & cache_valid_bits[req_index][i] &
                          (read_tag(i, cache_tag_set) == s_tag)):
                    comb += hit_way.eq(i)
                    comb += is_hit.eq(1)
            with m.If(s_tag == r1.reload_tag):
                comb += rel_match.eq(1)
        comb += req_same_tag.eq(rel_match)

        # See if the request matches the line currently being reloaded
        with m.If((r1.state == State.RELOAD_WAIT_ACK) &
                  (req_index == r1.store_index) & rel_match):
            # For a store, consider this a hit even if the row isn't
            # valid since it will be by the time we perform the store.
            # For a load, check the appropriate row valid bit.
            valid = r1.rows_valid[req_row % ROW_PER_LINE]
            comb += is_hit.eq(~r0.req.load | valid)
            comb += hit_way.eq(replace_way)

        # Whether to use forwarded data for a load or not
        comb += use_forward1_next.eq(0)
        with m.If((get_row(r1.req.real_addr) == req_row) &
                  (r1.req.hit_way == hit_way)):
            # Only need to consider r1.write_bram here, since if we
            # are writing refill data here, then we don't have a
            # cache hit this cycle on the line being refilled.
            # (There is the possibility that the load following the
            # load miss that started the refill could be to the old
            # contents of the victim line, since it is a couple of
            # cycles after the refill starts before we see the updated
            # cache tag. In that case we don't use the bypass.)
            comb += use_forward1_next.eq(r1.write_bram)
        comb += use_forward2_next.eq(0)
        with m.If((r1.forward_row1 == req_row) &
                  (r1.forward_way1 == hit_way)):
            comb += use_forward2_next.eq(r1.forward_valid1)

        # The way that matched on a hit
        comb += req_hit_way.eq(hit_way)

        # The way to replace on a miss
        with m.If(r1.write_tag):
            comb += replace_way.eq(plru_victim[r1.store_index])
        with m.Else():
            comb += replace_way.eq(r1.store_way)

        # work out whether we have permission for this access
        # NB we don't yet implement AMR, thus no KUAP
        comb += rc_ok.eq(perm_attr.reference
                         & (r0.req.load | perm_attr.changed)
                         )
        # brackets are needed here too: & binds tighter than |
        comb += perm_ok.eq((r0.req.priv_mode | ~perm_attr.priv)
                           & (perm_attr.wr_perm
                              | (r0.req.load & perm_attr.rd_perm))
                           )
        comb += access_ok.eq(valid_ra & perm_ok & rc_ok)

        # Combine the request and cache hit status to decide what
        # operation needs to be done
        comb += nc.eq(r0.req.nc | perm_attr.nocache)
        comb += op.eq(Op.OP_NONE)
        with m.If(go):
            with m.If(~access_ok):
                comb += op.eq(Op.OP_BAD)
            with m.Elif(cancel_store):
                comb += op.eq(Op.OP_STCX_FAIL)
            with m.Else():
                comb += opsel.eq(Cat(is_hit, nc, r0.req.load))
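                # opsel is Cat(is_hit, nc, r0.req.load):
                #   bit 2 = load (0 for store), bit 1 = non-cacheable,
                #   bit 0 = cache hit.
                # e.g. 0b101 = cacheable load that hit -> OP_LOAD_HIT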
                with m.Switch(opsel):
                    with m.Case(0b101):
                        comb += op.eq(Op.OP_LOAD_HIT)
                    with m.Case(0b100):
                        comb += op.eq(Op.OP_LOAD_MISS)
                    with m.Case(0b110):
                        comb += op.eq(Op.OP_LOAD_NC)
                    with m.Case(0b001):
                        comb += op.eq(Op.OP_STORE_HIT)
                    with m.Case(0b000):
                        comb += op.eq(Op.OP_STORE_MISS)
                    with m.Case(0b010):
                        comb += op.eq(Op.OP_STORE_MISS)
                    with m.Case(0b011):
                        comb += op.eq(Op.OP_BAD)
                    with m.Case(0b111):
                        comb += op.eq(Op.OP_BAD)
                    with m.Default():
                        comb += op.eq(Op.OP_NONE)
        comb += req_op.eq(op)
        comb += req_go.eq(go)

        # Version of the row number that is valid one cycle earlier
        # in the cases where we need to read the cache data BRAM.
        # If we're stalling then we need to keep reading the last
        # row requested.
        with m.If(~r0_stall):
            with m.If(m_in.valid):
                comb += early_req_row.eq(get_row(m_in.addr))
            with m.Else():
                comb += early_req_row.eq(get_row(d_in.addr))
        with m.Else():
            comb += early_req_row.eq(req_row)

    def reservation_comb(self, m, cancel_store, set_rsrv, clear_rsrv,
                         r0_valid, r0, reservation):
        """Handle load-with-reservation and store-conditional instructions
        """
        comb = m.d.comb
        sync = m.d.sync

        with m.If(r0_valid & r0.req.reserve):

            # XXX generate alignment interrupt if address
            # is not aligned XXX or if r0.req.nc = '1'
            with m.If(r0.req.load):
                comb += set_rsrv.eq(1) # load with reservation
            with m.Else():
                comb += clear_rsrv.eq(1) # store conditional
                # cancel the store if the reservation is not valid
                # or the address does not match the reserved one
                with m.If(~reservation.valid |
                          (r0.req.addr[LINE_OFF_BITS:64] !=
                           reservation.addr)):
                    comb += cancel_store.eq(1)

    def reservation_reg(self, m, r0_valid, access_ok, set_rsrv, clear_rsrv,
                        reservation, r0):

        comb = m.d.comb
        sync = m.d.sync

        with m.If(r0_valid & access_ok):
            with m.If(clear_rsrv):
                sync += reservation.valid.eq(0)
            with m.Elif(set_rsrv):
                sync += reservation.valid.eq(1)
                sync += reservation.addr.eq(r0.req.addr[LINE_OFF_BITS:64])

    def writeback_control(self, m, r1, cache_out):
        """Return data for loads & completion control logic
        """
        comb = m.d.comb
        sync = m.d.sync
        d_out, m_out = self.d_out, self.m_out

        data_out = Signal(64)
        data_fwd = Signal(64)

        # Use the bypass if we are reading the row that was
        # written 1 or 2 cycles ago, including for the
        # slow_valid = 1 case (i.e. completing a load
        # miss or a non-cacheable load).
        with m.If(r1.use_forward1):
            comb += data_fwd.eq(r1.forward_data1)
        with m.Else():
            comb += data_fwd.eq(r1.forward_data2)

        comb += data_out.eq(cache_out[r1.hit_way])

        for i in range(8):
            with m.If(r1.forward_sel[i]):
                dsel = data_fwd.word_select(i, 8)
                comb += data_out.word_select(i, 8).eq(dsel)

        comb += d_out.valid.eq(r1.ls_valid)
        comb += d_out.data.eq(data_out)
        comb += d_out.store_done.eq(~r1.stcx_fail)
        comb += d_out.error.eq(r1.ls_error)
        comb += d_out.cache_paradox.eq(r1.cache_paradox)

        # Outputs to MMU
        comb += m_out.done.eq(r1.mmu_done)
        comb += m_out.err.eq(r1.mmu_error)
        comb += m_out.data.eq(data_out)

        # We have a valid load or store hit or we just completed
        # a slow op such as a load miss, a NC load or a store
        #
        # Note: the load hit is delayed by one cycle. However it
        # can still not collide with r.slow_valid (well unless I
        # miscalculated) because slow_valid can only be set on a
        # subsequent request and not on its first cycle (the state
        # machine must have advanced), which makes slow_valid
        # at least 2 cycles from the previous hit_load_valid.

        # Sanity: Only one of these must be set in any given cycle

        if False: # TODO: need Display to get this to work
            assert (r1.slow_valid & r1.stcx_fail) != 1, \
                "unexpected slow_valid collision with stcx_fail"

            assert ((r1.slow_valid | r1.stcx_fail)
                    | r1.hit_load_valid) != 1, \
                "unexpected hit_load_delayed collision with slow_valid"

        with m.If(~r1.mmu_req):
            # Request came from loadstore1...
            # Load hit case is the standard path
            with m.If(r1.hit_load_valid):
                #Display(f"completing load hit data={data_out}")
                pass

            # error cases complete without stalling
            with m.If(r1.ls_error):
                #Display("completing ld/st with error")
                pass

            # Slow ops (load miss, NC, stores)
            with m.If(r1.slow_valid):
                #Display(f"completing store or load miss data={data_out}")
                pass

        with m.Else():
            # Request came from MMU
            with m.If(r1.hit_load_valid):
                #Display(f"completing load hit to MMU, data={m_out.data}")
                pass
            # error cases complete without stalling
            with m.If(r1.mmu_error):
                #Display("completing MMU ld with error")
                pass

            # Slow ops (i.e. load miss)
            with m.If(r1.slow_valid):
                #Display("completing MMU load miss, data={m_out.data}")
                pass

    def rams(self, m, r1, early_req_row, cache_out, replace_way):
        """rams
        Generate a cache RAM for each way. This handles the normal
        reads, writes from reloads and the special store-hit update
        path as well.

        Note: the BRAMs have an extra read buffer, meaning the output
        is pipelined an extra cycle. This differs from the
        icache. The writeback logic needs to take that into
        account by using 1-cycle delayed signals for load hits.
        """
        comb = m.d.comb
        wb_in = self.wb_in

        for i in range(NUM_WAYS):
            do_read = Signal()
            rd_addr = Signal(ROW_BITS)
            do_write = Signal()
            wr_addr = Signal(ROW_BITS)
            wr_data = Signal(WB_DATA_BITS)
            wr_sel = Signal(ROW_SIZE)
            wr_sel_m = Signal(ROW_SIZE)
            _d_out = Signal(WB_DATA_BITS)

            way = CacheRam(ROW_BITS, WB_DATA_BITS, True)
            setattr(m.submodules, "cacheram_%d" % i, way)

            comb += way.rd_en.eq(do_read)
            comb += way.rd_addr.eq(rd_addr)
            comb += _d_out.eq(way.rd_data_o)
            comb += way.wr_sel.eq(wr_sel_m)
            comb += way.wr_addr.eq(wr_addr)
            comb += way.wr_data.eq(wr_data)

            # Cache hit reads
            comb += do_read.eq(1)
            comb += rd_addr.eq(early_req_row)
            comb += cache_out[i].eq(_d_out)

            # Write mux:
            #
            # Defaults to wishbone read responses (cache refill)
            #
            # For timing, the mux on wr_data/sel/addr is not
            # dependent on anything other than the current state.

            with m.If(r1.write_bram):
                # Write store data to BRAM. This happens one
                # cycle after the store is in r0.
                comb += wr_data.eq(r1.req.data)
                comb += wr_sel.eq(r1.req.byte_sel)
                comb += wr_addr.eq(get_row(r1.req.real_addr))

                with m.If(i == r1.req.hit_way):
                    comb += do_write.eq(1)
            with m.Else():
                # Otherwise, we might be doing a reload or a DCBZ
                with m.If(r1.dcbz):
                    comb += wr_data.eq(0)
                with m.Else():
                    comb += wr_data.eq(wb_in.dat)
                comb += wr_addr.eq(r1.store_row)
                comb += wr_sel.eq(~0) # all 1s

                with m.If((r1.state == State.RELOAD_WAIT_ACK)
                          & wb_in.ack & (replace_way == i)):
                    comb += do_write.eq(1)

            # Mask write selects with do_write since BRAM
            # doesn't have a global write-enable
            with m.If(do_write):
                comb += wr_sel_m.eq(wr_sel)

    # Cache hit synchronous machine for the easy case.
    # This handles load hits.
    # It also handles error cases (TLB miss, cache paradox)
    def dcache_fast_hit(self, m, req_op, r0_valid, r0, r1,
                        req_hit_way, req_index, access_ok,
                        tlb_hit, tlb_hit_way, tlb_req_index):

        comb = m.d.comb
        sync = m.d.sync

        with m.If(req_op != Op.OP_NONE):
            #Display(f"op:{req_op} addr:{r0.req.addr} nc: {r0.req.nc}"
            #        f"idx:{req_index} tag:{req_tag} way: {req_hit_way}"
            #       )
            pass

        with m.If(r0_valid):
            sync += r1.mmu_req.eq(r0.mmu_req)

        # Fast path for load/store hits.
        # Set signals for the writeback controls.
        sync += r1.hit_way.eq(req_hit_way)
        sync += r1.hit_index.eq(req_index)

        with m.If(req_op == Op.OP_LOAD_HIT):
            sync += r1.hit_load_valid.eq(1)
        with m.Else():
            sync += r1.hit_load_valid.eq(0)

        with m.If((req_op == Op.OP_LOAD_HIT) | (req_op == Op.OP_STORE_HIT)):
            sync += r1.cache_hit.eq(1)
        with m.Else():
            sync += r1.cache_hit.eq(0)

        with m.If(req_op == Op.OP_BAD):
            #Display(f"Signalling ld/st error valid_ra={valid_ra}"
            #        f"rc_ok={rc_ok} perm_ok={perm_ok}"
            sync += r1.ls_error.eq(~r0.mmu_req)
            sync += r1.mmu_error.eq(r0.mmu_req)
            sync += r1.cache_paradox.eq(access_ok)

        with m.Else():
            sync += r1.ls_error.eq(0)
            sync += r1.mmu_error.eq(0)
            sync += r1.cache_paradox.eq(0)

        with m.If(req_op == Op.OP_STCX_FAIL):
            sync += r1.stcx_fail.eq(1)
        with m.Else():
            sync += r1.stcx_fail.eq(0)

        # Record TLB hit information for updating TLB PLRU
        sync += r1.tlb_hit.eq(tlb_hit)
        sync += r1.tlb_hit_way.eq(tlb_hit_way)
        sync += r1.tlb_hit_index.eq(tlb_req_index)

    # Memory accesses are handled by this state machine:
    #
    #   * Cache load miss/reload (in conjunction with "rams")
    #   * Load hits for non-cachable forms
    #   * Stores (the collision case is handled in "rams")
    #
    # All wishbone requests generation is done here.
    # This machine operates at stage 1.
    def dcache_slow(self, m, r1, use_forward1_next, use_forward2_next,
                    cache_valid_bits, r0, replace_way,
                    req_hit_way, req_same_tag,
                    r0_valid, req_op, cache_tag, req_go, ra):

        comb = m.d.comb
        sync = m.d.sync
        wb_in = self.wb_in

        req = MemAccessRequest()
        acks = Signal(3)
        adjust_acks = Signal(3)
        stbs_done = Signal()

        sync += r1.use_forward1.eq(use_forward1_next)
        sync += r1.forward_sel.eq(0)

        with m.If(use_forward1_next):
            sync += r1.forward_sel.eq(r1.req.byte_sel)
        with m.Elif(use_forward2_next):
            sync += r1.forward_sel.eq(r1.forward_sel1)

        sync += r1.forward_data2.eq(r1.forward_data1)
        with m.If(r1.write_bram):
            sync += r1.forward_data1.eq(r1.req.data)
            sync += r1.forward_sel1.eq(r1.req.byte_sel)
            sync += r1.forward_way1.eq(r1.req.hit_way)
            sync += r1.forward_row1.eq(get_row(r1.req.real_addr))
            sync += r1.forward_valid1.eq(1)
        with m.Else():
            with m.If(r1.dcbz):
                sync += r1.forward_data1.eq(0)
            with m.Else():
                sync += r1.forward_data1.eq(wb_in.dat)
            sync += r1.forward_sel1.eq(~0) # all 1s
            sync += r1.forward_way1.eq(replace_way)
            sync += r1.forward_row1.eq(r1.store_row)
            sync += r1.forward_valid1.eq(0)

        # One cycle pulses reset
        sync += r1.slow_valid.eq(0)
        sync += r1.write_bram.eq(0)
        sync += r1.inc_acks.eq(0)
        sync += r1.dec_acks.eq(0)

        sync += r1.ls_valid.eq(0)
        # complete tlbies and TLB loads in the third cycle
        sync += r1.mmu_done.eq(r0_valid & (r0.tlbie | r0.tlbld))

        with m.If((req_op == Op.OP_LOAD_HIT)
                  | (req_op == Op.OP_STCX_FAIL)):
            with m.If(~r0.mmu_req):
                sync += r1.ls_valid.eq(1)
            with m.Else():
                sync += r1.mmu_done.eq(1)

        with m.If(r1.write_tag):
            # Store new tag in selected way: build the updated tag row
            # combinatorially, then write it back on the clock edge
            for i in range(NUM_WAYS):
                with m.If(i == replace_way):
                    ct = Signal(TAG_RAM_WIDTH)
                    comb += ct.eq(cache_tag[r1.store_index])
                    comb += ct.word_select(i, TAG_WIDTH).eq(r1.reload_tag)
                    sync += cache_tag[r1.store_index].eq(ct)
            sync += r1.store_way.eq(replace_way)
            sync += r1.write_tag.eq(0)

        # Take request from r1.req if there is one there,
        # else from req_op, ra, etc.
        with m.If(r1.full):
            comb += req.eq(r1.req)
        with m.Else():
            comb += req.op.eq(req_op)
            comb += req.valid.eq(req_go)
            comb += req.mmu_req.eq(r0.mmu_req)
            comb += req.dcbz.eq(r0.req.dcbz)
            comb += req.real_addr.eq(ra)

            with m.If(~r0.req.dcbz):
                comb += req.data.eq(r0.req.data)
            with m.Else():
                comb += req.data.eq(0)

            # Select all bytes for dcbz
            # and for cacheable loads
            with m.If(r0.req.dcbz | (r0.req.load & ~r0.req.nc)):
                comb += req.byte_sel.eq(~0) # all 1s
            with m.Else():
                comb += req.byte_sel.eq(r0.req.byte_sel)
            comb += req.hit_way.eq(req_hit_way)
            comb += req.same_tag.eq(req_same_tag)

        # Store the incoming request from r0,
        # if it is a slow request
        # Note that r1.full = 1 implies req_op = OP_NONE
        with m.If((req_op == Op.OP_LOAD_MISS)
                  | (req_op == Op.OP_LOAD_NC)
                  | (req_op == Op.OP_STORE_MISS)
                  | (req_op == Op.OP_STORE_HIT)):
            sync += r1.req.eq(req)
            sync += r1.full.eq(1)

        # Main state machine
        with m.Switch(r1.state):

            with m.Case(State.IDLE):
                # XXX check 'left downto.  probably means len(r1.wb.adr)
                #     r1.wb.adr <= req.real_addr(
                #                   r1.wb.adr'left downto 0
                #                  );
                sync += r1.wb.adr.eq(req.real_addr)
                sync += r1.wb.sel.eq(req.byte_sel)
                sync += r1.wb.dat.eq(req.data)
                sync += r1.dcbz.eq(req.dcbz)

                # Keep track of our index and way
                # for subsequent stores.
                sync += r1.store_index.eq(get_index(req.real_addr))
                sync += r1.store_row.eq(get_row(req.real_addr))
                sync += r1.end_row_ix.eq(
                            get_row_of_line(get_row(req.real_addr)))
                sync += r1.reload_tag.eq(get_tag(req.real_addr))
                sync += r1.req.same_tag.eq(1)

                with m.If(req.op == Op.OP_STORE_HIT):
                    sync += r1.store_way.eq(req.hit_way)

                # Reset per-row valid bits,
                # ready for handling OP_LOAD_MISS
                for i in range(ROW_PER_LINE):
                    sync += r1.rows_valid[i].eq(0)

                with m.Switch(req.op):
                    with m.Case(Op.OP_LOAD_HIT):
                        # stay in IDLE state
                        pass

                    with m.Case(Op.OP_LOAD_MISS):
                        #Display(f"cache miss real addr:"
                        #        f"{req_real_addr}"
                        #        f" idx:{get_index(req_real_addr)}"
                        #        f" tag:{get_tag(req.real_addr)}")

                        # Start the wishbone cycle
                        sync += r1.wb.we.eq(0)
                        sync += r1.wb.cyc.eq(1)
                        sync += r1.wb.stb.eq(1)

                        # Track that we had one request sent
                        sync += r1.state.eq(State.RELOAD_WAIT_ACK)
                        sync += r1.write_tag.eq(1)

                    with m.Case(Op.OP_LOAD_NC):
                        sync += r1.wb.cyc.eq(1)
                        sync += r1.wb.stb.eq(1)
                        sync += r1.wb.we.eq(0)
                        sync += r1.state.eq(State.NC_LOAD_WAIT_ACK)

                    with m.Case(Op.OP_STORE_HIT, Op.OP_STORE_MISS):
                        with m.If(~req.dcbz):
                            sync += r1.state.eq(State.STORE_WAIT_ACK)
                            sync += r1.acks_pending.eq(1)
                            sync += r1.full.eq(0)
                            sync += r1.slow_valid.eq(1)

                            with m.If(~req.mmu_req):
                                sync += r1.ls_valid.eq(1)
                            with m.Else():
                                sync += r1.mmu_done.eq(1)

                            with m.If(req.op == Op.OP_STORE_HIT):
                                sync += r1.write_bram.eq(1)
                        with m.Else():
                            sync += r1.state.eq(State.RELOAD_WAIT_ACK)

                            with m.If(req.op == Op.OP_STORE_MISS):
                                sync += r1.write_tag.eq(1)

                        sync += r1.wb.we.eq(1)
                        sync += r1.wb.cyc.eq(1)
                        sync += r1.wb.stb.eq(1)

                    # OP_NONE and OP_BAD do nothing
                    # OP_BAD & OP_STCX_FAIL were
                    # handled above already
                    with m.Case(Op.OP_NONE):
                        pass
                    with m.Case(Op.OP_BAD):
                        pass
                    with m.Case(Op.OP_STCX_FAIL):
                        pass

            with m.Case(State.RELOAD_WAIT_ACK):
                # Requests are all sent if stb is 0
                comb += stbs_done.eq(~r1.wb.stb)

                with m.If(~wb_in.stall & ~stbs_done):
                    # That was the last word?
                    # We are done sending.
                    # Clear stb and set stbs_done
                    # so we can handle an eventual
                    # last ack on the same cycle.
                    with m.If(is_last_row_addr(r1.wb.adr, r1.end_row_ix)):
                        sync += r1.wb.stb.eq(0)
                        comb += stbs_done.eq(0)

                    # Calculate the next row address
                    # in the current cache line
                    rarange = r1.wb.adr[ROW_OFF_BITS:LINE_OFF_BITS]
                    sync += rarange.eq(rarange + 1)

                # Incoming acks processing
                sync += r1.forward_valid1.eq(wb_in.ack)
                with m.If(wb_in.ack):
                    # XXX needs an Array bit-accessor here
                    sync += r1.rows_valid[r1.store_row % ROW_PER_LINE].eq(1)

                    # If this is the data we were looking for,
                    # we can complete the request next cycle.
                    # Compare the whole address in case the
                    # request in r1.req is not the one that
                    # started this refill.
                    with m.If(r1.full & r1.req.same_tag &
                              ((r1.dcbz & r1.req.dcbz) |
                               (~r1.dcbz &
                                (r1.req.op == Op.OP_LOAD_MISS))) &
                              (r1.store_row == get_row(r1.req.real_addr))):
                        sync += r1.full.eq(0)
                        sync += r1.slow_valid.eq(1)
                        with m.If(~r1.mmu_req):
                            sync += r1.ls_valid.eq(1)
                        with m.Else():
                            sync += r1.mmu_done.eq(1)
                        sync += r1.forward_sel.eq(~0) # all 1s
                        sync += r1.use_forward1.eq(1)

                    # Check for completion
                    with m.If(stbs_done &
                              is_last_row(r1.store_row, r1.end_row_ix)):
                        # Complete wishbone cycle
                        sync += r1.wb.cyc.eq(0)

                        # Cache line is now valid: set the valid bit
                        # for this way and write the updated set of
                        # valid bits back to the array
                        cv = Signal(NUM_WAYS)
                        comb += cv.eq(cache_valid_bits[r1.store_index])
                        comb += cv.bit_select(r1.store_way, 1).eq(1)
                        sync += cache_valid_bits[r1.store_index].eq(cv)
                        sync += r1.state.eq(State.IDLE)

                    # Increment store row counter
                    sync += r1.store_row.eq(next_row(r1.store_row))

            with m.Case(State.STORE_WAIT_ACK):
                comb += stbs_done.eq(~r1.wb.stb)
                comb += acks.eq(r1.acks_pending)

                with m.If(r1.inc_acks != r1.dec_acks):
                    with m.If(r1.inc_acks):
                        comb += adjust_acks.eq(acks + 1)
                    with m.Else():
                        comb += adjust_acks.eq(acks - 1)
                with m.Else():
                    comb += adjust_acks.eq(acks)

                sync += r1.acks_pending.eq(adjust_acks)

                # Clear stb when slave accepted request
                with m.If(~wb_in.stall):
                    # See if there is another store waiting
                    # to be done which is in the same real page.
                    with m.If(req.valid):
                        ra = req.real_addr[0:SET_SIZE_BITS]
                        sync += r1.wb.adr[0:SET_SIZE_BITS].eq(ra)
                        sync += r1.wb.dat.eq(req.data)
                        sync += r1.wb.sel.eq(req.byte_sel)

                    with m.Elif((adjust_acks < 7) & req.same_tag &
                                ((req.op == Op.OP_STORE_MISS)
                                 | (req.op == Op.OP_STORE_HIT))):
                        sync += r1.wb.stb.eq(1)
                        comb += stbs_done.eq(0)

                        with m.If(req.op == Op.OP_STORE_HIT):
                            sync += r1.write_bram.eq(1)
                        sync += r1.full.eq(0)
                        sync += r1.slow_valid.eq(1)

                        # Store requests never come from the MMU
                        sync += r1.ls_valid.eq(1)
                        comb += stbs_done.eq(0)
                        sync += r1.inc_acks.eq(1)
                    with m.Else():
                        sync += r1.wb.stb.eq(0)
                        comb += stbs_done.eq(1)

                # Got ack ? See if complete.
                with m.If(wb_in.ack):
                    with m.If(stbs_done & (adjust_acks == 1)):
                        sync += r1.state.eq(State.IDLE)
                        sync += r1.wb.cyc.eq(0)
                        sync += r1.wb.stb.eq(0)
                    sync += r1.dec_acks.eq(1)

            with m.Case(State.NC_LOAD_WAIT_ACK):
                # Clear stb when slave accepted request
                with m.If(~wb_in.stall):
                    sync += r1.wb.stb.eq(0)

                # Got ack ? complete.
                with m.If(wb_in.ack):
                    sync += r1.state.eq(State.IDLE)
                    sync += r1.full.eq(0)
                    sync += r1.slow_valid.eq(1)

                    with m.If(~r1.mmu_req):
                        sync += r1.ls_valid.eq(1)
                    with m.Else():
                        sync += r1.mmu_done.eq(1)

                    sync += r1.forward_sel.eq(~0) # all 1s
                    sync += r1.use_forward1.eq(1)
                    sync += r1.wb.cyc.eq(0)
                    sync += r1.wb.stb.eq(0)

    def dcache_log(self, m, r1, valid_ra, tlb_hit_way, stall_out, req_op):

        sync = m.d.sync
        d_out, wb_in, log_out = self.d_out, self.wb_in, self.log_out

        sync += log_out.eq(Cat(r1.state[:3], valid_ra, tlb_hit_way[:3],
                               stall_out, req_op[:3], d_out.valid,
                               d_out.error,
                               r1.wb.cyc, r1.wb.stb, wb_in.ack, wb_in.stall,
                               r1.wb.adr[3:6]))

    def elaborate(self, platform):

        m = Module()
        comb = m.d.comb

        # Storage. Hopefully "cache_rows" is a BRAM, the rest is LUTs
        cache_tags = CacheTagArray()
        cache_tag_set = Signal(TAG_RAM_WIDTH)
        cache_valid_bits = CacheValidBitsArray()

        # TODO attribute ram_style : string;
        # TODO attribute ram_style of cache_tags : signal is "distributed";

        """note: these are passed to nmigen.hdl.Memory as "attributes".
           don't know how, just that they are.
        """
        dtlb_valid_bits = TLBValidBitsArray()
        dtlb_tags = TLBTagsArray()
        dtlb_ptes = TLBPtesArray()
        # TODO attribute ram_style of
        #  dtlb_tags : signal is "distributed";
        # TODO attribute ram_style of
        #  dtlb_ptes : signal is "distributed";

        r0 = RegStage0()
        r0_full = Signal()

        r1 = RegStage1()

        reservation = Reservation()

        # Async signals on incoming request
        req_index = Signal(INDEX_BITS)
        req_row = Signal(ROW_BITS)
        req_hit_way = Signal(WAY_BITS)
        req_tag = Signal(TAG_BITS)
        req_op = Signal(Op)
        req_data = Signal(64)
        req_same_tag = Signal()
        req_go = Signal()

        early_req_row = Signal(ROW_BITS)

        cancel_store = Signal()
        set_rsrv = Signal()
        clear_rsrv = Signal()

        r0_valid = Signal()
        r0_stall = Signal()

        use_forward1_next = Signal()
        use_forward2_next = Signal()

        cache_out = CacheRamOut()

        plru_victim = PLRUOut()
        replace_way = Signal(WAY_BITS)

        # Wishbone read/write/cache write formatting signals
        bus_sel = Signal(8)

        # TLB signals
        tlb_tag_way = Signal(TLB_TAG_WAY_BITS)
        tlb_pte_way = Signal(TLB_PTE_WAY_BITS)
        tlb_valid_way = Signal(TLB_NUM_WAYS)
        tlb_req_index = Signal(TLB_SET_BITS)
        tlb_hit = Signal()
        tlb_hit_way = Signal(TLB_WAY_BITS)
        pte = Signal(TLB_PTE_BITS)
        ra = Signal(REAL_ADDR_BITS)
        valid_ra = Signal()
        perm_attr = PermAttr()
        rc_ok = Signal()
        perm_ok = Signal()
        access_ok = Signal()

        tlb_plru_victim = TLBPLRUOut()

        # we don't yet handle collisions between loadstore1 requests
        # and MMU requests
        comb += self.m_out.stall.eq(0)

        # Hold off the request in r0 when r1 has an uncompleted request
        comb += r0_stall.eq(r0_full & r1.full)
        comb += r0_valid.eq(r0_full & ~r1.full)
        comb += self.stall_out.eq(r0_stall)

        # Wire up wishbone request latch out of stage 1
        comb += self.wb_out.eq(r1.wb)

        # call sub-functions putting everything together,
        # using shared signals established above
        self.stage_0(m, r0, r1, r0_full)
        self.tlb_read(m, r0_stall, tlb_valid_way,
                      tlb_tag_way, tlb_pte_way, dtlb_valid_bits,
                      dtlb_tags, dtlb_ptes)
        self.tlb_search(m, tlb_req_index, r0, r0_valid,
                        tlb_valid_way, tlb_tag_way, tlb_hit_way,
                        tlb_pte_way, pte, tlb_hit, valid_ra, perm_attr, ra)
        self.tlb_update(m, r0_valid, r0, dtlb_valid_bits, tlb_req_index,
                        tlb_hit_way, tlb_hit, tlb_plru_victim, tlb_tag_way,
                        dtlb_tags, tlb_pte_way, dtlb_ptes)
        self.maybe_plrus(m, r1, plru_victim)
        self.cache_tag_read(m, r0_stall, req_index, cache_tag_set,
                            cache_tags)
        self.dcache_request(m, r0, ra, req_index, req_row, req_tag,
                            r0_valid, r1, cache_valid_bits, replace_way,
                            use_forward1_next, use_forward2_next,
                            req_hit_way, plru_victim, rc_ok, perm_attr,
                            valid_ra, perm_ok, access_ok, req_op, req_go,
                            tlb_pte_way,
                            tlb_hit, tlb_hit_way, tlb_valid_way,
                            cache_tag_set,
                            cancel_store, req_same_tag, r0_stall,
                            early_req_row)
        self.reservation_comb(m, cancel_store, set_rsrv, clear_rsrv,
                              r0_valid, r0, reservation)
        self.reservation_reg(m, r0_valid, access_ok, set_rsrv, clear_rsrv,
                             reservation, r0)
        self.writeback_control(m, r1, cache_out)
        self.rams(m, r1, early_req_row, cache_out, replace_way)
        self.dcache_fast_hit(m, req_op, r0_valid, r0, r1,
                             req_hit_way, req_index, access_ok,
                             tlb_hit, tlb_hit_way, tlb_req_index)
        self.dcache_slow(m, r1, use_forward1_next, use_forward2_next,
                         cache_valid_bits, r0, replace_way,
                         req_hit_way, req_same_tag,
                         r0_valid, req_op, cache_tags, req_go, ra)
        #self.dcache_log(m, r1, valid_ra, tlb_hit_way, self.stall_out,
        #                req_op)

        return m


# dcache_tb.vhdl
#
# entity dcache_tb is
# end dcache_tb;
#
# architecture behave of dcache_tb is
#     signal clk          : std_ulogic;
#     signal rst          : std_ulogic;
#
#     signal d_in         : Loadstore1ToDcacheType;
#     signal d_out        : DcacheToLoadstore1Type;
#
#     signal m_in         : MmuToDcacheType;
#     signal m_out        : DcacheToMmuType;
#
#     signal wb_bram_in   : wishbone_master_out;
#     signal wb_bram_out  : wishbone_slave_out;
#
#     constant clk_period : time := 10 ns;
# begin
#     dcache0: entity work.dcache
#         generic map(
#
#             LINE_SIZE => 64,
#             NUM_LINES => 4
#             )
#         port map(
#             clk => clk,
#             rst => rst,
#             d_in => d_in,
#             d_out => d_out,
#             m_in => m_in,
#             m_out => m_out,
#             wishbone_out => wb_bram_in,
#             wishbone_in => wb_bram_out
#             );
#
#     -- BRAM Memory slave
#     bram0: entity work.wishbone_bram_wrapper
#         generic map(
#             MEMORY_SIZE   => 1024,
#             RAM_INIT_FILE => "icache_test.bin"
#             )
#         port map(
#             clk => clk,
#             rst => rst,
#             wishbone_in => wb_bram_in,
#             wishbone_out => wb_bram_out
#             );
#
#     clk_process: process
#     begin
#         clk <= '0';
#         wait for clk_period/2;
#         clk <= '1';
#         wait for clk_period/2;
#     end process;
#
#     rst_process: process
#     begin
#         rst <= '1';
#         wait for 2*clk_period;
#         rst <= '0';
#         wait;
#     end process;
#
#     stim: process
#     begin
#         -- Clear stuff
#         d_in.valid <= '0';
#         d_in.load <= '0';
#         d_in.nc <= '0';
#         d_in.addr <= (others => '0');
#         d_in.data <= (others => '0');
#         m_in.valid <= '0';
#         m_in.addr <= (others => '0');
#         m_in.pte <= (others => '0');
#
#         wait for 4*clk_period;
#         wait until rising_edge(clk);
#
#         -- Cacheable read of address 4
#         d_in.load <= '1';
#         d_in.nc <= '0';
#         d_in.addr <= x"0000000000000004";
#         d_in.valid <= '1';
#         wait until rising_edge(clk);
#         d_in.valid <= '0';
#
#         wait until rising_edge(clk) and d_out.valid = '1';
#         assert d_out.data = x"0000000100000000"
#             report "data @" & to_hstring(d_in.addr) &
#                 "=" & to_hstring(d_out.data) &
#                 " expected 0000000100000000"
#             severity failure;
#         -- wait for clk_period;
#
#         -- Cacheable read of address 30
#         d_in.load <= '1';
#         d_in.nc <= '0';
#         d_in.addr <= x"0000000000000030";
#         d_in.valid <= '1';
#         wait until rising_edge(clk);
#         d_in.valid <= '0';
#
#         wait until rising_edge(clk) and d_out.valid = '1';
#         assert d_out.data = x"0000000D0000000C"
#             report "data @" & to_hstring(d_in.addr) &
#                 "=" & to_hstring(d_out.data) &
#                 " expected 0000000D0000000C"
#             severity failure;
#
#         -- Non-cacheable read of address 100
#         d_in.load <= '1';
#         d_in.nc <= '1';
#         d_in.addr <= x"0000000000000100";
#         d_in.valid <= '1';
#         wait until rising_edge(clk);
#         d_in.valid <= '0';
#         wait until rising_edge(clk) and d_out.valid = '1';
#         assert d_out.data = x"0000004100000040"
#             report "data @" & to_hstring(d_in.addr) &
#                 "=" & to_hstring(d_out.data) &
#                 " expected 0000004100000040"
#             severity failure;
#
#         wait until rising_edge(clk);
#         wait until rising_edge(clk);
#         wait until rising_edge(clk);
#         wait until rising_edge(clk);
#
#         std.env.finish;
#     end process;
# end;
def dcache_sim(dut):
    # clear stuff
    yield dut.d_in.valid.eq(0)
    yield dut.d_in.load.eq(0)
    yield dut.d_in.nc.eq(0)
    yield dut.d_in.addr.eq(0)
    yield dut.d_in.data.eq(0)
    yield dut.m_in.valid.eq(0)
    yield dut.m_in.addr.eq(0)
    yield dut.m_in.pte.eq(0)
    # wait 4 * clk_period
    yield
    yield
    yield
    yield
    # wait_until rising_edge(clk)
    yield
    # Cacheable read of address 4
    yield dut.d_in.load.eq(1)
    yield dut.d_in.nc.eq(0)
    yield dut.d_in.addr.eq(Const(0x0000000000000004, 64))
    yield dut.d_in.valid.eq(1)
    # wait-until rising_edge(clk)
    yield
    yield dut.d_in.valid.eq(0)
    yield
    while not (yield dut.d_out.valid):
        yield
    data = yield dut.d_out.data
    assert data == 0x0000000100000000, \
        f"data @4={data:#x} expected 0x0000000100000000"

    # Cacheable read of address 30
    yield dut.d_in.load.eq(1)
    yield dut.d_in.nc.eq(0)
    yield dut.d_in.addr.eq(Const(0x0000000000000030, 64))
    yield dut.d_in.valid.eq(1)
    yield
    yield dut.d_in.valid.eq(0)
    yield
    while not (yield dut.d_out.valid):
        yield
    data = yield dut.d_out.data
    assert data == 0x0000000D0000000C, \
        f"data @0x30={data:#x} expected 0x0000000D0000000C"

    # Non-cacheable read of address 100
    yield dut.d_in.load.eq(1)
    yield dut.d_in.nc.eq(1)
    yield dut.d_in.addr.eq(Const(0x0000000000000100, 64))
    yield dut.d_in.valid.eq(1)
    yield
    yield dut.d_in.valid.eq(0)
    yield
    while not (yield dut.d_out.valid):
        yield
    data = yield dut.d_out.data
    assert data == 0x0000004100000040, \
        f"data @0x100={data:#x} expected 0x0000004100000040"

    yield
    yield
    yield
    yield


def test_dcache():
    dut = DCache()
    vl = rtlil.convert(dut, ports=[])
    with open("test_dcache.il", "w") as f:
        f.write(vl)

    #run_simulation(dut, dcache_sim(dut), vcd_name='test_dcache.vcd')

if __name__ == '__main__':
    test_dcache()