src/soc/experiment/dcache.py

   1 """DCache
   2
   3 based on Anton Blanchard microwatt dcache.vhdl
   4
   5 """
   6
from enum import Enum, unique

# NOTE(review): upstream nmigen ships log2_int in ``nmigen.utils`` and
# RecordObject in ``nmutil.iocontrol`` -- confirm the two module paths below.
from nmigen import Array, Cat, Const, Elaboratable, Module, Repl, Signal
from nmigen.cli import main
from nmigen.iocontrol import RecordObject
from nmigen.util import log2_int

from experiment.mem_types import (
    LoadStore1ToDCacheType,
    DCacheToLoadStore1Type,
    MMUToDCacheType,
    DCacheToMMUType,
)

from experiment.wb_types import (
    WB_ADDR_BITS, WB_DATA_BITS, WB_SEL_BITS,
    WBAddrType, WBDataType, WBSelType,
    WbMasterOut, WBSlaveOut,
    WBMasterOutVector, WBSlaveOutVector,
    WBIOMasterOut, WBIOSlaveOut,
)
  25
  26 # TODO: make these parameters of DCache at some point
  27 LINE_SIZE = 64    # Line size in bytes
  28 NUM_LINES = 32    # Number of lines in a set
  29 NUM_WAYS = 4      # Number of ways
  30 TLB_SET_SIZE = 64 # L1 DTLB entries per set
  31 TLB_NUM_WAYS = 2  # L1 DTLB number of sets
  32 TLB_LG_PGSZ = 12  # L1 DTLB log_2(page_size)
  33 LOG_LENGTH = 0    # Non-zero to enable log data collection
  34
  35 # BRAM organisation: We never access more than
  36 #     -- wishbone_data_bits at a time so to save
  37 #     -- resources we make the array only that wide, and
  38 #     -- use consecutive indices for to make a cache "line"
  39 #     --
  40 #     -- ROW_SIZE is the width in bytes of the BRAM
  41 #     -- (based on WB, so 64-bits)
  42 ROW_SIZE = WB_DATA_BITS // 8;
  43
  44 # ROW_PER_LINE is the number of row (wishbone
  45 # transactions) in a line
  46 ROW_PER_LINE = LINE_SIZE // ROW_SIZE
  47
  48 # BRAM_ROWS is the number of rows in BRAM needed
  49 # to represent the full dcache
  50 BRAM_ROWS = NUM_LINES * ROW_PER_LINE
  51
  52
  53 # Bit fields counts in the address
  54
  55 # REAL_ADDR_BITS is the number of real address
  56 # bits that we store
  57 REAL_ADDR_BITS = 56
  58
  59 # ROW_BITS is the number of bits to select a row
  60 ROW_BITS = log2_int(BRAM_ROWS)
  61
  62 # ROW_LINE_BITS is the number of bits to select
  63 # a row within a line
  64 ROW_LINE_BITS = log2_int(ROW_PER_LINE)
  65
  66 # LINE_OFF_BITS is the number of bits for
  67 # the offset in a cache line
  68 LINE_OFF_BITS = log2_int(LINE_SIZE)
  69
  70 # ROW_OFF_BITS is the number of bits for
  71 # the offset in a row
  72 ROW_OFF_BITS = log2_int(ROW_SIZE)
  73
  74 # INDEX_BITS is the number if bits to
  75 # select a cache line
  76 INDEX_BITS = log2_int(NUM_LINES)
  77
  78 # SET_SIZE_BITS is the log base 2 of the set size
  79 SET_SIZE_BITS = LINE_OFF_BITS + INDEX_BITS
  80
  81 # TAG_BITS is the number of bits of
  82 # the tag part of the address
  83 TAG_BITS = REAL_ADDR_BITS - SET_SIZE_BITS
  84
  85 # TAG_WIDTH is the width in bits of each way of the tag RAM
  86 TAG_WIDTH = TAG_BITS + 7 - ((TAG_BITS + 7) % 8)
  87
  88 # WAY_BITS is the number of bits to select a way
  89 WAY_BITS = log2_int(NUM_WAYS)
  90
  91 # Example of layout for 32 lines of 64 bytes:
  92 #
  93 # ..  tag    |index|  line  |
  94 # ..         |   row   |    |
  95 # ..         |     |---|    | ROW_LINE_BITS  (3)
  96 # ..         |     |--- - --| LINE_OFF_BITS (6)
  97 # ..         |         |- --| ROW_OFF_BITS  (3)
  98 # ..         |----- ---|    | ROW_BITS      (8)
  99 # ..         |-----|        | INDEX_BITS    (5)
 100 # .. --------|              | TAG_BITS      (45)
 101
 102 TAG_RAM_WIDTH = TAG_WIDTH * NUM_WAYS
 103
 104 def CacheTagArray():
 105     return Array(CacheTagSet() for x in range(NUM_LINES))
 106
 107 def CacheValidBitsArray():
 108     return Array(CacheWayValidBits() for x in range(NUM_LINES))
 109
 110 def RowPerLineValidArray():
 111     return Array(Signal() for x in range(ROW_PER_LINE))
 112
 113 # L1 TLB
 114 TLB_SET_BITS     = log2_int(TLB_SET_SIZE)
 115 TLB_WAY_BITS     = log2_int(TLB_NUM_WAYS)
 116 TLB_EA_TAG_BITS  = 64 - (TLB_LG_PGSZ + TLB_SET_BITS)
 117 TLB_TAG_WAY_BITS = TLB_NUM_WAYS * TLB_EA_TAG_BITS
 118 TLB_PTE_BITS     = 64
 119 TLB_PTE_WAY_BITS = TLB_NUM_WAYS * TLB_PTE_BITS;
 120
 121 assert (LINE_SIZE % ROW_SIZE) == 0, "LINE_SIZE not multiple of ROW_SIZE"
 122 assert (LINE_SIZE % 2) == 0, "LINE_SIZE not power of 2"
 123 assert (NUM_LINES % 2) == 0, "NUM_LINES not power of 2"
 124 assert (ROW_PER_LINE % 2) == 0, "ROW_PER_LINE not power of 2"
 125 assert ROW_BITS == (INDEX_BITS + ROW_LINE_BITS), "geometry bits don't add up"
 126 assert (LINE_OFF_BITS = ROW_OFF_BITS + ROW_LINEBITS), \
 127         "geometry bits don't add up"
 128 assert REAL_ADDR_BITS == (TAG_BITS + INDEX_BITS + LINE_OFF_BITS), \
 129         "geometry bits don't add up"
 130 assert REAL_ADDR_BITS == (TAG_BITS + ROW_BITS + ROW_OFF_BITS), \
 131          "geometry bits don't add up"
 132 assert 64 == wishbone_data_bits, "Can't yet handle wb width that isn't 64-bits"
 133 assert SET_SIZE_BITS <= TLB_LG_PGSZ, "Set indexed by virtual address"
 134
 135
 136 def TLBValidBitsArray():
 137     return Array(Signal(TLB_NUM_WAYS) for x in range(TLB_SET_SIZE))
 138
 139 def TLBTagsArray():
 140     return Array(Signal(TLB_TAG_WAY_BITS) for x in range (TLB_SET_SIZE))
 141
 142 def TLBPtesArray():
 143     return Array(Signal(TLB_PTE_WAY_BITS) for x in range(TLB_SET_SIZE))
 144
 145 def HitWaySet():
 146     return Array(Signal(NUM_WAYS) for x in range(TLB_NUM_WAYS))
 147
 148 # Cache RAM interface
 149 def CacheRamOut():
 150     return Array(Signal(WB_DATA_BITS) for x in range(NUM_WAYS))
 151
 152 # PLRU output interface
 153 def PLRUOut():
 154     return Array(Signal(WAY_BITS) for x in range(Index()))
 155
 156 # TLB PLRU output interface
 157 def TLBPLRUOut():
 158     return Array(Signal(TLB_WAY_BITS) for x in range(TLB_SET_SIZE))
 159
 160 # Helper functions to decode incoming requests
 161 #
 162 # Return the cache line index (tag index) for an address
 163 def get_index(addr):
 164     return addr[LINE_OFF_BITS:SET_SIZE_BITS]
 165
 166 # Return the cache row index (data memory) for an address
 167 def get_row(addr):
 168     return addr[ROW_OFF_BITS:SET_SIZE_BITS]
 169
 170 # Return the index of a row within a line
 171 def get_row_of_line(row):
 172     row_v = Signal(ROW_BITS)
 173     row_v = Signal(row)
 174     return row_v[0:ROW_LINE_BITS]
 175
 176 # Returns whether this is the last row of a line
 177 def is_last_row_addr(addr, last):
 178     return addr[ROW_OFF_BITS:LINE_OFF_BITS] == last
 179
 180 # Returns whether this is the last row of a line
 181 def is_last_row(row, last):
 182     return get_row_of_line(row) == last
 183
 184 # Return the address of the next row in the current cache line
 185 def next_row_addr(addr):
 186     row_idx = Signal(ROW_LINE_BITS)
 187     result  = WBAddrType()
 188     # Is there no simpler way in VHDL to
 189     # generate that 3 bits adder ?
 190     row_idx = addr[ROW_OFF_BITS:LINE_OFF_BITS]
 191     row_idx = Signal(row_idx + 1)
 192     result = addr
 193     result[ROW_OFF_BITS:LINE_OFF_BITS] = row_idx
 194     return result
 195
 196 # Return the next row in the current cache line. We use a
 197 # dedicated function in order to limit the size of the
 198 # generated adder to be only the bits within a cache line
 199 # (3 bits with default settings)
 200 def next_row(row)
 201     row_v = row[0:ROW_LINE_BITS] + 1
 202     return Cat(row_v[:ROW_LINE_BITS], row[ROW_LINE_BITS:])
 203
 204 # Get the tag value from the address
 205 def get_tag(addr):
 206     return addr[SET_SIZE_BITS:REAL_ADDR_BITS]
 207
 208 # Read a tag from a tag memory row
 209 def read_tag(way, tagset):
 210     return tagset[way *TAG_WIDTH:way * TAG_WIDTH + TAG_BITS]
 211
 212 # Read a TLB tag from a TLB tag memory row
 213 def read_tlb_tag(way, tags):
 214     j = way * TLB_EA_TAG_BITS
 215     return tags[j:j + TLB_EA_TAG_BITS]
 216
 217 # Write a TLB tag to a TLB tag memory row
 218 def write_tlb_tag(way, tags), tag):
 219     j = way * TLB_EA_TAG_BITS
 220     tags[j:j + TLB_EA_TAG_BITS] = tag
 221
 222 # Read a PTE from a TLB PTE memory row
 223 def read_tlb_pte(way, ptes):
 224     j = way * TLB_PTE_BITS
 225     return ptes[j:j + TLB_PTE_BITS]
 226
 227 def write_tlb_pte(way, ptes,newpte):
 228     j = way * TLB_PTE_BITS
 229     return ptes[j:j + TLB_PTE_BITS].eq(newpte)
 230
 231
 232 # Record for storing permission, attribute, etc. bits from a PTE
class PermAttr(RecordObject):
    """Permission and attribute bits taken from a PTE.

    Every field is a single-bit Signal. The PTE bit each field is taken
    from is the one used by extract_perm_attr().
    """
    def __init__(self):
        super().__init__()
        self.reference = Signal()  # PTE bit 8
        self.changed   = Signal()  # PTE bit 7
        self.nocache   = Signal()  # PTE bit 5
        self.priv      = Signal()  # PTE bit 3
        self.rd_perm   = Signal()  # PTE bit 2
        self.wr_perm   = Signal()  # PTE bit 1
 242
 243
 244 def extract_perm_attr(pte):
 245     pa = PermAttr()
 246     pa.reference = pte[8]
 247     pa.changed   = pte[7]
 248     pa.nocache   = pte[5]
 249     pa.priv      = pte[3]
 250     pa.rd_perm   = pte[2]
 251     pa.wr_perm   = pte[1]
 252     return pa;
 253
 254
 255 # Type of operation on a "valid" input
 256 @unique
 257 class Op(Enum):
 258     OP_NONE       = 0
 259     OP_BAD        = 1 # NC cache hit, TLB miss, prot/RC failure
 260     OP_STCX_FAIL  = 2 # conditional store w/o reservation
 261     OP_LOAD_HIT   = 3 # Cache hit on load
 262     OP_LOAD_MISS  = 4 # Load missing cache
 263     OP_LOAD_NC    = 5 # Non-cachable load
 264     OP_STORE_HIT  = 6 # Store hitting cache
 265     OP_STORE_MISS = 7 # Store missing cache
 266
 267
 268 # Cache state machine
 269 @unique
 270 class State(Enum):
 271     IDLE             = 0 # Normal load hit processing
 272     RELOAD_WAIT_ACK  = 1 # Cache reload wait ack
 273     STORE_WAIT_ACK   = 2 # Store wait ack
 274     NC_LOAD_WAIT_ACK = 3 # Non-cachable load wait ack
 275
 276
 277 # Dcache operations:
 278 #
 279 # In order to make timing, we use the BRAMs with
 280 # an output buffer, which means that the BRAM
 281 # output is delayed by an extra cycle.
 282 #
 283 # Thus, the dcache has a 2-stage internal pipeline
 284 # for cache hits with no stalls.
 285 #
 286 # All other operations are handled via stalling
 287 # in the first stage.
 288 #
 289 # The second stage can thus complete a hit at the same
 290 # time as the first stage emits a stall for a complex op.
 291 #
 292 # Stage 0 register, basically contains just the latched request
 293
class RegStage0(RecordObject):
    """Stage 0 register: the latched request plus its TLB/MMU flags."""
    def __init__(self):
        super().__init__()
        # The latched request, in LoadStore1 -> DCache format
        self.req     = LoadStore1ToDCacheType()
        # TLB operation flags (copied from the MMU request by stage_0)
        self.tlbie   = Signal()
        self.doall   = Signal()
        self.tlbld   = Signal()
        self.mmu_req = Signal() # indicates source of request
 302
 303
 304 class MemAccessRequest(RecordObject):
 305     def __init__(self):
 306         super().__init__()
 307         self.op        = Op()
 308         self.valid     = Signal()
 309         self.dcbz      = Signal()
 310         self.real_addr = Signal(REAL_ADDR_BITS)
 311         self.data      = Signal(64)
 312         self.byte_sel  = Signal(8)
 313         self.hit_way   = Signal(WAY_BITS)
 314         self.same_tag  = Signal()
 315         self.mmu_req   = Signal()
 316
 317
 318 # First stage register, contains state for stage 1 of load hits
 319 # and for the state machine used by all other operations
 320 class RegStage1(RecordObject):
 321     def __init__(self):
 322         super().__init__()
 323         # Info about the request
 324         self.full             = Signal() # have uncompleted request
 325         self.mmu_req          = Signal() # request is from MMU
 326         self.req              = MemAccessRequest()
 327
 328         # Cache hit state
 329         self.hit_way          = Signal(WAY_BITS)
 330         self.hit_load_valid   = Signal()
 331         self.hit_index        = Signal(NUM_LINES)
 332         self.cache_hit        = Signal()
 333
 334         # TLB hit state
 335         self.tlb_hit          = Signal()
 336         self.tlb_hit_way      = Signal(TLB_NUM_WAYS)
 337         self.tlb_hit_index    = Signal(TLB_WAY_BITS)
 338
 339         # 2-stage data buffer for data forwarded from writes to reads
 340         self.forward_data1    = Signal(64)
 341         self.forward_data2    = Signal(64)
 342         self.forward_sel1     = Signal(8)
 343         self.forward_valid1   = Signal()
 344         self.forward_way1     = Signal(WAY_BITS)
 345         self.forward_row1     = Signal(ROW_BITS)
 346         self.use_forward1     = Signal()
 347         self.forward_sel      = Signal(8)
 348
 349         # Cache miss state (reload state machine)
 350         self.state            = State()
 351         self.dcbz             = Signal()
 352         self.write_bram       = Signal()
 353         self.write_tag        = Signal()
 354         self.slow_valid       = Signal()
 355         self.wb               = WishboneMasterOut()
 356         self.reload_tag       = Signal(TAG_BITS)
 357         self.store_way        = Signal(WAY_BITS)
 358         self.store_row        = Signal(ROW_BITS)
 359         self.store_index      = Signal(INDEX_BITS)
 360         self.end_row_ix       = Signal(log2_int(ROW_LINE_BITS))
 361         self.rows_valid       = RowPerLineValidArray()
 362         self.acks_pending     = Signal(3)
 363         self.inc_acks         = Signal()
 364         self.dec_acks         = Signal()
 365
 366         # Signals to complete (possibly with error)
 367         self.ls_valid         = Signal()
 368         self.ls_error         = Signal()
 369         self.mmu_done         = Signal()
 370         self.mmu_error        = Signal()
 371         self.cache_paradox    = Signal()
 372
 373         # Signal to complete a failed stcx.
 374         self.stcx_fail        = Signal()
 375
 376
 377 # Reservation information
class Reservation(RecordObject):
    """Reservation information for load-with-reservation/store-conditional."""
    def __init__(self):
        super().__init__()
        self.valid = Signal()
        # Address with the LINE_OFF_BITS line-offset bits removed
        # (reservation_reg stores addr[LINE_OFF_BITS:64] here)
        self.addr  = Signal(64-LINE_OFF_BITS)
 383
 384
 385 class DCache(Elaboratable):
 386     """Set associative dcache write-through
 387     TODO (in no specific order):
 388     * See list in icache.vhdl
 389     * Complete load misses on the cycle when WB data comes instead of
 390       at the end of line (this requires dealing with requests coming in
 391       while not idle...)
 392     """
    def __init__(self):
        """Create the external ports of the dcache."""
        # LoadStore1 request in / response out
        self.d_in      = LoadStore1ToDCacheType()
        self.d_out     = DCacheToLoadStore1Type()

        # MMU request in / response out
        self.m_in      = MMUToDCacheType()
        self.m_out     = DCacheToMMUType()

        self.stall_out = Signal()

        # Wishbone bus records (presumably master request out and slave
        # response in, going by the type names -- TODO confirm)
        self.wb_out    = WBMasterOut()
        self.wb_in     = WBSlaveOut()

        self.log_out   = Signal(20)
 406
 407     def stage_0(self, m):
 408         """Latch the request in r0.req as long as we're not stalling
 409         """
 410         comb = m.d.comb
 411         sync = m.d.sync
 412         d_in, d_out = self.d_in, self.d_out
 413
 414         r = RegStage0()
 415
 416         # TODO, this goes in unit tests and formal proofs
 417         with m.If(~(d_in.valid & m_in.valid)):
 418             #sync += Display("request collision loadstore vs MMU")
 419             pass
 420
 421         with m.If(m_in.valid):
 422             sync += r.req.valid.eq(1)
 423             sync += r.req.load.eq(~(m_in.tlbie | m_in.tlbld))
 424             sync += r.req.dcbz.eq(0)
 425             sync += r.req.nc.eq(0)
 426             sync += r.req.reserve.eq(0)
 427             sync += r.req.virt_mode.eq(1)
 428             sync += r.req.priv_mode.eq(1)
 429             sync += r.req.addr.eq(m_in.addr)
 430             sync += r.req.data.eq(m_in.pte)
 431             sync += r.req.byte_sel.eq(~0) # Const -1 sets all to 0b111....
 432             sync += r.tlbie.eq(m_in.tlbie)
 433             sync += r.doall.eq(m_in.doall)
 434             sync += r.tlbld.eq(m_in.tlbld)
 435             sync += r.mmu_req.eq(1)
 436         with m.Else():
 437             sync += r.req.eq(d_in)
 438             sync += r.req.tlbie.eq(0)
 439             sync += r.req.doall.eq(0)
 440             sync += r.req.tlbd.eq(0)
 441             sync += r.req.mmu_req.eq(0)
 442             with m.If(~(r1.full & r0_full)):
 443                 sync += r0.eq(r)
 444                 sync += r0_full.eq(r.req.valid)
 445
 446     def tlb_read(self, m, r0_stall, tlb_valid_way,
 447                  tlb_tag_way, tlb_pte_way, dtlb_valid_bits,
 448                  dtlb_tags, dtlb_ptes):
 449         """TLB
 450         Operates in the second cycle on the request latched in r0.req.
 451         TLB updates write the entry at the end of the second cycle.
 452         """
 453         comb = m.d.comb
 454         sync = m.d.sync
 455         m_in, d_in = self.m_in, self.d_in
 456
 457         index    = Signal(TLB_SET_BITS)
 458         addrbits = Signal(TLB_SET_BITS)
 459
 460         amin = TLB_LG_PGSZ
 461         amax = TLB_LG_PGSZ + TLB_SET_BITS
 462
 463         with m.If(m_in.valid):
 464             comb += addrbits.eq(m_in.addr[amin : amax])
 465         with m.Else():
 466             comb += addrbits.eq(d_in.addr[amin : amax])
 467         comb += index.eq(addrbits)
 468
 469         # If we have any op and the previous op isn't finished,
 470         # then keep the same output for next cycle.
 471         with m.If(~r0_stall):
 472             sync += tlb_valid_way.eq(dtlb_valid_bits[index])
 473             sync += tlb_tag_way.eq(dtlb_tags[index])
 474             sync += tlb_pte_way.eq(dtlb_ptes[index])
 475
 476     def maybe_tlb_plrus(self, m, r1, tlb_plru_victim, acc, acc_en, lru):
 477         """Generate TLB PLRUs
 478         """
 479         comb = m.d.comb
 480         sync = m.d.sync
 481
 482         with m.If(TLB_NUM_WAYS > 1):
 483             for i in range(TLB_SET_SIZE):
 484                 # TLB PLRU interface
 485                 tlb_plru        = PLRU(TLB_WAY_BITS)
 486                 setattr(m.submodules, "maybe_plru_%d" % i, tlb_plru)
 487                 tlb_plru_acc    = Signal(TLB_WAY_BITS)
 488                 tlb_plru_acc_en = Signal()
 489                 tlb_plru_out    = Signal(TLB_WAY_BITS)
 490
 491                 comb += tlb_plru.acc.eq(tlb_plru_acc)
 492                 comb += tlb_plru.acc_en.eq(tlb_plru_acc_en)
 493                 comb += tlb_plru.lru.eq(tlb_plru_out)
 494
 495                 # PLRU interface
 496                 with m.If(r1.tlb_hit_index == i):
 497                     comb += tlb_plru.acc_en.eq(r1.tlb_hit)
 498                 with m.Else():
 499                     comb += tlb_plru.acc_en.eq(0)
 500                 comb += tlb_plru.acc.eq(r1.tlb_hit_way)
 501
 502                 comb += tlb_plru_victim[i].eq(tlb_plru.lru)
 503
 504     def tlb_search(self, m, tlb_req_index, r0, tlb_valid_way_ tlb_tag_way,
 505                    tlb_pte_way, pte, tlb_hit, valid_ra, perm_attr, ra):
 506
 507         comb = m.d.comb
 508         sync = m.d.sync
 509
 510         hitway = Signal(TLB_WAY_BITS)
 511         hit    = Signal()
 512         eatag  = Signal(TLB_EA_TAG_BITS)
 513
 514         TLB_LG_END = TLB_LG_PGSZ + TLB_SET_BITS
 515         comb += tlb_req_index.eq(r0.req.addr[TLB_LG_PGSZ : TLB_LG_END])
 516         comb += eatag.eq(r0.req.addr[TLB_LG_END : 64 ])
 517
 518         for i in range(TLB_NUM_WAYS):
 519             with m.If(tlb_valid_way(i)
 520                       & read_tlb_tag(i, tlb_tag_way) == eatag):
 521                 comb += hitway.eq(i)
 522                 comb += hit.eq(1)
 523
 524         comb += tlb_hit.eq(hit & r0_valid)
 525         comb += tlb_hit_way.eq(hitway)
 526
 527         with m.If(tlb_hit):
 528             comb += pte.eq(read_tlb_pte(hitway, tlb_pte_way))
 529         with m.Else():
 530             comb += pte.eq(0)
 531         comb += valid_ra.eq(tlb_hit | ~r0.req.virt_mode)
 532         with m.If(r0.req.virt_mode):
 533             comb += ra.eq(Cat(Const(0, ROW_OFF_BITS),
 534                               r0.req.addr[ROW_OFF_BITS:TLB_LG_PGSZ],
 535                               pte[TLB_LG_PGSZ:REAL_ADDR_BITS]))
 536             comb += perm_attr.eq(extract_perm_attr(pte))
 537         with m.Else():
 538             comb += ra.eq(Cat(Const(0, ROW_OFF_BITS),
 539                               r0.rq.addr[ROW_OFF_BITS:REAL_ADDR_BITS]))
 540
 541             comb += perm_attr.reference.eq(1)
 542             comb += perm_attr.changed.eq(1)
 543             comb += perm_attr.priv.eq(1)
 544             comb += perm_attr.nocache.eq(0)
 545             comb += perm_attr.rd_perm.eq(1)
 546             comb += perm_attr.wr_perm.eq(1)
 547
 548     def tlb_update(self, m, r0_valid, r0, dtlb_valid_bits, tlb_req_index,
 549                     tlb_hit_way, tlb_hit, tlb_plru_victim, tlb_tag_way,
 550                     dtlb_tags, tlb_pte_way, dtlb_ptes, dtlb_valid_bits):
 551
 552         comb = m.d.comb
 553         sync = m.d.sync
 554
 555         tlbie    = Signal()
 556         tlbwe    = Signal()
 557         repl_way = Signal(TLB_WAY_BITS)
 558         eatag    = Signal(TLB_EA_TAG_BITS)
 559         tagset   = TLBWayTags()
 560         pteset   = TLBWayPtes()
 561
 562         comb += tlbie.eq(r0_valid & r0.tlbie)
 563         comb += tlbwe.eq(r0_valid & r0.tlbldoi)
 564
 565         with m.If(tlbie & r0.doall):
 566             # clear all valid bits at once
 567             for i in range(TLB_SET_SIZE):
 568                 sync += dtlb_valid_bits[i].eq(0)
 569
 570         with m.Elif(tlbie):
 571             with m.If(tlb_hit):
 572                 sync += dtlb_valid_bits[tlb_req_index][tlb_hit_way].eq(0)
 573         with m.Elif(tlbwe):
 574             with m.If(tlb_hit):
 575                 comb += repl_way.eq(tlb_hit_way)
 576             with m.Else():
 577                 comb += repl_way.eq(tlb_plru_victim[tlb_req_index])
 578             comb += eatag.eq(r0.req.addr[TLB_LG_PGSZ + TLB_SET_BITS:64])
 579             comb += tagset.eq(tlb_tag_way)
 580             sync += write_tlb_tag(repl_way, tagset, eatag)
 581             sync += dtlb_tags[tlb_req_index].eq(tagset)
 582             comb += pteset.eq(tlb_pte_way)
 583             sync += write_tlb_pte(repl_way, pteset, r0.req.data)
 584             sync += dtlb_ptes[tlb_req_index].eq(pteset)
 585             sync += dtlb_valid_bits[tlb_req_index][repl_way].eq(1)
 586
 587     def maybe_plrus(self, r1):
 588         """Generate PLRUs
 589         """
 590         comb = m.d.comb
 591         sync = m.d.sync
 592
 593         for i in range(NUM_LINES):
 594             # PLRU interface
 595             plru        = PLRU(TLB_WAY_BITS)
 596             setattr(m.submodules, "plru%d" % i, plru)
 597             plru_acc    = Signal(WAY_BITS)
 598             plru_acc_en = Signal()
 599             plru_out    = Signal(WAY_BITS)
 600
 601             comb += plru.acc.eq(plru_acc)
 602             comb += plru.acc_en.eq(plru_acc_en)
 603             comb += plru.lru.eq(plru_out)
 604
 605             with m.If(r1.hit_index == i):
 606                 comb += plru_acc_en.eq(r1.cache_hit)
 607
 608             comb += plru_acc.eq(r1.hit_way)
 609             comb += plru_victim[i].eq(plru_out)
 610
 611     def cache_tag_read(self, m, r0_stall, req_index, cache_tag_set, cache_tags):
 612         """Cache tag RAM read port
 613         """
 614         comb = m.d.comb
 615         sync = m.d.sync
 616         m_in, d_in = self.m_in, self.d_in
 617
 618         index = Signal(INDEX_BITS)
 619
 620         with m.If(r0_stall):
 621             comb += index.eq(req_index)
 622         with m.Elif(m_in.valid):
 623             comb += index.eq(get_index(m_in.addr))
 624         with m.Else():
 625             comb += index.eq(get_index(d_in.addr))
 626         sync += cache_tag_set.eq(cache_tags[index])
 627
 628     def dcache_request(self, m, r0, ra, req_index, req_row, req_tag,
 629                        r0_valid, r1, cache_valid_bits, replace_way,
 630                        use_forward1_next, use_forward2_next,
 631                        req_hit_way, plru_victim, rc_ok, perm_attr,
 632                        valid_ra, perm_ok, access_ok, req_op, req_ok,
 633                        r0_stall, early_req_row):
 634         """Cache request parsing and hit detection
 635         """
 636
 637         comb = m.d.comb
 638         sync = m.d.sync
 639         m_in, d_in = self.m_in, self.d_in
 640
 641         is_hit      = Signal()
 642         hit_way     = Signal(WAY_BITS)
 643         op          = Op()
 644         opsel       = Signal(3)
 645         go          = Signal()
 646         nc          = Signal()
 647         s_hit       = Signal()
 648         s_tag       = Signal(TAG_BITS)
 649         s_pte       = Signal(TLB_PTE_BITS)
 650         s_ra        = Signal(REAL_ADDR_BITS)
 651         hit_set     = Signal(TLB_NUM_WAYS)
 652         hit_way_set = HitWaySet()
 653         rel_matches = Signal(TLB_NUM_WAYS)
 654         rel_match   = Signal()
 655
 656         # Extract line, row and tag from request
 657         comb += req_index.eq(get_index(r0.req.addr))
 658         comb += req_row.eq(get_row(r0.req.addr))
 659         comb += req_tag.eq(get_tag(ra))
 660
 661         comb += go.eq(r0_valid & ~(r0.tlbie | r0.tlbld) & ~r1.ls_error)
 662
 663         # Test if pending request is a hit on any way
 664         # In order to make timing in virtual mode,
 665         # when we are using the TLB, we compare each
 666         # way with each of the real addresses from each way of
 667         # the TLB, and then decide later which match to use.
 668
 669         with m.If(r0.req.virt_mode):
 670             comb += rel_matches.eq(0)
 671             for j in range(TLB_NUM_WAYS):
 672                 comb += s_pte.eq(read_tlb_pte(j, tlb_pte_way))
 673                 comb += s_ra.eq(Cat(r0.req.addr[0:TLB_LG_PGSZ],
 674                                     s_pte[TLB_LG_PGSZ:REAL_ADDR_BITS]))
 675                 comb += s_tag.eq(get_tag(s_ra))
 676
 677                 for i in range(NUM_WAYS):
 678                     with m.If(go & cache_valid_bits[req_index][i] &
 679                               read_tag(i, cache_tag_set) == s_tag
 680                               & tlb_valid_way[j]):
 681                         comb += hit_way_set[j].eq(i)
 682                         comb += s_hit.eq(1)
 683                 comb += hit_set[j].eq(s_hit)
 684                 with m.If(s_tag == r1.reload_tag):
 685                     comb += rel_matches[j].eq(1)
 686             with m.If(tlb_hit):
 687                 comb += is_hit.eq(hit_set[tlb_hit_way])
 688                 comb += hit_way.eq(hit_way_set[tlb_hit_way])
 689                 comb += rel_match.eq(rel_matches[tlb_hit_way])
 690         with m.Else():
 691             comb += s_tag.eq(get_tag(r0.req.addr))
 692             for i in range(NUM_WAYS):
 693                 with m.If(go & cache_valid_bits[req_index][i] &
 694                           read_tag(i, cache_tag_set) == s_tag):
 695                     comb += hit_way.eq(i)
 696                     comb += is_hit.eq(1)
 697             with m.If(s_tag == r1.reload_tag):
 698                 comb += rel_match.eq(1)
 699         comb += req_same_tag.eq(rel_match)
 700
 701         # See if the request matches the line currently being reloaded
 702         with m.If((r1.state == State.RELOAD_WAIT_ACK) &
 703                   (req_index == r1.store_index) & rel_match):
 704             # For a store, consider this a hit even if the row isn't
 705             # valid since it will be by the time we perform the store.
 706             # For a load, check the appropriate row valid bit.
 707             valid = r1.rows_valid[req_row % ROW_PER_LINE]
 708             comb += is_hit.eq(~r0.req.load | valid)
 709             comb += hit_way.eq(replace_way)
 710
 711         # Whether to use forwarded data for a load or not
 712         comb += use_forward1_next.eq(0)
 713         with m.If((get_row(r1.req.real_addr) == req_row)
 714                   & (r1.req.hit_way == hit_way))
 715             # Only need to consider r1.write_bram here, since if we
 716             # are writing refill data here, then we don't have a
 717             # cache hit this cycle on the line being refilled.
 718             # (There is the possibility that the load following the
 719             # load miss that started the refill could be to the old
 720             # contents of the victim line, since it is a couple of
 721             # cycles after the refill starts before we see the updated
 722             # cache tag. In that case we don't use the bypass.)
 723             comb += use_forward1_next.eq(r1.write_bram)
 724         comb += use_forward2_next.eq(0)
 725         with m.If((r1.forward_row1 == req_row) & (r1.forward_way1 == hit_way)):
 726             comb += use_forward2_next.eq(r1.forward_valid1)
 727
 728         # The way that matched on a hit
 729         comb += req_hit_way.eq(hit_way)
 730
 731         # The way to replace on a miss
 732         with m.If(r1.write_tag):
 733             replace_way.eq(plru_victim[r1.store_index])
 734         with m.Else():
 735             comb += replace_way.eq(r1.store_way)
 736
 737         # work out whether we have permission for this access
 738         # NB we don't yet implement AMR, thus no KUAP
 739         comb += rc_ok.eq(perm_attr.reference
 740                          & (r0.req.load | perm_attr.changed)
 741                 )
 742         comb += perm_ok.eq((r0.req.prive_mode | ~perm_attr.priv)
 743                            & perm_attr.wr_perm
 744                            | (r0.req.load & perm_attr.rd_perm)
 745                           )
 746         comb += access_ok.eq(valid_ra & perm_ok & rc_ok)
 747         # Combine the request and cache hit status to decide what
 748         # operation needs to be done
 749         comb += nc.eq(r0.req.nc | perm_attr.nocache)
 750         comb += op.eq(Op.OP_NONE)
 751         with m.If(go):
 752             with m.If(~access_ok):
 753                 comb += op.eq(Op.OP_BAD)
 754             with m.Elif(cancel_store):
 755                 comb += op.eq(Op.OP_STCX_FAIL)
 756             with m.Else():
 757                 comb += opsel.eq(Cat(is_hit, nc, r0.req.load))
 758                 with m.Switch(opsel):
 759                     with m.Case(Const(0b101, 3)):
 760                         comb += op.eq(Op.OP_LOAD_HIT)
 761                     with m.Case(Cosnt(0b100, 3)):
 762                         comb += op.eq(Op.OP_LOAD_MISS)
 763                     with m.Case(Const(0b110, 3)):
 764                         comb += op.eq(Op.OP_LOAD_NC)
 765                     with m.Case(Const(0b001, 3)):
 766                         comb += op.eq(Op.OP_STORE_HIT)
 767                     with m.Case(Const(0b000, 3)):
 768                         comb += op.eq(Op.OP_STORE_MISS)
 769                     with m.Case(Const(0b010, 3)):
 770                         comb += op.eq(Op.OP_STORE_MISS)
 771                     with m.Case(Const(0b011, 3)):
 772                         comb += op.eq(Op.OP_BAD)
 773                     with m.Case(Const(0b111, 3)):
 774                         comb += op.eq(Op.OP_BAD)
 775                     with m.Default():
 776                         comb += op.eq(Op.OP_NONE)
 777         comb += req_op.eq(op)
 778         comb += req_go.eq(go)
 779
 780         # Version of the row number that is valid one cycle earlier
 781         # in the cases where we need to read the cache data BRAM.
 782         # If we're stalling then we need to keep reading the last
 783         # row requested.
 784         with m.If(~r0_stall):
 785             with m.If(m_in.valid):
 786                 comb += early_req_row.eq(get_row(m_in.addr))
 787             with m.Else():
 788                 comb += early_req_row.eq(get_row(d_in.addr))
 789         with m.Else():
 790             comb += early_req_row.eq(req_row)
 791
 792     def reservation_comb(self, m, cancel_store, set_rsrv, clear_rsrv,
 793                          r0_valid, r0, reservation):
 794         """Handle load-with-reservation and store-conditional instructions
 795         """
 796         comb = m.d.comb
 797         sync = m.d.sync
 798
 799         with m.If(r0_valid & r0.req.reserve):
 800
 801             # XXX generate alignment interrupt if address
 802             # is not aligned XXX or if r0.req.nc = '1'
 803             with m.If(r0.req.load):
 804                 comb += set_rsrv(1) # load with reservation
 805             with m.Else():
 806                 comb += clear_rsrv.eq(1) # store conditional
 807                 with m.If(~reservation.valid | r0.req.addr[LINE_OFF_BITS:64]):
 808                     comb += cancel_store.eq(1)
 809
 810     def reservation_reg(self, m, r0_valid, access_ok, clear_rsrv,
 811                         reservation, r0):
 812
 813         comb = m.d.comb
 814         sync = m.d.sync
 815
 816         with m.If(r0_valid & access_ok):
 817             with m.If(clear_rsrv):
 818                 sync += reservation.valid.eq(0)
 819             with m.Elif(set_rsrv):
 820                 sync += reservation.valid.eq(1)
 821                 sync += reservation.addr.eq(r0.req.addr[LINE_OFF_BITS:64])
 822
 823     def writeback_control(self, m, r1, cache_out):
 824         """Return data for loads & completion control logic
 825         """
 826         comb = m.d.comb
 827         sync = m.d.sync
 828         d_out, m_out = self.d_out, self.m_out
 829
 830         data_out = Signal(64)
 831         data_fwd = Signal(64)
 832
 833         # Use the bypass if are reading the row that was
 834         # written 1 or 2 cycles ago, including for the
 835         # slow_valid = 1 case (i.e. completing a load
 836         # miss or a non-cacheable load).
 837         with m.If(r1.use_forward1):
 838             comb += data_fwd.eq(r1.forward_data1)
 839         with m.Else():
 840             comb += data_fwd.eq(r1.forward_data2)
 841
 842         comb += data_out.eq(cache_out[r1.hit_way])
 843
 844         for i in range(8):
 845             with m.If(r1.forward_sel[i]):
 846                 dsel = data_fwd.word_select(i, 8)
 847                 comb += data_out.word_select(i, 8).eq(dsel)
 848
 849         comb += d_out.valid.eq(r1.ls_valid)
 850         comb += d_out.data.eq(data_out)
 851         comb += d_out.store_done.eq(~r1.stcx_fail)
 852         comb += d_out.error.eq(r1.ls_error)
 853         comb += d_out.cache_paradox.eq(r1.cache_paradox)
 854
 855         # Outputs to MMU
 856         comb += m_out.done.eq(r1.mmu_done)
 857         comb += m_out.err.eq(r1.mmu_error)
 858         comb += m_out.data.eq(data_out)
 859
 860         # We have a valid load or store hit or we just completed
 861         # a slow op such as a load miss, a NC load or a store
 862         #
 863         # Note: the load hit is delayed by one cycle. However it
 864         # can still not collide with r.slow_valid (well unless I
 865         # miscalculated) because slow_valid can only be set on a
 866         # subsequent request and not on its first cycle (the state
 867         # machine must have advanced), which makes slow_valid
 868         # at least 2 cycles from the previous hit_load_valid.
 869
 870         # Sanity: Only one of these must be set in any given cycle
 871
 872         if False: # TODO: need Display to get this to work
 873             assert (r1.slow_valid & r1.stcx_fail) != 1 "unexpected" \
 874              "slow_valid collision with stcx_fail -!- severity FAILURE"
 875
 876             assert ((r1.slow_valid | r1.stcx_fail) | r1.hit_load_valid) != 1
 877              "unexpected hit_load_delayed collision with slow_valid -!-" \
 878              "severity FAILURE"
 879
 880         with m.If(~r1._mmu_req):
 881             # Request came from loadstore1...
 882             # Load hit case is the standard path
 883             with m.If(r1.hit_load_valid):
 884                 #Display(f"completing load hit data={data_out}")
 885                 pass
 886
 887             # error cases complete without stalling
 888             with m.If(r1.ls_error):
 889                 # Display("completing ld/st with error")
 890                 pass
 891
 892             # Slow ops (load miss, NC, stores)
 893             with m.If(r1.slow_valid):
 894                 #Display(f"completing store or load miss data={data_out}")
 895                 pass
 896
 897         with m.Else():
 898             # Request came from MMU
 899             with m.If(r1.hit_load_valid):
 900                 # Display(f"completing load hit to MMU, data={m_out.data}")
 901                 pass
 902             # error cases complete without stalling
 903             with m.If(r1.mmu_error):
 904                 #Display("combpleting MMU ld with error")
 905                 pass
 906
 907             # Slow ops (i.e. load miss)
 908             with m.If(r1.slow_valid):
 909                 #Display("completing MMU load miss, data={m_out.data}")
 910                 pass
 911
 912     def rams(self, m, r1):
 913         """rams
 914         Generate a cache RAM for each way. This handles the normal
 915         reads, writes from reloads and the special store-hit update
 916         path as well.
 917
 918         Note: the BRAMs have an extra read buffer, meaning the output
 919         is pipelined an extra cycle. This differs from the
 920         icache. The writeback logic needs to take that into
 921         account by using 1-cycle delayed signals for load hits.
 922         """
 923         comb = m.d.comb
 924         wb_in = self.wb_in
 925
 926         for i in range(NUM_WAYS):
 927             do_read  = Signal()
 928             rd_addr  = Signal(ROW_BITS)
 929             do_write = Signal()
 930             wr_addr  = Signal(ROW_BITS)
 931             wr_data  = Signal(WB_DATA_BITS)
 932             wr_sel   = Signal(ROW_SIZE)
 933             wr_sel_m = Signal(ROW_SIZE)
 934             _d_out   = Signal(WB_DATA_BITS)
 935
 936             way = CacheRam(ROW_BITS, WB_DATA_BITS, True)
 937             setattr(m.submodules, "cacheram_%d" % i, way)
 938
 939             comb += way.rd_en.eq(do_read)
 940             comb += way.rd_addr.eq(rd_addr)
 941             comb += _d_out.eq(way.rd_data)
 942             comb += way.wr_sel.eq(wr_sel_m)
 943             comb += way.wr_addr.eq(wr_addr)
 944             comb += way.wr_data.eq(wr_data)
 945
 946             # Cache hit reads
 947             comb += do_read.eq(1)
 948             comb += rd_addr.eq(early_req_row)
 949             comb += cache_out[i].eq(_d_out)
 950
 951             # Write mux:
 952             #
 953             # Defaults to wishbone read responses (cache refill)
 954             #
 955             # For timing, the mux on wr_data/sel/addr is not
 956             # dependent on anything other than the current state.
 957
 958             with m.If(r1.write_bram):
 959                 # Write store data to BRAM.  This happens one
 960                 # cycle after the store is in r0.
 961                 comb += wr_data.eq(r1.req.data)
 962                 comb += wr_sel.eq(r1.req.byte_sel)
 963                 comb += wr_addr.eq(get_row(r1.req.real_addr))
 964
 965                 with m.If(i == r1.req.hit_way):
 966                     comb += do_write.eq(1)
 967             with m.Else():
 968                 # Otherwise, we might be doing a reload or a DCBZ
 969                 with m.If(r1.dcbz):
 970                     comb += wr_data.eq(0)
 971                 with m.Else():
 972                     comb += wr_data.eq(wb_in.dat)
 973                 comb += wr_addr.eq(r1.store_row)
 974                 comb += wr_sel.eq(~0) # all 1s
 975
 976             with m.If((r1.state == State.RELOAD_WAIT_ACK)
 977                       & wb_in.ack & (replace_way == i)):
 978                 comb += do_write.eq(1)
 979
 980                 # Mask write selects with do_write since BRAM
 981                 # doesn't have a global write-enable
 982                 with m.If(do_write):
 983                     comb += wr_sel_m.eq(wr_sel)
 984
 985     # Cache hit synchronous machine for the easy case.
 986     # This handles load hits.
 987     # It also handles error cases (TLB miss, cache paradox)
 988     def dcache_fast_hit(self, m, req_op, r0_valid, r1):
 989
 990         comb = m.d.comb
 991         sync = m.d.sync
 992
 993         with m.If(req_op != Op.OP_NONE):
 994             #Display(f"op:{req_op} addr:{r0.req.addr} nc: {r0.req.nc}" \
 995             #      f"idx:{req_index} tag:{req_tag} way: {req_hit_way}"
 996             #     )
 997             pass
 998
 999         with m.If(r0_valid):
1000             sync += r1.mmu_req.eq(r0.mmu_req)
1001
1002         # Fast path for load/store hits.
1003         # Set signals for the writeback controls.
1004         sync += r1.hit_way.eq(req_hit_way)
1005         sync += r1.hit_index.eq(req_index)
1006
1007         with m.If(req_op == Op.OP_LOAD_HIT):
1008             sync += r1.hit_load_valid.eq(1)
1009         with m.Else():
1010             sync += r1.hit_load_valid.eq(0)
1011
1012         with m.If((req_op == Op.OP_LOAD_HIT) | (req_op == Op.OP_STORE_HIT)):
1013             sync += r1.cache_hit.eq(1)
1014         with m.Else():
1015             sync += r1.cache_hit.eq(0)
1016
1017         with m.If(req_op == Op.OP_BAD):
1018             # Display(f"Signalling ld/st error valid_ra={valid_ra}"
1019             #      f"rc_ok={rc_ok} perm_ok={perm_ok}"
1020             sync += r1.ls_error.eq(~r0.mmu_req)
1021             sync += r1.mmu_error.eq(r0.mmu_req)
1022             sync += r1.cache_paradox.eq(access_ok)
1023
1024             with m.Else():
1025                 sync += r1.ls_error.eq(0)
1026                 sync += r1.mmu_error.eq(0)
1027                 sync += r1.cache_paradox.eq(0)
1028
1029         with m.If(req_op == Op.OP_STCX_FAIL):
1030             r1.stcx_fail.eq(1)
1031         with m.Else():
1032             sync += r1.stcx_fail.eq(0)
1033
1034         # Record TLB hit information for updating TLB PLRU
1035         sync += r1.tlb_hit.eq(tlb_hit)
1036         sync += r1.tlb_hit_way.eq(tlb_hit_way)
1037         sync += r1.tlb_hit_index.eq(tlb_req_index)
1038
1039     # Memory accesses are handled by this state machine:
1040     #
1041     #   * Cache load miss/reload (in conjunction with "rams")
    #   * Load hits for non-cacheable forms
1043     #   * Stores (the collision case is handled in "rams")
1044     #
1045     # All wishbone requests generation is done here.
1046     # This machine operates at stage 1.
1047     def dcache_slow(self, m, r1, use_forward1_next, cache_valid_bits, r0,
1048                     r0_valid, req_op, cache_tag, req_go, ra):
1049
1050         comb = m.d.comb
1051         sync = m.d.sync
1052         wb_in = self.wb_i
1053
1054         req         = MemAccessRequest()
1055         acks        = Signal(3)
1056         adjust_acks = Signal(3)
1057
1058         sync += r1.use_forward1.eq(use_forward1_next)
1059         sync += r1.forward_sel.eq(0)
1060
1061         with m.If(use_forward1_next):
1062             sync += r1.forward_sel.eq(r1.req.byte_sel)
1063         with m.Elif(use_forward2_next):
1064             sync += r1.forward_sel.eq(r1.forward_sel1)
1065
1066         sync += r1.forward_data2.eq(r1.forward_data1)
1067         with m.If(r1.write_bram):
1068             sync += r1.forward_data1.eq(r1.req.data)
1069             sync += r1.forward_sel1.eq(r1.req.byte_sel)
1070             sync += r1.forward_way1.eq(r1.req.hit_way)
1071             sync += r1.forward_row1.eq(get_row(r1.req.real_addr))
1072             sync += r1.forward_valid1.eq(1)
1073         with m.Else():
1074             with m.If(r1.bcbz):
1075                 sync += r1.forward_data1.eq(0)
1076             with m.Else():
1077                 sync += r1.forward_data1.eq(wb_in.dat)
1078             sync += r1.forward_sel1.eq(~0) # all 1s
1079             sync += r1.forward_way1.eq(replace_way)
1080             sync += r1.forward_row1.eq(r1.store_row)
1081             sync += r1.forward_valid1.eq(0)
1082
1083         # One cycle pulses reset
1084         sync += r1.slow_valid.eq(0)
1085         sync += r1.write_bram.eq(0)
1086         sync += r1.inc_acks.eq(0)
1087         sync += r1.dec_acks.eq(0)
1088
1089         sync += r1.ls_valid.eq(0)
1090         # complete tlbies and TLB loads in the third cycle
1091         sync += r1.mmu_done.eq(r0_valid & (r0.tlbie | r0.tlbld))
1092
1093         with m.If((req_op == Op.OP_LOAD_HIT)
1094                   | (req_op == Op.OP_STCX_FAIL)):
1095             with m.If(~r0.mmu_req):
1096                 sync += r1.ls_valid.eq(1)
1097             with m.Else():
1098                 sync += r1.mmu_done.eq(1)
1099
1100         with m.If(r1.write_tag):
1101             # Store new tag in selected way
1102             for i in range(NUM_WAYS):
1103                 with m.If(i == replace_way):
1104                     idx = r1.store_index
1105                     trange = range(i * TAG_WIDTH, (i+1) * TAG_WIDTH)
1106                     sync += cache_tag[idx][trange].eq(r1.reload_tag)
1107             sync += r1.store_way.eq(replace_way)
1108             sync += r1.write_tag.eq(0)
1109
1110         # Take request from r1.req if there is one there,
1111         # else from req_op, ra, etc.
1112         with m.If(r1.full)
1113             comb += req.eq(r1.req)
1114         with m.Else():
1115             comb += req.op.eq(req_op)
1116             comb += req.valid.eq(req_go)
1117             comb += req.mmu_req.eq(r0.mmu_req)
1118             comb += req.dcbz.eq(r0.req.dcbz)
1119             comb += req.real_addr.eq(ra)
1120
1121             with m.If(~r0.req.dcbz):
1122                 comb += req.data.eq(r0.req.data)
1123             with m.Else():
1124                 comb += req.data.eq(0)
1125
1126             # Select all bytes for dcbz
1127             # and for cacheable loads
1128             with m.If(r0.req.dcbz | (r0.req.load & ~r0.req.nc):
1129                 comb += req.byte_sel.eq(~0) # all 1s
1130             with m.Else():
1131                 comb += req.byte_sel.eq(r0.req.byte_sel)
1132             comb += req.hit_way.eq(req_hit_way)
1133             comb += req.same_tag.eq(req_same_tag)
1134
1135             # Store the incoming request from r0,
1136             # if it is a slow request
1137             # Note that r1.full = 1 implies req_op = OP_NONE
1138             with m.If((req_op == Op.OP_LOAD_MISS)
1139                       | (req_op == Op.OP_LOAD_NC)
1140                       | (req_op == Op.OP_STORE_MISS)
1141                       | (req_op == Op.OP_STORE_HIT)):
1142                 sync += r1.req(req)
1143                 sync += r1.full.eq(1)
1144
1145         # Main state machine
1146         with m.Switch(r1.state):
1147
1148             with m.Case(State.IDLE)
1149 # XXX check 'left downto.  probably means len(r1.wb.adr)
1150 #                     r1.wb.adr <= req.real_addr(
1151 #                                   r1.wb.adr'left downto 0
1152 #                                  );
1153                 sync += r1.wb.adr.eq(req.real_addr[0:r1.wb.adr])
1154                 sync += r1.wb.sel.eq(req.byte_sel)
1155                 sync += r1.wb.dat.eq(req.data)
1156                 sync += r1.dcbz.eq(req.dcbz)
1157
1158                 # Keep track of our index and way
1159                 # for subsequent stores.
1160                 sync += r1.store_index.eq(get_index(req.real_addr))
1161                 sync += r1.store_row.eq(get_row(req.real_addr))
1162                 sync += r1.end_row_ix.eq(
1163                          get_row_of_line(get_row(req.real_addr))
1164                         )
1165                 sync += r1.reload_tag.eq(get_tag(req.real_addr))
1166                 sync += r1.req.same_tag.eq(1)
1167
1168                 with m.If(req.op == Op.OP_STORE_HIT):
1169                     sync += r1.store_way.eq(req.hit_way)
1170
1171                 # Reset per-row valid bits,
1172                 # ready for handling OP_LOAD_MISS
1173                 for i in range(ROW_PER_LINE):
1174                     sync += r1.rows_valid[i].eq(0)
1175
1176                 with m.Switch(req.op):
1177                     with m.Case(Op.OP_LOAD_HIT):
1178                         # stay in IDLE state
1179                         pass
1180
1181                     with m.Case(Op.OP_LOAD_MISS):
1182                         #Display(f"cache miss real addr:" \
1183                         #      f"{req_real_addr}" \
1184                         #      f" idx:{get_index(req_real_addr)}" \
1185                         #      f" tag:{get_tag(req.real_addr)}")
1186                         pass
1187
1188                         # Start the wishbone cycle
1189                         sync += r1.wb.we.eq(0)
1190                         sync += r1.wb.cyc.eq(1)
1191                         sync += r1.wb.stb.eq(1)
1192
1193                         # Track that we had one request sent
1194                         sync += r1.state.eq(State.RELOAD_WAIT_ACK)
1195                         sync += r1.write_tag.eq(1)
1196
1197                     with m.Case(Op.OP_LOAD_NC):
1198                         sync += r1.wb.cyc.eq(1)
1199                         sync += r1.wb.stb.eq(1)
1200                         sync += r1.wb.we.eq(0)
1201                         sync += r1.state.eq(State.NC_LOAD_WAIT_ACK)
1202
1203                     with m.Case(Op.OP_STORE_HIT, Op.OP_STORE_MISS):
1204                         with m.If(~req.bcbz):
1205                             sync += r1.state.eq(State.STORE_WAIT_ACK)
1206                             sync += r1.acks_pending.eq(1)
1207                             sync += r1.full.eq(0)
1208                             sync += r1.slow_valid.eq(1)
1209
1210                             with m.If(~req.mmu_req):
1211                                 sync += r1.ls_valid.eq(1)
1212                             with m.Else():
1213                                 sync += r1.mmu_done.eq(1)
1214
1215                             with m.If(req.op == Op.OP_STORE_HIT):
1216                                 sync += r1.write_bram.eq(1)
1217                         with m.Else():
1218                             sync += r1.state.eq(Op.RELOAD_WAIT_ACK)
1219
1220                             with m.If(req.op == Op.OP_STORE_MISS):
1221                                 sync += r1.write_tag.eq(1)
1222
1223                         sync += r1.wb.we.eq(1)
1224                         sync += r1.wb.cyc.eq(1)
1225                         sync += r1.wb.stb.eq(1)
1226
1227                     # OP_NONE and OP_BAD do nothing
1228                     # OP_BAD & OP_STCX_FAIL were
1229                     # handled above already
1230                     with m.Case(Op.OP_NONE):
1231                         pass
1232                     with m.Case(OP_BAD):
1233                         pass
1234                     with m.Case(OP_STCX_FAIL):
1235                         pass
1236
1237             with m.Case(State.RELOAD_WAIT_ACK):
1238                 # Requests are all sent if stb is 0
1239                 comb += stbs_done.eq(~r1.wb.stb)
1240
1241                 with m.If(~wb_in.stall & ~stbs_done):
1242                     # That was the last word?
1243                     # We are done sending.
1244                     # Clear stb and set stbs_done
1245                     # so we can handle an eventual
1246                     # last ack on the same cycle.
1247                     with m.If(is_last_row_addr(
1248                               r1.wb.adr, r1.end_row_ix)):
1249                         sync += r1.wb.stb.eq(0)
1250                         comb += stbs_done.eq(0)
1251
1252                     # Calculate the next row address
1253                     sync += r1.wb.adr.eq(next_row_addr(r1.wb.adr))
1254
1255                 # Incoming acks processing
1256                 sync += r1.forward_valid1.eq(wb_in.ack)
1257                 with m.If(wb_in.ack):
1258                     # XXX needs an Array bit-accessor here
1259                     sync += r1.rows_valid[r1.store_row % ROW_PER_LINE].eq(1)
1260
1261                     # If this is the data we were looking for,
1262                     # we can complete the request next cycle.
1263                     # Compare the whole address in case the
1264                     # request in r1.req is not the one that
1265                     # started this refill.
1266                     with m.If(r1.full & r1.req.same_tag &
1267                               ((r1.dcbz & r1.req.dcbz) |
1268                                (~r1.dcbz & (r1.req.op == Op.OP_LOAD_MISS))) &
1269                                 (r1.store_row == get_row(r1.req.real_addr))):
1270                         sync += r1.full.eq(0)
1271                         sync += r1.slow_valid.eq(1)
1272                             with m.If(~r1.mmu_req):
1273                                 sync += r1.ls_valid.eq(1)
1274                             with m.Else():
1275                                 sync += r1.mmu_done.eq(1)
1276                         sync += r1.forward_sel.eq(~0) # all 1s
1277                         sync += r1.use_forward1.eq(1)
1278
1279                     # Check for completion
1280                     with m.If(stbs_done & is_last_row(r1.store_row,
1281                                                       r1.end_row_ix)):
1282                         # Complete wishbone cycle
1283                         sync += r1.wb.cyc.eq(0)
1284
1285                         # Cache line is now valid
1286                         cv = cache_valid_bits[r1.store_index]
1287                         sync += cv[r1.store_way].eq(1)
1288                         sync += r1.state.eq(State.IDLE)
1289
1290                     # Increment store row counter
1291                     sync += r1.store_row.eq(next_row(r1.store_row))
1292
1293             with m.Case(State.STORE_WAIT_ACK):
1294                 comb += stbs_done.eq(~r1.wb.stb)
1295                 comb += acks.eq(r1.acks_pending)
1296
1297                 with m.If(r1.inc_acks != r1.dec_acks):
1298                     with m.If(r1.inc_acks):
1299                         comb += adjust_acks.eq(acks + 1)
1300                     with m.Else():
1301                         comb += adjust_acks.eq(acks - 1)
1302                 with m.Else():
1303                     comb += adjust_acks.eq(acks)
1304
1305                 sync += r1.acks_pending.eq(adjust_acks)
1306
1307                 # Clear stb when slave accepted request
1308                 with m.If(~wb_in.stall):
1309                     # See if there is another store waiting
1310                     # to be done which is in the same real page.
1311                     with m.If(req.valid):
1312                         ra = req.real_addr[0:SET_SIZE_BITS]
1313                         sync += r1.wb.adr[0:SET_SIZE_BITS].eq(ra)
1314                         sync += r1.wb.dat.eq(req.data)
1315                         sync += r1.wb.sel.eq(req.byte_sel)
1316
1317                     with m.Elif((adjust_acks < 7) & req.same_tag &
1318                                 ((req.op == Op.Op_STORE_MISS)
1319                                  | (req.op == Op.OP_SOTRE_HIT))):
1320                         sync += r1.wb.stb.eq(1)
1321                         comb += stbs_done.eq(0)
1322
1323                         with m.If(req.op == Op.OP_STORE_HIT):
1324                             sync += r1.write_bram.eq(1)
1325                         sync += r1.full.eq(0)
1326                         sync += r1.slow_valid.eq(1)
1327
1328                         # Store requests never come from the MMU
1329                         sync += r1.ls_valid.eq(1)
1330                         comb += stbs_done.eq(0)
1331                         sync += r1.inc_acks.eq(1)
1332                     with m.Else():
1333                         sync += r1.wb.stb.eq(0)
1334                         comb += stbs_done.eq(1)
1335
1336                 # Got ack ? See if complete.
1337                 with m.If(wb_in.ack):
1338                     with m.If(stbs_done & (adjust_acks == 1))
1339                         sync += r1.state.eq(State.IDLE)
1340                         sync += r1.wb.cyc.eq(0)
1341                         sync += r1.wb.stb.eq(0)
1342                     sync += r1.dec_acks.eq(1)
1343
1344             with m.Case(State.NC_LOAD_WAIT_ACK):
1345                 # Clear stb when slave accepted request
1346                 with m.If(~wb_in.stall):
1347                     sync += r1.wb.stb.eq(0)
1348
1349                 # Got ack ? complete.
1350                 with m.If(wb_in.ack):
1351                     sync += r1.state.eq(State.IDLE)
1352                     sync += r1.full.eq(0)
1353                     sync += r1.slow_valid.eq(1)
1354
1355                     with m.If(~r1.mmu_req):
1356                         sync += r1.ls_valid.eq(1)
1357                     with m.Else():
1358                         sync += r1.mmu_done.eq(1)
1359
1360                     sync += r1.forward_sel.eq(~0) # all 1s
1361                     sync += r1.use_forward1.eq(1)
1362                     sync += r1.wb.cyc.eq(0)
1363                     sync += r1.wb.stb.eq(0)
1364
1365     def dcache_log(self, m, r1, valid_ra, tlb_hit_way, stall_out):
1366
1367         sync = m.d.sync
1368         d_out, wb_in, log_out = self.d_out, self.wb_in, self.log_out
1369
1370         sync += log_out.eq(Cat(r1.state[:3], valid_ra, tlb_hit_way[:3],
1371                                stall_out, req_op[:3], d_out.valid, d_out.error,
1372                                r1.wb.cyc, r1.wb.stb, wb_in.ack, wb_in.stall,
1373                                r1.wb.adr[3:6]))
1374
1375     def elaborate(self, platform):
1376
1377         # Storage. Hopefully "cache_rows" is a BRAM, the rest is LUTs
1378         cache_tags       = CacheTagArray()
1379         cache_tag_set    = Signal(TAG_RAM_WIDTH)
1380         cache_valid_bits = CacheValidBitsArray()
1381
1382         # TODO attribute ram_style : string;
1383         # TODO attribute ram_style of cache_tags : signal is "distributed";
1384
1385         """note: these are passed to nmigen.hdl.Memory as "attributes".
1386            don't know how, just that they are.
1387         """
1388         dtlb_valid_bits = TLBValidBitsArray()
1389         dtlb_tags       = TLBTagsArray()
1390         dtlb_ptes       = TLBPtesArray()
1391         # TODO attribute ram_style of
1392         #  dtlb_tags : signal is "distributed";
1393         # TODO attribute ram_style of
1394         #  dtlb_ptes : signal is "distributed";
1395
1396         r0      = RegStage0()
1397         r0_full = Signal()
1398
1399         r1 = RegStage1()
1400
1401         reservation = Reservation()
1402
1403         # Async signals on incoming request
1404         req_index    = Signal(INDEX_BITS)
1405         req_row      = Signal(ROW_BITS)
1406         req_hit_way  = Signal(WAY_BITS)
1407         req_tag      = Signal(TAG_BITS)
1408         req_op       = Op()
1409         req_data     = Signal(64)
1410         req_same_tag = Signal()
1411         req_go       = Signal()
1412
1413         early_req_row     = Signal(ROW_BITS)
1414
1415         cancel_store      = Signal()
1416         set_rsrv          = Signal()
1417         clear_rsrv        = Signal()
1418
1419         r0_valid          = Signal()
1420         r0_stall          = Signal()
1421
1422         use_forward1_next = Signal()
1423         use_forward2_next = Signal()
1424
1425         cache_out         = CacheRamOut()
1426
1427         plru_victim       = PLRUOut()
1428         replace_way       = Signal(WAY_BITS)
1429
1430         # Wishbone read/write/cache write formatting signals
1431         bus_sel           = Signal(8)
1432
1433         # TLB signals
1434         tlb_tag_way   = Signal(TLB_TAG_WAY_BITS)
1435         tlb_pte_way   = Signal(TLB_PTE_WAY_BITS)
1436         tlb_valid_way = Signal(TLB_NUM_WAYS)
1437         tlb_req_index = Signal(TLB_SET_BITS)
1438         tlb_hit       = Signal()
1439         tlb_hit_way   = Signal(TLB_WAY_BITS)
1440         pte           = Signal(TLB_PTE_BITS)
1441         ra            = Signal(REAL_ADDR_BITS)
1442         valid_ra      = Signal()
1443         perm_attr     = PermAttr()
1444         rc_ok         = Signal()
1445         perm_ok       = Signal()
1446         access_ok     = Signal()
1447
1448         tlb_plru_victim = TLBPLRUOut()
1449
1450         # we don't yet handle collisions between loadstore1 requests
1451         # and MMU requests
1452         comb += m_out.stall.eq(0)
1453
1454         # Hold off the request in r0 when r1 has an uncompleted request
1455         comb += r0_stall.eq(r0_full & r1.full)
1456         comb += r0_valid.eq(r0_full & ~r1.full)
1457         comb += stall_out.eq(r0_stall)
1458
1459         # Wire up wishbone request latch out of stage 1
1460         comb += self.wb_out.eq(r1.wb)
1461
1462         # call sub-functions putting everything together, using shared
1463         # signals established above
1464         self.stage_0(m)
1465         self.tlb_read(m, r0_stall, tlb_valid_way,
1466                       tlb_tag_way, tlb_pte_way, dtlb_valid_bits,
1467                       dtlb_tags, dtlb_ptes)
1468         self.tlb_search(self, tlb_req_index, r0, tlb_valid_way_ tlb_tag_way,
1469                         tlb_pte_way, pte, tlb_hit, valid_ra, perm_attr, ra)
1470         self.tlb_update(m, r0_valid, r0, dtlb_valid_bits, tlb_req_index,
1471                         tlb_hit_way, tlb_hit, tlb_plru_victim, tlb_tag_way,
1472                         dtlb_tags, tlb_pte_way, dtlb_ptes, dtlb_valid_bits)
1473         self.maybe_plrus(r1)
1474         self.cache_tag_read(m, r0_stall, req_index, cache_tag_set, cache_tags)
1475         self.dcache_request(m, r0, ra, req_index, req_row, req_tag,
1476                            r0_valid, r1, cache_valid_bits, replace_way,
1477                            use_forward1_next, use_forward2_next,
1478                            req_hit_way, plru_victim, rc_ok, perm_attr,
1479                            valid_ra, perm_ok, access_ok, req_op, req_ok,
1480                            r0_stall, early_req_row)
1481         self.reservation_comb(m, cancel_store, set_rsrv, clear_rsrv,
1482                            r0_valid, r0, reservation)
1483         self.reservation_reg(m, r0_valid, access_ok, clear_rsrv,
1484                            reservation, r0)
1485         self.writeback_control(m, r1, cache_out)
1486         self.rams(m, r1)
1487         self.dcache_fast_hit(m, req_op, r0_valid, r1)
1488         self.dcache_slow(m, r1, use_forward1_next, cache_valid_bits, r0,
1489                          r0_valid, req_op, cache_tag, req_go, ra)
1490         #self.dcache_log(m, r1, valid_ra, tlb_hit_way, stall_out)
1491
1492
1493 # dcache_tb.vhdl
1494 #
1495 # entity dcache_tb is
1496 # end dcache_tb;
1497 #
1498 # architecture behave of dcache_tb is
1499 #     signal clk          : std_ulogic;
1500 #     signal rst          : std_ulogic;
1501 #
1502 #     signal d_in         : Loadstore1ToDcacheType;
1503 #     signal d_out        : DcacheToLoadstore1Type;
1504 #
1505 #     signal m_in         : MmuToDcacheType;
1506 #     signal m_out        : DcacheToMmuType;
1507 #
1508 #     signal wb_bram_in   : wishbone_master_out;
1509 #     signal wb_bram_out  : wishbone_slave_out;
1510 #
1511 #     constant clk_period : time := 10 ns;
1512 # begin
1513 #     dcache0: entity work.dcache
1514 #         generic map(
1515 #
1516 #             LINE_SIZE => 64,
1517 #             NUM_LINES => 4
1518 #             )
1519 #         port map(
1520 #             clk => clk,
1521 #             rst => rst,
1522 #             d_in => d_in,
1523 #             d_out => d_out,
1524 #             m_in => m_in,
1525 #             m_out => m_out,
1526 #             wishbone_out => wb_bram_in,
1527 #             wishbone_in => wb_bram_out
1528 #             );
1529 #
1530 #     -- BRAM Memory slave
1531 #     bram0: entity work.wishbone_bram_wrapper
1532 #         generic map(
1533 #             MEMORY_SIZE   => 1024,
1534 #             RAM_INIT_FILE => "icache_test.bin"
1535 #             )
1536 #         port map(
1537 #             clk => clk,
1538 #             rst => rst,
1539 #             wishbone_in => wb_bram_in,
1540 #             wishbone_out => wb_bram_out
1541 #             );
1542 #
1543 #     clk_process: process
1544 #     begin
1545 #         clk <= '0';
1546 #         wait for clk_period/2;
1547 #         clk <= '1';
1548 #         wait for clk_period/2;
1549 #     end process;
1550 #
1551 #     rst_process: process
1552 #     begin
1553 #         rst <= '1';
1554 #         wait for 2*clk_period;
1555 #         rst <= '0';
1556 #         wait;
1557 #     end process;
1558 #
1559 #     stim: process
1560 #     begin
1561 #     -- Clear stuff
1562 #     d_in.valid <= '0';
1563 #     d_in.load <= '0';
1564 #     d_in.nc <= '0';
1565 #     d_in.addr <= (others => '0');
1566 #     d_in.data <= (others => '0');
1567 #         m_in.valid <= '0';
1568 #         m_in.addr <= (others => '0');
1569 #         m_in.pte <= (others => '0');
1570 #
1571 #         wait for 4*clk_period;
1572 #     wait until rising_edge(clk);
1573 #
1574 #     -- Cacheable read of address 4
1575 #     d_in.load <= '1';
1576 #     d_in.nc <= '0';
1577 #         d_in.addr <= x"0000000000000004";
1578 #         d_in.valid <= '1';
1579 #     wait until rising_edge(clk);
1580 #         d_in.valid <= '0';
1581 #
1582 #     wait until rising_edge(clk) and d_out.valid = '1';
1583 #         assert d_out.data = x"0000000100000000"
1584 #         report "data @" & to_hstring(d_in.addr) &
1585 #         "=" & to_hstring(d_out.data) &
1586 #         " expected 0000000100000000"
1587 #         severity failure;
1588 # --      wait for clk_period;
1589 #
1590 #     -- Cacheable read of address 30
1591 #     d_in.load <= '1';
1592 #     d_in.nc <= '0';
1593 #         d_in.addr <= x"0000000000000030";
1594 #         d_in.valid <= '1';
1595 #     wait until rising_edge(clk);
1596 #         d_in.valid <= '0';
1597 #
1598 #     wait until rising_edge(clk) and d_out.valid = '1';
1599 #         assert d_out.data = x"0000000D0000000C"
1600 #         report "data @" & to_hstring(d_in.addr) &
1601 #         "=" & to_hstring(d_out.data) &
1602 #         " expected 0000000D0000000C"
1603 #         severity failure;
1604 #
1605 #     -- Non-cacheable read of address 100
1606 #     d_in.load <= '1';
1607 #     d_in.nc <= '1';
1608 #         d_in.addr <= x"0000000000000100";
1609 #         d_in.valid <= '1';
1610 #     wait until rising_edge(clk);
1611 #     d_in.valid <= '0';
1612 #     wait until rising_edge(clk) and d_out.valid = '1';
1613 #         assert d_out.data = x"0000004100000040"
1614 #         report "data @" & to_hstring(d_in.addr) &
1615 #         "=" & to_hstring(d_out.data) &
1616 #         " expected 0000004100000040"
1617 #         severity failure;
1618 #
1619 #     wait until rising_edge(clk);
1620 #     wait until rising_edge(clk);
1621 #     wait until rising_edge(clk);
1622 #     wait until rising_edge(clk);
1623 #
1624 #     std.env.finish;
1625 #     end process;
1626 # end;
def _dcache_read_and_check(dut, addr, nc, expected, timeout=1000):
    """Issue a single load on ``dut.d_in`` and check the returned data.

    Simulation sub-process, to be driven with ``yield from``.

    Args:
        dut: device under test exposing ``d_in`` (valid, load, nc, addr)
            and ``d_out`` (valid, data).
        addr: address to load from.
        nc: 1 for a non-cacheable access, 0 for a cacheable one.
        expected: 64-bit value that ``d_out.data`` must hold once
            ``d_out.valid`` is seen high.
        timeout: maximum number of clocks to wait for ``d_out.valid``
            before failing, so a missing response cannot hang the run.

    Raises:
        AssertionError: if no response arrives within ``timeout`` clocks
            or the returned data differs from ``expected``.
    """
    yield dut.d_in.load.eq(1)
    yield dut.d_in.nc.eq(nc)
    yield dut.d_in.addr.eq(addr)
    yield dut.d_in.valid.eq(1)
    # wait until rising_edge(clk): keep valid high for one clock
    yield
    yield dut.d_in.valid.eq(0)

    # wait until rising_edge(clk) and d_out.valid = '1'
    yield
    waited = 0
    while not (yield dut.d_out.valid):
        assert waited < timeout, (
            f"data @{addr:016X}: no d_out.valid after {timeout} clocks"
            " -!- severity failure")
        waited += 1
        yield

    # Read the *simulated value* of the signal; comparing the signal
    # object itself would not check anything the simulation produced.
    data = yield dut.d_out.data
    assert data == expected, (
        f"data @{addr:016X}={data:016X} expected {expected:016X}"
        " -!- severity failure")


def dcache_sim(dut):
    """Stimulus process for the dcache, mirroring the ``stim`` process
    of the VHDL testbench (dcache_tb.vhdl).

    Clears the inputs, then performs two cacheable loads (addresses
    0x4 and 0x30) and one non-cacheable load (address 0x100), checking
    each result. The expected values are those asserted by the VHDL
    testbench, which backs the wishbone bus with a BRAM initialised
    from "icache_test.bin".

    Args:
        dut: device under test exposing ``d_in``, ``d_out`` and ``m_in``.

    Raises:
        AssertionError: if a load returns unexpected data or never
            completes.
    """
    # clear stuff
    yield dut.d_in.valid.eq(0)
    yield dut.d_in.load.eq(0)
    yield dut.d_in.nc.eq(0)
    yield dut.d_in.addr.eq(0)
    yield dut.d_in.data.eq(0)
    yield dut.m_in.valid.eq(0)
    yield dut.m_in.addr.eq(0)
    yield dut.m_in.pte.eq(0)

    # wait 4 * clk_period
    for _ in range(4):
        yield
    # wait until rising_edge(clk)
    yield

    # Cacheable read of address 4
    yield from _dcache_read_and_check(dut, 0x0000000000000004, 0,
                                      0x0000000100000000)
    # Cacheable read of address 30
    yield from _dcache_read_and_check(dut, 0x0000000000000030, 0,
                                      0x0000000D0000000C)
    # Non-cacheable read of address 100
    yield from _dcache_read_and_check(dut, 0x0000000000000100, 1,
                                      0x0000004100000040)

    # let the simulation run a few more clocks before finishing
    for _ in range(4):
        yield
1692
1693
def test_dcache():
    """Build a DCache, dump its RTLIL and run the ``dcache_sim`` stimulus.

    Writes the RTLIL text to "test_dcache.il" and the simulation
    waveform to "test_dcache.vcd" in the current directory.
    """
    # Imported locally so the test helpers are in scope regardless of
    # what the module header imports.
    from nmigen.back import rtlil
    from nmigen.compat.sim import run_simulation

    dut = DCache()
    il = rtlil.convert(dut, ports=[])
    with open("test_dcache.il", "w") as f:
        f.write(il)

    # dcache_sim is a generator function taking the DUT: it must be
    # called with ``dut`` so it can drive and sample its signals.
    run_simulation(dut, dcache_sim(dut), vcd_name='test_dcache.vcd')
1701
# Run the dcache testbench when this file is executed as a script.
if __name__ == "__main__":
    test_dcache()
1704