"""DCache

based on Anton Blanchard microwatt dcache.vhdl

"""

from enum import Enum, unique

from nmigen import Module, Signal, Elaboratable, Cat, Repl, Array, Const
from nmigen.cli import main, rtlil
from nmutil.iocontrol import RecordObject
from nmigen.utils import log2_int
from nmigen.compat.sim import run_simulation

from soc.experiment.mem_types import (LoadStore1ToDCacheType,
                                      DCacheToLoadStore1Type,
                                      MMUToDCacheType,
                                      DCacheToMMUType)

from soc.experiment.wb_types import (WB_ADDR_BITS, WB_DATA_BITS, WB_SEL_BITS,
                                     WBAddrType, WBDataType, WBSelType,
                                     WBMasterOut, WBSlaveOut,
                                     WBMasterOutVector, WBSlaveOutVector,
                                     WBIOMasterOut, WBIOSlaveOut)

from soc.experiment.cache_ram import CacheRam
from soc.experiment.plru import PLRU

# TODO: make these parameters of DCache at some point
LINE_SIZE = 64    # Line size in bytes
NUM_LINES = 32    # Number of lines in a set
NUM_WAYS = 4      # Number of ways
TLB_SET_SIZE = 64 # L1 DTLB entries per set
TLB_NUM_WAYS = 2  # L1 DTLB number of sets
TLB_LG_PGSZ = 12  # L1 DTLB log_2(page_size)
LOG_LENGTH = 0    # Non-zero to enable log data collection

# BRAM organisation: We never access more than
# wishbone_data_bits at a time so to save
# resources we make the array only that wide, and
# use consecutive indices to make a cache "line"
#
# ROW_SIZE is the width in bytes of the BRAM
# (based on WB, so 64-bits)
ROW_SIZE = WB_DATA_BITS // 8

# ROW_PER_LINE is the number of rows (wishbone
# transactions) in a line
ROW_PER_LINE = LINE_SIZE // ROW_SIZE

# BRAM_ROWS is the number of rows in BRAM needed
# to represent the full dcache
BRAM_ROWS = NUM_LINES * ROW_PER_LINE


# Bit fields counts in the address

# REAL_ADDR_BITS is the number of real address
# bits that we store
REAL_ADDR_BITS = 56

# ROW_BITS is the number of bits to select a row
ROW_BITS = log2_int(BRAM_ROWS)

# ROW_LINE_BITS is the number of bits to select
# a row within a line
ROW_LINE_BITS = log2_int(ROW_PER_LINE)

# LINE_OFF_BITS is the number of bits for
# the offset in a cache line
LINE_OFF_BITS = log2_int(LINE_SIZE)

# ROW_OFF_BITS is the number of bits for
# the offset in a row
ROW_OFF_BITS = log2_int(ROW_SIZE)

# INDEX_BITS is the number of bits to
# select a cache line
INDEX_BITS = log2_int(NUM_LINES)

# SET_SIZE_BITS is the log base 2 of the set size
SET_SIZE_BITS = LINE_OFF_BITS + INDEX_BITS

# TAG_BITS is the number of bits of
# the tag part of the address
TAG_BITS = REAL_ADDR_BITS - SET_SIZE_BITS

# TAG_WIDTH is the width in bits of each way of the tag RAM
TAG_WIDTH = TAG_BITS + 7 - ((TAG_BITS + 7) % 8)

# WAY_BITS is the number of bits to select a way
WAY_BITS = log2_int(NUM_WAYS)

# Example of layout for 32 lines of 64 bytes:
#
# ..  tag    |index|  line  |
# ..         |   row   |    |
# ..         |     |---|    | ROW_LINE_BITS  (3)
# ..         |     |--- - --| LINE_OFF_BITS (6)
# ..         |         |- --| ROW_OFF_BITS  (3)
# ..         |----- ---|    | ROW_BITS      (8)
# ..         |-----|        | INDEX_BITS    (5)
# .. --------|              | TAG_BITS      (45)

TAG_RAM_WIDTH = TAG_WIDTH * NUM_WAYS

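# With the default geometry above, the derived constants work out as:
#
#   ROW_SIZE      = 8    bytes per wishbone transfer
#   ROW_PER_LINE  = 8    BRAM_ROWS     = 256
#   ROW_BITS      = 8    ROW_LINE_BITS = 3
#   LINE_OFF_BITS = 6    ROW_OFF_BITS  = 3
#   INDEX_BITS    = 5    SET_SIZE_BITS = 11
#   TAG_BITS      = 45   TAG_WIDTH     = 48 (tag padded to a byte multiple)
#   WAY_BITS      = 2    TAG_RAM_WIDTH = 192
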
def CacheTagArray():
    return Array(Signal(TAG_RAM_WIDTH) for x in range(NUM_LINES))

def CacheValidBitsArray():
    return Array(Signal(NUM_WAYS) for x in range(NUM_LINES))

def RowPerLineValidArray():
    return Array(Signal() for x in range(ROW_PER_LINE))

# L1 TLB
TLB_SET_BITS = log2_int(TLB_SET_SIZE)
TLB_WAY_BITS = log2_int(TLB_NUM_WAYS)
TLB_EA_TAG_BITS = 64 - (TLB_LG_PGSZ + TLB_SET_BITS)
TLB_TAG_WAY_BITS = TLB_NUM_WAYS * TLB_EA_TAG_BITS
TLB_PTE_BITS = 64
TLB_PTE_WAY_BITS = TLB_NUM_WAYS * TLB_PTE_BITS

assert (LINE_SIZE % ROW_SIZE) == 0, "LINE_SIZE not multiple of ROW_SIZE"
assert (LINE_SIZE & (LINE_SIZE - 1)) == 0, "LINE_SIZE not power of 2"
assert (NUM_LINES & (NUM_LINES - 1)) == 0, "NUM_LINES not power of 2"
assert (ROW_PER_LINE & (ROW_PER_LINE - 1)) == 0, "ROW_PER_LINE not power of 2"
assert ROW_BITS == (INDEX_BITS + ROW_LINE_BITS), "geometry bits don't add up"
assert (LINE_OFF_BITS == ROW_OFF_BITS + ROW_LINE_BITS), \
        "geometry bits don't add up"
assert REAL_ADDR_BITS == (TAG_BITS + INDEX_BITS + LINE_OFF_BITS), \
        "geometry bits don't add up"
assert REAL_ADDR_BITS == (TAG_BITS + ROW_BITS + ROW_OFF_BITS), \
        "geometry bits don't add up"
assert 64 == WB_DATA_BITS, "Can't yet handle wb width that isn't 64-bits"
assert SET_SIZE_BITS <= TLB_LG_PGSZ, "Set indexed by virtual address"


def TLBValidBitsArray():
    return Array(Signal(TLB_NUM_WAYS) for x in range(TLB_SET_SIZE))

def TLBTagsArray():
    return Array(Signal(TLB_TAG_WAY_BITS) for x in range(TLB_SET_SIZE))

def TLBPtesArray():
    return Array(Signal(TLB_PTE_WAY_BITS) for x in range(TLB_SET_SIZE))

def HitWaySet():
    return Array(Signal(WAY_BITS) for x in range(TLB_NUM_WAYS))

# Cache RAM interface
def CacheRamOut():
    return Array(Signal(WB_DATA_BITS) for x in range(NUM_WAYS))

# PLRU output interface
def PLRUOut():
    return Array(Signal(WAY_BITS) for x in range(NUM_LINES))

# TLB PLRU output interface
def TLBPLRUOut():
    return Array(Signal(TLB_WAY_BITS) for x in range(TLB_SET_SIZE))

# Helper functions to decode incoming requests
#
# Return the cache line index (tag index) for an address
def get_index(addr):
    return addr[LINE_OFF_BITS:SET_SIZE_BITS]

# Return the cache row index (data memory) for an address
def get_row(addr):
    return addr[ROW_OFF_BITS:SET_SIZE_BITS]

# Return the index of a row within a line
def get_row_of_line(row):
    return row[0:ROW_LINE_BITS]

# Returns whether this is the last row of a line
def is_last_row_addr(addr, last):
    return addr[ROW_OFF_BITS:LINE_OFF_BITS] == last

# Returns whether this is the last row of a line
def is_last_row(row, last):
    return get_row_of_line(row) == last

# Return the address of the next row in the current cache line
def next_row_addr(addr):
    # Add 1 to the row index only, so that the generated adder is
    # limited to the bits within a cache line (3 bits with default
    # settings)
    row_idx = addr[ROW_OFF_BITS:LINE_OFF_BITS] + 1
    return Cat(addr[:ROW_OFF_BITS], row_idx[:ROW_LINE_BITS],
               addr[LINE_OFF_BITS:])

# Return the next row in the current cache line. We use a
# dedicated function in order to limit the size of the
# generated adder to be only the bits within a cache line
# (3 bits with default settings)
def next_row(row):
    row_v = row[0:ROW_LINE_BITS] + 1
    return Cat(row_v[:ROW_LINE_BITS], row[ROW_LINE_BITS:])

# Get the tag value from the address
def get_tag(addr):
    return addr[SET_SIZE_BITS:REAL_ADDR_BITS]

# Read a tag from a tag memory row
def read_tag(way, tagset):
    return tagset[way * TAG_WIDTH:way * TAG_WIDTH + TAG_BITS]

# Read a TLB tag from a TLB tag memory row
def read_tlb_tag(way, tags):
    return tags.bit_select(way * TLB_EA_TAG_BITS, TLB_EA_TAG_BITS)

# Write a TLB tag to a TLB tag memory row
def write_tlb_tag(way, tags, tag):
    return read_tlb_tag(way, tags).eq(tag)

# Read a PTE from a TLB PTE memory row
def read_tlb_pte(way, ptes):
    return ptes.bit_select(way * TLB_PTE_BITS, TLB_PTE_BITS)

def write_tlb_pte(way, ptes, newpte):
    return read_tlb_pte(way, ptes).eq(newpte)

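# Illustrative sketch only (not used by the dcache): the Signal-slicing
# helpers above perform, in hardware, the same decomposition as this
# plain-integer version, which can help when checking the geometry
# asserts by hand.
def _split_addr_example(addr):
    """Pure-int mirror of get_index/get_row/get_tag."""
    index = (addr >> LINE_OFF_BITS) & (NUM_LINES - 1) # cache line (tag index)
    row = (addr >> ROW_OFF_BITS) & (BRAM_ROWS - 1)    # BRAM data row
    tag = addr >> SET_SIZE_BITS                       # tag to compare/store
    return index, row, tag
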

# Record for storing permission, attribute, etc. bits from a PTE
class PermAttr(RecordObject):
    def __init__(self):
        super().__init__()
        self.reference = Signal()
        self.changed = Signal()
        self.nocache = Signal()
        self.priv = Signal()
        self.rd_perm = Signal()
        self.wr_perm = Signal()


def extract_perm_attr(pte):
    pa = PermAttr()
    pa.reference = pte[8]
    pa.changed = pte[7]
    pa.nocache = pte[5]
    pa.priv = pte[3]
    pa.rd_perm = pte[2]
    pa.wr_perm = pte[1]
    return pa

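# Note: the PTE bit positions used in extract_perm_attr (R=8, C=7,
# ATT0=5 for no-cache, EAA priv/read/write = 3/2/1) follow the radix
# PTE layout as used by microwatt's dcache.vhdl, from which this file
# is converted.
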
# Type of operation on a "valid" input
@unique
class Op(Enum):
    OP_NONE = 0
    OP_BAD = 1           # NC cache hit, TLB miss, prot/RC failure
    OP_STCX_FAIL = 2     # conditional store w/o reservation
    OP_LOAD_HIT = 3      # Cache hit on load
    OP_LOAD_MISS = 4     # Load missing cache
    OP_LOAD_NC = 5       # Non-cachable load
    OP_STORE_HIT = 6     # Store hitting cache
    OP_STORE_MISS = 7    # Store missing cache


# Cache state machine
@unique
class State(Enum):
    IDLE = 0             # Normal load hit processing
    RELOAD_WAIT_ACK = 1  # Cache reload wait ack
    STORE_WAIT_ACK = 2   # Store wait ack
    NC_LOAD_WAIT_ACK = 3 # Non-cachable load wait ack


280 #
281 # In order to make timing, we use the BRAMs with
282 # an output buffer, which means that the BRAM
283 # output is delayed by an extra cycle.
284 #
285 # Thus, the dcache has a 2-stage internal pipeline
286 # for cache hits with no stalls.
287 #
288 # All other operations are handled via stalling
289 # in the first stage.
290 #
291 # The second stage can thus complete a hit at the same
292 # time as the first stage emits a stall for a complex op.
293 #
294 # Stage 0 register, basically contains just the latched request
295
class RegStage0(RecordObject):
    def __init__(self):
        super().__init__()
        self.req = LoadStore1ToDCacheType()
        self.tlbie = Signal()
        self.doall = Signal()
        self.tlbld = Signal()
        self.mmu_req = Signal() # indicates source of request


class MemAccessRequest(RecordObject):
    def __init__(self):
        super().__init__()
        self.op = Signal(Op)
        self.valid = Signal()
        self.dcbz = Signal()
        self.real_addr = Signal(REAL_ADDR_BITS)
        self.data = Signal(64)
        self.byte_sel = Signal(8)
        self.hit_way = Signal(WAY_BITS)
        self.same_tag = Signal()
        self.mmu_req = Signal()


# First stage register, contains state for stage 1 of load hits
# and for the state machine used by all other operations
class RegStage1(RecordObject):
    def __init__(self):
        super().__init__()
        # Info about the request
        self.full = Signal()    # have uncompleted request
        self.mmu_req = Signal() # request is from MMU
        self.req = MemAccessRequest()

        # Cache hit state
        self.hit_way = Signal(WAY_BITS)
        self.hit_load_valid = Signal()
        self.hit_index = Signal(INDEX_BITS)
        self.cache_hit = Signal()

        # TLB hit state
        self.tlb_hit = Signal()
        self.tlb_hit_way = Signal(TLB_WAY_BITS)
        self.tlb_hit_index = Signal(TLB_SET_BITS)

        # 2-stage data buffer for data forwarded from writes to reads
        self.forward_data1 = Signal(64)
        self.forward_data2 = Signal(64)
        self.forward_sel1 = Signal(8)
        self.forward_valid1 = Signal()
        self.forward_way1 = Signal(WAY_BITS)
        self.forward_row1 = Signal(ROW_BITS)
        self.use_forward1 = Signal()
        self.forward_sel = Signal(8)

        # Cache miss state (reload state machine)
        self.state = Signal(State)
        self.dcbz = Signal()
        self.write_bram = Signal()
        self.write_tag = Signal()
        self.slow_valid = Signal()
        self.wb = WBMasterOut()
        self.reload_tag = Signal(TAG_BITS)
        self.store_way = Signal(WAY_BITS)
        self.store_row = Signal(ROW_BITS)
        self.store_index = Signal(INDEX_BITS)
        self.end_row_ix = Signal(ROW_LINE_BITS)
        self.rows_valid = RowPerLineValidArray()
        self.acks_pending = Signal(3)
        self.inc_acks = Signal()
        self.dec_acks = Signal()

        # Signals to complete (possibly with error)
        self.ls_valid = Signal()
        self.ls_error = Signal()
        self.mmu_done = Signal()
        self.mmu_error = Signal()
        self.cache_paradox = Signal()

        # Signal to complete a failed stcx.
        self.stcx_fail = Signal()


# Reservation information
class Reservation(RecordObject):
    def __init__(self):
        super().__init__()
        self.valid = Signal()
        self.addr = Signal(64-LINE_OFF_BITS)


class DCache(Elaboratable):
    """Set associative dcache write-through
    TODO (in no specific order):
    * See list in icache.vhdl
    * Complete load misses on the cycle when WB data comes instead of
      at the end of line (this requires dealing with requests coming in
      while not idle...)
    """
    def __init__(self):
        self.d_in = LoadStore1ToDCacheType()
        self.d_out = DCacheToLoadStore1Type()

        self.m_in = MMUToDCacheType()
        self.m_out = DCacheToMMUType()

        self.stall_out = Signal()

        self.wb_out = WBMasterOut()
        self.wb_in = WBSlaveOut()

        self.log_out = Signal(20)

    def stage_0(self, m, r0, r1, r0_full):
        """Latch the request in r0.req as long as we're not stalling
        """
        comb = m.d.comb
        sync = m.d.sync
        m_in, d_in = self.m_in, self.d_in

        r = RegStage0()

        # TODO, this goes in unit tests and formal proofs
        with m.If(d_in.valid & m_in.valid):
            #sync += Display("request collision loadstore vs MMU")
            pass

        with m.If(m_in.valid):
            comb += r.req.valid.eq(1)
            comb += r.req.load.eq(~(m_in.tlbie | m_in.tlbld))
            comb += r.req.dcbz.eq(0)
            comb += r.req.nc.eq(0)
            comb += r.req.reserve.eq(0)
            comb += r.req.virt_mode.eq(0)
            comb += r.req.priv_mode.eq(1)
            comb += r.req.addr.eq(m_in.addr)
            comb += r.req.data.eq(m_in.pte)
            comb += r.req.byte_sel.eq(~0) # Const -1 sets all to 0b111....
            comb += r.tlbie.eq(m_in.tlbie)
            comb += r.doall.eq(m_in.doall)
            comb += r.tlbld.eq(m_in.tlbld)
            comb += r.mmu_req.eq(1)
        with m.Else():
            comb += r.req.eq(d_in)
            comb += r.tlbie.eq(0)
            comb += r.doall.eq(0)
            comb += r.tlbld.eq(0)
            comb += r.mmu_req.eq(0)
        with m.If(~(r1.full & r0_full)):
            sync += r0.eq(r)
            sync += r0_full.eq(r.req.valid)

    def tlb_read(self, m, r0_stall, tlb_valid_way,
                 tlb_tag_way, tlb_pte_way, dtlb_valid_bits,
                 dtlb_tags, dtlb_ptes):
        """TLB
        Operates in the second cycle on the request latched in r0.req.
        TLB updates write the entry at the end of the second cycle.
        """
        comb = m.d.comb
        sync = m.d.sync
        m_in, d_in = self.m_in, self.d_in

        index = Signal(TLB_SET_BITS)
        addrbits = Signal(TLB_SET_BITS)

        amin = TLB_LG_PGSZ
        amax = TLB_LG_PGSZ + TLB_SET_BITS

        with m.If(m_in.valid):
            comb += addrbits.eq(m_in.addr[amin : amax])
        with m.Else():
            comb += addrbits.eq(d_in.addr[amin : amax])
        comb += index.eq(addrbits)

        # If we have any op and the previous op isn't finished,
        # then keep the same output for next cycle.
        with m.If(~r0_stall):
            sync += tlb_valid_way.eq(dtlb_valid_bits[index])
            sync += tlb_tag_way.eq(dtlb_tags[index])
            sync += tlb_pte_way.eq(dtlb_ptes[index])

    def maybe_tlb_plrus(self, m, r1, tlb_plru_victim):
        """Generate TLB PLRUs
        """
        comb = m.d.comb
        sync = m.d.sync

        if TLB_NUM_WAYS > 1: # a PLRU only makes sense with >1 way
            for i in range(TLB_SET_SIZE):
                # TLB PLRU interface
                tlb_plru = PLRU(TLB_WAY_BITS)
                setattr(m.submodules, "maybe_plru_%d" % i, tlb_plru)
                tlb_plru_acc = Signal(TLB_WAY_BITS)
                tlb_plru_acc_en = Signal()
                tlb_plru_out = Signal(TLB_WAY_BITS)

                comb += tlb_plru.acc.eq(tlb_plru_acc)
                comb += tlb_plru.acc_en.eq(tlb_plru_acc_en)
                comb += tlb_plru_out.eq(tlb_plru.lru)

                # PLRU interface
                with m.If(r1.tlb_hit_index == i):
                    comb += tlb_plru_acc_en.eq(r1.tlb_hit)
                with m.Else():
                    comb += tlb_plru_acc_en.eq(0)
                comb += tlb_plru_acc.eq(r1.tlb_hit_way)

                comb += tlb_plru_victim[i].eq(tlb_plru_out)

    def tlb_search(self, m, tlb_req_index, r0, r0_valid,
                   tlb_valid_way, tlb_tag_way, tlb_pte_way,
                   pte, tlb_hit, tlb_hit_way, valid_ra, perm_attr, ra):

        comb = m.d.comb
        sync = m.d.sync

        hitway = Signal(TLB_WAY_BITS)
        hit = Signal()
        eatag = Signal(TLB_EA_TAG_BITS)

        TLB_LG_END = TLB_LG_PGSZ + TLB_SET_BITS
        comb += tlb_req_index.eq(r0.req.addr[TLB_LG_PGSZ : TLB_LG_END])
        comb += eatag.eq(r0.req.addr[TLB_LG_END : 64 ])

        for i in range(TLB_NUM_WAYS):
            with m.If(tlb_valid_way[i]
                      & (read_tlb_tag(i, tlb_tag_way) == eatag)):
                comb += hitway.eq(i)
                comb += hit.eq(1)

        comb += tlb_hit.eq(hit & r0_valid)
        comb += tlb_hit_way.eq(hitway)

        with m.If(tlb_hit):
            comb += pte.eq(read_tlb_pte(hitway, tlb_pte_way))
        with m.Else():
            comb += pte.eq(0)
        comb += valid_ra.eq(tlb_hit | ~r0.req.virt_mode)
        with m.If(r0.req.virt_mode):
            comb += ra.eq(Cat(Const(0, ROW_OFF_BITS),
                              r0.req.addr[ROW_OFF_BITS:TLB_LG_PGSZ],
                              pte[TLB_LG_PGSZ:REAL_ADDR_BITS]))
            comb += perm_attr.eq(extract_perm_attr(pte))
        with m.Else():
            comb += ra.eq(Cat(Const(0, ROW_OFF_BITS),
                              r0.req.addr[ROW_OFF_BITS:REAL_ADDR_BITS]))

            comb += perm_attr.reference.eq(1)
            comb += perm_attr.changed.eq(1)
            comb += perm_attr.priv.eq(1)
            comb += perm_attr.nocache.eq(0)
            comb += perm_attr.rd_perm.eq(1)
            comb += perm_attr.wr_perm.eq(1)

    def tlb_update(self, m, r0_valid, r0, dtlb_valid_bits, tlb_req_index,
                   tlb_hit_way, tlb_hit, tlb_plru_victim, tlb_tag_way,
                   dtlb_tags, tlb_pte_way, dtlb_ptes):

        comb = m.d.comb
        sync = m.d.sync

        tlbie = Signal()
        tlbwe = Signal()
        repl_way = Signal(TLB_WAY_BITS)
        eatag = Signal(TLB_EA_TAG_BITS)
        tagset = Signal(TLB_TAG_WAY_BITS)
        pteset = Signal(TLB_PTE_WAY_BITS)

        comb += tlbie.eq(r0_valid & r0.tlbie)
        comb += tlbwe.eq(r0_valid & r0.tlbld)

        with m.If(tlbie & r0.doall):
            # clear all valid bits at once
            for i in range(TLB_SET_SIZE):
                sync += dtlb_valid_bits[i].eq(0)

        with m.Elif(tlbie):
            with m.If(tlb_hit):
                sync += dtlb_valid_bits[tlb_req_index].bit_select(
                            tlb_hit_way, 1).eq(0)
        with m.Elif(tlbwe):
            with m.If(tlb_hit):
                comb += repl_way.eq(tlb_hit_way)
            with m.Else():
                comb += repl_way.eq(tlb_plru_victim[tlb_req_index])
            comb += eatag.eq(r0.req.addr[TLB_LG_PGSZ + TLB_SET_BITS:64])
            comb += tagset.eq(tlb_tag_way)
            sync += write_tlb_tag(repl_way, tagset, eatag)
            sync += dtlb_tags[tlb_req_index].eq(tagset)
            comb += pteset.eq(tlb_pte_way)
            sync += write_tlb_pte(repl_way, pteset, r0.req.data)
            sync += dtlb_ptes[tlb_req_index].eq(pteset)
            sync += dtlb_valid_bits[tlb_req_index].bit_select(
                        repl_way, 1).eq(1)

    def maybe_plrus(self, m, r1, plru_victim):
        """Generate PLRUs
        """
        comb = m.d.comb
        sync = m.d.sync

        for i in range(NUM_LINES):
            # PLRU interface
            plru = PLRU(WAY_BITS)
            setattr(m.submodules, "plru%d" % i, plru)
            plru_acc = Signal(WAY_BITS)
            plru_acc_en = Signal()
            plru_out = Signal(WAY_BITS)

            comb += plru.acc.eq(plru_acc)
            comb += plru.acc_en.eq(plru_acc_en)
            comb += plru_out.eq(plru.lru)

            with m.If(r1.hit_index == i):
                comb += plru_acc_en.eq(r1.cache_hit)

            comb += plru_acc.eq(r1.hit_way)
            comb += plru_victim[i].eq(plru_out)

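    # The PLRU interface assumed by the two generators above (the module
    # comes from the "add PLRU microwatt conversion"): PLRU(BITS) exposes
    # an `acc` input naming the way just accessed, an `acc_en` update
    # strobe, and an `lru` output giving the victim way. Minimal sketch
    # for a single line:
    #
    #     plru = PLRU(WAY_BITS)
    #     m.submodules.plru0 = plru
    #     comb += plru.acc.eq(hit_way)       # way that just hit
    #     comb += plru.acc_en.eq(cache_hit)  # update only on a hit
    #     comb += victim.eq(plru.lru)        # least-recently-used way
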
    def cache_tag_read(self, m, r0_stall, req_index, cache_tag_set,
                       cache_tags):
        """Cache tag RAM read port
        """
        comb = m.d.comb
        sync = m.d.sync
        m_in, d_in = self.m_in, self.d_in

        index = Signal(INDEX_BITS)

        with m.If(r0_stall):
            comb += index.eq(req_index)
        with m.Elif(m_in.valid):
            comb += index.eq(get_index(m_in.addr))
        with m.Else():
            comb += index.eq(get_index(d_in.addr))
        sync += cache_tag_set.eq(cache_tags[index])

    def dcache_request(self, m, r0, ra, req_index, req_row, req_tag,
                       r0_valid, r1, cache_valid_bits, replace_way,
                       use_forward1_next, use_forward2_next,
                       req_hit_way, plru_victim, rc_ok, perm_attr,
                       valid_ra, perm_ok, access_ok, req_op, req_go,
                       tlb_valid_way, tlb_tag_way, tlb_pte_way,
                       tlb_hit, tlb_hit_way, cache_tag_set,
                       cancel_store, req_same_tag,
                       r0_stall, early_req_row):
        """Cache request parsing and hit detection
        """

        comb = m.d.comb
        sync = m.d.sync
        m_in, d_in = self.m_in, self.d_in

        is_hit = Signal()
        hit_way = Signal(WAY_BITS)
        op = Signal(Op)
        opsel = Signal(3)
        go = Signal()
        nc = Signal()
        hit_set = Signal(TLB_NUM_WAYS)
        hit_way_set = HitWaySet()
        rel_matches = Signal(TLB_NUM_WAYS)
        rel_match = Signal()

        # Extract line, row and tag from request
        comb += req_index.eq(get_index(r0.req.addr))
        comb += req_row.eq(get_row(r0.req.addr))
        comb += req_tag.eq(get_tag(ra))

        comb += go.eq(r0_valid & ~(r0.tlbie | r0.tlbld) & ~r1.ls_error)

        # Test if pending request is a hit on any way
        # In order to make timing in virtual mode,
        # when we are using the TLB, we compare each
        # way with each of the real addresses from each way of
        # the TLB, and then decide later which match to use.

        with m.If(r0.req.virt_mode):
            comb += rel_matches.eq(0)
            for j in range(TLB_NUM_WAYS):
                s_hit = Signal()
                s_pte = Signal(TLB_PTE_BITS)
                s_ra = Signal(REAL_ADDR_BITS)
                s_tag = Signal(TAG_BITS)
                comb += s_pte.eq(read_tlb_pte(j, tlb_pte_way))
                comb += s_ra.eq(Cat(r0.req.addr[0:TLB_LG_PGSZ],
                                    s_pte[TLB_LG_PGSZ:REAL_ADDR_BITS]))
                comb += s_tag.eq(get_tag(s_ra))

                for i in range(NUM_WAYS):
                    with m.If(go & cache_valid_bits[req_index][i] &
                              (read_tag(i, cache_tag_set) == s_tag)
                              & tlb_valid_way[j]):
                        comb += hit_way_set[j].eq(i)
                        comb += s_hit.eq(1)
                comb += hit_set[j].eq(s_hit)
                with m.If(s_tag == r1.reload_tag):
                    comb += rel_matches[j].eq(1)
            with m.If(tlb_hit):
                comb += is_hit.eq(hit_set.bit_select(tlb_hit_way, 1))
                comb += hit_way.eq(hit_way_set[tlb_hit_way])
                comb += rel_match.eq(rel_matches.bit_select(tlb_hit_way, 1))
        with m.Else():
            s_tag = Signal(TAG_BITS)
            comb += s_tag.eq(get_tag(r0.req.addr))
            for i in range(NUM_WAYS):
                with m.If(go & cache_valid_bits[req_index][i] &
                          (read_tag(i, cache_tag_set) == s_tag)):
                    comb += hit_way.eq(i)
                    comb += is_hit.eq(1)
            with m.If(s_tag == r1.reload_tag):
                comb += rel_match.eq(1)
        comb += req_same_tag.eq(rel_match)

        # See if the request matches the line currently being reloaded
        with m.If((r1.state == State.RELOAD_WAIT_ACK) &
                  (req_index == r1.store_index) & rel_match):
            # For a store, consider this a hit even if the row isn't
            # valid since it will be by the time we perform the store.
            # For a load, check the appropriate row valid bit.
            valid = r1.rows_valid[req_row % ROW_PER_LINE]
            comb += is_hit.eq(~r0.req.load | valid)
            comb += hit_way.eq(replace_way)

        # Whether to use forwarded data for a load or not
        comb += use_forward1_next.eq(0)
        with m.If((get_row(r1.req.real_addr) == req_row)
                  & (r1.req.hit_way == hit_way)):
            # Only need to consider r1.write_bram here, since if we
            # are writing refill data here, then we don't have a
            # cache hit this cycle on the line being refilled.
            # (There is the possibility that the load following the
            # load miss that started the refill could be to the old
            # contents of the victim line, since it is a couple of
            # cycles after the refill starts before we see the updated
            # cache tag. In that case we don't use the bypass.)
            comb += use_forward1_next.eq(r1.write_bram)
        comb += use_forward2_next.eq(0)
        with m.If((r1.forward_row1 == req_row) & (r1.forward_way1 == hit_way)):
            comb += use_forward2_next.eq(r1.forward_valid1)

        # The way that matched on a hit
        comb += req_hit_way.eq(hit_way)

        # The way to replace on a miss
        with m.If(r1.write_tag):
            comb += replace_way.eq(plru_victim[r1.store_index])
        with m.Else():
            comb += replace_way.eq(r1.store_way)

        # work out whether we have permission for this access
        # NB we don't yet implement AMR, thus no KUAP
        comb += rc_ok.eq(perm_attr.reference
                         & (r0.req.load | perm_attr.changed))
        comb += perm_ok.eq((r0.req.priv_mode | ~perm_attr.priv)
                           & (perm_attr.wr_perm
                              | (r0.req.load & perm_attr.rd_perm)))
        comb += access_ok.eq(valid_ra & perm_ok & rc_ok)

        # Combine the request and cache hit status to decide what
        # operation needs to be done
        comb += nc.eq(r0.req.nc | perm_attr.nocache)
        comb += op.eq(Op.OP_NONE)
        with m.If(go):
            with m.If(~access_ok):
                comb += op.eq(Op.OP_BAD)
            with m.Elif(cancel_store):
                comb += op.eq(Op.OP_STCX_FAIL)
            with m.Else():
                comb += opsel.eq(Cat(is_hit, nc, r0.req.load))
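                # opsel is Cat(is_hit, nc, r0.req.load), LSB first:
                # bit 0 = is_hit, bit 1 = nc, bit 2 = load. So 0b101 is
                # a cacheable load hit, 0b100 a load miss, 0b110 an NC
                # load, and 0b001/0b000 a store hit/miss; an NC access
                # that "hits" (0b011/0b111) is a cache paradox -> OP_BAD.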
                with m.Switch(opsel):
                    with m.Case(Const(0b101, 3)):
                        comb += op.eq(Op.OP_LOAD_HIT)
                    with m.Case(Const(0b100, 3)):
                        comb += op.eq(Op.OP_LOAD_MISS)
                    with m.Case(Const(0b110, 3)):
                        comb += op.eq(Op.OP_LOAD_NC)
                    with m.Case(Const(0b001, 3)):
                        comb += op.eq(Op.OP_STORE_HIT)
                    with m.Case(Const(0b000, 3)):
                        comb += op.eq(Op.OP_STORE_MISS)
                    with m.Case(Const(0b010, 3)):
                        comb += op.eq(Op.OP_STORE_MISS)
                    with m.Case(Const(0b011, 3)):
                        comb += op.eq(Op.OP_BAD)
                    with m.Case(Const(0b111, 3)):
                        comb += op.eq(Op.OP_BAD)
                    with m.Default():
                        comb += op.eq(Op.OP_NONE)
        comb += req_op.eq(op)
        comb += req_go.eq(go)

        # Version of the row number that is valid one cycle earlier
        # in the cases where we need to read the cache data BRAM.
        # If we're stalling then we need to keep reading the last
        # row requested.
        with m.If(~r0_stall):
            with m.If(m_in.valid):
                comb += early_req_row.eq(get_row(m_in.addr))
            with m.Else():
                comb += early_req_row.eq(get_row(d_in.addr))
        with m.Else():
            comb += early_req_row.eq(req_row)

    def reservation_comb(self, m, cancel_store, set_rsrv, clear_rsrv,
                         r0_valid, r0, reservation):
        """Handle load-with-reservation and store-conditional instructions
        """
        comb = m.d.comb
        sync = m.d.sync

        with m.If(r0_valid & r0.req.reserve):

            # XXX generate alignment interrupt if address
            # is not aligned XXX or if r0.req.nc = '1'
            with m.If(r0.req.load):
                comb += set_rsrv.eq(1) # load with reservation
            with m.Else():
                comb += clear_rsrv.eq(1) # store conditional
                with m.If(~reservation.valid
                          | (r0.req.addr[LINE_OFF_BITS:64] !=
                             reservation.addr)):
                    comb += cancel_store.eq(1)

    def reservation_reg(self, m, r0_valid, access_ok, set_rsrv, clear_rsrv,
                        reservation, r0):

        comb = m.d.comb
        sync = m.d.sync

        with m.If(r0_valid & access_ok):
            with m.If(clear_rsrv):
                sync += reservation.valid.eq(0)
            with m.Elif(set_rsrv):
                sync += reservation.valid.eq(1)
                sync += reservation.addr.eq(r0.req.addr[LINE_OFF_BITS:64])

    def writeback_control(self, m, r1, cache_out):
        """Return data for loads & completion control logic
        """
        comb = m.d.comb
        sync = m.d.sync
        d_out, m_out = self.d_out, self.m_out

        data_out = Signal(64)
        data_fwd = Signal(64)

        # Use the bypass if we are reading the row that was
        # written 1 or 2 cycles ago, including for the
        # slow_valid = 1 case (i.e. completing a load
        # miss or a non-cacheable load).
        with m.If(r1.use_forward1):
            comb += data_fwd.eq(r1.forward_data1)
        with m.Else():
            comb += data_fwd.eq(r1.forward_data2)

        comb += data_out.eq(cache_out[r1.hit_way])

        for i in range(8):
            with m.If(r1.forward_sel[i]):
                dsel = data_fwd.word_select(i, 8)
                comb += data_out.word_select(i, 8).eq(dsel)

        comb += d_out.valid.eq(r1.ls_valid)
        comb += d_out.data.eq(data_out)
        comb += d_out.store_done.eq(~r1.stcx_fail)
        comb += d_out.error.eq(r1.ls_error)
        comb += d_out.cache_paradox.eq(r1.cache_paradox)

        # Outputs to MMU
        comb += m_out.done.eq(r1.mmu_done)
        comb += m_out.err.eq(r1.mmu_error)
        comb += m_out.data.eq(data_out)

        # We have a valid load or store hit or we just completed
        # a slow op such as a load miss, a NC load or a store
        #
        # Note: the load hit is delayed by one cycle. However it
        # can still not collide with r.slow_valid (well unless I
        # miscalculated) because slow_valid can only be set on a
        # subsequent request and not on its first cycle (the state
        # machine must have advanced), which makes slow_valid
        # at least 2 cycles from the previous hit_load_valid.

        # Sanity: Only one of these must be set in any given cycle

        if False: # TODO: need Display to get this to work
            assert (r1.slow_valid & r1.stcx_fail) != 1, "unexpected " \
                "slow_valid collision with stcx_fail -!- severity FAILURE"

            assert ((r1.slow_valid | r1.stcx_fail) | r1.hit_load_valid) != 1, \
                "unexpected hit_load_delayed collision with slow_valid -!- " \
                "severity FAILURE"

        with m.If(~r1.mmu_req):
            # Request came from loadstore1...
            # Load hit case is the standard path
            with m.If(r1.hit_load_valid):
                #Display(f"completing load hit data={data_out}")
                pass

            # error cases complete without stalling
            with m.If(r1.ls_error):
                # Display("completing ld/st with error")
                pass

            # Slow ops (load miss, NC, stores)
            with m.If(r1.slow_valid):
                #Display(f"completing store or load miss data={data_out}")
                pass

        with m.Else():
            # Request came from MMU
            with m.If(r1.hit_load_valid):
                # Display(f"completing load hit to MMU, data={m_out.data}")
                pass
            # error cases complete without stalling
            with m.If(r1.mmu_error):
                #Display("completing MMU ld with error")
                pass

            # Slow ops (i.e. load miss)
            with m.If(r1.slow_valid):
                #Display("completing MMU load miss, data={m_out.data}")
                pass

    def rams(self, m, r1, early_req_row, cache_out, replace_way):
        """rams
        Generate a cache RAM for each way. This handles the normal
        reads, writes from reloads and the special store-hit update
        path as well.

        Note: the BRAMs have an extra read buffer, meaning the output
        is pipelined an extra cycle. This differs from the
        icache. The writeback logic needs to take that into
        account by using 1-cycle delayed signals for load hits.
        """
        comb = m.d.comb
        wb_in = self.wb_in

        for i in range(NUM_WAYS):
            do_read = Signal()
            rd_addr = Signal(ROW_BITS)
            do_write = Signal()
            wr_addr = Signal(ROW_BITS)
            wr_data = Signal(WB_DATA_BITS)
            wr_sel = Signal(ROW_SIZE)
            wr_sel_m = Signal(ROW_SIZE)
            _d_out = Signal(WB_DATA_BITS)

            way = CacheRam(ROW_BITS, WB_DATA_BITS, True)
            setattr(m.submodules, "cacheram_%d" % i, way)

            comb += way.rd_en.eq(do_read)
            comb += way.rd_addr.eq(rd_addr)
            comb += _d_out.eq(way.rd_data)
            comb += way.wr_sel.eq(wr_sel_m)
            comb += way.wr_addr.eq(wr_addr)
            comb += way.wr_data.eq(wr_data)

            # Cache hit reads
            comb += do_read.eq(1)
            comb += rd_addr.eq(early_req_row)
            comb += cache_out[i].eq(_d_out)

            # Write mux:
            #
            # Defaults to wishbone read responses (cache refill)
            #
            # For timing, the mux on wr_data/sel/addr is not
            # dependent on anything other than the current state.

            with m.If(r1.write_bram):
                # Write store data to BRAM. This happens one
                # cycle after the store is in r0.
                comb += wr_data.eq(r1.req.data)
                comb += wr_sel.eq(r1.req.byte_sel)
                comb += wr_addr.eq(get_row(r1.req.real_addr))

                with m.If(i == r1.req.hit_way):
                    comb += do_write.eq(1)
            with m.Else():
                # Otherwise, we might be doing a reload or a DCBZ
                with m.If(r1.dcbz):
                    comb += wr_data.eq(0)
                with m.Else():
                    comb += wr_data.eq(wb_in.dat)
                comb += wr_addr.eq(r1.store_row)
                comb += wr_sel.eq(~0) # all 1s

                with m.If((r1.state == State.RELOAD_WAIT_ACK)
                          & wb_in.ack & (replace_way == i)):
                    comb += do_write.eq(1)

            # Mask write selects with do_write since BRAM
            # doesn't have a global write-enable
            with m.If(do_write):
                comb += wr_sel_m.eq(wr_sel)

    # Cache hit synchronous machine for the easy case.
    # This handles load hits.
    # It also handles error cases (TLB miss, cache paradox)
    def dcache_fast_hit(self, m, req_op, r0_valid, r0, r1,
                        req_hit_way, req_index, access_ok,
                        tlb_hit, tlb_hit_way, tlb_req_index):

        comb = m.d.comb
        sync = m.d.sync

        with m.If(req_op != Op.OP_NONE):
            #Display(f"op:{req_op} addr:{r0.req.addr} nc: {r0.req.nc}" \
            #        f"idx:{req_index} tag:{req_tag} way: {req_hit_way}"
            #       )
            pass

        with m.If(r0_valid):
            sync += r1.mmu_req.eq(r0.mmu_req)

        # Fast path for load/store hits.
        # Set signals for the writeback controls.
        sync += r1.hit_way.eq(req_hit_way)
        sync += r1.hit_index.eq(req_index)

        with m.If(req_op == Op.OP_LOAD_HIT):
            sync += r1.hit_load_valid.eq(1)
        with m.Else():
            sync += r1.hit_load_valid.eq(0)

        with m.If((req_op == Op.OP_LOAD_HIT) | (req_op == Op.OP_STORE_HIT)):
            sync += r1.cache_hit.eq(1)
        with m.Else():
            sync += r1.cache_hit.eq(0)

        with m.If(req_op == Op.OP_BAD):
            # Display(f"Signalling ld/st error valid_ra={valid_ra}"
            #         f"rc_ok={rc_ok} perm_ok={perm_ok}"
            sync += r1.ls_error.eq(~r0.mmu_req)
            sync += r1.mmu_error.eq(r0.mmu_req)
            sync += r1.cache_paradox.eq(access_ok)

        with m.Else():
            sync += r1.ls_error.eq(0)
            sync += r1.mmu_error.eq(0)
            sync += r1.cache_paradox.eq(0)

        with m.If(req_op == Op.OP_STCX_FAIL):
            sync += r1.stcx_fail.eq(1)
        with m.Else():
            sync += r1.stcx_fail.eq(0)

        # Record TLB hit information for updating TLB PLRU
        sync += r1.tlb_hit.eq(tlb_hit)
        sync += r1.tlb_hit_way.eq(tlb_hit_way)
        sync += r1.tlb_hit_index.eq(tlb_req_index)

    # Memory accesses are handled by this state machine:
    #
    #   * Cache load miss/reload (in conjunction with "rams")
    #   * Load hits for non-cachable forms
    #   * Stores (the collision case is handled in "rams")
    #
    # All wishbone requests generation is done here.
    # This machine operates at stage 1.
    def dcache_slow(self, m, r1, use_forward1_next, use_forward2_next,
                    cache_valid_bits, r0, replace_way, req_hit_way,
                    req_same_tag, r0_valid, req_op, cache_tags,
                    req_go, ra):

        comb = m.d.comb
        sync = m.d.sync
        wb_in = self.wb_in

        req = MemAccessRequest()
        acks = Signal(3)
        adjust_acks = Signal(3)
        stbs_done = Signal()

        sync += r1.use_forward1.eq(use_forward1_next)
        sync += r1.forward_sel.eq(0)

        with m.If(use_forward1_next):
            sync += r1.forward_sel.eq(r1.req.byte_sel)
        with m.Elif(use_forward2_next):
            sync += r1.forward_sel.eq(r1.forward_sel1)

        sync += r1.forward_data2.eq(r1.forward_data1)
        with m.If(r1.write_bram):
            sync += r1.forward_data1.eq(r1.req.data)
            sync += r1.forward_sel1.eq(r1.req.byte_sel)
            sync += r1.forward_way1.eq(r1.req.hit_way)
            sync += r1.forward_row1.eq(get_row(r1.req.real_addr))
            sync += r1.forward_valid1.eq(1)
        with m.Else():
            with m.If(r1.dcbz):
                sync += r1.forward_data1.eq(0)
            with m.Else():
                sync += r1.forward_data1.eq(wb_in.dat)
            sync += r1.forward_sel1.eq(~0) # all 1s
            sync += r1.forward_way1.eq(replace_way)
            sync += r1.forward_row1.eq(r1.store_row)
            sync += r1.forward_valid1.eq(0)

        # One cycle pulses reset
        sync += r1.slow_valid.eq(0)
        sync += r1.write_bram.eq(0)
        sync += r1.inc_acks.eq(0)
        sync += r1.dec_acks.eq(0)

        sync += r1.ls_valid.eq(0)
        # complete tlbies and TLB loads in the third cycle
        sync += r1.mmu_done.eq(r0_valid & (r0.tlbie | r0.tlbld))

        with m.If((req_op == Op.OP_LOAD_HIT)
                  | (req_op == Op.OP_STCX_FAIL)):
            with m.If(~r0.mmu_req):
                sync += r1.ls_valid.eq(1)
            with m.Else():
                sync += r1.mmu_done.eq(1)

        with m.If(r1.write_tag):
            # Store new tag in selected way
            for i in range(NUM_WAYS):
                with m.If(i == replace_way):
                    idx = r1.store_index
                    sync += cache_tags[idx][i * TAG_WIDTH:
                                            (i + 1) * TAG_WIDTH
                                           ].eq(r1.reload_tag)
            sync += r1.store_way.eq(replace_way)
            sync += r1.write_tag.eq(0)

        # Take request from r1.req if there is one there,
        # else from req_op, ra, etc.
        with m.If(r1.full):
            comb += req.eq(r1.req)
        with m.Else():
            comb += req.op.eq(req_op)
            comb += req.valid.eq(req_go)
            comb += req.mmu_req.eq(r0.mmu_req)
            comb += req.dcbz.eq(r0.req.dcbz)
            comb += req.real_addr.eq(ra)

            with m.If(~r0.req.dcbz):
                comb += req.data.eq(r0.req.data)
            with m.Else():
                comb += req.data.eq(0)

            # Select all bytes for dcbz
            # and for cacheable loads
            with m.If(r0.req.dcbz | (r0.req.load & ~r0.req.nc)):
                comb += req.byte_sel.eq(~0) # all 1s
            with m.Else():
                comb += req.byte_sel.eq(r0.req.byte_sel)
            comb += req.hit_way.eq(req_hit_way)
            comb += req.same_tag.eq(req_same_tag)

        # Store the incoming request from r0,
        # if it is a slow request
        # Note that r1.full = 1 implies req_op = OP_NONE
        with m.If((req_op == Op.OP_LOAD_MISS)
                  | (req_op == Op.OP_LOAD_NC)
                  | (req_op == Op.OP_STORE_MISS)
                  | (req_op == Op.OP_STORE_HIT)):
            sync += r1.req.eq(req)
            sync += r1.full.eq(1)

        # Main state machine
        with m.Switch(r1.state):

            with m.Case(State.IDLE):
                # XXX check 'left downto. probably means len(r1.wb.adr)
                #     r1.wb.adr <= req.real_addr(
                #                   r1.wb.adr'left downto 0
                #                  );
                sync += r1.wb.adr.eq(req.real_addr[0:len(r1.wb.adr)])
                sync += r1.wb.sel.eq(req.byte_sel)
                sync += r1.wb.dat.eq(req.data)
                sync += r1.dcbz.eq(req.dcbz)

                # Keep track of our index and way
                # for subsequent stores.
                sync += r1.store_index.eq(get_index(req.real_addr))
                sync += r1.store_row.eq(get_row(req.real_addr))
                sync += r1.end_row_ix.eq(
                         get_row_of_line(get_row(req.real_addr))
                        )
                sync += r1.reload_tag.eq(get_tag(req.real_addr))
                sync += r1.req.same_tag.eq(1)

                with m.If(req.op == Op.OP_STORE_HIT):
                    sync += r1.store_way.eq(req.hit_way)

                # Reset per-row valid bits,
                # ready for handling OP_LOAD_MISS
                for i in range(ROW_PER_LINE):
                    sync += r1.rows_valid[i].eq(0)

                with m.Switch(req.op):
                    with m.Case(Op.OP_LOAD_HIT):
                        # stay in IDLE state
                        pass

                    with m.Case(Op.OP_LOAD_MISS):
                        #Display(f"cache miss real addr:" \
                        #        f"{req_real_addr}" \
                        #        f" idx:{get_index(req_real_addr)}" \
                        #        f" tag:{get_tag(req.real_addr)}")

                        # Start the wishbone cycle
                        sync += r1.wb.we.eq(0)
                        sync += r1.wb.cyc.eq(1)
                        sync += r1.wb.stb.eq(1)

                        # Track that we had one request sent
                        sync += r1.state.eq(State.RELOAD_WAIT_ACK)
                        sync += r1.write_tag.eq(1)

                    with m.Case(Op.OP_LOAD_NC):
                        sync += r1.wb.cyc.eq(1)
                        sync += r1.wb.stb.eq(1)
                        sync += r1.wb.we.eq(0)
                        sync += r1.state.eq(State.NC_LOAD_WAIT_ACK)

                    with m.Case(Op.OP_STORE_HIT, Op.OP_STORE_MISS):
                        with m.If(~req.dcbz):
                            sync += r1.state.eq(State.STORE_WAIT_ACK)
                            sync += r1.acks_pending.eq(1)
                            sync += r1.full.eq(0)
                            sync += r1.slow_valid.eq(1)

                            with m.If(~req.mmu_req):
                                sync += r1.ls_valid.eq(1)
                            with m.Else():
                                sync += r1.mmu_done.eq(1)

                            with m.If(req.op == Op.OP_STORE_HIT):
                                sync += r1.write_bram.eq(1)
                        with m.Else():
                            sync += r1.state.eq(State.RELOAD_WAIT_ACK)

                            with m.If(req.op == Op.OP_STORE_MISS):
                                sync += r1.write_tag.eq(1)

                        sync += r1.wb.we.eq(1)
                        sync += r1.wb.cyc.eq(1)
                        sync += r1.wb.stb.eq(1)

                    # OP_NONE and OP_BAD do nothing
                    # OP_BAD & OP_STCX_FAIL were
                    # handled above already
                    with m.Case(Op.OP_NONE):
                        pass
                    with m.Case(Op.OP_BAD):
                        pass
                    with m.Case(Op.OP_STCX_FAIL):
                        pass

            with m.Case(State.RELOAD_WAIT_ACK):
                # Requests are all sent if stb is 0
                comb += stbs_done.eq(~r1.wb.stb)

                with m.If(~wb_in.stall & ~stbs_done):
                    # That was the last word?
                    # We are done sending.
                    # Clear stb and set stbs_done
                    # so we can handle an eventual
                    # last ack on the same cycle.
                    with m.If(is_last_row_addr(
                              r1.wb.adr, r1.end_row_ix)):
                        sync += r1.wb.stb.eq(0)
                        comb += stbs_done.eq(1)

                    # Calculate the next row address
                    sync += r1.wb.adr.eq(next_row_addr(r1.wb.adr))

                # Incoming acks processing
                sync += r1.forward_valid1.eq(wb_in.ack)
                with m.If(wb_in.ack):
                    # XXX needs an Array bit-accessor here
                    sync += r1.rows_valid[r1.store_row % ROW_PER_LINE].eq(1)

                    # If this is the data we were looking for,
                    # we can complete the request next cycle.
                    # Compare the whole address in case the
                    # request in r1.req is not the one that
                    # started this refill.
                    with m.If(r1.full & r1.req.same_tag &
                              ((r1.dcbz & r1.req.dcbz) |
                               (~r1.dcbz &
                                (r1.req.op == Op.OP_LOAD_MISS))) &
                              (r1.store_row == get_row(r1.req.real_addr))):
                        sync += r1.full.eq(0)
                        sync += r1.slow_valid.eq(1)
                        with m.If(~r1.mmu_req):
                            sync += r1.ls_valid.eq(1)
                        with m.Else():
                            sync += r1.mmu_done.eq(1)
                        sync += r1.forward_sel.eq(~0) # all 1s
                        sync += r1.use_forward1.eq(1)

                    # Check for completion
                    with m.If(stbs_done & is_last_row(r1.store_row,
                                                      r1.end_row_ix)):
                        # Complete wishbone cycle
                        sync += r1.wb.cyc.eq(0)

                        # Cache line is now valid
                        cv = cache_valid_bits[r1.store_index]
                        sync += cv.bit_select(r1.store_way, 1).eq(1)
                        sync += r1.state.eq(State.IDLE)

                    # Increment store row counter
                    sync += r1.store_row.eq(next_row(r1.store_row))

            with m.Case(State.STORE_WAIT_ACK):
                comb += stbs_done.eq(~r1.wb.stb)
                comb += acks.eq(r1.acks_pending)

                with m.If(r1.inc_acks != r1.dec_acks):
                    with m.If(r1.inc_acks):
                        comb += adjust_acks.eq(acks + 1)
                    with m.Else():
                        comb += adjust_acks.eq(acks - 1)
                with m.Else():
                    comb += adjust_acks.eq(acks)

                sync += r1.acks_pending.eq(adjust_acks)

                # Clear stb when slave accepted request
                with m.If(~wb_in.stall):
                    # See if there is another store waiting
                    # to be done which is in the same real page.
                    with m.If(req.valid):
                        ra = req.real_addr[0:SET_SIZE_BITS]
                        sync += r1.wb.adr[0:SET_SIZE_BITS].eq(ra)
                        sync += r1.wb.dat.eq(req.data)
                        sync += r1.wb.sel.eq(req.byte_sel)

                    with m.If((adjust_acks < 7) & req.same_tag &
                              ((req.op == Op.OP_STORE_MISS)
                               | (req.op == Op.OP_STORE_HIT))):
                        sync += r1.wb.stb.eq(1)
                        comb += stbs_done.eq(0)

                        with m.If(req.op == Op.OP_STORE_HIT):
                            sync += r1.write_bram.eq(1)
                        sync += r1.full.eq(0)
                        sync += r1.slow_valid.eq(1)

                        # Store requests never come from the MMU
                        sync += r1.ls_valid.eq(1)
                        comb += stbs_done.eq(0)
                        sync += r1.inc_acks.eq(1)
                    with m.Else():
                        sync += r1.wb.stb.eq(0)
                        comb += stbs_done.eq(1)

                # Got ack ? See if complete.
                with m.If(wb_in.ack):
                    with m.If(stbs_done & (adjust_acks == 1)):
                        sync += r1.state.eq(State.IDLE)
                        sync += r1.wb.cyc.eq(0)
                        sync += r1.wb.stb.eq(0)
                    sync += r1.dec_acks.eq(1)

            with m.Case(State.NC_LOAD_WAIT_ACK):
                # Clear stb when slave accepted request
                with m.If(~wb_in.stall):
                    sync += r1.wb.stb.eq(0)

                # Got ack ? complete.
                with m.If(wb_in.ack):
                    sync += r1.state.eq(State.IDLE)
                    sync += r1.full.eq(0)
                    sync += r1.slow_valid.eq(1)

                    with m.If(~r1.mmu_req):
                        sync += r1.ls_valid.eq(1)
                    with m.Else():
                        sync += r1.mmu_done.eq(1)

                    sync += r1.forward_sel.eq(~0) # all 1s
                    sync += r1.use_forward1.eq(1)
                    sync += r1.wb.cyc.eq(0)
                    sync += r1.wb.stb.eq(0)

    def dcache_log(self, m, r1, valid_ra, tlb_hit_way, stall_out, req_op):

        sync = m.d.sync
        d_out, wb_in, log_out = self.d_out, self.wb_in, self.log_out

        sync += log_out.eq(Cat(r1.state[:3], valid_ra, tlb_hit_way[:3],
                               stall_out, req_op[:3], d_out.valid, d_out.error,
                               r1.wb.cyc, r1.wb.stb, wb_in.ack, wb_in.stall,
                               r1.wb.adr[3:6]))

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        # Storage. Hopefully "cache_rows" is a BRAM, the rest is LUTs
        cache_tags = CacheTagArray()
        cache_tag_set = Signal(TAG_RAM_WIDTH)
        cache_valid_bits = CacheValidBitsArray()

        # TODO attribute ram_style : string;
        # TODO attribute ram_style of cache_tags : signal is "distributed";

        """note: these are passed to nmigen.hdl.Memory as "attributes".
           don't know how, just that they are.
        """
        dtlb_valid_bits = TLBValidBitsArray()
        dtlb_tags = TLBTagsArray()
        dtlb_ptes = TLBPtesArray()
        # TODO attribute ram_style of
        #  dtlb_tags : signal is "distributed";
        # TODO attribute ram_style of
        #  dtlb_ptes : signal is "distributed";

        r0 = RegStage0()
        r0_full = Signal()

        r1 = RegStage1()

        reservation = Reservation()

        # Async signals on incoming request
        req_index = Signal(INDEX_BITS)
        req_row = Signal(ROW_BITS)
        req_hit_way = Signal(WAY_BITS)
        req_tag = Signal(TAG_BITS)
        req_op = Signal(Op)
        req_data = Signal(64)
        req_same_tag = Signal()
        req_go = Signal()

        early_req_row = Signal(ROW_BITS)

        cancel_store = Signal()
        set_rsrv = Signal()
        clear_rsrv = Signal()

        r0_valid = Signal()
        r0_stall = Signal()

        use_forward1_next = Signal()
        use_forward2_next = Signal()

        cache_out = CacheRamOut()

        plru_victim = PLRUOut()
        replace_way = Signal(WAY_BITS)

        # Wishbone read/write/cache write formatting signals
        bus_sel = Signal(8)

        # TLB signals
        tlb_tag_way = Signal(TLB_TAG_WAY_BITS)
        tlb_pte_way = Signal(TLB_PTE_WAY_BITS)
        tlb_valid_way = Signal(TLB_NUM_WAYS)
        tlb_req_index = Signal(TLB_SET_BITS)
        tlb_hit = Signal()
        tlb_hit_way = Signal(TLB_WAY_BITS)
        pte = Signal(TLB_PTE_BITS)
        ra = Signal(REAL_ADDR_BITS)
        valid_ra = Signal()
        perm_attr = PermAttr()
        rc_ok = Signal()
        perm_ok = Signal()
        access_ok = Signal()

        tlb_plru_victim = TLBPLRUOut()

        # we don't yet handle collisions between loadstore1 requests
        # and MMU requests
        comb += self.m_out.stall.eq(0)

        # Hold off the request in r0 when r1 has an uncompleted request
        comb += r0_stall.eq(r0_full & r1.full)
        comb += r0_valid.eq(r0_full & ~r1.full)
        comb += self.stall_out.eq(r0_stall)

        # Wire up wishbone request latch out of stage 1
        comb += self.wb_out.eq(r1.wb)

        # call sub-functions putting everything together, using shared
        # signals established above
        self.stage_0(m, r0, r1, r0_full)
        self.tlb_read(m, r0_stall, tlb_valid_way,
                      tlb_tag_way, tlb_pte_way, dtlb_valid_bits,
                      dtlb_tags, dtlb_ptes)
        self.tlb_search(m, tlb_req_index, r0, r0_valid,
                        tlb_valid_way, tlb_tag_way, tlb_pte_way,
                        pte, tlb_hit, tlb_hit_way, valid_ra, perm_attr, ra)
        self.tlb_update(m, r0_valid, r0, dtlb_valid_bits, tlb_req_index,
                        tlb_hit_way, tlb_hit, tlb_plru_victim, tlb_tag_way,
                        dtlb_tags, tlb_pte_way, dtlb_ptes)
        self.maybe_plrus(m, r1, plru_victim)
        self.maybe_tlb_plrus(m, r1, tlb_plru_victim)
        self.cache_tag_read(m, r0_stall, req_index, cache_tag_set, cache_tags)
        self.dcache_request(m, r0, ra, req_index, req_row, req_tag,
                            r0_valid, r1, cache_valid_bits, replace_way,
                            use_forward1_next, use_forward2_next,
                            req_hit_way, plru_victim, rc_ok, perm_attr,
                            valid_ra, perm_ok, access_ok, req_op, req_go,
                            tlb_valid_way, tlb_tag_way, tlb_pte_way,
                            tlb_hit, tlb_hit_way, cache_tag_set,
                            cancel_store, req_same_tag,
                            r0_stall, early_req_row)
        self.reservation_comb(m, cancel_store, set_rsrv, clear_rsrv,
                              r0_valid, r0, reservation)
        self.reservation_reg(m, r0_valid, access_ok, set_rsrv, clear_rsrv,
                             reservation, r0)
        self.writeback_control(m, r1, cache_out)
        self.rams(m, r1, early_req_row, cache_out, replace_way)
        self.dcache_fast_hit(m, req_op, r0_valid, r0, r1,
                             req_hit_way, req_index, access_ok,
                             tlb_hit, tlb_hit_way, tlb_req_index)
        self.dcache_slow(m, r1, use_forward1_next, use_forward2_next,
                         cache_valid_bits, r0, replace_way, req_hit_way,
                         req_same_tag, r0_valid, req_op, cache_tags,
                         req_go, ra)
        #self.dcache_log(m, r1, valid_ra, tlb_hit_way, self.stall_out, req_op)

        return m


# dcache_tb.vhdl
#
# entity dcache_tb is
# end dcache_tb;
#
# architecture behave of dcache_tb is
#     signal clk          : std_ulogic;
#     signal rst          : std_ulogic;
#
#     signal d_in         : Loadstore1ToDcacheType;
#     signal d_out        : DcacheToLoadstore1Type;
#
#     signal m_in         : MmuToDcacheType;
#     signal m_out        : DcacheToMmuType;
#
#     signal wb_bram_in   : wishbone_master_out;
#     signal wb_bram_out  : wishbone_slave_out;
#
#     constant clk_period : time := 10 ns;
# begin
#     dcache0: entity work.dcache
#         generic map(
#
#             LINE_SIZE => 64,
#             NUM_LINES => 4
#             )
#         port map(
#             clk => clk,
#             rst => rst,
#             d_in => d_in,
#             d_out => d_out,
#             m_in => m_in,
#             m_out => m_out,
#             wishbone_out => wb_bram_in,
#             wishbone_in => wb_bram_out
#             );
#
#     -- BRAM Memory slave
#     bram0: entity work.wishbone_bram_wrapper
#         generic map(
#             MEMORY_SIZE   => 1024,
#             RAM_INIT_FILE => "icache_test.bin"
#             )
#         port map(
#             clk => clk,
#             rst => rst,
#             wishbone_in => wb_bram_in,
#             wishbone_out => wb_bram_out
#             );
#
#     clk_process: process
#     begin
#         clk <= '0';
#         wait for clk_period/2;
#         clk <= '1';
#         wait for clk_period/2;
#     end process;
#
#     rst_process: process
#     begin
#         rst <= '1';
#         wait for 2*clk_period;
#         rst <= '0';
#         wait;
#     end process;
#
#     stim: process
#     begin
#         -- Clear stuff
#         d_in.valid <= '0';
#         d_in.load <= '0';
#         d_in.nc <= '0';
#         d_in.addr <= (others => '0');
#         d_in.data <= (others => '0');
#         m_in.valid <= '0';
#         m_in.addr <= (others => '0');
#         m_in.pte <= (others => '0');
#
#         wait for 4*clk_period;
#         wait until rising_edge(clk);
#
#         -- Cacheable read of address 4
#         d_in.load <= '1';
#         d_in.nc <= '0';
#         d_in.addr <= x"0000000000000004";
#         d_in.valid <= '1';
#         wait until rising_edge(clk);
#         d_in.valid <= '0';
#
#         wait until rising_edge(clk) and d_out.valid = '1';
#         assert d_out.data = x"0000000100000000"
#             report "data @" & to_hstring(d_in.addr) &
#                 "=" & to_hstring(d_out.data) &
#                 " expected 0000000100000000"
#             severity failure;
#         -- wait for clk_period;
#
#         -- Cacheable read of address 30
#         d_in.load <= '1';
#         d_in.nc <= '0';
#         d_in.addr <= x"0000000000000030";
#         d_in.valid <= '1';
#         wait until rising_edge(clk);
#         d_in.valid <= '0';
#
#         wait until rising_edge(clk) and d_out.valid = '1';
#         assert d_out.data = x"0000000D0000000C"
#             report "data @" & to_hstring(d_in.addr) &
#                 "=" & to_hstring(d_out.data) &
#                 " expected 0000000D0000000C"
#             severity failure;
#
#         -- Non-cacheable read of address 100
#         d_in.load <= '1';
#         d_in.nc <= '1';
#         d_in.addr <= x"0000000000000100";
#         d_in.valid <= '1';
#         wait until rising_edge(clk);
#         d_in.valid <= '0';
#         wait until rising_edge(clk) and d_out.valid = '1';
#         assert d_out.data = x"0000004100000040"
#             report "data @" & to_hstring(d_in.addr) &
#                 "=" & to_hstring(d_out.data) &
#                 " expected 0000004100000040"
#             severity failure;
#
#         wait until rising_edge(clk);
#         wait until rising_edge(clk);
#         wait until rising_edge(clk);
#         wait until rising_edge(clk);
#
#         std.env.finish;
#     end process;
# end;
def dcache_sim(dut):
    # clear stuff
    yield dut.d_in.valid.eq(0)
    yield dut.d_in.load.eq(0)
    yield dut.d_in.nc.eq(0)
    yield dut.d_in.addr.eq(0)
    yield dut.d_in.data.eq(0)
    yield dut.m_in.valid.eq(0)
    yield dut.m_in.addr.eq(0)
    yield dut.m_in.pte.eq(0)
    # wait 4 * clk_period
    yield
    yield
    yield
    yield
    # wait_until rising_edge(clk)
    yield
    # Cacheable read of address 4
    yield dut.d_in.load.eq(1)
    yield dut.d_in.nc.eq(0)
    yield dut.d_in.addr.eq(Const(0x0000000000000004, 64))
    yield dut.d_in.valid.eq(1)
    # wait-until rising_edge(clk)
    yield
    yield dut.d_in.valid.eq(0)
    yield
    while not (yield dut.d_out.valid):
        yield
    data = yield dut.d_out.data
    assert data == 0x0000000100000000, \
        f"data @{dut.d_in.addr}={data:x} expected 0000000100000000 " \
        "-!- severity failure"


    # Cacheable read of address 30
    yield dut.d_in.load.eq(1)
    yield dut.d_in.nc.eq(0)
    yield dut.d_in.addr.eq(Const(0x0000000000000030, 64))
    yield dut.d_in.valid.eq(1)
    yield
    yield dut.d_in.valid.eq(0)
    yield
    while not (yield dut.d_out.valid):
        yield
    data = yield dut.d_out.data
    assert data == 0x0000000D0000000C, \
        f"data @{dut.d_in.addr}={data:x} expected 0000000D0000000C " \
        "-!- severity failure"

    # Non-cacheable read of address 100
    yield dut.d_in.load.eq(1)
    yield dut.d_in.nc.eq(1)
    yield dut.d_in.addr.eq(Const(0x0000000000000100, 64))
    yield dut.d_in.valid.eq(1)
    yield
    yield dut.d_in.valid.eq(0)
    yield
    while not (yield dut.d_out.valid):
        yield
    data = yield dut.d_out.data
    assert data == 0x0000004100000040, \
        f"data @{dut.d_in.addr}={data:x} expected 0000004100000040 " \
        "-!- severity failure"

    yield
    yield
    yield
    yield


def test_dcache():
    dut = DCache()
    vl = rtlil.convert(dut, ports=[])
    with open("test_dcache.il", "w") as f:
        f.write(vl)

    run_simulation(dut, dcache_sim(dut), vcd_name='test_dcache.vcd')

if __name__ == '__main__':
    test_dcache()