1 """DCache
2
3 based on Anton Blanchard microwatt dcache.vhdl
4
5 """
6
from enum import Enum, unique

from nmigen import (Module, Signal, Elaboratable, Cat, Repl, Const, Array)
from nmigen.cli import main, rtlil
from nmigen.compat.sim import run_simulation
from nmigen.utils import log2_int
# note: RecordObject comes from nmutil (there is no nmigen.iocontrol)
from nmutil.iocontrol import RecordObject

from experiment.mem_types import (LoadStore1ToDCacheType,
                                  DCacheToLoadStore1Type,
                                  MMUToDCacheType,
                                  DCacheToMMUType)

from experiment.wb_types import (WB_ADDR_BITS, WB_DATA_BITS, WB_SEL_BITS,
                                 WBAddrType, WBDataType, WBSelType,
                                 WBMasterOut, WBSlaveOut,
                                 WBMasterOutVector, WBSlaveOutVector,
                                 WBIOMasterOut, WBIOSlaveOut)

# PLRU and CacheRam are used below; the module paths are assumed to
# follow the experiment.* layout used above
from experiment.plru import PLRU
from experiment.cache_ram import CacheRam


# Record for storing permission, attribute, etc. bits from a PTE
class PermAttr(RecordObject):
    def __init__(self):
        super().__init__()
        self.reference = Signal()
        self.changed = Signal()
        self.nocache = Signal()
        self.priv = Signal()
        self.rd_perm = Signal()
        self.wr_perm = Signal()


def extract_perm_attr(pte):
    pa = PermAttr()
    pa.reference = pte[8]
    pa.changed = pte[7]
    pa.nocache = pte[5]
    pa.priv = pte[3]
    pa.rd_perm = pte[2]
    pa.wr_perm = pte[1]
    return pa

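# A PermAttr extracted this way is wired straight into a record,
# e.g. (as done in tlb_search below):
#
#     comb += perm_attr.eq(extract_perm_attr(pte))
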

# Type of operation on a "valid" input
@unique
class Op(Enum):
    OP_NONE = 0
    OP_BAD = 1        # NC cache hit, TLB miss, prot/RC failure
    OP_STCX_FAIL = 2  # conditional store w/o reservation
    OP_LOAD_HIT = 3   # Cache hit on load
    OP_LOAD_MISS = 4  # Load missing cache
    OP_LOAD_NC = 5    # Non-cachable load
    OP_STORE_HIT = 6  # Store hitting cache
    OP_STORE_MISS = 7 # Store missing cache


# Cache state machine
@unique
class State(Enum):
    IDLE = 0             # Normal load hit processing
    RELOAD_WAIT_ACK = 1  # Cache reload wait ack
    STORE_WAIT_ACK = 2   # Store wait ack
    NC_LOAD_WAIT_ACK = 3 # Non-cachable load wait ack


# Dcache operations:
#
# In order to make timing, we use the BRAMs with an output buffer,
# which means that the BRAM output is delayed by an extra cycle.
#
# Thus, the dcache has a 2-stage internal pipeline for cache hits
# with no stalls.
#
# All other operations are handled via stalling in the first stage.
#
# The second stage can thus complete a hit at the same time as the
# first stage emits a stall for a complex op.
#
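# A rough cycle-by-cycle sketch of a load hit (illustrative only):
#
#   cycle 0: request latched into r0 (stage_0)
#   cycle 1: tag/TLB compare, BRAM read issued (stage 1)
#   cycle 2: BRAM output buffer valid, data returned
#            (writeback_control)
#
# A "slow" op (miss, NC load, store) stalls in stage 1 instead,
# while a previously-started hit can still complete in stage 2.
#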
# Stage 0 register, basically contains just the latched request
class RegStage0(RecordObject):
    def __init__(self):
        super().__init__()
        self.req = LoadStore1ToDCacheType()
        self.tlbie = Signal()
        self.doall = Signal()
        self.tlbld = Signal()
        self.mmu_req = Signal() # indicates source of request


class MemAccessRequest(RecordObject):
    def __init__(self):
        super().__init__()
        self.op = Signal(Op)  # enum-shaped Signal, not a bare Op()
        self.valid = Signal()
        self.dcbz = Signal()
        self.real_addr = Signal(REAL_ADDR_BITS)
        self.data = Signal(64)
        self.byte_sel = Signal(8)
        self.hit_way = Signal(WAY_BITS)
        self.same_tag = Signal()
        self.mmu_req = Signal()


# First stage register, contains state for stage 1 of load hits
# and for the state machine used by all other operations
class RegStage1(RecordObject):
    def __init__(self):
        super().__init__()
        # Info about the request
        self.full = Signal()    # have uncompleted request
        self.mmu_req = Signal() # request is from MMU
        self.req = MemAccessRequest()

        # Cache hit state
        self.hit_way = Signal(WAY_BITS)
        self.hit_load_valid = Signal()
        self.hit_index = Signal(NUM_LINES)
        self.cache_hit = Signal()

        # TLB hit state
        self.tlb_hit = Signal()
        self.tlb_hit_way = Signal(TLB_NUM_WAYS)
        self.tlb_hit_index = Signal(TLB_SET_SIZE)

        # 2-stage data buffer for data forwarded from writes to reads
        self.forward_data1 = Signal(64)
        self.forward_data2 = Signal(64)
        self.forward_sel1 = Signal(8)
        self.forward_valid1 = Signal()
        self.forward_way1 = Signal(WAY_BITS)
        self.forward_row1 = Signal(BRAM_ROWS)
        self.use_forward1 = Signal()
        self.forward_sel = Signal(8)

        # Cache miss state (reload state machine)
        self.state = Signal(State)
        self.dcbz = Signal()
        self.write_bram = Signal()
        self.write_tag = Signal()
        self.slow_valid = Signal()
        self.wb = WBMasterOut()
        self.reload_tag = Signal(TAG_BITS)
        self.store_way = Signal(WAY_BITS)
        self.store_row = Signal(BRAM_ROWS)
        self.store_index = Signal(NUM_LINES)
        self.end_row_ix = Signal(ROW_LINE_BITS)
        self.rows_valid = RowPerLineValidArray()
        self.acks_pending = Signal(3)
        self.inc_acks = Signal()
        self.dec_acks = Signal()

        # Signals to complete (possibly with error)
        self.ls_valid = Signal()
        self.ls_error = Signal()
        self.mmu_done = Signal()
        self.mmu_error = Signal()
        self.cache_paradox = Signal()

        # Signal to complete a failed stcx.
        self.stcx_fail = Signal()


# Reservation information
class Reservation(RecordObject):
    def __init__(self):
        super().__init__()
        self.valid = Signal()
        # one bit per address bit above the line offset
        # (63 downto LINE_OFF_BITS in the VHDL; LINE_OFF_BITS is 6)
        self.addr = Signal(64 - LINE_OFF_BITS)


# Set associative dcache write-through
#
# TODO (in no specific order):
#
# * See list in icache.vhdl
# * Complete load misses on the cycle when WB data comes instead of
#   at the end of line (this requires dealing with requests coming in
#   while not idle...)
class DCache(Elaboratable):
    def __init__(self):
        # TODO: make these parameters of DCache at some point
        self.LINE_SIZE = 64    # Line size in bytes
        self.NUM_LINES = 32    # Number of lines in a set
        self.NUM_WAYS = 4      # Number of ways
        self.TLB_SET_SIZE = 64 # L1 DTLB entries per set
        self.TLB_NUM_WAYS = 2  # L1 DTLB number of sets
        self.TLB_LG_PGSZ = 12  # L1 DTLB log_2(page_size)
        self.LOG_LENGTH = 0    # Non-zero to enable log data collection

        self.d_in = LoadStore1ToDCacheType()
        self.d_out = DCacheToLoadStore1Type()

        self.m_in = MMUToDCacheType()
        self.m_out = DCacheToMMUType()

        self.stall_out = Signal()

        self.wb_out = WBMasterOut()
        self.wb_in = WBSlaveOut()

        self.log_out = Signal(20)

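    # Rough usage sketch (see the testbench at the bottom of this
    # file for the real thing):
    #
    #     dut = DCache()
    #     # drive dut.d_in (loadstore1) or dut.m_in (MMU), watch
    #     # dut.d_out / dut.m_out, and connect dut.wb_out / dut.wb_in
    #     # to a wishbone slave such as a BRAM model
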
    # Latch the request in r0.req as long as we're not stalling
    def stage_0(self, m, r0, r1, r0_full, d_in, m_in):
        comb = m.d.comb
        sync = m.d.sync

        r = RegStage0()

        # TODO, this goes in unit tests and formal proofs
        # assert ~(d_in.valid & m_in.valid),
        # "request collision loadstore vs MMU"
        with m.If(~(d_in.valid & m_in.valid)):
            #sync += Display("request collision loadstore vs MMU")
            pass

        # r is a combinatorial staging record, latched into r0 below
        with m.If(m_in.valid):
            comb += r.req.valid.eq(1)
            comb += r.req.load.eq(~(m_in.tlbie | m_in.tlbld))
            comb += r.req.dcbz.eq(0)
            comb += r.req.nc.eq(0)
            comb += r.req.reserve.eq(0)
            comb += r.req.virt_mode.eq(1)
            comb += r.req.priv_mode.eq(1)
            comb += r.req.addr.eq(m_in.addr)
            comb += r.req.data.eq(m_in.pte)
            comb += r.req.byte_sel.eq(-1) # Const -1 sets all to 0b111....
            comb += r.tlbie.eq(m_in.tlbie)
            comb += r.doall.eq(m_in.doall)
            comb += r.tlbld.eq(m_in.tlbld)
            comb += r.mmu_req.eq(1)
        with m.Else():
            comb += r.req.eq(d_in)
            comb += r.tlbie.eq(0)
            comb += r.doall.eq(0)
            comb += r.tlbld.eq(0)
            comb += r.mmu_req.eq(0)
        with m.If(~(r1.full & r0_full)):
            sync += r0.eq(r)
            sync += r0_full.eq(r.req.valid)

    # TLB
    # Operates in the second cycle on the request latched in r0.req.
    # TLB updates write the entry at the end of the second cycle.
    def tlb_read(self, m, m_in, d_in, r0_stall, tlb_valid_way,
                 tlb_tag_way, tlb_pte_way, dtlb_valid_bits,
                 dtlb_tags, dtlb_ptes):

        comb = m.d.comb
        sync = m.d.sync

        index = Signal(TLB_SET_BITS)
        addrbits = Signal(TLB_SET_BITS)

        amin = TLB_LG_PGSZ
        amax = TLB_LG_PGSZ + TLB_SET_BITS

        with m.If(m_in.valid):
            comb += addrbits.eq(m_in.addr[amin : amax])
        with m.Else():
            comb += addrbits.eq(d_in.addr[amin : amax])
        comb += index.eq(addrbits)

        # If we have any op and the previous op isn't finished,
        # then keep the same output for next cycle.
        with m.If(~r0_stall):
            sync += tlb_valid_way.eq(dtlb_valid_bits[index])
            sync += tlb_tag_way.eq(dtlb_tags[index])
            sync += tlb_pte_way.eq(dtlb_ptes[index])

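    # With the defaults (TLB_LG_PGSZ=12, TLB_SET_BITS=6) the set index
    # is simply EA[12:18]: e.g. EA 0x13456 selects dtlb entry
    # (0x13456 >> 12) & 0x3f = 0x13.
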
    # Generate TLB PLRUs
    def maybe_tlb_plrus(self, m, r1, tlb_plru_victim):
        comb = m.d.comb
        sync = m.d.sync

        if TLB_NUM_WAYS == 1:
            return  # no PLRU needed with a single way (VHDL generate)

        for i in range(TLB_SET_SIZE):
            # TLB PLRU interface
            tlb_plru = PLRU(TLB_WAY_BITS)
            setattr(m.submodules, "tlb_plru_%d" % i, tlb_plru)
            tlb_plru_acc = Signal(TLB_WAY_BITS)
            tlb_plru_acc_en = Signal()
            tlb_plru_out = Signal(TLB_WAY_BITS)

            comb += tlb_plru.acc.eq(tlb_plru_acc)
            comb += tlb_plru.acc_en.eq(tlb_plru_acc_en)
            comb += tlb_plru_out.eq(tlb_plru.lru)

            # PLRU interface
            with m.If(r1.tlb_hit_index == i):
                comb += tlb_plru_acc_en.eq(r1.tlb_hit)
            with m.Else():
                comb += tlb_plru_acc_en.eq(0)
            comb += tlb_plru_acc.eq(r1.tlb_hit_way)

            comb += tlb_plru_victim[i].eq(tlb_plru_out)

    def tlb_search(self, m, tlb_req_index, r0, r0_valid, tlb_valid_way,
                   tlb_tag_way, tlb_pte_way, pte, tlb_hit, tlb_hit_way,
                   valid_ra, perm_attr, ra):

        comb = m.d.comb
        sync = m.d.sync

        hitway = Signal(TLB_WAY_BITS)
        hit = Signal()
        eatag = Signal(TLB_EA_TAG_BITS)

        TLB_LG_END = TLB_LG_PGSZ + TLB_SET_BITS
        comb += tlb_req_index.eq(r0.req.addr[TLB_LG_PGSZ : TLB_LG_END])
        comb += eatag.eq(r0.req.addr[TLB_LG_END : 64])

        for i in range(TLB_NUM_WAYS):
            with m.If(tlb_valid_way[i]
                      & (read_tlb_tag(i, tlb_tag_way) == eatag)):
                comb += hitway.eq(i)
                comb += hit.eq(1)

        comb += tlb_hit.eq(hit & r0_valid)
        comb += tlb_hit_way.eq(hitway)

        with m.If(tlb_hit):
            comb += pte.eq(read_tlb_pte(hitway, tlb_pte_way))
        with m.Else():
            comb += pte.eq(0)
        comb += valid_ra.eq(tlb_hit | ~r0.req.virt_mode)
        with m.If(r0.req.virt_mode):
            comb += ra.eq(Cat(Const(0, ROW_OFF_BITS),
                              r0.req.addr[ROW_OFF_BITS:TLB_LG_PGSZ],
                              pte[TLB_LG_PGSZ:REAL_ADDR_BITS]))
            comb += perm_attr.eq(extract_perm_attr(pte))
        with m.Else():
            comb += ra.eq(Cat(Const(0, ROW_OFF_BITS),
                              r0.req.addr[ROW_OFF_BITS:REAL_ADDR_BITS]))

            comb += perm_attr.reference.eq(1)
            comb += perm_attr.changed.eq(1)
            comb += perm_attr.priv.eq(1)
            comb += perm_attr.nocache.eq(0)
            comb += perm_attr.rd_perm.eq(1)
            comb += perm_attr.wr_perm.eq(1)

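    # Worked example for tlb_search with the defaults (TLB_LG_PGSZ=12,
    # TLB_SET_BITS=6): tlb_req_index = EA[12:18] and the EA tag is
    # EA[18:64], i.e. TLB_EA_TAG_BITS = 64 - 18 = 46 bits wide; the
    # real address swaps pte[12:56] in above the untranslated page
    # offset.
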
    def tlb_update(self, m, r0_valid, r0, dtlb_valid_bits, tlb_req_index,
                   tlb_hit_way, tlb_hit, tlb_plru_victim, tlb_tag_way,
                   dtlb_tags, tlb_pte_way, dtlb_ptes):

        comb = m.d.comb
        sync = m.d.sync

        # variable tlbie : std_ulogic;
        # variable tlbwe : std_ulogic;
        # variable repl_way : tlb_way_t;
        # variable eatag : tlb_tag_t;
        # variable tagset : tlb_way_tags_t;
        # variable pteset : tlb_way_ptes_t;
        # type tlb_tags_t is array(tlb_index_t) of tlb_way_tags_t;
        # --> Array([Signal(log(way_tags length)) for i in range(number of tlbs)])

        tlbie = Signal()
        tlbwe = Signal()
        repl_way = Signal(TLB_WAY_BITS)
        eatag = Signal(TLB_EA_TAG_BITS)
        tagset = TLBWayTags()
        pteset = TLBWayPtes()

        comb += tlbie.eq(r0_valid & r0.tlbie)
        comb += tlbwe.eq(r0_valid & r0.tlbld)

        with m.If(tlbie & r0.doall):
            # clear all valid bits at once
            for i in range(TLB_SET_SIZE):
                sync += dtlb_valid_bits[i].eq(0)

        with m.Elif(tlbie):
            with m.If(tlb_hit):
                sync += dtlb_valid_bits[tlb_req_index][tlb_hit_way].eq(0)
        with m.Elif(tlbwe):
            with m.If(tlb_hit):
                comb += repl_way.eq(tlb_hit_way)
            with m.Else():
                comb += repl_way.eq(tlb_plru_victim[tlb_req_index])
            comb += eatag.eq(r0.req.addr[TLB_LG_PGSZ + TLB_SET_BITS:64])
            comb += tagset.eq(tlb_tag_way)
            sync += write_tlb_tag(repl_way, tagset, eatag)
            sync += dtlb_tags[tlb_req_index].eq(tagset)
            comb += pteset.eq(tlb_pte_way)
            sync += write_tlb_pte(repl_way, pteset, r0.req.data)
            sync += dtlb_ptes[tlb_req_index].eq(pteset)
            sync += dtlb_valid_bits[tlb_req_index][repl_way].eq(1)

    # Generate PLRUs
    def maybe_plrus(self, m, r1, plru_victim):

        comb = m.d.comb
        sync = m.d.sync

        for i in range(NUM_LINES):
            # PLRU interface (cache ways, hence WAY_BITS)
            plru = PLRU(WAY_BITS)
            setattr(m.submodules, "plru%d" % i, plru)
            plru_acc = Signal(WAY_BITS)
            plru_acc_en = Signal()
            plru_out = Signal(WAY_BITS)

            comb += plru.acc.eq(plru_acc)
            comb += plru.acc_en.eq(plru_acc_en)
            comb += plru_out.eq(plru.lru)

            with m.If(r1.hit_index == i):
                comb += plru_acc_en.eq(r1.cache_hit)

            comb += plru_acc.eq(r1.hit_way)
            comb += plru_victim[i].eq(plru_out)

    # Cache tag RAM read port
    def cache_tag_read(self, m, r0_stall, req_index, m_in, d_in,
                       cache_tag_set, cache_tags):

        comb = m.d.comb
        sync = m.d.sync

        index = Signal(INDEX_BITS)

        with m.If(r0_stall):
            comb += index.eq(req_index)
        with m.Elif(m_in.valid):
            comb += index.eq(get_index(m_in.addr))
        with m.Else():
            comb += index.eq(get_index(d_in.addr))
        sync += cache_tag_set.eq(cache_tags[index])

    # Cache request parsing and hit detection
    def dcache_request(self, m, r0, ra, req_index, req_row, req_tag,
                       r0_valid, r1, cache_valid_bits, replace_way,
                       use_forward1_next, use_forward2_next,
                       req_hit_way, plru_victim, rc_ok, perm_attr,
                       valid_ra, perm_ok, access_ok, req_op, req_go,
                       r0_stall, m_in, early_req_row, d_in,
                       tlb_valid_way, tlb_pte_way, tlb_hit, tlb_hit_way,
                       cache_tag_set, req_same_tag, cancel_store):

        comb = m.d.comb
        sync = m.d.sync

        is_hit = Signal()
        hit_way = Signal(WAY_BITS)
        op = Signal(Op)
        opsel = Signal(3)
        go = Signal()
        nc = Signal()
        s_hit = Signal()
        s_tag = Signal(TAG_BITS)
        s_pte = Signal(TLB_PTE_BITS)
        s_ra = Signal(REAL_ADDR_BITS)
        hit_set = Signal(TLB_NUM_WAYS)
        hit_way_set = HitWaySet()
        rel_matches = Signal(TLB_NUM_WAYS)
        rel_match = Signal()

        # Extract line, row and tag from request
        comb += req_index.eq(get_index(r0.req.addr))
        comb += req_row.eq(get_row(r0.req.addr))
        comb += req_tag.eq(get_tag(ra))

        comb += go.eq(r0_valid & ~(r0.tlbie | r0.tlbld) & ~r1.ls_error)

        # Test if pending request is a hit on any way.
        # In order to make timing in virtual mode, when we are using
        # the TLB, we compare each way with each of the real addresses
        # from each way of the TLB, and then decide later which
        # match to use.

        with m.If(r0.req.virt_mode):
            comb += rel_matches.eq(0)
            for j in range(TLB_NUM_WAYS):
                comb += s_pte.eq(read_tlb_pte(j, tlb_pte_way))
                comb += s_ra.eq(Cat(r0.req.addr[0:TLB_LG_PGSZ],
                                    s_pte[TLB_LG_PGSZ:REAL_ADDR_BITS]))
                comb += s_tag.eq(get_tag(s_ra))

                for i in range(NUM_WAYS):
                    with m.If(go & cache_valid_bits[req_index][i] &
                              (read_tag(i, cache_tag_set) == s_tag)
                              & tlb_valid_way[j]):
                        comb += hit_way_set[j].eq(i)
                        comb += s_hit.eq(1)
                comb += hit_set[j].eq(s_hit)
                with m.If(s_tag == r1.reload_tag):
                    comb += rel_matches[j].eq(1)
            with m.If(tlb_hit):
                comb += is_hit.eq(hit_set[tlb_hit_way])
                comb += hit_way.eq(hit_way_set[tlb_hit_way])
                comb += rel_match.eq(rel_matches[tlb_hit_way])
        with m.Else():
            comb += s_tag.eq(get_tag(r0.req.addr))
            for i in range(NUM_WAYS):
                with m.If(go & cache_valid_bits[req_index][i] &
                          (read_tag(i, cache_tag_set) == s_tag)):
                    comb += hit_way.eq(i)
                    comb += is_hit.eq(1)
            with m.If(s_tag == r1.reload_tag):
                comb += rel_match.eq(1)
        comb += req_same_tag.eq(rel_match)

        # See if the request matches the line currently being reloaded
        with m.If((r1.state == State.RELOAD_WAIT_ACK) &
                  (req_index == r1.store_index) & rel_match):
            # For a store, consider this a hit even if the row isn't
            # valid since it will be by the time we perform the store.
            # For a load, check the appropriate row valid bit.
            valid = r1.rows_valid[req_row % ROW_PER_LINE]
            comb += is_hit.eq(~r0.req.load | valid)
            comb += hit_way.eq(replace_way)

        # Whether to use forwarded data for a load or not
        comb += use_forward1_next.eq(0)
        with m.If((get_row(r1.req.real_addr) == req_row)
                  & (r1.req.hit_way == hit_way)):
            # Only need to consider r1.write_bram here, since if we
            # are writing refill data here, then we don't have a
            # cache hit this cycle on the line being refilled.
            # (There is the possibility that the load following the
            # load miss that started the refill could be to the old
            # contents of the victim line, since it is a couple of
            # cycles after the refill starts before we see the updated
            # cache tag. In that case we don't use the bypass.)
            comb += use_forward1_next.eq(r1.write_bram)
        comb += use_forward2_next.eq(0)
        with m.If((r1.forward_row1 == req_row) &
                  (r1.forward_way1 == hit_way)):
            comb += use_forward2_next.eq(r1.forward_valid1)

        # The way that matched on a hit
        comb += req_hit_way.eq(hit_way)

        # The way to replace on a miss
        with m.If(r1.write_tag):
            comb += replace_way.eq(plru_victim[r1.store_index])
        with m.Else():
            comb += replace_way.eq(r1.store_way)

        # work out whether we have permission for this access
        # NB we don't yet implement AMR, thus no KUAP
        comb += rc_ok.eq(perm_attr.reference
                         & (r0.req.load | perm_attr.changed))
        comb += perm_ok.eq((r0.req.priv_mode | ~perm_attr.priv)
                           & (perm_attr.wr_perm
                              | (r0.req.load & perm_attr.rd_perm)))
        comb += access_ok.eq(valid_ra & perm_ok & rc_ok)

        # Combine the request and cache hit status to decide what
        # operation needs to be done
        comb += nc.eq(r0.req.nc | perm_attr.nocache)
        comb += op.eq(Op.OP_NONE)
        with m.If(go):
            with m.If(~access_ok):
                comb += op.eq(Op.OP_BAD)
            with m.Elif(cancel_store):
                comb += op.eq(Op.OP_STCX_FAIL)
            with m.Else():
                comb += opsel.eq(Cat(is_hit, nc, r0.req.load))
                with m.Switch(opsel):
                    with m.Case(Const(0b101, 3)):
                        comb += op.eq(Op.OP_LOAD_HIT)
                    with m.Case(Const(0b100, 3)):
                        comb += op.eq(Op.OP_LOAD_MISS)
                    with m.Case(Const(0b110, 3)):
                        comb += op.eq(Op.OP_LOAD_NC)
                    with m.Case(Const(0b001, 3)):
                        comb += op.eq(Op.OP_STORE_HIT)
                    with m.Case(Const(0b000, 3)):
                        comb += op.eq(Op.OP_STORE_MISS)
                    with m.Case(Const(0b010, 3)):
                        comb += op.eq(Op.OP_STORE_MISS)
                    with m.Case(Const(0b011, 3)):
                        comb += op.eq(Op.OP_BAD)
                    with m.Case(Const(0b111, 3)):
                        comb += op.eq(Op.OP_BAD)
                    with m.Default():
                        comb += op.eq(Op.OP_NONE)
        comb += req_op.eq(op)
        comb += req_go.eq(go)
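
        # opsel decode summary (Cat is LSB-first, so opsel is
        # {load, nc, is_hit} with load as the MSB):
        #
        #   load nc hit   op
        #    1   0   1    OP_LOAD_HIT
        #    1   0   0    OP_LOAD_MISS
        #    1   1   x    OP_LOAD_NC (110) / OP_BAD (111: NC hit)
        #    0   0   1    OP_STORE_HIT
        #    0   0   0    OP_STORE_MISS
        #    0   1   x    OP_STORE_MISS (010) / OP_BAD (011: NC hit)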

        # Version of the row number that is valid one cycle earlier
        # in the cases where we need to read the cache data BRAM.
        # If we're stalling then we need to keep reading the last
        # row requested.
        with m.If(~r0_stall):
            with m.If(m_in.valid):
                comb += early_req_row.eq(get_row(m_in.addr))
            with m.Else():
                comb += early_req_row.eq(get_row(d_in.addr))
        with m.Else():
            comb += early_req_row.eq(req_row)

    # Handle load-with-reservation and store-conditional instructions
    def reservation_comb(self, m, cancel_store, set_rsrv, clear_rsrv,
                         r0_valid, r0, reservation):

        comb = m.d.comb
        sync = m.d.sync

        with m.If(r0_valid & r0.req.reserve):

            # XXX generate alignment interrupt if address
            # is not aligned XXX or if r0.req.nc = '1'
            with m.If(r0.req.load):
                comb += set_rsrv.eq(1) # load with reservation
            with m.Else():
                comb += clear_rsrv.eq(1) # store conditional
                with m.If(~reservation.valid |
                          (r0.req.addr[LINE_OFF_BITS:64] !=
                           reservation.addr)):
                    comb += cancel_store.eq(1)

    def reservation_reg(self, m, r0_valid, access_ok, set_rsrv,
                        clear_rsrv, reservation, r0):

        comb = m.d.comb
        sync = m.d.sync

        with m.If(r0_valid & access_ok):
            with m.If(clear_rsrv):
                sync += reservation.valid.eq(0)
            with m.Elif(set_rsrv):
                sync += reservation.valid.eq(1)
                sync += reservation.addr.eq(r0.req.addr[LINE_OFF_BITS:64])
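
    # Flow (mirrors microwatt): a load-with-reservation sets
    # reservation.valid and records the line address; a later
    # store-conditional clears it, and is cancelled (OP_STCX_FAIL)
    # when no reservation is held or the line address differs.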

    # Return data for loads & completion control logic
    def writeback_control(self, m, r1, cache_out, d_out, m_out):

        comb = m.d.comb
        sync = m.d.sync

        data_out = Signal(64)
        data_fwd = Signal(64)

        # Use the bypass if are reading the row that was
        # written 1 or 2 cycles ago, including for the
        # slow_valid = 1 case (i.e. completing a load
        # miss or a non-cacheable load).
        with m.If(r1.use_forward1):
            comb += data_fwd.eq(r1.forward_data1)
        with m.Else():
            comb += data_fwd.eq(r1.forward_data2)

        comb += data_out.eq(cache_out[r1.hit_way])

        for i in range(8):
            with m.If(r1.forward_sel[i]):
                dsel = data_fwd.word_select(i, 8)
                comb += data_out.word_select(i, 8).eq(dsel)

        comb += d_out.valid.eq(r1.ls_valid)
        comb += d_out.data.eq(data_out)
        comb += d_out.store_done.eq(~r1.stcx_fail)
        comb += d_out.error.eq(r1.ls_error)
        comb += d_out.cache_paradox.eq(r1.cache_paradox)

        # Outputs to MMU
        comb += m_out.done.eq(r1.mmu_done)
        comb += m_out.err.eq(r1.mmu_error)
        comb += m_out.data.eq(data_out)

        # We have a valid load or store hit or we just completed
        # a slow op such as a load miss, a NC load or a store
        #
        # Note: the load hit is delayed by one cycle. However it
        # can still not collide with r.slow_valid (well unless I
        # miscalculated) because slow_valid can only be set on a
        # subsequent request and not on its first cycle (the state
        # machine must have advanced), which makes slow_valid
        # at least 2 cycles from the previous hit_load_valid.

        # Sanity: Only one of these must be set in any given cycle

        if False: # TODO: need Display to get this to work
            assert (r1.slow_valid & r1.stcx_fail) != 1, \
                "unexpected slow_valid collision with stcx_fail " \
                "-!- severity FAILURE"

            assert ((r1.slow_valid | r1.stcx_fail) |
                    r1.hit_load_valid) != 1, \
                "unexpected hit_load_delayed collision with " \
                "slow_valid -!- severity FAILURE"

        with m.If(~r1.mmu_req):
            # Request came from loadstore1...
            # Load hit case is the standard path
            with m.If(r1.hit_load_valid):
                #Display(f"completing load hit data={data_out}")
                pass

            # error cases complete without stalling
            with m.If(r1.ls_error):
                # Display("completing ld/st with error")
                pass

            # Slow ops (load miss, NC, stores)
            with m.If(r1.slow_valid):
                #Display(f"completing store or load miss data={data_out}")
                pass

        with m.Else():
            # Request came from MMU
            with m.If(r1.hit_load_valid):
                # Display(f"completing load hit to MMU, data={m_out.data}")
                pass
            # error cases complete without stalling
            with m.If(r1.mmu_error):
                #Display("completing MMU ld with error")
                pass

            # Slow ops (i.e. load miss)
            with m.If(r1.slow_valid):
                #Display("completing MMU load miss, data={m_out.data}")
                pass

    # Generate a cache RAM for each way. This handles the normal
    # reads, writes from reloads and the special store-hit update
    # path as well.
    #
    # Note: the BRAMs have an extra read buffer, meaning the output
    # is pipelined an extra cycle. This differs from the
    # icache. The writeback logic needs to take that into
    # account by using 1-cycle delayed signals for load hits.
    def rams(self, m, r1, early_req_row, cache_out, replace_way,
             wishbone_in):

        comb = m.d.comb
        sync = m.d.sync

        for i in range(NUM_WAYS):
            do_read = Signal()
            rd_addr = Signal(ROW_BITS)
            do_write = Signal()
            wr_addr = Signal(ROW_BITS)
            wr_data = Signal(WB_DATA_BITS)
            wr_sel = Signal(ROW_SIZE)
            wr_sel_m = Signal(ROW_SIZE)
            _d_out = Signal(WB_DATA_BITS)

            # begin
            # way: entity work.cache_ram
            #     generic map (
            #         ROW_BITS => ROW_BITS,
            #         WIDTH => wishbone_data_bits,
            #         ADD_BUF => true
            #         )
            #     port map (
            #         clk => clk,
            #         rd_en => do_read,
            #         rd_addr => rd_addr,
            #         rd_data => dout,
            #         wr_sel => wr_sel_m,
            #         wr_addr => wr_addr,
            #         wr_data => wr_data
            #         );
            # process(all)
            way = CacheRam(ROW_BITS, WB_DATA_BITS, True)
            setattr(m.submodules, "cacheram_%d" % i, way)
            comb += way.rd_en.eq(do_read)
            comb += way.rd_addr.eq(rd_addr)
            comb += _d_out.eq(way.rd_data)
            comb += way.wr_sel.eq(wr_sel_m)
            comb += way.wr_addr.eq(wr_addr)
            comb += way.wr_data.eq(wr_data)

            # Cache hit reads
            comb += do_read.eq(1)
            comb += rd_addr.eq(early_req_row)
            comb += cache_out[i].eq(_d_out)

            # Write mux:
            #
            # Defaults to wishbone read responses (cache refill)
            #
            # For timing, the mux on wr_data/sel/addr is not
            # dependent on anything other than the current state.

            with m.If(r1.write_bram):
                # Write store data to BRAM. This happens one
                # cycle after the store is in r0.
                comb += wr_data.eq(r1.req.data)
                comb += wr_sel.eq(r1.req.byte_sel)
                comb += wr_addr.eq(get_row(r1.req.real_addr))

                with m.If(i == r1.req.hit_way):
                    comb += do_write.eq(1)
            with m.Else():
                # Otherwise, we might be doing a reload or a DCBZ
                with m.If(r1.dcbz):
                    comb += wr_data.eq(0)
                with m.Else():
                    comb += wr_data.eq(wishbone_in.dat)
                comb += wr_addr.eq(r1.store_row)
                comb += wr_sel.eq(~0) # all 1s

                with m.If((r1.state == State.RELOAD_WAIT_ACK)
                          & wishbone_in.ack & (replace_way == i)):
                    comb += do_write.eq(1)

            # Mask write selects with do_write since BRAM
            # doesn't have a global write-enable
            with m.If(do_write):
                comb += wr_sel_m.eq(wr_sel)

    # Cache hit synchronous machine for the easy case.
    # This handles load hits.
    # It also handles error cases (TLB miss, cache paradox)
    def dcache_fast_hit(self, m, req_op, r0_valid, r0, r1,
                        req_hit_way, req_index, access_ok,
                        tlb_hit, tlb_hit_way, tlb_req_index):

        comb = m.d.comb
        sync = m.d.sync

        with m.If(req_op != Op.OP_NONE):
            #Display(f"op:{req_op} addr:{r0.req.addr} nc: {r0.req.nc}" \
            #        f"idx:{req_index} tag:{req_tag} way: {req_hit_way}"
            #       )
            pass

        with m.If(r0_valid):
            sync += r1.mmu_req.eq(r0.mmu_req)

        # Fast path for load/store hits.
        # Set signals for the writeback controls.
        sync += r1.hit_way.eq(req_hit_way)
        sync += r1.hit_index.eq(req_index)

        with m.If(req_op == Op.OP_LOAD_HIT):
            sync += r1.hit_load_valid.eq(1)
        with m.Else():
            sync += r1.hit_load_valid.eq(0)

        with m.If((req_op == Op.OP_LOAD_HIT) |
                  (req_op == Op.OP_STORE_HIT)):
            sync += r1.cache_hit.eq(1)
        with m.Else():
            sync += r1.cache_hit.eq(0)

        with m.If(req_op == Op.OP_BAD):
            # Display(f"Signalling ld/st error valid_ra={valid_ra}"
            #         f"rc_ok={rc_ok} perm_ok={perm_ok}"
            sync += r1.ls_error.eq(~r0.mmu_req)
            sync += r1.mmu_error.eq(r0.mmu_req)
            sync += r1.cache_paradox.eq(access_ok)

        with m.Else():
            sync += r1.ls_error.eq(0)
            sync += r1.mmu_error.eq(0)
            sync += r1.cache_paradox.eq(0)

        with m.If(req_op == Op.OP_STCX_FAIL):
            sync += r1.stcx_fail.eq(1)
        with m.Else():
            sync += r1.stcx_fail.eq(0)

        # Record TLB hit information for updating TLB PLRU
        sync += r1.tlb_hit.eq(tlb_hit)
        sync += r1.tlb_hit_way.eq(tlb_hit_way)
        sync += r1.tlb_hit_index.eq(tlb_req_index)

    # Memory accesses are handled by this state machine:
    #
    # * Cache load miss/reload (in conjunction with "rams")
    # * Load hits for non-cachable forms
    # * Stores (the collision case is handled in "rams")
    #
    # All wishbone request generation is done here.
    # This machine operates at stage 1.
    def dcache_slow(self, m, r1, use_forward1_next, use_forward2_next,
                    cache_valid_bits, r0, r0_valid, req_op, cache_tag,
                    req_go, ra, wb_in, req_hit_way, req_same_tag,
                    replace_way, stbs_done):

        comb = m.d.comb
        sync = m.d.sync

        req = MemAccessRequest()
        acks = Signal(3)
        adjust_acks = Signal(3)

        sync += r1.use_forward1.eq(use_forward1_next)
        sync += r1.forward_sel.eq(0)

        with m.If(use_forward1_next):
            sync += r1.forward_sel.eq(r1.req.byte_sel)
        with m.Elif(use_forward2_next):
            sync += r1.forward_sel.eq(r1.forward_sel1)

        sync += r1.forward_data2.eq(r1.forward_data1)
        with m.If(r1.write_bram):
            sync += r1.forward_data1.eq(r1.req.data)
            sync += r1.forward_sel1.eq(r1.req.byte_sel)
            sync += r1.forward_way1.eq(r1.req.hit_way)
            sync += r1.forward_row1.eq(get_row(r1.req.real_addr))
            sync += r1.forward_valid1.eq(1)
        with m.Else():
            with m.If(r1.dcbz):
                sync += r1.forward_data1.eq(0)
            with m.Else():
                sync += r1.forward_data1.eq(wb_in.dat)
            sync += r1.forward_sel1.eq(~0) # all 1s
            sync += r1.forward_way1.eq(replace_way)
            sync += r1.forward_row1.eq(r1.store_row)
            sync += r1.forward_valid1.eq(0)

        # One cycle pulses reset
        sync += r1.slow_valid.eq(0)
        sync += r1.write_bram.eq(0)
        sync += r1.inc_acks.eq(0)
        sync += r1.dec_acks.eq(0)

        sync += r1.ls_valid.eq(0)
        # complete tlbies and TLB loads in the third cycle
        sync += r1.mmu_done.eq(r0_valid & (r0.tlbie | r0.tlbld))

        with m.If((req_op == Op.OP_LOAD_HIT)
                  | (req_op == Op.OP_STCX_FAIL)):
            with m.If(~r0.mmu_req):
                sync += r1.ls_valid.eq(1)
            with m.Else():
                sync += r1.mmu_done.eq(1)

        with m.If(r1.write_tag):
            # Store new tag in selected way
            for i in range(NUM_WAYS):
                with m.If(i == replace_way):
                    idx = r1.store_index
                    sync += cache_tag[idx][i * TAG_WIDTH:
                                           (i + 1) * TAG_WIDTH].eq(
                                               r1.reload_tag)
            sync += r1.store_way.eq(replace_way)
            sync += r1.write_tag.eq(0)

        # Take request from r1.req if there is one there,
        # else from req_op, ra, etc.
        with m.If(r1.full):
            comb += req.eq(r1.req)
        with m.Else():
            comb += req.op.eq(req_op)
            comb += req.valid.eq(req_go)
            comb += req.mmu_req.eq(r0.mmu_req)
            comb += req.dcbz.eq(r0.req.dcbz)
            comb += req.real_addr.eq(ra)

            with m.If(~r0.req.dcbz):
                comb += req.data.eq(r0.req.data)
            with m.Else():
                comb += req.data.eq(0)

            # Select all bytes for dcbz
            # and for cacheable loads
            with m.If(r0.req.dcbz | (r0.req.load & ~r0.req.nc)):
                comb += req.byte_sel.eq(~0) # all 1s
            with m.Else():
                comb += req.byte_sel.eq(r0.req.byte_sel)
            comb += req.hit_way.eq(req_hit_way)
            comb += req.same_tag.eq(req_same_tag)

        # Store the incoming request from r0,
        # if it is a slow request
        # Note that r1.full = 1 implies req_op = OP_NONE
        with m.If((req_op == Op.OP_LOAD_MISS)
                  | (req_op == Op.OP_LOAD_NC)
                  | (req_op == Op.OP_STORE_MISS)
                  | (req_op == Op.OP_STORE_HIT)):
            sync += r1.req.eq(req)
            sync += r1.full.eq(1)

        # Main state machine
        with m.Switch(r1.state):

            with m.Case(State.IDLE):
                # r1.wb.adr'left downto 0 in the VHDL: take the
                # bottom len(r1.wb.adr) bits of the real address
                sync += r1.wb.adr.eq(req.real_addr[0:len(r1.wb.adr)])
                sync += r1.wb.sel.eq(req.byte_sel)
                sync += r1.wb.dat.eq(req.data)
                sync += r1.dcbz.eq(req.dcbz)

                # Keep track of our index and way
                # for subsequent stores.
                sync += r1.store_index.eq(get_index(req.real_addr))
                sync += r1.store_row.eq(get_row(req.real_addr))
                sync += r1.end_row_ix.eq(
                    get_row_of_line(get_row(req.real_addr))
                )
                sync += r1.reload_tag.eq(get_tag(req.real_addr))
                sync += r1.req.same_tag.eq(1)

                with m.If(req.op == Op.OP_STORE_HIT):
                    sync += r1.store_way.eq(req.hit_way)

                # Reset per-row valid bits,
                # ready for handling OP_LOAD_MISS
                for i in range(ROW_PER_LINE):
                    sync += r1.rows_valid[i].eq(0)

                with m.Switch(req.op):
                    with m.Case(Op.OP_LOAD_HIT):
                        # stay in IDLE state
                        pass

                    with m.Case(Op.OP_LOAD_MISS):
                        #Display(f"cache miss real addr:" \
                        #        f"{req_real_addr}" \
                        #        f" idx:{get_index(req_real_addr)}" \
                        #        f" tag:{get_tag(req.real_addr)}")

                        # Start the wishbone cycle
                        sync += r1.wb.we.eq(0)
                        sync += r1.wb.cyc.eq(1)
                        sync += r1.wb.stb.eq(1)

                        # Track that we had one request sent
                        sync += r1.state.eq(State.RELOAD_WAIT_ACK)
                        sync += r1.write_tag.eq(1)

                    with m.Case(Op.OP_LOAD_NC):
                        sync += r1.wb.cyc.eq(1)
                        sync += r1.wb.stb.eq(1)
                        sync += r1.wb.we.eq(0)
                        sync += r1.state.eq(State.NC_LOAD_WAIT_ACK)

                    with m.Case(Op.OP_STORE_HIT, Op.OP_STORE_MISS):
                        with m.If(~req.dcbz):
                            sync += r1.state.eq(State.STORE_WAIT_ACK)
                            sync += r1.acks_pending.eq(1)
                            sync += r1.full.eq(0)
                            sync += r1.slow_valid.eq(1)

                            with m.If(~req.mmu_req):
                                sync += r1.ls_valid.eq(1)
                            with m.Else():
                                sync += r1.mmu_done.eq(1)

                            with m.If(req.op == Op.OP_STORE_HIT):
                                sync += r1.write_bram.eq(1)
                        with m.Else():
                            sync += r1.state.eq(State.RELOAD_WAIT_ACK)

                            with m.If(req.op == Op.OP_STORE_MISS):
                                sync += r1.write_tag.eq(1)

                        sync += r1.wb.we.eq(1)
                        sync += r1.wb.cyc.eq(1)
                        sync += r1.wb.stb.eq(1)

                    # OP_NONE and OP_BAD do nothing
                    # OP_BAD & OP_STCX_FAIL were
                    # handled above already
                    with m.Case(Op.OP_NONE):
                        pass
                    with m.Case(Op.OP_BAD):
                        pass
                    with m.Case(Op.OP_STCX_FAIL):
                        pass

            with m.Case(State.RELOAD_WAIT_ACK):
                # Requests are all sent if stb is 0
                comb += stbs_done.eq(~r1.wb.stb)

                with m.If(~wb_in.stall & ~stbs_done):
                    # That was the last word?
                    # We are done sending.
                    # Clear stb and set stbs_done
                    # so we can handle an eventual
                    # last ack on the same cycle.
                    with m.If(is_last_row_addr(
                              r1.wb.adr, r1.end_row_ix)):
                        sync += r1.wb.stb.eq(0)
                        comb += stbs_done.eq(1)

                    # Calculate the next row address
                    sync += r1.wb.adr.eq(next_row_addr(r1.wb.adr))

                # Incoming acks processing
                sync += r1.forward_valid1.eq(wb_in.ack)
                with m.If(wb_in.ack):
                    # XXX needs an Array bit-accessor here
                    sync += r1.rows_valid[
                                r1.store_row % ROW_PER_LINE].eq(1)

                    # If this is the data we were looking for,
                    # we can complete the request next cycle.
                    # Compare the whole address in case the
                    # request in r1.req is not the one that
                    # started this refill.
                    with m.If(r1.full & r1.req.same_tag &
                              ((r1.dcbz & r1.req.dcbz) |
                               (~r1.dcbz &
                                (r1.req.op == Op.OP_LOAD_MISS))) &
                              (r1.store_row ==
                               get_row(r1.req.real_addr))):
                        sync += r1.full.eq(0)
                        sync += r1.slow_valid.eq(1)
                        with m.If(~r1.mmu_req):
                            sync += r1.ls_valid.eq(1)
                        with m.Else():
                            sync += r1.mmu_done.eq(1)
                        sync += r1.forward_sel.eq(~0) # all 1s
                        sync += r1.use_forward1.eq(1)

                    # Check for completion
                    with m.If(stbs_done &
                              is_last_row(r1.store_row,
                                          r1.end_row_ix)):
                        # Complete wishbone cycle
                        sync += r1.wb.cyc.eq(0)

                        # Cache line is now valid
                        cv = cache_valid_bits[r1.store_index]
                        sync += cv[r1.store_way].eq(1)
                        sync += r1.state.eq(State.IDLE)

                    # Increment store row counter
                    sync += r1.store_row.eq(next_row(r1.store_row))

            with m.Case(State.STORE_WAIT_ACK):
                comb += stbs_done.eq(~r1.wb.stb)
                comb += acks.eq(r1.acks_pending)

                with m.If(r1.inc_acks != r1.dec_acks):
                    with m.If(r1.inc_acks):
                        comb += adjust_acks.eq(acks + 1)
                    with m.Else():
                        comb += adjust_acks.eq(acks - 1)
                with m.Else():
                    comb += adjust_acks.eq(acks)

                sync += r1.acks_pending.eq(adjust_acks)
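
                # acks_pending is a 3-bit counter, so at most 7
                # wishbone stores can be in flight; the
                # (adjust_acks < 7) guard below stops stb being
                # re-asserted when the counter would saturate.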

                # Clear stb when slave accepted request
                with m.If(~wb_in.stall):
                    # See if there is another store waiting
                    # to be done which is in the same real page.
                    with m.If(req.valid):
                        ra = req.real_addr[0:SET_SIZE_BITS]
                        sync += r1.wb.adr[0:SET_SIZE_BITS].eq(ra)
                        sync += r1.wb.dat.eq(req.data)
                        sync += r1.wb.sel.eq(req.byte_sel)

                    # (separate check, as in the VHDL: the address
                    # update above and the stb decision are
                    # independent)
                    with m.If((adjust_acks < 7) & req.same_tag &
                              ((req.op == Op.OP_STORE_MISS)
                               | (req.op == Op.OP_STORE_HIT))):
                        sync += r1.wb.stb.eq(1)
                        comb += stbs_done.eq(0)

                        with m.If(req.op == Op.OP_STORE_HIT):
                            sync += r1.write_bram.eq(1)
                        sync += r1.full.eq(0)
                        sync += r1.slow_valid.eq(1)

                        # Store requests never come from the MMU
                        sync += r1.ls_valid.eq(1)
                        comb += stbs_done.eq(0)
                        sync += r1.inc_acks.eq(1)
                    with m.Else():
                        sync += r1.wb.stb.eq(0)
                        comb += stbs_done.eq(1)

                # Got ack ? See if complete.
                with m.If(wb_in.ack):
                    with m.If(stbs_done & (adjust_acks == 1)):
                        sync += r1.state.eq(State.IDLE)
                        sync += r1.wb.cyc.eq(0)
                        sync += r1.wb.stb.eq(0)
                    sync += r1.dec_acks.eq(1)

            with m.Case(State.NC_LOAD_WAIT_ACK):
                # Clear stb when slave accepted request
                with m.If(~wb_in.stall):
                    sync += r1.wb.stb.eq(0)

                # Got ack ? complete.
                with m.If(wb_in.ack):
                    sync += r1.state.eq(State.IDLE)
                    sync += r1.full.eq(0)
                    sync += r1.slow_valid.eq(1)

                    with m.If(~r1.mmu_req):
                        sync += r1.ls_valid.eq(1)
                    with m.Else():
                        sync += r1.mmu_done.eq(1)

                    sync += r1.forward_sel.eq(~0) # all 1s
                    sync += r1.use_forward1.eq(1)
                    sync += r1.wb.cyc.eq(0)
                    sync += r1.wb.stb.eq(0)

    # dc_log: if LOG_LENGTH > 0 generate
    # TODO learn how to translate vhdl generate into nmigen
    def dcache_log(self, m, r1, valid_ra, tlb_hit_way, stall_out,
                   d_out, wb_in, log_out, req_op):

        comb = m.d.comb
        sync = m.d.sync

        # signal log_data : std_ulogic_vector(19 downto 0);
        log_data = Signal(20)

        # begin
        # dcache_log: process(clk)
        # begin
        #     if rising_edge(clk) then
        #         log_data <= r1.wb.adr(5 downto 3) &
        #                     wishbone_in.stall &
        #                     wishbone_in.ack &
        #                     r1.wb.stb & r1.wb.cyc &
        #                     d_out.error &
        #                     d_out.valid &
        #                     std_ulogic_vector(
        #                         to_unsigned(op_t'pos(req_op), 3)) &
        #                     stall_out &
        #                     std_ulogic_vector(
        #                         to_unsigned(tlb_hit_way, 3)) &
        #                     valid_ra &
        #                     std_ulogic_vector(
        #                         to_unsigned(state_t'pos(r1.state), 3));
        # field widths here follow the signal definitions rather than
        # the VHDL's fixed 3-bit to_unsigned conversions
        sync += log_data.eq(Cat(
                    r1.state, valid_ra, tlb_hit_way[:3],
                    stall_out, req_op, d_out.valid, d_out.error,
                    r1.wb.cyc, r1.wb.stb, wb_in.ack, wb_in.stall,
                    r1.wb.adr[3:6]
                ))
        # end if;
        # end process;
        # log_out <= log_data;
        # TODO ??? I am very confused need help
        comb += log_out.eq(log_data)
        # end generate;
        # end;

    def elaborate(self, platform):
        LINE_SIZE = self.LINE_SIZE
        NUM_LINES = self.NUM_LINES
        NUM_WAYS = self.NUM_WAYS
        TLB_SET_SIZE = self.TLB_SET_SIZE
        TLB_NUM_WAYS = self.TLB_NUM_WAYS
        TLB_LG_PGSZ = self.TLB_LG_PGSZ
        LOG_LENGTH = self.LOG_LENGTH

        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        # BRAM organisation: We never access more than
        # -- wishbone_data_bits at a time so to save
        # -- resources we make the array only that wide, and
        # -- use consecutive indices for to make a cache "line"
        # --
        # -- ROW_SIZE is the width in bytes of the BRAM
        # -- (based on WB, so 64-bits)
        ROW_SIZE = WB_DATA_BITS // 8

        # ROW_PER_LINE is the number of row (wishbone
        # transactions) in a line
        ROW_PER_LINE = LINE_SIZE // ROW_SIZE

        # BRAM_ROWS is the number of rows in BRAM needed
        # to represent the full dcache
        BRAM_ROWS = NUM_LINES * ROW_PER_LINE


        # Bit fields counts in the address

        # REAL_ADDR_BITS is the number of real address
        # bits that we store
        REAL_ADDR_BITS = 56

        # ROW_BITS is the number of bits to select a row
        ROW_BITS = log2_int(BRAM_ROWS)

        # ROW_LINE_BITS is the number of bits to select
        # a row within a line
        ROW_LINE_BITS = log2_int(ROW_PER_LINE)

        # LINE_OFF_BITS is the number of bits for
        # the offset in a cache line
        LINE_OFF_BITS = log2_int(LINE_SIZE)

        # ROW_OFF_BITS is the number of bits for
        # the offset in a row
        ROW_OFF_BITS = log2_int(ROW_SIZE)

        # INDEX_BITS is the number of bits to
        # select a cache line
        INDEX_BITS = log2_int(NUM_LINES)

        # SET_SIZE_BITS is the log base 2 of the set size
        SET_SIZE_BITS = LINE_OFF_BITS + INDEX_BITS

        # TAG_BITS is the number of bits of
        # the tag part of the address
        TAG_BITS = REAL_ADDR_BITS - SET_SIZE_BITS

        # TAG_WIDTH is the width in bits of each way of the tag RAM
        TAG_WIDTH = TAG_BITS + 7 - ((TAG_BITS + 7) % 8)

        # WAY_BITS is the number of bits to select a way
        WAY_BITS = log2_int(NUM_WAYS)

        # Example of layout for 32 lines of 64 bytes:
        #
        # ..  tag    |index|  line  |
        # ..         |   row   |    |
        # ..         |     |---|    | ROW_LINE_BITS  (3)
        # ..         |     |--- - --| LINE_OFF_BITS (6)
        # ..         |         |- --| ROW_OFF_BITS  (3)
        # ..         |----- ---|    | ROW_BITS      (8)
        # ..         |-----|        | INDEX_BITS    (5)
        # .. --------|              | TAG_BITS      (45)

        TAG_RAM_WIDTH = TAG_WIDTH * NUM_WAYS

        def CacheTagArray():
            return Array(CacheTagSet() for x in range(NUM_LINES))

        def CacheValidBitsArray():
            return Array(CacheWayValidBits() for x in range(NUM_LINES))

        def RowPerLineValidArray():
            return Array(Signal() for x in range(ROW_PER_LINE))

        # Storage. Hopefully "cache_rows" is a BRAM, the rest is LUTs
        cache_tags = CacheTagArray()
        cache_tag_set = Signal(TAG_RAM_WIDTH)
        cache_valid_bits = CacheValidBitsArray()

        # TODO attribute ram_style : string;
        # TODO attribute ram_style of cache_tags : signal is "distributed";

        # L1 TLB
        TLB_SET_BITS = log2_int(TLB_SET_SIZE)
        TLB_WAY_BITS = log2_int(TLB_NUM_WAYS)
        TLB_EA_TAG_BITS = 64 - (TLB_LG_PGSZ + TLB_SET_BITS)
        TLB_TAG_WAY_BITS = TLB_NUM_WAYS * TLB_EA_TAG_BITS
        TLB_PTE_BITS = 64
        TLB_PTE_WAY_BITS = TLB_NUM_WAYS * TLB_PTE_BITS

        def TLBValidBitsArray():
            return Array(
                Signal(TLB_NUM_WAYS) for x in range(TLB_SET_SIZE)
            )

        def TLBTagsArray():
            return Array(
                Signal(TLB_TAG_WAY_BITS) for x in range(TLB_SET_SIZE)
            )

        def TLBPtesArray():
            return Array(
                Signal(TLB_PTE_WAY_BITS) for x in range(TLB_SET_SIZE)
            )

        def HitWaySet():
            return Array(Signal(NUM_WAYS) for x in range(TLB_NUM_WAYS))

        """note: these are passed to nmigen.hdl.Memory as "attributes".
           don't know how, just that they are.
        """
        dtlb_valid_bits = TLBValidBitsArray()
        dtlb_tags = TLBTagsArray()
        dtlb_ptes = TLBPtesArray()
        # TODO attribute ram_style of
        #  dtlb_tags : signal is "distributed";
        # TODO attribute ram_style of
        #  dtlb_ptes : signal is "distributed";

        r0 = RegStage0()
        r0_full = Signal()

        r1 = RegStage1()

        reservation = Reservation()

        # Async signals on incoming request
        req_index = Signal(NUM_LINES)
        req_row = Signal(BRAM_ROWS)
        req_hit_way = Signal(WAY_BITS)
        req_tag = Signal(TAG_BITS)
        req_op = Signal(Op)
        req_data = Signal(64)
        req_same_tag = Signal()
        req_go = Signal()

        early_req_row = Signal(BRAM_ROWS)

        cancel_store = Signal()
        set_rsrv = Signal()
        clear_rsrv = Signal()

        r0_valid = Signal()
        r0_stall = Signal()

        use_forward1_next = Signal()
        use_forward2_next = Signal()

        # Cache RAM interface
        def CacheRamOut():
            return Array(Signal(WB_DATA_BITS) for x in range(NUM_WAYS))

        cache_out = CacheRamOut()

        # PLRU output interface
        def PLRUOut():
            return Array(Signal(WAY_BITS) for x in range(NUM_LINES))

        plru_victim = PLRUOut()
        replace_way = Signal(WAY_BITS)

        # Wishbone read/write/cache write formatting signals
        bus_sel = Signal(8)

        # TLB signals
        tlb_tag_way = Signal(TLB_TAG_WAY_BITS)
        tlb_pte_way = Signal(TLB_PTE_WAY_BITS)
        tlb_valid_way = Signal(TLB_NUM_WAYS)
        tlb_req_index = Signal(TLB_SET_SIZE)
        tlb_hit = Signal()
        tlb_hit_way = Signal(TLB_NUM_WAYS)
        pte = Signal(TLB_PTE_BITS)
        ra = Signal(REAL_ADDR_BITS)
        valid_ra = Signal()
        perm_attr = PermAttr()
        rc_ok = Signal()
        perm_ok = Signal()
        access_ok = Signal()

        # TLB PLRU output interface
        def TLBPLRUOut():
            return Array(
                Signal(TLB_WAY_BITS) for x in range(TLB_SET_SIZE)
            )

        tlb_plru_victim = TLBPLRUOut()

        # Helper functions to decode incoming requests
        #
        # Return the cache line index (tag index) for an address
        def get_index(addr):
            return addr[LINE_OFF_BITS:SET_SIZE_BITS]

        # Return the cache row index (data memory) for an address
        def get_row(addr):
            return addr[ROW_OFF_BITS:SET_SIZE_BITS]

        # Return the index of a row within a line
        def get_row_of_line(row):
            return row[0:ROW_LINE_BITS]

        # Returns whether this is the last row of a line
        def is_last_row_addr(addr, last):
            return addr[ROW_OFF_BITS:LINE_OFF_BITS] == last

        # Returns whether this is the last row of a line
        def is_last_row(row, last):
            return get_row_of_line(row) == last
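
        # Worked example with the default geometry, for a (real)
        # address such as 0x12345:
        #   get_index -> bits [6:11]  = (0x12345 >> 6) & 0x1f = 0xd
        #   get_row   -> bits [3:11]  = (0x12345 >> 3) & 0xff = 0x68
        #   get_tag   -> bits [11:56] = 0x12345 >> 11         = 0x24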

        # Return the address of the next row in the current cache line
        def next_row_addr(addr):
            # the VHDL uses a dedicated 3-bit adder here; we slice
            # out the row-within-line field, add 1 and let it wrap
            row_idx = addr[ROW_OFF_BITS:LINE_OFF_BITS]
            return Cat(addr[:ROW_OFF_BITS],
                       (row_idx + 1)[:ROW_LINE_BITS],
                       addr[LINE_OFF_BITS:])

        # Return the next row in the current cache line. We use a
        # dedicated function in order to limit the size of the
        # generated adder to be only the bits within a cache line
        # (3 bits with default settings)
        def next_row(row):
            row_idx = row[:ROW_LINE_BITS]
            return Cat((row_idx + 1)[:ROW_LINE_BITS],
                       row[ROW_LINE_BITS:])

        # Get the tag value from the address
        def get_tag(addr):
            return addr[SET_SIZE_BITS:REAL_ADDR_BITS]

        # Read a tag from a tag memory row
        def read_tag(way, tagset):
            return tagset[way * TAG_WIDTH:way * TAG_WIDTH + TAG_BITS]

        # Read a TLB tag from a TLB tag memory row.
        # "way" may be a Signal, so use word_select rather than
        # a python slice.
        def read_tlb_tag(way, tags):
            return tags.word_select(way, TLB_EA_TAG_BITS)

        # Write a TLB tag to a TLB tag memory row (returns an
        # assignment, used as sync += write_tlb_tag(...))
        def write_tlb_tag(way, tags, tag):
            return read_tlb_tag(way, tags).eq(tag)

        # Read a PTE from a TLB PTE memory row
        def read_tlb_pte(way, ptes):
            return ptes.word_select(way, TLB_PTE_BITS)

        # Write a PTE to a TLB PTE memory row (returns an assignment)
        def write_tlb_pte(way, ptes, newpte):
            return read_tlb_pte(way, ptes).eq(newpte)

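        # e.g. in tlb_update above:
        #
        #     sync += write_tlb_pte(repl_way, pteset, r0.req.data)
        #
        # which updates just the repl_way-th 64-bit slot of pteset.
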
        assert (LINE_SIZE % ROW_SIZE) == 0, \
            "LINE_SIZE not multiple of ROW_SIZE"

        assert (LINE_SIZE & (LINE_SIZE - 1)) == 0, \
            "LINE_SIZE not power of 2"

        assert (NUM_LINES & (NUM_LINES - 1)) == 0, \
            "NUM_LINES not power of 2"

        assert (ROW_PER_LINE & (ROW_PER_LINE - 1)) == 0, \
            "ROW_PER_LINE not power of 2"

        assert ROW_BITS == (INDEX_BITS + ROW_LINE_BITS), \
            "geometry bits don't add up"

        assert LINE_OFF_BITS == (ROW_OFF_BITS + ROW_LINE_BITS), \
            "geometry bits don't add up"

        assert REAL_ADDR_BITS == (TAG_BITS + INDEX_BITS
                                  + LINE_OFF_BITS), \
            "geometry bits don't add up"

        assert REAL_ADDR_BITS == (TAG_BITS + ROW_BITS + ROW_OFF_BITS), \
            "geometry bits don't add up"

        assert 64 == WB_DATA_BITS, \
            "Can't yet handle a wishbone width that isn't 64-bits"

        assert SET_SIZE_BITS <= TLB_LG_PGSZ, \
            "Set indexed by virtual address"

        # we don't yet handle collisions between loadstore1 requests
        # and MMU requests
        comb += self.m_out.stall.eq(0)

        # Hold off the request in r0 when r1 has an uncompleted request
        comb += r0_stall.eq(r0_full & r1.full)
        comb += r0_valid.eq(r0_full & ~r1.full)
        comb += self.stall_out.eq(r0_stall)

        # Wire up wishbone request latch out of stage 1
        comb += self.wb_out.eq(r1.wb)

        # TODO: the stage_0 .. dcache_log methods above still need to
        # be called from here (and the module-level constants hoisted
        # out of elaborate) before this elaborates end-to-end.

        return m


# dcache_tb.vhdl
#
# entity dcache_tb is
# end dcache_tb;
#
# architecture behave of dcache_tb is
#     signal clk : std_ulogic;
#     signal rst : std_ulogic;
#
#     signal d_in : Loadstore1ToDcacheType;
#     signal d_out : DcacheToLoadstore1Type;
#
#     signal m_in : MmuToDcacheType;
#     signal m_out : DcacheToMmuType;
#
#     signal wb_bram_in : wishbone_master_out;
#     signal wb_bram_out : wishbone_slave_out;
#
#     constant clk_period : time := 10 ns;
# begin
#     dcache0: entity work.dcache
#         generic map(
#             LINE_SIZE => 64,
#             NUM_LINES => 4
#             )
#         port map(
#             clk => clk,
#             rst => rst,
#             d_in => d_in,
#             d_out => d_out,
#             m_in => m_in,
#             m_out => m_out,
#             wishbone_out => wb_bram_in,
#             wishbone_in => wb_bram_out
#             );
#
#     -- BRAM Memory slave
#     bram0: entity work.wishbone_bram_wrapper
#         generic map(
#             MEMORY_SIZE => 1024,
#             RAM_INIT_FILE => "icache_test.bin"
#             )
#         port map(
#             clk => clk,
#             rst => rst,
#             wishbone_in => wb_bram_in,
#             wishbone_out => wb_bram_out
#             );
#
#     clk_process: process
#     begin
#         clk <= '0';
#         wait for clk_period/2;
#         clk <= '1';
#         wait for clk_period/2;
#     end process;
#
#     rst_process: process
#     begin
#         rst <= '1';
#         wait for 2*clk_period;
#         rst <= '0';
#         wait;
#     end process;
#
#     stim: process
#     begin
#         -- Clear stuff
#         d_in.valid <= '0';
#         d_in.load <= '0';
#         d_in.nc <= '0';
#         d_in.addr <= (others => '0');
#         d_in.data <= (others => '0');
#         m_in.valid <= '0';
#         m_in.addr <= (others => '0');
#         m_in.pte <= (others => '0');
#
#         wait for 4*clk_period;
#         wait until rising_edge(clk);
#
#         -- Cacheable read of address 4
#         d_in.load <= '1';
#         d_in.nc <= '0';
#         d_in.addr <= x"0000000000000004";
#         d_in.valid <= '1';
#         wait until rising_edge(clk);
#         d_in.valid <= '0';
#
#         wait until rising_edge(clk) and d_out.valid = '1';
#         assert d_out.data = x"0000000100000000"
#             report "data @" & to_hstring(d_in.addr) &
#                 "=" & to_hstring(d_out.data) &
#                 " expected 0000000100000000"
#             severity failure;
#         -- wait for clk_period;
#
#         -- Cacheable read of address 30
#         d_in.load <= '1';
#         d_in.nc <= '0';
#         d_in.addr <= x"0000000000000030";
#         d_in.valid <= '1';
#         wait until rising_edge(clk);
#         d_in.valid <= '0';
#
#         wait until rising_edge(clk) and d_out.valid = '1';
#         assert d_out.data = x"0000000D0000000C"
#             report "data @" & to_hstring(d_in.addr) &
#                 "=" & to_hstring(d_out.data) &
#                 " expected 0000000D0000000C"
#             severity failure;
#
#         -- Non-cacheable read of address 100
#         d_in.load <= '1';
#         d_in.nc <= '1';
#         d_in.addr <= x"0000000000000100";
#         d_in.valid <= '1';
#         wait until rising_edge(clk);
#         d_in.valid <= '0';
#         wait until rising_edge(clk) and d_out.valid = '1';
#         assert d_out.data = x"0000004100000040"
#             report "data @" & to_hstring(d_in.addr) &
#                 "=" & to_hstring(d_out.data) &
#                 " expected 0000004100000040"
#             severity failure;
#
#         wait until rising_edge(clk);
#         wait until rising_edge(clk);
#         wait until rising_edge(clk);
#         wait until rising_edge(clk);
#
#         std.env.finish;
#     end process;
# end;
def dcache_sim(dut):
    # clear stuff
    yield dut.d_in.valid.eq(0)
    yield dut.d_in.load.eq(0)
    yield dut.d_in.nc.eq(0)
    yield dut.d_in.addr.eq(0)
    yield dut.d_in.data.eq(0)
    yield dut.m_in.valid.eq(0)
    yield dut.m_in.addr.eq(0)
    yield dut.m_in.pte.eq(0)
    # wait 4 * clk_period
    yield
    yield
    yield
    yield
    # wait_until rising_edge(clk)
    yield
    # Cacheable read of address 4
    yield dut.d_in.load.eq(1)
    yield dut.d_in.nc.eq(0)
    yield dut.d_in.addr.eq(Const(0x0000000000000004, 64))
    yield dut.d_in.valid.eq(1)
    # wait-until rising_edge(clk)
    yield
    yield dut.d_in.valid.eq(0)
    yield
    while not (yield dut.d_out.valid):
        yield
    data = yield dut.d_out.data
    assert data == 0x0000000100000000, \
        f"data @4={data:x} expected 0000000100000000 " \
        "-!- severity failure"

    # Cacheable read of address 30
    yield dut.d_in.load.eq(1)
    yield dut.d_in.nc.eq(0)
    yield dut.d_in.addr.eq(Const(0x0000000000000030, 64))
    yield dut.d_in.valid.eq(1)
    yield
    yield dut.d_in.valid.eq(0)
    yield
    while not (yield dut.d_out.valid):
        yield
    data = yield dut.d_out.data
    assert data == 0x0000000D0000000C, \
        f"data @30={data:x} expected 0000000D0000000C " \
        "-!- severity failure"

    # Non-cacheable read of address 100
    yield dut.d_in.load.eq(1)
    yield dut.d_in.nc.eq(1)
    yield dut.d_in.addr.eq(Const(0x0000000000000100, 64))
    yield dut.d_in.valid.eq(1)
    yield
    yield dut.d_in.valid.eq(0)
    yield
    while not (yield dut.d_out.valid):
        yield
    data = yield dut.d_out.data
    assert data == 0x0000004100000040, \
        f"data @100={data:x} expected 0000004100000040 " \
        "-!- severity failure"

    yield
    yield
    yield
    yield


def test_dcache():
    dut = DCache()
    vl = rtlil.convert(dut, ports=[])
    with open("test_dcache.il", "w") as f:
        f.write(vl)

    run_simulation(dut, dcache_sim(dut), vcd_name='test_dcache.vcd')


if __name__ == '__main__':
    test_dcache()