1 """DCache
2
3 based on Anton Blanchard microwatt dcache.vhdl
4
5 """
6
7 from enum import Enum, unique
8
9 from nmigen import (Module, Signal, Elaboratable, Cat, Repl, Const,
10 Array, ResetSignal)
11 from nmigen.cli import main
12 from nmutil.iocontrol import RecordObject
13 from nmigen.utils import log2_int
14
15 from experiment.mem_types import (LoadStore1ToDCacheType,
16 DCacheToLoadStore1Type,
17 MMUToDCacheType,
18 DCacheToMMUType)
19
20 from experiment.wb_types import (WB_ADDR_BITS, WB_DATA_BITS, WB_SEL_BITS,
21 WBAddrType, WBDataType, WBSelType,
22 WBMasterOut, WBSlaveOut,
23 WBMasterOutVector, WBSlaveOutVector,
24 WBIOMasterOut, WBIOSlaveOut)
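
# NOTE: PLRU and CacheRam are instantiated below but not yet imported in
# this work-in-progress file; they are assumed to be provided by the
# accompanying nmutil / experiment packages in this repository.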
25
26
27 # Record for storing permission, attribute, etc. bits from a PTE
28 class PermAttr(RecordObject):
29 def __init__(self):
30 super().__init__()
31 self.reference = Signal()
32 self.changed = Signal()
33 self.nocache = Signal()
34 self.priv = Signal()
35 self.rd_perm = Signal()
36 self.wr_perm = Signal()
37
38
39 def extract_perm_attr(pte):
40 pa = PermAttr()
41 pa.reference = pte[8]
42 pa.changed = pte[7]
43 pa.nocache = pte[5]
44 pa.priv = pte[3]
45 pa.rd_perm = pte[2]
46 pa.wr_perm = pte[1]
47 return pa
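
# (The bit positions above follow the radix PTE layout assumed by the
# original microwatt dcache.vhdl.)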
48
49
50 # Type of operation on a "valid" input
51 @unique
52 class Op(Enum):
53 OP_NONE = 0
54 OP_BAD = 1 # NC cache hit, TLB miss, prot/RC failure
55 OP_STCX_FAIL = 2 # conditional store w/o reservation
56 OP_LOAD_HIT = 3 # Cache hit on load
57 OP_LOAD_MISS = 4 # Load missing cache
58 OP_LOAD_NC = 5 # Non-cachable load
59 OP_STORE_HIT = 6 # Store hitting cache
60 OP_STORE_MISS = 7 # Store missing cache
61
62
63 # Cache state machine
64 @unique
65 class State(Enum):
66 IDLE = 0 # Normal load hit processing
67 RELOAD_WAIT_ACK = 1 # Cache reload wait ack
68 STORE_WAIT_ACK = 2 # Store wait ack
69 NC_LOAD_WAIT_ACK = 3 # Non-cachable load wait ack
70
71
72 # Dcache operations:
73 #
74 # In order to make timing, we use the BRAMs with
75 # an output buffer, which means that the BRAM
76 # output is delayed by an extra cycle.
77 #
78 # Thus, the dcache has a 2-stage internal pipeline
79 # for cache hits with no stalls.
80 #
81 # All other operations are handled via stalling
82 # in the first stage.
83 #
84 # The second stage can thus complete a hit at the same
85 # time as the first stage emits a stall for a complex op.
86 #
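# As a sketch (inferred from the description above, not from simulation),
# a load hit flows through the pipeline as:
#
#   cycle 0: request latched into r0 (stage 0)
#   cycle 1: BRAM read issued, tag compare / hit detection (stage 1)
#   cycle 2: buffered BRAM output valid, data returned
#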
87 # Stage 0 register, basically contains just the latched request
88 class RegStage0(RecordObject):
89 def __init__(self):
90 super().__init__()
91 self.req = LoadStore1ToDCacheType()
92 self.tlbie = Signal()
93 self.doall = Signal()
94 self.tlbld = Signal()
95 self.mmu_req = Signal() # indicates source of request
96
97
98 class MemAccessRequest(RecordObject):
99 def __init__(self):
100 super().__init__()
101 self.op = Signal(Op)
102 self.valid = Signal()
103 self.dcbz = Signal()
104 self.real_addr = Signal(REAL_ADDR_BITS)
105 self.data = Signal(64)
106 self.byte_sel = Signal(8)
107 self.hit_way = Signal(WAY_BITS)
108 self.same_tag = Signal()
109 self.mmu_req = Signal()
110
111
112 # First stage register, contains state for stage 1 of load hits
113 # and for the state machine used by all other operations
114 class RegStage1(RecordObject):
115 def __init__(self):
116 super().__init__()
117 # Info about the request
118 self.full = Signal() # have uncompleted request
119 self.mmu_req = Signal() # request is from MMU
120 self.req = MemAccessRequest()
121
122 # Cache hit state
123 self.hit_way = Signal(WAY_BITS)
124 self.hit_load_valid = Signal()
125 self.hit_index = Signal(NUM_LINES)
126 self.cache_hit = Signal()
127
128 # TLB hit state
129 self.tlb_hit = Signal()
130 self.tlb_hit_way = Signal(TLB_NUM_WAYS)
131 self.tlb_hit_index = Signal(TLB_SET_SIZE)
132
133 # 2-stage data buffer for data forwarded from writes to reads
134 self.forward_data1 = Signal(64)
135 self.forward_data2 = Signal(64)
136 self.forward_sel1 = Signal(8)
137 self.forward_valid1 = Signal()
138 self.forward_way1 = Signal(WAY_BITS)
139 self.forward_row1 = Signal(BRAM_ROWS)
140 self.use_forward1 = Signal()
141 self.forward_sel = Signal(8)
142
143 # Cache miss state (reload state machine)
144 self.state = Signal(State)
145 self.dcbz = Signal()
146 self.write_bram = Signal()
147 self.write_tag = Signal()
148 self.slow_valid = Signal()
149 self.wb = WBMasterOut()
150 self.reload_tag = Signal(TAG_BITS)
151 self.store_way = Signal(WAY_BITS)
152 self.store_row = Signal(BRAM_ROWS)
153 self.store_index = Signal(NUM_LINES)
154 self.end_row_ix = Signal(ROW_LINE_BITS)
155 self.rows_valid = RowPerLineValidArray()
156 self.acks_pending = Signal(3)
157 self.inc_acks = Signal()
158 self.dec_acks = Signal()
159
160 # Signals to complete (possibly with error)
161 self.ls_valid = Signal()
162 self.ls_error = Signal()
163 self.mmu_done = Signal()
164 self.mmu_error = Signal()
165 self.cache_paradox = Signal()
166
167 # Signal to complete a failed stcx.
168 self.stcx_fail = Signal()
169
170
171 # Reservation information
172 class Reservation(RecordObject):
173 def __init__(self):
174 super().__init__()
175 self.valid = Signal()
176 # TODO LINE_OFF_BITS is 6
177 self.addr = Signal(64 - LINE_OFF_BITS) # VHDL: addr(63 downto LINE_OFF_BITS)
178
179
180 # Set associative dcache write-through
181 #
182 # TODO (in no specific order):
183 #
184 # * See list in icache.vhdl
185 # * Complete load misses on the cycle when WB data comes instead of
186 # at the end of line (this requires dealing with requests coming in
187 # while not idle...)
188 class DCache(Elaboratable):
189 def __init__(self):
190 # TODO: make these parameters of DCache at some point
191 self.LINE_SIZE = 64 # Line size in bytes
192 self.NUM_LINES = 32 # Number of lines in a set
193 self.NUM_WAYS = 4 # Number of ways
194 self.TLB_SET_SIZE = 64 # L1 DTLB entries per set
195 self.TLB_NUM_WAYS = 2 # L1 DTLB number of sets
196 self.TLB_LG_PGSZ = 12 # L1 DTLB log_2(page_size)
197 self.LOG_LENGTH = 0 # Non-zero to enable log data collection
198
199 self.d_in = LoadStore1ToDCacheType()
200 self.d_out = DCacheToLoadStore1Type()
201
202 self.m_in = MMUToDCacheType()
203 self.m_out = DCacheToMMUType()
204
205 self.stall_out = Signal()
206
207 self.wb_out = WBMasterOut()
208 self.wb_in = WBSlaveOut()
209
210 self.log_out = Signal(20)
211
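# A minimal instantiation sketch (the right-hand-side names here are
# hypothetical caller signals, not part of this file):
#
#     m = Module()
#     m.submodules.dcache = dcache = DCache()
#     m.d.comb += dcache.d_in.eq(loadstore_request)
#     m.d.comb += dcache.m_in.eq(mmu_request)
#     # dcache.d_out / dcache.m_out / dcache.stall_out are wired back
#     # to loadstore1 and to the MMU respectively
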
212 # Latch the request in r0.req as long as we're not stalling
213 def stage_0(self, m, d_in, m_in):
214 comb = m.d.comb
215 sync = m.d.sync
216
217 r = RegStage0()
218
219 # TODO, this goes in unit tests and formal proofs
220 # assert ~(d_in.valid & m_in.valid),
221 # "request collision loadstore vs MMU"
222 with m.If(~(d_in.valid & m_in.valid)):
223 #sync += Display("request collision loadstore vs MMU")
224 pass
225
226 with m.If(m_in.valid):
227 sync += r.req.valid.eq(1)
228 sync += r.req.load.eq(~(m_in.tlbie | m_in.tlbld))
229 sync += r.req.dcbz.eq(0)
230 sync += r.req.nc.eq(0)
231 sync += r.req.reserve.eq(0)
232 sync += r.req.virt_mode.eq(1)
233 sync += r.req.priv_mode.eq(1)
234 sync += r.req.addr.eq(m_in.addr)
235 sync += r.req.data.eq(m_in.pte)
236 sync += r.req.byte_sel.eq(-1) # Const -1 sets all bits to 1
237 sync += r.tlbie.eq(m_in.tlbie)
238 sync += r.doall.eq(m_in.doall)
239 sync += r.tlbld.eq(m_in.tlbld)
240 sync += r.mmu_req.eq(1)
241 with m.Else():
242 sync += r.req.eq(d_in)
243 sync += r.tlbie.eq(0)
244 sync += r.doall.eq(0)
245 sync += r.tlbld.eq(0)
246 sync += r.mmu_req.eq(0)
247 with m.If(~(r1.full & r0_full)):
248 sync += r0.eq(r)
249 sync += r0_full.eq(r.req.valid)
250
251 # TLB
252 # Operates in the second cycle on the request latched in r0.req.
253 # TLB updates write the entry at the end of the second cycle.
254 def tlb_read(self, m, m_in, d_in, r0_stall, tlb_valid_way,
255 tlb_tag_way, tlb_pte_way, dtlb_valid_bits,
256 dtlb_tags, dtlb_ptes):
257
258 comb = m.d.comb
259 sync = m.d.sync
260
261 index = Signal(TLB_SET_BITS)
262 addrbits = Signal(TLB_SET_BITS)
263
264 amin = TLB_LG_PGSZ
265 amax = TLB_LG_PGSZ + TLB_SET_BITS
266
267 with m.If(m_in.valid):
268 comb += addrbits.eq(m_in.addr[amin : amax])
269 with m.Else():
270 comb += addrbits.eq(d_in.addr[amin : amax])
271 comb += index.eq(addrbits)
272
273 # If we have any op and the previous op isn't finished,
274 # then keep the same output for next cycle.
275 with m.If(~r0_stall):
276 sync += tlb_valid_way.eq(dtlb_valid_bits[index])
277 sync += tlb_tag_way.eq(dtlb_tags[index])
278 sync += tlb_pte_way.eq(dtlb_ptes[index])
279
280 # Generate TLB PLRUs
281 def maybe_tlb_plrus(self, m, r1, tlb_plru_victim):
282 comb = m.d.comb
283 sync = m.d.sync
284
285 if TLB_NUM_WAYS > 1: # VHDL "generate": an elaboration-time condition
286 for i in range(TLB_SET_SIZE):
287 # TLB PLRU interface
288 tlb_plru = PLRU(TLB_WAY_BITS)
289 setattr(m.submodules, "tlb_plru%d" % i, tlb_plru)
290 tlb_plru_acc = Signal(TLB_WAY_BITS)
291 tlb_plru_acc_en = Signal()
292 tlb_plru_out = Signal(TLB_WAY_BITS)
293
294 comb += tlb_plru.acc.eq(tlb_plru_acc)
295 comb += tlb_plru.acc_en.eq(tlb_plru_acc_en)
296 comb += tlb_plru_out.eq(tlb_plru.lru) # lru is a PLRU output
296
297 # PLRU interface
298 with m.If(r1.tlb_hit_index == i):
299 comb += tlb_plru.acc_en.eq(r1.tlb_hit)
300 with m.Else():
301 comb += tlb_plru.acc_en.eq(0)
302 comb += tlb_plru.acc.eq(r1.tlb_hit_way)
303
304 comb += tlb_plru_victim[i].eq(tlb_plru.lru)
305
306 def tlb_search(self, m, tlb_req_index, r0, tlb_valid_way, tlb_tag_way,
307 tlb_pte_way, pte, tlb_hit, valid_ra, perm_attr, ra):
308
309 comb = m.d.comb
310 sync = m.d.sync
311
312 hitway = Signal(TLB_WAY_BITS)
313 hit = Signal()
314 eatag = Signal(TLB_EA_TAG_BITS)
315
316 TLB_LG_END = TLB_LG_PGSZ + TLB_SET_BITS
317 comb += tlb_req_index.eq(r0.req.addr[TLB_LG_PGSZ : TLB_LG_END])
318 comb += eatag.eq(r0.req.addr[TLB_LG_END : 64 ])
319
320 for i in range(TLB_NUM_WAYS):
321 with m.If(tlb_valid_way[i] &
322 (read_tlb_tag(i, tlb_tag_way) == eatag)):
323 comb += hitway.eq(i)
324 comb += hit.eq(1)
325
326 comb += tlb_hit.eq(hit & r0_valid)
327 comb += tlb_hit_way.eq(hitway)
328
329 with m.If(tlb_hit):
330 comb += pte.eq(read_tlb_pte(hitway, tlb_pte_way))
331 with m.Else():
332 comb += pte.eq(0)
333 comb += valid_ra.eq(tlb_hit | ~r0.req.virt_mode)
334 with m.If(r0.req.virt_mode):
335 comb += ra.eq(Cat(Const(0, ROW_OFF_BITS),
336 r0.req.addr[ROW_OFF_BITS:TLB_LG_PGSZ],
337 pte[TLB_LG_PGSZ:REAL_ADDR_BITS]))
338 comb += perm_attr.eq(extract_perm_attr(pte))
339 with m.Else():
340 comb += ra.eq(Cat(Const(0, ROW_OFF_BITS),
341 r0.req.addr[ROW_OFF_BITS:REAL_ADDR_BITS]))
342
343 comb += perm_attr.reference.eq(1)
344 comb += perm_attr.changed.eq(1)
345 comb += perm_attr.priv.eq(1)
346 comb += perm_attr.nocache.eq(0)
347 comb += perm_attr.rd_perm.eq(1)
348 comb += perm_attr.wr_perm.eq(1)
349
350 def tlb_update(self, m, r0_valid, r0, dtlb_valid_bits, tlb_req_index,
351 tlb_hit_way, tlb_hit, tlb_plru_victim, tlb_tag_way,
352 dtlb_tags, tlb_pte_way, dtlb_ptes):
353
354 comb = m.d.comb
355 sync = m.d.sync
356
357 # variable tlbie : std_ulogic;
358 # variable tlbwe : std_ulogic;
359 # variable repl_way : tlb_way_t;
360 # variable eatag : tlb_tag_t;
361 # variable tagset : tlb_way_tags_t;
362 # variable pteset : tlb_way_ptes_t;
363 #type tlb_tags_t is array(tlb_index_t) of tlb_way_tags_t;
364 # --> Array([Signal(log(way_tags length)) for i in range(number of tlbs)])
365
366 tlbie = Signal()
367 tlbwe = Signal()
368 repl_way = Signal(TLB_WAY_BITS)
369 eatag = Signal(TLB_EA_TAG_BITS)
370 tagset = TLBWayTags()
371 pteset = TLBWayPtes()
372
373 comb += tlbie.eq(r0_valid & r0.tlbie)
374 comb += tlbwe.eq(r0_valid & r0.tlbld)
375
376 with m.If(tlbie & r0.doall):
377 # clear all valid bits at once
378 for i in range(TLB_SET_SIZE):
379 sync += dtlb_valid_bits[i].eq(0)
380
381 with m.Elif(tlbie):
382 with m.If(tlb_hit):
383 sync += dtlb_valid_bits[tlb_req_index][tlb_hit_way].eq(0)
384 with m.Elif(tlbwe):
385 with m.If(tlb_hit):
386 comb += repl_way.eq(tlb_hit_way)
387 with m.Else():
388 comb += repl_way.eq(tlb_plru_victim[tlb_req_index])
389 comb += eatag.eq(r0.req.addr[TLB_LG_PGSZ + TLB_SET_BITS:64])
390 comb += tagset.eq(tlb_tag_way)
391 sync += write_tlb_tag(repl_way, tagset, eatag)
392 sync += dtlb_tags[tlb_req_index].eq(tagset)
393 comb += pteset.eq(tlb_pte_way)
394 sync += write_tlb_pte(repl_way, pteset, r0.req.data)
395 sync += dtlb_ptes[tlb_req_index].eq(pteset)
396 sync += dtlb_valid_bits[tlb_req_index][repl_way].eq(1)
397
398 # Generate PLRUs
399 def maybe_plrus(self, m, r1, plru_victim):
400
401 comb = m.d.comb
402 sync = m.d.sync
403
404 for i in range(NUM_LINES):
405 # PLRU interface (cache PLRUs are WAY_BITS wide)
406 plru = PLRU(WAY_BITS)
407 setattr(m.submodules, "plru%d" % i, plru)
408 plru_acc = Signal(WAY_BITS)
409 plru_acc_en = Signal()
410 plru_out = Signal(WAY_BITS)
411
412 comb += plru.acc.eq(plru_acc)
413 comb += plru.acc_en.eq(plru_acc_en)
414 comb += plru_out.eq(plru.lru) # lru is a PLRU output
415
416 with m.If(r1.hit_index == i):
417 comb += plru_acc_en.eq(r1.cache_hit)
418
419 comb += plru_acc.eq(r1.hit_way)
420 comb += plru_victim[i].eq(plru_out)
421
422 # Cache tag RAM read port
423 def cache_tag_read(self, r0_stall, req_index, m_in, d_in,
424 cache_tag_set, cache_tags):
425
426 comb = m.d.comb
427 sync = m.d.sync
428
429 index = Signal(INDEX_BITS)
430
431 with m.If(r0_stall):
432 comb += index.eq(req_index)
433 with m.Elif(m_in.valid):
434 comb += index.eq(get_index(m_in.addr))
435 with m.Else():
436 comb += index.eq(get_index(d_in.addr))
437 sync += cache_tag_set.eq(cache_tags[index])
438
439 # Cache request parsing and hit detection
440 def dcache_request(self, m, r0, ra, req_index, req_row, req_tag,
441 r0_valid, r1, cache_valid_bits, replace_way,
442 use_forward1_next, use_forward2_next,
443 req_hit_way, plru_victim, rc_ok, perm_attr,
444 valid_ra, perm_ok, access_ok, req_op, req_ok,
445 r0_stall, m_in, early_req_row, d_in):
446
447 comb = m.d.comb
448 sync = m.d.sync
449
450 is_hit = Signal()
451 hit_way = Signal(WAY_BITS)
452 op = Signal(Op)
453 opsel = Signal(3)
454 go = Signal()
455 nc = Signal()
456 s_hit = Signal()
457 s_tag = Signal(TAG_BITS)
458 s_pte = Signal(TLB_PTE_BITS)
459 s_ra = Signal(REAL_ADDR_BITS)
460 hit_set = Signal(TLB_NUM_WAYS)
461 hit_way_set = HitWaySet()
462 rel_matches = Signal(TLB_NUM_WAYS)
463 rel_match = Signal()
464
465 # Extract line, row and tag from request
466 comb += req_index.eq(get_index(r0.req.addr))
467 comb += req_row.eq(get_row(r0.req.addr))
468 comb += req_tag.eq(get_tag(ra))
469
470 comb += go.eq(r0_valid & ~(r0.tlbie | r0.tlbld) & ~r1.ls_error)
471
472 # Test if pending request is a hit on any way
473 # In order to make timing in virtual mode,
474 # when we are using the TLB, we compare each
475 # way with each of the real addresses from each way of
476 # the TLB, and then decide later which match to use.
477
478 with m.If(r0.req.virt_mode):
479 comb += rel_matches.eq(0)
480 for j in range(TLB_NUM_WAYS):
481 comb += s_pte.eq(read_tlb_pte(j, tlb_pte_way))
482 comb += s_ra.eq(Cat(r0.req.addr[0:TLB_LG_PGSZ],
483 s_pte[TLB_LG_PGSZ:REAL_ADDR_BITS]))
484 comb += s_tag.eq(get_tag(s_ra))
485
486 for i in range(NUM_WAYS):
487 with m.If(go & cache_valid_bits[req_index][i] &
488 (read_tag(i, cache_tag_set) == s_tag)
489 & tlb_valid_way[j]):
490 comb += hit_way_set[j].eq(i)
491 comb += s_hit.eq(1)
492 comb += hit_set[j].eq(s_hit)
493 with m.If(s_tag == r1.reload_tag):
494 comb += rel_matches[j].eq(1)
495 with m.If(tlb_hit):
496 comb += is_hit.eq(hit_set[tlb_hit_way])
497 comb += hit_way.eq(hit_way_set[tlb_hit_way])
498 comb += rel_match.eq(rel_matches[tlb_hit_way])
499 with m.Else():
500 comb += s_tag.eq(get_tag(r0.req.addr))
501 for i in range(NUM_WAYS):
502 with m.If(go & cache_valid_bits[req_index][i] &
503 (read_tag(i, cache_tag_set) == s_tag)):
504 comb += hit_way.eq(i)
505 comb += is_hit.eq(1)
506 with m.If(s_tag == r1.reload_tag):
507 comb += rel_match.eq(1)
508 comb += req_same_tag.eq(rel_match)
509
510 # See if the request matches the line currently being reloaded
511 with m.If((r1.state == State.RELOAD_WAIT_ACK) &
512 (req_index == r1.store_index) & rel_match):
513 # For a store, consider this a hit even if the row isn't
514 # valid since it will be by the time we perform the store.
515 # For a load, check the appropriate row valid bit.
516 valid = r1.rows_valid[req_row % ROW_PER_LINE]
517 comb += is_hit.eq(~r0.req.load | valid)
518 comb += hit_way.eq(replace_way)
519
520 # Whether to use forwarded data for a load or not
521 comb += use_forward1_next.eq(0)
522 with m.If((get_row(r1.req.real_addr) == req_row)
523 & (r1.req.hit_way == hit_way)):
524 # Only need to consider r1.write_bram here, since if we
525 # are writing refill data here, then we don't have a
526 # cache hit this cycle on the line being refilled.
527 # (There is the possibility that the load following the
528 # load miss that started the refill could be to the old
529 # contents of the victim line, since it is a couple of
530 # cycles after the refill starts before we see the updated
531 # cache tag. In that case we don't use the bypass.)
532 comb += use_forward1_next.eq(r1.write_bram)
533 comb += use_forward2_next.eq(0)
534 with m.If((r1.forward_row1 == req_row) & (r1.forward_way1 == hit_way)):
535 comb += use_forward2_next.eq(r1.forward_valid1)
536
537 # The way that matched on a hit
538 comb += req_hit_way.eq(hit_way)
539
540 # The way to replace on a miss
541 with m.If(r1.write_tag):
542 comb += replace_way.eq(plru_victim[r1.store_index])
543 with m.Else():
544 comb += replace_way.eq(r1.store_way)
545
546 # work out whether we have permission for this access
547 # NB we don't yet implement AMR, thus no KUAP
548 comb += rc_ok.eq( perm_attr.reference
549 & (r0.req.load | perm_attr.changed)
550 )
551 comb += perm_ok.eq((r0.req.priv_mode | ~perm_attr.priv)
552 & (perm_attr.wr_perm
553 | (r0.req.load & perm_attr.rd_perm))
554 )
555 comb += access_ok.eq(valid_ra & perm_ok & rc_ok)
556 # Combine the request and cache hit status to decide what
557 # operation needs to be done
558 comb += nc.eq(r0.req.nc | perm_attr.nocache)
559 comb += op.eq(Op.OP_NONE)
560 with m.If(go):
561 with m.If(~access_ok):
562 comb += op.eq(Op.OP_BAD)
563 with m.Elif(cancel_store):
564 comb += op.eq(Op.OP_STCX_FAIL)
565 with m.Else():
566 comb += opsel.eq(Cat(is_hit, nc, r0.req.load)) # bit0=hit, bit2=load
567 with m.Switch(opsel):
568 with m.Case(Const(0b101, 3)):
569 comb += op.eq(Op.OP_LOAD_HIT)
570 with m.Case(Const(0b100, 3)):
571 comb += op.eq(Op.OP_LOAD_MISS)
572 with m.Case(Const(0b110, 3)):
573 comb += op.eq(Op.OP_LOAD_NC)
574 with m.Case(Const(0b001, 3)):
575 comb += op.eq(Op.OP_STORE_HIT)
576 with m.Case(Const(0b000, 3)):
577 comb += op.eq(Op.OP_STORE_MISS)
578 with m.Case(Const(0b010, 3)):
579 comb += op.eq(Op.OP_STORE_MISS)
580 with m.Case(Const(0b011, 3)):
581 comb += op.eq(Op.OP_BAD)
582 with m.Case(Const(0b111, 3)):
583 comb += op.eq(Op.OP_BAD)
584 with m.Default():
585 comb += op.eq(Op.OP_NONE)
586 comb += req_op.eq(op)
587 comb += req_go.eq(go)
588
589 # Version of the row number that is valid one cycle earlier
590 # in the cases where we need to read the cache data BRAM.
591 # If we're stalling then we need to keep reading the last
592 # row requested.
593 with m.If(~r0_stall):
594 with m.If(m_in.valid):
595 comb += early_req_row.eq(get_row(m_in.addr))
596 with m.Else():
597 comb += early_req_row.eq(get_row(d_in.addr))
598 with m.Else():
599 comb += early_req_row.eq(req_row)
600
601 # Handle load-with-reservation and store-conditional instructions
602 def reservation_comb(self, m, cancel_store, set_rsrv, clear_rsrv,
603 r0_valid, r0, reservation):
604
605 comb = m.d.comb
606 sync = m.d.sync
607
608 with m.If(r0_valid & r0.req.reserve):
609
610 # XXX generate alignment interrupt if address
611 # is not aligned XXX or if r0.req.nc = '1'
612 with m.If(r0.req.load):
613 comb += set_rsrv.eq(1) # load with reservation
614 with m.Else():
615 comb += clear_rsrv.eq(1) # store conditional
616 with m.If(~reservation.valid |
617 (r0.req.addr[LINE_OFF_BITS:64] != reservation.addr)):
617 comb += cancel_store.eq(1)
618
619 def reservation_reg(self, m, r0_valid, access_ok, set_rsrv, clear_rsrv,
620 reservation, r0):
621
622 comb = m.d.comb
623 sync = m.d.sync
624
625 with m.If(r0_valid & access_ok):
626 with m.If(clear_rsrv):
627 sync += reservation.valid.eq(0)
628 with m.Elif(set_rsrv):
629 sync += reservation.valid.eq(1)
630 sync += reservation.addr.eq(r0.req.addr[LINE_OFF_BITS:64])
631
632 # Return data for loads & completion control logic
633 def writeback_control(self, m, r1, cache_out, d_out, m_out):
634
635 comb = m.d.comb
636 sync = m.d.sync
637
638 data_out = Signal(64)
639 data_fwd = Signal(64)
640 j = Signal()
641
642 # Use the bypass if are reading the row that was
643 # written 1 or 2 cycles ago, including for the
644 # slow_valid = 1 case (i.e. completing a load
645 # miss or a non-cacheable load).
646 with m.If(r1.use_forward1):
647 comb += data_fwd.eq(r1.forward_data1)
648 with m.Else():
649 comb += data_fwd.eq(r1.forward_data2)
650
651 comb += data_out.eq(cache_out[r1.hit_way])
652
653 for i in range(8):
654 with m.If(r1.forward_sel[i]):
655 dsel = data_fwd.word_select(i, 8)
656 comb += data_out.word_select(i, 8).eq(dsel)
657
658 comb += d_out.valid.eq(r1.ls_valid)
659 comb += d_out.data.eq(data_out)
660 comb += d_out.store_done.eq(~r1.stcx_fail)
661 comb += d_out.error.eq(r1.ls_error)
662 comb += d_out.cache_paradox.eq(r1.cache_paradox)
663
664 # Outputs to MMU
665 comb += m_out.done.eq(r1.mmu_done)
666 comb += m_out.err.eq(r1.mmu_error)
667 comb += m_out.data.eq(data_out)
668
669 # We have a valid load or store hit or we just completed
670 # a slow op such as a load miss, a NC load or a store
671 #
672 # Note: the load hit is delayed by one cycle. However it
673 # can still not collide with r.slow_valid (well unless I
674 # miscalculated) because slow_valid can only be set on a
675 # subsequent request and not on its first cycle (the state
676 # machine must have advanced), which makes slow_valid
677 # at least 2 cycles from the previous hit_load_valid.
678
679 # Sanity: Only one of these must be set in any given cycle
680
681 if False: # TODO: need Display to get this to work
682 assert (r1.slow_valid & r1.stcx_fail) != 1, \
683 "unexpected slow_valid collision with stcx_fail " \
684 "-!- severity FAILURE"
685
686 assert ((r1.slow_valid | r1.stcx_fail) | r1.hit_load_valid) != 1, \
687 "unexpected hit_load_delayed collision with slow_valid " \
688 "-!- severity FAILURE"
688
689 with m.If(~r1.mmu_req):
690 # Request came from loadstore1...
691 # Load hit case is the standard path
692 with m.If(r1.hit_load_valid):
693 # report
694 # "completing load hit data=" & to_hstring(data_out);
695 #Display(f"completing load hit data={data_out}")
696 pass
697
698 # error cases complete without stalling
699 with m.If(r1.ls_error):
700 # Display("completing ld/st with error")
701 pass
702
703 # Slow ops (load miss, NC, stores)
704 with m.If(r1.slow_valid):
705 #Display(f"completing store or load miss data={data_out}")
706 pass
707
708 with m.Else():
709 # Request came from MMU
710 with m.If(r1.hit_load_valid):
711 # Display(f"completing load hit to MMU, data={m_out.data}")
712 pass
713 # error cases complete without stalling
714 with m.If(r1.mmu_error):
715 #Display("combpleting MMU ld with error")
716 pass
717
718 # Slow ops (i.e. load miss)
719 with m.If(r1.slow_valid):
720 #Display("completing MMU load miss, data={m_out.data}")
721 pass
722
723 # Generate a cache RAM for each way. This handles the normal
724 # reads, writes from reloads and the special store-hit update
725 # path as well.
726 #
727 # Note: the BRAMs have an extra read buffer, meaning the output
728 # is pipelined an extra cycle. This differs from the
729 # icache. The writeback logic needs to take that into
730 # account by using 1-cycle delayed signals for load hits.
731 def rams(self, m):
732 comb = m.d.comb
733 sync = m.d.sync
734
735 for i in range(NUM_WAYS):
733 # signal do_read : std_ulogic;
734 # signal rd_addr : std_ulogic_vector(ROW_BITS-1 downto 0);
735 # signal do_write : std_ulogic;
736 # signal wr_addr : std_ulogic_vector(ROW_BITS-1 downto 0);
737 # signal wr_data :
738 # std_ulogic_vector(wishbone_data_bits-1 downto 0);
739 # signal wr_sel : std_ulogic_vector(ROW_SIZE-1 downto 0);
740 # signal wr_sel_m : std_ulogic_vector(ROW_SIZE-1 downto 0);
741 # signal dout : cache_row_t;
742 do_read = Signal()
743 rd_addr = Signal(ROW_BITS)
744 do_write = Signal()
745 wr_addr = Signal(ROW_BITS)
746 wr_data = Signal(WB_DATA_BITS)
747 wr_sel = Signal(ROW_SIZE)
748 wr_sel_m = Signal(ROW_SIZE)
749 _d_out = Signal(WB_DATA_BITS)
750
751 # begin
752 # way: entity work.cache_ram
753 # generic map (
754 # ROW_BITS => ROW_BITS,
755 # WIDTH => wishbone_data_bits,
756 # ADD_BUF => true
757 # )
758 # port map (
759 # clk => clk,
760 # rd_en => do_read,
761 # rd_addr => rd_addr,
762 # rd_data => dout,
763 # wr_sel => wr_sel_m,
764 # wr_addr => wr_addr,
765 # wr_data => wr_data
766 # );
767 # process(all)
768 way = CacheRam(ROW_BITS, WB_DATA_BITS, True)
769 setattr(m.submodules, "cacheram_%d" % i, way)
770 comb += way.rd_en.eq(do_read)
771 comb += way.rd_addr.eq(rd_addr)
772 comb += _d_out.eq(way.rd_data) # rd_data is the RAM's output
773 comb += way.wr_sel.eq(wr_sel_m)
774 comb += way.wr_addr.eq(wr_addr)
775 comb += way.wr_data.eq(wr_data)
775
776 # begin
777 # -- Cache hit reads
778 # do_read <= '1';
779 # rd_addr <=
780 # std_ulogic_vector(to_unsigned(early_req_row, ROW_BITS));
781 # cache_out(i) <= dout;
782 # Cache hit reads
783 comb += do_read.eq(1)
784 comb += rd_addr.eq(early_req_row)
785 comb += cache_out[i].eq(_d_out)
786
787 # -- Write mux:
788 # --
789 # -- Defaults to wishbone read responses (cache refill)
790 # --
791 # -- For timing, the mux on wr_data/sel/addr is not
792 # -- dependent on anything other than the current state.
793 # Write mux:
794 #
795 # Defaults to wishbone read responses (cache refill)
796 #
797 # For timing, the mux on wr_data/sel/addr is not
798 # dependent on anything other than the current state.
799 # wr_sel_m <= (others => '0');
800 comb += wr_sel_m.eq(0)
801
802 # do_write <= '0';
803 comb += do_write.eq(0)
804 # if r1.write_bram = '1' then
805 with m.If(r1.write_bram):
806 # -- Write store data to BRAM. This happens one
807 # -- cycle after the store is in r0.
808 # Write store data to BRAM. This happens one
809 # cycle after the store is in r0.
810 # wr_data <= r1.req.data;
811 # wr_sel <= r1.req.byte_sel;
812 # wr_addr <= std_ulogic_vector(to_unsigned(
813 # get_row(r1.req.real_addr), ROW_BITS
814 # ));
815 comb += wr_data.eq(r1.req.data)
816 comb += wr_sel.eq(r1.req.byte_sel)
817 comb += wr_addr.eq(get_row(r1.req.real_addr))
818
819 # if i = r1.req.hit_way then
820 with m.If(i == r1.req.hit_way):
821 # do_write <= '1';
822 comb += do_write.eq(1)
823 # end if;
824 # else
825 with m.Else():
826 # -- Otherwise, we might be doing a reload or a DCBZ
827 # if r1.dcbz = '1' then
828 # Otherwise, we might be doing a reload or a DCBZ
829 with m.If(r1.dcbz):
830 # wr_data <= (others => '0');
831 comb += wr_data.eq(0)
832 # else
833 with m.Else():
834 # wr_data <= wishbone_in.dat;
835 comb += wr_data.eq(wb_in.dat)
836 # end if;
837
838 # wr_addr <= std_ulogic_vector(to_unsigned(
839 # r1.store_row, ROW_BITS
840 # ));
841 # wr_sel <= (others => '1');
842 comb += wr_addr.eq(r1.store_row)
843 comb += wr_sel.eq(-1) # (others => '1')
844
845 # if r1.state = RELOAD_WAIT_ACK and
846 # wishbone_in.ack = '1' and replace_way = i then
847 with m.If((r1.state == State.RELOAD_WAIT_ACK)
848 & wb_in.ack & (replace_way == i)):
849 # do_write <= '1';
850 comb += do_write.eq(1)
851 # end if;
852 # end if;
853
854 # -- Mask write selects with do_write since BRAM
855 # -- doesn't have a global write-enable
856 # if do_write = '1' then
857 # -- Mask write selects with do_write since BRAM
858 # -- doesn't have a global write-enable
859 with m.If(do_write):
860 # wr_sel_m <= wr_sel;
861 comb += wr_sel_m.eq(wr_sel)
862 # end if;
863 # end process;
864 # end generate;
865
866 # Cache hit synchronous machine for the easy case.
867 # This handles load hits.
868 # It also handles error cases (TLB miss, cache paradox)
869 def dcache_fast_hit(self, m, req_op, r0_valid, r1):
870
871 comb = m.d.comb
872 sync = m.d.sync
873
874 # begin
875 # if rising_edge(clk) then
876 # if req_op /= OP_NONE then
877 with m.If(req_op != Op.OP_NONE):
878 # report "op:" & op_t'image(req_op) &
879 # " addr:" & to_hstring(r0.req.addr) &
880 # " nc:" & std_ulogic'image(r0.req.nc) &
881 # " idx:" & integer'image(req_index) &
882 # " tag:" & to_hstring(req_tag) &
883 # " way: " & integer'image(req_hit_way);
884 print(f"op:{req_op} addr:{r0.req.addr} nc: {r0.req.nc}" \
885 f"idx:{req_index} tag:{req_tag} way: {req_hit_way}"
886 )
887 # end if;
888 # if r0_valid = '1' then
889 with m.If(r0_valid):
890 # r1.mmu_req <= r0.mmu_req;
891 sync += r1.mmu_req.eq(r0.mmu_req)
892 # end if;
893
894 # -- Fast path for load/store hits.
895 # -- Set signals for the writeback controls.
896 # r1.hit_way <= req_hit_way;
897 # r1.hit_index <= req_index;
898 # Fast path for load/store hits.
899 # Set signals for the writeback controls.
900 sync += r1.hit_way.eq(req_hit_way)
901 sync += r1.hit_index.eq(req_index)
902
903 # if req_op = OP_LOAD_HIT then
904 with m.If(req_op == Op.OP_LOAD_HIT):
905 # r1.hit_load_valid <= '1';
906 sync += r1.hit_load_valid.eq(1)
907
908 # else
909 with m.Else():
910 # r1.hit_load_valid <= '0';
911 sync += r1.hit_load_valid.eq(0)
912 # end if;
913
914 # if req_op = OP_LOAD_HIT or req_op = OP_STORE_HIT then
915 with m.If((req_op == Op.OP_LOAD_HIT) | (req_op == Op.OP_STORE_HIT)):
916 # r1.cache_hit <= '1';
917 sync += r1.cache_hit.eq(1)
918 # else
919 with m.Else():
920 # r1.cache_hit <= '0';
921 sync += r1.cache_hit.eq(0)
922 # end if;
923
924 # if req_op = OP_BAD then
925 with m.If(req_op == Op.OP_BAD):
926 # report "Signalling ld/st error valid_ra=" &
927 # std_ulogic'image(valid_ra) & " rc_ok=" &
928 # std_ulogic'image(rc_ok) & " perm_ok=" &
929 # std_ulogic'image(perm_ok);
930 print(f"Signalling ld/st error valid_ra={valid_ra}"
931 f"rc_ok={rc_ok} perm_ok={perm_ok}"
932
933 # r1.ls_error <= not r0.mmu_req;
934 # r1.mmu_error <= r0.mmu_req;
935 # r1.cache_paradox <= access_ok;
936 sync += r1.ls_error.eq(~r0.mmu_req)
937 sync += r1.mmu_error.eq(r0.mmu_req)
938 sync += r1.cache_paradox.eq(access_ok)
939
940 # else
941 with m.Else():
942 # r1.ls_error <= '0';
943 # r1.mmu_error <= '0';
944 # r1.cache_paradox <= '0';
945 sync += r1.ls_error.eq(0)
946 sync += r1.mmu_error.eq(0)
947 sync += r1.cache_paradox.eq(0)
948 # end if;
949 #
950 # if req_op = OP_STCX_FAIL then
951 with m.If(req_op == Op.OP_STCX_FAIL):
952 # r1.stcx_fail <= '1';
953 sync += r1.stcx_fail.eq(1)
954
955 # else
956 with m.Else():
957 # r1.stcx_fail <= '0';
958 sync += r1.stcx_fail.eq(0)
959 # end if;
960 #
961 # -- Record TLB hit information for updating TLB PLRU
962 # r1.tlb_hit <= tlb_hit;
963 # r1.tlb_hit_way <= tlb_hit_way;
964 # r1.tlb_hit_index <= tlb_req_index;
965 # Record TLB hit information for updating TLB PLRU
966 sync += r1.tlb_hit.eq(tlb_hit)
967 sync += r1.tlb_hit_way.eq(tlb_hit_way)
968 sync += r1.tlb_hit_index.eq(tlb_req_index)
969 # end if;
970 # end process;
971
972 # Memory accesses are handled by this state machine:
973 #
974 # * Cache load miss/reload (in conjunction with "rams")
975 # * Load hits for non-cachable forms
976 # * Stores (the collision case is handled in "rams")
977 #
978 # All wishbone requests generation is done here.
979 # This machine operates at stage 1.
980 def dcache_slow(self, m, r1, use_forward1_next, cache_valid_bits, r0,
981 r0_valid, req_op, cache_tag, req_go, ra, wb_in):
982
983 comb = m.d.comb
984 sync = m.d.sync
985
986 # variable stbs_done : boolean;
987 # variable req : mem_access_request_t;
988 # variable acks : unsigned(2 downto 0);
989 stbs_done = Signal()
990 req = MemAccessRequest()
991 acks = Signal(3)
992
996
997 # begin
998 # if rising_edge(clk) then
999 # r1.use_forward1 <= use_forward1_next;
1000 # r1.forward_sel <= (others => '0');
1001 sync += r1.use_forward1.eq(use_forward1_next)
1002 sync += r1.forward_sel.eq(0)
1003
1004 # if use_forward1_next = '1' then
1005 with m.If(use_forward1_next):
1006 # r1.forward_sel <= r1.req.byte_sel;
1007 sync += r1.forward_sel.eq(r1.req.byte_sel)
1008
1009 # elsif use_forward2_next = '1' then
1010 with m.Elif(use_forward2_next):
1011 # r1.forward_sel <= r1.forward_sel1;
1012 sync += r1.forward_sel.eq(r1.forward_sel1)
1013 # end if;
1014
1015 # r1.forward_data2 <= r1.forward_data1;
1016 sync += r1.forward_data2.eq(r1.forward_data1)
1017
1018 # if r1.write_bram = '1' then
1019 with m.If(r1.write_bram):
1020 # r1.forward_data1 <= r1.req.data;
1021 # r1.forward_sel1 <= r1.req.byte_sel;
1022 # r1.forward_way1 <= r1.req.hit_way;
1023 # r1.forward_row1 <= get_row(r1.req.real_addr);
1024 # r1.forward_valid1 <= '1';
1025 sync += r1.forward_data1.eq(r1.req.data)
1026 sync += r1.forward_sel1.eq(r1.req.byte_sel)
1027 sync += r1.forward_way1.eq(r1.req.hit_way)
1028 sync += r1.forward_row1.eq(get_row(r1.req.real_addr))
1029 sync += r1.forward_valid1.eq(1)
1030 # else
1031 with m.Else():
1032
1033 # if r1.dcbz = '1' then
1034 with m.If(r1.dcbz):
1035 # r1.forward_data1 <= (others => '0');
1036 sync += r1.forward_data1.eq(0)
1037
1038 # else
1039 with m.Else():
1040 # r1.forward_data1 <= wishbone_in.dat;
1041 sync += r1.forward_data1.eq(wb_in.dat)
1042 # end if;
1043
1044 # r1.forward_sel1 <= (others => '1');
1045 # r1.forward_way1 <= replace_way;
1046 # r1.forward_row1 <= r1.store_row;
1047 # r1.forward_valid1 <= '0';
1048 sync += r1.forward_sel1.eq(-1) # (others => '1')
1049 sync += r1.forward_way1.eq(replace_way)
1050 sync += r1.forward_row1.eq(r1.store_row)
1051 sync += r1.forward_valid1.eq(0)
1052 # end if;
1053
1054 # -- On reset, clear all valid bits to force misses
1055 # if rst = '1' then
1056 # On reset, clear all valid bits to force misses.
1057 # nmigen's sync domain has an implicit reset; ResetSignal() is
1058 # used here to mirror the explicit VHDL reset branch.
1059 with m.If(ResetSignal()):
1059 # for i in index_t loop
1060 for i in range(NUM_LINES):
1061 # cache_valids(i) <= (others => '0');
1062 sync += cache_valid_bits[i].eq(0)
1063 # end loop;
1064
1065 # r1.state <= IDLE;
1066 # r1.full <= '0';
1067 # r1.slow_valid <= '0';
1068 # r1.wb.cyc <= '0';
1069 # r1.wb.stb <= '0';
1070 # r1.ls_valid <= '0';
1071 # r1.mmu_done <= '0';
1072 sync += r1.state.eq(State.IDLE)
1073 sync += r1.full.eq(0)
1074 sync += r1.slow_valid.eq(0)
1075 sync += r1.wb.cyc.eq(0)
1076 sync += r1.wb.stb.eq(0)
1077 sync += r1.ls_valid.eq(0)
1078 sync += r1.mmu_done.eq(0)
1079
1080 # -- Not useful normally but helps avoiding
1081 # -- tons of sim warnings
1082 # Not useful normally but helps avoiding
1083 # tons of sim warnings
1084 # r1.wb.adr <= (others => '0');
1085 sync += r1.wb.adr.eq(0)
1086 # else
1087 with m.Else():
1088 # -- One cycle pulses reset
1089 # r1.slow_valid <= '0';
1090 # r1.write_bram <= '0';
1091 # r1.inc_acks <= '0';
1092 # r1.dec_acks <= '0';
1093 #
1094 # r1.ls_valid <= '0';
1095 # -- complete tlbies and TLB loads in the third cycle
1096 # r1.mmu_done <= r0_valid and (r0.tlbie or r0.tlbld);
1097 # One cycle pulses reset
1098 sync += r1.slow_valid.eq(0)
1099 sync += r1.write_bram.eq(0)
1100 sync += r1.inc_acks.eq(0)
1101 sync += r1.dec_acks.eq(0)
1102
1103 sync += r1.ls_valid.eq(0)
1104 # complete tlbies and TLB loads in the third cycle
1105 sync += r1.mmu_done.eq(r0_valid & (r0.tlbie | r0.tlbld))
1106
1107 # if req_op = OP_LOAD_HIT or req_op = OP_STCX_FAIL then
1108 with m.If((req_op == Op.OP_LOAD_HIT)
1109 | (req_op == Op.OP_STCX_FAIL)):
1110 # if r0.mmu_req = '0' then
1111 with m.If(~r0.mmu_req):
1112 # r1.ls_valid <= '1';
1113 sync += r1.ls_valid.eq(1)
1114 # else
1115 with m.Else():
1116 # r1.mmu_done <= '1';
1117 sync += r1.mmu_done.eq(1)
1118 # end if;
1119 # end if;
1120
1121 # if r1.write_tag = '1' then
1122 with m.If(r1.write_tag):
1123 # -- Store new tag in selected way
1124 # for i in 0 to NUM_WAYS-1 loop
1125 # Store new tag in selected way
1126 for i in range(NUM_WAYS):
1127 # if i = replace_way then
1128 with m.If(i == replace_way):
1129 # cache_tags(r1.store_index)(
1130 # (i + 1) * TAG_WIDTH - 1
1131 # downto i * TAG_WIDTH
1132 # ) <=
1133 # (TAG_WIDTH - 1 downto TAG_BITS => '0')
1134 # & r1.reload_tag;
1135 # the assignment zero-extends r1.reload_tag to
1136 # TAG_WIDTH, matching the VHDL's explicit padding
1137 sync += cache_tag[r1.store_index][
1138 i * TAG_WIDTH:(i + 1) * TAG_WIDTH
1139 ].eq(r1.reload_tag)
1141 # end if;
1142 # end loop;
1143 # r1.store_way <= replace_way;
1144 # r1.write_tag <= '0';
1145 sync += r1.store_way.eq(replace_way)
1146 sync += r1.write_tag.eq(0)
1147 # end if;
1148
1149 # -- Take request from r1.req if there is one there,
1150 # -- else from req_op, ra, etc.
1151 # if r1.full = '1' then
1152 # Take request from r1.req if there is one there,
1153 # else from req_op, ra, etc.
1154 with m.If(r1.full):
1155 # req := r1.req;
1156 comb += req.eq(r1.req) # "req" is a VHDL variable: drive it combinatorially
1157
1158 # else
1159 with m.Else():
1160 # req.op := req_op;
1161 # req.valid := req_go;
1162 # req.mmu_req := r0.mmu_req;
1163 # req.dcbz := r0.req.dcbz;
1164 # req.real_addr := ra;
1165 comb += req.op.eq(req_op)
1166 comb += req.valid.eq(req_go)
1167 comb += req.mmu_req.eq(r0.mmu_req)
1168 comb += req.dcbz.eq(r0.req.dcbz)
1169 comb += req.real_addr.eq(ra)
1170
1171 # -- Force data to 0 for dcbz
1172 # if r0.req.dcbz = '0' then
1173 with m.If(~r0.req.dcbz):
1174 # req.data := r0.req.data;
1175 comb += req.data.eq(r0.req.data)
1176
1177 # else
1178 with m.Else():
1179 # req.data := (others => '0');
1180 comb += req.data.eq(0)
1181 # end if;
1182
1183 # -- Select all bytes for dcbz
1184 # -- and for cacheable loads
1185 # if r0.req.dcbz = '1'
1186 # or (r0.req.load = '1' and r0.req.nc = '0') then
1187 # Select all bytes for dcbz
1188 # and for cacheable loads
1189 with m.If(r0.req.dcbz | (r0.req.load & ~r0.req.nc)):
1190 # req.byte_sel := (others => '1');
1191 comb += req.byte_sel.eq(-1) # all ones
1192
1193 # else
1194 with m.Else():
1195 # req.byte_sel := r0.req.byte_sel;
1196 comb += req.byte_sel.eq(r0.req.byte_sel)
1197 # end if;
1198
1199 # req.hit_way := req_hit_way;
1200 # req.same_tag := req_same_tag;
1201 comb += req.hit_way.eq(req_hit_way)
1202 comb += req.same_tag.eq(req_same_tag)
1203
1204 # -- Store the incoming request from r0,
1205 # -- if it is a slow request
1206 # -- Note that r1.full = 1 implies req_op = OP_NONE
1207 # if req_op = OP_LOAD_MISS or req_op = OP_LOAD_NC
1208 # or req_op = OP_STORE_MISS
1209 # or req_op = OP_STORE_HIT then
1210 # Store the incoming request from r0,
1211 # if it is a slow request
1212 # Note that r1.full = 1 implies req_op = OP_NONE
1213 with m.If((req_op == Op.OP_LOAD_MISS)
1214 | (req_op == Op.OP_LOAD_NC)
1215 | (req_op == Op.OP_STORE_MISS)
1216 | (req_op == Op.OP_STORE_HIT)):
1217 # r1.req <= req;
1218 # r1.full <= '1';
1219 sync += r1.req.eq(req)
1220 sync += r1.full.eq(1)
1221 # end if;
1222 # end if;
1223 #
1224 # -- Main state machine
1225 # case r1.state is
1226 # Main state machine
1227 with m.Switch(r1.state):
1228
1229 # when IDLE =>
1230 with m.Case(State.IDLE):
1231 # r1.wb.adr <= req.real_addr(
1232 # r1.wb.adr'left downto 0
1233 # );
1234 # r1.wb.sel <= req.byte_sel;
1235 # r1.wb.dat <= req.data;
1236 # r1.dcbz <= req.dcbz;
1237 #
1238 # -- Keep track of our index and way
1239 # -- for subsequent stores.
1240 # r1.store_index <= get_index(req.real_addr);
1241 # r1.store_row <= get_row(req.real_addr);
1242 # r1.end_row_ix <=
1243 # get_row_of_line(get_row(req.real_addr)) - 1;
1244 # r1.reload_tag <= get_tag(req.real_addr);
1245 # r1.req.same_tag <= '1';
1246 sync += r1.wb.adr.eq(req.real_addr[0:len(r1.wb.adr)])
1247 sync += r1.wb.sel.eq(req.byte_sel)
1248 sync += r1.wb.dat.eq(req.data)
1249 sync += r1.dcbz.eq(req.dcbz)
1250
1251 # Keep track of our index and way
1252 # for subsequent stores.
1253 sync += r1.store_index.eq(get_index(req.real_addr))
1254 sync += r1.store_row.eq(get_row(req.real_addr))
1255 sync += r1.end_row_ix.eq(
1256 get_row_of_line(get_row(req.real_addr)) - 1
1257 )
1258 sync += r1.reload_tag.eq(get_tag(req.real_addr))
1259 sync += r1.req.same_tag.eq(1)
1260
1261 # if req.op = OP_STORE_HIT then
1262 with m.If(req.op == Op.OP_STORE_HIT):
1263 # r1.store_way <= req.hit_way;
1264 sync += r1.store_way.eq(req.hit_way)
1265 # end if;
1266
1267 # -- Reset per-row valid bits,
1268 # -- ready for handling OP_LOAD_MISS
1269 # for i in 0 to ROW_PER_LINE - 1 loop
1270 # Reset per-row valid bits,
1271 # ready for handling OP_LOAD_MISS
1272 for i in range(ROW_PER_LINE):
1273 # r1.rows_valid(i) <= '0';
1274 sync += r1.rows_valid[i].eq(0)
1275 # end loop;
1276
1277 # case req.op is
1278 with m.Switch(req.op):
1279 # when OP_LOAD_HIT =>
1280 with m.Case(Op.OP_LOAD_HIT):
1281 # -- stay in IDLE state
1282 # stay in IDLE state
1283 pass
1284
1285 # when OP_LOAD_MISS =>
1286 with m.Case(Op.OP_LOAD_MISS):
1287 # -- Normal load cache miss,
1288 # -- start the reload machine
1289 # report "cache miss real addr:" &
1290 # to_hstring(req.real_addr) & " idx:" &
1291 # integer'image(get_index(req.real_addr)) &
1292 # " tag:" & to_hstring(get_tag(req.real_addr));
1293 # Normal load cache miss,
1294 # start the reload machine
1295 print(f"cache miss real addr:" \
1296 f"{req_real_addr}" \
1297 f" idx:{get_index(req_real_addr)}" \
1298 f" tag:{get_tag(req.real_addr)}")
1299
1300 # -- Start the wishbone cycle
1301 # r1.wb.we <= '0';
1302 # r1.wb.cyc <= '1';
1303 # r1.wb.stb <= '1';
1304 # Start the wishbone cycle
1305 sync += r1.wb.we.eq(0)
1306 sync += r1.wb.cyc.eq(1)
1307 sync += r1.wb.stb.eq(1)
1308
1309 # -- Track that we had one request sent
1310 # r1.state <= RELOAD_WAIT_ACK;
1311 # r1.write_tag <= '1';
1312 # Track that we had one request sent
1313 sync += r1.state.eq(State.RELOAD_WAIT_ACK)
1314 sync += r1.write_tag.eq(1)
1315
1316 # when OP_LOAD_NC =>
1317 with m.Case(Op.OP_LOAD_NC):
1318 # r1.wb.cyc <= '1';
1319 # r1.wb.stb <= '1';
1320 # r1.wb.we <= '0';
1321 # r1.state <= NC_LOAD_WAIT_ACK;
1322 sync += r1.wb.cyc.eq(1)
1323 sync += r1.wb.stb.eq(1)
1324 sync += r1.wb.we.eq(0)
1325 sync += r1.state.eq(State.NC_LOAD_WAIT_ACK)
1326
1327 # when OP_STORE_HIT | OP_STORE_MISS =>
1328 with m.Case(Op.OP_STORE_HIT,
1329 Op.OP_STORE_MISS):
1330 # if req.dcbz = '0' then
1331 with m.If(~req.dcbz):
1332 # r1.state <= STORE_WAIT_ACK;
1333 # r1.acks_pending <= to_unsigned(1, 3);
1334 # r1.full <= '0';
1335 # r1.slow_valid <= '1';
1336 sync += r1.state.eq(
1337 State.STORE_WAIT_ACK
1338 )
1339 sync += r1.acks_pending.eq(1) # to_unsigned(1, 3)
1342 sync += r1.full.eq(0)
1343 sync += r1.slow_valid.eq(1)
1344
1345 # if req.mmu_req = '0' then
1346 with m.If(~req.mmu_req):
1347 # r1.ls_valid <= '1';
1348 sync += r1.ls_valid.eq(1)
1349 # else
1350 with m.Else():
1351 # r1.mmu_done <= '1';
1352 sync += r1.mmu_done.eq(1)
1353 # end if;
1354
1355 # if req.op = OP_STORE_HIT then
1356 with m.If(req.op == Op.OP_STORE_HIT):
1357 # r1.write_bram <= '1';
1358 sync += r1.write_bram.eq(1)
1359 # end if;
1360
1361 # else
1362 with m.Else():
1363 # -- dcbz is handled much like a load
1364 # -- miss except that we are writing
1365 # -- to memory instead of reading
1366 # r1.state <= RELOAD_WAIT_ACK;
1367 # dcbz is handled much like a load
1368 # miss except that we are writing
1369 # to memory instead of reading
1370 sync += r1.state.eq(State.RELOAD_WAIT_ACK)
1371
1372 # if req.op = OP_STORE_MISS then
1373 with m.If(req.op == Op.OP_STORE_MISS):
1374 # r1.write_tag <= '1';
1375 sync += r1.write_tag.eq(1)
1376 # end if;
1377 # end if;
1378
1379 # r1.wb.we <= '1';
1380 # r1.wb.cyc <= '1';
1381 # r1.wb.stb <= '1';
1382 sync += r1.wb.we.eq(1)
1383 sync += r1.wb.cyc.eq(1)
1384 sync += r1.wb.stb.eq(1)
1385
1386 # -- OP_NONE and OP_BAD do nothing
1387 # -- OP_BAD & OP_STCX_FAIL were handled above already
1388 # when OP_NONE =>
1389 # when OP_BAD =>
1390 # when OP_STCX_FAIL =>
1391 # OP_NONE and OP_BAD do nothing
1392 # OP_BAD & OP_STCX_FAIL were
1393 # handled above already
1394 with m.Case(Op.OP_NONE):
1395 pass
1396
1397 with m.Case(Op.OP_BAD):
1398 pass
1399
1400 with m.Case(Op.OP_STCX_FAIL):
1401 pass
1402 # end case;
1403
1404 # when RELOAD_WAIT_ACK =>
1405 with m.Case(State.RELOAD_WAIT_ACK):
1406 # -- Requests are all sent if stb is 0
1407 # stbs_done := r1.wb.stb = '0';
1408 # Requests are all sent if stb is 0
1409 comb += stbs_done.eq(~r1.wb.stb)
1410
1411 # -- If we are still sending requests,
1412 # -- was one accepted?
1413 # if wishbone_in.stall = '0' and not stbs_done then
1414 # If we are still sending requests,
1415 # was one accepted?
1416 with m.If(~wb_in.stall & ~stbs_done):
1417 # -- That was the last word ? We are done sending.
1418 # -- Clear stb and set stbs_done so we can handle
1419 # -- an eventual last ack on the same cycle.
1420 # if is_last_row_addr(
1421 # r1.wb.adr, r1.end_row_ix
1422 # ) then
1423 # That was the last word?
1424 # We are done sending.
1425 # Clear stb and set stbs_done
1426 # so we can handle an eventual
1427 # last ack on the same cycle.
1428 with m.If(is_last_row_addr(
1429 r1.wb.adr, r1.end_row_ix)):
1430 # r1.wb.stb <= '0';
1431 # stbs_done := true;
1432 sync += r1.wb.stb.eq(0)
1433 comb += stbs_done.eq(1) # stbs_done := true
1434 # end if;
1435
1436 # -- Calculate the next row address
1437 # r1.wb.adr <= next_row_addr(r1.wb.adr);
1438 # Calculate the next row address
1439 sync += r1.wb.adr.eq(next_row_addr(r1.wb.adr))
1440 # end if;
1441
1442 # -- Incoming acks processing
1443 # r1.forward_valid1 <= wishbone_in.ack;
1444 # Incoming acks processing
1445 sync += r1.forward_valid1.eq(wb_in.ack)
1446
1447 # if wishbone_in.ack = '1' then
1448 with m.If(wb_in.ack):
1449 # r1.rows_valid(
1450 # r1.store_row mod ROW_PER_LINE
1451 # ) <= '1';
1452 sync += r1.rows_valid[
1453 r1.store_row % ROW_PER_LINE
1454 ].eq(1)
1455
1456 # -- If this is the data we were looking for,
1457 # -- we can complete the request next cycle.
1458 # -- Compare the whole address in case the
1459 # -- request in r1.req is not the one that
1460 # -- started this refill.
1461 # if r1.full = '1' and r1.req.same_tag = '1'
1462 # and ((r1.dcbz = '1' and r1.req.dcbz = '1')
1463 # or (r1.dcbz = '0' and r1.req.op = OP_LOAD_MISS))
1464 # and r1.store_row = get_row(r1.req.real_addr) then
1465 # If this is the data we were looking for,
1466 # we can complete the request next cycle.
1467 # Compare the whole address in case the
1468 # request in r1.req is not the one that
1469 # started this refill.
1470 with m.If(r1.full & r1.req.same_tag &
1471 ((r1.dcbz & r1.req.dcbz) |
1472 (~r1.dcbz &
1473 (r1.req.op == Op.OP_LOAD_MISS))) &
1474 (r1.store_row ==
1475 get_row(r1.req.real_addr))):
1477 # r1.full <= '0';
1478 # r1.slow_valid <= '1';
1479 sync += r1.full.eq(0)
1480 sync += r1.slow_valid.eq(1)
1481
1482 # if r1.mmu_req = '0' then
1483 with m.If(~r1.mmu_req):
1484 # r1.ls_valid <= '1';
1485 sync += r1.ls_valid.eq(1)
1486 # else
1487 with m.Else():
1488 # r1.mmu_done <= '1';
1489 sync += r1.mmu_done.eq(1)
1490 # end if;
1491 # r1.forward_sel <= (others => '1');
1492 # r1.use_forward1 <= '1';
1493 sync += r1.forward_sel.eq(-1) # (others => '1')
1494 sync += r1.use_forward1.eq(1)
1495 # end if;
1496
1497 # -- Check for completion
1498 # if stbs_done and is_last_row(r1.store_row,
1499 # r1.end_row_ix) then
1500 # Check for completion
1501 with m.If(stbs_done &
1502 is_last_row(r1.store_row,
1503 r1.end_row_ix)):
1504
1505 # -- Complete wishbone cycle
1506 # r1.wb.cyc <= '0';
1507 # Complete wishbone cycle
1508 sync += r1.wb.cyc.eq(0)
1509
1510 # -- Cache line is now valid
1511 # cache_valids(r1.store_index)(
1512 # r1.store_way
1513 # ) <= '1';
1514 # Cache line is now valid
1515 sync += cache_valid_bits[
1516 r1.store_index
1517 ][r1.store_way].eq(1)
1518
1519 # r1.state <= IDLE;
1520 sync += r1.state.eq(State.IDLE)
1521 # end if;
1522
1523 # -- Increment store row counter
1524 # r1.store_row <= next_row(r1.store_row);
1525 # Increment store row counter
1526 sync += r1.store_row.eq(next_row(
1527 r1.store_row
1528 ))
1529 # end if;
1530
1531 # when STORE_WAIT_ACK =>
1532 with m.Case(State.STORE_WAIT_ACK):
1533 # stbs_done := r1.wb.stb = '0';
1534 # acks := r1.acks_pending;
1535 comb += stbs_done.eq(~r1.wb.stb)
1536 comb += acks.eq(r1.acks_pending)
1537
1538 # if r1.inc_acks /= r1.dec_acks then
1539 with m.If(r1.inc_acks != r1.dec_acks):
1540
1541 # if r1.inc_acks = '1' then
1542 with m.If(r1.inc_acks):
1543 # acks := acks + 1;
1544 comb += acks.eq(r1.acks_pending + 1)
1545
1546 # else
1547 with m.Else():
1548 # acks := acks - 1;
1549 comb += acks.eq(r1.acks_pending - 1)
1550 # end if;
1551 # end if;
1552
1553 # r1.acks_pending <= acks;
1554 sync += r1.acks_pending.eq(acks)
1555
1556 # -- Clear stb when slave accepted request
1557 # if wishbone_in.stall = '0' then
1558 # Clear stb when slave accepted request
1559 with m.If(~wb_in.stall):
1560 # -- See if there is another store waiting
1561 # -- to be done which is in the same real page.
1562 # if req.valid = '1' then
1563 # See if there is another store waiting
1564 # to be done which is in the same real page.
1565 with m.If(req.valid):
1566 # r1.wb.adr(
1567 # SET_SIZE_BITS - 1 downto 0
1568 # ) <= req.real_addr(
1569 # SET_SIZE_BITS - 1 downto 0
1570 # );
1571 # r1.wb.dat <= req.data;
1572 # r1.wb.sel <= req.byte_sel;
1573 sync += r1.wb.adr[0:SET_SIZE_BITS].eq(
1574 req.real_addr[0:SET_SIZE_BITS]
1575 )
1576 sync += r1.wb.dat.eq(req.data)
1577 sync += r1.wb.sel.eq(req.byte_sel)
1576 # end if;
1577
1578 # if acks < 7 and req.same_tag = '1'
1579 # and (req.op = OP_STORE_MISS
1580 # or req.op = OP_STORE_HIT) then
1581 with m.If((acks < 7) & req.same_tag &
1582 ((req.op == Op.OP_STORE_MISS)
1583 | (req.op == Op.OP_STORE_HIT))):
1584 # r1.wb.stb <= '1';
1585 # stbs_done := false;
1586 sync += r1.wb.stb.eq(1)
1587 sync += stbs_done.eq(0)
1588
1589 # if req.op = OP_STORE_HIT then
1590 with m.If(req.op == Op.OP_STORE_HIT):
1591 # r1.write_bram <= '1';
1592 sync += r1.write_bram.eq(1)
1593 # end if;
1594 # r1.full <= '0';
1595 # r1.slow_valid <= '1';
1596 sync += r1.full.eq(0)
1597 sync += r1.slow_valid.eq(1)
1598
1599 # -- Store requests never come from the MMU
1600 # r1.ls_valid <= '1';
1601 # stbs_done := false;
1602 # r1.inc_acks <= '1';
1603 # Store requests never come from the MMU
1604 sync += r1.ls_valid.eq(1)
1605 sync += stbs_done.eq(0)
1606 sync += r1.inc_acks.eq(1)
1607 # else
1608 with m.Else():
1609 # r1.wb.stb <= '0';
1610 # stbs_done := true;
1611 sync += r1.wb.stb.eq(0)
1612 sync += stbs_done.eq(1)
1613 # end if;
1614 # end if;
1615
1616 # -- Got ack ? See if complete.
1617 # if wishbone_in.ack = '1' then
1618 # Got ack ? See if complete.
1619 with m.If(wb_in.ack):
1620 # if stbs_done and acks = 1 then
1621 with m.If(stbs_done & acks)
1622 # r1.state <= IDLE;
1623 # r1.wb.cyc <= '0';
1624 # r1.wb.stb <= '0';
1625 sync += r1.state.eq(State.IDLE)
1626 sync += r1.wb.cyc.eq(0)
1627 sync += r1.wb.stb.eq(0)
1628 # end if;
1629 # r1.dec_acks <= '1';
1630 sync += r1.dec_acks.eq(1)
1631 # end if;
1632
1633 # when NC_LOAD_WAIT_ACK =>
1634 with m.Case(State.NC_LOAD_WAIT_ACK):
1635 # -- Clear stb when slave accepted request
1636 # if wishbone_in.stall = '0' then
1637 # Clear stb when slave accepted request
1638 with m.If(~wb_in.stall):
1639 # r1.wb.stb <= '0';
1640 sync += r1.wb.stb.eq(0)
1641 # end if;
1642
1643 # -- Got ack ? complete.
1644 # if wishbone_in.ack = '1' then
1645 # Got ack ? complete.
1646 with m.If(wb_in.ack):
1647 # r1.state <= IDLE;
1648 # r1.full <= '0';
1649 # r1.slow_valid <= '1';
1650 sync += r1.state.eq(State.IDLE)
1651 sync += r1.full.eq(0)
1652 sync += r1.slow_valid.eq(1)
1653
1654 # if r1.mmu_req = '0' then
1655 with m.If(~r1.mmu_req):
1656 # r1.ls_valid <= '1';
1657 sync += r1.ls_valid.eq(1)
1658
1659 # else
1660 with m.Else():
1661 # r1.mmu_done <= '1';
1662 sync += r1.mmu_done.eq(1)
1663 # end if;
1664
1665 # r1.forward_sel <= (others => '1');
1666 # r1.use_forward1 <= '1';
1667 # r1.wb.cyc <= '0';
1668 # r1.wb.stb <= '0';
1669 sync += r1.forward_sel.eq(1)
1670 sync += r1.use_forward1.eq(1)
1671 sync += r1.wb.cyc.eq(0)
1672 sync += r1.wb.stb.eq(0)
1673 # end if;
1674 # end case;
1675 # end if;
1676 # end if;
1677 # end process;
1678
1679 # dc_log: if LOG_LENGTH > 0 generate
1680 # TODO learn how to translate vhdl generate into nmigen
1681 def dcache_log(self, m, r1, valid_ra, tlb_hit_way, stall_out,
1682 d_out, wb_in, log_out):
1683
1684 comb = m.d.comb
1685 sync = m.d.sync
1686
1687 # signal log_data : std_ulogic_vector(19 downto 0);
1688 log_data = Signal(20)
1691
1692 # begin
1693 # dcache_log: process(clk)
1694 # begin
1695 # if rising_edge(clk) then
1696 # log_data <= r1.wb.adr(5 downto 3) &
1697 # wishbone_in.stall &
1698 # wishbone_in.ack &
1699 # r1.wb.stb & r1.wb.cyc &
1700 # d_out.error &
1701 # d_out.valid &
1702 # std_ulogic_vector(
1703 # to_unsigned(op_t'pos(req_op), 3)) &
1704 # stall_out &
1705 # std_ulogic_vector(
1706 # to_unsigned(tlb_hit_way, 3)) &
1707 # valid_ra &
1708 # std_ulogic_vector(
1709 # to_unsigned(state_t'pos(r1.state), 3));
1710 # Cat() is LSB-first, so the VHDL concatenation is listed in
1711 # reverse; field widths follow the signals and may need padding
1712 # to match the 3-bit fields of the VHDL original
1713 sync += log_data.eq(Cat(
1714 r1.state, valid_ra, tlb_hit_way,
1715 stall_out, req_op, d_out.valid, d_out.error,
1716 r1.wb.cyc, r1.wb.stb, wb_in.ack, wb_in.stall,
1717 r1.wb.adr[3:6]
1718 ))
1716 # end if;
1717 # end process;
1718 # log_out <= log_data;
1719 # log_out <= log_data (a plain combinatorial copy of the register)
1720 comb += log_out.eq(log_data)
1721 # end generate;
1722 # end;
1723
1724 def elaborate(self, platform):
1725 LINE_SIZE = self.LINE_SIZE
1726 NUM_LINES = self.NUM_LINES
1727 NUM_WAYS = self.NUM_WAYS
1728 TLB_SET_SIZE = self.TLB_SET_SIZE
1729 TLB_NUM_WAYS = self.TLB_NUM_WAYS
1730 TLB_LG_PGSZ = self.TLB_LG_PGSZ
1731 LOG_LENGTH = self.LOG_LENGTH
1732
1733 # BRAM organisation: We never access more than
1734 # -- wishbone_data_bits at a time so to save
1735 # -- resources we make the array only that wide, and
1736 # -- use consecutive indices for to make a cache "line"
1737 # --
1738 # -- ROW_SIZE is the width in bytes of the BRAM
1739 # -- (based on WB, so 64-bits)
1740 ROW_SIZE = WB_DATA_BITS // 8
1741
1742 # ROW_PER_LINE is the number of row (wishbone
1743 # transactions) in a line
1744 ROW_PER_LINE = LINE_SIZE // ROW_SIZE
1745
1746 # BRAM_ROWS is the number of rows in BRAM needed
1747 # to represent the full dcache
1748 BRAM_ROWS = NUM_LINES * ROW_PER_LINE
1749
1750
1751 # Bit fields counts in the address
1752
1753 # REAL_ADDR_BITS is the number of real address
1754 # bits that we store
1755 REAL_ADDR_BITS = 56
1756
1757 # ROW_BITS is the number of bits to select a row
1758 ROW_BITS = log2_int(BRAM_ROWS)
1759
1760 # ROW_LINE_BITS is the number of bits to select
1761 # a row within a line
1762 ROW_LINE_BITS = log2_int(ROW_PER_LINE)
1763
1764 # LINE_OFF_BITS is the number of bits for
1765 # the offset in a cache line
1766 LINE_OFF_BITS = log2_int(LINE_SIZE)
1767
1768 # ROW_OFF_BITS is the number of bits for
1769 # the offset in a row
1770 ROW_OFF_BITS = log2_int(ROW_SIZE)
1771
1772 # INDEX_BITS is the number of bits to
1773 # select a cache line
1774 INDEX_BITS = log2_int(NUM_LINES)
1775
1776 # SET_SIZE_BITS is the log base 2 of the set size
1777 SET_SIZE_BITS = LINE_OFF_BITS + INDEX_BITS
1778
1779 # TAG_BITS is the number of bits of
1780 # the tag part of the address
1781 TAG_BITS = REAL_ADDR_BITS - SET_SIZE_BITS
1782
1783 # TAG_WIDTH is the width in bits of each way of the tag RAM
1784 TAG_WIDTH = TAG_BITS + 7 - ((TAG_BITS + 7) % 8)
1785
1786 # WAY_BITS is the number of bits to select a way
1787 WAY_BITS = log2_int(NUM_WAYS)
1788
1789 # Example of layout for 32 lines of 64 bytes:
1790 #
1791 # .. tag |index| line |
1792 # .. | row | |
1793 # .. | |---| | ROW_LINE_BITS (3)
1794 # .. | |--- - --| LINE_OFF_BITS (6)
1795 # .. | |- --| ROW_OFF_BITS (3)
1796 # .. |----- ---| | ROW_BITS (8)
1797 # .. |-----| | INDEX_BITS (5)
1798 # .. --------| | TAG_BITS (45)
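
# With the default parameters (LINE_SIZE=64, NUM_LINES=32, NUM_WAYS=4,
# WB_DATA_BITS=64, REAL_ADDR_BITS=56) the constants above work out to:
#
#   ROW_SIZE      = 8 bytes   ROW_PER_LINE  = 8    BRAM_ROWS     = 256
#   ROW_BITS      = 8         ROW_LINE_BITS = 3    ROW_OFF_BITS  = 3
#   LINE_OFF_BITS = 6         INDEX_BITS    = 5    SET_SIZE_BITS = 11
#   TAG_BITS      = 45        TAG_WIDTH     = 48   WAY_BITS      = 2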
1799
1800 TAG_RAM_WIDTH = TAG_WIDTH * NUM_WAYS
1801
1802 def CacheTagArray():
1803 return Array(CacheTagSet() for x in range(NUM_LINES))
1804
1805 def CacheValidBitsArray():
1806 return Array(CacheWayValidBits() for x in range(NUM_LINES))
1807
1808 def RowPerLineValidArray():
1809 return Array(Signal() for x in range(ROW_PER_LINE))
1810
1811 # Storage. Hopefully "cache_rows" is a BRAM, the rest is LUTs
1812 cache_tags = CacheTagArray()
1813 cache_tag_set = Signal(TAG_RAM_WIDTH)
1814 cache_valid_bits = CacheValidBitsArray()
1815
1816 # TODO attribute ram_style : string;
1817 # TODO attribute ram_style of cache_tags : signal is "distributed";
1818
1819 # L1 TLB
1820 TLB_SET_BITS = log2_int(TLB_SET_SIZE)
1821 TLB_WAY_BITS = log2_int(TLB_NUM_WAYS)
1822 TLB_EA_TAG_BITS = 64 - (TLB_LG_PGSZ + TLB_SET_BITS)
1823 TLB_TAG_WAY_BITS = TLB_NUM_WAYS * TLB_EA_TAG_BITS
1824 TLB_PTE_BITS = 64
1825 TLB_PTE_WAY_BITS = TLB_NUM_WAYS * TLB_PTE_BITS
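
# With the defaults (TLB_SET_SIZE=64, TLB_NUM_WAYS=2, TLB_LG_PGSZ=12):
#   TLB_SET_BITS = 6, TLB_WAY_BITS = 1,
#   TLB_EA_TAG_BITS = 64 - (12 + 6) = 46,
#   TLB_TAG_WAY_BITS = 92, TLB_PTE_WAY_BITS = 128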
1826
1827 def TLBValidBitsArray():
1828 return Array(
1829 Signal(TLB_NUM_WAYS) for x in range(TLB_SET_SIZE)
1830 )
1831
1832 def TLBTagsArray():
1833 return Array(
1834 Signal(TLB_TAG_WAY_BITS) for x in range (TLB_SET_SIZE)
1835 )
1836
1837 def TLBPtesArray():
1838 return Array(
1839 Signal(TLB_PTE_WAY_BITS) for x in range(TLB_SET_SIZE)
1840 )
1841
1842 def HitWaySet():
1843 return Array(Signal(NUM_WAYS) for x in range(TLB_NUM_WAYS))
1844
1845 """note: these are passed to nmigen.hdl.Memory as "attributes".
1846 don't know how, just that they are.
1847 """
1848 dtlb_valid_bits = TLBValidBitsArray()
1849 dtlb_tags = TLBTagsArray()
1850 dtlb_ptes = TLBPtesArray()
1851 # TODO attribute ram_style of
1852 # dtlb_tags : signal is "distributed";
1853 # TODO attribute ram_style of
1854 # dtlb_ptes : signal is "distributed";
1855
r0 = RegStage0()
r0_full = Signal()

r1 = RegStage1()

reservation = Reservation()

# Async signals on incoming request; the index/row signals are
# binary selectors, hence INDEX_BITS/ROW_BITS wide
req_index = Signal(INDEX_BITS)
req_row = Signal(ROW_BITS)
req_hit_way = Signal(WAY_BITS)
req_tag = Signal(TAG_BITS)
req_op = Signal(Op)
req_data = Signal(64)
req_same_tag = Signal()
req_go = Signal()

early_req_row = Signal(ROW_BITS)

cancel_store = Signal()
set_rsrv = Signal()
clear_rsrv = Signal()

r0_valid = Signal()
r0_stall = Signal()

use_forward1_next = Signal()
use_forward2_next = Signal()

# Cache RAM interface
def CacheRamOut():
    return Array(Signal(WB_DATA_BITS) for x in range(NUM_WAYS))

cache_out = CacheRamOut()

# PLRU output interface (one victim-way selector per cache line)
def PLRUOut():
    return Array(Signal(WAY_BITS) for x in range(NUM_LINES))

plru_victim = PLRUOut()
replace_way = Signal(WAY_BITS)

# Wishbone read/write/cache write formatting signals
bus_sel = Signal(8)

# TLB signals
tlb_tag_way = Signal(TLB_TAG_WAY_BITS)
tlb_pte_way = Signal(TLB_PTE_WAY_BITS)
tlb_valid_way = Signal(TLB_NUM_WAYS)
tlb_req_index = Signal(TLB_SET_BITS)
tlb_hit = Signal()
tlb_hit_way = Signal(TLB_WAY_BITS)
pte = Signal(TLB_PTE_BITS)
ra = Signal(REAL_ADDR_BITS)
valid_ra = Signal()
perm_attr = PermAttr()
rc_ok = Signal()
perm_ok = Signal()
access_ok = Signal()

# TLB PLRU output interface
def TLBPLRUOut():
    return Array(
        Signal(TLB_WAY_BITS) for x in range(TLB_SET_SIZE)
    )

tlb_plru_victim = TLBPLRUOut()

# Helper functions to decode incoming requests
#
# Return the cache line index (tag index) for an address
def get_index(addr):
    return addr[LINE_OFF_BITS:SET_SIZE_BITS]

# Return the cache row index (data memory) for an address
def get_row(addr):
    return addr[ROW_OFF_BITS:SET_SIZE_BITS]

# Return the index of a row within a line
def get_row_of_line(row):
    return row[0:ROW_LINE_BITS]

# Returns whether the address is in the last row of a line
def is_last_row_addr(addr, last):
    return addr[ROW_OFF_BITS:LINE_OFF_BITS] == last

# Returns whether this is the last row of a line
def is_last_row(row, last):
    return get_row_of_line(row) == last
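
# e.g. with ROW_PER_LINE == 8 the caller passes last == 7: a row whose
# low ROW_LINE_BITS are all-ones is the final wishbone beat of its line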

# Return the address of the next row in the current cache line
def next_row_addr(addr):
    # the VHDL asked "Is there no simpler way in VHDL to generate
    # that 3 bits adder?"; in nmigen we add one to the row-index
    # slice and re-concatenate, keeping the adder ROW_LINE_BITS wide
    row_idx = addr[ROW_OFF_BITS:LINE_OFF_BITS] + 1
    return Cat(addr[:ROW_OFF_BITS],
               row_idx[:ROW_LINE_BITS],
               addr[LINE_OFF_BITS:])

# Return the next row in the current cache line. We use a
# dedicated function in order to limit the size of the
# generated adder to be only the bits within a cache line
# (3 bits with default settings)
def next_row(row):
    row_idx = row[:ROW_LINE_BITS] + 1
    return Cat(row_idx[:ROW_LINE_BITS], row[ROW_LINE_BITS:])

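# Worked example (plain integers, ROW_LINE_BITS == 3 assumed):
#
#     row = 0b00001111             # last row of a line
#     low = (row + 1) & 0b111      # -> 0: the 3-bit adder wraps
#     nxt = (row & ~0b111) | low   # -> 0b00001000, row 0 of the same line
#
# the Cat() above performs the same recombination in hardware.
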
# Get the tag value from the address
def get_tag(addr):
    return addr[SET_SIZE_BITS:REAL_ADDR_BITS]

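# Worked example of the full decode (plain integers rather than the
# nmigen bit-slices used above; assumes the 32-line/64-byte geometry
# from the diagram earlier):
#
#     addr   = 0x12345
#     offset =  addr       & (LINE_SIZE - 1)    # 0x05
#     index  = (addr >> 6) & (NUM_LINES - 1)    # 13
#     tag    =  addr >> 11                      # 36
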
# Read a tag from a tag memory row
def read_tag(way, tagset):
    return tagset[way * TAG_WIDTH:way * TAG_WIDTH + TAG_BITS]

# Read a TLB tag from a TLB tag memory row
def read_tlb_tag(way, tags):
    j = way * TLB_EA_TAG_BITS
    return tags[j:j + TLB_EA_TAG_BITS]

# Write a TLB tag to a TLB tag memory row: returns the assignment
# statement for the caller to add to a comb/sync domain
def write_tlb_tag(way, tags, tag):
    j = way * TLB_EA_TAG_BITS
    return tags[j:j + TLB_EA_TAG_BITS].eq(tag)

# Read a PTE from a TLB PTE memory row
def read_tlb_pte(way, ptes):
    j = way * TLB_PTE_BITS
    return ptes[j:j + TLB_PTE_BITS]

# Write a PTE to a TLB PTE memory row: likewise returns the
# assignment statement
def write_tlb_pte(way, ptes, newpte):
    j = way * TLB_PTE_BITS
    return ptes[j:j + TLB_PTE_BITS].eq(newpte)

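# Minimal usage sketch for the helpers above (hedged: assumes this
# runs inside elaborate(), where comb/sync statement lists exist, and
# "new_pte" is a hypothetical 64-bit value):
#
#     pte_row = dtlb_ptes[tlb_req_index]
#     pte     = read_tlb_pte(way, pte_row)            # 64-bit slice
#     sync   += write_tlb_pte(way, pte_row, new_pte)  # assign slice
#
# each row packs TLB_NUM_WAYS fields side by side, so way 2's PTE
# occupies bits [128:192] of its row when TLB_PTE_BITS == 64.
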
assert (LINE_SIZE % ROW_SIZE) == 0, \
    "LINE_SIZE not multiple of ROW_SIZE"

# (x & (x - 1)) == 0 is the usual power-of-two check
assert (LINE_SIZE & (LINE_SIZE - 1)) == 0, \
    "LINE_SIZE not power of 2"

assert (NUM_LINES & (NUM_LINES - 1)) == 0, \
    "NUM_LINES not power of 2"

assert (ROW_PER_LINE & (ROW_PER_LINE - 1)) == 0, \
    "ROW_PER_LINE not power of 2"

assert ROW_BITS == (INDEX_BITS + ROW_LINE_BITS), \
    "geometry bits don't add up"

assert LINE_OFF_BITS == (ROW_OFF_BITS + ROW_LINE_BITS), \
    "geometry bits don't add up"

assert REAL_ADDR_BITS == (TAG_BITS + INDEX_BITS + LINE_OFF_BITS), \
    "geometry bits don't add up"

assert REAL_ADDR_BITS == (TAG_BITS + ROW_BITS + ROW_OFF_BITS), \
    "geometry bits don't add up"

assert WB_DATA_BITS == 64, \
    "Can't yet handle a wishbone width that isn't 64-bits"

assert SET_SIZE_BITS <= TLB_LG_PGSZ, \
    "Set indexed by virtual address"

# we don't yet handle collisions between loadstore1 requests
# and MMU requests
comb += m_out.stall.eq(0)

# Hold off the request in r0 when r1 has an uncompleted request
comb += r0_stall.eq(r0_full & r1.full)
comb += r0_valid.eq(r0_full & ~r1.full)
comb += stall_out.eq(r0_stall)
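
# the r0 handshake at a glance (descriptive summary of the three
# statements above):
#
#   r0_full  r1.full | r0_stall  r0_valid
#      0        x    |    0         0      (nothing latched)
#      1        0    |    0         1      (request proceeds)
#      1        1    |    1         0      (hold until r1 drains)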

# Wire up wishbone request latch out of stage 1
comb += wishbone_out.eq(r1.wb)


# dcache_tb.vhdl
#
# entity dcache_tb is
# end dcache_tb;
#
# architecture behave of dcache_tb is
#     signal clk : std_ulogic;
#     signal rst : std_ulogic;
#
#     signal d_in : Loadstore1ToDcacheType;
#     signal d_out : DcacheToLoadstore1Type;
#
#     signal m_in : MmuToDcacheType;
#     signal m_out : DcacheToMmuType;
#
#     signal wb_bram_in : wishbone_master_out;
#     signal wb_bram_out : wishbone_slave_out;
#
#     constant clk_period : time := 10 ns;
# begin
#     dcache0: entity work.dcache
#         generic map(
#
#             LINE_SIZE => 64,
#             NUM_LINES => 4
#             )
#         port map(
#             clk => clk,
#             rst => rst,
#             d_in => d_in,
#             d_out => d_out,
#             m_in => m_in,
#             m_out => m_out,
#             wishbone_out => wb_bram_in,
#             wishbone_in => wb_bram_out
#             );
#
#     -- BRAM Memory slave
#     bram0: entity work.wishbone_bram_wrapper
#         generic map(
#             MEMORY_SIZE => 1024,
#             RAM_INIT_FILE => "icache_test.bin"
#             )
#         port map(
#             clk => clk,
#             rst => rst,
#             wishbone_in => wb_bram_in,
#             wishbone_out => wb_bram_out
#             );
#
#     clk_process: process
#     begin
#         clk <= '0';
#         wait for clk_period/2;
#         clk <= '1';
#         wait for clk_period/2;
#     end process;
#
#     rst_process: process
#     begin
#         rst <= '1';
#         wait for 2*clk_period;
#         rst <= '0';
#         wait;
#     end process;
#
#     stim: process
#     begin
#         -- Clear stuff
#         d_in.valid <= '0';
#         d_in.load <= '0';
#         d_in.nc <= '0';
#         d_in.addr <= (others => '0');
#         d_in.data <= (others => '0');
#         m_in.valid <= '0';
#         m_in.addr <= (others => '0');
#         m_in.pte <= (others => '0');
#
#         wait for 4*clk_period;
#         wait until rising_edge(clk);
#
#         -- Cacheable read of address 4
#         d_in.load <= '1';
#         d_in.nc <= '0';
#         d_in.addr <= x"0000000000000004";
#         d_in.valid <= '1';
#         wait until rising_edge(clk);
#         d_in.valid <= '0';
#
#         wait until rising_edge(clk) and d_out.valid = '1';
#         assert d_out.data = x"0000000100000000"
#             report "data @" & to_hstring(d_in.addr) &
#             "=" & to_hstring(d_out.data) &
#             " expected 0000000100000000"
#             severity failure;
#         -- wait for clk_period;
#
#         -- Cacheable read of address 30
#         d_in.load <= '1';
#         d_in.nc <= '0';
#         d_in.addr <= x"0000000000000030";
#         d_in.valid <= '1';
#         wait until rising_edge(clk);
#         d_in.valid <= '0';
#
#         wait until rising_edge(clk) and d_out.valid = '1';
#         assert d_out.data = x"0000000D0000000C"
#             report "data @" & to_hstring(d_in.addr) &
#             "=" & to_hstring(d_out.data) &
#             " expected 0000000D0000000C"
#             severity failure;
#
#         -- Non-cacheable read of address 100
#         d_in.load <= '1';
#         d_in.nc <= '1';
#         d_in.addr <= x"0000000000000100";
#         d_in.valid <= '1';
#         wait until rising_edge(clk);
#         d_in.valid <= '0';
#         wait until rising_edge(clk) and d_out.valid = '1';
#         assert d_out.data = x"0000004100000040"
#             report "data @" & to_hstring(d_in.addr) &
#             "=" & to_hstring(d_out.data) &
#             " expected 0000004100000040"
#             severity failure;
#
#         wait until rising_edge(clk);
#         wait until rising_edge(clk);
#         wait until rising_edge(clk);
#         wait until rising_edge(clk);
#
#         std.env.finish;
#     end process;
# end;


# simulation/conversion imports (assumed module paths for the nmigen
# in use here; these are not in the imports at the top of the file)
from nmigen import Const
from nmigen.back import rtlil
from nmigen.compat.sim import run_simulation


def dcache_sim(dut):
    # clear stuff
    yield dut.d_in.valid.eq(0)
    yield dut.d_in.load.eq(0)
    yield dut.d_in.nc.eq(0)
    yield dut.d_in.addr.eq(0)
    yield dut.d_in.data.eq(0)
    yield dut.m_in.valid.eq(0)
    yield dut.m_in.addr.eq(0)
    yield dut.m_in.pte.eq(0)
    # wait 4 * clk_period
    yield
    yield
    yield
    yield
    # wait_until rising_edge(clk)
    yield

    # Cacheable read of address 4
    yield dut.d_in.load.eq(1)
    yield dut.d_in.nc.eq(0)
    yield dut.d_in.addr.eq(Const(0x0000000000000004, 64))
    yield dut.d_in.valid.eq(1)
    # wait-until rising_edge(clk)
    yield
    yield dut.d_in.valid.eq(0)
    yield
    while not (yield dut.d_out.valid):
        yield
    addr = yield dut.d_in.addr
    data = yield dut.d_out.data
    assert data == 0x0000000100000000, \
        f"data @{addr:x}={data:x} expected 0000000100000000"

    # Cacheable read of address 30
    yield dut.d_in.load.eq(1)
    yield dut.d_in.nc.eq(0)
    yield dut.d_in.addr.eq(Const(0x0000000000000030, 64))
    yield dut.d_in.valid.eq(1)
    yield
    yield dut.d_in.valid.eq(0)
    yield
    while not (yield dut.d_out.valid):
        yield
    addr = yield dut.d_in.addr
    data = yield dut.d_out.data
    assert data == 0x0000000D0000000C, \
        f"data @{addr:x}={data:x} expected 0000000D0000000C"

    # Non-cacheable read of address 100
    yield dut.d_in.load.eq(1)
    yield dut.d_in.nc.eq(1)
    yield dut.d_in.addr.eq(Const(0x0000000000000100, 64))
    yield dut.d_in.valid.eq(1)
    yield
    yield dut.d_in.valid.eq(0)
    yield
    while not (yield dut.d_out.valid):
        yield
    addr = yield dut.d_in.addr
    data = yield dut.d_out.data
    assert data == 0x0000004100000040, \
        f"data @{addr:x}={data:x} expected 0000004100000040"

    yield
    yield
    yield
    yield


def test_dcache():
    dut = DCache()
    vl = rtlil.convert(dut, ports=[])
    with open("test_dcache.il", "w") as f:
        f.write(vl)

    run_simulation(dut, dcache_sim(dut), vcd_name='test_dcache.vcd')


if __name__ == '__main__':
    test_dcache()