"""based on Anton Blanchard microwatt dcache.vhdl"""

from enum import Enum, unique

from nmigen import Module, Signal, Elaboratable, Cat, Repl, Array, Const
from nmigen.cli import main
from nmutil.iocontrol import RecordObject
from nmigen.utils import log2_int
from nmigen.cli import rtlil

from soc.experiment.mem_types import (LoadStore1ToDCacheType,
                                      DCacheToLoadStore1Type,
                                      MMUToDCacheType,
                                      DCacheToMMUType)

from soc.experiment.wb_types import (WB_ADDR_BITS, WB_DATA_BITS, WB_SEL_BITS,
                                     WBAddrType, WBDataType, WBSelType,
                                     WBMasterOut, WBSlaveOut,
                                     WBMasterOutVector, WBSlaveOutVector,
                                     WBIOMasterOut, WBIOSlaveOut)

from soc.experiment.cache_ram import CacheRam
from soc.experiment.plru import PLRU

# TODO: make these parameters of DCache at some point
LINE_SIZE = 64    # Line size in bytes
NUM_LINES = 32    # Number of lines in a set
NUM_WAYS = 4      # Number of ways
TLB_SET_SIZE = 64 # L1 DTLB entries per set
TLB_NUM_WAYS = 2  # L1 DTLB number of ways
TLB_LG_PGSZ = 12  # L1 DTLB log_2(page_size)
LOG_LENGTH = 0    # Non-zero to enable log data collection

# BRAM organisation: We never access more than
#     WB_DATA_BITS at a time so to save
#     resources we make the array only that wide, and
#     use consecutive indices to make a cache "line"
#
#     ROW_SIZE is the width in bytes of the BRAM
#     (based on WB, so 64-bits)
ROW_SIZE = WB_DATA_BITS // 8

# ROW_PER_LINE is the number of rows (wishbone
# transactions) in a line
ROW_PER_LINE = LINE_SIZE // ROW_SIZE

# BRAM_ROWS is the number of rows in BRAM needed
# to represent the full dcache
BRAM_ROWS = NUM_LINES * ROW_PER_LINE

# Bit fields counts in the address
#
# REAL_ADDR_BITS is the number of real address
# bits that we store (56, matching the microwatt original
# and the TAG_BITS(45) figure in the layout diagram below)
REAL_ADDR_BITS = 56

# ROW_BITS is the number of bits to select a row
ROW_BITS = log2_int(BRAM_ROWS)

# ROW_LINE_BITS is the number of bits to select
# a row within a line
ROW_LINE_BITS = log2_int(ROW_PER_LINE)

# LINE_OFF_BITS is the number of bits for
# the offset in a cache line
LINE_OFF_BITS = log2_int(LINE_SIZE)

# ROW_OFF_BITS is the number of bits for
# the offset in a row
ROW_OFF_BITS = log2_int(ROW_SIZE)

# INDEX_BITS is the number of bits to
# select a cache line
INDEX_BITS = log2_int(NUM_LINES)

# SET_SIZE_BITS is the log base 2 of the set size
SET_SIZE_BITS = LINE_OFF_BITS + INDEX_BITS

# TAG_BITS is the number of bits of
# the tag part of the address
TAG_BITS = REAL_ADDR_BITS - SET_SIZE_BITS

# TAG_WIDTH is the width in bits of each way of the tag RAM
TAG_WIDTH = TAG_BITS + 7 - ((TAG_BITS + 7) % 8)

# WAY_BITS is the number of bits to select a way
WAY_BITS = log2_int(NUM_WAYS)

# Example of layout for 32 lines of 64 bytes:
#
# ..  tag    |index|  line  |
# ..         |   row   |    |
# ..         |     |---|    | ROW_LINE_BITS  (3)
# ..         |     |--- - --| LINE_OFF_BITS (6)
# ..         |         |- --| ROW_OFF_BITS  (3)
# ..         |----- ---|    | ROW_BITS      (8)
# ..         |-----|        | INDEX_BITS    (5)
# .. --------|              | TAG_BITS      (45)

TAG_RAM_WIDTH = TAG_WIDTH * NUM_WAYS
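
# A worked example of the geometry above with the default settings
# (a sanity sketch only; the asserts further down check the same sums):
#
#   ROW_SIZE      = 64 // 8 = 8     bytes per BRAM row
#   ROW_PER_LINE  = 64 // 8 = 8     wishbone beats per cache line
#   BRAM_ROWS     = 32 * 8  = 256   BRAM rows for the whole dcache
#   ROW_BITS      = 8,  ROW_LINE_BITS = 3,  LINE_OFF_BITS = 6
#   ROW_OFF_BITS  = 3,  INDEX_BITS    = 5,  SET_SIZE_BITS = 11
#   TAG_BITS      = 56 - 11 = 45,   TAG_WIDTH = 48,  WAY_BITS = 2
#   TAG_RAM_WIDTH = 48 * 4  = 192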


def CacheTagArray():
    return Array(Signal(TAG_RAM_WIDTH) for x in range(NUM_LINES))

def CacheValidBitsArray():
    # one valid bit per way, for each cache line
    return Array(Signal(NUM_WAYS) for x in range(NUM_LINES))

def RowPerLineValidArray():
    return Array(Signal() for x in range(ROW_PER_LINE))


# L1 TLB
TLB_SET_BITS = log2_int(TLB_SET_SIZE)
TLB_WAY_BITS = log2_int(TLB_NUM_WAYS)
TLB_EA_TAG_BITS = 64 - (TLB_LG_PGSZ + TLB_SET_BITS)
TLB_TAG_WAY_BITS = TLB_NUM_WAYS * TLB_EA_TAG_BITS
TLB_PTE_BITS = 64
TLB_PTE_WAY_BITS = TLB_NUM_WAYS * TLB_PTE_BITS
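
# Worked values for the default TLB geometry (again just a sketch):
#   TLB_SET_BITS     = log2(64) = 6,  TLB_WAY_BITS = log2(2) = 1
#   TLB_EA_TAG_BITS  = 64 - (12 + 6)  = 46
#   TLB_TAG_WAY_BITS = 2 * 46 = 92,   TLB_PTE_WAY_BITS = 2 * 64 = 128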

assert (LINE_SIZE % ROW_SIZE) == 0, "LINE_SIZE not multiple of ROW_SIZE"
assert (LINE_SIZE & (LINE_SIZE - 1)) == 0, "LINE_SIZE not power of 2"
assert (NUM_LINES & (NUM_LINES - 1)) == 0, "NUM_LINES not power of 2"
assert (ROW_PER_LINE & (ROW_PER_LINE - 1)) == 0, \
    "ROW_PER_LINE not power of 2"
assert ROW_BITS == (INDEX_BITS + ROW_LINE_BITS), "geometry bits don't add up"
assert (LINE_OFF_BITS == ROW_OFF_BITS + ROW_LINE_BITS), \
    "geometry bits don't add up"
assert REAL_ADDR_BITS == (TAG_BITS + INDEX_BITS + LINE_OFF_BITS), \
    "geometry bits don't add up"
assert REAL_ADDR_BITS == (TAG_BITS + ROW_BITS + ROW_OFF_BITS), \
    "geometry bits don't add up"
assert 64 == WB_DATA_BITS, "Can't yet handle wb width that isn't 64-bits"
assert SET_SIZE_BITS <= TLB_LG_PGSZ, "Set indexed by virtual address"


def TLBValidBitsArray():
    return Array(Signal(TLB_NUM_WAYS) for x in range(TLB_SET_SIZE))

def TLBTagEAArray():
    return Array(Signal(TLB_EA_TAG_BITS) for x in range(TLB_NUM_WAYS))

def TLBTagsArray():
    return Array(Signal(TLB_TAG_WAY_BITS) for x in range(TLB_SET_SIZE))

def TLBPtesArray():
    return Array(Signal(TLB_PTE_WAY_BITS) for x in range(TLB_SET_SIZE))

def HitWaySet():
    return Array(Signal(NUM_WAYS) for x in range(TLB_NUM_WAYS))

# Cache RAM interface
def CacheRamOut():
    return Array(Signal(WB_DATA_BITS) for x in range(NUM_WAYS))

# PLRU output interface
def PLRUOut():
    return Array(Signal(WAY_BITS) for x in range(NUM_LINES))

# TLB PLRU output interface
def TLBPLRUOut():
    return Array(Signal(TLB_WAY_BITS) for x in range(TLB_SET_SIZE))

# Helper functions to decode incoming requests
#
# Return the cache line index (tag index) for an address
def get_index(addr):
    return addr[LINE_OFF_BITS:SET_SIZE_BITS]

# Return the cache row index (data memory) for an address
def get_row(addr):
    return addr[ROW_OFF_BITS:SET_SIZE_BITS]

# Return the index of a row within a line
def get_row_of_line(row):
    return row[:ROW_LINE_BITS]

# Returns whether this is the last row of a line
def is_last_row_addr(addr, last):
    return addr[ROW_OFF_BITS:LINE_OFF_BITS] == last

# Returns whether this is the last row of a line
def is_last_row(row, last):
    return get_row_of_line(row) == last

# Return the next row in the current cache line. We use a
# dedicated function in order to limit the size of the
# generated adder to be only the bits within a cache line
# (3 bits with default settings)
def next_row(row):
    row_v = row[0:ROW_LINE_BITS] + 1
    return Cat(row_v[:ROW_LINE_BITS], row[ROW_LINE_BITS:])
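
# e.g. with ROW_LINE_BITS == 3, next_row(0b00010111) == 0b00010000:
# the low 3 bits (row-within-line) wrap from 7 back to 0 while the
# upper bits, which select the line, pass through unchanged.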

# Get the tag value from the address
def get_tag(addr):
    return addr[SET_SIZE_BITS:REAL_ADDR_BITS]

# Read a tag from a tag memory row
def read_tag(way, tagset):
    return tagset.word_select(way, TAG_WIDTH)[:TAG_BITS]
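
# e.g. read_tag(1, tagset) picks tagset[48:96] (way 1 of the packed tag
# RAM row, TAG_WIDTH == 48) and keeps its low TAG_BITS == 45 bits; the
# remaining 3 bits per way are padding that rounds each tag up to a byte.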

# Read a TLB tag from a TLB tag memory row
def read_tlb_tag(way, tags):
    return tags.word_select(way, TLB_EA_TAG_BITS)

# Write a TLB tag to a TLB tag memory row
def write_tlb_tag(way, tags, tag):
    return read_tlb_tag(way, tags).eq(tag)

# Read a PTE from a TLB PTE memory row
def read_tlb_pte(way, ptes):
    return ptes.word_select(way, TLB_PTE_BITS)

# Write a PTE to a TLB PTE memory row
def write_tlb_pte(way, ptes, newpte):
    return read_tlb_pte(way, ptes).eq(newpte)
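
# A minimal plain-Python mirror of the address decode above, handy for
# checking the geometry by hand. These helpers are illustrative only
# (they are not used by the hardware description):

def _sim_get_index(addr):
    """cache line index: bits [LINE_OFF_BITS:SET_SIZE_BITS] of addr"""
    return (addr >> LINE_OFF_BITS) & ((1 << INDEX_BITS) - 1)

def _sim_get_row(addr):
    """BRAM row number: bits [ROW_OFF_BITS:SET_SIZE_BITS] of addr"""
    return (addr >> ROW_OFF_BITS) & ((1 << ROW_BITS) - 1)

def _sim_get_tag(addr):
    """tag: bits [SET_SIZE_BITS:REAL_ADDR_BITS] of addr"""
    return (addr >> SET_SIZE_BITS) & ((1 << TAG_BITS) - 1)

# example: address 0x88 is one row into line-index 2, so
# _sim_get_index(0x88) == 2, _sim_get_row(0x88) == 17, _sim_get_tag(0x88) == 0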


# Record for storing permission, attribute, etc. bits from a PTE
class PermAttr(RecordObject):
    def __init__(self):
        super().__init__()
        self.reference = Signal()
        self.changed   = Signal()
        self.nocache   = Signal()
        self.priv      = Signal()
        self.rd_perm   = Signal()
        self.wr_perm   = Signal()


def extract_perm_attr(pte):
    pa = PermAttr()
    pa.reference = pte[8]
    # remaining bit positions follow the radix PTE layout used by
    # microwatt dcache.vhdl (reconstructed; missing from the original text)
    pa.changed = pte[7]
    pa.nocache = pte[5]
    pa.priv = pte[3]
    pa.rd_perm = pte[2]
    pa.wr_perm = pte[1]
    return pa


# Type of operation on a "valid" input
@unique
class Op(Enum):
    OP_NONE       = 0
    OP_BAD        = 1 # NC cache hit, TLB miss, prot/RC failure
    OP_STCX_FAIL  = 2 # conditional store w/o reservation
    OP_LOAD_HIT   = 3 # Cache hit on load
    OP_LOAD_MISS  = 4 # Load missing cache
    OP_LOAD_NC    = 5 # Non-cachable load
    OP_STORE_HIT  = 6 # Store hitting cache
    OP_STORE_MISS = 7 # Store missing cache


# Cache state machine
@unique
class State(Enum):
    IDLE             = 0 # Normal load hit processing
    RELOAD_WAIT_ACK  = 1 # Cache reload wait ack
    STORE_WAIT_ACK   = 2 # Store wait ack
    NC_LOAD_WAIT_ACK = 3 # Non-cachable load wait ack


# In order to make timing, we use the BRAMs with
# an output buffer, which means that the BRAM
# output is delayed by an extra cycle.
#
# Thus, the dcache has a 2-stage internal pipeline
# for cache hits with no stalls.
#
# All other operations are handled via stalling
# in the first stage.
#
# The second stage can thus complete a hit at the same
# time as the first stage emits a stall for a complex op.
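
# An illustrative timing sketch of the hit pipeline described above
# (assumption: one BRAM output-buffer stage, as stated):
#
#   cycle:    0                1                2
#   stage 0:  latch request    (next request)
#   stage 1:                   BRAM/tag read
#   output:                                    data + valid (load hit)
#
# A complex op stalls in stage 0 while a previous hit completes in
# stage 1, which is why the two stages can overlap.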


# Stage 0 register, basically contains just the latched request
class RegStage0(RecordObject):
    def __init__(self):
        super().__init__()
        self.req     = LoadStore1ToDCacheType()
        self.tlbie   = Signal()
        self.doall   = Signal()
        self.tlbld   = Signal()
        self.mmu_req = Signal() # indicates source of request


class MemAccessRequest(RecordObject):
    def __init__(self):
        super().__init__()
        self.op        = Signal(Op)
        self.valid     = Signal()
        self.dcbz      = Signal()
        self.real_addr = Signal(REAL_ADDR_BITS)
        self.data      = Signal(64)
        self.byte_sel  = Signal(8)
        self.hit_way   = Signal(WAY_BITS)
        self.same_tag  = Signal()
        self.mmu_req   = Signal()


# First stage register, contains state for stage 1 of load hits
# and for the state machine used by all other operations
class RegStage1(RecordObject):
    def __init__(self):
        super().__init__()
        # Info about the request
        self.full             = Signal() # have uncompleted request
        self.mmu_req          = Signal() # request is from MMU
        self.req              = MemAccessRequest()

        # Cache hit state
        self.hit_way          = Signal(WAY_BITS)
        self.hit_load_valid   = Signal()
        self.hit_index        = Signal(INDEX_BITS) # index of the hit line
        self.cache_hit        = Signal()

        # TLB hit state
        self.tlb_hit          = Signal()
        self.tlb_hit_way      = Signal(TLB_WAY_BITS) # way number, not a mask
        self.tlb_hit_index    = Signal(TLB_SET_BITS) # TLB set index

        # 2-stage data buffer for data forwarded from writes to reads
        self.forward_data1    = Signal(64)
        self.forward_data2    = Signal(64)
        self.forward_sel1     = Signal(8)
        self.forward_valid1   = Signal()
        self.forward_way1     = Signal(WAY_BITS)
        self.forward_row1     = Signal(ROW_BITS)
        self.use_forward1     = Signal()
        self.forward_sel      = Signal(8)

        # Cache miss state (reload state machine)
        self.state            = Signal(State)
        self.dcbz             = Signal()
        self.write_bram       = Signal()
        self.write_tag        = Signal()
        self.slow_valid       = Signal()
        self.wb               = WBMasterOut()
        self.reload_tag       = Signal(TAG_BITS)
        self.store_way        = Signal(WAY_BITS)
        self.store_row        = Signal(ROW_BITS)
        self.store_index      = Signal(INDEX_BITS)
        self.end_row_ix       = Signal(ROW_LINE_BITS) # last row within line
        self.rows_valid       = RowPerLineValidArray()
        self.acks_pending     = Signal(3)
        self.inc_acks         = Signal()
        self.dec_acks         = Signal()

        # Signals to complete (possibly with error)
        self.ls_valid         = Signal()
        self.ls_error         = Signal()
        self.mmu_done         = Signal()
        self.mmu_error        = Signal()
        self.cache_paradox    = Signal()

        # Signal to complete a failed stcx.
        self.stcx_fail        = Signal()


# Reservation information
class Reservation(RecordObject):
    def __init__(self):
        super().__init__()
        self.valid = Signal()
        self.addr  = Signal(64-LINE_OFF_BITS)


class DTLBUpdate(Elaboratable):
    def __init__(self, dtlb_valid_bits, dtlb_ptes):
        self.tlbie         = Signal()
        self.tlbwe         = Signal()
        self.doall         = Signal()
        self.tlb_hit       = Signal()
        self.tlb_req_index = Signal(TLB_SET_BITS)

        self.dtlb_valid_bits = dtlb_valid_bits
        self.dtlb_ptes       = dtlb_ptes

        self.tlb_hit_way = Signal(TLB_WAY_BITS)
        self.tlb_tag_way = Signal(TLB_TAG_WAY_BITS)
        self.tlb_pte_way = Signal(TLB_PTE_WAY_BITS)
        self.repl_way    = Signal(TLB_WAY_BITS)
        self.eatag       = Signal(TLB_EA_TAG_BITS)
        self.pte_data    = Signal(TLB_PTE_BITS)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        tagset = Signal(TLB_TAG_WAY_BITS)
        pteset = Signal(TLB_PTE_WAY_BITS)

        vb = Signal(TLB_NUM_WAYS)
        db = Signal(TLB_PTE_WAY_BITS)

        sync += vb.eq(self.dtlb_valid_bits[self.tlb_req_index])
        sync += db.eq(self.dtlb_ptes[self.tlb_req_index])

        with m.If(self.tlbie & self.doall):
            # clear all valid bits at once
            for i in range(TLB_SET_SIZE):
                sync += self.dtlb_valid_bits[i].eq(0)

        with m.Elif(self.tlbie):
            with m.If(self.tlb_hit):
                sync += vb.bit_select(self.tlb_hit_way, 1).eq(Const(0, 1))

        with m.Elif(self.tlbwe):
            comb += tagset.eq(self.tlb_tag_way)
            comb += write_tlb_tag(self.repl_way, tagset, self.eatag)
            # XXX db is overwritten by the pteset write below; the tag
            # write-back path looks incomplete (work-in-progress)
            sync += db.eq(tagset)

            comb += pteset.eq(self.tlb_pte_way)
            comb += write_tlb_pte(self.repl_way, pteset, self.pte_data)
            sync += db.eq(pteset)

            sync += vb.bit_select(self.repl_way, 1).eq(1)

        return m
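
# Sketch of standalone use of DTLBUpdate (hypothetical; it mirrors the
# wiring actually done in DCache.tlb_update() below):
#
#     dtlb_valid = TLBValidBitsArray()
#     dtlb_ptes  = TLBPtesArray()
#     m.submodules.upd = upd = DTLBUpdate(dtlb_valid, dtlb_ptes)
#     comb += upd.tlbwe.eq(tlbwe)            # write-enable for a TLB load
#     comb += upd.repl_way.eq(victim_way)    # way chosen by the TLB PLRU
#     comb += upd.eatag.eq(ea_tag)           # effective-address tag
#     comb += upd.pte_data.eq(pte)           # PTE to store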


class DCache(Elaboratable):
    """Set associative dcache write-through

    TODO (in no specific order):
    * See list in icache.vhdl
    * Complete load misses on the cycle when WB data comes instead of
      at the end of line (this requires dealing with requests coming in
      while not idle...)
    """
    def __init__(self):
        self.d_in      = LoadStore1ToDCacheType()
        self.d_out     = DCacheToLoadStore1Type()

        self.m_in      = MMUToDCacheType()
        self.m_out     = DCacheToMMUType()

        self.stall_out = Signal()

        self.wb_out    = WBMasterOut()
        self.wb_in     = WBSlaveOut()

        self.log_out   = Signal(20)

    def stage_0(self, m, r0, r1, r0_full):
        """Latch the request in r0.req as long as we're not stalling
        """
        comb = m.d.comb
        sync = m.d.sync
        d_in, d_out, m_in = self.d_in, self.d_out, self.m_in

        r = RegStage0()

        # TODO, this goes in unit tests and formal proofs
        with m.If(~(d_in.valid & m_in.valid)):
            #sync += Display("request collision loadstore vs MMU")
            pass

        with m.If(m_in.valid):
            sync += r.req.valid.eq(1)
            sync += r.req.load.eq(~(m_in.tlbie | m_in.tlbld))
            sync += r.req.dcbz.eq(0)
            sync += r.req.nc.eq(0)
            sync += r.req.reserve.eq(0)
            sync += r.req.virt_mode.eq(1)
            sync += r.req.priv_mode.eq(1)
            sync += r.req.addr.eq(m_in.addr)
            sync += r.req.data.eq(m_in.pte)
            sync += r.req.byte_sel.eq(~0) # Const -1 sets all to 0b111....
            sync += r.tlbie.eq(m_in.tlbie)
            sync += r.doall.eq(m_in.doall)
            sync += r.tlbld.eq(m_in.tlbld)
            sync += r.mmu_req.eq(1)
        with m.Else():
            sync += r.req.eq(d_in)
            sync += r.tlbie.eq(0)
            sync += r.doall.eq(0)
            sync += r.tlbld.eq(0)
            sync += r.mmu_req.eq(0)
        with m.If(~(r1.full & r0_full)):
            sync += r0.eq(r)
            sync += r0_full.eq(r.req.valid)

    def tlb_read(self, m, r0_stall, tlb_valid_way,
                 tlb_tag_way, tlb_pte_way, dtlb_valid_bits,
                 dtlb_tags, dtlb_ptes):
        """TLB read.
        Operates in the second cycle on the request latched in r0.req.
        TLB updates write the entry at the end of the second cycle.
        """
        comb = m.d.comb
        sync = m.d.sync
        m_in, d_in = self.m_in, self.d_in

        index    = Signal(TLB_SET_BITS)
        addrbits = Signal(TLB_SET_BITS)

        amin = TLB_LG_PGSZ
        amax = TLB_LG_PGSZ + TLB_SET_BITS

        with m.If(m_in.valid):
            comb += addrbits.eq(m_in.addr[amin : amax])
        with m.Else():
            comb += addrbits.eq(d_in.addr[amin : amax])
        comb += index.eq(addrbits)

        # If we have any op and the previous op isn't finished,
        # then keep the same output for next cycle.
        with m.If(~r0_stall):
            sync += tlb_valid_way.eq(dtlb_valid_bits[index])
            sync += tlb_tag_way.eq(dtlb_tags[index])
            sync += tlb_pte_way.eq(dtlb_ptes[index])

    def maybe_tlb_plrus(self, m, r1, tlb_plru_victim, acc, acc_en, lru):
        """Generate TLB PLRUs
        """
        comb = m.d.comb
        sync = m.d.sync

        with m.If(TLB_NUM_WAYS > 1):
            for i in range(TLB_SET_SIZE):
                # TLB PLRU interface
                tlb_plru        = PLRU(TLB_WAY_BITS)
                setattr(m.submodules, "maybe_plru_%d" % i, tlb_plru)
                tlb_plru_acc    = Signal(TLB_WAY_BITS)
                tlb_plru_acc_en = Signal()
                tlb_plru_out    = Signal(TLB_WAY_BITS)

                comb += tlb_plru.acc.eq(tlb_plru_acc)
                comb += tlb_plru.acc_en.eq(tlb_plru_acc_en)
                comb += tlb_plru.lru.eq(tlb_plru_out)

                # PLRU interface
                with m.If(r1.tlb_hit_index == i):
                    comb += tlb_plru.acc_en.eq(r1.tlb_hit)
                with m.Else():
                    comb += tlb_plru.acc_en.eq(0)
                comb += tlb_plru.acc.eq(r1.tlb_hit_way)

                comb += tlb_plru_victim[i].eq(tlb_plru.lru)

    def tlb_search(self, m, tlb_req_index, r0, r0_valid,
                   tlb_valid_way, tlb_tag_way, tlb_hit_way,
                   tlb_pte_way, pte, tlb_hit, valid_ra, perm_attr, ra):

        comb = m.d.comb
        sync = m.d.sync

        hitway = Signal(TLB_WAY_BITS)
        hit    = Signal()
        eatag  = Signal(TLB_EA_TAG_BITS)

        TLB_LG_END = TLB_LG_PGSZ + TLB_SET_BITS
        comb += tlb_req_index.eq(r0.req.addr[TLB_LG_PGSZ : TLB_LG_END])
        comb += eatag.eq(r0.req.addr[TLB_LG_END : 64 ])

        for i in range(TLB_NUM_WAYS):
            is_tag_hit = Signal()
            # note: the comparison must be parenthesised, == binds
            # looser than & in python
            comb += is_tag_hit.eq(tlb_valid_way[i]
                                  & (read_tlb_tag(i, tlb_tag_way) == eatag))
            with m.If(is_tag_hit):
                comb += hitway.eq(i)
                comb += hit.eq(1)

        comb += tlb_hit.eq(hit & r0_valid)
        comb += tlb_hit_way.eq(hitway)

        with m.If(tlb_hit):
            comb += pte.eq(read_tlb_pte(hitway, tlb_pte_way))
        comb += valid_ra.eq(tlb_hit | ~r0.req.virt_mode)

        with m.If(r0.req.virt_mode):
            comb += ra.eq(Cat(Const(0, ROW_OFF_BITS),
                              r0.req.addr[ROW_OFF_BITS:TLB_LG_PGSZ],
                              pte[TLB_LG_PGSZ:REAL_ADDR_BITS]))
            comb += perm_attr.eq(extract_perm_attr(pte))
        with m.Else():
            comb += ra.eq(Cat(Const(0, ROW_OFF_BITS),
                              r0.req.addr[ROW_OFF_BITS:REAL_ADDR_BITS]))

            comb += perm_attr.reference.eq(1)
            comb += perm_attr.changed.eq(1)
            comb += perm_attr.priv.eq(1)
            comb += perm_attr.nocache.eq(0)
            comb += perm_attr.rd_perm.eq(1)
            comb += perm_attr.wr_perm.eq(1)

    def tlb_update(self, m, r0_valid, r0, dtlb_valid_bits, tlb_req_index,
                   tlb_hit_way, tlb_hit, tlb_plru_victim, tlb_tag_way,
                   dtlb_tags, tlb_pte_way, dtlb_ptes):

        comb = m.d.comb
        sync = m.d.sync

        tlbie = Signal()
        tlbwe = Signal()

        comb += tlbie.eq(r0_valid & r0.tlbie)
        comb += tlbwe.eq(r0_valid & r0.tlbld)

        m.submodules.tlb_update = d = DTLBUpdate(dtlb_valid_bits, dtlb_ptes)
        comb += d.tlbie.eq(tlbie)
        comb += d.tlbwe.eq(tlbwe)
        comb += d.doall.eq(r0.doall)
        comb += d.tlb_hit.eq(tlb_hit)
        comb += d.tlb_hit_way.eq(tlb_hit_way)
        comb += d.tlb_tag_way.eq(tlb_tag_way)
        comb += d.tlb_pte_way.eq(tlb_pte_way)
        comb += d.tlb_req_index.eq(tlb_req_index)

        with m.If(tlb_hit):
            comb += d.repl_way.eq(tlb_hit_way)
        with m.Else():
            comb += d.repl_way.eq(tlb_plru_victim[tlb_req_index])
        comb += d.eatag.eq(r0.req.addr[TLB_LG_PGSZ + TLB_SET_BITS:64])
        comb += d.pte_data.eq(r0.req.data)

    def maybe_plrus(self, m, r1, plru_victim):
        """Generate PLRUs
        """
        comb = m.d.comb
        sync = m.d.sync

        for i in range(NUM_LINES):
            # PLRU interface
            plru        = PLRU(WAY_BITS) # tracks the NUM_WAYS cache ways
            setattr(m.submodules, "plru%d" % i, plru)
            plru_acc    = Signal(WAY_BITS)
            plru_acc_en = Signal()
            plru_out    = Signal(WAY_BITS)

            comb += plru.acc.eq(plru_acc)
            comb += plru.acc_en.eq(plru_acc_en)
            comb += plru_out.eq(plru.lru_o)

            with m.If(r1.hit_index == i):
                comb += plru_acc_en.eq(r1.cache_hit)

            comb += plru_acc.eq(r1.hit_way)
            comb += plru_victim[i].eq(plru_out)

    def cache_tag_read(self, m, r0_stall, req_index, cache_tag_set,
                       cache_tags):
        """Cache tag RAM read port
        """
        comb = m.d.comb
        sync = m.d.sync
        m_in, d_in = self.m_in, self.d_in

        index = Signal(INDEX_BITS)

        with m.If(r0_stall):
            comb += index.eq(req_index)
        with m.Elif(m_in.valid):
            comb += index.eq(get_index(m_in.addr))
        with m.Else():
            comb += index.eq(get_index(d_in.addr))
        sync += cache_tag_set.eq(cache_tags[index])

    def dcache_request(self, m, r0, ra, req_index, req_row, req_tag,
                       r0_valid, r1, cache_valid_bits, replace_way,
                       use_forward1_next, use_forward2_next,
                       req_hit_way, plru_victim, rc_ok, perm_attr,
                       valid_ra, perm_ok, access_ok, req_op, req_go,
                       tlb_pte_way,
                       tlb_hit, tlb_hit_way, tlb_valid_way, cache_tag_set,
                       cancel_store, req_same_tag, r0_stall, early_req_row):
        """Cache request parsing and hit detection
        """
        comb = m.d.comb
        sync = m.d.sync
        m_in, d_in = self.m_in, self.d_in

        is_hit      = Signal()
        hit_way     = Signal(WAY_BITS)
        op          = Signal(Op)
        opsel       = Signal(3)
        go          = Signal()
        nc          = Signal()
        rel_match   = Signal()
        hit_set     = Array(Signal() for i in range(TLB_NUM_WAYS))
        hit_way_set = HitWaySet()
        rel_matches = Array(Signal() for i in range(TLB_NUM_WAYS))

        # Extract line, row and tag from request
        comb += req_index.eq(get_index(r0.req.addr))
        comb += req_row.eq(get_row(r0.req.addr))
        comb += req_tag.eq(get_tag(ra))

        comb += go.eq(r0_valid & ~(r0.tlbie | r0.tlbld) & ~r1.ls_error)

        # Test if pending request is a hit on any way
        # In order to make timing in virtual mode,
        # when we are using the TLB, we compare each
        # way with each of the real addresses from each way of
        # the TLB, and then decide later which match to use.

        with m.If(r0.req.virt_mode):
            for j in range(TLB_NUM_WAYS):
                s_tag = Signal(TAG_BITS)
                s_hit = Signal()
                s_pte = Signal(TLB_PTE_BITS)
                s_ra  = Signal(REAL_ADDR_BITS)
                comb += s_pte.eq(read_tlb_pte(j, tlb_pte_way))
                comb += s_ra.eq(Cat(r0.req.addr[0:TLB_LG_PGSZ],
                                    s_pte[TLB_LG_PGSZ:REAL_ADDR_BITS]))
                comb += s_tag.eq(get_tag(s_ra))

                for i in range(NUM_WAYS):
                    is_tag_hit = Signal()
                    comb += is_tag_hit.eq(go & cache_valid_bits[req_index][i]
                                          & (read_tag(i, cache_tag_set) ==
                                             s_tag)
                                          & tlb_valid_way[j])
                    with m.If(is_tag_hit):
                        comb += hit_way_set[j].eq(i)
                        comb += s_hit.eq(1)
                comb += hit_set[j].eq(s_hit)
                with m.If(s_tag == r1.reload_tag):
                    comb += rel_matches[j].eq(1)
            comb += is_hit.eq(hit_set[tlb_hit_way])
            comb += hit_way.eq(hit_way_set[tlb_hit_way])
            comb += rel_match.eq(rel_matches[tlb_hit_way])
        with m.Else():
            s_tag = Signal(TAG_BITS)
            comb += s_tag.eq(get_tag(r0.req.addr))
            for i in range(NUM_WAYS):
                is_tag_hit = Signal()
                comb += is_tag_hit.eq(go & cache_valid_bits[req_index][i] &
                                      (read_tag(i, cache_tag_set) == s_tag))
                with m.If(is_tag_hit):
                    comb += hit_way.eq(i)
                    comb += is_hit.eq(1)
            with m.If(s_tag == r1.reload_tag):
                comb += rel_match.eq(1)
        comb += req_same_tag.eq(rel_match)

        # See if the request matches the line currently being reloaded
        with m.If((r1.state == State.RELOAD_WAIT_ACK) &
                  (req_index == r1.store_index) & rel_match):
            # For a store, consider this a hit even if the row isn't
            # valid since it will be by the time we perform the store.
            # For a load, check the appropriate row valid bit.
            valid = r1.rows_valid[req_row % ROW_PER_LINE]
            comb += is_hit.eq(~r0.req.load | valid)
            comb += hit_way.eq(replace_way)

        # Whether to use forwarded data for a load or not
        comb += use_forward1_next.eq(0)
        with m.If((get_row(r1.req.real_addr) == req_row) &
                  (r1.req.hit_way == hit_way)):
            # Only need to consider r1.write_bram here, since if we
            # are writing refill data here, then we don't have a
            # cache hit this cycle on the line being refilled.
            # (There is the possibility that the load following the
            # load miss that started the refill could be to the old
            # contents of the victim line, since it is a couple of
            # cycles after the refill starts before we see the updated
            # cache tag. In that case we don't use the bypass.)
            comb += use_forward1_next.eq(r1.write_bram)
        comb += use_forward2_next.eq(0)
        with m.If((r1.forward_row1 == req_row) &
                  (r1.forward_way1 == hit_way)):
            comb += use_forward2_next.eq(r1.forward_valid1)

        # The way that matched on a hit
        comb += req_hit_way.eq(hit_way)

        # The way to replace on a miss
        with m.If(r1.write_tag):
            comb += replace_way.eq(plru_victim[r1.store_index])
        with m.Else():
            comb += replace_way.eq(r1.store_way)

        # work out whether we have permission for this access
        # NB we don't yet implement AMR, thus no KUAP
        comb += rc_ok.eq(perm_attr.reference
                         & (r0.req.load | perm_attr.changed))
        comb += perm_ok.eq((r0.req.priv_mode | ~perm_attr.priv)
                           & (perm_attr.wr_perm
                              | (r0.req.load & perm_attr.rd_perm)))
        comb += access_ok.eq(valid_ra & perm_ok & rc_ok)

        # Combine the request and cache hit status to decide what
        # operation needs to be done
        comb += nc.eq(r0.req.nc | perm_attr.nocache)
        comb += op.eq(Op.OP_NONE)
        with m.If(go):
            with m.If(~access_ok):
                comb += op.eq(Op.OP_BAD)
            with m.Elif(cancel_store):
                comb += op.eq(Op.OP_STCX_FAIL)
            with m.Else():
                # opsel is Cat(is_hit, nc, load): read the case
                # values below as 0b<load><nc><hit>
                comb += opsel.eq(Cat(is_hit, nc, r0.req.load))
                with m.Switch(opsel):
                    with m.Case(0b101):
                        comb += op.eq(Op.OP_LOAD_HIT)
                    with m.Case(0b100):
                        comb += op.eq(Op.OP_LOAD_MISS)
                    with m.Case(0b110):
                        comb += op.eq(Op.OP_LOAD_NC)
                    with m.Case(0b001):
                        comb += op.eq(Op.OP_STORE_HIT)
                    with m.Case(0b000):
                        comb += op.eq(Op.OP_STORE_MISS)
                    with m.Case(0b010):
                        comb += op.eq(Op.OP_STORE_MISS)
                    with m.Case(0b011):
                        comb += op.eq(Op.OP_BAD)
                    with m.Case(0b111):
                        comb += op.eq(Op.OP_BAD)
                    with m.Default():
                        comb += op.eq(Op.OP_NONE)
        comb += req_op.eq(op)
        comb += req_go.eq(go)

        # Version of the row number that is valid one cycle earlier
        # in the cases where we need to read the cache data BRAM.
        # If we're stalling then we need to keep reading the last
        # row requested.
        with m.If(~r0_stall):
            with m.If(m_in.valid):
                comb += early_req_row.eq(get_row(m_in.addr))
            with m.Else():
                comb += early_req_row.eq(get_row(d_in.addr))
        with m.Else():
            comb += early_req_row.eq(req_row)

    def reservation_comb(self, m, cancel_store, set_rsrv, clear_rsrv,
                         r0_valid, r0, reservation):
        """Handle load-with-reservation and store-conditional instructions
        """
        comb = m.d.comb
        sync = m.d.sync

        with m.If(r0_valid & r0.req.reserve):

            # XXX generate alignment interrupt if address
            # is not aligned XXX or if r0.req.nc = '1'
            with m.If(r0.req.load):
                comb += set_rsrv.eq(1) # load with reservation
            with m.Else():
                comb += clear_rsrv.eq(1) # store conditional
                with m.If(~reservation.valid |
                          (r0.req.addr[LINE_OFF_BITS:64] !=
                           reservation.addr)):
                    comb += cancel_store.eq(1)

    def reservation_reg(self, m, r0_valid, access_ok, set_rsrv, clear_rsrv,
                        reservation, r0):

        comb = m.d.comb
        sync = m.d.sync

        with m.If(r0_valid & access_ok):
            with m.If(clear_rsrv):
                sync += reservation.valid.eq(0)
            with m.Elif(set_rsrv):
                sync += reservation.valid.eq(1)
                sync += reservation.addr.eq(r0.req.addr[LINE_OFF_BITS:64])

    def writeback_control(self, m, r1, cache_out):
        """Return data for loads & completion control logic
        """
        comb = m.d.comb
        sync = m.d.sync
        d_out, m_out = self.d_out, self.m_out

        data_out = Signal(64)
        data_fwd = Signal(64)

        # Use the bypass if are reading the row that was
        # written 1 or 2 cycles ago, including for the
        # slow_valid = 1 case (i.e. completing a load
        # miss or a non-cacheable load).
        with m.If(r1.use_forward1):
            comb += data_fwd.eq(r1.forward_data1)
        with m.Else():
            comb += data_fwd.eq(r1.forward_data2)

        comb += data_out.eq(cache_out[r1.hit_way])

        for i in range(8):
            with m.If(r1.forward_sel[i]):
                dsel = data_fwd.word_select(i, 8)
                comb += data_out.word_select(i, 8).eq(dsel)

        comb += d_out.valid.eq(r1.ls_valid)
        comb += d_out.data.eq(data_out)
        comb += d_out.store_done.eq(~r1.stcx_fail)
        comb += d_out.error.eq(r1.ls_error)
        comb += d_out.cache_paradox.eq(r1.cache_paradox)

        # Outputs to MMU
        comb += m_out.done.eq(r1.mmu_done)
        comb += m_out.err.eq(r1.mmu_error)
        comb += m_out.data.eq(data_out)

        # We have a valid load or store hit or we just completed
        # a slow op such as a load miss, a NC load or a store
        #
        # Note: the load hit is delayed by one cycle. However it
        # can still not collide with r.slow_valid (well unless I
        # miscalculated) because slow_valid can only be set on a
        # subsequent request and not on its first cycle (the state
        # machine must have advanced), which makes slow_valid
        # at least 2 cycles from the previous hit_load_valid.

        # Sanity: Only one of these must be set in any given cycle

        if False: # TODO: need Display to get this to work
            assert (r1.slow_valid & r1.stcx_fail) != 1, \
                "unexpected slow_valid collision with stcx_fail"

            assert ((r1.slow_valid | r1.stcx_fail) | r1.hit_load_valid) != 1,\
                "unexpected hit_load_delayed collision with slow_valid"

        with m.If(~r1.mmu_req):
            # Request came from loadstore1...
            # Load hit case is the standard path
            with m.If(r1.hit_load_valid):
                #Display(f"completing load hit data={data_out}")
                pass

            # error cases complete without stalling
            with m.If(r1.ls_error):
                # Display("completing ld/st with error")
                pass

            # Slow ops (load miss, NC, stores)
            with m.If(r1.slow_valid):
                #Display(f"completing store or load miss data={data_out}")
                pass

        with m.Else():
            # Request came from MMU
            with m.If(r1.hit_load_valid):
                # Display(f"completing load hit to MMU, data={m_out.data}")
                pass

            # error cases complete without stalling
            with m.If(r1.mmu_error):
                #Display("completing MMU ld with error")
                pass

            # Slow ops (i.e. load miss)
            with m.If(r1.slow_valid):
                #Display("completing MMU load miss, data={m_out.data}")
                pass

    def rams(self, m, r1, early_req_row, cache_out, replace_way):
        """RAM

        Generate a cache RAM for each way. This handles the normal
        reads, writes from reloads and the special store-hit update
        path as well.

        Note: the BRAMs have an extra read buffer, meaning the output
        is pipelined an extra cycle. This differs from the
        icache. The writeback logic needs to take that into
        account by using 1-cycle delayed signals for load hits.
        """
        comb = m.d.comb
        wb_in = self.wb_in

        for i in range(NUM_WAYS):
            do_read  = Signal()
            rd_addr  = Signal(ROW_BITS)
            do_write = Signal()
            wr_addr  = Signal(ROW_BITS)
            wr_data  = Signal(WB_DATA_BITS)
            wr_sel   = Signal(ROW_SIZE)
            wr_sel_m = Signal(ROW_SIZE)
            _d_out   = Signal(WB_DATA_BITS)

            way = CacheRam(ROW_BITS, WB_DATA_BITS, True)
            setattr(m.submodules, "cacheram_%d" % i, way)

            comb += way.rd_en.eq(do_read)
            comb += way.rd_addr.eq(rd_addr)
            comb += _d_out.eq(way.rd_data_o)
            comb += way.wr_sel.eq(wr_sel_m)
            comb += way.wr_addr.eq(wr_addr)
            comb += way.wr_data.eq(wr_data)

            # Cache hit reads
            comb += do_read.eq(1)
            comb += rd_addr.eq(early_req_row)
            comb += cache_out[i].eq(_d_out)

            # Write mux:
            #
            # Defaults to wishbone read responses (cache refill)
            #
            # For timing, the mux on wr_data/sel/addr is not
            # dependent on anything other than the current state.

            with m.If(r1.write_bram):
                # Write store data to BRAM. This happens one
                # cycle after the store is in r0.
                comb += wr_data.eq(r1.req.data)
                comb += wr_sel.eq(r1.req.byte_sel)
                comb += wr_addr.eq(get_row(r1.req.real_addr))

                with m.If(i == r1.req.hit_way):
                    comb += do_write.eq(1)
            with m.Else():
                # Otherwise, we might be doing a reload or a DCBZ
                with m.If(r1.dcbz):
                    comb += wr_data.eq(0)
                with m.Else():
                    comb += wr_data.eq(wb_in.dat)
                comb += wr_addr.eq(r1.store_row)
                comb += wr_sel.eq(~0) # all 1s

                with m.If((r1.state == State.RELOAD_WAIT_ACK)
                          & wb_in.ack & (replace_way == i)):
                    comb += do_write.eq(1)

            # Mask write selects with do_write since BRAM
            # doesn't have a global write-enable
            with m.If(do_write):
                comb += wr_sel_m.eq(wr_sel)

    # Cache hit synchronous machine for the easy case.
    # This handles load hits.
    # It also handles error cases (TLB miss, cache paradox)
    def dcache_fast_hit(self, m, req_op, r0_valid, r0, r1,
                        req_hit_way, req_index, access_ok,
                        tlb_hit, tlb_hit_way, tlb_req_index):

        comb = m.d.comb
        sync = m.d.sync

        with m.If(req_op != Op.OP_NONE):
            #Display(f"op:{req_op} addr:{r0.req.addr} nc: {r0.req.nc}" \
            #        f"idx:{req_index} tag:{req_tag} way: {req_hit_way}"
            pass

        with m.If(r0_valid):
            sync += r1.mmu_req.eq(r0.mmu_req)

        # Fast path for load/store hits.
        # Set signals for the writeback controls.
        sync += r1.hit_way.eq(req_hit_way)
        sync += r1.hit_index.eq(req_index)

        with m.If(req_op == Op.OP_LOAD_HIT):
            sync += r1.hit_load_valid.eq(1)
        with m.Else():
            sync += r1.hit_load_valid.eq(0)

        with m.If((req_op == Op.OP_LOAD_HIT) | (req_op == Op.OP_STORE_HIT)):
            sync += r1.cache_hit.eq(1)
        with m.Else():
            sync += r1.cache_hit.eq(0)

        with m.If(req_op == Op.OP_BAD):
            # Display(f"Signalling ld/st error valid_ra={valid_ra}"
            #         f"rc_ok={rc_ok} perm_ok={perm_ok}"
            sync += r1.ls_error.eq(~r0.mmu_req)
            sync += r1.mmu_error.eq(r0.mmu_req)
            sync += r1.cache_paradox.eq(access_ok)

        with m.Else():
            sync += r1.ls_error.eq(0)
            sync += r1.mmu_error.eq(0)
            sync += r1.cache_paradox.eq(0)

        with m.If(req_op == Op.OP_STCX_FAIL):
            sync += r1.stcx_fail.eq(1)
        with m.Else():
            sync += r1.stcx_fail.eq(0)

        # Record TLB hit information for updating TLB PLRU
        sync += r1.tlb_hit.eq(tlb_hit)
        sync += r1.tlb_hit_way.eq(tlb_hit_way)
        sync += r1.tlb_hit_index.eq(tlb_req_index)

    # Memory accesses are handled by this state machine:
    #
    #   * Cache load miss/reload (in conjunction with "rams")
    #   * Load hits for non-cachable forms
    #   * Stores (the collision case is handled in "rams")
    #
    # All wishbone requests generation is done here.
    # This machine operates at stage 1.
    def dcache_slow(self, m, r1, use_forward1_next, use_forward2_next,
                    cache_valid_bits, r0, replace_way,
                    req_hit_way, req_same_tag,
                    r0_valid, req_op, cache_tag, req_go, ra):

        comb = m.d.comb
        sync = m.d.sync
        wb_in = self.wb_in

        req         = MemAccessRequest()
        acks        = Signal(3)
        adjust_acks = Signal(3)
        stbs_done   = Signal()

        sync += r1.use_forward1.eq(use_forward1_next)
        sync += r1.forward_sel.eq(0)

        with m.If(use_forward1_next):
            sync += r1.forward_sel.eq(r1.req.byte_sel)
        with m.Elif(use_forward2_next):
            sync += r1.forward_sel.eq(r1.forward_sel1)

        sync += r1.forward_data2.eq(r1.forward_data1)
        with m.If(r1.write_bram):
            sync += r1.forward_data1.eq(r1.req.data)
            sync += r1.forward_sel1.eq(r1.req.byte_sel)
            sync += r1.forward_way1.eq(r1.req.hit_way)
            sync += r1.forward_row1.eq(get_row(r1.req.real_addr))
            sync += r1.forward_valid1.eq(1)
        with m.Else():
            with m.If(r1.dcbz):
                sync += r1.forward_data1.eq(0)
            with m.Else():
                sync += r1.forward_data1.eq(wb_in.dat)
            sync += r1.forward_sel1.eq(~0) # all 1s
            sync += r1.forward_way1.eq(replace_way)
            sync += r1.forward_row1.eq(r1.store_row)
            sync += r1.forward_valid1.eq(0)

        # One cycle pulses reset
        sync += r1.slow_valid.eq(0)
        sync += r1.write_bram.eq(0)
        sync += r1.inc_acks.eq(0)
        sync += r1.dec_acks.eq(0)

        sync += r1.ls_valid.eq(0)
        # complete tlbies and TLB loads in the third cycle
        sync += r1.mmu_done.eq(r0_valid & (r0.tlbie | r0.tlbld))

        with m.If((req_op == Op.OP_LOAD_HIT)
                  | (req_op == Op.OP_STCX_FAIL)):
            with m.If(~r0.mmu_req):
                sync += r1.ls_valid.eq(1)
            with m.Else():
                sync += r1.mmu_done.eq(1)

        with m.If(r1.write_tag):
            # Store new tag in selected way
            for i in range(NUM_WAYS):
                with m.If(i == replace_way):
                    ct = Signal(TAG_RAM_WIDTH)
                    comb += ct.eq(cache_tag[r1.store_index])
                    comb += ct.word_select(i, TAG_WIDTH).eq(r1.reload_tag)
                    sync += cache_tag[r1.store_index].eq(ct)
            sync += r1.store_way.eq(replace_way)
            sync += r1.write_tag.eq(0)

        # Take request from r1.req if there is one there,
        # else from req_op, ra, etc.
        with m.If(r1.full):
            comb += req.eq(r1.req)
        with m.Else():
            comb += req.op.eq(req_op)
            comb += req.valid.eq(req_go)
            comb += req.mmu_req.eq(r0.mmu_req)
            comb += req.dcbz.eq(r0.req.dcbz)
            comb += req.real_addr.eq(ra)

            with m.If(~r0.req.dcbz):
                comb += req.data.eq(r0.req.data)
            with m.Else():
                comb += req.data.eq(0)

            # Select all bytes for dcbz
            # and for cacheable loads
            with m.If(r0.req.dcbz | (r0.req.load & ~r0.req.nc)):
                comb += req.byte_sel.eq(~0) # all 1s
            with m.Else():
                comb += req.byte_sel.eq(r0.req.byte_sel)
            comb += req.hit_way.eq(req_hit_way)
            comb += req.same_tag.eq(req_same_tag)

        # Store the incoming request from r0,
        # if it is a slow request
        # Note that r1.full = 1 implies req_op = OP_NONE
        with m.If((req_op == Op.OP_LOAD_MISS)
                  | (req_op == Op.OP_LOAD_NC)
                  | (req_op == Op.OP_STORE_MISS)
                  | (req_op == Op.OP_STORE_HIT)):
            sync += r1.req.eq(req)
            sync += r1.full.eq(1)

        # Main state machine
        with m.Switch(r1.state):

            with m.Case(State.IDLE):
                # XXX check 'left downto. probably means len(r1.wb.adr)
                #     r1.wb.adr <= req.real_addr(
                #                   r1.wb.adr'left downto 0
                #                  );
                sync += r1.wb.adr.eq(req.real_addr)
                sync += r1.wb.sel.eq(req.byte_sel)
                sync += r1.wb.dat.eq(req.data)
                sync += r1.dcbz.eq(req.dcbz)

                # Keep track of our index and way
                # for subsequent stores.
                sync += r1.store_index.eq(get_index(req.real_addr))
                sync += r1.store_row.eq(get_row(req.real_addr))
                sync += r1.end_row_ix.eq(
                         get_row_of_line(get_row(req.real_addr))
                        )
                sync += r1.reload_tag.eq(get_tag(req.real_addr))
                sync += r1.req.same_tag.eq(1)

                with m.If(req.op == Op.OP_STORE_HIT):
                    sync += r1.store_way.eq(req.hit_way)

                # Reset per-row valid bits,
                # ready for handling OP_LOAD_MISS
                for i in range(ROW_PER_LINE):
                    sync += r1.rows_valid[i].eq(0)

                with m.Switch(req.op):
                    with m.Case(Op.OP_LOAD_HIT):
                        # stay in IDLE state
                        pass

                    with m.Case(Op.OP_LOAD_MISS):
                        #Display(f"cache miss real addr:" \
                        #        f"{req_real_addr}" \
                        #        f" idx:{get_index(req_real_addr)}" \
                        #        f" tag:{get_tag(req.real_addr)}")

                        # Start the wishbone cycle
                        sync += r1.wb.we.eq(0)
                        sync += r1.wb.cyc.eq(1)
                        sync += r1.wb.stb.eq(1)

                        # Track that we had one request sent
                        sync += r1.state.eq(State.RELOAD_WAIT_ACK)
                        sync += r1.write_tag.eq(1)

                    with m.Case(Op.OP_LOAD_NC):
                        sync += r1.wb.cyc.eq(1)
                        sync += r1.wb.stb.eq(1)
                        sync += r1.wb.we.eq(0)
                        sync += r1.state.eq(State.NC_LOAD_WAIT_ACK)

                    with m.Case(Op.OP_STORE_HIT, Op.OP_STORE_MISS):
                        with m.If(~req.dcbz):
                            sync += r1.state.eq(State.STORE_WAIT_ACK)
                            sync += r1.acks_pending.eq(1)
                            sync += r1.full.eq(0)
                            sync += r1.slow_valid.eq(1)

                            with m.If(~req.mmu_req):
                                sync += r1.ls_valid.eq(1)
                            with m.Else():
                                sync += r1.mmu_done.eq(1)

                            with m.If(req.op == Op.OP_STORE_HIT):
                                sync += r1.write_bram.eq(1)
                        with m.Else():
                            sync += r1.state.eq(State.RELOAD_WAIT_ACK)

                            with m.If(req.op == Op.OP_STORE_MISS):
                                sync += r1.write_tag.eq(1)

                        sync += r1.wb.we.eq(1)
                        sync += r1.wb.cyc.eq(1)
                        sync += r1.wb.stb.eq(1)

                    # OP_NONE and OP_BAD do nothing
                    # OP_BAD & OP_STCX_FAIL were
                    # handled above already
                    with m.Case(Op.OP_NONE):
                        pass
                    with m.Case(Op.OP_BAD):
                        pass
                    with m.Case(Op.OP_STCX_FAIL):
                        pass

            with m.Case(State.RELOAD_WAIT_ACK):
                # Requests are all sent if stb is 0
                comb += stbs_done.eq(~r1.wb.stb)

                with m.If(~wb_in.stall & ~stbs_done):
                    # That was the last word?
                    # We are done sending.
                    # Clear stb and set stbs_done
                    # so we can handle an eventual
                    # last ack on the same cycle.
                    with m.If(is_last_row_addr(
                              r1.wb.adr, r1.end_row_ix)):
                        sync += r1.wb.stb.eq(0)
                        comb += stbs_done.eq(0)

                    # Calculate the next row address in the current cache line
                    rarange = r1.wb.adr[ROW_OFF_BITS : LINE_OFF_BITS]
                    sync += rarange.eq(rarange + 1)

                # Incoming acks processing
                sync += r1.forward_valid1.eq(wb_in.ack)
                with m.If(wb_in.ack):
                    # XXX needs an Array bit-accessor here
                    sync += r1.rows_valid[r1.store_row % ROW_PER_LINE].eq(1)

                    # If this is the data we were looking for,
                    # we can complete the request next cycle.
                    # Compare the whole address in case the
                    # request in r1.req is not the one that
                    # started this refill.
                    with m.If(r1.full & r1.req.same_tag &
                              ((r1.dcbz & r1.req.dcbz) |
                               (~r1.dcbz &
                                (r1.req.op == Op.OP_LOAD_MISS))) &
                              (r1.store_row ==
                               get_row(r1.req.real_addr))):
                        sync += r1.full.eq(0)
                        sync += r1.slow_valid.eq(1)
                        with m.If(~r1.mmu_req):
                            sync += r1.ls_valid.eq(1)
                        with m.Else():
                            sync += r1.mmu_done.eq(1)
                        sync += r1.forward_sel.eq(~0) # all 1s
                        sync += r1.use_forward1.eq(1)

                    # Check for completion
                    with m.If(stbs_done & is_last_row(r1.store_row,
                                                      r1.end_row_ix)):
                        # Complete wishbone cycle
                        sync += r1.wb.cyc.eq(0)

                        # Cache line is now valid: set the valid bit for
                        # this way and write it back (write-back added,
                        # the original staged cv but never stored it)
                        cv = Signal(NUM_WAYS)
                        comb += cv.eq(cache_valid_bits[r1.store_index])
                        comb += cv.bit_select(r1.store_way, 1).eq(1)
                        sync += cache_valid_bits[r1.store_index].eq(cv)
                        sync += r1.state.eq(State.IDLE)

                    # Increment store row counter
                    sync += r1.store_row.eq(next_row(r1.store_row))

            with m.Case(State.STORE_WAIT_ACK):
                comb += stbs_done.eq(~r1.wb.stb)
                comb += acks.eq(r1.acks_pending)

                with m.If(r1.inc_acks != r1.dec_acks):
                    with m.If(r1.inc_acks):
                        comb += adjust_acks.eq(acks + 1)
                    with m.Else():
                        comb += adjust_acks.eq(acks - 1)
                with m.Else():
                    comb += adjust_acks.eq(acks)

                sync += r1.acks_pending.eq(adjust_acks)

                # Clear stb when slave accepted request
                with m.If(~wb_in.stall):
                    # See if there is another store waiting
                    # to be done which is in the same real page.
                    with m.If(req.valid):
                        ra = req.real_addr[0:SET_SIZE_BITS]
                        sync += r1.wb.adr[0:SET_SIZE_BITS].eq(ra)
                        sync += r1.wb.dat.eq(req.data)
                        sync += r1.wb.sel.eq(req.byte_sel)

                    with m.Elif((adjust_acks < 7) & req.same_tag &
                                ((req.op == Op.OP_STORE_MISS)
                                 | (req.op == Op.OP_STORE_HIT))):
                        sync += r1.wb.stb.eq(1)
                        comb += stbs_done.eq(0)

                        with m.If(req.op == Op.OP_STORE_HIT):
                            sync += r1.write_bram.eq(1)
                        sync += r1.full.eq(0)
                        sync += r1.slow_valid.eq(1)

                        # Store requests never come from the MMU
                        sync += r1.ls_valid.eq(1)
                        comb += stbs_done.eq(0)
                        sync += r1.inc_acks.eq(1)
                    with m.Else():
                        sync += r1.wb.stb.eq(0)
                        comb += stbs_done.eq(1)

                # Got ack ? See if complete.
                with m.If(wb_in.ack):
                    with m.If(stbs_done & (adjust_acks == 1)):
                        sync += r1.state.eq(State.IDLE)
                        sync += r1.wb.cyc.eq(0)
                        sync += r1.wb.stb.eq(0)
                    sync += r1.dec_acks.eq(1)

            with m.Case(State.NC_LOAD_WAIT_ACK):
                # Clear stb when slave accepted request
                with m.If(~wb_in.stall):
                    sync += r1.wb.stb.eq(0)

                # Got ack ? complete.
                with m.If(wb_in.ack):
                    sync += r1.state.eq(State.IDLE)
                    sync += r1.full.eq(0)
                    sync += r1.slow_valid.eq(1)

                    with m.If(~r1.mmu_req):
                        sync += r1.ls_valid.eq(1)
                    with m.Else():
                        sync += r1.mmu_done.eq(1)

                    sync += r1.forward_sel.eq(~0) # all 1s
                    sync += r1.use_forward1.eq(1)
                    sync += r1.wb.cyc.eq(0)
                    sync += r1.wb.stb.eq(0)

    def dcache_log(self, m, r1, valid_ra, tlb_hit_way, stall_out):

        sync = m.d.sync
        d_out, wb_in, log_out = self.d_out, self.wb_in, self.log_out

        # XXX req_op is not passed in yet: this function is currently
        # unused (the call in elaborate is commented out).  The final
        # Cat element is reconstructed to fill log_out's 20 bits.
        sync += log_out.eq(Cat(r1.state[:3], valid_ra, tlb_hit_way[:3],
                               stall_out, req_op[:3], d_out.valid, d_out.error,
                               r1.wb.cyc, r1.wb.stb, wb_in.ack, wb_in.stall,
                               r1.wb.adr[3:6]))

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        # Storage. Hopefully "cache_rows" is a BRAM, the rest is LUTs
        cache_tags       = CacheTagArray()
        cache_tag_set    = Signal(TAG_RAM_WIDTH)
        cache_valid_bits = CacheValidBitsArray()

        # TODO attribute ram_style : string;
        # TODO attribute ram_style of cache_tags : signal is "distributed";

        """note: these are passed to nmigen.hdl.Memory as "attributes".
           don't know how, just that they are.
        """
        dtlb_valid_bits = TLBValidBitsArray()
        dtlb_tags       = TLBTagsArray()
        dtlb_ptes       = TLBPtesArray()
        # TODO attribute ram_style of
        #  dtlb_tags : signal is "distributed";
        # TODO attribute ram_style of
        #  dtlb_ptes : signal is "distributed";

        r0      = RegStage0()
        r0_full = Signal()

        r1 = RegStage1()

        reservation = Reservation()

        # Async signals on incoming request
        req_index    = Signal(INDEX_BITS)
        req_row      = Signal(ROW_BITS)
        req_hit_way  = Signal(WAY_BITS)
        req_tag      = Signal(TAG_BITS)
        req_op       = Signal(Op)
        req_data     = Signal(64)
        req_same_tag = Signal()
        req_go       = Signal()

        early_req_row = Signal(ROW_BITS)

        cancel_store = Signal()
        set_rsrv     = Signal()
        clear_rsrv   = Signal()

        r0_valid = Signal()
        r0_stall = Signal()

        use_forward1_next = Signal()
        use_forward2_next = Signal()

        cache_out = CacheRamOut()

        plru_victim = PLRUOut()
        replace_way = Signal(WAY_BITS)

        # Wishbone read/write/cache write formatting signals

        # TLB signals
        tlb_tag_way   = Signal(TLB_TAG_WAY_BITS)
        tlb_pte_way   = Signal(TLB_PTE_WAY_BITS)
        tlb_valid_way = Signal(TLB_NUM_WAYS)
        tlb_req_index = Signal(TLB_SET_BITS)
        tlb_hit       = Signal()
        tlb_hit_way   = Signal(TLB_WAY_BITS)
        pte           = Signal(TLB_PTE_BITS)
        ra            = Signal(REAL_ADDR_BITS)
        valid_ra      = Signal()
        perm_attr     = PermAttr()
        rc_ok         = Signal()
        perm_ok       = Signal()
        access_ok     = Signal()

        tlb_plru_victim = TLBPLRUOut()

        # we don't yet handle collisions between loadstore1 requests
        # and MMU requests
        comb += self.m_out.stall.eq(0)

        # Hold off the request in r0 when r1 has an uncompleted request
        comb += r0_stall.eq(r0_full & r1.full)
        comb += r0_valid.eq(r0_full & ~r1.full)
        comb += self.stall_out.eq(r0_stall)

        # Wire up wishbone request latch out of stage 1
        comb += self.wb_out.eq(r1.wb)

        # call sub-functions putting everything together, using shared
        # signals established above
        self.stage_0(m, r0, r1, r0_full)
        self.tlb_read(m, r0_stall, tlb_valid_way,
                      tlb_tag_way, tlb_pte_way, dtlb_valid_bits,
                      dtlb_tags, dtlb_ptes)
        self.tlb_search(m, tlb_req_index, r0, r0_valid,
                        tlb_valid_way, tlb_tag_way, tlb_hit_way,
                        tlb_pte_way, pte, tlb_hit, valid_ra, perm_attr, ra)
        self.tlb_update(m, r0_valid, r0, dtlb_valid_bits, tlb_req_index,
                        tlb_hit_way, tlb_hit, tlb_plru_victim, tlb_tag_way,
                        dtlb_tags, tlb_pte_way, dtlb_ptes)
        self.maybe_plrus(m, r1, plru_victim)
        self.cache_tag_read(m, r0_stall, req_index, cache_tag_set, cache_tags)
        self.dcache_request(m, r0, ra, req_index, req_row, req_tag,
                            r0_valid, r1, cache_valid_bits, replace_way,
                            use_forward1_next, use_forward2_next,
                            req_hit_way, plru_victim, rc_ok, perm_attr,
                            valid_ra, perm_ok, access_ok, req_op, req_go,
                            tlb_pte_way,
                            tlb_hit, tlb_hit_way, tlb_valid_way,
                            cache_tag_set,
                            cancel_store, req_same_tag, r0_stall,
                            early_req_row)
        self.reservation_comb(m, cancel_store, set_rsrv, clear_rsrv,
                              r0_valid, r0, reservation)
        self.reservation_reg(m, r0_valid, access_ok, set_rsrv, clear_rsrv,
                             reservation, r0)
        self.writeback_control(m, r1, cache_out)
        self.rams(m, r1, early_req_row, cache_out, replace_way)
        self.dcache_fast_hit(m, req_op, r0_valid, r0, r1,
                             req_hit_way, req_index, access_ok,
                             tlb_hit, tlb_hit_way, tlb_req_index)
        self.dcache_slow(m, r1, use_forward1_next, use_forward2_next,
                         cache_valid_bits, r0, replace_way,
                         req_hit_way, req_same_tag,
                         r0_valid, req_op, cache_tags, req_go, ra)
        #self.dcache_log(m, r1, valid_ra, tlb_hit_way, stall_out)

        return m


# Original VHDL testbench (dcache_tb) from the microwatt port, kept
# here for reference; elided sections are marked with "...":
#
# entity dcache_tb is
# end dcache_tb;
#
# architecture behave of dcache_tb is
#     signal clk          : std_ulogic;
#     signal rst          : std_ulogic;
#
#     signal d_in         : Loadstore1ToDcacheType;
#     signal d_out        : DcacheToLoadstore1Type;
#
#     signal m_in         : MmuToDcacheType;
#     signal m_out        : DcacheToMmuType;
#
#     signal wb_bram_in   : wishbone_master_out;
#     signal wb_bram_out  : wishbone_slave_out;
#
#     constant clk_period : time := 10 ns;
# begin
#     dcache0: entity work.dcache
#         ...
#         wishbone_out => wb_bram_in,
#         wishbone_in  => wb_bram_out
#         ...
#
#     -- BRAM Memory slave
#     bram0: entity work.wishbone_bram_wrapper
#         generic map(
#             MEMORY_SIZE   => 1024,
#             RAM_INIT_FILE => "icache_test.bin"
#         ...
#         wishbone_in  => wb_bram_in,
#         wishbone_out => wb_bram_out
#         ...
#
#     clk_process: process
#         ...
#         wait for clk_period/2;
#         ...
#         wait for clk_period/2;
#         ...
#
#     rst_process: process
#         ...
#         wait for 2*clk_period;
#         ...
#
#     stim: process
#         ...
#         d_in.valid <= '0';
#         ...
#         d_in.addr <= (others => '0');
#         d_in.data <= (others => '0');
#         m_in.valid <= '0';
#         m_in.addr <= (others => '0');
#         m_in.pte <= (others => '0');
#
#         wait for 4*clk_period;
#         wait until rising_edge(clk);
#
#         -- Cacheable read of address 4
#         ...
#         d_in.addr <= x"0000000000000004";
#         d_in.valid <= '1';
#         wait until rising_edge(clk);
#         d_in.valid <= '0';
#
#         wait until rising_edge(clk) and d_out.valid = '1';
#         assert d_out.data = x"0000000100000000"
#             report "data @" & to_hstring(d_in.addr) &
#                 "=" & to_hstring(d_out.data) &
#                 " expected 0000000100000000"
#         ...
#         -- wait for clk_period;
#
#         -- Cacheable read of address 30
#         ...
#         d_in.addr <= x"0000000000000030";
#         d_in.valid <= '1';
#         wait until rising_edge(clk);
#         d_in.valid <= '0';
#
#         wait until rising_edge(clk) and d_out.valid = '1';
#         assert d_out.data = x"0000000D0000000C"
#             report "data @" & to_hstring(d_in.addr) &
#                 "=" & to_hstring(d_out.data) &
#                 " expected 0000000D0000000C"
#         ...
#
#         -- Non-cacheable read of address 100
#         ...
#         d_in.addr <= x"0000000000000100";
#         d_in.valid <= '1';
#         wait until rising_edge(clk);
#         d_in.valid <= '0';
#         wait until rising_edge(clk) and d_out.valid = '1';
#         assert d_out.data = x"0000004100000040"
#             report "data @" & to_hstring(d_in.addr) &
#                 "=" & to_hstring(d_out.data) &
#                 " expected 0000004100000040"
#         ...
#
#         wait until rising_edge(clk);
#         wait until rising_edge(clk);
#         wait until rising_edge(clk);
#         wait until rising_edge(clk);
#         ...


def dcache_sim(dut):
    # clear stuff
    yield dut.d_in.valid.eq(0)
    yield dut.d_in.load.eq(0)
    yield dut.d_in.nc.eq(0)
    yield dut.d_in.addr.eq(0)
    yield dut.d_in.data.eq(0)
    yield dut.m_in.valid.eq(0)
    yield dut.m_in.addr.eq(0)
    yield dut.m_in.pte.eq(0)
    # wait 4 * clk_period
    yield
    yield
    yield
    yield
    # wait_until rising_edge(clk)
    yield

    # Cacheable read of address 4
    yield dut.d_in.load.eq(1)
    yield dut.d_in.nc.eq(0)
    yield dut.d_in.addr.eq(Const(0x0000000000000004, 64))
    yield dut.d_in.valid.eq(1)
    # wait-until rising_edge(clk)
    yield
    yield dut.d_in.valid.eq(0)

    while not (yield dut.d_out.valid):
        yield
    # note: the output must be sampled with yield, not read directly
    data = yield dut.d_out.data
    assert data == 0x0000000100000000, \
        f"data @ {dut.d_in.addr}={data:x} expected 0000000100000000"

    # Cacheable read of address 30
    yield dut.d_in.load.eq(1)
    yield dut.d_in.nc.eq(0)
    yield dut.d_in.addr.eq(Const(0x0000000000000030, 64))
    yield dut.d_in.valid.eq(1)
    yield
    yield dut.d_in.valid.eq(0)

    while not (yield dut.d_out.valid):
        yield
    data = yield dut.d_out.data
    assert data == 0x0000000D0000000C, \
        f"data @ {dut.d_in.addr}={data:x} expected 0000000D0000000C"

    # Non-cacheable read of address 100
    yield dut.d_in.load.eq(1)
    yield dut.d_in.nc.eq(1)
    yield dut.d_in.addr.eq(Const(0x0000000000000100, 64))
    yield dut.d_in.valid.eq(1)
    yield
    yield dut.d_in.valid.eq(0)

    while not (yield dut.d_out.valid):
        yield
    data = yield dut.d_out.data
    assert data == 0x0000004100000040, \
        f"data @ {dut.d_in.addr}={data:x} expected 0000004100000040"


def test_dcache():
    dut = DCache()
    vl = rtlil.convert(dut, ports=[])
    with open("test_dcache.il", "w") as f:
        f.write(vl)

    #run_simulation(dut, dcache_sim(), vcd_name='test_dcache.vcd')


if __name__ == '__main__':
    test_dcache()