1 """DCache
2
3 based on Anton Blanchard microwatt dcache.vhdl
4
5 """
6
7 from enum import Enum, unique
8
9 from nmigen import Module, Signal, Elaboratable, Cat, Repl, Array, Const
10 from nmigen.cli import main
11 from nmutil.iocontrol import RecordObject
12 from nmigen.utils import log2_int
13 from nmigen.cli import rtlil
14
15
16 from soc.experiment.mem_types import (LoadStore1ToDCacheType,
17 DCacheToLoadStore1Type,
18 MMUToDCacheType,
19 DCacheToMMUType)
20
21 from soc.experiment.wb_types import (WB_ADDR_BITS, WB_DATA_BITS, WB_SEL_BITS,
22 WBAddrType, WBDataType, WBSelType,
23 WBMasterOut, WBSlaveOut,
24 WBMasterOutVector, WBSlaveOutVector,
25 WBIOMasterOut, WBIOSlaveOut)
26
27 from soc.experiment.cache_ram import CacheRam
28 from soc.experiment.plru import PLRU
29
30
31 # TODO: make these parameters of DCache at some point
32 LINE_SIZE = 64 # Line size in bytes
33 NUM_LINES = 32 # Number of lines in a set
34 NUM_WAYS = 4 # Number of ways
35 TLB_SET_SIZE = 64 # L1 DTLB number of sets (entries per way)
36 TLB_NUM_WAYS = 2 # L1 DTLB number of ways (associativity)
37 TLB_LG_PGSZ = 12 # L1 DTLB log_2(page_size)
38 LOG_LENGTH = 0 # Non-zero to enable log data collection
39
40 # BRAM organisation: We never access more than
41 # WB_DATA_BITS at a time so, to save
42 # resources, we make the array only that wide, and
43 # use consecutive indices to make a cache "line"
44 #
45 # ROW_SIZE is the width in bytes of the BRAM
46 # (based on WB, so 64-bits)
47 ROW_SIZE = WB_DATA_BITS // 8
48
49 # ROW_PER_LINE is the number of row (wishbone
50 # transactions) in a line
51 ROW_PER_LINE = LINE_SIZE // ROW_SIZE
52
53 # BRAM_ROWS is the number of rows in BRAM needed
54 # to represent the full dcache
55 BRAM_ROWS = NUM_LINES * ROW_PER_LINE
56
57
58 # Bit fields counts in the address
59
60 # REAL_ADDR_BITS is the number of real address
61 # bits that we store
62 REAL_ADDR_BITS = 56
63
64 # ROW_BITS is the number of bits to select a row
65 ROW_BITS = log2_int(BRAM_ROWS)
66
67 # ROW_LINE_BITS is the number of bits to select
68 # a row within a line
69 ROW_LINE_BITS = log2_int(ROW_PER_LINE)
70
71 # LINE_OFF_BITS is the number of bits for
72 # the offset in a cache line
73 LINE_OFF_BITS = log2_int(LINE_SIZE)
74
75 # ROW_OFF_BITS is the number of bits for
76 # the offset in a row
77 ROW_OFF_BITS = log2_int(ROW_SIZE)
78
79 # INDEX_BITS is the number of bits to
80 # select a cache line
81 INDEX_BITS = log2_int(NUM_LINES)
82
83 # SET_SIZE_BITS is the log base 2 of the set size
84 SET_SIZE_BITS = LINE_OFF_BITS + INDEX_BITS
85
86 # TAG_BITS is the number of bits of
87 # the tag part of the address
88 TAG_BITS = REAL_ADDR_BITS - SET_SIZE_BITS
89
90 # TAG_WIDTH is the width in bits of each way of the tag RAM
91 TAG_WIDTH = TAG_BITS + 7 - ((TAG_BITS + 7) % 8)
92
93 # WAY_BITS is the number of bits to select a way
94 WAY_BITS = log2_int(NUM_WAYS)
95
96 # Example of layout for 32 lines of 64 bytes:
97 #
98 # .. tag |index| line |
99 # .. | row | |
100 # .. | |---| | ROW_LINE_BITS (3)
101 # .. | |--- - --| LINE_OFF_BITS (6)
102 # .. | |- --| ROW_OFF_BITS (3)
103 # .. |----- ---| | ROW_BITS (8)
104 # .. |-----| | INDEX_BITS (5)
105 # .. --------| | TAG_BITS (45)
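#
# Worked example with the default values above (LINE_SIZE=64, NUM_LINES=32,
# NUM_WAYS=4, WB_DATA_BITS=64), as a quick sanity-check of the constants:
#
#   ROW_SIZE      = 64 // 8    = 8 bytes per BRAM row
#   ROW_PER_LINE  = 64 // 8    = 8 rows per cache line
#   BRAM_ROWS     = 32 * 8     = 256 rows
#   ROW_BITS      = log2(256)  = 8     ROW_LINE_BITS = log2(8) = 3
#   LINE_OFF_BITS = log2(64)   = 6     ROW_OFF_BITS  = log2(8) = 3
#   INDEX_BITS    = log2(32)   = 5     SET_SIZE_BITS = 6 + 5   = 11
#   TAG_BITS      = 56 - 11    = 45    TAG_WIDTH (byte-rounded) = 48
#
# so a 56-bit real address decomposes as addr[0:6] = offset within the
# line, addr[6:11] = line index, addr[11:56] = tag, with addr[3:11] being
# the BRAM row number.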
106
107 TAG_RAM_WIDTH = TAG_WIDTH * NUM_WAYS
108
109 def CacheTagArray():
110 return Array(Signal(TAG_RAM_WIDTH) for x in range(NUM_LINES))
111
112 def CacheValidBitsArray():
113 return Array(Signal(NUM_WAYS) for x in range(NUM_LINES)) # one valid bit per way
114
115 def RowPerLineValidArray():
116 return Array(Signal() for x in range(ROW_PER_LINE))
117
118 # L1 TLB
119 TLB_SET_BITS = log2_int(TLB_SET_SIZE)
120 TLB_WAY_BITS = log2_int(TLB_NUM_WAYS)
121 TLB_EA_TAG_BITS = 64 - (TLB_LG_PGSZ + TLB_SET_BITS)
122 TLB_TAG_WAY_BITS = TLB_NUM_WAYS * TLB_EA_TAG_BITS
123 TLB_PTE_BITS = 64
124 TLB_PTE_WAY_BITS = TLB_NUM_WAYS * TLB_PTE_BITS
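# With the defaults above (TLB_SET_SIZE=64 sets, TLB_NUM_WAYS=2,
# TLB_LG_PGSZ=12) this works out to TLB_SET_BITS=6, TLB_WAY_BITS=1,
# TLB_EA_TAG_BITS = 64 - (12 + 6) = 46, TLB_TAG_WAY_BITS = 2 * 46 = 92 and
# TLB_PTE_WAY_BITS = 2 * 64 = 128: an effective address splits as
# EA[0:12] = page offset, EA[12:18] = TLB set index, EA[18:64] = TLB tag.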
125
126 assert (LINE_SIZE % ROW_SIZE) == 0, "LINE_SIZE not multiple of ROW_SIZE"
127 assert (LINE_SIZE & (LINE_SIZE - 1)) == 0, "LINE_SIZE not power of 2"
128 assert (NUM_LINES & (NUM_LINES - 1)) == 0, "NUM_LINES not power of 2"
129 assert (ROW_PER_LINE & (ROW_PER_LINE - 1)) == 0, "ROW_PER_LINE not power of 2"
130 assert ROW_BITS == (INDEX_BITS + ROW_LINE_BITS), "geometry bits don't add up"
131 assert (LINE_OFF_BITS == ROW_OFF_BITS + ROW_LINE_BITS), \
132 "geometry bits don't add up"
133 assert REAL_ADDR_BITS == (TAG_BITS + INDEX_BITS + LINE_OFF_BITS), \
134 "geometry bits don't add up"
135 assert REAL_ADDR_BITS == (TAG_BITS + ROW_BITS + ROW_OFF_BITS), \
136 "geometry bits don't add up"
137 assert 64 == WB_DATA_BITS, "Can't yet handle wb width that isn't 64-bits"
138 assert SET_SIZE_BITS <= TLB_LG_PGSZ, "Set indexed by virtual address"
139
140
141 def TLBValidBitsArray():
142 return Array(Signal(TLB_NUM_WAYS) for x in range(TLB_SET_SIZE))
143
144 def TLBTagEAArray():
145 return Array(Signal(TLB_EA_TAG_BITS) for x in range (TLB_NUM_WAYS))
146
147 def TLBTagsArray():
148 return Array(Signal(TLB_TAG_WAY_BITS) for x in range (TLB_SET_SIZE))
149
150 def TLBPtesArray():
151 return Array(Signal(TLB_PTE_WAY_BITS) for x in range(TLB_SET_SIZE))
152
153 def HitWaySet():
154 return Array(Signal(NUM_WAYS) for x in range(TLB_NUM_WAYS))
155
156 # Cache RAM interface
157 def CacheRamOut():
158 return Array(Signal(WB_DATA_BITS) for x in range(NUM_WAYS))
159
160 # PLRU output interface
161 def PLRUOut():
162 return Array(Signal(WAY_BITS) for x in range(NUM_LINES))
163
164 # TLB PLRU output interface
165 def TLBPLRUOut():
166 return Array(Signal(TLB_WAY_BITS) for x in range(TLB_SET_SIZE))
167
168 # Helper functions to decode incoming requests
169 #
170 # Return the cache line index (tag index) for an address
171 def get_index(addr):
172 return addr[LINE_OFF_BITS:SET_SIZE_BITS]
173
174 # Return the cache row index (data memory) for an address
175 def get_row(addr):
176 return addr[ROW_OFF_BITS:SET_SIZE_BITS]
177
178 # Return the index of a row within a line
179 def get_row_of_line(row):
180 return row[:ROW_LINE_BITS]
181
182 # Returns whether this is the last row of a line
183 def is_last_row_addr(addr, last):
184 return addr[ROW_OFF_BITS:LINE_OFF_BITS] == last
185
186 # Returns whether this is the last row of a line
187 def is_last_row(row, last):
188 return get_row_of_line(row) == last
189
190 # Return the next row in the current cache line. We use a
191 # dedicated function in order to limit the size of the
192 # generated adder to be only the bits within a cache line
193 # (3 bits with default settings)
194 def next_row(row):
195 row_v = row[0:ROW_LINE_BITS] + 1
196 return Cat(row_v[:ROW_LINE_BITS], row[ROW_LINE_BITS:])
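
# Example: with ROW_LINE_BITS = 3 the increment wraps within the line, e.g.
# next_row(0b10101_111) == 0b10101_000 - the upper (line-select) bits of
# the row number pass through unchanged, so only a 3-bit adder is generated.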
197
198 # Get the tag value from the address
199 def get_tag(addr):
200 return addr[SET_SIZE_BITS:REAL_ADDR_BITS]
201
202 # Read a tag from a tag memory row
203 def read_tag(way, tagset):
204 return tagset.word_select(way, TAG_WIDTH)[:TAG_BITS]
205
206 # Read a TLB tag from a TLB tag memory row
207 def read_tlb_tag(way, tags):
208 return tags.word_select(way, TLB_EA_TAG_BITS)
209
210 # Write a TLB tag to a TLB tag memory row
211 def write_tlb_tag(way, tags, tag):
212 return read_tlb_tag(way, tags).eq(tag)
213
214 # Read a PTE from a TLB PTE memory row
215 def read_tlb_pte(way, ptes):
216 return ptes.word_select(way, TLB_PTE_BITS)
217
218 def write_tlb_pte(way, ptes, newpte):
219 return read_tlb_pte(way, ptes).eq(newpte)
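
# Note: write_tlb_tag / write_tlb_pte return an Assign statement; the caller
# chooses the domain by adding the result to m.d.comb or m.d.sync.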
220
221
222 # Record for storing permission, attribute, etc. bits from a PTE
223 class PermAttr(RecordObject):
224 def __init__(self):
225 super().__init__()
226 self.reference = Signal()
227 self.changed = Signal()
228 self.nocache = Signal()
229 self.priv = Signal()
230 self.rd_perm = Signal()
231 self.wr_perm = Signal()
232
233
234 def extract_perm_attr(pte):
235 pa = PermAttr()
236 pa.reference = pte[8]
237 pa.changed = pte[7]
238 pa.nocache = pte[5]
239 pa.priv = pte[3]
240 pa.rd_perm = pte[2]
241 pa.wr_perm = pte[1]
242 return pa
243
244
245 # Type of operation on a "valid" input
246 @unique
247 class Op(Enum):
248 OP_NONE = 0
249 OP_BAD = 1 # NC cache hit, TLB miss, prot/RC failure
250 OP_STCX_FAIL = 2 # conditional store w/o reservation
251 OP_LOAD_HIT = 3 # Cache hit on load
252 OP_LOAD_MISS = 4 # Load missing cache
253 OP_LOAD_NC = 5 # Non-cachable load
254 OP_STORE_HIT = 6 # Store hitting cache
255 OP_STORE_MISS = 7 # Store missing cache
256
257
258 # Cache state machine
259 @unique
260 class State(Enum):
261 IDLE = 0 # Normal load hit processing
262 RELOAD_WAIT_ACK = 1 # Cache reload wait ack
263 STORE_WAIT_ACK = 2 # Store wait ack
264 NC_LOAD_WAIT_ACK = 3 # Non-cachable load wait ack
265
266
267 # Dcache operations:
268 #
269 # In order to make timing, we use the BRAMs with
270 # an output buffer, which means that the BRAM
271 # output is delayed by an extra cycle.
272 #
273 # Thus, the dcache has a 2-stage internal pipeline
274 # for cache hits with no stalls.
275 #
276 # All other operations are handled via stalling
277 # in the first stage.
278 #
279 # The second stage can thus complete a hit at the same
280 # time as the first stage emits a stall for a complex op.
281 #
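# A rough cycle-by-cycle sketch of a load hit, approximate and only to
# illustrate the two-stage behaviour described above (exact boundaries
# follow from the BRAM output buffering):
#
#   cycle 0: request arrives on d_in and is latched into r0 (stage 0);
#            the data BRAM read is started from the incoming address
#   cycle 1: TLB lookup and cache tag comparison, hit detection (stage 1)
#   cycle 2: the BRAM output buffer delivers the data and d_out.valid
#            is asserted
#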
282 # Stage 0 register, basically contains just the latched request
283
284 class RegStage0(RecordObject):
285 def __init__(self):
286 super().__init__()
287 self.req = LoadStore1ToDCacheType()
288 self.tlbie = Signal()
289 self.doall = Signal()
290 self.tlbld = Signal()
291 self.mmu_req = Signal() # indicates source of request
292
293
294 class MemAccessRequest(RecordObject):
295 def __init__(self):
296 super().__init__()
297 self.op = Signal(Op)
298 self.valid = Signal()
299 self.dcbz = Signal()
300 self.real_addr = Signal(REAL_ADDR_BITS)
301 self.data = Signal(64)
302 self.byte_sel = Signal(8)
303 self.hit_way = Signal(WAY_BITS)
304 self.same_tag = Signal()
305 self.mmu_req = Signal()
306
307
308 # First stage register, contains state for stage 1 of load hits
309 # and for the state machine used by all other operations
310 class RegStage1(RecordObject):
311 def __init__(self):
312 super().__init__()
313 # Info about the request
314 self.full = Signal() # have uncompleted request
315 self.mmu_req = Signal() # request is from MMU
316 self.req = MemAccessRequest()
317
318 # Cache hit state
319 self.hit_way = Signal(WAY_BITS)
320 self.hit_load_valid = Signal()
321 self.hit_index = Signal(INDEX_BITS)
322 self.cache_hit = Signal()
323
324 # TLB hit state
325 self.tlb_hit = Signal()
326 self.tlb_hit_way = Signal(TLB_WAY_BITS)
327 self.tlb_hit_index = Signal(TLB_SET_BITS)
328
329 # 2-stage data buffer for data forwarded from writes to reads
330 self.forward_data1 = Signal(64)
331 self.forward_data2 = Signal(64)
332 self.forward_sel1 = Signal(8)
333 self.forward_valid1 = Signal()
334 self.forward_way1 = Signal(WAY_BITS)
335 self.forward_row1 = Signal(ROW_BITS)
336 self.use_forward1 = Signal()
337 self.forward_sel = Signal(8)
338
339 # Cache miss state (reload state machine)
340 self.state = Signal(State)
341 self.dcbz = Signal()
342 self.write_bram = Signal()
343 self.write_tag = Signal()
344 self.slow_valid = Signal()
345 self.wb = WBMasterOut()
346 self.reload_tag = Signal(TAG_BITS)
347 self.store_way = Signal(WAY_BITS)
348 self.store_row = Signal(ROW_BITS)
349 self.store_index = Signal(INDEX_BITS)
350 self.end_row_ix = Signal(ROW_LINE_BITS)
351 self.rows_valid = RowPerLineValidArray()
352 self.acks_pending = Signal(3)
353 self.inc_acks = Signal()
354 self.dec_acks = Signal()
355
356 # Signals to complete (possibly with error)
357 self.ls_valid = Signal()
358 self.ls_error = Signal()
359 self.mmu_done = Signal()
360 self.mmu_error = Signal()
361 self.cache_paradox = Signal()
362
363 # Signal to complete a failed stcx.
364 self.stcx_fail = Signal()
365
366
367 # Reservation information
368 class Reservation(RecordObject):
369 def __init__(self):
370 super().__init__()
371 self.valid = Signal()
372 self.addr = Signal(64-LINE_OFF_BITS)
373
374
375 class DTLBUpdate(Elaboratable):
376 def __init__(self):
377 self.tlbie = Signal()
378 self.tlbwe = Signal()
379 self.doall = Signal()
380 self.updated = Signal()
381 self.v_updated = Signal()
382 self.tlb_hit = Signal()
383 self.tlb_req_index = Signal(TLB_SET_BITS)
384
385 self.tlb_hit_way = Signal(TLB_WAY_BITS)
386 self.tlb_tag_way = Signal(TLB_TAG_WAY_BITS)
387 self.tlb_pte_way = Signal(TLB_PTE_WAY_BITS)
388 self.repl_way = Signal(TLB_WAY_BITS)
389 self.eatag = Signal(TLB_EA_TAG_BITS)
390 self.pte_data = Signal(TLB_PTE_BITS)
391
392 self.dv = Signal(TLB_NUM_WAYS) # per-way valid bits of the selected TLB set
393
394 self.tb_out = Signal(TLB_TAG_WAY_BITS)
395 self.pb_out = Signal(TLB_PTE_WAY_BITS)
396 self.db_out = Signal(TLB_NUM_WAYS)
397
398 def elaborate(self, platform):
399 m = Module()
400 comb = m.d.comb
401 sync = m.d.sync
402
403 tagset = Signal(TLB_TAG_WAY_BITS)
404 pteset = Signal(TLB_PTE_WAY_BITS)
405
406 tb_out, pb_out, db_out = self.tb_out, self.pb_out, self.db_out
407
408 with m.If(self.tlbie & self.doall):
409 pass # clear all back in parent
410 with m.Elif(self.tlbie):
411 with m.If(self.tlb_hit):
412 comb += db_out.eq(self.dv)
413 comb += db_out.bit_select(self.tlb_hit_way, 1).eq(1)
414 comb += self.v_updated.eq(1)
415
416 with m.Elif(self.tlbwe):
417
418 comb += tagset.eq(self.tlb_tag_way)
419 comb += write_tlb_tag(self.repl_way, tagset, self.eatag)
420 comb += tb_out.eq(tagset)
421
422 comb += pteset.eq(self.tlb_pte_way)
423 comb += write_tlb_pte(self.repl_way, pteset, self.pte_data)
424 comb += pb_out.eq(pteset)
425
426 comb += db_out.bit_select(self.repl_way, 1).eq(1)
427
428 comb += self.updated.eq(1)
429 comb += self.v_updated.eq(1)
430
431 return m
432
443
444 class DCachePendingHit(Elaboratable):
445
446 def __init__(self, tlb_pte_way, tlb_valid_way, tlb_hit_way,
447 cache_valid_idx, cache_tag_set,
448 req_addr,
449 hit_set):
450
451 self.go = Signal()
452 self.virt_mode = Signal()
453 self.is_hit = Signal()
454 self.tlb_hit = Signal()
455 self.hit_way = Signal(WAY_BITS)
456 self.rel_match = Signal()
457 self.req_index = Signal(INDEX_BITS)
458 self.reload_tag = Signal(TAG_BITS)
459
460 self.tlb_hit_way = tlb_hit_way
461 self.tlb_pte_way = tlb_pte_way
462 self.tlb_valid_way = tlb_valid_way
463 self.cache_valid_idx = cache_valid_idx
464 self.cache_tag_set = cache_tag_set
465 self.req_addr = req_addr
466 self.hit_set = hit_set
467
468 def elaborate(self, platform):
469 m = Module()
470 comb = m.d.comb
471 sync = m.d.sync
472
473 go = self.go
474 virt_mode = self.virt_mode
475 is_hit = self.is_hit
476 tlb_pte_way = self.tlb_pte_way
477 tlb_valid_way = self.tlb_valid_way
478 cache_valid_idx = self.cache_valid_idx
479 cache_tag_set = self.cache_tag_set
480 req_addr = self.req_addr
481 tlb_hit_way = self.tlb_hit_way
482 tlb_hit = self.tlb_hit
483 hit_set = self.hit_set
484 hit_way = self.hit_way
485 rel_match = self.rel_match
486 req_index = self.req_index
487 reload_tag = self.reload_tag
488
489 rel_matches = Array(Signal() for i in range(TLB_NUM_WAYS))
490 hit_way_set = HitWaySet()
491
492 # Test if pending request is a hit on any way
493 # In order to make timing in virtual mode,
494 # when we are using the TLB, we compare each
495 # way with each of the real addresses from each way of
496 # the TLB, and then decide later which match to use.
497
498 with m.If(virt_mode):
499 for j in range(TLB_NUM_WAYS):
500 s_tag = Signal(TAG_BITS, name="s_tag%d" % j)
501 s_hit = Signal()
502 s_pte = Signal(TLB_PTE_BITS)
503 s_ra = Signal(REAL_ADDR_BITS)
504 comb += s_pte.eq(read_tlb_pte(j, tlb_pte_way))
505 comb += s_ra.eq(Cat(req_addr[0:TLB_LG_PGSZ],
506 s_pte[TLB_LG_PGSZ:REAL_ADDR_BITS]))
507 comb += s_tag.eq(get_tag(s_ra))
508
509 for i in range(NUM_WAYS):
510 is_tag_hit = Signal()
511 comb += is_tag_hit.eq(go & cache_valid_idx[i] &
512 (read_tag(i, cache_tag_set) == s_tag)
513 & tlb_valid_way[j])
514 with m.If(is_tag_hit):
515 comb += hit_way_set[j].eq(i)
516 comb += s_hit.eq(1)
517 comb += hit_set[j].eq(s_hit)
518 with m.If(s_tag == reload_tag):
519 comb += rel_matches[j].eq(1)
520 with m.If(tlb_hit):
521 comb += is_hit.eq(hit_set[tlb_hit_way])
522 comb += hit_way.eq(hit_way_set[tlb_hit_way])
523 comb += rel_match.eq(rel_matches[tlb_hit_way])
524 with m.Else():
525 s_tag = Signal(TAG_BITS)
526 comb += s_tag.eq(get_tag(req_addr))
527 for i in range(NUM_WAYS):
528 is_tag_hit = Signal()
529 comb += is_tag_hit.eq(go & cache_valid_idx[i] &
530 (read_tag(i, cache_tag_set) == s_tag))
531 with m.If(is_tag_hit):
532 comb += hit_way.eq(i)
533 comb += is_hit.eq(1)
534 with m.If(s_tag == reload_tag):
535 comb += rel_match.eq(1)
536
537 return m
538
539
540 class DCache(Elaboratable):
541 """Set associative dcache write-through
542 TODO (in no specific order):
543 * See list in icache.vhdl
544 * Complete load misses on the cycle when WB data comes instead of
545 at the end of line (this requires dealing with requests coming in
546 while not idle...)
547 """
548 def __init__(self):
549 self.d_in = LoadStore1ToDCacheType()
550 self.d_out = DCacheToLoadStore1Type()
551
552 self.m_in = MMUToDCacheType()
553 self.m_out = DCacheToMMUType()
554
555 self.stall_out = Signal()
556
557 self.wb_out = WBMasterOut()
558 self.wb_in = WBSlaveOut()
559
560 self.log_out = Signal(20)
561
562 def stage_0(self, m, r0, r1, r0_full):
563 """Latch the request in r0.req as long as we're not stalling
564 """
565 comb = m.d.comb
566 sync = m.d.sync
567 d_in, d_out, m_in = self.d_in, self.d_out, self.m_in
568
569 r = RegStage0()
570
571 # TODO, this goes in unit tests and formal proofs
572 with m.If(d_in.valid & m_in.valid):
573 #sync += Display("request collision loadstore vs MMU")
574 pass
575
576 with m.If(m_in.valid):
577 sync += r.req.valid.eq(1)
578 sync += r.req.load.eq(~(m_in.tlbie | m_in.tlbld))
579 sync += r.req.dcbz.eq(0)
580 sync += r.req.nc.eq(0)
581 sync += r.req.reserve.eq(0)
582 sync += r.req.virt_mode.eq(1)
583 sync += r.req.priv_mode.eq(1)
584 sync += r.req.addr.eq(m_in.addr)
585 sync += r.req.data.eq(m_in.pte)
586 sync += r.req.byte_sel.eq(~0) # Const -1 sets all to 0b111....
587 sync += r.tlbie.eq(m_in.tlbie)
588 sync += r.doall.eq(m_in.doall)
589 sync += r.tlbld.eq(m_in.tlbld)
590 sync += r.mmu_req.eq(1)
591 with m.Else():
592 sync += r.req.eq(d_in)
593 sync += r.tlbie.eq(0)
594 sync += r.doall.eq(0)
595 sync += r.tlbld.eq(0)
596 sync += r.mmu_req.eq(0)
597 with m.If(~(r1.full & r0_full)):
598 sync += r0.eq(r)
599 sync += r0_full.eq(r.req.valid)
600
601 def tlb_read(self, m, r0_stall, tlb_valid_way,
602 tlb_tag_way, tlb_pte_way, dtlb_valid_bits,
603 dtlb_tags, dtlb_ptes):
604 """TLB
605 Operates in the second cycle on the request latched in r0.req.
606 TLB updates write the entry at the end of the second cycle.
607 """
608 comb = m.d.comb
609 sync = m.d.sync
610 m_in, d_in = self.m_in, self.d_in
611
612 index = Signal(TLB_SET_BITS)
613 addrbits = Signal(TLB_SET_BITS)
614
615 amin = TLB_LG_PGSZ
616 amax = TLB_LG_PGSZ + TLB_SET_BITS
617
618 with m.If(m_in.valid):
619 comb += addrbits.eq(m_in.addr[amin : amax])
620 with m.Else():
621 comb += addrbits.eq(d_in.addr[amin : amax])
622 comb += index.eq(addrbits)
623
624 # If we have any op and the previous op isn't finished,
625 # then keep the same output for next cycle.
626 with m.If(~r0_stall):
627 sync += tlb_valid_way.eq(dtlb_valid_bits[index])
628 sync += tlb_tag_way.eq(dtlb_tags[index])
629 sync += tlb_pte_way.eq(dtlb_ptes[index])
630
631 def maybe_tlb_plrus(self, m, r1, tlb_plru_victim):
632 """Generate TLB PLRUs
633 """
634 comb = m.d.comb
635 sync = m.d.sync
636
637 if TLB_NUM_WAYS > 1:
638 for i in range(TLB_SET_SIZE):
639 # TLB PLRU interface
640 tlb_plru = PLRU(TLB_WAY_BITS)
641 setattr(m.submodules, "maybe_plru_%d" % i, tlb_plru)
642 tlb_plru_acc = Signal(TLB_WAY_BITS)
643 tlb_plru_acc_en = Signal()
644 tlb_plru_out = Signal(TLB_WAY_BITS)
645 
646 comb += tlb_plru.acc.eq(tlb_plru_acc)
647 comb += tlb_plru.acc_en.eq(tlb_plru_acc_en)
648 comb += tlb_plru_out.eq(tlb_plru.lru_o)
649 
650 # PLRU interface: count only hits to this TLB set as accesses
651 with m.If(r1.tlb_hit_index == i):
652 comb += tlb_plru_acc_en.eq(r1.tlb_hit)
653 with m.Else():
654 comb += tlb_plru_acc_en.eq(0)
655 comb += tlb_plru_acc.eq(r1.tlb_hit_way)
656 
657 comb += tlb_plru_victim[i].eq(tlb_plru_out)
658
659 def tlb_search(self, m, tlb_req_index, r0, r0_valid,
660 tlb_valid_way, tlb_tag_way, tlb_hit_way,
661 tlb_pte_way, pte, tlb_hit, valid_ra, perm_attr, ra):
662
663 comb = m.d.comb
664 sync = m.d.sync
665
666 hitway = Signal(TLB_WAY_BITS)
667 hit = Signal()
668 eatag = Signal(TLB_EA_TAG_BITS)
669
670 TLB_LG_END = TLB_LG_PGSZ + TLB_SET_BITS
671 comb += tlb_req_index.eq(r0.req.addr[TLB_LG_PGSZ : TLB_LG_END])
672 comb += eatag.eq(r0.req.addr[TLB_LG_END : 64 ])
673
674 for i in range(TLB_NUM_WAYS):
675 is_tag_hit = Signal()
676 comb += is_tag_hit.eq(tlb_valid_way[i]
677 & (read_tlb_tag(i, tlb_tag_way) == eatag))
678 with m.If(is_tag_hit):
679 comb += hitway.eq(i)
680 comb += hit.eq(1)
681
682 comb += tlb_hit.eq(hit & r0_valid)
683 comb += tlb_hit_way.eq(hitway)
684
685 with m.If(tlb_hit):
686 comb += pte.eq(read_tlb_pte(hitway, tlb_pte_way))
687 with m.Else():
688 comb += pte.eq(0)
689 comb += valid_ra.eq(tlb_hit | ~r0.req.virt_mode)
690 with m.If(r0.req.virt_mode):
691 comb += ra.eq(Cat(Const(0, ROW_OFF_BITS),
692 r0.req.addr[ROW_OFF_BITS:TLB_LG_PGSZ],
693 pte[TLB_LG_PGSZ:REAL_ADDR_BITS]))
694 comb += perm_attr.eq(extract_perm_attr(pte))
695 with m.Else():
696 comb += ra.eq(Cat(Const(0, ROW_OFF_BITS),
697 r0.req.addr[ROW_OFF_BITS:REAL_ADDR_BITS]))
698
699 comb += perm_attr.reference.eq(1)
700 comb += perm_attr.changed.eq(1)
701 comb += perm_attr.priv.eq(1)
702 comb += perm_attr.nocache.eq(0)
703 comb += perm_attr.rd_perm.eq(1)
704 comb += perm_attr.wr_perm.eq(1)
705
706 def tlb_update(self, m, r0_valid, r0, dtlb_valid_bits, tlb_req_index,
707 tlb_hit_way, tlb_hit, tlb_plru_victim, tlb_tag_way,
708 dtlb_tags, tlb_pte_way, dtlb_ptes):
709
710 comb = m.d.comb
711 sync = m.d.sync
712
713 tlbie = Signal()
714 tlbwe = Signal()
715
716 comb += tlbie.eq(r0_valid & r0.tlbie)
717 comb += tlbwe.eq(r0_valid & r0.tlbld)
718
719 m.submodules.tlb_update = d = DTLBUpdate()
720 with m.If(tlbie & r0.doall):
721 # clear all valid bits at once
722 for i in range(TLB_SET_SIZE):
723 sync += dtlb_valid_bits[i].eq(0)
724 with m.If(d.updated):
725 sync += dtlb_tags[tlb_req_index].eq(d.tb_out)
726 sync += dtlb_ptes[tlb_req_index].eq(d.pb_out)
727 with m.If(d.v_updated):
728 sync += dtlb_valid_bits[tlb_req_index].eq(d.db_out)
729
730 comb += d.dv.eq(dtlb_valid_bits[tlb_req_index])
731
732 comb += d.tlbie.eq(tlbie)
733 comb += d.tlbwe.eq(tlbwe)
734 comb += d.doall.eq(r0.doall)
735 comb += d.tlb_hit.eq(tlb_hit)
736 comb += d.tlb_hit_way.eq(tlb_hit_way)
737 comb += d.tlb_tag_way.eq(tlb_tag_way)
738 comb += d.tlb_pte_way.eq(tlb_pte_way)
739 comb += d.tlb_req_index.eq(tlb_req_index)
740
741 with m.If(tlb_hit):
742 comb += d.repl_way.eq(tlb_hit_way)
743 with m.Else():
744 comb += d.repl_way.eq(tlb_plru_victim[tlb_req_index])
745 comb += d.eatag.eq(r0.req.addr[TLB_LG_PGSZ + TLB_SET_BITS:64])
746 comb += d.pte_data.eq(r0.req.data)
747
748 def maybe_plrus(self, m, r1, plru_victim):
749 """Generate PLRUs
750 """
751 comb = m.d.comb
752 sync = m.d.sync
753
754 for i in range(NUM_LINES):
755 # PLRU interface
756 plru = PLRU(WAY_BITS)
757 setattr(m.submodules, "plru%d" % i, plru)
758 plru_acc = Signal(WAY_BITS)
759 plru_acc_en = Signal()
760 plru_out = Signal(WAY_BITS)
761
762 comb += plru.acc.eq(plru_acc)
763 comb += plru.acc_en.eq(plru_acc_en)
764 comb += plru_out.eq(plru.lru_o)
765
766 with m.If(r1.hit_index == i):
767 comb += plru_acc_en.eq(r1.cache_hit)
768
769 comb += plru_acc.eq(r1.hit_way)
770 comb += plru_victim[i].eq(plru_out)
771
772 def cache_tag_read(self, m, r0_stall, req_index, cache_tag_set, cache_tags):
773 """Cache tag RAM read port
774 """
775 comb = m.d.comb
776 sync = m.d.sync
777 m_in, d_in = self.m_in, self.d_in
778
779 index = Signal(INDEX_BITS)
780
781 with m.If(r0_stall):
782 comb += index.eq(req_index)
783 with m.Elif(m_in.valid):
784 comb += index.eq(get_index(m_in.addr))
785 with m.Else():
786 comb += index.eq(get_index(d_in.addr))
787 sync += cache_tag_set.eq(cache_tags[index])
788
789 def dcache_request(self, m, r0, ra, req_index, req_row, req_tag,
790 r0_valid, r1, cache_valid_bits, replace_way,
791 use_forward1_next, use_forward2_next,
792 req_hit_way, plru_victim, rc_ok, perm_attr,
793 valid_ra, perm_ok, access_ok, req_op, req_go,
794 tlb_pte_way,
795 tlb_hit, tlb_hit_way, tlb_valid_way, cache_tag_set,
796 cancel_store, req_same_tag, r0_stall, early_req_row):
797 """Cache request parsing and hit detection
798 """
799
800 comb = m.d.comb
801 sync = m.d.sync
802 m_in, d_in = self.m_in, self.d_in
803
804 is_hit = Signal()
805 hit_way = Signal(WAY_BITS)
806 op = Signal(Op)
807 opsel = Signal(3)
808 go = Signal()
809 nc = Signal()
810 hit_set = Array(Signal() for i in range(TLB_NUM_WAYS))
811 cache_valid_idx = Signal(NUM_WAYS)
812
813 # Extract line, row and tag from request
814 comb += req_index.eq(get_index(r0.req.addr))
815 comb += req_row.eq(get_row(r0.req.addr))
816 comb += req_tag.eq(get_tag(ra))
817
818 comb += go.eq(r0_valid & ~(r0.tlbie | r0.tlbld) & ~r1.ls_error)
819 comb += cache_valid_idx.eq(cache_valid_bits[req_index])
820
821 m.submodules.dcache_pend = dc = DCachePendingHit(tlb_pte_way,
822 tlb_valid_way, tlb_hit_way,
823 cache_valid_idx, cache_tag_set,
824 r0.req.addr,
825 hit_set)
826
827 comb += dc.tlb_hit.eq(tlb_hit)
828 comb += dc.reload_tag.eq(r1.reload_tag)
829 comb += dc.virt_mode.eq(r0.req.virt_mode)
830 comb += dc.go.eq(go)
831 comb += dc.req_index.eq(req_index)
832 comb += is_hit.eq(dc.is_hit)
833 comb += hit_way.eq(dc.hit_way)
834 comb += req_same_tag.eq(dc.rel_match)
835
836 # See if the request matches the line currently being reloaded
837 with m.If((r1.state == State.RELOAD_WAIT_ACK) &
838 (req_index == r1.store_index) & req_same_tag):
839 # For a store, consider this a hit even if the row isn't
840 # valid since it will be by the time we perform the store.
841 # For a load, check the appropriate row valid bit.
842 valid = r1.rows_valid[req_row % ROW_PER_LINE]
843 comb += is_hit.eq(~r0.req.load | valid)
844 comb += hit_way.eq(replace_way)
845
846 # Whether to use forwarded data for a load or not
847 comb += use_forward1_next.eq(0)
848 with m.If((get_row(r1.req.real_addr) == req_row) &
849 (r1.req.hit_way == hit_way)):
850 # Only need to consider r1.write_bram here, since if we
851 # are writing refill data here, then we don't have a
852 # cache hit this cycle on the line being refilled.
853 # (There is the possibility that the load following the
854 # load miss that started the refill could be to the old
855 # contents of the victim line, since it is a couple of
856 # cycles after the refill starts before we see the updated
857 # cache tag. In that case we don't use the bypass.)
858 comb += use_forward1_next.eq(r1.write_bram)
859 comb += use_forward2_next.eq(0)
860 with m.If((r1.forward_row1 == req_row) & (r1.forward_way1 == hit_way)):
861 comb += use_forward2_next.eq(r1.forward_valid1)
862
863 # The way that matched on a hit
864 comb += req_hit_way.eq(hit_way)
865
866 # The way to replace on a miss
867 with m.If(r1.write_tag):
868 comb += replace_way.eq(plru_victim[r1.store_index])
869 with m.Else():
870 comb += replace_way.eq(r1.store_way)
871
872 # work out whether we have permission for this access
873 # NB we don't yet implement AMR, thus no KUAP
874 comb += rc_ok.eq(perm_attr.reference
875 & (r0.req.load | perm_attr.changed)
876 )
877 comb += perm_ok.eq((r0.req.priv_mode | ~perm_attr.priv)
878 & (perm_attr.wr_perm
879 | (r0.req.load & perm_attr.rd_perm))
880 )
881 comb += access_ok.eq(valid_ra & perm_ok & rc_ok)
882 # Combine the request and cache hit status to decide what
883 # operation needs to be done
884 comb += nc.eq(r0.req.nc | perm_attr.nocache)
885 comb += op.eq(Op.OP_NONE)
886 with m.If(go):
887 with m.If(~access_ok):
888 comb += op.eq(Op.OP_BAD)
889 with m.Elif(cancel_store):
890 comb += op.eq(Op.OP_STCX_FAIL)
891 with m.Else():
892 comb += opsel.eq(Cat(is_hit, nc, r0.req.load))
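# opsel is Cat(is_hit, nc, load): bit 2 = load, bit 1 = nc,
# bit 0 = is_hit, so e.g. 0b101 is a cacheable load hit and
# 0b100 a cacheable load miss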
893 with m.Switch(opsel):
894 with m.Case(0b101):
895 comb += op.eq(Op.OP_LOAD_HIT)
896 with m.Case(0b100):
897 comb += op.eq(Op.OP_LOAD_MISS)
898 with m.Case(0b110):
899 comb += op.eq(Op.OP_LOAD_NC)
900 with m.Case(0b001):
901 comb += op.eq(Op.OP_STORE_HIT)
902 with m.Case(0b000):
903 comb += op.eq(Op.OP_STORE_MISS)
904 with m.Case(0b010):
905 comb += op.eq(Op.OP_STORE_MISS)
906 with m.Case(0b011):
907 comb += op.eq(Op.OP_BAD)
908 with m.Case(0b111):
909 comb += op.eq(Op.OP_BAD)
910 with m.Default():
911 comb += op.eq(Op.OP_NONE)
912 comb += req_op.eq(op)
913 comb += req_go.eq(go)
914
915 # Version of the row number that is valid one cycle earlier
916 # in the cases where we need to read the cache data BRAM.
917 # If we're stalling then we need to keep reading the last
918 # row requested.
919 with m.If(~r0_stall):
920 with m.If(m_in.valid):
921 comb += early_req_row.eq(get_row(m_in.addr))
922 with m.Else():
923 comb += early_req_row.eq(get_row(d_in.addr))
924 with m.Else():
925 comb += early_req_row.eq(req_row)
926
927 def reservation_comb(self, m, cancel_store, set_rsrv, clear_rsrv,
928 r0_valid, r0, reservation):
929 """Handle load-with-reservation and store-conditional instructions
930 """
931 comb = m.d.comb
932 sync = m.d.sync
933
934 with m.If(r0_valid & r0.req.reserve):
935
936 # XXX generate alignment interrupt if address
937 # is not aligned XXX or if r0.req.nc = '1'
938 with m.If(r0.req.load):
939 comb += set_rsrv.eq(1) # load with reservation
940 with m.Else():
941 comb += clear_rsrv.eq(1) # store conditional
942 with m.If(~reservation.valid | (r0.req.addr[LINE_OFF_BITS:64] != reservation.addr)):
943 comb += cancel_store.eq(1)
944
945 def reservation_reg(self, m, r0_valid, access_ok, set_rsrv, clear_rsrv,
946 reservation, r0):
947
948 comb = m.d.comb
949 sync = m.d.sync
950
951 with m.If(r0_valid & access_ok):
952 with m.If(clear_rsrv):
953 sync += reservation.valid.eq(0)
954 with m.Elif(set_rsrv):
955 sync += reservation.valid.eq(1)
956 sync += reservation.addr.eq(r0.req.addr[LINE_OFF_BITS:64])
957
958 def writeback_control(self, m, r1, cache_out):
959 """Return data for loads & completion control logic
960 """
961 comb = m.d.comb
962 sync = m.d.sync
963 d_out, m_out = self.d_out, self.m_out
964
965 data_out = Signal(64)
966 data_fwd = Signal(64)
967
968 # Use the bypass if are reading the row that was
969 # written 1 or 2 cycles ago, including for the
970 # slow_valid = 1 case (i.e. completing a load
971 # miss or a non-cacheable load).
972 with m.If(r1.use_forward1):
973 comb += data_fwd.eq(r1.forward_data1)
974 with m.Else():
975 comb += data_fwd.eq(r1.forward_data2)
976
977 comb += data_out.eq(cache_out[r1.hit_way])
978
979 for i in range(8):
980 with m.If(r1.forward_sel[i]):
981 dsel = data_fwd.word_select(i, 8)
982 comb += data_out.word_select(i, 8).eq(dsel)
983
984 comb += d_out.valid.eq(r1.ls_valid)
985 comb += d_out.data.eq(data_out)
986 comb += d_out.store_done.eq(~r1.stcx_fail)
987 comb += d_out.error.eq(r1.ls_error)
988 comb += d_out.cache_paradox.eq(r1.cache_paradox)
989
990 # Outputs to MMU
991 comb += m_out.done.eq(r1.mmu_done)
992 comb += m_out.err.eq(r1.mmu_error)
993 comb += m_out.data.eq(data_out)
994
995 # We have a valid load or store hit or we just completed
996 # a slow op such as a load miss, a NC load or a store
997 #
998 # Note: the load hit is delayed by one cycle. However it
999 # can still not collide with r.slow_valid (well unless I
1000 # miscalculated) because slow_valid can only be set on a
1001 # subsequent request and not on its first cycle (the state
1002 # machine must have advanced), which makes slow_valid
1003 # at least 2 cycles from the previous hit_load_valid.
1004
1005 # Sanity: Only one of these must be set in any given cycle
1006
1007 if False: # TODO: need Display to get this to work
1008 assert (r1.slow_valid & r1.stcx_fail) != 1, \
1009 "unexpected slow_valid collision with stcx_fail"
1010
1011 assert ((r1.slow_valid | r1.stcx_fail) | r1.hit_load_valid) != 1, \
1012 "unexpected hit_load_delayed collision with slow_valid"
1013
1014 with m.If(~r1.mmu_req):
1015 # Request came from loadstore1...
1016 # Load hit case is the standard path
1017 with m.If(r1.hit_load_valid):
1018 #Display(f"completing load hit data={data_out}")
1019 pass
1020
1021 # error cases complete without stalling
1022 with m.If(r1.ls_error):
1023 # Display("completing ld/st with error")
1024 pass
1025
1026 # Slow ops (load miss, NC, stores)
1027 with m.If(r1.slow_valid):
1028 #Display(f"completing store or load miss data={data_out}")
1029 pass
1030
1031 with m.Else():
1032 # Request came from MMU
1033 with m.If(r1.hit_load_valid):
1034 # Display(f"completing load hit to MMU, data={m_out.data}")
1035 pass
1036 # error cases complete without stalling
1037 with m.If(r1.mmu_error):
1038 #Display("completing MMU ld with error")
1039 pass
1040
1041 # Slow ops (i.e. load miss)
1042 with m.If(r1.slow_valid):
1043 #Display("completing MMU load miss, data={m_out.data}")
1044 pass
1045
1046 def rams(self, m, r1, early_req_row, cache_out, replace_way):
1047 """rams
1048 Generate a cache RAM for each way. This handles the normal
1049 reads, writes from reloads and the special store-hit update
1050 path as well.
1051
1052 Note: the BRAMs have an extra read buffer, meaning the output
1053 is pipelined an extra cycle. This differs from the
1054 icache. The writeback logic needs to take that into
1055 account by using 1-cycle delayed signals for load hits.
1056 """
1057 comb = m.d.comb
1058 wb_in = self.wb_in
1059
1060 for i in range(NUM_WAYS):
1061 do_read = Signal()
1062 rd_addr = Signal(ROW_BITS)
1063 do_write = Signal()
1064 wr_addr = Signal(ROW_BITS)
1065 wr_data = Signal(WB_DATA_BITS)
1066 wr_sel = Signal(ROW_SIZE)
1067 wr_sel_m = Signal(ROW_SIZE)
1068 _d_out = Signal(WB_DATA_BITS)
1069
1070 way = CacheRam(ROW_BITS, WB_DATA_BITS, True)
1071 setattr(m.submodules, "cacheram_%d" % i, way)
1072
1073 comb += way.rd_en.eq(do_read)
1074 comb += way.rd_addr.eq(rd_addr)
1075 comb += _d_out.eq(way.rd_data_o)
1076 comb += way.wr_sel.eq(wr_sel_m)
1077 comb += way.wr_addr.eq(wr_addr)
1078 comb += way.wr_data.eq(wr_data)
1079
1080 # Cache hit reads
1081 comb += do_read.eq(1)
1082 comb += rd_addr.eq(early_req_row)
1083 comb += cache_out[i].eq(_d_out)
1084
1085 # Write mux:
1086 #
1087 # Defaults to wishbone read responses (cache refill)
1088 #
1089 # For timing, the mux on wr_data/sel/addr is not
1090 # dependent on anything other than the current state.
1091
1092 with m.If(r1.write_bram):
1093 # Write store data to BRAM. This happens one
1094 # cycle after the store is in r0.
1095 comb += wr_data.eq(r1.req.data)
1096 comb += wr_sel.eq(r1.req.byte_sel)
1097 comb += wr_addr.eq(get_row(r1.req.real_addr))
1098
1099 with m.If(i == r1.req.hit_way):
1100 comb += do_write.eq(1)
1101 with m.Else():
1102 # Otherwise, we might be doing a reload or a DCBZ
1103 with m.If(r1.dcbz):
1104 comb += wr_data.eq(0)
1105 with m.Else():
1106 comb += wr_data.eq(wb_in.dat)
1107 comb += wr_addr.eq(r1.store_row)
1108 comb += wr_sel.eq(~0) # all 1s
1109
1110 with m.If((r1.state == State.RELOAD_WAIT_ACK)
1111 & wb_in.ack & (replace_way == i)):
1112 comb += do_write.eq(1)
1113
1114 # Mask write selects with do_write since BRAM
1115 # doesn't have a global write-enable
1116 with m.If(do_write):
1117 comb += wr_sel_m.eq(wr_sel)
1118
1119 # Cache hit synchronous machine for the easy case.
1120 # This handles load hits.
1121 # It also handles error cases (TLB miss, cache paradox)
1122 def dcache_fast_hit(self, m, req_op, r0_valid, r0, r1,
1123 req_hit_way, req_index, access_ok,
1124 tlb_hit, tlb_hit_way, tlb_req_index):
1125
1126 comb = m.d.comb
1127 sync = m.d.sync
1128
1129 with m.If(req_op != Op.OP_NONE):
1130 #Display(f"op:{req_op} addr:{r0.req.addr} nc: {r0.req.nc}" \
1131 # f"idx:{req_index} tag:{req_tag} way: {req_hit_way}"
1132 # )
1133 pass
1134
1135 with m.If(r0_valid):
1136 sync += r1.mmu_req.eq(r0.mmu_req)
1137
1138 # Fast path for load/store hits.
1139 # Set signals for the writeback controls.
1140 sync += r1.hit_way.eq(req_hit_way)
1141 sync += r1.hit_index.eq(req_index)
1142
1143 with m.If(req_op == Op.OP_LOAD_HIT):
1144 sync += r1.hit_load_valid.eq(1)
1145 with m.Else():
1146 sync += r1.hit_load_valid.eq(0)
1147
1148 with m.If((req_op == Op.OP_LOAD_HIT) | (req_op == Op.OP_STORE_HIT)):
1149 sync += r1.cache_hit.eq(1)
1150 with m.Else():
1151 sync += r1.cache_hit.eq(0)
1152
1153 with m.If(req_op == Op.OP_BAD):
1154 # Display(f"Signalling ld/st error valid_ra={valid_ra}"
1155 # f"rc_ok={rc_ok} perm_ok={perm_ok}"
1156 sync += r1.ls_error.eq(~r0.mmu_req)
1157 sync += r1.mmu_error.eq(r0.mmu_req)
1158 sync += r1.cache_paradox.eq(access_ok)
1159
1160 with m.Else():
1161 sync += r1.ls_error.eq(0)
1162 sync += r1.mmu_error.eq(0)
1163 sync += r1.cache_paradox.eq(0)
1164
1165 with m.If(req_op == Op.OP_STCX_FAIL):
1166 sync += r1.stcx_fail.eq(1)
1167 with m.Else():
1168 sync += r1.stcx_fail.eq(0)
1169
1170 # Record TLB hit information for updating TLB PLRU
1171 sync += r1.tlb_hit.eq(tlb_hit)
1172 sync += r1.tlb_hit_way.eq(tlb_hit_way)
1173 sync += r1.tlb_hit_index.eq(tlb_req_index)
1174
1175 # Memory accesses are handled by this state machine:
1176 #
1177 # * Cache load miss/reload (in conjunction with "rams")
1178 # * Load hits for non-cachable forms
1179 # * Stores (the collision case is handled in "rams")
1180 #
1181 # All wishbone requests generation is done here.
1182 # This machine operates at stage 1.
1183 def dcache_slow(self, m, r1, use_forward1_next, use_forward2_next,
1184 cache_valid_bits, r0, replace_way,
1185 req_hit_way, req_same_tag,
1186 r0_valid, req_op, cache_tag, req_go, ra):
1187
1188 comb = m.d.comb
1189 sync = m.d.sync
1190 wb_in = self.wb_in
1191
1192 req = MemAccessRequest()
1193 acks = Signal(3)
1194 adjust_acks = Signal(3)
1195 stbs_done = Signal()
1196
1197 sync += r1.use_forward1.eq(use_forward1_next)
1198 sync += r1.forward_sel.eq(0)
1199
1200 with m.If(use_forward1_next):
1201 sync += r1.forward_sel.eq(r1.req.byte_sel)
1202 with m.Elif(use_forward2_next):
1203 sync += r1.forward_sel.eq(r1.forward_sel1)
1204
1205 sync += r1.forward_data2.eq(r1.forward_data1)
1206 with m.If(r1.write_bram):
1207 sync += r1.forward_data1.eq(r1.req.data)
1208 sync += r1.forward_sel1.eq(r1.req.byte_sel)
1209 sync += r1.forward_way1.eq(r1.req.hit_way)
1210 sync += r1.forward_row1.eq(get_row(r1.req.real_addr))
1211 sync += r1.forward_valid1.eq(1)
1212 with m.Else():
1213 with m.If(r1.dcbz):
1214 sync += r1.forward_data1.eq(0)
1215 with m.Else():
1216 sync += r1.forward_data1.eq(wb_in.dat)
1217 sync += r1.forward_sel1.eq(~0) # all 1s
1218 sync += r1.forward_way1.eq(replace_way)
1219 sync += r1.forward_row1.eq(r1.store_row)
1220 sync += r1.forward_valid1.eq(0)
1221
1222 # One cycle pulses reset
1223 sync += r1.slow_valid.eq(0)
1224 sync += r1.write_bram.eq(0)
1225 sync += r1.inc_acks.eq(0)
1226 sync += r1.dec_acks.eq(0)
1227
1228 sync += r1.ls_valid.eq(0)
1229 # complete tlbies and TLB loads in the third cycle
1230 sync += r1.mmu_done.eq(r0_valid & (r0.tlbie | r0.tlbld))
1231
1232 with m.If((req_op == Op.OP_LOAD_HIT)
1233 | (req_op == Op.OP_STCX_FAIL)):
1234 with m.If(~r0.mmu_req):
1235 sync += r1.ls_valid.eq(1)
1236 with m.Else():
1237 sync += r1.mmu_done.eq(1)
1238
1239 with m.If(r1.write_tag):
1240 # Store new tag in selected way
1241 for i in range(NUM_WAYS):
1242 with m.If(i == replace_way):
1243 ct = Signal(TAG_RAM_WIDTH)
1244 comb += ct.eq(cache_tag[r1.store_index])
1245 comb += ct.word_select(i, TAG_WIDTH).eq(r1.reload_tag)
1246 sync += cache_tag[r1.store_index].eq(ct)
1247 sync += r1.store_way.eq(replace_way)
1248 sync += r1.write_tag.eq(0)
1249
1250 # Take request from r1.req if there is one there,
1251 # else from req_op, ra, etc.
1252 with m.If(r1.full):
1253 comb += req.eq(r1.req)
1254 with m.Else():
1255 comb += req.op.eq(req_op)
1256 comb += req.valid.eq(req_go)
1257 comb += req.mmu_req.eq(r0.mmu_req)
1258 comb += req.dcbz.eq(r0.req.dcbz)
1259 comb += req.real_addr.eq(ra)
1260
1261 with m.If(~r0.req.dcbz):
1262 comb += req.data.eq(r0.req.data)
1263 with m.Else():
1264 comb += req.data.eq(0)
1265
1266 # Select all bytes for dcbz
1267 # and for cacheable loads
1268 with m.If(r0.req.dcbz | (r0.req.load & ~r0.req.nc)):
1269 comb += req.byte_sel.eq(~0) # all 1s
1270 with m.Else():
1271 comb += req.byte_sel.eq(r0.req.byte_sel)
1272 comb += req.hit_way.eq(req_hit_way)
1273 comb += req.same_tag.eq(req_same_tag)
1274
1275 # Store the incoming request from r0,
1276 # if it is a slow request
1277 # Note that r1.full = 1 implies req_op = OP_NONE
1278 with m.If((req_op == Op.OP_LOAD_MISS)
1279 | (req_op == Op.OP_LOAD_NC)
1280 | (req_op == Op.OP_STORE_MISS)
1281 | (req_op == Op.OP_STORE_HIT)):
1282 sync += r1.req.eq(req)
1283 sync += r1.full.eq(1)
1284
1285 # Main state machine
1286 with m.Switch(r1.state):
1287
1288 with m.Case(State.IDLE):
1289 # XXX check 'left downto. probably means len(r1.wb.adr)
1290 # r1.wb.adr <= req.real_addr(
1291 # r1.wb.adr'left downto 0
1292 # );
1293 sync += r1.wb.adr.eq(req.real_addr)
1294 sync += r1.wb.sel.eq(req.byte_sel)
1295 sync += r1.wb.dat.eq(req.data)
1296 sync += r1.dcbz.eq(req.dcbz)
1297
1298 # Keep track of our index and way
1299 # for subsequent stores.
1300 sync += r1.store_index.eq(get_index(req.real_addr))
1301 sync += r1.store_row.eq(get_row(req.real_addr))
1302 sync += r1.end_row_ix.eq(
1303 get_row_of_line(get_row(req.real_addr))
1304 )
1305 sync += r1.reload_tag.eq(get_tag(req.real_addr))
1306 sync += r1.req.same_tag.eq(1)
1307
1308 with m.If(req.op == Op.OP_STORE_HIT):
1309 sync += r1.store_way.eq(req.hit_way)
1310
1311 # Reset per-row valid bits,
1312 # ready for handling OP_LOAD_MISS
1313 for i in range(ROW_PER_LINE):
1314 sync += r1.rows_valid[i].eq(0)
1315
1316 with m.Switch(req.op):
1317 with m.Case(Op.OP_LOAD_HIT):
1318 # stay in IDLE state
1319 pass
1320
1321 with m.Case(Op.OP_LOAD_MISS):
1322 #Display(f"cache miss real addr:" \
1323 # f"{req_real_addr}" \
1324 # f" idx:{get_index(req_real_addr)}" \
1325 # f" tag:{get_tag(req.real_addr)}")
1326 pass
1327
1328 # Start the wishbone cycle
1329 sync += r1.wb.we.eq(0)
1330 sync += r1.wb.cyc.eq(1)
1331 sync += r1.wb.stb.eq(1)
1332
1333 # Track that we had one request sent
1334 sync += r1.state.eq(State.RELOAD_WAIT_ACK)
1335 sync += r1.write_tag.eq(1)
1336
1337 with m.Case(Op.OP_LOAD_NC):
1338 sync += r1.wb.cyc.eq(1)
1339 sync += r1.wb.stb.eq(1)
1340 sync += r1.wb.we.eq(0)
1341 sync += r1.state.eq(State.NC_LOAD_WAIT_ACK)
1342
1343 with m.Case(Op.OP_STORE_HIT, Op.OP_STORE_MISS):
1344 with m.If(~req.dcbz):
1345 sync += r1.state.eq(State.STORE_WAIT_ACK)
1346 sync += r1.acks_pending.eq(1)
1347 sync += r1.full.eq(0)
1348 sync += r1.slow_valid.eq(1)
1349
1350 with m.If(~req.mmu_req):
1351 sync += r1.ls_valid.eq(1)
1352 with m.Else():
1353 sync += r1.mmu_done.eq(1)
1354
1355 with m.If(req.op == Op.OP_STORE_HIT):
1356 sync += r1.write_bram.eq(1)
1357 with m.Else():
1358 sync += r1.state.eq(State.RELOAD_WAIT_ACK)
1359
1360 with m.If(req.op == Op.OP_STORE_MISS):
1361 sync += r1.write_tag.eq(1)
1362
1363 sync += r1.wb.we.eq(1)
1364 sync += r1.wb.cyc.eq(1)
1365 sync += r1.wb.stb.eq(1)
1366
1367 # OP_NONE and OP_BAD do nothing
1368 # OP_BAD & OP_STCX_FAIL were
1369 # handled above already
1370 with m.Case(Op.OP_NONE):
1371 pass
1372 with m.Case(Op.OP_BAD):
1373 pass
1374 with m.Case(Op.OP_STCX_FAIL):
1375 pass
1376
1377 with m.Case(State.RELOAD_WAIT_ACK):
1378 # Requests are all sent if stb is 0
1379 comb += stbs_done.eq(~r1.wb.stb)
1380
1381 with m.If(~wb_in.stall & ~stbs_done):
1382 # That was the last word?
1383 # We are done sending.
1384 # Clear stb and set stbs_done
1385 # so we can handle an eventual
1386 # last ack on the same cycle.
1387 with m.If(is_last_row_addr(
1388 r1.wb.adr, r1.end_row_ix)):
1389 sync += r1.wb.stb.eq(0)
1390 comb += stbs_done.eq(0)
1391
1392 # Calculate the next row address in the current cache line
1393 rarange = r1.wb.adr[ROW_OFF_BITS : LINE_OFF_BITS]
1394 sync += rarange.eq(rarange + 1)
1395
1396 # Incoming acks processing
1397 sync += r1.forward_valid1.eq(wb_in.ack)
1398 with m.If(wb_in.ack):
1399 # XXX needs an Array bit-accessor here
1400 sync += r1.rows_valid[r1.store_row % ROW_PER_LINE].eq(1)
1401
1402 # If this is the data we were looking for,
1403 # we can complete the request next cycle.
1404 # Compare the whole address in case the
1405 # request in r1.req is not the one that
1406 # started this refill.
1407 with m.If(r1.full & r1.req.same_tag &
1408 ((r1.dcbz & r1.req.dcbz) |
1409 (~r1.dcbz & (r1.req.op == Op.OP_LOAD_MISS))) &
1410 (r1.store_row == get_row(r1.req.real_addr))):
1411 sync += r1.full.eq(0)
1412 sync += r1.slow_valid.eq(1)
1413 with m.If(~r1.mmu_req):
1414 sync += r1.ls_valid.eq(1)
1415 with m.Else():
1416 sync += r1.mmu_done.eq(1)
1417 sync += r1.forward_sel.eq(~0) # all 1s
1418 sync += r1.use_forward1.eq(1)
1419
1420 # Check for completion
1421 with m.If(stbs_done & is_last_row(r1.store_row,
1422 r1.end_row_ix)):
1423 # Complete wishbone cycle
1424 sync += r1.wb.cyc.eq(0)
1425
1426 # Cache line is now valid
1427 cv = Signal(NUM_WAYS)
1428 comb += cv.eq(cache_valid_bits[r1.store_index])
1429 comb += cv.bit_select(r1.store_way, 1).eq(1)
sync += cache_valid_bits[r1.store_index].eq(cv)
1430 sync += r1.state.eq(State.IDLE)
1431
1432 # Increment store row counter
1433 sync += r1.store_row.eq(next_row(r1.store_row))
1434
1435 with m.Case(State.STORE_WAIT_ACK):
1436 comb += stbs_done.eq(~r1.wb.stb)
1437 comb += acks.eq(r1.acks_pending)
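# acks_pending counts wishbone stores that have been sent but not
# yet acknowledged: inc_acks is set below when a further store is
# issued, dec_acks on each incoming ack, and the cycle is closed
# once stbs_done and only one ack remains outstanding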
1438
1439 with m.If(r1.inc_acks != r1.dec_acks):
1440 with m.If(r1.inc_acks):
1441 comb += adjust_acks.eq(acks + 1)
1442 with m.Else():
1443 comb += adjust_acks.eq(acks - 1)
1444 with m.Else():
1445 comb += adjust_acks.eq(acks)
1446
1447 sync += r1.acks_pending.eq(adjust_acks)
1448
1449 # Clear stb when slave accepted request
1450 with m.If(~wb_in.stall):
1451 # See if there is another store waiting
1452 # to be done which is in the same real page.
1453 with m.If(req.valid):
1454 ra = req.real_addr[0:SET_SIZE_BITS]
1455 sync += r1.wb.adr[0:SET_SIZE_BITS].eq(ra)
1456 sync += r1.wb.dat.eq(req.data)
1457 sync += r1.wb.sel.eq(req.byte_sel)
1458
1459 with m.Elif((adjust_acks < 7) & req.same_tag &
1460 ((req.op == Op.OP_STORE_MISS)
1461 | (req.op == Op.OP_STORE_HIT))):
1462 sync += r1.wb.stb.eq(1)
1463 comb += stbs_done.eq(0)
1464
1465 with m.If(req.op == Op.OP_STORE_HIT):
1466 sync += r1.write_bram.eq(1)
1467 sync += r1.full.eq(0)
1468 sync += r1.slow_valid.eq(1)
1469
1470 # Store requests never come from the MMU
1471 sync += r1.ls_valid.eq(1)
1472 comb += stbs_done.eq(0)
1473 sync += r1.inc_acks.eq(1)
1474 with m.Else():
1475 sync += r1.wb.stb.eq(0)
1476 comb += stbs_done.eq(1)
1477
1478 # Got ack ? See if complete.
1479 with m.If(wb_in.ack):
1480 with m.If(stbs_done & (adjust_acks == 1)):
1481 sync += r1.state.eq(State.IDLE)
1482 sync += r1.wb.cyc.eq(0)
1483 sync += r1.wb.stb.eq(0)
1484 sync += r1.dec_acks.eq(1)
1485
1486 with m.Case(State.NC_LOAD_WAIT_ACK):
1487 # Clear stb when slave accepted request
1488 with m.If(~wb_in.stall):
1489 sync += r1.wb.stb.eq(0)
1490
1491 # Got ack ? complete.
1492 with m.If(wb_in.ack):
1493 sync += r1.state.eq(State.IDLE)
1494 sync += r1.full.eq(0)
1495 sync += r1.slow_valid.eq(1)
1496
1497 with m.If(~r1.mmu_req):
1498 sync += r1.ls_valid.eq(1)
1499 with m.Else():
1500 sync += r1.mmu_done.eq(1)
1501
1502 sync += r1.forward_sel.eq(~0) # all 1s
1503 sync += r1.use_forward1.eq(1)
1504 sync += r1.wb.cyc.eq(0)
1505 sync += r1.wb.stb.eq(0)
1506
1507 def dcache_log(self, m, r1, valid_ra, tlb_hit_way, req_op, stall_out):
1508
1509 sync = m.d.sync
1510 d_out, wb_in, log_out = self.d_out, self.wb_in, self.log_out
1511
1512 sync += log_out.eq(Cat(r1.state[:3], valid_ra, tlb_hit_way[:3],
1513 stall_out, req_op[:3], d_out.valid, d_out.error,
1514 r1.wb.cyc, r1.wb.stb, wb_in.ack, wb_in.stall,
1515 r1.wb.adr[3:6]))
1516
1517 def elaborate(self, platform):
1518
1519 m = Module()
1520 comb = m.d.comb
1521
1522 # Storage. Hopefully "cache_rows" is a BRAM, the rest is LUTs
1523 cache_tags = CacheTagArray()
1524 cache_tag_set = Signal(TAG_RAM_WIDTH)
1525 cache_valid_bits = CacheValidBitsArray()
1526
1527 # TODO attribute ram_style : string;
1528 # TODO attribute ram_style of cache_tags : signal is "distributed";
1529
1530 """note: these are passed to nmigen.hdl.Memory as "attributes".
1531 don't know how, just that they are.
1532 """
1533 dtlb_valid_bits = TLBValidBitsArray()
1534 dtlb_tags = TLBTagsArray()
1535 dtlb_ptes = TLBPtesArray()
1536 # TODO attribute ram_style of
1537 # dtlb_tags : signal is "distributed";
1538 # TODO attribute ram_style of
1539 # dtlb_ptes : signal is "distributed";
1540
1541 r0 = RegStage0()
1542 r0_full = Signal()
1543
1544 r1 = RegStage1()
1545
1546 reservation = Reservation()
1547
1548 # Async signals on incoming request
1549 req_index = Signal(INDEX_BITS)
1550 req_row = Signal(ROW_BITS)
1551 req_hit_way = Signal(WAY_BITS)
1552 req_tag = Signal(TAG_BITS)
1553 req_op = Signal(Op)
1554 req_data = Signal(64)
1555 req_same_tag = Signal()
1556 req_go = Signal()
1557
1558 early_req_row = Signal(ROW_BITS)
1559
1560 cancel_store = Signal()
1561 set_rsrv = Signal()
1562 clear_rsrv = Signal()
1563
1564 r0_valid = Signal()
1565 r0_stall = Signal()
1566
1567 use_forward1_next = Signal()
1568 use_forward2_next = Signal()
1569
1570 cache_out = CacheRamOut()
1571
1572 plru_victim = PLRUOut()
1573 replace_way = Signal(WAY_BITS)
1574
1575 # Wishbone read/write/cache write formatting signals
1576 bus_sel = Signal(8)
1577
1578 # TLB signals
1579 tlb_tag_way = Signal(TLB_TAG_WAY_BITS)
1580 tlb_pte_way = Signal(TLB_PTE_WAY_BITS)
1581 tlb_valid_way = Signal(TLB_NUM_WAYS)
1582 tlb_req_index = Signal(TLB_SET_BITS)
1583 tlb_hit = Signal()
1584 tlb_hit_way = Signal(TLB_WAY_BITS)
1585 pte = Signal(TLB_PTE_BITS)
1586 ra = Signal(REAL_ADDR_BITS)
1587 valid_ra = Signal()
1588 perm_attr = PermAttr()
1589 rc_ok = Signal()
1590 perm_ok = Signal()
1591 access_ok = Signal()
1592
1593 tlb_plru_victim = TLBPLRUOut()
1594
1595 # we don't yet handle collisions between loadstore1 requests
1596 # and MMU requests
1597 comb += self.m_out.stall.eq(0)
1598
1599 # Hold off the request in r0 when r1 has an uncompleted request
1600 comb += r0_stall.eq(r0_full & r1.full)
1601 comb += r0_valid.eq(r0_full & ~r1.full)
1602 comb += self.stall_out.eq(r0_stall)
1603
1604 # Wire up wishbone request latch out of stage 1
1605 comb += self.wb_out.eq(r1.wb)
1606
1607 # call sub-functions putting everything together, using shared
1608 # signals established above
1609 self.stage_0(m, r0, r1, r0_full)
1610 self.tlb_read(m, r0_stall, tlb_valid_way,
1611 tlb_tag_way, tlb_pte_way, dtlb_valid_bits,
1612 dtlb_tags, dtlb_ptes)
1613 self.tlb_search(m, tlb_req_index, r0, r0_valid,
1614 tlb_valid_way, tlb_tag_way, tlb_hit_way,
1615 tlb_pte_way, pte, tlb_hit, valid_ra, perm_attr, ra)
1616 self.tlb_update(m, r0_valid, r0, dtlb_valid_bits, tlb_req_index,
1617 tlb_hit_way, tlb_hit, tlb_plru_victim, tlb_tag_way,
1618 dtlb_tags, tlb_pte_way, dtlb_ptes)
1619 self.maybe_plrus(m, r1, plru_victim)
self.maybe_tlb_plrus(m, r1, tlb_plru_victim)
1620 self.cache_tag_read(m, r0_stall, req_index, cache_tag_set, cache_tags)
1621 self.dcache_request(m, r0, ra, req_index, req_row, req_tag,
1622 r0_valid, r1, cache_valid_bits, replace_way,
1623 use_forward1_next, use_forward2_next,
1624 req_hit_way, plru_victim, rc_ok, perm_attr,
1625 valid_ra, perm_ok, access_ok, req_op, req_go,
1626 tlb_pte_way,
1627 tlb_hit, tlb_hit_way, tlb_valid_way, cache_tag_set,
1628 cancel_store, req_same_tag, r0_stall, early_req_row)
1629 self.reservation_comb(m, cancel_store, set_rsrv, clear_rsrv,
1630 r0_valid, r0, reservation)
1631 self.reservation_reg(m, r0_valid, access_ok, set_rsrv, clear_rsrv,
1632 reservation, r0)
1633 self.writeback_control(m, r1, cache_out)
1634 self.rams(m, r1, early_req_row, cache_out, replace_way)
1635 self.dcache_fast_hit(m, req_op, r0_valid, r0, r1,
1636 req_hit_way, req_index, access_ok,
1637 tlb_hit, tlb_hit_way, tlb_req_index)
1638 self.dcache_slow(m, r1, use_forward1_next, use_forward2_next,
1639 cache_valid_bits, r0, replace_way,
1640 req_hit_way, req_same_tag,
1641 r0_valid, req_op, cache_tags, req_go, ra)
1642 #self.dcache_log(m, r1, valid_ra, tlb_hit_way, req_op, self.stall_out)
1643
1644 return m
1645
1646
1647 # dcache_tb.vhdl
1648 #
1649 # entity dcache_tb is
1650 # end dcache_tb;
1651 #
1652 # architecture behave of dcache_tb is
1653 # signal clk : std_ulogic;
1654 # signal rst : std_ulogic;
1655 #
1656 # signal d_in : Loadstore1ToDcacheType;
1657 # signal d_out : DcacheToLoadstore1Type;
1658 #
1659 # signal m_in : MmuToDcacheType;
1660 # signal m_out : DcacheToMmuType;
1661 #
1662 # signal wb_bram_in : wishbone_master_out;
1663 # signal wb_bram_out : wishbone_slave_out;
1664 #
1665 # constant clk_period : time := 10 ns;
1666 # begin
1667 # dcache0: entity work.dcache
1668 # generic map(
1669 #
1670 # LINE_SIZE => 64,
1671 # NUM_LINES => 4
1672 # )
1673 # port map(
1674 # clk => clk,
1675 # rst => rst,
1676 # d_in => d_in,
1677 # d_out => d_out,
1678 # m_in => m_in,
1679 # m_out => m_out,
1680 # wishbone_out => wb_bram_in,
1681 # wishbone_in => wb_bram_out
1682 # );
1683 #
1684 # -- BRAM Memory slave
1685 # bram0: entity work.wishbone_bram_wrapper
1686 # generic map(
1687 # MEMORY_SIZE => 1024,
1688 # RAM_INIT_FILE => "icache_test.bin"
1689 # )
1690 # port map(
1691 # clk => clk,
1692 # rst => rst,
1693 # wishbone_in => wb_bram_in,
1694 # wishbone_out => wb_bram_out
1695 # );
1696 #
1697 # clk_process: process
1698 # begin
1699 # clk <= '0';
1700 # wait for clk_period/2;
1701 # clk <= '1';
1702 # wait for clk_period/2;
1703 # end process;
1704 #
1705 # rst_process: process
1706 # begin
1707 # rst <= '1';
1708 # wait for 2*clk_period;
1709 # rst <= '0';
1710 # wait;
1711 # end process;
1712 #
1713 # stim: process
1714 # begin
1715 # -- Clear stuff
1716 # d_in.valid <= '0';
1717 # d_in.load <= '0';
1718 # d_in.nc <= '0';
1719 # d_in.addr <= (others => '0');
1720 # d_in.data <= (others => '0');
1721 # m_in.valid <= '0';
1722 # m_in.addr <= (others => '0');
1723 # m_in.pte <= (others => '0');
1724 #
1725 # wait for 4*clk_period;
1726 # wait until rising_edge(clk);
1727 #
1728 # -- Cacheable read of address 4
1729 # d_in.load <= '1';
1730 # d_in.nc <= '0';
1731 # d_in.addr <= x"0000000000000004";
1732 # d_in.valid <= '1';
1733 # wait until rising_edge(clk);
1734 # d_in.valid <= '0';
1735 #
1736 # wait until rising_edge(clk) and d_out.valid = '1';
1737 # assert d_out.data = x"0000000100000000"
1738 # report "data @" & to_hstring(d_in.addr) &
1739 # "=" & to_hstring(d_out.data) &
1740 # " expected 0000000100000000"
1741 # severity failure;
1742 # -- wait for clk_period;
1743 #
1744 # -- Cacheable read of address 30
1745 # d_in.load <= '1';
1746 # d_in.nc <= '0';
1747 # d_in.addr <= x"0000000000000030";
1748 # d_in.valid <= '1';
1749 # wait until rising_edge(clk);
1750 # d_in.valid <= '0';
1751 #
1752 # wait until rising_edge(clk) and d_out.valid = '1';
1753 # assert d_out.data = x"0000000D0000000C"
1754 # report "data @" & to_hstring(d_in.addr) &
1755 # "=" & to_hstring(d_out.data) &
1756 # " expected 0000000D0000000C"
1757 # severity failure;
1758 #
1759 # -- Non-cacheable read of address 100
1760 # d_in.load <= '1';
1761 # d_in.nc <= '1';
1762 # d_in.addr <= x"0000000000000100";
1763 # d_in.valid <= '1';
1764 # wait until rising_edge(clk);
1765 # d_in.valid <= '0';
1766 # wait until rising_edge(clk) and d_out.valid = '1';
1767 # assert d_out.data = x"0000004100000040"
1768 # report "data @" & to_hstring(d_in.addr) &
1769 # "=" & to_hstring(d_out.data) &
1770 # " expected 0000004100000040"
1771 # severity failure;
1772 #
1773 # wait until rising_edge(clk);
1774 # wait until rising_edge(clk);
1775 # wait until rising_edge(clk);
1776 # wait until rising_edge(clk);
1777 #
1778 # std.env.finish;
1779 # end process;
1780 # end;
1781 def dcache_sim(dut):
1782 # clear stuff
1783 yield dut.d_in.valid.eq(0)
1784 yield dut.d_in.load.eq(0)
1785 yield dut.d_in.nc.eq(0)
1786 yield dut.d_in.addr.eq(0)
1787 yield dut.d_in.data.eq(0)
1788 yield dut.m_in.valid.eq(0)
1789 yield dut.m_in.addr.eq(0)
1790 yield dut.m_in.pte.eq(0)
1791 # wait 4 * clk_period
1792 yield
1793 yield
1794 yield
1795 yield
1796 # wait_until rising_edge(clk)
1797 yield
1798 # Cacheable read of address 4
1799 yield dut.d_in.load.eq(1)
1800 yield dut.d_in.nc.eq(0)
1801 yield dut.d_in.addr.eq(Const(0x0000000000000004, 64))
1802 yield dut.d_in.valid.eq(1)
1803 # wait-until rising_edge(clk)
1804 yield
1805 yield dut.d_in.valid.eq(0)
1806 yield
1807 while not (yield dut.d_out.valid):
1808 yield
1809 data = yield dut.d_out.data
1810 assert data == 0x0000000100000000, f"data @ 0x04 = {data:x} expected 0000000100000000"
1811
1812
1813 # Cacheable read of address 30
1814 yield dut.d_in.load.eq(1)
1815 yield dut.d_in.nc.eq(0)
1816 yield dut.d_in.addr.eq(Const(0x0000000000000030, 64))
1817 yield dut.d_in.valid.eq(1)
1818 yield
1819 yield dut.d_in.valid.eq(0)
1820 yield
1821 while not (yield dut.d_out.valid):
1822 yield
1823 data = yield dut.d_out.data
1824 assert data == 0x0000000D0000000C, f"data @ 0x30 = {data:x} expected 0000000D0000000C"
1825
1826 # Non-cacheable read of address 100
1827 yield dut.d_in.load.eq(1)
1828 yield dut.d_in.nc.eq(1)
1829 yield dut.d_in.addr.eq(Const(0x0000000000000100, 64))
1830 yield dut.d_in.valid.eq(1)
1831 yield
1832 yield dut.d_in.valid.eq(0)
1833 yield
1834 while not (yield dut.d_out.valid):
1835 yield
1836 data = yield dut.d_out.data
1837 assert data == 0x0000004100000040, f"data @ 0x100 = {data:x} expected 0000004100000040"
1838
1839 yield
1840 yield
1841 yield
1842 yield
1843
1844
1845 def test_dcache():
1846 dut = DCache()
1847 vl = rtlil.convert(dut, ports=[])
1848 with open("test_dcache.il", "w") as f:
1849 f.write(vl)
1850
1851 #run_simulation(dut, dcache_sim(dut), vcd_name='test_dcache.vcd')
1852
1853 if __name__ == '__main__':
1854 test_dcache()
1855