1 """DCache
2
3 based on Anton Blanchard microwatt dcache.vhdl
4
5 note that the microwatt dcache wishbone interface expects "stall".
6 for simplicity at the moment this is hard-coded to cyc & ~ack.
7 see WB4 spec, p84, section 5.2.1
8
9 IMPORTANT: for store, the data is sampled the cycle AFTER the "valid"
10 is raised. sigh
11
12 Links:
13
14 * https://libre-soc.org/3d_gpu/architecture/set_associative_cache.jpg
15 * https://bugs.libre-soc.org/show_bug.cgi?id=469
16
17 """
18
19 import sys
20
21 from nmutil.gtkw import write_gtkw
22
23 sys.setrecursionlimit(1000000)
24
25 from enum import Enum, unique
26
27 from nmigen import (Module, Signal, Elaboratable, Cat, Repl, Array, Const,
28 Record)
29 from nmutil.util import Display
30
31 from copy import deepcopy
32 from random import randint, seed
33
34 from nmigen_soc.wishbone.bus import Interface
35
36 from nmigen.cli import main
37 from nmutil.iocontrol import RecordObject
38 from nmigen.utils import log2_int
39 from soc.experiment.mem_types import (LoadStore1ToDCacheType,
40 DCacheToLoadStore1Type,
41 MMUToDCacheType,
42 DCacheToMMUType)
43
44 from soc.experiment.wb_types import (WB_ADDR_BITS, WB_DATA_BITS, WB_SEL_BITS,
45 WBAddrType, WBDataType, WBSelType,
46 WBMasterOut, WBSlaveOut,
47 WBMasterOutVector, WBSlaveOutVector,
48 WBIOMasterOut, WBIOSlaveOut)
49
50 from soc.experiment.cache_ram import CacheRam
51 #from soc.experiment.plru import PLRU
52 from nmutil.plru import PLRU
53
54 # for test
55 from soc.bus.sram import SRAM
56 from nmigen import Memory
57 from nmigen.cli import rtlil
58
59 # NOTE: to use cxxsim, export NMIGEN_SIM_MODE=cxxsim from the shell
60 # Also, check out the cxxsim nmigen branch, and latest yosys from git
61 from nmutil.sim_tmp_alternative import Simulator
62
63 from nmutil.util import wrap
64
65
66 # TODO: make these parameters of DCache at some point
67 LINE_SIZE = 64 # Line size in bytes
68 NUM_LINES = 16 # Number of lines in a set
69 NUM_WAYS = 4 # Number of ways
70 TLB_SET_SIZE = 64 # L1 DTLB entries per set
71 TLB_NUM_WAYS = 2 # L1 DTLB number of ways
72 TLB_LG_PGSZ = 12 # L1 DTLB log_2(page_size)
73 LOG_LENGTH = 0 # Non-zero to enable log data collection
74
75 # BRAM organisation: We never access more than
76 # WB_DATA_BITS at a time so to save
77 # resources we make the array only that wide, and
78 # use consecutive indices to make a cache "line"
79 #
80 # ROW_SIZE is the width in bytes of the BRAM
81 # (based on WB, so 64-bits)
82 ROW_SIZE = WB_DATA_BITS // 8
83
84 # ROW_PER_LINE is the number of row (wishbone
85 # transactions) in a line
86 ROW_PER_LINE = LINE_SIZE // ROW_SIZE
87
88 # BRAM_ROWS is the number of rows in BRAM needed
89 # to represent the full dcache
90 BRAM_ROWS = NUM_LINES * ROW_PER_LINE
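# With the defaults above (and WB_DATA_BITS = 64, asserted below) the
# geometry works out to:
#   ROW_SIZE     = 64 // 8  = 8 bytes per BRAM row
#   ROW_PER_LINE = 64 // 8  = 8 wishbone transfers per cache line
#   BRAM_ROWS    = 16 * 8   = 128 BRAM rows for the whole dcache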
91
92 print ("ROW_SIZE", ROW_SIZE)
93 print ("ROW_PER_LINE", ROW_PER_LINE)
94 print ("BRAM_ROWS", BRAM_ROWS)
95 print ("NUM_WAYS", NUM_WAYS)
96
97 # Bit fields counts in the address
98
99 # REAL_ADDR_BITS is the number of real address
100 # bits that we store
101 REAL_ADDR_BITS = 56
102
103 # ROW_BITS is the number of bits to select a row
104 ROW_BITS = log2_int(BRAM_ROWS)
105
106 # ROW_LINE_BITS is the number of bits to select
107 # a row within a line
108 ROW_LINE_BITS = log2_int(ROW_PER_LINE)
109
110 # LINE_OFF_BITS is the number of bits for
111 # the offset in a cache line
112 LINE_OFF_BITS = log2_int(LINE_SIZE)
113
114 # ROW_OFF_BITS is the number of bits for
115 # the offset in a row
116 ROW_OFF_BITS = log2_int(ROW_SIZE)
117
118 # INDEX_BITS is the number of bits to
119 # select a cache line
120 INDEX_BITS = log2_int(NUM_LINES)
121
122 # SET_SIZE_BITS is the log base 2 of the set size
123 SET_SIZE_BITS = LINE_OFF_BITS + INDEX_BITS
124
125 # TAG_BITS is the number of bits of
126 # the tag part of the address
127 TAG_BITS = REAL_ADDR_BITS - SET_SIZE_BITS
128
129 # TAG_WIDTH is the width in bits of each way of the tag RAM
130 TAG_WIDTH = TAG_BITS + 7 - ((TAG_BITS + 7) % 8)
131
132 # WAY_BITS is the number of bits to select a way
133 WAY_BITS = log2_int(NUM_WAYS)
134
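# With the default geometry above, these work out to:
#   ROW_BITS=7, ROW_LINE_BITS=3, LINE_OFF_BITS=6, ROW_OFF_BITS=3,
#   INDEX_BITS=4, SET_SIZE_BITS=10, TAG_BITS=46, TAG_WIDTH=48, WAY_BITS=2
# (the layout sketch below instead assumes 32 lines, which is why it
# shows INDEX_BITS=5 and TAG_BITS=45)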
135 # Example of layout for 32 lines of 64 bytes:
136 layout = """\
137 .. tag |index| line |
138 .. | row | |
139 .. | |---| | ROW_LINE_BITS (3)
140 .. | |--- - --| LINE_OFF_BITS (6)
141 .. | |- --| ROW_OFF_BITS (3)
142 .. |----- ---| | ROW_BITS (8)
143 .. |-----| | INDEX_BITS (5)
144 .. --------| | TAG_BITS (45)
145 """
146 print (layout)
147 print ("Dcache TAG %d IDX %d ROW_BITS %d ROFF %d LOFF %d RLB %d" % \
148 (TAG_BITS, INDEX_BITS, ROW_BITS,
149 ROW_OFF_BITS, LINE_OFF_BITS, ROW_LINE_BITS))
150 print ("index @: %d-%d" % (LINE_OFF_BITS, SET_SIZE_BITS))
151 print ("row @: %d-%d" % (ROW_OFF_BITS, SET_SIZE_BITS))
152 print ("tag @: %d-%d width %d" % (SET_SIZE_BITS, REAL_ADDR_BITS, TAG_WIDTH))
153
154 TAG_RAM_WIDTH = TAG_WIDTH * NUM_WAYS
155
156 print ("TAG_RAM_WIDTH", TAG_RAM_WIDTH)
157
158 def CacheTagArray():
159 tag_layout = [('valid', 1),
160 ('tag', TAG_RAM_WIDTH),
161 ]
162 return Array(Record(tag_layout, name="tag%d" % x) for x in range(NUM_LINES))
163
164 def RowPerLineValidArray():
165 return Array(Signal(name="rows_valid%d" % x) \
166 for x in range(ROW_PER_LINE))
167
168 # L1 TLB
169 TLB_SET_BITS = log2_int(TLB_SET_SIZE)
170 TLB_WAY_BITS = log2_int(TLB_NUM_WAYS)
171 TLB_EA_TAG_BITS = 64 - (TLB_LG_PGSZ + TLB_SET_BITS)
172 TLB_TAG_WAY_BITS = TLB_NUM_WAYS * TLB_EA_TAG_BITS
173 TLB_PTE_BITS = 64
174 TLB_PTE_WAY_BITS = TLB_NUM_WAYS * TLB_PTE_BITS
175
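# With the default TLB geometry: TLB_SET_BITS=6, TLB_WAY_BITS=1,
# TLB_EA_TAG_BITS=46, TLB_TAG_WAY_BITS=92 and TLB_PTE_WAY_BITS=128,
# i.e. each of the 64 TLB sets packs the tags (and PTEs) of both
# ways side by side into a single Signal.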
176 def ispow2(x):
177 return (1<<log2_int(x, False)) == x
178
179 assert (LINE_SIZE % ROW_SIZE) == 0, "LINE_SIZE not multiple of ROW_SIZE"
180 assert ispow2(LINE_SIZE), "LINE_SIZE not power of 2"
181 assert ispow2(NUM_LINES), "NUM_LINES not power of 2"
182 assert ispow2(ROW_PER_LINE), "ROW_PER_LINE not power of 2"
183 assert ROW_BITS == (INDEX_BITS + ROW_LINE_BITS), "geometry bits don't add up"
184 assert (LINE_OFF_BITS == ROW_OFF_BITS + ROW_LINE_BITS), \
185 "geometry bits don't add up"
186 assert REAL_ADDR_BITS == (TAG_BITS + INDEX_BITS + LINE_OFF_BITS), \
187 "geometry bits don't add up"
188 assert REAL_ADDR_BITS == (TAG_BITS + ROW_BITS + ROW_OFF_BITS), \
189 "geometry bits don't add up"
190 assert 64 == WB_DATA_BITS, "Can't yet handle wb width that isn't 64-bits"
191 assert SET_SIZE_BITS <= TLB_LG_PGSZ, "Set indexed by virtual address"
192
193
194 def TLBValidBitsArray():
195 return Array(Signal(TLB_NUM_WAYS, name="tlbvalid%d" % x) \
196 for x in range(TLB_SET_SIZE))
197
198 def TLBTagEAArray():
199 return Array(Signal(TLB_EA_TAG_BITS, name="tlbtagea%d" % x) \
200 for x in range (TLB_NUM_WAYS))
201
202 def TLBTagsArray():
203 return Array(Signal(TLB_TAG_WAY_BITS, name="tlbtags%d" % x) \
204 for x in range (TLB_SET_SIZE))
205
206 def TLBPtesArray():
207 return Array(Signal(TLB_PTE_WAY_BITS, name="tlbptes%d" % x) \
208 for x in range(TLB_SET_SIZE))
209
210 def HitWaySet():
211 return Array(Signal(WAY_BITS, name="hitway_%d" % x) \
212 for x in range(TLB_NUM_WAYS))
213
214 # Cache RAM interface
215 def CacheRamOut():
216 return Array(Signal(WB_DATA_BITS, name="cache_out%d" % x) \
217 for x in range(NUM_WAYS))
218
219 # PLRU output interface
220 def PLRUOut():
221 return Array(Signal(WAY_BITS, name="plru_out%d" % x) \
222 for x in range(NUM_LINES))
223
224 # TLB PLRU output interface
225 def TLBPLRUOut():
226 return Array(Signal(TLB_WAY_BITS, name="tlbplru_out%d" % x) \
227 for x in range(TLB_SET_SIZE))
228
229 # Helper functions to decode incoming requests
230 #
231 # Return the cache line index (tag index) for an address
232 def get_index(addr):
233 return addr[LINE_OFF_BITS:SET_SIZE_BITS]
234
235 # Return the cache row index (data memory) for an address
236 def get_row(addr):
237 return addr[ROW_OFF_BITS:SET_SIZE_BITS]
238
239 # Return the index of a row within a line
240 def get_row_of_line(row):
241 return row[:ROW_BITS][:ROW_LINE_BITS]
242
243 # Returns whether the given address is in the last row of a cache line
244 def is_last_row_addr(addr, last):
245 return addr[ROW_OFF_BITS:LINE_OFF_BITS] == last
246
247 # Returns whether this is the last row of a line
248 def is_last_row(row, last):
249 return get_row_of_line(row) == last
250
251 # Return the next row in the current cache line. We use a
252 # dedicated function in order to limit the size of the
253 # generated adder to be only the bits within a cache line
254 # (3 bits with default settings)
255 def next_row(row):
256 row_v = row[0:ROW_LINE_BITS] + 1
257 return Cat(row_v[:ROW_LINE_BITS], row[ROW_LINE_BITS:])
258
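# Plain-integer model of next_row() for illustration only (not used
# by the hardware): only the low ROW_LINE_BITS take part in the
# increment, wrapping within the line, while the upper row bits pass
# through unchanged.
def _next_row_model(row):
    lo_mask = (1 << ROW_LINE_BITS) - 1
    return (row & ~lo_mask) | ((row + 1) & lo_mask)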
259 # Get the tag value from the address
260 def get_tag(addr):
261 return addr[SET_SIZE_BITS:REAL_ADDR_BITS]
262
263 # Read a tag from a tag memory row
264 def read_tag(way, tagset):
265 return tagset.word_select(way, TAG_WIDTH)[:TAG_BITS]
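# (with TAG_WIDTH=48, way 1's tag occupies bits 48..95 of the tag set;
# only the low TAG_BITS of each TAG_WIDTH-wide slot carry the tag, the
# remainder being padding, presumably so each way is byte-aligned)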
266
267 # Read a TLB tag from a TLB tag memory row
268 def read_tlb_tag(way, tags):
269 return tags.word_select(way, TLB_EA_TAG_BITS)
270
271 # Write a TLB tag to a TLB tag memory row
272 def write_tlb_tag(way, tags, tag):
273 return read_tlb_tag(way, tags).eq(tag)
274
275 # Read a PTE from a TLB PTE memory row
276 def read_tlb_pte(way, ptes):
277 return ptes.word_select(way, TLB_PTE_BITS)
278
279 def write_tlb_pte(way, ptes, newpte):
280 return read_tlb_pte(way, ptes).eq(newpte)
281
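# Plain-Python mirror of get_index()/get_row()/get_tag(), a sketch for
# illustration only: the demo address is arbitrary and this function is
# not used by the cache itself.
def _decode_addr_demo(addr=0x0000_1234_5678_9abc):
    def mask(bits):
        return (1 << bits) - 1
    index = (addr >> LINE_OFF_BITS) & mask(INDEX_BITS)
    row = (addr >> ROW_OFF_BITS) & mask(ROW_BITS)
    tag = (addr >> SET_SIZE_BITS) & mask(TAG_BITS)
    print("addr %x -> index %x row %x tag %x" % (addr, index, row, tag))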
282
283 # Record for storing permission, attribute, etc. bits from a PTE
284 class PermAttr(RecordObject):
285 def __init__(self, name=None):
286 super().__init__(name=name)
287 self.reference = Signal()
288 self.changed = Signal()
289 self.nocache = Signal()
290 self.priv = Signal()
291 self.rd_perm = Signal()
292 self.wr_perm = Signal()
293
294
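# NOTE: extract_perm_attr below is an unused stub: the PTE permission
# and attribute bits are instead decoded directly in DCache.tlb_search()
# (reference/changed/nocache/priv/rd_perm/wr_perm from
# pte[8], pte[7], pte[5], pte[3], pte[2] and pte[1] respectively).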
295 def extract_perm_attr(pte):
296 pa = PermAttr()
297     return pa
298
299
300 # Type of operation on a "valid" input
301 @unique
302 class Op(Enum):
303 OP_NONE = 0
304 OP_BAD = 1 # NC cache hit, TLB miss, prot/RC failure
305 OP_STCX_FAIL = 2 # conditional store w/o reservation
306 OP_LOAD_HIT = 3 # Cache hit on load
307 OP_LOAD_MISS = 4 # Load missing cache
308 OP_LOAD_NC = 5 # Non-cachable load
309 OP_STORE_HIT = 6 # Store hitting cache
310 OP_STORE_MISS = 7 # Store missing cache
311
312
313 # Cache state machine
314 @unique
315 class State(Enum):
316 IDLE = 0 # Normal load hit processing
317 RELOAD_WAIT_ACK = 1 # Cache reload wait ack
318 STORE_WAIT_ACK = 2 # Store wait ack
319 NC_LOAD_WAIT_ACK = 3 # Non-cachable load wait ack
320
321
322 # Dcache operations:
323 #
324 # In order to make timing, we use the BRAMs with
325 # an output buffer, which means that the BRAM
326 # output is delayed by an extra cycle.
327 #
328 # Thus, the dcache has a 2-stage internal pipeline
329 # for cache hits with no stalls.
330 #
331 # All other operations are handled via stalling
332 # in the first stage.
333 #
334 # The second stage can thus complete a hit at the same
335 # time as the first stage emits a stall for a complex op.
336 #
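# Roughly, for a load hit:
#   cycle 0: request latched into r0 (stage_0)
#   cycle 1: TLB and tag lookup, hit detection, BRAM read in flight
#   cycle 2: buffered BRAM output selected and returned (writeback)
#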
337 # Stage 0 register, basically contains just the latched request
338
339 class RegStage0(RecordObject):
340 def __init__(self, name=None):
341 super().__init__(name=name)
342 self.req = LoadStore1ToDCacheType(name="lsmem")
343 self.tlbie = Signal() # indicates a tlbie request (from MMU)
344 self.doall = Signal() # with tlbie, indicates flush whole TLB
345 self.tlbld = Signal() # indicates a TLB load request (from MMU)
346 self.mmu_req = Signal() # indicates source of request
347 self.d_valid = Signal() # indicates req.data is valid now
348
349
350 class MemAccessRequest(RecordObject):
351 def __init__(self, name=None):
352 super().__init__(name=name)
353 self.op = Signal(Op)
354 self.valid = Signal()
355 self.dcbz = Signal()
356 self.real_addr = Signal(REAL_ADDR_BITS)
357 self.data = Signal(64)
358 self.byte_sel = Signal(8)
359 self.hit_way = Signal(WAY_BITS)
360 self.same_tag = Signal()
361 self.mmu_req = Signal()
362
363
364 # First stage register, contains state for stage 1 of load hits
365 # and for the state machine used by all other operations
366 class RegStage1(RecordObject):
367 def __init__(self, name=None):
368 super().__init__(name=name)
369 # Info about the request
370 self.full = Signal() # have uncompleted request
371 self.mmu_req = Signal() # request is from MMU
372 self.req = MemAccessRequest(name="reqmem")
373
374 # Cache hit state
375 self.hit_way = Signal(WAY_BITS)
376 self.hit_load_valid = Signal()
377 self.hit_index = Signal(INDEX_BITS)
378 self.cache_hit = Signal()
379
380 # TLB hit state
381 self.tlb_hit = Signal()
382 self.tlb_hit_way = Signal(TLB_NUM_WAYS)
383 self.tlb_hit_index = Signal(TLB_WAY_BITS)
384
385 # 2-stage data buffer for data forwarded from writes to reads
386 self.forward_data1 = Signal(64)
387 self.forward_data2 = Signal(64)
388 self.forward_sel1 = Signal(8)
389 self.forward_valid1 = Signal()
390 self.forward_way1 = Signal(WAY_BITS)
391 self.forward_row1 = Signal(ROW_BITS)
392 self.use_forward1 = Signal()
393 self.forward_sel = Signal(8)
394
395 # Cache miss state (reload state machine)
396 self.state = Signal(State)
397 self.dcbz = Signal()
398 self.write_bram = Signal()
399 self.write_tag = Signal()
400 self.slow_valid = Signal()
401 self.wb = WBMasterOut("wb")
402 self.reload_tag = Signal(TAG_BITS)
403 self.store_way = Signal(WAY_BITS)
404 self.store_row = Signal(ROW_BITS)
405 self.store_index = Signal(INDEX_BITS)
406 self.end_row_ix = Signal(ROW_LINE_BITS)
407 self.rows_valid = RowPerLineValidArray()
408 self.acks_pending = Signal(3)
409 self.inc_acks = Signal()
410 self.dec_acks = Signal()
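        # (acks_pending counts stores sent but not yet acknowledged:
        # inc_acks on each request accepted, dec_acks on each wishbone
        # ack; STORE_WAIT_ACK returns to IDLE on the final ack)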
411
412 # Signals to complete (possibly with error)
413 self.ls_valid = Signal()
414 self.ls_error = Signal()
415 self.mmu_done = Signal()
416 self.mmu_error = Signal()
417 self.cache_paradox = Signal()
418
419 # Signal to complete a failed stcx.
420 self.stcx_fail = Signal()
421
422
423 # Reservation information
424 class Reservation(RecordObject):
425 def __init__(self):
426 super().__init__()
427 self.valid = Signal()
428 self.addr = Signal(64-LINE_OFF_BITS)
429
430
431 class DTLBUpdate(Elaboratable):
432 def __init__(self):
433 self.tlbie = Signal()
434 self.tlbwe = Signal()
435 self.doall = Signal()
436 self.updated = Signal()
437 self.v_updated = Signal()
438 self.tlb_hit = Signal()
439 self.tlb_req_index = Signal(TLB_SET_BITS)
440
441 self.tlb_hit_way = Signal(TLB_WAY_BITS)
442 self.tlb_tag_way = Signal(TLB_TAG_WAY_BITS)
443 self.tlb_pte_way = Signal(TLB_PTE_WAY_BITS)
444 self.repl_way = Signal(TLB_WAY_BITS)
445 self.eatag = Signal(TLB_EA_TAG_BITS)
446 self.pte_data = Signal(TLB_PTE_BITS)
447
448 self.dv = Signal(TLB_NUM_WAYS) # tlb_way_valids_t
449
450 self.tb_out = Signal(TLB_TAG_WAY_BITS) # tlb_way_tags_t
451 self.db_out = Signal(TLB_NUM_WAYS) # tlb_way_valids_t
452 self.pb_out = Signal(TLB_PTE_WAY_BITS) # tlb_way_ptes_t
453
454 def elaborate(self, platform):
455 m = Module()
456 comb = m.d.comb
457 sync = m.d.sync
458
459 tagset = Signal(TLB_TAG_WAY_BITS)
460 pteset = Signal(TLB_PTE_WAY_BITS)
461
462 tb_out, pb_out, db_out = self.tb_out, self.pb_out, self.db_out
463 comb += db_out.eq(self.dv)
464
465 with m.If(self.tlbie & self.doall):
466 pass # clear all back in parent
467 with m.Elif(self.tlbie):
468 with m.If(self.tlb_hit):
469 comb += db_out.bit_select(self.tlb_hit_way, 1).eq(0)
470 comb += self.v_updated.eq(1)
471
472 with m.Elif(self.tlbwe):
473
474 comb += tagset.eq(self.tlb_tag_way)
475 comb += write_tlb_tag(self.repl_way, tagset, self.eatag)
476 comb += tb_out.eq(tagset)
477
478 comb += pteset.eq(self.tlb_pte_way)
479 comb += write_tlb_pte(self.repl_way, pteset, self.pte_data)
480 comb += pb_out.eq(pteset)
481
482 comb += db_out.bit_select(self.repl_way, 1).eq(1)
483
484 comb += self.updated.eq(1)
485 comb += self.v_updated.eq(1)
486
487 return m
488
489
490 class DCachePendingHit(Elaboratable):
491
492 def __init__(self, tlb_pte_way, tlb_valid_way, tlb_hit_way,
493 cache_i_validdx, cache_tag_set,
494 req_addr,
495 hit_set):
496
497 self.go = Signal()
498 self.virt_mode = Signal()
499 self.is_hit = Signal()
500 self.tlb_hit = Signal()
501 self.hit_way = Signal(WAY_BITS)
502 self.rel_match = Signal()
503 self.req_index = Signal(INDEX_BITS)
504 self.reload_tag = Signal(TAG_BITS)
505
506 self.tlb_hit_way = tlb_hit_way
507 self.tlb_pte_way = tlb_pte_way
508 self.tlb_valid_way = tlb_valid_way
509 self.cache_i_validdx = cache_i_validdx
510 self.cache_tag_set = cache_tag_set
511 self.req_addr = req_addr
512 self.hit_set = hit_set
513
514 def elaborate(self, platform):
515 m = Module()
516 comb = m.d.comb
517 sync = m.d.sync
518
519 go = self.go
520 virt_mode = self.virt_mode
521 is_hit = self.is_hit
522 tlb_pte_way = self.tlb_pte_way
523 tlb_valid_way = self.tlb_valid_way
524 cache_i_validdx = self.cache_i_validdx
525 cache_tag_set = self.cache_tag_set
526 req_addr = self.req_addr
527 tlb_hit_way = self.tlb_hit_way
528 tlb_hit = self.tlb_hit
529 hit_set = self.hit_set
530 hit_way = self.hit_way
531 rel_match = self.rel_match
532 req_index = self.req_index
533 reload_tag = self.reload_tag
534
535 rel_matches = Array(Signal(name="rel_matches_%d" % i) \
536 for i in range(TLB_NUM_WAYS))
537 hit_way_set = HitWaySet()
538
539 # Test if pending request is a hit on any way
540 # In order to make timing in virtual mode,
541 # when we are using the TLB, we compare each
542 # way with each of the real addresses from each way of
543 # the TLB, and then decide later which match to use.
544
545 with m.If(virt_mode):
546 for j in range(TLB_NUM_WAYS): # tlb_num_way_t
547 s_tag = Signal(TAG_BITS, name="s_tag%d" % j)
548 s_hit = Signal()
549 s_pte = Signal(TLB_PTE_BITS)
550 s_ra = Signal(REAL_ADDR_BITS)
551 comb += s_pte.eq(read_tlb_pte(j, tlb_pte_way))
552 comb += s_ra.eq(Cat(req_addr[0:TLB_LG_PGSZ],
553 s_pte[TLB_LG_PGSZ:REAL_ADDR_BITS]))
554 comb += s_tag.eq(get_tag(s_ra))
555
556 for i in range(NUM_WAYS): # way_t
557 is_tag_hit = Signal(name="is_tag_hit_%d_%d" % (j, i))
558 comb += is_tag_hit.eq(go & cache_i_validdx[i] &
559 (read_tag(i, cache_tag_set) == s_tag)
560 & tlb_valid_way[j])
561 with m.If(is_tag_hit):
562 comb += hit_way_set[j].eq(i)
563 comb += s_hit.eq(1)
564 comb += hit_set[j].eq(s_hit)
565 with m.If(s_tag == reload_tag):
566 comb += rel_matches[j].eq(1)
567 with m.If(tlb_hit):
568 comb += is_hit.eq(hit_set[tlb_hit_way])
569 comb += hit_way.eq(hit_way_set[tlb_hit_way])
570 comb += rel_match.eq(rel_matches[tlb_hit_way])
571 with m.Else():
572 s_tag = Signal(TAG_BITS)
573 comb += s_tag.eq(get_tag(req_addr))
574 for i in range(NUM_WAYS): # way_t
575 is_tag_hit = Signal(name="is_tag_hit_%d" % i)
576 comb += is_tag_hit.eq(go & cache_i_validdx[i] &
577 (read_tag(i, cache_tag_set) == s_tag))
578 with m.If(is_tag_hit):
579 comb += hit_way.eq(i)
580 comb += is_hit.eq(1)
581 with m.If(s_tag == reload_tag):
582 comb += rel_match.eq(1)
583
584 return m
585
586
587 class DCache(Elaboratable):
588 """Set associative dcache write-through
589
590 TODO (in no specific order):
591 * See list in icache.vhdl
592 * Complete load misses on the cycle when WB data comes instead of
593 at the end of line (this requires dealing with requests coming in
594 while not idle...)
595 """
596 def __init__(self):
597 self.d_in = LoadStore1ToDCacheType("d_in")
598 self.d_out = DCacheToLoadStore1Type("d_out")
599
600 self.m_in = MMUToDCacheType("m_in")
601 self.m_out = DCacheToMMUType("m_out")
602
603 self.stall_out = Signal()
604
605 # standard naming (wired to non-standard for compatibility)
606 self.bus = Interface(addr_width=32,
607 data_width=64,
608 granularity=8,
609 features={'stall'},
610 alignment=0,
611 name="dcache")
612
613 self.log_out = Signal(20)
614
615 def stage_0(self, m, r0, r1, r0_full):
616 """Latch the request in r0.req as long as we're not stalling
617 """
618 comb = m.d.comb
619 sync = m.d.sync
620 d_in, d_out, m_in = self.d_in, self.d_out, self.m_in
621
622 r = RegStage0("stage0")
623
624 # TODO, this goes in unit tests and formal proofs
625 with m.If(d_in.valid & m_in.valid):
626 sync += Display("request collision loadstore vs MMU")
627
628 with m.If(m_in.valid):
629 comb += r.req.valid.eq(1)
630 comb += r.req.load.eq(~(m_in.tlbie | m_in.tlbld))# no invalidate
631 comb += r.req.dcbz.eq(0)
632 comb += r.req.nc.eq(0)
633 comb += r.req.reserve.eq(0)
634 comb += r.req.virt_mode.eq(0)
635 comb += r.req.priv_mode.eq(1)
636 comb += r.req.addr.eq(m_in.addr)
637 comb += r.req.data.eq(m_in.pte)
638 comb += r.req.byte_sel.eq(~0) # ~0 (all 1s) selects every byte
639 comb += r.tlbie.eq(m_in.tlbie)
640 comb += r.doall.eq(m_in.doall)
641 comb += r.tlbld.eq(m_in.tlbld)
642 comb += r.mmu_req.eq(1)
643 m.d.sync += Display(" DCACHE req mmu addr %x pte %x ld %d",
644 m_in.addr, m_in.pte, r.req.load)
645
646 with m.Else():
647 comb += r.req.eq(d_in)
648 comb += r.req.data.eq(0)
649 comb += r.tlbie.eq(0)
650 comb += r.doall.eq(0)
651 comb += r.tlbld.eq(0)
652 comb += r.mmu_req.eq(0)
653 with m.If((~r1.full & ~d_in.hold) | ~r0_full):
654 sync += r0.eq(r)
655 sync += r0_full.eq(r.req.valid)
656 # Sample data the cycle after a request comes in from loadstore1.
657 # If another request has come in already then the data will get
658 # put directly into req.data below.
659 with m.If(r0.req.valid & ~r.req.valid & ~r0.d_valid &
660 ~r0.mmu_req):
661 sync += r0.req.data.eq(d_in.data)
662 sync += r0.d_valid.eq(1)
663 with m.If(d_in.valid):
664 m.d.sync += Display(" DCACHE req cache "
665 "virt %d addr %x data %x ld %d",
666 r.req.virt_mode, r.req.addr,
667 r.req.data, r.req.load)
668
669 def tlb_read(self, m, r0_stall, tlb_valid_way,
670 tlb_tag_way, tlb_pte_way, dtlb_valid_bits,
671 dtlb_tags, dtlb_ptes):
672 """TLB
673 Operates in the second cycle on the request latched in r0.req.
674 TLB updates write the entry at the end of the second cycle.
675 """
676 comb = m.d.comb
677 sync = m.d.sync
678 m_in, d_in = self.m_in, self.d_in
679
680 index = Signal(TLB_SET_BITS)
681 addrbits = Signal(TLB_SET_BITS)
682
683 amin = TLB_LG_PGSZ
684 amax = TLB_LG_PGSZ + TLB_SET_BITS
685
686 with m.If(m_in.valid):
687 comb += addrbits.eq(m_in.addr[amin : amax])
688 with m.Else():
689 comb += addrbits.eq(d_in.addr[amin : amax])
690 comb += index.eq(addrbits)
691
692 # If we have any op and the previous op isn't finished,
693 # then keep the same output for next cycle.
694 with m.If(~r0_stall):
695 sync += tlb_valid_way.eq(dtlb_valid_bits[index])
696 sync += tlb_tag_way.eq(dtlb_tags[index])
697 sync += tlb_pte_way.eq(dtlb_ptes[index])
698
699 def maybe_tlb_plrus(self, m, r1, tlb_plru_victim):
700 """Generate TLB PLRUs
701 """
702 comb = m.d.comb
703 sync = m.d.sync
704
705 if TLB_NUM_WAYS == 0:
706 return
707 for i in range(TLB_SET_SIZE):
708 # TLB PLRU interface
709 tlb_plru = PLRU(TLB_WAY_BITS)
710 setattr(m.submodules, "maybe_plru_%d" % i, tlb_plru)
711 tlb_plru_acc_en = Signal()
712
713 comb += tlb_plru_acc_en.eq(r1.tlb_hit & (r1.tlb_hit_index == i))
714 comb += tlb_plru.acc_en.eq(tlb_plru_acc_en)
715 comb += tlb_plru.acc_i.eq(r1.tlb_hit_way)
716 comb += tlb_plru_victim[i].eq(tlb_plru.lru_o)
717
718 def tlb_search(self, m, tlb_req_index, r0, r0_valid,
719 tlb_valid_way, tlb_tag_way, tlb_hit_way,
720 tlb_pte_way, pte, tlb_hit, valid_ra, perm_attr, ra):
721
722 comb = m.d.comb
723
724 hitway = Signal(TLB_WAY_BITS)
725 hit = Signal()
726 eatag = Signal(TLB_EA_TAG_BITS)
727
728 TLB_LG_END = TLB_LG_PGSZ + TLB_SET_BITS
729 comb += tlb_req_index.eq(r0.req.addr[TLB_LG_PGSZ : TLB_LG_END])
730 comb += eatag.eq(r0.req.addr[TLB_LG_END : 64 ])
731
732 for i in range(TLB_NUM_WAYS):
733 is_tag_hit = Signal(name="is_tag_hit%d" % i)
734 tlb_tag = Signal(TLB_EA_TAG_BITS, name="tlb_tag%d" % i)
735 comb += tlb_tag.eq(read_tlb_tag(i, tlb_tag_way))
736 comb += is_tag_hit.eq(tlb_valid_way[i] & (tlb_tag == eatag))
737 with m.If(is_tag_hit):
738 comb += hitway.eq(i)
739 comb += hit.eq(1)
740
741 comb += tlb_hit.eq(hit & r0_valid)
742 comb += tlb_hit_way.eq(hitway)
743
744 with m.If(tlb_hit):
745 comb += pte.eq(read_tlb_pte(hitway, tlb_pte_way))
746 comb += valid_ra.eq(tlb_hit | ~r0.req.virt_mode)
747
748 with m.If(r0.req.virt_mode):
749 comb += ra.eq(Cat(Const(0, ROW_OFF_BITS),
750 r0.req.addr[ROW_OFF_BITS:TLB_LG_PGSZ],
751 pte[TLB_LG_PGSZ:REAL_ADDR_BITS]))
752 comb += perm_attr.reference.eq(pte[8])
753 comb += perm_attr.changed.eq(pte[7])
754 comb += perm_attr.nocache.eq(pte[5])
755 comb += perm_attr.priv.eq(pte[3])
756 comb += perm_attr.rd_perm.eq(pte[2])
757 comb += perm_attr.wr_perm.eq(pte[1])
758 with m.Else():
759 comb += ra.eq(Cat(Const(0, ROW_OFF_BITS),
760 r0.req.addr[ROW_OFF_BITS:REAL_ADDR_BITS]))
761 comb += perm_attr.reference.eq(1)
762 comb += perm_attr.changed.eq(1)
763 comb += perm_attr.nocache.eq(0)
764 comb += perm_attr.priv.eq(1)
765 comb += perm_attr.rd_perm.eq(1)
766 comb += perm_attr.wr_perm.eq(1)
767
768 with m.If(valid_ra):
769 m.d.sync += Display("DCACHE virt mode %d hit %d ra %x pte %x",
770 r0.req.virt_mode, tlb_hit, ra, pte)
771 m.d.sync += Display(" perm ref=%d", perm_attr.reference)
772 m.d.sync += Display(" perm chg=%d", perm_attr.changed)
773 m.d.sync += Display(" perm noc=%d", perm_attr.nocache)
774 m.d.sync += Display(" perm prv=%d", perm_attr.priv)
775 m.d.sync += Display(" perm rdp=%d", perm_attr.rd_perm)
776 m.d.sync += Display(" perm wrp=%d", perm_attr.wr_perm)
777
778 def tlb_update(self, m, r0_valid, r0, dtlb_valid_bits, tlb_req_index,
779 tlb_hit_way, tlb_hit, tlb_plru_victim, tlb_tag_way,
780 dtlb_tags, tlb_pte_way, dtlb_ptes):
781
782 dtlb_valids = TLBValidBitsArray()
783
784 comb = m.d.comb
785 sync = m.d.sync
786
787 tlbie = Signal()
788 tlbwe = Signal()
789
790 comb += tlbie.eq(r0_valid & r0.tlbie)
791 comb += tlbwe.eq(r0_valid & r0.tlbld)
792
793 m.submodules.tlb_update = d = DTLBUpdate()
794 with m.If(tlbie & r0.doall):
795 # clear all valid bits at once
796 for i in range(TLB_SET_SIZE):
797 sync += dtlb_valid_bits[i].eq(0)
798 with m.If(d.updated):
799 sync += dtlb_tags[tlb_req_index].eq(d.tb_out)
800 sync += dtlb_ptes[tlb_req_index].eq(d.pb_out)
801 with m.If(d.v_updated):
802 sync += dtlb_valid_bits[tlb_req_index].eq(d.db_out)
803
804 comb += d.dv.eq(dtlb_valid_bits[tlb_req_index])
805
806 comb += d.tlbie.eq(tlbie)
807 comb += d.tlbwe.eq(tlbwe)
808 comb += d.doall.eq(r0.doall)
809 comb += d.tlb_hit.eq(tlb_hit)
810 comb += d.tlb_hit_way.eq(tlb_hit_way)
811 comb += d.tlb_tag_way.eq(tlb_tag_way)
812 comb += d.tlb_pte_way.eq(tlb_pte_way)
813 comb += d.tlb_req_index.eq(tlb_req_index)
814
815 with m.If(tlb_hit):
816 comb += d.repl_way.eq(tlb_hit_way)
817 with m.Else():
818 comb += d.repl_way.eq(tlb_plru_victim[tlb_req_index])
819 comb += d.eatag.eq(r0.req.addr[TLB_LG_PGSZ + TLB_SET_BITS:64])
820 comb += d.pte_data.eq(r0.req.data)
821
822 def maybe_plrus(self, m, r1, plru_victim):
823 """Generate PLRUs
824 """
825 comb = m.d.comb
826 sync = m.d.sync
827
828 if NUM_WAYS == 0:
829 return
830
831 for i in range(NUM_LINES):
832 # PLRU interface
833 plru = PLRU(WAY_BITS)
834 setattr(m.submodules, "plru%d" % i, plru)
835 plru_acc_en = Signal()
836
837 comb += plru_acc_en.eq(r1.cache_hit & (r1.hit_index == i))
838 comb += plru.acc_en.eq(plru_acc_en)
839 comb += plru.acc_i.eq(r1.hit_way)
840 comb += plru_victim[i].eq(plru.lru_o)
841
842 def cache_tag_read(self, m, r0_stall, req_index, cache_tag_set, cache_tags):
843 """Cache tag RAM read port
844 """
845 comb = m.d.comb
846 sync = m.d.sync
847 m_in, d_in = self.m_in, self.d_in
848
849 index = Signal(INDEX_BITS)
850
851 with m.If(r0_stall):
852 comb += index.eq(req_index)
853 with m.Elif(m_in.valid):
854 comb += index.eq(get_index(m_in.addr))
855 with m.Else():
856 comb += index.eq(get_index(d_in.addr))
857 sync += cache_tag_set.eq(cache_tags[index].tag)
858
859 def dcache_request(self, m, r0, ra, req_index, req_row, req_tag,
860 r0_valid, r1, cache_tags, replace_way,
861 use_forward1_next, use_forward2_next,
862 req_hit_way, plru_victim, rc_ok, perm_attr,
863 valid_ra, perm_ok, access_ok, req_op, req_go,
864 tlb_pte_way,
865 tlb_hit, tlb_hit_way, tlb_valid_way, cache_tag_set,
866 cancel_store, req_same_tag, r0_stall, early_req_row):
867 """Cache request parsing and hit detection
868 """
869
870 comb = m.d.comb
871 m_in, d_in = self.m_in, self.d_in
872
873 is_hit = Signal()
874 hit_way = Signal(WAY_BITS)
875 op = Signal(Op)
876 opsel = Signal(3)
877 go = Signal()
878 nc = Signal()
879 hit_set = Array(Signal(name="hit_set_%d" % i) \
880 for i in range(TLB_NUM_WAYS))
881 cache_i_validdx = Signal(NUM_WAYS)
882
883 # Extract line, row and tag from request
884 comb += req_index.eq(get_index(r0.req.addr))
885 comb += req_row.eq(get_row(r0.req.addr))
886 comb += req_tag.eq(get_tag(ra))
887
888 if False: # display on comb is a bit... busy.
889 comb += Display("dcache_req addr:%x ra: %x idx: %x tag: %x row: %x",
890 r0.req.addr, ra, req_index, req_tag, req_row)
891
892 comb += go.eq(r0_valid & ~(r0.tlbie | r0.tlbld) & ~r1.ls_error)
893 comb += cache_i_validdx.eq(cache_tags[req_index].valid)
894
895 m.submodules.dcache_pend = dc = DCachePendingHit(tlb_pte_way,
896 tlb_valid_way, tlb_hit_way,
897 cache_i_validdx, cache_tag_set,
898 r0.req.addr,
899 hit_set)
900
901 comb += dc.tlb_hit.eq(tlb_hit)
902 comb += dc.reload_tag.eq(r1.reload_tag)
903 comb += dc.virt_mode.eq(r0.req.virt_mode)
904 comb += dc.go.eq(go)
905 comb += dc.req_index.eq(req_index)
906 comb += is_hit.eq(dc.is_hit)
907 comb += hit_way.eq(dc.hit_way)
908 comb += req_same_tag.eq(dc.rel_match)
909
910 # See if the request matches the line currently being reloaded
911 with m.If((r1.state == State.RELOAD_WAIT_ACK) &
912 (req_index == r1.store_index) & req_same_tag):
913 # For a store, consider this a hit even if the row isn't
914 # valid since it will be by the time we perform the store.
915 # For a load, check the appropriate row valid bit.
916 rrow = Signal(ROW_LINE_BITS)
917 comb += rrow.eq(req_row)
918 valid = r1.rows_valid[rrow]
919 comb += is_hit.eq((~r0.req.load) | valid)
920 comb += hit_way.eq(replace_way)
921
922 # Whether to use forwarded data for a load or not
923 with m.If((get_row(r1.req.real_addr) == req_row) &
924 (r1.req.hit_way == hit_way)):
925 # Only need to consider r1.write_bram here, since if we
926 # are writing refill data here, then we don't have a
927 # cache hit this cycle on the line being refilled.
928 # (There is the possibility that the load following the
929 # load miss that started the refill could be to the old
930 # contents of the victim line, since it is a couple of
931 # cycles after the refill starts before we see the updated
932 # cache tag. In that case we don't use the bypass.)
933 comb += use_forward1_next.eq(r1.write_bram)
934 with m.If((r1.forward_row1 == req_row) & (r1.forward_way1 == hit_way)):
935 comb += use_forward2_next.eq(r1.forward_valid1)
936
937 # The way that matched on a hit
938 comb += req_hit_way.eq(hit_way)
939
940 # The way to replace on a miss
941 with m.If(r1.write_tag):
942 comb += replace_way.eq(plru_victim[r1.store_index])
943 with m.Else():
944 comb += replace_way.eq(r1.store_way)
945
946 # work out whether we have permission for this access
947 # NB we don't yet implement AMR, thus no KUAP
948 comb += rc_ok.eq(perm_attr.reference
949 & (r0.req.load | perm_attr.changed))
950 comb += perm_ok.eq((r0.req.priv_mode | (~perm_attr.priv)) &
951 (perm_attr.wr_perm |
952 (r0.req.load & perm_attr.rd_perm)))
953 comb += access_ok.eq(valid_ra & perm_ok & rc_ok)
954 # Combine the request and cache hit status to decide what
955 # operation needs to be done
956 comb += nc.eq(r0.req.nc | perm_attr.nocache)
957 comb += op.eq(Op.OP_NONE)
958 with m.If(go):
959 with m.If(~access_ok):
960 m.d.sync += Display("DCACHE access fail valid_ra=%d p=%d rc=%d",
961 valid_ra, perm_ok, rc_ok)
962 comb += op.eq(Op.OP_BAD)
963 with m.Elif(cancel_store):
964 m.d.sync += Display("DCACHE cancel store")
965 comb += op.eq(Op.OP_STCX_FAIL)
966 with m.Else():
967 m.d.sync += Display("DCACHE valid_ra=%d nc=%d ld=%d",
968 valid_ra, nc, r0.req.load)
969 comb += opsel.eq(Cat(is_hit, nc, r0.req.load))
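                # opsel bits: [0]=is_hit, [1]=nc, [2]=load,
                # so e.g. 0b101 is a cacheable load that hit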
970 with m.Switch(opsel):
971 with m.Case(0b101): comb += op.eq(Op.OP_LOAD_HIT)
972 with m.Case(0b100): comb += op.eq(Op.OP_LOAD_MISS)
973 with m.Case(0b110): comb += op.eq(Op.OP_LOAD_NC)
974 with m.Case(0b001): comb += op.eq(Op.OP_STORE_HIT)
975 with m.Case(0b000): comb += op.eq(Op.OP_STORE_MISS)
976 with m.Case(0b010): comb += op.eq(Op.OP_STORE_MISS)
977 with m.Case(0b011): comb += op.eq(Op.OP_BAD)
978 with m.Case(0b111): comb += op.eq(Op.OP_BAD)
979 comb += req_op.eq(op)
980 comb += req_go.eq(go)
981
982 # Version of the row number that is valid one cycle earlier
983 # in the cases where we need to read the cache data BRAM.
984 # If we're stalling then we need to keep reading the last
985 # row requested.
986 with m.If(~r0_stall):
987 with m.If(m_in.valid):
988 comb += early_req_row.eq(get_row(m_in.addr))
989 with m.Else():
990 comb += early_req_row.eq(get_row(d_in.addr))
991 with m.Else():
992 comb += early_req_row.eq(req_row)
993
994 def reservation_comb(self, m, cancel_store, set_rsrv, clear_rsrv,
995 r0_valid, r0, reservation):
996 """Handle load-with-reservation and store-conditional instructions
997 """
998 comb = m.d.comb
999
1000 with m.If(r0_valid & r0.req.reserve):
1001 # XXX generate alignment interrupt if address
1002 # is not aligned XXX or if r0.req.nc = '1'
1003 with m.If(r0.req.load):
1004 comb += set_rsrv.eq(r0.req.atomic_last) # load with reservation
1005 with m.Else():
1006 comb += clear_rsrv.eq(r0.req.atomic_last) # store conditional
1007 with m.If((~reservation.valid) |
1008 (r0.req.addr[LINE_OFF_BITS:64] != reservation.addr)):
1009 comb += cancel_store.eq(1)
1010
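    # Example: a larx to address A sets the reservation on A's granule;
    # a subsequent stcx succeeds only while reservation.valid is set and
    # the stcx address matches reservation.addr, otherwise cancel_store
    # turns it into OP_STCX_FAIL.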
1011 def reservation_reg(self, m, r0_valid, access_ok, set_rsrv, clear_rsrv,
1012 reservation, r0):
1013
1014 comb = m.d.comb
1015 sync = m.d.sync
1016
1017 with m.If(r0_valid & access_ok):
1018 with m.If(clear_rsrv):
1019 sync += reservation.valid.eq(0)
1020 with m.Elif(set_rsrv):
1021 sync += reservation.valid.eq(1)
1022 sync += reservation.addr.eq(r0.req.addr[LINE_OFF_BITS:64])
1023
1024 def writeback_control(self, m, r1, cache_out_row):
1025 """Return data for loads & completion control logic
1026 """
1027 comb = m.d.comb
1028 sync = m.d.sync
1029 d_out, m_out = self.d_out, self.m_out
1030
1031 data_out = Signal(64)
1032 data_fwd = Signal(64)
1033
1034 # Use the bypass if we are reading the row that was
1035 # written 1 or 2 cycles ago, including for the
1036 # slow_valid = 1 case (i.e. completing a load
1037 # miss or a non-cacheable load).
1038 with m.If(r1.use_forward1):
1039 comb += data_fwd.eq(r1.forward_data1)
1040 with m.Else():
1041 comb += data_fwd.eq(r1.forward_data2)
1042
1043 comb += data_out.eq(cache_out_row)
1044
1045 for i in range(8):
1046 with m.If(r1.forward_sel[i]):
1047 dsel = data_fwd.word_select(i, 8)
1048 comb += data_out.word_select(i, 8).eq(dsel)
1049
1050 comb += d_out.valid.eq(r1.ls_valid)
1051 comb += d_out.data.eq(data_out)
1052 comb += d_out.store_done.eq(~r1.stcx_fail)
1053 comb += d_out.error.eq(r1.ls_error)
1054 comb += d_out.cache_paradox.eq(r1.cache_paradox)
1055
1056 # Outputs to MMU
1057 comb += m_out.done.eq(r1.mmu_done)
1058 comb += m_out.err.eq(r1.mmu_error)
1059 comb += m_out.data.eq(data_out)
1060
1061 # We have a valid load or store hit or we just completed
1062 # a slow op such as a load miss, a NC load or a store
1063 #
1064 # Note: the load hit is delayed by one cycle. However it
1065 # can still not collide with r.slow_valid (well unless I
1066 # miscalculated) because slow_valid can only be set on a
1067 # subsequent request and not on its first cycle (the state
1068 # machine must have advanced), which makes slow_valid
1069 # at least 2 cycles from the previous hit_load_valid.
1070
1071 # Sanity: Only one of these must be set in any given cycle
1072
1073 if False: # TODO: need Display to get this to work
1074 assert (r1.slow_valid & r1.stcx_fail) != 1, \
1075 "unexpected slow_valid collision with stcx_fail"
1076
1077 assert ((r1.slow_valid | r1.stcx_fail) | r1.hit_load_valid) != 1, \
1078 "unexpected hit_load_delayed collision with slow_valid"
1079
1080 with m.If(~r1.mmu_req):
1081 # Request came from loadstore1...
1082 # Load hit case is the standard path
1083 with m.If(r1.hit_load_valid):
1084 sync += Display("completing load hit data=%x", data_out)
1085
1086 # error cases complete without stalling
1087 with m.If(r1.ls_error):
1088 with m.If(r1.dcbz):
1089 sync += Display("completing dcbz with error")
1090 with m.Else():
1091 sync += Display("completing ld/st with error")
1092
1093 # Slow ops (load miss, NC, stores)
1094 with m.If(r1.slow_valid):
1095 sync += Display("completing store or load miss adr=%x data=%x",
1096 r1.req.real_addr, data_out)
1097
1098 with m.Else():
1099 # Request came from MMU
1100 with m.If(r1.hit_load_valid):
1101 sync += Display("completing load hit to MMU, data=%x",
1102 m_out.data)
1103 # error cases complete without stalling
1104 with m.If(r1.mmu_error):
1105 sync += Display("completing MMU ld with error")
1106
1107 # Slow ops (i.e. load miss)
1108 with m.If(r1.slow_valid):
1109 sync += Display("completing MMU load miss, adr=%x data=%x",
1110 r1.req.real_addr, m_out.data)
1111
1112 def rams(self, m, r1, early_req_row, cache_out_row, replace_way):
1113 """rams
1114 Generate a cache RAM for each way. This handles the normal
1115 reads, writes from reloads and the special store-hit update
1116 path as well.
1117
1118 Note: the BRAMs have an extra read buffer, meaning the output
1119 is pipelined an extra cycle. This differs from the
1120 icache. The writeback logic needs to take that into
1121 account by using 1-cycle delayed signals for load hits.
1122 """
1123 comb = m.d.comb
1124 bus = self.bus
1125
1126 for i in range(NUM_WAYS):
1127 do_read = Signal(name="do_rd%d" % i)
1128 rd_addr = Signal(ROW_BITS, name="rd_addr_%d" % i)
1129 do_write = Signal(name="do_wr%d" % i)
1130 wr_addr = Signal(ROW_BITS, name="wr_addr_%d" % i)
1131 wr_data = Signal(WB_DATA_BITS, name="din_%d" % i)
1132 wr_sel = Signal(ROW_SIZE)
1133 wr_sel_m = Signal(ROW_SIZE)
1134 _d_out = Signal(WB_DATA_BITS, name="dout_%d" % i) # cache_row_t
1135
1136 way = CacheRam(ROW_BITS, WB_DATA_BITS, ADD_BUF=True, ram_num=i)
1137 setattr(m.submodules, "cacheram_%d" % i, way)
1138
1139 comb += way.rd_en.eq(do_read)
1140 comb += way.rd_addr.eq(rd_addr)
1141 comb += _d_out.eq(way.rd_data_o)
1142 comb += way.wr_sel.eq(wr_sel_m)
1143 comb += way.wr_addr.eq(wr_addr)
1144 comb += way.wr_data.eq(wr_data)
1145
1146 # Cache hit reads
1147 comb += do_read.eq(1)
1148 comb += rd_addr.eq(early_req_row)
1149 with m.If(r1.hit_way == i):
1150 comb += cache_out_row.eq(_d_out)
1151
1152 # Write mux:
1153 #
1154 # Defaults to wishbone read responses (cache refill)
1155 #
1156 # For timing, the mux on wr_data/sel/addr is not
1157 # dependent on anything other than the current state.
1158
1159 with m.If(r1.write_bram):
1160 # Write store data to BRAM. This happens one
1161 # cycle after the store is in r0.
1162 comb += wr_data.eq(r1.req.data)
1163 comb += wr_sel.eq(r1.req.byte_sel)
1164 comb += wr_addr.eq(get_row(r1.req.real_addr))
1165
1166 with m.If(i == r1.req.hit_way):
1167 comb += do_write.eq(1)
1168 with m.Else():
1169 # Otherwise, we might be doing a reload or a DCBZ
1170 with m.If(r1.dcbz):
1171 comb += wr_data.eq(0)
1172 with m.Else():
1173 comb += wr_data.eq(bus.dat_r)
1174 comb += wr_addr.eq(r1.store_row)
1175 comb += wr_sel.eq(~0) # all 1s
1176
1177 with m.If((r1.state == State.RELOAD_WAIT_ACK)
1178 & bus.ack & (replace_way == i)):
1179 comb += do_write.eq(1)
1180
1181 # Mask write selects with do_write since BRAM
1182 # doesn't have a global write-enable
1183 with m.If(do_write):
1184 comb += wr_sel_m.eq(wr_sel)
1185
1186 # Cache hit synchronous machine for the easy case.
1187 # This handles load hits.
1188 # It also handles error cases (TLB miss, cache paradox)
1189 def dcache_fast_hit(self, m, req_op, r0_valid, r0, r1,
1190 req_hit_way, req_index, req_tag, access_ok,
1191 tlb_hit, tlb_hit_way, tlb_req_index):
1192
1193 comb = m.d.comb
1194 sync = m.d.sync
1195
1196 with m.If(req_op != Op.OP_NONE):
1197 sync += Display("op:%d addr:%x nc: %d idx: %x tag: %x way: %x",
1198 req_op, r0.req.addr, r0.req.nc,
1199 req_index, req_tag, req_hit_way)
1200
1201 with m.If(r0_valid):
1202 sync += r1.mmu_req.eq(r0.mmu_req)
1203
1204 # Fast path for load/store hits.
1205 # Set signals for the writeback controls.
1206 sync += r1.hit_way.eq(req_hit_way)
1207 sync += r1.hit_index.eq(req_index)
1208
1209 with m.If(req_op == Op.OP_LOAD_HIT):
1210 sync += r1.hit_load_valid.eq(1)
1211 with m.Else():
1212 sync += r1.hit_load_valid.eq(0)
1213
1214 with m.If((req_op == Op.OP_LOAD_HIT) | (req_op == Op.OP_STORE_HIT)):
1215 sync += r1.cache_hit.eq(1)
1216 with m.Else():
1217 sync += r1.cache_hit.eq(0)
1218
1219 with m.If(req_op == Op.OP_BAD):
1220 sync += Display("Signalling ld/st error "
1221 "ls_error=%i mmu_error=%i cache_paradox=%i",
1222 ~r0.mmu_req, r0.mmu_req, access_ok)
1223 sync += r1.ls_error.eq(~r0.mmu_req)
1224 sync += r1.mmu_error.eq(r0.mmu_req)
1225 sync += r1.cache_paradox.eq(access_ok)
1226
1227 with m.Else():
1228 sync += r1.ls_error.eq(0)
1229 sync += r1.mmu_error.eq(0)
1230 sync += r1.cache_paradox.eq(0)
1231
1232 with m.If(req_op == Op.OP_STCX_FAIL):
1233 sync += r1.stcx_fail.eq(1)
1234 with m.Else():
1235 sync += r1.stcx_fail.eq(0)
1236
1237 # Record TLB hit information for updating TLB PLRU
1238 sync += r1.tlb_hit.eq(tlb_hit)
1239 sync += r1.tlb_hit_way.eq(tlb_hit_way)
1240 sync += r1.tlb_hit_index.eq(tlb_req_index)
1241
1242 # Memory accesses are handled by this state machine:
1243 #
1244 # * Cache load miss/reload (in conjunction with "rams")
1245 # * Load hits for non-cachable forms
1246 # * Stores (the collision case is handled in "rams")
1247 #
1248 # All wishbone requests generation is done here.
1249 # This machine operates at stage 1.
1250 def dcache_slow(self, m, r1, use_forward1_next, use_forward2_next,
1251 r0, replace_way,
1252 req_hit_way, req_same_tag,
1253 r0_valid, req_op, cache_tags, req_go, ra):
1254
1255 comb = m.d.comb
1256 sync = m.d.sync
1257 bus = self.bus
1258 d_in = self.d_in
1259
1260 req = MemAccessRequest("mreq_ds")
1261
1262 req_row = Signal(ROW_BITS)
1263 req_idx = Signal(INDEX_BITS)
1264 req_tag = Signal(TAG_BITS)
1265 comb += req_idx.eq(get_index(req.real_addr))
1266 comb += req_row.eq(get_row(req.real_addr))
1267 comb += req_tag.eq(get_tag(req.real_addr))
1268
1269 sync += r1.use_forward1.eq(use_forward1_next)
1270 sync += r1.forward_sel.eq(0)
1271
1272 with m.If(use_forward1_next):
1273 sync += r1.forward_sel.eq(r1.req.byte_sel)
1274 with m.Elif(use_forward2_next):
1275 sync += r1.forward_sel.eq(r1.forward_sel1)
1276
1277 sync += r1.forward_data2.eq(r1.forward_data1)
1278 with m.If(r1.write_bram):
1279 sync += r1.forward_data1.eq(r1.req.data)
1280 sync += r1.forward_sel1.eq(r1.req.byte_sel)
1281 sync += r1.forward_way1.eq(r1.req.hit_way)
1282 sync += r1.forward_row1.eq(get_row(r1.req.real_addr))
1283 sync += r1.forward_valid1.eq(1)
1284 with m.Else():
1285 with m.If(r1.dcbz):
1286 sync += r1.forward_data1.eq(0)
1287 with m.Else():
1288 sync += r1.forward_data1.eq(bus.dat_r)
1289 sync += r1.forward_sel1.eq(~0) # all 1s
1290 sync += r1.forward_way1.eq(replace_way)
1291 sync += r1.forward_row1.eq(r1.store_row)
1292 sync += r1.forward_valid1.eq(0)
1293
1294 # One cycle pulses reset
1295 sync += r1.slow_valid.eq(0)
1296 sync += r1.write_bram.eq(0)
1297 sync += r1.inc_acks.eq(0)
1298 sync += r1.dec_acks.eq(0)
1299
1300 sync += r1.ls_valid.eq(0)
1301 # complete tlbies and TLB loads in the third cycle
1302 sync += r1.mmu_done.eq(r0_valid & (r0.tlbie | r0.tlbld))
1303
1304 with m.If((req_op == Op.OP_LOAD_HIT) | (req_op == Op.OP_STCX_FAIL)):
1305 with m.If(~r0.mmu_req):
1306 sync += r1.ls_valid.eq(1)
1307 with m.Else():
1308 sync += r1.mmu_done.eq(1)
1309
1310 with m.If(r1.write_tag):
1311 # Store new tag in selected way
1312 for i in range(NUM_WAYS):
1313 with m.If(i == replace_way):
1314 ct = Signal(TAG_RAM_WIDTH)
1315 comb += ct.eq(cache_tags[r1.store_index].tag)
1316 """
1317 TODO: check this
1318 cache_tags(r1.store_index)((i + 1) * TAG_WIDTH - 1 downto i * TAG_WIDTH) <=
1319 (TAG_WIDTH - 1 downto TAG_BITS => '0') & r1.reload_tag;
1320 """
1321 comb += ct.word_select(i, TAG_WIDTH).eq(r1.reload_tag)
1322 sync += cache_tags[r1.store_index].tag.eq(ct)
1323 sync += r1.store_way.eq(replace_way)
1324 sync += r1.write_tag.eq(0)
1325
1326 # Take request from r1.req if there is one there,
1327 # else from req_op, ra, etc.
1328 with m.If(r1.full):
1329 comb += req.eq(r1.req)
1330 with m.Else():
1331 comb += req.op.eq(req_op)
1332 comb += req.valid.eq(req_go)
1333 comb += req.mmu_req.eq(r0.mmu_req)
1334 comb += req.dcbz.eq(r0.req.dcbz)
1335 comb += req.real_addr.eq(ra)
1336
1337 with m.If(r0.req.dcbz):
1338 # force data to 0 for dcbz
1339 comb += req.data.eq(0)
1340 with m.Elif(r0.d_valid):
1341 comb += req.data.eq(r0.req.data)
1342 with m.Else():
1343 comb += req.data.eq(d_in.data)
1344
1345 # Select all bytes for dcbz
1346 # and for cacheable loads
1347 with m.If(r0.req.dcbz | (r0.req.load & ~r0.req.nc)):
1348 comb += req.byte_sel.eq(~0) # all 1s
1349 with m.Else():
1350 comb += req.byte_sel.eq(r0.req.byte_sel)
1351 comb += req.hit_way.eq(req_hit_way)
1352 comb += req.same_tag.eq(req_same_tag)
1353
1354 # Store the incoming request from r0,
1355 # if it is a slow request
1356 # Note that r1.full = 1 implies req_op = OP_NONE
1357 with m.If((req_op == Op.OP_LOAD_MISS)
1358 | (req_op == Op.OP_LOAD_NC)
1359 | (req_op == Op.OP_STORE_MISS)
1360 | (req_op == Op.OP_STORE_HIT)):
1361 sync += r1.req.eq(req)
1362 sync += r1.full.eq(1)
1363
1364 # Main state machine
1365 with m.Switch(r1.state):
1366
1367 with m.Case(State.IDLE):
1368 sync += r1.wb.adr.eq(req.real_addr[ROW_LINE_BITS:])
1369 sync += r1.wb.sel.eq(req.byte_sel)
1370 sync += r1.wb.dat.eq(req.data)
1371 sync += r1.dcbz.eq(req.dcbz)
1372
1373 # Keep track of our index and way
1374 # for subsequent stores.
1375 sync += r1.store_index.eq(req_idx)
1376 sync += r1.store_row.eq(req_row)
1377 sync += r1.end_row_ix.eq(get_row_of_line(req_row)-1)
1378 sync += r1.reload_tag.eq(req_tag)
1379 sync += r1.req.same_tag.eq(1)
1380
1381 with m.If(req.op == Op.OP_STORE_HIT):
1382 sync += r1.store_way.eq(req.hit_way)
1383
1384 # Reset per-row valid bits,
1385 # ready for handling OP_LOAD_MISS
1386 for i in range(ROW_PER_LINE):
1387 sync += r1.rows_valid[i].eq(0)
1388
1389 with m.If(req_op != Op.OP_NONE):
1390 sync += Display("cache op %d", req.op)
1391
1392 with m.Switch(req.op):
1393 with m.Case(Op.OP_LOAD_HIT):
1394 # stay in IDLE state
1395 pass
1396
1397 with m.Case(Op.OP_LOAD_MISS):
1398 sync += Display("cache miss real addr: %x " \
1399 "idx: %x tag: %x",
1400 req.real_addr, req_row, req_tag)
1401
1402 # Start the wishbone cycle
1403 sync += r1.wb.we.eq(0)
1404 sync += r1.wb.cyc.eq(1)
1405 sync += r1.wb.stb.eq(1)
1406
1407 # Track that we had one request sent
1408 sync += r1.state.eq(State.RELOAD_WAIT_ACK)
1409 sync += r1.write_tag.eq(1)
1410
1411 with m.Case(Op.OP_LOAD_NC):
1412 sync += r1.wb.cyc.eq(1)
1413 sync += r1.wb.stb.eq(1)
1414 sync += r1.wb.we.eq(0)
1415 sync += r1.state.eq(State.NC_LOAD_WAIT_ACK)
1416
1417 with m.Case(Op.OP_STORE_HIT, Op.OP_STORE_MISS):
1418 with m.If(~req.dcbz):
1419 sync += r1.state.eq(State.STORE_WAIT_ACK)
1420 sync += r1.acks_pending.eq(1)
1421 sync += r1.full.eq(0)
1422 sync += r1.slow_valid.eq(1)
1423
1424 with m.If(~req.mmu_req):
1425 sync += r1.ls_valid.eq(1)
1426 with m.Else():
1427 sync += r1.mmu_done.eq(1)
1428
1429 with m.If(req.op == Op.OP_STORE_HIT):
1430 sync += r1.write_bram.eq(1)
1431 with m.Else():
1432 # dcbz is handled much like a load miss except
1433 # that we are writing to memory instead of reading
1434 sync += r1.state.eq(State.RELOAD_WAIT_ACK)
1435
1436 with m.If(req.op == Op.OP_STORE_MISS):
1437 sync += r1.write_tag.eq(1)
1438
1439 sync += r1.wb.we.eq(1)
1440 sync += r1.wb.cyc.eq(1)
1441 sync += r1.wb.stb.eq(1)
1442
1443 # OP_NONE and OP_BAD do nothing
1444 # OP_BAD & OP_STCX_FAIL were
1445 # handled above already
1446 with m.Case(Op.OP_NONE):
1447 pass
1448 with m.Case(Op.OP_BAD):
1449 pass
1450 with m.Case(Op.OP_STCX_FAIL):
1451 pass
1452
1453 with m.Case(State.RELOAD_WAIT_ACK):
1454 ld_stbs_done = Signal()
1455 # Requests are all sent if stb is 0
1456 comb += ld_stbs_done.eq(~r1.wb.stb)
1457
1458 # If we are still sending requests, was one accepted?
1459 with m.If((~bus.stall) & r1.wb.stb):
1460 # That was the last word? We are done sending.
1461 # Clear stb and set ld_stbs_done so we can handle an
1462 # eventual last ack on the same cycle.
1463 # sigh - reconstruct wb adr with 3 extra 0s at front
1464 wb_adr = Cat(Const(0, ROW_OFF_BITS), r1.wb.adr)
1465 with m.If(is_last_row_addr(wb_adr, r1.end_row_ix)):
1466 sync += r1.wb.stb.eq(0)
1467 comb += ld_stbs_done.eq(1)
1468
1469 # Calculate the next row address in the current cache line
1470 row = Signal(LINE_OFF_BITS-ROW_OFF_BITS)
1471 comb += row.eq(r1.wb.adr)
1472 sync += r1.wb.adr[:LINE_OFF_BITS-ROW_OFF_BITS].eq(row+1)
1473
1474 # Incoming acks processing
1475 sync += r1.forward_valid1.eq(bus.ack)
1476 with m.If(bus.ack):
1477 srow = Signal(ROW_LINE_BITS)
1478 comb += srow.eq(r1.store_row)
1479 sync += r1.rows_valid[srow].eq(1)
1480
1481 # If this is the data we were looking for,
1482 # we can complete the request next cycle.
1483 # Compare the whole address in case the
1484 # request in r1.req is not the one that
1485 # started this refill.
1486 with m.If(req.valid & r1.req.same_tag &
1487 ((r1.dcbz & r1.req.dcbz) |
1488 (~r1.dcbz & (r1.req.op == Op.OP_LOAD_MISS))) &
1489 (r1.store_row == get_row(req.real_addr))):
1490 sync += r1.full.eq(0)
1491 sync += r1.slow_valid.eq(1)
1492 with m.If(~r1.mmu_req):
1493 sync += r1.ls_valid.eq(1)
1494 with m.Else():
1495 sync += r1.mmu_done.eq(1)
1496 sync += r1.forward_sel.eq(~0) # all 1s
1497 sync += r1.use_forward1.eq(1)
1498
1499 # Check for completion
1500 with m.If(ld_stbs_done & is_last_row(r1.store_row,
1501 r1.end_row_ix)):
1502 # Complete wishbone cycle
1503 sync += r1.wb.cyc.eq(0)
1504
1505 # Cache line is now valid
1506 cv = Signal(INDEX_BITS)
1507 comb += cv.eq(cache_tags[r1.store_index].valid)
1508 comb += cv.bit_select(r1.store_way, 1).eq(1)
1509 sync += cache_tags[r1.store_index].valid.eq(cv)
1510
1511 sync += r1.state.eq(State.IDLE)
1512 sync += Display("cache valid set %x "
1513 "idx %d way %d",
1514 cv, r1.store_index, r1.store_way)
1515
1516 # Increment store row counter
1517 sync += r1.store_row.eq(next_row(r1.store_row))
1518
1519 with m.Case(State.STORE_WAIT_ACK):
1520 st_stbs_done = Signal()
1521 acks = Signal(3)
1522 adjust_acks = Signal(3)
1523
1524 comb += st_stbs_done.eq(~r1.wb.stb)
1525 comb += acks.eq(r1.acks_pending)
1526
1527 with m.If(r1.inc_acks != r1.dec_acks):
1528 with m.If(r1.inc_acks):
1529 comb += adjust_acks.eq(acks + 1)
1530 with m.Else():
1531 comb += adjust_acks.eq(acks - 1)
1532 with m.Else():
1533 comb += adjust_acks.eq(acks)
1534
1535 sync += r1.acks_pending.eq(adjust_acks)
1536
1537 # Clear stb when slave accepted request
1538 with m.If(~bus.stall):
1539 # See if there is another store waiting
1540 # to be done which is in the same real page.
1541 with m.If(req.valid):
1542 _ra = req.real_addr[ROW_LINE_BITS:SET_SIZE_BITS]
1543 sync += r1.wb.adr[0:SET_SIZE_BITS].eq(_ra)
1544 sync += r1.wb.dat.eq(req.data)
1545 sync += r1.wb.sel.eq(req.byte_sel)
1546
1547 with m.If((adjust_acks < 7) & req.same_tag &
1548 ((req.op == Op.OP_STORE_MISS)
1549 | (req.op == Op.OP_STORE_HIT))):
1550 sync += r1.wb.stb.eq(1)
1551 comb += st_stbs_done.eq(0)
1552
1553 with m.If(req.op == Op.OP_STORE_HIT):
1554 sync += r1.write_bram.eq(1)
1555 sync += r1.full.eq(0)
1556 sync += r1.slow_valid.eq(1)
1557
1558 # Store requests never come from the MMU
1559 sync += r1.ls_valid.eq(1)
1560 comb += st_stbs_done.eq(0)
1561 sync += r1.inc_acks.eq(1)
1562 with m.Else():
1563 sync += r1.wb.stb.eq(0)
1564 comb += st_stbs_done.eq(1)
1565
1566 # Got ack ? See if complete.
1567 with m.If(bus.ack):
1568 with m.If(st_stbs_done & (adjust_acks == 1)):
1569 sync += r1.state.eq(State.IDLE)
1570 sync += r1.wb.cyc.eq(0)
1571 sync += r1.wb.stb.eq(0)
1572 sync += r1.dec_acks.eq(1)
1573
1574 with m.Case(State.NC_LOAD_WAIT_ACK):
1575 # Clear stb when slave accepted request
1576 with m.If(~bus.stall):
1577 sync += r1.wb.stb.eq(0)
1578
1579 # Got ack ? complete.
1580 with m.If(bus.ack):
1581 sync += r1.state.eq(State.IDLE)
1582 sync += r1.full.eq(0)
1583 sync += r1.slow_valid.eq(1)
1584
1585 with m.If(~r1.mmu_req):
1586 sync += r1.ls_valid.eq(1)
1587 with m.Else():
1588 sync += r1.mmu_done.eq(1)
1589
1590 sync += r1.forward_sel.eq(~0) # all 1s
1591 sync += r1.use_forward1.eq(1)
1592 sync += r1.wb.cyc.eq(0)
1593 sync += r1.wb.stb.eq(0)
1594
1595 def dcache_log(self, m, r1, valid_ra, tlb_hit_way, stall_out, req_op):
1596
1597 sync = m.d.sync
1598 d_out, bus, log_out = self.d_out, self.bus, self.log_out
1599
1600 sync += log_out.eq(Cat(r1.state[:3], valid_ra, tlb_hit_way[:3],
1601 stall_out, req_op[:3], d_out.valid, d_out.error,
1602 r1.wb.cyc, r1.wb.stb, bus.ack, bus.stall,
1603 r1.wb.adr[3:6]))
1604
1605 def elaborate(self, platform):
1606
1607 m = Module()
1608 comb = m.d.comb
1609 d_in = self.d_in
1610
1611 # Storage. Hopefully "cache_rows" is a BRAM, the rest is LUTs
1612 cache_tags = CacheTagArray()
1613 cache_tag_set = Signal(TAG_RAM_WIDTH)
1614
1615 # TODO attribute ram_style : string;
1616 # TODO attribute ram_style of cache_tags : signal is "distributed";
1617
1618 """note: these are passed to nmigen.hdl.Memory as "attributes".
1619 don't know how, just that they are.
1620 """
1621 dtlb_valid_bits = TLBValidBitsArray()
1622 dtlb_tags = TLBTagsArray()
1623 dtlb_ptes = TLBPtesArray()
1624 # TODO attribute ram_style of
1625 # dtlb_tags : signal is "distributed";
1626 # TODO attribute ram_style of
1627 # dtlb_ptes : signal is "distributed";
1628
1629 r0 = RegStage0("r0")
1630 r0_full = Signal()
1631
1632 r1 = RegStage1("r1")
1633
1634 reservation = Reservation()
1635
1636 # Async signals on incoming request
1637 req_index = Signal(INDEX_BITS)
1638 req_row = Signal(ROW_BITS)
1639 req_hit_way = Signal(WAY_BITS)
1640 req_tag = Signal(TAG_BITS)
1641 req_op = Signal(Op)
1642 req_data = Signal(64)
1643 req_same_tag = Signal()
1644 req_go = Signal()
1645
1646 early_req_row = Signal(ROW_BITS)
1647
1648 cancel_store = Signal()
1649 set_rsrv = Signal()
1650 clear_rsrv = Signal()
1651
1652 r0_valid = Signal()
1653 r0_stall = Signal()
1654
1655 use_forward1_next = Signal()
1656 use_forward2_next = Signal()
1657
1658 cache_out_row = Signal(WB_DATA_BITS)
1659
1660 plru_victim = PLRUOut()
1661 replace_way = Signal(WAY_BITS)
1662
1663 # Wishbone read/write/cache write formatting signals
1664 bus_sel = Signal(8)
1665
1666 # TLB signals
1667 tlb_tag_way = Signal(TLB_TAG_WAY_BITS)
1668 tlb_pte_way = Signal(TLB_PTE_WAY_BITS)
1669 tlb_valid_way = Signal(TLB_NUM_WAYS)
1670 tlb_req_index = Signal(TLB_SET_BITS)
1671 tlb_hit = Signal()
1672 tlb_hit_way = Signal(TLB_WAY_BITS)
1673 pte = Signal(TLB_PTE_BITS)
1674 ra = Signal(REAL_ADDR_BITS)
1675 valid_ra = Signal()
1676 perm_attr = PermAttr("dc_perms")
1677 rc_ok = Signal()
1678 perm_ok = Signal()
1679 access_ok = Signal()
1680
1681 tlb_plru_victim = TLBPLRUOut()
1682
1683 # we don't yet handle collisions between loadstore1 requests
1684 # and MMU requests
1685 comb += self.m_out.stall.eq(0)
1686
1687 # Hold off the request in r0 when r1 has an uncompleted request
1688 comb += r0_stall.eq(r0_full & (r1.full | d_in.hold))
1689 comb += r0_valid.eq(r0_full & ~r1.full & ~d_in.hold)
1690 comb += self.stall_out.eq(r0_stall)
1691
1692
1693 # deal with litex not doing wishbone pipeline mode
1694 # XXX in wrong way. FIFOs are needed in the SRAM test
1695 # so that stb/ack match up. same thing done in icache.py
1696 comb += self.bus.stall.eq(self.bus.cyc & ~self.bus.ack)
1697
1698 # Wire up wishbone request latch out of stage 1
1699 comb += self.bus.we.eq(r1.wb.we)
1700 comb += self.bus.adr.eq(r1.wb.adr)
1701 comb += self.bus.sel.eq(r1.wb.sel)
1702 comb += self.bus.stb.eq(r1.wb.stb)
1703 comb += self.bus.dat_w.eq(r1.wb.dat)
1704 comb += self.bus.cyc.eq(r1.wb.cyc)
1705
1706 # call sub-functions putting everything together, using shared
1707 # signals established above
1708 self.stage_0(m, r0, r1, r0_full)
1709 self.tlb_read(m, r0_stall, tlb_valid_way,
1710 tlb_tag_way, tlb_pte_way, dtlb_valid_bits,
1711 dtlb_tags, dtlb_ptes)
1712 self.tlb_search(m, tlb_req_index, r0, r0_valid,
1713 tlb_valid_way, tlb_tag_way, tlb_hit_way,
1714 tlb_pte_way, pte, tlb_hit, valid_ra, perm_attr, ra)
1715 self.tlb_update(m, r0_valid, r0, dtlb_valid_bits, tlb_req_index,
1716 tlb_hit_way, tlb_hit, tlb_plru_victim, tlb_tag_way,
1717 dtlb_tags, tlb_pte_way, dtlb_ptes)
1718 self.maybe_plrus(m, r1, plru_victim)
1719 self.maybe_tlb_plrus(m, r1, tlb_plru_victim)
1720 self.cache_tag_read(m, r0_stall, req_index, cache_tag_set, cache_tags)
1721 self.dcache_request(m, r0, ra, req_index, req_row, req_tag,
1722 r0_valid, r1, cache_tags, replace_way,
1723 use_forward1_next, use_forward2_next,
1724 req_hit_way, plru_victim, rc_ok, perm_attr,
1725 valid_ra, perm_ok, access_ok, req_op, req_go,
1726 tlb_pte_way,
1727 tlb_hit, tlb_hit_way, tlb_valid_way, cache_tag_set,
1728 cancel_store, req_same_tag, r0_stall, early_req_row)
1729 self.reservation_comb(m, cancel_store, set_rsrv, clear_rsrv,
1730 r0_valid, r0, reservation)
1731 self.reservation_reg(m, r0_valid, access_ok, set_rsrv, clear_rsrv,
1732 reservation, r0)
1733 self.writeback_control(m, r1, cache_out_row)
1734 self.rams(m, r1, early_req_row, cache_out_row, replace_way)
1735 self.dcache_fast_hit(m, req_op, r0_valid, r0, r1,
1736 req_hit_way, req_index, req_tag, access_ok,
1737 tlb_hit, tlb_hit_way, tlb_req_index)
1738 self.dcache_slow(m, r1, use_forward1_next, use_forward2_next,
1739 r0, replace_way,
1740 req_hit_way, req_same_tag,
1741 r0_valid, req_op, cache_tags, req_go, ra)
1742 #self.dcache_log(m, r1, valid_ra, tlb_hit_way, self.stall_out, req_op)
1743
1744 return m
1745
1746
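# Minimal simulation sketch, illustrative only: it assumes the standard
# nmigen Simulator API (as re-exported by nmutil.sim_tmp_alternative)
# and drives d_in directly.  With no wishbone memory attached, a cache
# miss will simply never complete; this only shows how to wiggle the
# request interface.
def _sim_sketch():
    dut = DCache()
    m = Module()
    m.submodules.dcache = dut

    def process():
        # issue a single cacheable load, then idle for a few cycles
        yield dut.d_in.addr.eq(0x40)
        yield dut.d_in.byte_sel.eq(0xff)
        yield dut.d_in.load.eq(1)
        yield dut.d_in.valid.eq(1)
        yield
        yield dut.d_in.valid.eq(0)
        for _ in range(10):
            yield

    sim = Simulator(m)
    sim.add_clock(1e-6)
    sim.add_sync_process(process)
    sim.run()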
1747 if __name__ == '__main__':
1748 dut = DCache()
1749 vl = rtlil.convert(dut, ports=[])
1750 with open("test_dcache.il", "w") as f:
1751 f.write(vl)