# based on Anton Blanchard microwatt dcache.vhdl
from enum import Enum, unique

from nmigen import Module, Signal, Elaboratable, Cat, Const
from nmigen.cli import main
from nmigen.iocontrol import RecordObject
from nmigen.util import log2_int

from experiment.mem_types import (LoadStore1ToDCacheType,
                                  DCacheToLoadStore1Type,
                                  MMUToDCacheType,
                                  DCacheToMMUType)

from experiment.wb_types import (WB_ADDR_BITS, WB_DATA_BITS, WB_SEL_BITS,
                                 WBAddrType, WBDataType, WBSelType,
                                 WBMasterOut, WBSlaveOut,
                                 WBMasterOutVector, WBSlaveOutVector,
                                 WBIOMasterOut, WBIOSlaveOut)
# Record for storing permission, attribute, etc. bits from a PTE
class PermAttr(RecordObject):
    """Permission/attribute bits extracted from a page-table entry."""
    def __init__(self):
        super().__init__()
        self.reference = Signal()
        self.changed   = Signal()
        self.nocache   = Signal()
        # NOTE(review): the declaration line for `priv` was lost in
        # extraction, but `perm_attr.priv` is driven elsewhere in this
        # file, so the field is restored here -- TODO confirm upstream.
        self.priv      = Signal()
        self.rd_perm   = Signal()
        self.wr_perm   = Signal()
39 def extract_perm_attr(pte
):
# Type of operation on a "valid" input
@unique
class Op(Enum):
    """Decoded dcache operation for a latched request."""
    OP_NONE       = 0  # no operation (referenced elsewhere as Op.OP_NONE)
    OP_BAD        = 1  # NC cache hit, TLB miss, prot/RC failure
    OP_STCX_FAIL  = 2  # conditional store w/o reservation
    OP_LOAD_HIT   = 3  # Cache hit on load
    OP_LOAD_MISS  = 4  # Load missing cache
    OP_LOAD_NC    = 5  # Non-cachable load
    OP_STORE_HIT  = 6  # Store hitting cache
    OP_STORE_MISS = 7  # Store missing cache
# Dcache state machine states (referenced elsewhere as State.<name>)
@unique
class State(Enum):
    """State for the store/reload state machine."""
    IDLE             = 0  # Normal load hit processing
    RELOAD_WAIT_ACK  = 1  # Cache reload wait ack
    STORE_WAIT_ACK   = 2  # Store wait ack
    NC_LOAD_WAIT_ACK = 3  # Non-cachable load wait ack
74 # In order to make timing, we use the BRAMs with
75 # an output buffer, which means that the BRAM
76 # output is delayed by an extra cycle.
78 # Thus, the dcache has a 2-stage internal pipeline
79 # for cache hits with no stalls.
81 # All other operations are handled via stalling
84 # The second stage can thus complete a hit at the same
85 # time as the first stage emits a stall for a complex op.
# Stage 0 register, basically contains just the latched request
class RegStage0(RecordObject):
    """Latched incoming request (from loadstore1 or the MMU)."""
    def __init__(self):
        super().__init__()
        self.req     = LoadStore1ToDCacheType()
        self.mmu_req = Signal()  # indicates source of request
class MemAccessRequest(RecordObject):
    """Memory access request carried into stage 1 / the state machine."""
    def __init__(self):
        super().__init__()
        self.valid     = Signal()
        self.real_addr = Signal(REAL_ADDR_BITS)
        self.data      = Signal(64)
        self.byte_sel  = Signal(8)
        self.hit_way   = Signal(WAY_BITS)
        self.same_tag  = Signal()
        self.mmu_req   = Signal()
# First stage register, contains state for stage 1 of load hits
# and for the state machine used by all other operations
class RegStage1(RecordObject):
    """Stage-1 pipeline register and slow-path state-machine state."""
    def __init__(self):
        super().__init__()
        # Info about the request
        self.full    = Signal()  # have uncompleted request
        self.mmu_req = Signal()  # request is from MMU
        self.req     = MemAccessRequest()

        # Cache hit state
        self.hit_way        = Signal(WAY_BITS)
        self.hit_load_valid = Signal()
        self.hit_index      = Signal(NUM_LINES)
        self.cache_hit      = Signal()

        # TLB hit state
        self.tlb_hit       = Signal()
        self.tlb_hit_way   = Signal(TLB_NUM_WAYS)
        self.tlb_hit_index = Signal(TLB_SET_SIZE)

        # 2-stage data buffer for data forwarded from writes to reads
        self.forward_data1  = Signal(64)
        self.forward_data2  = Signal(64)
        self.forward_sel1   = Signal(8)
        self.forward_valid1 = Signal()
        self.forward_way1   = Signal(WAY_BITS)
        self.forward_row1   = Signal(BRAM_ROWS)
        self.use_forward1   = Signal()
        self.forward_sel    = Signal(8)

        # Cache miss state (reload state machine)
        # NOTE(review): a `self.state` field is read elsewhere in this
        # file (r1.state == State.RELOAD_WAIT_ACK) but its declaration
        # did not survive extraction -- TODO restore it from upstream.
        self.write_bram   = Signal()
        self.write_tag    = Signal()
        self.slow_valid   = Signal()
        # was `WishboneMasterOut()` -- no such name is imported anywhere
        # in this file; the wishbone master record type used elsewhere
        # (e.g. DCache.wb_out) is WBMasterOut
        self.wb           = WBMasterOut()
        self.reload_tag   = Signal(TAG_BITS)
        self.store_way    = Signal(WAY_BITS)
        self.store_row    = Signal(BRAM_ROWS)
        self.store_index  = Signal(NUM_LINES)
        self.end_row_ix   = Signal(ROW_LINE_BIT)
        self.rows_valid   = RowPerLineValidArray()
        self.acks_pending = Signal(3)
        self.inc_acks     = Signal()
        self.dec_acks     = Signal()

        # Signals to complete (possibly with error)
        self.ls_valid      = Signal()
        self.ls_error      = Signal()
        self.mmu_done      = Signal()
        self.mmu_error     = Signal()
        self.cache_paradox = Signal()

        # Signal to complete a failed stcx.
        self.stcx_fail = Signal()
# Reservation information
class Reservation(RecordObject):
    """Load-and-reserve / store-conditional reservation tracking."""
    def __init__(self):
        super().__init__()
        # NOTE(review): `reservation.valid` is read and written elsewhere
        # in this file, so the field is restored here -- TODO confirm.
        self.valid = Signal()
        # TODO LINE_OFF_BITS is 6
        # was VHDL leakage `Signal(63 downto LINE_OFF_BITS)`: bits
        # 63..LINE_OFF_BITS inclusive, i.e. width 64 - LINE_OFF_BITS
        self.addr  = Signal(64 - LINE_OFF_BITS)
# Set associative dcache write-through
#
# TODO (in no specific order):
#
# * See list in icache.vhdl
# * Complete load misses on the cycle when WB data comes instead of
#   at the end of line (this requires dealing with requests coming in
class DCache(Elaboratable):
    """Set-associative write-through L1 data cache."""
    def __init__(self):
        # TODO: make these parameters of DCache at some point
        self.LINE_SIZE    = 64  # Line size in bytes
        self.NUM_LINES    = 32  # Number of lines in a set
        self.NUM_WAYS     = 4   # Number of ways
        self.TLB_SET_SIZE = 64  # L1 DTLB entries per set
        self.TLB_NUM_WAYS = 2   # L1 DTLB number of sets
        self.TLB_LG_PGSZ  = 12  # L1 DTLB log_2(page_size)
        self.LOG_LENGTH   = 0   # Non-zero to enable log data collection

        # LoadStore1 request/response interface
        self.d_in  = LoadStore1ToDCacheType()
        self.d_out = DCacheToLoadStore1Type()

        # MMU request/response interface
        self.m_in  = MMUToDCacheType()
        self.m_out = DCacheToMMUType()

        self.stall_out = Signal()

        # Wishbone master interface
        self.wb_out = WBMasterOut()
        self.wb_in  = WBSlaveOut()

        self.log_out = Signal(20)
212 # Latch the request in r0.req as long as we're not stalling
213 def stage_0(self
, m
, d_in
, m_in
):
219 # TODO, this goes in unit tests and formal proofs
220 # assert ~(d_in.valid & m_in.valid),
221 # "request collision loadstore vs MMU"
222 with m
.If(~
(d_in
.valid
& m_in
.valid
)):
223 #sync += Display("request collision loadstore vs MMU")
226 with m
.If(m_in
.valid
):
227 sync
+= r
.req
.valid
.eq(1)
228 sync
+= r
.req
.load
.eq(~
(m_in
.tlbie | m_in
.tlbld
))
229 sync
+= r
.req
.dcbz
.eq(0)
230 sync
+= r
.req
.nc
.eq(0)
231 sync
+= r
.req
.reserve
.eq(0)
232 sync
+= r
.req
.virt_mode
.eq(1)
233 sync
+= r
.req
.priv_mode
.eq(1)
234 sync
+= r
.req
.addr
.eq(m_in
.addr
)
235 sync
+= r
.req
.data
.eq(m_in
.pte
)
236 sync
+= r
.req
.byte_sel
.eq(-1) # Const -1 sets all to 0b111....
237 sync
+= r
.tlbie
.eq(m_in
.tlbie
)
238 sync
+= r
.doall
.eq(m_in
.doall
)
239 sync
+= r
.tlbld
.eq(m_in
.tlbld
)
240 sync
+= r
.mmu_req
.eq(1)
242 sync
+= r
.req
.eq(d_in
)
243 sync
+= r
.req
.tlbie
.eq(0)
244 sync
+= r
.req
.doall
.eq(0)
245 sync
+= r
.req
.tlbd
.eq(0)
246 sync
+= r
.req
.mmu_req
.eq(0)
247 with m
.If(~
(r1
.full
& r0_full
)):
249 sync
+= r0_full
.eq(r
.req
.valid
)
252 # Operates in the second cycle on the request latched in r0.req.
253 # TLB updates write the entry at the end of the second cycle.
254 def tlb_read(self
, m
, m_in
, d_in
, r0_stall
, tlb_valid_way
,
255 tlb_tag_way
, tlb_pte_way
, dtlb_valid_bits
,
256 dtlb_tags
, dtlb_ptes
):
261 index
= Signal(log2_int(TLB_SET_BITS
), False)
262 addrbits
= Signal(TLB_SET_BITS
)
265 amax
= TLB_LG_PGSZ
+ TLB_SET_BITS
267 with m
.If(m_in
.valid
):
268 comb
+= addrbits
.eq(m_in
.addr
[amin
: amax
])
270 comb
+= addrbits
.eq(d_in
.addr
[amin
: amax
])
271 comb
+= index
.eq(addrbits
)
273 # If we have any op and the previous op isn't finished,
274 # then keep the same output for next cycle.
275 with m
.If(~r0_stall
):
276 sync
+= tlb_valid_way
.eq(dtlb_valid_bits
[index
])
277 sync
+= tlb_tag_way
.eq(dtlb_tags
[index
])
278 sync
+= tlb_pte_way
.eq(dtlb_ptes
[index
])
281 def maybe_tlb_plrus(self
, m
, r1
, tlb_plru_victim
, acc
, acc_en
, lru
):
285 with m
.If(TLB_NUM_WAYS
> 1):
286 for i
in range(TLB_SET_SIZE
):
288 tlb_plru
= PLRU(TLB_WAY_BITS
)
289 tlb_plru_acc
= Signal(TLB_WAY_BITS
)
290 tlb_plru_acc_en
= Signal()
291 tlb_plru_out
= Signal(TLB_WAY_BITS
)
293 comb
+= tlb_plru
.acc
.eq(tlb_plru_acc
)
294 comb
+= tlb_plru
.acc_en
.eq(tlb_plru_acc_en
)
295 comb
+= tlb_plru
.lru
.eq(tlb_plru_out
)
298 with m
.If(r1
.tlb_hit_index
== i
):
299 comb
+= tlb_plru
.acc_en
.eq(r1
.tlb_hit
)
301 comb
+= tlb_plru
.acc_en
.eq(0)
302 comb
+= tlb_plru
.acc
.eq(r1
.tlb_hit_way
)
304 comb
+= tlb_plru_victim
[i
].eq(tlb_plru
.lru
)
306 def tlb_search(self
, tlb_req_index
, r0
, tlb_valid_way_ tlb_tag_way
,
307 tlb_pte_way
, pte
, tlb_hit
, valid_ra
, perm_attr
, ra
):
312 hitway
= Signal(TLB_WAY_BITS
)
314 eatag
= Signal(log2_int(TLB_EA_TAG_BITS
, False))
316 TLB_LG_END
= TLB_LG_PGSZ
+ TLB_SET_BITS
317 comb
+= tlb_req_index
.eq(r0
.req
.addr
[TLB_LG_PGSZ
: TLB_LG_END
])
318 comb
+= eatag
.eq(r0
.req
.addr
[TLB_LG_END
: 64 ])
320 for i
in range(TLB_NUM_WAYS
):
321 with m
.If(tlb_valid_way(i
)
322 & read_tlb_tag(i
, tlb_tag_way
) == eatag
):
326 comb
+= tlb_hit
.eq(hit
& r0_valid
)
327 comb
+= tlb_hit_way
.eq(hitway
)
330 comb
+= pte
.eq(read_tlb_pte(hitway
, tlb_pte_way
))
333 comb
+= valid_ra
.eq(tlb_hit | ~r0
.req
.virt_mode
)
334 with m
.If(r0
.req
.virt_mode
):
335 comb
+= ra
.eq(Cat(Const(0, ROW_OFF_BITS
),
336 r0
.req
.addr
[ROW_OFF_BITS
:TLB_LG_PGSZ
],
337 pte
[TLB_LG_PGSZ
:REAL_ADDR_BITS
]))
338 comb
+= perm_attr
.eq(extract_perm_attr(pte
))
340 comb
+= ra
.eq(Cat(Const(0, ROW_OFF_BITS
),
341 r0
.rq
.addr
[ROW_OFF_BITS
:REAL_ADDR_BITS
]))
343 comb
+= perm_attr
.reference
.eq(1)
344 comb
+= perm_attr
.changed
.eq(1)
345 comb
+= perm_attr
.priv
.eq(1)
346 comb
+= perm_attr
.nocache
.eq(0)
347 comb
+= perm_attr
.rd_perm
.eq(1)
348 comb
+= perm_attr
.wr_perm
.eq(1)
350 def tlb_update(self
, r0_valid
, r0
, dtlb_valid_bits
, tlb_req_index
,
351 tlb_hit_way
, tlb_hit
, tlb_plru_victim
, tlb_tag_way
,
352 dtlb_tags
, tlb_pte_way
, dtlb_ptes
, dtlb_valid_bits
):
357 # variable tlbie : std_ulogic;
358 # variable tlbwe : std_ulogic;
359 # variable repl_way : tlb_way_t;
360 # variable eatag : tlb_tag_t;
361 # variable tagset : tlb_way_tags_t;
362 # variable pteset : tlb_way_ptes_t;
363 #type tlb_tags_t is array(tlb_index_t) of tlb_way_tags_t;
364 # --> Array([Signal(log(way_tags length)) for i in range(number of tlbs)])
368 repl_way
= Signal(TLB_WAY_BITS
)
369 eatag
= Signal(log2_int(TLB_EA_TAG_BITS
, False))
370 tagset
= TLBWayTags()
371 pteset
= TLBWayPtes()
381 # if rising_edge(clk) then
382 # tlbie := r0_valid and r0.tlbie;
383 # tlbwe := r0_valid and r0.tlbldoi;
384 comb
+= tlbie
.eq(r0_valid
& r0
.tlbie
)
385 comb
+= tlbwe
.eq(r0_valid
& r0
.tlbldoi
)
387 with m
.If(tlbie
& r0
.doall
):
388 # clear all valid bits at once
389 for i
in range(TLB_SET_SIZE
):
390 sync
+= dtlb_valid_bits
[i
].eq(0)
394 sync
+= dtlb_valid_bits
[tlb_req_index
][tlb_hit_way
].eq(0)
397 comb
+= repl_way
.eq(tlb_hit_way
)
399 comb
+= repl_way
.eq(tlb_plru_victim
[tlb_req_index
])
400 comb
+= eatag
.eq(r0
.req
.addr
[TLB_LG_PGSZ
+ TLB_SET_BITS
:64])
401 comb
+= tagset
.eq(tlb_tag_way
)
402 sync
+= write_tlb_tag(repl_way
, tagset
, eatag
)
403 sync
+= dtlb_tags
[tlb_req_index
].eq(tagset
)
404 comb
+= pteset
.eq(tlb_pte_way
)
405 sync
+= write_tlb_pte(repl_way
, pteset
, r0
.req
.data
)
406 sync
+= dtlb_ptes
[tlb_req_index
].eq(pteset
)
407 sync
+= dtlb_valid_bits
[tlb_req_index
][repl_way
].eq(1)
410 # maybe_plrus: if NUM_WAYS > 1 generate
412 def maybe_plrus(self
, r1
):
418 # TODO learn translation of generate into nmgien @lkcl
419 # plrus: for i in 0 to NUM_LINES-1 generate
420 for i
in range(NUM_LINES
):
422 # signal plru_acc : std_ulogic_vector(WAY_BITS-1 downto 0);
423 # signal plru_acc_en : std_ulogic;
424 # signal plru_out : std_ulogic_vector(WAY_BITS-1 downto 0);
425 plru
= PLRU(WAY_BITS
)
426 plru_acc
= Signal(WAY_BITS
)
427 plru_acc_en
= Signal()
428 plru_out
= Signal(WAY_BITS
)
431 # TODO learn tranlation of entity, generic map, port map in
433 # plru : entity work.plru
441 # acc_en => plru_acc_en,
444 comb
+= plru
.acc
.eq(plru_acc
)
445 comb
+= plru
.acc_en
.eq(plru_acc_en
)
446 comb
+= plru
.lru
.eq(plru_out
)
451 # if r1.hit_index = i then
453 with m
.If(r1
.hit_index
== i
):
454 # plru_acc_en <= r1.cache_hit;
455 comb
+= plru_acc_en
.eq(r1
.cache_hit
)
458 # plru_acc_en <= '0';
459 comb
+= plru_acc_en
.eq(0)
461 # plru_acc <= std_ulogic_vector(to_unsigned(
462 # r1.hit_way, WAY_BITS
464 # plru_victim(i) <= plru_out;
465 comb
+= plru_acc
.eq(r1
.hit_way
)
466 comb
+= plru_victim
[i
].eq(plru_out
)
471 # -- Cache tag RAM read port
472 # cache_tag_read : process(clk)
473 # Cache tag RAM read port
474 def cache_tag_read(self
, r0_stall
, req_index
, m_in
, d_in
,
475 cache_tag_set
, cache_tags
):
480 # variable index : index_t;
481 index
= Signal(NUM_LINES
)
486 # if rising_edge(clk) then
487 # if r0_stall = '1' then
489 # index := req_index;
490 sync
+= index
.eq(req_index
)
492 # elsif m_in.valid = '1' then
493 with m
.Elif(m_in
.valid
):
494 # index := get_index(m_in.addr);
495 sync
+= index
.eq(get_index(m_in
.addr
))
499 # index := get_index(d_in.addr);
500 sync
+= index
.eq(get_index(d_in
.addr
))
502 # cache_tag_set <= cache_tags(index);
503 sync
+= cache_tag_set
.eq(cache_tags
[index
])
507 # Cache request parsing and hit detection
508 def dcache_request(self
, r0
, ra
, req_index
, req_row
, req_tag
,
509 r0_valid
, r1
, cache_valid_bits
, replace_way
,
510 use_forward1_next
, use_forward2_next
,
511 req_hit_way
, plru_victim
, rc_ok
, perm_attr
,
512 valid_ra
, perm_ok
, access_ok
, req_op
, req_ok
,
513 r0_stall
, m_in
, early_req_row
, d_in
):
518 # variable is_hit : std_ulogic;
519 # variable hit_way : way_t;
520 # variable op : op_t;
521 # variable opsel : std_ulogic_vector(2 downto 0);
522 # variable go : std_ulogic;
523 # variable nc : std_ulogic;
524 # variable s_hit : std_ulogic;
525 # variable s_tag : cache_tag_t;
526 # variable s_pte : tlb_pte_t;
527 # variable s_ra : std_ulogic_vector(
528 # REAL_ADDR_BITS - 1 downto 0
530 # variable hit_set : std_ulogic_vector(
531 # TLB_NUM_WAYS - 1 downto 0
533 # variable hit_way_set : hit_way_set_t;
534 # variable rel_matches : std_ulogic_vector(
535 # TLB_NUM_WAYS - 1 downto 0
539 hit_way
= Signal(WAY_BITS
)
545 s_tag
= Signal(TAG_BITS
)
546 s_pte
= Signal(TLB_PTE_BITS
)
547 s_ra
= Signal(REAL_ADDR_BITS
)
548 hit_set
= Signal(TLB_NUM_WAYS
)
549 hit_way_set
= HitWaySet()
550 rel_matches
= Signal(TLB_NUM_WAYS
)
554 # -- Extract line, row and tag from request
555 # req_index <= get_index(r0.req.addr);
556 # req_row <= get_row(r0.req.addr);
557 # req_tag <= get_tag(ra);
559 # go := r0_valid and not (r0.tlbie or r0.tlbld)
560 # and not r1.ls_error;
561 # Extract line, row and tag from request
562 comb
+= req_index
.eq(get_index(r0
.req
.addr
))
563 comb
+= req_row
.eq(get_row(r0
.req
.addr
))
564 comb
+= req_tag
.eq(get_tag(ra
))
566 comb
+= go
.eq(r0_valid
& ~
(r0
.tlbie | r0
.tlbld
) & ~r1
.ls_error
)
571 # Test if pending request is a hit on any way
572 # In order to make timing in virtual mode,
573 # when we are using the TLB, we compare each
574 # way with each of the real addresses from each way of
575 # the TLB, and then decide later which match to use.
577 # if r0.req.virt_mode = '1' then
578 with m
.If(r0
.req
.virt_mode
):
579 # rel_matches := (others => '0');
580 comb
+= rel_matches
.eq(0)
581 # for j in tlb_way_t loop
582 for j
in range(TLB_NUM_WAYS
):
583 # hit_way_set(j) := 0;
585 # s_pte := read_tlb_pte(j, tlb_pte_way);
586 # s_ra := s_pte(REAL_ADDR_BITS - 1 downto TLB_LG_PGSZ)
587 # & r0.req.addr(TLB_LG_PGSZ - 1 downto 0);
588 # s_tag := get_tag(s_ra);
589 comb
+= hit_way_set
[j
].eq(0)
591 comb
+= s_pte
.eq(read_tlb_pte(j
, tlb_pte_way
))
593 r0
.req
.addr
[0:TLB_LG_PGSZ
],
594 s_pte
[TLB_LG_PGSZ
:REAL_ADDR_BITS
]
596 comb
+= s_tag
.eq(get_tag(s_ra
))
598 # for i in way_t loop
599 for i
in range(NUM_WAYS
):
600 # if go = '1' and cache_valids(req_index)(i) = '1'
601 # and read_tag(i, cache_tag_set) = s_tag
602 # and tlb_valid_way(j) = '1' then
603 with m
.If(go
& cache_valid_bits
[req_index
][i
] &
604 read_tag(i
, cache_tag_set
) == s_tag
606 # hit_way_set(j) := i;
608 comb
+= hit_way_set
[j
].eq(i
)
612 # hit_set(j) := s_hit;
613 comb
+= hit_set
[j
].eq(s_hit
)
614 # if s_tag = r1.reload_tag then
615 with m
.If(s_tag
== r1
.reload_tag
):
616 # rel_matches(j) := '1';
617 comb
+= rel_matches
[j
].eq(1)
620 # if tlb_hit = '1' then
622 # is_hit := hit_set(tlb_hit_way);
623 # hit_way := hit_way_set(tlb_hit_way);
624 # rel_match := rel_matches(tlb_hit_way);
625 comb
+= is_hit
.eq(hit_set
[tlb_hit_way
])
626 comb
+= hit_way
.eq(hit_way_set
[tlb_hit_way
])
627 comb
+= rel_match
.eq(rel_matches
[tlb_hit_way
])
631 # s_tag := get_tag(r0.req.addr);
632 comb
+= s_tag
.eq(get_tag(r0
.req
.addr
))
633 # for i in way_t loop
634 for i
in range(NUM_WAYS
):
635 # if go = '1' and cache_valids(req_index)(i) = '1' and
636 # read_tag(i, cache_tag_set) = s_tag then
637 with m
.If(go
& cache_valid_bits
[req_index
][i
] &
638 read_tag(i
, cache_tag_set
) == s_tag
):
641 comb
+= hit_way
.eq(i
)
645 # if s_tag = r1.reload_tag then
646 with m
.If(s_tag
== r1
.reload_tag
):
648 comb
+= rel_match
.eq(1)
651 # req_same_tag <= rel_match;
652 comb
+= req_same_tag
.eq(rel_match
)
654 # if r1.state = RELOAD_WAIT_ACK and req_index = r1.store_index
655 # and rel_match = '1' then
656 # See if the request matches the line currently being reloaded
657 with m
.If(r1
.state
== State
.RELOAD_WAIT_ACK
& req_index
==
658 r1
.store_index
& rel_match
):
659 # For a store, consider this a hit even if the row isn't
660 # valid since it will be by the time we perform the store.
661 # For a load, check the appropriate row valid bit.
664 # or r1.rows_valid(req_row mod ROW_PER_LINE);
665 # hit_way := replace_way;
666 comb
+= is_hit
.eq(~r0
.req
.load
667 | r1
.rows_valid
[req_row
% ROW_PER_LINE
]
669 comb
+= hit_way
.eq(replace_way
)
672 # -- Whether to use forwarded data for a load or not
673 # Whether to use forwarded data for a load or not
674 # use_forward1_next <= '0';
675 comb
+= use_forward1_next
.eq(0)
676 # if get_row(r1.req.real_addr) = req_row
677 # and r1.req.hit_way = hit_way then
678 with m
.If(get_row(r1
.req
.real_addr
) == req_row
679 & r1
.req
.hit_way
== hit_way
)
680 # Only need to consider r1.write_bram here, since if we
681 # are writing refill data here, then we don't have a
682 # cache hit this cycle on the line being refilled.
683 # (There is the possibility that the load following the
684 # load miss that started the refill could be to the old
685 # contents of the victim line, since it is a couple of
686 # cycles after the refill starts before we see the updated
687 # cache tag. In that case we don't use the bypass.)
688 # use_forward1_next <= r1.write_bram;
689 comb
+= use_forward1_next
.eq(r1
.write_bram
)
691 # use_forward2_next <= '0';
692 comb
+= use_forward2_next
.eq(0)
693 # if r1.forward_row1 = req_row
694 # and r1.forward_way1 = hit_way then
695 with m
.If(r1
.forward_row1
== req_row
696 & r1
.forward_way1
== hit_way
):
697 # use_forward2_next <= r1.forward_valid1;
698 comb
+= use_forward2_next
.eq(r1
.forward_valid1
)
701 # The way that matched on a hit
702 # req_hit_way <= hit_way;
703 comb
+= req_hit_way
.eq(hit_way
)
705 # The way to replace on a miss
706 # if r1.write_tag = '1' then
707 with m
.If(r1
.write_tag
):
708 # replace_way <= to_integer(unsigned(
709 # plru_victim(r1.store_index)
711 replace_way
.eq(plru_victim
[r1
.store_index
])
714 # replace_way <= r1.store_way;
715 comb
+= replace_way
.eq(r1
.store_way
)
718 # work out whether we have permission for this access
719 # NB we don't yet implement AMR, thus no KUAP
720 # rc_ok <= perm_attr.reference and
721 # (r0.req.load or perm_attr.changed);
722 # perm_ok <= (r0.req.priv_mode or not perm_attr.priv) and
723 # (perm_attr.wr_perm or (r0.req.load
724 # and perm_attr.rd_perm));
725 # access_ok <= valid_ra and perm_ok and rc_ok;
728 & (r0
.req
.load | perm_attr
.changed
)
730 comb
+= perm_ok
.eq((r0
.req
.prive_mode | ~perm_attr
.priv
)
732 |
(r0
.req
.load
& perm_attr
.rd_perm
)
734 comb
+= access_ok
.eq(valid_ra
& perm_ok
& rc_ok
)
735 # nc := r0.req.nc or perm_attr.nocache;
737 # Combine the request and cache hit status to decide what
738 # operation needs to be done
739 comb
+= nc
.eq(r0
.req
.nc | perm_attr
.nocache
)
740 comb
+= op
.eq(Op
.OP_NONE
)
743 # if access_ok = '0' then
744 with m
.If(~access_ok
):
746 comb
+= op
.eq(Op
.OP_BAD
)
747 # elsif cancel_store = '1' then
748 with m
.Elif(cancel_store
):
749 # op := OP_STCX_FAIL;
750 comb
+= op
.eq(Op
.OP_STCX_FAIL
)
753 # opsel := r0.req.load & nc & is_hit;
754 comb
+= opsel
.eq(Cat(is_hit
, nc
, r0
.req
.load
))
756 with m
.Switch(opsel
):
757 # when "101" => op := OP_LOAD_HIT;
758 # when "100" => op := OP_LOAD_MISS;
759 # when "110" => op := OP_LOAD_NC;
760 # when "001" => op := OP_STORE_HIT;
761 # when "000" => op := OP_STORE_MISS;
762 # when "010" => op := OP_STORE_MISS;
763 # when "011" => op := OP_BAD;
764 # when "111" => op := OP_BAD;
765 # when others => op := OP_NONE;
766 with m
.Case(Const(0b101, 3)):
767 comb
+= op
.eq(Op
.OP_LOAD_HIT
)
769 with m
.Case(Cosnt(0b100, 3)):
770 comb
+= op
.eq(Op
.OP_LOAD_MISS
)
772 with m
.Case(Const(0b110, 3)):
773 comb
+= op
.eq(Op
.OP_LOAD_NC
)
775 with m
.Case(Const(0b001, 3)):
776 comb
+= op
.eq(Op
.OP_STORE_HIT
)
778 with m
.Case(Const(0b000, 3)):
779 comb
+= op
.eq(Op
.OP_STORE_MISS
)
781 with m
.Case(Const(0b010, 3)):
782 comb
+= op
.eq(Op
.OP_STORE_MISS
)
784 with m
.Case(Const(0b011, 3)):
785 comb
+= op
.eq(Op
.OP_BAD
)
787 with m
.Case(Const(0b111, 3)):
788 comb
+= op
.eq(Op
.OP_BAD
)
791 comb
+= op
.eq(Op
.OP_NONE
)
797 comb
+= req_op
.eq(op
)
798 comb
+= req_go
.eq(go
)
800 # Version of the row number that is valid one cycle earlier
801 # in the cases where we need to read the cache data BRAM.
802 # If we're stalling then we need to keep reading the last
804 # if r0_stall = '0' then
805 with m
.If(~r0_stall
):
806 # if m_in.valid = '1' then
807 with m
.If(m_in
.valid
):
808 # early_req_row <= get_row(m_in.addr);
809 comb
+= early_req_row
.eq(get_row(m_in
.addr
))
812 # early_req_row <= get_row(d_in.addr);
813 comb
+= early_req_row
.eq(get_row(d_in
.addr
))
817 # early_req_row <= req_row;
818 comb
+= early_req_row
.eq(req_row
)
822 # Handle load-with-reservation and store-conditional instructions
823 def reservation_comb(self
, cancel_store
, set_rsrv
, clear_rsrv
,
824 r0_valid
, r0
, reservation
):
830 # cancel_store <= '0';
833 # if r0_valid = '1' and r0.req.reserve = '1' then
834 with m
.If(r0_valid
& r0
.req
.reserve
):
836 # -- XXX generate alignment interrupt if address
837 # -- is not aligned XXX or if r0.req.nc = '1'
838 # if r0.req.load = '1' then
839 # XXX generate alignment interrupt if address
840 # is not aligned XXX or if r0.req.nc = '1'
841 with m
.If(r0
.req
.load
):
842 # -- load with reservation
844 # load with reservation
848 # -- store conditional
851 comb
+= clear_rsrv
.eq(1)
852 # if reservation.valid = '0' or r0.req.addr(63
853 # downto LINE_OFF_BITS) /= reservation.addr then
854 with m
.If(~reservation
.valid
855 | r0
.req
.addr
[LINE_OFF_BITS
:64]):
856 # cancel_store <= '1';
857 comb
+= cancel_store
.eq(1)
863 def reservation_reg(self
, r0_valid
, access_ok
, clear_rsrv
,
870 # if rising_edge(clk) then
872 # reservation.valid <= '0';
873 # TODO understand how resets work in nmigen
874 # elsif r0_valid = '1' and access_ok = '1' then
875 with m
.Elif(r0_valid
& access_ok
):
876 # if clear_rsrv = '1' then
877 with m
.If(clear_rsrv
):
878 # reservation.valid <= '0';
879 sync
+= reservation
.valid
.ea(0)
880 # elsif set_rsrv = '1' then
881 with m
.Elif(set_rsrv
):
882 # reservation.valid <= '1';
883 # reservation.addr <=
884 # r0.req.addr(63 downto LINE_OFF_BITS);
885 sync
+= reservation
.valid
.eq(1)
886 sync
+= reservation
.addr
.eq(
887 r0
.req
.addr
[LINE_OFF_BITS
:64]
894 # Return data for loads & completion control logic
895 def writeback_control(self
, r1
, cache_out
, d_out
, m_out
):
900 # variable data_out : std_ulogic_vector(63 downto 0);
901 # variable data_fwd : std_ulogic_vector(63 downto 0);
902 # variable j : integer;
903 data_out
= Signal(64)
904 data_fwd
= Signal(64)
908 # -- Use the bypass if are reading the row that was
909 # -- written 1 or 2 cycles ago, including for the
910 # -- slow_valid = 1 case (i.e. completing a load
911 # -- miss or a non-cacheable load).
912 # if r1.use_forward1 = '1' then
913 # Use the bypass if are reading the row that was
914 # written 1 or 2 cycles ago, including for the
915 # slow_valid = 1 case (i.e. completing a load
916 # miss or a non-cacheable load).
917 with m
.If(r1
.use_forward1
):
918 # data_fwd := r1.forward_data1;
919 comb
+= data_fwd
.eq(r1
.forward_data1
)
922 # data_fwd := r1.forward_data2;
923 comb
+= data_fwd
.eq(r1
.forward_data2
)
926 # data_out := cache_out(r1.hit_way);
927 comb
+= data_out
.eq(cache_out
[r1
.hit_way
])
929 # for i in 0 to 7 loop
934 # if r1.forward_sel(i) = '1' then
935 with m
.If(r1
.forward_sel
[i
]):
936 # data_out(j + 7 downto j) := data_fwd(j + 7 downto j);
937 comb
+= data_out
[j
:j
+8].eq(data_fwd
[j
:j
+8])
941 # d_out.valid <= r1.ls_valid;
942 # d_out.data <= data_out;
943 # d_out.store_done <= not r1.stcx_fail;
944 # d_out.error <= r1.ls_error;
945 # d_out.cache_paradox <= r1.cache_paradox;
946 comb
+= d_out
.valid
.eq(r1
.ls_valid
)
947 comb
+= d_out
.data
.eq(data_out
)
948 comb
+= d_out
.store_done
.eq(~r1
.stcx_fail
)
949 comb
+= d_out
.error
.eq(r1
.ls_error
)
950 comb
+= d_out
.cache_paradox
.eq(r1
.cache_paradox
)
953 # m_out.done <= r1.mmu_done;
954 # m_out.err <= r1.mmu_error;
955 # m_out.data <= data_out;
956 comb
+= m_out
.done
.eq(r1
.mmu_done
)
957 comb
+= m_out
.err
.eq(r1
.mmu_error
)
958 comb
+= m_out
.data
.eq(data_out
)
960 # -- We have a valid load or store hit or we just completed
961 # -- a slow op such as a load miss, a NC load or a store
963 # -- Note: the load hit is delayed by one cycle. However it
964 # -- can still not collide with r.slow_valid (well unless I
965 # -- miscalculated) because slow_valid can only be set on a
966 # -- subsequent request and not on its first cycle (the state
967 # -- machine must have advanced), which makes slow_valid
968 # -- at least 2 cycles from the previous hit_load_valid.
970 # -- Sanity: Only one of these must be set in any given cycle
971 # assert (r1.slow_valid and r1.stcx_fail) /= '1'
972 # report "unexpected slow_valid collision with stcx_fail"
974 # assert ((r1.slow_valid or r1.stcx_fail) and r1.hit_load_valid)
975 # /= '1' report "unexpected hit_load_delayed collision with
976 # slow_valid" severity FAILURE;
977 # We have a valid load or store hit or we just completed
978 # a slow op such as a load miss, a NC load or a store
980 # Note: the load hit is delayed by one cycle. However it
981 # can still not collide with r.slow_valid (well unless I
982 # miscalculated) because slow_valid can only be set on a
983 # subsequent request and not on its first cycle (the state
984 # machine must have advanced), which makes slow_valid
985 # at least 2 cycles from the previous hit_load_valid.
987 # Sanity: Only one of these must be set in any given cycle
988 assert (r1
.slow_valid
& r1
.stcx_fail
) != 1 "unexpected" \
989 "slow_valid collision with stcx_fail -!- severity FAILURE"
991 assert ((r1
.slow_valid | r1
.stcx_fail
) | r1
.hit_load_valid
) != 1
992 "unexpected hit_load_delayed collision with slow_valid -!-" \
995 # if r1.mmu_req = '0' then
996 with m
.If(~r1
._mmu_req
):
997 # -- Request came from loadstore1...
998 # -- Load hit case is the standard path
999 # if r1.hit_load_valid = '1' then
1000 # Request came from loadstore1...
1001 # Load hit case is the standard path
1002 with m
.If(r1
.hit_load_valid
):
1004 # "completing load hit data=" & to_hstring(data_out);
1005 print(f
"completing load hit data={data_out}")
1008 # -- error cases complete without stalling
1009 # if r1.ls_error = '1' then
1010 # error cases complete without stalling
1011 with m
.If(r1
.ls_error
):
1012 # report "completing ld/st with error";
1013 print("completing ld/st with error")
1016 # -- Slow ops (load miss, NC, stores)
1017 # if r1.slow_valid = '1' then
1018 # Slow ops (load miss, NC, stores)
1019 with m
.If(r1
.slow_valid
):
1021 # "completing store or load miss data="
1022 # & to_hstring(data_out);
1023 print(f
"completing store or load miss data={data_out}")
1028 # -- Request came from MMU
1029 # if r1.hit_load_valid = '1' then
1030 # Request came from MMU
1031 with m
.If(r1
.hit_load_valid
):
1032 # report "completing load hit to MMU, data="
1033 # & to_hstring(m_out.data);
1034 print(f
"completing load hit to MMU, data={m_out.data}")
1037 # -- error cases complete without stalling
1038 # if r1.mmu_error = '1' then
1039 # report "completing MMU ld with error";
1040 # error cases complete without stalling
1041 with m
.If(r1
.mmu_error
):
1042 print("combpleting MMU ld with error")
1045 # -- Slow ops (i.e. load miss)
1046 # if r1.slow_valid = '1' then
1047 # Slow ops (i.e. load miss)
1048 with m
.If(r1
.slow_valid
):
1049 # report "completing MMU load miss, data="
1050 # & to_hstring(m_out.data);
1051 print("completing MMU load miss, data={m_out.data}")
1056 # -- Generate a cache RAM for each way. This handles the normal
1057 # -- reads, writes from reloads and the special store-hit update
1060 # -- Note: the BRAMs have an extra read buffer, meaning the output
1061 # -- is pipelined an extra cycle. This differs from the
1062 # -- icache. The writeback logic needs to take that into
1063 # -- account by using 1-cycle delayed signals for load hits.
1065 # rams: for i in 0 to NUM_WAYS-1 generate
1066 # Generate a cache RAM for each way. This handles the normal
1067 # reads, writes from reloads and the special store-hit update
1070 # Note: the BRAMs have an extra read buffer, meaning the output
1071 # is pipelined an extra cycle. This differs from the
1072 # icache. The writeback logic needs to take that into
1073 # account by using 1-cycle delayed signals for load hits.
1075 for i
in range(NUM_WAYS
):
1076 # signal do_read : std_ulogic;
1077 # signal rd_addr : std_ulogic_vector(ROW_BITS-1 downto 0);
1078 # signal do_write : std_ulogic;
1079 # signal wr_addr : std_ulogic_vector(ROW_BITS-1 downto 0);
1081 # std_ulogic_vector(wishbone_data_bits-1 downto 0);
1082 # signal wr_sel : std_ulogic_vector(ROW_SIZE-1 downto 0);
1083 # signal wr_sel_m : std_ulogic_vector(ROW_SIZE-1 downto 0);
1084 # signal dout : cache_row_t;
1086 rd_addr
= Signal(ROW_BITS
)
1088 wr_addr
= Signal(ROW_BITS
)
1089 wr_data
= Signal(WB_DATA_BITS
)
1090 wr_sel
= Signal(ROW_SIZE
)
1091 wr_sel_m
= Signal(ROW_SIZE
)
1092 _d_out
= Signal(WB_DATA_BITS
)
1095 # way: entity work.cache_ram
1097 # ROW_BITS => ROW_BITS,
1098 # WIDTH => wishbone_data_bits,
1104 # rd_addr => rd_addr,
1106 # wr_sel => wr_sel_m,
1107 # wr_addr => wr_addr,
1108 # wr_data => wr_data
1111 way
= CacheRam(ROW_BITS
, WB_DATA_BITS
, True)
1112 comb
+= way
.rd_en
.eq(do_read
)
1113 comb
+= way
.rd_addr
.eq(rd_addr
)
1114 comb
+= way
.rd_data
.eq(_d_out
)
1115 comb
+= way
.wr_sel
.eq(wr_sel_m
)
1116 comb
+= way
.wr_addr
.eq(wr_addr
)
1117 comb
+= way
.wr_data
.eq(wr_data
)
1120 # -- Cache hit reads
1123 # std_ulogic_vector(to_unsigned(early_req_row, ROW_BITS));
1124 # cache_out(i) <= dout;
1126 comb
+= do_read
.eq(1)
1127 comb
+= rd_addr
.eq(Signal(BRAM_ROWS
))
1128 comb
+= cache_out
[i
].eq(dout
)
1132 # -- Defaults to wishbone read responses (cache refill)
1134 # -- For timing, the mux on wr_data/sel/addr is not
1135 # -- dependent on anything other than the current state.
1138 # Defaults to wishbone read responses (cache refill)
1140 # For timing, the mux on wr_data/sel/addr is not
1141 # dependent on anything other than the current state.
1142 # wr_sel_m <= (others => '0');
1143 comb
+= wr_sel_m
.eq(0)
1146 comb
+= do_write
.eq(0)
1147 # if r1.write_bram = '1' then
1148 with m
.If(r1
.write_bram
):
1149 # -- Write store data to BRAM. This happens one
1150 # -- cycle after the store is in r0.
1151 # Write store data to BRAM. This happens one
1152 # cycle after the store is in r0.
1153 # wr_data <= r1.req.data;
1154 # wr_sel <= r1.req.byte_sel;
1155 # wr_addr <= std_ulogic_vector(to_unsigned(
1156 # get_row(r1.req.real_addr), ROW_BITS
1158 comb
+= wr_data
.eq(r1
.req
.data
)
1159 comb
+= wr_sel
.eq(r1
.req
.byte_sel
)
1160 comb
+= wr_addr
.eq(Signal(get_row(r1
.req
.real_addr
)))
1162 # if i = r1.req.hit_way then
1163 with m
.If(i
== r1
.req
.hit_way
):
1165 comb
+= do_write
.eq(1)
1169 # -- Otherwise, we might be doing a reload or a DCBZ
1170 # if r1.dcbz = '1' then
1171 # Otherwise, we might be doing a reload or a DCBZ
1173 # wr_data <= (others => '0');
1174 comb
+= wr_data
.eq(0)
1177 # wr_data <= wishbone_in.dat;
1178 comb
+= wr_data
.eq(wishbone_in
.dat
)
1181 # wr_addr <= std_ulogic_vector(to_unsigned(
1182 # r1.store_row, ROW_BITS
1184 # wr_sel <= (others => '1');
1185 comb
+= wr_addr
.eq(Signal(r1
.store_row
))
1186 comb
+= wr_sel
.eq(1)
1188 # if r1.state = RELOAD_WAIT_ACK and
1189 # wishbone_in.ack = '1' and replace_way = i then
1190 with m
.If(r1
.state
== State
.RELOAD_WAIT_ACK
1191 & wishbone_in
.ack
& relpace_way
== i
):
1193 comb
+= do_write
.eq(1)
1197 # -- Mask write selects with do_write since BRAM
1198 # -- doesn't have a global write-enable
1199 # if do_write = '1' then
1200 # -- Mask write selects with do_write since BRAM
1201 # -- doesn't have a global write-enable
1202 with m
.If(do_write
):
1203 # wr_sel_m <= wr_sel;
1204 comb
+= wr_sel_m
.eq(wr_sel
)
1209 # Cache hit synchronous machine for the easy case.
1210 # This handles load hits.
1211 # It also handles error cases (TLB miss, cache paradox)
def dcache_fast_hit(self, req_op, r0_valid, r1):
    """Cache hit synchronous machine for the easy case.

    Handles load hits, and error cases (TLB miss, cache paradox),
    latching hit/error status and TLB-hit bookkeeping into r1 each
    clock.

    NOTE(review): the body also references m, sync, Op, r0, req_index,
    req_tag, req_hit_way, valid_ra, rc_ok, perm_ok, access_ok, tlb_hit,
    tlb_hit_way and tlb_req_index, which are not parameters -- the
    parameter list appears truncated in transcription; these must be in
    scope (TODO: confirm against the full file and pass explicitly).
    """
    # if req_op /= OP_NONE then report ... (debug)
    with m.If(req_op != Op.OP_NONE):
        # NOTE: elaboration-time debug print (runs once at build time,
        # not per simulated cycle) -- kept from the draft.
        print(f"op:{req_op} addr:{r0.req.addr} nc: {r0.req.nc}"
              f" idx:{req_index} tag:{req_tag} way: {req_hit_way}")

    with m.If(r0_valid):
        sync += r1.mmu_req.eq(r0.mmu_req)

    # Fast path for load/store hits.
    # Set signals for the writeback controls.
    sync += r1.hit_way.eq(req_hit_way)
    sync += r1.hit_index.eq(req_index)

    with m.If(req_op == Op.OP_LOAD_HIT):
        sync += r1.hit_load_valid.eq(1)
    with m.Else():
        sync += r1.hit_load_valid.eq(0)

    # BUGFIX: parenthesise the comparisons.  Python (and hence nmigen)
    # binds | tighter than ==, so the draft evaluated
    # req_op == (Op.OP_LOAD_HIT | req_op) == Op.OP_STORE_HIT.
    with m.If((req_op == Op.OP_LOAD_HIT) | (req_op == Op.OP_STORE_HIT)):
        sync += r1.cache_hit.eq(1)
    with m.Else():
        sync += r1.cache_hit.eq(0)

    with m.If(req_op == Op.OP_BAD):
        print(f"Signalling ld/st error valid_ra={valid_ra}"
              f" rc_ok={rc_ok} perm_ok={perm_ok}")
        # MMU-originated requests report via mmu_error, loadstore ones
        # via ls_error; cache_paradox flags the cacheability mismatch.
        sync += r1.ls_error.eq(~r0.mmu_req)
        sync += r1.mmu_error.eq(r0.mmu_req)
        sync += r1.cache_paradox.eq(access_ok)
    with m.Else():
        sync += r1.ls_error.eq(0)
        sync += r1.mmu_error.eq(0)
        sync += r1.cache_paradox.eq(0)

    with m.If(req_op == Op.OP_STCX_FAIL):
        # BUGFIX: restored -- the assignment itself was lost in
        # transcription; its VHDL comment (r1.stcx_fail <= '1')
        # survived.
        sync += r1.stcx_fail.eq(1)
    with m.Else():
        sync += r1.stcx_fail.eq(0)

    # Record TLB hit information for updating TLB PLRU
    sync += r1.tlb_hit.eq(tlb_hit)
    sync += r1.tlb_hit_way.eq(tlb_hit_way)
    sync += r1.tlb_hit_index.eq(tlb_req_index)
1315 # Memory accesses are handled by this state machine:
1317 # * Cache load miss/reload (in conjunction with "rams")
1318 # * Load hits for non-cachable forms
1319 # * Stores (the collision case is handled in "rams")
1321 # All wishbone requests generation is done here.
1322 # This machine operates at stage 1.
def dcache_slow(self, r1, use_forward1_next, cache_valid_bits, r0,
                r0_valid, req_op, cache_tag, req_go, ra, wb_in):
    """Memory-access state machine (stage 1).

    Handles:
     * cache load miss/reload (in conjunction with "rams")
     * load hits for non-cachable forms
     * stores (the collision case is handled in "rams")

    All wishbone request generation is done here.

    NOTE(review): the body also references m, sync, Op, State, Signal,
    MemAccessRequest, Const, use_forward2_next, replace_way,
    req_hit_way, req_same_tag, cache_tags, NUM_LINES, NUM_WAYS,
    TAG_WIDTH, ROW_PER_LINE, SET_SIZE_BITS and the get_*/next_*/
    is_last_* helpers, which are not parameters -- they must be in
    scope (TODO: pass in explicitly once the class settles).
    """
    # VHDL variables, modelled as Signals in the draft.
    # NOTE(review): VHDL variable assignment takes effect immediately
    # within the process, whereas Signal + sync is registered; these
    # probably want comb assignments -- TODO revisit.
    stbs_done = Signal()
    req = MemAccessRequest()
    # BUGFIX: restored -- declaration lost in transcription; the VHDL
    # comment "variable acks : unsigned(2 downto 0)" survived.
    acks = Signal(3)

    sync += r1.use_forward1.eq(use_forward1_next)
    sync += r1.forward_sel.eq(0)
    with m.If(use_forward1_next):
        sync += r1.forward_sel.eq(r1.req.byte_sel)
    with m.Elif(use_forward2_next):
        sync += r1.forward_sel.eq(r1.forward_sel1)

    sync += r1.forward_data2.eq(r1.forward_data1)
    with m.If(r1.write_bram):
        sync += r1.forward_data1.eq(r1.req.data)
        sync += r1.forward_sel1.eq(r1.req.byte_sel)
        sync += r1.forward_way1.eq(r1.req.hit_way)
        sync += r1.forward_row1.eq(get_row(r1.req.real_addr))
        sync += r1.forward_valid1.eq(1)
    with m.Else():
        with m.If(r1.dcbz):
            sync += r1.forward_data1.eq(0)
        with m.Else():
            sync += r1.forward_data1.eq(wb_in.dat)
        # BUGFIX: (others => '1') must set *all* bits; .eq(1) only set
        # bit 0.  .eq(-1) fills the signal with ones.
        sync += r1.forward_sel1.eq(-1)
        sync += r1.forward_way1.eq(replace_way)
        sync += r1.forward_row1.eq(r1.store_row)
        sync += r1.forward_valid1.eq(0)

    # On reset, clear all valid bits to force misses.
    # TODO figure out how the reset signal works in nmigen -- the sync
    # domain's implicit reset would normally cover this.
    with m.If("""TODO RST???"""):
        for i in range(NUM_LINES):
            sync += cache_valid_bits[i].eq(0)
        sync += r1.state.eq(State.IDLE)
        sync += r1.full.eq(0)
        sync += r1.slow_valid.eq(0)
        sync += r1.wb.cyc.eq(0)
        sync += r1.wb.stb.eq(0)
        sync += r1.ls_valid.eq(0)
        sync += r1.mmu_done.eq(0)
        # Not useful normally but helps avoiding tons of sim warnings
        sync += r1.wb.adr.eq(0)
    with m.Else():
        # One cycle pulses reset
        sync += r1.slow_valid.eq(0)
        sync += r1.write_bram.eq(0)
        sync += r1.inc_acks.eq(0)
        sync += r1.dec_acks.eq(0)
        sync += r1.ls_valid.eq(0)
        # complete tlbies and TLB loads in the third cycle
        sync += r1.mmu_done.eq(r0_valid & (r0.tlbie | r0.tlbld))

        # BUGFIX: parenthesise == against | (precedence).
        with m.If((req_op == Op.OP_LOAD_HIT)
                  | (req_op == Op.OP_STCX_FAIL)):
            with m.If(~r0.mmu_req):
                sync += r1.ls_valid.eq(1)
            with m.Else():
                sync += r1.mmu_done.eq(1)

        with m.If(r1.write_tag):
            # Store new tag in selected way
            for i in range(NUM_WAYS):
                with m.If(i == replace_way):
                    sync += cache_tags[r1.store_index][
                        i * TAG_WIDTH:(i + 1) * TAG_WIDTH].eq(
                        # TODO(review): the VHDL stores r1.reload_tag
                        # zero-padded to TAG_WIDTH; Const(TAG_WIDTH,
                        # TAG_WIDTH) from the draft looks wrong --
                        # confirm and replace with the padded tag.
                        Const(TAG_WIDTH, TAG_WIDTH))
            sync += r1.store_way.eq(replace_way)
            sync += r1.write_tag.eq(0)

        # Take request from r1.req if there is one there,
        # else from req_op, ra, etc.
        with m.If(r1.full):
            sync += req.eq(r1.req)
        with m.Else():
            sync += req.op.eq(req_op)
            sync += req.valid.eq(req_go)
            sync += req.mmu_req.eq(r0.mmu_req)
            sync += req.dcbz.eq(r0.req.dcbz)
            sync += req.real_addr.eq(ra)
            # Force data to 0 for dcbz
            with m.If(~r0.req.dcbz):
                sync += req.data.eq(r0.req.data)
            with m.Else():
                sync += req.data.eq(0)
            # Select all bytes for dcbz and for cacheable loads
            with m.If(r0.req.dcbz | (r0.req.load & ~r0.req.nc)):
                sync += req.byte_sel.eq(-1)  # (others => '1')
            with m.Else():
                sync += req.byte_sel.eq(r0.req.byte_sel)
            sync += req.hit_way.eq(req_hit_way)
            sync += req.same_tag.eq(req_same_tag)

        # Store the incoming request from r0, if it is a slow request.
        # Note that r1.full = 1 implies req_op = OP_NONE
        with m.If((req_op == Op.OP_LOAD_MISS)
                  | (req_op == Op.OP_LOAD_NC)
                  | (req_op == Op.OP_STORE_MISS)
                  | (req_op == Op.OP_STORE_HIT)):
            # BUGFIX: restored r1.req capture -- lost in transcription
            # (the VHDL stores req alongside setting full).
            sync += r1.req.eq(req)
            sync += r1.full.eq(1)

        # Main state machine
        with m.Switch(r1.state):
            with m.Case(State.IDLE):  # BUGFIX: colon was missing
                # BUGFIX: slicing by a Signal (real_addr[0:r1.wb.adr])
                # is meaningless; the VHDL takes the low adr'length
                # bits.
                sync += r1.wb.adr.eq(req.real_addr[0:len(r1.wb.adr)])
                sync += r1.wb.sel.eq(req.byte_sel)
                sync += r1.wb.dat.eq(req.data)
                sync += r1.dcbz.eq(req.dcbz)

                # Keep track of our index and way
                # for subsequent stores.
                sync += r1.store_index.eq(get_index(req.real_addr))
                sync += r1.store_row.eq(get_row(req.real_addr))
                sync += r1.end_row_ix.eq(
                    get_row_of_line(get_row(req.real_addr)) - 1)
                sync += r1.reload_tag.eq(get_tag(req.real_addr))
                sync += r1.req.same_tag.eq(1)

                with m.If(req.op == Op.OP_STORE_HIT):
                    sync += r1.store_way.eq(req.hit_way)

                # Reset per-row valid bits,
                # ready for handling OP_LOAD_MISS
                for i in range(ROW_PER_LINE):
                    sync += r1.rows_valid[i].eq(0)

                with m.Switch(req.op):
                    with m.Case(Op.OP_LOAD_HIT):
                        # stay in IDLE state
                        pass

                    with m.Case(Op.OP_LOAD_MISS):
                        # Normal load cache miss,
                        # start the reload machine.
                        # BUGFIX: the draft interpolated the undefined
                        # name req_real_addr; elaboration-time print.
                        print(f"cache miss real addr:{req.real_addr}"
                              f" idx:{get_index(req.real_addr)}"
                              f" tag:{get_tag(req.real_addr)}")
                        # Start the wishbone cycle
                        sync += r1.wb.we.eq(0)
                        sync += r1.wb.cyc.eq(1)
                        sync += r1.wb.stb.eq(1)
                        # Track that we had one request sent
                        sync += r1.state.eq(State.RELOAD_WAIT_ACK)
                        sync += r1.write_tag.eq(1)

                    with m.Case(Op.OP_LOAD_NC):
                        sync += r1.wb.cyc.eq(1)
                        sync += r1.wb.stb.eq(1)
                        sync += r1.wb.we.eq(0)
                        sync += r1.state.eq(State.NC_LOAD_WAIT_ACK)

                    # BUGFIX: OP_STORE_HIT | OP_STORE_MISS bitwise-ORed
                    # the enum values (6|7 == 7); nmigen Case takes
                    # multiple alternatives as separate arguments.
                    with m.Case(Op.OP_STORE_HIT, Op.OP_STORE_MISS):
                        with m.If(~req.dcbz):  # BUGFIX: was req.bcbz
                            sync += r1.state.eq(State.STORE_WAIT_ACK)
                            # to_unsigned(1, 3) is simply the value 1
                            sync += r1.acks_pending.eq(1)
                            sync += r1.full.eq(0)
                            sync += r1.slow_valid.eq(1)
                            with m.If(~req.mmu_req):
                                sync += r1.ls_valid.eq(1)
                            with m.Else():
                                sync += r1.mmu_done.eq(1)
                            with m.If(req.op == Op.OP_STORE_HIT):
                                sync += r1.write_bram.eq(1)
                        with m.Else():
                            # dcbz is handled much like a load miss
                            # except that we are writing to memory
                            # instead of reading.
                            # BUGFIX: was Op.RELOAD_WAIT_ACK --
                            # RELOAD_WAIT_ACK is a State, not an Op.
                            sync += r1.state.eq(State.RELOAD_WAIT_ACK)
                            with m.If(req.op == Op.OP_STORE_MISS):
                                sync += r1.write_tag.eq(1)
                        sync += r1.wb.we.eq(1)
                        sync += r1.wb.cyc.eq(1)
                        sync += r1.wb.stb.eq(1)

                    # OP_NONE and OP_BAD do nothing;
                    # OP_BAD & OP_STCX_FAIL were handled above already.
                    # BUGFIX: Op. prefix was missing on these Cases.
                    with m.Case(Op.OP_NONE):
                        pass
                    with m.Case(Op.OP_BAD):
                        pass
                    with m.Case(Op.OP_STCX_FAIL):
                        pass

            with m.Case(State.RELOAD_WAIT_ACK):
                # Requests are all sent if stb is 0
                sync += stbs_done.eq(~r1.wb.stb)

                # If we are still sending requests,
                # was one accepted?
                with m.If(~wb_in.stall & ~stbs_done):
                    # That was the last word?  We are done sending.
                    # Clear stb and set stbs_done so we can handle an
                    # eventual last ack on the same cycle.
                    with m.If(is_last_row_addr(r1.wb.adr,
                                               r1.end_row_ix)):
                        sync += r1.wb.stb.eq(0)
                        # BUGFIX: VHDL sets stbs_done := true here;
                        # the draft wrote .eq(0).
                        sync += stbs_done.eq(1)
                    with m.Else():
                        # Calculate the next row address
                        sync += r1.wb.adr.eq(next_row_addr(r1.wb.adr))

                # Incoming acks processing
                sync += r1.forward_valid1.eq(wb_in.ack)
                with m.If(wb_in.ack):
                    # mark this row of the line as now valid
                    sync += r1.rows_valid[
                        r1.store_row % ROW_PER_LINE].eq(1)

                    # If this is the data we were looking for,
                    # we can complete the request next cycle.
                    # Compare the whole address in case the
                    # request in r1.req is not the one that
                    # started this refill.
                    # BUGFIX: parenthesised per the VHDL condition.
                    with m.If(r1.full & r1.req.same_tag &
                              ((r1.dcbz & r1.req.dcbz)
                               | (~r1.dcbz &
                                  (r1.req.op == Op.OP_LOAD_MISS))) &
                              (r1.store_row ==
                               get_row(r1.req.real_addr))):
                        sync += r1.full.eq(0)
                        sync += r1.slow_valid.eq(1)
                        with m.If(~r1.mmu_req):
                            sync += r1.ls_valid.eq(1)
                        with m.Else():
                            sync += r1.mmu_done.eq(1)
                        sync += r1.forward_sel.eq(-1)  # all ones
                        sync += r1.use_forward1.eq(1)

                    # Check for completion
                    with m.If(stbs_done &
                              is_last_row(r1.store_row,
                                          r1.end_row_ix)):
                        # Complete wishbone cycle
                        sync += r1.wb.cyc.eq(0)
                        # Cache line is now valid
                        sync += cache_valid_bits[
                            r1.store_index][r1.store_way].eq(1)
                        sync += r1.state.eq(State.IDLE)

                    # Increment store row counter
                    sync += r1.store_row.eq(next_row(r1.store_row))

            with m.Case(State.STORE_WAIT_ACK):
                sync += stbs_done.eq(~r1.wb.stb)
                sync += acks.eq(r1.acks_pending)
                with m.If(r1.inc_acks != r1.dec_acks):
                    with m.If(r1.inc_acks):
                        sync += acks.eq(acks + 1)
                    with m.Else():
                        sync += acks.eq(acks - 1)
                sync += r1.acks_pending.eq(acks)

                # Clear stb when slave accepted request
                with m.If(~wb_in.stall):
                    # See if there is another store waiting
                    # to be done which is in the same real page.
                    with m.If(req.valid):
                        sync += r1.wb.adr[0:SET_SIZE_BITS].eq(
                            req.real_addr[0:SET_SIZE_BITS])
                        sync += r1.wb.dat.eq(req.data)
                        sync += r1.wb.sel.eq(req.byte_sel)
                    # BUGFIX: (acks < 7) parenthesised -- & binds
                    # tighter than <, so the draft computed
                    # acks < (7 & req.same_tag); also fixed the
                    # Op_STORE_MISS / OP_SOTRE_HIT typos.
                    with m.Elif((acks < 7) & req.same_tag &
                                ((req.op == Op.OP_STORE_MISS)
                                 | (req.op == Op.OP_STORE_HIT))):
                        sync += r1.wb.stb.eq(1)
                        sync += stbs_done.eq(0)
                        with m.If(req.op == Op.OP_STORE_HIT):
                            sync += r1.write_bram.eq(1)
                        sync += r1.full.eq(0)
                        sync += r1.slow_valid.eq(1)
                        # Store requests never come from the MMU
                        sync += r1.ls_valid.eq(1)
                        sync += stbs_done.eq(0)
                        sync += r1.inc_acks.eq(1)
                    with m.Else():
                        sync += r1.wb.stb.eq(0)
                        sync += stbs_done.eq(1)

                # Got ack?  See if complete.
                with m.If(wb_in.ack):
                    # BUGFIX: condition was "stbs_done & acks" with a
                    # missing colon; VHDL says "stbs_done and acks = 1".
                    with m.If(stbs_done & (acks == 1)):
                        sync += r1.state.eq(State.IDLE)
                        sync += r1.wb.cyc.eq(0)
                        sync += r1.wb.stb.eq(0)
                    sync += r1.dec_acks.eq(1)

            with m.Case(State.NC_LOAD_WAIT_ACK):
                # Clear stb when slave accepted request
                with m.If(~wb_in.stall):
                    sync += r1.wb.stb.eq(0)
                # Got ack?  Complete.
                with m.If(wb_in.ack):
                    sync += r1.state.eq(State.IDLE)
                    sync += r1.full.eq(0)
                    sync += r1.slow_valid.eq(1)
                    with m.If(~r1.mmu_req):
                        sync += r1.ls_valid.eq(1)
                    with m.Else():
                        sync += r1.mmu_done.eq(1)
                    sync += r1.forward_sel.eq(-1)  # all ones
                    sync += r1.use_forward1.eq(1)
                    sync += r1.wb.cyc.eq(0)
                    sync += r1.wb.stb.eq(0)
2022 # dc_log: if LOG_LENGTH > 0 generate
2023 # TODO learn how to tranlate vhdl generate into nmigen
# dc_log: if LOG_LENGTH > 0 generate
# TODO learn how to translate vhdl generate into nmigen
def dcache_log(self, r1, valid_ra, tlb_hit_way, stall_out,
               d_out, wb_in, log_out):
    """Latch a 20-bit debug trace word each cycle and drive log_out.

    NOTE(review): req_op, m, sync and comb are referenced but are not
    parameters -- they must be in scope (TODO: pass in explicitly).
    """
    # signal log_data : std_ulogic_vector(19 downto 0)
    log_data = Signal(20)

    # NOTE(review): Const(signal, 3) cannot encode a *live* Signal; the
    # VHDL truncates these values to 3 bits, which in nmigen would be a
    # plain slice (e.g. r1.state[:3]).  Kept as in the draft, marked
    # for rework.
    # BUGFIX: the Cat(...) call was truncated mid-expression in the
    # draft (unclosed parens); closed here, with the lost final element
    # reconstructed from the VHDL comment "r1.wb.adr(5 downto 3)" --
    # TODO confirm against the original.
    sync += log_data.eq(Cat(
        Const(r1.state, 3), valid_ra, Const(tlb_hit_way, 3),
        stall_out, Const(req_op, 3), d_out.valid, d_out.error,
        r1.wb.cyc, r1.wb.stb, wb_in.ack, wb_in.stall,
        r1.wb.adr[3:6]))

    # log_out <= log_data
    comb += log_out.eq(log_data)
2067 def elaborate(self
, platform
):
2068 LINE_SIZE
= self
.LINE_SIZE
2069 NUM_LINES
= self
.NUM_LINES
2070 NUM_WAYS
= self
.NUM_WAYS
2071 TLB_SET_SIZE
= self
.TLB_SET_SIZE
2072 TLB_NUM_WAYS
= self
.TLB_NUM_WAYS
2073 TLB_LG_PGSZ
= self
.TLB_LG_PGSZ
2074 LOG_LENGTH
= self
.LOG_LENGTH
2076 # BRAM organisation: We never access more than
2077 # -- wishbone_data_bits at a time so to save
2078 # -- resources we make the array only that wide, and
2079 # -- use consecutive indices for to make a cache "line"
2081 # -- ROW_SIZE is the width in bytes of the BRAM
2082 # -- (based on WB, so 64-bits)
2083 ROW_SIZE
= WB_DATA_BITS
/ 8;
2085 # ROW_PER_LINE is the number of row (wishbone
2086 # transactions) in a line
2087 ROW_PER_LINE
= LINE_SIZE
// ROW_SIZE
2089 # BRAM_ROWS is the number of rows in BRAM needed
2090 # to represent the full dcache
2091 BRAM_ROWS
= NUM_LINES
* ROW_PER_LINE
2094 # Bit fields counts in the address
2096 # REAL_ADDR_BITS is the number of real address
2097 # bits that we store
2100 # ROW_BITS is the number of bits to select a row
2101 ROW_BITS
= log2_int(BRAM_ROWS
)
2103 # ROW_LINE_BITS is the number of bits to select
2104 # a row within a line
2105 ROW_LINE_BITS
= log2_int(ROW_PER_LINE
)
2107 # LINE_OFF_BITS is the number of bits for
2108 # the offset in a cache line
2109 LINE_OFF_BITS
= log2_int(LINE_SIZE
)
2111 # ROW_OFF_BITS is the number of bits for
2112 # the offset in a row
2113 ROW_OFF_BITS
= log2_int(ROW_SIZE
)
2115 # INDEX_BITS is the number if bits to
2116 # select a cache line
2117 INDEX_BITS
= log2_int(NUM_LINES
)
2119 # SET_SIZE_BITS is the log base 2 of the set size
2120 SET_SIZE_BITS
= LINE_OFF_BITS
+ INDEX_BITS
2122 # TAG_BITS is the number of bits of
2123 # the tag part of the address
2124 TAG_BITS
= REAL_ADDR_BITS
- SET_SIZE_BITS
2126 # TAG_WIDTH is the width in bits of each way of the tag RAM
2127 TAG_WIDTH
= TAG_BITS
+ 7 - ((TAG_BITS
+ 7) % 8)
2129 # WAY_BITS is the number of bits to select a way
2130 WAY_BITS
= log2_int(NUM_WAYS
)
2132 # Example of layout for 32 lines of 64 bytes:
2134 # .. tag |index| line |
2136 # .. | |---| | ROW_LINE_BITS (3)
2137 # .. | |--- - --| LINE_OFF_BITS (6)
2138 # .. | |- --| ROW_OFF_BITS (3)
2139 # .. |----- ---| | ROW_BITS (8)
2140 # .. |-----| | INDEX_BITS (5)
2141 # .. --------| | TAG_BITS (45)
2143 TAG_RAM_WIDTH
= TAG_WIDTH
* NUM_WAYS
2145 def CacheTagArray():
2146 return Array(CacheTagSet() for x
in range(NUM_LINES
))
2148 def CacheValidBitsArray():
2149 return Array(CacheWayValidBits() for x
in range(NUM_LINES
))
2151 def RowPerLineValidArray():
2152 return Array(Signal() for x
in range(ROW_PER_LINE
))
2154 # Storage. Hopefully "cache_rows" is a BRAM, the rest is LUTs
2155 cache_tags
= CacheTagArray()
2156 cache_tag_set
= Signal(TAG_RAM_WIDTH
)
2157 cache_valid_bits
= CacheValidBitsArray()
2159 # TODO attribute ram_style : string;
2160 # TODO attribute ram_style of cache_tags : signal is "distributed";
2163 TLB_SET_BITS
= log2_int(TLB_SET_SIZE
)
2164 TLB_WAY_BITS
= log2_int(TLB_NUM_WAYS
)
2165 TLB_EA_TAG_BITS
= 64 - (TLB_LG_PGSZ
+ TLB_SET_BITS
)
2166 TLB_TAG_WAY_BITS
= TLB_NUM_WAYS
* TLB_EA_TAG_BITS
2168 TLB_PTE_WAY_BITS
= TLB_NUM_WAYS
* TLB_PTE_BITS
;
2170 def TLBValidBitsArray():
2172 Signal(TLB_NUM_WAYS
) for x
in range(TLB_SET_SIZE
)
2177 Signal(TLB_TAG_WAY_BITS
) for x
in range (TLB_SET_SIZE
)
2182 Signal(TLB_PTE_WAY_BITS
) for x
in range(TLB_SET_SIZE
)
2186 return Array(Signal(NUM_WAYS
) for x
in range(TLB_NUM_WAYS
))
2188 """note: these are passed to nmigen.hdl.Memory as "attributes".
2189 don't know how, just that they are.
2191 dtlb_valid_bits
= TLBValidBitsArray()
2192 dtlb_tags
= TLBTagsArray()
2193 dtlb_ptes
= TLBPtesArray()
2194 # TODO attribute ram_style of
2195 # dtlb_tags : signal is "distributed";
2196 # TODO attribute ram_style of
2197 # dtlb_ptes : signal is "distributed";
2204 reservation
= Reservation()
2206 # Async signals on incoming request
2207 req_index
= Signal(NUM_LINES
)
2208 req_row
= Signal(BRAM_ROWS
)
2209 req_hit_way
= Signal(WAY_BITS
)
2210 req_tag
= Signal(TAG_BITS
)
2212 req_data
= Signal(64)
2213 req_same_tag
= Signal()
2216 early_req_row
= Signal(BRAM_ROWS
)
2218 cancel_store
= Signal()
2220 clear_rsrv
= Signal()
2225 use_forward1_next
= Signal()
2226 use_forward2_next
= Signal()
2228 # Cache RAM interface
2230 return Array(Signal(WB_DATA_BITS
) for x
in range(NUM_WAYS
))
2232 cache_out
= CacheRamOut()
2234 # PLRU output interface
2236 return Array(Signal(WAY_BITS
) for x
in range(Index()))
2238 plru_victim
= PLRUOut()
2239 replace_way
= Signal(WAY_BITS
)
2241 # Wishbone read/write/cache write formatting signals
2245 tlb_tag_way
= Signal(TLB_TAG_WAY_BITS
)
2246 tlb_pte_way
= Signal(TLB_PTE_WAY_BITS
)
2247 tlb_valid_way
= Signal(TLB_NUM_WAYS
)
2248 tlb_req_index
= Signal(TLB_SET_SIZE
)
2250 tlb_hit_way
= Signal(TLB_NUM_WAYS
)
2251 pte
= Signal(TLB_PTE_BITS
)
2252 ra
= Signal(REAL_ADDR_BITS
)
2254 perm_attr
= PermAttr()
2257 access_ok
= Signal()
2259 # TLB PLRU output interface
2262 Signal(TLB_WAY_BITS
) for x
in range(TLB_SET_SIZE
)
2265 tlb_plru_victim
= TLBPLRUOut()
2267 # Helper functions to decode incoming requests
2269 # Return the cache line index (tag index) for an address
2270 def get_index(addr
):
2271 return addr
[LINE_OFF_BITS
:SET_SIZE_BITS
]
2273 # Return the cache row index (data memory) for an address
2275 return addr
[ROW_OFF_BITS
:SET_SIZE_BITS
]
2277 # Return the index of a row within a line
2278 def get_row_of_line(row
):
2279 row_v
= Signal(ROW_BITS
)
2281 return row_v
[0:ROW_LINE_BITS
]
2283 # Returns whether this is the last row of a line
2284 def is_last_row_addr(addr
, last
):
2285 return addr
[ROW_OFF_BITS
:LINE_OFF_BITS
] == last
2287 # Returns whether this is the last row of a line
2288 def is_last_row(row
, last
):
2289 return get_row_of_line(row
) == last
2291 # Return the address of the next row in the current cache line
2292 def next_row_addr(addr
):
2293 row_idx
= Signal(ROW_LINE_BITS
)
2294 result
= WBAddrType()
2295 # Is there no simpler way in VHDL to
2296 # generate that 3 bits adder ?
2297 row_idx
= addr
[ROW_OFF_BITS
:LINE_OFF_BITS
]
2298 row_idx
= Signal(row_idx
+ 1)
2300 result
[ROW_OFF_BITS
:LINE_OFF_BITS
] = row_idx
2303 # Return the next row in the current cache line. We use a
2304 # dedicated function in order to limit the size of the
2305 # generated adder to be only the bits within a cache line
2306 # (3 bits with default settings)
2308 row_v
= Signal(ROW_BITS
)
2309 row_idx
= Signal(ROW_LINE_BITS
)
2310 result
= Signal(ROW_BITS
)
2313 row_idx
= row_v
[ROW_LINE_BITS
]
2314 row_v
[0:ROW_LINE_BITS
] = Signal(row_idx
+ 1)
2317 # Get the tag value from the address
2319 return addr
[SET_SIZE_BITS
:REAL_ADDR_BITS
]
2321 # Read a tag from a tag memory row
2322 def read_tag(way
, tagset
):
2323 return tagset
[way
*TAG_WIDTH
:way
* TAG_WIDTH
+ TAG_BITS
]
2325 # Read a TLB tag from a TLB tag memory row
2326 def read_tlb_tag(way
, tags
):
2329 j
= way
* TLB_EA_TAG_BITS
2330 return tags
[j
:j
+ TLB_EA_TAG_BITS
]
2332 # Write a TLB tag to a TLB tag memory row
2333 def write_tlb_tag(way
, tags
), tag
):
2336 j
= way
* TLB_EA_TAG_BITS
2337 tags
[j
:j
+ TLB_EA_TAG_BITS
] = tag
2339 # Read a PTE from a TLB PTE memory row
2340 def read_tlb_pte(way
, ptes
):
2343 j
= way
* TLB_PTE_BITS
2344 return ptes
[j
:j
+ TLB_PTE_BITS
]
2346 def write_tlb_pte(way
, ptes
,newpte
):
2349 j
= way
* TLB_PTE_BITS
2350 return ptes
[j
:j
+ TLB_PTE_BITS
] = newpte
2352 assert (LINE_SIZE
% ROW_SIZE
) == 0 "LINE_SIZE not " \
2353 "multiple of ROW_SIZE"
2355 assert (LINE_SIZE
% 2) == 0 "LINE_SIZE not power of 2"
2357 assert (NUM_LINES
% 2) == 0 "NUM_LINES not power of 2"
2359 assert (ROW_PER_LINE
% 2) == 0 "ROW_PER_LINE not" \
2362 assert ROW_BITS
== (INDEX_BITS
+ ROW_LINE_BITS
) \
2363 "geometry bits don't add up"
2365 assert (LINE_OFF_BITS
= ROW_OFF_BITS
+ ROW_LINEBITS
) \
2366 "geometry bits don't add up"
2368 assert REAL_ADDR_BITS
== (TAG_BITS
+ INDEX_BITS \
2369 + LINE_OFF_BITS
) "geometry bits don't add up"
2371 assert REAL_ADDR_BITS
== (TAG_BITS
+ ROW_BITS
+ ROW_OFF_BITS
) \
2372 "geometry bits don't add up"
2374 assert 64 == wishbone_data_bits
"Can't yet handle a" \
2375 "wishbone width that isn't 64-bits"
2377 assert SET_SIZE_BITS
<= TLB_LG_PGSZ
"Set indexed by" \
2380 # we don't yet handle collisions between loadstore1 requests
2382 comb
+= m_out
.stall
.eq(0)
2384 # Hold off the request in r0 when r1 has an uncompleted request
2385 comb
+= r0_stall
.eq(r0_full
& r1
.full
)
2386 comb
+= r0_valid
.eq(r0_full
& ~r1
.full
)
2387 comb
+= stall_out
.eq(r0_stall
)
2389 # Wire up wishbone request latch out of stage 1
2390 comb
+= wishbone_out
.eq(r1
.wb
)
2396 # entity dcache_tb is
2399 # architecture behave of dcache_tb is
2400 # signal clk : std_ulogic;
2401 # signal rst : std_ulogic;
2403 # signal d_in : Loadstore1ToDcacheType;
2404 # signal d_out : DcacheToLoadstore1Type;
2406 # signal m_in : MmuToDcacheType;
2407 # signal m_out : DcacheToMmuType;
2409 # signal wb_bram_in : wishbone_master_out;
2410 # signal wb_bram_out : wishbone_slave_out;
2412 # constant clk_period : time := 10 ns;
2414 # dcache0: entity work.dcache
2427 # wishbone_out => wb_bram_in,
2428 # wishbone_in => wb_bram_out
2431 # -- BRAM Memory slave
2432 # bram0: entity work.wishbone_bram_wrapper
2434 # MEMORY_SIZE => 1024,
2435 # RAM_INIT_FILE => "icache_test.bin"
2440 # wishbone_in => wb_bram_in,
2441 # wishbone_out => wb_bram_out
2444 # clk_process: process
2447 # wait for clk_period/2;
2449 # wait for clk_period/2;
2452 # rst_process: process
2455 # wait for 2*clk_period;
2463 # d_in.valid <= '0';
2466 # d_in.addr <= (others => '0');
2467 # d_in.data <= (others => '0');
2468 # m_in.valid <= '0';
2469 # m_in.addr <= (others => '0');
2470 # m_in.pte <= (others => '0');
2472 # wait for 4*clk_period;
2473 # wait until rising_edge(clk);
2475 # -- Cacheable read of address 4
2478 # d_in.addr <= x"0000000000000004";
2479 # d_in.valid <= '1';
2480 # wait until rising_edge(clk);
2481 # d_in.valid <= '0';
2483 # wait until rising_edge(clk) and d_out.valid = '1';
2484 # assert d_out.data = x"0000000100000000"
2485 # report "data @" & to_hstring(d_in.addr) &
2486 # "=" & to_hstring(d_out.data) &
2487 # " expected 0000000100000000"
2489 # -- wait for clk_period;
2491 # -- Cacheable read of address 30
2494 # d_in.addr <= x"0000000000000030";
2495 # d_in.valid <= '1';
2496 # wait until rising_edge(clk);
2497 # d_in.valid <= '0';
2499 # wait until rising_edge(clk) and d_out.valid = '1';
2500 # assert d_out.data = x"0000000D0000000C"
2501 # report "data @" & to_hstring(d_in.addr) &
2502 # "=" & to_hstring(d_out.data) &
2503 # " expected 0000000D0000000C"
2506 # -- Non-cacheable read of address 100
2509 # d_in.addr <= x"0000000000000100";
2510 # d_in.valid <= '1';
2511 # wait until rising_edge(clk);
2512 # d_in.valid <= '0';
2513 # wait until rising_edge(clk) and d_out.valid = '1';
2514 # assert d_out.data = x"0000004100000040"
2515 # report "data @" & to_hstring(d_in.addr) &
2516 # "=" & to_hstring(d_out.data) &
2517 # " expected 0000004100000040"
2520 # wait until rising_edge(clk);
2521 # wait until rising_edge(clk);
2522 # wait until rising_edge(clk);
2523 # wait until rising_edge(clk);
def dcache_sim(dut):
    """Test-bench stimulus process: drive the dcache through two cacheable
    reads and one non-cacheable read, checking the data returned.

    Python port of the (commented-out) VHDL dcache_tb process above; the
    expected data values come from that testbench's BRAM init file.
    """
    # clear all request inputs
    yield dut.d_in.valid.eq(0)
    yield dut.d_in.load.eq(0)
    yield dut.d_in.nc.eq(0)
    yield dut.d_in.addr.eq(0)   # was "adrr" -- typo, AttributeError at run time
    yield dut.d_in.data.eq(0)
    yield dut.m_in.valid.eq(0)
    yield dut.m_in.addr.eq(0)
    yield dut.m_in.pte.eq(0)
    # wait 4 * clk_period
    for _ in range(4):
        yield

    # Cacheable read of address 4
    yield dut.d_in.load.eq(1)
    yield dut.d_in.nc.eq(0)
    yield dut.d_in.addr.eq(Const(0x0000000000000004, 64))
    yield dut.d_in.valid.eq(1)
    yield                       # wait until rising_edge(clk)
    yield dut.d_in.valid.eq(0)
    # spin until the cache answers; the original loop had no body and
    # would never advance the clock
    while not (yield dut.d_out.valid):
        yield
    # read the settled value: comparing the Signal object itself (as the
    # original did) is always truthy and never actually checks the data.
    # the assert was also missing its comma (SyntaxError) and the f-prefix
    # on the first message fragment.
    data = yield dut.d_out.data
    assert data == 0x0000000100000000, \
        f"data @0000000000000004={data:016x} expected 0000000100000000"

    # Cacheable read of address 30
    yield dut.d_in.load.eq(1)
    yield dut.d_in.nc.eq(0)
    yield dut.d_in.addr.eq(Const(0x0000000000000030, 64))
    yield dut.d_in.valid.eq(1)
    yield                       # wait until rising_edge(clk)
    yield dut.d_in.valid.eq(0)
    while not (yield dut.d_out.valid):
        yield
    data = yield dut.d_out.data
    assert data == 0x0000000D0000000C, \
        f"data @0000000000000030={data:016x} expected 0000000D0000000C"

    # Non-cacheable read of address 100
    yield dut.d_in.load.eq(1)
    yield dut.d_in.nc.eq(1)
    yield dut.d_in.addr.eq(Const(0x0000000000000100, 64))
    yield dut.d_in.valid.eq(1)
    yield                       # wait until rising_edge(clk)
    yield dut.d_in.valid.eq(0)
    while not (yield dut.d_out.valid):
        yield
    data = yield dut.d_out.data
    assert data == 0x0000004100000040, \
        f"data @0000000000000100={data:016x} expected 0000004100000040"
# Convert to RTLIL for inspection, then run the simulation testbench.
vl = rtlil.convert(dut, ports=[])
with open("test_dcache.il", "w") as f:
    # NOTE(review): the with-body was lost in this copy; writing the
    # converted RTLIL is the only sensible content -- confirm upstream
    f.write(vl)
# was "dcache_sim()" -- the stimulus generator requires the dut argument
run_simulation(dut, dcache_sim(dut), vcd_name='test_dcache.vcd')
2603 if __name__
== '__main__':