3 based on Anton Blanchard microwatt dcache.vhdl
7 from enum
import Enum
, unique
9 from nmigen
import Module
, Signal
, Elaboratable
,
11 from nmigen
.cli
import main
12 from nmigen
.iocontrol
import RecordObject
13 from nmigen
.util
import log2_int
15 from experiment
.mem_types
import LoadStore1ToDcacheType
,
16 DcacheToLoadStore1Type
,
20 from experiment
.wb_types
import WB_ADDR_BITS
, WB_DATA_BITS
, WB_SEL_BITS
,
21 WBAddrType
, WBDataType
, WBSelType
,
22 WbMasterOut
, WBSlaveOut
,
23 WBMasterOutVector
, WBSlaveOutVector
,
24 WBIOMasterOut
, WBIOSlaveOut
# Record for storing permission, attribute, etc. bits from a PTE
class PermAttr(RecordObject):
    """Permission/attribute bits extracted from a page-table entry."""
    def __init__(self):
        super().__init__()
        self.reference = Signal()  # R bit: page has been referenced
        self.changed   = Signal()  # C bit: page has been modified
        self.nocache   = Signal()  # non-cacheable mapping
        # NOTE(review): perm_attr.priv is written elsewhere in this file
        # (tlb_search real-mode defaults) — TODO confirm this field list
        # matches the original, some source lines are missing here
        self.priv      = Signal()  # privileged-access-only mapping
        self.rd_perm   = Signal()  # read permitted
        self.wr_perm   = Signal()  # write permitted
39 def extract_perm_attr(pte
):
# Type of operation on a "valid" input
@unique
class Op(Enum):
    """Decoded dcache operation, selected from the request + hit status."""
    OP_NONE      = 0  # no operation ("others" case in the VHDL decode)
    OP_BAD       = 1  # NC cache hit, TLB miss, prot/RC failure
    OP_STCX_FAIL = 2  # conditional store w/o reservation
    OP_LOAD_HIT  = 3  # Cache hit on load
    OP_LOAD_MISS = 4  # Load missing cache
    OP_LOAD_NC   = 5  # Non-cachable load
    OP_STORE_HIT = 6  # Store hitting cache
    OP_STORE_MISS = 7  # Store missing cache
# Cache state machine states (dcache_slow / reload state machine)
@unique
class State(Enum):
    """State of the dcache's internal (non-hit-path) state machine."""
    IDLE             = 0  # Normal load hit processing
    RELOAD_WAIT_ACK  = 1  # Cache reload wait ack
    STORE_WAIT_ACK   = 2  # Store wait ack
    NC_LOAD_WAIT_ACK = 3  # Non-cachable load wait ack
74 # In order to make timing, we use the BRAMs with
75 # an output buffer, which means that the BRAM
76 # output is delayed by an extra cycle.
78 # Thus, the dcache has a 2-stage internal pipeline
79 # for cache hits with no stalls.
81 # All other operations are handled via stalling
84 # The second stage can thus complete a hit at the same
85 # time as the first stage emits a stall for a complex op.
# Stage 0 register, basically contains just the latched request
class RegStage0(RecordObject):
    """Latched input request (from loadstore1 or the MMU)."""
    def __init__(self):
        super().__init__()
        self.req     = LoadStore1ToDcacheType()
        # NOTE(review): stage_0 writes r.tlbie / r.doall / r.tlbld —
        # TODO confirm these fields belong here; the source lines that
        # would declare them are missing from this view
        self.tlbie   = Signal()
        self.doall   = Signal()
        self.tlbld   = Signal()
        self.mmu_req = Signal()  # indicates source of request
# Memory-access request latched for the slow (non-hit) paths
class MemAccessRequest(RecordObject):
    """Request descriptor carried into the reload/store state machine."""
    def __init__(self):
        super().__init__()
        self.valid     = Signal()                 # request pending
        self.real_addr = Signal(REAL_ADDR_BITS)   # translated address
        self.data      = Signal(64)               # store data
        self.byte_sel  = Signal(8)                # byte enables
        self.hit_way   = Signal(WAY_BITS)         # way that hit (if any)
        self.same_tag  = Signal()                 # matches line being reloaded
        self.mmu_req   = Signal()                 # request came from MMU
# First stage register, contains state for stage 1 of load hits
# and for the state machine used by all other operations
class RegStage1(RecordObject):
    """Stage-1 pipeline register and slow-path state-machine state."""
    def __init__(self):
        super().__init__()
        # Info about the request
        self.full    = Signal()   # have uncompleted request
        self.mmu_req = Signal()   # request is from MMU
        self.req     = MemAccessRequest()

        # Cache hit state (load hit path, delayed one cycle)
        self.hit_way        = Signal(WAY_BITS)
        self.hit_load_valid = Signal()
        self.hit_index      = Signal(NUM_LINES)
        self.cache_hit      = Signal()

        # TLB hit state
        self.tlb_hit       = Signal()
        self.tlb_hit_way   = Signal(TLB_NUM_WAYS)
        self.tlb_hit_index = Signal(TLB_SET_SIZE)

        # 2-stage data buffer for data forwarded from writes to reads
        self.forward_data1  = Signal(64)
        self.forward_data2  = Signal(64)
        self.forward_sel1   = Signal(8)
        self.forward_valid1 = Signal()
        self.forward_way1   = Signal(WAY_BITS)
        self.forward_row1   = Signal(BRAM_ROWS)
        self.use_forward1   = Signal()
        self.forward_sel    = Signal(8)

        # Cache miss state (reload state machine)
        # NOTE(review): dcache_request reads r1.state — TODO confirm a
        # state field belongs here; the declaring line is missing from
        # this view
        self.state        = Signal(State)
        self.write_bram   = Signal()
        self.write_tag    = Signal()
        self.slow_valid   = Signal()
        # was WishboneMasterOut(): not a name imported or defined in this
        # file — use WBMasterOut, consistent with Dcache.__init__
        self.wb           = WBMasterOut()
        self.reload_tag   = Signal(TAG_BITS)
        self.store_way    = Signal(WAY_BITS)
        self.store_row    = Signal(BRAM_ROWS)
        self.store_index  = Signal(NUM_LINES)
        self.end_row_ix   = Signal(ROW_LINE_BIT)
        self.rows_valid   = RowPerLineValidArray()
        self.acks_pending = Signal(3)
        self.inc_acks     = Signal()
        self.dec_acks     = Signal()

        # Signals to complete (possibly with error)
        self.ls_valid      = Signal()
        self.ls_error      = Signal()
        self.mmu_done      = Signal()
        self.mmu_error     = Signal()
        self.cache_paradox = Signal()

        # Signal to complete a failed stcx.
        self.stcx_fail = Signal()
# Reservation information (for larx/stcx)
class Reservation(RecordObject):
    """Load-reservation address and valid bit."""
    def __init__(self):
        super().__init__()
        # NOTE(review): reservation.valid is read and written elsewhere
        # in this file — TODO confirm it belongs here; its declaring
        # line is missing from this view
        self.valid = Signal()
        # VHDL was "63 downto LINE_OFF_BITS" — `downto` is not Python;
        # the width is 64 - LINE_OFF_BITS bits.
        # TODO LINE_OFF_BITS is 6
        self.addr  = Signal(64 - LINE_OFF_BITS)
180 # Set associative dcache write-through
182 # TODO (in no specific order):
184 # * See list in icache.vhdl
185 # * Complete load misses on the cycle when WB data comes instead of
186 # at the end of line (this requires dealing with requests coming in
188 class Dcache(Elaboratable
):
190 # TODO: make these parameters of Dcache at some point
191 self
.LINE_SIZE
= 64 # Line size in bytes
192 self
.NUM_LINES
= 32 # Number of lines in a set
193 self
.NUM_WAYS
= 4 # Number of ways
194 self
.TLB_SET_SIZE
= 64 # L1 DTLB entries per set
195 self
.TLB_NUM_WAYS
= 2 # L1 DTLB number of sets
196 self
.TLB_LG_PGSZ
= 12 # L1 DTLB log_2(page_size)
197 self
.LOG_LENGTH
= 0 # Non-zero to enable log data collection
199 self
.d_in
= LoadStore1ToDcacheType()
200 self
.d_out
= DcacheToLoadStore1Type()
202 self
.m_in
= MmuToDcacheType()
203 self
.m_out
= DcacheToMmuType()
205 self
.stall_out
= Signal()
207 self
.wb_out
= WBMasterOut()
208 self
.wb_in
= WBSlaveOut()
210 self
.log_out
= Signal(20)
212 # Latch the request in r0.req as long as we're not stalling
213 def stage_0(self
, m
, d_in
, m_in
):
217 # variable r : reg_stage_0_t;
222 # if rising_edge(clk) then
223 # assert (d_in.valid and m_in.valid) = '0'
224 # report "request collision loadstore vs MMU";
225 assert ~
(d_in
.valid
& m_in
.valid
) "request collision
228 # if m_in.valid = '1' then
229 with m
.If(m_in
.valid
):
230 # r.req.valid := '1';
231 # r.req.load := not (m_in.tlbie or m_in.tlbld);
234 # r.req.reserve := '0';
235 # r.req.virt_mode := '0';
236 # r.req.priv_mode := '1';
237 # r.req.addr := m_in.addr;
238 # r.req.data := m_in.pte;
239 # r.req.byte_sel := (others => '1');
240 # r.tlbie := m_in.tlbie;
241 # r.doall := m_in.doall;
242 # r.tlbld := m_in.tlbld;
244 sync
+= r
.req
.valid
.eq(1)
245 sync
+= r
.req
.load
.eq(~
(m_in
.tlbie | m_in
.tlbld
))
246 sync
+= r
.req
.priv_mode
.eq(1)
247 sync
+= r
.req
.addr
.eq(m_in
.addr
)
248 sync
+= r
.req
.data
.eq(m_in
.pte
)
249 sync
+= r
.req
.byte_sel
.eq(1)
250 sync
+= r
.tlbie
.eq(m_in
.tlbie
)
251 sync
+= r
.doall
.eq(m_in
.doall
)
252 sync
+= r
.tlbld
.eq(m_in
.tlbld
)
253 sync
+= r
.mmu_req
.eq(1)
261 sync
+= r
.req
.eq(d_in
)
265 # elsif r1.full = '0' or r0_full = '0' then
266 with m
.If(~r1
.full | ~r0_full
):
268 # r0_full <= r.req.valid;
270 sync
+= r0_full
.eq(r
.req
.valid
)
276 # Operates in the second cycle on the request latched in r0.req.
277 # TLB updates write the entry at the end of the second cycle.
278 def tlb_read(self
, m
, m_in
, d_in
, r0_stall
, tlb_valid_way
,
279 tlb_tag_way
, tlb_pte_way
, dtlb_valid_bits
,
280 dtlb_tags
, dtlb_ptes
):
285 # variable index : tlb_index_t;
286 # variable addrbits :
287 # std_ulogic_vector(TLB_SET_BITS - 1 downto 0);
289 addrbits
= Signal(TLB_SET_BITS
)
295 # if rising_edge(clk) then
296 # if m_in.valid = '1' then
297 with m
.If(m_in
.valid
):
298 # addrbits := m_in.addr(TLB_LG_PGSZ + TLB_SET_BITS
299 # - 1 downto TLB_LG_PGSZ);
300 sync
+= addrbits
.eq(m_in
.addr
[
301 TLB_LG_PGSZ
:TLB_LG_PGSZ
+ TLB_SET_BITS
305 # addrbits := d_in.addr(TLB_LG_PGSZ + TLB_SET_BITS
306 # - 1 downto TLB_LG_PGSZ);
307 sync
+= addrbits
.eq(d_in
.addr
[
308 TLB_LG_PGSZ
:TLB_LG_PGSZ
+ TLB_SET_BITS
312 # index := to_integer(unsigned(addrbits));
313 sync
+= index
.eq(addrbits
)
314 # -- If we have any op and the previous op isn't
315 # -- finished, then keep the same output for next cycle.
316 # if r0_stall = '0' then
317 # If we have any op and the previous op isn't finished,
318 # then keep the same output for next cycle.
319 with m
.If(~r0_stall
):
320 sync
+= tlb_valid_way
.eq(dtlb_valid_bits
[index
])
321 sync
+= tlb_tag_way
.eq(dtlb_tags
[index
])
322 sync
+= tlb_pte_way
.eq(dtlb_ptes
[index
])
327 # -- Generate TLB PLRUs
328 # maybe_tlb_plrus: if TLB_NUM_WAYS > 1 generate
330 def maybe_tlb_plrus(self
, m
, r1
, tlb_plru_victim
, acc
, acc_en
, lru
):
334 with m
.If(TLB_NUM_WAYS
> 1):
335 for i
in range(TLB_SET_SIZE
):
337 tlb_plru
= PLRU(TLB_WAY_BITS
)
338 tlb_plru_acc
= Signal(TLB_WAY_BITS
)
339 tlb_plru_acc_en
= Signal()
340 tlb_plru_out
= Signal(TLB_WAY_BITS
)
342 comb
+= tlb_plru
.acc
.eq(tlb_plru_acc
)
343 comb
+= tlb_plru
.acc_en
.eq(tlb_plru_acc_en
)
344 comb
+= tlb_plru
.lru
.eq(tlb_plru_out
)
347 with m
.If(r1
.tlb_hit_index
== i
):
348 comb
+= tlb_plru
.acc_en
.eq(
353 comb
+= tlb_plru
.acc_en
.eq(0)
354 comb
+= tlb_plru
.acc
.eq(
358 comb
+= tlb_plru_victim
[i
].eq(tlb_plru
.lru
)
360 def tlb_search(self
, tlb_req_index
, r0
, tlb_valid_way_ tlb_tag_way
,
361 tlb_pte_way
, pte
, tlb_hit
, valid_ra
, perm_attr
, ra
):
366 # variable hitway : tlb_way_t;
367 # variable hit : std_ulogic;
368 # variable eatag : tlb_tag_t;
379 # to_integer(unsigned(r0.req.addr(
380 # TLB_LG_PGSZ + TLB_SET_BITS - 1 downto TLB_LG_PGSZ
384 # eatag := r0.req.addr(63 downto TLB_LG_PGSZ + TLB_SET_BITS);
385 # for i in tlb_way_t loop
386 # if tlb_valid_way(i) = '1' and
387 # read_tlb_tag(i, tlb_tag_way) = eatag then
392 # tlb_hit <= hit and r0_valid;
393 # tlb_hit_way <= hitway;
394 comb
+= tlb_req_index
.eq(r0
.req
.addr
[
395 TLB_LG_PGSZ
:TLB_LG_PGSZ
+ TLB_SET_BITS
398 comb
+= eatag
.eq(r0
.req
.addr
[
399 TLB_LG_PGSZ
+ TLB_SET_BITS
:64
403 with m
.If(tlb_valid_way(i
)
404 & read_tlb_tag(i
, tlb_tag_way
) == eatag
):
409 comb
+= tlb_hit
.eq(hit
& r0_valid
)
410 comb
+= tlb_hit_way
.eq(hitway
)
412 # if tlb_hit = '1' then
414 # pte <= read_tlb_pte(hitway, tlb_pte_way);
415 comb
+= pte
.eq(read_tlb_pte(hitway
, tlb_pte_way
))
418 # pte <= (others => '0');
421 # valid_ra <= tlb_hit or not r0.req.virt_mode;
422 comb
+= valid_ra
.eq(tlb_hit | ~r0
.req
.virt_mode
)
423 # if r0.req.virt_mode = '1' then
424 with m
.If(r0
.req
.virt_mode
):
425 # ra <= pte(REAL_ADDR_BITS - 1 downto TLB_LG_PGSZ) &
426 # r0.req.addr(TLB_LG_PGSZ - 1 downto ROW_OFF_BITS) &
427 # (ROW_OFF_BITS-1 downto 0 => '0');
428 # perm_attr <= extract_perm_attr(pte);
430 Const(ROW_OFF_BITS
, ROW_OFF_BITS
),
431 r0
.req
.addr
[ROW_OFF_BITS
:TLB_LG_PGSZ
],
432 pte
[TLB_LG_PGSZ
:REAL_ADDR_BITS
]
434 comb
+= perm_attr
.eq(extract_perm_attr(pte
))
438 # REAL_ADDR_BITS - 1 downto ROW_OFF_BITS
439 # ) & (ROW_OFF_BITS-1 downto 0 => '0');
441 Const(ROW_OFF_BITS
, ROW_OFF_BITS
),
442 r0
.rq
.addr
[ROW_OFF_BITS
:REAL_ADDR_BITS
]
445 # perm_attr <= real_mode_perm_attr;
446 comb
+= perm_attr
.reference
.eq(1)
447 comb
+= perm_attr
.changed
.eq(1)
448 comb
+= perm_attr
.priv
.eq(1)
449 comb
+= perm_attr
.nocache
.eq(0)
450 comb
+= perm_attr
.rd_perm
.eq(1)
451 comb
+= perm_attr
.wr_perm
.eq(1)
455 def tlb_update(self
, r0_valid
, r0
, dtlb_valid_bits
, tlb_req_index
,
456 tlb_hit_way
, tlb_hit
, tlb_plru_victim
, tlb_tag_way
,
457 dtlb_tags
, tlb_pte_way
, dtlb_ptes
, dtlb_valid_bits
):
462 # variable tlbie : std_ulogic;
463 # variable tlbwe : std_ulogic;
464 # variable repl_way : tlb_way_t;
465 # variable eatag : tlb_tag_t;
466 # variable tagset : tlb_way_tags_t;
467 # variable pteset : tlb_way_ptes_t;
472 tagset
= TLBWayTags()
473 pteset
= TLBWayPtes()
483 # if rising_edge(clk) then
484 # tlbie := r0_valid and r0.tlbie;
485 # tlbwe := r0_valid and r0.tlbldoi;
486 sync
+= tlbie
.eq(r0_valid
& r0
.tlbie
)
487 sync
+= tlbwe
.eq(r0_valid
& r0
.tlbldoi
)
489 # if rst = '1' or (tlbie = '1' and r0.doall = '1') then
490 # with m.If (TODO understand how signal resets work in nmigen)
491 # -- clear all valid bits at once
492 # for i in tlb_index_t loop
493 # dtlb_valids(i) <= (others => '0');
495 # clear all valid bits at once
496 for i
in range(TLB_SET_SIZE
):
497 sync
+= dtlb_valid_bits
[i
].eq(0)
499 # elsif tlbie = '1' then
501 # if tlb_hit = '1' then
503 # dtlb_valids(tlb_req_index)(tlb_hit_way) <= '0';
504 sync
+= dtlb_valid_bits
[tlb_req_index
][tlb_hit_way
].eq(0)
506 # elsif tlbwe = '1' then
508 # if tlb_hit = '1' then
510 # repl_way := tlb_hit_way;
511 sync
+= repl_way
.eq(tlb_hit_way
)
514 # repl_way := to_integer(unsigned(
515 # tlb_plru_victim(tlb_req_index)));
516 sync
+= repl_way
.eq(tlb_plru_victim
[tlb_req_index
])
518 # eatag := r0.req.addr(
519 # 63 downto TLB_LG_PGSZ + TLB_SET_BITS
521 # tagset := tlb_tag_way;
522 # write_tlb_tag(repl_way, tagset, eatag);
523 # dtlb_tags(tlb_req_index) <= tagset;
524 # pteset := tlb_pte_way;
525 # write_tlb_pte(repl_way, pteset, r0.req.data);
526 # dtlb_ptes(tlb_req_index) <= pteset;
527 # dtlb_valids(tlb_req_index)(repl_way) <= '1';
528 sync
+= eatag
.eq(r0
.req
.addr
[TLB_LG_PGSZ
+ TLB_SET_BITS
:64])
529 sync
+= tagset
.eq(tlb_tag_way
)
530 sync
+= write_tlb_tag(repl_way
, tagset
, eatag
)
531 sync
+= dtlb_tags
[tlb_req_index
].eq(tagset
)
532 sync
+= pteset
.eq(tlb_pte_way
)
533 sync
+= write_tlb_pte(repl_way
, pteset
, r0
.req
.data
)
534 sync
+= dtlb_ptes
[tlb_req_index
].eq(pteset
)
535 sync
+= dtlb_valid_bits
[tlb_req_index
][repl_way
].eq(1)
541 # maybe_plrus: if NUM_WAYS > 1 generate
543 def maybe_plrus(self
, r1
):
549 # TODO learn translation of generate into nmgien @lkcl
550 # plrus: for i in 0 to NUM_LINES-1 generate
551 for i
in range(NUM_LINES
):
553 # signal plru_acc : std_ulogic_vector(WAY_BITS-1 downto 0);
554 # signal plru_acc_en : std_ulogic;
555 # signal plru_out : std_ulogic_vector(WAY_BITS-1 downto 0);
556 plru
= PLRU(WAY_BITS
)
557 plru_acc
= Signal(WAY_BITS
)
558 plru_acc_en
= Signal()
559 plru_out
= Signal(WAY_BITS
)
562 # TODO learn tranlation of entity, generic map, port map in
564 # plru : entity work.plru
572 # acc_en => plru_acc_en,
575 comb
+= plru
.acc
.eq(plru_acc
)
576 comb
+= plru
.acc_en
.eq(plru_acc_en
)
577 comb
+= plru
.lru
.eq(plru_out
)
582 # if r1.hit_index = i then
584 with m
.If(r1
.hit_index
== i
):
585 # plru_acc_en <= r1.cache_hit;
586 comb
+= plru_acc_en
.eq(r1
.cache_hit
)
589 # plru_acc_en <= '0';
590 comb
+= plru_acc_en
.eq(0)
592 # plru_acc <= std_ulogic_vector(to_unsigned(
593 # r1.hit_way, WAY_BITS
595 # plru_victim(i) <= plru_out;
596 comb
+= plru_acc
.eq(r1
.hit_way
)
597 comb
+= plru_victim
[i
].eq(plru_out
)
602 # -- Cache tag RAM read port
603 # cache_tag_read : process(clk)
604 # Cache tag RAM read port
605 def cache_tag_read(self
, r0_stall
, req_index
, m_in
, d_in
,
606 cache_tag_set
, cache_tags
):
611 # variable index : index_t;
612 index
= Signal(NUM_LINES
)
617 # if rising_edge(clk) then
618 # if r0_stall = '1' then
620 # index := req_index;
621 sync
+= index
.eq(req_index
)
623 # elsif m_in.valid = '1' then
624 with m
.Elif(m_in
.valid
):
625 # index := get_index(m_in.addr);
626 sync
+= index
.eq(get_index(m_in
.addr
))
630 # index := get_index(d_in.addr);
631 sync
+= index
.eq(get_index(d_in
.addr
))
633 # cache_tag_set <= cache_tags(index);
634 sync
+= cache_tag_set
.eq(cache_tags
[index
])
638 # Cache request parsing and hit detection
639 def dcache_request(self
, r0
, ra
, req_index
, req_row
, req_tag
,
640 r0_valid
, r1
, cache_valid_bits
, replace_way
,
641 use_forward1_next
, use_forward2_next
,
642 req_hit_way
, plru_victim
, rc_ok
, perm_attr
,
643 valid_ra
, perm_ok
, access_ok
, req_op
, req_ok
,
644 r0_stall
, m_in
, early_req_row
, d_in
):
649 # variable is_hit : std_ulogic;
650 # variable hit_way : way_t;
651 # variable op : op_t;
652 # variable opsel : std_ulogic_vector(2 downto 0);
653 # variable go : std_ulogic;
654 # variable nc : std_ulogic;
655 # variable s_hit : std_ulogic;
656 # variable s_tag : cache_tag_t;
657 # variable s_pte : tlb_pte_t;
658 # variable s_ra : std_ulogic_vector(
659 # REAL_ADDR_BITS - 1 downto 0
661 # variable hit_set : std_ulogic_vector(
662 # TLB_NUM_WAYS - 1 downto 0
664 # variable hit_way_set : hit_way_set_t;
665 # variable rel_matches : std_ulogic_vector(
666 # TLB_NUM_WAYS - 1 downto 0
670 hit_way
= Signal(WAY_BITS
)
676 s_tag
= Signal(TAG_BITS
)
677 s_pte
= Signal(TLB_PTE_BITS
)
678 s_ra
= Signal(REAL_ADDR_BITS
)
679 hit_set
= Signal(TLB_NUM_WAYS
)
680 hit_way_set
= HitWaySet()
681 rel_matches
= Signal(TLB_NUM_WAYS
)
685 # -- Extract line, row and tag from request
686 # req_index <= get_index(r0.req.addr);
687 # req_row <= get_row(r0.req.addr);
688 # req_tag <= get_tag(ra);
690 # go := r0_valid and not (r0.tlbie or r0.tlbld)
691 # and not r1.ls_error;
692 # Extract line, row and tag from request
693 comb
+= req_index
.eq(get_index(r0
.req
.addr
))
694 comb
+= req_row
.eq(get_row(r0
.req
.addr
))
695 comb
+= req_tag
.eq(get_tag(ra
))
697 comb
+= go
.eq(r0_valid
& ~
(r0
.tlbie | r0
.tlbld
) & ~r1
.ls_error
)
702 # Test if pending request is a hit on any way
703 # In order to make timing in virtual mode,
704 # when we are using the TLB, we compare each
705 # way with each of the real addresses from each way of
706 # the TLB, and then decide later which match to use.
708 # if r0.req.virt_mode = '1' then
709 with m
.If(r0
.req
.virt_mode
):
710 # rel_matches := (others => '0');
711 comb
+= rel_matches
.eq(0)
712 # for j in tlb_way_t loop
713 for j
in range(TLB_NUM_WAYS
):
714 # hit_way_set(j) := 0;
716 # s_pte := read_tlb_pte(j, tlb_pte_way);
717 # s_ra := s_pte(REAL_ADDR_BITS - 1 downto TLB_LG_PGSZ)
718 # & r0.req.addr(TLB_LG_PGSZ - 1 downto 0);
719 # s_tag := get_tag(s_ra);
720 comb
+= hit_way_set
[j
].eq(0)
722 comb
+= s_pte
.eq(read_tlb_pte(j
, tlb_pte_way
))
724 r0
.req
.addr
[0:TLB_LG_PGSZ
],
725 s_pte
[TLB_LG_PGSZ
:REAL_ADDR_BITS
]
727 comb
+= s_tag
.eq(get_tag(s_ra
))
729 # for i in way_t loop
730 for i
in range(NUM_WAYS
):
731 # if go = '1' and cache_valids(req_index)(i) = '1'
732 # and read_tag(i, cache_tag_set) = s_tag
733 # and tlb_valid_way(j) = '1' then
734 with m
.If(go
& cache_valid_bits
[req_index
][i
] &
735 read_tag(i
, cache_tag_set
) == s_tag
737 # hit_way_set(j) := i;
739 comb
+= hit_way_set
[j
].eq(i
)
743 # hit_set(j) := s_hit;
744 comb
+= hit_set
[j
].eq(s_hit
)
745 # if s_tag = r1.reload_tag then
746 with m
.If(s_tag
== r1
.reload_tag
):
747 # rel_matches(j) := '1';
748 comb
+= rel_matches
[j
].eq(1)
751 # if tlb_hit = '1' then
753 # is_hit := hit_set(tlb_hit_way);
754 # hit_way := hit_way_set(tlb_hit_way);
755 # rel_match := rel_matches(tlb_hit_way);
756 comb
+= is_hit
.eq(hit_set
[tlb_hit_way
])
757 comb
+= hit_way
.eq(hit_way_set
[tlb_hit_way
])
758 comb
+= rel_match
.eq(rel_matches
[tlb_hit_way
])
762 # s_tag := get_tag(r0.req.addr);
763 comb
+= s_tag
.eq(get_tag(r0
.req
.addr
))
764 # for i in way_t loop
765 for i
in range(NUM_WAYS
):
766 # if go = '1' and cache_valids(req_index)(i) = '1' and
767 # read_tag(i, cache_tag_set) = s_tag then
768 with m
.If(go
& cache_valid_bits
[req_index
][i
] &
769 read_tag(i
, cache_tag_set
) == s_tag
):
772 comb
+= hit_way
.eq(i
)
776 # if s_tag = r1.reload_tag then
777 with m
.If(s_tag
== r1
.reload_tag
):
779 comb
+= rel_match
.eq(1)
782 # req_same_tag <= rel_match;
783 comb
+= req_same_tag
.eq(rel_match
)
785 # if r1.state = RELOAD_WAIT_ACK and req_index = r1.store_index
786 # and rel_match = '1' then
787 # See if the request matches the line currently being reloaded
788 with m
.If(r1
.state
== State
.RELOAD_WAIT_ACK
& req_index
==
789 r1
.store_index
& rel_match
):
790 # For a store, consider this a hit even if the row isn't
791 # valid since it will be by the time we perform the store.
792 # For a load, check the appropriate row valid bit.
795 # or r1.rows_valid(req_row mod ROW_PER_LINE);
796 # hit_way := replace_way;
797 comb
+= is_hit
.eq(~r0
.req
.load
798 | r1
.rows_valid
[req_row
% ROW_PER_LINE
]
800 comb
+= hit_way
.eq(replace_way
)
803 # -- Whether to use forwarded data for a load or not
804 # Whether to use forwarded data for a load or not
805 # use_forward1_next <= '0';
806 comb
+= use_forward1_next
.eq(0)
807 # if get_row(r1.req.real_addr) = req_row
808 # and r1.req.hit_way = hit_way then
809 with m
.If(get_row(r1
.req
.real_addr
) == req_row
810 & r1
.req
.hit_way
== hit_way
)
811 # Only need to consider r1.write_bram here, since if we
812 # are writing refill data here, then we don't have a
813 # cache hit this cycle on the line being refilled.
814 # (There is the possibility that the load following the
815 # load miss that started the refill could be to the old
816 # contents of the victim line, since it is a couple of
817 # cycles after the refill starts before we see the updated
818 # cache tag. In that case we don't use the bypass.)
819 # use_forward1_next <= r1.write_bram;
820 comb
+= use_forward1_next
.eq(r1
.write_bram
)
822 # use_forward2_next <= '0';
823 comb
+= use_forward2_next
.eq(0)
824 # if r1.forward_row1 = req_row
825 # and r1.forward_way1 = hit_way then
826 with m
.If(r1
.forward_row1
== req_row
827 & r1
.forward_way1
== hit_way
):
828 # use_forward2_next <= r1.forward_valid1;
829 comb
+= use_forward2_next
.eq(r1
.forward_valid1
)
832 # The way that matched on a hit
833 # req_hit_way <= hit_way;
834 comb
+= req_hit_way
.eq(hit_way
)
836 # The way to replace on a miss
837 # if r1.write_tag = '1' then
838 with m
.If(r1
.write_tag
):
839 # replace_way <= to_integer(unsigned(
840 # plru_victim(r1.store_index)
842 replace_way
.eq(plru_victim
[r1
.store_index
])
845 # replace_way <= r1.store_way;
846 comb
+= replace_way
.eq(r1
.store_way
)
849 # work out whether we have permission for this access
850 # NB we don't yet implement AMR, thus no KUAP
851 # rc_ok <= perm_attr.reference and
852 # (r0.req.load or perm_attr.changed);
853 # perm_ok <= (r0.req.priv_mode or not perm_attr.priv) and
854 # (perm_attr.wr_perm or (r0.req.load
855 # and perm_attr.rd_perm));
856 # access_ok <= valid_ra and perm_ok and rc_ok;
859 & (r0
.req
.load | perm_attr
.changed
)
861 comb
+= perm_ok
.eq((r0
.req
.prive_mode | ~perm_attr
.priv
)
863 |
(r0
.req
.load
& perm_attr
.rd_perm
)
865 comb
+= access_ok
.eq(valid_ra
& perm_ok
& rc_ok
)
866 # nc := r0.req.nc or perm_attr.nocache;
868 # Combine the request and cache hit status to decide what
869 # operation needs to be done
870 comb
+= nc
.eq(r0
.req
.nc | perm_attr
.nocache
)
871 comb
+= op
.eq(Op
.OP_NONE
)
874 # if access_ok = '0' then
875 with m
.If(~access_ok
):
877 comb
+= op
.eq(Op
.OP_BAD
)
878 # elsif cancel_store = '1' then
879 with m
.Elif(cancel_store
):
880 # op := OP_STCX_FAIL;
881 comb
+= op
.eq(Op
.OP_STCX_FAIL
)
884 # opsel := r0.req.load & nc & is_hit;
885 comb
+= opsel
.eq(Cat(is_hit
, nc
, r0
.req
.load
))
887 with m
.Switch(opsel
):
888 # when "101" => op := OP_LOAD_HIT;
889 # when "100" => op := OP_LOAD_MISS;
890 # when "110" => op := OP_LOAD_NC;
891 # when "001" => op := OP_STORE_HIT;
892 # when "000" => op := OP_STORE_MISS;
893 # when "010" => op := OP_STORE_MISS;
894 # when "011" => op := OP_BAD;
895 # when "111" => op := OP_BAD;
896 # when others => op := OP_NONE;
897 with m
.Case(Const(0b101, 3)):
898 comb
+= op
.eq(Op
.OP_LOAD_HIT
)
900 with m
.Case(Cosnt(0b100, 3)):
901 comb
+= op
.eq(Op
.OP_LOAD_MISS
)
903 with m
.Case(Const(0b110, 3)):
904 comb
+= op
.eq(Op
.OP_LOAD_NC
)
906 with m
.Case(Const(0b001, 3)):
907 comb
+= op
.eq(Op
.OP_STORE_HIT
)
909 with m
.Case(Const(0b000, 3)):
910 comb
+= op
.eq(Op
.OP_STORE_MISS
)
912 with m
.Case(Const(0b010, 3)):
913 comb
+= op
.eq(Op
.OP_STORE_MISS
)
915 with m
.Case(Const(0b011, 3)):
916 comb
+= op
.eq(Op
.OP_BAD
)
918 with m
.Case(Const(0b111, 3)):
919 comb
+= op
.eq(Op
.OP_BAD
)
922 comb
+= op
.eq(Op
.OP_NONE
)
928 comb
+= req_op
.eq(op
)
929 comb
+= req_go
.eq(go
)
931 # Version of the row number that is valid one cycle earlier
932 # in the cases where we need to read the cache data BRAM.
933 # If we're stalling then we need to keep reading the last
935 # if r0_stall = '0' then
936 with m
.If(~r0_stall
):
937 # if m_in.valid = '1' then
938 with m
.If(m_in
.valid
):
939 # early_req_row <= get_row(m_in.addr);
940 comb
+= early_req_row
.eq(get_row(m_in
.addr
))
943 # early_req_row <= get_row(d_in.addr);
944 comb
+= early_req_row
.eq(get_row(d_in
.addr
))
948 # early_req_row <= req_row;
949 comb
+= early_req_row
.eq(req_row
)
953 # Handle load-with-reservation and store-conditional instructions
954 def reservation_comb(self
, cancel_store
, set_rsrv
, clear_rsrv
,
955 r0_valid
, r0
, reservation
):
961 # cancel_store <= '0';
964 # if r0_valid = '1' and r0.req.reserve = '1' then
965 with m
.If(r0_valid
& r0
.req
.reserve
):
967 # -- XXX generate alignment interrupt if address
968 # -- is not aligned XXX or if r0.req.nc = '1'
969 # if r0.req.load = '1' then
970 # XXX generate alignment interrupt if address
971 # is not aligned XXX or if r0.req.nc = '1'
972 with m
.If(r0
.req
.load
):
973 # -- load with reservation
975 # load with reservation
979 # -- store conditional
982 comb
+= clear_rsrv
.eq(1)
983 # if reservation.valid = '0' or r0.req.addr(63
984 # downto LINE_OFF_BITS) /= reservation.addr then
985 with m
.If(~reservation
.valid
986 | r0
.req
.addr
[LINE_OFF_BITS
:64]):
987 # cancel_store <= '1';
988 comb
+= cancel_store
.eq(1)
994 def reservation_reg(self
, r0_valid
, access_ok
, clear_rsrv
,
1001 # if rising_edge(clk) then
1003 # reservation.valid <= '0';
1004 # TODO understand how resets work in nmigen
1005 # elsif r0_valid = '1' and access_ok = '1' then
1006 with m
.Elif(r0_valid
& access_ok
):
1007 # if clear_rsrv = '1' then
1008 with m
.If(clear_rsrv
):
1009 # reservation.valid <= '0';
1010 sync
+= reservation
.valid
.ea(0)
1011 # elsif set_rsrv = '1' then
1012 with m
.Elif(set_rsrv
):
1013 # reservation.valid <= '1';
1014 # reservation.addr <=
1015 # r0.req.addr(63 downto LINE_OFF_BITS);
1016 sync
+= reservation
.valid
.eq(1)
1017 sync
+= reservation
.addr
.eq(
1018 r0
.req
.addr
[LINE_OFF_BITS
:64]
1025 # Return data for loads & completion control logic
1026 def writeback_control(self
, r1
, cache_out
, d_out
, m_out
):
1031 # variable data_out : std_ulogic_vector(63 downto 0);
1032 # variable data_fwd : std_ulogic_vector(63 downto 0);
1033 # variable j : integer;
1034 data_out
= Signal(64)
1035 data_fwd
= Signal(64)
1039 # -- Use the bypass if are reading the row that was
1040 # -- written 1 or 2 cycles ago, including for the
1041 # -- slow_valid = 1 case (i.e. completing a load
1042 # -- miss or a non-cacheable load).
1043 # if r1.use_forward1 = '1' then
1044 # Use the bypass if are reading the row that was
1045 # written 1 or 2 cycles ago, including for the
1046 # slow_valid = 1 case (i.e. completing a load
1047 # miss or a non-cacheable load).
1048 with m
.If(r1
.use_forward1
):
1049 # data_fwd := r1.forward_data1;
1050 comb
+= data_fwd
.eq(r1
.forward_data1
)
1053 # data_fwd := r1.forward_data2;
1054 comb
+= data_fwd
.eq(r1
.forward_data2
)
1057 # data_out := cache_out(r1.hit_way);
1058 comb
+= data_out
.eq(cache_out
[r1
.hit_way
])
1060 # for i in 0 to 7 loop
1065 # if r1.forward_sel(i) = '1' then
1066 with m
.If(r1
.forward_sel
[i
]):
1067 # data_out(j + 7 downto j) := data_fwd(j + 7 downto j);
1068 comb
+= data_out
[j
:j
+8].eq(data_fwd
[j
:j
+8])
1072 # d_out.valid <= r1.ls_valid;
1073 # d_out.data <= data_out;
1074 # d_out.store_done <= not r1.stcx_fail;
1075 # d_out.error <= r1.ls_error;
1076 # d_out.cache_paradox <= r1.cache_paradox;
1077 comb
+= d_out
.valid
.eq(r1
.ls_valid
)
1078 comb
+= d_out
.data
.eq(data_out
)
1079 comb
+= d_out
.store_done
.eq(~r1
.stcx_fail
)
1080 comb
+= d_out
.error
.eq(r1
.ls_error
)
1081 comb
+= d_out
.cache_paradox
.eq(r1
.cache_paradox
)
1084 # m_out.done <= r1.mmu_done;
1085 # m_out.err <= r1.mmu_error;
1086 # m_out.data <= data_out;
1087 comb
+= m_out
.done
.eq(r1
.mmu_done
)
1088 comb
+= m_out
.err
.eq(r1
.mmu_error
)
1089 comb
+= m_out
.data
.eq(data_out
)
1091 # -- We have a valid load or store hit or we just completed
1092 # -- a slow op such as a load miss, a NC load or a store
1094 # -- Note: the load hit is delayed by one cycle. However it
1095 # -- can still not collide with r.slow_valid (well unless I
1096 # -- miscalculated) because slow_valid can only be set on a
1097 # -- subsequent request and not on its first cycle (the state
1098 # -- machine must have advanced), which makes slow_valid
1099 # -- at least 2 cycles from the previous hit_load_valid.
1101 # -- Sanity: Only one of these must be set in any given cycle
1102 # assert (r1.slow_valid and r1.stcx_fail) /= '1'
1103 # report "unexpected slow_valid collision with stcx_fail"
1105 # assert ((r1.slow_valid or r1.stcx_fail) and r1.hit_load_valid)
1106 # /= '1' report "unexpected hit_load_delayed collision with
1107 # slow_valid" severity FAILURE;
1108 # We have a valid load or store hit or we just completed
1109 # a slow op such as a load miss, a NC load or a store
1111 # Note: the load hit is delayed by one cycle. However it
1112 # can still not collide with r.slow_valid (well unless I
1113 # miscalculated) because slow_valid can only be set on a
1114 # subsequent request and not on its first cycle (the state
1115 # machine must have advanced), which makes slow_valid
1116 # at least 2 cycles from the previous hit_load_valid.
1118 # Sanity: Only one of these must be set in any given cycle
1119 assert (r1
.slow_valid
& r1
.stcx_fail
) != 1 "unexpected" \
1120 "slow_valid collision with stcx_fail -!- severity FAILURE"
1122 assert ((r1
.slow_valid | r1
.stcx_fail
) | r1
.hit_load_valid
) != 1
1123 "unexpected hit_load_delayed collision with slow_valid -!-" \
1126 # if r1.mmu_req = '0' then
1127 with m
.If(~r1
._mmu_req
):
1128 # -- Request came from loadstore1...
1129 # -- Load hit case is the standard path
1130 # if r1.hit_load_valid = '1' then
1131 # Request came from loadstore1...
1132 # Load hit case is the standard path
1133 with m
.If(r1
.hit_load_valid
):
1135 # "completing load hit data=" & to_hstring(data_out);
1136 print(f
"completing load hit data={data_out}")
1139 # -- error cases complete without stalling
1140 # if r1.ls_error = '1' then
1141 # error cases complete without stalling
1142 with m
.If(r1
.ls_error
):
1143 # report "completing ld/st with error";
1144 print("completing ld/st with error")
1147 # -- Slow ops (load miss, NC, stores)
1148 # if r1.slow_valid = '1' then
1149 # Slow ops (load miss, NC, stores)
1150 with m
.If(r1
.slow_valid
):
1152 # "completing store or load miss data="
1153 # & to_hstring(data_out);
1154 print(f
"completing store or load miss data={data_out}")
1159 # -- Request came from MMU
1160 # if r1.hit_load_valid = '1' then
1161 # Request came from MMU
1162 with m
.If(r1
.hit_load_valid
):
1163 # report "completing load hit to MMU, data="
1164 # & to_hstring(m_out.data);
1165 print(f
"completing load hit to MMU, data={m_out.data}")
1168 # -- error cases complete without stalling
1169 # if r1.mmu_error = '1' then
1170 # report "completing MMU ld with error";
1171 # error cases complete without stalling
1172 with m
.If(r1
.mmu_error
):
1173 print("combpleting MMU ld with error")
1176 # -- Slow ops (i.e. load miss)
1177 # if r1.slow_valid = '1' then
1178 # Slow ops (i.e. load miss)
1179 with m
.If(r1
.slow_valid
):
1180 # report "completing MMU load miss, data="
1181 # & to_hstring(m_out.data);
1182 print("completing MMU load miss, data={m_out.data}")
1187 # -- Generate a cache RAM for each way. This handles the normal
1188 # -- reads, writes from reloads and the special store-hit update
1191 # -- Note: the BRAMs have an extra read buffer, meaning the output
1192 # -- is pipelined an extra cycle. This differs from the
1193 # -- icache. The writeback logic needs to take that into
1194 # -- account by using 1-cycle delayed signals for load hits.
1196 # rams: for i in 0 to NUM_WAYS-1 generate
1197 # Generate a cache RAM for each way. This handles the normal
1198 # reads, writes from reloads and the special store-hit update
1201 # Note: the BRAMs have an extra read buffer, meaning the output
1202 # is pipelined an extra cycle. This differs from the
1203 # icache. The writeback logic needs to take that into
1204 # account by using 1-cycle delayed signals for load hits.
1206 for i
in range(NUM_WAYS
):
1207 # signal do_read : std_ulogic;
1208 # signal rd_addr : std_ulogic_vector(ROW_BITS-1 downto 0);
1209 # signal do_write : std_ulogic;
1210 # signal wr_addr : std_ulogic_vector(ROW_BITS-1 downto 0);
1212 # std_ulogic_vector(wishbone_data_bits-1 downto 0);
1213 # signal wr_sel : std_ulogic_vector(ROW_SIZE-1 downto 0);
1214 # signal wr_sel_m : std_ulogic_vector(ROW_SIZE-1 downto 0);
1215 # signal dout : cache_row_t;
1217 rd_addr
= Signal(ROW_BITS
)
1219 wr_addr
= Signal(ROW_BITS
)
1220 wr_data
= Signal(WB_DATA_BITS
)
1221 wr_sel
= Signal(ROW_SIZE
)
1222 wr_sel_m
= Signal(ROW_SIZE
)
1223 _d_out
= Signal(WB_DATA_BITS
)
1226 # way: entity work.cache_ram
1228 # ROW_BITS => ROW_BITS,
1229 # WIDTH => wishbone_data_bits,
1235 # rd_addr => rd_addr,
1237 # wr_sel => wr_sel_m,
1238 # wr_addr => wr_addr,
1239 # wr_data => wr_data
1242 way
= CacheRam(ROW_BITS
, WB_DATA_BITS
, True)
1243 comb
+= way
.rd_en
.eq(do_read
)
1244 comb
+= way
.rd_addr
.eq(rd_addr
)
1245 comb
+= way
.rd_data
.eq(_d_out
)
1246 comb
+= way
.wr_sel
.eq(wr_sel_m
)
1247 comb
+= way
.wr_addr
.eq(wr_addr
)
1248 comb
+= way
.wr_data
.eq(wr_data
)
1251 # -- Cache hit reads
1254 # std_ulogic_vector(to_unsigned(early_req_row, ROW_BITS));
1255 # cache_out(i) <= dout;
1257 comb
+= do_read
.eq(1)
1258 comb
+= rd_addr
.eq(Signal(BRAM_ROWS
))
1259 comb
+= cache_out
[i
].eq(dout
)
1263 # -- Defaults to wishbone read responses (cache refill)
1265 # -- For timing, the mux on wr_data/sel/addr is not
1266 # -- dependent on anything other than the current state.
1269 # Defaults to wishbone read responses (cache refill)
1271 # For timing, the mux on wr_data/sel/addr is not
1272 # dependent on anything other than the current state.
1273 # wr_sel_m <= (others => '0');
1274 comb
+= wr_sel_m
.eq(0)
1277 comb
+= do_write
.eq(0)
1278 # if r1.write_bram = '1' then
1279 with m
.If(r1
.write_bram
):
1280 # -- Write store data to BRAM. This happens one
1281 # -- cycle after the store is in r0.
1282 # Write store data to BRAM. This happens one
1283 # cycle after the store is in r0.
1284 # wr_data <= r1.req.data;
1285 # wr_sel <= r1.req.byte_sel;
1286 # wr_addr <= std_ulogic_vector(to_unsigned(
1287 # get_row(r1.req.real_addr), ROW_BITS
1289 comb
+= wr_data
.eq(r1
.req
.data
)
1290 comb
+= wr_sel
.eq(r1
.req
.byte_sel
)
1291 comb
+= wr_addr
.eq(Signal(get_row(r1
.req
.real_addr
)))
1293 # if i = r1.req.hit_way then
1294 with m
.If(i
== r1
.req
.hit_way
):
1296 comb
+= do_write
.eq(1)
1300 # -- Otherwise, we might be doing a reload or a DCBZ
1301 # if r1.dcbz = '1' then
1302 # Otherwise, we might be doing a reload or a DCBZ
1304 # wr_data <= (others => '0');
1305 comb
+= wr_data
.eq(0)
1308 # wr_data <= wishbone_in.dat;
1309 comb
+= wr_data
.eq(wishbone_in
.dat
)
1312 # wr_addr <= std_ulogic_vector(to_unsigned(
1313 # r1.store_row, ROW_BITS
1315 # wr_sel <= (others => '1');
1316 comb
+= wr_addr
.eq(Signal(r1
.store_row
))
1317 comb
+= wr_sel
.eq(1)
1319 # if r1.state = RELOAD_WAIT_ACK and
1320 # wishbone_in.ack = '1' and replace_way = i then
1321 with m
.If(r1
.state
== State
.RELOAD_WAIT_ACK
1322 & wishbone_in
.ack
& relpace_way
== i
):
1324 comb
+= do_write
.eq(1)
1328 # -- Mask write selects with do_write since BRAM
1329 # -- doesn't have a global write-enable
1330 # if do_write = '1' then
1331 # -- Mask write selects with do_write since BRAM
1332 # -- doesn't have a global write-enable
1333 with m
.If(do_write
):
1334 # wr_sel_m <= wr_sel;
1335 comb
+= wr_sel_m
.eq(wr_sel
)
1340 # Cache hit synchronous machine for the easy case.
1341 # This handles load hits.
1342 # It also handles error cases (TLB miss, cache paradox)
def dcache_fast_hit(self, req_op, r0_valid, r1):
    """Cache hit synchronous machine for the easy case.

    Handles load hits and the error cases (TLB miss, cache paradox),
    latching the results into the stage-1 register set ``r1``.  All
    assignments here are in the ``sync`` domain (VHDL
    ``rising_edge(clk)`` process).

    NOTE(review): ``m``, ``sync``, ``r0``, ``req_hit_way``,
    ``req_index``, ``req_tag``, ``valid_ra``, ``rc_ok``, ``perm_ok``,
    ``access_ok``, ``tlb_hit``, ``tlb_hit_way`` and ``tlb_req_index``
    are free names and must be in scope where this is elaborated --
    TODO: pass them in (or take them from ``self``) once the port
    settles down.
    """
    # Elaboration-time debug trace of any non-idle operation.
    with m.If(req_op != Op.OP_NONE):
        print(f"op:{req_op} addr:{r0.req.addr} nc: {r0.req.nc}"
              f"idx:{req_index} tag:{req_tag} way: {req_hit_way}")

    # Remember which requester (LSU vs MMU) this request came from.
    with m.If(r0_valid):
        sync += r1.mmu_req.eq(r0.mmu_req)

    # Fast path for load/store hits.
    # Set signals for the writeback controls.
    sync += r1.hit_way.eq(req_hit_way)
    sync += r1.hit_index.eq(req_index)

    with m.If(req_op == Op.OP_LOAD_HIT):
        sync += r1.hit_load_valid.eq(1)
    with m.Else():
        sync += r1.hit_load_valid.eq(0)

    # FIX(review): was `req_op == Op.OP_LOAD_HIT | req_op ==
    # Op.OP_STORE_HIT`; Python's `|` binds tighter than `==`, turning
    # that into a chained comparison rather than the intended OR of
    # two equality tests.  Each comparison must be parenthesised.
    with m.If((req_op == Op.OP_LOAD_HIT) | (req_op == Op.OP_STORE_HIT)):
        sync += r1.cache_hit.eq(1)
    with m.Else():
        sync += r1.cache_hit.eq(0)

    with m.If(req_op == Op.OP_BAD):
        print(f"Signalling ld/st error valid_ra={valid_ra}"
              f"rc_ok={rc_ok} perm_ok={perm_ok}")
        # MMU-originated requests report through mmu_error,
        # LSU-originated ones through ls_error.
        sync += r1.ls_error.eq(~r0.mmu_req)
        sync += r1.mmu_error.eq(r0.mmu_req)
        sync += r1.cache_paradox.eq(access_ok)
    with m.Else():
        sync += r1.ls_error.eq(0)
        sync += r1.mmu_error.eq(0)
        sync += r1.cache_paradox.eq(0)

    with m.If(req_op == Op.OP_STCX_FAIL):
        # FIX(review): restored from the VHDL (`r1.stcx_fail <= '1';`);
        # the assignment was missing from the transcription.
        sync += r1.stcx_fail.eq(1)
    with m.Else():
        sync += r1.stcx_fail.eq(0)

    # Record TLB hit information for updating TLB PLRU.
    sync += r1.tlb_hit.eq(tlb_hit)
    sync += r1.tlb_hit_way.eq(tlb_hit_way)
    sync += r1.tlb_hit_index.eq(tlb_req_index)
1446 # Memory accesses are handled by this state machine:
1448 # * Cache load miss/reload (in conjunction with "rams")
1449 # * Load hits for non-cachable forms
1450 # * Stores (the collision case is handled in "rams")
1452 # All wishbone requests generation is done here.
1453 # This machine operates at stage 1.
def dcache_slow(self, r1, use_forward1_next, cache_valid_bits, r0,
                r0_valid, req_op, cache_tag, req_go, ra, wb_in):
    """Memory-access state machine (stage 1).

    Handles: cache load miss/reload (in conjunction with "rams"),
    load hits for non-cachable forms, and stores (the collision case
    is handled in "rams").  All wishbone request generation is done
    here.  Assignments are in the ``sync`` domain.

    NOTE(review): ``m``, ``sync``, ``use_forward2_next``,
    ``replace_way``, ``req_hit_way``, ``req_same_tag``, ``cache_tags``
    and the ``get_*``/``*_row*`` helpers are free names that must be
    in scope at elaboration -- TODO: thread them through properly.
    """
    # VHDL process variables.  NOTE(review): modelling these as sync
    # Signals changes their intra-cycle (variable) semantics from the
    # original VHDL -- TODO confirm/restructure.
    stbs_done = Signal()
    req       = MemAccessRequest()
    acks      = Signal(3)  # was `variable acks : unsigned(2 downto 0);`

    sync += r1.use_forward1.eq(use_forward1_next)
    sync += r1.forward_sel.eq(0)
    with m.If(use_forward1_next):
        sync += r1.forward_sel.eq(r1.req.byte_sel)
    with m.Elif(use_forward2_next):
        sync += r1.forward_sel.eq(r1.forward_sel1)

    sync += r1.forward_data2.eq(r1.forward_data1)
    with m.If(r1.write_bram):
        sync += r1.forward_data1.eq(r1.req.data)
        sync += r1.forward_sel1.eq(r1.req.byte_sel)
        sync += r1.forward_way1.eq(r1.req.hit_way)
        sync += r1.forward_row1.eq(get_row(r1.req.real_addr))
        sync += r1.forward_valid1.eq(1)
    with m.Else():
        with m.If(r1.dcbz):
            sync += r1.forward_data1.eq(0)
        with m.Else():
            sync += r1.forward_data1.eq(wb_in.dat)
        # FIX(review): `(others => '1')` must set every bit; `.eq(1)`
        # only sets bit 0.  -1 sign-extends to all-ones.
        sync += r1.forward_sel1.eq(-1)
        sync += r1.forward_way1.eq(replace_way)
        sync += r1.forward_row1.eq(r1.store_row)
        sync += r1.forward_valid1.eq(0)

    # On reset, clear all valid bits to force misses.
    # NOTE(review): explicit reset handling is normally implicit in
    # nmigen's sync domain -- TODO confirm this is the right idiom.
    from nmigen import ResetSignal
    with m.If(ResetSignal()):
        for i in range(NUM_LINES):
            sync += cache_valid_bits[i].eq(0)
        sync += r1.state.eq(State.IDLE)
        sync += r1.full.eq(0)
        sync += r1.slow_valid.eq(0)
        sync += r1.wb.cyc.eq(0)
        sync += r1.wb.stb.eq(0)
        sync += r1.ls_valid.eq(0)
        sync += r1.mmu_done.eq(0)
        # Not useful normally but helps avoiding tons of sim warnings.
        sync += r1.wb.adr.eq(0)
    with m.Else():
        # One cycle pulses reset.
        sync += r1.slow_valid.eq(0)
        sync += r1.write_bram.eq(0)
        sync += r1.inc_acks.eq(0)
        sync += r1.dec_acks.eq(0)
        sync += r1.ls_valid.eq(0)
        # Complete tlbies and TLB loads in the third cycle.
        sync += r1.mmu_done.eq(r0_valid & (r0.tlbie | r0.tlbld))

        with m.If((req_op == Op.OP_LOAD_HIT)
                  | (req_op == Op.OP_STCX_FAIL)):
            with m.If(~r0.mmu_req):
                sync += r1.ls_valid.eq(1)
            with m.Else():
                sync += r1.mmu_done.eq(1)

        with m.If(r1.write_tag):
            # Store new tag in selected way.
            for i in range(NUM_WAYS):
                with m.If(i == replace_way):
                    # NOTE(review): the VHDL zero-extends r1.reload_tag
                    # to TAG_WIDTH; the transcription had a meaningless
                    # Const(TAG_WIDTH, TAG_WIDTH) here -- TODO confirm.
                    sync += cache_tags[r1.store_index][
                        i * TAG_WIDTH:(i + 1) * TAG_WIDTH].eq(
                            r1.reload_tag)
            sync += r1.store_way.eq(replace_way)
            sync += r1.write_tag.eq(0)

        # Take request from r1.req if there is one there,
        # else from req_op, ra, etc.
        with m.If(r1.full):
            sync += req.eq(r1.req)
        with m.Else():
            sync += req.op.eq(req_op)
            sync += req.valid.eq(req_go)
            sync += req.mmu_req.eq(r0.mmu_req)
            sync += req.dcbz.eq(r0.req.dcbz)
            sync += req.real_addr.eq(ra)

            # Force data to 0 for dcbz.
            with m.If(~r0.req.dcbz):
                sync += req.data.eq(r0.req.data)
            with m.Else():
                sync += req.data.eq(0)

            # Select all bytes for dcbz and for cacheable loads.
            with m.If(r0.req.dcbz | (r0.req.load & ~r0.req.nc)):
                sync += req.byte_sel.eq(-1)  # all ones
            with m.Else():
                sync += req.byte_sel.eq(r0.req.byte_sel)

            sync += req.hit_way.eq(req_hit_way)
            sync += req.same_tag.eq(req_same_tag)

            # Store the incoming request from r0 if it is a slow
            # request.  Note that r1.full = 1 implies req_op = OP_NONE.
            with m.If((req_op == Op.OP_LOAD_MISS)
                      | (req_op == Op.OP_LOAD_NC)
                      | (req_op == Op.OP_STORE_MISS)
                      | (req_op == Op.OP_STORE_HIT)):
                sync += r1.full.eq(1)

        # Main state machine.
        with m.Switch(r1.state):
            with m.Case(State.IDLE):
                # FIX(review): the slice bound was the Signal itself
                # (`[0:r1.wb.adr]`); use its width instead.
                sync += r1.wb.adr.eq(req.real_addr[0:len(r1.wb.adr)])
                sync += r1.wb.sel.eq(req.byte_sel)
                sync += r1.wb.dat.eq(req.data)
                sync += r1.dcbz.eq(req.dcbz)

                # Keep track of our index and way
                # for subsequent stores.
                sync += r1.store_index.eq(get_index(req.real_addr))
                sync += r1.store_row.eq(get_row(req.real_addr))
                sync += r1.end_row_ix.eq(
                    get_row_of_line(get_row(req.real_addr)) - 1)
                sync += r1.reload_tag.eq(get_tag(req.real_addr))
                sync += r1.req.same_tag.eq(1)

                with m.If(req.op == Op.OP_STORE_HIT):
                    sync += r1.store_way.eq(req.hit_way)

                # Reset per-row valid bits,
                # ready for handling OP_LOAD_MISS.
                for i in range(ROW_PER_LINE):
                    sync += r1.rows_valid[i].eq(0)

                with m.Switch(req.op):
                    with m.Case(Op.OP_LOAD_HIT):
                        # Stay in IDLE state.
                        pass

                    with m.Case(Op.OP_LOAD_MISS):
                        # Normal load cache miss,
                        # start the reload machine.
                        print(f"cache miss real addr:{req.real_addr}"
                              f" idx:{get_index(req.real_addr)}"
                              f" tag:{get_tag(req.real_addr)}")

                        # Start the wishbone cycle.
                        sync += r1.wb.we.eq(0)
                        sync += r1.wb.cyc.eq(1)
                        sync += r1.wb.stb.eq(1)

                        # Track that we had one request sent.
                        sync += r1.state.eq(State.RELOAD_WAIT_ACK)
                        sync += r1.write_tag.eq(1)

                    with m.Case(Op.OP_LOAD_NC):
                        sync += r1.wb.cyc.eq(1)
                        sync += r1.wb.stb.eq(1)
                        sync += r1.wb.we.eq(0)
                        sync += r1.state.eq(State.NC_LOAD_WAIT_ACK)

                    # FIX(review): `Case(A | B)` bitwise-ORs the two
                    # enum values into one wrong pattern; m.Case takes
                    # multiple values as separate arguments.
                    with m.Case(Op.OP_STORE_HIT, Op.OP_STORE_MISS):
                        with m.If(~req.dcbz):  # was typo `req.bcbz`
                            sync += r1.state.eq(State.STORE_WAIT_ACK)
                            sync += r1.acks_pending.eq(1)
                            sync += r1.full.eq(0)
                            sync += r1.slow_valid.eq(1)
                            with m.If(~req.mmu_req):
                                sync += r1.ls_valid.eq(1)
                            with m.Else():
                                sync += r1.mmu_done.eq(1)
                            with m.If(req.op == Op.OP_STORE_HIT):
                                sync += r1.write_bram.eq(1)
                        with m.Else():
                            # dcbz is handled much like a load miss
                            # except that we are writing to memory
                            # instead of reading.
                            # FIX(review): was `Op.RELOAD_WAIT_ACK`;
                            # the state lives in the State enum.
                            sync += r1.state.eq(State.RELOAD_WAIT_ACK)
                            with m.If(req.op == Op.OP_STORE_MISS):
                                sync += r1.write_tag.eq(1)
                        sync += r1.wb.we.eq(1)
                        sync += r1.wb.cyc.eq(1)
                        sync += r1.wb.stb.eq(1)

                    # OP_NONE and OP_BAD do nothing;
                    # OP_BAD & OP_STCX_FAIL were handled above already.
                    with m.Case(Op.OP_NONE):
                        pass
                    with m.Case(Op.OP_BAD):
                        pass
                    with m.Case(Op.OP_STCX_FAIL):
                        pass

            with m.Case(State.RELOAD_WAIT_ACK):
                # Requests are all sent if stb is 0.
                sync += stbs_done.eq(~r1.wb.stb)

                # If we are still sending requests, was one accepted?
                with m.If(~wb_in.stall & ~stbs_done):
                    # That was the last word?  We are done sending.
                    # Clear stb and set stbs_done so we can handle an
                    # eventual last ack on the same cycle.
                    with m.If(is_last_row_addr(r1.wb.adr,
                                               r1.end_row_ix)):
                        sync += r1.wb.stb.eq(0)
                        # FIX(review): VHDL sets `stbs_done := true`
                        # here; the transcription cleared it.
                        sync += stbs_done.eq(1)
                    with m.Else():
                        # Calculate the next row address.
                        sync += r1.wb.adr.eq(next_row_addr(r1.wb.adr))

                # Incoming acks processing.
                sync += r1.forward_valid1.eq(wb_in.ack)
                with m.If(wb_in.ack):
                    sync += r1.rows_valid[
                        r1.store_row % ROW_PER_LINE].eq(1)

                    # If this is the data we were looking for, we can
                    # complete the request next cycle.  Compare the
                    # whole address in case the request in r1.req is
                    # not the one that started this refill.
                    with m.If(r1.full & r1.req.same_tag &
                              ((r1.dcbz & r1.req.dcbz) |
                               (~r1.dcbz &
                                (r1.req.op == Op.OP_LOAD_MISS))) &
                              (r1.store_row ==
                               get_row(r1.req.real_addr))):
                        sync += r1.full.eq(0)
                        sync += r1.slow_valid.eq(1)
                        with m.If(~r1.mmu_req):
                            sync += r1.ls_valid.eq(1)
                        with m.Else():
                            sync += r1.mmu_done.eq(1)
                        sync += r1.forward_sel.eq(-1)  # all ones
                        sync += r1.use_forward1.eq(1)

                    # Check for completion.
                    with m.If(stbs_done &
                              is_last_row(r1.store_row,
                                          r1.end_row_ix)):
                        # Complete wishbone cycle.
                        sync += r1.wb.cyc.eq(0)

                        # Cache line is now valid.
                        sync += cache_valid_bits[
                            r1.store_index][r1.store_way].eq(1)

                        sync += r1.state.eq(State.IDLE)

                    # Increment store row counter.
                    sync += r1.store_row.eq(next_row(r1.store_row))

            with m.Case(State.STORE_WAIT_ACK):
                sync += stbs_done.eq(~r1.wb.stb)
                sync += acks.eq(r1.acks_pending)

                with m.If(r1.inc_acks != r1.dec_acks):
                    with m.If(r1.inc_acks):
                        sync += acks.eq(acks + 1)
                    with m.Else():
                        sync += acks.eq(acks - 1)

                sync += r1.acks_pending.eq(acks)

                # Clear stb when slave accepted request.
                with m.If(~wb_in.stall):
                    # See if there is another store waiting to be done
                    # which is in the same real page.
                    with m.If(req.valid):
                        sync += r1.wb.adr[0:SET_SIZE_BITS].eq(
                            req.real_addr[0:SET_SIZE_BITS])
                        sync += r1.wb.dat.eq(req.data)
                        sync += r1.wb.sel.eq(req.byte_sel)

                    # FIX(review): `acks < 7 & req.same_tag` parsed as
                    # `acks < (7 & req.same_tag)`; parenthesise.  Also
                    # fixed typos Op_STORE_MISS / OP_SOTRE_HIT.
                    with m.Elif((acks < 7) & req.same_tag &
                                ((req.op == Op.OP_STORE_MISS)
                                 | (req.op == Op.OP_STORE_HIT))):
                        sync += r1.wb.stb.eq(1)
                        sync += stbs_done.eq(0)
                        with m.If(req.op == Op.OP_STORE_HIT):
                            sync += r1.write_bram.eq(1)
                        sync += r1.full.eq(0)
                        sync += r1.slow_valid.eq(1)
                        # Store requests never come from the MMU.
                        sync += r1.ls_valid.eq(1)
                        sync += stbs_done.eq(0)
                        sync += r1.inc_acks.eq(1)
                    with m.Else():
                        sync += r1.wb.stb.eq(0)
                        sync += stbs_done.eq(1)

                # Got ack?  See if complete.
                with m.If(wb_in.ack):
                    # FIX(review): was `stbs_done & acks` -- the VHDL
                    # tests `acks = 1`.
                    with m.If(stbs_done & (acks == 1)):
                        sync += r1.state.eq(State.IDLE)
                        sync += r1.wb.cyc.eq(0)
                        sync += r1.wb.stb.eq(0)
                    sync += r1.dec_acks.eq(1)

            with m.Case(State.NC_LOAD_WAIT_ACK):
                # Clear stb when slave accepted request.
                with m.If(~wb_in.stall):
                    sync += r1.wb.stb.eq(0)

                # Got ack?  Complete.
                with m.If(wb_in.ack):
                    sync += r1.state.eq(State.IDLE)
                    sync += r1.full.eq(0)
                    sync += r1.slow_valid.eq(1)
                    with m.If(~r1.mmu_req):
                        sync += r1.ls_valid.eq(1)
                    with m.Else():
                        sync += r1.mmu_done.eq(1)
                    sync += r1.forward_sel.eq(-1)  # all ones
                    sync += r1.use_forward1.eq(1)
                    sync += r1.wb.cyc.eq(0)
                    sync += r1.wb.stb.eq(0)
2153 # dc_log: if LOG_LENGTH > 0 generate
# TODO: learn how to translate a VHDL 'generate' block into nmigen
def dcache_log(self, r1, valid_ra, tlb_hit_way, stall_out,
               d_out, wb_in, log_out):
    """Debug/trace logging (VHDL `dc_log: if LOG_LENGTH > 0 generate`).

    Packs a snapshot of the dcache's interesting control signals into
    a 20-bit word each cycle and drives it out on ``log_out``.

    NOTE(review): ``m``, ``sync``, ``comb`` and ``req_op`` are free
    names that must be in scope at elaboration -- ``req_op`` in
    particular is used but not a parameter; TODO pass it in.
    """
    log_data = Signal(20)

    # FIX(review): the transcription wrapped live Signals in Const(),
    # which requires Python integers; take 3-bit slices instead.
    # NOTE(review): nmigen Cat() is LSB-first, the reverse of VHDL `&`
    # concatenation -- TODO confirm the intended bit layout, and that
    # the r1.wb.adr element (VHDL `r1.wb.adr(5 downto 3)`) belongs at
    # the end; it was missing from the transcription.
    sync += log_data.eq(Cat(
        r1.state[:3], valid_ra, tlb_hit_way[:3],
        stall_out, req_op[:3], d_out.valid, d_out.error,
        r1.wb.cyc, r1.wb.stb, wb_in.ack, wb_in.stall,
        r1.wb.adr[3:6]))

    comb += log_out.eq(log_data)
def elaborate(self, platform):
    """Elaborate the data cache.

    Sets up cache/TLB geometry constants, declares storage arrays and
    helper signals, defines address-decoding helper functions, checks
    the geometry with assertions, and wires up the top-level
    combinatorial connections.

    NOTE(review): this is a work-in-progress translation of microwatt
    dcache.vhdl.  A number of source lines were lost in extraction
    (constructor headers, return statements, assert continuations);
    the lost spots and the remaining syntax/logic defects are flagged
    inline rather than guessed at.
    """
    # Local aliases for the geometry parameters held on the instance.
    LINE_SIZE = self.LINE_SIZE
    NUM_LINES = self.NUM_LINES
    NUM_WAYS = self.NUM_WAYS
    TLB_SET_SIZE = self.TLB_SET_SIZE
    TLB_NUM_WAYS = self.TLB_NUM_WAYS
    TLB_LG_PGSZ = self.TLB_LG_PGSZ
    LOG_LENGTH = self.LOG_LENGTH

    # BRAM organisation: We never access more than
    # -- wishbone_data_bits at a time so to save
    # -- resources we make the array only that wide, and
    # -- use consecutive indices for to make a cache "line"
    # -- ROW_SIZE is the width in bytes of the BRAM
    # -- (based on WB, so 64-bits)
    # NOTE(review): true division yields a float here (the trailing
    # semicolon is also redundant); should be WB_DATA_BITS // 8.
    ROW_SIZE = WB_DATA_BITS / 8;

    # ROW_PER_LINE is the number of row (wishbone
    # transactions) in a line
    ROW_PER_LINE = LINE_SIZE // ROW_SIZE

    # BRAM_ROWS is the number of rows in BRAM needed
    # to represent the full dcache
    BRAM_ROWS = NUM_LINES * ROW_PER_LINE

    # Bit fields counts in the address

    # REAL_ADDR_BITS is the number of real address
    # bits that we store
    # NOTE(review): the REAL_ADDR_BITS definition itself is not
    # visible in this chunk although it is used below — TODO confirm
    # it is defined elsewhere in the file.

    # ROW_BITS is the number of bits to select a row
    ROW_BITS = log2_int(BRAM_ROWS)

    # ROW_LINE_BITS is the number of bits to select
    # a row within a line
    ROW_LINE_BITS = log2_int(ROW_PER_LINE)

    # LINE_OFF_BITS is the number of bits for
    # the offset in a cache line
    LINE_OFF_BITS = log2_int(LINE_SIZE)

    # ROW_OFF_BITS is the number of bits for
    # the offset in a row
    ROW_OFF_BITS = log2_int(ROW_SIZE)

    # INDEX_BITS is the number if bits to
    # select a cache line
    INDEX_BITS = log2_int(NUM_LINES)

    # SET_SIZE_BITS is the log base 2 of the set size
    SET_SIZE_BITS = LINE_OFF_BITS + INDEX_BITS

    # TAG_BITS is the number of bits of
    # the tag part of the address
    TAG_BITS = REAL_ADDR_BITS - SET_SIZE_BITS

    # TAG_WIDTH is the width in bits of each way of the tag RAM
    # (the tag rounded up to the next multiple of 8 bits)
    TAG_WIDTH = TAG_BITS + 7 - ((TAG_BITS + 7) % 8)

    # WAY_BITS is the number of bits to select a way
    WAY_BITS = log2_int(NUM_WAYS)

    # Example of layout for 32 lines of 64 bytes:
    # .. tag    |index| line  |
    # ..        |     |---|   | ROW_LINE_BITS (3)
    # ..        |     |--- - --| LINE_OFF_BITS (6)
    # ..        |         |- --| ROW_OFF_BITS (3)
    # ..        |----- ---|   | ROW_BITS (8)
    # ..        |-----|       | INDEX_BITS (5)
    # .. --------|            | TAG_BITS (45)

    TAG_RAM_WIDTH = TAG_WIDTH * NUM_WAYS

    # One tag set per cache line.
    def CacheTagArray():
        return Array(CacheTagSet() for x in range(NUM_LINES))

    # Per-line valid bits, one per way.
    def CacheValidBitsArray():
        return Array(CacheWayValidBits() for x in range(NUM_LINES))

    # One valid bit per row of a line (reload-progress tracking).
    def RowPerLineValidArray():
        return Array(Signal() for x in range(ROW_PER_LINE))

    # Storage. Hopefully "cache_rows" is a BRAM, the rest is LUTs
    cache_tags = CacheTagArray()
    cache_tag_set = Signal(TAG_RAM_WIDTH)
    cache_valid_bits = CacheValidBitsArray()

    # TODO attribute ram_style : string;
    # TODO attribute ram_style of cache_tags : signal is "distributed";

    # TLB geometry.
    TLB_SET_BITS = log2_int(TLB_SET_SIZE)
    TLB_WAY_BITS = log2_int(TLB_NUM_WAYS)
    TLB_EA_TAG_BITS = 64 - (TLB_LG_PGSZ + TLB_SET_BITS)
    TLB_TAG_WAY_BITS = TLB_NUM_WAYS * TLB_EA_TAG_BITS
    # NOTE(review): TLB_PTE_BITS is not defined anywhere in this
    # chunk — TODO confirm it is defined elsewhere. Trailing
    # semicolon is redundant.
    TLB_PTE_WAY_BITS = TLB_NUM_WAYS * TLB_PTE_BITS;

    # NOTE(review): the bodies of the next few array constructors were
    # truncated in extraction — the "return Array(" wrappers and the
    # TLBTagsArray() / TLBPtesArray() / HitWaySet() headers are
    # missing, leaving bare generator expressions (syntax errors).
    def TLBValidBitsArray():
        Signal(TLB_NUM_WAYS) for x in range(TLB_SET_SIZE)
        Signal(TLB_TAG_WAY_BITS) for x in range(TLB_SET_SIZE)
        Signal(TLB_PTE_WAY_BITS) for x in range(TLB_SET_SIZE)
        return Array(Signal(NUM_WAYS) for x in range(TLB_NUM_WAYS))

    """note: these are passed to nmigen.hdl.Memory as "attributes".
    don't know how, just that they are.
    """
    dtlb_valid_bits = TLBValidBitsArray()
    dtlb_tags = TLBTagsArray()
    dtlb_ptes = TLBPtesArray()
    # TODO attribute ram_style of
    #  dtlb_tags : signal is "distributed";
    # TODO attribute ram_style of
    #  dtlb_ptes : signal is "distributed";

    # Reservation (stcx.) tracking state.
    reservation = Reservation()

    # Async signals on incoming request
    # NOTE(review): several widths below look like element *counts*,
    # not bit widths (e.g. Signal(NUM_LINES) vs Signal(INDEX_BITS),
    # Signal(BRAM_ROWS) vs Signal(ROW_BITS)) — TODO confirm.
    req_index = Signal(NUM_LINES)
    req_row = Signal(BRAM_ROWS)
    req_hit_way = Signal(WAY_BITS)
    req_tag = Signal(TAG_BITS)
    req_data = Signal(64)
    req_same_tag = Signal()

    early_req_row = Signal(BRAM_ROWS)

    cancel_store = Signal()
    clear_rsrv = Signal()

    use_forward1_next = Signal()
    use_forward2_next = Signal()

    # Cache RAM interface
    # NOTE(review): the "def CacheRamOut():" header was lost in
    # extraction; only its return statement survives here.
    return Array(Signal(WB_DATA_BITS) for x in range(NUM_WAYS))

    cache_out = CacheRamOut()

    # PLRU output interface
    # NOTE(review): the "def PLRUOut():" header was lost; "Index()"
    # also looks wrong as a range bound — TODO confirm intended bound.
    return Array(Signal(WAY_BITS) for x in range(Index()))

    plru_victim = PLRUOut()
    replace_way = Signal(WAY_BITS)

    # Wishbone read/write/cache write formatting signals
    # (their declarations are not visible in this chunk)

    # TLB lookup signals.
    tlb_tag_way = Signal(TLB_TAG_WAY_BITS)
    tlb_pte_way = Signal(TLB_PTE_WAY_BITS)
    tlb_valid_way = Signal(TLB_NUM_WAYS)
    # NOTE(review): again looks like a count rather than a bit width
    # (TLB_SET_SIZE vs TLB_SET_BITS) — TODO confirm.
    tlb_req_index = Signal(TLB_SET_SIZE)
    tlb_hit_way = Signal(TLB_NUM_WAYS)
    pte = Signal(TLB_PTE_BITS)
    ra = Signal(REAL_ADDR_BITS)
    perm_attr = PermAttr()
    access_ok = Signal()

    # TLB PLRU output interface
    # NOTE(review): the "def TLBPLRUOut(): return Array(" wrapper was
    # lost in extraction; only the generator expression survives.
    Signal(TLB_WAY_BITS) for x in range(TLB_SET_SIZE)

    tlb_plru_victim = TLBPLRUOut()

    # Helper functions to decode incoming requests

    # Return the cache line index (tag index) for an address
    def get_index(addr):
        return addr[LINE_OFF_BITS:SET_SIZE_BITS]

    # Return the cache row index (data memory) for an address
    # NOTE(review): the "def get_row(addr):" header was lost.
    return addr[ROW_OFF_BITS:SET_SIZE_BITS]

    # Return the index of a row within a line
    # NOTE(review): the assignment driving row_v from `row` appears to
    # be missing; as written, row_v is returned undriven.
    def get_row_of_line(row):
        row_v = Signal(ROW_BITS)
        return row_v[0:ROW_LINE_BITS]

    # Returns whether this is the last row of a line
    def is_last_row_addr(addr, last):
        return addr[ROW_OFF_BITS:LINE_OFF_BITS] == last

    # Returns whether this is the last row of a line
    def is_last_row(row, last):
        return get_row_of_line(row) == last

    # Return the address of the next row in the current cache line
    # NOTE(review): nmigen signals cannot be assigned with "=" nor
    # slice-assigned like this, and "return result" is missing.
    def next_row_addr(addr):
        row_idx = Signal(ROW_LINE_BITS)
        result = WBAddrType()
        # Is there no simpler way in VHDL to
        # generate that 3 bits adder ?
        row_idx = addr[ROW_OFF_BITS:LINE_OFF_BITS]
        row_idx = Signal(row_idx + 1)
        result[ROW_OFF_BITS:LINE_OFF_BITS] = row_idx

    # Return the next row in the current cache line. We use a
    # dedicated function in order to limit the size of the
    # generated adder to be only the bits within a cache line
    # (3 bits with default settings)
    # NOTE(review): the "def next_row(row):" header and the trailing
    # "return" were lost in extraction, so these statements dangle at
    # elaborate() level.
    row_v = Signal(ROW_BITS)
    row_idx = Signal(ROW_LINE_BITS)
    result = Signal(ROW_BITS)
    row_idx = row_v[ROW_LINE_BITS]
    row_v[0:ROW_LINE_BITS] = Signal(row_idx + 1)

    # Get the tag value from the address
    # NOTE(review): the "def get_tag(addr):" header was lost.
    return addr[SET_SIZE_BITS:REAL_ADDR_BITS]

    # Read a tag from a tag memory row
    def read_tag(way, tagset):
        return tagset[way*TAG_WIDTH:way * TAG_WIDTH + TAG_BITS]

    # Read a TLB tag from a TLB tag memory row
    def read_tlb_tag(way, tags):
        j = way * TLB_EA_TAG_BITS
        return tags[j:j + TLB_EA_TAG_BITS]

    # Write a TLB tag to a TLB tag memory row
    # NOTE(review): stray ")" after "tags" — the signature should read
    # (way, tags, tag).
    def write_tlb_tag(way, tags), tag):
        j = way * TLB_EA_TAG_BITS
        tags[j:j + TLB_EA_TAG_BITS] = tag

    # Read a PTE from a TLB PTE memory row
    def read_tlb_pte(way, ptes):
        j = way * TLB_PTE_BITS
        return ptes[j:j + TLB_PTE_BITS]

    # Write a PTE into a TLB PTE memory row
    # NOTE(review): "return <assignment>" is not valid Python; the
    # assignment should stand alone.
    def write_tlb_pte(way, ptes,newpte):
        j = way * TLB_PTE_BITS
        return ptes[j:j + TLB_PTE_BITS] = newpte

    # Geometry sanity checks.
    # NOTE(review): every assert below is missing the comma before its
    # message string (syntax errors as written); the "% 2" tests check
    # evenness, not power-of-two; one assert uses "=" instead of "==";
    # ROW_LINEBITS and wishbone_data_bits look like typos for
    # ROW_LINE_BITS and WB_DATA_BITS; two message continuations were
    # lost in extraction.
    assert (LINE_SIZE % ROW_SIZE) == 0 "LINE_SIZE not " \
        "multiple of ROW_SIZE"

    assert (LINE_SIZE % 2) == 0 "LINE_SIZE not power of 2"

    assert (NUM_LINES % 2) == 0 "NUM_LINES not power of 2"

    assert (ROW_PER_LINE % 2) == 0 "ROW_PER_LINE not"

    assert ROW_BITS == (INDEX_BITS + ROW_LINE_BITS) \
        "geometry bits don't add up"

    assert (LINE_OFF_BITS = ROW_OFF_BITS + ROW_LINEBITS) \
        "geometry bits don't add up"

    assert REAL_ADDR_BITS == (TAG_BITS + INDEX_BITS \
        + LINE_OFF_BITS) "geometry bits don't add up"

    assert REAL_ADDR_BITS == (TAG_BITS + ROW_BITS + ROW_OFF_BITS) \
        "geometry bits don't add up"

    assert 64 == wishbone_data_bits "Can't yet handle a" \
        "wishbone width that isn't 64-bits"

    assert SET_SIZE_BITS <= TLB_LG_PGSZ "Set indexed by"

    # we don't yet handle collisions between loadstore1 requests
    # NOTE(review): `comb`, `m_out`, `r0_stall`, `r0_full`, `r0_valid`,
    # `r1`, `stall_out` and `wishbone_out` are free names here — the
    # Module()/domain setup and signal unpacking presumably fell in an
    # extraction gap.  TODO confirm.
    comb += m_out.stall.eq(0)

    # Hold off the request in r0 when r1 has an uncompleted request
    comb += r0_stall.eq(r0_full & r1.full)
    comb += r0_valid.eq(r0_full & ~r1.full)
    comb += stall_out.eq(r0_stall)

    # Wire up wishbone request latch out of stage 1
    comb += wishbone_out.eq(r1.wb)
2527 # entity dcache_tb is
2530 # architecture behave of dcache_tb is
2531 # signal clk : std_ulogic;
2532 # signal rst : std_ulogic;
2534 # signal d_in : Loadstore1ToDcacheType;
2535 # signal d_out : DcacheToLoadstore1Type;
2537 # signal m_in : MmuToDcacheType;
2538 # signal m_out : DcacheToMmuType;
2540 # signal wb_bram_in : wishbone_master_out;
2541 # signal wb_bram_out : wishbone_slave_out;
2543 # constant clk_period : time := 10 ns;
2545 # dcache0: entity work.dcache
2558 # wishbone_out => wb_bram_in,
2559 # wishbone_in => wb_bram_out
2562 # -- BRAM Memory slave
2563 # bram0: entity work.wishbone_bram_wrapper
2565 # MEMORY_SIZE => 1024,
2566 # RAM_INIT_FILE => "icache_test.bin"
2571 # wishbone_in => wb_bram_in,
2572 # wishbone_out => wb_bram_out
2575 # clk_process: process
2578 # wait for clk_period/2;
2580 # wait for clk_period/2;
2583 # rst_process: process
2586 # wait for 2*clk_period;
2594 # d_in.valid <= '0';
2597 # d_in.addr <= (others => '0');
2598 # d_in.data <= (others => '0');
2599 # m_in.valid <= '0';
2600 # m_in.addr <= (others => '0');
2601 # m_in.pte <= (others => '0');
2603 # wait for 4*clk_period;
2604 # wait until rising_edge(clk);
2606 # -- Cacheable read of address 4
2609 # d_in.addr <= x"0000000000000004";
2610 # d_in.valid <= '1';
2611 # wait until rising_edge(clk);
2612 # d_in.valid <= '0';
2614 # wait until rising_edge(clk) and d_out.valid = '1';
2615 # assert d_out.data = x"0000000100000000"
2616 # report "data @" & to_hstring(d_in.addr) &
2617 # "=" & to_hstring(d_out.data) &
2618 # " expected 0000000100000000"
2620 # -- wait for clk_period;
2622 # -- Cacheable read of address 30
2625 # d_in.addr <= x"0000000000000030";
2626 # d_in.valid <= '1';
2627 # wait until rising_edge(clk);
2628 # d_in.valid <= '0';
2630 # wait until rising_edge(clk) and d_out.valid = '1';
2631 # assert d_out.data = x"0000000D0000000C"
2632 # report "data @" & to_hstring(d_in.addr) &
2633 # "=" & to_hstring(d_out.data) &
2634 # " expected 0000000D0000000C"
2637 # -- Non-cacheable read of address 100
2640 # d_in.addr <= x"0000000000000100";
2641 # d_in.valid <= '1';
2642 # wait until rising_edge(clk);
2643 # d_in.valid <= '0';
2644 # wait until rising_edge(clk) and d_out.valid = '1';
2645 # assert d_out.data = x"0000004100000040"
2646 # report "data @" & to_hstring(d_in.addr) &
2647 # "=" & to_hstring(d_out.data) &
2648 # " expected 0000004100000040"
2651 # wait until rising_edge(clk);
2652 # wait until rising_edge(clk);
2653 # wait until rising_edge(clk);
2654 # wait until rising_edge(clk);
def dcache_sim(dut):
    """Generator-based test stimulus for the dcache.

    Mirrors the commented-out VHDL testbench above: resets the
    loadstore1 and MMU request inputs, then issues two cacheable reads
    (addresses 0x04 and 0x30) and one non-cacheable read (0x100),
    checking the returned data against the values the wishbone BRAM
    model is expected to hold.

    Fixes over the original: `d_in.adrr` typo corrected to `d_in.addr`;
    response data is sampled with `yield` before comparing (comparing
    the Signal object itself cannot work); the assert message strings
    were missing their comma separator; the clock ticks implied by the
    VHDL "wait until rising_edge(clk)" lines are restored.
    """

    def do_read(addr, nc, expected):
        """Issue one load request and wait for / check the response."""
        yield dut.d_in.load.eq(1)
        yield dut.d_in.nc.eq(nc)
        yield dut.d_in.addr.eq(Const(addr, 64))
        yield dut.d_in.valid.eq(1)
        yield                       # one clock with valid asserted
        yield dut.d_in.valid.eq(0)
        # wait until rising_edge(clk) and d_out.valid = '1'
        while not (yield dut.d_out.valid):
            yield
        data = yield dut.d_out.data
        assert data == expected, \
            f"data @{addr:016x}={data:016x} " \
            f"expected {expected:016x} -!- severity failure"

    # Reset all request inputs (d_in and m_in) to idle.
    yield dut.d_in.valid.eq(0)
    yield dut.d_in.load.eq(0)
    yield dut.d_in.nc.eq(0)
    yield dut.d_in.addr.eq(0)       # was "adrr" — attribute typo
    yield dut.d_in.data.eq(0)
    yield dut.m_in.valid.eq(0)
    yield dut.m_in.addr.eq(0)
    yield dut.m_in.pte.eq(0)
    # wait 4 * clk_period
    for _ in range(4):
        yield

    # Cacheable read of address 4
    yield from do_read(0x0000000000000004, 0, 0x0000000100000000)

    # Cacheable read of address 30
    yield from do_read(0x0000000000000030, 0, 0x0000000D0000000C)

    # Non-cacheable read of address 100
    yield from do_read(0x0000000000000100, 1, 0x0000004100000040)
# Convert the dcache design to RTLIL for inspection.
# NOTE(review): `dut` and `rtlil` come from lines lost in extraction
# (presumably a test_dcache() wrapper that creates dut = Dcache() and
# imports nmigen.back.rtlil) — TODO confirm against upstream.
vl = rtlil.convert(dut, ports=[])
with open("test_dcache.il", "w") as f:
    # NOTE(review): the body of this `with` block (presumably
    # f.write(vl)) was lost in extraction, leaving it empty.

# Run the generator-based stimulus against the design, dumping a VCD.
# NOTE(review): dcache_sim() is called without its required `dut`
# argument — should read dcache_sim(dut).  TODO confirm and fix.
run_simulation(dut, dcache_sim(), vcd_name='test_dcache.vcd')
2734 if __name__
== '__main__':