src/soc/experiment/dcache.py

   1 """Dcache
   2
   3 based on Anton Blanchard microwatt dcache.vhdl
   4
   5 """
   6
   7 from enum import Enum, unique
   8
   9 from nmigen import Module, Signal, Elaboratable,
  10                    Cat, Repl
  11 from nmigen.cli import main
  12 from nmigen.iocontrol import RecordObject
  13 from nmigen.util import log2_int
  14
  15 from experiment.mem_types import LoadStore1ToDcacheType,
  16                                  DcacheToLoadStore1Type,
  17                                  MmuToDcacheType,
  18                                  DcacheToMmuType
  19
  20 from experiment.wb_types import WB_ADDR_BITS, WB_DATA_BITS, WB_SEL_BITS,
  21                                 WBAddrType, WBDataType, WBSelType,
  22                                 WbMasterOut, WBSlaveOut, WBMasterOutVector,
  23                                 WBSlaveOutVector, WBIOMasterOut,
  24                                 WBIOSlaveOut
  25
  26 # --
  27 # -- Set associative dcache write-through
  28 # --
  29 # -- TODO (in no specific order):
  30 # --
  31 # -- * See list in icache.vhdl
  32 # -- * Complete load misses on the cycle when WB data comes instead of
  33 # --   at the end of line (this requires dealing with requests coming in
  34 # --   while not idle...)
  35 # --
  36 # library ieee;
  37 # use ieee.std_logic_1164.all;
  38 # use ieee.numeric_std.all;
  39 #
  40 # library work;
  41 # use work.utils.all;
  42 # use work.common.all;
  43 # use work.helpers.all;
  44 # use work.wishbone_types.all;
  45 #
  46 # entity dcache is
  47 class Dcache(Elaboratable):
  48 #     generic (
  49 #         -- Line size in bytes
  50 #         LINE_SIZE : positive := 64;
  51 #         -- Number of lines in a set
  52 #         NUM_LINES : positive := 32;
  53 #         -- Number of ways
  54 #         NUM_WAYS  : positive := 4;
  55 #         -- L1 DTLB entries per set
  56 #         TLB_SET_SIZE : positive := 64;
  57 #         -- L1 DTLB number of sets
  58 #         TLB_NUM_WAYS : positive := 2;
  59 #         -- L1 DTLB log_2(page_size)
  60 #         TLB_LG_PGSZ : positive := 12;
  61 #         -- Non-zero to enable log data collection
  62 #         LOG_LENGTH : natural := 0
  63 #         );
  64     def __init__(self):
  65         # Line size in bytes
  66         self.LINE_SIZE = 64
  67         # Number of lines in a set
  68         self.NUM_LINES = 32
  69         # Number of ways
  70         self.NUM_WAYS = 4
  71         # L1 DTLB entries per set
  72         self.TLB_SET_SIZE = 64
  73         # L1 DTLB number of sets
  74         self.TLB_NUM_WAYS = 2
  75         # L1 DTLB log_2(page_size)
  76         self.TLB_LG_PGSZ = 12
  77         # Non-zero to enable log data collection
  78         self.LOG_LENGTH = 0
  79 #     port (
  80 #         clk          : in std_ulogic;
  81 #         rst          : in std_ulogic;
  82 #
  83 #         d_in         : in Loadstore1ToDcacheType;
  84 #         d_out        : out DcacheToLoadstore1Type;
  85 #
  86 #         m_in         : in MmuToDcacheType;
  87 #         m_out        : out DcacheToMmuType;
  88 #
  89 #       stall_out    : out std_ulogic;
  90 #
  91 #         wishbone_out : out wishbone_master_out;
  92 #         wishbone_in  : in wishbone_slave_out;
  93 #
  94 #         log_out      : out std_ulogic_vector(19 downto 0)
  95 #         );
  96         self.d_in      = LoadStore1ToDcacheType()
  97         self.d_out     = DcacheToLoadStore1Type()
  98
  99         self.m_in      = MmuToDcacheType()
 100         self.m_out     = DcacheToMmuType()
 101
 102         self.stall_out = Signal()
 103
 104         self.wb_out    = WBMasterOut()
 105         self.wb_in     = WBSlaveOut()
 106
 107         self.log_out   = Signal(20)
 108 # end entity dcache;
 109
 110 # architecture rtl of dcache is
 111     def elaborate(self, platform):
 112         LINE_SIZE    = self.LINE_SIZE
 113         NUM_LINES    = self.NUM_LINES
 114         NUM_WAYS     = self.NUM_WAYS
 115         TLB_SET_SIZE = self.TLB_SET_SIZE
 116         TLB_NUM_WAYS = self.TLB_NUM_WAYS
 117         TLB_LG_PGSZ  = self.TLB_LG_PGSZ
 118         LOG_LENGTH   = self.LOG_LENGTH
 119
 120 #     -- BRAM organisation: We never access more than
 121 #     -- wishbone_data_bits at a time so to save
 122 #     -- resources we make the array only that wide, and
 123 #     -- use consecutive indices for to make a cache "line"
 124 #     --
 125 #     -- ROW_SIZE is the width in bytes of the BRAM
 126 #     -- (based on WB, so 64-bits)
 127 #     constant ROW_SIZE : natural := wishbone_data_bits / 8;
 128         # BRAM organisation: We never access more than
 129         #     -- wishbone_data_bits at a time so to save
 130         #     -- resources we make the array only that wide, and
 131         #     -- use consecutive indices for to make a cache "line"
 132         #     --
 133         #     -- ROW_SIZE is the width in bytes of the BRAM
 134         #     -- (based on WB, so 64-bits)
 135         ROW_SIZE = WB_DATA_BITS / 8;
 136
 137 #     -- ROW_PER_LINE is the number of row (wishbone
 138 #     -- transactions) in a line
 139 #     constant ROW_PER_LINE  : natural := LINE_SIZE / ROW_SIZE;
 140 #     -- BRAM_ROWS is the number of rows in BRAM needed
 141 #     -- to represent the full dcache
 142 #     constant BRAM_ROWS : natural := NUM_LINES * ROW_PER_LINE;
 143         # ROW_PER_LINE is the number of row (wishbone
 144         # transactions) in a line
 145         ROW_PER_LINE = LINE_SIZE / ROW_SIZE
 146         # BRAM_ROWS is the number of rows in BRAM needed
 147         # to represent the full dcache
 148         BRAM_ROWS = NUM_LINES * ROW_PER_LINE
 149
 150 #     -- Bit fields counts in the address
 151 #
 152 #     -- REAL_ADDR_BITS is the number of real address
 153 #     -- bits that we store
 154 #     constant REAL_ADDR_BITS : positive := 56;
 155 #     -- ROW_BITS is the number of bits to select a row
 156 #     constant ROW_BITS      : natural := log2(BRAM_ROWS);
 157 #     -- ROW_LINEBITS is the number of bits to select
 158 #     -- a row within a line
 159 #     constant ROW_LINEBITS  : natural := log2(ROW_PER_LINE);
 160 #     -- LINE_OFF_BITS is the number of bits for
 161 #     -- the offset in a cache line
 162 #     constant LINE_OFF_BITS : natural := log2(LINE_SIZE);
 163 #     -- ROW_OFF_BITS is the number of bits for
 164 #     -- the offset in a row
 165 #     constant ROW_OFF_BITS : natural := log2(ROW_SIZE);
 166 #     -- INDEX_BITS is the number if bits to
 167 #     -- select a cache line
 168 #     constant INDEX_BITS : natural := log2(NUM_LINES);
 169 #     -- SET_SIZE_BITS is the log base 2 of the set size
 170 #     constant SET_SIZE_BITS : natural := LINE_OFF_BITS
 171 #                                         + INDEX_BITS;
 172 #     -- TAG_BITS is the number of bits of
 173 #     -- the tag part of the address
 174 #     constant TAG_BITS : natural := REAL_ADDR_BITS - SET_SIZE_BITS;
 175 #     -- TAG_WIDTH is the width in bits of each way of the tag RAM
 176 #     constant TAG_WIDTH : natural := TAG_BITS + 7
 177 #                                     - ((TAG_BITS + 7) mod 8);
 178 #     -- WAY_BITS is the number of bits to select a way
 179 #     constant WAY_BITS : natural := log2(NUM_WAYS);
 180         # Bit fields counts in the address
 181
 182         # REAL_ADDR_BITS is the number of real address
 183         # bits that we store
 184         REAL_ADDR_BITS = 56
 185         # ROW_BITS is the number of bits to select a row
 186         ROW_BITS = log2_int(BRAM_ROWS)
 187         # ROW_LINE_BITS is the number of bits to select
 188         # a row within a line
 189         ROW_LINE_BITS = log2_int(ROW_PER_LINE)
 190         # LINE_OFF_BITS is the number of bits for
 191         # the offset in a cache line
 192         LINE_OFF_BITS = log2_int(LINE_SIZE)
 193         # ROW_OFF_BITS is the number of bits for
 194         # the offset in a row
 195         ROW_OFF_BITS = log2_int(ROW_SIZE)
 196         # INDEX_BITS is the number if bits to
 197         # select a cache line
 198         INDEX_BITS = log2_int(NUM_LINES)
 199         # SET_SIZE_BITS is the log base 2 of the set size
 200         SET_SIZE_BITS = LINE_OFF_BITS + INDEX_BITS
 201         # TAG_BITS is the number of bits of
 202         # the tag part of the address
 203         TAG_BITS = REAL_ADDR_BITS - SET_SIZE_BITS
 204         # TAG_WIDTH is the width in bits of each way of the tag RAM
 205         TAG_WIDTH = TAG_BITS + 7 - ((TAG_BITS + 7) % 8)
 206         # WAY_BITS is the number of bits to select a way
 207         WAY_BITS = log2_int(NUM_WAYS)
 208
 209 #     -- Example of layout for 32 lines of 64 bytes:
 210 #     --
 211 #     -- ..  tag    |index|  line  |
 212 #     -- ..         |   row   |    |
 213 #     -- ..         |     |---|    | ROW_LINEBITS  (3)
 214 #     -- ..         |     |--- - --| LINE_OFF_BITS (6)
 215 #     -- ..         |         |- --| ROW_OFF_BITS  (3)
 216 #     -- ..         |----- ---|    | ROW_BITS      (8)
 217 #     -- ..         |-----|        | INDEX_BITS    (5)
 218 #     -- .. --------|              | TAG_BITS      (45)
 219         # Example of layout for 32 lines of 64 bytes:
 220         #
 221         # ..  tag    |index|  line  |
 222         # ..         |   row   |    |
 223         # ..         |     |---|    | ROW_LINE_BITS  (3)
 224         # ..         |     |--- - --| LINE_OFF_BITS (6)
 225         # ..         |         |- --| ROW_OFF_BITS  (3)
 226         # ..         |----- ---|    | ROW_BITS      (8)
 227         # ..         |-----|        | INDEX_BITS    (5)
 228         # .. --------|              | TAG_BITS      (45)
 229
 230
 231 #     subtype row_t is integer range 0 to BRAM_ROWS-1;
 232 #     subtype index_t is integer range 0 to NUM_LINES-1;
 233 #     subtype way_t is integer range 0 to NUM_WAYS-1;
 234 #     subtype row_in_line_t is unsigned(ROW_LINE_BITS-1 downto 0);
 235         ROW         = BRAM_ROWS
 236         INDEX       = NUM_LINES
 237         WAY         = NUM_WAYS
 238         ROW_IN_LINE = ROW_LINE_BITS
 239
 240 #     -- The cache data BRAM organized as described above for each way
 241 #     subtype cache_row_t is
 242 #      std_ulogic_vector(wishbone_data_bits-1 downto 0);
 243         # The cache data BRAM organized as described above for each way
 244         CACHE_ROW   = WB_DATA_BITS
 245
 246 #     -- The cache tags LUTRAM has a row per set.
 247 #     -- Vivado is a pain and will not handle a
 248 #     -- clean (commented) definition of the cache
 249 #     -- tags as a 3d memory. For now, work around
 250 #     -- it by putting all the tags
 251 #     subtype cache_tag_t is std_logic_vector(TAG_BITS-1 downto 0);
 252         # The cache tags LUTRAM has a row per set.
 253         # Vivado is a pain and will not handle a
 254         # clean (commented) definition of the cache
 255         # tags as a 3d memory. For now, work around
 256         # it by putting all the tags
 257         CACHE_TAG   = TAG_BITS
 258
 259 #     -- type cache_tags_set_t is array(way_t) of cache_tag_t;
 260 #     -- type cache_tags_array_t is array(index_t) of cache_tags_set_t;
 261 #     constant TAG_RAM_WIDTH : natural := TAG_WIDTH * NUM_WAYS;
 262 #     subtype cache_tags_set_t is
 263 #      std_logic_vector(TAG_RAM_WIDTH-1 downto 0);
 264 #     type cache_tags_array_t is array(index_t) of cache_tags_set_t;
 265         # type cache_tags_set_t is array(way_t) of cache_tag_t;
 266         # type cache_tags_array_t is array(index_t) of cache_tags_set_t;
 267         TAG_RAM_WIDTH = TAG_WIDTH * NUM_WAYS
 268
 269         CACHE_TAG_SET = TAG_RAM_WIDTH
 270
 271         def CacheTagArray():
 272             return Array(CacheTagSet() for x in range(INDEX))
 273
 274 #     -- The cache valid bits
 275 #     subtype cache_way_valids_t is
 276 #      std_ulogic_vector(NUM_WAYS-1 downto 0);
 277 #     type cache_valids_t is array(index_t) of cache_way_valids_t;
 278 #     type row_per_line_valid_t is
 279 #      array(0 to ROW_PER_LINE - 1) of std_ulogic;
 280         # The cache valid bits
 281         CACHE_WAY_VALID_BITS = NUM_WAYS
 282
 283         def CacheValidBitsArray():
 284             return Array(CacheWayValidBits() for x in range(INDEX))
 285
 286         def RowPerLineValidArray():
 287             return Array(Signal() for x in range(ROW_PER_LINE))
 288
 289 #     -- Storage. Hopefully "cache_rows" is a BRAM, the rest is LUTs
 290 #     signal cache_tags    : cache_tags_array_t;
 291 #     signal cache_tag_set : cache_tags_set_t;
 292 #     signal cache_valids  : cache_valids_t;
 293 #
 294 #     attribute ram_style : string;
 295 #     attribute ram_style of cache_tags : signal is "distributed";
 296         # Storage. Hopefully "cache_rows" is a BRAM, the rest is LUTs
 297         cache_tags       = CacheTagArray()
 298         cache_tag_set    = Signal(CACHE_TAG_SET)
 299         cache_valid_bits = CacheValidBitsArray()
 300
 301         # TODO attribute ram_style : string;
 302         # TODO attribute ram_style of cache_tags : signal is "distributed";
 303
 304 #     -- L1 TLB.
 305 #     constant TLB_SET_BITS : natural := log2(TLB_SET_SIZE);
 306 #     constant TLB_WAY_BITS : natural := log2(TLB_NUM_WAYS);
 307 #     constant TLB_EA_TAG_BITS : natural :=
 308 #      64 - (TLB_LG_PGSZ + TLB_SET_BITS);
 309 #     constant TLB_TAG_WAY_BITS : natural :=
 310 #      TLB_NUM_WAYS * TLB_EA_TAG_BITS;
 311 #     constant TLB_PTE_BITS : natural := 64;
 312 #     constant TLB_PTE_WAY_BITS : natural :=
 313 #      TLB_NUM_WAYS * TLB_PTE_BITS;
 314         # L1 TLB
 315         TLB_SET_BITS     = log2_int(TLB_SET_SIZE)
 316         TLB_WAY_BITS     = log2_int(TLB_NUM_WAYS)
 317         TLB_EA_TAG_BITS  = 64 - (TLB_LG_PGSZ + TLB_SET_BITS)
 318         TLB_TAG_WAY_BITS = TLB_NUM_WAYS * TLB_EA_TAG_BITS
 319         TLB_PTE_BITS     = 64
 320         TLB_PTE_WAY_BITS = TLB_NUM_WAYS * TLB_PTE_BITS;
 321
 322 #     subtype tlb_way_t is integer range 0 to TLB_NUM_WAYS - 1;
 323 #     subtype tlb_index_t is integer range 0 to TLB_SET_SIZE - 1;
 324 #     subtype tlb_way_valids_t is
 325 #      std_ulogic_vector(TLB_NUM_WAYS-1 downto 0);
 326 #     type tlb_valids_t is
 327 #      array(tlb_index_t) of tlb_way_valids_t;
 328 #     subtype tlb_tag_t is
 329 #      std_ulogic_vector(TLB_EA_TAG_BITS - 1 downto 0);
 330 #     subtype tlb_way_tags_t is
 331 #      std_ulogic_vector(TLB_TAG_WAY_BITS-1 downto 0);
 332 #     type tlb_tags_t is
 333 #      array(tlb_index_t) of tlb_way_tags_t;
 334 #     subtype tlb_pte_t is
 335 #      std_ulogic_vector(TLB_PTE_BITS - 1 downto 0);
 336 #     subtype tlb_way_ptes_t is
 337 #      std_ulogic_vector(TLB_PTE_WAY_BITS-1 downto 0);
 338 #     type tlb_ptes_t is array(tlb_index_t) of tlb_way_ptes_t;
 339 #     type hit_way_set_t is array(tlb_way_t) of way_t;
 340         TLB_WAY = TLB_NUM_WAYS
 341
 342         TLB_INDEX = TLB_SET_SIZE
 343
 344         TLB_WAY_VALID_BITS = TLB_NUM_WAYS
 345
 346         def TLBValidBitsArray():
 347             return Array(
 348              Signal(TLB_WAY_VALID_BITS) for x in range(TLB_SET_SIZE)
 349             )
 350
 351         TLB_TAG = TLB_EA_TAG_BITS
 352
 353         TLB_WAY_TAGS = TLB_TAG_WAY_BITS
 354
 355         def TLBTagsArray():
 356             return Array(
 357              Signal(TLB_WAY_TAGS) for x in range (TLB_SET_SIZE)
 358             )
 359
 360         TLB_PTE = TLB_PTE_BITS
 361
 362         TLB_WAY_PTES = TLB_PTE_WAY_BITS
 363
 364         def TLBPtesArray():
 365             return Array(
 366              Signal(TLB_WAY_PTES) for x in range(TLB_SET_SIZE)
 367             )
 368
 369         def HitWaySet():
 370             return Array(Signal(WAY) for x in range(TLB_NUM_WAYS))
 371
 372 #     signal dtlb_valids : tlb_valids_t;
 373 #     signal dtlb_tags : tlb_tags_t;
 374 #     signal dtlb_ptes : tlb_ptes_t;
 375
 376 """note: these are passed to nmigen.hdl.Memory as "attributes".  don't
 377    know how, just that they are.
 378 """
 379 #     attribute ram_style of dtlb_tags : signal is "distributed";
 380 #     attribute ram_style of dtlb_ptes : signal is "distributed";
 381         dtlb_valids = TLBValidBitsArray()
 382         dtlb_tags   = TLBTagsArray()
 383         dtlb_ptes   = TLBPtesArray()
 384         # TODO attribute ram_style of dtlb_tags : signal is "distributed";
 385         # TODO attribute ram_style of dtlb_ptes : signal is "distributed";
 386
 387
 388 #     -- Record for storing permission, attribute, etc. bits from a PTE
 389 #     type perm_attr_t is record
 390 #         reference : std_ulogic;
 391 #         changed   : std_ulogic;
 392 #         nocache   : std_ulogic;
 393 #         priv      : std_ulogic;
 394 #         rd_perm   : std_ulogic;
 395 #         wr_perm   : std_ulogic;
 396 #     end record;
 397         # Record for storing permission, attribute, etc. bits from a PTE
 398         class PermAttr(RecordObject):
 399             def __init__(self):
 400                 super().__init__()
 401                 self.reference = Signal()
 402                 self.changed   = Signal()
 403                 self.nocache   = Signal()
 404                 self.priv      = Signal()
 405                 self.rd_perm   = Signal()
 406                 self.wr_perm   = Signal()
 407
 408 #     function extract_perm_attr(
 409 #      pte : std_ulogic_vector(TLB_PTE_BITS - 1 downto 0))
 410 #      return perm_attr_t is
 411 #         variable pa : perm_attr_t;
 412 #     begin
 413 #         pa.reference := pte(8);
 414 #         pa.changed := pte(7);
 415 #         pa.nocache := pte(5);
 416 #         pa.priv := pte(3);
 417 #         pa.rd_perm := pte(2);
 418 #         pa.wr_perm := pte(1);
 419 #         return pa;
 420 #     end;
 421         def extract_perm_attr(pte=Signal(TLB_PTE_BITS)):
 422             pa = PermAttr()
 423             pa.reference = pte[8]
 424             pa.changed   = pte[7]
 425             pa.nocache   = pte[5]
 426             pa.priv      = pte[3]
 427             pa.rd_perm   = pte[2]
 428             pa.wr_perm   = pte[1]
 429             return pa;
 430
 431 #     constant real_mode_perm_attr : perm_attr_t :=
 432 #      (nocache => '0', others => '1');
 433         REAL_MODE_PERM_ATTR = PermAttr()
 434         REAL_MODE_PERM_ATTR.reference = 1
 435         REAL_MODE_PERM_ATTR.changed   = 1
 436         REAL_MODE_PERM_ATTR.priv      = 1
 437         REAL_MODE_PERM_ATTR.rd_perm   = 1
 438         REAL_MODE_PERM_ATTR.wr_perm   = 1
 439
 440 #     -- Type of operation on a "valid" input
 441 #     type op_t is
 442 #      (
 443 #       OP_NONE,
 444 #       OP_BAD,        -- NC cache hit, TLB miss, prot/RC failure
 445 #       OP_STCX_FAIL,  -- conditional store w/o reservation
 446 #       OP_LOAD_HIT,   -- Cache hit on load
 447 #       OP_LOAD_MISS,  -- Load missing cache
 448 #       OP_LOAD_NC,    -- Non-cachable load
 449 #       OP_STORE_HIT,  -- Store hitting cache
 450 #       OP_STORE_MISS  -- Store missing cache
 451 #      );
 452         # Type of operation on a "valid" input
 453         @unique
 454         class OP(Enum):
 455           OP_NONE       = 0
 456           OP_BAD        = 1 # NC cache hit, TLB miss, prot/RC failure
 457           OP_STCX_FAIL  = 2 # conditional store w/o reservation
 458           OP_LOAD_HIT   = 3 # Cache hit on load
 459           OP_LOAD_MISS  = 4 # Load missing cache
 460           OP_LOAD_NC    = 5 # Non-cachable load
 461           OP_STORE_HIT  = 6 # Store hitting cache
 462           OP_STORE_MISS = 7 # Store missing cache
 463
 464 #     -- Cache state machine
 465 #     type state_t is
 466 #      (
 467 #       IDLE,            -- Normal load hit processing
 468 #       RELOAD_WAIT_ACK, -- Cache reload wait ack
 469 #       STORE_WAIT_ACK,  -- Store wait ack
 470 #       NC_LOAD_WAIT_ACK -- Non-cachable load wait ack
 471 #      );
 472         # Cache state machine
 473         @unique
 474         class State(Enum):
 475             IDLE             = 0 # Normal load hit processing
 476             RELOAD_WAIT_ACK  = 1 # Cache reload wait ack
 477             STORE_WAIT_ACK   = 2 # Store wait ack
 478             NC_LOAD_WAIT_ACK = 3 # Non-cachable load wait ack
 479
 480 #     -- Dcache operations:
 481 #     --
 482 #     -- In order to make timing, we use the BRAMs with
 483 #     -- an output buffer, which means that the BRAM
 484 #     -- output is delayed by an extra cycle.
 485 #     --
 486 #     -- Thus, the dcache has a 2-stage internal pipeline
 487 #     -- for cache hits with no stalls.
 488 #     --
 489 #     -- All other operations are handled via stalling
 490 #     -- in the first stage.
 491 #     --
 492 #     -- The second stage can thus complete a hit at the same
 493 #     -- time as the first stage emits a stall for a complex op.
 494 #
 495 #     -- Stage 0 register, basically contains just the latched request
 496 #     type reg_stage_0_t is record
 497 #         req   : Loadstore1ToDcacheType;
 498 #         tlbie : std_ulogic;
 499 #         doall : std_ulogic;
 500 #         tlbld : std_ulogic;
 501 #         mmu_req : std_ulogic;   -- indicates source of request
 502 #     end record;
 503 # Dcache operations:
 504 #
 505 # In order to make timing, we use the BRAMs with
 506 # an output buffer, which means that the BRAM
 507 # output is delayed by an extra cycle.
 508 #
 509 # Thus, the dcache has a 2-stage internal pipeline
 510 # for cache hits with no stalls.
 511 #
 512 # All other operations are handled via stalling
 513 # in the first stage.
 514 #
 515 # The second stage can thus complete a hit at the same
 516 # time as the first stage emits a stall for a complex op.
 517 #
 518         # Stage 0 register, basically contains just the latched request
 519         class RegStage0(RecordObject):
 520             def __init__(self):
 521                 super().__init__()
 522                 self.req     = LoadStore1ToDcacheType()
 523                 self.tlbie   = Signal()
 524                 self.doall   = Signal()
 525                 self.tlbld   = Signal()
 526                 self.mmu_req = Signal() # indicates source of request
 527
 528 #     signal r0 : reg_stage_0_t;
 529 #     signal r0_full : std_ulogic;
 530         r0      = RegStage0()
 531         r0_full = Signal()
 532
 533 #     type mem_access_request_t is record
 534 #         op        : op_t;
 535 #         valid     : std_ulogic;
 536 #         dcbz      : std_ulogic;
 537 #         real_addr : std_ulogic_vector(REAL_ADDR_BITS - 1 downto 0);
 538 #         data      : std_ulogic_vector(63 downto 0);
 539 #         byte_sel  : std_ulogic_vector(7 downto 0);
 540 #         hit_way   : way_t;
 541 #         same_tag  : std_ulogic;
 542 #         mmu_req   : std_ulogic;
 543 #     end record;
 544         class MemAccessRequest(RecordObject):
 545             def __init__(self):
 546                 super().__init__()
 547                 self.op        = Op()
 548                 self.valid     = Signal()
 549                 self.dcbz      = Signal()
 550                 self.real_addr = Signal(REAL_ADDR_BITS)
 551                 self.data      = Signal(64)
 552                 self.byte_sel  = Signal(8)
 553                 self.hit_way   = Signal(WAY)
 554                 self.same_tag  = Signal()
 555                 self.mmu_req   = Signal()
 556
 557 #     -- First stage register, contains state for stage 1 of load hits
 558 #     -- and for the state machine used by all other operations
 559 #     type reg_stage_1_t is record
 560 #         -- Info about the request
 561 #         full    : std_ulogic; -- have uncompleted request
 562 #         mmu_req : std_ulogic; -- request is from MMU
 563 #         req     : mem_access_request_t;
 564 #
 565 #         -- Cache hit state
 566 #         hit_way        : way_t;
 567 #         hit_load_valid : std_ulogic;
 568 #         hit_index      : index_t;
 569 #         cache_hit      : std_ulogic;
 570 #
 571 #         -- TLB hit state
 572 #         tlb_hit       : std_ulogic;
 573 #         tlb_hit_way   : tlb_way_t;
 574 #         tlb_hit_index : tlb_index_t;
 575 #
 576 #         -- 2-stage data buffer for data forwarded from writes to reads
 577 #         forward_data1  : std_ulogic_vector(63 downto 0);
 578 #         forward_data2  : std_ulogic_vector(63 downto 0);
 579 #         forward_sel1   : std_ulogic_vector(7 downto 0);
 580 #         forward_valid1 : std_ulogic;
 581 #         forward_way1   : way_t;
 582 #         forward_row1   : row_t;
 583 #         use_forward1   : std_ulogic;
 584 #         forward_sel    : std_ulogic_vector(7 downto 0);
 585 #
 586 #         -- Cache miss state (reload state machine)
 587 #         state            : state_t;
 588 #         dcbz             : std_ulogic;
 589 #         write_bram       : std_ulogic;
 590 #         write_tag        : std_ulogic;
 591 #         slow_valid       : std_ulogic;
 592 #         wb               : wishbone_master_out;
 593 #         reload_tag       : cache_tag_t;
 594 #         store_way        : way_t;
 595 #         store_row        : row_t;
 596 #         store_index      : index_t;
 597 #         end_row_ix       : row_in_line_t;
 598 #         rows_valid       : row_per_line_valid_t;
 599 #         acks_pending     : unsigned(2 downto 0);
 600 #         inc_acks         : std_ulogic;
 601 #         dec_acks         : std_ulogic;
 602 #
 603 #         -- Signals to complete (possibly with error)
 604 #         ls_valid         : std_ulogic;
 605 #         ls_error         : std_ulogic;
 606 #         mmu_done         : std_ulogic;
 607 #         mmu_error        : std_ulogic;
 608 #         cache_paradox    : std_ulogic;
 609 #
 610 #         -- Signal to complete a failed stcx.
 611 #         stcx_fail        : std_ulogic;
 612 #     end record;
 613 # First stage register, contains state for stage 1 of load hits
 614 # and for the state machine used by all other operations
 615         class RegStage1(RecordObject):
 616             def __init__(self):
 617                 super().__init__()
 618                 # Info about the request
 619                 self.full             = Signal() # have uncompleted request
 620                 self.mmu_req          = Signal() # request is from MMU
 621                 self.req              = MemAccessRequest()
 622
 623                 # Cache hit state
 624                 self.hit_way          = Signal(WAY)
 625                 self.hit_load_valid   = Signal()
 626                 self.hit_index        = Signal(INDEX)
 627                 self.cache_hit        = Signal()
 628
 629                 # TLB hit state
 630                 self.tlb_hit          = Signal()
 631                 self.tlb_hit_way      = Signal(TLB_WAY)
 632                 self.tlb_hit_index    = Signal(TLB_SET_SIZE)
 633                 self.
 634                 # 2-stage data buffer for data forwarded from writes to reads
 635                 self.forward_data1    = Signal(64)
 636                 self.forward_data2    = Signal(64)
 637                 self.forward_sel1     = Signal(8)
 638                 self.forward_valid1   = Signal()
 639                 self.forward_way1     = Signal(WAY)
 640                 self.forward_row1     = Signal(ROW)
 641                 self.use_forward1     = Signal()
 642                 self.forward_sel      = Signal(8)
 643
 644                 # Cache miss state (reload state machine)
 645                 self.state            = State()
 646                 self.dcbz             = Signal()
 647                 self.write_bram       = Signal()
 648                 self.write_tag        = Signal()
 649                 self.slow_valid       = Signal()
 650                 self.wb               = WishboneMasterOut()
 651                 self.reload_tag       = Signal(CACHE_TAG)
 652                 self.store_way        = Signal(WAY)
 653                 self.store_row        = Signal(ROW)
 654                 self.store_index      = Signal(INDEX)
 655                 self.end_row_ix       = Signal(ROW_IN_LINE)
 656                 self.rows_valid       = RowPerLineValidArray()
 657                 self.acks_pending     = Signal(3)
 658                 self.inc_acks         = Signal()
 659                 self.dec_acks         = Signal()
 660
 661                 # Signals to complete (possibly with error)
 662                 self.ls_valid         = Signal()
 663                 self.ls_error         = Signal()
 664                 self.mmu_done         = Signal()
 665                 self.mmu_error        = Signal()
 666                 self.cache_paradox    = Signal()
 667
 668                 # Signal to complete a failed stcx.
 669                 self.stcx_fail        = Signal()
 670
 671 #     signal r1 : reg_stage_1_t;
 672         r1 = RegStage1()
 673
 674 #     -- Reservation information
 675 #     --
 676 #     type reservation_t is record
 677 #         valid : std_ulogic;
 678 #         addr  : std_ulogic_vector(63 downto LINE_OFF_BITS);
 679 #     end record;
 680 # Reservation information
 681
 682         class Reservation(RecordObject):
 683             def __init__(self):
 684                 super().__init__()
 685                 valid = Signal()
 686                 # TODO LINE_OFF_BITS is 6
 687                 addr  = Signal(63 downto LINE_OFF_BITS)
 688
 689 #     signal reservation : reservation_t;
 690         reservation = Reservation()
 691
 692 #     -- Async signals on incoming request
 693 #     signal req_index   : index_t;
 694 #     signal req_row     : row_t;
 695 #     signal req_hit_way : way_t;
 696 #     signal req_tag     : cache_tag_t;
 697 #     signal req_op      : op_t;
 698 #     signal req_data    : std_ulogic_vector(63 downto 0);
 699 #     signal req_same_tag : std_ulogic;
 700 #     signal req_go      : std_ulogic;
 701         # Async signals on incoming request
 702         req_index    = Signal(INDEX)
 703         req_row      = Signal(ROW)
 704         req_hit_way  = Signal(WAY)
 705         req_tag      = Signal(CACHE_TAG)
 706         req_op       = Op()
 707         req_data     = Signal(64)
 708         req_same_tag = Signal()
 709         req_go       = Signal()
 710
 711 #     signal early_req_row  : row_t;
 712 #
 713 #     signal cancel_store : std_ulogic;
 714 #     signal set_rsrv     : std_ulogic;
 715 #     signal clear_rsrv   : std_ulogic;
 716 #
 717 #     signal r0_valid   : std_ulogic;
 718 #     signal r0_stall   : std_ulogic;
 719 #
 720 #     signal use_forward1_next : std_ulogic;
 721 #     signal use_forward2_next : std_ulogic;
 722         early_req_row  = Signal(ROW)
 723
 724         cancel_store = Signal()
 725         set_rsrv     = Signal()
 726         clear_rsrv   = Signal()
 727
 728         r0_valid   = Signal()
 729         r0_stall   = Signal()
 730
 731         use_forward1_next = Signal()
 732         use_forward2_next = Signal()
 733
 734 #     -- Cache RAM interface
 735 #     type cache_ram_out_t is array(way_t) of cache_row_t;
 736 #     signal cache_out   : cache_ram_out_t;
 737         # Cache RAM interface
 738         def CacheRamOut():
 739             return Array(Signal(CACHE_ROW) for x in range(NUM_WAYS))
 740
 741         cache_out = CacheRamOut()
 742
 743 #     -- PLRU output interface
 744 #     type plru_out_t is array(index_t) of
 745 #      std_ulogic_vector(WAY_BITS-1 downto 0);
 746 #     signal plru_victim : plru_out_t;
 747 #     signal replace_way : way_t;
 748         # PLRU output interface
 749         def PLRUOut():
 750             return Array(Signal(WAY_BITS) for x in range(Index()))
 751
 752         plru_victim = PLRUOut()
 753         replace_way = Signal(WAY)
 754
 755 #     -- Wishbone read/write/cache write formatting signals
 756 #     signal bus_sel     : std_ulogic_vector(7 downto 0);
 757         # Wishbone read/write/cache write formatting signals
 758         bus_sel = Signal(8)
 759
 760 #     -- TLB signals
 761 #     signal tlb_tag_way : tlb_way_tags_t;
 762 #     signal tlb_pte_way : tlb_way_ptes_t;
 763 #     signal tlb_valid_way : tlb_way_valids_t;
 764 #     signal tlb_req_index : tlb_index_t;
 765 #     signal tlb_hit : std_ulogic;
 766 #     signal tlb_hit_way : tlb_way_t;
 767 #     signal pte : tlb_pte_t;
 768 #     signal ra : std_ulogic_vector(REAL_ADDR_BITS - 1 downto 0);
 769 #     signal valid_ra : std_ulogic;
 770 #     signal perm_attr : perm_attr_t;
 771 #     signal rc_ok : std_ulogic;
 772 #     signal perm_ok : std_ulogic;
 773 #     signal access_ok : std_ulogic;
 774         # TLB signals
 775         tlb_tag_way   = Signal(TLB_WAY_TAGS)
 776         tlb_pte_way   = Signal(TLB_WAY_PTES)
 777         tlb_valid_way = Signal(TLB_WAY_VALID_BITS)
 778         tlb_req_index = Signal(TLB_SET_SIZE)
 779         tlb_hit       = Signal()
 780         tlb_hit_way   = Signal(TLB_WAY)
 781         pte           = Signal(TLB_PTE)
 782         ra            = Signal(REAL_ADDR_BITS)
 783         valid_ra      = Signal()
 784         perm_attr     = PermAttr()
 785         rc_ok         = Signal()
 786         perm_ok       = Signal()
 787         access_ok     = Signal()
 788
 789 #     -- TLB PLRU output interface
 790 #     type tlb_plru_out_t is array(tlb_index_t) of
 791 #      std_ulogic_vector(TLB_WAY_BITS-1 downto 0);
 792 #     signal tlb_plru_victim : tlb_plru_out_t;
 793         # TLB PLRU output interface
 794         DEF TLBPLRUOut():
 795             return Array(Signal(TLB_WAY_BITS) for x in range(TLB_SET_SIZE))
 796
 797         tlb_plru_victim = TLBPLRUOut()
 798
 799 #     -- Helper functions to decode incoming requests
 800 #
 801 #     -- Return the cache line index (tag index) for an address
 802 #     function get_index(addr: std_ulogic_vector) return index_t is
 803 #     begin
 804 #         return to_integer(
 805 #          unsigned(addr(SET_SIZE_BITS - 1 downto LINE_OFF_BITS))
 806 #         );
 807 #     end;
 808 # Helper functions to decode incoming requests
 809 #
 810         # Return the cache line index (tag index) for an address
 811         def get_index(addr):
 812             return addr[LINE_OFF_BITS:SET_SIZE_BITS]
 813
 814 #     -- Return the cache row index (data memory) for an address
 815 #     function get_row(addr: std_ulogic_vector) return row_t is
 816 #     begin
 817 #         return to_integer(
 818 #          unsigned(addr(SET_SIZE_BITS - 1 downto ROW_OFF_BITS))
 819 #         );
 820 #     end;
 821         # Return the cache row index (data memory) for an address
 822         def get_row(addr):
 823             return addr[ROW_OFF_BITS:SET_SIZE_BITS]
 824
 825 #     -- Return the index of a row within a line
 826 #     function get_row_of_line(row: row_t) return row_in_line_t is
 827 #       variable row_v : unsigned(ROW_BITS-1 downto 0);
 828 #     begin
 829 #       row_v := to_unsigned(row, ROW_BITS);
 830 #         return row_v(ROW_LINEBITS-1 downto 0);
 831 #     end;
 832         # Return the index of a row within a line
 833         def get_row_of_line(row):
 834             row_v = Signal(ROW_BITS)
 835             row_v = Signal(row)
 836             return row_v[0:ROW_LINE_BITS]
 837
 838 #     -- Returns whether this is the last row of a line
 839 #     function is_last_row_addr(addr: wishbone_addr_type;
 840 #      last: row_in_line_t) return boolean is
 841 #     begin
 842 #       return
 843 #        unsigned(addr(LINE_OFF_BITS-1 downto ROW_OFF_BITS)) = last;
 844 #     end;
 845         # Returns whether this is the last row of a line
 846         def is_last_row_addr(addr, last):
 847             return addr[ROW_OFF_BITS:LINE_OFF_BITS] == last
 848
 849 #     -- Returns whether this is the last row of a line
 850 #     function is_last_row(row: row_t; last: row_in_line_t)
 851 #      return boolean is
 852 #     begin
 853 #         return get_row_of_line(row) = last;
 854 #     end;
 855         # Returns whether this is the last row of a line
 856         def is_last_row(row, last):
 857             return get_row_of_line(row) == last
 858
 859 #     -- Return the address of the next row in the current cache line
 860 #     function next_row_addr(addr: wishbone_addr_type)
 861 #      return std_ulogic_vector is
 862 #       variable row_idx : std_ulogic_vector(ROW_LINEBITS-1 downto 0);
 863 #       variable result  : wishbone_addr_type;
 864 #     begin
 865 #       -- Is there no simpler way in VHDL to
 866 #       -- generate that 3 bits adder ?
 867 #       row_idx := addr(LINE_OFF_BITS-1 downto ROW_OFF_BITS);
 868 #       row_idx := std_ulogic_vector(unsigned(row_idx) + 1);
 869 #       result := addr;
 870 #       result(LINE_OFF_BITS-1 downto ROW_OFF_BITS) := row_idx;
 871 #       return result;
 872 #     end;
 873         # Return the address of the next row in the current cache line
 874         def next_row_addr(addr):
 875             row_idx = Signal(ROW_LINE_BITS)
 876             result  = WBAddrType()
 877             # Is there no simpler way in VHDL to
 878             # generate that 3 bits adder ?
 879             row_idx = addr[ROW_OFF_BITS:LINE_OFF_BITS]
 880             row_idx = Signal(row_idx + 1)
 881             result = addr
 882             result[ROW_OFF_BITS:LINE_OFF_BITS] = row_idx
 883             return result
 884
 885 #     -- Return the next row in the current cache line. We use a
 886 #     -- dedicated function in order to limit the size of the
 887 #     -- generated adder to be only the bits within a cache line
 888 #     -- (3 bits with default settings)
 889 #     function next_row(row: row_t) return row_t is
 890 #        variable row_v  : std_ulogic_vector(ROW_BITS-1 downto 0);
 891 #        variable row_idx : std_ulogic_vector(ROW_LINEBITS-1 downto 0);
 892 #        variable result : std_ulogic_vector(ROW_BITS-1 downto 0);
 893 #     begin
 894 #        row_v := std_ulogic_vector(to_unsigned(row, ROW_BITS));
 895 #        row_idx := row_v(ROW_LINEBITS-1 downto 0);
 896 #        row_v(ROW_LINEBITS-1 downto 0) :=
 897 #         std_ulogic_vector(unsigned(row_idx) + 1);
 898 #        return to_integer(unsigned(row_v));
 899 #     end;
 900 # Return the next row in the current cache line. We use a
 901 # dedicated function in order to limit the size of the
 902 # generated adder to be only the bits within a cache line
 903 # (3 bits with default settings)
 904         def next_row(row)
 905             row_v   = Signal(ROW_BITS)
 906             row_idx = Signal(ROW_LINE_BITS)
 907             result  = Signal(ROW_BITS)
 908
 909             row_v = Signal(row)
 910             row_idx = row_v[ROW_LINE_BITS]
 911             row_v[0:ROW_LINE_BITS] = Signal(row_idx + 1)
 912             return row_v
 913
 914 #     -- Get the tag value from the address
 915 #     function get_tag(addr: std_ulogic_vector) return cache_tag_t is
 916 #     begin
 917 #         return addr(REAL_ADDR_BITS - 1 downto SET_SIZE_BITS);
 918 #     end;
 919         # Get the tag value from the address
 920         def get_tag(addr):
 921             return addr[SET_SIZE_BITS:REAL_ADDR_BITS]
 922
 923 #     -- Read a tag from a tag memory row
 924 #     function read_tag(way: way_t; tagset: cache_tags_set_t)
 925 #      return cache_tag_t is
 926 #     begin
 927 #       return tagset(way * TAG_WIDTH + TAG_BITS
 928 #                     - 1 downto way * TAG_WIDTH);
 929 #     end;
 930         # Read a tag from a tag memory row
 931         def read_tag(way, tagset):
 932             return tagset[way *TAG_WIDTH:way * TAG_WIDTH + TAG_BITS]
 933
 934 #     -- Read a TLB tag from a TLB tag memory row
 935 #     function read_tlb_tag(way: tlb_way_t; tags: tlb_way_tags_t)
 936 #      return tlb_tag_t is
 937 #         variable j : integer;
 938 #     begin
 939 #         j := way * TLB_EA_TAG_BITS;
 940 #         return tags(j + TLB_EA_TAG_BITS - 1 downto j);
 941 #     end;
 942         # Read a TLB tag from a TLB tag memory row
 943         def read_tlb_tag(way, tags):
 944             j = Signal()
 945
 946             j = way * TLB_EA_TAG_BITS
 947             return tags[j:j + TLB_EA_TAG_BITS]
 948
 949 #     -- Write a TLB tag to a TLB tag memory row
 950 #     procedure write_tlb_tag(way: tlb_way_t; tags: inout tlb_way_tags_t;
 951 #                             tag: tlb_tag_t) is
 952 #         variable j : integer;
 953 #     begin
 954 #         j := way * TLB_EA_TAG_BITS;
 955 #         tags(j + TLB_EA_TAG_BITS - 1 downto j) := tag;
 956 #     end;
 957         # Write a TLB tag to a TLB tag memory row
 958         def write_tlb_tag(way, tags), tag):
 959             j = Signal()
 960
 961             j = way * TLB_EA_TAG_BITS
 962             tags[j:j + TLB_EA_TAG_BITS] = tag
 963
 964 #     -- Read a PTE from a TLB PTE memory row
 965 #     function read_tlb_pte(way: tlb_way_t; ptes: tlb_way_ptes_t)
 966 #      return tlb_pte_t is
 967 #         variable j : integer;
 968 #     begin
 969 #         j := way * TLB_PTE_BITS;
 970 #         return ptes(j + TLB_PTE_BITS - 1 downto j);
 971 #     end;
 972         # Read a PTE from a TLB PTE memory row
 973         def read_tlb_pte(way, ptes):
 974             j = Signal()
 975
 976             j = way * TLB_PTE_BITS
 977             return ptes[j:j + TLB_PTE_BITS]
 978
 979 #     procedure write_tlb_pte(way: tlb_way_t;
 980 #      ptes: inout tlb_way_ptes_t; newpte: tlb_pte_t) is
 981 #         variable j : integer;
 982 #     begin
 983 #         j := way * TLB_PTE_BITS;
 984 #         ptes(j + TLB_PTE_BITS - 1 downto j) := newpte;
 985 #     end;
 986         def write_tlb_pte(way, ptes,newpte):
 987             j = Signal()
 988
 989             j = way * TLB_PTE_BITS
 990             return ptes[j:j + TLB_PTE_BITS] = newpte
 991
 992 # begin
 993 #
 994 """these, because they are constants, can actually be done *as*
 995    python asserts:
 996    assert LINE_SIZE % ROWSIZE == 0, "line size not ...."
 997 """
 998 #     assert LINE_SIZE mod ROW_SIZE = 0
 999 #      report "LINE_SIZE not multiple of ROW_SIZE" severity FAILURE;
1000 #     assert ispow2(LINE_SIZE)
1001 #      report "LINE_SIZE not power of 2" severity FAILURE;
1002 #     assert ispow2(NUM_LINES)
1003 #      report "NUM_LINES not power of 2" severity FAILURE;
1004 #     assert ispow2(ROW_PER_LINE)
1005 #      report "ROW_PER_LINE not power of 2" severity FAILURE;
1006 #     assert (ROW_BITS = INDEX_BITS + ROW_LINEBITS)
1007 #      report "geometry bits don't add up" severity FAILURE;
1008 #     assert (LINE_OFF_BITS = ROW_OFF_BITS + ROW_LINEBITS)
1009 #      report "geometry bits don't add up" severity FAILURE;
1010 #     assert (REAL_ADDR_BITS = TAG_BITS + INDEX_BITS + LINE_OFF_BITS)
1011 #      report "geometry bits don't add up" severity FAILURE;
1012 #     assert (REAL_ADDR_BITS = TAG_BITS + ROW_BITS + ROW_OFF_BITS)
1013 #      report "geometry bits don't add up" severity FAILURE;
1014 #     assert (64 = wishbone_data_bits)
1015 #      report "Can't yet handle a wishbone width that isn't 64-bits"
1016 #      severity FAILURE;
1017 #     assert SET_SIZE_BITS <= TLB_LG_PGSZ
1018 #      report "Set indexed by virtual address" severity FAILURE;
1019         assert (LINE_SIZE % ROW_SIZE) == 0 "LINE_SIZE not " \
1020          "multiple of ROW_SIZE"
1021
1022         assert (LINE_SIZE % 2) == 0 "LINE_SIZE not power of 2"
1023
1024         assert (NUM_LINES % 2) == 0 "NUM_LINES not power of 2"
1025
1026         assert (ROW_PER_LINE % 2) == 0 "ROW_PER_LINE not" \
1027          "power of 2"
1028
1029         assert ROW_BITS == (INDEX_BITS + ROW_LINE_BITS) \
1030          "geometry bits don't add up"
1031
1032         assert (LINE_OFF_BITS = ROW_OFF_BITS + ROW_LINEBITS) \
1033          "geometry bits don't add up"
1034
1035         assert REAL_ADDR_BITS == (TAG_BITS + INDEX_BITS \
1036          + LINE_OFF_BITS) "geometry bits don't add up"
1037
1038         assert REAL_ADDR_BITS == (TAG_BITS + ROW_BITS + ROW_OFF_BITS) \
1039          "geometry bits don't add up"
1040
1041         assert 64 == wishbone_data_bits "Can't yet handle a" \
1042          "wishbone width that isn't 64-bits"
1043
1044         assert SET_SIZE_BITS <= TLB_LG_PGSZ "Set indexed by" \
1045          "virtual address"
1046
1047 #     -- Latch the request in r0.req as long as we're not stalling
1048 #     stage_0 : process(clk)
1049 # Latch the request in r0.req as long as we're not stalling
1050 class Stage0(Elaboratable):
1051     def __init__(self):
1052         pass
1053
1054     def elaborate(self, platform):
1055         m = Module()
1056
1057         comb = m.d.comb
1058         sync = m.d.sync
1059
1060 #         variable r : reg_stage_0_t;
1061         r = RegStage0()
1062         comb += r
1063
1064 #     begin
1065 #         if rising_edge(clk) then
1066 #             assert (d_in.valid and m_in.valid) = '0'
1067 #              report "request collision loadstore vs MMU";
1068         assert ~(d_in.valid & m_in.valid) "request collision
1069          loadstore vs MMU"
1070
1071 #             if m_in.valid = '1' then
1072         with m.If(m_in.valid):
1073 #                 r.req.valid := '1';
1074 #                 r.req.load := not (m_in.tlbie or m_in.tlbld);
1075 #                 r.req.dcbz := '0';
1076 #                 r.req.nc := '0';
1077 #                 r.req.reserve := '0';
1078 #                 r.req.virt_mode := '0';
1079 #                 r.req.priv_mode := '1';
1080 #                 r.req.addr := m_in.addr;
1081 #                 r.req.data := m_in.pte;
1082 #                 r.req.byte_sel := (others => '1');
1083 #                 r.tlbie := m_in.tlbie;
1084 #                 r.doall := m_in.doall;
1085 #                 r.tlbld := m_in.tlbld;
1086 #                 r.mmu_req := '1';
1087             sync += r.req.valid.eq(1)
1088             sync += r.req.load.eq(~(m_in.tlbie | m_in.tlbld))
1089             sync += r.req.priv_mode.eq(1)
1090             sync += r.req.addr.eq(m_in.addr)
1091             sync += r.req.data.eq(m_in.pte)
1092             sync += r.req.byte_sel.eq(1)
1093             sync += r.tlbie.eq(m_in.tlbie)
1094             sync += r.doall.eq(m_in.doall)
1095             sync += r.tlbld.eq(m_in.tlbld)
1096             sync += r.mmu_req.eq(1)
1097 #             else
1098         with m.Else():
1099 #                 r.req := d_in;
1100 #                 r.tlbie := '0';
1101 #                 r.doall := '0';
1102 #                 r.tlbld := '0';
1103 #                 r.mmu_req := '0';
1104             sync += r.req.eq(d_in)
1105 #             end if;
1106 #             if rst = '1' then
1107 #                 r0_full <= '0';
1108 #             elsif r1.full = '0' or r0_full = '0' then
1109             with m.If(~r1.full | ~r0_full):
1110 #                 r0 <= r;
1111 #                 r0_full <= r.req.valid;
1112                 sync += r0.eq(r)
1113                 sync += r0_full.eq(r.req.valid)
1114 #             end if;
1115 #         end if;
1116 #     end process;
1117 #
1118 #     -- we don't yet handle collisions between loadstore1 requests
1119 #     -- and MMU requests
1120 #     m_out.stall <= '0';
1121 # we don't yet handle collisions between loadstore1 requests
1122 # and MMU requests
1123 comb += m_out.stall.eq(0)
1124
1125 #     -- Hold off the request in r0 when r1 has an uncompleted request
1126 #     r0_stall <= r0_full and r1.full;
1127 #     r0_valid <= r0_full and not r1.full;
1128 #     stall_out <= r0_stall;
1129 # Hold off the request in r0 when r1 has an uncompleted request
1130 comb += r0_stall.eq(r0_full & r1.full)
1131 comb += r0_valid.eq(r0_full & ~r1.full)
1132 comb += stall_out.eq(r0_stall)
1133
1134 #     -- TLB
1135 #     -- Operates in the second cycle on the request latched in r0.req.
1136 #     -- TLB updates write the entry at the end of the second cycle.
1137 #     tlb_read : process(clk)
1138 # TLB
1139 # Operates in the second cycle on the request latched in r0.req.
1140 # TLB updates write the entry at the end of the second cycle.
1141 class TLBRead(Elaboratable):
1142     def __init__(self):
1143         pass
1144
1145     def elaborate(self, platform):
1146         m = Module()
1147
1148         comb = m.d.comb
1149         sync = m.d.sync
1150
1151 #         variable index : tlb_index_t;
1152 #         variable addrbits :
1153 #          std_ulogic_vector(TLB_SET_BITS - 1 downto 0);
1154         index    = TLB_SET_SIZE
1155         addrbits = Signal(TLB_SET_BITS)
1156
1157         comb += index
1158         comb += addrbits
1159
1160 #     begin
1161 #         if rising_edge(clk) then
1162 #             if m_in.valid = '1' then
1163         with m.If(m_in.valid):
1164 #                 addrbits := m_in.addr(TLB_LG_PGSZ + TLB_SET_BITS
1165 #                                       - 1 downto TLB_LG_PGSZ);
1166             sync += addrbits.eq(m_in.addr[
1167                      TLB_LG_PGSZ:TLB_LG_PGSZ + TLB_SET_BITS
1168                     ])
1169 #             else
1170         with m.Else():
1171 #                 addrbits := d_in.addr(TLB_LG_PGSZ + TLB_SET_BITS
1172 #                                       - 1 downto TLB_LG_PGSZ);
1173             sync += addrbits.eq(d_in.addr[
1174                      TLB_LG_PGSZ:TLB_LG_PGSZ + TLB_SET_BITS
1175                     ])
1176 #             end if;
1177
1178 #             index := to_integer(unsigned(addrbits));
1179         sync += index.eq(addrbits)
1180 #             -- If we have any op and the previous op isn't finished,
1181 #             -- then keep the same output for next cycle.
1182 #             if r0_stall = '0' then
1183 # If we have any op and the previous op isn't finished,
1184 # then keep the same output for next cycle.
1185         with m.If(~r0_stall):
1186             sync += tlb_valid_way.eq(dtlb_valids[index])
1187             sync += tlb_tag_way.eq(dtlb_tags[index])
1188             sync += tlb_pte_way.eq(dtlb_ptes[index])
1189 #             end if;
1190 #         end if;
1191 #     end process;
1192
1193 #     -- Generate TLB PLRUs
1194 #     maybe_tlb_plrus: if TLB_NUM_WAYS > 1 generate
1195 # Generate TLB PLRUs
1196 class MaybeTLBPLRUs(Elaboratable):
1197     def __init__(self):
1198         pass
1199
1200     def elaborate(self, platform):
1201         m = Module()
1202
1203         comb = m.d.comb
1204         sync = m.d.sync
1205
1206         with m.If(TLB_NUM_WAYS > 1):
1207 #     begin
1208 # TODO understand how to conver generate statements
1209 #       tlb_plrus: for i in 0 to TLB_SET_SIZE - 1 generate
1210 #           -- TLB PLRU interface
1211 #           signal tlb_plru_acc :
1212 #            std_ulogic_vector(TLB_WAY_BITS-1 downto 0);
1213 #           signal tlb_plru_acc_en : std_ulogic;
1214 #           signal tlb_plru_out :
1215 #            std_ulogic_vector(TLB_WAY_BITS-1 downto 0);
1216 #       begin
1217 #           tlb_plru : entity work.plru
1218 #               generic map (
1219 #                   BITS => TLB_WAY_BITS
1220 #                   )
1221 #               port map (
1222 #                   clk => clk,
1223 #                   rst => rst,
1224 #                   acc => tlb_plru_acc,
1225 #                   acc_en => tlb_plru_acc_en,
1226 #                   lru => tlb_plru_out
1227 #                   );
1228 #
1229 #           process(all)
1230 #           begin
1231 #               -- PLRU interface
1232 #               if r1.tlb_hit_index = i then
1233 #                   tlb_plru_acc_en <= r1.tlb_hit;
1234 #               else
1235 #                   tlb_plru_acc_en <= '0';
1236 #               end if;
1237 #               tlb_plru_acc <=
1238 #                std_ulogic_vector(to_unsigned(
1239 #                                   r1.tlb_hit_way, TLB_WAY_BITS
1240 #                                  ));
1241 #               tlb_plru_victim(i) <= tlb_plru_out;
1242 #           end process;
1243 #       end generate;
1244 #     end generate;
1245 # end TODO
1246 #
1247 #     tlb_search : process(all)
1248 class TLBSearch(Elaboratable):
1249     def __init__(self):
1250         pass
1251
1252     def elborate(self, platform):
1253         m = Module()
1254
1255         comb = m.d.comb
1256         sync = m.d.sync
1257
1258 #         variable hitway : tlb_way_t;
1259 #         variable hit : std_ulogic;
1260 #         variable eatag : tlb_tag_t;
1261         hitway = TLBWay()
1262         hit    = Signal()
1263         eatag  = TLBTag()
1264
1265         comb += hitway
1266         comb += hit
1267         comb += eatag
1268
1269 #     begin
1270 #         tlb_req_index <=
1271 #          to_integer(unsigned(r0.req.addr(
1272 #           TLB_LG_PGSZ + TLB_SET_BITS - 1 downto TLB_LG_PGSZ
1273 #          )));
1274 #         hitway := 0;
1275 #         hit := '0';
1276 #         eatag := r0.req.addr(63 downto TLB_LG_PGSZ + TLB_SET_BITS);
1277 #         for i in tlb_way_t loop
1278 #             if tlb_valid_way(i) = '1' and
1279 #                 read_tlb_tag(i, tlb_tag_way) = eatag then
1280 #                 hitway := i;
1281 #                 hit := '1';
1282 #             end if;
1283 #         end loop;
1284 #         tlb_hit <= hit and r0_valid;
1285 #         tlb_hit_way <= hitway;
1286         comb += tlb_req_index.eq(r0.req.addr[
1287                  TLB_LG_PGSZ:TLB_LG_PGSZ + TLB_SET_BITS
1288                 ])
1289
1290         comb += eatag.eq(r0.req.addr[
1291                  TLB_LG_PGSZ + TLB_SET_BITS:64
1292                 ])
1293
1294         for i in TLBWay():
1295             with m.If(tlb_valid_way(i)
1296                       & read_tlb_tag(i, tlb_tag_way) == eatag):
1297
1298                 comb += hitway.eq(i)
1299                 comb += hit.eq(1)
1300
1301         comb += tlb_hit.eq(hit & r0_valid)
1302         comb += tlb_hit_way.eq(hitway)
1303
1304 #         if tlb_hit = '1' then
1305         with m.If(tlb_hit):
1306 #             pte <= read_tlb_pte(hitway, tlb_pte_way);
1307             comb += pte.eq(read_tlb_pte(hitway, tlb_pte_way))
1308 #         else
1309         with m.Else():
1310 #             pte <= (others => '0');
1311             comb += pte.eq(0)
1312 #         end if;
1313 #         valid_ra <= tlb_hit or not r0.req.virt_mode;
1314         comb += valid_ra.eq(tlb_hit | ~r0.req.virt_mode)
1315 #         if r0.req.virt_mode = '1' then
1316         with m.If(r0.req.virt_mode):
1317 #             ra <= pte(REAL_ADDR_BITS - 1 downto TLB_LG_PGSZ) &
1318 #                   r0.req.addr(TLB_LG_PGSZ - 1 downto ROW_OFF_BITS) &
1319 #                   (ROW_OFF_BITS-1 downto 0 => '0');
1320 #             perm_attr <= extract_perm_attr(pte);
1321             comb += ra.eq(Cat(
1322                      Const(ROW_OFF_BITS, ROW_OFF_BITS),
1323                      r0.req.addr[ROW_OFF_BITS:TLB_LG_PGSZ],
1324                      pte[TLB_LG_PGSZ:REAL_ADDR_BITS]
1325                     ))
1326             comb += perm_attr.eq(extract_perm_attr(pte))
1327 #         else
1328         with m.Else():
1329 #             ra <= r0.req.addr(
1330 #                    REAL_ADDR_BITS - 1 downto ROW_OFF_BITS
1331 #                   ) & (ROW_OFF_BITS-1 downto 0 => '0');
1332             comb += ra.eq(Cat(
1333                      Const(ROW_OFF_BITS, ROW_OFF_BITS),
1334                      r0.rq.addr[ROW_OFF_BITS:REAL_ADDR_BITS]
1335                     )
1336
1337 #             perm_attr <= real_mode_perm_attr;
1338             comb += perm_attr.eq(real_mode_perm_attr)
1339 #         end if;
1340 #     end process;
1341
1342 #     tlb_update : process(clk)
1343 class TLBUpdate(Elaboratable):
1344     def __init__(self):
1345         pass
1346
1347     def elaborate(self, platform):
1348         m = Module()
1349
1350         comb = m.d.comb
1351         sync = m.d.sync
1352
1353 #         variable tlbie : std_ulogic;
1354 #         variable tlbwe : std_ulogic;
1355 #         variable repl_way : tlb_way_t;
1356 #         variable eatag : tlb_tag_t;
1357 #         variable tagset : tlb_way_tags_t;
1358 #         variable pteset : tlb_way_ptes_t;
1359         tlbie    = Signal()
1360         tlbwe    = Signal()
1361         repl_way = TLBWay()
1362         eatag    = TLBTag()
1363         tagset   = TLBWayTags()
1364         pteset   = TLBWayPtes()
1365
1366         comb += tlbie
1367         comb += tlbwe
1368         comb += repl_way
1369         comb += eatag
1370         comb += tagset
1371         comb += pteset
1372
1373 #     begin
1374 #         if rising_edge(clk) then
1375 #             tlbie := r0_valid and r0.tlbie;
1376 #             tlbwe := r0_valid and r0.tlbldoi;
1377         sync += tlbie.eq(r0_valid & r0.tlbie)
1378         sync += tlbwe.eq(r0_valid & r0.tlbldoi)
1379
1380 #             if rst = '1' or (tlbie = '1' and r0.doall = '1') then
1381 #        with m.If (TODO understand how signal resets work in nmigen)
1382 #                 -- clear all valid bits at once
1383 #                 for i in tlb_index_t loop
1384 #                     dtlb_valids(i) <= (others => '0');
1385 #                 end loop;
1386             # clear all valid bits at once
1387             for i in range(TLB_SET_SIZE):
1388                 sync += dtlb_valids[i].eq(0)
1389 #             elsif tlbie = '1' then
1390         with m.Elif(tlbie):
1391 #                 if tlb_hit = '1' then
1392             with m.If(tlb_hit):
1393 #                     dtlb_valids(tlb_req_index)(tlb_hit_way) <= '0';
1394                 sync += dtlb_valids[tlb_req_index][tlb_hit_way].eq(0)
1395 #                 end if;
1396 #             elsif tlbwe = '1' then
1397         with m.Elif(tlbwe):
1398 #                 if tlb_hit = '1' then
1399             with m.If(tlb_hit):
1400 #                     repl_way := tlb_hit_way;
1401                 sync += repl_way.eq(tlb_hit_way)
1402 #                 else
1403             with m.Else():
1404 #                     repl_way := to_integer(unsigned(
1405 #                       tlb_plru_victim(tlb_req_index)));
1406                 sync += repl_way.eq(tlb_plru_victim[tlb_req_index])
1407 #                 end if;
1408 #                 eatag := r0.req.addr(
1409 #                           63 downto TLB_LG_PGSZ + TLB_SET_BITS
1410 #                          );
1411 #                 tagset := tlb_tag_way;
1412 #                 write_tlb_tag(repl_way, tagset, eatag);
1413 #                 dtlb_tags(tlb_req_index) <= tagset;
1414 #                 pteset := tlb_pte_way;
1415 #                 write_tlb_pte(repl_way, pteset, r0.req.data);
1416 #                 dtlb_ptes(tlb_req_index) <= pteset;
1417 #                 dtlb_valids(tlb_req_index)(repl_way) <= '1';
1418             sync += eatag.eq(r0.req.addr[TLB_LG_PGSZ + TLB_SET_BITS:64])
1419             sync += tagset.eq(tlb_tag_way)
1420             sync += write_tlb_tag(repl_way, tagset, eatag)
1421             sync += dtlb_tags[tlb_req_index].eq(tagset)
1422             sync += pteset.eq(tlb_pte_way)
1423             sync += write_tlb_pte(repl_way, pteset, r0.req.data)
1424             sync += dtlb_ptes[tlb_req_index].eq(pteset)
1425             sync += dtlb_valids[tlb_req_index][repl_way].eq(1)
1426 #             end if;
1427 #         end if;
1428 #     end process;
1429
1430 #     -- Generate PLRUs
1431 #     maybe_plrus: if NUM_WAYS > 1 generate
1432 class MaybePLRUs(Elaboratable):
1433     def __init__(self):
1434         pass
1435
1436     def elaborate(self, platform):
1437         m = Module()
1438
1439         comb = m.d.comb
1440         sync = m.d.sync
1441
1442 #     begin
1443         # TODO learn translation of generate into nmgien @lkcl
1444 #       plrus: for i in 0 to NUM_LINES-1 generate
1445 #           -- PLRU interface
1446 #           signal plru_acc    : std_ulogic_vector(WAY_BITS-1 downto 0);
1447 #           signal plru_acc_en : std_ulogic;
1448 #           signal plru_out    : std_ulogic_vector(WAY_BITS-1 downto 0);
1449 #
1450 #       begin
1451         # TODO learn tranlation of entity, generic map, port map in
1452         # nmigen @lkcl
1453 #           plru : entity work.plru
1454 #               generic map (
1455 #                   BITS => WAY_BITS
1456 #                   )
1457 #               port map (
1458 #                   clk => clk,
1459 #                   rst => rst,
1460 #                   acc => plru_acc,
1461 #                   acc_en => plru_acc_en,
1462 #                   lru => plru_out
1463 #                   );
1464 #
1465 #           process(all)
1466 #           begin
1467 #               -- PLRU interface
1468 #               if r1.hit_index = i then
1469         # PLRU interface
1470         with m.If(r1.hit_index == i):
1471 #                   plru_acc_en <= r1.cache_hit;
1472             comb += plru_acc_en.eq(r1.cache_hit)
1473 #               else
1474         with m.Else():
1475 #                   plru_acc_en <= '0';
1476             comb += plru_acc_en.eq(0)
1477 #               end if;
1478 #               plru_acc <= std_ulogic_vector(to_unsigned(
1479 #                            r1.hit_way, WAY_BITS
1480 #                           ));
1481 #               plru_victim(i) <= plru_out;
1482         comb += plru_acc.eq(r1.hit_way)
1483         comb += plru_victime[i].eq(plru_out)
1484 #           end process;
1485 #       end generate;
1486 #     end generate;
1487
1488 #     -- Cache tag RAM read port
1489 #     cache_tag_read : process(clk)
1490 # Cache tag RAM read port
1491 class CacheTagRead(Elaboratable):
1492     def __init__(self):
1493         pass
1494
1495     def elaborate(self, platform):
1496         m = Module()
1497
1498         comb = m.d.comb
1499         sync = m.d.sync
1500
1501 #         variable index : index_t;
1502         index = Signal(INDEX)
1503
1504         comb += index
1505
1506 #     begin
1507 #         if rising_edge(clk) then
1508 #             if r0_stall = '1' then
1509         with m.If(r0_stall):
1510 #                 index := req_index;
1511             sync += index.eq(req_index)
1512
1513 #             elsif m_in.valid = '1' then
1514         with m.Elif(m_in.valid):
1515 #                 index := get_index(m_in.addr);
1516             sync += index.eq(get_index(m_in.addr))
1517
1518 #             else
1519         with m.Else():
1520 #                 index := get_index(d_in.addr);
1521             sync += index.eq(get_index(d_in.addr))
1522 #             end if;
1523 #             cache_tag_set <= cache_tags(index);
1524         sync += cache_tag_set.eq(cache_tags(index))
1525 #         end if;
1526 #     end process;
1527
1528 #     -- Cache request parsing and hit detection
1529 #     dcache_request : process(all)
1530 # Cache request parsing and hit detection
1531 class DcacheRequest(Elaboratable):
1532     def __init__(self):
1533         pass
1534
1535     def elaborate(self, platform):
1536 #         variable is_hit  : std_ulogic;
1537 #         variable hit_way : way_t;
1538 #         variable op      : op_t;
1539 #         variable opsel   : std_ulogic_vector(2 downto 0);
1540 #         variable go      : std_ulogic;
1541 #         variable nc      : std_ulogic;
1542 #         variable s_hit   : std_ulogic;
1543 #         variable s_tag   : cache_tag_t;
1544 #         variable s_pte   : tlb_pte_t;
1545 #         variable s_ra    : std_ulogic_vector(
1546 #                             REAL_ADDR_BITS - 1 downto 0
1547 #                            );
1548 #         variable hit_set     : std_ulogic_vector(
1549 #                                 TLB_NUM_WAYS - 1 downto 0
1550 #                                );
1551 #         variable hit_way_set : hit_way_set_t;
1552 #         variable rel_matches : std_ulogic_vector(
1553 #                                 TLB_NUM_WAYS - 1 downto 0
1554 #                                );
1555         rel_match   = Signal()
1556         is_hit      = Signal()
1557         hit_way     = Signal(WAY)
1558         op          = Op()
1559         opsel       = Signal(3)
1560         go          = Signal()
1561         nc          = Signal()
1562         s_hit       = Signal()
1563         s_tag       = Signal(CACHE_TAG)
1564         s_pte       = Signal(TLB_PTE)
1565         s_ra        = Signal(REAL_ADDR_BITS)
1566         hit_set     = Signal(TLB_NUM_WAYS)
1567         hit_way_set = HitWaySet()
1568         rel_matches = Signal(TLB_NUM_WAYS)
1569         rel_match   = Signal()
1570
1571         comb += rel_match
1572         comb += is_hit
1573         comb += hit_way
1574         comb += op
1575         comb += opsel
1576         comb += go
1577         comb += nc
1578         comb += s_hit
1579         comb += s_tag
1580         comb += s_pte
1581         comb += s_ra
1582         comb += hit_set
1583         comb += hit_way_set
1584         comb += rel_matches
1585         comb += rel_match
1586
1587 #     begin
1588 #         -- Extract line, row and tag from request
1589 #         req_index <= get_index(r0.req.addr);
1590 #         req_row <= get_row(r0.req.addr);
1591 #         req_tag <= get_tag(ra);
1592 #
1593 #         go := r0_valid and not (r0.tlbie or r0.tlbld)
1594 #               and not r1.ls_error;
1595         # Extract line, row and tag from request
1596         comb += req_index.eq(get_index(r0.req.addr))
1597         comb += req_row.eq(get_row(r0.req.addr))
1598         comb += req_tag.eq(get_tag(ra))
1599
1600         comb += go.eq(r0_valid & ~(r0.tlbie | r0.tlbld) & ~r1.ls_error)
1601
1602 #         -- Test if pending request is a hit on any way
1603 #         -- In order to make timing in virtual mode,
1604 #         -- when we are using the TLB, we compare each
1605 #         --way with each of the real addresses from each way of
1606 #         -- the TLB, and then decide later which match to use.
1607 #         hit_way := 0;
1608 #         is_hit := '0';
1609 #         rel_match := '0';
1610         # Test if pending request is a hit on any way
1611         # In order to make timing in virtual mode,
1612         # when we are using the TLB, we compare each
1613         # way with each of the real addresses from each way of
1614         # the TLB, and then decide later which match to use.
1615         comb += hit_way.eq(0)
1616         comb += is_hit.eq(0)
1617         comb += rel_match.eq(0)
1618
1619 #         if r0.req.virt_mode = '1' then
1620         with m.If(r0.req.virt_mode):
1621 #             rel_matches := (others => '0');
1622             comb += rel_matches.eq(0)
1623 #             for j in tlb_way_t loop
1624             for j in range(TLB_WAY):
1625 #                 hit_way_set(j) := 0;
1626 #                 s_hit := '0';
1627 #                 s_pte := read_tlb_pte(j, tlb_pte_way);
1628 #                 s_ra  := s_pte(REAL_ADDR_BITS - 1 downto TLB_LG_PGSZ)
1629 #                          & r0.req.addr(TLB_LG_PGSZ - 1 downto 0);
1630 #                 s_tag := get_tag(s_ra);
1631                 comb += hit_way_set[j].eq(0)
1632                 comb += s_hit.eq(0)
1633                 comb += s_pte.eq(read_tlb_pte(j, tlb_pte_way))
1634                 comb += s_ra.eq(Cat(
1635                          r0.req.addr[0:TLB_LG_PGSZ],
1636                          s_pte[TLB_LG_PGSZ:REAL_ADDR_BITS]
1637                         ))
1638                 comb += s_tag.eq(get_tag(s_ra))
1639
1640 #                 for i in way_t loop
1641                 for i in range(WAY):
1642 #                     if go = '1' and cache_valids(req_index)(i) = '1'
1643 #                      and read_tag(i, cache_tag_set) = s_tag
1644 #                      and tlb_valid_way(j) = '1' then
1645                     with m.If(go & cache_valid_bits[req_index][i] &
1646                               read_tag(i, cache_tag_set) == s_tag
1647                               & tlb_valid_way[j]):
1648 #                         hit_way_set(j) := i;
1649 #                         s_hit := '1';
1650                         comb += hit_way_set[j].eq(i)
1651                         comb += s_hit.eq(1)
1652 #                     end if;
1653 #                 end loop;
1654 #                 hit_set(j) := s_hit;
1655                 comb += hit_set[j].eq(s_hit)
1656 #                 if s_tag = r1.reload_tag then
1657                 with m.If(s_tag == r1.reload_tag):
1658 #                     rel_matches(j) := '1';
1659                     comb += rel_matches[j].eq(1)
1660 #                 end if;
1661 #             end loop;
1662 #             if tlb_hit = '1' then
1663             with m.If(tlb_hit):
1664 #                 is_hit := hit_set(tlb_hit_way);
1665 #                 hit_way := hit_way_set(tlb_hit_way);
1666 #                 rel_match := rel_matches(tlb_hit_way);
1667                 comb += is_hit.eq(hit_set[tlb_hit_way])
1668                 comb += hit_way.eq(hit_way_set[tlb_hit_way])
1669                 comb += rel_match.eq(rel_matches[tlb_hit_way])
1670 #             end if;
1671 #         else
1672         with m.Else():
1673 #             s_tag := get_tag(r0.req.addr);
1674             comb += s_tag.eq(get_tag(r0.req.addr))
1675 #             for i in way_t loop
1676             for i in range(WAY):
1677 #                 if go = '1' and cache_valids(req_index)(i) = '1' and
1678 #                     read_tag(i, cache_tag_set) = s_tag then
1679                 with m.If(go & cache_valid_bits[req_index][i] &
1680                           read_tag(i, cache_tag_set) == s_tag):
1681 #                     hit_way := i;
1682 #                     is_hit := '1';
1683                     comb += hit_way.eq(i)
1684                     comb += is_hit.eq(1)
1685 #                 end if;
1686 #             end loop;
1687 #             if s_tag = r1.reload_tag then
1688             with m.If(s_tag == r1.reload_tag):
1689 #                 rel_match := '1';
1690                 comb += rel_match.eq(1)
1691 #             end if;
1692 #         end if;
1693 #         req_same_tag <= rel_match;
1694         comb += req_same_tag.eq(rel_match)
1695
1696 #         -- See if the request matches the line currently being reloaded
1697 #         if r1.state = RELOAD_WAIT_ACK and req_index = r1.store_index
1698 #          and rel_match = '1' then
1699         # See if the request matches the line currently being reloaded
1700         with m.If(r1.state == State.RELOAD_WAIT_ACK & req_index ==
1701                   r1.store_index & rel_match):
1702 #             -- For a store, consider this a hit even if the row isn't
1703 #             -- valid since it will be by the time we perform the store.
1704 #             -- For a load, check the appropriate row valid bit.
1705             # For a store, consider this a hit even if the row isn't
1706             # valid since it will be by the time we perform the store.
1707             # For a load, check the appropriate row valid bit.
1708 #             is_hit :=
1709 #              not r0.req.load or r1.rows_valid(req_row mod ROW_PER_LINE);
1710 #             hit_way := replace_way;
1711             comb += is_hit.eq(~r0.req.load
1712                      | r1.rows_valid[req_row % ROW_PER_LINE])
1713             comb += hit_way.eq(replace_way)
1714 #         end if;
1715
1716 #         -- Whether to use forwarded data for a load or not
1717         # Whether to use forwarded data for a load or not
1718 #         use_forward1_next <= '0';
1719         comb += use_forward1_next.eq(0)
1720 #         if get_row(r1.req.real_addr) = req_row
1721 #          and r1.req.hit_way = hit_way then
1722         with m.If(get_row(r1.req.real_addr) == req_row
1723                   & r1.req.hit_way == hit_way)
1724 #             -- Only need to consider r1.write_bram here, since if we
1725 #             -- are writing refill data here, then we don't have a
1726 #             -- cache hit this cycle on the line being refilled.
1727 #             -- (There is the possibility that the load following the
1728 #             -- load miss that started the refill could be to the old
1729 #             -- contents of the victim line, since it is a couple of
1730 #             -- cycles after the refill starts before we see the updated
1731 #             -- cache tag. In that case we don't use the bypass.)
1732             # Only need to consider r1.write_bram here, since if we
1733             # are writing refill data here, then we don't have a
1734             # cache hit this cycle on the line being refilled.
1735             # (There is the possibility that the load following the
1736             # load miss that started the refill could be to the old
1737             # contents of the victim line, since it is a couple of
1738             # cycles after the refill starts before we see the updated
1739             # cache tag. In that case we don't use the bypass.)
1740 #             use_forward1_next <= r1.write_bram;
1741             comb += use_forward1_next.eq(r1.write_bram)
1742 #         end if;
1743 #         use_forward2_next <= '0';
1744         comb += use_forward2_next.eq(0)
1745 #         if r1.forward_row1 = req_row and r1.forward_way1 = hit_way then
1746         with m.If(r1.forward_row1 == req_row & r1.forward_way1 == hit_way):
1747 #             use_forward2_next <= r1.forward_valid1;
1748             comb += use_forward2_next.eq(r1.forward_valid1)
1749 #         end if;
1750
1751 #           -- The way that matched on a hit
1752         # The way that matched on a hit
1753 #           req_hit_way <= hit_way;
1754         comb += req_hit_way.eq(hit_way)
1755
1756 #         -- The way to replace on a miss
1757         # The way to replace on a miss
1758 #         if r1.write_tag = '1' then
1759         with m.If(r1.write_tag):
1760 #             replace_way <= to_integer(unsigned(
1761 #                             plru_victim(r1.store_index)
1762 #                            ));
1763             replace_way.eq(plru_victim[r1.store_index])
1764 #         else
1765         with m.Else():
1766 #             replace_way <= r1.store_way;
1767             comb += replace_way.eq(r1.store_way)
1768 #         end if;
1769
1770 #         -- work out whether we have permission for this access
1771 #         -- NB we don't yet implement AMR, thus no KUAP
1772         # work out whether we have permission for this access
1773         # NB we don't yet implement AMR, thus no KUAP
1774 #         rc_ok <= perm_attr.reference and
1775 #                  (r0.req.load or perm_attr.changed);
1776 #         perm_ok <= (r0.req.priv_mode or not perm_attr.priv) and
1777 #                    (perm_attr.wr_perm or (r0.req.load
1778 #                    and perm_attr.rd_perm));
1779 #         access_ok <= valid_ra and perm_ok and rc_ok;
1780         comb += rc_ok.eq(
1781                  perm_attr.reference & (r0.req.load | perm_attr.changed)
1782                 )
1783         comb += perm_ok.eq((r0.req.prive_mode | ~perm_attr.priv)
1784                            & perm_attr.wr_perm
1785                            | (r0.req.load & perm_attr.rd_perm)
1786                           )
1787         comb += access_ok.eq(valid_ra & perm_ok & rc_ok)
1788 #       -- Combine the request and cache hit status to decide what
1789 #       -- operation needs to be done
1790 #         nc := r0.req.nc or perm_attr.nocache;
1791 #         op := OP_NONE;
1792         # Combine the request and cache hit status to decide what
1793         # operation needs to be done
1794         comb += nc.eq(r0.req.nc | perm_attr.nocache)
1795         comb += op.eq(Op.OP_NONE)
1796 #         if go = '1' then
1797         with m.If(go):
1798 #             if access_ok = '0' then
1799             with m.If(~access_ok):
1800 #                 op := OP_BAD;
1801                 comb += op.eq(Op.OP_BAD)
1802 #             elsif cancel_store = '1' then
1803             with m.Elif(cancel_store):
1804 #                 op := OP_STCX_FAIL;
1805                 comb += op.eq(Op.OP_STCX_FAIL)
1806 #             else
1807             with m.Else():
1808 #                 opsel := r0.req.load & nc & is_hit;
1809                 comb += opsel.eq(Cat(is_hit, nc, r0.req.load))
1810 #                 case opsel is
1811                 with m.Switch(opsel):
1812 #                     when "101" => op := OP_LOAD_HIT;
1813 #                     when "100" => op := OP_LOAD_MISS;
1814 #                     when "110" => op := OP_LOAD_NC;
1815 #                     when "001" => op := OP_STORE_HIT;
1816 #                     when "000" => op := OP_STORE_MISS;
1817 #                     when "010" => op := OP_STORE_MISS;
1818 #                     when "011" => op := OP_BAD;
1819 #                     when "111" => op := OP_BAD;
1820 #                     when others => op := OP_NONE;
1821                     with m.Case(Const(0b101, 3)):
1822                         comb += op.eq(Op.OP_LOAD_HIT)
1823
1824                     with m.Case(Cosnt(0b100, 3)):
1825                         comb += op.eq(Op.OP_LOAD_MISS)
1826
1827                     with m.Case(Const(0b110, 3)):
1828                         comb += op.eq(Op.OP_LOAD_NC)
1829
1830                     with m.Case(Const(0b001, 3)):
1831                         comb += op.eq(Op.OP_STORE_HIT)
1832
1833                     with m.Case(Const(0b000, 3)):
1834                         comb += op.eq(Op.OP_STORE_MISS)
1835
1836                     with m.Case(Const(0b010, 3)):
1837                         comb += op.eq(Op.OP_STORE_MISS)
1838
1839                     with m.Case(Const(0b011, 3)):
1840                         comb += op.eq(Op.OP_BAD)
1841
1842                     with m.Case(Const(0b111, 3)):
1843                         comb += op.eq(Op.OP_BAD)
1844
1845                     with m.Default():
1846                         comb += op.eq(Op.OP_NONE)
1847 #                 end case;
1848 #             end if;
1849 #         end if;
1850 #       req_op <= op;
1851 #         req_go <= go;
1852         comb += req_op.eq(op)
1853         comb += req_go.eq(go)
1854
1855 #         -- Version of the row number that is valid one cycle earlier
1856 #         -- in the cases where we need to read the cache data BRAM.
1857 #         -- If we're stalling then we need to keep reading the last
1858 #         -- row requested.
1859         # Version of the row number that is valid one cycle earlier
1860         # in the cases where we need to read the cache data BRAM.
1861         # If we're stalling then we need to keep reading the last
1862         # row requested.
1863 #         if r0_stall = '0' then
1864         with m.If(~r0_stall):
1865 #             if m_in.valid = '1' then
1866             with m.If(m_in.valid):
1867 #                 early_req_row <= get_row(m_in.addr);
1868                 comb += early_req_row.eq(get_row(m_in.addr))
1869 #             else
1870             with m.Else():
1871 #                 early_req_row <= get_row(d_in.addr);
1872                 comb += early_req_row.eq(get_row(d_in.addr))
1873 #             end if;
1874 #         else
1875         with m.Else():
1876 #             early_req_row <= req_row;
1877             comb += early_req_row.eq(req_row)
1878 #         end if;
1879 #     end process;
1880
1881 #     -- Wire up wishbone request latch out of stage 1
1882 #     wishbone_out <= r1.wb;
1883     # Wire up wishbone request latch out of stage 1
1884     comb += wishbone_out.eq(r1.wb)
1885
1886 #     -- Handle load-with-reservation and store-conditional instructions
1887 #     reservation_comb: process(all)
1888 # Handle load-with-reservation and store-conditional instructions
1889 class ReservationComb(Elaboratable):
1890     def __init__(self):
1891         pass
1892
1893     def elaborate(self, platform):
1894         m = Module()
1895
1896         comb = m.d.comb
1897         sync = m.d.sync
1898
1899 #     begin
1900 #         cancel_store <= '0';
1901 #         set_rsrv <= '0';
1902 #         clear_rsrv <= '0';
1903 #         if r0_valid = '1' and r0.req.reserve = '1' then
1904         with m.If(r0_valid & r0.req.reserve):
1905
1906 #             -- XXX generate alignment interrupt if address
1907 #             -- is not aligned XXX or if r0.req.nc = '1'
1908 #             if r0.req.load = '1' then
1909             # XXX generate alignment interrupt if address
1910             # is not aligned XXX or if r0.req.nc = '1'
1911             with m.If(r0.req.load):
1912 #                 -- load with reservation
1913 #                 set_rsrv <= '1';
1914                 # load with reservation
1915                 comb += set_rsrv(1)
1916 #             else
1917             with m.Else():
1918 #                 -- store conditional
1919 #                 clear_rsrv <= '1';
1920                 # store conditional
1921                 comb += clear_rsrv.eq(1)
1922 #                 if reservation.valid = '0' or r0.req.addr(63
1923 #                  downto LINE_OFF_BITS) /= reservation.addr then
1924                 with m.If(~reservation.valid
1925                           | r0.req.addr[LINE_OFF_BITS:64]):
1926 #                     cancel_store <= '1';
1927                     comb += cancel_store.eq(1)
1928 #                 end if;
1929 #             end if;
1930 #         end if;
1931 #     end process;
1932
1933 #     reservation_reg: process(clk)
1934 class ReservationReg(Elaboratable):
1935     def __init__(self):
1936         pass
1937
1938     def elaborate(self, platform):
1939         m = Module()
1940
1941         comb = m.d.comb
1942         sync = m.d.sync
1943
1944 #     begin
1945 #         if rising_edge(clk) then
1946 #             if rst = '1' then
1947 #                 reservation.valid <= '0';
1948             # TODO understand how resets work in nmigen
1949 #             elsif r0_valid = '1' and access_ok = '1' then
1950             with m.Elif(r0_valid & access_ok)""
1951 #                 if clear_rsrv = '1' then
1952                 with m.If(clear_rsrv):
1953 #                     reservation.valid <= '0';
1954                     sync += reservation.valid.ea(0)
1955 #                 elsif set_rsrv = '1' then
1956                 with m.Elif(set_rsrv):
1957 #                     reservation.valid <= '1';
1958 #                     reservation.addr <=
1959 #                      r0.req.addr(63 downto LINE_OFF_BITS);
1960                     sync += reservation.valid.eq(1)
1961                     sync += reservation.addr(r0.req.addr[LINE_OFF_BITS:64])
1962 #                 end if;
1963 #             end if;
1964 #         end if;
1965 #     end process;
1966 #
1967 #     -- Return data for loads & completion control logic
1968 #     writeback_control: process(all)
1969 # Return data for loads & completion control logic
1970 class WriteBackControl(Elaboratable):
1971     def __init__(self):
1972         pass
1973
1974     def elaborate(self, platform):
1975         m = Module()
1976
1977         comb = m.d.comb
1978         sync = m.d.sync
1979
1980 #         variable data_out : std_ulogic_vector(63 downto 0);
1981 #         variable data_fwd : std_ulogic_vector(63 downto 0);
1982 #         variable j        : integer;
1983         data_out = Signal(64)
1984         data_fwd = Signal(64)
1985         j        = Signal()
1986
1987 #     begin
1988 #         -- Use the bypass if are reading the row that was
1989 #         -- written 1 or 2 cycles ago, including for the
1990 #         -- slow_valid = 1 case (i.e. completing a load
1991 #         -- miss or a non-cacheable load).
1992 #         if r1.use_forward1 = '1' then
1993         # Use the bypass if are reading the row that was
1994         # written 1 or 2 cycles ago, including for the
1995         # slow_valid = 1 case (i.e. completing a load
1996         # miss or a non-cacheable load).
1997         with m.If(r1.use_forward1):
1998 #             data_fwd := r1.forward_data1;
1999             comb += data_fwd.eq(r1.forward_data1)
2000 #         else
2001         with m.Else():
2002 #             data_fwd := r1.forward_data2;
2003             comb += data_fwd.eq(r1.forward_data2)
2004 #         end if;
2005
2006 #         data_out := cache_out(r1.hit_way);
2007         comb += data_out.eq(cache_out[r1.hit_way])
2008
2009 #         for i in 0 to 7 loop
2010         for i in range(8):
2011 #             j := i * 8;
2012             comb += i * 8
2013
2014 #             if r1.forward_sel(i) = '1' then
2015             with m.If(r1.forward_sel[i]):
2016 #                 data_out(j + 7 downto j) := data_fwd(j + 7 downto j);
2017                 comb += data_out[j:j+8].eq(data_fwd[j:j+8])
2018 #             end if;
2019 #         end loop;
2020
2021 #         d_out.valid <= r1.ls_valid;
2022 #         d_out.data <= data_out;
2023 #         d_out.store_done <= not r1.stcx_fail;
2024 #         d_out.error <= r1.ls_error;
2025 #         d_out.cache_paradox <= r1.cache_paradox;
2026         comb += d_out.valid.eq(r1.ls_valid)
2027         comb += d_out.data.eq(data_out)
2028         comb += d_out.store_done.eq(~r1.stcx_fail)
2029         comb += d_out.error.eq(r1.ls_error)
2030         comb += d_out.cache_paradox.eq(r1.cache_paradox)
2031
2032 #         -- Outputs to MMU
2033 #         m_out.done <= r1.mmu_done;
2034 #         m_out.err <= r1.mmu_error;
2035 #         m_out.data <= data_out;
2036         comb += m_out.done.eq(r1.mmu_done)
2037         comb += m_out.err.eq(r1.mmu_error)
2038         comb += m_out.data.eq(data_out)
2039
2040 #         -- We have a valid load or store hit or we just completed
2041 #         -- a slow op such as a load miss, a NC load or a store
2042 #         --
2043 #         -- Note: the load hit is delayed by one cycle. However it
2044 #         -- can still not collide with r.slow_valid (well unless I
2045 #         -- miscalculated) because slow_valid can only be set on a
2046 #         -- subsequent request and not on its first cycle (the state
2047 #         -- machine must have advanced), which makes slow_valid
2048 #         -- at least 2 cycles from the previous hit_load_valid.
2049 #
2050 #         -- Sanity: Only one of these must be set in any given cycle
2051 #         assert (r1.slow_valid and r1.stcx_fail) /= '1'
2052 #          report "unexpected slow_valid collision with stcx_fail"
2053 #          severity FAILURE;
2054 #         assert ((r1.slow_valid or r1.stcx_fail) and r1.hit_load_valid)
2055 #          /= '1' report "unexpected hit_load_delayed collision with
2056 #          slow_valid" severity FAILURE;
2057         # We have a valid load or store hit or we just completed
2058         # a slow op such as a load miss, a NC load or a store
2059         #
2060         # Note: the load hit is delayed by one cycle. However it
2061         # can still not collide with r.slow_valid (well unless I
2062         # miscalculated) because slow_valid can only be set on a
2063         # subsequent request and not on its first cycle (the state
2064         # machine must have advanced), which makes slow_valid
2065         # at least 2 cycles from the previous hit_load_valid.
2066
2067         # Sanity: Only one of these must be set in any given cycle
2068         assert (r1.slow_valid & r1.stcx_fail) != 1 "unexpected" \
2069          "slow_valid collision with stcx_fail -!- severity FAILURE"
2070
2071         assert ((r1.slow_valid | r1.stcx_fail) | r1.hit_load_valid) != 1
2072          "unexpected hit_load_delayed collision with slow_valid -!-" \
2073          "severity FAILURE"
2074
2075 #         if r1.mmu_req = '0' then
2076         with m.If(~r1._mmu_req):
2077 #             -- Request came from loadstore1...
2078 #             -- Load hit case is the standard path
2079 #             if r1.hit_load_valid = '1' then
2080             # Request came from loadstore1...
2081             # Load hit case is the standard path
2082             with m.If(r1.hit_load_valid):
2083 #                 report
2084 #                  "completing load hit data=" & to_hstring(data_out);
2085                 print(f"completing load hit data={data_out}")
2086 #             end if;
2087
2088 #             -- error cases complete without stalling
2089 #             if r1.ls_error = '1' then
2090             # error cases complete without stalling
2091             with m.If(r1.ls_error):
2092 #                 report "completing ld/st with error";
2093                 print("completing ld/st with error")
2094 #             end if;
2095
2096 #             -- Slow ops (load miss, NC, stores)
2097 #             if r1.slow_valid = '1' then
2098             # Slow ops (load miss, NC, stores)
2099             with m.If(r1.slow_valid):
2100 #                 report
2101 #                  "completing store or load miss data="
2102 #                   & to_hstring(data_out);
2103                 print(f"completing store or load miss data={data_out}")
2104 #             end if;
2105
2106 #         else
2107         with m.Else():
2108 #             -- Request came from MMU
2109 #             if r1.hit_load_valid = '1' then
2110             # Request came from MMU
2111             with m.If(r1.hit_load_valid):
2112 #                 report "completing load hit to MMU, data="
2113 #                  & to_hstring(m_out.data);
2114                 print(f"completing load hit to MMU, data={m_out.data}")
2115 #             end if;
2116 #
2117 #             -- error cases complete without stalling
2118 #             if r1.mmu_error = '1' then
2119 #                 report "completing MMU ld with error";
2120             # error cases complete without stalling
2121             with m.If(r1.mmu_error):
2122                 print("combpleting MMU ld with error")
2123 #             end if;
2124 #
2125 #             -- Slow ops (i.e. load miss)
2126 #             if r1.slow_valid = '1' then
2127             # Slow ops (i.e. load miss)
2128             with m.If(r1.slow_valid):
2129 #                 report "completing MMU load miss, data="
2130 #                  & to_hstring(m_out.data);
2131                 print("completing MMU load miss, data={m_out.data}")
2132 #             end if;
2133 #         end if;
2134 #     end process;
2135
2136 # begin TODO
2137 #     -- Generate a cache RAM for each way. This handles the normal
2138 #     -- reads, writes from reloads and the special store-hit update
2139 #     -- path as well.
2140 #     --
2141 #     -- Note: the BRAMs have an extra read buffer, meaning the output
2142 #     -- is pipelined an extra cycle. This differs from the
2143 #     -- icache. The writeback logic needs to take that into
2144 #     -- account by using 1-cycle delayed signals for load hits.
2145 #     --
2146 #     rams: for i in 0 to NUM_WAYS-1 generate
2147 #       signal do_read  : std_ulogic;
2148 #       signal rd_addr  : std_ulogic_vector(ROW_BITS-1 downto 0);
2149 #       signal do_write : std_ulogic;
2150 #       signal wr_addr  : std_ulogic_vector(ROW_BITS-1 downto 0);
2151 #       signal wr_data  :
2152 #        std_ulogic_vector(wishbone_data_bits-1 downto 0);
2153 #       signal wr_sel   : std_ulogic_vector(ROW_SIZE-1 downto 0);
2154 #       signal wr_sel_m : std_ulogic_vector(ROW_SIZE-1 downto 0);
2155 #       signal dout     : cache_row_t;
2156 #     begin
2157 #       way: entity work.cache_ram
2158 #           generic map (
2159 #               ROW_BITS => ROW_BITS,
2160 #               WIDTH => wishbone_data_bits,
2161 #               ADD_BUF => true
2162 #               )
2163 #           port map (
2164 #               clk     => clk,
2165 #               rd_en   => do_read,
2166 #               rd_addr => rd_addr,
2167 #               rd_data => dout,
2168 #               wr_sel  => wr_sel_m,
2169 #               wr_addr => wr_addr,
2170 #               wr_data => wr_data
2171 #               );
2172 #       process(all)
2173 # end TODO
2174 class TODO(Elaboratable):
2175     def __init__(self):
2176         pass
2177
2178     def elaborate(self, platform):
2179         m = Module()
2180
2181         comb = m.d.comb
2182         sync = m.d.sync
2183
2184 #       begin
2185 #           -- Cache hit reads
2186 #           do_read <= '1';
2187 #           rd_addr <=
2188 #            std_ulogic_vector(to_unsigned(early_req_row, ROW_BITS));
2189 #           cache_out(i) <= dout;
2190         # Cache hit reads
2191         comb += do_read.eq(1)
2192         comb += rd_addr.eq(Signal(ROW))
2193         comb += cache_out[i].eq(dout)
2194
2195 #           -- Write mux:
2196 #           --
2197 #           -- Defaults to wishbone read responses (cache refill)
2198 #           --
2199 #           -- For timing, the mux on wr_data/sel/addr is not
2200 #           -- dependent on anything other than the current state.
2201         # Write mux:
2202         #
2203         # Defaults to wishbone read responses (cache refill)
2204         #
2205         # For timing, the mux on wr_data/sel/addr is not
2206         # dependent on anything other than the current state.
2207 #           wr_sel_m <= (others => '0');
2208         comb += wr_sel_m.eq(0)
2209
2210 #           do_write <= '0';
2211         comb += do_write.eq(0)
2212 #             if r1.write_bram = '1' then
2213         with m.If(r1.write_bram):
2214 #                 -- Write store data to BRAM.  This happens one
2215 #                 -- cycle after the store is in r0.
2216             # Write store data to BRAM.  This happens one
2217             # cycle after the store is in r0.
2218 #                 wr_data <= r1.req.data;
2219 #                 wr_sel  <= r1.req.byte_sel;
2220 #                 wr_addr <= std_ulogic_vector(to_unsigned(
2221 #                             get_row(r1.req.real_addr), ROW_BITS
2222 #                            ));
2223             comb += wr_data.eq(r1.req.data)
2224             comb += wr_sel.eq(r1.req.byte_sel)
2225             comb += wr_addr.eq(Signal(get_row(r1.req.real_addr)))
2226
2227 #                 if i = r1.req.hit_way then
2228             with m.If(i == r1.req.hit_way):
2229 #                     do_write <= '1';
2230                 comb += do_write.eq(1)
2231 #                 end if;
2232 #           else
2233             with m.Else():
2234 #               -- Otherwise, we might be doing a reload or a DCBZ
2235 #                 if r1.dcbz = '1' then
2236                 # Otherwise, we might be doing a reload or a DCBZ
2237                 with m.If(r1.dcbz):
2238 #                     wr_data <= (others => '0');
2239                     comb += wr_data.eq(0)
2240 #                 else
2241                 with m.Else():
2242 #                     wr_data <= wishbone_in.dat;
2243                     comb += wr_data.eq(wishbone_in.dat)
2244 #                 end if;
2245
2246 #                 wr_addr <= std_ulogic_vector(to_unsigned(
2247 #                             r1.store_row, ROW_BITS
2248 #                            ));
2249 #                 wr_sel <= (others => '1');
2250             comb += wr_addr.eq(Signal(r1.store_row))
2251             comb += wr_sel.eq(1)
2252
2253 #                 if r1.state = RELOAD_WAIT_ACK and
2254 #                 wishbone_in.ack = '1' and replace_way = i then
2255             with m.If(r1.state == State.RELOAD_WAIT_ACK & wishbone_in.ack
2256                       & relpace_way == i):
2257 #                     do_write <= '1';
2258                 comb += do_write.eq(1)
2259 #                 end if;
2260 #           end if;
2261
2262 #             -- Mask write selects with do_write since BRAM
2263 #             -- doesn't have a global write-enable
2264 #             if do_write = '1' then
2265 #             -- Mask write selects with do_write since BRAM
2266 #             -- doesn't have a global write-enable
2267             with m.If(do_write):
2268 #                 wr_sel_m <= wr_sel;
2269                 comb += wr_sel_m.eq(wr_sel)
2270 #             end if;
2271 #         end process;
2272 #     end generate;
2273
2274 #     -- Cache hit synchronous machine for the easy case.
2275 #     -- This handles load hits.
2276 #     -- It also handles error cases (TLB miss, cache paradox)
2277 #     dcache_fast_hit : process(clk)
2278 # Cache hit synchronous machine for the easy case.
2279 # This handles load hits.
2280 # It also handles error cases (TLB miss, cache paradox)
2281 class DcacheFastHit(Elaboratable):
2282     def __init__(self):
2283         pass
2284
2285     def elaborate(self, platform):
2286         m = Module()
2287
2288         comb = m.d.comb
2289         sync = m.d.sync
2290
2291 #     begin
2292 #         if rising_edge(clk) then
2293 #             if req_op /= OP_NONE then
2294         with m.If(req_op != Op.OP_NONE):
2295 #               report "op:" & op_t'image(req_op) &
2296 #                   " addr:" & to_hstring(r0.req.addr) &
2297 #                   " nc:" & std_ulogic'image(r0.req.nc) &
2298 #                   " idx:" & integer'image(req_index) &
2299 #                   " tag:" & to_hstring(req_tag) &
2300 #                   " way: " & integer'image(req_hit_way);
2301             print(f"op:{req_op} addr:{r0.req.addr} nc: {r0.req.nc}" \
2302                   f"idx:{req_index} tag:{req_tag} way: {req_hit_way}"
2303                  )
2304 #             end if;
2305 #             if r0_valid = '1' then
2306         with m.If(r0_valid):
2307 #                 r1.mmu_req <= r0.mmu_req;
2308             sync += r1.mmu_req.eq(r0.mmu_req)
2309 #             end if;
2310
2311 #             -- Fast path for load/store hits.
2312 #             -- Set signals for the writeback controls.
2313 #             r1.hit_way <= req_hit_way;
2314 #             r1.hit_index <= req_index;
2315         # Fast path for load/store hits.
2316         # Set signals for the writeback controls.
2317         sync += r1.hit_way.eq(req_hit_way)
2318         sync += r1.hit_index.eq(req_index)
2319
2320 #             if req_op = OP_LOAD_HIT then
2321         with m.If(req_op == Op.OP_LOAD_HIT):
2322 #                 r1.hit_load_valid <= '1';
2323             sync += r1.hit_load_valid.eq(1)
2324
2325 #             else
2326         with m.Else():
2327 #                 r1.hit_load_valid <= '0';
2328             sync += r1.hit_load_valid.eq(0)
2329 #             end if;
2330
2331 #             if req_op = OP_LOAD_HIT or req_op = OP_STORE_HIT then
2332         with m.If(req_op == Op.OP_LOAD_HIT | req_op == Op.OP_STORE_HIT):
2333 #                 r1.cache_hit <= '1';
2334             sync += r1.cache_hit.eq(1)
2335 #             else
2336         with m.Else():
2337 #                 r1.cache_hit <= '0';
2338             sync += r1.cache_hit.eq(0)
2339 #             end if;
2340
2341 #             if req_op = OP_BAD then
2342         with m.If(req_op == Op.OP_BAD):
2343 #                 report "Signalling ld/st error valid_ra=" &
2344 #                  std_ulogic'image(valid_ra) & " rc_ok=" &
2345 #                  std_ulogic'image(rc_ok) & " perm_ok=" &
2346 #                  std_ulogic'image(perm_ok);
2347             print(f"Signalling ld/st error valid_ra={valid_ra}"
2348                   f"rc_ok={rc_ok} perm_ok={perm_ok}"
2349
2350 #                 r1.ls_error <= not r0.mmu_req;
2351 #                 r1.mmu_error <= r0.mmu_req;
2352 #                 r1.cache_paradox <= access_ok;
2353             sync += r1.ls_error.eq(~r0.mmu_req)
2354             sync += r1.mmu_error.eq(r0.mmu_req)
2355             sync += r1.cache_paradox.eq(access_ok)
2356
2357 #             else
2358             with m.Else():
2359 #                 r1.ls_error <= '0';
2360 #                 r1.mmu_error <= '0';
2361 #                 r1.cache_paradox <= '0';
2362                 sync += r1.ls_error.eq(0)
2363                 sync += r1.mmu_error.eq(0)
2364                 sync += r1.cache_paradox.eq(0)
2365 #             end if;
2366 #
2367 #             if req_op = OP_STCX_FAIL then
2368             with m.If(req_op == Op.OP_STCX_FAIL):
2369 #                 r1.stcx_fail <= '1';
2370                 r1.stcx_fail.eq(1)
2371
2372 #             else
2373             with m.Else():
2374 #                 r1.stcx_fail <= '0';
2375                 sync += r1.stcx_fail.eq(0)
2376 #             end if;
2377 #
2378 #             -- Record TLB hit information for updating TLB PLRU
2379 #             r1.tlb_hit <= tlb_hit;
2380 #             r1.tlb_hit_way <= tlb_hit_way;
2381 #             r1.tlb_hit_index <= tlb_req_index;
2382             # Record TLB hit information for updating TLB PLRU
2383             sync += r1.tlb_hit.eq(tlb_hit)
2384             sync += r1.tlb_hit_way.eq(tlb_hit_way)
2385             sync += r1.tlb_hit_index.eq(tlb_req_index)
2386 #         end if;
2387 #     end process;
2388
2389 #     -- Memory accesses are handled by this state machine:
2390 #     --
2391 #     --   * Cache load miss/reload (in conjunction with "rams")
2392 #     --   * Load hits for non-cachable forms
2393 #     --   * Stores (the collision case is handled in "rams")
2394 #     --
2395 #     -- All wishbone requests generation is done here.
2396 #     -- This machine operates at stage 1.
2397 #     dcache_slow : process(clk)
2398 # Memory accesses are handled by this state machine:
2399 #
2400 #   * Cache load miss/reload (in conjunction with "rams")
2401 #   * Load hits for non-cachable forms
2402 #   * Stores (the collision case is handled in "rams")
2403 #
2404 # All wishbone requests generation is done here.
2405 # This machine operates at stage 1.
2406 class DcacheSlow(Elaboratable):
2407     def __init__(self):
2408         pass
2409
2410     def elaborate(self, platform):
2411         m = Module()
2412
2413         comb = m.d.comb
2414         sync = m.d.sync
2415
2416 #         variable stbs_done : boolean;
2417 #         variable req       : mem_access_request_t;
2418 #         variable acks      : unsigned(2 downto 0);
2419         stbs_done = Signal()
2420         req       = MemAccessRequest()
2421         acks      = Signal(3)
2422
2423         comb += stbs_done
2424         comb += req
2425         comb += acks
2426
2427 #     begin
2428 #         if rising_edge(clk) then
2429 #             r1.use_forward1 <= use_forward1_next;
2430 #             r1.forward_sel <= (others => '0');
2431         sync += r1.use_forward1.eq(use_forward1_next)
2432         sync += r1.forward_sel.eq(0)
2433
2434 #             if use_forward1_next = '1' then
2435         with m.If(use_forward1_next):
2436 #                 r1.forward_sel <= r1.req.byte_sel;
2437             sync += r1.forward_sel.eq(r1.req.byte_sel)
2438
2439 #           elsif use_forward2_next = '1' then
2440         with m.Elif(use_forward2_next):
2441 #                 r1.forward_sel <= r1.forward_sel1;
2442             sync += r1.forward_sel.eq(r1.forward_sel1)
2443 #             end if;
2444
2445 #             r1.forward_data2 <= r1.forward_data1;
2446         sync += r1.forward_data2.eq(r1.forward_data1)
2447
2448 #             if r1.write_bram = '1' then
2449         with m.If(r1.write_bram):
2450 #                 r1.forward_data1 <= r1.req.data;
2451 #                 r1.forward_sel1 <= r1.req.byte_sel;
2452 #                 r1.forward_way1 <= r1.req.hit_way;
2453 #                 r1.forward_row1 <= get_row(r1.req.real_addr);
2454 #                 r1.forward_valid1 <= '1';
2455             sync += r1.forward_data1.eq(r1.req.data)
2456             sync += r1.forward_sel1.eq(r1.req.byte_sel)
2457             sync += r1.forward_way1.eq(r1.req.hit_way)
2458             sync += r1.forward_row1.eq(get_row(r1.req.real_addr))
2459             sync += r1.forward_valid1.eq(1)
2460 #             else
2461         with m.Else():
2462
2463 #                 if r1.dcbz = '1' then
2464             with m.If(r1.bcbz):
2465 #                     r1.forward_data1 <= (others => '0');
2466                 sync += r1.forward_data1.eq(0)
2467
2468 #                 else
2469             with m.Else():
2470 #                     r1.forward_data1 <= wishbone_in.dat;
2471                 sync += r1.forward_data1.eq(wb_in.dat)
2472 #                 end if;
2473
2474 #                 r1.forward_sel1 <= (others => '1');
2475 #                 r1.forward_way1 <= replace_way;
2476 #                 r1.forward_row1 <= r1.store_row;
2477 #                 r1.forward_valid1 <= '0';
2478             sync += r1.forward_sel1.eq(1)
2479             sync += r1.forward_way1.eq(replace_way)
2480             sync += r1.forward_row1.eq(r1.store_row)
2481             sync += r1.forward_valid1.eq(0)
2482 #             end if;
2483
2484 #           -- On reset, clear all valid bits to force misses
2485 #             if rst = '1' then
2486         # On reset, clear all valid bits to force misses
2487         # TODO figure out how reset signal works in nmigeni
2488         with m.If("""TODO RST???"""):
2489 #               for i in index_t loop
2490             for i in range(INDEX):
2491 #                   cache_valids(i) <= (others => '0');
2492                 sync += cache_valid_bits[i].eq(0)
2493 #               end loop;
2494
2495 #                 r1.state <= IDLE;
2496 #                 r1.full <= '0';
2497 #                 r1.slow_valid <= '0';
2498 #                 r1.wb.cyc <= '0';
2499 #                 r1.wb.stb <= '0';
2500 #                 r1.ls_valid <= '0';
2501 #                 r1.mmu_done <= '0';
2502             sync += r1.state.eq(State.IDLE)
2503             sync += r1.full.eq(0)
2504             sync += r1.slow_valid.eq(0)
2505             sync += r1.wb.cyc.eq(0)
2506             sync += r1.wb.stb.eq(0)
2507             sync += r1.ls_valid.eq(0)
2508             sync += r1.mmu_done.eq(0)
2509
2510 #               -- Not useful normally but helps avoiding
2511 #               -- tons of sim warnings
2512         # Not useful normally but helps avoiding
2513         # tons of sim warnings
2514 #               r1.wb.adr <= (others => '0');
2515             sync += r1.wb.adr.eq(0)
2516 #             else
2517         with m.Else():
2518 #                 -- One cycle pulses reset
2519 #                 r1.slow_valid <= '0';
2520 #                 r1.write_bram <= '0';
2521 #                 r1.inc_acks <= '0';
2522 #                 r1.dec_acks <= '0';
2523 #
2524 #                 r1.ls_valid <= '0';
2525 #                 -- complete tlbies and TLB loads in the third cycle
2526 #                 r1.mmu_done <= r0_valid and (r0.tlbie or r0.tlbld);
2527             # One cycle pulses reset
2528             sync += r1.slow_valid.eq(0)
2529             sync += r1.write_bram.eq(0)
2530             sync += r1.inc_acks.eq(0)
2531             sync += r1.dec_acks.eq(0)
2532
2533             sync += r1.ls_valid.eq(0)
2534             # complete tlbies and TLB loads in the third cycle
2535             sync += r1.mmu_done.eq(r0_valid & (r0.tlbie | r0.tlbld))
2536
2537 #                 if req_op = OP_LOAD_HIT or req_op = OP_STCX_FAIL then
2538             with m.If(req_op == Op.OP_LOAD_HIT | req_op == Op.OP_STCX_FAIL)
2539 #                     if r0.mmu_req = '0' then
2540                 with m.If(~r0.mmu_req):
2541 #                         r1.ls_valid <= '1';
2542                     sync += r1.ls_valid.eq(1)
2543 #                     else
2544                 with m.Else():
2545 #                         r1.mmu_done <= '1';
2546                     sync += r1.mmu_done.eq(1)
2547 #                     end if;
2548 #                 end if;
2549
2550 #                 if r1.write_tag = '1' then
2551             with m.If(r1.write_tag):
2552 #                     -- Store new tag in selected way
2553 #                     for i in 0 to NUM_WAYS-1 loop
2554                 # Store new tag in selected way
2555                 for i in range(NUM_WAYS):
2556 #                         if i = replace_way then
2557                     with m.If(i == replace_way):
2558 #                             cache_tags(r1.store_index)(
2559 #                              (i + 1) * TAG_WIDTH - 1
2560 #                              downto i * TAG_WIDTH
2561 #                             ) <=
2562 #                              (TAG_WIDTH - 1 downto TAG_BITS => '0')
2563 #                              & r1.reload_tag;
2564                         sync += cache_tag[
2565                                  r1.store_index
2566                                 ][i * TAG_WIDTH:(i +1) * TAG_WIDTH].eq(
2567                                  Const(TAG_WIDTH, TAG_WIDTH)
2568                                  & r1.reload_tag
2569                                 )
2570 #                         end if;
2571 #                     end loop;
2572 #                     r1.store_way <= replace_way;
2573 #                     r1.write_tag <= '0';
2574                 sync += r1.store_way.eq(replace_way)
2575                 sync += r1.write_tag.eq(0)
2576 #                 end if;
2577
2578 #                 -- Take request from r1.req if there is one there,
2579 #                 -- else from req_op, ra, etc.
2580 #                 if r1.full = '1' then
2581             # Take request from r1.req if there is one there,
2582             # else from req_op, ra, etc.
2583             with m.If(r1.full)
2584 #                     req := r1.req;
2585                 sync += req.eq(r1.req)
2586
2587 #                 else
2588             with m.Else():
2589 #                     req.op := req_op;
2590 #                     req.valid := req_go;
2591 #                     req.mmu_req := r0.mmu_req;
2592 #                     req.dcbz := r0.req.dcbz;
2593 #                     req.real_addr := ra;
2594                 sync += req.op.eq(req_op)
2595                 sync += req.valid.eq(req_go)
2596                 sync += req.mmu_req.eq(r0.mmu_req)
2597                 sync += req.dcbz.eq(r0.req.dcbz)
2598                 sync += req.real_addr.eq(ra)
2599
2600 #                     -- Force data to 0 for dcbz
2601 #                     if r0.req.dcbz = '0' then
2602                 with m.If(~r0.req.dcbz):
2603 #                         req.data := r0.req.data;
2604                     sync += req.data.eq(r0.req.data)
2605
2606 #                     else
2607                 with m.Else():
2608 #                         req.data := (others => '0');
2609                     sync += req.data.eq(0)
2610 #                     end if;
2611
2612 #                     -- Select all bytes for dcbz
2613 #                     -- and for cacheable loads
2614 #                     if r0.req.dcbz = '1'
2615 #                      or (r0.req.load = '1' and r0.req.nc = '0') then
2616                 # Select all bytes for dcbz
2617                 # and for cacheable loads
2618                 with m.If(r0.req.dcbz | (r0.req.load & ~r0.req.nc):
2619 #                         req.byte_sel := (others => '1');
2620                     sync += req.byte_sel.eq(1)
2621
2622 #                     else
2623                 with m.Else():
2624 #                         req.byte_sel := r0.req.byte_sel;
2625                     sync += req.byte_sel.eq(r0.req.byte_sel)
2626 #                     end if;
2627
2628 #                     req.hit_way := req_hit_way;
2629 #                     req.same_tag := req_same_tag;
2630                 sync += req.hit_way.eq(req_hit_way)
2631                 sync += req.same_tag.eq(req_same_tag)
2632
2633 #                     -- Store the incoming request from r0,
2634 #                     -- if it is a slow request
2635 #                     -- Note that r1.full = 1 implies req_op = OP_NONE
2636 #                     if req_op = OP_LOAD_MISS or req_op = OP_LOAD_NC
2637 #                      or req_op = OP_STORE_MISS
2638 #                      or req_op = OP_STORE_HIT then
2639                 # Store the incoming request from r0,
2640                 # if it is a slow request
2641                 # Note that r1.full = 1 implies req_op = OP_NONE
2642                 with m.If(req_op == Op.OP_LOAD_MISS
2643                           | req_op == Op.OP_LOAD_NC
2644                           | req_op == Op.OP_STORE_MISS
2645                           | req_op == Op.OP_STORE_HIT):
2646 #                         r1.req <= req;
2647 #                         r1.full <= '1';
2648                     sync += r1.req(req)
2649                     sync += r1.full.eq(1)
2650 #                     end if;
2651 #                 end if;
2652 #
2653 #               -- Main state machine
2654 #               case r1.state is
2655             # Main state machine
2656             with m.Switch(r1.state):
2657
2658 #                 when IDLE =>
2659                 with m.Case(State.IDLE)
2660 #                     r1.wb.adr <= req.real_addr(r1.wb.adr'left downto 0);
2661 #                     r1.wb.sel <= req.byte_sel;
2662 #                     r1.wb.dat <= req.data;
2663 #                     r1.dcbz <= req.dcbz;
2664 #
2665 #                     -- Keep track of our index and way
2666 #                     -- for subsequent stores.
2667 #                     r1.store_index <= get_index(req.real_addr);
2668 #                     r1.store_row <= get_row(req.real_addr);
2669 #                     r1.end_row_ix <=
2670 #                      get_row_of_line(get_row(req.real_addr)) - 1;
2671 #                     r1.reload_tag <= get_tag(req.real_addr);
2672 #                     r1.req.same_tag <= '1';
2673                     sync += r1.wb.adr.eq(req.real_addr[0:r1.wb.adr])
2674                     sync += r1.wb.sel.eq(req.byte_sel)
2675                     sync += r1.wb.dat.eq(req.data)
2676                     sync += r1.dcbz.eq(req.dcbz)
2677
2678                     # Keep track of our index and way
2679                     # for subsequent stores.
2680                     sync += r1.store_index.eq(get_index(req.real_addr))
2681                     sync += r1.store_row.eq(get_row(req.real_addr))
2682                     sync += r1.end_row_ix.eq(
2683                              get_row_of_line(get_row(req.real_addr))
2684                             )
2685                     sync += r1.reload_tag.eq(get_tag(req.real_addr))
2686                     sync += r1.req.same_tag.eq(1)
2687
2688 #                     if req.op = OP_STORE_HIT theni
2689                     with m.If(req.op == Op.OP_STORE_HIT):
2690 #                         r1.store_way <= req.hit_way;
2691                         sync += r1.store_way.eq(req.hit_way)
2692 #                     end if;
2693
2694 #                     -- Reset per-row valid bits,
2695 #                     -- ready for handling OP_LOAD_MISS
2696 #                     for i in 0 to ROW_PER_LINE - 1 loop
2697                     # Reset per-row valid bits,
2698                     # ready for handling OP_LOAD_MISS
2699                     for i in range(ROW_PER_LINE):
2700 #                         r1.rows_valid(i) <= '0';
2701                         sync += r1.rows_valid[i].eq(0)
2702 #                     end loop;
2703
2704 #                     case req.op is
2705                     with m.Switch(req.op):
2706 #                     when OP_LOAD_HIT =>
2707                         with m.Case(Op.OP_LOAD_HIT):
2708 #                         -- stay in IDLE state
2709                             # stay in IDLE state
2710                             pass
2711
2712 #                     when OP_LOAD_MISS =>
2713                         with m.Case(Op.OP_LOAD_MISS):
2714 #                       -- Normal load cache miss,
2715 #                       -- start the reload machine
2716 #                       report "cache miss real addr:" &
2717 #                        to_hstring(req.real_addr) & " idx:" &
2718 #                        integer'image(get_index(req.real_addr)) &
2719 #                        " tag:" & to_hstring(get_tag(req.real_addr));
2720                             # Normal load cache miss,
2721                             # start the reload machine
2722                             print(f"cache miss real addr:{req_real_addr}" \
2723                                   f" idx:{get_index(req_real_addr)}" \
2724                                   f" tag:{get_tag(req.real_addr)}")
2725
2726 #                       -- Start the wishbone cycle
2727 #                       r1.wb.we  <= '0';
2728 #                       r1.wb.cyc <= '1';
2729 #                       r1.wb.stb <= '1';
2730                             # Start the wishbone cycle
2731                             sync += r1.wb.we.eq(0)
2732                             sync += r1.wb.cyc.eq(1)
2733                             sync += r1.wb.stb.eq(1)
2734
2735 #                       -- Track that we had one request sent
2736 #                       r1.state <= RELOAD_WAIT_ACK;
2737 #                       r1.write_tag <= '1';
2738                             # Track that we had one request sent
2739                             sync += r1.state.eq(State.RELOAD_WAIT_ACK)
2740                             sync += r1.write_tag.eq(1)
2741
2742 #                   when OP_LOAD_NC =>
2743                         with m.Case(Op.OP_LOAD_NC):
2744 #                       r1.wb.cyc <= '1';
2745 #                       r1.wb.stb <= '1';
2746 #                       r1.wb.we <= '0';
2747 #                       r1.state <= NC_LOAD_WAIT_ACK;
2748                             sync += r1.wb.cyc.eq(1)
2749                             sync += r1.wb.stb.eq(1)
2750                             sync += r1.wb.we.eq(0)
2751                             sync += r1.state.eq(State.NC_LOAD_WAIT_ACK)
2752
2753 #                     when OP_STORE_HIT | OP_STORE_MISS =>
2754                         with m.Case(Op.OP_STORE_HIT | Op.OP_STORE_MISS):
2755 #                         if req.dcbz = '0' then
2756                             with m.If(~req.bcbz):
2757 #                             r1.state <= STORE_WAIT_ACK;
2758 #                             r1.acks_pending <= to_unsigned(1, 3);
2759 #                             r1.full <= '0';
2760 #                             r1.slow_valid <= '1';
2761                                 sync += r1.state.eq(State.STORE_WAIT_ACK)
2762                                 sync += r1.acks_pending.eq(
2763                                          '''TODO to_unsignes(1,3)'''
2764                                         )
2765                                 sync += r1.full.eq(0)
2766                                 sync += r1.slow_valid.eq(1)
2767
2768 #                             if req.mmu_req = '0' then
2769                                 with m.If(~req.mmu_req):
2770 #                                 r1.ls_valid <= '1';
2771                                     sync += r1.ls_valid.eq(1)
2772 #                             else
2773                                 with m.Else():
2774 #                                 r1.mmu_done <= '1';
2775                                     sync += r1.mmu_done.eq(1)
2776 #                             end if;
2777
2778 #                             if req.op = OP_STORE_HIT then
2779                                 with m.If(req.op == Op.OP_STORE_HIT):
2780 #                                 r1.write_bram <= '1';
2781                                     sync += r1.write_bram.eq(1)
2782 #                             end if;
2783
2784 #                         else
2785                             with m.Else():
2786 #                             -- dcbz is handled much like a load
2787 #                             -- miss except that we are writing
2788 #                             -- to memory instead of reading
2789 #                             r1.state <= RELOAD_WAIT_ACK;
2790                                 # dcbz is handled much like a load
2791                                 # miss except that we are writing
2792                                 # to memory instead of reading
2793                                 sync += r1.state.eq(Op.RELOAD_WAIT_ACK)
2794
2795 #                             if req.op = OP_STORE_MISS then
2796                                 with m.If(req.op == Op.OP_STORE_MISS):
2797 #                                 r1.write_tag <= '1';
2798                                     sync += r1.write_tag.eq(1)
2799 #                             end if;
2800 #                         end if;
2801
2802 #                         r1.wb.we <= '1';
2803 #                         r1.wb.cyc <= '1';
2804 #                         r1.wb.stb <= '1';
2805                             sync += r1.wb.we.eq(1)
2806                             sync += r1.wb.cyc.eq(1)
2807                             sync += r1.wb.stb.eq(1)
2808
2809 #                   -- OP_NONE and OP_BAD do nothing
2810 #                   -- OP_BAD & OP_STCX_FAIL were handled above already
2811 #                   when OP_NONE =>
2812 #                     when OP_BAD =>
2813 #                     when OP_STCX_FAIL =>
2814                         # OP_NONE and OP_BAD do nothing
2815                         # OP_BAD & OP_STCX_FAIL were handled above already
2816                         with m.Case(Op.OP_NONE):
2817                             pass
2818
2819                         with m.Case(OP_BAD):
2820                             pass
2821
2822                         with m.Case(OP_STCX_FAIL):
2823                             pass
2824 #                   end case;
2825
2826 #                 when RELOAD_WAIT_ACK =>
2827                     with m.Case(State.RELOAD_WAIT_ACK):
2828 #                     -- Requests are all sent if stb is 0
2829                         # Requests are all sent if stb is 0
2830                         sync += stbs_done.eq(~r1.wb.stb)
2831 #                   stbs_done := r1.wb.stb = '0';
2832
2833 #                   -- If we are still sending requests,
2834 #                   -- was one accepted?
2835 #                   if wishbone_in.stall = '0' and not stbs_done then
2836                         # If we are still sending requests,
2837                         # was one accepted?
2838                         with m.If(~wb_in.stall & ~stbs_done):
2839 #                       -- That was the last word ? We are done sending.
2840 #                       -- Clear stb and set stbs_done so we can handle
2841 #                       -- an eventual last ack on the same cycle.
2842 #                       if is_last_row_addr(r1.wb.adr, r1.end_row_ix) then
2843                             # That was the last word ? We are done sending.
2844                             # Clear stb and set stbs_done so we can handle
2845                             # an eventual last ack on the same cycle.
2846                             with m.If(is_last_row_addr(
2847                                       r1.wb.adr, r1.end_row_ix)):
2848 #                           r1.wb.stb <= '0';
2849 #                           stbs_done := true;
2850                                 sync += r1.wb.stb.eq(0)
2851                                 sync += stbs_done.eq(0)
2852 #                       end if;
2853
2854 #                       -- Calculate the next row address
2855 #                       r1.wb.adr <= next_row_addr(r1.wb.adr);
2856                             # Calculate the next row address
2857                             sync += r1.wb.adr.eq(next_row_addr(r1.wb.adr))
2858 #                   end if;
2859
2860 #                   -- Incoming acks processing
2861 #                     r1.forward_valid1 <= wishbone_in.ack;
2862                         # Incoming acks processing
2863                         sync += r1.forward_valid1.eq(wb_in.ack)
2864
2865 #                   if wishbone_in.ack = '1' then
2866                         with m.If(wb_in.ack):
2867 #                         r1.rows_valid(
2868 #                          r1.store_row mod ROW_PER_LINE
2869 #                         ) <= '1';
2870                             sync += r1.rows_valid[
2871                                      r1.store_row % ROW_PER_LINE
2872                                     ].eq(1)
2873
2874 #                         -- If this is the data we were looking for,
2875 #                         -- we can complete the request next cycle.
2876 #                         -- Compare the whole address in case the
2877 #                         -- request in r1.req is not the one that
2878 #                         -- started this refill.
2879 #                       if r1.full = '1' and r1.req.same_tag = '1'
2880 #                        and ((r1.dcbz = '1' and r1.req.dcbz = '1')
2881 #                        or (r1.dcbz = '0' and r1.req.op = OP_LOAD_MISS))
2882 #                        and r1.store_row = get_row(r1.req.real_addr) then
2883                             # If this is the data we were looking for,
2884                             # we can complete the request next cycle.
2885                             # Compare the whole address in case the
2886                             # request in r1.req is not the one that
2887                             # started this refill.
2888                             with m.If(r1.full & r1.req.same_tag &
2889                                       ((r1.dcbz & r1.req.dcbz)
2890                                        (~r1.dcbz &
2891                                         r1.req.op == Op.OP_LOAD_MISS)
2892                                        ) &
2893                                        r1.store_row
2894                                        == get_row(r1.req.real_addr):
2895 #                             r1.full <= '0';
2896 #                             r1.slow_valid <= '1';
2897                                 sync += r1.full.eq(0)
2898                                 sync += r1.slow_valid.eq(1)
2899
2900 #                             if r1.mmu_req = '0' then
2901                                     with m.If(~r1.mmu_req):
2902 #                                 r1.ls_valid <= '1';
2903                                         sync += r1.ls_valid.eq(1)
2904 #                             else
2905                                     with m.Else():
2906 #                                 r1.mmu_done <= '1';
2907                                         sync += r1.mmu_done.eq(1)
2908 #                             end if;
2909 #                             r1.forward_sel <= (others => '1');
2910 #                             r1.use_forward1 <= '1';
2911                                 sync += r1.forward_sel.eq(1)
2912                                 sync += r1.use_forward1.eq(1)
2913 #                       end if;
2914
2915 #                       -- Check for completion
2916 #                       if stbs_done and is_last_row(r1.store_row,
2917 #                        r1.end_row_ix) then
2918                             # Check for completion
2919                             with m.If(stbs_done &
2920                                       is_last_row(r1.store_row,
2921                                       r1.end_row_ix)):
2922
2923 #                           -- Complete wishbone cycle
2924 #                           r1.wb.cyc <= '0';
2925                                 # Complete wishbone cycle
2926                                 sync += r1.wb.cyc.eq(0)
2927
2928 #                           -- Cache line is now valid
2929 #                           cache_valids(r1.store_index)(
2930 #                            r1.store_way
2931 #                           ) <= '1';
2932                                 # Cache line is now valid
2933                                 sync += cache_valid_bits[
2934                                          r1.store_index
2935                                         ][r1.store_way].eq(1)
2936
2937 #                           r1.state <= IDLE;
2938                                 sync += r1.state.eq(State.IDLE)
2939 #                       end if;
2940
2941 #                       -- Increment store row counter
2942 #                       r1.store_row <= next_row(r1.store_row);
2943                             # Increment store row counter
2944                             sync += r1.store_row.eq(next_row(r1.store_row))
2945 #                   end if;
2946
2947 #                 when STORE_WAIT_ACK =>
2948                     with m.Case(State.STORE_WAIT_ACK):
2949 #                     stbs_done := r1.wb.stb = '0';
2950 #                     acks := r1.acks_pending;
2951                         sync += stbs_done.eq(~r1.wb.stb)
2952                         sync += acks.eq(r1.acks_pending)
2953
2954 #                     if r1.inc_acks /= r1.dec_acks then
2955                         with m.If(r1.inc_acks != r1.dec_acks):
2956
2957 #                         if r1.inc_acks = '1' then
2958                             with m.If(r1.inc_acks):
2959 #                             acks := acks + 1;
2960                                 sync += acks.eq(acks + 1)
2961
2962 #                         else
2963                             with m.Else():
2964 #                             acks := acks - 1;
2965                                 sync += acks.eq(acks - 1)
2966 #                         end if;
2967 #                     end if;
2968
2969 #                     r1.acks_pending <= acks;
2970                         sync += r1.acks_pending.eq(acks)
2971
2972 #                     -- Clear stb when slave accepted request
2973 #                     if wishbone_in.stall = '0' then
2974                         # Clear stb when slave accepted request
2975                         with m.If(~wb_in.stall):
2976 #                         -- See if there is another store waiting
2977 #                         -- to be done which is in the same real page.
2978 #                         if req.valid = '1' then
2979                             # See if there is another store waiting
2980                             # to be done which is in the same real page.
2981                             with m.If(req.valid):
2982 #                             r1.wb.adr(
2983 #                              SET_SIZE_BITS - 1 downto 0
2984 #                             ) <= req.real_addr(
2985 #                              SET_SIZE_BITS - 1 downto 0
2986 #                             );
2987 #                             r1.wb.dat <= req.data;
2988 #                             r1.wb.sel <= req.byte_sel;
2989                                 sync += r1.wb.adr[0:SET_SIZE_BITS].eq(
2990                                          req.real_addr[0:SET_SIZE_BITS]
2991                                         )
2992 #                         end if;
2993
2994 #                         if acks < 7 and req.same_tag = '1'
2995 #                          and (req.op = OP_STORE_MISS
2996 #                          or req.op = OP_STORE_HIT) then
2997                             with m.Elif(acks < 7 & req.same_tag &
2998                                         (req.op == Op.Op_STORE_MISS
2999                                          | req.op == Op.OP_SOTRE_HIT)):
3000 #                             r1.wb.stb <= '1';
3001 #                             stbs_done := false;
3002                                 sync += r1.wb.stb.eq(1)
3003                                 sync += stbs_done.eq(0)
3004
3005 #                             if req.op = OP_STORE_HIT then
3006                                 with m.If(req.op == Op.OP_STORE_HIT):
3007 #                                 r1.write_bram <= '1';
3008                                     sync += r1.write_bram.eq(1)
3009 #                             end if;
3010 #                             r1.full <= '0';
3011 #                             r1.slow_valid <= '1';
3012                                 sync += r1.full.eq(0)
3013                                 sync += r1.slow_valid.eq(1)
3014
3015 #                             -- Store requests never come from the MMU
3016 #                             r1.ls_valid <= '1';
3017 #                             stbs_done := false;
3018 #                             r1.inc_acks <= '1';
3019                                 # Store request never come from the MMU
3020                                 sync += r1.ls_valid.eq(1)
3021                                 sync += stbs_done.eq(0)
3022                                 sync += r1.inc_acks.eq(1)
3023 #                         else
3024                             with m.Else():
3025 #                             r1.wb.stb <= '0';
3026 #                             stbs_done := true;
3027                                 sync += r1.wb.stb.eq(0)
3028                                 sync += stbs_done.eq(1)
3029 #                         end if;
3030 #                   end if;
3031
3032 #                   -- Got ack ? See if complete.
3033 #                   if wishbone_in.ack = '1' then
3034                         # Got ack ? See if complete.
3035                         with m.If(wb_in.ack):
3036 #                         if stbs_done and acks = 1 then
3037                             with m.If(stbs_done & acks)
3038 #                             r1.state <= IDLE;
3039 #                             r1.wb.cyc <= '0';
3040 #                             r1.wb.stb <= '0';
3041                                 sync += r1.state.eq(State.IDLE)
3042                                 sync += r1.wb.cyc.eq(0)
3043                                 sync += r1.wb.stb.eq(0)
3044 #                         end if;
3045 #                         r1.dec_acks <= '1';
3046                             sync += r1.dec_acks.eq(1)
3047 #                   end if;
3048
3049 #                 when NC_LOAD_WAIT_ACK =>
3050                     with m.Case(State.NC_LOAD_WAIT_ACK):
3051 #                   -- Clear stb when slave accepted request
3052 #                     if wishbone_in.stall = '0' then
3053                         # Clear stb when slave accepted request
3054                         with m.If(~wb_in.stall):
3055 #                       r1.wb.stb <= '0';
3056                             sync += r1.wb.stb.eq(0)
3057 #                   end if;
3058
3059 #                   -- Got ack ? complete.
3060 #                   if wishbone_in.ack = '1' then
3061                         # Got ack ? complete.
3062                         with m.If(wb_in.ack):
3063 #                         r1.state <= IDLE;
3064 #                         r1.full <= '0';
3065 #                         r1.slow_valid <= '1';
3066                             sync += r1.state.eq(State.IDLE)
3067                             sync += r1.full.eq(0)
3068                             sync += r1.slow_valid.eq(1)
3069
3070 #                         if r1.mmu_req = '0' then
3071                             with m.If(~r1.mmu_req):
3072 #                             r1.ls_valid <= '1';
3073                                 sync += r1.ls_valid.eq(1)
3074
3075 #                         else
3076                             with m.Else():
3077 #                             r1.mmu_done <= '1';
3078                                 sync += r1.mmu_done.eq(1)
3079 #                         end if;
3080
3081 #                         r1.forward_sel <= (others => '1');
3082 #                         r1.use_forward1 <= '1';
3083 #                         r1.wb.cyc <= '0';
3084 #                         r1.wb.stb <= '0';
3085                             sync += r1.forward_sel.eq(1)
3086                             sync += r1.use_forward1.eq(1)
3087                             sync += r1.wb.cyc.eq(0)
3088                             sync += r1.wb.stb.eq(0)
3089 #                   end if;
3090 #                 end case;
3091 #           end if;
3092 #       end if;
3093 #     end process;
3094
3095 #     dc_log: if LOG_LENGTH > 0 generate
3096 # TODO learn how to tranlate vhdl generate into nmigen
3097 class DcacheLog(Elaborate):
3098     def __init__(self):
3099         pass
3100
3101     def elaborate(self, platform):
3102         m = Module()
3103
3104         comb = m.d.comb
3105         sync = m.d.sync
3106
3107 #         signal log_data : std_ulogic_vector(19 downto 0);
3108         log_data = Signal(20)
3109
3110         comb += log_data
3111
3112 #     begin
3113 #         dcache_log: process(clk)
3114 #         begin
3115 #             if rising_edge(clk) then
3116 #                 log_data <= r1.wb.adr(5 downto 3) &
3117 #                             wishbone_in.stall &
3118 #                             wishbone_in.ack &
3119 #                             r1.wb.stb & r1.wb.cyc &
3120 #                             d_out.error &
3121 #                             d_out.valid &
3122 #                             std_ulogic_vector(
3123 #                              to_unsigned(op_t'pos(req_op), 3)) &
3124 #                             stall_out &
3125 #                             std_ulogic_vector(
3126 #                              to_unsigned(tlb_hit_way, 3)) &
3127 #                             valid_ra &
3128 #                             std_ulogic_vector(
3129 #                              to_unsigned(state_t'pos(r1.state), 3));
3130         sync += log_data.eq(Cat(
3131                  Const(r1.state, 3), valid_ra, Const(tlb_hit_way, 3),
3132                  stall_out, Const(req_op, 3), d_out.valid, d_out.error,
3133                  r1.wb.cyc, r1.wb.stb, wb_in.ack, wb_in.stall,
3134                  r1.wb.adr[3:6]
3135                 ))
3136 #             end if;
3137 #         end process;
3138 #         log_out <= log_data;
3139     # TODO ??? I am very confused need help
3140     comb += log_out.eq(log_data)
3141 #     end generate;
3142 # end;