src/soc/experiment/dcache.py

   1 """Dcache
   2
   3 based on Anton Blanchard microwatt dcache.vhdl
   4
   5 """
   6
   7 from enum import Enum, unique
   8
   9 from nmigen import Module, Signal, Elaboratable,
  10                    Cat, Repl
  11 from nmigen.cli import main
  12 from nmigen.iocontrol import RecordObject
  13 from nmigen.util import log2_int
  14
  15 from experiment.mem_types import LoadStore1ToDcacheType,
  16                                  DcacheToLoadStore1Type,
  17                                  MmuToDcacheType,
  18                                  DcacheToMmuType
  19
  20 from experiment.wb_types import WB_ADDR_BITS, WB_DATA_BITS, WB_SEL_BITS,
  21                                 WBAddrType, WBDataType, WBSelType,
  22                                 WbMasterOut, WBSlaveOut, WBMasterOutVector,
  23                                 WBSlaveOutVector, WBIOMasterOut,
  24                                 WBIOSlaveOut
  25
  26 # --
  27 # -- Set associative dcache write-through
  28 # --
  29 # -- TODO (in no specific order):
  30 # --
  31 # -- * See list in icache.vhdl
  32 # -- * Complete load misses on the cycle when WB data comes instead of
  33 # --   at the end of line (this requires dealing with requests coming in
  34 # --   while not idle...)
  35 # --
  36 # library ieee;
  37 # use ieee.std_logic_1164.all;
  38 # use ieee.numeric_std.all;
  39 #
  40 # library work;
  41 # use work.utils.all;
  42 # use work.common.all;
  43 # use work.helpers.all;
  44 # use work.wishbone_types.all;
  45 #
  46 # entity dcache is
  47 class Dcache(Elaboratable):
  48 #     generic (
  49 #         -- Line size in bytes
  50 #         LINE_SIZE : positive := 64;
  51 #         -- Number of lines in a set
  52 #         NUM_LINES : positive := 32;
  53 #         -- Number of ways
  54 #         NUM_WAYS  : positive := 4;
  55 #         -- L1 DTLB entries per set
  56 #         TLB_SET_SIZE : positive := 64;
  57 #         -- L1 DTLB number of sets
  58 #         TLB_NUM_WAYS : positive := 2;
  59 #         -- L1 DTLB log_2(page_size)
  60 #         TLB_LG_PGSZ : positive := 12;
  61 #         -- Non-zero to enable log data collection
  62 #         LOG_LENGTH : natural := 0
  63 #         );
  64     def __init__(self):
  65         # Line size in bytes
  66         self.LINE_SIZE = 64
  67         # Number of lines in a set
  68         self.NUM_LINES = 32
  69         # Number of ways
  70         self.NUM_WAYS = 4
  71         # L1 DTLB entries per set
  72         self.TLB_SET_SIZE = 64
  73         # L1 DTLB number of sets
  74         self.TLB_NUM_WAYS = 2
  75         # L1 DTLB log_2(page_size)
  76         self.TLB_LG_PGSZ = 12
  77         # Non-zero to enable log data collection
  78         self.LOG_LENGTH = 0
  79 #     port (
  80 #         clk          : in std_ulogic;
  81 #         rst          : in std_ulogic;
  82 #
  83 #         d_in         : in Loadstore1ToDcacheType;
  84 #         d_out        : out DcacheToLoadstore1Type;
  85 #
  86 #         m_in         : in MmuToDcacheType;
  87 #         m_out        : out DcacheToMmuType;
  88 #
  89 #       stall_out    : out std_ulogic;
  90 #
  91 #         wishbone_out : out wishbone_master_out;
  92 #         wishbone_in  : in wishbone_slave_out;
  93 #
  94 #         log_out      : out std_ulogic_vector(19 downto 0)
  95 #         );
  96         self.d_in      = LoadStore1ToDcacheType()
  97         self.d_out     = DcacheToLoadStore1Type()
  98
  99         self.m_in      = MmuToDcacheType()
 100         self.m_out     = DcacheToMmuType()
 101
 102         self.stall_out = Signal()
 103
 104         self.wb_out    = WBMasterOut()
 105         self.wb_in     = WBSlaveOut()
 106
 107         self.log_out   = Signal(20)
 108 # end entity dcache;
 109
 110 # architecture rtl of dcache is
 111     def elaborate(self, platform):
 112         LINE_SIZE    = self.LINE_SIZE
 113         NUM_LINES    = self.NUM_LINES
 114         NUM_WAYS     = self.NUM_WAYS
 115         TLB_SET_SIZE = self.TLB_SET_SIZE
 116         TLB_NUM_WAYS = self.TLB_NUM_WAYS
 117         TLB_LG_PGSZ  = self.TLB_LG_PGSZ
 118         LOG_LENGTH   = self.LOG_LENGTH
 119
 120 #     -- BRAM organisation: We never access more than
 121 #     -- wishbone_data_bits at a time so to save
 122 #     -- resources we make the array only that wide, and
 123 #     -- use consecutive indices for to make a cache "line"
 124 #     --
 125 #     -- ROW_SIZE is the width in bytes of the BRAM
 126 #     -- (based on WB, so 64-bits)
 127 #     constant ROW_SIZE : natural := wishbone_data_bits / 8;
 128         # BRAM organisation: We never access more than
 129         #     -- wishbone_data_bits at a time so to save
 130         #     -- resources we make the array only that wide, and
 131         #     -- use consecutive indices for to make a cache "line"
 132         #     --
 133         #     -- ROW_SIZE is the width in bytes of the BRAM
 134         #     -- (based on WB, so 64-bits)
 135         ROW_SIZE = WB_DATA_BITS / 8;
 136
 137 #     -- ROW_PER_LINE is the number of row (wishbone
 138 #     -- transactions) in a line
 139 #     constant ROW_PER_LINE  : natural := LINE_SIZE / ROW_SIZE;
 140 #     -- BRAM_ROWS is the number of rows in BRAM needed
 141 #     -- to represent the full dcache
 142 #     constant BRAM_ROWS : natural := NUM_LINES * ROW_PER_LINE;
 143         # ROW_PER_LINE is the number of row (wishbone
 144         # transactions) in a line
 145         ROW_PER_LINE = LINE_SIZE / ROW_SIZE
 146         # BRAM_ROWS is the number of rows in BRAM needed
 147         # to represent the full dcache
 148         BRAM_ROWS = NUM_LINES * ROW_PER_LINE
 149
 150 #     -- Bit fields counts in the address
 151 #
 152 #     -- REAL_ADDR_BITS is the number of real address
 153 #     -- bits that we store
 154 #     constant REAL_ADDR_BITS : positive := 56;
 155 #     -- ROW_BITS is the number of bits to select a row
 156 #     constant ROW_BITS      : natural := log2(BRAM_ROWS);
 157 #     -- ROW_LINEBITS is the number of bits to select
 158 #     -- a row within a line
 159 #     constant ROW_LINEBITS  : natural := log2(ROW_PER_LINE);
 160 #     -- LINE_OFF_BITS is the number of bits for
 161 #     -- the offset in a cache line
 162 #     constant LINE_OFF_BITS : natural := log2(LINE_SIZE);
 163 #     -- ROW_OFF_BITS is the number of bits for
 164 #     -- the offset in a row
 165 #     constant ROW_OFF_BITS : natural := log2(ROW_SIZE);
 166 #     -- INDEX_BITS is the number if bits to
 167 #     -- select a cache line
 168 #     constant INDEX_BITS : natural := log2(NUM_LINES);
 169 #     -- SET_SIZE_BITS is the log base 2 of the set size
 170 #     constant SET_SIZE_BITS : natural := LINE_OFF_BITS
 171 #                                         + INDEX_BITS;
 172 #     -- TAG_BITS is the number of bits of
 173 #     -- the tag part of the address
 174 #     constant TAG_BITS : natural := REAL_ADDR_BITS - SET_SIZE_BITS;
 175 #     -- TAG_WIDTH is the width in bits of each way of the tag RAM
 176 #     constant TAG_WIDTH : natural := TAG_BITS + 7
 177 #                                     - ((TAG_BITS + 7) mod 8);
 178 #     -- WAY_BITS is the number of bits to select a way
 179 #     constant WAY_BITS : natural := log2(NUM_WAYS);
 180         # Bit fields counts in the address
 181
 182         # REAL_ADDR_BITS is the number of real address
 183         # bits that we store
 184         REAL_ADDR_BITS = 56
 185         # ROW_BITS is the number of bits to select a row
 186         ROW_BITS = log2_int(BRAM_ROWS)
 187         # ROW_LINE_BITS is the number of bits to select
 188         # a row within a line
 189         ROW_LINE_BITS = log2_int(ROW_PER_LINE)
 190         # LINE_OFF_BITS is the number of bits for
 191         # the offset in a cache line
 192         LINE_OFF_BITS = log2_int(LINE_SIZE)
 193         # ROW_OFF_BITS is the number of bits for
 194         # the offset in a row
 195         ROW_OFF_BITS = log2_int(ROW_SIZE)
 196         # INDEX_BITS is the number if bits to
 197         # select a cache line
 198         INDEX_BITS = log2_int(NUM_LINES)
 199         # SET_SIZE_BITS is the log base 2 of the set size
 200         SET_SIZE_BITS = LINE_OFF_BITS + INDEX_BITS
 201         # TAG_BITS is the number of bits of
 202         # the tag part of the address
 203         TAG_BITS = REAL_ADDR_BITS - SET_SIZE_BITS
 204         # TAG_WIDTH is the width in bits of each way of the tag RAM
 205         TAG_WIDTH = TAG_BITS + 7 - ((TAG_BITS + 7) % 8)
 206         # WAY_BITS is the number of bits to select a way
 207         WAY_BITS = log2_int(NUM_WAYS)
 208
 209 #     -- Example of layout for 32 lines of 64 bytes:
 210 #     --
 211 #     -- ..  tag    |index|  line  |
 212 #     -- ..         |   row   |    |
 213 #     -- ..         |     |---|    | ROW_LINEBITS  (3)
 214 #     -- ..         |     |--- - --| LINE_OFF_BITS (6)
 215 #     -- ..         |         |- --| ROW_OFF_BITS  (3)
 216 #     -- ..         |----- ---|    | ROW_BITS      (8)
 217 #     -- ..         |-----|        | INDEX_BITS    (5)
 218 #     -- .. --------|              | TAG_BITS      (45)
 219         # Example of layout for 32 lines of 64 bytes:
 220         #
 221         # ..  tag    |index|  line  |
 222         # ..         |   row   |    |
 223         # ..         |     |---|    | ROW_LINE_BITS  (3)
 224         # ..         |     |--- - --| LINE_OFF_BITS (6)
 225         # ..         |         |- --| ROW_OFF_BITS  (3)
 226         # ..         |----- ---|    | ROW_BITS      (8)
 227         # ..         |-----|        | INDEX_BITS    (5)
 228         # .. --------|              | TAG_BITS      (45)
 229
 230
 231 #     subtype row_t is integer range 0 to BRAM_ROWS-1;
 232 #     subtype index_t is integer range 0 to NUM_LINES-1;
 233 #     subtype way_t is integer range 0 to NUM_WAYS-1;
 234 #     subtype row_in_line_t is unsigned(ROW_LINE_BITS-1 downto 0);
 235         ROW         = BRAM_ROWS
 236         INDEX       = NUM_LINES
 237         WAY         = NUM_WAYS
 238         ROW_IN_LINE = ROW_LINE_BITS
 239
 240 #     -- The cache data BRAM organized as described above for each way
 241 #     subtype cache_row_t is
 242 #      std_ulogic_vector(wishbone_data_bits-1 downto 0);
 243         # The cache data BRAM organized as described above for each way
 244         CACHE_ROW   = WB_DATA_BITS
 245
 246 #     -- The cache tags LUTRAM has a row per set.
 247 #     -- Vivado is a pain and will not handle a
 248 #     -- clean (commented) definition of the cache
 249 #     -- tags as a 3d memory. For now, work around
 250 #     -- it by putting all the tags
 251 #     subtype cache_tag_t is std_logic_vector(TAG_BITS-1 downto 0);
 252         # The cache tags LUTRAM has a row per set.
 253         # Vivado is a pain and will not handle a
 254         # clean (commented) definition of the cache
 255         # tags as a 3d memory. For now, work around
 256         # it by putting all the tags
 257         CACHE_TAG   = TAG_BITS
 258
 259 #     -- type cache_tags_set_t is array(way_t) of cache_tag_t;
 260 #     -- type cache_tags_array_t is array(index_t) of cache_tags_set_t;
 261 #     constant TAG_RAM_WIDTH : natural := TAG_WIDTH * NUM_WAYS;
 262 #     subtype cache_tags_set_t is
 263 #      std_logic_vector(TAG_RAM_WIDTH-1 downto 0);
 264 #     type cache_tags_array_t is array(index_t) of cache_tags_set_t;
 265         # type cache_tags_set_t is array(way_t) of cache_tag_t;
 266         # type cache_tags_array_t is array(index_t) of cache_tags_set_t;
 267         TAG_RAM_WIDTH = TAG_WIDTH * NUM_WAYS
 268
 269         CACHE_TAG_SET = TAG_RAM_WIDTH
 270
 271         def CacheTagArray():
 272             return Array(CacheTagSet() for x in range(INDEX))
 273
 274 #     -- The cache valid bits
 275 #     subtype cache_way_valids_t is
 276 #      std_ulogic_vector(NUM_WAYS-1 downto 0);
 277 #     type cache_valids_t is array(index_t) of cache_way_valids_t;
 278 #     type row_per_line_valid_t is
 279 #      array(0 to ROW_PER_LINE - 1) of std_ulogic;
 280         # The cache valid bits
 281         CACHE_WAY_VALID_BITS = NUM_WAYS
 282
 283         def CacheValidBitsArray():
 284             return Array(CacheWayValidBits() for x in range(INDEX))
 285
 286         def RowPerLineValidArray():
 287             return Array(Signal() for x in range(ROW_PER_LINE))
 288
 289 #     -- Storage. Hopefully "cache_rows" is a BRAM, the rest is LUTs
 290 #     signal cache_tags    : cache_tags_array_t;
 291 #     signal cache_tag_set : cache_tags_set_t;
 292 #     signal cache_valids  : cache_valids_t;
 293 #
 294 #     attribute ram_style : string;
 295 #     attribute ram_style of cache_tags : signal is "distributed";
 296         # Storage. Hopefully "cache_rows" is a BRAM, the rest is LUTs
 297         cache_tags       = CacheTagArray()
 298         cache_tag_set    = Signal(CACHE_TAG_SET)
 299         cache_valid_bits = CacheValidBitsArray()
 300
 301         # TODO attribute ram_style : string;
 302         # TODO attribute ram_style of cache_tags : signal is "distributed";
 303
 304 #     -- L1 TLB.
 305 #     constant TLB_SET_BITS : natural := log2(TLB_SET_SIZE);
 306 #     constant TLB_WAY_BITS : natural := log2(TLB_NUM_WAYS);
 307 #     constant TLB_EA_TAG_BITS : natural :=
 308 #      64 - (TLB_LG_PGSZ + TLB_SET_BITS);
 309 #     constant TLB_TAG_WAY_BITS : natural :=
 310 #      TLB_NUM_WAYS * TLB_EA_TAG_BITS;
 311 #     constant TLB_PTE_BITS : natural := 64;
 312 #     constant TLB_PTE_WAY_BITS : natural :=
 313 #      TLB_NUM_WAYS * TLB_PTE_BITS;
 314         # L1 TLB
 315         TLB_SET_BITS     = log2_int(TLB_SET_SIZE)
 316         TLB_WAY_BITS     = log2_int(TLB_NUM_WAYS)
 317         TLB_EA_TAG_BITS  = 64 - (TLB_LG_PGSZ + TLB_SET_BITS)
 318         TLB_TAG_WAY_BITS = TLB_NUM_WAYS * TLB_EA_TAG_BITS
 319         TLB_PTE_BITS     = 64
 320         TLB_PTE_WAY_BITS = TLB_NUM_WAYS * TLB_PTE_BITS;
 321
 322 #     subtype tlb_way_t is integer range 0 to TLB_NUM_WAYS - 1;
 323 #     subtype tlb_index_t is integer range 0 to TLB_SET_SIZE - 1;
 324 #     subtype tlb_way_valids_t is
 325 #      std_ulogic_vector(TLB_NUM_WAYS-1 downto 0);
 326 #     type tlb_valids_t is
 327 #      array(tlb_index_t) of tlb_way_valids_t;
 328 #     subtype tlb_tag_t is
 329 #      std_ulogic_vector(TLB_EA_TAG_BITS - 1 downto 0);
 330 #     subtype tlb_way_tags_t is
 331 #      std_ulogic_vector(TLB_TAG_WAY_BITS-1 downto 0);
 332 #     type tlb_tags_t is
 333 #      array(tlb_index_t) of tlb_way_tags_t;
 334 #     subtype tlb_pte_t is
 335 #      std_ulogic_vector(TLB_PTE_BITS - 1 downto 0);
 336 #     subtype tlb_way_ptes_t is
 337 #      std_ulogic_vector(TLB_PTE_WAY_BITS-1 downto 0);
 338 #     type tlb_ptes_t is array(tlb_index_t) of tlb_way_ptes_t;
 339 #     type hit_way_set_t is array(tlb_way_t) of way_t;
 340         TLB_WAY = TLB_NUM_WAYS
 341
 342         TLB_INDEX = TLB_SET_SIZE
 343
 344         TLB_WAY_VALID_BITS = TLB_NUM_WAYS
 345
 346         def TLBValidBitsArray():
 347             return Array(
 348              Signal(TLB_WAY_VALID_BITS) for x in range(TLB_SET_SIZE)
 349             )
 350
 351         TLB_TAG = TLB_EA_TAG_BITS
 352
 353         TLB_WAY_TAGS = TLB_TAG_WAY_BITS
 354
 355         def TLBTagsArray():
 356             return Array(
 357              Signal(TLB_WAY_TAGS) for x in range (TLB_SET_SIZE)
 358             )
 359
 360         TLB_PTE = TLB_PTE_BITS
 361
 362         TLB_WAY_PTES = TLB_PTE_WAY_BITS
 363
 364         def TLBPtesArray():
 365             return Array(
 366              Signal(TLB_WAY_PTES) for x in range(TLB_SET_SIZE)
 367             )
 368
 369         def HitWaySet():
 370             return Array(Signal(WAY) for x in range(TLB_NUM_WAYS))
 371
 372 #     signal dtlb_valids : tlb_valids_t;
 373 #     signal dtlb_tags : tlb_tags_t;
 374 #     signal dtlb_ptes : tlb_ptes_t;
 375
 376 """note: these are passed to nmigen.hdl.Memory as "attributes".  don't
 377    know how, just that they are.
 378 """
 379 #     attribute ram_style of dtlb_tags : signal is "distributed";
 380 #     attribute ram_style of dtlb_ptes : signal is "distributed";
 381         dtlb_valids = TLBValidBitsArray()
 382         dtlb_tags   = TLBTagsArray()
 383         dtlb_ptes   = TLBPtesArray()
 384         # TODO attribute ram_style of dtlb_tags : signal is "distributed";
 385         # TODO attribute ram_style of dtlb_ptes : signal is "distributed";
 386
 387
 388 #     -- Record for storing permission, attribute, etc. bits from a PTE
 389 #     type perm_attr_t is record
 390 #         reference : std_ulogic;
 391 #         changed   : std_ulogic;
 392 #         nocache   : std_ulogic;
 393 #         priv      : std_ulogic;
 394 #         rd_perm   : std_ulogic;
 395 #         wr_perm   : std_ulogic;
 396 #     end record;
 397         # Record for storing permission, attribute, etc. bits from a PTE
 398         class PermAttr(RecordObject):
 399             def __init__(self):
 400                 super().__init__()
 401                 self.reference = Signal()
 402                 self.changed   = Signal()
 403                 self.nocache   = Signal()
 404                 self.priv      = Signal()
 405                 self.rd_perm   = Signal()
 406                 self.wr_perm   = Signal()
 407
 408 #     function extract_perm_attr(
 409 #      pte : std_ulogic_vector(TLB_PTE_BITS - 1 downto 0))
 410 #      return perm_attr_t is
 411 #         variable pa : perm_attr_t;
 412 #     begin
 413 #         pa.reference := pte(8);
 414 #         pa.changed := pte(7);
 415 #         pa.nocache := pte(5);
 416 #         pa.priv := pte(3);
 417 #         pa.rd_perm := pte(2);
 418 #         pa.wr_perm := pte(1);
 419 #         return pa;
 420 #     end;
 421         def extract_perm_attr(pte=Signal(TLB_PTE_BITS)):
 422             pa = PermAttr()
 423             pa.reference = pte[8]
 424             pa.changed   = pte[7]
 425             pa.nocache   = pte[5]
 426             pa.priv      = pte[3]
 427             pa.rd_perm   = pte[2]
 428             pa.wr_perm   = pte[1]
 429             return pa;
 430
 431 #     constant real_mode_perm_attr : perm_attr_t :=
 432 #      (nocache => '0', others => '1');
 433         REAL_MODE_PERM_ATTR = PermAttr()
 434         REAL_MODE_PERM_ATTR.reference = 1
 435         REAL_MODE_PERM_ATTR.changed   = 1
 436         REAL_MODE_PERM_ATTR.priv      = 1
 437         REAL_MODE_PERM_ATTR.rd_perm   = 1
 438         REAL_MODE_PERM_ATTR.wr_perm   = 1
 439
 440 #     -- Type of operation on a "valid" input
 441 #     type op_t is
 442 #      (
 443 #       OP_NONE,
 444 #       OP_BAD,        -- NC cache hit, TLB miss, prot/RC failure
 445 #       OP_STCX_FAIL,  -- conditional store w/o reservation
 446 #       OP_LOAD_HIT,   -- Cache hit on load
 447 #       OP_LOAD_MISS,  -- Load missing cache
 448 #       OP_LOAD_NC,    -- Non-cachable load
 449 #       OP_STORE_HIT,  -- Store hitting cache
 450 #       OP_STORE_MISS  -- Store missing cache
 451 #      );
 452         # Type of operation on a "valid" input
 453         @unique
 454         class OP(Enum):
 455           OP_NONE       = 0
 456           OP_BAD        = 1 # NC cache hit, TLB miss, prot/RC failure
 457           OP_STCX_FAIL  = 2 # conditional store w/o reservation
 458           OP_LOAD_HIT   = 3 # Cache hit on load
 459           OP_LOAD_MISS  = 4 # Load missing cache
 460           OP_LOAD_NC    = 5 # Non-cachable load
 461           OP_STORE_HIT  = 6 # Store hitting cache
 462           OP_STORE_MISS = 7 # Store missing cache
 463
 464 #     -- Cache state machine
 465 #     type state_t is
 466 #      (
 467 #       IDLE,            -- Normal load hit processing
 468 #       RELOAD_WAIT_ACK, -- Cache reload wait ack
 469 #       STORE_WAIT_ACK,  -- Store wait ack
 470 #       NC_LOAD_WAIT_ACK -- Non-cachable load wait ack
 471 #      );
 472         # Cache state machine
 473         @unique
 474         class State(Enum):
 475             IDLE             = 0 # Normal load hit processing
 476             RELOAD_WAIT_ACK  = 1 # Cache reload wait ack
 477             STORE_WAIT_ACK   = 2 # Store wait ack
 478             NC_LOAD_WAIT_ACK = 3 # Non-cachable load wait ack
 479
 480 #     -- Dcache operations:
 481 #     --
 482 #     -- In order to make timing, we use the BRAMs with
 483 #     -- an output buffer, which means that the BRAM
 484 #     -- output is delayed by an extra cycle.
 485 #     --
 486 #     -- Thus, the dcache has a 2-stage internal pipeline
 487 #     -- for cache hits with no stalls.
 488 #     --
 489 #     -- All other operations are handled via stalling
 490 #     -- in the first stage.
 491 #     --
 492 #     -- The second stage can thus complete a hit at the same
 493 #     -- time as the first stage emits a stall for a complex op.
 494 #
 495 #     -- Stage 0 register, basically contains just the latched request
 496 #     type reg_stage_0_t is record
 497 #         req   : Loadstore1ToDcacheType;
 498 #         tlbie : std_ulogic;
 499 #         doall : std_ulogic;
 500 #         tlbld : std_ulogic;
 501 #         mmu_req : std_ulogic;   -- indicates source of request
 502 #     end record;
 503 # Dcache operations:
 504 #
 505 # In order to make timing, we use the BRAMs with
 506 # an output buffer, which means that the BRAM
 507 # output is delayed by an extra cycle.
 508 #
 509 # Thus, the dcache has a 2-stage internal pipeline
 510 # for cache hits with no stalls.
 511 #
 512 # All other operations are handled via stalling
 513 # in the first stage.
 514 #
 515 # The second stage can thus complete a hit at the same
 516 # time as the first stage emits a stall for a complex op.
 517 #
 518         # Stage 0 register, basically contains just the latched request
 519         class RegStage0(RecordObject):
 520             def __init__(self):
 521                 super().__init__()
 522                 self.req     = LoadStore1ToDcacheType()
 523                 self.tlbie   = Signal()
 524                 self.doall   = Signal()
 525                 self.tlbld   = Signal()
 526                 self.mmu_req = Signal() # indicates source of request
 527
 528 #     signal r0 : reg_stage_0_t;
 529 #     signal r0_full : std_ulogic;
 530         r0      = RegStage0()
 531         r0_full = Signal()
 532
 533 #     type mem_access_request_t is record
 534 #         op        : op_t;
 535 #         valid     : std_ulogic;
 536 #         dcbz      : std_ulogic;
 537 #         real_addr : std_ulogic_vector(REAL_ADDR_BITS - 1 downto 0);
 538 #         data      : std_ulogic_vector(63 downto 0);
 539 #         byte_sel  : std_ulogic_vector(7 downto 0);
 540 #         hit_way   : way_t;
 541 #         same_tag  : std_ulogic;
 542 #         mmu_req   : std_ulogic;
 543 #     end record;
 544         class MemAccessRequest(RecordObject):
 545             def __init__(self):
 546                 super().__init__()
 547                 self.op        = Op()
 548                 self.valid     = Signal()
 549                 self.dcbz      = Signal()
 550                 self.real_addr = Signal(REAL_ADDR_BITS)
 551                 self.data      = Signal(64)
 552                 self.byte_sel  = Signal(8)
 553                 self.hit_way   = Signal(WAY)
 554                 self.same_tag  = Signal()
 555                 self.mmu_req   = Signal()
 556
 557 #     -- First stage register, contains state for stage 1 of load hits
 558 #     -- and for the state machine used by all other operations
 559 #     type reg_stage_1_t is record
 560 #         -- Info about the request
 561 #         full    : std_ulogic; -- have uncompleted request
 562 #         mmu_req : std_ulogic; -- request is from MMU
 563 #         req     : mem_access_request_t;
 564 #
 565 #         -- Cache hit state
 566 #         hit_way        : way_t;
 567 #         hit_load_valid : std_ulogic;
 568 #         hit_index      : index_t;
 569 #         cache_hit      : std_ulogic;
 570 #
 571 #         -- TLB hit state
 572 #         tlb_hit       : std_ulogic;
 573 #         tlb_hit_way   : tlb_way_t;
 574 #         tlb_hit_index : tlb_index_t;
 575 #
 576 #         -- 2-stage data buffer for data forwarded from writes to reads
 577 #         forward_data1  : std_ulogic_vector(63 downto 0);
 578 #         forward_data2  : std_ulogic_vector(63 downto 0);
 579 #         forward_sel1   : std_ulogic_vector(7 downto 0);
 580 #         forward_valid1 : std_ulogic;
 581 #         forward_way1   : way_t;
 582 #         forward_row1   : row_t;
 583 #         use_forward1   : std_ulogic;
 584 #         forward_sel    : std_ulogic_vector(7 downto 0);
 585 #
 586 #         -- Cache miss state (reload state machine)
 587 #         state            : state_t;
 588 #         dcbz             : std_ulogic;
 589 #         write_bram       : std_ulogic;
 590 #         write_tag        : std_ulogic;
 591 #         slow_valid       : std_ulogic;
 592 #         wb               : wishbone_master_out;
 593 #         reload_tag       : cache_tag_t;
 594 #         store_way        : way_t;
 595 #         store_row        : row_t;
 596 #         store_index      : index_t;
 597 #         end_row_ix       : row_in_line_t;
 598 #         rows_valid       : row_per_line_valid_t;
 599 #         acks_pending     : unsigned(2 downto 0);
 600 #         inc_acks         : std_ulogic;
 601 #         dec_acks         : std_ulogic;
 602 #
 603 #         -- Signals to complete (possibly with error)
 604 #         ls_valid         : std_ulogic;
 605 #         ls_error         : std_ulogic;
 606 #         mmu_done         : std_ulogic;
 607 #         mmu_error        : std_ulogic;
 608 #         cache_paradox    : std_ulogic;
 609 #
 610 #         -- Signal to complete a failed stcx.
 611 #         stcx_fail        : std_ulogic;
 612 #     end record;
 613 # First stage register, contains state for stage 1 of load hits
 614 # and for the state machine used by all other operations
 615         class RegStage1(RecordObject):
 616             def __init__(self):
 617                 super().__init__()
 618                 # Info about the request
 619                 self.full             = Signal() # have uncompleted request
 620                 self.mmu_req          = Signal() # request is from MMU
 621                 self.req              = MemAccessRequest()
 622
 623                 # Cache hit state
 624                 self.hit_way          = Signal(WAY)
 625                 self.hit_load_valid   = Signal()
 626                 self.hit_index        = Signal(INDEX)
 627                 self.cache_hit        = Signal()
 628
 629                 # TLB hit state
 630                 self.tlb_hit          = Signal()
 631                 self.tlb_hit_way      = Signal(TLB_WAY)
 632                 self.tlb_hit_index    = Signal(TLB_SET_SIZE)
 633                 self.
 634                 # 2-stage data buffer for data forwarded from writes to reads
 635                 self.forward_data1    = Signal(64)
 636                 self.forward_data2    = Signal(64)
 637                 self.forward_sel1     = Signal(8)
 638                 self.forward_valid1   = Signal()
 639                 self.forward_way1     = Signal(WAY)
 640                 self.forward_row1     = Signal(ROW)
 641                 self.use_forward1     = Signal()
 642                 self.forward_sel      = Signal(8)
 643
 644                 # Cache miss state (reload state machine)
 645                 self.state            = State()
 646                 self.dcbz             = Signal()
 647                 self.write_bram       = Signal()
 648                 self.write_tag        = Signal()
 649                 self.slow_valid       = Signal()
 650                 self.wb               = WishboneMasterOut()
 651                 self.reload_tag       = Signal(CACHE_TAG)
 652                 self.store_way        = Signal(WAY)
 653                 self.store_row        = Signal(ROW)
 654                 self.store_index      = Signal(INDEX)
 655                 self.end_row_ix       = Signal(ROW_IN_LINE)
 656                 self.rows_valid       = RowPerLineValidArray()
 657                 self.acks_pending     = Signal(3)
 658                 self.inc_acks         = Signal()
 659                 self.dec_acks         = Signal()
 660
 661                 # Signals to complete (possibly with error)
 662                 self.ls_valid         = Signal()
 663                 self.ls_error         = Signal()
 664                 self.mmu_done         = Signal()
 665                 self.mmu_error        = Signal()
 666                 self.cache_paradox    = Signal()
 667
 668                 # Signal to complete a failed stcx.
 669                 self.stcx_fail        = Signal()
 670
 671 #     signal r1 : reg_stage_1_t;
 672         r1 = RegStage1()
 673
 674 #     -- Reservation information
 675 #     --
 676 #     type reservation_t is record
 677 #         valid : std_ulogic;
 678 #         addr  : std_ulogic_vector(63 downto LINE_OFF_BITS);
 679 #     end record;
 680 # Reservation information
 681
 682         class Reservation(RecordObject):
 683             def __init__(self):
 684                 super().__init__()
 685                 valid = Signal()
 686                 # TODO LINE_OFF_BITS is 6
 687                 addr  = Signal(63 downto LINE_OFF_BITS)
 688
 689 #     signal reservation : reservation_t;
 690         reservation = Reservation()
 691
 692 #     -- Async signals on incoming request
 693 #     signal req_index   : index_t;
 694 #     signal req_row     : row_t;
 695 #     signal req_hit_way : way_t;
 696 #     signal req_tag     : cache_tag_t;
 697 #     signal req_op      : op_t;
 698 #     signal req_data    : std_ulogic_vector(63 downto 0);
 699 #     signal req_same_tag : std_ulogic;
 700 #     signal req_go      : std_ulogic;
 701         # Async signals on incoming request
 702         req_index    = Signal(INDEX)
 703         req_row      = Signal(ROW)
 704         req_hit_way  = Signal(WAY)
 705         req_tag      = Signal(CACHE_TAG)
 706         req_op       = Op()
 707         req_data     = Signal(64)
 708         req_same_tag = Signal()
 709         req_go       = Signal()
 710
 711 #     signal early_req_row  : row_t;
 712 #
 713 #     signal cancel_store : std_ulogic;
 714 #     signal set_rsrv     : std_ulogic;
 715 #     signal clear_rsrv   : std_ulogic;
 716 #
 717 #     signal r0_valid   : std_ulogic;
 718 #     signal r0_stall   : std_ulogic;
 719 #
 720 #     signal use_forward1_next : std_ulogic;
 721 #     signal use_forward2_next : std_ulogic;
 722         early_req_row  = Signal(ROW)
 723
 724         cancel_store = Signal()
 725         set_rsrv     = Signal()
 726         clear_rsrv   = Signal()
 727
 728         r0_valid   = Signal()
 729         r0_stall   = Signal()
 730
 731         use_forward1_next = Signal()
 732         use_forward2_next = Signal()
 733
 734 #     -- Cache RAM interface
 735 #     type cache_ram_out_t is array(way_t) of cache_row_t;
 736 #     signal cache_out   : cache_ram_out_t;
 737         # Cache RAM interface
 738         def CacheRamOut():
 739             return Array(Signal(CACHE_ROW) for x in range(NUM_WAYS))
 740
 741         cache_out = CacheRamOut()
 742
 743 #     -- PLRU output interface
 744 #     type plru_out_t is array(index_t) of
 745 #      std_ulogic_vector(WAY_BITS-1 downto 0);
 746 #     signal plru_victim : plru_out_t;
 747 #     signal replace_way : way_t;
 748         # PLRU output interface
 749         def PLRUOut():
 750             return Array(Signal(WAY_BITS) for x in range(Index()))
 751
 752         plru_victim = PLRUOut()
 753         replace_way = Signal(WAY)
 754
 755 #     -- Wishbone read/write/cache write formatting signals
 756 #     signal bus_sel     : std_ulogic_vector(7 downto 0);
 757         # Wishbone read/write/cache write formatting signals
 758         bus_sel = Signal(8)
 759
 760 #     -- TLB signals
 761 #     signal tlb_tag_way : tlb_way_tags_t;
 762 #     signal tlb_pte_way : tlb_way_ptes_t;
 763 #     signal tlb_valid_way : tlb_way_valids_t;
 764 #     signal tlb_req_index : tlb_index_t;
 765 #     signal tlb_hit : std_ulogic;
 766 #     signal tlb_hit_way : tlb_way_t;
 767 #     signal pte : tlb_pte_t;
 768 #     signal ra : std_ulogic_vector(REAL_ADDR_BITS - 1 downto 0);
 769 #     signal valid_ra : std_ulogic;
 770 #     signal perm_attr : perm_attr_t;
 771 #     signal rc_ok : std_ulogic;
 772 #     signal perm_ok : std_ulogic;
 773 #     signal access_ok : std_ulogic;
 774         # TLB signals
 775         tlb_tag_way   = Signal(TLB_WAY_TAGS)
 776         tlb_pte_way   = Signal(TLB_WAY_PTES)
 777         tlb_valid_way = Signal(TLB_WAY_VALID_BITS)
 778         tlb_req_index = Signal(TLB_SET_SIZE)
 779         tlb_hit       = Signal()
 780         tlb_hit_way   = Signal(TLB_WAY)
 781         pte           = Signal(TLB_PTE)
 782         ra            = Signal(REAL_ADDR_BITS)
 783         valid_ra      = Signal()
 784         perm_attr     = PermAttr()
 785         rc_ok         = Signal()
 786         perm_ok       = Signal()
 787         access_ok     = Signal()
 788
 789 #     -- TLB PLRU output interface
 790 #     type tlb_plru_out_t is array(tlb_index_t) of
 791 #      std_ulogic_vector(TLB_WAY_BITS-1 downto 0);
 792 #     signal tlb_plru_victim : tlb_plru_out_t;
 793         # TLB PLRU output interface
 794         DEF TLBPLRUOut():
 795             return Array(Signal(TLB_WAY_BITS) for x in range(TLB_SET_SIZE))
 796
 797         tlb_plru_victim = TLBPLRUOut()
 798
 799 #     -- Helper functions to decode incoming requests
 800 #
 801 #     -- Return the cache line index (tag index) for an address
 802 #     function get_index(addr: std_ulogic_vector) return index_t is
 803 #     begin
 804 #         return to_integer(
 805 #          unsigned(addr(SET_SIZE_BITS - 1 downto LINE_OFF_BITS))
 806 #         );
 807 #     end;
 808 # Helper functions to decode incoming requests
 809 #
 810         # Return the cache line index (tag index) for an address
 811         def get_index(addr):
 812             return addr[LINE_OFF_BITS:SET_SIZE_BITS]
 813
 814 #     -- Return the cache row index (data memory) for an address
 815 #     function get_row(addr: std_ulogic_vector) return row_t is
 816 #     begin
 817 #         return to_integer(
 818 #          unsigned(addr(SET_SIZE_BITS - 1 downto ROW_OFF_BITS))
 819 #         );
 820 #     end;
 821         # Return the cache row index (data memory) for an address
 822         def get_row(addr):
 823             return addr[ROW_OFF_BITS:SET_SIZE_BITS]
 824
 825 #     -- Return the index of a row within a line
 826 #     function get_row_of_line(row: row_t) return row_in_line_t is
 827 #       variable row_v : unsigned(ROW_BITS-1 downto 0);
 828 #     begin
 829 #       row_v := to_unsigned(row, ROW_BITS);
 830 #         return row_v(ROW_LINEBITS-1 downto 0);
 831 #     end;
 832         # Return the index of a row within a line
 833         def get_row_of_line(row):
 834             row_v = Signal(ROW_BITS)
 835             row_v = Signal(row)
 836             return row_v[0:ROW_LINE_BITS]
 837
 838 #     -- Returns whether this is the last row of a line
 839 #     function is_last_row_addr(addr: wishbone_addr_type;
 840 #      last: row_in_line_t) return boolean is
 841 #     begin
 842 #       return
 843 #        unsigned(addr(LINE_OFF_BITS-1 downto ROW_OFF_BITS)) = last;
 844 #     end;
 845         # Returns whether this is the last row of a line
 846         def is_last_row_addr(addr, last):
 847             return addr[ROW_OFF_BITS:LINE_OFF_BITS] == last
 848
 849 #     -- Returns whether this is the last row of a line
 850 #     function is_last_row(row: row_t; last: row_in_line_t)
 851 #      return boolean is
 852 #     begin
 853 #         return get_row_of_line(row) = last;
 854 #     end;
 855         # Returns whether this is the last row of a line
 856         def is_last_row(row, last):
 857             return get_row_of_line(row) == last
 858
 859 #     -- Return the address of the next row in the current cache line
 860 #     function next_row_addr(addr: wishbone_addr_type)
 861 #      return std_ulogic_vector is
 862 #       variable row_idx : std_ulogic_vector(ROW_LINEBITS-1 downto 0);
 863 #       variable result  : wishbone_addr_type;
 864 #     begin
 865 #       -- Is there no simpler way in VHDL to
 866 #       -- generate that 3 bits adder ?
 867 #       row_idx := addr(LINE_OFF_BITS-1 downto ROW_OFF_BITS);
 868 #       row_idx := std_ulogic_vector(unsigned(row_idx) + 1);
 869 #       result := addr;
 870 #       result(LINE_OFF_BITS-1 downto ROW_OFF_BITS) := row_idx;
 871 #       return result;
 872 #     end;
 873         # Return the address of the next row in the current cache line
 874         def next_row_addr(addr):
 875             row_idx = Signal(ROW_LINE_BITS)
 876             result  = WBAddrType()
 877             # Is there no simpler way in VHDL to
 878             # generate that 3 bits adder ?
 879             row_idx = addr[ROW_OFF_BITS:LINE_OFF_BITS]
 880             row_idx = Signal(row_idx + 1)
 881             result = addr
 882             result[ROW_OFF_BITS:LINE_OFF_BITS] = row_idx
 883             return result
 884
 885 #     -- Return the next row in the current cache line. We use a
 886 #     -- dedicated function in order to limit the size of the
 887 #     -- generated adder to be only the bits within a cache line
 888 #     -- (3 bits with default settings)
 889 #     function next_row(row: row_t) return row_t is
 890 #        variable row_v  : std_ulogic_vector(ROW_BITS-1 downto 0);
 891 #        variable row_idx : std_ulogic_vector(ROW_LINEBITS-1 downto 0);
 892 #        variable result : std_ulogic_vector(ROW_BITS-1 downto 0);
 893 #     begin
 894 #        row_v := std_ulogic_vector(to_unsigned(row, ROW_BITS));
 895 #        row_idx := row_v(ROW_LINEBITS-1 downto 0);
 896 #        row_v(ROW_LINEBITS-1 downto 0) :=
 897 #         std_ulogic_vector(unsigned(row_idx) + 1);
 898 #        return to_integer(unsigned(row_v));
 899 #     end;
 900 # Return the next row in the current cache line. We use a
 901 # dedicated function in order to limit the size of the
 902 # generated adder to be only the bits within a cache line
 903 # (3 bits with default settings)
 904         def next_row(row)
 905             row_v   = Signal(ROW_BITS)
 906             row_idx = Signal(ROW_LINE_BITS)
 907             result  = Signal(ROW_BITS)
 908
 909             row_v = Signal(row)
 910             row_idx = row_v[ROW_LINE_BITS]
 911             row_v[0:ROW_LINE_BITS] = Signal(row_idx + 1)
 912             return row_v
 913
 914 #     -- Get the tag value from the address
 915 #     function get_tag(addr: std_ulogic_vector) return cache_tag_t is
 916 #     begin
 917 #         return addr(REAL_ADDR_BITS - 1 downto SET_SIZE_BITS);
 918 #     end;
 919         # Get the tag value from the address
 920         def get_tag(addr):
 921             return addr[SET_SIZE_BITS:REAL_ADDR_BITS]
 922
 923 #     -- Read a tag from a tag memory row
 924 #     function read_tag(way: way_t; tagset: cache_tags_set_t)
 925 #      return cache_tag_t is
 926 #     begin
 927 #       return tagset(way * TAG_WIDTH + TAG_BITS
 928 #                     - 1 downto way * TAG_WIDTH);
 929 #     end;
 930         # Read a tag from a tag memory row
 931         def read_tag(way, tagset):
 932             return tagset[way *TAG_WIDTH:way * TAG_WIDTH + TAG_BITS]
 933
 934 #     -- Read a TLB tag from a TLB tag memory row
 935 #     function read_tlb_tag(way: tlb_way_t; tags: tlb_way_tags_t)
 936 #      return tlb_tag_t is
 937 #         variable j : integer;
 938 #     begin
 939 #         j := way * TLB_EA_TAG_BITS;
 940 #         return tags(j + TLB_EA_TAG_BITS - 1 downto j);
 941 #     end;
 942         # Read a TLB tag from a TLB tag memory row
 943         def read_tlb_tag(way, tags):
 944             j = Signal()
 945
 946             j = way * TLB_EA_TAG_BITS
 947             return tags[j:j + TLB_EA_TAG_BITS]
 948
 949 #     -- Write a TLB tag to a TLB tag memory row
 950 #     procedure write_tlb_tag(way: tlb_way_t; tags: inout tlb_way_tags_t;
 951 #                             tag: tlb_tag_t) is
 952 #         variable j : integer;
 953 #     begin
 954 #         j := way * TLB_EA_TAG_BITS;
 955 #         tags(j + TLB_EA_TAG_BITS - 1 downto j) := tag;
 956 #     end;
 957         # Write a TLB tag to a TLB tag memory row
 958         def write_tlb_tag(way, tags), tag):
 959             j = Signal()
 960
 961             j = way * TLB_EA_TAG_BITS
 962             tags[j:j + TLB_EA_TAG_BITS] = tag
 963
 964 #     -- Read a PTE from a TLB PTE memory row
 965 #     function read_tlb_pte(way: tlb_way_t; ptes: tlb_way_ptes_t)
 966 #      return tlb_pte_t is
 967 #         variable j : integer;
 968 #     begin
 969 #         j := way * TLB_PTE_BITS;
 970 #         return ptes(j + TLB_PTE_BITS - 1 downto j);
 971 #     end;
 972         # Read a PTE from a TLB PTE memory row
 973         def read_tlb_pte(way, ptes):
 974             j = Signal()
 975
 976             j = way * TLB_PTE_BITS
 977             return ptes[j:j + TLB_PTE_BITS]
 978
 979 #     procedure write_tlb_pte(way: tlb_way_t;
 980 #      ptes: inout tlb_way_ptes_t; newpte: tlb_pte_t) is
 981 #         variable j : integer;
 982 #     begin
 983 #         j := way * TLB_PTE_BITS;
 984 #         ptes(j + TLB_PTE_BITS - 1 downto j) := newpte;
 985 #     end;
 986         def write_tlb_pte(way, ptes,newpte):
 987             j = Signal()
 988
 989             j = way * TLB_PTE_BITS
 990             return ptes[j:j + TLB_PTE_BITS] = newpte
 991
 992 # begin
 993 #
 994 """these, because they are constants, can actually be done *as*
 995    python asserts:
 996    assert LINE_SIZE % ROWSIZE == 0, "line size not ...."
 997 """
 998 #     assert LINE_SIZE mod ROW_SIZE = 0
 999 #      report "LINE_SIZE not multiple of ROW_SIZE" severity FAILURE;
1000 #     assert ispow2(LINE_SIZE)
1001 #      report "LINE_SIZE not power of 2" severity FAILURE;
1002 #     assert ispow2(NUM_LINES)
1003 #      report "NUM_LINES not power of 2" severity FAILURE;
1004 #     assert ispow2(ROW_PER_LINE)
1005 #      report "ROW_PER_LINE not power of 2" severity FAILURE;
1006 #     assert (ROW_BITS = INDEX_BITS + ROW_LINEBITS)
1007 #      report "geometry bits don't add up" severity FAILURE;
1008 #     assert (LINE_OFF_BITS = ROW_OFF_BITS + ROW_LINEBITS)
1009 #      report "geometry bits don't add up" severity FAILURE;
1010 #     assert (REAL_ADDR_BITS = TAG_BITS + INDEX_BITS + LINE_OFF_BITS)
1011 #      report "geometry bits don't add up" severity FAILURE;
1012 #     assert (REAL_ADDR_BITS = TAG_BITS + ROW_BITS + ROW_OFF_BITS)
1013 #      report "geometry bits don't add up" severity FAILURE;
1014 #     assert (64 = wishbone_data_bits)
1015 #      report "Can't yet handle a wishbone width that isn't 64-bits"
1016 #      severity FAILURE;
1017 #     assert SET_SIZE_BITS <= TLB_LG_PGSZ
1018 #      report "Set indexed by virtual address" severity FAILURE;
1019         assert (LINE_SIZE % ROW_SIZE) == 0 "LINE_SIZE not " \
1020          "multiple of ROW_SIZE -!- severity FAILURE"
1021
1022         assert (LINE_SIZE % 2) == 0 "LINE_SIZE not power of" \
1023          "2 -!- severity FAILURE"
1024
1025         assert (NUM_LINES % 2) == 0 "NUM_LINES not power of" \
1026          "2 -!- severity FAILURE"
1027
1028         assert (ROW_PER_LINE % 2) == 0 "ROW_PER_LINE not" \
1029          "power of 2 -!- severity FAILURE"
1030
1031         assert ROW_BITS == (INDEX_BITS + ROW_LINE_BITS) \
1032          "geometry bits don't add up -!- severity FAILURE"
1033
1034         assert (LINE_OFF_BITS = ROW_OFF_BITS + ROW_LINEBITS) \
1035          "geometry bits don't add up -!- severity FAILURE"
1036
1037         assert REAL_ADDR_BITS == (TAG_BITS + INDEX_BITS \
1038          + LINE_OFF_BITS) "geometry bits don't add up -!-" \
1039          "severity FAILURE"
1040
1041         assert REAL_ADDR_BITS == (TAG_BITS + ROW_BITS + ROW_OFF_BITS) \
1042          "geometry bits don't add up -!- severity FAILURE"
1043
1044         assert 64 == wishbone_data_bits "Can't yet handle a" \
1045          "wishbone width that isn't 64-bits -!- severity FAILURE"
1046
1047         assert SET_SIZE_BITS <= TLB_LG_PGSZ "Set indexed by" \
1048          "virtual address -!- severity FAILURE"
1049
1050 #     -- Latch the request in r0.req as long as we're not stalling
1051 #     stage_0 : process(clk)
1052 # Latch the request in r0.req as long as we're not stalling
1053 class Stage0(Elaboratable):
1054     def __init__(self):
1055         pass
1056
1057     def elaborate(self, platform):
1058         m = Module()
1059
1060         comb = m.d.comb
1061         sync = m.d.sync
1062
1063 #         variable r : reg_stage_0_t;
1064         r = RegStage0()
1065         comb += r
1066
1067 #     begin
1068 #         if rising_edge(clk) then
1069 #             assert (d_in.valid and m_in.valid) = '0'
1070 #              report "request collision loadstore vs MMU";
1071         assert ~(d_in.valid & m_in.valid) "request collision
1072          loadstore vs MMU"
1073
1074 #             if m_in.valid = '1' then
1075         with m.If(m_in.valid):
1076 #                 r.req.valid := '1';
1077 #                 r.req.load := not (m_in.tlbie or m_in.tlbld);
1078 #                 r.req.dcbz := '0';
1079 #                 r.req.nc := '0';
1080 #                 r.req.reserve := '0';
1081 #                 r.req.virt_mode := '0';
1082 #                 r.req.priv_mode := '1';
1083 #                 r.req.addr := m_in.addr;
1084 #                 r.req.data := m_in.pte;
1085 #                 r.req.byte_sel := (others => '1');
1086 #                 r.tlbie := m_in.tlbie;
1087 #                 r.doall := m_in.doall;
1088 #                 r.tlbld := m_in.tlbld;
1089 #                 r.mmu_req := '1';
1090             sync += r.req.valid.eq(1)
1091             sync += r.req.load.eq(~(m_in.tlbie | m_in.tlbld))
1092             sync += r.req.priv_mode.eq(1)
1093             sync += r.req.addr.eq(m_in.addr)
1094             sync += r.req.data.eq(m_in.pte)
1095             sync += r.req.byte_sel.eq(1)
1096             sync += r.tlbie.eq(m_in.tlbie)
1097             sync += r.doall.eq(m_in.doall)
1098             sync += r.tlbld.eq(m_in.tlbld)
1099             sync += r.mmu_req.eq(1)
1100 #             else
1101         with m.Else():
1102 #                 r.req := d_in;
1103 #                 r.tlbie := '0';
1104 #                 r.doall := '0';
1105 #                 r.tlbld := '0';
1106 #                 r.mmu_req := '0';
1107             sync += r.req.eq(d_in)
1108 #             end if;
1109 #             if rst = '1' then
1110 #                 r0_full <= '0';
1111 #             elsif r1.full = '0' or r0_full = '0' then
1112             with m.If(~r1.full | ~r0_full):
1113 #                 r0 <= r;
1114 #                 r0_full <= r.req.valid;
1115                 sync += r0.eq(r)
1116                 sync += r0_full.eq(r.req.valid)
1117 #             end if;
1118 #         end if;
1119 #     end process;
1120 #
1121 #     -- we don't yet handle collisions between loadstore1 requests
1122 #     -- and MMU requests
1123 #     m_out.stall <= '0';
1124 # we don't yet handle collisions between loadstore1 requests
1125 # and MMU requests
1126 comb += m_out.stall.eq(0)
1127
1128 #     -- Hold off the request in r0 when r1 has an uncompleted request
1129 #     r0_stall <= r0_full and r1.full;
1130 #     r0_valid <= r0_full and not r1.full;
1131 #     stall_out <= r0_stall;
1132 # Hold off the request in r0 when r1 has an uncompleted request
1133 comb += r0_stall.eq(r0_full & r1.full)
1134 comb += r0_valid.eq(r0_full & ~r1.full)
1135 comb += stall_out.eq(r0_stall)
1136
1137 #     -- TLB
1138 #     -- Operates in the second cycle on the request latched in r0.req.
1139 #     -- TLB updates write the entry at the end of the second cycle.
1140 #     tlb_read : process(clk)
1141 # TLB
1142 # Operates in the second cycle on the request latched in r0.req.
1143 # TLB updates write the entry at the end of the second cycle.
1144 class TLBRead(Elaboratable):
1145     def __init__(self):
1146         pass
1147
1148     def elaborate(self, platform):
1149         m = Module()
1150
1151         comb = m.d.comb
1152         sync = m.d.sync
1153
1154 #         variable index : tlb_index_t;
1155 #         variable addrbits :
1156 #          std_ulogic_vector(TLB_SET_BITS - 1 downto 0);
1157         index    = TLB_SET_SIZE
1158         addrbits = Signal(TLB_SET_BITS)
1159
1160         comb += index
1161         comb += addrbits
1162
1163 #     begin
1164 #         if rising_edge(clk) then
1165 #             if m_in.valid = '1' then
1166         with m.If(m_in.valid):
1167 #                 addrbits := m_in.addr(TLB_LG_PGSZ + TLB_SET_BITS
1168 #                                       - 1 downto TLB_LG_PGSZ);
1169             sync += addrbits.eq(m_in.addr[
1170                      TLB_LG_PGSZ:TLB_LG_PGSZ + TLB_SET_BITS
1171                     ])
1172 #             else
1173         with m.Else():
1174 #                 addrbits := d_in.addr(TLB_LG_PGSZ + TLB_SET_BITS
1175 #                                       - 1 downto TLB_LG_PGSZ);
1176             sync += addrbits.eq(d_in.addr[
1177                      TLB_LG_PGSZ:TLB_LG_PGSZ + TLB_SET_BITS
1178                     ])
1179 #             end if;
1180
1181 #             index := to_integer(unsigned(addrbits));
1182         sync += index.eq(addrbits)
1183 #             -- If we have any op and the previous op isn't finished,
1184 #             -- then keep the same output for next cycle.
1185 #             if r0_stall = '0' then
1186 # If we have any op and the previous op isn't finished,
1187 # then keep the same output for next cycle.
1188         with m.If(~r0_stall):
1189             sync += tlb_valid_way.eq(dtlb_valids[index])
1190             sync += tlb_tag_way.eq(dtlb_tags[index])
1191             sync += tlb_pte_way.eq(dtlb_ptes[index])
1192 #             end if;
1193 #         end if;
1194 #     end process;
1195
1196 #     -- Generate TLB PLRUs
1197 #     maybe_tlb_plrus: if TLB_NUM_WAYS > 1 generate
1198 # Generate TLB PLRUs
1199 class MaybeTLBPLRUs(Elaboratable):
1200     def __init__(self):
1201         pass
1202
1203     def elaborate(self, platform):
1204         m = Module()
1205
1206         comb = m.d.comb
1207         sync = m.d.sync
1208
1209         with m.If(TLB_NUM_WAYS > 1):
1210 #     begin
1211 # TODO understand how to conver generate statements
1212 #       tlb_plrus: for i in 0 to TLB_SET_SIZE - 1 generate
1213 #           -- TLB PLRU interface
1214 #           signal tlb_plru_acc :
1215 #            std_ulogic_vector(TLB_WAY_BITS-1 downto 0);
1216 #           signal tlb_plru_acc_en : std_ulogic;
1217 #           signal tlb_plru_out :
1218 #            std_ulogic_vector(TLB_WAY_BITS-1 downto 0);
1219 #       begin
1220 #           tlb_plru : entity work.plru
1221 #               generic map (
1222 #                   BITS => TLB_WAY_BITS
1223 #                   )
1224 #               port map (
1225 #                   clk => clk,
1226 #                   rst => rst,
1227 #                   acc => tlb_plru_acc,
1228 #                   acc_en => tlb_plru_acc_en,
1229 #                   lru => tlb_plru_out
1230 #                   );
1231 #
1232 #           process(all)
1233 #           begin
1234 #               -- PLRU interface
1235 #               if r1.tlb_hit_index = i then
1236 #                   tlb_plru_acc_en <= r1.tlb_hit;
1237 #               else
1238 #                   tlb_plru_acc_en <= '0';
1239 #               end if;
1240 #               tlb_plru_acc <=
1241 #                std_ulogic_vector(to_unsigned(
1242 #                                   r1.tlb_hit_way, TLB_WAY_BITS
1243 #                                  ));
1244 #               tlb_plru_victim(i) <= tlb_plru_out;
1245 #           end process;
1246 #       end generate;
1247 #     end generate;
1248 # end TODO
1249 #
1250 #     tlb_search : process(all)
1251 class TLBSearch(Elaboratable):
1252     def __init__(self):
1253         pass
1254
1255     def elborate(self, platform):
1256         m = Module()
1257
1258         comb = m.d.comb
1259         sync = m.d.sync
1260
1261 #         variable hitway : tlb_way_t;
1262 #         variable hit : std_ulogic;
1263 #         variable eatag : tlb_tag_t;
1264         hitway = TLBWay()
1265         hit    = Signal()
1266         eatag  = TLBTag()
1267
1268         comb += hitway
1269         comb += hit
1270         comb += eatag
1271
1272 #     begin
1273 #         tlb_req_index <=
1274 #          to_integer(unsigned(r0.req.addr(
1275 #           TLB_LG_PGSZ + TLB_SET_BITS - 1 downto TLB_LG_PGSZ
1276 #          )));
1277 #         hitway := 0;
1278 #         hit := '0';
1279 #         eatag := r0.req.addr(63 downto TLB_LG_PGSZ + TLB_SET_BITS);
1280 #         for i in tlb_way_t loop
1281 #             if tlb_valid_way(i) = '1' and
1282 #                 read_tlb_tag(i, tlb_tag_way) = eatag then
1283 #                 hitway := i;
1284 #                 hit := '1';
1285 #             end if;
1286 #         end loop;
1287 #         tlb_hit <= hit and r0_valid;
1288 #         tlb_hit_way <= hitway;
1289         comb += tlb_req_index.eq(r0.req.addr[
1290                  TLB_LG_PGSZ:TLB_LG_PGSZ + TLB_SET_BITS
1291                 ])
1292
1293         comb += eatag.eq(r0.req.addr[
1294                  TLB_LG_PGSZ + TLB_SET_BITS:64
1295                 ])
1296
1297         for i in TLBWay():
1298             with m.If(tlb_valid_way(i)
1299                       & read_tlb_tag(i, tlb_tag_way) == eatag):
1300
1301                 comb += hitway.eq(i)
1302                 comb += hit.eq(1)
1303
1304         comb += tlb_hit.eq(hit & r0_valid)
1305         comb += tlb_hit_way.eq(hitway)
1306
1307 #         if tlb_hit = '1' then
1308         with m.If(tlb_hit):
1309 #             pte <= read_tlb_pte(hitway, tlb_pte_way);
1310             comb += pte.eq(read_tlb_pte(hitway, tlb_pte_way))
1311 #         else
1312         with m.Else():
1313 #             pte <= (others => '0');
1314             comb += pte.eq(0)
1315 #         end if;
1316 #         valid_ra <= tlb_hit or not r0.req.virt_mode;
1317         comb += valid_ra.eq(tlb_hit | ~r0.req.virt_mode)
1318 #         if r0.req.virt_mode = '1' then
1319         with m.If(r0.req.virt_mode):
1320 #             ra <= pte(REAL_ADDR_BITS - 1 downto TLB_LG_PGSZ) &
1321 #                   r0.req.addr(TLB_LG_PGSZ - 1 downto ROW_OFF_BITS) &
1322 #                   (ROW_OFF_BITS-1 downto 0 => '0');
1323 #             perm_attr <= extract_perm_attr(pte);
1324             comb += ra.eq(Cat(
1325                      Const(ROW_OFF_BITS, ROW_OFF_BITS),
1326                      r0.req.addr[ROW_OFF_BITS:TLB_LG_PGSZ],
1327                      pte[TLB_LG_PGSZ:REAL_ADDR_BITS]
1328                     ))
1329             comb += perm_attr.eq(extract_perm_attr(pte))
1330 #         else
1331         with m.Else():
1332 #             ra <= r0.req.addr(
1333 #                    REAL_ADDR_BITS - 1 downto ROW_OFF_BITS
1334 #                   ) & (ROW_OFF_BITS-1 downto 0 => '0');
1335             comb += ra.eq(Cat(
1336                      Const(ROW_OFF_BITS, ROW_OFF_BITS),
1337                      r0.rq.addr[ROW_OFF_BITS:REAL_ADDR_BITS]
1338                     )
1339
1340 #             perm_attr <= real_mode_perm_attr;
1341             comb += perm_attr.eq(real_mode_perm_attr)
1342 #         end if;
1343 #     end process;
1344
1345 #     tlb_update : process(clk)
1346 class TLBUpdate(Elaboratable):
1347     def __init__(self):
1348         pass
1349
1350     def elaborate(self, platform):
1351         m = Module()
1352
1353         comb = m.d.comb
1354         sync = m.d.sync
1355
1356 #         variable tlbie : std_ulogic;
1357 #         variable tlbwe : std_ulogic;
1358 #         variable repl_way : tlb_way_t;
1359 #         variable eatag : tlb_tag_t;
1360 #         variable tagset : tlb_way_tags_t;
1361 #         variable pteset : tlb_way_ptes_t;
1362         tlbie    = Signal()
1363         tlbwe    = Signal()
1364         repl_way = TLBWay()
1365         eatag    = TLBTag()
1366         tagset   = TLBWayTags()
1367         pteset   = TLBWayPtes()
1368
1369         comb += tlbie
1370         comb += tlbwe
1371         comb += repl_way
1372         comb += eatag
1373         comb += tagset
1374         comb += pteset
1375
1376 #     begin
1377 #         if rising_edge(clk) then
1378 #             tlbie := r0_valid and r0.tlbie;
1379 #             tlbwe := r0_valid and r0.tlbldoi;
1380         sync += tlbie.eq(r0_valid & r0.tlbie)
1381         sync += tlbwe.eq(r0_valid & r0.tlbldoi)
1382
1383 #             if rst = '1' or (tlbie = '1' and r0.doall = '1') then
1384 #        with m.If (TODO understand how signal resets work in nmigen)
1385 #                 -- clear all valid bits at once
1386 #                 for i in tlb_index_t loop
1387 #                     dtlb_valids(i) <= (others => '0');
1388 #                 end loop;
1389             # clear all valid bits at once
1390             for i in range(TLB_SET_SIZE):
1391                 sync += dtlb_valids[i].eq(0)
1392 #             elsif tlbie = '1' then
1393         with m.Elif(tlbie):
1394 #                 if tlb_hit = '1' then
1395             with m.If(tlb_hit):
1396 #                     dtlb_valids(tlb_req_index)(tlb_hit_way) <= '0';
1397                 sync += dtlb_valids[tlb_req_index][tlb_hit_way].eq(0)
1398 #                 end if;
1399 #             elsif tlbwe = '1' then
1400         with m.Elif(tlbwe):
1401 #                 if tlb_hit = '1' then
1402             with m.If(tlb_hit):
1403 #                     repl_way := tlb_hit_way;
1404                 sync += repl_way.eq(tlb_hit_way)
1405 #                 else
1406             with m.Else():
1407 #                     repl_way := to_integer(unsigned(
1408 #                       tlb_plru_victim(tlb_req_index)));
1409                 sync += repl_way.eq(tlb_plru_victim[tlb_req_index])
1410 #                 end if;
1411 #                 eatag := r0.req.addr(
1412 #                           63 downto TLB_LG_PGSZ + TLB_SET_BITS
1413 #                          );
1414 #                 tagset := tlb_tag_way;
1415 #                 write_tlb_tag(repl_way, tagset, eatag);
1416 #                 dtlb_tags(tlb_req_index) <= tagset;
1417 #                 pteset := tlb_pte_way;
1418 #                 write_tlb_pte(repl_way, pteset, r0.req.data);
1419 #                 dtlb_ptes(tlb_req_index) <= pteset;
1420 #                 dtlb_valids(tlb_req_index)(repl_way) <= '1';
1421             sync += eatag.eq(r0.req.addr[TLB_LG_PGSZ + TLB_SET_BITS:64])
1422             sync += tagset.eq(tlb_tag_way)
1423             sync += write_tlb_tag(repl_way, tagset, eatag)
1424             sync += dtlb_tags[tlb_req_index].eq(tagset)
1425             sync += pteset.eq(tlb_pte_way)
1426             sync += write_tlb_pte(repl_way, pteset, r0.req.data)
1427             sync += dtlb_ptes[tlb_req_index].eq(pteset)
1428             sync += dtlb_valids[tlb_req_index][repl_way].eq(1)
1429 #             end if;
1430 #         end if;
1431 #     end process;
1432
1433 #     -- Generate PLRUs
1434 #     maybe_plrus: if NUM_WAYS > 1 generate
1435 class MaybePLRUs(Elaboratable):
1436     def __init__(self):
1437         pass
1438
1439     def elaborate(self, platform):
1440         m = Module()
1441
1442         comb = m.d.comb
1443         sync = m.d.sync
1444
1445 #     begin
1446         # TODO learn translation of generate into nmgien @lkcl
1447 #       plrus: for i in 0 to NUM_LINES-1 generate
1448 #           -- PLRU interface
1449 #           signal plru_acc    : std_ulogic_vector(WAY_BITS-1 downto 0);
1450 #           signal plru_acc_en : std_ulogic;
1451 #           signal plru_out    : std_ulogic_vector(WAY_BITS-1 downto 0);
1452 #
1453 #       begin
1454         # TODO learn tranlation of entity, generic map, port map in
1455         # nmigen @lkcl
1456 #           plru : entity work.plru
1457 #               generic map (
1458 #                   BITS => WAY_BITS
1459 #                   )
1460 #               port map (
1461 #                   clk => clk,
1462 #                   rst => rst,
1463 #                   acc => plru_acc,
1464 #                   acc_en => plru_acc_en,
1465 #                   lru => plru_out
1466 #                   );
1467 #
1468 #           process(all)
1469 #           begin
1470 #               -- PLRU interface
1471 #               if r1.hit_index = i then
1472         # PLRU interface
1473         with m.If(r1.hit_index == i):
1474 #                   plru_acc_en <= r1.cache_hit;
1475             comb += plru_acc_en.eq(r1.cache_hit)
1476 #               else
1477         with m.Else():
1478 #                   plru_acc_en <= '0';
1479             comb += plru_acc_en.eq(0)
1480 #               end if;
1481 #               plru_acc <= std_ulogic_vector(to_unsigned(
1482 #                            r1.hit_way, WAY_BITS
1483 #                           ));
1484 #               plru_victim(i) <= plru_out;
1485         comb += plru_acc.eq(r1.hit_way)
1486         comb += plru_victime[i].eq(plru_out)
1487 #           end process;
1488 #       end generate;
1489 #     end generate;
1490
1491 #     -- Cache tag RAM read port
1492 #     cache_tag_read : process(clk)
1493 # Cache tag RAM read port
1494 class CacheTagRead(Elaboratable):
1495     def __init__(self):
1496         pass
1497
1498     def elaborate(self, platform):
1499         m = Module()
1500
1501         comb = m.d.comb
1502         sync = m.d.sync
1503
1504 #         variable index : index_t;
1505         index = Signal(INDEX)
1506
1507         comb += index
1508
1509 #     begin
1510 #         if rising_edge(clk) then
1511 #             if r0_stall = '1' then
1512         with m.If(r0_stall):
1513 #                 index := req_index;
1514             sync += index.eq(req_index)
1515
1516 #             elsif m_in.valid = '1' then
1517         with m.Elif(m_in.valid):
1518 #                 index := get_index(m_in.addr);
1519             sync += index.eq(get_index(m_in.addr))
1520
1521 #             else
1522         with m.Else():
1523 #                 index := get_index(d_in.addr);
1524             sync += index.eq(get_index(d_in.addr))
1525 #             end if;
1526 #             cache_tag_set <= cache_tags(index);
1527         sync += cache_tag_set.eq(cache_tags(index))
1528 #         end if;
1529 #     end process;
1530
1531 #     -- Cache request parsing and hit detection
1532 #     dcache_request : process(all)
1533 # Cache request parsing and hit detection
1534 class DcacheRequest(Elaboratable):
1535     def __init__(self):
1536         pass
1537
1538     def elaborate(self, platform):
1539 #         variable is_hit  : std_ulogic;
1540 #         variable hit_way : way_t;
1541 #         variable op      : op_t;
1542 #         variable opsel   : std_ulogic_vector(2 downto 0);
1543 #         variable go      : std_ulogic;
1544 #         variable nc      : std_ulogic;
1545 #         variable s_hit   : std_ulogic;
1546 #         variable s_tag   : cache_tag_t;
1547 #         variable s_pte   : tlb_pte_t;
1548 #         variable s_ra    : std_ulogic_vector(
1549 #                             REAL_ADDR_BITS - 1 downto 0
1550 #                            );
1551 #         variable hit_set     : std_ulogic_vector(
1552 #                                 TLB_NUM_WAYS - 1 downto 0
1553 #                                );
1554 #         variable hit_way_set : hit_way_set_t;
1555 #         variable rel_matches : std_ulogic_vector(
1556 #                                 TLB_NUM_WAYS - 1 downto 0
1557 #                                );
1558         rel_match   = Signal()
1559         is_hit      = Signal()
1560         hit_way     = Signal(WAY)
1561         op          = Op()
1562         opsel       = Signal(3)
1563         go          = Signal()
1564         nc          = Signal()
1565         s_hit       = Signal()
1566         s_tag       = Signal(CACHE_TAG)
1567         s_pte       = Signal(TLB_PTE)
1568         s_ra        = Signal(REAL_ADDR_BITS)
1569         hit_set     = Signal(TLB_NUM_WAYS)
1570         hit_way_set = HitWaySet()
1571         rel_matches = Signal(TLB_NUM_WAYS)
1572         rel_match   = Signal()
1573
1574         comb += rel_match
1575         comb += is_hit
1576         comb += hit_way
1577         comb += op
1578         comb += opsel
1579         comb += go
1580         comb += nc
1581         comb += s_hit
1582         comb += s_tag
1583         comb += s_pte
1584         comb += s_ra
1585         comb += hit_set
1586         comb += hit_way_set
1587         comb += rel_matches
1588         comb += rel_match
1589
1590 #     begin
1591 #         -- Extract line, row and tag from request
1592 #         req_index <= get_index(r0.req.addr);
1593 #         req_row <= get_row(r0.req.addr);
1594 #         req_tag <= get_tag(ra);
1595 #
1596 #         go := r0_valid and not (r0.tlbie or r0.tlbld)
1597 #               and not r1.ls_error;
1598         # Extract line, row and tag from request
1599         comb += req_index.eq(get_index(r0.req.addr))
1600         comb += req_row.eq(get_row(r0.req.addr))
1601         comb += req_tag.eq(get_tag(ra))
1602
1603         comb += go.eq(r0_valid & ~(r0.tlbie | r0.tlbld) & ~r1.ls_error)
1604
1605 #         -- Test if pending request is a hit on any way
1606 #         -- In order to make timing in virtual mode,
1607 #         -- when we are using the TLB, we compare each
1608 #         --way with each of the real addresses from each way of
1609 #         -- the TLB, and then decide later which match to use.
1610 #         hit_way := 0;
1611 #         is_hit := '0';
1612 #         rel_match := '0';
1613         # Test if pending request is a hit on any way
1614         # In order to make timing in virtual mode,
1615         # when we are using the TLB, we compare each
1616         # way with each of the real addresses from each way of
1617         # the TLB, and then decide later which match to use.
1618         comb += hit_way.eq(0)
1619         comb += is_hit.eq(0)
1620         comb += rel_match.eq(0)
1621
1622 #         if r0.req.virt_mode = '1' then
1623         with m.If(r0.req.virt_mode):
1624 #             rel_matches := (others => '0');
1625             comb += rel_matches.eq(0)
1626 #             for j in tlb_way_t loop
1627             for j in range(TLB_WAY):
1628 #                 hit_way_set(j) := 0;
1629 #                 s_hit := '0';
1630 #                 s_pte := read_tlb_pte(j, tlb_pte_way);
1631 #                 s_ra  := s_pte(REAL_ADDR_BITS - 1 downto TLB_LG_PGSZ)
1632 #                          & r0.req.addr(TLB_LG_PGSZ - 1 downto 0);
1633 #                 s_tag := get_tag(s_ra);
1634                 comb += hit_way_set[j].eq(0)
1635                 comb += s_hit.eq(0)
1636                 comb += s_pte.eq(read_tlb_pte(j, tlb_pte_way))
1637                 comb += s_ra.eq(Cat(
1638                          r0.req.addr[0:TLB_LG_PGSZ],
1639                          s_pte[TLB_LG_PGSZ:REAL_ADDR_BITS]
1640                         ))
1641                 comb += s_tag.eq(get_tag(s_ra))
1642
1643 #                 for i in way_t loop
1644                 for i in range(WAY):
1645 #                     if go = '1' and cache_valids(req_index)(i) = '1'
1646 #                      and read_tag(i, cache_tag_set) = s_tag
1647 #                      and tlb_valid_way(j) = '1' then
1648                     with m.If(go & cache_valid_bits[req_index][i] &
1649                               read_tag(i, cache_tag_set) == s_tag
1650                               & tlb_valid_way[j]):
1651 #                         hit_way_set(j) := i;
1652 #                         s_hit := '1';
1653                         comb += hit_way_set[j].eq(i)
1654                         comb += s_hit.eq(1)
1655 #                     end if;
1656 #                 end loop;
1657 #                 hit_set(j) := s_hit;
1658                 comb += hit_set[j].eq(s_hit)
1659 #                 if s_tag = r1.reload_tag then
1660                 with m.If(s_tag == r1.reload_tag):
1661 #                     rel_matches(j) := '1';
1662                     comb += rel_matches[j].eq(1)
1663 #                 end if;
1664 #             end loop;
1665 #             if tlb_hit = '1' then
1666             with m.If(tlb_hit):
1667 #                 is_hit := hit_set(tlb_hit_way);
1668 #                 hit_way := hit_way_set(tlb_hit_way);
1669 #                 rel_match := rel_matches(tlb_hit_way);
1670                 comb += is_hit.eq(hit_set[tlb_hit_way])
1671                 comb += hit_way.eq(hit_way_set[tlb_hit_way])
1672                 comb += rel_match.eq(rel_matches[tlb_hit_way])
1673 #             end if;
1674 #         else
1675         with m.Else():
1676 #             s_tag := get_tag(r0.req.addr);
1677             comb += s_tag.eq(get_tag(r0.req.addr))
1678 #             for i in way_t loop
1679             for i in range(WAY):
1680 #                 if go = '1' and cache_valids(req_index)(i) = '1' and
1681 #                     read_tag(i, cache_tag_set) = s_tag then
1682                 with m.If(go & cache_valid_bits[req_index][i] &
1683                           read_tag(i, cache_tag_set) == s_tag):
1684 #                     hit_way := i;
1685 #                     is_hit := '1';
1686                     comb += hit_way.eq(i)
1687                     comb += is_hit.eq(1)
1688 #                 end if;
1689 #             end loop;
1690 #             if s_tag = r1.reload_tag then
1691             with m.If(s_tag == r1.reload_tag):
1692 #                 rel_match := '1';
1693                 comb += rel_match.eq(1)
1694 #             end if;
1695 #         end if;
1696 #         req_same_tag <= rel_match;
1697         comb += req_same_tag.eq(rel_match)
1698
1699 #         -- See if the request matches the line currently being reloaded
1700 #         if r1.state = RELOAD_WAIT_ACK and req_index = r1.store_index
1701 #          and rel_match = '1' then
1702         # See if the request matches the line currently being reloaded
1703         with m.If(r1.state == State.RELOAD_WAIT_ACK & req_index ==
1704                   r1.store_index & rel_match):
1705 #             -- For a store, consider this a hit even if the row isn't
1706 #             -- valid since it will be by the time we perform the store.
1707 #             -- For a load, check the appropriate row valid bit.
1708             # For a store, consider this a hit even if the row isn't
1709             # valid since it will be by the time we perform the store.
1710             # For a load, check the appropriate row valid bit.
1711 #             is_hit :=
1712 #              not r0.req.load or r1.rows_valid(req_row mod ROW_PER_LINE);
1713 #             hit_way := replace_way;
1714             comb += is_hit.eq(~r0.req.load
1715                      | r1.rows_valid[req_row % ROW_PER_LINE])
1716             comb += hit_way.eq(replace_way)
1717 #         end if;
1718
1719 #         -- Whether to use forwarded data for a load or not
1720         # Whether to use forwarded data for a load or not
1721 #         use_forward1_next <= '0';
1722         comb += use_forward1_next.eq(0)
1723 #         if get_row(r1.req.real_addr) = req_row
1724 #          and r1.req.hit_way = hit_way then
1725         with m.If(get_row(r1.req.real_addr) == req_row
1726                   & r1.req.hit_way == hit_way)
1727 #             -- Only need to consider r1.write_bram here, since if we
1728 #             -- are writing refill data here, then we don't have a
1729 #             -- cache hit this cycle on the line being refilled.
1730 #             -- (There is the possibility that the load following the
1731 #             -- load miss that started the refill could be to the old
1732 #             -- contents of the victim line, since it is a couple of
1733 #             -- cycles after the refill starts before we see the updated
1734 #             -- cache tag. In that case we don't use the bypass.)
1735             # Only need to consider r1.write_bram here, since if we
1736             # are writing refill data here, then we don't have a
1737             # cache hit this cycle on the line being refilled.
1738             # (There is the possibility that the load following the
1739             # load miss that started the refill could be to the old
1740             # contents of the victim line, since it is a couple of
1741             # cycles after the refill starts before we see the updated
1742             # cache tag. In that case we don't use the bypass.)
1743 #             use_forward1_next <= r1.write_bram;
1744             comb += use_forward1_next.eq(r1.write_bram)
1745 #         end if;
1746 #         use_forward2_next <= '0';
1747         comb += use_forward2_next.eq(0)
1748 #         if r1.forward_row1 = req_row and r1.forward_way1 = hit_way then
1749         with m.If(r1.forward_row1 == req_row & r1.forward_way1 == hit_way):
1750 #             use_forward2_next <= r1.forward_valid1;
1751             comb += use_forward2_next.eq(r1.forward_valid1)
1752 #         end if;
1753
1754 #           -- The way that matched on a hit
1755         # The way that matched on a hit
1756 #           req_hit_way <= hit_way;
1757         comb += req_hit_way.eq(hit_way)
1758
1759 #         -- The way to replace on a miss
1760         # The way to replace on a miss
1761 #         if r1.write_tag = '1' then
1762         with m.If(r1.write_tag):
1763 #             replace_way <= to_integer(unsigned(
1764 #                             plru_victim(r1.store_index)
1765 #                            ));
1766             replace_way.eq(plru_victim[r1.store_index])
1767 #         else
1768         with m.Else():
1769 #             replace_way <= r1.store_way;
1770             comb += replace_way.eq(r1.store_way)
1771 #         end if;
1772
1773 #         -- work out whether we have permission for this access
1774 #         -- NB we don't yet implement AMR, thus no KUAP
1775         # work out whether we have permission for this access
1776         # NB we don't yet implement AMR, thus no KUAP
1777 #         rc_ok <= perm_attr.reference and
1778 #                  (r0.req.load or perm_attr.changed);
1779 #         perm_ok <= (r0.req.priv_mode or not perm_attr.priv) and
1780 #                    (perm_attr.wr_perm or (r0.req.load
1781 #                    and perm_attr.rd_perm));
1782 #         access_ok <= valid_ra and perm_ok and rc_ok;
1783         comb += rc_ok.eq(
1784                  perm_attr.reference & (r0.req.load | perm_attr.changed)
1785                 )
1786         comb += perm_ok.eq((r0.req.prive_mode | ~perm_attr.priv)
1787                            & perm_attr.wr_perm
1788                            | (r0.req.load & perm_attr.rd_perm)
1789                           )
1790         comb += access_ok.eq(valid_ra & perm_ok & rc_ok)
1791 #       -- Combine the request and cache hit status to decide what
1792 #       -- operation needs to be done
1793 #         nc := r0.req.nc or perm_attr.nocache;
1794 #         op := OP_NONE;
1795         # Combine the request and cache hit status to decide what
1796         # operation needs to be done
1797         comb += nc.eq(r0.req.nc | perm_attr.nocache)
1798         comb += op.eq(Op.OP_NONE)
1799 #         if go = '1' then
1800         with m.If(go):
1801 #             if access_ok = '0' then
1802             with m.If(~access_ok):
1803 #                 op := OP_BAD;
1804                 comb += op.eq(Op.OP_BAD)
1805 #             elsif cancel_store = '1' then
1806             with m.Elif(cancel_store):
1807 #                 op := OP_STCX_FAIL;
1808                 comb += op.eq(Op.OP_STCX_FAIL)
1809 #             else
1810             with m.Else():
1811 #                 opsel := r0.req.load & nc & is_hit;
1812                 comb += opsel.eq(Cat(is_hit, nc, r0.req.load))
1813 #                 case opsel is
1814                 with m.Switch(opsel):
1815 #                     when "101" => op := OP_LOAD_HIT;
1816 #                     when "100" => op := OP_LOAD_MISS;
1817 #                     when "110" => op := OP_LOAD_NC;
1818 #                     when "001" => op := OP_STORE_HIT;
1819 #                     when "000" => op := OP_STORE_MISS;
1820 #                     when "010" => op := OP_STORE_MISS;
1821 #                     when "011" => op := OP_BAD;
1822 #                     when "111" => op := OP_BAD;
1823 #                     when others => op := OP_NONE;
1824                     with m.Case(Const(0b101, 3)):
1825                         comb += op.eq(Op.OP_LOAD_HIT)
1826
1827                     with m.Case(Cosnt(0b100, 3)):
1828                         comb += op.eq(Op.OP_LOAD_MISS)
1829
1830                     with m.Case(Const(0b110, 3)):
1831                         comb += op.eq(Op.OP_LOAD_NC)
1832
1833                     with m.Case(Const(0b001, 3)):
1834                         comb += op.eq(Op.OP_STORE_HIT)
1835
1836                     with m.Case(Const(0b000, 3)):
1837                         comb += op.eq(Op.OP_STORE_MISS)
1838
1839                     with m.Case(Const(0b010, 3)):
1840                         comb += op.eq(Op.OP_STORE_MISS)
1841
1842                     with m.Case(Const(0b011, 3)):
1843                         comb += op.eq(Op.OP_BAD)
1844
1845                     with m.Case(Const(0b111, 3)):
1846                         comb += op.eq(Op.OP_BAD)
1847
1848                     with m.Default():
1849                         comb += op.eq(Op.OP_NONE)
1850 #                 end case;
1851 #             end if;
1852 #         end if;
1853 #       req_op <= op;
1854 #         req_go <= go;
1855         comb += req_op.eq(op)
1856         comb += req_go.eq(go)
1857
1858 #         -- Version of the row number that is valid one cycle earlier
1859 #         -- in the cases where we need to read the cache data BRAM.
1860 #         -- If we're stalling then we need to keep reading the last
1861 #         -- row requested.
1862         # Version of the row number that is valid one cycle earlier
1863         # in the cases where we need to read the cache data BRAM.
1864         # If we're stalling then we need to keep reading the last
1865         # row requested.
1866 #         if r0_stall = '0' then
1867         with m.If(~r0_stall):
1868 #             if m_in.valid = '1' then
1869             with m.If(m_in.valid):
1870 #                 early_req_row <= get_row(m_in.addr);
1871                 comb += early_req_row.eq(get_row(m_in.addr))
1872 #             else
1873             with m.Else():
1874 #                 early_req_row <= get_row(d_in.addr);
1875                 comb += early_req_row.eq(get_row(d_in.addr))
1876 #             end if;
1877 #         else
1878         with m.Else():
1879 #             early_req_row <= req_row;
1880             comb += early_req_row.eq(req_row)
1881 #         end if;
1882 #     end process;
1883
1884 #     -- Wire up wishbone request latch out of stage 1
1885 #     wishbone_out <= r1.wb;
1886     # Wire up wishbone request latch out of stage 1
1887     comb += wishbone_out.eq(r1.wb)
1888
1889 #     -- Handle load-with-reservation and store-conditional instructions
1890 #     reservation_comb: process(all)
1891 # Handle load-with-reservation and store-conditional instructions
1892 class ReservationComb(Elaboratable):
1893     def __init__(self):
1894         pass
1895
1896     def elaborate(self, platform):
1897         m = Module()
1898
1899         comb = m.d.comb
1900         sync = m.d.sync
1901
1902 #     begin
1903 #         cancel_store <= '0';
1904 #         set_rsrv <= '0';
1905 #         clear_rsrv <= '0';
1906 #         if r0_valid = '1' and r0.req.reserve = '1' then
1907         with m.If(r0_valid & r0.req.reserve):
1908
1909 #             -- XXX generate alignment interrupt if address
1910 #             -- is not aligned XXX or if r0.req.nc = '1'
1911 #             if r0.req.load = '1' then
1912             # XXX generate alignment interrupt if address
1913             # is not aligned XXX or if r0.req.nc = '1'
1914             with m.If(r0.req.load):
1915 #                 -- load with reservation
1916 #                 set_rsrv <= '1';
1917                 # load with reservation
1918                 comb += set_rsrv(1)
1919 #             else
1920             with m.Else():
1921 #                 -- store conditional
1922 #                 clear_rsrv <= '1';
1923                 # store conditional
1924                 comb += clear_rsrv.eq(1)
1925 #                 if reservation.valid = '0' or r0.req.addr(63
1926 #                  downto LINE_OFF_BITS) /= reservation.addr then
1927                 with m.If(~reservation.valid
1928                           | r0.req.addr[LINE_OFF_BITS:64]):
1929 #                     cancel_store <= '1';
1930                     comb += cancel_store.eq(1)
1931 #                 end if;
1932 #             end if;
1933 #         end if;
1934 #     end process;
1935
1936 #     reservation_reg: process(clk)
1937 class ReservationReg(Elaboratable):
1938     def __init__(self):
1939         pass
1940
1941     def elaborate(self, platform):
1942         m = Module()
1943
1944         comb = m.d.comb
1945         sync = m.d.sync
1946
1947 #     begin
1948 #         if rising_edge(clk) then
1949 #             if rst = '1' then
1950 #                 reservation.valid <= '0';
1951             # TODO understand how resets work in nmigen
1952 #             elsif r0_valid = '1' and access_ok = '1' then
1953             with m.Elif(r0_valid & access_ok)""
1954 #                 if clear_rsrv = '1' then
1955                 with m.If(clear_rsrv):
1956 #                     reservation.valid <= '0';
1957                     sync += reservation.valid.ea(0)
1958 #                 elsif set_rsrv = '1' then
1959                 with m.Elif(set_rsrv):
1960 #                     reservation.valid <= '1';
1961 #                     reservation.addr <=
1962 #                      r0.req.addr(63 downto LINE_OFF_BITS);
1963                     sync += reservation.valid.eq(1)
1964                     sync += reservation.addr(r0.req.addr[LINE_OFF_BITS:64])
1965 #                 end if;
1966 #             end if;
1967 #         end if;
1968 #     end process;
1969 #
1970 #     -- Return data for loads & completion control logic
1971 #     writeback_control: process(all)
1972 # Return data for loads & completion control logic
1973 class WriteBackControl(Elaboratable):
1974     def __init__(self):
1975         pass
1976
1977     def elaborate(self, platform):
1978         m = Module()
1979
1980         comb = m.d.comb
1981         sync = m.d.sync
1982
1983 #         variable data_out : std_ulogic_vector(63 downto 0);
1984 #         variable data_fwd : std_ulogic_vector(63 downto 0);
1985 #         variable j        : integer;
1986         data_out = Signal(64)
1987         data_fwd = Signal(64)
1988         j        = Signal()
1989
1990 #     begin
1991 #         -- Use the bypass if are reading the row that was
1992 #         -- written 1 or 2 cycles ago, including for the
1993 #         -- slow_valid = 1 case (i.e. completing a load
1994 #         -- miss or a non-cacheable load).
1995 #         if r1.use_forward1 = '1' then
1996         # Use the bypass if are reading the row that was
1997         # written 1 or 2 cycles ago, including for the
1998         # slow_valid = 1 case (i.e. completing a load
1999         # miss or a non-cacheable load).
2000         with m.If(r1.use_forward1):
2001 #             data_fwd := r1.forward_data1;
2002             comb += data_fwd.eq(r1.forward_data1)
2003 #         else
2004         with m.Else():
2005 #             data_fwd := r1.forward_data2;
2006             comb += data_fwd.eq(r1.forward_data2)
2007 #         end if;
2008
2009 #         data_out := cache_out(r1.hit_way);
2010         comb += data_out.eq(cache_out[r1.hit_way])
2011
2012 #         for i in 0 to 7 loop
2013         for i in range(8):
2014 #             j := i * 8;
2015             comb += i * 8
2016
2017 #             if r1.forward_sel(i) = '1' then
2018             with m.If(r1.forward_sel[i]):
2019 #                 data_out(j + 7 downto j) := data_fwd(j + 7 downto j);
2020                 comb += data_out[j:j+8].eq(data_fwd[j:j+8])
2021 #             end if;
2022 #         end loop;
2023
2024 #         d_out.valid <= r1.ls_valid;
2025 #         d_out.data <= data_out;
2026 #         d_out.store_done <= not r1.stcx_fail;
2027 #         d_out.error <= r1.ls_error;
2028 #         d_out.cache_paradox <= r1.cache_paradox;
2029         comb += d_out.valid.eq(r1.ls_valid)
2030         comb += d_out.data.eq(data_out)
2031         comb += d_out.store_done.eq(~r1.stcx_fail)
2032         comb += d_out.error.eq(r1.ls_error)
2033         comb += d_out.cache_paradox.eq(r1.cache_paradox)
2034
2035 #         -- Outputs to MMU
2036 #         m_out.done <= r1.mmu_done;
2037 #         m_out.err <= r1.mmu_error;
2038 #         m_out.data <= data_out;
2039         comb += m_out.done.eq(r1.mmu_done)
2040         comb += m_out.err.eq(r1.mmu_error)
2041         comb += m_out.data.eq(data_out)
2042
2043 #         -- We have a valid load or store hit or we just completed
2044 #         -- a slow op such as a load miss, a NC load or a store
2045 #         --
2046 #         -- Note: the load hit is delayed by one cycle. However it
2047 #         -- can still not collide with r.slow_valid (well unless I
2048 #         -- miscalculated) because slow_valid can only be set on a
2049 #         -- subsequent request and not on its first cycle (the state
2050 #         -- machine must have advanced), which makes slow_valid
2051 #         -- at least 2 cycles from the previous hit_load_valid.
2052 #
2053 #         -- Sanity: Only one of these must be set in any given cycle
2054 #         assert (r1.slow_valid and r1.stcx_fail) /= '1'
2055 #          report "unexpected slow_valid collision with stcx_fail"
2056 #          severity FAILURE;
2057 #         assert ((r1.slow_valid or r1.stcx_fail) and r1.hit_load_valid)
2058 #          /= '1' report "unexpected hit_load_delayed collision with
2059 #          slow_valid" severity FAILURE;
2060         # We have a valid load or store hit or we just completed
2061         # a slow op such as a load miss, a NC load or a store
2062         #
2063         # Note: the load hit is delayed by one cycle. However it
2064         # can still not collide with r.slow_valid (well unless I
2065         # miscalculated) because slow_valid can only be set on a
2066         # subsequent request and not on its first cycle (the state
2067         # machine must have advanced), which makes slow_valid
2068         # at least 2 cycles from the previous hit_load_valid.
2069
2070         # Sanity: Only one of these must be set in any given cycle
2071         assert (r1.slow_valid & r1.stcx_fail) != 1 "unexpected" \
2072          "slow_valid collision with stcx_fail -!- severity FAILURE"
2073
2074         assert ((r1.slow_valid | r1.stcx_fail) | r1.hit_load_valid) != 1
2075          "unexpected hit_load_delayed collision with slow_valid -!-" \
2076          "severity FAILURE"
2077
2078 #         if r1.mmu_req = '0' then
2079         with m.If(~r1._mmu_req):
2080 #             -- Request came from loadstore1...
2081 #             -- Load hit case is the standard path
2082 #             if r1.hit_load_valid = '1' then
2083             # Request came from loadstore1...
2084             # Load hit case is the standard path
2085             with m.If(r1.hit_load_valid):
2086 #                 report
2087 #                  "completing load hit data=" & to_hstring(data_out);
2088                 print(f"completing load hit data={data_out}")
2089 #             end if;
2090
2091 #             -- error cases complete without stalling
2092 #             if r1.ls_error = '1' then
2093             # error cases complete without stalling
2094             with m.If(r1.ls_error):
2095 #                 report "completing ld/st with error";
2096                 print("completing ld/st with error")
2097 #             end if;
2098
2099 #             -- Slow ops (load miss, NC, stores)
2100 #             if r1.slow_valid = '1' then
2101             # Slow ops (load miss, NC, stores)
2102             with m.If(r1.slow_valid):
2103 #                 report
2104 #                  "completing store or load miss data="
2105 #                   & to_hstring(data_out);
2106                 print(f"completing store or load miss data={data_out}")
2107 #             end if;
2108
2109 #         else
2110         with m.Else():
2111 #             -- Request came from MMU
2112 #             if r1.hit_load_valid = '1' then
2113             # Request came from MMU
2114             with m.If(r1.hit_load_valid):
2115 #                 report "completing load hit to MMU, data="
2116 #                  & to_hstring(m_out.data);
2117                 print(f"completing load hit to MMU, data={m_out.data}")
2118 #             end if;
2119 #
2120 #             -- error cases complete without stalling
2121 #             if r1.mmu_error = '1' then
2122 #                 report "completing MMU ld with error";
2123             # error cases complete without stalling
2124             with m.If(r1.mmu_error):
2125                 print("combpleting MMU ld with error")
2126 #             end if;
2127 #
2128 #             -- Slow ops (i.e. load miss)
2129 #             if r1.slow_valid = '1' then
2130             # Slow ops (i.e. load miss)
2131             with m.If(r1.slow_valid):
2132 #                 report "completing MMU load miss, data="
2133 #                  & to_hstring(m_out.data);
2134                 print("completing MMU load miss, data={m_out.data}")
2135 #             end if;
2136 #         end if;
2137 #     end process;
2138
2139 # begin TODO
2140 #     -- Generate a cache RAM for each way. This handles the normal
2141 #     -- reads, writes from reloads and the special store-hit update
2142 #     -- path as well.
2143 #     --
2144 #     -- Note: the BRAMs have an extra read buffer, meaning the output
2145 #     -- is pipelined an extra cycle. This differs from the
2146 #     -- icache. The writeback logic needs to take that into
2147 #     -- account by using 1-cycle delayed signals for load hits.
2148 #     --
2149 #     rams: for i in 0 to NUM_WAYS-1 generate
2150 #       signal do_read  : std_ulogic;
2151 #       signal rd_addr  : std_ulogic_vector(ROW_BITS-1 downto 0);
2152 #       signal do_write : std_ulogic;
2153 #       signal wr_addr  : std_ulogic_vector(ROW_BITS-1 downto 0);
2154 #       signal wr_data  :
2155 #        std_ulogic_vector(wishbone_data_bits-1 downto 0);
2156 #       signal wr_sel   : std_ulogic_vector(ROW_SIZE-1 downto 0);
2157 #       signal wr_sel_m : std_ulogic_vector(ROW_SIZE-1 downto 0);
2158 #       signal dout     : cache_row_t;
2159 #     begin
2160 #       way: entity work.cache_ram
2161 #           generic map (
2162 #               ROW_BITS => ROW_BITS,
2163 #               WIDTH => wishbone_data_bits,
2164 #               ADD_BUF => true
2165 #               )
2166 #           port map (
2167 #               clk     => clk,
2168 #               rd_en   => do_read,
2169 #               rd_addr => rd_addr,
2170 #               rd_data => dout,
2171 #               wr_sel  => wr_sel_m,
2172 #               wr_addr => wr_addr,
2173 #               wr_data => wr_data
2174 #               );
2175 #       process(all)
2176 # end TODO
2177 class TODO(Elaboratable):
2178     def __init__(self):
2179         pass
2180
2181     def elaborate(self, platform):
2182         m = Module()
2183
2184         comb = m.d.comb
2185         sync = m.d.sync
2186
2187 #       begin
2188 #           -- Cache hit reads
2189 #           do_read <= '1';
2190 #           rd_addr <=
2191 #            std_ulogic_vector(to_unsigned(early_req_row, ROW_BITS));
2192 #           cache_out(i) <= dout;
2193         # Cache hit reads
2194         comb += do_read.eq(1)
2195         comb += rd_addr.eq(Signal(ROW))
2196         comb += cache_out[i].eq(dout)
2197
2198 #           -- Write mux:
2199 #           --
2200 #           -- Defaults to wishbone read responses (cache refill)
2201 #           --
2202 #           -- For timing, the mux on wr_data/sel/addr is not
2203 #           -- dependent on anything other than the current state.
2204         # Write mux:
2205         #
2206         # Defaults to wishbone read responses (cache refill)
2207         #
2208         # For timing, the mux on wr_data/sel/addr is not
2209         # dependent on anything other than the current state.
2210 #           wr_sel_m <= (others => '0');
2211         comb += wr_sel_m.eq(0)
2212
2213 #           do_write <= '0';
2214         comb += do_write.eq(0)
2215 #             if r1.write_bram = '1' then
2216         with m.If(r1.write_bram):
2217 #                 -- Write store data to BRAM.  This happens one
2218 #                 -- cycle after the store is in r0.
2219             # Write store data to BRAM.  This happens one
2220             # cycle after the store is in r0.
2221 #                 wr_data <= r1.req.data;
2222 #                 wr_sel  <= r1.req.byte_sel;
2223 #                 wr_addr <= std_ulogic_vector(to_unsigned(
2224 #                             get_row(r1.req.real_addr), ROW_BITS
2225 #                            ));
2226             comb += wr_data.eq(r1.req.data)
2227             comb += wr_sel.eq(r1.req.byte_sel)
2228             comb += wr_addr.eq(Signal(get_row(r1.req.real_addr)))
2229
2230 #                 if i = r1.req.hit_way then
2231             with m.If(i == r1.req.hit_way):
2232 #                     do_write <= '1';
2233                 comb += do_write.eq(1)
2234 #                 end if;
2235 #           else
2236             with m.Else():
2237 #               -- Otherwise, we might be doing a reload or a DCBZ
2238 #                 if r1.dcbz = '1' then
2239                 # Otherwise, we might be doing a reload or a DCBZ
2240                 with m.If(r1.dcbz):
2241 #                     wr_data <= (others => '0');
2242                     comb += wr_data.eq(0)
2243 #                 else
2244                 with m.Else():
2245 #                     wr_data <= wishbone_in.dat;
2246                     comb += wr_data.eq(wishbone_in.dat)
2247 #                 end if;
2248
2249 #                 wr_addr <= std_ulogic_vector(to_unsigned(
2250 #                             r1.store_row, ROW_BITS
2251 #                            ));
2252 #                 wr_sel <= (others => '1');
2253             comb += wr_addr.eq(Signal(r1.store_row))
2254             comb += wr_sel.eq(1)
2255
2256 #                 if r1.state = RELOAD_WAIT_ACK and
2257 #                 wishbone_in.ack = '1' and replace_way = i then
2258             with m.If(r1.state == State.RELOAD_WAIT_ACK & wishbone_in.ack
2259                       & relpace_way == i):
2260 #                     do_write <= '1';
2261                 comb += do_write.eq(1)
2262 #                 end if;
2263 #           end if;
2264
2265 #             -- Mask write selects with do_write since BRAM
2266 #             -- doesn't have a global write-enable
2267 #             if do_write = '1' then
2268 #             -- Mask write selects with do_write since BRAM
2269 #             -- doesn't have a global write-enable
2270             with m.If(do_write):
2271 #                 wr_sel_m <= wr_sel;
2272                 comb += wr_sel_m.eq(wr_sel)
2273 #             end if;
2274 #         end process;
2275 #     end generate;
2276
2277 #     -- Cache hit synchronous machine for the easy case.
2278 #     -- This handles load hits.
2279 #     -- It also handles error cases (TLB miss, cache paradox)
2280 #     dcache_fast_hit : process(clk)
2281 # Cache hit synchronous machine for the easy case.
2282 # This handles load hits.
2283 # It also handles error cases (TLB miss, cache paradox)
2284 class DcacheFastHit(Elaboratable):
2285     def __init__(self):
2286         pass
2287
2288     def elaborate(self, platform):
2289         m = Module()
2290
2291         comb = m.d.comb
2292         sync = m.d.sync
2293
2294 #     begin
2295 #         if rising_edge(clk) then
2296 #             if req_op /= OP_NONE then
2297         with m.If(req_op != Op.OP_NONE):
2298 #               report "op:" & op_t'image(req_op) &
2299 #                   " addr:" & to_hstring(r0.req.addr) &
2300 #                   " nc:" & std_ulogic'image(r0.req.nc) &
2301 #                   " idx:" & integer'image(req_index) &
2302 #                   " tag:" & to_hstring(req_tag) &
2303 #                   " way: " & integer'image(req_hit_way);
2304             print(f"op:{req_op} addr:{r0.req.addr} nc: {r0.req.nc}" \
2305                   f"idx:{req_index} tag:{req_tag} way: {req_hit_way}"
2306                  )
2307 #             end if;
2308 #             if r0_valid = '1' then
2309         with m.If(r0_valid):
2310 #                 r1.mmu_req <= r0.mmu_req;
2311             sync += r1.mmu_req.eq(r0.mmu_req)
2312 #             end if;
2313
2314 #             -- Fast path for load/store hits.
2315 #             -- Set signals for the writeback controls.
2316 #             r1.hit_way <= req_hit_way;
2317 #             r1.hit_index <= req_index;
2318         # Fast path for load/store hits.
2319         # Set signals for the writeback controls.
2320         sync += r1.hit_way.eq(req_hit_way)
2321         sync += r1.hit_index.eq(req_index)
2322
2323 #             if req_op = OP_LOAD_HIT then
2324         with m.If(req_op == Op.OP_LOAD_HIT):
2325 #                 r1.hit_load_valid <= '1';
2326             sync += r1.hit_load_valid.eq(1)
2327
2328 #             else
2329         with m.Else():
2330 #                 r1.hit_load_valid <= '0';
2331             sync += r1.hit_load_valid.eq(0)
2332 #             end if;
2333
2334 #             if req_op = OP_LOAD_HIT or req_op = OP_STORE_HIT then
2335         with m.If(req_op == Op.OP_LOAD_HIT | req_op == Op.OP_STORE_HIT):
2336 #                 r1.cache_hit <= '1';
2337             sync += r1.cache_hit.eq(1)
2338 #             else
2339         with m.Else():
2340 #                 r1.cache_hit <= '0';
2341             sync += r1.cache_hit.eq(0)
2342 #             end if;
2343
2344 #             if req_op = OP_BAD then
2345         with m.If(req_op == Op.OP_BAD):
2346 #                 report "Signalling ld/st error valid_ra=" &
2347 #                  std_ulogic'image(valid_ra) & " rc_ok=" &
2348 #                  std_ulogic'image(rc_ok) & " perm_ok=" &
2349 #                  std_ulogic'image(perm_ok);
2350             print(f"Signalling ld/st error valid_ra={valid_ra}"
2351                   f"rc_ok={rc_ok} perm_ok={perm_ok}"
2352
2353 #                 r1.ls_error <= not r0.mmu_req;
2354 #                 r1.mmu_error <= r0.mmu_req;
2355 #                 r1.cache_paradox <= access_ok;
2356             sync += r1.ls_error.eq(~r0.mmu_req)
2357             sync += r1.mmu_error.eq(r0.mmu_req)
2358             sync += r1.cache_paradox.eq(access_ok)
2359
2360 #             else
2361             with m.Else():
2362 #                 r1.ls_error <= '0';
2363 #                 r1.mmu_error <= '0';
2364 #                 r1.cache_paradox <= '0';
2365                 sync += r1.ls_error.eq(0)
2366                 sync += r1.mmu_error.eq(0)
2367                 sync += r1.cache_paradox.eq(0)
2368 #             end if;
2369 #
2370 #             if req_op = OP_STCX_FAIL then
2371             with m.If(req_op == Op.OP_STCX_FAIL):
2372 #                 r1.stcx_fail <= '1';
2373                 r1.stcx_fail.eq(1)
2374
2375 #             else
2376             with m.Else():
2377 #                 r1.stcx_fail <= '0';
2378                 sync += r1.stcx_fail.eq(0)
2379 #             end if;
2380 #
2381 #             -- Record TLB hit information for updating TLB PLRU
2382 #             r1.tlb_hit <= tlb_hit;
2383 #             r1.tlb_hit_way <= tlb_hit_way;
2384 #             r1.tlb_hit_index <= tlb_req_index;
2385             # Record TLB hit information for updating TLB PLRU
2386             sync += r1.tlb_hit.eq(tlb_hit)
2387             sync += r1.tlb_hit_way.eq(tlb_hit_way)
2388             sync += r1.tlb_hit_index.eq(tlb_req_index)
2389 #         end if;
2390 #     end process;
2391
2392 #     -- Memory accesses are handled by this state machine:
2393 #     --
2394 #     --   * Cache load miss/reload (in conjunction with "rams")
2395 #     --   * Load hits for non-cachable forms
2396 #     --   * Stores (the collision case is handled in "rams")
2397 #     --
2398 #     -- All wishbone requests generation is done here.
2399 #     -- This machine operates at stage 1.
2400 #     dcache_slow : process(clk)
2401 # Memory accesses are handled by this state machine:
2402 #
2403 #   * Cache load miss/reload (in conjunction with "rams")
2404 #   * Load hits for non-cachable forms
2405 #   * Stores (the collision case is handled in "rams")
2406 #
2407 # All wishbone requests generation is done here.
2408 # This machine operates at stage 1.
2409 class DcacheSlow(Elaboratable):
2410     def __init__(self):
2411         pass
2412
2413     def elaborate(self, platform):
2414         m = Module()
2415
2416         comb = m.d.comb
2417         sync = m.d.sync
2418
2419 #         variable stbs_done : boolean;
2420 #         variable req       : mem_access_request_t;
2421 #         variable acks      : unsigned(2 downto 0);
2422         stbs_done = Signal()
2423         req       = MemAccessRequest()
2424         acks      = Signal(3)
2425
2426         comb += stbs_done
2427         comb += req
2428         comb += acks
2429
2430 #     begin
2431 #         if rising_edge(clk) then
2432 #             r1.use_forward1 <= use_forward1_next;
2433 #             r1.forward_sel <= (others => '0');
2434         sync += r1.use_forward1.eq(use_forward1_next)
2435         sync += r1.forward_sel.eq(0)
2436
2437 #             if use_forward1_next = '1' then
2438         with m.If(use_forward1_next):
2439 #                 r1.forward_sel <= r1.req.byte_sel;
2440             sync += r1.forward_sel.eq(r1.req.byte_sel)
2441
2442 #           elsif use_forward2_next = '1' then
2443         with m.Elif(use_forward2_next):
2444 #                 r1.forward_sel <= r1.forward_sel1;
2445             sync += r1.forward_sel.eq(r1.forward_sel1)
2446 #             end if;
2447
2448 #             r1.forward_data2 <= r1.forward_data1;
2449         sync += r1.forward_data2.eq(r1.forward_data1)
2450
2451 #             if r1.write_bram = '1' then
2452         with m.If(r1.write_bram):
2453 #                 r1.forward_data1 <= r1.req.data;
2454 #                 r1.forward_sel1 <= r1.req.byte_sel;
2455 #                 r1.forward_way1 <= r1.req.hit_way;
2456 #                 r1.forward_row1 <= get_row(r1.req.real_addr);
2457 #                 r1.forward_valid1 <= '1';
2458             sync += r1.forward_data1.eq(r1.req.data)
2459             sync += r1.forward_sel1.eq(r1.req.byte_sel)
2460             sync += r1.forward_way1.eq(r1.req.hit_way)
2461             sync += r1.forward_row1.eq(get_row(r1.req.real_addr))
2462             sync += r1.forward_valid1.eq(1)
2463 #             else
2464         with m.Else():
2465
2466 #                 if r1.dcbz = '1' then
2467             with m.If(r1.bcbz):
2468 #                     r1.forward_data1 <= (others => '0');
2469                 sync += r1.forward_data1.eq(0)
2470
2471 #                 else
2472             with m.Else():
2473 #                     r1.forward_data1 <= wishbone_in.dat;
2474                 sync += r1.forward_data1.eq(wb_in.dat)
2475 #                 end if;
2476
2477 #                 r1.forward_sel1 <= (others => '1');
2478 #                 r1.forward_way1 <= replace_way;
2479 #                 r1.forward_row1 <= r1.store_row;
2480 #                 r1.forward_valid1 <= '0';
2481             sync += r1.forward_sel1.eq(1)
2482             sync += r1.forward_way1.eq(replace_way)
2483             sync += r1.forward_row1.eq(r1.store_row)
2484             sync += r1.forward_valid1.eq(0)
2485 #             end if;
2486
2487 #           -- On reset, clear all valid bits to force misses
2488 #             if rst = '1' then
2489         # On reset, clear all valid bits to force misses
2490         # TODO figure out how reset signal works in nmigeni
2491         with m.If("""TODO RST???"""):
2492 #               for i in index_t loop
2493             for i in range(INDEX):
2494 #                   cache_valids(i) <= (others => '0');
2495                 sync += cache_valid_bits[i].eq(0)
2496 #               end loop;
2497
2498 #                 r1.state <= IDLE;
2499 #                 r1.full <= '0';
2500 #                 r1.slow_valid <= '0';
2501 #                 r1.wb.cyc <= '0';
2502 #                 r1.wb.stb <= '0';
2503 #                 r1.ls_valid <= '0';
2504 #                 r1.mmu_done <= '0';
2505             sync += r1.state.eq(State.IDLE)
2506             sync += r1.full.eq(0)
2507             sync += r1.slow_valid.eq(0)
2508             sync += r1.wb.cyc.eq(0)
2509             sync += r1.wb.stb.eq(0)
2510             sync += r1.ls_valid.eq(0)
2511             sync += r1.mmu_done.eq(0)
2512
2513 #               -- Not useful normally but helps avoiding
2514 #               -- tons of sim warnings
2515         # Not useful normally but helps avoiding
2516         # tons of sim warnings
2517 #               r1.wb.adr <= (others => '0');
2518             sync += r1.wb.adr.eq(0)
2519 #             else
2520         with m.Else():
2521 #                 -- One cycle pulses reset
2522 #                 r1.slow_valid <= '0';
2523 #                 r1.write_bram <= '0';
2524 #                 r1.inc_acks <= '0';
2525 #                 r1.dec_acks <= '0';
2526 #
2527 #                 r1.ls_valid <= '0';
2528 #                 -- complete tlbies and TLB loads in the third cycle
2529 #                 r1.mmu_done <= r0_valid and (r0.tlbie or r0.tlbld);
2530             # One cycle pulses reset
2531             sync += r1.slow_valid.eq(0)
2532             sync += r1.write_bram.eq(0)
2533             sync += r1.inc_acks.eq(0)
2534             sync += r1.dec_acks.eq(0)
2535
2536             sync += r1.ls_valid.eq(0)
2537             # complete tlbies and TLB loads in the third cycle
2538             sync += r1.mmu_done.eq(r0_valid & (r0.tlbie | r0.tlbld))
2539
2540 #                 if req_op = OP_LOAD_HIT or req_op = OP_STCX_FAIL then
2541             with m.If(req_op == Op.OP_LOAD_HIT | req_op == Op.OP_STCX_FAIL)
2542 #                     if r0.mmu_req = '0' then
2543                 with m.If(~r0.mmu_req):
2544 #                         r1.ls_valid <= '1';
2545                     sync += r1.ls_valid.eq(1)
2546 #                     else
2547                 with m.Else():
2548 #                         r1.mmu_done <= '1';
2549                     sync += r1.mmu_done.eq(1)
2550 #                     end if;
2551 #                 end if;
2552
2553 #                 if r1.write_tag = '1' then
2554             with m.If(r1.write_tag):
2555 #                     -- Store new tag in selected way
2556 #                     for i in 0 to NUM_WAYS-1 loop
2557                 # Store new tag in selected way
2558                 for i in range(NUM_WAYS):
2559 #                         if i = replace_way then
2560                     with m.If(i == replace_way):
2561 #                             cache_tags(r1.store_index)(
2562 #                              (i + 1) * TAG_WIDTH - 1
2563 #                              downto i * TAG_WIDTH
2564 #                             ) <=
2565 #                              (TAG_WIDTH - 1 downto TAG_BITS => '0')
2566 #                              & r1.reload_tag;
2567                         sync += cache_tag[
2568                                  r1.store_index
2569                                 ][i * TAG_WIDTH:(i +1) * TAG_WIDTH].eq(
2570                                  Const(TAG_WIDTH, TAG_WIDTH)
2571                                  & r1.reload_tag
2572                                 )
2573 #                         end if;
2574 #                     end loop;
2575 #                     r1.store_way <= replace_way;
2576 #                     r1.write_tag <= '0';
2577                 sync += r1.store_way.eq(replace_way)
2578                 sync += r1.write_tag.eq(0)
2579 #                 end if;
2580
2581 #                 -- Take request from r1.req if there is one there,
2582 #                 -- else from req_op, ra, etc.
2583 #                 if r1.full = '1' then
2584             # Take request from r1.req if there is one there,
2585             # else from req_op, ra, etc.
2586             with m.If(r1.full)
2587 #                     req := r1.req;
2588                 sync += req.eq(r1.req)
2589
2590 #                 else
2591             with m.Else():
2592 #                     req.op := req_op;
2593 #                     req.valid := req_go;
2594 #                     req.mmu_req := r0.mmu_req;
2595 #                     req.dcbz := r0.req.dcbz;
2596 #                     req.real_addr := ra;
2597                 sync += req.op.eq(req_op)
2598                 sync += req.valid.eq(req_go)
2599                 sync += req.mmu_req.eq(r0.mmu_req)
2600                 sync += req.dcbz.eq(r0.req.dcbz)
2601                 sync += req.real_addr.eq(ra)
2602
2603 #                     -- Force data to 0 for dcbz
2604 #                     if r0.req.dcbz = '0' then
2605                 with m.If(~r0.req.dcbz):
2606 #                         req.data := r0.req.data;
2607                     sync += req.data.eq(r0.req.data)
2608
2609 #                     else
2610                 with m.Else():
2611 #                         req.data := (others => '0');
2612                     sync += req.data.eq(0)
2613 #                     end if;
2614
2615 #                     -- Select all bytes for dcbz
2616 #                     -- and for cacheable loads
2617 #                     if r0.req.dcbz = '1'
2618 #                      or (r0.req.load = '1' and r0.req.nc = '0') then
2619                 # Select all bytes for dcbz
2620                 # and for cacheable loads
2621                 with m.If(r0.req.dcbz | (r0.req.load & ~r0.req.nc):
2622 #                         req.byte_sel := (others => '1');
2623                     sync += req.byte_sel.eq(1)
2624
2625 #                     else
2626                 with m.Else():
2627 #                         req.byte_sel := r0.req.byte_sel;
2628                     sync += req.byte_sel.eq(r0.req.byte_sel)
2629 #                     end if;
2630
2631 #                     req.hit_way := req_hit_way;
2632 #                     req.same_tag := req_same_tag;
2633                 sync += req.hit_way.eq(req_hit_way)
2634                 sync += req.same_tag.eq(req_same_tag)
2635
2636 #                     -- Store the incoming request from r0,
2637 #                     -- if it is a slow request
2638 #                     -- Note that r1.full = 1 implies req_op = OP_NONE
2639 #                     if req_op = OP_LOAD_MISS or req_op = OP_LOAD_NC
2640 #                      or req_op = OP_STORE_MISS
2641 #                      or req_op = OP_STORE_HIT then
2642                 # Store the incoming request from r0,
2643                 # if it is a slow request
2644                 # Note that r1.full = 1 implies req_op = OP_NONE
2645                 with m.If(req_op == Op.OP_LOAD_MISS
2646                           | req_op == Op.OP_LOAD_NC
2647                           | req_op == Op.OP_STORE_MISS
2648                           | req_op == Op.OP_STORE_HIT):
2649 #                         r1.req <= req;
2650 #                         r1.full <= '1';
2651                     sync += r1.req(req)
2652                     sync += r1.full.eq(1)
2653 #                     end if;
2654 #                 end if;
2655 #
2656 #               -- Main state machine
2657 #               case r1.state is
2658             # Main state machine
2659             with m.Switch(r1.state):
2660
2661 #                 when IDLE =>
2662                 with m.Case(State.IDLE)
2663 #                     r1.wb.adr <= req.real_addr(r1.wb.adr'left downto 0);
2664 #                     r1.wb.sel <= req.byte_sel;
2665 #                     r1.wb.dat <= req.data;
2666 #                     r1.dcbz <= req.dcbz;
2667 #
2668 #                     -- Keep track of our index and way
2669 #                     -- for subsequent stores.
2670 #                     r1.store_index <= get_index(req.real_addr);
2671 #                     r1.store_row <= get_row(req.real_addr);
2672 #                     r1.end_row_ix <=
2673 #                      get_row_of_line(get_row(req.real_addr)) - 1;
2674 #                     r1.reload_tag <= get_tag(req.real_addr);
2675 #                     r1.req.same_tag <= '1';
2676                     sync += r1.wb.adr.eq(req.real_addr[0:r1.wb.adr])
2677                     sync += r1.wb.sel.eq(req.byte_sel)
2678                     sync += r1.wb.dat.eq(req.data)
2679                     sync += r1.dcbz.eq(req.dcbz)
2680
2681                     # Keep track of our index and way
2682                     # for subsequent stores.
2683                     sync += r1.store_index.eq(get_index(req.real_addr))
2684                     sync += r1.store_row.eq(get_row(req.real_addr))
2685                     sync += r1.end_row_ix.eq(
2686                              get_row_of_line(get_row(req.real_addr))
2687                             )
2688                     sync += r1.reload_tag.eq(get_tag(req.real_addr))
2689                     sync += r1.req.same_tag.eq(1)
2690
2691 #                     if req.op = OP_STORE_HIT theni
2692                     with m.If(req.op == Op.OP_STORE_HIT):
2693 #                         r1.store_way <= req.hit_way;
2694                         sync += r1.store_way.eq(req.hit_way)
2695 #                     end if;
2696
2697 #                     -- Reset per-row valid bits,
2698 #                     -- ready for handling OP_LOAD_MISS
2699 #                     for i in 0 to ROW_PER_LINE - 1 loop
2700                     # Reset per-row valid bits,
2701                     # ready for handling OP_LOAD_MISS
2702                     for i in range(ROW_PER_LINE):
2703 #                         r1.rows_valid(i) <= '0';
2704                         sync += r1.rows_valid[i].eq(0)
2705 #                     end loop;
2706
2707 #                     case req.op is
2708                     with m.Switch(req.op):
2709 #                     when OP_LOAD_HIT =>
2710                         with m.Case(Op.OP_LOAD_HIT):
2711 #                         -- stay in IDLE state
2712                             # stay in IDLE state
2713                             pass
2714
2715 #                     when OP_LOAD_MISS =>
2716                         with m.Case(Op.OP_LOAD_MISS):
2717 #                       -- Normal load cache miss,
2718 #                       -- start the reload machine
2719 #                       report "cache miss real addr:" &
2720 #                        to_hstring(req.real_addr) & " idx:" &
2721 #                        integer'image(get_index(req.real_addr)) &
2722 #                        " tag:" & to_hstring(get_tag(req.real_addr));
2723                             # Normal load cache miss,
2724                             # start the reload machine
2725                             print(f"cache miss real addr:{req_real_addr}" \
2726                                   f" idx:{get_index(req_real_addr)}" \
2727                                   f" tag:{get_tag(req.real_addr)}")
2728
2729 #                       -- Start the wishbone cycle
2730 #                       r1.wb.we  <= '0';
2731 #                       r1.wb.cyc <= '1';
2732 #                       r1.wb.stb <= '1';
2733                             # Start the wishbone cycle
2734                             sync += r1.wb.we.eq(0)
2735                             sync += r1.wb.cyc.eq(1)
2736                             sync += r1.wb.stb.eq(1)
2737
2738 #                       -- Track that we had one request sent
2739 #                       r1.state <= RELOAD_WAIT_ACK;
2740 #                       r1.write_tag <= '1';
2741                             # Track that we had one request sent
2742                             sync += r1.state.eq(State.RELOAD_WAIT_ACK)
2743                             sync += r1.write_tag.eq(1)
2744
2745 #                   when OP_LOAD_NC =>
2746                         with m.Case(Op.OP_LOAD_NC):
2747 #                       r1.wb.cyc <= '1';
2748 #                       r1.wb.stb <= '1';
2749 #                       r1.wb.we <= '0';
2750 #                       r1.state <= NC_LOAD_WAIT_ACK;
2751                             sync += r1.wb.cyc.eq(1)
2752                             sync += r1.wb.stb.eq(1)
2753                             sync += r1.wb.we.eq(0)
2754                             sync += r1.state.eq(State.NC_LOAD_WAIT_ACK)
2755
2756 #                     when OP_STORE_HIT | OP_STORE_MISS =>
2757                         with m.Case(Op.OP_STORE_HIT | Op.OP_STORE_MISS):
2758 #                         if req.dcbz = '0' then
2759                             with m.If(~req.bcbz):
2760 #                             r1.state <= STORE_WAIT_ACK;
2761 #                             r1.acks_pending <= to_unsigned(1, 3);
2762 #                             r1.full <= '0';
2763 #                             r1.slow_valid <= '1';
2764                                 sync += r1.state.eq(State.STORE_WAIT_ACK)
2765                                 sync += r1.acks_pending.eq(
2766                                          '''TODO to_unsignes(1,3)'''
2767                                         )
2768                                 sync += r1.full.eq(0)
2769                                 sync += r1.slow_valid.eq(1)
2770
2771 #                             if req.mmu_req = '0' then
2772                                 with m.If(~req.mmu_req):
2773 #                                 r1.ls_valid <= '1';
2774                                     sync += r1.ls_valid.eq(1)
2775 #                             else
2776                                 with m.Else():
2777 #                                 r1.mmu_done <= '1';
2778                                     sync += r1.mmu_done.eq(1)
2779 #                             end if;
2780
2781 #                             if req.op = OP_STORE_HIT then
2782                                 with m.If(req.op == Op.OP_STORE_HIT):
2783 #                                 r1.write_bram <= '1';
2784                                     sync += r1.write_bram.eq(1)
2785 #                             end if;
2786
2787 #                         else
2788                             with m.Else():
2789 #                             -- dcbz is handled much like a load
2790 #                             -- miss except that we are writing
2791 #                             -- to memory instead of reading
2792 #                             r1.state <= RELOAD_WAIT_ACK;
2793                                 # dcbz is handled much like a load
2794                                 # miss except that we are writing
2795                                 # to memory instead of reading
2796                                 sync += r1.state.eq(Op.RELOAD_WAIT_ACK)
2797
2798 #                             if req.op = OP_STORE_MISS then
2799                                 with m.If(req.op == Op.OP_STORE_MISS):
2800 #                                 r1.write_tag <= '1';
2801                                     sync += r1.write_tag.eq(1)
2802 #                             end if;
2803 #                         end if;
2804
2805 #                         r1.wb.we <= '1';
2806 #                         r1.wb.cyc <= '1';
2807 #                         r1.wb.stb <= '1';
2808                             sync += r1.wb.we.eq(1)
2809                             sync += r1.wb.cyc.eq(1)
2810                             sync += r1.wb.stb.eq(1)
2811
2812 #                   -- OP_NONE and OP_BAD do nothing
2813 #                   -- OP_BAD & OP_STCX_FAIL were handled above already
2814 #                   when OP_NONE =>
2815 #                     when OP_BAD =>
2816 #                     when OP_STCX_FAIL =>
2817                         # OP_NONE and OP_BAD do nothing
2818                         # OP_BAD & OP_STCX_FAIL were handled above already
2819                         with m.Case(Op.OP_NONE):
2820                             pass
2821
2822                         with m.Case(OP_BAD):
2823                             pass
2824
2825                         with m.Case(OP_STCX_FAIL):
2826                             pass
2827 #                   end case;
2828
2829 #                 when RELOAD_WAIT_ACK =>
2830                     with m.Case(State.RELOAD_WAIT_ACK):
2831 #                     -- Requests are all sent if stb is 0
2832                         # Requests are all sent if stb is 0
2833                         sync += stbs_done.eq(~r1.wb.stb)
2834 #                   stbs_done := r1.wb.stb = '0';
2835
2836 #                   -- If we are still sending requests,
2837 #                   -- was one accepted?
2838 #                   if wishbone_in.stall = '0' and not stbs_done then
2839                         # If we are still sending requests,
2840                         # was one accepted?
2841                         with m.If(~wb_in.stall & ~stbs_done):
2842 #                       -- That was the last word ? We are done sending.
2843 #                       -- Clear stb and set stbs_done so we can handle
2844 #                       -- an eventual last ack on the same cycle.
2845 #                       if is_last_row_addr(r1.wb.adr, r1.end_row_ix) then
2846                             # That was the last word ? We are done sending.
2847                             # Clear stb and set stbs_done so we can handle
2848                             # an eventual last ack on the same cycle.
2849                             with m.If(is_last_row_addr(
2850                                       r1.wb.adr, r1.end_row_ix)):
2851 #                           r1.wb.stb <= '0';
2852 #                           stbs_done := true;
2853                                 sync += r1.wb.stb.eq(0)
2854                                 sync += stbs_done.eq(0)
2855 #                       end if;
2856
2857 #                       -- Calculate the next row address
2858 #                       r1.wb.adr <= next_row_addr(r1.wb.adr);
2859                             # Calculate the next row address
2860                             sync += r1.wb.adr.eq(next_row_addr(r1.wb.adr))
2861 #                   end if;
2862
2863 #                   -- Incoming acks processing
2864 #                     r1.forward_valid1 <= wishbone_in.ack;
2865                         # Incoming acks processing
2866                         sync += r1.forward_valid1.eq(wb_in.ack)
2867
2868 #                   if wishbone_in.ack = '1' then
2869                         with m.If(wb_in.ack):
2870 #                         r1.rows_valid(
2871 #                          r1.store_row mod ROW_PER_LINE
2872 #                         ) <= '1';
2873                             sync += r1.rows_valid[
2874                                      r1.store_row % ROW_PER_LINE
2875                                     ].eq(1)
2876
2877 #                         -- If this is the data we were looking for,
2878 #                         -- we can complete the request next cycle.
2879 #                         -- Compare the whole address in case the
2880 #                         -- request in r1.req is not the one that
2881 #                         -- started this refill.
2882 #                       if r1.full = '1' and r1.req.same_tag = '1'
2883 #                        and ((r1.dcbz = '1' and r1.req.dcbz = '1')
2884 #                        or (r1.dcbz = '0' and r1.req.op = OP_LOAD_MISS))
2885 #                        and r1.store_row = get_row(r1.req.real_addr) then
2886                             # If this is the data we were looking for,
2887                             # we can complete the request next cycle.
2888                             # Compare the whole address in case the
2889                             # request in r1.req is not the one that
2890                             # started this refill.
2891                             with m.If(r1.full & r1.req.same_tag &
2892                                       ((r1.dcbz & r1.req.dcbz)
2893                                        (~r1.dcbz &
2894                                         r1.req.op == Op.OP_LOAD_MISS)
2895                                        ) &
2896                                        r1.store_row
2897                                        == get_row(r1.req.real_addr):
2898 #                             r1.full <= '0';
2899 #                             r1.slow_valid <= '1';
2900                                 sync += r1.full.eq(0)
2901                                 sync += r1.slow_valid.eq(1)
2902
2903 #                             if r1.mmu_req = '0' then
2904                                     with m.If(~r1.mmu_req):
2905 #                                 r1.ls_valid <= '1';
2906                                         sync += r1.ls_valid.eq(1)
2907 #                             else
2908                                     with m.Else():
2909 #                                 r1.mmu_done <= '1';
2910                                         sync += r1.mmu_done.eq(1)
2911 #                             end if;
2912 #                             r1.forward_sel <= (others => '1');
2913 #                             r1.use_forward1 <= '1';
2914                                 sync += r1.forward_sel.eq(1)
2915                                 sync += r1.use_forward1.eq(1)
2916 #                       end if;
2917
2918 #                       -- Check for completion
2919 #                       if stbs_done and is_last_row(r1.store_row,
2920 #                        r1.end_row_ix) then
2921                             # Check for completion
2922                             with m.If(stbs_done &
2923                                       is_last_row(r1.store_row,
2924                                       r1.end_row_ix)):
2925
2926 #                           -- Complete wishbone cycle
2927 #                           r1.wb.cyc <= '0';
2928                                 # Complete wishbone cycle
2929                                 sync += r1.wb.cyc.eq(0)
2930
2931 #                           -- Cache line is now valid
2932 #                           cache_valids(r1.store_index)(
2933 #                            r1.store_way
2934 #                           ) <= '1';
2935                                 # Cache line is now valid
2936                                 sync += cache_valid_bits[
2937                                          r1.store_index
2938                                         ][r1.store_way].eq(1)
2939
2940 #                           r1.state <= IDLE;
2941                                 sync += r1.state.eq(State.IDLE)
2942 #                       end if;
2943
2944 #                       -- Increment store row counter
2945 #                       r1.store_row <= next_row(r1.store_row);
2946                             # Increment store row counter
2947                             sync += r1.store_row.eq(next_row(r1.store_row))
2948 #                   end if;
2949
2950 #                 when STORE_WAIT_ACK =>
2951                     with m.Case(State.STORE_WAIT_ACK):
2952 #                     stbs_done := r1.wb.stb = '0';
2953 #                     acks := r1.acks_pending;
2954                         sync += stbs_done.eq(~r1.wb.stb)
2955                         sync += acks.eq(r1.acks_pending)
2956
2957 #                     if r1.inc_acks /= r1.dec_acks then
2958                         with m.If(r1.inc_acks != r1.dec_acks):
2959
2960 #                         if r1.inc_acks = '1' then
2961                             with m.If(r1.inc_acks):
2962 #                             acks := acks + 1;
2963                                 sync += acks.eq(acks + 1)
2964
2965 #                         else
2966                             with m.Else():
2967 #                             acks := acks - 1;
2968                                 sync += acks.eq(acks - 1)
2969 #                         end if;
2970 #                     end if;
2971
2972 #                     r1.acks_pending <= acks;
2973                         sync += r1.acks_pending.eq(acks)
2974
2975 #                     -- Clear stb when slave accepted request
2976 #                     if wishbone_in.stall = '0' then
2977                         # Clear stb when slave accepted request
2978                         with m.If(~wb_in.stall):
2979 #                         -- See if there is another store waiting
2980 #                         -- to be done which is in the same real page.
2981 #                         if req.valid = '1' then
2982                             # See if there is another store waiting
2983                             # to be done which is in the same real page.
2984                             with m.If(req.valid):
2985 #                             r1.wb.adr(
2986 #                              SET_SIZE_BITS - 1 downto 0
2987 #                             ) <= req.real_addr(
2988 #                              SET_SIZE_BITS - 1 downto 0
2989 #                             );
2990 #                             r1.wb.dat <= req.data;
2991 #                             r1.wb.sel <= req.byte_sel;
2992                                 sync += r1.wb.adr[0:SET_SIZE_BITS].eq(
2993                                          req.real_addr[0:SET_SIZE_BITS]
2994                                         )
2995 #                         end if;
2996
2997 #                         if acks < 7 and req.same_tag = '1'
2998 #                          and (req.op = OP_STORE_MISS
2999 #                          or req.op = OP_STORE_HIT) then
3000                             with m.Elif(acks < 7 & req.same_tag &
3001                                         (req.op == Op.Op_STORE_MISS
3002                                          | req.op == Op.OP_SOTRE_HIT)):
3003 #                             r1.wb.stb <= '1';
3004 #                             stbs_done := false;
3005                                 sync += r1.wb.stb.eq(1)
3006                                 sync += stbs_done.eq(0)
3007
3008 #                             if req.op = OP_STORE_HIT then
3009                                 with m.If(req.op == Op.OP_STORE_HIT):
3010 #                                 r1.write_bram <= '1';
3011                                     sync += r1.write_bram.eq(1)
3012 #                             end if;
3013 #                             r1.full <= '0';
3014 #                             r1.slow_valid <= '1';
3015                                 sync += r1.full.eq(0)
3016                                 sync += r1.slow_valid.eq(1)
3017
3018 #                             -- Store requests never come from the MMU
3019 #                             r1.ls_valid <= '1';
3020 #                             stbs_done := false;
3021 #                             r1.inc_acks <= '1';
3022                                 # Store request never come from the MMU
3023                                 sync += r1.ls_valid.eq(1)
3024                                 sync += stbs_done.eq(0)
3025                                 sync += r1.inc_acks.eq(1)
3026 #                         else
3027                             with m.Else():
3028 #                             r1.wb.stb <= '0';
3029 #                             stbs_done := true;
3030                                 sync += r1.wb.stb.eq(0)
3031                                 sync += stbs_done.eq(1)
3032 #                         end if;
3033 #                   end if;
3034
3035 #                   -- Got ack ? See if complete.
3036 #                   if wishbone_in.ack = '1' then
3037                         # Got ack ? See if complete.
3038                         with m.If(wb_in.ack):
3039 #                         if stbs_done and acks = 1 then
3040                             with m.If(stbs_done & acks)
3041 #                             r1.state <= IDLE;
3042 #                             r1.wb.cyc <= '0';
3043 #                             r1.wb.stb <= '0';
3044                                 sync += r1.state.eq(State.IDLE)
3045                                 sync += r1.wb.cyc.eq(0)
3046                                 sync += r1.wb.stb.eq(0)
3047 #                         end if;
3048 #                         r1.dec_acks <= '1';
3049                             sync += r1.dec_acks.eq(1)
3050 #                   end if;
3051
3052 #                 when NC_LOAD_WAIT_ACK =>
3053                     with m.Case(State.NC_LOAD_WAIT_ACK):
3054 #                   -- Clear stb when slave accepted request
3055 #                     if wishbone_in.stall = '0' then
3056                         # Clear stb when slave accepted request
3057                         with m.If(~wb_in.stall):
3058 #                       r1.wb.stb <= '0';
3059                             sync += r1.wb.stb.eq(0)
3060 #                   end if;
3061
3062 #                   -- Got ack ? complete.
3063 #                   if wishbone_in.ack = '1' then
3064                         # Got ack ? complete.
3065                         with m.If(wb_in.ack):
3066 #                         r1.state <= IDLE;
3067 #                         r1.full <= '0';
3068 #                         r1.slow_valid <= '1';
3069                             sync += r1.state.eq(State.IDLE)
3070                             sync += r1.full.eq(0)
3071                             sync += r1.slow_valid.eq(1)
3072
3073 #                         if r1.mmu_req = '0' then
3074                             with m.If(~r1.mmu_req):
3075 #                             r1.ls_valid <= '1';
3076                                 sync += r1.ls_valid.eq(1)
3077
3078 #                         else
3079                             with m.Else():
3080 #                             r1.mmu_done <= '1';
3081                                 sync += r1.mmu_done.eq(1)
3082 #                         end if;
3083
3084 #                         r1.forward_sel <= (others => '1');
3085 #                         r1.use_forward1 <= '1';
3086 #                         r1.wb.cyc <= '0';
3087 #                         r1.wb.stb <= '0';
3088                             sync += r1.forward_sel.eq(1)
3089                             sync += r1.use_forward1.eq(1)
3090                             sync += r1.wb.cyc.eq(0)
3091                             sync += r1.wb.stb.eq(0)
3092 #                   end if;
3093 #                 end case;
3094 #           end if;
3095 #       end if;
3096 #     end process;
3097
3098 #     dc_log: if LOG_LENGTH > 0 generate
3099 # TODO learn how to tranlate vhdl generate into nmigen
3100 class DcacheLog(Elaborate):
3101     def __init__(self):
3102         pass
3103
3104     def elaborate(self, platform):
3105         m = Module()
3106
3107         comb = m.d.comb
3108         sync = m.d.sync
3109
3110 #         signal log_data : std_ulogic_vector(19 downto 0);
3111         log_data = Signal(20)
3112
3113         comb += log_data
3114
3115 #     begin
3116 #         dcache_log: process(clk)
3117 #         begin
3118 #             if rising_edge(clk) then
3119 #                 log_data <= r1.wb.adr(5 downto 3) &
3120 #                             wishbone_in.stall &
3121 #                             wishbone_in.ack &
3122 #                             r1.wb.stb & r1.wb.cyc &
3123 #                             d_out.error &
3124 #                             d_out.valid &
3125 #                             std_ulogic_vector(
3126 #                              to_unsigned(op_t'pos(req_op), 3)) &
3127 #                             stall_out &
3128 #                             std_ulogic_vector(
3129 #                              to_unsigned(tlb_hit_way, 3)) &
3130 #                             valid_ra &
3131 #                             std_ulogic_vector(
3132 #                              to_unsigned(state_t'pos(r1.state), 3));
3133         sync += log_data.eq(Cat(
3134                  Const(r1.state, 3), valid_ra, Const(tlb_hit_way, 3),
3135                  stall_out, Const(req_op, 3), d_out.valid, d_out.error,
3136                  r1.wb.cyc, r1.wb.stb, wb_in.ack, wb_in.stall,
3137                  r1.wb.adr[3:6]
3138                 ))
3139 #             end if;
3140 #         end process;
3141 #         log_out <= log_data;
3142     # TODO ??? I am very confused need help
3143     comb += log_out.eq(log_data)
3144 #     end generate;
3145 # end;