src/soc/experiment/icache.py

   1 """ICache
   2
   3 based on Anton Blanchard microwatt icache.vhdl
   4
   5 Set associative icache
   6
   7 TODO (in no specific order):
   8 * Add debug interface to inspect cache content
   9 * Add snoop/invalidate path
  10 * Add multi-hit error detection
  11 * Pipelined bus interface (wb or axi)
  12 * Maybe add parity? There's a few bits free in each BRAM row on Xilinx
  13 * Add optimization: service hits on partially loaded lines
   14 * Add optimization: (maybe) interrupt reload on flush/redirect
  15 * Check if playing with the geometry of the cache tags allow for more
  16   efficient use of distributed RAM and less logic/muxes. Currently we
  17   write TAG_BITS width which may not match full ram blocks and might
  18   cause muxes to be inferred for "partial writes".
  19 * Check if making the read size of PLRU a ROM helps utilization
  20
  21 """
  22 from enum import Enum, unique
  23 from nmigen import (Module, Signal, Elaboratable, Cat, Array, Const)
  24 from nmigen.cli import main
  25 from nmigen.cli import rtlil
  26 from nmutil.iocontrol import RecordObject
  27 from nmutil.byterev import byte_reverse
  28 from nmutil.mask import Mask
  29 from nmigen.utils import log2_int
  30 from nmutil.util import Display
  31
  32 from soc.experiment.mem_types import (Fetch1ToICacheType,
  33                                       ICacheToDecode1Type,
  34                                       MMUToICacheType)
  35
  36 from soc.experiment.wb_types import (WB_ADDR_BITS, WB_DATA_BITS,
  37                                      WB_SEL_BITS, WBAddrType, WBDataType,
  38                                      WBSelType, WBMasterOut, WBSlaveOut,
  39                                      WBMasterOutVector, WBSlaveOutVector,
  40                                      WBIOMasterOut, WBIOSlaveOut)
  41
  42 from soc.experiment.cache_ram import CacheRam
  43 from soc.experiment.plru import PLRU
  44
  45 # for test
  46 from nmigen_soc.wishbone.sram import SRAM
  47 from nmigen import Memory
  48 from nmigen.cli import rtlil
  49 if True:
  50     from nmigen.back.pysim import Simulator, Delay, Settle
  51 else:
  52     from nmigen.sim.cxxsim import Simulator, Delay, Settle
  53 from nmutil.util import wrap
  54
  55
  56
  57 SIM            = 0
  58 LINE_SIZE      = 64
  59 # BRAM organisation: We never access more than wishbone_data_bits
  60 # at a time so to save resources we make the array only that wide,
  61 # and use consecutive indices for to make a cache "line"
  62 #
  63 # ROW_SIZE is the width in bytes of the BRAM (based on WB, so 64-bits)
  64 ROW_SIZE       = WB_DATA_BITS // 8
  65 # Number of lines in a set
  66 NUM_LINES      = 32
  67 # Number of ways
  68 NUM_WAYS       = 4
  69 # L1 ITLB number of entries (direct mapped)
  70 TLB_SIZE       = 64
  71 # L1 ITLB log_2(page_size)
  72 TLB_LG_PGSZ    = 12
  73 # Number of real address bits that we store
  74 REAL_ADDR_BITS = 56
  75 # Non-zero to enable log data collection
  76 LOG_LENGTH     = 0
  77
  78 ROW_SIZE_BITS  = ROW_SIZE * 8
  79 # ROW_PER_LINE is the number of row
  80 # (wishbone) transactions in a line
  81 ROW_PER_LINE   = LINE_SIZE // ROW_SIZE
  82 # BRAM_ROWS is the number of rows in
  83 # BRAM needed to represent the full icache
  84 BRAM_ROWS      = NUM_LINES * ROW_PER_LINE
  85 # INSN_PER_ROW is the number of 32bit
  86 # instructions per BRAM row
  87 INSN_PER_ROW   = ROW_SIZE_BITS // 32
  88
  89 # Bit fields counts in the address
  90 #
  91 # INSN_BITS is the number of bits to
  92 # select an instruction in a row
  93 INSN_BITS      = log2_int(INSN_PER_ROW)
  94 # ROW_BITS is the number of bits to
  95 # select a row
  96 ROW_BITS       = log2_int(BRAM_ROWS)
  97 # ROW_LINEBITS is the number of bits to
  98 # select a row within a line
  99 ROW_LINE_BITS   = log2_int(ROW_PER_LINE)
 100 # LINE_OFF_BITS is the number of bits for
 101 # the offset in a cache line
 102 LINE_OFF_BITS  = log2_int(LINE_SIZE)
 103 # ROW_OFF_BITS is the number of bits for
 104 # the offset in a row
 105 ROW_OFF_BITS   = log2_int(ROW_SIZE)
 106 # INDEX_BITS is the number of bits to
 107 # select a cache line
 108 INDEX_BITS     = log2_int(NUM_LINES)
 109 # SET_SIZE_BITS is the log base 2 of
 110 # the set size
 111 SET_SIZE_BITS  = LINE_OFF_BITS + INDEX_BITS
 112 # TAG_BITS is the number of bits of
 113 # the tag part of the address
 114 TAG_BITS       = REAL_ADDR_BITS - SET_SIZE_BITS
 115 # WAY_BITS is the number of bits to
 116 # select a way
 117 WAY_BITS       = log2_int(NUM_WAYS)
 118 TAG_RAM_WIDTH  = TAG_BITS * NUM_WAYS
 119
 120 #     -- L1 ITLB.
 121 #     constant TLB_BITS : natural := log2(TLB_SIZE);
 122 #     constant TLB_EA_TAG_BITS : natural := 64 - (TLB_LG_PGSZ + TLB_BITS);
 123 #     constant TLB_PTE_BITS : natural := 64;
 124 TLB_BITS        = log2_int(TLB_SIZE)
 125 TLB_EA_TAG_BITS = 64 - (TLB_LG_PGSZ + TLB_BITS)
 126 TLB_PTE_BITS    = 64
 127
 128 # architecture rtl of icache is
 129 #constant ROW_SIZE_BITS : natural := ROW_SIZE*8;
 130 #-- ROW_PER_LINE is the number of row (wishbone
 131 #-- transactions) in a line
 132 #constant ROW_PER_LINE  : natural := LINE_SIZE / ROW_SIZE;
 133 #-- BRAM_ROWS is the number of rows in BRAM
 134 #-- needed to represent the full
 135 #-- icache
 136 #constant BRAM_ROWS     : natural := NUM_LINES * ROW_PER_LINE;
 137 #-- INSN_PER_ROW is the number of 32bit instructions per BRAM row
 138 #constant INSN_PER_ROW  : natural := ROW_SIZE_BITS / 32;
 139 #-- Bit fields counts in the address
 140 #
 141 #-- INSN_BITS is the number of bits to select
 142 #-- an instruction in a row
 143 #constant INSN_BITS     : natural := log2(INSN_PER_ROW);
 144 #-- ROW_BITS is the number of bits to select a row
 145 #constant ROW_BITS      : natural := log2(BRAM_ROWS);
 146 #-- ROW_LINEBITS is the number of bits to
 147 #-- select a row within a line
 148 #constant ROW_LINEBITS  : natural := log2(ROW_PER_LINE);
 149 #-- LINE_OFF_BITS is the number of bits for the offset
 150 #-- in a cache line
 151 #constant LINE_OFF_BITS : natural := log2(LINE_SIZE);
 152 #-- ROW_OFF_BITS is the number of bits for the offset in a row
 153 #constant ROW_OFF_BITS  : natural := log2(ROW_SIZE);
 154 #-- INDEX_BITS is the number of bits to select a cache line
 155 #constant INDEX_BITS    : natural := log2(NUM_LINES);
 156 #-- SET_SIZE_BITS is the log base 2 of the set size
 157 #constant SET_SIZE_BITS : natural := LINE_OFF_BITS + INDEX_BITS;
 158 #-- TAG_BITS is the number of bits of the tag part of the address
 159 #constant TAG_BITS      : natural := REAL_ADDR_BITS - SET_SIZE_BITS;
 160 #-- WAY_BITS is the number of bits to select a way
 161 #constant WAY_BITS     : natural := log2(NUM_WAYS);
 162
 163 #-- Example of layout for 32 lines of 64 bytes:
 164 #--
 165 #-- ..  tag    |index|  line  |
 166 #-- ..         |   row   |    |
 167 #-- ..         |     |   | |00| zero          (2)
 168 #-- ..         |     |   |-|  | INSN_BITS     (1)
 169 #-- ..         |     |---|    | ROW_LINEBITS  (3)
 170 #-- ..         |     |--- - --| LINE_OFF_BITS (6)
 171 #-- ..         |         |- --| ROW_OFF_BITS  (3)
 172 #-- ..         |----- ---|    | ROW_BITS      (8)
 173 #-- ..         |-----|        | INDEX_BITS    (5)
 174 #-- .. --------|              | TAG_BITS      (53)
 175    # Example of layout for 32 lines of 64 bytes:
 176    #
 177    # ..  tag    |index|  line  |
 178    # ..         |   row   |    |
 179    # ..         |     |   | |00| zero          (2)
 180    # ..         |     |   |-|  | INSN_BITS     (1)
 181    # ..         |     |---|    | ROW_LINEBITS  (3)
 182    # ..         |     |--- - --| LINE_OFF_BITS (6)
 183    # ..         |         |- --| ROW_OFF_BITS  (3)
 184    # ..         |----- ---|    | ROW_BITS      (8)
 185    # ..         |-----|        | INDEX_BITS    (5)
 186    # .. --------|              | TAG_BITS      (53)
 187
 188 #subtype row_t is integer range 0 to BRAM_ROWS-1;
 189 #subtype index_t is integer range 0 to NUM_LINES-1;
 190 #subtype way_t is integer range 0 to NUM_WAYS-1;
 191 #subtype row_in_line_t is unsigned(ROW_LINEBITS-1 downto 0);
 192 #
 193 #-- The cache data BRAM organized as described above for each way
 194 #subtype cache_row_t is std_ulogic_vector(ROW_SIZE_BITS-1 downto 0);
 195 #
 196 #-- The cache tags LUTRAM has a row per set. Vivado is a pain and will
 197 #-- not handle a clean (commented) definition of the cache tags as a 3d
 198 #-- memory. For now, work around it by putting all the tags
 199 #subtype cache_tag_t is std_logic_vector(TAG_BITS-1 downto 0);
 200 #  type cache_tags_set_t is array(way_t) of cache_tag_t;
 201 #  type cache_tags_array_t is array(index_t) of cache_tags_set_t;
 202 #constant TAG_RAM_WIDTH : natural := TAG_BITS * NUM_WAYS;
 203 #subtype cache_tags_set_t is std_logic_vector(TAG_RAM_WIDTH-1 downto 0);
 204 #type cache_tags_array_t is array(index_t) of cache_tags_set_t;
 205 def CacheTagArray():
 206     return Array(Signal(TAG_RAM_WIDTH) for x in range(NUM_LINES))
 207
 208 #-- The cache valid bits
 209 #subtype cache_way_valids_t is std_ulogic_vector(NUM_WAYS-1 downto 0);
 210 #type cache_valids_t is array(index_t) of cache_way_valids_t;
 211 #type row_per_line_valid_t is array(0 to ROW_PER_LINE - 1) of std_ulogic;
 212 def CacheValidBitsArray():
 213     return Array(Signal(NUM_WAYS) for x in range(NUM_LINES))
 214
 215 def RowPerLineValidArray():
 216     return Array(Signal() for x in range(ROW_PER_LINE))
 217
 218
 219 #attribute ram_style : string;
 220 #attribute ram_style of cache_tags : signal is "distributed";
  221    # TODO to be passed to nmigen as ram attributes
 222    # attribute ram_style : string;
 223    # attribute ram_style of cache_tags : signal is "distributed";
 224
 225
 226 #subtype tlb_index_t is integer range 0 to TLB_SIZE - 1;
 227 #type tlb_valids_t is array(tlb_index_t) of std_ulogic;
 228 #subtype tlb_tag_t is std_ulogic_vector(TLB_EA_TAG_BITS - 1 downto 0);
 229 #type tlb_tags_t is array(tlb_index_t) of tlb_tag_t;
 230 #subtype tlb_pte_t is std_ulogic_vector(TLB_PTE_BITS - 1 downto 0);
 231 #type tlb_ptes_t is array(tlb_index_t) of tlb_pte_t;
 232 def TLBValidBitsArray():
 233     return Array(Signal() for x in range(TLB_SIZE))
 234
 235 def TLBTagArray():
 236     return Array(Signal(TLB_EA_TAG_BITS) for x in range(TLB_SIZE))
 237
 238 def TLBPTEArray():
 239     return Array(Signal(TLB_PTE_BITS) for x in range(TLB_SIZE))
 240
 241
 242 #-- Cache RAM interface
 243 #type cache_ram_out_t is array(way_t) of cache_row_t;
 244 # Cache RAM interface
 245 def CacheRamOut():
 246     return Array(Signal(ROW_SIZE_BITS) for x in range(NUM_WAYS))
 247
 248 #-- PLRU output interface
 249 #type plru_out_t is array(index_t) of
 250 # std_ulogic_vector(WAY_BITS-1 downto 0);
 251 # PLRU output interface
 252 def PLRUOut():
 253     return Array(Signal(WAY_BITS) for x in range(NUM_LINES))
 254
 255 #     -- Return the cache line index (tag index) for an address
 256 #     function get_index(addr: std_ulogic_vector(63 downto 0))
 257 #      return index_t is
 258 #     begin
 259 #         return to_integer(unsigned(
 260 #          addr(SET_SIZE_BITS - 1 downto LINE_OFF_BITS)
 261 #         ));
 262 #     end;
 263 # Return the cache line index (tag index) for an address
 264 def get_index(addr):
 265     return addr[LINE_OFF_BITS:SET_SIZE_BITS]
 266
 267 #     -- Return the cache row index (data memory) for an address
 268 #     function get_row(addr: std_ulogic_vector(63 downto 0))
 269 #       return row_t is
 270 #     begin
 271 #         return to_integer(unsigned(
 272 #          addr(SET_SIZE_BITS - 1 downto ROW_OFF_BITS)
 273 #         ));
 274 #     end;
 275 # Return the cache row index (data memory) for an address
 276 def get_row(addr):
 277     return addr[ROW_OFF_BITS:SET_SIZE_BITS]
 278
 279 #     -- Return the index of a row within a line
 280 #     function get_row_of_line(row: row_t) return row_in_line_t is
 281 #       variable row_v : unsigned(ROW_BITS-1 downto 0);
 282 #     begin
 283 #       row_v := to_unsigned(row, ROW_BITS);
 284 #         return row_v(ROW_LINEBITS-1 downto 0);
 285 #     end;
 286 # Return the index of a row within a line
 287 def get_row_of_line(row):
 288     return row[:ROW_LINE_BITS]
 289
 290 #     -- Returns whether this is the last row of a line
 291 #     function is_last_row_addr(addr: wishbone_addr_type;
 292 #      last: row_in_line_t
 293 #     )
 294 #      return boolean is
 295 #     begin
 296 #       return unsigned(
 297 #        addr(LINE_OFF_BITS-1 downto ROW_OFF_BITS)
 298 #       ) = last;
 299 #     end;
 300 # Returns whether this is the last row of a line
 301 def is_last_row_addr(addr, last):
 302     return addr[ROW_OFF_BITS:LINE_OFF_BITS] == last
 303
 304 #     -- Returns whether this is the last row of a line
 305 #     function is_last_row(row: row_t;
 306 #      last: row_in_line_t) return boolean is
 307 #     begin
 308 #       return get_row_of_line(row) = last;
 309 #     end;
 310 # Returns whether this is the last row of a line
 311 def is_last_row(row, last):
 312     return get_row_of_line(row) == last
 313
 314 #     -- Return the address of the next row in the current cache line
 315 #     function next_row_addr(addr: wishbone_addr_type)
 316 #       return std_ulogic_vector is
 317 #       variable row_idx : std_ulogic_vector(ROW_LINEBITS-1 downto 0);
 318 #       variable result  : wishbone_addr_type;
 319 #     begin
 320 #       -- Is there no simpler way in VHDL to generate that 3 bits adder ?
 321 #       row_idx := addr(LINE_OFF_BITS-1 downto ROW_OFF_BITS);
 322 #       row_idx := std_ulogic_vector(unsigned(row_idx) + 1);
 323 #       result := addr;
 324 #       result(LINE_OFF_BITS-1 downto ROW_OFF_BITS) := row_idx;
 325 #       return result;
 326 #     end;
 327 # Return the address of the next row in the current cache line
 328 def next_row_addr(addr):
 329     row_idx = addr[ROW_OFF_BITS:LINE_OFF_BITS] + 1
 330     return addr[ROW_OFF_BITS:LINE_OFF_BITS].eq(row_idx)
 331
 332 #     -- Return the next row in the current cache line. We use a dedicated
 333 #     -- function in order to limit the size of the generated adder to be
 334 #     -- only the bits within a cache line (3 bits with default settings)
 335 #     function next_row(row: row_t) return row_t is
 336 #       variable row_v   : std_ulogic_vector(ROW_BITS-1 downto 0);
 337 #       variable row_idx : std_ulogic_vector(ROW_LINEBITS-1 downto 0);
 338 #       variable result  : std_ulogic_vector(ROW_BITS-1 downto 0);
 339 #     begin
 340 #       row_v := std_ulogic_vector(to_unsigned(row, ROW_BITS));
 341 #       row_idx := row_v(ROW_LINEBITS-1 downto 0);
 342 #       row_v(ROW_LINEBITS-1 downto 0) :=
 343 #        std_ulogic_vector(unsigned(row_idx) + 1);
 344 #       return to_integer(unsigned(row_v));
 345 #     end;
 346 # Return the next row in the current cache line. We use a dedicated
 347 # function in order to limit the size of the generated adder to be
 348 # only the bits within a cache line (3 bits with default settings)
 349 def next_row(row):
 350     row_idx = row[:ROW_LINE_BITS]
 351     return row[:ROW_LINE_BITS].eq(row_idx + 1)
 352
 353 #     -- Read the instruction word for the given address in the
 354 #     -- current cache row
 355 #     function read_insn_word(addr: std_ulogic_vector(63 downto 0);
 356 #                           data: cache_row_t) return std_ulogic_vector is
 357 #       variable word: integer range 0 to INSN_PER_ROW-1;
 358 #     begin
 359 #         word := to_integer(unsigned(addr(INSN_BITS+2-1 downto 2)));
 360 #       return data(31+word*32 downto word*32);
 361 #     end;
 362 # Read the instruction word for the given address
 363 # in the current cache row
 364 def read_insn_word(addr, data):
 365     word = addr[2:INSN_BITS+3]
 366     return data.word_select(word, 32)
 367
 368 #     -- Get the tag value from the address
 369 #     function get_tag(
 370 #      addr: std_ulogic_vector(REAL_ADDR_BITS - 1 downto 0)
 371 #     )
 372 #      return cache_tag_t is
 373 #     begin
 374 #         return addr(REAL_ADDR_BITS - 1 downto SET_SIZE_BITS);
 375 #     end;
 376 # Get the tag value from the address
 377 def get_tag(addr):
 378     return addr[SET_SIZE_BITS:REAL_ADDR_BITS]
 379
 380 #     -- Read a tag from a tag memory row
 381 #     function read_tag(way: way_t; tagset: cache_tags_set_t)
 382 #      return cache_tag_t is
 383 #     begin
 384 #       return tagset((way+1) * TAG_BITS - 1 downto way * TAG_BITS);
 385 #     end;
 386 # Read a tag from a tag memory row
 387 def read_tag(way, tagset):
 388     return tagset[way * TAG_BITS:(way + 1) * TAG_BITS]
 389
 390 #     -- Write a tag to tag memory row
 391 #     procedure write_tag(way: in way_t;
 392 #      tagset: inout cache_tags_set_t; tag: cache_tag_t) is
 393 #     begin
 394 #       tagset((way+1) * TAG_BITS - 1 downto way * TAG_BITS) := tag;
 395 #     end;
 396 # Write a tag to tag memory row
 397 def write_tag(way, tagset, tag):
 398     tagset[way * TAG_BITS:(way + 1) * TAG_BITS] = tag
 399
 400 #     -- Simple hash for direct-mapped TLB index
 401 #     function hash_ea(addr: std_ulogic_vector(63 downto 0))
 402 #      return tlb_index_t is
 403 #         variable hash : std_ulogic_vector(TLB_BITS - 1 downto 0);
 404 #     begin
 405 #         hash := addr(TLB_LG_PGSZ + TLB_BITS - 1 downto TLB_LG_PGSZ)
 406 #                 xor addr(
 407 #                  TLB_LG_PGSZ + 2 * TLB_BITS - 1 downto
 408 #                  TLB_LG_PGSZ + TLB_BITS
 409 #                 )
 410 #                 xor addr(
 411 #                  TLB_LG_PGSZ + 3 * TLB_BITS - 1 downto
 412 #                  TLB_LG_PGSZ + 2 * TLB_BITS
 413 #                 );
 414 #         return to_integer(unsigned(hash));
 415 #     end;
 416 # Simple hash for direct-mapped TLB index
 417 def hash_ea(addr):
 418     hsh = addr[TLB_LG_PGSZ:TLB_LG_PGSZ + TLB_BITS] ^ addr[
 419            TLB_LG_PGSZ + TLB_BITS:TLB_LG_PGSZ + 2 * TLB_BITS
 420           ] ^ addr[
 421            TLB_LG_PGSZ + 2 * TLB_BITS:TLB_LG_PGSZ + 3 * TLB_BITS
 422           ]
 423     return hsh
 424
 425 # begin
 426 #
 427 #     assert LINE_SIZE mod ROW_SIZE = 0;
 428 #     assert ispow2(LINE_SIZE) report "LINE_SIZE not power of 2"
 429 #      severity FAILURE;
 430 #     assert ispow2(NUM_LINES) report "NUM_LINES not power of 2"
 431 #      severity FAILURE;
 432 #     assert ispow2(ROW_PER_LINE) report "ROW_PER_LINE not power of 2"
 433 #      severity FAILURE;
 434 #     assert ispow2(INSN_PER_ROW) report "INSN_PER_ROW not power of 2"
 435 #      severity FAILURE;
 436 #     assert (ROW_BITS = INDEX_BITS + ROW_LINEBITS)
 437 #       report "geometry bits don't add up" severity FAILURE;
 438 #     assert (LINE_OFF_BITS = ROW_OFF_BITS + ROW_LINEBITS)
 439 #       report "geometry bits don't add up" severity FAILURE;
 440 #     assert (REAL_ADDR_BITS = TAG_BITS + INDEX_BITS + LINE_OFF_BITS)
 441 #       report "geometry bits don't add up" severity FAILURE;
 442 #     assert (REAL_ADDR_BITS = TAG_BITS + ROW_BITS + ROW_OFF_BITS)
 443 #       report "geometry bits don't add up" severity FAILURE;
 444 #
 445 #     sim_debug: if SIM generate
 446 #     debug: process
 447 #     begin
 448 #       report "ROW_SIZE      = " & natural'image(ROW_SIZE);
 449 #       report "ROW_PER_LINE  = " & natural'image(ROW_PER_LINE);
 450 #       report "BRAM_ROWS     = " & natural'image(BRAM_ROWS);
 451 #       report "INSN_PER_ROW  = " & natural'image(INSN_PER_ROW);
 452 #       report "INSN_BITS     = " & natural'image(INSN_BITS);
 453 #       report "ROW_BITS      = " & natural'image(ROW_BITS);
 454 #       report "ROW_LINEBITS  = " & natural'image(ROW_LINEBITS);
 455 #       report "LINE_OFF_BITS = " & natural'image(LINE_OFF_BITS);
 456 #       report "ROW_OFF_BITS  = " & natural'image(ROW_OFF_BITS);
 457 #       report "INDEX_BITS    = " & natural'image(INDEX_BITS);
 458 #       report "TAG_BITS      = " & natural'image(TAG_BITS);
 459 #       report "WAY_BITS      = " & natural'image(WAY_BITS);
 460 #       wait;
 461 #     end process;
 462 #     end generate;
 463
 464 # Cache reload state machine
 465 @unique
 466 class State(Enum):
 467     IDLE     = 0
 468     CLR_TAG  = 1
 469     WAIT_ACK = 2
 470
 471 #     type reg_internal_t is record
 472 #       -- Cache hit state (Latches for 1 cycle BRAM access)
 473 #       hit_way   : way_t;
 474 #       hit_nia   : std_ulogic_vector(63 downto 0);
 475 #       hit_smark : std_ulogic;
 476 #       hit_valid : std_ulogic;
 477 #
 478 #       -- Cache miss state (reload state machine)
 479 #         state            : state_t;
 480 #         wb               : wishbone_master_out;
 481 #       store_way        : way_t;
 482 #         store_index      : index_t;
 483 #       store_row        : row_t;
 484 #         store_tag        : cache_tag_t;
 485 #         store_valid      : std_ulogic;
 486 #         end_row_ix       : row_in_line_t;
 487 #         rows_valid       : row_per_line_valid_t;
 488 #
 489 #         -- TLB miss state
 490 #         fetch_failed     : std_ulogic;
 491 #     end record;
 492 class RegInternal(RecordObject):
 493     def __init__(self):
 494         super().__init__()
 495         # Cache hit state (Latches for 1 cycle BRAM access)
 496         self.hit_way      = Signal(NUM_WAYS)
 497         self.hit_nia      = Signal(64)
 498         self.hit_smark    = Signal()
 499         self.hit_valid    = Signal()
 500
 501         # Cache miss state (reload state machine)
 502         self.state        = Signal(State)
 503         self.wb           = WBMasterOut()
 504         self.store_way    = Signal(NUM_WAYS)
 505         self.store_index  = Signal(NUM_LINES)
 506         self.store_row    = Signal(BRAM_ROWS)
 507         self.store_tag    = Signal(TAG_BITS)
 508         self.store_valid  = Signal()
 509         self.end_row_ix   = Signal(ROW_LINE_BITS)
 510         self.rows_valid   = RowPerLineValidArray()
 511
 512         # TLB miss state
 513         self.fetch_failed = Signal()
 514
 515 # -- 64 bit direct mapped icache. All instructions are 4B aligned.
 516 #
 517 # entity icache is
 518 #     generic (
 519 #         SIM : boolean := false;
 520 #         -- Line size in bytes
 521 #         LINE_SIZE : positive := 64;
 522 #         -- BRAM organisation: We never access more
 523 #         -- than wishbone_data_bits
 524 #         -- at a time so to save resources we make the
 525 #         -- array only that wide,
 526 #         -- and use consecutive indices for to make a cache "line"
 527 #         --
 528 #         -- ROW_SIZE is the width in bytes of the BRAM (based on WB,
 529 #         -- so 64-bits)
 530 #         ROW_SIZE  : positive := wishbone_data_bits / 8;
 531 #         -- Number of lines in a set
 532 #         NUM_LINES : positive := 32;
 533 #         -- Number of ways
 534 #         NUM_WAYS  : positive := 4;
 535 #         -- L1 ITLB number of entries (direct mapped)
 536 #         TLB_SIZE : positive := 64;
 537 #         -- L1 ITLB log_2(page_size)
 538 #         TLB_LG_PGSZ : positive := 12;
 539 #         -- Number of real address bits that we store
 540 #         REAL_ADDR_BITS : positive := 56;
 541 #         -- Non-zero to enable log data collection
 542 #         LOG_LENGTH : natural := 0
 543 #         );
 544 #     port (
 545 #         clk          : in std_ulogic;
 546 #         rst          : in std_ulogic;
 547 #
 548 #         i_in         : in Fetch1ToIcacheType;
 549 #         i_out        : out IcacheToDecode1Type;
 550 #
 551 #         m_in         : in MmuToIcacheType;
 552 #
 553 #         stall_in     : in std_ulogic;
 554 #       stall_out    : out std_ulogic;
 555 #       flush_in     : in std_ulogic;
 556 #       inval_in     : in std_ulogic;
 557 #
 558 #         wishbone_out : out wishbone_master_out;
 559 #         wishbone_in  : in wishbone_slave_out;
 560 #
 561 #         log_out      : out std_ulogic_vector(53 downto 0)
 562 #         );
 563 # end entity icache;
 564 # 64 bit direct mapped icache. All instructions are 4B aligned.
 565 class ICache(Elaboratable):
 566     """64 bit direct mapped icache. All instructions are 4B aligned."""
 567     def __init__(self):
 568         self.i_in           = Fetch1ToICacheType()
 569         self.i_out          = ICacheToDecode1Type()
 570
 571         self.m_in           = MMUToICacheType()
 572
 573         self.stall_in       = Signal()
 574         self.stall_out      = Signal()
 575         self.flush_in       = Signal()
 576         self.inval_in       = Signal()
 577
 578         self.wb_out         = WBMasterOut()
 579         self.wb_in          = WBSlaveOut()
 580
 581         self.log_out        = Signal(54)
 582
 583
 584 #     -- Generate a cache RAM for each way
 585 #     rams: for i in 0 to NUM_WAYS-1 generate
 586 #       signal do_read  : std_ulogic;
 587 #       signal do_write : std_ulogic;
 588 #       signal rd_addr  : std_ulogic_vector(ROW_BITS-1 downto 0);
 589 #       signal wr_addr  : std_ulogic_vector(ROW_BITS-1 downto 0);
 590 #       signal dout     : cache_row_t;
 591 #       signal wr_sel   : std_ulogic_vector(ROW_SIZE-1 downto 0);
 592 #     begin
 593 #       way: entity work.cache_ram
 594 #           generic map (
 595 #               ROW_BITS => ROW_BITS,
 596 #               WIDTH => ROW_SIZE_BITS
 597 #               )
 598 #           port map (
 599 #               clk     => clk,
 600 #               rd_en   => do_read,
 601 #               rd_addr => rd_addr,
 602 #               rd_data => dout,
 603 #               wr_sel  => wr_sel,
 604 #               wr_addr => wr_addr,
 605 #               wr_data => wishbone_in.dat
 606 #               );
 607 #       process(all)
 608 #       begin
 609 #           do_read <= not (stall_in or use_previous);
 610 #           do_write <= '0';
 611 #           if wishbone_in.ack = '1' and replace_way = i then
 612 #               do_write <= '1';
 613 #           end if;
 614 #           cache_out(i) <= dout;
 615 #           rd_addr <=
 616 #            std_ulogic_vector(to_unsigned(req_row, ROW_BITS));
 617 #           wr_addr <=
 618 #            std_ulogic_vector(to_unsigned(r.store_row, ROW_BITS));
 619 #             for i in 0 to ROW_SIZE-1 loop
 620 #                 wr_sel(i) <= do_write;
 621 #             end loop;
 622 #       end process;
 623 #     end generate;
 624     def rams(self, m, r, cache_out, use_previous, replace_way, req_row):
 625         comb = m.d.comb
 626
 627         wb_in, stall_in = self.wb_in, self.stall_in
 628
 629         do_read  = Signal()
 630         do_write = Signal()
 631         rd_addr  = Signal(ROW_BITS)
 632         wr_addr  = Signal(ROW_BITS)
 633         _d_out   = Signal(ROW_SIZE_BITS)
 634         wr_sel   = Signal(ROW_SIZE)
 635
 636         for i in range(NUM_WAYS):
 637             way = CacheRam(ROW_BITS, ROW_SIZE_BITS)
 638             comb += way.rd_en.eq(do_read)
 639             comb += way.rd_addr.eq(rd_addr)
 640             comb += way.rd_data_o.eq(_d_out)
 641             comb += way.wr_sel.eq(wr_sel)
 642             comb += way.wr_addr.eq(wr_addr)
 643             comb += way.wr_data.eq(wb_in.dat)
 644
 645             comb += do_read.eq(~(stall_in | use_previous))
 646             comb += do_write.eq(0)
 647
 648             with m.If(wb_in.ack & (replace_way == i)):
 649                 comb += do_write.eq(1)
 650
 651             comb += cache_out[i].eq(_d_out)
 652             comb += rd_addr.eq(req_row)
 653             comb += wr_addr.eq(r.store_row)
 654             for j in range(ROW_SIZE):
 655                 comb += wr_sel[j].eq(do_write)
 656
 657 #     -- Generate PLRUs
 658 #     maybe_plrus: if NUM_WAYS > 1 generate
 659 #     begin
 660 #       plrus: for i in 0 to NUM_LINES-1 generate
 661 #           -- PLRU interface
 662 #           signal plru_acc    : std_ulogic_vector(WAY_BITS-1 downto 0);
 663 #           signal plru_acc_en : std_ulogic;
 664 #           signal plru_out    : std_ulogic_vector(WAY_BITS-1 downto 0);
 665 #
 666 #       begin
 667 #           plru : entity work.plru
 668 #               generic map (
 669 #                   BITS => WAY_BITS
 670 #                   )
 671 #               port map (
 672 #                   clk => clk,
 673 #                   rst => rst,
 674 #                   acc => plru_acc,
 675 #                   acc_en => plru_acc_en,
 676 #                   lru => plru_out
 677 #                   );
 678 #
 679 #           process(all)
 680 #           begin
 681 #               -- PLRU interface
 682 #               if get_index(r.hit_nia) = i then
 683 #                   plru_acc_en <= r.hit_valid;
 684 #               else
 685 #                   plru_acc_en <= '0';
 686 #               end if;
 687 #               plru_acc <=
 688 #                std_ulogic_vector(to_unsigned(r.hit_way, WAY_BITS));
 689 #               plru_victim(i) <= plru_out;
 690 #           end process;
 691 #       end generate;
 692 #     end generate;
 693     def maybe_plrus(self, m, r, plru_victim):
 694         comb = m.d.comb
 695
 696         with m.If(NUM_WAYS > 1):
 697             for i in range(NUM_LINES):
 698                 plru_acc_i  = Signal(WAY_BITS)
 699                 plru_acc_en = Signal()
 700                 plru_out    = Signal(WAY_BITS)
 701                 plru        = PLRU(WAY_BITS)
 702                 comb += plru.acc_i.eq(plru_acc_i)
 703                 comb += plru.acc_en.eq(plru_acc_en)
 704                 comb += plru.lru_o.eq(plru_out)
 705
 706                 # PLRU interface
 707                 with m.If(get_index(r.hit_nia) == i):
 708                     comb += plru.acc_en.eq(r.hit_valid)
 709
 710                 with m.Else():
 711                     comb += plru.acc_en.eq(0)
 712
 713                 comb += plru.acc_i.eq(r.hit_way)
 714                 comb += plru_victim[i].eq(plru.lru_o)
 715
 716 #     -- TLB hit detection and real address generation
 717 #     itlb_lookup : process(all)
 718 #         variable pte : tlb_pte_t;
 719 #         variable ttag : tlb_tag_t;
 720 #     begin
 721 #         tlb_req_index <= hash_ea(i_in.nia);
 722 #         pte := itlb_ptes(tlb_req_index);
 723 #         ttag := itlb_tags(tlb_req_index);
 724 #         if i_in.virt_mode = '1' then
 725 #             real_addr <= pte(REAL_ADDR_BITS - 1 downto TLB_LG_PGSZ) &
 726 #                          i_in.nia(TLB_LG_PGSZ - 1 downto 0);
 727 #             if ttag = i_in.nia(63 downto TLB_LG_PGSZ + TLB_BITS) then
 728 #                 ra_valid <= itlb_valids(tlb_req_index);
 729 #             else
 730 #                 ra_valid <= '0';
 731 #             end if;
 732 #             eaa_priv <= pte(3);
 733 #         else
 734 #             real_addr <= i_in.nia(REAL_ADDR_BITS - 1 downto 0);
 735 #             ra_valid <= '1';
 736 #             eaa_priv <= '1';
 737 #         end if;
 738 #
 739 #         -- no IAMR, so no KUEP support for now
 740 #         priv_fault <= eaa_priv and not i_in.priv_mode;
 741 #         access_ok <= ra_valid and not priv_fault;
 742 #     end process;
 743     # TLB hit detection and real address generation
 744     def itlb_lookup(self, m, tlb_req_index, itlb_ptes, itlb_tags,
 745                     real_addr, itlb_valid_bits, ra_valid, eaa_priv,
 746                     priv_fault, access_ok):
 747         comb = m.d.comb
 748
 749         i_in = self.i_in
 750
 751         pte  = Signal(TLB_PTE_BITS)
 752         ttag = Signal(TLB_EA_TAG_BITS)
 753
 754         comb += tlb_req_index.eq(hash_ea(i_in.nia))
 755         comb += pte.eq(itlb_ptes[tlb_req_index])
 756         comb += ttag.eq(itlb_tags[tlb_req_index])
 757
 758         with m.If(i_in.virt_mode):
 759             comb += real_addr.eq(Cat(
 760                      i_in.nia[:TLB_LG_PGSZ],
 761                      pte[TLB_LG_PGSZ:REAL_ADDR_BITS]
 762                     ))
 763
 764             with m.If(ttag == i_in.nia[TLB_LG_PGSZ + TLB_BITS:64]):
 765                 comb += ra_valid.eq(itlb_valid_bits[tlb_req_index])
 766
 767             with m.Else():
 768                 comb += ra_valid.eq(0)
 769
 770         with m.Else():
 771             comb += real_addr.eq(i_in.nia[:REAL_ADDR_BITS])
 772             comb += ra_valid.eq(1)
 773             comb += eaa_priv.eq(1)
 774
 775         # No IAMR, so no KUEP support for now
 776         comb += priv_fault.eq(eaa_priv & ~i_in.priv_mode)
 777         comb += access_ok.eq(ra_valid & ~priv_fault)
 778
 779 #     -- iTLB update
 780 #     itlb_update: process(clk)
 781 #         variable wr_index : tlb_index_t;
 782 #     begin
 783 #         if rising_edge(clk) then
 784 #             wr_index := hash_ea(m_in.addr);
 785 #             if rst = '1' or
 786 #              (m_in.tlbie = '1' and m_in.doall = '1') then
 787 #                 -- clear all valid bits
 788 #                 for i in tlb_index_t loop
 789 #                     itlb_valids(i) <= '0';
 790 #                 end loop;
 791 #             elsif m_in.tlbie = '1' then
 792 #                 -- clear entry regardless of hit or miss
 793 #                 itlb_valids(wr_index) <= '0';
 794 #             elsif m_in.tlbld = '1' then
 795 #                 itlb_tags(wr_index) <=
 796 #                  m_in.addr(63 downto TLB_LG_PGSZ + TLB_BITS);
 797 #                 itlb_ptes(wr_index) <= m_in.pte;
 798 #                 itlb_valids(wr_index) <= '1';
 799 #             end if;
 800 #         end if;
 801 #     end process;
 802     # iTLB update
 803     def itlb_update(self, m, itlb_valid_bits, itlb_tags, itlb_ptes):
 804         comb = m.d.comb
 805         sync = m.d.sync
 806
 807         m_in = self.m_in
 808
 809         wr_index = Signal(TLB_SIZE)
 810         comb += wr_index.eq(hash_ea(m_in.addr))
 811
 812         with m.If(m_in.tlbie & m_in.doall):
 813             # Clear all valid bits
 814             for i in range(TLB_SIZE):
 815                 sync += itlb_valid_bits[i].eq(0)
 816
 817         with m.Elif(m_in.tlbie):
 818             # Clear entry regardless of hit or miss
 819             sync += itlb_valid_bits[wr_index].eq(0)
 820
 821         with m.Elif(m_in.tlbld):
 822             sync += itlb_tags[wr_index].eq(
 823                      m_in.addr[TLB_LG_PGSZ + TLB_BITS:64]
 824                     )
 825             sync += itlb_ptes[wr_index].eq(m_in.pte)
 826             sync += itlb_valid_bits[wr_index].eq(1)
 827
 828 #     -- Cache hit detection, output to fetch2 and other misc logic
 829 #     icache_comb : process(all)
 830     # Cache hit detection, output to fetch2 and other misc logic
 831     def icache_comb(self, m, use_previous, r, req_index, req_row,
 832                     req_tag, real_addr, req_laddr, cache_valid_bits,
 833                     cache_tags, access_ok, req_is_hit,
 834                     req_is_miss, replace_way, plru_victim, cache_out):
 835 #       variable is_hit  : std_ulogic;
 836 #       variable hit_way : way_t;
 837         comb = m.d.comb
 838
 839         i_in, i_out, wb_out = self.i_in, self.i_out, self.wb_out
 840         flush_in, stall_out = self.flush_in, self.stall_out
 841
 842         is_hit  = Signal()
 843         hit_way = Signal(NUM_WAYS)
 844 #     begin
 845 #         -- i_in.sequential means that i_in.nia this cycle
 846 #         -- is 4 more than last cycle.  If we read more
 847 #         -- than 32 bits at a time, had a cache hit last
 848 #         -- cycle, and we don't want the first 32-bit chunk
 849 #         -- then we can keep the data we read last cycle
 850 #         -- and just use that.
 851 #         if unsigned(i_in.nia(INSN_BITS+2-1 downto 2)) /= 0 then
 852 #             use_previous <= i_in.sequential and r.hit_valid;
 853 #         else
 854 #             use_previous <= '0';
 855 #         end if;
 856         # i_in.sequential means that i_in.nia this cycle is 4 more than
 857         # last cycle.  If we read more than 32 bits at a time, had a
 858         # cache hit last cycle, and we don't want the first 32-bit chunk
 859         # then we can keep the data we read last cycle and just use that.
 860         with m.If(i_in.nia[2:INSN_BITS+2] != 0):
 861             comb += use_previous.eq(i_in.sequential & r.hit_valid)
 862
 863         with m.Else():
 864             comb += use_previous.eq(0)
 865
 866 #       -- Extract line, row and tag from request
 867 #         req_index <= get_index(i_in.nia);
 868 #         req_row <= get_row(i_in.nia);
 869 #         req_tag <= get_tag(real_addr);
 870         # Extract line, row and tag from request
 871         comb += req_index.eq(get_index(i_in.nia))
 872         comb += req_row.eq(get_row(i_in.nia))
 873         comb += req_tag.eq(get_tag(real_addr))
 874
 875 #       -- Calculate address of beginning of cache row, will be
 876 #       -- used for cache miss processing if needed
 877 #       req_laddr <=
 878 #        (63 downto REAL_ADDR_BITS => '0') &
 879 #        real_addr(REAL_ADDR_BITS - 1 downto ROW_OFF_BITS) &
 880 #        (ROW_OFF_BITS-1 downto 0 => '0');
 881         # Calculate address of beginning of cache row, will be
 882         # used for cache miss processing if needed
 883         comb += req_laddr.eq(Cat(
 884                  Const(0b0, ROW_OFF_BITS),
 885                  real_addr[ROW_OFF_BITS:REAL_ADDR_BITS],
 886                  Const(0, REAL_ADDR_BITS)
 887                 ))
 888
 889 #       -- Test if pending request is a hit on any way
 890 #       hit_way := 0;
 891 #       is_hit := '0';
 892 #       for i in way_t loop
 893 #           if i_in.req = '1' and
 894 #                 (cache_valids(req_index)(i) = '1' or
 895 #                  (r.state = WAIT_ACK and
 896 #                   req_index = r.store_index and
 897 #                   i = r.store_way and
 898 #                   r.rows_valid(req_row mod ROW_PER_LINE) = '1')) then
 899 #               if read_tag(i, cache_tags(req_index)) = req_tag then
 900 #                   hit_way := i;
 901 #                   is_hit := '1';
 902 #               end if;
 903 #           end if;
 904 #       end loop;
 905         # Test if pending request is a hit on any way
 906         for i in range(NUM_WAYS):
 907             with m.If(i_in.req &
 908                       (cache_valid_bits[req_index][i] |
 909                        ((r.state == State.WAIT_ACK)
 910                         & (req_index == r.store_index)
 911                         & (i == r.store_way)
 912                         & r.rows_valid[req_row % ROW_PER_LINE]))):
 913                 with m.If(read_tag(i, cache_tags[req_index]) == req_tag):
 914                     comb += hit_way.eq(i)
 915                     comb += is_hit.eq(1)
 916
 917 #       -- Generate the "hit" and "miss" signals
 918 #       -- for the synchronous blocks
 919 #       if i_in.req = '1' and access_ok = '1' and flush_in = '0'
 920 #        and rst = '0' then
 921 #           req_is_hit  <= is_hit;
 922 #           req_is_miss <= not is_hit;
 923 #       else
 924 #           req_is_hit  <= '0';
 925 #           req_is_miss <= '0';
 926 #       end if;
 927 #       req_hit_way <= hit_way;
 928         # Generate the "hit" and "miss" signals
 929         # for the synchronous blocks
 930         with m.If(i_in.req & access_ok & ~flush_in):
 931             comb += req_is_hit.eq(is_hit)
 932             comb += req_is_miss.eq(~is_hit)
 933
 934         with m.Else():
 935             comb += req_is_hit.eq(0)
 936             comb += req_is_miss.eq(0)
 937
 938 #       -- The way to replace on a miss
 939 #       if r.state = CLR_TAG then
 940 #           replace_way <=
 941 #            to_integer(unsigned(plru_victim(r.store_index)));
 942 #       else
 943 #           replace_way <= r.store_way;
 944 #       end if;
 945         # The way to replace on a miss
 946         with m.If(r.state == State.CLR_TAG):
 947             comb += replace_way.eq(plru_victim[r.store_index])
 948
 949         with m.Else():
 950             comb += replace_way.eq(r.store_way)
 951
 952 #       -- Output instruction from current cache row
 953 #       --
 954 #       -- Note: This is a mild violation of our design principle of
 955 #       -- having pipeline stages output from a clean latch. In this
 956 #       -- case we output the result of a mux. The alternative would
 957 #       -- be output an entire row which I prefer not to do just yet
 958 #       -- as it would force fetch2 to know about some of the cache
 959 #       -- geometry information.
 960 #       i_out.insn <= read_insn_word(r.hit_nia, cache_out(r.hit_way));
 961 #       i_out.valid <= r.hit_valid;
 962 #       i_out.nia <= r.hit_nia;
 963 #       i_out.stop_mark <= r.hit_smark;
 964 #       i_out.fetch_failed <= r.fetch_failed;
 965         # Output instruction from current cache row
 966         #
 967         # Note: This is a mild violation of our design principle of
 968         # having pipeline stages output from a clean latch. In this
 969         # case we output the result of a mux. The alternative would
 970         # be output an entire row which I prefer not to do just yet
 971         # as it would force fetch2 to know about some of the cache
 972         # geometry information.
 973         comb += i_out.insn.eq(
 974                  read_insn_word(r.hit_nia, cache_out[r.hit_way])
 975                 )
 976         comb += i_out.valid.eq(r.hit_valid)
 977         comb += i_out.nia.eq(r.hit_nia)
 978         comb += i_out.stop_mark.eq(r.hit_smark)
 979         comb += i_out.fetch_failed.eq(r.fetch_failed)
 980
 981 #       -- Stall fetch1 if we have a miss on cache or TLB
 982 #       -- or a protection fault
 983 #       stall_out <= not (is_hit and access_ok);
 984         # Stall fetch1 if we have a miss on cache or TLB
 985         # or a protection fault
 986         comb += stall_out.eq(~(is_hit & access_ok))
 987
 988 #       -- Wishbone requests output (from the cache miss reload machine)
 989 #       wishbone_out <= r.wb;
 990         # Wishbone requests output (from the cache miss reload machine)
 991         comb += wb_out.eq(r.wb)
 992 #     end process;
 993
 994 #     -- Cache hit synchronous machine
 995 #     icache_hit : process(clk)
 996     # Cache hit synchronous machine
 997     def icache_hit(self, m, use_previous, r, req_is_hit, req_hit_way,
 998                    req_index, req_tag, real_addr):
 999         sync = m.d.sync
1000
1001         i_in, stall_in = self.i_in, self.stall_in
1002         flush_in       = self.flush_in
1003
1004 #     begin
1005 #         if rising_edge(clk) then
1006 #             -- keep outputs to fetch2 unchanged on a stall
1007 #             -- except that flush or reset sets valid to 0
1008 #             -- If use_previous, keep the same data as last
1009 #             -- cycle and use the second half
1010 #             if stall_in = '1' or use_previous = '1' then
1011 #                 if rst = '1' or flush_in = '1' then
1012 #                     r.hit_valid <= '0';
1013 #             end if;
1014         # keep outputs to fetch2 unchanged on a stall
1015         # except that flush or reset sets valid to 0
1016         # If use_previous, keep the same data as last
1017         # cycle and use the second half
1018         with m.If(stall_in | use_previous):
1019             with m.If(flush_in):
1020                 sync += r.hit_valid.eq(0)
1021 #             else
1022 #                 -- On a hit, latch the request for the next cycle,
1023 #                 -- when the BRAM data will be available on the
1024 #                 -- cache_out output of the corresponding way
1025 #                 r.hit_valid <= req_is_hit;
1026 #                 if req_is_hit = '1' then
1027 #                     r.hit_way <= req_hit_way;
1028         with m.Else():
1029             # On a hit, latch the request for the next cycle,
1030             # when the BRAM data will be available on the
1031             # cache_out output of the corresponding way
1032             sync += r.hit_valid.eq(req_is_hit)
1033
1034             with m.If(req_is_hit):
1035                 sync += r.hit_way.eq(req_hit_way)
1036
1037 #                     report "cache hit nia:" & to_hstring(i_in.nia) &
1038 #                         " IR:" & std_ulogic'image(i_in.virt_mode) &
1039 #                         " SM:" & std_ulogic'image(i_in.stop_mark) &
1040 #                         " idx:" & integer'image(req_index) &
1041 #                         " tag:" & to_hstring(req_tag) &
1042 #                         " way:" & integer'image(req_hit_way) &
1043 #                         " RA:" & to_hstring(real_addr);
1044                 print(f"cache hit nia:{i_in.nia}, " \
1045                       f"IR:{i_in.virt_mode}, " \
1046                       f"SM:{i_in.stop_mark}, idx:{req_index}, " \
1047                       f"tag:{req_tag}, way:{req_hit_way}, " \
1048                       f"RA:{real_addr}")
1049 #                 end if;
1050 #           end if;
1051 #             if stall_in = '0' then
1052 #                 -- Send stop marks and NIA down regardless of validity
1053 #                 r.hit_smark <= i_in.stop_mark;
1054 #                 r.hit_nia <= i_in.nia;
1055 #             end if;
1056         with m.If(~stall_in):
1057             # Send stop marks and NIA down regardless of validity
1058             sync += r.hit_smark.eq(i_in.stop_mark)
1059             sync += r.hit_nia.eq(i_in.nia)
1060 #       end if;
1061 #     end process;
1062
1063 #     -- Cache miss/reload synchronous machine
1064 #     icache_miss : process(clk)
1065     # Cache miss/reload synchronous machine
1066     def icache_miss(self, m, cache_valid_bits, r, req_is_miss,
1067                     req_index, req_laddr, req_tag, replace_way,
1068                     cache_tags, access_ok, real_addr):
1069         comb = m.d.comb
1070         sync = m.d.sync
1071
1072         i_in, wb_in, m_in  = self.i_in, self.wb_in, self.m_in
1073         stall_in, flush_in = self.stall_in, self.flush_in
1074         inval_in           = self.inval_in
1075
1076 #       variable tagset    : cache_tags_set_t;
1077 #       variable stbs_done : boolean;
1078
1079         tagset    = Signal(TAG_RAM_WIDTH)
1080         stbs_done = Signal()
1081
1082 #     begin
1083 #         if rising_edge(clk) then
1084 #           -- On reset, clear all valid bits to force misses
1085 #             if rst = '1' then
1086         # On reset, clear all valid bits to force misses
1087 #               for i in index_t loop
1088 #                   cache_valids(i) <= (others => '0');
1089 #               end loop;
1090 #                 r.state <= IDLE;
1091 #                 r.wb.cyc <= '0';
1092 #                 r.wb.stb <= '0';
1093 #               -- We only ever do reads on wishbone
1094 #               r.wb.dat <= (others => '0');
1095 #               r.wb.sel <= "11111111";
1096 #               r.wb.we  <= '0';
1097
1098         # We only ever do reads on wishbone
1099         comb += r.wb.sel.eq(~0) # set to all 1s
1100
1101 #               -- Not useful normally but helps avoiding
1102 #               -- tons of sim warnings
1103 #               r.wb.adr <= (others => '0');
1104
1105 #             else
1106
1107 #                 -- Process cache invalidations
1108 #                 if inval_in = '1' then
1109 #                     for i in index_t loop
1110 #                         cache_valids(i) <= (others => '0');
1111 #                     end loop;
1112 #                     r.store_valid <= '0';
1113 #                 end if;
1114         # Process cache invalidations
1115         with m.If(inval_in):
1116             for i in range(NUM_LINES):
1117                 sync += cache_valid_bits[i].eq(~1) # NO just set to zero.
1118                                                    # look again: others == 0
1119
1120             sync += r.store_valid.eq(0)
1121
1122 #               -- Main state machine
1123 #               case r.state is
1124             # Main state machine
1125             with m.Switch(r.state):
1126
1127 #               when IDLE =>
1128                 with m.Case(State.IDLE):
1129 #                     -- Reset per-row valid flags,
1130 #                     -- only used in WAIT_ACK
1131 #                     for i in 0 to ROW_PER_LINE - 1 loop
1132 #                         r.rows_valid(i) <= '0';
1133 #                     end loop;
1134                     # Reset per-row valid flags,
1135                     # only used in WAIT_ACK
1136                     for i in range(ROW_PER_LINE):
1137                         sync += r.rows_valid[i].eq(0)
1138
1139 #                   -- We need to read a cache line
1140 #                   if req_is_miss = '1' then
1141 #                       report "cache miss nia:" & to_hstring(i_in.nia) &
1142 #                             " IR:" & std_ulogic'image(i_in.virt_mode) &
1143 #                           " SM:" & std_ulogic'image(i_in.stop_mark) &
1144 #                           " idx:" & integer'image(req_index) &
1145 #                           " way:" & integer'image(replace_way) &
1146 #                           " tag:" & to_hstring(req_tag) &
1147 #                             " RA:" & to_hstring(real_addr);
1148                     # We need to read a cache line
1149                     with m.If(req_is_miss):
1150                         # XXX no, do not use "f".  use sync += Display
1151                         # and use %d for integer, %x for hex.
1152                         print(f"cache miss nia:{i_in.nia} " \
1153                               f"IR:{i_in.virt_mode} " \
1154                               f"SM:{i_in.stop_mark} " \
1155                               F"idx:{req_index} " \
1156                               f"way:{replace_way} tag:{req_tag} " \
1157                               f"RA:{real_addr}")
1158
1159 #                       -- Keep track of our index and way for
1160 #                       -- subsequent stores
1161 #                       r.store_index <= req_index;
1162 #                       r.store_row <= get_row(req_laddr);
1163 #                       r.store_tag <= req_tag;
1164 #                       r.store_valid <= '1';
1165 #                       r.end_row_ix <=
1166 #                        get_row_of_line(get_row(req_laddr)) - 1;
1167                         # Keep track of our index and way
1168                         # for subsequent stores
1169                         sync += r.store_index.eq(req_index)
1170                         sync += r.store_row.eq(get_row(req_laddr))
1171                         sync += r.store_tag.eq(req_tag)
1172                         sync += r.store_valid.eq(1)
1173                         sync += r.end_row_ix.eq(
1174                                  get_row_of_line(
1175                                   get_row(req_laddr)
1176                                  ) - 1
1177                                 )
1178
1179 #                       -- Prep for first wishbone read. We calculate the
1180 #                       -- address of the start of the cache line and
1181 #                       -- start the WB cycle.
1182 #                       r.wb.adr <= req_laddr(r.wb.adr'left downto 0);
1183 #                       r.wb.cyc <= '1';
1184 #                       r.wb.stb <= '1';
1185                         # Prep for first wishbone read.
1186                         # We calculate the
1187                         # address of the start of the cache line and
1188                         # start the WB cycle.
1189                         sync += r.wb.adr.eq(
1190                                  req_laddr[:r.wb.adr]
1191                                 )
1192                         sync += r.wb.cyc.eq(1)
1193                         sync += r.wb.stb.eq(1)
1194
1195 #                       -- Track that we had one request sent
1196 #                       r.state <= CLR_TAG;
1197                         # Track that we had one request sent
1198                         sync += r.state.eq(State.CLR_TAG)
1199 #                   end if;
1200
1201 #               when CLR_TAG | WAIT_ACK =>
1202                 with m.Case(State.CLR_TAG, State.WAIT_ACK):
1203 #                     if r.state = CLR_TAG then
1204                     with m.If(r.state == State.CLR_TAG):
1205 #                         -- Get victim way from plru
1206 #                       r.store_way <= replace_way;
1207                         # Get victim way from plru
1208                         sync += r.store_way.eq(replace_way)
1209 #
1210 #                       -- Force misses on that way while
1211 #                       -- reloading that line
1212 #                       cache_valids(req_index)(replace_way) <= '0';
1213                         # Force misses on that way while
1214                         # realoading that line
1215                         sync += cache_valid_bits[
1216                                  req_index
1217                                 ][replace_way].eq(0)
1218
1219 #                       -- Store new tag in selected way
1220 #                       for i in 0 to NUM_WAYS-1 loop
1221 #                           if i = replace_way then
1222 #                               tagset := cache_tags(r.store_index);
1223 #                               write_tag(i, tagset, r.store_tag);
1224 #                               cache_tags(r.store_index) <= tagset;
1225 #                           end if;
1226 #                       end loop;
1227                         for i in range(NUM_WAYS):
1228                             with m.If(i == replace_way):
1229                                 comb += tagset.eq(
1230                                          cache_tags[r.store_index]
1231                                         )
1232                                 sync += write_tag(
1233                                          i, tagset, r.store_tag
1234                                         )
1235                                 sync += cache_tags[r.store_index].eq(
1236                                          tagset
1237                                         )
1238
1239 #                         r.state <= WAIT_ACK;
1240                         sync += r.state.eq(State.WAIT_ACK)
1241 #                     end if;
1242
1243 #                   -- Requests are all sent if stb is 0
1244 #                   stbs_done := r.wb.stb = '0';
1245                     # Requests are all sent if stb is 0
1246                     comb += stbs_done.eq(r.wb.stb == 0)
1247
1248 #                   -- If we are still sending requests,
1249 #                   -- was one accepted ?
1250 #                   if wishbone_in.stall = '0' and not stbs_done then
1251                     # If we are still sending requests,
1252                     # was one accepted?
1253                     with m.If(~wb_in.stall & ~stbs_done):
1254 #                       -- That was the last word ? We are done sending.
1255 #                       -- Clear stb and set stbs_done so we can handle
1256 #                       -- an eventual last ack on the same cycle.
1257 #                       if is_last_row_addr(r.wb.adr, r.end_row_ix) then
1258 #                           r.wb.stb <= '0';
1259 #                           stbs_done := true;
1260 #                       end if;
1261                         # That was the last word ?
1262                         # We are done sending.
1263                         # Clear stb and set stbs_done
1264                         # so we can handle
1265                         # an eventual last ack on
1266                         # the same cycle.
1267                         with m.If(is_last_row_addr(
1268                                   r.wb.adr, r.end_row_ix)):
1269                             sync += r.wb.stb.eq(0)
1270                             stbs_done.eq(1)
1271
1272 #                       -- Calculate the next row address
1273 #                       r.wb.adr <= next_row_addr(r.wb.adr);
1274                         # Calculate the next row address
1275                         sync += r.wb.adr.eq(next_row_addr(r.wb.adr))
1276 #                   end if;
1277
1278 #                   -- Incoming acks processing
1279 #                   if wishbone_in.ack = '1' then
1280                     # Incoming acks processing
1281                     with m.If(wb_in.ack):
1282 #                         r.rows_valid(r.store_row mod ROW_PER_LINE)
1283 #                          <= '1';
1284                         sync += r.rows_valid[
1285                                  r.store_row & ROW_PER_LINE
1286                                 ].eq(1)
1287
1288 #                       -- Check for completion
1289 #                       if stbs_done and
1290 #                        is_last_row(r.store_row, r.end_row_ix) then
1291                         # Check for completion
1292                         with m.If(stbs_done & is_last_row(
1293                                   r.store_row, r.end_row_ix)):
1294 #                           -- Complete wishbone cycle
1295 #                           r.wb.cyc <= '0';
1296                             # Complete wishbone cycle
1297                             sync += r.wb.cyc.eq(0)
1298
1299 #                           -- Cache line is now valid
1300 #                           cache_valids(r.store_index)(replace_way) <=
1301 #                            r.store_valid and not inval_in;
1302                             # Cache line is now valid
1303                             sync += cache_valid_bits[
1304                                      r.store_index
1305                                     ][relace_way].eq(
1306                                      r.store_valid & ~inval_in
1307                                     )
1308
1309 #                           -- We are done
1310 #                           r.state <= IDLE;
1311                             # We are done
1312                             sync += r.state.eq(State.IDLE)
1313 #                       end if;
1314
1315 #                       -- Increment store row counter
1316 #                       r.store_row <= next_row(r.store_row);
1317                         # Increment store row counter
1318                         sync += store_row.eq(next_row(r.store_row))
1319 #                   end if;
1320 #               end case;
1321 #           end if;
1322 #
1323 #             -- TLB miss and protection fault processing
1324 #             if rst = '1' or flush_in = '1' or m_in.tlbld = '1' then
1325 #                 r.fetch_failed <= '0';
1326 #             elsif i_in.req = '1' and access_ok = '0' and
1327 #              stall_in = '0' then
1328 #                 r.fetch_failed <= '1';
1329 #             end if;
1330         # TLB miss and protection fault processing
1331         with m.If('''TODO nmigen rst''' | flush_in | m_in.tlbld):
1332             sync += r.fetch_failed.eq(0)
1333
1334         with m.Elif(i_in.req & ~access_ok & ~stall_in):
1335             sync += r.fetch_failed.eq(1)
1336 #       end if;
1337 #     end process;
1338
1339 #     icache_log: if LOG_LENGTH > 0 generate
1340     def icache_log(self, m, req_hit_way, ra_valid, access_ok,
1341                    req_is_miss, req_is_hit, lway, wstate, r):
1342         comb = m.d.comb
1343         sync = m.d.sync
1344
1345         wb_in, i_out       = self.wb_in, self.i_out
1346         log_out, stall_out = self.log_out, self.stall_out
1347
1348 #         -- Output data to logger
1349 #         signal log_data    : std_ulogic_vector(53 downto 0);
1350 #     begin
1351 #         data_log: process(clk)
1352 #             variable lway: way_t;
1353 #             variable wstate: std_ulogic;
1354         # Output data to logger
1355         for i in range(LOG_LENGTH):
1356             # Output data to logger
1357             log_data = Signal(54)
1358             lway     = Signal(NUM_WAYS)
1359             wstate   = Signal()
1360
1361 #         begin
1362 #             if rising_edge(clk) then
1363 #                 lway := req_hit_way;
1364 #                 wstate := '0';
1365             comb += lway.eq(req_hit_way)
1366             comb += wstate.eq(0)
1367
1368 #                 if r.state /= IDLE then
1369 #                     wstate := '1';
1370 #                 end if;
1371             with m.If(r.state != State.IDLE):
1372                 sync += wstate.eq(1)
1373
1374 #                 log_data <= i_out.valid &
1375 #                             i_out.insn &
1376 #                             wishbone_in.ack &
1377 #                             r.wb.adr(5 downto 3) &
1378 #                             r.wb.stb & r.wb.cyc &
1379 #                             wishbone_in.stall &
1380 #                             stall_out &
1381 #                             r.fetch_failed &
1382 #                             r.hit_nia(5 downto 2) &
1383 #                             wstate &
1384 #                             std_ulogic_vector(to_unsigned(lway, 3)) &
1385 #                             req_is_hit & req_is_miss &
1386 #                             access_ok &
1387 #                             ra_valid;
1388             sync += log_data.eq(Cat(
1389                      ra_valid, access_ok, req_is_miss, req_is_hit,
1390                      lway, wstate, r.hit_nia[2:6],
1391                      r.fetch_failed, stall_out, wb_in.stall, r.wb.cyc,
1392                      r.wb.stb, r.wb.adr[3:6], wb_in.ack, i_out.insn,
1393                      i_out.valid
1394                     ))
1395 #             end if;
1396 #         end process;
1397 #         log_out <= log_data;
1398             comb += log_out.eq(log_data)
1399 #     end generate;
1400 # end;
1401
1402     def elaborate(self, platform):
1403
1404         m                = Module()
1405         comb             = m.d.comb
1406
1407         # Storage. Hopefully "cache_rows" is a BRAM, the rest is LUTs
1408         cache_tags       = CacheTagArray()
1409         cache_valid_bits = CacheValidBitsArray()
1410
1411 #     signal itlb_valids : tlb_valids_t;
1412 #     signal itlb_tags : tlb_tags_t;
1413 #     signal itlb_ptes : tlb_ptes_t;
1414 #     attribute ram_style of itlb_tags : signal is "distributed";
1415 #     attribute ram_style of itlb_ptes : signal is "distributed";
1416         itlb_valid_bits  = TLBValidBitsArray()
1417         itlb_tags        = TLBTagArray()
1418         itlb_ptes        = TLBPTEArray()
1419         # TODO to be passed to nmigen as ram attributes
1420         # attribute ram_style of itlb_tags : signal is "distributed";
1421         # attribute ram_style of itlb_ptes : signal is "distributed";
1422
1423 #     -- Privilege bit from PTE EAA field
1424 #     signal eaa_priv  : std_ulogic;
1425         # Privilege bit from PTE EAA field
1426         eaa_priv         = Signal()
1427
1428 #     signal r : reg_internal_t;
1429         r                = RegInternal()
1430
1431 #     -- Async signals on incoming request
1432 #     signal req_index   : index_t;
1433 #     signal req_row     : row_t;
1434 #     signal req_hit_way : way_t;
1435 #     signal req_tag     : cache_tag_t;
1436 #     signal req_is_hit  : std_ulogic;
1437 #     signal req_is_miss : std_ulogic;
1438 #     signal req_laddr   : std_ulogic_vector(63 downto 0);
1439         # Async signal on incoming request
1440         req_index        = Signal(NUM_LINES)
1441         req_row          = Signal(BRAM_ROWS)
1442         req_hit_way      = Signal(NUM_WAYS)
1443         req_tag          = Signal(TAG_BITS)
1444         req_is_hit       = Signal()
1445         req_is_miss      = Signal()
1446         req_laddr        = Signal(64)
1447
1448 #     signal tlb_req_index : tlb_index_t;
1449 #     signal real_addr     : std_ulogic_vector(
1450 #                             REAL_ADDR_BITS - 1 downto 0
1451 #                            );
1452 #     signal ra_valid      : std_ulogic;
1453 #     signal priv_fault    : std_ulogic;
1454 #     signal access_ok     : std_ulogic;
1455 #     signal use_previous  : std_ulogic;
1456         tlb_req_index    = Signal(TLB_SIZE)
1457         real_addr        = Signal(REAL_ADDR_BITS)
1458         ra_valid         = Signal()
1459         priv_fault       = Signal()
1460         access_ok        = Signal()
1461         use_previous     = Signal()
1462
1463 #     signal cache_out   : cache_ram_out_t;
1464         cache_out        = CacheRamOut()
1465
1466 #     signal plru_victim : plru_out_t;
1467 #     signal replace_way : way_t;
1468         plru_victim      = PLRUOut()
1469         replace_way      = Signal(NUM_WAYS)
1470
1471         # call sub-functions putting everything together, using shared
1472         # signals established above
1473         self.rams(m, r, cache_out, use_previous, replace_way, req_row)
1474         self.maybe_plrus(m, r, plru_victim)
1475         self.itlb_lookup(m, tlb_req_index, itlb_ptes, itlb_tags,
1476                          real_addr, itlb_valid_bits, ra_valid, eaa_priv,
1477                          priv_fault, access_ok)
1478         self.itlb_update(m, itlb_valid_bits, itlb_tags, itlb_ptes)
1479         self.icache_comb(m, use_previous, r, req_index, req_row,
1480                          req_tag, real_addr, req_laddr, cache_valid_bits,
1481                          cache_tags, access_ok, req_is_hit, req_is_miss,
1482                          replace_way, plru_victim, cache_out)
1483         self.icache_hit(m, use_previous, r, req_is_hit, req_hit_way,
1484                         req_index, req_tag, real_addr)
1485         self.icache_miss(m, cache_valid_bits, r, req_is_miss, req_index,
1486                          req_laddr, req_tag, replace_way, cache_tags,
1487                          access_ok, real_addr)
1488         #self.icache_log(m, log_out, req_hit_way, ra_valid, access_ok,
1489         #                req_is_miss, req_is_hit, lway, wstate, r)
1490
1491         return m
1492
1493
1494 # icache_tb.vhdl
1495 #
1496 # library ieee;
1497 # use ieee.std_logic_1164.all;
1498 #
1499 # library work;
1500 # use work.common.all;
1501 # use work.wishbone_types.all;
1502 #
1503 # entity icache_tb is
1504 # end icache_tb;
1505 #
1506 # architecture behave of icache_tb is
1507 #     signal clk          : std_ulogic;
1508 #     signal rst          : std_ulogic;
1509 #
1510 #     signal i_out        : Fetch1ToIcacheType;
1511 #     signal i_in         : IcacheToDecode1Type;
1512 #
1513 #     signal m_out        : MmuToIcacheType;
1514 #
1515 #     signal wb_bram_in   : wishbone_master_out;
1516 #     signal wb_bram_out  : wishbone_slave_out;
1517 #
1518 #     constant clk_period : time := 10 ns;
1519 # begin
1520 #     icache0: entity work.icache
1521 #         generic map(
1522 #             LINE_SIZE => 64,
1523 #             NUM_LINES => 4
1524 #             )
1525 #         port map(
1526 #             clk => clk,
1527 #             rst => rst,
1528 #             i_in => i_out,
1529 #             i_out => i_in,
1530 #             m_in => m_out,
1531 #             stall_in => '0',
1532 #           flush_in => '0',
1533 #             inval_in => '0',
1534 #             wishbone_out => wb_bram_in,
1535 #             wishbone_in => wb_bram_out
1536 #             );
1537 #
1538 #     -- BRAM Memory slave
1539 #     bram0: entity work.wishbone_bram_wrapper
1540 #         generic map(
1541 #             MEMORY_SIZE   => 1024,
1542 #             RAM_INIT_FILE => "icache_test.bin"
1543 #             )
1544 #         port map(
1545 #             clk => clk,
1546 #             rst => rst,
1547 #             wishbone_in => wb_bram_in,
1548 #             wishbone_out => wb_bram_out
1549 #             );
1550 #
1551 #     clk_process: process
1552 #     begin
1553 #         clk <= '0';
1554 #         wait for clk_period/2;
1555 #         clk <= '1';
1556 #         wait for clk_period/2;
1557 #     end process;
1558 #
1559 #     rst_process: process
1560 #     begin
1561 #         rst <= '1';
1562 #         wait for 2*clk_period;
1563 #         rst <= '0';
1564 #         wait;
1565 #     end process;
1566 #
1567 #     stim: process
1568 #     begin
1569 #         i_out.req <= '0';
1570 #         i_out.nia <= (others => '0');
1571 #       i_out.stop_mark <= '0';
1572 #
1573 #         m_out.tlbld <= '0';
1574 #         m_out.tlbie <= '0';
1575 #         m_out.addr <= (others => '0');
1576 #         m_out.pte <= (others => '0');
1577 #
1578 #         wait until rising_edge(clk);
1579 #         wait until rising_edge(clk);
1580 #         wait until rising_edge(clk);
1581 #         wait until rising_edge(clk);
1582 #
1583 #         i_out.req <= '1';
1584 #         i_out.nia <= x"0000000000000004";
1585 #
1586 #         wait for 30*clk_period;
1587 #         wait until rising_edge(clk);
1588 #
1589 #         assert i_in.valid = '1' severity failure;
1590 #         assert i_in.insn = x"00000001"
1591 #           report "insn @" & to_hstring(i_out.nia) &
1592 #           "=" & to_hstring(i_in.insn) &
1593 #           " expected 00000001"
1594 #           severity failure;
1595 #
1596 #         i_out.req <= '0';
1597 #
1598 #         wait until rising_edge(clk);
1599 #
1600 #         -- hit
1601 #         i_out.req <= '1';
1602 #         i_out.nia <= x"0000000000000008";
1603 #         wait until rising_edge(clk);
1604 #         wait until rising_edge(clk);
1605 #         assert i_in.valid = '1' severity failure;
1606 #         assert i_in.insn = x"00000002"
1607 #           report "insn @" & to_hstring(i_out.nia) &
1608 #           "=" & to_hstring(i_in.insn) &
1609 #           " expected 00000002"
1610 #           severity failure;
1611 #         wait until rising_edge(clk);
1612 #
1613 #         -- another miss
1614 #         i_out.req <= '1';
1615 #         i_out.nia <= x"0000000000000040";
1616 #
1617 #         wait for 30*clk_period;
1618 #         wait until rising_edge(clk);
1619 #
1620 #         assert i_in.valid = '1' severity failure;
1621 #         assert i_in.insn = x"00000010"
1622 #           report "insn @" & to_hstring(i_out.nia) &
1623 #           "=" & to_hstring(i_in.insn) &
1624 #           " expected 00000010"
1625 #           severity failure;
1626 #
1627 #         -- test something that aliases
1628 #         i_out.req <= '1';
1629 #         i_out.nia <= x"0000000000000100";
1630 #         wait until rising_edge(clk);
1631 #         wait until rising_edge(clk);
1632 #         assert i_in.valid = '0' severity failure;
1633 #         wait until rising_edge(clk);
1634 #
1635 #         wait for 30*clk_period;
1636 #         wait until rising_edge(clk);
1637 #
1638 #         assert i_in.valid = '1' severity failure;
1639 #         assert i_in.insn = x"00000040"
1640 #           report "insn @" & to_hstring(i_out.nia) &
1641 #           "=" & to_hstring(i_in.insn) &
1642 #           " expected 00000040"
1643 #           severity failure;
1644 #
1645 #         i_out.req <= '0';
1646 #
1647 #         std.env.finish;
1648 #     end process;
1649 # end;
def _expect_insn(i_in, nia, expected):
    """Assert that the icache currently outputs instruction `expected`.

    Sub-generator, to be driven with ``yield from`` from inside a
    simulation process.  Simulated values are obtained with
    ``value = yield signal``; comparing the signal objects themselves
    would build an expression instead of testing the simulated value.

    * i_in:     record carrying the icache's ``valid`` and ``insn`` outputs
    * nia:      address that was requested (only used in the message)
    * expected: instruction word that must be present on ``i_in.insn``
    """
    valid = yield i_in.valid
    assert valid, "insn @%x not valid" % nia
    insn = yield i_in.insn
    assert insn == expected, \
        "insn @%x=%x expected %08x" % (nia, insn, expected)


def icache_sim(dut):
    """Simulation process: port of the stimulus in icache_tb.vhdl.

    Performs a miss, a hit in the same line, a second miss, and finally
    a fetch from an aliasing address, checking the returned instruction
    after each one.  A bare ``yield`` advances the simulation one clock.

    * dut: the icache under test.
    """
    # Names are from the testbench's point of view, exactly as in the
    # VHDL port map (i_in => i_out, i_out => i_in, m_in => m_out): what
    # the testbench drives *out* is the icache's *input*.
    # NOTE(review): assumes the dut exposes i_in / i_out / m_in - confirm
    # against the ICache constructor.
    i_out, i_in, m_out = dut.i_in, dut.i_out, dut.m_in

    # cycles to wait for a line reload ("wait for 30*clk_period")
    miss_wait = 30

    # everything idle: VHDL uses '0' / (others => '0') for all of these
    yield i_out.req.eq(0)
    yield i_out.nia.eq(0)
    yield i_out.stop_mark.eq(0)
    yield m_out.tlbld.eq(0)
    yield m_out.tlbie.eq(0)
    yield m_out.addr.eq(0)
    yield m_out.pte.eq(0)
    for _ in range(4):
        yield

    # first fetch: miss
    yield i_out.req.eq(1)
    yield i_out.nia.eq(0x0000000000000004)
    for _ in range(miss_wait):
        yield
    yield
    yield from _expect_insn(i_in, 0x0000000000000004, 0x00000001)
    yield i_out.req.eq(0)
    yield

    # hit
    yield i_out.req.eq(1)
    yield i_out.nia.eq(0x0000000000000008)
    yield
    yield
    yield from _expect_insn(i_in, 0x0000000000000008, 0x00000002)
    yield

    # another miss
    yield i_out.req.eq(1)
    yield i_out.nia.eq(0x0000000000000040)
    for _ in range(miss_wait):
        yield
    yield
    yield from _expect_insn(i_in, 0x0000000000000040, 0x00000010)

    # test something that aliases
    yield i_out.req.eq(1)
    yield i_out.nia.eq(0x0000000000000100)
    yield
    yield
    # the aliasing line cannot have been loaded yet, so the VHDL
    # testbench requires valid = '0' at this point
    valid = yield i_in.valid
    assert not valid, "insn @%x unexpectedly valid" % 0x0000000000000100
    for _ in range(miss_wait):
        yield
    yield
    yield from _expect_insn(i_in, 0x0000000000000100, 0x00000040)
    yield i_out.req.eq(0)
1708
1709
def test_icache():
    """Run the icache_sim stimulus against a fresh ICache.

    The ICache is added as a submodule of an otherwise empty top-level
    Module, clocked with a 1e-6 period, and the resulting waveform is
    written to test_icache.vcd.
    """
    icache = ICache()

    top = Module()
    top.submodules.icache = icache

    # set up the nmigen simulator: one clock, one synchronous process
    simulator = Simulator(top)
    simulator.add_clock(1e-6)
    stimulus = wrap(icache_sim(icache))
    simulator.add_sync_process(stimulus)

    with simulator.write_vcd('test_icache.vcd'):
        simulator.run()
1723
if __name__ == '__main__':
    # dump the RTLIL of a standalone ICache (no ports listed) ...
    icache = ICache()
    rtlil_text = rtlil.convert(icache, ports=[])
    with open("test_icache.il", "w") as il_file:
        il_file.write(rtlil_text)

    # ... then run the simulation-based test
    test_icache()