icache.py fixed all errors that raised python exceptions, now runs sim, sim doesn't...
[soc.git] / src / soc / experiment / icache.py
1 """ICache
2
3 based on Anton Blanchard microwatt icache.vhdl
4
5 Set associative icache
6
7 TODO (in no specific order):
8 * Add debug interface to inspect cache content
9 * Add snoop/invalidate path
10 * Add multi-hit error detection
11 * Pipelined bus interface (wb or axi)
12 * Maybe add parity? There's a few bits free in each BRAM row on Xilinx
13 * Add optimization: service hits on partially loaded lines
* Add optimization: (maybe) interrupt reload on flush/redirect
15 * Check if playing with the geometry of the cache tags allow for more
16 efficient use of distributed RAM and less logic/muxes. Currently we
17 write TAG_BITS width which may not match full ram blocks and might
18 cause muxes to be inferred for "partial writes".
19 * Check if making the read size of PLRU a ROM helps utilization
20
21 """
22 from enum import Enum, unique
23 from nmigen import (Module, Signal, Elaboratable, Cat, Array, Const)
24 from nmigen.cli import main
25 from nmigen.cli import rtlil
26 from nmutil.iocontrol import RecordObject
27 from nmutil.byterev import byte_reverse
28 from nmutil.mask import Mask
29 from nmigen.utils import log2_int
30 from nmutil.util import Display
31
32 from soc.experiment.mem_types import (Fetch1ToICacheType,
33 ICacheToDecode1Type,
34 MMUToICacheType)
35
36 from soc.experiment.wb_types import (WB_ADDR_BITS, WB_DATA_BITS,
37 WB_SEL_BITS, WBAddrType, WBDataType,
38 WBSelType, WBMasterOut, WBSlaveOut,
39 WBMasterOutVector, WBSlaveOutVector,
40 WBIOMasterOut, WBIOSlaveOut)
41
42 from soc.experiment.cache_ram import CacheRam
43 from soc.experiment.plru import PLRU
44
45 # for test
46 from nmigen_soc.wishbone.sram import SRAM
47 from nmigen import Memory
48 from nmigen.cli import rtlil
49 if True:
50 from nmigen.back.pysim import Simulator, Delay, Settle
51 else:
52 from nmigen.sim.cxxsim import Simulator, Delay, Settle
53 from nmutil.util import wrap
54
55
56
# Simulation flag (mirrors the VHDL generic; not referenced in this chunk)
SIM = 0
# Cache line size in bytes
LINE_SIZE = 64
# BRAM organisation: We never access more than wishbone_data_bits
# at a time so to save resources we make the array only that wide,
# and use consecutive indices to make a cache "line"
#
# ROW_SIZE is the width in bytes of the BRAM (based on WB, so 64-bits)
ROW_SIZE = WB_DATA_BITS // 8
# Number of lines in a set
NUM_LINES = 32
# Number of ways
NUM_WAYS = 4
# L1 ITLB number of entries (direct mapped)
TLB_SIZE = 64
# L1 ITLB log_2(page_size)
TLB_LG_PGSZ = 12
# Number of real address bits that we store
REAL_ADDR_BITS = 56
# Non-zero to enable log data collection
LOG_LENGTH = 0

# Width of one BRAM row in bits
ROW_SIZE_BITS = ROW_SIZE * 8
# ROW_PER_LINE is the number of row
# (wishbone) transactions in a line
ROW_PER_LINE = LINE_SIZE // ROW_SIZE
# BRAM_ROWS is the number of rows in
# BRAM needed to represent the full icache
BRAM_ROWS = NUM_LINES * ROW_PER_LINE
# INSN_PER_ROW is the number of 32bit
# instructions per BRAM row
INSN_PER_ROW = ROW_SIZE_BITS // 32

# Bit fields counts in the address
#
# INSN_BITS is the number of bits to
# select an instruction in a row
INSN_BITS = log2_int(INSN_PER_ROW)
# ROW_BITS is the number of bits to
# select a row
ROW_BITS = log2_int(BRAM_ROWS)
# ROW_LINE_BITS is the number of bits to
# select a row within a line
ROW_LINE_BITS = log2_int(ROW_PER_LINE)
# LINE_OFF_BITS is the number of bits for
# the offset in a cache line
LINE_OFF_BITS = log2_int(LINE_SIZE)
# ROW_OFF_BITS is the number of bits for
# the offset in a row
ROW_OFF_BITS = log2_int(ROW_SIZE)
# INDEX_BITS is the number of bits to
# select a cache line
INDEX_BITS = log2_int(NUM_LINES)
# SET_SIZE_BITS is the log base 2 of
# the set size
SET_SIZE_BITS = LINE_OFF_BITS + INDEX_BITS
# TAG_BITS is the number of bits of
# the tag part of the address
TAG_BITS = REAL_ADDR_BITS - SET_SIZE_BITS
# WAY_BITS is the number of bits to
# select a way
WAY_BITS = log2_int(NUM_WAYS)
# One tag-RAM row holds the concatenated tags of all ways of one line
TAG_RAM_WIDTH = TAG_BITS * NUM_WAYS

# -- L1 ITLB.
# constant TLB_BITS : natural := log2(TLB_SIZE);
# constant TLB_EA_TAG_BITS : natural := 64 - (TLB_LG_PGSZ + TLB_BITS);
# constant TLB_PTE_BITS : natural := 64;
TLB_BITS = log2_int(TLB_SIZE)
TLB_EA_TAG_BITS = 64 - (TLB_LG_PGSZ + TLB_BITS)
TLB_PTE_BITS = 64
127
128 # architecture rtl of icache is
129 #constant ROW_SIZE_BITS : natural := ROW_SIZE*8;
130 #-- ROW_PER_LINE is the number of row (wishbone
131 #-- transactions) in a line
132 #constant ROW_PER_LINE : natural := LINE_SIZE / ROW_SIZE;
133 #-- BRAM_ROWS is the number of rows in BRAM
134 #-- needed to represent the full
135 #-- icache
136 #constant BRAM_ROWS : natural := NUM_LINES * ROW_PER_LINE;
137 #-- INSN_PER_ROW is the number of 32bit instructions per BRAM row
138 #constant INSN_PER_ROW : natural := ROW_SIZE_BITS / 32;
139 #-- Bit fields counts in the address
140 #
141 #-- INSN_BITS is the number of bits to select
142 #-- an instruction in a row
143 #constant INSN_BITS : natural := log2(INSN_PER_ROW);
144 #-- ROW_BITS is the number of bits to select a row
145 #constant ROW_BITS : natural := log2(BRAM_ROWS);
146 #-- ROW_LINEBITS is the number of bits to
147 #-- select a row within a line
148 #constant ROW_LINEBITS : natural := log2(ROW_PER_LINE);
149 #-- LINE_OFF_BITS is the number of bits for the offset
150 #-- in a cache line
151 #constant LINE_OFF_BITS : natural := log2(LINE_SIZE);
152 #-- ROW_OFF_BITS is the number of bits for the offset in a row
153 #constant ROW_OFF_BITS : natural := log2(ROW_SIZE);
154 #-- INDEX_BITS is the number of bits to select a cache line
155 #constant INDEX_BITS : natural := log2(NUM_LINES);
156 #-- SET_SIZE_BITS is the log base 2 of the set size
157 #constant SET_SIZE_BITS : natural := LINE_OFF_BITS + INDEX_BITS;
158 #-- TAG_BITS is the number of bits of the tag part of the address
159 #constant TAG_BITS : natural := REAL_ADDR_BITS - SET_SIZE_BITS;
160 #-- WAY_BITS is the number of bits to select a way
161 #constant WAY_BITS : natural := log2(NUM_WAYS);
162
163 #-- Example of layout for 32 lines of 64 bytes:
164 #--
165 #-- .. tag |index| line |
166 #-- .. | row | |
167 #-- .. | | | |00| zero (2)
168 #-- .. | | |-| | INSN_BITS (1)
169 #-- .. | |---| | ROW_LINEBITS (3)
170 #-- .. | |--- - --| LINE_OFF_BITS (6)
171 #-- .. | |- --| ROW_OFF_BITS (3)
172 #-- .. |----- ---| | ROW_BITS (8)
173 #-- .. |-----| | INDEX_BITS (5)
174 #-- .. --------| | TAG_BITS (53)
175 # Example of layout for 32 lines of 64 bytes:
176 #
177 # .. tag |index| line |
178 # .. | row | |
179 # .. | | | |00| zero (2)
180 # .. | | |-| | INSN_BITS (1)
181 # .. | |---| | ROW_LINEBITS (3)
182 # .. | |--- - --| LINE_OFF_BITS (6)
183 # .. | |- --| ROW_OFF_BITS (3)
184 # .. |----- ---| | ROW_BITS (8)
185 # .. |-----| | INDEX_BITS (5)
186 # .. --------| | TAG_BITS (53)
187
188 #subtype row_t is integer range 0 to BRAM_ROWS-1;
189 #subtype index_t is integer range 0 to NUM_LINES-1;
190 #subtype way_t is integer range 0 to NUM_WAYS-1;
191 #subtype row_in_line_t is unsigned(ROW_LINEBITS-1 downto 0);
192 #
193 #-- The cache data BRAM organized as described above for each way
194 #subtype cache_row_t is std_ulogic_vector(ROW_SIZE_BITS-1 downto 0);
195 #
196 #-- The cache tags LUTRAM has a row per set. Vivado is a pain and will
197 #-- not handle a clean (commented) definition of the cache tags as a 3d
198 #-- memory. For now, work around it by putting all the tags
199 #subtype cache_tag_t is std_logic_vector(TAG_BITS-1 downto 0);
200 # type cache_tags_set_t is array(way_t) of cache_tag_t;
201 # type cache_tags_array_t is array(index_t) of cache_tags_set_t;
202 #constant TAG_RAM_WIDTH : natural := TAG_BITS * NUM_WAYS;
203 #subtype cache_tags_set_t is std_logic_vector(TAG_RAM_WIDTH-1 downto 0);
204 #type cache_tags_array_t is array(index_t) of cache_tags_set_t;
def CacheTagArray():
    """Tag RAM: one TAG_RAM_WIDTH-wide row (all ways' tags) per line."""
    rows = [Signal(TAG_RAM_WIDTH, name="cachetag_%d" %x)
            for x in range(NUM_LINES)]
    return Array(rows)
208
209 #-- The cache valid bits
210 #subtype cache_way_valids_t is std_ulogic_vector(NUM_WAYS-1 downto 0);
211 #type cache_valids_t is array(index_t) of cache_way_valids_t;
212 #type row_per_line_valid_t is array(0 to ROW_PER_LINE - 1) of std_ulogic;
def CacheValidBitsArray():
    """Per-line valid bits: one NUM_WAYS-wide signal per cache line.

    Signal name corrected from the misspelled "cahcevalid_%d".
    """
    return Array(Signal(NUM_WAYS, name="cachevalid_%d" %x) \
                 for x in range(NUM_LINES))
216
def RowPerLineValidArray():
    """One valid bit per row of the line currently being reloaded."""
    valids = []
    for x in range(ROW_PER_LINE):
        valids.append(Signal(name="rows_valid_%d" %x))
    return Array(valids)
220
221
222 #attribute ram_style : string;
223 #attribute ram_style of cache_tags : signal is "distributed";
224 # TODO to be passed to nigmen as ram attributes
225 # attribute ram_style : string;
226 # attribute ram_style of cache_tags : signal is "distributed";
227
228
229 #subtype tlb_index_t is integer range 0 to TLB_SIZE - 1;
230 #type tlb_valids_t is array(tlb_index_t) of std_ulogic;
231 #subtype tlb_tag_t is std_ulogic_vector(TLB_EA_TAG_BITS - 1 downto 0);
232 #type tlb_tags_t is array(tlb_index_t) of tlb_tag_t;
233 #subtype tlb_pte_t is std_ulogic_vector(TLB_PTE_BITS - 1 downto 0);
234 #type tlb_ptes_t is array(tlb_index_t) of tlb_pte_t;
def TLBValidBitsArray():
    """One valid bit per entry of the direct-mapped iTLB."""
    bits = [Signal(name="tlbvalid_%d" %x) for x in range(TLB_SIZE)]
    return Array(bits)
238
def TLBTagArray():
    """EA-tag storage for the iTLB, one TLB_EA_TAG_BITS tag per entry."""
    tags = []
    for x in range(TLB_SIZE):
        tags.append(Signal(TLB_EA_TAG_BITS, name="tlbtag_%d" %x))
    return Array(tags)
242
def TLBPtesArray():
    """PTE storage for the iTLB, one TLB_PTE_BITS entry per slot."""
    ptes = [Signal(TLB_PTE_BITS, name="tlbptes_%d" %x)
            for x in range(TLB_SIZE)]
    return Array(ptes)
246
247
248 #-- Cache RAM interface
249 #type cache_ram_out_t is array(way_t) of cache_row_t;
250 # Cache RAM interface
def CacheRamOut():
    """Read-data buses from the cache RAMs: one full row per way."""
    outs = []
    for x in range(NUM_WAYS):
        outs.append(Signal(ROW_SIZE_BITS, name="cache_out_%d" %x))
    return Array(outs)
254
255 #-- PLRU output interface
256 #type plru_out_t is array(index_t) of
257 # std_ulogic_vector(WAY_BITS-1 downto 0);
258 # PLRU output interface
def PLRUOut():
    """Victim-way outputs of the per-line PLRUs (WAY_BITS each)."""
    outs = [Signal(WAY_BITS, name="plru_out_%d" %x)
            for x in range(NUM_LINES)]
    return Array(outs)
262
263 # -- Return the cache line index (tag index) for an address
264 # function get_index(addr: std_ulogic_vector(63 downto 0))
265 # return index_t is
266 # begin
267 # return to_integer(unsigned(
268 # addr(SET_SIZE_BITS - 1 downto LINE_OFF_BITS)
269 # ));
270 # end;
271 # Return the cache line index (tag index) for an address
def get_index(addr):
    """Cache line (tag) index field of *addr*:
    bits [LINE_OFF_BITS, SET_SIZE_BITS)."""
    return addr[LINE_OFF_BITS:SET_SIZE_BITS]
274
275 # -- Return the cache row index (data memory) for an address
276 # function get_row(addr: std_ulogic_vector(63 downto 0))
277 # return row_t is
278 # begin
279 # return to_integer(unsigned(
280 # addr(SET_SIZE_BITS - 1 downto ROW_OFF_BITS)
281 # ));
282 # end;
283 # Return the cache row index (data memory) for an address
def get_row(addr):
    """BRAM (data memory) row index of *addr*:
    bits [ROW_OFF_BITS, SET_SIZE_BITS)."""
    return addr[ROW_OFF_BITS:SET_SIZE_BITS]
286
287 # -- Return the index of a row within a line
288 # function get_row_of_line(row: row_t) return row_in_line_t is
289 # variable row_v : unsigned(ROW_BITS-1 downto 0);
290 # begin
291 # row_v := to_unsigned(row, ROW_BITS);
292 # return row_v(ROW_LINEBITS-1 downto 0);
293 # end;
294 # Return the index of a row within a line
def get_row_of_line(row):
    """Index of *row* within its cache line (low ROW_LINE_BITS bits)."""
    return row[:ROW_LINE_BITS]
297
298 # -- Returns whether this is the last row of a line
299 # function is_last_row_addr(addr: wishbone_addr_type;
300 # last: row_in_line_t
301 # )
302 # return boolean is
303 # begin
304 # return unsigned(
305 # addr(LINE_OFF_BITS-1 downto ROW_OFF_BITS)
306 # ) = last;
307 # end;
308 # Returns whether this is the last row of a line
def is_last_row_addr(addr, last):
    """True when *addr*'s row-within-line field equals *last*
    (the final row index of the current reload)."""
    return addr[ROW_OFF_BITS:LINE_OFF_BITS] == last
311
312 # -- Returns whether this is the last row of a line
313 # function is_last_row(row: row_t;
314 # last: row_in_line_t) return boolean is
315 # begin
316 # return get_row_of_line(row) = last;
317 # end;
318 # Returns whether this is the last row of a line
def is_last_row(row, last):
    """True when *row* is the row index *last* within its line."""
    return get_row_of_line(row) == last
321
322 # -- Return the next row in the current cache line. We use a dedicated
323 # -- function in order to limit the size of the generated adder to be
324 # -- only the bits within a cache line (3 bits with default settings)
325 # function next_row(row: row_t) return row_t is
326 # variable row_v : std_ulogic_vector(ROW_BITS-1 downto 0);
327 # variable row_idx : std_ulogic_vector(ROW_LINEBITS-1 downto 0);
328 # variable result : std_ulogic_vector(ROW_BITS-1 downto 0);
329 # begin
330 # row_v := std_ulogic_vector(to_unsigned(row, ROW_BITS));
331 # row_idx := row_v(ROW_LINEBITS-1 downto 0);
332 # row_v(ROW_LINEBITS-1 downto 0) :=
333 # std_ulogic_vector(unsigned(row_idx) + 1);
334 # return to_integer(unsigned(row_v));
335 # end;
336 # Return the next row in the current cache line. We use a dedicated
337 # function in order to limit the size of the generated adder to be
338 # only the bits within a cache line (3 bits with default settings)
def next_row(row):
    """Next row within the same cache line, wrapping modulo ROW_PER_LINE.

    Only the low ROW_LINE_BITS participate in the increment, keeping the
    generated adder limited to the bits within a line; the upper bits of
    *row* pass through unchanged.
    """
    row_v = row[0:ROW_LINE_BITS] + 1
    return Cat(row_v[:ROW_LINE_BITS], row[ROW_LINE_BITS:])
342 # -- Read the instruction word for the given address in the
343 # -- current cache row
344 # function read_insn_word(addr: std_ulogic_vector(63 downto 0);
345 # data: cache_row_t) return std_ulogic_vector is
346 # variable word: integer range 0 to INSN_PER_ROW-1;
347 # begin
348 # word := to_integer(unsigned(addr(INSN_BITS+2-1 downto 2)));
349 # return data(31+word*32 downto word*32);
350 # end;
351 # Read the instruction word for the given address
352 # in the current cache row
def read_insn_word(addr, data):
    """Select the 32-bit instruction word for *addr* from cache row *data*.

    The word index is addr[2 : INSN_BITS+2] — the VHDL reads
    addr(INSN_BITS+2-1 downto 2).  The previous upper bound of
    INSN_BITS+3 took one bit too many, letting word_select() index
    past the last instruction in the row.
    """
    word = addr[2:INSN_BITS+2]
    return data.word_select(word, 32)
356
357 # -- Get the tag value from the address
358 # function get_tag(
359 # addr: std_ulogic_vector(REAL_ADDR_BITS - 1 downto 0)
360 # )
361 # return cache_tag_t is
362 # begin
363 # return addr(REAL_ADDR_BITS - 1 downto SET_SIZE_BITS);
364 # end;
365 # Get the tag value from the address
def get_tag(addr):
    """Tag field of real address *addr*:
    bits [SET_SIZE_BITS, REAL_ADDR_BITS)."""
    return addr[SET_SIZE_BITS:REAL_ADDR_BITS]
368
369 # -- Read a tag from a tag memory row
370 # function read_tag(way: way_t; tagset: cache_tags_set_t)
371 # return cache_tag_t is
372 # begin
373 # return tagset((way+1) * TAG_BITS - 1 downto way * TAG_BITS);
374 # end;
375 # Read a tag from a tag memory row
def read_tag(way, tagset):
    """Extract way *way*'s TAG_BITS-wide tag from a tag-RAM row."""
    return tagset[way * TAG_BITS:(way + 1) * TAG_BITS]
378
379 # -- Write a tag to tag memory row
380 # procedure write_tag(way: in way_t;
381 # tagset: inout cache_tags_set_t; tag: cache_tag_t) is
382 # begin
383 # tagset((way+1) * TAG_BITS - 1 downto way * TAG_BITS) := tag;
384 # end;
385 # Write a tag to tag memory row
def write_tag(way, tagset, tag):
    """Return an assignment writing *tag* into way *way*'s slice of a
    tag-RAM row (caller adds it to a comb/sync domain)."""
    return tagset[way * TAG_BITS:(way + 1) * TAG_BITS].eq(tag)
388
389 # -- Simple hash for direct-mapped TLB index
390 # function hash_ea(addr: std_ulogic_vector(63 downto 0))
391 # return tlb_index_t is
392 # variable hash : std_ulogic_vector(TLB_BITS - 1 downto 0);
393 # begin
394 # hash := addr(TLB_LG_PGSZ + TLB_BITS - 1 downto TLB_LG_PGSZ)
395 # xor addr(
396 # TLB_LG_PGSZ + 2 * TLB_BITS - 1 downto
397 # TLB_LG_PGSZ + TLB_BITS
398 # )
399 # xor addr(
400 # TLB_LG_PGSZ + 3 * TLB_BITS - 1 downto
401 # TLB_LG_PGSZ + 2 * TLB_BITS
402 # );
403 # return to_integer(unsigned(hash));
404 # end;
405 # Simple hash for direct-mapped TLB index
def hash_ea(addr):
    """XOR-fold three TLB_BITS-wide fields of the page number of *addr*
    into a direct-mapped iTLB index."""
    fld1 = addr[TLB_LG_PGSZ:TLB_LG_PGSZ + TLB_BITS]
    fld2 = addr[TLB_LG_PGSZ + TLB_BITS:TLB_LG_PGSZ + 2 * TLB_BITS]
    fld3 = addr[TLB_LG_PGSZ + 2 * TLB_BITS:TLB_LG_PGSZ + 3 * TLB_BITS]
    return fld1 ^ fld2 ^ fld3
413
414 # begin
415 #
416 # assert LINE_SIZE mod ROW_SIZE = 0;
417 # assert ispow2(LINE_SIZE) report "LINE_SIZE not power of 2"
418 # severity FAILURE;
419 # assert ispow2(NUM_LINES) report "NUM_LINES not power of 2"
420 # severity FAILURE;
421 # assert ispow2(ROW_PER_LINE) report "ROW_PER_LINE not power of 2"
422 # severity FAILURE;
423 # assert ispow2(INSN_PER_ROW) report "INSN_PER_ROW not power of 2"
424 # severity FAILURE;
425 # assert (ROW_BITS = INDEX_BITS + ROW_LINEBITS)
426 # report "geometry bits don't add up" severity FAILURE;
427 # assert (LINE_OFF_BITS = ROW_OFF_BITS + ROW_LINEBITS)
428 # report "geometry bits don't add up" severity FAILURE;
429 # assert (REAL_ADDR_BITS = TAG_BITS + INDEX_BITS + LINE_OFF_BITS)
430 # report "geometry bits don't add up" severity FAILURE;
431 # assert (REAL_ADDR_BITS = TAG_BITS + ROW_BITS + ROW_OFF_BITS)
432 # report "geometry bits don't add up" severity FAILURE;
433 #
434 # sim_debug: if SIM generate
435 # debug: process
436 # begin
437 # report "ROW_SIZE = " & natural'image(ROW_SIZE);
438 # report "ROW_PER_LINE = " & natural'image(ROW_PER_LINE);
439 # report "BRAM_ROWS = " & natural'image(BRAM_ROWS);
440 # report "INSN_PER_ROW = " & natural'image(INSN_PER_ROW);
441 # report "INSN_BITS = " & natural'image(INSN_BITS);
442 # report "ROW_BITS = " & natural'image(ROW_BITS);
443 # report "ROW_LINEBITS = " & natural'image(ROW_LINEBITS);
444 # report "LINE_OFF_BITS = " & natural'image(LINE_OFF_BITS);
445 # report "ROW_OFF_BITS = " & natural'image(ROW_OFF_BITS);
446 # report "INDEX_BITS = " & natural'image(INDEX_BITS);
447 # report "TAG_BITS = " & natural'image(TAG_BITS);
448 # report "WAY_BITS = " & natural'image(WAY_BITS);
449 # wait;
450 # end process;
451 # end generate;
452
453 # Cache reload state machine
@unique
class State(Enum):
    """Cache reload state machine states."""
    IDLE = 0      # no reload in progress
    CLR_TAG = 1   # choosing the replacement way (see replace_way mux)
    WAIT_ACK = 2  # wishbone reload in flight; rows_valid tracks progress
459
460 # type reg_internal_t is record
461 # -- Cache hit state (Latches for 1 cycle BRAM access)
462 # hit_way : way_t;
463 # hit_nia : std_ulogic_vector(63 downto 0);
464 # hit_smark : std_ulogic;
465 # hit_valid : std_ulogic;
466 #
467 # -- Cache miss state (reload state machine)
468 # state : state_t;
469 # wb : wishbone_master_out;
470 # store_way : way_t;
471 # store_index : index_t;
472 # store_row : row_t;
473 # store_tag : cache_tag_t;
474 # store_valid : std_ulogic;
475 # end_row_ix : row_in_line_t;
476 # rows_valid : row_per_line_valid_t;
477 #
478 # -- TLB miss state
479 # fetch_failed : std_ulogic;
480 # end record;
class RegInternal(RecordObject):
    """Synchronous internal state of the icache (VHDL reg_internal_t).

    Way/index/row fields hold *indices*, so they are sized with the
    corresponding log2 widths (WAY_BITS/INDEX_BITS/ROW_BITS) rather
    than the element counts used previously (store_row in particular
    was a 256-bit signal for an 8-bit row index).
    """
    def __init__(self):
        super().__init__()
        # Cache hit state (Latches for 1 cycle BRAM access)
        self.hit_way = Signal(WAY_BITS)
        self.hit_nia = Signal(64)
        self.hit_smark = Signal()
        self.hit_valid = Signal()

        # Cache miss state (reload state machine)
        self.state = Signal(State)
        self.wb = WBMasterOut("wb")
        self.store_way = Signal(WAY_BITS)
        self.store_index = Signal(INDEX_BITS)
        self.store_row = Signal(ROW_BITS)
        self.store_tag = Signal(TAG_BITS)
        self.store_valid = Signal()
        self.end_row_ix = Signal(ROW_LINE_BITS)
        self.rows_valid = RowPerLineValidArray()

        # TLB miss state
        self.fetch_failed = Signal()
503
504 # -- 64 bit direct mapped icache. All instructions are 4B aligned.
505 #
506 # entity icache is
507 # generic (
508 # SIM : boolean := false;
509 # -- Line size in bytes
510 # LINE_SIZE : positive := 64;
511 # -- BRAM organisation: We never access more
512 # -- than wishbone_data_bits
513 # -- at a time so to save resources we make the
514 # -- array only that wide,
515 # -- and use consecutive indices for to make a cache "line"
516 # --
517 # -- ROW_SIZE is the width in bytes of the BRAM (based on WB,
518 # -- so 64-bits)
519 # ROW_SIZE : positive := wishbone_data_bits / 8;
520 # -- Number of lines in a set
521 # NUM_LINES : positive := 32;
522 # -- Number of ways
523 # NUM_WAYS : positive := 4;
524 # -- L1 ITLB number of entries (direct mapped)
525 # TLB_SIZE : positive := 64;
526 # -- L1 ITLB log_2(page_size)
527 # TLB_LG_PGSZ : positive := 12;
528 # -- Number of real address bits that we store
529 # REAL_ADDR_BITS : positive := 56;
530 # -- Non-zero to enable log data collection
531 # LOG_LENGTH : natural := 0
532 # );
533 # port (
534 # clk : in std_ulogic;
535 # rst : in std_ulogic;
536 #
537 # i_in : in Fetch1ToIcacheType;
538 # i_out : out IcacheToDecode1Type;
539 #
540 # m_in : in MmuToIcacheType;
541 #
542 # stall_in : in std_ulogic;
543 # stall_out : out std_ulogic;
544 # flush_in : in std_ulogic;
545 # inval_in : in std_ulogic;
546 #
547 # wishbone_out : out wishbone_master_out;
548 # wishbone_in : in wishbone_slave_out;
549 #
550 # log_out : out std_ulogic_vector(53 downto 0)
551 # );
552 # end entity icache;
553 # 64 bit direct mapped icache. All instructions are 4B aligned.
554 class ICache(Elaboratable):
555 """64 bit direct mapped icache. All instructions are 4B aligned."""
    def __init__(self):
        # fetch request in / decoded instruction out
        self.i_in = Fetch1ToICacheType(name="i_in")
        self.i_out = ICacheToDecode1Type(name="i_out")

        # MMU interface (TLB loads and invalidations)
        self.m_in = MMUToICacheType(name="m_in")

        self.stall_in = Signal()
        self.stall_out = Signal()
        self.flush_in = Signal()
        self.inval_in = Signal()

        # wishbone master used by the cache-miss reload machine
        self.wb_out = WBMasterOut(name="wb_out")
        self.wb_in = WBSlaveOut(name="wb_in")

        # debug log stream; width matches the VHDL log_out(53 downto 0)
        self.log_out = Signal(54)
571
572
573 # -- Generate a cache RAM for each way
574 # rams: for i in 0 to NUM_WAYS-1 generate
575 # signal do_read : std_ulogic;
576 # signal do_write : std_ulogic;
577 # signal rd_addr : std_ulogic_vector(ROW_BITS-1 downto 0);
578 # signal wr_addr : std_ulogic_vector(ROW_BITS-1 downto 0);
579 # signal dout : cache_row_t;
580 # signal wr_sel : std_ulogic_vector(ROW_SIZE-1 downto 0);
581 # begin
582 # way: entity work.cache_ram
583 # generic map (
584 # ROW_BITS => ROW_BITS,
585 # WIDTH => ROW_SIZE_BITS
586 # )
587 # port map (
588 # clk => clk,
589 # rd_en => do_read,
590 # rd_addr => rd_addr,
591 # rd_data => dout,
592 # wr_sel => wr_sel,
593 # wr_addr => wr_addr,
594 # wr_data => wishbone_in.dat
595 # );
596 # process(all)
597 # begin
598 # do_read <= not (stall_in or use_previous);
599 # do_write <= '0';
600 # if wishbone_in.ack = '1' and replace_way = i then
601 # do_write <= '1';
602 # end if;
603 # cache_out(i) <= dout;
604 # rd_addr <=
605 # std_ulogic_vector(to_unsigned(req_row, ROW_BITS));
606 # wr_addr <=
607 # std_ulogic_vector(to_unsigned(r.store_row, ROW_BITS));
608 # for i in 0 to ROW_SIZE-1 loop
609 # wr_sel(i) <= do_write;
610 # end loop;
611 # end process;
612 # end generate;
613 def rams(self, m, r, cache_out, use_previous, replace_way, req_row):
614 comb = m.d.comb
615
616 wb_in, stall_in = self.wb_in, self.stall_in
617
618 do_read = Signal()
619 do_write = Signal()
620 rd_addr = Signal(ROW_BITS)
621 wr_addr = Signal(ROW_BITS)
622 _d_out = Signal(ROW_SIZE_BITS)
623 wr_sel = Signal(ROW_SIZE)
624
625 for i in range(NUM_WAYS):
626 way = CacheRam(ROW_BITS, ROW_SIZE_BITS)
627 comb += way.rd_en.eq(do_read)
628 comb += way.rd_addr.eq(rd_addr)
629 comb += way.rd_data_o.eq(_d_out)
630 comb += way.wr_sel.eq(wr_sel)
631 comb += way.wr_addr.eq(wr_addr)
632 comb += way.wr_data.eq(wb_in.dat)
633
634 comb += do_read.eq(~(stall_in | use_previous))
635
636 with m.If(wb_in.ack & (replace_way == i)):
637 comb += do_write.eq(1)
638
639 comb += cache_out[i].eq(_d_out)
640 comb += rd_addr.eq(req_row)
641 comb += wr_addr.eq(r.store_row)
642 for j in range(ROW_SIZE):
643 comb += wr_sel[j].eq(do_write)
644
645 # -- Generate PLRUs
646 # maybe_plrus: if NUM_WAYS > 1 generate
647 # begin
648 # plrus: for i in 0 to NUM_LINES-1 generate
649 # -- PLRU interface
650 # signal plru_acc : std_ulogic_vector(WAY_BITS-1 downto 0);
651 # signal plru_acc_en : std_ulogic;
652 # signal plru_out : std_ulogic_vector(WAY_BITS-1 downto 0);
653 #
654 # begin
655 # plru : entity work.plru
656 # generic map (
657 # BITS => WAY_BITS
658 # )
659 # port map (
660 # clk => clk,
661 # rst => rst,
662 # acc => plru_acc,
663 # acc_en => plru_acc_en,
664 # lru => plru_out
665 # );
666 #
667 # process(all)
668 # begin
669 # -- PLRU interface
670 # if get_index(r.hit_nia) = i then
671 # plru_acc_en <= r.hit_valid;
672 # else
673 # plru_acc_en <= '0';
674 # end if;
675 # plru_acc <=
676 # std_ulogic_vector(to_unsigned(r.hit_way, WAY_BITS));
677 # plru_victim(i) <= plru_out;
678 # end process;
679 # end generate;
680 # end generate;
681 def maybe_plrus(self, m, r, plru_victim):
682 comb = m.d.comb
683
684 with m.If(NUM_WAYS > 1):
685 for i in range(NUM_LINES):
686 plru_acc_i = Signal(WAY_BITS)
687 plru_acc_en = Signal()
688 plru_out = Signal(WAY_BITS)
689 plru = PLRU(WAY_BITS)
690 comb += plru.acc_i.eq(plru_acc_i)
691 comb += plru.acc_en.eq(plru_acc_en)
692 comb += plru.lru_o.eq(plru_out)
693
694 # PLRU interface
695 with m.If(get_index(r.hit_nia) == i):
696 comb += plru.acc_en.eq(r.hit_valid)
697
698 comb += plru.acc_i.eq(r.hit_way)
699 comb += plru_victim[i].eq(plru.lru_o)
700
701 # -- TLB hit detection and real address generation
702 # itlb_lookup : process(all)
703 # variable pte : tlb_pte_t;
704 # variable ttag : tlb_tag_t;
705 # begin
706 # tlb_req_index <= hash_ea(i_in.nia);
707 # pte := itlb_ptes(tlb_req_index);
708 # ttag := itlb_tags(tlb_req_index);
709 # if i_in.virt_mode = '1' then
710 # real_addr <= pte(REAL_ADDR_BITS - 1 downto TLB_LG_PGSZ) &
711 # i_in.nia(TLB_LG_PGSZ - 1 downto 0);
712 # if ttag = i_in.nia(63 downto TLB_LG_PGSZ + TLB_BITS) then
713 # ra_valid <= itlb_valids(tlb_req_index);
714 # else
715 # ra_valid <= '0';
716 # end if;
717 # eaa_priv <= pte(3);
718 # else
719 # real_addr <= i_in.nia(REAL_ADDR_BITS - 1 downto 0);
720 # ra_valid <= '1';
721 # eaa_priv <= '1';
722 # end if;
723 #
724 # -- no IAMR, so no KUEP support for now
725 # priv_fault <= eaa_priv and not i_in.priv_mode;
726 # access_ok <= ra_valid and not priv_fault;
727 # end process;
728 # TLB hit detection and real address generation
729 def itlb_lookup(self, m, tlb_req_index, itlb_ptes, itlb_tags,
730 real_addr, itlb_valid_bits, ra_valid, eaa_priv,
731 priv_fault, access_ok):
732 comb = m.d.comb
733
734 i_in = self.i_in
735
736 pte = Signal(TLB_PTE_BITS)
737 ttag = Signal(TLB_EA_TAG_BITS)
738
739 comb += tlb_req_index.eq(hash_ea(i_in.nia))
740 comb += pte.eq(itlb_ptes[tlb_req_index])
741 comb += ttag.eq(itlb_tags[tlb_req_index])
742
743 with m.If(i_in.virt_mode):
744 comb += real_addr.eq(Cat(
745 i_in.nia[:TLB_LG_PGSZ],
746 pte[TLB_LG_PGSZ:REAL_ADDR_BITS]
747 ))
748
749 with m.If(ttag == i_in.nia[TLB_LG_PGSZ + TLB_BITS:64]):
750 comb += ra_valid.eq(itlb_valid_bits[tlb_req_index])
751
752 with m.Else():
753 comb += real_addr.eq(i_in.nia[:REAL_ADDR_BITS])
754 comb += ra_valid.eq(1)
755 comb += eaa_priv.eq(1)
756
757 # No IAMR, so no KUEP support for now
758 comb += priv_fault.eq(eaa_priv & ~i_in.priv_mode)
759 comb += access_ok.eq(ra_valid & ~priv_fault)
760
761 # -- iTLB update
762 # itlb_update: process(clk)
763 # variable wr_index : tlb_index_t;
764 # begin
765 # if rising_edge(clk) then
766 # wr_index := hash_ea(m_in.addr);
767 # if rst = '1' or
768 # (m_in.tlbie = '1' and m_in.doall = '1') then
769 # -- clear all valid bits
770 # for i in tlb_index_t loop
771 # itlb_valids(i) <= '0';
772 # end loop;
773 # elsif m_in.tlbie = '1' then
774 # -- clear entry regardless of hit or miss
775 # itlb_valids(wr_index) <= '0';
776 # elsif m_in.tlbld = '1' then
777 # itlb_tags(wr_index) <=
778 # m_in.addr(63 downto TLB_LG_PGSZ + TLB_BITS);
779 # itlb_ptes(wr_index) <= m_in.pte;
780 # itlb_valids(wr_index) <= '1';
781 # end if;
782 # end if;
783 # end process;
784 # iTLB update
785 def itlb_update(self, m, itlb_valid_bits, itlb_tags, itlb_ptes):
786 comb = m.d.comb
787 sync = m.d.sync
788
789 m_in = self.m_in
790
791 wr_index = Signal(TLB_SIZE)
792 comb += wr_index.eq(hash_ea(m_in.addr))
793
794 with m.If(m_in.tlbie & m_in.doall):
795 # Clear all valid bits
796 for i in range(TLB_SIZE):
797 sync += itlb_valid_bits[i].eq(0)
798
799 with m.Elif(m_in.tlbie):
800 # Clear entry regardless of hit or miss
801 sync += itlb_valid_bits[wr_index].eq(0)
802
803 with m.Elif(m_in.tlbld):
804 sync += itlb_tags[wr_index].eq(
805 m_in.addr[TLB_LG_PGSZ + TLB_BITS:64]
806 )
807 sync += itlb_ptes[wr_index].eq(m_in.pte)
808 sync += itlb_valid_bits[wr_index].eq(1)
809
810 # -- Cache hit detection, output to fetch2 and other misc logic
811 # icache_comb : process(all)
812 # Cache hit detection, output to fetch2 and other misc logic
    def icache_comb(self, m, use_previous, r, req_index, req_row,
                    req_tag, real_addr, req_laddr, cache_valid_bits,
                    cache_tags, access_ok, req_is_hit,
                    req_is_miss, replace_way, plru_victim, cache_out):
        """Cache hit detection, output to fetch2 and other misc logic.

        Purely combinational: decodes the request address, detects a hit
        in any way, selects the replacement way on a miss, muxes the
        instruction word out to decode1, and forwards r.wb to the
        wishbone outputs.  Direct transcription of the VHDL
        ``icache_comb`` process kept in the original statement order.
        """
        # variable is_hit : std_ulogic;
        # variable hit_way : way_t;
        comb = m.d.comb

        i_in, i_out, wb_out = self.i_in, self.i_out, self.wb_out
        flush_in, stall_out = self.flush_in, self.stall_out

        is_hit = Signal()
        # NOTE(review): hit_way holds a way *index*; Signal(NUM_WAYS) is
        # wider than WAY_BITS — confirm the intended width (VHDL way_t).
        hit_way = Signal(NUM_WAYS)
        # begin
        # -- i_in.sequential means that i_in.nia this cycle
        # -- is 4 more than last cycle. If we read more
        # -- than 32 bits at a time, had a cache hit last
        # -- cycle, and we don't want the first 32-bit chunk
        # -- then we can keep the data we read last cycle
        # -- and just use that.
        # if unsigned(i_in.nia(INSN_BITS+2-1 downto 2)) /= 0 then
        #     use_previous <= i_in.sequential and r.hit_valid;
        # else
        #     use_previous <= '0';
        # end if;
        # i_in.sequential means that i_in.nia this cycle is 4 more than
        # last cycle. If we read more than 32 bits at a time, had a
        # cache hit last cycle, and we don't want the first 32-bit chunk
        # then we can keep the data we read last cycle and just use that.
        with m.If(i_in.nia[2:INSN_BITS+2] != 0):
            comb += use_previous.eq(i_in.sequential & r.hit_valid)

        # -- Extract line, row and tag from request
        # req_index <= get_index(i_in.nia);
        # req_row <= get_row(i_in.nia);
        # req_tag <= get_tag(real_addr);
        # Extract line, row and tag from request
        comb += req_index.eq(get_index(i_in.nia))
        comb += req_row.eq(get_row(i_in.nia))
        comb += req_tag.eq(get_tag(real_addr))

        # -- Calculate address of beginning of cache row, will be
        # -- used for cache miss processing if needed
        # req_laddr <=
        #     (63 downto REAL_ADDR_BITS => '0') &
        #     real_addr(REAL_ADDR_BITS - 1 downto ROW_OFF_BITS) &
        #     (ROW_OFF_BITS-1 downto 0 => '0');
        # Calculate address of beginning of cache row, will be
        # used for cache miss processing if needed
        comb += req_laddr.eq(Cat(
                 Const(0b0, ROW_OFF_BITS),
                 real_addr[ROW_OFF_BITS:REAL_ADDR_BITS]
                ))

        # -- Test if pending request is a hit on any way
        # hit_way := 0;
        # is_hit := '0';
        # for i in way_t loop
        #     if i_in.req = '1' and
        #         (cache_valids(req_index)(i) = '1' or
        #          (r.state = WAIT_ACK and
        #           req_index = r.store_index and
        #           i = r.store_way and
        #           r.rows_valid(req_row mod ROW_PER_LINE) = '1')) then
        #         if read_tag(i, cache_tags(req_index)) = req_tag then
        #             hit_way := i;
        #             is_hit := '1';
        #         end if;
        #     end if;
        # end loop;
        # Test if pending request is a hit on any way
        # (a partially reloaded line counts as a hit for rows already
        # fetched: the WAIT_ACK / rows_valid term)
        for i in range(NUM_WAYS):
            with m.If(i_in.req &
                      (cache_valid_bits[req_index][i] |
                       ((r.state == State.WAIT_ACK)
                        & (req_index == r.store_index)
                        & (i == r.store_way)
                        & r.rows_valid[req_row % ROW_PER_LINE]))):
                with m.If(read_tag(i, cache_tags[req_index]) == req_tag):
                    comb += hit_way.eq(i)
                    comb += is_hit.eq(1)

        # -- Generate the "hit" and "miss" signals
        # -- for the synchronous blocks
        # if i_in.req = '1' and access_ok = '1' and flush_in = '0'
        #     and rst = '0' then
        #     req_is_hit <= is_hit;
        #     req_is_miss <= not is_hit;
        # else
        #     req_is_hit <= '0';
        #     req_is_miss <= '0';
        # end if;
        # req_hit_way <= hit_way;
        # Generate the "hit" and "miss" signals
        # for the synchronous blocks
        # NOTE(review): the VHDL also exports req_hit_way <= hit_way;
        # there is no req_hit_way parameter here — confirm the callers.
        with m.If(i_in.req & access_ok & ~flush_in):
            comb += req_is_hit.eq(is_hit)
            comb += req_is_miss.eq(~is_hit)

        with m.Else():
            comb += req_is_hit.eq(0)
            comb += req_is_miss.eq(0)

        # -- The way to replace on a miss
        # if r.state = CLR_TAG then
        #     replace_way <=
        #         to_integer(unsigned(plru_victim(r.store_index)));
        # else
        #     replace_way <= r.store_way;
        # end if;
        # The way to replace on a miss
        with m.If(r.state == State.CLR_TAG):
            comb += replace_way.eq(plru_victim[r.store_index])

        with m.Else():
            comb += replace_way.eq(r.store_way)

        # -- Output instruction from current cache row
        # --
        # -- Note: This is a mild violation of our design principle of
        # -- having pipeline stages output from a clean latch. In this
        # -- case we output the result of a mux. The alternative would
        # -- be output an entire row which I prefer not to do just yet
        # -- as it would force fetch2 to know about some of the cache
        # -- geometry information.
        # i_out.insn <= read_insn_word(r.hit_nia, cache_out(r.hit_way));
        # i_out.valid <= r.hit_valid;
        # i_out.nia <= r.hit_nia;
        # i_out.stop_mark <= r.hit_smark;
        # i_out.fetch_failed <= r.fetch_failed;
        # Output instruction from current cache row
        #
        # Note: This is a mild violation of our design principle of
        # having pipeline stages output from a clean latch. In this
        # case we output the result of a mux. The alternative would
        # be output an entire row which I prefer not to do just yet
        # as it would force fetch2 to know about some of the cache
        # geometry information.
        comb += i_out.insn.eq(read_insn_word(r.hit_nia, cache_out[r.hit_way]))
        comb += i_out.valid.eq(r.hit_valid)
        comb += i_out.nia.eq(r.hit_nia)
        comb += i_out.stop_mark.eq(r.hit_smark)
        comb += i_out.fetch_failed.eq(r.fetch_failed)

        # -- Stall fetch1 if we have a miss on cache or TLB
        # -- or a protection fault
        # stall_out <= not (is_hit and access_ok);
        # Stall fetch1 if we have a miss on cache or TLB
        # or a protection fault
        comb += stall_out.eq(~(is_hit & access_ok))

        # -- Wishbone requests output (from the cache miss reload machine)
        # wishbone_out <= r.wb;
        # Wishbone requests output (from the cache miss reload machine)
        comb += wb_out.eq(r.wb)
968 # end process;
969
970 # -- Cache hit synchronous machine
971 # icache_hit : process(clk)
972 # Cache hit synchronous machine
973 def icache_hit(self, m, use_previous, r, req_is_hit, req_hit_way,
974 req_index, req_tag, real_addr):
975 sync = m.d.sync
976
977 i_in, stall_in = self.i_in, self.stall_in
978 flush_in = self.flush_in
979
980 # begin
981 # if rising_edge(clk) then
982 # -- keep outputs to fetch2 unchanged on a stall
983 # -- except that flush or reset sets valid to 0
984 # -- If use_previous, keep the same data as last
985 # -- cycle and use the second half
986 # if stall_in = '1' or use_previous = '1' then
987 # if rst = '1' or flush_in = '1' then
988 # r.hit_valid <= '0';
989 # end if;
990 # keep outputs to fetch2 unchanged on a stall
991 # except that flush or reset sets valid to 0
992 # If use_previous, keep the same data as last
993 # cycle and use the second half
994 with m.If(stall_in | use_previous):
995 with m.If(flush_in):
996 sync += r.hit_valid.eq(0)
997 # else
998 # -- On a hit, latch the request for the next cycle,
999 # -- when the BRAM data will be available on the
1000 # -- cache_out output of the corresponding way
1001 # r.hit_valid <= req_is_hit;
1002 # if req_is_hit = '1' then
1003 # r.hit_way <= req_hit_way;
1004 with m.Else():
1005 # On a hit, latch the request for the next cycle,
1006 # when the BRAM data will be available on the
1007 # cache_out output of the corresponding way
1008 sync += r.hit_valid.eq(req_is_hit)
1009
1010 with m.If(req_is_hit):
1011 sync += r.hit_way.eq(req_hit_way)
1012
1013 # report "cache hit nia:" & to_hstring(i_in.nia) &
1014 # " IR:" & std_ulogic'image(i_in.virt_mode) &
1015 # " SM:" & std_ulogic'image(i_in.stop_mark) &
1016 # " idx:" & integer'image(req_index) &
1017 # " tag:" & to_hstring(req_tag) &
1018 # " way:" & integer'image(req_hit_way) &
1019 # " RA:" & to_hstring(real_addr);
1020 # XXX NO do not use f"" use %d and %x. see dcache.py Display
1021 print(f"cache hit nia:{i_in.nia}, " \
1022 f"IR:{i_in.virt_mode}, " \
1023 f"SM:{i_in.stop_mark}, idx:{req_index}, " \
1024 f"tag:{req_tag}, way:{req_hit_way}, " \
1025 f"RA:{real_addr}")
1026 # end if;
1027 # end if;
1028 # if stall_in = '0' then
1029 # -- Send stop marks and NIA down regardless of validity
1030 # r.hit_smark <= i_in.stop_mark;
1031 # r.hit_nia <= i_in.nia;
1032 # end if;
1033 with m.If(~stall_in):
1034 # Send stop marks and NIA down regardless of validity
1035 sync += r.hit_smark.eq(i_in.stop_mark)
1036 sync += r.hit_nia.eq(i_in.nia)
1037 # end if;
1038 # end process;
1039
1040 # -- Cache miss/reload synchronous machine
1041 # icache_miss : process(clk)
1042 # Cache miss/reload synchronous machine
1043 def icache_miss(self, m, cache_valid_bits, r, req_is_miss,
1044 req_index, req_laddr, req_tag, replace_way,
1045 cache_tags, access_ok, real_addr):
1046 comb = m.d.comb
1047 sync = m.d.sync
1048
1049 i_in, wb_in, m_in = self.i_in, self.wb_in, self.m_in
1050 stall_in, flush_in = self.stall_in, self.flush_in
1051 inval_in = self.inval_in
1052
1053 # variable tagset : cache_tags_set_t;
1054 # variable stbs_done : boolean;
1055
1056 tagset = Signal(TAG_RAM_WIDTH)
1057 stbs_done = Signal()
1058
1059 # begin
1060 # if rising_edge(clk) then
1061 # -- On reset, clear all valid bits to force misses
1062 # if rst = '1' then
1063 # On reset, clear all valid bits to force misses
1064 # for i in index_t loop
1065 # cache_valids(i) <= (others => '0');
1066 # end loop;
1067 # r.state <= IDLE;
1068 # r.wb.cyc <= '0';
1069 # r.wb.stb <= '0';
1070 # -- We only ever do reads on wishbone
1071 # r.wb.dat <= (others => '0');
1072 # r.wb.sel <= "11111111";
1073 # r.wb.we <= '0';
1074
1075 # We only ever do reads on wishbone
1076 comb += r.wb.sel.eq(~0) # set to all 1s
1077
1078 # -- Not useful normally but helps avoiding
1079 # -- tons of sim warnings
1080 # r.wb.adr <= (others => '0');
1081
1082 # else
1083
1084 # -- Process cache invalidations
1085 # if inval_in = '1' then
1086 # for i in index_t loop
1087 # cache_valids(i) <= (others => '0');
1088 # end loop;
1089 # r.store_valid <= '0';
1090 # end if;
1091 # Process cache invalidations
1092 with m.If(inval_in):
1093 for i in range(NUM_LINES):
1094 sync += cache_valid_bits[i].eq(0)
1095 sync += r.store_valid.eq(0)
1096
1097 # -- Main state machine
1098 # case r.state is
1099 # Main state machine
1100 with m.Switch(r.state):
1101
1102 # when IDLE =>
1103 with m.Case(State.IDLE):
1104 # -- Reset per-row valid flags,
1105 # -- only used in WAIT_ACK
1106 # for i in 0 to ROW_PER_LINE - 1 loop
1107 # r.rows_valid(i) <= '0';
1108 # end loop;
1109 # Reset per-row valid flags,
1110 # only used in WAIT_ACK
1111 for i in range(ROW_PER_LINE):
1112 sync += r.rows_valid[i].eq(0)
1113
1114 # -- We need to read a cache line
1115 # if req_is_miss = '1' then
1116 # report "cache miss nia:" & to_hstring(i_in.nia) &
1117 # " IR:" & std_ulogic'image(i_in.virt_mode) &
1118 # " SM:" & std_ulogic'image(i_in.stop_mark) &
1119 # " idx:" & integer'image(req_index) &
1120 # " way:" & integer'image(replace_way) &
1121 # " tag:" & to_hstring(req_tag) &
1122 # " RA:" & to_hstring(real_addr);
1123 # We need to read a cache line
1124 with m.If(req_is_miss):
1125 # XXX no, do not use "f". use sync += Display
1126 # and use %d for integer, %x for hex.
1127 print(f"cache miss nia:{i_in.nia} " \
1128 f"IR:{i_in.virt_mode} " \
1129 f"SM:{i_in.stop_mark} " \
1130 F"idx:{req_index} " \
1131 f"way:{replace_way} tag:{req_tag} " \
1132 f"RA:{real_addr}")
1133
1134 # -- Keep track of our index and way for
1135 # -- subsequent stores
1136 # r.store_index <= req_index;
1137 # r.store_row <= get_row(req_laddr);
1138 # r.store_tag <= req_tag;
1139 # r.store_valid <= '1';
1140 # r.end_row_ix <=
1141 # get_row_of_line(get_row(req_laddr)) - 1;
1142 # Keep track of our index and way
1143 # for subsequent stores
1144 sync += r.store_index.eq(req_index)
1145 sync += r.store_row.eq(get_row(req_laddr))
1146 sync += r.store_tag.eq(req_tag)
1147 sync += r.store_valid.eq(1)
1148 sync += r.end_row_ix.eq(
1149 get_row_of_line(
1150 get_row(req_laddr)
1151 ) - 1
1152 )
1153
1154 # -- Prep for first wishbone read. We calculate the
1155 # -- address of the start of the cache line and
1156 # -- start the WB cycle.
1157 # r.wb.adr <= req_laddr(r.wb.adr'left downto 0);
1158 # r.wb.cyc <= '1';
1159 # r.wb.stb <= '1';
1160 # Prep for first wishbone read.
1161 # We calculate the
1162 # address of the start of the cache line and
1163 # start the WB cycle.
1164 sync += r.wb.adr.eq(req_laddr)
1165 sync += r.wb.cyc.eq(1)
1166 sync += r.wb.stb.eq(1)
1167
1168 # -- Track that we had one request sent
1169 # r.state <= CLR_TAG;
1170 # Track that we had one request sent
1171 sync += r.state.eq(State.CLR_TAG)
1172 # end if;
1173
1174 # when CLR_TAG | WAIT_ACK =>
1175 with m.Case(State.CLR_TAG, State.WAIT_ACK):
1176 # if r.state = CLR_TAG then
1177 with m.If(r.state == State.CLR_TAG):
1178 # -- Get victim way from plru
1179 # r.store_way <= replace_way;
1180 # Get victim way from plru
1181 sync += r.store_way.eq(replace_way)
1182 #
1183 # -- Force misses on that way while
1184 # -- reloading that line
1185 # cache_valids(req_index)(replace_way) <= '0';
1186 # Force misses on that way while
1187 # realoading that line
1188 cv = Signal(INDEX_BITS)
1189 comb += cv.eq(cache_valid_bits[req_index])
1190 comb += cv.bit_select(replace_way, 1).eq(0)
1191 sync += cache_valid_bits[req_index].eq(cv)
1192
1193 # -- Store new tag in selected way
1194 # for i in 0 to NUM_WAYS-1 loop
1195 # if i = replace_way then
1196 # tagset := cache_tags(r.store_index);
1197 # write_tag(i, tagset, r.store_tag);
1198 # cache_tags(r.store_index) <= tagset;
1199 # end if;
1200 # end loop;
1201 for i in range(NUM_WAYS):
1202 with m.If(i == replace_way):
1203 sync += tagset.eq(cache_tags[r.store_index])
1204 sync += write_tag(i, tagset, r.store_tag)
1205 sync += cache_tags[r.store_index].eq(tagset)
1206
1207 # r.state <= WAIT_ACK;
1208 sync += r.state.eq(State.WAIT_ACK)
1209 # end if;
1210
1211 # -- Requests are all sent if stb is 0
1212 # stbs_done := r.wb.stb = '0';
1213 # Requests are all sent if stb is 0
1214 comb += stbs_done.eq(r.wb.stb == 0)
1215
1216 # -- If we are still sending requests,
1217 # -- was one accepted ?
1218 # if wishbone_in.stall = '0' and not stbs_done then
1219 # If we are still sending requests,
1220 # was one accepted?
1221 with m.If(~wb_in.stall & ~stbs_done):
1222 # -- That was the last word ? We are done sending.
1223 # -- Clear stb and set stbs_done so we can handle
1224 # -- an eventual last ack on the same cycle.
1225 # if is_last_row_addr(r.wb.adr, r.end_row_ix) then
1226 # r.wb.stb <= '0';
1227 # stbs_done := true;
1228 # end if;
1229 # That was the last word ?
1230 # We are done sending.
1231 # Clear stb and set stbs_done
1232 # so we can handle
1233 # an eventual last ack on
1234 # the same cycle.
1235 with m.If(is_last_row_addr(r.wb.adr, r.end_row_ix)):
1236 sync += r.wb.stb.eq(0)
1237 comb += stbs_done.eq(1)
1238
1239 # -- Calculate the next row address
1240 # r.wb.adr <= next_row_addr(r.wb.adr);
1241 # Calculate the next row address
1242 rarange = r.wb.adr[ROW_OFF_BITS:LINE_OFF_BITS]
1243 sync += rarange.eq(rarange + 1)
1244 # end if;
1245
1246 # -- Incoming acks processing
1247 # if wishbone_in.ack = '1' then
1248 # Incoming acks processing
1249 with m.If(wb_in.ack):
1250 # r.rows_valid(r.store_row mod ROW_PER_LINE)
1251 # <= '1';
1252 sync += r.rows_valid[r.store_row & ROW_PER_LINE].eq(1)
1253
1254 # -- Check for completion
1255 # if stbs_done and
1256 # is_last_row(r.store_row, r.end_row_ix) then
1257 # Check for completion
1258 with m.If(stbs_done &
1259 is_last_row(r.store_row, r.end_row_ix)):
1260 # -- Complete wishbone cycle
1261 # r.wb.cyc <= '0';
1262 # Complete wishbone cycle
1263 sync += r.wb.cyc.eq(0)
1264
1265 # -- Cache line is now valid
1266 # cache_valids(r.store_index)(replace_way) <=
1267 # r.store_valid and not inval_in;
1268 # Cache line is now valid
1269 cv = Signal(INDEX_BITS)
1270 sync += cv.eq(cache_valid_bits[r.store_index])
1271 sync += cv.bit_select(replace_way, 1).eq(
1272 r.store_valid & ~inval_in)
1273
1274 # -- We are done
1275 # r.state <= IDLE;
1276 # We are done
1277 sync += r.state.eq(State.IDLE)
1278 # end if;
1279
1280 # -- Increment store row counter
1281 # r.store_row <= next_row(r.store_row);
1282 # Increment store row counter
1283 sync += r.store_row.eq(next_row(r.store_row))
1284 # end if;
1285 # end case;
1286 # end if;
1287 #
1288 # -- TLB miss and protection fault processing
1289 # if rst = '1' or flush_in = '1' or m_in.tlbld = '1' then
1290 # r.fetch_failed <= '0';
1291 # elsif i_in.req = '1' and access_ok = '0' and
1292 # stall_in = '0' then
1293 # r.fetch_failed <= '1';
1294 # end if;
1295 # TLB miss and protection fault processing
1296 with m.If(flush_in | m_in.tlbld):
1297 sync += r.fetch_failed.eq(0)
1298 with m.Elif(i_in.req & ~access_ok & ~stall_in):
1299 sync += r.fetch_failed.eq(1)
1300 # end if;
1301 # end process;
1302
1303 # icache_log: if LOG_LENGTH > 0 generate
1304 def icache_log(self, m, req_hit_way, ra_valid, access_ok,
1305 req_is_miss, req_is_hit, lway, wstate, r):
1306 comb = m.d.comb
1307 sync = m.d.sync
1308
1309 wb_in, i_out = self.wb_in, self.i_out
1310 log_out, stall_out = self.log_out, self.stall_out
1311
1312 # -- Output data to logger
1313 # signal log_data : std_ulogic_vector(53 downto 0);
1314 # begin
1315 # data_log: process(clk)
1316 # variable lway: way_t;
1317 # variable wstate: std_ulogic;
1318 # Output data to logger
1319 for i in range(LOG_LENGTH):
1320 # Output data to logger
1321 log_data = Signal(54)
1322 lway = Signal(NUM_WAYS)
1323 wstate = Signal()
1324
1325 # begin
1326 # if rising_edge(clk) then
1327 # lway := req_hit_way;
1328 # wstate := '0';
1329 comb += lway.eq(req_hit_way)
1330 comb += wstate.eq(0)
1331
1332 # if r.state /= IDLE then
1333 # wstate := '1';
1334 # end if;
1335 with m.If(r.state != State.IDLE):
1336 sync += wstate.eq(1)
1337
1338 # log_data <= i_out.valid &
1339 # i_out.insn &
1340 # wishbone_in.ack &
1341 # r.wb.adr(5 downto 3) &
1342 # r.wb.stb & r.wb.cyc &
1343 # wishbone_in.stall &
1344 # stall_out &
1345 # r.fetch_failed &
1346 # r.hit_nia(5 downto 2) &
1347 # wstate &
1348 # std_ulogic_vector(to_unsigned(lway, 3)) &
1349 # req_is_hit & req_is_miss &
1350 # access_ok &
1351 # ra_valid;
1352 sync += log_data.eq(Cat(
1353 ra_valid, access_ok, req_is_miss, req_is_hit,
1354 lway, wstate, r.hit_nia[2:6],
1355 r.fetch_failed, stall_out, wb_in.stall, r.wb.cyc,
1356 r.wb.stb, r.wb.adr[3:6], wb_in.ack, i_out.insn,
1357 i_out.valid
1358 ))
1359 # end if;
1360 # end process;
1361 # log_out <= log_data;
1362 comb += log_out.eq(log_data)
1363 # end generate;
1364 # end;
1365
1366 def elaborate(self, platform):
1367
1368 m = Module()
1369 comb = m.d.comb
1370
1371 # Storage. Hopefully "cache_rows" is a BRAM, the rest is LUTs
1372 cache_tags = CacheTagArray()
1373 cache_valid_bits = CacheValidBitsArray()
1374
1375 # signal itlb_valids : tlb_valids_t;
1376 # signal itlb_tags : tlb_tags_t;
1377 # signal itlb_ptes : tlb_ptes_t;
1378 # attribute ram_style of itlb_tags : signal is "distributed";
1379 # attribute ram_style of itlb_ptes : signal is "distributed";
1380 itlb_valid_bits = TLBValidBitsArray()
1381 itlb_tags = TLBTagArray()
1382 itlb_ptes = TLBPtesArray()
1383 # TODO to be passed to nmigen as ram attributes
1384 # attribute ram_style of itlb_tags : signal is "distributed";
1385 # attribute ram_style of itlb_ptes : signal is "distributed";
1386
1387 # -- Privilege bit from PTE EAA field
1388 # signal eaa_priv : std_ulogic;
1389 # Privilege bit from PTE EAA field
1390 eaa_priv = Signal()
1391
1392 # signal r : reg_internal_t;
1393 r = RegInternal()
1394
1395 # -- Async signals on incoming request
1396 # signal req_index : index_t;
1397 # signal req_row : row_t;
1398 # signal req_hit_way : way_t;
1399 # signal req_tag : cache_tag_t;
1400 # signal req_is_hit : std_ulogic;
1401 # signal req_is_miss : std_ulogic;
1402 # signal req_laddr : std_ulogic_vector(63 downto 0);
1403 # Async signal on incoming request
1404 req_index = Signal(NUM_LINES)
1405 req_row = Signal(BRAM_ROWS)
1406 req_hit_way = Signal(NUM_WAYS)
1407 req_tag = Signal(TAG_BITS)
1408 req_is_hit = Signal()
1409 req_is_miss = Signal()
1410 req_laddr = Signal(64)
1411
1412 # signal tlb_req_index : tlb_index_t;
1413 # signal real_addr : std_ulogic_vector(
1414 # REAL_ADDR_BITS - 1 downto 0
1415 # );
1416 # signal ra_valid : std_ulogic;
1417 # signal priv_fault : std_ulogic;
1418 # signal access_ok : std_ulogic;
1419 # signal use_previous : std_ulogic;
1420 tlb_req_index = Signal(TLB_SIZE)
1421 real_addr = Signal(REAL_ADDR_BITS)
1422 ra_valid = Signal()
1423 priv_fault = Signal()
1424 access_ok = Signal()
1425 use_previous = Signal()
1426
1427 # signal cache_out : cache_ram_out_t;
1428 cache_out = CacheRamOut()
1429
1430 # signal plru_victim : plru_out_t;
1431 # signal replace_way : way_t;
1432 plru_victim = PLRUOut()
1433 replace_way = Signal(NUM_WAYS)
1434
1435 # call sub-functions putting everything together, using shared
1436 # signals established above
1437 self.rams(m, r, cache_out, use_previous, replace_way, req_row)
1438 self.maybe_plrus(m, r, plru_victim)
1439 self.itlb_lookup(m, tlb_req_index, itlb_ptes, itlb_tags,
1440 real_addr, itlb_valid_bits, ra_valid, eaa_priv,
1441 priv_fault, access_ok)
1442 self.itlb_update(m, itlb_valid_bits, itlb_tags, itlb_ptes)
1443 self.icache_comb(m, use_previous, r, req_index, req_row,
1444 req_tag, real_addr, req_laddr, cache_valid_bits,
1445 cache_tags, access_ok, req_is_hit, req_is_miss,
1446 replace_way, plru_victim, cache_out)
1447 self.icache_hit(m, use_previous, r, req_is_hit, req_hit_way,
1448 req_index, req_tag, real_addr)
1449 self.icache_miss(m, cache_valid_bits, r, req_is_miss, req_index,
1450 req_laddr, req_tag, replace_way, cache_tags,
1451 access_ok, real_addr)
1452 #self.icache_log(m, log_out, req_hit_way, ra_valid, access_ok,
1453 # req_is_miss, req_is_hit, lway, wstate, r)
1454
1455 return m
1456
1457
1458 # icache_tb.vhdl
1459 #
1460 # library ieee;
1461 # use ieee.std_logic_1164.all;
1462 #
1463 # library work;
1464 # use work.common.all;
1465 # use work.wishbone_types.all;
1466 #
1467 # entity icache_tb is
1468 # end icache_tb;
1469 #
1470 # architecture behave of icache_tb is
1471 # signal clk : std_ulogic;
1472 # signal rst : std_ulogic;
1473 #
1474 # signal i_out : Fetch1ToIcacheType;
1475 # signal i_in : IcacheToDecode1Type;
1476 #
1477 # signal m_out : MmuToIcacheType;
1478 #
1479 # signal wb_bram_in : wishbone_master_out;
1480 # signal wb_bram_out : wishbone_slave_out;
1481 #
1482 # constant clk_period : time := 10 ns;
1483 # begin
1484 # icache0: entity work.icache
1485 # generic map(
1486 # LINE_SIZE => 64,
1487 # NUM_LINES => 4
1488 # )
1489 # port map(
1490 # clk => clk,
1491 # rst => rst,
1492 # i_in => i_out,
1493 # i_out => i_in,
1494 # m_in => m_out,
1495 # stall_in => '0',
1496 # flush_in => '0',
1497 # inval_in => '0',
1498 # wishbone_out => wb_bram_in,
1499 # wishbone_in => wb_bram_out
1500 # );
1501 #
1502 # -- BRAM Memory slave
1503 # bram0: entity work.wishbone_bram_wrapper
1504 # generic map(
1505 # MEMORY_SIZE => 1024,
1506 # RAM_INIT_FILE => "icache_test.bin"
1507 # )
1508 # port map(
1509 # clk => clk,
1510 # rst => rst,
1511 # wishbone_in => wb_bram_in,
1512 # wishbone_out => wb_bram_out
1513 # );
1514 #
1515 # clk_process: process
1516 # begin
1517 # clk <= '0';
1518 # wait for clk_period/2;
1519 # clk <= '1';
1520 # wait for clk_period/2;
1521 # end process;
1522 #
1523 # rst_process: process
1524 # begin
1525 # rst <= '1';
1526 # wait for 2*clk_period;
1527 # rst <= '0';
1528 # wait;
1529 # end process;
1530 #
1531 # stim: process
1532 # begin
1533 # i_out.req <= '0';
1534 # i_out.nia <= (others => '0');
1535 # i_out.stop_mark <= '0';
1536 #
1537 # m_out.tlbld <= '0';
1538 # m_out.tlbie <= '0';
1539 # m_out.addr <= (others => '0');
1540 # m_out.pte <= (others => '0');
1541 #
1542 # wait until rising_edge(clk);
1543 # wait until rising_edge(clk);
1544 # wait until rising_edge(clk);
1545 # wait until rising_edge(clk);
1546 #
1547 # i_out.req <= '1';
1548 # i_out.nia <= x"0000000000000004";
1549 #
1550 # wait for 30*clk_period;
1551 # wait until rising_edge(clk);
1552 #
1553 # assert i_in.valid = '1' severity failure;
1554 # assert i_in.insn = x"00000001"
1555 # report "insn @" & to_hstring(i_out.nia) &
1556 # "=" & to_hstring(i_in.insn) &
1557 # " expected 00000001"
1558 # severity failure;
1559 #
1560 # i_out.req <= '0';
1561 #
1562 # wait until rising_edge(clk);
1563 #
1564 # -- hit
1565 # i_out.req <= '1';
1566 # i_out.nia <= x"0000000000000008";
1567 # wait until rising_edge(clk);
1568 # wait until rising_edge(clk);
1569 # assert i_in.valid = '1' severity failure;
1570 # assert i_in.insn = x"00000002"
1571 # report "insn @" & to_hstring(i_out.nia) &
1572 # "=" & to_hstring(i_in.insn) &
1573 # " expected 00000002"
1574 # severity failure;
1575 # wait until rising_edge(clk);
1576 #
1577 # -- another miss
1578 # i_out.req <= '1';
1579 # i_out.nia <= x"0000000000000040";
1580 #
1581 # wait for 30*clk_period;
1582 # wait until rising_edge(clk);
1583 #
1584 # assert i_in.valid = '1' severity failure;
1585 # assert i_in.insn = x"00000010"
1586 # report "insn @" & to_hstring(i_out.nia) &
1587 # "=" & to_hstring(i_in.insn) &
1588 # " expected 00000010"
1589 # severity failure;
1590 #
1591 # -- test something that aliases
1592 # i_out.req <= '1';
1593 # i_out.nia <= x"0000000000000100";
1594 # wait until rising_edge(clk);
1595 # wait until rising_edge(clk);
1596 # assert i_in.valid = '0' severity failure;
1597 # wait until rising_edge(clk);
1598 #
1599 # wait for 30*clk_period;
1600 # wait until rising_edge(clk);
1601 #
1602 # assert i_in.valid = '1' severity failure;
1603 # assert i_in.insn = x"00000040"
1604 # report "insn @" & to_hstring(i_out.nia) &
1605 # "=" & to_hstring(i_in.insn) &
1606 # " expected 00000040"
1607 # severity failure;
1608 #
1609 # i_out.req <= '0';
1610 #
1611 # std.env.finish;
1612 # end process;
1613 # end;
def icache_sim(dut):
    """Testbench stimulus, ported from the VHDL icache_tb (commented
    above): a miss at 0x4, a hit at 0x8, another miss at 0x40, then
    an aliasing fetch at 0x100."""
    i_out = dut.i_in     # fetch1 -> icache request port
    i_in = dut.i_out     # icache -> decode1 result port (DUT-driven)
    m_out = dut.m_in     # MMU -> icache port

    # NOTE: i_in (dut.i_out) is an output driven by the design; the
    # testbench must only *read* it.  The original
    # "yield i_in.valid.eq(0)" fought the DUT for that signal.
    yield i_out.req.eq(0)
    yield i_out.nia.eq(~1)
    yield i_out.stop_mark.eq(0)
    yield m_out.tlbld.eq(0)
    yield m_out.tlbie.eq(0)
    yield m_out.addr.eq(~1)
    yield m_out.pte.eq(~1)
    yield
    yield
    yield
    yield

    # miss: fetch 0x4, allow 30 cycles for the reload machine
    yield i_out.req.eq(1)
    yield i_out.nia.eq(Const(0x0000000000000004, 64))
    for i in range(30):
        yield
    yield
    valid = yield i_in.valid
    insn = yield i_in.insn
    print(f"valid? {valid}")
    # TODO(review): re-enable once the reload machine works
    #assert valid
    #assert insn == 0x00000001, \
    #    ("insn @%x=%x expected 00000001" % (0x4, insn))
    yield i_out.req.eq(0)
    yield

    # hit
    yield i_out.req.eq(1)
    yield i_out.nia.eq(Const(0x0000000000000008, 64))
    yield
    yield
    valid = yield i_in.valid
    insn = yield i_in.insn
    #assert valid
    #assert insn == 0x00000002, \
    #    ("insn @%x=%x expected 00000002" % (0x8, insn))
    yield

    # another miss
    yield i_out.req.eq(1)
    yield i_out.nia.eq(Const(0x0000000000000040, 64))
    for i in range(30):
        yield
    yield
    valid = yield i_in.valid
    insn = yield i_in.insn
    #assert valid
    #assert insn == 0x00000010, \
    #    ("insn @%x=%x expected 00000010" % (0x40, insn))

    # test something that aliases
    yield i_out.req.eq(1)
    yield i_out.nia.eq(Const(0x0000000000000100, 64))
    yield
    yield
    #assert i_in.valid == Const(1, 1)
    for i in range(30):
        yield
    yield
    valid = yield i_in.valid
    insn = yield i_in.insn
    #assert valid
    #assert insn == 0x00000040, \
    #    ("insn @%x=%x expected 00000040" % (0x100, insn))
    yield i_out.req.eq(0)
1684
1685
def test_icache():
    """Wrap the ICache in a top-level Module, attach the icache_sim
    stimulus process, and run the simulation with a VCD trace."""
    top = Module()
    top.submodules.icache = dut = ICache()

    # nmigen Simulation
    sim = Simulator(top)
    sim.add_clock(1e-6)
    sim.add_sync_process(wrap(icache_sim(dut)))

    with sim.write_vcd('test_icache.vcd'):
        sim.run()
1699
if __name__ == '__main__':
    # emit RTLIL for inspection, then run the simulation testbench
    dut = ICache()
    ilang = rtlil.convert(dut, ports=[])
    with open("test_icache.il", "w") as f:
        f.write(ilang)

    test_icache()