src/soc/experiment/icache.py

   1 """ICache
   2
   3 based on Anton Blanchard microwatt icache.vhdl
   4
   5 Set associative icache
   6
   7 TODO (in no specific order):
   8 * Add debug interface to inspect cache content
   9 * Add snoop/invalidate path
  10 * Add multi-hit error detection
  11 * Pipelined bus interface (wb or axi)
  12 * Maybe add parity? There's a few bits free in each BRAM row on Xilinx
  13 * Add optimization: service hits on partially loaded lines
* Add optimization: (maybe) interrupt reload on flush/redirect
  15 * Check if playing with the geometry of the cache tags allow for more
  16   efficient use of distributed RAM and less logic/muxes. Currently we
  17   write TAG_BITS width which may not match full ram blocks and might
  18   cause muxes to be inferred for "partial writes".
  19 * Check if making the read size of PLRU a ROM helps utilization
  20
  21 """
  22 from enum import Enum, unique
  23 from nmigen import (Module, Signal, Elaboratable, Cat, Array, Const, Repl)
  24 from nmigen.cli import main, rtlil
  25 from nmutil.iocontrol import RecordObject
  26 from nmigen.utils import log2_int
  27 from nmutil.util import Display
  28
  29 #from nmutil.plru import PLRU
  30 from soc.experiment.cache_ram import CacheRam
  31 from soc.experiment.plru import PLRU
  32
  33 from soc.experiment.mem_types import (Fetch1ToICacheType,
  34                                       ICacheToDecode1Type,
  35                                       MMUToICacheType)
  36
  37 from soc.experiment.wb_types import (WB_ADDR_BITS, WB_DATA_BITS,
  38                                      WB_SEL_BITS, WBAddrType, WBDataType,
  39                                      WBSelType, WBMasterOut, WBSlaveOut,
  40                                      WBMasterOutVector, WBSlaveOutVector,
  41                                      WBIOMasterOut, WBIOSlaveOut)
  42
  43 # for test
  44 from nmigen_soc.wishbone.sram import SRAM
  45 from nmigen import Memory
  46 from nmutil.util import wrap
  47 from nmigen.cli import main, rtlil
  48 if True:
  49     from nmigen.back.pysim import Simulator, Delay, Settle
  50 else:
  51     from nmigen.sim.cxxsim import Simulator, Delay, Settle
  52
  53
# Cache geometry parameters. These mirror the generics of the VHDL
# icache entity (SIM, LINE_SIZE, ROW_SIZE, NUM_LINES, ...).
#
# NOTE(review): SIM presumably mirrors the VHDL "SIM" generic (simulation
# debug reports); it is not referenced in this part of the file.
SIM            = 0
# Line size in bytes
LINE_SIZE      = 64
# BRAM organisation: We never access more than wishbone_data_bits
# at a time so to save resources we make the array only that wide,
# and use consecutive indices to make up a cache "line"
#
# ROW_SIZE is the width in bytes of the BRAM (based on WB, so 64-bits)
ROW_SIZE       = WB_DATA_BITS // 8
# Number of lines in a set
NUM_LINES      = 16
# Number of ways
NUM_WAYS       = 4
# L1 ITLB number of entries (direct mapped)
TLB_SIZE       = 64
# L1 ITLB log_2(page_size)
TLB_LG_PGSZ    = 12
# Number of real address bits that we store
REAL_ADDR_BITS = 56
# Non-zero to enable log data collection
LOG_LENGTH     = 0

# ROW_SIZE_BITS is the width in bits of one BRAM row
ROW_SIZE_BITS  = ROW_SIZE * 8
# ROW_PER_LINE is the number of row
# (wishbone) transactions in a line
ROW_PER_LINE   = LINE_SIZE // ROW_SIZE
# BRAM_ROWS is the number of rows in
# BRAM needed to represent the full icache
# (this is the depth of each way's CacheRam, see ICache.rams)
BRAM_ROWS      = NUM_LINES * ROW_PER_LINE
# INSN_PER_ROW is the number of 32bit
# instructions per BRAM row
INSN_PER_ROW   = ROW_SIZE_BITS // 32

# Debug output: dump the derived geometry when the module is imported
print("ROW_SIZE", ROW_SIZE)
print("ROW_SIZE_BITS", ROW_SIZE_BITS)
print("ROW_PER_LINE", ROW_PER_LINE)
print("BRAM_ROWS", BRAM_ROWS)
print("INSN_PER_ROW", INSN_PER_ROW)
  91
# Bit fields counts in the address
#
# INSN_BITS is the number of bits to
# select an instruction in a row
INSN_BITS      = log2_int(INSN_PER_ROW)
# ROW_BITS is the number of bits to
# select a row
ROW_BITS       = log2_int(BRAM_ROWS)
# ROW_LINEBITS is the number of bits to
# select a row within a line
ROW_LINEBITS   = log2_int(ROW_PER_LINE)
# LINE_OFF_BITS is the number of bits for
# the offset in a cache line
LINE_OFF_BITS  = log2_int(LINE_SIZE)
# ROW_OFF_BITS is the number of bits for
# the offset in a row
ROW_OFF_BITS   = log2_int(ROW_SIZE)
# INDEX_BITS is the number of bits to
# select a cache line
INDEX_BITS     = log2_int(NUM_LINES)
# SET_SIZE_BITS is the log base 2 of
# the set size
SET_SIZE_BITS  = LINE_OFF_BITS + INDEX_BITS
# TAG_BITS is the number of bits of
# the tag part of the address
TAG_BITS       = REAL_ADDR_BITS - SET_SIZE_BITS
# TAG_WIDTH is the width in bits of each way of the tag RAM
# (TAG_BITS rounded up to a multiple of 8).
# NOTE(review): TAG_RAM_WIDTH and read_tag() below use TAG_BITS, not
# TAG_WIDTH, so within this part of the file TAG_WIDTH is unused.
TAG_WIDTH = TAG_BITS + 7 - ((TAG_BITS + 7) % 8)

# WAY_BITS is the number of bits to
# select a way
WAY_BITS       = log2_int(NUM_WAYS)
# TAG_RAM_WIDTH is the width of one tag RAM row: the tags of all
# ways of a line, packed side by side
TAG_RAM_WIDTH  = TAG_BITS * NUM_WAYS

#     -- L1 ITLB.
#     constant TLB_BITS : natural := log2(TLB_SIZE);
#     constant TLB_EA_TAG_BITS : natural := 64 - (TLB_LG_PGSZ + TLB_BITS);
#     constant TLB_PTE_BITS : natural := 64;
# TLB_BITS is the number of bits to select an ITLB entry
TLB_BITS        = log2_int(TLB_SIZE)
# TLB_EA_TAG_BITS is the number of effective-address bits above the
# page offset and the TLB index, stored as the ITLB tag
TLB_EA_TAG_BITS = 64 - (TLB_LG_PGSZ + TLB_BITS)
# TLB_PTE_BITS is the width in bits of a stored PTE
TLB_PTE_BITS    = 64


# Debug output: dump the derived bit-field widths at import time
print("INSN_BITS", INSN_BITS)
print("ROW_BITS", ROW_BITS)
print("ROW_LINEBITS", ROW_LINEBITS)
print("LINE_OFF_BITS", LINE_OFF_BITS)
print("ROW_OFF_BITS", ROW_OFF_BITS)
print("INDEX_BITS", INDEX_BITS)
print("SET_SIZE_BITS", SET_SIZE_BITS)
print("TAG_BITS", TAG_BITS)
print("WAY_BITS", WAY_BITS)
print("TAG_RAM_WIDTH", TAG_RAM_WIDTH)
print("TLB_BITS", TLB_BITS)
print("TLB_EA_TAG_BITS", TLB_EA_TAG_BITS)
print("TLB_PTE_BITS", TLB_PTE_BITS)
 148
 149
 150
 151
 152 # architecture rtl of icache is
 153 #constant ROW_SIZE_BITS : natural := ROW_SIZE*8;
 154 #-- ROW_PER_LINE is the number of row (wishbone
 155 #-- transactions) in a line
 156 #constant ROW_PER_LINE  : natural := LINE_SIZE / ROW_SIZE;
 157 #-- BRAM_ROWS is the number of rows in BRAM
 158 #-- needed to represent the full
 159 #-- icache
 160 #constant BRAM_ROWS     : natural := NUM_LINES * ROW_PER_LINE;
 161 #-- INSN_PER_ROW is the number of 32bit instructions per BRAM row
 162 #constant INSN_PER_ROW  : natural := ROW_SIZE_BITS / 32;
 163 #-- Bit fields counts in the address
 164 #
 165 #-- INSN_BITS is the number of bits to select
 166 #-- an instruction in a row
 167 #constant INSN_BITS     : natural := log2(INSN_PER_ROW);
 168 #-- ROW_BITS is the number of bits to select a row
 169 #constant ROW_BITS      : natural := log2(BRAM_ROWS);
 170 #-- ROW_LINEBITS is the number of bits to
 171 #-- select a row within a line
 172 #constant ROW_LINEBITS  : natural := log2(ROW_PER_LINE);
 173 #-- LINE_OFF_BITS is the number of bits for the offset
 174 #-- in a cache line
 175 #constant LINE_OFF_BITS : natural := log2(LINE_SIZE);
 176 #-- ROW_OFF_BITS is the number of bits for the offset in a row
 177 #constant ROW_OFF_BITS  : natural := log2(ROW_SIZE);
 178 #-- INDEX_BITS is the number of bits to select a cache line
 179 #constant INDEX_BITS    : natural := log2(NUM_LINES);
 180 #-- SET_SIZE_BITS is the log base 2 of the set size
 181 #constant SET_SIZE_BITS : natural := LINE_OFF_BITS + INDEX_BITS;
 182 #-- TAG_BITS is the number of bits of the tag part of the address
 183 #constant TAG_BITS      : natural := REAL_ADDR_BITS - SET_SIZE_BITS;
 184 #-- WAY_BITS is the number of bits to select a way
 185 #constant WAY_BITS     : natural := log2(NUM_WAYS);
 186
 187 #-- Example of layout for 32 lines of 64 bytes:
 188 #--
 189 #-- ..  tag    |index|  line  |
 190 #-- ..         |   row   |    |
 191 #-- ..         |     |   | |00| zero          (2)
 192 #-- ..         |     |   |-|  | INSN_BITS     (1)
 193 #-- ..         |     |---|    | ROW_LINEBITS  (3)
 194 #-- ..         |     |--- - --| LINE_OFF_BITS (6)
 195 #-- ..         |         |- --| ROW_OFF_BITS  (3)
 196 #-- ..         |----- ---|    | ROW_BITS      (8)
 197 #-- ..         |-----|        | INDEX_BITS    (5)
 198 #-- .. --------|              | TAG_BITS      (53)
 199    # Example of layout for 32 lines of 64 bytes:
 200    #
 201    # ..  tag    |index|  line  |
 202    # ..         |   row   |    |
 203    # ..         |     |   | |00| zero          (2)
 204    # ..         |     |   |-|  | INSN_BITS     (1)
 205    # ..         |     |---|    | ROW_LINEBITS  (3)
 206    # ..         |     |--- - --| LINE_OFF_BITS (6)
 207    # ..         |         |- --| ROW_OFF_BITS  (3)
 208    # ..         |----- ---|    | ROW_BITS      (8)
 209    # ..         |-----|        | INDEX_BITS    (5)
 210    # .. --------|              | TAG_BITS      (53)
 211
 212 #subtype row_t is integer range 0 to BRAM_ROWS-1;
 213 #subtype index_t is integer range 0 to NUM_LINES-1;
 214 #subtype way_t is integer range 0 to NUM_WAYS-1;
 215 #subtype row_in_line_t is unsigned(ROW_LINEBITS-1 downto 0);
 216 #
 217 #-- The cache data BRAM organized as described above for each way
 218 #subtype cache_row_t is std_ulogic_vector(ROW_SIZE_BITS-1 downto 0);
 219 #
 220 #-- The cache tags LUTRAM has a row per set. Vivado is a pain and will
 221 #-- not handle a clean (commented) definition of the cache tags as a 3d
 222 #-- memory. For now, work around it by putting all the tags
 223 #subtype cache_tag_t is std_logic_vector(TAG_BITS-1 downto 0);
 224 #  type cache_tags_set_t is array(way_t) of cache_tag_t;
 225 #  type cache_tags_array_t is array(index_t) of cache_tags_set_t;
 226 #constant TAG_RAM_WIDTH : natural := TAG_BITS * NUM_WAYS;
 227 #subtype cache_tags_set_t is std_logic_vector(TAG_RAM_WIDTH-1 downto 0);
 228 #type cache_tags_array_t is array(index_t) of cache_tags_set_t;
 229 def CacheTagArray():
 230     return Array(Signal(TAG_RAM_WIDTH, name="cachetag_%d" %x) \
 231                  for x in range(NUM_LINES))
 232
 233 #-- The cache valid bits
 234 #subtype cache_way_valids_t is std_ulogic_vector(NUM_WAYS-1 downto 0);
 235 #type cache_valids_t is array(index_t) of cache_way_valids_t;
 236 #type row_per_line_valid_t is array(0 to ROW_PER_LINE - 1) of std_ulogic;
 237 def CacheValidBitsArray():
 238     return Array(Signal(NUM_WAYS, name="cachevalid_%d" %x) \
 239                  for x in range(NUM_LINES))
 240
 241 def RowPerLineValidArray():
 242     return Array(Signal(name="rows_valid_%d" %x) \
 243                  for x in range(ROW_PER_LINE))
 244
 245
 246 #attribute ram_style : string;
 247 #attribute ram_style of cache_tags : signal is "distributed";
   # TODO to be passed to nmigen as ram attributes
 249    # attribute ram_style : string;
 250    # attribute ram_style of cache_tags : signal is "distributed";
 251
 252
 253 #subtype tlb_index_t is integer range 0 to TLB_SIZE - 1;
 254 #type tlb_valids_t is array(tlb_index_t) of std_ulogic;
 255 #subtype tlb_tag_t is std_ulogic_vector(TLB_EA_TAG_BITS - 1 downto 0);
 256 #type tlb_tags_t is array(tlb_index_t) of tlb_tag_t;
 257 #subtype tlb_pte_t is std_ulogic_vector(TLB_PTE_BITS - 1 downto 0);
 258 #type tlb_ptes_t is array(tlb_index_t) of tlb_pte_t;
 259 def TLBValidBitsArray():
 260     return Array(Signal(name="tlbvalid_%d" %x) \
 261                  for x in range(TLB_SIZE))
 262
 263 def TLBTagArray():
 264     return Array(Signal(TLB_EA_TAG_BITS, name="tlbtag_%d" %x) \
 265                  for x in range(TLB_SIZE))
 266
 267 def TLBPtesArray():
 268     return Array(Signal(TLB_PTE_BITS, name="tlbptes_%d" %x) \
 269                  for x in range(TLB_SIZE))
 270
 271
 272 #-- Cache RAM interface
 273 #type cache_ram_out_t is array(way_t) of cache_row_t;
 274 # Cache RAM interface
 275 def CacheRamOut():
 276     return Array(Signal(ROW_SIZE_BITS, name="cache_out_%d" %x) \
 277                  for x in range(NUM_WAYS))
 278
 279 #-- PLRU output interface
 280 #type plru_out_t is array(index_t) of
 281 # std_ulogic_vector(WAY_BITS-1 downto 0);
 282 # PLRU output interface
 283 def PLRUOut():
 284     return Array(Signal(WAY_BITS, name="plru_out_%d" %x) \
 285                  for x in range(NUM_LINES))
 286
 287 #     -- Return the cache line index (tag index) for an address
 288 #     function get_index(addr: std_ulogic_vector(63 downto 0))
 289 #      return index_t is
 290 #     begin
 291 #         return to_integer(unsigned(
 292 #          addr(SET_SIZE_BITS - 1 downto LINE_OFF_BITS)
 293 #         ));
 294 #     end;
 295 # Return the cache line index (tag index) for an address
 296 def get_index(addr):
 297     return addr[LINE_OFF_BITS:SET_SIZE_BITS]
 298
 299 #     -- Return the cache row index (data memory) for an address
 300 #     function get_row(addr: std_ulogic_vector(63 downto 0))
 301 #       return row_t is
 302 #     begin
 303 #         return to_integer(unsigned(
 304 #          addr(SET_SIZE_BITS - 1 downto ROW_OFF_BITS)
 305 #         ));
 306 #     end;
 307 # Return the cache row index (data memory) for an address
 308 def get_row(addr):
 309     return addr[ROW_OFF_BITS:SET_SIZE_BITS]
 310
 311 #     -- Return the index of a row within a line
 312 #     function get_row_of_line(row: row_t) return row_in_line_t is
 313 #       variable row_v : unsigned(ROW_BITS-1 downto 0);
 314 #     begin
 315 #       row_v := to_unsigned(row, ROW_BITS);
 316 #         return row_v(ROW_LINEBITS-1 downto 0);
 317 #     end;
 318 # Return the index of a row within a line
 319 def get_row_of_line(row):
 320     return row[:ROW_LINEBITS]
 321
 322 #     -- Returns whether this is the last row of a line
 323 #     function is_last_row_addr(addr: wishbone_addr_type;
 324 #      last: row_in_line_t
 325 #     )
 326 #      return boolean is
 327 #     begin
 328 #       return unsigned(
 329 #        addr(LINE_OFF_BITS-1 downto ROW_OFF_BITS)
 330 #       ) = last;
 331 #     end;
 332 # Returns whether this is the last row of a line
 333 def is_last_row_addr(addr, last):
 334     return addr[ROW_OFF_BITS:LINE_OFF_BITS] == last
 335
 336 #     -- Returns whether this is the last row of a line
 337 #     function is_last_row(row: row_t;
 338 #      last: row_in_line_t) return boolean is
 339 #     begin
 340 #       return get_row_of_line(row) = last;
 341 #     end;
 342 # Returns whether this is the last row of a line
 343 def is_last_row(row, last):
 344     return get_row_of_line(row) == last
 345
 346 #     -- Return the next row in the current cache line. We use a dedicated
 347 #     -- function in order to limit the size of the generated adder to be
 348 #     -- only the bits within a cache line (3 bits with default settings)
 349 #     function next_row(row: row_t) return row_t is
 350 #       variable row_v   : std_ulogic_vector(ROW_BITS-1 downto 0);
 351 #       variable row_idx : std_ulogic_vector(ROW_LINEBITS-1 downto 0);
 352 #       variable result  : std_ulogic_vector(ROW_BITS-1 downto 0);
 353 #     begin
 354 #       row_v := std_ulogic_vector(to_unsigned(row, ROW_BITS));
 355 #       row_idx := row_v(ROW_LINEBITS-1 downto 0);
 356 #       row_v(ROW_LINEBITS-1 downto 0) :=
 357 #        std_ulogic_vector(unsigned(row_idx) + 1);
 358 #       return to_integer(unsigned(row_v));
 359 #     end;
 360 # Return the next row in the current cache line. We use a dedicated
 361 # function in order to limit the size of the generated adder to be
 362 # only the bits within a cache line (3 bits with default settings)
 363 def next_row(row):
 364     row_v = row[0:ROW_LINEBITS] + 1
 365     return Cat(row_v[:ROW_LINEBITS], row[ROW_LINEBITS:])
 366 #     -- Read the instruction word for the given address in the
 367 #     -- current cache row
 368 #     function read_insn_word(addr: std_ulogic_vector(63 downto 0);
 369 #                           data: cache_row_t) return std_ulogic_vector is
 370 #       variable word: integer range 0 to INSN_PER_ROW-1;
 371 #     begin
 372 #         word := to_integer(unsigned(addr(INSN_BITS+2-1 downto 2)));
 373 #       return data(31+word*32 downto word*32);
 374 #     end;
 375 # Read the instruction word for the given address
 376 # in the current cache row
 377 def read_insn_word(addr, data):
 378     word = addr[2:INSN_BITS+2]
 379     return data.word_select(word, 32)
 380
 381 #     -- Get the tag value from the address
 382 #     function get_tag(
 383 #      addr: std_ulogic_vector(REAL_ADDR_BITS - 1 downto 0)
 384 #     )
 385 #      return cache_tag_t is
 386 #     begin
 387 #         return addr(REAL_ADDR_BITS - 1 downto SET_SIZE_BITS);
 388 #     end;
 389 # Get the tag value from the address
 390 def get_tag(addr):
 391     return addr[SET_SIZE_BITS:REAL_ADDR_BITS]
 392
 393 #     -- Read a tag from a tag memory row
 394 #     function read_tag(way: way_t; tagset: cache_tags_set_t)
 395 #      return cache_tag_t is
 396 #     begin
 397 #       return tagset((way+1) * TAG_BITS - 1 downto way * TAG_BITS);
 398 #     end;
 399 # Read a tag from a tag memory row
 400 def read_tag(way, tagset):
 401     return tagset.word_select(way, TAG_BITS)
 402
 403 #     -- Write a tag to tag memory row
 404 #     procedure write_tag(way: in way_t;
 405 #      tagset: inout cache_tags_set_t; tag: cache_tag_t) is
 406 #     begin
 407 #       tagset((way+1) * TAG_BITS - 1 downto way * TAG_BITS) := tag;
 408 #     end;
 409 # Write a tag to tag memory row
 410 def write_tag(way, tagset, tag):
 411     return read_tag(way, tagset).eq(tag)
 412
 413 #     -- Simple hash for direct-mapped TLB index
 414 #     function hash_ea(addr: std_ulogic_vector(63 downto 0))
 415 #      return tlb_index_t is
 416 #         variable hash : std_ulogic_vector(TLB_BITS - 1 downto 0);
 417 #     begin
 418 #         hash := addr(TLB_LG_PGSZ + TLB_BITS - 1 downto TLB_LG_PGSZ)
 419 #                 xor addr(
 420 #                  TLB_LG_PGSZ + 2 * TLB_BITS - 1 downto
 421 #                  TLB_LG_PGSZ + TLB_BITS
 422 #                 )
 423 #                 xor addr(
 424 #                  TLB_LG_PGSZ + 3 * TLB_BITS - 1 downto
 425 #                  TLB_LG_PGSZ + 2 * TLB_BITS
 426 #                 );
 427 #         return to_integer(unsigned(hash));
 428 #     end;
 429 # Simple hash for direct-mapped TLB index
 430 def hash_ea(addr):
 431     hsh = addr[TLB_LG_PGSZ:TLB_LG_PGSZ + TLB_BITS] ^ addr[
 432            TLB_LG_PGSZ + TLB_BITS:TLB_LG_PGSZ + 2 * TLB_BITS
 433           ] ^ addr[
 434            TLB_LG_PGSZ + 2 * TLB_BITS:TLB_LG_PGSZ + 3 * TLB_BITS
 435           ]
 436     return hsh
 437
 438 # begin
 439 #
 440 # XXX put these assert statements in - as python asserts
 441 #
 442 #     assert LINE_SIZE mod ROW_SIZE = 0;
 443 #     assert ispow2(LINE_SIZE) report "LINE_SIZE not power of 2"
 444 #     assert ispow2(NUM_LINES) report "NUM_LINES not power of 2"
 445 #     assert ispow2(ROW_PER_LINE) report "ROW_PER_LINE not power of 2"
 446 #     assert ispow2(INSN_PER_ROW) report "INSN_PER_ROW not power of 2"
 447 #     assert (ROW_BITS = INDEX_BITS + ROW_LINEBITS)
 448 #       report "geometry bits don't add up"
 449 #     assert (LINE_OFF_BITS = ROW_OFF_BITS + ROW_LINEBITS)
 450 #       report "geometry bits don't add up"
 451 #     assert (REAL_ADDR_BITS = TAG_BITS + INDEX_BITS + LINE_OFF_BITS)
 452 #       report "geometry bits don't add up"
 453 #     assert (REAL_ADDR_BITS = TAG_BITS + ROW_BITS + ROW_OFF_BITS)
 454 #       report "geometry bits don't add up"
 455 #
 456 #     sim_debug: if SIM generate
 457 #     debug: process
 458 #     begin
 459 #       report "ROW_SIZE      = " & natural'image(ROW_SIZE);
 460 #       report "ROW_PER_LINE  = " & natural'image(ROW_PER_LINE);
 461 #       report "BRAM_ROWS     = " & natural'image(BRAM_ROWS);
 462 #       report "INSN_PER_ROW  = " & natural'image(INSN_PER_ROW);
 463 #       report "INSN_BITS     = " & natural'image(INSN_BITS);
 464 #       report "ROW_BITS      = " & natural'image(ROW_BITS);
 465 #       report "ROW_LINEBITS  = " & natural'image(ROW_LINEBITS);
 466 #       report "LINE_OFF_BITS = " & natural'image(LINE_OFF_BITS);
 467 #       report "ROW_OFF_BITS  = " & natural'image(ROW_OFF_BITS);
 468 #       report "INDEX_BITS    = " & natural'image(INDEX_BITS);
 469 #       report "TAG_BITS      = " & natural'image(TAG_BITS);
 470 #       report "WAY_BITS      = " & natural'image(WAY_BITS);
 471 #       wait;
 472 #     end process;
 473 #     end generate;
 474
 475 # Cache reload state machine
@unique
class State(Enum):
    """States of the cache reload state machine.

    RegInternal.state holds one of these values and resets to IDLE.
    """
    # reset state of the state machine
    IDLE     = 0
    # NOTE(review): presumably the tag of the line being reloaded is
    # cleared in this state -- confirm against the miss-handling code
    CLR_TAG  = 1
    # a reload is in progress; hit detection checks for this state so
    # that rows already marked valid in the line being stored can hit
    WAIT_ACK = 2
 481
 482
 483 class RegInternal(RecordObject):
 484     def __init__(self):
 485         super().__init__()
 486         # Cache hit state (Latches for 1 cycle BRAM access)
 487         self.hit_way      = Signal(NUM_WAYS)
 488         self.hit_nia      = Signal(64)
 489         self.hit_smark    = Signal()
 490         self.hit_valid    = Signal()
 491
 492         # Cache miss state (reload state machine)
 493         self.state        = Signal(State, reset=State.IDLE)
 494         self.wb           = WBMasterOut("wb")
 495         self.req_adr      = Signal(64)
 496         self.store_way    = Signal(NUM_WAYS)
 497         self.store_index  = Signal(NUM_LINES)
 498         self.store_row    = Signal(BRAM_ROWS)
 499         self.store_tag    = Signal(TAG_BITS)
 500         self.store_valid  = Signal()
 501         self.end_row_ix   = Signal(ROW_LINEBITS)
 502         self.rows_valid   = RowPerLineValidArray()
 503
 504         # TLB miss state
 505         self.fetch_failed = Signal()
 506
 507 # -- 64 bit direct mapped icache. All instructions are 4B aligned.
 508 #
 509 # entity icache is
 510 #     generic (
 511 #         SIM : boolean := false;
 512 #         -- Line size in bytes
 513 #         LINE_SIZE : positive := 64;
 514 #         -- BRAM organisation: We never access more
 515 #         -- than wishbone_data_bits
 516 #         -- at a time so to save resources we make the
 517 #         -- array only that wide,
 518 #         -- and use consecutive indices for to make a cache "line"
 519 #         --
 520 #         -- ROW_SIZE is the width in bytes of the BRAM (based on WB,
 521 #         -- so 64-bits)
 522 #         ROW_SIZE  : positive := wishbone_data_bits / 8;
 523 #         -- Number of lines in a set
 524 #         NUM_LINES : positive := 32;
 525 #         -- Number of ways
 526 #         NUM_WAYS  : positive := 4;
 527 #         -- L1 ITLB number of entries (direct mapped)
 528 #         TLB_SIZE : positive := 64;
 529 #         -- L1 ITLB log_2(page_size)
 530 #         TLB_LG_PGSZ : positive := 12;
 531 #         -- Number of real address bits that we store
 532 #         REAL_ADDR_BITS : positive := 56;
 533 #         -- Non-zero to enable log data collection
 534 #         LOG_LENGTH : natural := 0
 535 #         );
 536 #     port (
 537 #         clk          : in std_ulogic;
 538 #         rst          : in std_ulogic;
 539 #
 540 #         i_in         : in Fetch1ToIcacheType;
 541 #         i_out        : out IcacheToDecode1Type;
 542 #
 543 #         m_in         : in MmuToIcacheType;
 544 #
 545 #         stall_in     : in std_ulogic;
 546 #       stall_out    : out std_ulogic;
 547 #       flush_in     : in std_ulogic;
 548 #       inval_in     : in std_ulogic;
 549 #
 550 #         wishbone_out : out wishbone_master_out;
 551 #         wishbone_in  : in wishbone_slave_out;
 552 #
 553 #         log_out      : out std_ulogic_vector(53 downto 0)
 554 #         );
 555 # end entity icache;
# 64 bit set associative icache. All instructions are 4B aligned.
 557 class ICache(Elaboratable):
    """64 bit set associative icache. All instructions are 4B aligned."""
    def __init__(self):
        """Create the icache's external ports.

        The ports correspond to the port list of the VHDL icache
        entity (i_in, i_out, m_in, stall_in, stall_out, flush_in,
        inval_in, wishbone_out, wishbone_in, log_out).
        """
        # request from fetch1 (input) and response towards decode1 (output)
        self.i_in           = Fetch1ToICacheType(name="i_in")
        self.i_out          = ICacheToDecode1Type(name="i_out")

        # requests from the MMU (input)
        self.m_in           = MMUToICacheType(name="m_in")

        # stall_out is an output; the other three are inputs
        self.stall_in       = Signal()
        self.stall_out      = Signal()
        self.flush_in       = Signal()
        self.inval_in       = Signal()

        # wishbone bus: master signals out, slave response in
        self.wb_out         = WBMasterOut(name="wb_out")
        self.wb_in          = WBSlaveOut(name="wb_in")

        # log data output
        self.log_out        = Signal(54)
 574
 575
 576     # Generate a cache RAM for each way
 577     def rams(self, m, r, cache_out_row, use_previous, replace_way, req_row):
 578         comb = m.d.comb
 579
 580         wb_in, stall_in = self.wb_in, self.stall_in
 581
 582
 583         for i in range(NUM_WAYS):
 584             do_read  = Signal(name="do_rd_%d" % i)
 585             do_write = Signal(name="do_wr_%d" % i)
 586             rd_addr  = Signal(ROW_BITS)
 587             wr_addr  = Signal(ROW_BITS)
 588             d_out    = Signal(ROW_SIZE_BITS, name="d_out_%d" % i)
 589             wr_sel   = Signal(ROW_SIZE)
 590
 591             way = CacheRam(ROW_BITS, ROW_SIZE_BITS, True)
 592             setattr(m.submodules, "cacheram_%d" % i, way)
 593
 594             comb += way.rd_en.eq(do_read)
 595             comb += way.rd_addr.eq(rd_addr)
 596             comb += d_out.eq(way.rd_data_o)
 597             comb += way.wr_sel.eq(wr_sel)
 598             comb += way.wr_addr.eq(wr_addr)
 599             comb += way.wr_data.eq(wb_in.dat)
 600
 601             comb += do_read.eq(~(stall_in | use_previous))
 602             comb += do_write.eq(wb_in.ack & (replace_way == i))
 603
 604             with m.If(r.hit_way == i):
 605                 comb += cache_out_row.eq(d_out)
 606             comb += rd_addr.eq(req_row)
 607             comb += wr_addr.eq(r.store_row)
 608             comb += wr_sel.eq(Repl(do_write, ROW_SIZE))
 609
 610     #     -- Generate PLRUs
 611     def maybe_plrus(self, m, r, plru_victim):
 612         comb = m.d.comb
 613
 614         with m.If(NUM_WAYS > 1):
 615             for i in range(NUM_LINES):
 616                 plru_acc_i  = Signal(WAY_BITS)
 617                 plru_acc_en = Signal()
 618                 plru        = PLRU(WAY_BITS)
 619                 setattr(m.submodules, "plru_%d" % i, plru)
 620
 621                 comb += plru.acc_i.eq(plru_acc_i)
 622                 comb += plru.acc_en.eq(plru_acc_en)
 623
 624                 # PLRU interface
 625                 with m.If(get_index(r.hit_nia) == i):
 626                     comb += plru.acc_en.eq(r.hit_valid)
 627
 628                 comb += plru.acc_i.eq(r.hit_way)
 629                 comb += plru_victim[i].eq(plru.lru_o)
 630
 631     # TLB hit detection and real address generation
 632     def itlb_lookup(self, m, tlb_req_index, itlb_ptes, itlb_tags,
 633                     real_addr, itlb_valid_bits, ra_valid, eaa_priv,
 634                     priv_fault, access_ok):
 635         comb = m.d.comb
 636
 637         i_in = self.i_in
 638
 639         pte  = Signal(TLB_PTE_BITS)
 640         ttag = Signal(TLB_EA_TAG_BITS)
 641
 642         comb += tlb_req_index.eq(hash_ea(i_in.nia))
 643         comb += pte.eq(itlb_ptes[tlb_req_index])
 644         comb += ttag.eq(itlb_tags[tlb_req_index])
 645
 646         with m.If(i_in.virt_mode):
 647             comb += real_addr.eq(Cat(
 648                      i_in.nia[:TLB_LG_PGSZ],
 649                      pte[TLB_LG_PGSZ:REAL_ADDR_BITS]
 650                     ))
 651
 652             with m.If(ttag == i_in.nia[TLB_LG_PGSZ + TLB_BITS:64]):
 653                 comb += ra_valid.eq(itlb_valid_bits[tlb_req_index])
 654
 655             comb += eaa_priv.eq(pte[3])
 656
 657         with m.Else():
 658             comb += real_addr.eq(i_in.nia[:REAL_ADDR_BITS])
 659             comb += ra_valid.eq(1)
 660             comb += eaa_priv.eq(1)
 661
 662         # No IAMR, so no KUEP support for now
 663         comb += priv_fault.eq(eaa_priv & ~i_in.priv_mode)
 664         comb += access_ok.eq(ra_valid & ~priv_fault)
 665
 666     # iTLB update
 667     def itlb_update(self, m, itlb_valid_bits, itlb_tags, itlb_ptes):
 668         comb = m.d.comb
 669         sync = m.d.sync
 670
 671         m_in = self.m_in
 672
 673         wr_index = Signal(TLB_SIZE)
 674         sync += wr_index.eq(hash_ea(m_in.addr))
 675
 676         with m.If(m_in.tlbie & m_in.doall):
 677             # Clear all valid bits
 678             for i in range(TLB_SIZE):
 679                 sync += itlb_valid_bits[i].eq(0)
 680
 681         with m.Elif(m_in.tlbie):
 682             # Clear entry regardless of hit or miss
 683             sync += itlb_valid_bits[wr_index].eq(0)
 684
 685         with m.Elif(m_in.tlbld):
 686             sync += itlb_tags[wr_index].eq(
 687                      m_in.addr[TLB_LG_PGSZ + TLB_BITS:64]
 688                     )
 689             sync += itlb_ptes[wr_index].eq(m_in.pte)
 690             sync += itlb_valid_bits[wr_index].eq(1)
 691
 692 #     -- Cache hit detection, output to fetch2 and other misc logic
 693 #     icache_comb : process(all)
 694     # Cache hit detection, output to fetch2 and other misc logic
 695     def icache_comb(self, m, use_previous, r, req_index, req_row,
 696                     req_tag, real_addr, req_laddr, cache_valid_bits,
 697                     cache_tags, access_ok, req_is_hit,
 698                     req_is_miss, replace_way, plru_victim, cache_out_row):
 699 #       variable is_hit  : std_ulogic;
 700 #       variable hit_way : way_t;
 701         comb = m.d.comb
 702
 703         #comb += Display("ENTER icache_comb - use_previous:%x req_index:%x " \
 704         #                "req_row:%x req_tag:%x real_addr:%x req_laddr:%x " \
 705         #                "access_ok:%x req_is_hit:%x req_is_miss:%x " \
 706         #                "replace_way:%x", use_previous, req_index, req_row, \
 707         #                req_tag, real_addr, req_laddr, access_ok, \
 708         #                req_is_hit, req_is_miss, replace_way)
 709
 710         i_in, i_out, wb_out = self.i_in, self.i_out, self.wb_out
 711         flush_in, stall_out = self.flush_in, self.stall_out
 712
 713         is_hit  = Signal()
 714         hit_way = Signal(NUM_WAYS)
 715 #     begin
 716 #         -- i_in.sequential means that i_in.nia this cycle
 717 #         -- is 4 more than last cycle.  If we read more
 718 #         -- than 32 bits at a time, had a cache hit last
 719 #         -- cycle, and we don't want the first 32-bit chunk
 720 #         -- then we can keep the data we read last cycle
 721 #         -- and just use that.
 722 #         if unsigned(i_in.nia(INSN_BITS+2-1 downto 2)) /= 0 then
 723 #             use_previous <= i_in.sequential and r.hit_valid;
 724 #         else
 725 #             use_previous <= '0';
 726 #         end if;
 727         # i_in.sequential means that i_in.nia this cycle is 4 more than
 728         # last cycle.  If we read more than 32 bits at a time, had a
 729         # cache hit last cycle, and we don't want the first 32-bit chunk
 730         # then we can keep the data we read last cycle and just use that.
 731         with m.If(i_in.nia[2:INSN_BITS+2] != 0):
 732             comb += use_previous.eq(i_in.sequential & r.hit_valid)
 733
 734 #       -- Extract line, row and tag from request
 735 #         req_index <= get_index(i_in.nia);
 736 #         req_row <= get_row(i_in.nia);
 737 #         req_tag <= get_tag(real_addr);
 738         # Extract line, row and tag from request
 739         comb += req_index.eq(get_index(i_in.nia))
 740         comb += req_row.eq(get_row(i_in.nia))
 741         comb += req_tag.eq(get_tag(real_addr))
 742
 743 #       -- Calculate address of beginning of cache row, will be
 744 #       -- used for cache miss processing if needed
 745 #       req_laddr <=
 746 #        (63 downto REAL_ADDR_BITS => '0') &
 747 #        real_addr(REAL_ADDR_BITS - 1 downto ROW_OFF_BITS) &
 748 #        (ROW_OFF_BITS-1 downto 0 => '0');
 749         # Calculate address of beginning of cache row, will be
 750         # used for cache miss processing if needed
 751         comb += req_laddr.eq(Cat(
 752                  Const(0b0, ROW_OFF_BITS),
 753                  real_addr[ROW_OFF_BITS:REAL_ADDR_BITS],
 754                  Const(0b0, 8)
 755                 ))
 756
 757 #       -- Test if pending request is a hit on any way
 758 #       hit_way := 0;
 759 #       is_hit := '0';
 760 #       for i in way_t loop
 761 #           if i_in.req = '1' and
 762 #                 (cache_valids(req_index)(i) = '1' or
 763 #                  (r.state = WAIT_ACK and
 764 #                   req_index = r.store_index and
 765 #                   i = r.store_way and
 766 #                   r.rows_valid(req_row mod ROW_PER_LINE) = '1')) then
 767 #               if read_tag(i, cache_tags(req_index)) = req_tag then
 768 #                   hit_way := i;
 769 #                   is_hit := '1';
 770 #               end if;
 771 #           end if;
 772 #       end loop;
 773         # Test if pending request is a hit on any way
 774         hitcond = Signal()
 775         comb += hitcond.eq((r.state == State.WAIT_ACK)
 776                     & (req_index == r.store_index)
 777                     & r.rows_valid[req_row % ROW_PER_LINE])
 778         with m.If(i_in.req):
 779             cvb = Signal(NUM_WAYS)
 780             ctag = Signal(TAG_RAM_WIDTH)
 781             comb += ctag.eq(cache_tags[req_index])
 782             comb += cvb.eq(cache_valid_bits[req_index])
 783             for i in range(NUM_WAYS):
 784                 tagi = Signal(TAG_BITS, name="ti%d" % i)
 785                 comb += tagi.eq(read_tag(i, ctag))
 786                 hit_test = Signal(name="hit_test%d" % i)
 787                 comb += hit_test.eq(i == r.store_way)
 788                 with m.If((cvb[i] | (hitcond & hit_test)) & (tagi == req_tag)):
 789                     comb += hit_way.eq(i)
 790                     comb += is_hit.eq(1)
 791
 792 #       -- Generate the "hit" and "miss" signals
 793 #       -- for the synchronous blocks
 794 #       if i_in.req = '1' and access_ok = '1' and flush_in = '0'
 795 #        and rst = '0' then
 796 #           req_is_hit  <= is_hit;
 797 #           req_is_miss <= not is_hit;
 798 #       else
 799 #           req_is_hit  <= '0';
 800 #           req_is_miss <= '0';
 801 #       end if;
 802 #       req_hit_way <= hit_way;
 803         # Generate the "hit" and "miss" signals
 804         # for the synchronous blocks
 805         with m.If(i_in.req & access_ok & ~flush_in):
 806             comb += req_is_hit.eq(is_hit)
 807             comb += req_is_miss.eq(~is_hit)
 808
 809         with m.Else():
 810             comb += req_is_hit.eq(0)
 811             comb += req_is_miss.eq(0)
 812
 813 #       -- The way to replace on a miss
 814 #       if r.state = CLR_TAG then
 815 #           replace_way <=
 816 #            to_integer(unsigned(plru_victim(r.store_index)));
 817 #       else
 818 #           replace_way <= r.store_way;
 819 #       end if;
 820         # The way to replace on a miss
 821         with m.If(r.state == State.CLR_TAG):
 822             comb += replace_way.eq(plru_victim[r.store_index])
 823
 824         with m.Else():
 825             comb += replace_way.eq(r.store_way)
 826
 827 #       -- Output instruction from current cache row
 828 #       --
 829 #       -- Note: This is a mild violation of our design principle of
 830 #       -- having pipeline stages output from a clean latch. In this
 831 #       -- case we output the result of a mux. The alternative would
 832 #       -- be output an entire row which I prefer not to do just yet
 833 #       -- as it would force fetch2 to know about some of the cache
 834 #       -- geometry information.
 835 #       i_out.insn <= read_insn_word(r.hit_nia, cache_out(r.hit_way));
 836 #       i_out.valid <= r.hit_valid;
 837 #       i_out.nia <= r.hit_nia;
 838 #       i_out.stop_mark <= r.hit_smark;
 839 #       i_out.fetch_failed <= r.fetch_failed;
 840         # Output instruction from current cache row
 841         #
 842         # Note: This is a mild violation of our design principle of
 843         # having pipeline stages output from a clean latch. In this
 844         # case we output the result of a mux. The alternative would
 845         # be output an entire row which I prefer not to do just yet
 846         # as it would force fetch2 to know about some of the cache
 847         # geometry information.
 848         #comb += Display("BEFORE read_insn_word - r.hit_nia:%x " \
 849         #                "r.hit_way:%x, cache_out[r.hit_way]:%x", r.hit_nia, \
 850         #                r.hit_way, cache_out[r.hit_way])
 851         comb += i_out.insn.eq(read_insn_word(r.hit_nia, cache_out_row))
 852         comb += i_out.valid.eq(r.hit_valid)
 853         comb += i_out.nia.eq(r.hit_nia)
 854         comb += i_out.stop_mark.eq(r.hit_smark)
 855         comb += i_out.fetch_failed.eq(r.fetch_failed)
 856
 857 #       -- Stall fetch1 if we have a miss on cache or TLB
 858 #       -- or a protection fault
 859 #       stall_out <= not (is_hit and access_ok);
 860         # Stall fetch1 if we have a miss on cache or TLB
 861         # or a protection fault
 862         comb += stall_out.eq(~(is_hit & access_ok))
 863
 864 #       -- Wishbone requests output (from the cache miss reload machine)
 865 #       wishbone_out <= r.wb;
 866         # Wishbone requests output (from the cache miss reload machine)
 867         comb += wb_out.eq(r.wb)
 868 #     end process;
 869
 870 #     -- Cache hit synchronous machine
 871 #     icache_hit : process(clk)
 872     # Cache hit synchronous machine
 873     def icache_hit(self, m, use_previous, r, req_is_hit, req_hit_way,
 874                    req_index, req_tag, real_addr):
 875         sync = m.d.sync
 876
 877         i_in, stall_in = self.i_in, self.stall_in
 878         flush_in       = self.flush_in
 879
 880 #     begin
 881 #         if rising_edge(clk) then
 882 #             -- keep outputs to fetch2 unchanged on a stall
 883 #             -- except that flush or reset sets valid to 0
 884 #             -- If use_previous, keep the same data as last
 885 #             -- cycle and use the second half
 886 #             if stall_in = '1' or use_previous = '1' then
 887 #                 if rst = '1' or flush_in = '1' then
 888 #                     r.hit_valid <= '0';
 889 #             end if;
 890         # keep outputs to fetch2 unchanged on a stall
 891         # except that flush or reset sets valid to 0
 892         # If use_previous, keep the same data as last
 893         # cycle and use the second half
 894         with m.If(stall_in | use_previous):
 895             with m.If(flush_in):
 896                 sync += r.hit_valid.eq(0)
 897 #             else
 898 #                 -- On a hit, latch the request for the next cycle,
 899 #                 -- when the BRAM data will be available on the
 900 #                 -- cache_out output of the corresponding way
 901 #                 r.hit_valid <= req_is_hit;
 902 #                 if req_is_hit = '1' then
 903 #                     r.hit_way <= req_hit_way;
 904         with m.Else():
 905             # On a hit, latch the request for the next cycle,
 906             # when the BRAM data will be available on the
 907             # cache_out output of the corresponding way
 908             sync += r.hit_valid.eq(req_is_hit)
 909
 910             with m.If(req_is_hit):
 911                 sync += r.hit_way.eq(req_hit_way)
 912
 913 #                     report "cache hit nia:" & to_hstring(i_in.nia) &
 914 #                         " IR:" & std_ulogic'image(i_in.virt_mode) &
 915 #                         " SM:" & std_ulogic'image(i_in.stop_mark) &
 916 #                         " idx:" & integer'image(req_index) &
 917 #                         " tag:" & to_hstring(req_tag) &
 918 #                         " way:" & integer'image(req_hit_way) &
 919 #                         " RA:" & to_hstring(real_addr);
 920                 sync += Display("cache hit nia:%x IR:%x SM:%x idx:%x " \
 921                                 "tag:%x way:%x RA:%x", i_in.nia, \
 922                                 i_in.virt_mode, i_in.stop_mark, req_index, \
 923                                 req_tag, req_hit_way, real_addr)
 924
 925
 926
 927 #                 end if;
 928 #           end if;
 929 #             if stall_in = '0' then
 930 #                 -- Send stop marks and NIA down regardless of validity
 931 #                 r.hit_smark <= i_in.stop_mark;
 932 #                 r.hit_nia <= i_in.nia;
 933 #             end if;
 934         with m.If(~stall_in):
 935             # Send stop marks and NIA down regardless of validity
 936             sync += r.hit_smark.eq(i_in.stop_mark)
 937             sync += r.hit_nia.eq(i_in.nia)
 938 #       end if;
 939 #     end process;
 940
 941 #     -- Cache miss/reload synchronous machine
 942 #     icache_miss : process(clk)
 943     # Cache miss/reload synchronous machine
 944     def icache_miss(self, m, cache_valid_bits, r, req_is_miss,
 945                     req_index, req_laddr, req_tag, replace_way,
 946                     cache_tags, access_ok, real_addr):
 947         comb = m.d.comb
 948         sync = m.d.sync
 949
 950         i_in, wb_in, m_in  = self.i_in, self.wb_in, self.m_in
 951         stall_in, flush_in = self.stall_in, self.flush_in
 952         inval_in           = self.inval_in
 953
 954 #       variable tagset    : cache_tags_set_t;
 955 #       variable stbs_done : boolean;
 956
 957         tagset    = Signal(TAG_RAM_WIDTH)
 958         stbs_done = Signal()
 959
 960 #     begin
 961 #         if rising_edge(clk) then
 962 #           -- On reset, clear all valid bits to force misses
 963 #             if rst = '1' then
 964         # On reset, clear all valid bits to force misses
 965 #               for i in index_t loop
 966 #                   cache_valids(i) <= (others => '0');
 967 #               end loop;
 968 #                 r.state <= IDLE;
 969 #                 r.wb.cyc <= '0';
 970 #                 r.wb.stb <= '0';
 971 #               -- We only ever do reads on wishbone
 972 #               r.wb.dat <= (others => '0');
 973 #               r.wb.sel <= "11111111";
 974 #               r.wb.we  <= '0';
 975
 976 #               -- Not useful normally but helps avoiding
 977 #               -- tons of sim warnings
 978 #               r.wb.adr <= (others => '0');
 979
 980 #             else
 981
 982 #                 -- Process cache invalidations
 983 #                 if inval_in = '1' then
 984 #                     for i in index_t loop
 985 #                         cache_valids(i) <= (others => '0');
 986 #                     end loop;
 987 #                     r.store_valid <= '0';
 988 #                 end if;
 989         comb += r.wb.sel.eq(-1)
 990         comb += r.wb.adr.eq(r.req_adr[3:])
 991
 992         # Process cache invalidations
 993         with m.If(inval_in):
 994             for i in range(NUM_LINES):
 995                 sync += cache_valid_bits[i].eq(0)
 996             sync += r.store_valid.eq(0)
 997
 998 #               -- Main state machine
 999 #               case r.state is
1000         # Main state machine
1001         with m.Switch(r.state):
1002
1003 #           when IDLE =>
1004             with m.Case(State.IDLE):
1005 #                 -- Reset per-row valid flags,
1006 #                 -- only used in WAIT_ACK
1007 #                 for i in 0 to ROW_PER_LINE - 1 loop
1008 #                     r.rows_valid(i) <= '0';
1009 #                 end loop;
1010                 # Reset per-row valid flags,
1011                 # only used in WAIT_ACK
1012                 for i in range(ROW_PER_LINE):
1013                     sync += r.rows_valid[i].eq(0)
1014
1015 #               -- We need to read a cache line
1016 #               if req_is_miss = '1' then
1017 #               report "cache miss nia:" & to_hstring(i_in.nia) &
1018 #                         " IR:" & std_ulogic'image(i_in.virt_mode) &
1019 #                   " SM:" & std_ulogic'image(i_in.stop_mark) &
1020 #                   " idx:" & integer'image(req_index) &
1021 #                   " way:" & integer'image(replace_way) &
1022 #                   " tag:" & to_hstring(req_tag) &
1023 #                         " RA:" & to_hstring(real_addr);
1024                 # We need to read a cache line
1025                 with m.If(req_is_miss):
1026                     sync += Display(
1027                              "cache miss nia:%x IR:%x SM:%x idx:%x " \
1028                              " way:%x tag:%x RA:%x", i_in.nia, \
1029                              i_in.virt_mode, i_in.stop_mark, req_index, \
1030                              replace_way, req_tag, real_addr)
1031
1032 #               -- Keep track of our index and way for
1033 #                   -- subsequent stores
1034 #               r.store_index <= req_index;
1035 #               r.store_row <= get_row(req_laddr);
1036 #                   r.store_tag <= req_tag;
1037 #                   r.store_valid <= '1';
1038 #                   r.end_row_ix <=
1039 #                    get_row_of_line(get_row(req_laddr)) - 1;
1040                     # Keep track of our index and way
1041                     # for subsequent stores
1042                     sync += r.store_index.eq(req_index)
1043                     sync += r.store_row.eq(get_row(req_laddr))
1044                     sync += r.store_tag.eq(req_tag)
1045                     sync += r.store_valid.eq(1)
1046                     sync += r.end_row_ix.eq(
1047                              get_row_of_line(
1048                               get_row(req_laddr)
1049                              ) - 1
1050                             )
1051
1052 #               -- Prep for first wishbone read. We calculate the
1053 #                   -- address of the start of the cache line and
1054 #                   -- start the WB cycle.
1055 #               r.wb.adr <= req_laddr(r.wb.adr'left downto 0);
1056 #               r.wb.cyc <= '1';
1057 #               r.wb.stb <= '1';
1058                     # Prep for first wishbone read.
1059                     # We calculate the
1060                     # address of the start of the cache line and
1061                     # start the WB cycle.
1062                     sync += r.req_adr.eq(req_laddr)
1063                     sync += r.wb.cyc.eq(1)
1064                     sync += r.wb.stb.eq(1)
1065
1066 #               -- Track that we had one request sent
1067 #               r.state <= CLR_TAG;
1068                     # Track that we had one request sent
1069                     sync += r.state.eq(State.CLR_TAG)
1070 #               end if;
1071
1072 #           when CLR_TAG | WAIT_ACK =>
1073             with m.Case(State.CLR_TAG, State.WAIT_ACK):
1074 #                 if r.state = CLR_TAG then
1075                 with m.If(r.state == State.CLR_TAG):
1076 #                     -- Get victim way from plru
1077 #               r.store_way <= replace_way;
1078                     # Get victim way from plru
1079                     sync += r.store_way.eq(replace_way)
1080 #
1081 #               -- Force misses on that way while
1082 #                   -- reloading that line
1083 #               cache_valids(req_index)(replace_way) <= '0';
1084                     # Force misses on that way while
1085                     # realoading that line
1086                     cv = Signal(INDEX_BITS)
1087                     comb += cv.eq(cache_valid_bits[req_index])
1088                     comb += cv.bit_select(replace_way, 1).eq(0)
1089                     sync += cache_valid_bits[req_index].eq(cv)
1090
1091 #               -- Store new tag in selected way
1092 #               for i in 0 to NUM_WAYS-1 loop
1093 #                   if i = replace_way then
1094 #                       tagset := cache_tags(r.store_index);
1095 #                       write_tag(i, tagset, r.store_tag);
1096 #                       cache_tags(r.store_index) <= tagset;
1097 #                   end if;
1098 #               end loop;
1099                     for i in range(NUM_WAYS):
1100                         with m.If(i == replace_way):
1101                             comb += tagset.eq(cache_tags[r.store_index])
1102                             comb += write_tag(i, tagset, r.store_tag)
1103                             sync += cache_tags[r.store_index].eq(tagset)
1104
1105 #                     r.state <= WAIT_ACK;
1106                     sync += r.state.eq(State.WAIT_ACK)
1107 #                 end if;
1108
1109 #               -- Requests are all sent if stb is 0
1110 #               stbs_done := r.wb.stb = '0';
1111                 # Requests are all sent if stb is 0
1112                 stbs_zero = Signal()
1113                 comb += stbs_zero.eq(r.wb.stb == 0)
1114                 comb += stbs_done.eq(stbs_zero)
1115
1116 #               -- If we are still sending requests,
1117 #               -- was one accepted ?
1118 #               if wishbone_in.stall = '0' and not stbs_done then
1119                 # If we are still sending requests,
1120                 # was one accepted?
1121                 with m.If(~wb_in.stall & ~stbs_zero):
1122 #               -- That was the last word ? We are done sending.
1123 #                   -- Clear stb and set stbs_done so we can handle
1124 #                   -- an eventual last ack on the same cycle.
1125 #               if is_last_row_addr(r.wb.adr, r.end_row_ix) then
1126 #                   r.wb.stb <= '0';
1127 #                   stbs_done := true;
1128 #               end if;
1129                     # That was the last word ?
1130                     # We are done sending.
1131                     # Clear stb and set stbs_done
1132                     # so we can handle
1133                     # an eventual last ack on
1134                     # the same cycle.
1135                     with m.If(is_last_row_addr(r.req_adr, r.end_row_ix)):
1136                         sync += Display("IS_LAST_ROW_ADDR " \
1137                                         "r.wb.addr:%x r.end_row_ix:%x " \
1138                                         "r.wb.stb:%x stbs_zero:%x " \
1139                                         "stbs_done:%x", r.wb.adr, \
1140                                         r.end_row_ix, r.wb.stb, \
1141                                         stbs_zero, stbs_done)
1142                         sync += r.wb.stb.eq(0)
1143                         comb += stbs_done.eq(1)
1144
1145 #               -- Calculate the next row address
1146 #               r.wb.adr <= next_row_addr(r.wb.adr);
1147                     # Calculate the next row address
1148                     rarange = Signal(LINE_OFF_BITS - ROW_OFF_BITS)
1149                     comb += rarange.eq(
1150                              r.req_adr[ROW_OFF_BITS:LINE_OFF_BITS] + 1
1151                             )
1152                     sync += r.req_adr[ROW_OFF_BITS:LINE_OFF_BITS].eq(
1153                              rarange
1154                             )
1155                     sync += Display("RARANGE r.wb.adr:%x stbs_zero:%x " \
1156                                     "stbs_done:%x", rarange, stbs_zero, \
1157                                     stbs_done)
1158 #               end if;
1159
1160 #               -- Incoming acks processing
1161 #               if wishbone_in.ack = '1' then
1162                 # Incoming acks processing
1163                 with m.If(wb_in.ack):
1164 #                     r.rows_valid(r.store_row mod ROW_PER_LINE)
1165 #                      <= '1';
1166                     sync += Display("WB_IN_ACK stbs_zero:%x " \
1167                                     "stbs_done:%x", \
1168                                     stbs_zero, stbs_done)
1169
1170                     sync += r.rows_valid[r.store_row % ROW_PER_LINE].eq(1)
1171
1172 #               -- Check for completion
1173 #               if stbs_done and
1174 #                    is_last_row(r.store_row, r.end_row_ix) then
1175                     # Check for completion
1176                     with m.If(stbs_done &
1177                               is_last_row(r.store_row, r.end_row_ix)):
1178 #                   -- Complete wishbone cycle
1179 #                   r.wb.cyc <= '0';
1180                         # Complete wishbone cycle
1181                         sync += r.wb.cyc.eq(0)
1182
1183 #                   -- Cache line is now valid
1184 #                   cache_valids(r.store_index)(replace_way) <=
1185 #                        r.store_valid and not inval_in;
1186                         # Cache line is now valid
1187                         cv = Signal(INDEX_BITS)
1188                         comb += cv.eq(cache_valid_bits[r.store_index])
1189                         comb += cv.bit_select(replace_way, 1).eq(
1190                                  r.store_valid & ~inval_in
1191                                 )
1192                         sync += cache_valid_bits[r.store_index].eq(cv)
1193
1194 #                   -- We are done
1195 #                   r.state <= IDLE;
1196                         # We are done
1197                         sync += r.state.eq(State.IDLE)
1198 #               end if;
1199
1200 #               -- Increment store row counter
1201 #               r.store_row <= next_row(r.store_row);
1202                     # Increment store row counter
1203                     sync += r.store_row.eq(next_row(r.store_row))
1204 #               end if;
1205 #           end case;
1206 #       end if;
1207 #
1208 #             -- TLB miss and protection fault processing
1209 #             if rst = '1' or flush_in = '1' or m_in.tlbld = '1' then
1210 #                 r.fetch_failed <= '0';
1211 #             elsif i_in.req = '1' and access_ok = '0' and
1212 #              stall_in = '0' then
1213 #                 r.fetch_failed <= '1';
1214 #             end if;
1215         # TLB miss and protection fault processing
1216         with m.If(flush_in | m_in.tlbld):
1217             sync += r.fetch_failed.eq(0)
1218
1219         with m.Elif(i_in.req & ~access_ok & ~stall_in):
1220             sync += r.fetch_failed.eq(1)
1221 #       end if;
1222 #     end process;
1223
1224 #     icache_log: if LOG_LENGTH > 0 generate
1225     def icache_log(self, m, req_hit_way, ra_valid, access_ok,
1226                    req_is_miss, req_is_hit, lway, wstate, r):
1227         comb = m.d.comb
1228         sync = m.d.sync
1229
1230         wb_in, i_out       = self.wb_in, self.i_out
1231         log_out, stall_out = self.log_out, self.stall_out
1232
1233 #         -- Output data to logger
1234 #         signal log_data    : std_ulogic_vector(53 downto 0);
1235 #     begin
1236 #         data_log: process(clk)
1237 #             variable lway: way_t;
1238 #             variable wstate: std_ulogic;
1239         # Output data to logger
1240         for i in range(LOG_LENGTH):
1241             # Output data to logger
1242             log_data = Signal(54)
1243             lway     = Signal(NUM_WAYS)
1244             wstate   = Signal()
1245
1246 #         begin
1247 #             if rising_edge(clk) then
1248 #                 lway := req_hit_way;
1249 #                 wstate := '0';
1250             sync += lway.eq(req_hit_way)
1251             sync += wstate.eq(0)
1252
1253 #                 if r.state /= IDLE then
1254 #                     wstate := '1';
1255 #                 end if;
1256             with m.If(r.state != State.IDLE):
1257                 sync += wstate.eq(1)
1258
1259 #                 log_data <= i_out.valid &
1260 #                             i_out.insn &
1261 #                             wishbone_in.ack &
1262 #                             r.wb.adr(5 downto 3) &
1263 #                             r.wb.stb & r.wb.cyc &
1264 #                             wishbone_in.stall &
1265 #                             stall_out &
1266 #                             r.fetch_failed &
1267 #                             r.hit_nia(5 downto 2) &
1268 #                             wstate &
1269 #                             std_ulogic_vector(to_unsigned(lway, 3)) &
1270 #                             req_is_hit & req_is_miss &
1271 #                             access_ok &
1272 #                             ra_valid;
1273             sync += log_data.eq(Cat(
1274                      ra_valid, access_ok, req_is_miss, req_is_hit,
1275                      lway, wstate, r.hit_nia[2:6],
1276                      r.fetch_failed, stall_out, wb_in.stall, r.wb.cyc,
1277                      r.wb.stb, r.wb.adr[3:6], wb_in.ack, i_out.insn,
1278                      i_out.valid
1279                     ))
1280 #             end if;
1281 #         end process;
1282 #         log_out <= log_data;
1283             comb += log_out.eq(log_data)
1284 #     end generate;
1285 # end;
1286
    def elaborate(self, platform):
        """Build the icache module.

        Creates the storage arrays and the signals shared between the
        sub-functions, then calls each sub-function in turn so that it
        adds its logic to the module.

        Arguments:
        * platform: not used in this method

        Returns the populated Module.
        """

        m                = Module()
        # not used directly in this method
        comb             = m.d.comb

        # Storage. Hopefully "cache_rows" is a BRAM, the rest is LUTs
        cache_tags       = CacheTagArray()
        cache_valid_bits = CacheValidBitsArray()

        # iTLB storage: valid bits, tags and PTEs
        itlb_valid_bits  = TLBValidBitsArray()
        itlb_tags        = TLBTagArray()
        itlb_ptes        = TLBPtesArray()
        # TODO to be passed to nmigen as ram attributes
        # attribute ram_style of itlb_tags : signal is "distributed";
        # attribute ram_style of itlb_ptes : signal is "distributed";

        # Privilege bit from PTE EAA field
        eaa_priv         = Signal()

        # Internal registers (reg_internal_t in the VHDL)
        r                = RegInternal()

        # Async signal on incoming request
        # NOTE(review): req_index, req_row and req_hit_way are sized by
        # element count (NUM_LINES, BRAM_ROWS, NUM_WAYS bits), whereas
        # the VHDL declares them as the integer ranges index_t, row_t
        # and way_t - confirm whether index widths were intended.
        # NOTE(review): req_hit_way is only handed to icache_hit below
        # (and to the disabled icache_log); nothing in this method
        # drives it - confirm it gets assigned somewhere.
        req_index        = Signal(NUM_LINES)
        req_row          = Signal(BRAM_ROWS)
        req_hit_way      = Signal(NUM_WAYS)
        req_tag          = Signal(TAG_BITS)
        req_is_hit       = Signal()
        req_is_miss      = Signal()
        req_laddr        = Signal(64)

        # iTLB lookup results
        # NOTE(review): tlb_req_index is TLB_SIZE bits wide; the VHDL
        # type is tlb_index_t - same width question as above.
        tlb_req_index    = Signal(TLB_SIZE)
        real_addr        = Signal(REAL_ADDR_BITS)
        ra_valid         = Signal()
        priv_fault       = Signal()
        access_ok        = Signal()
        use_previous     = Signal()

        # Row of data coming out of the cache RAMs
        cache_out_row    = Signal(ROW_SIZE_BITS)

        # PLRU victim selection and the way to replace on a miss
        # NOTE(review): replace_way is NUM_WAYS bits wide (way_t in the
        # VHDL) - same width question as above.
        plru_victim      = PLRUOut()
        replace_way      = Signal(NUM_WAYS)

        # call sub-functions putting everything together, using shared
        # signals established above
        self.rams(m, r, cache_out_row, use_previous, replace_way, req_row)
        self.maybe_plrus(m, r, plru_victim)
        self.itlb_lookup(m, tlb_req_index, itlb_ptes, itlb_tags,
                         real_addr, itlb_valid_bits, ra_valid, eaa_priv,
                         priv_fault, access_ok)
        self.itlb_update(m, itlb_valid_bits, itlb_tags, itlb_ptes)
        self.icache_comb(m, use_previous, r, req_index, req_row,
                         req_tag, real_addr, req_laddr, cache_valid_bits,
                         cache_tags, access_ok, req_is_hit, req_is_miss,
                         replace_way, plru_victim, cache_out_row)
        self.icache_hit(m, use_previous, r, req_is_hit, req_hit_way,
                        req_index, req_tag, real_addr)
        self.icache_miss(m, cache_valid_bits, r, req_is_miss, req_index,
                         req_laddr, req_tag, replace_way, cache_tags,
                         access_ok, real_addr)
        # logging is currently disabled; the argument list below does
        # not match the icache_log signature (it passes log_out, and
        # lway/wstate are not defined in this method)
        #self.icache_log(m, log_out, req_hit_way, ra_valid, access_ok,
        #                req_is_miss, req_is_hit, lway, wstate, r)

        return m
1377
1378
1379 # icache_tb.vhdl
1380 #
1381 # library ieee;
1382 # use ieee.std_logic_1164.all;
1383 #
1384 # library work;
1385 # use work.common.all;
1386 # use work.wishbone_types.all;
1387 #
1388 # entity icache_tb is
1389 # end icache_tb;
1390 #
1391 # architecture behave of icache_tb is
1392 #     signal clk          : std_ulogic;
1393 #     signal rst          : std_ulogic;
1394 #
1395 #     signal i_out        : Fetch1ToIcacheType;
1396 #     signal i_in         : IcacheToDecode1Type;
1397 #
1398 #     signal m_out        : MmuToIcacheType;
1399 #
1400 #     signal wb_bram_in   : wishbone_master_out;
1401 #     signal wb_bram_out  : wishbone_slave_out;
1402 #
1403 #     constant clk_period : time := 10 ns;
1404 # begin
1405 #     icache0: entity work.icache
1406 #         generic map(
1407 #             LINE_SIZE => 64,
1408 #             NUM_LINES => 4
1409 #             )
1410 #         port map(
1411 #             clk => clk,
1412 #             rst => rst,
1413 #             i_in => i_out,
1414 #             i_out => i_in,
1415 #             m_in => m_out,
1416 #             stall_in => '0',
1417 #           flush_in => '0',
1418 #             inval_in => '0',
1419 #             wishbone_out => wb_bram_in,
1420 #             wishbone_in => wb_bram_out
1421 #             );
1422 #
1423 #     -- BRAM Memory slave
1424 #     bram0: entity work.wishbone_bram_wrapper
1425 #         generic map(
1426 #             MEMORY_SIZE   => 1024,
1427 #             RAM_INIT_FILE => "icache_test.bin"
1428 #             )
1429 #         port map(
1430 #             clk => clk,
1431 #             rst => rst,
1432 #             wishbone_in => wb_bram_in,
1433 #             wishbone_out => wb_bram_out
1434 #             );
1435 #
1436 #     clk_process: process
1437 #     begin
1438 #         clk <= '0';
1439 #         wait for clk_period/2;
1440 #         clk <= '1';
1441 #         wait for clk_period/2;
1442 #     end process;
1443 #
1444 #     rst_process: process
1445 #     begin
1446 #         rst <= '1';
1447 #         wait for 2*clk_period;
1448 #         rst <= '0';
1449 #         wait;
1450 #     end process;
1451 #
1452 #     stim: process
1453 #     begin
1454 #         i_out.req <= '0';
1455 #         i_out.nia <= (others => '0');
1456 #       i_out.stop_mark <= '0';
1457 #
1458 #         m_out.tlbld <= '0';
1459 #         m_out.tlbie <= '0';
1460 #         m_out.addr <= (others => '0');
1461 #         m_out.pte <= (others => '0');
1462 #
1463 #         wait until rising_edge(clk);
1464 #         wait until rising_edge(clk);
1465 #         wait until rising_edge(clk);
1466 #         wait until rising_edge(clk);
1467 #
1468 #         i_out.req <= '1';
1469 #         i_out.nia <= x"0000000000000004";
1470 #
1471 #         wait for 30*clk_period;
1472 #         wait until rising_edge(clk);
1473 #
1474 #         assert i_in.valid = '1' severity failure;
1475 #         assert i_in.insn = x"00000001"
1476 #           report "insn @" & to_hstring(i_out.nia) &
1477 #           "=" & to_hstring(i_in.insn) &
1478 #           " expected 00000001"
1479 #           severity failure;
1480 #
1481 #         i_out.req <= '0';
1482 #
1483 #         wait until rising_edge(clk);
1484 #
1485 #         -- hit
1486 #         i_out.req <= '1';
1487 #         i_out.nia <= x"0000000000000008";
1488 #         wait until rising_edge(clk);
1489 #         wait until rising_edge(clk);
1490 #         assert i_in.valid = '1' severity failure;
1491 #         assert i_in.insn = x"00000002"
1492 #           report "insn @" & to_hstring(i_out.nia) &
1493 #           "=" & to_hstring(i_in.insn) &
1494 #           " expected 00000002"
1495 #           severity failure;
1496 #         wait until rising_edge(clk);
1497 #
1498 #         -- another miss
1499 #         i_out.req <= '1';
1500 #         i_out.nia <= x"0000000000000040";
1501 #
1502 #         wait for 30*clk_period;
1503 #         wait until rising_edge(clk);
1504 #
1505 #         assert i_in.valid = '1' severity failure;
1506 #         assert i_in.insn = x"00000010"
1507 #           report "insn @" & to_hstring(i_out.nia) &
1508 #           "=" & to_hstring(i_in.insn) &
1509 #           " expected 00000010"
1510 #           severity failure;
1511 #
1512 #         -- test something that aliases
1513 #         i_out.req <= '1';
1514 #         i_out.nia <= x"0000000000000100";
1515 #         wait until rising_edge(clk);
1516 #         wait until rising_edge(clk);
1517 #         assert i_in.valid = '0' severity failure;
1518 #         wait until rising_edge(clk);
1519 #
1520 #         wait for 30*clk_period;
1521 #         wait until rising_edge(clk);
1522 #
1523 #         assert i_in.valid = '1' severity failure;
1524 #         assert i_in.insn = x"00000040"
1525 #           report "insn @" & to_hstring(i_out.nia) &
1526 #           "=" & to_hstring(i_in.insn) &
1527 #           " expected 00000040"
1528 #           severity failure;
1529 #
1530 #         i_out.req <= '0';
1531 #
1532 #         std.env.finish;
1533 #     end process;
1534 # end;
def icache_sim(dut):
    """Stimulus/checker generator for the ICache unit test.

    Mirrors the `stim` process of the commented-out VHDL testbench
    above: it issues four fetch requests (miss, hit, miss, aliasing
    miss) and asserts on the `valid` and `insn` outputs after each one.

    The expected instruction values assume that the 32-bit word at byte
    address A of the backing memory holds A/4, which is how the
    `__main__` block of this file builds the memory image.

    Arguments:
      dut: the ICache instance under test; its `i_in`, `i_out` and
           `m_in` records are driven/sampled.

    This is a generator: it is meant to be handed to the simulator as a
    sync process (see test_icache), where a bare `yield` waits for the
    next clock.  Raises AssertionError when a check fails.
    """
    # names are kept from the VHDL testbench's point of view: what the
    # testbench sends "out" is the cache's input, and vice versa.
    i_out = dut.i_in
    i_in  = dut.i_out
    m_out = dut.m_in

    # quiescent start: no fetch request, no MMU/TLB activity
    yield i_in.valid.eq(0)
    yield i_out.priv_mode.eq(1)
    yield i_out.req.eq(0)
    yield i_out.nia.eq(0)
    yield i_out.stop_mark.eq(0)
    yield m_out.tlbld.eq(0)
    yield m_out.tlbie.eq(0)
    yield m_out.addr.eq(0)
    yield m_out.pte.eq(0)
    yield
    yield
    yield
    yield

    # first request: a miss, so allow plenty of cycles for the reload
    yield i_out.req.eq(1)
    yield i_out.nia.eq(Const(0x0000000000000004, 64))
    for _ in range(30):
        yield
    yield
    valid = yield i_in.valid
    nia   = yield i_out.nia
    insn  = yield i_in.insn
    print(f"valid? {valid}")
    assert valid
    assert insn == 0x00000001, \
        "insn @%x=%x expected 00000001" % (nia, insn)
    yield i_out.req.eq(0)
    yield

    # hit
    yield i_out.req.eq(1)
    yield i_out.nia.eq(Const(0x0000000000000008, 64))
    yield
    yield
    valid = yield i_in.valid
    # report the requested address, as the other sections (and the
    # VHDL testbench) do
    nia   = yield i_out.nia
    insn  = yield i_in.insn
    assert valid
    assert insn == 0x00000002, \
        "insn @%x=%x expected 00000002" % (nia, insn)
    yield

    # another miss
    yield i_out.req.eq(1)
    yield i_out.nia.eq(Const(0x0000000000000040, 64))
    for _ in range(30):
        yield
    yield
    valid = yield i_in.valid
    nia   = yield i_out.nia
    insn  = yield i_in.insn
    assert valid
    assert insn == 0x00000010, \
        "insn @%x=%x expected 00000010" % (nia, insn)

    # test something that aliases
    yield i_out.req.eq(1)
    yield i_out.nia.eq(Const(0x0000000000000100, 64))
    yield
    yield
    valid = yield i_in.valid
    # `valid` is a plain Python int here: `~valid` would be -1 or -2,
    # i.e. always truthy, so the check must use `not`.
    assert not valid
    for _ in range(30):
        yield
    yield
    valid = yield i_in.valid
    # re-read nia so that a failure message reports this request's
    # address rather than the previous one
    nia   = yield i_out.nia
    insn  = yield i_in.insn
    assert valid
    assert insn == 0x00000040, \
         "insn @%x=%x expected 00000040" % (nia, insn)
    yield i_out.req.eq(0)
1611
1612
1613
def test_icache(mem):
    """Build an ICache plus a wishbone SRAM and run icache_sim on it.

    Arguments:
      mem: initial contents for the 64-bit wide, 1024-row backing
           Memory that sits behind the SRAM.

    The simulation trace is written to 'test_icache.vcd'.
    """
    icache = ICache()

    # backing store that the cache reloads from
    memory = Memory(width=64, depth=16*64, init=mem)
    sram = SRAM(memory=memory, granularity=8)

    top = Module()
    top.submodules.icache = icache
    top.submodules.sram = sram

    bus = sram.bus
    top.d.comb += [
        # cache (wishbone master) -> SRAM
        bus.cyc.eq(icache.wb_out.cyc),
        bus.stb.eq(icache.wb_out.stb),
        bus.we.eq(icache.wb_out.we),
        bus.sel.eq(icache.wb_out.sel),
        bus.adr.eq(icache.wb_out.adr),
        bus.dat_w.eq(icache.wb_out.dat),
        # SRAM -> cache
        icache.wb_in.ack.eq(bus.ack),
        icache.wb_in.dat.eq(bus.dat_r),
    ]

    # nmigen Simulation
    sim = Simulator(top)
    sim.add_clock(1e-6)
    sim.add_sync_process(wrap(icache_sim(icache)))

    with sim.write_vcd('test_icache.vcd'):
        sim.run()
1642
if __name__ == '__main__':
    # dump the RTLIL of a stand-alone ICache
    icache = ICache()
    il_text = rtlil.convert(icache, ports=[])
    with open("test_icache.il", "w") as il_file:
        il_file.write(il_text)

    # 512 64-bit rows, each packing two consecutive 32-bit values:
    # 2*row in the low half and 2*row+1 in the high half
    mem = [(row*2) | ((row*2+1) << 32) for row in range(512)]

    test_icache(mem)
1654