efb06c9d4da7d499ab8dfb0e8f3aa810c3b1b866
[soc.git] / src / soc / experiment / icache.py
1 """ICache
2
3 based on Anton Blanchard microwatt icache.vhdl
4
5 Set associative icache
6
7 TODO (in no specific order):
8 * Add debug interface to inspect cache content
9 * Add snoop/invalidate path
10 * Add multi-hit error detection
11 * Pipelined bus interface (wb or axi)
12 * Maybe add parity? There's a few bits free in each BRAM row on Xilinx
13 * Add optimization: service hits on partially loaded lines
14 * Add optimization: (maybe) interrupt reload on fluch/redirect
15 * Check if playing with the geometry of the cache tags allow for more
16 efficient use of distributed RAM and less logic/muxes. Currently we
17 write TAG_BITS width which may not match full ram blocks and might
18 cause muxes to be inferred for "partial writes".
19 * Check if making the read size of PLRU a ROM helps utilization
20
21 """
22 from enum import Enum, unique
23 from nmigen import (Module, Signal, Elaboratable, Cat, Array, Const)
24 from nmigen.cli import main, rtlil
25 from nmutil.iocontrol import RecordObject
26 from nmigen.utils import log2_int
27 from nmutil.util import Display
28
29 #from nmutil.plru import PLRU
30 from soc.experiment.cache_ram import CacheRam
31 from soc.experiment.plru import PLRU
32
33 from soc.experiment.mem_types import (Fetch1ToICacheType,
34 ICacheToDecode1Type,
35 MMUToICacheType)
36
37 from soc.experiment.wb_types import (WB_ADDR_BITS, WB_DATA_BITS,
38 WB_SEL_BITS, WBAddrType, WBDataType,
39 WBSelType, WBMasterOut, WBSlaveOut,
40 WBMasterOutVector, WBSlaveOutVector,
41 WBIOMasterOut, WBIOSlaveOut)
42
43 # for test
44 from nmigen_soc.wishbone.sram import SRAM
45 from nmigen import Memory
46 from nmutil.util import wrap
47 from nmigen.cli import main, rtlil
# Simulator-backend selection: "if True" is a manual build-time switch.
# Flip to False to use the (faster) cxxsim backend instead of pysim.
if True:
    from nmigen.back.pysim import Simulator, Delay, Settle
else:
    from nmigen.sim.cxxsim import Simulator, Delay, Settle
52
53
# enable simulation-only behaviour (mirrors the VHDL SIM generic)
SIM = 0
# cache line size in bytes
LINE_SIZE = 64
# BRAM organisation: We never access more than wishbone_data_bits
# at a time so to save resources we make the array only that wide,
# and use consecutive indices for to make a cache "line"
#
# ROW_SIZE is the width in bytes of the BRAM (based on WB, so 64-bits)
ROW_SIZE = WB_DATA_BITS // 8
# Number of lines in a set
NUM_LINES = 32
# Number of ways
NUM_WAYS = 4
# L1 ITLB number of entries (direct mapped)
TLB_SIZE = 64
# L1 ITLB log_2(page_size)
TLB_LG_PGSZ = 12
# Number of real address bits that we store
REAL_ADDR_BITS = 56
# Non-zero to enable log data collection
LOG_LENGTH = 0

# width of one BRAM row in bits
ROW_SIZE_BITS = ROW_SIZE * 8
# ROW_PER_LINE is the number of row
# (wishbone) transactions in a line
ROW_PER_LINE = LINE_SIZE // ROW_SIZE
# BRAM_ROWS is the number of rows in
# BRAM needed to represent the full icache
BRAM_ROWS = NUM_LINES * ROW_PER_LINE
# INSN_PER_ROW is the number of 32bit
# instructions per BRAM row
INSN_PER_ROW = ROW_SIZE_BITS // 32

# debug dump of the derived geometry at import time
print("ROW_SIZE", ROW_SIZE)
print("ROW_SIZE_BITS", ROW_SIZE_BITS)
print("ROW_PER_LINE", ROW_PER_LINE)
print("BRAM_ROWS", BRAM_ROWS)
print("INSN_PER_ROW", INSN_PER_ROW)

# Bit fields counts in the address
#
# INSN_BITS is the number of bits to
# select an instruction in a row
INSN_BITS = log2_int(INSN_PER_ROW)
# ROW_BITS is the number of bits to
# select a row
ROW_BITS = log2_int(BRAM_ROWS)
# ROW_LINE_BITS is the number of bits to
# select a row within a line
ROW_LINE_BITS = log2_int(ROW_PER_LINE)
# LINE_OFF_BITS is the number of bits for
# the offset in a cache line
LINE_OFF_BITS = log2_int(LINE_SIZE)
# ROW_OFF_BITS is the number of bits for
# the offset in a row
ROW_OFF_BITS = log2_int(ROW_SIZE)
# INDEX_BITS is the number of bits to
# select a cache line
INDEX_BITS = log2_int(NUM_LINES)
# SET_SIZE_BITS is the log base 2 of
# the set size
SET_SIZE_BITS = LINE_OFF_BITS + INDEX_BITS
# TAG_BITS is the number of bits of
# the tag part of the address
TAG_BITS = REAL_ADDR_BITS - SET_SIZE_BITS
# TAG_WIDTH is TAG_BITS rounded up to the next multiple of 8
# NOTE(review): TAG_RAM_WIDTH below packs tags at TAG_BITS stride,
# not TAG_WIDTH — any tag-RAM accessor must use TAG_BITS as stride.
TAG_WIDTH = TAG_BITS + 7 - ((TAG_BITS + 7) % 8)

# WAY_BITS is the number of bits to
# select a way
WAY_BITS = log2_int(NUM_WAYS)
# one tag-RAM row holds the tags of all ways, concatenated
TAG_RAM_WIDTH = TAG_BITS * NUM_WAYS

# L1 ITLB (direct mapped)
# constant TLB_BITS : natural := log2(TLB_SIZE);
# constant TLB_EA_TAG_BITS : natural := 64 - (TLB_LG_PGSZ + TLB_BITS);
# constant TLB_PTE_BITS : natural := 64;
TLB_BITS = log2_int(TLB_SIZE)
TLB_EA_TAG_BITS = 64 - (TLB_LG_PGSZ + TLB_BITS)
TLB_PTE_BITS = 64


# debug dump of the derived bit-field widths at import time
print("INSN_BITS", INSN_BITS)
print("ROW_BITS", ROW_BITS)
print("ROW_LINE_BITS", ROW_LINE_BITS)
print("LINE_OFF_BITS", LINE_OFF_BITS)
print("ROW_OFF_BITS", ROW_OFF_BITS)
print("INDEX_BITS", INDEX_BITS)
print("SET_SIZE_BITS", SET_SIZE_BITS)
print("TAG_BITS", TAG_BITS)
print("WAY_BITS", WAY_BITS)
print("TAG_RAM_WIDTH", TAG_RAM_WIDTH)
print("TLB_BITS", TLB_BITS)
print("TLB_EA_TAG_BITS", TLB_EA_TAG_BITS)
print("TLB_PTE_BITS", TLB_PTE_BITS)
148
149
150
151
152 # architecture rtl of icache is
153 #constant ROW_SIZE_BITS : natural := ROW_SIZE*8;
154 #-- ROW_PER_LINE is the number of row (wishbone
155 #-- transactions) in a line
156 #constant ROW_PER_LINE : natural := LINE_SIZE / ROW_SIZE;
157 #-- BRAM_ROWS is the number of rows in BRAM
158 #-- needed to represent the full
159 #-- icache
160 #constant BRAM_ROWS : natural := NUM_LINES * ROW_PER_LINE;
161 #-- INSN_PER_ROW is the number of 32bit instructions per BRAM row
162 #constant INSN_PER_ROW : natural := ROW_SIZE_BITS / 32;
163 #-- Bit fields counts in the address
164 #
165 #-- INSN_BITS is the number of bits to select
166 #-- an instruction in a row
167 #constant INSN_BITS : natural := log2(INSN_PER_ROW);
168 #-- ROW_BITS is the number of bits to select a row
169 #constant ROW_BITS : natural := log2(BRAM_ROWS);
170 #-- ROW_LINEBITS is the number of bits to
171 #-- select a row within a line
172 #constant ROW_LINEBITS : natural := log2(ROW_PER_LINE);
173 #-- LINE_OFF_BITS is the number of bits for the offset
174 #-- in a cache line
175 #constant LINE_OFF_BITS : natural := log2(LINE_SIZE);
176 #-- ROW_OFF_BITS is the number of bits for the offset in a row
177 #constant ROW_OFF_BITS : natural := log2(ROW_SIZE);
178 #-- INDEX_BITS is the number of bits to select a cache line
179 #constant INDEX_BITS : natural := log2(NUM_LINES);
180 #-- SET_SIZE_BITS is the log base 2 of the set size
181 #constant SET_SIZE_BITS : natural := LINE_OFF_BITS + INDEX_BITS;
182 #-- TAG_BITS is the number of bits of the tag part of the address
183 #constant TAG_BITS : natural := REAL_ADDR_BITS - SET_SIZE_BITS;
184 #-- WAY_BITS is the number of bits to select a way
185 #constant WAY_BITS : natural := log2(NUM_WAYS);
186
187 #-- Example of layout for 32 lines of 64 bytes:
188 #--
189 #-- .. tag |index| line |
190 #-- .. | row | |
191 #-- .. | | | |00| zero (2)
192 #-- .. | | |-| | INSN_BITS (1)
193 #-- .. | |---| | ROW_LINEBITS (3)
194 #-- .. | |--- - --| LINE_OFF_BITS (6)
195 #-- .. | |- --| ROW_OFF_BITS (3)
196 #-- .. |----- ---| | ROW_BITS (8)
197 #-- .. |-----| | INDEX_BITS (5)
198 #-- .. --------| | TAG_BITS (53)
199 # Example of layout for 32 lines of 64 bytes:
200 #
201 # .. tag |index| line |
202 # .. | row | |
203 # .. | | | |00| zero (2)
204 # .. | | |-| | INSN_BITS (1)
205 # .. | |---| | ROW_LINEBITS (3)
206 # .. | |--- - --| LINE_OFF_BITS (6)
207 # .. | |- --| ROW_OFF_BITS (3)
208 # .. |----- ---| | ROW_BITS (8)
209 # .. |-----| | INDEX_BITS (5)
210 # .. --------| | TAG_BITS (53)
211
212 #subtype row_t is integer range 0 to BRAM_ROWS-1;
213 #subtype index_t is integer range 0 to NUM_LINES-1;
214 #subtype way_t is integer range 0 to NUM_WAYS-1;
215 #subtype row_in_line_t is unsigned(ROW_LINEBITS-1 downto 0);
216 #
217 #-- The cache data BRAM organized as described above for each way
218 #subtype cache_row_t is std_ulogic_vector(ROW_SIZE_BITS-1 downto 0);
219 #
220 #-- The cache tags LUTRAM has a row per set. Vivado is a pain and will
221 #-- not handle a clean (commented) definition of the cache tags as a 3d
222 #-- memory. For now, work around it by putting all the tags
223 #subtype cache_tag_t is std_logic_vector(TAG_BITS-1 downto 0);
224 # type cache_tags_set_t is array(way_t) of cache_tag_t;
225 # type cache_tags_array_t is array(index_t) of cache_tags_set_t;
226 #constant TAG_RAM_WIDTH : natural := TAG_BITS * NUM_WAYS;
227 #subtype cache_tags_set_t is std_logic_vector(TAG_RAM_WIDTH-1 downto 0);
228 #type cache_tags_array_t is array(index_t) of cache_tags_set_t;
def CacheTagArray():
    """One tag-RAM row per cache line, all NUM_WAYS tags concatenated."""
    rows = [Signal(TAG_RAM_WIDTH, name="cachetag_%d" % idx)
            for idx in range(NUM_LINES)]
    return Array(rows)
232
233 #-- The cache valid bits
234 #subtype cache_way_valids_t is std_ulogic_vector(NUM_WAYS-1 downto 0);
235 #type cache_valids_t is array(index_t) of cache_way_valids_t;
236 #type row_per_line_valid_t is array(0 to ROW_PER_LINE - 1) of std_ulogic;
def CacheValidBitsArray():
    """Per-line valid bits: one bit for each way of the line."""
    valids = [Signal(NUM_WAYS, name="cachevalid_%d" % idx)
              for idx in range(NUM_LINES)]
    return Array(valids)
240
def RowPerLineValidArray():
    """One valid bit per row of the line currently being reloaded."""
    valids = [Signal(name="rows_valid_%d" % idx)
              for idx in range(ROW_PER_LINE)]
    return Array(valids)
244
245
246 #attribute ram_style : string;
247 #attribute ram_style of cache_tags : signal is "distributed";
248 # TODO to be passed to nigmen as ram attributes
249 # attribute ram_style : string;
250 # attribute ram_style of cache_tags : signal is "distributed";
251
252
253 #subtype tlb_index_t is integer range 0 to TLB_SIZE - 1;
254 #type tlb_valids_t is array(tlb_index_t) of std_ulogic;
255 #subtype tlb_tag_t is std_ulogic_vector(TLB_EA_TAG_BITS - 1 downto 0);
256 #type tlb_tags_t is array(tlb_index_t) of tlb_tag_t;
257 #subtype tlb_pte_t is std_ulogic_vector(TLB_PTE_BITS - 1 downto 0);
258 #type tlb_ptes_t is array(tlb_index_t) of tlb_pte_t;
def TLBValidBitsArray():
    """Valid bit for each of the TLB_SIZE direct-mapped ITLB entries."""
    valids = [Signal(name="tlbvalid_%d" % idx)
              for idx in range(TLB_SIZE)]
    return Array(valids)
262
def TLBTagArray():
    """Effective-address tag for each ITLB entry."""
    tags = [Signal(TLB_EA_TAG_BITS, name="tlbtag_%d" % idx)
            for idx in range(TLB_SIZE)]
    return Array(tags)
266
def TLBPtesArray():
    """Page-table entry storage for each ITLB entry."""
    ptes = [Signal(TLB_PTE_BITS, name="tlbptes_%d" % idx)
            for idx in range(TLB_SIZE)]
    return Array(ptes)
270
271
272 #-- Cache RAM interface
273 #type cache_ram_out_t is array(way_t) of cache_row_t;
274 # Cache RAM interface
def CacheRamOut():
    """One full-row read-data bus per way (cache RAM interface)."""
    outs = [Signal(ROW_SIZE_BITS, name="cache_out_%d" % idx)
            for idx in range(NUM_WAYS)]
    return Array(outs)
278
279 #-- PLRU output interface
280 #type plru_out_t is array(index_t) of
281 # std_ulogic_vector(WAY_BITS-1 downto 0);
282 # PLRU output interface
def PLRUOut():
    """Victim-way output of the PLRU, one per cache line."""
    outs = [Signal(WAY_BITS, name="plru_out_%d" % idx)
            for idx in range(NUM_LINES)]
    return Array(outs)
286
287 # -- Return the cache line index (tag index) for an address
288 # function get_index(addr: std_ulogic_vector(63 downto 0))
289 # return index_t is
290 # begin
291 # return to_integer(unsigned(
292 # addr(SET_SIZE_BITS - 1 downto LINE_OFF_BITS)
293 # ));
294 # end;
295 # Return the cache line index (tag index) for an address
def get_index(addr):
    """Return the cache line (tag) index field of *addr*."""
    index = addr[LINE_OFF_BITS:SET_SIZE_BITS]
    return index
298
299 # -- Return the cache row index (data memory) for an address
300 # function get_row(addr: std_ulogic_vector(63 downto 0))
301 # return row_t is
302 # begin
303 # return to_integer(unsigned(
304 # addr(SET_SIZE_BITS - 1 downto ROW_OFF_BITS)
305 # ));
306 # end;
307 # Return the cache row index (data memory) for an address
def get_row(addr):
    """Return the cache row index (data memory address) of *addr*."""
    row = addr[ROW_OFF_BITS:SET_SIZE_BITS]
    return row
310
311 # -- Return the index of a row within a line
312 # function get_row_of_line(row: row_t) return row_in_line_t is
313 # variable row_v : unsigned(ROW_BITS-1 downto 0);
314 # begin
315 # row_v := to_unsigned(row, ROW_BITS);
316 # return row_v(ROW_LINEBITS-1 downto 0);
317 # end;
318 # Return the index of a row within a line
def get_row_of_line(row):
    """Return the index of *row* within its cache line."""
    return row[0:ROW_LINE_BITS]
321
322 # -- Returns whether this is the last row of a line
323 # function is_last_row_addr(addr: wishbone_addr_type;
324 # last: row_in_line_t
325 # )
326 # return boolean is
327 # begin
328 # return unsigned(
329 # addr(LINE_OFF_BITS-1 downto ROW_OFF_BITS)
330 # ) = last;
331 # end;
332 # Returns whether this is the last row of a line
def is_last_row_addr(addr, last):
    """True when *addr* falls in the final row (*last*) of its line."""
    row_in_line = addr[ROW_OFF_BITS:LINE_OFF_BITS]
    return row_in_line == last
335
336 # -- Returns whether this is the last row of a line
337 # function is_last_row(row: row_t;
338 # last: row_in_line_t) return boolean is
339 # begin
340 # return get_row_of_line(row) = last;
341 # end;
342 # Returns whether this is the last row of a line
def is_last_row(row, last):
    """True when *row* is the final row (*last*) of its cache line."""
    current = get_row_of_line(row)
    return current == last
345
346 # -- Return the next row in the current cache line. We use a dedicated
347 # -- function in order to limit the size of the generated adder to be
348 # -- only the bits within a cache line (3 bits with default settings)
349 # function next_row(row: row_t) return row_t is
350 # variable row_v : std_ulogic_vector(ROW_BITS-1 downto 0);
351 # variable row_idx : std_ulogic_vector(ROW_LINEBITS-1 downto 0);
352 # variable result : std_ulogic_vector(ROW_BITS-1 downto 0);
353 # begin
354 # row_v := std_ulogic_vector(to_unsigned(row, ROW_BITS));
355 # row_idx := row_v(ROW_LINEBITS-1 downto 0);
356 # row_v(ROW_LINEBITS-1 downto 0) :=
357 # std_ulogic_vector(unsigned(row_idx) + 1);
358 # return to_integer(unsigned(row_v));
359 # end;
360 # Return the next row in the current cache line. We use a dedicated
361 # function in order to limit the size of the generated adder to be
362 # only the bits within a cache line (3 bits with default settings)
def next_row(row):
    """Return the next row within the current cache line.

    Only the low ROW_LINE_BITS are incremented (wrapping within the
    line), keeping the generated adder as small as the line geometry
    allows; the upper row bits pass through unchanged.
    """
    incremented = row[0:ROW_LINE_BITS] + 1
    return Cat(incremented[:ROW_LINE_BITS], row[ROW_LINE_BITS:])
366 # -- Read the instruction word for the given address in the
367 # -- current cache row
368 # function read_insn_word(addr: std_ulogic_vector(63 downto 0);
369 # data: cache_row_t) return std_ulogic_vector is
370 # variable word: integer range 0 to INSN_PER_ROW-1;
371 # begin
372 # word := to_integer(unsigned(addr(INSN_BITS+2-1 downto 2)));
373 # return data(31+word*32 downto word*32);
374 # end;
375 # Read the instruction word for the given address
376 # in the current cache row
def read_insn_word(addr, data):
    """Select the 32-bit instruction addressed by *addr* from row *data*."""
    word_idx = addr[2:INSN_BITS+2]
    return data.word_select(word_idx, 32)
380
381 # -- Get the tag value from the address
382 # function get_tag(
383 # addr: std_ulogic_vector(REAL_ADDR_BITS - 1 downto 0)
384 # )
385 # return cache_tag_t is
386 # begin
387 # return addr(REAL_ADDR_BITS - 1 downto SET_SIZE_BITS);
388 # end;
389 # Get the tag value from the address
def get_tag(addr):
    """Return the tag field of real address *addr*."""
    tag = addr[SET_SIZE_BITS:REAL_ADDR_BITS]
    return tag
392
393 # -- Read a tag from a tag memory row
394 # function read_tag(way: way_t; tagset: cache_tags_set_t)
395 # return cache_tag_t is
396 # begin
397 # return tagset((way+1) * TAG_BITS - 1 downto way * TAG_BITS);
398 # end;
399 # Read a tag from a tag memory row
def read_tag(way, tagset):
    """Read one way's tag from a concatenated tag-RAM row.

    Tags are packed TAG_BITS apart (TAG_RAM_WIDTH = TAG_BITS * NUM_WAYS,
    and write_tag() writes at TAG_BITS stride), so the select stride here
    must be TAG_BITS as well.  The previous code strode by the
    byte-rounded TAG_WIDTH, reading misaligned bits for way > 0 and past
    the end of the row for the last way.
    """
    return tagset.word_select(way, TAG_BITS)
402
403 # -- Write a tag to tag memory row
404 # procedure write_tag(way: in way_t;
405 # tagset: inout cache_tags_set_t; tag: cache_tag_t) is
406 # begin
407 # tagset((way+1) * TAG_BITS - 1 downto way * TAG_BITS) := tag;
408 # end;
409 # Write a tag to tag memory row
def write_tag(way, tagset, tag):
    """Assign *tag* into *way*'s TAG_BITS-wide slot of a tag-RAM row."""
    lo = way * TAG_BITS
    return tagset[lo:lo + TAG_BITS].eq(tag)
412
413 # -- Simple hash for direct-mapped TLB index
414 # function hash_ea(addr: std_ulogic_vector(63 downto 0))
415 # return tlb_index_t is
416 # variable hash : std_ulogic_vector(TLB_BITS - 1 downto 0);
417 # begin
418 # hash := addr(TLB_LG_PGSZ + TLB_BITS - 1 downto TLB_LG_PGSZ)
419 # xor addr(
420 # TLB_LG_PGSZ + 2 * TLB_BITS - 1 downto
421 # TLB_LG_PGSZ + TLB_BITS
422 # )
423 # xor addr(
424 # TLB_LG_PGSZ + 3 * TLB_BITS - 1 downto
425 # TLB_LG_PGSZ + 2 * TLB_BITS
426 # );
427 # return to_integer(unsigned(hash));
428 # end;
429 # Simple hash for direct-mapped TLB index
def hash_ea(addr):
    """Simple XOR-fold hash of *addr* for the direct-mapped ITLB index.

    Three consecutive TLB_BITS-wide fields above the page offset are
    XORed together to spread nearby pages across the TLB.
    """
    fld0 = addr[TLB_LG_PGSZ:TLB_LG_PGSZ + TLB_BITS]
    fld1 = addr[TLB_LG_PGSZ + TLB_BITS:TLB_LG_PGSZ + 2 * TLB_BITS]
    fld2 = addr[TLB_LG_PGSZ + 2 * TLB_BITS:TLB_LG_PGSZ + 3 * TLB_BITS]
    return fld0 ^ fld1 ^ fld2
437
438 # begin
439 #
440 # assert LINE_SIZE mod ROW_SIZE = 0;
441 # assert ispow2(LINE_SIZE) report "LINE_SIZE not power of 2"
442 # severity FAILURE;
443 # assert ispow2(NUM_LINES) report "NUM_LINES not power of 2"
444 # severity FAILURE;
445 # assert ispow2(ROW_PER_LINE) report "ROW_PER_LINE not power of 2"
446 # severity FAILURE;
447 # assert ispow2(INSN_PER_ROW) report "INSN_PER_ROW not power of 2"
448 # severity FAILURE;
449 # assert (ROW_BITS = INDEX_BITS + ROW_LINEBITS)
450 # report "geometry bits don't add up" severity FAILURE;
451 # assert (LINE_OFF_BITS = ROW_OFF_BITS + ROW_LINEBITS)
452 # report "geometry bits don't add up" severity FAILURE;
453 # assert (REAL_ADDR_BITS = TAG_BITS + INDEX_BITS + LINE_OFF_BITS)
454 # report "geometry bits don't add up" severity FAILURE;
455 # assert (REAL_ADDR_BITS = TAG_BITS + ROW_BITS + ROW_OFF_BITS)
456 # report "geometry bits don't add up" severity FAILURE;
457 #
458 # sim_debug: if SIM generate
459 # debug: process
460 # begin
461 # report "ROW_SIZE = " & natural'image(ROW_SIZE);
462 # report "ROW_PER_LINE = " & natural'image(ROW_PER_LINE);
463 # report "BRAM_ROWS = " & natural'image(BRAM_ROWS);
464 # report "INSN_PER_ROW = " & natural'image(INSN_PER_ROW);
465 # report "INSN_BITS = " & natural'image(INSN_BITS);
466 # report "ROW_BITS = " & natural'image(ROW_BITS);
467 # report "ROW_LINEBITS = " & natural'image(ROW_LINEBITS);
468 # report "LINE_OFF_BITS = " & natural'image(LINE_OFF_BITS);
469 # report "ROW_OFF_BITS = " & natural'image(ROW_OFF_BITS);
470 # report "INDEX_BITS = " & natural'image(INDEX_BITS);
471 # report "TAG_BITS = " & natural'image(TAG_BITS);
472 # report "WAY_BITS = " & natural'image(WAY_BITS);
473 # wait;
474 # end process;
475 # end generate;
476
477 # Cache reload state machine
@unique
class State(Enum):
    """Cache-line reload state machine states (VHDL state_t)."""
    IDLE = 0      # no reload in progress
    CLR_TAG = 1   # victim way being selected / tag set up for refill
    WAIT_ACK = 2  # refill in flight; waiting for wishbone acks
483
484 # type reg_internal_t is record
485 # -- Cache hit state (Latches for 1 cycle BRAM access)
486 # hit_way : way_t;
487 # hit_nia : std_ulogic_vector(63 downto 0);
488 # hit_smark : std_ulogic;
489 # hit_valid : std_ulogic;
490 #
491 # -- Cache miss state (reload state machine)
492 # state : state_t;
493 # wb : wishbone_master_out;
494 # store_way : way_t;
495 # store_index : index_t;
496 # store_row : row_t;
497 # store_tag : cache_tag_t;
498 # store_valid : std_ulogic;
499 # end_row_ix : row_in_line_t;
500 # rows_valid : row_per_line_valid_t;
501 #
502 # -- TLB miss state
503 # fetch_failed : std_ulogic;
504 # end record;
class RegInternal(RecordObject):
    """Internal register state of the icache (VHDL reg_internal_t).

    Groups the one-cycle cache-hit latches, the reload state machine
    registers and the TLB-miss flag into one record.

    Widths follow the VHDL subtypes: index-like fields hold a *number*
    (way_t, index_t, row_t), so they are log2-sized.  The previous code
    used the element counts themselves as bit widths (e.g. a 256-bit
    store_row for an 8-bit row number), wasting register bits.
    """
    def __init__(self):
        super().__init__()
        # Cache hit state (Latches for 1 cycle BRAM access)
        self.hit_way = Signal(WAY_BITS)        # way number that hit
        self.hit_nia = Signal(64)              # fetch address of the hit
        self.hit_smark = Signal()              # stop-mark of the hit
        self.hit_valid = Signal()              # hit data valid

        # Cache miss state (reload state machine)
        self.state = Signal(State, reset=State.IDLE)
        self.wb = WBMasterOut("wb")            # wishbone request in flight
        self.store_way = Signal(WAY_BITS)      # way being refilled
        self.store_index = Signal(INDEX_BITS)  # line being refilled
        self.store_row = Signal(ROW_BITS)      # row currently being written
        self.store_tag = Signal(TAG_BITS)      # tag of the line refilled
        self.store_valid = Signal()            # refill request still valid
        self.end_row_ix = Signal(ROW_LINE_BITS)  # last row index to fetch
        self.rows_valid = RowPerLineValidArray()  # per-row refill progress

        # TLB miss state
        self.fetch_failed = Signal()
527
528 # -- 64 bit direct mapped icache. All instructions are 4B aligned.
529 #
530 # entity icache is
531 # generic (
532 # SIM : boolean := false;
533 # -- Line size in bytes
534 # LINE_SIZE : positive := 64;
535 # -- BRAM organisation: We never access more
536 # -- than wishbone_data_bits
537 # -- at a time so to save resources we make the
538 # -- array only that wide,
539 # -- and use consecutive indices for to make a cache "line"
540 # --
541 # -- ROW_SIZE is the width in bytes of the BRAM (based on WB,
542 # -- so 64-bits)
543 # ROW_SIZE : positive := wishbone_data_bits / 8;
544 # -- Number of lines in a set
545 # NUM_LINES : positive := 32;
546 # -- Number of ways
547 # NUM_WAYS : positive := 4;
548 # -- L1 ITLB number of entries (direct mapped)
549 # TLB_SIZE : positive := 64;
550 # -- L1 ITLB log_2(page_size)
551 # TLB_LG_PGSZ : positive := 12;
552 # -- Number of real address bits that we store
553 # REAL_ADDR_BITS : positive := 56;
554 # -- Non-zero to enable log data collection
555 # LOG_LENGTH : natural := 0
556 # );
557 # port (
558 # clk : in std_ulogic;
559 # rst : in std_ulogic;
560 #
561 # i_in : in Fetch1ToIcacheType;
562 # i_out : out IcacheToDecode1Type;
563 #
564 # m_in : in MmuToIcacheType;
565 #
566 # stall_in : in std_ulogic;
567 # stall_out : out std_ulogic;
568 # flush_in : in std_ulogic;
569 # inval_in : in std_ulogic;
570 #
571 # wishbone_out : out wishbone_master_out;
572 # wishbone_in : in wishbone_slave_out;
573 #
574 # log_out : out std_ulogic_vector(53 downto 0)
575 # );
576 # end entity icache;
577 # 64 bit direct mapped icache. All instructions are 4B aligned.
578 class ICache(Elaboratable):
579 """64 bit direct mapped icache. All instructions are 4B aligned."""
    def __init__(self):
        """Declare the icache's external ports (see VHDL entity icache)."""
        # fetch1 request in, decoded-instruction stream out to decode1
        self.i_in = Fetch1ToICacheType(name="i_in")
        self.i_out = ICacheToDecode1Type(name="i_out")

        # MMU interface: TLB loads and invalidations
        self.m_in = MMUToICacheType(name="m_in")

        # pipeline control
        self.stall_in = Signal()
        self.stall_out = Signal()
        self.flush_in = Signal()
        self.inval_in = Signal()

        # wishbone master interface used for cache refills
        self.wb_out = WBMasterOut(name="wb_out")
        self.wb_in = WBSlaveOut(name="wb_in")

        # debug log output (54 bits, mirrors VHDL log_out)
        self.log_out = Signal(54)
595
596
597 # -- Generate a cache RAM for each way
598 # rams: for i in 0 to NUM_WAYS-1 generate
599 # signal do_read : std_ulogic;
600 # signal do_write : std_ulogic;
601 # signal rd_addr : std_ulogic_vector(ROW_BITS-1 downto 0);
602 # signal wr_addr : std_ulogic_vector(ROW_BITS-1 downto 0);
603 # signal dout : cache_row_t;
604 # signal wr_sel : std_ulogic_vector(ROW_SIZE-1 downto 0);
605 # begin
606 # way: entity work.cache_ram
607 # generic map (
608 # ROW_BITS => ROW_BITS,
609 # WIDTH => ROW_SIZE_BITS
610 # )
611 # port map (
612 # clk => clk,
613 # rd_en => do_read,
614 # rd_addr => rd_addr,
615 # rd_data => dout,
616 # wr_sel => wr_sel,
617 # wr_addr => wr_addr,
618 # wr_data => wishbone_in.dat
619 # );
620 # process(all)
621 # begin
622 # do_read <= not (stall_in or use_previous);
623 # do_write <= '0';
624 # if wishbone_in.ack = '1' and replace_way = i then
625 # do_write <= '1';
626 # end if;
627 # cache_out(i) <= dout;
628 # rd_addr <=
629 # std_ulogic_vector(to_unsigned(req_row, ROW_BITS));
630 # wr_addr <=
631 # std_ulogic_vector(to_unsigned(r.store_row, ROW_BITS));
632 # for i in 0 to ROW_SIZE-1 loop
633 # wr_sel(i) <= do_write;
634 # end loop;
635 # end process;
636 # end generate;
637 def rams(self, m, r, cache_out, use_previous, replace_way, req_row):
638 comb = m.d.comb
639
640 wb_in, stall_in = self.wb_in, self.stall_in
641
642
643 for i in range(NUM_WAYS):
644 do_read = Signal(name="do_rd_%d" % i)
645 do_write = Signal(name="do_wr_%d" % i)
646 rd_addr = Signal(ROW_BITS)
647 wr_addr = Signal(ROW_BITS)
648 d_out = Signal(ROW_SIZE_BITS, name="d_out_%d" % i)
649 wr_sel = Signal(ROW_SIZE)
650
651 way = CacheRam(ROW_BITS, ROW_SIZE_BITS, True)
652 setattr(m.submodules, "cacheram_%d" % i, way)
653
654 comb += way.rd_en.eq(do_read)
655 comb += way.rd_addr.eq(rd_addr)
656 comb += d_out.eq(way.rd_data_o)
657 comb += way.wr_sel.eq(wr_sel)
658 comb += way.wr_addr.eq(wr_addr)
659 comb += way.wr_data.eq(wb_in.dat)
660
661 comb += do_read.eq(~(stall_in | use_previous))
662
663 with m.If(wb_in.ack & (replace_way == i)):
664 comb += do_write.eq(1)
665
666 comb += cache_out[i].eq(d_out)
667 comb += rd_addr.eq(req_row)
668 comb += wr_addr.eq(r.store_row)
669 for j in range(ROW_SIZE):
670 comb += wr_sel[j].eq(do_write)
671
672 # -- Generate PLRUs
673 # maybe_plrus: if NUM_WAYS > 1 generate
674 # begin
675 # plrus: for i in 0 to NUM_LINES-1 generate
676 # -- PLRU interface
677 # signal plru_acc : std_ulogic_vector(WAY_BITS-1 downto 0);
678 # signal plru_acc_en : std_ulogic;
679 # signal plru_out : std_ulogic_vector(WAY_BITS-1 downto 0);
680 #
681 # begin
682 # plru : entity work.plru
683 # generic map (
684 # BITS => WAY_BITS
685 # )
686 # port map (
687 # clk => clk,
688 # rst => rst,
689 # acc => plru_acc,
690 # acc_en => plru_acc_en,
691 # lru => plru_out
692 # );
693 #
694 # process(all)
695 # begin
696 # -- PLRU interface
697 # if get_index(r.hit_nia) = i then
698 # plru_acc_en <= r.hit_valid;
699 # else
700 # plru_acc_en <= '0';
701 # end if;
702 # plru_acc <=
703 # std_ulogic_vector(to_unsigned(r.hit_way, WAY_BITS));
704 # plru_victim(i) <= plru_out;
705 # end process;
706 # end generate;
707 # end generate;
708 def maybe_plrus(self, m, r, plru_victim):
709 comb = m.d.comb
710
711 with m.If(NUM_WAYS > 1):
712 for i in range(NUM_LINES):
713 plru_acc_i = Signal(WAY_BITS)
714 plru_acc_en = Signal()
715 plru_out = Signal(WAY_BITS)
716 plru = PLRU(WAY_BITS)
717 comb += plru.acc_i.eq(plru_acc_i)
718 comb += plru.acc_en.eq(plru_acc_en)
719 comb += plru.lru_o.eq(plru_out)
720
721 # PLRU interface
722 with m.If(get_index(r.hit_nia) == i):
723 comb += plru.acc_en.eq(r.hit_valid)
724
725 comb += plru.acc_i.eq(r.hit_way)
726 comb += plru_victim[i].eq(plru.lru_o)
727
728 # -- TLB hit detection and real address generation
729 # itlb_lookup : process(all)
730 # variable pte : tlb_pte_t;
731 # variable ttag : tlb_tag_t;
732 # begin
733 # tlb_req_index <= hash_ea(i_in.nia);
734 # pte := itlb_ptes(tlb_req_index);
735 # ttag := itlb_tags(tlb_req_index);
736 # if i_in.virt_mode = '1' then
737 # real_addr <= pte(REAL_ADDR_BITS - 1 downto TLB_LG_PGSZ) &
738 # i_in.nia(TLB_LG_PGSZ - 1 downto 0);
739 # if ttag = i_in.nia(63 downto TLB_LG_PGSZ + TLB_BITS) then
740 # ra_valid <= itlb_valids(tlb_req_index);
741 # else
742 # ra_valid <= '0';
743 # end if;
744 # eaa_priv <= pte(3);
745 # else
746 # real_addr <= i_in.nia(REAL_ADDR_BITS - 1 downto 0);
747 # ra_valid <= '1';
748 # eaa_priv <= '1';
749 # end if;
750 #
751 # -- no IAMR, so no KUEP support for now
752 # priv_fault <= eaa_priv and not i_in.priv_mode;
753 # access_ok <= ra_valid and not priv_fault;
754 # end process;
755 # TLB hit detection and real address generation
756 def itlb_lookup(self, m, tlb_req_index, itlb_ptes, itlb_tags,
757 real_addr, itlb_valid_bits, ra_valid, eaa_priv,
758 priv_fault, access_ok):
759 comb = m.d.comb
760
761 i_in = self.i_in
762
763 pte = Signal(TLB_PTE_BITS)
764 ttag = Signal(TLB_EA_TAG_BITS)
765
766 comb += tlb_req_index.eq(hash_ea(i_in.nia))
767 comb += pte.eq(itlb_ptes[tlb_req_index])
768 comb += ttag.eq(itlb_tags[tlb_req_index])
769
770 with m.If(i_in.virt_mode):
771 comb += real_addr.eq(Cat(
772 i_in.nia[:TLB_LG_PGSZ],
773 pte[TLB_LG_PGSZ:REAL_ADDR_BITS]
774 ))
775
776 with m.If(ttag == i_in.nia[TLB_LG_PGSZ + TLB_BITS:64]):
777 comb += ra_valid.eq(itlb_valid_bits[tlb_req_index])
778
779 comb += eaa_priv.eq(pte[3])
780
781 with m.Else():
782 comb += real_addr.eq(i_in.nia[:REAL_ADDR_BITS])
783 comb += ra_valid.eq(1)
784 comb += eaa_priv.eq(1)
785
786 # No IAMR, so no KUEP support for now
787 comb += priv_fault.eq(eaa_priv & ~i_in.priv_mode)
788 comb += access_ok.eq(ra_valid & ~priv_fault)
789
790 # -- iTLB update
791 # itlb_update: process(clk)
792 # variable wr_index : tlb_index_t;
793 # begin
794 # if rising_edge(clk) then
795 # wr_index := hash_ea(m_in.addr);
796 # if rst = '1' or
797 # (m_in.tlbie = '1' and m_in.doall = '1') then
798 # -- clear all valid bits
799 # for i in tlb_index_t loop
800 # itlb_valids(i) <= '0';
801 # end loop;
802 # elsif m_in.tlbie = '1' then
803 # -- clear entry regardless of hit or miss
804 # itlb_valids(wr_index) <= '0';
805 # elsif m_in.tlbld = '1' then
806 # itlb_tags(wr_index) <=
807 # m_in.addr(63 downto TLB_LG_PGSZ + TLB_BITS);
808 # itlb_ptes(wr_index) <= m_in.pte;
809 # itlb_valids(wr_index) <= '1';
810 # end if;
811 # end if;
812 # end process;
813 # iTLB update
814 def itlb_update(self, m, itlb_valid_bits, itlb_tags, itlb_ptes):
815 comb = m.d.comb
816 sync = m.d.sync
817
818 m_in = self.m_in
819
820 wr_index = Signal(TLB_SIZE)
821 sync += wr_index.eq(hash_ea(m_in.addr))
822
823 with m.If(m_in.tlbie & m_in.doall):
824 # Clear all valid bits
825 for i in range(TLB_SIZE):
826 sync += itlb_valid_bits[i].eq(0)
827
828 with m.Elif(m_in.tlbie):
829 # Clear entry regardless of hit or miss
830 sync += itlb_valid_bits[wr_index].eq(0)
831
832 with m.Elif(m_in.tlbld):
833 sync += itlb_tags[wr_index].eq(
834 m_in.addr[TLB_LG_PGSZ + TLB_BITS:64]
835 )
836 sync += itlb_ptes[wr_index].eq(m_in.pte)
837 sync += itlb_valid_bits[wr_index].eq(1)
838
839 # -- Cache hit detection, output to fetch2 and other misc logic
840 # icache_comb : process(all)
841 # Cache hit detection, output to fetch2 and other misc logic
842 def icache_comb(self, m, use_previous, r, req_index, req_row,
843 req_tag, real_addr, req_laddr, cache_valid_bits,
844 cache_tags, access_ok, req_is_hit,
845 req_is_miss, replace_way, plru_victim, cache_out):
846 # variable is_hit : std_ulogic;
847 # variable hit_way : way_t;
848 comb = m.d.comb
849
850 #comb += Display("ENTER icache_comb - use_previous:%x req_index:%x " \
851 # "req_row:%x req_tag:%x real_addr:%x req_laddr:%x " \
852 # "access_ok:%x req_is_hit:%x req_is_miss:%x " \
853 # "replace_way:%x", use_previous, req_index, req_row, \
854 # req_tag, real_addr, req_laddr, access_ok, \
855 # req_is_hit, req_is_miss, replace_way)
856
857 i_in, i_out, wb_out = self.i_in, self.i_out, self.wb_out
858 flush_in, stall_out = self.flush_in, self.stall_out
859
860 is_hit = Signal()
861 hit_way = Signal(NUM_WAYS)
862 # begin
863 # -- i_in.sequential means that i_in.nia this cycle
864 # -- is 4 more than last cycle. If we read more
865 # -- than 32 bits at a time, had a cache hit last
866 # -- cycle, and we don't want the first 32-bit chunk
867 # -- then we can keep the data we read last cycle
868 # -- and just use that.
869 # if unsigned(i_in.nia(INSN_BITS+2-1 downto 2)) /= 0 then
870 # use_previous <= i_in.sequential and r.hit_valid;
871 # else
872 # use_previous <= '0';
873 # end if;
874 # i_in.sequential means that i_in.nia this cycle is 4 more than
875 # last cycle. If we read more than 32 bits at a time, had a
876 # cache hit last cycle, and we don't want the first 32-bit chunk
877 # then we can keep the data we read last cycle and just use that.
878 with m.If(i_in.nia[2:INSN_BITS+2] != 0):
879 comb += use_previous.eq(i_in.sequential & r.hit_valid)
880
881 # -- Extract line, row and tag from request
882 # req_index <= get_index(i_in.nia);
883 # req_row <= get_row(i_in.nia);
884 # req_tag <= get_tag(real_addr);
885 # Extract line, row and tag from request
886 comb += req_index.eq(get_index(i_in.nia))
887 comb += req_row.eq(get_row(i_in.nia))
888 comb += req_tag.eq(get_tag(real_addr))
889
890 # -- Calculate address of beginning of cache row, will be
891 # -- used for cache miss processing if needed
892 # req_laddr <=
893 # (63 downto REAL_ADDR_BITS => '0') &
894 # real_addr(REAL_ADDR_BITS - 1 downto ROW_OFF_BITS) &
895 # (ROW_OFF_BITS-1 downto 0 => '0');
896 # Calculate address of beginning of cache row, will be
897 # used for cache miss processing if needed
898 comb += req_laddr.eq(Cat(
899 Const(0b0, ROW_OFF_BITS),
900 real_addr[ROW_OFF_BITS:REAL_ADDR_BITS],
901 Const(0b0, 8)
902 ))
903
904 # -- Test if pending request is a hit on any way
905 # hit_way := 0;
906 # is_hit := '0';
907 # for i in way_t loop
908 # if i_in.req = '1' and
909 # (cache_valids(req_index)(i) = '1' or
910 # (r.state = WAIT_ACK and
911 # req_index = r.store_index and
912 # i = r.store_way and
913 # r.rows_valid(req_row mod ROW_PER_LINE) = '1')) then
914 # if read_tag(i, cache_tags(req_index)) = req_tag then
915 # hit_way := i;
916 # is_hit := '1';
917 # end if;
918 # end if;
919 # end loop;
920 # Test if pending request is a hit on any way
921 for i in range(NUM_WAYS):
922 with m.If(i_in.req &
923 (cache_valid_bits[req_index][i] |
924 ((r.state == State.WAIT_ACK)
925 & (req_index == r.store_index)
926 & (i == r.store_way)
927 & r.rows_valid[req_row % ROW_PER_LINE]))):
928 with m.If(read_tag(i, cache_tags[req_index]) == req_tag):
929 comb += hit_way.eq(i)
930 comb += is_hit.eq(1)
931
932 # -- Generate the "hit" and "miss" signals
933 # -- for the synchronous blocks
934 # if i_in.req = '1' and access_ok = '1' and flush_in = '0'
935 # and rst = '0' then
936 # req_is_hit <= is_hit;
937 # req_is_miss <= not is_hit;
938 # else
939 # req_is_hit <= '0';
940 # req_is_miss <= '0';
941 # end if;
942 # req_hit_way <= hit_way;
943 # Generate the "hit" and "miss" signals
944 # for the synchronous blocks
945 with m.If(i_in.req & access_ok & ~flush_in):
946 comb += req_is_hit.eq(is_hit)
947 comb += req_is_miss.eq(~is_hit)
948
949 with m.Else():
950 comb += req_is_hit.eq(0)
951 comb += req_is_miss.eq(0)
952
953 # -- The way to replace on a miss
954 # if r.state = CLR_TAG then
955 # replace_way <=
956 # to_integer(unsigned(plru_victim(r.store_index)));
957 # else
958 # replace_way <= r.store_way;
959 # end if;
960 # The way to replace on a miss
961 with m.If(r.state == State.CLR_TAG):
962 comb += replace_way.eq(plru_victim[r.store_index])
963
964 with m.Else():
965 comb += replace_way.eq(r.store_way)
966
967 # -- Output instruction from current cache row
968 # --
969 # -- Note: This is a mild violation of our design principle of
970 # -- having pipeline stages output from a clean latch. In this
971 # -- case we output the result of a mux. The alternative would
972 # -- be output an entire row which I prefer not to do just yet
973 # -- as it would force fetch2 to know about some of the cache
974 # -- geometry information.
975 # i_out.insn <= read_insn_word(r.hit_nia, cache_out(r.hit_way));
976 # i_out.valid <= r.hit_valid;
977 # i_out.nia <= r.hit_nia;
978 # i_out.stop_mark <= r.hit_smark;
979 # i_out.fetch_failed <= r.fetch_failed;
980 # Output instruction from current cache row
981 #
982 # Note: This is a mild violation of our design principle of
983 # having pipeline stages output from a clean latch. In this
984 # case we output the result of a mux. The alternative would
985 # be output an entire row which I prefer not to do just yet
986 # as it would force fetch2 to know about some of the cache
987 # geometry information.
988 #comb += Display("BEFORE read_insn_word - r.hit_nia:%x " \
989 # "r.hit_way:%x, cache_out[r.hit_way]:%x", r.hit_nia, \
990 # r.hit_way, cache_out[r.hit_way])
991 comb += i_out.insn.eq(read_insn_word(r.hit_nia, cache_out[r.hit_way]))
992 comb += i_out.valid.eq(r.hit_valid)
993 comb += i_out.nia.eq(r.hit_nia)
994 comb += i_out.stop_mark.eq(r.hit_smark)
995 comb += i_out.fetch_failed.eq(r.fetch_failed)
996
997 # -- Stall fetch1 if we have a miss on cache or TLB
998 # -- or a protection fault
999 # stall_out <= not (is_hit and access_ok);
1000 # Stall fetch1 if we have a miss on cache or TLB
1001 # or a protection fault
1002 comb += stall_out.eq(~(is_hit & access_ok))
1003
1004 # -- Wishbone requests output (from the cache miss reload machine)
1005 # wishbone_out <= r.wb;
1006 # Wishbone requests output (from the cache miss reload machine)
1007 comb += wb_out.eq(r.wb)
1008 # end process;
1009
1010 # -- Cache hit synchronous machine
1011 # icache_hit : process(clk)
1012 # Cache hit synchronous machine
    # Cache hit synchronous machine (transliterated from the
    # "icache_hit" process in microwatt icache.vhdl; reset handling is
    # implicit via nmigen's synchronous reset).
    def icache_hit(self, m, use_previous, r, req_is_hit, req_hit_way,
                   req_index, req_tag, real_addr):
        """Latch the cache-hit state on the clock edge.

        While stalled (or when use_previous says last cycle's BRAM row
        is being reused) the outputs to fetch2 are held unchanged,
        except that flush_in still clears r.hit_valid.  Otherwise the
        hit/way of the current request is latched so that the BRAM
        data is available on cache_out of that way next cycle.

        Parameters are the shared combinatorial signals established in
        elaborate(); all writes here go to the sync domain.
        """
        sync = m.d.sync

        i_in, stall_in = self.i_in, self.stall_in
        flush_in = self.flush_in

        # keep outputs to fetch2 unchanged on a stall,
        # except that flush (or reset) clears valid;
        # if use_previous, keep the same data as last
        # cycle and use the second half
        with m.If(stall_in | use_previous):
            with m.If(flush_in):
                sync += r.hit_valid.eq(0)
        with m.Else():
            # On a hit, latch the request for the next cycle,
            # when the BRAM data will be available on the
            # cache_out output of the corresponding way
            sync += r.hit_valid.eq(req_is_hit)

            with m.If(req_is_hit):
                sync += r.hit_way.eq(req_hit_way)

                sync += Display("cache hit nia:%x, IR:%x, SM:%x, idx:%x, " \
                                "tag:%x, way:%x, RA:%x", i_in.nia, \
                                i_in.virt_mode, i_in.stop_mark, req_index, \
                                req_tag, req_hit_way, real_addr)

        # Send stop marks and NIA down regardless of validity
        with m.If(~stall_in):
            sync += r.hit_smark.eq(i_in.stop_mark)
            sync += r.hit_nia.eq(i_in.nia)
1080
1081 # -- Cache miss/reload synchronous machine
1082 # icache_miss : process(clk)
1083 # Cache miss/reload synchronous machine
    # Cache miss/reload synchronous machine (transliterated from the
    # "icache_miss" process in microwatt icache.vhdl; reset handling
    # is implicit via nmigen's synchronous reset).
    def icache_miss(self, m, cache_valid_bits, r, req_is_miss,
                    req_index, req_laddr, req_tag, replace_way,
                    cache_tags, access_ok, real_addr):
        """Line-reload state machine.

        IDLE: on a miss, latch index/row/tag for the reload and start
        a wishbone read burst covering the whole cache line.
        CLR_TAG: pick the victim way, clear its valid bit (forcing
        misses while the line reloads) and write the new tag.
        WAIT_ACK: stream acks into the per-row valid flags until the
        last row completes, then mark the line valid and go IDLE.

        Also latches r.fetch_failed on a TLB miss / protection fault.
        """
        comb = m.d.comb
        sync = m.d.sync

        i_in, wb_in, m_in = self.i_in, self.wb_in, self.m_in
        stall_in, flush_in = self.stall_in, self.flush_in
        inval_in = self.inval_in

        tagset = Signal(TAG_RAM_WIDTH)  # scratch for one set's tag write
        stbs_done = Signal()            # all wishbone requests sent

        # Process cache invalidations: drop every line's valid bits
        # and invalidate any reload currently in flight
        with m.If(inval_in):
            for i in range(NUM_LINES):
                sync += cache_valid_bits[i].eq(0)
            sync += r.store_valid.eq(0)

        # Main state machine
        with m.Switch(r.state):

            with m.Case(State.IDLE):
                # Reset per-row valid flags,
                # only used in WAIT_ACK
                for i in range(ROW_PER_LINE):
                    sync += r.rows_valid[i].eq(0)

                # We need to read a cache line
                with m.If(req_is_miss):
                    sync += Display(
                        "cache miss nia:%x IR:%x SM:%x idx:%x way:%x " \
                        "tag:%x RA:%x", i_in.nia, i_in.virt_mode, \
                        i_in.stop_mark, req_index, replace_way, \
                        req_tag, real_addr)

                    # Keep track of our index and way
                    # for subsequent stores
                    sync += r.store_index.eq(req_index)
                    sync += r.store_row.eq(get_row(req_laddr))
                    sync += r.store_tag.eq(req_tag)
                    sync += r.store_valid.eq(1)
                    sync += r.end_row_ix.eq(
                        get_row_of_line(
                            get_row(req_laddr)
                        ) - 1
                    )

                    # Prep for first wishbone read: address of the
                    # start of the cache line, and start the WB cycle
                    sync += r.wb.adr.eq(req_laddr)
                    sync += r.wb.cyc.eq(1)
                    sync += r.wb.stb.eq(1)

                    # Track that we had one request sent
                    sync += r.state.eq(State.CLR_TAG)

            with m.Case(State.CLR_TAG, State.WAIT_ACK):
                with m.If(r.state == State.CLR_TAG):
                    # Get victim way from plru
                    sync += r.store_way.eq(replace_way)

                    # Force misses on that way while
                    # reloading that line
                    # NOTE(review): cv holds one valid bit per *way*,
                    # so a width of NUM_WAYS looks intended here
                    # rather than INDEX_BITS -- confirm
                    cv = Signal(INDEX_BITS)
                    comb += cv.eq(cache_valid_bits[req_index])
                    comb += cv.bit_select(replace_way, 1).eq(0)
                    sync += cache_valid_bits[req_index].eq(cv)

                    # Store new tag in selected way
                    for i in range(NUM_WAYS):
                        with m.If(i == replace_way):
                            comb += tagset.eq(cache_tags[r.store_index])
                            comb += write_tag(i, tagset, r.store_tag)
                            sync += cache_tags[r.store_index].eq(tagset)

                    sync += r.state.eq(State.WAIT_ACK)

                # Requests are all sent if stb is 0
                stbs_zero = Signal()
                comb += stbs_zero.eq(r.wb.stb == 0)
                comb += stbs_done.eq(stbs_zero)

                # If we are still sending requests, was one accepted?
                # NOTE(review): the VHDL original gates this on
                # "wishbone_in.stall = '0' and not stbs_done"; here
                # stbs_zero is (stb == 0), i.e. the *inverse* of
                # "still sending" -- confirm this inversion is
                # intentional
                with m.If(~wb_in.stall & stbs_zero):
                    # That was the last word?  We are done sending.
                    # Clear stb and set stbs_done so we can handle
                    # an eventual last ack on the same cycle.
                    with m.If(is_last_row_addr(r.wb.adr, r.end_row_ix)):
                        sync += r.wb.stb.eq(0)
                        comb += stbs_done.eq(1)

                    # Calculate the next row address
                    # NOTE(review): this writes the incremented row
                    # slice back over the *whole* wb.adr, whereas the
                    # VHDL next_row_addr only replaces bits
                    # ROW_OFF_BITS..LINE_OFF_BITS-1 in place, keeping
                    # the upper address bits -- confirm
                    rarange = Signal(64)
                    comb += rarange.eq(
                        r.wb.adr[ROW_OFF_BITS:LINE_OFF_BITS] + 1
                    )
                    sync += r.wb.adr.eq(rarange)
                    sync += Display("r.wb.adr:%x", rarange)

                # Incoming acks processing
                with m.If(wb_in.ack):
                    # mark the just-arrived row valid so partial-line
                    # hits can be serviced by WAIT_ACK lookups
                    sync += r.rows_valid[r.store_row % ROW_PER_LINE].eq(1)

                    # Check for completion
                    with m.If(stbs_done &
                              is_last_row(r.store_row, r.end_row_ix)):
                        # Complete wishbone cycle
                        sync += r.wb.cyc.eq(0)

                        # Cache line is now valid (unless it was
                        # invalidated mid-reload)
                        # NOTE(review): same width concern as above --
                        # cv is per-way valid bits
                        cv = Signal(INDEX_BITS)
                        comb += cv.eq(cache_valid_bits[r.store_index])
                        comb += cv.bit_select(replace_way, 1).eq(
                            r.store_valid & ~inval_in
                        )
                        sync += cache_valid_bits[r.store_index].eq(cv)

                        # We are done
                        sync += r.state.eq(State.IDLE)

                    # Increment store row counter
                    sync += r.store_row.eq(next_row(r.store_row))

        # TLB miss and protection fault processing (reset clearing of
        # fetch_failed is handled by nmigen's synchronous reset)
        with m.If(flush_in | m_in.tlbld):
            sync += r.fetch_failed.eq(0)

        with m.Elif(i_in.req & ~access_ok & ~stall_in):
            sync += r.fetch_failed.eq(1)
1346
1347 # icache_log: if LOG_LENGTH > 0 generate
    # Logger (the VHDL wraps this in "if LOG_LENGTH > 0 generate")
    def icache_log(self, m, req_hit_way, ra_valid, access_ok,
                   req_is_miss, req_is_hit, lway, wstate, r):
        """Pack icache probe signals into log_out each cycle for the
        external logger; only generated when LOG_LENGTH > 0."""
        comb = m.d.comb
        sync = m.d.sync

        wb_in, i_out = self.wb_in, self.i_out
        log_out, stall_out = self.log_out, self.stall_out

        # Output data to logger
        for i in range(LOG_LENGTH):
            # NOTE: these locals deliberately shadow the lway/wstate
            # parameters, mirroring the VHDL process variables
            log_data = Signal(54)    # one cycle's worth of probes
            lway = Signal(NUM_WAYS)  # latched hit way
            wstate = Signal()        # 1 while the miss FSM is not IDLE

            sync += lway.eq(req_hit_way)
            sync += wstate.eq(0)

            with m.If(r.state != State.IDLE):
                sync += wstate.eq(1)

            # nmigen Cat() is LSB-first, so the fields appear in the
            # reverse order of the VHDL "&" concatenation.
            # NOTE(review): the VHDL packs lway as exactly 3 bits;
            # lway here is NUM_WAYS bits wide, which shifts the field
            # layout whenever NUM_WAYS != 3 -- confirm
            sync += log_data.eq(Cat(
                ra_valid, access_ok, req_is_miss, req_is_hit,
                lway, wstate, r.hit_nia[2:6],
                r.fetch_failed, stall_out, wb_in.stall, r.wb.cyc,
                r.wb.stb, r.wb.adr[3:6], wb_in.ack, i_out.insn,
                i_out.valid
            ))
            comb += log_out.eq(log_data)
1407 # end generate;
1408 # end;
1409
1410 def elaborate(self, platform):
1411
1412 m = Module()
1413 comb = m.d.comb
1414
1415 # Storage. Hopefully "cache_rows" is a BRAM, the rest is LUTs
1416 cache_tags = CacheTagArray()
1417 cache_valid_bits = CacheValidBitsArray()
1418
1419 # signal itlb_valids : tlb_valids_t;
1420 # signal itlb_tags : tlb_tags_t;
1421 # signal itlb_ptes : tlb_ptes_t;
1422 # attribute ram_style of itlb_tags : signal is "distributed";
1423 # attribute ram_style of itlb_ptes : signal is "distributed";
1424 itlb_valid_bits = TLBValidBitsArray()
1425 itlb_tags = TLBTagArray()
1426 itlb_ptes = TLBPtesArray()
1427 # TODO to be passed to nmigen as ram attributes
1428 # attribute ram_style of itlb_tags : signal is "distributed";
1429 # attribute ram_style of itlb_ptes : signal is "distributed";
1430
1431 # -- Privilege bit from PTE EAA field
1432 # signal eaa_priv : std_ulogic;
1433 # Privilege bit from PTE EAA field
1434 eaa_priv = Signal()
1435
1436 # signal r : reg_internal_t;
1437 r = RegInternal()
1438
1439 # -- Async signals on incoming request
1440 # signal req_index : index_t;
1441 # signal req_row : row_t;
1442 # signal req_hit_way : way_t;
1443 # signal req_tag : cache_tag_t;
1444 # signal req_is_hit : std_ulogic;
1445 # signal req_is_miss : std_ulogic;
1446 # signal req_laddr : std_ulogic_vector(63 downto 0);
1447 # Async signal on incoming request
1448 req_index = Signal(NUM_LINES)
1449 req_row = Signal(BRAM_ROWS)
1450 req_hit_way = Signal(NUM_WAYS)
1451 req_tag = Signal(TAG_BITS)
1452 req_is_hit = Signal()
1453 req_is_miss = Signal()
1454 req_laddr = Signal(64)
1455
1456 # signal tlb_req_index : tlb_index_t;
1457 # signal real_addr : std_ulogic_vector(
1458 # REAL_ADDR_BITS - 1 downto 0
1459 # );
1460 # signal ra_valid : std_ulogic;
1461 # signal priv_fault : std_ulogic;
1462 # signal access_ok : std_ulogic;
1463 # signal use_previous : std_ulogic;
1464 tlb_req_index = Signal(TLB_SIZE)
1465 real_addr = Signal(REAL_ADDR_BITS)
1466 ra_valid = Signal()
1467 priv_fault = Signal()
1468 access_ok = Signal()
1469 use_previous = Signal()
1470
1471 # signal cache_out : cache_ram_out_t;
1472 cache_out = CacheRamOut()
1473
1474 # signal plru_victim : plru_out_t;
1475 # signal replace_way : way_t;
1476 plru_victim = PLRUOut()
1477 replace_way = Signal(NUM_WAYS)
1478
1479 # call sub-functions putting everything together, using shared
1480 # signals established above
1481 self.rams(m, r, cache_out, use_previous, replace_way, req_row)
1482 self.maybe_plrus(m, r, plru_victim)
1483 self.itlb_lookup(m, tlb_req_index, itlb_ptes, itlb_tags,
1484 real_addr, itlb_valid_bits, ra_valid, eaa_priv,
1485 priv_fault, access_ok)
1486 self.itlb_update(m, itlb_valid_bits, itlb_tags, itlb_ptes)
1487 self.icache_comb(m, use_previous, r, req_index, req_row,
1488 req_tag, real_addr, req_laddr, cache_valid_bits,
1489 cache_tags, access_ok, req_is_hit, req_is_miss,
1490 replace_way, plru_victim, cache_out)
1491 self.icache_hit(m, use_previous, r, req_is_hit, req_hit_way,
1492 req_index, req_tag, real_addr)
1493 self.icache_miss(m, cache_valid_bits, r, req_is_miss, req_index,
1494 req_laddr, req_tag, replace_way, cache_tags,
1495 access_ok, real_addr)
1496 #self.icache_log(m, log_out, req_hit_way, ra_valid, access_ok,
1497 # req_is_miss, req_is_hit, lway, wstate, r)
1498
1499 return m
1500
1501
1502 # icache_tb.vhdl
1503 #
1504 # library ieee;
1505 # use ieee.std_logic_1164.all;
1506 #
1507 # library work;
1508 # use work.common.all;
1509 # use work.wishbone_types.all;
1510 #
1511 # entity icache_tb is
1512 # end icache_tb;
1513 #
1514 # architecture behave of icache_tb is
1515 # signal clk : std_ulogic;
1516 # signal rst : std_ulogic;
1517 #
1518 # signal i_out : Fetch1ToIcacheType;
1519 # signal i_in : IcacheToDecode1Type;
1520 #
1521 # signal m_out : MmuToIcacheType;
1522 #
1523 # signal wb_bram_in : wishbone_master_out;
1524 # signal wb_bram_out : wishbone_slave_out;
1525 #
1526 # constant clk_period : time := 10 ns;
1527 # begin
1528 # icache0: entity work.icache
1529 # generic map(
1530 # LINE_SIZE => 64,
1531 # NUM_LINES => 4
1532 # )
1533 # port map(
1534 # clk => clk,
1535 # rst => rst,
1536 # i_in => i_out,
1537 # i_out => i_in,
1538 # m_in => m_out,
1539 # stall_in => '0',
1540 # flush_in => '0',
1541 # inval_in => '0',
1542 # wishbone_out => wb_bram_in,
1543 # wishbone_in => wb_bram_out
1544 # );
1545 #
1546 # -- BRAM Memory slave
1547 # bram0: entity work.wishbone_bram_wrapper
1548 # generic map(
1549 # MEMORY_SIZE => 1024,
1550 # RAM_INIT_FILE => "icache_test.bin"
1551 # )
1552 # port map(
1553 # clk => clk,
1554 # rst => rst,
1555 # wishbone_in => wb_bram_in,
1556 # wishbone_out => wb_bram_out
1557 # );
1558 #
1559 # clk_process: process
1560 # begin
1561 # clk <= '0';
1562 # wait for clk_period/2;
1563 # clk <= '1';
1564 # wait for clk_period/2;
1565 # end process;
1566 #
1567 # rst_process: process
1568 # begin
1569 # rst <= '1';
1570 # wait for 2*clk_period;
1571 # rst <= '0';
1572 # wait;
1573 # end process;
1574 #
1575 # stim: process
1576 # begin
1577 # i_out.req <= '0';
1578 # i_out.nia <= (others => '0');
1579 # i_out.stop_mark <= '0';
1580 #
1581 # m_out.tlbld <= '0';
1582 # m_out.tlbie <= '0';
1583 # m_out.addr <= (others => '0');
1584 # m_out.pte <= (others => '0');
1585 #
1586 # wait until rising_edge(clk);
1587 # wait until rising_edge(clk);
1588 # wait until rising_edge(clk);
1589 # wait until rising_edge(clk);
1590 #
1591 # i_out.req <= '1';
1592 # i_out.nia <= x"0000000000000004";
1593 #
1594 # wait for 30*clk_period;
1595 # wait until rising_edge(clk);
1596 #
1597 # assert i_in.valid = '1' severity failure;
1598 # assert i_in.insn = x"00000001"
1599 # report "insn @" & to_hstring(i_out.nia) &
1600 # "=" & to_hstring(i_in.insn) &
1601 # " expected 00000001"
1602 # severity failure;
1603 #
1604 # i_out.req <= '0';
1605 #
1606 # wait until rising_edge(clk);
1607 #
1608 # -- hit
1609 # i_out.req <= '1';
1610 # i_out.nia <= x"0000000000000008";
1611 # wait until rising_edge(clk);
1612 # wait until rising_edge(clk);
1613 # assert i_in.valid = '1' severity failure;
1614 # assert i_in.insn = x"00000002"
1615 # report "insn @" & to_hstring(i_out.nia) &
1616 # "=" & to_hstring(i_in.insn) &
1617 # " expected 00000002"
1618 # severity failure;
1619 # wait until rising_edge(clk);
1620 #
1621 # -- another miss
1622 # i_out.req <= '1';
1623 # i_out.nia <= x"0000000000000040";
1624 #
1625 # wait for 30*clk_period;
1626 # wait until rising_edge(clk);
1627 #
1628 # assert i_in.valid = '1' severity failure;
1629 # assert i_in.insn = x"00000010"
1630 # report "insn @" & to_hstring(i_out.nia) &
1631 # "=" & to_hstring(i_in.insn) &
1632 # " expected 00000010"
1633 # severity failure;
1634 #
1635 # -- test something that aliases
1636 # i_out.req <= '1';
1637 # i_out.nia <= x"0000000000000100";
1638 # wait until rising_edge(clk);
1639 # wait until rising_edge(clk);
1640 # assert i_in.valid = '0' severity failure;
1641 # wait until rising_edge(clk);
1642 #
1643 # wait for 30*clk_period;
1644 # wait until rising_edge(clk);
1645 #
1646 # assert i_in.valid = '1' severity failure;
1647 # assert i_in.insn = x"00000040"
1648 # report "insn @" & to_hstring(i_out.nia) &
1649 # "=" & to_hstring(i_in.insn) &
1650 # " expected 00000040"
1651 # severity failure;
1652 #
1653 # i_out.req <= '0';
1654 #
1655 # std.env.finish;
1656 # end process;
1657 # end;
def icache_sim(dut):
    """Simulation stimulus: a miss, a hit, another miss, then an access
    that aliases the first line — checking the instruction word returned
    each time (mirrors the "stim" process of the VHDL icache_tb).

    Note the deliberate crossover: i_out here is the *request* port
    (dut.i_in, fetch1 -> icache) and i_in is the *response* port
    (dut.i_out, icache -> decode1).
    """
    i_out = dut.i_in   # request interface (fetch1 -> icache)
    i_in = dut.i_out   # response interface (icache -> decode1)
    m_out = dut.m_in   # MMU -> icache interface

    # quiesce all inputs
    yield i_in.valid.eq(0)
    yield i_out.priv_mode.eq(1)
    yield i_out.req.eq(0)
    yield i_out.nia.eq(0)
    yield i_out.stop_mark.eq(0)
    yield m_out.tlbld.eq(0)
    yield m_out.tlbie.eq(0)
    yield m_out.addr.eq(0)
    yield m_out.pte.eq(0)
    yield
    yield
    yield
    yield

    # first access: a miss -- allow 30 cycles for the line reload
    yield i_out.req.eq(1)
    yield i_out.nia.eq(Const(0x0000000000000004, 64))
    for i in range(30):
        yield
    yield
    valid = yield i_in.valid
    nia = yield i_out.nia
    insn = yield i_in.insn
    print(f"valid? {valid}")
    assert valid
    assert insn == 0x00000001, \
        "insn @%x=%x expected 00000001" % (nia, insn)
    yield i_out.req.eq(0)
    yield

    # hit: next word of the same line, available after 2 cycles
    yield i_out.req.eq(1)
    yield i_out.nia.eq(Const(0x0000000000000008, 64))
    yield
    yield
    valid = yield i_in.valid
    nia = yield i_in.nia
    insn = yield i_in.insn
    assert valid
    assert insn == 0x00000002, \
        "insn @%x=%x expected 00000002" % (nia, insn)
    yield

    # another miss
    yield i_out.req.eq(1)
    yield i_out.nia.eq(Const(0x0000000000000040, 64))
    for i in range(30):
        yield
    yield
    valid = yield i_in.valid
    nia = yield i_out.nia
    insn = yield i_in.insn
    assert valid
    assert insn == 0x00000010, \
        "insn @%x=%x expected 00000010" % (nia, insn)

    # test something that aliases: must first present as a miss
    yield i_out.req.eq(1)
    yield i_out.nia.eq(Const(0x0000000000000100, 64))
    yield
    yield
    valid = yield i_in.valid
    # bug fix: was "assert ~valid" -- bitwise NOT of a Python int is
    # always truthy (~0 == -1), so that assertion could never fire;
    # a logical "not" is required to check valid is low here
    assert not valid
    for i in range(30):
        yield
    yield
    valid = yield i_in.valid
    nia = yield i_out.nia   # re-read: previous nia was stale
    insn = yield i_in.insn
    assert valid
    assert insn == 0x00000040, \
        "insn @%x=%x expected 00000040" % (nia, insn)
    yield i_out.req.eq(0)
1734
1735
1736
def test_icache(mem):
    """Instantiate an ICache wired to a wishbone SRAM model seeded
    with *mem*, then run the icache_sim stimulus against it."""
    dut = ICache()

    # backing store: 64-bit wide, 16 cache lines' worth of rows
    backing = Memory(width=64, depth=16*64, init=mem)
    sram = SRAM(memory=backing, granularity=8)

    m = Module()
    m.submodules.icache = dut
    m.submodules.sram = sram

    # wishbone master (icache) -> SRAM slave; the SRAM is row-addressed,
    # hence the byte-address shift on adr
    m.d.comb += [
        sram.bus.cyc.eq(dut.wb_out.cyc),
        sram.bus.stb.eq(dut.wb_out.stb),
        sram.bus.we.eq(dut.wb_out.we),
        sram.bus.sel.eq(dut.wb_out.sel),
        sram.bus.adr.eq(dut.wb_out.adr[3:]),
        sram.bus.dat_w.eq(dut.wb_out.dat),
    ]

    # SRAM slave -> icache master response path
    m.d.comb += [
        dut.wb_in.ack.eq(sram.bus.ack),
        dut.wb_in.dat.eq(sram.bus.dat_r),
    ]

    # nmigen Simulation
    sim = Simulator(m)
    sim.add_clock(1e-6)
    sim.add_sync_process(wrap(icache_sim(dut)))
    with sim.write_vcd('test_icache.vcd'):
        sim.run()
1765
if __name__ == '__main__':
    # emit RTLIL for synthesis/inspection
    dut = ICache()
    vl = rtlil.convert(dut, ports=[])
    with open("test_icache.il", "w") as f:
        f.write(vl)

    # test memory: each 64-bit row packs two consecutive 32-bit
    # "instruction" words, values 0, 1, 2, ...
    mem = [(i*2) | ((i*2+1) << 32) for i in range(512)]

    test_icache(mem)
1777