# icache.py: rearrange the code within the base class ICache
# [soc.git] / src / soc / experiment / icache.py
1 """ICache
2
3 based on Anton Blanchard microwatt icache.vhdl
4
5 Set associative icache
6
7 TODO (in no specific order):
8 * Add debug interface to inspect cache content
9 * Add snoop/invalidate path
10 * Add multi-hit error detection
11 * Pipelined bus interface (wb or axi)
12 * Maybe add parity? There's a few bits free in each BRAM row on Xilinx
13 * Add optimization: service hits on partially loaded lines
* Add optimization: (maybe) interrupt reload on flush/redirect
15 * Check if playing with the geometry of the cache tags allow for more
16 efficient use of distributed RAM and less logic/muxes. Currently we
17 write TAG_BITS width which may not match full ram blocks and might
18 cause muxes to be inferred for "partial writes".
19 * Check if making the read size of PLRU a ROM helps utilization
20
21 """
from enum import Enum, unique

from nmigen import Module, Signal, Elaboratable, Cat, Const
from nmigen.cli import main
from nmigen.cli import rtlil
from nmigen.utils import log2_int

from nmutil.iocontrol import RecordObject
from nmutil.byterev import byte_reverse
from nmutil.mask import Mask

from soc.experiment.mem_types import (Fetch1ToICacheType,
                                      ICacheToDecode1Type,
                                      MMUToICacheType)
from soc.experiment.wb_types import (WB_ADDR_BITS, WB_DATA_BITS, WB_SEL_BITS,
                                     WBAddrType, WBDataType, WBSelType,
                                     WBMasterOut, WBSlaveOut,
                                     WBMasterOutVector, WBSlaveOutVector,
                                     WBIOMasterOut, WBIOSlaveOut)
from soc.experiment.plru import PLRU
from soc.experiment.cache_ram import CacheRam
42
43 # Cache reload state machine
@unique
class State(Enum):
    """States of the cache-reload state machine.

    The original listed the member names with no values, which is a
    NameError at class-creation time; Enum members need explicit values.
    """
    IDLE     = 0
    CLR_TAG  = 1
    WAIT_ACK = 2
49
50 # type reg_internal_t is record
51 # -- Cache hit state (Latches for 1 cycle BRAM access)
52 # hit_way : way_t;
53 # hit_nia : std_ulogic_vector(63 downto 0);
54 # hit_smark : std_ulogic;
55 # hit_valid : std_ulogic;
56 #
57 # -- Cache miss state (reload state machine)
58 # state : state_t;
59 # wb : wishbone_master_out;
60 # store_way : way_t;
61 # store_index : index_t;
62 # store_row : row_t;
63 # store_tag : cache_tag_t;
64 # store_valid : std_ulogic;
65 # end_row_ix : row_in_line_t;
66 # rows_valid : row_per_line_valid_t;
67 #
68 # -- TLB miss state
69 # fetch_failed : std_ulogic;
70 # end record;
class RegInternal(RecordObject):
    """Internal latched state of the icache (VHDL reg_internal_t).

    Groups the one-cycle BRAM-access hit latches, the reload state
    machine registers, and the TLB-miss flag into one record.
    """
    def __init__(self):
        super().__init__()
        # Cache hit state (latches for 1 cycle BRAM access)
        self.hit_way   = Signal(NUM_WAYS)
        self.hit_nia   = Signal(64)
        self.hit_smark = Signal()
        self.hit_valid = Signal()

        # Cache miss state (reload state machine)
        # Bug fix: `State()` cannot be instantiated (an Enum call needs a
        # member value).  The FSM state lives in hardware, so it must be
        # a Signal with the enum as its shape.
        self.state       = Signal(State, reset=State.IDLE)
        self.wb          = WBMasterOut()
        # NOTE(review): widths below follow the original; way/index/row
        # counts would normally want log2 widths — confirm against the
        # constants' definitions.
        self.store_way   = Signal(NUM_WAYS)
        self.store_index = Signal(NUM_LINES)
        self.store_row   = Signal(BRAM_ROWS)
        self.store_tag   = Signal(TAG_BITS)
        self.store_valid = Signal()
        self.end_row_ix  = Signal(ROW_LINE_BITS)
        self.rows_valid  = RowPerLineValidArray()

        # TLB miss state
        self.fetch_failed = Signal()
93
94 # -- 64 bit direct mapped icache. All instructions are 4B aligned.
95 #
96 # entity icache is
97 # generic (
98 # SIM : boolean := false;
99 # -- Line size in bytes
100 # LINE_SIZE : positive := 64;
101 # -- BRAM organisation: We never access more than wishbone_data_bits
102 # -- at a time so to save resources we make the array only that wide,
103 # -- and use consecutive indices for to make a cache "line"
104 # --
105 # -- ROW_SIZE is the width in bytes of the BRAM (based on WB,
106 # -- so 64-bits)
107 # ROW_SIZE : positive := wishbone_data_bits / 8;
108 # -- Number of lines in a set
109 # NUM_LINES : positive := 32;
110 # -- Number of ways
111 # NUM_WAYS : positive := 4;
112 # -- L1 ITLB number of entries (direct mapped)
113 # TLB_SIZE : positive := 64;
114 # -- L1 ITLB log_2(page_size)
115 # TLB_LG_PGSZ : positive := 12;
116 # -- Number of real address bits that we store
117 # REAL_ADDR_BITS : positive := 56;
118 # -- Non-zero to enable log data collection
119 # LOG_LENGTH : natural := 0
120 # );
121 # port (
122 # clk : in std_ulogic;
123 # rst : in std_ulogic;
124 #
125 # i_in : in Fetch1ToIcacheType;
126 # i_out : out IcacheToDecode1Type;
127 #
128 # m_in : in MmuToIcacheType;
129 #
130 # stall_in : in std_ulogic;
131 # stall_out : out std_ulogic;
132 # flush_in : in std_ulogic;
133 # inval_in : in std_ulogic;
134 #
135 # wishbone_out : out wishbone_master_out;
136 # wishbone_in : in wishbone_slave_out;
137 #
138 # log_out : out std_ulogic_vector(53 downto 0)
139 # );
140 # end entity icache;
141 # 64 bit direct mapped icache. All instructions are 4B aligned.
142 class ICache(Elaboratable):
143 """64 bit direct mapped icache. All instructions are 4B aligned."""
def __init__(self):
    """Set up icache geometry parameters and port signals.

    64 bit direct mapped icache.  All instructions are 4B aligned.
    Ports mirror the VHDL entity declaration above.
    """
    self.SIM = 0
    # Line size in bytes
    self.LINE_SIZE = 64
    # BRAM organisation: We never access more than wishbone_data_bits
    # at a time so to save resources we make the array only that wide,
    # and use consecutive indices to make a cache "line"
    #
    # ROW_SIZE is the width in bytes of the BRAM (based on WB, so 64-bits).
    # Bug fix: use integer division — '/' yields a float, which poisons
    # every width calculation derived from ROW_SIZE.
    self.ROW_SIZE = WB_DATA_BITS // 8
    # Number of lines in a set
    self.NUM_LINES = 32
    # Number of ways
    self.NUM_WAYS = 4
    # L1 ITLB number of entries (direct mapped)
    self.TLB_SIZE = 64
    # L1 ITLB log_2(page_size)
    self.TLB_LG_PGSZ = 12
    # Number of real address bits that we store
    self.REAL_ADDR_BITS = 56
    # Non-zero to enable log data collection
    self.LOG_LENGTH = 0

    # Instruction fetch request/response ports
    self.i_in = Fetch1ToICacheType()
    self.i_out = ICacheToDecode1Type()

    # MMU interface (TLB loads / invalidations)
    self.m_in = MMUToICacheType()

    self.stall_in = Signal()
    self.stall_out = Signal()
    self.flush_in = Signal()
    self.inval_in = Signal()

    # Wishbone master interface used for cache reloads
    self.wb_out = WBMasterOut()
    self.wb_in = WBSlaveOut()

    self.log_out = Signal(54)
180
181 # -- Return the cache line index (tag index) for an address
182 # function get_index(addr: std_ulogic_vector(63 downto 0))
183 # return index_t is
184 # begin
185 # return to_integer(unsigned(
186 # addr(SET_SIZE_BITS - 1 downto LINE_OFF_BITS)
187 # ));
188 # end;
189 # Return the cache line index (tag index) for an address
def get_index(addr):
    """Return the cache line index (tag index) for an address."""
    line_index = addr[LINE_OFF_BITS:SET_SIZE_BITS]
    return line_index
192
193 # -- Return the cache row index (data memory) for an address
194 # function get_row(addr: std_ulogic_vector(63 downto 0)) return row_t is
195 # begin
196 # return to_integer(unsigned(
197 # addr(SET_SIZE_BITS - 1 downto ROW_OFF_BITS)
198 # ));
199 # end;
200 # Return the cache row index (data memory) for an address
def get_row(addr):
    """Return the cache row index (data memory) for an address."""
    row_index = addr[ROW_OFF_BITS:SET_SIZE_BITS]
    return row_index
203
204 # -- Return the index of a row within a line
205 # function get_row_of_line(row: row_t) return row_in_line_t is
206 # variable row_v : unsigned(ROW_BITS-1 downto 0);
207 # begin
208 # row_v := to_unsigned(row, ROW_BITS);
209 # return row_v(ROW_LINEBITS-1 downto 0);
210 # end;
211 # Return the index of a row within a line
def get_row_of_line(row):
    """Return the index of a row within its cache line.

    Bug fix: the original computed the slice but never returned it, so
    every caller received None.
    """
    return row[:ROW_LINE_BITS]
214
215 # -- Returns whether this is the last row of a line
216 # function is_last_row_addr(addr: wishbone_addr_type; last: row_in_line_t)
217 # return boolean is
218 # begin
219 # return unsigned(addr(LINE_OFF_BITS-1 downto ROW_OFF_BITS)) = last;
220 # end;
221 # Returns whether this is the last row of a line
def is_last_row_addr(addr, last):
    """Return whether *addr* addresses the last row of a cache line."""
    row_within_line = addr[ROW_OFF_BITS:LINE_OFF_BITS]
    return row_within_line == last
224
225 # -- Returns whether this is the last row of a line
226 # function is_last_row(row: row_t; last: row_in_line_t) return boolean is
227 # begin
228 # return get_row_of_line(row) = last;
229 # end;
230 # Returns whether this is the last row of a line
def is_last_row(row, last):
    """Return whether *row* is the last row of its cache line."""
    row_in_line = get_row_of_line(row)
    return row_in_line == last
233
234 # -- Return the address of the next row in the current cache line
235 # function next_row_addr(addr: wishbone_addr_type)
236 # return std_ulogic_vector is
237 # variable row_idx : std_ulogic_vector(ROW_LINEBITS-1 downto 0);
238 # variable result : wishbone_addr_type;
239 # begin
240 # -- Is there no simpler way in VHDL to generate that 3 bits adder ?
241 # row_idx := addr(LINE_OFF_BITS-1 downto ROW_OFF_BITS);
242 # row_idx := std_ulogic_vector(unsigned(row_idx) + 1);
243 # result := addr;
244 # result(LINE_OFF_BITS-1 downto ROW_OFF_BITS) := row_idx;
245 # return result;
246 # end;
247 # Return the address of the next row in the current cache line
def next_row_addr(addr):
    """Return the address of the next row in the current cache line.

    The VHDL increments only the row-index field of the address
    (bits ROW_OFF_BITS..LINE_OFF_BITS-1) and leaves all other bits
    untouched, so the adder is only ROW_LINE_BITS wide and the row
    index wraps within the line.  The original body was empty (a
    SyntaxError); this implements the same semantics with Cat.
    """
    row_idx = addr[ROW_OFF_BITS:LINE_OFF_BITS] + 1
    # Truncating the sum back to ROW_LINE_BITS discards the carry,
    # giving the intentional wrap-around within the line.
    return Cat(addr[:ROW_OFF_BITS],
               row_idx[:ROW_LINE_BITS],
               addr[LINE_OFF_BITS:])
251
252 # -- Return the next row in the current cache line. We use a dedicated
253 # -- function in order to limit the size of the generated adder to be
254 # -- only the bits within a cache line (3 bits with default settings)
255 # function next_row(row: row_t) return row_t is
256 # variable row_v : std_ulogic_vector(ROW_BITS-1 downto 0);
257 # variable row_idx : std_ulogic_vector(ROW_LINEBITS-1 downto 0);
258 # variable result : std_ulogic_vector(ROW_BITS-1 downto 0);
259 # begin
260 # row_v := std_ulogic_vector(to_unsigned(row, ROW_BITS));
261 # row_idx := row_v(ROW_LINEBITS-1 downto 0);
262 # row_v(ROW_LINEBITS-1 downto 0) :=
263 # std_ulogic_vector(unsigned(row_idx) + 1);
264 # return to_integer(unsigned(row_v));
265 # end;
266 # Return the next row in the current cache line. We use a dedicated
267 # function in order to limit the size of the generated adder to be
268 # only the bits within a cache line (3 bits with default settings)
def next_row(row):
    """Return the next row in the current cache line.

    We use a dedicated function in order to limit the size of the
    generated adder to be only the bits within a cache line
    (ROW_LINE_BITS, 3 bits with default settings).  The original body
    was empty (a SyntaxError); this mirrors the VHDL: increment the
    low ROW_LINE_BITS, keep the upper bits, wrap on carry.
    """
    row_idx = row[:ROW_LINE_BITS] + 1
    return Cat(row_idx[:ROW_LINE_BITS], row[ROW_LINE_BITS:])
272
273 # -- Read the instruction word for the given address in the
274 # -- current cache row
275 # function read_insn_word(addr: std_ulogic_vector(63 downto 0);
276 # data: cache_row_t) return std_ulogic_vector is
277 # variable word: integer range 0 to INSN_PER_ROW-1;
278 # begin
279 # word := to_integer(unsigned(addr(INSN_BITS+2-1 downto 2)));
280 # return data(31+word*32 downto word*32);
281 # end;
282 # Read the instruction word for the given address
283 # in the current cache row
def read_insn_word(addr, data):
    """Read the 32-bit instruction word selected by *addr* out of cache
    row *data*.

    Bug fixes: the VHDL slice addr(INSN_BITS+2-1 downto 2) is
    addr[2:INSN_BITS+2] in Python (the original had +3, one bit too
    many), and *word* is a Signal, so the 32-bit chunk must be picked
    with word_select — Python slicing needs constant bounds.
    """
    word = addr[2:INSN_BITS+2]
    return data.word_select(word, 32)
287
288 # -- Get the tag value from the address
289 # function get_tag(addr: std_ulogic_vector(REAL_ADDR_BITS - 1 downto 0))
290 # return cache_tag_t is
291 # begin
292 # return addr(REAL_ADDR_BITS - 1 downto SET_SIZE_BITS);
293 # end;
294 # Get the tag value from the address
def get_tag(addr):
    """Extract the tag field from a real address."""
    tag = addr[SET_SIZE_BITS:REAL_ADDR_BITS]
    return tag
297
298 # -- Read a tag from a tag memory row
299 # function read_tag(way: way_t; tagset: cache_tags_set_t)
300 # return cache_tag_t is
301 # begin
302 # return tagset((way+1) * TAG_BITS - 1 downto way * TAG_BITS);
303 # end;
304 # Read a tag from a tag memory row
def read_tag(way, tagset):
    """Read one way's tag out of a tag-memory row."""
    lo = way * TAG_BITS
    return tagset[lo:lo + TAG_BITS]
307
308 # -- Write a tag to tag memory row
309 # procedure write_tag(way: in way_t; tagset: inout cache_tags_set_t;
310 # tag: cache_tag_t) is
311 # begin
312 # tagset((way+1) * TAG_BITS - 1 downto way * TAG_BITS) := tag;
313 # end;
314 # Write a tag to tag memory row
def write_tag(way, tagset, tag):
    """Return an assignment writing *tag* into *way*'s slot of *tagset*.

    Bug fix: plain item-assignment (tagset[a:b] = tag) is a Python
    __setitem__, which an nmigen Value does not support and which, even
    if it did, would produce no statement to add to a domain.  The
    caller does `sync += write_tag(...)`, so return the .eq() statement.
    """
    return tagset[way * TAG_BITS:(way + 1) * TAG_BITS].eq(tag)
317
318 # -- Simple hash for direct-mapped TLB index
319 # function hash_ea(addr: std_ulogic_vector(63 downto 0))
320 # return tlb_index_t is
321 # variable hash : std_ulogic_vector(TLB_BITS - 1 downto 0);
322 # begin
323 # hash := addr(TLB_LG_PGSZ + TLB_BITS - 1 downto TLB_LG_PGSZ)
324 # xor addr(
325 # TLB_LG_PGSZ + 2 * TLB_BITS - 1 downto
326 # TLB_LG_PGSZ + TLB_BITS
327 # )
328 # xor addr(
329 # TLB_LG_PGSZ + 3 * TLB_BITS - 1 downto
330 # TLB_LG_PGSZ + 2 * TLB_BITS
331 # );
332 # return to_integer(unsigned(hash));
333 # end;
334 # Simple hash for direct-mapped TLB index
def hash_ea(addr):
    """Simple xor-fold hash of an effective address, used as the
    direct-mapped ITLB index.

    Bug fixes: the original's continuation lines had no parentheses
    (SyntaxError), misspelled TLB_LG_PGSZ as TLB_LG_PGSZE, and shadowed
    the builtin `hash`.
    """
    hsh = (addr[TLB_LG_PGSZ:TLB_LG_PGSZ + TLB_BITS]
           ^ addr[TLB_LG_PGSZ + TLB_BITS:TLB_LG_PGSZ + 2 * TLB_BITS]
           ^ addr[TLB_LG_PGSZ + 2 * TLB_BITS:TLB_LG_PGSZ + 3 * TLB_BITS])
    return hsh
340
341 # -- Generate a cache RAM for each way
342 # rams: for i in 0 to NUM_WAYS-1 generate
343 # signal do_read : std_ulogic;
344 # signal do_write : std_ulogic;
345 # signal rd_addr : std_ulogic_vector(ROW_BITS-1 downto 0);
346 # signal wr_addr : std_ulogic_vector(ROW_BITS-1 downto 0);
347 # signal dout : cache_row_t;
348 # signal wr_sel : std_ulogic_vector(ROW_SIZE-1 downto 0);
349 # begin
350 # way: entity work.cache_ram
351 # generic map (
352 # ROW_BITS => ROW_BITS,
353 # WIDTH => ROW_SIZE_BITS
354 # )
355 # port map (
356 # clk => clk,
357 # rd_en => do_read,
358 # rd_addr => rd_addr,
359 # rd_data => dout,
360 # wr_sel => wr_sel,
361 # wr_addr => wr_addr,
362 # wr_data => wishbone_in.dat
363 # );
364 # process(all)
365 # begin
366 # do_read <= not (stall_in or use_previous);
367 # do_write <= '0';
368 # if wishbone_in.ack = '1' and replace_way = i then
369 # do_write <= '1';
370 # end if;
371 # cache_out(i) <= dout;
372 # rd_addr <= std_ulogic_vector(to_unsigned(req_row, ROW_BITS));
373 # wr_addr <= std_ulogic_vector(to_unsigned(r.store_row, ROW_BITS));
374 # for i in 0 to ROW_SIZE-1 loop
375 # wr_sel(i) <= do_write;
376 # end loop;
377 # end process;
378 # end generate;
def rams(self, m):
    """Generate one CacheRam instance per way and wire up its read and
    write ports (port of the VHDL `rams` generate block).

    Bug fixes vs. the original: missing colon on the for statement,
    `wr_add` typo, rd_data assigned in the wrong direction, missing
    `comb +=` on the do_write set, `&`/`==` precedence in the ack
    compare, and `Signal(x)` used where a plain connection was meant.
    """
    comb = m.d.comb

    do_read  = Signal()
    do_write = Signal()
    rd_addr  = Signal(ROW_BITS)
    wr_addr  = Signal(ROW_BITS)
    _d_out   = Signal(ROW_SIZE_BITS)
    wr_sel   = Signal(ROW_SIZE)

    for i in range(NUM_WAYS):
        way = CacheRam(ROW_BITS, ROW_SIZE_BITS)
        # A submodule must be registered or it is never elaborated
        m.submodules += way

        comb += way.rd_en.eq(do_read)
        comb += way.rd_addr.eq(rd_addr)
        # The RAM drives rd_data: read FROM it, not into it
        comb += _d_out.eq(way.rd_data)
        comb += way.wr_sel.eq(wr_sel)
        comb += way.wr_addr.eq(wr_addr)
        # Reload data comes straight off the wishbone
        # (assumes the slave-out data field is `dat`, matching r.wb.dat
        # elsewhere in this file — TODO confirm against WBSlaveOut)
        comb += way.wr_data.eq(wb_in.dat)

        comb += do_read.eq(~(stall_in | use_previous))
        comb += do_write.eq(0)

        # nmigen's '&' binds tighter than '==': parenthesise the compare
        with m.If(wb_in.ack & (replace_way == i)):
            comb += do_write.eq(1)

        comb += cache_out[i].eq(_d_out)
        comb += rd_addr.eq(req_row)
        comb += wr_addr.eq(r.store_row)
        for j in range(ROW_SIZE):
            comb += wr_sel[j].eq(do_write)
409
410 # -- Generate PLRUs
411 # maybe_plrus: if NUM_WAYS > 1 generate
412 # begin
413 # plrus: for i in 0 to NUM_LINES-1 generate
414 # -- PLRU interface
415 # signal plru_acc : std_ulogic_vector(WAY_BITS-1 downto 0);
416 # signal plru_acc_en : std_ulogic;
417 # signal plru_out : std_ulogic_vector(WAY_BITS-1 downto 0);
418 #
419 # begin
420 # plru : entity work.plru
421 # generic map (
422 # BITS => WAY_BITS
423 # )
424 # port map (
425 # clk => clk,
426 # rst => rst,
427 # acc => plru_acc,
428 # acc_en => plru_acc_en,
429 # lru => plru_out
430 # );
431 #
432 # process(all)
433 # begin
434 # -- PLRU interface
435 # if get_index(r.hit_nia) = i then
436 # plru_acc_en <= r.hit_valid;
437 # else
438 # plru_acc_en <= '0';
439 # end if;
440 # plru_acc <=
441 # std_ulogic_vector(to_unsigned(r.hit_way, WAY_BITS));
442 # plru_victim(i) <= plru_out;
443 # end process;
444 # end generate;
445 # end generate;
def maybe_plrus(self, m):
    """Generate a PLRU victim picker per cache line, only when the cache
    is set-associative (port of the VHDL `maybe_plrus` generate block).

    Bug fixes vs. the original: `comb += m.d.comb` (should be `=`),
    NUM_WAYS tested with m.If even though it is an elaboration-time
    Python constant, plru.lru driven instead of read, and plru.acc /
    plru.acc_en driven from two places at once.
    """
    comb = m.d.comb

    # Elaboration-time decision: a direct-mapped cache needs no PLRU
    if NUM_WAYS <= 1:
        return

    for i in range(NUM_LINES):
        plru = PLRU(WAY_BITS)
        m.submodules += plru

        # PLRU output (victim way for this line); the PLRU drives lru
        plru_out = Signal(WAY_BITS)
        comb += plru_out.eq(plru.lru)

        # Only the line currently being hit trains its PLRU
        with m.If(get_index(r.hit_nia) == i):
            comb += plru.acc_en.eq(r.hit_valid)
        with m.Else():
            comb += plru.acc_en.eq(0)

        comb += plru.acc.eq(r.hit_way)
        comb += plru_victim[i].eq(plru_out)
469
470 # -- TLB hit detection and real address generation
471 # itlb_lookup : process(all)
472 # variable pte : tlb_pte_t;
473 # variable ttag : tlb_tag_t;
474 # begin
475 # tlb_req_index <= hash_ea(i_in.nia);
476 # pte := itlb_ptes(tlb_req_index);
477 # ttag := itlb_tags(tlb_req_index);
478 # if i_in.virt_mode = '1' then
479 # real_addr <= pte(REAL_ADDR_BITS - 1 downto TLB_LG_PGSZ) &
480 # i_in.nia(TLB_LG_PGSZ - 1 downto 0);
481 # if ttag = i_in.nia(63 downto TLB_LG_PGSZ + TLB_BITS) then
482 # ra_valid <= itlb_valids(tlb_req_index);
483 # else
484 # ra_valid <= '0';
485 # end if;
486 # eaa_priv <= pte(3);
487 # else
488 # real_addr <= i_in.nia(REAL_ADDR_BITS - 1 downto 0);
489 # ra_valid <= '1';
490 # eaa_priv <= '1';
491 # end if;
492 #
493 # -- no IAMR, so no KUEP support for now
494 # priv_fault <= eaa_priv and not i_in.priv_mode;
495 # access_ok <= ra_valid and not priv_fault;
496 # end process;
497 # TLB hit detection and real address generation
def itlb_lookup(self, m):
    """TLB hit detection and real address generation.

    Bug fixes vs. the original: TLB_LB_PGSZ typo, and the virt-mode
    branch dropped the VHDL's `eaa_priv <= pte(3)` assignment, leaving
    the privilege bit undriven on translated accesses.
    """
    comb = m.d.comb

    comb += tlb_req_index.eq(hash_ea(i_in.nia))
    comb += pte.eq(itlb_ptes[tlb_req_index])
    comb += ttag.eq(itlb_tags[tlb_req_index])

    with m.If(i_in.virt_mode):
        # Real address = PTE frame number, page offset from the NIA
        comb += real_addr.eq(Cat(
                     i_in.nia[:TLB_LG_PGSZ],
                     pte[TLB_LG_PGSZ:REAL_ADDR_BITS]))

        with m.If(ttag == i_in.nia[TLB_LG_PGSZ + TLB_BITS:64]):
            comb += ra_valid.eq(itlb_valid_bits[tlb_req_index])
        with m.Else():
            comb += ra_valid.eq(0)

        # pte bit 3 is the privileged-access bit (VHDL: eaa_priv <= pte(3))
        comb += eaa_priv.eq(pte[3])

    with m.Else():
        comb += real_addr.eq(i_in.nia[:REAL_ADDR_BITS])
        comb += ra_valid.eq(1)
        comb += eaa_priv.eq(1)

    # No IAMR, so no KUEP support for now
    comb += priv_fault.eq(eaa_priv & ~i_in.priv_mode)
    comb += access_ok.eq(ra_valid & ~priv_fault)
525
526 # -- iTLB update
527 # itlb_update: process(clk)
528 # variable wr_index : tlb_index_t;
529 # begin
530 # if rising_edge(clk) then
531 # wr_index := hash_ea(m_in.addr);
532 # if rst = '1' or (m_in.tlbie = '1' and m_in.doall = '1') then
533 # -- clear all valid bits
534 # for i in tlb_index_t loop
535 # itlb_valids(i) <= '0';
536 # end loop;
537 # elsif m_in.tlbie = '1' then
538 # -- clear entry regardless of hit or miss
539 # itlb_valids(wr_index) <= '0';
540 # elsif m_in.tlbld = '1' then
541 # itlb_tags(wr_index) <= m_in.addr(
542 # 63 downto TLB_LG_PGSZ + TLB_BITS
543 # );
544 # itlb_ptes(wr_index) <= m_in.pte;
545 # itlb_valids(wr_index) <= '1';
546 # end if;
547 # end if;
548 # end process;
549 # iTLB update
def itlb_update(self, m):
    """iTLB update: handle MMU tlbie (invalidate) and tlbld (load).

    Bug fixes vs. the original: `itlb_vlaids` typo; wr_index was driven
    with sync (one cycle late — the VHDL uses a same-cycle variable, so
    it must be combinatorial); Signal(TLB_SIZE) allocated a 64-bit-wide
    signal where a log2(TLB_SIZE)-bit index is meant; and a placeholder
    string literal sat inside the reset condition.
    """
    comb = m.d.comb
    sync = m.d.sync

    # Same-cycle index (VHDL variable), TLB_BITS == log2(TLB_SIZE) wide
    wr_index = Signal(TLB_BITS)
    comb += wr_index.eq(hash_ea(m_in.addr))

    # NOTE(review): the VHDL's explicit rst term is dropped here — in
    # nmigen the sync domain's reset clears registered state.  TODO
    # confirm itlb_valid_bits are declared with reset=0.
    with m.If(m_in.tlbie & m_in.doall):
        # Clear all valid bits
        for i in range(TLB_SIZE):
            sync += itlb_valid_bits[i].eq(0)

    with m.Elif(m_in.tlbie):
        # Clear entry regardless of hit or miss
        sync += itlb_valid_bits[wr_index].eq(0)

    with m.Elif(m_in.tlbld):
        sync += itlb_tags[wr_index].eq(m_in.addr[TLB_LG_PGSZ + TLB_BITS:64])
        sync += itlb_ptes[wr_index].eq(m_in.pte)
        sync += itlb_valid_bits[wr_index].eq(1)
571
572 # -- Cache hit detection, output to fetch2 and other misc logic
573 # icache_comb : process(all)
574 # Cache hit detection, output to fetch2 and other misc logic
def icache_comb(self, m):
    """Cache hit detection, output to fetch2 and other misc logic
    (port of the VHDL `icache_comb` process).

    Bug fixes vs. the original: `m.else()` → `m.Else()`, `i_in.rq` →
    `i_in.req`, `cahce_tags` typo, wrong zero-pad width in the req_laddr
    Cat, the dropped `req_hit_way <= hit_way` assignment, and a
    placeholder string literal inside the hit/miss gating expression.
    """
    comb = m.d.comb

    is_hit  = Signal()
    # NOTE(review): follows the original's width; WAY_BITS would suffice
    hit_way = Signal(NUM_WAYS)

    # i_in.sequential means that i_in.nia this cycle is 4 more than last
    # cycle.  If we read more than 32 bits at a time, had a cache hit
    # last cycle, and we don't want the first 32-bit chunk then we can
    # keep the data we read last cycle and just use that.
    with m.If(i_in.nia[2:INSN_BITS+2] != 0):
        comb += use_previous.eq(i_in.sequential & r.hit_valid)
    with m.Else():
        comb += use_previous.eq(0)

    # Extract line, row and tag from request
    comb += req_index.eq(get_index(i_in.nia))
    comb += req_row.eq(get_row(i_in.nia))
    comb += req_tag.eq(get_tag(real_addr))

    # Calculate address of beginning of cache row, will be used for
    # cache miss processing if needed.  The high pad must bring the
    # total to 64 bits, i.e. 64 - REAL_ADDR_BITS zeros (the original
    # padded by REAL_ADDR_BITS, giving the wrong overall width).
    comb += req_laddr.eq(Cat(
                 Const(0, ROW_OFF_BITS),
                 real_addr[ROW_OFF_BITS:REAL_ADDR_BITS],
                 Const(0, 64 - REAL_ADDR_BITS)))

    # Test if pending request is a hit on any way.  A way also counts
    # while it is being reloaded, once the needed row has arrived.
    for i in range(NUM_WAYS):
        with m.If(i_in.req &
                  (cache_valid_bits[req_index][i] |
                   ((r.state == State.WAIT_ACK)
                    & (req_index == r.store_index)
                    & (i == r.store_way)
                    & r.rows_valid[req_row % ROW_PER_LINE]))):
            with m.If(read_tag(i, cache_tags[req_index]) == req_tag):
                comb += hit_way.eq(i)
                comb += is_hit.eq(1)

    # Generate the "hit" and "miss" signals for the synchronous blocks.
    # (The VHDL also gated on rst = '0'; nmigen's sync-domain reset
    # makes that term unnecessary here.)
    with m.If(i_in.req & access_ok & ~flush_in):
        comb += req_is_hit.eq(is_hit)
        comb += req_is_miss.eq(~is_hit)
    with m.Else():
        comb += req_is_hit.eq(0)
        comb += req_is_miss.eq(0)

    # VHDL: req_hit_way <= hit_way (this assignment was dropped in the
    # original port, leaving req_hit_way undriven)
    comb += req_hit_way.eq(hit_way)

    # The way to replace on a miss
    with m.If(r.state == State.CLR_TAG):
        comb += replace_way.eq(plru_victim[r.store_index])
    with m.Else():
        comb += replace_way.eq(r.store_way)

    # Output instruction from current cache row
    #
    # Note: This is a mild violation of our design principle of having
    # pipeline stages output from a clean latch.  In this case we output
    # the result of a mux.  The alternative would be output an entire
    # row which I prefer not to do just yet as it would force fetch2 to
    # know about some of the cache geometry information.
    comb += i_out.insn.eq(read_insn_word(r.hit_nia, cache_out[r.hit_way]))
    comb += i_out.valid.eq(r.hit_valid)
    comb += i_out.nia.eq(r.hit_nia)
    comb += i_out.stop_mark.eq(r.hit_smark)
    comb += i_out.fetch_failed.eq(r.fetch_failed)

    # Stall fetch1 if we have a miss on cache or TLB or a protection fault
    comb += stall_out.eq(~(is_hit & access_ok))

    # Wishbone requests output (from the cache miss reload machine)
    comb += wb_out.eq(r.wb)
725
726 # -- Cache hit synchronous machine
727 # icache_hit : process(clk)
728 # Cache hit synchronous machine
def icache_hit(self, m):
    """Cache hit synchronous machine (port of the VHDL `icache_hit`
    process): latch hit results for fetch2 one cycle after the BRAM read.

    Bug fix vs. the original: a placeholder string literal sat inside
    the flush condition; the explicit rst term is handled by nmigen's
    sync-domain reset and is dropped.
    """
    sync = m.d.sync

    # Keep outputs to fetch2 unchanged on a stall, except that flush
    # sets valid to 0.  If use_previous, keep the same data as last
    # cycle and use the second half.
    with m.If(stall_in | use_previous):
        with m.If(flush_in):
            sync += r.hit_valid.eq(0)

    with m.Else():
        # On a hit, latch the request for the next cycle, when the BRAM
        # data will be available on the cache_out output of the
        # corresponding way
        sync += r.hit_valid.eq(req_is_hit)

        with m.If(req_is_hit):
            sync += r.hit_way.eq(req_hit_way)

            # NOTE(review): unlike the VHDL `report`, this print fires
            # once at elaboration time, not per simulated cycle — kept
            # as a debug stub until a proper sim hook exists.
            print(f"cache hit nia:{i_in.nia}, IR:{i_in.virt_mode}, "
                  f"SM:{i_in.stop_mark}, idx:{req_index}, "
                  f"tag:{req_tag}, way:{req_hit_way}, RA:{real_addr}")

    with m.If(~stall_in):
        # Send stop marks and NIA down regardless of validity
        sync += r.hit_smark.eq(i_in.stop_mark)
        sync += r.hit_nia.eq(i_in.nia)
787
788 # -- Cache miss/reload synchronous machine
789 # icache_miss : process(clk)
790 # Cache miss/reload synchronous machine
791 def icache_miss(self, m):
792 comb = m.d.comb
793 sync = m.d.sync
794
795 # variable tagset : cache_tags_set_t;
796 # variable stbs_done : boolean;
797
798 tagset = Signal(TAG_RAM_WIDTH)
799 stbs_done = Signal()
800
801 # begin
802 # if rising_edge(clk) then
803 # -- On reset, clear all valid bits to force misses
804 # if rst = '1' then
805 # On reset, clear all valid bits to force misses
806 with m.If('''TODO rst nmigen'''):
807 # for i in index_t loop
808 # cache_valids(i) <= (others => '0');
809 # end loop;
810 for i in Signal(NUM_LINES):
811 sync += cache_valid_bits[i].eq(~1)
812
813 # r.state <= IDLE;
814 # r.wb.cyc <= '0';
815 # r.wb.stb <= '0';
816 sync += r.state.eq(State.IDLE)
817 sync += r.wb.cyc.eq(0)
818 sync += r.wb.stb.eq(0)
819
820 # -- We only ever do reads on wishbone
821 # r.wb.dat <= (others => '0');
822 # r.wb.sel <= "11111111";
823 # r.wb.we <= '0';
824 # We only ever do reads on wishbone
825 sync += r.wb.dat.eq(~1)
826 sync += r.wb.sel.eq(Const(0b11111111, 8))
827 sync += r.wb.we.eq(0)
828
829 # -- Not useful normally but helps avoiding tons of sim warnings
830 # r.wb.adr <= (others => '0');
831 # Not useful normally but helps avoiding tons of sim warnings
832 sync += r.wb.adr.eq(~1)
833
834 # else
835 with m.Else():
836 # -- Process cache invalidations
837 # if inval_in = '1' then
838 # for i in index_t loop
839 # cache_valids(i) <= (others => '0');
840 # end loop;
841 # r.store_valid <= '0';
842 # end if;
843 # Process cache invalidations
844 with m.If(inval_in):
845 for i in range(NUM_LINES):
846 sync += cache_valid_bits[i].eq(~1)
847
848 sync += r.store_valid.eq(0)
849
850 # -- Main state machine
851 # case r.state is
852 # Main state machine
853 with m.Switch(r.state):
854
855 # when IDLE =>
856 with m.Case(State.IDLE):
857 # -- Reset per-row valid flags, only used in WAIT_ACK
858 # for i in 0 to ROW_PER_LINE - 1 loop
859 # r.rows_valid(i) <= '0';
860 # end loop;
861 # Reset per-row valid flags, onlyy used in WAIT_ACK
862 for i in range(ROW_PER_LINE):
863 sync += r.rows_valid[i].eq(0)
864
865 # -- We need to read a cache line
866 # if req_is_miss = '1' then
867 # report "cache miss nia:" & to_hstring(i_in.nia) &
868 # " IR:" & std_ulogic'image(i_in.virt_mode) &
869 # " SM:" & std_ulogic'image(i_in.stop_mark) &
870 # " idx:" & integer'image(req_index) &
871 # " way:" & integer'image(replace_way) &
872 # " tag:" & to_hstring(req_tag) &
873 # " RA:" & to_hstring(real_addr);
874 # We need to read a cache line
875 with m.If(req_is_miss):
876 print(f"cache miss nia:{i_in.nia} " \
877 f"IR:{i_in.virt_mode} " \
878 f"SM:{i_in.stop_mark} idx:{req_index} " \
879 f"way:{replace_way} tag:{req_tag} " \
880 f"RA:{real_addr}")
881
882 # -- Keep track of our index and way for
883 # -- subsequent stores
884 # r.store_index <= req_index;
885 # r.store_row <= get_row(req_laddr);
886 # r.store_tag <= req_tag;
887 # r.store_valid <= '1';
888 # r.end_row_ix <=
889 # get_row_of_line(get_row(req_laddr)) - 1;
890 # Keep track of our index and way
891 # for subsequent stores
892 sync += r.store_index.eq(req_index)
893 sync += r.store_row.eq(get_row(req_laddr))
894 sync += r.store_tag.eq(req_tag)
895 sync += r.store_valid.eq(1)
896 sync += r.end_row_ix.eq(
897 get_row_of_line(get_row(req_laddr)) - 1
898 )
899
900 # -- Prep for first wishbone read. We calculate the
901 # -- address of the start of the cache line and
902 # -- start the WB cycle.
903 # r.wb.adr <= req_laddr(r.wb.adr'left downto 0);
904 # r.wb.cyc <= '1';
905 # r.wb.stb <= '1';
906 # Prep for first wishbone read. We calculate the
907 # address of the start of the cache line and
908 # start the WB cycle.
909 sync += r.wb.adr.eq(
910 req_laddr[:r.wb.adr'''left?''']
911 )
912
913 # -- Track that we had one request sent
914 # r.state <= CLR_TAG;
915 # Track that we had one request sent
916 sync += r.state.eq(State.CLR_TAG)
917 # end if;
918
919 # when CLR_TAG | WAIT_ACK =>
920 with m.Case(State.CLR_TAG, State.WAIT_ACK):
921 # if r.state = CLR_TAG then
922 with m.If(r.state == State.CLR_TAG):
923 # -- Get victim way from plru
924 # r.store_way <= replace_way;
925 # Get victim way from plru
926 sync += r.store_way.eq(replace_way)
927 #
928 # -- Force misses on that way while reloading that line
929 # cache_valids(req_index)(replace_way) <= '0';
930 # Force misses on that way while
931 # realoading that line
932 sync += cache_valid_bits[
933 req_index
934 ][replace_way].eq(0)
935
936 # -- Store new tag in selected way
937 # for i in 0 to NUM_WAYS-1 loop
938 # if i = replace_way then
939 # tagset := cache_tags(r.store_index);
940 # write_tag(i, tagset, r.store_tag);
941 # cache_tags(r.store_index) <= tagset;
942 # end if;
943 # end loop;
944 for i in range(NUM_WAYS):
945 with m.If(i == replace_way):
946 comb += tagset.eq(
947 cache_tags[r.store_index]
948 )
949 sync += write_tag(i, tagset, r.store_tag)
950 sync += cache_tags(r.store_index).eq(
951 tagset
952 )
953
954 # r.state <= WAIT_ACK;
955 sync += r.state.eq(State.WAIT_ACK)
956 # end if;
957
958 # -- Requests are all sent if stb is 0
959 # stbs_done := r.wb.stb = '0';
960 # Requests are all sent if stb is 0
961 comb += stbs_done.eq(r.wb.stb == 0)
962
963 # -- If we are still sending requests, was one accepted ?
964 # if wishbone_in.stall = '0' and not stbs_done then
965 # If we are still sending requests, was one accepted?
966 with m.If(~wb_in.stall & ~stbs_done):
967 # -- That was the last word ? We are done sending.
968 # -- Clear stb and set stbs_done so we can handle
969 # -- an eventual last ack on the same cycle.
970 # if is_last_row_addr(r.wb.adr, r.end_row_ix) then
971 # r.wb.stb <= '0';
972 # stbs_done := true;
973 # end if;
974 # That was the last word ? We are done sending.
975 # Clear stb and set stbs_done so we can handle
976 # an eventual last ack on the same cycle.
977 with m.If(is_last_row_addr(
978 r.wb.adr, r.end_row_ix)):
979 sync += r.wb.stb.eq(0)
980 stbs_done.eq(1)
981
982 # -- Calculate the next row address
983 # r.wb.adr <= next_row_addr(r.wb.adr);
984 # Calculate the next row address
985 sync += r.wb.adr.eq(next_row_addr(r.wb.adr))
986 # end if;
987
988 # -- Incoming acks processing
989 # if wishbone_in.ack = '1' then
990 # Incoming acks processing
991 with m.If(wb_in.ack):
992 # r.rows_valid(r.store_row mod ROW_PER_LINE) <= '1';
993 sync += r.rows_valid[
994 r.store_row & ROW_PER_LINE
995 ].eq(1)
996
997 # -- Check for completion
998 # if stbs_done and
999 # is_last_row(r.store_row, r.end_row_ix) then
1000 # Check for completion
1001 with m.If(stbs_done & is_last_row(
1002 r.store_row, r.end_row_ix)):
1003 # -- Complete wishbone cycle
1004 # r.wb.cyc <= '0';
1005 # Complete wishbone cycle
1006 sync += r.wb.cyc.eq(0)
1007
1008 # -- Cache line is now valid
1009 # cache_valids(r.store_index)(replace_way) <=
1010 # r.store_valid and not inval_in;
1011 # Cache line is now valid
1012 sync += cache_valid_bits[
1013 r.store_index
1014 ][relace_way].eq(
1015 r.store_valid & ~inval_in
1016 )
1017
1018 # -- We are done
1019 # r.state <= IDLE;
1020 # We are done
1021 sync += r.state.eq(State.IDLE)
1022 # end if;
1023
1024 # -- Increment store row counter
1025 # r.store_row <= next_row(r.store_row);
1026 # Increment store row counter
1027 sync += store_row.eq(next_row(r.store_row))
1028 # end if;
1029 # end case;
1030 # end if;
1031 #
1032 # -- TLB miss and protection fault processing
1033 # if rst = '1' or flush_in = '1' or m_in.tlbld = '1' then
1034 # r.fetch_failed <= '0';
1035 # elsif i_in.req = '1' and access_ok = '0' and stall_in = '0' then
1036 # r.fetch_failed <= '1';
1037 # end if;
1038 # TLB miss and protection fault processing
1039 with m.If('''TODO nmigen rst''' | flush_in | m_in.tlbld):
1040 sync += r.fetch_failed.eq(0)
1041
1042 with m.Elif(i_in.req & ~access_ok & ~stall_in):
1043 sync += r.fetch_failed.eq(1)
1044 # end if;
1045 # end process;
1046
1047 # icache_log: if LOG_LENGTH > 0 generate
1048 def icache_log(self, m, log_out):
1049 comb = m.d.comb
1050 sync = m.d.sync
1051
1052 # -- Output data to logger
1053 # signal log_data : std_ulogic_vector(53 downto 0);
1054 # begin
1055 # data_log: process(clk)
1056 # variable lway: way_t;
1057 # variable wstate: std_ulogic;
1058 # Output data to logger
1059 for i in range(LOG_LENGTH)
1060 # Output data to logger
1061 log_data = Signal(54)
1062 lway = Signal(NUM_WAYS)
1063 wstate = Signal()
1064
1065 # begin
1066 # if rising_edge(clk) then
1067 # lway := req_hit_way;
1068 # wstate := '0';
1069 comb += lway.eq(req_hit_way)
1070 comb += wstate.eq(0)
1071
1072 # if r.state /= IDLE then
1073 # wstate := '1';
1074 # end if;
1075 with m.If(r.state != State.IDLE):
1076 comb += wstate.eq(1)
1077
1078 # log_data <= i_out.valid &
1079 # i_out.insn &
1080 # wishbone_in.ack &
1081 # r.wb.adr(5 downto 3) &
1082 # r.wb.stb & r.wb.cyc &
1083 # wishbone_in.stall &
1084 # stall_out &
1085 # r.fetch_failed &
1086 # r.hit_nia(5 downto 2) &
1087 # wstate &
1088 # std_ulogic_vector(to_unsigned(lway, 3)) &
1089 # req_is_hit & req_is_miss &
1090 # access_ok &
1091 # ra_valid;
1092 sync += log_data.eq(Cat(
1093 ra_valid, access_ok, req_is_miss, req_is_hit,
1094 lway '''truncate to 3 bits?''', wstate, r.hit_nia[2:6],
1095 r.fetch_failed, stall_out, wb_in.stall, r.wb.cyc,
1096 r.wb.stb, r.wb.adr[3:6], wb_in.ack, i_out.insn,
1097 i_out.valid
1098 ))
1099 # end if;
1100 # end process;
1101 # log_out <= log_data;
1102 comb += log_out.eq(log_data)
1103 # end generate;
1104 # end;
1105
1106 def elaborate(self, platform):
1107 # architecture rtl of icache is
1108 # constant ROW_SIZE_BITS : natural := ROW_SIZE*8;
1109 # -- ROW_PER_LINE is the number of row (wishbone transactions) in a line
1110 # constant ROW_PER_LINE : natural := LINE_SIZE / ROW_SIZE;
1111 # -- BRAM_ROWS is the number of rows in BRAM needed to represent the full
1112 # -- icache
1113 # constant BRAM_ROWS : natural := NUM_LINES * ROW_PER_LINE;
1114 # -- INSN_PER_ROW is the number of 32bit instructions per BRAM row
1115 # constant INSN_PER_ROW : natural := ROW_SIZE_BITS / 32;
1116 # -- Bit fields counts in the address
1117 #
1118 # -- INSN_BITS is the number of bits to select an instruction in a row
1119 # constant INSN_BITS : natural := log2(INSN_PER_ROW);
1120 # -- ROW_BITS is the number of bits to select a row
1121 # constant ROW_BITS : natural := log2(BRAM_ROWS);
1122 # -- ROW_LINEBITS is the number of bits to select a row within a line
1123 # constant ROW_LINEBITS : natural := log2(ROW_PER_LINE);
1124 # -- LINE_OFF_BITS is the number of bits for the offset in a cache line
1125 # constant LINE_OFF_BITS : natural := log2(LINE_SIZE);
1126 # -- ROW_OFF_BITS is the number of bits for the offset in a row
1127 # constant ROW_OFF_BITS : natural := log2(ROW_SIZE);
1128 # -- INDEX_BITS is the number of bits to select a cache line
1129 # constant INDEX_BITS : natural := log2(NUM_LINES);
1130 # -- SET_SIZE_BITS is the log base 2 of the set size
1131 # constant SET_SIZE_BITS : natural := LINE_OFF_BITS + INDEX_BITS;
1132 # -- TAG_BITS is the number of bits of the tag part of the address
1133 # constant TAG_BITS : natural := REAL_ADDR_BITS - SET_SIZE_BITS;
1134 # -- WAY_BITS is the number of bits to select a way
1135 # constant WAY_BITS : natural := log2(NUM_WAYS);
1136
1137 ROW_SIZE_BITS = ROW_SIZE * 8
1138 # ROW_PER_LINE is the number of row
1139 # (wishbone) transactions in a line
1140 ROW_PER_LINE = LINE_SIZE / ROW_SIZE
1141 # BRAM_ROWS is the number of rows in
1142 # BRAM needed to represent the full icache
1143 BRAM_ROWS = NUM_LINES * ROW_PER_LINE
1144 # INSN_PER_ROW is the number of 32bit
1145 # instructions per BRAM row
1146 INSN_PER_ROW = ROW_SIZE_BITS / 32
1147
1148 # Bit fields counts in the address
1149 #
1150 # INSN_BITS is the number of bits to
1151 # select an instruction in a row
1152 INSN_BITS = log2_int(INSN_PER_ROW)
1153 # ROW_BITS is the number of bits to
1154 # select a row
1155 ROW_BITS = log2_int(BRAM_ROWS)
1156 # ROW_LINEBITS is the number of bits to
1157 # select a row within a line
1158 ROW_LINE_BITS = log2_int(ROW_PER_LINE)
1159 # LINE_OFF_BITS is the number of bits for
1160 # the offset in a cache line
1161 LINE_OFF_BITS = log2_int(LINE_SIZE)
1162 # ROW_OFF_BITS is the number of bits for
1163 # the offset in a row
1164 ROW_OFF_BITS = log2_int(ROW_SIZE)
1165 # INDEX_BITS is the number of bits to
1166 # select a cache line
1167 INDEX_BITS = log2_int(NUM_LINES)
1168 # SET_SIZE_BITS is the log base 2 of
1169 # the set size
1170 SET_SIZE_BITS = LINE_OFF_BITS + INDEX_BITS
1171 # TAG_BITS is the number of bits of
1172 # the tag part of the address
1173 TAG_BITS = REAL_ADDR_BITS - SET_SIZE_BITS
1174 # WAY_BITS is the number of bits to
1175 # select a way
1176 WAY_BITS = log2_int(NUM_WAYS)
1177 TAG_RAM_WIDTH = TAG_BITS * NUM_WAYS
1178
1179 # -- Example of layout for 32 lines of 64 bytes:
1180 # --
1181 # -- .. tag |index| line |
1182 # -- .. | row | |
1183 # -- .. | | | |00| zero (2)
1184 # -- .. | | |-| | INSN_BITS (1)
1185 # -- .. | |---| | ROW_LINEBITS (3)
1186 # -- .. | |--- - --| LINE_OFF_BITS (6)
1187 # -- .. | |- --| ROW_OFF_BITS (3)
1188 # -- .. |----- ---| | ROW_BITS (8)
1189 # -- .. |-----| | INDEX_BITS (5)
1190 # -- .. --------| | TAG_BITS (53)
1191 # Example of layout for 32 lines of 64 bytes:
1192 #
1193 # .. tag |index| line |
1194 # .. | row | |
1195 # .. | | | |00| zero (2)
1196 # .. | | |-| | INSN_BITS (1)
1197 # .. | |---| | ROW_LINEBITS (3)
1198 # .. | |--- - --| LINE_OFF_BITS (6)
1199 # .. | |- --| ROW_OFF_BITS (3)
1200 # .. |----- ---| | ROW_BITS (8)
1201 # .. |-----| | INDEX_BITS (5)
1202 # .. --------| | TAG_BITS (53)
1203
1204 # subtype row_t is integer range 0 to BRAM_ROWS-1;
1205 # subtype index_t is integer range 0 to NUM_LINES-1;
1206 # subtype way_t is integer range 0 to NUM_WAYS-1;
1207 # subtype row_in_line_t is unsigned(ROW_LINEBITS-1 downto 0);
1208 #
1209 # -- The cache data BRAM organized as described above for each way
1210 # subtype cache_row_t is std_ulogic_vector(ROW_SIZE_BITS-1 downto 0);
1211 #
1212 # -- The cache tags LUTRAM has a row per set. Vivado is a pain and will
1213 # -- not handle a clean (commented) definition of the cache tags as a 3d
1214 # -- memory. For now, work around it by putting all the tags
1215 # subtype cache_tag_t is std_logic_vector(TAG_BITS-1 downto 0);
1216 # -- type cache_tags_set_t is array(way_t) of cache_tag_t;
1217 # -- type cache_tags_array_t is array(index_t) of cache_tags_set_t;
1218 # constant TAG_RAM_WIDTH : natural := TAG_BITS * NUM_WAYS;
1219 # subtype cache_tags_set_t is std_logic_vector(TAG_RAM_WIDTH-1 downto 0);
1220 # type cache_tags_array_t is array(index_t) of cache_tags_set_t;
1221 def CacheTagArray():
1222 return Array(Signal(TAG_RAM_WIDTH) for x in range(NUM_LINES))
1223
1224 # -- The cache valid bits
1225 # subtype cache_way_valids_t is std_ulogic_vector(NUM_WAYS-1 downto 0);
1226 # type cache_valids_t is array(index_t) of cache_way_valids_t;
1227 # type row_per_line_valid_t is array(0 to ROW_PER_LINE - 1) of std_ulogic;
1228 def CacheValidBitsArray():
1229 return Array(Signal() for x in ROW_PER_LINE)
1230
1231 def RowPerLineValidArray():
1232 return Array(Signal() for x in range ROW_PER_LINE)
1233
1234 # -- Storage. Hopefully "cache_rows" is a BRAM, the rest is LUTs
1235 # signal cache_tags : cache_tags_array_t;
1236 # signal cache_valids : cache_valids_t;
1237 # Storage. Hopefully "cache_rows" is a BRAM, the rest is LUTs
1238 cache_tags = CacheTagArray()
1239 cache_valid_bits = CacheValidBitsArray()
1240
1241 # attribute ram_style : string;
1242 # attribute ram_style of cache_tags : signal is "distributed";
1243 # TODO to be passed to nigmen as ram attributes
1244 # attribute ram_style : string;
1245 # attribute ram_style of cache_tags : signal is "distributed";
1246
1247 # -- L1 ITLB.
1248 # constant TLB_BITS : natural := log2(TLB_SIZE);
1249 # constant TLB_EA_TAG_BITS : natural := 64 - (TLB_LG_PGSZ + TLB_BITS);
1250 # constant TLB_PTE_BITS : natural := 64;
1251 TLB_BITS = log2_int(TLB_SIZE)
1252 TLB_EA_TAG_BITS = 64 - (TLB_LG_PGSZ + TLB_BITS)
1253 TLB_PTE_BITS = 64
1254
1255 # subtype tlb_index_t is integer range 0 to TLB_SIZE - 1;
1256 # type tlb_valids_t is array(tlb_index_t) of std_ulogic;
1257 # subtype tlb_tag_t is std_ulogic_vector(TLB_EA_TAG_BITS - 1 downto 0);
1258 # type tlb_tags_t is array(tlb_index_t) of tlb_tag_t;
1259 # subtype tlb_pte_t is std_ulogic_vector(TLB_PTE_BITS - 1 downto 0);
1260 # type tlb_ptes_t is array(tlb_index_t) of tlb_pte_t;
1261 def TLBValidBitsArray():
1262 return Array(Signal() for x in range(TLB_SIZE))
1263
1264 def TLBTagArray():
1265 return Array(Signal(TLB_EA_TAG_BITS) for x in range(TLB_SIZE))
1266
1267 def TLBPTEArray():
1268 return Array(Signal(LTB_PTE_BITS) for x in range(TLB_SIZE))
1269
1270 # signal itlb_valids : tlb_valids_t;
1271 # signal itlb_tags : tlb_tags_t;
1272 # signal itlb_ptes : tlb_ptes_t;
1273 # attribute ram_style of itlb_tags : signal is "distributed";
1274 # attribute ram_style of itlb_ptes : signal is "distributed";
1275 itlb_valid_bits = TLBValidBitsArray()
1276 itlb_tags = TLBTagArray()
1277 itlb_ptes = TLBPTEArray()
1278 # TODO to be passed to nmigen as ram attributes
1279 # attribute ram_style of itlb_tags : signal is "distributed";
1280 # attribute ram_style of itlb_ptes : signal is "distributed";
1281
1282 # -- Privilege bit from PTE EAA field
1283 # signal eaa_priv : std_ulogic;
1284 # Privilege bit from PTE EAA field
1285 eaa_priv = Signal()
1286
1287
1288 # signal r : reg_internal_t;
1289 r = RegInternal()
1290
1291 # -- Async signals on incoming request
1292 # signal req_index : index_t;
1293 # signal req_row : row_t;
1294 # signal req_hit_way : way_t;
1295 # signal req_tag : cache_tag_t;
1296 # signal req_is_hit : std_ulogic;
1297 # signal req_is_miss : std_ulogic;
1298 # signal req_laddr : std_ulogic_vector(63 downto 0);
1299 # Async signal on incoming request
1300 req_index = Signal(NUM_LINES)
1301 req_row = Signal(BRAM_ROWS)
1302 req_hit_way = Signal(NUM_WAYS)
1303 req_tag = Signal(TAG_BITS)
1304 req_is_hit = Signal()
1305 req_is_miss = Signal()
1306 req_laddr = Signal(64)
1307
1308 # signal tlb_req_index : tlb_index_t;
1309 # signal real_addr : std_ulogic_vector(REAL_ADDR_BITS - 1 downto 0);
1310 # signal ra_valid : std_ulogic;
1311 # signal priv_fault : std_ulogic;
1312 # signal access_ok : std_ulogic;
1313 # signal use_previous : std_ulogic;
1314 tlb_req_index = Signal(TLB_SIZE)
1315 real_addr = Signal(REAL_ADDR_BITS)
1316 ra_valid = Signal()
1317 priv_fault = Signal()
1318 access_ok = Signal()
1319 use_previous = Signal()
1320
1321 # -- Cache RAM interface
1322 # type cache_ram_out_t is array(way_t) of cache_row_t;
1323 # signal cache_out : cache_ram_out_t;
1324 # Cache RAM interface
1325 def CacheRamOut():
1326 return Array(Signal(ROW_SIZE_BITS) for x in range(NUM_WAYS))
1327
1328 cache_out = CacheRamOut()
1329
1330 # -- PLRU output interface
1331 # type plru_out_t is array(index_t) of
1332 # std_ulogic_vector(WAY_BITS-1 downto 0);
1333 # signal plru_victim : plru_out_t;
1334 # signal replace_way : way_t;
1335 # PLRU output interface
1336 def PLRUOut():
1337 return Array(Signal(WAY_BITS) for x in range(NUM_LINES))
1338
1339 plru_victim = PLRUOut()
1340 replace_way = Signal(NUM_WAYS)
1341
1342 # begin
1343 #
1344 # assert LINE_SIZE mod ROW_SIZE = 0;
1345 # assert ispow2(LINE_SIZE) report "LINE_SIZE not power of 2"
1346 # severity FAILURE;
1347 # assert ispow2(NUM_LINES) report "NUM_LINES not power of 2"
1348 # severity FAILURE;
1349 # assert ispow2(ROW_PER_LINE) report "ROW_PER_LINE not power of 2"
1350 # severity FAILURE;
1351 # assert ispow2(INSN_PER_ROW) report "INSN_PER_ROW not power of 2"
1352 # severity FAILURE;
1353 # assert (ROW_BITS = INDEX_BITS + ROW_LINEBITS)
1354 # report "geometry bits don't add up" severity FAILURE;
1355 # assert (LINE_OFF_BITS = ROW_OFF_BITS + ROW_LINEBITS)
1356 # report "geometry bits don't add up" severity FAILURE;
1357 # assert (REAL_ADDR_BITS = TAG_BITS + INDEX_BITS + LINE_OFF_BITS)
1358 # report "geometry bits don't add up" severity FAILURE;
1359 # assert (REAL_ADDR_BITS = TAG_BITS + ROW_BITS + ROW_OFF_BITS)
1360 # report "geometry bits don't add up" severity FAILURE;
1361 #
1362 # sim_debug: if SIM generate
1363 # debug: process
1364 # begin
1365 # report "ROW_SIZE = " & natural'image(ROW_SIZE);
1366 # report "ROW_PER_LINE = " & natural'image(ROW_PER_LINE);
1367 # report "BRAM_ROWS = " & natural'image(BRAM_ROWS);
1368 # report "INSN_PER_ROW = " & natural'image(INSN_PER_ROW);
1369 # report "INSN_BITS = " & natural'image(INSN_BITS);
1370 # report "ROW_BITS = " & natural'image(ROW_BITS);
1371 # report "ROW_LINEBITS = " & natural'image(ROW_LINEBITS);
1372 # report "LINE_OFF_BITS = " & natural'image(LINE_OFF_BITS);
1373 # report "ROW_OFF_BITS = " & natural'image(ROW_OFF_BITS);
1374 # report "INDEX_BITS = " & natural'image(INDEX_BITS);
1375 # report "TAG_BITS = " & natural'image(TAG_BITS);
1376 # report "WAY_BITS = " & natural'image(WAY_BITS);
1377 # wait;
1378 # end process;
1379 # end generate;
1380