3 based on Anton Blanchard microwatt icache.vhdl
7 TODO (in no specific order):
8 * Add debug interface to inspect cache content
9 * Add snoop/invalidate path
10 * Add multi-hit error detection
11 * Pipelined bus interface (wb or axi)
12 * Maybe add parity? There's a few bits free in each BRAM row on Xilinx
13 * Add optimization: service hits on partially loaded lines
14 * Add optimization: (maybe) interrupt reload on flush/redirect
15 * Check if playing with the geometry of the cache tags allow for more
16 efficient use of distributed RAM and less logic/muxes. Currently we
17 write TAG_BITS width which may not match full ram blocks and might
18 cause muxes to be inferred for "partial writes".
19 * Check if making the read size of PLRU a ROM helps utilization
22 from enum
import Enum
, unique
23 from nmigen
import (Module
, Signal
, Elaboratable
, Cat
, Array
, Const
, Repl
)
24 from nmigen
.cli
import main
, rtlil
25 from nmutil
.iocontrol
import RecordObject
26 from nmigen
.utils
import log2_int
27 from nmutil
.util
import Display
29 #from nmutil.plru import PLRU
30 from soc
.experiment
.cache_ram
import CacheRam
31 from soc
.experiment
.plru
import PLRU
33 from soc
.experiment
.mem_types
import (Fetch1ToICacheType
,
37 from soc
.experiment
.wb_types
import (WB_ADDR_BITS
, WB_DATA_BITS
,
38 WB_SEL_BITS
, WBAddrType
, WBDataType
,
39 WBSelType
, WBMasterOut
, WBSlaveOut
,
40 WBMasterOutVector
, WBSlaveOutVector
,
41 WBIOMasterOut
, WBIOSlaveOut
)
44 from nmigen_soc
.wishbone
.sram
import SRAM
45 from nmigen
import Memory
46 from nmutil
.util
import wrap
47 from nmigen
.cli
import main
, rtlil
49 from nmigen
.back
.pysim
import Simulator
, Delay
, Settle
51 from nmigen
.sim
.cxxsim
import Simulator
, Delay
, Settle
56 # BRAM organisation: We never access more than wishbone_data_bits
57 # at a time so to save resources we make the array only that wide,
58 # and use consecutive indices to make a cache "line"
# ROW_SIZE is the width in bytes of the BRAM (based on WB, so 64-bits)
ROW_SIZE = WB_DATA_BITS // 8
# Number of lines in a set
# L1 ITLB number of entries (direct mapped)
# L1 ITLB log_2(page_size)
# Number of real address bits that we store
# Non-zero to enable log data collection

# ROW_SIZE_BITS is the width in bits of one BRAM row
ROW_SIZE_BITS = 8 * ROW_SIZE
# ROW_PER_LINE is the number of row (wishbone) transactions in a line
ROW_PER_LINE = LINE_SIZE // ROW_SIZE
# BRAM_ROWS is the number of rows in BRAM needed to represent
# the full icache
BRAM_ROWS = ROW_PER_LINE * NUM_LINES
# INSN_PER_ROW is the number of 32bit instructions per BRAM row
INSN_PER_ROW = ROW_SIZE_BITS // 32

# Bit fields counts in the address

# INSN_BITS is the number of bits to select an instruction in a row
INSN_BITS = log2_int(INSN_PER_ROW)
# ROW_BITS is the number of bits to select a row
ROW_BITS = log2_int(BRAM_ROWS)
# ROW_LINE_BITS is the number of bits to select a row within a line
ROW_LINE_BITS = log2_int(ROW_PER_LINE)
# LINE_OFF_BITS is the number of bits for the offset in a cache line
LINE_OFF_BITS = log2_int(LINE_SIZE)
# ROW_OFF_BITS is the number of bits for the offset in a row
ROW_OFF_BITS = log2_int(ROW_SIZE)
# INDEX_BITS is the number of bits to select a cache line
INDEX_BITS = log2_int(NUM_LINES)
# SET_SIZE_BITS is the log base 2 of the set size
SET_SIZE_BITS = INDEX_BITS + LINE_OFF_BITS
# TAG_BITS is the number of bits of the tag part of the address
TAG_BITS = REAL_ADDR_BITS - SET_SIZE_BITS
# TAG_WIDTH is the width in bits of each way of the tag RAM
# (TAG_BITS rounded up to a whole number of bytes)
TAG_WIDTH = ((TAG_BITS + 7) // 8) * 8

# WAY_BITS is the number of bits to select a way
WAY_BITS = log2_int(NUM_WAYS)
# One tag RAM row holds the TAG_BITS-wide tag of every way
TAG_RAM_WIDTH = NUM_WAYS * TAG_BITS

#constant TLB_BITS : natural := log2(TLB_SIZE);
#constant TLB_EA_TAG_BITS : natural := 64 - (TLB_LG_PGSZ + TLB_BITS);
#constant TLB_PTE_BITS : natural := 64;
TLB_BITS = log2_int(TLB_SIZE)
# Effective-address bits left over once the page offset and the
# TLB index bits have been removed
TLB_EA_TAG_BITS = 64 - TLB_LG_PGSZ - TLB_BITS
# Report the cache geometry when the module is loaded.  The entries are
# emitted in exactly this order; TAG_BITS is listed twice on purpose so
# that the output stays identical to what this module has always printed.
for _label, _value in (
    ("BRAM_ROWS =", BRAM_ROWS),
    ("INDEX_BITS =", INDEX_BITS),
    ("INSN_BITS =", INSN_BITS),
    ("INSN_PER_ROW =", INSN_PER_ROW),
    ("LINE_SIZE =", LINE_SIZE),
    ("LINE_OFF_BITS =", LINE_OFF_BITS),
    ("LOG_LENGTH =", LOG_LENGTH),
    ("NUM_LINES =", NUM_LINES),
    ("NUM_WAYS =", NUM_WAYS),
    ("REAL_ADDR_BITS =", REAL_ADDR_BITS),
    ("ROW_BITS =", ROW_BITS),
    ("ROW_OFF_BITS =", ROW_OFF_BITS),
    ("ROW_LINE_BITS =", ROW_LINE_BITS),
    ("ROW_PER_LINE =", ROW_PER_LINE),
    ("ROW_SIZE =", ROW_SIZE),
    ("ROW_SIZE_BITS =", ROW_SIZE_BITS),
    ("SET_SIZE_BITS =", SET_SIZE_BITS),
    ("TAG_BITS =", TAG_BITS),
    ("TAG_RAM_WIDTH =", TAG_RAM_WIDTH),
    ("TAG_BITS =", TAG_BITS),
    ("TLB_BITS =", TLB_BITS),
    ("TLB_EA_TAG_BITS =", TLB_EA_TAG_BITS),
    ("TLB_LG_PGSZ =", TLB_LG_PGSZ),
    ("TLB_PTE_BITS =", TLB_PTE_BITS),
    ("TLB_SIZE =", TLB_SIZE),
    ("WAY_BITS =", WAY_BITS),
):
    print(_label, _value)
156 # from microwatt/utils.vhdl
158 if ((n
<< 32) & ((n
-1) << 32)) == 0:
# Sanity checks on the cache geometry, evaluated once at import time.

# A line must be made of a whole number of rows.
assert LINE_SIZE % ROW_SIZE == 0

# Every size that is turned into a bit count must be a power of two.
for _count, _complaint in (
    (LINE_SIZE, "LINE_SIZE not power of 2"),
    (NUM_LINES, "NUM_LINES not power of 2"),
    (ROW_PER_LINE, "ROW_PER_LINE not power of 2"),
    (INSN_PER_ROW, "INSN_PER_ROW not power of 2"),
):
    assert ispow2(_count), _complaint

# Each address field must be exactly the sum of the fields it is made of.
for _total, _parts in (
    (ROW_BITS, (INDEX_BITS, ROW_LINE_BITS)),
    (LINE_OFF_BITS, (ROW_OFF_BITS, ROW_LINE_BITS)),
    (REAL_ADDR_BITS, (TAG_BITS, INDEX_BITS, LINE_OFF_BITS)),
    (REAL_ADDR_BITS, (TAG_BITS, ROW_BITS, ROW_OFF_BITS)),
):
    assert _total == sum(_parts), "geometry bits don't add up"
178 # architecture rtl of icache is
179 #constant ROW_SIZE_BITS : natural := ROW_SIZE*8;
180 #-- ROW_PER_LINE is the number of row (wishbone
181 #-- transactions) in a line
182 #constant ROW_PER_LINE : natural := LINE_SIZE / ROW_SIZE;
183 #-- BRAM_ROWS is the number of rows in BRAM
184 #-- needed to represent the full
186 #constant BRAM_ROWS : natural := NUM_LINES * ROW_PER_LINE;
187 #-- INSN_PER_ROW is the number of 32bit instructions per BRAM row
188 #constant INSN_PER_ROW : natural := ROW_SIZE_BITS / 32;
189 #-- Bit fields counts in the address
191 #-- INSN_BITS is the number of bits to select
192 #-- an instruction in a row
193 #constant INSN_BITS : natural := log2(INSN_PER_ROW);
194 #-- ROW_BITS is the number of bits to select a row
195 #constant ROW_BITS : natural := log2(BRAM_ROWS);
196 #-- ROW_LINE_BITS is the number of bits to
197 #-- select a row within a line
198 #constant ROW_LINE_BITS : natural := log2(ROW_PER_LINE);
199 #-- LINE_OFF_BITS is the number of bits for the offset
201 #constant LINE_OFF_BITS : natural := log2(LINE_SIZE);
202 #-- ROW_OFF_BITS is the number of bits for the offset in a row
203 #constant ROW_OFF_BITS : natural := log2(ROW_SIZE);
204 #-- INDEX_BITS is the number of bits to select a cache line
205 #constant INDEX_BITS : natural := log2(NUM_LINES);
206 #-- SET_SIZE_BITS is the log base 2 of the set size
207 #constant SET_SIZE_BITS : natural := LINE_OFF_BITS + INDEX_BITS;
208 #-- TAG_BITS is the number of bits of the tag part of the address
209 #constant TAG_BITS : natural := REAL_ADDR_BITS - SET_SIZE_BITS;
210 #-- WAY_BITS is the number of bits to select a way
211 #constant WAY_BITS : natural := log2(NUM_WAYS);
213 #-- Example of layout for 32 lines of 64 bytes:
215 #-- .. tag |index| line |
217 #-- .. | | | |00| zero (2)
218 #-- .. | | |-| | INSN_BITS (1)
219 #-- .. | |---| | ROW_LINE_BITS (3)
220 #-- .. | |--- - --| LINE_OFF_BITS (6)
221 #-- .. | |- --| ROW_OFF_BITS (3)
222 #-- .. |----- ---| | ROW_BITS (8)
223 #-- .. |-----| | INDEX_BITS (5)
224 #-- .. --------| | TAG_BITS (53)
225 # Example of layout for 32 lines of 64 bytes:
227 # .. tag |index| line |
229 # .. | | | |00| zero (2)
230 # .. | | |-| | INSN_BITS (1)
231 # .. | |---| | ROW_LINE_BITS (3)
232 # .. | |--- - --| LINE_OFF_BITS (6)
233 # .. | |- --| ROW_OFF_BITS (3)
234 # .. |----- ---| | ROW_BITS (8)
235 # .. |-----| | INDEX_BITS (5)
236 # .. --------| | TAG_BITS (53)
238 #subtype row_t is integer range 0 to BRAM_ROWS-1;
239 #subtype index_t is integer range 0 to NUM_LINES-1;
240 #subtype way_t is integer range 0 to NUM_WAYS-1;
241 #subtype row_in_line_t is unsigned(ROW_LINE_BITS-1 downto 0);
243 #-- The cache data BRAM organized as described above for each way
244 #subtype cache_row_t is std_ulogic_vector(ROW_SIZE_BITS-1 downto 0);
246 #-- The cache tags LUTRAM has a row per set. Vivado is a pain and will
247 #-- not handle a clean (commented) definition of the cache tags as a 3d
248 #-- memory. For now, work around it by putting all the tags
249 #subtype cache_tag_t is std_logic_vector(TAG_BITS-1 downto 0);
250 # type cache_tags_set_t is array(way_t) of cache_tag_t;
251 # type cache_tags_array_t is array(index_t) of cache_tags_set_t;
252 #constant TAG_RAM_WIDTH : natural := TAG_BITS * NUM_WAYS;
253 #subtype cache_tags_set_t is std_logic_vector(TAG_RAM_WIDTH-1 downto 0);
254 #type cache_tags_array_t is array(index_t) of cache_tags_set_t;
256 return Array(Signal(TAG_RAM_WIDTH
, name
="cachetag_%d" %x) \
257 for x
in range(NUM_LINES
))
259 #-- The cache valid bits
260 #subtype cache_way_valids_t is std_ulogic_vector(NUM_WAYS-1 downto 0);
261 #type cache_valids_t is array(index_t) of cache_way_valids_t;
262 #type row_per_line_valid_t is array(0 to ROW_PER_LINE - 1) of std_ulogic;
def CacheValidBitsArray():
    """Build the cache valid-bit storage.

    Returns an Array with one entry per cache line (NUM_LINES entries);
    each entry is a NUM_WAYS-wide Signal named "cachevalid_<line>",
    i.e. one valid bit per way.
    """
    valid_rows = []
    for line in range(NUM_LINES):
        valid_rows.append(Signal(NUM_WAYS, name="cachevalid_%d" % line))
    return Array(valid_rows)
def RowPerLineValidArray():
    """Build the per-row valid flags for the line being reloaded.

    Returns an Array of ROW_PER_LINE single-bit Signals named
    "rows_valid_<row>".
    """
    row_flags = []
    for row in range(ROW_PER_LINE):
        row_flags.append(Signal(name="rows_valid_%d" % row))
    return Array(row_flags)
272 #attribute ram_style : string;
273 #attribute ram_style of cache_tags : signal is "distributed";
274 # TODO to be passed to nmigen as ram attributes
275 # attribute ram_style : string;
276 # attribute ram_style of cache_tags : signal is "distributed";
279 #subtype tlb_index_t is integer range 0 to TLB_SIZE - 1;
280 #type tlb_valids_t is array(tlb_index_t) of std_ulogic;
281 #subtype tlb_tag_t is std_ulogic_vector(TLB_EA_TAG_BITS - 1 downto 0);
282 #type tlb_tags_t is array(tlb_index_t) of tlb_tag_t;
283 #subtype tlb_pte_t is std_ulogic_vector(TLB_PTE_BITS - 1 downto 0);
284 #type tlb_ptes_t is array(tlb_index_t) of tlb_pte_t;
def TLBValidBitsArray():
    """Build the ITLB valid-bit storage.

    Returns an Array of TLB_SIZE single-bit Signals named
    "tlbvalid_<entry>", one per TLB entry.
    """
    entry_flags = []
    for entry in range(TLB_SIZE):
        entry_flags.append(Signal(name="tlbvalid_%d" % entry))
    return Array(entry_flags)
290 return Array(Signal(TLB_EA_TAG_BITS
, name
="tlbtag_%d" %x) \
291 for x
in range(TLB_SIZE
))
294 return Array(Signal(TLB_PTE_BITS
, name
="tlbptes_%d" %x) \
295 for x
in range(TLB_SIZE
))
298 #-- Cache RAM interface
299 #type cache_ram_out_t is array(way_t) of cache_row_t;
300 # Cache RAM interface
302 return Array(Signal(ROW_SIZE_BITS
, name
="cache_out_%d" %x) \
303 for x
in range(NUM_WAYS
))
305 #-- PLRU output interface
306 #type plru_out_t is array(index_t) of
307 # std_ulogic_vector(WAY_BITS-1 downto 0);
308 # PLRU output interface
310 return Array(Signal(WAY_BITS
, name
="plru_out_%d" %x) \
311 for x
in range(NUM_LINES
))
313 # -- Return the cache line index (tag index) for an address
314 # function get_index(addr: std_ulogic_vector(63 downto 0))
317 # return to_integer(unsigned(
318 # addr(SET_SIZE_BITS - 1 downto LINE_OFF_BITS)
321 # Return the cache line index (tag index) for an address
323 return addr
[LINE_OFF_BITS
:SET_SIZE_BITS
]
325 # -- Return the cache row index (data memory) for an address
326 # function get_row(addr: std_ulogic_vector(63 downto 0))
329 # return to_integer(unsigned(
330 # addr(SET_SIZE_BITS - 1 downto ROW_OFF_BITS)
333 # Return the cache row index (data memory) for an address
335 return addr
[ROW_OFF_BITS
:SET_SIZE_BITS
]
337 # -- Return the index of a row within a line
338 # function get_row_of_line(row: row_t) return row_in_line_t is
339 # variable row_v : unsigned(ROW_BITS-1 downto 0);
341 # row_v := to_unsigned(row, ROW_BITS);
342 # return row_v(ROW_LINE_BITS-1 downto 0);
344 # Return the index of a row within a line
def get_row_of_line(row):
    """Return the index of a row within its cache line.

    This is the low ROW_LINE_BITS bits of the BRAM row index `row`.
    """
    row_in_line = row[0:ROW_LINE_BITS]
    return row_in_line
348 # -- Returns whether this is the last row of a line
349 # function is_last_row_addr(addr: wishbone_addr_type;
350 # last: row_in_line_t
355 # addr(LINE_OFF_BITS-1 downto ROW_OFF_BITS)
358 # Returns whether this is the last row of a line
def is_last_row_addr(addr, last):
    """Return whether `addr` points at the last row of a line.

    The row-within-line field of `addr` (bits ROW_OFF_BITS up to, but
    not including, LINE_OFF_BITS) is compared against `last`.
    """
    row_field = addr[ROW_OFF_BITS:LINE_OFF_BITS]
    return row_field == last
362 # -- Returns whether this is the last row of a line
363 # function is_last_row(row: row_t;
364 # last: row_in_line_t) return boolean is
366 # return get_row_of_line(row) = last;
368 # Returns whether this is the last row of a line
def is_last_row(row, last):
    """Return whether BRAM row index `row` is the last row of a line.

    The row's position within its line (see get_row_of_line) is
    compared against `last`.
    """
    position = get_row_of_line(row)
    return position == last
372 # -- Return the next row in the current cache line. We use a dedicated
373 # -- function in order to limit the size of the generated adder to be
374 # -- only the bits within a cache line (3 bits with default settings)
375 # function next_row(row: row_t) return row_t is
376 # variable row_v : std_ulogic_vector(ROW_BITS-1 downto 0);
377 # variable row_idx : std_ulogic_vector(ROW_LINE_BITS-1 downto 0);
378 # variable result : std_ulogic_vector(ROW_BITS-1 downto 0);
380 # row_v := std_ulogic_vector(to_unsigned(row, ROW_BITS));
381 # row_idx := row_v(ROW_LINE_BITS-1 downto 0);
382 # row_v(ROW_LINE_BITS-1 downto 0) :=
383 # std_ulogic_vector(unsigned(row_idx) + 1);
384 # return to_integer(unsigned(row_v));
386 # Return the next row in the current cache line. We use a dedicated
387 # function in order to limit the size of the generated adder to be
388 # only the bits within a cache line (3 bits with default settings)
390 row_v
= row
[0:ROW_LINE_BITS
] + 1
391 return Cat(row_v
[:ROW_LINE_BITS
], row
[ROW_LINE_BITS
:])
392 # -- Read the instruction word for the given address in the
393 # -- current cache row
394 # function read_insn_word(addr: std_ulogic_vector(63 downto 0);
395 # data: cache_row_t) return std_ulogic_vector is
396 # variable word: integer range 0 to INSN_PER_ROW-1;
398 # word := to_integer(unsigned(addr(INSN_BITS+2-1 downto 2)));
399 # return data(31+word*32 downto word*32);
401 # Read the instruction word for the given address
402 # in the current cache row
def read_insn_word(addr, data):
    """Read the instruction word for `addr` out of the cache row `data`.

    Address bits 2 up to (not including) INSN_BITS+2 select which
    32-bit word of the row is returned.
    """
    return data.word_select(addr[2:INSN_BITS+2], 32)
407 # -- Get the tag value from the address
409 # addr: std_ulogic_vector(REAL_ADDR_BITS - 1 downto 0)
411 # return cache_tag_t is
413 # return addr(REAL_ADDR_BITS - 1 downto SET_SIZE_BITS);
415 # Get the tag value from the address
417 return addr
[SET_SIZE_BITS
:REAL_ADDR_BITS
]
419 # -- Read a tag from a tag memory row
420 # function read_tag(way: way_t; tagset: cache_tags_set_t)
421 # return cache_tag_t is
423 # return tagset((way+1) * TAG_BITS - 1 downto way * TAG_BITS);
425 # Read a tag from a tag memory row
def read_tag(way, tagset):
    """Read the tag of `way` from the tag memory row `tagset`.

    The row is treated as consecutive TAG_BITS-wide words, one per way,
    and the word at position `way` is selected.
    """
    way_tag = tagset.word_select(way, TAG_BITS)
    return way_tag
429 # -- Write a tag to tag memory row
430 # procedure write_tag(way: in way_t;
431 # tagset: inout cache_tags_set_t; tag: cache_tag_t) is
433 # tagset((way+1) * TAG_BITS - 1 downto way * TAG_BITS) := tag;
435 # Write a tag to tag memory row
def write_tag(way, tagset, tag):
    """Build the assignment that writes `tag` into a tag memory row.

    The TAG_BITS-wide slot of `way` inside `tagset` (as selected by
    read_tag) is assigned `tag`.  The assignment statement is returned;
    the caller is responsible for adding it to a domain.
    """
    slot = read_tag(way, tagset)
    return slot.eq(tag)
439 # -- Simple hash for direct-mapped TLB index
440 # function hash_ea(addr: std_ulogic_vector(63 downto 0))
441 # return tlb_index_t is
442 # variable hash : std_ulogic_vector(TLB_BITS - 1 downto 0);
444 # hash := addr(TLB_LG_PGSZ + TLB_BITS - 1 downto TLB_LG_PGSZ)
446 # TLB_LG_PGSZ + 2 * TLB_BITS - 1 downto
447 # TLB_LG_PGSZ + TLB_BITS
450 # TLB_LG_PGSZ + 3 * TLB_BITS - 1 downto
451 # TLB_LG_PGSZ + 2 * TLB_BITS
453 # return to_integer(unsigned(hash));
455 # Simple hash for direct-mapped TLB index
457 hsh
= addr
[TLB_LG_PGSZ
:TLB_LG_PGSZ
+ TLB_BITS
] ^ addr
[
458 TLB_LG_PGSZ
+ TLB_BITS
:TLB_LG_PGSZ
+ 2 * TLB_BITS
460 TLB_LG_PGSZ
+ 2 * TLB_BITS
:TLB_LG_PGSZ
+ 3 * TLB_BITS
465 # Cache reload state machine
473 class RegInternal(RecordObject
):
476 # Cache hit state (Latches for 1 cycle BRAM access)
477 self
.hit_way
= Signal(NUM_WAYS
)
478 self
.hit_nia
= Signal(64)
479 self
.hit_smark
= Signal()
480 self
.hit_valid
= Signal()
482 # Cache miss state (reload state machine)
483 self
.state
= Signal(State
, reset
=State
.IDLE
)
484 self
.wb
= WBMasterOut("wb")
485 self
.req_adr
= Signal(64)
486 self
.store_way
= Signal(NUM_WAYS
)
487 self
.store_index
= Signal(NUM_LINES
)
488 self
.store_row
= Signal(BRAM_ROWS
)
489 self
.store_tag
= Signal(TAG_BITS
)
490 self
.store_valid
= Signal()
491 self
.end_row_ix
= Signal(ROW_LINE_BITS
)
492 self
.rows_valid
= RowPerLineValidArray()
495 self
.fetch_failed
= Signal()
497 # -- 64 bit direct mapped icache. All instructions are 4B aligned.
501 # SIM : boolean := false;
502 # -- Line size in bytes
503 # LINE_SIZE : positive := 64;
504 # -- BRAM organisation: We never access more
505 # -- than wishbone_data_bits
506 # -- at a time so to save resources we make the
507 # -- array only that wide,
508 # -- and use consecutive indices for to make a cache "line"
510 # -- ROW_SIZE is the width in bytes of the BRAM (based on WB,
512 # ROW_SIZE : positive := wishbone_data_bits / 8;
513 # -- Number of lines in a set
514 # NUM_LINES : positive := 32;
516 # NUM_WAYS : positive := 4;
517 # -- L1 ITLB number of entries (direct mapped)
518 # TLB_SIZE : positive := 64;
519 # -- L1 ITLB log_2(page_size)
520 # TLB_LG_PGSZ : positive := 12;
521 # -- Number of real address bits that we store
522 # REAL_ADDR_BITS : positive := 56;
523 # -- Non-zero to enable log data collection
524 # LOG_LENGTH : natural := 0
527 # clk : in std_ulogic;
528 # rst : in std_ulogic;
530 # i_in : in Fetch1ToIcacheType;
531 # i_out : out IcacheToDecode1Type;
533 # m_in : in MmuToIcacheType;
535 # stall_in : in std_ulogic;
536 # stall_out : out std_ulogic;
537 # flush_in : in std_ulogic;
538 # inval_in : in std_ulogic;
540 # wishbone_out : out wishbone_master_out;
541 # wishbone_in : in wishbone_slave_out;
543 # log_out : out std_ulogic_vector(53 downto 0)
546 # 64 bit icache, NUM_WAYS-way set associative with PLRU replacement
# (direct mapped only when NUM_WAYS == 1). All instructions are 4B aligned.
547 class ICache(Elaboratable
):
548 """64 bit direct mapped icache. All instructions are 4B aligned."""
550 self
.i_in
= Fetch1ToICacheType(name
="i_in")
551 self
.i_out
= ICacheToDecode1Type(name
="i_out")
553 self
.m_in
= MMUToICacheType(name
="m_in")
555 self
.stall_in
= Signal()
556 self
.stall_out
= Signal()
557 self
.flush_in
= Signal()
558 self
.inval_in
= Signal()
560 self
.wb_out
= WBMasterOut(name
="wb_out")
561 self
.wb_in
= WBSlaveOut(name
="wb_in")
563 self
.log_out
= Signal(54)
566 # Generate a cache RAM for each way
567 def rams(self
, m
, r
, cache_out_row
, use_previous
,
568 replace_way
, req_row
):
573 wb_in
, stall_in
= self
.wb_in
, self
.stall_in
575 for i
in range(NUM_WAYS
):
576 do_read
= Signal(name
="do_rd_%d" % i
)
577 do_write
= Signal(name
="do_wr_%d" % i
)
578 rd_addr
= Signal(ROW_BITS
)
579 wr_addr
= Signal(ROW_BITS
)
580 d_out
= Signal(ROW_SIZE_BITS
, name
="d_out_%d" % i
)
581 wr_sel
= Signal(ROW_SIZE
)
583 way
= CacheRam(ROW_BITS
, ROW_SIZE_BITS
, True)
584 setattr(m
.submodules
, "cacheram_%d" % i
, way
)
586 comb
+= way
.rd_en
.eq(do_read
)
587 comb
+= way
.rd_addr
.eq(rd_addr
)
588 comb
+= d_out
.eq(way
.rd_data_o
)
589 comb
+= way
.wr_sel
.eq(wr_sel
)
590 comb
+= way
.wr_addr
.eq(wr_addr
)
591 comb
+= way
.wr_data
.eq(wb_in
.dat
)
593 comb
+= do_read
.eq(~
(stall_in | use_previous
))
594 comb
+= do_write
.eq(wb_in
.ack
& (replace_way
== i
))
597 sync
+= Display("cache write adr: %x data: %lx",
598 wr_addr
, way
.wr_data
)
600 with m
.If(r
.hit_way
== i
):
601 comb
+= cache_out_row
.eq(d_out
)
603 sync
+= Display("cache read adr: %x data: %x",
606 comb
+= rd_addr
.eq(req_row
)
607 comb
+= wr_addr
.eq(r
.store_row
)
608 comb
+= wr_sel
.eq(Repl(do_write
, ROW_SIZE
))
611 def maybe_plrus(self
, m
, r
, plru_victim
):
614 with m
.If(NUM_WAYS
> 1):
615 for i
in range(NUM_LINES
):
616 plru_acc_i
= Signal(WAY_BITS
)
617 plru_acc_en
= Signal()
618 plru
= PLRU(WAY_BITS
)
619 setattr(m
.submodules
, "plru_%d" % i
, plru
)
621 comb
+= plru
.acc_i
.eq(plru_acc_i
)
622 comb
+= plru
.acc_en
.eq(plru_acc_en
)
625 with m
.If(get_index(r
.hit_nia
) == i
):
626 comb
+= plru
.acc_en
.eq(r
.hit_valid
)
628 comb
+= plru
.acc_i
.eq(r
.hit_way
)
629 comb
+= plru_victim
[i
].eq(plru
.lru_o
)
631 # TLB hit detection and real address generation
632 def itlb_lookup(self
, m
, tlb_req_index
, itlb_ptes
, itlb_tags
,
633 real_addr
, itlb_valid_bits
, ra_valid
, eaa_priv
,
634 priv_fault
, access_ok
):
640 pte
= Signal(TLB_PTE_BITS
)
641 ttag
= Signal(TLB_EA_TAG_BITS
)
643 comb
+= tlb_req_index
.eq(hash_ea(i_in
.nia
))
644 comb
+= pte
.eq(itlb_ptes
[tlb_req_index
])
645 comb
+= ttag
.eq(itlb_tags
[tlb_req_index
])
647 with m
.If(i_in
.virt_mode
):
648 comb
+= real_addr
.eq(Cat(
649 i_in
.nia
[:TLB_LG_PGSZ
],
650 pte
[TLB_LG_PGSZ
:REAL_ADDR_BITS
]
653 with m
.If(ttag
== i_in
.nia
[TLB_LG_PGSZ
+ TLB_BITS
:64]):
654 comb
+= ra_valid
.eq(itlb_valid_bits
[tlb_req_index
])
656 comb
+= eaa_priv
.eq(pte
[3])
659 comb
+= real_addr
.eq(i_in
.nia
[:REAL_ADDR_BITS
])
660 comb
+= ra_valid
.eq(1)
661 comb
+= eaa_priv
.eq(1)
663 # No IAMR, so no KUEP support for now
664 comb
+= priv_fault
.eq(eaa_priv
& ~i_in
.priv_mode
)
665 comb
+= access_ok
.eq(ra_valid
& ~priv_fault
)
668 def itlb_update(self
, m
, itlb_valid_bits
, itlb_tags
, itlb_ptes
):
674 wr_index
= Signal(TLB_SIZE
)
675 comb
+= wr_index
.eq(hash_ea(m_in
.addr
))
677 with m
.If(m_in
.tlbie
& m_in
.doall
):
678 # Clear all valid bits
679 for i
in range(TLB_SIZE
):
680 sync
+= itlb_valid_bits
[i
].eq(0)
682 with m
.Elif(m_in
.tlbie
):
683 # Clear entry regardless of hit or miss
684 sync
+= itlb_valid_bits
[wr_index
].eq(0)
686 with m
.Elif(m_in
.tlbld
):
687 sync
+= itlb_tags
[wr_index
].eq(
688 m_in
.addr
[TLB_LG_PGSZ
+ TLB_BITS
:64]
690 sync
+= itlb_ptes
[wr_index
].eq(m_in
.pte
)
691 sync
+= itlb_valid_bits
[wr_index
].eq(1)
693 # Cache hit detection, output to fetch2 and other misc logic
694 def icache_comb(self
, m
, use_previous
, r
, req_index
, req_row
,
695 req_hit_way
, req_tag
, real_addr
, req_laddr
,
696 cache_valid_bits
, cache_tags
, access_ok
,
697 req_is_hit
, req_is_miss
, replace_way
,
698 plru_victim
, cache_out_row
):
702 i_in
, i_out
, wb_out
= self
.i_in
, self
.i_out
, self
.wb_out
703 flush_in
, stall_out
= self
.flush_in
, self
.stall_out
706 hit_way
= Signal(NUM_WAYS
)
708 # i_in.sequential means that i_in.nia this cycle is 4 more than
709 # last cycle. If we read more than 32 bits at a time, had a
710 # cache hit last cycle, and we don't want the first 32-bit chunk
711 # then we can keep the data we read last cycle and just use that.
712 with m
.If(i_in
.nia
[2:INSN_BITS
+2] != 0):
713 comb
+= use_previous
.eq(i_in
.sequential
& r
.hit_valid
)
715 # Extract line, row and tag from request
716 comb
+= req_index
.eq(get_index(i_in
.nia
))
717 comb
+= req_row
.eq(get_row(i_in
.nia
))
718 comb
+= req_tag
.eq(get_tag(real_addr
))
720 # Calculate address of beginning of cache row, will be
721 # used for cache miss processing if needed
722 comb
+= req_laddr
.eq(Cat(
723 Const(0, ROW_OFF_BITS
),
724 real_addr
[ROW_OFF_BITS
:REAL_ADDR_BITS
],
727 # Test if pending request is a hit on any way
729 comb
+= hitcond
.eq((r
.state
== State
.WAIT_ACK
)
730 & (req_index
== r
.store_index
)
731 & r
.rows_valid
[req_row
% ROW_PER_LINE
])
733 cvb
= Signal(NUM_WAYS
)
734 ctag
= Signal(TAG_RAM_WIDTH
)
735 comb
+= ctag
.eq(cache_tags
[req_index
])
736 comb
+= cvb
.eq(cache_valid_bits
[req_index
])
737 for i
in range(NUM_WAYS
):
738 tagi
= Signal(TAG_BITS
, name
="tag_i%d" % i
)
739 comb
+= tagi
.eq(read_tag(i
, ctag
))
740 hit_test
= Signal(name
="hit_test%d" % i
)
741 comb
+= hit_test
.eq(i
== r
.store_way
)
742 with m
.If((cvb
[i
] |
(hitcond
& hit_test
))
743 & (tagi
== req_tag
)):
744 comb
+= hit_way
.eq(i
)
747 # Generate the "hit" and "miss" signals
748 # for the synchronous blocks
749 with m
.If(i_in
.req
& access_ok
& ~flush_in
):
750 comb
+= req_is_hit
.eq(is_hit
)
751 comb
+= req_is_miss
.eq(~is_hit
)
754 comb
+= req_is_hit
.eq(0)
755 comb
+= req_is_miss
.eq(0)
757 comb
+= req_hit_way
.eq(hit_way
)
759 # The way to replace on a miss
760 with m
.If(r
.state
== State
.CLR_TAG
):
761 comb
+= replace_way
.eq(plru_victim
[r
.store_index
])
763 comb
+= replace_way
.eq(r
.store_way
)
765 # Output instruction from current cache row
767 # Note: This is a mild violation of our design principle of
768 # having pipeline stages output from a clean latch. In this
769 # case we output the result of a mux. The alternative would
770 # be output an entire row which I prefer not to do just yet
771 # as it would force fetch2 to know about some of the cache
772 # geometry information.
773 comb
+= i_out
.insn
.eq(read_insn_word(r
.hit_nia
, cache_out_row
))
774 comb
+= i_out
.valid
.eq(r
.hit_valid
)
775 comb
+= i_out
.nia
.eq(r
.hit_nia
)
776 comb
+= i_out
.stop_mark
.eq(r
.hit_smark
)
777 comb
+= i_out
.fetch_failed
.eq(r
.fetch_failed
)
779 # Stall fetch1 if we have a miss on cache or TLB
780 # or a protection fault
781 comb
+= stall_out
.eq(~
(is_hit
& access_ok
))
783 # Wishbone requests output (from the cache miss reload machine)
784 comb
+= wb_out
.eq(r
.wb
)
786 # Cache hit synchronous machine
787 def icache_hit(self
, m
, use_previous
, r
, req_is_hit
, req_hit_way
,
788 req_index
, req_tag
, real_addr
):
791 i_in
, stall_in
= self
.i_in
, self
.stall_in
792 flush_in
= self
.flush_in
794 # keep outputs to fetch2 unchanged on a stall
795 # except that flush or reset sets valid to 0
796 # If use_previous, keep the same data as last
797 # cycle and use the second half
798 with m
.If(stall_in | use_previous
):
800 sync
+= r
.hit_valid
.eq(0)
802 # On a hit, latch the request for the next cycle,
803 # when the BRAM data will be available on the
804 # cache_out output of the corresponding way
805 sync
+= r
.hit_valid
.eq(req_is_hit
)
807 with m
.If(req_is_hit
):
808 sync
+= r
.hit_way
.eq(req_hit_way
)
810 "cache hit nia:%x IR:%x SM:%x idx:%x tag:%x " \
811 "way:%x RA:%x", i_in
.nia
, i_in
.virt_mode
, \
812 i_in
.stop_mark
, req_index
, req_tag
, \
813 req_hit_way
, real_addr
818 with m
.If(~stall_in
):
819 # Send stop marks and NIA down regardless of validity
820 sync
+= r
.hit_smark
.eq(i_in
.stop_mark
)
821 sync
+= r
.hit_nia
.eq(i_in
.nia
)
823 def icache_miss_idle(self
, m
, r
, req_is_miss
, req_laddr
,
824 req_index
, req_tag
, replace_way
, real_addr
):
830 # Reset per-row valid flags, only used in WAIT_ACK
831 for i
in range(ROW_PER_LINE
):
832 sync
+= r
.rows_valid
[i
].eq(0)
834 # We need to read a cache line
835 with m
.If(req_is_miss
):
837 "cache miss nia:%x IR:%x SM:%x idx:%x "
838 " way:%x tag:%x RA:%x", i_in
.nia
,
839 i_in
.virt_mode
, i_in
.stop_mark
, req_index
,
840 replace_way
, req_tag
, real_addr
843 # Keep track of our index and way for subsequent stores
844 st_row
= Signal(BRAM_ROWS
)
845 comb
+= st_row
.eq(get_row(req_laddr
))
846 sync
+= r
.store_index
.eq(req_index
)
847 sync
+= r
.store_row
.eq(st_row
)
848 sync
+= r
.store_tag
.eq(req_tag
)
849 sync
+= r
.store_valid
.eq(1)
850 sync
+= r
.end_row_ix
.eq(get_row_of_line(st_row
) - 1)
852 # Prep for first wishbone read. We calculate the address
853 # of the start of the cache line and start the WB cycle.
854 sync
+= r
.req_adr
.eq(req_laddr
)
855 sync
+= r
.wb
.cyc
.eq(1)
856 sync
+= r
.wb
.stb
.eq(1)
858 # Track that we had one request sent
859 sync
+= r
.state
.eq(State
.CLR_TAG
)
861 def icache_miss_clr_tag(self
, m
, r
, replace_way
,
862 cache_valid_bits
, req_index
,
868 # Get victim way from plru
869 sync
+= r
.store_way
.eq(replace_way
)
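870 # Force misses on that way while reloading that line
# NOTE(review): cv below is declared INDEX_BITS wide but carries a per-way
# valid vector (NUM_WAYS bits, see CacheValidBitsArray); valid bits are
# truncated whenever INDEX_BITS < NUM_WAYS -- confirm and size it by NUM_WAYS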
871 cv
= Signal(INDEX_BITS
)
872 comb
+= cv
.eq(cache_valid_bits
[req_index
])
873 comb
+= cv
.bit_select(replace_way
, 1).eq(0)
874 sync
+= cache_valid_bits
[req_index
].eq(cv
)
876 for i
in range(NUM_WAYS
):
877 with m
.If(i
== replace_way
):
878 comb
+= tagset
.eq(cache_tags
[r
.store_index
])
879 comb
+= write_tag(i
, tagset
, r
.store_tag
)
880 sync
+= cache_tags
[r
.store_index
].eq(tagset
)
882 sync
+= r
.state
.eq(State
.WAIT_ACK
)
884 def icache_miss_wait_ack(self
, m
, r
, replace_way
, inval_in
,
885 stbs_done
, cache_valid_bits
):
891 # Requests are all sent if stb is 0
893 comb
+= stbs_zero
.eq(r
.wb
.stb
== 0)
894 comb
+= stbs_done
.eq(stbs_zero
)
896 # If we are still sending requests, was one accepted?
897 with m
.If(~wb_in
.stall
& ~stbs_zero
):
898 # That was the last word ? # We are done sending.
899 # Clear stb and set stbs_done # so we can handle
900 # an eventual last ack on # the same cycle.
901 with m
.If(is_last_row_addr(r
.req_adr
, r
.end_row_ix
)):
903 "IS_LAST_ROW_ADDR r.wb.addr:%x " \
904 "r.end_row_ix:%x r.wb.stb:%x stbs_zero:%x " \
905 "stbs_done:%x", r
.wb
.adr
, r
.end_row_ix
,
906 r
.wb
.stb
, stbs_zero
, stbs_done
908 sync
+= r
.wb
.stb
.eq(0)
909 comb
+= stbs_done
.eq(1)
911 # Calculate the next row address
912 rarange
= Signal(LINE_OFF_BITS
- ROW_OFF_BITS
)
914 r
.req_adr
[ROW_OFF_BITS
:LINE_OFF_BITS
] + 1
916 sync
+= r
.req_adr
[ROW_OFF_BITS
:LINE_OFF_BITS
].eq(
919 sync
+= Display("RARANGE r.req_adr:%x rarange:%x "
920 "stbs_zero:%x stbs_done:%x",
921 r
.req_adr
, rarange
, stbs_zero
, stbs_done
)
923 # Incoming acks processing
924 with m
.If(wb_in
.ack
):
925 sync
+= Display("WB_IN_ACK data:%x stbs_zero:%x "
927 wb_in
.dat
, stbs_zero
, stbs_done
)
929 sync
+= r
.rows_valid
[r
.store_row
% ROW_PER_LINE
].eq(1)
931 # Check for completion
932 with m
.If(stbs_done
&
933 is_last_row(r
.store_row
, r
.end_row_ix
)):
934 # Complete wishbone cycle
935 sync
+= r
.wb
.cyc
.eq(0)
936 sync
+= r
.req_adr
.eq(0) # be nice, clear addr
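938 # Cache line is now valid
# NOTE(review): cv below is declared INDEX_BITS wide but carries a per-way
# valid vector (NUM_WAYS bits, see CacheValidBitsArray); valid bits are
# truncated whenever INDEX_BITS < NUM_WAYS -- confirm and size it by NUM_WAYS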
939 cv
= Signal(INDEX_BITS
)
940 comb
+= cv
.eq(cache_valid_bits
[r
.store_index
])
941 comb
+= cv
.bit_select(replace_way
, 1).eq(
942 r
.store_valid
& ~inval_in
944 sync
+= cache_valid_bits
[r
.store_index
].eq(cv
)
946 sync
+= r
.state
.eq(State
.IDLE
)
948 # not completed, move on to next request in row
950 # Increment store row counter
951 sync
+= r
.store_row
.eq(next_row(r
.store_row
))
954 # Cache miss/reload synchronous machine
955 def icache_miss(self
, m
, cache_valid_bits
, r
, req_is_miss
,
956 req_index
, req_laddr
, req_tag
, replace_way
,
957 cache_tags
, access_ok
, real_addr
):
961 i_in
, wb_in
, m_in
= self
.i_in
, self
.wb_in
, self
.m_in
962 stall_in
, flush_in
= self
.stall_in
, self
.flush_in
963 inval_in
= self
.inval_in
965 # variable tagset : cache_tags_set_t;
966 # variable stbs_done : boolean;
968 tagset
= Signal(TAG_RAM_WIDTH
)
971 comb
+= r
.wb
.sel
.eq(-1)
972 comb
+= r
.wb
.adr
.eq(r
.req_adr
[3:])
974 # Process cache invalidations
976 for i
in range(NUM_LINES
):
977 sync
+= cache_valid_bits
[i
].eq(0)
978 sync
+= r
.store_valid
.eq(0)
981 with m
.Switch(r
.state
):
983 with m
.Case(State
.IDLE
):
984 self
.icache_miss_idle(
985 m
, r
, req_is_miss
, req_laddr
,
986 req_index
, req_tag
, replace_way
,
990 with m
.Case(State
.CLR_TAG
, State
.WAIT_ACK
):
991 with m
.If(r
.state
== State
.CLR_TAG
):
992 self
.icache_miss_clr_tag(
994 cache_valid_bits
, req_index
,
998 self
.icache_miss_wait_ack(
999 m
, r
, replace_way
, inval_in
,
1000 stbs_done
, cache_valid_bits
1003 # TLB miss and protection fault processing
1004 with m
.If(flush_in | m_in
.tlbld
):
1005 sync
+= r
.fetch_failed
.eq(0)
1006 with m
.Elif(i_in
.req
& ~access_ok
& ~stall_in
):
1007 sync
+= r
.fetch_failed
.eq(1)
1009 # icache_log: if LOG_LENGTH > 0 generate
def icache_log(self, m, req_hit_way, ra_valid, access_ok,
               req_is_miss, req_is_hit, lway, wstate, r):
    """Drive ``self.log_out`` with a 54-bit snapshot of cache activity.

    Mirrors the VHDL ``icache_log: if LOG_LENGTH > 0 generate`` block:
    the logger is instantiated once when ``LOG_LENGTH`` is positive and
    not at all otherwise.

    The ``lway`` and ``wstate`` parameters are kept for interface
    compatibility but are not used: the VHDL declares both as variables
    local to the logging process, so private registers are created here
    instead.
    NOTE(review): the only visible call site is commented out; confirm
    no caller relies on its own ``wstate`` signal being driven.
    """
    # "if LOG_LENGTH > 0 generate": a single instance, not one per
    # log entry.
    if LOG_LENGTH <= 0:
        return

    comb = m.d.comb
    sync = m.d.sync

    wb_in, i_out = self.wb_in, self.i_out
    log_out, stall_out = self.log_out, self.stall_out

    # signal log_data : std_ulogic_vector(53 downto 0);
    log_data = Signal(54)
    # variable lway: way_t;  variable wstate: std_ulogic;
    lway_reg = Signal(NUM_WAYS, name="lway")
    wstate_reg = Signal(name="wstate")

    # if rising_edge(clk) then
    #     lway := req_hit_way;
    sync += lway_reg.eq(req_hit_way)
    sync += wstate_reg.eq(0)

    # if r.state /= IDLE then wstate := '1'
    with m.If(r.state != State.IDLE):
        sync += wstate_reg.eq(1)

    # Cat() places its first argument in the least significant bits,
    # so the fields are listed in the reverse of the VHDL "&" order
    # (i_out.valid ends up as the top bit).
    sync += log_data.eq(Cat(
        ra_valid, access_ok, req_is_miss, req_is_hit,
        lway_reg, wstate_reg, r.hit_nia[2:6], r.fetch_failed,
        stall_out, wb_in.stall, r.wb.cyc, r.wb.stb,
        r.wb.adr[3:6], wb_in.ack, i_out.insn, i_out.valid
    ))

    # log_out <= log_data;
    comb += log_out.eq(log_data)
def elaborate(self, platform):
    """Create the cache's storage and shared signals and wire up all
    the sub-function generators, returning the resulting Module."""
    m = Module()

    # Storage. Hopefully "cache_rows" is a BRAM, the rest is LUTs
    cache_tags = CacheTagArray()
    cache_valid_bits = CacheValidBitsArray()

    itlb_valid_bits = TLBValidBitsArray()
    itlb_tags = TLBTagArray()
    itlb_ptes = TLBPtesArray()
    # TODO to be passed to nmigen as ram attributes
    # attribute ram_style of itlb_tags : signal is "distributed";
    # attribute ram_style of itlb_ptes : signal is "distributed";

    # Privilege bit from PTE EAA field
    eaa_priv = Signal()

    # Registered internal state shared by the hit and miss machinery.
    # NOTE(review): constructor name reconstructed -- confirm it matches
    # the record class declared earlier in this file.
    r = RegInternal()

    # Async signal on incoming request
    req_index = Signal(NUM_LINES)
    req_row = Signal(BRAM_ROWS)
    req_hit_way = Signal(NUM_WAYS)
    req_tag = Signal(TAG_BITS)
    req_is_hit = Signal()
    req_is_miss = Signal()
    req_laddr = Signal(64)

    tlb_req_index = Signal(TLB_SIZE)
    real_addr = Signal(REAL_ADDR_BITS)
    ra_valid = Signal()
    priv_fault = Signal()
    access_ok = Signal()
    use_previous = Signal()

    cache_out_row = Signal(ROW_SIZE_BITS)

    plru_victim = PLRUOut()
    replace_way = Signal(NUM_WAYS)

    # call sub-functions putting everything together,
    # using shared signals established above
    self.rams(m, r, cache_out_row, use_previous, replace_way, req_row)
    self.maybe_plrus(m, r, plru_victim)
    # NOTE(review): trailing argument (access_ok) reconstructed --
    # confirm against itlb_lookup's signature.
    self.itlb_lookup(m, tlb_req_index, itlb_ptes, itlb_tags, real_addr,
                     itlb_valid_bits, ra_valid, eaa_priv, priv_fault,
                     access_ok)
    self.itlb_update(m, itlb_valid_bits, itlb_tags, itlb_ptes)
    self.icache_comb(m, use_previous, r, req_index, req_row, req_hit_way,
                     req_tag, real_addr, req_laddr, cache_valid_bits,
                     cache_tags, access_ok, req_is_hit, req_is_miss,
                     replace_way, plru_victim, cache_out_row)
    self.icache_hit(m, use_previous, r, req_is_hit, req_hit_way,
                    req_index, req_tag, real_addr)
    self.icache_miss(m, cache_valid_bits, r, req_is_miss, req_index,
                     req_laddr, req_tag, replace_way, cache_tags,
                     access_ok, real_addr)
    #self.icache_log(m, log_out, req_hit_way, ra_valid, access_ok,
    #                req_is_miss, req_is_hit, lway, wstate, r)

    return m
def icache_sim(dut):
    """Simulation process: two misses, a hit and a further miss.

    Names are from the testbench's point of view: ``i_out`` / ``m_out``
    are what the test drives into the cache, ``i_in`` is what it reads
    back.  Expected instruction words assume the memory image built by
    this file's ``__main__`` section (32-bit word ``k`` holds ``k``).

    NOTE(review): the cycle counts between stimulus and sampling were
    reconstructed (the bare ``yield`` lines were missing from the
    reviewed text) -- confirm against a waveform.
    """
    i_out = dut.i_in
    i_in = dut.i_out
    m_out = dut.m_in

    def wait(cycles):
        # one bare yield == one clock tick of the sync process
        for _ in range(cycles):
            yield

    # quiesce every input
    yield i_in.valid.eq(0)
    yield i_out.priv_mode.eq(1)
    yield i_out.req.eq(0)
    yield i_out.nia.eq(0)
    yield i_out.stop_mark.eq(0)
    yield m_out.tlbld.eq(0)
    yield m_out.tlbie.eq(0)
    yield m_out.addr.eq(0)
    yield m_out.pte.eq(0)
    yield from wait(4)

    # miss: first access, line has to be fetched over wishbone
    yield i_out.req.eq(1)
    yield i_out.nia.eq(Const(0x0000000000000004, 64))
    yield from wait(31)
    valid = yield i_in.valid
    nia = yield i_out.nia
    insn = yield i_in.insn
    print(f"valid? {valid}")
    assert valid
    assert insn == 0x00000001, \
        "insn @%x=%x expected 00000001" % (nia, insn)
    yield i_out.req.eq(0)
    yield from wait(1)

    # hit: same line as the previous access
    yield from wait(2)
    yield i_out.req.eq(1)
    yield i_out.nia.eq(Const(0x0000000000000008, 64))
    yield from wait(2)
    valid = yield i_in.valid
    nia = yield i_in.nia
    insn = yield i_in.insn
    assert valid
    assert insn == 0x00000002, \
        "insn @%x=%x expected 00000002" % (nia, insn)
    yield from wait(1)

    # another miss
    yield i_out.req.eq(1)
    yield i_out.nia.eq(Const(0x0000000000000040, 64))
    yield from wait(31)
    valid = yield i_in.valid
    nia = yield i_out.nia
    insn = yield i_in.insn
    assert valid
    assert insn == 0x00000010, \
        "insn @%x=%x expected 00000010" % (nia, insn)

    # test something that aliases
    yield i_out.req.eq(1)
    yield i_out.nia.eq(Const(0x0000000000000100, 64))
    yield from wait(2)
    # the line is not in the cache yet, so nothing may be valid so soon
    valid = yield i_in.valid
    assert not valid
    yield from wait(31)
    valid = yield i_in.valid
    # re-read the address so a failure reports this access rather than
    # the previous one
    nia = yield i_out.nia
    insn = yield i_in.insn
    assert valid
    assert insn == 0x00000040, \
        "insn @%x=%x expected 00000040" % (nia, insn)
    yield i_out.req.eq(0)
def test_icache(mem):
    """Simulate the icache against a wishbone SRAM preloaded with ``mem``.

    ``mem`` is the initial content of a 512-entry, 64-bit wide memory.
    The cache's wishbone master outputs are wired to the SRAM bus and
    the SRAM's ack/read-data are fed back; ``icache_sim`` then runs as
    the stimulus process and a trace is written to ``test_icache.vcd``.
    """
    # NOTE(review): class name reconstructed -- confirm it is the cache
    # class defined earlier in this file.
    dut = ICache()

    memory = Memory(width=64, depth=512, init=mem)
    sram = SRAM(memory=memory, granularity=8)

    m = Module()
    m.submodules.icache = dut
    m.submodules.sram = sram

    comb = m.d.comb

    # cache (master) -> SRAM (slave): identically named fields
    for field in ("cyc", "stb", "we", "sel", "adr"):
        comb += getattr(sram.bus, field).eq(getattr(dut.wb_out, field))
    comb += sram.bus.dat_w.eq(dut.wb_out.dat)

    # SRAM (slave) -> cache (master)
    comb += dut.wb_in.ack.eq(sram.bus.ack)
    comb += dut.wb_in.dat.eq(sram.bus.dat_r)

    # nmigen Simulation
    sim = Simulator(m)
    # a sync process needs a clock on the default domain
    # NOTE(review): 1 MHz period reconstructed -- any period works for
    # a purely cycle-based test, confirm if the VCD timescale matters.
    sim.add_clock(1e-6)

    sim.add_sync_process(wrap(icache_sim(dut)))
    with sim.write_vcd('test_icache.vcd'):
        sim.run()
if __name__ == '__main__':
    # Emit RTLIL for the bare cache.
    # NOTE(review): class name reconstructed -- confirm it is the cache
    # class defined earlier in this file.
    dut = ICache()
    vl = rtlil.convert(dut, ports=[])
    with open("test_icache.il", "w") as f:
        f.write(vl)

    # Memory image for the simulation: each 64-bit row packs two
    # consecutive 32-bit words, so 32-bit word k holds the value k.
    # NOTE(review): `mem` is presumably handed to test_icache() right
    # after this point -- confirm.
    mem = [(i*2) | ((i*2+1) << 32) for i in range(512)]