entity icache is
generic (
+ SIM : boolean := false;
-- Line size in bytes
LINE_SIZE : positive := 64;
-- Number of lines in a set
NUM_LINES : positive := 32;
-- Number of ways
- NUM_WAYS : positive := 4
+ NUM_WAYS : positive := 4;
+ -- L1 ITLB number of entries (direct mapped)
+ TLB_SIZE : positive := 64;
+ -- L1 ITLB log_2(page_size)
+ TLB_LG_PGSZ : positive := 12;
+ -- Number of real address bits that we store
+ REAL_ADDR_BITS : positive := 56
);
port (
clk : in std_ulogic;
i_in : in Fetch1ToIcacheType;
i_out : out IcacheToFetch2Type;
+ m_in : in MmuToIcacheType;
+
stall_out : out std_ulogic;
flush_in : in std_ulogic;
constant LINE_OFF_BITS : natural := log2(LINE_SIZE);
-- ROW_OFF_BITS is the number of bits for the offset in a row
constant ROW_OFF_BITS : natural := log2(ROW_SIZE);
- -- INDEX_BITS is the number if bits to select a cache line
+ -- INDEX_BITS is the number of bits to select a cache line
constant INDEX_BITS : natural := log2(NUM_LINES);
+ -- SET_SIZE_BITS is the log base 2 of the set size
+ constant SET_SIZE_BITS : natural := LINE_OFF_BITS + INDEX_BITS;
-- TAG_BITS is the number of bits of the tag part of the address
- constant TAG_BITS : natural := 64 - LINE_OFF_BITS - INDEX_BITS;
+ constant TAG_BITS : natural := REAL_ADDR_BITS - SET_SIZE_BITS;
-- WAY_BITS is the number of bits to select a way
constant WAY_BITS : natural := log2(NUM_WAYS);
attribute ram_style : string;
attribute ram_style of cache_tags : signal is "distributed";
+ -- L1 ITLB.
+ constant TLB_BITS : natural := log2(TLB_SIZE);
+ constant TLB_EA_TAG_BITS : natural := 64 - (TLB_LG_PGSZ + TLB_BITS);
+ constant TLB_PTE_BITS : natural := 64;
+
+ subtype tlb_index_t is integer range 0 to TLB_SIZE - 1;
+ type tlb_valids_t is array(tlb_index_t) of std_ulogic;
+ subtype tlb_tag_t is std_ulogic_vector(TLB_EA_TAG_BITS - 1 downto 0);
+ type tlb_tags_t is array(tlb_index_t) of tlb_tag_t;
+ subtype tlb_pte_t is std_ulogic_vector(TLB_PTE_BITS - 1 downto 0);
+ type tlb_ptes_t is array(tlb_index_t) of tlb_pte_t;
+
+ signal itlb_valids : tlb_valids_t;
+ signal itlb_tags : tlb_tags_t;
+ signal itlb_ptes : tlb_ptes_t;
+ attribute ram_style of itlb_tags : signal is "distributed";
+ attribute ram_style of itlb_ptes : signal is "distributed";
+
+ -- Privilege bit from PTE EAA field
+ signal eaa_priv : std_ulogic;
+
-- Cache reload state machine
type state_t is (IDLE, WAIT_ACK);
store_way : way_t;
store_index : index_t;
store_row : row_t;
+
+ -- TLB miss state
+ fetch_failed : std_ulogic;
end record;
signal r : reg_internal_t;
signal req_is_miss : std_ulogic;
signal req_laddr : std_ulogic_vector(63 downto 0);
+ signal tlb_req_index : tlb_index_t;
+ signal real_addr : std_ulogic_vector(REAL_ADDR_BITS - 1 downto 0);
+ signal ra_valid : std_ulogic;
+ signal priv_fault : std_ulogic;
+ signal access_ok : std_ulogic;
+
-- Cache RAM interface
type cache_ram_out_t is array(way_t) of cache_row_t;
signal cache_out : cache_ram_out_t;
-- Return the cache line index (tag index) for an address
function get_index(addr: std_ulogic_vector(63 downto 0)) return index_t is
begin
- return to_integer(unsigned(addr(63-TAG_BITS downto LINE_OFF_BITS)));
+ return to_integer(unsigned(addr(SET_SIZE_BITS - 1 downto LINE_OFF_BITS)));
end;
-- Return the cache row index (data memory) for an address
function get_row(addr: std_ulogic_vector(63 downto 0)) return row_t is
begin
- return to_integer(unsigned(addr(63-TAG_BITS downto ROW_OFF_BITS)));
+ return to_integer(unsigned(addr(SET_SIZE_BITS - 1 downto ROW_OFF_BITS)));
end;
-- Returns whether this is the last row of a line
end;
-- Get the tag value from the address
- function get_tag(addr: std_ulogic_vector(63 downto 0)) return cache_tag_t is
+ function get_tag(addr: std_ulogic_vector(REAL_ADDR_BITS - 1 downto 0)) return cache_tag_t is
begin
- return addr(63 downto 64-TAG_BITS);
+ return addr(REAL_ADDR_BITS - 1 downto SET_SIZE_BITS);
end;
-- Read a tag from a tag memory row
tagset((way+1) * TAG_BITS - 1 downto way * TAG_BITS) := tag;
end;
+ -- Simple hash for direct-mapped TLB index
+ function hash_ea(addr: std_ulogic_vector(63 downto 0)) return tlb_index_t is
+ variable hash : std_ulogic_vector(TLB_BITS - 1 downto 0);
+ begin
+ hash := addr(TLB_LG_PGSZ + TLB_BITS - 1 downto TLB_LG_PGSZ)
+ xor addr(TLB_LG_PGSZ + 2 * TLB_BITS - 1 downto TLB_LG_PGSZ + TLB_BITS)
+ xor addr(TLB_LG_PGSZ + 3 * TLB_BITS - 1 downto TLB_LG_PGSZ + 2 * TLB_BITS);
+ return to_integer(unsigned(hash));
+ end;
begin
assert LINE_SIZE mod ROW_SIZE = 0;
report "geometry bits don't add up" severity FAILURE;
assert (LINE_OFF_BITS = ROW_OFF_BITS + ROW_LINEBITS)
report "geometry bits don't add up" severity FAILURE;
- assert (64 = TAG_BITS + INDEX_BITS + LINE_OFF_BITS)
+ assert (REAL_ADDR_BITS = TAG_BITS + INDEX_BITS + LINE_OFF_BITS)
report "geometry bits don't add up" severity FAILURE;
- assert (64 = TAG_BITS + ROW_BITS + ROW_OFF_BITS)
+ assert (REAL_ADDR_BITS = TAG_BITS + ROW_BITS + ROW_OFF_BITS)
report "geometry bits don't add up" severity FAILURE;
+ sim_debug: if SIM generate
debug: process
begin
report "ROW_SIZE = " & natural'image(ROW_SIZE);
report "WAY_BITS = " & natural'image(WAY_BITS);
wait;
end process;
+ end generate;
-- Generate a cache RAM for each way
rams: for i in 0 to NUM_WAYS-1 generate
end generate;
end generate;
+ -- TLB hit detection and real address generation
+ itlb_lookup : process(all)
+ variable pte : tlb_pte_t;
+ variable ttag : tlb_tag_t;
+ begin
+ tlb_req_index <= hash_ea(i_in.nia);
+ pte := itlb_ptes(tlb_req_index);
+ ttag := itlb_tags(tlb_req_index);
+ if i_in.virt_mode = '1' then
+ real_addr <= pte(REAL_ADDR_BITS - 1 downto TLB_LG_PGSZ) &
+ i_in.nia(TLB_LG_PGSZ - 1 downto 0);
+ if ttag = i_in.nia(63 downto TLB_LG_PGSZ + TLB_BITS) then
+ ra_valid <= itlb_valids(tlb_req_index);
+ else
+ ra_valid <= '0';
+ end if;
+ eaa_priv <= pte(3);
+ else
+ real_addr <= i_in.nia(REAL_ADDR_BITS - 1 downto 0);
+ ra_valid <= '1';
+ eaa_priv <= '1';
+ end if;
+
+ -- no IAMR, so no KUEP support for now
+ priv_fault <= eaa_priv and not i_in.priv_mode;
+ access_ok <= ra_valid and not priv_fault;
+ end process;
+
+ -- iTLB update
+ itlb_update: process(clk)
+ variable wr_index : tlb_index_t;
+ begin
+ if rising_edge(clk) then
+ wr_index := hash_ea(m_in.addr);
+ if rst = '1' or (m_in.tlbie = '1' and m_in.doall = '1') then
+ -- clear all valid bits
+ for i in tlb_index_t loop
+ itlb_valids(i) <= '0';
+ end loop;
+ elsif m_in.tlbie = '1' then
+ -- clear entry regardless of hit or miss
+ itlb_valids(wr_index) <= '0';
+ elsif m_in.tlbld = '1' then
+ itlb_tags(wr_index) <= m_in.addr(63 downto TLB_LG_PGSZ + TLB_BITS);
+ itlb_ptes(wr_index) <= m_in.pte;
+ itlb_valids(wr_index) <= '1';
+ end if;
+ end if;
+ end process;
+
-- Cache hit detection, output to fetch2 and other misc logic
icache_comb : process(all)
variable is_hit : std_ulogic;
-- Extract line, row and tag from request
req_index <= get_index(i_in.nia);
req_row <= get_row(i_in.nia);
- req_tag <= get_tag(i_in.nia);
+ req_tag <= get_tag(real_addr);
-- Calculate address of beginning of cache line, will be
-- used for cache miss processing if needed
--
- req_laddr <= i_in.nia(63 downto LINE_OFF_BITS) &
+ req_laddr <= (63 downto REAL_ADDR_BITS => '0') &
+ real_addr(REAL_ADDR_BITS - 1 downto LINE_OFF_BITS) &
(LINE_OFF_BITS-1 downto 0 => '0');
-- Test if pending request is a hit on any way
end loop;
-- Generate the "hit" and "miss" signals for the synchronous blocks
- req_is_hit <= i_in.req and is_hit and not flush_in;
- req_is_miss <= i_in.req and not is_hit and not flush_in;
+ if i_in.req = '1' and access_ok = '1' and flush_in = '0' and rst = '0' then
+ req_is_hit <= is_hit;
+ req_is_miss <= not is_hit;
+ else
+ req_is_hit <= '0';
+ req_is_miss <= '0';
+ end if;
req_hit_way <= hit_way;
-- The way to replace on a miss
i_out.valid <= r.hit_valid;
i_out.nia <= r.hit_nia;
i_out.stop_mark <= r.hit_smark;
+ i_out.fetch_failed <= r.fetch_failed;
- -- Stall fetch1 if we have a miss
- stall_out <= not is_hit;
+ -- Stall fetch1 if we have a miss on cache or TLB or a protection fault
+ stall_out <= not (is_hit and access_ok);
-- Wishbone requests output (from the cache miss reload machine)
wishbone_out <= r.wb;
-- On a hit, latch the request for the next cycle, when the BRAM data
-- will be available on the cache_out output of the corresponding way
--
+ r.hit_valid <= req_is_hit;
+ -- Send stop marks and NIA down regardless of validity
+ r.hit_smark <= i_in.stop_mark;
+ r.hit_nia <= i_in.nia;
if req_is_hit = '1' then
r.hit_way <= req_hit_way;
- r.hit_nia <= i_in.nia;
r.hit_smark <= i_in.stop_mark;
- r.hit_valid <= '1';
report "cache hit nia:" & to_hstring(i_in.nia) &
+ " IR:" & std_ulogic'image(i_in.virt_mode) &
" SM:" & std_ulogic'image(i_in.stop_mark) &
" idx:" & integer'image(req_index) &
" tag:" & to_hstring(req_tag) &
- " way: " & integer'image(req_hit_way);
- else
- r.hit_valid <= '0';
-
- -- Send stop marks down regardless of validity
- r.hit_smark <= i_in.stop_mark;
+ " way:" & integer'image(req_hit_way) &
+ " RA:" & to_hstring(real_addr);
end if;
end if;
end process;
-- We need to read a cache line
if req_is_miss = '1' then
report "cache miss nia:" & to_hstring(i_in.nia) &
+ " IR:" & std_ulogic'image(i_in.virt_mode) &
" SM:" & std_ulogic'image(i_in.stop_mark) &
" idx:" & integer'image(req_index) &
" way:" & integer'image(replace_way) &
- " tag:" & to_hstring(req_tag);
+ " tag:" & to_hstring(req_tag) &
+ " RA:" & to_hstring(real_addr);
-- Force misses on that way while reloading that line
cache_valids(req_index)(replace_way) <= '0';
end if;
end case;
end if;
+
+ -- TLB miss and protection fault processing
+ if rst = '1' or flush_in = '1' or m_in.tlbld = '1' then
+ r.fetch_failed <= '0';
+ elsif i_in.req = '1' and access_ok = '0' then
+ r.fetch_failed <= '1';
+ end if;
end if;
end process;
end;