big_endian : std_ulogic;
stop_mark: std_ulogic;
sequential: std_ulogic;
+ predicted : std_ulogic;
nia: std_ulogic_vector(63 downto 0);
end record;
nia: std_ulogic_vector(63 downto 0);
insn: std_ulogic_vector(31 downto 0);
big_endian: std_ulogic;
+ next_predicted: std_ulogic;
end record;
type Decode1ToDecode2Type is record
big_endian: std_ulogic;
mode_32bit: std_ulogic;
redirect_nia: std_ulogic_vector(63 downto 0);
+ br_nia : std_ulogic_vector(63 downto 0);
+ br_last : std_ulogic;
+ br_taken : std_ulogic;
end record;
constant Execute1ToFetch1Init : Execute1ToFetch1Type := (redirect => '0', virt_mode => '0',
priv_mode => '0', big_endian => '0',
- mode_32bit => '0', others => (others => '0'));
+ mode_32bit => '0', br_taken => '0',
+ br_last => '0', others => (others => '0'));
type Execute1ToLoadstore1Type is record
valid : std_ulogic;
DISABLE_FLATTEN : boolean := false;
EX1_BYPASS : boolean := true;
HAS_FPU : boolean := true;
+ HAS_BTC : boolean := true;
ALT_RESET_ADDRESS : std_ulogic_vector(63 downto 0) := (others => '0');
LOG_LENGTH : natural := 512
);
fetch1_0: entity work.fetch1
generic map (
RESET_ADDRESS => (others => '0'),
- ALT_RESET_ADDRESS => ALT_RESET_ADDRESS
+ ALT_RESET_ADDRESS => ALT_RESET_ADDRESS,
+ HAS_BTC => HAS_BTC
)
port map (
clk => clk,
alt_reset_in => alt_reset_d,
stall_in => fetch1_stall_in,
flush_in => fetch1_flush,
+ inval_btc => ex1_icache_inval or mmu_to_icache.tlbie,
stop_in => dbg_core_stop,
d_in => decode1_to_fetch1,
e_in => execute1_to_fetch1,
bv.br_nia := (others => '0');
end if;
bv.br_offset := br_offset;
- bv.predict := v.br_pred and f_in.valid and not flush_in and not busy_out;
+ if f_in.next_predicted = '1' then
+ v.br_pred := '1';
+ end if;
+ bv.predict := v.br_pred and f_in.valid and not flush_in and not busy_out and not f_in.next_predicted;
-- after a clock edge...
br_target := std_ulogic_vector(signed(br.br_nia) + br.br_offset);
last_nia : std_ulogic_vector(63 downto 0);
redirect : std_ulogic;
abs_br : std_ulogic;
+ taken_br : std_ulogic;
+ br_last : std_ulogic;
do_intr : std_ulogic;
vector : integer range 0 to 16#fff#;
br_offset : std_ulogic_vector(63 downto 0);
fp_exception_next => '0', trace_next => '0', prev_op => OP_ILLEGAL,
mul_in_progress => '0', mul_finish => '0', div_in_progress => '0', cntz_in_progress => '0',
next_lr => (others => '0'), last_nia => (others => '0'),
- redirect => '0', abs_br => '0', do_intr => '0', vector => 0,
+ redirect => '0', abs_br => '0', taken_br => '0', br_last => '0', do_intr => '0', vector => 0,
br_offset => (others => '0'), redir_mode => "0000",
others => (others => '0'));
variable trapval : std_ulogic_vector(4 downto 0);
variable illegal : std_ulogic;
variable is_branch : std_ulogic;
+ variable is_direct_branch : std_ulogic;
variable taken_branch : std_ulogic;
variable abs_branch : std_ulogic;
variable spr_val : std_ulogic_vector(63 downto 0);
sum_with_carry := (others => '0');
newcrf := (others => '0');
is_branch := '0';
+ is_direct_branch := '0';
taken_branch := '0';
abs_branch := '0';
hold_wr_data := '0';
v.br_offset := (others => '0');
v.redir_mode := ctrl.msr(MSR_IR) & not ctrl.msr(MSR_PR) &
not ctrl.msr(MSR_LE) & not ctrl.msr(MSR_SF);
+ v.taken_br := '0';
+ v.br_last := '0';
lv := Execute1ToLoadstore1Init;
fv := Execute1ToFPUInit;
when OP_B =>
is_branch := '1';
taken_branch := '1';
+ is_direct_branch := '1';
abs_branch := insn_aa(e_in.insn);
if ctrl.msr(MSR_BE) = '1' then
do_trace := '1';
bo := insn_bo(e_in.insn);
bi := insn_bi(e_in.insn);
is_branch := '1';
+ is_direct_branch := '1';
taken_branch := ppc_bc_taken(bo, bi, cr_in, a_in);
abs_branch := insn_aa(e_in.insn);
if ctrl.msr(MSR_BE) = '1' then
if taken_branch = '1' then
ctrl_tmp.cfar <= e_in.nia;
end if;
- if e_in.br_pred = '0' then
+ if taken_branch = '1' then
v.br_offset := b_in;
v.abs_br := abs_branch;
else
if taken_branch /= e_in.br_pred then
v.redirect := '1';
end if;
+ v.br_last := is_direct_branch;
+ v.taken_br := taken_branch;
end if;
elsif valid_in = '1' and exception = '0' and illegal = '0' then
-- Outputs to fetch1
f.redirect := r.redirect;
+ f.br_nia := r.last_nia;
+ f.br_last := r.br_last and not r.do_intr;
+ f.br_taken := r.taken_br;
if r.do_intr = '1' then
f.redirect_nia := std_ulogic_vector(to_unsigned(r.vector, 64));
f.virt_mode := '0';
entity fetch1 is
generic(
RESET_ADDRESS : std_logic_vector(63 downto 0) := (others => '0');
- ALT_RESET_ADDRESS : std_logic_vector(63 downto 0) := (others => '0')
+ ALT_RESET_ADDRESS : std_logic_vector(63 downto 0) := (others => '0');
+ HAS_BTC : boolean := true
);
port(
clk : in std_ulogic;
-- Control inputs:
stall_in : in std_ulogic;
flush_in : in std_ulogic;
+ inval_btc : in std_ulogic;
stop_in : in std_ulogic;
alt_reset_in : in std_ulogic;
architecture behaviour of fetch1 is
type reg_internal_t is record
mode_32bit: std_ulogic;
+ rd_is_niap4: std_ulogic;
+ predicted: std_ulogic;
+ predicted_nia: std_ulogic_vector(63 downto 0);
end record;
signal r, r_next : Fetch1ToIcacheType;
signal r_int, r_next_int : reg_internal_t;
+ signal advance_nia : std_ulogic;
signal log_nia : std_ulogic_vector(42 downto 0);
+
+ constant BTC_ADDR_BITS : integer := 10;  -- width of the BTC index; taken from address bits (BTC_ADDR_BITS + 1 downto 2)
+ constant BTC_TAG_BITS : integer := 62 - BTC_ADDR_BITS;  -- remaining upper bits of a 64-bit address once the index and the low 2 bits are removed
+ constant BTC_TARGET_BITS : integer := 62;  -- stored target is bits 63 downto 2; the low two bits are re-appended as "00" on read
+ constant BTC_SIZE : integer := 2 ** BTC_ADDR_BITS;  -- number of BTC entries
+ constant BTC_WIDTH : integer := BTC_TAG_BITS + BTC_TARGET_BITS;  -- one entry = tag (upper bits) & target (lower bits)
+ type btc_mem_type is array (0 to BTC_SIZE - 1) of std_ulogic_vector(BTC_WIDTH - 1 downto 0);  -- storage type for the BTC data array
+
+ signal btc_rd_data : std_ulogic_vector(BTC_WIDTH - 1 downto 0) := (others => '0');
+ signal btc_rd_valid : std_ulogic := '0';
+
begin
regs : process(clk)
" R:" & std_ulogic'image(e_in.redirect) & std_ulogic'image(d_in.redirect) &
" S:" & std_ulogic'image(stall_in) &
" T:" & std_ulogic'image(stop_in) &
- " nia:" & to_hstring(r_next.nia) &
- " SM:" & std_ulogic'image(r_next.stop_mark);
+ " nia:" & to_hstring(r_next.nia);
end if;
- r <= r_next;
- r_int <= r_next_int;
+ if rst = '1' or e_in.redirect = '1' or d_in.redirect = '1' or stall_in = '0' then
+ r.virt_mode <= r_next.virt_mode;
+ r.priv_mode <= r_next.priv_mode;
+ r.big_endian <= r_next.big_endian;
+ r_int.mode_32bit <= r_next_int.mode_32bit;
+ end if;
+ if advance_nia = '1' then
+ r.predicted <= r_next.predicted;
+ r.nia <= r_next.nia;
+ r_int.predicted <= r_next_int.predicted;
+ r_int.predicted_nia <= r_next_int.predicted_nia;
+ r_int.rd_is_niap4 <= r_next.sequential;
+ end if;
+ r.sequential <= r_next.sequential and advance_nia;
+ -- always send the up-to-date stop mark and req
+ r.stop_mark <= stop_in;
+ r.req <= not rst;
end if;
end process;
log_out <= log_nia;
+ btc : if HAS_BTC generate  -- branch target cache storage and its read/write logic; only elaborated when HAS_BTC is true
+ signal btc_memory : btc_mem_type;  -- BTC_SIZE entries, each holding tag & target
+ attribute ram_style : string;
+ attribute ram_style of btc_memory : signal is "block";  -- synthesis hint: "block" RAM style for the data array
+
+ signal btc_valids : std_ulogic_vector(BTC_SIZE - 1 downto 0);  -- one valid bit per entry, held outside btc_memory so all of them can be cleared in a single clock
+ attribute ram_style of btc_valids : signal is "distributed";  -- synthesis hint: "distributed" style for the valid bits
+
+ signal btc_wr : std_ulogic;
+ signal btc_wr_data : std_ulogic_vector(BTC_WIDTH - 1 downto 0);
+ signal btc_wr_addr : std_ulogic_vector(BTC_ADDR_BITS - 1 downto 0);
+ signal btc_wr_v : std_ulogic;
+ begin
+ btc_wr_data <= e_in.br_nia(63 downto BTC_ADDR_BITS + 2) &
+ e_in.redirect_nia(63 downto 2);  -- entry written = tag (upper bits of e_in.br_nia) & target (e_in.redirect_nia without its low 2 bits)
+ btc_wr_addr <= e_in.br_nia(BTC_ADDR_BITS + 1 downto 2);  -- entry index = low bits of e_in.br_nia above the 2-bit word offset
+ btc_wr <= e_in.br_last;  -- write enable for both the data array and the valid bit
+ btc_wr_v <= e_in.br_taken;  -- value stored in the valid bit, so a write with br_taken = '0' invalidates that entry
+
+ btc_ram : process(clk)  -- clocked read port, write port and valid-bit maintenance
+ variable raddr : unsigned(BTC_ADDR_BITS - 1 downto 0);
+ begin
+ if rising_edge(clk) then
+ raddr := unsigned(r.nia(BTC_ADDR_BITS + 1 downto 2)) +
+ to_unsigned(2, BTC_ADDR_BITS);  -- read index is two words (8 bytes) past r.nia, wrapping within the index width; NOTE(review): presumably looks ahead to cover the registered read latency - confirm
+ if advance_nia = '1' then  -- read outputs hold their previous value while advance_nia is '0'
+ btc_rd_data <= btc_memory(to_integer(raddr));
+ btc_rd_valid <= btc_valids(to_integer(raddr));
+ end if;
+ if btc_wr = '1' then  -- the data write is not gated by rst or inval_btc; only the valid bits are
+ btc_memory(to_integer(unsigned(btc_wr_addr))) <= btc_wr_data;
+ end if;
+ if inval_btc = '1' or rst = '1' then  -- invalidate/reset clears every entry and takes priority over a write in the same cycle
+ btc_valids <= (others => '0');
+ elsif btc_wr = '1' then
+ btc_valids(to_integer(unsigned(btc_wr_addr))) <= btc_wr_v;
+ end if;
+ end if;
+ end process;
+ end generate;
+
comb : process(all)
variable v : Fetch1ToIcacheType;
variable v_int : reg_internal_t;
v := r;
v_int := r_int;
v.sequential := '0';
+ v.predicted := '0';
+ v_int.predicted := '0';
if rst = '1' then
if alt_reset_in = '1' then
v.priv_mode := '1';
v.big_endian := '0';
v_int.mode_32bit := '0';
+ v_int.predicted_nia := (others => '0');
elsif e_in.redirect = '1' then
v.nia := e_in.redirect_nia(63 downto 2) & "00";
if e_in.mode_32bit = '1' then
if r_int.mode_32bit = '1' then
v.nia(63 downto 32) := (others => '0');
end if;
- elsif stall_in = '0' then
-
- -- If the last NIA value went down with a stop mark, it didn't get
- -- executed, and hence we shouldn't increment NIA.
- if r.stop_mark = '0' then
- if r_int.mode_32bit = '0' then
- v.nia := std_ulogic_vector(unsigned(r.nia) + 4);
- else
- v.nia := x"00000000" & std_ulogic_vector(unsigned(r.nia(31 downto 0)) + 4);
- end if;
- v.sequential := '1';
- end if;
- end if;
+ elsif r_int.predicted = '1' then
+ v.nia := r_int.predicted_nia;
+ v.predicted := '1';
+ else
+ v.sequential := '1';
+ v.nia := std_ulogic_vector(unsigned(r.nia) + 4);
+ if r_int.mode_32bit = '1' then
+ v.nia(63 downto 32) := x"00000000";
+ end if;
+ if btc_rd_valid = '1' and r_int.rd_is_niap4 = '1' and
+ btc_rd_data(BTC_WIDTH - 1 downto BTC_TARGET_BITS)
+ = v.nia(BTC_TAG_BITS + BTC_ADDR_BITS + 1 downto BTC_ADDR_BITS + 2) then
+ v_int.predicted := '1';
+ end if;
+ end if;
+ v_int.predicted_nia := btc_rd_data(BTC_TARGET_BITS - 1 downto 0) & "00";
- v.req := not rst and not stop_in;
- v.stop_mark := stop_in;
+ -- If the last NIA value went down with a stop mark, it didn't get
+ -- executed, and hence we shouldn't increment NIA.
+ advance_nia <= rst or e_in.redirect or d_in.redirect or (not r.stop_mark and not stall_in);
r_next <= v;
r_next_int <= v_int;
RESET_LOW : boolean := true;
CLK_FREQUENCY : positive := 100000000;
HAS_FPU : boolean := true;
+ HAS_BTC : boolean := true;
USE_LITEDRAM : boolean := false;
NO_BRAM : boolean := false;
DISABLE_FLATTEN_CORE : boolean := false;
SIM => false,
CLK_FREQ => CLK_FREQUENCY,
HAS_FPU => HAS_FPU,
+ HAS_BTC => HAS_BTC,
HAS_DRAM => USE_LITEDRAM,
DRAM_SIZE => 256 * 1024 * 1024,
DRAM_INIT_SIZE => PAYLOAD_SIZE,
CLK_INPUT : positive := 100000000;
CLK_FREQUENCY : positive := 100000000;
HAS_FPU : boolean := true;
+ HAS_BTC : boolean := false;
LOG_LENGTH : natural := 512;
DISABLE_FLATTEN_CORE : boolean := false;
UART_IS_16550 : boolean := true
SIM => false,
CLK_FREQ => CLK_FREQUENCY,
HAS_FPU => HAS_FPU,
+ HAS_BTC => HAS_BTC,
LOG_LENGTH => LOG_LENGTH,
DISABLE_FLATTEN_CORE => DISABLE_FLATTEN_CORE,
UART0_IS_16550 => UART_IS_16550
RESET_LOW : boolean := true;
CLK_FREQUENCY : positive := 100000000;
HAS_FPU : boolean := true;
+ HAS_BTC : boolean := true;
USE_LITEDRAM : boolean := false;
NO_BRAM : boolean := false;
DISABLE_FLATTEN_CORE : boolean := false;
SIM => false,
CLK_FREQ => CLK_FREQUENCY,
HAS_FPU => HAS_FPU,
+ HAS_BTC => HAS_BTC,
HAS_DRAM => USE_LITEDRAM,
DRAM_SIZE => 512 * 1024 * 1024,
DRAM_INIT_SIZE => PAYLOAD_SIZE,
i_out.stop_mark <= r.hit_smark;
i_out.fetch_failed <= r.fetch_failed;
i_out.big_endian <= r.big_endian;
+ i_out.next_predicted <= i_in.predicted;
-- Stall fetch1 if we have a miss on cache or TLB or a protection fault
stall_out <= not (is_hit and access_ok);
- log_length=2048
- uart_is_16550
- has_fpu
+ - has_btc
tools:
vivado: {part : xc7a100tcsg324-1}
toplevel : toplevel
- log_length=2048
- uart_is_16550
- has_fpu
+ - has_btc
tools:
vivado: {part : xc7a200tsbg484-1}
toplevel : toplevel
- log_length=2048
- uart_is_16550
- has_fpu
+ - has_btc
generate: [litedram_nexys_video]
tools:
vivado: {part : xc7a200tsbg484-1}
- uart_is_16550
- has_uart1
- has_fpu=false
+ - has_btc=false
tools:
vivado: {part : xc7a35ticsg324-1L}
toplevel : toplevel
- uart_is_16550
- has_uart1
- has_fpu=false
+ - has_btc=false
generate: [litedram_arty, liteeth_arty]
tools:
vivado: {part : xc7a35ticsg324-1L}
- uart_is_16550
- has_uart1
- has_fpu
+ - has_btc
tools:
vivado: {part : xc7a100ticsg324-1L}
toplevel : toplevel
- uart_is_16550
- has_uart1
- has_fpu
+ - has_btc
generate: [litedram_arty, liteeth_arty]
tools:
vivado: {part : xc7a100ticsg324-1L}
- log_length=512
- uart_is_16550
- has_fpu=false
+ - has_btc=false
tools:
vivado: {part : xc7a35tcpg236-1}
toplevel : toplevel
paramtype : generic
default : true
+ has_btc:
+ datatype : bool
+ description : Include a branch target cache in the core
+ paramtype : generic
+ default : true
+
disable_flatten_core:
datatype : bool
description : Prevent Vivado from flattening the main core components
CLK_FREQ : positive;
SIM : boolean;
HAS_FPU : boolean := true;
+ HAS_BTC : boolean := true;
DISABLE_FLATTEN_CORE : boolean := false;
HAS_DRAM : boolean := false;
DRAM_SIZE : integer := 0;
generic map(
SIM => SIM,
HAS_FPU => HAS_FPU,
+ HAS_BTC => HAS_BTC,
DISABLE_FLATTEN => DISABLE_FLATTEN_CORE,
ALT_RESET_ADDRESS => (23 downto 0 => '0', others => '1'),
LOG_LENGTH => LOG_LENGTH