ispr1: gspr_index_t; -- (G)SPR used for branch condition (CTR) or mfspr
ispr2: gspr_index_t; -- (G)SPR used for branch target (CTR, LR, TAR)
decode: decode_rom_t;
+ br_pred: std_ulogic; -- Branch was predicted to be taken
+ end record;
+ constant Decode1ToDecode2Init : Decode1ToDecode2Type := -- default value: valid deasserted, address/instruction/SPR fields zeroed
+ (valid => '0', stop_mark => '0', nia => (others => '0'), insn => (others => '0'),
+ ispr1 => (others => '0'), ispr2 => (others => '0'), decode => decode_rom_init, br_pred => '0'); -- br_pred => '0' means "not predicted taken"
+
+ type Decode1ToFetch1Type is record -- redirect request passed from decode1 to fetch1
+ redirect : std_ulogic; -- '1' when decode1 asks fetch1 to continue fetching from redirect_nia
+ redirect_nia : std_ulogic_vector(63 downto 0); -- address to fetch from next; decode1 drives the two low bits as "00"
end record;
- constant Decode1ToDecode2Init : Decode1ToDecode2Type := (valid => '0', stop_mark => '0', nia => (others => '0'), insn => (others => '0'), ispr1 => (others => '0'), ispr2 => (others => '0'), decode => decode_rom_init);
type Decode2ToExecute1Type is record
valid: std_ulogic;
sign_extend : std_ulogic; -- do we need to sign extend?
update : std_ulogic; -- is this an update instruction?
reserve : std_ulogic; -- set for larx/stcx
+ br_pred : std_ulogic; -- copy of decode1's br_pred for this instruction ('1' = branch was predicted taken)
end record;
constant Decode2ToExecute1Init : Decode2ToExecute1Type := -- default value: valid => '0', unit => NONE, insn_type => OP_ILLEGAL
(valid => '0', unit => NONE, insn_type => OP_ILLEGAL, bypass_data1 => '0', bypass_data2 => '0', bypass_data3 => '0',
lr => '0', rc => '0', oe => '0', invert_a => '0',
invert_out => '0', input_carry => ZERO, output_carry => '0', input_cr => '0', output_cr => '0',
- is_32bit => '0', is_signed => '0', xerc => xerc_init, reserve => '0',
+ is_32bit => '0', is_signed => '0', xerc => xerc_init, reserve => '0', br_pred => '0', -- br_pred defaults to '0' (not predicted taken)
byte_reverse => '0', sign_extend => '0', update => '0', nia => (others => '0'), read_data1 => (others => '0'), read_data2 => (others => '0'), read_data3 => (others => '0'), cr => (others => '0'), insn => (others => '0'), data_len => (others => '0'), others => (others => '0'));
type Execute1ToMultiplyType is record
-- decode signals
signal decode1_to_decode2: Decode1ToDecode2Type;
+ signal decode1_to_fetch1: Decode1ToFetch1Type; -- decode1's f_out, connected to fetch1's d_in
signal decode2_to_execute1: Decode2ToExecute1Type;
-- register file signals
signal dcache_stall_out: std_ulogic;
signal flush: std_ulogic;
+ signal decode1_flush: std_ulogic; -- driven by decode1's flush_out port
+ signal fetch1_flush: std_ulogic; -- flush or decode1_flush; used as flush_in for fetch1 and icache
signal complete: std_ulogic;
signal terminate: std_ulogic;
rst => rst_fetch1,
alt_reset_in => alt_reset_d,
stall_in => fetch1_stall_in,
- flush_in => flush,
+ flush_in => fetch1_flush,
stop_in => dbg_core_stop,
+ d_in => decode1_to_fetch1,
e_in => execute1_to_fetch1,
i_out => fetch1_to_icache,
log_out => log_data(42 downto 0)
);
fetch1_stall_in <= icache_stall_out or decode1_busy; -- fetch1 stalls when either the icache stalls or decode1 is busy
+ fetch1_flush <= flush or decode1_flush; -- fetch1 and icache are flushed by the global flush or by decode1's flush_out
icache_0: entity work.icache
generic map(
i_in => fetch1_to_icache,
i_out => icache_to_decode1,
m_in => mmu_to_icache,
- flush_in => flush,
+ flush_in => fetch1_flush,
inval_in => dbg_icache_rst or ex1_icache_inval,
stall_in => icache_stall_in,
stall_out => icache_stall_out,
rst => rst_dec1,
stall_in => decode1_stall_in,
flush_in => flush,
+ flush_out => decode1_flush,
busy_out => decode1_busy,
f_in => icache_to_decode1,
d_out => decode1_to_decode2,
+ f_out => decode1_to_fetch1,
log_out => log_data(109 downto 97)
);
entity decode1 is
port (
- clk : in std_ulogic;
- rst : in std_ulogic;
-
- stall_in : in std_ulogic;
- flush_in : in std_ulogic;
- busy_out : out std_ulogic;
-
- f_in : in IcacheToDecode1Type;
- d_out : out Decode1ToDecode2Type;
- log_out : out std_ulogic_vector(12 downto 0)
+ clk : in std_ulogic;
+ rst : in std_ulogic;
+
+ stall_in : in std_ulogic;
+ flush_in : in std_ulogic; -- when '1', suppresses the redirect request on f_out
+ busy_out : out std_ulogic;
+ flush_out : out std_ulogic; -- driven with the same value as f_out.redirect
+
+ f_in : in IcacheToDecode1Type; -- instruction, its address (nia) and valid flag
+ f_out : out Decode1ToFetch1Type; -- redirect request (flag plus target address) for predicted-taken branches
+ d_out : out Decode1ToDecode2Type; -- decoded instruction record, including br_pred
+ log_out : out std_ulogic_vector(12 downto 0)
);
end entity decode1;
decode1_1: process(all) -- re-evaluated whenever any signal it reads changes; decodes f_in and computes the branch prediction/redirect
variable v : Decode1ToDecode2Type;
+ variable f : Decode1ToFetch1Type; -- redirect request assembled here and driven onto f_out
variable majorop : major_opcode_t;
variable op_19_bits: std_ulogic_vector(2 downto 0);
variable sprn : spr_num_t;
+ variable br_nia : std_ulogic_vector(61 downto 0); -- base for the target: f_in.nia(63 downto 2), or zero when insn bit 1 is set
+ variable br_target : std_ulogic_vector(61 downto 0); -- br_nia + br_offset, without the two low address bits
+ variable br_offset : signed(23 downto 0); -- displacement field from the instruction, sign-extended to 24 bits
begin
- v := r;
+ v := Decode1ToDecode2Init; -- start from the defaults, so br_pred stays '0' unless set below
v.valid := f_in.valid;
v.nia := f_in.nia;
else
v.decode := major_decode_rom_array(to_integer(majorop));
+ end if;
+ -- Branch predictor
+ -- Note bclr, bcctr and bctar are predicted not taken as we have no
+ -- count cache or link stack.
+ br_offset := (others => '0');
+ if majorop = 18 then
+ -- Unconditional branches are always taken
+ v.br_pred := '1';
+ br_offset := signed(f_in.insn(25 downto 2)); -- 24-bit displacement field
+ elsif majorop = 16 then
+ -- Predict backward branches as taken, forward as untaken
+ v.br_pred := f_in.insn(15); -- insn(15) is the top (sign) bit of the 14-bit displacement extracted below
+ br_offset := resize(signed(f_in.insn(15 downto 2)), 24); -- sign-extend the 14-bit displacement
+ end if;
+ br_nia := f_in.nia(63 downto 2);
+ if f_in.insn(1) = '1' then -- NOTE(review): presumably the AA (absolute address) bit - confirm
+ br_nia := (others => '0'); -- target is then the displacement alone
end if;
+ br_target := std_ulogic_vector(signed(br_nia) + br_offset);
+ f.redirect := v.br_pred and f_in.valid and not flush_in and not s.valid; -- redirect only for a valid instruction, with no flush and s.valid = '0'
+ f.redirect_nia := br_target & "00"; -- restore the two low address bits as zeros
-- Update registers
rin <= v;
-- Update outputs
d_out <= r;
+ f_out <= f;
+ flush_out <= f.redirect; -- flush_out is asserted exactly when a redirect is requested
end process;
dec1_log : process(clk)
v.e.sign_extend := d_in.decode.sign_extend;
v.e.update := d_in.decode.update;
v.e.reserve := d_in.decode.reserve;
+ v.e.br_pred := d_in.br_pred;
-- issue control
control_valid_in <= d_in.valid;
end if;
update_gpr_write_valid <= d_in.decode.update; -- default: driven from the decoded update flag
update_gpr_write_reg <= decoded_reg_a.reg;
+ if v.e.lr = '1' then -- lr flag set: report LR as the written register instead (NOTE(review): presumably for hazard tracking - confirm)
+ -- there are no instructions that have both update=1 and lr=1, so overriding the two defaults above loses nothing
+ update_gpr_write_valid <= '1';
+ update_gpr_write_reg <= fast_spr_num(SPR_LR);
+ end if;
gpr_a_read_valid <= decoded_reg_a.reg_valid;
gpr_a_read <= decoded_reg_a.reg;
variable exception_nextpc : std_ulogic;
variable trapval : std_ulogic_vector(4 downto 0);
variable illegal : std_ulogic;
+ variable is_branch : std_ulogic;
+ variable taken_branch : std_ulogic;
+ variable abs_branch : std_ulogic;
begin
result := (others => '0');
result_with_carry := (others => '0');
result_en := '0';
newcrf := (others => '0');
+ is_branch := '0';
+ taken_branch := '0';
+ abs_branch := '0';
v := r;
v.e := Execute1ToWritebackInit;
result := logical_result;
result_en := '1';
when OP_B =>
- f_out.redirect <= '1';
- if (insn_aa(e_in.insn)) then
- f_out.redirect_nia <= b_in;
- else
- f_out.redirect_nia <= std_ulogic_vector(signed(e_in.nia) + signed(b_in));
- end if;
+ is_branch := '1';
+ taken_branch := '1';
+ abs_branch := insn_aa(e_in.insn);
when OP_BC =>
-- read_data1 is CTR
bo := insn_bo(e_in.insn);
result_en := '1';
v.e.write_reg := fast_spr_num(SPR_CTR);
end if;
- if ppc_bc_taken(bo, bi, e_in.cr, a_in) = 1 then
- f_out.redirect <= '1';
- if (insn_aa(e_in.insn)) then
- f_out.redirect_nia <= b_in;
- else
- f_out.redirect_nia <= std_ulogic_vector(signed(e_in.nia) + signed(b_in));
- end if;
- end if;
+ is_branch := '1';
+ taken_branch := ppc_bc_taken(bo, bi, e_in.cr, a_in);
+ abs_branch := insn_aa(e_in.insn);
when OP_BCREG =>
-- read_data1 is CTR
-- read_data2 is target register (CTR, LR or TAR)
result_en := '1';
v.e.write_reg := fast_spr_num(SPR_CTR);
end if;
- if ppc_bc_taken(bo, bi, e_in.cr, a_in) = 1 then
+ if ppc_bc_taken(bo, bi, e_in.cr, a_in) = '1' then
f_out.redirect <= '1';
f_out.redirect_nia <= b_in(63 downto 2) & "00";
end if;
v.e.rc := e_in.rc and valid_in;
+ -- Mispredicted branches cause a redirect: the actual outcome (taken_branch) differs from the prediction carried in e_in.br_pred
+ if is_branch = '1' and taken_branch /= e_in.br_pred then
+ f_out.redirect <= '1';
+ if taken_branch = '1' then -- taken but not predicted taken: redirect to the branch target
+ if abs_branch = '1' then
+ f_out.redirect_nia <= b_in; -- abs_branch set: b_in is used as the target itself
+ else
+ f_out.redirect_nia <= std_ulogic_vector(signed(e_in.nia) + signed(b_in)); -- otherwise b_in is added to this instruction's address
+ end if;
+ else -- predicted taken but not taken: continue at next_nia
+ f_out.redirect_nia <= next_nia;
+ end if;
+ end if;
+
-- Update LR on the next cycle after a branch link
- --
- -- WARNING: The LR update isn't tracked by our hazard tracker. This
- -- will work (well I hope) because it only happens on branches
- -- which will flush all decoded instructions. By the time
- -- fetch catches up, we'll have the new LR. This will
- -- *not* work properly however if we have a branch predictor,
- -- in which case the solution would probably be to keep a
- -- local cache of the updated LR in execute1 (flushed on
- -- exceptions) that is used instead of the value from
- -- decode when its content is valid.
+ -- If we're not writing back anything else, we can write back LR
+ -- this cycle, otherwise we take an extra cycle.
if e_in.lr = '1' then
- v.lr_update := '1';
- v.next_lr := next_nia;
- v.e.valid := '0';
- report "Delayed LR update to " & to_hstring(next_nia);
- v.busy := '1';
+ if result_en = '0' then -- nothing else is being written back: use this cycle's result to write next_nia to LR
+ result_en := '1';
+ result := next_nia;
+ v.e.write_reg := fast_spr_num(SPR_LR);
+ else -- a result is already being written: record the LR value in next_lr and flag lr_update
+ v.lr_update := '1';
+ v.next_lr := next_nia;
+ v.e.valid := '0';
+ report "Delayed LR update to " & to_hstring(next_nia);
+ v.busy := '1'; -- busy is raised while the LR update is outstanding
+ end if;
end if;
elsif valid_in = '1' then
-- redirect from execution unit
e_in : in Execute1ToFetch1Type;
+ -- redirect from decode1
+ d_in : in Decode1ToFetch1Type;
+
-- Request to icache
i_out : out Fetch1ToIcacheType;
report "fetch1 rst:" & std_ulogic'image(rst) &
" IR:" & std_ulogic'image(e_in.virt_mode) &
" P:" & std_ulogic'image(e_in.priv_mode) &
- " R:" & std_ulogic'image(e_in.redirect) &
+ " R:" & std_ulogic'image(e_in.redirect) & std_ulogic'image(d_in.redirect) &
" S:" & std_ulogic'image(stall_in) &
" T:" & std_ulogic'image(stop_in) &
" nia:" & to_hstring(r_next.nia) &
v.nia := e_in.redirect_nia;
v.virt_mode := e_in.virt_mode;
v.priv_mode := e_in.priv_mode;
+ elsif d_in.redirect = '1' then
+ v.nia := d_in.redirect_nia;
elsif stall_in = '0' then
-- For debug stop/step to work properly we need a little bit of
function ppc_divd (ra, rb: std_ulogic_vector(63 downto 0)) return std_ulogic_vector;
function ppc_divwu (ra, rb: std_ulogic_vector(63 downto 0)) return std_ulogic_vector;
- function ppc_bc_taken(bo, bi: std_ulogic_vector(4 downto 0); cr: std_ulogic_vector(31 downto 0); ctr: std_ulogic_vector(63 downto 0)) return integer;
+ function ppc_bc_taken(bo, bi: std_ulogic_vector(4 downto 0); cr: std_ulogic_vector(31 downto 0); ctr: std_ulogic_vector(63 downto 0)) return std_ulogic;
end package ppc_fx_insns;
package body ppc_fx_insns is
return std_ulogic_vector(resize(tmp, ra'length));
end;
- function ppc_bc_taken(bo, bi: std_ulogic_vector(4 downto 0); cr: std_ulogic_vector(31 downto 0); ctr: std_ulogic_vector(63 downto 0)) return integer is
+ function ppc_bc_taken(bo, bi: std_ulogic_vector(4 downto 0); cr: std_ulogic_vector(31 downto 0); ctr: std_ulogic_vector(63 downto 0)) return std_ulogic is -- returns '1' when both the CTR test and the CR test pass
variable crfield: integer;
variable crbit_match: std_ulogic;
variable ctr_not_zero: std_ulogic;
variable ctr_ok: std_ulogic;
variable cond_ok: std_ulogic;
- variable ret: integer;
begin
crfield := to_integer(unsigned(bi)); -- bi as an integer
-- BE bit numbering
ctr_not_zero := '1' when ctr /= x"0000000000000001" else '0'; -- NOTE(review): compares against 1, presumably because ctr is the value before decrement - confirm
ctr_ok := bo(4-2) or (ctr_not_zero xor bo(4-3)); -- passes if bo(4-2) is set, or if ctr_not_zero differs from bo(4-3)
cond_ok := bo(4-0) or crbit_match; -- passes if bo(4-0) is set, or if crbit_match is '1'
- if ctr_ok = '1' and cond_ok = '1' then
- ret := 1;
- else
- ret := 0;
- end if;
- return ret;
+ return ctr_ok and cond_ok;
end;
end package body ppc_fx_insns;