others => (others => '0'));
type Loadstore1ToExecute1Type is record
+ busy : std_ulogic;
exception : std_ulogic;
invalid : std_ulogic;
perm_error : std_ulogic;
complete_in : in std_ulogic;
valid_in : in std_ulogic;
flush_in : in std_ulogic;
- stall_in : in std_ulogic;
+ busy_in : in std_ulogic;
+ deferred : in std_ulogic;
sgl_pipe_in : in std_ulogic;
stop_mark_in : in std_ulogic;
gpr_write_in : in gspr_index_t;
gpr_bypassable : in std_ulogic;
+ update_gpr_write_valid : in std_ulogic;
+ update_gpr_write_reg : in gspr_index_t;
gpr_a_read_valid_in : in std_ulogic;
gpr_a_read_in : in gspr_index_t;
port map (
clk => clk,
- stall_in => stall_in,
+ busy_in => busy_in,
+ deferred => deferred,
+ complete_in => complete_in,
+ flush_in => flush_in,
+ issuing => valid_out,
gpr_write_valid_in => gpr_write_valid,
gpr_write_in => gpr_write_in,
gpr_read_valid_in => gpr_a_read_valid_in,
gpr_read_in => gpr_a_read_in,
+ ugpr_write_valid => update_gpr_write_valid,
+ ugpr_write_reg => update_gpr_write_reg,
stall_out => stall_a_out,
use_bypass => gpr_bypass_a
port map (
clk => clk,
- stall_in => stall_in,
+ busy_in => busy_in,
+ deferred => deferred,
+ complete_in => complete_in,
+ flush_in => flush_in,
+ issuing => valid_out,
gpr_write_valid_in => gpr_write_valid,
gpr_write_in => gpr_write_in,
gpr_read_valid_in => gpr_b_read_valid_in,
gpr_read_in => gpr_b_read_in,
+ ugpr_write_valid => update_gpr_write_valid,
+ ugpr_write_reg => update_gpr_write_reg,
stall_out => stall_b_out,
use_bypass => gpr_bypass_b
port map (
clk => clk,
- stall_in => stall_in,
+ busy_in => busy_in,
+ deferred => deferred,
+ complete_in => complete_in,
+ flush_in => flush_in,
+ issuing => valid_out,
gpr_write_valid_in => gpr_write_valid,
gpr_write_in => gpr_write_in,
gpr_read_valid_in => gpr_c_read_valid_in,
gpr_read_in => gpr_c_read_in_fmt,
+ ugpr_write_valid => update_gpr_write_valid,
+ ugpr_write_reg => update_gpr_write_reg,
stall_out => stall_c_out,
use_bypass => gpr_bypass_c
port map (
clk => clk,
- stall_in => stall_in,
+ busy_in => busy_in,
+ deferred => deferred,
+ complete_in => complete_in,
+ flush_in => flush_in,
+ issuing => valid_out,
cr_read_in => cr_read_in,
cr_write_in => cr_write_valid,
control0: process(clk)
if rising_edge(clk) then
- assert r_int.outstanding >= 0 and r_int.outstanding <= (PIPELINE_DEPTH+1) report "Outstanding bad " & integer'image(r_int.outstanding) severity failure;
+ assert rin_int.outstanding >= 0 and rin_int.outstanding <= (PIPELINE_DEPTH+1) -- on each rising clk edge, range-check the next-state count (rin_int) instead of the registered one (r_int)
+ report "Outstanding bad " & integer'image(rin_int.outstanding) severity failure; -- an out-of-range count is reported with severity failure
r_int <= rin_int;
end if;
end process;
v_int := r_int;
-- asynchronous
- valid_tmp := valid_in and not flush_in and not stall_in;
- stall_tmp := stall_in;
+ valid_tmp := valid_in and not flush_in;
+ stall_tmp := '0';
- if complete_in = '1' then
+ if flush_in = '1' then
+ -- expect to see complete_in next cycle
+ v_int.outstanding := 1;
+ elsif complete_in = '1' then
v_int.outstanding := r_int.outstanding - 1;
end if;
if rst = '1' then
- v_int.state := IDLE;
- v_int.outstanding := 0;
- stall_tmp := '0';
+ v_int := reg_internal_init;
valid_tmp := '0';
end if;
end if;
if valid_tmp = '1' then
- v_int.outstanding := v_int.outstanding + 1;
+ if deferred = '0' then
+ v_int.outstanding := v_int.outstanding + 1;
+ end if;
gpr_write_valid <= gpr_write_valid_in;
cr_write_valid <= cr_write_in;
-- update outputs
valid_out <= valid_tmp;
- stall_out <= stall_tmp;
+ stall_out <= stall_tmp or deferred;
-- update registers
rin_int <= v_int;
signal icache_stall_out : std_ulogic;
signal icache_stall_in : std_ulogic;
signal decode1_stall_in : std_ulogic;
- signal decode2_stall_in : std_ulogic;
+ signal decode2_busy_in : std_ulogic;
signal decode2_stall_out : std_ulogic;
signal ex1_icache_inval: std_ulogic;
- signal ex1_stall_out: std_ulogic;
- signal ls1_stall_out: std_ulogic;
+ signal ex1_busy_out: std_ulogic;
signal dcache_stall_out: std_ulogic;
signal flush: std_ulogic;
port map (
clk => clk,
rst => rst_dec2,
- stall_in => decode2_stall_in,
+ busy_in => decode2_busy_in,
stall_out => decode2_stall_out,
flush_in => flush,
complete_in => complete,
c_out => decode2_to_cr_file,
log_out => log_data(119 downto 110)
- decode2_stall_in <= ex1_stall_out or ls1_stall_out;
+ decode2_busy_in <= ex1_busy_out;
register_file_0: entity work.register_file
generic map (
clk => clk,
rst => rst_ex1,
flush_out => flush,
- stall_out => ex1_stall_out,
+ busy_out => ex1_busy_out,
e_in => decode2_to_execute1,
l_in => loadstore1_to_execute1,
ext_irq_in => ext_irq,
m_out => loadstore1_to_mmu,
m_in => mmu_to_loadstore1,
dc_stall => dcache_stall_out,
- stall_out => ls1_stall_out,
log_out => log_data(149 downto 140)
entity cr_hazard is
generic (
- PIPELINE_DEPTH : natural := 2
+ PIPELINE_DEPTH : natural := 1
clk : in std_ulogic;
- stall_in : in std_ulogic;
+ busy_in : in std_ulogic;
+ deferred : in std_ulogic;
+ complete_in : in std_ulogic;
+ flush_in : in std_ulogic;
+ issuing : in std_ulogic;
cr_read_in : in std_ulogic;
cr_write_in : in std_ulogic;
end record;
constant pipeline_entry_init : pipeline_entry_type := (valid => '0');
- type pipeline_t is array(0 to PIPELINE_DEPTH-1) of pipeline_entry_type;
+ type pipeline_t is array(0 to PIPELINE_DEPTH) of pipeline_entry_type;
constant pipeline_t_init : pipeline_t := (others => pipeline_entry_init);
signal r, rin : pipeline_t := pipeline_t_init;
cr_hazard0: process(clk)
if rising_edge(clk) then
- if stall_in = '0' then
- r <= rin;
- end if;
+ r <= rin; -- latch rin into r on every rising clk edge; the former stall_in = '0' gate on this assignment is removed
end if;
end process;
v := r;
- stall_out <= '0';
- loop_0: for i in 0 to PIPELINE_DEPTH-1 loop
- if (r(i).valid = cr_read_in) then
- stall_out <= '1';
- end if;
- end loop;
- v(0).valid := cr_write_in;
- loop_1: for i in 0 to PIPELINE_DEPTH-2 loop
- -- propagate to next slot
- v(i+1) := r(i);
- end loop;
+ -- XXX assumes PIPELINE_DEPTH = 1
+ if complete_in = '1' then
+ v(1).valid := '0';
+ end if;
+ stall_out <= cr_read_in and (v(0).valid or v(1).valid);
- -- asynchronous output
- if cr_read_in = '0' then
- stall_out <= '0';
+ -- XXX assumes PIPELINE_DEPTH = 1
+ if busy_in = '0' then
+ v(1) := r(0);
+ v(0).valid := '0';
+ end if;
+ if deferred = '0' and issuing = '1' then
+ v(0).valid := cr_write_in;
+ end if;
+ if flush_in = '1' then
+ v(0).valid := '0';
+ v(1).valid := '0';
end if;
-- update registers
rst : in std_ulogic;
complete_in : in std_ulogic;
- stall_in : in std_ulogic;
+ busy_in : in std_ulogic;
stall_out : out std_ulogic;
stopped_out : out std_ulogic;
signal r, rin : reg_type;
+ signal deferred : std_ulogic;
signal log_data : std_ulogic_vector(9 downto 0);
type decode_input_reg_t is record
signal gpr_write : gspr_index_t;
signal gpr_bypassable : std_ulogic;
+ signal update_gpr_write_valid : std_ulogic;
+ signal update_gpr_write_reg : gspr_index_t;
signal gpr_a_read_valid : std_ulogic;
signal gpr_a_read :gspr_index_t;
signal gpr_a_bypass : std_ulogic;
complete_in => complete_in,
valid_in => control_valid_in,
- stall_in => stall_in,
+ busy_in => busy_in,
+ deferred => deferred,
flush_in => flush_in,
sgl_pipe_in => control_sgl_pipe,
stop_mark_in => d_in.stop_mark,
gpr_write_in => gpr_write,
gpr_bypassable => gpr_bypassable,
+ update_gpr_write_valid => update_gpr_write_valid,
+ update_gpr_write_reg => update_gpr_write_reg,
gpr_a_read_valid_in => gpr_a_read_valid,
gpr_a_read_in => gpr_a_read,
gpr_bypass_c => gpr_c_bypass
+ deferred <= r.e.valid and busy_in;
decode2_0: process(clk)
if rising_edge(clk) then
- if rin.e.valid = '1' then
- report "execute " & to_hstring(rin.e.nia);
+ if rst = '1' or flush_in = '1' or deferred = '0' then -- update r only on reset, on flush, or when not deferred; otherwise r keeps its current value
+ if rin.e.valid = '1' then -- trace message only when the value about to be latched is valid
+ report "execute " & to_hstring(rin.e.nia);
+ end if;
+ r <= rin; -- the register update now sits inside the gating condition above
end if;
- r <= rin;
end if;
end process;
if EX1_BYPASS and d_in.decode.unit = ALU then
gpr_bypassable <= '1';
end if;
+ update_gpr_write_valid <= d_in.decode.update;
+ update_gpr_write_reg <= decoded_reg_a.reg;
gpr_a_read_valid <= decoded_reg_a.reg_valid;
gpr_a_read <= decoded_reg_a.reg;
v.e.insn_type := OP_ILLEGAL;
end if;
- if rst = '1' then
+ if rst = '1' or flush_in = '1' then
v.e := Decode2ToExecute1Init;
end if;
-- asynchronous
flush_out : out std_ulogic;
- stall_out : out std_ulogic;
+ busy_out : out std_ulogic;
e_in : in Decode2ToExecute1Type;
l_in : in Loadstore1ToExecute1Type;
architecture behaviour of execute1 is
type reg_type is record
e : Execute1ToWritebackType;
+ busy: std_ulogic;
+ terminate: std_ulogic;
lr_update : std_ulogic;
next_lr : std_ulogic_vector(63 downto 0);
mul_in_progress : std_ulogic;
log_addr_spr : std_ulogic_vector(31 downto 0);
end record;
constant reg_type_init : reg_type :=
- (e => Execute1ToWritebackInit, lr_update => '0',
+ (e => Execute1ToWritebackInit, busy => '0', lr_update => '0', terminate => '0',
mul_in_progress => '0', div_in_progress => '0', cntz_in_progress => '0',
slow_op_insn => OP_ILLEGAL, slow_op_rc => '0', slow_op_oe => '0', slow_op_xerc => xerc_init,
next_lr => (others => '0'), ldst_nia => (others => '0'), others => (others => '0'));
signal a_in, b_in, c_in : std_ulogic_vector(63 downto 0);
+ signal valid_in : std_ulogic;
signal ctrl: ctrl_t := (irq_state => WRITE_SRR0, others => (others => '0'));
signal ctrl_tmp: ctrl_t := (irq_state => WRITE_SRR0, others => (others => '0'));
signal right_shift, rot_clear_left, rot_clear_right: std_ulogic;
b_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data2 = '1' else e_in.read_data2;
c_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data3 = '1' else e_in.read_data3;
+ busy_out <= l_in.busy or r.busy;
+ valid_in <= e_in.valid and not busy_out;
+ terminate_out <= r.terminate;
execute1_0: process(clk)
if rising_edge(clk) then
r <= rin;
ctrl <= ctrl_tmp;
- assert not (r.lr_update = '1' and e_in.valid = '1')
+ assert not (r.lr_update = '1' and valid_in = '1')
report "LR update collision with valid in EX1"
severity failure;
if r.lr_update = '1' then
end if;
end if;
- terminate_out <= '0';
+ v.terminate := '0';
icache_inval <= '0';
- stall_out <= '0';
+ v.busy := '0';
f_out <= Execute1ToFetch1TypeInit;
-- send MSR[IR] and ~MSR[PR] up to fetch1
f_out.virt_mode <= ctrl.msr(MSR_IR);
f_out.virt_mode <= '0';
f_out.priv_mode <= '1';
f_out.redirect_nia <= ctrl.irq_nia;
- v.e.valid := e_in.valid;
+ v.e.valid := '1';
report "Writing SRR1: " & to_hstring(ctrl.srr1);
- elsif irq_valid = '1' and e_in.valid = '1' then
+ elsif irq_valid = '1' and valid_in = '1' then
-- we need two cycles to write srr0 and 1
-- will need more when we have to write HEIR
-- Don't deliver the interrupt until we have a valid instruction
exception := '1';
ctrl_tmp.srr1 <= msr_copy(ctrl.msr);
- elsif e_in.valid = '1' and ctrl.msr(MSR_PR) = '1' and
+ elsif valid_in = '1' and ctrl.msr(MSR_PR) = '1' and
instr_is_privileged(e_in.insn_type, e_in.insn) then
-- generate a program interrupt
exception := '1';
ctrl_tmp.srr1(63 - 45) <= '1';
report "privileged instruction";
- elsif e_in.valid = '1' and e_in.unit = ALU then
+ elsif valid_in = '1' and e_in.unit = ALU then
report "execute nia " & to_hstring(e_in.nia);
-- check bits 1-10 of the instruction to make sure it's attn
-- if not then it is illegal
if e_in.insn(10 downto 1) = "0100000000" then
- terminate_out <= '1';
+ v.terminate := '1';
report "ATTN";
illegal := '1';
when OP_CNTZ =>
v.e.valid := '0';
v.cntz_in_progress := '1';
- stall_out <= '1';
+ v.busy := '1';
when OP_EXTS =>
-- note data_len is a 1-hot encoding
negative := (e_in.data_len(0) and c_in(7)) or
when OP_MUL_L64 | OP_MUL_H64 | OP_MUL_H32 =>
v.e.valid := '0';
v.mul_in_progress := '1';
- stall_out <= '1';
+ v.busy := '1';
x_to_multiply.valid <= '1';
when OP_DIV | OP_DIVE | OP_MOD =>
v.e.valid := '0';
v.div_in_progress := '1';
- stall_out <= '1';
+ v.busy := '1';
x_to_divider.valid <= '1';
when others =>
- terminate_out <= '1';
+ v.terminate := '1';
report "illegal";
end case;
- v.e.rc := e_in.rc and e_in.valid;
+ v.e.rc := e_in.rc and valid_in;
-- Update LR on the next cycle after a branch link
v.next_lr := next_nia;
v.e.valid := '0';
report "Delayed LR update to " & to_hstring(next_nia);
- stall_out <= '1';
+ v.busy := '1';
end if;
- elsif e_in.valid = '1' then
+ elsif valid_in = '1' then
-- instruction for other units, i.e. LDST
v.ldst_nia := e_in.nia;
v.e.valid := '0';
end if;
v.e.valid := '1';
- stall_out <= '1';
+ v.busy := '1';
v.mul_in_progress := r.mul_in_progress;
v.div_in_progress := r.div_in_progress;
end if;
v.e.exc_write_data := next_nia;
end if;
ctrl_tmp.irq_state <= WRITE_SRR1;
- v.e.valid := '1';
+ v.busy := '1';
+ v.e.valid := '0';
end if;
v.e.write_data := result;
v.e.exc_write_data := r.ldst_nia;
report "ldst exception writing srr0=" & to_hstring(r.ldst_nia);
ctrl_tmp.irq_state <= WRITE_SRR1;
- v.e.valid := '1'; -- complete the original load or store
end if;
-- Outputs to loadstore1 (async)
r.e.write_enable &
r.e.valid &
f_out.redirect &
- stall_out &
+ r.busy &
end if;
end process;
entity gpr_hazard is
generic (
- PIPELINE_DEPTH : natural := 2
+ PIPELINE_DEPTH : natural := 1
clk : in std_ulogic;
- stall_in : in std_ulogic;
+ busy_in : in std_ulogic;
+ deferred : in std_ulogic;
+ complete_in : in std_ulogic;
+ flush_in : in std_ulogic;
+ issuing : in std_ulogic;
gpr_write_valid_in : in std_ulogic;
gpr_write_in : in std_ulogic_vector(5 downto 0);
gpr_read_valid_in : in std_ulogic;
gpr_read_in : in std_ulogic_vector(5 downto 0);
+ ugpr_write_valid : in std_ulogic;
+ ugpr_write_reg : in std_ulogic_vector(5 downto 0);
stall_out : out std_ulogic;
use_bypass : out std_ulogic
valid : std_ulogic;
bypass : std_ulogic;
gpr : std_ulogic_vector(5 downto 0);
+ ugpr_valid : std_ulogic;
+ ugpr : std_ulogic_vector(5 downto 0);
end record;
- constant pipeline_entry_init : pipeline_entry_type := (valid => '0', bypass => '0', gpr => (others => '0'));
+ constant pipeline_entry_init : pipeline_entry_type := (valid => '0', bypass => '0', gpr => (others => '0'),
+ ugpr_valid => '0', ugpr => (others => '0'));
- type pipeline_t is array(0 to PIPELINE_DEPTH-1) of pipeline_entry_type;
+ type pipeline_t is array(0 to PIPELINE_DEPTH) of pipeline_entry_type;
constant pipeline_t_init : pipeline_t := (others => pipeline_entry_init);
signal r, rin : pipeline_t := pipeline_t_init;
v := r;
+ if complete_in = '1' then
+ v(PIPELINE_DEPTH).valid := '0';
+ v(PIPELINE_DEPTH).ugpr_valid := '0';
+ end if;
stall_out <= '0';
use_bypass <= '0';
if gpr_read_valid_in = '1' then
- if r(0).valid = '1' and r(0).gpr = gpr_read_in then
- if r(0).bypass = '1' and stall_in = '0' then
- use_bypass <= '1';
- else
- stall_out <= '1';
- end if;
- end if;
- loop_0: for i in 1 to PIPELINE_DEPTH-1 loop
- if r(i).valid = '1' and r(i).gpr = gpr_read_in then
+ loop_0: for i in 0 to PIPELINE_DEPTH loop
+ if v(i).valid = '1' and r(i).gpr = gpr_read_in then
if r(i).bypass = '1' then
use_bypass <= '1';
stall_out <= '1';
end if;
end if;
+ if v(i).ugpr_valid = '1' and r(i).ugpr = gpr_read_in then
+ stall_out <= '1';
+ end if;
end loop;
end if;
- if stall_in = '0' then
+ -- XXX assumes PIPELINE_DEPTH = 1
+ if busy_in = '0' then
+ v(1) := v(0);
+ v(0).valid := '0';
+ v(0).ugpr_valid := '0';
+ end if;
+ if deferred = '0' and issuing = '1' then
v(0).valid := gpr_write_valid_in;
v(0).bypass := bypass_avail;
v(0).gpr := gpr_write_in;
- loop_1: for i in 1 to PIPELINE_DEPTH-1 loop
- -- propagate to next slot
- v(i).valid := r(i-1).valid;
- v(i).bypass := r(i-1).bypass;
- v(i).gpr := r(i-1).gpr;
- end loop;
- else
- -- stage 0 stalled, so stage 1 becomes empty
- loop_1b: for i in 1 to PIPELINE_DEPTH-1 loop
- -- propagate to next slot
- if i = 1 then
- v(i).valid := '0';
- else
- v(i).valid := r(i-1).valid;
- v(i).bypass := r(i-1).bypass;
- v(i).gpr := r(i-1).gpr;
- end if;
- end loop;
+ v(0).ugpr_valid := ugpr_write_valid;
+ v(0).ugpr := ugpr_write_reg;
+ end if;
+ if flush_in = '1' then
+ v(0).valid := '0';
+ v(0).ugpr_valid := '0';
+ v(1).valid := '0';
+ v(1).ugpr_valid := '0';
end if;
-- update registers
m_in : in MmuToLoadstore1Type;
dc_stall : in std_ulogic;
- stall_out : out std_ulogic;
log_out : out std_ulogic_vector(9 downto 0)
type reg_stage_t is record
+ busy : std_ulogic;
-- latch most of the input request
load : std_ulogic;
tlbie : std_ulogic;
if rising_edge(clk) then
if rst = '1' then
r.state <= IDLE;
+ r.busy <= '0';
r <= rin;
end if;
l_out.store_done <= d_in.store_done;
-- update exception info back to execute1
+ e_out.busy <= r.busy;
e_out.exception <= exception;
e_out.instr_fault <= r.instr_fault;
e_out.invalid <= m_in.invalid;
end if;
end if;
- stall_out <= stall;
+ v.busy := stall;
-- Update registers
rin <= v;
ls1_log: process(clk)
if rising_edge(clk) then
- log_data <= stall_out &
+ log_data <= r.busy &
e_out.exception &
l_out.valid &
m_out.valid &
architecture behaviour of writeback is
- writeback_1: process(all)
+ writeback_0: process(clk) -- clocked process holding only the input consistency assertions; it drives no signals
+ variable x : std_ulogic_vector(0 downto 0); -- x, y, w: one-bit vectors so each flag can be converted with to_integer(unsigned(...)) and summed
+ variable y : std_ulogic_vector(0 downto 0);
+ variable w : std_ulogic_vector(0 downto 0);
+ begin
+ if rising_edge(clk) then
+ -- Do consistency checks only on the clock edge
+ x(0) := e_in.valid;
+ y(0) := l_in.valid;
+ assert (to_integer(unsigned(x)) + to_integer(unsigned(y))) <= 1 severity failure; -- e_in.valid and l_in.valid must not both be '1'
+ x(0) := e_in.write_enable or e_in.exc_write_enable;
+ y(0) := l_in.write_enable;
+ assert (to_integer(unsigned(x)) + to_integer(unsigned(y))) <= 1 severity failure; -- an e_in write (normal or exception) and an l_in write must not coincide
+ w(0) := e_in.write_cr_enable;
+ x(0) := (e_in.write_enable and e_in.rc);
+ assert (to_integer(unsigned(w)) + to_integer(unsigned(x))) <= 1 severity failure; -- e_in.write_cr_enable must not coincide with e_in.write_enable and e_in.rc both set
+ end if;
+ end process;
+ writeback_1: process(all)
variable cf: std_ulogic_vector(3 downto 0);
variable zero : std_ulogic;
variable sign : std_ulogic;
variable scf : std_ulogic_vector(3 downto 0);
- x(0) := e_in.valid;
- y(0) := l_in.valid;
- assert (to_integer(unsigned(x)) + to_integer(unsigned(y))) <= 1 severity failure;
- x(0) := e_in.write_enable or e_in.exc_write_enable;
- y(0) := l_in.write_enable;
- assert (to_integer(unsigned(x)) + to_integer(unsigned(y))) <= 1 severity failure;
- w(0) := e_in.write_cr_enable;
- x(0) := (e_in.write_enable and e_in.rc);
- assert (to_integer(unsigned(w)) + to_integer(unsigned(x))) <= 1 severity failure;
w_out <= WritebackToRegisterFileInit;
c_out <= WritebackToCrFileInit;