variable bv : br_predictor_t;
variable fprs, fprabc : std_ulogic;
variable in3rc : std_ulogic;
+ variable may_read_rb : std_ulogic;
begin
v := Decode1ToDecode2Init;
vi := reg_internal_t_init;
fprs := '0';
fprabc := '0';
in3rc := '0';
+ may_read_rb := '0';
if f_in.valid = '1' then
report "Decode insn " & to_hstring(f_in.insn) & " at " & to_hstring(f_in.nia);
vi.override := not decode_op_4_valid(to_integer(unsigned(minor4op)));
v.decode := decode_op_4_array(to_integer(unsigned(f_in.insn(5 downto 0))));
in3rc := '1';
+ may_read_rb := '1';
+
+ when 23 =>
+ -- rlwnm[.]
+ may_read_rb := '1';
when 31 =>
-- major opcode 31, lots of things
v.decode := decode_op_31_array(to_integer(unsigned(f_in.insn(10 downto 1))));
+ may_read_rb := '1';
if std_match(f_in.insn(10 downto 1), "01-1010011") then
-- mfspr or mtspr
when 30 =>
v.decode := decode_op_30_array(to_integer(unsigned(f_in.insn(4 downto 1))));
+ may_read_rb := f_in.insn(4);
when 52 | 53 | 54 | 55 =>
-- stfd[u] and stfs[u]
in3rc := '1';
fprabc := '1';
fprs := '1';
+ may_read_rb := '1';
end if;
when 62 =>
in3rc := '1';
fprabc := '1';
fprs := '1';
+ may_read_rb := '1';
end if;
when others =>
else
vr.reg_3_addr := fprs & insn_rs(f_in.insn);
end if;
+ vr.read_1_enable := f_in.valid and not f_in.fetch_failed;
+ vr.read_2_enable := f_in.valid and not f_in.fetch_failed and may_read_rb;
+ vr.read_3_enable := f_in.valid and not f_in.fetch_failed;
if f_in.fetch_failed = '1' then
v.valid := '1';
architecture behaviour of register_file is
type regfile is array(0 to 63) of std_ulogic_vector(63 downto 0);
signal registers : regfile := (others => (others => '0'));
- signal rd_port_b : std_ulogic_vector(63 downto 0);
signal dbg_data : std_ulogic_vector(63 downto 0);
signal dbg_ack : std_ulogic;
+ signal dbg_gpr_done : std_ulogic;
signal addr_1_reg : gspr_index_t;
signal addr_2_reg : gspr_index_t;
signal addr_3_reg : gspr_index_t;
+ signal rd_2 : std_ulogic;
+ signal fwd_1 : std_ulogic;
+ signal fwd_2 : std_ulogic;
+ signal fwd_3 : std_ulogic;
+ signal data_1 : std_ulogic_vector(63 downto 0);
+ signal data_2 : std_ulogic_vector(63 downto 0);
+ signal data_3 : std_ulogic_vector(63 downto 0);
+ signal prev_write_data : std_ulogic_vector(63 downto 0);
+
begin
- -- synchronous writes
+ -- synchronous reads and writes: on each rising clock edge, latch the three read-port values (data_1..3), the forwarding flags (fwd_1..3) and the write data
register_write_0: process(clk)
variable a_addr, b_addr, c_addr : gspr_index_t;
variable w_addr : gspr_index_t;
+ variable b_enable : std_ulogic;  -- effective port-B read enable for this edge (d1_in.read_2_enable, or the held copy rd_2 while stalled)
begin
if rising_edge(clk) then
if w_in.write_enable = '1' then
a_addr := d1_in.reg_1_addr;
b_addr := d1_in.reg_2_addr;
c_addr := d1_in.reg_3_addr;
-
- if stall = '0' then
+ b_enable := d1_in.read_2_enable;  -- default: take the port-B enable from d1_in along with the addresses above
+ if stall = '1' then  -- stalled: ignore d1_in and re-use the addresses/enable saved on the last non-stalled edge
+ a_addr := addr_1_reg;
+ b_addr := addr_2_reg;
+ c_addr := addr_3_reg;
+ b_enable := rd_2;
+ else  -- not stalled: save the new addresses/enable so they can be replayed during a stall
addr_1_reg <= a_addr;
addr_2_reg <= b_addr;
addr_3_reg <= c_addr;
+ rd_2 <= b_enable;
end if;
+
+ fwd_1 <= '0';  -- forwarding flags default to '0' on every edge; set below only on an address match
+ fwd_2 <= '0';
+ fwd_3 <= '0';
+ if w_in.write_enable = '1' then  -- a write is happening this edge: flag every read port whose address equals the write address; note the compare uses the addresses before the bit-5 masking further down
+ if w_addr = a_addr then  -- NOTE(review): w_addr is assigned outside the lines visible here; presumably derived from w_in.write_reg - confirm
+ fwd_1 <= '1';
+ end if;
+ if w_addr = b_addr then
+ fwd_2 <= '1';
+ end if;
+ if w_addr = c_addr then
+ fwd_3 <= '1';
+ end if;
+ end if;
+
+ -- Do debug reads to GPRs and FPRs using the B port when it is not in use
+ if dbg_gpr_req = '1' then
+ if b_enable = '0' then  -- port B is free this edge; if it is busy, b_addr is untouched and dbg_gpr_done keeps its previous value
+ b_addr := dbg_gpr_addr(5 downto 0);  -- redirect the port-B read below to the register requested by debug
+ dbg_gpr_done <= '1';  -- tells the debug latch that data_2 holds the debug read
+ end if;
+ else
+ dbg_gpr_done <= '0';  -- no debug request pending: clear the done flag
+ end if;
+
+ if not HAS_FPU then
+ -- Make it obvious that we only want 32 GSPRs for a no-FPU implementation
+ a_addr(5) := '0';
+ b_addr(5) := '0';
+ c_addr(5) := '0';
+ end if;
+ data_1 <= registers(to_integer(unsigned(a_addr)));  -- registered reads: the array contents become visible on data_1..3 after this clock edge
+ data_2 <= registers(to_integer(unsigned(b_addr)));  -- port B reads the debug address instead when it was redirected above
+ data_3 <= registers(to_integer(unsigned(c_addr)));
+
+ prev_write_data <= w_in.write_data;  -- keep this edge's write data; register_read_0 outputs it on ports whose fwd flag is set
+
assert (d_in.read1_enable = '0') or (d_in.read1_reg = addr_1_reg) severity failure;
assert (d_in.read2_enable = '0') or (d_in.read2_reg = addr_2_reg) severity failure;
assert (d_in.read3_enable = '0') or (d_in.read3_reg = addr_3_reg) severity failure;
end if;
end process register_write_0;
- -- asynchronous reads
+ -- asynchronous forwarding of write data: combinationally selects, per read port, between the latched array data (data_N) and prev_write_data, and drives d_out
register_read_0: process(all)
- variable a_addr, b_addr, c_addr : gspr_index_t;
- variable w_addr : gspr_index_t;
+ variable out_data_1 : std_ulogic_vector(63 downto 0);  -- value driven onto d_out.read1_data
+ variable out_data_2 : std_ulogic_vector(63 downto 0);  -- value driven onto d_out.read2_data
+ variable out_data_3 : std_ulogic_vector(63 downto 0);  -- value driven onto d_out.read3_data
begin
- a_addr := d_in.read1_reg;
- b_addr := d_in.read2_reg;
- c_addr := d_in.read3_reg;
- w_addr := w_in.write_reg;
- if not HAS_FPU then
- -- Make it obvious that we only want 32 GSPRs for a no-FPU implementation
- a_addr(5) := '0';
- b_addr(5) := '0';
- c_addr(5) := '0';
- w_addr(5) := '0';
+ out_data_1 := data_1;  -- default: use the data latched from the register array by register_write_0
+ out_data_2 := data_2;
+ out_data_3 := data_3;
+ if fwd_1 = '1' then  -- port 1 address matched the write address on the latching edge: use the saved write data instead
+ out_data_1 := prev_write_data;
end if;
+ if fwd_2 = '1' then  -- same substitution for port 2
+ out_data_2 := prev_write_data;
+ end if;
+ if fwd_3 = '1' then  -- same substitution for port 3
+ out_data_3 := prev_write_data;
+ end if;
+
if d_in.read1_enable = '1' then
- report "Reading GPR " & to_hstring(a_addr) & " " & to_hstring(registers(to_integer(unsigned(a_addr))));
+ report "Reading GPR " & to_hstring(addr_1_reg) & " " & to_hstring(out_data_1);  -- trace message: latched address and the (possibly forwarded) value
end if;
if d_in.read2_enable = '1' then
- report "Reading GPR " & to_hstring(b_addr) & " " & to_hstring(registers(to_integer(unsigned(b_addr))));
+ report "Reading GPR " & to_hstring(addr_2_reg) & " " & to_hstring(out_data_2);  -- trace message for port 2
end if;
if d_in.read3_enable = '1' then
- report "Reading GPR " & to_hstring(c_addr) & " " & to_hstring(registers(to_integer(unsigned(c_addr))));
- end if;
- d_out.read1_data <= registers(to_integer(unsigned(a_addr)));
- -- B read port is multiplexed with reads from the debug circuitry
- if d_in.read2_enable = '0' and dbg_gpr_req = '1' and dbg_ack = '0' then
- b_addr := dbg_gpr_addr;
- if not HAS_FPU then
- b_addr(5) := '0';
- end if;
+ report "Reading GPR " & to_hstring(addr_3_reg) & " " & to_hstring(out_data_3);  -- trace message for port 3
end if;
- rd_port_b <= registers(to_integer(unsigned(b_addr)));
- d_out.read2_data <= rd_port_b;
- d_out.read3_data <= registers(to_integer(unsigned(c_addr)));
- -- Forwarding of written data is now done explicitly with a bypass path
- -- from writeback to decode2.
+ d_out.read1_data <= out_data_1;  -- outputs are driven unconditionally; the read enables above only gate the trace messages
+ d_out.read2_data <= out_data_2;
+ d_out.read3_data <= out_data_3;
end process register_read_0;
-- Latch read data and ack if dbg read requested and B port not busy
begin
if rising_edge(clk) then
if dbg_gpr_req = '1' then
- if d_in.read2_enable = '0' and dbg_ack = '0' then
- dbg_data <= rd_port_b;
+ if dbg_ack = '0' and dbg_gpr_done = '1' then
+ dbg_data <= data_2;
dbg_ack <= '1';
end if;
else