From 4b6148ada6a58adb48167733b492c73c505b6930 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 28 Jun 2022 08:40:42 +1000 Subject: [PATCH] Add a bypass path from the execute2 stage This enables some instructions to issue earlier and thus improves performance, at the cost of some extra multiplexers in decode2. Signed-off-by: Paul Mackerras --- control.vhdl | 50 ++++++++++++++++++++++++++++++-------------------- core.vhdl | 6 ++++++ decode2.vhdl | 34 +++++++++++++++++++++++----------- execute1.vhdl | 16 ++++++++++++++++ 4 files changed, 75 insertions(+), 31 deletions(-) diff --git a/control.vhdl b/control.vhdl index 0bbe9ad..17a288b 100644 --- a/control.vhdl +++ b/control.vhdl @@ -36,6 +36,8 @@ entity control is execute_next_tag : in instr_tag_t; execute_next_cr_tag : in instr_tag_t; + execute2_next_tag : in instr_tag_t; + execute2_next_cr_tag : in instr_tag_t; cr_read_in : in std_ulogic; cr_write_in : in std_ulogic; @@ -44,10 +46,10 @@ entity control is stall_out : out std_ulogic; stopped_out : out std_ulogic; - gpr_bypass_a : out std_ulogic; - gpr_bypass_b : out std_ulogic; - gpr_bypass_c : out std_ulogic; - cr_bypass : out std_ulogic; + gpr_bypass_a : out std_ulogic_vector(1 downto 0); + gpr_bypass_b : out std_ulogic_vector(1 downto 0); + gpr_bypass_c : out std_ulogic_vector(1 downto 0); + cr_bypass : out std_ulogic_vector(1 downto 0); instr_tag_out : out instr_tag_t ); @@ -142,11 +144,11 @@ begin variable tag_s : instr_tag_t; variable tag_t : instr_tag_t; variable incr_tag : tag_number_t; - variable byp_a : std_ulogic; - variable byp_b : std_ulogic; - variable byp_c : std_ulogic; + variable byp_a : std_ulogic_vector(1 downto 0); + variable byp_b : std_ulogic_vector(1 downto 0); + variable byp_c : std_ulogic_vector(1 downto 0); variable tag_cr : instr_tag_t; - variable byp_cr : std_ulogic; + variable byp_cr : std_ulogic_vector(1 downto 0); begin tag_a := instr_tag_init; for i in tag_number_t loop @@ -179,26 +181,32 @@ begin tag_c.valid := '0'; end if; - byp_a := '0'; + byp_a := "00"; if EX1_BYPASS and tag_match(execute_next_tag, tag_a) then - byp_a := '1'; + byp_a := "10"; + elsif EX1_BYPASS and tag_match(execute2_next_tag, tag_a) then + byp_a := "11"; end if; - byp_b := '0'; + byp_b := "00"; if EX1_BYPASS and tag_match(execute_next_tag, tag_b) then - byp_b := '1'; + byp_b := "10"; + elsif EX1_BYPASS and tag_match(execute2_next_tag, tag_b) then + byp_b := "11"; end if; - byp_c := '0'; + byp_c := "00"; if EX1_BYPASS and tag_match(execute_next_tag, tag_c) then - byp_c := '1'; + byp_c := "10"; + elsif EX1_BYPASS and tag_match(execute2_next_tag, tag_c) then + byp_c := "11"; end if; gpr_bypass_a <= byp_a; gpr_bypass_b <= byp_b; gpr_bypass_c <= byp_c; - gpr_tag_stall <= (tag_a.valid and not byp_a) or - (tag_b.valid and not byp_b) or - (tag_c.valid and not byp_c); + gpr_tag_stall <= (tag_a.valid and not byp_a(1)) or + (tag_b.valid and not byp_b(1)) or + (tag_c.valid and not byp_c(1)); incr_tag := curr_tag; instr_tag.tag <= curr_tag; @@ -215,13 +223,15 @@ begin if tag_match(tag_cr, complete_in) then tag_cr.valid := '0'; end if; - byp_cr := '0'; + byp_cr := "00"; if EX1_BYPASS and tag_match(execute_next_cr_tag, tag_cr) then - byp_cr := '1'; + byp_cr := "10"; + elsif EX1_BYPASS and tag_match(execute2_next_cr_tag, tag_cr) then + byp_cr := "11"; end if; cr_bypass <= byp_cr; - cr_tag_stall <= tag_cr.valid and not byp_cr; + cr_tag_stall <= tag_cr.valid and not byp_cr(1); end process; control1 : process(all) diff --git a/core.vhdl b/core.vhdl index 070a1f1..84604c6 100644 --- a/core.vhdl +++ b/core.vhdl @@ -79,6 +79,8 @@ architecture behave of core is signal execute1_to_writeback: Execute1ToWritebackType; signal execute1_bypass: bypass_data_t; signal execute1_cr_bypass: cr_bypass_data_t; + signal execute2_bypass: bypass_data_t; + signal execute2_cr_bypass: cr_bypass_data_t; -- load store signals signal execute1_to_loadstore1: Execute1ToLoadstore1Type; @@ -298,6 +300,8 @@ begin c_out => decode2_to_cr_file, execute_bypass => execute1_bypass, execute_cr_bypass => execute1_cr_bypass, + execute2_bypass => execute2_bypass, + execute2_cr_bypass => execute2_cr_bypass, log_out => log_data(119 downto 110) ); decode2_busy_in <= ex1_busy_out; @@ -359,6 +363,8 @@ begin e_out => execute1_to_writeback, bypass_data => execute1_bypass, bypass_cr_data => execute1_cr_bypass, + bypass2_data => execute2_bypass, + bypass2_cr_data => execute2_cr_bypass, icache_inval => ex1_icache_inval, dbg_ctrl_out => ctrl_debug, wb_events => writeback_events, diff --git a/decode2.vhdl b/decode2.vhdl index af0c27d..c290c98 100644 --- a/decode2.vhdl +++ b/decode2.vhdl @@ -39,6 +39,8 @@ entity decode2 is execute_bypass : in bypass_data_t; execute_cr_bypass : in cr_bypass_data_t; + execute2_bypass : in bypass_data_t; + execute2_cr_bypass : in cr_bypass_data_t; log_out : out std_ulogic_vector(9 downto 0) ); @@ -273,19 +275,19 @@ architecture behaviour of decode2 is signal gpr_a_read_valid : std_ulogic; signal gpr_a_read : gspr_index_t; - signal gpr_a_bypass : std_ulogic; + signal gpr_a_bypass : std_ulogic_vector(1 downto 0); signal gpr_b_read_valid : std_ulogic; signal gpr_b_read : gspr_index_t; - signal gpr_b_bypass : std_ulogic; + signal gpr_b_bypass : std_ulogic_vector(1 downto 0); signal gpr_c_read_valid : std_ulogic; signal gpr_c_read : gspr_index_t; - signal gpr_c_bypass : std_ulogic; + signal gpr_c_bypass : std_ulogic_vector(1 downto 0); signal cr_read_valid : std_ulogic; signal cr_write_valid : std_ulogic; - signal cr_bypass : std_ulogic; + signal cr_bypass : std_ulogic_vector(1 downto 0); signal instr_tag : instr_tag_t; @@ -321,6 +323,8 @@ begin execute_next_tag => execute_bypass.tag, execute_next_cr_tag => execute_cr_bypass.tag, + execute2_next_tag => execute2_bypass.tag, + execute2_next_cr_tag => execute2_cr_bypass.tag, cr_read_in => cr_read_valid, cr_write_in => cr_write_valid, @@ -504,27 +508,35 @@ begin -- See if any of the operands can get their value via the bypass path. case gpr_a_bypass is - when '1' => + when "10" => v.e.read_data1 := execute_bypass.data; + when "11" => + v.e.read_data1 := execute2_bypass.data; when others => v.e.read_data1 := decoded_reg_a.data; end case; case gpr_b_bypass is - when '1' => + when "10" => v.e.read_data2 := execute_bypass.data; + when "11" => + v.e.read_data2 := execute2_bypass.data; when others => v.e.read_data2 := decoded_reg_b.data; end case; case gpr_c_bypass is - when '1' => + when "10" => v.e.read_data3 := execute_bypass.data; + when "11" => + v.e.read_data3 := execute2_bypass.data; when others => v.e.read_data3 := decoded_reg_c.data; end case; v.e.cr := c_in.read_cr_data; - if cr_bypass = '1' then + if cr_bypass = "10" then v.e.cr := execute_cr_bypass.data; + elsif cr_bypass = "11" then + v.e.cr := execute2_cr_bypass.data; end if; -- issue control @@ -577,9 +589,9 @@ begin r.e.valid & stopped_out & stall_out & - gpr_a_bypass & - gpr_b_bypass & - gpr_c_bypass; + (gpr_a_bypass(1) or gpr_a_bypass(0)) & + (gpr_b_bypass(1) or gpr_b_bypass(0)) & + (gpr_c_bypass(1) or gpr_c_bypass(0)); end if; end process; log_out <= log_data; diff --git a/execute1.vhdl b/execute1.vhdl index ebcdfeb..ebc24c5 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -40,6 +40,8 @@ entity execute1 is e_out : out Execute1ToWritebackType; bypass_data : out bypass_data_t; bypass_cr_data : out cr_bypass_data_t; + bypass2_data : out bypass_data_t; + bypass2_cr_data : out cr_bypass_data_t; dbg_ctrl_out : out ctrl_t; @@ -1482,6 +1484,7 @@ begin variable fv : Execute1ToFPUType; variable k : integer; variable go : std_ulogic; + variable bypass_valid : std_ulogic; begin v := ex2; if (l_in.busy or fp_in.busy) = '0' then @@ -1559,6 +1562,19 @@ begin ctrl_tmp.msr(MSR_LE) <= '1'; end if; + bypass_valid := ex1.e.valid; + if (ex2.busy or l_in.busy or fp_in.busy) = '1' and ex1.res2_sel(1) = '1' then + bypass_valid := '0'; + end if; + + bypass2_data.tag.valid <= ex1.e.write_enable and bypass_valid; + bypass2_data.tag.tag <= ex1.e.instr_tag.tag; + bypass2_data.data <= ex_result; + + bypass2_cr_data.tag.valid <= ex1.e.write_cr_enable and bypass_valid; + bypass2_cr_data.tag.tag <= ex1.e.instr_tag.tag; + bypass2_cr_data.data <= ex1.e.write_cr_data; + -- Update registers ex2in <= v; -- 2.30.2