Add a bypass path from the execute2 stage
author Paul Mackerras <paulus@ozlabs.org>
Mon, 27 Jun 2022 22:40:42 +0000 (08:40 +1000)
committer Paul Mackerras <paulus@ozlabs.org>
Fri, 22 Jul 2022 12:19:28 +0000 (22:19 +1000)
This enables some instructions to issue earlier and thus improves
performance, at the cost of some extra multiplexers in decode2.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
control.vhdl
core.vhdl
decode2.vhdl
execute1.vhdl

index 0bbe9ad16658dce9654ac34d00cfbaaf64005bd2..17a288b154756590c6c489e44352ab5a58d34560 100644 (file)
--- a/control.vhdl
+++ b/control.vhdl
@@ -36,6 +36,8 @@ entity control is
 
         execute_next_tag    : in instr_tag_t;
         execute_next_cr_tag : in instr_tag_t;
+        execute2_next_tag    : in instr_tag_t;
+        execute2_next_cr_tag : in instr_tag_t;
 
         cr_read_in          : in std_ulogic;
         cr_write_in         : in std_ulogic;
@@ -44,10 +46,10 @@ entity control is
         stall_out           : out std_ulogic;
         stopped_out         : out std_ulogic;
 
-        gpr_bypass_a        : out std_ulogic;
-        gpr_bypass_b        : out std_ulogic;
-        gpr_bypass_c        : out std_ulogic;
-        cr_bypass           : out std_ulogic;
+        gpr_bypass_a        : out std_ulogic_vector(1 downto 0);
+        gpr_bypass_b        : out std_ulogic_vector(1 downto 0);
+        gpr_bypass_c        : out std_ulogic_vector(1 downto 0);
+        cr_bypass           : out std_ulogic_vector(1 downto 0);
 
         instr_tag_out       : out instr_tag_t
         );
@@ -142,11 +144,11 @@ begin
         variable tag_s : instr_tag_t;
         variable tag_t : instr_tag_t;
         variable incr_tag : tag_number_t;
-        variable byp_a : std_ulogic;
-        variable byp_b : std_ulogic;
-        variable byp_c : std_ulogic;
+        variable byp_a : std_ulogic_vector(1 downto 0);
+        variable byp_b : std_ulogic_vector(1 downto 0);
+        variable byp_c : std_ulogic_vector(1 downto 0);
         variable tag_cr : instr_tag_t;
-        variable byp_cr : std_ulogic;
+        variable byp_cr : std_ulogic_vector(1 downto 0);
     begin
         tag_a := instr_tag_init;
         for i in tag_number_t loop
@@ -179,26 +181,32 @@ begin
             tag_c.valid := '0';
         end if;
 
-        byp_a := '0';
+        byp_a := "00";
         if EX1_BYPASS and tag_match(execute_next_tag, tag_a) then
-            byp_a := '1';
+            byp_a := "10";
+        elsif EX1_BYPASS and tag_match(execute2_next_tag, tag_a) then
+            byp_a := "11";
         end if;
-        byp_b := '0';
+        byp_b := "00";
         if EX1_BYPASS and tag_match(execute_next_tag, tag_b) then
-            byp_b := '1';
+            byp_b := "10";
+        elsif EX1_BYPASS and tag_match(execute2_next_tag, tag_b) then
+            byp_b := "11";
         end if;
-        byp_c := '0';
+        byp_c := "00";
         if EX1_BYPASS and tag_match(execute_next_tag, tag_c) then
-            byp_c := '1';
+            byp_c := "10";
+        elsif EX1_BYPASS and tag_match(execute2_next_tag, tag_c) then
+            byp_c := "11";
         end if;
 
         gpr_bypass_a <= byp_a;
         gpr_bypass_b <= byp_b;
         gpr_bypass_c <= byp_c;
 
-        gpr_tag_stall <= (tag_a.valid and not byp_a) or
-                         (tag_b.valid and not byp_b) or
-                         (tag_c.valid and not byp_c);
+        gpr_tag_stall <= (tag_a.valid and not byp_a(1)) or
+                         (tag_b.valid and not byp_b(1)) or
+                         (tag_c.valid and not byp_c(1));
 
         incr_tag := curr_tag;
         instr_tag.tag <= curr_tag;
@@ -215,13 +223,15 @@ begin
         if tag_match(tag_cr, complete_in) then
             tag_cr.valid := '0';
         end if;
-        byp_cr := '0';
+        byp_cr := "00";
         if EX1_BYPASS and tag_match(execute_next_cr_tag, tag_cr) then
-            byp_cr := '1';
+            byp_cr := "10";
+        elsif EX1_BYPASS and tag_match(execute2_next_cr_tag, tag_cr) then
+            byp_cr := "11";
         end if;
 
         cr_bypass <= byp_cr;
-        cr_tag_stall <= tag_cr.valid and not byp_cr;
+        cr_tag_stall <= tag_cr.valid and not byp_cr(1);
     end process;
 
     control1 : process(all)
index 070a1f1f667316f95bd8ebe3c8037b58b9bdc244..84604c684e4644ca05ff1e1ff9f9585905430a3a 100644 (file)
--- a/core.vhdl
+++ b/core.vhdl
@@ -79,6 +79,8 @@ architecture behave of core is
     signal execute1_to_writeback: Execute1ToWritebackType;
     signal execute1_bypass: bypass_data_t;
     signal execute1_cr_bypass: cr_bypass_data_t;
+    signal execute2_bypass: bypass_data_t;
+    signal execute2_cr_bypass: cr_bypass_data_t;
 
     -- load store signals
     signal execute1_to_loadstore1: Execute1ToLoadstore1Type;
@@ -298,6 +300,8 @@ begin
             c_out => decode2_to_cr_file,
             execute_bypass => execute1_bypass,
             execute_cr_bypass => execute1_cr_bypass,
+            execute2_bypass => execute2_bypass,
+            execute2_cr_bypass => execute2_cr_bypass,
             log_out => log_data(119 downto 110)
             );
     decode2_busy_in <= ex1_busy_out;
@@ -359,6 +363,8 @@ begin
             e_out => execute1_to_writeback,
             bypass_data => execute1_bypass,
             bypass_cr_data => execute1_cr_bypass,
+            bypass2_data => execute2_bypass,
+            bypass2_cr_data => execute2_cr_bypass,
            icache_inval => ex1_icache_inval,
             dbg_ctrl_out => ctrl_debug,
             wb_events => writeback_events,
index af0c27dc14e3057cf4ab713a2b91855727ec8bea..c290c98ce2c88d54800af1521b654cace4441002 100644 (file)
--- a/decode2.vhdl
+++ b/decode2.vhdl
@@ -39,6 +39,8 @@ entity decode2 is
 
         execute_bypass    : in bypass_data_t;
         execute_cr_bypass : in cr_bypass_data_t;
+        execute2_bypass    : in bypass_data_t;
+        execute2_cr_bypass : in cr_bypass_data_t;
 
         log_out : out std_ulogic_vector(9 downto 0)
        );
@@ -273,19 +275,19 @@ architecture behaviour of decode2 is
 
     signal gpr_a_read_valid : std_ulogic;
     signal gpr_a_read       : gspr_index_t;
-    signal gpr_a_bypass     : std_ulogic;
+    signal gpr_a_bypass     : std_ulogic_vector(1 downto 0);
 
     signal gpr_b_read_valid : std_ulogic;
     signal gpr_b_read       : gspr_index_t;
-    signal gpr_b_bypass     : std_ulogic;
+    signal gpr_b_bypass     : std_ulogic_vector(1 downto 0);
 
     signal gpr_c_read_valid : std_ulogic;
     signal gpr_c_read       : gspr_index_t;
-    signal gpr_c_bypass     : std_ulogic;
+    signal gpr_c_bypass     : std_ulogic_vector(1 downto 0);
 
     signal cr_read_valid   : std_ulogic;
     signal cr_write_valid  : std_ulogic;
-    signal cr_bypass       : std_ulogic;
+    signal cr_bypass       : std_ulogic_vector(1 downto 0);
 
     signal instr_tag       : instr_tag_t;
 
@@ -321,6 +323,8 @@ begin
 
             execute_next_tag     => execute_bypass.tag,
             execute_next_cr_tag  => execute_cr_bypass.tag,
+            execute2_next_tag    => execute2_bypass.tag,
+            execute2_next_cr_tag => execute2_cr_bypass.tag,
 
             cr_read_in           => cr_read_valid,
             cr_write_in          => cr_write_valid,
@@ -504,27 +508,35 @@ begin
 
         -- See if any of the operands can get their value via the bypass path.
         case gpr_a_bypass is
-            when '1' =>
+            when "10" =>
                 v.e.read_data1 := execute_bypass.data;
+            when "11" =>
+                v.e.read_data1 := execute2_bypass.data;
             when others =>
                 v.e.read_data1 := decoded_reg_a.data;
         end case;
         case gpr_b_bypass is
-            when '1' =>
+            when "10" =>
                 v.e.read_data2 := execute_bypass.data;
+            when "11" =>
+                v.e.read_data2 := execute2_bypass.data;
             when others =>
                 v.e.read_data2 := decoded_reg_b.data;
         end case;
         case gpr_c_bypass is
-            when '1' =>
+            when "10" =>
                 v.e.read_data3 := execute_bypass.data;
+            when "11" =>
+                v.e.read_data3 := execute2_bypass.data;
             when others =>
                 v.e.read_data3 := decoded_reg_c.data;
         end case;
 
         v.e.cr := c_in.read_cr_data;
-        if cr_bypass = '1' then
+        if cr_bypass = "10" then
             v.e.cr := execute_cr_bypass.data;
+        elsif cr_bypass = "11" then
+            v.e.cr := execute2_cr_bypass.data;
         end if;
 
         -- issue control
@@ -577,9 +589,9 @@ begin
                             r.e.valid &
                             stopped_out &
                             stall_out &
-                            gpr_a_bypass &
-                            gpr_b_bypass &
-                            gpr_c_bypass;
+                            (gpr_a_bypass(1) or gpr_a_bypass(0)) &
+                            (gpr_b_bypass(1) or gpr_b_bypass(0)) &
+                            (gpr_c_bypass(1) or gpr_c_bypass(0));
             end if;
         end process;
         log_out <= log_data;
index ebcdfebf0dd6c8204b420d75cadc4308ca577869..ebc24c54aacf496efc1eacb1cdc39f16d0fe2b0d 100644 (file)
--- a/execute1.vhdl
+++ b/execute1.vhdl
@@ -40,6 +40,8 @@ entity execute1 is
        e_out : out Execute1ToWritebackType;
         bypass_data : out bypass_data_t;
         bypass_cr_data : out cr_bypass_data_t;
+        bypass2_data : out bypass_data_t;
+        bypass2_cr_data : out cr_bypass_data_t;
 
         dbg_ctrl_out : out ctrl_t;
 
@@ -1482,6 +1484,7 @@ begin
         variable fv : Execute1ToFPUType;
         variable k : integer;
         variable go : std_ulogic;
+        variable bypass_valid : std_ulogic;
     begin
        v := ex2;
         if (l_in.busy or fp_in.busy) = '0' then
@@ -1559,6 +1562,19 @@ begin
             ctrl_tmp.msr(MSR_LE) <= '1';
         end if;
 
+        bypass_valid := ex1.e.valid;
+        if (ex2.busy or l_in.busy or fp_in.busy) = '1' and ex1.res2_sel(1) = '1' then
+            bypass_valid := '0';
+        end if;
+
+        bypass2_data.tag.valid <= ex1.e.write_enable and bypass_valid;
+        bypass2_data.tag.tag <= ex1.e.instr_tag.tag;
+        bypass2_data.data <= ex_result;
+
+        bypass2_cr_data.tag.valid <= ex1.e.write_cr_enable and bypass_valid;
+        bypass2_cr_data.tag.tag <= ex1.e.instr_tag.tag;
+        bypass2_cr_data.data <= ex1.e.write_cr_data;
+
        -- Update registers
        ex2in <= v;