From: Paul Mackerras Date: Tue, 10 Dec 2019 09:52:21 +0000 (+1100) Subject: Make multiplier hang off the side of execute1 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=2167186b5fae691b2a165cc5bfaaa79fe4713733;p=microwatt.git Make multiplier hang off the side of execute1 With this, the multiplier isn't a separate pipe that decode2 issues instructions to, but rather is a unit that execute1 sends operands to and which sends the result back to execute1, which then sends it to writeback. Execute1 now sends a stall signal when it gets a multiply instruction until it gets a valid signal back from the multiplier. This all means that we no longer need to mark the multiply instructions as single-issue. Signed-off-by: Paul Mackerras --- diff --git a/Makefile b/Makefile index e2398c0..720e8d5 100644 --- a/Makefile +++ b/Makefile @@ -17,7 +17,7 @@ common.o: decode_types.o control.o: gpr_hazard.o cr_hazard.o common.o sim_jtag.o: sim_jtag_socket.o core_tb.o: common.o wishbone_types.o core.o soc.o sim_jtag.o -core.o: common.o wishbone_types.o fetch1.o fetch2.o icache.o decode1.o decode2.o register_file.o cr_file.o execute1.o loadstore1.o dcache.o multiply.o writeback.o core_debug.o divider.o +core.o: common.o wishbone_types.o fetch1.o fetch2.o icache.o decode1.o decode2.o register_file.o cr_file.o execute1.o loadstore1.o dcache.o writeback.o core_debug.o divider.o core_debug.o: common.o countzero.o: countzero_tb.o: common.o glibc_random.o countzero.o @@ -26,7 +26,7 @@ crhelpers.o: common.o decode1.o: common.o decode_types.o decode2.o: decode_types.o common.o helpers.o insn_helpers.o control.o decode_types.o: -execute1.o: decode_types.o common.o helpers.o crhelpers.o insn_helpers.o ppc_fx_insns.o rotator.o logical.o countzero.o +execute1.o: decode_types.o common.o helpers.o crhelpers.o insn_helpers.o ppc_fx_insns.o rotator.o logical.o countzero.o multiply.o fetch1.o: common.o fetch2.o: common.o wishbone_types.o glibc_random_helpers.o: diff --git a/common.vhdl b/common.vhdl index a27f4f2..9c18230 100644 --- a/common.vhdl +++ b/common.vhdl @@ -130,7 +130,7 @@ package common is invert_out => '0', input_carry => ZERO, output_carry => '0', input_cr => '0', output_cr => '0', is_32bit => '0', is_signed => '0', xerc => xerc_init, others => (others => '0')); - type Decode2ToMultiplyType is record + type Execute1ToMultiplyType is record valid: std_ulogic; insn_type: insn_type_t; write_reg: gpr_index_t; @@ -141,9 +141,9 @@ package common is is_32bit: std_ulogic; xerc: xer_common_t; end record; - constant Decode2ToMultiplyInit : Decode2ToMultiplyType := (valid => '0', insn_type => OP_ILLEGAL, rc => '0', - oe => '0', is_32bit => '0', xerc => xerc_init, - others => (others => '0')); + constant Execute1ToMultiplyInit : Execute1ToMultiplyType := (valid => '0', insn_type => OP_ILLEGAL, rc => '0', + oe => '0', is_32bit => '0', xerc => xerc_init, + others => (others => '0')); type Decode2ToDividerType is record valid: std_ulogic; @@ -261,20 +261,19 @@ package common is write_xerc_enable => '0', xerc => xerc_init, others => (others => '0')); - type MultiplyToWritebackType is record + type MultiplyToExecute1Type is record valid: std_ulogic; - write_reg_enable : std_ulogic; write_reg_nr: gpr_index_t; write_reg_data: std_ulogic_vector(63 downto 0); write_xerc_enable : std_ulogic; xerc : xer_common_t; rc: std_ulogic; end record; - constant MultiplyToWritebackInit : MultiplyToWritebackType := (valid => '0', write_reg_enable => '0', - rc => '0', write_xerc_enable => '0', - xerc => xerc_init, - others => (others => '0')); + constant MultiplyToExecute1Init : MultiplyToExecute1Type := (valid => '0', + rc => '0', write_xerc_enable => '0', + xerc => xerc_init, + others => (others => '0')); type DividerToWritebackType is record valid: std_ulogic; diff --git a/core.vhdl b/core.vhdl index eb0b526..71c10b3 100644 --- a/core.vhdl +++ b/core.vhdl @@ -63,10 +63,6 @@ architecture behave of core is signal loadstore1_to_dcache: Loadstore1ToDcacheType; signal dcache_to_writeback: DcacheToWritebackType; - -- multiply signals - signal decode2_to_multiply: Decode2ToMultiplyType; - signal multiply_to_writeback: MultiplyToWritebackType; - -- divider signals signal decode2_to_divider: Decode2ToDividerType; signal divider_to_writeback: DividerToWritebackType; @@ -115,7 +111,6 @@ architecture behave of core is attribute keep_hierarchy of register_file_0 : label is keep_h(DISABLE_FLATTEN); attribute keep_hierarchy of cr_file_0 : label is keep_h(DISABLE_FLATTEN); attribute keep_hierarchy of execute1_0 : label is keep_h(DISABLE_FLATTEN); - attribute keep_hierarchy of multiply_0 : label is keep_h(DISABLE_FLATTEN); attribute keep_hierarchy of divider_0 : label is keep_h(DISABLE_FLATTEN); attribute keep_hierarchy of loadstore1_0 : label is keep_h(DISABLE_FLATTEN); attribute keep_hierarchy of dcache_0 : label is keep_h(DISABLE_FLATTEN); @@ -197,7 +192,6 @@ begin d_in => decode1_to_decode2, e_out => decode2_to_execute1, l_out => decode2_to_loadstore1, - m_out => decode2_to_multiply, d_out => decode2_to_divider, r_in => register_file_to_decode2, r_out => decode2_to_register_file, @@ -265,13 +259,6 @@ begin wishbone_out => wishbone_data_out ); - multiply_0: entity work.multiply - port map ( - clk => clk, - m_in => decode2_to_multiply, - m_out => multiply_to_writeback - ); - divider_0: entity work.divider port map ( clk => clk, @@ -285,7 +272,6 @@ begin clk => clk, e_in => execute1_to_writeback, l_in => dcache_to_writeback, - m_in => multiply_to_writeback, d_in => divider_to_writeback, w_out => writeback_to_register_file, c_out => writeback_to_cr_file, diff --git a/decode1.vhdl b/decode1.vhdl index 51a2643..4e1d063 100644 --- a/decode1.vhdl +++ b/decode1.vhdl @@ -54,7 +54,7 @@ architecture behaviour of decode1 is 41 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '1', '0', '0', '0', NONE, '0', '1'), -- lhzu 32 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- lwz 33 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '0', '0', NONE, '0', '1'), -- lwzu - 7 => (MUL, OP_MUL_L64, RA, CONST_SI, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '1'), -- mulli + 7 => (ALU, OP_MUL_L64, RA, CONST_SI, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0'), -- mulli 24 => (ALU, OP_OR, NONE, CONST_UI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- ori 25 => (ALU, OP_OR, NONE, CONST_UI_HI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- oris 20 => (ALU, OP_RLC, RA, CONST_SH32, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- rlwimi @@ -244,19 +244,19 @@ architecture behaviour of decode1 is 2#1100001011# => (DIV, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- modsw 2#0010010000# => (ALU, OP_MTCRF, NONE, NONE, RS, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- mtcrf/mtocrf 2#0111010011# => (ALU, OP_MTSPR, NONE, NONE, RS, SPR, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- mtspr - 2#0001001001# => (MUL, OP_MUL_H64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '1'), -- mulhd - 2#0000001001# => (MUL, OP_MUL_H64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- mulhdu - 2#0001001011# => (MUL, OP_MUL_H32, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '1'), -- mulhw - 2#0000001011# => (MUL, OP_MUL_H32, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '1'), -- mulhwu + 2#0001001001# => (ALU, OP_MUL_H64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0'), -- mulhd + 2#0000001001# => (ALU, OP_MUL_H64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- mulhdu + 2#0001001011# => (ALU, OP_MUL_H32, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0'), -- mulhw + 2#0000001011# => (ALU, OP_MUL_H32, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- mulhwu -- next 4 have reserved bit set - 2#1001001001# => (MUL, OP_MUL_H64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '1'), -- mulhd - 2#1000001001# => (MUL, OP_MUL_H64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- mulhdu - 2#1001001011# => (MUL, OP_MUL_H32, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '1'), -- mulhw - 2#1000001011# => (MUL, OP_MUL_H32, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '1'), -- mulhwu - 2#0011101001# => (MUL, OP_MUL_L64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '1'), -- mulld - 2#1011101001# => (MUL, OP_MUL_L64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '1'), -- mulldo - 2#0011101011# => (MUL, OP_MUL_L64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '1'), -- mullw - 2#1011101011# => (MUL, OP_MUL_L64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '1'), -- mullwo + 2#1001001001# => (ALU, OP_MUL_H64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0'), -- mulhd + 2#1000001001# => (ALU, OP_MUL_H64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- mulhdu + 2#1001001011# => (ALU, OP_MUL_H32, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0'), -- mulhw + 2#1000001011# => (ALU, OP_MUL_H32, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- mulhwu + 2#0011101001# => (ALU, OP_MUL_L64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0'), -- mulld + 2#1011101001# => (ALU, OP_MUL_L64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0'), -- mulldo + 2#0011101011# => (ALU, OP_MUL_L64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0'), -- mullw + 2#1011101011# => (ALU, OP_MUL_L64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0'), -- mullwo 2#0111011100# => (ALU, OP_AND, NONE, RB, RS, RA, '0', '0', '0', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- nand 2#0001101000# => (ALU, OP_ADD, RA, NONE, NONE, RT, '0', '0', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- neg 2#1001101000# => (ALU, OP_ADD, RA, NONE, NONE, RT, '0', '0', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- nego diff --git a/decode2.vhdl b/decode2.vhdl index f6f7101..2da5c41 100644 --- a/decode2.vhdl +++ b/decode2.vhdl @@ -24,7 +24,6 @@ entity decode2 is d_in : in Decode1ToDecode2Type; e_out : out Decode2ToExecute1Type; - m_out : out Decode2ToMultiplyType; d_out : out Decode2ToDividerType; l_out : out Decode2ToLoadstore1Type; @@ -39,7 +38,6 @@ end entity decode2; architecture behaviour of decode2 is type reg_type is record e : Decode2ToExecute1Type; - m : Decode2ToMultiplyType; d : Decode2ToDividerType; l : Decode2ToLoadstore1Type; end record; @@ -238,7 +236,7 @@ begin decode2_0: process(clk) begin if rising_edge(clk) then - if rin.e.valid = '1' or rin.l.valid = '1' or rin.m.valid = '1' or rin.d.valid = '1' then + if rin.e.valid = '1' or rin.l.valid = '1' or rin.d.valid = '1' then report "execute " & to_hstring(rin.e.nia); end if; r <= rin; @@ -266,14 +264,12 @@ begin v.e := Decode2ToExecute1Init; v.l := Decode2ToLoadStore1Init; - v.m := Decode2ToMultiplyInit; v.d := Decode2ToDividerInit; mul_a := (others => '0'); mul_b := (others => '0'); --v.e.input_cr := d_in.decode.input_cr; - --v.m.input_cr := d_in.decode.input_cr; --v.e.output_cr := d_in.decode.output_cr; decoded_reg_a := decode_input_reg_a (d_in.decode.input_reg_a, d_in.insn, r_in.read1_data, d_in.ispr1); @@ -323,38 +319,6 @@ begin v.e.insn := d_in.insn; v.e.data_len := length; - -- multiply unit - v.m.insn_type := d_in.decode.insn_type; - mul_a := decoded_reg_a.data; - mul_b := decoded_reg_b.data; - v.m.write_reg := gspr_to_gpr(decoded_reg_o.reg); - v.m.rc := decode_rc(d_in.decode.rc, d_in.insn); - v.m.xerc := c_in.read_xerc_data; - if v.m.insn_type = OP_MUL_L64 then - v.m.oe := decode_oe(d_in.decode.rc, d_in.insn); - end if; - v.m.is_32bit := d_in.decode.is_32bit; - - if d_in.decode.is_32bit = '1' then - if d_in.decode.is_signed = '1' then - v.m.data1 := (others => mul_a(31)); - v.m.data1(31 downto 0) := mul_a(31 downto 0); - v.m.data2 := (others => mul_b(31)); - v.m.data2(31 downto 0) := mul_b(31 downto 0); - else - v.m.data1 := '0' & x"00000000" & mul_a(31 downto 0); - v.m.data2 := '0' & x"00000000" & mul_b(31 downto 0); - end if; - else - if d_in.decode.is_signed = '1' then - v.m.data1 := mul_a(63) & mul_a; - v.m.data2 := mul_b(63) & mul_b; - else - v.m.data1 := '0' & mul_a; - v.m.data2 := '0' & mul_b; - end if; - end if; - -- divide unit -- PPC divide and modulus instruction words have these bits in -- the bottom 11 bits: o1dns 010t1 r @@ -438,7 +402,6 @@ begin cr_write_valid <= d_in.decode.output_cr or decode_rc(d_in.decode.rc, d_in.insn); v.e.valid := '0'; - v.m.valid := '0'; v.d.valid := '0'; v.l.valid := '0'; case d_in.decode.unit is @@ -446,8 +409,6 @@ begin v.e.valid := control_valid_out; when LDST => v.l.valid := control_valid_out; - when MUL => - v.m.valid := control_valid_out; when DIV => v.d.valid := control_valid_out; when NONE => @@ -458,7 +419,6 @@ begin if rst = '1' then v.e := Decode2ToExecute1Init; v.l := Decode2ToLoadStore1Init; - v.m := Decode2ToMultiplyInit; v.d := Decode2ToDividerInit; end if; @@ -468,7 +428,6 @@ begin -- Update outputs e_out <= r.e; l_out <= r.l; - m_out <= r.m; d_out <= r.d; end process; end architecture behaviour; diff --git a/decode_types.vhdl b/decode_types.vhdl index e847fcf..9860406 100644 --- a/decode_types.vhdl +++ b/decode_types.vhdl @@ -46,7 +46,7 @@ package decode_types is constant TOO_OFFSET : integer := 0; - type unit_t is (NONE, ALU, LDST, MUL, DIV); + type unit_t is (NONE, ALU, LDST, DIV); type length_t is (NONE, is1B, is2B, is4B, is8B); type decode_rom_t is record diff --git a/execute1.vhdl b/execute1.vhdl index 4714ec5..710044f 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -35,6 +35,7 @@ architecture behaviour of execute1 is e : Execute1ToWritebackType; lr_update : std_ulogic; next_lr : std_ulogic_vector(63 downto 0); + mul_in_progress : std_ulogic; end record; signal r, rin : reg_type; @@ -48,6 +49,10 @@ architecture behaviour of execute1 is signal logical_result: std_ulogic_vector(63 downto 0); signal countzero_result: std_ulogic_vector(63 downto 0); + -- multiply signals + signal x_to_multiply: Execute1ToMultiplyType; + signal multiply_to_x: MultiplyToExecute1Type; + procedure set_carry(e: inout Execute1ToWritebackType; carry32 : in std_ulogic; carry : in std_ulogic) is @@ -123,6 +128,13 @@ begin result => countzero_result ); + multiply_0: entity work.multiply + port map ( + clk => clk, + m_in => x_to_multiply, + m_out => multiply_to_x + ); + execute1_0: process(clk) begin if rising_edge(clk) then @@ -204,6 +216,38 @@ begin end if; v.lr_update := '0'; + v.mul_in_progress := '0'; + + -- signals to multiply unit + x_to_multiply <= Execute1ToMultiplyInit; + x_to_multiply.insn_type <= e_in.insn_type; + x_to_multiply.write_reg <= gspr_to_gpr(e_in.write_reg); + x_to_multiply.rc <= e_in.rc; + x_to_multiply.xerc <= v.e.xerc; + if e_in.insn_type = OP_MUL_L64 then + x_to_multiply.oe <= e_in.oe; + end if; + x_to_multiply.is_32bit <= e_in.is_32bit; + + if e_in.is_32bit = '1' then + if e_in.is_signed = '1' then + x_to_multiply.data1 <= (others => e_in.read_data1(31)); + x_to_multiply.data1(31 downto 0) <= e_in.read_data1(31 downto 0); + x_to_multiply.data2 <= (others => e_in.read_data2(31)); + x_to_multiply.data2(31 downto 0) <= e_in.read_data2(31 downto 0); + else + x_to_multiply.data1 <= '0' & x"00000000" & e_in.read_data1(31 downto 0); + x_to_multiply.data2 <= '0' & x"00000000" & e_in.read_data2(31 downto 0); + end if; + else + if e_in.is_signed = '1' then + x_to_multiply.data1 <= e_in.read_data1(63) & e_in.read_data1; + x_to_multiply.data2 <= e_in.read_data2(63) & e_in.read_data2; + else + x_to_multiply.data1 <= '0' & e_in.read_data1; + x_to_multiply.data2 <= '0' & e_in.read_data2; + end if; + end if; ctrl_tmp <= ctrl; -- FIXME: run at 512MHz not core freq @@ -506,11 +550,19 @@ begin when OP_ICBI => icache_inval <= '1'; + when OP_MUL_L64 | OP_MUL_H64 | OP_MUL_H32 => + v.e.valid := '0'; + v.mul_in_progress := '1'; + stall_out <= '1'; + x_to_multiply.valid <= '1'; + when others => terminate_out <= '1'; report "illegal"; end case; + v.e.rc := e_in.rc and e_in.valid; + -- Update LR on the next cycle after a branch link -- -- WARNING: The LR update isn't tracked by our hazard tracker. This @@ -536,11 +588,25 @@ begin v.e.write_len := x"8"; v.e.sign_extend := '0'; v.e.valid := '1'; + elsif r.mul_in_progress = '1' then + if multiply_to_x.valid = '1' then + v.e.write_reg := gpr_to_gspr(multiply_to_x.write_reg_nr); + result := multiply_to_x.write_reg_data; + result_en := '1'; + v.e.rc := multiply_to_x.rc; + v.e.xerc := multiply_to_x.xerc; + v.e.write_xerc_enable := multiply_to_x.write_xerc_enable; + v.e.valid := '1'; + v.e.write_len := x"8"; + v.e.sign_extend := '0'; + else + stall_out <= '1'; + v.mul_in_progress := '1'; + end if; end if; v.e.write_data := result; v.e.write_enable := result_en; - v.e.rc := e_in.rc and e_in.valid; -- Update registers rin <= v; diff --git a/multiply.vhdl b/multiply.vhdl index 23339b5..714b844 100644 --- a/multiply.vhdl +++ b/multiply.vhdl @@ -13,13 +13,13 @@ entity multiply is port ( clk : in std_logic; - m_in : in Decode2ToMultiplyType; - m_out : out MultiplyToWritebackType + m_in : in Execute1ToMultiplyType; + m_out : out MultiplyToExecute1Type ); end entity multiply; architecture behaviour of multiply is - signal m: Decode2ToMultiplyType; + signal m: Execute1ToMultiplyType; type multiply_pipeline_stage is record valid : std_ulogic; @@ -64,7 +64,7 @@ begin begin v := r; - m_out <= MultiplyToWritebackInit; + m_out <= MultiplyToExecute1Init; v.multiply_pipeline(0).valid := m.valid; v.multiply_pipeline(0).insn_type := m.insn_type; @@ -107,7 +107,6 @@ begin -- Generate OV/OV32/SO when OE=1 if v.multiply_pipeline(PIPELINE_DEPTH-1).valid = '1' then m_out.valid <= '1'; - m_out.write_reg_enable <= '1'; m_out.rc <= v.multiply_pipeline(PIPELINE_DEPTH-1).rc; m_out.write_xerc_enable <= v.multiply_pipeline(PIPELINE_DEPTH-1).oe; diff --git a/multiply_tb.vhdl b/multiply_tb.vhdl index 48f83ab..a76d739 100644 --- a/multiply_tb.vhdl +++ b/multiply_tb.vhdl @@ -17,8 +17,8 @@ architecture behave of multiply_tb is constant pipeline_depth : integer := 4; - signal m1 : Decode2ToMultiplyType; - signal m2 : MultiplyToWritebackType; + signal m1 : Execute1ToMultiplyType; + signal m2 : MultiplyToExecute1Type; begin multiply_0: entity work.multiply generic map (PIPELINE_DEPTH => pipeline_depth) @@ -58,7 +58,6 @@ begin wait for clk_period; assert m2.valid = '1'; - assert m2.write_reg_enable = '1'; assert m2.write_reg_nr = "10001"; assert m2.write_reg_data = x"0000000001111000"; assert m2.rc = '0'; @@ -76,7 +75,6 @@ begin wait for clk_period * (pipeline_depth-1); assert m2.valid = '1'; - assert m2.write_reg_enable = '1'; assert m2.write_reg_nr = "10001"; assert m2.write_reg_data = x"0000000001111000"; assert m2.rc = '1'; diff --git a/writeback.vhdl b/writeback.vhdl index 8582166..1323f71 100644 --- a/writeback.vhdl +++ b/writeback.vhdl @@ -12,7 +12,6 @@ entity writeback is e_in : in Execute1ToWritebackType; l_in : in DcacheToWritebackType; - m_in : in MultiplyToWritebackType; d_in : in DividerToWritebackType; w_out : out WritebackToRegisterFileType; @@ -67,32 +66,28 @@ begin begin x := "" & e_in.valid; y := "" & l_in.valid; - z := "" & m_in.valid; - w := "" & d_in.valid; - assert (to_integer(unsigned(x)) + to_integer(unsigned(y)) + to_integer(unsigned(z)) + to_integer(unsigned(w))) <= 1 severity failure; + z := "" & d_in.valid; + assert (to_integer(unsigned(x)) + to_integer(unsigned(y)) + to_integer(unsigned(z))) <= 1 severity failure; x := "" & e_in.write_enable; y := "" & l_in.write_enable; - z := "" & m_in.write_reg_enable; - w := "" & d_in.write_reg_enable; - assert (to_integer(unsigned(x)) + to_integer(unsigned(y)) + to_integer(unsigned(z)) + to_integer(unsigned(w))) <= 1 severity failure; + z := "" & d_in.write_reg_enable; + assert (to_integer(unsigned(x)) + to_integer(unsigned(y)) + to_integer(unsigned(z))) <= 1 severity failure; w := "" & e_in.write_cr_enable; x := "" & (e_in.write_enable and e_in.rc); - y := "" & (m_in.valid and m_in.rc); z := "" & (d_in.valid and d_in.rc); - assert (to_integer(unsigned(w)) + to_integer(unsigned(x)) + to_integer(unsigned(y)) + to_integer(unsigned(z))) <= 1 severity failure; + assert (to_integer(unsigned(w)) + to_integer(unsigned(x)) + to_integer(unsigned(z))) <= 1 severity failure; x := "" & e_in.write_xerc_enable; - y := "" & m_in.write_xerc_enable; z := "" & D_in.write_xerc_enable; - assert (to_integer(unsigned(x)) + to_integer(unsigned(y)) + to_integer(unsigned(z))) <= 1 severity failure; + assert (to_integer(unsigned(x)) + to_integer(unsigned(z))) <= 1 severity failure; w_out <= WritebackToRegisterFileInit; c_out <= WritebackToCrFileInit; complete_out <= '0'; - if e_in.valid = '1' or l_in.valid = '1' or m_in.valid = '1' or d_in.valid = '1' then + if e_in.valid = '1' or l_in.valid = '1' or d_in.valid = '1' then complete_out <= '1'; end if; @@ -143,19 +138,6 @@ begin xe := l_in.xerc; end if; - if m_in.write_reg_enable = '1' then - w_out.write_enable <= '1'; - w_out.write_reg <= gpr_to_gspr(m_in.write_reg_nr); - data_in <= m_in.write_reg_data; - rc <= m_in.rc; - xe := m_in.xerc; - end if; - - if m_in.write_xerc_enable = '1' then - c_out.write_xerc_enable <= '1'; - c_out.write_xerc_data <= m_in.xerc; - end if; - if d_in.write_reg_enable = '1' then w_out.write_enable <= '1'; w_out.write_reg <= gpr_to_gspr(d_in.write_reg_nr);