From: Paul Mackerras Date: Mon, 9 May 2022 09:18:42 +0000 (+1000) Subject: Use FPU for division instructions if we have an FPU X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=73cc5167ec1ea591d9da43f2e392b5202f045f32;p=microwatt.git Use FPU for division instructions if we have an FPU - Arrange for XER to be written for OE=1 forms - Arrange for condition codes to be set for RC=1 forms (including correct handling for 32-bit mode) - Don't instantiate the divider if we have an FPU. Signed-off-by: Paul Mackerras --- diff --git a/common.vhdl b/common.vhdl index aa7b830..f846fb4 100644 --- a/common.vhdl +++ b/common.vhdl @@ -640,7 +640,10 @@ package common is frc : std_ulogic_vector(63 downto 0); frt : gspr_index_t; rc : std_ulogic; + m32b : std_ulogic; out_cr : std_ulogic; + oe : std_ulogic; + xerc : xer_common_t; stall : std_ulogic; end record; constant Execute1ToFPUInit : Execute1ToFPUType := (valid => '0', op => OP_ILLEGAL, nia => (others => '0'), @@ -649,6 +652,7 @@ package common is fra => (others => '0'), frb => (others => '0'), frc => (others => '0'), frt => (others => '0'), single => '0', is_signed => '0', out_cr => '0', + m32b => '0', oe => '0', xerc => xerc_init, stall => '0'); type FPUToExecute1Type is record @@ -668,6 +672,8 @@ package common is write_cr_enable : std_ulogic; write_cr_mask : std_ulogic_vector(7 downto 0); write_cr_data : std_ulogic_vector(31 downto 0); + write_xerc : std_ulogic; + xerc : xer_common_t; intr_vec : intr_vector_t; srr0 : std_ulogic_vector(63 downto 0); srr1 : std_ulogic_vector(15 downto 0); @@ -677,6 +683,7 @@ package common is write_enable => '0', write_reg => (others => '0'), write_cr_enable => '0', write_cr_mask => (others => '0'), write_cr_data => (others => '0'), + write_xerc => '0', xerc => xerc_init, intr_vec => 0, srr1 => (others => '0'), others => (others => '0')); diff --git a/decode1.vhdl b/decode1.vhdl index 5bc023b..2e2a8e3 100644 --- a/decode1.vhdl +++ b/decode1.vhdl @@ -35,6 +35,18 @@ architecture 
behaviour of decode1 is constant illegal_inst : decode_rom_t := (NONE, NONE, OP_ILLEGAL, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE); + -- If we have an FPU, then it is used for integer divisions, + -- otherwise a dedicated divider in the ALU is used. The unit is fixed when the DVU constant is initialised from HAS_FPU, which lets the decode ROM entries for the divide and modulo instructions name DVU directly. + function divider_unit(hf : boolean) return unit_t is + begin + if hf then + return FPU; + else + return ALU; + end if; + end; + constant DVU : unit_t := divider_unit(HAS_FPU); + type reg_internal_t is record override : std_ulogic; override_decode: decode_rom_t; @@ -225,22 +237,22 @@ architecture behaviour of decode1 is 2#0100010110# => (ALU, NONE, OP_DCBT, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- dcbt 2#0011110110# => (ALU, NONE, OP_DCBTST, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- dcbtst 2#1111110110# => (LDST, NONE, OP_DCBZ, RA_OR_ZERO, RB, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- dcbz - 2#0110001001# => (ALU, NONE, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE), -- divdeu - 2#1110001001# => (ALU, NONE, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE), -- divdeuo - 2#0110001011# => (ALU, NONE, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0', NONE), -- divweu - 2#1110001011# => (ALU, NONE, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0', NONE), -- divweuo - 2#0110101001# => (ALU, NONE, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0', NONE), -- divde - 2#1110101001# => (ALU, NONE, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, 
'0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0', NONE), -- divdeo - 2#0110101011# => (ALU, NONE, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0', NONE), -- divwe - 2#1110101011# => (ALU, NONE, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0', NONE), -- divweo - 2#0111001001# => (ALU, NONE, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE), -- divdu - 2#1111001001# => (ALU, NONE, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE), -- divduo - 2#0111001011# => (ALU, NONE, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0', NONE), -- divwu - 2#1111001011# => (ALU, NONE, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0', NONE), -- divwuo - 2#0111101001# => (ALU, NONE, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0', NONE), -- divd - 2#1111101001# => (ALU, NONE, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0', NONE), -- divdo - 2#0111101011# => (ALU, NONE, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0', NONE), -- divw - 2#1111101011# => (ALU, NONE, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0', NONE), -- divwo + 2#0110001001# => (DVU, NONE, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE), -- divdeu + 2#1110001001# => (DVU, NONE, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE), -- divdeuo + 2#0110001011# => (DVU, NONE, OP_DIVE, RA, RB, NONE, RT, '0', '0', 
'0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0', NONE), -- divweu + 2#1110001011# => (DVU, NONE, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0', NONE), -- divweuo + 2#0110101001# => (DVU, NONE, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0', NONE), -- divde + 2#1110101001# => (DVU, NONE, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0', NONE), -- divdeo + 2#0110101011# => (DVU, NONE, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0', NONE), -- divwe + 2#1110101011# => (DVU, NONE, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0', NONE), -- divweo + 2#0111001001# => (DVU, NONE, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE), -- divdu + 2#1111001001# => (DVU, NONE, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE), -- divduo + 2#0111001011# => (DVU, NONE, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0', NONE), -- divwu + 2#1111001011# => (DVU, NONE, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0', NONE), -- divwuo + 2#0111101001# => (DVU, NONE, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0', NONE), -- divd + 2#1111101001# => (DVU, NONE, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0', NONE), -- divdo + 2#0111101011# => (DVU, NONE, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0', NONE), -- divw + 2#1111101011# => (DVU, NONE, OP_DIV, RA, RB, NONE, 
RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0', NONE), -- divwo 2#1100110110# => (ALU, NONE, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- dss 2#0101010110# => (ALU, NONE, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- dst 2#0101110110# => (ALU, NONE, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- dstst @@ -318,10 +330,10 @@ architecture behaviour of decode1 is 2#0000010011# => (ALU, NONE, OP_MFCR, NONE, NONE, NONE, RT, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- mfcr/mfocrf 2#0001010011# => (ALU, NONE, OP_MFMSR, NONE, NONE, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1', NONE), -- mfmsr 2#0101010011# => (ALU, NONE, OP_MFSPR, SPR, NONE, RS, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- mfspr - 2#0100001001# => (ALU, NONE, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- modud - 2#0100001011# => (ALU, NONE, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', NONE, '0', '0', NONE), -- moduw - 2#1100001001# => (ALU, NONE, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0', NONE), -- modsd - 2#1100001011# => (ALU, NONE, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', NONE, '0', '0', NONE), -- modsw + 2#0100001001# => (DVU, NONE, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- modud + 2#0100001011# => (DVU, NONE, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', 
'0', '0', '0', '1', '0', NONE, '0', '0', NONE), -- moduw + 2#1100001001# => (DVU, NONE, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0', NONE), -- modsd + 2#1100001011# => (DVU, NONE, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', NONE, '0', '0', NONE), -- modsw 2#0010010000# => (ALU, NONE, OP_MTCRF, NONE, NONE, RS, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- mtcrf/mtocrf 2#0010010010# => (ALU, NONE, OP_MTMSRD, NONE, NONE, RS, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', NONE, '0', '0', NONE), -- mtmsr 2#0010110010# => (ALU, NONE, OP_MTMSRD, NONE, NONE, RS, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- mtmsrd # ignore top bits and d diff --git a/execute1.vhdl b/execute1.vhdl index 2121963..2efe439 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -188,7 +188,7 @@ architecture behaviour of execute1 is -- divider signals signal x_to_divider: Execute1ToDividerType; - signal divider_to_x: DividerToExecute1Type; + signal divider_to_x: DividerToExecute1Type := DividerToExecute1Init; -- random number generator signals signal random_raw : std_ulogic_vector(63 downto 0); @@ -367,13 +367,15 @@ begin m_out => multiply_to_x ); - divider_0: entity work.divider - port map ( - clk => clk, - rst => rst, - d_in => x_to_divider, - d_out => divider_to_x - ); + divider_0: if not HAS_FPU generate + div_0: entity work.divider + port map ( + clk => clk, + rst => rst, + d_in => x_to_divider, + d_out => divider_to_x + ); + end generate; random_0: entity work.random port map ( @@ -1159,9 +1161,11 @@ begin owait := '1'; when OP_DIV | OP_DIVE | OP_MOD => - v.start_div := '1'; - slow_op := '1'; - owait := '1'; + if not HAS_FPU then + v.start_div := '1'; + slow_op := '1'; + owait := '1'; + end if; when OP_FETCH_FAILED => -- Handling an ITLB miss doesn't 
count as having executed an instruction @@ -1457,6 +1461,9 @@ begin fv.frt := e_in.write_reg; fv.rc := e_in.rc; fv.out_cr := e_in.output_cr; + fv.m32b := not ex1.msr(MSR_SF); + fv.oe := e_in.oe; + fv.xerc := xerc_in; fv.stall := l_in.l2stall; -- Update registers diff --git a/fpu.vhdl b/fpu.vhdl index b8cea39..90e04b3 100644 --- a/fpu.vhdl +++ b/fpu.vhdl @@ -125,6 +125,7 @@ architecture behaviour of fpu is write_reg : gspr_index_t; complete_tag : instr_tag_t; writing_cr : std_ulogic; + writing_xer : std_ulogic; int_result : std_ulogic; cr_result : std_ulogic_vector(3 downto 0); cr_mask : std_ulogic_vector(7 downto 0); @@ -151,6 +152,7 @@ architecture behaviour of fpu is invalid : std_ulogic; negate : std_ulogic; longmask : std_ulogic; + integer_op : std_ulogic; divext : std_ulogic; divmod : std_ulogic; is_signed : std_ulogic; @@ -159,6 +161,10 @@ architecture behaviour of fpu is inc_quot : std_ulogic; a_hi : std_ulogic_vector(7 downto 0); a_lo : std_ulogic_vector(55 downto 0); + m32b : std_ulogic; + oe : std_ulogic; + xerc : xer_common_t; + xerc_result : xer_common_t; end record; type lookup_table is array(0 to 1023) of std_ulogic_vector(17 downto 0); @@ -604,6 +610,7 @@ begin r.do_intr <= '0'; r.writing_fpr <= '0'; r.writing_cr <= '0'; + r.writing_xer <= '0'; r.fpscr <= (others => '0'); r.write_reg <= (others =>'0'); r.complete_tag.valid <= '0'; @@ -658,6 +665,8 @@ begin w_out.write_cr_mask <= r.cr_mask; w_out.write_cr_data <= r.cr_result & r.cr_result & r.cr_result & r.cr_result & r.cr_result & r.cr_result & r.cr_result & r.cr_result; + w_out.write_xerc <= r.writing_xer and r.complete; + w_out.xerc <= r.xerc_result; w_out.interrupt <= r.do_intr; w_out.intr_vec <= 16#700#; w_out.srr0 <= r.nia; @@ -739,6 +748,7 @@ begin v.instr_done := '0'; v.writing_fpr := '0'; v.writing_cr := '0'; + v.writing_xer := '0'; v.comm_fpscr := r.fpscr; v.illegal := '0'; end if; @@ -755,7 +765,11 @@ begin v.is_signed := e_in.is_signed; v.rc := e_in.rc; v.is_cmp := e_in.out_cr; + v.oe := 
e_in.oe; + v.m32b := e_in.m32b; + v.xerc := e_in.xerc; v.longmask := '0'; + v.integer_op := '0'; v.divext := '0'; v.divmod := '0'; if e_in.op = OP_FPOP or e_in.op = OP_FPOP_I then @@ -764,6 +778,7 @@ begin int_input := '1'; end if; else -- OP_DIV, OP_DIVE, OP_MOD + v.integer_op := '1'; int_input := '1'; is_32bint := e_in.single; if e_in.op = OP_DIVE then @@ -2865,12 +2880,44 @@ begin v.state := IDIV_DONE; end if; when IDIV_DONE => + v.xerc_result := v.xerc; + if r.oe = '1' then + v.xerc_result.ov := '0'; + v.xerc_result.ov32 := '0'; + v.writing_xer := '1'; + end if; + if r.m32b = '0' then + v.cr_result(3) := r.r(63); + v.cr_result(2 downto 1) := "00"; + if r.r = 64x"0" then + v.cr_result(1) := '1'; + else + v.cr_result(2) := not r.r(63); + end if; + else + v.cr_result(3) := r.r(31); + v.cr_result(2 downto 1) := "00"; + if r.r(31 downto 0) = 32x"0" then + v.cr_result(1) := '1'; + else + v.cr_result(2) := not r.r(31); + end if; + end if; + v.cr_result(0) := v.xerc.so; int_result := '1'; v.writing_fpr := '1'; v.instr_done := '1'; when IDIV_ZERO => opsel_r <= RES_MISC; misc_sel <= "0101"; + v.xerc_result := v.xerc; + if r.oe = '1' then + v.xerc_result.ov := r.int_ovf; + v.xerc_result.ov32 := r.int_ovf; + v.xerc_result.so := r.xerc.so or r.int_ovf; + v.writing_xer := '1'; + end if; + v.cr_result := "001" & v.xerc_result.so; int_result := '1'; v.writing_fpr := '1'; v.instr_done := '1'; @@ -3169,14 +3216,16 @@ begin v.state := IDLE; v.busy := '0'; v.f2stall := '0'; - if r.rc = '1' then + if r.rc = '1' and (r.op = OP_FPOP or r.op = OP_FPOP_I) then v.cr_result := v.fpscr(FPSCR_FX downto FPSCR_OX); end if; v.sp_result := r.single_prec; v.int_result := int_result; v.illegal := illegal; v.nsnan_result := v.quieten_nan; - if r.is_cmp = '0' then + if r.integer_op = '1' then + v.cr_mask := num_to_fxm(0); + elsif r.is_cmp = '0' then v.cr_mask := num_to_fxm(1); else v.cr_mask := num_to_fxm(to_integer(unsigned(insn_bf(r.insn)))); diff --git a/tests/fpu/fpu.c b/tests/fpu/fpu.c index 
500e92d..773c05d 100644 --- a/tests/fpu/fpu.c +++ b/tests/fpu/fpu.c @@ -1410,6 +1410,110 @@ int fpu_test_23(void) return trapit(0, test23); } +struct idiv_tests { /* one 64-bit test vector: two operands followed by the expected result of each instruction */ + unsigned long denom; /* first source operand (%1 in the asm statements), i.e. the dividend despite the name */ + unsigned long divisor; /* second source operand (%2 in the asm statements) */ + unsigned long divd; /* expected divd result (signed quotient) */ + unsigned long divdu; /* expected divdu result (unsigned quotient) */ + unsigned long divde; /* expected divde result (signed, dividend is the first operand shifted left 64 bits) */ + unsigned long divdeu; /* expected divdeu result (unsigned, dividend is the first operand shifted left 64 bits) */ + unsigned long modsd; /* expected modsd result (signed remainder) */ + unsigned long modud; /* expected modud result (unsigned remainder) */ +} idiv_tests[] = { + { 0, 0, 0, 0, 0, 0, 0, 0 }, /* divisor 0: this test expects 0 from every form */ + { 0x56789a, 0x1234, 0x4c0, 0x4c0, 0, 0, 0x19a, 0x19a }, + { 2, 3, 0, 0, 0, 0xaaaaaaaaaaaaaaaa, 2, 2 }, + { 31, 157, 0, 0, 0x328c3ab35cf15328, 0x328c3ab35cf15328, 31, 31 }, + { -4329874, 43879, -98, 0x17e5a119b9170, 0, 0, -29732, 39518 }, + { -4329874, -43879, 98, 0, 0, 0xffffffffffbe99d4, -29732, -4329874 }, + { 0x8000000000000000ul, -1, 0, 0, 0, 0x8000000000000000ul, 0, 0x8000000000000000ul }, /* most-negative value / -1: the signed quotient is not representable and this test expects divd to return 0 */ +}; + +int fpu_test_24(void) /* Runs the six 64-bit divide/modulo forms on every idiv_tests row; returns 0 if all rows match, otherwise prints the row index and the six actual results and returns row index + 1. */ +{ + long i; + unsigned long a, b, results[6]; + + for (i = 0; i < sizeof(idiv_tests) / sizeof(idiv_tests[0]); ++i) { + a = idiv_tests[i].denom; + b = idiv_tests[i].divisor; + asm("divd %0,%1,%2" : "=r" (results[0]) : "r" (a), "r" (b)); + asm("divdu %0,%1,%2" : "=r" (results[1]) : "r" (a), "r" (b)); + asm("divde %0,%1,%2" : "=r" (results[2]) : "r" (a), "r" (b)); + asm("divdeu %0,%1,%2" : "=r" (results[3]) : "r" (a), "r" (b)); + asm("modsd %0,%1,%2" : "=r" (results[4]) : "r" (a), "r" (b)); + asm("modud %0,%1,%2" : "=r" (results[5]) : "r" (a), "r" (b)); + if (results[0] != idiv_tests[i].divd || + results[1] != idiv_tests[i].divdu || + results[2] != idiv_tests[i].divde || + results[3] != idiv_tests[i].divdeu || + results[4] != idiv_tests[i].modsd || + results[5] != idiv_tests[i].modud) { + print_hex(i, 2, " "); + print_hex(results[0], 16, " "); + print_hex(results[1], 16, " "); + print_hex(results[2], 16, " "); + print_hex(results[3], 16, " "); + print_hex(results[4], 16, " "); + print_hex(results[5], 16, "\r\n"); + return i + 1; /* non-zero return identifies the first failing row (1-based) */ + } + } + return 0; +} + +struct wdiv_tests { /* one 32-bit test vector: two operands followed by the expected result of each instruction */ + unsigned int denom; /* first source operand (%1 in the asm statements), i.e. the dividend despite the name */ + unsigned int divisor; /* second source operand (%2 in the asm statements) */ + unsigned int divw; /* expected divw result (signed quotient) */ + unsigned int 
divwu; /* expected divwu result (unsigned quotient) */ + unsigned int divwe; /* expected divwe result (signed, dividend is the first operand shifted left 32 bits) */ + unsigned int divweu; /* expected divweu result (unsigned, dividend is the first operand shifted left 32 bits) */ + unsigned int modsw; /* expected modsw result (signed remainder) */ + unsigned int moduw; /* expected moduw result (unsigned remainder) */ +} wdiv_tests[] = { + { 0, 0, 0, 0, 0, 0, 0, 0 }, /* divisor 0: this test expects 0 from every form */ + { 0x56789a, 0x1234, 0x4c0, 0x4c0, 0, 0, 0x19a, 0x19a }, + { 2, 3, 0, 0, 0, 0xaaaaaaaa, 2, 2 }, + { 31, 157, 0, 0, 0x328c3ab3, 0x328c3ab3, 31, 31 }, + { -4329874, 43879, -98, 0x17df7, 0, 0, -29732, 17165 }, + { -4329874, -43879, 98, 0, 0, 0xffbe99a9, -29732, -4329874 }, + { 0x80000000u, -1, 0, 0, 0, 0x80000000u, 0, 0x80000000u }, /* most-negative value / -1: the signed quotient is not representable and this test expects divw to return 0 */ +}; + +int fpu_test_25(void) /* Runs the six 32-bit divide/modulo forms on every wdiv_tests row. Results are stored in unsigned int, so only the low 32 bits of each result are compared. Returns 0 if all rows match, otherwise prints the row index and the six actual results and returns row index + 1. */ +{ + long i; + unsigned int a, b, results[6]; + + for (i = 0; i < sizeof(wdiv_tests) / sizeof(wdiv_tests[0]); ++i) { + a = wdiv_tests[i].denom; + b = wdiv_tests[i].divisor; + asm("divw %0,%1,%2" : "=r" (results[0]) : "r" (a), "r" (b)); + asm("divwu %0,%1,%2" : "=r" (results[1]) : "r" (a), "r" (b)); + asm("divwe %0,%1,%2" : "=r" (results[2]) : "r" (a), "r" (b)); + asm("divweu %0,%1,%2" : "=r" (results[3]) : "r" (a), "r" (b)); + asm("modsw %0,%1,%2" : "=r" (results[4]) : "r" (a), "r" (b)); + asm("moduw %0,%1,%2" : "=r" (results[5]) : "r" (a), "r" (b)); + if (results[0] != wdiv_tests[i].divw || + results[1] != wdiv_tests[i].divwu || + results[2] != wdiv_tests[i].divwe || + results[3] != wdiv_tests[i].divweu || + results[4] != wdiv_tests[i].modsw || + results[5] != wdiv_tests[i].moduw) { + print_hex(i, 2, " "); + print_hex(results[0], 8, " "); + print_hex(results[1], 8, " "); + print_hex(results[2], 8, " "); + print_hex(results[3], 8, " "); + print_hex(results[4], 8, " "); + print_hex(results[5], 8, "\r\n"); + return i + 1; /* non-zero return identifies the first failing row (1-based) */ + } + } + return 0; +} + int fail = 0; void do_test(int num, int (*test)(void)) @@ -1458,6 +1562,8 @@ int main(void) do_test(21, fpu_test_21); do_test(22, fpu_test_22); do_test(23, fpu_test_23); + do_test(24, fpu_test_24); + do_test(25, fpu_test_25); return fail; } diff --git a/writeback.vhdl b/writeback.vhdl index 0d6f41d..5b384c6 100644 --- a/writeback.vhdl +++ b/writeback.vhdl @@ -73,6 +73,8 @@ begin assert (to_integer(unsigned(w)) + 
to_integer(unsigned(x)) + to_integer(unsigned(y))) <= 1 severity failure; + assert (e_in.write_xerc_enable and fp_in.write_xerc) /= '1' severity failure; + assert not (e_in.valid = '1' and e_in.instr_tag.valid = '0') severity failure; assert not (l_in.valid = '1' and l_in.instr_tag.valid = '0') severity failure; assert not (fp_in.valid = '1' and fp_in.instr_tag.valid = '0') severity failure; @@ -168,6 +170,11 @@ begin c_out.write_cr_data <= fp_in.write_cr_data; end if; + if fp_in.write_xerc = '1' then + c_out.write_xerc_enable <= '1'; + c_out.write_xerc_data <= fp_in.xerc; + end if; + if l_in.write_enable = '1' then w_out.write_reg <= l_in.write_reg; w_out.write_data <= l_in.write_data;