From: Paul Mackerras Date: Sun, 1 May 2022 23:39:26 +0000 (+1000) Subject: FPU: Convert internal R, A, B, and C registers to 8.56 format X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=23d5c4edc50bf64a7e675220c338671059ede0bf;p=microwatt.git FPU: Convert internal R, A, B, and C registers to 8.56 format This changes the representation of the R, A, B and C registers in the FPU from 10.54 format (10 bits to the left of the binary point and 54 bits to the right) to 8.56 format, to match the representation used in the P and Y registers and the multiplier operands. This eliminates the need for shifting when R, A, B or C is an input to the multiplier and will make it easier to implement integer division in the FPU. Signed-off-by: Paul Mackerras --- diff --git a/fpu.vhdl b/fpu.vhdl index a20a7a0..27587f7 100644 --- a/fpu.vhdl +++ b/fpu.vhdl @@ -28,12 +28,20 @@ architecture behaviour of fpu is type fp_number_class is (ZERO, FINITE, INFINITY, NAN); constant EXP_BITS : natural := 13; + constant UNIT_BIT : natural := 56; + constant QNAN_BIT : natural := UNIT_BIT - 1; + constant SP_LSB : natural := UNIT_BIT - 23; + constant SP_GBIT : natural := SP_LSB - 1; + constant SP_RBIT : natural := SP_LSB - 2; + constant DP_LSB : natural := UNIT_BIT - 52; + constant DP_GBIT : natural := DP_LSB - 1; + constant DP_RBIT : natural := DP_LSB - 2; type fpu_reg_type is record class : fp_number_class; negative : std_ulogic; exponent : signed(EXP_BITS-1 downto 0); -- unbiased - mantissa : std_ulogic_vector(63 downto 0); -- 10.54 format + mantissa : std_ulogic_vector(63 downto 0); -- 8.56 format end record; type state_t is (IDLE, DO_ILLEGAL, @@ -92,7 +100,7 @@ architecture behaviour of fpu is a : fpu_reg_type; b : fpu_reg_type; c : fpu_reg_type; - r : std_ulogic_vector(63 downto 0); -- 10.54 format + r : std_ulogic_vector(63 downto 0); -- 8.56 format s : std_ulogic_vector(55 downto 0); -- extended fraction x : std_ulogic; p : std_ulogic_vector(63 downto 0); -- 8.56 format @@ -170,7 +178,7 @@ architecture behaviour of fpu is constant BIN_ZERO : std_ulogic_vector(1 downto 0) := "00"; constant BIN_R : std_ulogic_vector(1 downto 0) := "01"; constant BIN_RND : std_ulogic_vector(1 downto 0) := "10"; - constant BIN_PS6 : std_ulogic_vector(1 downto 0) := "11"; + constant BIN_PS8 : std_ulogic_vector(1 downto 0) := "11"; constant RES_SUM : std_ulogic_vector(1 downto 0) := "00"; constant RES_SHIFT : std_ulogic_vector(1 downto 0) := "01"; @@ -432,7 +440,8 @@ architecture behaviour of fpu is if exp_nz = '0' then r.exponent := to_signed(-1022, EXP_BITS); end if; - r.mantissa := "000000000" & exp_nz & fpr(51 downto 0) & "00"; + r.mantissa := std_ulogic_vector(shift_left(resize(unsigned(exp_nz & fpr(51 downto 0)), 64), + UNIT_BIT - 52)); cls := exp_ao & exp_nz & frac_nz; case cls is when "000" => r.class := ZERO; @@ -465,22 +474,22 @@ architecture behaviour of fpu is case class is when ZERO => when FINITE => - if mantissa(54) = '1' then + if mantissa(UNIT_BIT) = '1' then -- normalized number result(62 downto 52) := std_ulogic_vector(resize(exp, 11) + 1023); end if; - result(51 downto 29) := mantissa(53 downto 31); + result(51 downto 29) := mantissa(UNIT_BIT - 1 downto SP_LSB); if single_prec = '0' then - result(28 downto 0) := mantissa(30 downto 2); + result(28 downto 0) := mantissa(SP_LSB - 1 downto DP_LSB); end if; when INFINITY => result(62 downto 52) := "11111111111"; when NAN => result(62 downto 52) := "11111111111"; - result(51) := quieten_nan or mantissa(53); - result(50 downto 29) := mantissa(52 downto 31); + result(51) := quieten_nan or mantissa(QNAN_BIT); + result(50 downto 29) := mantissa(QNAN_BIT - 1 downto SP_LSB); if single_prec = '0' then - result(28 downto 0) := mantissa(30 downto 2); + result(28 downto 0) := mantissa(SP_LSB - 1 downto DP_LSB); end if; end case; return result; @@ -488,8 +497,8 @@ architecture behaviour of fpu is -- Determine whether to increment when rounding -- Returns rounding_inc & inexact - -- Assumes x includes the bottom 29 bits of the mantissa already - -- if single_prec = 1 (usually arranged by setting set_x = 1 earlier). + -- If single_prec = 1, assumes x includes the bottom 31 (== SP_LSB - 2) + -- bits of the mantissa already (usually arranged by setting set_x = 1 earlier). function fp_rounding(mantissa: std_ulogic_vector(63 downto 0); x: std_ulogic; single_prec: std_ulogic; rn: std_ulogic_vector(2 downto 0); sign: std_ulogic) @@ -499,11 +508,11 @@ architecture behaviour of fpu is variable lsb : std_ulogic; begin if single_prec = '0' then - grx := mantissa(1 downto 0) & x; - lsb := mantissa(2); + grx := mantissa(DP_GBIT downto DP_RBIT) & (x or (or mantissa(DP_RBIT - 1 downto 0))); + lsb := mantissa(DP_LSB); else - grx := mantissa(30 downto 29) & x; - lsb := mantissa(31); + grx := mantissa(SP_GBIT downto SP_RBIT) & x; + lsb := mantissa(SP_LSB); end if; ret(1) := '0'; ret(0) := or (grx); @@ -589,11 +598,11 @@ begin begin if rising_edge(clk) then if r.is_sqrt = '1' then - addrhi := r.b.mantissa(55 downto 54); + addrhi := r.b.mantissa(UNIT_BIT + 1 downto UNIT_BIT); else addrhi := "00"; end if; - addr := addrhi & r.b.mantissa(53 downto 46); + addr := addrhi & r.b.mantissa(UNIT_BIT - 1 downto UNIT_BIT - 8); inverse_est <= '1' & inverse_table(to_integer(unsigned(addr))); end if; end process; @@ -670,6 +679,8 @@ begin variable maddend : std_ulogic_vector(127 downto 0); variable sum : std_ulogic_vector(63 downto 0); variable round_inc : std_ulogic_vector(63 downto 0); + variable rbit_inc : std_ulogic; + variable mult_mask : std_ulogic; variable int_result : std_ulogic; variable illegal : std_ulogic; begin @@ -729,8 +740,8 @@ begin end if; end if; - r_hi_nz <= or (r.r(55 downto 31)); - r_lo_nz <= or (r.r(30 downto 2)); + r_hi_nz <= or (r.r(UNIT_BIT + 1 downto SP_LSB)); + r_lo_nz <= or (r.r(SP_LSB - 1 downto DP_LSB)); s_nz <= or (r.s); if r.single_prec = '0' then @@ -761,13 +772,13 @@ begin end if; -- Compare P with zero and with B - px_nz := or (r.p(57 downto 4)); + px_nz := or (r.p(UNIT_BIT + 1 downto 4)); pcmpb_eq := '0'; - if r.p(59 downto 4) = r.b.mantissa(55 downto 0) then + if r.p(59 downto 4) = r.b.mantissa(UNIT_BIT + 1 downto DP_RBIT) then pcmpb_eq := '1'; end if; pcmpb_lt := '0'; - if unsigned(r.p(59 downto 4)) < unsigned(r.b.mantissa(55 downto 0)) then + if unsigned(r.p(59 downto 4)) < unsigned(r.b.mantissa(UNIT_BIT + 1 downto DP_RBIT)) then pcmpb_lt := '1'; end if; @@ -805,6 +816,8 @@ begin pshift := '0'; renorm_sqrt := '0'; shiftin := '0'; + rbit_inc := '0'; + mult_mask := '0'; int_result := '0'; illegal := '0'; case r.state is @@ -870,7 +883,7 @@ begin v.state := DO_FCTI; when "10010" => v.opsel_a := AIN_A; - if v.b.mantissa(54) = '0' and v.a.mantissa(54) = '1' then + if v.b.mantissa(UNIT_BIT) = '0' and v.a.mantissa(UNIT_BIT) = '1' then v.opsel_a := AIN_B; end if; v.state := DO_FDIV; @@ -889,7 +902,7 @@ begin when "11001" => v.is_multiply := '1'; v.opsel_a := AIN_A; - if v.c.mantissa(54) = '0' and v.a.mantissa(54) = '1' then + if v.c.mantissa(UNIT_BIT) = '0' and v.a.mantissa(UNIT_BIT) = '1' then v.opsel_a := AIN_C; end if; v.state := DO_FMUL; @@ -898,9 +911,9 @@ begin v.opsel_a := AIN_B; v.state := DO_FRSQRTE; when "11100" | "11101" | "11110" | "11111" => - if v.a.mantissa(54) = '0' then + if v.a.mantissa(UNIT_BIT) = '0' then v.opsel_a := AIN_A; - elsif v.c.mantissa(54) = '0' then + elsif v.c.mantissa(UNIT_BIT) = '0' then v.opsel_a := AIN_C; else v.opsel_a := AIN_B; @@ -934,7 +947,7 @@ begin v.instr_done := '1'; v.cr_result := "0000"; if r.a.class = INFINITY or r.b.class = ZERO or r.b.class = INFINITY or - (r.b.class = FINITE and r.b.mantissa(53) = '0') then + (r.b.class = FINITE and r.b.mantissa(UNIT_BIT) = '0') then v.cr_result(2) := '1'; end if; if r.a.class = NAN or r.a.class = INFINITY or @@ -952,7 +965,7 @@ begin v.instr_done := '1'; v.cr_result := "0000"; if r.b.class = ZERO or r.b.class = INFINITY or - (r.b.class = FINITE and r.b.mantissa(53) = '0') then + (r.b.class = FINITE and r.b.mantissa(UNIT_BIT) = '0') then v.cr_result(2) := '1'; end if; if r.b.class = NAN or r.b.class = INFINITY or r.b.class = ZERO @@ -966,8 +979,8 @@ begin v.instr_done := '1'; update_fx := '1'; v.result_exp := r.b.exponent; - if (r.a.class = NAN and r.a.mantissa(53) = '0') or - (r.b.class = NAN and r.b.mantissa(53) = '0') then + if (r.a.class = NAN and r.a.mantissa(QNAN_BIT) = '0') or + (r.b.class = NAN and r.b.mantissa(QNAN_BIT) = '0') then -- Signalling NAN v.fpscr(FPSCR_VXSNAN) := '1'; if r.insn(6) = '1' and r.fpscr(FPSCR_VE) = '0' then @@ -1119,7 +1132,7 @@ begin v.result_exp := r.b.exponent; v.fpscr(FPSCR_FR) := '0'; v.fpscr(FPSCR_FI) := '0'; - if r.b.class = NAN and r.b.mantissa(53) = '0' then + if r.b.class = NAN and r.b.mantissa(QNAN_BIT) = '0' then -- Signalling NAN v.fpscr(FPSCR_VXSNAN) := '1'; invalid := '1'; @@ -1190,7 +1203,7 @@ begin elsif r.b.exponent >= to_signed(52, EXP_BITS) then -- integer already, no rounding required, -- shift into final position - v.shift := r.b.exponent - to_signed(54, EXP_BITS); + v.shift := r.b.exponent - to_signed(UNIT_BIT, EXP_BITS); if r.insn(8) = '1' and r.b.negative = '1' then v.state := INT_OFLOW; else @@ -1214,7 +1227,7 @@ begin v.result_sign := '1'; end if; v.result_class := r.b.class; - v.result_exp := to_signed(54, EXP_BITS); + v.result_exp := to_signed(UNIT_BIT, EXP_BITS); v.fpscr(FPSCR_FR) := '0'; v.fpscr(FPSCR_FI) := '0'; if r.b.class = ZERO then @@ -1286,9 +1299,9 @@ begin if r.a.class = FINITE and r.c.class = FINITE then v.result_exp := r.a.exponent + r.c.exponent; -- Renormalize denorm operands - if r.a.mantissa(54) = '0' then + if r.a.mantissa(UNIT_BIT) = '0' then v.state := RENORM_A; - elsif r.c.mantissa(54) = '0' then + elsif r.c.mantissa(UNIT_BIT) = '0' then v.state := RENORM_C; else f_to_multiply.valid <= '1'; @@ -1325,9 +1338,9 @@ begin v.count := "00"; if r.a.class = FINITE and r.b.class = FINITE then -- Renormalize denorm operands - if r.a.mantissa(54) = '0' then + if r.a.mantissa(UNIT_BIT) = '0' then v.state := RENORM_A; - elsif r.b.mantissa(54) = '0' then + elsif r.b.mantissa(UNIT_BIT) = '0' then v.state := RENORM_B; else v.first := '1'; @@ -1384,7 +1397,7 @@ begin if r.b.negative = '1' then v.fpscr(FPSCR_VXSQRT) := '1'; qnan_result := '1'; - elsif r.b.mantissa(54) = '0' then + elsif r.b.mantissa(UNIT_BIT) = '0' then v.state := RENORM_B; elsif r.b.exponent(0) = '0' then v.state := SQRT_1; @@ -1416,7 +1429,7 @@ begin case r.b.class is when FINITE => v.result_exp := - r.b.exponent; - if r.b.mantissa(54) = '0' then + if r.b.mantissa(UNIT_BIT) = '0' then v.state := RENORM_B; else v.state := FRE_1; @@ -1446,7 +1459,7 @@ begin if r.b.negative = '1' then v.fpscr(FPSCR_VXSQRT) := '1'; qnan_result := '1'; - elsif r.b.mantissa(54) = '0' then + elsif r.b.mantissa(UNIT_BIT) = '0' then v.state := RENORM_B; elsif r.b.exponent(0) = '0' then v.state := RSQRT_1; @@ -1488,9 +1501,9 @@ begin mulexp := r.a.exponent + r.c.exponent; v.result_exp := mulexp; -- Make sure A and C are normalized - if r.a.mantissa(54) = '0' then + if r.a.mantissa(UNIT_BIT) = '0' then v.state := RENORM_A; - elsif r.c.mantissa(54) = '0' then + elsif r.c.mantissa(UNIT_BIT) = '0' then v.state := RENORM_C; elsif r.b.class = ZERO then -- no addend, degenerates to multiply @@ -1559,7 +1572,7 @@ begin set_a := '1'; v.result_exp := new_exp; if r.insn(4) = '1' then - if r.c.mantissa(54) = '1' then + if r.c.mantissa(UNIT_BIT) = '1' then if r.insn(3) = '0' or r.b.class = ZERO then v.first := '1'; v.state := MULT_1; @@ -1575,7 +1588,7 @@ begin v.state := RENORM_C; end if; else - if r.b.mantissa(54) = '1' then + if r.b.mantissa(UNIT_BIT) = '1' then v.first := '1'; v.state := DIV_2; else @@ -1654,7 +1667,7 @@ begin opsel_ainv <= '1'; carry_in <= '1'; v.state := FINISH; - elsif r.r(55) = '1' then + elsif r.r(UNIT_BIT + 1) = '1' then -- sum overflowed, shift right opsel_r <= RES_SHIFT; set_x := '1'; @@ -1663,10 +1676,10 @@ begin else v.state := ROUNDING; end if; - elsif r.r(54) = '1' then + elsif r.r(UNIT_BIT) = '1' then set_x := '1'; v.state := ROUNDING; - elsif (r_hi_nz or r_lo_nz or r.r(1) or r.r(0)) = '0' then + elsif (r_hi_nz or r_lo_nz or (or (r.r(DP_LSB - 1 downto 0)))) = '0' then -- r.x must be zero at this point v.result_class := ZERO; if r.is_subtract = '1' then @@ -1753,12 +1766,12 @@ begin opsel_s <= S_NEG; set_s := '1'; end if; - v.shift := to_signed(56, EXP_BITS); + v.shift := to_signed(UNIT_BIT, EXP_BITS); v.state := FMADD_6; when FMADD_6 => - -- r.shift = 56 (or 0, but only if r is now nonzero) - if (r.r(56) or r_hi_nz or r_lo_nz or r.r(1) or r.r(0)) = '0' then + -- r.shift = UNIT_BIT (or 0, but only if r is now nonzero) + if (r.r(UNIT_BIT + 2) or r_hi_nz or r_lo_nz or (or (r.r(DP_LSB - 1 downto 0)))) = '0' then if s_nz = '0' then -- must be a subtraction, and r.x must be zero v.result_class := ZERO; @@ -1771,7 +1784,7 @@ begin set_s := '1'; -- stay in state FMADD_6 end if; - elsif r.r(56 downto 54) = "001" then + elsif r.r(UNIT_BIT + 2 downto UNIT_BIT) = "001" then v.state := FINISH; else renormalize := '1'; @@ -1835,6 +1848,7 @@ begin set_y := r.first; f_to_multiply.valid <= r.first; pshift := '1'; + mult_mask := '1'; if multiply_to_f.valid = '1' then opsel_r <= RES_MULT; v.first := '1'; @@ -1853,13 +1867,15 @@ begin end if; when DIV_6 => + -- r.opsel_a = AIN_R -- test if remainder is 0 or >= B if pcmpb_lt = '1' then -- quotient is correct, set X if remainder non-zero - v.x := r.p(58) or px_nz; + v.x := r.p(UNIT_BIT + 2) or px_nz; else - -- quotient needs to be incremented by 1 - carry_in <= '1'; + -- quotient needs to be incremented by 1 in R-bit position + rbit_inc := '1'; + opsel_b <= BIN_RND; v.x := not pcmpb_eq; end if; v.state := FINISH; @@ -1913,6 +1929,7 @@ begin msel_2 <= MUL2_R; set_y := r.first; pshift := '1'; + mult_mask := '1'; if multiply_to_f.valid = '1' then -- put result into R opsel_r <= RES_MULT; @@ -1957,6 +1974,7 @@ begin set_y := r.first; -- wait for second multiply (should be here already) pshift := '1'; + mult_mask := '1'; if multiply_to_f.valid = '1' then -- put result into R opsel_r <= RES_MULT; @@ -2001,11 +2019,8 @@ begin end if; when SQRT_10 => - -- Add the bottom 8 bits of P, sign-extended, - -- divided by 4, onto R. - -- The division by 4 is because R is 10.54 format - -- whereas P is 8.56 format. - opsel_b <= BIN_PS6; + -- Add the bottom 8 bits of P, sign-extended, onto R. + opsel_b <= BIN_PS8; sqrt_exp := r.b.exponent(EXP_BITS-1) & r.b.exponent(EXP_BITS-1 downto 1); v.result_exp := sqrt_exp; v.shift := to_signed(1, EXP_BITS); @@ -2030,7 +2045,7 @@ begin -- test if remainder is 0 or >= B = 2*R + 1 if pcmpb_lt = '1' then -- square root is correct, set X if remainder non-zero - v.x := r.p(58) or px_nz; + v.x := r.p(UNIT_BIT + 2) or px_nz; else -- square root needs to be incremented by 1 carry_in <= '1'; @@ -2043,10 +2058,10 @@ begin opsel_r <= RES_SHIFT; set_x := '1'; v.state := INT_ROUND; - v.shift := to_signed(-2, EXP_BITS); + v.shift := to_signed(52 - UNIT_BIT, EXP_BITS); when INT_ROUND => - -- r.shift = -2 + -- r.shift = -4 (== 52 - UNIT_BIT) opsel_r <= RES_SHIFT; round := fp_rounding(r.r, r.x, '0', r.round_mode, r.result_sign); v.fpscr(FPSCR_FR downto FPSCR_FI) := round; @@ -2059,7 +2074,7 @@ begin end if; when INT_ISHIFT => - -- r.shift = b.exponent - 54; + -- r.shift = b.exponent - UNIT_BIT; opsel_r <= RES_SHIFT; v.state := INT_FINAL; @@ -2129,7 +2144,7 @@ begin if r.is_multiply = '1' and px_nz = '1' then v.x := '1'; end if; - if r.r(63 downto 54) /= "0000000001" then + if r.r(63 downto UNIT_BIT) /= std_ulogic_vector(to_unsigned(1, 64 - UNIT_BIT)) then renormalize := '1'; v.state := NORMALIZE; else @@ -2172,7 +2187,7 @@ begin -- if denormalized, have to normalize before rounding v.fpscr(FPSCR_UX) := '1'; v.result_exp := r.result_exp + bias_exp; - if r.r(54) = '0' then + if r.r(UNIT_BIT) = '0' then renormalize := '1'; v.state := NORMALIZE; else @@ -2215,7 +2230,7 @@ begin v.shift := to_signed(-1, EXP_BITS); v.state := ROUNDING_2; else - if r.r(54) = '0' then + if r.r(UNIT_BIT) = '0' then -- result after masking could be zero, or could be a -- denormalized result that needs to be renormalized renormalize := '1'; @@ -2235,14 +2250,14 @@ begin -- Check for overflow during rounding -- r.shift = -1 v.x := '0'; - if r.r(55) = '1' then + if r.r(UNIT_BIT + 1) = '1' then opsel_r <= RES_SHIFT; if exp_huge = '1' then v.state := ROUND_OFLOW; else arith_done := '1'; end if; - elsif r.r(54) = '0' then + elsif r.r(UNIT_BIT) = '0' then -- Do CLZ so we can renormalize the result renormalize := '1'; v.state := ROUNDING_3; @@ -2278,9 +2293,9 @@ begin arith_done := '1'; when NAN_RESULT => - if (r.use_a = '1' and r.a.class = NAN and r.a.mantissa(53) = '0') or - (r.use_b = '1' and r.b.class = NAN and r.b.mantissa(53) = '0') or - (r.use_c = '1' and r.c.class = NAN and r.c.mantissa(53) = '0') then + if (r.use_a = '1' and r.a.class = NAN and r.a.mantissa(QNAN_BIT) = '0') or + (r.use_b = '1' and r.b.class = NAN and r.b.mantissa(QNAN_BIT) = '0') or + (r.use_c = '1' and r.c.class = NAN and r.c.mantissa(QNAN_BIT) = '0') then -- Signalling NAN v.fpscr(FPSCR_VXSNAN) := '1'; invalid := '1'; @@ -2343,39 +2358,41 @@ begin -- Multiplier and divide/square root data path case msel_1 is when MUL1_A => - f_to_multiply.data1 <= r.a.mantissa(61 downto 0) & "00"; + f_to_multiply.data1 <= r.a.mantissa; when MUL1_B => - f_to_multiply.data1 <= r.b.mantissa(61 downto 0) & "00"; + f_to_multiply.data1 <= r.b.mantissa; when MUL1_Y => f_to_multiply.data1 <= r.y; when others => - f_to_multiply.data1 <= r.r(61 downto 0) & "00"; + f_to_multiply.data1 <= r.r; end case; case msel_2 is when MUL2_C => - f_to_multiply.data2 <= r.c.mantissa(61 downto 0) & "00"; + f_to_multiply.data2 <= r.c.mantissa; when MUL2_LUT => - f_to_multiply.data2 <= x"00" & inverse_est & '0' & x"000000000"; + f_to_multiply.data2 <= std_ulogic_vector(shift_left(resize(unsigned(inverse_est), 64), + UNIT_BIT - 19)); when MUL2_P => f_to_multiply.data2 <= r.p; when others => - f_to_multiply.data2 <= r.r(61 downto 0) & "00"; + f_to_multiply.data2 <= r.r; end case; maddend := (others => '0'); case msel_add is when MULADD_CONST => -- addend is 2.0 or 1.5 in 16.112 format if r.is_sqrt = '0' then - maddend(113) := '1'; -- 2.0 + maddend(2*UNIT_BIT + 1) := '1'; -- 2.0 else - maddend(112 downto 111) := "11"; -- 1.5 + maddend(2*UNIT_BIT downto 2*UNIT_BIT - 1) := "11"; -- 1.5 end if; when MULADD_A => -- addend is A in 16.112 format - maddend(121 downto 58) := r.a.mantissa; + maddend(UNIT_BIT + 63 downto UNIT_BIT) := r.a.mantissa; when MULADD_RS => -- addend is concatenation of R and S in 16.112 format - maddend := "000000" & r.r & r.s & "00"; + maddend(UNIT_BIT + 63 downto UNIT_BIT) := r.r; + maddend(UNIT_BIT - 1 downto 0) := r.s; when others => end case; if msel_inv = '1' then @@ -2391,7 +2408,7 @@ begin if pshift = '0' then v.p := multiply_to_f.result(63 downto 0); else - v.p := multiply_to_f.result(119 downto 56); + v.p := multiply_to_f.result(UNIT_BIT + 63 downto UNIT_BIT); end if; end if; @@ -2433,11 +2450,15 @@ begin when BIN_R => in_b0 := r.r; when BIN_RND => - round_inc := (31 => r.single_prec, 2 => not r.single_prec, others => '0'); + if rbit_inc = '0' then + round_inc := (SP_LSB => r.single_prec, DP_LSB => not r.single_prec, others => '0'); + else + round_inc := (DP_RBIT => '1', others => '0'); + end if; in_b0 := round_inc; when others => - -- BIN_PS6, 6 LSBs of P/4 sign-extended to 64 - in_b0 := std_ulogic_vector(resize(signed(r.p(7 downto 2)), 64)); + -- BIN_PS8, 8 LSBs of P sign-extended to 64 + in_b0 := std_ulogic_vector(resize(signed(r.p(7 downto 0)), 64)); end case; if opsel_binv = '1' then in_b0 := not in_b0; @@ -2451,9 +2472,9 @@ begin end if; sum := std_ulogic_vector(unsigned(in_a) + unsigned(in_b) + carry_in); if opsel_mask = '1' then - sum(1 downto 0) := "00"; + sum(DP_LSB - 1 downto 0) := "0000"; if r.single_prec = '1' then - sum(30 downto 2) := (others => '0'); + sum(SP_LSB - 1 downto DP_LSB) := (others => '0'); end if; end if; case opsel_r is @@ -2462,20 +2483,25 @@ begin when RES_SHIFT => result <= shift_res; when RES_MULT => - result <= multiply_to_f.result(121 downto 58); + result <= multiply_to_f.result(UNIT_BIT + 63 downto UNIT_BIT); + if mult_mask = '1' then + -- trim to 54 fraction bits if mult_mask = 1, for quotient when dividing + result(UNIT_BIT - 55 downto 0) <= (others => '0'); + end if; when others => + misc := (others => '0'); case misc_sel is when "0000" => misc := x"00000000" & (r.fpscr and fpscr_mask); when "0001" => -- generated QNaN mantissa - misc := x"0020000000000000"; + misc(QNAN_BIT) := '1'; when "0010" => -- mantissa of max representable DP number - misc := x"007ffffffffffffc"; + misc(UNIT_BIT downto DP_LSB) := (others => '1'); when "0011" => -- mantissa of max representable SP number - misc := x"007fffff80000000"; + misc(UNIT_BIT downto SP_LSB) := (others => '1'); when "0100" => -- fmrgow result misc := r.a.mantissa(31 downto 0) & r.b.mantissa(31 downto 0); @@ -2483,7 +2509,8 @@ begin -- fmrgew result misc := r.a.mantissa(63 downto 32) & r.b.mantissa(63 downto 32); when "0111" => - misc := 10x"000" & inverse_est & 35x"000000000"; + misc := std_ulogic_vector(shift_left(resize(unsigned(inverse_est), 64), + UNIT_BIT - 19)); when "1000" => -- max positive result for fctiw[z] misc := x"000000007fffffff"; @@ -2509,7 +2536,6 @@ begin -- max negative result for fctidu[z] misc := x"0000000000000000"; when others => - misc := x"0000000000000000"; end case; result <= misc; end case; @@ -2519,7 +2545,7 @@ begin when S_NEG => v.s := std_ulogic_vector(unsigned(not r.s) + (not r.x)); when S_MULT => - v.s := multiply_to_f.result(57 downto 2); + v.s := multiply_to_f.result(55 downto 0); when S_SHIFT => v.s := shift_res(63 downto 8); if shift_res(7 downto 0) /= x"00" then @@ -2553,12 +2579,12 @@ begin -- make denormalized value end up with even exponent clz(0) := '1'; end if; - v.shift := resize(signed('0' & clz) - 9, EXP_BITS); + v.shift := resize(signed('0' & clz) - (63 - UNIT_BIT), EXP_BITS); end if; if r.update_fprf = '1' then v.fpscr(FPSCR_C downto FPSCR_FU) := result_flags(r.result_sign, r.result_class, - r.r(54) and not r.denorm); + r.r(UNIT_BIT) and not r.denorm); end if; v.fpscr(FPSCR_VX) := (or (v.fpscr(FPSCR_VXSNAN downto FPSCR_VXVC))) or