From: Paul Mackerras Date: Thu, 16 Jul 2020 05:51:57 +0000 (+1000) Subject: FPU: Implement floating convert from integer instructions X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=9e8fb293edd59f355cc1fd020f96dafee0af867c;p=microwatt.git FPU: Implement floating convert from integer instructions This implements fcfid, fcfidu, fcfids and fcfidus, which convert 64-bit integer values in an FPR into a floating-point value. This brings in a lot of the datapath that will be needed in future, including the shifter, adder, mask generator and count-leading-zeroes logic, along with the machinery for rounding to single-precision or double-precision, detecting inexact results, signalling inexact-result exceptions, and updating result flags in the FPSCR. Signed-off-by: Paul Mackerras --- diff --git a/decode1.vhdl b/decode1.vhdl index 5f5fb80..83444cf 100644 --- a/decode1.vhdl +++ b/decode1.vhdl @@ -55,6 +55,7 @@ architecture behaviour of decode1 is type op_19_subop_array_t is array(0 to 7) of decode_rom_t; type op_30_subop_array_t is array(0 to 15) of decode_rom_t; type op_31_subop_array_t is array(0 to 1023) of decode_rom_t; + type op_59_subop_array_t is array(0 to 31) of decode_rom_t; type minor_rom_array_2_t is array(0 to 3) of decode_rom_t; type op_63_subop_array_0_t is array(0 to 511) of decode_rom_t; @@ -410,6 +411,13 @@ architecture behaviour of decode1 is others => decode_rom_init ); + constant decode_op_59_array : op_59_subop_array_t := ( + -- unit internal in1 in2 in3 out CR CR inv inv cry cry ldst BR sgn upd rsrv 32b sgn rc lk sgl + -- op in out A out in out len ext pipe + 2#01110# => (FPU, OP_FPOP_I, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- fcfid[u]s + others => illegal_inst + ); + constant decode_op_62_array : minor_rom_array_2_t := ( -- unit internal in1 in2 in3 out CR CR inv inv cry cry ldst BR sgn upd rsrv 32b sgn rc lk sgl -- op in out A out in out len ext pipe @@ -433,6 +441,8 @@ 
architecture behaviour of decode1 is 2#100000010# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 2/8=fmr 2#100000100# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 4/8=fnabs 2#100001000# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 8/8=fabs + 2#111011010# => (FPU, OP_FPOP_I, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 26/14=fcfid + 2#111011110# => (FPU, OP_FPOP_I, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 30/14=fcfidu others => illegal_inst ); @@ -586,6 +596,15 @@ begin when 58 => v.decode := decode_op_58_array(to_integer(unsigned(f_in.insn(1 downto 0)))); + when 59 => + if HAS_FPU then + -- floating point operations, mostly single-precision + v.decode := decode_op_59_array(to_integer(unsigned(f_in.insn(5 downto 1)))); + if f_in.insn(5) = '0' and not std_match(f_in.insn(10 downto 1), "11-1001110") then + vi.override := '1'; + end if; + end if; + when 62 => v.decode := decode_op_62_array(to_integer(unsigned(f_in.insn(1 downto 0)))); diff --git a/fpu.vhdl b/fpu.vhdl index 3711b35..fecb7bb 100644 --- a/fpu.vhdl +++ b/fpu.vhdl @@ -37,7 +37,12 @@ architecture behaviour of fpu is type state_t is (IDLE, DO_MCRFS, DO_MTFSB, DO_MTFSFI, DO_MFFS, DO_MTFSF, - DO_FMR); + DO_FMR, + DO_FCFID, + FINISH, NORMALIZE, + ROUND_UFLOW, ROUND_OFLOW, + ROUNDING, ROUNDING_2, ROUNDING_3, + DENORM); type reg_type is record state : state_t; @@ -54,21 +59,121 @@ architecture behaviour of fpu is fpscr : std_ulogic_vector(31 downto 0); a : fpu_reg_type; b : fpu_reg_type; - r : std_ulogic_vector(63 downto 0); + r : std_ulogic_vector(63 downto 0); -- 10.54 format + x : std_ulogic; result_sign : std_ulogic; result_class : fp_number_class; 
result_exp : signed(EXP_BITS-1 downto 0); + shift : signed(EXP_BITS-1 downto 0); writing_back : std_ulogic; int_result : std_ulogic; cr_result : std_ulogic_vector(3 downto 0); cr_mask : std_ulogic_vector(7 downto 0); + old_exc : std_ulogic_vector(4 downto 0); + update_fprf : std_ulogic; + tiny : std_ulogic; + denorm : std_ulogic; + round_mode : std_ulogic_vector(2 downto 0); end record; signal r, rin : reg_type; signal fp_result : std_ulogic_vector(63 downto 0); + signal opsel_a : std_ulogic_vector(1 downto 0); + signal opsel_b : std_ulogic_vector(1 downto 0); signal opsel_r : std_ulogic_vector(1 downto 0); + signal opsel_ainv : std_ulogic; + signal opsel_amask : std_ulogic; + signal in_a : std_ulogic_vector(63 downto 0); + signal in_b : std_ulogic_vector(63 downto 0); signal result : std_ulogic_vector(63 downto 0); + signal carry_in : std_ulogic; + signal lost_bits : std_ulogic; + signal r_hi_nz : std_ulogic; + signal r_lo_nz : std_ulogic; + signal misc_sel : std_ulogic_vector(3 downto 0); + + -- opsel values + constant AIN_R : std_ulogic_vector(1 downto 0) := "00"; + constant AIN_A : std_ulogic_vector(1 downto 0) := "01"; + constant AIN_B : std_ulogic_vector(1 downto 0) := "10"; + + constant BIN_ZERO : std_ulogic_vector(1 downto 0) := "00"; + constant BIN_R : std_ulogic_vector(1 downto 0) := "01"; + constant BIN_MASK : std_ulogic_vector(1 downto 0) := "10"; + + constant RES_SUM : std_ulogic_vector(1 downto 0) := "00"; + constant RES_SHIFT : std_ulogic_vector(1 downto 0) := "01"; + constant RES_MISC : std_ulogic_vector(1 downto 0) := "11"; + + -- Left and right shifter with 120 bit input and 64 bit output. + -- Shifts inp left by shift bits and returns the upper 64 bits of + -- the result. The shift parameter is interpreted as a signed + -- number in the range -64..63, with negative values indicating + -- right shifts. 
+ function shifter_64(inp: std_ulogic_vector(119 downto 0); + shift: std_ulogic_vector(6 downto 0)) + return std_ulogic_vector is + variable s1 : std_ulogic_vector(94 downto 0); + variable s2 : std_ulogic_vector(70 downto 0); + variable result : std_ulogic_vector(63 downto 0); + begin + case shift(6 downto 5) is + when "00" => + s1 := inp(119 downto 25); + when "01" => + s1 := inp(87 downto 0) & "0000000"; + when "10" => + s1 := x"0000000000000000" & inp(119 downto 89); + when others => + s1 := x"00000000" & inp(119 downto 57); + end case; + case shift(4 downto 3) is + when "00" => + s2 := s1(94 downto 24); + when "01" => + s2 := s1(86 downto 16); + when "10" => + s2 := s1(78 downto 8); + when others => + s2 := s1(70 downto 0); + end case; + case shift(2 downto 0) is + when "000" => + result := s2(70 downto 7); + when "001" => + result := s2(69 downto 6); + when "010" => + result := s2(68 downto 5); + when "011" => + result := s2(67 downto 4); + when "100" => + result := s2(66 downto 3); + when "101" => + result := s2(65 downto 2); + when "110" => + result := s2(64 downto 1); + when others => + result := s2(63 downto 0); + end case; + return result; + end; + + -- Generate a mask with 0-bits on the left and 1-bits on the right which + -- selects the bits that will be lost in doing a right shift. The shift + -- parameter is the bottom 6 bits of a negative shift count, + -- indicating a right shift. + function right_mask(shift: unsigned(5 downto 0)) return std_ulogic_vector is + variable result: std_ulogic_vector(63 downto 0); + begin + result := (others => '0'); + for i in 0 to 63 loop + if i >= shift then + result(63 - i) := '1'; + end if; + end loop; + return result; + end; -- Split a DP floating-point number into components and work out its class. 
-- If is_int = 1, the input is considered an integer @@ -112,7 +217,8 @@ architecture behaviour of fpu is -- Construct a DP floating-point result from components function pack_dp(sign: std_ulogic; class: fp_number_class; exp: signed(EXP_BITS-1 downto 0); - mantissa: std_ulogic_vector) return std_ulogic_vector is + mantissa: std_ulogic_vector; single_prec: std_ulogic) + return std_ulogic_vector is variable result : std_ulogic_vector(63 downto 0); begin result := (others => '0'); @@ -124,16 +230,76 @@ architecture behaviour of fpu is -- normalized number result(62 downto 52) := std_ulogic_vector(resize(exp, 11) + 1023); end if; - result(51 downto 0) := mantissa(53 downto 2); + result(51 downto 29) := mantissa(53 downto 31); + if single_prec = '0' then + result(28 downto 0) := mantissa(30 downto 2); + end if; when INFINITY => result(62 downto 52) := "11111111111"; when NAN => result(62 downto 52) := "11111111111"; - result(51 downto 0) := mantissa(53 downto 2); + result(51 downto 29) := mantissa(53 downto 31); + if single_prec = '0' then + result(28 downto 0) := mantissa(30 downto 2); + end if; end case; return result; end; + -- Determine whether to increment when rounding + -- Returns rounding_inc & inexact + -- Assumes x includes the bottom 29 bits of the mantissa already + -- if single_prec = 1 (usually arranged by setting set_x = 1 earlier). 
+ function fp_rounding(mantissa: std_ulogic_vector(63 downto 0); x: std_ulogic; + single_prec: std_ulogic; rn: std_ulogic_vector(2 downto 0); + sign: std_ulogic) + return std_ulogic_vector is + variable grx : std_ulogic_vector(2 downto 0); + variable ret : std_ulogic_vector(1 downto 0); + variable lsb : std_ulogic; + begin + if single_prec = '0' then + grx := mantissa(1 downto 0) & x; + lsb := mantissa(2); + else + grx := mantissa(30 downto 29) & x; + lsb := mantissa(31); + end if; + ret(1) := '0'; + ret(0) := or (grx); + case rn(1 downto 0) is + when "00" => -- round to nearest + if grx = "100" and rn(2) = '0' then + ret(1) := lsb; -- tie, round to even + else + ret(1) := grx(2); + end if; + when "01" => -- round towards zero + when others => -- round towards +/- inf + if rn(0) = sign then + -- round towards greater magnitude + ret(1) := ret(0); + end if; + end case; + return ret; + end; + + -- Determine result flags to write into the FPSCR + function result_flags(sign: std_ulogic; class: fp_number_class; unitbit: std_ulogic) + return std_ulogic_vector is + begin + case class is + when ZERO => + return sign & "0010"; + when FINITE => + return (not unitbit) & sign & (not sign) & "00"; + when INFINITY => + return '0' & sign & (not sign) & "01"; + when NAN => + return "10001"; + end case; + end; + begin fpu_0: process(clk) begin @@ -174,6 +340,25 @@ begin variable j, k : integer; variable flm : std_ulogic_vector(7 downto 0); variable int_input : std_ulogic; + variable mask : std_ulogic_vector(63 downto 0); + variable in_a0 : std_ulogic_vector(63 downto 0); + variable in_b0 : std_ulogic_vector(63 downto 0); + variable misc : std_ulogic_vector(63 downto 0); + variable shift_res : std_ulogic_vector(63 downto 0); + variable round : std_ulogic_vector(1 downto 0); + variable update_fx : std_ulogic; + variable arith_done : std_ulogic; + variable mant_nz : std_ulogic; + variable min_exp : signed(EXP_BITS-1 downto 0); + variable max_exp : signed(EXP_BITS-1 downto 0); + 
variable bias_exp : signed(EXP_BITS-1 downto 0); + variable new_exp : signed(EXP_BITS-1 downto 0); + variable exp_tiny : std_ulogic; + variable exp_huge : std_ulogic; + variable renormalize : std_ulogic; + variable clz : std_ulogic_vector(5 downto 0); + variable set_x : std_ulogic; + variable mshift : signed(EXP_BITS-1 downto 0); begin v := r; illegal := '0'; @@ -199,16 +384,53 @@ begin if e_in.op = OP_FPOP_I then int_input := '1'; end if; + v.tiny := '0'; + v.denorm := '0'; + v.round_mode := '0' & r.fpscr(FPSCR_RN+1 downto FPSCR_RN); adec := decode_dp(e_in.fra, int_input); bdec := decode_dp(e_in.frb, int_input); v.a := adec; v.b := bdec; end if; + r_hi_nz <= or (r.r(55 downto 31)); + r_lo_nz <= or (r.r(30 downto 2)); + + if r.single_prec = '0' then + max_exp := to_signed(1023, EXP_BITS); + min_exp := to_signed(-1022, EXP_BITS); + bias_exp := to_signed(1536, EXP_BITS); + else + max_exp := to_signed(127, EXP_BITS); + min_exp := to_signed(-126, EXP_BITS); + bias_exp := to_signed(192, EXP_BITS); + end if; + new_exp := r.result_exp - r.shift; + exp_tiny := '0'; + exp_huge := '0'; + if new_exp < min_exp then + exp_tiny := '1'; + end if; + if new_exp > max_exp then + exp_huge := '1'; + end if; + v.writing_back := '0'; v.instr_done := '0'; - opsel_r <= "00"; + v.update_fprf := '0'; + v.shift := to_signed(0, EXP_BITS); + opsel_a <= AIN_R; + opsel_ainv <= '0'; + opsel_amask <= '0'; + opsel_b <= BIN_ZERO; + opsel_r <= RES_SUM; + carry_in <= '0'; + misc_sel <= "0000"; fpscr_mask := (others => '1'); + update_fx := '0'; + arith_done := '0'; + renormalize := '0'; + set_x := '0'; case r.state is when IDLE => @@ -230,10 +452,15 @@ begin end if; when "01000" => v.state := DO_FMR; + when "01110" => + -- fcfid[u][s] + v.state := DO_FCFID; when others => illegal := '1'; end case; end if; + v.x := '0'; + v.old_exc := r.fpscr(FPSCR_VX downto FPSCR_XX); when DO_MCRFS => j := to_integer(unsigned(insn_bfa(r.insn))); @@ -276,7 +503,7 @@ begin when DO_MFFS => v.int_result := '1'; 
v.writing_back := '1'; - opsel_r <= "10"; + opsel_r <= RES_MISC; case r.insn(20 downto 16) is when "00000" => -- mffs @@ -322,6 +549,7 @@ begin v.state := IDLE; when DO_FMR => + opsel_a <= AIN_B; v.result_class := r.b.class; v.result_exp := r.b.exponent; if r.insn(9) = '1' then @@ -339,29 +567,281 @@ begin v.instr_done := '1'; v.state := IDLE; + when DO_FCFID => + v.result_sign := '0'; + opsel_a <= AIN_B; + if r.insn(8) = '0' and r.b.negative = '1' then + -- fcfid[s] with negative operand, set R = -B + opsel_ainv <= '1'; + carry_in <= '1'; + v.result_sign := '1'; + end if; + v.result_class := r.b.class; + v.result_exp := to_signed(54, EXP_BITS); + v.fpscr(FPSCR_FR) := '0'; + v.fpscr(FPSCR_FI) := '0'; + if r.b.class = ZERO then + arith_done := '1'; + else + v.state := FINISH; + end if; + + when FINISH => + if r.r(63 downto 54) /= "0000000001" then + renormalize := '1'; + v.state := NORMALIZE; + else + set_x := '1'; + if exp_tiny = '1' then + v.shift := new_exp - min_exp; + v.state := ROUND_UFLOW; + elsif exp_huge = '1' then + v.state := ROUND_OFLOW; + else + v.shift := to_signed(-2, EXP_BITS); + v.state := ROUNDING; + end if; + end if; + + when NORMALIZE => + -- Shift so we have 9 leading zeroes (we know R is non-zero) + opsel_r <= RES_SHIFT; + set_x := '1'; + if exp_tiny = '1' then + v.shift := new_exp - min_exp; + v.state := ROUND_UFLOW; + elsif exp_huge = '1' then + v.state := ROUND_OFLOW; + else + v.shift := to_signed(-2, EXP_BITS); + v.state := ROUNDING; + end if; + + when ROUND_UFLOW => + v.tiny := '1'; + if r.fpscr(FPSCR_UE) = '0' then + -- disabled underflow exception case + -- have to denormalize before rounding + opsel_r <= RES_SHIFT; + set_x := '1'; + v.shift := to_signed(-2, EXP_BITS); + v.state := ROUNDING; + else + -- enabled underflow exception case + -- if denormalized, have to normalize before rounding + v.fpscr(FPSCR_UX) := '1'; + v.result_exp := r.result_exp + bias_exp; + if r.r(54) = '0' then + renormalize := '1'; + v.state := NORMALIZE; + else + 
v.shift := to_signed(-2, EXP_BITS); + v.state := ROUNDING; + end if; + end if; + + when ROUND_OFLOW => + v.fpscr(FPSCR_OX) := '1'; + if r.fpscr(FPSCR_OE) = '0' then + -- disabled overflow exception + -- result depends on rounding mode + v.fpscr(FPSCR_XX) := '1'; + v.fpscr(FPSCR_FI) := '1'; + if r.round_mode(1 downto 0) = "00" or + (r.round_mode(1) = '1' and r.round_mode(0) = r.result_sign) then + v.result_class := INFINITY; + v.fpscr(FPSCR_FR) := '1'; + else + v.fpscr(FPSCR_FR) := '0'; + end if; + -- construct largest representable number + v.result_exp := max_exp; + opsel_r <= RES_MISC; + misc_sel <= "001" & r.single_prec; + arith_done := '1'; + else + -- enabled overflow exception + v.result_exp := r.result_exp - bias_exp; + v.shift := to_signed(-2, EXP_BITS); + v.state := ROUNDING; + end if; + + when ROUNDING => + opsel_amask <= '1'; + round := fp_rounding(r.r, r.x, r.single_prec, r.round_mode, r.result_sign); + v.fpscr(FPSCR_FR downto FPSCR_FI) := round; + if round(1) = '1' then + -- set mask to increment the LSB for the precision + opsel_b <= BIN_MASK; + carry_in <= '1'; + v.shift := to_signed(-1, EXP_BITS); + v.state := ROUNDING_2; + else + if r.r(54) = '0' then + -- result after masking could be zero, or could be a + -- denormalized result that needs to be renormalized + renormalize := '1'; + v.state := ROUNDING_3; + else + arith_done := '1'; + end if; + end if; + if round(0) = '1' then + v.fpscr(FPSCR_XX) := '1'; + if r.tiny = '1' then + v.fpscr(FPSCR_UX) := '1'; + end if; + end if; + + when ROUNDING_2 => + -- Check for overflow during rounding + v.x := '0'; + if r.r(55) = '1' then + opsel_r <= RES_SHIFT; + if exp_huge = '1' then + v.state := ROUND_OFLOW; + else + arith_done := '1'; + end if; + elsif r.r(54) = '0' then + -- Do CLZ so we can renormalize the result + renormalize := '1'; + v.state := ROUNDING_3; + else + arith_done := '1'; + end if; + + when ROUNDING_3 => + mant_nz := r_hi_nz or (r_lo_nz and not r.single_prec); + if mant_nz = '0' then + 
v.result_class := ZERO; + arith_done := '1'; + else + -- Renormalize result after rounding + opsel_r <= RES_SHIFT; + v.denorm := exp_tiny; + v.shift := new_exp - to_signed(-1022, EXP_BITS); + if new_exp < to_signed(-1022, EXP_BITS) then + v.state := DENORM; + else + arith_done := '1'; + end if; + end if; + + when DENORM => + opsel_r <= RES_SHIFT; + arith_done := '1'; + end case; + if arith_done = '1' then + v.writing_back := '1'; + v.update_fprf := '1'; + v.instr_done := '1'; + v.state := IDLE; + update_fx := '1'; + end if; + -- Data path. + -- This has A and B input multiplexers, an adder, a shifter, + -- count-leading-zeroes logic, and a result mux. + if r.single_prec = '1' then + mshift := r.shift + to_signed(-29, EXP_BITS); + else + mshift := r.shift; + end if; + if mshift < to_signed(-64, EXP_BITS) then + mask := (others => '1'); + elsif mshift >= to_signed(0, EXP_BITS) then + mask := (others => '0'); + else + mask := right_mask(unsigned(mshift(5 downto 0))); + end if; + case opsel_a is + when AIN_R => + in_a0 := r.r; + when AIN_A => + in_a0 := r.a.mantissa; + when others => + in_a0 := r.b.mantissa; + end case; + if (or (mask and in_a0)) = '1' and set_x = '1' then + v.x := '1'; + end if; + if opsel_ainv = '1' then + in_a0 := not in_a0; + end if; + if opsel_amask = '1' then + in_a0 := in_a0 and not mask; + end if; + in_a <= in_a0; + case opsel_b is + when BIN_ZERO => + in_b0 := (others => '0'); + when BIN_R => + in_b0 := r.r; + when BIN_MASK => + in_b0 := mask; + when others => + in_b0 := (others => '0'); + end case; + in_b <= in_b0; + if r.shift >= to_signed(-64, EXP_BITS) and r.shift <= to_signed(63, EXP_BITS) then + shift_res := shifter_64(r.r & x"00000000000000", + std_ulogic_vector(r.shift(6 downto 0))); + else + shift_res := (others => '0'); + end if; case opsel_r is - when "00" => - result <= r.b.mantissa; - when "10" => - result <= x"00000000" & (r.fpscr and fpscr_mask); + when RES_SUM => + result <= std_ulogic_vector(unsigned(in_a) + unsigned(in_b) + 
carry_in); + when RES_SHIFT => + result <= shift_res; when others => - result <= (others => '0'); + case misc_sel is + when "0000" => + misc := x"00000000" & (r.fpscr and fpscr_mask); + when "0010" => + -- mantissa of max representable DP number + misc := x"007ffffffffffffc"; + when "0011" => + -- mantissa of max representable SP number + misc := x"007fffff80000000"; + when others => + misc := x"0000000000000000"; + end case; + result <= misc; end case; v.r := result; + if opsel_r = RES_SHIFT then + v.result_exp := new_exp; + end if; + + if renormalize = '1' then + clz := count_left_zeroes(r.r); + v.shift := resize(signed('0' & clz) - 9, EXP_BITS); + end if; + if r.int_result = '1' then fp_result <= r.r; else - fp_result <= pack_dp(r.result_sign, r.result_class, r.result_exp, r.r); + fp_result <= pack_dp(r.result_sign, r.result_class, r.result_exp, r.r, + r.single_prec); + end if; + if r.update_fprf = '1' then + v.fpscr(FPSCR_C downto FPSCR_FU) := result_flags(r.result_sign, r.result_class, + r.r(54) and not r.denorm); end if; v.fpscr(FPSCR_VX) := (or (v.fpscr(FPSCR_VXSNAN downto FPSCR_VXVC))) or (or (v.fpscr(FPSCR_VXSOFT downto FPSCR_VXCVI))); v.fpscr(FPSCR_FEX) := or (v.fpscr(FPSCR_VX downto FPSCR_XX) and v.fpscr(FPSCR_VE downto FPSCR_XE)); + if update_fx = '1' and + (v.fpscr(FPSCR_VX downto FPSCR_XX) and not r.old_exc) /= "00000" then + v.fpscr(FPSCR_FX) := '1'; + end if; if r.rc = '1' then v.cr_result := v.fpscr(FPSCR_FX downto FPSCR_OX); end if; diff --git a/tests/fpu/fpu.c b/tests/fpu/fpu.c index 46668f8..80751d1 100644 --- a/tests/fpu/fpu.c +++ b/tests/fpu/fpu.c @@ -64,7 +64,7 @@ void print_string(const char *str) putchar(*str); } -void print_hex(unsigned long val, int ndigits) +void print_hex(unsigned long val, int ndigits, const char *str) { int i, x; @@ -75,6 +75,7 @@ void print_hex(unsigned long val, int ndigits) else putchar(x + '0'); } + print_string(str); } // i < 100 @@ -201,12 +202,9 @@ int sp_to_dp(long arg) asm("lfs 20,0(%0); stfd 20,0(%1)" : : 
"b" (&sp_dp_equiv[arg].sp), "b" (&dp) : "memory"); if (dp != sp_dp_equiv[arg].dp) { - print_hex(sp_dp_equiv[arg].sp, 8); - print_string(" "); - print_hex(dp, 16); - print_string(" "); - print_hex(sp_dp_equiv[arg].dp, 16); - print_string(" "); + print_hex(sp_dp_equiv[arg].sp, 8, " "); + print_hex(dp, 16, " "); + print_hex(sp_dp_equiv[arg].dp, 16, " "); } return dp != sp_dp_equiv[arg].dp; } @@ -465,12 +463,77 @@ int test6(long arg) return 0; } +struct int_fp_equiv { + long ival; + unsigned long fp; + unsigned long fp_u; + unsigned long fp_s; + unsigned long fp_us; +} intvals[] = { + { 0, 0, 0, 0, 0 }, + { 1, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000 }, + { -1, 0xbff0000000000000, 0x43f0000000000000, 0xbff0000000000000, 0x43f0000000000000 }, + { 2, 0x4000000000000000, 0x4000000000000000, 0x4000000000000000, 0x4000000000000000 }, + { -2, 0xc000000000000000, 0x43f0000000000000, 0xc000000000000000, 0x43f0000000000000 }, + { 0x12345678, 0x41b2345678000000, 0x41b2345678000000, 0x41b2345680000000, 0x41b2345680000000 }, + { 0x0008000000000000, 0x4320000000000000, 0x4320000000000000, 0x4320000000000000, 0x4320000000000000 }, + { 0x0010000000000000, 0x4330000000000000, 0x4330000000000000, 0x4330000000000000, 0x4330000000000000 }, + { 0x0020000000000000, 0x4340000000000000, 0x4340000000000000, 0x4340000000000000, 0x4340000000000000 }, + { 0x0020000000000001, 0x4340000000000000, 0x4340000000000000, 0x4340000000000000, 0x4340000000000000 }, + { 0x0020000000000002, 0x4340000000000001, 0x4340000000000001, 0x4340000000000000, 0x4340000000000000 }, + { 0x0020000000000003, 0x4340000000000002, 0x4340000000000002, 0x4340000000000000, 0x4340000000000000 }, + { 0x0020000010000000, 0x4340000008000000, 0x4340000008000000, 0x4340000000000000, 0x4340000000000000 }, + { 0x0020000020000000, 0x4340000010000000, 0x4340000010000000, 0x4340000000000000, 0x4340000000000000 }, + { 0x0020000030000000, 0x4340000018000000, 0x4340000018000000, 0x4340000020000000, 
0x4340000020000000 }, + { 0x0020000040000000, 0x4340000020000000, 0x4340000020000000, 0x4340000020000000, 0x4340000020000000 }, + { 0x0020000080000000, 0x4340000040000000, 0x4340000040000000, 0x4340000040000000, 0x4340000040000000 }, + { 0x0040000000000000, 0x4350000000000000, 0x4350000000000000, 0x4350000000000000, 0x4350000000000000 }, + { 0x0040000000000001, 0x4350000000000000, 0x4350000000000000, 0x4350000000000000, 0x4350000000000000 }, + { 0x0040000000000002, 0x4350000000000000, 0x4350000000000000, 0x4350000000000000, 0x4350000000000000 }, + { 0x0040000000000003, 0x4350000000000001, 0x4350000000000001, 0x4350000000000000, 0x4350000000000000 }, + { 0x0040000000000004, 0x4350000000000001, 0x4350000000000001, 0x4350000000000000, 0x4350000000000000 }, + { 0x0040000000000005, 0x4350000000000001, 0x4350000000000001, 0x4350000000000000, 0x4350000000000000 }, + { 0x0040000000000006, 0x4350000000000002, 0x4350000000000002, 0x4350000000000000, 0x4350000000000000 }, + { 0x0040000000000007, 0x4350000000000002, 0x4350000000000002, 0x4350000000000000, 0x4350000000000000 }, +}; + +int test7(long arg) +{ + long i; + unsigned long results[4]; + + for (i = 0; i < sizeof(intvals) / sizeof(intvals[0]); ++i) { + asm("lfd%U0%X0 3,%0; fcfid 6,3; fcfidu 7,3; stfd 6,0(%1); stfd 7,8(%1)" + : : "m" (intvals[i].ival), "b" (results) : "memory"); + asm("fcfids 9,3; stfd 9,16(%0); fcfidus 10,3; stfd 10,24(%0)" + : : "b" (results) : "memory"); + if (results[0] != intvals[i].fp || + results[1] != intvals[i].fp_u || + results[2] != intvals[i].fp_s || + results[3] != intvals[i].fp_us) { + print_string("\r\n"); + print_hex(results[0], 16, " "); + print_hex(results[1], 16, " "); + print_hex(results[2], 16, " "); + print_hex(results[3], 16, " "); + return i + 1; + } + } + return 0; +} + int fpu_test_6(void) { enable_fp(); return trapit(0, test6); } +int fpu_test_7(void) +{ + enable_fp(); + return trapit(0, test7); +} + int fail = 0; void do_test(int num, int (*test)(void)) @@ -484,12 +547,9 @@ 
void do_test(int num, int (*test)(void)) } else { fail = 1; print_string("FAIL "); - print_hex(ret, 5); - print_string(" SRR0="); - print_hex(mfspr(SRR0), 16); - print_string(" SRR1="); - print_hex(mfspr(SRR1), 16); - print_string("\r\n"); + print_hex(ret, 5, " SRR0="); + print_hex(mfspr(SRR0), 16, " SRR1="); + print_hex(mfspr(SRR1), 16, "\r\n"); } } @@ -503,6 +563,7 @@ int main(void) do_test(4, fpu_test_4); do_test(5, fpu_test_5); do_test(6, fpu_test_6); + do_test(7, fpu_test_7); return fail; } diff --git a/tests/test_fpu.bin b/tests/test_fpu.bin index 4fb260e..25d50c7 100755 Binary files a/tests/test_fpu.bin and b/tests/test_fpu.bin differ diff --git a/tests/test_fpu.console_out b/tests/test_fpu.console_out index a49bb9b..340756c 100644 --- a/tests/test_fpu.console_out +++ b/tests/test_fpu.console_out @@ -4,3 +4,4 @@ test 03:PASS test 04:PASS test 05:PASS test 06:PASS +test 07:PASS