2#011000100# => (FPU, OP_FPOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 4/6=mtfsfi
2#011110010# => (FPU, OP_FPOP_I, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 18/7=mffs family
2#011110110# => (FPU, OP_FPOP_I, NONE, FRB, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 22/7=mtfsf
+ 2#100000000# => (FPU, OP_FPOP, FRA, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 0/8=fcpsgn
+ 2#100000001# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 1/8=fneg
+ 2#100000010# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 2/8=fmr
+ 2#100000100# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 4/8=fnabs
+ 2#100001000# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 8/8=fabs
others => illegal_inst
);
return (is_fast_spr(ispr), ispr, reg_data);
elsif t = CIA then
return ('0', (others => '0'), instr_addr);
+ elsif HAS_FPU and t = FRA then
+ return ('1', fpr_to_gspr(insn_fra(insn_in)), reg_data);
else
return ('0', (others => '0'), (others => '0'));
end if;
end process;
r_out.read1_reg <= d_in.ispr1 when d_in.decode.input_reg_a = SPR
+ else fpr_to_gspr(insn_fra(d_in.insn)) when d_in.decode.input_reg_a = FRA and HAS_FPU
else gpr_to_gspr(insn_ra(d_in.insn));
r_out.read2_reg <= d_in.ispr2 when d_in.decode.input_reg_b = SPR
else fpr_to_gspr(insn_frb(d_in.insn)) when d_in.decode.input_reg_b = FRB and HAS_FPU
OP_BCD, OP_ADDG6S,
OP_FETCH_FAILED
);
- type input_reg_a_t is (NONE, RA, RA_OR_ZERO, SPR, CIA);
+ type input_reg_a_t is (NONE, RA, RA_OR_ZERO, SPR, CIA, FRA); -- FRA: operand A is read from the FPR selected by insn_fra() (only honoured where HAS_FPU is true)
type input_reg_b_t is (NONE, RB, CONST_UI, CONST_SI, CONST_SI_HI, CONST_UI_HI, CONST_LI, CONST_BD,
CONST_DXHI4, CONST_DS, CONST_M1, CONST_SH, CONST_SH32, SPR, FRB);
type input_reg_c_t is (NONE, RS, RCR, FRS);
end entity fpu;
architecture behaviour of fpu is
+ type fp_number_class is (ZERO, FINITE, INFINITY, NAN); -- category of an operand or result; decode_dp also reports denormals as FINITE
+
+ constant EXP_BITS : natural := 13; -- width in bits of the signed exponent fields (fpu_reg_type.exponent, reg_type.result_exp)
+
+ type fpu_reg_type is record -- unpacked form of an input operand, as returned by decode_dp
+ class : fp_number_class; -- ZERO / FINITE / INFINITY / NAN as worked out by decode_dp
+ negative : std_ulogic; -- copy of bit 63 of the source register
+ exponent : signed(EXP_BITS-1 downto 0); -- unbiased
+ mantissa : std_ulogic_vector(63 downto 0); -- 10.54 format; decode_dp places the implicit leading bit at bit 54
+ end record;
type state_t is (IDLE,
- DO_MCRFS, DO_MTFSB, DO_MTFSFI, DO_MFFS, DO_MTFSF);
+ DO_MCRFS, DO_MTFSB, DO_MTFSFI, DO_MFFS, DO_MTFSF,
+ DO_FMR);
type reg_type is record
state : state_t;
is_cmp : std_ulogic;
single_prec : std_ulogic;
fpscr : std_ulogic_vector(31 downto 0);
- b : std_ulogic_vector(63 downto 0);
+ a : fpu_reg_type;
+ b : fpu_reg_type;
r : std_ulogic_vector(63 downto 0);
+ result_sign : std_ulogic;
+ result_class : fp_number_class;
+ result_exp : signed(EXP_BITS-1 downto 0);
writing_back : std_ulogic;
+ int_result : std_ulogic;
cr_result : std_ulogic_vector(3 downto 0);
cr_mask : std_ulogic_vector(7 downto 0);
end record;
signal r, rin : reg_type;
signal fp_result : std_ulogic_vector(63 downto 0);
+ signal opsel_r : std_ulogic_vector(1 downto 0);
+ signal result : std_ulogic_vector(63 downto 0);
+
+ -- Unpack a 64-bit register value into sign, exponent, mantissa and class.
+ --
+ --   fpr    : raw 64-bit operand.
+ --   is_int : '0' selects double-precision decoding; any other value makes
+ --            the operand pass through as a raw 64-bit integer.
+ --
+ -- Double-precision decoding:
+ --   * exponent is the 11-bit biased field minus 1023, except that an
+ --     all-zero exponent field is reported as -1022;
+ --   * mantissa is the 52-bit fraction with the implicit leading bit (set
+ --     when the exponent field is non-zero) at bit 54 and two zero bits
+ --     appended below the fraction;
+ --   * class is chosen from the triple (exponent all ones, exponent
+ --     non-zero, fraction non-zero); any combination not listed is NAN.
+ -- Integer decoding:
+ --   * mantissa is fpr unchanged, exponent is zero, and class is FINITE
+ --     when any of the 64 bits is set, ZERO otherwise.
+ -- In both modes the negative flag is a copy of fpr(63).
+ function decode_dp(fpr: std_ulogic_vector(63 downto 0); is_int: std_ulogic) return fpu_reg_type is
+     variable dec        : fpu_reg_type;
+     variable exp_field  : std_ulogic_vector(10 downto 0);
+     variable frac_field : std_ulogic_vector(51 downto 0);
+     variable exp_any    : std_ulogic;
+     variable exp_all    : std_ulogic;
+     variable frac_any   : std_ulogic;
+     variable key        : std_ulogic_vector(2 downto 0);
+ begin
+     -- Carve out the two fields and summarise them once.
+     exp_field  := fpr(62 downto 52);
+     frac_field := fpr(51 downto 0);
+     exp_any    := or (exp_field);
+     exp_all    := and (exp_field);
+     frac_any   := or (frac_field);
+
+     dec.negative := fpr(63);
+
+     -- Integer operands need no unpacking: hand the bits straight through.
+     if is_int /= '0' then
+         dec.exponent := (others => '0');
+         dec.mantissa := fpr;
+         dec.class    := ZERO;
+         if (fpr(63) or exp_any or frac_any) = '1' then
+             dec.class := FINITE;
+         end if;
+         return dec;
+     end if;
+
+     -- Floating-point operand.
+     if exp_any = '0' then
+         -- zero exponent field: fixed exponent of -1022
+         dec.exponent := to_signed(-1022, EXP_BITS);
+     else
+         dec.exponent := signed(resize(unsigned(exp_field), EXP_BITS)) - to_signed(1023, EXP_BITS);
+     end if;
+
+     -- 9 zero bits, implicit bit, 52 fraction bits, 2 zero bits = 64 bits
+     dec.mantissa := "000000000" & exp_any & frac_field & "00";
+
+     key := exp_all & exp_any & frac_any;
+     case key is
+         when "000" =>
+             dec.class := ZERO;
+         when "001" | "010" | "011" =>
+             -- "001" is a denormalized number
+             dec.class := FINITE;
+         when "110" =>
+             dec.class := INFINITY;
+         when others =>
+             dec.class := NAN;
+     end case;
+
+     return dec;
+ end;
+
+ -- Assemble a 64-bit double-precision bit pattern from unpacked parts.
+ --
+ --   sign     : value for bit 63.
+ --   class    : selects how the exponent and fraction fields are filled.
+ --   exp      : unbiased exponent; only used for FINITE values whose
+ --              mantissa bit 54 is set.
+ --   mantissa : bits 53 downto 2 supply the 52-bit fraction; bit 54 is the
+ --              "normalized" indicator.
+ --
+ -- Field contents by class:
+ --   ZERO     : exponent and fraction all zero.
+ --   FINITE   : fraction from mantissa; exponent = exp + 1023 (11 bits)
+ --              when mantissa(54) = '1', otherwise zero.
+ --   INFINITY : exponent all ones, fraction zero.
+ --   NAN      : exponent all ones, fraction from mantissa.
+ function pack_dp(sign: std_ulogic; class: fp_number_class; exp: signed(EXP_BITS-1 downto 0);
+                  mantissa: std_ulogic_vector) return std_ulogic_vector is
+     variable dp : std_ulogic_vector(63 downto 0);
+ begin
+     dp := (others => '0');
+     dp(63) := sign;
+
+     -- Exponent field (bits 62..52).
+     if class = INFINITY or class = NAN then
+         dp(62 downto 52) := "11111111111";
+     elsif class = FINITE and mantissa(54) = '1' then
+         -- normalized number: re-apply the bias
+         dp(62 downto 52) := std_ulogic_vector(resize(exp, 11) + 1023);
+     end if;
+
+     -- Fraction field (bits 51..0).
+     if class = FINITE or class = NAN then
+         dp(51 downto 0) := mantissa(53 downto 2);
+     end if;
+
+     return dp;
+ end;
begin
fpu_0: process(clk)
fpu_1: process(all)
variable v : reg_type;
+ variable adec : fpu_reg_type;
+ variable bdec : fpu_reg_type;
variable fpscr_mask : std_ulogic_vector(31 downto 0);
variable illegal : std_ulogic;
variable j, k : integer;
variable flm : std_ulogic_vector(7 downto 0);
+ variable int_input : std_ulogic;
begin
v := r;
illegal := '0';
v.busy := '0';
+ int_input := '0';
-- capture incoming instruction
if e_in.valid = '1' then
v.fe_mode := or (e_in.fe_mode);
v.dest_fpr := e_in.frt;
v.single_prec := e_in.single;
+ v.int_result := '0';
v.rc := e_in.rc;
v.is_cmp := e_in.out_cr;
if e_in.out_cr = '0' then
else
v.cr_mask := num_to_fxm(to_integer(unsigned(insn_bf(e_in.insn))));
end if;
- v.b := e_in.frb;
+ int_input := '0';
+ if e_in.op = OP_FPOP_I then
+ int_input := '1';
+ end if;
+ adec := decode_dp(e_in.fra, int_input);
+ bdec := decode_dp(e_in.frb, int_input);
+ v.a := adec;
+ v.b := bdec;
end if;
v.writing_back := '0';
v.instr_done := '0';
+ opsel_r <= "00";
fpscr_mask := (others => '1');
case r.state is
else
v.state := DO_MTFSF;
end if;
+ when "01000" =>
+ v.state := DO_FMR;
when others =>
illegal := '1';
end case;
v.state := IDLE;
when DO_MFFS =>
+ v.int_result := '1';
v.writing_back := '1';
+ opsel_r <= "10";
case r.insn(20 downto 16) is
when "00000" =>
-- mffs
-- mffscrn
fpscr_mask := x"000000FF";
v.fpscr(FPSCR_RN+1 downto FPSCR_RN) :=
- r.b(FPSCR_RN+1 downto FPSCR_RN);
+ r.b.mantissa(FPSCR_RN+1 downto FPSCR_RN);
when "10111" =>
-- mffscrni
fpscr_mask := x"000000FF";
for i in 0 to 7 loop
k := i * 4;
if flm(i) = '1' then
- v.fpscr(k + 3 downto k) := r.b(k + 3 downto k);
+ v.fpscr(k + 3 downto k) := r.b.mantissa(k + 3 downto k);
end if;
end loop;
v.instr_done := '1';
v.state := IDLE;
+ when DO_FMR =>
+ v.result_class := r.b.class;
+ v.result_exp := r.b.exponent;
+ if r.insn(9) = '1' then
+ v.result_sign := '0'; -- fabs
+ elsif r.insn(8) = '1' then
+ v.result_sign := '1'; -- fnabs
+ elsif r.insn(7) = '1' then
+ v.result_sign := r.b.negative; -- fmr
+ elsif r.insn(6) = '1' then
+ v.result_sign := not r.b.negative; -- fneg
+ else
+ v.result_sign := r.a.negative; -- fcpsgn
+ end if;
+ v.writing_back := '1';
+ v.instr_done := '1';
+ v.state := IDLE;
+
end case;
-- Data path.
- -- Just enough to read FPSCR for now.
- v.r := x"00000000" & (r.fpscr and fpscr_mask);
+ case opsel_r is
+ when "00" =>
+ result <= r.b.mantissa;
+ when "10" =>
+ result <= x"00000000" & (r.fpscr and fpscr_mask);
+ when others =>
+ result <= (others => '0');
+ end case;
+ v.r := result;
- fp_result <= r.r;
+ if r.int_result = '1' then
+ fp_result <= r.r;
+ else
+ fp_result <= pack_dp(r.result_sign, r.result_class, r.result_exp, r.r);
+ end if;
v.fpscr(FPSCR_VX) := (or (v.fpscr(FPSCR_VXSNAN downto FPSCR_VXVC))) or
(or (v.fpscr(FPSCR_VXSOFT downto FPSCR_VXCVI)));
return 0;
}
+#define SIGN 0x8000000000000000ul
+
+/*
+ * Exercise fmr, fneg, fabs, fnabs and fcpsgn on every double-precision
+ * pattern in sp_dp_equiv[].
+ *
+ * For each entry the six results are stored to memory and compared with
+ * the value expected from plain integer manipulation of the sign bit.
+ *
+ * arg is unused.
+ *
+ * Returns 0 if every entry matches, otherwise the 1-based index of the
+ * first entry that produced a wrong result.
+ */
+int test6(long arg)
+{
+	unsigned long results[6];
+	unsigned long expected[6];
+	unsigned long dp;
+	long idx;
+	int slot;
+
+	for (idx = 0; idx < sizeof(sp_dp_equiv) / sizeof(sp_dp_equiv[0]); ++idx) {
+		dp = sp_dp_equiv[idx].dp;
+
+		/* fr3 = input, fr6 = fmr(fr3), fr7 = fneg(fr3) */
+		asm("lfd%U0%X0 3,%0; fmr 6,3; fneg 7,3; stfd 6,0(%1); stfd 7,8(%1)"
+		    : : "m" (sp_dp_equiv[idx].dp), "b" (results) : "memory");
+		/* fr9 = fabs(fr6), fr10 = fnabs(fr6); fr6 is left over from the asm above */
+		asm("fabs 9,6; fnabs 10,6; stfd 9,16(%0); stfd 10,24(%0)"
+		    : : "b" (results) : "memory");
+		/* sign taken from fr9 (cleared) / fr10 (set), remaining bits from fr3 */
+		asm("fcpsgn 4,9,3; stfd 4,32(%0); fcpsgn 5,10,3; stfd 5,40(%0)"
+		    : : "b" (results) : "memory");
+
+		expected[0] = dp;		/* fmr */
+		expected[1] = dp ^ SIGN;	/* fneg */
+		expected[2] = dp & ~SIGN;	/* fabs */
+		expected[3] = dp | SIGN;	/* fnabs */
+		expected[4] = dp & ~SIGN;	/* fcpsgn, sign source positive */
+		expected[5] = dp | SIGN;	/* fcpsgn, sign source negative */
+
+		for (slot = 0; slot < 6; ++slot) {
+			if (results[slot] != expected[slot])
+				return idx + 1;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Entry point for test 6: calls enable_fp(), then runs test6 through
+ * trapit() with an argument of 0 and returns whatever trapit() reports.
+ */
+int fpu_test_6(void)
+{
+	int status;
+
+	enable_fp();
+	status = trapit(0, test6);
+	return status;
+}
+
int fail = 0;
void do_test(int num, int (*test)(void))
do_test(3, fpu_test_3);
do_test(4, fpu_test_4);
do_test(5, fpu_test_5);
+ do_test(6, fpu_test_6);
return fail;
}
test 03:PASS\r
test 04:PASS\r
test 05:PASS\r
+test 06:PASS\r