type op_59_subop_array_t is array(0 to 31) of decode_rom_t;
type minor_rom_array_2_t is array(0 to 3) of decode_rom_t;
type op_63_subop_array_0_t is array(0 to 511) of decode_rom_t;
+ -- ROM type for the opcode-63 sub-table selected by the 4-bit field
+ -- insn(4 downto 1): a 4-bit index needs exactly 16 entries (0 to 15).
+ type op_63_subop_array_1_t is array(0 to 15) of decode_rom_t;
constant major_decode_rom_array : major_rom_array_t := (
-- unit internal in1 in2 in3 out CR CR inv inv cry cry ldst BR sgn upd rsrv 32b sgn rc lk sgl
-- unit internal in1 in2 in3 out CR CR inv inv cry cry ldst BR sgn upd rsrv 32b sgn rc lk sgl
-- op in out A out in out len ext pipe
2#01110# => (FPU, OP_FPOP_I, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- fcfid[u]s
+ 2#10100# => (FPU, OP_FPOP, FRA, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- fsubs
+ 2#10101# => (FPU, OP_FPOP, FRA, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- fadds
others => illegal_inst
);
others => illegal_inst
);
+ -- Decode ROM for primary opcode 63 instructions that have instruction
+ -- bit 5 set; indexed by bits 4..1 of instruction word.
+ -- Entries not listed explicitly decode as illegal_inst.
+ constant decode_op_63h_array : op_63_subop_array_1_t := (
+ -- unit internal in1 in2 in3 out CR CR inv inv cry cry ldst BR sgn upd rsrv 32b sgn rc lk sgl
+ -- op in out A out in out len ext pipe
+ 2#0100# => (FPU, OP_FPOP, FRA, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- fsub
+ 2#0101# => (FPU, OP_FPOP, FRA, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- fadd
+ others => illegal_inst
+ );
+
-- unit internal in1 in2 in3 out CR CR inv inv cry cry ldst BR sgn upd rsrv 32b sgn rc lk sgl
-- op in out A out in out len ext pipe
constant nop_instr : decode_rom_t := (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0');
when 63 =>
if HAS_FPU then
-- floating point operations, general and double-precision
- v.decode := decode_op_63l_array(to_integer(unsigned(f_in.insn(4 downto 1) & f_in.insn(10 downto 6))));
- vi.override := f_in.insn(5);
+ if f_in.insn(5) = '0' then
+ v.decode := decode_op_63l_array(to_integer(unsigned(f_in.insn(4 downto 1) & f_in.insn(10 downto 6))));
+ else
+ v.decode := decode_op_63h_array(to_integer(unsigned(f_in.insn(4 downto 1))));
+ end if;
end if;
when others =>
DO_FMR, DO_FMRG,
DO_FCFID, DO_FCTI,
DO_FRSP, DO_FRI,
+ DO_FADD,
FRI_1,
+ ADD_SHIFT, ADD_2, ADD_3,
INT_SHIFT, INT_ROUND, INT_ISHIFT,
INT_FINAL, INT_CHECK, INT_OFLOW,
FINISH, NORMALIZE,
tiny : std_ulogic;
denorm : std_ulogic;
round_mode : std_ulogic_vector(2 downto 0);
+ is_subtract : std_ulogic;
+ exp_cmp : std_ulogic;
+ add_bsmall : std_ulogic;
end record;
signal r, rin : reg_type;
signal opsel_r : std_ulogic_vector(1 downto 0);
signal opsel_ainv : std_ulogic;
signal opsel_amask : std_ulogic;
+ signal opsel_binv : std_ulogic;
signal in_a : std_ulogic_vector(63 downto 0);
signal in_b : std_ulogic_vector(63 downto 0);
signal result : std_ulogic_vector(63 downto 0);
variable mshift : signed(EXP_BITS-1 downto 0);
variable need_check : std_ulogic;
variable msb : std_ulogic;
+ variable is_add : std_ulogic;
+ variable qnan_result : std_ulogic;
+ variable longmask : std_ulogic;
begin
v := r;
illegal := '0';
v.tiny := '0';
v.denorm := '0';
v.round_mode := '0' & r.fpscr(FPSCR_RN+1 downto FPSCR_RN);
+ v.is_subtract := '0';
+ v.add_bsmall := '0';
adec := decode_dp(e_in.fra, int_input);
bdec := decode_dp(e_in.frb, int_input);
v.a := adec;
v.b := bdec;
+ v.exp_cmp := '0';
+ if adec.exponent > bdec.exponent then
+ v.exp_cmp := '1';
+ end if;
end if;
r_hi_nz <= or (r.r(55 downto 31));
opsel_ainv <= '0';
opsel_amask <= '0';
opsel_b <= BIN_ZERO;
+ opsel_binv <= '0';
opsel_r <= RES_SUM;
carry_in <= '0';
misc_sel <= "0000";
invalid := '0';
renormalize := '0';
set_x := '0';
+ qnan_result := '0';
+ longmask := r.single_prec;
case r.state is
when IDLE =>
when "01111" =>
v.round_mode := "001";
v.state := DO_FCTI;
+ when "10100" | "10101" =>
+ v.state := DO_FADD;
when others =>
illegal := '1';
end case;
v.state := FINISH;
end if;
+ when DO_FADD =>
+ -- fadd[s] and fsub[s]
+ -- Result sign/class/exponent default to those of operand A.  If both
+ -- operands are finite the add sequence is started (ADD_SHIFT/ADD_2);
+ -- otherwise the special-case result is chosen here and the operation
+ -- completes immediately via arith_done.
+ opsel_a <= AIN_A;
+ v.result_sign := r.a.negative;
+ v.result_class := r.a.class;
+ v.result_exp := r.a.exponent;
+ v.fpscr(FPSCR_FR) := '0';
+ v.fpscr(FPSCR_FI) := '0';
+ -- is_add = '1' when the magnitudes are to be added: insn bit 1 is '1'
+ -- for fadd and '0' for fsub, combined with the two operand signs.
+ is_add := r.a.negative xor r.b.negative xor r.insn(1);
+ if r.a.class = FINITE and r.b.class = FINITE then
+ v.is_subtract := not is_add;
+ v.add_bsmall := r.exp_cmp;
+ if r.exp_cmp = '0' then
+ -- A's exponent is not greater than B's: keep A selected, use the
+ -- (non-positive) exponent difference as the shift count and take
+ -- the sign of +/-B (B's sign for fadd, inverted for fsub).
+ v.shift := r.a.exponent - r.b.exponent;
+ v.result_sign := r.b.negative xnor r.insn(1);
+ if r.a.exponent = r.b.exponent then
+ -- equal exponents need no alignment shift
+ v.state := ADD_2;
+ else
+ v.state := ADD_SHIFT;
+ end if;
+ else
+ -- A's exponent is greater than B's: select B instead and use the
+ -- (negative) difference b.exponent - a.exponent as the shift count.
+ -- NOTE(review): result_exp is set to the exponent of the operand
+ -- being shifted here; presumably the shifter step adjusts it by
+ -- the shift count -- confirm against the exponent update logic
+ -- outside this state.
+ opsel_a <= AIN_B;
+ v.shift := r.b.exponent - r.a.exponent;
+ v.result_exp := r.b.exponent;
+ v.state := ADD_SHIFT;
+ end if;
+ else
+ -- At least one operand is a NaN, an infinity or a zero.
+ if (r.a.class = NAN and r.a.mantissa(53) = '0') or
+ (r.b.class = NAN and r.b.mantissa(53) = '0') then
+ -- Signalling NAN
+ v.fpscr(FPSCR_VXSNAN) := '1';
+ invalid := '1';
+ end if;
+ if r.a.class = NAN then
+ -- nothing to do, result is A
+ elsif r.b.class = NAN then
+ -- result is B, with B's own sign
+ v.result_class := NAN;
+ v.result_sign := r.b.negative;
+ opsel_a <= AIN_B;
+ elsif r.a.class = INFINITY and r.b.class = INFINITY and is_add = '0' then
+ -- invalid operation, construct QNaN
+ v.fpscr(FPSCR_VXISI) := '1';
+ qnan_result := '1';
+ elsif r.a.class = ZERO and r.b.class = ZERO and is_add = '0' then
+ -- return -0 for rounding to -infinity
+ v.result_sign := r.round_mode(1) and r.round_mode(0);
+ elsif r.a.class = INFINITY or r.b.class = ZERO then
+ -- nothing to do, result is A
+ else
+ -- result is +/- B
+ v.result_sign := r.b.negative xnor r.insn(1);
+ v.result_class := r.b.class;
+ v.result_exp := r.b.exponent;
+ opsel_a <= AIN_B;
+ end if;
+ arith_done := '1';
+ end if;
+
+ when ADD_SHIFT =>
+ -- Alignment step for fadd/fsub: take the shifter output as the result
+ -- with set_x asserted, then continue with ADD_2.
+ -- longmask is cleared so the shift count is used as-is, without the
+ -- extra -29 offset otherwise applied when longmask = '1'.
+ -- NOTE(review): set_x presumably captures shifted-out bits in r.x -- confirm.
+ opsel_r <= RES_SHIFT;
+ set_x := '1';
+ longmask := '0';
+ v.state := ADD_2;
+
+ when ADD_2 =>
+ -- Combine the operands: the A input takes the operand that DO_FADD
+ -- did not select (A when add_bsmall = '1', otherwise B) and the B
+ -- input takes the current R value.
+ if r.add_bsmall = '1' then
+ opsel_a <= AIN_A;
+ else
+ opsel_a <= AIN_B;
+ end if;
+ opsel_b <= BIN_R;
+ -- For an effective subtraction the B input is inverted, and the
+ -- carry-in supplies the +1 only when r.x is clear.
+ -- NOTE(review): presumably a set r.x means bits were lost in the
+ -- alignment shift, which already accounts for the borrow -- confirm.
+ opsel_binv <= r.is_subtract;
+ carry_in <= r.is_subtract and not r.x;
+ -- Shift count of -1, in place for ADD_3 should it select the shifter.
+ v.shift := to_signed(-1, EXP_BITS);
+ v.state := ADD_3;
+
+ when ADD_3 =>
+ -- Inspect the sum/difference in r.r and choose the follow-up state:
+ -- negate it, shift it right, round it, return zero, or renormalize.
+ -- check for overflow or negative result (can't get both)
+ if r.r(63) = '1' then
+ -- result is opposite sign to expected
+ -- flip the sign and negate via inverted A input plus carry-in
+ v.result_sign := not r.result_sign;
+ opsel_ainv <= '1';
+ carry_in <= '1';
+ v.state := FINISH;
+ elsif r.r(55) = '1' then
+ -- sum overflowed, shift right
+ opsel_r <= RES_SHIFT;
+ set_x := '1';
+ v.shift := to_signed(-2, EXP_BITS);
+ if exp_huge = '1' then
+ v.state := ROUND_OFLOW;
+ else
+ v.state := ROUNDING;
+ end if;
+ elsif r.r(54) = '1' then
+ -- bit 54 set and no carry-out: go straight to rounding
+ set_x := '1';
+ v.shift := to_signed(-2, EXP_BITS);
+ v.state := ROUNDING;
+ elsif (r_hi_nz or r_lo_nz or r.r(1) or r.r(0)) = '0' then
+ -- r.x must be zero at this point
+ v.result_class := ZERO;
+ if r.is_subtract = '1' then
+ -- set result sign depending on rounding mode
+ v.result_sign := r.round_mode(1) and r.round_mode(0);
+ end if;
+ arith_done := '1';
+ else
+ -- non-zero result with bits 54 and 55 both clear: renormalize
+ renormalize := '1';
+ v.state := NORMALIZE;
+ end if;
+
when INT_SHIFT =>
opsel_r <= RES_SHIFT;
set_x := '1';
mant_nz := r_hi_nz or (r_lo_nz and not r.single_prec);
if mant_nz = '0' then
v.result_class := ZERO;
+ if r.is_subtract = '1' then
+ -- set result sign depending on rounding mode
+ v.result_sign := r.round_mode(1) and r.round_mode(0);
+ end if;
arith_done := '1';
else
-- Renormalize result after rounding
end case;
+ -- A state requested a generated QNaN: flag the operation as invalid and
+ -- force a positive NaN result whose value comes from the misc mux
+ -- (selector "0001").
+ if qnan_result = '1' then
+ invalid := '1';
+ v.result_class := NAN;
+ v.result_sign := '0';
+ misc_sel <= "0001";
+ opsel_r <= RES_MISC;
+ end if;
if arith_done = '1' then
-- Enabled invalid exception doesn't write result or FPRF
if (invalid and r.fpscr(FPSCR_VE)) = '0' then
-- Data path.
-- This has A and B input multiplexers, an adder, a shifter,
-- count-leading-zeroes logic, and a result mux.
- if r.single_prec = '1' then
+ if longmask = '1' then
mshift := r.shift + to_signed(-29, EXP_BITS);
else
mshift := r.shift;
when others =>
in_b0 := (others => '0');
end case;
+ if opsel_binv = '1' then
+ in_b0 := not in_b0;
+ end if;
in_b <= in_b0;
if r.shift >= to_signed(-64, EXP_BITS) and r.shift <= to_signed(63, EXP_BITS) then
shift_res := shifter_64(r.r & x"00000000000000",
case misc_sel is
when "0000" =>
misc := x"00000000" & (r.fpscr and fpscr_mask);
+ when "0001" =>
+ -- generated QNaN mantissa
+ misc := x"0020000000000000";
when "0010" =>
-- mantissa of max representable DP number
misc := x"007ffffffffffffc";
return trapit(0, test12);
}
+/*
+ * Test vectors for floating-point add/subtract.  Each entry gives two
+ * operands and the expected results as raw 64-bit values; they are moved
+ * to and from FP registers with lfd/stfd.
+ * NOTE(review): assumes unsigned long is 64 bits wide -- confirm for the
+ * target ABI.
+ */
+struct addvals {
+ unsigned long val_a; /* first source operand (A) */
+ unsigned long val_b; /* second source operand (B) */
+ unsigned long sum; /* expected result of A + B */
+ unsigned long diff; /* expected result of A - B */
+} addvals[] = {
+ { 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 },
+ { 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x0000000000000000 },
+ { 0x3fdfffffffffffff, 0x0000000000000000, 0x3fdfffffffffffff, 0x3fdfffffffffffff },
+ { 0x3ff0000000000000, 0x3ff0000000000000, 0x4000000000000000, 0x0000000000000000 },
+ { 0xbff0000000000000, 0xbff0000000000000, 0xc000000000000000, 0x0000000000000000 },
+ { 0x402123456789abcd, 0x4021000000000000, 0x403111a2b3c4d5e6, 0x3fb1a2b3c4d5e680 },
+ { 0x4061200000000000, 0x406123456789abcd, 0x407121a2b3c4d5e6, 0xbfba2b3c4d5e6800 },
+ { 0x4061230000000000, 0x3fa4560000000000, 0x4061244560000000, 0x406121baa0000000 },
+ { 0xc061230000000000, 0x3fa4560000000000, 0xc06121baa0000000, 0xc061244560000000 },
+ { 0x4061230000000000, 0xbfa4560000000000, 0x406121baa0000000, 0x4061244560000000 },
+ { 0xc061230000000000, 0xbfa4560000000000, 0xc061244560000000, 0xc06121baa0000000 },
+ { 0x3fa1230000000000, 0x4064560000000000, 0x4064571230000000, 0xc06454edd0000000 },
+ { 0xbfa1230000000000, 0x4064560000000000, 0x406454edd0000000, 0xc064571230000000 },
+ { 0x3fa1230000000000, 0xc064560000000000, 0xc06454edd0000000, 0x4064571230000000 },
+ { 0xbfa1230000000000, 0xc064560000000000, 0xc064571230000000, 0x406454edd0000000 },
+ { 0x6780000000000001, 0x6470000000000000, 0x6780000000000009, 0x677ffffffffffff2 },
+ { 0x6780000000000001, 0x6460000000000000, 0x6780000000000005, 0x677ffffffffffffa },
+ { 0x6780000000000001, 0x6450000000000000, 0x6780000000000003, 0x677ffffffffffffe },
+ { 0x6780000000000001, 0x6440000000000000, 0x6780000000000002, 0x6780000000000000 },
+ { 0x7ff8888888888888, 0x7ff9999999999999, 0x7ff8888888888888, 0x7ff8888888888888 },
+ { 0xfff8888888888888, 0x7ff9999999999999, 0xfff8888888888888, 0xfff8888888888888 },
+ { 0x7ff8888888888888, 0x7ff0000000000000, 0x7ff8888888888888, 0x7ff8888888888888 },
+ { 0x7ff8888888888888, 0x0000000000000000, 0x7ff8888888888888, 0x7ff8888888888888 },
+ { 0x7ff8888888888888, 0x0001111111111111, 0x7ff8888888888888, 0x7ff8888888888888 },
+ { 0x7ff8888888888888, 0x3ff0000000000000, 0x7ff8888888888888, 0x7ff8888888888888 },
+ { 0x7ff0000000000000, 0x7ff9999999999999, 0x7ff9999999999999, 0x7ff9999999999999 },
+ { 0x7ff0000000000000, 0x7ff0000000000000, 0x7ff0000000000000, 0x7ff8000000000000 },
+ { 0x7ff0000000000000, 0xfff0000000000000, 0x7ff8000000000000, 0x7ff0000000000000 },
+ { 0x7ff0000000000000, 0x0000000000000000, 0x7ff0000000000000, 0x7ff0000000000000 },
+ { 0x7ff0000000000000, 0x8000000000000000, 0x7ff0000000000000, 0x7ff0000000000000 },
+ { 0x7ff0000000000000, 0x8002222222222222, 0x7ff0000000000000, 0x7ff0000000000000 },
+ { 0x7ff0000000000000, 0xc002222222222222, 0x7ff0000000000000, 0x7ff0000000000000 },
+ { 0x0000000000000000, 0x7ff9999999999999, 0x7ff9999999999999, 0x7ff9999999999999 },
+ { 0x0000000000000000, 0x7ff0000000000000, 0x7ff0000000000000, 0xfff0000000000000 },
+ { 0x8000000000000000, 0x7ff0000000000000, 0x7ff0000000000000, 0xfff0000000000000 },
+ { 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 },
+ { 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 },
+ { 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x0000000000000000 },
+ { 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x0000000000000000 },
+ { 0x8002222222222222, 0x0001111111111111, 0x8001111111111111, 0x8003333333333333 },
+ { 0x0000022222222222, 0x0000111111111111, 0x0000133333333333, 0x80000eeeeeeeeeef },
+ { 0x401ffffffbfffefe, 0x406b8265196bd89e, 0x406c8265194bd896, 0xc06a8265198bd8a6 },
+ { 0x4030020000000004, 0xbf110001ffffffff, 0x403001fbbfff8004, 0x4030020440008004 },
+ { 0x3fdfffffffffffff, 0x3fe0000000000000, 0x3ff0000000000000, 0xbc90000000000000 },
+};
+
+/*
+ * test13 - check fadd and fsub against every entry of addvals[].
+ *
+ * For each entry, val_a and val_b are loaded into f5 and f6, the sum and
+ * difference are computed into f7 and f8, and both are stored to results[]
+ * and compared with the expected values.
+ *
+ * arg is unused.
+ *
+ * Returns 0 if every entry matches; otherwise prints the failing index and
+ * the two actual results, and returns that index plus one.
+ */
+int test13(long arg)
+{
+	long i;
+	/* signed count so the loop comparison does not mix signedness */
+	const long count = sizeof(addvals) / sizeof(addvals[0]);
+	unsigned long results[2];
+	struct addvals *vp = addvals;
+
+	set_fpscr(FPS_RN_NEAR);
+	for (i = 0; i < count; ++i, ++vp) {
+		/*
+		 * f5-f8 are overwritten by the asm, so they must be listed as
+		 * clobbers; "memory" covers the loads from *vp and the stores
+		 * to results[].
+		 */
+		asm("lfd 5,0(%0); lfd 6,8(%0); fadd 7,5,6; fsub 8,5,6; stfd 7,0(%1); stfd 8,8(%1)"
+		    : : "b" (&vp->val_a), "b" (results)
+		    : "memory", "fr5", "fr6", "fr7", "fr8");
+		if (results[0] != vp->sum || results[1] != vp->diff) {
+			print_hex(i, 2, " ");
+			print_hex(results[0], 16, " ");
+			print_hex(results[1], 16, "\r\n");
+			return i + 1;
+		}
+	}
+	return 0;
+}
+
+/*
+ * fpu_test_13 - driver for test13: calls enable_fp(), then runs test13
+ * through trapit() and hands back whatever trapit() returns.
+ */
+int fpu_test_13(void)
+{
+	int status;
+
+	enable_fp();
+	status = trapit(0, test13);
+	return status;
+}
+
+/*
+ * Test vectors for the single-precision forms fadds/fsubs (test14).
+ * Fields are as in struct addvals: operand A, operand B, expected A + B,
+ * expected A - B.  test14 passes each operand through frsp before the
+ * arithmetic, so the expected values are those for the frsp-converted
+ * operands.
+ */
+struct addvals sp_addvals[] = {
+ { 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 },
+ { 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x0000000000000000 },
+ { 0x3fdfffffffffffff, 0x0000000000000000, 0x3fe0000000000000, 0x3fe0000000000000 },
+ { 0x3ff0000000000000, 0x3ff0000000000000, 0x4000000000000000, 0x0000000000000000 },
+ { 0xbff0000000000000, 0xbff0000000000000, 0xc000000000000000, 0x0000000000000000 },
+ { 0x402123456789abcd, 0x4021000000000000, 0x403111a2c0000000, 0x3fb1a2b000000000 },
+ { 0x4061200000000000, 0x406123456789abcd, 0x407121a2c0000000, 0xbfba2b0000000000 },
+ { 0x4061230000000000, 0x3fa4560000000000, 0x4061244560000000, 0x406121baa0000000 },
+ { 0xc061230000000000, 0x3fa4560000000000, 0xc06121baa0000000, 0xc061244560000000 },
+ { 0x4061230000000000, 0xbfa4560000000000, 0x406121baa0000000, 0x4061244560000000 },
+ { 0xc061230000000000, 0xbfa4560000000000, 0xc061244560000000, 0xc06121baa0000000 },
+ { 0x3fa1230000000000, 0x4064560000000000, 0x4064571240000000, 0xc06454edc0000000 },
+ { 0xbfa1230000000000, 0x4064560000000000, 0x406454edc0000000, 0xc064571240000000 },
+ { 0x3fa1230000000000, 0xc064560000000000, 0xc06454edc0000000, 0x4064571240000000 },
+ { 0xbfa1230000000000, 0xc064560000000000, 0xc064571240000000, 0x406454edc0000000 },
+ { 0x6780000000000001, 0x6470000000000000, 0x7ff0000000000000, 0x7ff8000000000000 },
+ { 0x6780000000000001, 0x6460000000000000, 0x7ff0000000000000, 0x7ff8000000000000 },
+ { 0x6780000000000001, 0x6450000000000000, 0x7ff0000000000000, 0x7ff8000000000000 },
+ { 0x6780000000000001, 0x6440000000000000, 0x7ff0000000000000, 0x7ff8000000000000 },
+ { 0x7ff8888888888888, 0x7ff9999999999999, 0x7ff8888880000000, 0x7ff8888880000000 },
+ { 0xfff8888888888888, 0x7ff9999999999999, 0xfff8888880000000, 0xfff8888880000000 },
+ { 0x7ff8888888888888, 0x7ff0000000000000, 0x7ff8888880000000, 0x7ff8888880000000 },
+ { 0x7ff8888888888888, 0x0000000000000000, 0x7ff8888880000000, 0x7ff8888880000000 },
+ { 0x7ff8888888888888, 0x0001111111111111, 0x7ff8888880000000, 0x7ff8888880000000 },
+ { 0x7ff8888888888888, 0x3ff0000000000000, 0x7ff8888880000000, 0x7ff8888880000000 },
+ { 0x7ff0000000000000, 0x7ff9999999999999, 0x7ff9999980000000, 0x7ff9999980000000 },
+ { 0x7ff0000000000000, 0x7ff0000000000000, 0x7ff0000000000000, 0x7ff8000000000000 },
+ { 0x7ff0000000000000, 0xfff0000000000000, 0x7ff8000000000000, 0x7ff0000000000000 },
+ { 0x7ff0000000000000, 0x0000000000000000, 0x7ff0000000000000, 0x7ff0000000000000 },
+ { 0x7ff0000000000000, 0x8000000000000000, 0x7ff0000000000000, 0x7ff0000000000000 },
+ { 0x7ff0000000000000, 0x8002222222222222, 0x7ff0000000000000, 0x7ff0000000000000 },
+ { 0x7ff0000000000000, 0xc002222222222222, 0x7ff0000000000000, 0x7ff0000000000000 },
+ { 0x0000000000000000, 0x7ff9999999999999, 0x7ff9999980000000, 0x7ff9999980000000 },
+ { 0x0000000000000000, 0x7ff0000000000000, 0x7ff0000000000000, 0xfff0000000000000 },
+ { 0x8000000000000000, 0x7ff0000000000000, 0x7ff0000000000000, 0xfff0000000000000 },
+ { 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 },
+ { 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 },
+ { 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x0000000000000000 },
+ { 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x0000000000000000 },
+ { 0x8002222222222222, 0x0001111111111111, 0x0000000000000000, 0x8000000000000000 },
+ { 0x0000022222222222, 0x0000111111111111, 0x0000000000000000, 0x0000000000000000 },
+ { 0x47dc000020000000, 0x47ec03ffe0000000, 0x7ff0000000000000, 0xc7dc07ffa0000000 },
+ { 0x47dbffffe0000000, 0x47eff7ffe0000000, 0x7ff0000000000000, 0xc7e1f80000000000 },
+ { 0x47efffffc0000000, 0xc7efffffc0000000, 0x0000000000000000, 0x7ff0000000000000 },
+};
+
+/*
+ * test14 - check fadds and fsubs against every entry of sp_addvals[].
+ *
+ * For each entry, val_a and val_b are loaded into f5 and f6 and passed
+ * through frsp, the single-precision sum and difference are computed into
+ * f7 and f8, and both are stored to results[] and compared with the
+ * expected values.
+ *
+ * arg is unused.
+ *
+ * Returns 0 if every entry matches; otherwise prints the failing index and
+ * the two actual results, and returns that index plus one.
+ */
+int test14(long arg)
+{
+	long i;
+	/* signed count so the loop comparison does not mix signedness */
+	const long count = sizeof(sp_addvals) / sizeof(sp_addvals[0]);
+	unsigned long results[2];
+	struct addvals *vp = sp_addvals;
+
+	set_fpscr(FPS_RN_NEAR);
+	for (i = 0; i < count; ++i, ++vp) {
+		/*
+		 * f5-f8 are overwritten by the asm, so they must be listed as
+		 * clobbers; "memory" covers the loads from *vp and the stores
+		 * to results[].
+		 */
+		asm("lfd 5,0(%0); frsp 5,5; lfd 6,8(%0); frsp 6,6; "
+		    "fadds 7,5,6; fsubs 8,5,6; stfd 7,0(%1); stfd 8,8(%1)"
+		    : : "b" (&vp->val_a), "b" (results)
+		    : "memory", "fr5", "fr6", "fr7", "fr8");
+		if (results[0] != vp->sum || results[1] != vp->diff) {
+			print_hex(i, 2, " ");
+			print_hex(results[0], 16, " ");
+			print_hex(results[1], 16, "\r\n");
+			return i + 1;
+		}
+	}
+	return 0;
+}
+
+/*
+ * fpu_test_14 - driver for test14: calls enable_fp(), then runs test14
+ * through trapit() and hands back whatever trapit() returns.
+ */
+int fpu_test_14(void)
+{
+	int status;
+
+	enable_fp();
+	status = trapit(0, test14);
+	return status;
+}
+
int fail = 0;
void do_test(int num, int (*test)(void))
do_test(10, fpu_test_10);
do_test(11, fpu_test_11);
do_test(12, fpu_test_12);
+ do_test(13, fpu_test_13);
+ do_test(14, fpu_test_14);
return fail;
}