From: Paul Mackerras Date: Wed, 29 Jul 2020 07:34:03 +0000 (+1000) Subject: FPU: Implement fre[s] X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=4ad5ab92038412d46ef0dc2477e079219b8d7ced;p=microwatt.git FPU: Implement fre[s] This just returns the value from the inverse lookup table. The result is accurate to better than one part in 512 (the architecture requires 1/256). This also adds a simple test, which relies on the particular values in the inverse lookup table, so it is not a general test. Signed-off-by: Paul Mackerras --- diff --git a/decode1.vhdl b/decode1.vhdl index ddcbb3c..c0c3465 100644 --- a/decode1.vhdl +++ b/decode1.vhdl @@ -419,6 +419,7 @@ architecture behaviour of decode1 is 2#10010# => (FPU, OP_FPOP, FRA, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- fdivs 2#10100# => (FPU, OP_FPOP, FRA, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- fsubs 2#10101# => (FPU, OP_FPOP, FRA, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- fadds + 2#11000# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- fres 2#11001# => (FPU, OP_FPOP, FRA, NONE, FRC, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- fmuls others => illegal_inst ); @@ -473,6 +474,7 @@ architecture behaviour of decode1 is 2#0010# => (FPU, OP_FPOP, FRA, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- fdiv 2#0100# => (FPU, OP_FPOP, FRA, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- fsub 2#0101# => (FPU, OP_FPOP, FRA, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- fadd + 2#1000# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- fre 2#1001# => (FPU, OP_FPOP, FRA, NONE, FRC, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- fmul others => illegal_inst ); diff --git a/fpu.vhdl b/fpu.vhdl index 2584e1c..fee1776 100644 --- a/fpu.vhdl +++ b/fpu.vhdl @@ -41,11 +41,13 @@ architecture behaviour of fpu is DO_FCFID, DO_FCTI, DO_FRSP, DO_FRI, DO_FADD, DO_FMUL, DO_FDIV, + DO_FRE, FRI_1, ADD_SHIFT, ADD_2, ADD_3, MULT_1, LOOKUP, DIV_2, DIV_3, DIV_4, DIV_5, DIV_6, + FRE_1, INT_SHIFT, INT_ROUND, INT_ISHIFT, INT_FINAL, INT_CHECK, INT_OFLOW, FINISH, NORMALIZE, @@ -639,6 +641,8 @@ begin v.state := DO_FDIV; when "10100" | "10101" => v.state := DO_FADD; + when "11000" => + v.state := DO_FRE; when "11001" => v.is_multiply := '1'; v.state := DO_FMUL; @@ -1041,6 +1045,36 @@ begin arith_done := '1'; end if; + when DO_FRE => + opsel_a <= AIN_B; + v.result_class := r.b.class; + v.result_sign := r.b.negative; + v.fpscr(FPSCR_FR) := '0'; + v.fpscr(FPSCR_FI) := '0'; + if r.b.class = NAN and r.b.mantissa(53) = '0' then + v.fpscr(FPSCR_VXSNAN) := '1'; + invalid := '1'; + end if; + case r.b.class is + when FINITE => + v.result_exp := - r.b.exponent; + if r.b.mantissa(54) = '0' then + v.state := RENORM_B; + else + v.state := FRE_1; + end if; + when NAN => + -- result is B + arith_done := '1'; + when INFINITY => + v.result_class := ZERO; + arith_done := '1'; + when ZERO => + v.result_class := INFINITY; + zero_divide := '1'; + arith_done := '1'; + end case; + when RENORM_A => renormalize := '1'; v.state := RENORM_A2; @@ -1149,7 +1183,11 @@ begin opsel_a <= AIN_B; -- wait one cycle for inverse_table[B] lookup v.first := '1'; - v.state := DIV_2; + if r.insn(4) = '0' then + v.state := DIV_2; + else + v.state := FRE_1; + end if; when DIV_2 => -- compute Y = inverse_table[B] (when count=0); P = 2 - B * Y @@ -1221,6 +1259,12 @@ begin end if; v.state := FINISH; + when FRE_1 => + opsel_r <= RES_MISC; + misc_sel <= "0111"; + v.shift := to_signed(1, EXP_BITS); + v.state := NORMALIZE; + when INT_SHIFT => opsel_r <= RES_SHIFT; set_x := '1'; @@ -1609,6 +1653,8 @@ begin when "0110" => -- fmrgew result misc := r.a.mantissa(63 downto 32) & r.b.mantissa(63 downto 32); + when "0111" => + misc := 10x"000" & inverse_est & 35x"000000000"; when "1000" => -- max positive result for fctiw[z] misc := x"000000007fffffff"; diff --git a/tests/fpu/fpu.c b/tests/fpu/fpu.c index cbb0ee2..e62ce27 100644 --- a/tests/fpu/fpu.c +++ b/tests/fpu/fpu.c @@ -1111,6 +1111,43 @@ int fpu_test_17(void) return trapit(0, test17); } +struct recipvals { + unsigned long val; + unsigned long inv; +} recipvals[] = { + { 0x0000000000000000, 0x7ff0000000000000 }, + { 0xfff0000000000000, 0x8000000000000000 }, + { 0x3ff0000000000000, 0x3feff00400000000 }, + { 0xbff0000000000000, 0xbfeff00400000000 }, + { 0x4008000000000000, 0x3fd54e3800000000 }, + { 0xc03ffffffdffffbf, 0xbfa0040000000000 }, +}; + +int test18(long arg) +{ + long i; + unsigned long result; + struct recipvals *vp = recipvals; + + set_fpscr(FPS_RN_NEAR); + for (i = 0; i < sizeof(recipvals) / sizeof(recipvals[0]); ++i, ++vp) { + asm("lfd 6,0(%0); fre 7,6; stfd 7,0(%1)" + : : "b" (&vp->val), "b" (&result) : "memory"); + if (result != vp->inv) { + print_hex(i, 2, " "); + print_hex(result, 16, " "); + return i + 1; + } + } + return 0; +} + +int fpu_test_18(void) +{ + enable_fp(); + return trapit(0, test18); +} + int fail = 0; void do_test(int num, int (*test)(void)) @@ -1153,6 +1190,7 @@ int main(void) do_test(15, fpu_test_15); do_test(16, fpu_test_16); do_test(17, fpu_test_17); + do_test(18, fpu_test_18); return fail; } diff --git a/tests/test_fpu.bin b/tests/test_fpu.bin index dc5af29..572aad0 100755 Binary files a/tests/test_fpu.bin and b/tests/test_fpu.bin differ diff --git a/tests/test_fpu.console_out b/tests/test_fpu.console_out index a8e2dcb..a5c08ea 100644 --- a/tests/test_fpu.console_out +++ b/tests/test_fpu.console_out @@ -15,3 +15,4 @@ test 14:PASS test 15:PASS test 16:PASS test 17:PASS +test 18:PASS