From: Paul Mackerras Date: Wed, 22 Jul 2020 06:13:12 +0000 (+1000) Subject: FPU: Implement floating round-to-integer instructions X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=0ad2aa30149d0a6e2d3082e841f6fe5079209067;p=microwatt.git FPU: Implement floating round-to-integer instructions This implements frin, friz, frip and frim, and adds tests for them. Signed-off-by: Paul Mackerras --- diff --git a/decode1.vhdl b/decode1.vhdl index c659e3e..a42899d 100644 --- a/decode1.vhdl +++ b/decode1.vhdl @@ -441,6 +441,10 @@ architecture behaviour of decode1 is 2#100000010# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 2/8=fmr 2#100000100# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 4/8=fnabs 2#100001000# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 8/8=fabs + 2#100001100# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 12/8=frin + 2#100001101# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 13/8=friz + 2#100001110# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 14/8=frip + 2#100001111# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 15/8=frim 2#110000000# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- 0/12=frsp 2#111000000# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 0/14=fctiw 2#111000100# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, 
'0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 4/14=fctiwu diff --git a/fpu.vhdl b/fpu.vhdl index 6301fa7..371fdc5 100644 --- a/fpu.vhdl +++ b/fpu.vhdl @@ -39,7 +39,8 @@ architecture behaviour of fpu is DO_MCRFS, DO_MTFSB, DO_MTFSFI, DO_MFFS, DO_MTFSF, DO_FMR, DO_FCFID, DO_FCTI, - DO_FRSP, + DO_FRSP, DO_FRI, + FRI_1, INT_SHIFT, INT_ROUND, INT_ISHIFT, INT_FINAL, INT_CHECK, INT_OFLOW, FINISH, NORMALIZE, @@ -461,7 +462,11 @@ begin v.state := DO_MTFSF; end if; when "01000" => - v.state := DO_FMR; + if e_in.insn(9 downto 8) /= "11" then + v.state := DO_FMR; + else + v.state := DO_FRI; + end if; when "01100" => v.state := DO_FRSP; when "01110" => @@ -587,6 +592,31 @@ begin v.instr_done := '1'; v.state := IDLE; + when DO_FRI => -- fri[nzpm] + opsel_a <= AIN_B; + v.result_class := r.b.class; + v.result_sign := r.b.negative; + v.result_exp := r.b.exponent; + v.fpscr(FPSCR_FR) := '0'; + v.fpscr(FPSCR_FI) := '0'; + if r.b.class = NAN and r.b.mantissa(53) = '0' then + -- Signalling NAN + v.fpscr(FPSCR_VXSNAN) := '1'; + invalid := '1'; + end if; + if r.b.class = FINITE then + if r.b.exponent >= to_signed(52, EXP_BITS) then + -- integer already, no rounding required + arith_done := '1'; + else + v.shift := r.b.exponent - to_signed(52, EXP_BITS); + v.state := FRI_1; + v.round_mode := '1' & r.insn(7 downto 6); + end if; + else + arith_done := '1'; + end if; + when DO_FRSP => opsel_a <= AIN_B; v.result_class := r.b.class; @@ -749,6 +779,12 @@ begin invalid := '1'; arith_done := '1'; + when FRI_1 => + opsel_r <= RES_SHIFT; + set_x := '1'; + v.shift := to_signed(-2, EXP_BITS); + v.state := ROUNDING; + when FINISH => if r.r(63 downto 54) /= "0000000001" then renormalize := '1'; diff --git a/tests/fpu/fpu.c b/tests/fpu/fpu.c index 3c6a9bd..d24fe14 100644 --- a/tests/fpu/fpu.c +++ b/tests/fpu/fpu.c @@ -753,6 +753,76 @@ int fpu_test_10(void) return trapit(0, test10); } +struct frivals { /* One test vector for test11: an input operand and the result expected from each of frin, friz, frip and frim, all held as raw 64-bit patterns that are moved through lfd/stfd. */ + unsigned long val; /* operand, loaded into f3 with lfd */ + unsigned long nval; /* expected frin result */ + unsigned long zval; /* expected friz result */ + unsigned long pval; /* expected frip result */ + unsigned 
long mval; /* expected frim result */ +} frivals[] = { + { 0x0000000000000000, 0, 0, 0, 0 }, /* positive zero */ + { 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000 }, /* negative zero: the sign bit is expected to be kept */ + { 0x3fdfffffffffffff, 0, 0, 0x3ff0000000000000, 0 }, /* just below 0.5: only frip expects 1.0 */ + { 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000 }, + { 0xbff0000000000000, 0xbff0000000000000, 0xbff0000000000000, 0xbff0000000000000, 0xbff0000000000000 }, + { 0x402123456789abcd, 0x4022000000000000, 0x4020000000000000, 0x4022000000000000, 0x4020000000000000 }, + { 0x406123456789abcd, 0x4061200000000000, 0x4061200000000000, 0x4061400000000000, 0x4061200000000000 }, + { 0x409123456789abcd, 0x4091240000000000, 0x4091200000000000, 0x4091240000000000, 0x4091200000000000 }, + { 0x41c123456789abcd, 0x41c1234567800000, 0x41c1234567800000, 0x41c1234568000000, 0x41c1234567800000 }, + { 0x41d123456789abcd, 0x41d1234567800000, 0x41d1234567800000, 0x41d1234567c00000, 0x41d1234567800000 }, + { 0x41e123456789abcd, 0x41e1234567800000, 0x41e1234567800000, 0x41e1234567a00000, 0x41e1234567800000 }, + { 0x41f123456789abcd, 0x41f1234567900000, 0x41f1234567800000, 0x41f1234567900000, 0x41f1234567800000 }, + { 0xc1f123456789abcd, 0xc1f1234567900000, 0xc1f1234567800000, 0xc1f1234567800000, 0xc1f1234567900000 }, + { 0xc1f1234567880000, 0xc1f1234567900000, 0xc1f1234567800000, 0xc1f1234567800000, 0xc1f1234567900000 }, /* fractional part is exactly one half: frin expects the larger magnitude */ + { 0x432123456789abcd, 0x432123456789abce, 0x432123456789abcc, 0x432123456789abce, 0x432123456789abcc }, + { 0x433123456789abcd, 0x433123456789abcd, 0x433123456789abcd, 0x433123456789abcd, 0x433123456789abcd }, /* from exponent field 0x433 upwards every expected result equals the input */ + { 0x434123456789abcd, 0x434123456789abcd, 0x434123456789abcd, 0x434123456789abcd, 0x434123456789abcd }, + { 0x43c123456789abcd, 0x43c123456789abcd, 0x43c123456789abcd, 0x43c123456789abcd, 0x43c123456789abcd }, + { 0x43d123456789abcd, 0x43d123456789abcd, 0x43d123456789abcd, 0x43d123456789abcd, 0x43d123456789abcd }, + { 0x43e123456789abcd, 0x43e123456789abcd, 0x43e123456789abcd, 
0x43e123456789abcd, 0x43e123456789abcd }, + { 0x43f123456789abcd, 0x43f123456789abcd, 0x43f123456789abcd, 0x43f123456789abcd, 0x43f123456789abcd }, + { 0xc3f123456789abcd, 0xc3f123456789abcd, 0xc3f123456789abcd, 0xc3f123456789abcd, 0xc3f123456789abcd }, + { 0x7ff0000000000000, 0x7ff0000000000000, 0x7ff0000000000000, 0x7ff0000000000000, 0x7ff0000000000000 }, /* positive infinity */ + { 0xfff0000000000000, 0xfff0000000000000, 0xfff0000000000000, 0xfff0000000000000, 0xfff0000000000000 }, /* negative infinity */ + { 0x7ff123456789abcd, 0x7ff923456789abcd, 0x7ff923456789abcd, 0x7ff923456789abcd, 0x7ff923456789abcd }, /* NaN input with bit 51 clear: every expected result has bit 51 set */ + { 0xfff923456789abcd, 0xfff923456789abcd, 0xfff923456789abcd, 0xfff923456789abcd, 0xfff923456789abcd }, /* NaN input with bit 51 already set: expected unchanged */ +}; + +int test11(long arg) /* Runs frin, friz, frip and frim on every frivals[] entry and compares the four stored results with nval, zval, pval and mval. Returns 0 when all entries match; on the first mismatch it prints the entry index and the four actual results and returns the index plus one. arg is not used. */ +{ + long i; + unsigned long results[4]; + struct frivals *vp = frivals; + + for (i = 0; i < sizeof(frivals) / sizeof(frivals[0]); ++i, ++vp) { + set_fpscr(FPS_RN_FLOOR); /* each instruction below runs with an FPS_RN_* setting that differs from its own rounding direction; presumably this checks that the results do not depend on the FPSCR rounding mode (TODO confirm) */ + asm("lfd 3,0(%0); frin 4,3; stfd 4,0(%1)" + : : "b" (&vp->val), "b" (results) : "memory"); /* f3 keeps the operand for the three asm statements that follow; NOTE(review): f3, f4 and f5 are not listed as clobbers, so this relies on the compiler leaving those registers alone in between */ + set_fpscr(FPS_RN_NEAR); + asm("friz 5,3; stfd 5,8(%0)" : : "b" (results) : "memory"); /* byte offsets 8, 16 and 24 address results[1] to results[3], assuming an 8-byte unsigned long */ + set_fpscr(FPS_RN_ZERO); + asm("frip 5,3; stfd 5,16(%0)" : : "b" (results) : "memory"); + set_fpscr(FPS_RN_CEIL); + asm("frim 5,3; stfd 5,24(%0)" : : "b" (results) : "memory"); + if (results[0] != vp->nval || results[1] != vp->zval || + results[2] != vp->pval || results[3] != vp->mval) { + print_hex(i, 2, "\r\n"); /* index of the failing entry, followed by the four actual results */ + print_hex(results[0], 16, " "); + print_hex(results[1], 16, " "); + print_hex(results[2], 16, " "); + print_hex(results[3], 16, " "); + return i + 1; + } + } + return 0; +} + +int fpu_test_11(void) /* Entry point for test 11: calls enable_fp(), then runs test11 through trapit() with argument 0 and returns trapit's result. */ +{ + enable_fp(); + return trapit(0, test11); +} + int fail = 0; void do_test(int num, int (*test)(void)) @@ -788,6 +858,7 @@ int main(void) do_test(8, fpu_test_8); do_test(9, fpu_test_9); do_test(10, fpu_test_10); + do_test(11, fpu_test_11); return fail; } diff --git a/tests/test_fpu.bin b/tests/test_fpu.bin index 81d1854..d2320cd 100755 Binary files a/tests/test_fpu.bin and b/tests/test_fpu.bin differ diff --git 
a/tests/test_fpu.console_out b/tests/test_fpu.console_out index 3e84260..3a5a601 100644 --- a/tests/test_fpu.console_out +++ b/tests/test_fpu.console_out @@ -8,3 +8,4 @@ test 07:PASS test 08:PASS test 09:PASS test 10:PASS +test 11:PASS