FPU: Implement fmr and related instructions

author Paul Mackerras <paulus@ozlabs.org>

Wed, 15 Jul 2020 04:28:06 +0000 (14:28 +1000)

committer Paul Mackerras <paulus@ozlabs.org>

Thu, 3 Sep 2020 07:44:37 +0000 (17:44 +1000)
author Paul Mackerras <paulus@ozlabs.org>
Wed, 15 Jul 2020 04:28:06 +0000 (14:28 +1000)
committer Paul Mackerras <paulus@ozlabs.org>
Thu, 3 Sep 2020 07:44:37 +0000 (17:44 +1000)
diff --git a/decode1.vhdl b/decode1.vhdl

index 343c0c32f7245c702b47604761c54f6682c130c8..5f5fb805387d47497f8e86873717aebc7d309ede 100644 (file)
--- a/decode1.vhdl
+++ b/decode1.vhdl
@@ -428,6 +428,11 @@ architecture behaviour of decode1 is
          2#011000100#  => (FPU,   OP_FPOP,       NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC,   '0', '0'), --  4/6=mtfsfi
          2#011110010#  => (FPU,   OP_FPOP_I,     NONE, FRB,  NONE, FRT,  '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC,   '0', '0'), -- 18/7=mffs family
          2#011110110#  => (FPU,   OP_FPOP_I,     NONE, FRB,  NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC,   '0', '0'), -- 22/7=mtfsf
+        2#100000000#  => (FPU,   OP_FPOP,       FRA,  FRB,  NONE, FRT,  '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC,   '0', '0'), --  0/8=fcpsgn
+        2#100000001#  => (FPU,   OP_FPOP,       NONE, FRB,  NONE, FRT,  '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC,   '0', '0'), --  1/8=fneg
+        2#100000010#  => (FPU,   OP_FPOP,       NONE, FRB,  NONE, FRT,  '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC,   '0', '0'), --  2/8=fmr
+        2#100000100#  => (FPU,   OP_FPOP,       NONE, FRB,  NONE, FRT,  '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC,   '0', '0'), --  4/8=fnabs
+        2#100001000#  => (FPU,   OP_FPOP,       NONE, FRB,  NONE, FRT,  '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC,   '0', '0'), --  8/8=fabs
          others => illegal_inst
          );
  
diff --git a/decode2.vhdl b/decode2.vhdl

index 8b2ab8c7ed63193f2ce32e2503f2d6d01e5b0e4b..ec8232f570a1226db6d9170af3dffd68f2e039a2 100644 (file)
--- a/decode2.vhdl
+++ b/decode2.vhdl
@@ -80,6 +80,8 @@ architecture behaviour of decode2 is
              return (is_fast_spr(ispr), ispr, reg_data);
          elsif t = CIA then
              return ('0', (others => '0'), instr_addr);
+        elsif HAS_FPU and t = FRA then
+            return ('1', fpr_to_gspr(insn_fra(insn_in)), reg_data);
          else
              return ('0', (others => '0'), (others => '0'));
          end if;
@@ -300,6 +302,7 @@ begin
      end process;
  
      r_out.read1_reg <= d_in.ispr1 when d_in.decode.input_reg_a = SPR
+                       else fpr_to_gspr(insn_fra(d_in.insn)) when d_in.decode.input_reg_a = FRA and HAS_FPU
                         else gpr_to_gspr(insn_ra(d_in.insn));
      r_out.read2_reg <= d_in.ispr2 when d_in.decode.input_reg_b = SPR
                         else fpr_to_gspr(insn_frb(d_in.insn)) when d_in.decode.input_reg_b = FRB and HAS_FPU
diff --git a/decode_types.vhdl b/decode_types.vhdl

index 5eaef5044bea934d32057672195246816f19c40b..08fdc4a2fc95420d5ab88287c7d118336882b3da 100644 (file)
--- a/decode_types.vhdl
+++ b/decode_types.vhdl
@@ -23,7 +23,7 @@ package decode_types is
                           OP_BCD, OP_ADDG6S,
                           OP_FETCH_FAILED
                          );
-    type input_reg_a_t is (NONE, RA, RA_OR_ZERO, SPR, CIA);
+    type input_reg_a_t is (NONE, RA, RA_OR_ZERO, SPR, CIA, FRA);
      type input_reg_b_t is (NONE, RB, CONST_UI, CONST_SI, CONST_SI_HI, CONST_UI_HI, CONST_LI, CONST_BD,
                             CONST_DXHI4, CONST_DS, CONST_M1, CONST_SH, CONST_SH32, SPR, FRB);
      type input_reg_c_t is (NONE, RS, RCR, FRS);
diff --git a/fpu.vhdl b/fpu.vhdl

index 047bf2d0cecac030dc199193509fba5dc27acb5c..3711b35a4cd4a3851293ef04d6f22e51112734bf 100644 (file)
--- a/fpu.vhdl
+++ b/fpu.vhdl
@@ -24,9 +24,20 @@ entity fpu is
  end entity fpu;
  
  architecture behaviour of fpu is
+    type fp_number_class is (ZERO, FINITE, INFINITY, NAN);     -- category of an unpacked operand or result
+
+    constant EXP_BITS : natural := 13;                         -- width of the signed exponent fields below
+
+    type fpu_reg_type is record                                -- unpacked form of a 64-bit register operand
+        class    : fp_number_class;                            -- category worked out when the operand is unpacked
+        negative : std_ulogic;                                 -- copy of bit 63 of the source register
+        exponent : signed(EXP_BITS-1 downto 0);         -- unbiased (zero for integer operands)
+        mantissa : std_ulogic_vector(63 downto 0);      -- 10.54 format (raw 64-bit value for integer operands)
+    end record;
  
      type state_t is (IDLE,
-                     DO_MCRFS, DO_MTFSB, DO_MTFSFI, DO_MFFS, DO_MTFSF);
+                     DO_MCRFS, DO_MTFSB, DO_MTFSFI, DO_MFFS, DO_MTFSF,
+                     DO_FMR);
  
      type reg_type is record
          state        : state_t;
@@ -41,9 +52,14 @@ architecture behaviour of fpu is
          is_cmp       : std_ulogic;
          single_prec  : std_ulogic;
          fpscr        : std_ulogic_vector(31 downto 0);
-        b            : std_ulogic_vector(63 downto 0);
+        a            : fpu_reg_type;
+        b            : fpu_reg_type;
          r            : std_ulogic_vector(63 downto 0);
+        result_sign  : std_ulogic;
+        result_class : fp_number_class;
+        result_exp   : signed(EXP_BITS-1 downto 0);
          writing_back : std_ulogic;
+        int_result   : std_ulogic;
          cr_result    : std_ulogic_vector(3 downto 0);
          cr_mask      : std_ulogic_vector(7 downto 0);
      end record;
@@ -51,6 +67,72 @@ architecture behaviour of fpu is
      signal r, rin : reg_type;
  
      signal fp_result     : std_ulogic_vector(63 downto 0);
+    signal opsel_r       : std_ulogic_vector(1 downto 0);
+    signal result        : std_ulogic_vector(63 downto 0);
+
+    -- Split a DP floating-point number into components (sign, unbiased exponent, mantissa) and work out its class.
+    -- If is_int = 1, the input is considered an integer: the 64 bits are copied to mantissa and exponent is set to 0.
+    function decode_dp(fpr: std_ulogic_vector(63 downto 0); is_int: std_ulogic) return fpu_reg_type is
+        variable r       : fpu_reg_type;                  -- value being built and returned
+        variable exp_nz  : std_ulogic;                    -- '1' when any exponent bit (62..52) is set
+        variable exp_ao  : std_ulogic;                    -- '1' when all exponent bits (62..52) are set
+        variable frac_nz : std_ulogic;                    -- '1' when any fraction bit (51..0) is set
+        variable cls     : std_ulogic_vector(2 downto 0); -- exp_ao & exp_nz & frac_nz, selects the class
+    begin
+        r.negative := fpr(63);                            -- sign bit, recorded for both FP and integer inputs
+        exp_nz := or (fpr(62 downto 52));
+        exp_ao := and (fpr(62 downto 52));
+        frac_nz := or (fpr(51 downto 0));
+        if is_int = '0' then
+            r.exponent := signed(resize(unsigned(fpr(62 downto 52)), EXP_BITS)) - to_signed(1023, EXP_BITS);  -- subtract the bias of 1023
+            if exp_nz = '0' then
+                r.exponent := to_signed(-1022, EXP_BITS); -- exponent field all zeros: use -1022 rather than -1023
+            end if;
+            r.mantissa := "000000000" & exp_nz & fpr(51 downto 0) & "00";  -- bit 54 = exp_nz (implicit bit), bits 53..2 = fraction, bits 1..0 = 0
+            cls := exp_ao & exp_nz & frac_nz;
+            case cls is
+                when "000"  => r.class := ZERO;      -- exponent and fraction both zero
+                when "001"  => r.class := FINITE;    -- denormalized
+                when "010"  => r.class := FINITE;    -- exponent not all ones, fraction zero
+                when "011"  => r.class := FINITE;    -- exponent not all ones, fraction non-zero
+                when "110"  => r.class := INFINITY;  -- exponent all ones, fraction zero
+                when others => r.class := NAN;       -- "111": exponent all ones, fraction non-zero
+            end case;
+        else
+            r.mantissa := fpr;                       -- integer input: raw bits, no implicit bit inserted
+            r.exponent := (others => '0');
+            if (fpr(63) or exp_nz or frac_nz) = '1' then  -- together these cover all 64 input bits
+                r.class := FINITE;
+            else
+                r.class := ZERO;
+            end if;
+        end if;
+        return r;
+    end;
+
+    -- Construct a DP floating-point result (returned as 64 bits) from sign, class, unbiased exponent and mantissa
+    function pack_dp(sign: std_ulogic; class: fp_number_class; exp: signed(EXP_BITS-1 downto 0);
+                     mantissa: std_ulogic_vector) return std_ulogic_vector is  -- NOTE(review): mantissa is unconstrained but bits 54 and 53..2 are indexed; confirm callers pass a range covering them
+        variable result : std_ulogic_vector(63 downto 0);
+    begin
+        result := (others => '0');                   -- fields not assigned below stay zero
+        result(63) := sign;
+        case class is
+            when ZERO =>                             -- only the sign bit is set
+            when FINITE =>
+                if mantissa(54) = '1' then
+                    -- normalized number
+                    result(62 downto 52) := std_ulogic_vector(resize(exp, 11) + 1023);  -- truncate to 11 bits and add the bias
+                end if;                              -- otherwise the exponent field is left all zeros
+                result(51 downto 0) := mantissa(53 downto 2);  -- fraction; mantissa bits 1..0 are not used
+            when INFINITY =>
+                result(62 downto 52) := "11111111111";         -- exponent all ones, fraction left zero
+            when NAN =>
+                result(62 downto 52) := "11111111111";         -- exponent all ones
+                result(51 downto 0) := mantissa(53 downto 2);  -- fraction taken from the mantissa
+        end case;
+        return result;
+    end;
  
  begin
      fpu_0: process(clk)
@@ -85,14 +167,18 @@ begin
  
      fpu_1: process(all)
          variable v           : reg_type;
+        variable adec        : fpu_reg_type;
+        variable bdec        : fpu_reg_type;
          variable fpscr_mask  : std_ulogic_vector(31 downto 0);
          variable illegal     : std_ulogic;
          variable j, k        : integer;
          variable flm         : std_ulogic_vector(7 downto 0);
+        variable int_input   : std_ulogic;
      begin
          v := r;
          illegal := '0';
          v.busy := '0';
+        int_input := '0';
  
          -- capture incoming instruction
          if e_in.valid = '1' then
@@ -101,6 +187,7 @@ begin
              v.fe_mode := or (e_in.fe_mode);
              v.dest_fpr := e_in.frt;
              v.single_prec := e_in.single;
+            v.int_result := '0';
              v.rc := e_in.rc;
              v.is_cmp := e_in.out_cr;
              if e_in.out_cr = '0' then
@@ -108,11 +195,19 @@ begin
              else
                  v.cr_mask := num_to_fxm(to_integer(unsigned(insn_bf(e_in.insn))));
              end if;
-            v.b := e_in.frb;
+            int_input := '0';
+            if e_in.op = OP_FPOP_I then
+                int_input := '1';
+            end if;
+            adec := decode_dp(e_in.fra, int_input);
+            bdec := decode_dp(e_in.frb, int_input);
+            v.a := adec;
+            v.b := bdec;
          end if;
  
          v.writing_back := '0';
          v.instr_done := '0';
+        opsel_r <= "00";
          fpscr_mask := (others => '1');
  
          case r.state is
@@ -133,6 +228,8 @@ begin
                              else
                                  v.state := DO_MTFSF;
                              end if;
+                        when "01000" =>
+                            v.state := DO_FMR;
                          when others =>
                              illegal := '1';
                      end case;
@@ -177,7 +274,9 @@ begin
                  v.state := IDLE;
  
              when DO_MFFS =>
+                v.int_result := '1';
                  v.writing_back := '1';
+                opsel_r <= "10";
                  case r.insn(20 downto 16) is
                      when "00000" =>
                          -- mffs
@@ -191,7 +290,7 @@ begin
                          -- mffscrn
                          fpscr_mask := x"000000FF";
                          v.fpscr(FPSCR_RN+1 downto FPSCR_RN) :=
-                            r.b(FPSCR_RN+1 downto FPSCR_RN);
+                            r.b.mantissa(FPSCR_RN+1 downto FPSCR_RN);
                      when "10111" =>
                          -- mffscrni
                          fpscr_mask := x"000000FF";
@@ -216,19 +315,48 @@ begin
                  for i in 0 to 7 loop
                      k := i * 4;
                      if flm(i) = '1' then
-                        v.fpscr(k + 3 downto k) := r.b(k + 3 downto k);
+                        v.fpscr(k + 3 downto k) := r.b.mantissa(k + 3 downto k);
                      end if;
                  end loop;
                  v.instr_done := '1';
                  v.state := IDLE;
  
+            when DO_FMR =>
+                v.result_class := r.b.class;
+                v.result_exp := r.b.exponent;
+                if r.insn(9) = '1' then
+                    v.result_sign := '0';              -- fabs
+                elsif r.insn(8) = '1' then
+                    v.result_sign := '1';              -- fnabs
+                elsif r.insn(7) = '1' then
+                    v.result_sign := r.b.negative;     -- fmr
+                elsif r.insn(6) = '1' then
+                    v.result_sign := not r.b.negative; -- fneg
+                else
+                    v.result_sign := r.a.negative;     -- fcpsgn
+                end if;
+                v.writing_back := '1';
+                v.instr_done := '1';
+                v.state := IDLE;
+
          end case;
  
          -- Data path.
-        -- Just enough to read FPSCR for now.
-        v.r := x"00000000" & (r.fpscr and fpscr_mask);
+        case opsel_r is
+            when "00" =>
+                result <= r.b.mantissa;
+            when "10" =>
+                result <= x"00000000" & (r.fpscr and fpscr_mask);
+            when others =>
+                result <= (others => '0');
+        end case;
+        v.r := result;
  
-        fp_result <= r.r;
+        if r.int_result = '1' then
+            fp_result <= r.r;
+        else
+            fp_result <= pack_dp(r.result_sign, r.result_class, r.result_exp, r.r);
+        end if;
  
          v.fpscr(FPSCR_VX) := (or (v.fpscr(FPSCR_VXSNAN downto FPSCR_VXVC))) or
                               (or (v.fpscr(FPSCR_VXSOFT downto FPSCR_VXCVI)));
diff --git a/tests/fpu/fpu.c b/tests/fpu/fpu.c

index f9c4245f7d1739cdfcd617d2d26dd386035ffffa..46668f8cd70c048341873671831b4a77812f1093 100644 (file)
--- a/tests/fpu/fpu.c
+++ b/tests/fpu/fpu.c
@@ -438,6 +438,39 @@ int fpu_test_5(void)
         return 0;
  }
  
+#define SIGN   0x8000000000000000ul
+
+/* Run fmr/fneg/fabs/fnabs/fcpsgn on every sp_dp_equiv[].dp value and compare the stored results */
+/* with v and its sign-bit variants.  Returns 0 on success, else failing index + 1; arg is ignored. */
+int test6(long arg)
+{
+       unsigned long i, v, results[6];
+
+       for (i = 0; i < sizeof(sp_dp_equiv) / sizeof(sp_dp_equiv[0]); ++i) {
+               v = sp_dp_equiv[i].dp;
+               /* One asm with FPR clobbers: f3/f6/f9/f10 carry values from one step to the next. */
+               asm("lfd%U0%X0 3,%0; fmr 6,3; fneg 7,3; stfd 6,0(%1); stfd 7,8(%1); "
+                   "fabs 9,6; fnabs 10,6; stfd 9,16(%1); stfd 10,24(%1); "
+                   "fcpsgn 4,9,3; stfd 4,32(%1); fcpsgn 5,10,3; stfd 5,40(%1)"
+                   : : "m" (sp_dp_equiv[i].dp), "b" (results)
+                   : "memory", "fr3", "fr4", "fr5", "fr6", "fr7", "fr9", "fr10");
+               if (results[0] != v ||                  /* fmr */
+                   results[1] != (v ^ SIGN) ||         /* fneg */
+                   results[2] != (v & ~SIGN) ||        /* fabs */
+                   results[3] != (v | SIGN) ||         /* fnabs */
+                   results[4] != (v & ~SIGN) ||        /* fcpsgn, sign taken from the fabs result */
+                   results[5] != (v | SIGN))           /* fcpsgn, sign taken from the fnabs result */
+                       return i + 1;
+       }
+       return 0;
+}
+
+int fpu_test_6(void)                   /* entry point registered as test 6 in main() */
+{
+       enable_fp();                    /* called before test6 runs its FP instructions (defined outside this hunk) */
+       return trapit(0, test6);        /* hands test6 and a 0 argument to trapit and returns trapit's result */
+}
+
  int fail = 0;
  
  void do_test(int num, int (*test)(void))
@@ -469,6 +502,7 @@ int main(void)
         do_test(3, fpu_test_3);
         do_test(4, fpu_test_4);
         do_test(5, fpu_test_5);
+       do_test(6, fpu_test_6);
  
         return fail;
  }
diff --git a/tests/test_fpu.bin b/tests/test_fpu.bin

index 6bac86156f19b99c44efc42659d3d5f6219d6486..4fb260e1d5a3e4f37deea64098b82e55151aceb2 100755 (executable)

Binary files a/tests/test_fpu.bin and b/tests/test_fpu.bin differ
diff --git a/tests/test_fpu.console_out b/tests/test_fpu.console_out

index 99d32e683af12c0e16b67edbdb42e543c0cee358..a49bb9b00543d7ccbc2c1af7af293d3fc84464ef 100644 (file)
--- a/tests/test_fpu.console_out
+++ b/tests/test_fpu.console_out
@@ -3,3 +3,4 @@ test 02:PASS
  test 03:PASS\r
  test 04:PASS\r
  test 05:PASS\r
+test 06:PASS\r
author	Paul Mackerras <paulus@ozlabs.org>
	Wed, 15 Jul 2020 04:28:06 +0000 (14:28 +1000)
committer	Paul Mackerras <paulus@ozlabs.org>
	Thu, 3 Sep 2020 07:44:37 +0000 (17:44 +1000)
decode1.vhdl		patch \| blob \| history
decode2.vhdl		patch \| blob \| history
decode_types.vhdl		patch \| blob \| history
fpu.vhdl		patch \| blob \| history
tests/fpu/fpu.c		patch \| blob \| history
tests/test_fpu.bin		patch \| blob \| history
tests/test_fpu.console_out		patch \| blob \| history