fpu.vhdl

   1 -- Floating-point unit for Microwatt
   2
   3 library ieee;
   4 use ieee.std_logic_1164.all;
   5 use ieee.numeric_std.all;
   6
   7 library work;
   8 use work.insn_helpers.all;
   9 use work.decode_types.all;
  10 use work.crhelpers.all;
  11 use work.helpers.all;
  12 use work.common.all;
  13
  14 entity fpu is                              -- external interface of the floating-point unit
  15     port (
  16         clk : in std_ulogic;               -- clock; registers are updated on its rising edge
  17         rst : in std_ulogic;               -- synchronous reset, sampled on the rising clock edge
  18
  19         e_in  : in  Execute1toFPUType;     -- incoming request; valid is asserted-checked to occur only in the IDLE state
  20         e_out : out FPUToExecute1Type;     -- busy, exception and interrupt status
  21
  22         w_out : out FPUToWritebackType     -- result data, destination register and CR update
  23         );
  24 end entity fpu;
  25
  26 architecture behaviour of fpu is
  27     type fp_number_class is (ZERO, FINITE, INFINITY, NAN);  -- denormals are classed as FINITE by decode_dp
  28
  29     constant EXP_BITS : natural := 13;  -- width of the signed exponent fields used below
  30
  31     type fpu_reg_type is record  -- unpacked operand, as returned by decode_dp
  32         class    : fp_number_class;
  33         negative : std_ulogic;  -- copy of bit 63 of the source value
  34         exponent : signed(EXP_BITS-1 downto 0);         -- unbiased; -1022 for a zero exponent field, 0 for integer inputs
  35         mantissa : std_ulogic_vector(63 downto 0);      -- 10.54 format (unit bit is bit 54); raw 64-bit value for integer inputs
  36     end record;
  37
  38     type state_t is (IDLE,  -- IDLE is the reset state; fpu_0 asserts that e_in.valid is only seen in IDLE
  39                      DO_MCRFS, DO_MTFSB, DO_MTFSFI, DO_MFFS, DO_MTFSF,
  40                      DO_FMR, DO_FMRG, DO_FCMP, DO_FTDIV, DO_FTSQRT,
  41                      DO_FCFID, DO_FCTI,
  42                      DO_FRSP, DO_FRI,
  43                      DO_FADD, DO_FMUL, DO_FDIV, DO_FSQRT, DO_FMADD,
  44                      DO_FRE, DO_FRSQRTE,
  45                      DO_FSEL,
  46                      FRI_1,
  47                      ADD_1, ADD_SHIFT, ADD_2, ADD_3,
  48                      CMP_1, CMP_2,
  49                      MULT_1,
  50                      FMADD_1, FMADD_2, FMADD_3,
  51                      FMADD_4, FMADD_5, FMADD_6,
  52                      LOOKUP,
  53                      DIV_2, DIV_3, DIV_4, DIV_5, DIV_6,
  54                      FRE_1,
  55                      RSQRT_1,
  56                      FTDIV_1,
  57                      SQRT_1, SQRT_2, SQRT_3, SQRT_4,
  58                      SQRT_5, SQRT_6, SQRT_7, SQRT_8,
  59                      SQRT_9, SQRT_10, SQRT_11, SQRT_12,
  60                      INT_SHIFT, INT_ROUND, INT_ISHIFT,
  61                      INT_FINAL, INT_CHECK, INT_OFLOW,
  62                      FINISH, NORMALIZE,
  63                      ROUND_UFLOW, ROUND_OFLOW,
  64                      ROUNDING, ROUNDING_2, ROUNDING_3,
  65                      DENORM,
  66                      RENORM_A, RENORM_A2,
  67                      RENORM_B, RENORM_B2,
  68                      RENORM_C, RENORM_C2,
  69                      NAN_RESULT, EXC_RESULT);
  70
  71     type reg_type is record  -- complete registered state of the FPU (signals r and rin)
  72         state        : state_t;  -- forced to IDLE by reset
  73         busy         : std_ulogic;  -- drives e_out.busy; cleared by reset
  74         instr_done   : std_ulogic;  -- qualifies w_out.valid and w_out.write_cr_enable; cleared by reset
  75         do_intr      : std_ulogic;  -- drives e_out.interrupt and suppresses w_out.valid; cleared by reset
  76         op           : insn_type_t;
  77         insn         : std_ulogic_vector(31 downto 0);
  78         dest_fpr     : gspr_index_t;  -- drives w_out.write_reg
  79         fe_mode      : std_ulogic;
  80         rc           : std_ulogic;  -- together with instr_done, enables the CR write
  81         is_cmp       : std_ulogic;  -- together with instr_done, enables the CR write
  82         single_prec  : std_ulogic;
  83         fpscr        : std_ulogic_vector(31 downto 0);  -- cleared by reset; bit FPSCR_FEX drives e_out.exception
  84         a            : fpu_reg_type;
  85         b            : fpu_reg_type;  -- b.mantissa bits 55..46 supply the inverse lookup table address
  86         c            : fpu_reg_type;
  87         r            : std_ulogic_vector(63 downto 0);  -- 10.54 format
  88         s            : std_ulogic_vector(55 downto 0);  -- extended fraction
  89         x            : std_ulogic;
  90         p            : std_ulogic_vector(63 downto 0);  -- 8.56 format
  91         y            : std_ulogic_vector(63 downto 0);  -- 8.56 format
  92         result_sign  : std_ulogic;
  93         result_class : fp_number_class;
  94         result_exp   : signed(EXP_BITS-1 downto 0);
  95         shift        : signed(EXP_BITS-1 downto 0);
  96         writing_back : std_ulogic;  -- drives w_out.write_enable; cleared by reset
  97         int_result   : std_ulogic;
  98         cr_result    : std_ulogic_vector(3 downto 0);  -- replicated eight times to form w_out.write_cr_data
  99         cr_mask      : std_ulogic_vector(7 downto 0);  -- drives w_out.write_cr_mask
 100         old_exc      : std_ulogic_vector(4 downto 0);
 101         update_fprf  : std_ulogic;
 102         quieten_nan  : std_ulogic;
 103         tiny         : std_ulogic;
 104         denorm       : std_ulogic;
 105         round_mode   : std_ulogic_vector(2 downto 0);
 106         is_subtract  : std_ulogic;
 107         exp_cmp      : std_ulogic;
 108         madd_cmp     : std_ulogic;
 109         add_bsmall   : std_ulogic;
 110         is_multiply  : std_ulogic;
 111         is_sqrt      : std_ulogic;  -- when set, lut_access reads the 1/sqrt(x) blocks of the table
 112         first        : std_ulogic;
 113         count        : unsigned(1 downto 0);
 114         doing_ftdiv  : std_ulogic_vector(1 downto 0);
 115         opsel_a      : std_ulogic_vector(1 downto 0);
 116         use_a        : std_ulogic;
 117         use_b        : std_ulogic;
 118         use_c        : std_ulogic;
 119         invalid      : std_ulogic;
 120         negate       : std_ulogic;
 121     end record;
 122
 123     type lookup_table is array(0 to 1023) of std_ulogic_vector(17 downto 0);  -- 1024 entries of 18 bits
 124
 125     signal r, rin : reg_type;  -- r is the registered state; it is loaded from rin on each clock edge outside reset
 126
 127     signal fp_result     : std_ulogic_vector(63 downto 0);  -- drives w_out.write_data
 128     signal opsel_b       : std_ulogic_vector(1 downto 0);
 129     signal opsel_r       : std_ulogic_vector(1 downto 0);
 130     signal opsel_s       : std_ulogic_vector(1 downto 0);
 131     signal opsel_ainv    : std_ulogic;
 132     signal opsel_amask   : std_ulogic;
 133     signal opsel_binv    : std_ulogic;
 134     signal in_a          : std_ulogic_vector(63 downto 0);
 135     signal in_b          : std_ulogic_vector(63 downto 0);
 136     signal result        : std_ulogic_vector(63 downto 0);
 137     signal carry_in      : std_ulogic;
 138     signal lost_bits     : std_ulogic;
 139     signal r_hi_nz       : std_ulogic;
 140     signal r_lo_nz       : std_ulogic;
 141     signal s_nz          : std_ulogic;
 142     signal misc_sel      : std_ulogic_vector(3 downto 0);
 143     signal f_to_multiply : MultiplyInputType;  -- connected to m_in of fpu_multiply_0
 144     signal multiply_to_f : MultiplyOutputType;  -- connected to m_out of fpu_multiply_0
 145     signal msel_1        : std_ulogic_vector(1 downto 0);
 146     signal msel_2        : std_ulogic_vector(1 downto 0);
 147     signal msel_add      : std_ulogic_vector(1 downto 0);
 148     signal msel_inv      : std_ulogic;
 149     signal inverse_est   : std_ulogic_vector(18 downto 0);  -- registered table entry with a '1' prepended as the top bit
 150
 151     -- opsel values (NOTE(review): presumably the encodings of r.opsel_a, opsel_b, opsel_r and opsel_s; confirm in fpu_1)
 152     constant AIN_R    : std_ulogic_vector(1 downto 0) := "00";
 153     constant AIN_A    : std_ulogic_vector(1 downto 0) := "01";
 154     constant AIN_B    : std_ulogic_vector(1 downto 0) := "10";
 155     constant AIN_C    : std_ulogic_vector(1 downto 0) := "11";
 156
 157     constant BIN_ZERO : std_ulogic_vector(1 downto 0) := "00";
 158     constant BIN_R    : std_ulogic_vector(1 downto 0) := "01";
 159     constant BIN_MASK : std_ulogic_vector(1 downto 0) := "10";
 160     constant BIN_PS6  : std_ulogic_vector(1 downto 0) := "11";
 161
 162     constant RES_SUM   : std_ulogic_vector(1 downto 0) := "00";
 163     constant RES_SHIFT : std_ulogic_vector(1 downto 0) := "01";
 164     constant RES_MULT  : std_ulogic_vector(1 downto 0) := "10";
 165     constant RES_MISC  : std_ulogic_vector(1 downto 0) := "11";
 166
 167     constant S_ZERO  : std_ulogic_vector(1 downto 0) := "00";
 168     constant S_NEG   : std_ulogic_vector(1 downto 0) := "01";
 169     constant S_SHIFT : std_ulogic_vector(1 downto 0) := "10";
 170     constant S_MULT  : std_ulogic_vector(1 downto 0) := "11";
 171
 172     -- msel values (NOTE(review): presumably the encodings of msel_1, msel_2 and msel_add; confirm in fpu_1)
 173     constant MUL1_A : std_ulogic_vector(1 downto 0) := "00";
 174     constant MUL1_B : std_ulogic_vector(1 downto 0) := "01";
 175     constant MUL1_Y : std_ulogic_vector(1 downto 0) := "10";
 176     constant MUL1_R : std_ulogic_vector(1 downto 0) := "11";
 177
 178     constant MUL2_C   : std_ulogic_vector(1 downto 0) := "00";
 179     constant MUL2_LUT : std_ulogic_vector(1 downto 0) := "01";
 180     constant MUL2_P   : std_ulogic_vector(1 downto 0) := "10";
 181     constant MUL2_R   : std_ulogic_vector(1 downto 0) := "11";
 182
 183     constant MULADD_ZERO  : std_ulogic_vector(1 downto 0) := "00";
 184     constant MULADD_CONST : std_ulogic_vector(1 downto 0) := "01";
 185     constant MULADD_A     : std_ulogic_vector(1 downto 0) := "10";
 186     constant MULADD_RS    : std_ulogic_vector(1 downto 0) := "11";
 187
 188     -- Inverse lookup table, indexed by the top 8 fraction bits
 189     -- The first 256 entries are the reciprocal (1/x) lookup table,
 190     -- and the remaining 768 entries are the reciprocal square root table.
 191     -- Output range is [0.5, 1) in 0.19 format, though the top
 192     -- bit isn't stored since it is always 1.
 193     -- Each output value is the inverse of the center of the input
 194     -- range for the value, i.e. entry 0 is 1 / (1 + 1/512),
 195     -- entry 1 is 1 / (1 + 3/512), etc.
 196     signal inverse_table : lookup_table := (
 197         -- 1/x lookup table
 198         -- Unit bit is assumed to be 1, so input range is [1, 2)
 199         18x"3fc01", 18x"3f411", 18x"3ec31", 18x"3e460", 18x"3dc9f", 18x"3d4ec", 18x"3cd49", 18x"3c5b5",
 200         18x"3be2f", 18x"3b6b8", 18x"3af4f", 18x"3a7f4", 18x"3a0a7", 18x"39968", 18x"39237", 18x"38b14",
 201         18x"383fe", 18x"37cf5", 18x"375f9", 18x"36f0a", 18x"36828", 18x"36153", 18x"35a8a", 18x"353ce",
 202         18x"34d1e", 18x"3467a", 18x"33fe3", 18x"33957", 18x"332d7", 18x"32c62", 18x"325f9", 18x"31f9c",
 203         18x"3194a", 18x"31303", 18x"30cc7", 18x"30696", 18x"30070", 18x"2fa54", 18x"2f443", 18x"2ee3d",
 204         18x"2e841", 18x"2e250", 18x"2dc68", 18x"2d68b", 18x"2d0b8", 18x"2caee", 18x"2c52e", 18x"2bf79",
 205         18x"2b9cc", 18x"2b429", 18x"2ae90", 18x"2a900", 18x"2a379", 18x"29dfb", 18x"29887", 18x"2931b",
 206         18x"28db8", 18x"2885e", 18x"2830d", 18x"27dc4", 18x"27884", 18x"2734d", 18x"26e1d", 18x"268f6",
 207         18x"263d8", 18x"25ec1", 18x"259b3", 18x"254ac", 18x"24fad", 18x"24ab7", 18x"245c8", 18x"240e1",
 208         18x"23c01", 18x"23729", 18x"23259", 18x"22d90", 18x"228ce", 18x"22413", 18x"21f60", 18x"21ab4",
 209         18x"2160f", 18x"21172", 18x"20cdb", 18x"2084b", 18x"203c2", 18x"1ff40", 18x"1fac4", 18x"1f64f",
 210         18x"1f1e1", 18x"1ed79", 18x"1e918", 18x"1e4be", 18x"1e069", 18x"1dc1b", 18x"1d7d4", 18x"1d392",
 211         18x"1cf57", 18x"1cb22", 18x"1c6f3", 18x"1c2ca", 18x"1bea7", 18x"1ba8a", 18x"1b672", 18x"1b261",
 212         18x"1ae55", 18x"1aa50", 18x"1a64f", 18x"1a255", 18x"19e60", 18x"19a70", 18x"19686", 18x"192a2",
 213         18x"18ec3", 18x"18ae9", 18x"18715", 18x"18345", 18x"17f7c", 18x"17bb7", 18x"177f7", 18x"1743d",
 214         18x"17087", 18x"16cd7", 18x"1692c", 18x"16585", 18x"161e4", 18x"15e47", 18x"15ab0", 18x"1571d",
 215         18x"1538e", 18x"15005", 18x"14c80", 18x"14900", 18x"14584", 18x"1420d", 18x"13e9b", 18x"13b2d",
 216         18x"137c3", 18x"1345e", 18x"130fe", 18x"12da2", 18x"12a4a", 18x"126f6", 18x"123a7", 18x"1205c",
 217         18x"11d15", 18x"119d2", 18x"11694", 18x"11359", 18x"11023", 18x"10cf1", 18x"109c2", 18x"10698",
 218         18x"10372", 18x"10050", 18x"0fd31", 18x"0fa17", 18x"0f700", 18x"0f3ed", 18x"0f0de", 18x"0edd3",
 219         18x"0eacb", 18x"0e7c7", 18x"0e4c7", 18x"0e1ca", 18x"0ded2", 18x"0dbdc", 18x"0d8eb", 18x"0d5fc",
 220         18x"0d312", 18x"0d02b", 18x"0cd47", 18x"0ca67", 18x"0c78a", 18x"0c4b1", 18x"0c1db", 18x"0bf09",
 221         18x"0bc3a", 18x"0b96e", 18x"0b6a5", 18x"0b3e0", 18x"0b11e", 18x"0ae5f", 18x"0aba3", 18x"0a8eb",
 222         18x"0a636", 18x"0a383", 18x"0a0d4", 18x"09e28", 18x"09b80", 18x"098da", 18x"09637", 18x"09397",
 223         18x"090fb", 18x"08e61", 18x"08bca", 18x"08936", 18x"086a5", 18x"08417", 18x"0818c", 18x"07f04",
 224         18x"07c7e", 18x"079fc", 18x"0777c", 18x"074ff", 18x"07284", 18x"0700d", 18x"06d98", 18x"06b26",
 225         18x"068b6", 18x"0664a", 18x"063e0", 18x"06178", 18x"05f13", 18x"05cb1", 18x"05a52", 18x"057f5",
 226         18x"0559a", 18x"05342", 18x"050ed", 18x"04e9a", 18x"04c4a", 18x"049fc", 18x"047b0", 18x"04567",
 227         18x"04321", 18x"040dd", 18x"03e9b", 18x"03c5c", 18x"03a1f", 18x"037e4", 18x"035ac", 18x"03376",
 228         18x"03142", 18x"02f11", 18x"02ce2", 18x"02ab5", 18x"0288b", 18x"02663", 18x"0243d", 18x"02219",
 229         18x"01ff7", 18x"01dd8", 18x"01bbb", 18x"019a0", 18x"01787", 18x"01570", 18x"0135b", 18x"01149",
 230         18x"00f39", 18x"00d2a", 18x"00b1e", 18x"00914", 18x"0070c", 18x"00506", 18x"00302", 18x"00100",
 231         -- 1/sqrt(x) lookup table
 232         -- Input is in the range [1, 4), i.e. two bits to the left of the
 233         -- binary point.  Those 2 bits index the following 3 blocks of 256 values.
 234         -- 1.0 ... 1.9999
 235         18x"3fe00", 18x"3fa06", 18x"3f612", 18x"3f224", 18x"3ee3a", 18x"3ea58", 18x"3e67c", 18x"3e2a4",
 236         18x"3ded2", 18x"3db06", 18x"3d73e", 18x"3d37e", 18x"3cfc2", 18x"3cc0a", 18x"3c85a", 18x"3c4ae",
 237         18x"3c106", 18x"3bd64", 18x"3b9c8", 18x"3b630", 18x"3b29e", 18x"3af10", 18x"3ab86", 18x"3a802",
 238         18x"3a484", 18x"3a108", 18x"39d94", 18x"39a22", 18x"396b6", 18x"3934e", 18x"38fea", 18x"38c8c",
 239         18x"38932", 18x"385dc", 18x"3828a", 18x"37f3e", 18x"37bf6", 18x"378b2", 18x"37572", 18x"37236",
 240         18x"36efe", 18x"36bca", 18x"3689a", 18x"36570", 18x"36248", 18x"35f26", 18x"35c06", 18x"358ea",
 241         18x"355d4", 18x"352c0", 18x"34fb0", 18x"34ca4", 18x"3499c", 18x"34698", 18x"34398", 18x"3409c",
 242         18x"33da2", 18x"33aac", 18x"337bc", 18x"334cc", 18x"331e2", 18x"32efc", 18x"32c18", 18x"32938",
 243         18x"3265a", 18x"32382", 18x"320ac", 18x"31dd8", 18x"31b0a", 18x"3183e", 18x"31576", 18x"312b0",
 244         18x"30fee", 18x"30d2e", 18x"30a74", 18x"307ba", 18x"30506", 18x"30254", 18x"2ffa4", 18x"2fcf8",
 245         18x"2fa4e", 18x"2f7a8", 18x"2f506", 18x"2f266", 18x"2efca", 18x"2ed2e", 18x"2ea98", 18x"2e804",
 246         18x"2e572", 18x"2e2e4", 18x"2e058", 18x"2ddce", 18x"2db48", 18x"2d8c6", 18x"2d646", 18x"2d3c8",
 247         18x"2d14c", 18x"2ced4", 18x"2cc5e", 18x"2c9ea", 18x"2c77a", 18x"2c50c", 18x"2c2a2", 18x"2c038",
 248         18x"2bdd2", 18x"2bb70", 18x"2b90e", 18x"2b6b0", 18x"2b454", 18x"2b1fa", 18x"2afa4", 18x"2ad4e",
 249         18x"2aafc", 18x"2a8ac", 18x"2a660", 18x"2a414", 18x"2a1cc", 18x"29f86", 18x"29d42", 18x"29b00",
 250         18x"298c2", 18x"29684", 18x"2944a", 18x"29210", 18x"28fda", 18x"28da6", 18x"28b74", 18x"28946",
 251         18x"28718", 18x"284ec", 18x"282c4", 18x"2809c", 18x"27e78", 18x"27c56", 18x"27a34", 18x"27816",
 252         18x"275fa", 18x"273e0", 18x"271c8", 18x"26fb0", 18x"26d9c", 18x"26b8a", 18x"2697a", 18x"2676c",
 253         18x"26560", 18x"26356", 18x"2614c", 18x"25f46", 18x"25d42", 18x"25b40", 18x"2593e", 18x"25740",
 254         18x"25542", 18x"25348", 18x"2514e", 18x"24f58", 18x"24d62", 18x"24b6e", 18x"2497c", 18x"2478c",
 255         18x"2459e", 18x"243b0", 18x"241c6", 18x"23fde", 18x"23df6", 18x"23c10", 18x"23a2c", 18x"2384a",
 256         18x"2366a", 18x"2348c", 18x"232ae", 18x"230d2", 18x"22efa", 18x"22d20", 18x"22b4a", 18x"22976",
 257         18x"227a2", 18x"225d2", 18x"22402", 18x"22234", 18x"22066", 18x"21e9c", 18x"21cd2", 18x"21b0a",
 258         18x"21944", 18x"2177e", 18x"215ba", 18x"213fa", 18x"21238", 18x"2107a", 18x"20ebc", 18x"20d00",
 259         18x"20b46", 18x"2098e", 18x"207d6", 18x"20620", 18x"2046c", 18x"202b8", 18x"20108", 18x"1ff58",
 260         18x"1fda8", 18x"1fbfc", 18x"1fa50", 18x"1f8a4", 18x"1f6fc", 18x"1f554", 18x"1f3ae", 18x"1f208",
 261         18x"1f064", 18x"1eec2", 18x"1ed22", 18x"1eb82", 18x"1e9e4", 18x"1e846", 18x"1e6aa", 18x"1e510",
 262         18x"1e378", 18x"1e1e0", 18x"1e04a", 18x"1deb4", 18x"1dd20", 18x"1db8e", 18x"1d9fc", 18x"1d86c",
 263         18x"1d6de", 18x"1d550", 18x"1d3c4", 18x"1d238", 18x"1d0ae", 18x"1cf26", 18x"1cd9e", 18x"1cc18",
 264         18x"1ca94", 18x"1c910", 18x"1c78c", 18x"1c60a", 18x"1c48a", 18x"1c30c", 18x"1c18e", 18x"1c010",
 265         18x"1be94", 18x"1bd1a", 18x"1bba0", 18x"1ba28", 18x"1b8b2", 18x"1b73c", 18x"1b5c6", 18x"1b452",
 266         18x"1b2e0", 18x"1b16e", 18x"1affe", 18x"1ae8e", 18x"1ad20", 18x"1abb4", 18x"1aa46", 18x"1a8dc",
 267         -- 2.0 ... 2.9999
 268         18x"1a772", 18x"1a608", 18x"1a4a0", 18x"1a33a", 18x"1a1d4", 18x"1a070", 18x"19f0c", 18x"19da8",
 269         18x"19c48", 18x"19ae6", 18x"19986", 18x"19828", 18x"196ca", 18x"1956e", 18x"19412", 18x"192b8",
 270         18x"1915e", 18x"19004", 18x"18eae", 18x"18d56", 18x"18c00", 18x"18aac", 18x"18958", 18x"18804",
 271         18x"186b2", 18x"18562", 18x"18412", 18x"182c2", 18x"18174", 18x"18026", 18x"17eda", 18x"17d8e",
 272         18x"17c44", 18x"17afa", 18x"179b2", 18x"1786a", 18x"17724", 18x"175de", 18x"17498", 18x"17354",
 273         18x"17210", 18x"170ce", 18x"16f8c", 18x"16e4c", 18x"16d0c", 18x"16bcc", 18x"16a8e", 18x"16950",
 274         18x"16814", 18x"166d8", 18x"1659e", 18x"16464", 18x"1632a", 18x"161f2", 18x"160ba", 18x"15f84",
 275         18x"15e4e", 18x"15d1a", 18x"15be6", 18x"15ab2", 18x"15980", 18x"1584e", 18x"1571c", 18x"155ec",
 276         18x"154bc", 18x"1538e", 18x"15260", 18x"15134", 18x"15006", 18x"14edc", 18x"14db0", 18x"14c86",
 277         18x"14b5e", 18x"14a36", 18x"1490e", 18x"147e6", 18x"146c0", 18x"1459a", 18x"14476", 18x"14352",
 278         18x"14230", 18x"1410c", 18x"13fea", 18x"13eca", 18x"13daa", 18x"13c8a", 18x"13b6c", 18x"13a4e",
 279         18x"13930", 18x"13814", 18x"136f8", 18x"135dc", 18x"134c2", 18x"133a8", 18x"1328e", 18x"13176",
 280         18x"1305e", 18x"12f48", 18x"12e30", 18x"12d1a", 18x"12c06", 18x"12af2", 18x"129de", 18x"128ca",
 281         18x"127b8", 18x"126a6", 18x"12596", 18x"12486", 18x"12376", 18x"12266", 18x"12158", 18x"1204a",
 282         18x"11f3e", 18x"11e32", 18x"11d26", 18x"11c1a", 18x"11b10", 18x"11a06", 18x"118fc", 18x"117f4",
 283         18x"116ec", 18x"115e4", 18x"114de", 18x"113d8", 18x"112d2", 18x"111ce", 18x"110ca", 18x"10fc6",
 284         18x"10ec2", 18x"10dc0", 18x"10cbe", 18x"10bbc", 18x"10abc", 18x"109bc", 18x"108bc", 18x"107be",
 285         18x"106c0", 18x"105c2", 18x"104c4", 18x"103c8", 18x"102cc", 18x"101d0", 18x"100d6", 18x"0ffdc",
 286         18x"0fee2", 18x"0fdea", 18x"0fcf0", 18x"0fbf8", 18x"0fb02", 18x"0fa0a", 18x"0f914", 18x"0f81e",
 287         18x"0f72a", 18x"0f636", 18x"0f542", 18x"0f44e", 18x"0f35a", 18x"0f268", 18x"0f176", 18x"0f086",
 288         18x"0ef94", 18x"0eea4", 18x"0edb4", 18x"0ecc6", 18x"0ebd6", 18x"0eae8", 18x"0e9fa", 18x"0e90e",
 289         18x"0e822", 18x"0e736", 18x"0e64a", 18x"0e55e", 18x"0e474", 18x"0e38a", 18x"0e2a0", 18x"0e1b8",
 290         18x"0e0d0", 18x"0dfe8", 18x"0df00", 18x"0de1a", 18x"0dd32", 18x"0dc4c", 18x"0db68", 18x"0da82",
 291         18x"0d99e", 18x"0d8ba", 18x"0d7d6", 18x"0d6f4", 18x"0d612", 18x"0d530", 18x"0d44e", 18x"0d36c",
 292         18x"0d28c", 18x"0d1ac", 18x"0d0cc", 18x"0cfee", 18x"0cf0e", 18x"0ce30", 18x"0cd54", 18x"0cc76",
 293         18x"0cb9a", 18x"0cabc", 18x"0c9e0", 18x"0c906", 18x"0c82a", 18x"0c750", 18x"0c676", 18x"0c59c",
 294         18x"0c4c4", 18x"0c3ea", 18x"0c312", 18x"0c23a", 18x"0c164", 18x"0c08c", 18x"0bfb6", 18x"0bee0",
 295         18x"0be0a", 18x"0bd36", 18x"0bc62", 18x"0bb8c", 18x"0baba", 18x"0b9e6", 18x"0b912", 18x"0b840",
 296         18x"0b76e", 18x"0b69c", 18x"0b5cc", 18x"0b4fa", 18x"0b42a", 18x"0b35a", 18x"0b28a", 18x"0b1bc",
 297         18x"0b0ee", 18x"0b01e", 18x"0af50", 18x"0ae84", 18x"0adb6", 18x"0acea", 18x"0ac1e", 18x"0ab52",
 298         18x"0aa86", 18x"0a9bc", 18x"0a8f0", 18x"0a826", 18x"0a75c", 18x"0a694", 18x"0a5ca", 18x"0a502",
 299         18x"0a43a", 18x"0a372", 18x"0a2aa", 18x"0a1e4", 18x"0a11c", 18x"0a056", 18x"09f90", 18x"09ecc",
 300         -- 3.0 ... 3.9999
 301         18x"09e06", 18x"09d42", 18x"09c7e", 18x"09bba", 18x"09af6", 18x"09a32", 18x"09970", 18x"098ae",
 302         18x"097ec", 18x"0972a", 18x"09668", 18x"095a8", 18x"094e8", 18x"09426", 18x"09368", 18x"092a8",
 303         18x"091e8", 18x"0912a", 18x"0906c", 18x"08fae", 18x"08ef0", 18x"08e32", 18x"08d76", 18x"08cba",
 304         18x"08bfe", 18x"08b42", 18x"08a86", 18x"089ca", 18x"08910", 18x"08856", 18x"0879c", 18x"086e2",
 305         18x"08628", 18x"08570", 18x"084b6", 18x"083fe", 18x"08346", 18x"0828e", 18x"081d8", 18x"08120",
 306         18x"0806a", 18x"07fb4", 18x"07efe", 18x"07e48", 18x"07d92", 18x"07cde", 18x"07c2a", 18x"07b76",
 307         18x"07ac2", 18x"07a0e", 18x"0795a", 18x"078a8", 18x"077f4", 18x"07742", 18x"07690", 18x"075de",
 308         18x"0752e", 18x"0747c", 18x"073cc", 18x"0731c", 18x"0726c", 18x"071bc", 18x"0710c", 18x"0705e",
 309         18x"06fae", 18x"06f00", 18x"06e52", 18x"06da4", 18x"06cf6", 18x"06c4a", 18x"06b9c", 18x"06af0",
 310         18x"06a44", 18x"06998", 18x"068ec", 18x"06840", 18x"06796", 18x"066ea", 18x"06640", 18x"06596",
 311         18x"064ec", 18x"06442", 18x"0639a", 18x"062f0", 18x"06248", 18x"061a0", 18x"060f8", 18x"06050",
 312         18x"05fa8", 18x"05f00", 18x"05e5a", 18x"05db4", 18x"05d0e", 18x"05c68", 18x"05bc2", 18x"05b1c",
 313         18x"05a76", 18x"059d2", 18x"0592e", 18x"05888", 18x"057e4", 18x"05742", 18x"0569e", 18x"055fa",
 314         18x"05558", 18x"054b6", 18x"05412", 18x"05370", 18x"052ce", 18x"0522e", 18x"0518c", 18x"050ec",
 315         18x"0504a", 18x"04faa", 18x"04f0a", 18x"04e6a", 18x"04dca", 18x"04d2c", 18x"04c8c", 18x"04bee",
 316         18x"04b50", 18x"04ab0", 18x"04a12", 18x"04976", 18x"048d8", 18x"0483a", 18x"0479e", 18x"04700",
 317         18x"04664", 18x"045c8", 18x"0452c", 18x"04490", 18x"043f6", 18x"0435a", 18x"042c0", 18x"04226",
 318         18x"0418a", 18x"040f0", 18x"04056", 18x"03fbe", 18x"03f24", 18x"03e8c", 18x"03df2", 18x"03d5a",
 319         18x"03cc2", 18x"03c2a", 18x"03b92", 18x"03afa", 18x"03a62", 18x"039cc", 18x"03934", 18x"0389e",
 320         18x"03808", 18x"03772", 18x"036dc", 18x"03646", 18x"035b2", 18x"0351c", 18x"03488", 18x"033f2",
 321         18x"0335e", 18x"032ca", 18x"03236", 18x"031a2", 18x"03110", 18x"0307c", 18x"02fea", 18x"02f56",
 322         18x"02ec4", 18x"02e32", 18x"02da0", 18x"02d0e", 18x"02c7c", 18x"02bec", 18x"02b5a", 18x"02aca",
 323         18x"02a38", 18x"029a8", 18x"02918", 18x"02888", 18x"027f8", 18x"0276a", 18x"026da", 18x"0264a",
 324         18x"025bc", 18x"0252e", 18x"024a0", 18x"02410", 18x"02384", 18x"022f6", 18x"02268", 18x"021da",
 325         18x"0214e", 18x"020c0", 18x"02034", 18x"01fa8", 18x"01f1c", 18x"01e90", 18x"01e04", 18x"01d78",
 326         18x"01cee", 18x"01c62", 18x"01bd8", 18x"01b4c", 18x"01ac2", 18x"01a38", 18x"019ae", 18x"01924",
 327         18x"0189c", 18x"01812", 18x"01788", 18x"01700", 18x"01676", 18x"015ee", 18x"01566", 18x"014de",
 328         18x"01456", 18x"013ce", 18x"01346", 18x"012c0", 18x"01238", 18x"011b2", 18x"0112c", 18x"010a4",
 329         18x"0101e", 18x"00f98", 18x"00f12", 18x"00e8c", 18x"00e08", 18x"00d82", 18x"00cfe", 18x"00c78",
 330         18x"00bf4", 18x"00b70", 18x"00aec", 18x"00a68", 18x"009e4", 18x"00960", 18x"008dc", 18x"00858",
 331         18x"007d6", 18x"00752", 18x"006d0", 18x"0064e", 18x"005cc", 18x"0054a", 18x"004c8", 18x"00446",
 332         18x"003c4", 18x"00342", 18x"002c2", 18x"00240", 18x"001c0", 18x"00140", 18x"000c0", 18x"00040"
 333         );
 334
 335     -- Left and right shifter with 120 bit input and 64 bit output.
 336     -- Shifts inp left by shift bits and returns the upper 64 bits of
 337     -- the result.  The shift parameter is interpreted as a signed
 338     -- number in the range -64..63, with negative values indicating
 339     -- right shifts.
 340     function shifter_64(inp: std_ulogic_vector(119 downto 0);
 341                         shift: std_ulogic_vector(6 downto 0))
 342         return std_ulogic_vector is
 343         variable s1 : std_ulogic_vector(94 downto 0);  -- top 95 bits after the coarse (multiple-of-32) stage
 344         variable s2 : std_ulogic_vector(70 downto 0);  -- top 71 bits after the multiple-of-8 stage
 345         variable result : std_ulogic_vector(63 downto 0);
 346     begin
 347         case shift(6 downto 5) is  -- stage 1: sign bit and 32s bit of the two's-complement shift count
 348             when "00" =>  -- count 0..31: no coarse shift
 349                 s1 := inp(119 downto 25);
 350             when "01" =>  -- count 32..63: left by 32, zero-filled on the right
 351                 s1 := inp(87 downto 0) & "0000000";
 352             when "10" =>  -- count -64..-33: right by 64, zero-filled on the left
 353                 s1 := x"0000000000000000" & inp(119 downto 89);
 354             when others =>  -- count -32..-1: right by 32, zero-filled on the left
 355                 s1 := x"00000000" & inp(119 downto 57);
 356         end case;
 357         case shift(4 downto 3) is  -- stage 2: additional left shift by 0, 8, 16 or 24
 358             when "00" =>
 359                 s2 := s1(94 downto 24);
 360             when "01" =>
 361                 s2 := s1(86 downto 16);
 362             when "10" =>
 363                 s2 := s1(78 downto 8);
 364             when others =>
 365                 s2 := s1(70 downto 0);
 366         end case;
 367         case shift(2 downto 0) is  -- stage 3: additional left shift by 0..7, keeping the top 64 bits
 368             when "000" =>
 369                 result := s2(70 downto 7);
 370             when "001" =>
 371                 result := s2(69 downto 6);
 372             when "010" =>
 373                 result := s2(68 downto 5);
 374             when "011" =>
 375                 result := s2(67 downto 4);
 376             when "100" =>
 377                 result := s2(66 downto 3);
 378             when "101" =>
 379                 result := s2(65 downto 2);
 380             when "110" =>
 381                 result := s2(64 downto 1);
 382             when others =>
 383                 result := s2(63 downto 0);
 384         end case;
 385         return result;
 386     end;
 387
 388     -- Build the mask of bits that a right shift will discard: 0-bits in
 389     -- the upper positions and 1-bits in the lower positions.  The shift
 390     -- argument is the bottom 6 bits of a negative (i.e. rightward) shift
 391     -- count, so the lowest 64 - shift bits of the mask are set.
 392     function right_mask(shift: unsigned(5 downto 0)) return std_ulogic_vector is
 393         variable mask : std_ulogic_vector(63 downto 0);
 394     begin
 395         mask := (others => '0');
 396         for pos in mask'range loop
 397             if (63 - pos) >= shift then     -- bit position lies in the discarded field
 398                 mask(pos) := '1';
 399             end if;
 400         end loop;
 401         return mask;
 402     end;
 403
 404     -- Unpack a 64-bit register value into sign, exponent, mantissa and class.
 405     -- With is_int = 1 the value is treated as a raw integer rather than a DP float.
 406     function decode_dp(fpr: std_ulogic_vector(63 downto 0); is_int: std_ulogic) return fpu_reg_type is
 407         variable num      : fpu_reg_type;
 408         variable exp_any  : std_ulogic;        -- at least one exponent bit is set
 409         variable exp_all  : std_ulogic;        -- every exponent bit is set
 410         variable frac_any : std_ulogic;        -- at least one fraction bit is set
 411         variable kind     : std_ulogic_vector(2 downto 0);
 412     begin
 413         exp_any  := or (fpr(62 downto 52));
 414         exp_all  := and (fpr(62 downto 52));
 415         frac_any := or (fpr(51 downto 0));
 416         num.negative := fpr(63);
 417         if is_int /= '0' then
 418             -- integer input: keep all 64 bits; the class only records zero / non-zero
 419             num.exponent := (others => '0');
 420             num.mantissa := fpr;
 421             if (fpr(63) or exp_any or frac_any) = '1' then
 422                 num.class := FINITE;
 423             else
 424                 num.class := ZERO;
 425             end if;
 426         else    -- floating-point input: remove the bias of 1023
 427             if exp_any = '0' then
 428                 num.exponent := to_signed(-1022, EXP_BITS);     -- zero exponent field (zero or denormal)
 429             else
 430                 num.exponent := signed(resize(unsigned(fpr(62 downto 52)), EXP_BITS)) - to_signed(1023, EXP_BITS);
 431             end if;
 432             num.mantissa := "000000000" & exp_any & fpr(51 downto 0) & "00";
 433             kind := exp_all & exp_any & frac_any;
 434             case kind is
 435                 when "000"                 => num.class := ZERO;
 436                 when "001" | "010" | "011" => num.class := FINITE;    -- "001" is a denormal
 437                 when "110"                 => num.class := INFINITY;
 438                 when others                => num.class := NAN;
 439             end case;
 440         end if;
 441         return num;
 442     end;
 443
 444     -- Assemble a 64-bit DP register image from sign, class, exponent and
 445     -- mantissa.  ZERO and INFINITY ignore the mantissa; a NaN keeps its
 446     -- fraction bits and may be made quiet via quieten_nan.
 447     function pack_dp(sign: std_ulogic; class: fp_number_class; exp: signed(EXP_BITS-1 downto 0);
 448                      mantissa: std_ulogic_vector; single_prec: std_ulogic; quieten_nan: std_ulogic)
 449         return std_ulogic_vector is
 450         variable packed : std_ulogic_vector(63 downto 0);
 451     begin
 452         packed := (others => '0');
 453         packed(63) := sign;
 454         if class = INFINITY or class = NAN then
 455             -- both classes use the all-ones exponent field
 456             packed(62 downto 52) := "11111111111";
 457         elsif class = FINITE and mantissa(54) = '1' then
 458             -- unit bit set: normalized number; otherwise the field stays zero
 459             packed(62 downto 52) := std_ulogic_vector(resize(exp, 11) + 1023);
 460         end if;
 461         if class = FINITE or class = NAN then
 462             -- fraction field: the upper 23 bits are always copied, the
 463             -- lower 29 bits only for double precision
 464             packed(51 downto 29) := mantissa(53 downto 31);
 465             if single_prec = '0' then
 466                 packed(28 downto 0) := mantissa(30 downto 2);
 467             end if;
 468         end if;
 469         if class = NAN then
 470             -- the top fraction bit can be forced on to quieten the NaN
 471             packed(51) := quieten_nan or mantissa(53);
 472         end if;
 473         return packed;
 474     end;
 475
 476     -- Rounding decision for a mantissa.  Returns a 2-bit vector:
 477     -- bit 1 = increment the mantissa, bit 0 = the result is inexact.
 478     -- For single_prec = 1, x is assumed to already include the bottom 29
 479     -- bits of the mantissa (usually arranged by setting set_x = 1 earlier).
 480     function fp_rounding(mantissa: std_ulogic_vector(63 downto 0); x: std_ulogic;
 481                          single_prec: std_ulogic; rn: std_ulogic_vector(2 downto 0);
 482                          sign: std_ulogic)
 483         return std_ulogic_vector is
 484         variable below    : std_ulogic_vector(2 downto 0);  -- two bits below the lsb, then x
 485         variable decision : std_ulogic_vector(1 downto 0);
 486         variable even_bit : std_ulogic;                     -- lsb of the retained mantissa
 487         variable inexact  : std_ulogic;
 488     begin
 489         -- start from the single-precision bit positions and switch to
 490         -- the double-precision ones when single_prec is '0'
 491         below := mantissa(30 downto 29) & x;
 492         even_bit := mantissa(31);
 493         if single_prec = '0' then
 494             below := mantissa(1 downto 0) & x;
 495             even_bit := mantissa(2);
 496         end if;
 497         -- any non-zero bit below the lsb makes the result inexact
 498         inexact := or (below);
 499         decision := '0' & inexact;
 500         if rn(1 downto 0) = "00" then
 501             -- round to nearest
 502             if below = "100" and rn(2) = '0' then
 503                 decision(1) := even_bit;        -- tie, round to even
 504             else
 505                 decision(1) := below(2);
 506             end if;
 507         elsif rn(1 downto 0) /= "01" and rn(0) = sign then
 508             -- round towards +/- inf in the direction of greater magnitude
 509             decision(1) := inexact;
 510         end if;                                 -- "01" (towards zero) never increments
 511         return decision;
 512     end;
 513
 514     -- Compute the 5-bit result-class flags to be written into the FPSCR
 515     function result_flags(sign: std_ulogic; class: fp_number_class; unitbit: std_ulogic)
 516         return std_ulogic_vector is
 517     begin
 518         if class = NAN then
 519             return "10001";
 520         elsif class = INFINITY then
 521             return '0' & sign & (not sign) & "01";
 522         elsif class = FINITE then
 523             -- the leading flag is set when the mantissa's unit bit is clear
 524             return (not unitbit) & sign & (not sign) & "00";
 525         else    -- only ZERO remains
 526             return sign & "0010";
 527         end if;
 528     end;
 529
 530 begin
 531     fpu_multiply_0: entity work.multiply  -- instance of the multiply entity from the work library
 532         port map (
 533             clk => clk,
 534             m_in => f_to_multiply,  -- input record driven by this unit
 535             m_out => multiply_to_f  -- output record returned to this unit
 536             );
 537
 538     fpu_0: process(clk)
 539     begin
 540         if rising_edge(clk) then
 541             if rst = '1' then
 542                 r.state <= IDLE;
 543                 r.busy <= '0';
 544                 r.instr_done <= '0';
 545                 r.do_intr <= '0';
 546                 r.fpscr <= (others => '0');
 547                 r.writing_back <= '0';
 548             else
 549                 assert not (r.state /= IDLE and e_in.valid = '1') severity failure;
 550                 r <= rin;
 551             end if;
 552         end if;
 553     end process;
 554
 555     -- synchronous reads from lookup table
 556     lut_access: process(clk)
 557         variable addrhi : std_ulogic_vector(1 downto 0);
 558         variable addr   : std_ulogic_vector(9 downto 0);
 559     begin
 560         if rising_edge(clk) then
 561             if r.is_sqrt = '1' then
 562                 addrhi := r.b.mantissa(55 downto 54);
 563             else
 564                 addrhi := "00";
 565             end if;
 566             addr := addrhi & r.b.mantissa(53 downto 46);
 567             inverse_est <= '1' & inverse_table(to_integer(unsigned(addr)));
 568         end if;
 569     end process;
 570
 571     e_out.busy <= r.busy;
 572     e_out.exception <= r.fpscr(FPSCR_FEX);
 573     e_out.interrupt <= r.do_intr;
 574
 575     w_out.valid <= r.instr_done and not r.do_intr;
 576     w_out.write_enable <= r.writing_back;
 577     w_out.write_reg <= r.dest_fpr;
 578     w_out.write_data <= fp_result;
 579     w_out.write_cr_enable <= r.instr_done and (r.rc or r.is_cmp);
 580     w_out.write_cr_mask <= r.cr_mask;
 581     w_out.write_cr_data <= r.cr_result & r.cr_result & r.cr_result & r.cr_result &
 582                            r.cr_result & r.cr_result & r.cr_result & r.cr_result;
 583
 584     fpu_1: process(all)
 585         variable v           : reg_type;
 586         variable adec        : fpu_reg_type;
 587         variable bdec        : fpu_reg_type;
 588         variable cdec        : fpu_reg_type;
 589         variable fpscr_mask  : std_ulogic_vector(31 downto 0);
 590         variable illegal     : std_ulogic;
 591         variable j, k        : integer;
 592         variable flm         : std_ulogic_vector(7 downto 0);
 593         variable int_input   : std_ulogic;
 594         variable mask        : std_ulogic_vector(63 downto 0);
 595         variable in_a0       : std_ulogic_vector(63 downto 0);
 596         variable in_b0       : std_ulogic_vector(63 downto 0);
 597         variable misc        : std_ulogic_vector(63 downto 0);
 598         variable shift_res   : std_ulogic_vector(63 downto 0);
 599         variable round       : std_ulogic_vector(1 downto 0);
 600         variable update_fx   : std_ulogic;
 601         variable arith_done  : std_ulogic;
 602         variable invalid     : std_ulogic;
 603         variable zero_divide : std_ulogic;
 604         variable mant_nz     : std_ulogic;
 605         variable min_exp     : signed(EXP_BITS-1 downto 0);
 606         variable max_exp     : signed(EXP_BITS-1 downto 0);
 607         variable bias_exp    : signed(EXP_BITS-1 downto 0);
 608         variable new_exp     : signed(EXP_BITS-1 downto 0);
 609         variable exp_tiny    : std_ulogic;
 610         variable exp_huge    : std_ulogic;
 611         variable renormalize : std_ulogic;
 612         variable clz         : std_ulogic_vector(5 downto 0);
 613         variable set_x       : std_ulogic;
 614         variable mshift      : signed(EXP_BITS-1 downto 0);
 615         variable need_check  : std_ulogic;
 616         variable msb         : std_ulogic;
 617         variable is_add      : std_ulogic;
 618         variable longmask    : std_ulogic;
 619         variable set_a       : std_ulogic;
 620         variable set_b       : std_ulogic;
 621         variable set_c       : std_ulogic;
 622         variable set_y       : std_ulogic;
 623         variable set_s       : std_ulogic;
 624         variable qnan_result : std_ulogic;
 625         variable px_nz       : std_ulogic;
 626         variable pcmpb_eq    : std_ulogic;
 627         variable pcmpb_lt    : std_ulogic;
 628         variable pshift      : std_ulogic;
 629         variable renorm_sqrt : std_ulogic;
 630         variable sqrt_exp    : signed(EXP_BITS-1 downto 0);
 631         variable shiftin     : std_ulogic;
 632         variable mulexp      : signed(EXP_BITS-1 downto 0);
 633         variable maddend     : std_ulogic_vector(127 downto 0);
 634     begin
 635         v := r;
 636         illegal := '0';
 637         v.busy := '0';
 638         int_input := '0';
 639
 640         -- capture incoming instruction
 641         if e_in.valid = '1' then
 642             v.insn := e_in.insn;
 643             v.op := e_in.op;
 644             v.fe_mode := or (e_in.fe_mode);
 645             v.dest_fpr := e_in.frt;
 646             v.single_prec := e_in.single;
 647             v.int_result := '0';
 648             v.rc := e_in.rc;
 649             v.is_cmp := e_in.out_cr;
 650             if e_in.out_cr = '0' then
 651                 v.cr_mask := num_to_fxm(1);
 652             else
 653                 v.cr_mask := num_to_fxm(to_integer(unsigned(insn_bf(e_in.insn))));
 654             end if;
 655             int_input := '0';
 656             if e_in.op = OP_FPOP_I then
 657                 int_input := '1';
 658             end if;
 659             v.quieten_nan := '1';
 660             v.tiny := '0';
 661             v.denorm := '0';
 662             v.round_mode := '0' & r.fpscr(FPSCR_RN+1 downto FPSCR_RN);
 663             v.is_subtract := '0';
 664             v.is_multiply := '0';
 665             v.is_sqrt := '0';
 666             v.add_bsmall := '0';
 667             v.doing_ftdiv := "00";
 668
 669             adec := decode_dp(e_in.fra, int_input);
 670             bdec := decode_dp(e_in.frb, int_input);
 671             cdec := decode_dp(e_in.frc, int_input);
 672             v.a := adec;
 673             v.b := bdec;
 674             v.c := cdec;
 675
 676             v.exp_cmp := '0';
 677             if adec.exponent > bdec.exponent then
 678                 v.exp_cmp := '1';
 679             end if;
 680             v.madd_cmp := '0';
 681             if (adec.exponent + cdec.exponent + 1) >= bdec.exponent then
 682                 v.madd_cmp := '1';
 683             end if;
 684         end if;
 685
 686         r_hi_nz <= or (r.r(55 downto 31));
 687         r_lo_nz <= or (r.r(30 downto 2));
 688         s_nz <= or (r.s);
 689
 690         if r.single_prec = '0' then
 691             if r.doing_ftdiv(1) = '0' then
 692                 max_exp := to_signed(1023, EXP_BITS);
 693             else
 694                 max_exp := to_signed(1020, EXP_BITS);
 695             end if;
 696             if r.doing_ftdiv(0) = '0' then
 697                 min_exp := to_signed(-1022, EXP_BITS);
 698             else
 699                 min_exp := to_signed(-1021, EXP_BITS);
 700             end if;
 701             bias_exp := to_signed(1536, EXP_BITS);
 702         else
 703             max_exp := to_signed(127, EXP_BITS);
 704             min_exp := to_signed(-126, EXP_BITS);
 705             bias_exp := to_signed(192, EXP_BITS);
 706         end if;
 707         new_exp := r.result_exp - r.shift;
 708         exp_tiny := '0';
 709         exp_huge := '0';
 710         if new_exp < min_exp then
 711             exp_tiny := '1';
 712         end if;
 713         if new_exp > max_exp then
 714             exp_huge := '1';
 715         end if;
 716
 717         -- Compare P with zero and with B
 718         px_nz := or (r.p(57 downto 4));
 719         pcmpb_eq := '0';
 720         if r.p(59 downto 4) = r.b.mantissa(55 downto 0) then
 721             pcmpb_eq := '1';
 722         end if;
 723         pcmpb_lt := '0';
 724         if unsigned(r.p(59 downto 4)) < unsigned(r.b.mantissa(55 downto 0)) then
 725             pcmpb_lt := '1';
 726         end if;
 727
 728         v.writing_back := '0';
 729         v.instr_done := '0';
 730         v.update_fprf := '0';
 731         v.shift := to_signed(0, EXP_BITS);
 732         v.first := '0';
 733         v.opsel_a := AIN_R;
 734         opsel_ainv <= '0';
 735         opsel_amask <= '0';
 736         opsel_b <= BIN_ZERO;
 737         opsel_binv <= '0';
 738         opsel_r <= RES_SUM;
 739         opsel_s <= S_ZERO;
 740         carry_in <= '0';
 741         misc_sel <= "0000";
 742         fpscr_mask := (others => '1');
 743         update_fx := '0';
 744         arith_done := '0';
 745         invalid := '0';
 746         zero_divide := '0';
 747         renormalize := '0';
 748         set_x := '0';
 749         qnan_result := '0';
 750         longmask := r.single_prec;
 751         set_a := '0';
 752         set_b := '0';
 753         set_c := '0';
 754         set_s := '0';
 755         f_to_multiply.is_32bit <= '0';
 756         f_to_multiply.valid <= '0';
 757         msel_1 <= MUL1_A;
 758         msel_2 <= MUL2_C;
 759         msel_add <= MULADD_ZERO;
 760         msel_inv <= '0';
 761         set_y := '0';
 762         pshift := '0';
 763         renorm_sqrt := '0';
 764         shiftin := '0';
 765         case r.state is
 766             when IDLE =>
 767                 v.use_a := '0';
 768                 v.use_b := '0';
 769                 v.use_c := '0';
 770                 v.invalid := '0';
 771                 v.negate := '0';
 772                 if e_in.valid = '1' then
 773                     case e_in.insn(5 downto 1) is
 774                         when "00000" =>
 775                             if e_in.insn(8) = '1' then
 776                                 if e_in.insn(6) = '0' then
 777                                     v.state := DO_FTDIV;
 778                                 else
 779                                     v.state := DO_FTSQRT;
 780                                 end if;
 781                             elsif e_in.insn(7) = '1' then
 782                                 v.state := DO_MCRFS;
 783                             else
 784                                 v.opsel_a := AIN_B;
 785                                 v.state := DO_FCMP;
 786                             end if;
 787                         when "00110" =>
 788                             if e_in.insn(10) = '0' then
 789                                 if e_in.insn(8) = '0' then
 790                                     v.state := DO_MTFSB;
 791                                 else
 792                                     v.state := DO_MTFSFI;
 793                                 end if;
 794                             else
 795                                 v.state := DO_FMRG;
 796                             end if;
 797                         when "00111" =>
 798                             if e_in.insn(8) = '0' then
 799                                 v.state := DO_MFFS;
 800                             else
 801                                 v.state := DO_MTFSF;
 802                             end if;
 803                         when "01000" =>
 804                             v.opsel_a := AIN_B;
 805                             if e_in.insn(9 downto 8) /= "11" then
 806                                 v.state := DO_FMR;
 807                             else
 808                                 v.state := DO_FRI;
 809                             end if;
 810                         when "01100" =>
 811                             v.opsel_a := AIN_B;
 812                             v.state := DO_FRSP;
 813                         when "01110" =>
 814                             v.opsel_a := AIN_B;
 815                             if int_input = '1' then
 816                                 -- fcfid[u][s]
 817                                 v.state := DO_FCFID;
 818                             else
 819                                 v.state := DO_FCTI;
 820                             end if;
 821                         when "01111" =>
 822                             v.round_mode := "001";
 823                             v.opsel_a := AIN_B;
 824                             v.state := DO_FCTI;
 825                         when "10010" =>
 826                             v.opsel_a := AIN_A;
 827                             if v.b.mantissa(54) = '0' and v.a.mantissa(54) = '1' then
 828                                 v.opsel_a := AIN_B;
 829                             end if;
 830                             v.state := DO_FDIV;
 831                         when "10100" | "10101" =>
 832                             v.opsel_a := AIN_A;
 833                             v.state := DO_FADD;
 834                         when "10110" =>
 835                             v.is_sqrt := '1';
 836                             v.opsel_a := AIN_B;
 837                             v.state := DO_FSQRT;
 838                         when "10111" =>
 839                             v.state := DO_FSEL;
 840                         when "11000" =>
 841                             v.opsel_a := AIN_B;
 842                             v.state := DO_FRE;
 843                         when "11001" =>
 844                             v.is_multiply := '1';
 845                             v.opsel_a := AIN_A;
 846                             if v.c.mantissa(54) = '0' and v.a.mantissa(54) = '1' then
 847                                 v.opsel_a := AIN_C;
 848                             end if;
 849                             v.state := DO_FMUL;
 850                         when "11010" =>
 851                             v.is_sqrt := '1';
 852                             v.opsel_a := AIN_B;
 853                             v.state := DO_FRSQRTE;
 854                         when "11100" | "11101" | "11110" | "11111" =>
 855                             if v.a.mantissa(54) = '0' then
 856                                 v.opsel_a := AIN_A;
 857                             elsif v.c.mantissa(54) = '0' then
 858                                 v.opsel_a := AIN_C;
 859                             else
 860                                 v.opsel_a := AIN_B;
 861                             end if;
 862                             v.state := DO_FMADD;
 863                         when others =>
 864                             illegal := '1';
 865                     end case;
 866                 end if;
 867                 v.x := '0';
 868                 v.old_exc := r.fpscr(FPSCR_VX downto FPSCR_XX);
 869                 set_s := '1';
 870
 871             when DO_MCRFS =>
 872                 j := to_integer(unsigned(insn_bfa(r.insn)));
 873                 for i in 0 to 7 loop
 874                     if i = j then
 875                         k := (7 - i) * 4;
 876                         v.cr_result := r.fpscr(k + 3 downto k);
 877                         fpscr_mask(k + 3 downto k) := "0000";
 878                     end if;
 879                 end loop;
 880                 v.fpscr := r.fpscr and (fpscr_mask or x"6007F8FF");
 881                 v.instr_done := '1';
 882                 v.state := IDLE;
 883
 884             when DO_FTDIV =>
 885                 v.instr_done := '1';
 886                 v.state := IDLE;
 887                 v.cr_result := "0000";
 888                 if r.a.class = INFINITY or r.b.class = ZERO or r.b.class = INFINITY or
 889                     (r.b.class = FINITE and r.b.mantissa(53) = '0') then
 890                     v.cr_result(2) := '1';
 891                 end if;
 892                 if r.a.class = NAN or r.a.class = INFINITY or
 893                     r.b.class = NAN or r.b.class = ZERO or r.b.class = INFINITY or
 894                     (r.a.class = FINITE and r.a.exponent <= to_signed(-970, EXP_BITS)) then
 895                     v.cr_result(1) := '1';
 896                 else
 897                     v.doing_ftdiv := "11";
 898                     v.first := '1';
 899                     v.state := FTDIV_1;
 900                     v.instr_done := '0';
 901                 end if;
 902
 903             when DO_FTSQRT =>
 904                 v.instr_done := '1';
 905                 v.state := IDLE;
 906                 v.cr_result := "0000";
 907                 if r.b.class = ZERO or r.b.class = INFINITY or
 908                     (r.b.class = FINITE and r.b.mantissa(53) = '0') then
 909                     v.cr_result(2) := '1';
 910                 end if;
 911                 if r.b.class = NAN or r.b.class = INFINITY or r.b.class = ZERO
 912                     or r.b.negative = '1' or r.b.exponent <= to_signed(-970, EXP_BITS) then
 913                     v.cr_result(1) := '0';
 914                 end if;
 915
 916             when DO_FCMP =>
 917                 -- fcmp[uo]
 918                 -- r.opsel_a = AIN_B
 919                 v.instr_done := '1';
 920                 v.state := IDLE;
 921                 update_fx := '1';
 922                 v.result_exp := r.b.exponent;
 923                 if (r.a.class = NAN and r.a.mantissa(53) = '0') or
 924                     (r.b.class = NAN and r.b.mantissa(53) = '0') then
 925                     -- Signalling NAN
 926                     v.fpscr(FPSCR_VXSNAN) := '1';
 927                     if r.insn(6) = '1' and r.fpscr(FPSCR_VE) = '0' then
 928                         v.fpscr(FPSCR_VXVC) := '1';
 929                     end if;
 930                     invalid := '1';
 931                     v.cr_result := "0001";          -- unordered
 932                 elsif r.a.class = NAN or r.b.class = NAN then
 933                     if r.insn(6) = '1' then
 934                         -- fcmpo
 935                         v.fpscr(FPSCR_VXVC) := '1';
 936                         invalid := '1';
 937                     end if;
 938                     v.cr_result := "0001";          -- unordered
 939                 elsif r.a.class = ZERO and r.b.class = ZERO then
 940                     v.cr_result := "0010";          -- equal
 941                 elsif r.a.negative /= r.b.negative then
 942                     v.cr_result := r.a.negative & r.b.negative & "00";
 943                 elsif r.a.class = ZERO then
 944                     -- A and B are the same sign from here down
 945                     v.cr_result := not r.b.negative & r.b.negative & "00";
 946                 elsif r.a.class = INFINITY then
 947                     if r.b.class = INFINITY then
 948                         v.cr_result := "0010";
 949                     else
 950                         v.cr_result := r.a.negative & not r.a.negative & "00";
 951                     end if;
 952                 elsif r.b.class = ZERO then
 953                     -- A is finite from here down
 954                     v.cr_result := r.a.negative & not r.a.negative & "00";
 955                 elsif r.b.class = INFINITY then
 956                     v.cr_result := not r.b.negative & r.b.negative & "00";
 957                 elsif r.exp_cmp = '1' then
 958                     -- A and B are both finite from here down
 959                     v.cr_result := r.a.negative & not r.a.negative & "00";
 960                 elsif r.a.exponent /= r.b.exponent then
 961                     -- A exponent is smaller than B
 962                     v.cr_result := not r.a.negative & r.a.negative & "00";
 963                 else
 964                     -- Prepare to subtract mantissas, put B in R
 965                     v.cr_result := "0000";
 966                     v.instr_done := '0';
 967                     v.opsel_a := AIN_A;
 968                     v.state := CMP_1;
 969                 end if;
 970                 v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result;
 971
 972             when DO_MTFSB =>
 973                 -- mtfsb{0,1}
 974                 j := to_integer(unsigned(insn_bt(r.insn)));
 975                 for i in 0 to 31 loop
 976                     if i = j then
 977                         v.fpscr(31 - i) := r.insn(6);
 978                     end if;
 979                 end loop;
 980                 v.instr_done := '1';
 981                 v.state := IDLE;
 982
 983             when DO_MTFSFI =>
 984                 -- mtfsfi
 985                 j := to_integer(unsigned(insn_bf(r.insn)));
 986                 if r.insn(16) = '0' then
 987                     for i in 0 to 7 loop
 988                         if i = j then
 989                             k := (7 - i) * 4;
 990                             v.fpscr(k + 3 downto k) := insn_u(r.insn);
 991                         end if;
 992                     end loop;
 993                 end if;
 994                 v.instr_done := '1';
 995                 v.state := IDLE;
 996
 997             when DO_FMRG =>
 998                 -- fmrgew, fmrgow
 999                 opsel_r <= RES_MISC;
1000                 misc_sel <= "01" & r.insn(8) & '0';
1001                 v.int_result := '1';
1002                 v.writing_back := '1';
1003                 v.instr_done := '1';
1004                 v.state := IDLE;
1005
1006             when DO_MFFS =>
1007                 v.int_result := '1';
1008                 v.writing_back := '1';
1009                 opsel_r <= RES_MISC;
1010                 case r.insn(20 downto 16) is
1011                     when "00000" =>
1012                         -- mffs
1013                     when "00001" =>
1014                         -- mffsce
1015                         v.fpscr(FPSCR_VE downto FPSCR_XE) := "00000";
1016                     when "10100" | "10101" =>
1017                         -- mffscdrn[i] (but we don't implement DRN)
1018                         fpscr_mask := x"000000FF";
1019                     when "10110" =>
1020                         -- mffscrn
1021                         fpscr_mask := x"000000FF";
1022                         v.fpscr(FPSCR_RN+1 downto FPSCR_RN) :=
1023                             r.b.mantissa(FPSCR_RN+1 downto FPSCR_RN);
1024                     when "10111" =>
1025                         -- mffscrni
1026                         fpscr_mask := x"000000FF";
1027                         v.fpscr(FPSCR_RN+1 downto FPSCR_RN) := r.insn(12 downto 11);
1028                     when "11000" =>
1029                         -- mffsl
1030                         fpscr_mask := x"0007F0FF";
1031                     when others =>
1032                         illegal := '1';
1033                 end case;
1034                 v.instr_done := '1';
1035                 v.state := IDLE;
1036
1037             when DO_MTFSF =>
1038                 if r.insn(25) = '1' then
1039                     flm := x"FF";
1040                 elsif r.insn(16) = '1' then
1041                     flm := x"00";
1042                 else
1043                     flm := r.insn(24 downto 17);
1044                 end if;
1045                 for i in 0 to 7 loop
1046                     k := i * 4;
1047                     if flm(i) = '1' then
1048                         v.fpscr(k + 3 downto k) := r.b.mantissa(k + 3 downto k);
1049                     end if;
1050                 end loop;
1051                 v.instr_done := '1';
1052                 v.state := IDLE;
1053
1054             when DO_FMR =>
1055                 -- r.opsel_a = AIN_B
1056                 v.result_class := r.b.class;
1057                 v.result_exp := r.b.exponent;
1058                 v.quieten_nan := '0';
1059                 if r.insn(9) = '1' then
1060                     v.result_sign := '0';              -- fabs
1061                 elsif r.insn(8) = '1' then
1062                     v.result_sign := '1';              -- fnabs
1063                 elsif r.insn(7) = '1' then
1064                     v.result_sign := r.b.negative;     -- fmr
1065                 elsif r.insn(6) = '1' then
1066                     v.result_sign := not r.b.negative; -- fneg
1067                 else
1068                     v.result_sign := r.a.negative;     -- fcpsgn
1069                 end if;
1070                 v.writing_back := '1';
1071                 v.instr_done := '1';
1072                 v.state := IDLE;
1073
1074             when DO_FRI =>    -- fri[nzpm]
1075                 -- r.opsel_a = AIN_B
1076                 v.result_class := r.b.class;
1077                 v.result_sign := r.b.negative;
1078                 v.result_exp := r.b.exponent;
1079                 v.fpscr(FPSCR_FR) := '0';
1080                 v.fpscr(FPSCR_FI) := '0';
1081                 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1082                     -- Signalling NAN
1083                     v.fpscr(FPSCR_VXSNAN) := '1';
1084                     invalid := '1';
1085                 end if;
1086                 if r.b.class = FINITE then
1087                     if r.b.exponent >= to_signed(52, EXP_BITS) then
1088                         -- integer already, no rounding required
1089                         arith_done := '1';
1090                     else
1091                         v.shift := r.b.exponent - to_signed(52, EXP_BITS);
1092                         v.state := FRI_1;
1093                         v.round_mode := '1' & r.insn(7 downto 6);
1094                     end if;
1095                 else
1096                     arith_done := '1';
1097                 end if;
1098
1099             when DO_FRSP =>
1100                 -- r.opsel_a = AIN_B, r.shift = 0
1101                 v.result_class := r.b.class;
1102                 v.result_sign := r.b.negative;
1103                 v.result_exp := r.b.exponent;
1104                 v.fpscr(FPSCR_FR) := '0';
1105                 v.fpscr(FPSCR_FI) := '0';
1106                 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1107                     -- Signalling NAN
1108                     v.fpscr(FPSCR_VXSNAN) := '1';
1109                     invalid := '1';
1110                 end if;
1111                 set_x := '1';
1112                 if r.b.class = FINITE then
1113                     if r.b.exponent < to_signed(-126, EXP_BITS) then
1114                         v.shift := r.b.exponent - to_signed(-126, EXP_BITS);
1115                         v.state := ROUND_UFLOW;
1116                     elsif r.b.exponent > to_signed(127, EXP_BITS) then
1117                         v.state := ROUND_OFLOW;
1118                     else
1119                         v.shift := to_signed(-2, EXP_BITS);
1120                         v.state := ROUNDING;
1121                     end if;
1122                 else
1123                     arith_done := '1';
1124                 end if;
1125
1126             when DO_FCTI =>
1127                 -- instr bit 9: 1=dword 0=word
1128                 -- instr bit 8: 1=unsigned 0=signed
1129                 -- instr bit 1: 1=round to zero 0=use fpscr[RN]
1130                 -- r.opsel_a = AIN_B
1131                 v.result_class := r.b.class;
1132                 v.result_sign := r.b.negative;
1133                 v.result_exp := r.b.exponent;
1134                 v.fpscr(FPSCR_FR) := '0';
1135                 v.fpscr(FPSCR_FI) := '0';
1136                 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1137                     -- Signalling NAN
1138                     v.fpscr(FPSCR_VXSNAN) := '1';
1139                     invalid := '1';
1140                 end if;
1141
1142                 v.int_result := '1';
1143                 case r.b.class is
1144                     when ZERO =>
1145                         arith_done := '1';
1146                     when FINITE =>
1147                         if r.b.exponent >= to_signed(64, EXP_BITS) or
1148                             (r.insn(9) = '0' and r.b.exponent >= to_signed(32, EXP_BITS)) then
1149                             v.state := INT_OFLOW;
1150                         elsif r.b.exponent >= to_signed(52, EXP_BITS) then
1151                             -- integer already, no rounding required,
1152                             -- shift into final position
1153                             v.shift := r.b.exponent - to_signed(54, EXP_BITS);
1154                             if r.insn(8) = '1' and r.b.negative = '1' then
1155                                 v.state := INT_OFLOW;
1156                             else
1157                                 v.state := INT_ISHIFT;
1158                             end if;
1159                         else
1160                             v.shift := r.b.exponent - to_signed(52, EXP_BITS);
1161                             v.state := INT_SHIFT;
1162                         end if;
1163                     when INFINITY | NAN =>
1164                         v.state := INT_OFLOW;
1165                 end case;
1166
1167             when DO_FCFID =>
1168                 -- r.opsel_a = AIN_B
1169                 v.result_sign := '0';
1170                 if r.insn(8) = '0' and r.b.negative = '1' then
1171                     -- fcfid[s] with negative operand, set R = -B
1172                     opsel_ainv <= '1';
1173                     carry_in <= '1';
1174                     v.result_sign := '1';
1175                 end if;
1176                 v.result_class := r.b.class;
1177                 v.result_exp := to_signed(54, EXP_BITS);
1178                 v.fpscr(FPSCR_FR) := '0';
1179                 v.fpscr(FPSCR_FI) := '0';
1180                 if r.b.class = ZERO then
1181                     arith_done := '1';
1182                 else
1183                     v.state := FINISH;
1184                 end if;
1185
1186             when DO_FADD =>
1187                 -- fadd[s] and fsub[s]
1188                 -- r.opsel_a = AIN_A
1189                 v.result_sign := r.a.negative;
1190                 v.result_class := r.a.class;
1191                 v.result_exp := r.a.exponent;
1192                 v.fpscr(FPSCR_FR) := '0';
1193                 v.fpscr(FPSCR_FI) := '0';
1194                 v.use_a := '1';
1195                 v.use_b := '1';
1196                 is_add := r.a.negative xor r.b.negative xor r.insn(1);
1197                 if r.a.class = FINITE and r.b.class = FINITE then
1198                     v.is_subtract := not is_add;
1199                     v.add_bsmall := r.exp_cmp;
1200                     v.opsel_a := AIN_B;
1201                     if r.exp_cmp = '0' then
1202                         v.shift := r.a.exponent - r.b.exponent;
1203                         v.result_sign := r.b.negative xnor r.insn(1);
1204                         if r.a.exponent = r.b.exponent then
1205                             v.state := ADD_2;
1206                         else
1207                             v.state := ADD_SHIFT;
1208                         end if;
1209                     else
1210                         v.state := ADD_1;
1211                     end if;
1212                 else
1213                     if r.a.class = NAN or r.b.class = NAN then
1214                         v.state := NAN_RESULT;
1215                     elsif r.a.class = INFINITY and r.b.class = INFINITY and is_add = '0' then
1216                         -- invalid operation, construct QNaN
1217                         v.fpscr(FPSCR_VXISI) := '1';
1218                         qnan_result := '1';
1219                         arith_done := '1';
1220                     elsif r.a.class = ZERO and r.b.class = ZERO and is_add = '0' then
1221                         -- return -0 for rounding to -infinity
1222                         v.result_sign := r.round_mode(1) and r.round_mode(0);
1223                         arith_done := '1';
1224                     elsif r.a.class = INFINITY or r.b.class = ZERO then
1225                         -- result is A
1226                         v.opsel_a := AIN_A;
1227                         v.state := EXC_RESULT;
1228                     else
1229                         -- result is +/- B
1230                         v.opsel_a := AIN_B;
1231                         v.negate := not r.insn(1);
1232                         v.state := EXC_RESULT;
1233                     end if;
1234                 end if;
1235
1236             when DO_FMUL =>  -- entry state for multiply: set up result fields, then dispatch on the classes of A and C
1237                 -- fmul[s]
1238                 -- r.opsel_a = AIN_A unless C is denorm and A isn't
1239                 v.result_sign := r.a.negative xor r.c.negative;  -- product sign is the XOR of the operand signs
1240                 v.result_class := r.a.class;  -- default; only used as-is on the "result is +/- A" path below
1241                 v.fpscr(FPSCR_FR) := '0';
1242                 v.fpscr(FPSCR_FI) := '0';
1243                 v.use_a := '1';
1244                 v.use_c := '1';
1245                 if r.a.class = FINITE and r.c.class = FINITE then
1246                     v.result_exp := r.a.exponent + r.c.exponent;  -- exponents are unbiased, so the product exponent is the plain sum
1247                     -- Renormalize denorm operands (bit 54 is the units bit of the 10.54-format mantissa)
1248                     if r.a.mantissa(54) = '0' then
1249                         v.state := RENORM_A;
1250                     elsif r.c.mantissa(54) = '0' then
1251                         v.state := RENORM_C;
1252                     else
1253                         f_to_multiply.valid <= '1';  -- both operands normalized: start the multiplier in this cycle
1254                         v.state := MULT_1;
1255                     end if;
1256                 else
1257                     if r.a.class = NAN or r.c.class = NAN then
1258                         v.state := NAN_RESULT;
1259                     elsif (r.a.class = INFINITY and r.c.class = ZERO) or
1260                         (r.a.class = ZERO and r.c.class = INFINITY) then
1261                         -- invalid operation (infinity * zero), construct QNaN
1262                         v.fpscr(FPSCR_VXIMZ) := '1';
1263                         qnan_result := '1';  -- NOTE(review): no arith_done here; presumably the qnan_result handling outside this case completes the instruction -- confirm
1264                     elsif r.a.class = ZERO or r.a.class = INFINITY then
1265                         -- result is +/- A (class from A, sign already set to the XOR above)
1266                         arith_done := '1';
1267                     else
1268                         -- r.c.class is ZERO or INFINITY (A is FINITE here): result is C, negated when A is negative
1269                         v.opsel_a := AIN_C;
1270                         v.negate := r.a.negative;
1271                         v.state := EXC_RESULT;
1272                     end if;
1273                 end if;
1274
1275             when DO_FDIV =>  -- entry state for divide: set up result fields, then dispatch on the classes of A and B
1276                 -- r.opsel_a = AIN_A unless B is denorm and A isn't
1277                 v.result_class := r.a.class;  -- default; overridden below for the zero/infinity special cases
1278                 v.fpscr(FPSCR_FR) := '0';
1279                 v.fpscr(FPSCR_FI) := '0';
1280                 v.use_a := '1';
1281                 v.use_b := '1';
1282                 v.result_sign := r.a.negative xor r.b.negative;  -- quotient sign is the XOR of the operand signs
1283                 v.result_exp := r.a.exponent - r.b.exponent;  -- exponents are unbiased, so the quotient exponent is the plain difference
1284                 v.count := "00";  -- reset the iteration counter that DIV_2 increments
1285                 if r.a.class = FINITE and r.b.class = FINITE then
1286                     -- Renormalize denorm operands (bit 54 is the units bit of the 10.54-format mantissa)
1287                     if r.a.mantissa(54) = '0' then
1288                         v.state := RENORM_A;
1289                     elsif r.b.mantissa(54) = '0' then
1290                         v.state := RENORM_B;
1291                     else
1292                         v.first := '1';  -- DIV_2 drives f_to_multiply.valid from r.first
1293                         v.state := DIV_2;
1294                     end if;
1295                 else
1296                     if r.a.class = NAN or r.b.class = NAN then
1297                         v.state := NAN_RESULT;
1298                     elsif r.b.class = INFINITY then
1299                         if r.a.class = INFINITY then
1300                             v.fpscr(FPSCR_VXIDI) := '1';  -- infinity / infinity is invalid
1301                             qnan_result := '1';
1302                         else
1303                             v.result_class := ZERO;  -- anything else divided by infinity gives a signed zero
1304                         end if;
1305                         arith_done := '1';
1306                     elsif r.b.class = ZERO then
1307                         if r.a.class = ZERO then
1308                             v.fpscr(FPSCR_VXZDZ) := '1';  -- zero / zero is invalid
1309                             qnan_result := '1';
1310                         else
1311                             if r.a.class = FINITE then
1312                                 zero_divide := '1';  -- only finite / zero raises zero-divide; infinity / zero does not
1313                             end if;
1314                             v.result_class := INFINITY;
1315                         end if;
1316                         arith_done := '1';
1317                     else -- r.b.class = FINITE, result_class = r.a.class (A is ZERO or INFINITY here)
1318                         arith_done := '1';
1319                     end if;
1320                 end if;
1321
1322             when DO_FSEL =>  -- select C or B as the result depending on the sign/class of A
1323                 v.fpscr(FPSCR_FR) := '0';
1324                 v.fpscr(FPSCR_FI) := '0';
1325                 if r.a.class = ZERO or (r.a.negative = '0' and r.a.class /= NAN) then  -- A is a zero of either sign, or non-negative and not a NaN
1326                     v.opsel_a := AIN_C;
1327                 else
1328                     v.opsel_a := AIN_B;  -- A is negative non-zero, or a NaN
1329                 end if;
1330                 v.quieten_nan := '0';  -- NOTE(review): presumably passes a selected NaN through unmodified -- confirm where quieten_nan is consumed
1331                 v.state := EXC_RESULT;
1332
1333             when DO_FSQRT =>  -- entry state for square root: set up result fields, then dispatch on the class of B
1334                 -- r.opsel_a = AIN_B
1335                 v.result_class := r.b.class;
1336                 v.result_sign := r.b.negative;
1337                 v.fpscr(FPSCR_FR) := '0';
1338                 v.fpscr(FPSCR_FI) := '0';
1339                 v.use_b := '1';
1340                 case r.b.class is
1341                     when FINITE =>
1342                         v.result_exp := r.b.exponent;
1343                         if r.b.negative = '1' then
1344                             v.fpscr(FPSCR_VXSQRT) := '1';  -- square root of a negative finite number is invalid
1345                             qnan_result := '1';  -- NOTE(review): no arith_done here, unlike the INFINITY arm; presumably the qnan_result handling outside this case completes the instruction -- confirm
1346                         elsif r.b.mantissa(54) = '0' then
1347                             v.state := RENORM_B;  -- units bit clear: B needs renormalizing first
1348                         elsif r.b.exponent(0) = '0' then
1349                             v.state := SQRT_1;  -- exponent is even
1350                         else
1351                             v.shift := to_signed(1, EXP_BITS);  -- exponent is odd: RENORM_B2 adds r.shift to the exponent when r.is_sqrt = '0'
1352                             v.state := RENORM_B2;
1353                         end if;
1354                     when NAN =>
1355                         v.state := NAN_RESULT;
1356                     when ZERO =>
1357                         -- result is B (class and sign were copied from B above)
1358                         arith_done := '1';
1359                     when INFINITY =>
1360                         if r.b.negative = '1' then
1361                             v.fpscr(FPSCR_VXSQRT) := '1';  -- square root of -infinity is invalid
1362                             qnan_result := '1';
1363                         -- else result is B
1364                         end if;
1365                         arith_done := '1';
1366                 end case;
1367
1368             when DO_FRE =>  -- entry state for reciprocal estimate: set up result fields, then dispatch on the class of B
1369                 -- r.opsel_a = AIN_B
1370                 v.result_class := r.b.class;
1371                 v.result_sign := r.b.negative;
1372                 v.fpscr(FPSCR_FR) := '0';
1373                 v.fpscr(FPSCR_FI) := '0';
1374                 v.use_b := '1';
1375                 case r.b.class is
1376                     when FINITE =>
1377                         v.result_exp := - r.b.exponent;  -- reciprocal negates the (unbiased) exponent
1378                         if r.b.mantissa(54) = '0' then
1379                             v.state := RENORM_B;  -- units bit clear: B needs renormalizing first
1380                         else
1381                             v.state := FRE_1;
1382                         end if;
1383                     when NAN =>
1384                         v.state := NAN_RESULT;
1385                     when INFINITY =>
1386                         v.result_class := ZERO;  -- 1/infinity gives zero with the sign of B
1387                         arith_done := '1';
1388                     when ZERO =>
1389                         v.result_class := INFINITY;  -- 1/zero gives infinity with the sign of B and flags zero-divide
1390                         zero_divide := '1';
1391                         arith_done := '1';
1392                 end case;
1393
1394             when DO_FRSQRTE =>  -- entry state for reciprocal square root estimate: dispatch on the class of B
1395                 -- r.opsel_a = AIN_B
1396                 v.result_class := r.b.class;
1397                 v.result_sign := r.b.negative;
1398                 v.fpscr(FPSCR_FR) := '0';
1399                 v.fpscr(FPSCR_FI) := '0';
1400                 v.use_b := '1';
1401                 v.shift := to_signed(1, EXP_BITS);  -- set unconditionally; RENORM_B2 adds r.shift to the exponent when r.is_sqrt = '0'
1402                 case r.b.class is
1403                     when FINITE =>
1404                         v.result_exp := r.b.exponent;
1405                         if r.b.negative = '1' then
1406                             v.fpscr(FPSCR_VXSQRT) := '1';  -- square root of a negative finite number is invalid
1407                             qnan_result := '1';  -- NOTE(review): no arith_done here, unlike the INFINITY arm; presumably handled with qnan_result outside this case -- confirm
1408                         elsif r.b.mantissa(54) = '0' then
1409                             v.state := RENORM_B;  -- units bit clear: B needs renormalizing first
1410                         elsif r.b.exponent(0) = '0' then
1411                             v.state := RSQRT_1;  -- exponent is even
1412                         else
1413                             v.state := RENORM_B2;  -- exponent is odd
1414                         end if;
1415                     when NAN =>
1416                         v.state := NAN_RESULT;
1417                     when INFINITY =>
1418                         if r.b.negative = '1' then
1419                             v.fpscr(FPSCR_VXSQRT) := '1';  -- square root of -infinity is invalid
1420                             qnan_result := '1';
1421                         else
1422                             v.result_class := ZERO;  -- 1/sqrt(+infinity) gives +0
1423                         end if;
1424                         arith_done := '1';
1425                     when ZERO =>
1426                         v.result_class := INFINITY;  -- 1/sqrt(zero) gives infinity with the sign of B and flags zero-divide
1427                         zero_divide := '1';
1428                         arith_done := '1';
1429                 end case;
1430
1431             when DO_FMADD =>  -- entry state for fused multiply-add (A * C +/- B); re-entered from RENORM_A2 / RENORM_C2
1432                 -- fmadd, fmsub, fnmadd, fnmsub
1433                 -- r.opsel_a = AIN_A if A is denorm, else AIN_C if C is denorm,
1434                 -- else AIN_B
1435                 v.result_sign := r.a.negative;
1436                 v.result_class := r.a.class;
1437                 v.result_exp := r.a.exponent;
1438                 v.fpscr(FPSCR_FR) := '0';
1439                 v.fpscr(FPSCR_FI) := '0';
1440                 v.use_a := '1';
1441                 v.use_b := '1';
1442                 v.use_c := '1';
1443                 is_add := r.a.negative xor r.c.negative xor r.b.negative xor r.insn(1);  -- XOR of product sign, addend sign and insn(1); inverted below to form v.is_subtract
1444                 if r.a.class = FINITE and r.c.class = FINITE and
1445                     (r.b.class = FINITE or r.b.class = ZERO) then
1446                     v.is_subtract := not is_add;
1447                     mulexp := r.a.exponent + r.c.exponent;  -- exponent of the product A * C
1448                     v.result_exp := mulexp;
1449                     -- Make sure A and C are normalized (bit 54 is the units bit of the 10.54-format mantissa)
1450                     if r.a.mantissa(54) = '0' then
1451                         v.state := RENORM_A;
1452                     elsif r.c.mantissa(54) = '0' then
1453                         v.state := RENORM_C;
1454                     elsif r.b.class = ZERO then
1455                         -- no addend, degenerates to multiply
1456                         v.result_sign := r.a.negative xor r.c.negative xor r.insn(2);  -- product sign, flipped when insn(2) is set
1457                         f_to_multiply.valid <= '1';
1458                         v.is_multiply := '1';
1459                         v.state := MULT_1;
1460                     elsif r.madd_cmp = '0' then
1461                         -- addend is bigger, do multiply first
1462                         v.result_sign := not (r.b.negative xor r.insn(1) xor r.insn(2));
1463                         f_to_multiply.valid <= '1';
1464                         v.state := FMADD_1;
1465                     else
1466                         -- product is bigger, shift B right and use it as the
1467                         -- addend to the multiplier
1468                         v.shift := r.b.exponent - mulexp + to_signed(64, EXP_BITS);  -- FMADD_2 later subtracts the 64 again
1469                         -- for subtract, multiplier does B - A * C
1470                         v.result_sign := not (r.a.negative xor r.c.negative xor r.insn(2) xor is_add);
1471                         v.result_exp := r.b.exponent;
1472                         v.state := FMADD_2;
1473                     end if;
1474                 else
1475                     if r.a.class = NAN or r.b.class = NAN or r.c.class = NAN then
1476                         v.state := NAN_RESULT;
1477                     elsif (r.a.class = ZERO and r.c.class = INFINITY) or
1478                         (r.a.class = INFINITY and r.c.class = ZERO) then
1479                         -- invalid operation (zero * infinity), construct QNaN
1480                         v.fpscr(FPSCR_VXIMZ) := '1';
1481                         qnan_result := '1';
1482                     elsif r.a.class = INFINITY or r.c.class = INFINITY then
1483                         if r.b.class = INFINITY and is_add = '0' then
1484                             -- invalid operation (infinity - infinity), construct QNaN
1485                             v.fpscr(FPSCR_VXISI) := '1';
1486                             qnan_result := '1';
1487                         else
1488                             -- result is infinity, signed like the product (flipped when insn(2) is set)
1489                             v.result_class := INFINITY;
1490                             v.result_sign := r.a.negative xor r.c.negative xor r.insn(2);
1491                             arith_done := '1';
1492                         end if;
1493                     else
1494                         -- Here A is zero, C is zero, or B is infinity
1495                         -- Result is +/-B in all of those cases
1496                         v.opsel_a := AIN_B;
1497                         if r.b.class /= ZERO or is_add = '1' then
1498                             v.negate := not (r.insn(1) xor r.insn(2));
1499                         else
1500                             -- have to be careful about rule for 0 - 0 result sign:
1501                             v.negate := r.b.negative xor (r.round_mode(1) and r.round_mode(0)) xor r.insn(2);  -- the round_mode "11" term supplies the sign of an exact-zero difference
1502                         end if;
1503                         v.state := EXC_RESULT;
1504                     end if;
1505                 end if;
1506
1507             when RENORM_A =>  -- first of two cycles renormalizing operand A; result is captured in RENORM_A2
1508                 renormalize := '1';
1509                 v.state := RENORM_A2;
1510                 if r.insn(4) = '1' then  -- insn(4) = '1' for fmul/fmadd, '0' for fdiv (per the RENORM_A2 comment)
1511                     v.opsel_a := AIN_C;
1512                 else
1513                     v.opsel_a := AIN_B;
1514                 end if;
1515
1516             when RENORM_A2 =>  -- latch the renormalized A, then renormalize the other operand or resume the operation
1517                 -- r.opsel_a = AIN_C for fmul/fmadd, AIN_B for fdiv
1518                 set_a := '1';
1519                 v.result_exp := new_exp;
1520                 if r.insn(4) = '1' then  -- fmul/fmadd path
1521                     if r.c.mantissa(54) = '1' then  -- C already normalized
1522                         if r.insn(3) = '0' or r.b.class = ZERO then
1523                             v.first := '1';  -- MULT_1 drives f_to_multiply.valid from r.first
1524                             v.state := MULT_1;
1525                         else
1526                             v.madd_cmp := '0';  -- recompute the product-vs-addend comparison with the adjusted exponent
1527                             if new_exp + 1 >= r.b.exponent then
1528                                 v.madd_cmp := '1';
1529                             end if;
1530                             v.opsel_a := AIN_B;
1531                             v.state := DO_FMADD;  -- re-enter DO_FMADD now that A is normalized
1532                         end if;
1533                     else
1534                         v.state := RENORM_C;
1535                     end if;
1536                 else  -- fdiv path
1537                     if r.b.mantissa(54) = '1' then  -- B already normalized
1538                         v.first := '1';
1539                         v.state := DIV_2;
1540                     else
1541                         v.state := RENORM_B;
1542                     end if;
1543                 end if;
1544
1545             when RENORM_B =>  -- first of two cycles renormalizing operand B; result is captured in RENORM_B2
1546                 renormalize := '1';
1547                 renorm_sqrt := r.is_sqrt;  -- NOTE(review): presumably selects the square-root variant of renormalization -- confirm where renorm_sqrt is consumed
1548                 v.state := RENORM_B2;
1549
1550             when RENORM_B2 =>  -- latch the adjusted B, update the result exponent, then go to the table-lookup wait state
1551                 set_b := '1';
1552                 if r.is_sqrt = '0' then
1553                     v.result_exp := r.result_exp + r.shift;  -- non-sqrt operations: adjust the exponent by the shift applied
1554                 else
1555                     v.result_exp := new_exp;  -- sqrt operations take the exponent from new_exp instead
1556                 end if;
1557                 v.opsel_a := AIN_B;
1558                 v.state := LOOKUP;
1559
1560             when RENORM_C =>  -- first of two cycles renormalizing operand C; result is captured in RENORM_C2
1561                 renormalize := '1';
1562                 v.state := RENORM_C2;
1563
1564             when RENORM_C2 =>  -- latch the renormalized C, then start the multiply or re-enter DO_FMADD
1565                 set_c := '1';
1566                 v.result_exp := new_exp;
1567                 if r.insn(3) = '0' or r.b.class = ZERO then  -- same test as in RENORM_A2: no addend to deal with, multiply only
1568                     v.first := '1';  -- MULT_1 drives f_to_multiply.valid from r.first
1569                     v.state := MULT_1;
1570                 else
1571                     v.madd_cmp := '0';  -- recompute the product-vs-addend comparison with the adjusted exponent
1572                     if new_exp + 1 >= r.b.exponent then
1573                         v.madd_cmp := '1';
1574                     end if;
1575                     v.opsel_a := AIN_B;
1576                     v.state := DO_FMADD;
1577                 end if;
1578
1579             when ADD_1 =>  -- reached from DO_FADD when r.exp_cmp = '1': B's exponent becomes the result exponent
1580                 -- transferring B to R
1581                 v.shift := r.b.exponent - r.a.exponent;
1582                 v.result_exp := r.b.exponent;
1583                 v.state := ADD_SHIFT;
1584
1585             when ADD_SHIFT =>  -- align the operand in R using the shifter, recording sticky information in X
1586                 -- r.shift = - exponent difference
1587                 opsel_r <= RES_SHIFT;  -- R takes the shifter output
1588                 v.x := s_nz;
1589                 set_x := '1';
1590                 longmask := '0';
1591                 if r.add_bsmall = '1' then  -- choose the operand for the A input of the add performed in ADD_2
1592                     v.opsel_a := AIN_A;
1593                 else
1594                     v.opsel_a := AIN_B;
1595                 end if;
1596                 v.state := ADD_2;
1597
1598             when ADD_2 =>  -- perform the add or subtract of the aligned operands
1599                 -- r.opsel_a = AIN_A if r.add_bsmall = 1 else AIN_B
1600                 opsel_b <= BIN_R;
1601                 opsel_binv <= r.is_subtract;  -- invert the B input when subtracting
1602                 carry_in <= r.is_subtract and not r.x;  -- no carry-in when sticky bit X is set
1603                 v.shift := to_signed(-1, EXP_BITS);  -- ADD_3 expects r.shift = -1
1604                 v.state := ADD_3;
1605
1606             when ADD_3 =>  -- classify the sum in R: negative, overflowed, normalized, zero, or needing normalization
1607                 -- check for overflow or negative result (can't get both)
1608                 -- r.shift = -1
1609                 if r.r(63) = '1' then
1610                     -- result is opposite sign to expected: negate R (invert plus carry-in) and flip the sign
1611                     v.result_sign := not r.result_sign;
1612                     opsel_ainv <= '1';
1613                     carry_in <= '1';
1614                     v.state := FINISH;
1615                 elsif r.r(55) = '1' then
1616                     -- sum overflowed, shift right
1617                     opsel_r <= RES_SHIFT;
1618                     set_x := '1';
1619                     v.shift := to_signed(-2, EXP_BITS);
1620                     if exp_huge = '1' then
1621                         v.state := ROUND_OFLOW;
1622                     else
1623                         v.state := ROUNDING;
1624                     end if;
1625                 elsif r.r(54) = '1' then  -- units bit set: already normalized, go straight to rounding
1626                     set_x := '1';
1627                     v.shift := to_signed(-2, EXP_BITS);
1628                     v.state := ROUNDING;
1629                 elsif (r_hi_nz or r_lo_nz or r.r(1) or r.r(0)) = '0' then  -- R is entirely zero
1630                     -- r.x must be zero at this point
1631                     v.result_class := ZERO;
1632                     if r.is_subtract = '1' then
1633                         -- set result sign depending on rounding mode (negative only for round_mode = "11")
1634                         v.result_sign := r.round_mode(1) and r.round_mode(0);
1635                     end if;
1636                     arith_done := '1';
1637                 else  -- non-zero with leading zeroes: normalize first
1638                     renormalize := '1';
1639                     v.state := NORMALIZE;
1640                 end if;
1641
1642             when CMP_1 =>  -- compare step 1: subtract R from the A input (inverted B input plus carry-in)
1643                 -- r.opsel_a = AIN_A
1644                 opsel_b <= BIN_R;
1645                 opsel_binv <= '1';
1646                 carry_in <= '1';
1647                 v.state := CMP_2;
1648
1649             when CMP_2 =>  -- compare step 2: turn the difference in R into a 4-bit CR result and finish the instruction
1650                 if r.r(63) = '1' then
1651                     -- A is smaller in magnitude: top bit set when A is positive, second bit when A is negative
1652                     v.cr_result := not r.a.negative & r.a.negative & "00";
1653                 elsif (r_hi_nz or r_lo_nz) = '0' then  -- difference is zero
1654                     v.cr_result := "0010";
1655                 else  -- A is larger in magnitude: the mirror image of the first case
1656                     v.cr_result := r.a.negative & not r.a.negative & "00";
1657                 end if;
1658                 v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result;  -- mirror the CR result into FPSCR bits FL..FU
1659                 v.instr_done := '1';
1660                 v.state := IDLE;
1661
1662             when MULT_1 =>  -- wait for the multiplier result and route it into R
1663                 f_to_multiply.valid <= r.first;  -- start the multiplier only when r.first is set
1664                 opsel_r <= RES_MULT;
1665                 if multiply_to_f.valid = '1' then  -- stay in this state until the multiplier reports a result
1666                     v.state := FINISH;
1667                 end if;
1668
1669             when FMADD_1 =>  -- fmadd with larger addend: wait for the product, then align it via ADD_SHIFT
1670                 -- Addend is bigger here
1671                 v.result_sign := not (r.b.negative xor r.insn(1) xor r.insn(2));
1672                 -- note v.shift is at most -2 here
1673                 v.shift := r.result_exp - r.b.exponent;
1674                 opsel_r <= RES_MULT;
1675                 opsel_s <= S_MULT;
1676                 set_s := '1';
1677                 f_to_multiply.valid <= r.first;
1678                 if multiply_to_f.valid = '1' then  -- stay in this state until the multiplier reports a result
1679                     v.state := ADD_SHIFT;
1680                 end if;
1681
1682             when FMADD_2 =>  -- fmadd with larger product, step 1: put the shifter output in S and remove the +64 bias from r.shift
1683                 -- Product is potentially bigger here
1684                 -- r.shift = addend exp - product exp + 64, r.r = r.b.mantissa
1685                 set_s := '1';
1686                 opsel_s <= S_SHIFT;
1687                 v.shift := r.shift - to_signed(64, EXP_BITS);
1688                 v.state := FMADD_3;
1689
1690             when FMADD_3 =>  -- fmadd with larger product, step 2: put the shifted value into R
1691                 -- r.shift = addend exp - product exp
1692                 opsel_r <= RES_SHIFT;
1693                 v.first := '1';  -- FMADD_4 drives f_to_multiply.valid from r.first
1694                 v.state := FMADD_4;
1695
1696             when FMADD_4 =>  -- fmadd with larger product, step 3: run the multiply-add with the MULADD_RS addend selected
1697                 msel_add <= MULADD_RS;
1698                 f_to_multiply.valid <= r.first;
1699                 msel_inv <= r.is_subtract;
1700                 opsel_r <= RES_MULT;
1701                 opsel_s <= S_MULT;
1702                 set_s := '1';
1703                 v.shift := to_signed(56, EXP_BITS);  -- FMADD_6 expects r.shift = 56
1704                 if multiply_to_f.valid = '1' then
1705                     if multiply_to_f.result(121) = '1' then  -- NOTE(review): bit 121 presumably flags a negative result, since FMADD_5 negates R:S:X -- confirm
1706                         v.state := FMADD_5;
1707                     else
1708                         v.state := FMADD_6;
1709                     end if;
1710                 end if;
1711
1712             when FMADD_5 =>  -- result came out with the opposite sign: negate it and flip the result sign
1713                 -- negate R:S:X
1714                 v.result_sign := not r.result_sign;
1715                 opsel_ainv <= '1';
1716                 carry_in <= not (s_nz or r.x);  -- carry into R only when both S and X are zero
1717                 opsel_s <= S_NEG;
1718                 set_s := '1';
1719                 v.shift := to_signed(56, EXP_BITS);  -- FMADD_6 expects r.shift = 56
1720                 v.state := FMADD_6;
1721
1722             when FMADD_6 =>  -- classify the fmadd result in R:S: exact zero, only in S, normalized, or needing normalization
1723                 -- r.shift = 56 (or 0, but only if r is now nonzero)
1724                 if (r.r(56) or r_hi_nz or r_lo_nz or r.r(1) or r.r(0)) = '0' then  -- none of the tested bits of R is set
1725                     if s_nz = '0' then
1726                         -- must be a subtraction, and r.x must be zero
1727                         v.result_class := ZERO;
1728                         v.result_sign := r.round_mode(1) and r.round_mode(0);  -- exact zero is negative only for round_mode = "11"
1729                         arith_done := '1';
1730                     else
1731                         -- R is all zeroes but there are non-zero bits in S
1732                         -- so shift them into R and set S to 0
1733                         opsel_r <= RES_SHIFT;
1734                         set_s := '1';
1735                         -- stay in state FMADD_6
1736                     end if;
1737                 elsif r.r(56 downto 54) = "001" then  -- units bit set with nothing above it: already normalized
1738                     v.state := FINISH;
1739                 else
1740                     renormalize := '1';
1741                     v.state := NORMALIZE;
1742                 end if;
1743
1744             when LOOKUP =>  -- one-cycle wait for the table lookup, then branch to the operation selected by insn bits 4..2
1745                 -- r.opsel_a = AIN_B
1746                 -- wait one cycle for inverse_table[B] lookup
1747                 v.first := '1';  -- the following states drive f_to_multiply.valid from r.first
1748                 if r.insn(4) = '0' then
1749                     if r.insn(3) = '0' then
1750                         v.state := DIV_2;  -- insn(4..3) = "00": divide
1751                     else
1752                         v.state := SQRT_1;  -- insn(4..3) = "01": square root
1753                     end if;
1754                 elsif r.insn(2) = '0' then
1755                     v.state := FRE_1;  -- insn(4) = '1', insn(2) = '0': reciprocal estimate
1756                 else
1757                     v.state := RSQRT_1;  -- insn(4) = '1', insn(2) = '1': reciprocal square root estimate
1758                 end if;
1759
1760             when DIV_2 =>  -- divide iteration, first half; alternates with DIV_3
1761                 -- compute Y = inverse_table[B] (when count=0); P = 2 - B * Y
1762                 msel_1 <= MUL1_B;
1763                 msel_add <= MULADD_CONST;
1764                 msel_inv <= '1';
1765                 if r.count = 0 then
1766                     msel_2 <= MUL2_LUT;  -- first pass: second multiplier input comes from the lookup table
1767                 else
1768                     msel_2 <= MUL2_P;  -- later passes: second multiplier input is P
1769                 end if;
1770                 set_y := r.first;
1771                 pshift := '1';
1772                 f_to_multiply.valid <= r.first;
1773                 if multiply_to_f.valid = '1' then
1774                     v.first := '1';
1775                     v.count := r.count + 1;  -- DIV_3 leaves the loop once the count reaches 3
1776                     v.state := DIV_3;
1777                 end if;
1778
1779             when DIV_3 =>  -- divide iteration, second half; loops back to DIV_2 until r.count = 3
1780                 -- compute Y = P = P * Y
1781                 msel_1 <= MUL1_Y;
1782                 msel_2 <= MUL2_P;
1783                 f_to_multiply.valid <= r.first;
1784                 pshift := '1';
1785                 if multiply_to_f.valid = '1' then
1786                     v.first := '1';
1787                     if r.count = 3 then
1788                         v.state := DIV_4;  -- three DIV_2/DIV_3 passes done: go compute the quotient
1789                     else
1790                         v.state := DIV_2;
1791                     end if;
1792                 end if;
1793
1794             when DIV_4 =>  -- multiply A by the refined reciprocal to form the quotient in R
1795                 -- compute R = P = A * Y (quotient)
1796                 msel_1 <= MUL1_A;
1797                 msel_2 <= MUL2_P;
1798                 set_y := r.first;
1799                 f_to_multiply.valid <= r.first;
1800                 pshift := '1';
1801                 if multiply_to_f.valid = '1' then
1802                     opsel_r <= RES_MULT;  -- R takes the multiplier result only in the cycle it becomes valid
1803                     v.first := '1';
1804                     v.state := DIV_5;
1805                 end if;
1806
1807             when DIV_5 =>  -- compute the remainder for the quotient in R; DIV_6 uses it to correct the quotient
1808                 -- compute P = A - B * R (remainder)
1809                 msel_1 <= MUL1_B;
1810                 msel_2 <= MUL2_R;
1811                 msel_add <= MULADD_A;
1812                 msel_inv <= '1';
1813                 f_to_multiply.valid <= r.first;
1814                 if multiply_to_f.valid = '1' then  -- stay in this state until the multiplier reports a result
1815                     v.state := DIV_6;
1816                 end if;
1817
1818             when DIV_6 =>  -- final quotient fix-up: optionally add 1 to the quotient and set sticky bit X
1819                 -- test if remainder is 0 or >= B
1820                 if pcmpb_lt = '1' then
1821                     -- quotient is correct, set X if remainder non-zero
1822                     v.x := r.p(58) or px_nz;
1823                 else
1824                     -- quotient needs to be incremented by 1
1825                     carry_in <= '1';
1826                     v.x := not pcmpb_eq;  -- result is exact only when pcmpb_eq is set
1827                 end if;
1828                 v.state := FINISH;
1829
1830             when FRE_1 =>  -- reciprocal estimate: load R via RES_MISC, then normalize
1831                 opsel_r <= RES_MISC;
1832                 misc_sel <= "0111";  -- same selection SQRT_1 uses to put its table value in R; NOTE(review): presumably the lookup-table output -- confirm
1833                 v.shift := to_signed(1, EXP_BITS);
1834                 v.state := NORMALIZE;
1835
1836             when FTDIV_1 =>  -- ftdiv exponent-range check; may execute twice (stays in this state on the first pass)
1837                 v.cr_result(1) := exp_tiny or exp_huge;  -- CR result bit 1 flags an out-of-range exponent
1838                 if exp_tiny = '1' or exp_huge = '1' or r.a.class = ZERO or r.first = '0' then
1839                     v.instr_done := '1';
1840                     v.state := IDLE;
1841                 else
1842                     v.shift := r.a.exponent;  -- NOTE(review): presumably sets up a second range check involving A's exponent -- confirm how doing_ftdiv "10" is used
1843                     v.doing_ftdiv := "10";
1844                 end if;
1845
1846             when RSQRT_1 =>  -- reciprocal square root estimate: load R via RES_MISC, halve and negate the exponent, then normalize
1847                 opsel_r <= RES_MISC;
1848                 misc_sel <= "0111";  -- same selection SQRT_1 uses to put its table value in R
1849                 sqrt_exp := r.b.exponent(EXP_BITS-1) & r.b.exponent(EXP_BITS-1 downto 1);  -- arithmetic shift right by one: B's exponent divided by 2
1850                 v.result_exp := - sqrt_exp;
1851                 v.shift := to_signed(1, EXP_BITS);
1852                 v.state := NORMALIZE;
1853
1854             when SQRT_1 =>  -- square root, step 1: seed R from the table and start the first multiply
1855                 -- put invsqr[B] in R and compute P = invsqr[B] * B
1856                 -- also transfer B (in R) to A
1857                 set_a := '1';
1858                 opsel_r <= RES_MISC;
1859                 misc_sel <= "0111";
1860                 msel_1 <= MUL1_B;
1861                 msel_2 <= MUL2_LUT;
1862                 f_to_multiply.valid <= '1';
1863                 v.shift := to_signed(-1, EXP_BITS);  -- SQRT_2 expects r.shift = -1
1864                 v.count := "00";  -- reset the iteration counter that SQRT_7 increments
1865                 v.state := SQRT_2;
1866
1867             when SQRT_2 =>  -- square root, step 2: shift R via the shifter while the multiply is in flight
1868                 -- shift R right one place
1869                 -- not expecting multiplier result yet
1870                 -- r.shift = -1
1871                 opsel_r <= RES_SHIFT;
1872                 v.first := '1';  -- SQRT_3 drives set_y from r.first
1873                 v.state := SQRT_3;
1874
1875             when SQRT_3 =>  -- square root, step 3: save R in Y, then replace R with the product when it arrives
1876                 -- put R into Y, wait for product from multiplier
1877                 msel_2 <= MUL2_R;
1878                 set_y := r.first;
1879                 pshift := '1';
1880                 if multiply_to_f.valid = '1' then
1881                     -- put result into R
1882                     opsel_r <= RES_MULT;
1883                     v.first := '1';
1884                     v.state := SQRT_4;
1885                 end if;
1886
1887             when SQRT_4 =>  -- square root iteration (SQRT_4..SQRT_7), first step
1888                 -- compute 1.5 - Y * P
1889                 msel_1 <= MUL1_Y;
1890                 msel_2 <= MUL2_P;
1891                 msel_add <= MULADD_CONST;
1892                 msel_inv <= '1';
1893                 f_to_multiply.valid <= r.first;
1894                 pshift := '1';
1895                 if multiply_to_f.valid = '1' then  -- stay in this state until the multiplier reports a result
1896                     v.state := SQRT_5;
1897                 end if;
1898
1899             when SQRT_5 =>  -- square root iteration: start the Y * P multiply without waiting for its result
1900                 -- compute Y = Y * P
1901                 msel_1 <= MUL1_Y;
1902                 msel_2 <= MUL2_P;
1903                 f_to_multiply.valid <= '1';  -- started unconditionally; this state always lasts one cycle
1904                 v.first := '1';  -- lets SQRT_6 start the next multiply on its first cycle
1905                 v.state := SQRT_6;
1906
1907             when SQRT_6 =>  -- square root iteration: start R * P behind the multiply started in SQRT_5
1908                 -- pipeline in R = R * P
1909                 msel_1 <= MUL1_R;
1910                 msel_2 <= MUL2_P;
1911                 f_to_multiply.valid <= r.first;
1912                 pshift := '1';
1913                 if multiply_to_f.valid = '1' then  -- a result has arrived; SQRT_7 stores it in Y
1914                     v.first := '1';
1915                     v.state := SQRT_7;
1916                 end if;
1917
1918             when SQRT_7 =>  -- square root iteration, last step: collect both products and loop or exit
1919                 -- first multiply is done, put result in Y
1920                 msel_2 <= MUL2_P;
1921                 set_y := r.first;
1922                 -- wait for second multiply (should be here already)
1923                 pshift := '1';
1924                 if multiply_to_f.valid = '1' then
1925                     -- put result into R
1926                     opsel_r <= RES_MULT;
1927                     v.first := '1';
1928                     v.count := r.count + 1;
1929                     if r.count < 2 then  -- r.count is 0, 1, 2 on successive passes, giving three iterations in total
1930                         v.state := SQRT_4;
1931                     else
1932                         v.first := '1';  -- redundant: v.first was already set just above
1933                         v.state := SQRT_8;
1934                     end if;
1935                 end if;
1936
1937             when SQRT_8 =>  -- square root correction, step 1: form the signed error of the current root estimate
1938                 -- compute P = A - R * R, which can be +ve or -ve
1939                 -- we arranged for B to be put into A earlier (set_a in SQRT_1)
1940                 msel_1 <= MUL1_R;
1941                 msel_2 <= MUL2_R;
1942                 msel_add <= MULADD_A;
1943                 msel_inv <= '1';
1944                 pshift := '1';
1945                 f_to_multiply.valid <= r.first;
1946                 if multiply_to_f.valid = '1' then
1947                     v.first := '1';
1948                     v.state := SQRT_9;
1949                 end if;
1950
1951             when SQRT_9 =>  -- sqrt correction step: scale the residual in P by Y
1952                 -- compute P = P * Y
1953                 -- since Y is an estimate of 1/sqrt(B), this makes P an
1954                 -- estimate of the adjustment needed to R.  Since the error
1955                 -- could be negative and we have an unsigned multiplier, the
1956                 -- upper bits can be wrong, but it turns out the lowest 8 bits
1957                 -- are correct and are all we need (given 3 iterations through
1958                 -- SQRT_4 to SQRT_7).
1959                 msel_1 <= MUL1_Y;  -- multiplier operand 1 = r.y
1960                 msel_2 <= MUL2_P;  -- multiplier operand 2 = r.p
1961                 pshift := '1';  -- a returning product is captured into P from result(119 downto 56)
1962                 f_to_multiply.valid <= r.first;  -- issue request follows the registered first flag
1963                 if multiply_to_f.valid = '1' then
1964                     v.state := SQRT_10;  -- v.first is not set here; SQRT_10 sets it for SQRT_11
1965                 end if;
1966
1967             when SQRT_10 =>  -- sqrt correction step: apply the adjustment in P to R and set the result exponent
1968                 -- Add the bottom 8 bits of P, sign-extended,
1969                 -- divided by 4, onto R.
1970                 -- The division by 4 is because R is 10.54 format
1971                 -- whereas P is 8.56 format.
1972                 opsel_b <= BIN_PS6;  -- adder B input = r.p(7 downto 2) sign-extended to 64 bits; NOTE(review): opsel_r is not assigned here, presumably defaults to RES_SUM - confirm
1973                 sqrt_exp := r.b.exponent(EXP_BITS-1) & r.b.exponent(EXP_BITS-1 downto 1);  -- B's exponent shifted right by one with the sign bit replicated (halved)
1974                 v.result_exp := sqrt_exp;
1975                 v.shift := to_signed(1, EXP_BITS);  -- shift amount seen as r.shift by the shifter in SQRT_11
1976                 v.first := '1';  -- SQRT_11 uses r.first for the multiplier strobe and for set_b
1977                 v.state := SQRT_11;
1978
1979             when SQRT_11 =>  -- sqrt final check: issue the remainder multiply and load B for the comparison
1980                 -- compute P = A - R * R (remainder)
1981                 -- also put 2 * R + 1 into B for comparison with P
1982                 msel_1 <= MUL1_R;  -- multiplier operand 1 = r.r(61 downto 0) & "00"
1983                 msel_2 <= MUL2_R;  -- multiplier operand 2 = r.r(61 downto 0) & "00"
1984                 msel_add <= MULADD_A;  -- addend = r.a.mantissa placed at bits 121 downto 58
1985                 msel_inv <= '1';  -- inverts the addend and asserts not_result
1986                 f_to_multiply.valid <= r.first;  -- NOTE(review): pshift is not assigned here; if it defaults to '0', P is captured from result(63 downto 0) - confirm
1987                 shiftin := '1';  -- OR-ed into the bit just below R in the shifter input
1988                 set_b := r.first;  -- B mantissa := shift_res, B exponent := new_exp (r.shift was set to 1 in SQRT_10)
1989                 if multiply_to_f.valid = '1' then
1990                     v.state := SQRT_12;
1991                 end if;
1992
1993             when SQRT_12 =>  -- sqrt final check: compare remainder P with B, bump R if needed, set sticky bit X
1994                 -- test if remainder is 0 or >= B = 2*R + 1
1995                 if pcmpb_lt = '1' then  -- pcmpb_lt / pcmpb_eq are computed outside this section
1996                     -- square root is correct, set X if remainder non-zero
1997                     v.x := r.p(58) or px_nz;
1998                 else
1999                     -- square root needs to be incremented by 1
2000                     carry_in <= '1';  -- NOTE(review): relies on the default adder path (presumably R + 0 + carry via RES_SUM) - confirm
2001                     v.x := not pcmpb_eq;  -- result is exact only when P equals B
2002                 end if;
2003                 v.state := FINISH;
2004
2005             when INT_SHIFT =>  -- integer conversion: shift R by r.shift, then round in INT_ROUND
2006                 -- r.shift = b.exponent - 52
2007                 opsel_r <= RES_SHIFT;  -- R takes the shifter output (this also makes result_exp follow new_exp)
2008                 set_x := '1';  -- X is set if any bit of the selected A input under the shift mask is non-zero
2009                 v.state := INT_ROUND;
2010                 v.shift := to_signed(-2, EXP_BITS);  -- shift amount expected by INT_ROUND
2011
2012             when INT_ROUND =>  -- integer conversion: shift by -2, compute the rounding decision, reject negative unsigned results
2013                 -- r.shift = -2
2014                 opsel_r <= RES_SHIFT;  -- R takes the shifter output
2015                 round := fp_rounding(r.r, r.x, '0', r.round_mode, r.result_sign);  -- the third argument is r.single_prec in ROUNDING; here it is fixed at '0'
2016                 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;  -- 2-bit rounding decision stored in the FR..FI field
2017                 -- Check for negative values that don't round to 0 for fcti*u*
2018                 if r.insn(8) = '1' and r.result_sign = '1' and  -- insn(8) = '1' marks the unsigned variants (see the case in INT_FINAL)
2019                     (r_hi_nz or r_lo_nz or v.fpscr(FPSCR_FR)) = '1' then  -- reads the FR value just written above
2020                     v.state := INT_OFLOW;
2021                 else
2022                     v.state := INT_FINAL;
2023                 end if;
2024
2025             when INT_ISHIFT =>  -- integer conversion: single shift by r.shift, then straight to INT_FINAL (no rounding state)
2026                 -- r.shift = b.exponent - 54;
2027                 opsel_r <= RES_SHIFT;  -- R takes the shifter output
2028                 v.state := INT_FINAL;
2029
2030             when INT_FINAL =>  -- integer conversion: apply sign and rounding increment, decide whether an overflow check is needed
2031                 -- Negate if necessary, and increment for rounding if needed
2032                 opsel_ainv <= r.result_sign;  -- invert the adder A input for negative results
2033                 carry_in <= r.fpscr(FPSCR_FR) xor r.result_sign;  -- positive: +FR; negative: not(R) + (not FR) = -(R + FR)
2034                 -- Check for possible overflows
2035                 case r.insn(9 downto 8) is  -- as labelled below: insn(9) selects word/doubleword, insn(8) selects signed/unsigned
2036                     when "00" =>        -- fctiw[z]
2037                         need_check := r.r(31) or (r.r(30) and not r.result_sign);
2038                     when "01" =>        -- fctiwu[z]
2039                         need_check := r.r(31);
2040                     when "10" =>        -- fctid[z]
2041                         need_check := r.r(63) or (r.r(62) and not r.result_sign);
2042                     when others =>      -- fctidu[z]
2043                         need_check := r.r(63);
2044                 end case;
2045                 if need_check = '1' then
2046                     v.state := INT_CHECK;  -- the result is examined again once the sum has been written to R
2047                 else
2048                     if r.fpscr(FPSCR_FI) = '1' then  -- inexact conversion also raises XX
2049                         v.fpscr(FPSCR_XX) := '1';
2050                     end if;
2051                     arith_done := '1';
2052                 end if;
2053
2054             when INT_CHECK =>  -- integer conversion: detect overflow from the result's top bit and saturate if so
2055                 if r.insn(9) = '0' then  -- insn(9) = '0': word conversions, look at bit 31
2056                     msb := r.r(31);
2057                 else  -- doubleword conversions, look at bit 63
2058                     msb := r.r(63);
2059                 end if;
2060                 misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;  -- selects one of the "1xxx" saturation constants in the result mux
2061                 if (r.insn(8) = '0' and msb /= r.result_sign) or  -- signed: top bit must equal the sign
2062                     (r.insn(8) = '1' and msb /= '1') then  -- unsigned: top bit must be set here, otherwise treated as overflow
2063                     opsel_r <= RES_MISC;  -- replace R with the saturation constant
2064                     v.fpscr(FPSCR_VXCVI) := '1';
2065                     invalid := '1';
2066                 else
2067                     if r.fpscr(FPSCR_FI) = '1' then  -- inexact conversion also raises XX
2068                         v.fpscr(FPSCR_XX) := '1';
2069                     end if;
2070                 end if;
2071                 arith_done := '1';
2072
2073             when INT_OFLOW =>  -- integer conversion: known-invalid case, write the saturation constant and flag VXCVI
2074                 opsel_r <= RES_MISC;  -- R takes the constant selected by misc_sel
2075                 misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;  -- picks the max positive/negative constant for this conversion
2076                 if r.b.class = NAN then
2077                     misc_sel(0) <= '1';  -- overrides bit 0 so NaN inputs get the "max negative" constant
2078                 end if;
2079                 v.fpscr(FPSCR_VXCVI) := '1';
2080                 invalid := '1';
2081                 arith_done := '1';
2082
2083             when FRI_1 =>  -- shift R by r.shift, collect the sticky bit, then continue in ROUNDING
2084                 -- r.shift = b.exponent - 52
2085                 opsel_r <= RES_SHIFT;  -- R takes the shifter output
2086                 set_x := '1';  -- X is set if any bit of the selected A input under the shift mask is non-zero
2087                 v.shift := to_signed(-2, EXP_BITS);  -- shift amount the other paths into ROUNDING also set
2088                 v.state := ROUNDING;
2089
2090             when FINISH =>  -- common tail of arithmetic ops: normalize if needed, else dispatch on exponent range
2091                 if r.is_multiply = '1' and px_nz = '1' then  -- px_nz is computed outside this section
2092                     v.x := '1';
2093                 end if;
2094                 if r.r(63 downto 54) /= "0000000001" then  -- R is not of the form 1.xxx in 10.54 format
2095                     renormalize := '1';  -- loads v.shift with clz(r.r) - 9 further down
2096                     v.state := NORMALIZE;
2097                 else
2098                     set_x := '1';
2099                     if exp_tiny = '1' then  -- exp_tiny / exp_huge / new_exp / min_exp are computed outside this section
2100                         v.shift := new_exp - min_exp;
2101                         v.state := ROUND_UFLOW;
2102                     elsif exp_huge = '1' then
2103                         v.state := ROUND_OFLOW;
2104                     else
2105                         v.shift := to_signed(-2, EXP_BITS);
2106                         v.state := ROUNDING;
2107                     end if;
2108                 end if;
2109
2110             when NORMALIZE =>  -- apply the normalizing shift, then dispatch on exponent range exactly as FINISH does
2111                 -- Shift so we have 9 leading zeroes (we know R is non-zero)
2112                 -- r.shift = clz(r.r) - 9
2113                 opsel_r <= RES_SHIFT;  -- R takes the shifter output (this also makes result_exp follow new_exp)
2114                 set_x := '1';
2115                 if exp_tiny = '1' then
2116                     v.shift := new_exp - min_exp;  -- shift amount consumed by ROUND_UFLOW
2117                     v.state := ROUND_UFLOW;
2118                 elsif exp_huge = '1' then
2119                     v.state := ROUND_OFLOW;
2120                 else
2121                     v.shift := to_signed(-2, EXP_BITS);
2122                     v.state := ROUNDING;
2123                 end if;
2124
2125             when ROUND_UFLOW =>  -- exponent below range: denormalize (UE = 0) or rebias the exponent (UE = 1) before rounding
2126                 -- r.shift = - amount by which exponent underflows
2127                 v.tiny := '1';  -- ROUNDING raises UX when the result is tiny and inexact
2128                 if r.fpscr(FPSCR_UE) = '0' then
2129                     -- disabled underflow exception case
2130                     -- have to denormalize before rounding
2131                     opsel_r <= RES_SHIFT;  -- R takes the shifter output
2132                     set_x := '1';  -- bits under the shift mask go into the sticky bit
2133                     v.shift := to_signed(-2, EXP_BITS);
2134                     v.state := ROUNDING;
2135                 else
2136                     -- enabled underflow exception case
2137                     -- if denormalized, have to normalize before rounding
2138                     v.fpscr(FPSCR_UX) := '1';
2139                     v.result_exp := r.result_exp + bias_exp;  -- bias_exp is defined outside this section
2140                     if r.r(54) = '0' then  -- bit 54 is the units bit of the 10.54 format
2141                         renormalize := '1';
2142                         v.state := NORMALIZE;
2143                     else
2144                         v.shift := to_signed(-2, EXP_BITS);
2145                         v.state := ROUNDING;
2146                     end if;
2147                 end if;
2148
2149             when ROUND_OFLOW =>  -- exponent above range: produce infinity/max (OE = 0) or rebias the exponent and round (OE = 1)
2150                 v.fpscr(FPSCR_OX) := '1';
2151                 if r.fpscr(FPSCR_OE) = '0' then
2152                     -- disabled overflow exception
2153                     -- result depends on rounding mode
2154                     v.fpscr(FPSCR_XX) := '1';
2155                     v.fpscr(FPSCR_FI) := '1';
2156                     if r.round_mode(1 downto 0) = "00" or  -- NOTE(review): presumably the Power ISA RN encoding (00 = nearest) - confirm
2157                         (r.round_mode(1) = '1' and r.round_mode(0) = r.result_sign) then  -- modes "10"/"11" when bit 0 matches the result sign
2158                         v.result_class := INFINITY;
2159                         v.fpscr(FPSCR_FR) := '1';
2160                     else
2161                         v.fpscr(FPSCR_FR) := '0';
2162                     end if;
2163                     -- construct largest representable number
2164                     v.result_exp := max_exp;
2165                     opsel_r <= RES_MISC;
2166                     misc_sel <= "001" & r.single_prec;  -- "0010" = max DP mantissa, "0011" = max SP mantissa
2167                     arith_done := '1';
2168                 else
2169                     -- enabled overflow exception
2170                     v.result_exp := r.result_exp - bias_exp;  -- bias_exp is defined outside this section
2171                     v.shift := to_signed(-2, EXP_BITS);
2172                     v.state := ROUNDING;
2173                 end if;
2174
2175             when ROUNDING =>  -- mask off bits below the precision, decide on an increment, record FR/FI/XX/UX
2176                 opsel_amask <= '1';  -- adder A input = selected operand and not mask
2177                 round := fp_rounding(r.r, r.x, r.single_prec, r.round_mode, r.result_sign);
2178                 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;  -- 2-bit rounding decision stored in the FR..FI field
2179                 if round(1) = '1' then  -- an increment is required
2180                     -- set mask to increment the LSB for the precision
2181                     opsel_b <= BIN_MASK;  -- adder B input = mask
2182                     carry_in <= '1';
2183                     v.shift := to_signed(-1, EXP_BITS);  -- shift amount ROUNDING_2 uses if the increment overflowed
2184                     v.state := ROUNDING_2;
2185                 else
2186                     if r.r(54) = '0' then
2187                         -- result after masking could be zero, or could be a
2188                         -- denormalized result that needs to be renormalized
2189                         renormalize := '1';  -- loads v.shift with clz(r.r) - 9 further down
2190                         v.state := ROUNDING_3;
2191                     else
2192                         arith_done := '1';
2193                     end if;
2194                 end if;
2195                 if round(0) = '1' then  -- inexact result
2196                     v.fpscr(FPSCR_XX) := '1';
2197                     if r.tiny = '1' then  -- r.tiny is set in ROUND_UFLOW
2198                         v.fpscr(FPSCR_UX) := '1';
2199                     end if;
2200                 end if;
2201
2202             when ROUNDING_2 =>  -- after a rounding increment: fix up a carry into bit 55 or a still-denormal result
2203                 -- Check for overflow during rounding
2204                 -- r.shift = -1
2205                 v.x := '0';
2206                 if r.r(55) = '1' then  -- the increment carried out of the units bit (bit 54)
2207                     opsel_r <= RES_SHIFT;  -- R takes the shifter output (this also makes result_exp follow new_exp)
2208                     if exp_huge = '1' then
2209                         v.state := ROUND_OFLOW;
2210                     else
2211                         arith_done := '1';
2212                     end if;
2213                 elsif r.r(54) = '0' then
2214                     -- Do CLZ so we can renormalize the result
2215                     renormalize := '1';
2216                     v.state := ROUNDING_3;
2217                 else
2218                     arith_done := '1';
2219                 end if;
2220
2221             when ROUNDING_3 =>  -- rounded result has no units bit: either it is zero, or it must be renormalized
2222                 -- r.shift = clz(r.r) - 9
2223                 mant_nz := r_hi_nz or (r_lo_nz and not r.single_prec);  -- the low part is ignored for single precision
2224                 if mant_nz = '0' then
2225                     v.result_class := ZERO;
2226                     if r.is_subtract = '1' then
2227                         -- set result sign depending on rounding mode
2228                         v.result_sign := r.round_mode(1) and r.round_mode(0);  -- negative zero only when both mode bits are set
2229                     end if;
2230                     arith_done := '1';
2231                 else
2232                     -- Renormalize result after rounding
2233                     opsel_r <= RES_SHIFT;  -- R takes the shifter output (this also makes result_exp follow new_exp)
2234                     v.denorm := exp_tiny;  -- r.denorm feeds the result_flags call for FPRF
2235                     v.shift := new_exp - to_signed(-1022, EXP_BITS);  -- distance from exponent -1022, used by DENORM
2236                     if new_exp < to_signed(-1022, EXP_BITS) then
2237                         v.state := DENORM;
2238                     else
2239                         arith_done := '1';
2240                     end if;
2241                 end if;
2242
2243             when DENORM =>  -- final shift for a result whose exponent is below -1022, then finish
2244                 -- r.shift = result_exp - -1022
2245                 opsel_r <= RES_SHIFT;  -- R takes the shifter output
2246                 arith_done := '1';
2247
2248             when NAN_RESULT =>  -- at least one operand is a NaN: flag signalling NaNs and pick which operand to return
2249                 if (r.use_a = '1' and r.a.class = NAN and r.a.mantissa(53) = '0') or  -- mantissa bit 53 clear marks a signalling NaN
2250                     (r.use_b = '1' and r.b.class = NAN and r.b.mantissa(53) = '0') or
2251                     (r.use_c = '1' and r.c.class = NAN and r.c.mantissa(53) = '0') then
2252                     -- Signalling NAN
2253                     v.fpscr(FPSCR_VXSNAN) := '1';
2254                     invalid := '1';
2255                 end if;
2256                 if r.use_a = '1' and r.a.class = NAN then  -- priority order: A, then B, then C
2257                     v.opsel_a := AIN_A;
2258                 elsif r.use_b = '1' and r.b.class = NAN then
2259                     v.opsel_a := AIN_B;
2260                 elsif r.use_c = '1' and r.c.class = NAN then
2261                     v.opsel_a := AIN_C;
2262                 end if;
2263                 v.state := EXC_RESULT;  -- EXC_RESULT copies sign/exponent/class from the operand chosen here
2264
2265             when EXC_RESULT =>  -- return one of the input operands as the result (sign possibly flipped by r.negate)
2266                 -- r.opsel_a = AIN_A, AIN_B or AIN_C according to which input is the result
2267                 case r.opsel_a is  -- NOTE(review): the mantissa presumably reaches R through the A input mux and the default adder path - confirm
2268                     when AIN_B =>
2269                         v.result_sign := r.b.negative xor r.negate;
2270                         v.result_exp := r.b.exponent;
2271                         v.result_class := r.b.class;
2272                     when AIN_C =>
2273                         v.result_sign := r.c.negative xor r.negate;
2274                         v.result_exp := r.c.exponent;
2275                         v.result_class := r.c.class;
2276                     when others =>  -- AIN_A, and any other selector value
2277                         v.result_sign := r.a.negative xor r.negate;
2278                         v.result_exp := r.a.exponent;
2279                         v.result_class := r.a.class;
2280                 end case;
2281                 arith_done := '1';
2282
2283         end case;
2284
2285         if zero_divide = '1' then  -- post-state-machine handling of the flags raised by the state arms above
2286             v.fpscr(FPSCR_ZX) := '1';
2287         end if;
2288         if qnan_result = '1' then  -- operation produces a generated quiet NaN
2289             invalid := '1';
2290             v.result_class := NAN;
2291             v.result_sign := '0';
2292             misc_sel <= "0001";  -- selects the generated QNaN mantissa in the result mux
2293             opsel_r <= RES_MISC;
2294             arith_done := '1';
2295         end if;
2296         if invalid = '1' then
2297             v.invalid := '1';  -- v.invalid is only ever set here, never cleared in this section
2298         end if;
2299         if arith_done = '1' then
2300             -- Enabled invalid exception doesn't write result or FPRF
2301             -- Neither does enabled zero-divide exception
2302             if (v.invalid and r.fpscr(FPSCR_VE)) = '0' and
2303                 (zero_divide and r.fpscr(FPSCR_ZE)) = '0' then
2304                 v.writing_back := '1';
2305                 v.update_fprf := '1';  -- r.update_fprf later triggers the FPRF field update
2306             end if;
2307             v.instr_done := '1';
2308             v.state := IDLE;
2309             update_fx := '1';  -- allows FX to be set if a new exception bit appeared
2310         end if;
2311
2312         -- Multiplier and divide/square root data path: operand muxes, addend mux, and capture of products into Y and P
2313         case msel_1 is  -- operand 1 mux
2314             when MUL1_A =>
2315                 f_to_multiply.data1 <= r.a.mantissa(61 downto 0) & "00";  -- mantissa shifted left by two bits
2316             when MUL1_B =>
2317                 f_to_multiply.data1 <= r.b.mantissa(61 downto 0) & "00";
2318             when MUL1_Y =>
2319                 f_to_multiply.data1 <= r.y;  -- Y is passed through unshifted
2320             when others =>  -- remaining selector values (MUL1_R is handled here)
2321                 f_to_multiply.data1 <= r.r(61 downto 0) & "00";
2322         end case;
2323         case msel_2 is  -- operand 2 mux
2324             when MUL2_C =>
2325                 f_to_multiply.data2 <= r.c.mantissa(61 downto 0) & "00";
2326             when MUL2_LUT =>
2327                 f_to_multiply.data2 <= x"00" & inverse_est & '0' & x"000000000";  -- lookup-table estimate padded with 8 zero bits above and 37 below
2328             when MUL2_P =>
2329                 f_to_multiply.data2 <= r.p;  -- P is passed through unshifted
2330             when others =>  -- remaining selector values (MUL2_R is handled here)
2331                 f_to_multiply.data2 <= r.r(61 downto 0) & "00";
2332         end case;
2333         maddend := (others => '0');  -- default addend is zero
2334         case msel_add is
2335             when MULADD_CONST =>
2336                 -- addend is 2.0 or 1.5 in 16.112 format
2337                 if r.is_sqrt = '0' then
2338                     maddend(113) := '1';                -- 2.0
2339                 else
2340                     maddend(112 downto 111) := "11";    -- 1.5
2341                 end if;
2342             when MULADD_A =>
2343                 -- addend is A in 16.112 format
2344                 maddend(121 downto 58) := r.a.mantissa;
2345             when MULADD_RS =>
2346                 -- addend is concatenation of R and S in 16.112 format
2347                 maddend := "000000" & r.r & r.s & "00";
2348             when others =>  -- any other selector leaves the addend at zero
2349         end case;
2350         if msel_inv = '1' then  -- inverted addend goes together with not_result below
2351             f_to_multiply.addend <= not maddend;
2352         else
2353             f_to_multiply.addend <= maddend;
2354         end if;
2355         f_to_multiply.not_result <= msel_inv;  -- NOTE(review): presumably asks the multiplier to invert its result - confirm against the multiplier
2356         if set_y = '1' then
2357             v.y := f_to_multiply.data2;  -- Y is loaded from whatever msel_2 currently selects
2358         end if;
2359         if multiply_to_f.valid = '1' then  -- P is loaded whenever a product comes back
2360             if pshift = '0' then
2361                 v.p := multiply_to_f.result(63 downto 0);  -- low 64 bits of the product
2362             else
2363                 v.p := multiply_to_f.result(119 downto 56);  -- window 119..56, i.e. 8.56 format relative to the 16.112 product
2364             end if;
2365         end if;
2366
2367         -- Data path.
2368         -- This has A and B input multiplexers, an adder, a shifter,
2369         -- count-leading-zeroes logic, and a result mux.
2370         if longmask = '1' then  -- longmask is set outside this section
2371             mshift := r.shift + to_signed(-29, EXP_BITS);  -- widen the mask by 29 bit positions
2372         else
2373             mshift := r.shift;
2374         end if;
2375         if mshift < to_signed(-64, EXP_BITS) then  -- shift amount beyond the register width: mask everything
2376             mask := (others => '1');
2377         elsif mshift >= to_signed(0, EXP_BITS) then  -- non-negative shift: mask nothing
2378             mask := (others => '0');
2379         else
2380             mask := right_mask(unsigned(mshift(5 downto 0)));  -- NOTE(review): right_mask is defined elsewhere; presumably sets the low-order bits - confirm
2381         end if;
2382         case r.opsel_a is  -- A input mux, driven by the registered selector
2383             when AIN_R =>
2384                 in_a0 := r.r;
2385             when AIN_A =>
2386                 in_a0 := r.a.mantissa;
2387             when AIN_B =>
2388                 in_a0 := r.b.mantissa;
2389             when others =>
2390                 in_a0 := r.c.mantissa;
2391         end case;
2392         if (or (mask and in_a0)) = '1' and set_x = '1' then  -- sticky bit: any masked bit of the A input is non-zero
2393             v.x := '1';
2394         end if;
2395         if opsel_ainv = '1' then  -- the sticky-bit test above sees the A input before inversion and masking
2396             in_a0 := not in_a0;
2397         end if;
2398         if opsel_amask = '1' then
2399             in_a0 := in_a0 and not mask;
2400         end if;
2401         in_a <= in_a0;
2402         case opsel_b is  -- B input mux
2403             when BIN_ZERO =>
2404                 in_b0 := (others => '0');
2405             when BIN_R =>
2406                 in_b0 := r.r;
2407             when BIN_MASK =>
2408                 in_b0 := mask;
2409             when others =>
2410                 -- BIN_PS6, 6 LSBs of P/4 sign-extended to 64
2411                 in_b0 := std_ulogic_vector(resize(signed(r.p(7 downto 2)), 64));
2412         end case;
2413         if opsel_binv = '1' then
2414             in_b0 := not in_b0;
2415         end if;
2416         in_b <= in_b0;
2417         if r.shift >= to_signed(-64, EXP_BITS) and r.shift <= to_signed(63, EXP_BITS) then  -- only shift amounts in -64..63 are passed to the shifter
2418             shift_res := shifter_64(r.r & (shiftin or r.s(55)) & r.s(54 downto 0),  -- input is R followed by S, with shiftin OR-ed into the top bit of S
2419                                     std_ulogic_vector(r.shift(6 downto 0)));
2420         else
2421             shift_res := (others => '0');  -- out-of-range shift amounts give an all-zero result
2422         end if;
2423         case opsel_r is  -- result mux; the selected value is written to R below
2424             when RES_SUM =>
2425                 result <= std_ulogic_vector(unsigned(in_a) + unsigned(in_b) + carry_in);
2426             when RES_SHIFT =>
2427                 result <= shift_res;
2428             when RES_MULT =>
2429                 result <= multiply_to_f.result(121 downto 58);  -- window 121..58, i.e. 10.54 format relative to the 16.112 product
2430             when others =>  -- RES_MISC: constants and special values chosen by misc_sel
2431                 case misc_sel is
2432                     when "0000" =>
2433                         misc := x"00000000" & (r.fpscr and fpscr_mask);  -- masked FPSCR in the low 32 bits
2434                     when "0001" =>
2435                         -- generated QNaN mantissa
2436                         misc := x"0020000000000000";
2437                     when "0010" =>
2438                         -- mantissa of max representable DP number
2439                         misc := x"007ffffffffffffc";
2440                     when "0011" =>
2441                         -- mantissa of max representable SP number
2442                         misc := x"007fffff80000000";
2443                     when "0100" =>
2444                         -- fmrgow result
2445                         misc := r.a.mantissa(31 downto 0) & r.b.mantissa(31 downto 0);
2446                     when "0110" =>
2447                         -- fmrgew result
2448                         misc := r.a.mantissa(63 downto 32) & r.b.mantissa(63 downto 32);
2449                     when "0111" =>
2450                         misc := 10x"000" & inverse_est & 35x"000000000";  -- lookup-table estimate with 10 zero bits above and 35 below
2451                     when "1000" =>
2452                         -- max positive result for fctiw[z]
2453                         misc := x"000000007fffffff";
2454                     when "1001" =>
2455                         -- max negative result for fctiw[z]
2456                         misc := x"ffffffff80000000";
2457                     when "1010" =>
2458                         -- max positive result for fctiwu[z]
2459                         misc := x"00000000ffffffff";
2460                     when "1011" =>
2461                         -- max negative result for fctiwu[z]
2462                         misc := x"0000000000000000";
2463                     when "1100" =>
2464                         -- max positive result for fctid[z]
2465                         misc := x"7fffffffffffffff";
2466                     when "1101" =>
2467                         -- max negative result for fctid[z]
2468                         misc := x"8000000000000000";
2469                     when "1110" =>
2470                         -- max positive result for fctidu[z]
2471                         misc := x"ffffffffffffffff";
2472                     when "1111" =>
2473                         -- max negative result for fctidu[z]
2474                         misc := x"0000000000000000";
2475                     when others =>  -- unlisted selector values (e.g. "0101") give zero
2476                         misc := x"0000000000000000";
2477                 end case;
2478                 result <= misc;
2479         end case;
2480         v.r := result;  -- R is loaded from the result mux on every pass through the process
2481         if set_s = '1' then  -- S register update, source chosen by opsel_s
2482             case opsel_s is
2483                 when S_NEG =>
2484                     v.s := std_ulogic_vector(unsigned(not r.s) + (not r.x));  -- not(S) plus one when X is clear, plus zero when X is set
2485                 when S_MULT =>
2486                     v.s := multiply_to_f.result(57 downto 2);  -- product bits just below the 121..58 window used for R
2487                 when S_SHIFT =>
2488                     v.s := shift_res(63 downto 8);
2489                     if shift_res(7 downto 0) /= x"00" then  -- bits that do not fit in S go into the sticky bit
2490                         v.x := '1';
2491                     end if;
2492                 when others =>
2493                     v.s := (others => '0');
2494             end case;
2495         end if;
2496
2497         if set_a = '1' then  -- A/B/C operand registers can each be reloaded from the shifter output
2498             v.a.exponent := new_exp;
2499             v.a.mantissa := shift_res;
2500         end if;
2501         if set_b = '1' then
2502             v.b.exponent := new_exp;
2503             v.b.mantissa := shift_res;
2504         end if;
2505         if set_c = '1' then
2506             v.c.exponent := new_exp;
2507             v.c.mantissa := shift_res;
2508         end if;
2509
2510         if opsel_r = RES_SHIFT then  -- whenever R takes the shifter output, result_exp is overwritten with new_exp
2511             v.result_exp := new_exp;
2512         end if;
2513
2514         if renormalize = '1' then  -- compute the shift needed to leave 9 leading zeroes in R
2515             clz := count_left_zeroes(r.r);
2516             if renorm_sqrt = '1' then
2517                 -- make denormalized value end up with even exponent
2518                 clz(0) := '1';
2519             end if;
2520             v.shift := resize(signed('0' & clz) - 9, EXP_BITS);  -- overrides any v.shift assigned earlier in this pass
2521         end if;
2522
2523         if r.int_result = '1' then  -- integer results are returned as the raw contents of R
2524             fp_result <= r.r;
2525         else  -- otherwise sign/class/exponent/R are packed by pack_dp
2526             fp_result <= pack_dp(r.result_sign, r.result_class, r.result_exp, r.r,
2527                                  r.single_prec, r.quieten_nan);
2528         end if;
2529         if r.update_fprf = '1' then  -- update_fprf is set when an instruction completes with writeback enabled
2530             v.fpscr(FPSCR_C downto FPSCR_FU) := result_flags(r.result_sign, r.result_class,
2531                                                              r.r(54) and not r.denorm);  -- third argument: units bit set and result not flagged as denormal
2532         end if;
2533
2534         v.fpscr(FPSCR_VX) := (or (v.fpscr(FPSCR_VXSNAN downto FPSCR_VXVC))) or  -- VX is the OR of both ranges of VX* bits
2535                              (or (v.fpscr(FPSCR_VXSOFT downto FPSCR_VXCVI)));
2536         v.fpscr(FPSCR_FEX) := or (v.fpscr(FPSCR_VX downto FPSCR_XX) and  -- FEX: some exception bit is set whose enable bit is also set
2537                                   v.fpscr(FPSCR_VE downto FPSCR_XE));
2538         if update_fx = '1' and
2539             (v.fpscr(FPSCR_VX downto FPSCR_XX) and not r.old_exc) /= "00000" then  -- an exception bit is set that was not set in r.old_exc
2540             v.fpscr(FPSCR_FX) := '1';
2541         end if;
2542         if r.rc = '1' then  -- for record-form instructions, copy FPSCR bits FX..OX to the CR result
2543             v.cr_result := v.fpscr(FPSCR_FX downto FPSCR_OX);
2544         end if;
2545
2546         if illegal = '1' then  -- illegal instruction: cancel completion, interrupt, writeback and busy, and return to IDLE
2547             v.instr_done := '0';
2548             v.do_intr := '0';
2549             v.writing_back := '0';
2550             v.busy := '0';
2551             v.state := IDLE;
2552         else
2553             v.do_intr := v.instr_done and v.fpscr(FPSCR_FEX) and r.fe_mode;  -- interrupt when an instruction finishes with FEX set and r.fe_mode set
2554             if v.state /= IDLE or v.do_intr = '1' then  -- busy while a multi-cycle operation or an interrupt is pending
2555                 v.busy := '1';
2556             end if;
2557         end if;
2558
2559         rin <= v;  -- hand the computed next-state record to rin
2560         e_out.illegal <= illegal;  -- report the illegal-instruction flag on e_out
2561     end process;
2562
2563 end architecture behaviour;