fpu.vhdl

   1 -- Floating-point unit for Microwatt
   2
   3 library ieee;
   4 use ieee.std_logic_1164.all;
   5 use ieee.numeric_std.all;
   6
   7 library work;
   8 use work.insn_helpers.all;
   9 use work.decode_types.all;
  10 use work.crhelpers.all;
  11 use work.helpers.all;
  12 use work.common.all;
  13
  14 entity fpu is
  15     port (
  16         clk : in std_ulogic;    -- clock; the registers and the lookup-table read update on its rising edge
  17         rst : in std_ulogic;    -- reset, sampled inside the rising-edge branch (synchronous)
  18
  19         e_in  : in  Execute1toFPUType;    -- request input; e_in.valid must only be asserted while the state is IDLE
  20         e_out : out FPUToExecute1Type;    -- status outputs: busy, exception, interrupt
  21
  22         w_out : out FPUToWritebackType    -- result outputs: register write data and CR update
  23         );
  24 end entity fpu;
  25
  26 architecture behaviour of fpu is
  27     type fp_number_class is (ZERO, FINITE, INFINITY, NAN);    -- classification assigned by decode_dp and consumed by pack_dp / result_flags
  28
  29     constant EXP_BITS : natural := 13;    -- width in bits of the signed exponent, result_exp and shift fields
  30
  31     type fpu_reg_type is record    -- an operand split into components, as returned by decode_dp
  32         class    : fp_number_class;
  33         negative : std_ulogic;                          -- decode_dp copies bit 63 of the source value here
  34         exponent : signed(EXP_BITS-1 downto 0);         -- unbiased
  35         mantissa : std_ulogic_vector(63 downto 0);      -- 10.54 format
  36     end record;
  37
  38     type state_t is (IDLE,    -- IDLE is the reset state; a new request (e_in.valid) is only legal in IDLE
  39                      DO_MCRFS, DO_MTFSB, DO_MTFSFI, DO_MFFS, DO_MTFSF,
  40                      DO_FMR, DO_FMRG, DO_FCMP, DO_FTDIV, DO_FTSQRT,
  41                      DO_FCFID, DO_FCTI,
  42                      DO_FRSP, DO_FRI,
  43                      DO_FADD, DO_FMUL, DO_FDIV, DO_FSQRT, DO_FMADD,
  44                      DO_FRE, DO_FRSQRTE,
  45                      DO_FSEL,
  46                      FRI_1,
  47                      ADD_1, ADD_SHIFT, ADD_2, ADD_3,
  48                      CMP_1, CMP_2,
  49                      MULT_1,
  50                      FMADD_1, FMADD_2, FMADD_3,
  51                      FMADD_4, FMADD_5, FMADD_6,
  52                      LOOKUP,
  53                      DIV_2, DIV_3, DIV_4, DIV_5, DIV_6,
  54                      FRE_1,
  55                      RSQRT_1,
  56                      FTDIV_1,
  57                      SQRT_1, SQRT_2, SQRT_3, SQRT_4,
  58                      SQRT_5, SQRT_6, SQRT_7, SQRT_8,
  59                      SQRT_9, SQRT_10, SQRT_11, SQRT_12,
  60                      INT_SHIFT, INT_ROUND, INT_ISHIFT,
  61                      INT_FINAL, INT_CHECK, INT_OFLOW,
  62                      FINISH, NORMALIZE,
  63                      ROUND_UFLOW, ROUND_OFLOW,
  64                      ROUNDING, ROUNDING_2, ROUNDING_3,
  65                      DENORM,
  66                      RENORM_A, RENORM_A2,
  67                      RENORM_B, RENORM_B2,
  68                      RENORM_C, RENORM_C2,
  69                      NAN_RESULT, EXC_RESULT);    -- NOTE(review): the DO_* states presumably are per-instruction entry points; the state machine itself is in fpu_1
  70
  71     type reg_type is record    -- complete registered state of the FPU (type of signals r and rin)
  72         state        : state_t;    -- reset to IDLE
  73         busy         : std_ulogic;    -- drives e_out.busy; cleared by reset
  74         instr_done   : std_ulogic;    -- qualifies w_out.valid and w_out.write_cr_enable; cleared by reset
  75         do_intr      : std_ulogic;    -- drives e_out.interrupt and suppresses w_out.valid; cleared by reset
  76         op           : insn_type_t;
  77         insn         : std_ulogic_vector(31 downto 0);
  78         dest_fpr     : gspr_index_t;    -- drives w_out.write_reg
  79         fe_mode      : std_ulogic;
  80         rc           : std_ulogic;    -- rc or is_cmp enables the CR write when instr_done is set
  81         is_cmp       : std_ulogic;
  82         single_prec  : std_ulogic;
  83         fpscr        : std_ulogic_vector(31 downto 0);    -- cleared by reset; bit FPSCR_FEX drives e_out.exception
  84         a            : fpu_reg_type;
  85         b            : fpu_reg_type;    -- b.mantissa supplies the lookup-table address bits
  86         c            : fpu_reg_type;
  87         r            : std_ulogic_vector(63 downto 0);  -- 10.54 format
  88         s            : std_ulogic_vector(55 downto 0);  -- extended fraction
  89         x            : std_ulogic;
  90         p            : std_ulogic_vector(63 downto 0);  -- 8.56 format
  91         y            : std_ulogic_vector(63 downto 0);  -- 8.56 format
  92         result_sign  : std_ulogic;
  93         result_class : fp_number_class;
  94         result_exp   : signed(EXP_BITS-1 downto 0);
  95         shift        : signed(EXP_BITS-1 downto 0);
  96         writing_back : std_ulogic;    -- drives w_out.write_enable; cleared by reset
  97         int_result   : std_ulogic;
  98         cr_result    : std_ulogic_vector(3 downto 0);    -- replicated into all eight 4-bit fields of w_out.write_cr_data
  99         cr_mask      : std_ulogic_vector(7 downto 0);    -- drives w_out.write_cr_mask
 100         old_exc      : std_ulogic_vector(4 downto 0);
 101         update_fprf  : std_ulogic;
 102         quieten_nan  : std_ulogic;
 103         tiny         : std_ulogic;
 104         denorm       : std_ulogic;
 105         round_mode   : std_ulogic_vector(2 downto 0);
 106         is_subtract  : std_ulogic;
 107         exp_cmp      : std_ulogic;
 108         madd_cmp     : std_ulogic;
 109         add_bsmall   : std_ulogic;
 110         is_multiply  : std_ulogic;
 111         is_sqrt      : std_ulogic;    -- when '1', lut_access indexes the reciprocal square root part of inverse_table
 112         first        : std_ulogic;
 113         count        : unsigned(1 downto 0);
 114         doing_ftdiv  : std_ulogic_vector(1 downto 0);
 115         opsel_a      : std_ulogic_vector(1 downto 0);
 116         use_a        : std_ulogic;
 117         use_b        : std_ulogic;
 118         use_c        : std_ulogic;
 119         invalid      : std_ulogic;
 120         negate       : std_ulogic;
 121         longmask     : std_ulogic;
 122     end record;
 123
 124     type lookup_table is array(0 to 1023) of std_ulogic_vector(17 downto 0);    -- 1024 entries of 18 bits (type of inverse_table)
 125
 126     signal r, rin : reg_type;    -- r is the registered state; rin is loaded into r on each clock edge when not in reset
 127
 128     signal fp_result     : std_ulogic_vector(63 downto 0);    -- drives w_out.write_data
 129     signal opsel_b       : std_ulogic_vector(1 downto 0);
 130     signal opsel_r       : std_ulogic_vector(1 downto 0);
 131     signal opsel_s       : std_ulogic_vector(1 downto 0);
 132     signal opsel_ainv    : std_ulogic;
 133     signal opsel_mask    : std_ulogic;
 134     signal opsel_binv    : std_ulogic;
 135     signal in_a          : std_ulogic_vector(63 downto 0);
 136     signal in_b          : std_ulogic_vector(63 downto 0);
 137     signal result        : std_ulogic_vector(63 downto 0);
 138     signal carry_in      : std_ulogic;
 139     signal lost_bits     : std_ulogic;
 140     signal r_hi_nz       : std_ulogic;
 141     signal r_lo_nz       : std_ulogic;
 142     signal s_nz          : std_ulogic;
 143     signal misc_sel      : std_ulogic_vector(3 downto 0);
 144     signal f_to_multiply : MultiplyInputType;     -- input of the fpu_multiply_0 instance
 145     signal multiply_to_f : MultiplyOutputType;    -- output of the fpu_multiply_0 instance
 146     signal msel_1        : std_ulogic_vector(1 downto 0);
 147     signal msel_2        : std_ulogic_vector(1 downto 0);
 148     signal msel_add      : std_ulogic_vector(1 downto 0);
 149     signal msel_inv      : std_ulogic;
 150     signal inverse_est   : std_ulogic_vector(18 downto 0);    -- registered lookup result: '1' & 18-bit table entry
 151
 152     -- opsel values (2-bit selector encodings)
 153     constant AIN_R    : std_ulogic_vector(1 downto 0) := "00";    -- NOTE(review): AIN_* presumably are the values of r.opsel_a; confirm in fpu_1
 154     constant AIN_A    : std_ulogic_vector(1 downto 0) := "01";
 155     constant AIN_B    : std_ulogic_vector(1 downto 0) := "10";
 156     constant AIN_C    : std_ulogic_vector(1 downto 0) := "11";
 157
 158     constant BIN_ZERO : std_ulogic_vector(1 downto 0) := "00";    -- NOTE(review): BIN_* presumably are the values of opsel_b; confirm in fpu_1
 159     constant BIN_R    : std_ulogic_vector(1 downto 0) := "01";
 160     constant BIN_MASK : std_ulogic_vector(1 downto 0) := "10";
 161     constant BIN_PS6  : std_ulogic_vector(1 downto 0) := "11";
 162
 163     constant RES_SUM   : std_ulogic_vector(1 downto 0) := "00";    -- NOTE(review): RES_* presumably are the values of opsel_r; confirm in fpu_1
 164     constant RES_SHIFT : std_ulogic_vector(1 downto 0) := "01";
 165     constant RES_MULT  : std_ulogic_vector(1 downto 0) := "10";
 166     constant RES_MISC  : std_ulogic_vector(1 downto 0) := "11";
 167
 168     constant S_ZERO  : std_ulogic_vector(1 downto 0) := "00";    -- NOTE(review): S_* presumably are the values of opsel_s; confirm in fpu_1
 169     constant S_NEG   : std_ulogic_vector(1 downto 0) := "01";
 170     constant S_SHIFT : std_ulogic_vector(1 downto 0) := "10";
 171     constant S_MULT  : std_ulogic_vector(1 downto 0) := "11";
 172
 173     -- msel values (2-bit selector encodings)
 174     constant MUL1_A : std_ulogic_vector(1 downto 0) := "00";    -- NOTE(review): MUL1_* presumably are the values of msel_1; confirm in fpu_1
 175     constant MUL1_B : std_ulogic_vector(1 downto 0) := "01";
 176     constant MUL1_Y : std_ulogic_vector(1 downto 0) := "10";
 177     constant MUL1_R : std_ulogic_vector(1 downto 0) := "11";
 178
 179     constant MUL2_C   : std_ulogic_vector(1 downto 0) := "00";    -- NOTE(review): MUL2_* presumably are the values of msel_2; confirm in fpu_1
 180     constant MUL2_LUT : std_ulogic_vector(1 downto 0) := "01";
 181     constant MUL2_P   : std_ulogic_vector(1 downto 0) := "10";
 182     constant MUL2_R   : std_ulogic_vector(1 downto 0) := "11";
 183
 184     constant MULADD_ZERO  : std_ulogic_vector(1 downto 0) := "00";    -- NOTE(review): MULADD_* presumably are the values of msel_add; confirm in fpu_1
 185     constant MULADD_CONST : std_ulogic_vector(1 downto 0) := "01";
 186     constant MULADD_A     : std_ulogic_vector(1 downto 0) := "10";
 187     constant MULADD_RS    : std_ulogic_vector(1 downto 0) := "11";
 188
 189     -- Inverse lookup table.  The low 8 index bits are the top 8 fraction bits.
 190     -- The first 256 entries are the reciprocal (1/x) lookup table,
 191     -- and the remaining 768 entries are the reciprocal square root table.
 192     -- Output range is [0.5, 1) in 0.19 format, though the top
 193     -- bit isn't stored since it is always 1.
 194     -- Each output value is the inverse of the center of the input
 195     -- range for the value, i.e. entry 0 is 1 / (1 + 1/512),
 196     -- entry 1 is 1 / (1 + 3/512), etc.
 197     signal inverse_table : lookup_table := (
 198         -- 1/x lookup table
 199         -- Unit bit is assumed to be 1, so input range is [1, 2)
 200         18x"3fc01", 18x"3f411", 18x"3ec31", 18x"3e460", 18x"3dc9f", 18x"3d4ec", 18x"3cd49", 18x"3c5b5",
 201         18x"3be2f", 18x"3b6b8", 18x"3af4f", 18x"3a7f4", 18x"3a0a7", 18x"39968", 18x"39237", 18x"38b14",
 202         18x"383fe", 18x"37cf5", 18x"375f9", 18x"36f0a", 18x"36828", 18x"36153", 18x"35a8a", 18x"353ce",
 203         18x"34d1e", 18x"3467a", 18x"33fe3", 18x"33957", 18x"332d7", 18x"32c62", 18x"325f9", 18x"31f9c",
 204         18x"3194a", 18x"31303", 18x"30cc7", 18x"30696", 18x"30070", 18x"2fa54", 18x"2f443", 18x"2ee3d",
 205         18x"2e841", 18x"2e250", 18x"2dc68", 18x"2d68b", 18x"2d0b8", 18x"2caee", 18x"2c52e", 18x"2bf79",
 206         18x"2b9cc", 18x"2b429", 18x"2ae90", 18x"2a900", 18x"2a379", 18x"29dfb", 18x"29887", 18x"2931b",
 207         18x"28db8", 18x"2885e", 18x"2830d", 18x"27dc4", 18x"27884", 18x"2734d", 18x"26e1d", 18x"268f6",
 208         18x"263d8", 18x"25ec1", 18x"259b3", 18x"254ac", 18x"24fad", 18x"24ab7", 18x"245c8", 18x"240e1",
 209         18x"23c01", 18x"23729", 18x"23259", 18x"22d90", 18x"228ce", 18x"22413", 18x"21f60", 18x"21ab4",
 210         18x"2160f", 18x"21172", 18x"20cdb", 18x"2084b", 18x"203c2", 18x"1ff40", 18x"1fac4", 18x"1f64f",
 211         18x"1f1e1", 18x"1ed79", 18x"1e918", 18x"1e4be", 18x"1e069", 18x"1dc1b", 18x"1d7d4", 18x"1d392",
 212         18x"1cf57", 18x"1cb22", 18x"1c6f3", 18x"1c2ca", 18x"1bea7", 18x"1ba8a", 18x"1b672", 18x"1b261",
 213         18x"1ae55", 18x"1aa50", 18x"1a64f", 18x"1a255", 18x"19e60", 18x"19a70", 18x"19686", 18x"192a2",
 214         18x"18ec3", 18x"18ae9", 18x"18715", 18x"18345", 18x"17f7c", 18x"17bb7", 18x"177f7", 18x"1743d",
 215         18x"17087", 18x"16cd7", 18x"1692c", 18x"16585", 18x"161e4", 18x"15e47", 18x"15ab0", 18x"1571d",
 216         18x"1538e", 18x"15005", 18x"14c80", 18x"14900", 18x"14584", 18x"1420d", 18x"13e9b", 18x"13b2d",
 217         18x"137c3", 18x"1345e", 18x"130fe", 18x"12da2", 18x"12a4a", 18x"126f6", 18x"123a7", 18x"1205c",
 218         18x"11d15", 18x"119d2", 18x"11694", 18x"11359", 18x"11023", 18x"10cf1", 18x"109c2", 18x"10698",
 219         18x"10372", 18x"10050", 18x"0fd31", 18x"0fa17", 18x"0f700", 18x"0f3ed", 18x"0f0de", 18x"0edd3",
 220         18x"0eacb", 18x"0e7c7", 18x"0e4c7", 18x"0e1ca", 18x"0ded2", 18x"0dbdc", 18x"0d8eb", 18x"0d5fc",
 221         18x"0d312", 18x"0d02b", 18x"0cd47", 18x"0ca67", 18x"0c78a", 18x"0c4b1", 18x"0c1db", 18x"0bf09",
 222         18x"0bc3a", 18x"0b96e", 18x"0b6a5", 18x"0b3e0", 18x"0b11e", 18x"0ae5f", 18x"0aba3", 18x"0a8eb",
 223         18x"0a636", 18x"0a383", 18x"0a0d4", 18x"09e28", 18x"09b80", 18x"098da", 18x"09637", 18x"09397",
 224         18x"090fb", 18x"08e61", 18x"08bca", 18x"08936", 18x"086a5", 18x"08417", 18x"0818c", 18x"07f04",
 225         18x"07c7e", 18x"079fc", 18x"0777c", 18x"074ff", 18x"07284", 18x"0700d", 18x"06d98", 18x"06b26",
 226         18x"068b6", 18x"0664a", 18x"063e0", 18x"06178", 18x"05f13", 18x"05cb1", 18x"05a52", 18x"057f5",
 227         18x"0559a", 18x"05342", 18x"050ed", 18x"04e9a", 18x"04c4a", 18x"049fc", 18x"047b0", 18x"04567",
 228         18x"04321", 18x"040dd", 18x"03e9b", 18x"03c5c", 18x"03a1f", 18x"037e4", 18x"035ac", 18x"03376",
 229         18x"03142", 18x"02f11", 18x"02ce2", 18x"02ab5", 18x"0288b", 18x"02663", 18x"0243d", 18x"02219",
 230         18x"01ff7", 18x"01dd8", 18x"01bbb", 18x"019a0", 18x"01787", 18x"01570", 18x"0135b", 18x"01149",
 231         18x"00f39", 18x"00d2a", 18x"00b1e", 18x"00914", 18x"0070c", 18x"00506", 18x"00302", 18x"00100",
 232         -- 1/sqrt(x) lookup table
 233         -- Input is in the range [1, 4), i.e. two bits to the left of the
 234         -- binary point.  Those 2 bits index the following 3 blocks of 256 values.
 235         -- 1.0 ... 1.9999
 236         18x"3fe00", 18x"3fa06", 18x"3f612", 18x"3f224", 18x"3ee3a", 18x"3ea58", 18x"3e67c", 18x"3e2a4",
 237         18x"3ded2", 18x"3db06", 18x"3d73e", 18x"3d37e", 18x"3cfc2", 18x"3cc0a", 18x"3c85a", 18x"3c4ae",
 238         18x"3c106", 18x"3bd64", 18x"3b9c8", 18x"3b630", 18x"3b29e", 18x"3af10", 18x"3ab86", 18x"3a802",
 239         18x"3a484", 18x"3a108", 18x"39d94", 18x"39a22", 18x"396b6", 18x"3934e", 18x"38fea", 18x"38c8c",
 240         18x"38932", 18x"385dc", 18x"3828a", 18x"37f3e", 18x"37bf6", 18x"378b2", 18x"37572", 18x"37236",
 241         18x"36efe", 18x"36bca", 18x"3689a", 18x"36570", 18x"36248", 18x"35f26", 18x"35c06", 18x"358ea",
 242         18x"355d4", 18x"352c0", 18x"34fb0", 18x"34ca4", 18x"3499c", 18x"34698", 18x"34398", 18x"3409c",
 243         18x"33da2", 18x"33aac", 18x"337bc", 18x"334cc", 18x"331e2", 18x"32efc", 18x"32c18", 18x"32938",
 244         18x"3265a", 18x"32382", 18x"320ac", 18x"31dd8", 18x"31b0a", 18x"3183e", 18x"31576", 18x"312b0",
 245         18x"30fee", 18x"30d2e", 18x"30a74", 18x"307ba", 18x"30506", 18x"30254", 18x"2ffa4", 18x"2fcf8",
 246         18x"2fa4e", 18x"2f7a8", 18x"2f506", 18x"2f266", 18x"2efca", 18x"2ed2e", 18x"2ea98", 18x"2e804",
 247         18x"2e572", 18x"2e2e4", 18x"2e058", 18x"2ddce", 18x"2db48", 18x"2d8c6", 18x"2d646", 18x"2d3c8",
 248         18x"2d14c", 18x"2ced4", 18x"2cc5e", 18x"2c9ea", 18x"2c77a", 18x"2c50c", 18x"2c2a2", 18x"2c038",
 249         18x"2bdd2", 18x"2bb70", 18x"2b90e", 18x"2b6b0", 18x"2b454", 18x"2b1fa", 18x"2afa4", 18x"2ad4e",
 250         18x"2aafc", 18x"2a8ac", 18x"2a660", 18x"2a414", 18x"2a1cc", 18x"29f86", 18x"29d42", 18x"29b00",
 251         18x"298c2", 18x"29684", 18x"2944a", 18x"29210", 18x"28fda", 18x"28da6", 18x"28b74", 18x"28946",
 252         18x"28718", 18x"284ec", 18x"282c4", 18x"2809c", 18x"27e78", 18x"27c56", 18x"27a34", 18x"27816",
 253         18x"275fa", 18x"273e0", 18x"271c8", 18x"26fb0", 18x"26d9c", 18x"26b8a", 18x"2697a", 18x"2676c",
 254         18x"26560", 18x"26356", 18x"2614c", 18x"25f46", 18x"25d42", 18x"25b40", 18x"2593e", 18x"25740",
 255         18x"25542", 18x"25348", 18x"2514e", 18x"24f58", 18x"24d62", 18x"24b6e", 18x"2497c", 18x"2478c",
 256         18x"2459e", 18x"243b0", 18x"241c6", 18x"23fde", 18x"23df6", 18x"23c10", 18x"23a2c", 18x"2384a",
 257         18x"2366a", 18x"2348c", 18x"232ae", 18x"230d2", 18x"22efa", 18x"22d20", 18x"22b4a", 18x"22976",
 258         18x"227a2", 18x"225d2", 18x"22402", 18x"22234", 18x"22066", 18x"21e9c", 18x"21cd2", 18x"21b0a",
 259         18x"21944", 18x"2177e", 18x"215ba", 18x"213fa", 18x"21238", 18x"2107a", 18x"20ebc", 18x"20d00",
 260         18x"20b46", 18x"2098e", 18x"207d6", 18x"20620", 18x"2046c", 18x"202b8", 18x"20108", 18x"1ff58",
 261         18x"1fda8", 18x"1fbfc", 18x"1fa50", 18x"1f8a4", 18x"1f6fc", 18x"1f554", 18x"1f3ae", 18x"1f208",
 262         18x"1f064", 18x"1eec2", 18x"1ed22", 18x"1eb82", 18x"1e9e4", 18x"1e846", 18x"1e6aa", 18x"1e510",
 263         18x"1e378", 18x"1e1e0", 18x"1e04a", 18x"1deb4", 18x"1dd20", 18x"1db8e", 18x"1d9fc", 18x"1d86c",
 264         18x"1d6de", 18x"1d550", 18x"1d3c4", 18x"1d238", 18x"1d0ae", 18x"1cf26", 18x"1cd9e", 18x"1cc18",
 265         18x"1ca94", 18x"1c910", 18x"1c78c", 18x"1c60a", 18x"1c48a", 18x"1c30c", 18x"1c18e", 18x"1c010",
 266         18x"1be94", 18x"1bd1a", 18x"1bba0", 18x"1ba28", 18x"1b8b2", 18x"1b73c", 18x"1b5c6", 18x"1b452",
 267         18x"1b2e0", 18x"1b16e", 18x"1affe", 18x"1ae8e", 18x"1ad20", 18x"1abb4", 18x"1aa46", 18x"1a8dc",
 268         -- 2.0 ... 2.9999
 269         18x"1a772", 18x"1a608", 18x"1a4a0", 18x"1a33a", 18x"1a1d4", 18x"1a070", 18x"19f0c", 18x"19da8",
 270         18x"19c48", 18x"19ae6", 18x"19986", 18x"19828", 18x"196ca", 18x"1956e", 18x"19412", 18x"192b8",
 271         18x"1915e", 18x"19004", 18x"18eae", 18x"18d56", 18x"18c00", 18x"18aac", 18x"18958", 18x"18804",
 272         18x"186b2", 18x"18562", 18x"18412", 18x"182c2", 18x"18174", 18x"18026", 18x"17eda", 18x"17d8e",
 273         18x"17c44", 18x"17afa", 18x"179b2", 18x"1786a", 18x"17724", 18x"175de", 18x"17498", 18x"17354",
 274         18x"17210", 18x"170ce", 18x"16f8c", 18x"16e4c", 18x"16d0c", 18x"16bcc", 18x"16a8e", 18x"16950",
 275         18x"16814", 18x"166d8", 18x"1659e", 18x"16464", 18x"1632a", 18x"161f2", 18x"160ba", 18x"15f84",
 276         18x"15e4e", 18x"15d1a", 18x"15be6", 18x"15ab2", 18x"15980", 18x"1584e", 18x"1571c", 18x"155ec",
 277         18x"154bc", 18x"1538e", 18x"15260", 18x"15134", 18x"15006", 18x"14edc", 18x"14db0", 18x"14c86",
 278         18x"14b5e", 18x"14a36", 18x"1490e", 18x"147e6", 18x"146c0", 18x"1459a", 18x"14476", 18x"14352",
 279         18x"14230", 18x"1410c", 18x"13fea", 18x"13eca", 18x"13daa", 18x"13c8a", 18x"13b6c", 18x"13a4e",
 280         18x"13930", 18x"13814", 18x"136f8", 18x"135dc", 18x"134c2", 18x"133a8", 18x"1328e", 18x"13176",
 281         18x"1305e", 18x"12f48", 18x"12e30", 18x"12d1a", 18x"12c06", 18x"12af2", 18x"129de", 18x"128ca",
 282         18x"127b8", 18x"126a6", 18x"12596", 18x"12486", 18x"12376", 18x"12266", 18x"12158", 18x"1204a",
 283         18x"11f3e", 18x"11e32", 18x"11d26", 18x"11c1a", 18x"11b10", 18x"11a06", 18x"118fc", 18x"117f4",
 284         18x"116ec", 18x"115e4", 18x"114de", 18x"113d8", 18x"112d2", 18x"111ce", 18x"110ca", 18x"10fc6",
 285         18x"10ec2", 18x"10dc0", 18x"10cbe", 18x"10bbc", 18x"10abc", 18x"109bc", 18x"108bc", 18x"107be",
 286         18x"106c0", 18x"105c2", 18x"104c4", 18x"103c8", 18x"102cc", 18x"101d0", 18x"100d6", 18x"0ffdc",
 287         18x"0fee2", 18x"0fdea", 18x"0fcf0", 18x"0fbf8", 18x"0fb02", 18x"0fa0a", 18x"0f914", 18x"0f81e",
 288         18x"0f72a", 18x"0f636", 18x"0f542", 18x"0f44e", 18x"0f35a", 18x"0f268", 18x"0f176", 18x"0f086",
 289         18x"0ef94", 18x"0eea4", 18x"0edb4", 18x"0ecc6", 18x"0ebd6", 18x"0eae8", 18x"0e9fa", 18x"0e90e",
 290         18x"0e822", 18x"0e736", 18x"0e64a", 18x"0e55e", 18x"0e474", 18x"0e38a", 18x"0e2a0", 18x"0e1b8",
 291         18x"0e0d0", 18x"0dfe8", 18x"0df00", 18x"0de1a", 18x"0dd32", 18x"0dc4c", 18x"0db68", 18x"0da82",
 292         18x"0d99e", 18x"0d8ba", 18x"0d7d6", 18x"0d6f4", 18x"0d612", 18x"0d530", 18x"0d44e", 18x"0d36c",
 293         18x"0d28c", 18x"0d1ac", 18x"0d0cc", 18x"0cfee", 18x"0cf0e", 18x"0ce30", 18x"0cd54", 18x"0cc76",
 294         18x"0cb9a", 18x"0cabc", 18x"0c9e0", 18x"0c906", 18x"0c82a", 18x"0c750", 18x"0c676", 18x"0c59c",
 295         18x"0c4c4", 18x"0c3ea", 18x"0c312", 18x"0c23a", 18x"0c164", 18x"0c08c", 18x"0bfb6", 18x"0bee0",
 296         18x"0be0a", 18x"0bd36", 18x"0bc62", 18x"0bb8c", 18x"0baba", 18x"0b9e6", 18x"0b912", 18x"0b840",
 297         18x"0b76e", 18x"0b69c", 18x"0b5cc", 18x"0b4fa", 18x"0b42a", 18x"0b35a", 18x"0b28a", 18x"0b1bc",
 298         18x"0b0ee", 18x"0b01e", 18x"0af50", 18x"0ae84", 18x"0adb6", 18x"0acea", 18x"0ac1e", 18x"0ab52",
 299         18x"0aa86", 18x"0a9bc", 18x"0a8f0", 18x"0a826", 18x"0a75c", 18x"0a694", 18x"0a5ca", 18x"0a502",
 300         18x"0a43a", 18x"0a372", 18x"0a2aa", 18x"0a1e4", 18x"0a11c", 18x"0a056", 18x"09f90", 18x"09ecc",
 301         -- 3.0 ... 3.9999
 302         18x"09e06", 18x"09d42", 18x"09c7e", 18x"09bba", 18x"09af6", 18x"09a32", 18x"09970", 18x"098ae",
 303         18x"097ec", 18x"0972a", 18x"09668", 18x"095a8", 18x"094e8", 18x"09426", 18x"09368", 18x"092a8",
 304         18x"091e8", 18x"0912a", 18x"0906c", 18x"08fae", 18x"08ef0", 18x"08e32", 18x"08d76", 18x"08cba",
 305         18x"08bfe", 18x"08b42", 18x"08a86", 18x"089ca", 18x"08910", 18x"08856", 18x"0879c", 18x"086e2",
 306         18x"08628", 18x"08570", 18x"084b6", 18x"083fe", 18x"08346", 18x"0828e", 18x"081d8", 18x"08120",
 307         18x"0806a", 18x"07fb4", 18x"07efe", 18x"07e48", 18x"07d92", 18x"07cde", 18x"07c2a", 18x"07b76",
 308         18x"07ac2", 18x"07a0e", 18x"0795a", 18x"078a8", 18x"077f4", 18x"07742", 18x"07690", 18x"075de",
 309         18x"0752e", 18x"0747c", 18x"073cc", 18x"0731c", 18x"0726c", 18x"071bc", 18x"0710c", 18x"0705e",
 310         18x"06fae", 18x"06f00", 18x"06e52", 18x"06da4", 18x"06cf6", 18x"06c4a", 18x"06b9c", 18x"06af0",
 311         18x"06a44", 18x"06998", 18x"068ec", 18x"06840", 18x"06796", 18x"066ea", 18x"06640", 18x"06596",
 312         18x"064ec", 18x"06442", 18x"0639a", 18x"062f0", 18x"06248", 18x"061a0", 18x"060f8", 18x"06050",
 313         18x"05fa8", 18x"05f00", 18x"05e5a", 18x"05db4", 18x"05d0e", 18x"05c68", 18x"05bc2", 18x"05b1c",
 314         18x"05a76", 18x"059d2", 18x"0592e", 18x"05888", 18x"057e4", 18x"05742", 18x"0569e", 18x"055fa",
 315         18x"05558", 18x"054b6", 18x"05412", 18x"05370", 18x"052ce", 18x"0522e", 18x"0518c", 18x"050ec",
 316         18x"0504a", 18x"04faa", 18x"04f0a", 18x"04e6a", 18x"04dca", 18x"04d2c", 18x"04c8c", 18x"04bee",
 317         18x"04b50", 18x"04ab0", 18x"04a12", 18x"04976", 18x"048d8", 18x"0483a", 18x"0479e", 18x"04700",
 318         18x"04664", 18x"045c8", 18x"0452c", 18x"04490", 18x"043f6", 18x"0435a", 18x"042c0", 18x"04226",
 319         18x"0418a", 18x"040f0", 18x"04056", 18x"03fbe", 18x"03f24", 18x"03e8c", 18x"03df2", 18x"03d5a",
 320         18x"03cc2", 18x"03c2a", 18x"03b92", 18x"03afa", 18x"03a62", 18x"039cc", 18x"03934", 18x"0389e",
 321         18x"03808", 18x"03772", 18x"036dc", 18x"03646", 18x"035b2", 18x"0351c", 18x"03488", 18x"033f2",
 322         18x"0335e", 18x"032ca", 18x"03236", 18x"031a2", 18x"03110", 18x"0307c", 18x"02fea", 18x"02f56",
 323         18x"02ec4", 18x"02e32", 18x"02da0", 18x"02d0e", 18x"02c7c", 18x"02bec", 18x"02b5a", 18x"02aca",
 324         18x"02a38", 18x"029a8", 18x"02918", 18x"02888", 18x"027f8", 18x"0276a", 18x"026da", 18x"0264a",
 325         18x"025bc", 18x"0252e", 18x"024a0", 18x"02410", 18x"02384", 18x"022f6", 18x"02268", 18x"021da",
 326         18x"0214e", 18x"020c0", 18x"02034", 18x"01fa8", 18x"01f1c", 18x"01e90", 18x"01e04", 18x"01d78",
 327         18x"01cee", 18x"01c62", 18x"01bd8", 18x"01b4c", 18x"01ac2", 18x"01a38", 18x"019ae", 18x"01924",
 328         18x"0189c", 18x"01812", 18x"01788", 18x"01700", 18x"01676", 18x"015ee", 18x"01566", 18x"014de",
 329         18x"01456", 18x"013ce", 18x"01346", 18x"012c0", 18x"01238", 18x"011b2", 18x"0112c", 18x"010a4",
 330         18x"0101e", 18x"00f98", 18x"00f12", 18x"00e8c", 18x"00e08", 18x"00d82", 18x"00cfe", 18x"00c78",
 331         18x"00bf4", 18x"00b70", 18x"00aec", 18x"00a68", 18x"009e4", 18x"00960", 18x"008dc", 18x"00858",
 332         18x"007d6", 18x"00752", 18x"006d0", 18x"0064e", 18x"005cc", 18x"0054a", 18x"004c8", 18x"00446",
 333         18x"003c4", 18x"00342", 18x"002c2", 18x"00240", 18x"001c0", 18x"00140", 18x"000c0", 18x"00040"
 334         );
 335
 336     -- Left and right shifter with 120 bit input and 64 bit output.
 337     -- Shifts inp left by shift bits and returns the upper 64 bits of
 338     -- the result.  The shift parameter is interpreted as a signed
 339     -- number in the range -64..63, with negative values indicating
 340     -- right shifts.
         -- Right shifts fill the vacated upper bits with zeros, and left shifts
         -- by more than 56 fill the vacated lower bits with zeros.
         -- The shift is done in three multiplexer stages whose offsets add up:
         -- shift(6 downto 5) selects 0, +32, -64 or -32, shift(4 downto 3)
         -- selects 0, 8, 16 or 24, and shift(2 downto 0) selects 0 to 7.
 341     function shifter_64(inp: std_ulogic_vector(119 downto 0);
 342                         shift: std_ulogic_vector(6 downto 0))
 343         return std_ulogic_vector is
 344         variable s1 : std_ulogic_vector(94 downto 0);    -- after the coarse stage (64 + 24 + 7 bits)
 345         variable s2 : std_ulogic_vector(70 downto 0);    -- after the by-8 stage (64 + 7 bits)
 346         variable result : std_ulogic_vector(63 downto 0);
 347     begin
 348         case shift(6 downto 5) is
 349             when "00" =>
 350                 s1 := inp(119 downto 25);    -- offset 0
 351             when "01" =>
 352                 s1 := inp(87 downto 0) & "0000000";    -- offset +32 (left), zero-filled at the bottom
 353             when "10" =>
 354                 s1 := x"0000000000000000" & inp(119 downto 89);    -- offset -64 (right), zero-filled at the top
 355             when others =>
 356                 s1 := x"00000000" & inp(119 downto 57);    -- offset -32 (right), zero-filled at the top
 357         end case;
 358         case shift(4 downto 3) is
 359             when "00" =>
 360                 s2 := s1(94 downto 24);    -- + 0
 361             when "01" =>
 362                 s2 := s1(86 downto 16);    -- + 8
 363             when "10" =>
 364                 s2 := s1(78 downto 8);    -- + 16
 365             when others =>
 366                 s2 := s1(70 downto 0);    -- + 24
 367         end case;
 368         case shift(2 downto 0) is    -- final stage: + 0 .. 7, one bit per step
 369             when "000" =>
 370                 result := s2(70 downto 7);
 371             when "001" =>
 372                 result := s2(69 downto 6);
 373             when "010" =>
 374                 result := s2(68 downto 5);
 375             when "011" =>
 376                 result := s2(67 downto 4);
 377             when "100" =>
 378                 result := s2(66 downto 3);
 379             when "101" =>
 380                 result := s2(65 downto 2);
 381             when "110" =>
 382                 result := s2(64 downto 1);
 383             when others =>
 384                 result := s2(63 downto 0);
 385         end case;
 386         return result;
 387     end;
 388
 389     -- Build a 64-bit mask with 0-bits on the left and 1-bits on the right that
 390     -- selects the bits which will be lost in doing a right shift.  The shift
 391     -- parameter is the bottom 6 bits of a negative shift count, so the low
 392     -- (64 - shift) bits of the mask are set; shift = 0 gives an all-ones mask.
 393     function right_mask(shift: unsigned(5 downto 0)) return std_ulogic_vector is
 394         variable mask : std_ulogic_vector(63 downto 0) := (others => '0');
 395     begin
 396         -- Mask bit k is set exactly when shift <= 63 - k.
 397         for k in mask'range loop
 398             if shift <= 63 - k then
 399                 mask(k) := '1';
 400             end if;
 401         end loop;
 402         return mask;
 403     end;
 404
 405     -- Unpack a 64-bit register value into sign, exponent, mantissa and class.
 406     -- With is_int = '0' it is decoded as a DP float; otherwise as an integer.
 407     function decode_dp(fpr: std_ulogic_vector(63 downto 0); is_int: std_ulogic) return fpu_reg_type is
 408         variable num          : fpu_reg_type;
 409         variable exp_any_one  : std_ulogic;
 410         variable exp_all_ones : std_ulogic;
 411         variable frac_any_one : std_ulogic;
 412         variable key          : std_ulogic_vector(2 downto 0);
 413     begin
 414         exp_any_one  := or (fpr(62 downto 52));
 415         exp_all_ones := and (fpr(62 downto 52));
 416         frac_any_one := or (fpr(51 downto 0));
 417         num.negative := fpr(63);
 418         if is_int /= '0' then
 419             num.exponent := (others => '0');
 420             num.mantissa := fpr;
 421             num.class := ZERO;    -- integer: ZERO unless any bit of fpr is set
 422             if (fpr(63) or exp_any_one or frac_any_one) = '1' then
 423                 num.class := FINITE;
 424             end if;
 425         else
 426             -- An all-zero exponent field (zero or denormal) decodes as -1022.
 427             if exp_any_one = '0' then
 428                 num.exponent := to_signed(-1022, EXP_BITS);
 429             else
 430                 num.exponent := signed(resize(unsigned(fpr(62 downto 52)), EXP_BITS)) - to_signed(1023, EXP_BITS);
 431             end if;
 432             -- 10.54 format: the implied unit bit lands in bit 54.
 433             num.mantissa := "000000000" & exp_any_one & fpr(51 downto 0) & "00";
 434             key := exp_all_ones & exp_any_one & frac_any_one;
 435             case key is
 436                 when "000"                 => num.class := ZERO;
 437                 when "001" | "010" | "011" => num.class := FINITE;    -- "001" is denormalized
 438                 when "110"                 => num.class := INFINITY;
 439                 when others                => num.class := NAN;
 440             end case;
 441         end if;
 442         return num;
 443     end;
 444
 445     -- Assemble a 64-bit DP floating-point value from its components
 446     function pack_dp(sign: std_ulogic; class: fp_number_class; exp: signed(EXP_BITS-1 downto 0);
 447                      mantissa: std_ulogic_vector; single_prec: std_ulogic; quieten_nan: std_ulogic)
 448         return std_ulogic_vector is
 449         variable exp_field : std_ulogic_vector(10 downto 0) := (others => '0');
 450         variable frac      : std_ulogic_vector(51 downto 0) := (others => '0');
 451         variable packed    : std_ulogic_vector(63 downto 0);
 452     begin
 453         -- Only FINITE and NAN carry mantissa bits; single precision leaves
 454         -- the low 29 fraction bits at zero.
 455         if class = FINITE or class = NAN then
 456             frac(51 downto 29) := mantissa(53 downto 31);
 457             if single_prec = '0' then
 458                 frac(28 downto 0) := mantissa(30 downto 2);
 459             end if;
 460         end if;
 461         case class is
 462             when ZERO => null;
 463             when FINITE =>
 464                 if mantissa(54) = '1' then      -- unit bit set: normalized number
 465                     exp_field := std_ulogic_vector(resize(exp, 11) + 1023);
 466                 end if;
 467             when INFINITY =>
 468                 exp_field := (others => '1');
 469             when NAN =>
 470                 exp_field := (others => '1');
 471                 frac(51) := quieten_nan or mantissa(53);
 472         end case;
 473         packed := sign & exp_field & frac;
 474         return packed;
 475     end;
 476
 477     -- Decide whether rounding has to increment the mantissa.
 478     -- Returns a 2-bit vector: bit 1 = rounding increment, bit 0 = inexact.
 479     -- Assumes x includes the bottom 29 bits of the mantissa already
 480     -- if single_prec = 1 (usually arranged by setting set_x = 1 earlier).
 481     function fp_rounding(mantissa: std_ulogic_vector(63 downto 0); x: std_ulogic;
 482                          single_prec: std_ulogic; rn: std_ulogic_vector(2 downto 0);
 483                          sign: std_ulogic)
 484         return std_ulogic_vector is
 485         variable lsb, guard, rnd : std_ulogic;
 486         variable outcome         : std_ulogic_vector(1 downto 0);
 487     begin
 488         if single_prec = '0' then
 489             lsb   := mantissa(2);
 490             guard := mantissa(1);
 491             rnd   := mantissa(0);
 492         else
 493             lsb   := mantissa(31);
 494             guard := mantissa(30);
 495             rnd   := mantissa(29);
 496         end if;
 497         -- bit 0: inexact; bit 1: increment, decided by the rounding mode below
 498         outcome := '0' & (guard or rnd or x);
 499         case rn(1 downto 0) is
 500             when "01" => null;  -- round towards zero: never increment
 501             when "00" =>        -- round to nearest
 502                 outcome(1) := guard;
 503                 if guard = '1' and rnd = '0' and x = '0' and rn(2) = '0' then
 504                     outcome(1) := lsb;    -- tie, round to even
 505                 end if;
 506             when others =>      -- round towards +/- inf
 507                 if rn(0) = sign then
 508                     -- round towards greater magnitude
 509                     outcome(1) := outcome(0);
 510                 end if;
 511         end case;
 512         return outcome;
 513     end;
 514
 515     -- Compute the 5-bit result flags value to write into the FPSCR
 516     function result_flags(sign: std_ulogic; class: fp_number_class; unitbit: std_ulogic)
 517         return std_ulogic_vector is
 518         variable flags : std_ulogic_vector(0 to 4);
 519     begin
 520         -- The leftmost flag is set for NaNs, for negative zero and for finite
 521         -- values whose unit bit is 0.  FINITE and INFINITY encode the sign.
 522         case class is
 523             when ZERO     => flags := sign & "0010";
 524             when FINITE   => flags := (not unitbit) & sign & (not sign) & "00";
 525             when INFINITY => flags := '0' & sign & (not sign) & "01";
 526             when NAN      => flags := "10001";
 527         end case;
 528         return flags;
 529     end;
 530
 531 begin
 532     fpu_multiply_0: entity work.multiply    -- multiplier instance shared by the FPU data path
 533         port map (
 534             clk => clk,
 535             m_in => f_to_multiply,     -- operands and controls supplied by the FPU
 536             m_out => multiply_to_f     -- multiplier output returned to the FPU
 537             );
 538
 539     fpu_0: process(clk)
             -- State register of the FPU: on each rising clock edge r is either
             -- partially reset or loaded from rin.
 540     begin
 541         if rising_edge(clk) then
 542             if rst = '1' then
                     -- Synchronous reset: only the fields listed here are
                     -- cleared; the remaining fields of r keep their value.
 543                 r.state <= IDLE;
 544                 r.busy <= '0';
 545                 r.instr_done <= '0';
 546                 r.do_intr <= '0';
 547                 r.fpscr <= (others => '0');
 548                 r.writing_back <= '0';
 549             else
                     -- A new instruction may only be presented while the state
                     -- machine is in IDLE; otherwise the assertion fails with
                     -- severity failure.
 550                 assert not (r.state /= IDLE and e_in.valid = '1') severity failure;
                     -- NOTE(review): rin is not assigned in this part of the
                     -- file; presumably it is the next-state value computed by
                     -- the fpu_1 process. Confirm.
 551                 r <= rin;
 552             end if;
 553         end if;
 554     end process;
 555
 556     -- synchronous reads from lookup table
         --
         -- On every rising clock edge a 10-bit address is formed from the B
         -- operand mantissa, and the addressed inverse_table entry, with a '1'
         -- prepended, is registered into inverse_est.
 557     lut_access: process(clk)
 558         variable addrhi : std_ulogic_vector(1 downto 0);
 559         variable addr   : std_ulogic_vector(9 downto 0);
 560     begin
 561         if rising_edge(clk) then
 562             if r.is_sqrt = '1' then
                     -- when is_sqrt is set, mantissa bits 55:54 supply the two
                     -- upper address bits
 563                 addrhi := r.b.mantissa(55 downto 54);
 564             else
                     -- otherwise address bits 9:8 are forced to zero, so only
                     -- addresses 0 to 255 are used
 565                 addrhi := "00";
 566             end if;
                 -- the lower eight address bits are always mantissa bits 53:46
 567             addr := addrhi & r.b.mantissa(53 downto 46);
 568             inverse_est <= '1' & inverse_table(to_integer(unsigned(addr)));
 569         end if;
 570     end process;
 571
 572     e_out.busy <= r.busy;  -- e_out: status outputs, taken straight from the state register
 573     e_out.exception <= r.fpscr(FPSCR_FEX);
 574     e_out.interrupt <= r.do_intr;
 575
         -- w_out: writeback outputs.  valid is suppressed while an interrupt is
         -- being signalled (r.do_intr set).
 576     w_out.valid <= r.instr_done and not r.do_intr;
 577     w_out.write_enable <= r.writing_back;
 578     w_out.write_reg <= r.dest_fpr;
 579     w_out.write_data <= fp_result;
         -- A CR write is requested on completion when either r.rc or r.is_cmp
         -- is set (r.is_cmp is captured from e_in.out_cr).
 580     w_out.write_cr_enable <= r.instr_done and (r.rc or r.is_cmp);
 581     w_out.write_cr_mask <= r.cr_mask;
         -- The same cr_result value is replicated eight times.
         -- NOTE(review): presumably write_cr_mask selects which copy takes
         -- effect. Confirm against the consumer of w_out.
 582     w_out.write_cr_data <= r.cr_result & r.cr_result & r.cr_result & r.cr_result &
 583                            r.cr_result & r.cr_result & r.cr_result & r.cr_result;
 584
 585     fpu_1: process(all)
 586         variable v           : reg_type;
 587         variable adec        : fpu_reg_type;
 588         variable bdec        : fpu_reg_type;
 589         variable cdec        : fpu_reg_type;
 590         variable fpscr_mask  : std_ulogic_vector(31 downto 0);
 591         variable illegal     : std_ulogic;
 592         variable j, k        : integer;
 593         variable flm         : std_ulogic_vector(7 downto 0);
 594         variable int_input   : std_ulogic;
 595         variable mask        : std_ulogic_vector(63 downto 0);
 596         variable in_a0       : std_ulogic_vector(63 downto 0);
 597         variable in_b0       : std_ulogic_vector(63 downto 0);
 598         variable misc        : std_ulogic_vector(63 downto 0);
 599         variable shift_res   : std_ulogic_vector(63 downto 0);
 600         variable round       : std_ulogic_vector(1 downto 0);
 601         variable update_fx   : std_ulogic;
 602         variable arith_done  : std_ulogic;
 603         variable invalid     : std_ulogic;
 604         variable zero_divide : std_ulogic;
 605         variable mant_nz     : std_ulogic;
 606         variable min_exp     : signed(EXP_BITS-1 downto 0);
 607         variable max_exp     : signed(EXP_BITS-1 downto 0);
 608         variable bias_exp    : signed(EXP_BITS-1 downto 0);
 609         variable new_exp     : signed(EXP_BITS-1 downto 0);
 610         variable exp_tiny    : std_ulogic;
 611         variable exp_huge    : std_ulogic;
 612         variable renormalize : std_ulogic;
 613         variable clz         : std_ulogic_vector(5 downto 0);
 614         variable set_x       : std_ulogic;
 615         variable mshift      : signed(EXP_BITS-1 downto 0);
 616         variable need_check  : std_ulogic;
 617         variable msb         : std_ulogic;
 618         variable is_add      : std_ulogic;
 619         variable set_a       : std_ulogic;
 620         variable set_b       : std_ulogic;
 621         variable set_c       : std_ulogic;
 622         variable set_y       : std_ulogic;
 623         variable set_s       : std_ulogic;
 624         variable qnan_result : std_ulogic;
 625         variable px_nz       : std_ulogic;
 626         variable pcmpb_eq    : std_ulogic;
 627         variable pcmpb_lt    : std_ulogic;
 628         variable pshift      : std_ulogic;
 629         variable renorm_sqrt : std_ulogic;
 630         variable sqrt_exp    : signed(EXP_BITS-1 downto 0);
 631         variable shiftin     : std_ulogic;
 632         variable mulexp      : signed(EXP_BITS-1 downto 0);
 633         variable maddend     : std_ulogic_vector(127 downto 0);
 634         variable sum         : std_ulogic_vector(63 downto 0);
 635     begin
 636         v := r;
 637         illegal := '0';
 638         v.busy := '0';
 639         int_input := '0';
 640
 641         -- capture incoming instruction
 642         if e_in.valid = '1' then
 643             v.insn := e_in.insn;
 644             v.op := e_in.op;
 645             v.fe_mode := or (e_in.fe_mode);
 646             v.dest_fpr := e_in.frt;
 647             v.single_prec := e_in.single;
 648             v.longmask := e_in.single;
 649             v.int_result := '0';
 650             v.rc := e_in.rc;
 651             v.is_cmp := e_in.out_cr;
 652             if e_in.out_cr = '0' then
 653                 v.cr_mask := num_to_fxm(1);
 654             else
 655                 v.cr_mask := num_to_fxm(to_integer(unsigned(insn_bf(e_in.insn))));
 656             end if;
 657             int_input := '0';
 658             if e_in.op = OP_FPOP_I then
 659                 int_input := '1';
 660             end if;
 661             v.quieten_nan := '1';
 662             v.tiny := '0';
 663             v.denorm := '0';
 664             v.round_mode := '0' & r.fpscr(FPSCR_RN+1 downto FPSCR_RN);
 665             v.is_subtract := '0';
 666             v.is_multiply := '0';
 667             v.is_sqrt := '0';
 668             v.add_bsmall := '0';
 669             v.doing_ftdiv := "00";
 670
 671             adec := decode_dp(e_in.fra, int_input);
 672             bdec := decode_dp(e_in.frb, int_input);
 673             cdec := decode_dp(e_in.frc, int_input);
 674             v.a := adec;
 675             v.b := bdec;
 676             v.c := cdec;
 677
 678             v.exp_cmp := '0';
 679             if adec.exponent > bdec.exponent then
 680                 v.exp_cmp := '1';
 681             end if;
 682             v.madd_cmp := '0';
 683             if (adec.exponent + cdec.exponent + 1) >= bdec.exponent then
 684                 v.madd_cmp := '1';
 685             end if;
 686         end if;
 687
 688         r_hi_nz <= or (r.r(55 downto 31));
 689         r_lo_nz <= or (r.r(30 downto 2));
 690         s_nz <= or (r.s);
 691
 692         if r.single_prec = '0' then
 693             if r.doing_ftdiv(1) = '0' then
 694                 max_exp := to_signed(1023, EXP_BITS);
 695             else
 696                 max_exp := to_signed(1020, EXP_BITS);
 697             end if;
 698             if r.doing_ftdiv(0) = '0' then
 699                 min_exp := to_signed(-1022, EXP_BITS);
 700             else
 701                 min_exp := to_signed(-1021, EXP_BITS);
 702             end if;
 703             bias_exp := to_signed(1536, EXP_BITS);
 704         else
 705             max_exp := to_signed(127, EXP_BITS);
 706             min_exp := to_signed(-126, EXP_BITS);
 707             bias_exp := to_signed(192, EXP_BITS);
 708         end if;
 709         new_exp := r.result_exp - r.shift;
 710         exp_tiny := '0';
 711         exp_huge := '0';
 712         if new_exp < min_exp then
 713             exp_tiny := '1';
 714         end if;
 715         if new_exp > max_exp then
 716             exp_huge := '1';
 717         end if;
 718
 719         -- Compare P with zero and with B
 720         px_nz := or (r.p(57 downto 4));
 721         pcmpb_eq := '0';
 722         if r.p(59 downto 4) = r.b.mantissa(55 downto 0) then
 723             pcmpb_eq := '1';
 724         end if;
 725         pcmpb_lt := '0';
 726         if unsigned(r.p(59 downto 4)) < unsigned(r.b.mantissa(55 downto 0)) then
 727             pcmpb_lt := '1';
 728         end if;
 729
 730         v.writing_back := '0';
 731         v.instr_done := '0';
 732         v.update_fprf := '0';
 733         v.shift := to_signed(0, EXP_BITS);
 734         v.first := '0';
 735         v.opsel_a := AIN_R;
 736         opsel_ainv <= '0';
 737         opsel_mask <= '0';
 738         opsel_b <= BIN_ZERO;
 739         opsel_binv <= '0';
 740         opsel_r <= RES_SUM;
 741         opsel_s <= S_ZERO;
 742         carry_in <= '0';
 743         misc_sel <= "0000";
 744         fpscr_mask := (others => '1');
 745         update_fx := '0';
 746         arith_done := '0';
 747         invalid := '0';
 748         zero_divide := '0';
 749         renormalize := '0';
 750         set_x := '0';
 751         qnan_result := '0';
 752         set_a := '0';
 753         set_b := '0';
 754         set_c := '0';
 755         set_s := '0';
 756         f_to_multiply.is_32bit <= '0';
 757         f_to_multiply.valid <= '0';
 758         msel_1 <= MUL1_A;
 759         msel_2 <= MUL2_C;
 760         msel_add <= MULADD_ZERO;
 761         msel_inv <= '0';
 762         set_y := '0';
 763         pshift := '0';
 764         renorm_sqrt := '0';
 765         shiftin := '0';
 766         case r.state is
 767             when IDLE =>
 768                 v.use_a := '0';
 769                 v.use_b := '0';
 770                 v.use_c := '0';
 771                 v.invalid := '0';
 772                 v.negate := '0';
 773                 if e_in.valid = '1' then
 774                     case e_in.insn(5 downto 1) is
 775                         when "00000" =>
 776                             if e_in.insn(8) = '1' then
 777                                 if e_in.insn(6) = '0' then
 778                                     v.state := DO_FTDIV;
 779                                 else
 780                                     v.state := DO_FTSQRT;
 781                                 end if;
 782                             elsif e_in.insn(7) = '1' then
 783                                 v.state := DO_MCRFS;
 784                             else
 785                                 v.opsel_a := AIN_B;
 786                                 v.state := DO_FCMP;
 787                             end if;
 788                         when "00110" =>
 789                             if e_in.insn(10) = '0' then
 790                                 if e_in.insn(8) = '0' then
 791                                     v.state := DO_MTFSB;
 792                                 else
 793                                     v.state := DO_MTFSFI;
 794                                 end if;
 795                             else
 796                                 v.state := DO_FMRG;
 797                             end if;
 798                         when "00111" =>
 799                             if e_in.insn(8) = '0' then
 800                                 v.state := DO_MFFS;
 801                             else
 802                                 v.state := DO_MTFSF;
 803                             end if;
 804                         when "01000" =>
 805                             v.opsel_a := AIN_B;
 806                             if e_in.insn(9 downto 8) /= "11" then
 807                                 v.state := DO_FMR;
 808                             else
 809                                 v.state := DO_FRI;
 810                             end if;
 811                         when "01100" =>
 812                             v.opsel_a := AIN_B;
 813                             v.state := DO_FRSP;
 814                         when "01110" =>
 815                             v.opsel_a := AIN_B;
 816                             if int_input = '1' then
 817                                 -- fcfid[u][s]
 818                                 v.state := DO_FCFID;
 819                             else
 820                                 v.state := DO_FCTI;
 821                             end if;
 822                         when "01111" =>
 823                             v.round_mode := "001";
 824                             v.opsel_a := AIN_B;
 825                             v.state := DO_FCTI;
 826                         when "10010" =>
 827                             v.opsel_a := AIN_A;
 828                             if v.b.mantissa(54) = '0' and v.a.mantissa(54) = '1' then
 829                                 v.opsel_a := AIN_B;
 830                             end if;
 831                             v.state := DO_FDIV;
 832                         when "10100" | "10101" =>
 833                             v.opsel_a := AIN_A;
 834                             v.state := DO_FADD;
 835                         when "10110" =>
 836                             v.is_sqrt := '1';
 837                             v.opsel_a := AIN_B;
 838                             v.state := DO_FSQRT;
 839                         when "10111" =>
 840                             v.state := DO_FSEL;
 841                         when "11000" =>
 842                             v.opsel_a := AIN_B;
 843                             v.state := DO_FRE;
 844                         when "11001" =>
 845                             v.is_multiply := '1';
 846                             v.opsel_a := AIN_A;
 847                             if v.c.mantissa(54) = '0' and v.a.mantissa(54) = '1' then
 848                                 v.opsel_a := AIN_C;
 849                             end if;
 850                             v.state := DO_FMUL;
 851                         when "11010" =>
 852                             v.is_sqrt := '1';
 853                             v.opsel_a := AIN_B;
 854                             v.state := DO_FRSQRTE;
 855                         when "11100" | "11101" | "11110" | "11111" =>
 856                             if v.a.mantissa(54) = '0' then
 857                                 v.opsel_a := AIN_A;
 858                             elsif v.c.mantissa(54) = '0' then
 859                                 v.opsel_a := AIN_C;
 860                             else
 861                                 v.opsel_a := AIN_B;
 862                             end if;
 863                             v.state := DO_FMADD;
 864                         when others =>
 865                             illegal := '1';
 866                     end case;
 867                 end if;
 868                 v.x := '0';
 869                 v.old_exc := r.fpscr(FPSCR_VX downto FPSCR_XX);
 870                 set_s := '1';
 871
 872             when DO_MCRFS =>
 873                 j := to_integer(unsigned(insn_bfa(r.insn)));
 874                 for i in 0 to 7 loop
 875                     if i = j then
 876                         k := (7 - i) * 4;
 877                         v.cr_result := r.fpscr(k + 3 downto k);
 878                         fpscr_mask(k + 3 downto k) := "0000";
 879                     end if;
 880                 end loop;
 881                 v.fpscr := r.fpscr and (fpscr_mask or x"6007F8FF");
 882                 v.instr_done := '1';
 883                 v.state := IDLE;
 884
 885             when DO_FTDIV =>
 886                 v.instr_done := '1';
 887                 v.state := IDLE;
 888                 v.cr_result := "0000";
 889                 if r.a.class = INFINITY or r.b.class = ZERO or r.b.class = INFINITY or
 890                     (r.b.class = FINITE and r.b.mantissa(53) = '0') then
 891                     v.cr_result(2) := '1';
 892                 end if;
 893                 if r.a.class = NAN or r.a.class = INFINITY or
 894                     r.b.class = NAN or r.b.class = ZERO or r.b.class = INFINITY or
 895                     (r.a.class = FINITE and r.a.exponent <= to_signed(-970, EXP_BITS)) then
 896                     v.cr_result(1) := '1';
 897                 else
 898                     v.doing_ftdiv := "11";
 899                     v.first := '1';
 900                     v.state := FTDIV_1;
 901                     v.instr_done := '0';
 902                 end if;
 903
 904             when DO_FTSQRT =>
                     -- ftsqrt: single-cycle test of operand B.  The outcome is a
                     -- 4-bit CR field value in v.cr_result; writing_back is left
                     -- at its default of '0', so no FPR is written.
 905                 v.instr_done := '1';
 906                 v.state := IDLE;
 907                 v.cr_result := "0000";
                     -- Bit 2: B is zero, infinite, or finite with mantissa bit 53
                     -- clear.
 908                 if r.b.class = ZERO or r.b.class = INFINITY or
 909                     (r.b.class = FINITE and r.b.mantissa(53) = '0') then
 910                     v.cr_result(2) := '1';
 911                 end if;
                     -- Bit 1: B is NaN, infinite, zero or negative, or its
                     -- exponent is -970 or less.  The bit has to be set to '1'
                     -- here (as DO_FTDIV does for its equivalent test);
                     -- assigning '0' would be a no-op after the "0000"
                     -- initialisation above and the condition would never be
                     -- reported.
 912                 if r.b.class = NAN or r.b.class = INFINITY or r.b.class = ZERO
 913                     or r.b.negative = '1' or r.b.exponent <= to_signed(-970, EXP_BITS) then
 914                     v.cr_result(1) := '1';
 915                 end if;
 916
 917             when DO_FCMP =>
 918                 -- fcmp[uo]
 919                 -- r.opsel_a = AIN_B
 920                 v.instr_done := '1';
 921                 v.state := IDLE;
 922                 update_fx := '1';
 923                 v.result_exp := r.b.exponent;
 924                 if (r.a.class = NAN and r.a.mantissa(53) = '0') or
 925                     (r.b.class = NAN and r.b.mantissa(53) = '0') then
 926                     -- Signalling NAN
 927                     v.fpscr(FPSCR_VXSNAN) := '1';
 928                     if r.insn(6) = '1' and r.fpscr(FPSCR_VE) = '0' then
 929                         v.fpscr(FPSCR_VXVC) := '1';
 930                     end if;
 931                     invalid := '1';
 932                     v.cr_result := "0001";          -- unordered
 933                 elsif r.a.class = NAN or r.b.class = NAN then
 934                     if r.insn(6) = '1' then
 935                         -- fcmpo
 936                         v.fpscr(FPSCR_VXVC) := '1';
 937                         invalid := '1';
 938                     end if;
 939                     v.cr_result := "0001";          -- unordered
 940                 elsif r.a.class = ZERO and r.b.class = ZERO then
 941                     v.cr_result := "0010";          -- equal
 942                 elsif r.a.negative /= r.b.negative then
 943                     v.cr_result := r.a.negative & r.b.negative & "00";
 944                 elsif r.a.class = ZERO then
 945                     -- A and B are the same sign from here down
 946                     v.cr_result := not r.b.negative & r.b.negative & "00";
 947                 elsif r.a.class = INFINITY then
 948                     if r.b.class = INFINITY then
 949                         v.cr_result := "0010";
 950                     else
 951                         v.cr_result := r.a.negative & not r.a.negative & "00";
 952                     end if;
 953                 elsif r.b.class = ZERO then
 954                     -- A is finite from here down
 955                     v.cr_result := r.a.negative & not r.a.negative & "00";
 956                 elsif r.b.class = INFINITY then
 957                     v.cr_result := not r.b.negative & r.b.negative & "00";
 958                 elsif r.exp_cmp = '1' then
 959                     -- A and B are both finite from here down
 960                     v.cr_result := r.a.negative & not r.a.negative & "00";
 961                 elsif r.a.exponent /= r.b.exponent then
 962                     -- A exponent is smaller than B
 963                     v.cr_result := not r.a.negative & r.a.negative & "00";
 964                 else
 965                     -- Prepare to subtract mantissas, put B in R
 966                     v.cr_result := "0000";
 967                     v.instr_done := '0';
 968                     v.opsel_a := AIN_A;
 969                     v.state := CMP_1;
 970                 end if;
 971                 v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result;
 972
 973             when DO_MTFSB =>
 974                 -- mtfsb{0,1}
 975                 j := to_integer(unsigned(insn_bt(r.insn)));
 976                 for i in 0 to 31 loop
 977                     if i = j then
 978                         v.fpscr(31 - i) := r.insn(6);
 979                     end if;
 980                 end loop;
 981                 v.instr_done := '1';
 982                 v.state := IDLE;
 983
 984             when DO_MTFSFI =>
 985                 -- mtfsfi
 986                 j := to_integer(unsigned(insn_bf(r.insn)));
 987                 if r.insn(16) = '0' then
 988                     for i in 0 to 7 loop
 989                         if i = j then
 990                             k := (7 - i) * 4;
 991                             v.fpscr(k + 3 downto k) := insn_u(r.insn);
 992                         end if;
 993                     end loop;
 994                 end if;
 995                 v.instr_done := '1';
 996                 v.state := IDLE;
 997
 998             when DO_FMRG =>
 999                 -- fmrgew, fmrgow
1000                 opsel_r <= RES_MISC;
1001                 misc_sel <= "01" & r.insn(8) & '0';
1002                 v.int_result := '1';
1003                 v.writing_back := '1';
1004                 v.instr_done := '1';
1005                 v.state := IDLE;
1006
1007             when DO_MFFS =>
1008                 v.int_result := '1';
1009                 v.writing_back := '1';
1010                 opsel_r <= RES_MISC;
1011                 case r.insn(20 downto 16) is
1012                     when "00000" =>
1013                         -- mffs
1014                     when "00001" =>
1015                         -- mffsce
1016                         v.fpscr(FPSCR_VE downto FPSCR_XE) := "00000";
1017                     when "10100" | "10101" =>
1018                         -- mffscdrn[i] (but we don't implement DRN)
1019                         fpscr_mask := x"000000FF";
1020                     when "10110" =>
1021                         -- mffscrn
1022                         fpscr_mask := x"000000FF";
1023                         v.fpscr(FPSCR_RN+1 downto FPSCR_RN) :=
1024                             r.b.mantissa(FPSCR_RN+1 downto FPSCR_RN);
1025                     when "10111" =>
1026                         -- mffscrni
1027                         fpscr_mask := x"000000FF";
1028                         v.fpscr(FPSCR_RN+1 downto FPSCR_RN) := r.insn(12 downto 11);
1029                     when "11000" =>
1030                         -- mffsl
1031                         fpscr_mask := x"0007F0FF";
1032                     when others =>
1033                         illegal := '1';
1034                 end case;
1035                 v.instr_done := '1';
1036                 v.state := IDLE;
1037
1038             when DO_MTFSF =>
1039                 if r.insn(25) = '1' then
1040                     flm := x"FF";
1041                 elsif r.insn(16) = '1' then
1042                     flm := x"00";
1043                 else
1044                     flm := r.insn(24 downto 17);
1045                 end if;
1046                 for i in 0 to 7 loop
1047                     k := i * 4;
1048                     if flm(i) = '1' then
1049                         v.fpscr(k + 3 downto k) := r.b.mantissa(k + 3 downto k);
1050                     end if;
1051                 end loop;
1052                 v.instr_done := '1';
1053                 v.state := IDLE;
1054
1055             when DO_FMR =>
1056                 -- r.opsel_a = AIN_B
1057                 v.result_class := r.b.class;
1058                 v.result_exp := r.b.exponent;
1059                 v.quieten_nan := '0';
1060                 if r.insn(9) = '1' then
1061                     v.result_sign := '0';              -- fabs
1062                 elsif r.insn(8) = '1' then
1063                     v.result_sign := '1';              -- fnabs
1064                 elsif r.insn(7) = '1' then
1065                     v.result_sign := r.b.negative;     -- fmr
1066                 elsif r.insn(6) = '1' then
1067                     v.result_sign := not r.b.negative; -- fneg
1068                 else
1069                     v.result_sign := r.a.negative;     -- fcpsgn
1070                 end if;
1071                 v.writing_back := '1';
1072                 v.instr_done := '1';
1073                 v.state := IDLE;
1074
1075             when DO_FRI =>    -- fri[nzpm]
1076                 -- r.opsel_a = AIN_B
1077                 v.result_class := r.b.class;
1078                 v.result_sign := r.b.negative;
1079                 v.result_exp := r.b.exponent;
1080                 v.fpscr(FPSCR_FR) := '0';
1081                 v.fpscr(FPSCR_FI) := '0';
1082                 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1083                     -- Signalling NAN
1084                     v.fpscr(FPSCR_VXSNAN) := '1';
1085                     invalid := '1';
1086                 end if;
1087                 if r.b.class = FINITE then
1088                     if r.b.exponent >= to_signed(52, EXP_BITS) then
1089                         -- integer already, no rounding required
1090                         arith_done := '1';
1091                     else
1092                         v.shift := r.b.exponent - to_signed(52, EXP_BITS);
1093                         v.state := FRI_1;
1094                         v.round_mode := '1' & r.insn(7 downto 6);
1095                     end if;
1096                 else
1097                     arith_done := '1';
1098                 end if;
1099
1100             when DO_FRSP =>
1101                 -- r.opsel_a = AIN_B, r.shift = 0
1102                 v.result_class := r.b.class;
1103                 v.result_sign := r.b.negative;
1104                 v.result_exp := r.b.exponent;
1105                 v.fpscr(FPSCR_FR) := '0';
1106                 v.fpscr(FPSCR_FI) := '0';
1107                 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1108                     -- Signalling NAN
1109                     v.fpscr(FPSCR_VXSNAN) := '1';
1110                     invalid := '1';
1111                 end if;
1112                 set_x := '1';
1113                 if r.b.class = FINITE then
1114                     if r.b.exponent < to_signed(-126, EXP_BITS) then
1115                         v.shift := r.b.exponent - to_signed(-126, EXP_BITS);
1116                         v.state := ROUND_UFLOW;
1117                     elsif r.b.exponent > to_signed(127, EXP_BITS) then
1118                         v.state := ROUND_OFLOW;
1119                     else
1120                         v.shift := to_signed(-2, EXP_BITS);
1121                         v.state := ROUNDING;
1122                     end if;
1123                 else
1124                     arith_done := '1';
1125                 end if;
1126
1127             when DO_FCTI =>
1128                 -- instr bit 9: 1=dword 0=word
1129                 -- instr bit 8: 1=unsigned 0=signed
1130                 -- instr bit 1: 1=round to zero 0=use fpscr[RN]
1131                 -- r.opsel_a = AIN_B
1132                 v.result_class := r.b.class;
1133                 v.result_sign := r.b.negative;
1134                 v.result_exp := r.b.exponent;
1135                 v.fpscr(FPSCR_FR) := '0';
1136                 v.fpscr(FPSCR_FI) := '0';
1137                 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1138                     -- Signalling NAN
1139                     v.fpscr(FPSCR_VXSNAN) := '1';
1140                     invalid := '1';
1141                 end if;
1142
1143                 v.int_result := '1';
1144                 case r.b.class is
1145                     when ZERO =>
1146                         arith_done := '1';
1147                     when FINITE =>
1148                         if r.b.exponent >= to_signed(64, EXP_BITS) or
1149                             (r.insn(9) = '0' and r.b.exponent >= to_signed(32, EXP_BITS)) then
1150                             v.state := INT_OFLOW;
1151                         elsif r.b.exponent >= to_signed(52, EXP_BITS) then
1152                             -- integer already, no rounding required,
1153                             -- shift into final position
1154                             v.shift := r.b.exponent - to_signed(54, EXP_BITS);
1155                             if r.insn(8) = '1' and r.b.negative = '1' then
1156                                 v.state := INT_OFLOW;
1157                             else
1158                                 v.state := INT_ISHIFT;
1159                             end if;
1160                         else
1161                             v.shift := r.b.exponent - to_signed(52, EXP_BITS);
1162                             v.state := INT_SHIFT;
1163                         end if;
1164                     when INFINITY | NAN =>
1165                         v.state := INT_OFLOW;
1166                 end case;
1167
1168             when DO_FCFID =>
1169                 -- r.opsel_a = AIN_B
1170                 v.result_sign := '0';
1171                 if r.insn(8) = '0' and r.b.negative = '1' then
1172                     -- fcfid[s] with negative operand, set R = -B
1173                     opsel_ainv <= '1';
1174                     carry_in <= '1';
1175                     v.result_sign := '1';
1176                 end if;
1177                 v.result_class := r.b.class;
1178                 v.result_exp := to_signed(54, EXP_BITS);
1179                 v.fpscr(FPSCR_FR) := '0';
1180                 v.fpscr(FPSCR_FI) := '0';
1181                 if r.b.class = ZERO then
1182                     arith_done := '1';
1183                 else
1184                     v.state := FINISH;
1185                 end if;
1186
1187             when DO_FADD =>
1188                 -- fadd[s] and fsub[s]
1189                 -- r.opsel_a = AIN_A
1190                 v.result_sign := r.a.negative;
1191                 v.result_class := r.a.class;
1192                 v.result_exp := r.a.exponent;
1193                 v.fpscr(FPSCR_FR) := '0';
1194                 v.fpscr(FPSCR_FI) := '0';
1195                 v.use_a := '1';
1196                 v.use_b := '1';
1197                 is_add := r.a.negative xor r.b.negative xor r.insn(1);
1198                 if r.a.class = FINITE and r.b.class = FINITE then
1199                     v.is_subtract := not is_add;
1200                     v.add_bsmall := r.exp_cmp;
1201                     v.opsel_a := AIN_B;
1202                     if r.exp_cmp = '0' then
1203                         v.shift := r.a.exponent - r.b.exponent;
1204                         v.result_sign := r.b.negative xnor r.insn(1);
1205                         if r.a.exponent = r.b.exponent then
1206                             v.state := ADD_2;
1207                         else
1208                             v.longmask := '0';
1209                             v.state := ADD_SHIFT;
1210                         end if;
1211                     else
1212                         v.state := ADD_1;
1213                     end if;
1214                 else
1215                     if r.a.class = NAN or r.b.class = NAN then
1216                         v.state := NAN_RESULT;
1217                     elsif r.a.class = INFINITY and r.b.class = INFINITY and is_add = '0' then
1218                         -- invalid operation, construct QNaN
1219                         v.fpscr(FPSCR_VXISI) := '1';
1220                         qnan_result := '1';
1221                         arith_done := '1';
1222                     elsif r.a.class = ZERO and r.b.class = ZERO and is_add = '0' then
1223                         -- return -0 for rounding to -infinity
1224                         v.result_sign := r.round_mode(1) and r.round_mode(0);
1225                         arith_done := '1';
1226                     elsif r.a.class = INFINITY or r.b.class = ZERO then
1227                         -- result is A
1228                         v.opsel_a := AIN_A;
1229                         v.state := EXC_RESULT;
1230                     else
1231                         -- result is +/- B
1232                         v.opsel_a := AIN_B;
1233                         v.negate := not r.insn(1);
1234                         v.state := EXC_RESULT;
1235                     end if;
1236                 end if;
1237
1238             when DO_FMUL =>  -- dispatch state for multiply; the operands are A and C
1239                 -- fmul[s]
1240                 -- r.opsel_a = AIN_A unless C is denorm and A isn't
1241                 v.result_sign := r.a.negative xor r.c.negative;  -- sign of the product
1242                 v.result_class := r.a.class;  -- start from A's class
1243                 v.fpscr(FPSCR_FR) := '0';
1244                 v.fpscr(FPSCR_FI) := '0';
1245                 v.use_a := '1';
1246                 v.use_c := '1';
1247                 if r.a.class = FINITE and r.c.class = FINITE then
1248                     v.result_exp := r.a.exponent + r.c.exponent;  -- exponents are unbiased, so they add directly
1249                     -- Renormalize denorm operands
1250                     if r.a.mantissa(54) = '0' then  -- bit 54 is the units bit of the 10.54 mantissa
1251                         v.state := RENORM_A;
1252                     elsif r.c.mantissa(54) = '0' then
1253                         v.state := RENORM_C;
1254                     else
1255                         f_to_multiply.valid <= '1';  -- both operands normalized: start the multiplier now
1256                         v.state := MULT_1;
1257                     end if;
1258                 else
1259                     if r.a.class = NAN or r.c.class = NAN then
1260                         v.state := NAN_RESULT;
1261                     elsif (r.a.class = INFINITY and r.c.class = ZERO) or
1262                         (r.a.class = ZERO and r.c.class = INFINITY) then
1263                         -- infinity * zero: invalid operation, construct QNaN
1264                         v.fpscr(FPSCR_VXIMZ) := '1';
1265                         qnan_result := '1';
1266                     elsif r.a.class = ZERO or r.a.class = INFINITY then
1267                         -- result is +/- A: A's class with the product sign computed above
1268                         arith_done := '1';
1269                     else
1270                         -- A is FINITE here, so r.c.class is ZERO or INFINITY
1271                         v.opsel_a := AIN_C;
1272                         v.negate := r.a.negative;  -- C is passed on with negate = A's sign
1273                         v.state := EXC_RESULT;
1274                     end if;
1275                 end if;
1276
1277             when DO_FDIV =>  -- dispatch state for divide (A / B)
1278                 -- r.opsel_a = AIN_A unless B is denorm and A isn't
1279                 v.result_class := r.a.class;  -- default; overridden in the special cases below
1280                 v.fpscr(FPSCR_FR) := '0';
1281                 v.fpscr(FPSCR_FI) := '0';
1282                 v.use_a := '1';
1283                 v.use_b := '1';
1284                 v.result_sign := r.a.negative xor r.b.negative;  -- sign of the quotient
1285                 v.result_exp := r.a.exponent - r.b.exponent;  -- exponents are unbiased, so they subtract directly
1286                 v.count := "00";  -- iteration counter; incremented in DIV_2, tested in DIV_3
1287                 if r.a.class = FINITE and r.b.class = FINITE then
1288                     -- Renormalize denorm operands
1289                     if r.a.mantissa(54) = '0' then  -- bit 54 is the units bit of the 10.54 mantissa
1290                         v.state := RENORM_A;
1291                     elsif r.b.mantissa(54) = '0' then
1292                         v.state := RENORM_B;
1293                     else
1294                         v.first := '1';  -- both operands normalized: go straight to the iteration
1295                         v.state := DIV_2;
1296                     end if;
1297                 else
1298                     if r.a.class = NAN or r.b.class = NAN then
1299                         v.state := NAN_RESULT;
1300                     elsif r.b.class = INFINITY then
1301                         if r.a.class = INFINITY then  -- infinity / infinity: invalid operation
1302                             v.fpscr(FPSCR_VXIDI) := '1';
1303                             qnan_result := '1';
1304                         else  -- zero or finite divided by infinity gives zero
1305                             v.result_class := ZERO;
1306                         end if;
1307                         arith_done := '1';
1308                     elsif r.b.class = ZERO then
1309                         if r.a.class = ZERO then  -- zero / zero: invalid operation
1310                             v.fpscr(FPSCR_VXZDZ) := '1';
1311                             qnan_result := '1';
1312                         else
1313                             if r.a.class = FINITE then  -- zero_divide is raised only for a finite dividend
1314                                 zero_divide := '1';
1315                             end if;
1316                             v.result_class := INFINITY;  -- finite or infinite A divided by zero
1317                         end if;
1318                         arith_done := '1';
1319                     else -- r.b.class = FINITE, result_class = r.a.class (A is ZERO or INFINITY here)
1320                         arith_done := '1';
1321                     end if;
1322                 end if;
1323
1324             when DO_FSEL =>  -- select C or B depending on operand A, then finish via EXC_RESULT
1325                 v.fpscr(FPSCR_FR) := '0';
1326                 v.fpscr(FPSCR_FI) := '0';
1327                 if r.a.class = ZERO or (r.a.negative = '0' and r.a.class /= NAN) then  -- A is zero (either sign) or a non-negative non-NaN
1328                     v.opsel_a := AIN_C;
1329                 else  -- A is a NaN or a negative non-zero value
1330                     v.opsel_a := AIN_B;
1331                 end if;
1332                 v.quieten_nan := '0';  -- NOTE(review): presumably passes a selected NaN through unmodified - confirm
1333                 v.state := EXC_RESULT;
1334
1335             when DO_FSQRT =>  -- dispatch state for square root of B
1336                 -- r.opsel_a = AIN_B
1337                 v.result_class := r.b.class;  -- default: result has B's class and sign
1338                 v.result_sign := r.b.negative;
1339                 v.fpscr(FPSCR_FR) := '0';
1340                 v.fpscr(FPSCR_FI) := '0';
1341                 v.use_b := '1';
1342                 case r.b.class is
1343                     when FINITE =>
1344                         v.result_exp := r.b.exponent;
1345                         if r.b.negative = '1' then  -- square root of a negative finite value: invalid
1346                             v.fpscr(FPSCR_VXSQRT) := '1';
1347                             qnan_result := '1';
1348                         elsif r.b.mantissa(54) = '0' then  -- units bit clear: B is denormalized
1349                             v.state := RENORM_B;
1350                         elsif r.b.exponent(0) = '0' then  -- normalized with an even exponent
1351                             v.state := SQRT_1;
1352                         else  -- normalized with an odd exponent: go via RENORM_B2 with shift = 1
1353                             v.shift := to_signed(1, EXP_BITS);
1354                             v.state := RENORM_B2;
1355                         end if;
1356                     when NAN =>
1357                         v.state := NAN_RESULT;
1358                     when ZERO =>
1359                         -- result is B (class and sign were copied above)
1360                         arith_done := '1';
1361                     when INFINITY =>
1362                         if r.b.negative = '1' then  -- square root of -infinity: invalid
1363                             v.fpscr(FPSCR_VXSQRT) := '1';
1364                             qnan_result := '1';
1365                         -- else result is B
1366                         end if;
1367                         arith_done := '1';
1368                 end case;
1369
1370             when DO_FRE =>  -- dispatch state for the reciprocal estimate of B
1371                 -- r.opsel_a = AIN_B
1372                 v.result_class := r.b.class;  -- default; overridden for infinity and zero below
1373                 v.result_sign := r.b.negative;  -- result keeps B's sign
1374                 v.fpscr(FPSCR_FR) := '0';
1375                 v.fpscr(FPSCR_FI) := '0';
1376                 v.use_b := '1';
1377                 case r.b.class is
1378                     when FINITE =>
1379                         v.result_exp := - r.b.exponent;  -- reciprocal negates the unbiased exponent
1380                         if r.b.mantissa(54) = '0' then  -- units bit clear: B is denormalized
1381                             v.state := RENORM_B;
1382                         else
1383                             v.state := FRE_1;
1384                         end if;
1385                     when NAN =>
1386                         v.state := NAN_RESULT;
1387                     when INFINITY =>  -- 1 / infinity gives zero
1388                         v.result_class := ZERO;
1389                         arith_done := '1';
1390                     when ZERO =>  -- 1 / zero gives infinity and raises zero_divide
1391                         v.result_class := INFINITY;
1392                         zero_divide := '1';
1393                         arith_done := '1';
1394                 end case;
1395
1396             when DO_FRSQRTE =>  -- dispatch state for the reciprocal square root estimate of B
1397                 -- r.opsel_a = AIN_B
1398                 v.result_class := r.b.class;  -- default; overridden for infinity and zero below
1399                 v.result_sign := r.b.negative;
1400                 v.fpscr(FPSCR_FR) := '0';
1401                 v.fpscr(FPSCR_FI) := '0';
1402                 v.use_b := '1';
1403                 v.shift := to_signed(1, EXP_BITS);  -- set unconditionally, before the class is examined
1404                 case r.b.class is
1405                     when FINITE =>
1406                         v.result_exp := r.b.exponent;
1407                         if r.b.negative = '1' then  -- negative finite operand: invalid
1408                             v.fpscr(FPSCR_VXSQRT) := '1';
1409                             qnan_result := '1';
1410                         elsif r.b.mantissa(54) = '0' then  -- units bit clear: B is denormalized
1411                             v.state := RENORM_B;
1412                         elsif r.b.exponent(0) = '0' then  -- normalized with an even exponent
1413                             v.state := RSQRT_1;
1414                         else  -- normalized with an odd exponent: go via RENORM_B2 (shift = 1)
1415                             v.state := RENORM_B2;
1416                         end if;
1417                     when NAN =>
1418                         v.state := NAN_RESULT;
1419                     when INFINITY =>
1420                         if r.b.negative = '1' then  -- -infinity: invalid
1421                             v.fpscr(FPSCR_VXSQRT) := '1';
1422                             qnan_result := '1';
1423                         else  -- +infinity gives zero
1424                             v.result_class := ZERO;
1425                         end if;
1426                         arith_done := '1';
1427                     when ZERO =>  -- zero gives infinity (with B's sign) and raises zero_divide
1428                         v.result_class := INFINITY;
1429                         zero_divide := '1';
1430                         arith_done := '1';
1431                 end case;
1432
1433             when DO_FMADD =>  -- dispatch state for fused multiply-add: product A * C combined with addend B
1434                 -- fmadd, fmsub, fnmadd, fnmsub
1435                 -- r.opsel_a = AIN_A if A is denorm, else AIN_C if C is denorm,
1436                 -- else AIN_B
1437                 v.result_sign := r.a.negative;  -- defaults; every path that produces a result below overrides what it needs
1438                 v.result_class := r.a.class;
1439                 v.result_exp := r.a.exponent;
1440                 v.fpscr(FPSCR_FR) := '0';
1441                 v.fpscr(FPSCR_FI) := '0';
1442                 v.use_a := '1';
1443                 v.use_b := '1';
1444                 v.use_c := '1';
1445                 is_add := r.a.negative xor r.c.negative xor r.b.negative xor r.insn(1);  -- effective add vs subtract, from the three signs and insn(1)
1446                 if r.a.class = FINITE and r.c.class = FINITE and
1447                     (r.b.class = FINITE or r.b.class = ZERO) then
1448                     v.is_subtract := not is_add;
1449                     mulexp := r.a.exponent + r.c.exponent;  -- exponent of the product
1450                     v.result_exp := mulexp;
1451                     -- Make sure A and C are normalized
1452                     if r.a.mantissa(54) = '0' then  -- bit 54 is the units bit of the 10.54 mantissa
1453                         v.state := RENORM_A;
1454                     elsif r.c.mantissa(54) = '0' then
1455                         v.state := RENORM_C;
1456                     elsif r.b.class = ZERO then
1457                         -- no addend, degenerates to multiply
1458                         v.result_sign := r.a.negative xor r.c.negative xor r.insn(2);  -- product sign, flipped by insn(2)
1459                         f_to_multiply.valid <= '1';
1460                         v.is_multiply := '1';
1461                         v.state := MULT_1;
1462                     elsif r.madd_cmp = '0' then
1463                         -- addend is bigger, do multiply first
1464                         v.result_sign := not (r.b.negative xor r.insn(1) xor r.insn(2));
1465                         f_to_multiply.valid <= '1';
1466                         v.state := FMADD_1;
1467                     else
1468                         -- product is bigger, shift B right and use it as the
1469                         -- addend to the multiplier
1470                         v.shift := r.b.exponent - mulexp + to_signed(64, EXP_BITS);  -- FMADD_2 subtracts the 64 again
1471                         -- for subtract, multiplier does B - A * C
1472                         v.result_sign := not (r.a.negative xor r.c.negative xor r.insn(2) xor is_add);
1473                         v.result_exp := r.b.exponent;
1474                         v.state := FMADD_2;
1475                     end if;
1476                 else
1477                     if r.a.class = NAN or r.b.class = NAN or r.c.class = NAN then
1478                         v.state := NAN_RESULT;
1479                     elsif (r.a.class = ZERO and r.c.class = INFINITY) or
1480                         (r.a.class = INFINITY and r.c.class = ZERO) then
1481                         -- zero * infinity: invalid operation, construct QNaN
1482                         v.fpscr(FPSCR_VXIMZ) := '1';
1483                         qnan_result := '1';
1484                     elsif r.a.class = INFINITY or r.c.class = INFINITY then
1485                         if r.b.class = INFINITY and is_add = '0' then
1486                             -- infinite product minus infinite addend: invalid operation, construct QNaN
1487                             v.fpscr(FPSCR_VXISI) := '1';
1488                             qnan_result := '1';
1489                         else
1490                             -- result is infinity
1491                             v.result_class := INFINITY;
1492                             v.result_sign := r.a.negative xor r.c.negative xor r.insn(2);  -- product sign, flipped by insn(2)
1493                             arith_done := '1';
1494                         end if;
1495                     else
1496                         -- Here A is zero, C is zero, or B is infinity
1497                         -- Result is +/-B in all of those cases
1498                         v.opsel_a := AIN_B;
1499                         if r.b.class /= ZERO or is_add = '1' then
1500                             v.negate := not (r.insn(1) xor r.insn(2));
1501                         else
1502                             -- have to be careful about rule for 0 - 0 result sign
1503                             v.negate := r.b.negative xor (r.round_mode(1) and r.round_mode(0)) xor r.insn(2);
1504                         end if;
1505                         v.state := EXC_RESULT;
1506                     end if;
1507                 end if;
1508
1509             when RENORM_A =>  -- first cycle of renormalizing a denormalized A operand
1510                 renormalize := '1';
1511                 v.state := RENORM_A2;
1512                 if r.insn(4) = '1' then  -- fmul/fmadd (per the RENORM_A2 comment): other operand is C
1513                     v.opsel_a := AIN_C;
1514                 else  -- fdiv: other operand is B
1515                     v.opsel_a := AIN_B;
1516                 end if;
1517
1518             when RENORM_A2 =>  -- second cycle for A: store the result, then pick the next step
1519                 -- r.opsel_a = AIN_C for fmul/fmadd, AIN_B for fdiv
1520                 set_a := '1';
1521                 v.result_exp := new_exp;  -- new_exp is computed outside this case arm
1522                 if r.insn(4) = '1' then  -- fmul/fmadd path
1523                     if r.c.mantissa(54) = '1' then  -- C is already normalized
1524                         if r.insn(3) = '0' or r.b.class = ZERO then  -- no addend to consider: plain multiply
1525                             v.first := '1';
1526                             v.state := MULT_1;
1527                         else
1528                             v.madd_cmp := '0';  -- recompute madd_cmp using the renormalized exponent
1529                             if new_exp + 1 >= r.b.exponent then
1530                                 v.madd_cmp := '1';
1531                             end if;
1532                             v.opsel_a := AIN_B;
1533                             v.state := DO_FMADD;  -- re-enter the dispatch state now that A is normalized
1534                         end if;
1535                     else  -- C also needs renormalizing
1536                         v.state := RENORM_C;
1537                     end if;
1538                 else  -- fdiv path
1539                     if r.b.mantissa(54) = '1' then  -- B is already normalized
1540                         v.first := '1';
1541                         v.state := DIV_2;
1542                     else
1543                         v.state := RENORM_B;
1544                     end if;
1545                 end if;
1546
1547             when RENORM_B =>  -- first cycle of renormalizing a denormalized B operand
1548                 renormalize := '1';
1549                 renorm_sqrt := r.is_sqrt;  -- pass the is_sqrt flag along with the renormalize request
1550                 v.state := RENORM_B2;
1551
1552             when RENORM_B2 =>  -- second cycle for B: store the result and fix up the exponent
1553                 set_b := '1';
1554                 if r.is_sqrt = '0' then
1555                     v.result_exp := r.result_exp + r.shift;  -- not a square-root op: adjust by r.shift
1556                 else
1557                     v.result_exp := new_exp;  -- square-root op: take new_exp (computed outside this arm)
1558                 end if;
1559                 v.opsel_a := AIN_B;
1560                 v.state := LOOKUP;  -- LOOKUP then dispatches on r.insn to the right algorithm
1561
1562             when RENORM_C =>  -- first cycle of renormalizing a denormalized C operand
1563                 renormalize := '1';
1564                 v.state := RENORM_C2;
1565
1566             when RENORM_C2 =>  -- second cycle for C: store the result, then multiply or re-dispatch
1567                 set_c := '1';
1568                 v.result_exp := new_exp;  -- new_exp is computed outside this case arm
1569                 if r.insn(3) = '0' or r.b.class = ZERO then  -- no addend to consider: plain multiply
1570                     v.first := '1';
1571                     v.state := MULT_1;
1572                 else
1573                     v.madd_cmp := '0';  -- recompute madd_cmp using the renormalized exponent
1574                     if new_exp + 1 >= r.b.exponent then
1575                         v.madd_cmp := '1';
1576                     end if;
1577                     v.opsel_a := AIN_B;
1578                     v.state := DO_FMADD;  -- re-enter the dispatch state now that C is normalized
1579                 end if;
1580
1581             when ADD_1 =>  -- add/subtract path entered from DO_FADD when r.exp_cmp /= '0'
1582                 -- transferring B to R
1583                 v.shift := r.b.exponent - r.a.exponent;  -- shift count for ADD_SHIFT
1584                 v.result_exp := r.b.exponent;  -- result takes B's exponent on this path
1585                 v.longmask := '0';
1586                 v.state := ADD_SHIFT;
1587
1588             when ADD_SHIFT =>  -- align the operand held in R by shifting it, recording lost bits in X
1589                 -- r.shift = - exponent difference, r.longmask = 0
1590                 opsel_r <= RES_SHIFT;  -- R takes the shifter output
1591                 v.x := s_nz;
1592                 set_x := '1';
1593                 v.longmask := r.single_prec;
1594                 if r.add_bsmall = '1' then  -- choose which operand ADD_2 adds to R
1595                     v.opsel_a := AIN_A;
1596                 else
1597                     v.opsel_a := AIN_B;
1598                 end if;
1599                 v.state := ADD_2;
1600
1601             when ADD_2 =>  -- perform the add, or subtract by inverting the R input
1602                 -- r.opsel_a = AIN_A if r.add_bsmall = 1 else AIN_B
1603                 opsel_b <= BIN_R;
1604                 opsel_binv <= r.is_subtract;  -- invert the R input when subtracting
1605                 carry_in <= r.is_subtract and not r.x;  -- carry-in is withheld when X (set in ADD_SHIFT) is 1
1606                 v.shift := to_signed(-1, EXP_BITS);  -- ADD_3 expects r.shift = -1
1607                 v.state := ADD_3;
1608
1609             when ADD_3 =>  -- inspect the sum in R and choose how to finish
1610                 -- check for overflow or negative result (can't get both)
1611                 -- r.shift = -1
1612                 if r.r(63) = '1' then
1613                     -- result is opposite sign to expected
1614                     v.result_sign := not r.result_sign;
1615                     opsel_ainv <= '1';  -- inverted input plus carry-in of 1
1616                     carry_in <= '1';
1617                     v.state := FINISH;
1618                 elsif r.r(55) = '1' then
1619                     -- sum overflowed, shift right
1620                     opsel_r <= RES_SHIFT;
1621                     set_x := '1';
1622                     v.shift := to_signed(-2, EXP_BITS);
1623                     if exp_huge = '1' then
1624                         v.state := ROUND_OFLOW;
1625                     else
1626                         v.state := ROUNDING;
1627                     end if;
1628                 elsif r.r(54) = '1' then  -- units bit set: already normalized, round directly
1629                     set_x := '1';
1630                     v.shift := to_signed(-2, EXP_BITS);
1631                     v.state := ROUNDING;
1632                 elsif (r_hi_nz or r_lo_nz or r.r(1) or r.r(0)) = '0' then  -- R is entirely zero
1633                     -- r.x must be zero at this point
1634                     v.result_class := ZERO;
1635                     if r.is_subtract = '1' then
1636                         -- set result sign depending on rounding mode: negative only when round_mode = "11"
1637                         v.result_sign := r.round_mode(1) and r.round_mode(0);
1638                     end if;
1639                     arith_done := '1';
1640                 else  -- non-zero, with bits 63, 55 and 54 all clear: renormalize
1641                     renormalize := '1';
1642                     v.state := NORMALIZE;
1643                 end if;
1644
1645             when CMP_1 =>  -- compare step 1: subtract R from A; CMP_2 inspects the result
1646                 -- r.opsel_a = AIN_A
1647                 opsel_b <= BIN_R;
1648                 opsel_binv <= '1';  -- inverted R with carry-in of 1, i.e. A - R
1649                 carry_in <= '1';
1650                 v.state := CMP_2;
1651
1652             when CMP_2 =>  -- compare step 2: turn the difference in R into the 4-bit compare result
1653                 if r.r(63) = '1' then
1654                     -- A is smaller in magnitude
1655                     v.cr_result := not r.a.negative & r.a.negative & "00";  -- top bit set when A is positive, next bit when A is negative
1656                 elsif (r_hi_nz or r_lo_nz) = '0' then  -- difference is zero
1657                     v.cr_result := "0010";
1658                 else  -- A is larger in magnitude: the two upper bits are swapped relative to the first case
1659                     v.cr_result := r.a.negative & not r.a.negative & "00";
1660                 end if;
1661                 v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result;  -- mirror the same 4 bits into the FPSCR
1662                 v.instr_done := '1';
1663                 v.state := IDLE;
1664
1665             when MULT_1 =>  -- wait for the multiplier and capture the product in R
1666                 f_to_multiply.valid <= r.first;  -- request the multiply only while r.first is set
1667                 opsel_r <= RES_MULT;
1668                 if multiply_to_f.valid = '1' then  -- product available: move on
1669                     v.state := FINISH;
1670                 end if;
1671
1672             when FMADD_1 =>  -- fmadd, addend-bigger case: wait for the product, then reuse the add path
1673                 -- Addend is bigger here
1674                 v.result_sign := not (r.b.negative xor r.insn(1) xor r.insn(2));
1675                 -- note v.shift is at most -2 here
1676                 v.shift := r.result_exp - r.b.exponent;  -- product exponent minus addend exponent
1677                 opsel_r <= RES_MULT;
1678                 opsel_s <= S_MULT;
1679                 set_s := '1';
1680                 f_to_multiply.valid <= r.first;  -- request the multiply only while r.first is set
1681                 if multiply_to_f.valid = '1' then
1682                     v.longmask := '0';  -- ADD_SHIFT expects r.longmask = 0
1683                     v.state := ADD_SHIFT;
1684                 end if;
1685
1686             when FMADD_2 =>  -- fmadd, product-bigger case, step 1 of aligning the addend
1687                 -- Product is potentially bigger here
1688                 -- r.shift = addend exp - product exp + 64, r.r = r.b.mantissa
1689                 set_s := '1';
1690                 opsel_s <= S_SHIFT;  -- S takes the shifter output
1691                 v.shift := r.shift - to_signed(64, EXP_BITS);  -- remove the 64 added in DO_FMADD
1692                 v.state := FMADD_3;
1693
1694             when FMADD_3 =>  -- step 2 of aligning the addend: shift into R
1695                 -- r.shift = addend exp - product exp
1696                 opsel_r <= RES_SHIFT;
1697                 v.first := '1';  -- so FMADD_4 issues the multiply request
1698                 v.state := FMADD_4;
1699
1700             when FMADD_4 =>  -- run the multiplier with R:S selected as its addend (MULADD_RS)
1701                 msel_add <= MULADD_RS;
1702                 f_to_multiply.valid <= r.first;  -- request the multiply only while r.first is set
1703                 msel_inv <= r.is_subtract;
1704                 opsel_r <= RES_MULT;
1705                 opsel_s <= S_MULT;
1706                 set_s := '1';
1707                 v.shift := to_signed(56, EXP_BITS);  -- FMADD_6 expects r.shift = 56
1708                 if multiply_to_f.valid = '1' then
1709                     if multiply_to_f.result(121) = '1' then  -- bit 121 set: FMADD_5 negates the result
1710                         v.state := FMADD_5;
1711                     else
1712                         v.state := FMADD_6;
1713                     end if;
1714                 end if;
1715
1716             when FMADD_5 =>  -- the multiply-add result came out negative: flip it and the sign
1717                 -- negate R:S:X
1718                 v.result_sign := not r.result_sign;
1719                 opsel_ainv <= '1';
1720                 carry_in <= not (s_nz or r.x);  -- carry into R only when both S and X are zero
1721                 opsel_s <= S_NEG;
1722                 set_s := '1';
1723                 v.shift := to_signed(56, EXP_BITS);  -- FMADD_6 expects r.shift = 56
1724                 v.state := FMADD_6;
1725
1726             when FMADD_6 =>  -- inspect the multiply-add result in R (and S) and choose how to finish
1727                 -- r.shift = 56 (or 0, but only if r is now nonzero)
1728                 if (r.r(56) or r_hi_nz or r_lo_nz or r.r(1) or r.r(0)) = '0' then  -- R is entirely zero
1729                     if s_nz = '0' then
1730                         -- must be a subtraction, and r.x must be zero
1731                         v.result_class := ZERO;
1732                         v.result_sign := r.round_mode(1) and r.round_mode(0);  -- negative zero only when round_mode = "11"
1733                         arith_done := '1';
1734                     else
1735                         -- R is all zeroes but there are non-zero bits in S
1736                         -- so shift them into R and set S to 0
1737                         opsel_r <= RES_SHIFT;
1738                         set_s := '1';
1739                         -- stay in state FMADD_6
1740                     end if;
1741                 elsif r.r(56 downto 54) = "001" then  -- units bit set with nothing above it: already normalized
1742                     v.state := FINISH;
1743                 else
1744                     renormalize := '1';
1745                     v.state := NORMALIZE;
1746                 end if;
1747
1748             when LOOKUP =>  -- one-cycle wait after RENORM_B2, then dispatch on instruction bits
1749                 -- r.opsel_a = AIN_B
1750                 -- wait one cycle for inverse_table[B] lookup
1751                 v.first := '1';
1752                 if r.insn(4) = '0' then
1753                     if r.insn(3) = '0' then  -- insn(4..3) = "00": divide
1754                         v.state := DIV_2;
1755                     else  -- insn(4..3) = "01": square root
1756                         v.state := SQRT_1;
1757                     end if;
1758                 elsif r.insn(2) = '0' then  -- insn(4) /= '0' and insn(2) = '0': reciprocal estimate
1759                     v.state := FRE_1;
1760                 else  -- otherwise: reciprocal square root estimate
1761                     v.state := RSQRT_1;
1762                 end if;
1763
1764             when DIV_2 =>  -- divide iteration, first half; alternates with DIV_3
1765                 -- compute Y = inverse_table[B] (when count=0); P = 2 - B * Y
1766                 msel_1 <= MUL1_B;
1767                 msel_add <= MULADD_CONST;
1768                 msel_inv <= '1';
1769                 if r.count = 0 then  -- first pass: second multiplier input is the lookup table value
1770                     msel_2 <= MUL2_LUT;
1771                 else  -- later passes: second multiplier input is P
1772                     msel_2 <= MUL2_P;
1773                 end if;
1774                 set_y := r.first;
1775                 pshift := '1';
1776                 f_to_multiply.valid <= r.first;  -- request the multiply only while r.first is set
1777                 if multiply_to_f.valid = '1' then
1778                     v.first := '1';
1779                     v.count := r.count + 1;  -- DIV_3 leaves the loop once this reaches 3
1780                     v.state := DIV_3;
1781                 end if;
1782
1783             when DIV_3 =>  -- divide iteration, second half
1784                 -- compute Y = P = P * Y
1785                 msel_1 <= MUL1_Y;
1786                 msel_2 <= MUL2_P;
1787                 f_to_multiply.valid <= r.first;  -- request the multiply only while r.first is set
1788                 pshift := '1';
1789                 if multiply_to_f.valid = '1' then
1790                     v.first := '1';
1791                     if r.count = 3 then  -- three DIV_2/DIV_3 passes done: compute the quotient
1792                         v.state := DIV_4;
1793                     else  -- otherwise run another pass
1794                         v.state := DIV_2;
1795                     end if;
1796                 end if;
1797
1798             when DIV_4 =>  -- multiply A by the refined reciprocal to get the quotient
1799                 -- compute R = P = A * Y (quotient)
1800                 msel_1 <= MUL1_A;
1801                 msel_2 <= MUL2_P;
1802                 set_y := r.first;
1803                 f_to_multiply.valid <= r.first;  -- request the multiply only while r.first is set
1804                 pshift := '1';
1805                 if multiply_to_f.valid = '1' then
1806                     opsel_r <= RES_MULT;  -- capture the product in R
1807                     v.first := '1';
1808                     v.state := DIV_5;
1809                 end if;
1810
1811             when DIV_5 =>  -- form the remainder for the quotient held in R
1812                 -- compute P = A - B * R (remainder)
1813                 msel_1 <= MUL1_B;
1814                 msel_2 <= MUL2_R;
1815                 msel_add <= MULADD_A;
1816                 msel_inv <= '1';
1817                 f_to_multiply.valid <= r.first;  -- request the multiply only while r.first is set
1818                 if multiply_to_f.valid = '1' then
1819                     v.state := DIV_6;
1820                 end if;
1821
1822             when DIV_6 =>  -- final quotient correction based on the remainder in P
1823                 -- test if remainder is 0 or >= B
1824                 if pcmpb_lt = '1' then
1825                     -- quotient is correct, set X if remainder non-zero
1826                     v.x := r.p(58) or px_nz;
1827                 else
1828                     -- quotient needs to be incremented by 1
1829                     carry_in <= '1';
1830                     v.x := not pcmpb_eq;  -- X is clear only when pcmpb_eq is set
1831                 end if;
1832                 v.state := FINISH;
1833
1834             when FRE_1 =>  -- reciprocal estimate: load R from the misc path, then normalize
1835                 opsel_r <= RES_MISC;
1836                 misc_sel <= "0111";  -- same misc selector that SQRT_1 uses to put the table value in R
1837                 v.shift := to_signed(1, EXP_BITS);
1838                 v.state := NORMALIZE;
1839
1840             when FTDIV_1 =>  -- exponent range check that sets bit 1 of the CR result
1841                 v.cr_result(1) := exp_tiny or exp_huge;
1842                 if exp_tiny = '1' or exp_huge = '1' or r.a.class = ZERO or r.first = '0' then  -- nothing more to check: finish
1843                     v.instr_done := '1';
1844                     v.state := IDLE;
1845                 else  -- v.state is not assigned here; NOTE(review): presumably this state repeats with r.first cleared - confirm
1846                     v.shift := r.a.exponent;
1847                     v.doing_ftdiv := "10";
1848                 end if;
1849
1850             when RSQRT_1 =>  -- reciprocal square root estimate: load R, set the exponent, normalize
1851                 opsel_r <= RES_MISC;
1852                 misc_sel <= "0111";  -- same misc selector that SQRT_1 uses to put the table value in R
1853                 sqrt_exp := r.b.exponent(EXP_BITS-1) & r.b.exponent(EXP_BITS-1 downto 1);  -- B's exponent arithmetically shifted right by one (halved)
1854                 v.result_exp := - sqrt_exp;  -- negated because the result is a reciprocal
1855                 v.shift := to_signed(1, EXP_BITS);
1856                 v.state := NORMALIZE;
1857
1858             when SQRT_1 =>  -- square root setup: seed R from the table and start the first multiply
1859                 -- put invsqr[B] in R and compute P = invsqr[B] * B
1860                 -- also transfer B (in R) to A
1861                 set_a := '1';
1862                 opsel_r <= RES_MISC;
1863                 misc_sel <= "0111";
1864                 msel_1 <= MUL1_B;
1865                 msel_2 <= MUL2_LUT;
1866                 f_to_multiply.valid <= '1';  -- multiply is started unconditionally in this state
1867                 v.shift := to_signed(-1, EXP_BITS);  -- SQRT_2 expects r.shift = -1
1868                 v.count := "00";  -- iteration counter, incremented in SQRT_7
1869                 v.state := SQRT_2;
1870
1871             when SQRT_2 =>  -- halve the value in R while the multiplier is busy
1872                 -- shift R right one place
1873                 -- not expecting multiplier result yet
1874                 -- r.shift = -1
1875                 opsel_r <= RES_SHIFT;
1876                 v.first := '1';  -- so SQRT_3 sets Y on its first cycle
1877                 v.state := SQRT_3;
1878
1879             when SQRT_3 =>  -- save R in Y, then collect the product started in SQRT_1
1880                 -- put R into Y, wait for product from multiplier
1881                 msel_2 <= MUL2_R;
1882                 set_y := r.first;  -- Y is written only while r.first is set
1883                 pshift := '1';
1884                 if multiply_to_f.valid = '1' then
1885                     -- put result into R
1886                     opsel_r <= RES_MULT;
1887                     v.first := '1';
1888                     v.state := SQRT_4;
1889                 end if;
1890
1891             when SQRT_4 =>  -- start of one refinement pass (SQRT_4 to SQRT_7)
1892                 -- compute 1.5 - Y * P
1893                 msel_1 <= MUL1_Y;
1894                 msel_2 <= MUL2_P;
1895                 msel_add <= MULADD_CONST;
1896                 msel_inv <= '1';
1897                 f_to_multiply.valid <= r.first;  -- request the multiply only while r.first is set
1898                 pshift := '1';
1899                 if multiply_to_f.valid = '1' then
1900                     v.state := SQRT_5;
1901                 end if;
1902
1903             when SQRT_5 =>  -- issue the Y * P multiply; its result is collected in SQRT_6/SQRT_7
1904                 -- compute Y = Y * P
1905                 msel_1 <= MUL1_Y;
1906                 msel_2 <= MUL2_P;
1907                 f_to_multiply.valid <= '1';  -- multiply is started unconditionally; this state lasts one cycle
1908                 v.first := '1';  -- so SQRT_6 issues its own multiply request
1909                 v.state := SQRT_6;
1910
1911             when SQRT_6 =>  -- issue a second multiply behind the one started in SQRT_5
1912                 -- pipeline in R = R * P
1913                 msel_1 <= MUL1_R;
1914                 msel_2 <= MUL2_P;
1915                 f_to_multiply.valid <= r.first;  -- request the multiply only while r.first is set
1916                 pshift := '1';
1917                 if multiply_to_f.valid = '1' then  -- first product (from SQRT_5) has arrived
1918                     v.first := '1';
1919                     v.state := SQRT_7;
1920                 end if;
1921
1922             when SQRT_7 =>  -- end of one refinement pass: update Y and R, then loop or exit
1923                 -- first multiply is done, put result in Y
1924                 msel_2 <= MUL2_P;
1925                 set_y := r.first;  -- Y is written only while r.first is set
1926                 -- wait for second multiply (should be here already)
1927                 pshift := '1';
1928                 if multiply_to_f.valid = '1' then
1929                     -- put result into R
1930                     opsel_r <= RES_MULT;
1931                     v.first := '1';
1932                     v.count := r.count + 1;
1933                     if r.count < 2 then  -- r.count = 0 or 1: run another pass (three passes in total)
1934                         v.state := SQRT_4;
1935                     else
1936                         v.first := '1';  -- redundant: v.first was already set to '1' above
1937                         v.state := SQRT_8;
1938                     end if;
1939                 end if;
1940
1941             when SQRT_8 =>  -- measure the error of the root estimate held in R
1942                 -- compute P = A - R * R, which can be +ve or -ve
1943                 -- we arranged for B to be put into A earlier (set_a in SQRT_1)
1944                 msel_1 <= MUL1_R;
1945                 msel_2 <= MUL2_R;
1946                 msel_add <= MULADD_A;
1947                 msel_inv <= '1';
1948                 pshift := '1';
1949                 f_to_multiply.valid <= r.first;  -- request the multiply only while r.first is set
1950                 if multiply_to_f.valid = '1' then
1951                     v.first := '1';
1952                     v.state := SQRT_9;
1953                 end if;
1954
1955             when SQRT_9 =>
1956                 -- compute P = P * Y
1957                 -- since Y is an estimate of 1/sqrt(B), this makes P an
1958                 -- estimate of the adjustment needed to R.  Since the error
1959                 -- could be negative and we have an unsigned multiplier, the
1960                 -- upper bits can be wrong, but it turns out the lowest 8 bits
1961                 -- are correct and are all we need (given 3 iterations through
1962                 -- SQRT_4 to SQRT_7).
1963                 msel_1 <= MUL1_Y;
1964                 msel_2 <= MUL2_P;
1965                 pshift := '1';
1966                 f_to_multiply.valid <= r.first;
1967                 if multiply_to_f.valid = '1' then
1968                     v.state := SQRT_10;
1969                 end if;
1970
1971             when SQRT_10 =>
1972                 -- Add the bottom 8 bits of P, sign-extended,
1973                 -- divided by 4, onto R.
1974                 -- The division by 4 is because R is 10.54 format
1975                 -- whereas P is 8.56 format.
1976                 opsel_b <= BIN_PS6;
1977                 sqrt_exp := r.b.exponent(EXP_BITS-1) & r.b.exponent(EXP_BITS-1 downto 1);
1978                 v.result_exp := sqrt_exp;
1979                 v.shift := to_signed(1, EXP_BITS);
1980                 v.first := '1';
1981                 v.state := SQRT_11;
1982
1983             when SQRT_11 =>
1984                 -- compute P = A - R * R (remainder)
1985                 -- also put 2 * R + 1 into B for comparison with P
1986                 msel_1 <= MUL1_R;
1987                 msel_2 <= MUL2_R;
1988                 msel_add <= MULADD_A;
1989                 msel_inv <= '1';
1990                 f_to_multiply.valid <= r.first;
1991                 shiftin := '1';
1992                 set_b := r.first;
1993                 if multiply_to_f.valid = '1' then
1994                     v.state := SQRT_12;
1995                 end if;
1996
1997             when SQRT_12 =>
1998                 -- test if remainder is 0 or >= B = 2*R + 1
1999                 if pcmpb_lt = '1' then
2000                     -- square root is correct, set X if remainder non-zero
2001                     v.x := r.p(58) or px_nz;
2002                 else
2003                     -- square root needs to be incremented by 1
2004                     carry_in <= '1';
2005                     v.x := not pcmpb_eq;
2006                 end if;
2007                 v.state := FINISH;
2008
2009             when INT_SHIFT =>
2010                 -- r.shift = b.exponent - 52
2011                 opsel_r <= RES_SHIFT;
2012                 set_x := '1';
2013                 v.state := INT_ROUND;
2014                 v.shift := to_signed(-2, EXP_BITS);
2015
2016             when INT_ROUND =>
2017                 -- r.shift = -2
2018                 opsel_r <= RES_SHIFT;
2019                 round := fp_rounding(r.r, r.x, '0', r.round_mode, r.result_sign);
2020                 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
2021                 -- Check for negative values that don't round to 0 for fcti*u*
2022                 if r.insn(8) = '1' and r.result_sign = '1' and
2023                     (r_hi_nz or r_lo_nz or v.fpscr(FPSCR_FR)) = '1' then
2024                     v.state := INT_OFLOW;
2025                 else
2026                     v.state := INT_FINAL;
2027                 end if;
2028
2029             when INT_ISHIFT =>
2030                 -- r.shift = b.exponent - 54
2031                 opsel_r <= RES_SHIFT;
2032                 v.state := INT_FINAL;
2033
2034             when INT_FINAL =>
2035                 -- Negate if necessary, and increment for rounding if needed
2036                 opsel_ainv <= r.result_sign;
2037                 carry_in <= r.fpscr(FPSCR_FR) xor r.result_sign;
2038                 -- Check for possible overflows
2039                 case r.insn(9 downto 8) is
2040                     when "00" =>        -- fctiw[z]
2041                         need_check := r.r(31) or (r.r(30) and not r.result_sign);
2042                     when "01" =>        -- fctiwu[z]
2043                         need_check := r.r(31);
2044                     when "10" =>        -- fctid[z]
2045                         need_check := r.r(63) or (r.r(62) and not r.result_sign);
2046                     when others =>      -- fctidu[z]
2047                         need_check := r.r(63);
2048                 end case;
2049                 if need_check = '1' then
2050                     v.state := INT_CHECK;
2051                 else
2052                     if r.fpscr(FPSCR_FI) = '1' then
2053                         v.fpscr(FPSCR_XX) := '1';
2054                     end if;
2055                     arith_done := '1';
2056                 end if;
2057
2058             when INT_CHECK =>
2059                 if r.insn(9) = '0' then
2060                     msb := r.r(31);
2061                 else
2062                     msb := r.r(63);
2063                 end if;
2064                 misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
2065                 if (r.insn(8) = '0' and msb /= r.result_sign) or
2066                     (r.insn(8) = '1' and msb /= '1') then
2067                     opsel_r <= RES_MISC;
2068                     v.fpscr(FPSCR_VXCVI) := '1';
2069                     invalid := '1';
2070                 else
2071                     if r.fpscr(FPSCR_FI) = '1' then
2072                         v.fpscr(FPSCR_XX) := '1';
2073                     end if;
2074                 end if;
2075                 arith_done := '1';
2076
2077             when INT_OFLOW =>
2078                 opsel_r <= RES_MISC;
2079                 misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
2080                 if r.b.class = NAN then
2081                     misc_sel(0) <= '1';
2082                 end if;
2083                 v.fpscr(FPSCR_VXCVI) := '1';
2084                 invalid := '1';
2085                 arith_done := '1';
2086
2087             when FRI_1 =>
2088                 -- r.shift = b.exponent - 52
2089                 opsel_r <= RES_SHIFT;
2090                 set_x := '1';
2091                 v.shift := to_signed(-2, EXP_BITS);
2092                 v.state := ROUNDING;
2093
2094             when FINISH =>
2095                 if r.is_multiply = '1' and px_nz = '1' then
2096                     v.x := '1';
2097                 end if;
2098                 if r.r(63 downto 54) /= "0000000001" then
2099                     renormalize := '1';
2100                     v.state := NORMALIZE;
2101                 else
2102                     set_x := '1';
2103                     if exp_tiny = '1' then
2104                         v.shift := new_exp - min_exp;
2105                         v.state := ROUND_UFLOW;
2106                     elsif exp_huge = '1' then
2107                         v.state := ROUND_OFLOW;
2108                     else
2109                         v.shift := to_signed(-2, EXP_BITS);
2110                         v.state := ROUNDING;
2111                     end if;
2112                 end if;
2113
2114             when NORMALIZE =>
2115                 -- Shift so we have 9 leading zeroes (we know R is non-zero)
2116                 -- r.shift = clz(r.r) - 9
2117                 opsel_r <= RES_SHIFT;
2118                 set_x := '1';
2119                 if exp_tiny = '1' then
2120                     v.shift := new_exp - min_exp;
2121                     v.state := ROUND_UFLOW;
2122                 elsif exp_huge = '1' then
2123                     v.state := ROUND_OFLOW;
2124                 else
2125                     v.shift := to_signed(-2, EXP_BITS);
2126                     v.state := ROUNDING;
2127                 end if;
2128
2129             when ROUND_UFLOW =>
2130                 -- r.shift = - amount by which exponent underflows
2131                 v.tiny := '1';
2132                 if r.fpscr(FPSCR_UE) = '0' then
2133                     -- disabled underflow exception case
2134                     -- have to denormalize before rounding
2135                     opsel_r <= RES_SHIFT;
2136                     set_x := '1';
2137                     v.shift := to_signed(-2, EXP_BITS);
2138                     v.state := ROUNDING;
2139                 else
2140                     -- enabled underflow exception case
2141                     -- if denormalized, have to normalize before rounding
2142                     v.fpscr(FPSCR_UX) := '1';
2143                     v.result_exp := r.result_exp + bias_exp;
2144                     if r.r(54) = '0' then
2145                         renormalize := '1';
2146                         v.state := NORMALIZE;
2147                     else
2148                         v.shift := to_signed(-2, EXP_BITS);
2149                         v.state := ROUNDING;
2150                     end if;
2151                 end if;
2152
2153             when ROUND_OFLOW =>
2154                 v.fpscr(FPSCR_OX) := '1';
2155                 if r.fpscr(FPSCR_OE) = '0' then
2156                     -- disabled overflow exception
2157                     -- result depends on rounding mode
2158                     v.fpscr(FPSCR_XX) := '1';
2159                     v.fpscr(FPSCR_FI) := '1';
2160                     if r.round_mode(1 downto 0) = "00" or
2161                         (r.round_mode(1) = '1' and r.round_mode(0) = r.result_sign) then
2162                         v.result_class := INFINITY;
2163                         v.fpscr(FPSCR_FR) := '1';
2164                     else
2165                         v.fpscr(FPSCR_FR) := '0';
2166                     end if;
2167                     -- construct largest representable number
2168                     v.result_exp := max_exp;
2169                     opsel_r <= RES_MISC;
2170                     misc_sel <= "001" & r.single_prec;
2171                     arith_done := '1';
2172                 else
2173                     -- enabled overflow exception
2174                     v.result_exp := r.result_exp - bias_exp;
2175                     v.shift := to_signed(-2, EXP_BITS);
2176                     v.state := ROUNDING;
2177                 end if;
2178
2179             when ROUNDING =>
2180                 opsel_mask <= '1';
2181                 round := fp_rounding(r.r, r.x, r.single_prec, r.round_mode, r.result_sign);
2182                 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
2183                 if round(1) = '1' then
2184                     -- set mask to increment the LSB for the precision
2185                     opsel_b <= BIN_MASK;
2186                     carry_in <= '1';
2187                     v.shift := to_signed(-1, EXP_BITS);
2188                     v.state := ROUNDING_2;
2189                 else
2190                     if r.r(54) = '0' then
2191                         -- result after masking could be zero, or could be a
2192                         -- denormalized result that needs to be renormalized
2193                         renormalize := '1';
2194                         v.state := ROUNDING_3;
2195                     else
2196                         arith_done := '1';
2197                     end if;
2198                 end if;
2199                 if round(0) = '1' then
2200                     v.fpscr(FPSCR_XX) := '1';
2201                     if r.tiny = '1' then
2202                         v.fpscr(FPSCR_UX) := '1';
2203                     end if;
2204                 end if;
2205
2206             when ROUNDING_2 =>
2207                 -- Check for overflow during rounding
2208                 -- r.shift = -1
2209                 v.x := '0';
2210                 if r.r(55) = '1' then
2211                     opsel_r <= RES_SHIFT;
2212                     if exp_huge = '1' then
2213                         v.state := ROUND_OFLOW;
2214                     else
2215                         arith_done := '1';
2216                     end if;
2217                 elsif r.r(54) = '0' then
2218                     -- Do CLZ so we can renormalize the result
2219                     renormalize := '1';
2220                     v.state := ROUNDING_3;
2221                 else
2222                     arith_done := '1';
2223                 end if;
2224
2225             when ROUNDING_3 =>
2226                 -- r.shift = clz(r.r) - 9
2227                 mant_nz := r_hi_nz or (r_lo_nz and not r.single_prec);
2228                 if mant_nz = '0' then
2229                     v.result_class := ZERO;
2230                     if r.is_subtract = '1' then
2231                         -- set result sign depending on rounding mode
2232                         v.result_sign := r.round_mode(1) and r.round_mode(0);
2233                     end if;
2234                     arith_done := '1';
2235                 else
2236                     -- Renormalize result after rounding
2237                     opsel_r <= RES_SHIFT;
2238                     v.denorm := exp_tiny;
2239                     v.shift := new_exp - to_signed(-1022, EXP_BITS);
2240                     if new_exp < to_signed(-1022, EXP_BITS) then
2241                         v.state := DENORM;
2242                     else
2243                         arith_done := '1';
2244                     end if;
2245                 end if;
2246
2247             when DENORM =>
2248                 -- r.shift = result_exp - (-1022), i.e. result_exp + 1022
2249                 opsel_r <= RES_SHIFT;
2250                 arith_done := '1';
2251
2252             when NAN_RESULT =>
2253                 if (r.use_a = '1' and r.a.class = NAN and r.a.mantissa(53) = '0') or
2254                     (r.use_b = '1' and r.b.class = NAN and r.b.mantissa(53) = '0') or
2255                     (r.use_c = '1' and r.c.class = NAN and r.c.mantissa(53) = '0') then
2256                     -- Signalling NAN
2257                     v.fpscr(FPSCR_VXSNAN) := '1';
2258                     invalid := '1';
2259                 end if;
2260                 if r.use_a = '1' and r.a.class = NAN then
2261                     v.opsel_a := AIN_A;
2262                 elsif r.use_b = '1' and r.b.class = NAN then
2263                     v.opsel_a := AIN_B;
2264                 elsif r.use_c = '1' and r.c.class = NAN then
2265                     v.opsel_a := AIN_C;
2266                 end if;
2267                 v.state := EXC_RESULT;
2268
2269             when EXC_RESULT =>
2270                 -- r.opsel_a = AIN_A, AIN_B or AIN_C according to which input is the result
2271                 case r.opsel_a is
2272                     when AIN_B =>
2273                         v.result_sign := r.b.negative xor r.negate;
2274                         v.result_exp := r.b.exponent;
2275                         v.result_class := r.b.class;
2276                     when AIN_C =>
2277                         v.result_sign := r.c.negative xor r.negate;
2278                         v.result_exp := r.c.exponent;
2279                         v.result_class := r.c.class;
2280                     when others =>
2281                         v.result_sign := r.a.negative xor r.negate;
2282                         v.result_exp := r.a.exponent;
2283                         v.result_class := r.a.class;
2284                 end case;
2285                 arith_done := '1';
2286
2287         end case;
2288
2289         if zero_divide = '1' then
2290             v.fpscr(FPSCR_ZX) := '1';
2291         end if;
2292         if qnan_result = '1' then
2293             invalid := '1';
2294             v.result_class := NAN;
2295             v.result_sign := '0';
2296             misc_sel <= "0001";
2297             opsel_r <= RES_MISC;
2298             arith_done := '1';
2299         end if;
2300         if invalid = '1' then
2301             v.invalid := '1';
2302         end if;
2303         if arith_done = '1' then
2304             -- Enabled invalid exception doesn't write result or FPRF
2305             -- Neither does enabled zero-divide exception
2306             if (v.invalid and r.fpscr(FPSCR_VE)) = '0' and
2307                 (zero_divide and r.fpscr(FPSCR_ZE)) = '0' then
2308                 v.writing_back := '1';
2309                 v.update_fprf := '1';
2310             end if;
2311             v.instr_done := '1';
2312             v.state := IDLE;
2313             update_fx := '1';
2314         end if;
2315
2316         -- Multiplier and divide/square root data path
2317         case msel_1 is
2318             when MUL1_A =>
2319                 f_to_multiply.data1 <= r.a.mantissa(61 downto 0) & "00";
2320             when MUL1_B =>
2321                 f_to_multiply.data1 <= r.b.mantissa(61 downto 0) & "00";
2322             when MUL1_Y =>
2323                 f_to_multiply.data1 <= r.y;
2324             when others =>
2325                 f_to_multiply.data1 <= r.r(61 downto 0) & "00";
2326         end case;
2327         case msel_2 is
2328             when MUL2_C =>
2329                 f_to_multiply.data2 <= r.c.mantissa(61 downto 0) & "00";
2330             when MUL2_LUT =>
2331                 f_to_multiply.data2 <= x"00" & inverse_est & '0' & x"000000000";
2332             when MUL2_P =>
2333                 f_to_multiply.data2 <= r.p;
2334             when others =>
2335                 f_to_multiply.data2 <= r.r(61 downto 0) & "00";
2336         end case;
2337         maddend := (others => '0');
2338         case msel_add is
2339             when MULADD_CONST =>
2340                 -- addend is 2.0 or 1.5 in 16.112 format
2341                 if r.is_sqrt = '0' then
2342                     maddend(113) := '1';                -- 2.0
2343                 else
2344                     maddend(112 downto 111) := "11";    -- 1.5
2345                 end if;
2346             when MULADD_A =>
2347                 -- addend is A in 16.112 format
2348                 maddend(121 downto 58) := r.a.mantissa;
2349             when MULADD_RS =>
2350                 -- addend is concatenation of R and S in 16.112 format
2351                 maddend := "000000" & r.r & r.s & "00";
2352             when others =>
2353         end case;
2354         if msel_inv = '1' then
2355             f_to_multiply.addend <= not maddend;
2356         else
2357             f_to_multiply.addend <= maddend;
2358         end if;
2359         f_to_multiply.not_result <= msel_inv;
2360         if set_y = '1' then
2361             v.y := f_to_multiply.data2;
2362         end if;
2363         if multiply_to_f.valid = '1' then
2364             if pshift = '0' then
2365                 v.p := multiply_to_f.result(63 downto 0);
2366             else
2367                 v.p := multiply_to_f.result(119 downto 56);
2368             end if;
2369         end if;
2370
2371         -- Data path.
2372         -- This has A and B input multiplexers, an adder, a shifter,
2373         -- count-leading-zeroes logic, and a result mux.
2374         if r.longmask = '1' then
2375             mshift := r.shift + to_signed(-29, EXP_BITS);
2376         else
2377             mshift := r.shift;
2378         end if;
2379         if mshift < to_signed(-64, EXP_BITS) then
2380             mask := (others => '1');
2381         elsif mshift >= to_signed(0, EXP_BITS) then
2382             mask := (others => '0');
2383         else
2384             mask := right_mask(unsigned(mshift(5 downto 0)));
2385         end if;
2386         case r.opsel_a is
2387             when AIN_R =>
2388                 in_a0 := r.r;
2389             when AIN_A =>
2390                 in_a0 := r.a.mantissa;
2391             when AIN_B =>
2392                 in_a0 := r.b.mantissa;
2393             when others =>
2394                 in_a0 := r.c.mantissa;
2395         end case;
2396         if (or (mask and in_a0)) = '1' and set_x = '1' then
2397             v.x := '1';
2398         end if;
2399         if opsel_ainv = '1' then
2400             in_a0 := not in_a0;
2401         end if;
2402         in_a <= in_a0;
2403         case opsel_b is
2404             when BIN_ZERO =>
2405                 in_b0 := (others => '0');
2406             when BIN_R =>
2407                 in_b0 := r.r;
2408             when BIN_MASK =>
2409                 in_b0 := mask;
2410             when others =>
2411                 -- BIN_PS6, 6 LSBs of P/4 sign-extended to 64
2412                 in_b0 := std_ulogic_vector(resize(signed(r.p(7 downto 2)), 64));
2413         end case;
2414         if opsel_binv = '1' then
2415             in_b0 := not in_b0;
2416         end if;
2417         in_b <= in_b0;
2418         if r.shift >= to_signed(-64, EXP_BITS) and r.shift <= to_signed(63, EXP_BITS) then
2419             shift_res := shifter_64(r.r & (shiftin or r.s(55)) & r.s(54 downto 0),
2420                                     std_ulogic_vector(r.shift(6 downto 0)));
2421         else
2422             shift_res := (others => '0');
2423         end if;
2424         sum := std_ulogic_vector(unsigned(in_a) + unsigned(in_b) + carry_in);
2425         if opsel_mask = '1' then
2426             sum := sum and not mask;
2427         end if;
2428         case opsel_r is
2429             when RES_SUM =>
2430                 result <= sum;
2431             when RES_SHIFT =>
2432                 result <= shift_res;
2433             when RES_MULT =>
2434                 result <= multiply_to_f.result(121 downto 58);
2435             when others =>
2436                 case misc_sel is
2437                     when "0000" =>
2438                         misc := x"00000000" & (r.fpscr and fpscr_mask);
2439                     when "0001" =>
2440                         -- generated QNaN mantissa
2441                         misc := x"0020000000000000";
2442                     when "0010" =>
2443                         -- mantissa of max representable DP number
2444                         misc := x"007ffffffffffffc";
2445                     when "0011" =>
2446                         -- mantissa of max representable SP number
2447                         misc := x"007fffff80000000";
2448                     when "0100" =>
2449                         -- fmrgow result
2450                         misc := r.a.mantissa(31 downto 0) & r.b.mantissa(31 downto 0);
2451                     when "0110" =>
2452                         -- fmrgew result
2453                         misc := r.a.mantissa(63 downto 32) & r.b.mantissa(63 downto 32);
2454                     when "0111" =>
2455                         misc := 10x"000" & inverse_est & 35x"000000000";
2456                     when "1000" =>
2457                         -- max positive result for fctiw[z]
2458                         misc := x"000000007fffffff";
2459                     when "1001" =>
2460                         -- max negative result for fctiw[z]
2461                         misc := x"ffffffff80000000";
2462                     when "1010" =>
2463                         -- max positive result for fctiwu[z]
2464                         misc := x"00000000ffffffff";
2465                     when "1011" =>
2466                         -- saturated low result for fctiwu[z] (zero; unsigned has no negative values)
2467                         misc := x"0000000000000000";
2468                     when "1100" =>
2469                         -- max positive result for fctid[z]
2470                         misc := x"7fffffffffffffff";
2471                     when "1101" =>
2472                         -- max negative result for fctid[z]
2473                         misc := x"8000000000000000";
2474                     when "1110" =>
2475                         -- max positive result for fctidu[z]
2476                         misc := x"ffffffffffffffff";
2477                     when "1111" =>
2478                         -- saturated low result for fctidu[z] (zero; unsigned has no negative values)
2479                         misc := x"0000000000000000";
2480                     when others =>
2481                         misc := x"0000000000000000";
2482                 end case;
2483                 result <= misc;
2484         end case;
2485         v.r := result;
2486         if set_s = '1' then
2487             case opsel_s is
2488                 when S_NEG =>
2489                     v.s := std_ulogic_vector(unsigned(not r.s) + (not r.x));
2490                 when S_MULT =>
2491                     v.s := multiply_to_f.result(57 downto 2);
2492                 when S_SHIFT =>
2493                     v.s := shift_res(63 downto 8);
2494                     if shift_res(7 downto 0) /= x"00" then
2495                         v.x := '1';
2496                     end if;
2497                 when others =>
2498                     v.s := (others => '0');
2499             end case;
2500         end if;
2501
2502         if set_a = '1' then
2503             v.a.exponent := new_exp;
2504             v.a.mantissa := shift_res;
2505         end if;
2506         if set_b = '1' then
2507             v.b.exponent := new_exp;
2508             v.b.mantissa := shift_res;
2509         end if;
2510         if set_c = '1' then
2511             v.c.exponent := new_exp;
2512             v.c.mantissa := shift_res;
2513         end if;
2514
2515         if opsel_r = RES_SHIFT then
2516             v.result_exp := new_exp;
2517         end if;
2518
2519         if renormalize = '1' then
2520             clz := count_left_zeroes(r.r);
2521             if renorm_sqrt = '1' then
2522                 -- make denormalized value end up with even exponent
2523                 clz(0) := '1';
2524             end if;
2525             v.shift := resize(signed('0' & clz) - 9, EXP_BITS);
2526         end if;
2527
2528         if r.int_result = '1' then
2529             fp_result <= r.r;
2530         else
2531             fp_result <= pack_dp(r.result_sign, r.result_class, r.result_exp, r.r,
2532                                  r.single_prec, r.quieten_nan);
2533         end if;
2534         if r.update_fprf = '1' then
2535             v.fpscr(FPSCR_C downto FPSCR_FU) := result_flags(r.result_sign, r.result_class,
2536                                                              r.r(54) and not r.denorm);
2537         end if;
2538
2539         v.fpscr(FPSCR_VX) := (or (v.fpscr(FPSCR_VXSNAN downto FPSCR_VXVC))) or
2540                              (or (v.fpscr(FPSCR_VXSOFT downto FPSCR_VXCVI)));
2541         v.fpscr(FPSCR_FEX) := or (v.fpscr(FPSCR_VX downto FPSCR_XX) and
2542                                   v.fpscr(FPSCR_VE downto FPSCR_XE));
2543         if update_fx = '1' and
2544             (v.fpscr(FPSCR_VX downto FPSCR_XX) and not r.old_exc) /= "00000" then
2545             v.fpscr(FPSCR_FX) := '1';
2546         end if;
2547         if r.rc = '1' then
2548             v.cr_result := v.fpscr(FPSCR_FX downto FPSCR_OX);
2549         end if;
2550
2551         if illegal = '1' then
2552             v.instr_done := '0';
2553             v.do_intr := '0';
2554             v.writing_back := '0';
2555             v.busy := '0';
2556             v.state := IDLE;
2557         else
2558             v.do_intr := v.instr_done and v.fpscr(FPSCR_FEX) and r.fe_mode;
2559             if v.state /= IDLE or v.do_intr = '1' then
2560                 v.busy := '1';
2561             end if;
2562         end if;
2563
2564         rin <= v;
2565         e_out.illegal <= illegal;
2566     end process;
2567
2568 end architecture behaviour;