fpu.vhdl

   1 -- Floating-point unit for Microwatt
   2
   3 library ieee;
   4 use ieee.std_logic_1164.all;
   5 use ieee.numeric_std.all;
   6
   7 library work;
   8 use work.insn_helpers.all;
   9 use work.decode_types.all;
  10 use work.crhelpers.all;
  11 use work.helpers.all;
  12 use work.common.all;
  13
  14 entity fpu is                            -- top-level ports of the floating-point unit
  15     port (
  16         clk : in std_ulogic;             -- all registers are updated on the rising edge
  17         rst : in std_ulogic;             -- synchronous reset: sampled inside the clocked process
  18
  19         e_in  : in  Execute1toFPUType;   -- request input; e_in.valid may only be set while the FPU state is IDLE
  20         e_out : out FPUToExecute1Type;   -- status outputs: busy, exception and interrupt
  21
  22         w_out : out FPUToWritebackType   -- result outputs: valid, tag, register write and CR write fields
  23         );
  24 end entity fpu;
  25
  26 architecture behaviour of fpu is
  27     type fp_number_class is (ZERO, FINITE, INFINITY, NAN);  -- operand category; denormals are classed as FINITE by decode_dp
  28
  29     constant EXP_BITS : natural := 13;   -- width of the signed exponent and shift-count fields declared below
  30
  31     type fpu_reg_type is record          -- unpacked operand, as returned by decode_dp
  32         class    : fp_number_class;
  33         negative : std_ulogic;                          -- copy of bit 63 of the source register
  34         exponent : signed(EXP_BITS-1 downto 0);         -- unbiased; decode_dp uses -1022 when the stored exponent field is zero
  35         mantissa : std_ulogic_vector(63 downto 0);      -- 10.54 format: unit bit at bit 54, fraction in bits 53..2 (raw integer if is_int = 1)
  36     end record;
  37
  38     type state_t is (IDLE,       -- reset state; the clocked process asserts that e_in.valid is only set in IDLE
  39                      DO_MCRFS, DO_MTFSB, DO_MTFSFI, DO_MFFS, DO_MTFSF,   -- NOTE(review): DO_* look like per-instruction entry states -- confirm
  40                      DO_FMR, DO_FMRG, DO_FCMP, DO_FTDIV, DO_FTSQRT,
  41                      DO_FCFID, DO_FCTI,
  42                      DO_FRSP, DO_FRI,
  43                      DO_FADD, DO_FMUL, DO_FDIV, DO_FSQRT, DO_FMADD,
  44                      DO_FRE, DO_FRSQRTE,
  45                      DO_FSEL,
  46                      FRI_1,       -- NOTE(review): the remaining names look like follow-on steps of multi-cycle operations -- confirm
  47                      ADD_1, ADD_SHIFT, ADD_2, ADD_3,
  48                      CMP_1, CMP_2,
  49                      MULT_1,
  50                      FMADD_1, FMADD_2, FMADD_3,
  51                      FMADD_4, FMADD_5, FMADD_6,
  52                      LOOKUP,
  53                      DIV_2, DIV_3, DIV_4, DIV_5, DIV_6,
  54                      FRE_1,
  55                      RSQRT_1,
  56                      FTDIV_1,
  57                      SQRT_1, SQRT_2, SQRT_3, SQRT_4,
  58                      SQRT_5, SQRT_6, SQRT_7, SQRT_8,
  59                      SQRT_9, SQRT_10, SQRT_11, SQRT_12,
  60                      INT_SHIFT, INT_ROUND, INT_ISHIFT,
  61                      INT_FINAL, INT_CHECK, INT_OFLOW,
  62                      FINISH, NORMALIZE,
  63                      ROUND_UFLOW, ROUND_OFLOW,
  64                      ROUNDING, ROUNDING_2, ROUNDING_3,
  65                      DENORM,
  66                      RENORM_A, RENORM_A2,
  67                      RENORM_B, RENORM_B2,
  68                      RENORM_C, RENORM_C2,
  69                      NAN_RESULT, EXC_RESULT);
  70
  71     type reg_type is record          -- complete registered state of the FPU (signals r and rin)
  72         state        : state_t;                          -- set to IDLE by reset
  73         busy         : std_ulogic;                       -- drives e_out.busy; cleared by reset
  74         instr_done   : std_ulogic;                       -- gates w_out.valid and w_out.write_cr_enable; cleared by reset
  75         do_intr      : std_ulogic;                       -- drives e_out.interrupt and suppresses w_out.valid; cleared by reset
  76         op           : insn_type_t;
  77         insn         : std_ulogic_vector(31 downto 0);
  78         instr_tag    : instr_tag_t;                      -- passed through to w_out.instr_tag
  79         dest_fpr     : gspr_index_t;                     -- drives w_out.write_reg
  80         fe_mode      : std_ulogic;
  81         rc           : std_ulogic;                       -- together with is_cmp, enables the CR write
  82         is_cmp       : std_ulogic;
  83         single_prec  : std_ulogic;
  84         fpscr        : std_ulogic_vector(31 downto 0);   -- cleared by reset; bit FPSCR_FEX drives e_out.exception
  85         a            : fpu_reg_type;
  86         b            : fpu_reg_type;                     -- b.mantissa supplies the lookup-table address bits
  87         c            : fpu_reg_type;
  88         r            : std_ulogic_vector(63 downto 0);  -- 10.54 format
  89         s            : std_ulogic_vector(55 downto 0);  -- extended fraction
  90         x            : std_ulogic;
  91         p            : std_ulogic_vector(63 downto 0);  -- 8.56 format
  92         y            : std_ulogic_vector(63 downto 0);  -- 8.56 format
  93         result_sign  : std_ulogic;
  94         result_class : fp_number_class;
  95         result_exp   : signed(EXP_BITS-1 downto 0);
  96         shift        : signed(EXP_BITS-1 downto 0);
  97         writing_back : std_ulogic;                       -- drives w_out.write_enable; cleared by reset
  98         int_result   : std_ulogic;
  99         cr_result    : std_ulogic_vector(3 downto 0);    -- replicated into all eight 4-bit fields of w_out.write_cr_data
 100         cr_mask      : std_ulogic_vector(7 downto 0);    -- drives w_out.write_cr_mask
 101         old_exc      : std_ulogic_vector(4 downto 0);
 102         update_fprf  : std_ulogic;
 103         quieten_nan  : std_ulogic;
 104         tiny         : std_ulogic;
 105         denorm       : std_ulogic;
 106         round_mode   : std_ulogic_vector(2 downto 0);
 107         is_subtract  : std_ulogic;
 108         exp_cmp      : std_ulogic;
 109         madd_cmp     : std_ulogic;
 110         add_bsmall   : std_ulogic;
 111         is_multiply  : std_ulogic;
 112         is_sqrt      : std_ulogic;                       -- when 1, lookups address the 1/sqrt(x) part of inverse_table
 113         first        : std_ulogic;
 114         count        : unsigned(1 downto 0);
 115         doing_ftdiv  : std_ulogic_vector(1 downto 0);
 116         opsel_a      : std_ulogic_vector(1 downto 0);
 117         use_a        : std_ulogic;
 118         use_b        : std_ulogic;
 119         use_c        : std_ulogic;
 120         invalid      : std_ulogic;
 121         negate       : std_ulogic;
 122         longmask     : std_ulogic;
 123     end record;
 124
 125     type lookup_table is array(0 to 1023) of std_ulogic_vector(17 downto 0);  -- 1024 entries of 18 bits each
 126
 127     signal r, rin : reg_type;            -- r is the registered state; rin is loaded into r on each clock edge outside reset
 128
 129     signal fp_result     : std_ulogic_vector(63 downto 0);   -- drives w_out.write_data
 130     signal opsel_b       : std_ulogic_vector(1 downto 0);
 131     signal opsel_r       : std_ulogic_vector(1 downto 0);
 132     signal opsel_s       : std_ulogic_vector(1 downto 0);
 133     signal opsel_ainv    : std_ulogic;
 134     signal opsel_mask    : std_ulogic;
 135     signal opsel_binv    : std_ulogic;
 136     signal in_a          : std_ulogic_vector(63 downto 0);
 137     signal in_b          : std_ulogic_vector(63 downto 0);
 138     signal result        : std_ulogic_vector(63 downto 0);
 139     signal carry_in      : std_ulogic;
 140     signal lost_bits     : std_ulogic;
 141     signal r_hi_nz       : std_ulogic;
 142     signal r_lo_nz       : std_ulogic;
 143     signal s_nz          : std_ulogic;
 144     signal misc_sel      : std_ulogic_vector(3 downto 0);
 145     signal f_to_multiply : MultiplyInputType;                -- connected to m_in of the multiply instance
 146     signal multiply_to_f : MultiplyOutputType;               -- connected to m_out of the multiply instance
 147     signal msel_1        : std_ulogic_vector(1 downto 0);
 148     signal msel_2        : std_ulogic_vector(1 downto 0);
 149     signal msel_add      : std_ulogic_vector(1 downto 0);
 150     signal msel_inv      : std_ulogic;
 151     signal inverse_est   : std_ulogic_vector(18 downto 0);   -- registered table output with the implied leading '1' prepended
 152
 153     -- opsel values: 2-bit selector encodings (the multiplexers that decode them are outside this view)
 154     constant AIN_R    : std_ulogic_vector(1 downto 0) := "00";   -- NOTE(review): AIN_* presumably pair with r.opsel_a -- confirm
 155     constant AIN_A    : std_ulogic_vector(1 downto 0) := "01";
 156     constant AIN_B    : std_ulogic_vector(1 downto 0) := "10";
 157     constant AIN_C    : std_ulogic_vector(1 downto 0) := "11";
 158
 159     constant BIN_ZERO : std_ulogic_vector(1 downto 0) := "00";   -- NOTE(review): BIN_* presumably pair with opsel_b -- confirm
 160     constant BIN_R    : std_ulogic_vector(1 downto 0) := "01";
 161     constant BIN_RND  : std_ulogic_vector(1 downto 0) := "10";
 162     constant BIN_PS6  : std_ulogic_vector(1 downto 0) := "11";
 163
 164     constant RES_SUM   : std_ulogic_vector(1 downto 0) := "00";  -- NOTE(review): RES_* presumably pair with opsel_r -- confirm
 165     constant RES_SHIFT : std_ulogic_vector(1 downto 0) := "01";
 166     constant RES_MULT  : std_ulogic_vector(1 downto 0) := "10";
 167     constant RES_MISC  : std_ulogic_vector(1 downto 0) := "11";
 168
 169     constant S_ZERO  : std_ulogic_vector(1 downto 0) := "00";    -- NOTE(review): S_* presumably pair with opsel_s -- confirm
 170     constant S_NEG   : std_ulogic_vector(1 downto 0) := "01";
 171     constant S_SHIFT : std_ulogic_vector(1 downto 0) := "10";
 172     constant S_MULT  : std_ulogic_vector(1 downto 0) := "11";
 173
 174     -- msel values: 2-bit selector encodings (their use is outside this view)
 175     constant MUL1_A : std_ulogic_vector(1 downto 0) := "00";     -- NOTE(review): MUL1_* presumably pair with msel_1 -- confirm
 176     constant MUL1_B : std_ulogic_vector(1 downto 0) := "01";
 177     constant MUL1_Y : std_ulogic_vector(1 downto 0) := "10";
 178     constant MUL1_R : std_ulogic_vector(1 downto 0) := "11";
 179
 180     constant MUL2_C   : std_ulogic_vector(1 downto 0) := "00";   -- NOTE(review): MUL2_* presumably pair with msel_2 -- confirm
 181     constant MUL2_LUT : std_ulogic_vector(1 downto 0) := "01";
 182     constant MUL2_P   : std_ulogic_vector(1 downto 0) := "10";
 183     constant MUL2_R   : std_ulogic_vector(1 downto 0) := "11";
 184
 185     constant MULADD_ZERO  : std_ulogic_vector(1 downto 0) := "00";  -- NOTE(review): MULADD_* presumably pair with msel_add -- confirm
 186     constant MULADD_CONST : std_ulogic_vector(1 downto 0) := "01";
 187     constant MULADD_A     : std_ulogic_vector(1 downto 0) := "10";
 188     constant MULADD_RS    : std_ulogic_vector(1 downto 0) := "11";
 189
 190     -- Inverse lookup table, indexed by a 10-bit address: two block-select
 191     -- bits followed by the top 8 fraction bits.  The first 256 entries
 192     -- (select "00") are the reciprocal (1/x) lookup table, and the remaining
 193     -- 768 entries (select "01", "10", "11") are the reciprocal square root table.
 194     -- Output range is [0.5, 1) in 0.19 format, though the top bit isn't
 195     -- stored since it is always 1.  Each output value is the inverse of
 196     -- the center of the input range for the value, i.e. entry 0 is
 197     -- 1 / (1 + 1/512), entry 1 is 1 / (1 + 3/512), etc.
 198     signal inverse_table : lookup_table := (
 199         -- 1/x lookup table
 200         -- Unit bit is assumed to be 1, so input range is [1, 2)
 201         18x"3fc01", 18x"3f411", 18x"3ec31", 18x"3e460", 18x"3dc9f", 18x"3d4ec", 18x"3cd49", 18x"3c5b5",
 202         18x"3be2f", 18x"3b6b8", 18x"3af4f", 18x"3a7f4", 18x"3a0a7", 18x"39968", 18x"39237", 18x"38b14",
 203         18x"383fe", 18x"37cf5", 18x"375f9", 18x"36f0a", 18x"36828", 18x"36153", 18x"35a8a", 18x"353ce",
 204         18x"34d1e", 18x"3467a", 18x"33fe3", 18x"33957", 18x"332d7", 18x"32c62", 18x"325f9", 18x"31f9c",
 205         18x"3194a", 18x"31303", 18x"30cc7", 18x"30696", 18x"30070", 18x"2fa54", 18x"2f443", 18x"2ee3d",
 206         18x"2e841", 18x"2e250", 18x"2dc68", 18x"2d68b", 18x"2d0b8", 18x"2caee", 18x"2c52e", 18x"2bf79",
 207         18x"2b9cc", 18x"2b429", 18x"2ae90", 18x"2a900", 18x"2a379", 18x"29dfb", 18x"29887", 18x"2931b",
 208         18x"28db8", 18x"2885e", 18x"2830d", 18x"27dc4", 18x"27884", 18x"2734d", 18x"26e1d", 18x"268f6",
 209         18x"263d8", 18x"25ec1", 18x"259b3", 18x"254ac", 18x"24fad", 18x"24ab7", 18x"245c8", 18x"240e1",
 210         18x"23c01", 18x"23729", 18x"23259", 18x"22d90", 18x"228ce", 18x"22413", 18x"21f60", 18x"21ab4",
 211         18x"2160f", 18x"21172", 18x"20cdb", 18x"2084b", 18x"203c2", 18x"1ff40", 18x"1fac4", 18x"1f64f",
 212         18x"1f1e1", 18x"1ed79", 18x"1e918", 18x"1e4be", 18x"1e069", 18x"1dc1b", 18x"1d7d4", 18x"1d392",
 213         18x"1cf57", 18x"1cb22", 18x"1c6f3", 18x"1c2ca", 18x"1bea7", 18x"1ba8a", 18x"1b672", 18x"1b261",
 214         18x"1ae55", 18x"1aa50", 18x"1a64f", 18x"1a255", 18x"19e60", 18x"19a70", 18x"19686", 18x"192a2",
 215         18x"18ec3", 18x"18ae9", 18x"18715", 18x"18345", 18x"17f7c", 18x"17bb7", 18x"177f7", 18x"1743d",
 216         18x"17087", 18x"16cd7", 18x"1692c", 18x"16585", 18x"161e4", 18x"15e47", 18x"15ab0", 18x"1571d",
 217         18x"1538e", 18x"15005", 18x"14c80", 18x"14900", 18x"14584", 18x"1420d", 18x"13e9b", 18x"13b2d",
 218         18x"137c3", 18x"1345e", 18x"130fe", 18x"12da2", 18x"12a4a", 18x"126f6", 18x"123a7", 18x"1205c",
 219         18x"11d15", 18x"119d2", 18x"11694", 18x"11359", 18x"11023", 18x"10cf1", 18x"109c2", 18x"10698",
 220         18x"10372", 18x"10050", 18x"0fd31", 18x"0fa17", 18x"0f700", 18x"0f3ed", 18x"0f0de", 18x"0edd3",
 221         18x"0eacb", 18x"0e7c7", 18x"0e4c7", 18x"0e1ca", 18x"0ded2", 18x"0dbdc", 18x"0d8eb", 18x"0d5fc",
 222         18x"0d312", 18x"0d02b", 18x"0cd47", 18x"0ca67", 18x"0c78a", 18x"0c4b1", 18x"0c1db", 18x"0bf09",
 223         18x"0bc3a", 18x"0b96e", 18x"0b6a5", 18x"0b3e0", 18x"0b11e", 18x"0ae5f", 18x"0aba3", 18x"0a8eb",
 224         18x"0a636", 18x"0a383", 18x"0a0d4", 18x"09e28", 18x"09b80", 18x"098da", 18x"09637", 18x"09397",
 225         18x"090fb", 18x"08e61", 18x"08bca", 18x"08936", 18x"086a5", 18x"08417", 18x"0818c", 18x"07f04",
 226         18x"07c7e", 18x"079fc", 18x"0777c", 18x"074ff", 18x"07284", 18x"0700d", 18x"06d98", 18x"06b26",
 227         18x"068b6", 18x"0664a", 18x"063e0", 18x"06178", 18x"05f13", 18x"05cb1", 18x"05a52", 18x"057f5",
 228         18x"0559a", 18x"05342", 18x"050ed", 18x"04e9a", 18x"04c4a", 18x"049fc", 18x"047b0", 18x"04567",
 229         18x"04321", 18x"040dd", 18x"03e9b", 18x"03c5c", 18x"03a1f", 18x"037e4", 18x"035ac", 18x"03376",
 230         18x"03142", 18x"02f11", 18x"02ce2", 18x"02ab5", 18x"0288b", 18x"02663", 18x"0243d", 18x"02219",
 231         18x"01ff7", 18x"01dd8", 18x"01bbb", 18x"019a0", 18x"01787", 18x"01570", 18x"0135b", 18x"01149",
 232         18x"00f39", 18x"00d2a", 18x"00b1e", 18x"00914", 18x"0070c", 18x"00506", 18x"00302", 18x"00100",
 233         -- 1/sqrt(x) lookup table
 234         -- Input is in the range [1, 4), i.e. two bits to the left of the
 235         -- binary point.  Those 2 bits index the following 3 blocks of 256 values.
 236         -- 1.0 ... 1.9999
 237         18x"3fe00", 18x"3fa06", 18x"3f612", 18x"3f224", 18x"3ee3a", 18x"3ea58", 18x"3e67c", 18x"3e2a4",
 238         18x"3ded2", 18x"3db06", 18x"3d73e", 18x"3d37e", 18x"3cfc2", 18x"3cc0a", 18x"3c85a", 18x"3c4ae",
 239         18x"3c106", 18x"3bd64", 18x"3b9c8", 18x"3b630", 18x"3b29e", 18x"3af10", 18x"3ab86", 18x"3a802",
 240         18x"3a484", 18x"3a108", 18x"39d94", 18x"39a22", 18x"396b6", 18x"3934e", 18x"38fea", 18x"38c8c",
 241         18x"38932", 18x"385dc", 18x"3828a", 18x"37f3e", 18x"37bf6", 18x"378b2", 18x"37572", 18x"37236",
 242         18x"36efe", 18x"36bca", 18x"3689a", 18x"36570", 18x"36248", 18x"35f26", 18x"35c06", 18x"358ea",
 243         18x"355d4", 18x"352c0", 18x"34fb0", 18x"34ca4", 18x"3499c", 18x"34698", 18x"34398", 18x"3409c",
 244         18x"33da2", 18x"33aac", 18x"337bc", 18x"334cc", 18x"331e2", 18x"32efc", 18x"32c18", 18x"32938",
 245         18x"3265a", 18x"32382", 18x"320ac", 18x"31dd8", 18x"31b0a", 18x"3183e", 18x"31576", 18x"312b0",
 246         18x"30fee", 18x"30d2e", 18x"30a74", 18x"307ba", 18x"30506", 18x"30254", 18x"2ffa4", 18x"2fcf8",
 247         18x"2fa4e", 18x"2f7a8", 18x"2f506", 18x"2f266", 18x"2efca", 18x"2ed2e", 18x"2ea98", 18x"2e804",
 248         18x"2e572", 18x"2e2e4", 18x"2e058", 18x"2ddce", 18x"2db48", 18x"2d8c6", 18x"2d646", 18x"2d3c8",
 249         18x"2d14c", 18x"2ced4", 18x"2cc5e", 18x"2c9ea", 18x"2c77a", 18x"2c50c", 18x"2c2a2", 18x"2c038",
 250         18x"2bdd2", 18x"2bb70", 18x"2b90e", 18x"2b6b0", 18x"2b454", 18x"2b1fa", 18x"2afa4", 18x"2ad4e",
 251         18x"2aafc", 18x"2a8ac", 18x"2a660", 18x"2a414", 18x"2a1cc", 18x"29f86", 18x"29d42", 18x"29b00",
 252         18x"298c2", 18x"29684", 18x"2944a", 18x"29210", 18x"28fda", 18x"28da6", 18x"28b74", 18x"28946",
 253         18x"28718", 18x"284ec", 18x"282c4", 18x"2809c", 18x"27e78", 18x"27c56", 18x"27a34", 18x"27816",
 254         18x"275fa", 18x"273e0", 18x"271c8", 18x"26fb0", 18x"26d9c", 18x"26b8a", 18x"2697a", 18x"2676c",
 255         18x"26560", 18x"26356", 18x"2614c", 18x"25f46", 18x"25d42", 18x"25b40", 18x"2593e", 18x"25740",
 256         18x"25542", 18x"25348", 18x"2514e", 18x"24f58", 18x"24d62", 18x"24b6e", 18x"2497c", 18x"2478c",
 257         18x"2459e", 18x"243b0", 18x"241c6", 18x"23fde", 18x"23df6", 18x"23c10", 18x"23a2c", 18x"2384a",
 258         18x"2366a", 18x"2348c", 18x"232ae", 18x"230d2", 18x"22efa", 18x"22d20", 18x"22b4a", 18x"22976",
 259         18x"227a2", 18x"225d2", 18x"22402", 18x"22234", 18x"22066", 18x"21e9c", 18x"21cd2", 18x"21b0a",
 260         18x"21944", 18x"2177e", 18x"215ba", 18x"213fa", 18x"21238", 18x"2107a", 18x"20ebc", 18x"20d00",
 261         18x"20b46", 18x"2098e", 18x"207d6", 18x"20620", 18x"2046c", 18x"202b8", 18x"20108", 18x"1ff58",
 262         18x"1fda8", 18x"1fbfc", 18x"1fa50", 18x"1f8a4", 18x"1f6fc", 18x"1f554", 18x"1f3ae", 18x"1f208",
 263         18x"1f064", 18x"1eec2", 18x"1ed22", 18x"1eb82", 18x"1e9e4", 18x"1e846", 18x"1e6aa", 18x"1e510",
 264         18x"1e378", 18x"1e1e0", 18x"1e04a", 18x"1deb4", 18x"1dd20", 18x"1db8e", 18x"1d9fc", 18x"1d86c",
 265         18x"1d6de", 18x"1d550", 18x"1d3c4", 18x"1d238", 18x"1d0ae", 18x"1cf26", 18x"1cd9e", 18x"1cc18",
 266         18x"1ca94", 18x"1c910", 18x"1c78c", 18x"1c60a", 18x"1c48a", 18x"1c30c", 18x"1c18e", 18x"1c010",
 267         18x"1be94", 18x"1bd1a", 18x"1bba0", 18x"1ba28", 18x"1b8b2", 18x"1b73c", 18x"1b5c6", 18x"1b452",
 268         18x"1b2e0", 18x"1b16e", 18x"1affe", 18x"1ae8e", 18x"1ad20", 18x"1abb4", 18x"1aa46", 18x"1a8dc",
 269         -- 2.0 ... 2.9999
 270         18x"1a772", 18x"1a608", 18x"1a4a0", 18x"1a33a", 18x"1a1d4", 18x"1a070", 18x"19f0c", 18x"19da8",
 271         18x"19c48", 18x"19ae6", 18x"19986", 18x"19828", 18x"196ca", 18x"1956e", 18x"19412", 18x"192b8",
 272         18x"1915e", 18x"19004", 18x"18eae", 18x"18d56", 18x"18c00", 18x"18aac", 18x"18958", 18x"18804",
 273         18x"186b2", 18x"18562", 18x"18412", 18x"182c2", 18x"18174", 18x"18026", 18x"17eda", 18x"17d8e",
 274         18x"17c44", 18x"17afa", 18x"179b2", 18x"1786a", 18x"17724", 18x"175de", 18x"17498", 18x"17354",
 275         18x"17210", 18x"170ce", 18x"16f8c", 18x"16e4c", 18x"16d0c", 18x"16bcc", 18x"16a8e", 18x"16950",
 276         18x"16814", 18x"166d8", 18x"1659e", 18x"16464", 18x"1632a", 18x"161f2", 18x"160ba", 18x"15f84",
 277         18x"15e4e", 18x"15d1a", 18x"15be6", 18x"15ab2", 18x"15980", 18x"1584e", 18x"1571c", 18x"155ec",
 278         18x"154bc", 18x"1538e", 18x"15260", 18x"15134", 18x"15006", 18x"14edc", 18x"14db0", 18x"14c86",
 279         18x"14b5e", 18x"14a36", 18x"1490e", 18x"147e6", 18x"146c0", 18x"1459a", 18x"14476", 18x"14352",
 280         18x"14230", 18x"1410c", 18x"13fea", 18x"13eca", 18x"13daa", 18x"13c8a", 18x"13b6c", 18x"13a4e",
 281         18x"13930", 18x"13814", 18x"136f8", 18x"135dc", 18x"134c2", 18x"133a8", 18x"1328e", 18x"13176",
 282         18x"1305e", 18x"12f48", 18x"12e30", 18x"12d1a", 18x"12c06", 18x"12af2", 18x"129de", 18x"128ca",
 283         18x"127b8", 18x"126a6", 18x"12596", 18x"12486", 18x"12376", 18x"12266", 18x"12158", 18x"1204a",
 284         18x"11f3e", 18x"11e32", 18x"11d26", 18x"11c1a", 18x"11b10", 18x"11a06", 18x"118fc", 18x"117f4",
 285         18x"116ec", 18x"115e4", 18x"114de", 18x"113d8", 18x"112d2", 18x"111ce", 18x"110ca", 18x"10fc6",
 286         18x"10ec2", 18x"10dc0", 18x"10cbe", 18x"10bbc", 18x"10abc", 18x"109bc", 18x"108bc", 18x"107be",
 287         18x"106c0", 18x"105c2", 18x"104c4", 18x"103c8", 18x"102cc", 18x"101d0", 18x"100d6", 18x"0ffdc",
 288         18x"0fee2", 18x"0fdea", 18x"0fcf0", 18x"0fbf8", 18x"0fb02", 18x"0fa0a", 18x"0f914", 18x"0f81e",
 289         18x"0f72a", 18x"0f636", 18x"0f542", 18x"0f44e", 18x"0f35a", 18x"0f268", 18x"0f176", 18x"0f086",
 290         18x"0ef94", 18x"0eea4", 18x"0edb4", 18x"0ecc6", 18x"0ebd6", 18x"0eae8", 18x"0e9fa", 18x"0e90e",
 291         18x"0e822", 18x"0e736", 18x"0e64a", 18x"0e55e", 18x"0e474", 18x"0e38a", 18x"0e2a0", 18x"0e1b8",
 292         18x"0e0d0", 18x"0dfe8", 18x"0df00", 18x"0de1a", 18x"0dd32", 18x"0dc4c", 18x"0db68", 18x"0da82",
 293         18x"0d99e", 18x"0d8ba", 18x"0d7d6", 18x"0d6f4", 18x"0d612", 18x"0d530", 18x"0d44e", 18x"0d36c",
 294         18x"0d28c", 18x"0d1ac", 18x"0d0cc", 18x"0cfee", 18x"0cf0e", 18x"0ce30", 18x"0cd54", 18x"0cc76",
 295         18x"0cb9a", 18x"0cabc", 18x"0c9e0", 18x"0c906", 18x"0c82a", 18x"0c750", 18x"0c676", 18x"0c59c",
 296         18x"0c4c4", 18x"0c3ea", 18x"0c312", 18x"0c23a", 18x"0c164", 18x"0c08c", 18x"0bfb6", 18x"0bee0",
 297         18x"0be0a", 18x"0bd36", 18x"0bc62", 18x"0bb8c", 18x"0baba", 18x"0b9e6", 18x"0b912", 18x"0b840",
 298         18x"0b76e", 18x"0b69c", 18x"0b5cc", 18x"0b4fa", 18x"0b42a", 18x"0b35a", 18x"0b28a", 18x"0b1bc",
 299         18x"0b0ee", 18x"0b01e", 18x"0af50", 18x"0ae84", 18x"0adb6", 18x"0acea", 18x"0ac1e", 18x"0ab52",
 300         18x"0aa86", 18x"0a9bc", 18x"0a8f0", 18x"0a826", 18x"0a75c", 18x"0a694", 18x"0a5ca", 18x"0a502",
 301         18x"0a43a", 18x"0a372", 18x"0a2aa", 18x"0a1e4", 18x"0a11c", 18x"0a056", 18x"09f90", 18x"09ecc",
 302         -- 3.0 ... 3.9999
 303         18x"09e06", 18x"09d42", 18x"09c7e", 18x"09bba", 18x"09af6", 18x"09a32", 18x"09970", 18x"098ae",
 304         18x"097ec", 18x"0972a", 18x"09668", 18x"095a8", 18x"094e8", 18x"09426", 18x"09368", 18x"092a8",
 305         18x"091e8", 18x"0912a", 18x"0906c", 18x"08fae", 18x"08ef0", 18x"08e32", 18x"08d76", 18x"08cba",
 306         18x"08bfe", 18x"08b42", 18x"08a86", 18x"089ca", 18x"08910", 18x"08856", 18x"0879c", 18x"086e2",
 307         18x"08628", 18x"08570", 18x"084b6", 18x"083fe", 18x"08346", 18x"0828e", 18x"081d8", 18x"08120",
 308         18x"0806a", 18x"07fb4", 18x"07efe", 18x"07e48", 18x"07d92", 18x"07cde", 18x"07c2a", 18x"07b76",
 309         18x"07ac2", 18x"07a0e", 18x"0795a", 18x"078a8", 18x"077f4", 18x"07742", 18x"07690", 18x"075de",
 310         18x"0752e", 18x"0747c", 18x"073cc", 18x"0731c", 18x"0726c", 18x"071bc", 18x"0710c", 18x"0705e",
 311         18x"06fae", 18x"06f00", 18x"06e52", 18x"06da4", 18x"06cf6", 18x"06c4a", 18x"06b9c", 18x"06af0",
 312         18x"06a44", 18x"06998", 18x"068ec", 18x"06840", 18x"06796", 18x"066ea", 18x"06640", 18x"06596",
 313         18x"064ec", 18x"06442", 18x"0639a", 18x"062f0", 18x"06248", 18x"061a0", 18x"060f8", 18x"06050",
 314         18x"05fa8", 18x"05f00", 18x"05e5a", 18x"05db4", 18x"05d0e", 18x"05c68", 18x"05bc2", 18x"05b1c",
 315         18x"05a76", 18x"059d2", 18x"0592e", 18x"05888", 18x"057e4", 18x"05742", 18x"0569e", 18x"055fa",
 316         18x"05558", 18x"054b6", 18x"05412", 18x"05370", 18x"052ce", 18x"0522e", 18x"0518c", 18x"050ec",
 317         18x"0504a", 18x"04faa", 18x"04f0a", 18x"04e6a", 18x"04dca", 18x"04d2c", 18x"04c8c", 18x"04bee",
 318         18x"04b50", 18x"04ab0", 18x"04a12", 18x"04976", 18x"048d8", 18x"0483a", 18x"0479e", 18x"04700",
 319         18x"04664", 18x"045c8", 18x"0452c", 18x"04490", 18x"043f6", 18x"0435a", 18x"042c0", 18x"04226",
 320         18x"0418a", 18x"040f0", 18x"04056", 18x"03fbe", 18x"03f24", 18x"03e8c", 18x"03df2", 18x"03d5a",
 321         18x"03cc2", 18x"03c2a", 18x"03b92", 18x"03afa", 18x"03a62", 18x"039cc", 18x"03934", 18x"0389e",
 322         18x"03808", 18x"03772", 18x"036dc", 18x"03646", 18x"035b2", 18x"0351c", 18x"03488", 18x"033f2",
 323         18x"0335e", 18x"032ca", 18x"03236", 18x"031a2", 18x"03110", 18x"0307c", 18x"02fea", 18x"02f56",
 324         18x"02ec4", 18x"02e32", 18x"02da0", 18x"02d0e", 18x"02c7c", 18x"02bec", 18x"02b5a", 18x"02aca",
 325         18x"02a38", 18x"029a8", 18x"02918", 18x"02888", 18x"027f8", 18x"0276a", 18x"026da", 18x"0264a",
 326         18x"025bc", 18x"0252e", 18x"024a0", 18x"02410", 18x"02384", 18x"022f6", 18x"02268", 18x"021da",
 327         18x"0214e", 18x"020c0", 18x"02034", 18x"01fa8", 18x"01f1c", 18x"01e90", 18x"01e04", 18x"01d78",
 328         18x"01cee", 18x"01c62", 18x"01bd8", 18x"01b4c", 18x"01ac2", 18x"01a38", 18x"019ae", 18x"01924",
 329         18x"0189c", 18x"01812", 18x"01788", 18x"01700", 18x"01676", 18x"015ee", 18x"01566", 18x"014de",
 330         18x"01456", 18x"013ce", 18x"01346", 18x"012c0", 18x"01238", 18x"011b2", 18x"0112c", 18x"010a4",
 331         18x"0101e", 18x"00f98", 18x"00f12", 18x"00e8c", 18x"00e08", 18x"00d82", 18x"00cfe", 18x"00c78",
 332         18x"00bf4", 18x"00b70", 18x"00aec", 18x"00a68", 18x"009e4", 18x"00960", 18x"008dc", 18x"00858",
 333         18x"007d6", 18x"00752", 18x"006d0", 18x"0064e", 18x"005cc", 18x"0054a", 18x"004c8", 18x"00446",
 334         18x"003c4", 18x"00342", 18x"002c2", 18x"00240", 18x"001c0", 18x"00140", 18x"000c0", 18x"00040"
 335         );
 336
 337     -- Left and right shifter with 120 bit input and 64 bit output.
 338     -- Shifts inp left by shift bits and returns the upper 64 bits of
 339     -- the result.  The shift parameter is interpreted as a signed
 340     -- number in the range -64..63, with negative values indicating
 341     -- right shifts.  Vacated bit positions are filled with zeros.
 342     function shifter_64(inp: std_ulogic_vector(119 downto 0);
 343                         shift: std_ulogic_vector(6 downto 0))
 344         return std_ulogic_vector is
 345         variable s1 : std_ulogic_vector(94 downto 0);       -- after the coarse stage, shift(6 downto 5)
 346         variable s2 : std_ulogic_vector(70 downto 0);       -- after the middle stage, shift(4 downto 3)
 347         variable result : std_ulogic_vector(63 downto 0);   -- after the fine stage, shift(2 downto 0)
 348     begin
 349         case shift(6 downto 5) is   -- coarse stage: base offset of 0, +32, -64 or -32
 350             when "00" =>            -- shift 0..31: keep the top 95 bits
 351                 s1 := inp(119 downto 25);
 352             when "01" =>            -- shift 32..63: start 32 bits lower, zero-fill on the right
 353                 s1 := inp(87 downto 0) & "0000000";
 354             when "10" =>            -- shift -64..-33: start 64 bits higher, zero-fill on the left
 355                 s1 := x"0000000000000000" & inp(119 downto 89);
 356             when others =>          -- shift -32..-1: start 32 bits higher, zero-fill on the left
 357                 s1 := x"00000000" & inp(119 downto 57);
 358         end case;
 359         case shift(4 downto 3) is   -- middle stage: further left shift of 0, 8, 16 or 24
 360             when "00" =>
 361                 s2 := s1(94 downto 24);
 362             when "01" =>
 363                 s2 := s1(86 downto 16);
 364             when "10" =>
 365                 s2 := s1(78 downto 8);
 366             when others =>
 367                 s2 := s1(70 downto 0);
 368         end case;
 369         case shift(2 downto 0) is   -- fine stage: further left shift of 0..7
 370             when "000" =>
 371                 result := s2(70 downto 7);
 372             when "001" =>
 373                 result := s2(69 downto 6);
 374             when "010" =>
 375                 result := s2(68 downto 5);
 376             when "011" =>
 377                 result := s2(67 downto 4);
 378             when "100" =>
 379                 result := s2(66 downto 3);
 380             when "101" =>
 381                 result := s2(65 downto 2);
 382             when "110" =>
 383                 result := s2(64 downto 1);
 384             when others =>
 385                 result := s2(63 downto 0);
 386         end case;
 387         return result;
 388     end;
 389
 390     -- Build the mask of bits that a right shift discards: 0-bits on the
 391     -- left, 1-bits on the right.  The shift parameter is the bottom 6 bits
 392     -- of a negative shift count, so a right shift by n arrives as 64 - n
 393     -- and yields n low-order 1-bits (shift = 0 gives an all-ones mask).
 394     function right_mask(shift: unsigned(5 downto 0)) return std_ulogic_vector is
 395         variable mask: std_ulogic_vector(63 downto 0);
 396     begin
 397         for pos in mask'range loop
 398             mask(pos) := '0';
 399             if (63 - pos) >= shift then
 400                 mask(pos) := '1';
 401             end if;
 402         end loop;
 403         return mask;
 404     end;
 405
 406     -- Split a DP floating-point number into sign, exponent, mantissa and class.
 407     -- If is_int = 1, the input is considered an integer and is passed through as the mantissa.
 408     function decode_dp(fpr: std_ulogic_vector(63 downto 0); is_int: std_ulogic) return fpu_reg_type is
 409         variable r       : fpu_reg_type;
 410         variable exp_nz  : std_ulogic;                       -- exponent field has at least one 1 bit
 411         variable exp_ao  : std_ulogic;                       -- exponent field is all ones
 412         variable frac_nz : std_ulogic;                       -- fraction field has at least one 1 bit
 413         variable cls     : std_ulogic_vector(2 downto 0);
 414     begin
 415         r.negative := fpr(63);
 416         exp_nz := or (fpr(62 downto 52));
 417         exp_ao := and (fpr(62 downto 52));
 418         frac_nz := or (fpr(51 downto 0));
 419         if is_int = '0' then
 420             r.exponent := signed(resize(unsigned(fpr(62 downto 52)), EXP_BITS)) - to_signed(1023, EXP_BITS);  -- remove the bias of 1023
 421             if exp_nz = '0' then
 422                 r.exponent := to_signed(-1022, EXP_BITS);    -- zero exponent field (zero or denormal): use -1022
 423             end if;
 424             r.mantissa := "000000000" & exp_nz & fpr(51 downto 0) & "00";  -- unit bit (exp_nz) lands at bit 54, fraction at bits 53..2
 425             cls := exp_ao & exp_nz & frac_nz;
 426             case cls is
 427                 when "000"  => r.class := ZERO;
 428                 when "001"  => r.class := FINITE;    -- denormalized
 429                 when "010"  => r.class := FINITE;
 430                 when "011"  => r.class := FINITE;
 431                 when "110"  => r.class := INFINITY;  -- exponent all ones, fraction zero
 432                 when others => r.class := NAN;       -- includes "111": exponent all ones, fraction nonzero
 433             end case;
 434         else
 435             r.mantissa := fpr;                       -- integer: all 64 bits are kept unchanged
 436             r.exponent := (others => '0');
 437             if (fpr(63) or exp_nz or frac_nz) = '1' then     -- any of the 64 bits set
 438                 r.class := FINITE;
 439             else
 440                 r.class := ZERO;
 441             end if;
 442         end if;
 443         return r;
 444     end;
 445
 446     -- Construct a 64-bit DP floating-point result from sign, class, exponent and mantissa
 447     function pack_dp(sign: std_ulogic; class: fp_number_class; exp: signed(EXP_BITS-1 downto 0);
 448                      mantissa: std_ulogic_vector; single_prec: std_ulogic; quieten_nan: std_ulogic)
 449         return std_ulogic_vector is                          -- NOTE(review): mantissa is unconstrained; bits 54..2 are indexed, presumably 63 downto 0 -- confirm
 450         variable result : std_ulogic_vector(63 downto 0);
 451     begin
 452         result := (others => '0');
 453         result(63) := sign;
 454         case class is
 455             when ZERO =>                                     -- nothing more to set: sign bit with all-zero exponent and fraction
 456             when FINITE =>
 457                 if mantissa(54) = '1' then
 458                     -- normalized number
 459                     result(62 downto 52) := std_ulogic_vector(resize(exp, 11) + 1023);  -- add the bias of 1023
 460                 end if;                                      -- unit bit clear: exponent field stays zero
 461                 result(51 downto 29) := mantissa(53 downto 31);
 462                 if single_prec = '0' then                    -- low 29 fraction bits are left zero for single precision
 463                     result(28 downto 0) := mantissa(30 downto 2);
 464                 end if;
 465             when INFINITY =>
 466                 result(62 downto 52) := "11111111111";      -- exponent all ones, fraction zero
 467             when NAN =>
 468                 result(62 downto 52) := "11111111111";
 469                 result(51) := quieten_nan or mantissa(53);   -- quieten_nan forces the top fraction bit to 1
 470                 result(50 downto 29) := mantissa(52 downto 31);
 471                 if single_prec = '0' then
 472                     result(28 downto 0) := mantissa(30 downto 2);
 473                 end if;
 474         end case;
 475         return result;
 476     end;
 477
 478     -- Decide whether rounding has to increment the mantissa.
 479     -- Returns a 2-bit vector: bit 1 = rounding increment, bit 0 = inexact.
 480     -- When single_prec = 1, x must already include the bottom 29 bits of the
 481     -- mantissa (usually arranged by setting set_x = 1 earlier).
 482     function fp_rounding(mantissa: std_ulogic_vector(63 downto 0); x: std_ulogic;
 483                          single_prec: std_ulogic; rn: std_ulogic_vector(2 downto 0);
 484                          sign: std_ulogic)
 485         return std_ulogic_vector is
 486         variable tail    : std_ulogic_vector(2 downto 0);  -- two bits below the LSB, then x
 487         variable low_bit : std_ulogic;                     -- least significant kept bit
 488         variable inexact : std_ulogic;
 489         variable bump    : std_ulogic;
 490         variable outcome : std_ulogic_vector(1 downto 0);
 491     begin
 492         -- Start from the single-precision positions, then override for double.
 493         tail := mantissa(30 downto 29) & x;
 494         low_bit := mantissa(31);
 495         if single_prec = '0' then
 496             tail := mantissa(1 downto 0) & x;
 497             low_bit := mantissa(2);
 498         end if;
 499         inexact := or (tail);
 500         bump := '0';
 501         case rn(1 downto 0) is
 502             when "00" =>        -- round to nearest
 503                 if tail = "100" and rn(2) = '0' then
 504                     bump := low_bit;    -- exact tie: round to even
 505                 else
 506                     bump := tail(2);
 507                 end if;
 508             when "01" => null;  -- round towards zero: never increment
 509             when others =>      -- round towards +/- inf: increment if magnitude grows
 510                 if rn(0) = sign then bump := inexact; end if;
 511         end case;
 512         outcome := bump & inexact;
 513         return outcome;
 514     end;
 515
 516     -- Work out the five result-flag bits to be written into the FPSCR
 517     function result_flags(sign: std_ulogic; class: fp_number_class; unitbit: std_ulogic)
 518         return std_ulogic_vector is
 519     begin
 520         if class = NAN then
 521             return "10001";
 522         elsif class = ZERO then
 523             return sign & "0010";
 524         elsif class = INFINITY then
 525             return '0' & sign & (not sign) & "01";
 526         end if;
 527         -- Only FINITE is left.  The leading bit is set when the unit bit is
 528         -- clear, i.e. for a value that is not normalized.
 529         return (not unitbit) & sign & (not sign) & "00";
 530     end;
 531
 532 begin
 533     fpu_multiply_0: entity work.multiply      -- clocked multiplier instance used by the FPU
 534         port map (
 535             clk => clk,
 536             m_in => f_to_multiply,            -- operands supplied by this architecture
 537             m_out => multiply_to_f            -- multiplier output returned to this architecture
 538             );
 539
 540     fpu_0: process(clk)     -- clocked update of the FPU state record r
 541     begin
 542         if rising_edge(clk) then
 543             if rst /= '1' then      -- running: e_in.valid must only be seen in IDLE
 544                 assert r.state = IDLE or e_in.valid /= '1' severity failure;
 545                 r <= rin;
 546             else                    -- synchronous reset of the fields listed below only
 547                 r.fpscr <= (others => '0');
 548                 r.state <= IDLE;
 549                 r.busy <= '0';
 550                 r.do_intr <= '0';
 551                 r.instr_done <= '0';
 552                 r.writing_back <= '0';
 553             end if;
 554         end if;
 555     end process;
 556
 557     -- Registered lookup: inverse_table is indexed by B mantissa bits 53:46;
 558     -- mantissa bits 55:54 extend the index only when r.is_sqrt is set
 559     lut_access: process(clk)
 560         variable idx_hi : std_ulogic_vector(1 downto 0);
 561         variable idx    : std_ulogic_vector(9 downto 0);
 562     begin
 563         if rising_edge(clk) then
 564             idx_hi := "00";
 565             if r.is_sqrt = '1' then
 566                 idx_hi := r.b.mantissa(55 downto 54);
 567             end if;
 568             idx := idx_hi & r.b.mantissa(53 downto 46);
 569             inverse_est <= '1' & inverse_table(to_integer(unsigned(idx)));
 570         end if;
 571     end process;
 572
 573     e_out.busy <= r.busy;                           -- e_out: status taken from state record r
 574     e_out.exception <= r.fpscr(FPSCR_FEX);          -- FEX bit of the FPSCR
 575     e_out.interrupt <= r.do_intr;
 576
 577     w_out.valid <= r.instr_done and not r.do_intr;  -- not valid when r.do_intr is set
 578     w_out.instr_tag <= r.instr_tag;
 579     w_out.write_enable <= r.writing_back;
 580     w_out.write_reg <= r.dest_fpr;
 581     w_out.write_data <= fp_result;
 582     w_out.write_cr_enable <= r.instr_done and (r.rc or r.is_cmp);  -- rc / out_cr as captured from e_in
 583     w_out.write_cr_mask <= r.cr_mask;
 584     w_out.write_cr_data <= r.cr_result & r.cr_result & r.cr_result & r.cr_result &  -- r.cr_result
 585                            r.cr_result & r.cr_result & r.cr_result & r.cr_result;   -- repeated 8 times
 586
 587     fpu_1: process(all)
 588         variable v           : reg_type;
 589         variable adec        : fpu_reg_type;
 590         variable bdec        : fpu_reg_type;
 591         variable cdec        : fpu_reg_type;
 592         variable fpscr_mask  : std_ulogic_vector(31 downto 0);
 593         variable illegal     : std_ulogic;
 594         variable j, k        : integer;
 595         variable flm         : std_ulogic_vector(7 downto 0);
 596         variable int_input   : std_ulogic;
 597         variable mask        : std_ulogic_vector(63 downto 0);
 598         variable in_a0       : std_ulogic_vector(63 downto 0);
 599         variable in_b0       : std_ulogic_vector(63 downto 0);
 600         variable misc        : std_ulogic_vector(63 downto 0);
 601         variable shift_res   : std_ulogic_vector(63 downto 0);
 602         variable round       : std_ulogic_vector(1 downto 0);
 603         variable update_fx   : std_ulogic;
 604         variable arith_done  : std_ulogic;
 605         variable invalid     : std_ulogic;
 606         variable zero_divide : std_ulogic;
 607         variable mant_nz     : std_ulogic;
 608         variable min_exp     : signed(EXP_BITS-1 downto 0);
 609         variable max_exp     : signed(EXP_BITS-1 downto 0);
 610         variable bias_exp    : signed(EXP_BITS-1 downto 0);
 611         variable new_exp     : signed(EXP_BITS-1 downto 0);
 612         variable exp_tiny    : std_ulogic;
 613         variable exp_huge    : std_ulogic;
 614         variable renormalize : std_ulogic;
 615         variable clz         : std_ulogic_vector(5 downto 0);
 616         variable set_x       : std_ulogic;
 617         variable mshift      : signed(EXP_BITS-1 downto 0);
 618         variable need_check  : std_ulogic;
 619         variable msb         : std_ulogic;
 620         variable is_add      : std_ulogic;
 621         variable set_a       : std_ulogic;
 622         variable set_b       : std_ulogic;
 623         variable set_c       : std_ulogic;
 624         variable set_y       : std_ulogic;
 625         variable set_s       : std_ulogic;
 626         variable qnan_result : std_ulogic;
 627         variable px_nz       : std_ulogic;
 628         variable pcmpb_eq    : std_ulogic;
 629         variable pcmpb_lt    : std_ulogic;
 630         variable pshift      : std_ulogic;
 631         variable renorm_sqrt : std_ulogic;
 632         variable sqrt_exp    : signed(EXP_BITS-1 downto 0);
 633         variable shiftin     : std_ulogic;
 634         variable mulexp      : signed(EXP_BITS-1 downto 0);
 635         variable maddend     : std_ulogic_vector(127 downto 0);
 636         variable sum         : std_ulogic_vector(63 downto 0);
 637         variable round_inc   : std_ulogic_vector(63 downto 0);
 638     begin
 639         v := r;
 640         illegal := '0';
 641         v.busy := '0';
 642         int_input := '0';
 643
 644         -- capture incoming instruction
 645         if e_in.valid = '1' then
 646             v.insn := e_in.insn;
 647             v.op := e_in.op;
 648             v.instr_tag := e_in.itag;
 649             v.fe_mode := or (e_in.fe_mode);
 650             v.dest_fpr := e_in.frt;
 651             v.single_prec := e_in.single;
 652             v.longmask := e_in.single;
 653             v.int_result := '0';
 654             v.rc := e_in.rc;
 655             v.is_cmp := e_in.out_cr;
 656             if e_in.out_cr = '0' then
 657                 v.cr_mask := num_to_fxm(1);
 658             else
 659                 v.cr_mask := num_to_fxm(to_integer(unsigned(insn_bf(e_in.insn))));
 660             end if;
 661             int_input := '0';
 662             if e_in.op = OP_FPOP_I then
 663                 int_input := '1';
 664             end if;
 665             v.quieten_nan := '1';
 666             v.tiny := '0';
 667             v.denorm := '0';
 668             v.round_mode := '0' & r.fpscr(FPSCR_RN+1 downto FPSCR_RN);
 669             v.is_subtract := '0';
 670             v.is_multiply := '0';
 671             v.is_sqrt := '0';
 672             v.add_bsmall := '0';
 673             v.doing_ftdiv := "00";
 674
 675             adec := decode_dp(e_in.fra, int_input);
 676             bdec := decode_dp(e_in.frb, int_input);
 677             cdec := decode_dp(e_in.frc, int_input);
 678             v.a := adec;
 679             v.b := bdec;
 680             v.c := cdec;
 681
 682             v.exp_cmp := '0';
 683             if adec.exponent > bdec.exponent then
 684                 v.exp_cmp := '1';
 685             end if;
 686             v.madd_cmp := '0';
 687             if (adec.exponent + cdec.exponent + 1) >= bdec.exponent then
 688                 v.madd_cmp := '1';
 689             end if;
 690         end if;
 691
 692         r_hi_nz <= or (r.r(55 downto 31));
 693         r_lo_nz <= or (r.r(30 downto 2));
 694         s_nz <= or (r.s);
 695
 696         if r.single_prec = '0' then
 697             if r.doing_ftdiv(1) = '0' then
 698                 max_exp := to_signed(1023, EXP_BITS);
 699             else
 700                 max_exp := to_signed(1020, EXP_BITS);
 701             end if;
 702             if r.doing_ftdiv(0) = '0' then
 703                 min_exp := to_signed(-1022, EXP_BITS);
 704             else
 705                 min_exp := to_signed(-1021, EXP_BITS);
 706             end if;
 707             bias_exp := to_signed(1536, EXP_BITS);
 708         else
 709             max_exp := to_signed(127, EXP_BITS);
 710             min_exp := to_signed(-126, EXP_BITS);
 711             bias_exp := to_signed(192, EXP_BITS);
 712         end if;
 713         new_exp := r.result_exp - r.shift;
 714         exp_tiny := '0';
 715         exp_huge := '0';
 716         if new_exp < min_exp then
 717             exp_tiny := '1';
 718         end if;
 719         if new_exp > max_exp then
 720             exp_huge := '1';
 721         end if;
 722
 723         -- Compare P with zero and with B
 724         px_nz := or (r.p(57 downto 4));
 725         pcmpb_eq := '0';
 726         if r.p(59 downto 4) = r.b.mantissa(55 downto 0) then
 727             pcmpb_eq := '1';
 728         end if;
 729         pcmpb_lt := '0';
 730         if unsigned(r.p(59 downto 4)) < unsigned(r.b.mantissa(55 downto 0)) then
 731             pcmpb_lt := '1';
 732         end if;
 733
 734         v.writing_back := '0';
 735         v.instr_done := '0';
 736         v.update_fprf := '0';
 737         v.shift := to_signed(0, EXP_BITS);
 738         v.first := '0';
 739         v.opsel_a := AIN_R;
 740         opsel_ainv <= '0';
 741         opsel_mask <= '0';
 742         opsel_b <= BIN_ZERO;
 743         opsel_binv <= '0';
 744         opsel_r <= RES_SUM;
 745         opsel_s <= S_ZERO;
 746         carry_in <= '0';
 747         misc_sel <= "0000";
 748         fpscr_mask := (others => '1');
 749         update_fx := '0';
 750         arith_done := '0';
 751         invalid := '0';
 752         zero_divide := '0';
 753         renormalize := '0';
 754         set_x := '0';
 755         qnan_result := '0';
 756         set_a := '0';
 757         set_b := '0';
 758         set_c := '0';
 759         set_s := '0';
 760         f_to_multiply.is_32bit <= '0';
 761         f_to_multiply.valid <= '0';
 762         msel_1 <= MUL1_A;
 763         msel_2 <= MUL2_C;
 764         msel_add <= MULADD_ZERO;
 765         msel_inv <= '0';
 766         set_y := '0';
 767         pshift := '0';
 768         renorm_sqrt := '0';
 769         shiftin := '0';
 770         case r.state is
 771             when IDLE =>
 772                 v.use_a := '0';
 773                 v.use_b := '0';
 774                 v.use_c := '0';
 775                 v.invalid := '0';
 776                 v.negate := '0';
 777                 if e_in.valid = '1' then
 778                     case e_in.insn(5 downto 1) is
 779                         when "00000" =>
 780                             if e_in.insn(8) = '1' then
 781                                 if e_in.insn(6) = '0' then
 782                                     v.state := DO_FTDIV;
 783                                 else
 784                                     v.state := DO_FTSQRT;
 785                                 end if;
 786                             elsif e_in.insn(7) = '1' then
 787                                 v.state := DO_MCRFS;
 788                             else
 789                                 v.opsel_a := AIN_B;
 790                                 v.state := DO_FCMP;
 791                             end if;
 792                         when "00110" =>
 793                             if e_in.insn(10) = '0' then
 794                                 if e_in.insn(8) = '0' then
 795                                     v.state := DO_MTFSB;
 796                                 else
 797                                     v.state := DO_MTFSFI;
 798                                 end if;
 799                             else
 800                                 v.state := DO_FMRG;
 801                             end if;
 802                         when "00111" =>
 803                             if e_in.insn(8) = '0' then
 804                                 v.state := DO_MFFS;
 805                             else
 806                                 v.state := DO_MTFSF;
 807                             end if;
 808                         when "01000" =>
 809                             v.opsel_a := AIN_B;
 810                             if e_in.insn(9 downto 8) /= "11" then
 811                                 v.state := DO_FMR;
 812                             else
 813                                 v.state := DO_FRI;
 814                             end if;
 815                         when "01100" =>
 816                             v.opsel_a := AIN_B;
 817                             v.state := DO_FRSP;
 818                         when "01110" =>
 819                             v.opsel_a := AIN_B;
 820                             if int_input = '1' then
 821                                 -- fcfid[u][s]
 822                                 v.state := DO_FCFID;
 823                             else
 824                                 v.state := DO_FCTI;
 825                             end if;
 826                         when "01111" =>
 827                             v.round_mode := "001";
 828                             v.opsel_a := AIN_B;
 829                             v.state := DO_FCTI;
 830                         when "10010" =>
 831                             v.opsel_a := AIN_A;
 832                             if v.b.mantissa(54) = '0' and v.a.mantissa(54) = '1' then
 833                                 v.opsel_a := AIN_B;
 834                             end if;
 835                             v.state := DO_FDIV;
 836                         when "10100" | "10101" =>
 837                             v.opsel_a := AIN_A;
 838                             v.state := DO_FADD;
 839                         when "10110" =>
 840                             v.is_sqrt := '1';
 841                             v.opsel_a := AIN_B;
 842                             v.state := DO_FSQRT;
 843                         when "10111" =>
 844                             v.state := DO_FSEL;
 845                         when "11000" =>
 846                             v.opsel_a := AIN_B;
 847                             v.state := DO_FRE;
 848                         when "11001" =>
 849                             v.is_multiply := '1';
 850                             v.opsel_a := AIN_A;
 851                             if v.c.mantissa(54) = '0' and v.a.mantissa(54) = '1' then
 852                                 v.opsel_a := AIN_C;
 853                             end if;
 854                             v.state := DO_FMUL;
 855                         when "11010" =>
 856                             v.is_sqrt := '1';
 857                             v.opsel_a := AIN_B;
 858                             v.state := DO_FRSQRTE;
 859                         when "11100" | "11101" | "11110" | "11111" =>
 860                             if v.a.mantissa(54) = '0' then
 861                                 v.opsel_a := AIN_A;
 862                             elsif v.c.mantissa(54) = '0' then
 863                                 v.opsel_a := AIN_C;
 864                             else
 865                                 v.opsel_a := AIN_B;
 866                             end if;
 867                             v.state := DO_FMADD;
 868                         when others =>
 869                             illegal := '1';
 870                     end case;
 871                 end if;
 872                 v.x := '0';
 873                 v.old_exc := r.fpscr(FPSCR_VX downto FPSCR_XX);
 874                 set_s := '1';
 875
 876             when DO_MCRFS =>
 877                 j := to_integer(unsigned(insn_bfa(r.insn)));
 878                 for i in 0 to 7 loop
 879                     if i = j then
 880                         k := (7 - i) * 4;
 881                         v.cr_result := r.fpscr(k + 3 downto k);
 882                         fpscr_mask(k + 3 downto k) := "0000";
 883                     end if;
 884                 end loop;
 885                 v.fpscr := r.fpscr and (fpscr_mask or x"6007F8FF");
 886                 v.instr_done := '1';
 887                 v.state := IDLE;
 888
 889             when DO_FTDIV =>
 890                 v.instr_done := '1';
 891                 v.state := IDLE;
 892                 v.cr_result := "0000";
 893                 if r.a.class = INFINITY or r.b.class = ZERO or r.b.class = INFINITY or
 894                     (r.b.class = FINITE and r.b.mantissa(53) = '0') then
 895                     v.cr_result(2) := '1';
 896                 end if;
 897                 if r.a.class = NAN or r.a.class = INFINITY or
 898                     r.b.class = NAN or r.b.class = ZERO or r.b.class = INFINITY or
 899                     (r.a.class = FINITE and r.a.exponent <= to_signed(-970, EXP_BITS)) then
 900                     v.cr_result(1) := '1';
 901                 else
 902                     v.doing_ftdiv := "11";
 903                     v.first := '1';
 904                     v.state := FTDIV_1;
 905                     v.instr_done := '0';
 906                 end if;
 907
 908             when DO_FTSQRT =>
 909                 v.instr_done := '1';
 910                 v.state := IDLE;
 911                 v.cr_result := "0000";
 912                 if r.b.class = ZERO or r.b.class = INFINITY or
 913                     (r.b.class = FINITE and r.b.mantissa(53) = '0') then
 914                     v.cr_result(2) := '1';
 915                 end if;
 916                 if r.b.class = NAN or r.b.class = INFINITY or r.b.class = ZERO
 917                     or r.b.negative = '1' or r.b.exponent <= to_signed(-970, EXP_BITS) then
 918                     v.cr_result(1) := '0';
 919                 end if;
 920
 921             when DO_FCMP =>
 922                 -- fcmp[uo]
 923                 -- r.opsel_a = AIN_B
 924                 v.instr_done := '1';
 925                 v.state := IDLE;
 926                 update_fx := '1';
 927                 v.result_exp := r.b.exponent;
 928                 if (r.a.class = NAN and r.a.mantissa(53) = '0') or
 929                     (r.b.class = NAN and r.b.mantissa(53) = '0') then
 930                     -- Signalling NAN
 931                     v.fpscr(FPSCR_VXSNAN) := '1';
 932                     if r.insn(6) = '1' and r.fpscr(FPSCR_VE) = '0' then
 933                         v.fpscr(FPSCR_VXVC) := '1';
 934                     end if;
 935                     invalid := '1';
 936                     v.cr_result := "0001";          -- unordered
 937                 elsif r.a.class = NAN or r.b.class = NAN then
 938                     if r.insn(6) = '1' then
 939                         -- fcmpo
 940                         v.fpscr(FPSCR_VXVC) := '1';
 941                         invalid := '1';
 942                     end if;
 943                     v.cr_result := "0001";          -- unordered
 944                 elsif r.a.class = ZERO and r.b.class = ZERO then
 945                     v.cr_result := "0010";          -- equal
 946                 elsif r.a.negative /= r.b.negative then
 947                     v.cr_result := r.a.negative & r.b.negative & "00";
 948                 elsif r.a.class = ZERO then
 949                     -- A and B are the same sign from here down
 950                     v.cr_result := not r.b.negative & r.b.negative & "00";
 951                 elsif r.a.class = INFINITY then
 952                     if r.b.class = INFINITY then
 953                         v.cr_result := "0010";
 954                     else
 955                         v.cr_result := r.a.negative & not r.a.negative & "00";
 956                     end if;
 957                 elsif r.b.class = ZERO then
 958                     -- A is finite from here down
 959                     v.cr_result := r.a.negative & not r.a.negative & "00";
 960                 elsif r.b.class = INFINITY then
 961                     v.cr_result := not r.b.negative & r.b.negative & "00";
 962                 elsif r.exp_cmp = '1' then
 963                     -- A and B are both finite from here down
 964                     v.cr_result := r.a.negative & not r.a.negative & "00";
 965                 elsif r.a.exponent /= r.b.exponent then
 966                     -- A exponent is smaller than B
 967                     v.cr_result := not r.a.negative & r.a.negative & "00";
 968                 else
 969                     -- Prepare to subtract mantissas, put B in R
 970                     v.cr_result := "0000";
 971                     v.instr_done := '0';
 972                     v.opsel_a := AIN_A;
 973                     v.state := CMP_1;
 974                 end if;
 975                 v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result;
 976
 977             when DO_MTFSB =>
 978                 -- mtfsb{0,1}
 979                 j := to_integer(unsigned(insn_bt(r.insn)));
 980                 for i in 0 to 31 loop
 981                     if i = j then
 982                         v.fpscr(31 - i) := r.insn(6);
 983                     end if;
 984                 end loop;
 985                 v.instr_done := '1';
 986                 v.state := IDLE;
 987
 988             when DO_MTFSFI =>
 989                 -- mtfsfi
 990                 j := to_integer(unsigned(insn_bf(r.insn)));
 991                 if r.insn(16) = '0' then
 992                     for i in 0 to 7 loop
 993                         if i = j then
 994                             k := (7 - i) * 4;
 995                             v.fpscr(k + 3 downto k) := insn_u(r.insn);
 996                         end if;
 997                     end loop;
 998                 end if;
 999                 v.instr_done := '1';
1000                 v.state := IDLE;
1001
1002             when DO_FMRG =>
1003                 -- fmrgew, fmrgow
1004                 opsel_r <= RES_MISC;
1005                 misc_sel <= "01" & r.insn(8) & '0';
1006                 v.int_result := '1';
1007                 v.writing_back := '1';
1008                 v.instr_done := '1';
1009                 v.state := IDLE;
1010
1011             when DO_MFFS =>
1012                 v.int_result := '1';
1013                 v.writing_back := '1';
1014                 opsel_r <= RES_MISC;
1015                 case r.insn(20 downto 16) is
1016                     when "00000" =>
1017                         -- mffs
1018                     when "00001" =>
1019                         -- mffsce
1020                         v.fpscr(FPSCR_VE downto FPSCR_XE) := "00000";
1021                     when "10100" | "10101" =>
1022                         -- mffscdrn[i] (but we don't implement DRN)
1023                         fpscr_mask := x"000000FF";
1024                     when "10110" =>
1025                         -- mffscrn
1026                         fpscr_mask := x"000000FF";
1027                         v.fpscr(FPSCR_RN+1 downto FPSCR_RN) :=
1028                             r.b.mantissa(FPSCR_RN+1 downto FPSCR_RN);
1029                     when "10111" =>
1030                         -- mffscrni
1031                         fpscr_mask := x"000000FF";
1032                         v.fpscr(FPSCR_RN+1 downto FPSCR_RN) := r.insn(12 downto 11);
1033                     when "11000" =>
1034                         -- mffsl
1035                         fpscr_mask := x"0007F0FF";
1036                     when others =>
1037                         illegal := '1';
1038                 end case;
1039                 v.instr_done := '1';
1040                 v.state := IDLE;
1041
1042             when DO_MTFSF =>
1043                 if r.insn(25) = '1' then
1044                     flm := x"FF";
1045                 elsif r.insn(16) = '1' then
1046                     flm := x"00";
1047                 else
1048                     flm := r.insn(24 downto 17);
1049                 end if;
1050                 for i in 0 to 7 loop
1051                     k := i * 4;
1052                     if flm(i) = '1' then
1053                         v.fpscr(k + 3 downto k) := r.b.mantissa(k + 3 downto k);
1054                     end if;
1055                 end loop;
1056                 v.instr_done := '1';
1057                 v.state := IDLE;
1058
1059             when DO_FMR =>
1060                 -- r.opsel_a = AIN_B
1061                 v.result_class := r.b.class;
1062                 v.result_exp := r.b.exponent;
1063                 v.quieten_nan := '0';
1064                 if r.insn(9) = '1' then
1065                     v.result_sign := '0';              -- fabs
1066                 elsif r.insn(8) = '1' then
1067                     v.result_sign := '1';              -- fnabs
1068                 elsif r.insn(7) = '1' then
1069                     v.result_sign := r.b.negative;     -- fmr
1070                 elsif r.insn(6) = '1' then
1071                     v.result_sign := not r.b.negative; -- fneg
1072                 else
1073                     v.result_sign := r.a.negative;     -- fcpsgn
1074                 end if;
1075                 v.writing_back := '1';
1076                 v.instr_done := '1';
1077                 v.state := IDLE;
1078
1079             when DO_FRI =>    -- fri[nzpm]
1080                 -- r.opsel_a = AIN_B
1081                 v.result_class := r.b.class;
1082                 v.result_sign := r.b.negative;
1083                 v.result_exp := r.b.exponent;
1084                 v.fpscr(FPSCR_FR) := '0';
1085                 v.fpscr(FPSCR_FI) := '0';
1086                 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1087                     -- Signalling NAN
1088                     v.fpscr(FPSCR_VXSNAN) := '1';
1089                     invalid := '1';
1090                 end if;
1091                 if r.b.class = FINITE then
1092                     if r.b.exponent >= to_signed(52, EXP_BITS) then
1093                         -- integer already, no rounding required
1094                         arith_done := '1';
1095                     else
1096                         v.shift := r.b.exponent - to_signed(52, EXP_BITS);
1097                         v.state := FRI_1;
1098                         v.round_mode := '1' & r.insn(7 downto 6);
1099                     end if;
1100                 else
1101                     arith_done := '1';
1102                 end if;
1103
1104             when DO_FRSP =>
1105                 -- r.opsel_a = AIN_B, r.shift = 0
1106                 v.result_class := r.b.class;
1107                 v.result_sign := r.b.negative;
1108                 v.result_exp := r.b.exponent;
1109                 v.fpscr(FPSCR_FR) := '0';
1110                 v.fpscr(FPSCR_FI) := '0';
1111                 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1112                     -- Signalling NAN
1113                     v.fpscr(FPSCR_VXSNAN) := '1';
1114                     invalid := '1';
1115                 end if;
1116                 set_x := '1';
1117                 if r.b.class = FINITE then
1118                     if r.b.exponent < to_signed(-126, EXP_BITS) then
1119                         v.shift := r.b.exponent - to_signed(-126, EXP_BITS);
1120                         v.state := ROUND_UFLOW;
1121                     elsif r.b.exponent > to_signed(127, EXP_BITS) then
1122                         v.state := ROUND_OFLOW;
1123                     else
1124                         v.state := ROUNDING;
1125                     end if;
1126                 else
1127                     arith_done := '1';
1128                 end if;
1129
1130             when DO_FCTI =>
1131                 -- instr bit 9: 1=dword 0=word
1132                 -- instr bit 8: 1=unsigned 0=signed
1133                 -- instr bit 1: 1=round to zero 0=use fpscr[RN]
1134                 -- r.opsel_a = AIN_B
1135                 v.result_class := r.b.class;
1136                 v.result_sign := r.b.negative;
1137                 v.result_exp := r.b.exponent;
1138                 v.fpscr(FPSCR_FR) := '0';
1139                 v.fpscr(FPSCR_FI) := '0';
1140                 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1141                     -- Signalling NAN
1142                     v.fpscr(FPSCR_VXSNAN) := '1';
1143                     invalid := '1';
1144                 end if;
1145
1146                 v.int_result := '1';
1147                 case r.b.class is
1148                     when ZERO =>
1149                         arith_done := '1';
1150                     when FINITE =>
1151                         if r.b.exponent >= to_signed(64, EXP_BITS) or
1152                             (r.insn(9) = '0' and r.b.exponent >= to_signed(32, EXP_BITS)) then
1153                             v.state := INT_OFLOW;
1154                         elsif r.b.exponent >= to_signed(52, EXP_BITS) then
1155                             -- integer already, no rounding required,
1156                             -- shift into final position
1157                             v.shift := r.b.exponent - to_signed(54, EXP_BITS);
1158                             if r.insn(8) = '1' and r.b.negative = '1' then
1159                                 v.state := INT_OFLOW;
1160                             else
1161                                 v.state := INT_ISHIFT;
1162                             end if;
1163                         else
1164                             v.shift := r.b.exponent - to_signed(52, EXP_BITS);
1165                             v.state := INT_SHIFT;
1166                         end if;
1167                     when INFINITY | NAN =>
1168                         v.state := INT_OFLOW;
1169                 end case;
1170
1171             when DO_FCFID =>
1172                 -- r.opsel_a = AIN_B
1173                 v.result_sign := '0';
1174                 if r.insn(8) = '0' and r.b.negative = '1' then
1175                     -- fcfid[s] with negative operand, set R = -B
1176                     opsel_ainv <= '1';
1177                     carry_in <= '1';
1178                     v.result_sign := '1';
1179                 end if;
1180                 v.result_class := r.b.class;
1181                 v.result_exp := to_signed(54, EXP_BITS);
1182                 v.fpscr(FPSCR_FR) := '0';
1183                 v.fpscr(FPSCR_FI) := '0';
1184                 if r.b.class = ZERO then
1185                     arith_done := '1';
1186                 else
1187                     v.state := FINISH;
1188                 end if;
1189
1190             when DO_FADD =>
1191                 -- fadd[s] and fsub[s]
1192                 -- r.opsel_a = AIN_A
1193                 v.result_sign := r.a.negative;
1194                 v.result_class := r.a.class;
1195                 v.result_exp := r.a.exponent;
1196                 v.fpscr(FPSCR_FR) := '0';
1197                 v.fpscr(FPSCR_FI) := '0';
1198                 v.use_a := '1';
1199                 v.use_b := '1';
1200                 is_add := r.a.negative xor r.b.negative xor r.insn(1);
1201                 if r.a.class = FINITE and r.b.class = FINITE then
1202                     v.is_subtract := not is_add;
1203                     v.add_bsmall := r.exp_cmp;
1204                     v.opsel_a := AIN_B;
1205                     if r.exp_cmp = '0' then
1206                         v.shift := r.a.exponent - r.b.exponent;
1207                         v.result_sign := r.b.negative xnor r.insn(1);
1208                         if r.a.exponent = r.b.exponent then
1209                             v.state := ADD_2;
1210                         else
1211                             v.longmask := '0';
1212                             v.state := ADD_SHIFT;
1213                         end if;
1214                     else
1215                         v.state := ADD_1;
1216                     end if;
1217                 else
1218                     if r.a.class = NAN or r.b.class = NAN then
1219                         v.state := NAN_RESULT;
1220                     elsif r.a.class = INFINITY and r.b.class = INFINITY and is_add = '0' then
1221                         -- invalid operation, construct QNaN
1222                         v.fpscr(FPSCR_VXISI) := '1';
1223                         qnan_result := '1';
1224                         arith_done := '1';
1225                     elsif r.a.class = ZERO and r.b.class = ZERO and is_add = '0' then
1226                         -- return -0 for rounding to -infinity
1227                         v.result_sign := r.round_mode(1) and r.round_mode(0);
1228                         arith_done := '1';
1229                     elsif r.a.class = INFINITY or r.b.class = ZERO then
1230                         -- result is A
1231                         v.opsel_a := AIN_A;
1232                         v.state := EXC_RESULT;
1233                     else
1234                         -- result is +/- B
1235                         v.opsel_a := AIN_B;
1236                         v.negate := not r.insn(1);
1237                         v.state := EXC_RESULT;
1238                     end if;
1239                 end if;
1240
1241             when DO_FMUL =>
1242                 -- fmul[s]: multiply A by C; dispatch on the classes of A and C
1243                 -- r.opsel_a = AIN_A unless C is denorm and A isn't
1244                 v.result_sign := r.a.negative xor r.c.negative;  -- sign of the product
1245                 v.result_class := r.a.class;  -- default result class is A's class
1246                 v.fpscr(FPSCR_FR) := '0';  -- clear FPSCR FR and FI for this operation
1247                 v.fpscr(FPSCR_FI) := '0';
1248                 v.use_a := '1';  -- A and C are the operands in use
1249                 v.use_c := '1';
1250                 if r.a.class = FINITE and r.c.class = FINITE then
1251                     v.result_exp := r.a.exponent + r.c.exponent;  -- product exponent = sum of exponents
1252                     -- Renormalize denorm operands (bit 54 is the units bit in 10.54 format)
1253                     if r.a.mantissa(54) = '0' then
1254                         v.state := RENORM_A;
1255                     elsif r.c.mantissa(54) = '0' then
1256                         v.state := RENORM_C;
1257                     else
1258                         f_to_multiply.valid <= '1';  -- both normalized: issue the multiply now
1259                         v.state := MULT_1;
1260                     end if;
1261                 else
1262                     if r.a.class = NAN or r.c.class = NAN then
1263                         v.state := NAN_RESULT;
1264                     elsif (r.a.class = INFINITY and r.c.class = ZERO) or
1265                         (r.a.class = ZERO and r.c.class = INFINITY) then
1266                         -- invalid operation (infinity * zero), construct QNaN
1267                         v.fpscr(FPSCR_VXIMZ) := '1';
1268                         qnan_result := '1';
1269                     elsif r.a.class = ZERO or r.a.class = INFINITY then
1270                         -- result is +/- A (class and sign were set above)
1271                         arith_done := '1';
1272                     else
1273                         -- r.c.class is ZERO or INFINITY
1274                         v.opsel_a := AIN_C;  -- select C as the value to return
1275                         v.negate := r.a.negative;  -- presumably flips C's sign in EXC_RESULT when A is negative
1276                         v.state := EXC_RESULT;
1277                     end if;
1278                 end if;
1279
1280             when DO_FDIV =>  -- fdiv: divide A by B; dispatch on the classes of A and B
1281                 -- r.opsel_a = AIN_A unless B is denorm and A isn't
1282                 v.result_class := r.a.class;  -- default result class is A's class
1283                 v.fpscr(FPSCR_FR) := '0';  -- clear FPSCR FR and FI for this operation
1284                 v.fpscr(FPSCR_FI) := '0';
1285                 v.use_a := '1';  -- A and B are the operands in use
1286                 v.use_b := '1';
1287                 v.result_sign := r.a.negative xor r.b.negative;  -- sign of the quotient
1288                 v.result_exp := r.a.exponent - r.b.exponent;  -- quotient exponent = difference of exponents
1289                 v.count := "00";  -- reset the iteration counter used by DIV_2/DIV_3
1290                 if r.a.class = FINITE and r.b.class = FINITE then
1291                     -- Renormalize denorm operands (bit 54 is the units bit in 10.54 format)
1292                     if r.a.mantissa(54) = '0' then
1293                         v.state := RENORM_A;
1294                     elsif r.b.mantissa(54) = '0' then
1295                         v.state := RENORM_B;
1296                     else
1297                         v.first := '1';  -- DIV_2 uses r.first to start its multiply
1298                         v.state := DIV_2;
1299                     end if;
1300                 else
1301                     if r.a.class = NAN or r.b.class = NAN then
1302                         v.state := NAN_RESULT;
1303                     elsif r.b.class = INFINITY then
1304                         if r.a.class = INFINITY then
1305                             v.fpscr(FPSCR_VXIDI) := '1';  -- infinity / infinity is invalid
1306                             qnan_result := '1';
1307                         else
1308                             v.result_class := ZERO;  -- zero or finite / infinity gives zero
1309                         end if;
1310                         arith_done := '1';
1311                     elsif r.b.class = ZERO then
1312                         if r.a.class = ZERO then
1313                             v.fpscr(FPSCR_VXZDZ) := '1';  -- zero / zero is invalid
1314                             qnan_result := '1';
1315                         else
1316                             if r.a.class = FINITE then
1317                                 zero_divide := '1';  -- only finite / zero flags zero-divide, not infinity / zero
1318                             end if;
1319                             v.result_class := INFINITY;
1320                         end if;
1321                         arith_done := '1';
1322                     else -- r.b.class = FINITE, result_class = r.a.class
1323                         arith_done := '1';
1324                     end if;
1325                 end if;
1326
1327             when DO_FSEL =>  -- fsel: pick C or B depending on A
1328                 v.fpscr(FPSCR_FR) := '0';  -- clear FPSCR FR and FI
1329                 v.fpscr(FPSCR_FI) := '0';
1330                 if r.a.class = ZERO or (r.a.negative = '0' and r.a.class /= NAN) then  -- A is +/-0, or non-negative and not a NaN
1331                     v.opsel_a := AIN_C;
1332                 else
1333                     v.opsel_a := AIN_B;  -- A is negative non-zero, or a NaN
1334                 end if;
1335                 v.quieten_nan := '0';  -- presumably lets a NaN operand pass through unmodified; confirm in EXC_RESULT
1336                 v.state := EXC_RESULT;
1337
1338             when DO_FSQRT =>  -- fsqrt: square root of B; dispatch on B's class
1339                 -- r.opsel_a = AIN_B
1340                 v.result_class := r.b.class;  -- default result is B's class and sign
1341                 v.result_sign := r.b.negative;
1342                 v.fpscr(FPSCR_FR) := '0';  -- clear FPSCR FR and FI for this operation
1343                 v.fpscr(FPSCR_FI) := '0';
1344                 v.use_b := '1';  -- B is the only operand in use
1345                 case r.b.class is
1346                     when FINITE =>
1347                         v.result_exp := r.b.exponent;
1348                         if r.b.negative = '1' then
1349                             v.fpscr(FPSCR_VXSQRT) := '1';  -- square root of a negative finite number is invalid
1350                             qnan_result := '1';
1351                         elsif r.b.mantissa(54) = '0' then  -- units bit clear: B needs renormalizing first
1352                             v.state := RENORM_B;
1353                         elsif r.b.exponent(0) = '0' then  -- even exponent: go straight to SQRT_1
1354                             v.state := SQRT_1;
1355                         else
1356                             v.shift := to_signed(1, EXP_BITS);  -- odd exponent: go through RENORM_B2 with shift = 1
1357                             v.state := RENORM_B2;
1358                         end if;
1359                     when NAN =>
1360                         v.state := NAN_RESULT;
1361                     when ZERO =>
1362                         -- result is B
1363                         arith_done := '1';
1364                     when INFINITY =>
1365                         if r.b.negative = '1' then
1366                             v.fpscr(FPSCR_VXSQRT) := '1';  -- square root of -infinity is invalid
1367                             qnan_result := '1';
1368                         -- else result is B
1369                         end if;
1370                         arith_done := '1';
1371                 end case;
1372
1373             when DO_FRE =>  -- fre: reciprocal estimate of B; dispatch on B's class
1374                 -- r.opsel_a = AIN_B
1375                 v.result_class := r.b.class;  -- default result is B's class and sign
1376                 v.result_sign := r.b.negative;
1377                 v.fpscr(FPSCR_FR) := '0';  -- clear FPSCR FR and FI for this operation
1378                 v.fpscr(FPSCR_FI) := '0';
1379                 v.use_b := '1';  -- B is the only operand in use
1380                 case r.b.class is
1381                     when FINITE =>
1382                         v.result_exp := - r.b.exponent;  -- a reciprocal negates the exponent
1383                         if r.b.mantissa(54) = '0' then  -- units bit clear: B needs renormalizing first
1384                             v.state := RENORM_B;
1385                         else
1386                             v.state := FRE_1;
1387                         end if;
1388                     when NAN =>
1389                         v.state := NAN_RESULT;
1390                     when INFINITY =>
1391                         v.result_class := ZERO;  -- 1/infinity is zero, keeping B's sign
1392                         arith_done := '1';
1393                     when ZERO =>
1394                         v.result_class := INFINITY;  -- 1/zero is infinity with B's sign, and flags zero-divide
1395                         zero_divide := '1';
1396                         arith_done := '1';
1397                 end case;
1398
1399             when DO_FRSQRTE =>  -- frsqrte: reciprocal square root estimate of B; dispatch on B's class
1400                 -- r.opsel_a = AIN_B
1401                 v.result_class := r.b.class;  -- default result is B's class and sign
1402                 v.result_sign := r.b.negative;
1403                 v.fpscr(FPSCR_FR) := '0';  -- clear FPSCR FR and FI for this operation
1404                 v.fpscr(FPSCR_FI) := '0';
1405                 v.use_b := '1';  -- B is the only operand in use
1406                 v.shift := to_signed(1, EXP_BITS);  -- shift = 1 is in place if the odd-exponent path goes to RENORM_B2
1407                 case r.b.class is
1408                     when FINITE =>
1409                         v.result_exp := r.b.exponent;
1410                         if r.b.negative = '1' then
1411                             v.fpscr(FPSCR_VXSQRT) := '1';  -- negative finite operand is invalid
1412                             qnan_result := '1';
1413                         elsif r.b.mantissa(54) = '0' then  -- units bit clear: B needs renormalizing first
1414                             v.state := RENORM_B;
1415                         elsif r.b.exponent(0) = '0' then  -- even exponent: go straight to RSQRT_1
1416                             v.state := RSQRT_1;
1417                         else
1418                             v.state := RENORM_B2;  -- odd exponent
1419                         end if;
1420                     when NAN =>
1421                         v.state := NAN_RESULT;
1422                     when INFINITY =>
1423                         if r.b.negative = '1' then
1424                             v.fpscr(FPSCR_VXSQRT) := '1';  -- -infinity is invalid
1425                             qnan_result := '1';
1426                         else
1427                             v.result_class := ZERO;  -- +infinity gives +0
1428                         end if;
1429                         arith_done := '1';
1430                     when ZERO =>
1431                         v.result_class := INFINITY;  -- zero gives infinity with B's sign, and flags zero-divide
1432                         zero_divide := '1';
1433                         arith_done := '1';
1434                 end case;
1435
1436             when DO_FMADD =>
1437                 -- fmadd, fmsub, fnmadd, fnmsub: A * C +/- B, optionally negated
1438                 -- r.opsel_a = AIN_A if A is denorm, else AIN_C if C is denorm,
1439                 -- else AIN_B
1440                 v.result_sign := r.a.negative;  -- defaults; overridden on most paths below
1441                 v.result_class := r.a.class;
1442                 v.result_exp := r.a.exponent;
1443                 v.fpscr(FPSCR_FR) := '0';  -- clear FPSCR FR and FI for this operation
1444                 v.fpscr(FPSCR_FI) := '0';
1445                 v.use_a := '1';  -- all three operands are in use
1446                 v.use_b := '1';
1447                 v.use_c := '1';
1448                 is_add := r.a.negative xor r.c.negative xor r.b.negative xor r.insn(1);  -- see the uses below: '0' is the subtract case
1449                 if r.a.class = FINITE and r.c.class = FINITE and
1450                     (r.b.class = FINITE or r.b.class = ZERO) then
1451                     v.is_subtract := not is_add;
1452                     mulexp := r.a.exponent + r.c.exponent;  -- exponent of the product A * C
1453                     v.result_exp := mulexp;
1454                     -- Make sure A and C are normalized
1455                     if r.a.mantissa(54) = '0' then
1456                         v.state := RENORM_A;
1457                     elsif r.c.mantissa(54) = '0' then
1458                         v.state := RENORM_C;
1459                     elsif r.b.class = ZERO then
1460                         -- no addend, degenerates to multiply
1461                         v.result_sign := r.a.negative xor r.c.negative xor r.insn(2);  -- product sign, flipped when insn(2) is set
1462                         f_to_multiply.valid <= '1';
1463                         v.is_multiply := '1';
1464                         v.state := MULT_1;
1465                     elsif r.madd_cmp = '0' then
1466                         -- addend is bigger, do multiply first
1467                         v.result_sign := not (r.b.negative xor r.insn(1) xor r.insn(2));
1468                         f_to_multiply.valid <= '1';
1469                         v.state := FMADD_1;
1470                     else
1471                         -- product is bigger, shift B right and use it as the
1472                         -- addend to the multiplier
1473                         v.shift := r.b.exponent - mulexp + to_signed(64, EXP_BITS);  -- FMADD_2 later subtracts the 64 again
1474                         -- for subtract, multiplier does B - A * C
1475                         v.result_sign := not (r.a.negative xor r.c.negative xor r.insn(2) xor is_add);
1476                         v.result_exp := r.b.exponent;
1477                         v.state := FMADD_2;
1478                     end if;
1479                 else
1480                     if r.a.class = NAN or r.b.class = NAN or r.c.class = NAN then
1481                         v.state := NAN_RESULT;
1482                     elsif (r.a.class = ZERO and r.c.class = INFINITY) or
1483                         (r.a.class = INFINITY and r.c.class = ZERO) then
1484                         -- invalid operation (zero * infinity), construct QNaN
1485                         v.fpscr(FPSCR_VXIMZ) := '1';
1486                         qnan_result := '1';
1487                     elsif r.a.class = INFINITY or r.c.class = INFINITY then
1488                         if r.b.class = INFINITY and is_add = '0' then
1489                             -- invalid operation (infinity - infinity), construct QNaN
1490                             v.fpscr(FPSCR_VXISI) := '1';
1491                             qnan_result := '1';
1492                         else
1493                             -- result is infinity
1494                             v.result_class := INFINITY;
1495                             v.result_sign := r.a.negative xor r.c.negative xor r.insn(2);  -- product sign, flipped when insn(2) is set
1496                             arith_done := '1';
1497                         end if;
1498                     else
1499                         -- Here A is zero, C is zero, or B is infinity
1500                         -- Result is +/-B in all of those cases
1501                         v.opsel_a := AIN_B;
1502                         if r.b.class /= ZERO or is_add = '1' then
1503                             v.negate := not (r.insn(1) xor r.insn(2));
1504                         else
1505                             -- have to be careful about rule for 0 - 0 result sign
1506                             v.negate := r.b.negative xor (r.round_mode(1) and r.round_mode(0)) xor r.insn(2);  -- sign term is '1' only when both round_mode bits are set
1507                         end if;
1508                         v.state := EXC_RESULT;
1509                     end if;
1510                 end if;
1511
1512             when RENORM_A =>  -- first cycle of renormalizing operand A
1513                 renormalize := '1';
1514                 v.state := RENORM_A2;
1515                 if r.insn(4) = '1' then  -- per the RENORM_A2 comment: fmul/fmadd selects C next, fdiv selects B
1516                     v.opsel_a := AIN_C;
1517                 else
1518                     v.opsel_a := AIN_B;
1519                 end if;
1520
1521             when RENORM_A2 =>  -- second cycle: write back A, then move on to the other operand or the operation
1522                 -- r.opsel_a = AIN_C for fmul/fmadd, AIN_B for fdiv
1523                 set_a := '1';  -- presumably latches the renormalized value into A; confirm where set_a is consumed
1524                 v.result_exp := new_exp;
1525                 if r.insn(4) = '1' then  -- fmul/fmadd path
1526                     if r.c.mantissa(54) = '1' then  -- C already normalized
1527                         if r.insn(3) = '0' or r.b.class = ZERO then  -- no addend to consider: plain multiply
1528                             v.first := '1';  -- MULT_1 issues the multiply from r.first
1529                             v.state := MULT_1;
1530                         else
1531                             v.madd_cmp := '0';  -- redo the product-vs-addend comparison with the new exponent
1532                             if new_exp + 1 >= r.b.exponent then
1533                                 v.madd_cmp := '1';
1534                             end if;
1535                             v.opsel_a := AIN_B;
1536                             v.state := DO_FMADD;  -- re-enter DO_FMADD now that A is normalized
1537                         end if;
1538                     else
1539                         v.state := RENORM_C;
1540                     end if;
1541                 else  -- fdiv path
1542                     if r.b.mantissa(54) = '1' then  -- B already normalized
1543                         v.first := '1';
1544                         v.state := DIV_2;
1545                     else
1546                         v.state := RENORM_B;
1547                     end if;
1548                 end if;
1549
1550             when RENORM_B =>  -- first cycle of renormalizing operand B
1551                 renormalize := '1';
1552                 renorm_sqrt := r.is_sqrt;  -- pass the is_sqrt flag on to the renormalization logic
1553                 v.state := RENORM_B2;
1554
1555             when RENORM_B2 =>  -- second cycle: write back B, fix up the exponent, go to LOOKUP
1556                 set_b := '1';  -- presumably latches the shifted value into B; confirm where set_b is consumed
1557                 if r.is_sqrt = '0' then
1558                     v.result_exp := r.result_exp + r.shift;  -- non-sqrt: adjust the exponent by the shift amount
1559                 else
1560                     v.result_exp := new_exp;  -- sqrt: take the exponent computed outside this arm
1561                 end if;
1562                 v.opsel_a := AIN_B;  -- LOOKUP expects r.opsel_a = AIN_B
1563                 v.state := LOOKUP;
1564
1565             when RENORM_C =>  -- first cycle of renormalizing operand C
1566                 renormalize := '1';
1567                 v.state := RENORM_C2;
1568
1569             when RENORM_C2 =>  -- second cycle: write back C, then start the multiply or re-enter DO_FMADD
1570                 set_c := '1';  -- presumably latches the renormalized value into C; confirm where set_c is consumed
1571                 v.result_exp := new_exp;
1572                 if r.insn(3) = '0' or r.b.class = ZERO then  -- no addend to consider: plain multiply
1573                     v.first := '1';  -- MULT_1 issues the multiply from r.first
1574                     v.state := MULT_1;
1575                 else
1576                     v.madd_cmp := '0';  -- redo the product-vs-addend comparison with the new exponent
1577                     if new_exp + 1 >= r.b.exponent then
1578                         v.madd_cmp := '1';
1579                     end if;
1580                     v.opsel_a := AIN_B;
1581                     v.state := DO_FMADD;  -- re-enter DO_FMADD now that C is normalized
1582                 end if;
1583
1584             when ADD_1 =>  -- entered from DO_FADD when r.exp_cmp = '1'
1585                 -- transferring B to R
1586                 v.shift := r.b.exponent - r.a.exponent;  -- ADD_SHIFT expects minus the exponent difference
1587                 v.result_exp := r.b.exponent;
1588                 v.longmask := '0';  -- ADD_SHIFT expects r.longmask = 0
1589                 v.state := ADD_SHIFT;
1590
1591             when ADD_SHIFT =>  -- shift R by r.shift and record X before the add in ADD_2
1592                 -- r.shift = - exponent difference, r.longmask = 0
1593                 opsel_r <= RES_SHIFT;  -- R takes the shifter output
1594                 v.x := s_nz;  -- X is set when S is non-zero
1595                 set_x := '1';
1596                 v.longmask := r.single_prec;  -- restore longmask from the precision flag
1597                 if r.add_bsmall = '1' then  -- choose the operand ADD_2 feeds to the adder's A input
1598                     v.opsel_a := AIN_A;
1599                 else
1600                     v.opsel_a := AIN_B;
1601                 end if;
1602                 v.state := ADD_2;
1603
1604             when ADD_2 =>  -- add or subtract: selected operand on the A input, R on the B input
1605                 -- r.opsel_a = AIN_A if r.add_bsmall = 1 else AIN_B
1606                 opsel_b <= BIN_R;
1607                 opsel_binv <= r.is_subtract;  -- invert the B input for a subtract
1608                 carry_in <= r.is_subtract and not r.x;  -- carry in only for a subtract with X clear
1609                 v.shift := to_signed(-1, EXP_BITS);  -- ADD_3 expects r.shift = -1
1610                 v.state := ADD_3;
1611
1612             when ADD_3 =>  -- classify the sum in R and choose the follow-up state
1613                 -- check for overflow or negative result (can't get both)
1614                 -- r.shift = -1
1615                 if r.r(63) = '1' then
1616                     -- result is opposite sign to expected
1617                     v.result_sign := not r.result_sign;
1618                     opsel_ainv <= '1';  -- invert the A input and add 1 (two's complement negate)
1619                     carry_in <= '1';
1620                     v.state := FINISH;
1621                 elsif r.r(55) = '1' then
1622                     -- sum overflowed, shift right
1623                     opsel_r <= RES_SHIFT;
1624                     set_x := '1';
1625                     if exp_huge = '1' then
1626                         v.state := ROUND_OFLOW;
1627                     else
1628                         v.state := ROUNDING;
1629                     end if;
1630                 elsif r.r(54) = '1' then  -- units bit set: already normalized
1631                     set_x := '1';
1632                     v.state := ROUNDING;
1633                 elsif (r_hi_nz or r_lo_nz or r.r(1) or r.r(0)) = '0' then  -- sum is zero
1634                     -- r.x must be zero at this point
1635                     v.result_class := ZERO;
1636                     if r.is_subtract = '1' then
1637                         -- set result sign depending on rounding mode
1638                         v.result_sign := r.round_mode(1) and r.round_mode(0);  -- negative zero only when both round_mode bits are set
1639                     end if;
1640                     arith_done := '1';
1641                 else  -- non-zero but the units bit is clear: needs normalizing
1642                     renormalize := '1';
1643                     v.state := NORMALIZE;
1644                 end if;
1645
1646             when CMP_1 =>  -- compute A - R for the compare (B input inverted, carry in 1)
1647                 -- r.opsel_a = AIN_A
1648                 opsel_b <= BIN_R;
1649                 opsel_binv <= '1';
1650                 carry_in <= '1';
1651                 v.state := CMP_2;
1652
1653             when CMP_2 =>  -- turn the difference from CMP_1 into the 4-bit compare result
1654                 if r.r(63) = '1' then
1655                     -- A is smaller in magnitude
1656                     v.cr_result := not r.a.negative & r.a.negative & "00";  -- "1000" if A is positive, "0100" if negative
1657                 elsif (r_hi_nz or r_lo_nz) = '0' then  -- difference is zero
1658                     v.cr_result := "0010";
1659                 else  -- A is larger in magnitude
1660                     v.cr_result := r.a.negative & not r.a.negative & "00";  -- "0100" if A is positive, "1000" if negative
1661                 end if;
1662                 v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result;  -- mirror the result into the FPSCR FL..FU bits
1663                 v.instr_done := '1';
1664                 v.state := IDLE;
1665
1666             when MULT_1 =>  -- issue the multiply if r.first is set, then wait for the product
1667                 f_to_multiply.valid <= r.first;
1668                 opsel_r <= RES_MULT;  -- R takes the multiplier result
1669                 if multiply_to_f.valid = '1' then  -- no state change until the product arrives
1670                     v.state := FINISH;
1671                 end if;
1672
1673             when FMADD_1 =>  -- wait for the product, then hand over to the add path
1674                 -- Addend is bigger here
1675                 v.result_sign := not (r.b.negative xor r.insn(1) xor r.insn(2));
1676                 -- note v.shift is at most -2 here
1677                 v.shift := r.result_exp - r.b.exponent;
1678                 opsel_r <= RES_MULT;  -- R and S take the multiplier result
1679                 opsel_s <= S_MULT;
1680                 set_s := '1';
1681                 f_to_multiply.valid <= r.first;
1682                 if multiply_to_f.valid = '1' then
1683                     v.longmask := '0';  -- ADD_SHIFT expects r.longmask = 0
1684                     v.state := ADD_SHIFT;
1685                 end if;
1686
1687             when FMADD_2 =>  -- first shift step for the addend: load S from the shifter
1688                 -- Product is potentially bigger here
1689                 -- r.shift = addend exp - product exp + 64, r.r = r.b.mantissa
1690                 set_s := '1';
1691                 opsel_s <= S_SHIFT;
1692                 v.shift := r.shift - to_signed(64, EXP_BITS);  -- remove the +64 bias for FMADD_3
1693                 v.state := FMADD_3;
1694
1695             when FMADD_3 =>  -- second shift step for the addend: load R from the shifter
1696                 -- r.shift = addend exp - product exp
1697                 opsel_r <= RES_SHIFT;
1698                 v.first := '1';  -- FMADD_4 issues the multiply-add from r.first
1699                 v.state := FMADD_4;
1700
1701             when FMADD_4 =>  -- run the multiply with the MULADD_RS addend and capture the result in R and S
1702                 msel_add <= MULADD_RS;
1703                 f_to_multiply.valid <= r.first;
1704                 msel_inv <= r.is_subtract;  -- invert in the multiplier for a subtract
1705                 opsel_r <= RES_MULT;
1706                 opsel_s <= S_MULT;
1707                 set_s := '1';
1708                 if multiply_to_f.valid = '1' then  -- no state change until the result arrives
1709                     v.state := FMADD_5;
1710                 end if;
1711
1712             when FMADD_5 =>  -- make the multiply-add result positive
1713                 -- negate R:S:X if negative
1714                 if r.r(63) = '1' then
1715                     v.result_sign := not r.result_sign;
1716                     opsel_ainv <= '1';
1717                     carry_in <= not (s_nz or r.x);  -- the +1 reaches R only when S and X are both zero
1718                     opsel_s <= S_NEG;
1719                     set_s := '1';
1720                 end if;
1721                 v.shift := to_signed(56, EXP_BITS);  -- FMADD_6 expects r.shift = 56
1722                 v.state := FMADD_6;
1723
1724             when FMADD_6 =>  -- detect a zero result, pull bits up from S, or go on to normalize/finish
1725                 -- r.shift = 56 (or 0, but only if r is now nonzero)
1726                 if (r.r(56) or r_hi_nz or r_lo_nz or r.r(1) or r.r(0)) = '0' then  -- R is all zeroes
1727                     if s_nz = '0' then
1728                         -- must be a subtraction, and r.x must be zero
1729                         v.result_class := ZERO;
1730                         v.result_sign := r.round_mode(1) and r.round_mode(0);  -- negative zero only when both round_mode bits are set
1731                         arith_done := '1';
1732                     else
1733                         -- R is all zeroes but there are non-zero bits in S
1734                         -- so shift them into R and set S to 0
1735                         opsel_r <= RES_SHIFT;
1736                         set_s := '1';
1737                         -- stay in state FMADD_6
1738                     end if;
1739                 elsif r.r(56 downto 54) = "001" then  -- leading one already at bit 54
1740                     v.state := FINISH;
1741                 else
1742                     renormalize := '1';
1743                     v.state := NORMALIZE;
1744                 end if;
1745
1746             when LOOKUP =>  -- one wait cycle, then pick the next state from insn bits 4, 3 and 2
1747                 -- r.opsel_a = AIN_B
1748                 -- wait one cycle for inverse_table[B] lookup
1749                 v.first := '1';  -- the next state issues its multiply from r.first
1750                 if r.insn(4) = '0' then
1751                     if r.insn(3) = '0' then
1752                         v.state := DIV_2;  -- insn(4) = 0, insn(3) = 0
1753                     else
1754                         v.state := SQRT_1;  -- insn(4) = 0, insn(3) = 1
1755                     end if;
1756                 elsif r.insn(2) = '0' then
1757                     v.state := FRE_1;  -- insn(4) = 1, insn(2) = 0
1758                 else
1759                     v.state := RSQRT_1;  -- insn(4) = 1, insn(2) = 1
1760                 end if;
1761
1762             when DIV_2 =>  -- first half of each division iteration (alternates with DIV_3)
1763                 -- compute Y = inverse_table[B] (when count=0); P = 2 - B * Y
1764                 msel_1 <= MUL1_B;
1765                 msel_add <= MULADD_CONST;  -- constant addend, with the product inverted
1766                 msel_inv <= '1';
1767                 if r.count = 0 then
1768                     msel_2 <= MUL2_LUT;  -- first iteration: second multiplier input is the lookup table
1769                 else
1770                     msel_2 <= MUL2_P;  -- later iterations: second multiplier input is P
1771                 end if;
1772                 set_y := r.first;
1773                 pshift := '1';
1774                 f_to_multiply.valid <= r.first;
1775                 if multiply_to_f.valid = '1' then  -- no state change until the product arrives
1776                     v.first := '1';
1777                     v.count := r.count + 1;
1778                     v.state := DIV_3;
1779                 end if;
1780
1781             when DIV_3 =>  -- second half of each division iteration
1782                 -- compute Y = P = P * Y
1783                 msel_1 <= MUL1_Y;
1784                 msel_2 <= MUL2_P;
1785                 f_to_multiply.valid <= r.first;
1786                 pshift := '1';
1787                 if multiply_to_f.valid = '1' then  -- no state change until the product arrives
1788                     v.first := '1';
1789                     if r.count = 3 then  -- DIV_2 has incremented count three times: iterations done
1790                         v.state := DIV_4;
1791                     else
1792                         v.state := DIV_2;
1793                     end if;
1794                 end if;
1795
1796             when DIV_4 =>  -- multiply A by the reciprocal estimate to get the quotient
1797                 -- compute R = P = A * Y (quotient)
1798                 msel_1 <= MUL1_A;
1799                 msel_2 <= MUL2_P;
1800                 set_y := r.first;
1801                 f_to_multiply.valid <= r.first;
1802                 pshift := '1';
1803                 if multiply_to_f.valid = '1' then
1804                     opsel_r <= RES_MULT;  -- R takes the product only once it is valid
1805                     v.first := '1';
1806                     v.state := DIV_5;
1807                 end if;
1808
1809             when DIV_5 =>  -- form the remainder for the final correction in DIV_6
1810                 -- compute P = A - B * R (remainder)
1811                 msel_1 <= MUL1_B;
1812                 msel_2 <= MUL2_R;
1813                 msel_add <= MULADD_A;  -- addend is A, with the product inverted
1814                 msel_inv <= '1';
1815                 f_to_multiply.valid <= r.first;
1816                 if multiply_to_f.valid = '1' then  -- no state change until the result arrives
1817                     v.state := DIV_6;
1818                 end if;
1819
1820             when DIV_6 =>  -- final quotient correction and X computation
1821                 -- test if remainder is 0 or >= B
1822                 if pcmpb_lt = '1' then  -- remainder is less than B
1823                     -- quotient is correct, set X if remainder non-zero
1824                     v.x := r.p(58) or px_nz;
1825                 else
1826                     -- quotient needs to be incremented by 1
1827                     carry_in <= '1';
1828                     v.x := not pcmpb_eq;  -- X stays clear only when the remainder equals B exactly
1829                 end if;
1830                 v.state := FINISH;
1831
1832             when FRE_1 =>  -- load R from the misc source and normalize it as the fre result
1833                 opsel_r <= RES_MISC;
1834                 misc_sel <= "0111";  -- same selector SQRT_1 uses to load its table estimate; presumably the lookup-table output
1835                 v.shift := to_signed(1, EXP_BITS);
1836                 v.state := NORMALIZE;
1837
1838             when FTDIV_1 =>  -- exponent-range check for ftdiv; may make a second pass through this state
1839                 v.cr_result(1) := exp_tiny or exp_huge;  -- set CR result bit 1 when the exponent is out of range
1840                 if exp_tiny = '1' or exp_huge = '1' or r.a.class = ZERO or r.first = '0' then
1841                     v.instr_done := '1';
1842                     v.state := IDLE;
1843                 else
1844                     v.shift := r.a.exponent;  -- no state change here, so FTDIV_1 runs again
1845                     v.doing_ftdiv := "10";  -- NOTE(review): presumably changes what exp_tiny/exp_huge test on the second pass; confirm where doing_ftdiv is read
1846                 end if;
1847
1848             when RSQRT_1 =>  -- load R from the misc source and set the frsqrte result exponent
1849                 opsel_r <= RES_MISC;
1850                 misc_sel <= "0111";  -- same selector SQRT_1 uses to load its table estimate; presumably the lookup-table output
1851                 sqrt_exp := r.b.exponent(EXP_BITS-1) & r.b.exponent(EXP_BITS-1 downto 1);  -- B's exponent shifted right one bit, sign-extended (halved)
1852                 v.result_exp := - sqrt_exp;  -- reciprocal square root: negated half exponent
1853                 v.shift := to_signed(1, EXP_BITS);
1854                 v.state := NORMALIZE;
1855
1856             when SQRT_1 =>  -- start of the square-root sequence
1857                 -- put invsqr[B] in R and compute P = invsqr[B] * B
1858                 -- also transfer B (in R) to A
1859                 set_a := '1';
1860                 opsel_r <= RES_MISC;
1861                 misc_sel <= "0111";
1862                 msel_1 <= MUL1_B;
1863                 msel_2 <= MUL2_LUT;
1864                 f_to_multiply.valid <= '1';  -- issue the multiply unconditionally in this cycle
1865                 v.shift := to_signed(-1, EXP_BITS);  -- SQRT_2 expects r.shift = -1
1866                 v.count := "00";  -- reset the iteration counter tested in SQRT_7
1867                 v.state := SQRT_2;
1868
1869             when SQRT_2 =>  -- halve the estimate in R while the multiplier is busy
1870                 -- shift R right one place
1871                 -- not expecting multiplier result yet
1872                 -- r.shift = -1
1873                 opsel_r <= RES_SHIFT;
1874                 v.first := '1';  -- SQRT_3 uses r.first for set_y
1875                 v.state := SQRT_3;
1876
1877             when SQRT_3 =>  -- save the shifted estimate in Y and collect the SQRT_1 product
1878                 -- put R into Y, wait for product from multiplier
1879                 msel_2 <= MUL2_R;
1880                 set_y := r.first;
1881                 pshift := '1';
1882                 if multiply_to_f.valid = '1' then  -- no state change until the product arrives
1883                     -- put result into R
1884                     opsel_r <= RES_MULT;
1885                     v.first := '1';
1886                     v.state := SQRT_4;
1887                 end if;
1888
1889             when SQRT_4 =>  -- first step of the SQRT_4..SQRT_7 iteration loop
1890                 -- compute 1.5 - Y * P
1891                 msel_1 <= MUL1_Y;
1892                 msel_2 <= MUL2_P;
1893                 msel_add <= MULADD_CONST;  -- constant addend, with the product inverted
1894                 msel_inv <= '1';
1895                 f_to_multiply.valid <= r.first;
1896                 pshift := '1';
1897                 if multiply_to_f.valid = '1' then  -- no state change until the result arrives
1898                     v.state := SQRT_5;
1899                 end if;
1900
1901             when SQRT_5 =>  -- issue the Y * P multiply; its result is collected in SQRT_7
1902                 -- compute Y = Y * P
1903                 msel_1 <= MUL1_Y;
1904                 msel_2 <= MUL2_P;
1905                 f_to_multiply.valid <= '1';  -- issued unconditionally; this state lasts one cycle
1906                 v.first := '1';  -- lets SQRT_6 issue the next multiply
1907                 v.state := SQRT_6;
1908
1909             when SQRT_6 =>  -- issue the R * P multiply behind the one from SQRT_5
1910                 -- pipeline in R = R * P
1911                 msel_1 <= MUL1_R;
1912                 msel_2 <= MUL2_P;
1913                 f_to_multiply.valid <= r.first;
1914                 pshift := '1';
1915                 if multiply_to_f.valid = '1' then  -- the first multiply's result has arrived
1916                     v.first := '1';
1917                     v.state := SQRT_7;
1918                 end if;
1919
1920             when SQRT_7 =>  -- end of one iteration: update Y and R, then loop or move on
1921                 -- first multiply is done, put result in Y
1922                 msel_2 <= MUL2_P;
1923                 set_y := r.first;
1924                 -- wait for second multiply (should be here already)
1925                 pshift := '1';
1926                 if multiply_to_f.valid = '1' then
1927                     -- put result into R
1928                     opsel_r <= RES_MULT;
1929                     v.first := '1';
1930                     v.count := r.count + 1;
1931                     if r.count < 2 then  -- count 0 and 1 loop back; count 2 exits, giving three iterations
1932                         v.state := SQRT_4;
1933                     else
1934                         v.first := '1';  -- redundant: v.first was already set a few lines above
1935                         v.state := SQRT_8;
1936                     end if;
1937                 end if;
1938
1939             when SQRT_8 =>  -- form the residual of the current root estimate
1940                 -- compute P = A - R * R, which can be +ve or -ve
1941                 -- we arranged for B to be put into A earlier
1942                 msel_1 <= MUL1_R;
1943                 msel_2 <= MUL2_R;
1944                 msel_add <= MULADD_A;  -- addend is A, with the product inverted
1945                 msel_inv <= '1';
1946                 pshift := '1';
1947                 f_to_multiply.valid <= r.first;
1948                 if multiply_to_f.valid = '1' then  -- no state change until the result arrives
1949                     v.first := '1';
1950                     v.state := SQRT_9;
1951                 end if;
1952
1953             when SQRT_9 =>
1954                 -- compute P = P * Y
1955                 -- since Y is an estimate of 1/sqrt(B), this makes P an
1956                 -- estimate of the adjustment needed to R.  Since the error
1957                 -- could be negative and we have an unsigned multiplier, the
1958                 -- upper bits can be wrong, but it turns out the lowest 8 bits
1959                 -- are correct and are all we need (given 3 iterations through
1960                 -- SQRT_4 to SQRT_7).
1961                 msel_1 <= MUL1_Y;
1962                 msel_2 <= MUL2_P;
1963                 pshift := '1';
1964                 f_to_multiply.valid <= r.first;
1965                 if multiply_to_f.valid = '1' then
1966                     v.state := SQRT_10;
1967                 end if;
1968
1969             when SQRT_10 =>
1970                 -- Add the bottom 8 bits of P, sign-extended,
1971                 -- divided by 4, onto R.
1972                 -- The division by 4 is because R is 10.54 format
1973                 -- whereas P is 8.56 format.
1974                 opsel_b <= BIN_PS6;
1975                 sqrt_exp := r.b.exponent(EXP_BITS-1) & r.b.exponent(EXP_BITS-1 downto 1);
1976                 v.result_exp := sqrt_exp;
1977                 v.shift := to_signed(1, EXP_BITS);
1978                 v.first := '1';
1979                 v.state := SQRT_11;
1980
1981             when SQRT_11 =>
1982                 -- compute P = A - R * R (remainder)
1983                 -- also put 2 * R + 1 into B for comparison with P
1984                 msel_1 <= MUL1_R;
1985                 msel_2 <= MUL2_R;
1986                 msel_add <= MULADD_A;
1987                 msel_inv <= '1';
1988                 f_to_multiply.valid <= r.first;
1989                 shiftin := '1';
1990                 set_b := r.first;
1991                 if multiply_to_f.valid = '1' then
1992                     v.state := SQRT_12;
1993                 end if;
1994
1995             when SQRT_12 =>
1996                 -- test if remainder is 0 or >= B = 2*R + 1
1997                 if pcmpb_lt = '1' then
1998                     -- square root is correct, set X if remainder non-zero
1999                     v.x := r.p(58) or px_nz;
2000                 else
2001                     -- square root needs to be incremented by 1
2002                     carry_in <= '1';
2003                     v.x := not pcmpb_eq;
2004                 end if;
2005                 v.state := FINISH;
2006
2007             when INT_SHIFT =>
2008                 -- r.shift = b.exponent - 52
2009                 opsel_r <= RES_SHIFT;
2010                 set_x := '1';
2011                 v.state := INT_ROUND;
2012                 v.shift := to_signed(-2, EXP_BITS);
2013
2014             when INT_ROUND =>
2015                 -- r.shift = -2
2016                 opsel_r <= RES_SHIFT;
2017                 round := fp_rounding(r.r, r.x, '0', r.round_mode, r.result_sign);
2018                 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
2019                 -- Check for negative values that don't round to 0 for fcti*u*
2020                 if r.insn(8) = '1' and r.result_sign = '1' and
2021                     (r_hi_nz or r_lo_nz or v.fpscr(FPSCR_FR)) = '1' then
2022                     v.state := INT_OFLOW;
2023                 else
2024                     v.state := INT_FINAL;
2025                 end if;
2026
2027             when INT_ISHIFT =>
2028                 -- r.shift = b.exponent - 54;
2029                 opsel_r <= RES_SHIFT;
2030                 v.state := INT_FINAL;
2031
2032             when INT_FINAL =>
2033                 -- Negate if necessary, and increment for rounding if needed
2034                 opsel_ainv <= r.result_sign;
2035                 carry_in <= r.fpscr(FPSCR_FR) xor r.result_sign;
2036                 -- Check for possible overflows
2037                 case r.insn(9 downto 8) is
2038                     when "00" =>        -- fctiw[z]
2039                         need_check := r.r(31) or (r.r(30) and not r.result_sign);
2040                     when "01" =>        -- fctiwu[z]
2041                         need_check := r.r(31);
2042                     when "10" =>        -- fctid[z]
2043                         need_check := r.r(63) or (r.r(62) and not r.result_sign);
2044                     when others =>      -- fctidu[z]
2045                         need_check := r.r(63);
2046                 end case;
2047                 if need_check = '1' then
2048                     v.state := INT_CHECK;
2049                 else
2050                     if r.fpscr(FPSCR_FI) = '1' then
2051                         v.fpscr(FPSCR_XX) := '1';
2052                     end if;
2053                     arith_done := '1';
2054                 end if;
2055
2056             when INT_CHECK =>
2057                 if r.insn(9) = '0' then
2058                     msb := r.r(31);
2059                 else
2060                     msb := r.r(63);
2061                 end if;
2062                 misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
2063                 if (r.insn(8) = '0' and msb /= r.result_sign) or
2064                     (r.insn(8) = '1' and msb /= '1') then
2065                     opsel_r <= RES_MISC;
2066                     v.fpscr(FPSCR_VXCVI) := '1';
2067                     invalid := '1';
2068                 else
2069                     if r.fpscr(FPSCR_FI) = '1' then
2070                         v.fpscr(FPSCR_XX) := '1';
2071                     end if;
2072                 end if;
2073                 arith_done := '1';
2074
2075             when INT_OFLOW =>
2076                 opsel_r <= RES_MISC;
2077                 misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
2078                 if r.b.class = NAN then
2079                     misc_sel(0) <= '1';
2080                 end if;
2081                 v.fpscr(FPSCR_VXCVI) := '1';
2082                 invalid := '1';
2083                 arith_done := '1';
2084
2085             when FRI_1 =>
2086                 -- r.shift = b.exponent - 52
2087                 opsel_r <= RES_SHIFT;
2088                 set_x := '1';
2089                 v.state := ROUNDING;
2090
2091             when FINISH =>
2092                 if r.is_multiply = '1' and px_nz = '1' then
2093                     v.x := '1';
2094                 end if;
2095                 if r.r(63 downto 54) /= "0000000001" then
2096                     renormalize := '1';
2097                     v.state := NORMALIZE;
2098                 else
2099                     set_x := '1';
2100                     if exp_tiny = '1' then
2101                         v.shift := new_exp - min_exp;
2102                         v.state := ROUND_UFLOW;
2103                     elsif exp_huge = '1' then
2104                         v.state := ROUND_OFLOW;
2105                     else
2106                         v.state := ROUNDING;
2107                     end if;
2108                 end if;
2109
2110             when NORMALIZE =>
2111                 -- Shift so we have 9 leading zeroes (we know R is non-zero)
2112                 -- r.shift = clz(r.r) - 9
2113                 opsel_r <= RES_SHIFT;
2114                 set_x := '1';
2115                 if exp_tiny = '1' then
2116                     v.shift := new_exp - min_exp;
2117                     v.state := ROUND_UFLOW;
2118                 elsif exp_huge = '1' then
2119                     v.state := ROUND_OFLOW;
2120                 else
2121                     v.state := ROUNDING;
2122                 end if;
2123
2124             when ROUND_UFLOW =>
2125                 -- r.shift = - amount by which exponent underflows
2126                 v.tiny := '1';
2127                 if r.fpscr(FPSCR_UE) = '0' then
2128                     -- disabled underflow exception case
2129                     -- have to denormalize before rounding
2130                     opsel_r <= RES_SHIFT;
2131                     set_x := '1';
2132                     v.state := ROUNDING;
2133                 else
2134                     -- enabled underflow exception case
2135                     -- if denormalized, have to normalize before rounding
2136                     v.fpscr(FPSCR_UX) := '1';
2137                     v.result_exp := r.result_exp + bias_exp;
2138                     if r.r(54) = '0' then
2139                         renormalize := '1';
2140                         v.state := NORMALIZE;
2141                     else
2142                         v.state := ROUNDING;
2143                     end if;
2144                 end if;
2145
2146             when ROUND_OFLOW =>
2147                 v.fpscr(FPSCR_OX) := '1';
2148                 if r.fpscr(FPSCR_OE) = '0' then
2149                     -- disabled overflow exception
2150                     -- result depends on rounding mode
2151                     v.fpscr(FPSCR_XX) := '1';
2152                     v.fpscr(FPSCR_FI) := '1';
2153                     if r.round_mode(1 downto 0) = "00" or
2154                         (r.round_mode(1) = '1' and r.round_mode(0) = r.result_sign) then
2155                         v.result_class := INFINITY;
2156                         v.fpscr(FPSCR_FR) := '1';
2157                     else
2158                         v.fpscr(FPSCR_FR) := '0';
2159                     end if;
2160                     -- construct largest representable number
2161                     v.result_exp := max_exp;
2162                     opsel_r <= RES_MISC;
2163                     misc_sel <= "001" & r.single_prec;
2164                     arith_done := '1';
2165                 else
2166                     -- enabled overflow exception
2167                     v.result_exp := r.result_exp - bias_exp;
2168                     v.state := ROUNDING;
2169                 end if;
2170
2171             when ROUNDING =>
2172                 opsel_mask <= '1';
2173                 round := fp_rounding(r.r, r.x, r.single_prec, r.round_mode, r.result_sign);
2174                 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
2175                 if round(1) = '1' then
2176                     -- increment the LSB for the precision
2177                     opsel_b <= BIN_RND;
2178                     v.shift := to_signed(-1, EXP_BITS);
2179                     v.state := ROUNDING_2;
2180                 else
2181                     if r.r(54) = '0' then
2182                         -- result after masking could be zero, or could be a
2183                         -- denormalized result that needs to be renormalized
2184                         renormalize := '1';
2185                         v.state := ROUNDING_3;
2186                     else
2187                         arith_done := '1';
2188                     end if;
2189                 end if;
2190                 if round(0) = '1' then
2191                     v.fpscr(FPSCR_XX) := '1';
2192                     if r.tiny = '1' then
2193                         v.fpscr(FPSCR_UX) := '1';
2194                     end if;
2195                 end if;
2196
2197             when ROUNDING_2 =>
2198                 -- Check for overflow during rounding
2199                 -- r.shift = -1
2200                 v.x := '0';
2201                 if r.r(55) = '1' then
2202                     opsel_r <= RES_SHIFT;
2203                     if exp_huge = '1' then
2204                         v.state := ROUND_OFLOW;
2205                     else
2206                         arith_done := '1';
2207                     end if;
2208                 elsif r.r(54) = '0' then
2209                     -- Do CLZ so we can renormalize the result
2210                     renormalize := '1';
2211                     v.state := ROUNDING_3;
2212                 else
2213                     arith_done := '1';
2214                 end if;
2215
2216             when ROUNDING_3 =>
2217                 -- r.shift = clz(r.r) - 9
2218                 mant_nz := r_hi_nz or (r_lo_nz and not r.single_prec);
2219                 if mant_nz = '0' then
2220                     v.result_class := ZERO;
2221                     if r.is_subtract = '1' then
2222                         -- set result sign depending on rounding mode
2223                         v.result_sign := r.round_mode(1) and r.round_mode(0);
2224                     end if;
2225                     arith_done := '1';
2226                 else
2227                     -- Renormalize result after rounding
2228                     opsel_r <= RES_SHIFT;
2229                     v.denorm := exp_tiny;
2230                     v.shift := new_exp - to_signed(-1022, EXP_BITS);
2231                     if new_exp < to_signed(-1022, EXP_BITS) then
2232                         v.state := DENORM;
2233                     else
2234                         arith_done := '1';
2235                     end if;
2236                 end if;
2237
2238             when DENORM =>
2239                 -- r.shift = result_exp - (-1022), as set up in ROUNDING_3
2240                 opsel_r <= RES_SHIFT;
2241                 arith_done := '1';
2242
2243             when NAN_RESULT =>
2244                 if (r.use_a = '1' and r.a.class = NAN and r.a.mantissa(53) = '0') or
2245                     (r.use_b = '1' and r.b.class = NAN and r.b.mantissa(53) = '0') or
2246                     (r.use_c = '1' and r.c.class = NAN and r.c.mantissa(53) = '0') then
2247                     -- Signalling NAN
2248                     v.fpscr(FPSCR_VXSNAN) := '1';
2249                     invalid := '1';
2250                 end if;
2251                 if r.use_a = '1' and r.a.class = NAN then
2252                     v.opsel_a := AIN_A;
2253                 elsif r.use_b = '1' and r.b.class = NAN then
2254                     v.opsel_a := AIN_B;
2255                 elsif r.use_c = '1' and r.c.class = NAN then
2256                     v.opsel_a := AIN_C;
2257                 end if;
2258                 v.state := EXC_RESULT;
2259
2260             when EXC_RESULT =>
2261                 -- r.opsel_a = AIN_A, AIN_B or AIN_C according to which input is the result
2262                 case r.opsel_a is
2263                     when AIN_B =>
2264                         v.result_sign := r.b.negative xor r.negate;
2265                         v.result_exp := r.b.exponent;
2266                         v.result_class := r.b.class;
2267                     when AIN_C =>
2268                         v.result_sign := r.c.negative xor r.negate;
2269                         v.result_exp := r.c.exponent;
2270                         v.result_class := r.c.class;
2271                     when others =>
2272                         v.result_sign := r.a.negative xor r.negate;
2273                         v.result_exp := r.a.exponent;
2274                         v.result_class := r.a.class;
2275                 end case;
2276                 arith_done := '1';
2277
2278         end case;
2279
2280         if zero_divide = '1' then
2281             v.fpscr(FPSCR_ZX) := '1';
2282         end if;
2283         if qnan_result = '1' then
2284             invalid := '1';
2285             v.result_class := NAN;
2286             v.result_sign := '0';
2287             misc_sel <= "0001";
2288             opsel_r <= RES_MISC;
2289             arith_done := '1';
2290         end if;
2291         if invalid = '1' then
2292             v.invalid := '1';
2293         end if;
2294         if arith_done = '1' then
2295             -- Enabled invalid exception doesn't write result or FPRF
2296             -- Neither does enabled zero-divide exception
2297             if (v.invalid and r.fpscr(FPSCR_VE)) = '0' and
2298                 (zero_divide and r.fpscr(FPSCR_ZE)) = '0' then
2299                 v.writing_back := '1';
2300                 v.update_fprf := '1';
2301             end if;
2302             v.instr_done := '1';
2303             v.state := IDLE;
2304             update_fx := '1';
2305         end if;
2306
2307         -- Multiplier and divide/square root data path
2308         case msel_1 is
2309             when MUL1_A =>
2310                 f_to_multiply.data1 <= r.a.mantissa(61 downto 0) & "00";
2311             when MUL1_B =>
2312                 f_to_multiply.data1 <= r.b.mantissa(61 downto 0) & "00";
2313             when MUL1_Y =>
2314                 f_to_multiply.data1 <= r.y;
2315             when others =>
2316                 f_to_multiply.data1 <= r.r(61 downto 0) & "00";
2317         end case;
2318         case msel_2 is
2319             when MUL2_C =>
2320                 f_to_multiply.data2 <= r.c.mantissa(61 downto 0) & "00";
2321             when MUL2_LUT =>
2322                 f_to_multiply.data2 <= x"00" & inverse_est & '0' & x"000000000";
2323             when MUL2_P =>
2324                 f_to_multiply.data2 <= r.p;
2325             when others =>
2326                 f_to_multiply.data2 <= r.r(61 downto 0) & "00";
2327         end case;
2328         maddend := (others => '0');
2329         case msel_add is
2330             when MULADD_CONST =>
2331                 -- addend is 2.0 or 1.5 in 16.112 format
2332                 if r.is_sqrt = '0' then
2333                     maddend(113) := '1';                -- 2.0
2334                 else
2335                     maddend(112 downto 111) := "11";    -- 1.5
2336                 end if;
2337             when MULADD_A =>
2338                 -- addend is A in 16.112 format
2339                 maddend(121 downto 58) := r.a.mantissa;
2340             when MULADD_RS =>
2341                 -- addend is concatenation of R and S in 16.112 format
2342                 maddend := "000000" & r.r & r.s & "00";
2343             when others =>
2344         end case;
2345         if msel_inv = '1' then
2346             f_to_multiply.addend <= not maddend;
2347         else
2348             f_to_multiply.addend <= maddend;
2349         end if;
2350         f_to_multiply.not_result <= msel_inv;
2351         if set_y = '1' then
2352             v.y := f_to_multiply.data2;
2353         end if;
2354         if multiply_to_f.valid = '1' then
2355             if pshift = '0' then
2356                 v.p := multiply_to_f.result(63 downto 0);
2357             else
2358                 v.p := multiply_to_f.result(119 downto 56);
2359             end if;
2360         end if;
2361
2362         -- Data path.
2363         -- This has A and B input multiplexers, an adder, a shifter,
2364         -- count-leading-zeroes logic, and a result mux.
2365         if r.longmask = '1' then
2366             mshift := r.shift + to_signed(-29, EXP_BITS);
2367         else
2368             mshift := r.shift;
2369         end if;
2370         if mshift < to_signed(-64, EXP_BITS) then
2371             mask := (others => '1');
2372         elsif mshift >= to_signed(0, EXP_BITS) then
2373             mask := (others => '0');
2374         else
2375             mask := right_mask(unsigned(mshift(5 downto 0)));
2376         end if;
2377         case r.opsel_a is
2378             when AIN_R =>
2379                 in_a0 := r.r;
2380             when AIN_A =>
2381                 in_a0 := r.a.mantissa;
2382             when AIN_B =>
2383                 in_a0 := r.b.mantissa;
2384             when others =>
2385                 in_a0 := r.c.mantissa;
2386         end case;
2387         if (or (mask and in_a0)) = '1' and set_x = '1' then
2388             v.x := '1';
2389         end if;
2390         if opsel_ainv = '1' then
2391             in_a0 := not in_a0;
2392         end if;
2393         in_a <= in_a0;
2394         case opsel_b is
2395             when BIN_ZERO =>
2396                 in_b0 := (others => '0');
2397             when BIN_R =>
2398                 in_b0 := r.r;
2399             when BIN_RND =>
2400                 round_inc := (31 => r.single_prec, 2 => not r.single_prec, others => '0');
2401                 in_b0 := round_inc;
2402             when others =>
2403                 -- BIN_PS6, 6 LSBs of P/4 sign-extended to 64
2404                 in_b0 := std_ulogic_vector(resize(signed(r.p(7 downto 2)), 64));
2405         end case;
2406         if opsel_binv = '1' then
2407             in_b0 := not in_b0;
2408         end if;
2409         in_b <= in_b0;
2410         if r.shift >= to_signed(-64, EXP_BITS) and r.shift <= to_signed(63, EXP_BITS) then
2411             shift_res := shifter_64(r.r & (shiftin or r.s(55)) & r.s(54 downto 0),
2412                                     std_ulogic_vector(r.shift(6 downto 0)));
2413         else
2414             shift_res := (others => '0');
2415         end if;
2416         sum := std_ulogic_vector(unsigned(in_a) + unsigned(in_b) + carry_in);
2417         if opsel_mask = '1' then
2418             sum(1 downto 0) := "00";
2419             if r.single_prec = '1' then
2420                 sum(30 downto 2) := (others => '0');
2421             end if;
2422         end if;
2423         case opsel_r is
2424             when RES_SUM =>
2425                 result <= sum;
2426             when RES_SHIFT =>
2427                 result <= shift_res;
2428             when RES_MULT =>
2429                 result <= multiply_to_f.result(121 downto 58);
2430             when others =>
2431                 case misc_sel is
2432                     when "0000" =>
2433                         misc := x"00000000" & (r.fpscr and fpscr_mask);
2434                     when "0001" =>
2435                         -- generated QNaN mantissa
2436                         misc := x"0020000000000000";
2437                     when "0010" =>
2438                         -- mantissa of max representable DP number
2439                         misc := x"007ffffffffffffc";
2440                     when "0011" =>
2441                         -- mantissa of max representable SP number
2442                         misc := x"007fffff80000000";
2443                     when "0100" =>
2444                         -- fmrgow result
2445                         misc := r.a.mantissa(31 downto 0) & r.b.mantissa(31 downto 0);
2446                     when "0110" =>
2447                         -- fmrgew result
2448                         misc := r.a.mantissa(63 downto 32) & r.b.mantissa(63 downto 32);
2449                     when "0111" =>
2450                         misc := 10x"000" & inverse_est & 35x"000000000";
2451                     when "1000" =>
2452                         -- max positive result for fctiw[z]
2453                         misc := x"000000007fffffff";
2454                     when "1001" =>
2455                         -- most negative result for fctiw[z] (-2^31, sign-extended)
2456                         misc := x"ffffffff80000000";
2457                     when "1010" =>
2458                         -- max positive result for fctiwu[z]
2459                         misc := x"00000000ffffffff";
2460                     when "1011" =>
2461                         -- minimum result for fctiwu[z] (unsigned, so zero)
2462                         misc := x"0000000000000000";
2463                     when "1100" =>
2464                         -- max positive result for fctid[z]
2465                         misc := x"7fffffffffffffff";
2466                     when "1101" =>
2467                         -- most negative result for fctid[z] (-2^63)
2468                         misc := x"8000000000000000";
2469                     when "1110" =>
2470                         -- max positive result for fctidu[z]
2471                         misc := x"ffffffffffffffff";
2472                     when "1111" =>
2473                         -- minimum result for fctidu[z] (unsigned, so zero)
2474                         misc := x"0000000000000000";
2475                     when others =>
2476                         misc := x"0000000000000000";
2477                 end case;
2478                 result <= misc;
2479         end case;
2480         v.r := result;
2481         if set_s = '1' then
2482             case opsel_s is
2483                 when S_NEG =>
2484                     v.s := std_ulogic_vector(unsigned(not r.s) + (not r.x));
2485                 when S_MULT =>
2486                     v.s := multiply_to_f.result(57 downto 2);
2487                 when S_SHIFT =>
2488                     v.s := shift_res(63 downto 8);
2489                     if shift_res(7 downto 0) /= x"00" then
2490                         v.x := '1';
2491                     end if;
2492                 when others =>
2493                     v.s := (others => '0');
2494             end case;
2495         end if;
2496
2497         if set_a = '1' then
2498             v.a.exponent := new_exp;
2499             v.a.mantissa := shift_res;
2500         end if;
2501         if set_b = '1' then
2502             v.b.exponent := new_exp;
2503             v.b.mantissa := shift_res;
2504         end if;
2505         if set_c = '1' then
2506             v.c.exponent := new_exp;
2507             v.c.mantissa := shift_res;
2508         end if;
2509
2510         if opsel_r = RES_SHIFT then
2511             v.result_exp := new_exp;
2512         end if;
2513
2514         if renormalize = '1' then
2515             clz := count_left_zeroes(r.r);
2516             if renorm_sqrt = '1' then
2517                 -- make denormalized value end up with even exponent
2518                 clz(0) := '1';
2519             end if;
2520             v.shift := resize(signed('0' & clz) - 9, EXP_BITS);
2521         end if;
2522
2523         if r.int_result = '1' then
2524             fp_result <= r.r;
2525         else
2526             fp_result <= pack_dp(r.result_sign, r.result_class, r.result_exp, r.r,
2527                                  r.single_prec, r.quieten_nan);
2528         end if;
2529         if r.update_fprf = '1' then
2530             v.fpscr(FPSCR_C downto FPSCR_FU) := result_flags(r.result_sign, r.result_class,
2531                                                              r.r(54) and not r.denorm);
2532         end if;
2533
2534         v.fpscr(FPSCR_VX) := (or (v.fpscr(FPSCR_VXSNAN downto FPSCR_VXVC))) or
2535                              (or (v.fpscr(FPSCR_VXSOFT downto FPSCR_VXCVI)));
2536         v.fpscr(FPSCR_FEX) := or (v.fpscr(FPSCR_VX downto FPSCR_XX) and
2537                                   v.fpscr(FPSCR_VE downto FPSCR_XE));
2538         if update_fx = '1' and
2539             (v.fpscr(FPSCR_VX downto FPSCR_XX) and not r.old_exc) /= "00000" then
2540             v.fpscr(FPSCR_FX) := '1';
2541         end if;
2542         if r.rc = '1' then
2543             v.cr_result := v.fpscr(FPSCR_FX downto FPSCR_OX);
2544         end if;
2545
2546         if illegal = '1' then
2547             v.instr_done := '0';
2548             v.do_intr := '0';
2549             v.writing_back := '0';
2550             v.busy := '0';
2551             v.state := IDLE;
2552         else
2553             v.do_intr := v.instr_done and v.fpscr(FPSCR_FEX) and r.fe_mode;
2554             if v.state /= IDLE or v.do_intr = '1' then
2555                 v.busy := '1';
2556             end if;
2557         end if;
2558
2559         rin <= v;
2560         e_out.illegal <= illegal;
2561     end process;
2562
2563 end architecture behaviour;