fpu.vhdl

   1 -- Floating-point unit for Microwatt
   2
   3 library ieee;
   4 use ieee.std_logic_1164.all;
   5 use ieee.numeric_std.all;
   6
   7 library work;
   8 use work.insn_helpers.all;
   9 use work.decode_types.all;
  10 use work.crhelpers.all;
  11 use work.helpers.all;
  12 use work.common.all;
  13
  14 entity fpu is  -- floating-point unit; all registered state is clocked on the rising edge of clk
  15     port (
  16         clk : in std_ulogic;  -- clock for the state register and the lookup-table read
  17         rst : in std_ulogic;  -- synchronous reset, sampled on the rising edge of clk
  18
  19         e_in  : in  Execute1toFPUType;  -- incoming request; valid must only be asserted while the FPU is in IDLE
  20         e_out : out FPUToExecute1Type;  -- busy / exception / interrupt status
  21
  22         w_out : out FPUToWritebackType  -- register-file and CR results
  23         );
  24 end entity fpu;
  25
  26 architecture behaviour of fpu is
  27     type fp_number_class is (ZERO, FINITE, INFINITY, NAN);  -- FINITE includes denormalized values (see decode_dp)
  28
  29     constant EXP_BITS : natural := 13;  -- width of the signed internal exponent
  30
  31     type fpu_reg_type is record  -- unpacked operand, as produced by decode_dp
  32         class    : fp_number_class;
  33         negative : std_ulogic;                          -- copy of bit 63 of the source value
  34         exponent : signed(EXP_BITS-1 downto 0);         -- unbiased
  35         mantissa : std_ulogic_vector(63 downto 0);      -- 10.54 format (raw 64-bit value for integer operands)
  36     end record;
  37
  38     type state_t is (IDLE,  -- reset state; the only state in which e_in.valid may be asserted
  39                      DO_MCRFS, DO_MTFSB, DO_MTFSFI, DO_MFFS, DO_MTFSF,  -- NOTE(review): DO_* look like per-instruction entry states; confirm in fpu_1
  40                      DO_FMR, DO_FMRG, DO_FCMP, DO_FTDIV, DO_FTSQRT,
  41                      DO_FCFID, DO_FCTI,
  42                      DO_FRSP, DO_FRI,
  43                      DO_FADD, DO_FMUL, DO_FDIV, DO_FSQRT, DO_FMADD,
  44                      DO_FRE, DO_FRSQRTE,
  45                      DO_FSEL,
  46                      FRI_1,  -- NOTE(review): the remaining states look like multi-cycle continuation steps; confirm in fpu_1
  47                      ADD_SHIFT, ADD_2, ADD_3,
  48                      CMP_1, CMP_2,
  49                      MULT_1,
  50                      FMADD_1, FMADD_2, FMADD_3,
  51                      FMADD_4, FMADD_5, FMADD_6,
  52                      LOOKUP,
  53                      DIV_2, DIV_3, DIV_4, DIV_5, DIV_6,
  54                      FRE_1,
  55                      RSQRT_1,
  56                      FTDIV_1,
  57                      SQRT_1, SQRT_2, SQRT_3, SQRT_4,
  58                      SQRT_5, SQRT_6, SQRT_7, SQRT_8,
  59                      SQRT_9, SQRT_10, SQRT_11, SQRT_12,
  60                      INT_SHIFT, INT_ROUND, INT_ISHIFT,
  61                      INT_FINAL, INT_CHECK, INT_OFLOW,
  62                      FINISH, NORMALIZE,
  63                      ROUND_UFLOW, ROUND_OFLOW,
  64                      ROUNDING, ROUNDING_2, ROUNDING_3,
  65                      DENORM,
  66                      RENORM_A, RENORM_A2,
  67                      RENORM_B, RENORM_B2,
  68                      RENORM_C, RENORM_C2);
  69
  70     type reg_type is record  -- complete registered state of the FPU (signals r and rin)
  71         state        : state_t;  -- reset to IDLE
  72         busy         : std_ulogic;  -- drives e_out.busy
  73         instr_done   : std_ulogic;  -- gates w_out.valid and w_out.write_cr_enable
  74         do_intr      : std_ulogic;  -- drives e_out.interrupt and suppresses w_out.valid
  75         op           : insn_type_t;
  76         insn         : std_ulogic_vector(31 downto 0);
  77         dest_fpr     : gspr_index_t;  -- drives w_out.write_reg
  78         fe_mode      : std_ulogic;
  79         rc           : std_ulogic;  -- with instr_done, enables the CR write
  80         is_cmp       : std_ulogic;  -- with instr_done, enables the CR write
  81         single_prec  : std_ulogic;
  82         fpscr        : std_ulogic_vector(31 downto 0);  -- bit FPSCR_FEX drives e_out.exception
  83         a            : fpu_reg_type;
  84         b            : fpu_reg_type;  -- mantissa bits 55..46 address the inverse lookup table
  85         c            : fpu_reg_type;
  86         r            : std_ulogic_vector(63 downto 0);  -- 10.54 format
  87         s            : std_ulogic_vector(55 downto 0);  -- extended fraction
  88         x            : std_ulogic;
  89         p            : std_ulogic_vector(63 downto 0);  -- 8.56 format
  90         y            : std_ulogic_vector(63 downto 0);  -- 8.56 format
  91         result_sign  : std_ulogic;
  92         result_class : fp_number_class;
  93         result_exp   : signed(EXP_BITS-1 downto 0);
  94         shift        : signed(EXP_BITS-1 downto 0);
  95         writing_back : std_ulogic;  -- drives w_out.write_enable
  96         int_result   : std_ulogic;
  97         cr_result    : std_ulogic_vector(3 downto 0);  -- replicated into all eight 4-bit fields of w_out.write_cr_data
  98         cr_mask      : std_ulogic_vector(7 downto 0);  -- drives w_out.write_cr_mask
  99         old_exc      : std_ulogic_vector(4 downto 0);
 100         update_fprf  : std_ulogic;
 101         quieten_nan  : std_ulogic;
 102         tiny         : std_ulogic;
 103         denorm       : std_ulogic;
 104         round_mode   : std_ulogic_vector(2 downto 0);
 105         is_subtract  : std_ulogic;
 106         exp_cmp      : std_ulogic;
 107         madd_cmp     : std_ulogic;
 108         add_bsmall   : std_ulogic;
 109         is_multiply  : std_ulogic;
 110         is_sqrt      : std_ulogic;  -- selects the 1/sqrt(x) part of the inverse lookup table
 111         first        : std_ulogic;
 112         count        : unsigned(1 downto 0);
 113         doing_ftdiv  : std_ulogic_vector(1 downto 0);
 114     end record;
 115
 116     type lookup_table is array(0 to 1023) of std_ulogic_vector(17 downto 0);  -- 1024 entries of 18 bits
 117
 118     signal r, rin : reg_type;  -- r: registered state; rin: value loaded into r on the next clock edge
 119
 120     signal fp_result     : std_ulogic_vector(63 downto 0);  -- drives w_out.write_data
 121     signal opsel_a       : std_ulogic_vector(1 downto 0);
 122     signal opsel_b       : std_ulogic_vector(1 downto 0);
 123     signal opsel_r       : std_ulogic_vector(1 downto 0);
 124     signal opsel_s       : std_ulogic_vector(1 downto 0);
 125     signal opsel_ainv    : std_ulogic;
 126     signal opsel_amask   : std_ulogic;
 127     signal opsel_binv    : std_ulogic;
 128     signal in_a          : std_ulogic_vector(63 downto 0);
 129     signal in_b          : std_ulogic_vector(63 downto 0);
 130     signal result        : std_ulogic_vector(63 downto 0);
 131     signal carry_in      : std_ulogic;
 132     signal lost_bits     : std_ulogic;
 133     signal r_hi_nz       : std_ulogic;
 134     signal r_lo_nz       : std_ulogic;
 135     signal s_nz          : std_ulogic;
 136     signal misc_sel      : std_ulogic_vector(3 downto 0);
 137     signal f_to_multiply : MultiplyInputType;  -- m_in of the fpu_multiply_0 instance
 138     signal multiply_to_f : MultiplyOutputType;  -- m_out of the fpu_multiply_0 instance
 139     signal msel_1        : std_ulogic_vector(1 downto 0);
 140     signal msel_2        : std_ulogic_vector(1 downto 0);
 141     signal msel_add      : std_ulogic_vector(1 downto 0);
 142     signal msel_inv      : std_ulogic;
 143     signal inverse_est   : std_ulogic_vector(18 downto 0);  -- registered lookup-table entry with a leading '1' prepended
 144
 145     -- opsel values: 2-bit selector encodings (presumably AIN_* for opsel_a, BIN_* for opsel_b, RES_* for opsel_r, S_* for opsel_s; confirm in fpu_1)
 146     constant AIN_R    : std_ulogic_vector(1 downto 0) := "00";
 147     constant AIN_A    : std_ulogic_vector(1 downto 0) := "01";
 148     constant AIN_B    : std_ulogic_vector(1 downto 0) := "10";
 149     constant AIN_C    : std_ulogic_vector(1 downto 0) := "11";
 150
 151     constant BIN_ZERO : std_ulogic_vector(1 downto 0) := "00";
 152     constant BIN_R    : std_ulogic_vector(1 downto 0) := "01";
 153     constant BIN_MASK : std_ulogic_vector(1 downto 0) := "10";
 154     constant BIN_PS6  : std_ulogic_vector(1 downto 0) := "11";
 155
 156     constant RES_SUM   : std_ulogic_vector(1 downto 0) := "00";
 157     constant RES_SHIFT : std_ulogic_vector(1 downto 0) := "01";
 158     constant RES_MULT  : std_ulogic_vector(1 downto 0) := "10";
 159     constant RES_MISC  : std_ulogic_vector(1 downto 0) := "11";
 160
 161     constant S_ZERO  : std_ulogic_vector(1 downto 0) := "00";
 162     constant S_NEG   : std_ulogic_vector(1 downto 0) := "01";
 163     constant S_SHIFT : std_ulogic_vector(1 downto 0) := "10";
 164     constant S_MULT  : std_ulogic_vector(1 downto 0) := "11";
 165
 166     -- msel values: 2-bit selector encodings (presumably MUL1_* for msel_1, MUL2_* for msel_2, MULADD_* for msel_add; confirm in fpu_1)
 167     constant MUL1_A : std_ulogic_vector(1 downto 0) := "00";
 168     constant MUL1_B : std_ulogic_vector(1 downto 0) := "01";
 169     constant MUL1_Y : std_ulogic_vector(1 downto 0) := "10";
 170     constant MUL1_R : std_ulogic_vector(1 downto 0) := "11";
 171
 172     constant MUL2_C   : std_ulogic_vector(1 downto 0) := "00";
 173     constant MUL2_LUT : std_ulogic_vector(1 downto 0) := "01";
 174     constant MUL2_P   : std_ulogic_vector(1 downto 0) := "10";
 175     constant MUL2_R   : std_ulogic_vector(1 downto 0) := "11";
 176
 177     constant MULADD_ZERO  : std_ulogic_vector(1 downto 0) := "00";
 178     constant MULADD_CONST : std_ulogic_vector(1 downto 0) := "01";
 179     constant MULADD_A     : std_ulogic_vector(1 downto 0) := "10";
 180     constant MULADD_RS    : std_ulogic_vector(1 downto 0) := "11";
 181
 182     -- Inverse lookup table, indexed by a 2-bit table select above the top 8 fraction bits.
 183     -- The first 256 entries are the reciprocal (1/x) lookup table,
 184     -- and the remaining 768 entries are the reciprocal square root table.
 185     -- Output range is [0.5, 1) in 0.19 format, though the top
 186     -- bit isn't stored since it is always 1.
 187     -- Each output value is the inverse of the center of the input
 188     -- range for the value, i.e. in the 1/x table entry 0 is 1 / (1 + 1/512),
 189     -- entry 1 is 1 / (1 + 3/512), etc.
 190     signal inverse_table : lookup_table := (
 191         -- 1/x lookup table
 192         -- Unit bit is assumed to be 1, so input range is [1, 2)
 193         18x"3fc01", 18x"3f411", 18x"3ec31", 18x"3e460", 18x"3dc9f", 18x"3d4ec", 18x"3cd49", 18x"3c5b5",
 194         18x"3be2f", 18x"3b6b8", 18x"3af4f", 18x"3a7f4", 18x"3a0a7", 18x"39968", 18x"39237", 18x"38b14",
 195         18x"383fe", 18x"37cf5", 18x"375f9", 18x"36f0a", 18x"36828", 18x"36153", 18x"35a8a", 18x"353ce",
 196         18x"34d1e", 18x"3467a", 18x"33fe3", 18x"33957", 18x"332d7", 18x"32c62", 18x"325f9", 18x"31f9c",
 197         18x"3194a", 18x"31303", 18x"30cc7", 18x"30696", 18x"30070", 18x"2fa54", 18x"2f443", 18x"2ee3d",
 198         18x"2e841", 18x"2e250", 18x"2dc68", 18x"2d68b", 18x"2d0b8", 18x"2caee", 18x"2c52e", 18x"2bf79",
 199         18x"2b9cc", 18x"2b429", 18x"2ae90", 18x"2a900", 18x"2a379", 18x"29dfb", 18x"29887", 18x"2931b",
 200         18x"28db8", 18x"2885e", 18x"2830d", 18x"27dc4", 18x"27884", 18x"2734d", 18x"26e1d", 18x"268f6",
 201         18x"263d8", 18x"25ec1", 18x"259b3", 18x"254ac", 18x"24fad", 18x"24ab7", 18x"245c8", 18x"240e1",
 202         18x"23c01", 18x"23729", 18x"23259", 18x"22d90", 18x"228ce", 18x"22413", 18x"21f60", 18x"21ab4",
 203         18x"2160f", 18x"21172", 18x"20cdb", 18x"2084b", 18x"203c2", 18x"1ff40", 18x"1fac4", 18x"1f64f",
 204         18x"1f1e1", 18x"1ed79", 18x"1e918", 18x"1e4be", 18x"1e069", 18x"1dc1b", 18x"1d7d4", 18x"1d392",
 205         18x"1cf57", 18x"1cb22", 18x"1c6f3", 18x"1c2ca", 18x"1bea7", 18x"1ba8a", 18x"1b672", 18x"1b261",
 206         18x"1ae55", 18x"1aa50", 18x"1a64f", 18x"1a255", 18x"19e60", 18x"19a70", 18x"19686", 18x"192a2",
 207         18x"18ec3", 18x"18ae9", 18x"18715", 18x"18345", 18x"17f7c", 18x"17bb7", 18x"177f7", 18x"1743d",
 208         18x"17087", 18x"16cd7", 18x"1692c", 18x"16585", 18x"161e4", 18x"15e47", 18x"15ab0", 18x"1571d",
 209         18x"1538e", 18x"15005", 18x"14c80", 18x"14900", 18x"14584", 18x"1420d", 18x"13e9b", 18x"13b2d",
 210         18x"137c3", 18x"1345e", 18x"130fe", 18x"12da2", 18x"12a4a", 18x"126f6", 18x"123a7", 18x"1205c",
 211         18x"11d15", 18x"119d2", 18x"11694", 18x"11359", 18x"11023", 18x"10cf1", 18x"109c2", 18x"10698",
 212         18x"10372", 18x"10050", 18x"0fd31", 18x"0fa17", 18x"0f700", 18x"0f3ed", 18x"0f0de", 18x"0edd3",
 213         18x"0eacb", 18x"0e7c7", 18x"0e4c7", 18x"0e1ca", 18x"0ded2", 18x"0dbdc", 18x"0d8eb", 18x"0d5fc",
 214         18x"0d312", 18x"0d02b", 18x"0cd47", 18x"0ca67", 18x"0c78a", 18x"0c4b1", 18x"0c1db", 18x"0bf09",
 215         18x"0bc3a", 18x"0b96e", 18x"0b6a5", 18x"0b3e0", 18x"0b11e", 18x"0ae5f", 18x"0aba3", 18x"0a8eb",
 216         18x"0a636", 18x"0a383", 18x"0a0d4", 18x"09e28", 18x"09b80", 18x"098da", 18x"09637", 18x"09397",
 217         18x"090fb", 18x"08e61", 18x"08bca", 18x"08936", 18x"086a5", 18x"08417", 18x"0818c", 18x"07f04",
 218         18x"07c7e", 18x"079fc", 18x"0777c", 18x"074ff", 18x"07284", 18x"0700d", 18x"06d98", 18x"06b26",
 219         18x"068b6", 18x"0664a", 18x"063e0", 18x"06178", 18x"05f13", 18x"05cb1", 18x"05a52", 18x"057f5",
 220         18x"0559a", 18x"05342", 18x"050ed", 18x"04e9a", 18x"04c4a", 18x"049fc", 18x"047b0", 18x"04567",
 221         18x"04321", 18x"040dd", 18x"03e9b", 18x"03c5c", 18x"03a1f", 18x"037e4", 18x"035ac", 18x"03376",
 222         18x"03142", 18x"02f11", 18x"02ce2", 18x"02ab5", 18x"0288b", 18x"02663", 18x"0243d", 18x"02219",
 223         18x"01ff7", 18x"01dd8", 18x"01bbb", 18x"019a0", 18x"01787", 18x"01570", 18x"0135b", 18x"01149",
 224         18x"00f39", 18x"00d2a", 18x"00b1e", 18x"00914", 18x"0070c", 18x"00506", 18x"00302", 18x"00100",
 225         -- 1/sqrt(x) lookup table
 226         -- Input is in the range [1, 4), i.e. two bits to the left of the
 227         -- binary point.  Those 2 bits index the following 3 blocks of 256 values.
 228         -- 1.0 ... 1.9999
 229         18x"3fe00", 18x"3fa06", 18x"3f612", 18x"3f224", 18x"3ee3a", 18x"3ea58", 18x"3e67c", 18x"3e2a4",
 230         18x"3ded2", 18x"3db06", 18x"3d73e", 18x"3d37e", 18x"3cfc2", 18x"3cc0a", 18x"3c85a", 18x"3c4ae",
 231         18x"3c106", 18x"3bd64", 18x"3b9c8", 18x"3b630", 18x"3b29e", 18x"3af10", 18x"3ab86", 18x"3a802",
 232         18x"3a484", 18x"3a108", 18x"39d94", 18x"39a22", 18x"396b6", 18x"3934e", 18x"38fea", 18x"38c8c",
 233         18x"38932", 18x"385dc", 18x"3828a", 18x"37f3e", 18x"37bf6", 18x"378b2", 18x"37572", 18x"37236",
 234         18x"36efe", 18x"36bca", 18x"3689a", 18x"36570", 18x"36248", 18x"35f26", 18x"35c06", 18x"358ea",
 235         18x"355d4", 18x"352c0", 18x"34fb0", 18x"34ca4", 18x"3499c", 18x"34698", 18x"34398", 18x"3409c",
 236         18x"33da2", 18x"33aac", 18x"337bc", 18x"334cc", 18x"331e2", 18x"32efc", 18x"32c18", 18x"32938",
 237         18x"3265a", 18x"32382", 18x"320ac", 18x"31dd8", 18x"31b0a", 18x"3183e", 18x"31576", 18x"312b0",
 238         18x"30fee", 18x"30d2e", 18x"30a74", 18x"307ba", 18x"30506", 18x"30254", 18x"2ffa4", 18x"2fcf8",
 239         18x"2fa4e", 18x"2f7a8", 18x"2f506", 18x"2f266", 18x"2efca", 18x"2ed2e", 18x"2ea98", 18x"2e804",
 240         18x"2e572", 18x"2e2e4", 18x"2e058", 18x"2ddce", 18x"2db48", 18x"2d8c6", 18x"2d646", 18x"2d3c8",
 241         18x"2d14c", 18x"2ced4", 18x"2cc5e", 18x"2c9ea", 18x"2c77a", 18x"2c50c", 18x"2c2a2", 18x"2c038",
 242         18x"2bdd2", 18x"2bb70", 18x"2b90e", 18x"2b6b0", 18x"2b454", 18x"2b1fa", 18x"2afa4", 18x"2ad4e",
 243         18x"2aafc", 18x"2a8ac", 18x"2a660", 18x"2a414", 18x"2a1cc", 18x"29f86", 18x"29d42", 18x"29b00",
 244         18x"298c2", 18x"29684", 18x"2944a", 18x"29210", 18x"28fda", 18x"28da6", 18x"28b74", 18x"28946",
 245         18x"28718", 18x"284ec", 18x"282c4", 18x"2809c", 18x"27e78", 18x"27c56", 18x"27a34", 18x"27816",
 246         18x"275fa", 18x"273e0", 18x"271c8", 18x"26fb0", 18x"26d9c", 18x"26b8a", 18x"2697a", 18x"2676c",
 247         18x"26560", 18x"26356", 18x"2614c", 18x"25f46", 18x"25d42", 18x"25b40", 18x"2593e", 18x"25740",
 248         18x"25542", 18x"25348", 18x"2514e", 18x"24f58", 18x"24d62", 18x"24b6e", 18x"2497c", 18x"2478c",
 249         18x"2459e", 18x"243b0", 18x"241c6", 18x"23fde", 18x"23df6", 18x"23c10", 18x"23a2c", 18x"2384a",
 250         18x"2366a", 18x"2348c", 18x"232ae", 18x"230d2", 18x"22efa", 18x"22d20", 18x"22b4a", 18x"22976",
 251         18x"227a2", 18x"225d2", 18x"22402", 18x"22234", 18x"22066", 18x"21e9c", 18x"21cd2", 18x"21b0a",
 252         18x"21944", 18x"2177e", 18x"215ba", 18x"213fa", 18x"21238", 18x"2107a", 18x"20ebc", 18x"20d00",
 253         18x"20b46", 18x"2098e", 18x"207d6", 18x"20620", 18x"2046c", 18x"202b8", 18x"20108", 18x"1ff58",
 254         18x"1fda8", 18x"1fbfc", 18x"1fa50", 18x"1f8a4", 18x"1f6fc", 18x"1f554", 18x"1f3ae", 18x"1f208",
 255         18x"1f064", 18x"1eec2", 18x"1ed22", 18x"1eb82", 18x"1e9e4", 18x"1e846", 18x"1e6aa", 18x"1e510",
 256         18x"1e378", 18x"1e1e0", 18x"1e04a", 18x"1deb4", 18x"1dd20", 18x"1db8e", 18x"1d9fc", 18x"1d86c",
 257         18x"1d6de", 18x"1d550", 18x"1d3c4", 18x"1d238", 18x"1d0ae", 18x"1cf26", 18x"1cd9e", 18x"1cc18",
 258         18x"1ca94", 18x"1c910", 18x"1c78c", 18x"1c60a", 18x"1c48a", 18x"1c30c", 18x"1c18e", 18x"1c010",
 259         18x"1be94", 18x"1bd1a", 18x"1bba0", 18x"1ba28", 18x"1b8b2", 18x"1b73c", 18x"1b5c6", 18x"1b452",
 260         18x"1b2e0", 18x"1b16e", 18x"1affe", 18x"1ae8e", 18x"1ad20", 18x"1abb4", 18x"1aa46", 18x"1a8dc",
 261         -- 2.0 ... 2.9999
 262         18x"1a772", 18x"1a608", 18x"1a4a0", 18x"1a33a", 18x"1a1d4", 18x"1a070", 18x"19f0c", 18x"19da8",
 263         18x"19c48", 18x"19ae6", 18x"19986", 18x"19828", 18x"196ca", 18x"1956e", 18x"19412", 18x"192b8",
 264         18x"1915e", 18x"19004", 18x"18eae", 18x"18d56", 18x"18c00", 18x"18aac", 18x"18958", 18x"18804",
 265         18x"186b2", 18x"18562", 18x"18412", 18x"182c2", 18x"18174", 18x"18026", 18x"17eda", 18x"17d8e",
 266         18x"17c44", 18x"17afa", 18x"179b2", 18x"1786a", 18x"17724", 18x"175de", 18x"17498", 18x"17354",
 267         18x"17210", 18x"170ce", 18x"16f8c", 18x"16e4c", 18x"16d0c", 18x"16bcc", 18x"16a8e", 18x"16950",
 268         18x"16814", 18x"166d8", 18x"1659e", 18x"16464", 18x"1632a", 18x"161f2", 18x"160ba", 18x"15f84",
 269         18x"15e4e", 18x"15d1a", 18x"15be6", 18x"15ab2", 18x"15980", 18x"1584e", 18x"1571c", 18x"155ec",
 270         18x"154bc", 18x"1538e", 18x"15260", 18x"15134", 18x"15006", 18x"14edc", 18x"14db0", 18x"14c86",
 271         18x"14b5e", 18x"14a36", 18x"1490e", 18x"147e6", 18x"146c0", 18x"1459a", 18x"14476", 18x"14352",
 272         18x"14230", 18x"1410c", 18x"13fea", 18x"13eca", 18x"13daa", 18x"13c8a", 18x"13b6c", 18x"13a4e",
 273         18x"13930", 18x"13814", 18x"136f8", 18x"135dc", 18x"134c2", 18x"133a8", 18x"1328e", 18x"13176",
 274         18x"1305e", 18x"12f48", 18x"12e30", 18x"12d1a", 18x"12c06", 18x"12af2", 18x"129de", 18x"128ca",
 275         18x"127b8", 18x"126a6", 18x"12596", 18x"12486", 18x"12376", 18x"12266", 18x"12158", 18x"1204a",
 276         18x"11f3e", 18x"11e32", 18x"11d26", 18x"11c1a", 18x"11b10", 18x"11a06", 18x"118fc", 18x"117f4",
 277         18x"116ec", 18x"115e4", 18x"114de", 18x"113d8", 18x"112d2", 18x"111ce", 18x"110ca", 18x"10fc6",
 278         18x"10ec2", 18x"10dc0", 18x"10cbe", 18x"10bbc", 18x"10abc", 18x"109bc", 18x"108bc", 18x"107be",
 279         18x"106c0", 18x"105c2", 18x"104c4", 18x"103c8", 18x"102cc", 18x"101d0", 18x"100d6", 18x"0ffdc",
 280         18x"0fee2", 18x"0fdea", 18x"0fcf0", 18x"0fbf8", 18x"0fb02", 18x"0fa0a", 18x"0f914", 18x"0f81e",
 281         18x"0f72a", 18x"0f636", 18x"0f542", 18x"0f44e", 18x"0f35a", 18x"0f268", 18x"0f176", 18x"0f086",
 282         18x"0ef94", 18x"0eea4", 18x"0edb4", 18x"0ecc6", 18x"0ebd6", 18x"0eae8", 18x"0e9fa", 18x"0e90e",
 283         18x"0e822", 18x"0e736", 18x"0e64a", 18x"0e55e", 18x"0e474", 18x"0e38a", 18x"0e2a0", 18x"0e1b8",
 284         18x"0e0d0", 18x"0dfe8", 18x"0df00", 18x"0de1a", 18x"0dd32", 18x"0dc4c", 18x"0db68", 18x"0da82",
 285         18x"0d99e", 18x"0d8ba", 18x"0d7d6", 18x"0d6f4", 18x"0d612", 18x"0d530", 18x"0d44e", 18x"0d36c",
 286         18x"0d28c", 18x"0d1ac", 18x"0d0cc", 18x"0cfee", 18x"0cf0e", 18x"0ce30", 18x"0cd54", 18x"0cc76",
 287         18x"0cb9a", 18x"0cabc", 18x"0c9e0", 18x"0c906", 18x"0c82a", 18x"0c750", 18x"0c676", 18x"0c59c",
 288         18x"0c4c4", 18x"0c3ea", 18x"0c312", 18x"0c23a", 18x"0c164", 18x"0c08c", 18x"0bfb6", 18x"0bee0",
 289         18x"0be0a", 18x"0bd36", 18x"0bc62", 18x"0bb8c", 18x"0baba", 18x"0b9e6", 18x"0b912", 18x"0b840",
 290         18x"0b76e", 18x"0b69c", 18x"0b5cc", 18x"0b4fa", 18x"0b42a", 18x"0b35a", 18x"0b28a", 18x"0b1bc",
 291         18x"0b0ee", 18x"0b01e", 18x"0af50", 18x"0ae84", 18x"0adb6", 18x"0acea", 18x"0ac1e", 18x"0ab52",
 292         18x"0aa86", 18x"0a9bc", 18x"0a8f0", 18x"0a826", 18x"0a75c", 18x"0a694", 18x"0a5ca", 18x"0a502",
 293         18x"0a43a", 18x"0a372", 18x"0a2aa", 18x"0a1e4", 18x"0a11c", 18x"0a056", 18x"09f90", 18x"09ecc",
 294         -- 3.0 ... 3.9999
 295         18x"09e06", 18x"09d42", 18x"09c7e", 18x"09bba", 18x"09af6", 18x"09a32", 18x"09970", 18x"098ae",
 296         18x"097ec", 18x"0972a", 18x"09668", 18x"095a8", 18x"094e8", 18x"09426", 18x"09368", 18x"092a8",
 297         18x"091e8", 18x"0912a", 18x"0906c", 18x"08fae", 18x"08ef0", 18x"08e32", 18x"08d76", 18x"08cba",
 298         18x"08bfe", 18x"08b42", 18x"08a86", 18x"089ca", 18x"08910", 18x"08856", 18x"0879c", 18x"086e2",
 299         18x"08628", 18x"08570", 18x"084b6", 18x"083fe", 18x"08346", 18x"0828e", 18x"081d8", 18x"08120",
 300         18x"0806a", 18x"07fb4", 18x"07efe", 18x"07e48", 18x"07d92", 18x"07cde", 18x"07c2a", 18x"07b76",
 301         18x"07ac2", 18x"07a0e", 18x"0795a", 18x"078a8", 18x"077f4", 18x"07742", 18x"07690", 18x"075de",
 302         18x"0752e", 18x"0747c", 18x"073cc", 18x"0731c", 18x"0726c", 18x"071bc", 18x"0710c", 18x"0705e",
 303         18x"06fae", 18x"06f00", 18x"06e52", 18x"06da4", 18x"06cf6", 18x"06c4a", 18x"06b9c", 18x"06af0",
 304         18x"06a44", 18x"06998", 18x"068ec", 18x"06840", 18x"06796", 18x"066ea", 18x"06640", 18x"06596",
 305         18x"064ec", 18x"06442", 18x"0639a", 18x"062f0", 18x"06248", 18x"061a0", 18x"060f8", 18x"06050",
 306         18x"05fa8", 18x"05f00", 18x"05e5a", 18x"05db4", 18x"05d0e", 18x"05c68", 18x"05bc2", 18x"05b1c",
 307         18x"05a76", 18x"059d2", 18x"0592e", 18x"05888", 18x"057e4", 18x"05742", 18x"0569e", 18x"055fa",
 308         18x"05558", 18x"054b6", 18x"05412", 18x"05370", 18x"052ce", 18x"0522e", 18x"0518c", 18x"050ec",
 309         18x"0504a", 18x"04faa", 18x"04f0a", 18x"04e6a", 18x"04dca", 18x"04d2c", 18x"04c8c", 18x"04bee",
 310         18x"04b50", 18x"04ab0", 18x"04a12", 18x"04976", 18x"048d8", 18x"0483a", 18x"0479e", 18x"04700",
 311         18x"04664", 18x"045c8", 18x"0452c", 18x"04490", 18x"043f6", 18x"0435a", 18x"042c0", 18x"04226",
 312         18x"0418a", 18x"040f0", 18x"04056", 18x"03fbe", 18x"03f24", 18x"03e8c", 18x"03df2", 18x"03d5a",
 313         18x"03cc2", 18x"03c2a", 18x"03b92", 18x"03afa", 18x"03a62", 18x"039cc", 18x"03934", 18x"0389e",
 314         18x"03808", 18x"03772", 18x"036dc", 18x"03646", 18x"035b2", 18x"0351c", 18x"03488", 18x"033f2",
 315         18x"0335e", 18x"032ca", 18x"03236", 18x"031a2", 18x"03110", 18x"0307c", 18x"02fea", 18x"02f56",
 316         18x"02ec4", 18x"02e32", 18x"02da0", 18x"02d0e", 18x"02c7c", 18x"02bec", 18x"02b5a", 18x"02aca",
 317         18x"02a38", 18x"029a8", 18x"02918", 18x"02888", 18x"027f8", 18x"0276a", 18x"026da", 18x"0264a",
 318         18x"025bc", 18x"0252e", 18x"024a0", 18x"02410", 18x"02384", 18x"022f6", 18x"02268", 18x"021da",
 319         18x"0214e", 18x"020c0", 18x"02034", 18x"01fa8", 18x"01f1c", 18x"01e90", 18x"01e04", 18x"01d78",
 320         18x"01cee", 18x"01c62", 18x"01bd8", 18x"01b4c", 18x"01ac2", 18x"01a38", 18x"019ae", 18x"01924",
 321         18x"0189c", 18x"01812", 18x"01788", 18x"01700", 18x"01676", 18x"015ee", 18x"01566", 18x"014de",
 322         18x"01456", 18x"013ce", 18x"01346", 18x"012c0", 18x"01238", 18x"011b2", 18x"0112c", 18x"010a4",
 323         18x"0101e", 18x"00f98", 18x"00f12", 18x"00e8c", 18x"00e08", 18x"00d82", 18x"00cfe", 18x"00c78",
 324         18x"00bf4", 18x"00b70", 18x"00aec", 18x"00a68", 18x"009e4", 18x"00960", 18x"008dc", 18x"00858",
 325         18x"007d6", 18x"00752", 18x"006d0", 18x"0064e", 18x"005cc", 18x"0054a", 18x"004c8", 18x"00446",
 326         18x"003c4", 18x"00342", 18x"002c2", 18x"00240", 18x"001c0", 18x"00140", 18x"000c0", 18x"00040"
 327         );
 328
 329     -- Left and right shifter with 120 bit input and 64 bit output.
 330     -- Shifts inp left by shift bits and returns the upper 64 bits of
 331     -- the result.  The shift parameter is interpreted as a signed
 332     -- number in the range -64..63, with negative values indicating
 333     -- right shifts.  Implemented as three cascaded selection stages.
 334     function shifter_64(inp: std_ulogic_vector(119 downto 0);
 335                         shift: std_ulogic_vector(6 downto 0))
 336         return std_ulogic_vector is
 337         variable s1 : std_ulogic_vector(94 downto 0);  -- after the coarse stage (-64, -32, 0 or +32)
 338         variable s2 : std_ulogic_vector(70 downto 0);  -- after a further left shift by 0, 8, 16 or 24
 339         variable result : std_ulogic_vector(63 downto 0);  -- after the final left shift by 0..7
 340     begin
 341         case shift(6 downto 5) is
 342             when "00" =>  -- shift in 0..31: no coarse shift
 343                 s1 := inp(119 downto 25);
 344             when "01" =>  -- shift in 32..63: left by 32, zero-filled on the right
 345                 s1 := inp(87 downto 0) & "0000000";
 346             when "10" =>  -- shift in -64..-33: right by 64, zero-filled on the left
 347                 s1 := x"0000000000000000" & inp(119 downto 89);
 348             when others =>  -- shift in -32..-1: right by 32, zero-filled on the left
 349                 s1 := x"00000000" & inp(119 downto 57);
 350         end case;
 351         case shift(4 downto 3) is
 352             when "00" =>  -- left by 0
 353                 s2 := s1(94 downto 24);
 354             when "01" =>  -- left by 8
 355                 s2 := s1(86 downto 16);
 356             when "10" =>  -- left by 16
 357                 s2 := s1(78 downto 8);
 358             when others =>  -- left by 24
 359                 s2 := s1(70 downto 0);
 360         end case;
 361         case shift(2 downto 0) is  -- left by 0..7, keeping a 64-bit window
 362             when "000" =>
 363                 result := s2(70 downto 7);
 364             when "001" =>
 365                 result := s2(69 downto 6);
 366             when "010" =>
 367                 result := s2(68 downto 5);
 368             when "011" =>
 369                 result := s2(67 downto 4);
 370             when "100" =>
 371                 result := s2(66 downto 3);
 372             when "101" =>
 373                 result := s2(65 downto 2);
 374             when "110" =>
 375                 result := s2(64 downto 1);
 376             when others =>
 377                 result := s2(63 downto 0);
 378         end case;
 379         return result;
 380     end;
 381
 382     -- Generate a mask with 0-bits on the left and 1-bits on the right which
 383     -- selects the bits that will be lost in doing a right shift.  The shift
 384     -- parameter is the bottom 6 bits of a negative shift count,
 385     -- indicating a right shift.  The low (64 - shift) bits of the result are set.
 386     function right_mask(shift: unsigned(5 downto 0)) return std_ulogic_vector is
 387         variable result: std_ulogic_vector(63 downto 0);
 388     begin
 389         result := (others => '0');
 390         for i in 0 to 63 loop
 391             if i >= shift then
 392                 result(63 - i) := '1';  -- sets bits (63 - shift) downto 0
 393             end if;
 394         end loop;
 395         return result;
 396     end;
 397
 398     -- Split a DP floating-point number into components and work out its class.
 399     -- If is_int = 1, the input is considered an integer: it is copied unchanged into the mantissa with exponent 0.
 400     function decode_dp(fpr: std_ulogic_vector(63 downto 0); is_int: std_ulogic) return fpu_reg_type is
 401         variable r       : fpu_reg_type;
 402         variable exp_nz  : std_ulogic;  -- exponent field is non-zero
 403         variable exp_ao  : std_ulogic;  -- exponent field is all ones
 404         variable frac_nz : std_ulogic;  -- fraction field is non-zero
 405         variable cls     : std_ulogic_vector(2 downto 0);
 406     begin
 407         r.negative := fpr(63);  -- taken from bit 63 in both the floating-point and the integer case
 408         exp_nz := or (fpr(62 downto 52));
 409         exp_ao := and (fpr(62 downto 52));
 410         frac_nz := or (fpr(51 downto 0));
 411         if is_int = '0' then
 412             r.exponent := signed(resize(unsigned(fpr(62 downto 52)), EXP_BITS)) - to_signed(1023, EXP_BITS);  -- remove the bias of 1023
 413             if exp_nz = '0' then
 414                 r.exponent := to_signed(-1022, EXP_BITS);  -- zero exponent field (zero or denormal) uses the minimum exponent
 415             end if;
 416             r.mantissa := "000000000" & exp_nz & fpr(51 downto 0) & "00";  -- unit bit (exp_nz) at bit 54, fraction in bits 53..2
 417             cls := exp_ao & exp_nz & frac_nz;
 418             case cls is
 419                 when "000"  => r.class := ZERO;
 420                 when "001"  => r.class := FINITE;    -- denormalized
 421                 when "010"  => r.class := FINITE;
 422                 when "011"  => r.class := FINITE;
 423                 when "110"  => r.class := INFINITY;
 424                 when others => r.class := NAN;       -- "111": exponent all ones with a non-zero fraction
 425             end case;
 426         else
 427             r.mantissa := fpr;
 428             r.exponent := (others => '0');
 429             if (fpr(63) or exp_nz or frac_nz) = '1' then  -- any bit of the integer is set
 430                 r.class := FINITE;
 431             else
 432                 r.class := ZERO;
 433             end if;
 434         end if;
 435         return r;
 436     end;
 437
 438     -- Construct a DP floating-point result (64 bits) from components; bits not assigned below stay zero
 439     function pack_dp(sign: std_ulogic; class: fp_number_class; exp: signed(EXP_BITS-1 downto 0);
 440                      mantissa: std_ulogic_vector; single_prec: std_ulogic; quieten_nan: std_ulogic)
 441         return std_ulogic_vector is
 442         variable result : std_ulogic_vector(63 downto 0);
 443     begin
 444         result := (others => '0');
 445         result(63) := sign;
 446         case class is
 447             when ZERO =>  -- only the sign bit is set
 448             when FINITE =>
 449                 if mantissa(54) = '1' then
 450                     -- normalized number
 451                     result(62 downto 52) := std_ulogic_vector(resize(exp, 11) + 1023);  -- re-bias into the 11-bit exponent field
 452                 end if;  -- otherwise the exponent field stays zero (denormalized encoding)
 453                 result(51 downto 29) := mantissa(53 downto 31);
 454                 if single_prec = '0' then  -- for single precision the low 29 fraction bits stay zero
 455                     result(28 downto 0) := mantissa(30 downto 2);
 456                 end if;
 457             when INFINITY =>
 458                 result(62 downto 52) := "11111111111";
 459             when NAN =>
 460                 result(62 downto 52) := "11111111111";
 461                 result(51) := quieten_nan or mantissa(53);  -- top fraction bit is forced to 1 when quieten_nan is set
 462                 result(50 downto 29) := mantissa(52 downto 31);
 463                 if single_prec = '0' then
 464                     result(28 downto 0) := mantissa(30 downto 2);
 465                 end if;
 466         end case;
 467         return result;
 468     end;
 469
 470     -- Determine whether to increment when rounding
 471     -- Returns rounding_inc & inexact, i.e. bit 1 = increment, bit 0 = inexact
 472     -- Assumes x includes the bottom 29 bits of the mantissa already
 473     -- if single_prec = 1 (usually arranged by setting set_x = 1 earlier).
 474     function fp_rounding(mantissa: std_ulogic_vector(63 downto 0); x: std_ulogic;
 475                          single_prec: std_ulogic; rn: std_ulogic_vector(2 downto 0);
 476                          sign: std_ulogic)
 477         return std_ulogic_vector is
 478         variable grx : std_ulogic_vector(2 downto 0);  -- the two bits below the result LSB, followed by x
 479         variable ret : std_ulogic_vector(1 downto 0);
 480         variable lsb : std_ulogic;  -- least significant bit kept in the result
 481     begin
 482         if single_prec = '0' then
 483             grx := mantissa(1 downto 0) & x;
 484             lsb := mantissa(2);
 485         else
 486             grx := mantissa(30 downto 29) & x;
 487             lsb := mantissa(31);
 488         end if;
 489         ret(1) := '0';
 490         ret(0) := or (grx);  -- inexact if any discarded bit is set
 491         case rn(1 downto 0) is
 492             when "00" =>        -- round to nearest
 493                 if grx = "100" and rn(2) = '0' then
 494                     ret(1) := lsb; -- tie, round to even
 495                 else
 496                     ret(1) := grx(2);  -- also used for exact ties when rn(2) = 1
 497                 end if;
 498             when "01" =>        -- round towards zero
 499             when others =>      -- round towards +/- inf
 500                 if rn(0) = sign then
 501                     -- round towards greater magnitude
 502                     ret(1) := ret(0);  -- increment only when the result is inexact
 503                 end if;
 504         end case;
 505         return ret;
 506     end;
 507
 508     -- Determine result flags to write into the FPSCR; returns 5 bits (NOTE(review): order looks like FPRF = C, FL, FG, FE, FU; confirm at the caller)
 509     function result_flags(sign: std_ulogic; class: fp_number_class; unitbit: std_ulogic)
 510         return std_ulogic_vector is
 511     begin
 512         case class is
 513             when ZERO =>
 514                 return sign & "0010";  -- first bit carries the sign of the zero
 515             when FINITE =>
 516                 return (not unitbit) & sign & (not sign) & "00";  -- first bit is set when the unit bit is 0
 517             when INFINITY =>
 518                 return '0' & sign & (not sign) & "01";
 519             when NAN =>
 520                 return "10001";  -- same value regardless of sign
 521         end case;
 522     end;
 523
 524 begin
 525     fpu_multiply_0: entity work.multiply  -- multiplier instance: inputs from f_to_multiply, outputs on multiply_to_f
 526         port map (
 527             clk => clk,
 528             m_in => f_to_multiply,
 529             m_out => multiply_to_f
 530             );
 531
 532     fpu_0: process(clk)  -- state register: loads rin into r on each rising clock edge unless rst is set
 533     begin
 534         if rising_edge(clk) then
 535             if rst = '1' then  -- synchronous reset; only the fields listed here are reset
 536                 r.state <= IDLE;
 537                 r.busy <= '0';
 538                 r.instr_done <= '0';
 539                 r.do_intr <= '0';
 540                 r.fpscr <= (others => '0');
 541                 r.writing_back <= '0';
 542             else
 543                 assert not (r.state /= IDLE and e_in.valid = '1') severity failure;  -- a request must not arrive while the FPU is not in IDLE
 544                 r <= rin;
 545             end if;
 546         end if;
 547     end process;
 548
 549     -- Synchronous (registered) read of the lookup table.
         --
         -- The 10-bit table index is built from B's mantissa: bits 53..46 form
         -- the low eight bits, and the top two bits are mantissa bits 55..54
         -- when r.is_sqrt is set and "00" otherwise.  The selected entry, with
         -- a '1' prepended, is stored in inverse_est on the rising clock edge.
 550     lut_access: process(clk)
 551         variable index : std_ulogic_vector(9 downto 0);
 552         variable slot  : natural;
 553     begin
 554         if rising_edge(clk) then
 555             index(7 downto 0) := r.b.mantissa(53 downto 46);
 556             index(9 downto 8) := "00";
 557             if r.is_sqrt = '1' then
 558                 index(9 downto 8) := r.b.mantissa(55 downto 54);
 559             end if;
 560             slot := to_integer(unsigned(index));
 561             inverse_est <= '1' & inverse_table(slot);
 562         end if;
 563     end process;
 564
 565     e_out.busy <= r.busy;
 566     e_out.exception <= r.fpscr(FPSCR_FEX);
 567     e_out.interrupt <= r.do_intr;
 568
 569     w_out.valid <= r.instr_done and not r.do_intr;
 570     w_out.write_enable <= r.writing_back;
 571     w_out.write_reg <= r.dest_fpr;
 572     w_out.write_data <= fp_result;
 573     w_out.write_cr_enable <= r.instr_done and (r.rc or r.is_cmp);
 574     w_out.write_cr_mask <= r.cr_mask;
 575     w_out.write_cr_data <= r.cr_result & r.cr_result & r.cr_result & r.cr_result &
 576                            r.cr_result & r.cr_result & r.cr_result & r.cr_result;
 577
 578     fpu_1: process(all)
 579         variable v           : reg_type;
 580         variable adec        : fpu_reg_type;
 581         variable bdec        : fpu_reg_type;
 582         variable cdec        : fpu_reg_type;
 583         variable fpscr_mask  : std_ulogic_vector(31 downto 0);
 584         variable illegal     : std_ulogic;
 585         variable j, k        : integer;
 586         variable flm         : std_ulogic_vector(7 downto 0);
 587         variable int_input   : std_ulogic;
 588         variable mask        : std_ulogic_vector(63 downto 0);
 589         variable in_a0       : std_ulogic_vector(63 downto 0);
 590         variable in_b0       : std_ulogic_vector(63 downto 0);
 591         variable misc        : std_ulogic_vector(63 downto 0);
 592         variable shift_res   : std_ulogic_vector(63 downto 0);
 593         variable round       : std_ulogic_vector(1 downto 0);
 594         variable update_fx   : std_ulogic;
 595         variable arith_done  : std_ulogic;
 596         variable invalid     : std_ulogic;
 597         variable zero_divide : std_ulogic;
 598         variable mant_nz     : std_ulogic;
 599         variable min_exp     : signed(EXP_BITS-1 downto 0);
 600         variable max_exp     : signed(EXP_BITS-1 downto 0);
 601         variable bias_exp    : signed(EXP_BITS-1 downto 0);
 602         variable new_exp     : signed(EXP_BITS-1 downto 0);
 603         variable exp_tiny    : std_ulogic;
 604         variable exp_huge    : std_ulogic;
 605         variable renormalize : std_ulogic;
 606         variable clz         : std_ulogic_vector(5 downto 0);
 607         variable set_x       : std_ulogic;
 608         variable mshift      : signed(EXP_BITS-1 downto 0);
 609         variable need_check  : std_ulogic;
 610         variable msb         : std_ulogic;
 611         variable is_add      : std_ulogic;
 612         variable longmask    : std_ulogic;
 613         variable set_a       : std_ulogic;
 614         variable set_b       : std_ulogic;
 615         variable set_c       : std_ulogic;
 616         variable set_y       : std_ulogic;
 617         variable set_s       : std_ulogic;
 618         variable qnan_result : std_ulogic;
 619         variable px_nz       : std_ulogic;
 620         variable pcmpb_eq    : std_ulogic;
 621         variable pcmpb_lt    : std_ulogic;
 622         variable pshift      : std_ulogic;
 623         variable renorm_sqrt : std_ulogic;
 624         variable sqrt_exp    : signed(EXP_BITS-1 downto 0);
 625         variable shiftin     : std_ulogic;
 626         variable mulexp      : signed(EXP_BITS-1 downto 0);
 627         variable maddend     : std_ulogic_vector(127 downto 0);
 628     begin
 629         v := r;
 630         illegal := '0';
 631         v.busy := '0';
 632         int_input := '0';
 633
 634         -- capture incoming instruction
 635         if e_in.valid = '1' then
 636             v.insn := e_in.insn;
 637             v.op := e_in.op;
 638             v.fe_mode := or (e_in.fe_mode);
 639             v.dest_fpr := e_in.frt;
 640             v.single_prec := e_in.single;
 641             v.int_result := '0';
 642             v.rc := e_in.rc;
 643             v.is_cmp := e_in.out_cr;
 644             if e_in.out_cr = '0' then
 645                 v.cr_mask := num_to_fxm(1);
 646             else
 647                 v.cr_mask := num_to_fxm(to_integer(unsigned(insn_bf(e_in.insn))));
 648             end if;
 649             int_input := '0';
 650             if e_in.op = OP_FPOP_I then
 651                 int_input := '1';
 652             end if;
 653             v.quieten_nan := '1';
 654             v.tiny := '0';
 655             v.denorm := '0';
 656             v.round_mode := '0' & r.fpscr(FPSCR_RN+1 downto FPSCR_RN);
 657             v.is_subtract := '0';
 658             v.is_multiply := '0';
 659             v.is_sqrt := '0';
 660             v.add_bsmall := '0';
 661             v.doing_ftdiv := "00";
 662
 663             adec := decode_dp(e_in.fra, int_input);
 664             bdec := decode_dp(e_in.frb, int_input);
 665             cdec := decode_dp(e_in.frc, int_input);
 666             v.a := adec;
 667             v.b := bdec;
 668             v.c := cdec;
 669
 670             v.exp_cmp := '0';
 671             if adec.exponent > bdec.exponent then
 672                 v.exp_cmp := '1';
 673             end if;
 674             v.madd_cmp := '0';
 675             if (adec.exponent + cdec.exponent + 1) >= bdec.exponent then
 676                 v.madd_cmp := '1';
 677             end if;
 678         end if;
 679
 680         r_hi_nz <= or (r.r(55 downto 31));
 681         r_lo_nz <= or (r.r(30 downto 2));
 682         s_nz <= or (r.s);
 683
 684         if r.single_prec = '0' then
 685             if r.doing_ftdiv(1) = '0' then
 686                 max_exp := to_signed(1023, EXP_BITS);
 687             else
 688                 max_exp := to_signed(1020, EXP_BITS);
 689             end if;
 690             if r.doing_ftdiv(0) = '0' then
 691                 min_exp := to_signed(-1022, EXP_BITS);
 692             else
 693                 min_exp := to_signed(-1021, EXP_BITS);
 694             end if;
 695             bias_exp := to_signed(1536, EXP_BITS);
 696         else
 697             max_exp := to_signed(127, EXP_BITS);
 698             min_exp := to_signed(-126, EXP_BITS);
 699             bias_exp := to_signed(192, EXP_BITS);
 700         end if;
 701         new_exp := r.result_exp - r.shift;
 702         exp_tiny := '0';
 703         exp_huge := '0';
 704         if new_exp < min_exp then
 705             exp_tiny := '1';
 706         end if;
 707         if new_exp > max_exp then
 708             exp_huge := '1';
 709         end if;
 710
 711         -- Compare P with zero and with B
 712         px_nz := or (r.p(57 downto 4));
 713         pcmpb_eq := '0';
 714         if r.p(59 downto 4) = r.b.mantissa(55 downto 0) then
 715             pcmpb_eq := '1';
 716         end if;
 717         pcmpb_lt := '0';
 718         if unsigned(r.p(59 downto 4)) < unsigned(r.b.mantissa(55 downto 0)) then
 719             pcmpb_lt := '1';
 720         end if;
 721
 722         v.writing_back := '0';
 723         v.instr_done := '0';
 724         v.update_fprf := '0';
 725         v.shift := to_signed(0, EXP_BITS);
 726         v.first := '0';
 727         opsel_a <= AIN_R;
 728         opsel_ainv <= '0';
 729         opsel_amask <= '0';
 730         opsel_b <= BIN_ZERO;
 731         opsel_binv <= '0';
 732         opsel_r <= RES_SUM;
 733         opsel_s <= S_ZERO;
 734         carry_in <= '0';
 735         misc_sel <= "0000";
 736         fpscr_mask := (others => '1');
 737         update_fx := '0';
 738         arith_done := '0';
 739         invalid := '0';
 740         zero_divide := '0';
 741         renormalize := '0';
 742         set_x := '0';
 743         qnan_result := '0';
 744         longmask := r.single_prec;
 745         set_a := '0';
 746         set_b := '0';
 747         set_c := '0';
 748         set_s := '0';
 749         f_to_multiply.is_32bit <= '0';
 750         f_to_multiply.valid <= '0';
 751         msel_1 <= MUL1_A;
 752         msel_2 <= MUL2_C;
 753         msel_add <= MULADD_ZERO;
 754         msel_inv <= '0';
 755         set_y := '0';
 756         pshift := '0';
 757         renorm_sqrt := '0';
 758         shiftin := '0';
 759         case r.state is
 760             when IDLE =>
 761                 if e_in.valid = '1' then
 762                     case e_in.insn(5 downto 1) is
 763                         when "00000" =>
 764                             if e_in.insn(8) = '1' then
 765                                 if e_in.insn(6) = '0' then
 766                                     v.state := DO_FTDIV;
 767                                 else
 768                                     v.state := DO_FTSQRT;
 769                                 end if;
 770                             elsif e_in.insn(7) = '1' then
 771                                 v.state := DO_MCRFS;
 772                             else
 773                                 v.state := DO_FCMP;
 774                             end if;
 775                         when "00110" =>
 776                             if e_in.insn(10) = '0' then
 777                                 if e_in.insn(8) = '0' then
 778                                     v.state := DO_MTFSB;
 779                                 else
 780                                     v.state := DO_MTFSFI;
 781                                 end if;
 782                             else
 783                                 v.state := DO_FMRG;
 784                             end if;
 785                         when "00111" =>
 786                             if e_in.insn(8) = '0' then
 787                                 v.state := DO_MFFS;
 788                             else
 789                                 v.state := DO_MTFSF;
 790                             end if;
 791                         when "01000" =>
 792                             if e_in.insn(9 downto 8) /= "11" then
 793                                 v.state := DO_FMR;
 794                             else
 795                                 v.state := DO_FRI;
 796                             end if;
 797                         when "01100" =>
 798                             v.state := DO_FRSP;
 799                         when "01110" =>
 800                             if int_input = '1' then
 801                                 -- fcfid[u][s]
 802                                 v.state := DO_FCFID;
 803                             else
 804                                 v.state := DO_FCTI;
 805                             end if;
 806                         when "01111" =>
 807                             v.round_mode := "001";
 808                             v.state := DO_FCTI;
 809                         when "10010" =>
 810                             v.state := DO_FDIV;
 811                         when "10100" | "10101" =>
 812                             v.state := DO_FADD;
 813                         when "10110" =>
 814                             v.is_sqrt := '1';
 815                             v.state := DO_FSQRT;
 816                         when "10111" =>
 817                             v.state := DO_FSEL;
 818                         when "11000" =>
 819                             v.state := DO_FRE;
 820                         when "11001" =>
 821                             v.is_multiply := '1';
 822                             v.state := DO_FMUL;
 823                         when "11010" =>
 824                             v.is_sqrt := '1';
 825                             v.state := DO_FRSQRTE;
 826                         when "11100" | "11101" | "11110" | "11111" =>
 827                             v.state := DO_FMADD;
 828                         when others =>
 829                             illegal := '1';
 830                     end case;
 831                 end if;
 832                 v.x := '0';
 833                 v.old_exc := r.fpscr(FPSCR_VX downto FPSCR_XX);
 834                 set_s := '1';
 835
 836             when DO_MCRFS =>
 837                 j := to_integer(unsigned(insn_bfa(r.insn)));
 838                 for i in 0 to 7 loop
 839                     if i = j then
 840                         k := (7 - i) * 4;
 841                         v.cr_result := r.fpscr(k + 3 downto k);
 842                         fpscr_mask(k + 3 downto k) := "0000";
 843                     end if;
 844                 end loop;
 845                 v.fpscr := r.fpscr and (fpscr_mask or x"6007F8FF");
 846                 v.instr_done := '1';
 847                 v.state := IDLE;
 848
 849             when DO_FTDIV =>
 850                 v.instr_done := '1';
 851                 v.state := IDLE;
 852                 v.cr_result := "0000";
 853                 if r.a.class = INFINITY or r.b.class = ZERO or r.b.class = INFINITY or
 854                     (r.b.class = FINITE and r.b.mantissa(53) = '0') then
 855                     v.cr_result(2) := '1';
 856                 end if;
 857                 if r.a.class = NAN or r.a.class = INFINITY or
 858                     r.b.class = NAN or r.b.class = ZERO or r.b.class = INFINITY or
 859                     (r.a.class = FINITE and r.a.exponent <= to_signed(-970, EXP_BITS)) then
 860                     v.cr_result(1) := '1';
 861                 else
 862                     v.doing_ftdiv := "11";
 863                     v.first := '1';
 864                     v.state := FTDIV_1;
 865                     v.instr_done := '0';
 866                 end if;
 867
 868             when DO_FTSQRT =>
                     -- ftsqrt: single-cycle test of operand B.  The
                     -- instruction completes here and only produces a CR
                     -- result; bits 2 and 1 of cr_result are the two test
                     -- flags (fg_flag and fe_flag in Power ISA terms).
 869                 v.instr_done := '1';
 870                 v.state := IDLE;
 871                 v.cr_result := "0000";
                     -- Flag in bit 2: B is zero, infinite, or finite with
                     -- mantissa bit 53 clear.
                     -- NOTE(review): bit 53 is the bit tested for signalling
                     -- NaNs elsewhere in this process, whereas DO_FMUL tests
                     -- bit 54 to detect denormal operands -- confirm which bit
                     -- is intended for the denormal check here.
 872                 if r.b.class = ZERO or r.b.class = INFINITY or
 873                     (r.b.class = FINITE and r.b.mantissa(53) = '0') then
 874                     v.cr_result(2) := '1';
 875                 end if;
                     -- Flag in bit 1: B is a NaN, infinite, zero, negative, or
                     -- has an exponent of -970 or less.  This must SET the bit
                     -- (as DO_FTDIV does for its equivalent test); writing '0'
                     -- here would be a no-op because cr_result was just
                     -- cleared, and the flag could never be reported.
 876                 if r.b.class = NAN or r.b.class = INFINITY or r.b.class = ZERO
 877                     or r.b.negative = '1' or r.b.exponent <= to_signed(-970, EXP_BITS) then
 878                     v.cr_result(1) := '1';
 879                 end if;
 880
 881             when DO_FCMP =>
 882                 -- fcmp[uo]
 883                 v.instr_done := '1';
 884                 v.state := IDLE;
 885                 update_fx := '1';
 886                 opsel_a <= AIN_B;
 887                 opsel_r <= RES_SUM;
 888                 v.result_exp := r.b.exponent;
 889                 if (r.a.class = NAN and r.a.mantissa(53) = '0') or
 890                     (r.b.class = NAN and r.b.mantissa(53) = '0') then
 891                     -- Signalling NAN
 892                     v.fpscr(FPSCR_VXSNAN) := '1';
 893                     if r.insn(6) = '1' and r.fpscr(FPSCR_VE) = '0' then
 894                         v.fpscr(FPSCR_VXVC) := '1';
 895                     end if;
 896                     invalid := '1';
 897                     v.cr_result := "0001";          -- unordered
 898                 elsif r.a.class = NAN or r.b.class = NAN then
 899                     if r.insn(6) = '1' then
 900                         -- fcmpo
 901                         v.fpscr(FPSCR_VXVC) := '1';
 902                         invalid := '1';
 903                     end if;
 904                     v.cr_result := "0001";          -- unordered
 905                 elsif r.a.class = ZERO and r.b.class = ZERO then
 906                     v.cr_result := "0010";          -- equal
 907                 elsif r.a.negative /= r.b.negative then
 908                     v.cr_result := r.a.negative & r.b.negative & "00";
 909                 elsif r.a.class = ZERO then
 910                     -- A and B are the same sign from here down
 911                     v.cr_result := not r.b.negative & r.b.negative & "00";
 912                 elsif r.a.class = INFINITY then
 913                     if r.b.class = INFINITY then
 914                         v.cr_result := "0010";
 915                     else
 916                         v.cr_result := r.a.negative & not r.a.negative & "00";
 917                     end if;
 918                 elsif r.b.class = ZERO then
 919                     -- A is finite from here down
 920                     v.cr_result := r.a.negative & not r.a.negative & "00";
 921                 elsif r.b.class = INFINITY then
 922                     v.cr_result := not r.b.negative & r.b.negative & "00";
 923                 elsif r.exp_cmp = '1' then
 924                     -- A and B are both finite from here down
 925                     v.cr_result := r.a.negative & not r.a.negative & "00";
 926                 elsif r.a.exponent /= r.b.exponent then
 927                     -- A exponent is smaller than B
 928                     v.cr_result := not r.a.negative & r.a.negative & "00";
 929                 else
 930                     -- Prepare to subtract mantissas, put B in R
 931                     v.cr_result := "0000";
 932                     v.instr_done := '0';
 933                     v.state := CMP_1;
 934                 end if;
 935                 v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result;
 936
 937             when DO_MTFSB =>
 938                 -- mtfsb{0,1}
 939                 j := to_integer(unsigned(insn_bt(r.insn)));
 940                 for i in 0 to 31 loop
 941                     if i = j then
 942                         v.fpscr(31 - i) := r.insn(6);
 943                     end if;
 944                 end loop;
 945                 v.instr_done := '1';
 946                 v.state := IDLE;
 947
 948             when DO_MTFSFI =>
 949                 -- mtfsfi
 950                 j := to_integer(unsigned(insn_bf(r.insn)));
 951                 if r.insn(16) = '0' then
 952                     for i in 0 to 7 loop
 953                         if i = j then
 954                             k := (7 - i) * 4;
 955                             v.fpscr(k + 3 downto k) := insn_u(r.insn);
 956                         end if;
 957                     end loop;
 958                 end if;
 959                 v.instr_done := '1';
 960                 v.state := IDLE;
 961
 962             when DO_FMRG =>
 963                 -- fmrgew, fmrgow
 964                 opsel_r <= RES_MISC;
 965                 misc_sel <= "01" & r.insn(8) & '0';
 966                 v.int_result := '1';
 967                 v.writing_back := '1';
 968                 v.instr_done := '1';
 969                 v.state := IDLE;
 970
 971             when DO_MFFS =>
 972                 v.int_result := '1';
 973                 v.writing_back := '1';
 974                 opsel_r <= RES_MISC;
 975                 case r.insn(20 downto 16) is
 976                     when "00000" =>
 977                         -- mffs
 978                     when "00001" =>
 979                         -- mffsce
 980                         v.fpscr(FPSCR_VE downto FPSCR_XE) := "00000";
 981                     when "10100" | "10101" =>
 982                         -- mffscdrn[i] (but we don't implement DRN)
 983                         fpscr_mask := x"000000FF";
 984                     when "10110" =>
 985                         -- mffscrn
 986                         fpscr_mask := x"000000FF";
 987                         v.fpscr(FPSCR_RN+1 downto FPSCR_RN) :=
 988                             r.b.mantissa(FPSCR_RN+1 downto FPSCR_RN);
 989                     when "10111" =>
 990                         -- mffscrni
 991                         fpscr_mask := x"000000FF";
 992                         v.fpscr(FPSCR_RN+1 downto FPSCR_RN) := r.insn(12 downto 11);
 993                     when "11000" =>
 994                         -- mffsl
 995                         fpscr_mask := x"0007F0FF";
 996                     when others =>
 997                         illegal := '1';
 998                 end case;
 999                 v.instr_done := '1';
1000                 v.state := IDLE;
1001
1002             when DO_MTFSF =>
1003                 if r.insn(25) = '1' then
1004                     flm := x"FF";
1005                 elsif r.insn(16) = '1' then
1006                     flm := x"00";
1007                 else
1008                     flm := r.insn(24 downto 17);
1009                 end if;
1010                 for i in 0 to 7 loop
1011                     k := i * 4;
1012                     if flm(i) = '1' then
1013                         v.fpscr(k + 3 downto k) := r.b.mantissa(k + 3 downto k);
1014                     end if;
1015                 end loop;
1016                 v.instr_done := '1';
1017                 v.state := IDLE;
1018
1019             when DO_FMR =>
1020                 opsel_a <= AIN_B;
1021                 v.result_class := r.b.class;
1022                 v.result_exp := r.b.exponent;
1023                 v.quieten_nan := '0';
1024                 if r.insn(9) = '1' then
1025                     v.result_sign := '0';              -- fabs
1026                 elsif r.insn(8) = '1' then
1027                     v.result_sign := '1';              -- fnabs
1028                 elsif r.insn(7) = '1' then
1029                     v.result_sign := r.b.negative;     -- fmr
1030                 elsif r.insn(6) = '1' then
1031                     v.result_sign := not r.b.negative; -- fneg
1032                 else
1033                     v.result_sign := r.a.negative;     -- fcpsgn
1034                 end if;
1035                 v.writing_back := '1';
1036                 v.instr_done := '1';
1037                 v.state := IDLE;
1038
1039             when DO_FRI =>    -- fri[nzpm]
1040                 opsel_a <= AIN_B;
1041                 v.result_class := r.b.class;
1042                 v.result_sign := r.b.negative;
1043                 v.result_exp := r.b.exponent;
1044                 v.fpscr(FPSCR_FR) := '0';
1045                 v.fpscr(FPSCR_FI) := '0';
1046                 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1047                     -- Signalling NAN
1048                     v.fpscr(FPSCR_VXSNAN) := '1';
1049                     invalid := '1';
1050                 end if;
1051                 if r.b.class = FINITE then
1052                     if r.b.exponent >= to_signed(52, EXP_BITS) then
1053                         -- integer already, no rounding required
1054                         arith_done := '1';
1055                     else
1056                         v.shift := r.b.exponent - to_signed(52, EXP_BITS);
1057                         v.state := FRI_1;
1058                         v.round_mode := '1' & r.insn(7 downto 6);
1059                     end if;
1060                 else
1061                     arith_done := '1';
1062                 end if;
1063
1064             when DO_FRSP =>
1065                 opsel_a <= AIN_B;
1066                 v.result_class := r.b.class;
1067                 v.result_sign := r.b.negative;
1068                 v.result_exp := r.b.exponent;
1069                 v.fpscr(FPSCR_FR) := '0';
1070                 v.fpscr(FPSCR_FI) := '0';
1071                 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1072                     -- Signalling NAN
1073                     v.fpscr(FPSCR_VXSNAN) := '1';
1074                     invalid := '1';
1075                 end if;
1076                 set_x := '1';
1077                 if r.b.class = FINITE then
1078                     if r.b.exponent < to_signed(-126, EXP_BITS) then
1079                         v.shift := r.b.exponent - to_signed(-126, EXP_BITS);
1080                         v.state := ROUND_UFLOW;
1081                     elsif r.b.exponent > to_signed(127, EXP_BITS) then
1082                         v.state := ROUND_OFLOW;
1083                     else
1084                         v.shift := to_signed(-2, EXP_BITS);
1085                         v.state := ROUNDING;
1086                     end if;
1087                 else
1088                     arith_done := '1';
1089                 end if;
1090
1091             when DO_FCTI =>
1092                 -- instr bit 9: 1=dword 0=word
1093                 -- instr bit 8: 1=unsigned 0=signed
1094                 -- instr bit 1: 1=round to zero 0=use fpscr[RN]
1095                 opsel_a <= AIN_B;
1096                 v.result_class := r.b.class;
1097                 v.result_sign := r.b.negative;
1098                 v.result_exp := r.b.exponent;
1099                 v.fpscr(FPSCR_FR) := '0';
1100                 v.fpscr(FPSCR_FI) := '0';
1101                 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1102                     -- Signalling NAN
1103                     v.fpscr(FPSCR_VXSNAN) := '1';
1104                     invalid := '1';
1105                 end if;
1106
1107                 v.int_result := '1';
1108                 case r.b.class is
1109                     when ZERO =>
1110                         arith_done := '1';
1111                     when FINITE =>
1112                         if r.b.exponent >= to_signed(64, EXP_BITS) or
1113                             (r.insn(9) = '0' and r.b.exponent >= to_signed(32, EXP_BITS)) then
1114                             v.state := INT_OFLOW;
1115                         elsif r.b.exponent >= to_signed(52, EXP_BITS) then
1116                             -- integer already, no rounding required,
1117                             -- shift into final position
1118                             v.shift := r.b.exponent - to_signed(54, EXP_BITS);
1119                             if r.insn(8) = '1' and r.b.negative = '1' then
1120                                 v.state := INT_OFLOW;
1121                             else
1122                                 v.state := INT_ISHIFT;
1123                             end if;
1124                         else
1125                             v.shift := r.b.exponent - to_signed(52, EXP_BITS);
1126                             v.state := INT_SHIFT;
1127                         end if;
1128                     when INFINITY | NAN =>
1129                         v.state := INT_OFLOW;
1130                 end case;
1131
1132             when DO_FCFID =>
1133                 v.result_sign := '0';
1134                 opsel_a <= AIN_B;
1135                 if r.insn(8) = '0' and r.b.negative = '1' then
1136                     -- fcfid[s] with negative operand, set R = -B
1137                     opsel_ainv <= '1';
1138                     carry_in <= '1';
1139                     v.result_sign := '1';
1140                 end if;
1141                 v.result_class := r.b.class;
1142                 v.result_exp := to_signed(54, EXP_BITS);
1143                 v.fpscr(FPSCR_FR) := '0';
1144                 v.fpscr(FPSCR_FI) := '0';
1145                 if r.b.class = ZERO then
1146                     arith_done := '1';
1147                 else
1148                     v.state := FINISH;
1149                 end if;
1150
1151             when DO_FADD =>
1152                 -- fadd[s] and fsub[s]
1153                 opsel_a <= AIN_A;
1154                 v.result_sign := r.a.negative;
1155                 v.result_class := r.a.class;
1156                 v.result_exp := r.a.exponent;
1157                 v.fpscr(FPSCR_FR) := '0';
1158                 v.fpscr(FPSCR_FI) := '0';
1159                 is_add := r.a.negative xor r.b.negative xor r.insn(1);
1160                 if r.a.class = FINITE and r.b.class = FINITE then
1161                     v.is_subtract := not is_add;
1162                     v.add_bsmall := r.exp_cmp;
1163                     if r.exp_cmp = '0' then
1164                         v.shift := r.a.exponent - r.b.exponent;
1165                         v.result_sign := r.b.negative xnor r.insn(1);
1166                         if r.a.exponent = r.b.exponent then
1167                             v.state := ADD_2;
1168                         else
1169                             v.state := ADD_SHIFT;
1170                         end if;
1171                     else
1172                         opsel_a <= AIN_B;
1173                         v.shift := r.b.exponent - r.a.exponent;
1174                         v.result_exp := r.b.exponent;
1175                         v.state := ADD_SHIFT;
1176                     end if;
1177                 else
1178                     if (r.a.class = NAN and r.a.mantissa(53) = '0') or
1179                         (r.b.class = NAN and r.b.mantissa(53) = '0') then
1180                         -- Signalling NAN
1181                         v.fpscr(FPSCR_VXSNAN) := '1';
1182                         invalid := '1';
1183                     end if;
1184                     if r.a.class = NAN then
1185                         -- nothing to do, result is A
1186                     elsif r.b.class = NAN then
1187                         v.result_class := NAN;
1188                         v.result_sign := r.b.negative;
1189                         opsel_a <= AIN_B;
1190                     elsif r.a.class = INFINITY and r.b.class = INFINITY and is_add = '0' then
1191                         -- invalid operation, construct QNaN
1192                         v.fpscr(FPSCR_VXISI) := '1';
1193                         qnan_result := '1';
1194                     elsif r.a.class = ZERO and r.b.class = ZERO and is_add = '0' then
1195                         -- return -0 for rounding to -infinity
1196                         v.result_sign := r.round_mode(1) and r.round_mode(0);
1197                     elsif r.a.class = INFINITY or r.b.class = ZERO then
1198                         -- nothing to do, result is A
1199                     else
1200                         -- result is +/- B
1201                         v.result_sign := r.b.negative xnor r.insn(1);
1202                         v.result_class := r.b.class;
1203                         v.result_exp := r.b.exponent;
1204                         opsel_a <= AIN_B;
1205                     end if;
1206                     arith_done := '1';
1207                 end if;
1208
1209             when DO_FMUL =>
1210                 -- fmul[s]
                     -- Entry state for multiply (operands A and C).  By default the
                     -- result takes its sign, class and exponent from A and FPSCR
                     -- FR/FI are cleared; the branches below override as needed.
1211                 opsel_a <= AIN_A;
1212                 v.result_sign := r.a.negative;
1213                 v.result_class := r.a.class;
1214                 v.result_exp := r.a.exponent;
1215                 v.fpscr(FPSCR_FR) := '0';
1216                 v.fpscr(FPSCR_FI) := '0';
1217                 if r.a.class = FINITE and r.c.class = FINITE then
                         -- Both finite: sign is the XOR of the operand signs and the
                         -- exponents are added.
1218                     v.result_sign := r.a.negative xor r.c.negative;
1219                     v.result_exp := r.a.exponent + r.c.exponent;
1220                     -- Renormalize denorm operands
                         -- (mantissa bit 54 clear is what is tested for here; A is
                         -- handled first, then C)
1221                     if r.a.mantissa(54) = '0' then
1222                         v.state := RENORM_A;
1223                     elsif r.c.mantissa(54) = '0' then
1224                         opsel_a <= AIN_C;
1225                         v.state := RENORM_C;
1226                     else
                             -- both operands normalized: start the multiplier now
1227                         f_to_multiply.valid <= '1';
1228                         v.state := MULT_1;
1229                     end if;
1230                 else
                         -- At least one operand is zero, infinity or NaN: the result
                         -- is decided here without using the multiplier.
1231                     if (r.a.class = NAN and r.a.mantissa(53) = '0') or
1232                         (r.c.class = NAN and r.c.mantissa(53) = '0') then
1233                         -- Signalling NAN
1234                         v.fpscr(FPSCR_VXSNAN) := '1';
1235                         invalid := '1';
1236                     end if;
1237                     if r.a.class = NAN then
1238                         -- result is A
1239                     elsif r.c.class = NAN then
                             -- result is C
1240                         v.result_class := NAN;
1241                         v.result_sign := r.c.negative;
1242                         opsel_a <= AIN_C;
1243                     elsif (r.a.class = INFINITY and r.c.class = ZERO) or
1244                         (r.a.class = ZERO and r.c.class = INFINITY) then
1245                         -- invalid operation, construct QNaN
1246                         v.fpscr(FPSCR_VXIMZ) := '1';
1247                         qnan_result := '1';
1248                     elsif r.a.class = ZERO or r.a.class = INFINITY then
1249                         -- result is +/- A
1250                         v.result_sign := r.a.negative xor r.c.negative;
1251                     else
1252                         -- r.c.class is ZERO or INFINITY
1253                         v.result_class := r.c.class;
1254                         v.result_sign := r.a.negative xor r.c.negative;
1255                     end if;
1256                     arith_done := '1';
1257                 end if;
1258
1259             when DO_FDIV =>
                     -- Entry state for divide (A / B).  FPSCR FR/FI are cleared and
                     -- the result class defaults to that of A.
1260                 opsel_a <= AIN_A;
1261                 v.result_sign := r.a.negative;
1262                 v.result_class := r.a.class;
1263                 v.result_exp := r.a.exponent;
1264                 v.fpscr(FPSCR_FR) := '0';
1265                 v.fpscr(FPSCR_FI) := '0';
                     -- These two assignments override the sign/exponent defaults
                     -- set just above: quotient sign is the XOR of the operand
                     -- signs, quotient exponent is the exponent difference.
1266                 v.result_sign := r.a.negative xor r.b.negative;
1267                 v.result_exp := r.a.exponent - r.b.exponent;
                     -- DIV_2 uses count = 0 to select the lookup-table input
1268                 v.count := "00";
1269                 if r.a.class = FINITE and r.b.class = FINITE then
1270                     -- Renormalize denorm operands
1271                     if r.a.mantissa(54) = '0' then
1272                         v.state := RENORM_A;
1273                     elsif r.b.mantissa(54) = '0' then
1274                         opsel_a <= AIN_B;
1275                         v.state := RENORM_B;
1276                     else
                             -- both normalized: go straight to the division iteration
1277                         v.first := '1';
1278                         v.state := DIV_2;
1279                     end if;
1280                 else
                         -- Special operands: result is decided here.
1281                     if (r.a.class = NAN and r.a.mantissa(53) = '0') or
1282                         (r.b.class = NAN and r.b.mantissa(53) = '0') then
1283                         -- Signalling NAN
1284                         v.fpscr(FPSCR_VXSNAN) := '1';
1285                         invalid := '1';
1286                     end if;
1287                     if r.a.class = NAN then
1288                         -- result is A
                             -- (restore A's own sign, undoing the XOR above)
1289                         v.result_sign := r.a.negative;
1290                     elsif r.b.class = NAN then
                             -- result is B
1291                         v.result_class := NAN;
1292                         v.result_sign := r.b.negative;
1293                         opsel_a <= AIN_B;
1294                     elsif r.b.class = INFINITY then
1295                         if r.a.class = INFINITY then
                                 -- infinity / infinity: invalid, construct QNaN
1296                             v.fpscr(FPSCR_VXIDI) := '1';
1297                             qnan_result := '1';
1298                         else
                                 -- zero or finite / infinity gives zero
1299                             v.result_class := ZERO;
1300                         end if;
1301                     elsif r.b.class = ZERO then
1302                         if r.a.class = ZERO then
                                 -- zero / zero: invalid, construct QNaN
1303                             v.fpscr(FPSCR_VXZDZ) := '1';
1304                             qnan_result := '1';
1305                         else
                                 -- finite / zero flags zero_divide; infinity / zero
                                 -- does not.  Either way the result is infinity.
1306                             if r.a.class = FINITE then
1307                                 zero_divide := '1';
1308                             end if;
1309                             v.result_class := INFINITY;
1310                         end if;
1311                     -- else r.b.class = FINITE, result_class = r.a.class
1312                     end if;
1313                     arith_done := '1';
1314                 end if;
1315
1316             when DO_FSEL =>
                     -- fsel: the result is C when A is zero, or when A is
                     -- non-negative and not a NaN; otherwise the result is B.
                     -- FPSCR FR/FI are cleared.
1317                 opsel_a <= AIN_A;
1318                 v.fpscr(FPSCR_FR) := '0';
1319                 v.fpscr(FPSCR_FI) := '0';
1320                 if r.a.class = ZERO or (r.a.negative = '0' and r.a.class /= NAN) then
1321                     v.result_sign := r.c.negative;
1322                     v.result_exp := r.c.exponent;
1323                     v.result_class := r.c.class;
1324                     opsel_a <= AIN_C;
1325                 else
1326                     v.result_sign := r.b.negative;
1327                     v.result_exp := r.b.exponent;
1328                     v.result_class := r.b.class;
1329                     opsel_a <= AIN_B;
1330                 end if;
                     -- NOTE(review): presumably cleared so that a selected NaN is
                     -- passed through unmodified -- confirm where quieten_nan is used.
1331                 v.quieten_nan := '0';
1332                 arith_done := '1';
1333
1334             when DO_FSQRT =>
                     -- Entry state for square root of B.  Result class and sign
                     -- default to those of B; FPSCR FR/FI are cleared.
1335                 opsel_a <= AIN_B;
1336                 v.result_class := r.b.class;
1337                 v.result_sign := r.b.negative;
1338                 v.fpscr(FPSCR_FR) := '0';
1339                 v.fpscr(FPSCR_FI) := '0';
                     -- a NaN with mantissa bit 53 clear is treated as signalling
1340                 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1341                     v.fpscr(FPSCR_VXSNAN) := '1';
1342                     invalid := '1';
1343                 end if;
1344                 case r.b.class is
1345                     when FINITE =>
1346                         v.result_exp := r.b.exponent;
1347                         if r.b.negative = '1' then
                                 -- square root of a negative number: invalid, QNaN
1348                             v.fpscr(FPSCR_VXSQRT) := '1';
1349                             qnan_result := '1';
1350                             arith_done := '1';
1351                         elsif r.b.mantissa(54) = '0' then
                                 -- denormalized input: renormalize B first
1352                             v.state := RENORM_B;
1353                         elsif r.b.exponent(0) = '0' then
                                 -- normalized with even exponent: start the iteration
1354                             v.state := SQRT_1;
1355                         else
                                 -- normalized with odd exponent: go via RENORM_B2 with
                                 -- shift = 1 (NOTE(review): presumably to make the
                                 -- exponent even before the iteration -- confirm)
1356                             v.shift := to_signed(1, EXP_BITS);
1357                             v.state := RENORM_B2;
1358                         end if;
1359                     when NAN | ZERO =>
1360                         -- result is B
1361                         arith_done := '1';
1362                     when INFINITY =>
1363                         if r.b.negative = '1' then
                                 -- square root of -infinity: invalid, QNaN
1364                             v.fpscr(FPSCR_VXSQRT) := '1';
1365                             qnan_result := '1';
1366                         -- else result is B
1367                         end if;
1368                         arith_done := '1';
1369                 end case;
1370
1371             when DO_FRE =>
                     -- Entry state for the reciprocal estimate of B.  Result class
                     -- and sign default to those of B; FPSCR FR/FI are cleared.
1372                 opsel_a <= AIN_B;
1373                 v.result_class := r.b.class;
1374                 v.result_sign := r.b.negative;
1375                 v.fpscr(FPSCR_FR) := '0';
1376                 v.fpscr(FPSCR_FI) := '0';
                     -- a NaN with mantissa bit 53 clear is treated as signalling
1377                 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1378                     v.fpscr(FPSCR_VXSNAN) := '1';
1379                     invalid := '1';
1380                 end if;
1381                 case r.b.class is
1382                     when FINITE =>
                             -- reciprocal: negate the exponent; denormalized inputs
                             -- are renormalized before going on to FRE_1
1383                         v.result_exp := - r.b.exponent;
1384                         if r.b.mantissa(54) = '0' then
1385                             v.state := RENORM_B;
1386                         else
1387                             v.state := FRE_1;
1388                         end if;
1389                     when NAN =>
1390                         -- result is B
1391                         arith_done := '1';
1392                     when INFINITY =>
                             -- 1 / infinity is zero (sign of B kept)
1393                         v.result_class := ZERO;
1394                         arith_done := '1';
1395                     when ZERO =>
                             -- 1 / zero is infinity (sign of B kept), flag zero-divide
1396                         v.result_class := INFINITY;
1397                         zero_divide := '1';
1398                         arith_done := '1';
1399                 end case;
1400
1401             when DO_FRSQRTE =>
                     -- Entry state for the reciprocal square root estimate of B.
                     -- Result class and sign default to those of B; FPSCR FR/FI
                     -- are cleared.
1402                 opsel_a <= AIN_B;
1403                 v.result_class := r.b.class;
1404                 v.result_sign := r.b.negative;
1405                 v.fpscr(FPSCR_FR) := '0';
1406                 v.fpscr(FPSCR_FI) := '0';
                     -- a NaN with mantissa bit 53 clear is treated as signalling
1407                 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1408                     v.fpscr(FPSCR_VXSNAN) := '1';
1409                     invalid := '1';
1410                 end if;
                     -- shift is set to 1 unconditionally; RENORM_B2 reads r.shift
1411                 v.shift := to_signed(1, EXP_BITS);
1412                 case r.b.class is
1413                     when FINITE =>
1414                         v.result_exp := r.b.exponent;
1415                         if r.b.negative = '1' then
                                 -- negative input: invalid, construct QNaN
1416                             v.fpscr(FPSCR_VXSQRT) := '1';
1417                             qnan_result := '1';
1418                             arith_done := '1';
1419                         elsif r.b.mantissa(54) = '0' then
                                 -- denormalized input: renormalize B first
1420                             v.state := RENORM_B;
1421                         elsif r.b.exponent(0) = '0' then
                                 -- normalized with even exponent
1422                             v.state := RSQRT_1;
1423                         else
                                 -- normalized with odd exponent: go via RENORM_B2
1424                             v.state := RENORM_B2;
1425                         end if;
1426                     when NAN =>
1427                         -- result is B
1428                         arith_done := '1';
1429                     when INFINITY =>
1430                         if r.b.negative = '1' then
                                 -- -infinity: invalid, construct QNaN
1431                             v.fpscr(FPSCR_VXSQRT) := '1';
1432                             qnan_result := '1';
1433                         else
                                 -- +infinity gives zero
1434                             v.result_class := ZERO;
1435                         end if;
1436                         arith_done := '1';
1437                     when ZERO =>
                             -- zero gives infinity (sign of B kept), flag zero-divide
1438                         v.result_class := INFINITY;
1439                         zero_divide := '1';
1440                         arith_done := '1';
1441                 end case;
1442
1443             when DO_FMADD =>
1444                 -- fmadd, fmsub, fnmadd, fnmsub
                     -- Entry state for fused multiply-add (A * C combined with B).
                     -- RENORM_A2 and RENORM_C2 come back to this state after
                     -- renormalizing, having set r.madd_cmp.
                     -- NOTE(review): from the way they are used below, r.insn(1)
                     -- appears to distinguish add from subtract forms and r.insn(2)
                     -- the negated forms -- confirm against the instruction decode.
1445                 opsel_a <= AIN_A;
1446                 v.result_sign := r.a.negative;
1447                 v.result_class := r.a.class;
1448                 v.result_exp := r.a.exponent;
1449                 v.fpscr(FPSCR_FR) := '0';
1450                 v.fpscr(FPSCR_FI) := '0';
                     -- is_add combines the signs of A, C and B with r.insn(1); it is
                     -- tested for the infinity - infinity and 0 - 0 cases below
1451                 is_add := r.a.negative xor r.c.negative xor r.b.negative xor r.insn(1);
1452                 if r.a.class = FINITE and r.c.class = FINITE and
1453                     (r.b.class = FINITE or r.b.class = ZERO) then
1454                     v.is_subtract := not is_add;
                         -- exponent of the product A * C
1455                     mulexp := r.a.exponent + r.c.exponent;
1456                     v.result_exp := mulexp;
1457                     opsel_a <= AIN_B;
1458                     -- Make sure A and C are normalized
1459                     if r.a.mantissa(54) = '0' then
1460                         opsel_a <= AIN_A;
1461                         v.state := RENORM_A;
1462                     elsif r.c.mantissa(54) = '0' then
1463                         opsel_a <= AIN_C;
1464                         v.state := RENORM_C;
1465                     elsif r.b.class = ZERO then
1466                         -- no addend, degenerates to multiply
1467                         v.result_sign := r.a.negative xor r.c.negative xor r.insn(2);
1468                         f_to_multiply.valid <= '1';
1469                         v.is_multiply := '1';
1470                         v.state := MULT_1;
1471                     elsif r.madd_cmp = '0' then
1472                         -- addend is bigger, do multiply first
1473                         v.result_sign := not (r.b.negative xor r.insn(1) xor r.insn(2));
1474                         f_to_multiply.valid <= '1';
1475                         v.state := FMADD_1;
1476                     else
1477                         -- product is bigger, shift B right and use it as the
1478                         -- addend to the multiplier
1479                         v.shift := r.b.exponent - mulexp + to_signed(64, EXP_BITS);
1480                         -- for subtract, multiplier does B - A * C
1481                         v.result_sign := not (r.a.negative xor r.c.negative xor r.insn(2) xor is_add);
1482                         v.result_exp := r.b.exponent;
1483                         v.state := FMADD_2;
1484                     end if;
1485                 else
                         -- Special operands: result is decided here.
1486                     if (r.a.class = NAN and r.a.mantissa(53) = '0') or
1487                         (r.b.class = NAN and r.b.mantissa(53) = '0') or
1488                         (r.c.class = NAN and r.c.mantissa(53) = '0') then
1489                         -- Signalling NAN
1490                         v.fpscr(FPSCR_VXSNAN) := '1';
1491                         invalid := '1';
1492                     end if;
                         -- NaN operands are returned with priority A, then B, then C
1493                     if r.a.class = NAN then
1494                         -- nothing to do, result is A
1495                     elsif r.b.class = NAN then
1496                         -- result is B
1497                         v.result_class := NAN;
1498                         v.result_sign := r.b.negative;
1499                         opsel_a <= AIN_B;
1500                     elsif r.c.class = NAN then
1501                         -- result is C
1502                         v.result_class := NAN;
1503                         v.result_sign := r.c.negative;
1504                         opsel_a <= AIN_C;
1505                     elsif (r.a.class = ZERO and r.c.class = INFINITY) or
1506                         (r.a.class = INFINITY and r.c.class = ZERO) then
1507                         -- invalid operation, construct QNaN
1508                         v.fpscr(FPSCR_VXIMZ) := '1';
1509                         qnan_result := '1';
1510                     elsif r.a.class = INFINITY or r.c.class = INFINITY then
                             -- product is infinite
1511                         if r.b.class = INFINITY and is_add = '0' then
1512                             -- invalid operation, construct QNaN
1513                             v.fpscr(FPSCR_VXISI) := '1';
1514                             qnan_result := '1';
1515                         else
1516                             -- result is infinity
1517                             v.result_class := INFINITY;
1518                             v.result_sign := r.a.negative xor r.c.negative xor r.insn(2);
1519                         end if;
1520                     else
1521                         -- Here A is zero, C is zero, or B is infinity
1522                         -- Result is +/-B in all of those cases
1523                         v.result_class := r.b.class;
1524                         v.result_exp := r.b.exponent;
1525                         if v.result_class /= ZERO or is_add = '1' then
1526                             v.result_sign := not (r.b.negative xor r.insn(1) xor r.insn(2));
1527                         else
1528                             -- have to be careful about rule for 0 - 0 result sign
                                 -- (negative only when both round_mode bits are set,
                                 -- then flipped by r.insn(2))
1529                             v.result_sign := (r.round_mode(1) and r.round_mode(0)) xor r.insn(2);
1530                         end if;
1531                         opsel_a <= AIN_B;
1532                     end if;
1533                     arith_done := '1';
1534                 end if;
1535
1536             when RENORM_A =>
                     -- First step of renormalizing operand A: request a
                     -- renormalization, then continue in RENORM_A2.
1537                 renormalize := '1';
1538                 v.state := RENORM_A2;
1539
1540             when RENORM_A2 =>
                     -- Second step of renormalizing A: write A back (set_a), take the
                     -- adjusted exponent from new_exp, then check the second operand.
                     -- In this arm r.insn(4) = '1' selects the path whose second
                     -- operand is C (multiply / multiply-add) and r.insn(4) = '0' the
                     -- path whose second operand is B (next state DIV_2 or RENORM_B).
1541                 set_a := '1';
1542                 v.result_exp := new_exp;
1543                 if r.insn(4) = '1' then
1544                     opsel_a <= AIN_C;
1545                     if r.c.mantissa(54) = '1' then
                             -- C is already normalized
1546                         if r.insn(3) = '0' or r.b.class = ZERO then
                                 -- no addend to consider: plain multiply
1547                             v.first := '1';
1548                             v.state := MULT_1;
1549                         else
                                 -- record whether the (renormalized) product exponent
                                 -- plus one is at least B's exponent, then re-enter
                                 -- DO_FMADD, which reads r.madd_cmp
1550                             v.madd_cmp := '0';
1551                             if new_exp + 1 >= r.b.exponent then
1552                                 v.madd_cmp := '1';
1553                             end if;
1554                             v.state := DO_FMADD;
1555                         end if;
1556                     else
                             -- C is denormalized too
1557                         v.state := RENORM_C;
1558                     end if;
1559                 else
1560                         opsel_a <= AIN_B;
1561                         if r.b.mantissa(54) = '1' then
                                 -- B is already normalized: start the division
1562                             v.first := '1';
1563                             v.state := DIV_2;
1564                         else
                                 -- B is denormalized too
1565                             v.state := RENORM_B;
1566                     end if;
1567                 end if;
1568
1569             when RENORM_B =>
                     -- First step of renormalizing operand B: request a
                     -- renormalization (passing r.is_sqrt on as renorm_sqrt), then
                     -- continue in RENORM_B2.
1570                 renormalize := '1';
1571                 renorm_sqrt := r.is_sqrt;
1572                 v.state := RENORM_B2;
1573
1574             when RENORM_B2 =>
                     -- Second step of renormalizing B: write B back (set_b) and
                     -- update the result exponent, then go to LOOKUP.  When
                     -- r.is_sqrt is '0' the exponent is adjusted by r.shift;
                     -- otherwise it is taken from new_exp.
1575                 set_b := '1';
1576                 if r.is_sqrt = '0' then
1577                     v.result_exp := r.result_exp + r.shift;
1578                 else
1579                     v.result_exp := new_exp;
1580                 end if;
1581                 v.state := LOOKUP;
1582
1583             when RENORM_C =>
                     -- First step of renormalizing operand C: request a
                     -- renormalization, then continue in RENORM_C2.
1584                 renormalize := '1';
1585                 v.state := RENORM_C2;
1586
1587             when RENORM_C2 =>
                     -- Second step of renormalizing C: write C back (set_c), take the
                     -- adjusted exponent from new_exp, then either start a plain
                     -- multiply or re-enter DO_FMADD (same decision as in RENORM_A2).
1588                 set_c := '1';
1589                 v.result_exp := new_exp;
1590                 if r.insn(3) = '0' or r.b.class = ZERO then
                         -- no addend to consider: plain multiply
1591                     v.first := '1';
1592                     v.state := MULT_1;
1593                 else
                         -- madd_cmp = '1' when the product exponent plus one is at
                         -- least B's exponent; DO_FMADD reads it to pick its path
1594                     v.madd_cmp := '0';
1595                     if new_exp + 1 >= r.b.exponent then
1596                         v.madd_cmp := '1';
1597                     end if;
1598                     v.state := DO_FMADD;
1599                 end if;
1600
1601             when ADD_SHIFT =>
1602                 -- r.shift = - exponent difference
                     -- Select the shifter output as the new R and record s_nz in
                     -- the X bit, then go on to the addition in ADD_2.
                     -- NOTE(review): set_x / longmask are consumed outside this
                     -- arm; their exact effect is not visible here.
1603                 opsel_r <= RES_SHIFT;
1604                 v.x := s_nz;
1605                 set_x := '1';
1606                 longmask := '0';
1607                 v.state := ADD_2;
1608
1609             when ADD_2 =>
                     -- Perform the add/subtract.  The adder's A input is operand A
                     -- when r.add_bsmall is set, else operand B; its B input is R,
                     -- inverted for a subtraction.
1610                 if r.add_bsmall = '1' then
1611                     opsel_a <= AIN_A;
1612                 else
1613                     opsel_a <= AIN_B;
1614                 end if;
1615                 opsel_b <= BIN_R;
1616                 opsel_binv <= r.is_subtract;
                     -- carry-in is applied for a subtraction only when X is clear
1617                 carry_in <= r.is_subtract and not r.x;
                     -- ADD_3 expects r.shift = -1
1618                 v.shift := to_signed(-1, EXP_BITS);
1619                 v.state := ADD_3;
1620
1621             when ADD_3 =>
1622                 -- check for overflow or negative result (can't get both)
1623                 -- r.shift = -1
                     -- The branches test, in order: R bit 63 (negative), bit 55
                     -- (overflow), bit 54 (already normalized), all-zero, and
                     -- otherwise hand over to NORMALIZE.
1624                 if r.r(63) = '1' then
1625                     -- result is opposite sign to expected
                         -- (flip the sign and negate R: inverted A input plus carry)
1626                     v.result_sign := not r.result_sign;
1627                     opsel_ainv <= '1';
1628                     carry_in <= '1';
1629                     v.state := FINISH;
1630                 elsif r.r(55) = '1' then
1631                     -- sum overflowed, shift right
1632                     opsel_r <= RES_SHIFT;
1633                     set_x := '1';
1634                     v.shift := to_signed(-2, EXP_BITS);
1635                     if exp_huge = '1' then
1636                         v.state := ROUND_OFLOW;
1637                     else
1638                         v.state := ROUNDING;
1639                     end if;
1640                 elsif r.r(54) = '1' then
                         -- result is already normalized, go straight to rounding
1641                     set_x := '1';
1642                     v.shift := to_signed(-2, EXP_BITS);
1643                     v.state := ROUNDING;
1644                 elsif (r_hi_nz or r_lo_nz or r.r(1) or r.r(0)) = '0' then
1645                     -- r.x must be zero at this point
                         -- exact zero result
1646                     v.result_class := ZERO;
1647                     if r.is_subtract = '1' then
1648                         -- set result sign depending on rounding mode
                             -- (negative only when both round_mode bits are set)
1649                         v.result_sign := r.round_mode(1) and r.round_mode(0);
1650                     end if;
1651                     arith_done := '1';
1652                 else
                         -- nonzero but not normalized: renormalize
1653                     renormalize := '1';
1654                     v.state := NORMALIZE;
1655                 end if;
1656
1657             when CMP_1 =>
                     -- Set up the adder to compute A - R (A plus inverted R plus a
                     -- carry-in of 1); CMP_2 examines the outcome.
1658                 opsel_a <= AIN_A;
1659                 opsel_b <= BIN_R;
1660                 opsel_binv <= '1';
1661                 carry_in <= '1';
1662                 v.state := CMP_2;
1663
1664             when CMP_2 =>
                     -- Turn the subtraction result in R into a 4-bit compare result,
                     -- copy it into FPSCR(FL downto FU) and finish the instruction.
                     -- NOTE(review): the bit order of cr_result is presumably
                     -- less-than, greater-than, equal, unordered -- confirm.
1665                 if r.r(63) = '1' then
1666                     -- A is smaller in magnitude
1667                     v.cr_result := not r.a.negative & r.a.negative & "00";
1668                 elsif (r_hi_nz or r_lo_nz) = '0' then
                         -- difference is zero
1669                     v.cr_result := "0010";
1670                 else
                         -- A is larger in magnitude
1671                     v.cr_result := r.a.negative & not r.a.negative & "00";
1672                 end if;
1673                 v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result;
1674                 v.instr_done := '1';
1675                 v.state := IDLE;
1676
1677             when MULT_1 =>
                     -- Start the multiply if r.first is set, route the multiplier
                     -- output to R, and move to FINISH once the multiplier reports
                     -- a valid result.
1678                 f_to_multiply.valid <= r.first;
1679                 opsel_r <= RES_MULT;
1680                 if multiply_to_f.valid = '1' then
1681                     v.state := FINISH;
1682                 end if;
1683
1684             when FMADD_1 =>
1685                 -- Addend is bigger here
                     -- Wait for the product; route it to R and S (S_MULT), set the
                     -- shift to product exponent minus addend exponent, then go to
                     -- ADD_SHIFT once the multiplier result is valid.
1686                 v.result_sign := not (r.b.negative xor r.insn(1) xor r.insn(2));
1687                 -- note v.shift is at most -2 here
1688                 v.shift := r.result_exp - r.b.exponent;
1689                 opsel_r <= RES_MULT;
1690                 opsel_s <= S_MULT;
1691                 set_s := '1';
1692                 f_to_multiply.valid <= r.first;
1693                 if multiply_to_f.valid = '1' then
1694                     v.state := ADD_SHIFT;
1695                 end if;
1696
1697             when FMADD_2 =>
1698                 -- Product is potentially bigger here
1699                 -- r.shift = addend exp - product exp + 64
                     -- Load S from the shifter (S_SHIFT), then remove the +64 bias
                     -- from the shift count for FMADD_3.
1700                 set_s := '1';
1701                 opsel_s <= S_SHIFT;
1702                 v.shift := r.shift - to_signed(64, EXP_BITS);
1703                 v.state := FMADD_3;
1704
1705             when FMADD_3 =>
1706                 -- r.shift = addend exp - product exp
                     -- Load R from the shifter and arm r.first so that FMADD_4
                     -- starts the multiply.
1707                 opsel_r <= RES_SHIFT;
1708                 v.first := '1';
1709                 v.state := FMADD_4;
1710
1711             when FMADD_4 =>
                     -- Run the multiplier with the MULADD_RS addend selected,
                     -- inverted for a subtraction, and capture the result in R and S.
                     -- When the result is valid, bit 121 of the multiplier result
                     -- picks FMADD_5 (which negates R:S:X) or goes straight to
                     -- FMADD_6.
1712                 msel_add <= MULADD_RS;
1713                 f_to_multiply.valid <= r.first;
1714                 msel_inv <= r.is_subtract;
1715                 opsel_r <= RES_MULT;
1716                 opsel_s <= S_MULT;
1717                 set_s := '1';
1718                 v.shift := to_signed(56, EXP_BITS);
1719                 if multiply_to_f.valid = '1' then
1720                     if multiply_to_f.result(121) = '1' then
1721                         v.state := FMADD_5;
1722                     else
1723                         v.state := FMADD_6;
1724                     end if;
1725                 end if;
1726
1727             when FMADD_5 =>
1728                 -- negate R:S:X
                     -- The result sign is flipped to match.  The carry into the R
                     -- negation is applied only when S and X are both zero.
1729                 v.result_sign := not r.result_sign;
1730                 opsel_ainv <= '1';
1731                 carry_in <= not (s_nz or r.x);
1732                 opsel_s <= S_NEG;
1733                 set_s := '1';
1734                 v.shift := to_signed(56, EXP_BITS);
1735                 v.state := FMADD_6;
1736
1737             when FMADD_6 =>
1738                 -- r.shift = 56 (or 0, but only if r is now nonzero)
                     -- Classify the multiply-add result held in R (and S): exact
                     -- zero, bits only in S, already normalized, or needing
                     -- normalization.
1739                 if (r.r(56) or r_hi_nz or r_lo_nz or r.r(1) or r.r(0)) = '0' then
1740                     if s_nz = '0' then
1741                         -- must be a subtraction, and r.x must be zero
                             -- exact zero: negative only when both round_mode bits
                             -- are set
1742                         v.result_class := ZERO;
1743                         v.result_sign := r.round_mode(1) and r.round_mode(0);
1744                         arith_done := '1';
1745                     else
1746                         -- R is all zeroes but there are non-zero bits in S
1747                         -- so shift them into R and set S to 0
1748                         opsel_r <= RES_SHIFT;
1749                         set_s := '1';
1750                         -- stay in state FMADD_6
1751                     end if;
1752                 elsif r.r(56 downto 54) = "001" then
                         -- leading one is already at bit 54: normalized
1753                     v.state := FINISH;
1754                 else
1755                     renormalize := '1';
1756                     v.state := NORMALIZE;
1757                 end if;
1758
1759             when LOOKUP =>
                     -- Reached from RENORM_B2.  Dispatches on instruction bits 4, 3
                     -- and 2 to the state that continues the operation: DIV_2,
                     -- SQRT_1, FRE_1 or RSQRT_1.
1760                 opsel_a <= AIN_B;
1761                 -- wait one cycle for inverse_table[B] lookup
1762                 v.first := '1';
1763                 if r.insn(4) = '0' then
1764                     if r.insn(3) = '0' then
1765                         v.state := DIV_2;
1766                     else
1767                         v.state := SQRT_1;
1768                     end if;
1769                 elsif r.insn(2) = '0' then
1770                     v.state := FRE_1;
1771                 else
1772                     v.state := RSQRT_1;
1773                 end if;
1774
1775             when DIV_2 =>
1776                 -- compute Y = inverse_table[B] (when count=0); P = 2 - B * Y
                     -- First half of one refinement iteration; DIV_3 is the second
                     -- half.  The multiplier's second input is the lookup table on
                     -- the first pass (count = 0) and P afterwards.
1777                 msel_1 <= MUL1_B;
1778                 msel_add <= MULADD_CONST;
1779                 msel_inv <= '1';
1780                 if r.count = 0 then
1781                     msel_2 <= MUL2_LUT;
1782                 else
1783                     msel_2 <= MUL2_P;
1784                 end if;
1785                 set_y := r.first;
1786                 pshift := '1';
1787                 f_to_multiply.valid <= r.first;
1788                 if multiply_to_f.valid = '1' then
                         -- product ready: count this iteration and continue
1789                     v.first := '1';
1790                     v.count := r.count + 1;
1791                     v.state := DIV_3;
1792                 end if;
1793
1794             when DIV_3 =>
1795                 -- compute Y = P = P * Y
                     -- Second half of a refinement iteration.  Loops back to DIV_2
                     -- until r.count reaches 3, then goes on to DIV_4.
1796                 msel_1 <= MUL1_Y;
1797                 msel_2 <= MUL2_P;
1798                 f_to_multiply.valid <= r.first;
1799                 pshift := '1';
1800                 if multiply_to_f.valid = '1' then
1801                     v.first := '1';
1802                     if r.count = 3 then
1803                         v.state := DIV_4;
1804                     else
1805                         v.state := DIV_2;
1806                     end if;
1807                 end if;
1808
1809             when DIV_4 =>
1810                 -- compute R = P = A * Y (quotient)
                     -- The multiplier output is routed to R only in the cycle the
                     -- result is valid; then continue in DIV_5.
1811                 msel_1 <= MUL1_A;
1812                 msel_2 <= MUL2_P;
1813                 set_y := r.first;
1814                 f_to_multiply.valid <= r.first;
1815                 pshift := '1';
1816                 if multiply_to_f.valid = '1' then
1817                     opsel_r <= RES_MULT;
1818                     v.first := '1';
1819                     v.state := DIV_5;
1820                 end if;
1821
1822             when DIV_5 =>
1823                 -- compute P = A - B * R (remainder)
                     -- Uses the multiplier with addend A and the product inverted;
                     -- DIV_6 examines the remainder once the result is valid.
1824                 msel_1 <= MUL1_B;
1825                 msel_2 <= MUL2_R;
1826                 msel_add <= MULADD_A;
1827                 msel_inv <= '1';
1828                 f_to_multiply.valid <= r.first;
1829                 if multiply_to_f.valid = '1' then
1830                     v.state := DIV_6;
1831                 end if;
1832
1833             when DIV_6 =>
1834                 -- test if remainder is 0 or >= B
                     -- Final quotient correction and X (inexact) determination,
                     -- driven by pcmpb_lt / pcmpb_eq, then on to FINISH.
1835                 if pcmpb_lt = '1' then
1836                     -- quotient is correct, set X if remainder non-zero
1837                     v.x := r.p(58) or px_nz;
1838                 else
1839                     -- quotient needs to be incremented by 1
                         -- X is then set unless pcmpb_eq is asserted
1840                     carry_in <= '1';
1841                     v.x := not pcmpb_eq;
1842                 end if;
1843                 v.state := FINISH;
1844
1845             when FRE_1 =>
                     -- Load R from the miscellaneous result path (misc_sel "0111",
                     -- the same selection SQRT_1 and RSQRT_1 use), set the shift to
                     -- 1 and go to NORMALIZE.
                     -- NOTE(review): "0111" presumably selects the lookup-table
                     -- estimate -- confirm against the misc_sel decode.
1846                 opsel_r <= RES_MISC;
1847                 misc_sel <= "0111";
1848                 v.shift := to_signed(1, EXP_BITS);
1849                 v.state := NORMALIZE;
1850
1851             when FTDIV_1 =>
                     -- Sets bit 1 of cr_result when the exponent under test is tiny
                     -- or huge.  The instruction completes if either flag is set, A
                     -- is zero, or r.first is '0'; otherwise the shift is loaded
                     -- with A's exponent and doing_ftdiv is set to "10".
                     -- NOTE(review): the state is not changed in the else branch,
                     -- so this arm is presumably evaluated again with updated
                     -- exp_tiny / exp_huge and r.first -- confirm where those are
                     -- updated.
1852                 v.cr_result(1) := exp_tiny or exp_huge;
1853                 if exp_tiny = '1' or exp_huge = '1' or r.a.class = ZERO or r.first = '0' then
1854                     v.instr_done := '1';
1855                     v.state := IDLE;
1856                 else
1857                     v.shift := r.a.exponent;
1858                     v.doing_ftdiv := "10";
1859                 end if;
1860
1861             when RSQRT_1 =>
                     -- Load R from the miscellaneous result path (misc_sel "0111")
                     -- and set the result exponent to minus half of B's exponent,
                     -- then go to NORMALIZE with shift = 1.
1862                 opsel_r <= RES_MISC;
1863                 misc_sel <= "0111";
                     -- arithmetic shift right by one: replicate the sign bit and
                     -- drop bit 0
1864                 sqrt_exp := r.b.exponent(EXP_BITS-1) & r.b.exponent(EXP_BITS-1 downto 1);
1865                 v.result_exp := - sqrt_exp;
1866                 v.shift := to_signed(1, EXP_BITS);
1867                 v.state := NORMALIZE;
1868
1869             when SQRT_1 =>
1870                 -- put invsqr[B] in R and compute P = invsqr[B] * B
1871                 -- also transfer B (in R) to A
                     -- The multiply is started unconditionally here; the iteration
                     -- counter is reset and the shift set to -1 for SQRT_2.
1872                 set_a := '1';
1873                 opsel_r <= RES_MISC;
1874                 misc_sel <= "0111";
1875                 msel_1 <= MUL1_B;
1876                 msel_2 <= MUL2_LUT;
1877                 f_to_multiply.valid <= '1';
1878                 v.shift := to_signed(-1, EXP_BITS);
1879                 v.count := "00";
1880                 v.state := SQRT_2;
1881
1882             when SQRT_2 =>
1883                 -- shift R right one place
1884                 -- not expecting multiplier result yet
1885                 -- r.shift = -1
                     -- r.first is armed so that SQRT_3 loads Y on its first cycle.
1886                 opsel_r <= RES_SHIFT;
1887                 v.first := '1';
1888                 v.state := SQRT_3;
1889
1890             when SQRT_3 =>
1891                 -- put R into Y, wait for product from multiplier
                     -- Y is loaded only while r.first is set.  No multiply is started
                     -- in this state; it waits for the one issued in SQRT_1.
1892                 msel_2 <= MUL2_R;
1893                 set_y := r.first;
1894                 pshift := '1';
1895                 if multiply_to_f.valid = '1' then
1896                     -- put result into R
1897                     opsel_r <= RES_MULT;
1898                     v.first := '1';
1899                     v.state := SQRT_4;
1900                 end if;
1901
1902             when SQRT_4 =>
1903                 -- compute 1.5 - Y * P
                     -- Start of one refinement iteration (SQRT_4 to SQRT_7), using
                     -- the multiplier's constant addend with the product inverted.
1904                 msel_1 <= MUL1_Y;
1905                 msel_2 <= MUL2_P;
1906                 msel_add <= MULADD_CONST;
1907                 msel_inv <= '1';
1908                 f_to_multiply.valid <= r.first;
1909                 pshift := '1';
1910                 if multiply_to_f.valid = '1' then
1911                     v.state := SQRT_5;
1912                 end if;
1913
1914             when SQRT_5 =>
1915                 -- compute Y = Y * P
                     -- The multiply is issued unconditionally and its result is not
                     -- awaited here; SQRT_6 issues the next multiply straight after.
1916                 msel_1 <= MUL1_Y;
1917                 msel_2 <= MUL2_P;
1918                 f_to_multiply.valid <= '1';
1919                 v.first := '1';
1920                 v.state := SQRT_6;
1921
1922             when SQRT_6 =>
1923                 -- pipeline in R = R * P
                     -- Issues the second multiply while r.first is set, then waits
                     -- for a valid result (the multiply started in SQRT_5) before
                     -- moving to SQRT_7.
1924                 msel_1 <= MUL1_R;
1925                 msel_2 <= MUL2_P;
1926                 f_to_multiply.valid <= r.first;
1927                 pshift := '1';
1928                 if multiply_to_f.valid = '1' then
1929                     v.first := '1';
1930                     v.state := SQRT_7;
1931                 end if;
1932
1933             when SQRT_7 =>
1934                 -- first multiply is done, put result in Y
1935                 msel_2 <= MUL2_P;               -- multiplier input 2 = P, which set_y copies into Y
1936                 set_y := r.first;
1937                 -- wait for second multiply (should be here already)
1938                 pshift := '1';
1939                 if multiply_to_f.valid = '1' then
1940                     -- put result into R
1941                     opsel_r <= RES_MULT;
1942                     v.first := '1';
1943                     v.count := r.count + 1;
1944                     if r.count < 2 then         -- loop back for r.count = 0 and 1; leave the loop when r.count = 2
1945                         v.state := SQRT_4;
1946                     else
1947                         v.first := '1';         -- already set above; this assignment is redundant
1948                         v.state := SQRT_8;
1949                     end if;
1950                 end if;
1951
1952             when SQRT_8 =>
1953                 -- compute P = A - R * R, which can be +ve or -ve
1954                 -- we arranged for B to be put into A earlier
1955                 msel_1 <= MUL1_R;
1956                 msel_2 <= MUL2_R;
1957                 msel_add <= MULADD_A;           -- addend = r.a.mantissa placed at addend bits 121..58
1958                 msel_inv <= '1';                -- inverts the addend and sets f_to_multiply.not_result
1959                 pshift := '1';                  -- P takes result bits 119..56
1960                 f_to_multiply.valid <= r.first; -- issue the multiply only while r.first is set
1961                 if multiply_to_f.valid = '1' then
1962                     v.first := '1';             -- lets SQRT_9 issue its multiply
1963                     v.state := SQRT_9;
1964                 end if;
1965
1966             when SQRT_9 =>
1967                 -- compute P = P * Y
1968                 -- since Y is an estimate of 1/sqrt(B), this makes P an
1969                 -- estimate of the adjustment needed to R.  Since the error
1970                 -- could be negative and we have an unsigned multiplier, the
1971                 -- upper bits can be wrong, but it turns out the lowest 8 bits
1972                 -- are correct and are all we need (given 3 iterations through
1973                 -- SQRT_4 to SQRT_7).
1974                 msel_1 <= MUL1_Y;
1975                 msel_2 <= MUL2_P;
1976                 pshift := '1';                  -- P takes result bits 119..56
1977                 f_to_multiply.valid <= r.first; -- issue the multiply only while r.first is set
1978                 if multiply_to_f.valid = '1' then
1979                     v.state := SQRT_10;         -- v.first is not set here; SQRT_10 sets it for SQRT_11
1980                 end if;
1981
1982             when SQRT_10 =>
1983                 -- Add the bottom 8 bits of P, sign-extended,
1984                 -- divided by 4, onto R.
1985                 -- The division by 4 is because R is 10.54 format
1986                 -- whereas P is 8.56 format.
1987                 -- NOTE(review): opsel_a and opsel_r are not assigned in this state, so the
1988                 -- addition relies on defaults set earlier in the process -- confirm.
1989                 opsel_b <= BIN_PS6;             -- B input = r.p(7 downto 2) sign-extended to 64 bits
1990                 sqrt_exp := r.b.exponent(EXP_BITS-1) & r.b.exponent(EXP_BITS-1 downto 1);  -- B exponent arithmetically shifted right by one (halved)
1991                 v.result_exp := sqrt_exp;
1992                 v.shift := to_signed(1, EXP_BITS);  -- shift amount used by the shifter in SQRT_11
1993                 v.first := '1';                 -- r.first gates the multiply and set_b in SQRT_11
1994                 v.state := SQRT_11;
1993
1994             when SQRT_11 =>
1995                 -- compute P = A - R * R (remainder)
1996                 -- also put 2 * R + 1 into B for comparison with P
1997                 msel_1 <= MUL1_R;
1998                 msel_2 <= MUL2_R;
1999                 msel_add <= MULADD_A;           -- addend = r.a.mantissa placed at addend bits 121..58
2000                 msel_inv <= '1';                -- inverts the addend and sets f_to_multiply.not_result
2001                 f_to_multiply.valid <= r.first; -- issue the multiply only while r.first is set
2002                 shiftin := '1';                 -- forces a 1 into the bit just below R at the shifter input
2003                 set_b := r.first;               -- B mantissa := shifter output, B exponent := new_exp
2004                 if multiply_to_f.valid = '1' then
2005                     v.state := SQRT_12;         -- pshift is left unset here, so P takes result bits 63..0
2006                 end if;
2007
2008             when SQRT_12 =>
2009                 -- test if remainder is 0 or >= B = 2*R + 1
2010                 if pcmpb_lt = '1' then
2011                     -- square root is correct, set X if remainder non-zero
2012                     v.x := r.p(58) or px_nz;
2013                 else
2014                     -- square root needs to be incremented by 1
2015                     carry_in <= '1';            -- carry into the RES_SUM adder
2016                     v.x := not pcmpb_eq;        -- X is clear only when pcmpb_eq = '1'
2017                 end if;
2018                 v.state := FINISH;              -- FINISH normalizes and rounds the result
2019
2020             when INT_SHIFT =>
2021                 -- r.shift = b.exponent - 52
2022                 opsel_r <= RES_SHIFT;           -- R := shifter output
2023                 set_x := '1';                   -- set X if any A-input bits under the shift mask are non-zero
2024                 v.state := INT_ROUND;
2025                 v.shift := to_signed(-2, EXP_BITS);     -- shift amount for INT_ROUND
2026
2027             when INT_ROUND =>
2028                 -- r.shift = -2
2029                 opsel_r <= RES_SHIFT;           -- R := shifter output
2030                 round := fp_rounding(r.r, r.x, '0', r.round_mode, r.result_sign);  -- single-precision argument forced to '0'
2031                 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;     -- rounding result is recorded as FPSCR FR and FI
2032                 -- Check for negative values that don't round to 0 for fcti*u*
2033                 if r.insn(8) = '1' and r.result_sign = '1' and
2034                     (r_hi_nz or r_lo_nz or v.fpscr(FPSCR_FR)) = '1' then  -- uses the FR value just computed above
2035                     v.state := INT_OFLOW;
2036                 else
2037                     v.state := INT_FINAL;
2038                 end if;
2039
2040             when INT_ISHIFT =>
2041                 -- r.shift = b.exponent - 54;
2042                 opsel_r <= RES_SHIFT;           -- R := shifter output
2043                 v.state := INT_FINAL;           -- goes straight to INT_FINAL, bypassing INT_ROUND
2044
2045             when INT_FINAL =>
2046                 -- Negate if necessary, and increment for rounding if needed
2047                 opsel_ainv <= r.result_sign;    -- invert the A input when the result is negative
2048                 carry_in <= r.fpscr(FPSCR_FR) xor r.result_sign;   -- carry = FR for positive results, not FR for negative ones
2049                 -- Check for possible overflows
2050                 -- (need_check looks at R as it is before this state's add)
2051                 case r.insn(9 downto 8) is
2052                     when "00" =>        -- fctiw[z]
2053                         need_check := r.r(31) or (r.r(30) and not r.result_sign);
2054                     when "01" =>        -- fctiwu[z]
2055                         need_check := r.r(31);
2056                     when "10" =>        -- fctid[z]
2057                         need_check := r.r(63) or (r.r(62) and not r.result_sign);
2058                     when others =>      -- fctidu[z]
2059                         need_check := r.r(63);
2060                 end case;
2061                 if need_check = '1' then
2062                     v.state := INT_CHECK;       -- completion is deferred to INT_CHECK
2063                 else
2064                     if r.fpscr(FPSCR_FI) = '1' then
2065                         v.fpscr(FPSCR_XX) := '1';       -- inexact result raises XX
2066                     end if;
2067                     arith_done := '1';
2068                 end if;
2068
2069             when INT_CHECK =>
2070                 if r.insn(9) = '0' then         -- insn(9) = '0': word forms, msb is bit 31
2071                     msb := r.r(31);
2072                 else                            -- insn(9) = '1': doubleword forms, msb is bit 63
2073                     msb := r.r(63);
2074                 end if;
2075                 misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;     -- selects one of the "1xxx" saturation values in the RES_MISC table
2076                 if (r.insn(8) = '0' and msb /= r.result_sign) or          -- insn(8) = '0' (signed forms): msb disagrees with the sign
2077                     (r.insn(8) = '1' and msb /= '1') then                 -- insn(8) = '1' (unsigned forms): msb is not set
2078                     opsel_r <= RES_MISC;        -- replace R with the saturation value
2079                     v.fpscr(FPSCR_VXCVI) := '1';
2080                     invalid := '1';
2081                 else
2082                     if r.fpscr(FPSCR_FI) = '1' then
2083                         v.fpscr(FPSCR_XX) := '1';       -- inexact result raises XX
2084                     end if;
2085                 end if;
2086                 arith_done := '1';              -- the instruction completes in either case
2087
2088             when INT_OFLOW =>
2089                 opsel_r <= RES_MISC;            -- R := saturation value from the RES_MISC table
2090                 misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
2091                 if r.b.class = NAN then
2092                     misc_sel(0) <= '1';         -- NaN operand always selects the "max negative" table entry
2093                 end if;
2094                 v.fpscr(FPSCR_VXCVI) := '1';
2095                 invalid := '1';
2096                 arith_done := '1';
2097
2098             when FRI_1 =>
2099                 -- r.shift = b.exponent - 52
2100                 opsel_r <= RES_SHIFT;           -- R := shifter output
2101                 set_x := '1';                   -- set X if any A-input bits under the shift mask are non-zero
2102                 v.shift := to_signed(-2, EXP_BITS);     -- shift amount expected by ROUNDING
2103                 v.state := ROUNDING;
2104
2105             when FINISH =>
2106                 if r.is_multiply = '1' and px_nz = '1' then
2107                     v.x := '1';                 -- for multiplies, px_nz also sets X
2108                 end if;
2109                 if r.r(63 downto 54) /= "0000000001" then   -- R does not have its leading 1 exactly at bit 54
2110                     renormalize := '1';         -- loads v.shift with clz(r.r) - 9 for NORMALIZE
2111                     v.state := NORMALIZE;
2112                 else
2113                     set_x := '1';               -- set X if any A-input bits under the shift mask are non-zero
2114                     if exp_tiny = '1' then
2115                         v.shift := new_exp - min_exp;
2116                         v.state := ROUND_UFLOW;
2117                     elsif exp_huge = '1' then
2118                         v.state := ROUND_OFLOW;
2119                     else
2120                         v.shift := to_signed(-2, EXP_BITS);     -- shift amount expected by ROUNDING
2121                         v.state := ROUNDING;
2122                     end if;
2123                 end if;
2124
2125             when NORMALIZE =>
2126                 -- Shift so we have 9 leading zeroes (we know R is non-zero)
2127                 -- r.shift = clz(r.r) - 9
2128                 opsel_r <= RES_SHIFT;           -- R := shifter output; v.result_exp is also set to new_exp
2129                 set_x := '1';                   -- set X if any A-input bits under the shift mask are non-zero
2130                 if exp_tiny = '1' then
2131                     v.shift := new_exp - min_exp;
2132                     v.state := ROUND_UFLOW;
2133                 elsif exp_huge = '1' then
2134                     v.state := ROUND_OFLOW;
2135                 else
2136                     v.shift := to_signed(-2, EXP_BITS);     -- shift amount expected by ROUNDING
2137                     v.state := ROUNDING;
2138                 end if;
2139
2140             when ROUND_UFLOW =>
2141                 -- r.shift = - amount by which exponent underflows
2142                 v.tiny := '1';                  -- r.tiny makes ROUNDING set UX when the result is inexact
2143                 if r.fpscr(FPSCR_UE) = '0' then
2144                     -- disabled underflow exception case
2145                     -- have to denormalize before rounding
2146                     opsel_r <= RES_SHIFT;       -- R := shifter output
2147                     set_x := '1';               -- set X if any A-input bits under the shift mask are non-zero
2148                     v.shift := to_signed(-2, EXP_BITS);     -- shift amount expected by ROUNDING
2149                     v.state := ROUNDING;
2150                 else
2151                     -- enabled underflow exception case
2152                     -- if denormalized, have to normalize before rounding
2153                     v.fpscr(FPSCR_UX) := '1';
2154                     v.result_exp := r.result_exp + bias_exp;    -- exponent is adjusted upward by bias_exp
2155                     if r.r(54) = '0' then       -- no leading 1 at bit 54
2156                         renormalize := '1';     -- loads v.shift with clz(r.r) - 9 for NORMALIZE
2157                         v.state := NORMALIZE;
2158                     else
2159                         v.shift := to_signed(-2, EXP_BITS);
2160                         v.state := ROUNDING;
2161                     end if;
2162                 end if;
2163
2164             when ROUND_OFLOW =>
2165                 v.fpscr(FPSCR_OX) := '1';
2166                 if r.fpscr(FPSCR_OE) = '0' then
2167                     -- disabled overflow exception
2168                     -- result depends on rounding mode
2169                     -- NOTE(review): presumably round_mode follows the FPSCR RN encoding
2170                     -- (00 nearest, 10 toward +inf, 11 toward -inf) -- confirm.
2171                     v.fpscr(FPSCR_XX) := '1';
2172                     v.fpscr(FPSCR_FI) := '1';
2173                     if r.round_mode(1 downto 0) = "00" or
2174                         (r.round_mode(1) = '1' and r.round_mode(0) = r.result_sign) then
2175                         v.result_class := INFINITY;     -- this rounding mode yields infinity
2176                         v.fpscr(FPSCR_FR) := '1';
2177                     else
2178                         v.fpscr(FPSCR_FR) := '0';       -- result stays at the largest representable number
2179                     end if;
2180                     -- construct largest representable number
2181                     v.result_exp := max_exp;
2182                     opsel_r <= RES_MISC;
2183                     misc_sel <= "001" & r.single_prec;  -- "0010" = max DP mantissa, "0011" = max SP mantissa
2184                     arith_done := '1';
2185                 else
2186                     -- enabled overflow exception
2187                     v.result_exp := r.result_exp - bias_exp;    -- exponent is adjusted downward by bias_exp
2188                     v.shift := to_signed(-2, EXP_BITS);         -- shift amount expected by ROUNDING
2189                     v.state := ROUNDING;
2190                 end if;
2189
2190             when ROUNDING =>
2191                 opsel_amask <= '1';             -- clear the A-input bits selected by mask
2192                 round := fp_rounding(r.r, r.x, r.single_prec, r.round_mode, r.result_sign);
2193                 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;     -- rounding result is recorded as FPSCR FR and FI
2194                 if round(1) = '1' then
2195                     -- set mask to increment the LSB for the precision
2196                     opsel_b <= BIN_MASK;        -- B input = mask
2197                     carry_in <= '1';
2198                     v.shift := to_signed(-1, EXP_BITS);     -- shift amount expected by ROUNDING_2
2199                     v.state := ROUNDING_2;
2200                 else
2201                     if r.r(54) = '0' then
2202                         -- result after masking could be zero, or could be a
2203                         -- denormalized result that needs to be renormalized
2204                         renormalize := '1';     -- loads v.shift with clz(r.r) - 9 for ROUNDING_3
2205                         v.state := ROUNDING_3;
2206                     else
2207                         arith_done := '1';
2208                     end if;
2209                 end if;
2210                 if round(0) = '1' then          -- round(0) is the bit stored into FPSCR_FI above
2211                     v.fpscr(FPSCR_XX) := '1';
2212                     if r.tiny = '1' then        -- r.tiny was set by ROUND_UFLOW
2213                         v.fpscr(FPSCR_UX) := '1';
2214                     end if;
2215                 end if;
2216
2217             when ROUNDING_2 =>
2218                 -- Check for overflow during rounding
2219                 -- r.shift = -1
2220                 v.x := '0';
2221                 if r.r(55) = '1' then           -- bit 55 set: mantissa is one bit wider than the normalized position (bit 54)
2222                     opsel_r <= RES_SHIFT;       -- R := shifter output; v.result_exp is also set to new_exp
2223                     if exp_huge = '1' then
2224                         v.state := ROUND_OFLOW;
2225                     else
2226                         arith_done := '1';
2227                     end if;
2228                 elsif r.r(54) = '0' then
2229                     -- Do CLZ so we can renormalize the result
2230                     renormalize := '1';         -- loads v.shift with clz(r.r) - 9 for ROUNDING_3
2231                     v.state := ROUNDING_3;
2232                 else
2233                     arith_done := '1';          -- leading 1 is at bit 54; nothing more to do
2234                 end if;
2235
2236             when ROUNDING_3 =>
2237                 -- r.shift = clz(r.r) - 9
2238                 mant_nz := r_hi_nz or (r_lo_nz and not r.single_prec);  -- r_lo_nz is ignored for single precision
2239                 if mant_nz = '0' then
2240                     v.result_class := ZERO;
2241                     if r.is_subtract = '1' then
2242                         -- set result sign depending on rounding mode
2243                         v.result_sign := r.round_mode(1) and r.round_mode(0);   -- negative only when round_mode(1 downto 0) = "11"
2244                     end if;
2245                     arith_done := '1';
2246                 else
2247                     -- Renormalize result after rounding
2248                     opsel_r <= RES_SHIFT;       -- R := shifter output; v.result_exp is also set to new_exp
2249                     v.denorm := exp_tiny;       -- r.denorm feeds the result_flags call after the state machine
2250                     v.shift := new_exp - to_signed(-1022, EXP_BITS);    -- shift amount used by DENORM
2251                     if new_exp < to_signed(-1022, EXP_BITS) then
2252                         v.state := DENORM;
2253                     else
2254                         arith_done := '1';
2255                     end if;
2256                 end if;
2257
2258             when DENORM =>
2259                 -- r.shift = result_exp - -1022
2260                 opsel_r <= RES_SHIFT;           -- R := shifter output; v.result_exp is also set to new_exp
2261                 arith_done := '1';              -- always the final state of the operation
2262
2263         end case;
2264
2265         if zero_divide = '1' then            -- common completion handling, evaluated after the state machine
2266             v.fpscr(FPSCR_ZX) := '1';
2267         end if;
2268         if qnan_result = '1' then
2269             invalid := '1';
2270             v.result_class := NAN;
2271             v.result_sign := '0';
2272             misc_sel <= "0001";              -- generated QNaN mantissa; overrides any misc_sel assigned by the state logic
2273             opsel_r <= RES_MISC;             -- likewise overrides the state's opsel_r
2274         end if;
2275         if arith_done = '1' then
2276             -- Enabled invalid exception doesn't write result or FPRF
2277             -- Neither does enabled zero-divide exception
2278             if (invalid and r.fpscr(FPSCR_VE)) = '0' and
2279                 (zero_divide and r.fpscr(FPSCR_ZE)) = '0' then
2280                 v.writing_back := '1';
2281                 v.update_fprf := '1';
2282             end if;
2283             v.instr_done := '1';
2284             v.state := IDLE;                 -- overrides any next state chosen by the state logic
2285             update_fx := '1';                -- allows FPSCR_FX to be set in the FPSCR summary logic
2286         end if;
2287
2288         -- Multiplier and divide/square root data path
2289         case msel_1 is                       -- operand 1 select
2290             when MUL1_A =>
2291                 f_to_multiply.data1 <= r.a.mantissa(61 downto 0) & "00";
2292             when MUL1_B =>
2293                 f_to_multiply.data1 <= r.b.mantissa(61 downto 0) & "00";
2294             when MUL1_Y =>
2295                 f_to_multiply.data1 <= r.y;
2296             when others =>                   -- e.g. MUL1_R: R with its top 2 bits dropped and 2 zero bits appended
2297                 f_to_multiply.data1 <= r.r(61 downto 0) & "00";
2298         end case;
2299         case msel_2 is                       -- operand 2 select
2300             when MUL2_C =>
2301                 f_to_multiply.data2 <= r.c.mantissa(61 downto 0) & "00";
2302             when MUL2_LUT =>
2303                 f_to_multiply.data2 <= x"00" & inverse_est & '0' & x"000000000";
2304             when MUL2_P =>
2305                 f_to_multiply.data2 <= r.p;
2306             when others =>                   -- e.g. MUL2_R: R with its top 2 bits dropped and 2 zero bits appended
2307                 f_to_multiply.data2 <= r.r(61 downto 0) & "00";
2308         end case;
2309         maddend := (others => '0');          -- addend defaults to zero
2310         case msel_add is
2311             when MULADD_CONST =>
2312                 -- addend is 2.0 or 1.5 in 16.112 format
2313                 if r.is_sqrt = '0' then
2314                     maddend(113) := '1';                -- 2.0
2315                 else
2316                     maddend(112 downto 111) := "11";    -- 1.5
2317                 end if;
2318             when MULADD_A =>
2319                 -- addend is A in 16.112 format
2320                 maddend(121 downto 58) := r.a.mantissa;
2321             when MULADD_RS =>
2322                 -- addend is concatenation of R and S in 16.112 format
2323                 maddend := "000000" & r.r & r.s & "00";
2324             when others =>                   -- any other selection leaves the addend at zero
2325         end case;
2326         if msel_inv = '1' then               -- msel_inv inverts the addend and also sets not_result below
2327             f_to_multiply.addend <= not maddend;
2328         else
2329             f_to_multiply.addend <= maddend;
2330         end if;
2331         f_to_multiply.not_result <= msel_inv;
2332         if set_y = '1' then                  -- Y is loaded from whatever is driven onto multiplier operand 2
2333             v.y := f_to_multiply.data2;
2334         end if;
2335         if multiply_to_f.valid = '1' then    -- P is loaded whenever the multiplier reports a valid result
2336             if pshift = '0' then
2337                 v.p := multiply_to_f.result(63 downto 0);
2338             else
2339                 v.p := multiply_to_f.result(119 downto 56);
2340             end if;
2341         end if;
2342
2343         -- Data path.
2344         -- This has A and B input multiplexers, an adder, a shifter,
2345         -- count-leading-zeroes logic, and a result mux.
2346         if longmask = '1' then               -- longmask extends the mask shift by a further 29 bit positions
2347             mshift := r.shift + to_signed(-29, EXP_BITS);
2348         else
2349             mshift := r.shift;
2350         end if;
2351         if mshift < to_signed(-64, EXP_BITS) then    -- below -64: every bit is masked
2352             mask := (others => '1');
2353         elsif mshift >= to_signed(0, EXP_BITS) then  -- zero or positive: nothing is masked
2354             mask := (others => '0');
2355         else                                 -- -64 .. -1: mask is derived from the low 6 bits of mshift
2356             mask := right_mask(unsigned(mshift(5 downto 0)));
2357         end if;
2358         case opsel_a is                      -- A input select
2359             when AIN_R =>
2360                 in_a0 := r.r;
2361             when AIN_A =>
2362                 in_a0 := r.a.mantissa;
2363             when AIN_B =>
2364                 in_a0 := r.b.mantissa;
2365             when others =>
2366                 in_a0 := r.c.mantissa;
2367         end case;
2368         if (or (mask and in_a0)) = '1' and set_x = '1' then   -- tested on the raw A input, before inversion and masking
2369             v.x := '1';
2370         end if;
2371         if opsel_ainv = '1' then             -- optional bitwise inversion of the A input
2372             in_a0 := not in_a0;
2373         end if;
2374         if opsel_amask = '1' then            -- optional clearing of the masked bits (applied after inversion)
2375             in_a0 := in_a0 and not mask;
2376         end if;
2377         in_a <= in_a0;
2378         case opsel_b is                      -- B input select
2379             when BIN_ZERO =>
2380                 in_b0 := (others => '0');
2381             when BIN_R =>
2382                 in_b0 := r.r;
2383             when BIN_MASK =>                 -- the shift mask computed for the A input path
2384                 in_b0 := mask;
2385             when others =>
2386                 -- BIN_PS6, 6 LSBs of P/4 sign-extended to 64
2387                 in_b0 := std_ulogic_vector(resize(signed(r.p(7 downto 2)), 64));
2388         end case;
2389         if opsel_binv = '1' then             -- optional bitwise inversion of the B input
2390             in_b0 := not in_b0;
2391         end if;
2392         in_b <= in_b0;
2393         if r.shift >= to_signed(-64, EXP_BITS) and r.shift <= to_signed(63, EXP_BITS) then   -- shifter is only used for shift counts in -64 .. 63
2394             shift_res := shifter_64(r.r & (shiftin or r.s(55)) & r.s(54 downto 0),           -- shifter input is R followed by S, with shiftin ORed into the top bit of S
2395                                     std_ulogic_vector(r.shift(6 downto 0)));
2396         else                                 -- out-of-range shift counts give an all-zero result
2397             shift_res := (others => '0');
2398         end if;
2399         case opsel_r is                      -- result mux
2400             when RES_SUM =>
2401                 result <= std_ulogic_vector(unsigned(in_a) + unsigned(in_b) + carry_in);
2402             when RES_SHIFT =>
2403                 result <= shift_res;
2404             when RES_MULT =>
2405                 result <= multiply_to_f.result(121 downto 58);
2406             when others =>                   -- RES_MISC: constant or special value chosen by misc_sel
2407                 case misc_sel is
2408                     when "0000" =>           -- FPSCR value, masked by fpscr_mask, in the low 32 bits
2409                         misc := x"00000000" & (r.fpscr and fpscr_mask);
2410                     when "0001" =>
2411                         -- generated QNaN mantissa
2412                         misc := x"0020000000000000";
2413                     when "0010" =>
2414                         -- mantissa of max representable DP number
2415                         misc := x"007ffffffffffffc";
2416                     when "0011" =>
2417                         -- mantissa of max representable SP number
2418                         misc := x"007fffff80000000";
2419                     when "0100" =>
2420                         -- fmrgow result
2421                         misc := r.a.mantissa(31 downto 0) & r.b.mantissa(31 downto 0);
2422                     when "0110" =>
2423                         -- fmrgew result
2424                         misc := r.a.mantissa(63 downto 32) & r.b.mantissa(63 downto 32);
2425                     when "0111" =>           -- inverse_est placed between 10 leading and 35 trailing zero bits
2426                         misc := 10x"000" & inverse_est & 35x"000000000";
2427                     when "1000" =>
2428                         -- max positive result for fctiw[z]
2429                         misc := x"000000007fffffff";
2430                     when "1001" =>
2431                         -- max negative result for fctiw[z]
2432                         misc := x"ffffffff80000000";
2433                     when "1010" =>
2434                         -- max positive result for fctiwu[z]
2435                         misc := x"00000000ffffffff";
2436                     when "1011" =>
2437                         -- max negative result for fctiwu[z]
2438                         misc := x"0000000000000000";
2439                     when "1100" =>
2440                         -- max positive result for fctid[z]
2441                         misc := x"7fffffffffffffff";
2442                     when "1101" =>
2443                         -- max negative result for fctid[z]
2444                         misc := x"8000000000000000";
2445                     when "1110" =>
2446                         -- max positive result for fctidu[z]
2447                         misc := x"ffffffffffffffff";
2448                     when "1111" =>
2449                         -- max negative result for fctidu[z]
2450                         misc := x"0000000000000000";
2451                     when others =>           -- "0101" and any non-binary value of misc_sel
2452                         misc := x"0000000000000000";
2453                 end case;
2454                 result <= misc;
2455         end case;
2456         v.r := result;                       -- R is loaded from the result signal on every evaluation
2457         if set_s = '1' then                  -- S is only updated when set_s is asserted
2458             case opsel_s is
2459                 when S_NEG =>                -- S := (not S) + (not X)
2460                     v.s := std_ulogic_vector(unsigned(not r.s) + (not r.x));
2461                 when S_MULT =>               -- multiplier result bits just below the 121..58 slice that RES_MULT puts in R
2462                     v.s := multiply_to_f.result(57 downto 2);
2463                 when S_SHIFT =>              -- top 56 bits of the shifter output
2464                     v.s := shift_res(63 downto 8);
2465                     if shift_res(7 downto 0) /= x"00" then   -- the 8 bits dropped below S set X if non-zero
2466                         v.x := '1';
2467                     end if;
2468                 when others =>               -- any other selection clears S
2469                     v.s := (others => '0');
2470             end case;
2471         end if;
2472
2473         if set_a = '1' then                  -- set_a/set_b/set_c load an operand register from the shifter output and new_exp
2474             v.a.exponent := new_exp;
2475             v.a.mantissa := shift_res;
2476         end if;
2477         if set_b = '1' then
2478             v.b.exponent := new_exp;
2479             v.b.mantissa := shift_res;
2480         end if;
2481         if set_c = '1' then
2482             v.c.exponent := new_exp;
2483             v.c.mantissa := shift_res;
2484         end if;
2485
2486         if opsel_r = RES_SHIFT then          -- taking the shifter output into R also updates result_exp,
2487             v.result_exp := new_exp;         -- overriding any v.result_exp assigned by the state logic
2488         end if;
2489
2490         if renormalize = '1' then            -- compute the shift count for a following normalization state
2491             clz := count_left_zeroes(r.r);
2492             if renorm_sqrt = '1' then
2493                 -- make denormalized value end up with even exponent
2494                 clz(0) := '1';
2495             end if;
2496             v.shift := resize(signed('0' & clz) - 9, EXP_BITS);   -- clz(r.r) - 9; overrides any v.shift set by the state logic
2497         end if;
2498
2499         if r.int_result = '1' then           -- integer results are taken from R unmodified
2500             fp_result <= r.r;
2501         else                                 -- otherwise the result is packed by pack_dp from sign, class, exponent and R
2502             fp_result <= pack_dp(r.result_sign, r.result_class, r.result_exp, r.r,
2503                                  r.single_prec, r.quieten_nan);
2504         end if;
2505         if r.update_fprf = '1' then          -- uses registered (r.*) values, i.e. those latched at the previous clock edge
2506             v.fpscr(FPSCR_C downto FPSCR_FU) := result_flags(r.result_sign, r.result_class,
2507                                                              r.r(54) and not r.denorm);
2508         end if;
2509
2510         v.fpscr(FPSCR_VX) := (or (v.fpscr(FPSCR_VXSNAN downto FPSCR_VXVC))) or    -- VX is the OR of all the individual VX* bits
2511                              (or (v.fpscr(FPSCR_VXSOFT downto FPSCR_VXCVI)));
2512         v.fpscr(FPSCR_FEX) := or (v.fpscr(FPSCR_VX downto FPSCR_XX) and           -- FEX is set if any exception bit has its enable bit set
2513                                   v.fpscr(FPSCR_VE downto FPSCR_XE));
2514         if update_fx = '1' and               -- FX is set when an exception bit is set that was clear in r.old_exc
2515             (v.fpscr(FPSCR_VX downto FPSCR_XX) and not r.old_exc) /= "00000" then
2516             v.fpscr(FPSCR_FX) := '1';
2517         end if;
2518         if r.rc = '1' then                   -- when r.rc is set, the CR result is FPSCR bits FX downto OX
2519             v.cr_result := v.fpscr(FPSCR_FX downto FPSCR_OX);
2520         end if;
2521
2522         if illegal = '1' then                -- illegal instruction: cancel completion, interrupt, writeback and busy
2523             v.instr_done := '0';
2524             v.do_intr := '0';
2525             v.writing_back := '0';
2526             v.busy := '0';
2527             v.state := IDLE;
2528         else
2529             v.do_intr := v.instr_done and v.fpscr(FPSCR_FEX) and r.fe_mode;   -- interrupt when an instruction completes with FEX set and r.fe_mode is set
2530             if v.state /= IDLE or v.do_intr = '1' then   -- busy while an operation is in progress or an interrupt is being raised
2531                 v.busy := '1';
2532             end if;
2533         end if;
2534
2535         rin <= v;                            -- drive the computed next-state record v onto rin
2536         e_out.illegal <= illegal;
2537     end process;
2538
2539 end architecture behaviour;