1 -- Floating-point unit for Microwatt
4 use ieee.std_logic_1164.all;
5 use ieee.numeric_std.all;
8 use work.insn_helpers.all;
9 use work.decode_types.all;
10 use work.crhelpers.all;
19 e_in : in Execute1toFPUType;
20 e_out : out FPUToExecute1Type;
22 w_out : out FPUToWritebackType
26 architecture behaviour of fpu is
27 type fp_number_class is (ZERO, FINITE, INFINITY, NAN);
29 constant EXP_BITS : natural := 13;
31 type fpu_reg_type is record
32 class : fp_number_class;
33 negative : std_ulogic;
34 exponent : signed(EXP_BITS-1 downto 0); -- unbiased
35 mantissa : std_ulogic_vector(63 downto 0); -- 10.54 format
38 type state_t is (IDLE,
39 DO_MCRFS, DO_MTFSB, DO_MTFSFI, DO_MFFS, DO_MTFSF,
40 DO_FMR, DO_FMRG, DO_FCMP, DO_FTDIV, DO_FTSQRT,
43 DO_FADD, DO_FMUL, DO_FDIV, DO_FSQRT, DO_FMADD,
47 ADD_SHIFT, ADD_2, ADD_3,
50 FMADD_1, FMADD_2, FMADD_3,
51 FMADD_4, FMADD_5, FMADD_6,
53 DIV_2, DIV_3, DIV_4, DIV_5, DIV_6,
57 SQRT_1, SQRT_2, SQRT_3, SQRT_4,
58 SQRT_5, SQRT_6, SQRT_7, SQRT_8,
59 SQRT_9, SQRT_10, SQRT_11, SQRT_12,
60 INT_SHIFT, INT_ROUND, INT_ISHIFT,
61 INT_FINAL, INT_CHECK, INT_OFLOW,
63 ROUND_UFLOW, ROUND_OFLOW,
64 ROUNDING, ROUNDING_2, ROUNDING_3,
70 type reg_type is record
73 instr_done : std_ulogic;
76 insn : std_ulogic_vector(31 downto 0);
77 dest_fpr : gspr_index_t;
81 single_prec : std_ulogic;
82 fpscr : std_ulogic_vector(31 downto 0);
86 r : std_ulogic_vector(63 downto 0); -- 10.54 format
87 s : std_ulogic_vector(55 downto 0); -- extended fraction
89 p : std_ulogic_vector(63 downto 0); -- 8.56 format
90 y : std_ulogic_vector(63 downto 0); -- 8.56 format
91 result_sign : std_ulogic;
92 result_class : fp_number_class;
93 result_exp : signed(EXP_BITS-1 downto 0);
94 shift : signed(EXP_BITS-1 downto 0);
95 writing_back : std_ulogic;
96 int_result : std_ulogic;
97 cr_result : std_ulogic_vector(3 downto 0);
98 cr_mask : std_ulogic_vector(7 downto 0);
99 old_exc : std_ulogic_vector(4 downto 0);
100 update_fprf : std_ulogic;
101 quieten_nan : std_ulogic;
104 round_mode : std_ulogic_vector(2 downto 0);
105 is_subtract : std_ulogic;
106 exp_cmp : std_ulogic;
107 madd_cmp : std_ulogic;
108 add_bsmall : std_ulogic;
109 is_multiply : std_ulogic;
110 is_sqrt : std_ulogic;
112 count : unsigned(1 downto 0);
113 doing_ftdiv : std_ulogic_vector(1 downto 0);
116 type lookup_table is array(0 to 1023) of std_ulogic_vector(17 downto 0);
118 signal r, rin : reg_type;
120 signal fp_result : std_ulogic_vector(63 downto 0);
121 signal opsel_a : std_ulogic_vector(1 downto 0);
122 signal opsel_b : std_ulogic_vector(1 downto 0);
123 signal opsel_r : std_ulogic_vector(1 downto 0);
124 signal opsel_s : std_ulogic_vector(1 downto 0);
125 signal opsel_ainv : std_ulogic;
126 signal opsel_amask : std_ulogic;
127 signal opsel_binv : std_ulogic;
128 signal in_a : std_ulogic_vector(63 downto 0);
129 signal in_b : std_ulogic_vector(63 downto 0);
130 signal result : std_ulogic_vector(63 downto 0);
131 signal carry_in : std_ulogic;
132 signal lost_bits : std_ulogic;
133 signal r_hi_nz : std_ulogic;
134 signal r_lo_nz : std_ulogic;
135 signal s_nz : std_ulogic;
136 signal misc_sel : std_ulogic_vector(3 downto 0);
137 signal f_to_multiply : MultiplyInputType;
138 signal multiply_to_f : MultiplyOutputType;
139 signal msel_1 : std_ulogic_vector(1 downto 0);
140 signal msel_2 : std_ulogic_vector(1 downto 0);
141 signal msel_add : std_ulogic_vector(1 downto 0);
142 signal msel_inv : std_ulogic;
143 signal inverse_est : std_ulogic_vector(18 downto 0);
146 constant AIN_R : std_ulogic_vector(1 downto 0) := "00";
147 constant AIN_A : std_ulogic_vector(1 downto 0) := "01";
148 constant AIN_B : std_ulogic_vector(1 downto 0) := "10";
149 constant AIN_C : std_ulogic_vector(1 downto 0) := "11";
151 constant BIN_ZERO : std_ulogic_vector(1 downto 0) := "00";
152 constant BIN_R : std_ulogic_vector(1 downto 0) := "01";
153 constant BIN_MASK : std_ulogic_vector(1 downto 0) := "10";
154 constant BIN_PS6 : std_ulogic_vector(1 downto 0) := "11";
156 constant RES_SUM : std_ulogic_vector(1 downto 0) := "00";
157 constant RES_SHIFT : std_ulogic_vector(1 downto 0) := "01";
158 constant RES_MULT : std_ulogic_vector(1 downto 0) := "10";
159 constant RES_MISC : std_ulogic_vector(1 downto 0) := "11";
161 constant S_ZERO : std_ulogic_vector(1 downto 0) := "00";
162 constant S_NEG : std_ulogic_vector(1 downto 0) := "01";
163 constant S_SHIFT : std_ulogic_vector(1 downto 0) := "10";
164 constant S_MULT : std_ulogic_vector(1 downto 0) := "11";
167 constant MUL1_A : std_ulogic_vector(1 downto 0) := "00";
168 constant MUL1_B : std_ulogic_vector(1 downto 0) := "01";
169 constant MUL1_Y : std_ulogic_vector(1 downto 0) := "10";
170 constant MUL1_R : std_ulogic_vector(1 downto 0) := "11";
172 constant MUL2_C : std_ulogic_vector(1 downto 0) := "00";
173 constant MUL2_LUT : std_ulogic_vector(1 downto 0) := "01";
174 constant MUL2_P : std_ulogic_vector(1 downto 0) := "10";
175 constant MUL2_R : std_ulogic_vector(1 downto 0) := "11";
177 constant MULADD_ZERO : std_ulogic_vector(1 downto 0) := "00";
178 constant MULADD_CONST : std_ulogic_vector(1 downto 0) := "01";
179 constant MULADD_A : std_ulogic_vector(1 downto 0) := "10";
180 constant MULADD_RS : std_ulogic_vector(1 downto 0) := "11";
182 -- Inverse lookup table, indexed by the top 8 fraction bits
183 -- The first 256 entries are the reciprocal (1/x) lookup table,
184 -- and the remaining 768 entries are the reciprocal square root table.
185 -- Output range is [0.5, 1) in 0.19 format, though the top
186 -- bit isn't stored since it is always 1.
187 -- Each output value is the inverse of the center of the input
188 -- range for the value, i.e. entry 0 is 1 / (1 + 1/512),
189 -- entry 1 is 1 / (1 + 3/512), etc.
190 signal inverse_table : lookup_table := (
192 -- Unit bit is assumed to be 1, so input range is [1, 2)
193 18x"3fc01", 18x"3f411", 18x"3ec31", 18x"3e460", 18x"3dc9f", 18x"3d4ec", 18x"3cd49", 18x"3c5b5",
194 18x"3be2f", 18x"3b6b8", 18x"3af4f", 18x"3a7f4", 18x"3a0a7", 18x"39968", 18x"39237", 18x"38b14",
195 18x"383fe", 18x"37cf5", 18x"375f9", 18x"36f0a", 18x"36828", 18x"36153", 18x"35a8a", 18x"353ce",
196 18x"34d1e", 18x"3467a", 18x"33fe3", 18x"33957", 18x"332d7", 18x"32c62", 18x"325f9", 18x"31f9c",
197 18x"3194a", 18x"31303", 18x"30cc7", 18x"30696", 18x"30070", 18x"2fa54", 18x"2f443", 18x"2ee3d",
198 18x"2e841", 18x"2e250", 18x"2dc68", 18x"2d68b", 18x"2d0b8", 18x"2caee", 18x"2c52e", 18x"2bf79",
199 18x"2b9cc", 18x"2b429", 18x"2ae90", 18x"2a900", 18x"2a379", 18x"29dfb", 18x"29887", 18x"2931b",
200 18x"28db8", 18x"2885e", 18x"2830d", 18x"27dc4", 18x"27884", 18x"2734d", 18x"26e1d", 18x"268f6",
201 18x"263d8", 18x"25ec1", 18x"259b3", 18x"254ac", 18x"24fad", 18x"24ab7", 18x"245c8", 18x"240e1",
202 18x"23c01", 18x"23729", 18x"23259", 18x"22d90", 18x"228ce", 18x"22413", 18x"21f60", 18x"21ab4",
203 18x"2160f", 18x"21172", 18x"20cdb", 18x"2084b", 18x"203c2", 18x"1ff40", 18x"1fac4", 18x"1f64f",
204 18x"1f1e1", 18x"1ed79", 18x"1e918", 18x"1e4be", 18x"1e069", 18x"1dc1b", 18x"1d7d4", 18x"1d392",
205 18x"1cf57", 18x"1cb22", 18x"1c6f3", 18x"1c2ca", 18x"1bea7", 18x"1ba8a", 18x"1b672", 18x"1b261",
206 18x"1ae55", 18x"1aa50", 18x"1a64f", 18x"1a255", 18x"19e60", 18x"19a70", 18x"19686", 18x"192a2",
207 18x"18ec3", 18x"18ae9", 18x"18715", 18x"18345", 18x"17f7c", 18x"17bb7", 18x"177f7", 18x"1743d",
208 18x"17087", 18x"16cd7", 18x"1692c", 18x"16585", 18x"161e4", 18x"15e47", 18x"15ab0", 18x"1571d",
209 18x"1538e", 18x"15005", 18x"14c80", 18x"14900", 18x"14584", 18x"1420d", 18x"13e9b", 18x"13b2d",
210 18x"137c3", 18x"1345e", 18x"130fe", 18x"12da2", 18x"12a4a", 18x"126f6", 18x"123a7", 18x"1205c",
211 18x"11d15", 18x"119d2", 18x"11694", 18x"11359", 18x"11023", 18x"10cf1", 18x"109c2", 18x"10698",
212 18x"10372", 18x"10050", 18x"0fd31", 18x"0fa17", 18x"0f700", 18x"0f3ed", 18x"0f0de", 18x"0edd3",
213 18x"0eacb", 18x"0e7c7", 18x"0e4c7", 18x"0e1ca", 18x"0ded2", 18x"0dbdc", 18x"0d8eb", 18x"0d5fc",
214 18x"0d312", 18x"0d02b", 18x"0cd47", 18x"0ca67", 18x"0c78a", 18x"0c4b1", 18x"0c1db", 18x"0bf09",
215 18x"0bc3a", 18x"0b96e", 18x"0b6a5", 18x"0b3e0", 18x"0b11e", 18x"0ae5f", 18x"0aba3", 18x"0a8eb",
216 18x"0a636", 18x"0a383", 18x"0a0d4", 18x"09e28", 18x"09b80", 18x"098da", 18x"09637", 18x"09397",
217 18x"090fb", 18x"08e61", 18x"08bca", 18x"08936", 18x"086a5", 18x"08417", 18x"0818c", 18x"07f04",
218 18x"07c7e", 18x"079fc", 18x"0777c", 18x"074ff", 18x"07284", 18x"0700d", 18x"06d98", 18x"06b26",
219 18x"068b6", 18x"0664a", 18x"063e0", 18x"06178", 18x"05f13", 18x"05cb1", 18x"05a52", 18x"057f5",
220 18x"0559a", 18x"05342", 18x"050ed", 18x"04e9a", 18x"04c4a", 18x"049fc", 18x"047b0", 18x"04567",
221 18x"04321", 18x"040dd", 18x"03e9b", 18x"03c5c", 18x"03a1f", 18x"037e4", 18x"035ac", 18x"03376",
222 18x"03142", 18x"02f11", 18x"02ce2", 18x"02ab5", 18x"0288b", 18x"02663", 18x"0243d", 18x"02219",
223 18x"01ff7", 18x"01dd8", 18x"01bbb", 18x"019a0", 18x"01787", 18x"01570", 18x"0135b", 18x"01149",
224 18x"00f39", 18x"00d2a", 18x"00b1e", 18x"00914", 18x"0070c", 18x"00506", 18x"00302", 18x"00100",
225 -- 1/sqrt(x) lookup table
226 -- Input is in the range [1, 4), i.e. two bits to the left of the
227 -- binary point. Those 2 bits index the following 3 blocks of 256 values.
229 18x"3fe00", 18x"3fa06", 18x"3f612", 18x"3f224", 18x"3ee3a", 18x"3ea58", 18x"3e67c", 18x"3e2a4",
230 18x"3ded2", 18x"3db06", 18x"3d73e", 18x"3d37e", 18x"3cfc2", 18x"3cc0a", 18x"3c85a", 18x"3c4ae",
231 18x"3c106", 18x"3bd64", 18x"3b9c8", 18x"3b630", 18x"3b29e", 18x"3af10", 18x"3ab86", 18x"3a802",
232 18x"3a484", 18x"3a108", 18x"39d94", 18x"39a22", 18x"396b6", 18x"3934e", 18x"38fea", 18x"38c8c",
233 18x"38932", 18x"385dc", 18x"3828a", 18x"37f3e", 18x"37bf6", 18x"378b2", 18x"37572", 18x"37236",
234 18x"36efe", 18x"36bca", 18x"3689a", 18x"36570", 18x"36248", 18x"35f26", 18x"35c06", 18x"358ea",
235 18x"355d4", 18x"352c0", 18x"34fb0", 18x"34ca4", 18x"3499c", 18x"34698", 18x"34398", 18x"3409c",
236 18x"33da2", 18x"33aac", 18x"337bc", 18x"334cc", 18x"331e2", 18x"32efc", 18x"32c18", 18x"32938",
237 18x"3265a", 18x"32382", 18x"320ac", 18x"31dd8", 18x"31b0a", 18x"3183e", 18x"31576", 18x"312b0",
238 18x"30fee", 18x"30d2e", 18x"30a74", 18x"307ba", 18x"30506", 18x"30254", 18x"2ffa4", 18x"2fcf8",
239 18x"2fa4e", 18x"2f7a8", 18x"2f506", 18x"2f266", 18x"2efca", 18x"2ed2e", 18x"2ea98", 18x"2e804",
240 18x"2e572", 18x"2e2e4", 18x"2e058", 18x"2ddce", 18x"2db48", 18x"2d8c6", 18x"2d646", 18x"2d3c8",
241 18x"2d14c", 18x"2ced4", 18x"2cc5e", 18x"2c9ea", 18x"2c77a", 18x"2c50c", 18x"2c2a2", 18x"2c038",
242 18x"2bdd2", 18x"2bb70", 18x"2b90e", 18x"2b6b0", 18x"2b454", 18x"2b1fa", 18x"2afa4", 18x"2ad4e",
243 18x"2aafc", 18x"2a8ac", 18x"2a660", 18x"2a414", 18x"2a1cc", 18x"29f86", 18x"29d42", 18x"29b00",
244 18x"298c2", 18x"29684", 18x"2944a", 18x"29210", 18x"28fda", 18x"28da6", 18x"28b74", 18x"28946",
245 18x"28718", 18x"284ec", 18x"282c4", 18x"2809c", 18x"27e78", 18x"27c56", 18x"27a34", 18x"27816",
246 18x"275fa", 18x"273e0", 18x"271c8", 18x"26fb0", 18x"26d9c", 18x"26b8a", 18x"2697a", 18x"2676c",
247 18x"26560", 18x"26356", 18x"2614c", 18x"25f46", 18x"25d42", 18x"25b40", 18x"2593e", 18x"25740",
248 18x"25542", 18x"25348", 18x"2514e", 18x"24f58", 18x"24d62", 18x"24b6e", 18x"2497c", 18x"2478c",
249 18x"2459e", 18x"243b0", 18x"241c6", 18x"23fde", 18x"23df6", 18x"23c10", 18x"23a2c", 18x"2384a",
250 18x"2366a", 18x"2348c", 18x"232ae", 18x"230d2", 18x"22efa", 18x"22d20", 18x"22b4a", 18x"22976",
251 18x"227a2", 18x"225d2", 18x"22402", 18x"22234", 18x"22066", 18x"21e9c", 18x"21cd2", 18x"21b0a",
252 18x"21944", 18x"2177e", 18x"215ba", 18x"213fa", 18x"21238", 18x"2107a", 18x"20ebc", 18x"20d00",
253 18x"20b46", 18x"2098e", 18x"207d6", 18x"20620", 18x"2046c", 18x"202b8", 18x"20108", 18x"1ff58",
254 18x"1fda8", 18x"1fbfc", 18x"1fa50", 18x"1f8a4", 18x"1f6fc", 18x"1f554", 18x"1f3ae", 18x"1f208",
255 18x"1f064", 18x"1eec2", 18x"1ed22", 18x"1eb82", 18x"1e9e4", 18x"1e846", 18x"1e6aa", 18x"1e510",
256 18x"1e378", 18x"1e1e0", 18x"1e04a", 18x"1deb4", 18x"1dd20", 18x"1db8e", 18x"1d9fc", 18x"1d86c",
257 18x"1d6de", 18x"1d550", 18x"1d3c4", 18x"1d238", 18x"1d0ae", 18x"1cf26", 18x"1cd9e", 18x"1cc18",
258 18x"1ca94", 18x"1c910", 18x"1c78c", 18x"1c60a", 18x"1c48a", 18x"1c30c", 18x"1c18e", 18x"1c010",
259 18x"1be94", 18x"1bd1a", 18x"1bba0", 18x"1ba28", 18x"1b8b2", 18x"1b73c", 18x"1b5c6", 18x"1b452",
260 18x"1b2e0", 18x"1b16e", 18x"1affe", 18x"1ae8e", 18x"1ad20", 18x"1abb4", 18x"1aa46", 18x"1a8dc",
262 18x"1a772", 18x"1a608", 18x"1a4a0", 18x"1a33a", 18x"1a1d4", 18x"1a070", 18x"19f0c", 18x"19da8",
263 18x"19c48", 18x"19ae6", 18x"19986", 18x"19828", 18x"196ca", 18x"1956e", 18x"19412", 18x"192b8",
264 18x"1915e", 18x"19004", 18x"18eae", 18x"18d56", 18x"18c00", 18x"18aac", 18x"18958", 18x"18804",
265 18x"186b2", 18x"18562", 18x"18412", 18x"182c2", 18x"18174", 18x"18026", 18x"17eda", 18x"17d8e",
266 18x"17c44", 18x"17afa", 18x"179b2", 18x"1786a", 18x"17724", 18x"175de", 18x"17498", 18x"17354",
267 18x"17210", 18x"170ce", 18x"16f8c", 18x"16e4c", 18x"16d0c", 18x"16bcc", 18x"16a8e", 18x"16950",
268 18x"16814", 18x"166d8", 18x"1659e", 18x"16464", 18x"1632a", 18x"161f2", 18x"160ba", 18x"15f84",
269 18x"15e4e", 18x"15d1a", 18x"15be6", 18x"15ab2", 18x"15980", 18x"1584e", 18x"1571c", 18x"155ec",
270 18x"154bc", 18x"1538e", 18x"15260", 18x"15134", 18x"15006", 18x"14edc", 18x"14db0", 18x"14c86",
271 18x"14b5e", 18x"14a36", 18x"1490e", 18x"147e6", 18x"146c0", 18x"1459a", 18x"14476", 18x"14352",
272 18x"14230", 18x"1410c", 18x"13fea", 18x"13eca", 18x"13daa", 18x"13c8a", 18x"13b6c", 18x"13a4e",
273 18x"13930", 18x"13814", 18x"136f8", 18x"135dc", 18x"134c2", 18x"133a8", 18x"1328e", 18x"13176",
274 18x"1305e", 18x"12f48", 18x"12e30", 18x"12d1a", 18x"12c06", 18x"12af2", 18x"129de", 18x"128ca",
275 18x"127b8", 18x"126a6", 18x"12596", 18x"12486", 18x"12376", 18x"12266", 18x"12158", 18x"1204a",
276 18x"11f3e", 18x"11e32", 18x"11d26", 18x"11c1a", 18x"11b10", 18x"11a06", 18x"118fc", 18x"117f4",
277 18x"116ec", 18x"115e4", 18x"114de", 18x"113d8", 18x"112d2", 18x"111ce", 18x"110ca", 18x"10fc6",
278 18x"10ec2", 18x"10dc0", 18x"10cbe", 18x"10bbc", 18x"10abc", 18x"109bc", 18x"108bc", 18x"107be",
279 18x"106c0", 18x"105c2", 18x"104c4", 18x"103c8", 18x"102cc", 18x"101d0", 18x"100d6", 18x"0ffdc",
280 18x"0fee2", 18x"0fdea", 18x"0fcf0", 18x"0fbf8", 18x"0fb02", 18x"0fa0a", 18x"0f914", 18x"0f81e",
281 18x"0f72a", 18x"0f636", 18x"0f542", 18x"0f44e", 18x"0f35a", 18x"0f268", 18x"0f176", 18x"0f086",
282 18x"0ef94", 18x"0eea4", 18x"0edb4", 18x"0ecc6", 18x"0ebd6", 18x"0eae8", 18x"0e9fa", 18x"0e90e",
283 18x"0e822", 18x"0e736", 18x"0e64a", 18x"0e55e", 18x"0e474", 18x"0e38a", 18x"0e2a0", 18x"0e1b8",
284 18x"0e0d0", 18x"0dfe8", 18x"0df00", 18x"0de1a", 18x"0dd32", 18x"0dc4c", 18x"0db68", 18x"0da82",
285 18x"0d99e", 18x"0d8ba", 18x"0d7d6", 18x"0d6f4", 18x"0d612", 18x"0d530", 18x"0d44e", 18x"0d36c",
286 18x"0d28c", 18x"0d1ac", 18x"0d0cc", 18x"0cfee", 18x"0cf0e", 18x"0ce30", 18x"0cd54", 18x"0cc76",
287 18x"0cb9a", 18x"0cabc", 18x"0c9e0", 18x"0c906", 18x"0c82a", 18x"0c750", 18x"0c676", 18x"0c59c",
288 18x"0c4c4", 18x"0c3ea", 18x"0c312", 18x"0c23a", 18x"0c164", 18x"0c08c", 18x"0bfb6", 18x"0bee0",
289 18x"0be0a", 18x"0bd36", 18x"0bc62", 18x"0bb8c", 18x"0baba", 18x"0b9e6", 18x"0b912", 18x"0b840",
290 18x"0b76e", 18x"0b69c", 18x"0b5cc", 18x"0b4fa", 18x"0b42a", 18x"0b35a", 18x"0b28a", 18x"0b1bc",
291 18x"0b0ee", 18x"0b01e", 18x"0af50", 18x"0ae84", 18x"0adb6", 18x"0acea", 18x"0ac1e", 18x"0ab52",
292 18x"0aa86", 18x"0a9bc", 18x"0a8f0", 18x"0a826", 18x"0a75c", 18x"0a694", 18x"0a5ca", 18x"0a502",
293 18x"0a43a", 18x"0a372", 18x"0a2aa", 18x"0a1e4", 18x"0a11c", 18x"0a056", 18x"09f90", 18x"09ecc",
295 18x"09e06", 18x"09d42", 18x"09c7e", 18x"09bba", 18x"09af6", 18x"09a32", 18x"09970", 18x"098ae",
296 18x"097ec", 18x"0972a", 18x"09668", 18x"095a8", 18x"094e8", 18x"09426", 18x"09368", 18x"092a8",
297 18x"091e8", 18x"0912a", 18x"0906c", 18x"08fae", 18x"08ef0", 18x"08e32", 18x"08d76", 18x"08cba",
298 18x"08bfe", 18x"08b42", 18x"08a86", 18x"089ca", 18x"08910", 18x"08856", 18x"0879c", 18x"086e2",
299 18x"08628", 18x"08570", 18x"084b6", 18x"083fe", 18x"08346", 18x"0828e", 18x"081d8", 18x"08120",
300 18x"0806a", 18x"07fb4", 18x"07efe", 18x"07e48", 18x"07d92", 18x"07cde", 18x"07c2a", 18x"07b76",
301 18x"07ac2", 18x"07a0e", 18x"0795a", 18x"078a8", 18x"077f4", 18x"07742", 18x"07690", 18x"075de",
302 18x"0752e", 18x"0747c", 18x"073cc", 18x"0731c", 18x"0726c", 18x"071bc", 18x"0710c", 18x"0705e",
303 18x"06fae", 18x"06f00", 18x"06e52", 18x"06da4", 18x"06cf6", 18x"06c4a", 18x"06b9c", 18x"06af0",
304 18x"06a44", 18x"06998", 18x"068ec", 18x"06840", 18x"06796", 18x"066ea", 18x"06640", 18x"06596",
305 18x"064ec", 18x"06442", 18x"0639a", 18x"062f0", 18x"06248", 18x"061a0", 18x"060f8", 18x"06050",
306 18x"05fa8", 18x"05f00", 18x"05e5a", 18x"05db4", 18x"05d0e", 18x"05c68", 18x"05bc2", 18x"05b1c",
307 18x"05a76", 18x"059d2", 18x"0592e", 18x"05888", 18x"057e4", 18x"05742", 18x"0569e", 18x"055fa",
308 18x"05558", 18x"054b6", 18x"05412", 18x"05370", 18x"052ce", 18x"0522e", 18x"0518c", 18x"050ec",
309 18x"0504a", 18x"04faa", 18x"04f0a", 18x"04e6a", 18x"04dca", 18x"04d2c", 18x"04c8c", 18x"04bee",
310 18x"04b50", 18x"04ab0", 18x"04a12", 18x"04976", 18x"048d8", 18x"0483a", 18x"0479e", 18x"04700",
311 18x"04664", 18x"045c8", 18x"0452c", 18x"04490", 18x"043f6", 18x"0435a", 18x"042c0", 18x"04226",
312 18x"0418a", 18x"040f0", 18x"04056", 18x"03fbe", 18x"03f24", 18x"03e8c", 18x"03df2", 18x"03d5a",
313 18x"03cc2", 18x"03c2a", 18x"03b92", 18x"03afa", 18x"03a62", 18x"039cc", 18x"03934", 18x"0389e",
314 18x"03808", 18x"03772", 18x"036dc", 18x"03646", 18x"035b2", 18x"0351c", 18x"03488", 18x"033f2",
315 18x"0335e", 18x"032ca", 18x"03236", 18x"031a2", 18x"03110", 18x"0307c", 18x"02fea", 18x"02f56",
316 18x"02ec4", 18x"02e32", 18x"02da0", 18x"02d0e", 18x"02c7c", 18x"02bec", 18x"02b5a", 18x"02aca",
317 18x"02a38", 18x"029a8", 18x"02918", 18x"02888", 18x"027f8", 18x"0276a", 18x"026da", 18x"0264a",
318 18x"025bc", 18x"0252e", 18x"024a0", 18x"02410", 18x"02384", 18x"022f6", 18x"02268", 18x"021da",
319 18x"0214e", 18x"020c0", 18x"02034", 18x"01fa8", 18x"01f1c", 18x"01e90", 18x"01e04", 18x"01d78",
320 18x"01cee", 18x"01c62", 18x"01bd8", 18x"01b4c", 18x"01ac2", 18x"01a38", 18x"019ae", 18x"01924",
321 18x"0189c", 18x"01812", 18x"01788", 18x"01700", 18x"01676", 18x"015ee", 18x"01566", 18x"014de",
322 18x"01456", 18x"013ce", 18x"01346", 18x"012c0", 18x"01238", 18x"011b2", 18x"0112c", 18x"010a4",
323 18x"0101e", 18x"00f98", 18x"00f12", 18x"00e8c", 18x"00e08", 18x"00d82", 18x"00cfe", 18x"00c78",
324 18x"00bf4", 18x"00b70", 18x"00aec", 18x"00a68", 18x"009e4", 18x"00960", 18x"008dc", 18x"00858",
325 18x"007d6", 18x"00752", 18x"006d0", 18x"0064e", 18x"005cc", 18x"0054a", 18x"004c8", 18x"00446",
326 18x"003c4", 18x"00342", 18x"002c2", 18x"00240", 18x"001c0", 18x"00140", 18x"000c0", 18x"00040"
329 -- Left and right shifter with 120 bit input and 64 bit output.
330 -- Shifts inp left by shift bits and returns the upper 64 bits of
331 -- the result. The shift parameter is interpreted as a signed
332 -- number in the range -64..63, with negative values indicating right shifts.
334 function shifter_64(inp: std_ulogic_vector(119 downto 0);
335 shift: std_ulogic_vector(6 downto 0))
336 return std_ulogic_vector is
337 variable s1 : std_ulogic_vector(94 downto 0);
338 variable s2 : std_ulogic_vector(70 downto 0);
339 variable result : std_ulogic_vector(63 downto 0);
341 case shift(6 downto 5) is
343 s1 := inp(119 downto 25);
345 s1 := inp(87 downto 0) & "0000000";
347 s1 := x"0000000000000000" & inp(119 downto 89);
349 s1 := x"00000000" & inp(119 downto 57);
351 case shift(4 downto 3) is
353 s2 := s1(94 downto 24);
355 s2 := s1(86 downto 16);
357 s2 := s1(78 downto 8);
359 s2 := s1(70 downto 0);
361 case shift(2 downto 0) is
363 result := s2(70 downto 7);
365 result := s2(69 downto 6);
367 result := s2(68 downto 5);
369 result := s2(67 downto 4);
371 result := s2(66 downto 3);
373 result := s2(65 downto 2);
375 result := s2(64 downto 1);
377 result := s2(63 downto 0);
382 -- Generate a mask with 0-bits on the left and 1-bits on the right which
383 -- selects the bits that will be lost in doing a right shift. The shift
384 -- parameter is the bottom 6 bits of a negative shift count,
385 -- indicating a right shift.
386 function right_mask(shift: unsigned(5 downto 0)) return std_ulogic_vector is
387 variable result: std_ulogic_vector(63 downto 0);
389 result := (others => '0');
390 for i in 0 to 63 loop
392 result(63 - i) := '1';
398 -- Split a DP floating-point number into components and work out its class.
399 -- If is_int = 1, the input is considered an integer
400 function decode_dp(fpr: std_ulogic_vector(63 downto 0); is_int: std_ulogic) return fpu_reg_type is
401 variable r : fpu_reg_type;
402 variable exp_nz : std_ulogic;
403 variable exp_ao : std_ulogic;
404 variable frac_nz : std_ulogic;
405 variable cls : std_ulogic_vector(2 downto 0);
407 r.negative := fpr(63);
408 exp_nz := or (fpr(62 downto 52));
409 exp_ao := and (fpr(62 downto 52));
410 frac_nz := or (fpr(51 downto 0));
412 r.exponent := signed(resize(unsigned(fpr(62 downto 52)), EXP_BITS)) - to_signed(1023, EXP_BITS);
414 r.exponent := to_signed(-1022, EXP_BITS);
416 r.mantissa := "000000000" & exp_nz & fpr(51 downto 0) & "00";
417 cls := exp_ao & exp_nz & frac_nz;
419 when "000" => r.class := ZERO;
420 when "001" => r.class := FINITE; -- denormalized
421 when "010" => r.class := FINITE;
422 when "011" => r.class := FINITE;
423 when "110" => r.class := INFINITY;
424 when others => r.class := NAN;
428 r.exponent := (others => '0');
429 if (fpr(63) or exp_nz or frac_nz) = '1' then
438 -- Construct a DP floating-point result from components
439 function pack_dp(sign: std_ulogic; class: fp_number_class; exp: signed(EXP_BITS-1 downto 0);
440 mantissa: std_ulogic_vector; single_prec: std_ulogic; quieten_nan: std_ulogic)
441 return std_ulogic_vector is
442 variable result : std_ulogic_vector(63 downto 0);
444 result := (others => '0');
449 if mantissa(54) = '1' then
451 result(62 downto 52) := std_ulogic_vector(resize(exp, 11) + 1023);
453 result(51 downto 29) := mantissa(53 downto 31);
454 if single_prec = '0' then
455 result(28 downto 0) := mantissa(30 downto 2);
458 result(62 downto 52) := "11111111111";
460 result(62 downto 52) := "11111111111";
461 result(51) := quieten_nan or mantissa(53);
462 result(50 downto 29) := mantissa(52 downto 31);
463 if single_prec = '0' then
464 result(28 downto 0) := mantissa(30 downto 2);
470 -- Determine whether to increment when rounding
471 -- Returns rounding_inc & inexact
472 -- Assumes x includes the bottom 29 bits of the mantissa already
473 -- if single_prec = 1 (usually arranged by setting set_x = 1 earlier).
474 function fp_rounding(mantissa: std_ulogic_vector(63 downto 0); x: std_ulogic;
475 single_prec: std_ulogic; rn: std_ulogic_vector(2 downto 0);
477 return std_ulogic_vector is
478 variable grx : std_ulogic_vector(2 downto 0);
479 variable ret : std_ulogic_vector(1 downto 0);
480 variable lsb : std_ulogic;
482 if single_prec = '0' then
483 grx := mantissa(1 downto 0) & x;
486 grx := mantissa(30 downto 29) & x;
491 case rn(1 downto 0) is
492 when "00" => -- round to nearest
493 if grx = "100" and rn(2) = '0' then
494 ret(1) := lsb; -- tie, round to even
498 when "01" => -- round towards zero
499 when others => -- round towards +/- inf
501 -- round towards greater magnitude
508 -- Determine result flags to write into the FPSCR
509 function result_flags(sign: std_ulogic; class: fp_number_class; unitbit: std_ulogic)
510 return std_ulogic_vector is
514 return sign & "0010";
516 return (not unitbit) & sign & (not sign) & "00";
518 return '0' & sign & (not sign) & "01";
525 fpu_multiply_0: entity work.multiply
528 m_in => f_to_multiply,
529 m_out => multiply_to_f
534 if rising_edge(clk) then
540 r.fpscr <= (others => '0');
541 r.writing_back <= '0';
543 assert not (r.state /= IDLE and e_in.valid = '1') severity failure;
549 -- synchronous reads from lookup table
550 lut_access: process(clk)
551 variable addrhi : std_ulogic_vector(1 downto 0);
552 variable addr : std_ulogic_vector(9 downto 0);
554 if rising_edge(clk) then
555 if r.is_sqrt = '1' then
556 addrhi := r.b.mantissa(55 downto 54);
560 addr := addrhi & r.b.mantissa(53 downto 46);
561 inverse_est <= '1' & inverse_table(to_integer(unsigned(addr)));
565 e_out.busy <= r.busy;
566 e_out.exception <= r.fpscr(FPSCR_FEX);
567 e_out.interrupt <= r.do_intr;
569 w_out.valid <= r.instr_done and not r.do_intr;
570 w_out.write_enable <= r.writing_back;
571 w_out.write_reg <= r.dest_fpr;
572 w_out.write_data <= fp_result;
573 w_out.write_cr_enable <= r.instr_done and (r.rc or r.is_cmp);
574 w_out.write_cr_mask <= r.cr_mask;
575 w_out.write_cr_data <= r.cr_result & r.cr_result & r.cr_result & r.cr_result &
576 r.cr_result & r.cr_result & r.cr_result & r.cr_result;
579 variable v : reg_type;
580 variable adec : fpu_reg_type;
581 variable bdec : fpu_reg_type;
582 variable cdec : fpu_reg_type;
583 variable fpscr_mask : std_ulogic_vector(31 downto 0);
584 variable illegal : std_ulogic;
585 variable j, k : integer;
586 variable flm : std_ulogic_vector(7 downto 0);
587 variable int_input : std_ulogic;
588 variable mask : std_ulogic_vector(63 downto 0);
589 variable in_a0 : std_ulogic_vector(63 downto 0);
590 variable in_b0 : std_ulogic_vector(63 downto 0);
591 variable misc : std_ulogic_vector(63 downto 0);
592 variable shift_res : std_ulogic_vector(63 downto 0);
593 variable round : std_ulogic_vector(1 downto 0);
594 variable update_fx : std_ulogic;
595 variable arith_done : std_ulogic;
596 variable invalid : std_ulogic;
597 variable zero_divide : std_ulogic;
598 variable mant_nz : std_ulogic;
599 variable min_exp : signed(EXP_BITS-1 downto 0);
600 variable max_exp : signed(EXP_BITS-1 downto 0);
601 variable bias_exp : signed(EXP_BITS-1 downto 0);
602 variable new_exp : signed(EXP_BITS-1 downto 0);
603 variable exp_tiny : std_ulogic;
604 variable exp_huge : std_ulogic;
605 variable renormalize : std_ulogic;
606 variable clz : std_ulogic_vector(5 downto 0);
607 variable set_x : std_ulogic;
608 variable mshift : signed(EXP_BITS-1 downto 0);
609 variable need_check : std_ulogic;
610 variable msb : std_ulogic;
611 variable is_add : std_ulogic;
612 variable longmask : std_ulogic;
613 variable set_a : std_ulogic;
614 variable set_b : std_ulogic;
615 variable set_c : std_ulogic;
616 variable set_y : std_ulogic;
617 variable set_s : std_ulogic;
618 variable qnan_result : std_ulogic;
619 variable px_nz : std_ulogic;
620 variable pcmpb_eq : std_ulogic;
621 variable pcmpb_lt : std_ulogic;
622 variable pshift : std_ulogic;
623 variable renorm_sqrt : std_ulogic;
624 variable sqrt_exp : signed(EXP_BITS-1 downto 0);
625 variable shiftin : std_ulogic;
626 variable mulexp : signed(EXP_BITS-1 downto 0);
627 variable maddend : std_ulogic_vector(127 downto 0);
634 -- capture incoming instruction
635 if e_in.valid = '1' then
638 v.fe_mode := or (e_in.fe_mode);
639 v.dest_fpr := e_in.frt;
640 v.single_prec := e_in.single;
643 v.is_cmp := e_in.out_cr;
644 if e_in.out_cr = '0' then
645 v.cr_mask := num_to_fxm(1);
647 v.cr_mask := num_to_fxm(to_integer(unsigned(insn_bf(e_in.insn))));
650 if e_in.op = OP_FPOP_I then
653 v.quieten_nan := '1';
656 v.round_mode := '0' & r.fpscr(FPSCR_RN+1 downto FPSCR_RN);
657 v.is_subtract := '0';
658 v.is_multiply := '0';
661 v.doing_ftdiv := "00";
663 adec := decode_dp(e_in.fra, int_input);
664 bdec := decode_dp(e_in.frb, int_input);
665 cdec := decode_dp(e_in.frc, int_input);
671 if adec.exponent > bdec.exponent then
675 if (adec.exponent + cdec.exponent + 1) >= bdec.exponent then
680 r_hi_nz <= or (r.r(55 downto 31));
681 r_lo_nz <= or (r.r(30 downto 2));
684 if r.single_prec = '0' then
685 if r.doing_ftdiv(1) = '0' then
686 max_exp := to_signed(1023, EXP_BITS);
688 max_exp := to_signed(1020, EXP_BITS);
690 if r.doing_ftdiv(0) = '0' then
691 min_exp := to_signed(-1022, EXP_BITS);
693 min_exp := to_signed(-1021, EXP_BITS);
695 bias_exp := to_signed(1536, EXP_BITS);
697 max_exp := to_signed(127, EXP_BITS);
698 min_exp := to_signed(-126, EXP_BITS);
699 bias_exp := to_signed(192, EXP_BITS);
701 new_exp := r.result_exp - r.shift;
704 if new_exp < min_exp then
707 if new_exp > max_exp then
711 -- Compare P with zero and with B
712 px_nz := or (r.p(57 downto 4));
714 if r.p(59 downto 4) = r.b.mantissa(55 downto 0) then
718 if unsigned(r.p(59 downto 4)) < unsigned(r.b.mantissa(55 downto 0)) then
722 v.writing_back := '0';
724 v.update_fprf := '0';
725 v.shift := to_signed(0, EXP_BITS);
736 fpscr_mask := (others => '1');
744 longmask := r.single_prec;
749 f_to_multiply.is_32bit <= '0';
750 f_to_multiply.valid <= '0';
753 msel_add <= MULADD_ZERO;
761 if e_in.valid = '1' then
762 case e_in.insn(5 downto 1) is
764 if e_in.insn(8) = '1' then
765 if e_in.insn(6) = '0' then
768 v.state := DO_FTSQRT;
770 elsif e_in.insn(7) = '1' then
776 if e_in.insn(10) = '0' then
777 if e_in.insn(8) = '0' then
780 v.state := DO_MTFSFI;
786 if e_in.insn(8) = '0' then
792 if e_in.insn(9 downto 8) /= "11" then
800 if int_input = '1' then
807 v.round_mode := "001";
811 when "10100" | "10101" =>
821 v.is_multiply := '1';
825 v.state := DO_FRSQRTE;
826 when "11100" | "11101" | "11110" | "11111" =>
833 v.old_exc := r.fpscr(FPSCR_VX downto FPSCR_XX);
837 j := to_integer(unsigned(insn_bfa(r.insn)));
841 v.cr_result := r.fpscr(k + 3 downto k);
842 fpscr_mask(k + 3 downto k) := "0000";
845 v.fpscr := r.fpscr and (fpscr_mask or x"6007F8FF");
852 v.cr_result := "0000"; -- ftdiv: start with both test flags clear
853 if r.a.class = INFINITY or r.b.class = ZERO or r.b.class = INFINITY or
854 (r.b.class = FINITE and r.b.mantissa(54) = '0') then -- decode_dp puts the unit bit at mantissa(54); clear => B is denormalized (bit 53 is only the fraction MSB)
855 v.cr_result(2) := '1'; -- fg_flag
857 if r.a.class = NAN or r.a.class = INFINITY or
858 r.b.class = NAN or r.b.class = ZERO or r.b.class = INFINITY or
859 (r.a.class = FINITE and r.a.exponent <= to_signed(-970, EXP_BITS)) then
860 v.cr_result(1) := '1'; -- fe_flag
862 v.doing_ftdiv := "11"; -- selects the narrower max_exp/min_exp limits used by the exponent range test
871 v.cr_result := "0000"; -- ftsqrt: start with both test flags clear
872 if r.b.class = ZERO or r.b.class = INFINITY or
873 (r.b.class = FINITE and r.b.mantissa(54) = '0') then -- decode_dp puts the unit bit at mantissa(54); clear => B is denormalized (bit 53 is only the fraction MSB)
874 v.cr_result(2) := '1'; -- fg_flag
876 if r.b.class = NAN or r.b.class = INFINITY or r.b.class = ZERO
877 or r.b.negative = '1' or r.b.exponent <= to_signed(-970, EXP_BITS) then
878 v.cr_result(1) := '1'; -- fe_flag: cr_result was cleared above, so it must be set (not cleared) here, as in DO_FTDIV
888 v.result_exp := r.b.exponent;
889 if (r.a.class = NAN and r.a.mantissa(53) = '0') or
890 (r.b.class = NAN and r.b.mantissa(53) = '0') then
892 v.fpscr(FPSCR_VXSNAN) := '1';
893 if r.insn(6) = '1' and r.fpscr(FPSCR_VE) = '0' then
894 v.fpscr(FPSCR_VXVC) := '1';
897 v.cr_result := "0001"; -- unordered
898 elsif r.a.class = NAN or r.b.class = NAN then
899 if r.insn(6) = '1' then
901 v.fpscr(FPSCR_VXVC) := '1';
904 v.cr_result := "0001"; -- unordered
905 elsif r.a.class = ZERO and r.b.class = ZERO then
906 v.cr_result := "0010"; -- equal
907 elsif r.a.negative /= r.b.negative then
908 v.cr_result := r.a.negative & r.b.negative & "00";
909 elsif r.a.class = ZERO then
910 -- A and B are the same sign from here down
911 v.cr_result := not r.b.negative & r.b.negative & "00";
912 elsif r.a.class = INFINITY then
913 if r.b.class = INFINITY then
914 v.cr_result := "0010";
916 v.cr_result := r.a.negative & not r.a.negative & "00";
918 elsif r.b.class = ZERO then
919 -- A is finite from here down
920 v.cr_result := r.a.negative & not r.a.negative & "00";
921 elsif r.b.class = INFINITY then
922 v.cr_result := not r.b.negative & r.b.negative & "00";
923 elsif r.exp_cmp = '1' then
924 -- A and B are both finite from here down
925 v.cr_result := r.a.negative & not r.a.negative & "00";
926 elsif r.a.exponent /= r.b.exponent then
927 -- A exponent is smaller than B
928 v.cr_result := not r.a.negative & r.a.negative & "00";
930 -- Prepare to subtract mantissas, put B in R
931 v.cr_result := "0000";
935 v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result;
939 j := to_integer(unsigned(insn_bt(r.insn)));
940 for i in 0 to 31 loop
942 v.fpscr(31 - i) := r.insn(6);
950 j := to_integer(unsigned(insn_bf(r.insn)));
951 if r.insn(16) = '0' then
955 v.fpscr(k + 3 downto k) := insn_u(r.insn);
965 misc_sel <= "01" & r.insn(8) & '0';
967 v.writing_back := '1';
973 v.writing_back := '1';
975 case r.insn(20 downto 16) is
980 v.fpscr(FPSCR_VE downto FPSCR_XE) := "00000";
981 when "10100" | "10101" =>
982 -- mffscdrn[i] (but we don't implement DRN)
983 fpscr_mask := x"000000FF";
986 fpscr_mask := x"000000FF";
987 v.fpscr(FPSCR_RN+1 downto FPSCR_RN) :=
988 r.b.mantissa(FPSCR_RN+1 downto FPSCR_RN);
991 fpscr_mask := x"000000FF";
992 v.fpscr(FPSCR_RN+1 downto FPSCR_RN) := r.insn(12 downto 11);
995 fpscr_mask := x"0007F0FF";
1003 if r.insn(25) = '1' then
1005 elsif r.insn(16) = '1' then
1008 flm := r.insn(24 downto 17);
1010 for i in 0 to 7 loop
1012 if flm(i) = '1' then
1013 v.fpscr(k + 3 downto k) := r.b.mantissa(k + 3 downto k);
1016 v.instr_done := '1';
1021 v.result_class := r.b.class;
1022 v.result_exp := r.b.exponent;
1023 v.quieten_nan := '0';
1024 if r.insn(9) = '1' then
1025 v.result_sign := '0'; -- fabs
1026 elsif r.insn(8) = '1' then
1027 v.result_sign := '1'; -- fnabs
1028 elsif r.insn(7) = '1' then
1029 v.result_sign := r.b.negative; -- fmr
1030 elsif r.insn(6) = '1' then
1031 v.result_sign := not r.b.negative; -- fneg
1033 v.result_sign := r.a.negative; -- fcpsgn
1035 v.writing_back := '1';
1036 v.instr_done := '1';
1039 when DO_FRI => -- fri[nzpm]
1041 v.result_class := r.b.class;
1042 v.result_sign := r.b.negative;
1043 v.result_exp := r.b.exponent;
1044 v.fpscr(FPSCR_FR) := '0';
1045 v.fpscr(FPSCR_FI) := '0';
1046 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1048 v.fpscr(FPSCR_VXSNAN) := '1';
1051 if r.b.class = FINITE then
1052 if r.b.exponent >= to_signed(52, EXP_BITS) then
1053 -- integer already, no rounding required
1056 v.shift := r.b.exponent - to_signed(52, EXP_BITS);
1058 v.round_mode := '1' & r.insn(7 downto 6);
1066 v.result_class := r.b.class;
1067 v.result_sign := r.b.negative;
1068 v.result_exp := r.b.exponent;
1069 v.fpscr(FPSCR_FR) := '0';
1070 v.fpscr(FPSCR_FI) := '0';
1071 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1073 v.fpscr(FPSCR_VXSNAN) := '1';
1077 if r.b.class = FINITE then
1078 if r.b.exponent < to_signed(-126, EXP_BITS) then
1079 v.shift := r.b.exponent - to_signed(-126, EXP_BITS);
1080 v.state := ROUND_UFLOW;
1081 elsif r.b.exponent > to_signed(127, EXP_BITS) then
1082 v.state := ROUND_OFLOW;
1084 v.shift := to_signed(-2, EXP_BITS);
1085 v.state := ROUNDING;
1092 -- instr bit 9: 1=dword 0=word
1093 -- instr bit 8: 1=unsigned 0=signed
1094 -- instr bit 1: 1=round to zero 0=use fpscr[RN]
1096 v.result_class := r.b.class;
1097 v.result_sign := r.b.negative;
1098 v.result_exp := r.b.exponent;
1099 v.fpscr(FPSCR_FR) := '0';
1100 v.fpscr(FPSCR_FI) := '0';
1101 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1103 v.fpscr(FPSCR_VXSNAN) := '1';
1107 v.int_result := '1';
1112 if r.b.exponent >= to_signed(64, EXP_BITS) or
1113 (r.insn(9) = '0' and r.b.exponent >= to_signed(32, EXP_BITS)) then
1114 v.state := INT_OFLOW;
1115 elsif r.b.exponent >= to_signed(52, EXP_BITS) then
1116 -- integer already, no rounding required,
1117 -- shift into final position
1118 v.shift := r.b.exponent - to_signed(54, EXP_BITS);
1119 if r.insn(8) = '1' and r.b.negative = '1' then
1120 v.state := INT_OFLOW;
1122 v.state := INT_ISHIFT;
1125 v.shift := r.b.exponent - to_signed(52, EXP_BITS);
1126 v.state := INT_SHIFT;
1128 when INFINITY | NAN =>
1129 v.state := INT_OFLOW;
1133 v.result_sign := '0';
1135 if r.insn(8) = '0' and r.b.negative = '1' then
1136 -- fcfid[s] with negative operand, set R = -B
1139 v.result_sign := '1';
1141 v.result_class := r.b.class;
1142 v.result_exp := to_signed(54, EXP_BITS);
1143 v.fpscr(FPSCR_FR) := '0';
1144 v.fpscr(FPSCR_FI) := '0';
1145 if r.b.class = ZERO then
1152 -- fadd[s] and fsub[s]
1154 v.result_sign := r.a.negative;
1155 v.result_class := r.a.class;
1156 v.result_exp := r.a.exponent;
1157 v.fpscr(FPSCR_FR) := '0';
1158 v.fpscr(FPSCR_FI) := '0';
1159 is_add := r.a.negative xor r.b.negative xor r.insn(1);
1160 if r.a.class = FINITE and r.b.class = FINITE then
1161 v.is_subtract := not is_add;
1162 v.add_bsmall := r.exp_cmp;
1163 if r.exp_cmp = '0' then
1164 v.shift := r.a.exponent - r.b.exponent;
1165 v.result_sign := r.b.negative xnor r.insn(1);
1166 if r.a.exponent = r.b.exponent then
1169 v.state := ADD_SHIFT;
1173 v.shift := r.b.exponent - r.a.exponent;
1174 v.result_exp := r.b.exponent;
1175 v.state := ADD_SHIFT;
1178 if (r.a.class = NAN and r.a.mantissa(53) = '0') or
1179 (r.b.class = NAN and r.b.mantissa(53) = '0') then
1181 v.fpscr(FPSCR_VXSNAN) := '1';
1184 if r.a.class = NAN then
1185 -- nothing to do, result is A
1186 elsif r.b.class = NAN then
1187 v.result_class := NAN;
1188 v.result_sign := r.b.negative;
1190 elsif r.a.class = INFINITY and r.b.class = INFINITY and is_add = '0' then
1191 -- invalid operation, construct QNaN
1192 v.fpscr(FPSCR_VXISI) := '1';
1194 elsif r.a.class = ZERO and r.b.class = ZERO and is_add = '0' then
1195 -- return -0 for rounding to -infinity
1196 v.result_sign := r.round_mode(1) and r.round_mode(0);
1197 elsif r.a.class = INFINITY or r.b.class = ZERO then
1198 -- nothing to do, result is A
1201 v.result_sign := r.b.negative xnor r.insn(1);
1202 v.result_class := r.b.class;
1203 v.result_exp := r.b.exponent;
1212 v.result_sign := r.a.negative;
1213 v.result_class := r.a.class;
1214 v.result_exp := r.a.exponent;
1215 v.fpscr(FPSCR_FR) := '0';
1216 v.fpscr(FPSCR_FI) := '0';
1217 if r.a.class = FINITE and r.c.class = FINITE then
1218 v.result_sign := r.a.negative xor r.c.negative;
1219 v.result_exp := r.a.exponent + r.c.exponent;
1220 -- Renormalize denorm operands
1221 if r.a.mantissa(54) = '0' then
1222 v.state := RENORM_A;
1223 elsif r.c.mantissa(54) = '0' then
1225 v.state := RENORM_C;
1227 f_to_multiply.valid <= '1';
1231 if (r.a.class = NAN and r.a.mantissa(53) = '0') or
1232 (r.c.class = NAN and r.c.mantissa(53) = '0') then
1234 v.fpscr(FPSCR_VXSNAN) := '1';
1237 if r.a.class = NAN then
1239 elsif r.c.class = NAN then
1240 v.result_class := NAN;
1241 v.result_sign := r.c.negative;
1243 elsif (r.a.class = INFINITY and r.c.class = ZERO) or
1244 (r.a.class = ZERO and r.c.class = INFINITY) then
1245 -- invalid operation, construct QNaN
1246 v.fpscr(FPSCR_VXIMZ) := '1';
1248 elsif r.a.class = ZERO or r.a.class = INFINITY then
1250 v.result_sign := r.a.negative xor r.c.negative;
1252 -- r.c.class is ZERO or INFINITY
1253 v.result_class := r.c.class;
1254 v.result_sign := r.a.negative xor r.c.negative;
1261 v.result_sign := r.a.negative;
1262 v.result_class := r.a.class;
1263 v.result_exp := r.a.exponent;
1264 v.fpscr(FPSCR_FR) := '0';
1265 v.fpscr(FPSCR_FI) := '0';
1266 v.result_sign := r.a.negative xor r.b.negative;
1267 v.result_exp := r.a.exponent - r.b.exponent;
1269 if r.a.class = FINITE and r.b.class = FINITE then
1270 -- Renormalize denorm operands
1271 if r.a.mantissa(54) = '0' then
1272 v.state := RENORM_A;
1273 elsif r.b.mantissa(54) = '0' then
1275 v.state := RENORM_B;
1281 if (r.a.class = NAN and r.a.mantissa(53) = '0') or
1282 (r.b.class = NAN and r.b.mantissa(53) = '0') then
1284 v.fpscr(FPSCR_VXSNAN) := '1';
1287 if r.a.class = NAN then
1289 v.result_sign := r.a.negative;
1290 elsif r.b.class = NAN then
1291 v.result_class := NAN;
1292 v.result_sign := r.b.negative;
1294 elsif r.b.class = INFINITY then
1295 if r.a.class = INFINITY then
1296 v.fpscr(FPSCR_VXIDI) := '1';
1299 v.result_class := ZERO;
1301 elsif r.b.class = ZERO then
1302 if r.a.class = ZERO then
1303 v.fpscr(FPSCR_VXZDZ) := '1';
1306 if r.a.class = FINITE then
1309 v.result_class := INFINITY;
1311 -- else r.b.class = FINITE, result_class = r.a.class
1318 v.fpscr(FPSCR_FR) := '0';
1319 v.fpscr(FPSCR_FI) := '0';
1320 if r.a.class = ZERO or (r.a.negative = '0' and r.a.class /= NAN) then
1321 v.result_sign := r.c.negative;
1322 v.result_exp := r.c.exponent;
1323 v.result_class := r.c.class;
1326 v.result_sign := r.b.negative;
1327 v.result_exp := r.b.exponent;
1328 v.result_class := r.b.class;
1331 v.quieten_nan := '0';
1336 v.result_class := r.b.class;
1337 v.result_sign := r.b.negative;
1338 v.fpscr(FPSCR_FR) := '0';
1339 v.fpscr(FPSCR_FI) := '0';
1340 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1341 v.fpscr(FPSCR_VXSNAN) := '1';
1346 v.result_exp := r.b.exponent;
1347 if r.b.negative = '1' then
1348 v.fpscr(FPSCR_VXSQRT) := '1';
1351 elsif r.b.mantissa(54) = '0' then
1352 v.state := RENORM_B;
1353 elsif r.b.exponent(0) = '0' then
1356 v.shift := to_signed(1, EXP_BITS);
1357 v.state := RENORM_B2;
1363 if r.b.negative = '1' then
1364 v.fpscr(FPSCR_VXSQRT) := '1';
1373 v.result_class := r.b.class;
1374 v.result_sign := r.b.negative;
1375 v.fpscr(FPSCR_FR) := '0';
1376 v.fpscr(FPSCR_FI) := '0';
1377 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1378 v.fpscr(FPSCR_VXSNAN) := '1';
1383 v.result_exp := - r.b.exponent;
1384 if r.b.mantissa(54) = '0' then
1385 v.state := RENORM_B;
1393 v.result_class := ZERO;
1396 v.result_class := INFINITY;
1403 v.result_class := r.b.class;
1404 v.result_sign := r.b.negative;
1405 v.fpscr(FPSCR_FR) := '0';
1406 v.fpscr(FPSCR_FI) := '0';
1407 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1408 v.fpscr(FPSCR_VXSNAN) := '1';
1411 v.shift := to_signed(1, EXP_BITS);
1414 v.result_exp := r.b.exponent;
1415 if r.b.negative = '1' then
1416 v.fpscr(FPSCR_VXSQRT) := '1';
1419 elsif r.b.mantissa(54) = '0' then
1420 v.state := RENORM_B;
1421 elsif r.b.exponent(0) = '0' then
1424 v.state := RENORM_B2;
1430 if r.b.negative = '1' then
1431 v.fpscr(FPSCR_VXSQRT) := '1';
1434 v.result_class := ZERO;
1438 v.result_class := INFINITY;
1444 -- fmadd, fmsub, fnmadd, fnmsub
1446 v.result_sign := r.a.negative;
1447 v.result_class := r.a.class;
1448 v.result_exp := r.a.exponent;
1449 v.fpscr(FPSCR_FR) := '0';
1450 v.fpscr(FPSCR_FI) := '0';
1451 is_add := r.a.negative xor r.c.negative xor r.b.negative xor r.insn(1);
1452 if r.a.class = FINITE and r.c.class = FINITE and
1453 (r.b.class = FINITE or r.b.class = ZERO) then
1454 v.is_subtract := not is_add;
1455 mulexp := r.a.exponent + r.c.exponent;
1456 v.result_exp := mulexp;
1458 -- Make sure A and C are normalized
1459 if r.a.mantissa(54) = '0' then
1461 v.state := RENORM_A;
1462 elsif r.c.mantissa(54) = '0' then
1464 v.state := RENORM_C;
1465 elsif r.b.class = ZERO then
1466 -- no addend, degenerates to multiply
1467 v.result_sign := r.a.negative xor r.c.negative xor r.insn(2);
1468 f_to_multiply.valid <= '1';
1469 v.is_multiply := '1';
1471 elsif r.madd_cmp = '0' then
1472 -- addend is bigger, do multiply first
1473 v.result_sign := not (r.b.negative xor r.insn(1) xor r.insn(2));
1474 f_to_multiply.valid <= '1';
1477 -- product is bigger, shift B right and use it as the
1478 -- addend to the multiplier
1479 v.shift := r.b.exponent - mulexp + to_signed(64, EXP_BITS);
1480 -- for subtract, multiplier does B - A * C
1481 v.result_sign := not (r.a.negative xor r.c.negative xor r.insn(2) xor is_add);
1482 v.result_exp := r.b.exponent;
1486 if (r.a.class = NAN and r.a.mantissa(53) = '0') or
1487 (r.b.class = NAN and r.b.mantissa(53) = '0') or
1488 (r.c.class = NAN and r.c.mantissa(53) = '0') then
1490 v.fpscr(FPSCR_VXSNAN) := '1';
1493 if r.a.class = NAN then
1494 -- nothing to do, result is A
1495 elsif r.b.class = NAN then
1497 v.result_class := NAN;
1498 v.result_sign := r.b.negative;
1500 elsif r.c.class = NAN then
1502 v.result_class := NAN;
1503 v.result_sign := r.c.negative;
1505 elsif (r.a.class = ZERO and r.c.class = INFINITY) or
1506 (r.a.class = INFINITY and r.c.class = ZERO) then
1507 -- invalid operation, construct QNaN
1508 v.fpscr(FPSCR_VXIMZ) := '1';
1510 elsif r.a.class = INFINITY or r.c.class = INFINITY then
1511 if r.b.class = INFINITY and is_add = '0' then
1512 -- invalid operation, construct QNaN
1513 v.fpscr(FPSCR_VXISI) := '1';
1516 -- result is infinity
1517 v.result_class := INFINITY;
1518 v.result_sign := r.a.negative xor r.c.negative xor r.insn(2);
1521 -- Here A is zero, C is zero, or B is infinity
1522 -- Result is +/-B in all of those cases
1523 v.result_class := r.b.class;
1524 v.result_exp := r.b.exponent;
1525 if v.result_class /= ZERO or is_add = '1' then
1526 v.result_sign := not (r.b.negative xor r.insn(1) xor r.insn(2));
1528 -- have to be careful about rule for 0 - 0 result sign
1529 v.result_sign := (r.round_mode(1) and r.round_mode(0)) xor r.insn(2);
1538 v.state := RENORM_A2;
1542 v.result_exp := new_exp;
1543 if r.insn(4) = '1' then
1545 if r.c.mantissa(54) = '1' then
1546 if r.insn(3) = '0' or r.b.class = ZERO then
1551 if new_exp + 1 >= r.b.exponent then
1554 v.state := DO_FMADD;
1557 v.state := RENORM_C;
1561 if r.b.mantissa(54) = '1' then
1565 v.state := RENORM_B;
1571 renorm_sqrt := r.is_sqrt;
1572 v.state := RENORM_B2;
1576 if r.is_sqrt = '0' then
1577 v.result_exp := r.result_exp + r.shift;
1579 v.result_exp := new_exp;
1585 v.state := RENORM_C2;
1589 v.result_exp := new_exp;
1590 if r.insn(3) = '0' or r.b.class = ZERO then
1595 if new_exp + 1 >= r.b.exponent then
1598 v.state := DO_FMADD;
1602 -- r.shift = - exponent difference
1603 opsel_r <= RES_SHIFT;
1610 if r.add_bsmall = '1' then
1616 opsel_binv <= r.is_subtract;
1617 carry_in <= r.is_subtract and not r.x;
1618 v.shift := to_signed(-1, EXP_BITS);
1622 -- check for overflow or negative result (can't get both)
1624 if r.r(63) = '1' then
1625 -- result is opposite sign to expected
1626 v.result_sign := not r.result_sign;
1630 elsif r.r(55) = '1' then
1631 -- sum overflowed, shift right
1632 opsel_r <= RES_SHIFT;
1634 v.shift := to_signed(-2, EXP_BITS);
1635 if exp_huge = '1' then
1636 v.state := ROUND_OFLOW;
1638 v.state := ROUNDING;
1640 elsif r.r(54) = '1' then
1642 v.shift := to_signed(-2, EXP_BITS);
1643 v.state := ROUNDING;
1644 elsif (r_hi_nz or r_lo_nz or r.r(1) or r.r(0)) = '0' then
1645 -- r.x must be zero at this point
1646 v.result_class := ZERO;
1647 if r.is_subtract = '1' then
1648 -- set result sign depending on rounding mode
1649 v.result_sign := r.round_mode(1) and r.round_mode(0);
1654 v.state := NORMALIZE;
1665 if r.r(63) = '1' then
1666 -- A is smaller in magnitude
1667 v.cr_result := not r.a.negative & r.a.negative & "00";
1668 elsif (r_hi_nz or r_lo_nz) = '0' then
1669 v.cr_result := "0010";
1671 v.cr_result := r.a.negative & not r.a.negative & "00";
1673 v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result;
1674 v.instr_done := '1';
1678 f_to_multiply.valid <= r.first;
1679 opsel_r <= RES_MULT;
1680 if multiply_to_f.valid = '1' then
1685 -- Addend is bigger here
1686 v.result_sign := not (r.b.negative xor r.insn(1) xor r.insn(2));
1687 -- note v.shift is at most -2 here
1688 v.shift := r.result_exp - r.b.exponent;
1689 opsel_r <= RES_MULT;
1692 f_to_multiply.valid <= r.first;
1693 if multiply_to_f.valid = '1' then
1694 v.state := ADD_SHIFT;
1698 -- Product is potentially bigger here
1699 -- r.shift = addend exp - product exp + 64
1702 v.shift := r.shift - to_signed(64, EXP_BITS);
1706 -- r.shift = addend exp - product exp
1707 opsel_r <= RES_SHIFT;
1712 msel_add <= MULADD_RS;
1713 f_to_multiply.valid <= r.first;
1714 msel_inv <= r.is_subtract;
1715 opsel_r <= RES_MULT;
1718 v.shift := to_signed(56, EXP_BITS);
1719 if multiply_to_f.valid = '1' then
1720 if multiply_to_f.result(121) = '1' then
1729 v.result_sign := not r.result_sign;
1731 carry_in <= not (s_nz or r.x);
1734 v.shift := to_signed(56, EXP_BITS);
1738 -- r.shift = 56 (or 0, but only if r is now nonzero)
1739 if (r.r(56) or r_hi_nz or r_lo_nz or r.r(1) or r.r(0)) = '0' then
1741 -- must be a subtraction, and r.x must be zero
1742 v.result_class := ZERO;
1743 v.result_sign := r.round_mode(1) and r.round_mode(0);
1746 -- R is all zeroes but there are non-zero bits in S
1747 -- so shift them into R and set S to 0
1748 opsel_r <= RES_SHIFT;
1750 -- stay in state FMADD_6
1752 elsif r.r(56 downto 54) = "001" then
1756 v.state := NORMALIZE;
1761 -- wait one cycle for inverse_table[B] lookup
1763 if r.insn(4) = '0' then
1764 if r.insn(3) = '0' then
1769 elsif r.insn(2) = '0' then
1776 -- compute Y = inverse_table[B] (when count=0); P = 2 - B * Y
1778 msel_add <= MULADD_CONST;
1787 f_to_multiply.valid <= r.first;
1788 if multiply_to_f.valid = '1' then
1790 v.count := r.count + 1;
1795 -- compute Y = P = P * Y
1798 f_to_multiply.valid <= r.first;
1800 if multiply_to_f.valid = '1' then
1810 -- compute R = P = A * Y (quotient)
1814 f_to_multiply.valid <= r.first;
1816 if multiply_to_f.valid = '1' then
1817 opsel_r <= RES_MULT;
1823 -- compute P = A - B * R (remainder)
1826 msel_add <= MULADD_A;
1828 f_to_multiply.valid <= r.first;
1829 if multiply_to_f.valid = '1' then
1834 -- test if remainder is 0 or >= B
1835 if pcmpb_lt = '1' then
1836 -- quotient is correct, set X if remainder non-zero
1837 v.x := r.p(58) or px_nz;
1839 -- quotient needs to be incremented by 1
1841 v.x := not pcmpb_eq;
1846 opsel_r <= RES_MISC;
1848 v.shift := to_signed(1, EXP_BITS);
1849 v.state := NORMALIZE;
1852 v.cr_result(1) := exp_tiny or exp_huge;
1853 if exp_tiny = '1' or exp_huge = '1' or r.a.class = ZERO or r.first = '0' then
1854 v.instr_done := '1';
1857 v.shift := r.a.exponent;
1858 v.doing_ftdiv := "10";
1862 opsel_r <= RES_MISC;
1864 sqrt_exp := r.b.exponent(EXP_BITS-1) & r.b.exponent(EXP_BITS-1 downto 1);
1865 v.result_exp := - sqrt_exp;
1866 v.shift := to_signed(1, EXP_BITS);
1867 v.state := NORMALIZE;
1870 -- put invsqr[B] in R and compute P = invsqr[B] * B
1871 -- also transfer B (in R) to A
1873 opsel_r <= RES_MISC;
1877 f_to_multiply.valid <= '1';
1878 v.shift := to_signed(-1, EXP_BITS);
1883 -- shift R right one place
1884 -- not expecting multiplier result yet
1886 opsel_r <= RES_SHIFT;
1891 -- put R into Y, wait for product from multiplier
1895 if multiply_to_f.valid = '1' then
1896 -- put result into R
1897 opsel_r <= RES_MULT;
1903 -- compute 1.5 - Y * P
1906 msel_add <= MULADD_CONST;
1908 f_to_multiply.valid <= r.first;
1910 if multiply_to_f.valid = '1' then
1915 -- compute Y = Y * P
1918 f_to_multiply.valid <= '1';
1923 -- pipeline in R = R * P
1926 f_to_multiply.valid <= r.first;
1928 if multiply_to_f.valid = '1' then
1934 -- first multiply is done, put result in Y
1937 -- wait for second multiply (should be here already)
1939 if multiply_to_f.valid = '1' then
1940 -- put result into R
1941 opsel_r <= RES_MULT;
1943 v.count := r.count + 1;
1953 -- compute P = A - R * R, which can be +ve or -ve
1954 -- we arranged for B to be put into A earlier
1957 msel_add <= MULADD_A;
1960 f_to_multiply.valid <= r.first;
1961 if multiply_to_f.valid = '1' then
1967 -- compute P = P * Y
1968 -- since Y is an estimate of 1/sqrt(B), this makes P an
1969 -- estimate of the adjustment needed to R. Since the error
1970 -- could be negative and we have an unsigned multiplier, the
1971 -- upper bits can be wrong, but it turns out the lowest 8 bits
1972 -- are correct and are all we need (given 3 iterations through
1973 -- SQRT_4 to SQRT_7).
1977 f_to_multiply.valid <= r.first;
1978 if multiply_to_f.valid = '1' then
1983 -- Add the bottom 8 bits of P, sign-extended,
1984 -- divided by 4, onto R.
1985 -- The division by 4 is because R is 10.54 format
1986 -- whereas P is 8.56 format.
1988 sqrt_exp := r.b.exponent(EXP_BITS-1) & r.b.exponent(EXP_BITS-1 downto 1);
1989 v.result_exp := sqrt_exp;
1990 v.shift := to_signed(1, EXP_BITS);
1995 -- compute P = A - R * R (remainder)
1996 -- also put 2 * R + 1 into B for comparison with P
1999 msel_add <= MULADD_A;
2001 f_to_multiply.valid <= r.first;
2004 if multiply_to_f.valid = '1' then
2009 -- test if remainder is 0 or >= B = 2*R + 1
2010 if pcmpb_lt = '1' then
2011 -- square root is correct, set X if remainder non-zero
2012 v.x := r.p(58) or px_nz;
2014 -- square root needs to be incremented by 1
2016 v.x := not pcmpb_eq;
2021 -- r.shift = b.exponent - 52
2022 opsel_r <= RES_SHIFT;
2024 v.state := INT_ROUND;
2025 v.shift := to_signed(-2, EXP_BITS);
2029 opsel_r <= RES_SHIFT;
2030 round := fp_rounding(r.r, r.x, '0', r.round_mode, r.result_sign);
2031 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
2032 -- Check for negative values that don't round to 0 for fcti*u*
2033 if r.insn(8) = '1' and r.result_sign = '1' and
2034 (r_hi_nz or r_lo_nz or v.fpscr(FPSCR_FR)) = '1' then
2035 v.state := INT_OFLOW;
2037 v.state := INT_FINAL;
2041 -- r.shift = b.exponent - 54;
2042 opsel_r <= RES_SHIFT;
2043 v.state := INT_FINAL;
2046 -- Negate if necessary, and increment for rounding if needed
2047 opsel_ainv <= r.result_sign;
2048 carry_in <= r.fpscr(FPSCR_FR) xor r.result_sign;
2049 -- Check for possible overflows
2050 case r.insn(9 downto 8) is
2051 when "00" => -- fctiw[z]
2052 need_check := r.r(31) or (r.r(30) and not r.result_sign);
2053 when "01" => -- fctiwu[z]
2054 need_check := r.r(31);
2055 when "10" => -- fctid[z]
2056 need_check := r.r(63) or (r.r(62) and not r.result_sign);
2057 when others => -- fctidu[z]
2058 need_check := r.r(63);
2060 if need_check = '1' then
2061 v.state := INT_CHECK;
2063 if r.fpscr(FPSCR_FI) = '1' then
2064 v.fpscr(FPSCR_XX) := '1';
2070 if r.insn(9) = '0' then
2075 misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
2076 if (r.insn(8) = '0' and msb /= r.result_sign) or
2077 (r.insn(8) = '1' and msb /= '1') then
2078 opsel_r <= RES_MISC;
2079 v.fpscr(FPSCR_VXCVI) := '1';
2082 if r.fpscr(FPSCR_FI) = '1' then
2083 v.fpscr(FPSCR_XX) := '1';
2089 opsel_r <= RES_MISC;
2090 misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
2091 if r.b.class = NAN then
2094 v.fpscr(FPSCR_VXCVI) := '1';
2099 -- r.shift = b.exponent - 52
2100 opsel_r <= RES_SHIFT;
2102 v.shift := to_signed(-2, EXP_BITS);
2103 v.state := ROUNDING;
2106 if r.is_multiply = '1' and px_nz = '1' then
2109 if r.r(63 downto 54) /= "0000000001" then
2111 v.state := NORMALIZE;
2114 if exp_tiny = '1' then
2115 v.shift := new_exp - min_exp;
2116 v.state := ROUND_UFLOW;
2117 elsif exp_huge = '1' then
2118 v.state := ROUND_OFLOW;
2120 v.shift := to_signed(-2, EXP_BITS);
2121 v.state := ROUNDING;
2126 -- Shift so we have 9 leading zeroes (we know R is non-zero)
2127 -- r.shift = clz(r.r) - 9
2128 opsel_r <= RES_SHIFT;
2130 if exp_tiny = '1' then
2131 v.shift := new_exp - min_exp;
2132 v.state := ROUND_UFLOW;
2133 elsif exp_huge = '1' then
2134 v.state := ROUND_OFLOW;
2136 v.shift := to_signed(-2, EXP_BITS);
2137 v.state := ROUNDING;
2141 -- r.shift = - amount by which exponent underflows
2143 if r.fpscr(FPSCR_UE) = '0' then
2144 -- disabled underflow exception case
2145 -- have to denormalize before rounding
2146 opsel_r <= RES_SHIFT;
2148 v.shift := to_signed(-2, EXP_BITS);
2149 v.state := ROUNDING;
2151 -- enabled underflow exception case
2152 -- if denormalized, have to normalize before rounding
2153 v.fpscr(FPSCR_UX) := '1';
2154 v.result_exp := r.result_exp + bias_exp;
2155 if r.r(54) = '0' then
2157 v.state := NORMALIZE;
2159 v.shift := to_signed(-2, EXP_BITS);
2160 v.state := ROUNDING;
2165 v.fpscr(FPSCR_OX) := '1';
2166 if r.fpscr(FPSCR_OE) = '0' then
2167 -- disabled overflow exception
2168 -- result depends on rounding mode
2169 v.fpscr(FPSCR_XX) := '1';
2170 v.fpscr(FPSCR_FI) := '1';
2171 if r.round_mode(1 downto 0) = "00" or
2172 (r.round_mode(1) = '1' and r.round_mode(0) = r.result_sign) then
2173 v.result_class := INFINITY;
2174 v.fpscr(FPSCR_FR) := '1';
2176 v.fpscr(FPSCR_FR) := '0';
2178 -- construct largest representable number
2179 v.result_exp := max_exp;
2180 opsel_r <= RES_MISC;
2181 misc_sel <= "001" & r.single_prec;
2184 -- enabled overflow exception
2185 v.result_exp := r.result_exp - bias_exp;
2186 v.shift := to_signed(-2, EXP_BITS);
2187 v.state := ROUNDING;
2192 round := fp_rounding(r.r, r.x, r.single_prec, r.round_mode, r.result_sign);
2193 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
2194 if round(1) = '1' then
2195 -- set mask to increment the LSB for the precision
2196 opsel_b <= BIN_MASK;
2198 v.shift := to_signed(-1, EXP_BITS);
2199 v.state := ROUNDING_2;
2201 if r.r(54) = '0' then
2202 -- result after masking could be zero, or could be a
2203 -- denormalized result that needs to be renormalized
2205 v.state := ROUNDING_3;
2210 if round(0) = '1' then
2211 v.fpscr(FPSCR_XX) := '1';
2212 if r.tiny = '1' then
2213 v.fpscr(FPSCR_UX) := '1';
2218 -- Check for overflow during rounding
2221 if r.r(55) = '1' then
2222 opsel_r <= RES_SHIFT;
2223 if exp_huge = '1' then
2224 v.state := ROUND_OFLOW;
2228 elsif r.r(54) = '0' then
2229 -- Do CLZ so we can renormalize the result
2231 v.state := ROUNDING_3;
2237 -- r.shift = clz(r.r) - 9
2238 mant_nz := r_hi_nz or (r_lo_nz and not r.single_prec);
2239 if mant_nz = '0' then
2240 v.result_class := ZERO;
2241 if r.is_subtract = '1' then
2242 -- set result sign depending on rounding mode
2243 v.result_sign := r.round_mode(1) and r.round_mode(0);
2247 -- Renormalize result after rounding
2248 opsel_r <= RES_SHIFT;
2249 v.denorm := exp_tiny;
2250 v.shift := new_exp - to_signed(-1022, EXP_BITS);
2251 if new_exp < to_signed(-1022, EXP_BITS) then
2259 -- r.shift = result_exp - -1022
2260 opsel_r <= RES_SHIFT;
2265 if zero_divide = '1' then
2266 v.fpscr(FPSCR_ZX) := '1';
2268 if qnan_result = '1' then
2270 v.result_class := NAN;
2271 v.result_sign := '0';
2273 opsel_r <= RES_MISC;
2275 if arith_done = '1' then
2276 -- Enabled invalid exception doesn't write result or FPRF
2277 -- Neither does enabled zero-divide exception
2278 if (invalid and r.fpscr(FPSCR_VE)) = '0' and
2279 (zero_divide and r.fpscr(FPSCR_ZE)) = '0' then
2280 v.writing_back := '1';
2281 v.update_fprf := '1';
2283 v.instr_done := '1';
2288 -- Multiplier and divide/square root data path
2291 f_to_multiply.data1 <= r.a.mantissa(61 downto 0) & "00";
2293 f_to_multiply.data1 <= r.b.mantissa(61 downto 0) & "00";
2295 f_to_multiply.data1 <= r.y;
2297 f_to_multiply.data1 <= r.r(61 downto 0) & "00";
2301 f_to_multiply.data2 <= r.c.mantissa(61 downto 0) & "00";
2303 f_to_multiply.data2 <= x"00" & inverse_est & '0' & x"000000000";
2305 f_to_multiply.data2 <= r.p;
2307 f_to_multiply.data2 <= r.r(61 downto 0) & "00";
2309 maddend := (others => '0');
2311 when MULADD_CONST =>
2312 -- addend is 2.0 or 1.5 in 16.112 format
2313 if r.is_sqrt = '0' then
2314 maddend(113) := '1'; -- 2.0
2316 maddend(112 downto 111) := "11"; -- 1.5
2319 -- addend is A in 16.112 format
2320 maddend(121 downto 58) := r.a.mantissa;
2322 -- addend is concatenation of R and S in 16.112 format
2323 maddend := "000000" & r.r & r.s & "00";
2326 if msel_inv = '1' then
2327 f_to_multiply.addend <= not maddend;
2329 f_to_multiply.addend <= maddend;
2331 f_to_multiply.not_result <= msel_inv;
2333 v.y := f_to_multiply.data2;
2335 if multiply_to_f.valid = '1' then
2336 if pshift = '0' then
2337 v.p := multiply_to_f.result(63 downto 0);
2339 v.p := multiply_to_f.result(119 downto 56);
2344 -- This has A and B input multiplexers, an adder, a shifter,
2345 -- count-leading-zeroes logic, and a result mux.
2346 if longmask = '1' then
2347 mshift := r.shift + to_signed(-29, EXP_BITS);
2351 if mshift < to_signed(-64, EXP_BITS) then
2352 mask := (others => '1');
2353 elsif mshift >= to_signed(0, EXP_BITS) then
2354 mask := (others => '0');
2356 mask := right_mask(unsigned(mshift(5 downto 0)));
2362 in_a0 := r.a.mantissa;
2364 in_a0 := r.b.mantissa;
2366 in_a0 := r.c.mantissa;
2368 if (or (mask and in_a0)) = '1' and set_x = '1' then
2371 if opsel_ainv = '1' then
2374 if opsel_amask = '1' then
2375 in_a0 := in_a0 and not mask;
2380 in_b0 := (others => '0');
2386 -- BIN_PS6, 6 LSBs of P/4 sign-extended to 64
2387 in_b0 := std_ulogic_vector(resize(signed(r.p(7 downto 2)), 64));
2389 if opsel_binv = '1' then
2393 if r.shift >= to_signed(-64, EXP_BITS) and r.shift <= to_signed(63, EXP_BITS) then
2394 shift_res := shifter_64(r.r & (shiftin or r.s(55)) & r.s(54 downto 0),
2395 std_ulogic_vector(r.shift(6 downto 0)));
2397 shift_res := (others => '0');
2401 result <= std_ulogic_vector(unsigned(in_a) + unsigned(in_b) + carry_in);
2403 result <= shift_res;
2405 result <= multiply_to_f.result(121 downto 58);
2409 misc := x"00000000" & (r.fpscr and fpscr_mask);
2411 -- generated QNaN mantissa
2412 misc := x"0020000000000000";
2414 -- mantissa of max representable DP number
2415 misc := x"007ffffffffffffc";
2417 -- mantissa of max representable SP number
2418 misc := x"007fffff80000000";
2421 misc := r.a.mantissa(31 downto 0) & r.b.mantissa(31 downto 0);
2424 misc := r.a.mantissa(63 downto 32) & r.b.mantissa(63 downto 32);
2426 misc := 10x"000" & inverse_est & 35x"000000000";
2428 -- max positive result for fctiw[z]
2429 misc := x"000000007fffffff";
2431 -- max negative result for fctiw[z]
2432 misc := x"ffffffff80000000";
2434 -- max positive result for fctiwu[z]
2435 misc := x"00000000ffffffff";
2437 -- max negative result for fctiwu[z]
2438 misc := x"0000000000000000";
2440 -- max positive result for fctid[z]
2441 misc := x"7fffffffffffffff";
2443 -- max negative result for fctid[z]
2444 misc := x"8000000000000000";
2446 -- max positive result for fctidu[z]
2447 misc := x"ffffffffffffffff";
2449 -- max negative result for fctidu[z]
2450 misc := x"0000000000000000";
2452 misc := x"0000000000000000";
2460 v.s := std_ulogic_vector(unsigned(not r.s) + (not r.x));
2462 v.s := multiply_to_f.result(57 downto 2);
2464 v.s := shift_res(63 downto 8);
2465 if shift_res(7 downto 0) /= x"00" then
2469 v.s := (others => '0');
2474 v.a.exponent := new_exp;
2475 v.a.mantissa := shift_res;
2478 v.b.exponent := new_exp;
2479 v.b.mantissa := shift_res;
2482 v.c.exponent := new_exp;
2483 v.c.mantissa := shift_res;
2486 if opsel_r = RES_SHIFT then
2487 v.result_exp := new_exp;
2490 if renormalize = '1' then
2491 clz := count_left_zeroes(r.r);
2492 if renorm_sqrt = '1' then
2493 -- make denormalized value end up with even exponent
2496 v.shift := resize(signed('0' & clz) - 9, EXP_BITS);
2499 if r.int_result = '1' then
2502 fp_result <= pack_dp(r.result_sign, r.result_class, r.result_exp, r.r,
2503 r.single_prec, r.quieten_nan);
2505 if r.update_fprf = '1' then
2506 v.fpscr(FPSCR_C downto FPSCR_FU) := result_flags(r.result_sign, r.result_class,
2507 r.r(54) and not r.denorm);
2510 v.fpscr(FPSCR_VX) := (or (v.fpscr(FPSCR_VXSNAN downto FPSCR_VXVC))) or
2511 (or (v.fpscr(FPSCR_VXSOFT downto FPSCR_VXCVI)));
2512 v.fpscr(FPSCR_FEX) := or (v.fpscr(FPSCR_VX downto FPSCR_XX) and
2513 v.fpscr(FPSCR_VE downto FPSCR_XE));
2514 if update_fx = '1' and
2515 (v.fpscr(FPSCR_VX downto FPSCR_XX) and not r.old_exc) /= "00000" then
2516 v.fpscr(FPSCR_FX) := '1';
2519 v.cr_result := v.fpscr(FPSCR_FX downto FPSCR_OX);
2522 if illegal = '1' then
2523 v.instr_done := '0';
2525 v.writing_back := '0';
2529 v.do_intr := v.instr_done and v.fpscr(FPSCR_FEX) and r.fe_mode;
2530 if v.state /= IDLE or v.do_intr = '1' then
2536 e_out.illegal <= illegal;
2539 end architecture behaviour;