1 -- Floating-point unit for Microwatt
4 use ieee.std_logic_1164.all;
5 use ieee.numeric_std.all;
8 use work.insn_helpers.all;
9 use work.decode_types.all;
10 use work.crhelpers.all;
19 e_in : in Execute1toFPUType;
20 e_out : out FPUToExecute1Type;
22 w_out : out FPUToWritebackType
26 architecture behaviour of fpu is
27 type fp_number_class is (ZERO, FINITE, INFINITY, NAN);
29 constant EXP_BITS : natural := 13;
31 type fpu_reg_type is record  -- one floating-point operand split into its components
32 class : fp_number_class;  -- ZERO, FINITE, INFINITY or NAN
33 negative : std_ulogic;  -- sign bit of the operand
34 exponent : signed(EXP_BITS-1 downto 0); -- unbiased
35 mantissa : std_ulogic_vector(63 downto 0); -- 10.54 format (decode_dp places the unit bit in bit 54)
38 type state_t is (IDLE,
39 DO_MCRFS, DO_MTFSB, DO_MTFSFI, DO_MFFS, DO_MTFSF,
40 DO_FMR, DO_FMRG, DO_FCMP, DO_FTDIV, DO_FTSQRT,
43 DO_FADD, DO_FMUL, DO_FDIV, DO_FSQRT, DO_FMADD,
47 ADD_1, ADD_SHIFT, ADD_2, ADD_3,
50 FMADD_1, FMADD_2, FMADD_3,
51 FMADD_4, FMADD_5, FMADD_6,
53 DIV_2, DIV_3, DIV_4, DIV_5, DIV_6,
57 SQRT_1, SQRT_2, SQRT_3, SQRT_4,
58 SQRT_5, SQRT_6, SQRT_7, SQRT_8,
59 SQRT_9, SQRT_10, SQRT_11, SQRT_12,
60 INT_SHIFT, INT_ROUND, INT_ISHIFT,
61 INT_FINAL, INT_CHECK, INT_OFLOW,
63 ROUND_UFLOW, ROUND_OFLOW,
64 ROUNDING, ROUNDING_2, ROUNDING_3,
69 NAN_RESULT, EXC_RESULT);
71 type reg_type is record
74 instr_done : std_ulogic;
77 insn : std_ulogic_vector(31 downto 0);
78 instr_tag : instr_tag_t;
79 dest_fpr : gspr_index_t;
83 single_prec : std_ulogic;
84 fpscr : std_ulogic_vector(31 downto 0);
88 r : std_ulogic_vector(63 downto 0); -- 10.54 format
89 s : std_ulogic_vector(55 downto 0); -- extended fraction
91 p : std_ulogic_vector(63 downto 0); -- 8.56 format
92 y : std_ulogic_vector(63 downto 0); -- 8.56 format
93 result_sign : std_ulogic;
94 result_class : fp_number_class;
95 result_exp : signed(EXP_BITS-1 downto 0);
96 shift : signed(EXP_BITS-1 downto 0);
97 writing_back : std_ulogic;
98 int_result : std_ulogic;
99 cr_result : std_ulogic_vector(3 downto 0);
100 cr_mask : std_ulogic_vector(7 downto 0);
101 old_exc : std_ulogic_vector(4 downto 0);
102 update_fprf : std_ulogic;
103 quieten_nan : std_ulogic;
106 round_mode : std_ulogic_vector(2 downto 0);
107 is_subtract : std_ulogic;
108 exp_cmp : std_ulogic;
109 madd_cmp : std_ulogic;
110 add_bsmall : std_ulogic;
111 is_multiply : std_ulogic;
112 is_sqrt : std_ulogic;
114 count : unsigned(1 downto 0);
115 doing_ftdiv : std_ulogic_vector(1 downto 0);
116 opsel_a : std_ulogic_vector(1 downto 0);
120 invalid : std_ulogic;
122 longmask : std_ulogic;
125 type lookup_table is array(0 to 1023) of std_ulogic_vector(17 downto 0);
127 signal r, rin : reg_type;
129 signal fp_result : std_ulogic_vector(63 downto 0);
130 signal opsel_b : std_ulogic_vector(1 downto 0);
131 signal opsel_r : std_ulogic_vector(1 downto 0);
132 signal opsel_s : std_ulogic_vector(1 downto 0);
133 signal opsel_ainv : std_ulogic;
134 signal opsel_mask : std_ulogic;
135 signal opsel_binv : std_ulogic;
136 signal in_a : std_ulogic_vector(63 downto 0);
137 signal in_b : std_ulogic_vector(63 downto 0);
138 signal result : std_ulogic_vector(63 downto 0);
139 signal carry_in : std_ulogic;
140 signal lost_bits : std_ulogic;
141 signal r_hi_nz : std_ulogic;
142 signal r_lo_nz : std_ulogic;
143 signal s_nz : std_ulogic;
144 signal misc_sel : std_ulogic_vector(3 downto 0);
145 signal f_to_multiply : MultiplyInputType;
146 signal multiply_to_f : MultiplyOutputType;
147 signal msel_1 : std_ulogic_vector(1 downto 0);
148 signal msel_2 : std_ulogic_vector(1 downto 0);
149 signal msel_add : std_ulogic_vector(1 downto 0);
150 signal msel_inv : std_ulogic;
151 signal inverse_est : std_ulogic_vector(18 downto 0);
154 constant AIN_R : std_ulogic_vector(1 downto 0) := "00";
155 constant AIN_A : std_ulogic_vector(1 downto 0) := "01";
156 constant AIN_B : std_ulogic_vector(1 downto 0) := "10";
157 constant AIN_C : std_ulogic_vector(1 downto 0) := "11";
159 constant BIN_ZERO : std_ulogic_vector(1 downto 0) := "00";
160 constant BIN_R : std_ulogic_vector(1 downto 0) := "01";
161 constant BIN_RND : std_ulogic_vector(1 downto 0) := "10";
162 constant BIN_PS6 : std_ulogic_vector(1 downto 0) := "11";
164 constant RES_SUM : std_ulogic_vector(1 downto 0) := "00";
165 constant RES_SHIFT : std_ulogic_vector(1 downto 0) := "01";
166 constant RES_MULT : std_ulogic_vector(1 downto 0) := "10";
167 constant RES_MISC : std_ulogic_vector(1 downto 0) := "11";
169 constant S_ZERO : std_ulogic_vector(1 downto 0) := "00";
170 constant S_NEG : std_ulogic_vector(1 downto 0) := "01";
171 constant S_SHIFT : std_ulogic_vector(1 downto 0) := "10";
172 constant S_MULT : std_ulogic_vector(1 downto 0) := "11";
175 constant MUL1_A : std_ulogic_vector(1 downto 0) := "00";
176 constant MUL1_B : std_ulogic_vector(1 downto 0) := "01";
177 constant MUL1_Y : std_ulogic_vector(1 downto 0) := "10";
178 constant MUL1_R : std_ulogic_vector(1 downto 0) := "11";
180 constant MUL2_C : std_ulogic_vector(1 downto 0) := "00";
181 constant MUL2_LUT : std_ulogic_vector(1 downto 0) := "01";
182 constant MUL2_P : std_ulogic_vector(1 downto 0) := "10";
183 constant MUL2_R : std_ulogic_vector(1 downto 0) := "11";
185 constant MULADD_ZERO : std_ulogic_vector(1 downto 0) := "00";
186 constant MULADD_CONST : std_ulogic_vector(1 downto 0) := "01";
187 constant MULADD_A : std_ulogic_vector(1 downto 0) := "10";
188 constant MULADD_RS : std_ulogic_vector(1 downto 0) := "11";
190 -- Inverse lookup table, indexed by the top 8 fraction bits
191 -- The first 256 entries are the reciprocal (1/x) lookup table,
192 -- and the remaining 768 entries are the reciprocal square root table.
193 -- Output range is [0.5, 1) in 0.19 format, though the top
194 -- bit isn't stored since it is always 1.
195 -- Each output value is the inverse of the center of the input
196 -- range for the value, i.e. entry 0 is 1 / (1 + 1/512),
197 -- entry 1 is 1 / (1 + 3/512), etc.
198 signal inverse_table : lookup_table := (
200 -- Unit bit is assumed to be 1, so input range is [1, 2)
201 18x"3fc01", 18x"3f411", 18x"3ec31", 18x"3e460", 18x"3dc9f", 18x"3d4ec", 18x"3cd49", 18x"3c5b5",
202 18x"3be2f", 18x"3b6b8", 18x"3af4f", 18x"3a7f4", 18x"3a0a7", 18x"39968", 18x"39237", 18x"38b14",
203 18x"383fe", 18x"37cf5", 18x"375f9", 18x"36f0a", 18x"36828", 18x"36153", 18x"35a8a", 18x"353ce",
204 18x"34d1e", 18x"3467a", 18x"33fe3", 18x"33957", 18x"332d7", 18x"32c62", 18x"325f9", 18x"31f9c",
205 18x"3194a", 18x"31303", 18x"30cc7", 18x"30696", 18x"30070", 18x"2fa54", 18x"2f443", 18x"2ee3d",
206 18x"2e841", 18x"2e250", 18x"2dc68", 18x"2d68b", 18x"2d0b8", 18x"2caee", 18x"2c52e", 18x"2bf79",
207 18x"2b9cc", 18x"2b429", 18x"2ae90", 18x"2a900", 18x"2a379", 18x"29dfb", 18x"29887", 18x"2931b",
208 18x"28db8", 18x"2885e", 18x"2830d", 18x"27dc4", 18x"27884", 18x"2734d", 18x"26e1d", 18x"268f6",
209 18x"263d8", 18x"25ec1", 18x"259b3", 18x"254ac", 18x"24fad", 18x"24ab7", 18x"245c8", 18x"240e1",
210 18x"23c01", 18x"23729", 18x"23259", 18x"22d90", 18x"228ce", 18x"22413", 18x"21f60", 18x"21ab4",
211 18x"2160f", 18x"21172", 18x"20cdb", 18x"2084b", 18x"203c2", 18x"1ff40", 18x"1fac4", 18x"1f64f",
212 18x"1f1e1", 18x"1ed79", 18x"1e918", 18x"1e4be", 18x"1e069", 18x"1dc1b", 18x"1d7d4", 18x"1d392",
213 18x"1cf57", 18x"1cb22", 18x"1c6f3", 18x"1c2ca", 18x"1bea7", 18x"1ba8a", 18x"1b672", 18x"1b261",
214 18x"1ae55", 18x"1aa50", 18x"1a64f", 18x"1a255", 18x"19e60", 18x"19a70", 18x"19686", 18x"192a2",
215 18x"18ec3", 18x"18ae9", 18x"18715", 18x"18345", 18x"17f7c", 18x"17bb7", 18x"177f7", 18x"1743d",
216 18x"17087", 18x"16cd7", 18x"1692c", 18x"16585", 18x"161e4", 18x"15e47", 18x"15ab0", 18x"1571d",
217 18x"1538e", 18x"15005", 18x"14c80", 18x"14900", 18x"14584", 18x"1420d", 18x"13e9b", 18x"13b2d",
218 18x"137c3", 18x"1345e", 18x"130fe", 18x"12da2", 18x"12a4a", 18x"126f6", 18x"123a7", 18x"1205c",
219 18x"11d15", 18x"119d2", 18x"11694", 18x"11359", 18x"11023", 18x"10cf1", 18x"109c2", 18x"10698",
220 18x"10372", 18x"10050", 18x"0fd31", 18x"0fa17", 18x"0f700", 18x"0f3ed", 18x"0f0de", 18x"0edd3",
221 18x"0eacb", 18x"0e7c7", 18x"0e4c7", 18x"0e1ca", 18x"0ded2", 18x"0dbdc", 18x"0d8eb", 18x"0d5fc",
222 18x"0d312", 18x"0d02b", 18x"0cd47", 18x"0ca67", 18x"0c78a", 18x"0c4b1", 18x"0c1db", 18x"0bf09",
223 18x"0bc3a", 18x"0b96e", 18x"0b6a5", 18x"0b3e0", 18x"0b11e", 18x"0ae5f", 18x"0aba3", 18x"0a8eb",
224 18x"0a636", 18x"0a383", 18x"0a0d4", 18x"09e28", 18x"09b80", 18x"098da", 18x"09637", 18x"09397",
225 18x"090fb", 18x"08e61", 18x"08bca", 18x"08936", 18x"086a5", 18x"08417", 18x"0818c", 18x"07f04",
226 18x"07c7e", 18x"079fc", 18x"0777c", 18x"074ff", 18x"07284", 18x"0700d", 18x"06d98", 18x"06b26",
227 18x"068b6", 18x"0664a", 18x"063e0", 18x"06178", 18x"05f13", 18x"05cb1", 18x"05a52", 18x"057f5",
228 18x"0559a", 18x"05342", 18x"050ed", 18x"04e9a", 18x"04c4a", 18x"049fc", 18x"047b0", 18x"04567",
229 18x"04321", 18x"040dd", 18x"03e9b", 18x"03c5c", 18x"03a1f", 18x"037e4", 18x"035ac", 18x"03376",
230 18x"03142", 18x"02f11", 18x"02ce2", 18x"02ab5", 18x"0288b", 18x"02663", 18x"0243d", 18x"02219",
231 18x"01ff7", 18x"01dd8", 18x"01bbb", 18x"019a0", 18x"01787", 18x"01570", 18x"0135b", 18x"01149",
232 18x"00f39", 18x"00d2a", 18x"00b1e", 18x"00914", 18x"0070c", 18x"00506", 18x"00302", 18x"00100",
233 -- 1/sqrt(x) lookup table
234 -- Input is in the range [1, 4), i.e. two bits to the left of the
235 -- binary point. Those 2 bits index the following 3 blocks of 256 values.
237 18x"3fe00", 18x"3fa06", 18x"3f612", 18x"3f224", 18x"3ee3a", 18x"3ea58", 18x"3e67c", 18x"3e2a4",
238 18x"3ded2", 18x"3db06", 18x"3d73e", 18x"3d37e", 18x"3cfc2", 18x"3cc0a", 18x"3c85a", 18x"3c4ae",
239 18x"3c106", 18x"3bd64", 18x"3b9c8", 18x"3b630", 18x"3b29e", 18x"3af10", 18x"3ab86", 18x"3a802",
240 18x"3a484", 18x"3a108", 18x"39d94", 18x"39a22", 18x"396b6", 18x"3934e", 18x"38fea", 18x"38c8c",
241 18x"38932", 18x"385dc", 18x"3828a", 18x"37f3e", 18x"37bf6", 18x"378b2", 18x"37572", 18x"37236",
242 18x"36efe", 18x"36bca", 18x"3689a", 18x"36570", 18x"36248", 18x"35f26", 18x"35c06", 18x"358ea",
243 18x"355d4", 18x"352c0", 18x"34fb0", 18x"34ca4", 18x"3499c", 18x"34698", 18x"34398", 18x"3409c",
244 18x"33da2", 18x"33aac", 18x"337bc", 18x"334cc", 18x"331e2", 18x"32efc", 18x"32c18", 18x"32938",
245 18x"3265a", 18x"32382", 18x"320ac", 18x"31dd8", 18x"31b0a", 18x"3183e", 18x"31576", 18x"312b0",
246 18x"30fee", 18x"30d2e", 18x"30a74", 18x"307ba", 18x"30506", 18x"30254", 18x"2ffa4", 18x"2fcf8",
247 18x"2fa4e", 18x"2f7a8", 18x"2f506", 18x"2f266", 18x"2efca", 18x"2ed2e", 18x"2ea98", 18x"2e804",
248 18x"2e572", 18x"2e2e4", 18x"2e058", 18x"2ddce", 18x"2db48", 18x"2d8c6", 18x"2d646", 18x"2d3c8",
249 18x"2d14c", 18x"2ced4", 18x"2cc5e", 18x"2c9ea", 18x"2c77a", 18x"2c50c", 18x"2c2a2", 18x"2c038",
250 18x"2bdd2", 18x"2bb70", 18x"2b90e", 18x"2b6b0", 18x"2b454", 18x"2b1fa", 18x"2afa4", 18x"2ad4e",
251 18x"2aafc", 18x"2a8ac", 18x"2a660", 18x"2a414", 18x"2a1cc", 18x"29f86", 18x"29d42", 18x"29b00",
252 18x"298c2", 18x"29684", 18x"2944a", 18x"29210", 18x"28fda", 18x"28da6", 18x"28b74", 18x"28946",
253 18x"28718", 18x"284ec", 18x"282c4", 18x"2809c", 18x"27e78", 18x"27c56", 18x"27a34", 18x"27816",
254 18x"275fa", 18x"273e0", 18x"271c8", 18x"26fb0", 18x"26d9c", 18x"26b8a", 18x"2697a", 18x"2676c",
255 18x"26560", 18x"26356", 18x"2614c", 18x"25f46", 18x"25d42", 18x"25b40", 18x"2593e", 18x"25740",
256 18x"25542", 18x"25348", 18x"2514e", 18x"24f58", 18x"24d62", 18x"24b6e", 18x"2497c", 18x"2478c",
257 18x"2459e", 18x"243b0", 18x"241c6", 18x"23fde", 18x"23df6", 18x"23c10", 18x"23a2c", 18x"2384a",
258 18x"2366a", 18x"2348c", 18x"232ae", 18x"230d2", 18x"22efa", 18x"22d20", 18x"22b4a", 18x"22976",
259 18x"227a2", 18x"225d2", 18x"22402", 18x"22234", 18x"22066", 18x"21e9c", 18x"21cd2", 18x"21b0a",
260 18x"21944", 18x"2177e", 18x"215ba", 18x"213fa", 18x"21238", 18x"2107a", 18x"20ebc", 18x"20d00",
261 18x"20b46", 18x"2098e", 18x"207d6", 18x"20620", 18x"2046c", 18x"202b8", 18x"20108", 18x"1ff58",
262 18x"1fda8", 18x"1fbfc", 18x"1fa50", 18x"1f8a4", 18x"1f6fc", 18x"1f554", 18x"1f3ae", 18x"1f208",
263 18x"1f064", 18x"1eec2", 18x"1ed22", 18x"1eb82", 18x"1e9e4", 18x"1e846", 18x"1e6aa", 18x"1e510",
264 18x"1e378", 18x"1e1e0", 18x"1e04a", 18x"1deb4", 18x"1dd20", 18x"1db8e", 18x"1d9fc", 18x"1d86c",
265 18x"1d6de", 18x"1d550", 18x"1d3c4", 18x"1d238", 18x"1d0ae", 18x"1cf26", 18x"1cd9e", 18x"1cc18",
266 18x"1ca94", 18x"1c910", 18x"1c78c", 18x"1c60a", 18x"1c48a", 18x"1c30c", 18x"1c18e", 18x"1c010",
267 18x"1be94", 18x"1bd1a", 18x"1bba0", 18x"1ba28", 18x"1b8b2", 18x"1b73c", 18x"1b5c6", 18x"1b452",
268 18x"1b2e0", 18x"1b16e", 18x"1affe", 18x"1ae8e", 18x"1ad20", 18x"1abb4", 18x"1aa46", 18x"1a8dc",
270 18x"1a772", 18x"1a608", 18x"1a4a0", 18x"1a33a", 18x"1a1d4", 18x"1a070", 18x"19f0c", 18x"19da8",
271 18x"19c48", 18x"19ae6", 18x"19986", 18x"19828", 18x"196ca", 18x"1956e", 18x"19412", 18x"192b8",
272 18x"1915e", 18x"19004", 18x"18eae", 18x"18d56", 18x"18c00", 18x"18aac", 18x"18958", 18x"18804",
273 18x"186b2", 18x"18562", 18x"18412", 18x"182c2", 18x"18174", 18x"18026", 18x"17eda", 18x"17d8e",
274 18x"17c44", 18x"17afa", 18x"179b2", 18x"1786a", 18x"17724", 18x"175de", 18x"17498", 18x"17354",
275 18x"17210", 18x"170ce", 18x"16f8c", 18x"16e4c", 18x"16d0c", 18x"16bcc", 18x"16a8e", 18x"16950",
276 18x"16814", 18x"166d8", 18x"1659e", 18x"16464", 18x"1632a", 18x"161f2", 18x"160ba", 18x"15f84",
277 18x"15e4e", 18x"15d1a", 18x"15be6", 18x"15ab2", 18x"15980", 18x"1584e", 18x"1571c", 18x"155ec",
278 18x"154bc", 18x"1538e", 18x"15260", 18x"15134", 18x"15006", 18x"14edc", 18x"14db0", 18x"14c86",
279 18x"14b5e", 18x"14a36", 18x"1490e", 18x"147e6", 18x"146c0", 18x"1459a", 18x"14476", 18x"14352",
280 18x"14230", 18x"1410c", 18x"13fea", 18x"13eca", 18x"13daa", 18x"13c8a", 18x"13b6c", 18x"13a4e",
281 18x"13930", 18x"13814", 18x"136f8", 18x"135dc", 18x"134c2", 18x"133a8", 18x"1328e", 18x"13176",
282 18x"1305e", 18x"12f48", 18x"12e30", 18x"12d1a", 18x"12c06", 18x"12af2", 18x"129de", 18x"128ca",
283 18x"127b8", 18x"126a6", 18x"12596", 18x"12486", 18x"12376", 18x"12266", 18x"12158", 18x"1204a",
284 18x"11f3e", 18x"11e32", 18x"11d26", 18x"11c1a", 18x"11b10", 18x"11a06", 18x"118fc", 18x"117f4",
285 18x"116ec", 18x"115e4", 18x"114de", 18x"113d8", 18x"112d2", 18x"111ce", 18x"110ca", 18x"10fc6",
286 18x"10ec2", 18x"10dc0", 18x"10cbe", 18x"10bbc", 18x"10abc", 18x"109bc", 18x"108bc", 18x"107be",
287 18x"106c0", 18x"105c2", 18x"104c4", 18x"103c8", 18x"102cc", 18x"101d0", 18x"100d6", 18x"0ffdc",
288 18x"0fee2", 18x"0fdea", 18x"0fcf0", 18x"0fbf8", 18x"0fb02", 18x"0fa0a", 18x"0f914", 18x"0f81e",
289 18x"0f72a", 18x"0f636", 18x"0f542", 18x"0f44e", 18x"0f35a", 18x"0f268", 18x"0f176", 18x"0f086",
290 18x"0ef94", 18x"0eea4", 18x"0edb4", 18x"0ecc6", 18x"0ebd6", 18x"0eae8", 18x"0e9fa", 18x"0e90e",
291 18x"0e822", 18x"0e736", 18x"0e64a", 18x"0e55e", 18x"0e474", 18x"0e38a", 18x"0e2a0", 18x"0e1b8",
292 18x"0e0d0", 18x"0dfe8", 18x"0df00", 18x"0de1a", 18x"0dd32", 18x"0dc4c", 18x"0db68", 18x"0da82",
293 18x"0d99e", 18x"0d8ba", 18x"0d7d6", 18x"0d6f4", 18x"0d612", 18x"0d530", 18x"0d44e", 18x"0d36c",
294 18x"0d28c", 18x"0d1ac", 18x"0d0cc", 18x"0cfee", 18x"0cf0e", 18x"0ce30", 18x"0cd54", 18x"0cc76",
295 18x"0cb9a", 18x"0cabc", 18x"0c9e0", 18x"0c906", 18x"0c82a", 18x"0c750", 18x"0c676", 18x"0c59c",
296 18x"0c4c4", 18x"0c3ea", 18x"0c312", 18x"0c23a", 18x"0c164", 18x"0c08c", 18x"0bfb6", 18x"0bee0",
297 18x"0be0a", 18x"0bd36", 18x"0bc62", 18x"0bb8c", 18x"0baba", 18x"0b9e6", 18x"0b912", 18x"0b840",
298 18x"0b76e", 18x"0b69c", 18x"0b5cc", 18x"0b4fa", 18x"0b42a", 18x"0b35a", 18x"0b28a", 18x"0b1bc",
299 18x"0b0ee", 18x"0b01e", 18x"0af50", 18x"0ae84", 18x"0adb6", 18x"0acea", 18x"0ac1e", 18x"0ab52",
300 18x"0aa86", 18x"0a9bc", 18x"0a8f0", 18x"0a826", 18x"0a75c", 18x"0a694", 18x"0a5ca", 18x"0a502",
301 18x"0a43a", 18x"0a372", 18x"0a2aa", 18x"0a1e4", 18x"0a11c", 18x"0a056", 18x"09f90", 18x"09ecc",
303 18x"09e06", 18x"09d42", 18x"09c7e", 18x"09bba", 18x"09af6", 18x"09a32", 18x"09970", 18x"098ae",
304 18x"097ec", 18x"0972a", 18x"09668", 18x"095a8", 18x"094e8", 18x"09426", 18x"09368", 18x"092a8",
305 18x"091e8", 18x"0912a", 18x"0906c", 18x"08fae", 18x"08ef0", 18x"08e32", 18x"08d76", 18x"08cba",
306 18x"08bfe", 18x"08b42", 18x"08a86", 18x"089ca", 18x"08910", 18x"08856", 18x"0879c", 18x"086e2",
307 18x"08628", 18x"08570", 18x"084b6", 18x"083fe", 18x"08346", 18x"0828e", 18x"081d8", 18x"08120",
308 18x"0806a", 18x"07fb4", 18x"07efe", 18x"07e48", 18x"07d92", 18x"07cde", 18x"07c2a", 18x"07b76",
309 18x"07ac2", 18x"07a0e", 18x"0795a", 18x"078a8", 18x"077f4", 18x"07742", 18x"07690", 18x"075de",
310 18x"0752e", 18x"0747c", 18x"073cc", 18x"0731c", 18x"0726c", 18x"071bc", 18x"0710c", 18x"0705e",
311 18x"06fae", 18x"06f00", 18x"06e52", 18x"06da4", 18x"06cf6", 18x"06c4a", 18x"06b9c", 18x"06af0",
312 18x"06a44", 18x"06998", 18x"068ec", 18x"06840", 18x"06796", 18x"066ea", 18x"06640", 18x"06596",
313 18x"064ec", 18x"06442", 18x"0639a", 18x"062f0", 18x"06248", 18x"061a0", 18x"060f8", 18x"06050",
314 18x"05fa8", 18x"05f00", 18x"05e5a", 18x"05db4", 18x"05d0e", 18x"05c68", 18x"05bc2", 18x"05b1c",
315 18x"05a76", 18x"059d2", 18x"0592e", 18x"05888", 18x"057e4", 18x"05742", 18x"0569e", 18x"055fa",
316 18x"05558", 18x"054b6", 18x"05412", 18x"05370", 18x"052ce", 18x"0522e", 18x"0518c", 18x"050ec",
317 18x"0504a", 18x"04faa", 18x"04f0a", 18x"04e6a", 18x"04dca", 18x"04d2c", 18x"04c8c", 18x"04bee",
318 18x"04b50", 18x"04ab0", 18x"04a12", 18x"04976", 18x"048d8", 18x"0483a", 18x"0479e", 18x"04700",
319 18x"04664", 18x"045c8", 18x"0452c", 18x"04490", 18x"043f6", 18x"0435a", 18x"042c0", 18x"04226",
320 18x"0418a", 18x"040f0", 18x"04056", 18x"03fbe", 18x"03f24", 18x"03e8c", 18x"03df2", 18x"03d5a",
321 18x"03cc2", 18x"03c2a", 18x"03b92", 18x"03afa", 18x"03a62", 18x"039cc", 18x"03934", 18x"0389e",
322 18x"03808", 18x"03772", 18x"036dc", 18x"03646", 18x"035b2", 18x"0351c", 18x"03488", 18x"033f2",
323 18x"0335e", 18x"032ca", 18x"03236", 18x"031a2", 18x"03110", 18x"0307c", 18x"02fea", 18x"02f56",
324 18x"02ec4", 18x"02e32", 18x"02da0", 18x"02d0e", 18x"02c7c", 18x"02bec", 18x"02b5a", 18x"02aca",
325 18x"02a38", 18x"029a8", 18x"02918", 18x"02888", 18x"027f8", 18x"0276a", 18x"026da", 18x"0264a",
326 18x"025bc", 18x"0252e", 18x"024a0", 18x"02410", 18x"02384", 18x"022f6", 18x"02268", 18x"021da",
327 18x"0214e", 18x"020c0", 18x"02034", 18x"01fa8", 18x"01f1c", 18x"01e90", 18x"01e04", 18x"01d78",
328 18x"01cee", 18x"01c62", 18x"01bd8", 18x"01b4c", 18x"01ac2", 18x"01a38", 18x"019ae", 18x"01924",
329 18x"0189c", 18x"01812", 18x"01788", 18x"01700", 18x"01676", 18x"015ee", 18x"01566", 18x"014de",
330 18x"01456", 18x"013ce", 18x"01346", 18x"012c0", 18x"01238", 18x"011b2", 18x"0112c", 18x"010a4",
331 18x"0101e", 18x"00f98", 18x"00f12", 18x"00e8c", 18x"00e08", 18x"00d82", 18x"00cfe", 18x"00c78",
332 18x"00bf4", 18x"00b70", 18x"00aec", 18x"00a68", 18x"009e4", 18x"00960", 18x"008dc", 18x"00858",
333 18x"007d6", 18x"00752", 18x"006d0", 18x"0064e", 18x"005cc", 18x"0054a", 18x"004c8", 18x"00446",
334 18x"003c4", 18x"00342", 18x"002c2", 18x"00240", 18x"001c0", 18x"00140", 18x"000c0", 18x"00040"
337 -- Left and right shifter with 120 bit input and 64 bit output.
338 -- Shifts inp left by shift bits and returns the upper 64 bits of
339 -- the result. The shift parameter is interpreted as a signed
340 -- number in the range -64..63, with negative values indicating
342 function shifter_64(inp: std_ulogic_vector(119 downto 0);  -- 120-bit value to be shifted
343 shift: std_ulogic_vector(6 downto 0))  -- 7-bit shift count, consumed in three stages below
344 return std_ulogic_vector is
345 variable s1 : std_ulogic_vector(94 downto 0);  -- 95-bit output of the coarse stage
346 variable s2 : std_ulogic_vector(70 downto 0);  -- 71-bit output of the middle stage
347 variable result : std_ulogic_vector(63 downto 0);  -- 64-bit output of the fine stage
349 case shift(6 downto 5) is  -- coarse stage: top two count bits pick one of four 95-bit windows
351 s1 := inp(119 downto 25);  -- top 95 input bits
353 s1 := inp(87 downto 0) & "0000000";  -- low 88 input bits with 7 zeros appended on the right
355 s1 := x"0000000000000000" & inp(119 downto 89);  -- top 31 input bits below 64 zeros
357 s1 := x"00000000" & inp(119 downto 57);  -- top 63 input bits below 32 zeros
359 case shift(4 downto 3) is  -- middle stage: 71-bit window of s1, moving in steps of 8 bits
361 s2 := s1(94 downto 24);
363 s2 := s1(86 downto 16);
365 s2 := s1(78 downto 8);
367 s2 := s1(70 downto 0);
369 case shift(2 downto 0) is  -- fine stage: 64-bit window of s2, moving in steps of 1 bit
371 result := s2(70 downto 7);
373 result := s2(69 downto 6);
375 result := s2(68 downto 5);
377 result := s2(67 downto 4);
379 result := s2(66 downto 3);
381 result := s2(65 downto 2);
383 result := s2(64 downto 1);
385 result := s2(63 downto 0);
390 -- Generate a mask with 0-bits on the left and 1-bits on the right which
391 -- selects the bits that will be lost in doing a right shift. The shift
392 -- parameter is the bottom 6 bits of a negative shift count,
393 -- indicating a right shift.
394 function right_mask(shift: unsigned(5 downto 0)) return std_ulogic_vector is
395 variable result: std_ulogic_vector(63 downto 0);  -- 64-bit mask being built
397 result := (others => '0');  -- start with no bits selected
398 for i in 0 to 63 loop  -- visit every one of the 64 bit positions
400 result(63 - i) := '1';  -- NOTE(review): the condition guarding this assignment is on a line not visible in this excerpt
406 -- Split a DP floating-point number into components and work out its class.
407 -- If is_int = 1, the input is considered an integer
408 function decode_dp(fpr: std_ulogic_vector(63 downto 0); is_int: std_ulogic) return fpu_reg_type is
409 variable r : fpu_reg_type;  -- decoded operand being assembled
410 variable exp_nz : std_ulogic;  -- exponent field (bits 62..52) has at least one 1 bit
411 variable exp_ao : std_ulogic;  -- exponent field (bits 62..52) is all ones
412 variable frac_nz : std_ulogic;  -- fraction field (bits 51..0) is nonzero
413 variable cls : std_ulogic_vector(2 downto 0);  -- exp_ao & exp_nz & frac_nz, decoded into the class below
415 r.negative := fpr(63);  -- sign bit
416 exp_nz := or (fpr(62 downto 52));
417 exp_ao := and (fpr(62 downto 52));
418 frac_nz := or (fpr(51 downto 0));
420 r.exponent := signed(resize(unsigned(fpr(62 downto 52)), EXP_BITS)) - to_signed(1023, EXP_BITS);  -- subtract the bias of 1023
422 r.exponent := to_signed(-1022, EXP_BITS);  -- fixed exponent alternative; presumably used when exp_nz = '0' -- condition not visible here, confirm
424 r.mantissa := "000000000" & exp_nz & fpr(51 downto 0) & "00";  -- exp_nz becomes the unit bit (bit 54), fraction occupies bits 53..2
425 cls := exp_ao & exp_nz & frac_nz;
427 when "000" => r.class := ZERO;
428 when "001" => r.class := FINITE; -- denormalized
429 when "010" => r.class := FINITE;
430 when "011" => r.class := FINITE;
431 when "110" => r.class := INFINITY;
432 when others => r.class := NAN;
436 r.exponent := (others => '0');  -- presumably the is_int = '1' path (enclosing condition not visible here -- confirm)
437 if (fpr(63) or exp_nz or frac_nz) = '1' then  -- true when any bit of fpr is set
446 -- Construct a DP floating-point result from components
447 function pack_dp(sign: std_ulogic; class: fp_number_class; exp: signed(EXP_BITS-1 downto 0);
448 mantissa: std_ulogic_vector; single_prec: std_ulogic; quieten_nan: std_ulogic)
449 return std_ulogic_vector is
450 variable result : std_ulogic_vector(63 downto 0);  -- 64-bit packed value being assembled
452 result := (others => '0');  -- fields not written below stay zero
457 if mantissa(54) = '1' then  -- bit 54 set; the exponent field is written under this test
459 result(62 downto 52) := std_ulogic_vector(resize(exp, 11) + 1023);  -- add the bias of 1023 to form the 11-bit exponent field
461 result(51 downto 29) := mantissa(53 downto 31);  -- upper 23 fraction bits
462 if single_prec = '0' then  -- low fraction bits are copied only when single_prec is clear
463 result(28 downto 0) := mantissa(30 downto 2);  -- lower 29 fraction bits
466 result(62 downto 52) := "11111111111";  -- all-ones exponent field (selecting class is on a line not visible here)
468 result(62 downto 52) := "11111111111";  -- all-ones exponent field (selecting class is on a line not visible here)
469 result(51) := quieten_nan or mantissa(53);  -- top fraction bit is forced to 1 when quieten_nan is set
470 result(50 downto 29) := mantissa(52 downto 31);  -- remaining upper fraction bits are copied unchanged
471 if single_prec = '0' then  -- as above, low fraction bits only when single_prec is clear
472 result(28 downto 0) := mantissa(30 downto 2);
478 -- Determine whether to increment when rounding
479 -- Returns rounding_inc & inexact
480 -- Assumes x includes the bottom 29 bits of the mantissa already
481 -- if single_prec = 1 (usually arranged by setting set_x = 1 earlier).
482 function fp_rounding(mantissa: std_ulogic_vector(63 downto 0); x: std_ulogic;
483 single_prec: std_ulogic; rn: std_ulogic_vector(2 downto 0);
485 return std_ulogic_vector is
486 variable grx : std_ulogic_vector(2 downto 0);  -- the two mantissa bits just below the result LSB, followed by x
487 variable ret : std_ulogic_vector(1 downto 0);  -- value returned: rounding_inc & inexact
488 variable lsb : std_ulogic;  -- consulted to break ties in round-to-nearest mode
490 if single_prec = '0' then
491 grx := mantissa(1 downto 0) & x;  -- single_prec clear: use mantissa bits 1..0
494 grx := mantissa(30 downto 29) & x;  -- alternative selection: mantissa bits 30..29 (presumably the single-precision path)
499 case rn(1 downto 0) is  -- low two bits of rn select the rounding mode
500 when "00" => -- round to nearest
501 if grx = "100" and rn(2) = '0' then
502 ret(1) := lsb; -- tie, round to even
506 when "01" => -- round towards zero
507 when others => -- round towards +/- inf
509 -- round towards greater magnitude
516 -- Determine result flags to write into the FPSCR
517 function result_flags(sign: std_ulogic; class: fp_number_class; unitbit: std_ulogic)
518 return std_ulogic_vector is  -- each visible return produces a 5-bit vector
522 return sign & "0010";  -- NOTE(review): the class selecting each return is on a line not visible in this excerpt
524 return (not unitbit) & sign & (not sign) & "00";  -- top bit is set when unitbit is clear
526 return '0' & sign & (not sign) & "01";
533 fpu_multiply_0: entity work.multiply
536 m_in => f_to_multiply,
537 m_out => multiply_to_f
542 if rising_edge(clk) then
548 r.fpscr <= (others => '0');
549 r.writing_back <= '0';
551 assert not (r.state /= IDLE and e_in.valid = '1') severity failure;
557 -- synchronous reads from lookup table
558 lut_access: process(clk)
559 variable addrhi : std_ulogic_vector(1 downto 0);  -- top two bits of the table index
560 variable addr : std_ulogic_vector(9 downto 0);  -- full 10-bit index into inverse_table
562 if rising_edge(clk) then  -- the table is read on the rising clock edge
563 if r.is_sqrt = '1' then
564 addrhi := r.b.mantissa(55 downto 54);  -- when is_sqrt is set, mantissa bits 55..54 of B form the top index bits
568 addr := addrhi & r.b.mantissa(53 downto 46);  -- low 8 index bits are mantissa bits 53..46 of B
569 inverse_est <= '1' & inverse_table(to_integer(unsigned(addr)));  -- prepend a '1' to the 18-bit table entry to form the 19-bit estimate
573 e_out.busy <= r.busy;
574 e_out.exception <= r.fpscr(FPSCR_FEX);  -- driven directly from the FEX bit of the FPSCR
575 e_out.interrupt <= r.do_intr;
577 w_out.valid <= r.instr_done and not r.do_intr;  -- completion is not reported while do_intr is set
578 w_out.instr_tag <= r.instr_tag;
579 w_out.write_enable <= r.writing_back;
580 w_out.write_reg <= r.dest_fpr;
581 w_out.write_data <= fp_result;
582 w_out.write_cr_enable <= r.instr_done and (r.rc or r.is_cmp);  -- CR is written on completion when rc or is_cmp is set
583 w_out.write_cr_mask <= r.cr_mask;
584 w_out.write_cr_data <= r.cr_result & r.cr_result & r.cr_result & r.cr_result &  -- the 4-bit result is repeated eight times to fill 32 bits
585 r.cr_result & r.cr_result & r.cr_result & r.cr_result;
588 variable v : reg_type;
589 variable adec : fpu_reg_type;
590 variable bdec : fpu_reg_type;
591 variable cdec : fpu_reg_type;
592 variable fpscr_mask : std_ulogic_vector(31 downto 0);
593 variable illegal : std_ulogic;
594 variable j, k : integer;
595 variable flm : std_ulogic_vector(7 downto 0);
596 variable int_input : std_ulogic;
597 variable mask : std_ulogic_vector(63 downto 0);
598 variable in_a0 : std_ulogic_vector(63 downto 0);
599 variable in_b0 : std_ulogic_vector(63 downto 0);
600 variable misc : std_ulogic_vector(63 downto 0);
601 variable shift_res : std_ulogic_vector(63 downto 0);
602 variable round : std_ulogic_vector(1 downto 0);
603 variable update_fx : std_ulogic;
604 variable arith_done : std_ulogic;
605 variable invalid : std_ulogic;
606 variable zero_divide : std_ulogic;
607 variable mant_nz : std_ulogic;
608 variable min_exp : signed(EXP_BITS-1 downto 0);
609 variable max_exp : signed(EXP_BITS-1 downto 0);
610 variable bias_exp : signed(EXP_BITS-1 downto 0);
611 variable new_exp : signed(EXP_BITS-1 downto 0);
612 variable exp_tiny : std_ulogic;
613 variable exp_huge : std_ulogic;
614 variable renormalize : std_ulogic;
615 variable clz : std_ulogic_vector(5 downto 0);
616 variable set_x : std_ulogic;
617 variable mshift : signed(EXP_BITS-1 downto 0);
618 variable need_check : std_ulogic;
619 variable msb : std_ulogic;
620 variable is_add : std_ulogic;
621 variable set_a : std_ulogic;
622 variable set_b : std_ulogic;
623 variable set_c : std_ulogic;
624 variable set_y : std_ulogic;
625 variable set_s : std_ulogic;
626 variable qnan_result : std_ulogic;
627 variable px_nz : std_ulogic;
628 variable pcmpb_eq : std_ulogic;
629 variable pcmpb_lt : std_ulogic;
630 variable pshift : std_ulogic;
631 variable renorm_sqrt : std_ulogic;
632 variable sqrt_exp : signed(EXP_BITS-1 downto 0);
633 variable shiftin : std_ulogic;
634 variable mulexp : signed(EXP_BITS-1 downto 0);
635 variable maddend : std_ulogic_vector(127 downto 0);
636 variable sum : std_ulogic_vector(63 downto 0);
637 variable round_inc : std_ulogic_vector(63 downto 0);
644 -- capture incoming instruction
645 if e_in.valid = '1' then
648 v.instr_tag := e_in.itag;
649 v.fe_mode := or (e_in.fe_mode);
650 v.dest_fpr := e_in.frt;
651 v.single_prec := e_in.single;
652 v.longmask := e_in.single;
655 v.is_cmp := e_in.out_cr;
656 if e_in.out_cr = '0' then
657 v.cr_mask := num_to_fxm(1);
659 v.cr_mask := num_to_fxm(to_integer(unsigned(insn_bf(e_in.insn))));
662 if e_in.op = OP_FPOP_I then
665 v.quieten_nan := '1';
668 v.round_mode := '0' & r.fpscr(FPSCR_RN+1 downto FPSCR_RN);
669 v.is_subtract := '0';
670 v.is_multiply := '0';
673 v.doing_ftdiv := "00";
675 adec := decode_dp(e_in.fra, int_input);
676 bdec := decode_dp(e_in.frb, int_input);
677 cdec := decode_dp(e_in.frc, int_input);
683 if adec.exponent > bdec.exponent then
687 if (adec.exponent + cdec.exponent + 1) >= bdec.exponent then
692 r_hi_nz <= or (r.r(55 downto 31));
693 r_lo_nz <= or (r.r(30 downto 2));
696 if r.single_prec = '0' then
697 if r.doing_ftdiv(1) = '0' then
698 max_exp := to_signed(1023, EXP_BITS);
700 max_exp := to_signed(1020, EXP_BITS);
702 if r.doing_ftdiv(0) = '0' then
703 min_exp := to_signed(-1022, EXP_BITS);
705 min_exp := to_signed(-1021, EXP_BITS);
707 bias_exp := to_signed(1536, EXP_BITS);
709 max_exp := to_signed(127, EXP_BITS);
710 min_exp := to_signed(-126, EXP_BITS);
711 bias_exp := to_signed(192, EXP_BITS);
713 new_exp := r.result_exp - r.shift;
716 if new_exp < min_exp then
719 if new_exp > max_exp then
723 -- Compare P with zero and with B
724 px_nz := or (r.p(57 downto 4));
726 if r.p(59 downto 4) = r.b.mantissa(55 downto 0) then
730 if unsigned(r.p(59 downto 4)) < unsigned(r.b.mantissa(55 downto 0)) then
734 v.writing_back := '0';
736 v.update_fprf := '0';
737 v.shift := to_signed(0, EXP_BITS);
748 fpscr_mask := (others => '1');
760 f_to_multiply.is_32bit <= '0';
761 f_to_multiply.valid <= '0';
764 msel_add <= MULADD_ZERO;
777 if e_in.valid = '1' then
778 case e_in.insn(5 downto 1) is
780 if e_in.insn(8) = '1' then
781 if e_in.insn(6) = '0' then
784 v.state := DO_FTSQRT;
786 elsif e_in.insn(7) = '1' then
793 if e_in.insn(10) = '0' then
794 if e_in.insn(8) = '0' then
797 v.state := DO_MTFSFI;
803 if e_in.insn(8) = '0' then
810 if e_in.insn(9 downto 8) /= "11" then
820 if int_input = '1' then
827 v.round_mode := "001";
832 if v.b.mantissa(54) = '0' and v.a.mantissa(54) = '1' then
836 when "10100" | "10101" =>
849 v.is_multiply := '1';
851 if v.c.mantissa(54) = '0' and v.a.mantissa(54) = '1' then
858 v.state := DO_FRSQRTE;
859 when "11100" | "11101" | "11110" | "11111" =>
860 if v.a.mantissa(54) = '0' then
862 elsif v.c.mantissa(54) = '0' then
873 v.old_exc := r.fpscr(FPSCR_VX downto FPSCR_XX);
877 j := to_integer(unsigned(insn_bfa(r.insn)));
881 v.cr_result := r.fpscr(k + 3 downto k);
882 fpscr_mask(k + 3 downto k) := "0000";
885 v.fpscr := r.fpscr and (fpscr_mask or x"6007F8FF");
892 v.cr_result := "0000";  -- both test flags start clear
893 if r.a.class = INFINITY or r.b.class = ZERO or r.b.class = INFINITY or
894 (r.b.class = FINITE and r.b.mantissa(54) = '0') then  -- B denormalized: unit bit (bit 54) is clear; bit 53 is only the top fraction bit
895 v.cr_result(2) := '1';  -- fg_flag
897 if r.a.class = NAN or r.a.class = INFINITY or
898 r.b.class = NAN or r.b.class = ZERO or r.b.class = INFINITY or
899 (r.a.class = FINITE and r.a.exponent <= to_signed(-970, EXP_BITS)) then
900 v.cr_result(1) := '1';  -- fe_flag
902 v.doing_ftdiv := "11";  -- selects the ftdiv-specific min_exp/max_exp limits for the exponent range check
911 v.cr_result := "0000";  -- both test flags start clear
912 if r.b.class = ZERO or r.b.class = INFINITY or
913 (r.b.class = FINITE and r.b.mantissa(54) = '0') then  -- B denormalized: unit bit (bit 54) is clear; bit 53 is only the top fraction bit
914 v.cr_result(2) := '1';  -- fg_flag
916 if r.b.class = NAN or r.b.class = INFINITY or r.b.class = ZERO
917 or r.b.negative = '1' or r.b.exponent <= to_signed(-970, EXP_BITS) then
918 v.cr_result(1) := '1';  -- fe_flag: must be set here; assigning '0' after the clear above was a no-op
927 v.result_exp := r.b.exponent;
928 if (r.a.class = NAN and r.a.mantissa(53) = '0') or
929 (r.b.class = NAN and r.b.mantissa(53) = '0') then
931 v.fpscr(FPSCR_VXSNAN) := '1';
932 if r.insn(6) = '1' and r.fpscr(FPSCR_VE) = '0' then
933 v.fpscr(FPSCR_VXVC) := '1';
936 v.cr_result := "0001"; -- unordered
937 elsif r.a.class = NAN or r.b.class = NAN then
938 if r.insn(6) = '1' then
940 v.fpscr(FPSCR_VXVC) := '1';
943 v.cr_result := "0001"; -- unordered
944 elsif r.a.class = ZERO and r.b.class = ZERO then
945 v.cr_result := "0010"; -- equal
946 elsif r.a.negative /= r.b.negative then
947 v.cr_result := r.a.negative & r.b.negative & "00";
948 elsif r.a.class = ZERO then
949 -- A and B are the same sign from here down
950 v.cr_result := not r.b.negative & r.b.negative & "00";
951 elsif r.a.class = INFINITY then
952 if r.b.class = INFINITY then
953 v.cr_result := "0010";
955 v.cr_result := r.a.negative & not r.a.negative & "00";
957 elsif r.b.class = ZERO then
958 -- A is finite from here down
959 v.cr_result := r.a.negative & not r.a.negative & "00";
960 elsif r.b.class = INFINITY then
961 v.cr_result := not r.b.negative & r.b.negative & "00";
962 elsif r.exp_cmp = '1' then
963 -- A and B are both finite from here down
964 v.cr_result := r.a.negative & not r.a.negative & "00";
965 elsif r.a.exponent /= r.b.exponent then
966 -- A exponent is smaller than B
967 v.cr_result := not r.a.negative & r.a.negative & "00";
969 -- Prepare to subtract mantissas, put B in R
970 v.cr_result := "0000";
975 v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result;
979 j := to_integer(unsigned(insn_bt(r.insn)));
980 for i in 0 to 31 loop
982 v.fpscr(31 - i) := r.insn(6);
990 j := to_integer(unsigned(insn_bf(r.insn)));
991 if r.insn(16) = '0' then
995 v.fpscr(k + 3 downto k) := insn_u(r.insn);
1004 opsel_r <= RES_MISC;
1005 misc_sel <= "01" & r.insn(8) & '0';
1006 v.int_result := '1';
1007 v.writing_back := '1';
1008 v.instr_done := '1';
1012 v.int_result := '1';
1013 v.writing_back := '1';
1014 opsel_r <= RES_MISC;
1015 case r.insn(20 downto 16) is
1020 v.fpscr(FPSCR_VE downto FPSCR_XE) := "00000";
1021 when "10100" | "10101" =>
1022 -- mffscdrn[i] (but we don't implement DRN)
1023 fpscr_mask := x"000000FF";
1026 fpscr_mask := x"000000FF";
1027 v.fpscr(FPSCR_RN+1 downto FPSCR_RN) :=
1028 r.b.mantissa(FPSCR_RN+1 downto FPSCR_RN);
1031 fpscr_mask := x"000000FF";
1032 v.fpscr(FPSCR_RN+1 downto FPSCR_RN) := r.insn(12 downto 11);
1035 fpscr_mask := x"0007F0FF";
1039 v.instr_done := '1';
1043 if r.insn(25) = '1' then
1045 elsif r.insn(16) = '1' then
1048 flm := r.insn(24 downto 17);
1050 for i in 0 to 7 loop
1052 if flm(i) = '1' then
1053 v.fpscr(k + 3 downto k) := r.b.mantissa(k + 3 downto k);
1056 v.instr_done := '1';
1060 -- r.opsel_a = AIN_B
1061 v.result_class := r.b.class;
1062 v.result_exp := r.b.exponent;
1063 v.quieten_nan := '0';
1064 if r.insn(9) = '1' then
1065 v.result_sign := '0'; -- fabs
1066 elsif r.insn(8) = '1' then
1067 v.result_sign := '1'; -- fnabs
1068 elsif r.insn(7) = '1' then
1069 v.result_sign := r.b.negative; -- fmr
1070 elsif r.insn(6) = '1' then
1071 v.result_sign := not r.b.negative; -- fneg
1073 v.result_sign := r.a.negative; -- fcpsgn
1075 v.writing_back := '1';
1076 v.instr_done := '1';
1079 when DO_FRI => -- fri[nzpm]
1080 -- r.opsel_a = AIN_B
1081 v.result_class := r.b.class;
1082 v.result_sign := r.b.negative;
1083 v.result_exp := r.b.exponent;
1084 v.fpscr(FPSCR_FR) := '0';
1085 v.fpscr(FPSCR_FI) := '0';
1086 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1088 v.fpscr(FPSCR_VXSNAN) := '1';
1091 if r.b.class = FINITE then
1092 if r.b.exponent >= to_signed(52, EXP_BITS) then
1093 -- integer already, no rounding required
1096 v.shift := r.b.exponent - to_signed(52, EXP_BITS);
1098 v.round_mode := '1' & r.insn(7 downto 6);
1105 -- r.opsel_a = AIN_B, r.shift = 0
1106 v.result_class := r.b.class;
1107 v.result_sign := r.b.negative;
1108 v.result_exp := r.b.exponent;
1109 v.fpscr(FPSCR_FR) := '0';
1110 v.fpscr(FPSCR_FI) := '0';
1111 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1113 v.fpscr(FPSCR_VXSNAN) := '1';
1117 if r.b.class = FINITE then
1118 if r.b.exponent < to_signed(-126, EXP_BITS) then
1119 v.shift := r.b.exponent - to_signed(-126, EXP_BITS);
1120 v.state := ROUND_UFLOW;
1121 elsif r.b.exponent > to_signed(127, EXP_BITS) then
1122 v.state := ROUND_OFLOW;
1124 v.state := ROUNDING;
1131 -- instr bit 9: 1=dword 0=word
1132 -- instr bit 8: 1=unsigned 0=signed
1133 -- instr bit 1: 1=round to zero 0=use fpscr[RN]
1134 -- r.opsel_a = AIN_B
1135 v.result_class := r.b.class;
1136 v.result_sign := r.b.negative;
1137 v.result_exp := r.b.exponent;
1138 v.fpscr(FPSCR_FR) := '0';
1139 v.fpscr(FPSCR_FI) := '0';
1140 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1142 v.fpscr(FPSCR_VXSNAN) := '1';
1146 v.int_result := '1';
1151 if r.b.exponent >= to_signed(64, EXP_BITS) or
1152 (r.insn(9) = '0' and r.b.exponent >= to_signed(32, EXP_BITS)) then
1153 v.state := INT_OFLOW;
1154 elsif r.b.exponent >= to_signed(52, EXP_BITS) then
1155 -- integer already, no rounding required,
1156 -- shift into final position
1157 v.shift := r.b.exponent - to_signed(54, EXP_BITS);
1158 if r.insn(8) = '1' and r.b.negative = '1' then
1159 v.state := INT_OFLOW;
1161 v.state := INT_ISHIFT;
1164 v.shift := r.b.exponent - to_signed(52, EXP_BITS);
1165 v.state := INT_SHIFT;
1167 when INFINITY | NAN =>
1168 v.state := INT_OFLOW;
1172 -- r.opsel_a = AIN_B
1173 v.result_sign := '0';
1174 if r.insn(8) = '0' and r.b.negative = '1' then
1175 -- fcfid[s] with negative operand, set R = -B
1178 v.result_sign := '1';
1180 v.result_class := r.b.class;
1181 v.result_exp := to_signed(54, EXP_BITS);
1182 v.fpscr(FPSCR_FR) := '0';
1183 v.fpscr(FPSCR_FI) := '0';
1184 if r.b.class = ZERO then
1191 -- fadd[s] and fsub[s]
1192 -- r.opsel_a = AIN_A
1193 v.result_sign := r.a.negative;
1194 v.result_class := r.a.class;
1195 v.result_exp := r.a.exponent;
1196 v.fpscr(FPSCR_FR) := '0';
1197 v.fpscr(FPSCR_FI) := '0';
1200 is_add := r.a.negative xor r.b.negative xor r.insn(1);
1201 if r.a.class = FINITE and r.b.class = FINITE then
1202 v.is_subtract := not is_add;
1203 v.add_bsmall := r.exp_cmp;
1205 if r.exp_cmp = '0' then
1206 v.shift := r.a.exponent - r.b.exponent;
1207 v.result_sign := r.b.negative xnor r.insn(1);
1208 if r.a.exponent = r.b.exponent then
1212 v.state := ADD_SHIFT;
1218 if r.a.class = NAN or r.b.class = NAN then
1219 v.state := NAN_RESULT;
1220 elsif r.a.class = INFINITY and r.b.class = INFINITY and is_add = '0' then
1221 -- invalid operation, construct QNaN
1222 v.fpscr(FPSCR_VXISI) := '1';
1225 elsif r.a.class = ZERO and r.b.class = ZERO and is_add = '0' then
1226 -- return -0 for rounding to -infinity
1227 v.result_sign := r.round_mode(1) and r.round_mode(0);
1229 elsif r.a.class = INFINITY or r.b.class = ZERO then
1232 v.state := EXC_RESULT;
1236 v.negate := not r.insn(1);
1237 v.state := EXC_RESULT;
1243 -- r.opsel_a = AIN_A unless C is denorm and A isn't
1244 v.result_sign := r.a.negative xor r.c.negative;
1245 v.result_class := r.a.class;
1246 v.fpscr(FPSCR_FR) := '0';
1247 v.fpscr(FPSCR_FI) := '0';
1250 if r.a.class = FINITE and r.c.class = FINITE then
1251 v.result_exp := r.a.exponent + r.c.exponent;
1252 -- Renormalize denorm operands
1253 if r.a.mantissa(54) = '0' then
1254 v.state := RENORM_A;
1255 elsif r.c.mantissa(54) = '0' then
1256 v.state := RENORM_C;
1258 f_to_multiply.valid <= '1';
1262 if r.a.class = NAN or r.c.class = NAN then
1263 v.state := NAN_RESULT;
1264 elsif (r.a.class = INFINITY and r.c.class = ZERO) or
1265 (r.a.class = ZERO and r.c.class = INFINITY) then
1266 -- invalid operation, construct QNaN
1267 v.fpscr(FPSCR_VXIMZ) := '1';
1269 elsif r.a.class = ZERO or r.a.class = INFINITY then
1273 -- r.c.class is ZERO or INFINITY
1275 v.negate := r.a.negative;
1276 v.state := EXC_RESULT;
1281 -- r.opsel_a = AIN_A unless B is denorm and A isn't
1282 v.result_class := r.a.class;
1283 v.fpscr(FPSCR_FR) := '0';
1284 v.fpscr(FPSCR_FI) := '0';
1287 v.result_sign := r.a.negative xor r.b.negative;
1288 v.result_exp := r.a.exponent - r.b.exponent;
1290 if r.a.class = FINITE and r.b.class = FINITE then
1291 -- Renormalize denorm operands
1292 if r.a.mantissa(54) = '0' then
1293 v.state := RENORM_A;
1294 elsif r.b.mantissa(54) = '0' then
1295 v.state := RENORM_B;
1301 if r.a.class = NAN or r.b.class = NAN then
1302 v.state := NAN_RESULT;
1303 elsif r.b.class = INFINITY then
1304 if r.a.class = INFINITY then
1305 v.fpscr(FPSCR_VXIDI) := '1';
1308 v.result_class := ZERO;
1311 elsif r.b.class = ZERO then
1312 if r.a.class = ZERO then
1313 v.fpscr(FPSCR_VXZDZ) := '1';
1316 if r.a.class = FINITE then
1319 v.result_class := INFINITY;
1322 else -- r.b.class = FINITE, result_class = r.a.class
1328 v.fpscr(FPSCR_FR) := '0';
1329 v.fpscr(FPSCR_FI) := '0';
1330 if r.a.class = ZERO or (r.a.negative = '0' and r.a.class /= NAN) then
1335 v.quieten_nan := '0';
1336 v.state := EXC_RESULT;
1339 -- r.opsel_a = AIN_B
1340 v.result_class := r.b.class;
1341 v.result_sign := r.b.negative;
1342 v.fpscr(FPSCR_FR) := '0';
1343 v.fpscr(FPSCR_FI) := '0';
1347 v.result_exp := r.b.exponent;
1348 if r.b.negative = '1' then
1349 v.fpscr(FPSCR_VXSQRT) := '1';
1351 elsif r.b.mantissa(54) = '0' then
1352 v.state := RENORM_B;
1353 elsif r.b.exponent(0) = '0' then
1356 v.shift := to_signed(1, EXP_BITS);
1357 v.state := RENORM_B2;
1360 v.state := NAN_RESULT;
1365 if r.b.negative = '1' then
1366 v.fpscr(FPSCR_VXSQRT) := '1';
1374 -- r.opsel_a = AIN_B
1375 v.result_class := r.b.class;
1376 v.result_sign := r.b.negative;
1377 v.fpscr(FPSCR_FR) := '0';
1378 v.fpscr(FPSCR_FI) := '0';
1382 v.result_exp := - r.b.exponent;
1383 if r.b.mantissa(54) = '0' then
1384 v.state := RENORM_B;
1389 v.state := NAN_RESULT;
1391 v.result_class := ZERO;
1394 v.result_class := INFINITY;
1400 -- r.opsel_a = AIN_B
1401 v.result_class := r.b.class;
1402 v.result_sign := r.b.negative;
1403 v.fpscr(FPSCR_FR) := '0';
1404 v.fpscr(FPSCR_FI) := '0';
1406 v.shift := to_signed(1, EXP_BITS);
1409 v.result_exp := r.b.exponent;
1410 if r.b.negative = '1' then
1411 v.fpscr(FPSCR_VXSQRT) := '1';
1413 elsif r.b.mantissa(54) = '0' then
1414 v.state := RENORM_B;
1415 elsif r.b.exponent(0) = '0' then
1418 v.state := RENORM_B2;
1421 v.state := NAN_RESULT;
1423 if r.b.negative = '1' then
1424 v.fpscr(FPSCR_VXSQRT) := '1';
1427 v.result_class := ZERO;
1431 v.result_class := INFINITY;
1437 -- fmadd, fmsub, fnmadd, fnmsub
1438 -- r.opsel_a = AIN_A if A is denorm, else AIN_C if C is denorm,
1440 v.result_sign := r.a.negative;
1441 v.result_class := r.a.class;
1442 v.result_exp := r.a.exponent;
1443 v.fpscr(FPSCR_FR) := '0';
1444 v.fpscr(FPSCR_FI) := '0';
1448 is_add := r.a.negative xor r.c.negative xor r.b.negative xor r.insn(1);
1449 if r.a.class = FINITE and r.c.class = FINITE and
1450 (r.b.class = FINITE or r.b.class = ZERO) then
1451 v.is_subtract := not is_add;
1452 mulexp := r.a.exponent + r.c.exponent;
1453 v.result_exp := mulexp;
1454 -- Make sure A and C are normalized
1455 if r.a.mantissa(54) = '0' then
1456 v.state := RENORM_A;
1457 elsif r.c.mantissa(54) = '0' then
1458 v.state := RENORM_C;
1459 elsif r.b.class = ZERO then
1460 -- no addend, degenerates to multiply
1461 v.result_sign := r.a.negative xor r.c.negative xor r.insn(2);
1462 f_to_multiply.valid <= '1';
1463 v.is_multiply := '1';
1465 elsif r.madd_cmp = '0' then
1466 -- addend is bigger, do multiply first
1467 v.result_sign := not (r.b.negative xor r.insn(1) xor r.insn(2));
1468 f_to_multiply.valid <= '1';
1471 -- product is bigger, shift B right and use it as the
1472 -- addend to the multiplier
1473 v.shift := r.b.exponent - mulexp + to_signed(64, EXP_BITS);
1474 -- for subtract, multiplier does B - A * C
1475 v.result_sign := not (r.a.negative xor r.c.negative xor r.insn(2) xor is_add);
1476 v.result_exp := r.b.exponent;
1480 if r.a.class = NAN or r.b.class = NAN or r.c.class = NAN then
1481 v.state := NAN_RESULT;
1482 elsif (r.a.class = ZERO and r.c.class = INFINITY) or
1483 (r.a.class = INFINITY and r.c.class = ZERO) then
1484 -- invalid operation, construct QNaN
1485 v.fpscr(FPSCR_VXIMZ) := '1';
1487 elsif r.a.class = INFINITY or r.c.class = INFINITY then
1488 if r.b.class = INFINITY and is_add = '0' then
1489 -- invalid operation, construct QNaN
1490 v.fpscr(FPSCR_VXISI) := '1';
1493 -- result is infinity
1494 v.result_class := INFINITY;
1495 v.result_sign := r.a.negative xor r.c.negative xor r.insn(2);
1499 -- Here A is zero, C is zero, or B is infinity
1500 -- Result is +/-B in all of those cases
1502 if r.b.class /= ZERO or is_add = '1' then
1503 v.negate := not (r.insn(1) xor r.insn(2));
1505 -- have to be careful about rule for 0 - 0 result sign
1506 v.negate := r.b.negative xor (r.round_mode(1) and r.round_mode(0)) xor r.insn(2);
1508 v.state := EXC_RESULT;
1514 v.state := RENORM_A2;
1515 if r.insn(4) = '1' then
1522 -- r.opsel_a = AIN_C for fmul/fmadd, AIN_B for fdiv
1524 v.result_exp := new_exp;
1525 if r.insn(4) = '1' then
1526 if r.c.mantissa(54) = '1' then
1527 if r.insn(3) = '0' or r.b.class = ZERO then
1532 if new_exp + 1 >= r.b.exponent then
1536 v.state := DO_FMADD;
1539 v.state := RENORM_C;
1542 if r.b.mantissa(54) = '1' then
1546 v.state := RENORM_B;
1552 renorm_sqrt := r.is_sqrt;
1553 v.state := RENORM_B2;
1557 if r.is_sqrt = '0' then
1558 v.result_exp := r.result_exp + r.shift;
1560 v.result_exp := new_exp;
1567 v.state := RENORM_C2;
1571 v.result_exp := new_exp;
1572 if r.insn(3) = '0' or r.b.class = ZERO then
1577 if new_exp + 1 >= r.b.exponent then
1581 v.state := DO_FMADD;
1585 -- transferring B to R
1586 v.shift := r.b.exponent - r.a.exponent;
1587 v.result_exp := r.b.exponent;
1589 v.state := ADD_SHIFT;
1592 -- r.shift = - exponent difference, r.longmask = 0
1593 opsel_r <= RES_SHIFT;
1596 v.longmask := r.single_prec;
1597 if r.add_bsmall = '1' then
1605 -- r.opsel_a = AIN_A if r.add_bsmall = 1 else AIN_B
1607 opsel_binv <= r.is_subtract;
1608 carry_in <= r.is_subtract and not r.x;
1609 v.shift := to_signed(-1, EXP_BITS);
1613 -- check for overflow or negative result (can't get both)
1615 if r.r(63) = '1' then
1616 -- result is opposite sign to expected
1617 v.result_sign := not r.result_sign;
1621 elsif r.r(55) = '1' then
1622 -- sum overflowed, shift right
1623 opsel_r <= RES_SHIFT;
1625 if exp_huge = '1' then
1626 v.state := ROUND_OFLOW;
1628 v.state := ROUNDING;
1630 elsif r.r(54) = '1' then
1632 v.state := ROUNDING;
1633 elsif (r_hi_nz or r_lo_nz or r.r(1) or r.r(0)) = '0' then
1634 -- r.x must be zero at this point
1635 v.result_class := ZERO;
1636 if r.is_subtract = '1' then
1637 -- set result sign depending on rounding mode
1638 v.result_sign := r.round_mode(1) and r.round_mode(0);
1643 v.state := NORMALIZE;
1647 -- r.opsel_a = AIN_A
1654 if r.r(63) = '1' then
1655 -- A is smaller in magnitude
1656 v.cr_result := not r.a.negative & r.a.negative & "00";
1657 elsif (r_hi_nz or r_lo_nz) = '0' then
1658 v.cr_result := "0010";
1660 v.cr_result := r.a.negative & not r.a.negative & "00";
1662 v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result;
1663 v.instr_done := '1';
1667 f_to_multiply.valid <= r.first;
1668 opsel_r <= RES_MULT;
1669 if multiply_to_f.valid = '1' then
1674 -- Addend is bigger here
1675 v.result_sign := not (r.b.negative xor r.insn(1) xor r.insn(2));
1676 -- note v.shift is at most -2 here
1677 v.shift := r.result_exp - r.b.exponent;
1678 opsel_r <= RES_MULT;
1681 f_to_multiply.valid <= r.first;
1682 if multiply_to_f.valid = '1' then
1684 v.state := ADD_SHIFT;
1688 -- Product is potentially bigger here
1689 -- r.shift = addend exp - product exp + 64, r.r = r.b.mantissa
1692 v.shift := r.shift - to_signed(64, EXP_BITS);
1696 -- r.shift = addend exp - product exp
1697 opsel_r <= RES_SHIFT;
1702 msel_add <= MULADD_RS;
1703 f_to_multiply.valid <= r.first;
1704 msel_inv <= r.is_subtract;
1705 opsel_r <= RES_MULT;
1708 if multiply_to_f.valid = '1' then
1713 -- negate R:S:X if negative
1714 if r.r(63) = '1' then
1715 v.result_sign := not r.result_sign;
1717 carry_in <= not (s_nz or r.x);
1721 v.shift := to_signed(56, EXP_BITS);
1725 -- r.shift = 56 (or 0, but only if r is now nonzero)
1726 if (r.r(56) or r_hi_nz or r_lo_nz or r.r(1) or r.r(0)) = '0' then
1728 -- must be a subtraction, and r.x must be zero
1729 v.result_class := ZERO;
1730 v.result_sign := r.round_mode(1) and r.round_mode(0);
1733 -- R is all zeroes but there are non-zero bits in S
1734 -- so shift them into R and set S to 0
1735 opsel_r <= RES_SHIFT;
1737 -- stay in state FMADD_6
1739 elsif r.r(56 downto 54) = "001" then
1743 v.state := NORMALIZE;
1747 -- r.opsel_a = AIN_B
1748 -- wait one cycle for inverse_table[B] lookup
1750 if r.insn(4) = '0' then
1751 if r.insn(3) = '0' then
1756 elsif r.insn(2) = '0' then
1763 -- compute Y = inverse_table[B] (when count=0); P = 2 - B * Y
1765 msel_add <= MULADD_CONST;
1774 f_to_multiply.valid <= r.first;
1775 if multiply_to_f.valid = '1' then
1777 v.count := r.count + 1;
1782 -- compute Y = P = P * Y
1785 f_to_multiply.valid <= r.first;
1787 if multiply_to_f.valid = '1' then
1797 -- compute R = P = A * Y (quotient)
1801 f_to_multiply.valid <= r.first;
1803 if multiply_to_f.valid = '1' then
1804 opsel_r <= RES_MULT;
1810 -- compute P = A - B * R (remainder)
1813 msel_add <= MULADD_A;
1815 f_to_multiply.valid <= r.first;
1816 if multiply_to_f.valid = '1' then
1821 -- test if remainder is 0 or >= B
1822 if pcmpb_lt = '1' then
1823 -- quotient is correct, set X if remainder non-zero
1824 v.x := r.p(58) or px_nz;
1826 -- quotient needs to be incremented by 1
1828 v.x := not pcmpb_eq;
1833 opsel_r <= RES_MISC;
1835 v.shift := to_signed(1, EXP_BITS);
1836 v.state := NORMALIZE;
1839 v.cr_result(1) := exp_tiny or exp_huge;
1840 if exp_tiny = '1' or exp_huge = '1' or r.a.class = ZERO or r.first = '0' then
1841 v.instr_done := '1';
1844 v.shift := r.a.exponent;
1845 v.doing_ftdiv := "10";
1849 opsel_r <= RES_MISC;
1851 sqrt_exp := r.b.exponent(EXP_BITS-1) & r.b.exponent(EXP_BITS-1 downto 1);
1852 v.result_exp := - sqrt_exp;
1853 v.shift := to_signed(1, EXP_BITS);
1854 v.state := NORMALIZE;
1857 -- put invsqr[B] in R and compute P = invsqr[B] * B
1858 -- also transfer B (in R) to A
1860 opsel_r <= RES_MISC;
1864 f_to_multiply.valid <= '1';
1865 v.shift := to_signed(-1, EXP_BITS);
1870 -- shift R right one place
1871 -- not expecting multiplier result yet
1873 opsel_r <= RES_SHIFT;
1878 -- put R into Y, wait for product from multiplier
1882 if multiply_to_f.valid = '1' then
1883 -- put result into R
1884 opsel_r <= RES_MULT;
1890 -- compute 1.5 - Y * P
1893 msel_add <= MULADD_CONST;
1895 f_to_multiply.valid <= r.first;
1897 if multiply_to_f.valid = '1' then
1902 -- compute Y = Y * P
1905 f_to_multiply.valid <= '1';
1910 -- pipeline in R = R * P
1913 f_to_multiply.valid <= r.first;
1915 if multiply_to_f.valid = '1' then
1921 -- first multiply is done, put result in Y
1924 -- wait for second multiply (should be here already)
1926 if multiply_to_f.valid = '1' then
1927 -- put result into R
1928 opsel_r <= RES_MULT;
1930 v.count := r.count + 1;
1940 -- compute P = A - R * R, which can be +ve or -ve
1941 -- we arranged for B to be put into A earlier
1944 msel_add <= MULADD_A;
1947 f_to_multiply.valid <= r.first;
1948 if multiply_to_f.valid = '1' then
1954 -- compute P = P * Y
1955 -- since Y is an estimate of 1/sqrt(B), this makes P an
1956 -- estimate of the adjustment needed to R. Since the error
1957 -- could be negative and we have an unsigned multiplier, the
1958 -- upper bits can be wrong, but it turns out the lowest 8 bits
1959 -- are correct and are all we need (given 3 iterations through
1960 -- SQRT_4 to SQRT_7).
1964 f_to_multiply.valid <= r.first;
1965 if multiply_to_f.valid = '1' then
1970 -- Add the bottom 8 bits of P, sign-extended,
1971 -- divided by 4, onto R.
1972 -- The division by 4 is because R is 10.54 format
1973 -- whereas P is 8.56 format.
1975 sqrt_exp := r.b.exponent(EXP_BITS-1) & r.b.exponent(EXP_BITS-1 downto 1);
1976 v.result_exp := sqrt_exp;
1977 v.shift := to_signed(1, EXP_BITS);
1982 -- compute P = A - R * R (remainder)
1983 -- also put 2 * R + 1 into B for comparison with P
1986 msel_add <= MULADD_A;
1988 f_to_multiply.valid <= r.first;
1991 if multiply_to_f.valid = '1' then
1996 -- test if remainder is 0 or >= B = 2*R + 1
1997 if pcmpb_lt = '1' then
1998 -- square root is correct, set X if remainder non-zero
1999 v.x := r.p(58) or px_nz;
2001 -- square root needs to be incremented by 1
2003 v.x := not pcmpb_eq;
2008 -- r.shift = b.exponent - 52
2009 opsel_r <= RES_SHIFT;
2011 v.state := INT_ROUND;
2012 v.shift := to_signed(-2, EXP_BITS);
2016 opsel_r <= RES_SHIFT;
2017 round := fp_rounding(r.r, r.x, '0', r.round_mode, r.result_sign);
2018 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
2019 -- Check for negative values that don't round to 0 for fcti*u*
2020 if r.insn(8) = '1' and r.result_sign = '1' and
2021 (r_hi_nz or r_lo_nz or v.fpscr(FPSCR_FR)) = '1' then
2022 v.state := INT_OFLOW;
2024 v.state := INT_FINAL;
2028 -- r.shift = b.exponent - 54;
2029 opsel_r <= RES_SHIFT;
2030 v.state := INT_FINAL;
2033 -- Negate if necessary, and increment for rounding if needed
2034 opsel_ainv <= r.result_sign;
2035 carry_in <= r.fpscr(FPSCR_FR) xor r.result_sign;
2036 -- Check for possible overflows
2037 case r.insn(9 downto 8) is
2038 when "00" => -- fctiw[z]
2039 need_check := r.r(31) or (r.r(30) and not r.result_sign);
2040 when "01" => -- fctiwu[z]
2041 need_check := r.r(31);
2042 when "10" => -- fctid[z]
2043 need_check := r.r(63) or (r.r(62) and not r.result_sign);
2044 when others => -- fctidu[z]
2045 need_check := r.r(63);
2047 if need_check = '1' then
2048 v.state := INT_CHECK;
2050 if r.fpscr(FPSCR_FI) = '1' then
2051 v.fpscr(FPSCR_XX) := '1';
2057 if r.insn(9) = '0' then
2062 misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
2063 if (r.insn(8) = '0' and msb /= r.result_sign) or
2064 (r.insn(8) = '1' and msb /= '1') then
2065 opsel_r <= RES_MISC;
2066 v.fpscr(FPSCR_VXCVI) := '1';
2069 if r.fpscr(FPSCR_FI) = '1' then
2070 v.fpscr(FPSCR_XX) := '1';
2076 opsel_r <= RES_MISC;
2077 misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
2078 if r.b.class = NAN then
2081 v.fpscr(FPSCR_VXCVI) := '1';
2086 -- r.shift = b.exponent - 52
2087 opsel_r <= RES_SHIFT;
2089 v.state := ROUNDING;
2092 if r.is_multiply = '1' and px_nz = '1' then
2095 if r.r(63 downto 54) /= "0000000001" then
2097 v.state := NORMALIZE;
2100 if exp_tiny = '1' then
2101 v.shift := new_exp - min_exp;
2102 v.state := ROUND_UFLOW;
2103 elsif exp_huge = '1' then
2104 v.state := ROUND_OFLOW;
2106 v.state := ROUNDING;
2111 -- Shift so we have 9 leading zeroes (we know R is non-zero)
2112 -- r.shift = clz(r.r) - 9
2113 opsel_r <= RES_SHIFT;
2115 if exp_tiny = '1' then
2116 v.shift := new_exp - min_exp;
2117 v.state := ROUND_UFLOW;
2118 elsif exp_huge = '1' then
2119 v.state := ROUND_OFLOW;
2121 v.state := ROUNDING;
2125 -- r.shift = - amount by which exponent underflows
2127 if r.fpscr(FPSCR_UE) = '0' then
2128 -- disabled underflow exception case
2129 -- have to denormalize before rounding
2130 opsel_r <= RES_SHIFT;
2132 v.state := ROUNDING;
2134 -- enabled underflow exception case
2135 -- if denormalized, have to normalize before rounding
2136 v.fpscr(FPSCR_UX) := '1';
2137 v.result_exp := r.result_exp + bias_exp;
2138 if r.r(54) = '0' then
2140 v.state := NORMALIZE;
2142 v.state := ROUNDING;
2147 v.fpscr(FPSCR_OX) := '1';
2148 if r.fpscr(FPSCR_OE) = '0' then
2149 -- disabled overflow exception
2150 -- result depends on rounding mode
2151 v.fpscr(FPSCR_XX) := '1';
2152 v.fpscr(FPSCR_FI) := '1';
2153 if r.round_mode(1 downto 0) = "00" or
2154 (r.round_mode(1) = '1' and r.round_mode(0) = r.result_sign) then
2155 v.result_class := INFINITY;
2156 v.fpscr(FPSCR_FR) := '1';
2158 v.fpscr(FPSCR_FR) := '0';
2160 -- construct largest representable number
2161 v.result_exp := max_exp;
2162 opsel_r <= RES_MISC;
2163 misc_sel <= "001" & r.single_prec;
2166 -- enabled overflow exception
2167 v.result_exp := r.result_exp - bias_exp;
2168 v.state := ROUNDING;
2173 round := fp_rounding(r.r, r.x, r.single_prec, r.round_mode, r.result_sign);
2174 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
2175 if round(1) = '1' then
2176 -- increment the LSB for the precision
2178 v.shift := to_signed(-1, EXP_BITS);
2179 v.state := ROUNDING_2;
2181 if r.r(54) = '0' then
2182 -- result after masking could be zero, or could be a
2183 -- denormalized result that needs to be renormalized
2185 v.state := ROUNDING_3;
2190 if round(0) = '1' then
2191 v.fpscr(FPSCR_XX) := '1';
2192 if r.tiny = '1' then
2193 v.fpscr(FPSCR_UX) := '1';
2198 -- Check for overflow during rounding
2201 if r.r(55) = '1' then
2202 opsel_r <= RES_SHIFT;
2203 if exp_huge = '1' then
2204 v.state := ROUND_OFLOW;
2208 elsif r.r(54) = '0' then
2209 -- Do CLZ so we can renormalize the result
2211 v.state := ROUNDING_3;
2217 -- r.shift = clz(r.r) - 9
2218 mant_nz := r_hi_nz or (r_lo_nz and not r.single_prec);
2219 if mant_nz = '0' then
2220 v.result_class := ZERO;
2221 if r.is_subtract = '1' then
2222 -- set result sign depending on rounding mode
2223 v.result_sign := r.round_mode(1) and r.round_mode(0);
2227 -- Renormalize result after rounding
2228 opsel_r <= RES_SHIFT;
2229 v.denorm := exp_tiny;
2230 v.shift := new_exp - to_signed(-1022, EXP_BITS);
2231 if new_exp < to_signed(-1022, EXP_BITS) then
2239 -- r.shift = result_exp - -1022
2240 opsel_r <= RES_SHIFT;
2244 if (r.use_a = '1' and r.a.class = NAN and r.a.mantissa(53) = '0') or
2245 (r.use_b = '1' and r.b.class = NAN and r.b.mantissa(53) = '0') or
2246 (r.use_c = '1' and r.c.class = NAN and r.c.mantissa(53) = '0') then
2248 v.fpscr(FPSCR_VXSNAN) := '1';
2251 if r.use_a = '1' and r.a.class = NAN then
2253 elsif r.use_b = '1' and r.b.class = NAN then
2255 elsif r.use_c = '1' and r.c.class = NAN then
2258 v.state := EXC_RESULT;
2261 -- r.opsel_a = AIN_A, AIN_B or AIN_C according to which input is the result
2264 v.result_sign := r.b.negative xor r.negate;
2265 v.result_exp := r.b.exponent;
2266 v.result_class := r.b.class;
2268 v.result_sign := r.c.negative xor r.negate;
2269 v.result_exp := r.c.exponent;
2270 v.result_class := r.c.class;
2272 v.result_sign := r.a.negative xor r.negate;
2273 v.result_exp := r.a.exponent;
2274 v.result_class := r.a.class;
2280 if zero_divide = '1' then
2281 v.fpscr(FPSCR_ZX) := '1';
2283 if qnan_result = '1' then
2285 v.result_class := NAN;
2286 v.result_sign := '0';
2288 opsel_r <= RES_MISC;
2291 if invalid = '1' then
2294 if arith_done = '1' then
2295 -- Enabled invalid exception doesn't write result or FPRF
2296 -- Neither does enabled zero-divide exception
2297 if (v.invalid and r.fpscr(FPSCR_VE)) = '0' and
2298 (zero_divide and r.fpscr(FPSCR_ZE)) = '0' then
2299 v.writing_back := '1';
2300 v.update_fprf := '1';
2302 v.instr_done := '1';
2307 -- Multiplier and divide/square root data path
2310 f_to_multiply.data1 <= r.a.mantissa(61 downto 0) & "00";
2312 f_to_multiply.data1 <= r.b.mantissa(61 downto 0) & "00";
2314 f_to_multiply.data1 <= r.y;
2316 f_to_multiply.data1 <= r.r(61 downto 0) & "00";
2320 f_to_multiply.data2 <= r.c.mantissa(61 downto 0) & "00";
2322 f_to_multiply.data2 <= x"00" & inverse_est & '0' & x"000000000";
2324 f_to_multiply.data2 <= r.p;
2326 f_to_multiply.data2 <= r.r(61 downto 0) & "00";
2328 maddend := (others => '0');
2330 when MULADD_CONST =>
2331 -- addend is 2.0 or 1.5 in 16.112 format
2332 if r.is_sqrt = '0' then
2333 maddend(113) := '1'; -- 2.0
2335 maddend(112 downto 111) := "11"; -- 1.5
2338 -- addend is A in 16.112 format
2339 maddend(121 downto 58) := r.a.mantissa;
2341 -- addend is concatenation of R and S in 16.112 format
2342 maddend := "000000" & r.r & r.s & "00";
2345 if msel_inv = '1' then
2346 f_to_multiply.addend <= not maddend;
2348 f_to_multiply.addend <= maddend;
2350 f_to_multiply.not_result <= msel_inv;
2352 v.y := f_to_multiply.data2;
2354 if multiply_to_f.valid = '1' then
2355 if pshift = '0' then
2356 v.p := multiply_to_f.result(63 downto 0);
2358 v.p := multiply_to_f.result(119 downto 56);
2363 -- This has A and B input multiplexers, an adder, a shifter,
2364 -- count-leading-zeroes logic, and a result mux.
2365 if r.longmask = '1' then
2366 mshift := r.shift + to_signed(-29, EXP_BITS);
2370 if mshift < to_signed(-64, EXP_BITS) then
2371 mask := (others => '1');
2372 elsif mshift >= to_signed(0, EXP_BITS) then
2373 mask := (others => '0');
2375 mask := right_mask(unsigned(mshift(5 downto 0)));
2381 in_a0 := r.a.mantissa;
2383 in_a0 := r.b.mantissa;
2385 in_a0 := r.c.mantissa;
2387 if (or (mask and in_a0)) = '1' and set_x = '1' then
2390 if opsel_ainv = '1' then
2396 in_b0 := (others => '0');
2400 round_inc := (31 => r.single_prec, 2 => not r.single_prec, others => '0');
2403 -- BIN_PS6, 6 LSBs of P/4 sign-extended to 64
2404 in_b0 := std_ulogic_vector(resize(signed(r.p(7 downto 2)), 64));
2406 if opsel_binv = '1' then
2410 if r.shift >= to_signed(-64, EXP_BITS) and r.shift <= to_signed(63, EXP_BITS) then
2411 shift_res := shifter_64(r.r & (shiftin or r.s(55)) & r.s(54 downto 0),
2412 std_ulogic_vector(r.shift(6 downto 0)));
2414 shift_res := (others => '0');
2416 sum := std_ulogic_vector(unsigned(in_a) + unsigned(in_b) + carry_in);
2417 if opsel_mask = '1' then
2418 sum(1 downto 0) := "00";
2419 if r.single_prec = '1' then
2420 sum(30 downto 2) := (others => '0');
2427 result <= shift_res;
2429 result <= multiply_to_f.result(121 downto 58);
2433 misc := x"00000000" & (r.fpscr and fpscr_mask);
2435 -- generated QNaN mantissa
2436 misc := x"0020000000000000";
2438 -- mantissa of max representable DP number
2439 misc := x"007ffffffffffffc";
2441 -- mantissa of max representable SP number
2442 misc := x"007fffff80000000";
2445 misc := r.a.mantissa(31 downto 0) & r.b.mantissa(31 downto 0);
2448 misc := r.a.mantissa(63 downto 32) & r.b.mantissa(63 downto 32);
2450 misc := 10x"000" & inverse_est & 35x"000000000";
2452 -- max positive result for fctiw[z]
2453 misc := x"000000007fffffff";
2455 -- max negative result for fctiw[z]
2456 misc := x"ffffffff80000000";
2458 -- max positive result for fctiwu[z]
2459 misc := x"00000000ffffffff";
2461 -- max negative result for fctiwu[z]
2462 misc := x"0000000000000000";
2464 -- max positive result for fctid[z]
2465 misc := x"7fffffffffffffff";
2467 -- max negative result for fctid[z]
2468 misc := x"8000000000000000";
2470 -- max positive result for fctidu[z]
2471 misc := x"ffffffffffffffff";
2473 -- max negative result for fctidu[z]
2474 misc := x"0000000000000000";
2476 misc := x"0000000000000000";
2484 v.s := std_ulogic_vector(unsigned(not r.s) + (not r.x));
2486 v.s := multiply_to_f.result(57 downto 2);
2488 v.s := shift_res(63 downto 8);
2489 if shift_res(7 downto 0) /= x"00" then
2493 v.s := (others => '0');
2498 v.a.exponent := new_exp;
2499 v.a.mantissa := shift_res;
2502 v.b.exponent := new_exp;
2503 v.b.mantissa := shift_res;
2506 v.c.exponent := new_exp;
2507 v.c.mantissa := shift_res;
2510 if opsel_r = RES_SHIFT then
2511 v.result_exp := new_exp;
2514 if renormalize = '1' then
2515 clz := count_left_zeroes(r.r);
2516 if renorm_sqrt = '1' then
2517 -- make denormalized value end up with even exponent
2520 v.shift := resize(signed('0' & clz) - 9, EXP_BITS);
2523 if r.int_result = '1' then
2526 fp_result <= pack_dp(r.result_sign, r.result_class, r.result_exp, r.r,
2527 r.single_prec, r.quieten_nan);
2529 if r.update_fprf = '1' then
2530 v.fpscr(FPSCR_C downto FPSCR_FU) := result_flags(r.result_sign, r.result_class,
2531 r.r(54) and not r.denorm);
2534 v.fpscr(FPSCR_VX) := (or (v.fpscr(FPSCR_VXSNAN downto FPSCR_VXVC))) or
2535 (or (v.fpscr(FPSCR_VXSOFT downto FPSCR_VXCVI)));
2536 v.fpscr(FPSCR_FEX) := or (v.fpscr(FPSCR_VX downto FPSCR_XX) and
2537 v.fpscr(FPSCR_VE downto FPSCR_XE));
2538 if update_fx = '1' and
2539 (v.fpscr(FPSCR_VX downto FPSCR_XX) and not r.old_exc) /= "00000" then
2540 v.fpscr(FPSCR_FX) := '1';
2543 v.cr_result := v.fpscr(FPSCR_FX downto FPSCR_OX);
2546 if illegal = '1' then
2547 v.instr_done := '0';
2549 v.writing_back := '0';
2553 v.do_intr := v.instr_done and v.fpscr(FPSCR_FEX) and r.fe_mode;
2554 if v.state /= IDLE or v.do_intr = '1' then
2560 e_out.illegal <= illegal;
2563 end architecture behaviour;