1 -- Floating-point unit for Microwatt
4 use ieee.std_logic_1164.all;
5 use ieee.numeric_std.all;
8 use work.insn_helpers.all;
9 use work.decode_types.all;
10 use work.crhelpers.all;
19 e_in : in Execute1toFPUType;
20 e_out : out FPUToExecute1Type;
22 w_out : out FPUToWritebackType
26 architecture behaviour of fpu is
27 type fp_number_class is (ZERO, FINITE, INFINITY, NAN);
29 constant EXP_BITS : natural := 13;
-- Decoded form of one floating-point operand.  This is the record type
-- returned by decode_dp.
31 type fpu_reg_type is record
-- One of ZERO, FINITE, INFINITY, NAN (see fp_number_class).
32 class : fp_number_class;
-- Sign of the operand; decode_dp copies bit 63 of the register here.
33 negative : std_ulogic;
34 exponent : signed(EXP_BITS-1 downto 0); -- unbiased
-- decode_dp places the implicit unit bit at bit 54 and the 52 stored
-- fraction bits at bits 53..2; bits 1..0 are zero on decode.
35 mantissa : std_ulogic_vector(63 downto 0); -- 10.54 format
-- States of the FPU sequencer.  IDLE is the only state in which a new
-- instruction may arrive: the clocked process asserts that e_in.valid is
-- never set while r.state /= IDLE.  The DO_* states are the ones selected
-- by instruction decode (e.g. DO_FTSQRT, DO_MTFSFI); the remaining states
-- are later steps of multi-cycle operations.
-- NOTE(review): grouping of the later states by operation (ADD_*, FMADD_*,
-- DIV_*, SQRT_*, INT_*, ROUND*) is inferred from their names -- confirm.
38 type state_t is (IDLE,
39 DO_MCRFS, DO_MTFSB, DO_MTFSFI, DO_MFFS, DO_MTFSF,
40 DO_FMR, DO_FMRG, DO_FCMP, DO_FTDIV, DO_FTSQRT,
43 DO_FADD, DO_FMUL, DO_FDIV, DO_FSQRT, DO_FMADD,
47 ADD_1, ADD_SHIFT, ADD_2, ADD_3,
50 FMADD_1, FMADD_2, FMADD_3,
51 FMADD_4, FMADD_5, FMADD_6,
53 DIV_2, DIV_3, DIV_4, DIV_5, DIV_6,
57 SQRT_1, SQRT_2, SQRT_3, SQRT_4,
58 SQRT_5, SQRT_6, SQRT_7, SQRT_8,
59 SQRT_9, SQRT_10, SQRT_11, SQRT_12,
60 INT_SHIFT, INT_ROUND, INT_ISHIFT,
61 INT_FINAL, INT_CHECK, INT_OFLOW,
63 ROUND_UFLOW, ROUND_OFLOW,
64 ROUNDING, ROUNDING_2, ROUNDING_3,
69 NAN_RESULT, EXC_RESULT);
-- Registered state of the FPU.  Signals r (current value) and rin (next
-- value) are of this type.
71 type reg_type is record
-- Marks completion of the instruction: gates w_out.valid (together with
-- "not do_intr") and w_out.write_cr_enable.
74 instr_done : std_ulogic;
-- Instruction word; the state handlers pick their operand fields
-- (BF, BFA, BT, U, FLM, ...) out of it.
77 insn : std_ulogic_vector(31 downto 0);
-- Destination register, loaded from e_in.frt; drives w_out.write_reg.
78 dest_fpr : gspr_index_t;
-- Loaded from e_in.single; selects single-precision exponent limits and
-- rounding position.
82 single_prec : std_ulogic;
-- FPSCR image; bit FPSCR_FEX is exported as e_out.exception.
83 fpscr : std_ulogic_vector(31 downto 0);
87 r : std_ulogic_vector(63 downto 0); -- 10.54 format
88 s : std_ulogic_vector(55 downto 0); -- extended fraction
90 p : std_ulogic_vector(63 downto 0); -- 8.56 format
91 y : std_ulogic_vector(63 downto 0); -- 8.56 format
92 result_sign : std_ulogic;
93 result_class : fp_number_class;
94 result_exp : signed(EXP_BITS-1 downto 0);
-- Shift amount; new_exp is computed as result_exp - shift.
95 shift : signed(EXP_BITS-1 downto 0);
-- Drives w_out.write_enable.
96 writing_back : std_ulogic;
97 int_result : std_ulogic;
-- 4-bit CR field value; replicated into all eight fields of
-- w_out.write_cr_data, with cr_mask choosing the field actually written.
98 cr_result : std_ulogic_vector(3 downto 0);
-- Drives w_out.write_cr_mask.
99 cr_mask : std_ulogic_vector(7 downto 0);
-- Snapshot of FPSCR bits FPSCR_VX downto FPSCR_XX.
100 old_exc : std_ulogic_vector(4 downto 0);
101 update_fprf : std_ulogic;
102 quieten_nan : std_ulogic;
-- Set to '0' & FPSCR[RN] when an instruction is captured; some
-- instructions override it (e.g. "001").
105 round_mode : std_ulogic_vector(2 downto 0);
106 is_subtract : std_ulogic;
107 exp_cmp : std_ulogic;
108 madd_cmp : std_ulogic;
109 add_bsmall : std_ulogic;
110 is_multiply : std_ulogic;
-- Selects the reciprocal-square-root part of the lookup table in
-- lut_access.
111 is_sqrt : std_ulogic;
113 count : unsigned(1 downto 0);
-- When set, the double-precision exponent limits are tightened:
-- bit 1 lowers max_exp to 1020, bit 0 raises min_exp to -1021.
114 doing_ftdiv : std_ulogic_vector(1 downto 0);
-- NOTE(review): presumably one of the AIN_* selector codes -- confirm.
115 opsel_a : std_ulogic_vector(1 downto 0);
119 invalid : std_ulogic;
123 type lookup_table is array(0 to 1023) of std_ulogic_vector(17 downto 0);
125 signal r, rin : reg_type;
127 signal fp_result : std_ulogic_vector(63 downto 0);
128 signal opsel_b : std_ulogic_vector(1 downto 0);
129 signal opsel_r : std_ulogic_vector(1 downto 0);
130 signal opsel_s : std_ulogic_vector(1 downto 0);
131 signal opsel_ainv : std_ulogic;
132 signal opsel_amask : std_ulogic;
133 signal opsel_binv : std_ulogic;
134 signal in_a : std_ulogic_vector(63 downto 0);
135 signal in_b : std_ulogic_vector(63 downto 0);
136 signal result : std_ulogic_vector(63 downto 0);
137 signal carry_in : std_ulogic;
138 signal lost_bits : std_ulogic;
139 signal r_hi_nz : std_ulogic;
140 signal r_lo_nz : std_ulogic;
141 signal s_nz : std_ulogic;
142 signal misc_sel : std_ulogic_vector(3 downto 0);
143 signal f_to_multiply : MultiplyInputType;
144 signal multiply_to_f : MultiplyOutputType;
145 signal msel_1 : std_ulogic_vector(1 downto 0);
146 signal msel_2 : std_ulogic_vector(1 downto 0);
147 signal msel_add : std_ulogic_vector(1 downto 0);
148 signal msel_inv : std_ulogic;
149 signal inverse_est : std_ulogic_vector(18 downto 0);
152 constant AIN_R : std_ulogic_vector(1 downto 0) := "00";
153 constant AIN_A : std_ulogic_vector(1 downto 0) := "01";
154 constant AIN_B : std_ulogic_vector(1 downto 0) := "10";
155 constant AIN_C : std_ulogic_vector(1 downto 0) := "11";
157 constant BIN_ZERO : std_ulogic_vector(1 downto 0) := "00";
158 constant BIN_R : std_ulogic_vector(1 downto 0) := "01";
159 constant BIN_MASK : std_ulogic_vector(1 downto 0) := "10";
160 constant BIN_PS6 : std_ulogic_vector(1 downto 0) := "11";
162 constant RES_SUM : std_ulogic_vector(1 downto 0) := "00";
163 constant RES_SHIFT : std_ulogic_vector(1 downto 0) := "01";
164 constant RES_MULT : std_ulogic_vector(1 downto 0) := "10";
165 constant RES_MISC : std_ulogic_vector(1 downto 0) := "11";
167 constant S_ZERO : std_ulogic_vector(1 downto 0) := "00";
168 constant S_NEG : std_ulogic_vector(1 downto 0) := "01";
169 constant S_SHIFT : std_ulogic_vector(1 downto 0) := "10";
170 constant S_MULT : std_ulogic_vector(1 downto 0) := "11";
173 constant MUL1_A : std_ulogic_vector(1 downto 0) := "00";
174 constant MUL1_B : std_ulogic_vector(1 downto 0) := "01";
175 constant MUL1_Y : std_ulogic_vector(1 downto 0) := "10";
176 constant MUL1_R : std_ulogic_vector(1 downto 0) := "11";
178 constant MUL2_C : std_ulogic_vector(1 downto 0) := "00";
179 constant MUL2_LUT : std_ulogic_vector(1 downto 0) := "01";
180 constant MUL2_P : std_ulogic_vector(1 downto 0) := "10";
181 constant MUL2_R : std_ulogic_vector(1 downto 0) := "11";
183 constant MULADD_ZERO : std_ulogic_vector(1 downto 0) := "00";
184 constant MULADD_CONST : std_ulogic_vector(1 downto 0) := "01";
185 constant MULADD_A : std_ulogic_vector(1 downto 0) := "10";
186 constant MULADD_RS : std_ulogic_vector(1 downto 0) := "11";
188 -- Inverse lookup table, addressed by 10 bits: 2 block-select bits followed by the top 8 fraction bits.
189 -- The first 256 entries are the reciprocal (1/x) lookup table,
190 -- and the remaining 768 entries are the reciprocal square root table.
191 -- Output range is [0.5, 1) in 0.19 format, though the top
192 -- bit isn't stored since it is always 1.
193 -- Each output value is the inverse of the center of the input
194 -- range for the value, i.e. entry 0 is 1 / (1 + 1/512),
195 -- entry 1 is 1 / (1 + 3/512), etc.
196 signal inverse_table : lookup_table := (
198 -- Unit bit is assumed to be 1, so input range is [1, 2)
199 18x"3fc01", 18x"3f411", 18x"3ec31", 18x"3e460", 18x"3dc9f", 18x"3d4ec", 18x"3cd49", 18x"3c5b5",
200 18x"3be2f", 18x"3b6b8", 18x"3af4f", 18x"3a7f4", 18x"3a0a7", 18x"39968", 18x"39237", 18x"38b14",
201 18x"383fe", 18x"37cf5", 18x"375f9", 18x"36f0a", 18x"36828", 18x"36153", 18x"35a8a", 18x"353ce",
202 18x"34d1e", 18x"3467a", 18x"33fe3", 18x"33957", 18x"332d7", 18x"32c62", 18x"325f9", 18x"31f9c",
203 18x"3194a", 18x"31303", 18x"30cc7", 18x"30696", 18x"30070", 18x"2fa54", 18x"2f443", 18x"2ee3d",
204 18x"2e841", 18x"2e250", 18x"2dc68", 18x"2d68b", 18x"2d0b8", 18x"2caee", 18x"2c52e", 18x"2bf79",
205 18x"2b9cc", 18x"2b429", 18x"2ae90", 18x"2a900", 18x"2a379", 18x"29dfb", 18x"29887", 18x"2931b",
206 18x"28db8", 18x"2885e", 18x"2830d", 18x"27dc4", 18x"27884", 18x"2734d", 18x"26e1d", 18x"268f6",
207 18x"263d8", 18x"25ec1", 18x"259b3", 18x"254ac", 18x"24fad", 18x"24ab7", 18x"245c8", 18x"240e1",
208 18x"23c01", 18x"23729", 18x"23259", 18x"22d90", 18x"228ce", 18x"22413", 18x"21f60", 18x"21ab4",
209 18x"2160f", 18x"21172", 18x"20cdb", 18x"2084b", 18x"203c2", 18x"1ff40", 18x"1fac4", 18x"1f64f",
210 18x"1f1e1", 18x"1ed79", 18x"1e918", 18x"1e4be", 18x"1e069", 18x"1dc1b", 18x"1d7d4", 18x"1d392",
211 18x"1cf57", 18x"1cb22", 18x"1c6f3", 18x"1c2ca", 18x"1bea7", 18x"1ba8a", 18x"1b672", 18x"1b261",
212 18x"1ae55", 18x"1aa50", 18x"1a64f", 18x"1a255", 18x"19e60", 18x"19a70", 18x"19686", 18x"192a2",
213 18x"18ec3", 18x"18ae9", 18x"18715", 18x"18345", 18x"17f7c", 18x"17bb7", 18x"177f7", 18x"1743d",
214 18x"17087", 18x"16cd7", 18x"1692c", 18x"16585", 18x"161e4", 18x"15e47", 18x"15ab0", 18x"1571d",
215 18x"1538e", 18x"15005", 18x"14c80", 18x"14900", 18x"14584", 18x"1420d", 18x"13e9b", 18x"13b2d",
216 18x"137c3", 18x"1345e", 18x"130fe", 18x"12da2", 18x"12a4a", 18x"126f6", 18x"123a7", 18x"1205c",
217 18x"11d15", 18x"119d2", 18x"11694", 18x"11359", 18x"11023", 18x"10cf1", 18x"109c2", 18x"10698",
218 18x"10372", 18x"10050", 18x"0fd31", 18x"0fa17", 18x"0f700", 18x"0f3ed", 18x"0f0de", 18x"0edd3",
219 18x"0eacb", 18x"0e7c7", 18x"0e4c7", 18x"0e1ca", 18x"0ded2", 18x"0dbdc", 18x"0d8eb", 18x"0d5fc",
220 18x"0d312", 18x"0d02b", 18x"0cd47", 18x"0ca67", 18x"0c78a", 18x"0c4b1", 18x"0c1db", 18x"0bf09",
221 18x"0bc3a", 18x"0b96e", 18x"0b6a5", 18x"0b3e0", 18x"0b11e", 18x"0ae5f", 18x"0aba3", 18x"0a8eb",
222 18x"0a636", 18x"0a383", 18x"0a0d4", 18x"09e28", 18x"09b80", 18x"098da", 18x"09637", 18x"09397",
223 18x"090fb", 18x"08e61", 18x"08bca", 18x"08936", 18x"086a5", 18x"08417", 18x"0818c", 18x"07f04",
224 18x"07c7e", 18x"079fc", 18x"0777c", 18x"074ff", 18x"07284", 18x"0700d", 18x"06d98", 18x"06b26",
225 18x"068b6", 18x"0664a", 18x"063e0", 18x"06178", 18x"05f13", 18x"05cb1", 18x"05a52", 18x"057f5",
226 18x"0559a", 18x"05342", 18x"050ed", 18x"04e9a", 18x"04c4a", 18x"049fc", 18x"047b0", 18x"04567",
227 18x"04321", 18x"040dd", 18x"03e9b", 18x"03c5c", 18x"03a1f", 18x"037e4", 18x"035ac", 18x"03376",
228 18x"03142", 18x"02f11", 18x"02ce2", 18x"02ab5", 18x"0288b", 18x"02663", 18x"0243d", 18x"02219",
229 18x"01ff7", 18x"01dd8", 18x"01bbb", 18x"019a0", 18x"01787", 18x"01570", 18x"0135b", 18x"01149",
230 18x"00f39", 18x"00d2a", 18x"00b1e", 18x"00914", 18x"0070c", 18x"00506", 18x"00302", 18x"00100",
231 -- 1/sqrt(x) lookup table
232 -- Input is in the range [1, 4), i.e. two bits to the left of the
233 -- binary point. Those 2 bits index the following 3 blocks of 256 values.
235 18x"3fe00", 18x"3fa06", 18x"3f612", 18x"3f224", 18x"3ee3a", 18x"3ea58", 18x"3e67c", 18x"3e2a4",
236 18x"3ded2", 18x"3db06", 18x"3d73e", 18x"3d37e", 18x"3cfc2", 18x"3cc0a", 18x"3c85a", 18x"3c4ae",
237 18x"3c106", 18x"3bd64", 18x"3b9c8", 18x"3b630", 18x"3b29e", 18x"3af10", 18x"3ab86", 18x"3a802",
238 18x"3a484", 18x"3a108", 18x"39d94", 18x"39a22", 18x"396b6", 18x"3934e", 18x"38fea", 18x"38c8c",
239 18x"38932", 18x"385dc", 18x"3828a", 18x"37f3e", 18x"37bf6", 18x"378b2", 18x"37572", 18x"37236",
240 18x"36efe", 18x"36bca", 18x"3689a", 18x"36570", 18x"36248", 18x"35f26", 18x"35c06", 18x"358ea",
241 18x"355d4", 18x"352c0", 18x"34fb0", 18x"34ca4", 18x"3499c", 18x"34698", 18x"34398", 18x"3409c",
242 18x"33da2", 18x"33aac", 18x"337bc", 18x"334cc", 18x"331e2", 18x"32efc", 18x"32c18", 18x"32938",
243 18x"3265a", 18x"32382", 18x"320ac", 18x"31dd8", 18x"31b0a", 18x"3183e", 18x"31576", 18x"312b0",
244 18x"30fee", 18x"30d2e", 18x"30a74", 18x"307ba", 18x"30506", 18x"30254", 18x"2ffa4", 18x"2fcf8",
245 18x"2fa4e", 18x"2f7a8", 18x"2f506", 18x"2f266", 18x"2efca", 18x"2ed2e", 18x"2ea98", 18x"2e804",
246 18x"2e572", 18x"2e2e4", 18x"2e058", 18x"2ddce", 18x"2db48", 18x"2d8c6", 18x"2d646", 18x"2d3c8",
247 18x"2d14c", 18x"2ced4", 18x"2cc5e", 18x"2c9ea", 18x"2c77a", 18x"2c50c", 18x"2c2a2", 18x"2c038",
248 18x"2bdd2", 18x"2bb70", 18x"2b90e", 18x"2b6b0", 18x"2b454", 18x"2b1fa", 18x"2afa4", 18x"2ad4e",
249 18x"2aafc", 18x"2a8ac", 18x"2a660", 18x"2a414", 18x"2a1cc", 18x"29f86", 18x"29d42", 18x"29b00",
250 18x"298c2", 18x"29684", 18x"2944a", 18x"29210", 18x"28fda", 18x"28da6", 18x"28b74", 18x"28946",
251 18x"28718", 18x"284ec", 18x"282c4", 18x"2809c", 18x"27e78", 18x"27c56", 18x"27a34", 18x"27816",
252 18x"275fa", 18x"273e0", 18x"271c8", 18x"26fb0", 18x"26d9c", 18x"26b8a", 18x"2697a", 18x"2676c",
253 18x"26560", 18x"26356", 18x"2614c", 18x"25f46", 18x"25d42", 18x"25b40", 18x"2593e", 18x"25740",
254 18x"25542", 18x"25348", 18x"2514e", 18x"24f58", 18x"24d62", 18x"24b6e", 18x"2497c", 18x"2478c",
255 18x"2459e", 18x"243b0", 18x"241c6", 18x"23fde", 18x"23df6", 18x"23c10", 18x"23a2c", 18x"2384a",
256 18x"2366a", 18x"2348c", 18x"232ae", 18x"230d2", 18x"22efa", 18x"22d20", 18x"22b4a", 18x"22976",
257 18x"227a2", 18x"225d2", 18x"22402", 18x"22234", 18x"22066", 18x"21e9c", 18x"21cd2", 18x"21b0a",
258 18x"21944", 18x"2177e", 18x"215ba", 18x"213fa", 18x"21238", 18x"2107a", 18x"20ebc", 18x"20d00",
259 18x"20b46", 18x"2098e", 18x"207d6", 18x"20620", 18x"2046c", 18x"202b8", 18x"20108", 18x"1ff58",
260 18x"1fda8", 18x"1fbfc", 18x"1fa50", 18x"1f8a4", 18x"1f6fc", 18x"1f554", 18x"1f3ae", 18x"1f208",
261 18x"1f064", 18x"1eec2", 18x"1ed22", 18x"1eb82", 18x"1e9e4", 18x"1e846", 18x"1e6aa", 18x"1e510",
262 18x"1e378", 18x"1e1e0", 18x"1e04a", 18x"1deb4", 18x"1dd20", 18x"1db8e", 18x"1d9fc", 18x"1d86c",
263 18x"1d6de", 18x"1d550", 18x"1d3c4", 18x"1d238", 18x"1d0ae", 18x"1cf26", 18x"1cd9e", 18x"1cc18",
264 18x"1ca94", 18x"1c910", 18x"1c78c", 18x"1c60a", 18x"1c48a", 18x"1c30c", 18x"1c18e", 18x"1c010",
265 18x"1be94", 18x"1bd1a", 18x"1bba0", 18x"1ba28", 18x"1b8b2", 18x"1b73c", 18x"1b5c6", 18x"1b452",
266 18x"1b2e0", 18x"1b16e", 18x"1affe", 18x"1ae8e", 18x"1ad20", 18x"1abb4", 18x"1aa46", 18x"1a8dc",
268 18x"1a772", 18x"1a608", 18x"1a4a0", 18x"1a33a", 18x"1a1d4", 18x"1a070", 18x"19f0c", 18x"19da8",
269 18x"19c48", 18x"19ae6", 18x"19986", 18x"19828", 18x"196ca", 18x"1956e", 18x"19412", 18x"192b8",
270 18x"1915e", 18x"19004", 18x"18eae", 18x"18d56", 18x"18c00", 18x"18aac", 18x"18958", 18x"18804",
271 18x"186b2", 18x"18562", 18x"18412", 18x"182c2", 18x"18174", 18x"18026", 18x"17eda", 18x"17d8e",
272 18x"17c44", 18x"17afa", 18x"179b2", 18x"1786a", 18x"17724", 18x"175de", 18x"17498", 18x"17354",
273 18x"17210", 18x"170ce", 18x"16f8c", 18x"16e4c", 18x"16d0c", 18x"16bcc", 18x"16a8e", 18x"16950",
274 18x"16814", 18x"166d8", 18x"1659e", 18x"16464", 18x"1632a", 18x"161f2", 18x"160ba", 18x"15f84",
275 18x"15e4e", 18x"15d1a", 18x"15be6", 18x"15ab2", 18x"15980", 18x"1584e", 18x"1571c", 18x"155ec",
276 18x"154bc", 18x"1538e", 18x"15260", 18x"15134", 18x"15006", 18x"14edc", 18x"14db0", 18x"14c86",
277 18x"14b5e", 18x"14a36", 18x"1490e", 18x"147e6", 18x"146c0", 18x"1459a", 18x"14476", 18x"14352",
278 18x"14230", 18x"1410c", 18x"13fea", 18x"13eca", 18x"13daa", 18x"13c8a", 18x"13b6c", 18x"13a4e",
279 18x"13930", 18x"13814", 18x"136f8", 18x"135dc", 18x"134c2", 18x"133a8", 18x"1328e", 18x"13176",
280 18x"1305e", 18x"12f48", 18x"12e30", 18x"12d1a", 18x"12c06", 18x"12af2", 18x"129de", 18x"128ca",
281 18x"127b8", 18x"126a6", 18x"12596", 18x"12486", 18x"12376", 18x"12266", 18x"12158", 18x"1204a",
282 18x"11f3e", 18x"11e32", 18x"11d26", 18x"11c1a", 18x"11b10", 18x"11a06", 18x"118fc", 18x"117f4",
283 18x"116ec", 18x"115e4", 18x"114de", 18x"113d8", 18x"112d2", 18x"111ce", 18x"110ca", 18x"10fc6",
284 18x"10ec2", 18x"10dc0", 18x"10cbe", 18x"10bbc", 18x"10abc", 18x"109bc", 18x"108bc", 18x"107be",
285 18x"106c0", 18x"105c2", 18x"104c4", 18x"103c8", 18x"102cc", 18x"101d0", 18x"100d6", 18x"0ffdc",
286 18x"0fee2", 18x"0fdea", 18x"0fcf0", 18x"0fbf8", 18x"0fb02", 18x"0fa0a", 18x"0f914", 18x"0f81e",
287 18x"0f72a", 18x"0f636", 18x"0f542", 18x"0f44e", 18x"0f35a", 18x"0f268", 18x"0f176", 18x"0f086",
288 18x"0ef94", 18x"0eea4", 18x"0edb4", 18x"0ecc6", 18x"0ebd6", 18x"0eae8", 18x"0e9fa", 18x"0e90e",
289 18x"0e822", 18x"0e736", 18x"0e64a", 18x"0e55e", 18x"0e474", 18x"0e38a", 18x"0e2a0", 18x"0e1b8",
290 18x"0e0d0", 18x"0dfe8", 18x"0df00", 18x"0de1a", 18x"0dd32", 18x"0dc4c", 18x"0db68", 18x"0da82",
291 18x"0d99e", 18x"0d8ba", 18x"0d7d6", 18x"0d6f4", 18x"0d612", 18x"0d530", 18x"0d44e", 18x"0d36c",
292 18x"0d28c", 18x"0d1ac", 18x"0d0cc", 18x"0cfee", 18x"0cf0e", 18x"0ce30", 18x"0cd54", 18x"0cc76",
293 18x"0cb9a", 18x"0cabc", 18x"0c9e0", 18x"0c906", 18x"0c82a", 18x"0c750", 18x"0c676", 18x"0c59c",
294 18x"0c4c4", 18x"0c3ea", 18x"0c312", 18x"0c23a", 18x"0c164", 18x"0c08c", 18x"0bfb6", 18x"0bee0",
295 18x"0be0a", 18x"0bd36", 18x"0bc62", 18x"0bb8c", 18x"0baba", 18x"0b9e6", 18x"0b912", 18x"0b840",
296 18x"0b76e", 18x"0b69c", 18x"0b5cc", 18x"0b4fa", 18x"0b42a", 18x"0b35a", 18x"0b28a", 18x"0b1bc",
297 18x"0b0ee", 18x"0b01e", 18x"0af50", 18x"0ae84", 18x"0adb6", 18x"0acea", 18x"0ac1e", 18x"0ab52",
298 18x"0aa86", 18x"0a9bc", 18x"0a8f0", 18x"0a826", 18x"0a75c", 18x"0a694", 18x"0a5ca", 18x"0a502",
299 18x"0a43a", 18x"0a372", 18x"0a2aa", 18x"0a1e4", 18x"0a11c", 18x"0a056", 18x"09f90", 18x"09ecc",
301 18x"09e06", 18x"09d42", 18x"09c7e", 18x"09bba", 18x"09af6", 18x"09a32", 18x"09970", 18x"098ae",
302 18x"097ec", 18x"0972a", 18x"09668", 18x"095a8", 18x"094e8", 18x"09426", 18x"09368", 18x"092a8",
303 18x"091e8", 18x"0912a", 18x"0906c", 18x"08fae", 18x"08ef0", 18x"08e32", 18x"08d76", 18x"08cba",
304 18x"08bfe", 18x"08b42", 18x"08a86", 18x"089ca", 18x"08910", 18x"08856", 18x"0879c", 18x"086e2",
305 18x"08628", 18x"08570", 18x"084b6", 18x"083fe", 18x"08346", 18x"0828e", 18x"081d8", 18x"08120",
306 18x"0806a", 18x"07fb4", 18x"07efe", 18x"07e48", 18x"07d92", 18x"07cde", 18x"07c2a", 18x"07b76",
307 18x"07ac2", 18x"07a0e", 18x"0795a", 18x"078a8", 18x"077f4", 18x"07742", 18x"07690", 18x"075de",
308 18x"0752e", 18x"0747c", 18x"073cc", 18x"0731c", 18x"0726c", 18x"071bc", 18x"0710c", 18x"0705e",
309 18x"06fae", 18x"06f00", 18x"06e52", 18x"06da4", 18x"06cf6", 18x"06c4a", 18x"06b9c", 18x"06af0",
310 18x"06a44", 18x"06998", 18x"068ec", 18x"06840", 18x"06796", 18x"066ea", 18x"06640", 18x"06596",
311 18x"064ec", 18x"06442", 18x"0639a", 18x"062f0", 18x"06248", 18x"061a0", 18x"060f8", 18x"06050",
312 18x"05fa8", 18x"05f00", 18x"05e5a", 18x"05db4", 18x"05d0e", 18x"05c68", 18x"05bc2", 18x"05b1c",
313 18x"05a76", 18x"059d2", 18x"0592e", 18x"05888", 18x"057e4", 18x"05742", 18x"0569e", 18x"055fa",
314 18x"05558", 18x"054b6", 18x"05412", 18x"05370", 18x"052ce", 18x"0522e", 18x"0518c", 18x"050ec",
315 18x"0504a", 18x"04faa", 18x"04f0a", 18x"04e6a", 18x"04dca", 18x"04d2c", 18x"04c8c", 18x"04bee",
316 18x"04b50", 18x"04ab0", 18x"04a12", 18x"04976", 18x"048d8", 18x"0483a", 18x"0479e", 18x"04700",
317 18x"04664", 18x"045c8", 18x"0452c", 18x"04490", 18x"043f6", 18x"0435a", 18x"042c0", 18x"04226",
318 18x"0418a", 18x"040f0", 18x"04056", 18x"03fbe", 18x"03f24", 18x"03e8c", 18x"03df2", 18x"03d5a",
319 18x"03cc2", 18x"03c2a", 18x"03b92", 18x"03afa", 18x"03a62", 18x"039cc", 18x"03934", 18x"0389e",
320 18x"03808", 18x"03772", 18x"036dc", 18x"03646", 18x"035b2", 18x"0351c", 18x"03488", 18x"033f2",
321 18x"0335e", 18x"032ca", 18x"03236", 18x"031a2", 18x"03110", 18x"0307c", 18x"02fea", 18x"02f56",
322 18x"02ec4", 18x"02e32", 18x"02da0", 18x"02d0e", 18x"02c7c", 18x"02bec", 18x"02b5a", 18x"02aca",
323 18x"02a38", 18x"029a8", 18x"02918", 18x"02888", 18x"027f8", 18x"0276a", 18x"026da", 18x"0264a",
324 18x"025bc", 18x"0252e", 18x"024a0", 18x"02410", 18x"02384", 18x"022f6", 18x"02268", 18x"021da",
325 18x"0214e", 18x"020c0", 18x"02034", 18x"01fa8", 18x"01f1c", 18x"01e90", 18x"01e04", 18x"01d78",
326 18x"01cee", 18x"01c62", 18x"01bd8", 18x"01b4c", 18x"01ac2", 18x"01a38", 18x"019ae", 18x"01924",
327 18x"0189c", 18x"01812", 18x"01788", 18x"01700", 18x"01676", 18x"015ee", 18x"01566", 18x"014de",
328 18x"01456", 18x"013ce", 18x"01346", 18x"012c0", 18x"01238", 18x"011b2", 18x"0112c", 18x"010a4",
329 18x"0101e", 18x"00f98", 18x"00f12", 18x"00e8c", 18x"00e08", 18x"00d82", 18x"00cfe", 18x"00c78",
330 18x"00bf4", 18x"00b70", 18x"00aec", 18x"00a68", 18x"009e4", 18x"00960", 18x"008dc", 18x"00858",
331 18x"007d6", 18x"00752", 18x"006d0", 18x"0064e", 18x"005cc", 18x"0054a", 18x"004c8", 18x"00446",
332 18x"003c4", 18x"00342", 18x"002c2", 18x"00240", 18x"001c0", 18x"00140", 18x"000c0", 18x"00040"
335 -- Left and right shifter with 120 bit input and 64 bit output.
336 -- Shifts inp left by shift bits and returns the upper 64 bits of
337 -- the result. The shift parameter is interpreted as a signed
338 -- number in the range -64..63, with negative values indicating
340 function shifter_64(inp: std_ulogic_vector(119 downto 0);
341 shift: std_ulogic_vector(6 downto 0))
342 return std_ulogic_vector is
-- The shift is done in three stages, each narrowing the window:
--   s1     (95 bits) is chosen by shift(6 downto 5),
--   s2     (71 bits) is chosen by shift(4 downto 3),
--   result (64 bits) is chosen by shift(2 downto 0).
343 variable s1 : std_ulogic_vector(94 downto 0);
344 variable s2 : std_ulogic_vector(70 downto 0);
345 variable result : std_ulogic_vector(63 downto 0);
-- Stage 1: coarse selection in steps of 32 bit positions.  Two of the
-- alternatives are zero-extended on the left (windows taken from the top
-- of inp), one is zero-padded on the right with 7 bits.
347 case shift(6 downto 5) is
349 s1 := inp(119 downto 25);
351 s1 := inp(87 downto 0) & "0000000";
353 s1 := x"0000000000000000" & inp(119 downto 89);
355 s1 := x"00000000" & inp(119 downto 57);
-- Stage 2: selection in steps of 8 bit positions within s1.
357 case shift(4 downto 3) is
359 s2 := s1(94 downto 24);
361 s2 := s1(86 downto 16);
363 s2 := s1(78 downto 8);
365 s2 := s1(70 downto 0);
-- Stage 3: fine selection in steps of 1 bit position within s2.
367 case shift(2 downto 0) is
369 result := s2(70 downto 7);
371 result := s2(69 downto 6);
373 result := s2(68 downto 5);
375 result := s2(67 downto 4);
377 result := s2(66 downto 3);
379 result := s2(65 downto 2);
381 result := s2(64 downto 1);
383 result := s2(63 downto 0);
388 -- Generate a mask with 0-bits on the left and 1-bits on the right which
389 -- selects the bits that will be lost in doing a right shift. The shift
390 -- parameter is the bottom 6 bits of a negative shift count,
391 -- indicating a right shift.
392 function right_mask(shift: unsigned(5 downto 0)) return std_ulogic_vector is
393 variable result: std_ulogic_vector(63 downto 0);
-- Start with no bits selected ...
395 result := (others => '0');
-- ... then visit the bit positions from the top down: iteration i
-- addresses bit (63 - i), so i = 0 is bit 63 and i = 63 is bit 0.
396 for i in 0 to 63 loop
398 result(63 - i) := '1';
404 -- Split a DP floating-point number into components and work out its class.
405 -- If is_int = 1, the input is considered an integer
406 function decode_dp(fpr: std_ulogic_vector(63 downto 0); is_int: std_ulogic) return fpu_reg_type is
407 variable r : fpu_reg_type;
408 variable exp_nz : std_ulogic;
409 variable exp_ao : std_ulogic;
410 variable frac_nz : std_ulogic;
411 variable cls : std_ulogic_vector(2 downto 0);
-- Sign is bit 63 of the register image.
413 r.negative := fpr(63);
-- exp_nz:  exponent field (bits 62..52) is non-zero
-- exp_ao:  exponent field is all ones
-- frac_nz: fraction field (bits 51..0) is non-zero
414 exp_nz := or (fpr(62 downto 52));
415 exp_ao := and (fpr(62 downto 52));
416 frac_nz := or (fpr(51 downto 0));
-- Unbiased exponent: the stored field minus the bias of 1023 ...
418 r.exponent := signed(resize(unsigned(fpr(62 downto 52)), EXP_BITS)) - to_signed(1023, EXP_BITS);
-- ... or the fixed value -1022.
-- NOTE(review): presumably used when the exponent field is zero
-- (zero/denormal inputs) -- confirm against the guarding condition.
420 r.exponent := to_signed(-1022, EXP_BITS);
-- Mantissa layout: 9 zero bits, the implicit unit bit (exp_nz) at bit 54,
-- the 52 fraction bits at bits 53..2, and two zero bits at the bottom.
422 r.mantissa := "000000000" & exp_nz & fpr(51 downto 0) & "00";
-- Classify from (exponent all ones, exponent non-zero, fraction non-zero).
423 cls := exp_ao & exp_nz & frac_nz;
425 when "000" => r.class := ZERO;
426 when "001" => r.class := FINITE; -- denormalized
427 when "010" => r.class := FINITE;
428 when "011" => r.class := FINITE;
429 when "110" => r.class := INFINITY;
430 when others => r.class := NAN;
-- Integer input (is_int = 1, per the function header): the exponent is
-- zero and the value counts as non-zero if any bit of the register,
-- including bit 63, is set.
434 r.exponent := (others => '0');
435 if (fpr(63) or exp_nz or frac_nz) = '1' then
444 -- Construct a DP floating-point result from components
445 function pack_dp(sign: std_ulogic; class: fp_number_class; exp: signed(EXP_BITS-1 downto 0);
446 mantissa: std_ulogic_vector; single_prec: std_ulogic; quieten_nan: std_ulogic)
447 return std_ulogic_vector is
448 variable result : std_ulogic_vector(63 downto 0);
-- Every field not explicitly written below stays zero.
450 result := (others => '0');
-- Bit 54 of the mantissa is the unit bit.  The biased exponent
-- (exp + 1023, truncated to 11 bits) is written under this test, so a
-- value whose unit bit is clear keeps an all-zero exponent field.
455 if mantissa(54) = '1' then
457 result(62 downto 52) := std_ulogic_vector(resize(exp, 11) + 1023);
-- The upper 23 fraction bits are always copied; the lower 29 bits are
-- copied only for double precision, leaving them zero for single.
459 result(51 downto 29) := mantissa(53 downto 31);
460 if single_prec = '0' then
461 result(28 downto 0) := mantissa(30 downto 2);
-- All-ones exponent with no fraction bits written here.
-- NOTE(review): presumably the INFINITY case -- confirm.
464 result(62 downto 52) := "11111111111";
-- All-ones exponent with the fraction copied.  Bit 51 is forced to 1 when
-- quieten_nan is set, otherwise it is taken from mantissa bit 53.
-- NOTE(review): presumably the NAN case -- confirm.
466 result(62 downto 52) := "11111111111";
467 result(51) := quieten_nan or mantissa(53);
468 result(50 downto 29) := mantissa(52 downto 31);
469 if single_prec = '0' then
470 result(28 downto 0) := mantissa(30 downto 2);
476 -- Determine whether to increment when rounding
477 -- Returns rounding_inc & inexact
478 -- Assumes x includes the bottom 29 bits of the mantissa already
479 -- if single_prec = 1 (usually arranged by setting set_x = 1 earlier).
480 function fp_rounding(mantissa: std_ulogic_vector(63 downto 0); x: std_ulogic;
481 single_prec: std_ulogic; rn: std_ulogic_vector(2 downto 0);
483 return std_ulogic_vector is
-- grx: the two mantissa bits just below the rounding position, followed
--      by x (see the header: x stands for the bits below those).
-- ret: the 2-bit return value (rounding_inc & inexact per the header).
484 variable grx : std_ulogic_vector(2 downto 0);
485 variable ret : std_ulogic_vector(1 downto 0);
486 variable lsb : std_ulogic;
-- Double precision: the two bits below the result are mantissa bits 1..0.
488 if single_prec = '0' then
489 grx := mantissa(1 downto 0) & x;
-- Single precision: they are mantissa bits 30..29.
492 grx := mantissa(30 downto 29) & x;
-- rn(1 downto 0) selects the rounding mode; rn(2) is an extra modifier
-- that disables the tie-to-even special case below.
497 case rn(1 downto 0) is
498 when "00" => -- round to nearest
-- Exactly half-way (grx = "100"): increment only if the current
-- least significant result bit is 1.
499 if grx = "100" and rn(2) = '0' then
500 ret(1) := lsb; -- tie, round to even
504 when "01" => -- round towards zero
505 when others => -- round towards +/- inf
507 -- round towards greater magnitude
514 -- Determine result flags to write into the FPSCR
515 function result_flags(sign: std_ulogic; class: fp_number_class; unitbit: std_ulogic)
516 return std_ulogic_vector is
-- Each return value is a 5-bit vector chosen according to class.
-- NOTE(review): presumably the FPSCR result-flags field, and the three
-- returns below presumably belong to ZERO, FINITE and INFINITY in that
-- order -- confirm against the selecting statement and the caller.
-- sign, then "0010".
520 return sign & "0010";
-- Top bit is set when the unit bit is clear; then sign, not sign, "00".
522 return (not unitbit) & sign & (not sign) & "00";
-- Top bit clear; then sign, not sign, "01".
524 return '0' & sign & (not sign) & "01";
531 fpu_multiply_0: entity work.multiply
534 m_in => f_to_multiply,
535 m_out => multiply_to_f
540 if rising_edge(clk) then
546 r.fpscr <= (others => '0');
547 r.writing_back <= '0';
549 assert not (r.state /= IDLE and e_in.valid = '1') severity failure;
555 -- synchronous reads from lookup table
-- The 10-bit table address is formed from two high bits (addrhi) and the
-- top 8 fraction bits of operand B.  The registered output inverse_est is
-- the 18-bit table entry with a leading '1' prepended (19 bits in all),
-- since the table does not store that always-1 top bit.
556 lut_access: process(clk)
557 variable addrhi : std_ulogic_vector(1 downto 0);
558 variable addr : std_ulogic_vector(9 downto 0);
560 if rising_edge(clk) then
-- For square-root operations the two bits above the fraction
-- (mantissa bits 55..54) pick one of the 1/sqrt(x) blocks.
561 if r.is_sqrt = '1' then
562 addrhi := r.b.mantissa(55 downto 54);
566 addr := addrhi & r.b.mantissa(53 downto 46);
567 inverse_est <= '1' & inverse_table(to_integer(unsigned(addr)));
-- Status reported back on the e_out port.
571 e_out.busy <= r.busy;
572 e_out.exception <= r.fpscr(FPSCR_FEX);
573 e_out.interrupt <= r.do_intr;
-- Results on the w_out port.  Completion is not signalled as valid when
-- an interrupt is being raised for the instruction.
575 w_out.valid <= r.instr_done and not r.do_intr;
576 w_out.write_enable <= r.writing_back;
577 w_out.write_reg <= r.dest_fpr;
578 w_out.write_data <= fp_result;
-- The CR is written on completion of record-form (rc) and compare-type
-- (is_cmp) instructions.
579 w_out.write_cr_enable <= r.instr_done and (r.rc or r.is_cmp);
580 w_out.write_cr_mask <= r.cr_mask;
-- The same 4-bit value is presented in all eight CR field positions;
-- write_cr_mask selects which field is updated.
581 w_out.write_cr_data <= r.cr_result & r.cr_result & r.cr_result & r.cr_result &
582 r.cr_result & r.cr_result & r.cr_result & r.cr_result;
585 variable v : reg_type;
586 variable adec : fpu_reg_type;
587 variable bdec : fpu_reg_type;
588 variable cdec : fpu_reg_type;
589 variable fpscr_mask : std_ulogic_vector(31 downto 0);
590 variable illegal : std_ulogic;
591 variable j, k : integer;
592 variable flm : std_ulogic_vector(7 downto 0);
593 variable int_input : std_ulogic;
594 variable mask : std_ulogic_vector(63 downto 0);
595 variable in_a0 : std_ulogic_vector(63 downto 0);
596 variable in_b0 : std_ulogic_vector(63 downto 0);
597 variable misc : std_ulogic_vector(63 downto 0);
598 variable shift_res : std_ulogic_vector(63 downto 0);
599 variable round : std_ulogic_vector(1 downto 0);
600 variable update_fx : std_ulogic;
601 variable arith_done : std_ulogic;
602 variable invalid : std_ulogic;
603 variable zero_divide : std_ulogic;
604 variable mant_nz : std_ulogic;
605 variable min_exp : signed(EXP_BITS-1 downto 0);
606 variable max_exp : signed(EXP_BITS-1 downto 0);
607 variable bias_exp : signed(EXP_BITS-1 downto 0);
608 variable new_exp : signed(EXP_BITS-1 downto 0);
609 variable exp_tiny : std_ulogic;
610 variable exp_huge : std_ulogic;
611 variable renormalize : std_ulogic;
612 variable clz : std_ulogic_vector(5 downto 0);
613 variable set_x : std_ulogic;
614 variable mshift : signed(EXP_BITS-1 downto 0);
615 variable need_check : std_ulogic;
616 variable msb : std_ulogic;
617 variable is_add : std_ulogic;
618 variable longmask : std_ulogic;
619 variable set_a : std_ulogic;
620 variable set_b : std_ulogic;
621 variable set_c : std_ulogic;
622 variable set_y : std_ulogic;
623 variable set_s : std_ulogic;
624 variable qnan_result : std_ulogic;
625 variable px_nz : std_ulogic;
626 variable pcmpb_eq : std_ulogic;
627 variable pcmpb_lt : std_ulogic;
628 variable pshift : std_ulogic;
629 variable renorm_sqrt : std_ulogic;
630 variable sqrt_exp : signed(EXP_BITS-1 downto 0);
631 variable shiftin : std_ulogic;
632 variable mulexp : signed(EXP_BITS-1 downto 0);
633 variable maddend : std_ulogic_vector(127 downto 0);
640 -- capture incoming instruction
641 if e_in.valid = '1' then
644 v.fe_mode := or (e_in.fe_mode);
645 v.dest_fpr := e_in.frt;
646 v.single_prec := e_in.single;
649 v.is_cmp := e_in.out_cr;
650 if e_in.out_cr = '0' then
651 v.cr_mask := num_to_fxm(1);
653 v.cr_mask := num_to_fxm(to_integer(unsigned(insn_bf(e_in.insn))));
656 if e_in.op = OP_FPOP_I then
659 v.quieten_nan := '1';
662 v.round_mode := '0' & r.fpscr(FPSCR_RN+1 downto FPSCR_RN);
663 v.is_subtract := '0';
664 v.is_multiply := '0';
667 v.doing_ftdiv := "00";
669 adec := decode_dp(e_in.fra, int_input);
670 bdec := decode_dp(e_in.frb, int_input);
671 cdec := decode_dp(e_in.frc, int_input);
677 if adec.exponent > bdec.exponent then
681 if (adec.exponent + cdec.exponent + 1) >= bdec.exponent then
686 r_hi_nz <= or (r.r(55 downto 31));
687 r_lo_nz <= or (r.r(30 downto 2));
690 if r.single_prec = '0' then
691 if r.doing_ftdiv(1) = '0' then
692 max_exp := to_signed(1023, EXP_BITS);
694 max_exp := to_signed(1020, EXP_BITS);
696 if r.doing_ftdiv(0) = '0' then
697 min_exp := to_signed(-1022, EXP_BITS);
699 min_exp := to_signed(-1021, EXP_BITS);
701 bias_exp := to_signed(1536, EXP_BITS);
703 max_exp := to_signed(127, EXP_BITS);
704 min_exp := to_signed(-126, EXP_BITS);
705 bias_exp := to_signed(192, EXP_BITS);
707 new_exp := r.result_exp - r.shift;
710 if new_exp < min_exp then
713 if new_exp > max_exp then
717 -- Compare P with zero and with B
718 px_nz := or (r.p(57 downto 4));
720 if r.p(59 downto 4) = r.b.mantissa(55 downto 0) then
724 if unsigned(r.p(59 downto 4)) < unsigned(r.b.mantissa(55 downto 0)) then
728 v.writing_back := '0';
730 v.update_fprf := '0';
731 v.shift := to_signed(0, EXP_BITS);
742 fpscr_mask := (others => '1');
750 longmask := r.single_prec;
755 f_to_multiply.is_32bit <= '0';
756 f_to_multiply.valid <= '0';
759 msel_add <= MULADD_ZERO;
772 if e_in.valid = '1' then
773 case e_in.insn(5 downto 1) is
775 if e_in.insn(8) = '1' then
776 if e_in.insn(6) = '0' then
779 v.state := DO_FTSQRT;
781 elsif e_in.insn(7) = '1' then
788 if e_in.insn(10) = '0' then
789 if e_in.insn(8) = '0' then
792 v.state := DO_MTFSFI;
798 if e_in.insn(8) = '0' then
805 if e_in.insn(9 downto 8) /= "11" then
815 if int_input = '1' then
822 v.round_mode := "001";
827 if v.b.mantissa(54) = '0' and v.a.mantissa(54) = '1' then
831 when "10100" | "10101" =>
844 v.is_multiply := '1';
846 if v.c.mantissa(54) = '0' and v.a.mantissa(54) = '1' then
853 v.state := DO_FRSQRTE;
854 when "11100" | "11101" | "11110" | "11111" =>
855 if v.a.mantissa(54) = '0' then
857 elsif v.c.mantissa(54) = '0' then
868 v.old_exc := r.fpscr(FPSCR_VX downto FPSCR_XX);
872 j := to_integer(unsigned(insn_bfa(r.insn)));
876 v.cr_result := r.fpscr(k + 3 downto k);
877 fpscr_mask(k + 3 downto k) := "0000";
880 v.fpscr := r.fpscr and (fpscr_mask or x"6007F8FF");
-- Test-for-divide flags (see doing_ftdiv below).  Start with all flags
-- clear.
887 v.cr_result := "0000";
-- CR bit 2: A is infinite, or B is zero, infinite or denormalized.
-- A finite B is denormalized when its unit bit is clear.  decode_dp puts
-- the unit bit at mantissa bit 54; bit 53 is the top fraction bit and
-- says nothing about normalization, so bit 54 is the one to test.
888 if r.a.class = INFINITY or r.b.class = ZERO or r.b.class = INFINITY or
889 (r.b.class = FINITE and r.b.mantissa(54) = '0') then
890 v.cr_result(2) := '1';
-- CR bit 1: either operand is a NaN or infinity, B is zero, or A is
-- finite with an unbiased exponent of -970 or less.
892 if r.a.class = NAN or r.a.class = INFINITY or
893 r.b.class = NAN or r.b.class = ZERO or r.b.class = INFINITY or
894 (r.a.class = FINITE and r.a.exponent <= to_signed(-970, EXP_BITS)) then
895 v.cr_result(1) := '1';
-- Both doing_ftdiv bits set: the exponent range checks use the tightened
-- limits (max_exp = 1020, min_exp = -1021).
897 v.doing_ftdiv := "11";
-- Test-for-square-root flags.  Start with all flags clear.
906 v.cr_result := "0000";
-- CR bit 2: B is zero, infinite or denormalized.  A finite B is
-- denormalized when its unit bit is clear.  decode_dp puts the unit bit
-- at mantissa bit 54; bit 53 is the top fraction bit, so bit 54 is the
-- one to test.
907 if r.b.class = ZERO or r.b.class = INFINITY or
908 (r.b.class = FINITE and r.b.mantissa(54) = '0') then
909 v.cr_result(2) := '1';
-- CR bit 1: B is a NaN, infinite, zero or negative, or has an unbiased
-- exponent of -970 or less.  The flag must be SET here: cr_result was
-- cleared just above, so assigning '0' would leave this test without
-- any effect.
911 if r.b.class = NAN or r.b.class = INFINITY or r.b.class = ZERO
912 or r.b.negative = '1' or r.b.exponent <= to_signed(-970, EXP_BITS) then
913 v.cr_result(1) := '1';
922 v.result_exp := r.b.exponent;
923 if (r.a.class = NAN and r.a.mantissa(53) = '0') or
924 (r.b.class = NAN and r.b.mantissa(53) = '0') then
926 v.fpscr(FPSCR_VXSNAN) := '1';
927 if r.insn(6) = '1' and r.fpscr(FPSCR_VE) = '0' then
928 v.fpscr(FPSCR_VXVC) := '1';
931 v.cr_result := "0001"; -- unordered
932 elsif r.a.class = NAN or r.b.class = NAN then
933 if r.insn(6) = '1' then
935 v.fpscr(FPSCR_VXVC) := '1';
938 v.cr_result := "0001"; -- unordered
939 elsif r.a.class = ZERO and r.b.class = ZERO then
940 v.cr_result := "0010"; -- equal
941 elsif r.a.negative /= r.b.negative then
942 v.cr_result := r.a.negative & r.b.negative & "00";
943 elsif r.a.class = ZERO then
944 -- A and B are the same sign from here down
945 v.cr_result := not r.b.negative & r.b.negative & "00";
946 elsif r.a.class = INFINITY then
947 if r.b.class = INFINITY then
948 v.cr_result := "0010";
950 v.cr_result := r.a.negative & not r.a.negative & "00";
952 elsif r.b.class = ZERO then
953 -- A is finite from here down
954 v.cr_result := r.a.negative & not r.a.negative & "00";
955 elsif r.b.class = INFINITY then
956 v.cr_result := not r.b.negative & r.b.negative & "00";
957 elsif r.exp_cmp = '1' then
958 -- A and B are both finite from here down
959 v.cr_result := r.a.negative & not r.a.negative & "00";
960 elsif r.a.exponent /= r.b.exponent then
961 -- A exponent is smaller than B
962 v.cr_result := not r.a.negative & r.a.negative & "00";
964 -- Prepare to subtract mantissas, put B in R
965 v.cr_result := "0000";
970 v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result;
974 j := to_integer(unsigned(insn_bt(r.insn)));
975 for i in 0 to 31 loop
977 v.fpscr(31 - i) := r.insn(6);
985 j := to_integer(unsigned(insn_bf(r.insn)));
986 if r.insn(16) = '0' then
990 v.fpscr(k + 3 downto k) := insn_u(r.insn);
1000 misc_sel <= "01" & r.insn(8) & '0';
1001 v.int_result := '1';
1002 v.writing_back := '1';
1003 v.instr_done := '1';
1007 v.int_result := '1';
1008 v.writing_back := '1';
1009 opsel_r <= RES_MISC;
1010 case r.insn(20 downto 16) is
1015 v.fpscr(FPSCR_VE downto FPSCR_XE) := "00000";
1016 when "10100" | "10101" =>
1017 -- mffscdrn[i] (but we don't implement DRN)
1018 fpscr_mask := x"000000FF";
1021 fpscr_mask := x"000000FF";
1022 v.fpscr(FPSCR_RN+1 downto FPSCR_RN) :=
1023 r.b.mantissa(FPSCR_RN+1 downto FPSCR_RN);
1026 fpscr_mask := x"000000FF";
1027 v.fpscr(FPSCR_RN+1 downto FPSCR_RN) := r.insn(12 downto 11);
1030 fpscr_mask := x"0007F0FF";
1034 v.instr_done := '1';
1038 if r.insn(25) = '1' then
1040 elsif r.insn(16) = '1' then
1043 flm := r.insn(24 downto 17);
1045 for i in 0 to 7 loop
1047 if flm(i) = '1' then
1048 v.fpscr(k + 3 downto k) := r.b.mantissa(k + 3 downto k);
1051 v.instr_done := '1';
1055 -- r.opsel_a = AIN_B
1056 v.result_class := r.b.class;
1057 v.result_exp := r.b.exponent;
1058 v.quieten_nan := '0';
1059 if r.insn(9) = '1' then
1060 v.result_sign := '0'; -- fabs
1061 elsif r.insn(8) = '1' then
1062 v.result_sign := '1'; -- fnabs
1063 elsif r.insn(7) = '1' then
1064 v.result_sign := r.b.negative; -- fmr
1065 elsif r.insn(6) = '1' then
1066 v.result_sign := not r.b.negative; -- fneg
1068 v.result_sign := r.a.negative; -- fcpsgn
1070 v.writing_back := '1';
1071 v.instr_done := '1';
1074 when DO_FRI => -- fri[nzpm]
1075 -- r.opsel_a = AIN_B
1076 v.result_class := r.b.class;
1077 v.result_sign := r.b.negative;
1078 v.result_exp := r.b.exponent;
1079 v.fpscr(FPSCR_FR) := '0';
1080 v.fpscr(FPSCR_FI) := '0';
1081 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1083 v.fpscr(FPSCR_VXSNAN) := '1';
1086 if r.b.class = FINITE then
1087 if r.b.exponent >= to_signed(52, EXP_BITS) then
1088 -- integer already, no rounding required
1091 v.shift := r.b.exponent - to_signed(52, EXP_BITS);
1093 v.round_mode := '1' & r.insn(7 downto 6);
1100 -- r.opsel_a = AIN_B, r.shift = 0
1101 v.result_class := r.b.class;
1102 v.result_sign := r.b.negative;
1103 v.result_exp := r.b.exponent;
1104 v.fpscr(FPSCR_FR) := '0';
1105 v.fpscr(FPSCR_FI) := '0';
1106 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1108 v.fpscr(FPSCR_VXSNAN) := '1';
1112 if r.b.class = FINITE then
1113 if r.b.exponent < to_signed(-126, EXP_BITS) then
1114 v.shift := r.b.exponent - to_signed(-126, EXP_BITS);
1115 v.state := ROUND_UFLOW;
1116 elsif r.b.exponent > to_signed(127, EXP_BITS) then
1117 v.state := ROUND_OFLOW;
1119 v.shift := to_signed(-2, EXP_BITS);
1120 v.state := ROUNDING;
1127 -- instr bit 9: 1=dword 0=word
1128 -- instr bit 8: 1=unsigned 0=signed
1129 -- instr bit 1: 1=round to zero 0=use fpscr[RN]
1130 -- r.opsel_a = AIN_B
1131 v.result_class := r.b.class;
1132 v.result_sign := r.b.negative;
1133 v.result_exp := r.b.exponent;
1134 v.fpscr(FPSCR_FR) := '0';
1135 v.fpscr(FPSCR_FI) := '0';
1136 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1138 v.fpscr(FPSCR_VXSNAN) := '1';
1142 v.int_result := '1';
1147 if r.b.exponent >= to_signed(64, EXP_BITS) or
1148 (r.insn(9) = '0' and r.b.exponent >= to_signed(32, EXP_BITS)) then
1149 v.state := INT_OFLOW;
1150 elsif r.b.exponent >= to_signed(52, EXP_BITS) then
1151 -- integer already, no rounding required,
1152 -- shift into final position
1153 v.shift := r.b.exponent - to_signed(54, EXP_BITS);
1154 if r.insn(8) = '1' and r.b.negative = '1' then
1155 v.state := INT_OFLOW;
1157 v.state := INT_ISHIFT;
1160 v.shift := r.b.exponent - to_signed(52, EXP_BITS);
1161 v.state := INT_SHIFT;
1163 when INFINITY | NAN =>
1164 v.state := INT_OFLOW;
1168 -- r.opsel_a = AIN_B
1169 v.result_sign := '0';
1170 if r.insn(8) = '0' and r.b.negative = '1' then
1171 -- fcfid[s] with negative operand, set R = -B
1174 v.result_sign := '1';
1176 v.result_class := r.b.class;
1177 v.result_exp := to_signed(54, EXP_BITS);
1178 v.fpscr(FPSCR_FR) := '0';
1179 v.fpscr(FPSCR_FI) := '0';
1180 if r.b.class = ZERO then
1187 -- fadd[s] and fsub[s]
1188 -- r.opsel_a = AIN_A
1189 v.result_sign := r.a.negative;
1190 v.result_class := r.a.class;
1191 v.result_exp := r.a.exponent;
1192 v.fpscr(FPSCR_FR) := '0';
1193 v.fpscr(FPSCR_FI) := '0';
1196 is_add := r.a.negative xor r.b.negative xor r.insn(1);
1197 if r.a.class = FINITE and r.b.class = FINITE then
1198 v.is_subtract := not is_add;
1199 v.add_bsmall := r.exp_cmp;
1201 if r.exp_cmp = '0' then
1202 v.shift := r.a.exponent - r.b.exponent;
1203 v.result_sign := r.b.negative xnor r.insn(1);
1204 if r.a.exponent = r.b.exponent then
1207 v.state := ADD_SHIFT;
1213 if r.a.class = NAN or r.b.class = NAN then
1214 v.state := NAN_RESULT;
1215 elsif r.a.class = INFINITY and r.b.class = INFINITY and is_add = '0' then
1216 -- invalid operation, construct QNaN
1217 v.fpscr(FPSCR_VXISI) := '1';
1220 elsif r.a.class = ZERO and r.b.class = ZERO and is_add = '0' then
1221 -- return -0 for rounding to -infinity
1222 v.result_sign := r.round_mode(1) and r.round_mode(0);
1224 elsif r.a.class = INFINITY or r.b.class = ZERO then
1227 v.state := EXC_RESULT;
1231 v.negate := not r.insn(1);
1232 v.state := EXC_RESULT;
1238 -- r.opsel_a = AIN_A unless C is denorm and A isn't
1239 v.result_sign := r.a.negative xor r.c.negative;
1240 v.result_class := r.a.class;
1241 v.fpscr(FPSCR_FR) := '0';
1242 v.fpscr(FPSCR_FI) := '0';
1245 if r.a.class = FINITE and r.c.class = FINITE then
1246 v.result_exp := r.a.exponent + r.c.exponent;
1247 -- Renormalize denorm operands
1248 if r.a.mantissa(54) = '0' then
1249 v.state := RENORM_A;
1250 elsif r.c.mantissa(54) = '0' then
1251 v.state := RENORM_C;
1253 f_to_multiply.valid <= '1';
1257 if r.a.class = NAN or r.c.class = NAN then
1258 v.state := NAN_RESULT;
1259 elsif (r.a.class = INFINITY and r.c.class = ZERO) or
1260 (r.a.class = ZERO and r.c.class = INFINITY) then
1261 -- invalid operation, construct QNaN
1262 v.fpscr(FPSCR_VXIMZ) := '1';
1264 elsif r.a.class = ZERO or r.a.class = INFINITY then
1268 -- r.c.class is ZERO or INFINITY
1270 v.negate := r.a.negative;
1271 v.state := EXC_RESULT;
1276 -- r.opsel_a = AIN_A unless B is denorm and A isn't
1277 v.result_class := r.a.class;
1278 v.fpscr(FPSCR_FR) := '0';
1279 v.fpscr(FPSCR_FI) := '0';
1282 v.result_sign := r.a.negative xor r.b.negative;
1283 v.result_exp := r.a.exponent - r.b.exponent;
1285 if r.a.class = FINITE and r.b.class = FINITE then
1286 -- Renormalize denorm operands
1287 if r.a.mantissa(54) = '0' then
1288 v.state := RENORM_A;
1289 elsif r.b.mantissa(54) = '0' then
1290 v.state := RENORM_B;
1296 if r.a.class = NAN or r.b.class = NAN then
1297 v.state := NAN_RESULT;
1298 elsif r.b.class = INFINITY then
1299 if r.a.class = INFINITY then
1300 v.fpscr(FPSCR_VXIDI) := '1';
1303 v.result_class := ZERO;
1306 elsif r.b.class = ZERO then
1307 if r.a.class = ZERO then
1308 v.fpscr(FPSCR_VXZDZ) := '1';
1311 if r.a.class = FINITE then
1314 v.result_class := INFINITY;
1317 else -- r.b.class = FINITE, result_class = r.a.class
1323 v.fpscr(FPSCR_FR) := '0';
1324 v.fpscr(FPSCR_FI) := '0';
1325 if r.a.class = ZERO or (r.a.negative = '0' and r.a.class /= NAN) then
1330 v.quieten_nan := '0';
1331 v.state := EXC_RESULT;
1334 -- r.opsel_a = AIN_B
1335 v.result_class := r.b.class;
1336 v.result_sign := r.b.negative;
1337 v.fpscr(FPSCR_FR) := '0';
1338 v.fpscr(FPSCR_FI) := '0';
1342 v.result_exp := r.b.exponent;
1343 if r.b.negative = '1' then
1344 v.fpscr(FPSCR_VXSQRT) := '1';
1346 elsif r.b.mantissa(54) = '0' then
1347 v.state := RENORM_B;
1348 elsif r.b.exponent(0) = '0' then
1351 v.shift := to_signed(1, EXP_BITS);
1352 v.state := RENORM_B2;
1355 v.state := NAN_RESULT;
1360 if r.b.negative = '1' then
1361 v.fpscr(FPSCR_VXSQRT) := '1';
1369 -- r.opsel_a = AIN_B
1370 v.result_class := r.b.class;
1371 v.result_sign := r.b.negative;
1372 v.fpscr(FPSCR_FR) := '0';
1373 v.fpscr(FPSCR_FI) := '0';
1377 v.result_exp := - r.b.exponent;
1378 if r.b.mantissa(54) = '0' then
1379 v.state := RENORM_B;
1384 v.state := NAN_RESULT;
1386 v.result_class := ZERO;
1389 v.result_class := INFINITY;
1395 -- r.opsel_a = AIN_B
1396 v.result_class := r.b.class;
1397 v.result_sign := r.b.negative;
1398 v.fpscr(FPSCR_FR) := '0';
1399 v.fpscr(FPSCR_FI) := '0';
1401 v.shift := to_signed(1, EXP_BITS);
1404 v.result_exp := r.b.exponent;
1405 if r.b.negative = '1' then
1406 v.fpscr(FPSCR_VXSQRT) := '1';
1408 elsif r.b.mantissa(54) = '0' then
1409 v.state := RENORM_B;
1410 elsif r.b.exponent(0) = '0' then
1413 v.state := RENORM_B2;
1416 v.state := NAN_RESULT;
1418 if r.b.negative = '1' then
1419 v.fpscr(FPSCR_VXSQRT) := '1';
1422 v.result_class := ZERO;
1426 v.result_class := INFINITY;
1432 -- fmadd, fmsub, fnmadd, fnmsub
1433 -- r.opsel_a = AIN_A if A is denorm, else AIN_C if C is denorm,
1435 v.result_sign := r.a.negative;
1436 v.result_class := r.a.class;
1437 v.result_exp := r.a.exponent;
1438 v.fpscr(FPSCR_FR) := '0';
1439 v.fpscr(FPSCR_FI) := '0';
1443 is_add := r.a.negative xor r.c.negative xor r.b.negative xor r.insn(1);
1444 if r.a.class = FINITE and r.c.class = FINITE and
1445 (r.b.class = FINITE or r.b.class = ZERO) then
1446 v.is_subtract := not is_add;
1447 mulexp := r.a.exponent + r.c.exponent;
1448 v.result_exp := mulexp;
1449 -- Make sure A and C are normalized
1450 if r.a.mantissa(54) = '0' then
1451 v.state := RENORM_A;
1452 elsif r.c.mantissa(54) = '0' then
1453 v.state := RENORM_C;
1454 elsif r.b.class = ZERO then
1455 -- no addend, degenerates to multiply
1456 v.result_sign := r.a.negative xor r.c.negative xor r.insn(2);
1457 f_to_multiply.valid <= '1';
1458 v.is_multiply := '1';
1460 elsif r.madd_cmp = '0' then
1461 -- addend is bigger, do multiply first
1462 v.result_sign := not (r.b.negative xor r.insn(1) xor r.insn(2));
1463 f_to_multiply.valid <= '1';
1466 -- product is bigger, shift B right and use it as the
1467 -- addend to the multiplier
1468 v.shift := r.b.exponent - mulexp + to_signed(64, EXP_BITS);
1469 -- for subtract, multiplier does B - A * C
1470 v.result_sign := not (r.a.negative xor r.c.negative xor r.insn(2) xor is_add);
1471 v.result_exp := r.b.exponent;
1475 if r.a.class = NAN or r.b.class = NAN or r.c.class = NAN then
1476 v.state := NAN_RESULT;
1477 elsif (r.a.class = ZERO and r.c.class = INFINITY) or
1478 (r.a.class = INFINITY and r.c.class = ZERO) then
1479 -- invalid operation, construct QNaN
1480 v.fpscr(FPSCR_VXIMZ) := '1';
1482 elsif r.a.class = INFINITY or r.c.class = INFINITY then
1483 if r.b.class = INFINITY and is_add = '0' then
1484 -- invalid operation, construct QNaN
1485 v.fpscr(FPSCR_VXISI) := '1';
1488 -- result is infinity
1489 v.result_class := INFINITY;
1490 v.result_sign := r.a.negative xor r.c.negative xor r.insn(2);
1494 -- Here A is zero, C is zero, or B is infinity
1495 -- Result is +/-B in all of those cases
1497 if r.b.class /= ZERO or is_add = '1' then
1498 v.negate := not (r.insn(1) xor r.insn(2));
1500 -- have to be careful about rule for 0 - 0 result sign
1501 v.negate := r.b.negative xor (r.round_mode(1) and r.round_mode(0)) xor r.insn(2);
1503 v.state := EXC_RESULT;
1509 v.state := RENORM_A2;
1510 if r.insn(4) = '1' then
1517 -- r.opsel_a = AIN_C for fmul/fmadd, AIN_B for fdiv
1519 v.result_exp := new_exp;
1520 if r.insn(4) = '1' then
1521 if r.c.mantissa(54) = '1' then
1522 if r.insn(3) = '0' or r.b.class = ZERO then
1527 if new_exp + 1 >= r.b.exponent then
1531 v.state := DO_FMADD;
1534 v.state := RENORM_C;
1537 if r.b.mantissa(54) = '1' then
1541 v.state := RENORM_B;
1547 renorm_sqrt := r.is_sqrt;
1548 v.state := RENORM_B2;
1552 if r.is_sqrt = '0' then
1553 v.result_exp := r.result_exp + r.shift;
1555 v.result_exp := new_exp;
1562 v.state := RENORM_C2;
1566 v.result_exp := new_exp;
1567 if r.insn(3) = '0' or r.b.class = ZERO then
1572 if new_exp + 1 >= r.b.exponent then
1576 v.state := DO_FMADD;
1580 -- transferring B to R
1581 v.shift := r.b.exponent - r.a.exponent;
1582 v.result_exp := r.b.exponent;
1583 v.state := ADD_SHIFT;
1586 -- r.shift = - exponent difference
1587 opsel_r <= RES_SHIFT;
1591 if r.add_bsmall = '1' then
1599 -- r.opsel_a = AIN_A if r.add_bsmall = 1 else AIN_B
1601 opsel_binv <= r.is_subtract;
1602 carry_in <= r.is_subtract and not r.x;
1603 v.shift := to_signed(-1, EXP_BITS);
1607 -- check for overflow or negative result (can't get both)
1609 if r.r(63) = '1' then
1610 -- result is opposite sign to expected
1611 v.result_sign := not r.result_sign;
1615 elsif r.r(55) = '1' then
1616 -- sum overflowed, shift right
1617 opsel_r <= RES_SHIFT;
1619 v.shift := to_signed(-2, EXP_BITS);
1620 if exp_huge = '1' then
1621 v.state := ROUND_OFLOW;
1623 v.state := ROUNDING;
1625 elsif r.r(54) = '1' then
1627 v.shift := to_signed(-2, EXP_BITS);
1628 v.state := ROUNDING;
1629 elsif (r_hi_nz or r_lo_nz or r.r(1) or r.r(0)) = '0' then
1630 -- r.x must be zero at this point
1631 v.result_class := ZERO;
1632 if r.is_subtract = '1' then
1633 -- set result sign depending on rounding mode
1634 v.result_sign := r.round_mode(1) and r.round_mode(0);
1639 v.state := NORMALIZE;
1643 -- r.opsel_a = AIN_A
1650 if r.r(63) = '1' then
1651 -- A is smaller in magnitude
1652 v.cr_result := not r.a.negative & r.a.negative & "00";
1653 elsif (r_hi_nz or r_lo_nz) = '0' then
1654 v.cr_result := "0010";
1656 v.cr_result := r.a.negative & not r.a.negative & "00";
1658 v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result;
1659 v.instr_done := '1';
1663 f_to_multiply.valid <= r.first;
1664 opsel_r <= RES_MULT;
1665 if multiply_to_f.valid = '1' then
1670 -- Addend is bigger here
1671 v.result_sign := not (r.b.negative xor r.insn(1) xor r.insn(2));
1672 -- note v.shift is at most -2 here
1673 v.shift := r.result_exp - r.b.exponent;
1674 opsel_r <= RES_MULT;
1677 f_to_multiply.valid <= r.first;
1678 if multiply_to_f.valid = '1' then
1679 v.state := ADD_SHIFT;
1683 -- Product is potentially bigger here
1684 -- r.shift = addend exp - product exp + 64, r.r = r.b.mantissa
1687 v.shift := r.shift - to_signed(64, EXP_BITS);
1691 -- r.shift = addend exp - product exp
1692 opsel_r <= RES_SHIFT;
1697 msel_add <= MULADD_RS;
1698 f_to_multiply.valid <= r.first;
1699 msel_inv <= r.is_subtract;
1700 opsel_r <= RES_MULT;
1703 v.shift := to_signed(56, EXP_BITS);
1704 if multiply_to_f.valid = '1' then
1705 if multiply_to_f.result(121) = '1' then
1714 v.result_sign := not r.result_sign;
1716 carry_in <= not (s_nz or r.x);
1719 v.shift := to_signed(56, EXP_BITS);
1723 -- r.shift = 56 (or 0, but only if r is now nonzero)
1724 if (r.r(56) or r_hi_nz or r_lo_nz or r.r(1) or r.r(0)) = '0' then
1726 -- must be a subtraction, and r.x must be zero
1727 v.result_class := ZERO;
1728 v.result_sign := r.round_mode(1) and r.round_mode(0);
1731 -- R is all zeroes but there are non-zero bits in S
1732 -- so shift them into R and set S to 0
1733 opsel_r <= RES_SHIFT;
1735 -- stay in state FMADD_6
1737 elsif r.r(56 downto 54) = "001" then
1741 v.state := NORMALIZE;
1745 -- r.opsel_a = AIN_B
1746 -- wait one cycle for inverse_table[B] lookup
1748 if r.insn(4) = '0' then
1749 if r.insn(3) = '0' then
1754 elsif r.insn(2) = '0' then
1761 -- compute Y = inverse_table[B] (when count=0); P = 2 - B * Y
1763 msel_add <= MULADD_CONST;
1772 f_to_multiply.valid <= r.first;
1773 if multiply_to_f.valid = '1' then
1775 v.count := r.count + 1;
1780 -- compute Y = P = P * Y
1783 f_to_multiply.valid <= r.first;
1785 if multiply_to_f.valid = '1' then
1795 -- compute R = P = A * Y (quotient)
1799 f_to_multiply.valid <= r.first;
1801 if multiply_to_f.valid = '1' then
1802 opsel_r <= RES_MULT;
1808 -- compute P = A - B * R (remainder)
1811 msel_add <= MULADD_A;
1813 f_to_multiply.valid <= r.first;
1814 if multiply_to_f.valid = '1' then
1819 -- test if remainder is 0 or >= B
1820 if pcmpb_lt = '1' then
1821 -- quotient is correct, set X if remainder non-zero
1822 v.x := r.p(58) or px_nz;
1824 -- quotient needs to be incremented by 1
1826 v.x := not pcmpb_eq;
1831 opsel_r <= RES_MISC;
1833 v.shift := to_signed(1, EXP_BITS);
1834 v.state := NORMALIZE;
1837 v.cr_result(1) := exp_tiny or exp_huge;
1838 if exp_tiny = '1' or exp_huge = '1' or r.a.class = ZERO or r.first = '0' then
1839 v.instr_done := '1';
1842 v.shift := r.a.exponent;
1843 v.doing_ftdiv := "10";
1847 opsel_r <= RES_MISC;
1849 sqrt_exp := r.b.exponent(EXP_BITS-1) & r.b.exponent(EXP_BITS-1 downto 1);
1850 v.result_exp := - sqrt_exp;
1851 v.shift := to_signed(1, EXP_BITS);
1852 v.state := NORMALIZE;
1855 -- put invsqr[B] in R and compute P = invsqr[B] * B
1856 -- also transfer B (in R) to A
1858 opsel_r <= RES_MISC;
1862 f_to_multiply.valid <= '1';
1863 v.shift := to_signed(-1, EXP_BITS);
1868 -- shift R right one place
1869 -- not expecting multiplier result yet
1871 opsel_r <= RES_SHIFT;
1876 -- put R into Y, wait for product from multiplier
1880 if multiply_to_f.valid = '1' then
1881 -- put result into R
1882 opsel_r <= RES_MULT;
1888 -- compute 1.5 - Y * P
1891 msel_add <= MULADD_CONST;
1893 f_to_multiply.valid <= r.first;
1895 if multiply_to_f.valid = '1' then
1900 -- compute Y = Y * P
1903 f_to_multiply.valid <= '1';
1908 -- pipeline in R = R * P
1911 f_to_multiply.valid <= r.first;
1913 if multiply_to_f.valid = '1' then
1919 -- first multiply is done, put result in Y
1922 -- wait for second multiply (should be here already)
1924 if multiply_to_f.valid = '1' then
1925 -- put result into R
1926 opsel_r <= RES_MULT;
1928 v.count := r.count + 1;
1938 -- compute P = A - R * R, which can be +ve or -ve
1939 -- we arranged for B to be put into A earlier
1942 msel_add <= MULADD_A;
1945 f_to_multiply.valid <= r.first;
1946 if multiply_to_f.valid = '1' then
1952 -- compute P = P * Y
1953 -- since Y is an estimate of 1/sqrt(B), this makes P an
1954 -- estimate of the adjustment needed to R. Since the error
1955 -- could be negative and we have an unsigned multiplier, the
1956 -- upper bits can be wrong, but it turns out the lowest 8 bits
1957 -- are correct and are all we need (given 3 iterations through
1958 -- SQRT_4 to SQRT_7).
1962 f_to_multiply.valid <= r.first;
1963 if multiply_to_f.valid = '1' then
1968 -- Add the bottom 8 bits of P, sign-extended,
1969 -- divided by 4, onto R.
1970 -- The division by 4 is because R is 10.54 format
1971 -- whereas P is 8.56 format.
1973 sqrt_exp := r.b.exponent(EXP_BITS-1) & r.b.exponent(EXP_BITS-1 downto 1);
1974 v.result_exp := sqrt_exp;
1975 v.shift := to_signed(1, EXP_BITS);
1980 -- compute P = A - R * R (remainder)
1981 -- also put 2 * R + 1 into B for comparison with P
1984 msel_add <= MULADD_A;
1986 f_to_multiply.valid <= r.first;
1989 if multiply_to_f.valid = '1' then
1994 -- test if remainder is 0 or >= B = 2*R + 1
1995 if pcmpb_lt = '1' then
1996 -- square root is correct, set X if remainder non-zero
1997 v.x := r.p(58) or px_nz;
1999 -- square root needs to be incremented by 1
2001 v.x := not pcmpb_eq;
2006 -- r.shift = b.exponent - 52
2007 opsel_r <= RES_SHIFT;
2009 v.state := INT_ROUND;
2010 v.shift := to_signed(-2, EXP_BITS);
2014 opsel_r <= RES_SHIFT;
2015 round := fp_rounding(r.r, r.x, '0', r.round_mode, r.result_sign);
2016 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
2017 -- Check for negative values that don't round to 0 for fcti*u*
2018 if r.insn(8) = '1' and r.result_sign = '1' and
2019 (r_hi_nz or r_lo_nz or v.fpscr(FPSCR_FR)) = '1' then
2020 v.state := INT_OFLOW;
2022 v.state := INT_FINAL;
2026 -- r.shift = b.exponent - 54;
2027 opsel_r <= RES_SHIFT;
2028 v.state := INT_FINAL;
2031 -- Negate if necessary, and increment for rounding if needed
2032 opsel_ainv <= r.result_sign;
2033 carry_in <= r.fpscr(FPSCR_FR) xor r.result_sign;
2034 -- Check for possible overflows
2035 case r.insn(9 downto 8) is
2036 when "00" => -- fctiw[z]
2037 need_check := r.r(31) or (r.r(30) and not r.result_sign);
2038 when "01" => -- fctiwu[z]
2039 need_check := r.r(31);
2040 when "10" => -- fctid[z]
2041 need_check := r.r(63) or (r.r(62) and not r.result_sign);
2042 when others => -- fctidu[z]
2043 need_check := r.r(63);
2045 if need_check = '1' then
2046 v.state := INT_CHECK;
2048 if r.fpscr(FPSCR_FI) = '1' then
2049 v.fpscr(FPSCR_XX) := '1';
2055 if r.insn(9) = '0' then
2060 misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
2061 if (r.insn(8) = '0' and msb /= r.result_sign) or
2062 (r.insn(8) = '1' and msb /= '1') then
2063 opsel_r <= RES_MISC;
2064 v.fpscr(FPSCR_VXCVI) := '1';
2067 if r.fpscr(FPSCR_FI) = '1' then
2068 v.fpscr(FPSCR_XX) := '1';
2074 opsel_r <= RES_MISC;
2075 misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
2076 if r.b.class = NAN then
2079 v.fpscr(FPSCR_VXCVI) := '1';
2084 -- r.shift = b.exponent - 52
2085 opsel_r <= RES_SHIFT;
2087 v.shift := to_signed(-2, EXP_BITS);
2088 v.state := ROUNDING;
2091 if r.is_multiply = '1' and px_nz = '1' then
2094 if r.r(63 downto 54) /= "0000000001" then
2096 v.state := NORMALIZE;
2099 if exp_tiny = '1' then
2100 v.shift := new_exp - min_exp;
2101 v.state := ROUND_UFLOW;
2102 elsif exp_huge = '1' then
2103 v.state := ROUND_OFLOW;
2105 v.shift := to_signed(-2, EXP_BITS);
2106 v.state := ROUNDING;
2111 -- Shift so we have 9 leading zeroes (we know R is non-zero)
2112 -- r.shift = clz(r.r) - 9
2113 opsel_r <= RES_SHIFT;
2115 if exp_tiny = '1' then
2116 v.shift := new_exp - min_exp;
2117 v.state := ROUND_UFLOW;
2118 elsif exp_huge = '1' then
2119 v.state := ROUND_OFLOW;
2121 v.shift := to_signed(-2, EXP_BITS);
2122 v.state := ROUNDING;
2126 -- r.shift = - amount by which exponent underflows
2128 if r.fpscr(FPSCR_UE) = '0' then
2129 -- disabled underflow exception case
2130 -- have to denormalize before rounding
2131 opsel_r <= RES_SHIFT;
2133 v.shift := to_signed(-2, EXP_BITS);
2134 v.state := ROUNDING;
2136 -- enabled underflow exception case
2137 -- if denormalized, have to normalize before rounding
2138 v.fpscr(FPSCR_UX) := '1';
2139 v.result_exp := r.result_exp + bias_exp;
2140 if r.r(54) = '0' then
2142 v.state := NORMALIZE;
2144 v.shift := to_signed(-2, EXP_BITS);
2145 v.state := ROUNDING;
2150 v.fpscr(FPSCR_OX) := '1';
2151 if r.fpscr(FPSCR_OE) = '0' then
2152 -- disabled overflow exception
2153 -- result depends on rounding mode
2154 v.fpscr(FPSCR_XX) := '1';
2155 v.fpscr(FPSCR_FI) := '1';
2156 if r.round_mode(1 downto 0) = "00" or
2157 (r.round_mode(1) = '1' and r.round_mode(0) = r.result_sign) then
2158 v.result_class := INFINITY;
2159 v.fpscr(FPSCR_FR) := '1';
2161 v.fpscr(FPSCR_FR) := '0';
2163 -- construct largest representable number
2164 v.result_exp := max_exp;
2165 opsel_r <= RES_MISC;
2166 misc_sel <= "001" & r.single_prec;
2169 -- enabled overflow exception
2170 v.result_exp := r.result_exp - bias_exp;
2171 v.shift := to_signed(-2, EXP_BITS);
2172 v.state := ROUNDING;
2177 round := fp_rounding(r.r, r.x, r.single_prec, r.round_mode, r.result_sign);
2178 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
2179 if round(1) = '1' then
2180 -- set mask to increment the LSB for the precision
2181 opsel_b <= BIN_MASK;
2183 v.shift := to_signed(-1, EXP_BITS);
2184 v.state := ROUNDING_2;
2186 if r.r(54) = '0' then
2187 -- result after masking could be zero, or could be a
2188 -- denormalized result that needs to be renormalized
2190 v.state := ROUNDING_3;
2195 if round(0) = '1' then
2196 v.fpscr(FPSCR_XX) := '1';
2197 if r.tiny = '1' then
2198 v.fpscr(FPSCR_UX) := '1';
2203 -- Check for overflow during rounding
2206 if r.r(55) = '1' then
2207 opsel_r <= RES_SHIFT;
2208 if exp_huge = '1' then
2209 v.state := ROUND_OFLOW;
2213 elsif r.r(54) = '0' then
2214 -- Do CLZ so we can renormalize the result
2216 v.state := ROUNDING_3;
2222 -- r.shift = clz(r.r) - 9
2223 mant_nz := r_hi_nz or (r_lo_nz and not r.single_prec);
2224 if mant_nz = '0' then
2225 v.result_class := ZERO;
2226 if r.is_subtract = '1' then
2227 -- set result sign depending on rounding mode
2228 v.result_sign := r.round_mode(1) and r.round_mode(0);
2232 -- Renormalize result after rounding
2233 opsel_r <= RES_SHIFT;
2234 v.denorm := exp_tiny;
2235 v.shift := new_exp - to_signed(-1022, EXP_BITS);
2236 if new_exp < to_signed(-1022, EXP_BITS) then
2244 -- r.shift = result_exp - -1022
2245 opsel_r <= RES_SHIFT;
2249 if (r.use_a = '1' and r.a.class = NAN and r.a.mantissa(53) = '0') or
2250 (r.use_b = '1' and r.b.class = NAN and r.b.mantissa(53) = '0') or
2251 (r.use_c = '1' and r.c.class = NAN and r.c.mantissa(53) = '0') then
2253 v.fpscr(FPSCR_VXSNAN) := '1';
2256 if r.use_a = '1' and r.a.class = NAN then
2258 elsif r.use_b = '1' and r.b.class = NAN then
2260 elsif r.use_c = '1' and r.c.class = NAN then
2263 v.state := EXC_RESULT;
2266 -- r.opsel_a = AIN_A, AIN_B or AIN_C according to which input is the result
2269 v.result_sign := r.b.negative xor r.negate;
2270 v.result_exp := r.b.exponent;
2271 v.result_class := r.b.class;
2273 v.result_sign := r.c.negative xor r.negate;
2274 v.result_exp := r.c.exponent;
2275 v.result_class := r.c.class;
2277 v.result_sign := r.a.negative xor r.negate;
2278 v.result_exp := r.a.exponent;
2279 v.result_class := r.a.class;
2285 if zero_divide = '1' then
2286 v.fpscr(FPSCR_ZX) := '1';
2288 if qnan_result = '1' then
2290 v.result_class := NAN;
2291 v.result_sign := '0';
2293 opsel_r <= RES_MISC;
2296 if invalid = '1' then
2299 if arith_done = '1' then
2300 -- Enabled invalid exception doesn't write result or FPRF
2301 -- Neither does enabled zero-divide exception
2302 if (v.invalid and r.fpscr(FPSCR_VE)) = '0' and
2303 (zero_divide and r.fpscr(FPSCR_ZE)) = '0' then
2304 v.writing_back := '1';
2305 v.update_fprf := '1';
2307 v.instr_done := '1';
2312 -- Multiplier and divide/square root data path
2315 f_to_multiply.data1 <= r.a.mantissa(61 downto 0) & "00";
2317 f_to_multiply.data1 <= r.b.mantissa(61 downto 0) & "00";
2319 f_to_multiply.data1 <= r.y;
2321 f_to_multiply.data1 <= r.r(61 downto 0) & "00";
2325 f_to_multiply.data2 <= r.c.mantissa(61 downto 0) & "00";
2327 f_to_multiply.data2 <= x"00" & inverse_est & '0' & x"000000000";
2329 f_to_multiply.data2 <= r.p;
2331 f_to_multiply.data2 <= r.r(61 downto 0) & "00";
2333 maddend := (others => '0');
2335 when MULADD_CONST =>
2336 -- addend is 2.0 or 1.5 in 16.112 format
2337 if r.is_sqrt = '0' then
2338 maddend(113) := '1'; -- 2.0
2340 maddend(112 downto 111) := "11"; -- 1.5
2343 -- addend is A in 16.112 format
2344 maddend(121 downto 58) := r.a.mantissa;
2346 -- addend is concatenation of R and S in 16.112 format
2347 maddend := "000000" & r.r & r.s & "00";
2350 if msel_inv = '1' then
2351 f_to_multiply.addend <= not maddend;
2353 f_to_multiply.addend <= maddend;
2355 f_to_multiply.not_result <= msel_inv;
2357 v.y := f_to_multiply.data2;
2359 if multiply_to_f.valid = '1' then
2360 if pshift = '0' then
2361 v.p := multiply_to_f.result(63 downto 0);
2363 v.p := multiply_to_f.result(119 downto 56);
2368 -- This has A and B input multiplexers, an adder, a shifter,
2369 -- count-leading-zeroes logic, and a result mux.
2370 if longmask = '1' then
2371 mshift := r.shift + to_signed(-29, EXP_BITS);
2375 if mshift < to_signed(-64, EXP_BITS) then
2376 mask := (others => '1');
2377 elsif mshift >= to_signed(0, EXP_BITS) then
2378 mask := (others => '0');
2380 mask := right_mask(unsigned(mshift(5 downto 0)));
2386 in_a0 := r.a.mantissa;
2388 in_a0 := r.b.mantissa;
2390 in_a0 := r.c.mantissa;
2392 if (or (mask and in_a0)) = '1' and set_x = '1' then
2395 if opsel_ainv = '1' then
2398 if opsel_amask = '1' then
2399 in_a0 := in_a0 and not mask;
2404 in_b0 := (others => '0');
2410 -- BIN_PS6, 6 LSBs of P/4 sign-extended to 64
2411 in_b0 := std_ulogic_vector(resize(signed(r.p(7 downto 2)), 64));
2413 if opsel_binv = '1' then
2417 if r.shift >= to_signed(-64, EXP_BITS) and r.shift <= to_signed(63, EXP_BITS) then
2418 shift_res := shifter_64(r.r & (shiftin or r.s(55)) & r.s(54 downto 0),
2419 std_ulogic_vector(r.shift(6 downto 0)));
2421 shift_res := (others => '0');
2425 result <= std_ulogic_vector(unsigned(in_a) + unsigned(in_b) + carry_in);
2427 result <= shift_res;
2429 result <= multiply_to_f.result(121 downto 58);
2433 misc := x"00000000" & (r.fpscr and fpscr_mask);
2435 -- generated QNaN mantissa
2436 misc := x"0020000000000000";
2438 -- mantissa of max representable DP number
2439 misc := x"007ffffffffffffc";
2441 -- mantissa of max representable SP number
2442 misc := x"007fffff80000000";
2445 misc := r.a.mantissa(31 downto 0) & r.b.mantissa(31 downto 0);
2448 misc := r.a.mantissa(63 downto 32) & r.b.mantissa(63 downto 32);
2450 misc := 10x"000" & inverse_est & 35x"000000000";
2452 -- max positive result for fctiw[z]
2453 misc := x"000000007fffffff";
2455 -- max negative result for fctiw[z]
2456 misc := x"ffffffff80000000";
2458 -- max positive result for fctiwu[z]
2459 misc := x"00000000ffffffff";
2461 -- max negative result for fctiwu[z]
2462 misc := x"0000000000000000";
2464 -- max positive result for fctid[z]
2465 misc := x"7fffffffffffffff";
2467 -- max negative result for fctid[z]
2468 misc := x"8000000000000000";
2470 -- max positive result for fctidu[z]
2471 misc := x"ffffffffffffffff";
2473 -- max negative result for fctidu[z]
2474 misc := x"0000000000000000";
2476 misc := x"0000000000000000";
2484 v.s := std_ulogic_vector(unsigned(not r.s) + (not r.x));
2486 v.s := multiply_to_f.result(57 downto 2);
2488 v.s := shift_res(63 downto 8);
2489 if shift_res(7 downto 0) /= x"00" then
2493 v.s := (others => '0');
2498 v.a.exponent := new_exp;
2499 v.a.mantissa := shift_res;
2502 v.b.exponent := new_exp;
2503 v.b.mantissa := shift_res;
2506 v.c.exponent := new_exp;
2507 v.c.mantissa := shift_res;
2510 if opsel_r = RES_SHIFT then
2511 v.result_exp := new_exp;
2514 if renormalize = '1' then
2515 clz := count_left_zeroes(r.r);
2516 if renorm_sqrt = '1' then
2517 -- make denormalized value end up with even exponent
2520 v.shift := resize(signed('0' & clz) - 9, EXP_BITS);
2523 if r.int_result = '1' then
2526 fp_result <= pack_dp(r.result_sign, r.result_class, r.result_exp, r.r,
2527 r.single_prec, r.quieten_nan);
2529 if r.update_fprf = '1' then
2530 v.fpscr(FPSCR_C downto FPSCR_FU) := result_flags(r.result_sign, r.result_class,
2531 r.r(54) and not r.denorm);
2534 v.fpscr(FPSCR_VX) := (or (v.fpscr(FPSCR_VXSNAN downto FPSCR_VXVC))) or
2535 (or (v.fpscr(FPSCR_VXSOFT downto FPSCR_VXCVI)));
2536 v.fpscr(FPSCR_FEX) := or (v.fpscr(FPSCR_VX downto FPSCR_XX) and
2537 v.fpscr(FPSCR_VE downto FPSCR_XE));
2538 if update_fx = '1' and
2539 (v.fpscr(FPSCR_VX downto FPSCR_XX) and not r.old_exc) /= "00000" then
2540 v.fpscr(FPSCR_FX) := '1';
2543 v.cr_result := v.fpscr(FPSCR_FX downto FPSCR_OX);
2546 if illegal = '1' then
2547 v.instr_done := '0';
2549 v.writing_back := '0';
2553 v.do_intr := v.instr_done and v.fpscr(FPSCR_FEX) and r.fe_mode;
2554 if v.state /= IDLE or v.do_intr = '1' then
2560 e_out.illegal <= illegal;
2563 end architecture behaviour;