1 -- Floating-point unit for Microwatt
4 use ieee.std_logic_1164.all;
5 use ieee.numeric_std.all;
8 use work.insn_helpers.all;
9 use work.decode_types.all;
10 use work.crhelpers.all;
19 e_in : in Execute1toFPUType;
20 e_out : out FPUToExecute1Type;
22 w_out : out FPUToWritebackType
26 architecture behaviour of fpu is
27 type fp_number_class is (ZERO, FINITE, INFINITY, NAN);
29 constant EXP_BITS : natural := 13;
31 type fpu_reg_type is record
32 class : fp_number_class;
33 negative : std_ulogic;
34 exponent : signed(EXP_BITS-1 downto 0); -- unbiased
35 mantissa : std_ulogic_vector(63 downto 0); -- 10.54 format
38 type state_t is (IDLE,
39 DO_MCRFS, DO_MTFSB, DO_MTFSFI, DO_MFFS, DO_MTFSF,
40 DO_FMR, DO_FMRG, DO_FCMP, DO_FTDIV, DO_FTSQRT,
43 DO_FADD, DO_FMUL, DO_FDIV, DO_FSQRT, DO_FMADD,
47 ADD_1, ADD_SHIFT, ADD_2, ADD_3,
50 FMADD_1, FMADD_2, FMADD_3,
51 FMADD_4, FMADD_5, FMADD_6,
53 DIV_2, DIV_3, DIV_4, DIV_5, DIV_6,
57 SQRT_1, SQRT_2, SQRT_3, SQRT_4,
58 SQRT_5, SQRT_6, SQRT_7, SQRT_8,
59 SQRT_9, SQRT_10, SQRT_11, SQRT_12,
60 INT_SHIFT, INT_ROUND, INT_ISHIFT,
61 INT_FINAL, INT_CHECK, INT_OFLOW,
63 ROUND_UFLOW, ROUND_OFLOW,
64 ROUNDING, ROUNDING_2, ROUNDING_3,
69 NAN_RESULT, EXC_RESULT);
71 type reg_type is record
74 instr_done : std_ulogic;
77 insn : std_ulogic_vector(31 downto 0);
78 dest_fpr : gspr_index_t;
82 single_prec : std_ulogic;
83 fpscr : std_ulogic_vector(31 downto 0);
87 r : std_ulogic_vector(63 downto 0); -- 10.54 format
88 s : std_ulogic_vector(55 downto 0); -- extended fraction
90 p : std_ulogic_vector(63 downto 0); -- 8.56 format
91 y : std_ulogic_vector(63 downto 0); -- 8.56 format
92 result_sign : std_ulogic;
93 result_class : fp_number_class;
94 result_exp : signed(EXP_BITS-1 downto 0);
95 shift : signed(EXP_BITS-1 downto 0);
96 writing_back : std_ulogic;
97 int_result : std_ulogic;
98 cr_result : std_ulogic_vector(3 downto 0);
99 cr_mask : std_ulogic_vector(7 downto 0);
100 old_exc : std_ulogic_vector(4 downto 0);
101 update_fprf : std_ulogic;
102 quieten_nan : std_ulogic;
105 round_mode : std_ulogic_vector(2 downto 0);
106 is_subtract : std_ulogic;
107 exp_cmp : std_ulogic;
108 madd_cmp : std_ulogic;
109 add_bsmall : std_ulogic;
110 is_multiply : std_ulogic;
111 is_sqrt : std_ulogic;
113 count : unsigned(1 downto 0);
114 doing_ftdiv : std_ulogic_vector(1 downto 0);
115 opsel_a : std_ulogic_vector(1 downto 0);
119 invalid : std_ulogic;
121 longmask : std_ulogic;
124 type lookup_table is array(0 to 1023) of std_ulogic_vector(17 downto 0);
126 signal r, rin : reg_type;
128 signal fp_result : std_ulogic_vector(63 downto 0);
129 signal opsel_b : std_ulogic_vector(1 downto 0);
130 signal opsel_r : std_ulogic_vector(1 downto 0);
131 signal opsel_s : std_ulogic_vector(1 downto 0);
132 signal opsel_ainv : std_ulogic;
133 signal opsel_mask : std_ulogic;
134 signal opsel_binv : std_ulogic;
135 signal in_a : std_ulogic_vector(63 downto 0);
136 signal in_b : std_ulogic_vector(63 downto 0);
137 signal result : std_ulogic_vector(63 downto 0);
138 signal carry_in : std_ulogic;
139 signal lost_bits : std_ulogic;
140 signal r_hi_nz : std_ulogic;
141 signal r_lo_nz : std_ulogic;
142 signal s_nz : std_ulogic;
143 signal misc_sel : std_ulogic_vector(3 downto 0);
144 signal f_to_multiply : MultiplyInputType;
145 signal multiply_to_f : MultiplyOutputType;
146 signal msel_1 : std_ulogic_vector(1 downto 0);
147 signal msel_2 : std_ulogic_vector(1 downto 0);
148 signal msel_add : std_ulogic_vector(1 downto 0);
149 signal msel_inv : std_ulogic;
150 signal inverse_est : std_ulogic_vector(18 downto 0);
153 constant AIN_R : std_ulogic_vector(1 downto 0) := "00";
154 constant AIN_A : std_ulogic_vector(1 downto 0) := "01";
155 constant AIN_B : std_ulogic_vector(1 downto 0) := "10";
156 constant AIN_C : std_ulogic_vector(1 downto 0) := "11";
158 constant BIN_ZERO : std_ulogic_vector(1 downto 0) := "00";
159 constant BIN_R : std_ulogic_vector(1 downto 0) := "01";
160 constant BIN_MASK : std_ulogic_vector(1 downto 0) := "10";
161 constant BIN_PS6 : std_ulogic_vector(1 downto 0) := "11";
163 constant RES_SUM : std_ulogic_vector(1 downto 0) := "00";
164 constant RES_SHIFT : std_ulogic_vector(1 downto 0) := "01";
165 constant RES_MULT : std_ulogic_vector(1 downto 0) := "10";
166 constant RES_MISC : std_ulogic_vector(1 downto 0) := "11";
168 constant S_ZERO : std_ulogic_vector(1 downto 0) := "00";
169 constant S_NEG : std_ulogic_vector(1 downto 0) := "01";
170 constant S_SHIFT : std_ulogic_vector(1 downto 0) := "10";
171 constant S_MULT : std_ulogic_vector(1 downto 0) := "11";
174 constant MUL1_A : std_ulogic_vector(1 downto 0) := "00";
175 constant MUL1_B : std_ulogic_vector(1 downto 0) := "01";
176 constant MUL1_Y : std_ulogic_vector(1 downto 0) := "10";
177 constant MUL1_R : std_ulogic_vector(1 downto 0) := "11";
179 constant MUL2_C : std_ulogic_vector(1 downto 0) := "00";
180 constant MUL2_LUT : std_ulogic_vector(1 downto 0) := "01";
181 constant MUL2_P : std_ulogic_vector(1 downto 0) := "10";
182 constant MUL2_R : std_ulogic_vector(1 downto 0) := "11";
184 constant MULADD_ZERO : std_ulogic_vector(1 downto 0) := "00";
185 constant MULADD_CONST : std_ulogic_vector(1 downto 0) := "01";
186 constant MULADD_A : std_ulogic_vector(1 downto 0) := "10";
187 constant MULADD_RS : std_ulogic_vector(1 downto 0) := "11";
189 -- Inverse lookup table, indexed by a 10-bit address made of a 2-bit block
190 -- select followed by the top 8 fraction bits. The first 256 entries are the
191 -- reciprocal (1/x) lookup table, and the remaining 768 entries are the reciprocal square root table.
192 -- Output range is [0.5, 1) in 0.19 format, though the top
193 -- bit isn't stored since it is always 1.
194 -- Each output value is the inverse of the center of the input
195 -- range for the value, i.e. entry 0 is 1 / (1 + 1/512),
196 -- entry 1 is 1 / (1 + 3/512), etc.
197 signal inverse_table : lookup_table := (
199 -- Unit bit is assumed to be 1, so input range is [1, 2)
200 18x"3fc01", 18x"3f411", 18x"3ec31", 18x"3e460", 18x"3dc9f", 18x"3d4ec", 18x"3cd49", 18x"3c5b5",
201 18x"3be2f", 18x"3b6b8", 18x"3af4f", 18x"3a7f4", 18x"3a0a7", 18x"39968", 18x"39237", 18x"38b14",
202 18x"383fe", 18x"37cf5", 18x"375f9", 18x"36f0a", 18x"36828", 18x"36153", 18x"35a8a", 18x"353ce",
203 18x"34d1e", 18x"3467a", 18x"33fe3", 18x"33957", 18x"332d7", 18x"32c62", 18x"325f9", 18x"31f9c",
204 18x"3194a", 18x"31303", 18x"30cc7", 18x"30696", 18x"30070", 18x"2fa54", 18x"2f443", 18x"2ee3d",
205 18x"2e841", 18x"2e250", 18x"2dc68", 18x"2d68b", 18x"2d0b8", 18x"2caee", 18x"2c52e", 18x"2bf79",
206 18x"2b9cc", 18x"2b429", 18x"2ae90", 18x"2a900", 18x"2a379", 18x"29dfb", 18x"29887", 18x"2931b",
207 18x"28db8", 18x"2885e", 18x"2830d", 18x"27dc4", 18x"27884", 18x"2734d", 18x"26e1d", 18x"268f6",
208 18x"263d8", 18x"25ec1", 18x"259b3", 18x"254ac", 18x"24fad", 18x"24ab7", 18x"245c8", 18x"240e1",
209 18x"23c01", 18x"23729", 18x"23259", 18x"22d90", 18x"228ce", 18x"22413", 18x"21f60", 18x"21ab4",
210 18x"2160f", 18x"21172", 18x"20cdb", 18x"2084b", 18x"203c2", 18x"1ff40", 18x"1fac4", 18x"1f64f",
211 18x"1f1e1", 18x"1ed79", 18x"1e918", 18x"1e4be", 18x"1e069", 18x"1dc1b", 18x"1d7d4", 18x"1d392",
212 18x"1cf57", 18x"1cb22", 18x"1c6f3", 18x"1c2ca", 18x"1bea7", 18x"1ba8a", 18x"1b672", 18x"1b261",
213 18x"1ae55", 18x"1aa50", 18x"1a64f", 18x"1a255", 18x"19e60", 18x"19a70", 18x"19686", 18x"192a2",
214 18x"18ec3", 18x"18ae9", 18x"18715", 18x"18345", 18x"17f7c", 18x"17bb7", 18x"177f7", 18x"1743d",
215 18x"17087", 18x"16cd7", 18x"1692c", 18x"16585", 18x"161e4", 18x"15e47", 18x"15ab0", 18x"1571d",
216 18x"1538e", 18x"15005", 18x"14c80", 18x"14900", 18x"14584", 18x"1420d", 18x"13e9b", 18x"13b2d",
217 18x"137c3", 18x"1345e", 18x"130fe", 18x"12da2", 18x"12a4a", 18x"126f6", 18x"123a7", 18x"1205c",
218 18x"11d15", 18x"119d2", 18x"11694", 18x"11359", 18x"11023", 18x"10cf1", 18x"109c2", 18x"10698",
219 18x"10372", 18x"10050", 18x"0fd31", 18x"0fa17", 18x"0f700", 18x"0f3ed", 18x"0f0de", 18x"0edd3",
220 18x"0eacb", 18x"0e7c7", 18x"0e4c7", 18x"0e1ca", 18x"0ded2", 18x"0dbdc", 18x"0d8eb", 18x"0d5fc",
221 18x"0d312", 18x"0d02b", 18x"0cd47", 18x"0ca67", 18x"0c78a", 18x"0c4b1", 18x"0c1db", 18x"0bf09",
222 18x"0bc3a", 18x"0b96e", 18x"0b6a5", 18x"0b3e0", 18x"0b11e", 18x"0ae5f", 18x"0aba3", 18x"0a8eb",
223 18x"0a636", 18x"0a383", 18x"0a0d4", 18x"09e28", 18x"09b80", 18x"098da", 18x"09637", 18x"09397",
224 18x"090fb", 18x"08e61", 18x"08bca", 18x"08936", 18x"086a5", 18x"08417", 18x"0818c", 18x"07f04",
225 18x"07c7e", 18x"079fc", 18x"0777c", 18x"074ff", 18x"07284", 18x"0700d", 18x"06d98", 18x"06b26",
226 18x"068b6", 18x"0664a", 18x"063e0", 18x"06178", 18x"05f13", 18x"05cb1", 18x"05a52", 18x"057f5",
227 18x"0559a", 18x"05342", 18x"050ed", 18x"04e9a", 18x"04c4a", 18x"049fc", 18x"047b0", 18x"04567",
228 18x"04321", 18x"040dd", 18x"03e9b", 18x"03c5c", 18x"03a1f", 18x"037e4", 18x"035ac", 18x"03376",
229 18x"03142", 18x"02f11", 18x"02ce2", 18x"02ab5", 18x"0288b", 18x"02663", 18x"0243d", 18x"02219",
230 18x"01ff7", 18x"01dd8", 18x"01bbb", 18x"019a0", 18x"01787", 18x"01570", 18x"0135b", 18x"01149",
231 18x"00f39", 18x"00d2a", 18x"00b1e", 18x"00914", 18x"0070c", 18x"00506", 18x"00302", 18x"00100",
232 -- 1/sqrt(x) lookup table
233 -- Input is in the range [1, 4), i.e. two bits to the left of the
234 -- binary point. Those 2 bits index the following 3 blocks of 256 values.
236 18x"3fe00", 18x"3fa06", 18x"3f612", 18x"3f224", 18x"3ee3a", 18x"3ea58", 18x"3e67c", 18x"3e2a4",
237 18x"3ded2", 18x"3db06", 18x"3d73e", 18x"3d37e", 18x"3cfc2", 18x"3cc0a", 18x"3c85a", 18x"3c4ae",
238 18x"3c106", 18x"3bd64", 18x"3b9c8", 18x"3b630", 18x"3b29e", 18x"3af10", 18x"3ab86", 18x"3a802",
239 18x"3a484", 18x"3a108", 18x"39d94", 18x"39a22", 18x"396b6", 18x"3934e", 18x"38fea", 18x"38c8c",
240 18x"38932", 18x"385dc", 18x"3828a", 18x"37f3e", 18x"37bf6", 18x"378b2", 18x"37572", 18x"37236",
241 18x"36efe", 18x"36bca", 18x"3689a", 18x"36570", 18x"36248", 18x"35f26", 18x"35c06", 18x"358ea",
242 18x"355d4", 18x"352c0", 18x"34fb0", 18x"34ca4", 18x"3499c", 18x"34698", 18x"34398", 18x"3409c",
243 18x"33da2", 18x"33aac", 18x"337bc", 18x"334cc", 18x"331e2", 18x"32efc", 18x"32c18", 18x"32938",
244 18x"3265a", 18x"32382", 18x"320ac", 18x"31dd8", 18x"31b0a", 18x"3183e", 18x"31576", 18x"312b0",
245 18x"30fee", 18x"30d2e", 18x"30a74", 18x"307ba", 18x"30506", 18x"30254", 18x"2ffa4", 18x"2fcf8",
246 18x"2fa4e", 18x"2f7a8", 18x"2f506", 18x"2f266", 18x"2efca", 18x"2ed2e", 18x"2ea98", 18x"2e804",
247 18x"2e572", 18x"2e2e4", 18x"2e058", 18x"2ddce", 18x"2db48", 18x"2d8c6", 18x"2d646", 18x"2d3c8",
248 18x"2d14c", 18x"2ced4", 18x"2cc5e", 18x"2c9ea", 18x"2c77a", 18x"2c50c", 18x"2c2a2", 18x"2c038",
249 18x"2bdd2", 18x"2bb70", 18x"2b90e", 18x"2b6b0", 18x"2b454", 18x"2b1fa", 18x"2afa4", 18x"2ad4e",
250 18x"2aafc", 18x"2a8ac", 18x"2a660", 18x"2a414", 18x"2a1cc", 18x"29f86", 18x"29d42", 18x"29b00",
251 18x"298c2", 18x"29684", 18x"2944a", 18x"29210", 18x"28fda", 18x"28da6", 18x"28b74", 18x"28946",
252 18x"28718", 18x"284ec", 18x"282c4", 18x"2809c", 18x"27e78", 18x"27c56", 18x"27a34", 18x"27816",
253 18x"275fa", 18x"273e0", 18x"271c8", 18x"26fb0", 18x"26d9c", 18x"26b8a", 18x"2697a", 18x"2676c",
254 18x"26560", 18x"26356", 18x"2614c", 18x"25f46", 18x"25d42", 18x"25b40", 18x"2593e", 18x"25740",
255 18x"25542", 18x"25348", 18x"2514e", 18x"24f58", 18x"24d62", 18x"24b6e", 18x"2497c", 18x"2478c",
256 18x"2459e", 18x"243b0", 18x"241c6", 18x"23fde", 18x"23df6", 18x"23c10", 18x"23a2c", 18x"2384a",
257 18x"2366a", 18x"2348c", 18x"232ae", 18x"230d2", 18x"22efa", 18x"22d20", 18x"22b4a", 18x"22976",
258 18x"227a2", 18x"225d2", 18x"22402", 18x"22234", 18x"22066", 18x"21e9c", 18x"21cd2", 18x"21b0a",
259 18x"21944", 18x"2177e", 18x"215ba", 18x"213fa", 18x"21238", 18x"2107a", 18x"20ebc", 18x"20d00",
260 18x"20b46", 18x"2098e", 18x"207d6", 18x"20620", 18x"2046c", 18x"202b8", 18x"20108", 18x"1ff58",
261 18x"1fda8", 18x"1fbfc", 18x"1fa50", 18x"1f8a4", 18x"1f6fc", 18x"1f554", 18x"1f3ae", 18x"1f208",
262 18x"1f064", 18x"1eec2", 18x"1ed22", 18x"1eb82", 18x"1e9e4", 18x"1e846", 18x"1e6aa", 18x"1e510",
263 18x"1e378", 18x"1e1e0", 18x"1e04a", 18x"1deb4", 18x"1dd20", 18x"1db8e", 18x"1d9fc", 18x"1d86c",
264 18x"1d6de", 18x"1d550", 18x"1d3c4", 18x"1d238", 18x"1d0ae", 18x"1cf26", 18x"1cd9e", 18x"1cc18",
265 18x"1ca94", 18x"1c910", 18x"1c78c", 18x"1c60a", 18x"1c48a", 18x"1c30c", 18x"1c18e", 18x"1c010",
266 18x"1be94", 18x"1bd1a", 18x"1bba0", 18x"1ba28", 18x"1b8b2", 18x"1b73c", 18x"1b5c6", 18x"1b452",
267 18x"1b2e0", 18x"1b16e", 18x"1affe", 18x"1ae8e", 18x"1ad20", 18x"1abb4", 18x"1aa46", 18x"1a8dc",
269 18x"1a772", 18x"1a608", 18x"1a4a0", 18x"1a33a", 18x"1a1d4", 18x"1a070", 18x"19f0c", 18x"19da8",
270 18x"19c48", 18x"19ae6", 18x"19986", 18x"19828", 18x"196ca", 18x"1956e", 18x"19412", 18x"192b8",
271 18x"1915e", 18x"19004", 18x"18eae", 18x"18d56", 18x"18c00", 18x"18aac", 18x"18958", 18x"18804",
272 18x"186b2", 18x"18562", 18x"18412", 18x"182c2", 18x"18174", 18x"18026", 18x"17eda", 18x"17d8e",
273 18x"17c44", 18x"17afa", 18x"179b2", 18x"1786a", 18x"17724", 18x"175de", 18x"17498", 18x"17354",
274 18x"17210", 18x"170ce", 18x"16f8c", 18x"16e4c", 18x"16d0c", 18x"16bcc", 18x"16a8e", 18x"16950",
275 18x"16814", 18x"166d8", 18x"1659e", 18x"16464", 18x"1632a", 18x"161f2", 18x"160ba", 18x"15f84",
276 18x"15e4e", 18x"15d1a", 18x"15be6", 18x"15ab2", 18x"15980", 18x"1584e", 18x"1571c", 18x"155ec",
277 18x"154bc", 18x"1538e", 18x"15260", 18x"15134", 18x"15006", 18x"14edc", 18x"14db0", 18x"14c86",
278 18x"14b5e", 18x"14a36", 18x"1490e", 18x"147e6", 18x"146c0", 18x"1459a", 18x"14476", 18x"14352",
279 18x"14230", 18x"1410c", 18x"13fea", 18x"13eca", 18x"13daa", 18x"13c8a", 18x"13b6c", 18x"13a4e",
280 18x"13930", 18x"13814", 18x"136f8", 18x"135dc", 18x"134c2", 18x"133a8", 18x"1328e", 18x"13176",
281 18x"1305e", 18x"12f48", 18x"12e30", 18x"12d1a", 18x"12c06", 18x"12af2", 18x"129de", 18x"128ca",
282 18x"127b8", 18x"126a6", 18x"12596", 18x"12486", 18x"12376", 18x"12266", 18x"12158", 18x"1204a",
283 18x"11f3e", 18x"11e32", 18x"11d26", 18x"11c1a", 18x"11b10", 18x"11a06", 18x"118fc", 18x"117f4",
284 18x"116ec", 18x"115e4", 18x"114de", 18x"113d8", 18x"112d2", 18x"111ce", 18x"110ca", 18x"10fc6",
285 18x"10ec2", 18x"10dc0", 18x"10cbe", 18x"10bbc", 18x"10abc", 18x"109bc", 18x"108bc", 18x"107be",
286 18x"106c0", 18x"105c2", 18x"104c4", 18x"103c8", 18x"102cc", 18x"101d0", 18x"100d6", 18x"0ffdc",
287 18x"0fee2", 18x"0fdea", 18x"0fcf0", 18x"0fbf8", 18x"0fb02", 18x"0fa0a", 18x"0f914", 18x"0f81e",
288 18x"0f72a", 18x"0f636", 18x"0f542", 18x"0f44e", 18x"0f35a", 18x"0f268", 18x"0f176", 18x"0f086",
289 18x"0ef94", 18x"0eea4", 18x"0edb4", 18x"0ecc6", 18x"0ebd6", 18x"0eae8", 18x"0e9fa", 18x"0e90e",
290 18x"0e822", 18x"0e736", 18x"0e64a", 18x"0e55e", 18x"0e474", 18x"0e38a", 18x"0e2a0", 18x"0e1b8",
291 18x"0e0d0", 18x"0dfe8", 18x"0df00", 18x"0de1a", 18x"0dd32", 18x"0dc4c", 18x"0db68", 18x"0da82",
292 18x"0d99e", 18x"0d8ba", 18x"0d7d6", 18x"0d6f4", 18x"0d612", 18x"0d530", 18x"0d44e", 18x"0d36c",
293 18x"0d28c", 18x"0d1ac", 18x"0d0cc", 18x"0cfee", 18x"0cf0e", 18x"0ce30", 18x"0cd54", 18x"0cc76",
294 18x"0cb9a", 18x"0cabc", 18x"0c9e0", 18x"0c906", 18x"0c82a", 18x"0c750", 18x"0c676", 18x"0c59c",
295 18x"0c4c4", 18x"0c3ea", 18x"0c312", 18x"0c23a", 18x"0c164", 18x"0c08c", 18x"0bfb6", 18x"0bee0",
296 18x"0be0a", 18x"0bd36", 18x"0bc62", 18x"0bb8c", 18x"0baba", 18x"0b9e6", 18x"0b912", 18x"0b840",
297 18x"0b76e", 18x"0b69c", 18x"0b5cc", 18x"0b4fa", 18x"0b42a", 18x"0b35a", 18x"0b28a", 18x"0b1bc",
298 18x"0b0ee", 18x"0b01e", 18x"0af50", 18x"0ae84", 18x"0adb6", 18x"0acea", 18x"0ac1e", 18x"0ab52",
299 18x"0aa86", 18x"0a9bc", 18x"0a8f0", 18x"0a826", 18x"0a75c", 18x"0a694", 18x"0a5ca", 18x"0a502",
300 18x"0a43a", 18x"0a372", 18x"0a2aa", 18x"0a1e4", 18x"0a11c", 18x"0a056", 18x"09f90", 18x"09ecc",
302 18x"09e06", 18x"09d42", 18x"09c7e", 18x"09bba", 18x"09af6", 18x"09a32", 18x"09970", 18x"098ae",
303 18x"097ec", 18x"0972a", 18x"09668", 18x"095a8", 18x"094e8", 18x"09426", 18x"09368", 18x"092a8",
304 18x"091e8", 18x"0912a", 18x"0906c", 18x"08fae", 18x"08ef0", 18x"08e32", 18x"08d76", 18x"08cba",
305 18x"08bfe", 18x"08b42", 18x"08a86", 18x"089ca", 18x"08910", 18x"08856", 18x"0879c", 18x"086e2",
306 18x"08628", 18x"08570", 18x"084b6", 18x"083fe", 18x"08346", 18x"0828e", 18x"081d8", 18x"08120",
307 18x"0806a", 18x"07fb4", 18x"07efe", 18x"07e48", 18x"07d92", 18x"07cde", 18x"07c2a", 18x"07b76",
308 18x"07ac2", 18x"07a0e", 18x"0795a", 18x"078a8", 18x"077f4", 18x"07742", 18x"07690", 18x"075de",
309 18x"0752e", 18x"0747c", 18x"073cc", 18x"0731c", 18x"0726c", 18x"071bc", 18x"0710c", 18x"0705e",
310 18x"06fae", 18x"06f00", 18x"06e52", 18x"06da4", 18x"06cf6", 18x"06c4a", 18x"06b9c", 18x"06af0",
311 18x"06a44", 18x"06998", 18x"068ec", 18x"06840", 18x"06796", 18x"066ea", 18x"06640", 18x"06596",
312 18x"064ec", 18x"06442", 18x"0639a", 18x"062f0", 18x"06248", 18x"061a0", 18x"060f8", 18x"06050",
313 18x"05fa8", 18x"05f00", 18x"05e5a", 18x"05db4", 18x"05d0e", 18x"05c68", 18x"05bc2", 18x"05b1c",
314 18x"05a76", 18x"059d2", 18x"0592e", 18x"05888", 18x"057e4", 18x"05742", 18x"0569e", 18x"055fa",
315 18x"05558", 18x"054b6", 18x"05412", 18x"05370", 18x"052ce", 18x"0522e", 18x"0518c", 18x"050ec",
316 18x"0504a", 18x"04faa", 18x"04f0a", 18x"04e6a", 18x"04dca", 18x"04d2c", 18x"04c8c", 18x"04bee",
317 18x"04b50", 18x"04ab0", 18x"04a12", 18x"04976", 18x"048d8", 18x"0483a", 18x"0479e", 18x"04700",
318 18x"04664", 18x"045c8", 18x"0452c", 18x"04490", 18x"043f6", 18x"0435a", 18x"042c0", 18x"04226",
319 18x"0418a", 18x"040f0", 18x"04056", 18x"03fbe", 18x"03f24", 18x"03e8c", 18x"03df2", 18x"03d5a",
320 18x"03cc2", 18x"03c2a", 18x"03b92", 18x"03afa", 18x"03a62", 18x"039cc", 18x"03934", 18x"0389e",
321 18x"03808", 18x"03772", 18x"036dc", 18x"03646", 18x"035b2", 18x"0351c", 18x"03488", 18x"033f2",
322 18x"0335e", 18x"032ca", 18x"03236", 18x"031a2", 18x"03110", 18x"0307c", 18x"02fea", 18x"02f56",
323 18x"02ec4", 18x"02e32", 18x"02da0", 18x"02d0e", 18x"02c7c", 18x"02bec", 18x"02b5a", 18x"02aca",
324 18x"02a38", 18x"029a8", 18x"02918", 18x"02888", 18x"027f8", 18x"0276a", 18x"026da", 18x"0264a",
325 18x"025bc", 18x"0252e", 18x"024a0", 18x"02410", 18x"02384", 18x"022f6", 18x"02268", 18x"021da",
326 18x"0214e", 18x"020c0", 18x"02034", 18x"01fa8", 18x"01f1c", 18x"01e90", 18x"01e04", 18x"01d78",
327 18x"01cee", 18x"01c62", 18x"01bd8", 18x"01b4c", 18x"01ac2", 18x"01a38", 18x"019ae", 18x"01924",
328 18x"0189c", 18x"01812", 18x"01788", 18x"01700", 18x"01676", 18x"015ee", 18x"01566", 18x"014de",
329 18x"01456", 18x"013ce", 18x"01346", 18x"012c0", 18x"01238", 18x"011b2", 18x"0112c", 18x"010a4",
330 18x"0101e", 18x"00f98", 18x"00f12", 18x"00e8c", 18x"00e08", 18x"00d82", 18x"00cfe", 18x"00c78",
331 18x"00bf4", 18x"00b70", 18x"00aec", 18x"00a68", 18x"009e4", 18x"00960", 18x"008dc", 18x"00858",
332 18x"007d6", 18x"00752", 18x"006d0", 18x"0064e", 18x"005cc", 18x"0054a", 18x"004c8", 18x"00446",
333 18x"003c4", 18x"00342", 18x"002c2", 18x"00240", 18x"001c0", 18x"00140", 18x"000c0", 18x"00040"
336 -- Left and right shifter with 120 bit input and 64 bit output.
337 -- Shifts inp left by shift bits and returns the upper 64 bits of
338 -- the result. The shift parameter is interpreted as a signed
339 -- number in the range -64..63, with negative values indicating
-- a right shift.
--
-- The shift is applied as three cascaded selection stages:
--   shift(6 downto 5) selects a 95-bit window s1 of inp,
--   shift(4 downto 3) selects a 71-bit window s2 of s1,
--   shift(2 downto 0) selects the final 64-bit window of s2.
-- NOTE(review): the "when" choice lines of each case statement are not
-- visible in this excerpt; the per-line notes below describe what each
-- slice does, not which shift value selects it.
341 function shifter_64(inp: std_ulogic_vector(119 downto 0);
342 shift: std_ulogic_vector(6 downto 0))
343 return std_ulogic_vector is
344 variable s1 : std_ulogic_vector(94 downto 0); -- output of the coarse stage
345 variable s2 : std_ulogic_vector(70 downto 0); -- output of the middle stage
346 variable result : std_ulogic_vector(63 downto 0); -- output of the fine stage
348 case shift(6 downto 5) is -- coarse stage: alternatives are 32 bits apart
350 s1 := inp(119 downto 25); -- top 95 bits of inp, no coarse shift
352 s1 := inp(87 downto 0) & "0000000"; -- inp moved left by 32 bits, zero filled on the right
354 s1 := x"0000000000000000" & inp(119 downto 89); -- inp moved right by 64 bits, zero filled on the left
356 s1 := x"00000000" & inp(119 downto 57); -- inp moved right by 32 bits, zero filled on the left
358 case shift(4 downto 3) is -- middle stage: alternatives are 8 bits apart
360 s2 := s1(94 downto 24); -- top 71 bits of s1, no further shift
362 s2 := s1(86 downto 16); -- s1 moved left by 8 bits
364 s2 := s1(78 downto 8); -- s1 moved left by 16 bits
366 s2 := s1(70 downto 0); -- s1 moved left by 24 bits
368 case shift(2 downto 0) is -- fine stage: alternatives are 1 bit apart
370 result := s2(70 downto 7); -- top 64 bits of s2, no further shift
372 result := s2(69 downto 6); -- s2 moved left by 1 bit
374 result := s2(68 downto 5); -- s2 moved left by 2 bits
376 result := s2(67 downto 4); -- s2 moved left by 3 bits
378 result := s2(66 downto 3); -- s2 moved left by 4 bits
380 result := s2(65 downto 2); -- s2 moved left by 5 bits
382 result := s2(64 downto 1); -- s2 moved left by 6 bits
384 result := s2(63 downto 0); -- s2 moved left by 7 bits
389 -- Generate a mask with 0-bits on the left and 1-bits on the right which
390 -- selects the bits that will be lost in doing a right shift. The shift
391 -- parameter is the bottom 6 bits of a negative shift count,
392 -- indicating a right shift.
-- Returns a 64-bit vector.
393 function right_mask(shift: unsigned(5 downto 0)) return std_ulogic_vector is
394 variable result: std_ulogic_vector(63 downto 0); -- mask under construction
396 result := (others => '0'); -- start from an all-zero mask
397 for i in 0 to 63 loop -- i = 0 addresses bit 63, i = 63 addresses bit 0
-- NOTE(review): the condition guarding the next assignment is not visible
-- in this excerpt; it decides which positions i receive a 1-bit.
399 result(63 - i) := '1';
405 -- Split a DP floating-point number into components and work out its class.
406 -- If is_int = 1, the input is considered an integer
--
-- Field layout used for a floating-point input: sign in bit 63, an 11-bit
-- exponent field in bits 62..52 and a 52-bit fraction in bits 51..0.
-- The returned mantissa carries the unit bit at bit 54, the fraction in
-- bits 53..2, and zeros in bits 63..55 and 1..0.
-- NOTE(review): several control lines (the is_int test, "else", "end if",
-- "return") are not visible in this excerpt; notes about which branch a
-- statement belongs to are assumptions to confirm against the full file.
407 function decode_dp(fpr: std_ulogic_vector(63 downto 0); is_int: std_ulogic) return fpu_reg_type is
408 variable r : fpu_reg_type; -- decoded value being built
409 variable exp_nz : std_ulogic; -- exponent field is non-zero
410 variable exp_ao : std_ulogic; -- exponent field is all ones
411 variable frac_nz : std_ulogic; -- fraction field is non-zero
412 variable cls : std_ulogic_vector(2 downto 0); -- exp_ao & exp_nz & frac_nz, used to pick the class
414 r.negative := fpr(63);
415 exp_nz := or (fpr(62 downto 52));
416 exp_ao := and (fpr(62 downto 52));
417 frac_nz := or (fpr(51 downto 0));
419 r.exponent := signed(resize(unsigned(fpr(62 downto 52)), EXP_BITS)) - to_signed(1023, EXP_BITS); -- remove the bias of 1023
421 r.exponent := to_signed(-1022, EXP_BITS); -- override; guard not visible here, presumably exp_nz = '0' -- TODO confirm
423 r.mantissa := "000000000" & exp_nz & fpr(51 downto 0) & "00"; -- exp_nz supplies the unit bit (bit 54)
424 cls := exp_ao & exp_nz & frac_nz;
426 when "000" => r.class := ZERO;
427 when "001" => r.class := FINITE; -- denormalized
428 when "010" => r.class := FINITE;
429 when "011" => r.class := FINITE;
430 when "110" => r.class := INFINITY;
431 when others => r.class := NAN;
435 r.exponent := (others => '0'); -- presumably the is_int = 1 path -- TODO confirm
436 if (fpr(63) or exp_nz or frac_nz) = '1' then -- true when any of the 64 input bits is set
445 -- Construct a DP floating-point result from components
--
-- The mantissa argument is read with its unit bit at bit 54 and the fraction
-- in bits 53..2. When single_prec = '1' the low 29 fraction bits of the
-- result are left at zero.
-- NOTE(review): the "case class" header and its "when" labels are not visible
-- in this excerpt; the grouping notes below are inferred from the values
-- written and should be confirmed against the full file.
446 function pack_dp(sign: std_ulogic; class: fp_number_class; exp: signed(EXP_BITS-1 downto 0);
447 mantissa: std_ulogic_vector; single_prec: std_ulogic; quieten_nan: std_ulogic)
448 return std_ulogic_vector is
449 variable result : std_ulogic_vector(63 downto 0); -- packed 64-bit image
451 result := (others => '0');
456 if mantissa(54) = '1' then -- unit bit set; otherwise the exponent field stays zero
458 result(62 downto 52) := std_ulogic_vector(resize(exp, 11) + 1023); -- re-apply the bias of 1023
460 result(51 downto 29) := mantissa(53 downto 31); -- upper 23 fraction bits
461 if single_prec = '0' then
462 result(28 downto 0) := mantissa(30 downto 2); -- lower 29 fraction bits, double precision only
465 result(62 downto 52) := "11111111111"; -- all-ones exponent field
467 result(62 downto 52) := "11111111111"; -- all-ones exponent field
468 result(51) := quieten_nan or mantissa(53); -- top fraction bit is forced to 1 when quieten_nan is set
469 result(50 downto 29) := mantissa(52 downto 31);
470 if single_prec = '0' then
471 result(28 downto 0) := mantissa(30 downto 2);
477 -- Determine whether to increment when rounding
478 -- Returns rounding_inc & inexact
479 -- Assumes x includes the bottom 29 bits of the mantissa already
480 -- if single_prec = 1 (usually arranged by setting set_x = 1 earlier).
--
-- The decision is based on three bits, grx, taken from just below the
-- retained part of the mantissa: bits 1..0 followed by x for double
-- precision, bits 30..29 followed by x for single precision.
-- rn(1 downto 0) selects the rounding mode; rn(2) = '1' disables the
-- tie-to-even special case in round-to-nearest mode.
-- NOTE(review): one parameter line and several assignment lines of this
-- function are not visible in this excerpt.
481 function fp_rounding(mantissa: std_ulogic_vector(63 downto 0); x: std_ulogic;
482 single_prec: std_ulogic; rn: std_ulogic_vector(2 downto 0);
484 return std_ulogic_vector is
485 variable grx : std_ulogic_vector(2 downto 0); -- the three bits below the retained mantissa
486 variable ret : std_ulogic_vector(1 downto 0); -- return value, see header
487 variable lsb : std_ulogic; -- consulted only for the tie case below
489 if single_prec = '0' then
490 grx := mantissa(1 downto 0) & x; -- double precision
493 grx := mantissa(30 downto 29) & x; -- single precision
498 case rn(1 downto 0) is
499 when "00" => -- round to nearest
500 if grx = "100" and rn(2) = '0' then
501 ret(1) := lsb; -- tie, round to even
505 when "01" => -- round towards zero
506 when others => -- round towards +/- inf
508 -- round towards greater magnitude
515 -- Determine result flags to write into the FPSCR
--
-- Each visible return value is a 5-bit vector built from sign and, in one
-- case, unitbit.
-- NOTE(review): the "case class" header and "when" labels are not visible in
-- this excerpt, so which class produces which value must be confirmed
-- against the full file.
516 function result_flags(sign: std_ulogic; class: fp_number_class; unitbit: std_ulogic)
517 return std_ulogic_vector is
521 return sign & "0010"; -- top bit carries the sign
523 return (not unitbit) & sign & (not sign) & "00"; -- top bit is set when the unit bit is clear
525 return '0' & sign & (not sign) & "01";
532 fpu_multiply_0: entity work.multiply
535 m_in => f_to_multiply,
536 m_out => multiply_to_f
541 if rising_edge(clk) then
547 r.fpscr <= (others => '0');
548 r.writing_back <= '0';
550 assert not (r.state /= IDLE and e_in.valid = '1') severity failure;
556 -- synchronous reads from lookup table
--
-- On each rising clock edge, reads one entry of inverse_table and registers
-- it in inverse_est with a '1' prepended as the top bit.
-- The 10-bit address is addrhi followed by bits 53..46 of r.b.mantissa.
557 lut_access: process(clk)
558 variable addrhi : std_ulogic_vector(1 downto 0); -- upper two address bits
559 variable addr : std_ulogic_vector(9 downto 0); -- full table index
561 if rising_edge(clk) then
562 if r.is_sqrt = '1' then
563 addrhi := r.b.mantissa(55 downto 54); -- value used when r.is_sqrt is set; the alternative is not visible in this excerpt
567 addr := addrhi & r.b.mantissa(53 downto 46);
568 inverse_est <= '1' & inverse_table(to_integer(unsigned(addr))); -- 18-bit entry widened to 19 bits
-- Drive the e_out and w_out ports from the registered state r.
572 e_out.busy <= r.busy;
573 e_out.exception <= r.fpscr(FPSCR_FEX);
574 e_out.interrupt <= r.do_intr;
576 w_out.valid <= r.instr_done and not r.do_intr; -- completion is suppressed while do_intr is set
577 w_out.write_enable <= r.writing_back;
578 w_out.write_reg <= r.dest_fpr;
579 w_out.write_data <= fp_result;
580 w_out.write_cr_enable <= r.instr_done and (r.rc or r.is_cmp);
581 w_out.write_cr_mask <= r.cr_mask; -- selects which of the replicated fields below is written
-- The 4-bit cr_result is replicated eight times to fill 32 bits.
582 w_out.write_cr_data <= r.cr_result & r.cr_result & r.cr_result & r.cr_result &
583 r.cr_result & r.cr_result & r.cr_result & r.cr_result;
586 variable v : reg_type;
587 variable adec : fpu_reg_type;
588 variable bdec : fpu_reg_type;
589 variable cdec : fpu_reg_type;
590 variable fpscr_mask : std_ulogic_vector(31 downto 0);
591 variable illegal : std_ulogic;
592 variable j, k : integer;
593 variable flm : std_ulogic_vector(7 downto 0);
594 variable int_input : std_ulogic;
595 variable mask : std_ulogic_vector(63 downto 0);
596 variable in_a0 : std_ulogic_vector(63 downto 0);
597 variable in_b0 : std_ulogic_vector(63 downto 0);
598 variable misc : std_ulogic_vector(63 downto 0);
599 variable shift_res : std_ulogic_vector(63 downto 0);
600 variable round : std_ulogic_vector(1 downto 0);
601 variable update_fx : std_ulogic;
602 variable arith_done : std_ulogic;
603 variable invalid : std_ulogic;
604 variable zero_divide : std_ulogic;
605 variable mant_nz : std_ulogic;
606 variable min_exp : signed(EXP_BITS-1 downto 0);
607 variable max_exp : signed(EXP_BITS-1 downto 0);
608 variable bias_exp : signed(EXP_BITS-1 downto 0);
609 variable new_exp : signed(EXP_BITS-1 downto 0);
610 variable exp_tiny : std_ulogic;
611 variable exp_huge : std_ulogic;
612 variable renormalize : std_ulogic;
613 variable clz : std_ulogic_vector(5 downto 0);
614 variable set_x : std_ulogic;
615 variable mshift : signed(EXP_BITS-1 downto 0);
616 variable need_check : std_ulogic;
617 variable msb : std_ulogic;
618 variable is_add : std_ulogic;
619 variable set_a : std_ulogic;
620 variable set_b : std_ulogic;
621 variable set_c : std_ulogic;
622 variable set_y : std_ulogic;
623 variable set_s : std_ulogic;
624 variable qnan_result : std_ulogic;
625 variable px_nz : std_ulogic;
626 variable pcmpb_eq : std_ulogic;
627 variable pcmpb_lt : std_ulogic;
628 variable pshift : std_ulogic;
629 variable renorm_sqrt : std_ulogic;
630 variable sqrt_exp : signed(EXP_BITS-1 downto 0);
631 variable shiftin : std_ulogic;
632 variable mulexp : signed(EXP_BITS-1 downto 0);
633 variable maddend : std_ulogic_vector(127 downto 0);
634 variable sum : std_ulogic_vector(63 downto 0);
641 -- capture incoming instruction
642 if e_in.valid = '1' then
645 v.fe_mode := or (e_in.fe_mode);
646 v.dest_fpr := e_in.frt;
647 v.single_prec := e_in.single;
648 v.longmask := e_in.single;
651 v.is_cmp := e_in.out_cr;
652 if e_in.out_cr = '0' then
653 v.cr_mask := num_to_fxm(1);
655 v.cr_mask := num_to_fxm(to_integer(unsigned(insn_bf(e_in.insn))));
658 if e_in.op = OP_FPOP_I then
661 v.quieten_nan := '1';
664 v.round_mode := '0' & r.fpscr(FPSCR_RN+1 downto FPSCR_RN);
665 v.is_subtract := '0';
666 v.is_multiply := '0';
669 v.doing_ftdiv := "00";
671 adec := decode_dp(e_in.fra, int_input);
672 bdec := decode_dp(e_in.frb, int_input);
673 cdec := decode_dp(e_in.frc, int_input);
679 if adec.exponent > bdec.exponent then
683 if (adec.exponent + cdec.exponent + 1) >= bdec.exponent then
688 r_hi_nz <= or (r.r(55 downto 31));
689 r_lo_nz <= or (r.r(30 downto 2));
692 if r.single_prec = '0' then
693 if r.doing_ftdiv(1) = '0' then
694 max_exp := to_signed(1023, EXP_BITS);
696 max_exp := to_signed(1020, EXP_BITS);
698 if r.doing_ftdiv(0) = '0' then
699 min_exp := to_signed(-1022, EXP_BITS);
701 min_exp := to_signed(-1021, EXP_BITS);
703 bias_exp := to_signed(1536, EXP_BITS);
705 max_exp := to_signed(127, EXP_BITS);
706 min_exp := to_signed(-126, EXP_BITS);
707 bias_exp := to_signed(192, EXP_BITS);
709 new_exp := r.result_exp - r.shift;
712 if new_exp < min_exp then
715 if new_exp > max_exp then
719 -- Compare P with zero and with B
720 px_nz := or (r.p(57 downto 4));
722 if r.p(59 downto 4) = r.b.mantissa(55 downto 0) then
726 if unsigned(r.p(59 downto 4)) < unsigned(r.b.mantissa(55 downto 0)) then
730 v.writing_back := '0';
732 v.update_fprf := '0';
733 v.shift := to_signed(0, EXP_BITS);
744 fpscr_mask := (others => '1');
756 f_to_multiply.is_32bit <= '0';
757 f_to_multiply.valid <= '0';
760 msel_add <= MULADD_ZERO;
773 if e_in.valid = '1' then
774 case e_in.insn(5 downto 1) is
776 if e_in.insn(8) = '1' then
777 if e_in.insn(6) = '0' then
780 v.state := DO_FTSQRT;
782 elsif e_in.insn(7) = '1' then
789 if e_in.insn(10) = '0' then
790 if e_in.insn(8) = '0' then
793 v.state := DO_MTFSFI;
799 if e_in.insn(8) = '0' then
806 if e_in.insn(9 downto 8) /= "11" then
816 if int_input = '1' then
823 v.round_mode := "001";
828 if v.b.mantissa(54) = '0' and v.a.mantissa(54) = '1' then
832 when "10100" | "10101" =>
845 v.is_multiply := '1';
847 if v.c.mantissa(54) = '0' and v.a.mantissa(54) = '1' then
854 v.state := DO_FRSQRTE;
855 when "11100" | "11101" | "11110" | "11111" =>
856 if v.a.mantissa(54) = '0' then
858 elsif v.c.mantissa(54) = '0' then
869 v.old_exc := r.fpscr(FPSCR_VX downto FPSCR_XX);
873 j := to_integer(unsigned(insn_bfa(r.insn)));
877 v.cr_result := r.fpscr(k + 3 downto k);
878 fpscr_mask(k + 3 downto k) := "0000";
881 v.fpscr := r.fpscr and (fpscr_mask or x"6007F8FF");
888 v.cr_result := "0000";
-- fg_flag for ftdiv: set when A is infinite, or B is zero, infinite or
-- denormalized. A finite B is denormalized when the unit bit of its
-- mantissa is clear; the unit bit is bit 54 (bit 53 is the top fraction bit).
889 if r.a.class = INFINITY or r.b.class = ZERO or r.b.class = INFINITY or
890 (r.b.class = FINITE and r.b.mantissa(54) = '0') then
891 v.cr_result(2) := '1';
893 if r.a.class = NAN or r.a.class = INFINITY or
894 r.b.class = NAN or r.b.class = ZERO or r.b.class = INFINITY or
895 (r.a.class = FINITE and r.a.exponent <= to_signed(-970, EXP_BITS)) then
896 v.cr_result(1) := '1';
898 v.doing_ftdiv := "11";
-- ftsqrt result flags, starting from all clear.
907 v.cr_result := "0000";
-- fg_flag: B is zero, infinite or denormalized. A finite B is denormalized
-- when the unit bit of its mantissa (bit 54) is clear.
908 if r.b.class = ZERO or r.b.class = INFINITY or
909 (r.b.class = FINITE and r.b.mantissa(54) = '0') then
910 v.cr_result(2) := '1';
-- fe_flag: B is a NaN, infinite, zero or negative, or its exponent is
-- at or below -970. The flag must be set to '1' here; cr_result was cleared
-- above, so assigning '0' would leave it permanently clear.
912 if r.b.class = NAN or r.b.class = INFINITY or r.b.class = ZERO
913 or r.b.negative = '1' or r.b.exponent <= to_signed(-970, EXP_BITS) then
914 v.cr_result(1) := '1';
923 v.result_exp := r.b.exponent;
924 if (r.a.class = NAN and r.a.mantissa(53) = '0') or
925 (r.b.class = NAN and r.b.mantissa(53) = '0') then
927 v.fpscr(FPSCR_VXSNAN) := '1';
928 if r.insn(6) = '1' and r.fpscr(FPSCR_VE) = '0' then
929 v.fpscr(FPSCR_VXVC) := '1';
932 v.cr_result := "0001"; -- unordered
933 elsif r.a.class = NAN or r.b.class = NAN then
934 if r.insn(6) = '1' then
936 v.fpscr(FPSCR_VXVC) := '1';
939 v.cr_result := "0001"; -- unordered
940 elsif r.a.class = ZERO and r.b.class = ZERO then
941 v.cr_result := "0010"; -- equal
942 elsif r.a.negative /= r.b.negative then
943 v.cr_result := r.a.negative & r.b.negative & "00";
944 elsif r.a.class = ZERO then
945 -- A and B are the same sign from here down
946 v.cr_result := not r.b.negative & r.b.negative & "00";
947 elsif r.a.class = INFINITY then
948 if r.b.class = INFINITY then
949 v.cr_result := "0010";
951 v.cr_result := r.a.negative & not r.a.negative & "00";
953 elsif r.b.class = ZERO then
954 -- A is finite from here down
955 v.cr_result := r.a.negative & not r.a.negative & "00";
956 elsif r.b.class = INFINITY then
957 v.cr_result := not r.b.negative & r.b.negative & "00";
958 elsif r.exp_cmp = '1' then
959 -- A and B are both finite from here down
960 v.cr_result := r.a.negative & not r.a.negative & "00";
961 elsif r.a.exponent /= r.b.exponent then
962 -- A exponent is smaller than B
963 v.cr_result := not r.a.negative & r.a.negative & "00";
965 -- Prepare to subtract mantissas, put B in R
966 v.cr_result := "0000";
971 v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result;
975 j := to_integer(unsigned(insn_bt(r.insn)));
976 for i in 0 to 31 loop
978 v.fpscr(31 - i) := r.insn(6);
986 j := to_integer(unsigned(insn_bf(r.insn)));
987 if r.insn(16) = '0' then
991 v.fpscr(k + 3 downto k) := insn_u(r.insn);
1000 opsel_r <= RES_MISC;
1001 misc_sel <= "01" & r.insn(8) & '0';
1002 v.int_result := '1';
1003 v.writing_back := '1';
1004 v.instr_done := '1';
1008 v.int_result := '1';
1009 v.writing_back := '1';
1010 opsel_r <= RES_MISC;
1011 case r.insn(20 downto 16) is
1016 v.fpscr(FPSCR_VE downto FPSCR_XE) := "00000";
1017 when "10100" | "10101" =>
1018 -- mffscdrn[i] (but we don't implement DRN)
1019 fpscr_mask := x"000000FF";
1022 fpscr_mask := x"000000FF";
1023 v.fpscr(FPSCR_RN+1 downto FPSCR_RN) :=
1024 r.b.mantissa(FPSCR_RN+1 downto FPSCR_RN);
1027 fpscr_mask := x"000000FF";
1028 v.fpscr(FPSCR_RN+1 downto FPSCR_RN) := r.insn(12 downto 11);
1031 fpscr_mask := x"0007F0FF";
1035 v.instr_done := '1';
1039 if r.insn(25) = '1' then
1041 elsif r.insn(16) = '1' then
1044 flm := r.insn(24 downto 17);
1046 for i in 0 to 7 loop
1048 if flm(i) = '1' then
1049 v.fpscr(k + 3 downto k) := r.b.mantissa(k + 3 downto k);
1052 v.instr_done := '1';
1056 -- r.opsel_a = AIN_B
1057 v.result_class := r.b.class;
1058 v.result_exp := r.b.exponent;
1059 v.quieten_nan := '0';
1060 if r.insn(9) = '1' then
1061 v.result_sign := '0'; -- fabs
1062 elsif r.insn(8) = '1' then
1063 v.result_sign := '1'; -- fnabs
1064 elsif r.insn(7) = '1' then
1065 v.result_sign := r.b.negative; -- fmr
1066 elsif r.insn(6) = '1' then
1067 v.result_sign := not r.b.negative; -- fneg
1069 v.result_sign := r.a.negative; -- fcpsgn
1071 v.writing_back := '1';
1072 v.instr_done := '1';
1075 when DO_FRI => -- fri[nzpm]
1076 -- r.opsel_a = AIN_B
1077 v.result_class := r.b.class;
1078 v.result_sign := r.b.negative;
1079 v.result_exp := r.b.exponent;
1080 v.fpscr(FPSCR_FR) := '0';
1081 v.fpscr(FPSCR_FI) := '0';
1082 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1084 v.fpscr(FPSCR_VXSNAN) := '1';
1087 if r.b.class = FINITE then
1088 if r.b.exponent >= to_signed(52, EXP_BITS) then
1089 -- integer already, no rounding required
1092 v.shift := r.b.exponent - to_signed(52, EXP_BITS);
1094 v.round_mode := '1' & r.insn(7 downto 6);
1101 -- r.opsel_a = AIN_B, r.shift = 0
1102 v.result_class := r.b.class;
1103 v.result_sign := r.b.negative;
1104 v.result_exp := r.b.exponent;
1105 v.fpscr(FPSCR_FR) := '0';
1106 v.fpscr(FPSCR_FI) := '0';
1107 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1109 v.fpscr(FPSCR_VXSNAN) := '1';
1113 if r.b.class = FINITE then
1114 if r.b.exponent < to_signed(-126, EXP_BITS) then
1115 v.shift := r.b.exponent - to_signed(-126, EXP_BITS);
1116 v.state := ROUND_UFLOW;
1117 elsif r.b.exponent > to_signed(127, EXP_BITS) then
1118 v.state := ROUND_OFLOW;
1120 v.shift := to_signed(-2, EXP_BITS);
1121 v.state := ROUNDING;
1128 -- instr bit 9: 1=dword 0=word
1129 -- instr bit 8: 1=unsigned 0=signed
1130 -- instr bit 1: 1=round to zero 0=use fpscr[RN]
1131 -- r.opsel_a = AIN_B
1132 v.result_class := r.b.class;
1133 v.result_sign := r.b.negative;
1134 v.result_exp := r.b.exponent;
1135 v.fpscr(FPSCR_FR) := '0';
1136 v.fpscr(FPSCR_FI) := '0';
1137 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1139 v.fpscr(FPSCR_VXSNAN) := '1';
1143 v.int_result := '1';
1148 if r.b.exponent >= to_signed(64, EXP_BITS) or
1149 (r.insn(9) = '0' and r.b.exponent >= to_signed(32, EXP_BITS)) then
1150 v.state := INT_OFLOW;
1151 elsif r.b.exponent >= to_signed(52, EXP_BITS) then
1152 -- integer already, no rounding required,
1153 -- shift into final position
1154 v.shift := r.b.exponent - to_signed(54, EXP_BITS);
1155 if r.insn(8) = '1' and r.b.negative = '1' then
1156 v.state := INT_OFLOW;
1158 v.state := INT_ISHIFT;
1161 v.shift := r.b.exponent - to_signed(52, EXP_BITS);
1162 v.state := INT_SHIFT;
1164 when INFINITY | NAN =>
1165 v.state := INT_OFLOW;
1169 -- r.opsel_a = AIN_B
1170 v.result_sign := '0';
1171 if r.insn(8) = '0' and r.b.negative = '1' then
1172 -- fcfid[s] with negative operand, set R = -B
1175 v.result_sign := '1';
1177 v.result_class := r.b.class;
1178 v.result_exp := to_signed(54, EXP_BITS);
1179 v.fpscr(FPSCR_FR) := '0';
1180 v.fpscr(FPSCR_FI) := '0';
1181 if r.b.class = ZERO then
1188 -- fadd[s] and fsub[s]
1189 -- r.opsel_a = AIN_A
1190 v.result_sign := r.a.negative;
1191 v.result_class := r.a.class;
1192 v.result_exp := r.a.exponent;
1193 v.fpscr(FPSCR_FR) := '0';
1194 v.fpscr(FPSCR_FI) := '0';
1197 is_add := r.a.negative xor r.b.negative xor r.insn(1);
1198 if r.a.class = FINITE and r.b.class = FINITE then
1199 v.is_subtract := not is_add;
1200 v.add_bsmall := r.exp_cmp;
1202 if r.exp_cmp = '0' then
1203 v.shift := r.a.exponent - r.b.exponent;
1204 v.result_sign := r.b.negative xnor r.insn(1);
1205 if r.a.exponent = r.b.exponent then
1209 v.state := ADD_SHIFT;
1215 if r.a.class = NAN or r.b.class = NAN then
1216 v.state := NAN_RESULT;
1217 elsif r.a.class = INFINITY and r.b.class = INFINITY and is_add = '0' then
1218 -- invalid operation, construct QNaN
1219 v.fpscr(FPSCR_VXISI) := '1';
1222 elsif r.a.class = ZERO and r.b.class = ZERO and is_add = '0' then
1223 -- return -0 for rounding to -infinity
1224 v.result_sign := r.round_mode(1) and r.round_mode(0);
1226 elsif r.a.class = INFINITY or r.b.class = ZERO then
1229 v.state := EXC_RESULT;
1233 v.negate := not r.insn(1);
1234 v.state := EXC_RESULT;
1240 -- r.opsel_a = AIN_A unless C is denorm and A isn't
1241 v.result_sign := r.a.negative xor r.c.negative;
1242 v.result_class := r.a.class;
1243 v.fpscr(FPSCR_FR) := '0';
1244 v.fpscr(FPSCR_FI) := '0';
1247 if r.a.class = FINITE and r.c.class = FINITE then
1248 v.result_exp := r.a.exponent + r.c.exponent;
1249 -- Renormalize denorm operands
1250 if r.a.mantissa(54) = '0' then
1251 v.state := RENORM_A;
1252 elsif r.c.mantissa(54) = '0' then
1253 v.state := RENORM_C;
1255 f_to_multiply.valid <= '1';
1259 if r.a.class = NAN or r.c.class = NAN then
1260 v.state := NAN_RESULT;
1261 elsif (r.a.class = INFINITY and r.c.class = ZERO) or
1262 (r.a.class = ZERO and r.c.class = INFINITY) then
1263 -- invalid operation, construct QNaN
1264 v.fpscr(FPSCR_VXIMZ) := '1';
1266 elsif r.a.class = ZERO or r.a.class = INFINITY then
1270 -- r.c.class is ZERO or INFINITY
1272 v.negate := r.a.negative;
1273 v.state := EXC_RESULT;
1278 -- r.opsel_a = AIN_A unless B is denorm and A isn't
1279 v.result_class := r.a.class;
1280 v.fpscr(FPSCR_FR) := '0';
1281 v.fpscr(FPSCR_FI) := '0';
1284 v.result_sign := r.a.negative xor r.b.negative;
1285 v.result_exp := r.a.exponent - r.b.exponent;
1287 if r.a.class = FINITE and r.b.class = FINITE then
1288 -- Renormalize denorm operands
1289 if r.a.mantissa(54) = '0' then
1290 v.state := RENORM_A;
1291 elsif r.b.mantissa(54) = '0' then
1292 v.state := RENORM_B;
1298 if r.a.class = NAN or r.b.class = NAN then
1299 v.state := NAN_RESULT;
1300 elsif r.b.class = INFINITY then
1301 if r.a.class = INFINITY then
1302 v.fpscr(FPSCR_VXIDI) := '1';
1305 v.result_class := ZERO;
1308 elsif r.b.class = ZERO then
1309 if r.a.class = ZERO then
1310 v.fpscr(FPSCR_VXZDZ) := '1';
1313 if r.a.class = FINITE then
1316 v.result_class := INFINITY;
1319 else -- r.b.class = FINITE, result_class = r.a.class
1325 v.fpscr(FPSCR_FR) := '0';
1326 v.fpscr(FPSCR_FI) := '0';
1327 if r.a.class = ZERO or (r.a.negative = '0' and r.a.class /= NAN) then
1332 v.quieten_nan := '0';
1333 v.state := EXC_RESULT;
1336 -- r.opsel_a = AIN_B
1337 v.result_class := r.b.class;
1338 v.result_sign := r.b.negative;
1339 v.fpscr(FPSCR_FR) := '0';
1340 v.fpscr(FPSCR_FI) := '0';
1344 v.result_exp := r.b.exponent;
1345 if r.b.negative = '1' then
1346 v.fpscr(FPSCR_VXSQRT) := '1';
1348 elsif r.b.mantissa(54) = '0' then
1349 v.state := RENORM_B;
1350 elsif r.b.exponent(0) = '0' then
1353 v.shift := to_signed(1, EXP_BITS);
1354 v.state := RENORM_B2;
1357 v.state := NAN_RESULT;
1362 if r.b.negative = '1' then
1363 v.fpscr(FPSCR_VXSQRT) := '1';
1371 -- r.opsel_a = AIN_B
1372 v.result_class := r.b.class;
1373 v.result_sign := r.b.negative;
1374 v.fpscr(FPSCR_FR) := '0';
1375 v.fpscr(FPSCR_FI) := '0';
1379 v.result_exp := - r.b.exponent;
1380 if r.b.mantissa(54) = '0' then
1381 v.state := RENORM_B;
1386 v.state := NAN_RESULT;
1388 v.result_class := ZERO;
1391 v.result_class := INFINITY;
1397 -- r.opsel_a = AIN_B
1398 v.result_class := r.b.class;
1399 v.result_sign := r.b.negative;
1400 v.fpscr(FPSCR_FR) := '0';
1401 v.fpscr(FPSCR_FI) := '0';
1403 v.shift := to_signed(1, EXP_BITS);
1406 v.result_exp := r.b.exponent;
1407 if r.b.negative = '1' then
1408 v.fpscr(FPSCR_VXSQRT) := '1';
1410 elsif r.b.mantissa(54) = '0' then
1411 v.state := RENORM_B;
1412 elsif r.b.exponent(0) = '0' then
1415 v.state := RENORM_B2;
1418 v.state := NAN_RESULT;
1420 if r.b.negative = '1' then
1421 v.fpscr(FPSCR_VXSQRT) := '1';
1424 v.result_class := ZERO;
1428 v.result_class := INFINITY;
1434 -- fmadd, fmsub, fnmadd, fnmsub
1435 -- r.opsel_a = AIN_A if A is denorm, else AIN_C if C is denorm,
1437 v.result_sign := r.a.negative;
1438 v.result_class := r.a.class;
1439 v.result_exp := r.a.exponent;
1440 v.fpscr(FPSCR_FR) := '0';
1441 v.fpscr(FPSCR_FI) := '0';
1445 is_add := r.a.negative xor r.c.negative xor r.b.negative xor r.insn(1);
1446 if r.a.class = FINITE and r.c.class = FINITE and
1447 (r.b.class = FINITE or r.b.class = ZERO) then
1448 v.is_subtract := not is_add;
1449 mulexp := r.a.exponent + r.c.exponent;
1450 v.result_exp := mulexp;
1451 -- Make sure A and C are normalized
1452 if r.a.mantissa(54) = '0' then
1453 v.state := RENORM_A;
1454 elsif r.c.mantissa(54) = '0' then
1455 v.state := RENORM_C;
1456 elsif r.b.class = ZERO then
1457 -- no addend, degenerates to multiply
1458 v.result_sign := r.a.negative xor r.c.negative xor r.insn(2);
1459 f_to_multiply.valid <= '1';
1460 v.is_multiply := '1';
1462 elsif r.madd_cmp = '0' then
1463 -- addend is bigger, do multiply first
1464 v.result_sign := not (r.b.negative xor r.insn(1) xor r.insn(2));
1465 f_to_multiply.valid <= '1';
1468 -- product is bigger, shift B right and use it as the
1469 -- addend to the multiplier
1470 v.shift := r.b.exponent - mulexp + to_signed(64, EXP_BITS);
1471 -- for subtract, multiplier does B - A * C
1472 v.result_sign := not (r.a.negative xor r.c.negative xor r.insn(2) xor is_add);
1473 v.result_exp := r.b.exponent;
1477 if r.a.class = NAN or r.b.class = NAN or r.c.class = NAN then
1478 v.state := NAN_RESULT;
1479 elsif (r.a.class = ZERO and r.c.class = INFINITY) or
1480 (r.a.class = INFINITY and r.c.class = ZERO) then
1481 -- invalid operation, construct QNaN
1482 v.fpscr(FPSCR_VXIMZ) := '1';
1484 elsif r.a.class = INFINITY or r.c.class = INFINITY then
1485 if r.b.class = INFINITY and is_add = '0' then
1486 -- invalid operation, construct QNaN
1487 v.fpscr(FPSCR_VXISI) := '1';
1490 -- result is infinity
1491 v.result_class := INFINITY;
1492 v.result_sign := r.a.negative xor r.c.negative xor r.insn(2);
1496 -- Here A is zero, C is zero, or B is infinity
1497 -- Result is +/-B in all of those cases
1499 if r.b.class /= ZERO or is_add = '1' then
1500 v.negate := not (r.insn(1) xor r.insn(2));
1502 -- have to be careful about rule for 0 - 0 result sign
1503 v.negate := r.b.negative xor (r.round_mode(1) and r.round_mode(0)) xor r.insn(2);
1505 v.state := EXC_RESULT;
1511 v.state := RENORM_A2;
1512 if r.insn(4) = '1' then
1519 -- r.opsel_a = AIN_C for fmul/fmadd, AIN_B for fdiv
1521 v.result_exp := new_exp;
1522 if r.insn(4) = '1' then
1523 if r.c.mantissa(54) = '1' then
1524 if r.insn(3) = '0' or r.b.class = ZERO then
1529 if new_exp + 1 >= r.b.exponent then
1533 v.state := DO_FMADD;
1536 v.state := RENORM_C;
1539 if r.b.mantissa(54) = '1' then
1543 v.state := RENORM_B;
1549 renorm_sqrt := r.is_sqrt;
1550 v.state := RENORM_B2;
1554 if r.is_sqrt = '0' then
1555 v.result_exp := r.result_exp + r.shift;
1557 v.result_exp := new_exp;
1564 v.state := RENORM_C2;
1568 v.result_exp := new_exp;
1569 if r.insn(3) = '0' or r.b.class = ZERO then
1574 if new_exp + 1 >= r.b.exponent then
1578 v.state := DO_FMADD;
1582 -- transferring B to R
1583 v.shift := r.b.exponent - r.a.exponent;
1584 v.result_exp := r.b.exponent;
1586 v.state := ADD_SHIFT;
1589 -- r.shift = - exponent difference, r.longmask = 0
1590 opsel_r <= RES_SHIFT;
1593 v.longmask := r.single_prec;
1594 if r.add_bsmall = '1' then
1602 -- r.opsel_a = AIN_A if r.add_bsmall = 1 else AIN_B
1604 opsel_binv <= r.is_subtract;
1605 carry_in <= r.is_subtract and not r.x;
1606 v.shift := to_signed(-1, EXP_BITS);
1610 -- check for overflow or negative result (can't get both)
1612 if r.r(63) = '1' then
1613 -- result is opposite sign to expected
1614 v.result_sign := not r.result_sign;
1618 elsif r.r(55) = '1' then
1619 -- sum overflowed, shift right
1620 opsel_r <= RES_SHIFT;
1622 v.shift := to_signed(-2, EXP_BITS);
1623 if exp_huge = '1' then
1624 v.state := ROUND_OFLOW;
1626 v.state := ROUNDING;
1628 elsif r.r(54) = '1' then
1630 v.shift := to_signed(-2, EXP_BITS);
1631 v.state := ROUNDING;
1632 elsif (r_hi_nz or r_lo_nz or r.r(1) or r.r(0)) = '0' then
1633 -- r.x must be zero at this point
1634 v.result_class := ZERO;
1635 if r.is_subtract = '1' then
1636 -- set result sign depending on rounding mode
1637 v.result_sign := r.round_mode(1) and r.round_mode(0);
1642 v.state := NORMALIZE;
1646 -- r.opsel_a = AIN_A
1653 if r.r(63) = '1' then
1654 -- A is smaller in magnitude
1655 v.cr_result := not r.a.negative & r.a.negative & "00";
1656 elsif (r_hi_nz or r_lo_nz) = '0' then
1657 v.cr_result := "0010";
1659 v.cr_result := r.a.negative & not r.a.negative & "00";
1661 v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result;
1662 v.instr_done := '1';
1666 f_to_multiply.valid <= r.first;
1667 opsel_r <= RES_MULT;
1668 if multiply_to_f.valid = '1' then
1673 -- Addend is bigger here
1674 v.result_sign := not (r.b.negative xor r.insn(1) xor r.insn(2));
1675 -- note v.shift is at most -2 here
1676 v.shift := r.result_exp - r.b.exponent;
1677 opsel_r <= RES_MULT;
1680 f_to_multiply.valid <= r.first;
1681 if multiply_to_f.valid = '1' then
1683 v.state := ADD_SHIFT;
1687 -- Product is potentially bigger here
1688 -- r.shift = addend exp - product exp + 64, r.r = r.b.mantissa
1691 v.shift := r.shift - to_signed(64, EXP_BITS);
1695 -- r.shift = addend exp - product exp
1696 opsel_r <= RES_SHIFT;
1701 msel_add <= MULADD_RS;
1702 f_to_multiply.valid <= r.first;
1703 msel_inv <= r.is_subtract;
1704 opsel_r <= RES_MULT;
1707 v.shift := to_signed(56, EXP_BITS);
1708 if multiply_to_f.valid = '1' then
1709 if multiply_to_f.result(121) = '1' then
1718 v.result_sign := not r.result_sign;
1720 carry_in <= not (s_nz or r.x);
1723 v.shift := to_signed(56, EXP_BITS);
1727 -- r.shift = 56 (or 0, but only if r is now nonzero)
1728 if (r.r(56) or r_hi_nz or r_lo_nz or r.r(1) or r.r(0)) = '0' then
1730 -- must be a subtraction, and r.x must be zero
1731 v.result_class := ZERO;
1732 v.result_sign := r.round_mode(1) and r.round_mode(0);
1735 -- R is all zeroes but there are non-zero bits in S
1736 -- so shift them into R and set S to 0
1737 opsel_r <= RES_SHIFT;
1739 -- stay in state FMADD_6
1741 elsif r.r(56 downto 54) = "001" then
1745 v.state := NORMALIZE;
1749 -- r.opsel_a = AIN_B
1750 -- wait one cycle for inverse_table[B] lookup
1752 if r.insn(4) = '0' then
1753 if r.insn(3) = '0' then
1758 elsif r.insn(2) = '0' then
1765 -- compute Y = inverse_table[B] (when count=0); P = 2 - B * Y
1767 msel_add <= MULADD_CONST;
1776 f_to_multiply.valid <= r.first;
1777 if multiply_to_f.valid = '1' then
1779 v.count := r.count + 1;
1784 -- compute Y = P = P * Y
1787 f_to_multiply.valid <= r.first;
1789 if multiply_to_f.valid = '1' then
1799 -- compute R = P = A * Y (quotient)
1803 f_to_multiply.valid <= r.first;
1805 if multiply_to_f.valid = '1' then
1806 opsel_r <= RES_MULT;
1812 -- compute P = A - B * R (remainder)
1815 msel_add <= MULADD_A;
1817 f_to_multiply.valid <= r.first;
1818 if multiply_to_f.valid = '1' then
1823 -- test if remainder is 0 or >= B
1824 if pcmpb_lt = '1' then
1825 -- quotient is correct, set X if remainder non-zero
1826 v.x := r.p(58) or px_nz;
1828 -- quotient needs to be incremented by 1
1830 v.x := not pcmpb_eq;
1835 opsel_r <= RES_MISC;
1837 v.shift := to_signed(1, EXP_BITS);
1838 v.state := NORMALIZE;
1841 v.cr_result(1) := exp_tiny or exp_huge;
1842 if exp_tiny = '1' or exp_huge = '1' or r.a.class = ZERO or r.first = '0' then
1843 v.instr_done := '1';
1846 v.shift := r.a.exponent;
1847 v.doing_ftdiv := "10";
1851 opsel_r <= RES_MISC;
1853 sqrt_exp := r.b.exponent(EXP_BITS-1) & r.b.exponent(EXP_BITS-1 downto 1);
1854 v.result_exp := - sqrt_exp;
1855 v.shift := to_signed(1, EXP_BITS);
1856 v.state := NORMALIZE;
1859 -- put invsqr[B] in R and compute P = invsqr[B] * B
1860 -- also transfer B (in R) to A
1862 opsel_r <= RES_MISC;
1866 f_to_multiply.valid <= '1';
1867 v.shift := to_signed(-1, EXP_BITS);
1872 -- shift R right one place
1873 -- not expecting multiplier result yet
1875 opsel_r <= RES_SHIFT;
1880 -- put R into Y, wait for product from multiplier
1884 if multiply_to_f.valid = '1' then
1885 -- put result into R
1886 opsel_r <= RES_MULT;
1892 -- compute 1.5 - Y * P
1895 msel_add <= MULADD_CONST;
1897 f_to_multiply.valid <= r.first;
1899 if multiply_to_f.valid = '1' then
1904 -- compute Y = Y * P
1907 f_to_multiply.valid <= '1';
1912 -- pipeline in R = R * P
1915 f_to_multiply.valid <= r.first;
1917 if multiply_to_f.valid = '1' then
1923 -- first multiply is done, put result in Y
1926 -- wait for second multiply (should be here already)
1928 if multiply_to_f.valid = '1' then
1929 -- put result into R
1930 opsel_r <= RES_MULT;
1932 v.count := r.count + 1;
1942 -- compute P = A - R * R, which can be +ve or -ve
1943 -- we arranged for B to be put into A earlier
1946 msel_add <= MULADD_A;
1949 f_to_multiply.valid <= r.first;
1950 if multiply_to_f.valid = '1' then
1956 -- compute P = P * Y
1957 -- since Y is an estimate of 1/sqrt(B), this makes P an
1958 -- estimate of the adjustment needed to R. Since the error
1959 -- could be negative and we have an unsigned multiplier, the
1960 -- upper bits can be wrong, but it turns out the lowest 8 bits
1961 -- are correct and are all we need (given 3 iterations through
1962 -- SQRT_4 to SQRT_7).
1966 f_to_multiply.valid <= r.first;
1967 if multiply_to_f.valid = '1' then
1972 -- Add the bottom 8 bits of P, sign-extended,
1973 -- divided by 4, onto R.
1974 -- The division by 4 is because R is 10.54 format
1975 -- whereas P is 8.56 format.
1977 sqrt_exp := r.b.exponent(EXP_BITS-1) & r.b.exponent(EXP_BITS-1 downto 1);
1978 v.result_exp := sqrt_exp;
1979 v.shift := to_signed(1, EXP_BITS);
1984 -- compute P = A - R * R (remainder)
1985 -- also put 2 * R + 1 into B for comparison with P
1988 msel_add <= MULADD_A;
1990 f_to_multiply.valid <= r.first;
1993 if multiply_to_f.valid = '1' then
1998 -- test if remainder is 0 or >= B = 2*R + 1
1999 if pcmpb_lt = '1' then
2000 -- square root is correct, set X if remainder non-zero
2001 v.x := r.p(58) or px_nz;
2003 -- square root needs to be incremented by 1
2005 v.x := not pcmpb_eq;
2010 -- r.shift = b.exponent - 52
2011 opsel_r <= RES_SHIFT;
2013 v.state := INT_ROUND;
2014 v.shift := to_signed(-2, EXP_BITS);
2018 opsel_r <= RES_SHIFT;
2019 round := fp_rounding(r.r, r.x, '0', r.round_mode, r.result_sign);
2020 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
2021 -- Check for negative values that don't round to 0 for fcti*u*
2022 if r.insn(8) = '1' and r.result_sign = '1' and
2023 (r_hi_nz or r_lo_nz or v.fpscr(FPSCR_FR)) = '1' then
2024 v.state := INT_OFLOW;
2026 v.state := INT_FINAL;
2030 -- r.shift = b.exponent - 54;
2031 opsel_r <= RES_SHIFT;
2032 v.state := INT_FINAL;
2035 -- Negate if necessary, and increment for rounding if needed
2036 opsel_ainv <= r.result_sign;
2037 carry_in <= r.fpscr(FPSCR_FR) xor r.result_sign;
2038 -- Check for possible overflows
2039 case r.insn(9 downto 8) is
2040 when "00" => -- fctiw[z]
2041 need_check := r.r(31) or (r.r(30) and not r.result_sign);
2042 when "01" => -- fctiwu[z]
2043 need_check := r.r(31);
2044 when "10" => -- fctid[z]
2045 need_check := r.r(63) or (r.r(62) and not r.result_sign);
2046 when others => -- fctidu[z]
2047 need_check := r.r(63);
2049 if need_check = '1' then
2050 v.state := INT_CHECK;
2052 if r.fpscr(FPSCR_FI) = '1' then
2053 v.fpscr(FPSCR_XX) := '1';
2059 if r.insn(9) = '0' then
2064 misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
2065 if (r.insn(8) = '0' and msb /= r.result_sign) or
2066 (r.insn(8) = '1' and msb /= '1') then
2067 opsel_r <= RES_MISC;
2068 v.fpscr(FPSCR_VXCVI) := '1';
2071 if r.fpscr(FPSCR_FI) = '1' then
2072 v.fpscr(FPSCR_XX) := '1';
2078 opsel_r <= RES_MISC;
2079 misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
2080 if r.b.class = NAN then
2083 v.fpscr(FPSCR_VXCVI) := '1';
2088 -- r.shift = b.exponent - 52
2089 opsel_r <= RES_SHIFT;
2091 v.shift := to_signed(-2, EXP_BITS);
2092 v.state := ROUNDING;
2095 if r.is_multiply = '1' and px_nz = '1' then
2098 if r.r(63 downto 54) /= "0000000001" then
2100 v.state := NORMALIZE;
2103 if exp_tiny = '1' then
2104 v.shift := new_exp - min_exp;
2105 v.state := ROUND_UFLOW;
2106 elsif exp_huge = '1' then
2107 v.state := ROUND_OFLOW;
2109 v.shift := to_signed(-2, EXP_BITS);
2110 v.state := ROUNDING;
2115 -- Shift so we have 9 leading zeroes (we know R is non-zero)
2116 -- r.shift = clz(r.r) - 9
2117 opsel_r <= RES_SHIFT;
2119 if exp_tiny = '1' then
2120 v.shift := new_exp - min_exp;
2121 v.state := ROUND_UFLOW;
2122 elsif exp_huge = '1' then
2123 v.state := ROUND_OFLOW;
2125 v.shift := to_signed(-2, EXP_BITS);
2126 v.state := ROUNDING;
2130 -- r.shift = - amount by which exponent underflows
2132 if r.fpscr(FPSCR_UE) = '0' then
2133 -- disabled underflow exception case
2134 -- have to denormalize before rounding
2135 opsel_r <= RES_SHIFT;
2137 v.shift := to_signed(-2, EXP_BITS);
2138 v.state := ROUNDING;
2140 -- enabled underflow exception case
2141 -- if denormalized, have to normalize before rounding
2142 v.fpscr(FPSCR_UX) := '1';
2143 v.result_exp := r.result_exp + bias_exp;
2144 if r.r(54) = '0' then
2146 v.state := NORMALIZE;
2148 v.shift := to_signed(-2, EXP_BITS);
2149 v.state := ROUNDING;
2154 v.fpscr(FPSCR_OX) := '1';
2155 if r.fpscr(FPSCR_OE) = '0' then
2156 -- disabled overflow exception
2157 -- result depends on rounding mode
2158 v.fpscr(FPSCR_XX) := '1';
2159 v.fpscr(FPSCR_FI) := '1';
2160 if r.round_mode(1 downto 0) = "00" or
2161 (r.round_mode(1) = '1' and r.round_mode(0) = r.result_sign) then
2162 v.result_class := INFINITY;
2163 v.fpscr(FPSCR_FR) := '1';
2165 v.fpscr(FPSCR_FR) := '0';
2167 -- construct largest representable number
2168 v.result_exp := max_exp;
2169 opsel_r <= RES_MISC;
2170 misc_sel <= "001" & r.single_prec;
2173 -- enabled overflow exception
2174 v.result_exp := r.result_exp - bias_exp;
2175 v.shift := to_signed(-2, EXP_BITS);
2176 v.state := ROUNDING;
2181 round := fp_rounding(r.r, r.x, r.single_prec, r.round_mode, r.result_sign);
2182 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
2183 if round(1) = '1' then
2184 -- set mask to increment the LSB for the precision
2185 opsel_b <= BIN_MASK;
2187 v.shift := to_signed(-1, EXP_BITS);
2188 v.state := ROUNDING_2;
2190 if r.r(54) = '0' then
2191 -- result after masking could be zero, or could be a
2192 -- denormalized result that needs to be renormalized
2194 v.state := ROUNDING_3;
2199 if round(0) = '1' then
2200 v.fpscr(FPSCR_XX) := '1';
2201 if r.tiny = '1' then
2202 v.fpscr(FPSCR_UX) := '1';
2207 -- Check for overflow during rounding
2210 if r.r(55) = '1' then
2211 opsel_r <= RES_SHIFT;
2212 if exp_huge = '1' then
2213 v.state := ROUND_OFLOW;
2217 elsif r.r(54) = '0' then
2218 -- Do CLZ so we can renormalize the result
2220 v.state := ROUNDING_3;
2226 -- r.shift = clz(r.r) - 9
2227 mant_nz := r_hi_nz or (r_lo_nz and not r.single_prec);
2228 if mant_nz = '0' then
2229 v.result_class := ZERO;
2230 if r.is_subtract = '1' then
2231 -- set result sign depending on rounding mode
2232 v.result_sign := r.round_mode(1) and r.round_mode(0);
2236 -- Renormalize result after rounding
2237 opsel_r <= RES_SHIFT;
2238 v.denorm := exp_tiny;
2239 v.shift := new_exp - to_signed(-1022, EXP_BITS);
2240 if new_exp < to_signed(-1022, EXP_BITS) then
2248 -- r.shift = result_exp - -1022
2249 opsel_r <= RES_SHIFT;
2253 if (r.use_a = '1' and r.a.class = NAN and r.a.mantissa(53) = '0') or
2254 (r.use_b = '1' and r.b.class = NAN and r.b.mantissa(53) = '0') or
2255 (r.use_c = '1' and r.c.class = NAN and r.c.mantissa(53) = '0') then
2257 v.fpscr(FPSCR_VXSNAN) := '1';
2260 if r.use_a = '1' and r.a.class = NAN then
2262 elsif r.use_b = '1' and r.b.class = NAN then
2264 elsif r.use_c = '1' and r.c.class = NAN then
2267 v.state := EXC_RESULT;
2270 -- r.opsel_a = AIN_A, AIN_B or AIN_C according to which input is the result
2273 v.result_sign := r.b.negative xor r.negate;
2274 v.result_exp := r.b.exponent;
2275 v.result_class := r.b.class;
2277 v.result_sign := r.c.negative xor r.negate;
2278 v.result_exp := r.c.exponent;
2279 v.result_class := r.c.class;
2281 v.result_sign := r.a.negative xor r.negate;
2282 v.result_exp := r.a.exponent;
2283 v.result_class := r.a.class;
2289 if zero_divide = '1' then
2290 v.fpscr(FPSCR_ZX) := '1';
2292 if qnan_result = '1' then
2294 v.result_class := NAN;
2295 v.result_sign := '0';
2297 opsel_r <= RES_MISC;
2300 if invalid = '1' then
2303 if arith_done = '1' then
2304 -- Enabled invalid exception doesn't write result or FPRF
2305 -- Neither does enabled zero-divide exception
2306 if (v.invalid and r.fpscr(FPSCR_VE)) = '0' and
2307 (zero_divide and r.fpscr(FPSCR_ZE)) = '0' then
2308 v.writing_back := '1';
2309 v.update_fprf := '1';
2311 v.instr_done := '1';
2316 -- Multiplier and divide/square root data path
2319 f_to_multiply.data1 <= r.a.mantissa(61 downto 0) & "00";
2321 f_to_multiply.data1 <= r.b.mantissa(61 downto 0) & "00";
2323 f_to_multiply.data1 <= r.y;
2325 f_to_multiply.data1 <= r.r(61 downto 0) & "00";
2329 f_to_multiply.data2 <= r.c.mantissa(61 downto 0) & "00";
2331 f_to_multiply.data2 <= x"00" & inverse_est & '0' & x"000000000";
2333 f_to_multiply.data2 <= r.p;
2335 f_to_multiply.data2 <= r.r(61 downto 0) & "00";
2337 maddend := (others => '0');
2339 when MULADD_CONST =>
2340 -- addend is 2.0 or 1.5 in 16.112 format
2341 if r.is_sqrt = '0' then
2342 maddend(113) := '1'; -- 2.0
2344 maddend(112 downto 111) := "11"; -- 1.5
2347 -- addend is A in 16.112 format
2348 maddend(121 downto 58) := r.a.mantissa;
2350 -- addend is concatenation of R and S in 16.112 format
2351 maddend := "000000" & r.r & r.s & "00";
2354 if msel_inv = '1' then
2355 f_to_multiply.addend <= not maddend;
2357 f_to_multiply.addend <= maddend;
2359 f_to_multiply.not_result <= msel_inv;
2361 v.y := f_to_multiply.data2;
2363 if multiply_to_f.valid = '1' then
2364 if pshift = '0' then
2365 v.p := multiply_to_f.result(63 downto 0);
2367 v.p := multiply_to_f.result(119 downto 56);
2372 -- This has A and B input multiplexers, an adder, a shifter,
2373 -- count-leading-zeroes logic, and a result mux.
2374 if r.longmask = '1' then
2375 mshift := r.shift + to_signed(-29, EXP_BITS);
2379 if mshift < to_signed(-64, EXP_BITS) then
2380 mask := (others => '1');
2381 elsif mshift >= to_signed(0, EXP_BITS) then
2382 mask := (others => '0');
2384 mask := right_mask(unsigned(mshift(5 downto 0)));
2390 in_a0 := r.a.mantissa;
2392 in_a0 := r.b.mantissa;
2394 in_a0 := r.c.mantissa;
2396 if (or (mask and in_a0)) = '1' and set_x = '1' then
2399 if opsel_ainv = '1' then
2405 in_b0 := (others => '0');
2411 -- BIN_PS6, 6 LSBs of P/4 sign-extended to 64
2412 in_b0 := std_ulogic_vector(resize(signed(r.p(7 downto 2)), 64));
2414 if opsel_binv = '1' then
2418 if r.shift >= to_signed(-64, EXP_BITS) and r.shift <= to_signed(63, EXP_BITS) then
2419 shift_res := shifter_64(r.r & (shiftin or r.s(55)) & r.s(54 downto 0),
2420 std_ulogic_vector(r.shift(6 downto 0)));
2422 shift_res := (others => '0');
2424 sum := std_ulogic_vector(unsigned(in_a) + unsigned(in_b) + carry_in);
2425 if opsel_mask = '1' then
2426 sum := sum and not mask;
2432 result <= shift_res;
2434 result <= multiply_to_f.result(121 downto 58);
2438 misc := x"00000000" & (r.fpscr and fpscr_mask);
2440 -- generated QNaN mantissa
2441 misc := x"0020000000000000";
2443 -- mantissa of max representable DP number
2444 misc := x"007ffffffffffffc";
2446 -- mantissa of max representable SP number
2447 misc := x"007fffff80000000";
2450 misc := r.a.mantissa(31 downto 0) & r.b.mantissa(31 downto 0);
2453 misc := r.a.mantissa(63 downto 32) & r.b.mantissa(63 downto 32);
2455 misc := 10x"000" & inverse_est & 35x"000000000";
2457 -- max positive result for fctiw[z]
2458 misc := x"000000007fffffff";
2460 -- max negative result for fctiw[z]
2461 misc := x"ffffffff80000000";
2463 -- max positive result for fctiwu[z]
2464 misc := x"00000000ffffffff";
2466 -- max negative result for fctiwu[z]
2467 misc := x"0000000000000000";
2469 -- max positive result for fctid[z]
2470 misc := x"7fffffffffffffff";
2472 -- max negative result for fctid[z]
2473 misc := x"8000000000000000";
2475 -- max positive result for fctidu[z]
2476 misc := x"ffffffffffffffff";
2478 -- max negative result for fctidu[z]
2479 misc := x"0000000000000000";
2481 misc := x"0000000000000000";
2489 v.s := std_ulogic_vector(unsigned(not r.s) + (not r.x));
2491 v.s := multiply_to_f.result(57 downto 2);
2493 v.s := shift_res(63 downto 8);
2494 if shift_res(7 downto 0) /= x"00" then
2498 v.s := (others => '0');
2503 v.a.exponent := new_exp;
2504 v.a.mantissa := shift_res;
2507 v.b.exponent := new_exp;
2508 v.b.mantissa := shift_res;
2511 v.c.exponent := new_exp;
2512 v.c.mantissa := shift_res;
2515 if opsel_r = RES_SHIFT then
2516 v.result_exp := new_exp;
2519 if renormalize = '1' then
2520 clz := count_left_zeroes(r.r);
2521 if renorm_sqrt = '1' then
2522 -- make denormalized value end up with even exponent
2525 v.shift := resize(signed('0' & clz) - 9, EXP_BITS);
2528 if r.int_result = '1' then
2531 fp_result <= pack_dp(r.result_sign, r.result_class, r.result_exp, r.r,
2532 r.single_prec, r.quieten_nan);
2534 if r.update_fprf = '1' then
2535 v.fpscr(FPSCR_C downto FPSCR_FU) := result_flags(r.result_sign, r.result_class,
2536 r.r(54) and not r.denorm);
2539 v.fpscr(FPSCR_VX) := (or (v.fpscr(FPSCR_VXSNAN downto FPSCR_VXVC))) or
2540 (or (v.fpscr(FPSCR_VXSOFT downto FPSCR_VXCVI)));
2541 v.fpscr(FPSCR_FEX) := or (v.fpscr(FPSCR_VX downto FPSCR_XX) and
2542 v.fpscr(FPSCR_VE downto FPSCR_XE));
2543 if update_fx = '1' and
2544 (v.fpscr(FPSCR_VX downto FPSCR_XX) and not r.old_exc) /= "00000" then
2545 v.fpscr(FPSCR_FX) := '1';
2548 v.cr_result := v.fpscr(FPSCR_FX downto FPSCR_OX);
2551 if illegal = '1' then
2552 v.instr_done := '0';
2554 v.writing_back := '0';
2558 v.do_intr := v.instr_done and v.fpscr(FPSCR_FEX) and r.fe_mode;
2559 if v.state /= IDLE or v.do_intr = '1' then
2565 e_out.illegal <= illegal;
2568 end architecture behaviour;