1 -- Floating-point unit for Microwatt
4 use ieee.std_logic_1164.all;
5 use ieee.numeric_std.all;
8 use work.insn_helpers.all;
9 use work.decode_types.all;
10 use work.crhelpers.all;
19 e_in : in Execute1toFPUType;
20 e_out : out FPUToExecute1Type;
22 w_out : out FPUToWritebackType
26 architecture behaviour of fpu is
27 type fp_number_class is (ZERO, FINITE, INFINITY, NAN);
29 constant EXP_BITS : natural := 13;
31 type fpu_reg_type is record
32 class : fp_number_class;
33 negative : std_ulogic;
34 exponent : signed(EXP_BITS-1 downto 0); -- unbiased
35 mantissa : std_ulogic_vector(63 downto 0); -- 10.54 format
38 type state_t is (IDLE,
39 DO_MCRFS, DO_MTFSB, DO_MTFSFI, DO_MFFS, DO_MTFSF,
40 DO_FMR, DO_FMRG, DO_FCMP, DO_FTDIV, DO_FTSQRT,
43 DO_FADD, DO_FMUL, DO_FDIV, DO_FSQRT, DO_FMADD,
47 ADD_SHIFT, ADD_2, ADD_3,
50 FMADD_1, FMADD_2, FMADD_3,
51 FMADD_4, FMADD_5, FMADD_6,
53 DIV_2, DIV_3, DIV_4, DIV_5, DIV_6,
57 SQRT_1, SQRT_2, SQRT_3, SQRT_4,
58 SQRT_5, SQRT_6, SQRT_7, SQRT_8,
59 SQRT_9, SQRT_10, SQRT_11, SQRT_12,
60 INT_SHIFT, INT_ROUND, INT_ISHIFT,
61 INT_FINAL, INT_CHECK, INT_OFLOW,
63 ROUND_UFLOW, ROUND_OFLOW,
64 ROUNDING, ROUNDING_2, ROUNDING_3,
70 type reg_type is record
73 instr_done : std_ulogic;
76 insn : std_ulogic_vector(31 downto 0);
77 dest_fpr : gspr_index_t;
81 single_prec : std_ulogic;
82 fpscr : std_ulogic_vector(31 downto 0);
86 r : std_ulogic_vector(63 downto 0); -- 10.54 format
87 s : std_ulogic_vector(55 downto 0); -- extended fraction
89 p : std_ulogic_vector(63 downto 0); -- 8.56 format
90 y : std_ulogic_vector(63 downto 0); -- 8.56 format
91 result_sign : std_ulogic;
92 result_class : fp_number_class;
93 result_exp : signed(EXP_BITS-1 downto 0);
94 shift : signed(EXP_BITS-1 downto 0);
95 writing_back : std_ulogic;
96 int_result : std_ulogic;
97 cr_result : std_ulogic_vector(3 downto 0);
98 cr_mask : std_ulogic_vector(7 downto 0);
99 old_exc : std_ulogic_vector(4 downto 0);
100 update_fprf : std_ulogic;
101 quieten_nan : std_ulogic;
104 round_mode : std_ulogic_vector(2 downto 0);
105 is_subtract : std_ulogic;
106 exp_cmp : std_ulogic;
107 madd_cmp : std_ulogic;
108 add_bsmall : std_ulogic;
109 is_multiply : std_ulogic;
110 is_sqrt : std_ulogic;
112 count : unsigned(1 downto 0);
113 doing_ftdiv : std_ulogic_vector(1 downto 0);
116 type lookup_table is array(0 to 1023) of std_ulogic_vector(17 downto 0);
118 signal r, rin : reg_type;
120 signal fp_result : std_ulogic_vector(63 downto 0);
121 signal opsel_a : std_ulogic_vector(1 downto 0);
122 signal opsel_b : std_ulogic_vector(1 downto 0);
123 signal opsel_r : std_ulogic_vector(1 downto 0);
124 signal opsel_s : std_ulogic_vector(1 downto 0);
125 signal opsel_ainv : std_ulogic;
126 signal opsel_amask : std_ulogic;
127 signal opsel_binv : std_ulogic;
128 signal in_a : std_ulogic_vector(63 downto 0);
129 signal in_b : std_ulogic_vector(63 downto 0);
130 signal result : std_ulogic_vector(63 downto 0);
131 signal carry_in : std_ulogic;
132 signal lost_bits : std_ulogic;
133 signal r_hi_nz : std_ulogic;
134 signal r_lo_nz : std_ulogic;
135 signal s_nz : std_ulogic;
136 signal misc_sel : std_ulogic_vector(3 downto 0);
137 signal f_to_multiply : MultiplyInputType;
138 signal multiply_to_f : MultiplyOutputType;
139 signal msel_1 : std_ulogic_vector(1 downto 0);
140 signal msel_2 : std_ulogic_vector(1 downto 0);
141 signal msel_add : std_ulogic_vector(1 downto 0);
142 signal msel_inv : std_ulogic;
143 signal inverse_est : std_ulogic_vector(18 downto 0);
146 constant AIN_R : std_ulogic_vector(1 downto 0) := "00";
147 constant AIN_A : std_ulogic_vector(1 downto 0) := "01";
148 constant AIN_B : std_ulogic_vector(1 downto 0) := "10";
149 constant AIN_C : std_ulogic_vector(1 downto 0) := "11";
151 constant BIN_ZERO : std_ulogic_vector(1 downto 0) := "00";
152 constant BIN_R : std_ulogic_vector(1 downto 0) := "01";
153 constant BIN_MASK : std_ulogic_vector(1 downto 0) := "10";
154 constant BIN_PS6 : std_ulogic_vector(1 downto 0) := "11";
156 constant RES_SUM : std_ulogic_vector(1 downto 0) := "00";
157 constant RES_SHIFT : std_ulogic_vector(1 downto 0) := "01";
158 constant RES_MULT : std_ulogic_vector(1 downto 0) := "10";
159 constant RES_MISC : std_ulogic_vector(1 downto 0) := "11";
161 constant S_ZERO : std_ulogic_vector(1 downto 0) := "00";
162 constant S_NEG : std_ulogic_vector(1 downto 0) := "01";
163 constant S_SHIFT : std_ulogic_vector(1 downto 0) := "10";
164 constant S_MULT : std_ulogic_vector(1 downto 0) := "11";
167 constant MUL1_A : std_ulogic_vector(1 downto 0) := "00";
168 constant MUL1_B : std_ulogic_vector(1 downto 0) := "01";
169 constant MUL1_Y : std_ulogic_vector(1 downto 0) := "10";
170 constant MUL1_R : std_ulogic_vector(1 downto 0) := "11";
172 constant MUL2_C : std_ulogic_vector(1 downto 0) := "00";
173 constant MUL2_LUT : std_ulogic_vector(1 downto 0) := "01";
174 constant MUL2_P : std_ulogic_vector(1 downto 0) := "10";
175 constant MUL2_R : std_ulogic_vector(1 downto 0) := "11";
177 constant MULADD_ZERO : std_ulogic_vector(1 downto 0) := "00";
178 constant MULADD_CONST : std_ulogic_vector(1 downto 0) := "01";
179 constant MULADD_A : std_ulogic_vector(1 downto 0) := "10";
180 constant MULADD_RS : std_ulogic_vector(1 downto 0) := "11";
182 -- Inverse lookup table, indexed by the top 8 fraction bits
183 -- The first 256 entries are the reciprocal (1/x) lookup table,
184 -- and the remaining 768 entries are the reciprocal square root table.
185 -- Output range is [0.5, 1) in 0.19 format, though the top
186 -- bit isn't stored since it is always 1.
187 -- Each output value is the inverse of the center of the input
188 -- range for the value, i.e. entry 0 is 1 / (1 + 1/512),
189 -- entry 1 is 1 / (1 + 3/512), etc.
190 signal inverse_table : lookup_table := (
192 -- Unit bit is assumed to be 1, so input range is [1, 2)
193 18x"3fc01", 18x"3f411", 18x"3ec31", 18x"3e460", 18x"3dc9f", 18x"3d4ec", 18x"3cd49", 18x"3c5b5",
194 18x"3be2f", 18x"3b6b8", 18x"3af4f", 18x"3a7f4", 18x"3a0a7", 18x"39968", 18x"39237", 18x"38b14",
195 18x"383fe", 18x"37cf5", 18x"375f9", 18x"36f0a", 18x"36828", 18x"36153", 18x"35a8a", 18x"353ce",
196 18x"34d1e", 18x"3467a", 18x"33fe3", 18x"33957", 18x"332d7", 18x"32c62", 18x"325f9", 18x"31f9c",
197 18x"3194a", 18x"31303", 18x"30cc7", 18x"30696", 18x"30070", 18x"2fa54", 18x"2f443", 18x"2ee3d",
198 18x"2e841", 18x"2e250", 18x"2dc68", 18x"2d68b", 18x"2d0b8", 18x"2caee", 18x"2c52e", 18x"2bf79",
199 18x"2b9cc", 18x"2b429", 18x"2ae90", 18x"2a900", 18x"2a379", 18x"29dfb", 18x"29887", 18x"2931b",
200 18x"28db8", 18x"2885e", 18x"2830d", 18x"27dc4", 18x"27884", 18x"2734d", 18x"26e1d", 18x"268f6",
201 18x"263d8", 18x"25ec1", 18x"259b3", 18x"254ac", 18x"24fad", 18x"24ab7", 18x"245c8", 18x"240e1",
202 18x"23c01", 18x"23729", 18x"23259", 18x"22d90", 18x"228ce", 18x"22413", 18x"21f60", 18x"21ab4",
203 18x"2160f", 18x"21172", 18x"20cdb", 18x"2084b", 18x"203c2", 18x"1ff40", 18x"1fac4", 18x"1f64f",
204 18x"1f1e1", 18x"1ed79", 18x"1e918", 18x"1e4be", 18x"1e069", 18x"1dc1b", 18x"1d7d4", 18x"1d392",
205 18x"1cf57", 18x"1cb22", 18x"1c6f3", 18x"1c2ca", 18x"1bea7", 18x"1ba8a", 18x"1b672", 18x"1b261",
206 18x"1ae55", 18x"1aa50", 18x"1a64f", 18x"1a255", 18x"19e60", 18x"19a70", 18x"19686", 18x"192a2",
207 18x"18ec3", 18x"18ae9", 18x"18715", 18x"18345", 18x"17f7c", 18x"17bb7", 18x"177f7", 18x"1743d",
208 18x"17087", 18x"16cd7", 18x"1692c", 18x"16585", 18x"161e4", 18x"15e47", 18x"15ab0", 18x"1571d",
209 18x"1538e", 18x"15005", 18x"14c80", 18x"14900", 18x"14584", 18x"1420d", 18x"13e9b", 18x"13b2d",
210 18x"137c3", 18x"1345e", 18x"130fe", 18x"12da2", 18x"12a4a", 18x"126f6", 18x"123a7", 18x"1205c",
211 18x"11d15", 18x"119d2", 18x"11694", 18x"11359", 18x"11023", 18x"10cf1", 18x"109c2", 18x"10698",
212 18x"10372", 18x"10050", 18x"0fd31", 18x"0fa17", 18x"0f700", 18x"0f3ed", 18x"0f0de", 18x"0edd3",
213 18x"0eacb", 18x"0e7c7", 18x"0e4c7", 18x"0e1ca", 18x"0ded2", 18x"0dbdc", 18x"0d8eb", 18x"0d5fc",
214 18x"0d312", 18x"0d02b", 18x"0cd47", 18x"0ca67", 18x"0c78a", 18x"0c4b1", 18x"0c1db", 18x"0bf09",
215 18x"0bc3a", 18x"0b96e", 18x"0b6a5", 18x"0b3e0", 18x"0b11e", 18x"0ae5f", 18x"0aba3", 18x"0a8eb",
216 18x"0a636", 18x"0a383", 18x"0a0d4", 18x"09e28", 18x"09b80", 18x"098da", 18x"09637", 18x"09397",
217 18x"090fb", 18x"08e61", 18x"08bca", 18x"08936", 18x"086a5", 18x"08417", 18x"0818c", 18x"07f04",
218 18x"07c7e", 18x"079fc", 18x"0777c", 18x"074ff", 18x"07284", 18x"0700d", 18x"06d98", 18x"06b26",
219 18x"068b6", 18x"0664a", 18x"063e0", 18x"06178", 18x"05f13", 18x"05cb1", 18x"05a52", 18x"057f5",
220 18x"0559a", 18x"05342", 18x"050ed", 18x"04e9a", 18x"04c4a", 18x"049fc", 18x"047b0", 18x"04567",
221 18x"04321", 18x"040dd", 18x"03e9b", 18x"03c5c", 18x"03a1f", 18x"037e4", 18x"035ac", 18x"03376",
222 18x"03142", 18x"02f11", 18x"02ce2", 18x"02ab5", 18x"0288b", 18x"02663", 18x"0243d", 18x"02219",
223 18x"01ff7", 18x"01dd8", 18x"01bbb", 18x"019a0", 18x"01787", 18x"01570", 18x"0135b", 18x"01149",
224 18x"00f39", 18x"00d2a", 18x"00b1e", 18x"00914", 18x"0070c", 18x"00506", 18x"00302", 18x"00100",
225 -- 1/sqrt(x) lookup table
226 -- Input is in the range [1, 4), i.e. two bits to the left of the
227 -- binary point. Those 2 bits index the following 3 blocks of 256 values.
229 18x"3fe00", 18x"3fa06", 18x"3f612", 18x"3f224", 18x"3ee3a", 18x"3ea58", 18x"3e67c", 18x"3e2a4",
230 18x"3ded2", 18x"3db06", 18x"3d73e", 18x"3d37e", 18x"3cfc2", 18x"3cc0a", 18x"3c85a", 18x"3c4ae",
231 18x"3c106", 18x"3bd64", 18x"3b9c8", 18x"3b630", 18x"3b29e", 18x"3af10", 18x"3ab86", 18x"3a802",
232 18x"3a484", 18x"3a108", 18x"39d94", 18x"39a22", 18x"396b6", 18x"3934e", 18x"38fea", 18x"38c8c",
233 18x"38932", 18x"385dc", 18x"3828a", 18x"37f3e", 18x"37bf6", 18x"378b2", 18x"37572", 18x"37236",
234 18x"36efe", 18x"36bca", 18x"3689a", 18x"36570", 18x"36248", 18x"35f26", 18x"35c06", 18x"358ea",
235 18x"355d4", 18x"352c0", 18x"34fb0", 18x"34ca4", 18x"3499c", 18x"34698", 18x"34398", 18x"3409c",
236 18x"33da2", 18x"33aac", 18x"337bc", 18x"334cc", 18x"331e2", 18x"32efc", 18x"32c18", 18x"32938",
237 18x"3265a", 18x"32382", 18x"320ac", 18x"31dd8", 18x"31b0a", 18x"3183e", 18x"31576", 18x"312b0",
238 18x"30fee", 18x"30d2e", 18x"30a74", 18x"307ba", 18x"30506", 18x"30254", 18x"2ffa4", 18x"2fcf8",
239 18x"2fa4e", 18x"2f7a8", 18x"2f506", 18x"2f266", 18x"2efca", 18x"2ed2e", 18x"2ea98", 18x"2e804",
240 18x"2e572", 18x"2e2e4", 18x"2e058", 18x"2ddce", 18x"2db48", 18x"2d8c6", 18x"2d646", 18x"2d3c8",
241 18x"2d14c", 18x"2ced4", 18x"2cc5e", 18x"2c9ea", 18x"2c77a", 18x"2c50c", 18x"2c2a2", 18x"2c038",
242 18x"2bdd2", 18x"2bb70", 18x"2b90e", 18x"2b6b0", 18x"2b454", 18x"2b1fa", 18x"2afa4", 18x"2ad4e",
243 18x"2aafc", 18x"2a8ac", 18x"2a660", 18x"2a414", 18x"2a1cc", 18x"29f86", 18x"29d42", 18x"29b00",
244 18x"298c2", 18x"29684", 18x"2944a", 18x"29210", 18x"28fda", 18x"28da6", 18x"28b74", 18x"28946",
245 18x"28718", 18x"284ec", 18x"282c4", 18x"2809c", 18x"27e78", 18x"27c56", 18x"27a34", 18x"27816",
246 18x"275fa", 18x"273e0", 18x"271c8", 18x"26fb0", 18x"26d9c", 18x"26b8a", 18x"2697a", 18x"2676c",
247 18x"26560", 18x"26356", 18x"2614c", 18x"25f46", 18x"25d42", 18x"25b40", 18x"2593e", 18x"25740",
248 18x"25542", 18x"25348", 18x"2514e", 18x"24f58", 18x"24d62", 18x"24b6e", 18x"2497c", 18x"2478c",
249 18x"2459e", 18x"243b0", 18x"241c6", 18x"23fde", 18x"23df6", 18x"23c10", 18x"23a2c", 18x"2384a",
250 18x"2366a", 18x"2348c", 18x"232ae", 18x"230d2", 18x"22efa", 18x"22d20", 18x"22b4a", 18x"22976",
251 18x"227a2", 18x"225d2", 18x"22402", 18x"22234", 18x"22066", 18x"21e9c", 18x"21cd2", 18x"21b0a",
252 18x"21944", 18x"2177e", 18x"215ba", 18x"213fa", 18x"21238", 18x"2107a", 18x"20ebc", 18x"20d00",
253 18x"20b46", 18x"2098e", 18x"207d6", 18x"20620", 18x"2046c", 18x"202b8", 18x"20108", 18x"1ff58",
254 18x"1fda8", 18x"1fbfc", 18x"1fa50", 18x"1f8a4", 18x"1f6fc", 18x"1f554", 18x"1f3ae", 18x"1f208",
255 18x"1f064", 18x"1eec2", 18x"1ed22", 18x"1eb82", 18x"1e9e4", 18x"1e846", 18x"1e6aa", 18x"1e510",
256 18x"1e378", 18x"1e1e0", 18x"1e04a", 18x"1deb4", 18x"1dd20", 18x"1db8e", 18x"1d9fc", 18x"1d86c",
257 18x"1d6de", 18x"1d550", 18x"1d3c4", 18x"1d238", 18x"1d0ae", 18x"1cf26", 18x"1cd9e", 18x"1cc18",
258 18x"1ca94", 18x"1c910", 18x"1c78c", 18x"1c60a", 18x"1c48a", 18x"1c30c", 18x"1c18e", 18x"1c010",
259 18x"1be94", 18x"1bd1a", 18x"1bba0", 18x"1ba28", 18x"1b8b2", 18x"1b73c", 18x"1b5c6", 18x"1b452",
260 18x"1b2e0", 18x"1b16e", 18x"1affe", 18x"1ae8e", 18x"1ad20", 18x"1abb4", 18x"1aa46", 18x"1a8dc",
262 18x"1a772", 18x"1a608", 18x"1a4a0", 18x"1a33a", 18x"1a1d4", 18x"1a070", 18x"19f0c", 18x"19da8",
263 18x"19c48", 18x"19ae6", 18x"19986", 18x"19828", 18x"196ca", 18x"1956e", 18x"19412", 18x"192b8",
264 18x"1915e", 18x"19004", 18x"18eae", 18x"18d56", 18x"18c00", 18x"18aac", 18x"18958", 18x"18804",
265 18x"186b2", 18x"18562", 18x"18412", 18x"182c2", 18x"18174", 18x"18026", 18x"17eda", 18x"17d8e",
266 18x"17c44", 18x"17afa", 18x"179b2", 18x"1786a", 18x"17724", 18x"175de", 18x"17498", 18x"17354",
267 18x"17210", 18x"170ce", 18x"16f8c", 18x"16e4c", 18x"16d0c", 18x"16bcc", 18x"16a8e", 18x"16950",
268 18x"16814", 18x"166d8", 18x"1659e", 18x"16464", 18x"1632a", 18x"161f2", 18x"160ba", 18x"15f84",
269 18x"15e4e", 18x"15d1a", 18x"15be6", 18x"15ab2", 18x"15980", 18x"1584e", 18x"1571c", 18x"155ec",
270 18x"154bc", 18x"1538e", 18x"15260", 18x"15134", 18x"15006", 18x"14edc", 18x"14db0", 18x"14c86",
271 18x"14b5e", 18x"14a36", 18x"1490e", 18x"147e6", 18x"146c0", 18x"1459a", 18x"14476", 18x"14352",
272 18x"14230", 18x"1410c", 18x"13fea", 18x"13eca", 18x"13daa", 18x"13c8a", 18x"13b6c", 18x"13a4e",
273 18x"13930", 18x"13814", 18x"136f8", 18x"135dc", 18x"134c2", 18x"133a8", 18x"1328e", 18x"13176",
274 18x"1305e", 18x"12f48", 18x"12e30", 18x"12d1a", 18x"12c06", 18x"12af2", 18x"129de", 18x"128ca",
275 18x"127b8", 18x"126a6", 18x"12596", 18x"12486", 18x"12376", 18x"12266", 18x"12158", 18x"1204a",
276 18x"11f3e", 18x"11e32", 18x"11d26", 18x"11c1a", 18x"11b10", 18x"11a06", 18x"118fc", 18x"117f4",
277 18x"116ec", 18x"115e4", 18x"114de", 18x"113d8", 18x"112d2", 18x"111ce", 18x"110ca", 18x"10fc6",
278 18x"10ec2", 18x"10dc0", 18x"10cbe", 18x"10bbc", 18x"10abc", 18x"109bc", 18x"108bc", 18x"107be",
279 18x"106c0", 18x"105c2", 18x"104c4", 18x"103c8", 18x"102cc", 18x"101d0", 18x"100d6", 18x"0ffdc",
280 18x"0fee2", 18x"0fdea", 18x"0fcf0", 18x"0fbf8", 18x"0fb02", 18x"0fa0a", 18x"0f914", 18x"0f81e",
281 18x"0f72a", 18x"0f636", 18x"0f542", 18x"0f44e", 18x"0f35a", 18x"0f268", 18x"0f176", 18x"0f086",
282 18x"0ef94", 18x"0eea4", 18x"0edb4", 18x"0ecc6", 18x"0ebd6", 18x"0eae8", 18x"0e9fa", 18x"0e90e",
283 18x"0e822", 18x"0e736", 18x"0e64a", 18x"0e55e", 18x"0e474", 18x"0e38a", 18x"0e2a0", 18x"0e1b8",
284 18x"0e0d0", 18x"0dfe8", 18x"0df00", 18x"0de1a", 18x"0dd32", 18x"0dc4c", 18x"0db68", 18x"0da82",
285 18x"0d99e", 18x"0d8ba", 18x"0d7d6", 18x"0d6f4", 18x"0d612", 18x"0d530", 18x"0d44e", 18x"0d36c",
286 18x"0d28c", 18x"0d1ac", 18x"0d0cc", 18x"0cfee", 18x"0cf0e", 18x"0ce30", 18x"0cd54", 18x"0cc76",
287 18x"0cb9a", 18x"0cabc", 18x"0c9e0", 18x"0c906", 18x"0c82a", 18x"0c750", 18x"0c676", 18x"0c59c",
288 18x"0c4c4", 18x"0c3ea", 18x"0c312", 18x"0c23a", 18x"0c164", 18x"0c08c", 18x"0bfb6", 18x"0bee0",
289 18x"0be0a", 18x"0bd36", 18x"0bc62", 18x"0bb8c", 18x"0baba", 18x"0b9e6", 18x"0b912", 18x"0b840",
290 18x"0b76e", 18x"0b69c", 18x"0b5cc", 18x"0b4fa", 18x"0b42a", 18x"0b35a", 18x"0b28a", 18x"0b1bc",
291 18x"0b0ee", 18x"0b01e", 18x"0af50", 18x"0ae84", 18x"0adb6", 18x"0acea", 18x"0ac1e", 18x"0ab52",
292 18x"0aa86", 18x"0a9bc", 18x"0a8f0", 18x"0a826", 18x"0a75c", 18x"0a694", 18x"0a5ca", 18x"0a502",
293 18x"0a43a", 18x"0a372", 18x"0a2aa", 18x"0a1e4", 18x"0a11c", 18x"0a056", 18x"09f90", 18x"09ecc",
295 18x"09e06", 18x"09d42", 18x"09c7e", 18x"09bba", 18x"09af6", 18x"09a32", 18x"09970", 18x"098ae",
296 18x"097ec", 18x"0972a", 18x"09668", 18x"095a8", 18x"094e8", 18x"09426", 18x"09368", 18x"092a8",
297 18x"091e8", 18x"0912a", 18x"0906c", 18x"08fae", 18x"08ef0", 18x"08e32", 18x"08d76", 18x"08cba",
298 18x"08bfe", 18x"08b42", 18x"08a86", 18x"089ca", 18x"08910", 18x"08856", 18x"0879c", 18x"086e2",
299 18x"08628", 18x"08570", 18x"084b6", 18x"083fe", 18x"08346", 18x"0828e", 18x"081d8", 18x"08120",
300 18x"0806a", 18x"07fb4", 18x"07efe", 18x"07e48", 18x"07d92", 18x"07cde", 18x"07c2a", 18x"07b76",
301 18x"07ac2", 18x"07a0e", 18x"0795a", 18x"078a8", 18x"077f4", 18x"07742", 18x"07690", 18x"075de",
302 18x"0752e", 18x"0747c", 18x"073cc", 18x"0731c", 18x"0726c", 18x"071bc", 18x"0710c", 18x"0705e",
303 18x"06fae", 18x"06f00", 18x"06e52", 18x"06da4", 18x"06cf6", 18x"06c4a", 18x"06b9c", 18x"06af0",
304 18x"06a44", 18x"06998", 18x"068ec", 18x"06840", 18x"06796", 18x"066ea", 18x"06640", 18x"06596",
305 18x"064ec", 18x"06442", 18x"0639a", 18x"062f0", 18x"06248", 18x"061a0", 18x"060f8", 18x"06050",
306 18x"05fa8", 18x"05f00", 18x"05e5a", 18x"05db4", 18x"05d0e", 18x"05c68", 18x"05bc2", 18x"05b1c",
307 18x"05a76", 18x"059d2", 18x"0592e", 18x"05888", 18x"057e4", 18x"05742", 18x"0569e", 18x"055fa",
308 18x"05558", 18x"054b6", 18x"05412", 18x"05370", 18x"052ce", 18x"0522e", 18x"0518c", 18x"050ec",
309 18x"0504a", 18x"04faa", 18x"04f0a", 18x"04e6a", 18x"04dca", 18x"04d2c", 18x"04c8c", 18x"04bee",
310 18x"04b50", 18x"04ab0", 18x"04a12", 18x"04976", 18x"048d8", 18x"0483a", 18x"0479e", 18x"04700",
311 18x"04664", 18x"045c8", 18x"0452c", 18x"04490", 18x"043f6", 18x"0435a", 18x"042c0", 18x"04226",
312 18x"0418a", 18x"040f0", 18x"04056", 18x"03fbe", 18x"03f24", 18x"03e8c", 18x"03df2", 18x"03d5a",
313 18x"03cc2", 18x"03c2a", 18x"03b92", 18x"03afa", 18x"03a62", 18x"039cc", 18x"03934", 18x"0389e",
314 18x"03808", 18x"03772", 18x"036dc", 18x"03646", 18x"035b2", 18x"0351c", 18x"03488", 18x"033f2",
315 18x"0335e", 18x"032ca", 18x"03236", 18x"031a2", 18x"03110", 18x"0307c", 18x"02fea", 18x"02f56",
316 18x"02ec4", 18x"02e32", 18x"02da0", 18x"02d0e", 18x"02c7c", 18x"02bec", 18x"02b5a", 18x"02aca",
317 18x"02a38", 18x"029a8", 18x"02918", 18x"02888", 18x"027f8", 18x"0276a", 18x"026da", 18x"0264a",
318 18x"025bc", 18x"0252e", 18x"024a0", 18x"02410", 18x"02384", 18x"022f6", 18x"02268", 18x"021da",
319 18x"0214e", 18x"020c0", 18x"02034", 18x"01fa8", 18x"01f1c", 18x"01e90", 18x"01e04", 18x"01d78",
320 18x"01cee", 18x"01c62", 18x"01bd8", 18x"01b4c", 18x"01ac2", 18x"01a38", 18x"019ae", 18x"01924",
321 18x"0189c", 18x"01812", 18x"01788", 18x"01700", 18x"01676", 18x"015ee", 18x"01566", 18x"014de",
322 18x"01456", 18x"013ce", 18x"01346", 18x"012c0", 18x"01238", 18x"011b2", 18x"0112c", 18x"010a4",
323 18x"0101e", 18x"00f98", 18x"00f12", 18x"00e8c", 18x"00e08", 18x"00d82", 18x"00cfe", 18x"00c78",
324 18x"00bf4", 18x"00b70", 18x"00aec", 18x"00a68", 18x"009e4", 18x"00960", 18x"008dc", 18x"00858",
325 18x"007d6", 18x"00752", 18x"006d0", 18x"0064e", 18x"005cc", 18x"0054a", 18x"004c8", 18x"00446",
326 18x"003c4", 18x"00342", 18x"002c2", 18x"00240", 18x"001c0", 18x"00140", 18x"000c0", 18x"00040"
329 -- Left and right shifter with 120 bit input and 64 bit output.
330 -- Shifts inp left by shift bits and returns the upper 64 bits of
331 -- the result. The shift parameter is interpreted as a signed
332 -- number in the range -64..63, with negative values indicating
-- Implementation note: the shift is performed as three cascaded selections,
-- each narrowing the window: 120 bits (inp) -> 95 bits (s1) -> 71 bits (s2)
-- -> 64 bits (result).
-- NOTE(review): the "when" selector lines of the three case statements are not
-- visible in this view, so the mapping from shift values to the individual
-- branches below should be confirmed against the full file.
334 function shifter_64(inp: std_ulogic_vector(119 downto 0);
335 shift: std_ulogic_vector(6 downto 0))
336 return std_ulogic_vector is
337 variable s1 : std_ulogic_vector(94 downto 0);
338 variable s2 : std_ulogic_vector(70 downto 0);
339 variable result : std_ulogic_vector(63 downto 0);
-- Stage 1: shift(6 downto 5) picks one of four 95-bit values for s1: a plain
-- slice of inp, a slice padded with 7 zero bits on the right, or a slice
-- padded with 64 or 32 zero bits on the left.
341 case shift(6 downto 5) is
343 s1 := inp(119 downto 25);
345 s1 := inp(87 downto 0) & "0000000";
347 s1 := x"0000000000000000" & inp(119 downto 89);
349 s1 := x"00000000" & inp(119 downto 57);
-- Stage 2: shift(4 downto 3) picks a 71-bit window of s1; the four candidate
-- windows are 8 bit positions apart.
351 case shift(4 downto 3) is
353 s2 := s1(94 downto 24);
355 s2 := s1(86 downto 16);
357 s2 := s1(78 downto 8);
359 s2 := s1(70 downto 0);
-- Stage 3: shift(2 downto 0) picks the final 64-bit window of s2; the eight
-- candidate windows are 1 bit position apart.
361 case shift(2 downto 0) is
363 result := s2(70 downto 7);
365 result := s2(69 downto 6);
367 result := s2(68 downto 5);
369 result := s2(67 downto 4);
371 result := s2(66 downto 3);
373 result := s2(65 downto 2);
375 result := s2(64 downto 1);
377 result := s2(63 downto 0);
382 -- Generate a mask with 0-bits on the left and 1-bits on the right which
383 -- selects the bits that will be lost in doing a right shift. The shift
384 -- parameter is the bottom 6 bits of a negative shift count,
385 -- indicating a right shift.
-- Returns a 64-bit mask built bit by bit from the 6-bit shift count.
-- NOTE(review): the condition that guards the bit-set inside the loop is not
-- visible in this view; confirm how "shift" selects which bits are set.
386 function right_mask(shift: unsigned(5 downto 0)) return std_ulogic_vector is
387 variable result: std_ulogic_vector(63 downto 0);
-- Start from an all-zero mask; the loop only ever sets bits to '1'.
389 result := (others => '0');
-- Loop index i addresses bit (63 - i), so i = 0 is the most significant bit.
390 for i in 0 to 63 loop
392 result(63 - i) := '1';
398 -- Split a DP floating-point number into components and work out its class.
399 -- If is_int = 1, the input is considered an integer
-- Field layout used below: fpr(63) is the sign, fpr(62 downto 52) the
-- exponent field and fpr(51 downto 0) the fraction.
-- NOTE(review): the if/else and case header lines are not visible in this
-- view; the comments below describe only the visible assignments.
400 function decode_dp(fpr: std_ulogic_vector(63 downto 0); is_int: std_ulogic) return fpu_reg_type is
401 variable r : fpu_reg_type;
402 variable exp_nz : std_ulogic;
403 variable exp_ao : std_ulogic;
404 variable frac_nz : std_ulogic;
405 variable cls : std_ulogic_vector(2 downto 0);
407 r.negative := fpr(63);
-- exp_nz: at least one exponent bit is 1; exp_ao: all exponent bits are 1;
-- frac_nz: at least one fraction bit is 1.
408 exp_nz := or (fpr(62 downto 52));
409 exp_ao := and (fpr(62 downto 52));
410 frac_nz := or (fpr(51 downto 0));
-- Widen the exponent field to EXP_BITS and remove the bias of 1023.
412 r.exponent := signed(resize(unsigned(fpr(62 downto 52)), EXP_BITS)) - to_signed(1023, EXP_BITS);
-- Override with -1022; presumably applied when the exponent field is zero
-- (the guarding condition is not visible here -- confirm).
414 r.exponent := to_signed(-1022, EXP_BITS);
-- Mantissa layout: 9 zero bits, exp_nz as the unit bit (bit 54), the 52
-- fraction bits (bits 53..2) and 2 zero bits at the bottom.
416 r.mantissa := "000000000" & exp_nz & fpr(51 downto 0) & "00";
-- cls = (exponent all ones, exponent non-zero, fraction non-zero)
417 cls := exp_ao & exp_nz & frac_nz;
419 when "000" => r.class := ZERO;
420 when "001" => r.class := FINITE; -- denormalized
421 when "010" => r.class := FINITE;
422 when "011" => r.class := FINITE;
423 when "110" => r.class := INFINITY;
424 when others => r.class := NAN;
-- The remaining assignments presumably belong to the integer (is_int = '1')
-- path -- confirm: the exponent is forced to 0 and the input is tested for
-- any non-zero bit.
428 r.exponent := (others => '0');
429 if (fpr(63) or exp_nz or frac_nz) = '1' then
438 -- Construct a DP floating-point result from components
-- Builds a 64-bit result: exponent field in bits 62..52, fraction in bits 51..0.
-- NOTE(review): the "case class" header and its "when" lines are not visible
-- in this view; which class each group of assignments below belongs to should
-- be confirmed against the full file.
439 function pack_dp(sign: std_ulogic; class: fp_number_class; exp: signed(EXP_BITS-1 downto 0);
440 mantissa: std_ulogic_vector; single_prec: std_ulogic; quieten_nan: std_ulogic)
441 return std_ulogic_vector is
442 variable result : std_ulogic_vector(63 downto 0);
-- Every field not explicitly written below stays zero.
444 result := (others => '0');
-- The exponent field is only written when mantissa(54) is set; the exponent
-- is truncated to 11 bits and re-biased by 1023.
449 if mantissa(54) = '1' then
451 result(62 downto 52) := std_ulogic_vector(resize(exp, 11) + 1023);
-- The top 23 fraction bits are always copied; the low 29 fraction bits are
-- copied only when single_prec = '0'.
453 result(51 downto 29) := mantissa(53 downto 31);
454 if single_prec = '0' then
455 result(28 downto 0) := mantissa(30 downto 2);
-- Exponent field all ones with the fraction left at zero.
458 result(62 downto 52) := "11111111111";
-- Exponent field all ones with the fraction copied from the mantissa; the top
-- fraction bit is forced to 1 when quieten_nan is set.
460 result(62 downto 52) := "11111111111";
461 result(51) := quieten_nan or mantissa(53);
462 result(50 downto 29) := mantissa(52 downto 31);
463 if single_prec = '0' then
464 result(28 downto 0) := mantissa(30 downto 2);
470 -- Determine whether to increment when rounding
471 -- Returns rounding_inc & inexact
472 -- Assumes x includes the bottom 29 bits of the mantissa already
473 -- if single_prec = 1 (usually arranged by setting set_x = 1 earlier).
-- NOTE(review): one line of the parameter list and several else/end lines are
-- not visible in this view; the comments below cover only the visible code.
474 function fp_rounding(mantissa: std_ulogic_vector(63 downto 0); x: std_ulogic;
475 single_prec: std_ulogic; rn: std_ulogic_vector(2 downto 0);
477 return std_ulogic_vector is
478 variable grx : std_ulogic_vector(2 downto 0);
479 variable ret : std_ulogic_vector(1 downto 0);
480 variable lsb : std_ulogic;
-- grx holds two mantissa bits followed by x: mantissa bits 1..0 for double
-- precision, bits 30..29 for single precision.
482 if single_prec = '0' then
483 grx := mantissa(1 downto 0) & x;
486 grx := mantissa(30 downto 29) & x;
-- rn(1 downto 0) selects the rounding mode.
491 case rn(1 downto 0) is
492 when "00" => -- round to nearest
-- grx = "100" is an exact tie; it is resolved using lsb only when rn(2) = '0'.
493 if grx = "100" and rn(2) = '0' then
494 ret(1) := lsb; -- tie, round to even
498 when "01" => -- round towards zero
499 when others => -- round towards +/- inf
501 -- round towards greater magnitude
508 -- Determine result flags to write into the FPSCR
-- Each visible return statement yields a 5-bit vector built from sign and
-- unitbit.
-- NOTE(review): the "case class" header and "when" lines are not visible in
-- this view; which class selects each return below should be confirmed.
509 function result_flags(sign: std_ulogic; class: fp_number_class; unitbit: std_ulogic)
510 return std_ulogic_vector is
-- Top bit is the sign, followed by the fixed pattern "0010".
514 return sign & "0010";
-- Top bit is set when the unit bit is clear; the next two bits are sign and
-- its complement.
516 return (not unitbit) & sign & (not sign) & "00";
-- Top bit clear; sign and its complement, with the lowest bit set.
518 return '0' & sign & (not sign) & "01";
525 fpu_multiply_0: entity work.multiply
528 m_in => f_to_multiply,
529 m_out => multiply_to_f
534 if rising_edge(clk) then
540 r.fpscr <= (others => '0');
541 r.writing_back <= '0';
543 assert not (r.state /= IDLE and e_in.valid = '1') severity failure;
549 -- synchronous reads from lookup table
-- Clocked process that reads one entry of inverse_table into inverse_est.
550 lut_access: process(clk)
551 variable addrhi : std_ulogic_vector(1 downto 0);
552 variable addr : std_ulogic_vector(9 downto 0);
554 if rising_edge(clk) then
-- For square root, the two mantissa bits 55..54 of B supply the upper two
-- address bits (the non-sqrt value is assigned on a line not visible here).
555 if r.is_sqrt = '1' then
556 addrhi := r.b.mantissa(55 downto 54);
-- 10-bit table address: addrhi followed by mantissa bits 53..46 of B.
560 addr := addrhi & r.b.mantissa(53 downto 46);
-- Table entries are 18 bits; a leading '1' is prepended to form the 19-bit
-- estimate.
561 inverse_est <= '1' & inverse_table(to_integer(unsigned(addr)));
-- Outputs on e_out, driven directly from registered state in r.
565 e_out.busy <= r.busy;
566 e_out.exception <= r.fpscr(FPSCR_FEX);
567 e_out.interrupt <= r.do_intr;
-- Outputs on w_out. valid is suppressed when an interrupt is being raised.
569 w_out.valid <= r.instr_done and not r.do_intr;
570 w_out.write_enable <= r.writing_back;
571 w_out.write_reg <= r.dest_fpr;
572 w_out.write_data <= fp_result;
-- CR is written on completion when either r.rc or r.is_cmp is set.
573 w_out.write_cr_enable <= r.instr_done and (r.rc or r.is_cmp);
574 w_out.write_cr_mask <= r.cr_mask;
-- The 4-bit cr_result is replicated 8 times to fill 32 bits.
575 w_out.write_cr_data <= r.cr_result & r.cr_result & r.cr_result & r.cr_result &
576 r.cr_result & r.cr_result & r.cr_result & r.cr_result;
579 variable v : reg_type;
580 variable adec : fpu_reg_type;
581 variable bdec : fpu_reg_type;
582 variable cdec : fpu_reg_type;
583 variable fpscr_mask : std_ulogic_vector(31 downto 0);
584 variable illegal : std_ulogic;
585 variable j, k : integer;
586 variable flm : std_ulogic_vector(7 downto 0);
587 variable int_input : std_ulogic;
588 variable mask : std_ulogic_vector(63 downto 0);
589 variable in_a0 : std_ulogic_vector(63 downto 0);
590 variable in_b0 : std_ulogic_vector(63 downto 0);
591 variable misc : std_ulogic_vector(63 downto 0);
592 variable shift_res : std_ulogic_vector(63 downto 0);
593 variable round : std_ulogic_vector(1 downto 0);
594 variable update_fx : std_ulogic;
595 variable arith_done : std_ulogic;
596 variable invalid : std_ulogic;
597 variable zero_divide : std_ulogic;
598 variable mant_nz : std_ulogic;
599 variable min_exp : signed(EXP_BITS-1 downto 0);
600 variable max_exp : signed(EXP_BITS-1 downto 0);
601 variable bias_exp : signed(EXP_BITS-1 downto 0);
602 variable new_exp : signed(EXP_BITS-1 downto 0);
603 variable exp_tiny : std_ulogic;
604 variable exp_huge : std_ulogic;
605 variable renormalize : std_ulogic;
606 variable clz : std_ulogic_vector(5 downto 0);
607 variable set_x : std_ulogic;
608 variable mshift : signed(EXP_BITS-1 downto 0);
609 variable need_check : std_ulogic;
610 variable msb : std_ulogic;
611 variable is_add : std_ulogic;
612 variable longmask : std_ulogic;
613 variable set_a : std_ulogic;
614 variable set_b : std_ulogic;
615 variable set_c : std_ulogic;
616 variable set_y : std_ulogic;
617 variable set_s : std_ulogic;
618 variable qnan_result : std_ulogic;
619 variable px_nz : std_ulogic;
620 variable pcmpb_eq : std_ulogic;
621 variable pcmpb_lt : std_ulogic;
622 variable pshift : std_ulogic;
623 variable renorm_sqrt : std_ulogic;
624 variable sqrt_exp : signed(EXP_BITS-1 downto 0);
625 variable shiftin : std_ulogic;
626 variable mulexp : signed(EXP_BITS-1 downto 0);
627 variable maddend : std_ulogic_vector(127 downto 0);
634 -- capture incoming instruction
635 if e_in.valid = '1' then
638 v.fe_mode := or (e_in.fe_mode);
639 v.dest_fpr := e_in.frt;
640 v.single_prec := e_in.single;
643 v.is_cmp := e_in.out_cr;
644 if e_in.out_cr = '0' then
645 v.cr_mask := num_to_fxm(1);
647 v.cr_mask := num_to_fxm(to_integer(unsigned(insn_bf(e_in.insn))));
650 if e_in.op = OP_FPOP_I then
653 v.quieten_nan := '1';
656 v.round_mode := '0' & r.fpscr(FPSCR_RN+1 downto FPSCR_RN);
657 v.is_subtract := '0';
658 v.is_multiply := '0';
661 v.doing_ftdiv := "00";
663 adec := decode_dp(e_in.fra, int_input);
664 bdec := decode_dp(e_in.frb, int_input);
665 cdec := decode_dp(e_in.frc, int_input);
671 if adec.exponent > bdec.exponent then
675 if (adec.exponent + cdec.exponent + 1) >= bdec.exponent then
680 r_hi_nz <= or (r.r(55 downto 31));
681 r_lo_nz <= or (r.r(30 downto 2));
684 if r.single_prec = '0' then
685 if r.doing_ftdiv(1) = '0' then
686 max_exp := to_signed(1023, EXP_BITS);
688 max_exp := to_signed(1020, EXP_BITS);
690 if r.doing_ftdiv(0) = '0' then
691 min_exp := to_signed(-1022, EXP_BITS);
693 min_exp := to_signed(-1021, EXP_BITS);
695 bias_exp := to_signed(1536, EXP_BITS);
697 max_exp := to_signed(127, EXP_BITS);
698 min_exp := to_signed(-126, EXP_BITS);
699 bias_exp := to_signed(192, EXP_BITS);
701 new_exp := r.result_exp - r.shift;
704 if new_exp < min_exp then
707 if new_exp > max_exp then
711 -- Compare P with zero and with B
712 px_nz := or (r.p(57 downto 4));
714 if r.p(59 downto 4) = r.b.mantissa(55 downto 0) then
718 if unsigned(r.p(59 downto 4)) < unsigned(r.b.mantissa(55 downto 0)) then
722 v.writing_back := '0';
724 v.update_fprf := '0';
725 v.shift := to_signed(0, EXP_BITS);
736 fpscr_mask := (others => '1');
744 longmask := r.single_prec;
749 f_to_multiply.is_32bit <= '0';
750 f_to_multiply.valid <= '0';
753 msel_add <= MULADD_ZERO;
761 if e_in.valid = '1' then
762 case e_in.insn(5 downto 1) is
764 if e_in.insn(8) = '1' then
765 if e_in.insn(6) = '0' then
768 v.state := DO_FTSQRT;
770 elsif e_in.insn(7) = '1' then
776 if e_in.insn(10) = '0' then
777 if e_in.insn(8) = '0' then
780 v.state := DO_MTFSFI;
786 if e_in.insn(8) = '0' then
792 if e_in.insn(9 downto 8) /= "11" then
800 if int_input = '1' then
807 v.round_mode := "001";
811 when "10100" | "10101" =>
821 v.is_multiply := '1';
825 v.state := DO_FRSQRTE;
826 when "11100" | "11101" | "11110" | "11111" =>
833 v.old_exc := r.fpscr(FPSCR_VX downto FPSCR_XX);
837 j := to_integer(unsigned(insn_bfa(r.insn)));
841 v.cr_result := r.fpscr(k + 3 downto k);
842 fpscr_mask(k + 3 downto k) := "0000";
845 v.fpscr := r.fpscr and (fpscr_mask or x"6007F8FF");
-- ftdiv test flags (this is the code that sets doing_ftdiv below).
-- cr_result(2) is set when A is infinite, or B is zero, infinite or
-- denormalized.  A finite operand is denormalized when its unit bit is 0.
-- decode_dp places the unit bit at mantissa(54) and pack_dp tests
-- mantissa(54) before writing an exponent; mantissa(53) is the top fraction
-- bit (the NaN quiet bit), so testing it here misclassified normal numbers
-- such as 1.0 as denormalized.  The unit bit is tested instead.
-- cr_result(1) is set when A is NaN or infinite, B is NaN, zero or infinite,
-- or A is finite with an exponent <= -970; otherwise doing_ftdiv is set to
-- "11" (the "else" line is not visible in this view).
852 v.cr_result := "0000";
853 if r.a.class = INFINITY or r.b.class = ZERO or r.b.class = INFINITY or
854 (r.b.class = FINITE and r.b.mantissa(54) = '0') then
855 v.cr_result(2) := '1';
857 if r.a.class = NAN or r.a.class = INFINITY or
858 r.b.class = NAN or r.b.class = ZERO or r.b.class = INFINITY or
859 (r.a.class = FINITE and r.a.exponent <= to_signed(-970, EXP_BITS)) then
860 v.cr_result(1) := '1';
862 v.doing_ftdiv := "11";
-- Square-root test flags (presumably the DO_FTSQRT state; the "when" line is
-- not visible in this view).
-- cr_result(2) is set when B is zero, infinite or denormalized.  A finite
-- operand is denormalized when its unit bit is 0; decode_dp places the unit
-- bit at mantissa(54) (mantissa(53) is the top fraction bit), so the unit bit
-- is tested here.
-- cr_result(1) is set when B is NaN, infinite, zero, negative, or has an
-- exponent <= -970.  This bit was previously assigned '0', which is a no-op
-- after cr_result has just been cleared and left the flag permanently clear;
-- it is now set to '1', matching the equivalent ftdiv code.
871 v.cr_result := "0000";
872 if r.b.class = ZERO or r.b.class = INFINITY or
873 (r.b.class = FINITE and r.b.mantissa(54) = '0') then
874 v.cr_result(2) := '1';
876 if r.b.class = NAN or r.b.class = INFINITY or r.b.class = ZERO
877 or r.b.negative = '1' or r.b.exponent <= to_signed(-970, EXP_BITS) then
878 v.cr_result(1) := '1';
888 v.result_exp := r.b.exponent;
889 if (r.a.class = NAN and r.a.mantissa(53) = '0') or
890 (r.b.class = NAN and r.b.mantissa(53) = '0') then
892 v.fpscr(FPSCR_VXSNAN) := '1';
893 if r.insn(6) = '1' and r.fpscr(FPSCR_VE) = '0' then
894 v.fpscr(FPSCR_VXVC) := '1';
897 v.cr_result := "0001"; -- unordered
898 elsif r.a.class = NAN or r.b.class = NAN then
899 if r.insn(6) = '1' then
901 v.fpscr(FPSCR_VXVC) := '1';
904 v.cr_result := "0001"; -- unordered
905 elsif r.a.class = ZERO and r.b.class = ZERO then
906 v.cr_result := "0010"; -- equal
907 elsif r.a.negative /= r.b.negative then
908 v.cr_result := r.a.negative & r.b.negative & "00";
909 elsif r.a.class = ZERO then
910 -- A and B are the same sign from here down
911 v.cr_result := not r.b.negative & r.b.negative & "00";
912 elsif r.a.class = INFINITY then
913 if r.b.class = INFINITY then
914 v.cr_result := "0010";
916 v.cr_result := r.a.negative & not r.a.negative & "00";
918 elsif r.b.class = ZERO then
919 -- A is finite from here down
920 v.cr_result := r.a.negative & not r.a.negative & "00";
921 elsif r.b.class = INFINITY then
922 v.cr_result := not r.b.negative & r.b.negative & "00";
923 elsif r.exp_cmp = '1' then
924 -- A and B are both finite from here down
925 v.cr_result := r.a.negative & not r.a.negative & "00";
926 elsif r.a.exponent /= r.b.exponent then
927 -- A exponent is smaller than B
928 v.cr_result := not r.a.negative & r.a.negative & "00";
930 -- Prepare to subtract mantissas, put B in R
931 v.cr_result := "0000";
935 v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result;
939 j := to_integer(unsigned(insn_bt(r.insn)));
940 for i in 0 to 31 loop
942 v.fpscr(31 - i) := r.insn(6);
950 j := to_integer(unsigned(insn_bf(r.insn)));
951 if r.insn(16) = '0' then
955 v.fpscr(k + 3 downto k) := insn_u(r.insn);
965 misc_sel <= "01" & r.insn(8) & '0';
967 v.writing_back := '1';
973 v.writing_back := '1';
975 case r.insn(20 downto 16) is
980 v.fpscr(FPSCR_VE downto FPSCR_XE) := "00000";
981 when "10100" | "10101" =>
982 -- mffscdrn[i] (but we don't implement DRN)
983 fpscr_mask := x"000000FF";
986 fpscr_mask := x"000000FF";
987 v.fpscr(FPSCR_RN+1 downto FPSCR_RN) :=
988 r.b.mantissa(FPSCR_RN+1 downto FPSCR_RN);
991 fpscr_mask := x"000000FF";
992 v.fpscr(FPSCR_RN+1 downto FPSCR_RN) := r.insn(12 downto 11);
995 fpscr_mask := x"0007F0FF";
1003 if r.insn(25) = '1' then
1005 elsif r.insn(16) = '1' then
1008 flm := r.insn(24 downto 17);
1010 for i in 0 to 7 loop
1012 if flm(i) = '1' then
1013 v.fpscr(k + 3 downto k) := r.b.mantissa(k + 3 downto k);
1016 v.instr_done := '1';
1021 v.result_class := r.b.class;
1022 v.result_exp := r.b.exponent;
1023 v.quieten_nan := '0';
1024 if r.insn(9) = '1' then
1025 v.result_sign := '0'; -- fabs
1026 elsif r.insn(8) = '1' then
1027 v.result_sign := '1'; -- fnabs
1028 elsif r.insn(7) = '1' then
1029 v.result_sign := r.b.negative; -- fmr
1030 elsif r.insn(6) = '1' then
1031 v.result_sign := not r.b.negative; -- fneg
1033 v.result_sign := r.a.negative; -- fcpsgn
1035 v.writing_back := '1';
1036 v.instr_done := '1';
1039 when DO_FRI => -- fri[nzpm]
1041 v.result_class := r.b.class;
1042 v.result_sign := r.b.negative;
1043 v.result_exp := r.b.exponent;
1044 v.fpscr(FPSCR_FR) := '0';
1045 v.fpscr(FPSCR_FI) := '0';
1046 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1048 v.fpscr(FPSCR_VXSNAN) := '1';
1051 if r.b.class = FINITE then
1052 if r.b.exponent >= to_signed(52, EXP_BITS) then
1053 -- integer already, no rounding required
1056 v.shift := r.b.exponent - to_signed(52, EXP_BITS);
1058 v.round_mode := '1' & r.insn(7 downto 6);
1066 v.result_class := r.b.class;
1067 v.result_sign := r.b.negative;
1068 v.result_exp := r.b.exponent;
1069 v.fpscr(FPSCR_FR) := '0';
1070 v.fpscr(FPSCR_FI) := '0';
1071 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1073 v.fpscr(FPSCR_VXSNAN) := '1';
1077 if r.b.class = FINITE then
1078 if r.b.exponent < to_signed(-126, EXP_BITS) then
1079 v.shift := r.b.exponent - to_signed(-126, EXP_BITS);
1080 v.state := ROUND_UFLOW;
1081 elsif r.b.exponent > to_signed(127, EXP_BITS) then
1082 v.state := ROUND_OFLOW;
1084 v.shift := to_signed(-2, EXP_BITS);
1085 v.state := ROUNDING;
1092 -- instr bit 9: 1=dword 0=word
1093 -- instr bit 8: 1=unsigned 0=signed
1094 -- instr bit 1: 1=round to zero 0=use fpscr[RN]
1096 v.result_class := r.b.class;
1097 v.result_sign := r.b.negative;
1098 v.result_exp := r.b.exponent;
1099 v.fpscr(FPSCR_FR) := '0';
1100 v.fpscr(FPSCR_FI) := '0';
1101 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1103 v.fpscr(FPSCR_VXSNAN) := '1';
1107 v.int_result := '1';
1112 if r.b.exponent >= to_signed(64, EXP_BITS) or
1113 (r.insn(9) = '0' and r.b.exponent >= to_signed(32, EXP_BITS)) then
1114 v.state := INT_OFLOW;
1115 elsif r.b.exponent >= to_signed(52, EXP_BITS) then
1116 -- integer already, no rounding required,
1117 -- shift into final position
1118 v.shift := r.b.exponent - to_signed(54, EXP_BITS);
1119 if r.insn(8) = '1' and r.b.negative = '1' then
1120 v.state := INT_OFLOW;
1122 v.state := INT_ISHIFT;
1125 v.shift := r.b.exponent - to_signed(52, EXP_BITS);
1126 v.state := INT_SHIFT;
1128 when INFINITY | NAN =>
1129 v.state := INT_OFLOW;
1133 v.result_sign := '0';
1135 if r.insn(8) = '0' and r.b.negative = '1' then
1136 -- fcfid[s] with negative operand, set R = -B
1139 v.result_sign := '1';
1141 v.result_class := r.b.class;
1142 v.result_exp := to_signed(54, EXP_BITS);
1143 v.fpscr(FPSCR_FR) := '0';
1144 v.fpscr(FPSCR_FI) := '0';
1145 if r.b.class = ZERO then
1152 -- fadd[s] and fsub[s]
1154 v.result_sign := r.a.negative;
1155 v.result_class := r.a.class;
1156 v.result_exp := r.a.exponent;
1157 v.fpscr(FPSCR_FR) := '0';
1158 v.fpscr(FPSCR_FI) := '0';
1159 is_add := r.a.negative xor r.b.negative xor r.insn(1);
1160 if r.a.class = FINITE and r.b.class = FINITE then
1161 v.is_subtract := not is_add;
1162 v.add_bsmall := r.exp_cmp;
1163 if r.exp_cmp = '0' then
1164 v.shift := r.a.exponent - r.b.exponent;
1165 v.result_sign := r.b.negative xnor r.insn(1);
1166 if r.a.exponent = r.b.exponent then
1169 v.state := ADD_SHIFT;
1173 v.shift := r.b.exponent - r.a.exponent;
1174 v.result_exp := r.b.exponent;
1175 v.state := ADD_SHIFT;
1178 if (r.a.class = NAN and r.a.mantissa(53) = '0') or
1179 (r.b.class = NAN and r.b.mantissa(53) = '0') then
1181 v.fpscr(FPSCR_VXSNAN) := '1';
1184 if r.a.class = NAN then
1185 -- nothing to do, result is A
1186 elsif r.b.class = NAN then
1187 v.result_class := NAN;
1188 v.result_sign := r.b.negative;
1190 elsif r.a.class = INFINITY and r.b.class = INFINITY and is_add = '0' then
1191 -- invalid operation, construct QNaN
1192 v.fpscr(FPSCR_VXISI) := '1';
1194 elsif r.a.class = ZERO and r.b.class = ZERO and is_add = '0' then
1195 -- return -0 for rounding to -infinity
1196 v.result_sign := r.round_mode(1) and r.round_mode(0);
1197 elsif r.a.class = INFINITY or r.b.class = ZERO then
1198 -- nothing to do, result is A
1201 v.result_sign := r.b.negative xnor r.insn(1);
1202 v.result_class := r.b.class;
1203 v.result_exp := r.b.exponent;
1212 v.result_sign := r.a.negative;
1213 v.result_class := r.a.class;
1214 v.result_exp := r.a.exponent;
1215 v.fpscr(FPSCR_FR) := '0';
1216 v.fpscr(FPSCR_FI) := '0';
1217 if r.a.class = FINITE and r.c.class = FINITE then
1218 v.result_sign := r.a.negative xor r.c.negative;
1219 v.result_exp := r.a.exponent + r.c.exponent;
1220 -- Renormalize denorm operands
1221 if r.a.mantissa(54) = '0' then
1222 v.state := RENORM_A;
1223 elsif r.c.mantissa(54) = '0' then
1225 v.state := RENORM_C;
1227 f_to_multiply.valid <= '1';
1231 if (r.a.class = NAN and r.a.mantissa(53) = '0') or
1232 (r.c.class = NAN and r.c.mantissa(53) = '0') then
1234 v.fpscr(FPSCR_VXSNAN) := '1';
1237 if r.a.class = NAN then
1239 elsif r.c.class = NAN then
1240 v.result_class := NAN;
1241 v.result_sign := r.c.negative;
1243 elsif (r.a.class = INFINITY and r.c.class = ZERO) or
1244 (r.a.class = ZERO and r.c.class = INFINITY) then
1245 -- invalid operation, construct QNaN
1246 v.fpscr(FPSCR_VXIMZ) := '1';
1248 elsif r.a.class = ZERO or r.a.class = INFINITY then
1250 v.result_sign := r.a.negative xor r.c.negative;
1252 -- r.c.class is ZERO or INFINITY
1253 v.result_class := r.c.class;
1254 v.result_sign := r.a.negative xor r.c.negative;
1261 v.result_sign := r.a.negative;
1262 v.result_class := r.a.class;
1263 v.result_exp := r.a.exponent;
1264 v.fpscr(FPSCR_FR) := '0';
1265 v.fpscr(FPSCR_FI) := '0';
1266 v.result_sign := r.a.negative xor r.b.negative;
1267 v.result_exp := r.a.exponent - r.b.exponent;
1269 if r.a.class = FINITE and r.b.class = FINITE then
1270 -- Renormalize denorm operands
1271 if r.a.mantissa(54) = '0' then
1272 v.state := RENORM_A;
1273 elsif r.b.mantissa(54) = '0' then
1275 v.state := RENORM_B;
1281 if (r.a.class = NAN and r.a.mantissa(53) = '0') or
1282 (r.b.class = NAN and r.b.mantissa(53) = '0') then
1284 v.fpscr(FPSCR_VXSNAN) := '1';
1287 if r.a.class = NAN then
1289 v.result_sign := r.a.negative;
1290 elsif r.b.class = NAN then
1291 v.result_class := NAN;
1292 v.result_sign := r.b.negative;
1294 elsif r.b.class = INFINITY then
1295 if r.a.class = INFINITY then
1296 v.fpscr(FPSCR_VXIDI) := '1';
1299 v.result_class := ZERO;
1301 elsif r.b.class = ZERO then
1302 if r.a.class = ZERO then
1303 v.fpscr(FPSCR_VXZDZ) := '1';
1306 if r.a.class = FINITE then
1309 v.result_class := INFINITY;
1311 -- else r.b.class = FINITE, result_class = r.a.class
1318 v.fpscr(FPSCR_FR) := '0';
1319 v.fpscr(FPSCR_FI) := '0';
1320 if r.a.class = ZERO or (r.a.negative = '0' and r.a.class /= NAN) then
1321 v.result_sign := r.c.negative;
1322 v.result_exp := r.c.exponent;
1323 v.result_class := r.c.class;
1326 v.result_sign := r.b.negative;
1327 v.result_exp := r.b.exponent;
1328 v.result_class := r.b.class;
1331 v.quieten_nan := '0';
1336 v.result_class := r.b.class;
1337 v.result_sign := r.b.negative;
1338 v.fpscr(FPSCR_FR) := '0';
1339 v.fpscr(FPSCR_FI) := '0';
1340 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1341 v.fpscr(FPSCR_VXSNAN) := '1';
1346 v.result_exp := r.b.exponent;
1347 if r.b.negative = '1' then
1348 v.fpscr(FPSCR_VXSQRT) := '1';
1351 elsif r.b.mantissa(54) = '0' then
1352 v.state := RENORM_B;
1353 elsif r.b.exponent(0) = '0' then
1356 v.shift := to_signed(1, EXP_BITS);
1357 v.state := RENORM_B2;
1363 if r.b.negative = '1' then
1364 v.fpscr(FPSCR_VXSQRT) := '1';
1373 v.result_class := r.b.class;
1374 v.result_sign := r.b.negative;
1375 v.fpscr(FPSCR_FR) := '0';
1376 v.fpscr(FPSCR_FI) := '0';
1377 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1378 v.fpscr(FPSCR_VXSNAN) := '1';
1383 v.result_exp := - r.b.exponent;
1384 if r.b.mantissa(54) = '0' then
1385 v.state := RENORM_B;
1393 v.result_class := ZERO;
1396 v.result_class := INFINITY;
1403 v.result_class := r.b.class;
1404 v.result_sign := r.b.negative;
1405 v.fpscr(FPSCR_FR) := '0';
1406 v.fpscr(FPSCR_FI) := '0';
1407 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1408 v.fpscr(FPSCR_VXSNAN) := '1';
1411 v.shift := to_signed(1, EXP_BITS);
1414 v.result_exp := r.b.exponent;
1415 if r.b.negative = '1' then
1416 v.fpscr(FPSCR_VXSQRT) := '1';
1419 elsif r.b.mantissa(54) = '0' then
1420 v.state := RENORM_B;
1421 elsif r.b.exponent(0) = '0' then
1424 v.state := RENORM_B2;
1430 if r.b.negative = '1' then
1431 v.fpscr(FPSCR_VXSQRT) := '1';
1434 v.result_class := ZERO;
1438 v.result_class := INFINITY;
1444 -- fmadd, fmsub, fnmadd, fnmsub
1446 v.result_sign := r.a.negative;
1447 v.result_class := r.a.class;
1448 v.result_exp := r.a.exponent;
1449 v.fpscr(FPSCR_FR) := '0';
1450 v.fpscr(FPSCR_FI) := '0';
1451 is_add := r.a.negative xor r.c.negative xor r.b.negative xor r.insn(1);
1452 if r.a.class = FINITE and r.c.class = FINITE and
1453 (r.b.class = FINITE or r.b.class = ZERO) then
1454 v.is_subtract := not is_add;
1455 mulexp := r.a.exponent + r.c.exponent;
1456 v.result_exp := mulexp;
1458 -- Make sure A and C are normalized
1459 if r.a.mantissa(54) = '0' then
1461 v.state := RENORM_A;
1462 elsif r.c.mantissa(54) = '0' then
1464 v.state := RENORM_C;
1465 elsif r.b.class = ZERO then
1466 -- no addend, degenerates to multiply
1467 v.result_sign := r.a.negative xor r.c.negative xor r.insn(2);
1468 f_to_multiply.valid <= '1';
1469 v.is_multiply := '1';
1471 elsif r.madd_cmp = '0' then
1472 -- addend is bigger, do multiply first
1473 v.result_sign := not (r.b.negative xor r.insn(1) xor r.insn(2));
1474 f_to_multiply.valid <= '1';
1477 -- product is bigger, shift B right and use it as the
1478 -- addend to the multiplier
1479 v.shift := r.b.exponent - mulexp + to_signed(64, EXP_BITS);
1480 -- for subtract, multiplier does B - A * C
1481 v.result_sign := not (r.a.negative xor r.c.negative xor r.insn(2) xor is_add);
1482 v.result_exp := r.b.exponent;
1486 if (r.a.class = NAN and r.a.mantissa(53) = '0') or
1487 (r.b.class = NAN and r.b.mantissa(53) = '0') or
1488 (r.c.class = NAN and r.c.mantissa(53) = '0') then
1490 v.fpscr(FPSCR_VXSNAN) := '1';
1493 if r.a.class = NAN then
1494 -- nothing to do, result is A
1495 elsif r.b.class = NAN then
1497 v.result_class := NAN;
1498 v.result_sign := r.b.negative;
1500 elsif r.c.class = NAN then
1502 v.result_class := NAN;
1503 v.result_sign := r.c.negative;
1505 elsif (r.a.class = ZERO and r.c.class = INFINITY) or
1506 (r.a.class = INFINITY and r.c.class = ZERO) then
1507 -- invalid operation, construct QNaN
1508 v.fpscr(FPSCR_VXIMZ) := '1';
1510 elsif r.a.class = INFINITY or r.c.class = INFINITY then
1511 if r.b.class = INFINITY and is_add = '0' then
1512 -- invalid operation, construct QNaN
1513 v.fpscr(FPSCR_VXISI) := '1';
1516 -- result is infinity
1517 v.result_class := INFINITY;
1518 v.result_sign := r.a.negative xor r.c.negative xor r.insn(2);
1521 -- Here A is zero, C is zero, or B is infinity
1522 -- Result is +/-B in all of those cases
1523 v.result_class := r.b.class;
1524 v.result_exp := r.b.exponent;
1525 if v.result_class /= ZERO or is_add = '1' then
1526 v.result_sign := not (r.b.negative xor r.insn(1) xor r.insn(2));
1528 -- have to be careful about rule for 0 - 0 result sign
1529 v.result_sign := (r.round_mode(1) and r.round_mode(0)) xor r.insn(2);
1538 v.state := RENORM_A2;
1542 v.result_exp := new_exp;
1543 if r.insn(4) = '1' then
1545 if r.c.mantissa(54) = '1' then
1546 if r.insn(3) = '0' or r.b.class = ZERO then
1551 if new_exp + 1 >= r.b.exponent then
1554 v.state := DO_FMADD;
1557 v.state := RENORM_C;
1561 if r.b.mantissa(54) = '1' then
1565 v.state := RENORM_B;
1571 renorm_sqrt := r.is_sqrt;
1572 v.state := RENORM_B2;
1576 if r.is_sqrt = '0' then
1577 v.result_exp := r.result_exp + r.shift;
1579 v.result_exp := new_exp;
1585 v.state := RENORM_C2;
1589 v.result_exp := new_exp;
1590 if r.insn(3) = '0' or r.b.class = ZERO then
1595 if new_exp + 1 >= r.b.exponent then
1598 v.state := DO_FMADD;
1602 opsel_r <= RES_SHIFT;
1609 if r.add_bsmall = '1' then
1615 opsel_binv <= r.is_subtract;
1616 carry_in <= r.is_subtract and not r.x;
1617 v.shift := to_signed(-1, EXP_BITS);
1621 -- check for overflow or negative result (can't get both)
1622 if r.r(63) = '1' then
1623 -- result is opposite sign to expected
1624 v.result_sign := not r.result_sign;
1628 elsif r.r(55) = '1' then
1629 -- sum overflowed, shift right
1630 opsel_r <= RES_SHIFT;
1632 v.shift := to_signed(-2, EXP_BITS);
1633 if exp_huge = '1' then
1634 v.state := ROUND_OFLOW;
1636 v.state := ROUNDING;
1638 elsif r.r(54) = '1' then
1640 v.shift := to_signed(-2, EXP_BITS);
1641 v.state := ROUNDING;
1642 elsif (r_hi_nz or r_lo_nz or r.r(1) or r.r(0)) = '0' then
1643 -- r.x must be zero at this point
1644 v.result_class := ZERO;
1645 if r.is_subtract = '1' then
1646 -- set result sign depending on rounding mode
1647 v.result_sign := r.round_mode(1) and r.round_mode(0);
1652 v.state := NORMALIZE;
1663 if r.r(63) = '1' then
1664 -- A is smaller in magnitude
1665 v.cr_result := not r.a.negative & r.a.negative & "00";
1666 elsif (r_hi_nz or r_lo_nz) = '0' then
1667 v.cr_result := "0010";
1669 v.cr_result := r.a.negative & not r.a.negative & "00";
1671 v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result;
1672 v.instr_done := '1';
1676 f_to_multiply.valid <= r.first;
1677 opsel_r <= RES_MULT;
1678 if multiply_to_f.valid = '1' then
1683 -- Addend is bigger here
1684 v.result_sign := not (r.b.negative xor r.insn(1) xor r.insn(2));
1685 -- note v.shift is at most -2 here
1686 v.shift := r.result_exp - r.b.exponent;
1687 opsel_r <= RES_MULT;
1690 f_to_multiply.valid <= r.first;
1691 if multiply_to_f.valid = '1' then
1692 v.state := ADD_SHIFT;
1696 -- Product is potentially bigger here
1699 v.shift := r.shift - to_signed(64, EXP_BITS);
1703 opsel_r <= RES_SHIFT;
1708 msel_add <= MULADD_RS;
1709 f_to_multiply.valid <= r.first;
1710 msel_inv <= r.is_subtract;
1711 opsel_r <= RES_MULT;
1714 v.shift := to_signed(56, EXP_BITS);
1715 if multiply_to_f.valid = '1' then
1716 if multiply_to_f.result(121) = '1' then
1725 v.result_sign := not r.result_sign;
1727 carry_in <= not (s_nz or r.x);
1730 v.shift := to_signed(56, EXP_BITS);
1734 if (r.r(56) or r_hi_nz or r_lo_nz or r.r(1) or r.r(0)) = '0' then
1736 -- must be a subtraction, and r.x must be zero
1737 v.result_class := ZERO;
1738 v.result_sign := r.round_mode(1) and r.round_mode(0);
1741 -- R is all zeroes but there are non-zero bits in S
1742 -- so shift them into R and set S to 0
1743 opsel_r <= RES_SHIFT;
1745 -- stay in state FMADD_6
1747 elsif r.r(56 downto 54) = "001" then
1751 v.state := NORMALIZE;
1756 -- wait one cycle for inverse_table[B] lookup
1758 if r.insn(4) = '0' then
1759 if r.insn(3) = '0' then
1764 elsif r.insn(2) = '0' then
1771 -- compute Y = inverse_table[B] (when count=0); P = 2 - B * Y
1773 msel_add <= MULADD_CONST;
1782 f_to_multiply.valid <= r.first;
1783 if multiply_to_f.valid = '1' then
1785 v.count := r.count + 1;
1790 -- compute Y = P = P * Y
1793 f_to_multiply.valid <= r.first;
1795 if multiply_to_f.valid = '1' then
1805 -- compute R = P = A * Y (quotient)
1809 f_to_multiply.valid <= r.first;
1811 if multiply_to_f.valid = '1' then
1812 opsel_r <= RES_MULT;
1818 -- compute P = A - B * R (remainder)
1821 msel_add <= MULADD_A;
1823 f_to_multiply.valid <= r.first;
1824 if multiply_to_f.valid = '1' then
1829 -- test if remainder is 0 or >= B
1830 if pcmpb_lt = '1' then
1831 -- quotient is correct, set X if remainder non-zero
1832 v.x := r.p(58) or px_nz;
1834 -- quotient needs to be incremented by 1
1836 v.x := not pcmpb_eq;
1841 opsel_r <= RES_MISC;
1843 v.shift := to_signed(1, EXP_BITS);
1844 v.state := NORMALIZE;
1847 v.cr_result(1) := exp_tiny or exp_huge;
1848 if exp_tiny = '1' or exp_huge = '1' or r.a.class = ZERO or r.first = '0' then
1849 v.instr_done := '1';
1852 v.shift := r.a.exponent;
1853 v.doing_ftdiv := "10";
1857 opsel_r <= RES_MISC;
1859 sqrt_exp := r.b.exponent(EXP_BITS-1) & r.b.exponent(EXP_BITS-1 downto 1);
1860 v.result_exp := - sqrt_exp;
1861 v.shift := to_signed(1, EXP_BITS);
1862 v.state := NORMALIZE;
1865 -- put invsqr[B] in R and compute P = invsqr[B] * B
1866 -- also transfer B (in R) to A
1868 opsel_r <= RES_MISC;
1872 f_to_multiply.valid <= '1';
1873 v.shift := to_signed(-1, EXP_BITS);
1878 -- shift R right one place
1879 -- not expecting multiplier result yet
1880 opsel_r <= RES_SHIFT;
1885 -- put R into Y, wait for product from multiplier
1889 if multiply_to_f.valid = '1' then
1890 -- put result into R
1891 opsel_r <= RES_MULT;
1897 -- compute 1.5 - Y * P
1900 msel_add <= MULADD_CONST;
1902 f_to_multiply.valid <= r.first;
1904 if multiply_to_f.valid = '1' then
1909 -- compute Y = Y * P
1912 f_to_multiply.valid <= '1';
1917 -- pipeline in R = R * P
1920 f_to_multiply.valid <= r.first;
1922 if multiply_to_f.valid = '1' then
1928 -- first multiply is done, put result in Y
1931 -- wait for second multiply (should be here already)
1933 if multiply_to_f.valid = '1' then
1934 -- put result into R
1935 opsel_r <= RES_MULT;
1937 v.count := r.count + 1;
1947 -- compute P = A - R * R, which can be +ve or -ve
1948 -- we arranged for B to be put into A earlier
1951 msel_add <= MULADD_A;
1954 f_to_multiply.valid <= r.first;
1955 if multiply_to_f.valid = '1' then
1961 -- compute P = P * Y
1962 -- since Y is an estimate of 1/sqrt(B), this makes P an
1963 -- estimate of the adjustment needed to R. Since the error
1964 -- could be negative and we have an unsigned multiplier, the
1965 -- upper bits can be wrong, but it turns out the lowest 8 bits
1966 -- are correct and are all we need (given 3 iterations through
1967 -- SQRT_4 to SQRT_7).
1971 f_to_multiply.valid <= r.first;
1972 if multiply_to_f.valid = '1' then
1977 -- Add the bottom 8 bits of P, sign-extended,
1978 -- divided by 4, onto R.
1979 -- The division by 4 is because R is 10.54 format
1980 -- whereas P is 8.56 format.
1982 sqrt_exp := r.b.exponent(EXP_BITS-1) & r.b.exponent(EXP_BITS-1 downto 1);
1983 v.result_exp := sqrt_exp;
1984 v.shift := to_signed(1, EXP_BITS);
1989 -- compute P = A - R * R (remainder)
1990 -- also put 2 * R + 1 into B for comparison with P
1993 msel_add <= MULADD_A;
1995 f_to_multiply.valid <= r.first;
1998 if multiply_to_f.valid = '1' then
2003 -- test if remainder is 0 or >= B = 2*R + 1
2004 if pcmpb_lt = '1' then
2005 -- square root is correct, set X if remainder non-zero
2006 v.x := r.p(58) or px_nz;
2008 -- square root needs to be incremented by 1
2010 v.x := not pcmpb_eq;
2015 opsel_r <= RES_SHIFT;
2017 v.state := INT_ROUND;
2018 v.shift := to_signed(-2, EXP_BITS);
2021 opsel_r <= RES_SHIFT;
2022 round := fp_rounding(r.r, r.x, '0', r.round_mode, r.result_sign);
2023 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
2024 -- Check for negative values that don't round to 0 for fcti*u*
2025 if r.insn(8) = '1' and r.result_sign = '1' and
2026 (r_hi_nz or r_lo_nz or v.fpscr(FPSCR_FR)) = '1' then
2027 v.state := INT_OFLOW;
2029 v.state := INT_FINAL;
2033 opsel_r <= RES_SHIFT;
2034 v.state := INT_FINAL;
2037 -- Negate if necessary, and increment for rounding if needed
2038 opsel_ainv <= r.result_sign;
2039 carry_in <= r.fpscr(FPSCR_FR) xor r.result_sign;
2040 -- Check for possible overflows
2041 case r.insn(9 downto 8) is
2042 when "00" => -- fctiw[z]
2043 need_check := r.r(31) or (r.r(30) and not r.result_sign);
2044 when "01" => -- fctiwu[z]
2045 need_check := r.r(31);
2046 when "10" => -- fctid[z]
2047 need_check := r.r(63) or (r.r(62) and not r.result_sign);
2048 when others => -- fctidu[z]
2049 need_check := r.r(63);
2051 if need_check = '1' then
2052 v.state := INT_CHECK;
2054 if r.fpscr(FPSCR_FI) = '1' then
2055 v.fpscr(FPSCR_XX) := '1';
2061 if r.insn(9) = '0' then
2066 misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
2067 if (r.insn(8) = '0' and msb /= r.result_sign) or
2068 (r.insn(8) = '1' and msb /= '1') then
2069 opsel_r <= RES_MISC;
2070 v.fpscr(FPSCR_VXCVI) := '1';
2073 if r.fpscr(FPSCR_FI) = '1' then
2074 v.fpscr(FPSCR_XX) := '1';
2080 opsel_r <= RES_MISC;
2081 misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
2082 if r.b.class = NAN then
2085 v.fpscr(FPSCR_VXCVI) := '1';
2090 opsel_r <= RES_SHIFT;
2092 v.shift := to_signed(-2, EXP_BITS);
2093 v.state := ROUNDING;
2096 if r.is_multiply = '1' and px_nz = '1' then
2099 if r.r(63 downto 54) /= "0000000001" then
2101 v.state := NORMALIZE;
2104 if exp_tiny = '1' then
2105 v.shift := new_exp - min_exp;
2106 v.state := ROUND_UFLOW;
2107 elsif exp_huge = '1' then
2108 v.state := ROUND_OFLOW;
2110 v.shift := to_signed(-2, EXP_BITS);
2111 v.state := ROUNDING;
2116 -- Shift so we have 9 leading zeroes (we know R is non-zero)
2117 opsel_r <= RES_SHIFT;
2119 if exp_tiny = '1' then
2120 v.shift := new_exp - min_exp;
2121 v.state := ROUND_UFLOW;
2122 elsif exp_huge = '1' then
2123 v.state := ROUND_OFLOW;
2125 v.shift := to_signed(-2, EXP_BITS);
2126 v.state := ROUNDING;
2131 if r.fpscr(FPSCR_UE) = '0' then
2132 -- disabled underflow exception case
2133 -- have to denormalize before rounding
2134 opsel_r <= RES_SHIFT;
2136 v.shift := to_signed(-2, EXP_BITS);
2137 v.state := ROUNDING;
2139 -- enabled underflow exception case
2140 -- if denormalized, have to normalize before rounding
2141 v.fpscr(FPSCR_UX) := '1';
2142 v.result_exp := r.result_exp + bias_exp;
2143 if r.r(54) = '0' then
2145 v.state := NORMALIZE;
2147 v.shift := to_signed(-2, EXP_BITS);
2148 v.state := ROUNDING;
2153 v.fpscr(FPSCR_OX) := '1';
2154 if r.fpscr(FPSCR_OE) = '0' then
2155 -- disabled overflow exception
2156 -- result depends on rounding mode
2157 v.fpscr(FPSCR_XX) := '1';
2158 v.fpscr(FPSCR_FI) := '1';
2159 if r.round_mode(1 downto 0) = "00" or
2160 (r.round_mode(1) = '1' and r.round_mode(0) = r.result_sign) then
2161 v.result_class := INFINITY;
2162 v.fpscr(FPSCR_FR) := '1';
2164 v.fpscr(FPSCR_FR) := '0';
2166 -- construct largest representable number
2167 v.result_exp := max_exp;
2168 opsel_r <= RES_MISC;
2169 misc_sel <= "001" & r.single_prec;
2172 -- enabled overflow exception
2173 v.result_exp := r.result_exp - bias_exp;
2174 v.shift := to_signed(-2, EXP_BITS);
2175 v.state := ROUNDING;
2180 round := fp_rounding(r.r, r.x, r.single_prec, r.round_mode, r.result_sign);
2181 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
2182 if round(1) = '1' then
2183 -- set mask to increment the LSB for the precision
2184 opsel_b <= BIN_MASK;
2186 v.shift := to_signed(-1, EXP_BITS);
2187 v.state := ROUNDING_2;
2189 if r.r(54) = '0' then
2190 -- result after masking could be zero, or could be a
2191 -- denormalized result that needs to be renormalized
2193 v.state := ROUNDING_3;
2198 if round(0) = '1' then
2199 v.fpscr(FPSCR_XX) := '1';
2200 if r.tiny = '1' then
2201 v.fpscr(FPSCR_UX) := '1';
2206 -- Check for overflow during rounding
2208 if r.r(55) = '1' then
2209 opsel_r <= RES_SHIFT;
2210 if exp_huge = '1' then
2211 v.state := ROUND_OFLOW;
2215 elsif r.r(54) = '0' then
2216 -- Do CLZ so we can renormalize the result
2218 v.state := ROUNDING_3;
2224 mant_nz := r_hi_nz or (r_lo_nz and not r.single_prec);
2225 if mant_nz = '0' then
2226 v.result_class := ZERO;
2227 if r.is_subtract = '1' then
2228 -- set result sign depending on rounding mode
2229 v.result_sign := r.round_mode(1) and r.round_mode(0);
2233 -- Renormalize result after rounding
2234 opsel_r <= RES_SHIFT;
2235 v.denorm := exp_tiny;
2236 v.shift := new_exp - to_signed(-1022, EXP_BITS);
2237 if new_exp < to_signed(-1022, EXP_BITS) then
2245 opsel_r <= RES_SHIFT;
2250 if zero_divide = '1' then
2251 v.fpscr(FPSCR_ZX) := '1';
2253 if qnan_result = '1' then
2255 v.result_class := NAN;
2256 v.result_sign := '0';
2258 opsel_r <= RES_MISC;
2260 if arith_done = '1' then
2261 -- Enabled invalid exception doesn't write result or FPRF
2262 -- Neither does enabled zero-divide exception
2263 if (invalid and r.fpscr(FPSCR_VE)) = '0' and
2264 (zero_divide and r.fpscr(FPSCR_ZE)) = '0' then
2265 v.writing_back := '1';
2266 v.update_fprf := '1';
2268 v.instr_done := '1';
2273 -- Multiplier and divide/square root data path
2276 f_to_multiply.data1 <= r.a.mantissa(61 downto 0) & "00";
2278 f_to_multiply.data1 <= r.b.mantissa(61 downto 0) & "00";
2280 f_to_multiply.data1 <= r.y;
2282 f_to_multiply.data1 <= r.r(61 downto 0) & "00";
2286 f_to_multiply.data2 <= r.c.mantissa(61 downto 0) & "00";
2288 f_to_multiply.data2 <= x"00" & inverse_est & '0' & x"000000000";
2290 f_to_multiply.data2 <= r.p;
2292 f_to_multiply.data2 <= r.r(61 downto 0) & "00";
2294 maddend := (others => '0');
2296 when MULADD_CONST =>
2297 -- addend is 2.0 or 1.5 in 16.112 format
2298 if r.is_sqrt = '0' then
2299 maddend(113) := '1'; -- 2.0
2301 maddend(112 downto 111) := "11"; -- 1.5
2304 -- addend is A in 16.112 format
2305 maddend(121 downto 58) := r.a.mantissa;
2307 -- addend is concatenation of R and S in 16.112 format
2308 maddend := "000000" & r.r & r.s & "00";
2311 if msel_inv = '1' then
2312 f_to_multiply.addend <= not maddend;
2314 f_to_multiply.addend <= maddend;
2316 f_to_multiply.not_result <= msel_inv;
2318 v.y := f_to_multiply.data2;
2320 if multiply_to_f.valid = '1' then
2321 if pshift = '0' then
2322 v.p := multiply_to_f.result(63 downto 0);
2324 v.p := multiply_to_f.result(119 downto 56);
2329 -- This has A and B input multiplexers, an adder, a shifter,
2330 -- count-leading-zeroes logic, and a result mux.
2331 if longmask = '1' then
2332 mshift := r.shift + to_signed(-29, EXP_BITS);
2336 if mshift < to_signed(-64, EXP_BITS) then
2337 mask := (others => '1');
2338 elsif mshift >= to_signed(0, EXP_BITS) then
2339 mask := (others => '0');
2341 mask := right_mask(unsigned(mshift(5 downto 0)));
2347 in_a0 := r.a.mantissa;
2349 in_a0 := r.b.mantissa;
2351 in_a0 := r.c.mantissa;
2353 if (or (mask and in_a0)) = '1' and set_x = '1' then
2356 if opsel_ainv = '1' then
2359 if opsel_amask = '1' then
2360 in_a0 := in_a0 and not mask;
2365 in_b0 := (others => '0');
2371 -- BIN_PS6, 6 LSBs of P/4 sign-extended to 64
2372 in_b0 := std_ulogic_vector(resize(signed(r.p(7 downto 2)), 64));
2374 if opsel_binv = '1' then
2378 if r.shift >= to_signed(-64, EXP_BITS) and r.shift <= to_signed(63, EXP_BITS) then
2379 shift_res := shifter_64(r.r & (shiftin or r.s(55)) & r.s(54 downto 0),
2380 std_ulogic_vector(r.shift(6 downto 0)));
2382 shift_res := (others => '0');
2386 result <= std_ulogic_vector(unsigned(in_a) + unsigned(in_b) + carry_in);
2388 result <= shift_res;
2390 result <= multiply_to_f.result(121 downto 58);
2394 misc := x"00000000" & (r.fpscr and fpscr_mask);
2396 -- generated QNaN mantissa
2397 misc := x"0020000000000000";
2399 -- mantissa of max representable DP number
2400 misc := x"007ffffffffffffc";
2402 -- mantissa of max representable SP number
2403 misc := x"007fffff80000000";
2406 misc := r.a.mantissa(31 downto 0) & r.b.mantissa(31 downto 0);
2409 misc := r.a.mantissa(63 downto 32) & r.b.mantissa(63 downto 32);
2411 misc := 10x"000" & inverse_est & 35x"000000000";
2413 -- max positive result for fctiw[z]
2414 misc := x"000000007fffffff";
2416 -- max negative result for fctiw[z]
2417 misc := x"ffffffff80000000";
2419 -- max positive result for fctiwu[z]
2420 misc := x"00000000ffffffff";
2422 -- max negative result for fctiwu[z]
2423 misc := x"0000000000000000";
2425 -- max positive result for fctid[z]
2426 misc := x"7fffffffffffffff";
2428 -- max negative result for fctid[z]
2429 misc := x"8000000000000000";
2431 -- max positive result for fctidu[z]
2432 misc := x"ffffffffffffffff";
2434 -- max negative result for fctidu[z]
2435 misc := x"0000000000000000";
2437 misc := x"0000000000000000";
2445 v.s := std_ulogic_vector(unsigned(not r.s) + (not r.x));
2447 v.s := multiply_to_f.result(57 downto 2);
2449 v.s := shift_res(63 downto 8);
2450 if shift_res(7 downto 0) /= x"00" then
2454 v.s := (others => '0');
2459 v.a.exponent := new_exp;
2460 v.a.mantissa := shift_res;
2463 v.b.exponent := new_exp;
2464 v.b.mantissa := shift_res;
2467 v.c.exponent := new_exp;
2468 v.c.mantissa := shift_res;
2471 if opsel_r = RES_SHIFT then
2472 v.result_exp := new_exp;
2475 if renormalize = '1' then
2476 clz := count_left_zeroes(r.r);
2477 if renorm_sqrt = '1' then
2478 -- make denormalized value end up with even exponent
2481 v.shift := resize(signed('0' & clz) - 9, EXP_BITS);
2484 if r.int_result = '1' then
2487 fp_result <= pack_dp(r.result_sign, r.result_class, r.result_exp, r.r,
2488 r.single_prec, r.quieten_nan);
2490 if r.update_fprf = '1' then
2491 v.fpscr(FPSCR_C downto FPSCR_FU) := result_flags(r.result_sign, r.result_class,
2492 r.r(54) and not r.denorm);
2495 v.fpscr(FPSCR_VX) := (or (v.fpscr(FPSCR_VXSNAN downto FPSCR_VXVC))) or
2496 (or (v.fpscr(FPSCR_VXSOFT downto FPSCR_VXCVI)));
2497 v.fpscr(FPSCR_FEX) := or (v.fpscr(FPSCR_VX downto FPSCR_XX) and
2498 v.fpscr(FPSCR_VE downto FPSCR_XE));
2499 if update_fx = '1' and
2500 (v.fpscr(FPSCR_VX downto FPSCR_XX) and not r.old_exc) /= "00000" then
2501 v.fpscr(FPSCR_FX) := '1';
2504 v.cr_result := v.fpscr(FPSCR_FX downto FPSCR_OX);
2507 if illegal = '1' then
2508 v.instr_done := '0';
2510 v.writing_back := '0';
2514 v.do_intr := v.instr_done and v.fpscr(FPSCR_FEX) and r.fe_mode;
2515 if v.state /= IDLE or v.do_intr = '1' then
2521 e_out.illegal <= illegal;
2524 end architecture behaviour;