FPU: Decide on A input selection a cycle earlier
[microwatt.git] / fpu.vhdl
1 -- Floating-point unit for Microwatt
2
3 library ieee;
4 use ieee.std_logic_1164.all;
5 use ieee.numeric_std.all;
6
7 library work;
8 use work.insn_helpers.all;
9 use work.decode_types.all;
10 use work.crhelpers.all;
11 use work.helpers.all;
12 use work.common.all;
13
-- Port interface of the Microwatt floating-point unit.
entity fpu is
    port (
        -- Clock, and a reset that the state-register process samples
        -- synchronously on the rising edge of clk.
        clk   : in  std_ulogic;
        rst   : in  std_ulogic;

        -- e_in carries the incoming instruction and its operands
        -- (valid, op, insn, fra/frb/frc, ...); e_out reports busy,
        -- exception and interrupt status back.
        e_in  : in  Execute1toFPUType;
        e_out : out FPUToExecute1Type;

        -- Result write-back: register write plus optional CR write.
        w_out : out FPUToWritebackType
    );
end entity fpu;
25
26 architecture behaviour of fpu is
27 type fp_number_class is (ZERO, FINITE, INFINITY, NAN);
28
29 constant EXP_BITS : natural := 13;
30
31 type fpu_reg_type is record
32 class : fp_number_class;
33 negative : std_ulogic;
34 exponent : signed(EXP_BITS-1 downto 0); -- unbiased
35 mantissa : std_ulogic_vector(63 downto 0); -- 10.54 format
36 end record;
37
38 type state_t is (IDLE,
39 DO_MCRFS, DO_MTFSB, DO_MTFSFI, DO_MFFS, DO_MTFSF,
40 DO_FMR, DO_FMRG, DO_FCMP, DO_FTDIV, DO_FTSQRT,
41 DO_FCFID, DO_FCTI,
42 DO_FRSP, DO_FRI,
43 DO_FADD, DO_FMUL, DO_FDIV, DO_FSQRT, DO_FMADD,
44 DO_FRE, DO_FRSQRTE,
45 DO_FSEL,
46 FRI_1,
47 ADD_1, ADD_SHIFT, ADD_2, ADD_3,
48 CMP_1, CMP_2,
49 MULT_1,
50 FMADD_1, FMADD_2, FMADD_3,
51 FMADD_4, FMADD_5, FMADD_6,
52 LOOKUP,
53 DIV_2, DIV_3, DIV_4, DIV_5, DIV_6,
54 FRE_1,
55 RSQRT_1,
56 FTDIV_1,
57 SQRT_1, SQRT_2, SQRT_3, SQRT_4,
58 SQRT_5, SQRT_6, SQRT_7, SQRT_8,
59 SQRT_9, SQRT_10, SQRT_11, SQRT_12,
60 INT_SHIFT, INT_ROUND, INT_ISHIFT,
61 INT_FINAL, INT_CHECK, INT_OFLOW,
62 FINISH, NORMALIZE,
63 ROUND_UFLOW, ROUND_OFLOW,
64 ROUNDING, ROUNDING_2, ROUNDING_3,
65 DENORM,
66 RENORM_A, RENORM_A2,
67 RENORM_B, RENORM_B2,
68 RENORM_C, RENORM_C2,
69 NAN_RESULT, EXC_RESULT);
70
71 type reg_type is record
72 state : state_t;
73 busy : std_ulogic;
74 instr_done : std_ulogic;
75 do_intr : std_ulogic;
76 op : insn_type_t;
77 insn : std_ulogic_vector(31 downto 0);
78 dest_fpr : gspr_index_t;
79 fe_mode : std_ulogic;
80 rc : std_ulogic;
81 is_cmp : std_ulogic;
82 single_prec : std_ulogic;
83 fpscr : std_ulogic_vector(31 downto 0);
84 a : fpu_reg_type;
85 b : fpu_reg_type;
86 c : fpu_reg_type;
87 r : std_ulogic_vector(63 downto 0); -- 10.54 format
88 s : std_ulogic_vector(55 downto 0); -- extended fraction
89 x : std_ulogic;
90 p : std_ulogic_vector(63 downto 0); -- 8.56 format
91 y : std_ulogic_vector(63 downto 0); -- 8.56 format
92 result_sign : std_ulogic;
93 result_class : fp_number_class;
94 result_exp : signed(EXP_BITS-1 downto 0);
95 shift : signed(EXP_BITS-1 downto 0);
96 writing_back : std_ulogic;
97 int_result : std_ulogic;
98 cr_result : std_ulogic_vector(3 downto 0);
99 cr_mask : std_ulogic_vector(7 downto 0);
100 old_exc : std_ulogic_vector(4 downto 0);
101 update_fprf : std_ulogic;
102 quieten_nan : std_ulogic;
103 tiny : std_ulogic;
104 denorm : std_ulogic;
105 round_mode : std_ulogic_vector(2 downto 0);
106 is_subtract : std_ulogic;
107 exp_cmp : std_ulogic;
108 madd_cmp : std_ulogic;
109 add_bsmall : std_ulogic;
110 is_multiply : std_ulogic;
111 is_sqrt : std_ulogic;
112 first : std_ulogic;
113 count : unsigned(1 downto 0);
114 doing_ftdiv : std_ulogic_vector(1 downto 0);
115 opsel_a : std_ulogic_vector(1 downto 0);
116 use_a : std_ulogic;
117 use_b : std_ulogic;
118 use_c : std_ulogic;
119 invalid : std_ulogic;
120 negate : std_ulogic;
121 end record;
122
123 type lookup_table is array(0 to 1023) of std_ulogic_vector(17 downto 0);
124
125 signal r, rin : reg_type;
126
127 signal fp_result : std_ulogic_vector(63 downto 0);
128 signal opsel_b : std_ulogic_vector(1 downto 0);
129 signal opsel_r : std_ulogic_vector(1 downto 0);
130 signal opsel_s : std_ulogic_vector(1 downto 0);
131 signal opsel_ainv : std_ulogic;
132 signal opsel_amask : std_ulogic;
133 signal opsel_binv : std_ulogic;
134 signal in_a : std_ulogic_vector(63 downto 0);
135 signal in_b : std_ulogic_vector(63 downto 0);
136 signal result : std_ulogic_vector(63 downto 0);
137 signal carry_in : std_ulogic;
138 signal lost_bits : std_ulogic;
139 signal r_hi_nz : std_ulogic;
140 signal r_lo_nz : std_ulogic;
141 signal s_nz : std_ulogic;
142 signal misc_sel : std_ulogic_vector(3 downto 0);
143 signal f_to_multiply : MultiplyInputType;
144 signal multiply_to_f : MultiplyOutputType;
145 signal msel_1 : std_ulogic_vector(1 downto 0);
146 signal msel_2 : std_ulogic_vector(1 downto 0);
147 signal msel_add : std_ulogic_vector(1 downto 0);
148 signal msel_inv : std_ulogic;
149 signal inverse_est : std_ulogic_vector(18 downto 0);
150
151 -- opsel values
152 constant AIN_R : std_ulogic_vector(1 downto 0) := "00";
153 constant AIN_A : std_ulogic_vector(1 downto 0) := "01";
154 constant AIN_B : std_ulogic_vector(1 downto 0) := "10";
155 constant AIN_C : std_ulogic_vector(1 downto 0) := "11";
156
157 constant BIN_ZERO : std_ulogic_vector(1 downto 0) := "00";
158 constant BIN_R : std_ulogic_vector(1 downto 0) := "01";
159 constant BIN_MASK : std_ulogic_vector(1 downto 0) := "10";
160 constant BIN_PS6 : std_ulogic_vector(1 downto 0) := "11";
161
162 constant RES_SUM : std_ulogic_vector(1 downto 0) := "00";
163 constant RES_SHIFT : std_ulogic_vector(1 downto 0) := "01";
164 constant RES_MULT : std_ulogic_vector(1 downto 0) := "10";
165 constant RES_MISC : std_ulogic_vector(1 downto 0) := "11";
166
167 constant S_ZERO : std_ulogic_vector(1 downto 0) := "00";
168 constant S_NEG : std_ulogic_vector(1 downto 0) := "01";
169 constant S_SHIFT : std_ulogic_vector(1 downto 0) := "10";
170 constant S_MULT : std_ulogic_vector(1 downto 0) := "11";
171
172 -- msel values
173 constant MUL1_A : std_ulogic_vector(1 downto 0) := "00";
174 constant MUL1_B : std_ulogic_vector(1 downto 0) := "01";
175 constant MUL1_Y : std_ulogic_vector(1 downto 0) := "10";
176 constant MUL1_R : std_ulogic_vector(1 downto 0) := "11";
177
178 constant MUL2_C : std_ulogic_vector(1 downto 0) := "00";
179 constant MUL2_LUT : std_ulogic_vector(1 downto 0) := "01";
180 constant MUL2_P : std_ulogic_vector(1 downto 0) := "10";
181 constant MUL2_R : std_ulogic_vector(1 downto 0) := "11";
182
183 constant MULADD_ZERO : std_ulogic_vector(1 downto 0) := "00";
184 constant MULADD_CONST : std_ulogic_vector(1 downto 0) := "01";
185 constant MULADD_A : std_ulogic_vector(1 downto 0) := "10";
186 constant MULADD_RS : std_ulogic_vector(1 downto 0) := "11";
187
188 -- Inverse lookup table, indexed by the top 8 fraction bits
189 -- The first 256 entries are the reciprocal (1/x) lookup table,
190 -- and the remaining 768 entries are the reciprocal square root table.
191 -- Output range is [0.5, 1) in 0.19 format, though the top
192 -- bit isn't stored since it is always 1.
193 -- Each output value is the inverse of the center of the input
194 -- range for the value, i.e. entry 0 is 1 / (1 + 1/512),
195 -- entry 1 is 1 / (1 + 3/512), etc.
196 signal inverse_table : lookup_table := (
197 -- 1/x lookup table
198 -- Unit bit is assumed to be 1, so input range is [1, 2)
199 18x"3fc01", 18x"3f411", 18x"3ec31", 18x"3e460", 18x"3dc9f", 18x"3d4ec", 18x"3cd49", 18x"3c5b5",
200 18x"3be2f", 18x"3b6b8", 18x"3af4f", 18x"3a7f4", 18x"3a0a7", 18x"39968", 18x"39237", 18x"38b14",
201 18x"383fe", 18x"37cf5", 18x"375f9", 18x"36f0a", 18x"36828", 18x"36153", 18x"35a8a", 18x"353ce",
202 18x"34d1e", 18x"3467a", 18x"33fe3", 18x"33957", 18x"332d7", 18x"32c62", 18x"325f9", 18x"31f9c",
203 18x"3194a", 18x"31303", 18x"30cc7", 18x"30696", 18x"30070", 18x"2fa54", 18x"2f443", 18x"2ee3d",
204 18x"2e841", 18x"2e250", 18x"2dc68", 18x"2d68b", 18x"2d0b8", 18x"2caee", 18x"2c52e", 18x"2bf79",
205 18x"2b9cc", 18x"2b429", 18x"2ae90", 18x"2a900", 18x"2a379", 18x"29dfb", 18x"29887", 18x"2931b",
206 18x"28db8", 18x"2885e", 18x"2830d", 18x"27dc4", 18x"27884", 18x"2734d", 18x"26e1d", 18x"268f6",
207 18x"263d8", 18x"25ec1", 18x"259b3", 18x"254ac", 18x"24fad", 18x"24ab7", 18x"245c8", 18x"240e1",
208 18x"23c01", 18x"23729", 18x"23259", 18x"22d90", 18x"228ce", 18x"22413", 18x"21f60", 18x"21ab4",
209 18x"2160f", 18x"21172", 18x"20cdb", 18x"2084b", 18x"203c2", 18x"1ff40", 18x"1fac4", 18x"1f64f",
210 18x"1f1e1", 18x"1ed79", 18x"1e918", 18x"1e4be", 18x"1e069", 18x"1dc1b", 18x"1d7d4", 18x"1d392",
211 18x"1cf57", 18x"1cb22", 18x"1c6f3", 18x"1c2ca", 18x"1bea7", 18x"1ba8a", 18x"1b672", 18x"1b261",
212 18x"1ae55", 18x"1aa50", 18x"1a64f", 18x"1a255", 18x"19e60", 18x"19a70", 18x"19686", 18x"192a2",
213 18x"18ec3", 18x"18ae9", 18x"18715", 18x"18345", 18x"17f7c", 18x"17bb7", 18x"177f7", 18x"1743d",
214 18x"17087", 18x"16cd7", 18x"1692c", 18x"16585", 18x"161e4", 18x"15e47", 18x"15ab0", 18x"1571d",
215 18x"1538e", 18x"15005", 18x"14c80", 18x"14900", 18x"14584", 18x"1420d", 18x"13e9b", 18x"13b2d",
216 18x"137c3", 18x"1345e", 18x"130fe", 18x"12da2", 18x"12a4a", 18x"126f6", 18x"123a7", 18x"1205c",
217 18x"11d15", 18x"119d2", 18x"11694", 18x"11359", 18x"11023", 18x"10cf1", 18x"109c2", 18x"10698",
218 18x"10372", 18x"10050", 18x"0fd31", 18x"0fa17", 18x"0f700", 18x"0f3ed", 18x"0f0de", 18x"0edd3",
219 18x"0eacb", 18x"0e7c7", 18x"0e4c7", 18x"0e1ca", 18x"0ded2", 18x"0dbdc", 18x"0d8eb", 18x"0d5fc",
220 18x"0d312", 18x"0d02b", 18x"0cd47", 18x"0ca67", 18x"0c78a", 18x"0c4b1", 18x"0c1db", 18x"0bf09",
221 18x"0bc3a", 18x"0b96e", 18x"0b6a5", 18x"0b3e0", 18x"0b11e", 18x"0ae5f", 18x"0aba3", 18x"0a8eb",
222 18x"0a636", 18x"0a383", 18x"0a0d4", 18x"09e28", 18x"09b80", 18x"098da", 18x"09637", 18x"09397",
223 18x"090fb", 18x"08e61", 18x"08bca", 18x"08936", 18x"086a5", 18x"08417", 18x"0818c", 18x"07f04",
224 18x"07c7e", 18x"079fc", 18x"0777c", 18x"074ff", 18x"07284", 18x"0700d", 18x"06d98", 18x"06b26",
225 18x"068b6", 18x"0664a", 18x"063e0", 18x"06178", 18x"05f13", 18x"05cb1", 18x"05a52", 18x"057f5",
226 18x"0559a", 18x"05342", 18x"050ed", 18x"04e9a", 18x"04c4a", 18x"049fc", 18x"047b0", 18x"04567",
227 18x"04321", 18x"040dd", 18x"03e9b", 18x"03c5c", 18x"03a1f", 18x"037e4", 18x"035ac", 18x"03376",
228 18x"03142", 18x"02f11", 18x"02ce2", 18x"02ab5", 18x"0288b", 18x"02663", 18x"0243d", 18x"02219",
229 18x"01ff7", 18x"01dd8", 18x"01bbb", 18x"019a0", 18x"01787", 18x"01570", 18x"0135b", 18x"01149",
230 18x"00f39", 18x"00d2a", 18x"00b1e", 18x"00914", 18x"0070c", 18x"00506", 18x"00302", 18x"00100",
231 -- 1/sqrt(x) lookup table
232 -- Input is in the range [1, 4), i.e. two bits to the left of the
233 -- binary point. Those 2 bits index the following 3 blocks of 256 values.
234 -- 1.0 ... 1.9999
235 18x"3fe00", 18x"3fa06", 18x"3f612", 18x"3f224", 18x"3ee3a", 18x"3ea58", 18x"3e67c", 18x"3e2a4",
236 18x"3ded2", 18x"3db06", 18x"3d73e", 18x"3d37e", 18x"3cfc2", 18x"3cc0a", 18x"3c85a", 18x"3c4ae",
237 18x"3c106", 18x"3bd64", 18x"3b9c8", 18x"3b630", 18x"3b29e", 18x"3af10", 18x"3ab86", 18x"3a802",
238 18x"3a484", 18x"3a108", 18x"39d94", 18x"39a22", 18x"396b6", 18x"3934e", 18x"38fea", 18x"38c8c",
239 18x"38932", 18x"385dc", 18x"3828a", 18x"37f3e", 18x"37bf6", 18x"378b2", 18x"37572", 18x"37236",
240 18x"36efe", 18x"36bca", 18x"3689a", 18x"36570", 18x"36248", 18x"35f26", 18x"35c06", 18x"358ea",
241 18x"355d4", 18x"352c0", 18x"34fb0", 18x"34ca4", 18x"3499c", 18x"34698", 18x"34398", 18x"3409c",
242 18x"33da2", 18x"33aac", 18x"337bc", 18x"334cc", 18x"331e2", 18x"32efc", 18x"32c18", 18x"32938",
243 18x"3265a", 18x"32382", 18x"320ac", 18x"31dd8", 18x"31b0a", 18x"3183e", 18x"31576", 18x"312b0",
244 18x"30fee", 18x"30d2e", 18x"30a74", 18x"307ba", 18x"30506", 18x"30254", 18x"2ffa4", 18x"2fcf8",
245 18x"2fa4e", 18x"2f7a8", 18x"2f506", 18x"2f266", 18x"2efca", 18x"2ed2e", 18x"2ea98", 18x"2e804",
246 18x"2e572", 18x"2e2e4", 18x"2e058", 18x"2ddce", 18x"2db48", 18x"2d8c6", 18x"2d646", 18x"2d3c8",
247 18x"2d14c", 18x"2ced4", 18x"2cc5e", 18x"2c9ea", 18x"2c77a", 18x"2c50c", 18x"2c2a2", 18x"2c038",
248 18x"2bdd2", 18x"2bb70", 18x"2b90e", 18x"2b6b0", 18x"2b454", 18x"2b1fa", 18x"2afa4", 18x"2ad4e",
249 18x"2aafc", 18x"2a8ac", 18x"2a660", 18x"2a414", 18x"2a1cc", 18x"29f86", 18x"29d42", 18x"29b00",
250 18x"298c2", 18x"29684", 18x"2944a", 18x"29210", 18x"28fda", 18x"28da6", 18x"28b74", 18x"28946",
251 18x"28718", 18x"284ec", 18x"282c4", 18x"2809c", 18x"27e78", 18x"27c56", 18x"27a34", 18x"27816",
252 18x"275fa", 18x"273e0", 18x"271c8", 18x"26fb0", 18x"26d9c", 18x"26b8a", 18x"2697a", 18x"2676c",
253 18x"26560", 18x"26356", 18x"2614c", 18x"25f46", 18x"25d42", 18x"25b40", 18x"2593e", 18x"25740",
254 18x"25542", 18x"25348", 18x"2514e", 18x"24f58", 18x"24d62", 18x"24b6e", 18x"2497c", 18x"2478c",
255 18x"2459e", 18x"243b0", 18x"241c6", 18x"23fde", 18x"23df6", 18x"23c10", 18x"23a2c", 18x"2384a",
256 18x"2366a", 18x"2348c", 18x"232ae", 18x"230d2", 18x"22efa", 18x"22d20", 18x"22b4a", 18x"22976",
257 18x"227a2", 18x"225d2", 18x"22402", 18x"22234", 18x"22066", 18x"21e9c", 18x"21cd2", 18x"21b0a",
258 18x"21944", 18x"2177e", 18x"215ba", 18x"213fa", 18x"21238", 18x"2107a", 18x"20ebc", 18x"20d00",
259 18x"20b46", 18x"2098e", 18x"207d6", 18x"20620", 18x"2046c", 18x"202b8", 18x"20108", 18x"1ff58",
260 18x"1fda8", 18x"1fbfc", 18x"1fa50", 18x"1f8a4", 18x"1f6fc", 18x"1f554", 18x"1f3ae", 18x"1f208",
261 18x"1f064", 18x"1eec2", 18x"1ed22", 18x"1eb82", 18x"1e9e4", 18x"1e846", 18x"1e6aa", 18x"1e510",
262 18x"1e378", 18x"1e1e0", 18x"1e04a", 18x"1deb4", 18x"1dd20", 18x"1db8e", 18x"1d9fc", 18x"1d86c",
263 18x"1d6de", 18x"1d550", 18x"1d3c4", 18x"1d238", 18x"1d0ae", 18x"1cf26", 18x"1cd9e", 18x"1cc18",
264 18x"1ca94", 18x"1c910", 18x"1c78c", 18x"1c60a", 18x"1c48a", 18x"1c30c", 18x"1c18e", 18x"1c010",
265 18x"1be94", 18x"1bd1a", 18x"1bba0", 18x"1ba28", 18x"1b8b2", 18x"1b73c", 18x"1b5c6", 18x"1b452",
266 18x"1b2e0", 18x"1b16e", 18x"1affe", 18x"1ae8e", 18x"1ad20", 18x"1abb4", 18x"1aa46", 18x"1a8dc",
267 -- 2.0 ... 2.9999
268 18x"1a772", 18x"1a608", 18x"1a4a0", 18x"1a33a", 18x"1a1d4", 18x"1a070", 18x"19f0c", 18x"19da8",
269 18x"19c48", 18x"19ae6", 18x"19986", 18x"19828", 18x"196ca", 18x"1956e", 18x"19412", 18x"192b8",
270 18x"1915e", 18x"19004", 18x"18eae", 18x"18d56", 18x"18c00", 18x"18aac", 18x"18958", 18x"18804",
271 18x"186b2", 18x"18562", 18x"18412", 18x"182c2", 18x"18174", 18x"18026", 18x"17eda", 18x"17d8e",
272 18x"17c44", 18x"17afa", 18x"179b2", 18x"1786a", 18x"17724", 18x"175de", 18x"17498", 18x"17354",
273 18x"17210", 18x"170ce", 18x"16f8c", 18x"16e4c", 18x"16d0c", 18x"16bcc", 18x"16a8e", 18x"16950",
274 18x"16814", 18x"166d8", 18x"1659e", 18x"16464", 18x"1632a", 18x"161f2", 18x"160ba", 18x"15f84",
275 18x"15e4e", 18x"15d1a", 18x"15be6", 18x"15ab2", 18x"15980", 18x"1584e", 18x"1571c", 18x"155ec",
276 18x"154bc", 18x"1538e", 18x"15260", 18x"15134", 18x"15006", 18x"14edc", 18x"14db0", 18x"14c86",
277 18x"14b5e", 18x"14a36", 18x"1490e", 18x"147e6", 18x"146c0", 18x"1459a", 18x"14476", 18x"14352",
278 18x"14230", 18x"1410c", 18x"13fea", 18x"13eca", 18x"13daa", 18x"13c8a", 18x"13b6c", 18x"13a4e",
279 18x"13930", 18x"13814", 18x"136f8", 18x"135dc", 18x"134c2", 18x"133a8", 18x"1328e", 18x"13176",
280 18x"1305e", 18x"12f48", 18x"12e30", 18x"12d1a", 18x"12c06", 18x"12af2", 18x"129de", 18x"128ca",
281 18x"127b8", 18x"126a6", 18x"12596", 18x"12486", 18x"12376", 18x"12266", 18x"12158", 18x"1204a",
282 18x"11f3e", 18x"11e32", 18x"11d26", 18x"11c1a", 18x"11b10", 18x"11a06", 18x"118fc", 18x"117f4",
283 18x"116ec", 18x"115e4", 18x"114de", 18x"113d8", 18x"112d2", 18x"111ce", 18x"110ca", 18x"10fc6",
284 18x"10ec2", 18x"10dc0", 18x"10cbe", 18x"10bbc", 18x"10abc", 18x"109bc", 18x"108bc", 18x"107be",
285 18x"106c0", 18x"105c2", 18x"104c4", 18x"103c8", 18x"102cc", 18x"101d0", 18x"100d6", 18x"0ffdc",
286 18x"0fee2", 18x"0fdea", 18x"0fcf0", 18x"0fbf8", 18x"0fb02", 18x"0fa0a", 18x"0f914", 18x"0f81e",
287 18x"0f72a", 18x"0f636", 18x"0f542", 18x"0f44e", 18x"0f35a", 18x"0f268", 18x"0f176", 18x"0f086",
288 18x"0ef94", 18x"0eea4", 18x"0edb4", 18x"0ecc6", 18x"0ebd6", 18x"0eae8", 18x"0e9fa", 18x"0e90e",
289 18x"0e822", 18x"0e736", 18x"0e64a", 18x"0e55e", 18x"0e474", 18x"0e38a", 18x"0e2a0", 18x"0e1b8",
290 18x"0e0d0", 18x"0dfe8", 18x"0df00", 18x"0de1a", 18x"0dd32", 18x"0dc4c", 18x"0db68", 18x"0da82",
291 18x"0d99e", 18x"0d8ba", 18x"0d7d6", 18x"0d6f4", 18x"0d612", 18x"0d530", 18x"0d44e", 18x"0d36c",
292 18x"0d28c", 18x"0d1ac", 18x"0d0cc", 18x"0cfee", 18x"0cf0e", 18x"0ce30", 18x"0cd54", 18x"0cc76",
293 18x"0cb9a", 18x"0cabc", 18x"0c9e0", 18x"0c906", 18x"0c82a", 18x"0c750", 18x"0c676", 18x"0c59c",
294 18x"0c4c4", 18x"0c3ea", 18x"0c312", 18x"0c23a", 18x"0c164", 18x"0c08c", 18x"0bfb6", 18x"0bee0",
295 18x"0be0a", 18x"0bd36", 18x"0bc62", 18x"0bb8c", 18x"0baba", 18x"0b9e6", 18x"0b912", 18x"0b840",
296 18x"0b76e", 18x"0b69c", 18x"0b5cc", 18x"0b4fa", 18x"0b42a", 18x"0b35a", 18x"0b28a", 18x"0b1bc",
297 18x"0b0ee", 18x"0b01e", 18x"0af50", 18x"0ae84", 18x"0adb6", 18x"0acea", 18x"0ac1e", 18x"0ab52",
298 18x"0aa86", 18x"0a9bc", 18x"0a8f0", 18x"0a826", 18x"0a75c", 18x"0a694", 18x"0a5ca", 18x"0a502",
299 18x"0a43a", 18x"0a372", 18x"0a2aa", 18x"0a1e4", 18x"0a11c", 18x"0a056", 18x"09f90", 18x"09ecc",
300 -- 3.0 ... 3.9999
301 18x"09e06", 18x"09d42", 18x"09c7e", 18x"09bba", 18x"09af6", 18x"09a32", 18x"09970", 18x"098ae",
302 18x"097ec", 18x"0972a", 18x"09668", 18x"095a8", 18x"094e8", 18x"09426", 18x"09368", 18x"092a8",
303 18x"091e8", 18x"0912a", 18x"0906c", 18x"08fae", 18x"08ef0", 18x"08e32", 18x"08d76", 18x"08cba",
304 18x"08bfe", 18x"08b42", 18x"08a86", 18x"089ca", 18x"08910", 18x"08856", 18x"0879c", 18x"086e2",
305 18x"08628", 18x"08570", 18x"084b6", 18x"083fe", 18x"08346", 18x"0828e", 18x"081d8", 18x"08120",
306 18x"0806a", 18x"07fb4", 18x"07efe", 18x"07e48", 18x"07d92", 18x"07cde", 18x"07c2a", 18x"07b76",
307 18x"07ac2", 18x"07a0e", 18x"0795a", 18x"078a8", 18x"077f4", 18x"07742", 18x"07690", 18x"075de",
308 18x"0752e", 18x"0747c", 18x"073cc", 18x"0731c", 18x"0726c", 18x"071bc", 18x"0710c", 18x"0705e",
309 18x"06fae", 18x"06f00", 18x"06e52", 18x"06da4", 18x"06cf6", 18x"06c4a", 18x"06b9c", 18x"06af0",
310 18x"06a44", 18x"06998", 18x"068ec", 18x"06840", 18x"06796", 18x"066ea", 18x"06640", 18x"06596",
311 18x"064ec", 18x"06442", 18x"0639a", 18x"062f0", 18x"06248", 18x"061a0", 18x"060f8", 18x"06050",
312 18x"05fa8", 18x"05f00", 18x"05e5a", 18x"05db4", 18x"05d0e", 18x"05c68", 18x"05bc2", 18x"05b1c",
313 18x"05a76", 18x"059d2", 18x"0592e", 18x"05888", 18x"057e4", 18x"05742", 18x"0569e", 18x"055fa",
314 18x"05558", 18x"054b6", 18x"05412", 18x"05370", 18x"052ce", 18x"0522e", 18x"0518c", 18x"050ec",
315 18x"0504a", 18x"04faa", 18x"04f0a", 18x"04e6a", 18x"04dca", 18x"04d2c", 18x"04c8c", 18x"04bee",
316 18x"04b50", 18x"04ab0", 18x"04a12", 18x"04976", 18x"048d8", 18x"0483a", 18x"0479e", 18x"04700",
317 18x"04664", 18x"045c8", 18x"0452c", 18x"04490", 18x"043f6", 18x"0435a", 18x"042c0", 18x"04226",
318 18x"0418a", 18x"040f0", 18x"04056", 18x"03fbe", 18x"03f24", 18x"03e8c", 18x"03df2", 18x"03d5a",
319 18x"03cc2", 18x"03c2a", 18x"03b92", 18x"03afa", 18x"03a62", 18x"039cc", 18x"03934", 18x"0389e",
320 18x"03808", 18x"03772", 18x"036dc", 18x"03646", 18x"035b2", 18x"0351c", 18x"03488", 18x"033f2",
321 18x"0335e", 18x"032ca", 18x"03236", 18x"031a2", 18x"03110", 18x"0307c", 18x"02fea", 18x"02f56",
322 18x"02ec4", 18x"02e32", 18x"02da0", 18x"02d0e", 18x"02c7c", 18x"02bec", 18x"02b5a", 18x"02aca",
323 18x"02a38", 18x"029a8", 18x"02918", 18x"02888", 18x"027f8", 18x"0276a", 18x"026da", 18x"0264a",
324 18x"025bc", 18x"0252e", 18x"024a0", 18x"02410", 18x"02384", 18x"022f6", 18x"02268", 18x"021da",
325 18x"0214e", 18x"020c0", 18x"02034", 18x"01fa8", 18x"01f1c", 18x"01e90", 18x"01e04", 18x"01d78",
326 18x"01cee", 18x"01c62", 18x"01bd8", 18x"01b4c", 18x"01ac2", 18x"01a38", 18x"019ae", 18x"01924",
327 18x"0189c", 18x"01812", 18x"01788", 18x"01700", 18x"01676", 18x"015ee", 18x"01566", 18x"014de",
328 18x"01456", 18x"013ce", 18x"01346", 18x"012c0", 18x"01238", 18x"011b2", 18x"0112c", 18x"010a4",
329 18x"0101e", 18x"00f98", 18x"00f12", 18x"00e8c", 18x"00e08", 18x"00d82", 18x"00cfe", 18x"00c78",
330 18x"00bf4", 18x"00b70", 18x"00aec", 18x"00a68", 18x"009e4", 18x"00960", 18x"008dc", 18x"00858",
331 18x"007d6", 18x"00752", 18x"006d0", 18x"0064e", 18x"005cc", 18x"0054a", 18x"004c8", 18x"00446",
332 18x"003c4", 18x"00342", 18x"002c2", 18x"00240", 18x"001c0", 18x"00140", 18x"000c0", 18x"00040"
333 );
334
-- Combined left/right shifter: 120-bit input, 64-bit output.
-- The result is the upper 64 bits of inp after shifting it left by
-- `shift` places.  `shift` is a 7-bit two's-complement count in the
-- range -64..63; a negative count gives a right shift, with zeroes
-- entering from the left.
-- The shift is done in three stages: a coarse step selected by
-- shift(6 downto 5), a step of 0/8/16/24 selected by shift(4 downto 3),
-- and a final step of 0..7 selected by shift(2 downto 0).
function shifter_64(inp: std_ulogic_vector(119 downto 0);
                    shift: std_ulogic_vector(6 downto 0))
    return std_ulogic_vector is
    constant ZERO_FILL : std_ulogic_vector(63 downto 0) := (others => '0');
    variable coarse    : std_ulogic_vector(94 downto 0);
    variable medium    : std_ulogic_vector(70 downto 0);
    variable fine      : std_ulogic_vector(63 downto 0);
begin
    -- Stage 1: "00" = no shift, "01" = left 32, "10" = right 64,
    -- anything else = right 32.
    if shift(6 downto 5) = "00" then
        coarse := inp(119 downto 25);
    elsif shift(6 downto 5) = "01" then
        coarse := inp(87 downto 0) & "0000000";
    elsif shift(6 downto 5) = "10" then
        coarse := ZERO_FILL & inp(119 downto 89);
    else
        coarse := ZERO_FILL(31 downto 0) & inp(119 downto 57);
    end if;

    -- Stage 2: start from the left-by-24 selection (used for "11" and
    -- for any unmatched value), then override for 0, 8 and 16.
    medium := coarse(70 downto 0);
    for n in 0 to 2 loop
        if shift(4 downto 3) = std_ulogic_vector(to_unsigned(n, 2)) then
            medium := coarse(94 - 8 * n downto 24 - 8 * n);
        end if;
    end loop;

    -- Stage 3: start from the left-by-7 selection (used for "111" and
    -- for any unmatched value), then override for 0..6.
    fine := medium(63 downto 0);
    for n in 0 to 6 loop
        if shift(2 downto 0) = std_ulogic_vector(to_unsigned(n, 3)) then
            fine := medium(70 - n downto 7 - n);
        end if;
    end loop;

    return fine;
end;
387
-- Build a mask that selects the bits that will be lost by a right
-- shift: 0-bits on the left, 1-bits on the right.  `shift` is the
-- bottom 6 bits of a negative shift count, so 63 (i.e. -1) marks only
-- bit 0 and 0 (i.e. -64) marks all 64 bits.
function right_mask(shift: unsigned(5 downto 0)) return std_ulogic_vector is
    variable lost : std_ulogic_vector(63 downto 0);
begin
    for bitpos in lost'range loop
        -- Bit `bitpos` is shifted out when its distance from the top
        -- bit is at least the encoded shift value.
        if (63 - bitpos) >= shift then
            lost(bitpos) := '1';
        else
            lost(bitpos) := '0';
        end if;
    end loop;
    return lost;
end;
403
-- Unpack a 64-bit register value into sign, exponent, mantissa and class.
-- is_int = '0': the value is decoded as a double-precision number.  The
--   exponent is unbiased (an all-zero exponent field is treated as
--   -1022) and the mantissa holds the fraction with the unit bit made
--   explicit at bit 54 and two zero bits appended below.
-- is_int = '1': the value is treated as an integer.  The raw 64 bits
--   become the mantissa, the exponent is zero, and the class is ZERO
--   only when every bit is zero.
function decode_dp(fpr: std_ulogic_vector(63 downto 0); is_int: std_ulogic) return fpu_reg_type is
    variable dec      : fpu_reg_type;
    variable exp_any  : std_ulogic;     -- some exponent bit is 1
    variable exp_all  : std_ulogic;     -- every exponent bit is 1
    variable frac_any : std_ulogic;     -- some fraction bit is 1
    variable key      : std_ulogic_vector(2 downto 0);
begin
    exp_any  := or (fpr(62 downto 52));
    exp_all  := and (fpr(62 downto 52));
    frac_any := or (fpr(51 downto 0));

    dec.negative := fpr(63);

    if is_int = '0' then
        if exp_any = '0' then
            -- zero or denormalized: fixed minimum exponent
            dec.exponent := to_signed(-1022, EXP_BITS);
        else
            dec.exponent := signed(resize(unsigned(fpr(62 downto 52)), EXP_BITS)) - to_signed(1023, EXP_BITS);
        end if;

        -- unit bit is 1 exactly when the exponent field is non-zero
        dec.mantissa := "000000000" & exp_any & fpr(51 downto 0) & "00";

        key := exp_all & exp_any & frac_any;
        case key is
            when "000" =>
                dec.class := ZERO;
            when "001" | "010" | "011" =>
                -- "001" is a denormalized number
                dec.class := FINITE;
            when "110" =>
                dec.class := INFINITY;
            when others =>
                dec.class := NAN;
        end case;
    else
        dec.mantissa := fpr;
        dec.exponent := (others => '0');
        if (fpr(63) or exp_any or frac_any) = '1' then
            dec.class := FINITE;
        else
            dec.class := ZERO;
        end if;
    end if;

    return dec;
end;
443
-- Assemble a 64-bit double-precision image from its components.
--   sign        goes to bit 63 for every class.
--   class       selects the encoding: ZERO leaves everything else zero,
--               INFINITY and NAN set the exponent field to all ones.
--   exp         unbiased exponent; only used for FINITE values whose
--               unit bit (mantissa bit 54) is set.  Otherwise the
--               exponent field stays zero (denormalized encoding).
--   mantissa    fraction is taken from bits 53 downto 2.
--   single_prec when '1', the low 29 fraction bits are left at zero.
--   quieten_nan when '1', forces the top fraction bit of a NAN to 1.
function pack_dp(sign: std_ulogic; class: fp_number_class; exp: signed(EXP_BITS-1 downto 0);
                 mantissa: std_ulogic_vector; single_prec: std_ulogic; quieten_nan: std_ulogic)
    return std_ulogic_vector is
    variable packed : std_ulogic_vector(63 downto 0);
begin
    packed := (others => '0');
    packed(63) := sign;

    -- FINITE and NAN both carry the fraction bits across unchanged.
    if class = FINITE or class = NAN then
        packed(51 downto 29) := mantissa(53 downto 31);
        if single_prec = '0' then
            packed(28 downto 0) := mantissa(30 downto 2);
        end if;
    end if;

    case class is
        when ZERO =>
            null;
        when FINITE =>
            if mantissa(54) = '1' then
                -- normalized number
                packed(62 downto 52) := std_ulogic_vector(resize(exp, 11) + 1023);
            end if;
        when INFINITY =>
            packed(62 downto 52) := "11111111111";
        when NAN =>
            packed(62 downto 52) := "11111111111";
            packed(51) := quieten_nan or mantissa(53);
    end case;

    return packed;
end;
475
-- Rounding decision.
-- Returns a 2-bit vector: bit 1 = increment the mantissa when rounding,
-- bit 0 = result is inexact.
-- For single precision (single_prec = '1') the guard/round bits are
-- mantissa bits 30 and 29 and the LSB is bit 31; the caller is expected
-- to have folded the bits below those into x already (usually arranged
-- by setting set_x = 1 earlier).  For double precision the guard/round
-- bits are mantissa bits 1 and 0 and the LSB is bit 2.
-- rn(1 downto 0) selects the mode: "00" nearest, "01" towards zero,
-- "10"/"11" towards +/- infinity.  rn(2) = '1' disables the
-- ties-to-even special case in round-to-nearest mode.
function fp_rounding(mantissa: std_ulogic_vector(63 downto 0); x: std_ulogic;
                     single_prec: std_ulogic; rn: std_ulogic_vector(2 downto 0);
                     sign: std_ulogic)
    return std_ulogic_vector is
    variable guard_round : std_ulogic_vector(1 downto 0);
    variable grx         : std_ulogic_vector(2 downto 0);
    variable lsb         : std_ulogic;
    variable verdict     : std_ulogic_vector(1 downto 0);
begin
    if single_prec = '0' then
        guard_round := mantissa(1 downto 0);
        lsb := mantissa(2);
    else
        guard_round := mantissa(30 downto 29);
        lsb := mantissa(31);
    end if;
    grx := guard_round & x;

    -- inexact whenever any discarded bit is set
    verdict(0) := or (grx);
    verdict(1) := '0';

    if rn(1 downto 0) = "00" then
        -- round to nearest
        if grx = "100" and rn(2) = '0' then
            verdict(1) := lsb;          -- exact tie: round to even
        else
            verdict(1) := grx(2);
        end if;
    elsif rn(1 downto 0) = "01" then
        -- round towards zero: never increment
        null;
    elsif rn(0) = sign then
        -- round towards +/- inf, and the direction increases magnitude
        verdict(1) := verdict(0);
    end if;

    return verdict;
end;
513
-- Work out the 5-bit result-flag value to write into the FPSCR for a
-- result of the given sign and class.  `unitbit` is only consulted for
-- FINITE results, where a clear unit bit sets the leftmost flag.
-- NOTE(review): the bit layout presumably matches the FPSCR FPRF field
-- (C, FL, FG, FE, FU) -- confirm against the Power ISA.
function result_flags(sign: std_ulogic; class: fp_number_class; unitbit: std_ulogic)
    return std_ulogic_vector is
begin
    if class = NAN then
        return "10001";
    elsif class = INFINITY then
        return '0' & sign & (not sign) & "01";
    elsif class = ZERO then
        return sign & "0010";
    else
        -- FINITE
        return (not unitbit) & sign & (not sign) & "00";
    end if;
end;
529
530 begin
-- Multiplier instance: driven by f_to_multiply, answers on multiply_to_f.
fpu_multiply_0: entity work.multiply
    port map (clk   => clk,
              m_in  => f_to_multiply,
              m_out => multiply_to_f);
537
-- State register.  On each rising clock edge, either apply the
-- synchronous reset to the control fields listed below or latch the
-- next-state value rin into r.
fpu_0: process(clk)
begin
    if rising_edge(clk) then
        if rst = '1' then
            -- Only these fields are reset; the rest of r is not assigned.
            r.fpscr        <= (others => '0');
            r.writing_back <= '0';
            r.do_intr      <= '0';
            r.instr_done   <= '0';
            r.busy         <= '0';
            r.state        <= IDLE;
        else
            -- A new instruction must only be presented while in IDLE.
            assert r.state = IDLE or e_in.valid /= '1' severity failure;
            r <= rin;
        end if;
    end if;
end process;
554
-- Synchronous read of the inverse lookup table.
-- The low 8 address bits are B's mantissa bits 53 downto 46.  The top
-- two address bits are B's mantissa bits 55 downto 54 when r.is_sqrt is
-- set, and "00" (the 1/x block at the start of the table) otherwise.
-- The stored 18-bit entry is extended with a leading '1' to form the
-- 19-bit estimate.
lut_access: process(clk)
    variable bank     : std_ulogic_vector(1 downto 0);
    variable lut_addr : std_ulogic_vector(9 downto 0);
begin
    if rising_edge(clk) then
        bank := "00";
        if r.is_sqrt = '1' then
            bank := r.b.mantissa(55 downto 54);
        end if;
        lut_addr := bank & r.b.mantissa(53 downto 46);
        inverse_est <= '1' & inverse_table(to_integer(unsigned(lut_addr)));
    end if;
end process;
570
-- Status outputs on e_out, taken straight from the state register.
e_out.interrupt <= r.do_intr;
e_out.exception <= r.fpscr(FPSCR_FEX);
e_out.busy      <= r.busy;

-- Write-back outputs.  An instruction that raises an interrupt is not
-- reported as valid.
w_out.valid        <= r.instr_done and not r.do_intr;
w_out.write_enable <= r.writing_back;
w_out.write_reg    <= r.dest_fpr;
w_out.write_data   <= fp_result;

-- CR update: enabled for record-form (rc) and compare instructions.
-- The 4-bit CR result is replicated into all eight fields; cr_mask
-- picks which of them are written.
w_out.write_cr_enable <= r.instr_done and (r.rc or r.is_cmp);
w_out.write_cr_mask   <= r.cr_mask;
w_out.write_cr_data   <= r.cr_result & r.cr_result &
                         r.cr_result & r.cr_result &
                         r.cr_result & r.cr_result &
                         r.cr_result & r.cr_result;
583
584 fpu_1: process(all)
585 variable v : reg_type;
586 variable adec : fpu_reg_type;
587 variable bdec : fpu_reg_type;
588 variable cdec : fpu_reg_type;
589 variable fpscr_mask : std_ulogic_vector(31 downto 0);
590 variable illegal : std_ulogic;
591 variable j, k : integer;
592 variable flm : std_ulogic_vector(7 downto 0);
593 variable int_input : std_ulogic;
594 variable mask : std_ulogic_vector(63 downto 0);
595 variable in_a0 : std_ulogic_vector(63 downto 0);
596 variable in_b0 : std_ulogic_vector(63 downto 0);
597 variable misc : std_ulogic_vector(63 downto 0);
598 variable shift_res : std_ulogic_vector(63 downto 0);
599 variable round : std_ulogic_vector(1 downto 0);
600 variable update_fx : std_ulogic;
601 variable arith_done : std_ulogic;
602 variable invalid : std_ulogic;
603 variable zero_divide : std_ulogic;
604 variable mant_nz : std_ulogic;
605 variable min_exp : signed(EXP_BITS-1 downto 0);
606 variable max_exp : signed(EXP_BITS-1 downto 0);
607 variable bias_exp : signed(EXP_BITS-1 downto 0);
608 variable new_exp : signed(EXP_BITS-1 downto 0);
609 variable exp_tiny : std_ulogic;
610 variable exp_huge : std_ulogic;
611 variable renormalize : std_ulogic;
612 variable clz : std_ulogic_vector(5 downto 0);
613 variable set_x : std_ulogic;
614 variable mshift : signed(EXP_BITS-1 downto 0);
615 variable need_check : std_ulogic;
616 variable msb : std_ulogic;
617 variable is_add : std_ulogic;
618 variable longmask : std_ulogic;
619 variable set_a : std_ulogic;
620 variable set_b : std_ulogic;
621 variable set_c : std_ulogic;
622 variable set_y : std_ulogic;
623 variable set_s : std_ulogic;
624 variable qnan_result : std_ulogic;
625 variable px_nz : std_ulogic;
626 variable pcmpb_eq : std_ulogic;
627 variable pcmpb_lt : std_ulogic;
628 variable pshift : std_ulogic;
629 variable renorm_sqrt : std_ulogic;
630 variable sqrt_exp : signed(EXP_BITS-1 downto 0);
631 variable shiftin : std_ulogic;
632 variable mulexp : signed(EXP_BITS-1 downto 0);
633 variable maddend : std_ulogic_vector(127 downto 0);
634 begin
635 v := r;
636 illegal := '0';
637 v.busy := '0';
638 int_input := '0';
639
640 -- capture incoming instruction
641 if e_in.valid = '1' then
642 v.insn := e_in.insn;
643 v.op := e_in.op;
644 v.fe_mode := or (e_in.fe_mode);
645 v.dest_fpr := e_in.frt;
646 v.single_prec := e_in.single;
647 v.int_result := '0';
648 v.rc := e_in.rc;
649 v.is_cmp := e_in.out_cr;
650 if e_in.out_cr = '0' then
651 v.cr_mask := num_to_fxm(1);
652 else
653 v.cr_mask := num_to_fxm(to_integer(unsigned(insn_bf(e_in.insn))));
654 end if;
655 int_input := '0';
656 if e_in.op = OP_FPOP_I then
657 int_input := '1';
658 end if;
659 v.quieten_nan := '1';
660 v.tiny := '0';
661 v.denorm := '0';
662 v.round_mode := '0' & r.fpscr(FPSCR_RN+1 downto FPSCR_RN);
663 v.is_subtract := '0';
664 v.is_multiply := '0';
665 v.is_sqrt := '0';
666 v.add_bsmall := '0';
667 v.doing_ftdiv := "00";
668
669 adec := decode_dp(e_in.fra, int_input);
670 bdec := decode_dp(e_in.frb, int_input);
671 cdec := decode_dp(e_in.frc, int_input);
672 v.a := adec;
673 v.b := bdec;
674 v.c := cdec;
675
676 v.exp_cmp := '0';
677 if adec.exponent > bdec.exponent then
678 v.exp_cmp := '1';
679 end if;
680 v.madd_cmp := '0';
681 if (adec.exponent + cdec.exponent + 1) >= bdec.exponent then
682 v.madd_cmp := '1';
683 end if;
684 end if;
685
686 r_hi_nz <= or (r.r(55 downto 31));
687 r_lo_nz <= or (r.r(30 downto 2));
688 s_nz <= or (r.s);
689
690 if r.single_prec = '0' then
691 if r.doing_ftdiv(1) = '0' then
692 max_exp := to_signed(1023, EXP_BITS);
693 else
694 max_exp := to_signed(1020, EXP_BITS);
695 end if;
696 if r.doing_ftdiv(0) = '0' then
697 min_exp := to_signed(-1022, EXP_BITS);
698 else
699 min_exp := to_signed(-1021, EXP_BITS);
700 end if;
701 bias_exp := to_signed(1536, EXP_BITS);
702 else
703 max_exp := to_signed(127, EXP_BITS);
704 min_exp := to_signed(-126, EXP_BITS);
705 bias_exp := to_signed(192, EXP_BITS);
706 end if;
707 new_exp := r.result_exp - r.shift;
708 exp_tiny := '0';
709 exp_huge := '0';
710 if new_exp < min_exp then
711 exp_tiny := '1';
712 end if;
713 if new_exp > max_exp then
714 exp_huge := '1';
715 end if;
716
717 -- Compare P with zero and with B
718 px_nz := or (r.p(57 downto 4));
719 pcmpb_eq := '0';
720 if r.p(59 downto 4) = r.b.mantissa(55 downto 0) then
721 pcmpb_eq := '1';
722 end if;
723 pcmpb_lt := '0';
724 if unsigned(r.p(59 downto 4)) < unsigned(r.b.mantissa(55 downto 0)) then
725 pcmpb_lt := '1';
726 end if;
727
728 v.writing_back := '0';
729 v.instr_done := '0';
730 v.update_fprf := '0';
731 v.shift := to_signed(0, EXP_BITS);
732 v.first := '0';
733 v.opsel_a := AIN_R;
734 opsel_ainv <= '0';
735 opsel_amask <= '0';
736 opsel_b <= BIN_ZERO;
737 opsel_binv <= '0';
738 opsel_r <= RES_SUM;
739 opsel_s <= S_ZERO;
740 carry_in <= '0';
741 misc_sel <= "0000";
742 fpscr_mask := (others => '1');
743 update_fx := '0';
744 arith_done := '0';
745 invalid := '0';
746 zero_divide := '0';
747 renormalize := '0';
748 set_x := '0';
749 qnan_result := '0';
750 longmask := r.single_prec;
751 set_a := '0';
752 set_b := '0';
753 set_c := '0';
754 set_s := '0';
755 f_to_multiply.is_32bit <= '0';
756 f_to_multiply.valid <= '0';
757 msel_1 <= MUL1_A;
758 msel_2 <= MUL2_C;
759 msel_add <= MULADD_ZERO;
760 msel_inv <= '0';
761 set_y := '0';
762 pshift := '0';
763 renorm_sqrt := '0';
764 shiftin := '0';
765 case r.state is
766 when IDLE =>
767 v.use_a := '0';
768 v.use_b := '0';
769 v.use_c := '0';
770 v.invalid := '0';
771 v.negate := '0';
772 if e_in.valid = '1' then
773 case e_in.insn(5 downto 1) is
774 when "00000" =>
775 if e_in.insn(8) = '1' then
776 if e_in.insn(6) = '0' then
777 v.state := DO_FTDIV;
778 else
779 v.state := DO_FTSQRT;
780 end if;
781 elsif e_in.insn(7) = '1' then
782 v.state := DO_MCRFS;
783 else
784 v.opsel_a := AIN_B;
785 v.state := DO_FCMP;
786 end if;
787 when "00110" =>
788 if e_in.insn(10) = '0' then
789 if e_in.insn(8) = '0' then
790 v.state := DO_MTFSB;
791 else
792 v.state := DO_MTFSFI;
793 end if;
794 else
795 v.state := DO_FMRG;
796 end if;
797 when "00111" =>
798 if e_in.insn(8) = '0' then
799 v.state := DO_MFFS;
800 else
801 v.state := DO_MTFSF;
802 end if;
803 when "01000" =>
804 v.opsel_a := AIN_B;
805 if e_in.insn(9 downto 8) /= "11" then
806 v.state := DO_FMR;
807 else
808 v.state := DO_FRI;
809 end if;
810 when "01100" =>
811 v.opsel_a := AIN_B;
812 v.state := DO_FRSP;
813 when "01110" =>
814 v.opsel_a := AIN_B;
815 if int_input = '1' then
816 -- fcfid[u][s]
817 v.state := DO_FCFID;
818 else
819 v.state := DO_FCTI;
820 end if;
821 when "01111" =>
822 v.round_mode := "001";
823 v.opsel_a := AIN_B;
824 v.state := DO_FCTI;
825 when "10010" =>
826 v.opsel_a := AIN_A;
827 if v.b.mantissa(54) = '0' and v.a.mantissa(54) = '1' then
828 v.opsel_a := AIN_B;
829 end if;
830 v.state := DO_FDIV;
831 when "10100" | "10101" =>
832 v.opsel_a := AIN_A;
833 v.state := DO_FADD;
834 when "10110" =>
835 v.is_sqrt := '1';
836 v.opsel_a := AIN_B;
837 v.state := DO_FSQRT;
838 when "10111" =>
839 v.state := DO_FSEL;
840 when "11000" =>
841 v.opsel_a := AIN_B;
842 v.state := DO_FRE;
843 when "11001" =>
844 v.is_multiply := '1';
845 v.opsel_a := AIN_A;
846 if v.c.mantissa(54) = '0' and v.a.mantissa(54) = '1' then
847 v.opsel_a := AIN_C;
848 end if;
849 v.state := DO_FMUL;
850 when "11010" =>
851 v.is_sqrt := '1';
852 v.opsel_a := AIN_B;
853 v.state := DO_FRSQRTE;
854 when "11100" | "11101" | "11110" | "11111" =>
855 if v.a.mantissa(54) = '0' then
856 v.opsel_a := AIN_A;
857 elsif v.c.mantissa(54) = '0' then
858 v.opsel_a := AIN_C;
859 else
860 v.opsel_a := AIN_B;
861 end if;
862 v.state := DO_FMADD;
863 when others =>
864 illegal := '1';
865 end case;
866 end if;
867 v.x := '0';
868 v.old_exc := r.fpscr(FPSCR_VX downto FPSCR_XX);
869 set_s := '1';
870
871 when DO_MCRFS =>
872 j := to_integer(unsigned(insn_bfa(r.insn)));
873 for i in 0 to 7 loop
874 if i = j then
875 k := (7 - i) * 4;
876 v.cr_result := r.fpscr(k + 3 downto k);
877 fpscr_mask(k + 3 downto k) := "0000";
878 end if;
879 end loop;
880 v.fpscr := r.fpscr and (fpscr_mask or x"6007F8FF");
881 v.instr_done := '1';
882 v.state := IDLE;
883
when DO_FTDIV =>
    -- ftdiv: classify operands A and B and report flags in the CR
    -- result field.  By default the instruction finishes in this
    -- cycle; if none of the class-based fe_flag conditions hit,
    -- we go on to FTDIV_1 for the exponent range checks.
    v.instr_done := '1';
    v.state := IDLE;
    v.cr_result := "0000";
    -- fg_flag (cr_result(2)): A is infinite, or B is zero, infinite
    -- or denormalized.  The mantissa is in 10.54 format, so the
    -- unit (implicit 1) bit is bit 54; a finite B is denormalized
    -- when that bit is clear.  (Bit 53 is the top fraction bit and
    -- is only meaningful as the quiet bit of a NaN.)
    if r.a.class = INFINITY or r.b.class = ZERO or r.b.class = INFINITY or
        (r.b.class = FINITE and r.b.mantissa(54) = '0') then
        v.cr_result(2) := '1';
    end if;
    -- fe_flag (cr_result(1)): set immediately if either operand is
    -- NaN or infinite, B is zero, or A is finite with a very small
    -- exponent.
    if r.a.class = NAN or r.a.class = INFINITY or
        r.b.class = NAN or r.b.class = ZERO or r.b.class = INFINITY or
        (r.a.class = FINITE and r.a.exponent <= to_signed(-970, EXP_BITS)) then
        v.cr_result(1) := '1';
    else
        -- Not decided yet: FTDIV_1 uses exp_tiny/exp_huge with the
        -- tightened limits selected by doing_ftdiv to finish the
        -- fe_flag computation, so the instruction is not done yet.
        v.doing_ftdiv := "11";
        v.first := '1';
        v.state := FTDIV_1;
        v.instr_done := '0';
    end if;
902
when DO_FTSQRT =>
    -- ftsqrt: classify operand B and report flags in the CR result
    -- field.  Always completes in this cycle.
    v.instr_done := '1';
    v.state := IDLE;
    v.cr_result := "0000";
    -- fg_flag (cr_result(2)): B is zero, infinite or denormalized.
    -- The mantissa is in 10.54 format, so the unit (implicit 1) bit
    -- is bit 54; a finite B is denormalized when that bit is clear.
    if r.b.class = ZERO or r.b.class = INFINITY or
        (r.b.class = FINITE and r.b.mantissa(54) = '0') then
        v.cr_result(2) := '1';
    end if;
    -- fe_flag (cr_result(1)): B is NaN, infinite, zero, negative,
    -- or has a very small exponent.  This must SET the flag;
    -- cr_result was cleared to "0000" just above, so writing '0'
    -- here would make the whole test a no-op.
    if r.b.class = NAN or r.b.class = INFINITY or r.b.class = ZERO
        or r.b.negative = '1' or r.b.exponent <= to_signed(-970, EXP_BITS) then
        v.cr_result(1) := '1';
    end if;
915
916 when DO_FCMP =>
917 -- fcmp[uo]
918 -- r.opsel_a = AIN_B
919 v.instr_done := '1';
920 v.state := IDLE;
921 update_fx := '1';
922 v.result_exp := r.b.exponent;
923 if (r.a.class = NAN and r.a.mantissa(53) = '0') or
924 (r.b.class = NAN and r.b.mantissa(53) = '0') then
925 -- Signalling NAN
926 v.fpscr(FPSCR_VXSNAN) := '1';
927 if r.insn(6) = '1' and r.fpscr(FPSCR_VE) = '0' then
928 v.fpscr(FPSCR_VXVC) := '1';
929 end if;
930 invalid := '1';
931 v.cr_result := "0001"; -- unordered
932 elsif r.a.class = NAN or r.b.class = NAN then
933 if r.insn(6) = '1' then
934 -- fcmpo
935 v.fpscr(FPSCR_VXVC) := '1';
936 invalid := '1';
937 end if;
938 v.cr_result := "0001"; -- unordered
939 elsif r.a.class = ZERO and r.b.class = ZERO then
940 v.cr_result := "0010"; -- equal
941 elsif r.a.negative /= r.b.negative then
942 v.cr_result := r.a.negative & r.b.negative & "00";
943 elsif r.a.class = ZERO then
944 -- A and B are the same sign from here down
945 v.cr_result := not r.b.negative & r.b.negative & "00";
946 elsif r.a.class = INFINITY then
947 if r.b.class = INFINITY then
948 v.cr_result := "0010";
949 else
950 v.cr_result := r.a.negative & not r.a.negative & "00";
951 end if;
952 elsif r.b.class = ZERO then
953 -- A is finite from here down
954 v.cr_result := r.a.negative & not r.a.negative & "00";
955 elsif r.b.class = INFINITY then
956 v.cr_result := not r.b.negative & r.b.negative & "00";
957 elsif r.exp_cmp = '1' then
958 -- A and B are both finite from here down
959 v.cr_result := r.a.negative & not r.a.negative & "00";
960 elsif r.a.exponent /= r.b.exponent then
961 -- A exponent is smaller than B
962 v.cr_result := not r.a.negative & r.a.negative & "00";
963 else
964 -- Prepare to subtract mantissas, put B in R
965 v.cr_result := "0000";
966 v.instr_done := '0';
967 v.opsel_a := AIN_A;
968 v.state := CMP_1;
969 end if;
970 v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result;
971
972 when DO_MTFSB =>
973 -- mtfsb{0,1}
974 j := to_integer(unsigned(insn_bt(r.insn)));
975 for i in 0 to 31 loop
976 if i = j then
977 v.fpscr(31 - i) := r.insn(6);
978 end if;
979 end loop;
980 v.instr_done := '1';
981 v.state := IDLE;
982
983 when DO_MTFSFI =>
984 -- mtfsfi
985 j := to_integer(unsigned(insn_bf(r.insn)));
986 if r.insn(16) = '0' then
987 for i in 0 to 7 loop
988 if i = j then
989 k := (7 - i) * 4;
990 v.fpscr(k + 3 downto k) := insn_u(r.insn);
991 end if;
992 end loop;
993 end if;
994 v.instr_done := '1';
995 v.state := IDLE;
996
997 when DO_FMRG =>
998 -- fmrgew, fmrgow
999 opsel_r <= RES_MISC;
1000 misc_sel <= "01" & r.insn(8) & '0';
1001 v.int_result := '1';
1002 v.writing_back := '1';
1003 v.instr_done := '1';
1004 v.state := IDLE;
1005
1006 when DO_MFFS =>
1007 v.int_result := '1';
1008 v.writing_back := '1';
1009 opsel_r <= RES_MISC;
1010 case r.insn(20 downto 16) is
1011 when "00000" =>
1012 -- mffs
1013 when "00001" =>
1014 -- mffsce
1015 v.fpscr(FPSCR_VE downto FPSCR_XE) := "00000";
1016 when "10100" | "10101" =>
1017 -- mffscdrn[i] (but we don't implement DRN)
1018 fpscr_mask := x"000000FF";
1019 when "10110" =>
1020 -- mffscrn
1021 fpscr_mask := x"000000FF";
1022 v.fpscr(FPSCR_RN+1 downto FPSCR_RN) :=
1023 r.b.mantissa(FPSCR_RN+1 downto FPSCR_RN);
1024 when "10111" =>
1025 -- mffscrni
1026 fpscr_mask := x"000000FF";
1027 v.fpscr(FPSCR_RN+1 downto FPSCR_RN) := r.insn(12 downto 11);
1028 when "11000" =>
1029 -- mffsl
1030 fpscr_mask := x"0007F0FF";
1031 when others =>
1032 illegal := '1';
1033 end case;
1034 v.instr_done := '1';
1035 v.state := IDLE;
1036
1037 when DO_MTFSF =>
1038 if r.insn(25) = '1' then
1039 flm := x"FF";
1040 elsif r.insn(16) = '1' then
1041 flm := x"00";
1042 else
1043 flm := r.insn(24 downto 17);
1044 end if;
1045 for i in 0 to 7 loop
1046 k := i * 4;
1047 if flm(i) = '1' then
1048 v.fpscr(k + 3 downto k) := r.b.mantissa(k + 3 downto k);
1049 end if;
1050 end loop;
1051 v.instr_done := '1';
1052 v.state := IDLE;
1053
1054 when DO_FMR =>
1055 -- r.opsel_a = AIN_B
1056 v.result_class := r.b.class;
1057 v.result_exp := r.b.exponent;
1058 v.quieten_nan := '0';
1059 if r.insn(9) = '1' then
1060 v.result_sign := '0'; -- fabs
1061 elsif r.insn(8) = '1' then
1062 v.result_sign := '1'; -- fnabs
1063 elsif r.insn(7) = '1' then
1064 v.result_sign := r.b.negative; -- fmr
1065 elsif r.insn(6) = '1' then
1066 v.result_sign := not r.b.negative; -- fneg
1067 else
1068 v.result_sign := r.a.negative; -- fcpsgn
1069 end if;
1070 v.writing_back := '1';
1071 v.instr_done := '1';
1072 v.state := IDLE;
1073
1074 when DO_FRI => -- fri[nzpm]
1075 -- r.opsel_a = AIN_B
1076 v.result_class := r.b.class;
1077 v.result_sign := r.b.negative;
1078 v.result_exp := r.b.exponent;
1079 v.fpscr(FPSCR_FR) := '0';
1080 v.fpscr(FPSCR_FI) := '0';
1081 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1082 -- Signalling NAN
1083 v.fpscr(FPSCR_VXSNAN) := '1';
1084 invalid := '1';
1085 end if;
1086 if r.b.class = FINITE then
1087 if r.b.exponent >= to_signed(52, EXP_BITS) then
1088 -- integer already, no rounding required
1089 arith_done := '1';
1090 else
1091 v.shift := r.b.exponent - to_signed(52, EXP_BITS);
1092 v.state := FRI_1;
1093 v.round_mode := '1' & r.insn(7 downto 6);
1094 end if;
1095 else
1096 arith_done := '1';
1097 end if;
1098
1099 when DO_FRSP =>
1100 -- r.opsel_a = AIN_B, r.shift = 0
1101 v.result_class := r.b.class;
1102 v.result_sign := r.b.negative;
1103 v.result_exp := r.b.exponent;
1104 v.fpscr(FPSCR_FR) := '0';
1105 v.fpscr(FPSCR_FI) := '0';
1106 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1107 -- Signalling NAN
1108 v.fpscr(FPSCR_VXSNAN) := '1';
1109 invalid := '1';
1110 end if;
1111 set_x := '1';
1112 if r.b.class = FINITE then
1113 if r.b.exponent < to_signed(-126, EXP_BITS) then
1114 v.shift := r.b.exponent - to_signed(-126, EXP_BITS);
1115 v.state := ROUND_UFLOW;
1116 elsif r.b.exponent > to_signed(127, EXP_BITS) then
1117 v.state := ROUND_OFLOW;
1118 else
1119 v.shift := to_signed(-2, EXP_BITS);
1120 v.state := ROUNDING;
1121 end if;
1122 else
1123 arith_done := '1';
1124 end if;
1125
1126 when DO_FCTI =>
1127 -- instr bit 9: 1=dword 0=word
1128 -- instr bit 8: 1=unsigned 0=signed
1129 -- instr bit 1: 1=round to zero 0=use fpscr[RN]
1130 -- r.opsel_a = AIN_B
1131 v.result_class := r.b.class;
1132 v.result_sign := r.b.negative;
1133 v.result_exp := r.b.exponent;
1134 v.fpscr(FPSCR_FR) := '0';
1135 v.fpscr(FPSCR_FI) := '0';
1136 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1137 -- Signalling NAN
1138 v.fpscr(FPSCR_VXSNAN) := '1';
1139 invalid := '1';
1140 end if;
1141
1142 v.int_result := '1';
1143 case r.b.class is
1144 when ZERO =>
1145 arith_done := '1';
1146 when FINITE =>
1147 if r.b.exponent >= to_signed(64, EXP_BITS) or
1148 (r.insn(9) = '0' and r.b.exponent >= to_signed(32, EXP_BITS)) then
1149 v.state := INT_OFLOW;
1150 elsif r.b.exponent >= to_signed(52, EXP_BITS) then
1151 -- integer already, no rounding required,
1152 -- shift into final position
1153 v.shift := r.b.exponent - to_signed(54, EXP_BITS);
1154 if r.insn(8) = '1' and r.b.negative = '1' then
1155 v.state := INT_OFLOW;
1156 else
1157 v.state := INT_ISHIFT;
1158 end if;
1159 else
1160 v.shift := r.b.exponent - to_signed(52, EXP_BITS);
1161 v.state := INT_SHIFT;
1162 end if;
1163 when INFINITY | NAN =>
1164 v.state := INT_OFLOW;
1165 end case;
1166
1167 when DO_FCFID =>
1168 -- r.opsel_a = AIN_B
1169 v.result_sign := '0';
1170 if r.insn(8) = '0' and r.b.negative = '1' then
1171 -- fcfid[s] with negative operand, set R = -B
1172 opsel_ainv <= '1';
1173 carry_in <= '1';
1174 v.result_sign := '1';
1175 end if;
1176 v.result_class := r.b.class;
1177 v.result_exp := to_signed(54, EXP_BITS);
1178 v.fpscr(FPSCR_FR) := '0';
1179 v.fpscr(FPSCR_FI) := '0';
1180 if r.b.class = ZERO then
1181 arith_done := '1';
1182 else
1183 v.state := FINISH;
1184 end if;
1185
1186 when DO_FADD =>
1187 -- fadd[s] and fsub[s]
1188 -- r.opsel_a = AIN_A
1189 v.result_sign := r.a.negative;
1190 v.result_class := r.a.class;
1191 v.result_exp := r.a.exponent;
1192 v.fpscr(FPSCR_FR) := '0';
1193 v.fpscr(FPSCR_FI) := '0';
1194 v.use_a := '1';
1195 v.use_b := '1';
1196 is_add := r.a.negative xor r.b.negative xor r.insn(1);
1197 if r.a.class = FINITE and r.b.class = FINITE then
1198 v.is_subtract := not is_add;
1199 v.add_bsmall := r.exp_cmp;
1200 v.opsel_a := AIN_B;
1201 if r.exp_cmp = '0' then
1202 v.shift := r.a.exponent - r.b.exponent;
1203 v.result_sign := r.b.negative xnor r.insn(1);
1204 if r.a.exponent = r.b.exponent then
1205 v.state := ADD_2;
1206 else
1207 v.state := ADD_SHIFT;
1208 end if;
1209 else
1210 v.state := ADD_1;
1211 end if;
1212 else
1213 if r.a.class = NAN or r.b.class = NAN then
1214 v.state := NAN_RESULT;
1215 elsif r.a.class = INFINITY and r.b.class = INFINITY and is_add = '0' then
1216 -- invalid operation, construct QNaN
1217 v.fpscr(FPSCR_VXISI) := '1';
1218 qnan_result := '1';
1219 arith_done := '1';
1220 elsif r.a.class = ZERO and r.b.class = ZERO and is_add = '0' then
1221 -- return -0 for rounding to -infinity
1222 v.result_sign := r.round_mode(1) and r.round_mode(0);
1223 arith_done := '1';
1224 elsif r.a.class = INFINITY or r.b.class = ZERO then
1225 -- result is A
1226 v.opsel_a := AIN_A;
1227 v.state := EXC_RESULT;
1228 else
1229 -- result is +/- B
1230 v.opsel_a := AIN_B;
1231 v.negate := not r.insn(1);
1232 v.state := EXC_RESULT;
1233 end if;
1234 end if;
1235
1236 when DO_FMUL =>
1237 -- fmul[s]
1238 -- r.opsel_a = AIN_A unless C is denorm and A isn't
1239 v.result_sign := r.a.negative xor r.c.negative;
1240 v.result_class := r.a.class;
1241 v.fpscr(FPSCR_FR) := '0';
1242 v.fpscr(FPSCR_FI) := '0';
1243 v.use_a := '1';
1244 v.use_c := '1';
1245 if r.a.class = FINITE and r.c.class = FINITE then
1246 v.result_exp := r.a.exponent + r.c.exponent;
1247 -- Renormalize denorm operands
1248 if r.a.mantissa(54) = '0' then
1249 v.state := RENORM_A;
1250 elsif r.c.mantissa(54) = '0' then
1251 v.state := RENORM_C;
1252 else
1253 f_to_multiply.valid <= '1';
1254 v.state := MULT_1;
1255 end if;
1256 else
1257 if r.a.class = NAN or r.c.class = NAN then
1258 v.state := NAN_RESULT;
1259 elsif (r.a.class = INFINITY and r.c.class = ZERO) or
1260 (r.a.class = ZERO and r.c.class = INFINITY) then
1261 -- invalid operation, construct QNaN
1262 v.fpscr(FPSCR_VXIMZ) := '1';
1263 qnan_result := '1';
1264 elsif r.a.class = ZERO or r.a.class = INFINITY then
1265 -- result is +/- A
1266 arith_done := '1';
1267 else
1268 -- r.c.class is ZERO or INFINITY
1269 v.opsel_a := AIN_C;
1270 v.negate := r.a.negative;
1271 v.state := EXC_RESULT;
1272 end if;
1273 end if;
1274
1275 when DO_FDIV =>
1276 -- r.opsel_a = AIN_A unless B is denorm and A isn't
1277 v.result_class := r.a.class;
1278 v.fpscr(FPSCR_FR) := '0';
1279 v.fpscr(FPSCR_FI) := '0';
1280 v.use_a := '1';
1281 v.use_b := '1';
1282 v.result_sign := r.a.negative xor r.b.negative;
1283 v.result_exp := r.a.exponent - r.b.exponent;
1284 v.count := "00";
1285 if r.a.class = FINITE and r.b.class = FINITE then
1286 -- Renormalize denorm operands
1287 if r.a.mantissa(54) = '0' then
1288 v.state := RENORM_A;
1289 elsif r.b.mantissa(54) = '0' then
1290 v.state := RENORM_B;
1291 else
1292 v.first := '1';
1293 v.state := DIV_2;
1294 end if;
1295 else
1296 if r.a.class = NAN or r.b.class = NAN then
1297 v.state := NAN_RESULT;
1298 elsif r.b.class = INFINITY then
1299 if r.a.class = INFINITY then
1300 v.fpscr(FPSCR_VXIDI) := '1';
1301 qnan_result := '1';
1302 else
1303 v.result_class := ZERO;
1304 end if;
1305 arith_done := '1';
1306 elsif r.b.class = ZERO then
1307 if r.a.class = ZERO then
1308 v.fpscr(FPSCR_VXZDZ) := '1';
1309 qnan_result := '1';
1310 else
1311 if r.a.class = FINITE then
1312 zero_divide := '1';
1313 end if;
1314 v.result_class := INFINITY;
1315 end if;
1316 arith_done := '1';
1317 else -- r.b.class = FINITE, result_class = r.a.class
1318 arith_done := '1';
1319 end if;
1320 end if;
1321
1322 when DO_FSEL =>
1323 v.fpscr(FPSCR_FR) := '0';
1324 v.fpscr(FPSCR_FI) := '0';
1325 if r.a.class = ZERO or (r.a.negative = '0' and r.a.class /= NAN) then
1326 v.opsel_a := AIN_C;
1327 else
1328 v.opsel_a := AIN_B;
1329 end if;
1330 v.quieten_nan := '0';
1331 v.state := EXC_RESULT;
1332
1333 when DO_FSQRT =>
1334 -- r.opsel_a = AIN_B
1335 v.result_class := r.b.class;
1336 v.result_sign := r.b.negative;
1337 v.fpscr(FPSCR_FR) := '0';
1338 v.fpscr(FPSCR_FI) := '0';
1339 v.use_b := '1';
1340 case r.b.class is
1341 when FINITE =>
1342 v.result_exp := r.b.exponent;
1343 if r.b.negative = '1' then
1344 v.fpscr(FPSCR_VXSQRT) := '1';
1345 qnan_result := '1';
1346 elsif r.b.mantissa(54) = '0' then
1347 v.state := RENORM_B;
1348 elsif r.b.exponent(0) = '0' then
1349 v.state := SQRT_1;
1350 else
1351 v.shift := to_signed(1, EXP_BITS);
1352 v.state := RENORM_B2;
1353 end if;
1354 when NAN =>
1355 v.state := NAN_RESULT;
1356 when ZERO =>
1357 -- result is B
1358 arith_done := '1';
1359 when INFINITY =>
1360 if r.b.negative = '1' then
1361 v.fpscr(FPSCR_VXSQRT) := '1';
1362 qnan_result := '1';
1363 -- else result is B
1364 end if;
1365 arith_done := '1';
1366 end case;
1367
1368 when DO_FRE =>
1369 -- r.opsel_a = AIN_B
1370 v.result_class := r.b.class;
1371 v.result_sign := r.b.negative;
1372 v.fpscr(FPSCR_FR) := '0';
1373 v.fpscr(FPSCR_FI) := '0';
1374 v.use_b := '1';
1375 case r.b.class is
1376 when FINITE =>
1377 v.result_exp := - r.b.exponent;
1378 if r.b.mantissa(54) = '0' then
1379 v.state := RENORM_B;
1380 else
1381 v.state := FRE_1;
1382 end if;
1383 when NAN =>
1384 v.state := NAN_RESULT;
1385 when INFINITY =>
1386 v.result_class := ZERO;
1387 arith_done := '1';
1388 when ZERO =>
1389 v.result_class := INFINITY;
1390 zero_divide := '1';
1391 arith_done := '1';
1392 end case;
1393
1394 when DO_FRSQRTE =>
1395 -- r.opsel_a = AIN_B
1396 v.result_class := r.b.class;
1397 v.result_sign := r.b.negative;
1398 v.fpscr(FPSCR_FR) := '0';
1399 v.fpscr(FPSCR_FI) := '0';
1400 v.use_b := '1';
1401 v.shift := to_signed(1, EXP_BITS);
1402 case r.b.class is
1403 when FINITE =>
1404 v.result_exp := r.b.exponent;
1405 if r.b.negative = '1' then
1406 v.fpscr(FPSCR_VXSQRT) := '1';
1407 qnan_result := '1';
1408 elsif r.b.mantissa(54) = '0' then
1409 v.state := RENORM_B;
1410 elsif r.b.exponent(0) = '0' then
1411 v.state := RSQRT_1;
1412 else
1413 v.state := RENORM_B2;
1414 end if;
1415 when NAN =>
1416 v.state := NAN_RESULT;
1417 when INFINITY =>
1418 if r.b.negative = '1' then
1419 v.fpscr(FPSCR_VXSQRT) := '1';
1420 qnan_result := '1';
1421 else
1422 v.result_class := ZERO;
1423 end if;
1424 arith_done := '1';
1425 when ZERO =>
1426 v.result_class := INFINITY;
1427 zero_divide := '1';
1428 arith_done := '1';
1429 end case;
1430
1431 when DO_FMADD =>
1432 -- fmadd, fmsub, fnmadd, fnmsub
1433 -- r.opsel_a = AIN_A if A is denorm, else AIN_C if C is denorm,
1434 -- else AIN_B
1435 v.result_sign := r.a.negative;
1436 v.result_class := r.a.class;
1437 v.result_exp := r.a.exponent;
1438 v.fpscr(FPSCR_FR) := '0';
1439 v.fpscr(FPSCR_FI) := '0';
1440 v.use_a := '1';
1441 v.use_b := '1';
1442 v.use_c := '1';
1443 is_add := r.a.negative xor r.c.negative xor r.b.negative xor r.insn(1);
1444 if r.a.class = FINITE and r.c.class = FINITE and
1445 (r.b.class = FINITE or r.b.class = ZERO) then
1446 v.is_subtract := not is_add;
1447 mulexp := r.a.exponent + r.c.exponent;
1448 v.result_exp := mulexp;
1449 -- Make sure A and C are normalized
1450 if r.a.mantissa(54) = '0' then
1451 v.state := RENORM_A;
1452 elsif r.c.mantissa(54) = '0' then
1453 v.state := RENORM_C;
1454 elsif r.b.class = ZERO then
1455 -- no addend, degenerates to multiply
1456 v.result_sign := r.a.negative xor r.c.negative xor r.insn(2);
1457 f_to_multiply.valid <= '1';
1458 v.is_multiply := '1';
1459 v.state := MULT_1;
1460 elsif r.madd_cmp = '0' then
1461 -- addend is bigger, do multiply first
1462 v.result_sign := not (r.b.negative xor r.insn(1) xor r.insn(2));
1463 f_to_multiply.valid <= '1';
1464 v.state := FMADD_1;
1465 else
1466 -- product is bigger, shift B right and use it as the
1467 -- addend to the multiplier
1468 v.shift := r.b.exponent - mulexp + to_signed(64, EXP_BITS);
1469 -- for subtract, multiplier does B - A * C
1470 v.result_sign := not (r.a.negative xor r.c.negative xor r.insn(2) xor is_add);
1471 v.result_exp := r.b.exponent;
1472 v.state := FMADD_2;
1473 end if;
1474 else
1475 if r.a.class = NAN or r.b.class = NAN or r.c.class = NAN then
1476 v.state := NAN_RESULT;
1477 elsif (r.a.class = ZERO and r.c.class = INFINITY) or
1478 (r.a.class = INFINITY and r.c.class = ZERO) then
1479 -- invalid operation, construct QNaN
1480 v.fpscr(FPSCR_VXIMZ) := '1';
1481 qnan_result := '1';
1482 elsif r.a.class = INFINITY or r.c.class = INFINITY then
1483 if r.b.class = INFINITY and is_add = '0' then
1484 -- invalid operation, construct QNaN
1485 v.fpscr(FPSCR_VXISI) := '1';
1486 qnan_result := '1';
1487 else
1488 -- result is infinity
1489 v.result_class := INFINITY;
1490 v.result_sign := r.a.negative xor r.c.negative xor r.insn(2);
1491 arith_done := '1';
1492 end if;
1493 else
1494 -- Here A is zero, C is zero, or B is infinity
1495 -- Result is +/-B in all of those cases
1496 v.opsel_a := AIN_B;
1497 if r.b.class /= ZERO or is_add = '1' then
1498 v.negate := not (r.insn(1) xor r.insn(2));
1499 else
1500 -- have to be careful about rule for 0 - 0 result sign
1501 v.negate := r.b.negative xor (r.round_mode(1) and r.round_mode(0)) xor r.insn(2);
1502 end if;
1503 v.state := EXC_RESULT;
1504 end if;
1505 end if;
1506
1507 when RENORM_A =>
1508 renormalize := '1';
1509 v.state := RENORM_A2;
1510 if r.insn(4) = '1' then
1511 v.opsel_a := AIN_C;
1512 else
1513 v.opsel_a := AIN_B;
1514 end if;
1515
1516 when RENORM_A2 =>
1517 -- r.opsel_a = AIN_C for fmul/fmadd, AIN_B for fdiv
1518 set_a := '1';
1519 v.result_exp := new_exp;
1520 if r.insn(4) = '1' then
1521 if r.c.mantissa(54) = '1' then
1522 if r.insn(3) = '0' or r.b.class = ZERO then
1523 v.first := '1';
1524 v.state := MULT_1;
1525 else
1526 v.madd_cmp := '0';
1527 if new_exp + 1 >= r.b.exponent then
1528 v.madd_cmp := '1';
1529 end if;
1530 v.opsel_a := AIN_B;
1531 v.state := DO_FMADD;
1532 end if;
1533 else
1534 v.state := RENORM_C;
1535 end if;
1536 else
1537 if r.b.mantissa(54) = '1' then
1538 v.first := '1';
1539 v.state := DIV_2;
1540 else
1541 v.state := RENORM_B;
1542 end if;
1543 end if;
1544
1545 when RENORM_B =>
1546 renormalize := '1';
1547 renorm_sqrt := r.is_sqrt;
1548 v.state := RENORM_B2;
1549
1550 when RENORM_B2 =>
1551 set_b := '1';
1552 if r.is_sqrt = '0' then
1553 v.result_exp := r.result_exp + r.shift;
1554 else
1555 v.result_exp := new_exp;
1556 end if;
1557 v.opsel_a := AIN_B;
1558 v.state := LOOKUP;
1559
1560 when RENORM_C =>
1561 renormalize := '1';
1562 v.state := RENORM_C2;
1563
1564 when RENORM_C2 =>
1565 set_c := '1';
1566 v.result_exp := new_exp;
1567 if r.insn(3) = '0' or r.b.class = ZERO then
1568 v.first := '1';
1569 v.state := MULT_1;
1570 else
1571 v.madd_cmp := '0';
1572 if new_exp + 1 >= r.b.exponent then
1573 v.madd_cmp := '1';
1574 end if;
1575 v.opsel_a := AIN_B;
1576 v.state := DO_FMADD;
1577 end if;
1578
1579 when ADD_1 =>
1580 -- transferring B to R
1581 v.shift := r.b.exponent - r.a.exponent;
1582 v.result_exp := r.b.exponent;
1583 v.state := ADD_SHIFT;
1584
1585 when ADD_SHIFT =>
1586 -- r.shift = - exponent difference
1587 opsel_r <= RES_SHIFT;
1588 v.x := s_nz;
1589 set_x := '1';
1590 longmask := '0';
1591 if r.add_bsmall = '1' then
1592 v.opsel_a := AIN_A;
1593 else
1594 v.opsel_a := AIN_B;
1595 end if;
1596 v.state := ADD_2;
1597
1598 when ADD_2 =>
1599 -- r.opsel_a = AIN_A if r.add_bsmall = 1 else AIN_B
1600 opsel_b <= BIN_R;
1601 opsel_binv <= r.is_subtract;
1602 carry_in <= r.is_subtract and not r.x;
1603 v.shift := to_signed(-1, EXP_BITS);
1604 v.state := ADD_3;
1605
1606 when ADD_3 =>
1607 -- check for overflow or negative result (can't get both)
1608 -- r.shift = -1
1609 if r.r(63) = '1' then
1610 -- result is opposite sign to expected
1611 v.result_sign := not r.result_sign;
1612 opsel_ainv <= '1';
1613 carry_in <= '1';
1614 v.state := FINISH;
1615 elsif r.r(55) = '1' then
1616 -- sum overflowed, shift right
1617 opsel_r <= RES_SHIFT;
1618 set_x := '1';
1619 v.shift := to_signed(-2, EXP_BITS);
1620 if exp_huge = '1' then
1621 v.state := ROUND_OFLOW;
1622 else
1623 v.state := ROUNDING;
1624 end if;
1625 elsif r.r(54) = '1' then
1626 set_x := '1';
1627 v.shift := to_signed(-2, EXP_BITS);
1628 v.state := ROUNDING;
1629 elsif (r_hi_nz or r_lo_nz or r.r(1) or r.r(0)) = '0' then
1630 -- r.x must be zero at this point
1631 v.result_class := ZERO;
1632 if r.is_subtract = '1' then
1633 -- set result sign depending on rounding mode
1634 v.result_sign := r.round_mode(1) and r.round_mode(0);
1635 end if;
1636 arith_done := '1';
1637 else
1638 renormalize := '1';
1639 v.state := NORMALIZE;
1640 end if;
1641
1642 when CMP_1 =>
1643 -- r.opsel_a = AIN_A
1644 opsel_b <= BIN_R;
1645 opsel_binv <= '1';
1646 carry_in <= '1';
1647 v.state := CMP_2;
1648
1649 when CMP_2 =>
1650 if r.r(63) = '1' then
1651 -- A is smaller in magnitude
1652 v.cr_result := not r.a.negative & r.a.negative & "00";
1653 elsif (r_hi_nz or r_lo_nz) = '0' then
1654 v.cr_result := "0010";
1655 else
1656 v.cr_result := r.a.negative & not r.a.negative & "00";
1657 end if;
1658 v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result;
1659 v.instr_done := '1';
1660 v.state := IDLE;
1661
1662 when MULT_1 =>
1663 f_to_multiply.valid <= r.first;
1664 opsel_r <= RES_MULT;
1665 if multiply_to_f.valid = '1' then
1666 v.state := FINISH;
1667 end if;
1668
1669 when FMADD_1 =>
1670 -- Addend is bigger here
1671 v.result_sign := not (r.b.negative xor r.insn(1) xor r.insn(2));
1672 -- note v.shift is at most -2 here
1673 v.shift := r.result_exp - r.b.exponent;
1674 opsel_r <= RES_MULT;
1675 opsel_s <= S_MULT;
1676 set_s := '1';
1677 f_to_multiply.valid <= r.first;
1678 if multiply_to_f.valid = '1' then
1679 v.state := ADD_SHIFT;
1680 end if;
1681
1682 when FMADD_2 =>
1683 -- Product is potentially bigger here
1684 -- r.shift = addend exp - product exp + 64, r.r = r.b.mantissa
1685 set_s := '1';
1686 opsel_s <= S_SHIFT;
1687 v.shift := r.shift - to_signed(64, EXP_BITS);
1688 v.state := FMADD_3;
1689
1690 when FMADD_3 =>
1691 -- r.shift = addend exp - product exp
1692 opsel_r <= RES_SHIFT;
1693 v.first := '1';
1694 v.state := FMADD_4;
1695
1696 when FMADD_4 =>
1697 msel_add <= MULADD_RS;
1698 f_to_multiply.valid <= r.first;
1699 msel_inv <= r.is_subtract;
1700 opsel_r <= RES_MULT;
1701 opsel_s <= S_MULT;
1702 set_s := '1';
1703 v.shift := to_signed(56, EXP_BITS);
1704 if multiply_to_f.valid = '1' then
1705 if multiply_to_f.result(121) = '1' then
1706 v.state := FMADD_5;
1707 else
1708 v.state := FMADD_6;
1709 end if;
1710 end if;
1711
1712 when FMADD_5 =>
1713 -- negate R:S:X
1714 v.result_sign := not r.result_sign;
1715 opsel_ainv <= '1';
1716 carry_in <= not (s_nz or r.x);
1717 opsel_s <= S_NEG;
1718 set_s := '1';
1719 v.shift := to_signed(56, EXP_BITS);
1720 v.state := FMADD_6;
1721
1722 when FMADD_6 =>
1723 -- r.shift = 56 (or 0, but only if r is now nonzero)
1724 if (r.r(56) or r_hi_nz or r_lo_nz or r.r(1) or r.r(0)) = '0' then
1725 if s_nz = '0' then
1726 -- must be a subtraction, and r.x must be zero
1727 v.result_class := ZERO;
1728 v.result_sign := r.round_mode(1) and r.round_mode(0);
1729 arith_done := '1';
1730 else
1731 -- R is all zeroes but there are non-zero bits in S
1732 -- so shift them into R and set S to 0
1733 opsel_r <= RES_SHIFT;
1734 set_s := '1';
1735 -- stay in state FMADD_6
1736 end if;
1737 elsif r.r(56 downto 54) = "001" then
1738 v.state := FINISH;
1739 else
1740 renormalize := '1';
1741 v.state := NORMALIZE;
1742 end if;
1743
1744 when LOOKUP =>
1745 -- r.opsel_a = AIN_B
1746 -- wait one cycle for inverse_table[B] lookup
1747 v.first := '1';
1748 if r.insn(4) = '0' then
1749 if r.insn(3) = '0' then
1750 v.state := DIV_2;
1751 else
1752 v.state := SQRT_1;
1753 end if;
1754 elsif r.insn(2) = '0' then
1755 v.state := FRE_1;
1756 else
1757 v.state := RSQRT_1;
1758 end if;
1759
1760 when DIV_2 =>
1761 -- compute Y = inverse_table[B] (when count=0); P = 2 - B * Y
1762 msel_1 <= MUL1_B;
1763 msel_add <= MULADD_CONST;
1764 msel_inv <= '1';
1765 if r.count = 0 then
1766 msel_2 <= MUL2_LUT;
1767 else
1768 msel_2 <= MUL2_P;
1769 end if;
1770 set_y := r.first;
1771 pshift := '1';
1772 f_to_multiply.valid <= r.first;
1773 if multiply_to_f.valid = '1' then
1774 v.first := '1';
1775 v.count := r.count + 1;
1776 v.state := DIV_3;
1777 end if;
1778
1779 when DIV_3 =>
1780 -- compute Y = P = P * Y
1781 msel_1 <= MUL1_Y;
1782 msel_2 <= MUL2_P;
1783 f_to_multiply.valid <= r.first;
1784 pshift := '1';
1785 if multiply_to_f.valid = '1' then
1786 v.first := '1';
1787 if r.count = 3 then
1788 v.state := DIV_4;
1789 else
1790 v.state := DIV_2;
1791 end if;
1792 end if;
1793
1794 when DIV_4 =>
1795 -- compute R = P = A * Y (quotient)
1796 msel_1 <= MUL1_A;
1797 msel_2 <= MUL2_P;
1798 set_y := r.first;
1799 f_to_multiply.valid <= r.first;
1800 pshift := '1';
1801 if multiply_to_f.valid = '1' then
1802 opsel_r <= RES_MULT;
1803 v.first := '1';
1804 v.state := DIV_5;
1805 end if;
1806
1807 when DIV_5 =>
1808 -- compute P = A - B * R (remainder)
1809 msel_1 <= MUL1_B;
1810 msel_2 <= MUL2_R;
1811 msel_add <= MULADD_A;
1812 msel_inv <= '1';
1813 f_to_multiply.valid <= r.first;
1814 if multiply_to_f.valid = '1' then
1815 v.state := DIV_6;
1816 end if;
1817
1818 when DIV_6 =>
1819 -- test if remainder is 0 or >= B
1820 if pcmpb_lt = '1' then
1821 -- quotient is correct, set X if remainder non-zero
1822 v.x := r.p(58) or px_nz;
1823 else
1824 -- quotient needs to be incremented by 1
1825 carry_in <= '1';
1826 v.x := not pcmpb_eq;
1827 end if;
1828 v.state := FINISH;
1829
1830 when FRE_1 =>
1831 opsel_r <= RES_MISC;
1832 misc_sel <= "0111";
1833 v.shift := to_signed(1, EXP_BITS);
1834 v.state := NORMALIZE;
1835
1836 when FTDIV_1 =>
1837 v.cr_result(1) := exp_tiny or exp_huge;
1838 if exp_tiny = '1' or exp_huge = '1' or r.a.class = ZERO or r.first = '0' then
1839 v.instr_done := '1';
1840 v.state := IDLE;
1841 else
1842 v.shift := r.a.exponent;
1843 v.doing_ftdiv := "10";
1844 end if;
1845
1846 when RSQRT_1 =>
1847 opsel_r <= RES_MISC;
1848 misc_sel <= "0111";
1849 sqrt_exp := r.b.exponent(EXP_BITS-1) & r.b.exponent(EXP_BITS-1 downto 1);
1850 v.result_exp := - sqrt_exp;
1851 v.shift := to_signed(1, EXP_BITS);
1852 v.state := NORMALIZE;
1853
1854 when SQRT_1 =>
1855 -- put invsqr[B] in R and compute P = invsqr[B] * B
1856 -- also transfer B (in R) to A
1857 set_a := '1';
1858 opsel_r <= RES_MISC;
1859 misc_sel <= "0111";
1860 msel_1 <= MUL1_B;
1861 msel_2 <= MUL2_LUT;
1862 f_to_multiply.valid <= '1';
1863 v.shift := to_signed(-1, EXP_BITS);
1864 v.count := "00";
1865 v.state := SQRT_2;
1866
1867 when SQRT_2 =>
1868 -- shift R right one place
1869 -- not expecting multiplier result yet
1870 -- r.shift = -1
1871 opsel_r <= RES_SHIFT;
1872 v.first := '1';
1873 v.state := SQRT_3;
1874
1875 when SQRT_3 =>
1876 -- put R into Y, wait for product from multiplier
1877 msel_2 <= MUL2_R;
1878 set_y := r.first;
1879 pshift := '1';
1880 if multiply_to_f.valid = '1' then
1881 -- put result into R
1882 opsel_r <= RES_MULT;
1883 v.first := '1';
1884 v.state := SQRT_4;
1885 end if;
1886
-- SQRT_4: first step of the iteration loop (re-entered from SQRT_7).
1887 when SQRT_4 =>
1888 -- compute 1.5 - Y * P
1889 msel_1 <= MUL1_Y;
1890 msel_2 <= MUL2_P;
-- MULADD_CONST yields 1.5 when r.is_sqrt is set (see the addend mux below);
-- msel_inv inverts the addend and sets not_result on the multiplier request
1891 msel_add <= MULADD_CONST;
1892 msel_inv <= '1';
-- the multiply request is only asserted while r.first is set
-- NOTE(review): r.first is presumably cleared by default elsewhere -- confirm
1893 f_to_multiply.valid <= r.first;
1894 pshift := '1';
1895 if multiply_to_f.valid = '1' then
1896 v.state := SQRT_5;
1897 end if;
1898
-- SQRT_5: single-cycle state that issues the Y * P multiply and moves on.
1899 when SQRT_5 =>
1900 -- compute Y = Y * P
1901 msel_1 <= MUL1_Y;
1902 msel_2 <= MUL2_P;
-- issued unconditionally; the product is collected in later states
1903 f_to_multiply.valid <= '1';
-- lets SQRT_6 issue its own multiply on its first cycle
1904 v.first := '1';
1905 v.state := SQRT_6;
1906
-- SQRT_6: issue the R * P multiply behind the one started in SQRT_5 and wait
-- for a product to arrive.
1907 when SQRT_6 =>
1908 -- pipeline in R = R * P
1909 msel_1 <= MUL1_R;
1910 msel_2 <= MUL2_P;
-- the multiply request is only asserted while r.first is set
1911 f_to_multiply.valid <= r.first;
-- P takes product bits 119..56 when a result arrives
1912 pshift := '1';
1913 if multiply_to_f.valid = '1' then
1914 v.first := '1';
1915 v.state := SQRT_7;
1916 end if;
1917
-- SQRT_7: collect both products of the iteration, then loop to SQRT_4 or exit
-- to SQRT_8 depending on the iteration count.
1918 when SQRT_7 =>
1919 -- first multiply is done, put result in Y
-- MUL2_P puts P on the multiplier's second operand; set_y captures it into Y
1920 msel_2 <= MUL2_P;
1921 set_y := r.first;
1922 -- wait for second multiply (should be here already)
1923 pshift := '1';
1924 if multiply_to_f.valid = '1' then
1925 -- put result into R
1926 opsel_r <= RES_MULT;
1927 v.first := '1';
1928 v.count := r.count + 1;
-- r.count values 0 and 1 loop back, so SQRT_4..SQRT_7 runs three times
1929 if r.count < 2 then
1930 v.state := SQRT_4;
1931 else
-- NOTE(review): v.first was already set to '1' a few lines above in this
-- branch, so this assignment is redundant
1932 v.first := '1';
1933 v.state := SQRT_8;
1934 end if;
1935 end if;
1936
-- SQRT_8: first step after the iteration loop; forms the error term from A and R.
1937 when SQRT_8 =>
1938 -- compute P = A - R * R, which can be +ve or -ve
1939 -- we arranged for B to be put into A earlier
1940 msel_1 <= MUL1_R;
1941 msel_2 <= MUL2_R;
-- MULADD_A places A's mantissa in the addend; msel_inv inverts the addend and
-- sets not_result on the multiplier request
1942 msel_add <= MULADD_A;
1943 msel_inv <= '1';
1944 pshift := '1';
-- the multiply request is only asserted while r.first is set
1945 f_to_multiply.valid <= r.first;
1946 if multiply_to_f.valid = '1' then
1947 v.first := '1';
1948 v.state := SQRT_9;
1949 end if;
1950
-- SQRT_9: scale the error term in P by Y to get the correction for R.
1951 when SQRT_9 =>
1952 -- compute P = P * Y
1953 -- since Y is an estimate of 1/sqrt(B), this makes P an
1954 -- estimate of the adjustment needed to R. Since the error
1955 -- could be negative and we have an unsigned multiplier, the
1956 -- upper bits can be wrong, but it turns out the lowest 8 bits
1957 -- are correct and are all we need (given 3 iterations through
1958 -- SQRT_4 to SQRT_7).
1959 msel_1 <= MUL1_Y;
1960 msel_2 <= MUL2_P;
-- P takes product bits 119..56 when the result arrives
1961 pshift := '1';
-- the multiply request is only asserted while r.first is set
1962 f_to_multiply.valid <= r.first;
1963 if multiply_to_f.valid = '1' then
1964 v.state := SQRT_10;
1965 end if;
1966
-- SQRT_10: apply the correction from P to R and set the result exponent to
-- half of B's exponent.
-- NOTE(review): relies on the default opsel_r / opsel_a selections, which are
-- set outside this section (presumably the adder with R on the A input) -- confirm
1967 when SQRT_10 =>
1968 -- Add the bottom 8 bits of P, sign-extended,
1969 -- divided by 4, onto R.
1970 -- The division by 4 is because R is 10.54 format
1971 -- whereas P is 8.56 format.
-- BIN_PS6 feeds P bits 7..2, sign-extended to 64 bits, to the adder's B input
1972 opsel_b <= BIN_PS6;
-- replicate the sign bit and drop the LSB: arithmetic right shift of B's exponent by one
1973 sqrt_exp := r.b.exponent(EXP_BITS-1) & r.b.exponent(EXP_BITS-1 downto 1);
1974 v.result_exp := sqrt_exp;
-- shift count used by the shifter in SQRT_11 when it builds the new B value
1975 v.shift := to_signed(1, EXP_BITS);
1976 v.first := '1';
1977 v.state := SQRT_11;
1978
-- SQRT_11: compute the remainder and prepare the comparison value used by SQRT_12.
1979 when SQRT_11 =>
1980 -- compute P = A - R * R (remainder)
1981 -- also put 2 * R + 1 into B for comparison with P
1982 msel_1 <= MUL1_R;
1983 msel_2 <= MUL2_R;
1984 msel_add <= MULADD_A;
1985 msel_inv <= '1';
-- the multiply request is only asserted while r.first is set
1986 f_to_multiply.valid <= r.first;
-- shiftin is OR-ed into the bit just below R at the shifter input; with the
-- shift count of 1 set in SQRT_10 this gives the "2 * R + 1" value
1987 shiftin := '1';
-- set_b writes new_exp / shift_res into B (only in the cycle where r.first is set)
1988 set_b := r.first;
1989 if multiply_to_f.valid = '1' then
1990 v.state := SQRT_12;
1991 end if;
1992
-- SQRT_12: final correction step of the square-root sequence; same structure
-- as DIV_6.  Decides on a +1 adjustment of R, records inexactness in X, then FINISH.
-- NOTE(review): pcmpb_lt, pcmpb_eq and px_nz are computed outside this section.
1993 when SQRT_12 =>
1994 -- test if remainder is 0 or >= B = 2*R + 1
1995 if pcmpb_lt = '1' then
1996 -- square root is correct, set X if remainder non-zero
1997 v.x := r.p(58) or px_nz;
1998 else
1999 -- square root needs to be incremented by 1
-- carry_in is the extra +1 term of the RES_SUM adder in the result mux
2000 carry_in <= '1';
-- X is cleared only when the comparison reports equality
2001 v.x := not pcmpb_eq;
2002 end if;
2003 v.state := FINISH;
2004
-- INT_SHIFT: first shift step of the convert-to-integer path.
2005 when INT_SHIFT =>
2006 -- r.shift = b.exponent - 52
2007 opsel_r <= RES_SHIFT;
-- set_x: X becomes '1' if any adder A-input bit under the shift mask is set
2008 set_x := '1';
2009 v.state := INT_ROUND;
-- shift count consumed by INT_ROUND (see its "r.shift = -2" note)
2010 v.shift := to_signed(-2, EXP_BITS);
2011
-- INT_ROUND: apply the remaining shift, compute the rounding decision and
-- detect negative inputs to the unsigned conversions.
2012 when INT_ROUND =>
2013 -- r.shift = -2
2014 opsel_r <= RES_SHIFT;
-- third argument is the constant '0' here, where ROUNDING passes r.single_prec
2015 round := fp_rounding(r.r, r.x, '0', r.round_mode, r.result_sign);
-- both round bits are stored in the FPSCR FR..FI field; INT_FINAL reads FR back
2016 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
2017 -- Check for negative values that don't round to 0 for fcti*u*
-- insn bit 8 is set for the unsigned forms (see the case labels in INT_FINAL)
2018 if r.insn(8) = '1' and r.result_sign = '1' and
2019 (r_hi_nz or r_lo_nz or v.fpscr(FPSCR_FR)) = '1' then
2020 v.state := INT_OFLOW;
2021 else
2022 v.state := INT_FINAL;
2023 end if;
2024
-- INT_ISHIFT: apply the preset shift to R and go straight to INT_FINAL
-- (no rounding step on this path).
2025 when INT_ISHIFT =>
2026 -- r.shift = b.exponent - 54;
2027 opsel_r <= RES_SHIFT;
2028 v.state := INT_FINAL;
2029
-- INT_FINAL: produce the signed integer from the magnitude in R and decide
-- whether an overflow check (INT_CHECK) is needed before completing.
2030 when INT_FINAL =>
2031 -- Negate if necessary, and increment for rounding if needed
-- opsel_ainv inverts the adder's A input; with carry_in = FR xor sign:
--   sign=0, FR=1: +1 (round up)      sign=1, FR=0: invert +1 (negate)
--   sign=1, FR=1: invert only, i.e. -(value + 1)
2032 opsel_ainv <= r.result_sign;
2033 carry_in <= r.fpscr(FPSCR_FR) xor r.result_sign;
2034 -- Check for possible overflows
2035 case r.insn(9 downto 8) is
2036 when "00" => -- fctiw[z]
2037 need_check := r.r(31) or (r.r(30) and not r.result_sign);
2038 when "01" => -- fctiwu[z]
2039 need_check := r.r(31);
2040 when "10" => -- fctid[z]
2041 need_check := r.r(63) or (r.r(62) and not r.result_sign);
2042 when others => -- fctidu[z]
2043 need_check := r.r(63);
2044 end case;
2045 if need_check = '1' then
2046 v.state := INT_CHECK;
2047 else
-- an inexact result (FI) also raises XX
2048 if r.fpscr(FPSCR_FI) = '1' then
2049 v.fpscr(FPSCR_XX) := '1';
2050 end if;
2051 arith_done := '1';
2052 end if;
2053
-- INT_CHECK: inspect the sign bit of the converted value in R and substitute a
-- saturated result if the conversion overflowed.
2054 when INT_CHECK =>
-- insn bit 9 selects the width: bit 31 for word forms, bit 63 for doubleword forms
2055 if r.insn(9) = '0' then
2056 msb := r.r(31);
2057 else
2058 msb := r.r(63);
2059 end if;
-- "1" & width/unsigned bits & sign indexes the saturation constants in the
-- RES_MISC part of the result mux
2060 misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
-- overflow: signed forms when the MSB differs from the expected sign,
-- unsigned forms when the MSB is not '1'
2061 if (r.insn(8) = '0' and msb /= r.result_sign) or
2062 (r.insn(8) = '1' and msb /= '1') then
2063 opsel_r <= RES_MISC;
2064 v.fpscr(FPSCR_VXCVI) := '1';
2065 invalid := '1';
2066 else
-- an inexact result (FI) also raises XX
2067 if r.fpscr(FPSCR_FI) = '1' then
2068 v.fpscr(FPSCR_XX) := '1';
2069 end if;
2070 end if;
2071 arith_done := '1';
2072
-- INT_OFLOW: unconditional invalid-conversion result; loads a saturation
-- constant into R and flags VXCVI.
2073 when INT_OFLOW =>
2074 opsel_r <= RES_MISC;
-- "1" & insn(9:8) & sign indexes the saturation constants in the result mux
2075 misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
-- a NaN operand forces the low select bit, i.e. the "max negative" constant
2076 if r.b.class = NAN then
2077 misc_sel(0) <= '1';
2078 end if;
2079 v.fpscr(FPSCR_VXCVI) := '1';
2080 invalid := '1';
2081 arith_done := '1';
2082
-- FRI_1: shift step that mirrors INT_SHIFT but continues with the
-- floating-point ROUNDING state instead of INT_ROUND.
2083 when FRI_1 =>
2084 -- r.shift = b.exponent - 52
2085 opsel_r <= RES_SHIFT;
-- set_x: X becomes '1' if any adder A-input bit under the shift mask is set
2086 set_x := '1';
-- same -2 shift preset that the other paths into ROUNDING use
2087 v.shift := to_signed(-2, EXP_BITS);
2088 v.state := ROUNDING;
2089
-- FINISH: common entry to post-processing.  Routes the value in R to
-- NORMALIZE, ROUND_UFLOW, ROUND_OFLOW or ROUNDING.
2090 when FINISH =>
-- for multiplies, non-zero low product bits (px_nz) make the result inexact
2091 if r.is_multiply = '1' and px_nz = '1' then
2092 v.x := '1';
2093 end if;
-- R counts as normalized when bits 63..55 are zero and bit 54 is set
2094 if r.r(63 downto 54) /= "0000000001" then
-- renormalize makes the shared code below load v.shift with clz(R) - 9
2095 renormalize := '1';
2096 v.state := NORMALIZE;
2097 else
2098 set_x := '1';
2099 if exp_tiny = '1' then
2100 v.shift := new_exp - min_exp;
2101 v.state := ROUND_UFLOW;
2102 elsif exp_huge = '1' then
2103 v.state := ROUND_OFLOW;
2104 else
2105 v.shift := to_signed(-2, EXP_BITS);
2106 v.state := ROUNDING;
2107 end if;
2108 end if;
2109
-- NORMALIZE: shift R by the preset count, then pick the next post-processing
-- state using the same exponent tests as FINISH.
2110 when NORMALIZE =>
2111 -- Shift so we have 9 leading zeroes (we know R is non-zero)
2112 -- r.shift = clz(r.r) - 9
-- RES_SHIFT also makes the shared code below copy new_exp into v.result_exp
2113 opsel_r <= RES_SHIFT;
2114 set_x := '1';
2115 if exp_tiny = '1' then
2116 v.shift := new_exp - min_exp;
2117 v.state := ROUND_UFLOW;
2118 elsif exp_huge = '1' then
2119 v.state := ROUND_OFLOW;
2120 else
2121 v.shift := to_signed(-2, EXP_BITS);
2122 v.state := ROUNDING;
2123 end if;
2124
-- ROUND_UFLOW: exponent is below the minimum.  Behaviour depends on whether
-- the underflow exception is enabled (FPSCR UE).
2125 when ROUND_UFLOW =>
2126 -- r.shift = - amount by which exponent underflows
-- tiny is read back in ROUNDING to decide whether to set UX
2127 v.tiny := '1';
2128 if r.fpscr(FPSCR_UE) = '0' then
2129 -- disabled underflow exception case
2130 -- have to denormalize before rounding
2131 opsel_r <= RES_SHIFT;
2132 set_x := '1';
2133 v.shift := to_signed(-2, EXP_BITS);
2134 v.state := ROUNDING;
2135 else
2136 -- enabled underflow exception case
2137 -- if denormalized, have to normalize before rounding
2138 v.fpscr(FPSCR_UX) := '1';
-- adjust the exponent by bias_exp (defined outside this section)
2139 v.result_exp := r.result_exp + bias_exp;
-- bit 54 clear: R is not normalized yet
2140 if r.r(54) = '0' then
2141 renormalize := '1';
2142 v.state := NORMALIZE;
2143 else
2144 v.shift := to_signed(-2, EXP_BITS);
2145 v.state := ROUNDING;
2146 end if;
2147 end if;
2148
-- ROUND_OFLOW: exponent is above the maximum.  Behaviour depends on whether
-- the overflow exception is enabled (FPSCR OE).
2149 when ROUND_OFLOW =>
2150 v.fpscr(FPSCR_OX) := '1';
2151 if r.fpscr(FPSCR_OE) = '0' then
2152 -- disabled overflow exception
2153 -- result depends on rounding mode
2154 v.fpscr(FPSCR_XX) := '1';
2155 v.fpscr(FPSCR_FI) := '1';
-- infinity when round_mode is "00", or when round_mode(1) is set and
-- round_mode(0) equals the result sign; otherwise the largest finite value
2156 if r.round_mode(1 downto 0) = "00" or
2157 (r.round_mode(1) = '1' and r.round_mode(0) = r.result_sign) then
2158 v.result_class := INFINITY;
2159 v.fpscr(FPSCR_FR) := '1';
2160 else
2161 v.fpscr(FPSCR_FR) := '0';
2162 end if;
2163 -- construct largest representable number
2164 v.result_exp := max_exp;
2165 opsel_r <= RES_MISC;
-- "0010" selects the max DP mantissa, "0011" the max SP mantissa
2166 misc_sel <= "001" & r.single_prec;
2167 arith_done := '1';
2168 else
2169 -- enabled overflow exception
-- adjust the exponent by bias_exp (defined outside this section), then round
2170 v.result_exp := r.result_exp - bias_exp;
2171 v.shift := to_signed(-2, EXP_BITS);
2172 v.state := ROUNDING;
2173 end if;
2174
-- ROUNDING: clear the bits of R below the precision and, if the rounding
-- decision says so, add one unit in the last place.
2175 when ROUNDING =>
-- opsel_amask clears the adder A-input bits covered by the shift mask
2176 opsel_amask <= '1';
2177 round := fp_rounding(r.r, r.x, r.single_prec, r.round_mode, r.result_sign);
2178 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
-- round(1): increment needed; round(0): result is inexact
2179 if round(1) = '1' then
2180 -- set mask to increment the LSB for the precision
-- masked A + mask + carry_in: the +1 carries out of the mask bits into the LSB
2181 opsel_b <= BIN_MASK;
2182 carry_in <= '1';
-- shift count used by ROUNDING_2 if the increment overflowed the mantissa
2183 v.shift := to_signed(-1, EXP_BITS);
2184 v.state := ROUNDING_2;
2185 else
2186 if r.r(54) = '0' then
2187 -- result after masking could be zero, or could be a
2188 -- denormalized result that needs to be renormalized
2189 renormalize := '1';
2190 v.state := ROUNDING_3;
2191 else
2192 arith_done := '1';
2193 end if;
2194 end if;
2195 if round(0) = '1' then
2196 v.fpscr(FPSCR_XX) := '1';
-- tiny was set in ROUND_UFLOW: inexact and tiny together raise UX
2197 if r.tiny = '1' then
2198 v.fpscr(FPSCR_UX) := '1';
2199 end if;
2200 end if;
2201
-- ROUNDING_2: handle the outcome of the rounding increment done in ROUNDING.
2202 when ROUNDING_2 =>
2203 -- Check for overflow during rounding
2204 -- r.shift = -1
2205 v.x := '0';
-- bit 55 set: the increment carried out of the mantissa, so shift by r.shift
2206 if r.r(55) = '1' then
2207 opsel_r <= RES_SHIFT;
2208 if exp_huge = '1' then
2209 v.state := ROUND_OFLOW;
2210 else
2211 arith_done := '1';
2212 end if;
2213 elsif r.r(54) = '0' then
2214 -- Do CLZ so we can renormalize the result
2215 renormalize := '1';
2216 v.state := ROUNDING_3;
2217 else
2218 arith_done := '1';
2219 end if;
2220
-- ROUNDING_3: the rounded value in R is not normalized.  Either it is zero, or
-- it is shifted up and, if the exponent is below -1022, denormalized in DENORM.
2221 when ROUNDING_3 =>
2222 -- r.shift = clz(r.r) - 9
-- the low part of R (r_lo_nz) is ignored for single precision
2223 mant_nz := r_hi_nz or (r_lo_nz and not r.single_prec);
2224 if mant_nz = '0' then
2225 v.result_class := ZERO;
2226 if r.is_subtract = '1' then
2227 -- set result sign depending on rounding mode
-- the zero is negative only when both round_mode bits are set
2228 v.result_sign := r.round_mode(1) and r.round_mode(0);
2229 end if;
2230 arith_done := '1';
2231 else
2232 -- Renormalize result after rounding
2233 opsel_r <= RES_SHIFT;
-- denorm is read back when the FPRF flags are generated
2234 v.denorm := exp_tiny;
-- shift count for DENORM: distance of the new exponent from -1022
2235 v.shift := new_exp - to_signed(-1022, EXP_BITS);
2236 if new_exp < to_signed(-1022, EXP_BITS) then
2237 v.state := DENORM;
2238 else
2239 arith_done := '1';
2240 end if;
2241 end if;
2242
-- DENORM: apply the shift set up in ROUNDING_3 to R and complete the instruction.
2243 when DENORM =>
2244 -- r.shift = result_exp - -1022
2245 opsel_r <= RES_SHIFT;
2246 arith_done := '1';
2247
-- NAN_RESULT: at least one operand in use is expected to be a NaN.  Flags
-- signalling NaNs and selects which operand EXC_RESULT will return.
2248 when NAN_RESULT =>
-- a NaN whose mantissa bit 53 is clear is treated as signalling
2249 if (r.use_a = '1' and r.a.class = NAN and r.a.mantissa(53) = '0') or
2250 (r.use_b = '1' and r.b.class = NAN and r.b.mantissa(53) = '0') or
2251 (r.use_c = '1' and r.c.class = NAN and r.c.mantissa(53) = '0') then
2252 -- Signalling NAN
2253 v.fpscr(FPSCR_VXSNAN) := '1';
2254 invalid := '1';
2255 end if;
-- priority A, then B, then C; v.opsel_a is left unchanged if none is a NaN
2256 if r.use_a = '1' and r.a.class = NAN then
2257 v.opsel_a := AIN_A;
2258 elsif r.use_b = '1' and r.b.class = NAN then
2259 v.opsel_a := AIN_B;
2260 elsif r.use_c = '1' and r.c.class = NAN then
2261 v.opsel_a := AIN_C;
2262 end if;
2263 v.state := EXC_RESULT;
2264
-- EXC_RESULT: return one of the input operands as the result.  Copies the sign
-- (xor r.negate), exponent and class of the operand selected by r.opsel_a.
-- NOTE(review): the mantissa presumably reaches R through the adder A-input
-- mux, which is also driven by r.opsel_a; opsel_r's default is not visible -- confirm
2265 when EXC_RESULT =>
2266 -- r.opsel_a = AIN_A, AIN_B or AIN_C according to which input is the result
2267 case r.opsel_a is
2268 when AIN_B =>
2269 v.result_sign := r.b.negative xor r.negate;
2270 v.result_exp := r.b.exponent;
2271 v.result_class := r.b.class;
2272 when AIN_C =>
2273 v.result_sign := r.c.negative xor r.negate;
2274 v.result_exp := r.c.exponent;
2275 v.result_class := r.c.class;
-- any other selection (including AIN_R) falls back to operand A
2276 when others =>
2277 v.result_sign := r.a.negative xor r.negate;
2278 v.result_exp := r.a.exponent;
2279 v.result_class := r.a.class;
2280 end case;
2281 arith_done := '1';
2282
2283 end case;
2284
-- Common handling after the state case: turn the flags raised by the current
-- state (zero_divide, qnan_result, invalid, arith_done) into FPSCR updates,
-- write-back enables and the return to IDLE.
2285 if zero_divide = '1' then
2286 v.fpscr(FPSCR_ZX) := '1';
2287 end if;
-- qnan_result overrides the result with the generated QNaN (misc_sel "0001"),
-- forces a positive NaN class and completes the instruction
2288 if qnan_result = '1' then
2289 invalid := '1';
2290 v.result_class := NAN;
2291 v.result_sign := '0';
2292 misc_sel <= "0001";
2293 opsel_r <= RES_MISC;
2294 arith_done := '1';
2295 end if;
-- v.invalid is only ever set here, never cleared by this section
2296 if invalid = '1' then
2297 v.invalid := '1';
2298 end if;
2299 if arith_done = '1' then
2300 -- Enabled invalid exception doesn't write result or FPRF
2301 -- Neither does enabled zero-divide exception
2302 if (v.invalid and r.fpscr(FPSCR_VE)) = '0' and
2303 (zero_divide and r.fpscr(FPSCR_ZE)) = '0' then
2304 v.writing_back := '1';
2305 v.update_fprf := '1';
2306 end if;
2307 v.instr_done := '1';
2308 v.state := IDLE;
-- update_fx enables the FX update in the FPSCR summary logic further down
2309 update_fx := '1';
2310 end if;
2311
2312 -- Multiplier and divide/square root data path
-- Selects the two multiplier operands (msel_1, msel_2) and the addend
-- (msel_add, optionally inverted by msel_inv), and captures Y and P.
-- Operands taken from A, B, C or R use bits 61..0 with two zero bits appended.
2313 case msel_1 is
2314 when MUL1_A =>
2315 f_to_multiply.data1 <= r.a.mantissa(61 downto 0) & "00";
2316 when MUL1_B =>
2317 f_to_multiply.data1 <= r.b.mantissa(61 downto 0) & "00";
2318 when MUL1_Y =>
2319 f_to_multiply.data1 <= r.y;
-- remaining selection (MUL1_R): R
2320 when others =>
2321 f_to_multiply.data1 <= r.r(61 downto 0) & "00";
2322 end case;
2323 case msel_2 is
2324 when MUL2_C =>
2325 f_to_multiply.data2 <= r.c.mantissa(61 downto 0) & "00";
2326 when MUL2_LUT =>
2327 f_to_multiply.data2 <= x"00" & inverse_est & '0' & x"000000000";
2328 when MUL2_P =>
2329 f_to_multiply.data2 <= r.p;
-- remaining selection (MUL2_R): R
2330 when others =>
2331 f_to_multiply.data2 <= r.r(61 downto 0) & "00";
2332 end case;
-- addend defaults to zero when msel_add selects none of the cases below
2333 maddend := (others => '0');
2334 case msel_add is
2335 when MULADD_CONST =>
2336 -- addend is 2.0 or 1.5 in 16.112 format
2337 if r.is_sqrt = '0' then
2338 maddend(113) := '1'; -- 2.0
2339 else
2340 maddend(112 downto 111) := "11"; -- 1.5
2341 end if;
2342 when MULADD_A =>
2343 -- addend is A in 16.112 format
2344 maddend(121 downto 58) := r.a.mantissa;
2345 when MULADD_RS =>
2346 -- addend is concatenation of R and S in 16.112 format
2347 maddend := "000000" & r.r & r.s & "00";
2348 when others =>
2349 end case;
-- msel_inv sends the bitwise complement of the addend and also sets not_result
-- NOTE(review): together these presumably implement "addend - product" in the
-- multiplier unit, which is not visible here -- confirm
2350 if msel_inv = '1' then
2351 f_to_multiply.addend <= not maddend;
2352 else
2353 f_to_multiply.addend <= maddend;
2354 end if;
2355 f_to_multiply.not_result <= msel_inv;
-- set_y captures whatever is currently on the second multiplier operand
2356 if set_y = '1' then
2357 v.y := f_to_multiply.data2;
2358 end if;
-- P is updated whenever a product arrives; pshift picks the 64-bit window
2359 if multiply_to_f.valid = '1' then
2360 if pshift = '0' then
2361 v.p := multiply_to_f.result(63 downto 0);
2362 else
2363 v.p := multiply_to_f.result(119 downto 56);
2364 end if;
2365 end if;
2366
2367 -- Data path.
2368 -- This has A and B input multiplexers, an adder, a shifter,
2369 -- count-leading-zeroes logic, and a result mux.
-- Mask generation: builds a 64-bit mask from the shift count.  The mask is used
-- below for the X (sticky) test, for clearing A-input bits and as a B input.
-- longmask (set outside this section) subtracts a further 29 from the count.
2370 if longmask = '1' then
2371 mshift := r.shift + to_signed(-29, EXP_BITS);
2372 else
2373 mshift := r.shift;
2374 end if;
-- counts below -64 give an all-ones mask, non-negative counts an all-zero mask;
-- otherwise right_mask is called with the low 6 bits of the count
2375 if mshift < to_signed(-64, EXP_BITS) then
2376 mask := (others => '1');
2377 elsif mshift >= to_signed(0, EXP_BITS) then
2378 mask := (others => '0');
2379 else
2380 mask := right_mask(unsigned(mshift(5 downto 0)));
2381 end if;
-- Adder A input: chosen by the registered select r.opsel_a, then optionally
-- tested against the mask (set_x), inverted (opsel_ainv) and masked (opsel_amask).
2382 case r.opsel_a is
2383 when AIN_R =>
2384 in_a0 := r.r;
2385 when AIN_A =>
2386 in_a0 := r.a.mantissa;
2387 when AIN_B =>
2388 in_a0 := r.b.mantissa;
-- remaining selection (AIN_C): operand C
2389 when others =>
2390 in_a0 := r.c.mantissa;
2391 end case;
-- sticky test is done on the raw value, before inversion and masking;
-- it can only set X, never clear it
2392 if (or (mask and in_a0)) = '1' and set_x = '1' then
2393 v.x := '1';
2394 end if;
2395 if opsel_ainv = '1' then
2396 in_a0 := not in_a0;
2397 end if;
2398 if opsel_amask = '1' then
2399 in_a0 := in_a0 and not mask;
2400 end if;
2401 in_a <= in_a0;
-- Adder B input: zero, R, the shift mask, or a small signed value taken from P;
-- optionally inverted by opsel_binv.
2402 case opsel_b is
2403 when BIN_ZERO =>
2404 in_b0 := (others => '0');
2405 when BIN_R =>
2406 in_b0 := r.r;
2407 when BIN_MASK =>
2408 in_b0 := mask;
2409 when others =>
2410 -- BIN_PS6, 6 LSBs of P/4 sign-extended to 64
2411 in_b0 := std_ulogic_vector(resize(signed(r.p(7 downto 2)), 64));
2412 end case;
2413 if opsel_binv = '1' then
2414 in_b0 := not in_b0;
2415 end if;
2416 in_b <= in_b0;
-- Shifter: operates on R concatenated with S; shiftin is OR-ed into the top
-- bit of S.  Shift counts outside -64..63 produce an all-zero result.
-- NOTE(review): shifter_64 is defined elsewhere; it is passed the low 7 bits
-- of the count -- presumably as a signed amount, confirm
2417 if r.shift >= to_signed(-64, EXP_BITS) and r.shift <= to_signed(63, EXP_BITS) then
2418 shift_res := shifter_64(r.r & (shiftin or r.s(55)) & r.s(54 downto 0),
2419 std_ulogic_vector(r.shift(6 downto 0)));
2420 else
2421 shift_res := (others => '0');
2422 end if;
-- Result mux: opsel_r picks the adder sum, the shifter output, the top product
-- bits or a constant chosen by misc_sel; the selected value becomes the next R.
2423 case opsel_r is
2424 when RES_SUM =>
2425 result <= std_ulogic_vector(unsigned(in_a) + unsigned(in_b) + carry_in);
2426 when RES_SHIFT =>
2427 result <= shift_res;
2428 when RES_MULT =>
2429 result <= multiply_to_f.result(121 downto 58);
-- remaining selection (RES_MISC): miscellaneous values and constants
2430 when others =>
2431 case misc_sel is
2432 when "0000" =>
-- FPSCR contents, masked, in the low 32 bits
2433 misc := x"00000000" & (r.fpscr and fpscr_mask);
2434 when "0001" =>
2435 -- generated QNaN mantissa
2436 misc := x"0020000000000000";
2437 when "0010" =>
2438 -- mantissa of max representable DP number
2439 misc := x"007ffffffffffffc";
2440 when "0011" =>
2441 -- mantissa of max representable SP number
2442 misc := x"007fffff80000000";
2443 when "0100" =>
2444 -- fmrgow result
2445 misc := r.a.mantissa(31 downto 0) & r.b.mantissa(31 downto 0);
2446 when "0110" =>
2447 -- fmrgew result
2448 misc := r.a.mantissa(63 downto 32) & r.b.mantissa(63 downto 32);
2449 when "0111" =>
-- lookup-table estimate, placed below 10 leading zero bits
2450 misc := 10x"000" & inverse_est & 35x"000000000";
-- "1xxx": saturation values for the convert-to-integer instructions,
-- indexed by '1' & insn(9 downto 8) & result sign
2451 when "1000" =>
2452 -- max positive result for fctiw[z]
2453 misc := x"000000007fffffff";
2454 when "1001" =>
2455 -- max negative result for fctiw[z]
2456 misc := x"ffffffff80000000";
2457 when "1010" =>
2458 -- max positive result for fctiwu[z]
2459 misc := x"00000000ffffffff";
2460 when "1011" =>
2461 -- max negative result for fctiwu[z]
2462 misc := x"0000000000000000";
2463 when "1100" =>
2464 -- max positive result for fctid[z]
2465 misc := x"7fffffffffffffff";
2466 when "1101" =>
2467 -- max negative result for fctid[z]
2468 misc := x"8000000000000000";
2469 when "1110" =>
2470 -- max positive result for fctidu[z]
2471 misc := x"ffffffffffffffff";
2472 when "1111" =>
2473 -- max negative result for fctidu[z]
2474 misc := x"0000000000000000";
-- "0101" and any non-binary select value give zero
2475 when others =>
2476 misc := x"0000000000000000";
2477 end case;
2478 result <= misc;
2479 end case;
-- R is reloaded from the result mux on every pass through this code
2480 v.r := result;
-- S register update (only when set_s is asserted); opsel_s picks the source.
2481 if set_s = '1' then
2482 case opsel_s is
2483 when S_NEG =>
-- invert S and add 1 unless X is set
2484 v.s := std_ulogic_vector(unsigned(not r.s) + (not r.x));
2485 when S_MULT =>
-- product bits directly below the ones routed to R by RES_MULT
2486 v.s := multiply_to_f.result(57 downto 2);
2487 when S_SHIFT =>
2488 v.s := shift_res(63 downto 8);
-- the 8 shifter output bits that do not fit into S are folded into X
2489 if shift_res(7 downto 0) /= x"00" then
2490 v.x := '1';
2491 end if;
2492 when others =>
2493 v.s := (others => '0');
2494 end case;
2495 end if;
2496
-- Optional write-back of the shifter output into operand registers A, B or C.
-- Each enabled operand takes new_exp as its exponent and shift_res as its
-- mantissa; class and sign are not touched here.
2497 if set_a = '1' then
2498 v.a.exponent := new_exp;
2499 v.a.mantissa := shift_res;
2500 end if;
2501 if set_b = '1' then
2502 v.b.exponent := new_exp;
2503 v.b.mantissa := shift_res;
2504 end if;
2505 if set_c = '1' then
2506 v.c.exponent := new_exp;
2507 v.c.mantissa := shift_res;
2508 end if;
2509
-- Whenever the shifter output is selected into R, the result exponent follows
-- new_exp (computed outside this section).  This overrides any v.result_exp
-- assignment made earlier in the same pass.
2510 if opsel_r = RES_SHIFT then
2511 v.result_exp := new_exp;
2512 end if;
2513
-- Renormalization set-up: load the shift count with clz(R) - 9 so that a later
-- RES_SHIFT step leaves R with 9 leading zero bits.  This overrides any v.shift
-- assignment made earlier in the same pass.
2514 if renormalize = '1' then
2515 clz := count_left_zeroes(r.r);
2516 if renorm_sqrt = '1' then
2517 -- make denormalized value end up with even exponent
-- forcing the low bit makes the count odd, hence clz - 9 even
2518 clz(0) := '1';
2519 end if;
-- the '0' prefix keeps the count non-negative when converted to signed
2520 v.shift := resize(signed('0' & clz) - 9, EXP_BITS);
2521 end if;
2522
-- Form the value presented as the FP result from the registered state:
-- raw R for integer results, otherwise R packed together with the result
-- sign, class and exponent by pack_dp (defined elsewhere).
2523 if r.int_result = '1' then
2524 fp_result <= r.r;
2525 else
2526 fp_result <= pack_dp(r.result_sign, r.result_class, r.result_exp, r.r,
2527 r.single_prec, r.quieten_nan);
2528 end if;
-- FPSCR C..FU field update, one cycle after update_fprf was requested; the last
-- argument is set when R bit 54 is set and the result was not marked denorm
2529 if r.update_fprf = '1' then
2530 v.fpscr(FPSCR_C downto FPSCR_FU) := result_flags(r.result_sign, r.result_class,
2531 r.r(54) and not r.denorm);
2532 end if;
2533
-- FPSCR summary bits, derived from the next-state FPSCR value in v:
--   VX  = OR of the individual invalid-operation bits (two bit ranges)
--   FEX = OR of (exception bits VX..XX AND their enables VE..XE)
2534 v.fpscr(FPSCR_VX) := (or (v.fpscr(FPSCR_VXSNAN downto FPSCR_VXVC))) or
2535 (or (v.fpscr(FPSCR_VXSOFT downto FPSCR_VXCVI)));
2536 v.fpscr(FPSCR_FEX) := or (v.fpscr(FPSCR_VX downto FPSCR_XX) and
2537 v.fpscr(FPSCR_VE downto FPSCR_XE));
-- FX is set (never cleared here) when an instruction completes with an
-- exception bit set that is not set in r.old_exc
2538 if update_fx = '1' and
2539 (v.fpscr(FPSCR_VX downto FPSCR_XX) and not r.old_exc) /= "00000" then
2540 v.fpscr(FPSCR_FX) := '1';
2541 end if;
-- record forms (rc set) copy FPSCR bits FX..OX into the CR result
2542 if r.rc = '1' then
2543 v.cr_result := v.fpscr(FPSCR_FX downto FPSCR_OX);
2544 end if;
2545
-- Final control resolution.  An illegal instruction cancels completion,
-- interrupt, write-back and busy, and forces IDLE.  Otherwise an interrupt is
-- requested when the instruction completes with FEX set and r.fe_mode is set.
2546 if illegal = '1' then
2547 v.instr_done := '0';
2548 v.do_intr := '0';
2549 v.writing_back := '0';
2550 v.busy := '0';
2551 v.state := IDLE;
2552 else
2553 v.do_intr := v.instr_done and v.fpscr(FPSCR_FEX) and r.fe_mode;
-- busy is only ever set here; it is asserted while the FSM is not returning
-- to IDLE or an interrupt is being raised
2554 if v.state /= IDLE or v.do_intr = '1' then
2555 v.busy := '1';
2556 end if;
2557 end if;
2558
-- Drive the process outputs: the computed next-state record goes to rin
-- (presumably registered into r on the clock edge elsewhere -- confirm), and the
-- illegal-instruction flag is reported on e_out.
2559 rin <= v;
2560 e_out.illegal <= illegal;
2561 end process;
2562
2563 end architecture behaviour;