arm: Add support for ARMv8 (AArch64 & AArch32)
[gem5.git] / src / arch / arm / isa / insts / fp64.isa
1 // -*- mode:c++ -*-
2
3 // Copyright (c) 2012-2013 ARM Limited
4 // All rights reserved
5 //
6 // The license below extends only to copyright in the software and shall
7 // not be construed as granting a license to any other intellectual
8 // property including but not limited to intellectual property relating
9 // to a hardware implementation of the functionality of the software
10 // licensed hereunder. You may use the software subject to the license
11 // terms below provided that you ensure that this notice is replicated
12 // unmodified and in its entirety in all distributions of the software,
13 // modified or unmodified, in source code or in binary form.
14 //
15 // Redistribution and use in source and binary forms, with or without
16 // modification, are permitted provided that the following conditions are
17 // met: redistributions of source code must retain the above copyright
18 // notice, this list of conditions and the following disclaimer;
19 // redistributions in binary form must reproduce the above copyright
20 // notice, this list of conditions and the following disclaimer in the
21 // documentation and/or other materials provided with the distribution;
22 // neither the name of the copyright holders nor the names of its
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Authors: Thomas Grocutt
39 // Edmund Grimley Evans
40
41 let {{
42
# Scalar FMOV generators.  Each *Code string below becomes the C++ execute
# body of one generated instruction.  AA64FpDestP0..P3_uw are the four
# 32-bit words of the 128-bit SIMD/FP destination register (P0 = bits
# [31:0], P3 = bits [127:96]); unused upper words are explicitly zeroed.
43 header_output = ""
44 decoder_output = ""
45 exec_output = ""
46
# FMOV (immediate), single precision: immediate into the low word.
47 fmovImmSCode = vfp64EnabledCheckCode + '''
48 AA64FpDestP0_uw = bits(imm, 31, 0);
49 AA64FpDestP1_uw = 0;
50 AA64FpDestP2_uw = 0;
51 AA64FpDestP3_uw = 0;
52 '''
53 fmovImmSIop = InstObjParams("fmov", "FmovImmS", "FpRegImmOp",
54 { "code": fmovImmSCode,
55 "op_class": "SimdFloatMiscOp" }, [])
56 header_output += FpRegImmOpDeclare.subst(fmovImmSIop);
# NOTE(review): this uses FpRegImmOpConstructor while the D variant below
# uses AA64FpRegImmOpConstructor -- confirm the asymmetry is intentional.
57 decoder_output += FpRegImmOpConstructor.subst(fmovImmSIop);
58 exec_output += BasicExecute.subst(fmovImmSIop);
59
# FMOV (immediate), double precision: 64-bit immediate split across P0/P1.
60 fmovImmDCode = vfp64EnabledCheckCode + '''
61 AA64FpDestP0_uw = bits(imm, 31, 0);
62 AA64FpDestP1_uw = bits(imm, 63, 32);
63 AA64FpDestP2_uw = 0;
64 AA64FpDestP3_uw = 0;
65 '''
66 fmovImmDIop = InstObjParams("fmov", "FmovImmD", "FpRegImmOp",
67 { "code": fmovImmDCode,
68 "op_class": "SimdFloatMiscOp" }, [])
69 header_output += FpRegImmOpDeclare.subst(fmovImmDIop);
70 decoder_output += AA64FpRegImmOpConstructor.subst(fmovImmDIop);
71 exec_output += BasicExecute.subst(fmovImmDIop);
72
# FMOV (register), single precision: copy low word, zero the rest.
73 fmovRegSCode = vfp64EnabledCheckCode + '''
74 AA64FpDestP0_uw = AA64FpOp1P0_uw;
75 AA64FpDestP1_uw = 0;
76 AA64FpDestP2_uw = 0;
77 AA64FpDestP3_uw = 0;
78 '''
79 fmovRegSIop = InstObjParams("fmov", "FmovRegS", "FpRegRegOp",
80 { "code": fmovRegSCode,
81 "op_class": "SimdFloatMiscOp" }, [])
82 header_output += FpRegRegOpDeclare.subst(fmovRegSIop);
83 decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegSIop);
84 exec_output += BasicExecute.subst(fmovRegSIop);
85
# FMOV (register), double precision: copy low two words, zero the rest.
86 fmovRegDCode = vfp64EnabledCheckCode + '''
87 AA64FpDestP0_uw = AA64FpOp1P0_uw;
88 AA64FpDestP1_uw = AA64FpOp1P1_uw;
89 AA64FpDestP2_uw = 0;
90 AA64FpDestP3_uw = 0;
91 '''
92 fmovRegDIop = InstObjParams("fmov", "FmovRegD", "FpRegRegOp",
93 { "code": fmovRegDCode,
94 "op_class": "SimdFloatMiscOp" }, [])
95 header_output += FpRegRegOpDeclare.subst(fmovRegDIop);
96 decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegDIop);
97 exec_output += BasicExecute.subst(fmovRegDIop);
98
# FMOV from a 32-bit core (W) register into the FP register's low word.
99 fmovCoreRegWCode = vfp64EnabledCheckCode + '''
100 AA64FpDestP0_uw = WOp1_uw;
101 AA64FpDestP1_uw = 0;
102 AA64FpDestP2_uw = 0;
103 AA64FpDestP3_uw = 0;
104 '''
105 fmovCoreRegWIop = InstObjParams("fmov", "FmovCoreRegW", "FpRegRegOp",
106 { "code": fmovCoreRegWCode,
107 "op_class": "SimdFloatMiscOp" }, [])
108 header_output += FpRegRegOpDeclare.subst(fmovCoreRegWIop);
109 decoder_output += AA64FpRegRegOpConstructor.subst(fmovCoreRegWIop);
110 exec_output += BasicExecute.subst(fmovCoreRegWIop);
111
# FMOV from a 64-bit core (X) register into the FP register's low 64 bits.
# The assignment to P0 truncates XOp1_ud to its low 32 bits; P1 takes the
# high 32 bits via the shift.
112 fmovCoreRegXCode = vfp64EnabledCheckCode + '''
113 AA64FpDestP0_uw = XOp1_ud;
114 AA64FpDestP1_uw = XOp1_ud >> 32;
115 AA64FpDestP2_uw = 0;
116 AA64FpDestP3_uw = 0;
117 '''
118 fmovCoreRegXIop = InstObjParams("fmov", "FmovCoreRegX", "FpRegRegOp",
119 { "code": fmovCoreRegXCode,
120 "op_class": "SimdFloatMiscOp" }, [])
121 header_output += FpRegRegOpDeclare.subst(fmovCoreRegXIop);
122 decoder_output += AA64FpRegRegOpConstructor.subst(fmovCoreRegXIop);
123 exec_output += BasicExecute.subst(fmovCoreRegXIop);
124
# Upper-half variant ("U"): writes only words P2/P3 (bits [127:64]) from
# the X register; the generated code does not touch P0/P1.
125 fmovUCoreRegXCode = vfp64EnabledCheckCode + '''
126 AA64FpDestP2_uw = XOp1_ud;
127 AA64FpDestP3_uw = XOp1_ud >> 32;
128 '''
129 fmovUCoreRegXIop = InstObjParams("fmov", "FmovUCoreRegX", "FpRegRegOp",
130 { "code": fmovUCoreRegXCode,
131 "op_class": "SimdFloatMiscOp" }, [])
132 header_output += FpRegRegOpDeclare.subst(fmovUCoreRegXIop);
133 decoder_output += AA64FpRegRegOpConstructor.subst(fmovUCoreRegXIop);
134 exec_output += BasicExecute.subst(fmovUCoreRegXIop);
135
# FMOV from the FP register's low word to a 32-bit core (W) register.
136 fmovRegCoreWCode = vfp64EnabledCheckCode + '''
137 WDest = AA64FpOp1P0_uw;
138 '''
139 fmovRegCoreWIop = InstObjParams("fmov", "FmovRegCoreW", "FpRegRegOp",
140 { "code": fmovRegCoreWCode,
141 "op_class": "SimdFloatMiscOp" }, [])
142 header_output += FpRegRegOpDeclare.subst(fmovRegCoreWIop);
143 decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegCoreWIop);
144 exec_output += BasicExecute.subst(fmovRegCoreWIop);
145
# FMOV from the FP register's low 64 bits (P1:P0) to an X register.
146 fmovRegCoreXCode = vfp64EnabledCheckCode + '''
147 XDest = ( ((uint64_t) AA64FpOp1P1_uw) << 32) | AA64FpOp1P0_uw;
148 '''
149 fmovRegCoreXIop = InstObjParams("fmov", "FmovRegCoreX", "FpRegRegOp",
150 { "code": fmovRegCoreXCode,
151 "op_class": "SimdFloatMiscOp" }, [])
152 header_output += FpRegRegOpDeclare.subst(fmovRegCoreXIop);
153 decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegCoreXIop);
154 exec_output += BasicExecute.subst(fmovRegCoreXIop);
155
# Upper-half variant: FP register bits [127:64] (P3:P2) to an X register.
156 fmovURegCoreXCode = vfp64EnabledCheckCode + '''
157 XDest = ( ((uint64_t) AA64FpOp1P3_uw) << 32) | AA64FpOp1P2_uw;
158 '''
159 fmovURegCoreXIop = InstObjParams("fmov", "FmovURegCoreX", "FpRegRegOp",
160 { "code": fmovURegCoreXCode,
161 "op_class": "SimdFloatMiscOp" }, [])
162 header_output += FpRegRegOpDeclare.subst(fmovURegCoreXIop);
163 decoder_output += AA64FpRegRegOpConstructor.subst(fmovURegCoreXIop);
164 exec_output += BasicExecute.subst(fmovURegCoreXIop);
165 }};
166
167 let {{
168
# Generators for scalar FP arithmetic: unary, binary and ternary
# (fused multiply-add family) operations, in S and D variants.
169 header_output = ""
170 decoder_output = ""
171 exec_output = ""
172
# C++ body template for a unary single-precision op; %(op)s is substituted
# with an fplib call that consumes cOp1 and fpscr.
173 singleIntConvCode = vfp64EnabledCheckCode + '''
174 FPSCR fpscr = (FPSCR) FpscrExc;
175 uint32_t cOp1 = AA64FpOp1P0_uw;
176 uint32_t cDest = %(op)s;
177 AA64FpDestP0_uw = cDest;
178 AA64FpDestP1_uw = 0;
179 AA64FpDestP2_uw = 0;
180 AA64FpDestP3_uw = 0;
181 FpscrExc = fpscr;
182 '''
183
# Same as above but with two source operands (binary ops).
184 singleIntConvCode2 = vfp64EnabledCheckCode + '''
185 FPSCR fpscr = (FPSCR) FpscrExc;
186 uint32_t cOp1 = AA64FpOp1P0_uw;
187 uint32_t cOp2 = AA64FpOp2P0_uw;
188 uint32_t cDest = %(op)s;
189 AA64FpDestP0_uw = cDest;
190 AA64FpDestP1_uw = 0;
191 AA64FpDestP2_uw = 0;
192 AA64FpDestP3_uw = 0;
193 FpscrExc = fpscr;
194 '''
195
# Expression templates for the binaryOp/unaryOp helpers; %(func)s is the
# function pointer/functor to apply.
196 singleBinOp = "binaryOp(fpscr, AA64FpOp1P0, AA64FpOp2P0," + \
197 "%(func)s, fpscr.fz, fpscr.dn, fpscr.rMode)"
198 singleUnaryOp = "unaryOp(fpscr, AA64FpOp1P0, %(func)s, fpscr.fz, fpscr.rMode)"
199
# Double-precision counterparts: the 64-bit operand is reassembled from
# the two 32-bit register words before the fplib call.
200 doubleIntConvCode = vfp64EnabledCheckCode + '''
201 FPSCR fpscr = (FPSCR) FpscrExc;
202 uint64_t cOp1 = ((uint64_t) AA64FpOp1P1_uw) << 32 | AA64FpOp1P0_uw;
203 uint64_t cDest = %(op)s;
204 AA64FpDestP0_uw = cDest & 0xFFFFFFFF;
205 AA64FpDestP1_uw = cDest >> 32;
206 AA64FpDestP2_uw = 0;
207 AA64FpDestP3_uw = 0;
208 FpscrExc = fpscr;
209 '''
210
211 doubleIntConvCode2 = vfp64EnabledCheckCode + '''
212 FPSCR fpscr = (FPSCR) FpscrExc;
213 uint64_t cOp1 = ((uint64_t) AA64FpOp1P1_uw) << 32 | AA64FpOp1P0_uw;
214 uint64_t cOp2 = ((uint64_t) AA64FpOp2P1_uw) << 32 | AA64FpOp2P0_uw;
215 uint64_t cDest = %(op)s;
216 AA64FpDestP0_uw = cDest & 0xFFFFFFFF;
217 AA64FpDestP1_uw = cDest >> 32;
218 AA64FpDestP2_uw = 0;
219 AA64FpDestP3_uw = 0;
220 FpscrExc = fpscr;
221 '''
222
223 doubleBinOp = '''
224 binaryOp(fpscr, dbl(AA64FpOp1P0_uw, AA64FpOp1P1_uw),
225 dbl(AA64FpOp2P0_uw, AA64FpOp2P1_uw),
226 %(func)s, fpscr.fz, fpscr.dn, fpscr.rMode);
227 '''
228 doubleUnaryOp = '''
229 unaryOp(fpscr, dbl(AA64FpOp1P0_uw, AA64FpOp1P1_uw), %(func)s,
230 fpscr.fz, fpscr.rMode)
231 '''
232
# Emit S and D variants of a four-register (dest + 3 sources) FP op.
# sOp/dOp are C++ expressions (strings) computing cDest from cOp1..cOp3.
233 def buildTernaryFpOp(name, opClass, sOp, dOp):
234 global header_output, decoder_output, exec_output
235 for isDouble in True, False:
236 code = vfp64EnabledCheckCode + '''
237 FPSCR fpscr = (FPSCR) FpscrExc;
238 '''
239 if isDouble:
240 code += '''
241 uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32;
242 uint64_t cOp2 = AA64FpOp2P0_uw | (uint64_t)AA64FpOp2P1_uw << 32;
243 uint64_t cOp3 = AA64FpOp3P0_uw | (uint64_t)AA64FpOp3P1_uw << 32;
244 uint64_t cDest;
245 ''' "cDest = " + dOp + ";" + '''
246 AA64FpDestP0_uw = cDest;
247 AA64FpDestP1_uw = cDest >> 32;
248 '''
249 else:
250 code += '''
251 uint32_t cOp1 = AA64FpOp1P0_uw;
252 uint32_t cOp2 = AA64FpOp2P0_uw;
253 uint32_t cOp3 = AA64FpOp3P0_uw;
254 uint32_t cDest;
255 ''' "cDest = " + sOp + ";" + '''
256 AA64FpDestP0_uw = cDest;
257 AA64FpDestP1_uw = 0;
258 '''
259 code += '''
260 AA64FpDestP2_uw = 0;
261 AA64FpDestP3_uw = 0;
262 FpscrExc = fpscr;
263 '''
264
265 iop = InstObjParams(name.lower(), name + ("D" if isDouble else "S"),
266 "FpRegRegRegRegOp",
267 { "code": code, "op_class": opClass }, [])
268
269 header_output += AA64FpRegRegRegRegOpDeclare.subst(iop)
270 decoder_output += AA64FpRegRegRegRegOpConstructor.subst(iop)
271 exec_output += BasicExecute.subst(iop)
272
# FMADD/FMSUB/FNMADD/FNMSUB: all map onto fplibMulAdd with the operands
# negated as required; the negated-sign placement follows the expressions
# passed in below.
273 buildTernaryFpOp("FMAdd", "SimdFloatMultAccOp",
274 "fplibMulAdd<uint32_t>(cOp3, cOp1, cOp2, fpscr)",
275 "fplibMulAdd<uint64_t>(cOp3, cOp1, cOp2, fpscr)" )
276 buildTernaryFpOp("FMSub", "SimdFloatMultAccOp",
277 "fplibMulAdd<uint32_t>(cOp3, fplibNeg<uint32_t>(cOp1), cOp2, fpscr)",
278 "fplibMulAdd<uint64_t>(cOp3, fplibNeg<uint64_t>(cOp1), cOp2, fpscr)" )
279 buildTernaryFpOp("FNMAdd", "SimdFloatMultAccOp",
280 "fplibMulAdd<uint32_t>(fplibNeg<uint32_t>(cOp3), fplibNeg<uint32_t>(cOp1), cOp2, fpscr)",
281 "fplibMulAdd<uint64_t>(fplibNeg<uint64_t>(cOp3), fplibNeg<uint64_t>(cOp1), cOp2, fpscr)" )
282 buildTernaryFpOp("FNMSub", "SimdFloatMultAccOp",
283 "fplibMulAdd<uint32_t>(fplibNeg<uint32_t>(cOp3), cOp1, cOp2, fpscr)",
284 "fplibMulAdd<uint64_t>(fplibNeg<uint64_t>(cOp3), cOp1, cOp2, fpscr)" )
285
# Emit S and D variants of a two-source FP op.  singleOp/doubleOp are C++
# expressions substituted into the *IntConvCode2 templates above.
286 def buildBinFpOp(name, Name, base, opClass, singleOp, doubleOp):
287 global header_output, decoder_output, exec_output
288
289 code = singleIntConvCode2 % { "op": singleOp }
290 sIop = InstObjParams(name, Name + "S", base,
291 { "code": code,
292 "op_class": opClass }, [])
293
294 code = doubleIntConvCode2 % { "op": doubleOp }
295 dIop = InstObjParams(name, Name + "D", base,
296 { "code": code,
297 "op_class": opClass }, [])
298
# Templates are looked up by name: declaration uses the plain base name,
# the constructor uses the AA64-prefixed variant.
299 declareTempl = eval( base + "Declare");
300 constructorTempl = eval("AA64" + base + "Constructor");
301
302 for iop in sIop, dIop:
303 header_output += declareTempl.subst(iop)
304 decoder_output += constructorTempl.subst(iop)
305 exec_output += BasicExecute.subst(iop)
306
307 buildBinFpOp("fadd", "FAdd", "FpRegRegRegOp", "SimdFloatAddOp",
308 "fplibAdd<uint32_t>(cOp1, cOp2, fpscr)",
309 "fplibAdd<uint64_t>(cOp1, cOp2, fpscr)")
310 buildBinFpOp("fsub", "FSub", "FpRegRegRegOp", "SimdFloatAddOp",
311 "fplibSub<uint32_t>(cOp1, cOp2, fpscr)",
312 "fplibSub<uint64_t>(cOp1, cOp2, fpscr)")
313 buildBinFpOp("fdiv", "FDiv", "FpRegRegRegOp", "SimdFloatDivOp",
314 "fplibDiv<uint32_t>(cOp1, cOp2, fpscr)",
315 "fplibDiv<uint64_t>(cOp1, cOp2, fpscr)")
316 buildBinFpOp("fmul", "FMul", "FpRegRegRegOp", "SimdFloatMultOp",
317 "fplibMul<uint32_t>(cOp1, cOp2, fpscr)",
318 "fplibMul<uint64_t>(cOp1, cOp2, fpscr)")
319 buildBinFpOp("fnmul", "FNMul", "FpRegRegRegOp", "SimdFloatMultOp",
320 "fplibNeg<uint32_t>(fplibMul<uint32_t>(cOp1, cOp2, fpscr))",
321 "fplibNeg<uint64_t>(fplibMul<uint64_t>(cOp1, cOp2, fpscr))")
322 buildBinFpOp("fmin", "FMin", "FpRegRegRegOp", "SimdFloatCmpOp",
323 "fplibMin<uint32_t>(cOp1, cOp2, fpscr)",
324 "fplibMin<uint64_t>(cOp1, cOp2, fpscr)")
325 buildBinFpOp("fmax", "FMax", "FpRegRegRegOp", "SimdFloatCmpOp",
326 "fplibMax<uint32_t>(cOp1, cOp2, fpscr)",
327 "fplibMax<uint64_t>(cOp1, cOp2, fpscr)")
328 buildBinFpOp("fminnm", "FMinNM", "FpRegRegRegOp", "SimdFloatCmpOp",
329 "fplibMinNum<uint32_t>(cOp1, cOp2, fpscr)",
330 "fplibMinNum<uint64_t>(cOp1, cOp2, fpscr)")
331 buildBinFpOp("fmaxnm", "FMaxNM", "FpRegRegRegOp", "SimdFloatCmpOp",
332 "fplibMaxNum<uint32_t>(cOp1, cOp2, fpscr)",
333 "fplibMaxNum<uint64_t>(cOp1, cOp2, fpscr)")
334
# Emit S and D variants of a one-source FP op; doubleOp defaults to
# singleOp when the same expression works for both widths.
335 def buildUnaryFpOp(name, Name, base, opClass, singleOp, doubleOp = None):
336 if doubleOp is None:
337 doubleOp = singleOp
338 global header_output, decoder_output, exec_output
339
340 code = singleIntConvCode % { "op": singleOp }
341 sIop = InstObjParams(name, Name + "S", base,
342 { "code": code,
343 "op_class": opClass }, [])
344 code = doubleIntConvCode % { "op": doubleOp }
345 dIop = InstObjParams(name, Name + "D", base,
346 { "code": code,
347 "op_class": opClass }, [])
348
349 declareTempl = eval( base + "Declare");
350 constructorTempl = eval("AA64" + base + "Constructor");
351
352 for iop in sIop, dIop:
353 header_output += declareTempl.subst(iop)
354 decoder_output += constructorTempl.subst(iop)
355 exec_output += BasicExecute.subst(iop)
356
357 buildUnaryFpOp("fsqrt", "FSqrt", "FpRegRegOp", "SimdFloatSqrtOp",
358 "fplibSqrt<uint32_t>(cOp1, fpscr)", "fplibSqrt<uint64_t>(cOp1, fpscr)")
359
# Like buildUnaryFpOp but with a selectable body template.
360 def buildSimpleUnaryFpOp(name, Name, base, opClass, singleOp,
361 doubleOp = None, isIntConv = True):
362 if doubleOp is None:
363 doubleOp = singleOp
364 global header_output, decoder_output, exec_output
365
366 if isIntConv:
367 sCode = singleIntConvCode
368 dCode = doubleIntConvCode
369 else:
370 sCode = singleCode
371 dCode = doubleCode
# NOTE(review): singleCode/doubleCode are not defined anywhere in this
# let block, so the isIntConv=False path would raise a NameError if taken.
# All call sites below use the default isIntConv=True -- confirm whether
# the False path is dead code or a missing definition.
372
373 for code, op, suffix in [[sCode, singleOp, "S"],
374 [dCode, doubleOp, "D"]]:
375 iop = InstObjParams(name, Name + suffix, base,
376 { "code": code % { "op": op },
377 "op_class": opClass }, [])
378
379 declareTempl = eval( base + "Declare");
380 constructorTempl = eval("AA64" + base + "Constructor");
381
382 header_output += declareTempl.subst(iop)
383 decoder_output += constructorTempl.subst(iop)
384 exec_output += BasicExecute.subst(iop)
385
# FNEG/FABS and the FRINT* rounding family; the second argument of
# fplibRoundInt selects the rounding mode, the third ("exact") is true
# only for FRINTX.
386 buildSimpleUnaryFpOp("fneg", "FNeg", "FpRegRegOp", "SimdFloatMiscOp",
387 "fplibNeg<uint32_t>(cOp1)", "fplibNeg<uint64_t>(cOp1)")
388 buildSimpleUnaryFpOp("fabs", "FAbs", "FpRegRegOp", "SimdFloatMiscOp",
389 "fplibAbs<uint32_t>(cOp1)", "fplibAbs<uint64_t>(cOp1)")
390 buildSimpleUnaryFpOp("frintn", "FRIntN", "FpRegRegOp", "SimdFloatMiscOp",
391 "fplibRoundInt<uint32_t>(cOp1, FPRounding_TIEEVEN, false, fpscr)",
392 "fplibRoundInt<uint64_t>(cOp1, FPRounding_TIEEVEN, false, fpscr)")
393 buildSimpleUnaryFpOp("frintp", "FRIntP", "FpRegRegOp", "SimdFloatMiscOp",
394 "fplibRoundInt<uint32_t>(cOp1, FPRounding_POSINF, false, fpscr)",
395 "fplibRoundInt<uint64_t>(cOp1, FPRounding_POSINF, false, fpscr)")
396 buildSimpleUnaryFpOp("frintm", "FRIntM", "FpRegRegOp", "SimdFloatMiscOp",
397 "fplibRoundInt<uint32_t>(cOp1, FPRounding_NEGINF, false, fpscr)",
398 "fplibRoundInt<uint64_t>(cOp1, FPRounding_NEGINF, false, fpscr)")
399 buildSimpleUnaryFpOp("frintz", "FRIntZ", "FpRegRegOp", "SimdFloatMiscOp",
400 "fplibRoundInt<uint32_t>(cOp1, FPRounding_ZERO, false, fpscr)",
401 "fplibRoundInt<uint64_t>(cOp1, FPRounding_ZERO, false, fpscr)")
402 buildSimpleUnaryFpOp("frinta", "FRIntA", "FpRegRegOp", "SimdFloatMiscOp",
403 "fplibRoundInt<uint32_t>(cOp1, FPRounding_TIEAWAY, false, fpscr)",
404 "fplibRoundInt<uint64_t>(cOp1, FPRounding_TIEAWAY, false, fpscr)")
405 buildSimpleUnaryFpOp("frinti", "FRIntI", "FpRegRegOp", "SimdFloatMiscOp",
406 "fplibRoundInt<uint32_t>(cOp1, FPCRRounding(fpscr), false, fpscr)",
407 "fplibRoundInt<uint64_t>(cOp1, FPCRRounding(fpscr), false, fpscr)")
408 buildSimpleUnaryFpOp("frintx", "FRIntX", "FpRegRegOp", "SimdFloatMiscOp",
409 "fplibRoundInt<uint32_t>(cOp1, FPCRRounding(fpscr), true, fpscr)",
410 "fplibRoundInt<uint64_t>(cOp1, FPCRRounding(fpscr), true, fpscr)")
411 }};
412
413 let {{
414
# Generators for int<->FP conversions, FP<->FP precision conversions
# (including half precision), and the FCMP/FCCMP compare families.
415 header_output = ""
416 decoder_output = ""
417 exec_output = ""
418
419 # Creates the integer to floating point instructions, including variants for
420 # signed/unsigned, float/double, etc
# Loop axes: source core register width (W/X), destination precision
# (S/D), and signedness (U/S).  fplibFixedToFP with 0 fraction bits is an
# integer-to-FP conversion; its third argument is "unsigned".
421 for regL, regOpL, width in [["W", "w", 32],
422 ["X", "d", 64]]:
423 for isDouble in True, False:
424 for us, usCode in [["U", "uint%d_t cSrc = %sOp1_u%s;" %(width, regL, regOpL)],
425 ["S", "int%d_t cSrc = %sOp1_u%s;" %(width, regL, regOpL)]]:
426 fcvtIntFpDCode = vfp64EnabledCheckCode + '''
427 FPSCR fpscr = (FPSCR) FpscrExc;
428 %s
429 ''' %(usCode)
430
431 if isDouble:
432 fcvtIntFpDCode += '''
433 uint64_t cDest = fplibFixedToFP<uint64_t>(cSrc, 0,
434 %s, FPCRRounding(fpscr), fpscr);
435 AA64FpDestP0_uw = cDest;
436 AA64FpDestP1_uw = cDest >> 32;
437 ''' % ("true" if us == "U" else "false")
438 else:
439 fcvtIntFpDCode += '''
440 uint32_t cDest = fplibFixedToFP<uint32_t>(cSrc, 0,
441 %s, FPCRRounding(fpscr), fpscr);
442 AA64FpDestP0_uw = cDest;
443 AA64FpDestP1_uw = 0;
444 ''' % ("true" if us == "U" else "false")
445 fcvtIntFpDCode += '''
446 AA64FpDestP2_uw = 0;
447 AA64FpDestP3_uw = 0;
448 FpscrExc = fpscr;
449 '''
450
451 instName = "Fcvt%s%sIntFp%s" %(regL, us, "D" if isDouble else "S")
452 mnem = "%scvtf" %(us.lower())
453 fcvtIntFpDIop = InstObjParams(mnem, instName, "FpRegRegOp",
454 { "code": fcvtIntFpDCode,
455 "op_class": "SimdFloatCvtOp" }, [])
456 header_output += FpRegRegOpDeclare.subst(fcvtIntFpDIop);
457 decoder_output += AA64FpRegRegOpConstructor.subst(fcvtIntFpDIop);
458 exec_output += BasicExecute.subst(fcvtIntFpDIop);
459
460 # Generates the floating point to integer conversion instructions in various
461 # variants, eg signed/unsigned
# One instruction per explicit rounding mode (FCVTN/P/M/Z/A families).
462 def buildFpCvtIntOp(isDouble, isSigned, isXReg):
463 global header_output, decoder_output, exec_output
464
465 for rmode, roundingMode in [["N", "FPRounding_TIEEVEN"],
466 ["P", "FPRounding_POSINF"],
467 ["M", "FPRounding_NEGINF"],
468 ["Z", "FPRounding_ZERO"],
469 ["A", "FPRounding_TIEAWAY"]]:
470 fcvtFpIntCode = vfp64EnabledCheckCode + '''
471 FPSCR fpscr = (FPSCR) FpscrExc;'''
472 if isDouble:
473 fcvtFpIntCode += '''
474 uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32;
475 '''
476 else:
477 fcvtFpIntCode += "uint32_t cOp1 = AA64FpOp1P0_uw;"
478
# fplibFPToFixed args: value, fraction bits (0 = integer), "unsigned"
# flag, rounding mode, status.
479 fcvtFpIntCode += '''
480 %sDest = fplibFPToFixed<uint%s_t, uint%s_t>(cOp1, 0, %s, %s, fpscr);
481 FpscrExc = fpscr;
482 ''' %("X" if isXReg else "W",
483 "64" if isDouble else "32",
484 "64" if isXReg else "32",
485 "false" if isSigned else "true",
486 roundingMode)
487
488 instName = "FcvtFp%sInt%s%s%s" %("S" if isSigned else "U",
489 "X" if isXReg else "W",
490 "D" if isDouble else "S", rmode)
491 mnem = "fcvt%s%s" %(rmode, "s" if isSigned else "u")
492 fcvtFpIntIop = InstObjParams(mnem, instName, "FpRegRegOp",
493 { "code": fcvtFpIntCode,
494 "op_class": "SimdFloatCvtOp" }, [])
495 header_output += FpRegRegOpDeclare.subst(fcvtFpIntIop);
# NOTE(review): this uses FpRegRegOpConstructor while most other
# generators in this file use AA64FpRegRegOpConstructor -- confirm
# whether the non-AA64 template is intended here.
496 decoder_output += FpRegRegOpConstructor.subst(fcvtFpIntIop);
497 exec_output += BasicExecute.subst(fcvtFpIntIop);
498
499 # Now actually do the building with the different variants
500 for isDouble in True, False:
501 for isSigned in True, False:
502 for isXReg in True, False:
503 buildFpCvtIntOp(isDouble, isSigned, isXReg)
504
# FCVT single -> double precision widening conversion.
505 fcvtFpSFpDCode = vfp64EnabledCheckCode + '''
506 FPSCR fpscr = (FPSCR) FpscrExc;
507 uint64_t cDest = fplibConvert<uint32_t, uint64_t>(AA64FpOp1P0_uw,
508 FPCRRounding(fpscr), fpscr);
509 AA64FpDestP0_uw = cDest;
510 AA64FpDestP1_uw = cDest >> 32;
511 AA64FpDestP2_uw = 0;
512 AA64FpDestP3_uw = 0;
513 FpscrExc = fpscr;
514 '''
515 fcvtFpSFpDIop = InstObjParams("fcvt", "FCvtFpSFpD", "FpRegRegOp",
516 { "code": fcvtFpSFpDCode,
517 "op_class": "SimdFloatCvtOp" }, [])
518 header_output += FpRegRegOpDeclare.subst(fcvtFpSFpDIop);
519 decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpSFpDIop);
520 exec_output += BasicExecute.subst(fcvtFpSFpDIop);
521
# FCVT double -> single precision narrowing conversion.
522 fcvtFpDFpSCode = vfp64EnabledCheckCode + '''
523 FPSCR fpscr = (FPSCR) FpscrExc;
524 uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32;
525 AA64FpDestP0_uw = fplibConvert<uint64_t, uint32_t>(cOp1,
526 FPCRRounding(fpscr), fpscr);
527 AA64FpDestP1_uw = 0;
528 AA64FpDestP2_uw = 0;
529 AA64FpDestP3_uw = 0;
530 FpscrExc = fpscr;
531 '''
532 fcvtFpDFpSIop = InstObjParams("fcvt", "FcvtFpDFpS", "FpRegRegOp",
533 {"code": fcvtFpDFpSCode,
534 "op_class": "SimdFloatCvtOp" }, [])
535 header_output += FpRegRegOpDeclare.subst(fcvtFpDFpSIop);
536 decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpDFpSIop);
537 exec_output += BasicExecute.subst(fcvtFpDFpSIop);
538
539 # Half precision to single or double precision conversion
540 for isDouble in True, False:
541 code = vfp64EnabledCheckCode + '''
542 FPSCR fpscr = (FPSCR) FpscrExc;
543 %s cDest = fplibConvert<uint16_t, uint%s_t>(AA64FpOp1P0_uw,
544 FPCRRounding(fpscr), fpscr);
545 ''' % ("uint64_t" if isDouble else "uint32_t",
546 "64" if isDouble else "32")
547 if isDouble:
548 code += '''
549 AA64FpDestP0_uw = cDest;
550 AA64FpDestP1_uw = cDest >> 32;
551 '''
552 else:
553 code += '''
554 AA64FpDestP0_uw = cDest;
555 AA64FpDestP1_uw = 0;
556 '''
557 code += '''
558 AA64FpDestP2_uw = 0;
559 AA64FpDestP3_uw = 0;
560 FpscrExc = fpscr;
561 '''
562
563 instName = "FcvtFpHFp%s" %("D" if isDouble else "S")
564 fcvtFpHFpIop = InstObjParams("fcvt", instName, "FpRegRegOp",
565 { "code": code,
566 "op_class": "SimdFloatCvtOp" }, [])
567 header_output += FpRegRegOpDeclare.subst(fcvtFpHFpIop);
568 decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpHFpIop);
569 exec_output += BasicExecute.subst(fcvtFpHFpIop);
570
571 # single or double precision to Half precision conversion
572 for isDouble in True, False:
573 code = vfp64EnabledCheckCode + '''
574 FPSCR fpscr = (FPSCR) FpscrExc;
575 %s;
576 AA64FpDestP0_uw = fplibConvert<uint%s_t, uint16_t>(cOp1,
577 FPCRRounding(fpscr), fpscr);
578 AA64FpDestP1_uw = 0;
579 AA64FpDestP2_uw = 0;
580 AA64FpDestP3_uw = 0;
581 FpscrExc = fpscr;
582 ''' % ("uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32"
583 if isDouble else "uint32_t cOp1 = AA64FpOp1P0_uw",
584 "64" if isDouble else "32")
585
586 instName = "FcvtFp%sFpH" %("D" if isDouble else "S")
587 fcvtFpFpHIop = InstObjParams("fcvt", instName, "FpRegRegOp",
588 { "code": code,
589 "op_class": "SimdFloatCvtOp" }, [])
590 header_output += FpRegRegOpDeclare.subst(fcvtFpFpHIop);
591 decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpFpHIop);
592 exec_output += BasicExecute.subst(fcvtFpFpHIop);
593
594 # Build the various versions of the floating point compare instructions
# isQuiet selects FCMP vs FCMPE (the signaling variant passes true to
# fplibCompare); isImm compares against an immediate instead of a second
# register.  Note the first compare operand is read from the AA64FpDest
# slots, i.e. the destination register fields carry the first source.
595 def buildFCmpOp(isQuiet, isDouble, isImm):
596 global header_output, decoder_output, exec_output
597
598 fcmpCode = vfp64EnabledCheckCode + '''
599 FPSCR fpscr = (FPSCR) FpscrExc;
600 %s cOp1 = %s;
601 ''' % ("uint64_t" if isDouble else "uint32_t",
602 "AA64FpDestP0_uw | (uint64_t)AA64FpDestP1_uw << 32"
603 if isDouble else "AA64FpDestP0_uw")
604 if isImm:
605 fcmpCode += '''
606 %s cOp2 = imm;
607 ''' % ("uint64_t" if isDouble else "uint32_t")
608 else:
609 fcmpCode += '''
610 %s cOp2 = %s;
611 ''' % ("uint64_t" if isDouble else "uint32_t",
612 "AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32"
613 if isDouble else "AA64FpOp1P0_uw")
# fplibCompare packs N:Z:C:V into a 4-bit value; unpack into the
# condition-code registers.
614 fcmpCode += '''
615 int cc = fplibCompare<uint%s_t>(cOp1, cOp2, %s, fpscr);
616 CondCodesNZ = cc >> 2 & 3;
617 CondCodesC = cc >> 1 & 1;
618 CondCodesV = cc & 1;
619 FpCondCodes = fpscr & FpCondCodesMask;
620 FpscrExc = fpscr;
621 ''' % ("64" if isDouble else "32", "false" if isQuiet else "true")
622
623 typeName = "Imm" if isImm else "Reg"
624 instName = "FCmp%s%s%s" %("" if isQuiet else "E", typeName,
625 "D" if isDouble else "S")
626 fcmpIop = InstObjParams("fcmp%s" %("" if isQuiet else "e"), instName,
627 "FpReg%sOp" %(typeName),
628 {"code": fcmpCode,
629 "op_class": "SimdFloatCmpOp"}, [])
630
631 declareTemp = eval("FpReg%sOpDeclare" %(typeName));
632 constructorTemp = eval("AA64FpReg%sOpConstructor" %(typeName));
633 header_output += declareTemp.subst(fcmpIop);
634 decoder_output += constructorTemp.subst(fcmpIop);
635 exec_output += BasicExecute.subst(fcmpIop);
636
637 for isQuiet in True, False:
638 for isDouble in True, False:
639 for isImm in True, False:
640 buildFCmpOp(isQuiet, isDouble, isImm)
641
642 # Build the various versions of the conditional floating point compare
643 # instructions
# FCCMP/FCCMPE: compare only when the condition holds, otherwise load the
# default NZCV value (defCc) supplied by the instruction.
644 def buildFCCmpOp(isQuiet, isDouble):
645 global header_output, decoder_output, exec_output
646
647 fccmpCode = vfp64EnabledCheckCode + '''
648 FPSCR fpscr = (FPSCR) FpscrExc;
649 if (testPredicate(CondCodesNZ, CondCodesC, CondCodesV, condCode)) {
650 %s cOp1 = %s;
651 %s cOp2 = %s;
652 int cc = fplibCompare<uint%s_t>(cOp1, cOp2, %s, fpscr);
653 CondCodesNZ = cc >> 2 & 3;
654 CondCodesC = cc >> 1 & 1;
655 CondCodesV = cc & 1;
656 } else {
657 CondCodesNZ = (defCc >> 2) & 0x3;
658 CondCodesC = (defCc >> 1) & 0x1;
659 CondCodesV = defCc & 0x1;
660 }
661 FpCondCodes = fpscr & FpCondCodesMask;
662 FpscrExc = fpscr;
663 ''' % ("uint64_t" if isDouble else "uint32_t",
664 "AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32"
665 if isDouble else "AA64FpOp1P0_uw",
666 "uint64_t" if isDouble else "uint32_t",
667 "AA64FpOp2P0_uw | (uint64_t)AA64FpOp2P1_uw << 32"
668 if isDouble else "AA64FpOp2P0_uw",
669 "64" if isDouble else "32", "false" if isQuiet else "true")
670
671 instName = "FCCmp%sReg%s" %("" if isQuiet else "E",
672 "D" if isDouble else "S")
673 fccmpIop = InstObjParams("fccmp%s" %("" if isQuiet else "e"),
674 instName, "FpCondCompRegOp",
675 {"code": fccmpCode,
676 "op_class": "SimdFloatCmpOp"}, [])
677 header_output += DataXCondCompRegDeclare.subst(fccmpIop);
678 decoder_output += DataXCondCompRegConstructor.subst(fccmpIop);
679 exec_output += BasicExecute.subst(fccmpIop);
680
681 for isQuiet in True, False:
682 for isDouble in True, False:
683 buildFCCmpOp(isQuiet, isDouble)
684
685 }};
686
687 let {{
688
# Generators for fixed-point <-> floating-point conversions with an
# explicit fraction-bit count.  The instruction immediate encodes the
# scale as (64 - fbits), hence the "64 - imm" passed to fplib below.
689 header_output = ""
690 decoder_output = ""
691 exec_output = ""
692
693 # Generates the variants of the floating to fixed point instructions
694 def buildFpCvtFixedOp(isSigned, isDouble, isXReg):
695 global header_output, decoder_output, exec_output
696
697 fcvtFpFixedCode = vfp64EnabledCheckCode + '''
698 FPSCR fpscr = (FPSCR) FpscrExc;
699 '''
700 if isDouble:
701 fcvtFpFixedCode += '''
702 uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32;
703 '''
704 else:
705 fcvtFpFixedCode += "uint32_t cOp1 = AA64FpOp1P0_uw;"
# FP-to-fixed always rounds toward zero (FPRounding_ZERO); the third
# fplib argument is the "unsigned" flag.
706 fcvtFpFixedCode += '''
707 %sDest = fplibFPToFixed<uint%s_t, uint%s_t>(cOp1, 64 - imm, %s,
708 FPRounding_ZERO, fpscr);
709 FpscrExc = fpscr;
710 ''' %("X" if isXReg else "W",
711 "64" if isDouble else "32",
712 "64" if isXReg else "32",
713 "false" if isSigned else "true")
714
715 instName = "FcvtFp%sFixed%s%s" %("S" if isSigned else "U",
716 "D" if isDouble else "S",
717 "X" if isXReg else "W")
718 mnem = "fcvtz%s" %("s" if isSigned else "u")
719 fcvtFpFixedIop = InstObjParams(mnem, instName, "FpRegRegImmOp",
720 { "code": fcvtFpFixedCode,
721 "op_class": "SimdFloatCvtOp" }, [])
722 header_output += FpRegRegImmOpDeclare.subst(fcvtFpFixedIop);
723 decoder_output += AA64FpRegRegImmOpConstructor.subst(fcvtFpFixedIop);
724 exec_output += BasicExecute.subst(fcvtFpFixedIop);
725
726 # Generates the variants of the fixed to floating point instructions
727 def buildFixedCvtFpOp(isSigned, isDouble, isXReg):
728 global header_output, decoder_output, exec_output
729
730 srcRegType = "X" if isXReg else "W"
# The cast to (int64/32_t or uint64/32_t) applies the source signedness
# before fplibFixedToFP scales by 64 - imm fraction bits.
731 fcvtFixedFpCode = vfp64EnabledCheckCode + '''
732 FPSCR fpscr = (FPSCR) FpscrExc;
733 %s result = fplibFixedToFP<uint%s_t>((%s%s_t)%sOp1, 64 - imm,
734 %s, FPCRRounding(fpscr), fpscr);
735 ''' %("uint64_t" if isDouble else "uint32_t",
736 "64" if isDouble else "32",
737 "int" if isSigned else "uint", "64" if isXReg else "32",
738 srcRegType,
739 "false" if isSigned else "true")
740 if isDouble:
741 fcvtFixedFpCode += '''
742 AA64FpDestP0_uw = result;
743 AA64FpDestP1_uw = result >> 32;
744 '''
745 else:
746 fcvtFixedFpCode += '''
747 AA64FpDestP0_uw = result;
748 AA64FpDestP1_uw = 0;
749 '''
750 fcvtFixedFpCode += '''
751 AA64FpDestP2_uw = 0;
752 AA64FpDestP3_uw = 0;
753 FpscrExc = fpscr;
754 '''
755
756 instName = "Fcvt%sFixedFp%s%s" %("S" if isSigned else "U",
757 "D" if isDouble else "S",
758 srcRegType)
759 mnem = "%scvtf" %("s" if isSigned else "u")
760 fcvtFixedFpIop = InstObjParams(mnem, instName, "FpRegRegImmOp",
761 { "code": fcvtFixedFpCode,
762 "op_class": "SimdFloatCvtOp" }, [])
763 header_output += FpRegRegImmOpDeclare.subst(fcvtFixedFpIop);
# NOTE(review): uses FpRegRegImmOpConstructor while buildFpCvtFixedOp
# above uses the AA64-prefixed constructor -- confirm this asymmetry.
764 decoder_output += FpRegRegImmOpConstructor.subst(fcvtFixedFpIop);
765 exec_output += BasicExecute.subst(fcvtFixedFpIop);
766
767 # loop over the variants building the instructions for each
768 for isXReg in True, False:
769 for isDouble in True, False:
770 for isSigned in True, False:
771 buildFpCvtFixedOp(isSigned, isDouble, isXReg)
772 buildFixedCvtFpOp(isSigned, isDouble, isXReg)
773 }};
774
775 let {{
776
# FCSEL generator: conditionally select Op1 (condition true) or Op2
# (condition false); upper destination words P2/P3 are always zeroed.
# NOTE(review): unlike every other generator in this file, this code does
# not prepend vfp64EnabledCheckCode -- confirm whether FCSEL should also
# perform the FP-enabled check.
777 header_output = ""
778 decoder_output = ""
779 exec_output = ""
780
781 for isDouble in True, False:
782 code = '''
783 if (testPredicate(CondCodesNZ, CondCodesC, CondCodesV, condCode)) {
784 AA64FpDestP0_uw = AA64FpOp1P0_uw;
785 '''
786 if isDouble:
787 code += '''
788 AA64FpDestP1_uw = AA64FpOp1P1_uw;
789 } else {
790 AA64FpDestP0_uw = AA64FpOp2P0_uw;
791 AA64FpDestP1_uw = AA64FpOp2P1_uw;
792 }
793 '''
794 else:
# Single precision: P1 is zeroed unconditionally after the select.
795 code += '''
796 } else {
797 AA64FpDestP0_uw = AA64FpOp2P0_uw;
798 }
799 AA64FpDestP1_uw = 0;
800 '''
801 code += '''
802 AA64FpDestP2_uw = 0;
803 AA64FpDestP3_uw = 0;
804 '''
805
806 iop = InstObjParams("fcsel", "FCSel%s" %("D" if isDouble else "S"),
807 "FpCondSelOp", code)
808 header_output += DataXCondSelDeclare.subst(iop)
809 decoder_output += DataXCondSelConstructor.subst(iop)
810 exec_output += BasicExecute.subst(iop)
811 }};