arm: Add support for ARMv8 (AArch64 & AArch32)
[gem5.git] / src / arch / arm / isa / insts / neon64.isa
1 // -*- mode: c++ -*-
2
3 // Copyright (c) 2012-2013 ARM Limited
4 // All rights reserved
5 //
6 // The license below extends only to copyright in the software and shall
7 // not be construed as granting a license to any other intellectual
8 // property including but not limited to intellectual property relating
9 // to a hardware implementation of the functionality of the software
10 // licensed hereunder. You may use the software subject to the license
11 // terms below provided that you ensure that this notice is replicated
12 // unmodified and in its entirety in all distributions of the software,
13 // modified or unmodified, in source code or in binary form.
14 //
15 // Redistribution and use in source and binary forms, with or without
16 // modification, are permitted provided that the following conditions are
17 // met: redistributions of source code must retain the above copyright
18 // notice, this list of conditions and the following disclaimer;
19 // redistributions in binary form must reproduce the above copyright
20 // notice, this list of conditions and the following disclaimer in the
21 // documentation and/or other materials provided with the distribution;
22 // neither the name of the copyright holders nor the names of its
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Authors: Giacomo Gabrielli
39 // Mbou Eyole
40
41 let {{
42
# Accumulators for the generated C++ text.  The generator functions in this
# block append the substituted *Declare templates to header_output and the
# substituted *Execute / NeonXExecDeclare templates to exec_output.
header_output = ""
exec_output = ""

# FP types (FP operations always work with unsigned representations)
floatTypes = ("uint32_t", "uint64_t")
# Same as floatTypes but restricted to the 32-bit representation.
smallFloatTypes = ("uint32_t",)
49
def threeEqualRegInstX(name, Name, opClass, types, rCount, op,
                       readDest=False, pairwise=False, scalar=False,
                       byElem=False):
    """Emit a SIMD instruction taking two source vectors of equal width.

    Builds the C++ element-walk body, wraps it in an InstObjParams and
    appends the substituted declaration and execute templates to the
    global header_output / exec_output strings.

    name, Name -- forwarded to InstObjParams; Name is also the class name
                  used for the per-type NeonXExecDeclare output.
    opClass    -- stored under the "op_class" key of the InstObjParams.
    types      -- iterable of template arguments; one NeonXExecDeclare
                  substitution is emitted for each of them.
    rCount     -- number of AA64Fp*P<n>_uw operand pieces copied in and
                  out; destination pieces rCount..3 are written as zero.
    op         -- C++ snippet pasted into the per-element loop after
                  srcElem1, srcElem2 and destElem have been declared.
    readDest   -- also load the destination pieces and preset destElem
                  from the old destination element.
    pairwise   -- feed op with adjacent element pairs taken from srcReg1
                  followed by srcReg2 instead of same-index elements.
    scalar     -- only compute element 0; the other elements are zeroed.
    byElem     -- index the second source with `imm` and read all four of
                  its pieces; selects the DataX2RegImmOp base class.
    """
    # Pairwise forms cannot be combined with by-element or scalar forms.
    assert not (pairwise and (byElem or scalar))
    global header_output, exec_output

    chunks = [simd64EnabledCheckCode, '''
    RegVect srcReg1, destReg;
    ''']
    if not byElem:
        chunks.append('''
    RegVect srcReg2;
    ''')
    else:
        # The second register operand is indexed by imm, so it is declared
        # (and later read) at full width.
        chunks.append('''
    FullRegVect srcReg2;
    ''')

    # Copy the operand pieces into the local vectors.
    for idx in range(rCount):
        chunks.append('''
    srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw);
    srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw);
    ''' % { "reg" : idx })
        if readDest:
            chunks.append('''
    destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw);
    ''' % { "reg" : idx })
    if byElem:
        # Remaining pieces of the fully-read second operand.
        for idx in range(rCount, 4):
            chunks.append('''
    srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw);
    ''' % { "reg" : idx })

    readDestCode = 'destElem = gtoh(destReg.elements[i]);' if readDest else ''

    if not pairwise:
        scalarCheck = ""
        if scalar:
            scalarCheck = '''
        if (i != 0) {
            destReg.elements[i] = 0;
            continue;
        }
        '''
        src2Index = "imm" if byElem else "i"
        chunks.append('''
    for (unsigned i = 0; i < eCount; i++) {
        %(scalarCheck)s
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        Element srcElem2 = gtoh(srcReg2.elements[%(src2Index)s]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode,
            "scalarCheck" : scalarCheck,
            "src2Index" : src2Index })
    else:
        chunks.append('''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(2 * i < eCount ?
                                srcReg1.elements[2 * i] :
                                srcReg2.elements[2 * i - eCount]);
        Element srcElem2 = gtoh(2 * i < eCount ?
                                srcReg1.elements[2 * i + 1] :
                                srcReg2.elements[2 * i + 1 - eCount]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode })

    # Write the result back, then zero the destination pieces that were
    # not produced (the range is empty when rCount is 4).
    for idx in range(rCount):
        chunks.append('''
    AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % { "reg" : idx })
    for idx in range(rCount, 4):
        chunks.append('''
    AA64FpDestP%(reg)d_uw = 0;
    ''' % { "reg" : idx })
    eWalkCode = "".join(chunks)

    baseClass = "DataX2RegOp"
    if byElem:
        baseClass = "DataX2RegImmOp"
    iop = InstObjParams(name, Name, baseClass,
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "op_class": opClass }, [])
    if not byElem:
        header_output += NeonX2RegOpDeclare.subst(iop)
    else:
        header_output += NeonX2RegImmOpDeclare.subst(iop)
    exec_output += NeonXEqualRegOpExecute.subst(iop)
    for elemType in types:
        exec_output += NeonXExecDeclare.subst({ "targs" : elemType,
                                                "class_name" : Name })
143
def threeUnequalRegInstX(name, Name, opClass, types, op,
                         bigSrc1, bigSrc2, bigDest, readDest, scalar=False,
                         byElem=False, hi=False):
    """Generate a three-register SIMD instruction with mixed operand widths.

    Each of the two sources and the destination is either "normal"
    (two register pieces, RegVect) or "big" (four pieces, BigRegVect),
    as selected by bigSrc1 / bigSrc2 / bigDest.  The generated C++ is
    wrapped in an InstObjParams and the substituted declaration and
    NeonXUnequalRegOpExecute templates are appended to header_output /
    exec_output, followed by one NeonXExecDeclare per entry of types.

    op       -- C++ snippet pasted into the per-element loop.
    readDest -- load the destination pieces first and preset destElem.
    scalar   -- only compute element 0; other elements are zeroed.
    byElem   -- index the second source with `imm`; it is then declared
                as FullRegVect and all four of its pieces are read.
    hi       -- non-big sources are read from pieces 2..3 instead of 0..1
                and a non-big destination is written to pieces 2..3; the
                remaining destination pieces are then left unwritten.
    """
    # The scalar and "hi" variants are mutually exclusive.
    assert not (scalar and hi)
    global header_output, exec_output
    # Defaults describe a normal-width operand: two pieces, plain types.
    src1Cnt = src2Cnt = destCnt = 2
    src1Prefix = src2Prefix = destPrefix = ''
    if bigSrc1:
        src1Cnt = 4
        src1Prefix = 'Big'
    if bigSrc2:
        src2Cnt = 4
        src2Prefix = 'Big'
    if bigDest:
        destCnt = 4
        destPrefix = 'Big'
    if byElem:
        # Overrides any 'Big' prefix chosen above for the second source.
        src2Prefix = 'Full'
    eWalkCode = simd64EnabledCheckCode + '''
    %sRegVect srcReg1;
    %sRegVect srcReg2;
    %sRegVect destReg;
    ''' % (src1Prefix, src2Prefix, destPrefix)
    # srcReg1 here is the Python-side index of the first operand piece to
    # read (not the C++ variable of the same name).
    srcReg1 = 0
    if hi and not bigSrc1:  # long/widening operations
        srcReg1 = 2
    for reg in range(src1Cnt):
        eWalkCode += '''
    srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(srcReg1)d_uw);
    ''' % { "reg" : reg, "srcReg1" : srcReg1 }
        srcReg1 += 1
    # Same scheme for the second operand; the by-element form always
    # starts at piece 0.
    srcReg2 = 0
    if (not byElem) and (hi and not bigSrc2):  # long/widening operations
        srcReg2 = 2
    for reg in range(src2Cnt):
        eWalkCode += '''
    srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(srcReg2)d_uw);
    ''' % { "reg" : reg, "srcReg2" : srcReg2 }
        srcReg2 += 1
    if byElem:
        # 2nd operand has to be read fully
        for reg in range(src2Cnt, 4):
            eWalkCode += '''
    srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw);
    ''' % { "reg" : reg }
    if readDest:
        for reg in range(destCnt):
            eWalkCode += '''
    destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw);
    ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    # Only substituted into the loop when scalar is set.
    scalarCheck = '''
        if (i != 0) {
            destReg.elements[i] = 0;
            continue;
        }
        '''
    # NOTE(review): srcElem2 is declared with src1Prefix, and the
    # "src2Prefix" entry of the dictionary below is never referenced by
    # the template -- confirm whether this is intentional.
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        %(scalarCheck)s
        %(src1Prefix)sElement srcElem1 = gtoh(srcReg1.elements[i]);
        %(src1Prefix)sElement srcElem2 = gtoh(srcReg2.elements[%(src2Index)s]);
        %(destPrefix)sElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode,
            "src1Prefix" : src1Prefix, "src2Prefix" : src2Prefix,
            "destPrefix" : destPrefix,
            "scalarCheck" : scalarCheck if scalar else "",
            "src2Index" : "imm" if byElem else "i" }
    # destReg here is the Python-side index of the first destination piece
    # to write.
    destReg = 0
    if hi and not bigDest:
        # narrowing operations
        destReg = 2
    for reg in range(destCnt):
        eWalkCode += '''
    AA64FpDestP%(destReg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % { "reg" : reg, "destReg": destReg }
        destReg += 1
    if destCnt < 4 and not hi:  # zero upper half
        for reg in range(destCnt, 4):
            eWalkCode += '''
    AA64FpDestP%(reg)d_uw = 0;
    ''' % { "reg" : reg }
    # r_count is fixed at 2 regardless of the operand widths chosen above.
    iop = InstObjParams(name, Name,
                        "DataX2RegImmOp" if byElem else "DataX2RegOp",
                        { "code": eWalkCode,
                          "r_count": 2,
                          "op_class": opClass }, [])
    if byElem:
        header_output += NeonX2RegImmOpDeclare.subst(iop)
    else:
        header_output += NeonX2RegOpDeclare.subst(iop)
    exec_output += NeonXUnequalRegOpExecute.subst(iop)
    # One template instantiation per requested element type.
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonXExecDeclare.subst(substDict)
246
def threeRegNarrowInstX(name, Name, opClass, types, op, readDest=False,
                        scalar=False, byElem=False, hi=False):
    """Narrowing form: both sources are big, the destination is not.

    Thin wrapper around threeUnequalRegInstX; the by-element variant is
    rejected by assertion.
    """
    assert not byElem
    threeUnequalRegInstX(name, Name, opClass, types, op,
                         bigSrc1=True, bigSrc2=True, bigDest=False,
                         readDest=readDest, scalar=scalar,
                         byElem=byElem, hi=hi)
252
def threeRegLongInstX(name, Name, opClass, types, op, readDest=False,
                      scalar=False, byElem=False, hi=False):
    """Long form: both sources are normal width, the destination is big.

    Thin wrapper around threeUnequalRegInstX; all optional flags are
    passed through unchanged.
    """
    threeUnequalRegInstX(name, Name, opClass, types, op,
                         bigSrc1=False, bigSrc2=False, bigDest=True,
                         readDest=readDest, scalar=scalar,
                         byElem=byElem, hi=hi)
257
def threeRegWideInstX(name, Name, opClass, types, op, readDest=False,
                      scalar=False, byElem=False, hi=False):
    """Wide form: first source and destination are big, second is not.

    Thin wrapper around threeUnequalRegInstX; the by-element variant is
    rejected by assertion.
    """
    assert not byElem
    threeUnequalRegInstX(name, Name, opClass, types, op,
                         bigSrc1=True, bigSrc2=False, bigDest=True,
                         readDest=readDest, scalar=scalar,
                         byElem=byElem, hi=hi)
263
def twoEqualRegInstX(name, Name, opClass, types, rCount, op,
                     readDest=False, scalar=False, byElem=False,
                     hasImm=False, isDup=False):
    """Emit a SIMD instruction with one source vector of equal width.

    The generated C++ walks the elements, runs `op` on each one and
    writes rCount destination pieces, zeroing pieces rCount..3.  The
    substituted templates are appended to header_output / exec_output.

    readDest -- load the destination pieces and preset destElem.
    scalar   -- only compute element 0; other elements are zeroed.
    byElem   -- read the source element selected by `imm` rather than
                element i; implies hasImm.
    hasImm   -- use the DataX1RegImmOp base class and its declaration
                template instead of DataX1RegOp.
    isDup    -- declare the source as FullRegVect and read all four of
                its pieces; only valid together with byElem.
    """
    global header_output, exec_output
    assert byElem or not isDup
    if byElem:
        hasImm = True

    if not isDup:
        declarations = '''
    RegVect srcReg1, destReg;
    '''
        srcPieces = rCount
    else:
        declarations = '''
    FullRegVect srcReg1;
    RegVect destReg;
    '''
        srcPieces = 4
    chunks = [simd64EnabledCheckCode, declarations]

    for idx in range(srcPieces):
        chunks.append('''
    srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw);
    ''' % { "reg" : idx })
        if readDest:
            chunks.append('''
    destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw);
    ''' % { "reg" : idx })

    readDestCode = 'destElem = gtoh(destReg.elements[i]);' if readDest else ''
    scalarCheck = ""
    if scalar:
        scalarCheck = '''
        if (i != 0) {
            destReg.elements[i] = 0;
            continue;
        }
        '''
    # `j` is declared in the loop so that `op` may redirect the store to a
    # different destination element.
    chunks.append('''
    for (unsigned i = 0; i < eCount; i++) {
        %(scalarCheck)s
        unsigned j = i;
        Element srcElem1 = gtoh(srcReg1.elements[%(src1Index)s]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[j] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode,
            "scalarCheck" : scalarCheck,
            "src1Index" : "imm" if byElem else "i" })

    for idx in range(rCount):
        chunks.append('''
    AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % { "reg" : idx })
    # Zero the upper pieces; the range is empty when rCount is 4.
    for idx in range(rCount, 4):
        chunks.append('''
    AA64FpDestP%(reg)d_uw = 0;
    ''' % { "reg" : idx })
    eWalkCode = "".join(chunks)

    baseClass = "DataX1RegOp"
    if hasImm:
        baseClass = "DataX1RegImmOp"
    iop = InstObjParams(name, Name, baseClass,
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "op_class": opClass }, [])
    if not hasImm:
        header_output += NeonX1RegOpDeclare.subst(iop)
    else:
        header_output += NeonX1RegImmOpDeclare.subst(iop)
    exec_output += NeonXEqualRegOpExecute.subst(iop)
    for elemType in types:
        exec_output += NeonXExecDeclare.subst({ "targs" : elemType,
                                                "class_name" : Name })
333
def twoRegLongInstX(name, Name, opClass, types, op, readDest=False,
                    hi=False, hasImm=False):
    """Generate a two-register "long" SIMD instruction.

    The source is a normal-width vector (two register pieces) and the
    destination a big one (four pieces).  The generated C++ is wrapped in
    an InstObjParams and the substituted declaration and
    NeonXUnequalRegOpExecute templates are appended to header_output /
    exec_output, followed by one NeonXExecDeclare per entry of types.

    op       -- C++ snippet pasted into the per-element loop; it sees
                srcElem1 (Element) and destElem (BigElement).
    readDest -- load the four destination pieces first and preset
                destElem from the old destination element.
    hi       -- read the source from operand pieces 2..3 instead of 0..1.
    hasImm   -- use the DataX1RegImmOp base class and its declaration
                template instead of DataX1RegOp.
    """
    global header_output, exec_output
    eWalkCode = simd64EnabledCheckCode + '''
    RegVect srcReg1;
    BigRegVect destReg;
    '''
    # First operand piece to read: the upper pair for the "hi" variant.
    srcBase = 2 if hi else 0
    for reg in range(2):
        eWalkCode += '''
    srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(srcReg)d_uw);
    ''' % { "reg" : reg, "srcReg" : srcBase + reg }
    readDestCode = ''
    if readDest:
        for reg in range(4):
            eWalkCode += '''
    destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw);
    ''' % { "reg" : reg }
        # The old destination element must be loaded into destElem (the
        # per-element scalar), not assigned to the destReg vector itself.
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        BigElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    # The big destination always covers all four pieces.
    for reg in range(4):
        eWalkCode += '''
    AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "DataX1RegImmOp" if hasImm else "DataX1RegOp",
                        { "code": eWalkCode,
                          "r_count": 2,
                          "op_class": opClass }, [])
    if hasImm:
        header_output += NeonX1RegImmOpDeclare.subst(iop)
    else:
        header_output += NeonX1RegOpDeclare.subst(iop)
    exec_output += NeonXUnequalRegOpExecute.subst(iop)
    for elemType in types:
        substDict = { "targs" : elemType,
                      "class_name" : Name }
        exec_output += NeonXExecDeclare.subst(substDict)
384
def twoRegNarrowInstX(name, Name, opClass, types, op, readDest=False,
                      scalar=False, hi=False, hasImm=False):
    """Generate a two-register "narrow" SIMD instruction.

    The source is a big vector (four register pieces) and the destination
    a normal-width one (two pieces).  The generated C++ is wrapped in an
    InstObjParams and the substituted declaration and
    NeonXUnequalRegOpExecute templates are appended to header_output /
    exec_output, followed by one NeonXExecDeclare per entry of types.

    op       -- C++ snippet pasted into the per-element loop; it sees
                srcElem1 (BigElement) and destElem (Element).
    readDest -- load the two destination pieces first and preset
                destElem; otherwise only destReg.elements[0] is cleared
                before the loop.
    scalar   -- only compute element 0; other elements are zeroed.
    hi       -- write the result to destination pieces 2..3 instead of
                0..1 and leave the other pieces unwritten; without it
                pieces 2..3 are zeroed.
    hasImm   -- use the DataX1RegImmOp base class and its declaration
                template instead of DataX1RegOp.
    """
    global header_output, exec_output
    eWalkCode = simd64EnabledCheckCode + '''
    BigRegVect srcReg1;
    RegVect destReg;
    '''
    for reg in range(4):
        eWalkCode += '''
    srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw);
    ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        for reg in range(2):
            eWalkCode += '''
    destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw);
    ''' % { "reg" : reg }
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    else:
        # Plain literal: there is nothing to substitute here.
        eWalkCode += '''
    destReg.elements[0] = 0;
    '''
    scalarCheck = '''
        if (i != 0) {
            destReg.elements[i] = 0;
            continue;
        }
        '''
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        %(scalarCheck)s
        BigElement srcElem1 = gtoh(srcReg1.elements[i]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode,
            "scalarCheck" : scalarCheck if scalar else "" }
    # First destination piece to write: the upper pair for the "hi" form.
    destBase = 2 if hi else 0
    for reg in range(2):
        eWalkCode += '''
    AA64FpDestP%(destReg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % { "reg" : reg, "destReg": destBase + reg }
    if not hi:
        for reg in range(2, 4):  # zero upper half
            eWalkCode += '''
    AA64FpDestP%(reg)d_uw = 0;
    ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "DataX1RegImmOp" if hasImm else "DataX1RegOp",
                        { "code": eWalkCode,
                          "r_count": 2,
                          "op_class": opClass }, [])
    if hasImm:
        header_output += NeonX1RegImmOpDeclare.subst(iop)
    else:
        header_output += NeonX1RegOpDeclare.subst(iop)
    exec_output += NeonXUnequalRegOpExecute.subst(iop)
    for elemType in types:
        substDict = { "targs" : elemType,
                      "class_name" : Name }
        exec_output += NeonXExecDeclare.subst(substDict)
450
def threeRegScrambleInstX(name, Name, opClass, types, rCount, op):
    """Emit a two-source SIMD instruction whose body is supplied whole.

    Unlike the element-walk generators, `op` is pasted verbatim between
    the operand loads and the destination write-back, so it is expected
    to fill destReg itself.  rCount pieces of each source are loaded and
    rCount destination pieces written; pieces rCount..3 are zeroed.
    """
    global header_output, exec_output
    chunks = [simd64EnabledCheckCode, '''
    RegVect srcReg1, srcReg2, destReg;
    ''']
    for idx in range(rCount):
        chunks.append('''
    srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw);
    srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw);
    ''' % { "reg" : idx })
    chunks.append(op)
    for idx in range(rCount):
        chunks.append('''
    AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % { "reg" : idx })
    # Empty range when rCount is 4, so no explicit guard is needed.
    for idx in range(rCount, 4):
        chunks.append('''
    AA64FpDestP%(reg)d_uw = 0;
    ''' % { "reg" : idx })
    eWalkCode = "".join(chunks)

    params = { "code": eWalkCode,
               "r_count": rCount,
               "op_class": opClass }
    iop = InstObjParams(name, Name, "DataX2RegOp", params, [])
    header_output += NeonX2RegOpDeclare.subst(iop)
    exec_output += NeonXEqualRegOpExecute.subst(iop)
    for elemType in types:
        exec_output += NeonXExecDeclare.subst({ "targs" : elemType,
                                                "class_name" : Name })
482
def insFromVecElemInstX(name, Name, opClass, types, rCount):
    """Emit an instruction copying one vector element into another vector.

    The generated C++ reads all four source pieces and rCount destination
    pieces, copies source element `imm2` into destination element `imm1`
    and writes the rCount destination pieces back.  No destination piece
    is zeroed.  Uses the DataX1Reg2ImmOp base class.
    """
    global header_output, exec_output
    chunks = [simd64EnabledCheckCode, '''
    FullRegVect srcReg1;
    RegVect destReg;
    ''']
    # The source is always read in full since imm2 may select any element.
    for idx in range(4):
        chunks.append('''
    srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw);
    ''' % { "reg" : idx })
    for idx in range(rCount):
        chunks.append('''
    destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw);
    ''' % { "reg" : idx })
    chunks.append('''
    Element srcElem1 = gtoh(srcReg1.elements[imm2]);
    Element destElem = srcElem1;
    destReg.elements[imm1] = htog(destElem);
    ''')
    for idx in range(rCount):
        chunks.append('''
    AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % { "reg" : idx })
    eWalkCode = "".join(chunks)

    params = { "code": eWalkCode,
               "r_count": rCount,
               "op_class": opClass }
    iop = InstObjParams(name, Name, "DataX1Reg2ImmOp", params, [])
    header_output += NeonX1Reg2ImmOpDeclare.subst(iop)
    exec_output += NeonXEqualRegOpExecute.subst(iop)
    for elemType in types:
        exec_output += NeonXExecDeclare.subst({ "targs" : elemType,
                                                "class_name" : Name })
517
def twoRegPairwiseScInstX(name, Name, opClass, types, rCount, op):
    """Generate a scalar pairwise SIMD instruction.

    The generated C++ loads rCount source pieces, applies `op` to source
    elements 0 and 1 (srcElem1 / srcElem2) and stores destElem into
    destination element 0.  Half as many destination pieces as source
    pieces are written back; the remaining pieces up to 3 are zeroed.
    The substituted templates are appended to header_output /
    exec_output, followed by one NeonXExecDeclare per entry of types.
    """
    global header_output, exec_output
    eWalkCode = simd64EnabledCheckCode + '''
    RegVect srcReg1, destReg;
    '''
    for reg in range(rCount):
        eWalkCode += '''
    srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw);
    ''' % { "reg" : reg }
    eWalkCode += '''
    Element srcElem1 = gtoh(srcReg1.elements[0]);
    Element srcElem2 = gtoh(srcReg1.elements[1]);
    Element destElem;
    %(op)s
    destReg.elements[0] = htog(destElem);
    ''' % { "op" : op }
    # Floor division keeps destCnt an int on both Python 2 and Python 3;
    # true division would hand range() a float.
    destCnt = rCount // 2
    for reg in range(destCnt):
        eWalkCode += '''
    AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % { "reg" : reg }
    for reg in range(destCnt, 4):  # zero upper half
        eWalkCode += '''
    AA64FpDestP%(reg)d_uw = 0;
    ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "DataX1RegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "op_class": opClass }, [])
    header_output += NeonX1RegOpDeclare.subst(iop)
    exec_output += NeonXEqualRegOpExecute.subst(iop)
    for elemType in types:
        substDict = { "targs" : elemType,
                      "class_name" : Name }
        exec_output += NeonXExecDeclare.subst(substDict)
554
def twoRegAcrossInstX(name, Name, opClass, types, rCount, op,
                      doubleDest=False, long=False):
    """Emit an across-lanes reduction of a single source vector.

    The generated C++ seeds destElem with source element 0 and runs `op`
    for every later element (with srcElem1 holding that element), then
    stores destElem into destination element 0.

    doubleDest -- write two destination pieces instead of one; all other
                  pieces up to 3 are zeroed.
    long       -- accumulate in BigElement / BigRegVect and use the
                  NeonXUnequalRegOpExecute template.
    """
    global header_output, exec_output
    destPrefix = ""
    if long:
        destPrefix = "Big"
    chunks = [simd64EnabledCheckCode, '''
    RegVect srcReg1;
    %sRegVect destReg;
    ''' % destPrefix]
    for idx in range(rCount):
        chunks.append('''
    srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw);
    ''' % { "reg" : idx })
    chunks.append('''
    destReg.regs[0] = 0;
    %(destPrefix)sElement destElem = 0;
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        if (i == 0) {
            destElem = srcElem1;
        } else {
            %(op)s
        }
    }
    destReg.elements[0] = htog(destElem);
    ''' % { "op" : op, "destPrefix" : destPrefix })

    destCnt = 1
    if doubleDest:
        destCnt = 2
    for idx in range(destCnt):
        chunks.append('''
    AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % { "reg" : idx })
    # Everything above the result pieces is cleared.
    for idx in range(destCnt, 4):
        chunks.append('''
    AA64FpDestP%(reg)d_uw = 0;
    ''' % { "reg" : idx })
    eWalkCode = "".join(chunks)

    params = { "code": eWalkCode,
               "r_count": rCount,
               "op_class": opClass }
    iop = InstObjParams(name, Name, "DataX1RegOp", params, [])
    header_output += NeonX1RegOpDeclare.subst(iop)
    if not long:
        exec_output += NeonXEqualRegOpExecute.subst(iop)
    else:
        exec_output += NeonXUnequalRegOpExecute.subst(iop)
    for elemType in types:
        exec_output += NeonXExecDeclare.subst({ "targs" : elemType,
                                                "class_name" : Name })
603
def twoRegCondenseInstX(name, Name, opClass, types, rCount, op,
                        readDest=False):
    """Emit an instruction folding adjacent element pairs into big elements.

    The generated C++ walks eCount / 2 iterations; each one hands source
    elements 2*i and 2*i+1 to `op` as srcElem1 / srcElem2 and stores the
    BigElement destElem into destination element i.  rCount destination
    pieces are written back and pieces rCount..3 zeroed.

    readDest -- load the destination pieces first and preset destElem
                from the old destination element.
    """
    global header_output, exec_output
    chunks = [simd64EnabledCheckCode, '''
    RegVect srcRegs;
    BigRegVect destReg;
    ''']
    for idx in range(rCount):
        chunks.append('''
    srcRegs.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw);
    ''' % { "reg" : idx })
        if readDest:
            chunks.append('''
    destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw);
    ''' % { "reg" : idx })
    readDestCode = 'destElem = gtoh(destReg.elements[i]);' if readDest else ''
    chunks.append('''
    for (unsigned i = 0; i < eCount / 2; i++) {
        Element srcElem1 = gtoh(srcRegs.elements[2 * i]);
        Element srcElem2 = gtoh(srcRegs.elements[2 * i + 1]);
        BigElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode })
    for idx in range(rCount):
        chunks.append('''
    AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % { "reg" : idx })
    # Empty range when rCount is 4, so no explicit guard is needed.
    for idx in range(rCount, 4):
        chunks.append('''
    AA64FpDestP%(reg)d_uw = 0;
    ''' % { "reg" : idx })
    eWalkCode = "".join(chunks)

    params = { "code": eWalkCode,
               "r_count": rCount,
               "op_class": opClass }
    iop = InstObjParams(name, Name, "DataX1RegOp", params, [])
    header_output += NeonX1RegOpDeclare.subst(iop)
    exec_output += NeonXUnequalRegOpExecute.subst(iop)
    for elemType in types:
        exec_output += NeonXExecDeclare.subst({ "targs" : elemType,
                                                "class_name" : Name })
652
def oneRegImmInstX(name, Name, opClass, types, rCount, op, readDest=False):
    """Emit an instruction with a destination vector and no source vector.

    The generated C++ runs `op` once per destination element with only
    destElem declared, writes rCount destination pieces back and zeroes
    pieces rCount..3.  Uses the DataXImmOnlyOp base class.

    readDest -- load the destination pieces first and preset destElem
                from the old destination element.
    """
    global header_output, exec_output
    chunks = [simd64EnabledCheckCode, '''
    RegVect destReg;
    ''']
    readDestCode = ''
    if readDest:
        for idx in range(rCount):
            chunks.append('''
    destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw);
    ''' % { "reg" : idx })
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    chunks.append('''
    for (unsigned i = 0; i < eCount; i++) {
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode })
    for idx in range(rCount):
        chunks.append('''
    AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % { "reg" : idx })
    # Empty range when rCount is 4, so no explicit guard is needed.
    for idx in range(rCount, 4):
        chunks.append('''
    AA64FpDestP%(reg)d_uw = 0;
    ''' % { "reg" : idx })
    eWalkCode = "".join(chunks)

    params = { "code": eWalkCode,
               "r_count": rCount,
               "op_class": opClass }
    iop = InstObjParams(name, Name, "DataXImmOnlyOp", params, [])
    header_output += NeonX1RegImmOnlyOpDeclare.subst(iop)
    exec_output += NeonXEqualRegOpExecute.subst(iop)
    for elemType in types:
        exec_output += NeonXExecDeclare.subst({ "targs" : elemType,
                                                "class_name" : Name })
694
def dupGprInstX(name, Name, opClass, types, rCount, gprSpec):
    """Emit an instruction replicating a general register into a vector.

    Every destination element is set to the operand `<gprSpec>Op1` cast
    to Element.  rCount destination pieces are written back and pieces
    rCount..3 zeroed.

    gprSpec -- string prefixed to "Op1" to name the source operand.
    """
    global header_output, exec_output
    chunks = [simd64EnabledCheckCode, '''
    RegVect destReg;
    for (unsigned i = 0; i < eCount; i++) {
        destReg.elements[i] = htog((Element) %sOp1);
    }
    ''' % gprSpec]
    for idx in range(rCount):
        chunks.append('''
    AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % { "reg" : idx })
    # Empty range when rCount is 4, so no explicit guard is needed.
    for idx in range(rCount, 4):
        chunks.append('''
    AA64FpDestP%(reg)d_uw = 0;
    ''' % { "reg" : idx })
    eWalkCode = "".join(chunks)

    params = { "code": eWalkCode,
               "r_count": rCount,
               "op_class": opClass }
    iop = InstObjParams(name, Name, "DataX1RegOp", params, [])
    header_output += NeonX1RegOpDeclare.subst(iop)
    exec_output += NeonXEqualRegOpExecute.subst(iop)
    for elemType in types:
        exec_output += NeonXExecDeclare.subst({ "targs" : elemType,
                                                "class_name" : Name })
723
def extInstX(name, Name, opClass, types, rCount, op):
    """Emit a two-source instruction with an immediate and a verbatim body.

    `op` is pasted as-is between the operand loads and the destination
    write-back, so it is expected to fill destReg itself.  rCount pieces
    of each source are loaded, rCount destination pieces written and
    pieces rCount..3 zeroed.  Uses the DataX2RegImmOp base class.
    """
    global header_output, exec_output
    chunks = [simd64EnabledCheckCode, '''
    RegVect srcReg1, srcReg2, destReg;
    ''']
    for idx in range(rCount):
        chunks.append('''
    srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw);
    srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw);
    ''' % { "reg" : idx })
    chunks.append(op)
    for idx in range(rCount):
        chunks.append('''
    AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % { "reg" : idx })
    # Empty range when rCount is 4, so no explicit guard is needed.
    for idx in range(rCount, 4):
        chunks.append('''
    AA64FpDestP%(reg)d_uw = 0;
    ''' % { "reg" : idx })
    eWalkCode = "".join(chunks)

    params = { "code": eWalkCode,
               "r_count": rCount,
               "op_class": opClass }
    iop = InstObjParams(name, Name, "DataX2RegImmOp", params, [])
    header_output += NeonX2RegImmOpDeclare.subst(iop)
    exec_output += NeonXEqualRegOpExecute.subst(iop)
    for elemType in types:
        exec_output += NeonXExecDeclare.subst({ "targs" : elemType,
                                                "class_name" : Name })
755
def insFromGprInstX(name, Name, opClass, types, rCount, gprSpec):
    """Emit an instruction inserting a general register into one element.

    The generated C++ loads rCount destination pieces, overwrites element
    `imm` with the operand `<gprSpec>Op1` cast to Element and writes the
    rCount pieces back.  No destination piece is zeroed.

    gprSpec -- string prefixed to "Op1" to name the source operand.
    """
    global header_output, exec_output
    chunks = [simd64EnabledCheckCode, '''
    RegVect destReg;
    ''']
    # The old destination is read so the untouched elements survive.
    for idx in range(rCount):
        chunks.append('''
    destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw);
    ''' % { "reg" : idx })
    chunks.append('''
    destReg.elements[imm] = htog((Element) %sOp1);
    ''' % gprSpec)
    for idx in range(rCount):
        chunks.append('''
    AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % { "reg" : idx })
    eWalkCode = "".join(chunks)

    params = { "code": eWalkCode,
               "r_count": rCount,
               "op_class": opClass }
    iop = InstObjParams(name, Name, "DataX1RegImmOp", params, [])
    header_output += NeonX1RegImmOpDeclare.subst(iop)
    exec_output += NeonXEqualRegOpExecute.subst(iop)
    for elemType in types:
        exec_output += NeonXExecDeclare.subst({ "targs" : elemType,
                                                "class_name" : Name })
783
def insToGprInstX(name, Name, opClass, types, rCount, gprSpec,
                  signExt=False):
    """Emit an instruction moving one vector element to a general register.

    The generated C++ loads all four source pieces and assigns element
    `imm` to the operand `<gprSpec>Dest`.

    rCount  -- only forwarded as "r_count"; the source is always read in
               full.
    gprSpec -- string prefixed to "Dest" to name the destination operand.
    signExt -- wrap the element in sext<sizeof(Element) * 8>() before the
               assignment.
    """
    global header_output, exec_output
    chunks = [simd64EnabledCheckCode, '''
    FullRegVect srcReg;
    ''']
    for idx in range(4):
        chunks.append('''
    srcReg.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw);
    ''' % { "reg" : idx })
    # NOTE(review): the element is used without gtoh(), unlike the other
    # generators in this file -- confirm this is intended.
    if not signExt:
        chunks.append('''
    %sDest = srcReg.elements[imm];
    ''' % gprSpec)
    else:
        chunks.append('''
    %sDest = sext<sizeof(Element) * 8>(srcReg.elements[imm]);
    ''' % gprSpec)
    eWalkCode = "".join(chunks)

    params = { "code": eWalkCode,
               "r_count": rCount,
               "op_class": opClass }
    iop = InstObjParams(name, Name, "DataX1RegImmOp", params, [])
    header_output += NeonX1RegImmOpDeclare.subst(iop)
    exec_output += NeonXEqualRegOpExecute.subst(iop)
    for elemType in types:
        exec_output += NeonXExecDeclare.subst({ "targs" : elemType,
                                                "class_name" : Name })
813
def tbxTblInstX(name, Name, opClass, types, length, isTbl, rCount):
    """Generate a byte table-lookup SIMD instruction.

    The generated C++ builds a 64-byte table from `length` groups of four
    AA64FpOp1P<p>V<v>S_uw operands (unused table slots are zeroed), then
    uses each byte of the second source as an index into that table.
    An index below 16 * length selects a table byte; otherwise the
    destination byte is zeroed when isTbl is true and left unchanged when
    it is false.  rCount destination pieces are written back and pieces
    rCount..3 zeroed.

    length -- number of four-piece table groups; substituted with %d.
    isTbl  -- text substituted as the initializer of a C++ `const bool`.
    rCount -- number of index / destination pieces.
    """
    global header_output, exec_output
    code = simd64EnabledCheckCode + '''
    union
    {
        uint8_t bytes[64];
        FloatRegBits regs[16];
    } table;

    union
    {
        uint8_t bytes[%(rCount)d * 4];
        FloatRegBits regs[%(rCount)d];
    } destReg, srcReg2;

    const unsigned length = %(length)d;
    const bool isTbl = %(isTbl)s;
    ''' % { "rCount" : rCount, "length" : length, "isTbl" : isTbl }
    # The destination is always read so that out-of-range indices can
    # leave its bytes unchanged.
    for reg in range(rCount):
        code += '''
    srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw);
    destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw);
    ''' % { "reg" : reg }
    for reg in range(16):
        if reg < length * 4:
            # Floor division keeps the group index an int on Python 3
            # instead of relying on %d truncating a float.
            code += '''
    table.regs[%(reg)d] = htog(AA64FpOp1P%(p)dV%(v)dS_uw);
    ''' % { "reg" : reg, "p" : reg % 4, "v" : reg // 4 }
        else:
            code += '''
    table.regs[%(reg)d] = 0;
    ''' % { "reg" : reg }
    code += '''
    for (unsigned i = 0; i < sizeof(destReg); i++) {
        uint8_t index = srcReg2.bytes[i];
        if (index < 16 * length) {
            destReg.bytes[i] = table.bytes[index];
        } else {
            if (isTbl)
                destReg.bytes[i] = 0;
            // else destReg.bytes[i] unchanged
        }
    }
    '''
    for reg in range(rCount):
        code += '''
    AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % { "reg" : reg }
    if rCount < 4:  # zero upper half
        for reg in range(rCount, 4):
            code += '''
    AA64FpDestP%(reg)d_uw = 0;
    ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "DataX2RegOp",
                        { "code": code,
                          "r_count": rCount,
                          "op_class": opClass }, [])
    header_output += NeonX2RegOpDeclare.subst(iop)
    exec_output += NeonXEqualRegOpExecute.subst(iop)
    for elemType in types:
        substDict = { "targs" : elemType,
                      "class_name" : Name }
        exec_output += NeonXExecDeclare.subst(substDict)
878
879 # ABS
880 absCode = '''
881 if (srcElem1 < 0) {
882 destElem = -srcElem1;
883 } else {
884 destElem = srcElem1;
885 }
886 '''
887 twoEqualRegInstX("abs", "AbsDX", "SimdAluOp", signedTypes, 2, absCode)
888 twoEqualRegInstX("abs", "AbsQX", "SimdAluOp", signedTypes, 4, absCode)
889 # ADD
890 addCode = "destElem = srcElem1 + srcElem2;"
891 threeEqualRegInstX("add", "AddDX", "SimdAddOp", unsignedTypes, 2, addCode)
892 threeEqualRegInstX("add", "AddQX", "SimdAddOp", unsignedTypes, 4, addCode)
893 # ADDHN, ADDHN2
894 addhnCode = '''
895 destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >>
896 (sizeof(Element) * 8);
897 '''
898 threeRegNarrowInstX("addhn", "AddhnX", "SimdAddOp", smallUnsignedTypes,
899 addhnCode)
900 threeRegNarrowInstX("addhn2", "Addhn2X", "SimdAddOp", smallUnsignedTypes,
901 addhnCode, hi=True)
902 # ADDP (scalar)
903 twoRegPairwiseScInstX("addp", "AddpScQX", "SimdAddOp", ("uint64_t",), 4,
904 addCode)
905 # ADDP (vector)
906 threeEqualRegInstX("addp", "AddpDX", "SimdAddOp", smallUnsignedTypes, 2,
907 addCode, pairwise=True)
908 threeEqualRegInstX("addp", "AddpQX", "SimdAddOp", unsignedTypes, 4,
909 addCode, pairwise=True)
910 # ADDV
911 # Note: SimdAddOp can be a bit optimistic here
912 addAcrossCode = "destElem += srcElem1;"
913 twoRegAcrossInstX("addv", "AddvDX", "SimdAddOp", ("uint8_t", "uint16_t"),
914 2, addAcrossCode)
915 twoRegAcrossInstX("addv", "AddvQX", "SimdAddOp", smallUnsignedTypes, 4,
916 addAcrossCode)
917 # AND
918 andCode = "destElem = srcElem1 & srcElem2;"
919 threeEqualRegInstX("and", "AndDX", "SimdAluOp", ("uint64_t",), 2, andCode)
920 threeEqualRegInstX("and", "AndQX", "SimdAluOp", ("uint64_t",), 4, andCode)
921 # BIC (immediate)
922 bicImmCode = "destElem &= ~imm;"
923 oneRegImmInstX("bic", "BicImmDX", "SimdAluOp", ("uint64_t",), 2,
924 bicImmCode, True)
925 oneRegImmInstX("bic", "BicImmQX", "SimdAluOp", ("uint64_t",), 4,
926 bicImmCode, True)
927 # BIC (register)
928 bicCode = "destElem = srcElem1 & ~srcElem2;"
929 threeEqualRegInstX("bic", "BicDX", "SimdAluOp", ("uint64_t",), 2, bicCode)
930 threeEqualRegInstX("bic", "BicQX", "SimdAluOp", ("uint64_t",), 4, bicCode)
931 # BIF
932 bifCode = "destElem = (destElem & srcElem2) | (srcElem1 & ~srcElem2);"
933 threeEqualRegInstX("bif", "BifDX", "SimdAluOp", ("uint64_t",), 2, bifCode,
934 True)
935 threeEqualRegInstX("bif", "BifQX", "SimdAluOp", ("uint64_t",), 4, bifCode,
936 True)
937 # BIT
938 bitCode = "destElem = (srcElem1 & srcElem2) | (destElem & ~srcElem2);"
939 threeEqualRegInstX("bit", "BitDX", "SimdAluOp", ("uint64_t",), 2, bitCode,
940 True)
941 threeEqualRegInstX("bit", "BitQX", "SimdAluOp", ("uint64_t",), 4, bitCode,
942 True)
943 # BSL
944 bslCode = "destElem = (srcElem1 & destElem) | (srcElem2 & ~destElem);"
945 threeEqualRegInstX("bsl", "BslDX", "SimdAluOp", ("uint64_t",), 2, bslCode,
946 True)
947 threeEqualRegInstX("bsl", "BslQX", "SimdAluOp", ("uint64_t",), 4, bslCode,
948 True)
# CLS
# Counts how many bits directly below the sign bit are equal to the sign
# bit, capped at (element width - 1).
clsCode = '''
    unsigned count = 0;
    if (srcElem1 < 0) {
        srcElem1 <<= 1;
        while (srcElem1 < 0 && count < sizeof(Element) * 8 - 1) {
            count++;
            srcElem1 <<= 1;
        }
    } else {
        srcElem1 <<= 1;
        while (srcElem1 >= 0 && count < sizeof(Element) * 8 - 1) {
            count++;
            srcElem1 <<= 1;
        }
    }
    destElem = count;
'''
for _cls, _regCount in (("ClsDX", 2), ("ClsQX", 4)):
    twoEqualRegInstX("cls", _cls, "SimdAluOp", smallSignedTypes, _regCount,
                     clsCode)
# CLZ
# Shifts left until the top bit is set (the signed value turns negative) or
# the whole element width has been consumed; the shift count is the result.
clzCode = '''
    unsigned count = 0;
    while (srcElem1 >= 0 && count < sizeof(Element) * 8) {
        count++;
        srcElem1 <<= 1;
    }
    destElem = count;
'''
for _cls, _regCount in (("ClzDX", 2), ("ClzQX", 4)):
    twoEqualRegInstX("clz", _cls, "SimdAluOp", smallSignedTypes, _regCount,
                     clzCode)
# Integer compares.  Each snippet writes (Element)(-1) to the destination
# element when its predicate holds and 0 otherwise.  The unsigned compares
# (CMHI, CMHS) reuse the signed snippets with unsigned element types.
cmeqCode = "destElem = (srcElem1 == srcElem2) ? (Element)(-1) : 0;"
cmeqZeroCode = "destElem = (srcElem1 == 0) ? (Element)(-1) : 0;"
cmgeCode = "destElem = (srcElem1 >= srcElem2) ? (Element)(-1) : 0;"
cmgeZeroCode = "destElem = (srcElem1 >= 0) ? (Element)(-1) : 0;"
cmgtCode = "destElem = (srcElem1 > srcElem2) ? (Element)(-1) : 0;"
cmgtZeroCode = "destElem = (srcElem1 > 0) ? (Element)(-1) : 0;"
cmleZeroCode = "destElem = (srcElem1 <= 0) ? (Element)(-1) : 0;"
cmltZeroCode = "destElem = (srcElem1 < 0) ? (Element)(-1) : 0;"
tstCode = "destElem = (srcElem1 & srcElem2) ? (Element)(-1) : 0;"
for _inst, _mnem, _dName, _qName, _opClass, _types, _code in (
        # CMEQ (register)
        (threeEqualRegInstX, "cmeq", "CmeqDX", "CmeqQX", "SimdCmpOp",
         unsignedTypes, cmeqCode),
        # CMEQ (zero)
        (twoEqualRegInstX, "cmeq", "CmeqZeroDX", "CmeqZeroQX", "SimdCmpOp",
         signedTypes, cmeqZeroCode),
        # CMGE (register)
        (threeEqualRegInstX, "cmge", "CmgeDX", "CmgeQX", "SimdCmpOp",
         signedTypes, cmgeCode),
        # CMGE (zero)
        (twoEqualRegInstX, "cmge", "CmgeZeroDX", "CmgeZeroQX", "SimdCmpOp",
         signedTypes, cmgeZeroCode),
        # CMGT (register)
        (threeEqualRegInstX, "cmgt", "CmgtDX", "CmgtQX", "SimdCmpOp",
         signedTypes, cmgtCode),
        # CMGT (zero)
        (twoEqualRegInstX, "cmgt", "CmgtZeroDX", "CmgtZeroQX", "SimdCmpOp",
         signedTypes, cmgtZeroCode),
        # CMHI (register)
        (threeEqualRegInstX, "cmhi", "CmhiDX", "CmhiQX", "SimdCmpOp",
         unsignedTypes, cmgtCode),
        # CMHS (register)
        (threeEqualRegInstX, "cmhs", "CmhsDX", "CmhsQX", "SimdCmpOp",
         unsignedTypes, cmgeCode),
        # CMLE (zero)
        (twoEqualRegInstX, "cmle", "CmleZeroDX", "CmleZeroQX", "SimdCmpOp",
         signedTypes, cmleZeroCode),
        # CMLT (zero)
        (twoEqualRegInstX, "cmlt", "CmltZeroDX", "CmltZeroQX", "SimdCmpOp",
         signedTypes, cmltZeroCode),
        # CMTST (register)
        (threeEqualRegInstX, "cmtst", "CmtstDX", "CmtstQX", "SimdAluOp",
         unsignedTypes, tstCode)):
    _inst(_mnem, _dName, _opClass, _types, 2, _code)
    _inst(_mnem, _qName, _opClass, _types, 4, _code)
# CNT
# Population count: accumulate the low bit and shift right until the source
# element is exhausted.
cntCode = '''
    unsigned count = 0;
    while (srcElem1 && count < sizeof(Element) * 8) {
        count += srcElem1 & 0x1;
        srcElem1 >>= 1;
    }
    destElem = count;
'''
for _cls, _regCount in (("CntDX", 2), ("CntQX", 4)):
    twoEqualRegInstX("cnt", _cls, "SimdAluOp", ("uint8_t",), _regCount,
                     cntCode)
# DUP (element)
# The snippet only copies the source element; isDup/byElem/scalar are passed
# through to the generator unchanged.
dupCode = "destElem = srcElem1;"
for _cls, _types, _regCount, _extra in (
        ("DupElemDX", smallUnsignedTypes, 2, {}),
        ("DupElemQX", unsignedTypes, 4, {}),
        ("DupElemScX", unsignedTypes, 4, {"scalar": True})):
    twoEqualRegInstX("dup", _cls, "SimdMiscOp", _types, _regCount, dupCode,
                     isDup=True, byElem=True, **_extra)
# DUP (general register)
for _cls, _types, _regCount, _gprWidth in (
        ("DupGprWDX", smallUnsignedTypes, 2, 'W'),
        ("DupGprWQX", smallUnsignedTypes, 4, 'W'),
        ("DupGprXQX", ("uint64_t",), 4, 'X')):
    dupGprInstX("dup", _cls, "SimdMiscOp", _types, _regCount, _gprWidth)
# EOR
# Bitwise exclusive OR of the two source elements.
eorCode = "destElem = srcElem1 ^ srcElem2;"
for _cls, _regCount in (("EorDX", 2), ("EorQX", 4)):
    threeEqualRegInstX("eor", _cls, "SimdAluOp", ("uint64_t",), _regCount,
                       eorCode)
# EXT
# Builds the destination from elements [imm, imm + eCount) of the
# concatenation srcReg1:srcReg2.  An index that still falls outside srcReg2
# after wrapping raises UndefinedInstruction.
extCode = '''
    for (unsigned i = 0; i < eCount; i++) {
        unsigned index = i + imm;
        if (index < eCount) {
            destReg.elements[i] = srcReg1.elements[index];
        } else {
            index -= eCount;
            if (index >= eCount) {
                fault = new UndefinedInstruction(machInst, false, mnemonic);
            } else {
                destReg.elements[i] = srcReg2.elements[index];
            }
        }
    }
'''
# The mnemonic is lowercase "ext" (it used to be "Ext") so that it matches
# the A64 mnemonic and every other instruction registered in this file.
extInstX("ext", "ExtDX", "SimdMiscOp", ("uint8_t",), 2, extCode)
extInstX("ext", "ExtQX", "SimdMiscOp", ("uint8_t",), 4, extCode)
# FABD
# fpOp is the shared wrapper for floating-point snippets: it copies FpscrExc
# into a local fpscr, evaluates the substituted expression into destElem and
# writes fpscr back to FpscrExc.
fpOp = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    destElem = %s;
    FpscrExc = fpscr;
'''
fabdCode = fpOp % "fplibAbs<Element>(fplibSub(srcElem1, srcElem2, fpscr))"
for _cls, _types, _regCount, _extra in (
        ("FabdDX", smallFloatTypes, 2, {}),
        ("FabdQX", floatTypes, 4, {}),
        ("FabdScX", floatTypes, 4, {"scalar": True})):
    threeEqualRegInstX("fabd", _cls, "SimdFloatAddOp", _types, _regCount,
                       fabdCode, **_extra)
# FABS
# Floating-point absolute value via fplibAbs, wrapped in the common fpOp
# FPSCR read/write-back template.
fabsCode = fpOp % "fplibAbs<Element>(srcElem1)"
# The mnemonic is "fabs" (it used to be "Abs"), which is the A64 mnemonic and
# follows the lowercase convention used for every other instruction here.
twoEqualRegInstX("fabs", "FabsDX", "SimdFloatAluOp", smallFloatTypes, 2,
                 fabsCode)
twoEqualRegInstX("fabs", "FabsQX", "SimdFloatAluOp", floatTypes, 4,
                 fabsCode)
# FACGE / FACGT
# Absolute compares: both operands go through fplibAbs before the
# fplibCompare<cond> call; the result is -1 when it holds and 0 otherwise.
fpCmpAbsOp = fpOp % ("fplibCompare%s<Element>(fplibAbs<Element>(srcElem1),"
                     " fplibAbs<Element>(srcElem2), fpscr) ? -1 : 0")
facgeCode = fpCmpAbsOp % "GE"
facgtCode = fpCmpAbsOp % "GT"
for _mnem, _code, _dName, _qName, _scName in (
        # FACGE
        ("facge", facgeCode, "FacgeDX", "FacgeQX", "FacgeScX"),
        # FACGT
        ("facgt", facgtCode, "FacgtDX", "FacgtQX", "FacgtScX")):
    threeEqualRegInstX(_mnem, _dName, "SimdFloatCmpOp", smallFloatTypes, 2,
                       _code)
    threeEqualRegInstX(_mnem, _qName, "SimdFloatCmpOp", floatTypes, 4,
                       _code)
    threeEqualRegInstX(_mnem, _scName, "SimdFloatCmpOp", floatTypes, 4,
                       _code, scalar=True)
# FADD
# fpBinOp is the shared template for two-operand fplib calls; the operation
# name is substituted for the remaining %s.
fpBinOp = fpOp % "fplib%s<Element>(srcElem1, srcElem2, fpscr)"
faddCode = fpBinOp % "Add"
for _cls, _types, _regCount in (("FaddDX", smallFloatTypes, 2),
                                ("FaddQX", floatTypes, 4)):
    threeEqualRegInstX("fadd", _cls, "SimdFloatAddOp", _types, _regCount,
                       faddCode)
# FADDP (scalar)
for _cls, _types, _regCount in (("FaddpScDX", ("uint32_t",), 2),
                                ("FaddpScQX", ("uint64_t",), 4)):
    twoRegPairwiseScInstX("faddp", _cls, "SimdFloatAddOp", _types,
                          _regCount, faddCode)
# FADDP (vector)
for _cls, _types, _regCount in (("FaddpDX", smallFloatTypes, 2),
                                ("FaddpQX", floatTypes, 4)):
    threeEqualRegInstX("faddp", _cls, "SimdFloatAddOp", _types, _regCount,
                       faddCode, pairwise=True)
# Floating-point compares (FCMEQ, FCMGE, FCMGT, FCMLE, FCMLT).
# Three templates are used: register-register, register-against-zero, and a
# reversed zero-against-register form.  FCMLE/FCMLT are expressed as GE/GT
# with the operands swapped.
fpCmpOp = fpOp % ("fplibCompare%s<Element>(srcElem1, srcElem2, fpscr) ?"
                  " -1 : 0")
fpCmpZeroOp = fpOp % "fplibCompare%s<Element>(srcElem1, 0, fpscr) ? -1 : 0"
fpCmpRevZeroOp = fpOp % ("fplibCompare%s<Element>(0, srcElem1, fpscr) ?"
                         " -1 : 0")
fcmeqCode = fpCmpOp % "EQ"
fcmeqZeroCode = fpCmpZeroOp % "EQ"
fcmgeCode = fpCmpOp % "GE"
fcmgeZeroCode = fpCmpZeroOp % "GE"
fcmgtCode = fpCmpOp % "GT"
fcmgtZeroCode = fpCmpZeroOp % "GT"
fcmleZeroCode = fpCmpRevZeroOp % "GE"
fcmltZeroCode = fpCmpRevZeroOp % "GT"
for _inst, _mnem, _code, _dName, _qName, _scName in (
        # FCMEQ (register)
        (threeEqualRegInstX, "fcmeq", fcmeqCode,
         "FcmeqDX", "FcmeqQX", "FcmeqScX"),
        # FCMEQ (zero)
        (twoEqualRegInstX, "fcmeq", fcmeqZeroCode,
         "FcmeqZeroDX", "FcmeqZeroQX", "FcmeqZeroScX"),
        # FCMGE (register)
        (threeEqualRegInstX, "fcmge", fcmgeCode,
         "FcmgeDX", "FcmgeQX", "FcmgeScX"),
        # FCMGE (zero)
        (twoEqualRegInstX, "fcmge", fcmgeZeroCode,
         "FcmgeZeroDX", "FcmgeZeroQX", "FcmgeZeroScX"),
        # FCMGT (register)
        (threeEqualRegInstX, "fcmgt", fcmgtCode,
         "FcmgtDX", "FcmgtQX", "FcmgtScX"),
        # FCMGT (zero)
        (twoEqualRegInstX, "fcmgt", fcmgtZeroCode,
         "FcmgtZeroDX", "FcmgtZeroQX", "FcmgtZeroScX"),
        # FCMLE (zero)
        (twoEqualRegInstX, "fcmle", fcmleZeroCode,
         "FcmleZeroDX", "FcmleZeroQX", "FcmleZeroScX"),
        # FCMLT (zero)
        (twoEqualRegInstX, "fcmlt", fcmltZeroCode,
         "FcmltZeroDX", "FcmltZeroQX", "FcmltZeroScX")):
    _inst(_mnem, _dName, "SimdFloatCmpOp", smallFloatTypes, 2, _code)
    _inst(_mnem, _qName, "SimdFloatCmpOp", floatTypes, 4, _code)
    _inst(_mnem, _scName, "SimdFloatCmpOp", floatTypes, 4, _code,
          scalar=True)
# FCVTAS / FCVTAU
# fcvtCode is the shared fplibFPToFixed template; its three substitutions
# are filled here with "0", "false"/"true" and the rounding mode
# (FPRounding_TIEAWAY for both).
fcvtCode = fpOp % ("fplibFPToFixed<Element, Element>("
                   "srcElem1, %s, %s, %s, fpscr)")
fcvtasCode = fcvtCode % ("0", "false", "FPRounding_TIEAWAY")
fcvtauCode = fcvtCode % ("0", "true", "FPRounding_TIEAWAY")
for _mnem, _code, _dName, _qName, _scName in (
        # FCVTAS
        ("fcvtas", fcvtasCode, "FcvtasDX", "FcvtasQX", "FcvtasScX"),
        # FCVTAU
        ("fcvtau", fcvtauCode, "FcvtauDX", "FcvtauQX", "FcvtauScX")):
    twoEqualRegInstX(_mnem, _dName, "SimdCvtOp", smallFloatTypes, 2, _code)
    twoEqualRegInstX(_mnem, _qName, "SimdCvtOp", floatTypes, 4, _code)
    twoEqualRegInstX(_mnem, _scName, "SimdCvtOp", floatTypes, 4, _code,
                     scalar=True)
# FCVTL, FCVTL2
# Converts Element to BigElement with fplibConvert, using the rounding mode
# returned by FPCRRounding(fpscr).  The "2" form passes hi=True.
fcvtlCode = fpOp % ("fplibConvert<Element, BigElement>("
                    "srcElem1, FPCRRounding(fpscr), fpscr)")
for _cls, _extra in (("FcvtlX", {}), ("Fcvtl2X", {"hi": True})):
    twoRegLongInstX("fcvtl", _cls, "SimdCvtOp", ("uint16_t", "uint32_t"),
                    fcvtlCode, **_extra)
# FCVTMS / FCVTMU
# Both use the shared fcvtCode template with FPRounding_NEGINF; only the
# middle "false"/"true" substitution differs.
fcvtmsCode = fcvtCode % ("0", "false", "FPRounding_NEGINF")
fcvtmuCode = fcvtCode % ("0", "true", "FPRounding_NEGINF")
for _mnem, _code, _dName, _qName, _scName in (
        # FCVTMS
        ("fcvtms", fcvtmsCode, "FcvtmsDX", "FcvtmsQX", "FcvtmsScX"),
        # FCVTMU
        ("fcvtmu", fcvtmuCode, "FcvtmuDX", "FcvtmuQX", "FcvtmuScX")):
    twoEqualRegInstX(_mnem, _dName, "SimdCvtOp", smallFloatTypes, 2, _code)
    twoEqualRegInstX(_mnem, _qName, "SimdCvtOp", floatTypes, 4, _code)
    twoEqualRegInstX(_mnem, _scName, "SimdCvtOp", floatTypes, 4, _code,
                     scalar=True)
# FCVTN, FCVTN2
# Converts BigElement to Element with fplibConvert, using the rounding mode
# returned by FPCRRounding(fpscr).  The "2" form passes hi=True.
fcvtnCode = fpOp % ("fplibConvert<BigElement, Element>("
                    "srcElem1, FPCRRounding(fpscr), fpscr)")
for _cls, _extra in (("FcvtnX", {}), ("Fcvtn2X", {"hi": True})):
    twoRegNarrowInstX("fcvtn", _cls, "SimdCvtOp",
                      ("uint16_t", "uint32_t"), fcvtnCode, **_extra)
# FCVTNS / FCVTNU / FCVTPS / FCVTPU
# All use the shared fcvtCode template: the N forms with
# FPRounding_TIEEVEN, the P forms with FPRounding_POSINF; the S and U forms
# differ only in the "false"/"true" substitution.
fcvtnsCode = fcvtCode % ("0", "false", "FPRounding_TIEEVEN")
fcvtnuCode = fcvtCode % ("0", "true", "FPRounding_TIEEVEN")
fcvtpsCode = fcvtCode % ("0", "false", "FPRounding_POSINF")
fcvtpuCode = fcvtCode % ("0", "true", "FPRounding_POSINF")
for _mnem, _code, _dName, _qName, _scName in (
        # FCVTNS
        ("fcvtns", fcvtnsCode, "FcvtnsDX", "FcvtnsQX", "FcvtnsScX"),
        # FCVTNU
        ("fcvtnu", fcvtnuCode, "FcvtnuDX", "FcvtnuQX", "FcvtnuScX"),
        # FCVTPS
        ("fcvtps", fcvtpsCode, "FcvtpsDX", "FcvtpsQX", "FcvtpsScX"),
        # FCVTPU
        ("fcvtpu", fcvtpuCode, "FcvtpuDX", "FcvtpuQX", "FcvtpuScX")):
    twoEqualRegInstX(_mnem, _dName, "SimdCvtOp", smallFloatTypes, 2, _code)
    twoEqualRegInstX(_mnem, _qName, "SimdCvtOp", floatTypes, 4, _code)
    twoEqualRegInstX(_mnem, _scName, "SimdCvtOp", floatTypes, 4, _code,
                     scalar=True)
# FCVTXN, FCVTXN2
# Narrowing conversion that always passes FPRounding_ODD to fplibConvert.
fcvtxnCode = fpOp % ("fplibConvert<BigElement, Element>("
                     "srcElem1, FPRounding_ODD, fpscr)")
for _cls, _extra in (("FcvtxnX", {}),
                     ("Fcvtxn2X", {"hi": True}),
                     ("FcvtxnScX", {"scalar": True})):
    twoRegNarrowInstX("fcvtxn", _cls, "SimdCvtOp", smallFloatTypes,
                      fcvtxnCode, **_extra)
# FCVTZS / FCVTZU
# All four variants use FPRounding_ZERO.  The fixed-point forms substitute
# "imm" as the first template argument and are registered with hasImm=True;
# the integer forms substitute "0".
fcvtzsCode = fcvtCode % ("imm", "false", "FPRounding_ZERO")
fcvtzsIntCode = fcvtCode % ("0", "false", "FPRounding_ZERO")
fcvtzuCode = fcvtCode % ("imm", "true", "FPRounding_ZERO")
fcvtzuIntCode = fcvtCode % ("0", "true", "FPRounding_ZERO")
for _mnem, _code, _dName, _qName, _scName, _extra in (
        # FCVTZS (fixed-point)
        ("fcvtzs", fcvtzsCode, "FcvtzsFixedDX", "FcvtzsFixedQX",
         "FcvtzsFixedScX", {"hasImm": True}),
        # FCVTZS (integer)
        ("fcvtzs", fcvtzsIntCode, "FcvtzsIntDX", "FcvtzsIntQX",
         "FcvtzsIntScX", {}),
        # FCVTZU (fixed-point)
        ("fcvtzu", fcvtzuCode, "FcvtzuFixedDX", "FcvtzuFixedQX",
         "FcvtzuFixedScX", {"hasImm": True}),
        # FCVTZU (integer)
        ("fcvtzu", fcvtzuIntCode, "FcvtzuIntDX", "FcvtzuIntQX",
         "FcvtzuIntScX", {})):
    twoEqualRegInstX(_mnem, _dName, "SimdCvtOp", smallFloatTypes, 2, _code,
                     **_extra)
    twoEqualRegInstX(_mnem, _qName, "SimdCvtOp", floatTypes, 4, _code,
                     **_extra)
    twoEqualRegInstX(_mnem, _scName, "SimdCvtOp", floatTypes, 4, _code,
                     scalar=True, **_extra)
# FDIV
# Element-wise division through the fpBinOp template (fplibDiv).
fdivCode = fpBinOp % "Div"
for _cls, _types, _regCount in (("FdivDX", smallFloatTypes, 2),
                                ("FdivQX", floatTypes, 4)):
    threeEqualRegInstX("fdiv", _cls, "SimdFloatDivOp", _types, _regCount,
                       fdivCode)
# Floating-point maximum family.
# fmaxCode / fmaxnmCode are the element-wise snippets (fplibMax and
# fplibMaxNum); the across-vector forms instead fold srcElem1 into destElem
# through the fpAcrossOp template.
# FMAX
fmaxCode = fpBinOp % "Max"
for _cls, _types, _regCount in (("FmaxDX", smallFloatTypes, 2),
                                ("FmaxQX", floatTypes, 4)):
    threeEqualRegInstX("fmax", _cls, "SimdFloatCmpOp", _types, _regCount,
                       fmaxCode)
# FMAXNM
fmaxnmCode = fpBinOp % "MaxNum"
for _cls, _types, _regCount in (("FmaxnmDX", smallFloatTypes, 2),
                                ("FmaxnmQX", floatTypes, 4)):
    threeEqualRegInstX("fmaxnm", _cls, "SimdFloatCmpOp", _types, _regCount,
                       fmaxnmCode)
# FMAXNMP (scalar)
for _cls, _types, _regCount in (("FmaxnmpScDX", ("uint32_t",), 2),
                                ("FmaxnmpScQX", ("uint64_t",), 4)):
    twoRegPairwiseScInstX("fmaxnmp", _cls, "SimdFloatCmpOp", _types,
                          _regCount, fmaxnmCode)
# FMAXNMP (vector)
for _cls, _types, _regCount in (("FmaxnmpDX", smallFloatTypes, 2),
                                ("FmaxnmpQX", floatTypes, 4)):
    threeEqualRegInstX("fmaxnmp", _cls, "SimdFloatCmpOp", _types, _regCount,
                       fmaxnmCode, pairwise=True)
# FMAXNMV
# Note: SimdFloatCmpOp can be a bit optimistic here
fpAcrossOp = fpOp % "fplib%s<Element>(destElem, srcElem1, fpscr)"
fmaxnmAcrossCode = fpAcrossOp % "MaxNum"
twoRegAcrossInstX("fmaxnmv", "FmaxnmvQX", "SimdFloatCmpOp", ("uint32_t",),
                  4, fmaxnmAcrossCode)
# FMAXP (scalar)
for _cls, _types, _regCount in (("FmaxpScDX", ("uint32_t",), 2),
                                ("FmaxpScQX", ("uint64_t",), 4)):
    twoRegPairwiseScInstX("fmaxp", _cls, "SimdFloatCmpOp", _types,
                          _regCount, fmaxCode)
# FMAXP (vector)
for _cls, _types, _regCount in (("FmaxpDX", smallFloatTypes, 2),
                                ("FmaxpQX", floatTypes, 4)):
    threeEqualRegInstX("fmaxp", _cls, "SimdFloatCmpOp", _types, _regCount,
                       fmaxCode, pairwise=True)
# FMAXV
# Note: SimdFloatCmpOp can be a bit optimistic here
fmaxAcrossCode = fpAcrossOp % "Max"
twoRegAcrossInstX("fmaxv", "FmaxvQX", "SimdFloatCmpOp", ("uint32_t",), 4,
                  fmaxAcrossCode)
# Floating-point minimum family.
# fminCode / fminnmCode are the element-wise snippets (fplibMin and
# fplibMinNum); the across-vector forms reuse the fpAcrossOp template that
# the FMAXNMV section defines.
# FMIN
fminCode = fpBinOp % "Min"
for _cls, _types, _regCount in (("FminDX", smallFloatTypes, 2),
                                ("FminQX", floatTypes, 4)):
    threeEqualRegInstX("fmin", _cls, "SimdFloatCmpOp", _types, _regCount,
                       fminCode)
# FMINNM
fminnmCode = fpBinOp % "MinNum"
for _cls, _types, _regCount in (("FminnmDX", smallFloatTypes, 2),
                                ("FminnmQX", floatTypes, 4)):
    threeEqualRegInstX("fminnm", _cls, "SimdFloatCmpOp", _types, _regCount,
                       fminnmCode)
# FMINNMP (scalar)
for _cls, _types, _regCount in (("FminnmpScDX", ("uint32_t",), 2),
                                ("FminnmpScQX", ("uint64_t",), 4)):
    twoRegPairwiseScInstX("fminnmp", _cls, "SimdFloatCmpOp", _types,
                          _regCount, fminnmCode)
# FMINNMP (vector)
for _cls, _types, _regCount in (("FminnmpDX", smallFloatTypes, 2),
                                ("FminnmpQX", floatTypes, 4)):
    threeEqualRegInstX("fminnmp", _cls, "SimdFloatCmpOp", _types, _regCount,
                       fminnmCode, pairwise=True)
# FMINNMV
# Note: SimdFloatCmpOp can be a bit optimistic here
fminnmAcrossCode = fpAcrossOp % "MinNum"
twoRegAcrossInstX("fminnmv", "FminnmvQX", "SimdFloatCmpOp", ("uint32_t",),
                  4, fminnmAcrossCode)
# FMINP (scalar)
for _cls, _types, _regCount in (("FminpScDX", ("uint32_t",), 2),
                                ("FminpScQX", ("uint64_t",), 4)):
    twoRegPairwiseScInstX("fminp", _cls, "SimdFloatCmpOp", _types,
                          _regCount, fminCode)
# FMINP (vector)
for _cls, _types, _regCount in (("FminpDX", smallFloatTypes, 2),
                                ("FminpQX", floatTypes, 4)):
    threeEqualRegInstX("fminp", _cls, "SimdFloatCmpOp", _types, _regCount,
                       fminCode, pairwise=True)
# FMINV
# Note: SimdFloatCmpOp can be a bit optimistic here
fminAcrossCode = fpAcrossOp % "Min"
twoRegAcrossInstX("fminv", "FminvQX", "SimdFloatCmpOp", ("uint32_t",), 4,
                  fminAcrossCode)
# FMLA / FMLS
# Both call fplibMulAdd with destElem as the first argument; FMLS passes
# srcElem1 through fplibNeg first.  The positional True after the code
# string is forwarded unchanged to the generator.
fmlaCode = fpOp % ("fplibMulAdd<Element>("
                   "destElem, srcElem1, srcElem2, fpscr)")
# FMLA (by element)
for _cls, _types, _regCount, _extra in (
        ("FmlaElemDX", smallFloatTypes, 2, {}),
        ("FmlaElemQX", floatTypes, 4, {}),
        ("FmlaElemScX", floatTypes, 4, {"scalar": True})):
    threeEqualRegInstX("fmla", _cls, "SimdFloatMultAccOp", _types,
                       _regCount, fmlaCode, True, byElem=True, **_extra)
# FMLA (vector)
for _cls, _types, _regCount in (("FmlaDX", smallFloatTypes, 2),
                                ("FmlaQX", floatTypes, 4)):
    threeEqualRegInstX("fmla", _cls, "SimdFloatMultAccOp", _types,
                       _regCount, fmlaCode, True)
fmlsCode = fpOp % ("fplibMulAdd<Element>(destElem,"
                   " fplibNeg<Element>(srcElem1), srcElem2, fpscr)")
# FMLS (by element)
for _cls, _types, _regCount, _extra in (
        ("FmlsElemDX", smallFloatTypes, 2, {}),
        ("FmlsElemQX", floatTypes, 4, {}),
        ("FmlsElemScX", floatTypes, 4, {"scalar": True})):
    threeEqualRegInstX("fmls", _cls, "SimdFloatMultAccOp", _types,
                       _regCount, fmlsCode, True, byElem=True, **_extra)
# FMLS (vector)
for _cls, _types, _regCount in (("FmlsDX", smallFloatTypes, 2),
                                ("FmlsQX", floatTypes, 4)):
    threeEqualRegInstX("fmls", _cls, "SimdFloatMultAccOp", _types,
                       _regCount, fmlsCode, True)
# FMOV
# Writes the immediate straight into every destination element.
fmovCode = 'destElem = imm;'
for _cls, _types, _regCount in (("FmovDX", smallFloatTypes, 2),
                                ("FmovQX", floatTypes, 4)):
    oneRegImmInstX("fmov", _cls, "SimdMiscOp", _types, _regCount, fmovCode)
# FMUL / FMULX
# Both are plain fpBinOp instantiations (fplibMul and fplibMulX).
fmulCode = fpBinOp % "Mul"
# FMUL (by element)
for _cls, _types, _regCount, _extra in (
        ("FmulElemDX", smallFloatTypes, 2, {}),
        ("FmulElemQX", floatTypes, 4, {}),
        ("FmulElemScX", floatTypes, 4, {"scalar": True})):
    threeEqualRegInstX("fmul", _cls, "SimdFloatMultOp", _types, _regCount,
                       fmulCode, byElem=True, **_extra)
# FMUL (vector)
for _cls, _types, _regCount in (("FmulDX", smallFloatTypes, 2),
                                ("FmulQX", floatTypes, 4)):
    threeEqualRegInstX("fmul", _cls, "SimdFloatMultOp", _types, _regCount,
                       fmulCode)
# FMULX
fmulxCode = fpBinOp % "MulX"
for _cls, _types, _regCount, _extra in (
        ("FmulxDX", smallFloatTypes, 2, {}),
        ("FmulxQX", floatTypes, 4, {}),
        ("FmulxScX", floatTypes, 4, {"scalar": True})):
    threeEqualRegInstX("fmulx", _cls, "SimdFloatMultOp", _types, _regCount,
                       fmulxCode, **_extra)
# FMULX (by element)
for _cls, _types, _regCount, _extra in (
        ("FmulxElemDX", smallFloatTypes, 2, {}),
        ("FmulxElemQX", floatTypes, 4, {}),
        ("FmulxElemScX", floatTypes, 4, {"scalar": True})):
    threeEqualRegInstX("fmulx", _cls, "SimdFloatMultOp", _types, _regCount,
                       fmulxCode, byElem=True, **_extra)
# FNEG
# Floating-point negate via fplibNeg, wrapped in the common fpOp FPSCR
# read/write-back template.
fnegCode = fpOp % "fplibNeg<Element>(srcElem1)"
# The mnemonic is "fneg" (it used to be "Neg"): that is the A64 mnemonic, it
# follows the lowercase convention used throughout this file, and it no
# longer differs from the integer NEG mnemonic "neg" by case alone.
twoEqualRegInstX("fneg", "FnegDX", "SimdFloatAluOp", smallFloatTypes, 2,
                 fnegCode)
twoEqualRegInstX("fneg", "FnegQX", "SimdFloatAluOp", floatTypes, 4,
                 fnegCode)
# Reciprocal estimate / step / exponent.
# FRECPE
frecpeCode = fpOp % "fplibRecipEstimate<Element>(srcElem1, fpscr)"
for _cls, _types, _regCount, _extra in (
        ("FrecpeDX", smallFloatTypes, 2, {}),
        ("FrecpeQX", floatTypes, 4, {}),
        ("FrecpeScX", floatTypes, 4, {"scalar": True})):
    twoEqualRegInstX("frecpe", _cls, "SimdFloatMultAccOp", _types,
                     _regCount, frecpeCode, **_extra)
# FRECPS
frecpsCode = fpBinOp % "RecipStepFused"
for _cls, _types, _regCount, _extra in (
        ("FrecpsDX", smallFloatTypes, 2, {}),
        ("FrecpsQX", floatTypes, 4, {}),
        ("FrecpsScX", floatTypes, 4, {"scalar": True})):
    threeEqualRegInstX("frecps", _cls, "SimdFloatMultAccOp", _types,
                       _regCount, frecpsCode, **_extra)
# FRECPX (registered in scalar form only)
frecpxCode = fpOp % "fplibRecpX<Element>(srcElem1, fpscr)"
twoEqualRegInstX("frecpx", "FrecpxX", "SimdFloatMultAccOp", floatTypes, 4,
                 frecpxCode, scalar=True)
# FRINT* (round to integral, result stays floating-point).
# frintCode is the shared fplibRoundInt template; the two substitutions are
# the rounding mode and a "true"/"false" flag that is "true" only for
# FRINTX.
frintCode = fpOp % "fplibRoundInt<Element>(srcElem1, %s, %s, fpscr)"
frintaCode = frintCode % ("FPRounding_TIEAWAY", "false")
frintiCode = frintCode % ("FPCRRounding(fpscr)", "false")
frintmCode = frintCode % ("FPRounding_NEGINF", "false")
frintnCode = frintCode % ("FPRounding_TIEEVEN", "false")
frintpCode = frintCode % ("FPRounding_POSINF", "false")
frintxCode = frintCode % ("FPCRRounding(fpscr)", "true")
frintzCode = frintCode % ("FPRounding_ZERO", "false")
for _mnem, _code, _dName, _qName in (
        # FRINTA
        ("frinta", frintaCode, "FrintaDX", "FrintaQX"),
        # FRINTI
        ("frinti", frintiCode, "FrintiDX", "FrintiQX"),
        # FRINTM
        ("frintm", frintmCode, "FrintmDX", "FrintmQX"),
        # FRINTN
        ("frintn", frintnCode, "FrintnDX", "FrintnQX"),
        # FRINTP
        ("frintp", frintpCode, "FrintpDX", "FrintpQX"),
        # FRINTX
        ("frintx", frintxCode, "FrintxDX", "FrintxQX"),
        # FRINTZ
        ("frintz", frintzCode, "FrintzDX", "FrintzQX")):
    twoEqualRegInstX(_mnem, _dName, "SimdCvtOp", smallFloatTypes, 2, _code)
    twoEqualRegInstX(_mnem, _qName, "SimdCvtOp", floatTypes, 4, _code)
# Square-root estimate / step / exact.
# FRSQRTE
frsqrteCode = fpOp % "fplibRSqrtEstimate<Element>(srcElem1, fpscr)"
for _cls, _types, _regCount, _extra in (
        ("FrsqrteDX", smallFloatTypes, 2, {}),
        ("FrsqrteQX", floatTypes, 4, {}),
        ("FrsqrteScX", floatTypes, 4, {"scalar": True})):
    twoEqualRegInstX("frsqrte", _cls, "SimdFloatSqrtOp", _types, _regCount,
                     frsqrteCode, **_extra)
# FRSQRTS
frsqrtsCode = fpBinOp % "RSqrtStepFused"
for _cls, _types, _regCount, _extra in (
        ("FrsqrtsDX", smallFloatTypes, 2, {}),
        ("FrsqrtsQX", floatTypes, 4, {}),
        ("FrsqrtsScX", floatTypes, 4, {"scalar": True})):
    threeEqualRegInstX("frsqrts", _cls, "SimdFloatMiscOp", _types,
                       _regCount, frsqrtsCode, **_extra)
# FSQRT
fsqrtCode = fpOp % "fplibSqrt<Element>(srcElem1, fpscr)"
for _cls, _types, _regCount in (("FsqrtDX", smallFloatTypes, 2),
                                ("FsqrtQX", floatTypes, 4)):
    twoEqualRegInstX("fsqrt", _cls, "SimdFloatSqrtOp", _types, _regCount,
                     fsqrtCode)
# FSUB
# Element-wise subtraction through the fpBinOp template (fplibSub).
fsubCode = fpBinOp % "Sub"
for _cls, _types, _regCount in (("FsubDX", smallFloatTypes, 2),
                                ("FsubQX", floatTypes, 4)):
    threeEqualRegInstX("fsub", _cls, "SimdFloatAddOp", _types, _regCount,
                       fsubCode)
# INS (element)
insFromVecElemInstX("ins", "InsElemX", "SimdMiscOp", unsignedTypes, 4)
# INS (general register)
# One class per general-register width tag ('W' and 'X').
for _cls, _types, _gprWidth in (("InsGprWX", smallUnsignedTypes, 'W'),
                                ("InsGprXX", unsignedTypes, 'X')):
    insFromGprInstX("ins", _cls, "SimdMiscOp", _types, 4, _gprWidth)
# MLA / MLS
# Integer multiply-accumulate and multiply-subtract into destElem.  The
# by-element forms are generated for 16- and 32-bit elements only.
mlaCode = "destElem += srcElem1 * srcElem2;"
# MLA (by element)
for _cls, _regCount in (("MlaElemDX", 2), ("MlaElemQX", 4)):
    threeEqualRegInstX("mla", _cls, "SimdMultAccOp",
                       ("uint16_t", "uint32_t"), _regCount, mlaCode, True,
                       byElem=True)
# MLA (vector)
for _cls, _regCount in (("MlaDX", 2), ("MlaQX", 4)):
    threeEqualRegInstX("mla", _cls, "SimdMultAccOp", smallUnsignedTypes,
                       _regCount, mlaCode, True)
mlsCode = "destElem -= srcElem1 * srcElem2;"
# MLS (by element)
for _cls, _regCount in (("MlsElemDX", 2), ("MlsElemQX", 4)):
    threeEqualRegInstX("mls", _cls, "SimdMultAccOp",
                       ("uint16_t", "uint32_t"), _regCount, mlsCode, True,
                       byElem=True)
# MLS (vector)
for _cls, _regCount in (("MlsDX", 2), ("MlsQX", 4)):
    threeEqualRegInstX("mls", _cls, "SimdMultAccOp", smallUnsignedTypes,
                       _regCount, mlsCode, True)
1606 # MOV (element) -> alias to INS (element)
1607 # MOV (from general) -> alias to INS (general register)
1608 # MOV (scalar) -> alias to DUP (element)
1609 # MOV (to general) -> alias to UMOV
1610 # MOV (vector) -> alias to ORR (register)
# MOVI
# Writes the immediate into every 64-bit destination element.
movImmCode = "destElem = imm;"
for _cls, _regCount in (("MoviDX", 2), ("MoviQX", 4)):
    oneRegImmInstX("movi", _cls, "SimdMiscOp", ("uint64_t",), _regCount,
                   movImmCode)
# MUL
# Integer multiply; the by-element forms are generated for 16- and 32-bit
# elements only.
mulCode = "destElem = srcElem1 * srcElem2;"
# MUL (by element)
for _cls, _regCount in (("MulElemDX", 2), ("MulElemQX", 4)):
    threeEqualRegInstX("mul", _cls, "SimdMultOp", ("uint16_t", "uint32_t"),
                       _regCount, mulCode, byElem=True)
# MUL (vector)
for _cls, _regCount in (("MulDX", 2), ("MulQX", 4)):
    threeEqualRegInstX("mul", _cls, "SimdMultOp", smallUnsignedTypes,
                       _regCount, mulCode)
# MVN
# Bitwise complement of the source element.
mvnCode = "destElem = ~srcElem1;"
for _cls, _regCount in (("MvnDX", 2), ("MvnQX", 4)):
    twoEqualRegInstX("mvn", _cls, "SimdAluOp", ("uint64_t",), _regCount,
                     mvnCode)
# MVNI
# Bitwise complement of the immediate.
mvniCode = "destElem = ~imm;"
for _cls, _regCount in (("MvniDX", 2), ("MvniQX", 4)):
    oneRegImmInstX("mvni", _cls, "SimdAluOp", ("uint64_t",), _regCount,
                   mvniCode)
# NEG
# Arithmetic negation, generated for the signed element types.
negCode = "destElem = -srcElem1;"
for _cls, _regCount in (("NegDX", 2), ("NegQX", 4)):
    twoEqualRegInstX("neg", _cls, "SimdAluOp", signedTypes, _regCount,
                     negCode)
1640 # NOT -> alias to MVN
# ORN
# OR of srcElem1 with the complement of srcElem2.
ornCode = "destElem = srcElem1 | ~srcElem2;"
for _cls, _regCount in (("OrnDX", 2), ("OrnQX", 4)):
    threeEqualRegInstX("orn", _cls, "SimdAluOp", ("uint64_t",), _regCount,
                       ornCode)
# ORR (immediate)
# ORs the immediate into the existing destination value.
orrImmCode = "destElem |= imm;"
for _cls, _regCount in (("OrrImmDX", 2), ("OrrImmQX", 4)):
    oneRegImmInstX("orr", _cls, "SimdAluOp", ("uint64_t",), _regCount,
                   orrImmCode, True)
# ORR (register)
orrCode = "destElem = srcElem1 | srcElem2;"
for _cls, _regCount in (("OrrDX", 2), ("OrrQX", 4)):
    threeEqualRegInstX("orr", _cls, "SimdAluOp", ("uint64_t",), _regCount,
                       orrCode)
# PMUL
# Carry-less (XOR-accumulated) multiply: for every set bit j of srcElem2,
# srcElem1 shifted left by j is XORed into the result.
pmulCode = '''
    destElem = 0;
    for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
        if (bits(srcElem2, j))
            destElem ^= srcElem1 << j;
    }
'''
for _cls, _regCount in (("PmulDX", 2), ("PmulQX", 4)):
    threeEqualRegInstX("pmul", _cls, "SimdMultOp", ("uint8_t",), _regCount,
                       pmulCode)
# PMULL, PMULL2
# Note: 64-bit PMULL is not available (Crypto. Extension)
# Same loop as PMUL, but srcElem1 is cast to BigElement before the shift.
pmullCode = '''
    destElem = 0;
    for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
        if (bits(srcElem2, j))
            destElem ^= (BigElement)srcElem1 << j;
    }
'''
for _cls, _extra in (("PmullX", {}), ("Pmull2X", {"hi": True})):
    threeRegLongInstX("pmull", _cls, "SimdMultOp", ("uint8_t",), pmullCode,
                      **_extra)
1679 # RADDHN, RADDHN2
# Rounding add returning the high half: the operands are summed at BigElement
# width together with the rounding constant 1 << (Element bits - 1), then the
# sum is shifted right by the Element width.
1680 raddhnCode = '''
1681 destElem = ((BigElement)srcElem1 + (BigElement)srcElem2 +
1682 ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
1683 (sizeof(Element) * 8);
1684 '''
1685 threeRegNarrowInstX("raddhn", "RaddhnX", "SimdAddOp", smallUnsignedTypes,
1686 raddhnCode)
1687 threeRegNarrowInstX("raddhn2", "Raddhn2X", "SimdAddOp", smallUnsignedTypes,
1688 raddhnCode, hi=True)
1689 # RBIT
# Reverses the bit order inside each element: the loop peels bits off the low
# end of a copy of the source and places bit i at position
# (8 * sizeof(Element) - 1 - i) of the destination.
1690 rbitCode = '''
1691 destElem = 0;
1692 Element temp = srcElem1;
1693 for (int i = 0; i < 8 * sizeof(Element); i++) {
1694 destElem = destElem | ((temp & 0x1) <<
1695 (8 * sizeof(Element) - 1 - i));
1696 temp >>= 1;
1697 }
1698 '''
1699 twoEqualRegInstX("rbit", "RbitDX", "SimdAluOp", ("uint8_t",), 2, rbitCode)
1700 twoEqualRegInstX("rbit", "RbitQX", "SimdAluOp", ("uint8_t",), 4, rbitCode)
1701 # REV16
# Element reversal within 2-byte groups: the element value is copied
# unchanged, and the index j is set to i ^ (groupSize - 1) with
# groupSize = 2 / sizeof(Element).
# NOTE(review): i and j are not declared in this snippet; presumably they are
# the source and destination element indices of the enclosing template --
# confirm against twoEqualRegInstX.
1702 rev16Code = '''
1703 destElem = srcElem1;
1704 unsigned groupSize = ((1 << 1) / sizeof(Element));
1705 unsigned reverseMask = (groupSize - 1);
1706 j = i ^ reverseMask;
1707 '''
1708 twoEqualRegInstX("rev16", "Rev16DX", "SimdAluOp", ("uint8_t",), 2,
1709 rev16Code)
1710 twoEqualRegInstX("rev16", "Rev16QX", "SimdAluOp", ("uint8_t",), 4,
1711 rev16Code)
1712 # REV32
# Element reversal within 4-byte groups: same index remapping as REV16
# (j = i ^ (groupSize - 1)) with groupSize = 4 / sizeof(Element); generated
# for 8-bit and 16-bit elements.
1713 rev32Code = '''
1714 destElem = srcElem1;
1715 unsigned groupSize = ((1 << 2) / sizeof(Element));
1716 unsigned reverseMask = (groupSize - 1);
1717 j = i ^ reverseMask;
1718 '''
1719 twoEqualRegInstX("rev32", "Rev32DX", "SimdAluOp", ("uint8_t", "uint16_t"),
1720 2, rev32Code)
1721 twoEqualRegInstX("rev32", "Rev32QX", "SimdAluOp", ("uint8_t", "uint16_t"),
1722 4, rev32Code)
1723 # REV64
# Element reversal within 8-byte groups: j = i ^ (groupSize - 1) with
# groupSize = 8 / sizeof(Element); generated for smallUnsignedTypes.
1724 rev64Code = '''
1725 destElem = srcElem1;
1726 unsigned groupSize = ((1 << 3) / sizeof(Element));
1727 unsigned reverseMask = (groupSize - 1);
1728 j = i ^ reverseMask;
1729 '''
1730 twoEqualRegInstX("rev64", "Rev64DX", "SimdAluOp", smallUnsignedTypes, 2,
1731 rev64Code)
1732 twoEqualRegInstX("rev64", "Rev64QX", "SimdAluOp", smallUnsignedTypes, 4,
1733 rev64Code)
1734 # RSHRN, RSHRN2
# Rounding shift right by immediate:
#   imm > source width  -> 0
#   imm != 0            -> (source >> imm) plus bit (imm - 1) of the source as
#                          the rounding increment; the shift is split into
#                          (>> (imm - 1)) >> 1
#   imm == 0            -> source copied unchanged
1735 rshrnCode = '''
1736 if (imm > sizeof(srcElem1) * 8) {
1737 destElem = 0;
1738 } else if (imm) {
1739 Element rBit = bits(srcElem1, imm - 1);
1740 destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
1741 } else {
1742 destElem = srcElem1;
1743 }
1744 '''
1745 twoRegNarrowInstX("rshrn", "RshrnX", "SimdShiftOp", smallUnsignedTypes,
1746 rshrnCode, hasImm=True)
1747 twoRegNarrowInstX("rshrn2", "Rshrn2X", "SimdShiftOp", smallUnsignedTypes,
1748 rshrnCode, hasImm=True, hi=True)
1749 # RSUBHN, RSUBHN2
# Rounding subtract returning the high half: the difference is formed at
# BigElement width, the rounding constant 1 << (Element bits - 1) is added,
# and the result is shifted right by the Element width.
# NOTE(review): this uses smallTypes while RADDHN uses smallUnsignedTypes;
# check whether the difference is intentional.
1750 rsubhnCode = '''
1751 destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 +
1752 ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
1753 (sizeof(Element) * 8);
1754 '''
1755 threeRegNarrowInstX("rsubhn", "RsubhnX", "SimdAddOp", smallTypes,
1756 rsubhnCode)
1757 threeRegNarrowInstX("rsubhn2", "Rsubhn2X", "SimdAddOp", smallTypes,
1758 rsubhnCode, hi=True)
1759 # SABA
# Absolute difference and accumulate: the larger-minus-smaller difference of
# the two source elements is added to the existing destination element.
1760 abaCode = '''
1761 destElem += (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
1762 (srcElem2 - srcElem1);
1763 '''
1764 threeEqualRegInstX("saba", "SabaDX", "SimdAddAccOp", smallSignedTypes, 2,
1765 abaCode, True)
1766 threeEqualRegInstX("saba", "SabaQX", "SimdAddAccOp", smallSignedTypes, 4,
1767 abaCode, True)
1768 # SABAL, SABAL2
# Widening form of SABA: both operands are cast to BigElement before the
# subtraction, and the absolute difference is added to destElem.
1769 abalCode = '''
1770 destElem += (srcElem1 > srcElem2) ?
1771 ((BigElement)srcElem1 - (BigElement)srcElem2) :
1772 ((BigElement)srcElem2 - (BigElement)srcElem1);
1773 '''
1774 threeRegLongInstX("sabal", "SabalX", "SimdAddAccOp", smallSignedTypes,
1775 abalCode, True)
1776 threeRegLongInstX("sabal2", "Sabal2X", "SimdAddAccOp", smallSignedTypes,
1777 abalCode, True, hi=True)
1778 # SABD
# Absolute difference: larger source element minus the smaller one, written
# to the destination without accumulation.
1779 abdCode = '''
1780 destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
1781 (srcElem2 - srcElem1);
1782 '''
1783 threeEqualRegInstX("sabd", "SabdDX", "SimdAddOp", smallSignedTypes, 2,
1784 abdCode)
1785 threeEqualRegInstX("sabd", "SabdQX", "SimdAddOp", smallSignedTypes, 4,
1786 abdCode)
1787 # SABDL, SABDL2
# Widening absolute difference: operands are cast to BigElement before the
# subtraction and the result overwrites destElem.
# NOTE(review): abdlCode never reads destElem, yet these calls pass the same
# positional True and "SimdAddAccOp" class as the accumulating SABAL calls.
# If True is the read-destination flag this adds an unneeded source operand
# -- confirm against threeRegLongInstX.
1788 abdlCode = '''
1789 destElem = (srcElem1 > srcElem2) ?
1790 ((BigElement)srcElem1 - (BigElement)srcElem2) :
1791 ((BigElement)srcElem2 - (BigElement)srcElem1);
1792 '''
1793 threeRegLongInstX("sabdl", "SabdlX", "SimdAddAccOp", smallSignedTypes,
1794 abdlCode, True)
1795 threeRegLongInstX("sabdl2", "Sabdl2X", "SimdAddAccOp", smallSignedTypes,
1796 abdlCode, True, hi=True)
1797 # SADALP
# Add and accumulate long pairwise: the two source elements are widened to
# BigElement, summed, and the sum is added to the existing destElem.
1798 adalpCode = "destElem += (BigElement)srcElem1 + (BigElement)srcElem2;"
1799 twoRegCondenseInstX("sadalp", "SadalpDX", "SimdAddOp", smallSignedTypes, 2,
1800 adalpCode, True)
1801 twoRegCondenseInstX("sadalp", "SadalpQX", "SimdAddOp", smallSignedTypes, 4,
1802 adalpCode, True)
1803 # SADDL, SADDL2
# Widening add: both source elements are cast to BigElement and summed.
# addlwCode is also passed to the SADDLP and SADDW generators further down.
1804 addlwCode = "destElem = (BigElement)srcElem1 + (BigElement)srcElem2;"
1805 threeRegLongInstX("saddl", "SaddlX", "SimdAddAccOp", smallSignedTypes,
1806 addlwCode)
1807 threeRegLongInstX("saddl2", "Saddl2X", "SimdAddAccOp", smallSignedTypes,
1808 addlwCode, hi=True)
1809 # SADDLP
# Add long pairwise: reuses addlwCode (assigned in the SADDL section) with
# the twoRegCondenseInstX generator.
1810 twoRegCondenseInstX("saddlp", "SaddlpDX", "SimdAddOp", smallSignedTypes, 2,
1811 addlwCode)
1812 twoRegCondenseInstX("saddlp", "SaddlpQX", "SimdAddOp", smallSignedTypes, 4,
1813 addlwCode)
1814 # SADDLV
1815 # Note: SimdAddOp can be a bit optimistic here
# Widening sum across lanes: each source element is cast to BigElement and
# added into destElem. int8_t/int16_t are generated for both DX and QX;
# int32_t is generated only as SaddlvBQX, which also passes doubleDest=True.
1816 addAcrossLongCode = "destElem += (BigElement)srcElem1;"
1817 twoRegAcrossInstX("saddlv", "SaddlvDX", "SimdAddOp", ("int8_t", "int16_t"),
1818 2, addAcrossLongCode, long=True)
1819 twoRegAcrossInstX("saddlv", "SaddlvQX", "SimdAddOp", ("int8_t", "int16_t"),
1820 4, addAcrossLongCode, long=True)
1821 twoRegAcrossInstX("saddlv", "SaddlvBQX", "SimdAddOp", ("int32_t",), 4,
1822 addAcrossLongCode, doubleDest=True, long=True)
1823 # SADDW, SADDW2
# Wide add: reuses addlwCode (assigned in the SADDL section) with the
# threeRegWideInstX generator.
1824 threeRegWideInstX("saddw", "SaddwX", "SimdAddAccOp", smallSignedTypes,
1825 addlwCode)
1826 threeRegWideInstX("saddw2", "Saddw2X", "SimdAddAccOp", smallSignedTypes,
1827 addlwCode, hi=True)
1828 # SCVTF (fixed-point)
# Signed fixed-point to floating-point conversion via fplibFixedToFP, with
# imm passed as the second argument. The expression is substituted into fpOp
# (defined outside this section) and keeps a %d placeholder that each call
# below fills with 32 or 64 to choose the intN_t cast applied to srcElem1.
1829 scvtfFixedCode = fpOp % ("fplibFixedToFP<Element>((int%d_t) srcElem1, imm,"
1830 " false, FPCRRounding(fpscr), fpscr)")
1831 twoEqualRegInstX("scvtf", "ScvtfFixedDX", "SimdCvtOp", smallFloatTypes, 2,
1832 scvtfFixedCode % 32, hasImm=True)
1833 twoEqualRegInstX("scvtf", "ScvtfFixedSQX", "SimdCvtOp", smallFloatTypes, 4,
1834 scvtfFixedCode % 32, hasImm=True)
1835 twoEqualRegInstX("scvtf", "ScvtfFixedDQX", "SimdCvtOp", ("uint64_t",), 4,
1836 scvtfFixedCode % 64, hasImm=True)
1837 twoEqualRegInstX("scvtf", "ScvtfFixedScSX", "SimdCvtOp", smallFloatTypes,
1838 4, scvtfFixedCode % 32, hasImm=True, scalar=True)
1839 twoEqualRegInstX("scvtf", "ScvtfFixedScDX", "SimdCvtOp", ("uint64_t",), 4,
1840 scvtfFixedCode % 64, hasImm=True, scalar=True)
1841 # SCVTF (integer)
# Signed integer to floating-point conversion: the same fplibFixedToFP call
# as the fixed-point form, but with a literal 0 as the second argument
# instead of imm. The %d placeholder is filled per call with 32 or 64.
1842 scvtfIntCode = fpOp % ("fplibFixedToFP<Element>((int%d_t) srcElem1, 0,"
1843 " false, FPCRRounding(fpscr), fpscr)")
1844 twoEqualRegInstX("scvtf", "ScvtfIntDX", "SimdCvtOp", smallFloatTypes, 2,
1845 scvtfIntCode % 32)
1846 twoEqualRegInstX("scvtf", "ScvtfIntSQX", "SimdCvtOp", smallFloatTypes, 4,
1847 scvtfIntCode % 32)
1848 twoEqualRegInstX("scvtf", "ScvtfIntDQX", "SimdCvtOp", ("uint64_t",), 4,
1849 scvtfIntCode % 64)
1850 twoEqualRegInstX("scvtf", "ScvtfIntScSX", "SimdCvtOp", smallFloatTypes, 4,
1851 scvtfIntCode % 32, scalar=True)
1852 twoEqualRegInstX("scvtf", "ScvtfIntScDX", "SimdCvtOp", ("uint64_t",), 4,
1853 scvtfIntCode % 64, scalar=True)
1854 # SHADD
# Halving add: each operand has bit 0 cleared and is halved, the halves are
# summed, and carryBit restores the carry produced when both discarded low
# bits were set.
1855 haddCode = '''
1856 Element carryBit =
1857 (((unsigned)srcElem1 & 0x1) +
1858 ((unsigned)srcElem2 & 0x1)) >> 1;
1859 // Use division instead of a shift to ensure the sign extension works
1860 // right. The compiler will figure out if it can be a shift. Mask the
1861 // inputs so they get truncated correctly.
1862 destElem = (((srcElem1 & ~(Element)1) / 2) +
1863 ((srcElem2 & ~(Element)1) / 2)) + carryBit;
1864 '''
1865 threeEqualRegInstX("shadd", "ShaddDX", "SimdAddOp", smallSignedTypes, 2,
1866 haddCode)
1867 threeEqualRegInstX("shadd", "ShaddQX", "SimdAddOp", smallSignedTypes, 4,
1868 haddCode)
1869 # SHL
# Shift left by immediate. When imm is at least the element width the shift
# is performed as (<< (width - 1)) << 1 rather than as a single shift by imm.
1870 shlCode = '''
1871 if (imm >= sizeof(Element) * 8)
1872 destElem = (srcElem1 << (sizeof(Element) * 8 - 1)) << 1;
1873 else
1874 destElem = srcElem1 << imm;
1875 '''
1876 twoEqualRegInstX("shl", "ShlDX", "SimdShiftOp", unsignedTypes, 2, shlCode,
1877 hasImm=True)
1878 twoEqualRegInstX("shl", "ShlQX", "SimdShiftOp", unsignedTypes, 4, shlCode,
1879 hasImm=True)
1880 # SHLL, SHLL2
# Shift left long: the source element is widened to BigElement and shifted
# left by exactly the source element width.
# NOTE(review): Shll2X is registered with the mnemonic "shll" rather than
# "shll2"; confirm whether the disassembly name is intended.
1881 shllCode = "destElem = ((BigElement)srcElem1) << (sizeof(Element) * 8);"
1882 twoRegLongInstX("shll", "ShllX", "SimdShiftOp", smallTypes, shllCode)
1883 twoRegLongInstX("shll", "Shll2X", "SimdShiftOp", smallTypes, shllCode,
1884 hi=True)
1885 # SHRN, SHRN2
# Shift right by immediate (no rounding): an immediate of at least the
# source width yields 0, otherwise the source is shifted right by imm.
1886 shrnCode = '''
1887 if (imm >= sizeof(srcElem1) * 8) {
1888 destElem = 0;
1889 } else {
1890 destElem = srcElem1 >> imm;
1891 }
1892 '''
1893 twoRegNarrowInstX("shrn", "ShrnX", "SimdShiftOp", smallUnsignedTypes,
1894 shrnCode, hasImm=True)
1895 twoRegNarrowInstX("shrn2", "Shrn2X", "SimdShiftOp", smallUnsignedTypes,
1896 shrnCode, hasImm=True, hi=True)
1897 # SHSUB
# Halving subtract: each operand has bit 0 cleared and is halved, the halves
# are subtracted, and borrowBit (1 only when the low bit of srcElem1 is 0 and
# the low bit of srcElem2 is 1) is subtracted from the result.
1898 hsubCode = '''
1899 Element borrowBit =
1900 (((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1;
1901 // Use division instead of a shift to ensure the sign extension works
1902 // right. The compiler will figure out if it can be a shift. Mask the
1903 // inputs so they get truncated correctly.
1904 destElem = (((srcElem1 & ~(Element)1) / 2) -
1905 ((srcElem2 & ~(Element)1) / 2)) - borrowBit;
1906 '''
1907 threeEqualRegInstX("shsub", "ShsubDX", "SimdAddOp", smallSignedTypes, 2,
1908 hsubCode)
1909 threeEqualRegInstX("shsub", "ShsubQX", "SimdAddOp", smallSignedTypes, 4,
1910 hsubCode)
1911 # SLI
# Shift left and insert: the low imm bits of the existing destination are
# kept (destElem & mask(imm)) and the remaining bits come from
# srcElem1 << imm. For imm >= element width the destination is left as is.
1912 sliCode = '''
1913 if (imm >= sizeof(Element) * 8)
1914 destElem = destElem;
1915 else
1916 destElem = (srcElem1 << imm) | (destElem & mask(imm));
1917 '''
1918 twoEqualRegInstX("sli", "SliDX", "SimdShiftOp", unsignedTypes, 2, sliCode,
1919 True, hasImm=True)
1920 twoEqualRegInstX("sli", "SliQX", "SimdShiftOp", unsignedTypes, 4, sliCode,
1921 True, hasImm=True)
1922 # SMAX
# Element-wise maximum of the two sources. maxCode is also passed to the
# SMAXP generators below with pairwise=True.
1923 maxCode = "destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2;"
1924 threeEqualRegInstX("smax", "SmaxDX", "SimdCmpOp", smallSignedTypes, 2,
1925 maxCode)
1926 threeEqualRegInstX("smax", "SmaxQX", "SimdCmpOp", smallSignedTypes, 4,
1927 maxCode)
1928 # SMAXP
# Pairwise maximum: reuses maxCode (assigned in the SMAX section) and passes
# pairwise=True to the generator.
1929 threeEqualRegInstX("smaxp", "SmaxpDX", "SimdCmpOp", smallSignedTypes, 2,
1930 maxCode, pairwise=True)
1931 threeEqualRegInstX("smaxp", "SmaxpQX", "SimdCmpOp", smallSignedTypes, 4,
1932 maxCode, pairwise=True)
1933 # SMAXV
# Maximum across lanes: destElem takes the first element (i == 0) and is then
# replaced whenever a later element is larger. The DX form is generated for
# int8_t and int16_t only; the QX form covers smallSignedTypes.
1934 maxAcrossCode = '''
1935 if (i == 0 || srcElem1 > destElem)
1936 destElem = srcElem1;
1937 '''
1938 twoRegAcrossInstX("smaxv", "SmaxvDX", "SimdCmpOp", ("int8_t", "int16_t"),
1939 2, maxAcrossCode)
1940 twoRegAcrossInstX("smaxv", "SmaxvQX", "SimdCmpOp", smallSignedTypes, 4,
1941 maxAcrossCode)
1942 # SMIN
# Element-wise minimum of the two sources. minCode is also passed to the
# SMINP generators below with pairwise=True.
1943 minCode = "destElem = (srcElem1 < srcElem2) ? srcElem1 : srcElem2;"
1944 threeEqualRegInstX("smin", "SminDX", "SimdCmpOp", smallSignedTypes, 2,
1945 minCode)
1946 threeEqualRegInstX("smin", "SminQX", "SimdCmpOp", smallSignedTypes, 4,
1947 minCode)
1948 # SMINP
# Pairwise minimum: reuses minCode (assigned in the SMIN section) and passes
# pairwise=True to the generator.
1949 threeEqualRegInstX("sminp", "SminpDX", "SimdCmpOp", smallSignedTypes, 2,
1950 minCode, pairwise=True)
1951 threeEqualRegInstX("sminp", "SminpQX", "SimdCmpOp", smallSignedTypes, 4,
1952 minCode, pairwise=True)
1953 # SMINV
# Minimum across lanes: destElem takes the first element (i == 0) and is then
# replaced whenever a later element is smaller. The DX form is generated for
# int8_t and int16_t only; the QX form covers smallSignedTypes.
1954 minAcrossCode = '''
1955 if (i == 0 || srcElem1 < destElem)
1956 destElem = srcElem1;
1957 '''
1958 twoRegAcrossInstX("sminv", "SminvDX", "SimdCmpOp", ("int8_t", "int16_t"),
1959 2, minAcrossCode)
1960 twoRegAcrossInstX("sminv", "SminvQX", "SimdCmpOp", smallSignedTypes, 4,
1961 minAcrossCode)
1962 # SMLAL, SMLAL2 (by element)
# Widening multiply-accumulate: both operands are cast to BigElement,
# multiplied, and the product is added to the existing destElem. The
# by-element forms are generated for int16_t and int32_t; the vector forms
# use smallSignedTypes. All four classes share the mnemonic "smlal".
1963 mlalCode = "destElem += (BigElement)srcElem1 * (BigElement)srcElem2;"
1964 threeRegLongInstX("smlal", "SmlalElemX", "SimdMultAccOp",
1965 ("int16_t", "int32_t"), mlalCode, True, byElem=True)
1966 threeRegLongInstX("smlal", "SmlalElem2X", "SimdMultAccOp",
1967 ("int16_t", "int32_t"), mlalCode, True, byElem=True,
1968 hi=True)
1969 # SMLAL, SMLAL2 (vector)
1970 threeRegLongInstX("smlal", "SmlalX", "SimdMultAccOp", smallSignedTypes,
1971 mlalCode, True)
1972 threeRegLongInstX("smlal", "Smlal2X", "SimdMultAccOp", smallSignedTypes,
1973 mlalCode, True, hi=True)
1974 # SMLSL, SMLSL2 (by element)
# Widening multiply-subtract: both operands are cast to BigElement,
# multiplied, and the product is subtracted from the existing destElem.
# NOTE(review): the by-element forms here use smallSignedTypes, whereas the
# SMLAL by-element forms list only int16_t and int32_t; check whether the
# extra element type is needed by the decoder.
1975 mlslCode = "destElem -= (BigElement)srcElem1 * (BigElement)srcElem2;"
1976 threeRegLongInstX("smlsl", "SmlslElemX", "SimdMultAccOp", smallSignedTypes,
1977 mlslCode, True, byElem=True)
1978 threeRegLongInstX("smlsl", "SmlslElem2X", "SimdMultAccOp",
1979 smallSignedTypes, mlslCode, True, byElem=True, hi=True)
1980 # SMLSL, SMLSL2 (vector)
1981 threeRegLongInstX("smlsl", "SmlslX", "SimdMultAccOp", smallSignedTypes,
1982 mlslCode, True)
1983 threeRegLongInstX("smlsl", "Smlsl2X", "SimdMultAccOp", smallSignedTypes,
1984 mlslCode, True, hi=True)
1985 # SMOV
# Generates SmovWX (int8_t, int16_t) and SmovXX (smallSignedTypes) through
# insToGprInstX; no per-element code string is passed.
# NOTE(review): 'W' / 'X' presumably select the 32-bit or 64-bit general
# purpose destination and the trailing True presumably requests sign
# extension -- confirm against insToGprInstX.
1986 insToGprInstX("smov", "SmovWX", "SimdMiscOp", ("int8_t", "int16_t"), 4,
1987 'W', True)
1988 insToGprInstX("smov", "SmovXX", "SimdMiscOp", smallSignedTypes, 4, 'X',
1989 True)
1990 # SMULL, SMULL2 (by element)
# Widening multiply: both operands are cast to BigElement and multiplied; the
# product overwrites destElem. The same snippet serves the by-element and the
# vector forms, all registered under the mnemonic "smull".
1991 mullCode = "destElem = (BigElement)srcElem1 * (BigElement)srcElem2;"
1992 threeRegLongInstX("smull", "SmullElemX", "SimdMultOp", smallSignedTypes,
1993 mullCode, byElem=True)
1994 threeRegLongInstX("smull", "SmullElem2X", "SimdMultOp", smallSignedTypes,
1995 mullCode, byElem=True, hi=True)
1996 # SMULL, SMULL2 (vector)
1997 threeRegLongInstX("smull", "SmullX", "SimdMultOp", smallSignedTypes,
1998 mullCode)
1999 threeRegLongInstX("smull", "Smull2X", "SimdMultOp", smallSignedTypes,
2000 mullCode, hi=True)
2001 # SQABS
# Saturating absolute value. The most negative value (only the top bit set)
# has no positive counterpart, so it yields ~srcElem1 and sets fpscr.qc;
# other negative values are negated and non-negative values are copied.
# The DX form uses smallSignedTypes; the QX and scalar forms use signedTypes.
2002 sqabsCode = '''
2003 FPSCR fpscr = (FPSCR) FpscrQc;
2004 if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
2005 fpscr.qc = 1;
2006 destElem = ~srcElem1;
2007 } else if (srcElem1 < 0) {
2008 destElem = -srcElem1;
2009 } else {
2010 destElem = srcElem1;
2011 }
2012 FpscrQc = fpscr;
2013 '''
2014 twoEqualRegInstX("sqabs", "SqabsDX", "SimdAluOp", smallSignedTypes, 2,
2015 sqabsCode)
2016 twoEqualRegInstX("sqabs", "SqabsQX", "SimdAluOp", signedTypes, 4,
2017 sqabsCode)
2018 twoEqualRegInstX("sqabs", "SqabsScX", "SimdAluOp", signedTypes, 4,
2019 sqabsCode, scalar=True)
2020 # SQADD
# Saturating add. Overflow is detected when both sources have the same sign
# and the sum has the opposite sign. The result is then replaced by the most
# negative value, or by that value minus one (the largest positive value)
# when the wrapped sum was negative, and fpscr.qc is set.
2021 sqaddCode = '''
2022 destElem = srcElem1 + srcElem2;
2023 FPSCR fpscr = (FPSCR) FpscrQc;
2024 bool negDest = (destElem < 0);
2025 bool negSrc1 = (srcElem1 < 0);
2026 bool negSrc2 = (srcElem2 < 0);
2027 if ((negDest != negSrc1) && (negSrc1 == negSrc2)) {
2028 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2029 if (negDest)
2030 destElem -= 1;
2031 fpscr.qc = 1;
2032 }
2033 FpscrQc = fpscr;
2034 '''
2035 threeEqualRegInstX("sqadd", "SqaddDX", "SimdAddOp", smallSignedTypes, 2,
2036 sqaddCode)
2037 threeEqualRegInstX("sqadd", "SqaddQX", "SimdAddOp", signedTypes, 4,
2038 sqaddCode)
2039 threeEqualRegInstX("sqadd", "SqaddScX", "SimdAddOp", signedTypes, 4,
2040 sqaddCode, scalar=True)
2041 # SQDMLAL, SQDMLAL2 (by element)
2042 qdmlalCode = '''
2043 FPSCR fpscr = (FPSCR) FpscrQc;
2044 BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2045 Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2046 Element halfNeg = maxNeg / 2;
2047 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2048 (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2049 (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2050 midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
2051 fpscr.qc = 1;
2052 }
2053 bool negPreDest = ltz(destElem);
2054 destElem += midElem;
2055 bool negDest = ltz(destElem);
2056 bool negMid = ltz(midElem);
2057 if (negPreDest == negMid && negMid != negDest) {
2058 destElem = mask(sizeof(BigElement) * 8 - 1);
2059 if (negPreDest)
2060 destElem = ~destElem;
2061 fpscr.qc = 1;
2062 }
2063 FpscrQc = fpscr;
2064 '''
2065 threeRegLongInstX("sqdmlal", "SqdmlalElemX", "SimdMultAccOp",
2066 ("int16_t", "int32_t"), qdmlalCode, True, byElem=True)
2067 threeRegLongInstX("sqdmlal", "SqdmlalElem2X", "SimdMultAccOp",
2068 ("int16_t", "int32_t"), qdmlalCode, True, byElem=True,
2069 hi=True)
2070 threeRegLongInstX("sqdmlal", "SqdmlalElemScX", "SimdMultAccOp",
2071 ("int16_t", "int32_t"), qdmlalCode, True, byElem=True,
2072 scalar=True)
2073 # SQDMLAL, SQDMLAL2 (vector)
2074 threeRegLongInstX("sqdmlal", "SqdmlalX", "SimdMultAccOp",
2075 ("int16_t", "int32_t"), qdmlalCode, True)
2076 threeRegLongInstX("sqdmlal", "Sqdmlal2X", "SimdMultAccOp",
2077 ("int16_t", "int32_t"), qdmlalCode, True, hi=True)
2078 threeRegLongInstX("sqdmlal", "SqdmlalScX", "SimdMultAccOp",
2079 ("int16_t", "int32_t"), qdmlalCode, True, scalar=True)
2080 # SQDMLSL, SQDMLSL2 (by element)
2081 qdmlslCode = '''
2082 FPSCR fpscr = (FPSCR) FpscrQc;
2083 BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2084 Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2085 Element halfNeg = maxNeg / 2;
2086 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2087 (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2088 (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2089 midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
2090 fpscr.qc = 1;
2091 }
2092 bool negPreDest = ltz(destElem);
2093 destElem -= midElem;
2094 bool negDest = ltz(destElem);
2095 bool posMid = ltz((BigElement)-midElem);
2096 if (negPreDest == posMid && posMid != negDest) {
2097 destElem = mask(sizeof(BigElement) * 8 - 1);
2098 if (negPreDest)
2099 destElem = ~destElem;
2100 fpscr.qc = 1;
2101 }
2102 FpscrQc = fpscr;
2103 '''
2104 threeRegLongInstX("sqdmlsl", "SqdmlslElemX", "SimdMultAccOp",
2105 ("int16_t", "int32_t"), qdmlslCode, True, byElem=True)
2106 threeRegLongInstX("sqdmlsl", "SqdmlslElem2X", "SimdMultAccOp",
2107 ("int16_t", "int32_t"), qdmlslCode, True, byElem=True,
2108 hi=True)
2109 threeRegLongInstX("sqdmlsl", "SqdmlslElemScX", "SimdMultAccOp",
2110 ("int16_t", "int32_t"), qdmlslCode, True, byElem=True,
2111 scalar=True)
2112 # SQDMLSL, SQDMLSL2 (vector)
2113 threeRegLongInstX("sqdmlsl", "SqdmlslX", "SimdMultAccOp",
2114 ("int16_t", "int32_t"), qdmlslCode, True)
2115 threeRegLongInstX("sqdmlsl", "Sqdmlsl2X", "SimdMultAccOp",
2116 ("int16_t", "int32_t"), qdmlslCode, True, hi=True)
2117 threeRegLongInstX("sqdmlsl", "SqdmlslScX", "SimdMultAccOp",
2118 ("int16_t", "int32_t"), qdmlslCode, True, scalar=True)
2119 # SQDMULH (by element)
# Saturating doubling multiply returning the high half: the doubled product
# is computed at 64 bits and shifted right by the element width. When both
# operands equal the most negative value the result is replaced by
# ~srcElem1 and fpscr.qc is set. Generated for int16_t and int32_t.
2120 sqdmulhCode = '''
2121 FPSCR fpscr = (FPSCR) FpscrQc;
2122 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >>
2123 (sizeof(Element) * 8);
2124 if (srcElem1 == srcElem2 &&
2125 srcElem1 == (Element)((Element)1 <<
2126 (sizeof(Element) * 8 - 1))) {
2127 destElem = ~srcElem1;
2128 fpscr.qc = 1;
2129 }
2130 FpscrQc = fpscr;
2131 '''
2132 threeEqualRegInstX("sqdmulh", "SqdmulhElemDX", "SimdMultOp",
2133 ("int16_t", "int32_t"), 2, sqdmulhCode, byElem=True)
2134 threeEqualRegInstX("sqdmulh", "SqdmulhElemQX", "SimdMultOp",
2135 ("int16_t", "int32_t"), 4, sqdmulhCode, byElem=True)
2136 threeEqualRegInstX("sqdmulh", "SqdmulhElemScX", "SimdMultOp",
2137 ("int16_t", "int32_t"), 4, sqdmulhCode, byElem=True,
2138 scalar=True)
2139 # SQDMULH (vector)
2140 threeEqualRegInstX("sqdmulh", "SqdmulhDX", "SimdMultOp",
2141 ("int16_t", "int32_t"), 2, sqdmulhCode)
2142 threeEqualRegInstX("sqdmulh", "SqdmulhQX", "SimdMultOp",
2143 ("int16_t", "int32_t"), 4, sqdmulhCode)
2144 threeEqualRegInstX("sqdmulh", "SqdmulhScX", "SimdMultOp",
2145 ("int16_t", "int32_t"), 4, sqdmulhCode, scalar=True)
2146 # SQDMULL, SQDMULL2 (by element)
# Saturating doubling multiply long: destElem receives 2 * srcElem1 *
# srcElem2. When both operands equal the most negative Element value the
# result is replaced by ~(srcElem1 widened and shifted left by the element
# width) and fpscr.qc is set.
# NOTE(review): qdmullCode overwrites destElem without reading it, yet every
# call passes a positional True after the code string, as the accumulating
# SQDMLAL calls do -- confirm whether that flag is needed here.
2147 qdmullCode = '''
2148 FPSCR fpscr = (FPSCR) FpscrQc;
2149 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2150 if (srcElem1 == srcElem2 &&
2151 srcElem1 == (Element)((Element)1 <<
2152 (Element)(sizeof(Element) * 8 - 1))) {
2153 destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8));
2154 fpscr.qc = 1;
2155 }
2156 FpscrQc = fpscr;
2157 '''
2158 threeRegLongInstX("sqdmull", "SqdmullElemX", "SimdMultOp",
2159 ("int16_t", "int32_t"), qdmullCode, True, byElem=True)
2160 threeRegLongInstX("sqdmull", "SqdmullElem2X", "SimdMultOp",
2161 ("int16_t", "int32_t"), qdmullCode, True, byElem=True,
2162 hi=True)
2163 threeRegLongInstX("sqdmull", "SqdmullElemScX", "SimdMultOp",
2164 ("int16_t", "int32_t"), qdmullCode, True, byElem=True,
2165 scalar=True)
2166 # SQDMULL, SQDMULL2 (vector)
2167 threeRegLongInstX("sqdmull", "SqdmullX", "SimdMultOp",
2168 ("int16_t", "int32_t"), qdmullCode, True)
2169 threeRegLongInstX("sqdmull", "Sqdmull2X", "SimdMultOp",
2170 ("int16_t", "int32_t"), qdmullCode, True, hi=True)
2171 threeRegLongInstX("sqdmull", "SqdmullScX", "SimdMultOp",
2172 ("int16_t", "int32_t"), qdmullCode, True, scalar=True)
2173 # SQNEG
# Saturating negate: the most negative value yields ~srcElem1 and sets
# fpscr.qc; every other value is negated normally.
2174 sqnegCode = '''
2175 FPSCR fpscr = (FPSCR) FpscrQc;
2176 if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
2177 fpscr.qc = 1;
2178 destElem = ~srcElem1;
2179 } else {
2180 destElem = -srcElem1;
2181 }
2182 FpscrQc = fpscr;
2183 '''
2184 twoEqualRegInstX("sqneg", "SqnegDX", "SimdAluOp", smallSignedTypes, 2,
2185 sqnegCode)
2186 twoEqualRegInstX("sqneg", "SqnegQX", "SimdAluOp", signedTypes, 4,
2187 sqnegCode)
2188 twoEqualRegInstX("sqneg", "SqnegScX", "SimdAluOp", signedTypes, 4,
2189 sqnegCode, scalar=True)
2190 # SQRDMULH (by element)
2191 sqrdmulhCode = '''
2192 FPSCR fpscr = (FPSCR) FpscrQc;
2193 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
2194 ((int64_t)1 << (sizeof(Element) * 8 - 1))) >>
2195 (sizeof(Element) * 8);
2196 Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2197 Element halfNeg = maxNeg / 2;
2198 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2199 (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2200 (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2201 if (destElem < 0) {
2202 destElem = mask(sizeof(Element) * 8 - 1);
2203 } else {
2204 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2205 }
2206 fpscr.qc = 1;
2207 }
2208 FpscrQc = fpscr;
2209 '''
2210 threeEqualRegInstX("sqrdmulh", "SqrdmulhElemDX", "SimdMultOp",
2211 ("int16_t", "int32_t"), 2, sqrdmulhCode, byElem=True)
2212 threeEqualRegInstX("sqrdmulh", "SqrdmulhElemQX", "SimdMultOp",
2213 ("int16_t", "int32_t"), 4, sqrdmulhCode, byElem=True)
2214 threeEqualRegInstX("sqrdmulh", "SqrdmulhElemScX", "SimdMultOp",
2215 ("int16_t", "int32_t"), 4, sqrdmulhCode, byElem=True,
2216 scalar=True)
2217 # SQRDMULH (vector)
2218 threeEqualRegInstX("sqrdmulh", "SqrdmulhDX", "SimdMultOp",
2219 ("int16_t", "int32_t"), 2, sqrdmulhCode)
2220 threeEqualRegInstX("sqrdmulh", "SqrdmulhQX", "SimdMultOp",
2221 ("int16_t", "int32_t"), 4, sqrdmulhCode)
2222 threeEqualRegInstX("sqrdmulh", "SqrdmulhScX", "SimdMultOp",
2223 ("int16_t", "int32_t"), 4, sqrdmulhCode, scalar=True)
2224 # SQRSHL
# Saturating rounding shift left by register. The shift amount is the low
# byte of srcElem2 interpreted as signed.
#   negative: shift right by -shiftAmt, re-apply the sign bits, and add rBit.
#             rBit is bit (shiftAmt - 1) of the source for shifts up to the
#             element width, and 1 for wider shifts of negative sources.
#   positive: shift left; if the shifted-out bits are not a pure sign
#             extension (or shiftAmt >= width with a non-zero source) the
#             result saturates by the sign of the source and fpscr.qc is set.
#   zero:     copy the source.
# NOTE(review): these use the "SimdCmpOp" class while the SQSHL generators
# use "SimdAluOp"; confirm the intended operation class.
2225 sqrshlCode = '''
2226 int16_t shiftAmt = (int8_t)srcElem2;
2227 FPSCR fpscr = (FPSCR) FpscrQc;
2228 if (shiftAmt < 0) {
2229 shiftAmt = -shiftAmt;
2230 Element rBit = 0;
2231 if (shiftAmt <= sizeof(Element) * 8)
2232 rBit = bits(srcElem1, shiftAmt - 1);
2233 if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
2234 rBit = 1;
2235 if (shiftAmt >= sizeof(Element) * 8) {
2236 shiftAmt = sizeof(Element) * 8 - 1;
2237 destElem = 0;
2238 } else {
2239 destElem = (srcElem1 >> shiftAmt);
2240 }
2241 // Make sure the right shift sign extended when it should.
2242 if (srcElem1 < 0 && destElem >= 0) {
2243 destElem |= -((Element)1 << (sizeof(Element) * 8 -
2244 1 - shiftAmt));
2245 }
2246 destElem += rBit;
2247 } else if (shiftAmt > 0) {
2248 bool sat = false;
2249 if (shiftAmt >= sizeof(Element) * 8) {
2250 if (srcElem1 != 0)
2251 sat = true;
2252 else
2253 destElem = 0;
2254 } else {
2255 if (bits((uint64_t) srcElem1, sizeof(Element) * 8 - 1,
2256 sizeof(Element) * 8 - 1 - shiftAmt) !=
2257 ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
2258 sat = true;
2259 } else {
2260 destElem = srcElem1 << shiftAmt;
2261 }
2262 }
2263 if (sat) {
2264 fpscr.qc = 1;
2265 destElem = mask(sizeof(Element) * 8 - 1);
2266 if (srcElem1 < 0)
2267 destElem = ~destElem;
2268 }
2269 } else {
2270 destElem = srcElem1;
2271 }
2272 FpscrQc = fpscr;
2273 '''
2274 threeEqualRegInstX("sqrshl", "SqrshlDX", "SimdCmpOp", smallSignedTypes, 2,
2275 sqrshlCode)
2276 threeEqualRegInstX("sqrshl", "SqrshlQX", "SimdCmpOp", signedTypes, 4,
2277 sqrshlCode)
2278 threeEqualRegInstX("sqrshl", "SqrshlScX", "SimdCmpOp", signedTypes, 4,
2279 sqrshlCode, scalar=True)
2280 # SQRSHRN, SQRSHRN2
# Saturating rounding shift right narrow (signed result).
#   imm > source width: result 0; QC is set unless the source is 0 or -1.
#   imm != 0: shift right by imm in two steps, taking the last shifted-out
#             bit as rBit, restore the sign bits, add rBit, then saturate
#             (by the sign of the source) if the value does not survive the
#             cast to Element.
#   imm == 0: saturate the unshifted source the same way.
2281 sqrshrnCode = '''
2282 FPSCR fpscr = (FPSCR) FpscrQc;
2283 if (imm > sizeof(srcElem1) * 8) {
2284 if (srcElem1 != 0 && srcElem1 != -1)
2285 fpscr.qc = 1;
2286 destElem = 0;
2287 } else if (imm) {
2288 BigElement mid = (srcElem1 >> (imm - 1));
2289 uint64_t rBit = mid & 0x1;
2290 mid >>= 1;
2291 mid |= -(mid & ((BigElement)1 <<
2292 (sizeof(BigElement) * 8 - 1 - imm)));
2293 mid += rBit;
2294 if (mid != (Element)mid) {
2295 destElem = mask(sizeof(Element) * 8 - 1);
2296 if (srcElem1 < 0)
2297 destElem = ~destElem;
2298 fpscr.qc = 1;
2299 } else {
2300 destElem = mid;
2301 }
2302 } else {
2303 if (srcElem1 != (Element)srcElem1) {
2304 destElem = mask(sizeof(Element) * 8 - 1);
2305 if (srcElem1 < 0)
2306 destElem = ~destElem;
2307 fpscr.qc = 1;
2308 } else {
2309 destElem = srcElem1;
2310 }
2311 }
2312 FpscrQc = fpscr;
2313 '''
2314 twoRegNarrowInstX("sqrshrn", "SqrshrnX", "SimdShiftOp", smallSignedTypes,
2315 sqrshrnCode, hasImm=True)
2316 twoRegNarrowInstX("sqrshrn2", "Sqrshrn2X", "SimdShiftOp", smallSignedTypes,
2317 sqrshrnCode, hasImm=True, hi=True)
2318 twoRegNarrowInstX("sqrshrn", "SqrshrnScX", "SimdShiftOp", smallSignedTypes,
2319 sqrshrnCode, hasImm=True, scalar=True)
2320 # SQRSHRUN, SQRSHRUN2
# Saturating rounding shift right narrow, signed source to unsigned result.
#   imm > source width: result 0; QC is set when the source is non-zero.
#   imm != 0: rounding shift as in SQRSHRN; if any bit above the Element
#             width remains set, the result saturates to 0 for negative
#             sources or to all ones otherwise, and QC is set.
#   imm == 0: negative sources give 0 with QC set; others are copied.
2321 sqrshrunCode = '''
2322 FPSCR fpscr = (FPSCR) FpscrQc;
2323 if (imm > sizeof(srcElem1) * 8) {
2324 if (srcElem1 != 0)
2325 fpscr.qc = 1;
2326 destElem = 0;
2327 } else if (imm) {
2328 BigElement mid = (srcElem1 >> (imm - 1));
2329 uint64_t rBit = mid & 0x1;
2330 mid >>= 1;
2331 mid |= -(mid & ((BigElement)1 <<
2332 (sizeof(BigElement) * 8 - 1 - imm)));
2333 mid += rBit;
2334 if (bits(mid, sizeof(BigElement) * 8 - 1,
2335 sizeof(Element) * 8) != 0) {
2336 if (srcElem1 < 0) {
2337 destElem = 0;
2338 } else {
2339 destElem = mask(sizeof(Element) * 8);
2340 }
2341 fpscr.qc = 1;
2342 } else {
2343 destElem = mid;
2344 }
2345 } else {
2346 if (srcElem1 < 0) {
2347 fpscr.qc = 1;
2348 destElem = 0;
2349 } else {
2350 destElem = srcElem1;
2351 }
2352 }
2353 FpscrQc = fpscr;
2354 '''
2355 twoRegNarrowInstX("sqrshrun", "SqrshrunX", "SimdShiftOp", smallSignedTypes,
2356 sqrshrunCode, hasImm=True)
2357 twoRegNarrowInstX("sqrshrun", "Sqrshrun2X", "SimdShiftOp",
2358 smallSignedTypes, sqrshrunCode, hasImm=True, hi=True)
2359 twoRegNarrowInstX("sqrshrun", "SqrshrunScX", "SimdShiftOp",
2360 smallSignedTypes, sqrshrunCode, hasImm=True, scalar=True)
2361 # SQSHL (immediate)
# Saturating shift left by immediate.
#   imm >= element width: any non-zero source saturates (most negative value,
#                         complemented for positive sources) and sets QC.
#   imm != 0: shift left; the top imm + 1 source bits must be all zeros or all
#             ones, otherwise the result saturates and QC is set.
#   imm == 0: copy the source.
2362 sqshlImmCode = '''
2363 FPSCR fpscr = (FPSCR) FpscrQc;
2364 if (imm >= sizeof(Element) * 8) {
2365 if (srcElem1 != 0) {
2366 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2367 if (srcElem1 > 0)
2368 destElem = ~destElem;
2369 fpscr.qc = 1;
2370 } else {
2371 destElem = 0;
2372 }
2373 } else if (imm) {
2374 destElem = (srcElem1 << imm);
2375 uint64_t topBits = bits((uint64_t)srcElem1,
2376 sizeof(Element) * 8 - 1,
2377 sizeof(Element) * 8 - 1 - imm);
2378 if (topBits != 0 && topBits != mask(imm + 1)) {
2379 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2380 if (srcElem1 > 0)
2381 destElem = ~destElem;
2382 fpscr.qc = 1;
2383 }
2384 } else {
2385 destElem = srcElem1;
2386 }
2387 FpscrQc = fpscr;
2388 '''
2389 twoEqualRegInstX("sqshl", "SqshlImmDX", "SimdAluOp", smallSignedTypes, 2,
2390 sqshlImmCode, hasImm=True)
2391 twoEqualRegInstX("sqshl", "SqshlImmQX", "SimdAluOp", signedTypes, 4,
2392 sqshlImmCode, hasImm=True)
2393 twoEqualRegInstX("sqshl", "SqshlImmScX", "SimdAluOp", signedTypes, 4,
2394 sqshlImmCode, hasImm=True, scalar=True)
2395 # SQSHL (register)
# Saturating shift left by register. The shift amount is the low byte of
# srcElem2 interpreted as signed.
#   negative: shift right by -shiftAmt (no rounding) and re-apply the sign
#             bits for negative sources.
#   positive: shift left; if the shifted-out bits are not a pure sign
#             extension (or shiftAmt >= width with a non-zero source) the
#             result saturates by the sign of the source and fpscr.qc is set.
#   zero:     copy the source.
2396 sqshlCode = '''
2397 int16_t shiftAmt = (int8_t)srcElem2;
2398 FPSCR fpscr = (FPSCR) FpscrQc;
2399 if (shiftAmt < 0) {
2400 shiftAmt = -shiftAmt;
2401 if (shiftAmt >= sizeof(Element) * 8) {
2402 shiftAmt = sizeof(Element) * 8 - 1;
2403 destElem = 0;
2404 } else {
2405 destElem = (srcElem1 >> shiftAmt);
2406 }
2407 // Make sure the right shift sign extended when it should.
2408 if (srcElem1 < 0 && destElem >= 0) {
2409 destElem |= -((Element)1 << (sizeof(Element) * 8 -
2410 1 - shiftAmt));
2411 }
2412 } else if (shiftAmt > 0) {
2413 bool sat = false;
2414 if (shiftAmt >= sizeof(Element) * 8) {
2415 if (srcElem1 != 0)
2416 sat = true;
2417 else
2418 destElem = 0;
2419 } else {
2420 if (bits((uint64_t) srcElem1, sizeof(Element) * 8 - 1,
2421 sizeof(Element) * 8 - 1 - shiftAmt) !=
2422 ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
2423 sat = true;
2424 } else {
2425 destElem = srcElem1 << shiftAmt;
2426 }
2427 }
2428 if (sat) {
2429 fpscr.qc = 1;
2430 destElem = mask(sizeof(Element) * 8 - 1);
2431 if (srcElem1 < 0)
2432 destElem = ~destElem;
2433 }
2434 } else {
2435 destElem = srcElem1;
2436 }
2437 FpscrQc = fpscr;
2438 '''
2439 threeEqualRegInstX("sqshl", "SqshlDX", "SimdAluOp", smallSignedTypes, 2,
2440 sqshlCode)
2441 threeEqualRegInstX("sqshl", "SqshlQX", "SimdAluOp", signedTypes, 4,
2442 sqshlCode)
2443 threeEqualRegInstX("sqshl", "SqshlScX", "SimdAluOp", signedTypes, 4,
2444 sqshlCode, scalar=True)
2445 # SQSHLU
# Saturating shift left by immediate, signed source to unsigned result.
# Negative sources always give 0 with QC set. For non-negative sources the
# result saturates to all ones (QC set) when the top imm source bits are
# non-zero, or when imm >= element width and the source is non-zero;
# otherwise it is the plain shifted value.
2446 sqshluCode = '''
2447 FPSCR fpscr = (FPSCR) FpscrQc;
2448 if (imm >= sizeof(Element) * 8) {
2449 if (srcElem1 < 0) {
2450 destElem = 0;
2451 fpscr.qc = 1;
2452 } else if (srcElem1 > 0) {
2453 destElem = mask(sizeof(Element) * 8);
2454 fpscr.qc = 1;
2455 } else {
2456 destElem = 0;
2457 }
2458 } else if (imm) {
2459 destElem = (srcElem1 << imm);
2460 uint64_t topBits = bits((uint64_t)srcElem1,
2461 sizeof(Element) * 8 - 1,
2462 sizeof(Element) * 8 - imm);
2463 if (srcElem1 < 0) {
2464 destElem = 0;
2465 fpscr.qc = 1;
2466 } else if (topBits != 0) {
2467 destElem = mask(sizeof(Element) * 8);
2468 fpscr.qc = 1;
2469 }
2470 } else {
2471 if (srcElem1 < 0) {
2472 fpscr.qc = 1;
2473 destElem = 0;
2474 } else {
2475 destElem = srcElem1;
2476 }
2477 }
2478 FpscrQc = fpscr;
2479 '''
2480 twoEqualRegInstX("sqshlu", "SqshluDX", "SimdAluOp", smallSignedTypes, 2,
2481 sqshluCode, hasImm=True)
2482 twoEqualRegInstX("sqshlu", "SqshluQX", "SimdAluOp", signedTypes, 4,
2483 sqshluCode, hasImm=True)
2484 twoEqualRegInstX("sqshlu", "SqshluScX", "SimdAluOp", signedTypes, 4,
2485 sqshluCode, hasImm=True, scalar=True)
2486 # SQSHRN, SQSHRN2
# Saturating shift right narrow (signed result, no rounding).
#   imm > source width: result 0; QC is set unless the source is 0 or -1.
#   imm != 0: shift right in two steps, restore the sign bits, then saturate
#             by the sign of the source (QC set) if the value does not
#             survive the cast to Element.
#   imm == 0: the source is assigned without a saturation check.
2487 sqshrnCode = '''
2488 FPSCR fpscr = (FPSCR) FpscrQc;
2489 if (imm > sizeof(srcElem1) * 8) {
2490 if (srcElem1 != 0 && srcElem1 != -1)
2491 fpscr.qc = 1;
2492 destElem = 0;
2493 } else if (imm) {
2494 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2495 mid |= -(mid & ((BigElement)1 <<
2496 (sizeof(BigElement) * 8 - 1 - imm)));
2497 if (mid != (Element)mid) {
2498 destElem = mask(sizeof(Element) * 8 - 1);
2499 if (srcElem1 < 0)
2500 destElem = ~destElem;
2501 fpscr.qc = 1;
2502 } else {
2503 destElem = mid;
2504 }
2505 } else {
2506 destElem = srcElem1;
2507 }
2508 FpscrQc = fpscr;
2509 '''
2510 twoRegNarrowInstX("sqshrn", "SqshrnX", "SimdShiftOp", smallSignedTypes,
2511 sqshrnCode, hasImm=True)
2512 twoRegNarrowInstX("sqshrn2", "Sqshrn2X", "SimdShiftOp", smallSignedTypes,
2513 sqshrnCode, hasImm=True, hi=True)
2514 twoRegNarrowInstX("sqshrn", "SqshrnScX", "SimdShiftOp", smallSignedTypes,
2515 sqshrnCode, hasImm=True, scalar=True)
2516 # SQSHRUN, SQSHRUN2
# Saturating shift right narrow, signed source to unsigned result (no
# rounding).
#   imm > source width: result 0; QC is set when the source is non-zero.
#   imm != 0: shift right in two steps; if any bit above the Element width
#             remains set, the result saturates to 0 for negative sources or
#             to all ones otherwise, and QC is set.
#   imm == 0: the source is assigned without a saturation check.
2517 sqshrunCode = '''
2518 FPSCR fpscr = (FPSCR) FpscrQc;
2519 if (imm > sizeof(srcElem1) * 8) {
2520 if (srcElem1 != 0)
2521 fpscr.qc = 1;
2522 destElem = 0;
2523 } else if (imm) {
2524 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2525 if (bits(mid, sizeof(BigElement) * 8 - 1,
2526 sizeof(Element) * 8) != 0) {
2527 if (srcElem1 < 0) {
2528 destElem = 0;
2529 } else {
2530 destElem = mask(sizeof(Element) * 8);
2531 }
2532 fpscr.qc = 1;
2533 } else {
2534 destElem = mid;
2535 }
2536 } else {
2537 destElem = srcElem1;
2538 }
2539 FpscrQc = fpscr;
2540 '''
2541 twoRegNarrowInstX("sqshrun", "SqshrunX", "SimdShiftOp", smallSignedTypes,
2542 sqshrunCode, hasImm=True)
2543 twoRegNarrowInstX("sqshrun", "Sqshrun2X", "SimdShiftOp", smallSignedTypes,
2544 sqshrunCode, hasImm=True, hi=True)
2545 twoRegNarrowInstX("sqshrun", "SqshrunScX", "SimdShiftOp", smallSignedTypes,
2546 sqshrunCode, hasImm=True, scalar=True)
# SQSUB
# Template: destElem = srcElem1 - srcElem2.  If the difference has a
# different sign from srcElem1 while srcElem1 and srcElem2 have opposite
# signs, the result is replaced by the top-bit-only value of Element (minus
# one when the wrapped difference was negative) and fpscr.qc is set.
sqsubCode = '''
        destElem = srcElem1 - srcElem2;
        FPSCR fpscr = (FPSCR) FpscrQc;
        bool negDest = (destElem < 0);
        bool negSrc1 = (srcElem1 < 0);
        bool posSrc2 = (srcElem2 >= 0);
        if ((negDest != negSrc1) && (negSrc1 == posSrc2)) {
            destElem = (Element)1 << (sizeof(Element) * 8 - 1);
            if (negDest)
                destElem -= 1;
            fpscr.qc = 1;
        }
        FpscrQc = fpscr;
'''
# 2-register, 4-register and scalar forms.
threeEqualRegInstX("sqsub", "SqsubDX", "SimdAddOp", smallSignedTypes, 2,
                   sqsubCode)
threeEqualRegInstX("sqsub", "SqsubQX", "SimdAddOp", signedTypes, 4,
                   sqsubCode)
threeEqualRegInstX("sqsub", "SqsubScX", "SimdAddOp", signedTypes, 4,
                   sqsubCode, scalar=True)
# SQXTN, SQXTN2
# Template: copy srcElem1 into the narrower destElem.  If widening destElem
# back to BigElement does not reproduce srcElem1, destElem is clamped to
# mask(bits - 1) (or its complement for a negative source) and fpscr.qc is
# set.
sqxtnCode = '''
        FPSCR fpscr = (FPSCR) FpscrQc;
        destElem = srcElem1;
        if ((BigElement)destElem != srcElem1) {
            fpscr.qc = 1;
            destElem = mask(sizeof(Element) * 8 - 1);
            if (srcElem1 < 0)
                destElem = ~destElem;
        }
        FpscrQc = fpscr;
'''
twoRegNarrowInstX("sqxtn", "SqxtnX", "SimdMiscOp", smallSignedTypes,
                  sqxtnCode)
# The upper-half form carries the "2" suffix in its mnemonic, as the other
# upper-half registrations in this file do (e.g. "sqshrn2").
twoRegNarrowInstX("sqxtn2", "Sqxtn2X", "SimdMiscOp", smallSignedTypes,
                  sqxtnCode, hi=True)
twoRegNarrowInstX("sqxtn", "SqxtnScX", "SimdMiscOp", smallSignedTypes,
                  sqxtnCode, scalar=True)
# SQXTUN, SQXTUN2
# Template: copy the signed srcElem1 into the narrower destElem as an
# unsigned value.  A negative source, or one whose low Element bits do not
# reproduce it, sets fpscr.qc and clamps destElem to mask(bits) (complemented
# for a negative source).
sqxtunCode = '''
        FPSCR fpscr = (FPSCR) FpscrQc;
        destElem = srcElem1;
        if (srcElem1 < 0 ||
                ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) {
            fpscr.qc = 1;
            destElem = mask(sizeof(Element) * 8);
            if (srcElem1 < 0)
                destElem = ~destElem;
        }
        FpscrQc = fpscr;
'''
twoRegNarrowInstX("sqxtun", "SqxtunX", "SimdMiscOp", smallSignedTypes,
                  sqxtunCode)
# The upper-half form carries the "2" suffix in its mnemonic, as the other
# upper-half registrations in this file do (e.g. "sqshrn2").
twoRegNarrowInstX("sqxtun2", "Sqxtun2X", "SimdMiscOp", smallSignedTypes,
                  sqxtunCode, hi=True)
twoRegNarrowInstX("sqxtun", "SqxtunScX", "SimdMiscOp", smallSignedTypes,
                  sqxtunCode, scalar=True)
# SRHADD
# Template (reused by the URHADD registrations): each operand is halved on
# its own with its low bit masked off, and the two low bits plus one are
# summed and halved separately to form the carry that is added back in.
rhaddCode = '''
        Element carryBit =
            (((unsigned)srcElem1 & 0x1) +
             ((unsigned)srcElem2 & 0x1) + 1) >> 1;
        // Use division instead of a shift to ensure the sign extension works
        // right. The compiler will figure out if it can be a shift. Mask the
        // inputs so they get truncated correctly.
        destElem = (((srcElem1 & ~(Element)1) / 2) +
                    ((srcElem2 & ~(Element)1) / 2)) + carryBit;
'''
threeEqualRegInstX("srhadd", "SrhaddDX", "SimdAddOp", smallSignedTypes, 2,
                   rhaddCode)
threeEqualRegInstX("srhadd", "SrhaddQX", "SimdAddOp", smallSignedTypes, 4,
                   rhaddCode)
# SRI
# Template: OR srcElem1 shifted right by imm into the bits of destElem
# selected by ~mask(bits - imm).  destElem keeps its value when imm is at
# least the element width.
sriCode = '''
        if (imm >= sizeof(Element) * 8)
            destElem = destElem;
        else
            destElem = (srcElem1 >> imm) |
                (destElem & ~mask(sizeof(Element) * 8 - imm));
'''
twoEqualRegInstX("sri", "SriDX", "SimdShiftOp", unsignedTypes, 2, sriCode,
                 True, hasImm=True)
twoEqualRegInstX("sri", "SriQX", "SimdShiftOp", unsignedTypes, 4, sriCode,
                 True, hasImm=True)
# SRSHL
# Template (reused by the URSHL registrations): the low byte of srcElem2 is a
# signed shift amount.  Negative amounts shift srcElem1 right and add the
# last bit shifted out (rBit); positive amounts shift left, giving zero once
# the amount reaches the element width; zero copies srcElem1.
rshlCode = '''
        int16_t shiftAmt = (int8_t)srcElem2;
        if (shiftAmt < 0) {
            shiftAmt = -shiftAmt;
            Element rBit = 0;
            if (shiftAmt <= sizeof(Element) * 8)
                rBit = bits(srcElem1, shiftAmt - 1);
            if (shiftAmt > sizeof(Element) * 8 && ltz(srcElem1))
                rBit = 1;
            if (shiftAmt >= sizeof(Element) * 8) {
                shiftAmt = sizeof(Element) * 8 - 1;
                destElem = 0;
            } else {
                destElem = (srcElem1 >> shiftAmt);
            }
            // Make sure the right shift sign extended when it should.
            if (ltz(srcElem1) && !ltz(destElem)) {
                destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                             1 - shiftAmt));
            }
            destElem += rBit;
        } else if (shiftAmt > 0) {
            if (shiftAmt >= sizeof(Element) * 8) {
                destElem = 0;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        } else {
            destElem = srcElem1;
        }
'''
threeEqualRegInstX("srshl", "SrshlDX", "SimdShiftOp", signedTypes, 2,
                   rshlCode)
threeEqualRegInstX("srshl", "SrshlQX", "SimdShiftOp", signedTypes, 4,
                   rshlCode)
# SRSHR
# Template (reused by the URSHR registrations): shift srcElem1 right by imm
# and add bit (imm - 1) of the source as a rounding increment.  The shift is
# done in two steps, (imm - 1) and then 1.  An imm above the source width
# yields zero; a zero imm copies the source.
rshrCode = '''
        if (imm > sizeof(srcElem1) * 8) {
            destElem = 0;
        } else if (imm) {
            Element rBit = bits(srcElem1, imm - 1);
            destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
        } else {
            destElem = srcElem1;
        }
'''
twoEqualRegInstX("srshr", "SrshrDX", "SimdShiftOp", signedTypes, 2,
                 rshrCode, hasImm=True)
twoEqualRegInstX("srshr", "SrshrQX", "SimdShiftOp", signedTypes, 4,
                 rshrCode, hasImm=True)
# SRSRA
# Template (reused by the URSRA registrations): same rounded right shift as
# rshrCode, but the shifted value is added to the existing destElem instead
# of replacing it.
rsraCode = '''
        if (imm > sizeof(srcElem1) * 8) {
            destElem += 0;
        } else if (imm) {
            Element rBit = bits(srcElem1, imm - 1);
            destElem += ((srcElem1 >> (imm - 1)) >> 1) + rBit;
        } else {
            destElem += srcElem1;
        }
'''
twoEqualRegInstX("srsra", "SrsraDX", "SimdShiftOp", signedTypes, 2,
                 rsraCode, True, hasImm=True)
twoEqualRegInstX("srsra", "SrsraQX", "SimdShiftOp", signedTypes, 4,
                 rsraCode, True, hasImm=True)
# SSHL
# Template (reused by the USHL registrations): the low byte of srcElem2 is a
# signed shift amount.  Negative amounts shift srcElem1 right, with the sign
# bits restored by hand; non-negative amounts shift left.  In both
# directions a raw amount of at least the element width starts from zero.
shlCode = '''
        int16_t shiftAmt = (int8_t)srcElem2;
        if (shiftAmt < 0) {
            shiftAmt = -shiftAmt;
            if (shiftAmt >= sizeof(Element) * 8) {
                shiftAmt = sizeof(Element) * 8 - 1;
                destElem = 0;
            } else {
                destElem = (srcElem1 >> shiftAmt);
            }
            // Make sure the right shift sign extended when it should.
            if (ltz(srcElem1) && !ltz(destElem)) {
                destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                             1 - shiftAmt));
            }
        } else {
            if (shiftAmt >= sizeof(Element) * 8) {
                destElem = 0;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        }
'''
threeEqualRegInstX("sshl", "SshlDX", "SimdShiftOp", signedTypes, 2,
                   shlCode)
threeEqualRegInstX("sshl", "SshlQX", "SimdShiftOp", signedTypes, 4,
                   shlCode)
# SSHLL, SSHLL2
# Template (reused by the USHLL registrations): widen srcElem1 to BigElement
# and shift it left by imm; an imm of at least the destination width gives
# zero.
shllCode = '''
        if (imm >= sizeof(destElem) * 8) {
            destElem = 0;
        } else {
            destElem = (BigElement)srcElem1 << imm;
        }
'''
twoRegLongInstX("sshll", "SshllX", "SimdShiftOp", smallSignedTypes,
                shllCode, hasImm=True)
# The upper-half form carries the "2" suffix in its mnemonic, as the other
# upper-half registrations in this file do (e.g. "ssubl2").
twoRegLongInstX("sshll2", "Sshll2X", "SimdShiftOp", smallSignedTypes,
                shllCode, hasImm=True, hi=True)
# SSHR
# Template (reused by the USHR registrations): shift srcElem1 right by imm.
# Once imm reaches the source width the result is -1 when ltz(srcElem1)
# holds and 0 otherwise.
shrCode = '''
        if (imm >= sizeof(srcElem1) * 8) {
            if (ltz(srcElem1))
                destElem = -1;
            else
                destElem = 0;
        } else {
            destElem = srcElem1 >> imm;
        }
'''
twoEqualRegInstX("sshr", "SshrDX", "SimdShiftOp", signedTypes, 2, shrCode,
                 hasImm=True)
twoEqualRegInstX("sshr", "SshrQX", "SimdShiftOp", signedTypes, 4, shrCode,
                 hasImm=True)
# SSRA
# Template (reused by the USRA registrations): shift srcElem1 right by imm
# into a temporary and add it to destElem.  Once imm reaches the source
# width the temporary is -1 when ltz(srcElem1) holds and 0 otherwise.
sraCode = '''
        Element mid;
        if (imm >= sizeof(srcElem1) * 8) {
            mid = ltz(srcElem1) ? -1 : 0;
        } else {
            mid = srcElem1 >> imm;
            // Restore the sign bits if the shift did not propagate them.
            if (ltz(srcElem1) && !ltz(mid)) {
                mid |= -(mid & ((Element)1 <<
                            (sizeof(Element) * 8 - 1 - imm)));
            }
        }
        destElem += mid;
'''
# SSRA registrations: 2-register and 4-register forms.
twoEqualRegInstX("ssra", "SsraDX", "SimdShiftOp", signedTypes, 2, sraCode,
                 True, hasImm=True)
twoEqualRegInstX("ssra", "SsraQX", "SimdShiftOp", signedTypes, 4, sraCode,
                 True, hasImm=True)
# SSUBL
# Template shared by the long and wide subtract registrations: both operands
# are cast to BigElement before subtracting.
sublwCode = "destElem = (BigElement)srcElem1 - (BigElement)srcElem2;"
threeRegLongInstX("ssubl", "SsublX", "SimdAddOp", smallSignedTypes,
                  sublwCode)
threeRegLongInstX("ssubl2", "Ssubl2X", "SimdAddOp", smallSignedTypes,
                  sublwCode, hi=True)
# SSUBW
threeRegWideInstX("ssubw", "SsubwX", "SimdAddOp", smallSignedTypes,
                  sublwCode)
threeRegWideInstX("ssubw2", "Ssubw2X", "SimdAddOp", smallSignedTypes,
                  sublwCode, hi=True)
# SUB
# Plain element-wise subtraction.
subCode = "destElem = srcElem1 - srcElem2;"
threeEqualRegInstX("sub", "SubDX", "SimdAddOp", unsignedTypes, 2, subCode)
threeEqualRegInstX("sub", "SubQX", "SimdAddOp", unsignedTypes, 4, subCode)
# SUBHN, SUBHN2
# Template: subtract at BigElement width, then shift the difference right by
# the width of Element before storing it in destElem.
subhnCode = '''
        destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >>
                   (sizeof(Element) * 8);
'''
threeRegNarrowInstX("subhn", "SubhnX", "SimdAddOp", smallUnsignedTypes,
                    subhnCode)
threeRegNarrowInstX("subhn2", "Subhn2X", "SimdAddOp", smallUnsignedTypes,
                    subhnCode, hi=True)
# SUQADD
# Template: add srcElem1 to destElem, branching on the top bit of destElem.
# When that bit is clear, a sum with the top bit set or a wrapped sum is
# replaced by the largest value with a clear top bit, and fpscr.qc is set.
# When it is set, only a sum that overshoots into the top bit is clamped.
suqaddCode = '''
        FPSCR fpscr = (FPSCR) FpscrQc;
        Element tmp = destElem + srcElem1;
        if (bits(destElem, sizeof(Element) * 8 - 1) == 0) {
            if (bits(tmp, sizeof(Element) * 8 - 1) == 1 ||
                    tmp < srcElem1 || tmp < destElem) {
                destElem = (((Element) 1) << (sizeof(Element) * 8 - 1)) - 1;
                fpscr.qc = 1;
            } else {
                destElem = tmp;
            }
        } else {
            Element absDestElem = (~destElem) + 1;
            if (absDestElem < srcElem1) {
                // Still check for positive sat., no need to check for negative sat.
                if (bits(tmp, sizeof(Element) * 8 - 1) == 1) {
                    destElem = (((Element) 1) << (sizeof(Element) * 8 - 1)) - 1;
                    fpscr.qc = 1;
                } else {
                    destElem = tmp;
                }
            } else {
                destElem = tmp;
            }
        }
        FpscrQc = fpscr;
'''
# 2-register, 4-register and scalar forms.
twoEqualRegInstX("suqadd", "SuqaddDX", "SimdAddOp", smallUnsignedTypes, 2,
                 suqaddCode, True)
twoEqualRegInstX("suqadd", "SuqaddQX", "SimdAddOp", unsignedTypes, 4,
                 suqaddCode, True)
twoEqualRegInstX("suqadd", "SuqaddScX", "SimdAddOp", unsignedTypes, 4,
                 suqaddCode, True, scalar=True)
# SXTL -> alias to SSHLL
# TBL
# One registration per numeric variant (1 to 4, matching the digit in the
# class name) and trailing size argument (2 or 4); every TBL call passes
# the string "true".
tbxTblInstX("tbl", "Tbl1DX", "SimdMiscOp", ("uint8_t",), 1, "true", 2)
tbxTblInstX("tbl", "Tbl1QX", "SimdMiscOp", ("uint8_t",), 1, "true", 4)
tbxTblInstX("tbl", "Tbl2DX", "SimdMiscOp", ("uint8_t",), 2, "true", 2)
tbxTblInstX("tbl", "Tbl2QX", "SimdMiscOp", ("uint8_t",), 2, "true", 4)
tbxTblInstX("tbl", "Tbl3DX", "SimdMiscOp", ("uint8_t",), 3, "true", 2)
tbxTblInstX("tbl", "Tbl3QX", "SimdMiscOp", ("uint8_t",), 3, "true", 4)
tbxTblInstX("tbl", "Tbl4DX", "SimdMiscOp", ("uint8_t",), 4, "true", 2)
tbxTblInstX("tbl", "Tbl4QX", "SimdMiscOp", ("uint8_t",), 4, "true", 4)
# TBX
# Same set of variants as TBL, but every call passes the string "false".
tbxTblInstX("tbx", "Tbx1DX", "SimdMiscOp", ("uint8_t",), 1, "false", 2)
tbxTblInstX("tbx", "Tbx1QX", "SimdMiscOp", ("uint8_t",), 1, "false", 4)
tbxTblInstX("tbx", "Tbx2DX", "SimdMiscOp", ("uint8_t",), 2, "false", 2)
tbxTblInstX("tbx", "Tbx2QX", "SimdMiscOp", ("uint8_t",), 2, "false", 4)
tbxTblInstX("tbx", "Tbx3DX", "SimdMiscOp", ("uint8_t",), 3, "false", 2)
tbxTblInstX("tbx", "Tbx3QX", "SimdMiscOp", ("uint8_t",), 3, "false", 4)
tbxTblInstX("tbx", "Tbx4DX", "SimdMiscOp", ("uint8_t",), 4, "false", 2)
tbxTblInstX("tbx", "Tbx4QX", "SimdMiscOp", ("uint8_t",), 4, "false", 4)
# TRN1
# Template with one %s slot for "part" (0 or 1).  For each pair index i the
# destination receives element 2*i+part of srcReg1 followed by element
# 2*i+part of srcReg2.
trnCode = '''
        unsigned part = %s;
        for (unsigned i = 0; i < eCount / 2; i++) {
            destReg.elements[2 * i] = srcReg1.elements[2 * i + part];
            destReg.elements[2 * i + 1] = srcReg2.elements[2 * i + part];
        }
'''
threeRegScrambleInstX("trn1", "Trn1DX", "SimdAluOp", smallUnsignedTypes, 2,
                      trnCode % "0")
threeRegScrambleInstX("trn1", "Trn1QX", "SimdAluOp", unsignedTypes, 4,
                      trnCode % "0")
# TRN2
threeRegScrambleInstX("trn2", "Trn2DX", "SimdAluOp", smallUnsignedTypes, 2,
                      trnCode % "1")
threeRegScrambleInstX("trn2", "Trn2QX", "SimdAluOp", unsignedTypes, 4,
                      trnCode % "1")
# The registrations below pass code templates bound outside this section
# (abaCode, abalCode, abdCode, abdlCode, adalpCode, addlwCode and
# addAcrossLongCode), here with unsigned element types.

# UABA
threeEqualRegInstX("uaba", "UabaDX", "SimdAddAccOp", smallUnsignedTypes, 2,
                   abaCode, True)
threeEqualRegInstX("uaba", "UabaQX", "SimdAddAccOp", smallUnsignedTypes, 4,
                   abaCode, True)
# UABAL, UABAL2
threeRegLongInstX("uabal", "UabalX", "SimdAddAccOp", smallUnsignedTypes,
                  abalCode, True)
threeRegLongInstX("uabal2", "Uabal2X", "SimdAddAccOp", smallUnsignedTypes,
                  abalCode, True, hi=True)
# UABD
threeEqualRegInstX("uabd", "UabdDX", "SimdAddOp", smallUnsignedTypes, 2,
                   abdCode)
threeEqualRegInstX("uabd", "UabdQX", "SimdAddOp", smallUnsignedTypes, 4,
                   abdCode)
# UABDL, UABDL2
threeRegLongInstX("uabdl", "UabdlX", "SimdAddAccOp", smallUnsignedTypes,
                  abdlCode, True)
threeRegLongInstX("uabdl2", "Uabdl2X", "SimdAddAccOp", smallUnsignedTypes,
                  abdlCode, True, hi=True)
# UADALP
twoRegCondenseInstX("uadalp", "UadalpDX", "SimdAddOp", smallUnsignedTypes,
                    2, adalpCode, True)
twoRegCondenseInstX("uadalp", "UadalpQX", "SimdAddOp", smallUnsignedTypes,
                    4, adalpCode, True)
# UADDL, UADDL2
threeRegLongInstX("uaddl", "UaddlX", "SimdAddAccOp", smallUnsignedTypes,
                  addlwCode)
threeRegLongInstX("uaddl2", "Uaddl2X", "SimdAddAccOp", smallUnsignedTypes,
                  addlwCode, hi=True)
# UADDLP
twoRegCondenseInstX("uaddlp", "UaddlpDX", "SimdAddOp", smallUnsignedTypes,
                    2, addlwCode)
twoRegCondenseInstX("uaddlp", "UaddlpQX", "SimdAddOp", smallUnsignedTypes,
                    4, addlwCode)
# UADDLV
twoRegAcrossInstX("uaddlv", "UaddlvDX", "SimdAddOp",
                  ("uint8_t", "uint16_t"), 2, addAcrossLongCode, long=True)
twoRegAcrossInstX("uaddlv", "UaddlvQX", "SimdAddOp",
                  ("uint8_t", "uint16_t"), 4, addAcrossLongCode, long=True)
twoRegAcrossInstX("uaddlv", "UaddlvBQX", "SimdAddOp", ("uint32_t",), 4,
                  addAcrossLongCode, doubleDest=True, long=True)
# UADDW
threeRegWideInstX("uaddw", "UaddwX", "SimdAddAccOp", smallUnsignedTypes,
                  addlwCode)
threeRegWideInstX("uaddw2", "Uaddw2X", "SimdAddAccOp", smallUnsignedTypes,
                  addlwCode, hi=True)
# UCVTF (fixed-point)
# The fpOp wrapper (bound outside this section) is filled with a call to
# fplibFixedToFP; the fixed-point form forwards imm as its second argument.
ucvtfFixedCode = fpOp % ("fplibFixedToFP<Element>(srcElem1, imm, true,"
                         " FPCRRounding(fpscr), fpscr)")
twoEqualRegInstX("ucvtf", "UcvtfFixedDX", "SimdCvtOp", smallFloatTypes, 2,
                 ucvtfFixedCode, hasImm=True)
twoEqualRegInstX("ucvtf", "UcvtfFixedQX", "SimdCvtOp", floatTypes, 4,
                 ucvtfFixedCode, hasImm=True)
twoEqualRegInstX("ucvtf", "UcvtfFixedScX", "SimdCvtOp", floatTypes, 4,
                 ucvtfFixedCode, hasImm=True, scalar=True)
# UCVTF (integer)
# Same call as the fixed-point form with the second argument fixed at 0.
ucvtfIntCode = fpOp % ("fplibFixedToFP<Element>(srcElem1, 0, true,"
                       " FPCRRounding(fpscr), fpscr)")
twoEqualRegInstX("ucvtf", "UcvtfIntDX", "SimdCvtOp", smallFloatTypes, 2,
                 ucvtfIntCode)
twoEqualRegInstX("ucvtf", "UcvtfIntQX", "SimdCvtOp", floatTypes, 4,
                 ucvtfIntCode)
twoEqualRegInstX("ucvtf", "UcvtfIntScX", "SimdCvtOp", floatTypes, 4,
                 ucvtfIntCode, scalar=True)
# The registrations below pass code templates bound outside this section
# (haddCode, hsubCode, maxCode, minCode, maxAcrossCode and minAcrossCode),
# here with unsigned element types.

# UHADD
threeEqualRegInstX("uhadd", "UhaddDX", "SimdAddOp", smallUnsignedTypes, 2,
                   haddCode)
threeEqualRegInstX("uhadd", "UhaddQX", "SimdAddOp", smallUnsignedTypes, 4,
                   haddCode)
# UHSUB
threeEqualRegInstX("uhsub", "UhsubDX", "SimdAddOp", smallUnsignedTypes, 2,
                   hsubCode)
threeEqualRegInstX("uhsub", "UhsubQX", "SimdAddOp", smallUnsignedTypes, 4,
                   hsubCode)
# UMAX
threeEqualRegInstX("umax", "UmaxDX", "SimdCmpOp", smallUnsignedTypes, 2,
                   maxCode)
threeEqualRegInstX("umax", "UmaxQX", "SimdCmpOp", smallUnsignedTypes, 4,
                   maxCode)
# UMAXP
threeEqualRegInstX("umaxp", "UmaxpDX", "SimdCmpOp", smallUnsignedTypes, 2,
                   maxCode, pairwise=True)
threeEqualRegInstX("umaxp", "UmaxpQX", "SimdCmpOp", smallUnsignedTypes, 4,
                   maxCode, pairwise=True)
# UMAXV
twoRegAcrossInstX("umaxv", "UmaxvDX", "SimdCmpOp", ("uint8_t", "uint16_t"),
                  2, maxAcrossCode)
twoRegAcrossInstX("umaxv", "UmaxvQX", "SimdCmpOp", smallUnsignedTypes, 4,
                  maxAcrossCode)
# UMIN
threeEqualRegInstX("umin", "UminDX", "SimdCmpOp", smallUnsignedTypes, 2,
                   minCode)
threeEqualRegInstX("umin", "UminQX", "SimdCmpOp", smallUnsignedTypes, 4,
                   minCode)
# UMINP
threeEqualRegInstX("uminp", "UminpDX", "SimdCmpOp", smallUnsignedTypes, 2,
                   minCode, pairwise=True)
threeEqualRegInstX("uminp", "UminpQX", "SimdCmpOp", smallUnsignedTypes, 4,
                   minCode, pairwise=True)
# UMINV
twoRegAcrossInstX("uminv", "UminvDX", "SimdCmpOp", ("uint8_t", "uint16_t"),
                  2, minAcrossCode)
twoRegAcrossInstX("uminv", "UminvQX", "SimdCmpOp", smallUnsignedTypes, 4,
                  minAcrossCode)
# The multiply registrations below pass code templates bound outside this
# section (mlalCode, mlslCode and mullCode).  Every upper-half (hi=True)
# form carries the "2" suffix in its mnemonic, as the other upper-half
# registrations in this file do (e.g. "uabal2", "uaddl2", "usubl2").

# UMLAL, UMLAL2 (by element)
threeRegLongInstX("umlal", "UmlalElemX", "SimdMultAccOp",
                  smallUnsignedTypes, mlalCode, True, byElem=True)
threeRegLongInstX("umlal2", "UmlalElem2X", "SimdMultAccOp",
                  smallUnsignedTypes, mlalCode, True, byElem=True, hi=True)
# UMLAL, UMLAL2 (vector)
threeRegLongInstX("umlal", "UmlalX", "SimdMultAccOp", smallUnsignedTypes,
                  mlalCode, True)
threeRegLongInstX("umlal2", "Umlal2X", "SimdMultAccOp", smallUnsignedTypes,
                  mlalCode, True, hi=True)
# UMLSL, UMLSL2 (by element)
threeRegLongInstX("umlsl", "UmlslElemX", "SimdMultAccOp",
                  smallUnsignedTypes, mlslCode, True, byElem=True)
threeRegLongInstX("umlsl2", "UmlslElem2X", "SimdMultAccOp",
                  smallUnsignedTypes, mlslCode, True, byElem=True, hi=True)
# UMLSL, UMLSL2 (vector)
threeRegLongInstX("umlsl", "UmlslX", "SimdMultAccOp", smallUnsignedTypes,
                  mlslCode, True)
threeRegLongInstX("umlsl2", "Umlsl2X", "SimdMultAccOp", smallUnsignedTypes,
                  mlslCode, True, hi=True)
# UMOV
insToGprInstX("umov", "UmovWX", "SimdMiscOp", smallUnsignedTypes, 4, 'W')
insToGprInstX("umov", "UmovXX", "SimdMiscOp", ("uint64_t",), 4, 'X')
# UMULL, UMULL2 (by element)
threeRegLongInstX("umull", "UmullElemX", "SimdMultOp", smallUnsignedTypes,
                  mullCode, byElem=True)
threeRegLongInstX("umull2", "UmullElem2X", "SimdMultOp", smallUnsignedTypes,
                  mullCode, byElem=True, hi=True)
# UMULL, UMULL2 (vector)
threeRegLongInstX("umull", "UmullX", "SimdMultOp", smallUnsignedTypes,
                  mullCode)
threeRegLongInstX("umull2", "Umull2X", "SimdMultOp", smallUnsignedTypes,
                  mullCode, hi=True)
# UQADD
# Template: destElem = srcElem1 + srcElem2.  A sum smaller than either
# operand is treated as wrapped: destElem becomes (Element)(-1) and fpscr.qc
# is set.
uqaddCode = '''
        destElem = srcElem1 + srcElem2;
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (destElem < srcElem1 || destElem < srcElem2) {
            destElem = (Element)(-1);
            fpscr.qc = 1;
        }
        FpscrQc = fpscr;
'''
# 2-register, 4-register and scalar forms.
threeEqualRegInstX("uqadd", "UqaddDX", "SimdAddOp", smallUnsignedTypes, 2,
                   uqaddCode)
threeEqualRegInstX("uqadd", "UqaddQX", "SimdAddOp", unsignedTypes, 4,
                   uqaddCode)
threeEqualRegInstX("uqadd", "UqaddScX", "SimdAddOp", unsignedTypes, 4,
                   uqaddCode, scalar=True)
# UQRSHL
# Template: the low byte of srcElem2 is a signed shift amount.
#   * negative: shift srcElem1 right and add the last bit shifted out (rBit);
#   * positive: shift left; if any bit would be shifted out (or the amount
#     reaches the element width with a non-zero source) destElem is set to
#     mask(bits) and fpscr.qc is set;
#   * zero: copy srcElem1 unchanged.
# The zero case is handled explicitly, mirroring the UQSHL (register)
# template, so the overflow probe bits(srcElem1, N - 1, N - shiftAmt) is
# never evaluated with the inverted range (N - 1, N).
uqrshlCode = '''
        int16_t shiftAmt = (int8_t)srcElem2;
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (shiftAmt < 0) {
            shiftAmt = -shiftAmt;
            Element rBit = 0;
            if (shiftAmt <= sizeof(Element) * 8)
                rBit = bits(srcElem1, shiftAmt - 1);
            if (shiftAmt >= sizeof(Element) * 8) {
                shiftAmt = sizeof(Element) * 8 - 1;
                destElem = 0;
            } else {
                destElem = (srcElem1 >> shiftAmt);
            }
            destElem += rBit;
        } else if (shiftAmt > 0) {
            if (shiftAmt >= sizeof(Element) * 8) {
                if (srcElem1 != 0) {
                    destElem = mask(sizeof(Element) * 8);
                    fpscr.qc = 1;
                } else {
                    destElem = 0;
                }
            } else {
                if (bits(srcElem1, sizeof(Element) * 8 - 1,
                            sizeof(Element) * 8 - shiftAmt)) {
                    destElem = mask(sizeof(Element) * 8);
                    fpscr.qc = 1;
                } else {
                    destElem = srcElem1 << shiftAmt;
                }
            }
        } else {
            // A zero shift moves no bits, so it can neither round nor
            // saturate.
            destElem = srcElem1;
        }
        FpscrQc = fpscr;
'''
# UQRSHL registrations: 2-register, 4-register and scalar forms, all built
# from uqrshlCode.
threeEqualRegInstX("uqrshl", "UqrshlDX", "SimdCmpOp", smallUnsignedTypes,
                   2, uqrshlCode)
threeEqualRegInstX("uqrshl", "UqrshlQX", "SimdCmpOp", unsignedTypes, 4,
                   uqrshlCode)
threeEqualRegInstX("uqrshl", "UqrshlScX", "SimdCmpOp", unsignedTypes, 4,
                   uqrshlCode, scalar=True)
# UQRSHRN
# Template: shift srcElem1 right by imm, adding the last bit shifted out as
# a rounding increment, and store it in the narrower destElem.  A value that
# does not survive the cast to Element is clamped to mask(bits), i.e. all
# ones, and fpscr.qc is set.  The imm == 0 branch uses the same all-ones
# clamp as the shifting branch.
uqrshrnCode = '''
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (imm > sizeof(srcElem1) * 8) {
            if (srcElem1 != 0)
                fpscr.qc = 1;
            destElem = 0;
        } else if (imm) {
            BigElement mid = (srcElem1 >> (imm - 1));
            uint64_t rBit = mid & 0x1;
            mid >>= 1;
            mid += rBit;
            if (mid != (Element)mid) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = mid;
            }
        } else {
            if (srcElem1 != (Element)srcElem1) {
                // Unsigned clamp: every bit of Element set.
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = srcElem1;
            }
        }
        FpscrQc = fpscr;
'''
# UQRSHRN, UQRSHRN2 registrations: vector (lower half), vector (upper half)
# and scalar forms, all built from uqrshrnCode.
twoRegNarrowInstX("uqrshrn", "UqrshrnX", "SimdShiftOp", smallUnsignedTypes,
                  uqrshrnCode, hasImm=True)
twoRegNarrowInstX("uqrshrn2", "Uqrshrn2X", "SimdShiftOp",
                  smallUnsignedTypes, uqrshrnCode, hasImm=True, hi=True)
twoRegNarrowInstX("uqrshrn", "UqrshrnScX", "SimdShiftOp",
                  smallUnsignedTypes, uqrshrnCode, hasImm=True,
                  scalar=True)
# UQSHL (immediate)
# Template: shift srcElem1 left by imm.  If any of the top imm bits of the
# source are set (or imm reaches the element width with a non-zero source),
# destElem becomes mask(bits) and fpscr.qc is set.  A zero imm copies the
# source.
uqshlImmCode = '''
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (imm >= sizeof(Element) * 8) {
            if (srcElem1 != 0) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = 0;
            }
        } else if (imm) {
            destElem = (srcElem1 << imm);
            uint64_t topBits = bits((uint64_t)srcElem1,
                                    sizeof(Element) * 8 - 1,
                                    sizeof(Element) * 8 - imm);
            if (topBits != 0) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            }
        } else {
            destElem = srcElem1;
        }
        FpscrQc = fpscr;
'''
twoEqualRegInstX("uqshl", "UqshlImmDX", "SimdAluOp", smallUnsignedTypes, 2,
                 uqshlImmCode, hasImm=True)
twoEqualRegInstX("uqshl", "UqshlImmQX", "SimdAluOp", unsignedTypes, 4,
                 uqshlImmCode, hasImm=True)
twoEqualRegInstX("uqshl", "UqshlImmScX", "SimdAluOp", unsignedTypes, 4,
                 uqshlImmCode, hasImm=True, scalar=True)
# UQSHL (register)
# Template: the low byte of srcElem2 is a signed shift amount.  Negative
# amounts shift srcElem1 right (zero once the amount reaches the element
# width).  Positive amounts shift left and clamp to mask(bits), setting
# fpscr.qc, when set bits would be lost.  Zero copies srcElem1.
uqshlCode = '''
        int16_t shiftAmt = (int8_t)srcElem2;
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (shiftAmt < 0) {
            shiftAmt = -shiftAmt;
            if (shiftAmt >= sizeof(Element) * 8) {
                shiftAmt = sizeof(Element) * 8 - 1;
                destElem = 0;
            } else {
                destElem = (srcElem1 >> shiftAmt);
            }
        } else if (shiftAmt > 0) {
            if (shiftAmt >= sizeof(Element) * 8) {
                if (srcElem1 != 0) {
                    destElem = mask(sizeof(Element) * 8);
                    fpscr.qc = 1;
                } else {
                    destElem = 0;
                }
            } else {
                if (bits(srcElem1, sizeof(Element) * 8 - 1,
                            sizeof(Element) * 8 - shiftAmt)) {
                    destElem = mask(sizeof(Element) * 8);
                    fpscr.qc = 1;
                } else {
                    destElem = srcElem1 << shiftAmt;
                }
            }
        } else {
            destElem = srcElem1;
        }
        FpscrQc = fpscr;
'''
# 2-register, 4-register and scalar forms.
threeEqualRegInstX("uqshl", "UqshlDX", "SimdAluOp", smallUnsignedTypes, 2,
                   uqshlCode)
threeEqualRegInstX("uqshl", "UqshlQX", "SimdAluOp", unsignedTypes, 4,
                   uqshlCode)
threeEqualRegInstX("uqshl", "UqshlScX", "SimdAluOp", unsignedTypes, 4,
                   uqshlCode, scalar=True)
# UQSHRN, UQSHRN2
# Template: shift srcElem1 right by imm (no rounding) and store it in the
# narrower destElem.  A value that does not survive the cast to Element is
# clamped to mask(bits) and fpscr.qc is set.
uqshrnCode = '''
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (imm > sizeof(srcElem1) * 8) {
            if (srcElem1 != 0)
                fpscr.qc = 1;
            destElem = 0;
        } else if (imm) {
            BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
            if (mid != (Element)mid) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = mid;
            }
        } else {
            destElem = srcElem1;
        }
        FpscrQc = fpscr;
'''
# Vector (lower half), vector (upper half, hi=True) and scalar forms.
twoRegNarrowInstX("uqshrn", "UqshrnX", "SimdShiftOp", smallUnsignedTypes,
                  uqshrnCode, hasImm=True)
twoRegNarrowInstX("uqshrn2", "Uqshrn2X", "SimdShiftOp", smallUnsignedTypes,
                  uqshrnCode, hasImm=True, hi=True)
twoRegNarrowInstX("uqshrn", "UqshrnScX", "SimdShiftOp", smallUnsignedTypes,
                  uqshrnCode, hasImm=True, scalar=True)
# UQSUB
# Template: destElem = srcElem1 - srcElem2.  A difference larger than
# srcElem1 is treated as wrapped: destElem becomes 0 and fpscr.qc is set.
uqsubCode = '''
        destElem = srcElem1 - srcElem2;
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (destElem > srcElem1) {
            destElem = 0;
            fpscr.qc = 1;
        }
        FpscrQc = fpscr;
'''
# 2-register, 4-register and scalar forms.
threeEqualRegInstX("uqsub", "UqsubDX", "SimdAddOp", smallUnsignedTypes, 2,
                   uqsubCode)
threeEqualRegInstX("uqsub", "UqsubQX", "SimdAddOp", unsignedTypes, 4,
                   uqsubCode)
threeEqualRegInstX("uqsub", "UqsubScX", "SimdAddOp", unsignedTypes, 4,
                   uqsubCode, scalar=True)
# UQXTN, UQXTN2
# Template: copy srcElem1 into the narrower destElem.  If widening destElem
# back to BigElement does not reproduce srcElem1, destElem is clamped to
# mask(bits) and fpscr.qc is set.
uqxtnCode = '''
        FPSCR fpscr = (FPSCR) FpscrQc;
        destElem = srcElem1;
        if ((BigElement)destElem != srcElem1) {
            fpscr.qc = 1;
            destElem = mask(sizeof(Element) * 8);
        }
        FpscrQc = fpscr;
'''
twoRegNarrowInstX("uqxtn", "UqxtnX", "SimdMiscOp", smallUnsignedTypes,
                  uqxtnCode)
# The upper-half form carries the "2" suffix in its mnemonic, as the other
# upper-half registrations in this file do (e.g. "uqshrn2").
twoRegNarrowInstX("uqxtn2", "Uqxtn2X", "SimdMiscOp", smallUnsignedTypes,
                  uqxtnCode, hi=True)
twoRegNarrowInstX("uqxtn", "UqxtnScX", "SimdMiscOp", smallUnsignedTypes,
                  uqxtnCode, scalar=True)
# URECPE
# Delegates to the unsignedRecipEstimate() helper; registered for 32-bit
# elements only.
urecpeCode = "destElem = unsignedRecipEstimate(srcElem1);"
twoEqualRegInstX("urecpe", "UrecpeDX", "SimdMultAccOp", ("uint32_t",), 2,
                 urecpeCode)
twoEqualRegInstX("urecpe", "UrecpeQX", "SimdMultAccOp", ("uint32_t",), 4,
                 urecpeCode)
# URHADD (same template as SRHADD, unsigned element types)
threeEqualRegInstX("urhadd", "UrhaddDX", "SimdAddOp", smallUnsignedTypes,
                   2, rhaddCode)
threeEqualRegInstX("urhadd", "UrhaddQX", "SimdAddOp", smallUnsignedTypes,
                   4, rhaddCode)
# URSHL (same template as SRSHL, unsigned element types)
threeEqualRegInstX("urshl", "UrshlDX", "SimdShiftOp", unsignedTypes, 2,
                   rshlCode)
threeEqualRegInstX("urshl", "UrshlQX", "SimdShiftOp", unsignedTypes, 4,
                   rshlCode)
# URSHR (same template as SRSHR, unsigned element types)
twoEqualRegInstX("urshr", "UrshrDX", "SimdShiftOp", unsignedTypes, 2,
                 rshrCode, hasImm=True)
twoEqualRegInstX("urshr", "UrshrQX", "SimdShiftOp", unsignedTypes, 4,
                 rshrCode, hasImm=True)
# URSQRTE
# Delegates to the unsignedRSqrtEstimate() helper; registered for 32-bit
# elements only.
ursqrteCode = "destElem = unsignedRSqrtEstimate(srcElem1);"
twoEqualRegInstX("ursqrte", "UrsqrteDX", "SimdSqrtOp", ("uint32_t",), 2,
                 ursqrteCode)
twoEqualRegInstX("ursqrte", "UrsqrteQX", "SimdSqrtOp", ("uint32_t",), 4,
                 ursqrteCode)
# URSRA (same template as SRSRA, unsigned element types)
twoEqualRegInstX("ursra", "UrsraDX", "SimdShiftOp", unsignedTypes, 2,
                 rsraCode, True, hasImm=True)
twoEqualRegInstX("ursra", "UrsraQX", "SimdShiftOp", unsignedTypes, 4,
                 rsraCode, True, hasImm=True)
# USHL (same template as SSHL, unsigned element types)
threeEqualRegInstX("ushl", "UshlDX", "SimdShiftOp", unsignedTypes, 2,
                   shlCode)
threeEqualRegInstX("ushl", "UshlQX", "SimdShiftOp", unsignedTypes, 4,
                   shlCode)
# USHLL, USHLL2 (same template as SSHLL, unsigned element types)
twoRegLongInstX("ushll", "UshllX", "SimdShiftOp", smallUnsignedTypes,
                shllCode, hasImm=True)
# The upper-half form carries the "2" suffix in its mnemonic, as the other
# upper-half registrations in this file do (e.g. "usubl2").
twoRegLongInstX("ushll2", "Ushll2X", "SimdShiftOp", smallUnsignedTypes,
                shllCode, hi=True, hasImm=True)
# USHR (same template as SSHR, unsigned element types)
twoEqualRegInstX("ushr", "UshrDX", "SimdShiftOp", unsignedTypes, 2,
                 shrCode, hasImm=True)
twoEqualRegInstX("ushr", "UshrQX", "SimdShiftOp", unsignedTypes, 4,
                 shrCode, hasImm=True)
# USQADD
# Template: add srcElem1 to destElem, branching on the top bit of srcElem1.
# When that bit is clear, a wrapped sum is replaced by (Element)(-1).  When
# it is set, the two's-complement negation of srcElem1 is compared with
# destElem and a larger negation gives 0.  Both clamps set fpscr.qc.
usqaddCode = '''
        FPSCR fpscr = (FPSCR) FpscrQc;
        Element tmp = destElem + srcElem1;
        if (bits(srcElem1, sizeof(Element) * 8 - 1) == 0) {
            if (tmp < srcElem1 || tmp < destElem) {
                destElem = (Element)(-1);
                fpscr.qc = 1;
            } else {
                destElem = tmp;
            }
        } else {
            Element absSrcElem1 = (~srcElem1) + 1;
            if (absSrcElem1 > destElem) {
                destElem = 0;
                fpscr.qc = 1;
            } else {
                destElem = tmp;
            }
        }
        FpscrQc = fpscr;
'''
# 2-register, 4-register and scalar forms.
twoEqualRegInstX("usqadd", "UsqaddDX", "SimdAddOp", smallUnsignedTypes, 2,
                 usqaddCode, True)
twoEqualRegInstX("usqadd", "UsqaddQX", "SimdAddOp", unsignedTypes, 4,
                 usqaddCode, True)
twoEqualRegInstX("usqadd", "UsqaddScX", "SimdAddOp", unsignedTypes, 4,
                 usqaddCode, True, scalar=True)
# USRA (same template as SSRA, unsigned element types)
twoEqualRegInstX("usra", "UsraDX", "SimdShiftOp", unsignedTypes, 2,
                 sraCode, True, hasImm=True)
twoEqualRegInstX("usra", "UsraQX", "SimdShiftOp", unsignedTypes, 4,
                 sraCode, True, hasImm=True)
# USUBL (same template as SSUBL, unsigned element types)
threeRegLongInstX("usubl", "UsublX", "SimdAddOp", smallUnsignedTypes,
                  sublwCode)
threeRegLongInstX("usubl2", "Usubl2X", "SimdAddOp", smallUnsignedTypes,
                  sublwCode, hi=True)
# USUBW (same template as SSUBW, unsigned element types)
threeRegWideInstX("usubw", "UsubwX", "SimdAddOp", smallUnsignedTypes,
                  sublwCode)
threeRegWideInstX("usubw2", "Usubw2X", "SimdAddOp", smallUnsignedTypes,
                  sublwCode, hi=True)
# UXTL -> alias to USHLL
# UZP1
# Template with one %s slot for "part" (0 or 1).  The lower half of the
# destination takes every second element of srcReg1 starting at "part"; the
# upper half takes the same selection from srcReg2.
uzpCode = '''
        unsigned part = %s;
        for (unsigned i = 0; i < eCount / 2; i++) {
            destReg.elements[i] = srcReg1.elements[2 * i + part];
            destReg.elements[eCount / 2 + i] = srcReg2.elements[2 * i + part];
        }
'''
# Mnemonics are lower case, like every other registration in this file
# (compare "trn1"/"trn2" and "zip1"/"zip2").
threeRegScrambleInstX("uzp1", "Uzp1DX", "SimdAluOp", smallUnsignedTypes, 2,
                      uzpCode % "0")
threeRegScrambleInstX("uzp1", "Uzp1QX", "SimdAluOp", unsignedTypes, 4,
                      uzpCode % "0")
# UZP2
threeRegScrambleInstX("uzp2", "Uzp2DX", "SimdAluOp", smallUnsignedTypes, 2,
                      uzpCode % "1")
threeRegScrambleInstX("uzp2", "Uzp2QX", "SimdAluOp", unsignedTypes, 4,
                      uzpCode % "1")
# XTN, XTN2
# Plain narrowing copy: srcElem1 is assigned to the narrower destElem with
# no saturation check.
xtnCode = "destElem = srcElem1;"
# Mnemonics are lower case like every other registration in this file, and
# the upper-half form carries the "2" suffix (compare "subhn"/"subhn2").
twoRegNarrowInstX("xtn", "XtnX", "SimdMiscOp", smallUnsignedTypes, xtnCode)
twoRegNarrowInstX("xtn2", "Xtn2X", "SimdMiscOp", smallUnsignedTypes,
                  xtnCode, hi=True)
# ZIP1
# Template with one %s slot for "base".  Element base+i of srcReg1 and
# element base+i of srcReg2 are written to destination slots 2*i and 2*i+1.
# ZIP1 starts at base 0, ZIP2 at eCount / 2.
zipCode = '''
        unsigned base = %s;
        for (unsigned i = 0; i < eCount / 2; i++) {
            destReg.elements[2 * i] = srcReg1.elements[base + i];
            destReg.elements[2 * i + 1] = srcReg2.elements[base + i];
        }
'''
threeRegScrambleInstX("zip1", "Zip1DX", "SimdAluOp", smallUnsignedTypes, 2,
                      zipCode % "0")
threeRegScrambleInstX("zip1", "Zip1QX", "SimdAluOp", unsignedTypes, 4,
                      zipCode % "0")
# ZIP2
threeRegScrambleInstX("zip2", "Zip2DX", "SimdAluOp", smallUnsignedTypes, 2,
                      zipCode % "eCount / 2")
threeRegScrambleInstX("zip2", "Zip2QX", "SimdAluOp", unsignedTypes, 4,
                      zipCode % "eCount / 2")
3354
3355 }};