3 // Copyright (c) 2010 ARM Limited
6 // The license below extends only to copyright in the software and shall
7 // not be construed as granting a license to any other intellectual
8 // property including but not limited to intellectual property relating
9 // to a hardware implementation of the functionality of the software
10 // licensed hereunder. You may use the software subject to the license
11 // terms below provided that you ensure that this notice is replicated
12 // unmodified and in its entirety in all distributions of the software,
13 // modified or unmodified, in source code or in binary form.
15 // Redistribution and use in source and binary forms, with or without
16 // modification, are permitted provided that the following conditions are
17 // met: redistributions of source code must retain the above copyright
18 // notice, this list of conditions and the following disclaimer;
19 // redistributions in binary form must reproduce the above copyright
20 // notice, this list of conditions and the following disclaimer in the
21 // documentation and/or other materials provided with the distribution;
22 // neither the name of the copyright holders nor the names of its
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
26 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 // Authors: Gabe Black
// Decode helper for three-register NEON ops on unsigned elements.
// Builds a Base<T>(machInst, dest, op1, op2) with T one of uint8_t,
// uint16_t, uint32_t or uint64_t, or falls back to Unknown(machInst).
// NOTE(review): the lines selecting a case (presumably a switch on `size`)
// are not visible in this excerpt.
41 template <template <typename T> class Base>
43 decodeNeonUThreeUReg(unsigned size,
44 ExtMachInst machInst, IntRegIndex dest,
45 IntRegIndex op1, IntRegIndex op2)
49 return new Base<uint8_t>(machInst, dest, op1, op2);
51 return new Base<uint16_t>(machInst, dest, op1, op2);
53 return new Base<uint32_t>(machInst, dest, op1, op2);
55 return new Base<uint64_t>(machInst, dest, op1, op2);
57 return new Unknown(machInst);
// Signed counterpart of the three-register decode helper.
// Builds a Base<T>(machInst, dest, op1, op2) with T one of int8_t, int16_t,
// int32_t or int64_t, or falls back to Unknown(machInst).
// NOTE(review): the case-selection lines (presumably keyed on `size`) are
// not visible in this excerpt.
61 template <template <typename T> class Base>
63 decodeNeonSThreeUReg(unsigned size,
64 ExtMachInst machInst, IntRegIndex dest,
65 IntRegIndex op1, IntRegIndex op2)
69 return new Base<int8_t>(machInst, dest, op1, op2);
71 return new Base<int16_t>(machInst, dest, op1, op2);
73 return new Base<int32_t>(machInst, dest, op1, op2);
75 return new Base<int64_t>(machInst, dest, op1, op2);
77 return new Unknown(machInst);
// Forwards the same arguments to either decodeNeonUThreeUReg<Base> or
// decodeNeonSThreeUReg<Base>.
// NOTE(review): the branch is presumably taken on `notSigned`; the
// condition line is not visible in this excerpt.
81 template <template <typename T> class Base>
83 decodeNeonUSThreeUReg(bool notSigned, unsigned size,
84 ExtMachInst machInst, IntRegIndex dest,
85 IntRegIndex op1, IntRegIndex op2)
88 return decodeNeonUThreeUReg<Base>(size, machInst, dest, op1, op2);
90 return decodeNeonSThreeUReg<Base>(size, machInst, dest, op1, op2);
// Three-register decode helper restricted to the small unsigned element
// types: builds Base<uint8_t>, Base<uint16_t> or Base<uint32_t> from
// (machInst, dest, op1, op2), or falls back to Unknown(machInst).
// NOTE(review): the case-selection lines (presumably keyed on `size`) are
// not visible in this excerpt.
94 template <template <typename T> class Base>
96 decodeNeonUThreeUSReg(unsigned size,
97 ExtMachInst machInst, IntRegIndex dest,
98 IntRegIndex op1, IntRegIndex op2)
102 return new Base<uint8_t>(machInst, dest, op1, op2);
104 return new Base<uint16_t>(machInst, dest, op1, op2);
106 return new Base<uint32_t>(machInst, dest, op1, op2);
108 return new Unknown(machInst);
// Three-register decode helper restricted to the small signed element
// types: builds Base<int8_t>, Base<int16_t> or Base<int32_t> from
// (machInst, dest, op1, op2), or falls back to Unknown(machInst).
// NOTE(review): the case-selection lines (presumably keyed on `size`) are
// not visible in this excerpt.
112 template <template <typename T> class Base>
114 decodeNeonSThreeUSReg(unsigned size,
115 ExtMachInst machInst, IntRegIndex dest,
116 IntRegIndex op1, IntRegIndex op2)
120 return new Base<int8_t>(machInst, dest, op1, op2);
122 return new Base<int16_t>(machInst, dest, op1, op2);
124 return new Base<int32_t>(machInst, dest, op1, op2);
126 return new Unknown(machInst);
// Forwards the same arguments to either decodeNeonUThreeUSReg<Base> or
// decodeNeonSThreeUSReg<Base>.
// NOTE(review): the branch is presumably taken on `notSigned`; the
// condition line is not visible in this excerpt.
130 template <template <typename T> class Base>
132 decodeNeonUSThreeUSReg(bool notSigned, unsigned size,
133 ExtMachInst machInst, IntRegIndex dest,
134 IntRegIndex op1, IntRegIndex op2)
137 return decodeNeonUThreeUSReg<Base>(
138 size, machInst, dest, op1, op2);
140 return decodeNeonSThreeUSReg<Base>(
141 size, machInst, dest, op1, op2);
// Forwards to decodeNeonUThreeUSReg instantiated with either BaseQ or BaseD.
// NOTE(review): BaseQ is presumably chosen when `q` is set; the condition
// line is not visible in this excerpt.
145 template <template <typename T> class BaseD,
146 template <typename T> class BaseQ>
148 decodeNeonUThreeSReg(bool q, unsigned size,
149 ExtMachInst machInst, IntRegIndex dest,
150 IntRegIndex op1, IntRegIndex op2)
153 return decodeNeonUThreeUSReg<BaseQ>(
154 size, machInst, dest, op1, op2);
156 return decodeNeonUThreeUSReg<BaseD>(
157 size, machInst, dest, op1, op2);
// Forwards to decodeNeonSThreeUSReg instantiated with either BaseQ or BaseD.
// NOTE(review): BaseQ is presumably chosen when `q` is set; the condition
// line is not visible in this excerpt.
161 template <template <typename T> class BaseD,
162 template <typename T> class BaseQ>
164 decodeNeonSThreeSReg(bool q, unsigned size,
165 ExtMachInst machInst, IntRegIndex dest,
166 IntRegIndex op1, IntRegIndex op2)
169 return decodeNeonSThreeUSReg<BaseQ>(
170 size, machInst, dest, op1, op2);
172 return decodeNeonSThreeUSReg<BaseD>(
173 size, machInst, dest, op1, op2);
// Forwards (q, size, machInst, dest, op1, op2) to either
// decodeNeonUThreeSReg<BaseD, BaseQ> or decodeNeonSThreeSReg<BaseD, BaseQ>.
// NOTE(review): the branch is presumably taken on `notSigned`; the
// condition line is not visible in this excerpt.
177 template <template <typename T> class BaseD,
178 template <typename T> class BaseQ>
180 decodeNeonUSThreeSReg(bool q, bool notSigned, unsigned size,
181 ExtMachInst machInst, IntRegIndex dest,
182 IntRegIndex op1, IntRegIndex op2)
185 return decodeNeonUThreeSReg<BaseD, BaseQ>(
186 q, size, machInst, dest, op1, op2);
188 return decodeNeonSThreeSReg<BaseD, BaseQ>(
189 q, size, machInst, dest, op1, op2);
// Forwards to decodeNeonUThreeUReg (all four unsigned element widths)
// instantiated with either BaseQ or BaseD.
// NOTE(review): BaseQ is presumably chosen when `q` is set; the condition
// line is not visible in this excerpt.
193 template <template <typename T> class BaseD,
194 template <typename T> class BaseQ>
196 decodeNeonUThreeReg(bool q, unsigned size,
197 ExtMachInst machInst, IntRegIndex dest,
198 IntRegIndex op1, IntRegIndex op2)
201 return decodeNeonUThreeUReg<BaseQ>(
202 size, machInst, dest, op1, op2);
204 return decodeNeonUThreeUReg<BaseD>(
205 size, machInst, dest, op1, op2);
// Forwards to decodeNeonSThreeUReg (all four signed element widths)
// instantiated with either BaseQ or BaseD.
// NOTE(review): BaseQ is presumably chosen when `q` is set; the condition
// line is not visible in this excerpt.
209 template <template <typename T> class BaseD,
210 template <typename T> class BaseQ>
212 decodeNeonSThreeReg(bool q, unsigned size,
213 ExtMachInst machInst, IntRegIndex dest,
214 IntRegIndex op1, IntRegIndex op2)
217 return decodeNeonSThreeUReg<BaseQ>(
218 size, machInst, dest, op1, op2);
220 return decodeNeonSThreeUReg<BaseD>(
221 size, machInst, dest, op1, op2);
// Forwards (q, size, machInst, dest, op1, op2) to either
// decodeNeonUThreeReg<BaseD, BaseQ> or decodeNeonSThreeReg<BaseD, BaseQ>.
// NOTE(review): the branch is presumably taken on `notSigned`; the
// condition line is not visible in this excerpt.
225 template <template <typename T> class BaseD,
226 template <typename T> class BaseQ>
228 decodeNeonUSThreeReg(bool q, bool notSigned, unsigned size,
229 ExtMachInst machInst, IntRegIndex dest,
230 IntRegIndex op1, IntRegIndex op2)
233 return decodeNeonUThreeReg<BaseD, BaseQ>(
234 q, size, machInst, dest, op1, op2);
236 return decodeNeonSThreeReg<BaseD, BaseQ>(
237 q, size, machInst, dest, op1, op2);
// Decode helper for two-register-plus-immediate NEON ops on unsigned
// elements. Builds BaseQ<T> or BaseD<T> from (machInst, dest, op1, imm) with
// T one of uint8_t, uint16_t, uint32_t or uint64_t; each group falls back
// to Unknown(machInst).
// NOTE(review): the selection lines (presumably `q` for Q-vs-D and `size`
// for the element type) are not visible in this excerpt.
241 template <template <typename T> class BaseD,
242 template <typename T> class BaseQ>
244 decodeNeonUTwoShiftReg(bool q, unsigned size,
245 ExtMachInst machInst, IntRegIndex dest,
246 IntRegIndex op1, uint64_t imm)
251 return new BaseQ<uint8_t>(machInst, dest, op1, imm);
253 return new BaseQ<uint16_t>(machInst, dest, op1, imm);
255 return new BaseQ<uint32_t>(machInst, dest, op1, imm);
257 return new BaseQ<uint64_t>(machInst, dest, op1, imm);
259 return new Unknown(machInst);
264 return new BaseD<uint8_t>(machInst, dest, op1, imm);
266 return new BaseD<uint16_t>(machInst, dest, op1, imm);
268 return new BaseD<uint32_t>(machInst, dest, op1, imm);
270 return new BaseD<uint64_t>(machInst, dest, op1, imm);
272 return new Unknown(machInst);
// Signed counterpart of the two-register-plus-immediate decode helper.
// Builds BaseQ<T> or BaseD<T> from (machInst, dest, op1, imm) with T one of
// int8_t, int16_t, int32_t or int64_t; each group falls back to
// Unknown(machInst).
// NOTE(review): the selection lines (presumably `q` for Q-vs-D and `size`
// for the element type) are not visible in this excerpt.
277 template <template <typename T> class BaseD,
278 template <typename T> class BaseQ>
280 decodeNeonSTwoShiftReg(bool q, unsigned size,
281 ExtMachInst machInst, IntRegIndex dest,
282 IntRegIndex op1, uint64_t imm)
287 return new BaseQ<int8_t>(machInst, dest, op1, imm);
289 return new BaseQ<int16_t>(machInst, dest, op1, imm);
291 return new BaseQ<int32_t>(machInst, dest, op1, imm);
293 return new BaseQ<int64_t>(machInst, dest, op1, imm);
295 return new Unknown(machInst);
300 return new BaseD<int8_t>(machInst, dest, op1, imm);
302 return new BaseD<int16_t>(machInst, dest, op1, imm);
304 return new BaseD<int32_t>(machInst, dest, op1, imm);
306 return new BaseD<int64_t>(machInst, dest, op1, imm);
308 return new Unknown(machInst);
// Forwards (q, size, machInst, dest, op1, imm) to either
// decodeNeonUTwoShiftReg<BaseD, BaseQ> or decodeNeonSTwoShiftReg<BaseD, BaseQ>.
// NOTE(review): the branch is presumably taken on `notSigned`; the
// condition line is not visible in this excerpt.
314 template <template <typename T> class BaseD,
315 template <typename T> class BaseQ>
317 decodeNeonUSTwoShiftReg(bool q, bool notSigned, unsigned size,
318 ExtMachInst machInst, IntRegIndex dest,
319 IntRegIndex op1, uint64_t imm)
322 return decodeNeonUTwoShiftReg<BaseD, BaseQ>(
323 q, size, machInst, dest, op1, imm);
325 return decodeNeonSTwoShiftReg<BaseD, BaseQ>(
326 q, size, machInst, dest, op1, imm);
// Two-register-plus-immediate decode helper restricted to the small
// unsigned element types: builds Base<uint8_t>, Base<uint16_t> or
// Base<uint32_t> from (machInst, dest, op1, imm), or falls back to
// Unknown(machInst).
// NOTE(review): the case-selection lines (presumably keyed on `size`) are
// not visible in this excerpt.
330 template <template <typename T> class Base>
332 decodeNeonUTwoShiftUSReg(unsigned size,
333 ExtMachInst machInst, IntRegIndex dest,
334 IntRegIndex op1, uint64_t imm)
338 return new Base<uint8_t>(machInst, dest, op1, imm);
340 return new Base<uint16_t>(machInst, dest, op1, imm);
342 return new Base<uint32_t>(machInst, dest, op1, imm);
344 return new Unknown(machInst);
// Forwards to decodeNeonUTwoShiftUSReg instantiated with either BaseQ or
// BaseD.
// NOTE(review): BaseQ is presumably chosen when `q` is set; the condition
// line is not visible in this excerpt.
348 template <template <typename T> class BaseD,
349 template <typename T> class BaseQ>
351 decodeNeonUTwoShiftSReg(bool q, unsigned size,
352 ExtMachInst machInst, IntRegIndex dest,
353 IntRegIndex op1, uint64_t imm)
356 return decodeNeonUTwoShiftUSReg<BaseQ>(
357 size, machInst, dest, op1, imm);
359 return decodeNeonUTwoShiftUSReg<BaseD>(
360 size, machInst, dest, op1, imm);
// Two-register-plus-immediate decode helper restricted to the small signed
// element types: builds Base<int8_t>, Base<int16_t> or Base<int32_t> from
// (machInst, dest, op1, imm), or falls back to Unknown(machInst).
// NOTE(review): the case-selection lines (presumably keyed on `size`) are
// not visible in this excerpt.
364 template <template <typename T> class Base>
366 decodeNeonSTwoShiftUSReg(unsigned size,
367 ExtMachInst machInst, IntRegIndex dest,
368 IntRegIndex op1, uint64_t imm)
372 return new Base<int8_t>(machInst, dest, op1, imm);
374 return new Base<int16_t>(machInst, dest, op1, imm);
376 return new Base<int32_t>(machInst, dest, op1, imm);
378 return new Unknown(machInst);
// Forwards to decodeNeonSTwoShiftUSReg instantiated with either BaseQ or
// BaseD.
// NOTE(review): BaseQ is presumably chosen when `q` is set; the condition
// line is not visible in this excerpt.
382 template <template <typename T> class BaseD,
383 template <typename T> class BaseQ>
385 decodeNeonSTwoShiftSReg(bool q, unsigned size,
386 ExtMachInst machInst, IntRegIndex dest,
387 IntRegIndex op1, uint64_t imm)
390 return decodeNeonSTwoShiftUSReg<BaseQ>(
391 size, machInst, dest, op1, imm);
393 return decodeNeonSTwoShiftUSReg<BaseD>(
394 size, machInst, dest, op1, imm);
// Forwards (q, size, machInst, dest, op1, imm) to either
// decodeNeonUTwoShiftSReg<BaseD, BaseQ> or
// decodeNeonSTwoShiftSReg<BaseD, BaseQ>.
// NOTE(review): the branch is presumably taken on `notSigned`; the
// condition line is not visible in this excerpt.
398 template <template <typename T> class BaseD,
399 template <typename T> class BaseQ>
401 decodeNeonUSTwoShiftSReg(bool q, bool notSigned, unsigned size,
402 ExtMachInst machInst, IntRegIndex dest,
403 IntRegIndex op1, uint64_t imm)
406 return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
407 q, size, machInst, dest, op1, imm);
409 return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
410 q, size, machInst, dest, op1, imm);
// Two-register "misc" decode helper restricted to the small unsigned
// element types: builds Base<uint8_t>, Base<uint16_t> or Base<uint32_t>
// from (machInst, dest, op1), or falls back to Unknown(machInst).
// NOTE(review): the end of the parameter list (declaring op1) and the
// case-selection lines are not visible in this excerpt.
414 template <template <typename T> class Base>
416 decodeNeonUTwoMiscUSReg(unsigned size,
417 ExtMachInst machInst, IntRegIndex dest,
422 return new Base<uint8_t>(machInst, dest, op1);
424 return new Base<uint16_t>(machInst, dest, op1);
426 return new Base<uint32_t>(machInst, dest, op1);
428 return new Unknown(machInst);
// Two-register "misc" decode helper restricted to the small signed element
// types: builds Base<int8_t>, Base<int16_t> or Base<int32_t> from
// (machInst, dest, op1), or falls back to Unknown(machInst).
// NOTE(review): the end of the parameter list (declaring op1) and the
// case-selection lines are not visible in this excerpt.
432 template <template <typename T> class Base>
434 decodeNeonSTwoMiscUSReg(unsigned size,
435 ExtMachInst machInst, IntRegIndex dest,
440 return new Base<int8_t>(machInst, dest, op1);
442 return new Base<int16_t>(machInst, dest, op1);
444 return new Base<int32_t>(machInst, dest, op1);
446 return new Unknown(machInst);
// Forwards (size, machInst, dest, op1) to decodeNeonUTwoMiscUSReg
// instantiated with either BaseQ or BaseD.
// NOTE(review): BaseQ is presumably chosen when `q` is set; the condition
// line and the end of the parameter list are not visible in this excerpt.
450 template <template <typename T> class BaseD,
451 template <typename T> class BaseQ>
453 decodeNeonUTwoMiscSReg(bool q, unsigned size,
454 ExtMachInst machInst, IntRegIndex dest,
458 return decodeNeonUTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
460 return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
// Forwards (size, machInst, dest, op1) to decodeNeonSTwoMiscUSReg
// instantiated with either BaseQ or BaseD.
// NOTE(review): BaseQ is presumably chosen when `q` is set; the condition
// line and the end of the parameter list are not visible in this excerpt.
464 template <template <typename T> class BaseD,
465 template <typename T> class BaseQ>
467 decodeNeonSTwoMiscSReg(bool q, unsigned size,
468 ExtMachInst machInst, IntRegIndex dest,
472 return decodeNeonSTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
474 return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
// Two-register "misc" decode helper over all unsigned element widths:
// builds Base<uint8_t>, Base<uint16_t>, Base<uint32_t> or Base<uint64_t>
// from (machInst, dest, op1), or falls back to Unknown(machInst).
// NOTE(review): the end of the parameter list (declaring op1) and the
// case-selection lines are not visible in this excerpt.
478 template <template <typename T> class Base>
480 decodeNeonUTwoMiscUReg(unsigned size,
481 ExtMachInst machInst, IntRegIndex dest,
486 return new Base<uint8_t>(machInst, dest, op1);
488 return new Base<uint16_t>(machInst, dest, op1);
490 return new Base<uint32_t>(machInst, dest, op1);
492 return new Base<uint64_t>(machInst, dest, op1);
494 return new Unknown(machInst);
// Two-register "misc" decode helper over all signed element widths:
// builds Base<int8_t>, Base<int16_t>, Base<int32_t> or Base<int64_t> from
// (machInst, dest, op1), or falls back to Unknown(machInst).
// NOTE(review): the end of the parameter list (declaring op1) and the
// case-selection lines are not visible in this excerpt.
498 template <template <typename T> class Base>
500 decodeNeonSTwoMiscUReg(unsigned size,
501 ExtMachInst machInst, IntRegIndex dest,
506 return new Base<int8_t>(machInst, dest, op1);
508 return new Base<int16_t>(machInst, dest, op1);
510 return new Base<int32_t>(machInst, dest, op1);
512 return new Base<int64_t>(machInst, dest, op1);
514 return new Unknown(machInst);
// Forwards (size, machInst, dest, op1) to decodeNeonSTwoMiscUReg
// instantiated with either BaseQ or BaseD.
// NOTE(review): BaseQ is presumably chosen when `q` is set; the condition
// line and the end of the parameter list are not visible in this excerpt.
518 template <template <typename T> class BaseD,
519 template <typename T> class BaseQ>
521 decodeNeonSTwoMiscReg(bool q, unsigned size,
522 ExtMachInst machInst, IntRegIndex dest,
526 return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
528 return decodeNeonSTwoMiscUReg<BaseD>(size, machInst, dest, op1);
// Forwards (size, machInst, dest, op1) to decodeNeonUTwoMiscUReg
// instantiated with either BaseQ or BaseD.
// NOTE(review): BaseQ is presumably chosen when `q` is set; the condition
// line and the end of the parameter list are not visible in this excerpt.
532 template <template <typename T> class BaseD,
533 template <typename T> class BaseQ>
535 decodeNeonUTwoMiscReg(bool q, unsigned size,
536 ExtMachInst machInst, IntRegIndex dest,
540 return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
542 return decodeNeonUTwoMiscUReg<BaseD>(size, machInst, dest, op1);
// Forwards (q, size, machInst, dest, op1) to either
// decodeNeonUTwoMiscSReg<BaseD, BaseQ> or decodeNeonSTwoMiscSReg<BaseD, BaseQ>.
// The TwoMisc helpers are the ones whose parameter lists match these five
// arguments; the TwoShift helpers additionally require a uint64_t immediate
// and therefore cannot be called from here.
// NOTE(review): the branch is presumably taken on `notSigned`; the
// condition line and the end of the parameter list are not visible in this
// excerpt.
546 template <template <typename T> class BaseD,
547 template <typename T> class BaseQ>
549 decodeNeonUSTwoMiscSReg(bool q, bool notSigned, unsigned size,
550 ExtMachInst machInst, IntRegIndex dest,
554 return decodeNeonUTwoMiscSReg<BaseD, BaseQ>(
555 q, size, machInst, dest, op1);
557 return decodeNeonSTwoMiscSReg<BaseD, BaseQ>(
558 q, size, machInst, dest, op1);
// Greater-than compare helper: yields 0.0 when op1 > op2 and 1.0 otherwise.
// NOTE(review): the statement guarded by the isSnan() check (presumably a
// signalling-NaN result) is not visible in this excerpt.
566 vcgtFunc(float op1, float op2)
568 if (isSnan(op1) || isSnan(op2))
570 return (op1 > op2) ? 0.0 : 1.0;
// Greater-or-equal compare helper: yields 0.0 when op1 >= op2 and 1.0
// otherwise.
// NOTE(review): the statement guarded by the isSnan() check is not visible
// in this excerpt.
574 vcgeFunc(float op1, float op2)
576 if (isSnan(op1) || isSnan(op2))
578 return (op1 >= op2) ? 0.0 : 1.0;
// Equality compare helper: yields 0.0 when op1 == op2 and 1.0 otherwise.
// NOTE(review): the statement guarded by the isSnan() check is not visible
// in this excerpt.
582 vceqFunc(float op1, float op2)
584 if (isSnan(op1) || isSnan(op2))
586 return (op1 == op2) ? 0.0 : 1.0;
// Less-or-equal compare helper: yields 0.0 when op1 <= op2 and 1.0
// otherwise.
// NOTE(review): the statement guarded by the isSnan() check is not visible
// in this excerpt.
590 vcleFunc(float op1, float op2)
592 if (isSnan(op1) || isSnan(op2))
594 return (op1 <= op2) ? 0.0 : 1.0;
// Less-than compare helper: yields 0.0 when op1 < op2 and 1.0 otherwise.
// NOTE(review): the statement guarded by the isSnan() check is not visible
// in this excerpt.
598 vcltFunc(float op1, float op2)
600 if (isSnan(op1) || isSnan(op2))
602 return (op1 < op2) ? 0.0 : 1.0;
// Absolute greater-than compare helper: yields 0.0 when |op1| > |op2|
// (via fabsf) and 1.0 otherwise.
// NOTE(review): the statement guarded by the isSnan() check is not visible
// in this excerpt.
606 vacgtFunc(float op1, float op2)
608 if (isSnan(op1) || isSnan(op2))
610 return (fabsf(op1) > fabsf(op2)) ? 0.0 : 1.0;
// Absolute greater-or-equal compare helper: yields 0.0 when |op1| >= |op2|
// (via fabsf) and 1.0 otherwise.
// NOTE(review): the statement guarded by the isSnan() check is not visible
// in this excerpt.
614 vacgeFunc(float op1, float op2)
616 if (isSnan(op1) || isSnan(op2))
618 return (fabsf(op1) >= fabsf(op2)) ? 0.0 : 1.0;
# Tuples of C++ element type names handed to the generator functions as
# their `types` argument.
# "small" tuples stop at 32 bits; the full tuples append the 64-bit type.
627 smallUnsignedTypes = ("uint8_t", "uint16_t", "uint32_t")
628 unsignedTypes = smallUnsignedTypes + ("uint64_t",)
629 smallSignedTypes = ("int8_t", "int16_t", "int32_t")
630 signedTypes = smallSignedTypes + ("int64_t",)
# Combined unsigned + signed groupings.
631 smallTypes = smallUnsignedTypes + smallSignedTypes
632 allTypes = unsignedTypes + signedTypes
# Generate a three-operand NEON instruction whose two sources and
# destination all span the same number of registers.
#   name, Name -- passed to InstObjParams; Name is also the class_name used
#                 for NeonExecDeclare.
#   types      -- element type names; presumably iterated to emit one
#                 NeonExecDeclare per type (the loop line is not visible).
#   rCount     -- number of FpOp1P/FpOp2P/FpDestP registers to marshal.
#   op         -- C++ snippet supplied to the element loop via the "op" key.
#   readDest   -- presumably enables loading the previous destination value.
#   pairwise   -- presumably selects the adjacent-pair loop variant.
# Appends to the global header_output and exec_output.
634 def threeEqualRegInst(name, Name, types, rCount, op,
635 readDest=False, pairwise=False):
636 global header_output, exec_output
# Generated code starts with the SIMD-enabled check, then the work vectors.
637 eWalkCode = simdEnabledCheckCode + '''
638 RegVect srcReg1, srcReg2, destReg;
640 for reg in range(rCount):
642 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
643 srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
644 ''' % { "reg" : reg }
# Destination load (its guarding condition is not visible in this excerpt).
647 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
648 ''' % { "reg" : reg }
651 readDestCode = 'destElem = gtoh(destReg.elements[i]);'
654 for (unsigned i = 0; i < eCount; i++) {
655 Element srcElem1 = gtoh(2 * i < eCount ?
656 srcReg1.elements[2 * i] :
657 srcReg2.elements[2 * i - eCount]);
658 Element srcElem2 = gtoh(2 * i < eCount ?
659 srcReg1.elements[2 * i + 1] :
660 srcReg2.elements[2 * i + 1 - eCount]);
664 destReg.elements[i] = htog(destElem);
666 ''' % { "op" : op, "readDest" : readDestCode }
# The loop above pairs adjacent elements (2*i, 2*i+1), drawing first from
# srcReg1 and then from srcReg2; the loop below works element by element.
669 for (unsigned i = 0; i < eCount; i++) {
670 Element srcElem1 = gtoh(srcReg1.elements[i]);
671 Element srcElem2 = gtoh(srcReg2.elements[i]);
675 destReg.elements[i] = htog(destElem);
677 ''' % { "op" : op, "readDest" : readDestCode }
678 for reg in range(rCount):
680 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
681 ''' % { "reg" : reg }
# Emit the class declaration and the execute method from the templates.
682 iop = InstObjParams(name, Name,
686 "predicate_test": predicateTest }, [])
687 header_output += NeonRegRegRegOpDeclare.subst(iop)
688 exec_output += NeonEqualRegExecute.subst(iop)
690 substDict = { "targs" : type,
691 "class_name" : Name }
692 exec_output += NeonExecDeclare.subst(substDict)
# Floating-point variant of the equal-register-count three-operand
# generator: operands are handled one FloatReg at a time rather than as
# packed elements.
#   name, Name -- passed to InstObjParams; Name is also the class_name used
#                 for NeonExecDeclare.
#   types      -- type names; presumably iterated to emit one
#                 NeonExecDeclare per type (the loop line is not visible).
#   rCount     -- number of registers per operand.
#   op         -- C++ snippet supplied to the register loop via the "op" key.
#   readDest   -- presumably enables loading the previous destination value.
#   pairwise   -- presumably selects the adjacent-pair loop variant.
#   toInt      -- presumably selects the RegVect/FloatRegBits destination
#                 forms over the FloatVect/FloatReg ones.
# Appends to the global header_output and exec_output.
694 def threeEqualRegInstFp(name, Name, types, rCount, op,
695 readDest=False, pairwise=False, toInt=False):
696 global header_output, exec_output
697 eWalkCode = simdEnabledCheckCode + '''
698 typedef FloatReg FloatVect[rCount];
699 FloatVect srcRegs1, srcRegs2;
702 eWalkCode += 'RegVect destRegs;\n'
704 eWalkCode += 'FloatVect destRegs;\n'
705 for reg in range(rCount):
707 srcRegs1[%(reg)d] = FpOp1P%(reg)d;
708 srcRegs2[%(reg)d] = FpOp2P%(reg)d;
709 ''' % { "reg" : reg }
# Two alternative destination loads follow: raw bits into destRegs.regs,
# or the float value into destRegs (selecting conditions not visible).
713 destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
714 ''' % { "reg" : reg }
717 destRegs[%(reg)d] = FpDestP%(reg)d;
718 ''' % { "reg" : reg }
721 readDestCode = 'destReg = destRegs[r];'
722 destType = 'FloatReg'
723 writeDest = 'destRegs[r] = destReg;'
725 destType = 'FloatRegBits'
726 writeDest = 'destRegs.regs[r] = destReg;'
729 for (unsigned r = 0; r < rCount; r++) {
730 FloatReg srcReg1 = (2 * r < rCount) ?
731 srcRegs1[2 * r] : srcRegs2[2 * r - rCount];
732 FloatReg srcReg2 = (2 * r < rCount) ?
733 srcRegs1[2 * r + 1] : srcRegs2[2 * r + 1 - rCount];
734 %(destType)s destReg;
740 "readDest" : readDestCode,
741 "destType" : destType,
742 "writeDest" : writeDest }
745 for (unsigned r = 0; r < rCount; r++) {
746 FloatReg srcReg1 = srcRegs1[r];
747 FloatReg srcReg2 = srcRegs2[r];
748 %(destType)s destReg;
754 "readDest" : readDestCode,
755 "destType" : destType,
756 "writeDest" : writeDest }
757 for reg in range(rCount):
760 FpDestP%(reg)d.uw = destRegs.regs[%(reg)d];
761 ''' % { "reg" : reg }
764 FpDestP%(reg)d = destRegs[%(reg)d];
765 ''' % { "reg" : reg }
# Emit the class declaration and the execute method from the templates.
766 iop = InstObjParams(name, Name,
770 "predicate_test": predicateTest }, [])
771 header_output += NeonRegRegRegOpDeclare.subst(iop)
772 exec_output += NeonEqualRegExecute.subst(iop)
774 substDict = { "targs" : type,
775 "class_name" : Name }
776 exec_output += NeonExecDeclare.subst(substDict)
# Generate a three-operand NEON instruction whose operands may differ in
# width (narrow / long / wide forms).
#   name, Name -- passed to InstObjParams; Name is also the class_name used
#                 for NeonExecDeclare.
#   types      -- element type names; presumably iterated to emit one
#                 NeonExecDeclare per type (the loop line is not visible).
#   op         -- C++ snippet supplied to the element loop via the "op" key.
#   bigSrc1, bigSrc2, bigDest -- presumably adjust the register counts and
#                 type prefixes initialised below; the lines doing so are
#                 not visible in this excerpt.
#   readDest   -- presumably enables loading the previous destination value.
# Appends to the global header_output and exec_output.
778 def threeUnequalRegInst(name, Name, types, op,
779 bigSrc1, bigSrc2, bigDest, readDest):
780 global header_output, exec_output
# Defaults: two registers per operand and no element-type prefix.
781 src1Cnt = src2Cnt = destCnt = 2
782 src1Prefix = src2Prefix = destPrefix = ''
792 eWalkCode = simdEnabledCheckCode + '''
796 ''' % (src1Prefix, src2Prefix, destPrefix)
797 for reg in range(src1Cnt):
799 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
800 ''' % { "reg" : reg }
801 for reg in range(src2Cnt):
803 srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
804 ''' % { "reg" : reg }
806 for reg in range(destCnt):
808 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
809 ''' % { "reg" : reg }
812 readDestCode = 'destElem = gtoh(destReg.elements[i]);'
# NOTE(review): srcElem2 below is declared with src1Prefix although
# src2Prefix is supplied in the substitution dict -- confirm whether this
# is intentional before changing it.
814 for (unsigned i = 0; i < eCount; i++) {
815 %(src1Prefix)sElement srcElem1 = gtoh(srcReg1.elements[i]);
816 %(src1Prefix)sElement srcElem2 = gtoh(srcReg2.elements[i]);
817 %(destPrefix)sElement destElem;
820 destReg.elements[i] = htog(destElem);
822 ''' % { "op" : op, "readDest" : readDestCode,
823 "src1Prefix" : src1Prefix, "src2Prefix" : src2Prefix,
824 "destPrefix" : destPrefix }
825 for reg in range(destCnt):
827 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
828 ''' % { "reg" : reg }
# Emit the class declaration and the execute method from the templates.
829 iop = InstObjParams(name, Name,
833 "predicate_test": predicateTest }, [])
834 header_output += NeonRegRegRegOpDeclare.subst(iop)
835 exec_output += NeonUnequalRegExecute.subst(iop)
837 substDict = { "targs" : type,
838 "class_name" : Name }
839 exec_output += NeonExecDeclare.subst(substDict)
# Narrowing form: calls threeUnequalRegInst with bigSrc1=True, bigSrc2=True
# and bigDest=False, passing readDest through.
841 def threeRegNarrowInst(name, Name, types, op, readDest=False):
842 threeUnequalRegInst(name, Name, types, op,
843 True, True, False, readDest)
# Long form: calls threeUnequalRegInst with bigSrc1=False, bigSrc2=False
# and bigDest=True, passing readDest through.
845 def threeRegLongInst(name, Name, types, op, readDest=False):
846 threeUnequalRegInst(name, Name, types, op,
847 False, False, True, readDest)
# Wide form: calls threeUnequalRegInst with bigSrc1=True, bigSrc2=False
# and bigDest=True, passing readDest through.
849 def threeRegWideInst(name, Name, types, op, readDest=False):
850 threeUnequalRegInst(name, Name, types, op,
851 True, False, True, readDest)
# Generate a NEON instruction combining every element of the first source
# with one fixed element of the second source, selected by `imm` in the
# generated code; the generated code asserts 0 <= imm < eCount.
#   name, Name -- passed to InstObjParams; Name is also the class_name used
#                 for NeonExecDeclare.
#   types      -- element type names; presumably iterated to emit one
#                 NeonExecDeclare per type (the loop line is not visible).
#   rCount     -- number of registers per operand.
#   op         -- C++ snippet supplied to the element loop via the "op" key.
#   readDest   -- presumably enables loading the previous destination value.
# Appends to the global header_output and exec_output.
853 def twoEqualRegInst(name, Name, types, rCount, op, readDest=False):
854 global header_output, exec_output
855 eWalkCode = simdEnabledCheckCode + '''
856 RegVect srcReg1, srcReg2, destReg;
858 for reg in range(rCount):
860 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
861 srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
862 ''' % { "reg" : reg }
# Destination load (its guarding condition is not visible in this excerpt).
865 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
866 ''' % { "reg" : reg }
869 readDestCode = 'destElem = gtoh(destReg.elements[i]);'
871 assert(imm >= 0 && imm < eCount);
872 for (unsigned i = 0; i < eCount; i++) {
873 Element srcElem1 = gtoh(srcReg1.elements[i]);
874 Element srcElem2 = gtoh(srcReg2.elements[imm]);
878 destReg.elements[i] = htog(destElem);
880 ''' % { "op" : op, "readDest" : readDestCode }
881 for reg in range(rCount):
883 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
884 ''' % { "reg" : reg }
# Uses the immediate-carrying declaration template.
885 iop = InstObjParams(name, Name,
889 "predicate_test": predicateTest }, [])
890 header_output += NeonRegRegRegImmOpDeclare.subst(iop)
891 exec_output += NeonEqualRegExecute.subst(iop)
893 substDict = { "targs" : type,
894 "class_name" : Name }
895 exec_output += NeonExecDeclare.subst(substDict)
# Long variant of the "element selected by imm" generator: the sources span
# rCount registers and the destination spans 2 * rCount registers.
#   name, Name -- passed to InstObjParams; Name is also the class_name used
#                 for NeonExecDeclare.
#   types      -- element type names; presumably iterated to emit one
#                 NeonExecDeclare per type (the loop line is not visible).
#   op         -- C++ snippet supplied to the element loop via the "op" key.
#   readDest   -- presumably enables loading the previous destination value.
# NOTE(review): rCount is not a parameter; the line that defines it is not
# visible in this excerpt.
# Appends to the global header_output and exec_output.
897 def twoRegLongInst(name, Name, types, op, readDest=False):
898 global header_output, exec_output
900 eWalkCode = simdEnabledCheckCode + '''
901 RegVect srcReg1, srcReg2;
904 for reg in range(rCount):
906 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
907 srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);;
908 ''' % { "reg" : reg }
910 for reg in range(2 * rCount):
912 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
913 ''' % { "reg" : reg }
916 readDestCode = 'destElem = gtoh(destReg.elements[i]);'
918 assert(imm >= 0 && imm < eCount);
919 for (unsigned i = 0; i < eCount; i++) {
920 Element srcElem1 = gtoh(srcReg1.elements[i]);
921 Element srcElem2 = gtoh(srcReg2.elements[imm]);
925 destReg.elements[i] = htog(destElem);
927 ''' % { "op" : op, "readDest" : readDestCode }
928 for reg in range(2 * rCount):
930 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
931 ''' % { "reg" : reg }
# Immediate-carrying declaration plus the unequal-register execute template.
932 iop = InstObjParams(name, Name,
936 "predicate_test": predicateTest }, [])
937 header_output += NeonRegRegRegImmOpDeclare.subst(iop)
938 exec_output += NeonUnequalRegExecute.subst(iop)
940 substDict = { "targs" : type,
941 "class_name" : Name }
942 exec_output += NeonExecDeclare.subst(substDict)
# Floating-point variant of the "operand selected by imm" generator: each
# register of the first source is combined with srcRegs2[imm]; the
# generated code asserts 0 <= imm < rCount.
#   name, Name -- passed to InstObjParams; Name is also the class_name used
#                 for NeonExecDeclare.
#   types      -- type names; presumably iterated to emit one
#                 NeonExecDeclare per type (the loop line is not visible).
#   rCount     -- number of registers per operand.
#   op         -- C++ snippet supplied to the register loop via the "op" key.
#   readDest   -- presumably enables loading the previous destination value.
# Appends to the global header_output and exec_output.
944 def twoEqualRegInstFp(name, Name, types, rCount, op, readDest=False):
945 global header_output, exec_output
946 eWalkCode = simdEnabledCheckCode + '''
947 typedef FloatReg FloatVect[rCount];
948 FloatVect srcRegs1, srcRegs2, destRegs;
950 for reg in range(rCount):
952 srcRegs1[%(reg)d] = FpOp1P%(reg)d;
953 srcRegs2[%(reg)d] = FpOp2P%(reg)d;
954 ''' % { "reg" : reg }
# Destination load (its guarding condition is not visible in this excerpt).
957 destRegs[%(reg)d] = FpDestP%(reg)d;
958 ''' % { "reg" : reg }
961 readDestCode = 'destReg = destRegs[i];'
963 assert(imm >= 0 && imm < rCount);
964 for (unsigned i = 0; i < rCount; i++) {
965 FloatReg srcReg1 = srcRegs1[i];
966 FloatReg srcReg2 = srcRegs2[imm];
970 destRegs[i] = destReg;
972 ''' % { "op" : op, "readDest" : readDestCode }
973 for reg in range(rCount):
975 FpDestP%(reg)d = destRegs[%(reg)d];
976 ''' % { "reg" : reg }
# Uses the immediate-carrying declaration template.
977 iop = InstObjParams(name, Name,
981 "predicate_test": predicateTest }, [])
982 header_output += NeonRegRegRegImmOpDeclare.subst(iop)
983 exec_output += NeonEqualRegExecute.subst(iop)
985 substDict = { "targs" : type,
986 "class_name" : Name }
987 exec_output += NeonExecDeclare.subst(substDict)
# Generate a two-register NEON instruction with an immediate, where source
# and destination span the same number of registers.
#   name, Name -- passed to InstObjParams; Name is also the class_name used
#                 for NeonExecDeclare.
#   types      -- element type names; presumably iterated to emit one
#                 NeonExecDeclare per type (the loop line is not visible).
#   rCount     -- number of registers per operand.
#   op         -- C++ snippet for the per-iteration operation (the line
#                 passing it to the template is not visible).
#   readDest   -- presumably enables loading the previous destination value.
#   toInt, fromInt -- presumably choose between the Element and the
#                 FloatRegBits forms of the destination/source snippets set
#                 below; the selecting conditions are not visible.
# Appends to the global header_output and exec_output.
989 def twoRegShiftInst(name, Name, types, rCount, op,
990 readDest=False, toInt=False, fromInt=False):
991 global header_output, exec_output
992 eWalkCode = simdEnabledCheckCode + '''
993 RegVect srcRegs1, destRegs;
995 for reg in range(rCount):
997 srcRegs1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
998 ''' % { "reg" : reg }
# Destination load (its guarding condition is not visible in this excerpt).
1001 destRegs.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1002 ''' % { "reg" : reg }
# Each snippet below has an element-wise form and a whole-register
# (FloatRegBits) form.
1005 readDestCode = 'destElem = gtoh(destRegs.elements[i]);'
1007 readDestCode = 'destReg = gtoh(destRegs.regs[i]);'
1008 readOpCode = 'Element srcElem1 = gtoh(srcRegs1.elements[i]);'
1010 readOpCode = 'FloatRegBits srcReg1 = gtoh(srcRegs1.regs[i]);'
1011 declDest = 'Element destElem;'
1012 writeDestCode = 'destRegs.elements[i] = htog(destElem);'
1014 declDest = 'FloatRegBits destReg;'
1015 writeDestCode = 'destRegs.regs[i] = htog(destReg);'
1017 for (unsigned i = 0; i < eCount; i++) {
1024 ''' % { "readOp" : readOpCode,
1025 "declDest" : declDest,
1026 "readDest" : readDestCode,
1028 "writeDest" : writeDestCode }
1029 for reg in range(rCount):
1031 FpDestP%(reg)d.uw = gtoh(destRegs.regs[%(reg)d]);
1032 ''' % { "reg" : reg }
# Emit the class declaration and the execute method from the templates.
1033 iop = InstObjParams(name, Name,
1035 { "code": eWalkCode,
1037 "predicate_test": predicateTest }, [])
1038 header_output += NeonRegRegImmOpDeclare.subst(iop)
1039 exec_output += NeonEqualRegExecute.subst(iop)
1041 substDict = { "targs" : type,
1042 "class_name" : Name }
1043 exec_output += NeonExecDeclare.subst(substDict)
# Generate a narrowing two-register NEON instruction with an immediate:
# four source registers are read as BigElement values and two destination
# registers are written.
#   name, Name -- passed to InstObjParams; Name is also the class_name used
#                 for NeonExecDeclare.
#   types      -- element type names; presumably iterated to emit one
#                 NeonExecDeclare per type (the loop line is not visible).
#   op         -- C++ snippet supplied to the element loop via the "op" key.
#   readDest   -- presumably enables loading the previous destination value.
# Appends to the global header_output and exec_output.
1045 def twoRegNarrowShiftInst(name, Name, types, op, readDest=False):
1046 global header_output, exec_output
1047 eWalkCode = simdEnabledCheckCode + '''
1051 for reg in range(4):
1053 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1054 ''' % { "reg" : reg }
1056 for reg in range(2):
1058 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1059 ''' % { "reg" : reg }
1062 readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1064 for (unsigned i = 0; i < eCount; i++) {
1065 BigElement srcElem1 = gtoh(srcReg1.elements[i]);
1069 destReg.elements[i] = htog(destElem);
1071 ''' % { "op" : op, "readDest" : readDestCode }
1072 for reg in range(2):
1074 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1075 ''' % { "reg" : reg }
# Emit the class declaration and the execute method from the templates.
1076 iop = InstObjParams(name, Name,
1078 { "code": eWalkCode,
1080 "predicate_test": predicateTest }, [])
1081 header_output += NeonRegRegImmOpDeclare.subst(iop)
1082 exec_output += NeonUnequalRegExecute.subst(iop)
1084 substDict = { "targs" : type,
1085 "class_name" : Name }
1086 exec_output += NeonExecDeclare.subst(substDict)
1088 def twoRegLongShiftInst(name, Name, types, op, readDest=False):
1089 global header_output, exec_output
1090 eWalkCode = simdEnabledCheckCode + '''
1094 for reg in range(2):
1096 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1097 ''' % { "reg" : reg }
1099 for reg in range(4):
1101 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1102 ''' % { "reg" : reg }
1105 readDestCode = 'destReg = gtoh(destReg.elements[i]);'
1107 for (unsigned i = 0; i < eCount; i++) {
1108 Element srcElem1 = gtoh(srcReg1.elements[i]);
1109 BigElement destElem;
1112 destReg.elements[i] = htog(destElem);
1114 ''' % { "op" : op, "readDest" : readDestCode }
1115 for reg in range(4):
1117 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1118 ''' % { "reg" : reg }
1119 iop = InstObjParams(name, Name,
1121 { "code": eWalkCode,
1123 "predicate_test": predicateTest }, [])
1124 header_output += NeonRegRegImmOpDeclare.subst(iop)
1125 exec_output += NeonUnequalRegExecute.subst(iop)
1127 substDict = { "targs" : type,
1128 "class_name" : Name }
1129 exec_output += NeonExecDeclare.subst(substDict)
# Generate a two-register (one source, one destination) NEON instruction
# that works element by element.
#   name, Name -- passed to InstObjParams; Name is also the class_name used
#                 for NeonExecDeclare.
#   types      -- element type names; presumably iterated to emit one
#                 NeonExecDeclare per type (the loop line is not visible).
#   rCount     -- number of registers per operand.
#   op         -- C++ snippet supplied to the element loop via the "op" key.
#   readDest   -- presumably enables loading the previous destination value.
# NOTE(review): the result is stored at destReg.elements[j]; the line that
# declares j is not visible in this excerpt.
# Appends to the global header_output and exec_output.
1131 def twoRegMiscInst(name, Name, types, rCount, op, readDest=False):
1132 global header_output, exec_output
1133 eWalkCode = simdEnabledCheckCode + '''
1134 RegVect srcReg1, destReg;
1136 for reg in range(rCount):
1138 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1139 ''' % { "reg" : reg }
# Destination load (its guarding condition is not visible in this excerpt).
1142 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1143 ''' % { "reg" : reg }
1146 readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1148 for (unsigned i = 0; i < eCount; i++) {
1150 Element srcElem1 = gtoh(srcReg1.elements[i]);
1154 destReg.elements[j] = htog(destElem);
1156 ''' % { "op" : op, "readDest" : readDestCode }
1157 for reg in range(rCount):
1159 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1160 ''' % { "reg" : reg }
# Emit the class declaration and the execute method from the templates.
1161 iop = InstObjParams(name, Name,
1163 { "code": eWalkCode,
1165 "predicate_test": predicateTest }, [])
1166 header_output += NeonRegRegOpDeclare.subst(iop)
1167 exec_output += NeonEqualRegExecute.subst(iop)
1169 substDict = { "targs" : type,
1170 "class_name" : Name }
1171 exec_output += NeonExecDeclare.subst(substDict)
# Generate a two-register NEON instruction in which every destination
# element is computed from the single source element selected by `imm`.
#   name, Name -- passed to InstObjParams; Name is also the class_name used
#                 for NeonExecDeclare.
#   types      -- element type names; presumably iterated to emit one
#                 NeonExecDeclare per type (the loop line is not visible).
#   rCount     -- number of registers per operand.
#   op         -- C++ snippet supplied to the element loop via the "op" key.
#   readDest   -- presumably enables loading the previous destination value.
# Appends to the global header_output and exec_output.
1173 def twoRegMiscScInst(name, Name, types, rCount, op, readDest=False):
1174 global header_output, exec_output
1175 eWalkCode = simdEnabledCheckCode + '''
1176 RegVect srcReg1, destReg;
1178 for reg in range(rCount):
1180 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1181 ''' % { "reg" : reg }
# Destination load (its guarding condition is not visible in this excerpt).
1184 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1185 ''' % { "reg" : reg }
1188 readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1190 for (unsigned i = 0; i < eCount; i++) {
1191 Element srcElem1 = gtoh(srcReg1.elements[imm]);
1195 destReg.elements[i] = htog(destElem);
1197 ''' % { "op" : op, "readDest" : readDestCode }
1198 for reg in range(rCount):
1200 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1201 ''' % { "reg" : reg }
# Uses the immediate-carrying declaration template.
1202 iop = InstObjParams(name, Name,
1204 { "code": eWalkCode,
1206 "predicate_test": predicateTest }, [])
1207 header_output += NeonRegRegImmOpDeclare.subst(iop)
1208 exec_output += NeonEqualRegExecute.subst(iop)
1210 substDict = { "targs" : type,
1211 "class_name" : Name }
1212 exec_output += NeonExecDeclare.subst(substDict)
# Generate a two-register NEON instruction that updates both of its
# register operands: both srcReg1 and destReg are loaded up front, and both
# FpDestP and FpOp1P are written back afterwards.
#   name, Name -- passed to InstObjParams; Name is also the class_name used
#                 for NeonExecDeclare.
#   types      -- element type names; presumably iterated to emit one
#                 NeonExecDeclare per type (the loop line is not visible).
#   rCount     -- number of registers per operand.
#   op         -- C++ snippet for the operation (the line passing it into
#                 the generated code is not visible in this excerpt).
#   readDest   -- presumably enables the readDestCode snippet below.
# Appends to the global header_output and exec_output.
1214 def twoRegMiscScramble(name, Name, types, rCount, op, readDest=False):
1215 global header_output, exec_output
1216 eWalkCode = simdEnabledCheckCode + '''
1217 RegVect srcReg1, destReg;
1219 for reg in range(rCount):
1221 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1222 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1223 ''' % { "reg" : reg }
1226 ''' % { "reg" : reg }
1229 readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1231 for reg in range(rCount):
1233 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1234 FpOp1P%(reg)d.uw = gtoh(srcReg1.regs[%(reg)d]);
1235 ''' % { "reg" : reg }
# Emit the class declaration and the execute method from the templates.
1236 iop = InstObjParams(name, Name,
1238 { "code": eWalkCode,
1240 "predicate_test": predicateTest }, [])
1241 header_output += NeonRegRegOpDeclare.subst(iop)
1242 exec_output += NeonEqualRegExecute.subst(iop)
1244 substDict = { "targs" : type,
1245 "class_name" : Name }
1246 exec_output += NeonExecDeclare.subst(substDict)
1248 def twoRegMiscInstFp(name, Name, types, rCount, op,
1249 readDest=False, toInt=False):
1250 global header_output, exec_output
1251 eWalkCode = simdEnabledCheckCode + '''
1252 typedef FloatReg FloatVect[rCount];
1256 eWalkCode += 'RegVect destRegs;\n'
1258 eWalkCode += 'FloatVect destRegs;\n'
1259 for reg in range(rCount):
1261 srcRegs1[%(reg)d] = FpOp1P%(reg)d;
1262 ''' % { "reg" : reg }
1266 destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
1267 ''' % { "reg" : reg }
1270 destRegs[%(reg)d] = FpDestP%(reg)d;
1271 ''' % { "reg" : reg }
1274 readDestCode = 'destReg = destRegs[i];'
1275 destType = 'FloatReg'
1276 writeDest = 'destRegs[r] = destReg;'
1278 destType = 'FloatRegBits'
1279 writeDest = 'destRegs.regs[r] = destReg;'
1281 for (unsigned r = 0; r < rCount; r++) {
1282 FloatReg srcReg1 = srcRegs1[r];
1283 %(destType)s destReg;
1289 "readDest" : readDestCode,
1290 "destType" : destType,
1291 "writeDest" : writeDest }
1292 for reg in range(rCount):
1295 FpDestP%(reg)d.uw = destRegs.regs[%(reg)d];
1296 ''' % { "reg" : reg }
1299 FpDestP%(reg)d = destRegs[%(reg)d];
1300 ''' % { "reg" : reg }
1301 iop = InstObjParams(name, Name,
1303 { "code": eWalkCode,
1305 "predicate_test": predicateTest }, [])
1306 header_output += NeonRegRegOpDeclare.subst(iop)
1307 exec_output += NeonEqualRegExecute.subst(iop)
1309 substDict = { "targs" : type,
1310 "class_name" : Name }
1311 exec_output += NeonExecDeclare.subst(substDict)
# Generate a two-register NEON instruction that condenses adjacent source
# element pairs (2*i, 2*i+1) into one BigElement destination element,
# producing eCount / 2 results.
#   name, Name -- passed to InstObjParams; Name is also the class_name used
#                 for NeonExecDeclare.
#   types      -- element type names; presumably iterated to emit one
#                 NeonExecDeclare per type (the loop line is not visible).
#   rCount     -- number of registers per operand.
#   op         -- C++ snippet supplied to the element loop via the "op" key.
#   readDest   -- presumably enables loading the previous destination value.
# Appends to the global header_output and exec_output.
1313 def twoRegCondenseInst(name, Name, types, rCount, op, readDest=False):
1314 global header_output, exec_output
1315 eWalkCode = simdEnabledCheckCode + '''
1319 for reg in range(rCount):
1321 srcRegs.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1322 ''' % { "reg" : reg }
# Destination load (its guarding condition is not visible in this excerpt).
1325 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1326 ''' % { "reg" : reg }
1329 readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1331 for (unsigned i = 0; i < eCount / 2; i++) {
1332 Element srcElem1 = gtoh(srcRegs.elements[2 * i]);
1333 Element srcElem2 = gtoh(srcRegs.elements[2 * i + 1]);
1334 BigElement destElem;
1337 destReg.elements[i] = htog(destElem);
1339 ''' % { "op" : op, "readDest" : readDestCode }
1340 for reg in range(rCount):
1342 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1343 ''' % { "reg" : reg }
# Emit the class declaration and the execute method from the templates.
1344 iop = InstObjParams(name, Name,
1346 { "code": eWalkCode,
1348 "predicate_test": predicateTest }, [])
1349 header_output += NeonRegRegOpDeclare.subst(iop)
1350 exec_output += NeonUnequalRegExecute.subst(iop)
1352 substDict = { "targs" : type,
1353 "class_name" : Name }
1354 exec_output += NeonExecDeclare.subst(substDict)
# Generate a two-register narrowing NEON "misc" instruction: four source
# register words (FpOp1P0..3) are read, each source element is taken as a
# BigElement, and the results are written back through two destination
# register words (FpDestP0..1).
#
# Arguments:
#   name, Name - forwarded to InstObjParams; Name is also the "class_name"
#                used for the NeonExecDeclare substitution.
#   types      - NOTE(review): presumably iterated to emit one NeonExecDeclare
#                per type; the loop header is not visible here - confirm.
#   op         - C++ snippet substituted into the per-element loop body.
#   readDest   - when set, destElem is pre-loaded from the destination
#                register (readDestCode) before op runs.
#
# Side effects: appends to the module-level header_output
# (NeonRegRegOpDeclare) and exec_output (NeonUnequalRegExecute,
# NeonExecDeclare).
1356 def twoRegNarrowMiscInst(name, Name, types, op, readDest=False):
1357 global header_output, exec_output
1358 eWalkCode = simdEnabledCheckCode + '''
1362 for reg in range(4):
1364 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1365 ''' % { "reg" : reg }
1367 for reg in range(2):
1369 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1370 ''' % { "reg" : reg }
1373 readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1375 for (unsigned i = 0; i < eCount; i++) {
1376 BigElement srcElem1 = gtoh(srcReg1.elements[i]);
1380 destReg.elements[i] = htog(destElem);
1382 ''' % { "op" : op, "readDest" : readDestCode }
1383 for reg in range(2):
1385 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1386 ''' % { "reg" : reg }
1387 iop = InstObjParams(name, Name,
1389 { "code": eWalkCode,
1391 "predicate_test": predicateTest }, [])
1392 header_output += NeonRegRegOpDeclare.subst(iop)
1393 exec_output += NeonUnequalRegExecute.subst(iop)
1395 substDict = { "targs" : type,
1396 "class_name" : Name }
1397 exec_output += NeonExecDeclare.subst(substDict)
# Generate a one-register-plus-immediate NEON instruction: there is no source
# register, only a destination that is (optionally) read, updated per element
# by op, and written back.
#
# Arguments:
#   name, Name - forwarded to InstObjParams; Name is also the "class_name"
#                used for the NeonExecDeclare substitution.
#   types      - NOTE(review): presumably iterated to emit one NeonExecDeclare
#                per type; the loop header is not visible here - confirm.
#   rCount     - number of FpDestP<n> register words read and written.
#   op         - C++ snippet substituted into the per-element loop body.
#   readDest   - when set, destElem is pre-loaded from the destination
#                register (readDestCode) before op runs.
#
# Side effects: appends to the module-level header_output
# (NeonRegImmOpDeclare) and exec_output (NeonEqualRegExecute,
# NeonExecDeclare).
1399 def oneRegImmInst(name, Name, types, rCount, op, readDest=False):
1400 global header_output, exec_output
1401 eWalkCode = simdEnabledCheckCode + '''
1405 for reg in range(rCount):
1407 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1408 ''' % { "reg" : reg }
1411 readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1413 for (unsigned i = 0; i < eCount; i++) {
1417 destReg.elements[i] = htog(destElem);
1419 ''' % { "op" : op, "readDest" : readDestCode }
1420 for reg in range(rCount):
1422 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1423 ''' % { "reg" : reg }
1424 iop = InstObjParams(name, Name,
1426 { "code": eWalkCode,
1428 "predicate_test": predicateTest }, [])
1429 header_output += NeonRegImmOpDeclare.subst(iop)
1430 exec_output += NeonEqualRegExecute.subst(iop)
1432 substDict = { "targs" : type,
1433 "class_name" : Name }
1434 exec_output += NeonExecDeclare.subst(substDict)
1436 def twoRegLongMiscInst(name, Name, types, op, readDest=False):
1437 global header_output, exec_output
1438 eWalkCode = simdEnabledCheckCode + '''
1442 for reg in range(2):
1444 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1445 ''' % { "reg" : reg }
1447 for reg in range(4):
1449 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1450 ''' % { "reg" : reg }
1453 readDestCode = 'destReg = gtoh(destReg.elements[i]);'
1455 for (unsigned i = 0; i < eCount; i++) {
1456 Element srcElem1 = gtoh(srcReg1.elements[i]);
1457 BigElement destElem;
1460 destReg.elements[i] = htog(destElem);
1462 ''' % { "op" : op, "readDest" : readDestCode }
1463 for reg in range(4):
1465 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1466 ''' % { "reg" : reg }
1467 iop = InstObjParams(name, Name,
1469 { "code": eWalkCode,
1471 "predicate_test": predicateTest }, [])
1472 header_output += NeonRegRegOpDeclare.subst(iop)
1473 exec_output += NeonUnequalRegExecute.subst(iop)
1475 substDict = { "targs" : type,
1476 "class_name" : Name }
1477 exec_output += NeonExecDeclare.subst(substDict)
1481 (((unsigned)srcElem1 & 0x1) +
1482 ((unsigned)srcElem2 & 0x1)) >> 1;
1483 // Use division instead of a shift to ensure the sign extension works
1484 // right. The compiler will figure out if it can be a shift. Mask the
1485 // inputs so they get truncated correctly.
1486 destElem = (((srcElem1 & ~(Element)1) / 2) +
1487 ((srcElem2 & ~(Element)1) / 2)) + carryBit;
1489 threeEqualRegInst("vhadd", "VhaddD", allTypes, 2, vhaddCode)
1490 threeEqualRegInst("vhadd", "VhaddQ", allTypes, 4, vhaddCode)
1494 (((unsigned)srcElem1 & 0x1) +
1495 ((unsigned)srcElem2 & 0x1) + 1) >> 1;
1496 // Use division instead of a shift to ensure the sign extension works
1497 // right. The compiler will figure out if it can be a shift. Mask the
1498 // inputs so they get truncated correctly.
1499 destElem = (((srcElem1 & ~(Element)1) / 2) +
1500 ((srcElem2 & ~(Element)1) / 2)) + carryBit;
1502 threeEqualRegInst("vrhadd", "VrhaddD", allTypes, 2, vrhaddCode)
1503 threeEqualRegInst("vrhadd", "VrhaddQ", allTypes, 4, vrhaddCode)
1507 (((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1;
1508 // Use division instead of a shift to ensure the sign extension works
1509 // right. The compiler will figure out if it can be a shift. Mask the
1510 // inputs so they get truncated correctly.
1511 destElem = (((srcElem1 & ~(Element)1) / 2) -
1512 ((srcElem2 & ~(Element)1) / 2)) - barrowBit;
1514 threeEqualRegInst("vhsub", "VhsubD", allTypes, 2, vhsubCode)
1515 threeEqualRegInst("vhsub", "VhsubQ", allTypes, 4, vhsubCode)
1518 destElem = srcElem1 & srcElem2;
1520 threeEqualRegInst("vand", "VandD", unsignedTypes, 2, vandCode)
1521 threeEqualRegInst("vand", "VandQ", unsignedTypes, 4, vandCode)
1524 destElem = srcElem1 & ~srcElem2;
1526 threeEqualRegInst("vbic", "VbicD", unsignedTypes, 2, vbicCode)
1527 threeEqualRegInst("vbic", "VbicQ", unsignedTypes, 4, vbicCode)
1530 destElem = srcElem1 | srcElem2;
1532 threeEqualRegInst("vorr", "VorrD", unsignedTypes, 2, vorrCode)
1533 threeEqualRegInst("vorr", "VorrQ", unsignedTypes, 4, vorrCode)
1535 threeEqualRegInst("vmov", "VmovD", unsignedTypes, 2, vorrCode)
1536 threeEqualRegInst("vmov", "VmovQ", unsignedTypes, 4, vorrCode)
1539 destElem = srcElem1 | ~srcElem2;
1541 threeEqualRegInst("vorn", "VornD", unsignedTypes, 2, vornCode)
1542 threeEqualRegInst("vorn", "VornQ", unsignedTypes, 4, vornCode)
1545 destElem = srcElem1 ^ srcElem2;
1547 threeEqualRegInst("veor", "VeorD", unsignedTypes, 2, veorCode)
1548 threeEqualRegInst("veor", "VeorQ", unsignedTypes, 4, veorCode)
1551 destElem = (destElem & srcElem2) | (srcElem1 & ~srcElem2);
1553 threeEqualRegInst("vbif", "VbifD", unsignedTypes, 2, vbifCode, True)
1554 threeEqualRegInst("vbif", "VbifQ", unsignedTypes, 4, vbifCode, True)
1556 destElem = (srcElem1 & srcElem2) | (destElem & ~srcElem2);
1558 threeEqualRegInst("vbit", "VbitD", unsignedTypes, 2, vbitCode, True)
1559 threeEqualRegInst("vbit", "VbitQ", unsignedTypes, 4, vbitCode, True)
1561 destElem = (srcElem1 & destElem) | (srcElem2 & ~destElem);
1563 threeEqualRegInst("vbsl", "VbslD", unsignedTypes, 2, vbslCode, True)
1564 threeEqualRegInst("vbsl", "VbslQ", unsignedTypes, 4, vbslCode, True)
1567 destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2;
1569 threeEqualRegInst("vmax", "VmaxD", allTypes, 2, vmaxCode)
1570 threeEqualRegInst("vmax", "VmaxQ", allTypes, 4, vmaxCode)
1573 destElem = (srcElem1 < srcElem2) ? srcElem1 : srcElem2;
1575 threeEqualRegInst("vmin", "VminD", allTypes, 2, vminCode)
1576 threeEqualRegInst("vmin", "VminQ", allTypes, 4, vminCode)
1579 destElem = srcElem1 + srcElem2;
1581 threeEqualRegInst("vadd", "NVaddD", unsignedTypes, 2, vaddCode)
1582 threeEqualRegInst("vadd", "NVaddQ", unsignedTypes, 4, vaddCode)
# vpadd reuses the same vaddCode snippet as the vadd instantiations; the only
# differences are the mnemonic/class names and pairwise=True.
# NOTE(review): pairwise=True presumably makes threeEqualRegInst feed adjacent
# elements to the snippet - confirm in the generator.
1584 threeEqualRegInst("vpadd", "NVpaddD", unsignedTypes,
1585 2, vaddCode, pairwise=True)
1586 threeEqualRegInst("vpadd", "NVpaddQ", unsignedTypes,
1587 4, vaddCode, pairwise=True)
1589 destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
1591 threeRegLongInst("vaddl", "Vaddl", smallTypes, vaddlwCode)
1592 threeRegWideInst("vaddw", "Vaddw", smallTypes, vaddlwCode)
1594 destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >>
1595 (sizeof(Element) * 8);
1597 threeRegNarrowInst("vaddhn", "Vaddhn", smallTypes, vaddhnCode)
1599 destElem = ((BigElement)srcElem1 + (BigElement)srcElem2 +
1600 ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
1601 (sizeof(Element) * 8);
1603 threeRegNarrowInst("vraddhn", "Vraddhn", smallTypes, vraddhnCode)
1606 destElem = srcElem1 - srcElem2;
1608 threeEqualRegInst("vsub", "NVsubD", unsignedTypes, 2, vsubCode)
1609 threeEqualRegInst("vsub", "NVsubQ", unsignedTypes, 4, vsubCode)
1611 destElem = (BigElement)srcElem1 - (BigElement)srcElem2;
1613 threeRegLongInst("vsubl", "Vsubl", smallTypes, vsublwCode)
1614 threeRegWideInst("vsubw", "Vsubw", smallTypes, vsublwCode)
1617 destElem = srcElem1 + srcElem2;
1618 FPSCR fpscr = (FPSCR)Fpscr;
1619 if (destElem < srcElem1 || destElem < srcElem2) {
1620 destElem = (Element)(-1);
1625 threeEqualRegInst("vqadd", "VqaddUD", unsignedTypes, 2, vqaddUCode)
1626 threeEqualRegInst("vqadd", "VqaddUQ", unsignedTypes, 4, vqaddUCode)
1628 destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >>
1629 (sizeof(Element) * 8);
1631 threeRegNarrowInst("vsubhn", "Vsubhn", smallTypes, vsubhnCode)
1633 destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 +
1634 ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
1635 (sizeof(Element) * 8);
1637 threeRegNarrowInst("vrsubhn", "Vrsubhn", smallTypes, vrsubhnCode)
1640 destElem = srcElem1 + srcElem2;
1641 FPSCR fpscr = (FPSCR)Fpscr;
1642 bool negDest = (destElem < 0);
1643 bool negSrc1 = (srcElem1 < 0);
1644 bool negSrc2 = (srcElem2 < 0);
1645 if ((negDest != negSrc1) && (negSrc1 == negSrc2)) {
1646 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
1653 threeEqualRegInst("vqadd", "VqaddSD", signedTypes, 2, vqaddSCode)
1654 threeEqualRegInst("vqadd", "VqaddSQ", signedTypes, 4, vqaddSCode)
1657 destElem = srcElem1 - srcElem2;
1658 FPSCR fpscr = (FPSCR)Fpscr;
1659 if (destElem > srcElem1) {
1665 threeEqualRegInst("vqsub", "VqsubUD", unsignedTypes, 2, vqsubUCode)
1666 threeEqualRegInst("vqsub", "VqsubUQ", unsignedTypes, 4, vqsubUCode)
1669 destElem = srcElem1 - srcElem2;
1670 FPSCR fpscr = (FPSCR)Fpscr;
1671 bool negDest = (destElem < 0);
1672 bool negSrc1 = (srcElem1 < 0);
1673 bool posSrc2 = (srcElem2 >= 0);
1674 if ((negDest != negSrc1) && (negSrc1 == posSrc2)) {
1675 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
1682 threeEqualRegInst("vqsub", "VqsubSD", signedTypes, 2, vqsubSCode)
1683 threeEqualRegInst("vqsub", "VqsubSQ", signedTypes, 4, vqsubSCode)
1686 destElem = (srcElem1 > srcElem2) ? (Element)(-1) : 0;
1688 threeEqualRegInst("vcgt", "VcgtD", allTypes, 2, vcgtCode)
1689 threeEqualRegInst("vcgt", "VcgtQ", allTypes, 4, vcgtCode)
1692 destElem = (srcElem1 >= srcElem2) ? (Element)(-1) : 0;
1694 threeEqualRegInst("vcge", "VcgeD", allTypes, 2, vcgeCode)
1695 threeEqualRegInst("vcge", "VcgeQ", allTypes, 4, vcgeCode)
1698 destElem = (srcElem1 == srcElem2) ? (Element)(-1) : 0;
1700 threeEqualRegInst("vceq", "VceqD", unsignedTypes, 2, vceqCode)
1701 threeEqualRegInst("vceq", "VceqQ", unsignedTypes, 4, vceqCode)
1704 int16_t shiftAmt = (int8_t)srcElem2;
1706 shiftAmt = -shiftAmt;
1707 if (shiftAmt >= sizeof(Element) * 8) {
1708 shiftAmt = sizeof(Element) * 8 - 1;
1711 destElem = (srcElem1 >> shiftAmt);
1713 // Make sure the right shift sign extended when it should.
1714 if (ltz(srcElem1) && !ltz(destElem)) {
1715 destElem |= -((Element)1 << (sizeof(Element) * 8 -
1719 if (shiftAmt >= sizeof(Element) * 8) {
1722 destElem = srcElem1 << shiftAmt;
1726 threeEqualRegInst("vshl", "VshlD", allTypes, 2, vshlCode)
1727 threeEqualRegInst("vshl", "VshlQ", allTypes, 4, vshlCode)
1730 int16_t shiftAmt = (int8_t)srcElem2;
1732 shiftAmt = -shiftAmt;
1734 if (shiftAmt <= sizeof(Element) * 8)
1735 rBit = bits(srcElem1, shiftAmt - 1);
1736 if (shiftAmt > sizeof(Element) * 8 && ltz(srcElem1))
1738 if (shiftAmt >= sizeof(Element) * 8) {
1739 shiftAmt = sizeof(Element) * 8 - 1;
1742 destElem = (srcElem1 >> shiftAmt);
1744 // Make sure the right shift sign extended when it should.
1745 if (ltz(srcElem1) && !ltz(destElem)) {
1746 destElem |= -((Element)1 << (sizeof(Element) * 8 -
1750 } else if (shiftAmt > 0) {
1751 if (shiftAmt >= sizeof(Element) * 8) {
1754 destElem = srcElem1 << shiftAmt;
1757 destElem = srcElem1;
1760 threeEqualRegInst("vrshl", "VrshlD", allTypes, 2, vrshlCode)
1761 threeEqualRegInst("vrshl", "VrshlQ", allTypes, 4, vrshlCode)
1764 int16_t shiftAmt = (int8_t)srcElem2;
1765 FPSCR fpscr = (FPSCR)Fpscr;
1767 shiftAmt = -shiftAmt;
1768 if (shiftAmt >= sizeof(Element) * 8) {
1769 shiftAmt = sizeof(Element) * 8 - 1;
1772 destElem = (srcElem1 >> shiftAmt);
1774 } else if (shiftAmt > 0) {
1775 if (shiftAmt >= sizeof(Element) * 8) {
1776 if (srcElem1 != 0) {
1777 destElem = mask(sizeof(Element) * 8);
1783 if (bits(srcElem1, sizeof(Element) * 8 - 1,
1784 sizeof(Element) * 8 - shiftAmt)) {
1785 destElem = mask(sizeof(Element) * 8);
1788 destElem = srcElem1 << shiftAmt;
1792 destElem = srcElem1;
1796 threeEqualRegInst("vqshl", "VqshlUD", unsignedTypes, 2, vqshlUCode)
1797 threeEqualRegInst("vqshl", "VqshlUQ", unsignedTypes, 4, vqshlUCode)
1800 int16_t shiftAmt = (int8_t)srcElem2;
1801 FPSCR fpscr = (FPSCR)Fpscr;
1803 shiftAmt = -shiftAmt;
1804 if (shiftAmt >= sizeof(Element) * 8) {
1805 shiftAmt = sizeof(Element) * 8 - 1;
1808 destElem = (srcElem1 >> shiftAmt);
1810 // Make sure the right shift sign extended when it should.
1811 if (srcElem1 < 0 && destElem >= 0) {
1812 destElem |= -((Element)1 << (sizeof(Element) * 8 -
1815 } else if (shiftAmt > 0) {
1817 if (shiftAmt >= sizeof(Element) * 8) {
1823 if (bits(srcElem1, sizeof(Element) * 8 - 1,
1824 sizeof(Element) * 8 - 1 - shiftAmt) !=
1825 ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
1828 destElem = srcElem1 << shiftAmt;
1833 destElem = mask(sizeof(Element) * 8 - 1);
1835 destElem = ~destElem;
1838 destElem = srcElem1;
1842 threeEqualRegInst("vqshl", "VqshlSD", signedTypes, 2, vqshlSCode)
1843 threeEqualRegInst("vqshl", "VqshlSQ", signedTypes, 4, vqshlSCode)
1846 int16_t shiftAmt = (int8_t)srcElem2;
1847 FPSCR fpscr = (FPSCR)Fpscr;
1849 shiftAmt = -shiftAmt;
1851 if (shiftAmt <= sizeof(Element) * 8)
1852 rBit = bits(srcElem1, shiftAmt - 1);
1853 if (shiftAmt >= sizeof(Element) * 8) {
1854 shiftAmt = sizeof(Element) * 8 - 1;
1857 destElem = (srcElem1 >> shiftAmt);
1861 if (shiftAmt >= sizeof(Element) * 8) {
1862 if (srcElem1 != 0) {
1863 destElem = mask(sizeof(Element) * 8);
1869 if (bits(srcElem1, sizeof(Element) * 8 - 1,
1870 sizeof(Element) * 8 - shiftAmt)) {
1871 destElem = mask(sizeof(Element) * 8);
1874 destElem = srcElem1 << shiftAmt;
1880 threeEqualRegInst("vqrshl", "VqrshlUD", unsignedTypes, 2, vqrshlUCode)
1881 threeEqualRegInst("vqrshl", "VqrshlUQ", unsignedTypes, 4, vqrshlUCode)
1884 int16_t shiftAmt = (int8_t)srcElem2;
1885 FPSCR fpscr = (FPSCR)Fpscr;
1887 shiftAmt = -shiftAmt;
1889 if (shiftAmt <= sizeof(Element) * 8)
1890 rBit = bits(srcElem1, shiftAmt - 1);
1891 if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
1893 if (shiftAmt >= sizeof(Element) * 8) {
1894 shiftAmt = sizeof(Element) * 8 - 1;
1897 destElem = (srcElem1 >> shiftAmt);
1899 // Make sure the right shift sign extended when it should.
1900 if (srcElem1 < 0 && destElem >= 0) {
1901 destElem |= -((Element)1 << (sizeof(Element) * 8 -
1905 } else if (shiftAmt > 0) {
1907 if (shiftAmt >= sizeof(Element) * 8) {
1913 if (bits(srcElem1, sizeof(Element) * 8 - 1,
1914 sizeof(Element) * 8 - 1 - shiftAmt) !=
1915 ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
1918 destElem = srcElem1 << shiftAmt;
1923 destElem = mask(sizeof(Element) * 8 - 1);
1925 destElem = ~destElem;
1928 destElem = srcElem1;
1932 threeEqualRegInst("vqrshl", "VqrshlSD", signedTypes, 2, vqrshlSCode)
1933 threeEqualRegInst("vqrshl", "VqrshlSQ", signedTypes, 4, vqrshlSCode)
1936 destElem += (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
1937 (srcElem2 - srcElem1);
1939 threeEqualRegInst("vaba", "VabaD", allTypes, 2, vabaCode, True)
1940 threeEqualRegInst("vaba", "VabaQ", allTypes, 4, vabaCode, True)
1942 destElem += (srcElem1 > srcElem2) ?
1943 ((BigElement)srcElem1 - (BigElement)srcElem2) :
1944 ((BigElement)srcElem2 - (BigElement)srcElem1);
1946 threeRegLongInst("vabal", "Vabal", smallTypes, vabalCode, True)
1949 destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
1950 (srcElem2 - srcElem1);
1952 threeEqualRegInst("vabd", "VabdD", allTypes, 2, vabdCode)
1953 threeEqualRegInst("vabd", "VabdQ", allTypes, 4, vabdCode)
1955 destElem = (srcElem1 > srcElem2) ?
1956 ((BigElement)srcElem1 - (BigElement)srcElem2) :
1957 ((BigElement)srcElem2 - (BigElement)srcElem1);
1959 threeRegLongInst("vabdl", "Vabdl", smallTypes, vabdlCode)
1962 destElem = (srcElem1 & srcElem2) ? (Element)(-1) : 0;
1964 threeEqualRegInst("vtst", "VtstD", unsignedTypes, 2, vtstCode)
1965 threeEqualRegInst("vtst", "VtstQ", unsignedTypes, 4, vtstCode)
1968 destElem = srcElem1 * srcElem2;
1970 threeEqualRegInst("vmul", "NVmulD", allTypes, 2, vmulCode)
1971 threeEqualRegInst("vmul", "NVmulQ", allTypes, 4, vmulCode)
1973 destElem = (BigElement)srcElem1 * (BigElement)srcElem2;
1975 threeRegLongInst("vmull", "Vmull", smallTypes, vmullCode)
1978 destElem = destElem + srcElem1 * srcElem2;
1980 threeEqualRegInst("vmla", "NVmlaD", allTypes, 2, vmlaCode, True)
1981 threeEqualRegInst("vmla", "NVmlaQ", allTypes, 4, vmlaCode, True)
1983 destElem = destElem + (BigElement)srcElem1 * (BigElement)srcElem2;
1985 threeRegLongInst("vmlal", "Vmlal", smallTypes, vmlalCode, True)
1988 FPSCR fpscr = (FPSCR)Fpscr;
1989 BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
1990 Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
1991 Element halfNeg = maxNeg / 2;
1992 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
1993 (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
1994 (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
1995 midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
1998 bool negPreDest = ltz(destElem);
1999 destElem += midElem;
2000 bool negDest = ltz(destElem);
2001 bool negMid = ltz(midElem);
2002 if (negPreDest == negMid && negMid != negDest) {
2003 destElem = mask(sizeof(BigElement) * 8 - 1);
2005 destElem = ~destElem;
2010 threeRegLongInst("vqdmlal", "Vqdmlal", smallTypes, vqdmlalCode, True)
2013 FPSCR fpscr = (FPSCR)Fpscr;
2014 BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2015 Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2016 Element halfNeg = maxNeg / 2;
2017 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2018 (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2019 (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2020 midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
2023 bool negPreDest = ltz(destElem);
2024 destElem -= midElem;
2025 bool negDest = ltz(destElem);
2026 bool posMid = ltz((BigElement)-midElem);
2027 if (negPreDest == posMid && posMid != negDest) {
2028 destElem = mask(sizeof(BigElement) * 8 - 1);
2030 destElem = ~destElem;
2035 threeRegLongInst("vqdmlsl", "Vqdmlsl", smallTypes, vqdmlslCode, True)
2038 FPSCR fpscr = (FPSCR)Fpscr;
2039 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2040 if (srcElem1 == srcElem2 &&
2041 srcElem1 == (Element)((Element)1 <<
2042 (Element)(sizeof(Element) * 8 - 1))) {
2043 destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8));
2048 threeRegLongInst("vqdmull", "Vqdmull", smallTypes, vqdmullCode)
2051 destElem = destElem - srcElem1 * srcElem2;
2053 threeEqualRegInst("vmls", "NVmlsD", allTypes, 2, vmlsCode, True)
2054 threeEqualRegInst("vmls", "NVmlsQ", allTypes, 4, vmlsCode, True)
2056 destElem = destElem - (BigElement)srcElem1 * (BigElement)srcElem2;
2058 threeRegLongInst("vmlsl", "Vmlsl", smallTypes, vmlslCode, True)
2062 for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2063 if (bits(srcElem2, j))
2064 destElem ^= srcElem1 << j;
2067 threeEqualRegInst("vmul", "NVmulpD", unsignedTypes, 2, vmulpCode)
2068 threeEqualRegInst("vmul", "NVmulpQ", unsignedTypes, 4, vmulpCode)
2071 for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2072 if (bits(srcElem2, j))
2073 destElem ^= (BigElement)srcElem1 << j;
2076 threeRegLongInst("vmull", "Vmullp", smallUnsignedTypes, vmullpCode)
# Integer vpmax/vpmin reuse the vmaxCode/vminCode snippets already used for
# vmax/vmin, instantiated with pairwise=True for register counts 2 and 4.
# NOTE(review): pairwise=True presumably makes threeEqualRegInst feed adjacent
# elements to the snippet - confirm in the generator.
2078 threeEqualRegInst("vpmax", "VpmaxD", allTypes, 2, vmaxCode, pairwise=True)
2079 threeEqualRegInst("vpmax", "VpmaxQ", allTypes, 4, vmaxCode, pairwise=True)
2081 threeEqualRegInst("vpmin", "VpminD", allTypes, 2, vminCode, pairwise=True)
2082 threeEqualRegInst("vpmin", "VpminQ", allTypes, 4, vminCode, pairwise=True)
2085 FPSCR fpscr = (FPSCR)Fpscr;
2086 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >>
2087 (sizeof(Element) * 8);
2088 if (srcElem1 == srcElem2 &&
2089 srcElem1 == (Element)((Element)1 <<
2090 (sizeof(Element) * 8 - 1))) {
2091 destElem = ~srcElem1;
2096 threeEqualRegInst("vqdmulh", "VqdmulhD", smallSignedTypes, 2, vqdmulhCode)
2097 threeEqualRegInst("vqdmulh", "VqdmulhQ", smallSignedTypes, 4, vqdmulhCode)
2100 FPSCR fpscr = (FPSCR)Fpscr;
2101 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
2102 ((int64_t)1 << (sizeof(Element) * 8 - 1))) >>
2103 (sizeof(Element) * 8);
2104 Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2105 Element halfNeg = maxNeg / 2;
2106 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2107 (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2108 (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2110 destElem = mask(sizeof(Element) * 8 - 1);
2112 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2118 threeEqualRegInst("vqrdmulh", "VqrdmulhD",
2119 smallSignedTypes, 2, vqrdmulhCode)
2120 threeEqualRegInst("vqrdmulh", "VqrdmulhQ",
2121 smallSignedTypes, 4, vqrdmulhCode)
2124 FPSCR fpscr = (FPSCR)Fpscr;
2126 destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2128 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMaxS,
2129 true, true, VfpRoundNearest);
2130 } else if (flushToZero(srcReg1, srcReg2)) {
2135 threeEqualRegInstFp("vmax", "VmaxDFp", ("float",), 2, vmaxfpCode)
2136 threeEqualRegInstFp("vmax", "VmaxQFp", ("float",), 4, vmaxfpCode)
2139 FPSCR fpscr = (FPSCR)Fpscr;
2141 destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2143 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMinS,
2144 true, true, VfpRoundNearest);
2145 } else if (flushToZero(srcReg1, srcReg2)) {
2150 threeEqualRegInstFp("vmin", "VminDFp", ("float",), 2, vminfpCode)
2151 threeEqualRegInstFp("vmin", "VminQFp", ("float",), 4, vminfpCode)
# Floating-point vpmax/vpmin reuse the vmaxfpCode/vminfpCode snippets already
# used for the float vmax/vmin instantiations, with pairwise=True and the
# single "float" type, for register counts 2 and 4.
# NOTE(review): pairwise=True presumably makes threeEqualRegInstFp feed
# adjacent registers to the snippet - confirm in the generator.
2153 threeEqualRegInstFp("vpmax", "VpmaxDFp", ("float",),
2154 2, vmaxfpCode, pairwise=True)
2155 threeEqualRegInstFp("vpmax", "VpmaxQFp", ("float",),
2156 4, vmaxfpCode, pairwise=True)
2158 threeEqualRegInstFp("vpmin", "VpminDFp", ("float",),
2159 2, vminfpCode, pairwise=True)
2160 threeEqualRegInstFp("vpmin", "VpminQFp", ("float",),
2161 4, vminfpCode, pairwise=True)
2164 FPSCR fpscr = Fpscr;
2165 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpAddS,
2166 true, true, VfpRoundNearest);
2169 threeEqualRegInstFp("vadd", "VaddDFp", ("float",), 2, vaddfpCode)
2170 threeEqualRegInstFp("vadd", "VaddQFp", ("float",), 4, vaddfpCode)
2172 threeEqualRegInstFp("vpadd", "VpaddDFp", ("float",),
2173 2, vaddfpCode, pairwise=True)
2174 threeEqualRegInstFp("vpadd", "VpaddQFp", ("float",),
2175 4, vaddfpCode, pairwise=True)
2178 FPSCR fpscr = Fpscr;
2179 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2180 true, true, VfpRoundNearest);
2183 threeEqualRegInstFp("vsub", "VsubDFp", ("float",), 2, vsubfpCode)
2184 threeEqualRegInstFp("vsub", "VsubQFp", ("float",), 4, vsubfpCode)
2187 FPSCR fpscr = Fpscr;
2188 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2189 true, true, VfpRoundNearest);
2192 threeEqualRegInstFp("vmul", "NVmulDFp", ("float",), 2, vmulfpCode)
2193 threeEqualRegInstFp("vmul", "NVmulQFp", ("float",), 4, vmulfpCode)
2196 FPSCR fpscr = Fpscr;
2197 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2198 true, true, VfpRoundNearest);
2199 destReg = binaryOp(fpscr, mid, destReg, fpAddS,
2200 true, true, VfpRoundNearest);
2203 threeEqualRegInstFp("vmla", "NVmlaDFp", ("float",), 2, vmlafpCode, True)
2204 threeEqualRegInstFp("vmla", "NVmlaQFp", ("float",), 4, vmlafpCode, True)
2207 FPSCR fpscr = Fpscr;
2208 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2209 true, true, VfpRoundNearest);
2210 destReg = binaryOp(fpscr, destReg, mid, fpSubS,
2211 true, true, VfpRoundNearest);
2214 threeEqualRegInstFp("vmls", "NVmlsDFp", ("float",), 2, vmlsfpCode, True)
2215 threeEqualRegInstFp("vmls", "NVmlsQFp", ("float",), 4, vmlsfpCode, True)
2218 FPSCR fpscr = (FPSCR)Fpscr;
2219 float res = binaryOp(fpscr, srcReg1, srcReg2, vcgtFunc,
2220 true, true, VfpRoundNearest);
2221 destReg = (res == 0) ? -1 : 0;
2226 threeEqualRegInstFp("vcgt", "VcgtDFp", ("float",),
2227 2, vcgtfpCode, toInt = True)
2228 threeEqualRegInstFp("vcgt", "VcgtQFp", ("float",),
2229 4, vcgtfpCode, toInt = True)
2232 FPSCR fpscr = (FPSCR)Fpscr;
2233 float res = binaryOp(fpscr, srcReg1, srcReg2, vcgeFunc,
2234 true, true, VfpRoundNearest);
2235 destReg = (res == 0) ? -1 : 0;
2240 threeEqualRegInstFp("vcge", "VcgeDFp", ("float",),
2241 2, vcgefpCode, toInt = True)
2242 threeEqualRegInstFp("vcge", "VcgeQFp", ("float",),
2243 4, vcgefpCode, toInt = True)
2246 FPSCR fpscr = (FPSCR)Fpscr;
2247 float res = binaryOp(fpscr, srcReg1, srcReg2, vacgtFunc,
2248 true, true, VfpRoundNearest);
2249 destReg = (res == 0) ? -1 : 0;
2254 threeEqualRegInstFp("vacgt", "VacgtDFp", ("float",),
2255 2, vacgtfpCode, toInt = True)
2256 threeEqualRegInstFp("vacgt", "VacgtQFp", ("float",),
2257 4, vacgtfpCode, toInt = True)
2260 FPSCR fpscr = (FPSCR)Fpscr;
2261 float res = binaryOp(fpscr, srcReg1, srcReg2, vacgeFunc,
2262 true, true, VfpRoundNearest);
2263 destReg = (res == 0) ? -1 : 0;
2268 threeEqualRegInstFp("vacge", "VacgeDFp", ("float",),
2269 2, vacgefpCode, toInt = True)
2270 threeEqualRegInstFp("vacge", "VacgeQFp", ("float",),
2271 4, vacgefpCode, toInt = True)
2274 FPSCR fpscr = (FPSCR)Fpscr;
2275 float res = binaryOp(fpscr, srcReg1, srcReg2, vceqFunc,
2276 true, true, VfpRoundNearest);
2277 destReg = (res == 0) ? -1 : 0;
2282 threeEqualRegInstFp("vceq", "VceqDFp", ("float",),
2283 2, vceqfpCode, toInt = True)
2284 threeEqualRegInstFp("vceq", "VceqQFp", ("float",),
2285 4, vceqfpCode, toInt = True)
2288 FPSCR fpscr = Fpscr;
2289 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRecpsS,
2290 true, true, VfpRoundNearest);
2293 threeEqualRegInstFp("vrecps", "VrecpsDFp", ("float",), 2, vrecpsCode)
2294 threeEqualRegInstFp("vrecps", "VrecpsQFp", ("float",), 4, vrecpsCode)
2297 FPSCR fpscr = Fpscr;
2298 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRSqrtsS,
2299 true, true, VfpRoundNearest);
2302 threeEqualRegInstFp("vrsqrts", "VrsqrtsDFp", ("float",), 2, vrsqrtsCode)
2303 threeEqualRegInstFp("vrsqrts", "VrsqrtsQFp", ("float",), 4, vrsqrtsCode)
2306 FPSCR fpscr = Fpscr;
2307 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2308 true, true, VfpRoundNearest);
2309 destReg = fabs(mid);
2312 threeEqualRegInstFp("vabd", "VabdDFp", ("float",), 2, vabdfpCode)
2313 threeEqualRegInstFp("vabd", "VabdQFp", ("float",), 4, vabdfpCode)
# Second family of multiply / multiply-accumulate instantiations. Each call
# reuses a code snippet that was already passed to a threeEqualRegInst /
# threeEqualRegInstFp / threeRegLongInst call (vmlaCode, vmlsCode, vmulCode,
# vqdmulhCode, ...) but goes through the twoEqualRegInst / twoEqualRegInstFp /
# twoRegLongInst generators and uses class names with an extra 's'.
# NOTE(review): the 's' suffix and the two-register generators suggest these
# are the by-scalar encodings - confirm against the generator definitions.
# NOTE(review): the integer vmla forms are instantiated for unsignedTypes only,
# while the vmls and vmul forms below use allTypes - confirm the asymmetry is
# intended.
2315 twoEqualRegInst("vmla", "VmlasD", unsignedTypes, 2, vmlaCode, True)
2316 twoEqualRegInst("vmla", "VmlasQ", unsignedTypes, 4, vmlaCode, True)
2317 twoEqualRegInstFp("vmla", "VmlasDFp", ("float",), 2, vmlafpCode, True)
2318 twoEqualRegInstFp("vmla", "VmlasQFp", ("float",), 4, vmlafpCode, True)
2319 twoRegLongInst("vmlal", "Vmlals", smallTypes, vmlalCode, True)
2321 twoEqualRegInst("vmls", "VmlssD", allTypes, 2, vmlsCode, True)
2322 twoEqualRegInst("vmls", "VmlssQ", allTypes, 4, vmlsCode, True)
2323 twoEqualRegInstFp("vmls", "VmlssDFp", ("float",), 2, vmlsfpCode, True)
2324 twoEqualRegInstFp("vmls", "VmlssQFp", ("float",), 4, vmlsfpCode, True)
2325 twoRegLongInst("vmlsl", "Vmlsls", smallTypes, vmlslCode, True)
2327 twoEqualRegInst("vmul", "VmulsD", allTypes, 2, vmulCode)
2328 twoEqualRegInst("vmul", "VmulsQ", allTypes, 4, vmulCode)
2329 twoEqualRegInstFp("vmul", "VmulsDFp", ("float",), 2, vmulfpCode)
2330 twoEqualRegInstFp("vmul", "VmulsQFp", ("float",), 4, vmulfpCode)
2331 twoRegLongInst("vmull", "Vmulls", smallTypes, vmullCode)
2333 twoRegLongInst("vqdmull", "Vqdmulls", smallTypes, vqdmullCode)
2334 twoRegLongInst("vqdmlal", "Vqdmlals", smallTypes, vqdmlalCode, True)
2335 twoRegLongInst("vqdmlsl", "Vqdmlsls", smallTypes, vqdmlslCode, True)
2336 twoEqualRegInst("vqdmulh", "VqdmulhsD", smallSignedTypes, 2, vqdmulhCode)
2337 twoEqualRegInst("vqdmulh", "VqdmulhsQ", smallSignedTypes, 4, vqdmulhCode)
2338 twoEqualRegInst("vqrdmulh", "VqrdmulhsD",
2339 smallSignedTypes, 2, vqrdmulhCode)
2340 twoEqualRegInst("vqrdmulh", "VqrdmulhsQ",
2341 smallSignedTypes, 4, vqrdmulhCode)
2344 if (imm >= sizeof(srcElem1) * 8) {
2350 destElem = srcElem1 >> imm;
2353 twoRegShiftInst("vshr", "NVshrD", allTypes, 2, vshrCode)
2354 twoRegShiftInst("vshr", "NVshrQ", allTypes, 4, vshrCode)
2358 if (imm >= sizeof(srcElem1) * 8) {
2359 mid = ltz(srcElem1) ? -1 : 0;
2361 mid = srcElem1 >> imm;
2362 if (ltz(srcElem1) && !ltz(mid)) {
2363 mid |= -(mid & ((Element)1 <<
2364 (sizeof(Element) * 8 - 1 - imm)));
2369 twoRegShiftInst("vsra", "NVsraD", allTypes, 2, vsraCode, True)
2370 twoRegShiftInst("vsra", "NVsraQ", allTypes, 4, vsraCode, True)
2373 if (imm > sizeof(srcElem1) * 8) {
2376 Element rBit = bits(srcElem1, imm - 1);
2377 destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2379 destElem = srcElem1;
2382 twoRegShiftInst("vrshr", "NVrshrD", allTypes, 2, vrshrCode)
2383 twoRegShiftInst("vrshr", "NVrshrQ", allTypes, 4, vrshrCode)
2386 if (imm > sizeof(srcElem1) * 8) {
2389 Element rBit = bits(srcElem1, imm - 1);
2390 destElem += ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2392 destElem += srcElem1;
2395 twoRegShiftInst("vrsra", "NVrsraD", allTypes, 2, vrsraCode, True)
2396 twoRegShiftInst("vrsra", "NVrsraQ", allTypes, 4, vrsraCode, True)
2399 if (imm >= sizeof(Element) * 8)
2400 destElem = destElem;
2402 destElem = (srcElem1 >> imm) |
2403 (destElem & ~mask(sizeof(Element) * 8 - imm));
2405 twoRegShiftInst("vsri", "NVsriD", unsignedTypes, 2, vsriCode, True)
2406 twoRegShiftInst("vsri", "NVsriQ", unsignedTypes, 4, vsriCode, True)
2409 if (imm >= sizeof(Element) * 8)
2410 destElem = (srcElem1 << (sizeof(Element) * 8 - 1)) << 1;
2412 destElem = srcElem1 << imm;
2414 twoRegShiftInst("vshl", "NVshlD", unsignedTypes, 2, vshlCode)
2415 twoRegShiftInst("vshl", "NVshlQ", unsignedTypes, 4, vshlCode)
2418 if (imm >= sizeof(Element) * 8)
2419 destElem = destElem;
2421 destElem = (srcElem1 << imm) | (destElem & mask(imm));
2423 twoRegShiftInst("vsli", "NVsliD", unsignedTypes, 2, vsliCode, True)
2424 twoRegShiftInst("vsli", "NVsliQ", unsignedTypes, 4, vsliCode, True)
2427 FPSCR fpscr = (FPSCR)Fpscr;
2428 if (imm >= sizeof(Element) * 8) {
2429 if (srcElem1 != 0) {
2430 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2432 destElem = ~destElem;
2438 destElem = (srcElem1 << imm);
2439 uint64_t topBits = bits((uint64_t)srcElem1,
2440 sizeof(Element) * 8 - 1,
2441 sizeof(Element) * 8 - 1 - imm);
2442 if (topBits != 0 && topBits != mask(imm + 1)) {
2443 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2445 destElem = ~destElem;
2449 destElem = srcElem1;
2453 twoRegShiftInst("vqshl", "NVqshlD", signedTypes, 2, vqshlCode)
2454 twoRegShiftInst("vqshl", "NVqshlQ", signedTypes, 4, vqshlCode)
2457 FPSCR fpscr = (FPSCR)Fpscr;
2458 if (imm >= sizeof(Element) * 8) {
2459 if (srcElem1 != 0) {
2460 destElem = mask(sizeof(Element) * 8);
2466 destElem = (srcElem1 << imm);
2467 uint64_t topBits = bits((uint64_t)srcElem1,
2468 sizeof(Element) * 8 - 1,
2469 sizeof(Element) * 8 - imm);
2471 destElem = mask(sizeof(Element) * 8);
2475 destElem = srcElem1;
2479 twoRegShiftInst("vqshlu", "NVqshluD", unsignedTypes, 2, vqshluCode)
2480 twoRegShiftInst("vqshlu", "NVqshluQ", unsignedTypes, 4, vqshluCode)
// vqshlus element body, instantiated below for signedTypes. Saturated
// results are mask(sizeof(Element) * 8), presumably the all-ones value.
2483 FPSCR fpscr = (FPSCR)Fpscr;
2484 if (imm >= sizeof(Element) * 8) {
// Full-width shift of a positive source saturates. This "else if" follows a
// branch that is not shown here.
2488 } else if (srcElem1 > 0) {
2489 destElem = mask(sizeof(Element) * 8);
// Regular case: shift and collect the imm source bits shifted out of the
// element.
2495 destElem = (srcElem1 << imm);
2496 uint64_t topBits = bits((uint64_t)srcElem1,
2497 sizeof(Element) * 8 - 1,
2498 sizeof(Element) * 8 - imm);
// Losing any nonzero bit saturates; again the preceding branch is not shown.
2502 } else if (topBits != 0) {
2503 destElem = mask(sizeof(Element) * 8);
// Plain copy of the source; the guard is not shown.
2511 destElem = srcElem1;
2516 twoRegShiftInst("vqshlus", "NVqshlusD", signedTypes, 2, vqshlusCode)
2517 twoRegShiftInst("vqshlus", "NVqshlusQ", signedTypes, 4, vqshlusCode)
// vshrn element body: right shift of the wide source into the narrow
// destination, instantiated below for smallUnsignedTypes. What the
// imm >= width branch stores is not shown here.
2520 if (imm >= sizeof(srcElem1) * 8) {
2523 destElem = srcElem1 >> imm;
2526 twoRegNarrowShiftInst("vshrn", "NVshrn", smallUnsignedTypes, vshrnCode)
// vrshrn element body: the rounding variant of vshrn.
// NOTE(review): this guard uses '>' where vshrn uses '>='; confirm the
// difference is intended.
2529 if (imm > sizeof(srcElem1) * 8) {
// rBit is bit imm - 1 of the source (assuming bits(val, n) returns a single
// bit), i.e. the last bit shifted out; adding it rounds the result. The
// shift is split into (imm - 1) and 1, presumably so that an imm equal to
// the source width never reaches a single '>>'.
2532 Element rBit = bits(srcElem1, imm - 1);
2533 destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
// Plain copy of the source; the guard is not shown (presumably imm == 0).
2535 destElem = srcElem1;
2538 twoRegNarrowShiftInst("vrshrn", "NVrshrn", smallUnsignedTypes, vrshrnCode)
// vqshrn element body: saturating narrowing right shift, instantiated below
// for smallSignedTypes.
2541 FPSCR fpscr = (FPSCR)Fpscr;
// Over-wide shift: sources other than 0 and -1 are singled out. The action
// taken for them is not shown (presumably recording saturation in fpscr).
2542 if (imm > sizeof(srcElem1) * 8) {
2543 if (srcElem1 != 0 && srcElem1 != -1)
// mid = source shifted right by imm (as imm - 1, then 1). It is then
// sign-extended by hand: the bit at position (BigElement width - 1 - imm)
// is isolated and its negation ORed in, which sets every bit above it.
2547 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2548 mid |= -(mid & ((BigElement)1 <<
2549 (sizeof(BigElement) * 8 - 1 - imm)));
// The value does not survive the cast to the narrow Element: saturate.
// mask(width - 1) is presumably the largest positive value and its
// complement the most negative one; the selector between them is not shown.
2550 if (mid != (Element)mid) {
2551 destElem = mask(sizeof(Element) * 8 - 1);
2553 destElem = ~destElem;
// Plain copy of the source; the guard is not shown.
2559 destElem = srcElem1;
2563 twoRegNarrowShiftInst("vqshrn", "NVqshrn", smallSignedTypes, vqshrnCode)
// vqshrun element body, instantiated below for smallUnsignedTypes.
2566 FPSCR fpscr = (FPSCR)Fpscr;
// Over-wide shift; what this path stores is not shown here.
2567 if (imm > sizeof(srcElem1) * 8) {
// Shift right by imm in two steps (imm - 1, then 1).
2572 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
// The shifted value does not survive the cast to the narrow Element:
// store mask(sizeof(Element) * 8), presumably the all-ones maximum.
2573 if (mid != (Element)mid) {
2574 destElem = mask(sizeof(Element) * 8);
// Plain copy of the source; the guard is not shown (presumably imm == 0).
2580 destElem = srcElem1;
2584 twoRegNarrowShiftInst("vqshrun", "NVqshrun",
2585 smallUnsignedTypes, vqshrunCode)
// vqshruns element body: registered below under the mnemonic "vqshrun" as
// class NVqshruns, over smallSignedTypes (signed sources).
2588 FPSCR fpscr = (FPSCR)Fpscr;
// Over-wide shift; what this path stores is not shown here.
2589 if (imm > sizeof(srcElem1) * 8) {
// Shift right by imm in two steps (imm - 1, then 1).
2594 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
// Any set bit of mid at or above the narrow element width means the value
// does not fit (assuming bits(val, hi, lo) extracts an inclusive range).
2595 if (bits(mid, sizeof(BigElement) * 8 - 1,
2596 sizeof(Element) * 8) != 0) {
// Presumably the all-ones maximum; any separate treatment of negative
// sources is not shown here.
2600 destElem = mask(sizeof(Element) * 8);
// Plain copy of the source; the guard is not shown.
2607 destElem = srcElem1;
2611 twoRegNarrowShiftInst("vqshrun", "NVqshruns",
2612 smallSignedTypes, vqshrunsCode)
// vqrshrn element body: saturating, rounding, narrowing right shift,
// instantiated below for smallSignedTypes.
2615 FPSCR fpscr = (FPSCR)Fpscr;
// Over-wide shift: sources other than 0 and -1 are singled out; the action
// taken for them is not shown here.
2616 if (imm > sizeof(srcElem1) * 8) {
2617 if (srcElem1 != 0 && srcElem1 != -1)
// Shift by imm - 1 first so the lowest remaining bit is the rounding bit.
// The final one-bit shift and the addition of rBit are not shown here.
2621 BigElement mid = (srcElem1 >> (imm - 1));
2622 uint64_t rBit = mid & 0x1;
// Manual sign extension from bit (BigElement width - 1 - imm): ORing in the
// negation of that isolated bit sets every bit above it.
2624 mid |= -(mid & ((BigElement)1 <<
2625 (sizeof(BigElement) * 8 - 1 - imm)));
// The value does not survive the cast to the narrow Element: saturate to
// mask(width - 1) or its complement (selector not shown).
2627 if (mid != (Element)mid) {
2628 destElem = mask(sizeof(Element) * 8 - 1);
2630 destElem = ~destElem;
// Same saturation applied to the unshifted source; this is presumably the
// imm == 0 path.
2636 if (srcElem1 != (Element)srcElem1) {
2637 destElem = mask(sizeof(Element) * 8 - 1);
2639 destElem = ~destElem;
2642 destElem = srcElem1;
2647 twoRegNarrowShiftInst("vqrshrn", "NVqrshrn",
2648 smallSignedTypes, vqrshrnCode)
// vqrshrun element body: saturating, rounding, narrowing right shift,
// instantiated below for smallUnsignedTypes.
2651 FPSCR fpscr = (FPSCR)Fpscr;
// Over-wide shift; what this path stores is not shown here.
2652 if (imm > sizeof(srcElem1) * 8) {
// Shift by imm - 1 first so the lowest remaining bit is the rounding bit.
// The final one-bit shift and the addition of rBit are not shown here.
2657 BigElement mid = (srcElem1 >> (imm - 1));
2658 uint64_t rBit = mid & 0x1;
// The rounded value does not survive the cast to the narrow Element:
// saturate to the all-ones unsigned maximum.
2661 if (mid != (Element)mid) {
2662 destElem = mask(sizeof(Element) * 8);
// Unshifted source that does not fit the narrow Element. Element is
// unsigned here, so the saturated value is the same all-ones maximum used
// on the shifted path above; mask(width - 1) would be the signed maximum.
2668 if (srcElem1 != (Element)srcElem1) {
2669 destElem = mask(sizeof(Element) * 8);
2672 destElem = srcElem1;
2677 twoRegNarrowShiftInst("vqrshrun", "NVqrshrun",
2678 smallUnsignedTypes, vqrshrunCode)
// vqrshruns element body: registered below under the mnemonic "vqrshrun" as
// class NVqrshruns, over smallSignedTypes (signed sources).
2681 FPSCR fpscr = (FPSCR)Fpscr;
// Over-wide shift; what this path stores is not shown here.
2682 if (imm > sizeof(srcElem1) * 8) {
// Shift by imm - 1 first so the lowest remaining bit is the rounding bit.
// The final one-bit shift and the addition of rBit are not shown here.
2687 BigElement mid = (srcElem1 >> (imm - 1));
2688 uint64_t rBit = mid & 0x1;
// Manual sign extension from bit (BigElement width - 1 - imm).
2690 mid |= -(mid & ((BigElement)1 <<
2691 (sizeof(BigElement) * 8 - 1 - imm)));
// Any set bit of mid at or above the narrow element width means the value
// does not fit; this also catches the sign-extended negative values.
2693 if (bits(mid, sizeof(BigElement) * 8 - 1,
2694 sizeof(Element) * 8) != 0) {
// Presumably the all-ones maximum; any separate result for negative values
// is not shown here.
2698 destElem = mask(sizeof(Element) * 8);
// Plain copy of the source; the guard is not shown.
2709 destElem = srcElem1;
2714 twoRegNarrowShiftInst("vqrshrun", "NVqrshruns",
2715 smallSignedTypes, vqrshrunsCode)
// vshll element body: widening left shift, instantiated below for
// smallTypes. What the imm >= destination-width branch stores is not shown.
2718 if (imm >= sizeof(destElem) * 8) {
// Widen to BigElement before shifting so the shifted-up bits are kept.
2721 destElem = (BigElement)srcElem1 << imm;
2724 twoRegLongShiftInst("vshll", "NVshll", smallTypes, vshllCode)
// vmovl element body: copy the narrow source into the wide destination.
2727 destElem = srcElem1;
2729 twoRegLongShiftInst("vmovl", "NVmovl", smallTypes, vmovlCode)
// vcvt float -> fixed-point body (class names NVcvt2ufxD/Q), instantiated
// below for "float" with toInt = True. imm is handed to vfpFpSToFixed,
// presumably as the number of fraction bits -- confirm.
2732 FPSCR fpscr = Fpscr;
// The statement controlled by this flush-to-zero test is not shown here.
2733 if (flushToZero(srcElem1))
// Conversion runs between prepFpState(VfpRoundNearest) and finishVfp().
// The empty asm statements name the operand as a memory operand; presumably
// they act as compiler barriers that keep the conversion inside that window.
2735 VfpSavedState state = prepFpState(VfpRoundNearest);
2736 __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
2737 destReg = vfpFpSToFixed(srcElem1, false, false, imm);
2738 __asm__ __volatile__("" :: "m" (destReg));
2739 finishVfp(fpscr, state, true);
2742 twoRegShiftInst("vcvt", "NVcvt2ufxD", ("float",),
2743 2, vcvt2ufxCode, toInt = True)
2744 twoRegShiftInst("vcvt", "NVcvt2ufxQ", ("float",),
2745 4, vcvt2ufxCode, toInt = True)
// Same sequence for class names NVcvt2sfxD/Q; the only difference is the
// second vfpFpSToFixed argument (true instead of false), presumably the
// signedness selector.
2748 FPSCR fpscr = Fpscr;
2749 if (flushToZero(srcElem1))
2751 VfpSavedState state = prepFpState(VfpRoundNearest);
2752 __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
2753 destReg = vfpFpSToFixed(srcElem1, true, false, imm);
2754 __asm__ __volatile__("" :: "m" (destReg));
2755 finishVfp(fpscr, state, true);
2758 twoRegShiftInst("vcvt", "NVcvt2sfxD", ("float",),
2759 2, vcvt2sfxCode, toInt = True)
2760 twoRegShiftInst("vcvt", "NVcvt2sfxQ", ("float",),
2761 4, vcvt2sfxCode, toInt = True)
// vcvt fixed-point -> float body (class names NVcvtu2fpD/Q), instantiated
// below for "float" with fromInt = True. Uses vfpUFixedToFpS, presumably the
// unsigned conversion helper. No flush-to-zero test appears on the lines
// shown, unlike the float -> fixed bodies.
2764 FPSCR fpscr = Fpscr;
// Conversion runs between prepFpState(VfpRoundNearest) and finishVfp(); the
// empty asm statements presumably act as compiler barriers around it.
2765 VfpSavedState state = prepFpState(VfpRoundNearest);
2766 __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
2767 destElem = vfpUFixedToFpS(true, true, srcReg1, false, imm);
2768 __asm__ __volatile__("" :: "m" (destElem));
2769 finishVfp(fpscr, state, true);
2772 twoRegShiftInst("vcvt", "NVcvtu2fpD", ("float",),
2773 2, vcvtu2fpCode, fromInt = True)
2774 twoRegShiftInst("vcvt", "NVcvtu2fpQ", ("float",),
2775 4, vcvtu2fpCode, fromInt = True)
// Same sequence for class names NVcvts2fpD/Q, calling vfpSFixedToFpS
// (presumably the signed helper) with identical arguments.
2778 FPSCR fpscr = Fpscr;
2779 VfpSavedState state = prepFpState(VfpRoundNearest);
2780 __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
2781 destElem = vfpSFixedToFpS(true, true, srcReg1, false, imm);
2782 __asm__ __volatile__("" :: "m" (destElem));
2783 finishVfp(fpscr, state, true);
2786 twoRegShiftInst("vcvt", "NVcvts2fpD", ("float",),
2787 2, vcvts2fpCode, fromInt = True)
2788 twoRegShiftInst("vcvt", "NVcvts2fpQ", ("float",),
2789 4, vcvts2fpCode, fromInt = True)
// vcvt single -> half body (NVcvts2h), a narrowing instruction instantiated
// below for "uint16_t" destination elements.
2792 FPSCR fpscr = Fpscr;
// The source element holds raw bits; reinterpret them as a float first.
2793 float srcFp1 = bitsToFp(srcElem1, (float)0.0);
// The statement controlled by this flush-to-zero test is not shown here.
2794 if (flushToZero(srcFp1))
// Conversion runs between prepFpState(VfpRoundNearest) and finishVfp(); the
// empty asm statements presumably act as compiler barriers around it.
2796 VfpSavedState state = prepFpState(VfpRoundNearest);
2797 __asm__ __volatile__("" : "=m" (srcFp1), "=m" (destElem)
2798 : "m" (srcFp1), "m" (destElem));
// The remaining vcvtFpSFpH arguments are on a line not shown here.
2799 destElem = vcvtFpSFpH(fpscr, true, true, VfpRoundNearest,
2801 __asm__ __volatile__("" :: "m" (destElem));
2802 finishVfp(fpscr, state, true);
2805 twoRegNarrowMiscInst("vcvt", "NVcvts2h", ("uint16_t",), vcvts2hCode)
// vcvt half -> single body (NVcvth2s), a widening instruction instantiated
// below for "uint16_t" source elements.
2808 FPSCR fpscr = Fpscr;
2809 VfpSavedState state = prepFpState(VfpRoundNearest);
2810 __asm__ __volatile__("" : "=m" (srcElem1), "=m" (destElem)
2811 : "m" (srcElem1), "m" (destElem));
// fpscr.ahp is forwarded to the helper (presumably the alternative
// half-precision format selector); the float result is stored as raw bits.
2812 destElem = fpToBits(vcvtFpHFpS(fpscr, true, fpscr.ahp, srcElem1));
2813 __asm__ __volatile__("" :: "m" (destElem));
2814 finishVfp(fpscr, state, true);
2817 twoRegLongMiscInst("vcvt", "NVcvth2s", ("uint16_t",), vcvth2sCode)
// vrsqrte (integer) element body: delegates to unsignedRSqrtEstimate();
// instantiated below for "uint32_t" only.
2820 destElem = unsignedRSqrtEstimate(srcElem1);
2822 twoRegMiscInst("vrsqrte", "NVrsqrteD", ("uint32_t",), 2, vrsqrteCode)
2823 twoRegMiscInst("vrsqrte", "NVrsqrteQ", ("uint32_t",), 4, vrsqrteCode)
// vrsqrte (float) body: delegates to fprSqrtEstimate() with the local fpscr
// copy. The statement controlled by the flush-to-zero test is not shown.
2826 FPSCR fpscr = Fpscr;
2827 if (flushToZero(srcReg1))
2829 destReg = fprSqrtEstimate(fpscr, srcReg1);
2832 twoRegMiscInstFp("vrsqrte", "NVrsqrteDFp", ("float",), 2, vrsqrtefpCode)
2833 twoRegMiscInstFp("vrsqrte", "NVrsqrteQFp", ("float",), 4, vrsqrtefpCode)
// vrecpe (integer) element body: delegates to unsignedRecipEstimate();
// instantiated below for "uint32_t" only.
2836 destElem = unsignedRecipEstimate(srcElem1);
2838 twoRegMiscInst("vrecpe", "NVrecpeD", ("uint32_t",), 2, vrecpeCode)
2839 twoRegMiscInst("vrecpe", "NVrecpeQ", ("uint32_t",), 4, vrecpeCode)
// vrecpe (float) body: delegates to fpRecipEstimate() with the local fpscr
// copy. The statement controlled by the flush-to-zero test is not shown.
2842 FPSCR fpscr = Fpscr;
2843 if (flushToZero(srcReg1))
2845 destReg = fpRecipEstimate(fpscr, srcReg1);
2848 twoRegMiscInstFp("vrecpe", "NVrecpeDFp", ("float",), 2, vrecpefpCode)
2849 twoRegMiscInstFp("vrecpe", "NVrecpeQFp", ("float",), 4, vrecpefpCode)
// vrev16 element body: the element value is copied unchanged and only its
// position moves. groupSize is the number of Elements in a 2-byte group
// ((1 << 1) bytes / element size). XORing the element index i with
// groupSize - 1 mirrors the index inside its group. i and j are not declared
// here; presumably the enclosing template supplies i and stores to index j.
2852 destElem = srcElem1;
2853 unsigned groupSize = ((1 << 1) / sizeof(Element));
2854 unsigned reverseMask = (groupSize - 1);
2855 j = i ^ reverseMask;
2857 twoRegMiscInst("vrev16", "NVrev16D", ("uint8_t",), 2, vrev16Code)
2858 twoRegMiscInst("vrev16", "NVrev16Q", ("uint8_t",), 4, vrev16Code)
// vrev32 element body: same scheme with 4-byte groups ((1 << 2) bytes).
2860 destElem = srcElem1;
2861 unsigned groupSize = ((1 << 2) / sizeof(Element));
2862 unsigned reverseMask = (groupSize - 1);
2863 j = i ^ reverseMask;
2865 twoRegMiscInst("vrev32", "NVrev32D",
2866 ("uint8_t", "uint16_t"), 2, vrev32Code)
2867 twoRegMiscInst("vrev32", "NVrev32Q",
2868 ("uint8_t", "uint16_t"), 4, vrev32Code)
// vrev64 element body: same scheme with 8-byte groups ((1 << 3) bytes).
2870 destElem = srcElem1;
2871 unsigned groupSize = ((1 << 3) / sizeof(Element));
2872 unsigned reverseMask = (groupSize - 1);
2873 j = i ^ reverseMask;
2875 twoRegMiscInst("vrev64", "NVrev64D", smallUnsignedTypes, 2, vrev64Code)
2876 twoRegMiscInst("vrev64", "NVrev64Q", smallUnsignedTypes, 4, vrev64Code)
// vpaddl element body: sum of two source elements, each widened to
// BigElement before the addition so the sum is formed at the wider type.
// Instantiated below for smallTypes.
2879 destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
2881 twoRegCondenseInst("vpaddl", "NVpaddlD", smallTypes, 2, vpaddlCode)
2882 twoRegCondenseInst("vpaddl", "NVpaddlQ", smallTypes, 4, vpaddlCode)
// vpadal element body: the same widened pair sum, accumulated into the
// existing destElem. The instantiations pass an extra True, presumably the
// "read the destination" flag this accumulation needs -- confirm.
2885 destElem += (BigElement)srcElem1 + (BigElement)srcElem2;
2887 twoRegCondenseInst("vpadal", "NVpadalD", smallTypes, 2, vpadalCode, True)
2888 twoRegCondenseInst("vpadal", "NVpadalQ", smallTypes, 4, vpadalCode, True)
// vcls loops (signedTypes): one loop runs while the source is negative, the
// other while it is non-negative, each capped at width - 1 iterations. The
// loop bodies and the declaration of count are not shown here; presumably
// each iteration shifts the source left and increments count.
2894 while (srcElem1 < 0 && count < sizeof(Element) * 8 - 1) {
2900 while (srcElem1 >= 0 && count < sizeof(Element) * 8 - 1) {
2907 twoRegMiscInst("vcls", "NVclsD", signedTypes, 2, vclsCode)
2908 twoRegMiscInst("vcls", "NVclsQ", signedTypes, 4, vclsCode)
// vclz loop: instantiated with signedTypes, so "srcElem1 >= 0" is a test
// that the top bit is clear. Capped at the full element width; the loop
// body is not shown here.
2912 while (srcElem1 >= 0 && count < sizeof(Element) * 8) {
2918 twoRegMiscInst("vclz", "NVclzD", signedTypes, 2, vclzCode)
2919 twoRegMiscInst("vclz", "NVclzQ", signedTypes, 4, vclzCode)
// vcnt loop (unsignedTypes): runs while any bit is still set, adding the
// lowest bit to count each time. The statement that advances srcElem1 is
// not shown (presumably a right shift by one).
2923 while (srcElem1 && count < sizeof(Element) * 8) {
2924 count += srcElem1 & 0x1;
2929 twoRegMiscInst("vcnt", "NVcntD", unsignedTypes, 2, vcntCode)
2930 twoRegMiscInst("vcnt", "NVcntQ", unsignedTypes, 4, vcntCode)
// vmvn (register) element body: bitwise complement, instantiated for
// "uint64_t" elements only.
2933 destElem = ~srcElem1;
2935 twoRegMiscInst("vmvn", "NVmvnD", ("uint64_t",), 2, vmvnCode)
2936 twoRegMiscInst("vmvn", "NVmvnQ", ("uint64_t",), 4, vmvnCode)
// vqabs element body: saturating absolute value, instantiated below for
// signedTypes.
2939 FPSCR fpscr = (FPSCR)Fpscr;
// Source equal to "top bit only" is the most negative value, which has no
// positive counterpart; its complement (every bit except the top one, the
// largest positive value) is stored instead.
2940 if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
2942 destElem = ~srcElem1;
2943 } else if (srcElem1 < 0) {
2944 destElem = -srcElem1;
// Remaining case (guard not shown; presumably the final else): copy.
2946 destElem = srcElem1;
2950 twoRegMiscInst("vqabs", "NVqabsD", signedTypes, 2, vqabsCode)
2951 twoRegMiscInst("vqabs", "NVqabsQ", signedTypes, 4, vqabsCode)
// vqneg element body: saturating negation with the same special case for
// the most negative value; every other source is simply negated.
2954 FPSCR fpscr = (FPSCR)Fpscr;
2955 if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
2957 destElem = ~srcElem1;
2959 destElem = -srcElem1;
2963 twoRegMiscInst("vqneg", "NVqnegD", signedTypes, 2, vqnegCode)
2964 twoRegMiscInst("vqneg", "NVqnegQ", signedTypes, 4, vqnegCode)
// vabs (integer) element body, instantiated below for signedTypes: the
// result is either the negated source or the source itself. The condition
// choosing between the two is not shown (presumably srcElem1 < 0).
2968 destElem = -srcElem1;
2970 destElem = srcElem1;
2973 twoRegMiscInst("vabs", "NVabsD", signedTypes, 2, vabsCode)
2974 twoRegMiscInst("vabs", "NVabsQ", signedTypes, 4, vabsCode)
// vabs (float) body: the value goes in through cStruct.f, the top bit is
// cleared through cStruct.i (assuming mask(n) is n low one-bits), and the
// result is read back through cStruct.f. cStruct is declared on lines not
// shown; presumably a union overlaying a float and an integer.
2981 cStruct.f = srcReg1;
2982 cStruct.i &= mask(sizeof(Element) * 8 - 1);
2983 destReg = cStruct.f;
2985 twoRegMiscInstFp("vabs", "NVabsDFp", ("float",), 2, vabsfpCode)
2986 twoRegMiscInstFp("vabs", "NVabsQFp", ("float",), 4, vabsfpCode)
// vneg (integer) element body: arithmetic negation, for signedTypes. The
// float instantiations at the end of this group use vnegfpCode, whose body
// is not shown here.
2989 destElem = -srcElem1;
2991 twoRegMiscInst("vneg", "NVnegD", signedTypes, 2, vnegCode)
2992 twoRegMiscInst("vneg", "NVnegQ", signedTypes, 4, vnegCode)
2996 twoRegMiscInstFp("vneg", "NVnegDFp", ("float",), 2, vnegfpCode)
2997 twoRegMiscInstFp("vneg", "NVnegQFp", ("float",), 4, vnegfpCode)
# vcgt against zero (integer): the result element is
# mask(sizeof(Element) * 8) -- presumably all bits set -- when the source is
# greater than 0, and 0 otherwise. Instantiated for signedTypes.
2999 vcgtCode = 'destElem = (srcElem1 > 0) ? mask(sizeof(Element) * 8) : 0;'
3000 twoRegMiscInst("vcgt", "NVcgtD", signedTypes, 2, vcgtCode)
3001 twoRegMiscInst("vcgt", "NVcgtQ", signedTypes, 4, vcgtCode)
// vcgt against zero (float): srcReg1 and 0.0 are passed through binaryOp()
// with vcgtFunc, rounding mode VfpRoundNearest.
3003 FPSCR fpscr = (FPSCR)Fpscr;
3004 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgtFunc,
3005 true, true, VfpRoundNearest);
// NOTE(review): destReg becomes -1 (all bits set) when res == 0, so
// vcgtFunc presumably returns 0 for "condition holds" -- confirm against its
// definition. The statements after this line are not shown here.
3006 destReg = (res == 0) ? -1 : 0;
3011 twoRegMiscInstFp("vcgt", "NVcgtDFp", ("float",),
3012 2, vcgtfpCode, toInt = True)
3013 twoRegMiscInstFp("vcgt", "NVcgtQFp", ("float",),
3014 4, vcgtfpCode, toInt = True)
# vcge against zero (integer): the result element is
# mask(sizeof(Element) * 8) -- presumably all bits set -- when the source is
# greater than or equal to 0, and 0 otherwise. Instantiated for signedTypes.
3016 vcgeCode = 'destElem = (srcElem1 >= 0) ? mask(sizeof(Element) * 8) : 0;'
3017 twoRegMiscInst("vcge", "NVcgeD", signedTypes, 2, vcgeCode)
3018 twoRegMiscInst("vcge", "NVcgeQ", signedTypes, 4, vcgeCode)
// vcge against zero (float): srcReg1 and 0.0 are passed through binaryOp()
// with vcgeFunc, rounding mode VfpRoundNearest.
3020 FPSCR fpscr = (FPSCR)Fpscr;
3021 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgeFunc,
3022 true, true, VfpRoundNearest);
// NOTE(review): destReg becomes -1 (all bits set) when res == 0, so
// vcgeFunc presumably returns 0 for "condition holds" -- confirm against its
// definition. The statements after this line are not shown here.
3023 destReg = (res == 0) ? -1 : 0;
3028 twoRegMiscInstFp("vcge", "NVcgeDFp", ("float",),
3029 2, vcgefpCode, toInt = True)
3030 twoRegMiscInstFp("vcge", "NVcgeQFp", ("float",),
3031 4, vcgefpCode, toInt = True)
# vceq against zero (integer): the result element is
# mask(sizeof(Element) * 8) -- presumably all bits set -- when the source
# equals 0, and 0 otherwise. Instantiated for signedTypes.
3033 vceqCode = 'destElem = (srcElem1 == 0) ? mask(sizeof(Element) * 8) : 0;'
3034 twoRegMiscInst("vceq", "NVceqD", signedTypes, 2, vceqCode)
3035 twoRegMiscInst("vceq", "NVceqQ", signedTypes, 4, vceqCode)
// vceq against zero (float): srcReg1 and 0.0 are passed through binaryOp()
// with vceqFunc, rounding mode VfpRoundNearest.
3037 FPSCR fpscr = (FPSCR)Fpscr;
3038 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vceqFunc,
3039 true, true, VfpRoundNearest);
// NOTE(review): destReg becomes -1 (all bits set) when res == 0, so
// vceqFunc presumably returns 0 for "condition holds" -- confirm against its
// definition. The statements after this line are not shown here.
3040 destReg = (res == 0) ? -1 : 0;
3045 twoRegMiscInstFp("vceq", "NVceqDFp", ("float",),
3046 2, vceqfpCode, toInt = True)
3047 twoRegMiscInstFp("vceq", "NVceqQFp", ("float",),
3048 4, vceqfpCode, toInt = True)
# vcle against zero (integer): the result element is
# mask(sizeof(Element) * 8) -- presumably all bits set -- when the source is
# less than or equal to 0, and 0 otherwise. Instantiated for signedTypes.
3050 vcleCode = 'destElem = (srcElem1 <= 0) ? mask(sizeof(Element) * 8) : 0;'
3051 twoRegMiscInst("vcle", "NVcleD", signedTypes, 2, vcleCode)
3052 twoRegMiscInst("vcle", "NVcleQ", signedTypes, 4, vcleCode)
// vcle against zero (float): srcReg1 and 0.0 are passed through binaryOp()
// with vcleFunc, rounding mode VfpRoundNearest.
3054 FPSCR fpscr = (FPSCR)Fpscr;
3055 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcleFunc,
3056 true, true, VfpRoundNearest);
// NOTE(review): destReg becomes -1 (all bits set) when res == 0, so
// vcleFunc presumably returns 0 for "condition holds" -- confirm against its
// definition. The statements after this line are not shown here.
3057 destReg = (res == 0) ? -1 : 0;
3062 twoRegMiscInstFp("vcle", "NVcleDFp", ("float",),
3063 2, vclefpCode, toInt = True)
3064 twoRegMiscInstFp("vcle", "NVcleQFp", ("float",),
3065 4, vclefpCode, toInt = True)
# vclt against zero (integer): the result element is
# mask(sizeof(Element) * 8) -- presumably all bits set -- when the source is
# less than 0, and 0 otherwise. Instantiated for signedTypes.
3067 vcltCode = 'destElem = (srcElem1 < 0) ? mask(sizeof(Element) * 8) : 0;'
3068 twoRegMiscInst("vclt", "NVcltD", signedTypes, 2, vcltCode)
3069 twoRegMiscInst("vclt", "NVcltQ", signedTypes, 4, vcltCode)
// vclt against zero (float): srcReg1 and 0.0 are passed through binaryOp()
// with vcltFunc, rounding mode VfpRoundNearest.
3071 FPSCR fpscr = (FPSCR)Fpscr;
3072 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcltFunc,
3073 true, true, VfpRoundNearest);
// NOTE(review): destReg becomes -1 (all bits set) when res == 0, so
// vcltFunc presumably returns 0 for "condition holds" -- confirm against its
// definition. The statements after this line are not shown here.
3074 destReg = (res == 0) ? -1 : 0;
3079 twoRegMiscInstFp("vclt", "NVcltDFp", ("float",),
3080 2, vcltfpCode, toInt = True)
3081 twoRegMiscInstFp("vclt", "NVcltQFp", ("float",),
3082 4, vcltfpCode, toInt = True)
// vswp body: exchanges srcReg1 and destReg one register word at a time,
// using mid as the temporary (declared on a line not shown here). Both
// registers are written, which is presumably why the "scramble" template is
// used for the instantiations below.
3086 for (unsigned r = 0; r < rCount; r++) {
3087 mid = srcReg1.regs[r];
3088 srcReg1.regs[r] = destReg.regs[r];
3089 destReg.regs[r] = mid;
3092 twoRegMiscScramble("vswp", "NVswpD", ("uint64_t",), 2, vswpCode)
3093 twoRegMiscScramble("vswp", "NVswpQ", ("uint64_t",), 4, vswpCode)
// vtrn body: walks the elements in pairs (step 2) and swaps the even
// element of srcReg1 with the odd element of destReg, using mid as the
// temporary (declared on a line not shown here).
3097 for (unsigned i = 0; i < eCount; i += 2) {
3098 mid = srcReg1.elements[i];
3099 srcReg1.elements[i] = destReg.elements[i + 1];
3100 destReg.elements[i + 1] = mid;
3103 twoRegMiscScramble("vtrn", "NVtrnD", unsignedTypes, 2, vtrnCode)
3104 twoRegMiscScramble("vtrn", "NVtrnQ", unsignedTypes, 4, vtrnCode)
// vuzp body: de-interleaves destReg and srcReg1 in place.
// mid keeps a copy of the original srcReg1 because srcReg1 is overwritten
// while its old contents are still needed.
3107 Element mid[eCount];
3108 memcpy(&mid, &srcReg1, sizeof(srcReg1));
// Odd elements: those of destReg fill the lower half of srcReg1, those of
// the saved srcReg1 fill its upper half. Even elements of destReg are packed
// into destReg's lower half; each read index (2 * i) is never below the
// write index (i), so no unread element is clobbered.
3109 for (unsigned i = 0; i < eCount / 2; i++) {
3110 srcReg1.elements[i] = destReg.elements[2 * i + 1];
3111 srcReg1.elements[eCount / 2 + i] = mid[2 * i + 1];
3112 destReg.elements[i] = destReg.elements[2 * i];
// Even elements of the saved srcReg1 fill the upper half of destReg.
3114 for (unsigned i = 0; i < eCount / 2; i++) {
3115 destReg.elements[eCount / 2 + i] = mid[2 * i];
3118 twoRegMiscScramble("vuzp", "NVuzpD", unsignedTypes, 2, vuzpCode)
3119 twoRegMiscScramble("vuzp", "NVuzpQ", unsignedTypes, 4, vuzpCode)
// vzip body: interleaves destReg and srcReg1 in place.
// mid keeps a copy of the original destReg because destReg is overwritten
// while its old contents are still needed.
3122 Element mid[eCount];
3123 memcpy(&mid, &destReg, sizeof(destReg));
// New destReg: lower halves of the two inputs interleaved, old destReg
// elements at even positions and srcReg1 elements at odd positions.
3124 for (unsigned i = 0; i < eCount / 2; i++) {
3125 destReg.elements[2 * i] = mid[i];
3126 destReg.elements[2 * i + 1] = srcReg1.elements[i];
// New srcReg1: upper halves interleaved the same way. The srcReg1 element
// read (eCount / 2 + i) is always above the positions written so far
// (at most 2 * i), so it is still the original value. The index is unsigned
// to match the other loops over eCount in these bodies.
3128 for (unsigned i = 0; i < eCount / 2; i++) {
3129 srcReg1.elements[2 * i] = mid[eCount / 2 + i];
3130 srcReg1.elements[2 * i + 1] = srcReg1.elements[eCount / 2 + i];
3133 twoRegMiscScramble("vzip", "NVzipD", unsignedTypes, 2, vzipCode)
3134 twoRegMiscScramble("vzip", "NVzipQ", unsignedTypes, 4, vzipCode)
# vmovn: plain element copy; the narrowing itself comes from the
# twoRegNarrowMiscInst template (wide source, narrow destination).
3136 vmovnCode = 'destElem = srcElem1;'
3137 twoRegNarrowMiscInst("vmovn", "NVmovn", smallUnsignedTypes, vmovnCode)
# vdup (class names NVdupD/Q): plain element copy; which source element is
# replicated is decided by the twoRegMiscScInst template, not by this body.
3139 vdupCode = 'destElem = srcElem1;'
3140 twoRegMiscScInst("vdup", "NVdupD", smallUnsignedTypes, 2, vdupCode)
3141 twoRegMiscScInst("vdup", "NVdupQ", smallUnsignedTypes, 4, vdupCode)
# Emit the instruction class for vdup from the Op1 operand.
#   name   - mnemonic handed to InstObjParams
#   Name   - class name handed to InstObjParams and used as "class_name"
#   types  - not referenced on the lines shown here; presumably iterated to
#            produce one "targs" instantiation per type -- confirm
#   rCount - number of destination registers written back
# The generated C++ stores htog((Element)Op1) into every element of destReg.
# Appends to the module-level header_output and exec_output.
3143 def vdupGprInst(name, Name, types, rCount):
3144 global header_output, exec_output
3147 for (unsigned i = 0; i < eCount; i++) {
3148 destReg.elements[i] = htog((Element)Op1);
# One write-back statement per destination register: FpDestP<reg>.uw receives
# gtoh(destReg.regs[<reg>]).
3151 for reg in range(rCount):
3153 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
3154 ''' % { "reg" : reg }
# Wrap the generated code in an InstObjParams; some of its arguments are on
# lines not shown here.
3155 iop = InstObjParams(name, Name,
3157 { "code": eWalkCode,
3159 "predicate_test": predicateTest }, [])
# Class declaration and execute method come from the Neon templates.
3160 header_output += NeonRegRegOpDeclare.subst(iop)
3161 exec_output += NeonEqualRegExecute.subst(iop)
# NOTE(review): "type" is not bound on the lines shown; presumably this runs
# inside a loop over types (and shadows the Python builtin) -- confirm.
3163 substDict = { "targs" : type,
3164 "class_name" : Name }
3165 exec_output += NeonExecDeclare.subst(substDict)
# D and Q forms, with 2 and 4 passed as rCount.
3166 vdupGprInst("vdup", "NVdupDGpr", smallUnsignedTypes, 2)
3167 vdupGprInst("vdup", "NVdupQGpr", smallUnsignedTypes, 4)
# Immediate forms, all instantiated for "uint64_t" elements only.
# vmov: the destination element becomes the immediate.
3169 vmovCode = 'destElem = imm;'
3170 oneRegImmInst("vmov", "NVmoviD", ("uint64_t",), 2, vmovCode)
3171 oneRegImmInst("vmov", "NVmoviQ", ("uint64_t",), 4, vmovCode)
# vorr: OR the immediate into the existing destination. The extra True is
# presumably the "read the destination" flag this needs -- confirm.
3173 vorrCode = 'destElem |= imm;'
3174 oneRegImmInst("vorr", "NVorriD", ("uint64_t",), 2, vorrCode, True)
3175 oneRegImmInst("vorr", "NVorriQ", ("uint64_t",), 4, vorrCode, True)
# vmvn: the destination element becomes the complemented immediate. This
# rebinds the name vmvnCode, which the register-form vmvn instantiations
# above have already consumed.
3177 vmvnCode = 'destElem = ~imm;'
3178 oneRegImmInst("vmvn", "NVmvniD", ("uint64_t",), 2, vmvnCode)
3179 oneRegImmInst("vmvn", "NVmvniQ", ("uint64_t",), 4, vmvnCode)
# vbic: clear the immediate's bits in the existing destination (again with
# the extra True argument).
3181 vbicCode = 'destElem &= ~imm;'
3182 oneRegImmInst("vbic", "NVbiciD", ("uint64_t",), 2, vbicCode, True)
3183 oneRegImmInst("vbic", "NVbiciQ", ("uint64_t",), 4, vbicCode, True)
// vqmovn element body: saturating narrowing move for smallSignedTypes.
// The source is first stored into the narrow destination; if widening the
// result back to BigElement does not reproduce the source, it did not fit.
3186 FPSCR fpscr = (FPSCR)Fpscr;
3187 destElem = srcElem1;
3188 if ((BigElement)destElem != srcElem1) {
// Saturate to mask(width - 1) (presumably the largest positive value) or
// its complement; the selector between the two is not shown here.
3190 destElem = mask(sizeof(Element) * 8 - 1);
3192 destElem = ~destElem;
3196 twoRegNarrowMiscInst("vqmovn", "NVqmovn", smallSignedTypes, vqmovnCode)
// vqmovun element body: same round-trip test for smallUnsignedTypes;
// saturates to mask(width), presumably the all-ones maximum.
3199 FPSCR fpscr = (FPSCR)Fpscr;
3200 destElem = srcElem1;
3201 if ((BigElement)destElem != srcElem1) {
3203 destElem = mask(sizeof(Element) * 8);
3207 twoRegNarrowMiscInst("vqmovun", "NVqmovun",
3208 smallUnsignedTypes, vqmovunCode)
// vqmovuns element body: registered below under the mnemonic "vqmovun" as
// class NVqmovuns, over smallSignedTypes. The round-trip test compares only
// the low narrow-width bits of the stored value against the source; the
// first part of the condition is on a line not shown here. Saturation stores
// mask(width), and a complement of it (which yields 0) is applied under a
// guard that is not shown.
3211 FPSCR fpscr = (FPSCR)Fpscr;
3212 destElem = srcElem1;
3214 ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) {
3216 destElem = mask(sizeof(Element) * 8);
3218 destElem = ~destElem;
3222 twoRegNarrowMiscInst("vqmovun", "NVqmovuns",
3223 smallSignedTypes, vqmovunsCode)
# Emit an instruction class that reads two source vectors and writes one.
#   name   - mnemonic handed to InstObjParams
#   Name   - class name handed to InstObjParams and used as "class_name"
#   types  - not referenced on the lines shown here; presumably iterated to
#            produce one "targs" instantiation per type -- confirm
#   rCount - number of registers loaded per source and written back
#   op     - not referenced on the lines shown here; the caller passes the
#            C++ body, so presumably it is spliced in between load and
#            write-back -- confirm
# Appends to the module-level header_output and exec_output.
3225 def buildVext(name, Name, types, rCount, op):
3226 global header_output, exec_output
3228 RegVect srcReg1, srcReg2, destReg;
# Load both sources register by register through htog().
# NOTE(review): simdEnabledCheckCode is concatenated on every iteration, so
# it appears rCount times in the generated code.
3230 for reg in range(rCount):
3231 eWalkCode += simdEnabledCheckCode + '''
3232 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
3233 srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
3234 ''' % { "reg" : reg }
# Write destReg back register by register through gtoh().
3236 for reg in range(rCount):
3238 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
3239 ''' % { "reg" : reg }
# Wrap the generated code in an InstObjParams; some of its arguments are on
# lines not shown here.
3240 iop = InstObjParams(name, Name,
3242 { "code": eWalkCode,
3244 "predicate_test": predicateTest }, [])
# Class declaration and execute method come from the Neon templates.
3245 header_output += NeonRegRegRegImmOpDeclare.subst(iop)
3246 exec_output += NeonEqualRegExecute.subst(iop)
# NOTE(review): "type" is not bound on the lines shown; presumably this runs
# inside a loop over types (and shadows the Python builtin) -- confirm.
3248 substDict = { "targs" : type,
3249 "class_name" : Name }
3250 exec_output += NeonExecDeclare.subst(substDict)
// vext body: destination element i comes from position i + imm of the two
// sources taken together, instantiated below for "uint8_t" elements.
3253 for (unsigned i = 0; i < eCount; i++) {
3254 unsigned index = i + imm;
// Still inside the first source.
3255 if (index < eCount) {
3256 destReg.elements[i] = srcReg1.elements[index];
// Otherwise the element comes from srcReg2. index is adjusted on a line not
// shown here (presumably reduced by eCount) before this check.
// NOTE(review): an assert on a value derived from the instruction's imm
// aborts the simulator in debug builds and disappears under NDEBUG; confirm
// imm is range-checked before execution.
3259 assert(index < eCount);
3260 destReg.elements[i] = srcReg2.elements[index];
3264 buildVext("vext", "NVextD", ("uint8_t",), 2, vextCode)
3265 buildVext("vext", "NVextQ", ("uint8_t",), 4, vextCode)
# Emit one table-lookup instruction class (vtbl or vtbx).
#   name   - mnemonic handed to InstObjParams
#   Name   - class name handed to InstObjParams
#   length - table length; length * 2 of the 8 table register words are
#            loaded from FpOp1P<n> and the rest are zeroed, and byte indices
#            below 8 * length count as in range
#   isVtbl - text substituted for a C++ bool ("true" / "false" at the calls)
# The generated C++ looks up every byte of srcReg2 in the table. An in-range
# index selects a table byte; otherwise the destination byte is zeroed or
# left unchanged (per the existing comment in the body), under a condition
# not shown here -- presumably isVtbl.
# Appends to the module-level header_output, decoder_output and exec_output.
3267 def buildVtbxl(name, Name, length, isVtbl):
3268 global header_output, decoder_output, exec_output
3273 FloatRegBits regs[8];
3279 FloatRegBits regs[2];
3282 const unsigned length = %(length)d;
3283 const bool isVtbl = %(isVtbl)s;
3285 srcReg2.regs[0] = htog(FpOp2P0.uw);
3286 srcReg2.regs[1] = htog(FpOp2P1.uw);
3288 destReg.regs[0] = htog(FpDestP0.uw);
3289 destReg.regs[1] = htog(FpDestP1.uw);
3290 ''' % { "length" : length, "isVtbl" : isVtbl }
# Fill all 8 table words: real operands for the first length * 2, zero for
# the remainder.
3291 for reg in range(8):
3292 if reg < length * 2:
3293 code += 'table.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);\n' % \
3296 code += 'table.regs[%(reg)d] = 0;\n' % { "reg" : reg }
3298 for (unsigned i = 0; i < sizeof(destReg); i++) {
3299 uint8_t index = srcReg2.bytes[i];
3300 if (index < 8 * length) {
3301 destReg.bytes[i] = table.bytes[index];
3304 destReg.bytes[i] = 0;
3305 // else destReg.bytes[i] unchanged
3309 FpDestP0.uw = gtoh(destReg.regs[0]);
3310 FpDestP1.uw = gtoh(destReg.regs[1]);
# Wrap the generated code in an InstObjParams; some of its arguments are on
# lines not shown here.
3312 iop = InstObjParams(name, Name,
3315 "predicate_test": predicateTest }, [])
# Unlike the Neon-template builders above, this one uses the generic
# RegRegRegOp declaration/constructor and PredOpExecute.
3316 header_output += RegRegRegOpDeclare.subst(iop)
3317 decoder_output += RegRegRegOpConstructor.subst(iop)
3318 exec_output += PredOpExecute.subst(iop)
# vtbl with table lengths 1 to 4; isVtbl is the C++ literal "true".
3320 buildVtbxl("vtbl", "NVtbl1", 1, "true")
3321 buildVtbxl("vtbl", "NVtbl2", 2, "true")
3322 buildVtbxl("vtbl", "NVtbl3", 3, "true")
3323 buildVtbxl("vtbl", "NVtbl4", 4, "true")
# vtbx with table lengths 1 to 4; isVtbl is the C++ literal "false".
3325 buildVtbxl("vtbx", "NVtbx1", 1, "false")
3326 buildVtbxl("vtbx", "NVtbx2", 2, "false")
3327 buildVtbxl("vtbx", "NVtbx3", 3, "false")
3328 buildVtbxl("vtbx", "NVtbx4", 4, "false")