3 // Copyright (c) 2010 ARM Limited
6 // The license below extends only to copyright in the software and shall
7 // not be construed as granting a license to any other intellectual
8 // property including but not limited to intellectual property relating
9 // to a hardware implementation of the functionality of the software
10 // licensed hereunder. You may use the software subject to the license
11 // terms below provided that you ensure that this notice is replicated
12 // unmodified and in its entirety in all distributions of the software,
13 // modified or unmodified, in source code or in binary form.
15 // Redistribution and use in source and binary forms, with or without
16 // modification, are permitted provided that the following conditions are
17 // met: redistributions of source code must retain the above copyright
18 // notice, this list of conditions and the following disclaimer;
19 // redistributions in binary form must reproduce the above copyright
20 // notice, this list of conditions and the following disclaimer in the
21 // documentation and/or other materials provided with the distribution;
22 // neither the name of the copyright holders nor the names of its
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
26 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 // Authors: Gabe Black
// Decode helper: three-register NEON form, unsigned element types.
// The visible returns construct Base<uint8_t>, Base<uint16_t>,
// Base<uint32_t> or Base<uint64_t> from (machInst, dest, op1, op2), with
// Unknown(machInst) as the fallback.
// NOTE(review): the lines selecting between the returns are not visible in
// this excerpt; presumably a switch on `size` -- confirm against the file.
41 template <template <typename T> class Base>
43 decodeNeonUThreeUReg(unsigned size,
44 ExtMachInst machInst, IntRegIndex dest,
45 IntRegIndex op1, IntRegIndex op2)
49 return new Base<uint8_t>(machInst, dest, op1, op2);
51 return new Base<uint16_t>(machInst, dest, op1, op2);
53 return new Base<uint32_t>(machInst, dest, op1, op2);
55 return new Base<uint64_t>(machInst, dest, op1, op2);
57 return new Unknown(machInst);
// Decode helper: three-register NEON form, signed element types.
// The visible returns construct Base<int8_t> through Base<int64_t> from
// (machInst, dest, op1, op2), with Unknown(machInst) as the fallback.
// NOTE(review): selector lines are not visible in this excerpt; presumably
// keyed on `size` -- confirm.
61 template <template <typename T> class Base>
63 decodeNeonSThreeUReg(unsigned size,
64 ExtMachInst machInst, IntRegIndex dest,
65 IntRegIndex op1, IntRegIndex op2)
69 return new Base<int8_t>(machInst, dest, op1, op2);
71 return new Base<int16_t>(machInst, dest, op1, op2);
73 return new Base<int32_t>(machInst, dest, op1, op2);
75 return new Base<int64_t>(machInst, dest, op1, op2);
77 return new Unknown(machInst);
// Forwards to decodeNeonUThreeUReg<Base> (unsigned) or
// decodeNeonSThreeUReg<Base> (signed) with the remaining arguments.
// NOTE(review): the branch lines are not visible in this excerpt;
// presumably the unsigned path is taken when `notSigned` is true -- confirm.
81 template <template <typename T> class Base>
83 decodeNeonUSThreeUReg(bool notSigned, unsigned size,
84 ExtMachInst machInst, IntRegIndex dest,
85 IntRegIndex op1, IntRegIndex op2)
88 return decodeNeonUThreeUReg<Base>(size, machInst, dest, op1, op2);
90 return decodeNeonSThreeUReg<Base>(size, machInst, dest, op1, op2);
// Decode helper: three-register NEON form, unsigned element types without a
// 64-bit variant. The visible returns construct Base<uint8_t>,
// Base<uint16_t> or Base<uint32_t>, with Unknown(machInst) as the fallback.
// NOTE(review): selector lines are not visible in this excerpt; presumably
// keyed on `size` -- confirm.
94 template <template <typename T> class Base>
96 decodeNeonUThreeUSReg(unsigned size,
97 ExtMachInst machInst, IntRegIndex dest,
98 IntRegIndex op1, IntRegIndex op2)
102 return new Base<uint8_t>(machInst, dest, op1, op2);
104 return new Base<uint16_t>(machInst, dest, op1, op2);
106 return new Base<uint32_t>(machInst, dest, op1, op2);
108 return new Unknown(machInst);
// Decode helper: three-register NEON form, signed element types without a
// 64-bit variant. The visible returns construct Base<int8_t>, Base<int16_t>
// or Base<int32_t>, with Unknown(machInst) as the fallback.
// NOTE(review): selector lines are not visible in this excerpt; presumably
// keyed on `size` -- confirm.
112 template <template <typename T> class Base>
114 decodeNeonSThreeUSReg(unsigned size,
115 ExtMachInst machInst, IntRegIndex dest,
116 IntRegIndex op1, IntRegIndex op2)
120 return new Base<int8_t>(machInst, dest, op1, op2);
122 return new Base<int16_t>(machInst, dest, op1, op2);
124 return new Base<int32_t>(machInst, dest, op1, op2);
126 return new Unknown(machInst);
// Forwards to decodeNeonUThreeUSReg<Base> (unsigned) or
// decodeNeonSThreeUSReg<Base> (signed) with the remaining arguments.
// NOTE(review): the branch lines are not visible in this excerpt;
// presumably the unsigned path is taken when `notSigned` is true -- confirm.
130 template <template <typename T> class Base>
132 decodeNeonUSThreeUSReg(bool notSigned, unsigned size,
133 ExtMachInst machInst, IntRegIndex dest,
134 IntRegIndex op1, IntRegIndex op2)
137 return decodeNeonUThreeUSReg<Base>(
138 size, machInst, dest, op1, op2);
140 return decodeNeonSThreeUSReg<Base>(
141 size, machInst, dest, op1, op2);
// Forwards to decodeNeonUThreeUSReg, instantiated with either BaseQ or
// BaseD, passing (size, machInst, dest, op1, op2).
// NOTE(review): the branch lines are not visible in this excerpt;
// presumably BaseQ is used when `q` is true -- confirm.
145 template <template <typename T> class BaseD,
146 template <typename T> class BaseQ>
148 decodeNeonUThreeSReg(bool q, unsigned size,
149 ExtMachInst machInst, IntRegIndex dest,
150 IntRegIndex op1, IntRegIndex op2)
153 return decodeNeonUThreeUSReg<BaseQ>(
154 size, machInst, dest, op1, op2);
156 return decodeNeonUThreeUSReg<BaseD>(
157 size, machInst, dest, op1, op2);
// Forwards to decodeNeonSThreeUSReg, instantiated with either BaseQ or
// BaseD, passing (size, machInst, dest, op1, op2).
// NOTE(review): the branch lines are not visible in this excerpt;
// presumably BaseQ is used when `q` is true -- confirm.
161 template <template <typename T> class BaseD,
162 template <typename T> class BaseQ>
164 decodeNeonSThreeSReg(bool q, unsigned size,
165 ExtMachInst machInst, IntRegIndex dest,
166 IntRegIndex op1, IntRegIndex op2)
169 return decodeNeonSThreeUSReg<BaseQ>(
170 size, machInst, dest, op1, op2);
172 return decodeNeonSThreeUSReg<BaseD>(
173 size, machInst, dest, op1, op2);
// Forwards to decodeNeonUThreeSReg<BaseD, BaseQ> (unsigned) or
// decodeNeonSThreeSReg<BaseD, BaseQ> (signed), passing `q` and the
// remaining arguments through.
// NOTE(review): the branch lines are not visible in this excerpt;
// presumably the unsigned path is taken when `notSigned` is true -- confirm.
177 template <template <typename T> class BaseD,
178 template <typename T> class BaseQ>
180 decodeNeonUSThreeSReg(bool q, bool notSigned, unsigned size,
181 ExtMachInst machInst, IntRegIndex dest,
182 IntRegIndex op1, IntRegIndex op2)
185 return decodeNeonUThreeSReg<BaseD, BaseQ>(
186 q, size, machInst, dest, op1, op2);
188 return decodeNeonSThreeSReg<BaseD, BaseQ>(
189 q, size, machInst, dest, op1, op2);
// Forwards to decodeNeonUThreeUReg, instantiated with either BaseQ or
// BaseD, passing (size, machInst, dest, op1, op2).
// NOTE(review): the branch lines are not visible in this excerpt;
// presumably BaseQ is used when `q` is true -- confirm.
193 template <template <typename T> class BaseD,
194 template <typename T> class BaseQ>
196 decodeNeonUThreeReg(bool q, unsigned size,
197 ExtMachInst machInst, IntRegIndex dest,
198 IntRegIndex op1, IntRegIndex op2)
201 return decodeNeonUThreeUReg<BaseQ>(
202 size, machInst, dest, op1, op2);
204 return decodeNeonUThreeUReg<BaseD>(
205 size, machInst, dest, op1, op2);
// Forwards to decodeNeonSThreeUReg, instantiated with either BaseQ or
// BaseD, passing (size, machInst, dest, op1, op2).
// NOTE(review): the branch lines are not visible in this excerpt;
// presumably BaseQ is used when `q` is true -- confirm.
209 template <template <typename T> class BaseD,
210 template <typename T> class BaseQ>
212 decodeNeonSThreeReg(bool q, unsigned size,
213 ExtMachInst machInst, IntRegIndex dest,
214 IntRegIndex op1, IntRegIndex op2)
217 return decodeNeonSThreeUReg<BaseQ>(
218 size, machInst, dest, op1, op2);
220 return decodeNeonSThreeUReg<BaseD>(
221 size, machInst, dest, op1, op2);
// Forwards to decodeNeonUThreeReg<BaseD, BaseQ> (unsigned) or
// decodeNeonSThreeReg<BaseD, BaseQ> (signed), passing `q` and the remaining
// arguments through.
// NOTE(review): the branch lines are not visible in this excerpt;
// presumably the unsigned path is taken when `notSigned` is true -- confirm.
225 template <template <typename T> class BaseD,
226 template <typename T> class BaseQ>
228 decodeNeonUSThreeReg(bool q, bool notSigned, unsigned size,
229 ExtMachInst machInst, IntRegIndex dest,
230 IntRegIndex op1, IntRegIndex op2)
233 return decodeNeonUThreeReg<BaseD, BaseQ>(
234 q, size, machInst, dest, op1, op2);
236 return decodeNeonSThreeReg<BaseD, BaseQ>(
237 q, size, machInst, dest, op1, op2);
// Decode helper: two-register-plus-immediate NEON form, unsigned element
// types. The visible returns construct BaseQ<uintN_t> or BaseD<uintN_t>
// (N = 8, 16, 32, 64) from (machInst, dest, op1, imm); each group ends with
// an Unknown(machInst) fallback.
// NOTE(review): selector lines are not visible in this excerpt; presumably
// `q` picks BaseQ vs BaseD and `size` picks the width -- confirm.
241 template <template <typename T> class BaseD,
242 template <typename T> class BaseQ>
244 decodeNeonUTwoShiftReg(bool q, unsigned size,
245 ExtMachInst machInst, IntRegIndex dest,
246 IntRegIndex op1, uint64_t imm)
251 return new BaseQ<uint8_t>(machInst, dest, op1, imm);
253 return new BaseQ<uint16_t>(machInst, dest, op1, imm);
255 return new BaseQ<uint32_t>(machInst, dest, op1, imm);
257 return new BaseQ<uint64_t>(machInst, dest, op1, imm);
259 return new Unknown(machInst);
264 return new BaseD<uint8_t>(machInst, dest, op1, imm);
266 return new BaseD<uint16_t>(machInst, dest, op1, imm);
268 return new BaseD<uint32_t>(machInst, dest, op1, imm);
270 return new BaseD<uint64_t>(machInst, dest, op1, imm);
272 return new Unknown(machInst);
// Decode helper: two-register-plus-immediate NEON form, signed element
// types. The visible returns construct BaseQ<intN_t> or BaseD<intN_t>
// (N = 8, 16, 32, 64) from (machInst, dest, op1, imm); each group ends with
// an Unknown(machInst) fallback.
// NOTE(review): selector lines are not visible in this excerpt; presumably
// `q` picks BaseQ vs BaseD and `size` picks the width -- confirm.
277 template <template <typename T> class BaseD,
278 template <typename T> class BaseQ>
280 decodeNeonSTwoShiftReg(bool q, unsigned size,
281 ExtMachInst machInst, IntRegIndex dest,
282 IntRegIndex op1, uint64_t imm)
287 return new BaseQ<int8_t>(machInst, dest, op1, imm);
289 return new BaseQ<int16_t>(machInst, dest, op1, imm);
291 return new BaseQ<int32_t>(machInst, dest, op1, imm);
293 return new BaseQ<int64_t>(machInst, dest, op1, imm);
295 return new Unknown(machInst);
300 return new BaseD<int8_t>(machInst, dest, op1, imm);
302 return new BaseD<int16_t>(machInst, dest, op1, imm);
304 return new BaseD<int32_t>(machInst, dest, op1, imm);
306 return new BaseD<int64_t>(machInst, dest, op1, imm);
308 return new Unknown(machInst);
// Forwards to decodeNeonUTwoShiftReg<BaseD, BaseQ> (unsigned) or
// decodeNeonSTwoShiftReg<BaseD, BaseQ> (signed), passing `q`, `size` and the
// operands through.
// NOTE(review): the branch lines are not visible in this excerpt;
// presumably the unsigned path is taken when `notSigned` is true -- confirm.
314 template <template <typename T> class BaseD,
315 template <typename T> class BaseQ>
317 decodeNeonUSTwoShiftReg(bool q, bool notSigned, unsigned size,
318 ExtMachInst machInst, IntRegIndex dest,
319 IntRegIndex op1, uint64_t imm)
322 return decodeNeonUTwoShiftReg<BaseD, BaseQ>(
323 q, size, machInst, dest, op1, imm);
325 return decodeNeonSTwoShiftReg<BaseD, BaseQ>(
326 q, size, machInst, dest, op1, imm);
// Decode helper: two-register-plus-immediate NEON form, unsigned element
// types without a 64-bit variant. The visible returns construct
// Base<uint8_t>, Base<uint16_t> or Base<uint32_t> from
// (machInst, dest, op1, imm), with Unknown(machInst) as the fallback.
// NOTE(review): selector lines are not visible in this excerpt; presumably
// keyed on `size` -- confirm.
330 template <template <typename T> class Base>
332 decodeNeonUTwoShiftUSReg(unsigned size,
333 ExtMachInst machInst, IntRegIndex dest,
334 IntRegIndex op1, uint64_t imm)
338 return new Base<uint8_t>(machInst, dest, op1, imm);
340 return new Base<uint16_t>(machInst, dest, op1, imm);
342 return new Base<uint32_t>(machInst, dest, op1, imm);
344 return new Unknown(machInst);
// Forwards to decodeNeonUTwoShiftUSReg, instantiated with either BaseQ or
// BaseD, passing (size, machInst, dest, op1, imm).
// NOTE(review): the branch lines are not visible in this excerpt;
// presumably BaseQ is used when `q` is true -- confirm.
348 template <template <typename T> class BaseD,
349 template <typename T> class BaseQ>
351 decodeNeonUTwoShiftSReg(bool q, unsigned size,
352 ExtMachInst machInst, IntRegIndex dest,
353 IntRegIndex op1, uint64_t imm)
356 return decodeNeonUTwoShiftUSReg<BaseQ>(
357 size, machInst, dest, op1, imm);
359 return decodeNeonUTwoShiftUSReg<BaseD>(
360 size, machInst, dest, op1, imm);
// Decode helper: two-register-plus-immediate NEON form, signed element
// types without a 64-bit variant. The visible returns construct
// Base<int8_t>, Base<int16_t> or Base<int32_t> from
// (machInst, dest, op1, imm), with Unknown(machInst) as the fallback.
// NOTE(review): selector lines are not visible in this excerpt; presumably
// keyed on `size` -- confirm.
364 template <template <typename T> class Base>
366 decodeNeonSTwoShiftUSReg(unsigned size,
367 ExtMachInst machInst, IntRegIndex dest,
368 IntRegIndex op1, uint64_t imm)
372 return new Base<int8_t>(machInst, dest, op1, imm);
374 return new Base<int16_t>(machInst, dest, op1, imm);
376 return new Base<int32_t>(machInst, dest, op1, imm);
378 return new Unknown(machInst);
// Forwards to decodeNeonSTwoShiftUSReg, instantiated with either BaseQ or
// BaseD, passing (size, machInst, dest, op1, imm).
// NOTE(review): the branch lines are not visible in this excerpt;
// presumably BaseQ is used when `q` is true -- confirm.
382 template <template <typename T> class BaseD,
383 template <typename T> class BaseQ>
385 decodeNeonSTwoShiftSReg(bool q, unsigned size,
386 ExtMachInst machInst, IntRegIndex dest,
387 IntRegIndex op1, uint64_t imm)
390 return decodeNeonSTwoShiftUSReg<BaseQ>(
391 size, machInst, dest, op1, imm);
393 return decodeNeonSTwoShiftUSReg<BaseD>(
394 size, machInst, dest, op1, imm);
// Forwards to decodeNeonUTwoShiftSReg<BaseD, BaseQ> (unsigned) or
// decodeNeonSTwoShiftSReg<BaseD, BaseQ> (signed), passing `q`, `size` and
// the operands through.
// NOTE(review): the branch lines are not visible in this excerpt;
// presumably the unsigned path is taken when `notSigned` is true -- confirm.
398 template <template <typename T> class BaseD,
399 template <typename T> class BaseQ>
401 decodeNeonUSTwoShiftSReg(bool q, bool notSigned, unsigned size,
402 ExtMachInst machInst, IntRegIndex dest,
403 IntRegIndex op1, uint64_t imm)
406 return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
407 q, size, machInst, dest, op1, imm);
409 return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
410 q, size, machInst, dest, op1, imm);
// Decode helper: two-register NEON form, unsigned element types without a
// 64-bit variant. The visible returns construct Base<uint8_t>,
// Base<uint16_t> or Base<uint32_t> from (machInst, dest, op1), with
// Unknown(machInst) as the fallback.
// NOTE(review): the parameter line declaring `op1` and the selector lines
// are not visible in this excerpt; presumably keyed on `size` -- confirm.
414 template <template <typename T> class Base>
416 decodeNeonUTwoMiscUSReg(unsigned size,
417 ExtMachInst machInst, IntRegIndex dest,
422 return new Base<uint8_t>(machInst, dest, op1);
424 return new Base<uint16_t>(machInst, dest, op1);
426 return new Base<uint32_t>(machInst, dest, op1);
428 return new Unknown(machInst);
// Decode helper: two-register NEON form, signed element types without a
// 64-bit variant. The visible returns construct Base<int8_t>, Base<int16_t>
// or Base<int32_t> from (machInst, dest, op1), with Unknown(machInst) as
// the fallback.
// NOTE(review): the parameter line declaring `op1` and the selector lines
// are not visible in this excerpt; presumably keyed on `size` -- confirm.
432 template <template <typename T> class Base>
434 decodeNeonSTwoMiscUSReg(unsigned size,
435 ExtMachInst machInst, IntRegIndex dest,
440 return new Base<int8_t>(machInst, dest, op1);
442 return new Base<int16_t>(machInst, dest, op1);
444 return new Base<int32_t>(machInst, dest, op1);
446 return new Unknown(machInst);
// Forwards to decodeNeonUTwoMiscUSReg, instantiated with either BaseQ or
// BaseD, passing (size, machInst, dest, op1).
// NOTE(review): the `op1` parameter line and the branch lines are not
// visible in this excerpt; presumably BaseQ is used when `q` is true.
450 template <template <typename T> class BaseD,
451 template <typename T> class BaseQ>
453 decodeNeonUTwoMiscSReg(bool q, unsigned size,
454 ExtMachInst machInst, IntRegIndex dest,
458 return decodeNeonUTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
460 return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
// Forwards to decodeNeonSTwoMiscUSReg, instantiated with either BaseQ or
// BaseD, passing (size, machInst, dest, op1).
// NOTE(review): the `op1` parameter line and the branch lines are not
// visible in this excerpt; presumably BaseQ is used when `q` is true.
464 template <template <typename T> class BaseD,
465 template <typename T> class BaseQ>
467 decodeNeonSTwoMiscSReg(bool q, unsigned size,
468 ExtMachInst machInst, IntRegIndex dest,
472 return decodeNeonSTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
474 return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
// Decode helper: two-register NEON form, unsigned element types including
// 64-bit. The visible returns construct Base<uint8_t> through
// Base<uint64_t> from (machInst, dest, op1), with Unknown(machInst) as the
// fallback.
// NOTE(review): the parameter line declaring `op1` and the selector lines
// are not visible in this excerpt; presumably keyed on `size` -- confirm.
478 template <template <typename T> class Base>
480 decodeNeonUTwoMiscUReg(unsigned size,
481 ExtMachInst machInst, IntRegIndex dest,
486 return new Base<uint8_t>(machInst, dest, op1);
488 return new Base<uint16_t>(machInst, dest, op1);
490 return new Base<uint32_t>(machInst, dest, op1);
492 return new Base<uint64_t>(machInst, dest, op1);
494 return new Unknown(machInst);
// Decode helper: two-register NEON form, signed element types including
// 64-bit. The visible returns construct Base<int8_t> through Base<int64_t>
// from (machInst, dest, op1), with Unknown(machInst) as the fallback.
// NOTE(review): the parameter line declaring `op1` and the selector lines
// are not visible in this excerpt; presumably keyed on `size` -- confirm.
498 template <template <typename T> class Base>
500 decodeNeonSTwoMiscUReg(unsigned size,
501 ExtMachInst machInst, IntRegIndex dest,
506 return new Base<int8_t>(machInst, dest, op1);
508 return new Base<int16_t>(machInst, dest, op1);
510 return new Base<int32_t>(machInst, dest, op1);
512 return new Base<int64_t>(machInst, dest, op1);
514 return new Unknown(machInst);
// Forwards to decodeNeonSTwoMiscUReg, instantiated with either BaseQ or
// BaseD, passing (size, machInst, dest, op1).
// NOTE(review): the `op1` parameter line and the branch lines are not
// visible in this excerpt; presumably BaseQ is used when `q` is true.
518 template <template <typename T> class BaseD,
519 template <typename T> class BaseQ>
521 decodeNeonSTwoMiscReg(bool q, unsigned size,
522 ExtMachInst machInst, IntRegIndex dest,
526 return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
528 return decodeNeonSTwoMiscUReg<BaseD>(size, machInst, dest, op1);
// Forwards to decodeNeonUTwoMiscUReg, instantiated with either BaseQ or
// BaseD, passing (size, machInst, dest, op1).
// NOTE(review): the `op1` parameter line and the branch lines are not
// visible in this excerpt; presumably BaseQ is used when `q` is true.
532 template <template <typename T> class BaseD,
533 template <typename T> class BaseQ>
535 decodeNeonUTwoMiscReg(bool q, unsigned size,
536 ExtMachInst machInst, IntRegIndex dest,
540 return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
542 return decodeNeonUTwoMiscUReg<BaseD>(size, machInst, dest, op1);
// Forwards a two-register "misc" decode to the unsigned
// (decodeNeonUTwoMiscSReg) or signed (decodeNeonSTwoMiscSReg) helper,
// passing (q, size, machInst, dest, op1).
// The callees were previously the *TwoShiftSReg helpers, which take an
// additional `imm` argument and therefore cannot accept this five-argument
// call; the *TwoMiscSReg helpers have the matching signature.
// NOTE(review): the `op1` parameter line and the branch lines are not
// visible in this excerpt; presumably the unsigned path is taken when
// `notSigned` is true -- confirm.
546 template <template <typename T> class BaseD,
547 template <typename T> class BaseQ>
549 decodeNeonUSTwoMiscSReg(bool q, bool notSigned, unsigned size,
550 ExtMachInst machInst, IntRegIndex dest,
554 return decodeNeonUTwoMiscSReg<BaseD, BaseQ>(
555 q, size, machInst, dest, op1);
557 return decodeNeonSTwoMiscSReg<BaseD, BaseQ>(
558 q, size, machInst, dest, op1);
// Greater-than comparison helper: per the visible return, yields 0.0 when
// op1 > op2 and 1.0 otherwise.
// NOTE(review): the statement guarded by the isSnan() test is not visible in
// this excerpt -- confirm what is returned for signaling-NaN inputs.
566 vcgtFunc(float op1, float op2)
568 if (isSnan(op1) || isSnan(op2))
570 return (op1 > op2) ? 0.0 : 1.0;
// Greater-or-equal comparison helper: per the visible return, yields 0.0
// when op1 >= op2 and 1.0 otherwise.
// NOTE(review): the statement guarded by the isSnan() test is not visible in
// this excerpt -- confirm what is returned for signaling-NaN inputs.
574 vcgeFunc(float op1, float op2)
576 if (isSnan(op1) || isSnan(op2))
578 return (op1 >= op2) ? 0.0 : 1.0;
// Equality comparison helper: per the visible return, yields 0.0 when
// op1 == op2 and 1.0 otherwise.
// NOTE(review): the statement guarded by the isSnan() test is not visible in
// this excerpt -- confirm what is returned for signaling-NaN inputs.
582 vceqFunc(float op1, float op2)
584 if (isSnan(op1) || isSnan(op2))
586 return (op1 == op2) ? 0.0 : 1.0;
// Less-or-equal comparison helper: per the visible return, yields 0.0 when
// op1 <= op2 and 1.0 otherwise.
// NOTE(review): the statement guarded by the isSnan() test is not visible in
// this excerpt -- confirm what is returned for signaling-NaN inputs.
590 vcleFunc(float op1, float op2)
592 if (isSnan(op1) || isSnan(op2))
594 return (op1 <= op2) ? 0.0 : 1.0;
// Less-than comparison helper: per the visible return, yields 0.0 when
// op1 < op2 and 1.0 otherwise.
// NOTE(review): the statement guarded by the isSnan() test is not visible in
// this excerpt -- confirm what is returned for signaling-NaN inputs.
598 vcltFunc(float op1, float op2)
600 if (isSnan(op1) || isSnan(op2))
602 return (op1 < op2) ? 0.0 : 1.0;
// Absolute greater-than comparison helper: per the visible return, yields
// 0.0 when fabsf(op1) > fabsf(op2) and 1.0 otherwise.
// NOTE(review): the statement guarded by the isSnan() test is not visible in
// this excerpt -- confirm what is returned for signaling-NaN inputs.
606 vacgtFunc(float op1, float op2)
608 if (isSnan(op1) || isSnan(op2))
610 return (fabsf(op1) > fabsf(op2)) ? 0.0 : 1.0;
// Absolute greater-or-equal comparison helper: per the visible return,
// yields 0.0 when fabsf(op1) >= fabsf(op2) and 1.0 otherwise.
// NOTE(review): the statement guarded by the isSnan() test is not visible in
// this excerpt -- confirm what is returned for signaling-NaN inputs.
614 vacgeFunc(float op1, float op2)
616 if (isSnan(op1) || isSnan(op2))
618 return (fabsf(op1) >= fabsf(op2)) ? 0.0 : 1.0;
# Tuples of C++ element type names. The "small" tuples stop at 32 bits; the
# full unsigned/signed tuples append the 64-bit type. smallTypes and allTypes
# concatenate the unsigned tuple followed by the signed one.
627 smallUnsignedTypes = ("uint8_t", "uint16_t", "uint32_t")
628 unsignedTypes = smallUnsignedTypes + ("uint64_t",)
629 smallSignedTypes = ("int8_t", "int16_t", "int32_t")
630 signedTypes = smallSignedTypes + ("int64_t",)
631 smallTypes = smallUnsignedTypes + smallSignedTypes
632 allTypes = unsignedTypes + signedTypes
# Generator for a NEON three-register instruction whose sources and
# destination are all walked as RegVect elements of the same type.
# It assembles C++ into eWalkCode: rCount source registers are loaded with
# htog(), an element loop produces destElem and stores it with htog(), and
# the destination registers are written back with gtoh(). One visible loop
# form reads adjacent element pairs (2*i, 2*i+1) from srcReg1 for the first
# half of the results and from srcReg2 for the second half; the other reads
# element i of each source.
# The result is wrapped in InstObjParams and expanded through
# NeonRegRegRegOpDeclare (header_output) and NeonEqualRegExecute /
# NeonExecDeclare (exec_output).
# NOTE(review): several lines of this definition are not visible in this
# excerpt; presumably `pairwise` selects the paired loop, `readDest` enables
# the destination pre-load and NeonExecDeclare is emitted per entry of
# `types` -- confirm against the full file.
634 def threeEqualRegInst(name, Name, opClass, types, rCount, op,
635 readDest=False, pairwise=False):
636 global header_output, exec_output
637 eWalkCode = simdEnabledCheckCode + '''
638 RegVect srcReg1, srcReg2, destReg;
640 for reg in range(rCount):
642 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
643 srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
644 ''' % { "reg" : reg }
647 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
648 ''' % { "reg" : reg }
651 readDestCode = 'destElem = gtoh(destReg.elements[i]);'
654 for (unsigned i = 0; i < eCount; i++) {
655 Element srcElem1 = gtoh(2 * i < eCount ?
656 srcReg1.elements[2 * i] :
657 srcReg2.elements[2 * i - eCount]);
658 Element srcElem2 = gtoh(2 * i < eCount ?
659 srcReg1.elements[2 * i + 1] :
660 srcReg2.elements[2 * i + 1 - eCount]);
664 destReg.elements[i] = htog(destElem);
666 ''' % { "op" : op, "readDest" : readDestCode }
669 for (unsigned i = 0; i < eCount; i++) {
670 Element srcElem1 = gtoh(srcReg1.elements[i]);
671 Element srcElem2 = gtoh(srcReg2.elements[i]);
675 destReg.elements[i] = htog(destElem);
677 ''' % { "op" : op, "readDest" : readDestCode }
678 for reg in range(rCount):
680 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
681 ''' % { "reg" : reg }
682 iop = InstObjParams(name, Name,
686 "predicate_test": predicateTest,
687 "op_class": opClass }, [])
688 header_output += NeonRegRegRegOpDeclare.subst(iop)
689 exec_output += NeonEqualRegExecute.subst(iop)
691 substDict = { "targs" : type,
692 "class_name" : Name }
693 exec_output += NeonExecDeclare.subst(substDict)
# Floating-point counterpart of the three-equal-register generator.
# Sources are copied register by register into FloatVect arrays
# (srcRegs1 / srcRegs2). The destination is declared either as a RegVect
# (written through `.regs`, read back via FpDestP<n>_uw) or as a FloatVect
# (written and read as FloatReg). The generated loop runs over rCount
# registers; one visible form pairs registers (2*r, 2*r+1) from srcRegs1 for
# the first half and srcRegs2 for the second half, the other reads register
# r from each source.
# Output goes through NeonRegRegRegOpDeclare, NeonEqualRegExecute and
# NeonExecDeclare.
# NOTE(review): the guarding conditions are not visible in this excerpt;
# presumably `toInt` selects the RegVect/FloatRegBits destination,
# `pairwise` the paired loop and `readDest` the destination pre-load.
695 def threeEqualRegInstFp(name, Name, opClass, types, rCount, op,
696 readDest=False, pairwise=False, toInt=False):
697 global header_output, exec_output
698 eWalkCode = simdEnabledCheckCode + '''
699 typedef FloatReg FloatVect[rCount];
700 FloatVect srcRegs1, srcRegs2;
703 eWalkCode += 'RegVect destRegs;\n'
705 eWalkCode += 'FloatVect destRegs;\n'
706 for reg in range(rCount):
708 srcRegs1[%(reg)d] = FpOp1P%(reg)d;
709 srcRegs2[%(reg)d] = FpOp2P%(reg)d;
710 ''' % { "reg" : reg }
714 destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
715 ''' % { "reg" : reg }
718 destRegs[%(reg)d] = FpDestP%(reg)d;
719 ''' % { "reg" : reg }
722 readDestCode = 'destReg = destRegs[r];'
723 destType = 'FloatReg'
724 writeDest = 'destRegs[r] = destReg;'
726 destType = 'FloatRegBits'
727 writeDest = 'destRegs.regs[r] = destReg;'
730 for (unsigned r = 0; r < rCount; r++) {
731 FloatReg srcReg1 = (2 * r < rCount) ?
732 srcRegs1[2 * r] : srcRegs2[2 * r - rCount];
733 FloatReg srcReg2 = (2 * r < rCount) ?
734 srcRegs1[2 * r + 1] : srcRegs2[2 * r + 1 - rCount];
735 %(destType)s destReg;
741 "readDest" : readDestCode,
742 "destType" : destType,
743 "writeDest" : writeDest }
746 for (unsigned r = 0; r < rCount; r++) {
747 FloatReg srcReg1 = srcRegs1[r];
748 FloatReg srcReg2 = srcRegs2[r];
749 %(destType)s destReg;
755 "readDest" : readDestCode,
756 "destType" : destType,
757 "writeDest" : writeDest }
758 for reg in range(rCount):
761 FpDestP%(reg)d_uw = destRegs.regs[%(reg)d];
762 ''' % { "reg" : reg }
765 FpDestP%(reg)d = destRegs[%(reg)d];
766 ''' % { "reg" : reg }
767 iop = InstObjParams(name, Name,
771 "predicate_test": predicateTest,
772 "op_class": opClass }, [])
773 header_output += NeonRegRegRegOpDeclare.subst(iop)
774 exec_output += NeonEqualRegExecute.subst(iop)
776 substDict = { "targs" : type,
777 "class_name" : Name }
778 exec_output += NeonExecDeclare.subst(substDict)
780 def threeUnequalRegInst(name, Name, opClass, types, op,
781 bigSrc1, bigSrc2, bigDest, readDest):
782 global header_output, exec_output
783 src1Cnt = src2Cnt = destCnt = 2
784 src1Prefix = src2Prefix = destPrefix = ''
794 eWalkCode = simdEnabledCheckCode + '''
798 ''' % (src1Prefix, src2Prefix, destPrefix)
799 for reg in range(src1Cnt):
801 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
802 ''' % { "reg" : reg }
803 for reg in range(src2Cnt):
805 srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
806 ''' % { "reg" : reg }
808 for reg in range(destCnt):
810 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
811 ''' % { "reg" : reg }
814 readDestCode = 'destElem = gtoh(destReg.elements[i]);'
816 for (unsigned i = 0; i < eCount; i++) {
817 %(src1Prefix)sElement srcElem1 = gtoh(srcReg1.elements[i]);
818 %(src1Prefix)sElement srcElem2 = gtoh(srcReg2.elements[i]);
819 %(destPrefix)sElement destElem;
822 destReg.elements[i] = htog(destElem);
824 ''' % { "op" : op, "readDest" : readDestCode,
825 "src1Prefix" : src1Prefix, "src2Prefix" : src2Prefix,
826 "destPrefix" : destPrefix }
827 for reg in range(destCnt):
829 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
830 ''' % { "reg" : reg }
831 iop = InstObjParams(name, Name,
835 "predicate_test": predicateTest,
836 "op_class": opClass }, [])
837 header_output += NeonRegRegRegOpDeclare.subst(iop)
838 exec_output += NeonUnequalRegExecute.subst(iop)
840 substDict = { "targs" : type,
841 "class_name" : Name }
842 exec_output += NeonExecDeclare.subst(substDict)
# Narrowing form: calls threeUnequalRegInst with bigSrc1=True, bigSrc2=True
# and bigDest=False, forwarding readDest.
844 def threeRegNarrowInst(name, Name, opClass, types, op, readDest=False):
845 threeUnequalRegInst(name, Name, opClass, types, op,
846 True, True, False, readDest)
# Long form: calls threeUnequalRegInst with bigSrc1=False, bigSrc2=False and
# bigDest=True, forwarding readDest.
848 def threeRegLongInst(name, Name, opClass, types, op, readDest=False):
849 threeUnequalRegInst(name, Name, opClass, types, op,
850 False, False, True, readDest)
# Wide form: calls threeUnequalRegInst with bigSrc1=True, bigSrc2=False and
# bigDest=True, forwarding readDest.
852 def threeRegWideInst(name, Name, opClass, types, op, readDest=False):
853 threeUnequalRegInst(name, Name, opClass, types, op,
854 True, False, True, readDest)
856 def twoEqualRegInst(name, Name, opClass, types, rCount, op, readDest=False):
857 global header_output, exec_output
858 eWalkCode = simdEnabledCheckCode + '''
859 RegVect srcReg1, srcReg2, destReg;
861 for reg in range(rCount):
863 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
864 srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
865 ''' % { "reg" : reg }
868 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
869 ''' % { "reg" : reg }
872 readDestCode = 'destElem = gtoh(destReg.elements[i]);'
874 if (imm < 0 && imm >= eCount) {
876 fault = new UndefinedInstruction;
878 fault = new UndefinedInstruction(false, mnemonic);
880 for (unsigned i = 0; i < eCount; i++) {
881 Element srcElem1 = gtoh(srcReg1.elements[i]);
882 Element srcElem2 = gtoh(srcReg2.elements[imm]);
886 destReg.elements[i] = htog(destElem);
889 ''' % { "op" : op, "readDest" : readDestCode }
890 for reg in range(rCount):
892 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
893 ''' % { "reg" : reg }
894 iop = InstObjParams(name, Name,
898 "predicate_test": predicateTest,
899 "op_class": opClass }, [])
900 header_output += NeonRegRegRegImmOpDeclare.subst(iop)
901 exec_output += NeonEqualRegExecute.subst(iop)
903 substDict = { "targs" : type,
904 "class_name" : Name }
905 exec_output += NeonExecDeclare.subst(substDict)
907 def twoRegLongInst(name, Name, opClass, types, op, readDest=False):
908 global header_output, exec_output
910 eWalkCode = simdEnabledCheckCode + '''
911 RegVect srcReg1, srcReg2;
914 for reg in range(rCount):
916 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
917 srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);;
918 ''' % { "reg" : reg }
920 for reg in range(2 * rCount):
922 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
923 ''' % { "reg" : reg }
926 readDestCode = 'destElem = gtoh(destReg.elements[i]);'
928 if (imm < 0 && imm >= eCount) {
930 fault = new UndefinedInstruction;
932 fault = new UndefinedInstruction(false, mnemonic);
934 for (unsigned i = 0; i < eCount; i++) {
935 Element srcElem1 = gtoh(srcReg1.elements[i]);
936 Element srcElem2 = gtoh(srcReg2.elements[imm]);
940 destReg.elements[i] = htog(destElem);
943 ''' % { "op" : op, "readDest" : readDestCode }
944 for reg in range(2 * rCount):
946 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
947 ''' % { "reg" : reg }
948 iop = InstObjParams(name, Name,
952 "predicate_test": predicateTest,
953 "op_class": opClass }, [])
954 header_output += NeonRegRegRegImmOpDeclare.subst(iop)
955 exec_output += NeonUnequalRegExecute.subst(iop)
957 substDict = { "targs" : type,
958 "class_name" : Name }
959 exec_output += NeonExecDeclare.subst(substDict)
961 def twoEqualRegInstFp(name, Name, opClass, types, rCount, op, readDest=False):
962 global header_output, exec_output
963 eWalkCode = simdEnabledCheckCode + '''
964 typedef FloatReg FloatVect[rCount];
965 FloatVect srcRegs1, srcRegs2, destRegs;
967 for reg in range(rCount):
969 srcRegs1[%(reg)d] = FpOp1P%(reg)d;
970 srcRegs2[%(reg)d] = FpOp2P%(reg)d;
971 ''' % { "reg" : reg }
974 destRegs[%(reg)d] = FpDestP%(reg)d;
975 ''' % { "reg" : reg }
978 readDestCode = 'destReg = destRegs[i];'
980 if (imm < 0 && imm >= eCount) {
982 fault = new UndefinedInstruction;
984 fault = new UndefinedInstruction(false, mnemonic);
986 for (unsigned i = 0; i < rCount; i++) {
987 FloatReg srcReg1 = srcRegs1[i];
988 FloatReg srcReg2 = srcRegs2[imm];
992 destRegs[i] = destReg;
995 ''' % { "op" : op, "readDest" : readDestCode }
996 for reg in range(rCount):
998 FpDestP%(reg)d = destRegs[%(reg)d];
999 ''' % { "reg" : reg }
1000 iop = InstObjParams(name, Name,
1002 { "code": eWalkCode,
1004 "predicate_test": predicateTest,
1005 "op_class": opClass }, [])
1006 header_output += NeonRegRegRegImmOpDeclare.subst(iop)
1007 exec_output += NeonEqualRegExecute.subst(iop)
1009 substDict = { "targs" : type,
1010 "class_name" : Name }
1011 exec_output += NeonExecDeclare.subst(substDict)
# Generator for NEON two-register-plus-immediate instructions with equally
# sized source and destination. Source and destination are RegVect values
# loaded with htog() and written back with gtoh(). The loop body is built
# from four fragments (readOp, declDest, readDest, writeDest), each of which
# has an element-typed variant (Element srcElem1 / destElem via `.elements`)
# and a whole-register variant (FloatRegBits srcReg1 / destReg via `.regs`).
# Output goes through NeonRegRegImmOpDeclare, NeonEqualRegExecute and
# NeonExecDeclare.
# NOTE(review): the conditions choosing between the variants are not visible
# in this excerpt; presumably `fromInt` and `toInt` control the source and
# destination variants respectively -- confirm against the full file.
1013 def twoRegShiftInst(name, Name, opClass, types, rCount, op,
1014 readDest=False, toInt=False, fromInt=False):
1015 global header_output, exec_output
1016 eWalkCode = simdEnabledCheckCode + '''
1017 RegVect srcRegs1, destRegs;
1019 for reg in range(rCount):
1021 srcRegs1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1022 ''' % { "reg" : reg }
1025 destRegs.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1026 ''' % { "reg" : reg }
1029 readDestCode = 'destElem = gtoh(destRegs.elements[i]);'
1031 readDestCode = 'destReg = gtoh(destRegs.regs[i]);'
1032 readOpCode = 'Element srcElem1 = gtoh(srcRegs1.elements[i]);'
1034 readOpCode = 'FloatRegBits srcReg1 = gtoh(srcRegs1.regs[i]);'
1035 declDest = 'Element destElem;'
1036 writeDestCode = 'destRegs.elements[i] = htog(destElem);'
1038 declDest = 'FloatRegBits destReg;'
1039 writeDestCode = 'destRegs.regs[i] = htog(destReg);'
1041 for (unsigned i = 0; i < eCount; i++) {
1048 ''' % { "readOp" : readOpCode,
1049 "declDest" : declDest,
1050 "readDest" : readDestCode,
1052 "writeDest" : writeDestCode }
1053 for reg in range(rCount):
1055 FpDestP%(reg)d_uw = gtoh(destRegs.regs[%(reg)d]);
1056 ''' % { "reg" : reg }
1057 iop = InstObjParams(name, Name,
1059 { "code": eWalkCode,
1061 "predicate_test": predicateTest,
1062 "op_class": opClass }, [])
1063 header_output += NeonRegRegImmOpDeclare.subst(iop)
1064 exec_output += NeonEqualRegExecute.subst(iop)
1066 substDict = { "targs" : type,
1067 "class_name" : Name }
1068 exec_output += NeonExecDeclare.subst(substDict)
# Generator for narrowing two-register-plus-immediate instructions: four
# source registers are loaded, each source element is read as a BigElement,
# and two destination registers are (optionally) pre-loaded and written
# back. Output goes through NeonRegRegImmOpDeclare, NeonUnequalRegExecute and
# NeonExecDeclare.
# NOTE(review): the declarations of srcReg1 / destReg / destElem and the
# `readDest` guard are not visible in this excerpt -- confirm against the
# full file.
1070 def twoRegNarrowShiftInst(name, Name, opClass, types, op, readDest=False):
1071 global header_output, exec_output
1072 eWalkCode = simdEnabledCheckCode + '''
1076 for reg in range(4):
1078 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1079 ''' % { "reg" : reg }
1081 for reg in range(2):
1083 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1084 ''' % { "reg" : reg }
1087 readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1089 for (unsigned i = 0; i < eCount; i++) {
1090 BigElement srcElem1 = gtoh(srcReg1.elements[i]);
1094 destReg.elements[i] = htog(destElem);
1096 ''' % { "op" : op, "readDest" : readDestCode }
1097 for reg in range(2):
1099 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1100 ''' % { "reg" : reg }
1101 iop = InstObjParams(name, Name,
1103 { "code": eWalkCode,
1105 "predicate_test": predicateTest,
1106 "op_class": opClass }, [])
1107 header_output += NeonRegRegImmOpDeclare.subst(iop)
1108 exec_output += NeonUnequalRegExecute.subst(iop)
1110 substDict = { "targs" : type,
1111 "class_name" : Name }
1112 exec_output += NeonExecDeclare.subst(substDict)
1114 def twoRegLongShiftInst(name, Name, opClass, types, op, readDest=False):
1115 global header_output, exec_output
1116 eWalkCode = simdEnabledCheckCode + '''
1120 for reg in range(2):
1122 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1123 ''' % { "reg" : reg }
1125 for reg in range(4):
1127 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1128 ''' % { "reg" : reg }
1131 readDestCode = 'destReg = gtoh(destReg.elements[i]);'
1133 for (unsigned i = 0; i < eCount; i++) {
1134 Element srcElem1 = gtoh(srcReg1.elements[i]);
1135 BigElement destElem;
1138 destReg.elements[i] = htog(destElem);
1140 ''' % { "op" : op, "readDest" : readDestCode }
1141 for reg in range(4):
1143 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1144 ''' % { "reg" : reg }
1145 iop = InstObjParams(name, Name,
1147 { "code": eWalkCode,
1149 "predicate_test": predicateTest,
1150 "op_class": opClass }, [])
1151 header_output += NeonRegRegImmOpDeclare.subst(iop)
1152 exec_output += NeonUnequalRegExecute.subst(iop)
1154 substDict = { "targs" : type,
1155 "class_name" : Name }
1156 exec_output += NeonExecDeclare.subst(substDict)
# Generator for NEON two-register "misc" instructions with equally sized
# source and destination. rCount registers are loaded into srcReg1 (and
# optionally destReg) with htog(); the loop reads srcReg1.elements[i] and
# stores destElem to destReg.elements[j]; registers are written back with
# gtoh(). Output goes through NeonRegRegOpDeclare, NeonEqualRegExecute and
# NeonExecDeclare.
# NOTE(review): the declaration of `j` and the `readDest` guard are not
# visible in this excerpt -- confirm how `j` relates to `i` in the full file.
1158 def twoRegMiscInst(name, Name, opClass, types, rCount, op, readDest=False):
1159 global header_output, exec_output
1160 eWalkCode = simdEnabledCheckCode + '''
1161 RegVect srcReg1, destReg;
1163 for reg in range(rCount):
1165 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1166 ''' % { "reg" : reg }
1169 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1170 ''' % { "reg" : reg }
1173 readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1175 for (unsigned i = 0; i < eCount; i++) {
1177 Element srcElem1 = gtoh(srcReg1.elements[i]);
1181 destReg.elements[j] = htog(destElem);
1183 ''' % { "op" : op, "readDest" : readDestCode }
1184 for reg in range(rCount):
1186 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1187 ''' % { "reg" : reg }
1188 iop = InstObjParams(name, Name,
1190 { "code": eWalkCode,
1192 "predicate_test": predicateTest,
1193 "op_class": opClass }, [])
1194 header_output += NeonRegRegOpDeclare.subst(iop)
1195 exec_output += NeonEqualRegExecute.subst(iop)
1197 substDict = { "targs" : type,
1198 "class_name" : Name }
1199 exec_output += NeonExecDeclare.subst(substDict)
# Generator for NEON two-register instructions whose source operand is a
# single element selected by `imm`: every loop iteration reads
# srcReg1.elements[imm] and stores destElem to destReg.elements[i].
# Registers are loaded with htog() and written back with gtoh(). Output goes
# through NeonRegRegImmOpDeclare, NeonEqualRegExecute and NeonExecDeclare.
# NOTE(review): no range check on `imm` is visible in this excerpt, and the
# `readDest` guard is not visible either -- confirm against the full file.
1201 def twoRegMiscScInst(name, Name, opClass, types, rCount, op, readDest=False):
1202 global header_output, exec_output
1203 eWalkCode = simdEnabledCheckCode + '''
1204 RegVect srcReg1, destReg;
1206 for reg in range(rCount):
1208 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1209 ''' % { "reg" : reg }
1212 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1213 ''' % { "reg" : reg }
1216 readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1218 for (unsigned i = 0; i < eCount; i++) {
1219 Element srcElem1 = gtoh(srcReg1.elements[imm]);
1223 destReg.elements[i] = htog(destElem);
1225 ''' % { "op" : op, "readDest" : readDestCode }
1226 for reg in range(rCount):
1228 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1229 ''' % { "reg" : reg }
1230 iop = InstObjParams(name, Name,
1232 { "code": eWalkCode,
1234 "predicate_test": predicateTest,
1235 "op_class": opClass }, [])
1236 header_output += NeonRegRegImmOpDeclare.subst(iop)
1237 exec_output += NeonEqualRegExecute.subst(iop)
1239 substDict = { "targs" : type,
1240 "class_name" : Name }
1241 exec_output += NeonExecDeclare.subst(substDict)
# Generator for NEON two-register instructions that modify both operands:
# both srcReg1 and destReg are loaded from their registers with htog(), and
# after the operation both FpDestP<n>_uw and FpOp1P<n>_uw are written back
# with gtoh(). Output goes through NeonRegRegOpDeclare, NeonEqualRegExecute
# and NeonExecDeclare.
# NOTE(review): the lines that splice `op` into eWalkCode and the use of
# readDestCode are not visible in this excerpt -- confirm against the full
# file.
1243 def twoRegMiscScramble(name, Name, opClass, types, rCount, op, readDest=False):
1244 global header_output, exec_output
1245 eWalkCode = simdEnabledCheckCode + '''
1246 RegVect srcReg1, destReg;
1248 for reg in range(rCount):
1250 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1251 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1252 ''' % { "reg" : reg }
1255 ''' % { "reg" : reg }
1258 readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1260 for reg in range(rCount):
1262 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1263 FpOp1P%(reg)d_uw = gtoh(srcReg1.regs[%(reg)d]);
1264 ''' % { "reg" : reg }
1265 iop = InstObjParams(name, Name,
1267 { "code": eWalkCode,
1269 "predicate_test": predicateTest,
1270 "op_class": opClass }, [])
1271 header_output += NeonRegRegOpDeclare.subst(iop)
1272 exec_output += NeonEqualRegExecute.subst(iop)
1274 substDict = { "targs" : type,
1275 "class_name" : Name }
1276 exec_output += NeonExecDeclare.subst(substDict)
1278 def twoRegMiscInstFp(name, Name, opClass, types, rCount, op,
1279 readDest=False, toInt=False):
1280 global header_output, exec_output
1281 eWalkCode = simdEnabledCheckCode + '''
1282 typedef FloatReg FloatVect[rCount];
1286 eWalkCode += 'RegVect destRegs;\n'
1288 eWalkCode += 'FloatVect destRegs;\n'
1289 for reg in range(rCount):
1291 srcRegs1[%(reg)d] = FpOp1P%(reg)d;
1292 ''' % { "reg" : reg }
1296 destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
1297 ''' % { "reg" : reg }
1300 destRegs[%(reg)d] = FpDestP%(reg)d;
1301 ''' % { "reg" : reg }
1304 readDestCode = 'destReg = destRegs[i];'
1305 destType = 'FloatReg'
1306 writeDest = 'destRegs[r] = destReg;'
1308 destType = 'FloatRegBits'
1309 writeDest = 'destRegs.regs[r] = destReg;'
1311 for (unsigned r = 0; r < rCount; r++) {
1312 FloatReg srcReg1 = srcRegs1[r];
1313 %(destType)s destReg;
1319 "readDest" : readDestCode,
1320 "destType" : destType,
1321 "writeDest" : writeDest }
1322 for reg in range(rCount):
1325 FpDestP%(reg)d_uw = destRegs.regs[%(reg)d];
1326 ''' % { "reg" : reg }
1329 FpDestP%(reg)d = destRegs[%(reg)d];
1330 ''' % { "reg" : reg }
1331 iop = InstObjParams(name, Name,
1333 { "code": eWalkCode,
1335 "predicate_test": predicateTest,
1336 "op_class": opClass }, [])
1337 header_output += NeonRegRegOpDeclare.subst(iop)
1338 exec_output += NeonEqualRegExecute.subst(iop)
1340 substDict = { "targs" : type,
1341 "class_name" : Name }
1342 exec_output += NeonExecDeclare.subst(substDict)
# Generate a NEON two-register instruction that condenses adjacent source
# elements: the generated loop runs eCount / 2 times, reads source elements
# 2*i and 2*i+1 as srcElem1/srcElem2, and stores one BigElement result per
# pair into destReg.elements[i].
#   name, Name - forwarded to InstObjParams; Name is also the "class_name"
#                given to NeonExecDeclare.
#   opClass    - stored under "op_class" in the InstObjParams dictionary.
#   types      - presumably iterated to produce `type` for NeonExecDeclare;
#                the loop header is not visible here -- confirm.
#   rCount     - number of registers copied in and out.
#   op         - C++ snippet substituted for %(op)s in the loop body.
#   readDest   - defaults to False; see readDestCode below.
# Appends to the globals header_output and exec_output; returns nothing.
1344 def twoRegCondenseInst(name, Name, opClass, types, rCount, op, readDest=False):
1345 global header_output, exec_output
# eWalkCode accumulates the C++ body, starting from the shared
# simdEnabledCheckCode prefix.
1346 eWalkCode = simdEnabledCheckCode + '''
1350 for reg in range(rCount):
1352 srcRegs.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1353 ''' % { "reg" : reg }
1356 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1357 ''' % { "reg" : reg }
# C++ statement that seeds destElem from the current destination element
# (presumably only used when readDest is set -- the guard is not visible).
1360 readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1362 for (unsigned i = 0; i < eCount / 2; i++) {
1363 Element srcElem1 = gtoh(srcRegs.elements[2 * i]);
1364 Element srcElem2 = gtoh(srcRegs.elements[2 * i + 1]);
1365 BigElement destElem;
1368 destReg.elements[i] = htog(destElem);
1370 ''' % { "op" : op, "readDest" : readDestCode }
# Write-back of the destination registers.
1371 for reg in range(rCount):
1373 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1374 ''' % { "reg" : reg }
# Package the accumulated code; this helper uses the NeonUnequalRegExecute
# template for the execute method.
1375 iop = InstObjParams(name, Name,
1377 { "code": eWalkCode,
1379 "predicate_test": predicateTest,
1380 "op_class": opClass }, [])
1381 header_output += NeonRegRegOpDeclare.subst(iop)
1382 exec_output += NeonUnequalRegExecute.subst(iop)
1384 substDict = { "targs" : type,
1385 "class_name" : Name }
# One NeonExecDeclare expansion per `type` value.
1386 exec_output += NeonExecDeclare.subst(substDict)
# Generate a NEON two-register narrowing instruction: the generated C++ reads
# four source registers (range(4)) as BigElement values and writes two
# destination registers (range(2)).
#   name, Name - forwarded to InstObjParams; Name is also the "class_name"
#                given to NeonExecDeclare.
#   opClass    - stored under "op_class" in the InstObjParams dictionary.
#   types      - presumably iterated to produce `type` for NeonExecDeclare;
#                the loop header is not visible here -- confirm.
#   op         - C++ snippet substituted for %(op)s in the element loop.
#   readDest   - defaults to False; see readDestCode below.
# Appends to the globals header_output and exec_output; returns nothing.
1388 def twoRegNarrowMiscInst(name, Name, opClass, types, op, readDest=False):
1389 global header_output, exec_output
# eWalkCode accumulates the C++ body, starting from the shared
# simdEnabledCheckCode prefix.
1390 eWalkCode = simdEnabledCheckCode + '''
1394 for reg in range(4):
1396 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1397 ''' % { "reg" : reg }
1399 for reg in range(2):
1401 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1402 ''' % { "reg" : reg }
# C++ statement that seeds destElem from the current destination element
# (presumably only used when readDest is set -- the guard is not visible).
1405 readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1407 for (unsigned i = 0; i < eCount; i++) {
1408 BigElement srcElem1 = gtoh(srcReg1.elements[i]);
1412 destReg.elements[i] = htog(destElem);
1414 ''' % { "op" : op, "readDest" : readDestCode }
# Write-back of the two destination registers.
1415 for reg in range(2):
1417 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1418 ''' % { "reg" : reg }
# Package the accumulated code; this helper uses the NeonUnequalRegExecute
# template for the execute method.
1419 iop = InstObjParams(name, Name,
1421 { "code": eWalkCode,
1423 "predicate_test": predicateTest,
1424 "op_class": opClass }, [])
1425 header_output += NeonRegRegOpDeclare.subst(iop)
1426 exec_output += NeonUnequalRegExecute.subst(iop)
1428 substDict = { "targs" : type,
1429 "class_name" : Name }
# One NeonExecDeclare expansion per `type` value.
1430 exec_output += NeonExecDeclare.subst(substDict)
# Generate a NEON one-register-plus-immediate instruction: the generated C++
# touches only the destination register (no FpOp1P* source is loaded) and the
# class is declared through NeonRegImmOpDeclare.
#   name, Name - forwarded to InstObjParams; Name is also the "class_name"
#                given to NeonExecDeclare.
#   opClass    - stored under "op_class" in the InstObjParams dictionary.
#   types      - presumably iterated to produce `type` for NeonExecDeclare;
#                the loop header is not visible here -- confirm.
#   rCount     - number of destination registers copied in and out.
#   op         - C++ snippet substituted for %(op)s in the element loop.
#   readDest   - defaults to False; see readDestCode below.
# Appends to the globals header_output and exec_output; returns nothing.
1432 def oneRegImmInst(name, Name, opClass, types, rCount, op, readDest=False):
1433 global header_output, exec_output
# eWalkCode accumulates the C++ body, starting from the shared
# simdEnabledCheckCode prefix.
1434 eWalkCode = simdEnabledCheckCode + '''
1438 for reg in range(rCount):
1440 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1441 ''' % { "reg" : reg }
# C++ statement that seeds destElem from the current destination element
# (presumably only used when readDest is set -- the guard is not visible).
1444 readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1446 for (unsigned i = 0; i < eCount; i++) {
1450 destReg.elements[i] = htog(destElem);
1452 ''' % { "op" : op, "readDest" : readDestCode }
# Write-back of the destination registers.
1453 for reg in range(rCount):
1455 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1456 ''' % { "reg" : reg }
# Package the accumulated code and emit declaration + execute method.
1457 iop = InstObjParams(name, Name,
1459 { "code": eWalkCode,
1461 "predicate_test": predicateTest,
1462 "op_class": opClass }, [])
1463 header_output += NeonRegImmOpDeclare.subst(iop)
1464 exec_output += NeonEqualRegExecute.subst(iop)
1466 substDict = { "targs" : type,
1467 "class_name" : Name }
# One NeonExecDeclare expansion per `type` value.
1468 exec_output += NeonExecDeclare.subst(substDict)
1470 def twoRegLongMiscInst(name, Name, opClass, types, op, readDest=False):
1471 global header_output, exec_output
1472 eWalkCode = simdEnabledCheckCode + '''
1476 for reg in range(2):
1478 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1479 ''' % { "reg" : reg }
1481 for reg in range(4):
1483 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1484 ''' % { "reg" : reg }
1487 readDestCode = 'destReg = gtoh(destReg.elements[i]);'
1489 for (unsigned i = 0; i < eCount; i++) {
1490 Element srcElem1 = gtoh(srcReg1.elements[i]);
1491 BigElement destElem;
1494 destReg.elements[i] = htog(destElem);
1496 ''' % { "op" : op, "readDest" : readDestCode }
1497 for reg in range(4):
1499 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1500 ''' % { "reg" : reg }
1501 iop = InstObjParams(name, Name,
1503 { "code": eWalkCode,
1505 "predicate_test": predicateTest,
1506 "op_class": opClass }, [])
1507 header_output += NeonRegRegOpDeclare.subst(iop)
1508 exec_output += NeonUnequalRegExecute.subst(iop)
1510 substDict = { "targs" : type,
1511 "class_name" : Name }
1512 exec_output += NeonExecDeclare.subst(substDict)
1516 (((unsigned)srcElem1 & 0x1) +
1517 ((unsigned)srcElem2 & 0x1)) >> 1;
1518 // Use division instead of a shift to ensure the sign extension works
1519 // right. The compiler will figure out if it can be a shift. Mask the
1520 // inputs so they get truncated correctly.
1521 destElem = (((srcElem1 & ~(Element)1) / 2) +
1522 ((srcElem2 & ~(Element)1) / 2)) + carryBit;
1524 threeEqualRegInst("vhadd", "VhaddD", "SimdAddOp", allTypes, 2, vhaddCode)
# vhadd is registered twice with the same vhaddCode snippet: the call above
# passes 2 and the call below passes 4 in the slot before the code argument
# (presumably the register count, i.e. the D and Q forms -- the signature of
# threeEqualRegInst is not visible here). The visible part of the snippet
# halves each source with its low bit masked off and adds carryBit, which is
# built from the two low bits.
1525 threeEqualRegInst("vhadd", "VhaddQ", "SimdAddOp", allTypes, 4, vhaddCode)
1529 (((unsigned)srcElem1 & 0x1) +
1530 ((unsigned)srcElem2 & 0x1) + 1) >> 1;
1531 // Use division instead of a shift to ensure the sign extension works
1532 // right. The compiler will figure out if it can be a shift. Mask the
1533 // inputs so they get truncated correctly.
1534 destElem = (((srcElem1 & ~(Element)1) / 2) +
1535 ((srcElem2 & ~(Element)1) / 2)) + carryBit;
1537 threeEqualRegInst("vrhadd", "VrhaddD", "SimdAddOp", allTypes, 2, vrhaddCode)
# vrhadd: same structure as vhadd, except that the visible carry term adds 1
# to the sum of the two low bits before shifting right by one.
1538 threeEqualRegInst("vrhadd", "VrhaddQ", "SimdAddOp", allTypes, 4, vrhaddCode)
1542 (((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1;
1543 // Use division instead of a shift to ensure the sign extension works
1544 // right. The compiler will figure out if it can be a shift. Mask the
1545 // inputs so they get truncated correctly.
1546 destElem = (((srcElem1 & ~(Element)1) / 2) -
1547 ((srcElem2 & ~(Element)1) / 2)) - barrowBit;
1549 threeEqualRegInst("vhsub", "VhsubD", "SimdAddOp", allTypes, 2, vhsubCode)
# vhsub: subtracts the halved, masked sources and then subtracts a borrow
# term derived from the two low bits (the snippet spells the variable
# `barrowBit`).
1550 threeEqualRegInst("vhsub", "VhsubQ", "SimdAddOp", allTypes, 4, vhsubCode)
1553 destElem = srcElem1 & srcElem2;
1555 threeEqualRegInst("vand", "VandD", "SimdAluOp", unsignedTypes, 2, vandCode)
# Bitwise group (vand, vbic, vorr, vmov, vorn, veor, vbif, vbit, vbsl): each
# snippet is a single expression over srcElem1/srcElem2 (and destElem for the
# last three). Every instruction is registered for unsignedTypes in a 2- and
# a 4-register form.
1556 threeEqualRegInst("vand", "VandQ", "SimdAluOp", unsignedTypes, 4, vandCode)
1559 destElem = srcElem1 & ~srcElem2;
1561 threeEqualRegInst("vbic", "VbicD", "SimdAluOp", unsignedTypes, 2, vbicCode)
1562 threeEqualRegInst("vbic", "VbicQ", "SimdAluOp", unsignedTypes, 4, vbicCode)
1565 destElem = srcElem1 | srcElem2;
1567 threeEqualRegInst("vorr", "VorrD", "SimdAluOp", unsignedTypes, 2, vorrCode)
1568 threeEqualRegInst("vorr", "VorrQ", "SimdAluOp", unsignedTypes, 4, vorrCode)
1570 threeEqualRegInst("vmov", "VmovD", "SimdMiscOp", unsignedTypes, 2, vorrCode)
# NOTE(review): vmov reuses vorrCode (srcElem1 | srcElem2) under a different
# op class; this only acts as a move if both source operands name the same
# register -- presumably guaranteed by the decoder, confirm there.
1571 threeEqualRegInst("vmov", "VmovQ", "SimdMiscOp", unsignedTypes, 4, vorrCode)
1574 destElem = srcElem1 | ~srcElem2;
1576 threeEqualRegInst("vorn", "VornD", "SimdAluOp", unsignedTypes, 2, vornCode)
1577 threeEqualRegInst("vorn", "VornQ", "SimdAluOp", unsignedTypes, 4, vornCode)
1580 destElem = srcElem1 ^ srcElem2;
1582 threeEqualRegInst("veor", "VeorD", "SimdAluOp", unsignedTypes, 2, veorCode)
1583 threeEqualRegInst("veor", "VeorQ", "SimdAluOp", unsignedTypes, 4, veorCode)
1586 destElem = (destElem & srcElem2) | (srcElem1 & ~srcElem2);
1588 threeEqualRegInst("vbif", "VbifD", "SimdAluOp", unsignedTypes, 2, vbifCode, True)
# vbif/vbit/vbsl pass a trailing True because their snippets read destElem as
# an input (presumably this is the helper's readDest flag, by analogy with
# the readDest parameter of the helpers defined in this file).
1589 threeEqualRegInst("vbif", "VbifQ", "SimdAluOp", unsignedTypes, 4, vbifCode, True)
1591 destElem = (srcElem1 & srcElem2) | (destElem & ~srcElem2);
1593 threeEqualRegInst("vbit", "VbitD", "SimdAluOp", unsignedTypes, 2, vbitCode, True)
1594 threeEqualRegInst("vbit", "VbitQ", "SimdAluOp", unsignedTypes, 4, vbitCode, True)
1596 destElem = (srcElem1 & destElem) | (srcElem2 & ~destElem);
1598 threeEqualRegInst("vbsl", "VbslD", "SimdAluOp", unsignedTypes, 2, vbslCode, True)
1599 threeEqualRegInst("vbsl", "VbslQ", "SimdAluOp", unsignedTypes, 4, vbslCode, True)
1602 destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2;
1604 threeEqualRegInst("vmax", "VmaxD", "SimdCmpOp", allTypes, 2, vmaxCode)
# vmax/vmin: element-wise selection of the larger/smaller source, registered
# for allTypes in 2- and 4-register forms. vmaxCode and vminCode are reused
# later in this file for the pairwise vpmax/vpmin registrations.
1605 threeEqualRegInst("vmax", "VmaxQ", "SimdCmpOp", allTypes, 4, vmaxCode)
1608 destElem = (srcElem1 < srcElem2) ? srcElem1 : srcElem2;
1610 threeEqualRegInst("vmin", "VminD", "SimdCmpOp", allTypes, 2, vminCode)
1611 threeEqualRegInst("vmin", "VminQ", "SimdCmpOp", allTypes, 4, vminCode)
1614 destElem = srcElem1 + srcElem2;
1616 threeEqualRegInst("vadd", "NVaddD", "SimdAddOp", unsignedTypes, 2, vaddCode)
# Plain add: registered only for unsignedTypes in 2- and 4-register forms.
# The same vaddCode is reused just below for the pairwise vpadd (2-register
# form only, smallUnsignedTypes, pairwise=True).
1617 threeEqualRegInst("vadd", "NVaddQ", "SimdAddOp", unsignedTypes, 4, vaddCode)
1619 threeEqualRegInst("vpadd", "NVpaddD", "SimdAddOp", smallUnsignedTypes,
1620 2, vaddCode, pairwise=True)
1622 destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
1624 threeRegLongInst("vaddl", "Vaddl", "SimdAddOp", smallTypes, vaddlwCode)
# vaddl (long) and vaddw (wide) share vaddlwCode, which casts both sources to
# BigElement before adding.
1625 threeRegWideInst("vaddw", "Vaddw", "SimdAddOp", smallTypes, vaddlwCode)
1627 destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >>
1628 (sizeof(Element) * 8);
1630 threeRegNarrowInst("vaddhn", "Vaddhn", "SimdAddOp", smallTypes, vaddhnCode)
1632 destElem = ((BigElement)srcElem1 + (BigElement)srcElem2 +
1633 ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
1634 (sizeof(Element) * 8);
1636 threeRegNarrowInst("vraddhn", "Vraddhn", "SimdAddOp", smallTypes, vraddhnCode)
1639 destElem = srcElem1 - srcElem2;
1641 threeEqualRegInst("vsub", "NVsubD", "SimdAddOp", unsignedTypes, 2, vsubCode)
# Plain subtract mirrors vadd: unsignedTypes, 2- and 4-register forms.
1642 threeEqualRegInst("vsub", "NVsubQ", "SimdAddOp", unsignedTypes, 4, vsubCode)
1644 destElem = (BigElement)srcElem1 - (BigElement)srcElem2;
1646 threeRegLongInst("vsubl", "Vsubl", "SimdAddOp", smallTypes, vsublwCode)
# vsubl (long) and vsubw (wide) share vsublwCode, the BigElement subtract.
1647 threeRegWideInst("vsubw", "Vsubw", "SimdAddOp", smallTypes, vsublwCode)
1650 destElem = srcElem1 + srcElem2;
1651 FPSCR fpscr = (FPSCR) FpscrQc;
1652 if (destElem < srcElem1 || destElem < srcElem2) {
1653 destElem = (Element)(-1);
1658 threeEqualRegInst("vqadd", "VqaddUD", "SimdAddOp", unsignedTypes, 2, vqaddUCode)
# Unsigned vqadd: the visible snippet detects wrap-around (the sum is smaller
# than an input) and replaces the result with (Element)(-1). It also reads
# FpscrQc into `fpscr`; the lines that use it are not visible here.
1659 threeEqualRegInst("vqadd", "VqaddUQ", "SimdAddOp", unsignedTypes, 4, vqaddUCode)
1661 destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >>
1662 (sizeof(Element) * 8);
1664 threeRegNarrowInst("vsubhn", "Vsubhn", "SimdAddOp", smallTypes, vsubhnCode)
1666 destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 +
1667 ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
1668 (sizeof(Element) * 8);
1670 threeRegNarrowInst("vrsubhn", "Vrsubhn", "SimdAddOp", smallTypes, vrsubhnCode)
1673 destElem = srcElem1 + srcElem2;
1674 FPSCR fpscr = (FPSCR) FpscrQc;
1675 bool negDest = (destElem < 0);
1676 bool negSrc1 = (srcElem1 < 0);
1677 bool negSrc2 = (srcElem2 < 0);
1678 if ((negDest != negSrc1) && (negSrc1 == negSrc2)) {
1679 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
1686 threeEqualRegInst("vqadd", "VqaddSD", "SimdAddOp", signedTypes, 2, vqaddSCode)
# Signed vqadd: overflow is detected when both sources share a sign that the
# sum does not; the visible code then starts from the minimum value
# (only the top bit set). Later adjustment lines are not visible here.
1687 threeEqualRegInst("vqadd", "VqaddSQ", "SimdAddOp", signedTypes, 4, vqaddSCode)
1690 destElem = srcElem1 - srcElem2;
1691 FPSCR fpscr = (FPSCR) FpscrQc;
1692 if (destElem > srcElem1) {
1698 threeEqualRegInst("vqsub", "VqsubUD", "SimdAddOp", unsignedTypes, 2, vqsubUCode)
# Unsigned vqsub: underflow is detected when the difference is larger than
# the minuend (destElem > srcElem1); the handling lines are not visible here.
1699 threeEqualRegInst("vqsub", "VqsubUQ", "SimdAddOp", unsignedTypes, 4, vqsubUCode)
1702 destElem = srcElem1 - srcElem2;
1703 FPSCR fpscr = (FPSCR) FpscrQc;
1704 bool negDest = (destElem < 0);
1705 bool negSrc1 = (srcElem1 < 0);
1706 bool posSrc2 = (srcElem2 >= 0);
1707 if ((negDest != negSrc1) && (negSrc1 == posSrc2)) {
1708 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
1715 threeEqualRegInst("vqsub", "VqsubSD", "SimdAddOp", signedTypes, 2, vqsubSCode)
# Signed vqsub: same overflow test as signed vqadd, but the second operand's
# sign is inverted (posSrc2) because it is being subtracted.
1716 threeEqualRegInst("vqsub", "VqsubSQ", "SimdAddOp", signedTypes, 4, vqsubSCode)
1719 destElem = (srcElem1 > srcElem2) ? (Element)(-1) : 0;
1721 threeEqualRegInst("vcgt", "VcgtD", "SimdCmpOp", allTypes, 2, vcgtCode)
# Integer compares (vcgt, vcge, vceq): each element becomes (Element)(-1)
# when the comparison holds and 0 otherwise. vcgt/vcge are registered for
# allTypes, vceq only for unsignedTypes.
1722 threeEqualRegInst("vcgt", "VcgtQ", "SimdCmpOp", allTypes, 4, vcgtCode)
1725 destElem = (srcElem1 >= srcElem2) ? (Element)(-1) : 0;
1727 threeEqualRegInst("vcge", "VcgeD", "SimdCmpOp", allTypes, 2, vcgeCode)
1728 threeEqualRegInst("vcge", "VcgeQ", "SimdCmpOp", allTypes, 4, vcgeCode)
1731 destElem = (srcElem1 == srcElem2) ? (Element)(-1) : 0;
1733 threeEqualRegInst("vceq", "VceqD", "SimdCmpOp", unsignedTypes, 2, vceqCode)
1734 threeEqualRegInst("vceq", "VceqQ", "SimdCmpOp", unsignedTypes, 4, vceqCode)
1737 int16_t shiftAmt = (int8_t)srcElem2;
1739 shiftAmt = -shiftAmt;
1740 if (shiftAmt >= sizeof(Element) * 8) {
1741 shiftAmt = sizeof(Element) * 8 - 1;
1744 destElem = (srcElem1 >> shiftAmt);
1746 // Make sure the right shift sign extended when it should.
1747 if (ltz(srcElem1) && !ltz(destElem)) {
1748 destElem |= -((Element)1 << (sizeof(Element) * 8 -
1752 if (shiftAmt >= sizeof(Element) * 8) {
1755 destElem = srcElem1 << shiftAmt;
1759 threeEqualRegInst("vshl", "VshlD", "SimdShiftOp", allTypes, 2, vshlCode)
# Register-controlled shifts (vshl, vrshl, vqshl, vqrshl): every snippet
# takes the shift amount from srcElem2 truncated to a signed 8-bit value.
# The visible branches negate the amount and shift right, or shift left
# (presumably selected by the sign of shiftAmt -- the guards are not all
# visible here). Right-shift amounts of at least the element width are
# clamped to width - 1.
1760 threeEqualRegInst("vshl", "VshlQ", "SimdShiftOp", allTypes, 4, vshlCode)
1763 int16_t shiftAmt = (int8_t)srcElem2;
1765 shiftAmt = -shiftAmt;
1767 if (shiftAmt <= sizeof(Element) * 8)
1768 rBit = bits(srcElem1, shiftAmt - 1);
1769 if (shiftAmt > sizeof(Element) * 8 && ltz(srcElem1))
1771 if (shiftAmt >= sizeof(Element) * 8) {
1772 shiftAmt = sizeof(Element) * 8 - 1;
1775 destElem = (srcElem1 >> shiftAmt);
1777 // Make sure the right shift sign extended when it should.
1778 if (ltz(srcElem1) && !ltz(destElem)) {
1779 destElem |= -((Element)1 << (sizeof(Element) * 8 -
1783 } else if (shiftAmt > 0) {
1784 if (shiftAmt >= sizeof(Element) * 8) {
1787 destElem = srcElem1 << shiftAmt;
1790 destElem = srcElem1;
1793 threeEqualRegInst("vrshl", "VrshlD", "SimdAluOp", allTypes, 2, vrshlCode)
# vrshl additionally captures rBit = bit (shiftAmt - 1) of the source for
# right shifts. NOTE(review): vrshl is registered as "SimdAluOp" whereas vshl
# above uses "SimdShiftOp" -- confirm the op class is intentional.
1794 threeEqualRegInst("vrshl", "VrshlQ", "SimdAluOp", allTypes, 4, vrshlCode)
1797 int16_t shiftAmt = (int8_t)srcElem2;
1798 FPSCR fpscr = (FPSCR) FpscrQc;
1800 shiftAmt = -shiftAmt;
1801 if (shiftAmt >= sizeof(Element) * 8) {
1802 shiftAmt = sizeof(Element) * 8 - 1;
1805 destElem = (srcElem1 >> shiftAmt);
1807 } else if (shiftAmt > 0) {
1808 if (shiftAmt >= sizeof(Element) * 8) {
1809 if (srcElem1 != 0) {
1810 destElem = mask(sizeof(Element) * 8);
1816 if (bits(srcElem1, sizeof(Element) * 8 - 1,
1817 sizeof(Element) * 8 - shiftAmt)) {
1818 destElem = mask(sizeof(Element) * 8);
1821 destElem = srcElem1 << shiftAmt;
1825 destElem = srcElem1;
1829 threeEqualRegInst("vqshl", "VqshlUD", "SimdAluOp", unsignedTypes, 2, vqshlUCode)
# Unsigned vqshl: on a left shift the result becomes the all-ones mask when
# any bit would be shifted out (nonzero source for amounts >= the element
# width, or nonzero top shiftAmt bits otherwise).
1830 threeEqualRegInst("vqshl", "VqshlUQ", "SimdAluOp", unsignedTypes, 4, vqshlUCode)
1833 int16_t shiftAmt = (int8_t)srcElem2;
1834 FPSCR fpscr = (FPSCR) FpscrQc;
1836 shiftAmt = -shiftAmt;
1837 if (shiftAmt >= sizeof(Element) * 8) {
1838 shiftAmt = sizeof(Element) * 8 - 1;
1841 destElem = (srcElem1 >> shiftAmt);
1843 // Make sure the right shift sign extended when it should.
1844 if (srcElem1 < 0 && destElem >= 0) {
1845 destElem |= -((Element)1 << (sizeof(Element) * 8 -
1848 } else if (shiftAmt > 0) {
1850 if (shiftAmt >= sizeof(Element) * 8) {
1856 if (bits(srcElem1, sizeof(Element) * 8 - 1,
1857 sizeof(Element) * 8 - 1 - shiftAmt) !=
1858 ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
1861 destElem = srcElem1 << shiftAmt;
1866 destElem = mask(sizeof(Element) * 8 - 1);
1868 destElem = ~destElem;
1871 destElem = srcElem1;
1875 threeEqualRegInst("vqshl", "VqshlSD", "SimdCmpOp", signedTypes, 2, vqshlSCode)
# Signed vqshl: the left-shift path compares the top shiftAmt + 1 bits with
# the sign pattern. The visible saturation code loads the positive maximum
# (mask of width - 1 bits) and can invert it (presumably for negative
# sources -- the guard is not visible here).
# NOTE(review): the signed form uses "SimdCmpOp" while the unsigned form
# above uses "SimdAluOp" -- confirm.
1876 threeEqualRegInst("vqshl", "VqshlSQ", "SimdCmpOp", signedTypes, 4, vqshlSCode)
1879 int16_t shiftAmt = (int8_t)srcElem2;
1880 FPSCR fpscr = (FPSCR) FpscrQc;
1882 shiftAmt = -shiftAmt;
1884 if (shiftAmt <= sizeof(Element) * 8)
1885 rBit = bits(srcElem1, shiftAmt - 1);
1886 if (shiftAmt >= sizeof(Element) * 8) {
1887 shiftAmt = sizeof(Element) * 8 - 1;
1890 destElem = (srcElem1 >> shiftAmt);
1894 if (shiftAmt >= sizeof(Element) * 8) {
1895 if (srcElem1 != 0) {
1896 destElem = mask(sizeof(Element) * 8);
1902 if (bits(srcElem1, sizeof(Element) * 8 - 1,
1903 sizeof(Element) * 8 - shiftAmt)) {
1904 destElem = mask(sizeof(Element) * 8);
1907 destElem = srcElem1 << shiftAmt;
1913 threeEqualRegInst("vqrshl", "VqrshlUD", "SimdCmpOp", unsignedTypes, 2, vqrshlUCode)
# Unsigned vqrshl combines the rBit capture of vrshl with the unsigned
# saturation checks of vqshl.
1914 threeEqualRegInst("vqrshl", "VqrshlUQ", "SimdCmpOp", unsignedTypes, 4, vqrshlUCode)
1917 int16_t shiftAmt = (int8_t)srcElem2;
1918 FPSCR fpscr = (FPSCR) FpscrQc;
1920 shiftAmt = -shiftAmt;
1922 if (shiftAmt <= sizeof(Element) * 8)
1923 rBit = bits(srcElem1, shiftAmt - 1);
1924 if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
1926 if (shiftAmt >= sizeof(Element) * 8) {
1927 shiftAmt = sizeof(Element) * 8 - 1;
1930 destElem = (srcElem1 >> shiftAmt);
1932 // Make sure the right shift sign extended when it should.
1933 if (srcElem1 < 0 && destElem >= 0) {
1934 destElem |= -((Element)1 << (sizeof(Element) * 8 -
1938 } else if (shiftAmt > 0) {
1940 if (shiftAmt >= sizeof(Element) * 8) {
1946 if (bits(srcElem1, sizeof(Element) * 8 - 1,
1947 sizeof(Element) * 8 - 1 - shiftAmt) !=
1948 ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
1951 destElem = srcElem1 << shiftAmt;
1956 destElem = mask(sizeof(Element) * 8 - 1);
1958 destElem = ~destElem;
1961 destElem = srcElem1;
1965 threeEqualRegInst("vqrshl", "VqrshlSD", "SimdCmpOp", signedTypes, 2, vqrshlSCode)
# Signed vqrshl combines the rBit capture of vrshl with the signed saturation
# checks of vqshl.
1966 threeEqualRegInst("vqrshl", "VqrshlSQ", "SimdCmpOp", signedTypes, 4, vqrshlSCode)
1969 destElem += (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
1970 (srcElem2 - srcElem1);
1972 threeEqualRegInst("vaba", "VabaD", "SimdAddAccOp", allTypes, 2, vabaCode, True)
# vaba accumulates the absolute difference into destElem, hence the trailing
# True (presumably readDest). vabal below is the long form, casting both
# sources to BigElement.
1973 threeEqualRegInst("vaba", "VabaQ", "SimdAddAccOp", allTypes, 4, vabaCode, True)
1975 destElem += (srcElem1 > srcElem2) ?
1976 ((BigElement)srcElem1 - (BigElement)srcElem2) :
1977 ((BigElement)srcElem2 - (BigElement)srcElem1);
1979 threeRegLongInst("vabal", "Vabal", "SimdAddAccOp", smallTypes, vabalCode, True)
1982 destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
1983 (srcElem2 - srcElem1);
1985 threeEqualRegInst("vabd", "VabdD", "SimdAddOp", allTypes, 2, vabdCode)
# vabd/vabdl are the non-accumulating counterparts of vaba/vabal: the
# absolute difference is assigned rather than added.
1986 threeEqualRegInst("vabd", "VabdQ", "SimdAddOp", allTypes, 4, vabdCode)
1988 destElem = (srcElem1 > srcElem2) ?
1989 ((BigElement)srcElem1 - (BigElement)srcElem2) :
1990 ((BigElement)srcElem2 - (BigElement)srcElem1);
1992 threeRegLongInst("vabdl", "Vabdl", "SimdAddOp", smallTypes, vabdlCode)
1995 destElem = (srcElem1 & srcElem2) ? (Element)(-1) : 0;
1997 threeEqualRegInst("vtst", "VtstD", "SimdAluOp", unsignedTypes, 2, vtstCode)
# vtst: (Element)(-1) when the two sources share any set bit, 0 otherwise.
1998 threeEqualRegInst("vtst", "VtstQ", "SimdAluOp", unsignedTypes, 4, vtstCode)
2001 destElem = srcElem1 * srcElem2;
2003 threeEqualRegInst("vmul", "NVmulD", "SimdMultOp", allTypes, 2, vmulCode)
# Integer multiply family. vmulCode, vmullCode, vmlaCode, vmlalCode,
# vmlsCode, vmlslCode, vqdmullCode, vqdmlalCode and vqdmlslCode are reused
# further down this file by the twoEqualRegInst / twoRegLongInst
# registrations.
2004 threeEqualRegInst("vmul", "NVmulQ", "SimdMultOp", allTypes, 4, vmulCode)
2006 destElem = (BigElement)srcElem1 * (BigElement)srcElem2;
2008 threeRegLongInst("vmull", "Vmull", "SimdMultOp", smallTypes, vmullCode)
2011 destElem = destElem + srcElem1 * srcElem2;
2013 threeEqualRegInst("vmla", "NVmlaD", "SimdMultAccOp", allTypes, 2, vmlaCode, True)
# Multiply-accumulate forms read destElem, hence the trailing True
# (presumably readDest).
2014 threeEqualRegInst("vmla", "NVmlaQ", "SimdMultAccOp", allTypes, 4, vmlaCode, True)
2016 destElem = destElem + (BigElement)srcElem1 * (BigElement)srcElem2;
2018 threeRegLongInst("vmlal", "Vmlal", "SimdMultAccOp", smallTypes, vmlalCode, True)
2021 FPSCR fpscr = (FPSCR) FpscrQc;
2022 BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2023 Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2024 Element halfNeg = maxNeg / 2;
2025 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2026 (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2027 (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2028 midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
2031 bool negPreDest = ltz(destElem);
2032 destElem += midElem;
2033 bool negDest = ltz(destElem);
2034 bool negMid = ltz(midElem);
2035 if (negPreDest == negMid && negMid != negDest) {
2036 destElem = mask(sizeof(BigElement) * 8 - 1);
2038 destElem = ~destElem;
2043 threeRegLongInst("vqdmlal", "Vqdmlal", "SimdMultAccOp", smallTypes, vqdmlalCode, True)
2046 FPSCR fpscr = (FPSCR) FpscrQc;
2047 BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2048 Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2049 Element halfNeg = maxNeg / 2;
2050 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2051 (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2052 (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2053 midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
2056 bool negPreDest = ltz(destElem);
2057 destElem -= midElem;
2058 bool negDest = ltz(destElem);
2059 bool posMid = ltz((BigElement)-midElem);
2060 if (negPreDest == posMid && posMid != negDest) {
2061 destElem = mask(sizeof(BigElement) * 8 - 1);
2063 destElem = ~destElem;
2068 threeRegLongInst("vqdmlsl", "Vqdmlsl", "SimdMultAccOp", smallTypes, vqdmlslCode, True)
2071 FPSCR fpscr = (FPSCR) FpscrQc;
2072 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2073 if (srcElem1 == srcElem2 &&
2074 srcElem1 == (Element)((Element)1 <<
2075 (Element)(sizeof(Element) * 8 - 1))) {
2076 destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8));
2081 threeRegLongInst("vqdmull", "Vqdmull", "SimdMultAccOp", smallTypes, vqdmullCode)
2084 destElem = destElem - srcElem1 * srcElem2;
2086 threeEqualRegInst("vmls", "NVmlsD", "SimdMultAccOp", allTypes, 2, vmlsCode, True)
# vmls/vmlsl subtract the product from destElem (the counterpart of
# vmla/vmlal).
2087 threeEqualRegInst("vmls", "NVmlsQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True)
2089 destElem = destElem - (BigElement)srcElem1 * (BigElement)srcElem2;
2091 threeRegLongInst("vmlsl", "Vmlsl", "SimdMultAccOp", smallTypes, vmlslCode, True)
2095 for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2096 if (bits(srcElem2, j))
2097 destElem ^= srcElem1 << j;
2100 threeEqualRegInst("vmul", "NVmulpD", "SimdMultOp", unsignedTypes, 2, vmulpCode)
# Polynomial multiply (the "p" variants): for each set bit j of srcElem2 the
# visible loop XORs (srcElem1 << j) into destElem; vmullpCode does the same
# after widening srcElem1 to BigElement.
2101 threeEqualRegInst("vmul", "NVmulpQ", "SimdMultOp", unsignedTypes, 4, vmulpCode)
2104 for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2105 if (bits(srcElem2, j))
2106 destElem ^= (BigElement)srcElem1 << j;
2109 threeRegLongInst("vmull", "Vmullp", "SimdMultOp", smallUnsignedTypes, vmullpCode)
2111 threeEqualRegInst("vpmax", "VpmaxD", "SimdCmpOp", smallTypes, 2, vmaxCode, pairwise=True)
2113 threeEqualRegInst("vpmin", "VpminD", "SimdCmpOp", smallTypes, 2, vminCode, pairwise=True)
2116 FPSCR fpscr = (FPSCR) FpscrQc;
2117 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >>
2118 (sizeof(Element) * 8);
2119 if (srcElem1 == srcElem2 &&
2120 srcElem1 == (Element)((Element)1 <<
2121 (sizeof(Element) * 8 - 1))) {
2122 destElem = ~srcElem1;
2127 threeEqualRegInst("vqdmulh", "VqdmulhD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)
# vqdmulh keeps the high half of the doubled 64-bit product. When both
# sources equal the most negative Element value the result is replaced by
# ~srcElem1. Registered for smallSignedTypes only.
2128 threeEqualRegInst("vqdmulh", "VqdmulhQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)
2131 FPSCR fpscr = (FPSCR) FpscrQc;
2132 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
2133 ((int64_t)1 << (sizeof(Element) * 8 - 1))) >>
2134 (sizeof(Element) * 8);
2135 Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2136 Element halfNeg = maxNeg / 2;
2137 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2138 (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2139 (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2141 destElem = mask(sizeof(Element) * 8 - 1);
2143 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2149 threeEqualRegInst("vqrdmulh", "VqrdmulhD",
2150 "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
# vqrdmulh adds a half-unit constant (1 << (element bits - 1)) to the doubled
# product before taking the high half; the special-case test also covers the
# maxNeg/halfNeg combinations listed above.
2151 threeEqualRegInst("vqrdmulh", "VqrdmulhQ",
2152 "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
2155 FPSCR fpscr = (FPSCR) FpscrExc;
2157 destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2159 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMaxS,
2160 true, true, VfpRoundNearest);
2161 } else if (flushToZero(srcReg1, srcReg2)) {
2166 threeEqualRegInstFp("vmax", "VmaxDFp", "SimdFloatCmpOp", ("float",), 2, vmaxfpCode)
# Floating-point three-register group: every snippet reads FpscrExc into
# `fpscr` and routes the arithmetic through binaryOp(...) with the trailing
# arguments (true, true, VfpRoundNearest). All registrations use the single
# type tuple ("float",) in 2- and 4-register forms. vmax/vmin additionally
# call processNans and flushToZero on the visible lines.
2167 threeEqualRegInstFp("vmax", "VmaxQFp", "SimdFloatCmpOp", ("float",), 4, vmaxfpCode)
2170 FPSCR fpscr = (FPSCR) FpscrExc;
2172 destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2174 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMinS,
2175 true, true, VfpRoundNearest);
2176 } else if (flushToZero(srcReg1, srcReg2)) {
2181 threeEqualRegInstFp("vmin", "VminDFp", "SimdFloatCmpOp", ("float",), 2, vminfpCode)
2182 threeEqualRegInstFp("vmin", "VminQFp", "SimdFloatCmpOp", ("float",), 4, vminfpCode)
2184 threeEqualRegInstFp("vpmax", "VpmaxDFp", "SimdFloatCmpOp", ("float",),
2185 2, vmaxfpCode, pairwise=True)
# The pairwise vpmax/vpmin forms reuse vmaxfpCode/vminfpCode with
# pairwise=True.
2186 threeEqualRegInstFp("vpmax", "VpmaxQFp", "SimdFloatCmpOp", ("float",),
2187 4, vmaxfpCode, pairwise=True)
2189 threeEqualRegInstFp("vpmin", "VpminDFp", "SimdFloatCmpOp", ("float",),
2190 2, vminfpCode, pairwise=True)
2191 threeEqualRegInstFp("vpmin", "VpminQFp", "SimdFloatCmpOp", ("float",),
2192 4, vminfpCode, pairwise=True)
2195 FPSCR fpscr = (FPSCR) FpscrExc;
2196 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpAddS,
2197 true, true, VfpRoundNearest);
2200 threeEqualRegInstFp("vadd", "VaddDFp", "SimdFloatAddOp", ("float",), 2, vaddfpCode)
# vaddfpCode is shared by vadd and, just below, the pairwise vpadd.
2201 threeEqualRegInstFp("vadd", "VaddQFp", "SimdFloatAddOp", ("float",), 4, vaddfpCode)
2203 threeEqualRegInstFp("vpadd", "VpaddDFp", "SimdFloatAddOp", ("float",),
2204 2, vaddfpCode, pairwise=True)
2205 threeEqualRegInstFp("vpadd", "VpaddQFp", "SimdFloatAddOp", ("float",),
2206 4, vaddfpCode, pairwise=True)
2209 FPSCR fpscr = (FPSCR) FpscrExc;
2210 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2211 true, true, VfpRoundNearest);
2214 threeEqualRegInstFp("vsub", "VsubDFp", "SimdFloatAddOp", ("float",), 2, vsubfpCode)
2215 threeEqualRegInstFp("vsub", "VsubQFp", "SimdFloatAddOp", ("float",), 4, vsubfpCode)
2218 FPSCR fpscr = (FPSCR) FpscrExc;
2219 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2220 true, true, VfpRoundNearest);
2223 threeEqualRegInstFp("vmul", "NVmulDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode)
2224 threeEqualRegInstFp("vmul", "NVmulQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode)
2227 FPSCR fpscr = (FPSCR) FpscrExc;
2228 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2229 true, true, VfpRoundNearest);
2230 destReg = binaryOp(fpscr, mid, destReg, fpAddS,
2231 true, true, VfpRoundNearest);
2234 threeEqualRegInstFp("vmla", "NVmlaDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True)
# vmla/vmls are built from two binaryOp calls: a multiply into `mid`,
# followed by an add (vmla) or a subtract from destReg (vmls). They read
# destReg, hence the trailing True (presumably readDest).
2235 threeEqualRegInstFp("vmla", "NVmlaQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True)
2238 FPSCR fpscr = (FPSCR) FpscrExc;
2239 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2240 true, true, VfpRoundNearest);
2241 destReg = binaryOp(fpscr, destReg, mid, fpSubS,
2242 true, true, VfpRoundNearest);
2245 threeEqualRegInstFp("vmls", "NVmlsDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True)
2246 threeEqualRegInstFp("vmls", "NVmlsQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True)
2249 FPSCR fpscr = (FPSCR) FpscrExc;
2250 float res = binaryOp(fpscr, srcReg1, srcReg2, vcgtFunc,
2251 true, true, VfpRoundNearest);
2252 destReg = (res == 0) ? -1 : 0;
2257 threeEqualRegInstFp("vcgt", "VcgtDFp", "SimdFloatCmpOp", ("float",),
2258 2, vcgtfpCode, toInt = True)
# Floating-point compares (vcgt, vcge, vacgt, vacge, vceq): the result
# element is -1 when the helper function passed to binaryOp yields 0, and 0
# otherwise. Presumably vcgtFunc and friends return 0 when the comparison
# holds -- confirm against their definitions. toInt = True is passed because
# the destination holds an integer mask rather than a float.
2259 threeEqualRegInstFp("vcgt", "VcgtQFp", "SimdFloatCmpOp", ("float",),
2260 4, vcgtfpCode, toInt = True)
2263 FPSCR fpscr = (FPSCR) FpscrExc;
2264 float res = binaryOp(fpscr, srcReg1, srcReg2, vcgeFunc,
2265 true, true, VfpRoundNearest);
2266 destReg = (res == 0) ? -1 : 0;
2271 threeEqualRegInstFp("vcge", "VcgeDFp", "SimdFloatCmpOp", ("float",),
2272 2, vcgefpCode, toInt = True)
2273 threeEqualRegInstFp("vcge", "VcgeQFp", "SimdFloatCmpOp", ("float",),
2274 4, vcgefpCode, toInt = True)
2277 FPSCR fpscr = (FPSCR) FpscrExc;
2278 float res = binaryOp(fpscr, srcReg1, srcReg2, vacgtFunc,
2279 true, true, VfpRoundNearest);
2280 destReg = (res == 0) ? -1 : 0;
2285 threeEqualRegInstFp("vacgt", "VacgtDFp", "SimdFloatCmpOp", ("float",),
2286 2, vacgtfpCode, toInt = True)
2287 threeEqualRegInstFp("vacgt", "VacgtQFp", "SimdFloatCmpOp", ("float",),
2288 4, vacgtfpCode, toInt = True)
2291 FPSCR fpscr = (FPSCR) FpscrExc;
2292 float res = binaryOp(fpscr, srcReg1, srcReg2, vacgeFunc,
2293 true, true, VfpRoundNearest);
2294 destReg = (res == 0) ? -1 : 0;
2299 threeEqualRegInstFp("vacge", "VacgeDFp", "SimdFloatCmpOp", ("float",),
2300 2, vacgefpCode, toInt = True)
2301 threeEqualRegInstFp("vacge", "VacgeQFp", "SimdFloatCmpOp", ("float",),
2302 4, vacgefpCode, toInt = True)
2305 FPSCR fpscr = (FPSCR) FpscrExc;
2306 float res = binaryOp(fpscr, srcReg1, srcReg2, vceqFunc,
2307 true, true, VfpRoundNearest);
2308 destReg = (res == 0) ? -1 : 0;
2313 threeEqualRegInstFp("vceq", "VceqDFp", "SimdFloatCmpOp", ("float",),
2314 2, vceqfpCode, toInt = True)
2315 threeEqualRegInstFp("vceq", "VceqQFp", "SimdFloatCmpOp", ("float",),
2316 4, vceqfpCode, toInt = True)
2319 FPSCR fpscr = (FPSCR) FpscrExc;
2320 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRecpsS,
2321 true, true, VfpRoundNearest);
2324 threeEqualRegInstFp("vrecps", "VrecpsDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpsCode)
# vrecps and vrsqrts delegate entirely to the fpRecpsS / fpRSqrtsS helpers.
2325 threeEqualRegInstFp("vrecps", "VrecpsQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpsCode)
2328 FPSCR fpscr = (FPSCR) FpscrExc;
2329 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRSqrtsS,
2330 true, true, VfpRoundNearest);
2333 threeEqualRegInstFp("vrsqrts", "VrsqrtsDFp", "SimdFloatMiscOp", ("float",), 2, vrsqrtsCode)
2334 threeEqualRegInstFp("vrsqrts", "VrsqrtsQFp", "SimdFloatMiscOp", ("float",), 4, vrsqrtsCode)
2337 FPSCR fpscr = (FPSCR) FpscrExc;
2338 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2339 true, true, VfpRoundNearest);
2340 destReg = fabs(mid);
2343 threeEqualRegInstFp("vabd", "VabdDFp", "SimdFloatAddOp", ("float",), 2, vabdfpCode)
# Floating-point vabd: subtract through binaryOp, then take fabs of the
# result.
2344 threeEqualRegInstFp("vabd", "VabdQFp", "SimdFloatAddOp", ("float",), 4, vabdfpCode)
# Two-register registrations that reuse the multiply snippets defined for the
# three-register forms above. The class names carry an extra "s" (VmlasD,
# Vmlals, ...), presumably denoting the by-scalar encodings -- confirm
# against the decoder.
2346 twoEqualRegInst("vmla", "VmlasD", "SimdMultAccOp", unsignedTypes, 2, vmlaCode, True)
# NOTE(review): vmla is registered here for unsignedTypes while the vmls and
# vmul registrations below use allTypes -- confirm this asymmetry is
# intentional.
2347 twoEqualRegInst("vmla", "VmlasQ", "SimdMultAccOp", unsignedTypes, 4, vmlaCode, True)
2348 twoEqualRegInstFp("vmla", "VmlasDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True)
2349 twoEqualRegInstFp("vmla", "VmlasQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True)
2350 twoRegLongInst("vmlal", "Vmlals", "SimdMultAccOp", smallTypes, vmlalCode, True)
2352 twoEqualRegInst("vmls", "VmlssD", "SimdMultAccOp", allTypes, 2, vmlsCode, True)
2353 twoEqualRegInst("vmls", "VmlssQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True)
2354 twoEqualRegInstFp("vmls", "VmlssDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True)
2355 twoEqualRegInstFp("vmls", "VmlssQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True)
2356 twoRegLongInst("vmlsl", "Vmlsls", "SimdMultAccOp", smallTypes, vmlslCode, True)
2358 twoEqualRegInst("vmul", "VmulsD", "SimdMultOp", allTypes, 2, vmulCode)
2359 twoEqualRegInst("vmul", "VmulsQ", "SimdMultOp", allTypes, 4, vmulCode)
2360 twoEqualRegInstFp("vmul", "VmulsDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode)
2361 twoEqualRegInstFp("vmul", "VmulsQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode)
2362 twoRegLongInst("vmull", "Vmulls", "SimdMultOp", smallTypes, vmullCode)
2364 twoRegLongInst("vqdmull", "Vqdmulls", "SimdMultOp", smallTypes, vqdmullCode)
# NOTE(review): Vqdmulls above uses "SimdMultOp", whereas the three-register
# Vqdmull registration earlier in this file uses "SimdMultAccOp" for the same
# vqdmullCode -- confirm which op class is intended.
2365 twoRegLongInst("vqdmlal", "Vqdmlals", "SimdMultAccOp", smallTypes, vqdmlalCode, True)
2366 twoRegLongInst("vqdmlsl", "Vqdmlsls", "SimdMultAccOp", smallTypes, vqdmlslCode, True)
2367 twoEqualRegInst("vqdmulh", "VqdmulhsD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)
2368 twoEqualRegInst("vqdmulh", "VqdmulhsQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)
2369 twoEqualRegInst("vqrdmulh", "VqrdmulhsD",
2370 "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
2371 twoEqualRegInst("vqrdmulh", "VqrdmulhsQ",
2372 "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
2375 if (imm >= sizeof(srcElem1) * 8) {
2381 destElem = srcElem1 >> imm;
2384 twoRegShiftInst("vshr", "NVshrD", "SimdShiftOp", allTypes, 2, vshrCode)
2385 twoRegShiftInst("vshr", "NVshrQ", "SimdShiftOp", allTypes, 4, vshrCode)
2389 if (imm >= sizeof(srcElem1) * 8) {
2390 mid = ltz(srcElem1) ? -1 : 0;
2392 mid = srcElem1 >> imm;
2393 if (ltz(srcElem1) && !ltz(mid)) {
2394 mid |= -(mid & ((Element)1 <<
2395 (sizeof(Element) * 8 - 1 - imm)));
# vsra: NVsraD / NVsraQ, op class SimdShiftAccOp, over allTypes, using
# vsraCode.
# NOTE(review): the trailing True is presumably the "destination is also
# read" flag (this is an accumulate op class) and 2 / 4 presumably the
# register count; confirm in twoRegShiftInst, which is not visible here.
2400 twoRegShiftInst("vsra", "NVsraD", "SimdShiftAccOp", allTypes, 2, vsraCode, True)
2401 twoRegShiftInst("vsra", "NVsraQ", "SimdShiftAccOp", allTypes, 4, vsraCode, True)
2404 if (imm > sizeof(srcElem1) * 8) {
2407 Element rBit = bits(srcElem1, imm - 1);
2408 destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2410 destElem = srcElem1;
# vrshr: NVrshrD / NVrshrQ, op class SimdShiftOp, over allTypes, using
# vrshrCode.
# NOTE(review): 2 / 4 is presumably the register count; confirm in
# twoRegShiftInst (defined outside this chunk).
2413 twoRegShiftInst("vrshr", "NVrshrD", "SimdShiftOp", allTypes, 2, vrshrCode)
2414 twoRegShiftInst("vrshr", "NVrshrQ", "SimdShiftOp", allTypes, 4, vrshrCode)
2417 if (imm > sizeof(srcElem1) * 8) {
2420 Element rBit = bits(srcElem1, imm - 1);
2421 destElem += ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2423 destElem += srcElem1;
# vrsra: NVrsraD / NVrsraQ, op class SimdShiftAccOp, over allTypes, using
# vrsraCode. The visible snippet lines update destElem with "+=", which is
# consistent with the trailing True being a "destination is also read" flag.
# NOTE(review): that flag meaning and the 2 / 4 register-count reading are
# assumptions; twoRegShiftInst is defined outside this chunk.
2426 twoRegShiftInst("vrsra", "NVrsraD", "SimdShiftAccOp", allTypes, 2, vrsraCode, True)
2427 twoRegShiftInst("vrsra", "NVrsraQ", "SimdShiftAccOp", allTypes, 4, vrsraCode, True)
2430 if (imm >= sizeof(Element) * 8)
2431 destElem = destElem;
2433 destElem = (srcElem1 >> imm) |
2434 (destElem & ~mask(sizeof(Element) * 8 - imm));
# vsri: NVsriD / NVsriQ, op class SimdShiftOp, over unsignedTypes, using
# vsriCode. The visible snippet merges the shifted source with bits kept from
# the old destElem, so the destination is an input here (trailing True).
# NOTE(review): flag meaning and 2 / 4 = register count are assumptions;
# confirm in twoRegShiftInst, which is not visible in this chunk.
2436 twoRegShiftInst("vsri", "NVsriD", "SimdShiftOp", unsignedTypes, 2, vsriCode, True)
2437 twoRegShiftInst("vsri", "NVsriQ", "SimdShiftOp", unsignedTypes, 4, vsriCode, True)
2440 if (imm >= sizeof(Element) * 8)
2441 destElem = (srcElem1 << (sizeof(Element) * 8 - 1)) << 1;
2443 destElem = srcElem1 << imm;
# vshl (immediate shift): NVshlD / NVshlQ, op class SimdShiftOp, over
# unsignedTypes only, using vshlCode.
# NOTE(review): 2 / 4 is presumably the register count; confirm in
# twoRegShiftInst (defined outside this chunk).
2445 twoRegShiftInst("vshl", "NVshlD", "SimdShiftOp", unsignedTypes, 2, vshlCode)
2446 twoRegShiftInst("vshl", "NVshlQ", "SimdShiftOp", unsignedTypes, 4, vshlCode)
2449 if (imm >= sizeof(Element) * 8)
2450 destElem = destElem;
2452 destElem = (srcElem1 << imm) | (destElem & mask(imm));
# vsli: NVsliD / NVsliQ, op class SimdShiftOp, over unsignedTypes, using
# vsliCode. The visible snippet ORs the shifted source with
# (destElem & mask(imm)), so the old destination value is an input
# (trailing True).
# NOTE(review): flag meaning and 2 / 4 = register count are assumptions;
# twoRegShiftInst is not visible in this chunk.
2454 twoRegShiftInst("vsli", "NVsliD", "SimdShiftOp", unsignedTypes, 2, vsliCode, True)
2455 twoRegShiftInst("vsli", "NVsliQ", "SimdShiftOp", unsignedTypes, 4, vsliCode, True)
2458 FPSCR fpscr = (FPSCR) FpscrQc;
2459 if (imm >= sizeof(Element) * 8) {
2460 if (srcElem1 != 0) {
2461 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2463 destElem = ~destElem;
2469 destElem = (srcElem1 << imm);
2470 uint64_t topBits = bits((uint64_t)srcElem1,
2471 sizeof(Element) * 8 - 1,
2472 sizeof(Element) * 8 - 1 - imm);
2473 if (topBits != 0 && topBits != mask(imm + 1)) {
2474 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2476 destElem = ~destElem;
2480 destElem = srcElem1;
# vqshl, signed element types: NVqshlD / NVqshlQ, op class SimdShiftOp, using
# vqshlCode (the visible snippet lines read FpscrQc into a local FPSCR).
# NOTE(review): 2 / 4 is presumably the register count; confirm in
# twoRegShiftInst (defined outside this chunk).
2484 twoRegShiftInst("vqshl", "NVqshlD", "SimdShiftOp", signedTypes, 2, vqshlCode)
2485 twoRegShiftInst("vqshl", "NVqshlQ", "SimdShiftOp", signedTypes, 4, vqshlCode)
2488 FPSCR fpscr = (FPSCR) FpscrQc;
2489 if (imm >= sizeof(Element) * 8) {
2490 if (srcElem1 != 0) {
2491 destElem = mask(sizeof(Element) * 8);
2497 destElem = (srcElem1 << imm);
2498 uint64_t topBits = bits((uint64_t)srcElem1,
2499 sizeof(Element) * 8 - 1,
2500 sizeof(Element) * 8 - imm);
2502 destElem = mask(sizeof(Element) * 8);
2506 destElem = srcElem1;
# "vqshlu": NVqshluD / NVqshluQ, op class SimdShiftOp, over unsignedTypes,
# using vqshluCode. The signed-type counterpart is registered separately
# under the mnemonic string "vqshlus".
# NOTE(review): 2 / 4 is presumably the register count; confirm in
# twoRegShiftInst (defined outside this chunk).
2510 twoRegShiftInst("vqshlu", "NVqshluD", "SimdShiftOp", unsignedTypes, 2, vqshluCode)
2511 twoRegShiftInst("vqshlu", "NVqshluQ", "SimdShiftOp", unsignedTypes, 4, vqshluCode)
2514 FPSCR fpscr = (FPSCR) FpscrQc;
2515 if (imm >= sizeof(Element) * 8) {
2519 } else if (srcElem1 > 0) {
2520 destElem = mask(sizeof(Element) * 8);
2526 destElem = (srcElem1 << imm);
2527 uint64_t topBits = bits((uint64_t)srcElem1,
2528 sizeof(Element) * 8 - 1,
2529 sizeof(Element) * 8 - imm);
2533 } else if (topBits != 0) {
2534 destElem = mask(sizeof(Element) * 8);
2542 destElem = srcElem1;
# "vqshlus": NVqshlusD / NVqshlusQ, op class SimdShiftOp, over signedTypes,
# using vqshlusCode.
# NOTE(review): 2 / 4 is presumably the register count; confirm in
# twoRegShiftInst (defined outside this chunk).
2547 twoRegShiftInst("vqshlus", "NVqshlusD", "SimdShiftOp", signedTypes, 2, vqshlusCode)
2548 twoRegShiftInst("vqshlus", "NVqshlusQ", "SimdShiftOp", signedTypes, 4, vqshlusCode)
2551 if (imm >= sizeof(srcElem1) * 8) {
2554 destElem = srcElem1 >> imm;
# vshrn: single class NVshrn over smallUnsignedTypes, using vshrnCode. No
# 2 / 4 argument is passed to twoRegNarrowShiftInst, unlike the equal-width
# shift instantiations.
2557 twoRegNarrowShiftInst("vshrn", "NVshrn", "SimdShiftOp", smallUnsignedTypes, vshrnCode)
2560 if (imm > sizeof(srcElem1) * 8) {
2563 Element rBit = bits(srcElem1, imm - 1);
2564 destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2566 destElem = srcElem1;
# vrshrn: single class NVrshrn over smallUnsignedTypes, using vrshrnCode (the
# visible snippet adds a bit taken from position imm - 1 of the source to the
# shifted value).
2569 twoRegNarrowShiftInst("vrshrn", "NVrshrn", "SimdShiftOp", smallUnsignedTypes, vrshrnCode)
2572 FPSCR fpscr = (FPSCR) FpscrQc;
2573 if (imm > sizeof(srcElem1) * 8) {
2574 if (srcElem1 != 0 && srcElem1 != -1)
2578 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2579 mid |= -(mid & ((BigElement)1 <<
2580 (sizeof(BigElement) * 8 - 1 - imm)));
2581 if (mid != (Element)mid) {
2582 destElem = mask(sizeof(Element) * 8 - 1);
2584 destElem = ~destElem;
2590 destElem = srcElem1;
# vqshrn: single class NVqshrn over smallSignedTypes, using vqshrnCode (the
# visible snippet compares the shifted BigElement value against its Element
# cast to detect values that do not fit).
2594 twoRegNarrowShiftInst("vqshrn", "NVqshrn", "SimdShiftOp", smallSignedTypes, vqshrnCode)
2597 FPSCR fpscr = (FPSCR) FpscrQc;
2598 if (imm > sizeof(srcElem1) * 8) {
2603 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2604 if (mid != (Element)mid) {
2605 destElem = mask(sizeof(Element) * 8);
2611 destElem = srcElem1;
# NVqshrun: registered under the mnemonic string "vqshrun" over
# smallUnsignedTypes, using vqshrunCode. A second class, NVqshruns, is
# registered later under the same mnemonic string with smallSignedTypes.
# NOTE(review): two classes sharing one mnemonic string looks deliberate
# (they differ by type list and snippet) -- confirm against the decoder.
2615 twoRegNarrowShiftInst("vqshrun", "NVqshrun",
2616 "SimdShiftOp", smallUnsignedTypes, vqshrunCode)
2619 FPSCR fpscr = (FPSCR) FpscrQc;
2620 if (imm > sizeof(srcElem1) * 8) {
2625 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2626 if (bits(mid, sizeof(BigElement) * 8 - 1,
2627 sizeof(Element) * 8) != 0) {
2631 destElem = mask(sizeof(Element) * 8);
2638 destElem = srcElem1;
# NVqshruns: smallSignedTypes counterpart of NVqshrun, using vqshrunsCode.
# It is registered under the same mnemonic string "vqshrun" as NVqshrun.
# NOTE(review): confirm against the decoder that sharing the mnemonic string
# is intended.
2642 twoRegNarrowShiftInst("vqshrun", "NVqshruns",
2643 "SimdShiftOp", smallSignedTypes, vqshrunsCode)
2646 FPSCR fpscr = (FPSCR) FpscrQc;
2647 if (imm > sizeof(srcElem1) * 8) {
2648 if (srcElem1 != 0 && srcElem1 != -1)
2652 BigElement mid = (srcElem1 >> (imm - 1));
2653 uint64_t rBit = mid & 0x1;
2655 mid |= -(mid & ((BigElement)1 <<
2656 (sizeof(BigElement) * 8 - 1 - imm)));
2658 if (mid != (Element)mid) {
2659 destElem = mask(sizeof(Element) * 8 - 1);
2661 destElem = ~destElem;
2667 if (srcElem1 != (Element)srcElem1) {
2668 destElem = mask(sizeof(Element) * 8 - 1);
2670 destElem = ~destElem;
2673 destElem = srcElem1;
# vqrshrn: single class NVqrshrn over smallSignedTypes, using vqrshrnCode
# (the visible snippet extracts a rounding bit "rBit" from the partially
# shifted value and checks whether the result fits in Element).
2678 twoRegNarrowShiftInst("vqrshrn", "NVqrshrn",
2679 "SimdShiftOp", smallSignedTypes, vqrshrnCode)
2682 FPSCR fpscr = (FPSCR) FpscrQc;
2683 if (imm > sizeof(srcElem1) * 8) {
2688 BigElement mid = (srcElem1 >> (imm - 1));
2689 uint64_t rBit = mid & 0x1;
2692 if (mid != (Element)mid) {
2693 destElem = mask(sizeof(Element) * 8);
2699 if (srcElem1 != (Element)srcElem1) {
2700 destElem = mask(sizeof(Element) * 8 - 1);
2703 destElem = srcElem1;
# NVqrshrun: registered under the mnemonic string "vqrshrun" over
# smallUnsignedTypes, using vqrshrunCode. NVqrshruns is registered later
# under the same mnemonic string with smallSignedTypes.
# NOTE(review): confirm against the decoder that sharing the mnemonic string
# is intended.
2708 twoRegNarrowShiftInst("vqrshrun", "NVqrshrun",
2709 "SimdShiftOp", smallUnsignedTypes, vqrshrunCode)
2712 FPSCR fpscr = (FPSCR) FpscrQc;
2713 if (imm > sizeof(srcElem1) * 8) {
2718 BigElement mid = (srcElem1 >> (imm - 1));
2719 uint64_t rBit = mid & 0x1;
2721 mid |= -(mid & ((BigElement)1 <<
2722 (sizeof(BigElement) * 8 - 1 - imm)));
2724 if (bits(mid, sizeof(BigElement) * 8 - 1,
2725 sizeof(Element) * 8) != 0) {
2729 destElem = mask(sizeof(Element) * 8);
2740 destElem = srcElem1;
# NVqrshruns: smallSignedTypes counterpart of NVqrshrun, using
# vqrshrunsCode; it reuses the mnemonic string "vqrshrun".
# NOTE(review): confirm against the decoder that sharing the mnemonic string
# is intended.
2745 twoRegNarrowShiftInst("vqrshrun", "NVqrshruns",
2746 "SimdShiftOp", smallSignedTypes, vqrshrunsCode)
2749 if (imm >= sizeof(destElem) * 8) {
2752 destElem = (BigElement)srcElem1 << imm;
# vshll: single class NVshll over smallTypes, using vshllCode (the visible
# snippet widens the source with a (BigElement) cast before shifting left by
# imm).
2755 twoRegLongShiftInst("vshll", "NVshll", "SimdShiftOp", smallTypes, vshllCode)
2758 destElem = srcElem1;
# vmovl: single class NVmovl over smallTypes, built with the same
# twoRegLongShiftInst generator as vshll but with op class SimdMiscOp and
# the vmovlCode snippet.
2760 twoRegLongShiftInst("vmovl", "NVmovl", "SimdMiscOp", smallTypes, vmovlCode)
2763 FPSCR fpscr = (FPSCR) FpscrExc;
2764 if (flushToZero(srcElem1))
2766 VfpSavedState state = prepFpState(VfpRoundNearest);
2767 __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
2768 destReg = vfpFpSToFixed(srcElem1, false, false, imm);
2769 __asm__ __volatile__("" :: "m" (destReg));
2770 finishVfp(fpscr, state, true);
# vcvt, class names NVcvt2ufxD / NVcvt2ufxQ: "float" elements, op class
# SimdCvtOp, using vcvt2ufxCode (the visible snippet writes destReg from
# vfpFpSToFixed(srcElem1, false, false, imm)). toInt = True is passed by
# keyword.
# NOTE(review): 2 / 4 is presumably the register count and toInt presumably
# selects an integer destination; confirm in twoRegShiftInst.
2773 twoRegShiftInst("vcvt", "NVcvt2ufxD", "SimdCvtOp", ("float",),
2774 2, vcvt2ufxCode, toInt = True)
2775 twoRegShiftInst("vcvt", "NVcvt2ufxQ", "SimdCvtOp", ("float",),
2776 4, vcvt2ufxCode, toInt = True)
2779 FPSCR fpscr = (FPSCR) FpscrExc;
2780 if (flushToZero(srcElem1))
2782 VfpSavedState state = prepFpState(VfpRoundNearest);
2783 __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
2784 destReg = vfpFpSToFixed(srcElem1, true, false, imm);
2785 __asm__ __volatile__("" :: "m" (destReg));
2786 finishVfp(fpscr, state, true);
# vcvt, class names NVcvt2sfxD / NVcvt2sfxQ: "float" elements, op class
# SimdCvtOp, using vcvt2sfxCode (the visible snippet calls
# vfpFpSToFixed(srcElem1, true, false, imm); the first bool is the only
# visible difference from the NVcvt2ufx snippet). toInt = True is passed by
# keyword.
# NOTE(review): 2 / 4 is presumably the register count; confirm in
# twoRegShiftInst (defined outside this chunk).
2789 twoRegShiftInst("vcvt", "NVcvt2sfxD", "SimdCvtOp", ("float",),
2790 2, vcvt2sfxCode, toInt = True)
2791 twoRegShiftInst("vcvt", "NVcvt2sfxQ", "SimdCvtOp", ("float",),
2792 4, vcvt2sfxCode, toInt = True)
2795 FPSCR fpscr = (FPSCR) FpscrExc;
2796 VfpSavedState state = prepFpState(VfpRoundNearest);
2797 __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
2798 destElem = vfpUFixedToFpS(true, true, srcReg1, false, imm);
2799 __asm__ __volatile__("" :: "m" (destElem));
2800 finishVfp(fpscr, state, true);
# vcvt, class names NVcvtu2fpD / NVcvtu2fpQ: "float" elements, op class
# SimdCvtOp, using vcvtu2fpCode (the visible snippet writes destElem from
# vfpUFixedToFpS(..., srcReg1, false, imm)). fromInt = True is passed by
# keyword.
# NOTE(review): 2 / 4 is presumably the register count and fromInt presumably
# selects an integer source; confirm in twoRegShiftInst.
2803 twoRegShiftInst("vcvt", "NVcvtu2fpD", "SimdCvtOp", ("float",),
2804 2, vcvtu2fpCode, fromInt = True)
2805 twoRegShiftInst("vcvt", "NVcvtu2fpQ", "SimdCvtOp", ("float",),
2806 4, vcvtu2fpCode, fromInt = True)
2809 FPSCR fpscr = (FPSCR) FpscrExc;
2810 VfpSavedState state = prepFpState(VfpRoundNearest);
2811 __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
2812 destElem = vfpSFixedToFpS(true, true, srcReg1, false, imm);
2813 __asm__ __volatile__("" :: "m" (destElem));
2814 finishVfp(fpscr, state, true);
# vcvt, class names NVcvts2fpD / NVcvts2fpQ: "float" elements, op class
# SimdCvtOp, using vcvts2fpCode (the visible snippet writes destElem from
# vfpSFixedToFpS(..., srcReg1, false, imm)). fromInt = True is passed by
# keyword.
# NOTE(review): 2 / 4 is presumably the register count; confirm in
# twoRegShiftInst (defined outside this chunk).
2817 twoRegShiftInst("vcvt", "NVcvts2fpD", "SimdCvtOp", ("float",),
2818 2, vcvts2fpCode, fromInt = True)
2819 twoRegShiftInst("vcvt", "NVcvts2fpQ", "SimdCvtOp", ("float",),
2820 4, vcvts2fpCode, fromInt = True)
2823 FPSCR fpscr = (FPSCR) FpscrExc;
2824 float srcFp1 = bitsToFp(srcElem1, (float)0.0);
2825 if (flushToZero(srcFp1))
2827 VfpSavedState state = prepFpState(VfpRoundNearest);
2828 __asm__ __volatile__("" : "=m" (srcFp1), "=m" (destElem)
2829 : "m" (srcFp1), "m" (destElem));
2830 destElem = vcvtFpSFpH(fpscr, true, true, VfpRoundNearest,
2832 __asm__ __volatile__("" :: "m" (destElem));
2833 finishVfp(fpscr, state, true);
# vcvt, class NVcvts2h: narrowing form with "uint16_t" as the listed element
# type, using vcvts2hCode (the visible snippet reinterprets srcElem1 as a
# float via bitsToFp and stores the result of vcvtFpSFpH in destElem).
2836 twoRegNarrowMiscInst("vcvt", "NVcvts2h", "SimdCvtOp", ("uint16_t",), vcvts2hCode)
2839 FPSCR fpscr = (FPSCR) FpscrExc;
2840 VfpSavedState state = prepFpState(VfpRoundNearest);
2841 __asm__ __volatile__("" : "=m" (srcElem1), "=m" (destElem)
2842 : "m" (srcElem1), "m" (destElem));
2843 destElem = fpToBits(vcvtFpHFpS(fpscr, true, fpscr.ahp, srcElem1));
2844 __asm__ __volatile__("" :: "m" (destElem));
2845 finishVfp(fpscr, state, true);
# vcvt, class NVcvth2s: long (widening) form with "uint16_t" as the listed
# element type, using vcvth2sCode (the visible snippet stores
# fpToBits(vcvtFpHFpS(fpscr, true, fpscr.ahp, srcElem1)) in destElem).
2848 twoRegLongMiscInst("vcvt", "NVcvth2s", "SimdCvtOp", ("uint16_t",), vcvth2sCode)
2851 destElem = unsignedRSqrtEstimate(srcElem1);
# vrsqrte, integer form: NVrsqrteD / NVrsqrteQ over ("uint32_t",), op class
# SimdSqrtOp, using vrsqrteCode (destElem = unsignedRSqrtEstimate(srcElem1)).
# NOTE(review): 2 / 4 is presumably the register count; confirm in
# twoRegMiscInst (defined outside this chunk).
2853 twoRegMiscInst("vrsqrte", "NVrsqrteD", "SimdSqrtOp", ("uint32_t",), 2, vrsqrteCode)
2854 twoRegMiscInst("vrsqrte", "NVrsqrteQ", "SimdSqrtOp", ("uint32_t",), 4, vrsqrteCode)
2857 FPSCR fpscr = (FPSCR) FpscrExc;
2858 if (flushToZero(srcReg1))
2860 destReg = fprSqrtEstimate(fpscr, srcReg1);
# vrsqrte, floating-point form: NVrsqrteDFp / NVrsqrteQFp over ("float",),
# op class SimdFloatSqrtOp, using vrsqrtefpCode (the visible snippet sets
# destReg = fprSqrtEstimate(fpscr, srcReg1)).
# NOTE(review): 2 / 4 is presumably the register count; confirm in
# twoRegMiscInstFp (defined outside this chunk).
2863 twoRegMiscInstFp("vrsqrte", "NVrsqrteDFp", "SimdFloatSqrtOp", ("float",), 2, vrsqrtefpCode)
2864 twoRegMiscInstFp("vrsqrte", "NVrsqrteQFp", "SimdFloatSqrtOp", ("float",), 4, vrsqrtefpCode)
2867 destElem = unsignedRecipEstimate(srcElem1);
# vrecpe, integer form: NVrecpeD / NVrecpeQ over ("uint32_t",), using
# vrecpeCode (destElem = unsignedRecipEstimate(srcElem1)).
# NOTE(review): the op class passed here is "SimdMultAccOp" although the
# snippet is a reciprocal estimate -- confirm this classification is
# intended. 2 / 4 is presumably the register count.
2869 twoRegMiscInst("vrecpe", "NVrecpeD", "SimdMultAccOp", ("uint32_t",), 2, vrecpeCode)
2870 twoRegMiscInst("vrecpe", "NVrecpeQ", "SimdMultAccOp", ("uint32_t",), 4, vrecpeCode)
2873 FPSCR fpscr = (FPSCR) FpscrExc;
2874 if (flushToZero(srcReg1))
2876 destReg = fpRecipEstimate(fpscr, srcReg1);
# vrecpe, floating-point form: NVrecpeDFp / NVrecpeQFp over ("float",),
# using vrecpefpCode (the visible snippet sets
# destReg = fpRecipEstimate(fpscr, srcReg1)).
# NOTE(review): the op class passed here is "SimdFloatMultAccOp" although the
# snippet is a reciprocal estimate -- confirm this classification is
# intended. 2 / 4 is presumably the register count.
2879 twoRegMiscInstFp("vrecpe", "NVrecpeDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpefpCode)
2880 twoRegMiscInstFp("vrecpe", "NVrecpeQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpefpCode)
2883 destElem = srcElem1;
2884 unsigned groupSize = ((1 << 1) / sizeof(Element));
2885 unsigned reverseMask = (groupSize - 1);
2886 j = i ^ reverseMask;
# vrev16: NVrev16D / NVrev16Q over ("uint8_t",), using vrev16Code. The
# visible snippet computes groupSize = (1 << 1) / sizeof(Element) and maps
# element index i to j = i ^ (groupSize - 1).
# NOTE(review): 2 / 4 is presumably the register count; confirm in
# twoRegMiscInst (defined outside this chunk).
2888 twoRegMiscInst("vrev16", "NVrev16D", "SimdAluOp", ("uint8_t",), 2, vrev16Code)
2889 twoRegMiscInst("vrev16", "NVrev16Q", "SimdAluOp", ("uint8_t",), 4, vrev16Code)
2891 destElem = srcElem1;
2892 unsigned groupSize = ((1 << 2) / sizeof(Element));
2893 unsigned reverseMask = (groupSize - 1);
2894 j = i ^ reverseMask;
# vrev32: NVrev32D / NVrev32Q over ("uint8_t", "uint16_t"), using
# vrev32Code. The visible snippet computes
# groupSize = (1 << 2) / sizeof(Element) and maps element index i to
# j = i ^ (groupSize - 1).
# NOTE(review): 2 / 4 is presumably the register count; confirm in
# twoRegMiscInst (defined outside this chunk).
2896 twoRegMiscInst("vrev32", "NVrev32D",
2897 "SimdAluOp", ("uint8_t", "uint16_t"), 2, vrev32Code)
2898 twoRegMiscInst("vrev32", "NVrev32Q",
2899 "SimdAluOp", ("uint8_t", "uint16_t"), 4, vrev32Code)
2901 destElem = srcElem1;
2902 unsigned groupSize = ((1 << 3) / sizeof(Element));
2903 unsigned reverseMask = (groupSize - 1);
2904 j = i ^ reverseMask;
# vrev64: NVrev64D / NVrev64Q over smallUnsignedTypes, using vrev64Code. The
# visible snippet computes groupSize = (1 << 3) / sizeof(Element) and maps
# element index i to j = i ^ (groupSize - 1).
# NOTE(review): 2 / 4 is presumably the register count; confirm in
# twoRegMiscInst (defined outside this chunk).
2906 twoRegMiscInst("vrev64", "NVrev64D", "SimdAluOp", smallUnsignedTypes, 2, vrev64Code)
2907 twoRegMiscInst("vrev64", "NVrev64Q", "SimdAluOp", smallUnsignedTypes, 4, vrev64Code)
2910 destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
# vpaddl: NVpaddlD / NVpaddlQ over smallTypes, op class SimdAddOp, using
# vpaddlCode (destElem = (BigElement)srcElem1 + (BigElement)srcElem2, i.e.
# both inputs are widened before the add).
# NOTE(review): 2 / 4 is presumably the register count; confirm in
# twoRegCondenseInst (defined outside this chunk).
2912 twoRegCondenseInst("vpaddl", "NVpaddlD", "SimdAddOp", smallTypes, 2, vpaddlCode)
2913 twoRegCondenseInst("vpaddl", "NVpaddlQ", "SimdAddOp", smallTypes, 4, vpaddlCode)
2916 destElem += (BigElement)srcElem1 + (BigElement)srcElem2;
# vpadal: NVpadalD / NVpadalQ over smallTypes, op class SimdAddAccOp, using
# vpadalCode. The snippet uses "destElem +=", so the old destination value is
# an input; the calls pass a trailing True accordingly.
# NOTE(review): flag meaning and 2 / 4 = register count are assumptions;
# twoRegCondenseInst is defined outside this chunk.
2918 twoRegCondenseInst("vpadal", "NVpadalD", "SimdAddAccOp", smallTypes, 2, vpadalCode, True)
2919 twoRegCondenseInst("vpadal", "NVpadalQ", "SimdAddAccOp", smallTypes, 4, vpadalCode, True)
2925 while (srcElem1 < 0 && count < sizeof(Element) * 8 - 1) {
2931 while (srcElem1 >= 0 && count < sizeof(Element) * 8 - 1) {
# vcls: NVclsD / NVclsQ over signedTypes, using vclsCode. The visible
# snippet has separate counting loops for srcElem1 < 0 and srcElem1 >= 0,
# each bounded by sizeof(Element) * 8 - 1.
# NOTE(review): 2 / 4 is presumably the register count; confirm in
# twoRegMiscInst (defined outside this chunk).
2938 twoRegMiscInst("vcls", "NVclsD", "SimdAluOp", signedTypes, 2, vclsCode)
2939 twoRegMiscInst("vcls", "NVclsQ", "SimdAluOp", signedTypes, 4, vclsCode)
2943 while (srcElem1 >= 0 && count < sizeof(Element) * 8) {
# vclz: NVclzD / NVclzQ, using vclzCode. These are instantiated over
# signedTypes: the visible loop condition tests "srcElem1 >= 0", which only
# works as a top-bit test when Element is a signed type.
# NOTE(review): 2 / 4 is presumably the register count; confirm in
# twoRegMiscInst (defined outside this chunk).
2949 twoRegMiscInst("vclz", "NVclzD", "SimdAluOp", signedTypes, 2, vclzCode)
2950 twoRegMiscInst("vclz", "NVclzQ", "SimdAluOp", signedTypes, 4, vclzCode)
2954 while (srcElem1 && count < sizeof(Element) * 8) {
2955 count += srcElem1 & 0x1;
# vcnt: NVcntD / NVcntQ over unsignedTypes, using vcntCode. The visible
# snippet loops while srcElem1 is non-zero and adds its low bit
# (srcElem1 & 0x1) to count on each pass.
# NOTE(review): 2 / 4 is presumably the register count; confirm in
# twoRegMiscInst (defined outside this chunk).
2961 twoRegMiscInst("vcnt", "NVcntD", "SimdAluOp", unsignedTypes, 2, vcntCode)
2962 twoRegMiscInst("vcnt", "NVcntQ", "SimdAluOp", unsignedTypes, 4, vcntCode)
2965 destElem = ~srcElem1;
# vmvn, register form: NVmvnD / NVmvnQ over ("uint64_t",), using the
# vmvnCode value bound at this point (destElem = ~srcElem1). The name
# vmvnCode is rebound further down for the immediate form, after these calls
# have already been evaluated.
# NOTE(review): 2 / 4 is presumably the register count; confirm in
# twoRegMiscInst (defined outside this chunk).
2967 twoRegMiscInst("vmvn", "NVmvnD", "SimdAluOp", ("uint64_t",), 2, vmvnCode)
2968 twoRegMiscInst("vmvn", "NVmvnQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode)
2971 FPSCR fpscr = (FPSCR) FpscrQc;
2972 if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
2974 destElem = ~srcElem1;
2975 } else if (srcElem1 < 0) {
2976 destElem = -srcElem1;
2978 destElem = srcElem1;
# vqabs: NVqabsD / NVqabsQ over signedTypes, using vqabsCode. The visible
# snippet special-cases srcElem1 equal to (Element)1 << (width - 1), where it
# stores ~srcElem1; otherwise it negates negative inputs and copies the rest.
# NOTE(review): 2 / 4 is presumably the register count; confirm in
# twoRegMiscInst (defined outside this chunk).
2982 twoRegMiscInst("vqabs", "NVqabsD", "SimdAluOp", signedTypes, 2, vqabsCode)
2983 twoRegMiscInst("vqabs", "NVqabsQ", "SimdAluOp", signedTypes, 4, vqabsCode)
2986 FPSCR fpscr = (FPSCR) FpscrQc;
2987 if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
2989 destElem = ~srcElem1;
2991 destElem = -srcElem1;
# vqneg: NVqnegD / NVqnegQ over signedTypes, using vqnegCode. The visible
# snippet special-cases srcElem1 equal to (Element)1 << (width - 1), where it
# stores ~srcElem1; otherwise it stores -srcElem1.
# NOTE(review): 2 / 4 is presumably the register count; confirm in
# twoRegMiscInst (defined outside this chunk).
2995 twoRegMiscInst("vqneg", "NVqnegD", "SimdAluOp", signedTypes, 2, vqnegCode)
2996 twoRegMiscInst("vqneg", "NVqnegQ", "SimdAluOp", signedTypes, 4, vqnegCode)
3000 destElem = -srcElem1;
3002 destElem = srcElem1;
# vabs, integer form: NVabsD / NVabsQ over signedTypes, using vabsCode (the
# visible snippet assigns either -srcElem1 or srcElem1 to destElem).
# NOTE(review): 2 / 4 is presumably the register count; confirm in
# twoRegMiscInst (defined outside this chunk).
3006 twoRegMiscInst("vabs", "NVabsD", "SimdAluOp", signedTypes, 2, vabsCode)
3007 twoRegMiscInst("vabs", "NVabsQ", "SimdAluOp", signedTypes, 4, vabsCode)
3014 cStruct.f = srcReg1;
3015 cStruct.i &= mask(sizeof(Element) * 8 - 1);
3016 destReg = cStruct.f;
# vabs, floating-point form: NVabsDFp / NVabsQFp over ("float",), using
# vabsfpCode. The visible snippet goes through cStruct (f / i members) and
# clears the top bit with mask(sizeof(Element) * 8 - 1) before writing
# destReg.
# NOTE(review): 2 / 4 is presumably the register count; confirm in
# twoRegMiscInstFp (defined outside this chunk).
3018 twoRegMiscInstFp("vabs", "NVabsDFp", "SimdFloatAluOp", ("float",), 2, vabsfpCode)
3019 twoRegMiscInstFp("vabs", "NVabsQFp", "SimdFloatAluOp", ("float",), 4, vabsfpCode)
3022 destElem = -srcElem1;
# vneg, integer form: NVnegD / NVnegQ over signedTypes, using vnegCode
# (destElem = -srcElem1).
# NOTE(review): 2 / 4 is presumably the register count; confirm in
# twoRegMiscInst (defined outside this chunk).
3024 twoRegMiscInst("vneg", "NVnegD", "SimdAluOp", signedTypes, 2, vnegCode)
3025 twoRegMiscInst("vneg", "NVnegQ", "SimdAluOp", signedTypes, 4, vnegCode)
# vneg, floating-point form: NVnegDFp / NVnegQFp over ("float",), op class
# SimdFloatAluOp, using vnegfpCode (its body is not visible in this chunk).
# NOTE(review): 2 / 4 is presumably the register count; confirm in
# twoRegMiscInstFp (defined outside this chunk).
3029 twoRegMiscInstFp("vneg", "NVnegDFp", "SimdFloatAluOp", ("float",), 2, vnegfpCode)
3030 twoRegMiscInstFp("vneg", "NVnegQFp", "SimdFloatAluOp", ("float",), 4, vnegfpCode)
# vcgt against zero, integer form. Per element: destElem becomes
# mask(sizeof(Element) * 8) when srcElem1 > 0 and 0 otherwise.
# NOTE(review): mask(n) presumably yields n low bits set, i.e. an all-ones
# element; 2 / 4 is presumably the register count. Both helpers are defined
# outside this chunk.
3032 vcgtCode = 'destElem = (srcElem1 > 0) ? mask(sizeof(Element) * 8) : 0;'
3033 twoRegMiscInst("vcgt", "NVcgtD", "SimdCmpOp", signedTypes, 2, vcgtCode)
3034 twoRegMiscInst("vcgt", "NVcgtQ", "SimdCmpOp", signedTypes, 4, vcgtCode)
3036 FPSCR fpscr = (FPSCR) FpscrExc;
3037 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgtFunc,
3038 true, true, VfpRoundNearest);
3039 destReg = (res == 0) ? -1 : 0;
# vcgt against zero, floating-point form: NVcgtDFp / NVcgtQFp over
# ("float",), op class SimdFloatCmpOp, using vcgtfpCode. The visible snippet
# calls binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgtFunc, ...) and derives
# destReg from whether the result equals 0. toInt = True is passed by
# keyword.
# NOTE(review): 2 / 4 is presumably the register count; confirm in
# twoRegMiscInstFp (defined outside this chunk).
3044 twoRegMiscInstFp("vcgt", "NVcgtDFp", "SimdFloatCmpOp", ("float",),
3045 2, vcgtfpCode, toInt = True)
3046 twoRegMiscInstFp("vcgt", "NVcgtQFp", "SimdFloatCmpOp", ("float",),
3047 4, vcgtfpCode, toInt = True)
# vcge against zero, integer form. Per element: destElem becomes
# mask(sizeof(Element) * 8) when srcElem1 >= 0 and 0 otherwise.
# NOTE(review): mask(n) presumably yields n low bits set; 2 / 4 is
# presumably the register count. Both helpers are defined outside this chunk.
3049 vcgeCode = 'destElem = (srcElem1 >= 0) ? mask(sizeof(Element) * 8) : 0;'
3050 twoRegMiscInst("vcge", "NVcgeD", "SimdCmpOp", signedTypes, 2, vcgeCode)
3051 twoRegMiscInst("vcge", "NVcgeQ", "SimdCmpOp", signedTypes, 4, vcgeCode)
3053 FPSCR fpscr = (FPSCR) FpscrExc;
3054 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgeFunc,
3055 true, true, VfpRoundNearest);
3056 destReg = (res == 0) ? -1 : 0;
# vcge against zero, floating-point form: NVcgeDFp / NVcgeQFp over
# ("float",), op class SimdFloatCmpOp, using vcgefpCode (the visible snippet
# calls binaryOp(..., (FloatReg)0.0, vcgeFunc, ...)). toInt = True is passed
# by keyword.
# NOTE(review): 2 / 4 is presumably the register count; confirm in
# twoRegMiscInstFp (defined outside this chunk).
3061 twoRegMiscInstFp("vcge", "NVcgeDFp", "SimdFloatCmpOp", ("float",),
3062 2, vcgefpCode, toInt = True)
3063 twoRegMiscInstFp("vcge", "NVcgeQFp", "SimdFloatCmpOp", ("float",),
3064 4, vcgefpCode, toInt = True)
# vceq against zero, integer form. Per element: destElem becomes
# mask(sizeof(Element) * 8) when srcElem1 == 0 and 0 otherwise.
# NOTE(review): mask(n) presumably yields n low bits set; 2 / 4 is
# presumably the register count. Both helpers are defined outside this chunk.
3066 vceqCode = 'destElem = (srcElem1 == 0) ? mask(sizeof(Element) * 8) : 0;'
3067 twoRegMiscInst("vceq", "NVceqD", "SimdCmpOp", signedTypes, 2, vceqCode)
3068 twoRegMiscInst("vceq", "NVceqQ", "SimdCmpOp", signedTypes, 4, vceqCode)
3070 FPSCR fpscr = (FPSCR) FpscrExc;
3071 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vceqFunc,
3072 true, true, VfpRoundNearest);
3073 destReg = (res == 0) ? -1 : 0;
# vceq against zero, floating-point form: NVceqDFp / NVceqQFp over
# ("float",), op class SimdFloatCmpOp, using vceqfpCode (the visible snippet
# calls binaryOp(..., (FloatReg)0.0, vceqFunc, ...)). toInt = True is passed
# by keyword.
# NOTE(review): 2 / 4 is presumably the register count; confirm in
# twoRegMiscInstFp (defined outside this chunk).
3078 twoRegMiscInstFp("vceq", "NVceqDFp", "SimdFloatCmpOp", ("float",),
3079 2, vceqfpCode, toInt = True)
3080 twoRegMiscInstFp("vceq", "NVceqQFp", "SimdFloatCmpOp", ("float",),
3081 4, vceqfpCode, toInt = True)
# vcle against zero, integer form. Per element: destElem becomes
# mask(sizeof(Element) * 8) when srcElem1 <= 0 and 0 otherwise.
# NOTE(review): mask(n) presumably yields n low bits set; 2 / 4 is
# presumably the register count. Both helpers are defined outside this chunk.
3083 vcleCode = 'destElem = (srcElem1 <= 0) ? mask(sizeof(Element) * 8) : 0;'
3084 twoRegMiscInst("vcle", "NVcleD", "SimdCmpOp", signedTypes, 2, vcleCode)
3085 twoRegMiscInst("vcle", "NVcleQ", "SimdCmpOp", signedTypes, 4, vcleCode)
3087 FPSCR fpscr = (FPSCR) FpscrExc;
3088 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcleFunc,
3089 true, true, VfpRoundNearest);
3090 destReg = (res == 0) ? -1 : 0;
# vcle against zero, floating-point form: NVcleDFp / NVcleQFp over
# ("float",), op class SimdFloatCmpOp, using vclefpCode (the visible snippet
# calls binaryOp(..., (FloatReg)0.0, vcleFunc, ...)). toInt = True is passed
# by keyword.
# NOTE(review): 2 / 4 is presumably the register count; confirm in
# twoRegMiscInstFp (defined outside this chunk).
3095 twoRegMiscInstFp("vcle", "NVcleDFp", "SimdFloatCmpOp", ("float",),
3096 2, vclefpCode, toInt = True)
3097 twoRegMiscInstFp("vcle", "NVcleQFp", "SimdFloatCmpOp", ("float",),
3098 4, vclefpCode, toInt = True)
# vclt against zero, integer form. Per element: destElem becomes
# mask(sizeof(Element) * 8) when srcElem1 < 0 and 0 otherwise.
# NOTE(review): mask(n) presumably yields n low bits set; 2 / 4 is
# presumably the register count. Both helpers are defined outside this chunk.
3100 vcltCode = 'destElem = (srcElem1 < 0) ? mask(sizeof(Element) * 8) : 0;'
3101 twoRegMiscInst("vclt", "NVcltD", "SimdCmpOp", signedTypes, 2, vcltCode)
3102 twoRegMiscInst("vclt", "NVcltQ", "SimdCmpOp", signedTypes, 4, vcltCode)
3104 FPSCR fpscr = (FPSCR) FpscrExc;
3105 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcltFunc,
3106 true, true, VfpRoundNearest);
3107 destReg = (res == 0) ? -1 : 0;
# vclt against zero, floating-point form: NVcltDFp / NVcltQFp over
# ("float",), op class SimdFloatCmpOp, using vcltfpCode (the visible snippet
# calls binaryOp(..., (FloatReg)0.0, vcltFunc, ...)). toInt = True is passed
# by keyword.
# NOTE(review): 2 / 4 is presumably the register count; confirm in
# twoRegMiscInstFp (defined outside this chunk).
3112 twoRegMiscInstFp("vclt", "NVcltDFp", "SimdFloatCmpOp", ("float",),
3113 2, vcltfpCode, toInt = True)
3114 twoRegMiscInstFp("vclt", "NVcltQFp", "SimdFloatCmpOp", ("float",),
3115 4, vcltfpCode, toInt = True)
3119 for (unsigned r = 0; r < rCount; r++) {
3120 mid = srcReg1.regs[r];
3121 srcReg1.regs[r] = destReg.regs[r];
3122 destReg.regs[r] = mid;
# vswp: NVswpD / NVswpQ over ("uint64_t",), using vswpCode. The visible
# snippet exchanges srcReg1.regs[r] and destReg.regs[r] through a temporary
# for every r < rCount, so both operands are written.
# NOTE(review): 2 / 4 is presumably that rCount; confirm in
# twoRegMiscScramble (defined outside this chunk).
3125 twoRegMiscScramble("vswp", "NVswpD", "SimdAluOp", ("uint64_t",), 2, vswpCode)
3126 twoRegMiscScramble("vswp", "NVswpQ", "SimdAluOp", ("uint64_t",), 4, vswpCode)
3130 for (unsigned i = 0; i < eCount; i += 2) {
3131 mid = srcReg1.elements[i];
3132 srcReg1.elements[i] = destReg.elements[i + 1];
3133 destReg.elements[i + 1] = mid;
# vtrn: NVtrnD / NVtrnQ over smallUnsignedTypes, using vtrnCode. The visible
# snippet walks the elements in steps of two and exchanges
# srcReg1.elements[i] with destReg.elements[i + 1], so both operands are
# written.
# NOTE(review): 2 / 4 is presumably the register count; confirm in
# twoRegMiscScramble (defined outside this chunk).
3136 twoRegMiscScramble("vtrn", "NVtrnD", "SimdAluOp",
3137 smallUnsignedTypes, 2, vtrnCode)
3138 twoRegMiscScramble("vtrn", "NVtrnQ", "SimdAluOp",
3139 smallUnsignedTypes, 4, vtrnCode)
3142 Element mid[eCount];
3143 memcpy(&mid, &srcReg1, sizeof(srcReg1));
3144 for (unsigned i = 0; i < eCount / 2; i++) {
3145 srcReg1.elements[i] = destReg.elements[2 * i + 1];
3146 srcReg1.elements[eCount / 2 + i] = mid[2 * i + 1];
3147 destReg.elements[i] = destReg.elements[2 * i];
3149 for (unsigned i = 0; i < eCount / 2; i++) {
3150 destReg.elements[eCount / 2 + i] = mid[2 * i];
# vuzp: NVuzpD / NVuzpQ over unsignedTypes, using vuzpCode. The visible
# snippet snapshots srcReg1 into a local array, then gathers even-indexed
# elements of destReg / the snapshot into destReg and odd-indexed ones into
# srcReg1; both operands are written.
# NOTE(review): 2 / 4 is presumably the register count; confirm in
# twoRegMiscScramble (defined outside this chunk).
3153 twoRegMiscScramble("vuzp", "NVuzpD", "SimdAluOp", unsignedTypes, 2, vuzpCode)
3154 twoRegMiscScramble("vuzp", "NVuzpQ", "SimdAluOp", unsignedTypes, 4, vuzpCode)
3157 Element mid[eCount];
3158 memcpy(&mid, &destReg, sizeof(destReg));
3159 for (unsigned i = 0; i < eCount / 2; i++) {
3160 destReg.elements[2 * i] = mid[i];
3161 destReg.elements[2 * i + 1] = srcReg1.elements[i];
3163 for (int i = 0; i < eCount / 2; i++) {
3164 srcReg1.elements[2 * i] = mid[eCount / 2 + i];
3165 srcReg1.elements[2 * i + 1] = srcReg1.elements[eCount / 2 + i];
# vzip: NVzipD / NVzipQ over unsignedTypes, using vzipCode. The visible
# snippet snapshots destReg into a local array and interleaves it with
# srcReg1: the lower halves go to destReg, the upper halves to srcReg1; both
# operands are written.
# NOTE(review): 2 / 4 is presumably the register count; confirm in
# twoRegMiscScramble (defined outside this chunk).
3168 twoRegMiscScramble("vzip", "NVzipD", "SimdAluOp", unsignedTypes, 2, vzipCode)
3169 twoRegMiscScramble("vzip", "NVzipQ", "SimdAluOp", unsignedTypes, 4, vzipCode)
# vmovn: single class NVmovn over smallUnsignedTypes. The snippet is a plain
# assignment (destElem = srcElem1); any narrowing therefore comes from the
# element types the twoRegNarrowMiscInst generator gives the two operands.
# NOTE(review): the generator is defined outside this chunk -- confirm there.
3171 vmovnCode = 'destElem = srcElem1;'
3172 twoRegNarrowMiscInst("vmovn", "NVmovn", "SimdMiscOp", smallUnsignedTypes, vmovnCode)
# vdup, twoRegMiscScInst form: NVdupD / NVdupQ over smallUnsignedTypes. The
# snippet is a plain assignment (destElem = srcElem1); how srcElem1 is
# selected is decided by twoRegMiscScInst.
# NOTE(review): 2 / 4 is presumably the register count; the generator is
# defined outside this chunk -- confirm there.
3174 vdupCode = 'destElem = srcElem1;'
3175 twoRegMiscScInst("vdup", "NVdupD", "SimdAluOp", smallUnsignedTypes, 2, vdupCode)
3176 twoRegMiscScInst("vdup", "NVdupQ", "SimdAluOp", smallUnsignedTypes, 4, vdupCode)
3178 def vdupGprInst(name, Name, opClass, types, rCount):
3179 global header_output, exec_output
3182 for (unsigned i = 0; i < eCount; i++) {
3183 destReg.elements[i] = htog((Element)Op1);
3186 for reg in range(rCount):
3188 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
3189 ''' % { "reg" : reg }
3190 iop = InstObjParams(name, Name,
3192 { "code": eWalkCode,
3194 "predicate_test": predicateTest,
3195 "op_class": opClass }, [])
3196 header_output += NeonRegRegOpDeclare.subst(iop)
3197 exec_output += NeonEqualRegExecute.subst(iop)
3199 substDict = { "targs" : type,
3200 "class_name" : Name }
3201 exec_output += NeonExecDeclare.subst(substDict)
# vdup, Op1-sourced form: NVdupDGpr / NVdupQGpr over smallUnsignedTypes, with
# rCount = 2 and 4 respectively (fifth parameter of vdupGprInst). The
# generator's visible code writes htog((Element)Op1) into every
# destReg.elements[i] and then copies destReg.regs[] to the FpDestP*_uw
# operands.
3202 vdupGprInst("vdup", "NVdupDGpr", "SimdMiscOp", smallUnsignedTypes, 2)
3203 vdupGprInst("vdup", "NVdupQGpr", "SimdMiscOp", smallUnsignedTypes, 4)
# vmov, immediate form: NVmoviD / NVmoviQ over ("uint64_t",). Per element the
# snippet stores the immediate unchanged (destElem = imm).
# NOTE(review): 2 / 4 is presumably the register count; oneRegImmInst is
# defined outside this chunk -- confirm there.
3205 vmovCode = 'destElem = imm;'
3206 oneRegImmInst("vmov", "NVmoviD", "SimdMiscOp", ("uint64_t",), 2, vmovCode)
3207 oneRegImmInst("vmov", "NVmoviQ", "SimdMiscOp", ("uint64_t",), 4, vmovCode)
# vorr, immediate form: NVorriD / NVorriQ over ("uint64_t",). The snippet
# ORs the immediate into the existing destination (destElem |= imm), so the
# destination is also an input; the calls pass a trailing True.
# NOTE(review): that True is presumably the "read destination" flag and
# 2 / 4 the register count; confirm in oneRegImmInst (not visible here).
3209 vorrCode = 'destElem |= imm;'
3210 oneRegImmInst("vorr", "NVorriD", "SimdAluOp", ("uint64_t",), 2, vorrCode, True)
3211 oneRegImmInst("vorr", "NVorriQ", "SimdAluOp", ("uint64_t",), 4, vorrCode, True)
# vmvn, immediate form: NVmvniD / NVmvniQ over ("uint64_t",). Per element the
# snippet stores the complemented immediate (destElem = ~imm).
# This rebinds the name vmvnCode, which the register-form NVmvnD / NVmvnQ
# calls earlier in the file also use; those calls ran before this assignment
# and are unaffected.
# NOTE(review): 2 / 4 is presumably the register count; confirm in
# oneRegImmInst (defined outside this chunk).
3213 vmvnCode = 'destElem = ~imm;'
3214 oneRegImmInst("vmvn", "NVmvniD", "SimdAluOp", ("uint64_t",), 2, vmvnCode)
3215 oneRegImmInst("vmvn", "NVmvniQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode)
# vbic, immediate form: NVbiciD / NVbiciQ over ("uint64_t",). The snippet
# clears the immediate's set bits in the existing destination
# (destElem &= ~imm), so the destination is also an input; the calls pass a
# trailing True.
# NOTE(review): that True is presumably the "read destination" flag and
# 2 / 4 the register count; confirm in oneRegImmInst (not visible here).
3217 vbicCode = 'destElem &= ~imm;'
3218 oneRegImmInst("vbic", "NVbiciD", "SimdAluOp", ("uint64_t",), 2, vbicCode, True)
3219 oneRegImmInst("vbic", "NVbiciQ", "SimdAluOp", ("uint64_t",), 4, vbicCode, True)
3222 FPSCR fpscr = (FPSCR) FpscrQc;
3223 destElem = srcElem1;
3224 if ((BigElement)destElem != srcElem1) {
3226 destElem = mask(sizeof(Element) * 8 - 1);
3228 destElem = ~destElem;
# vqmovn: single class NVqmovn over smallSignedTypes, using vqmovnCode. The
# visible snippet assigns srcElem1 to destElem, then compares
# (BigElement)destElem with srcElem1 to detect a value that did not fit and
# replaces destElem in that case.
3232 twoRegNarrowMiscInst("vqmovn", "NVqmovn", "SimdMiscOp", smallSignedTypes, vqmovnCode)
3235 FPSCR fpscr = (FPSCR) FpscrQc;
3236 destElem = srcElem1;
3237 if ((BigElement)destElem != srcElem1) {
3239 destElem = mask(sizeof(Element) * 8);
# NVqmovun: registered under the mnemonic string "vqmovun" over
# smallUnsignedTypes, using vqmovunCode (on a mismatch after narrowing the
# visible snippet stores mask(sizeof(Element) * 8)). NVqmovuns is registered
# later under the same mnemonic string with smallSignedTypes.
# NOTE(review): confirm against the decoder that sharing the mnemonic string
# is intended.
3243 twoRegNarrowMiscInst("vqmovun", "NVqmovun",
3244 "SimdMiscOp", smallUnsignedTypes, vqmovunCode)
3247 FPSCR fpscr = (FPSCR) FpscrQc;
3248 destElem = srcElem1;
3250 ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) {
3252 destElem = mask(sizeof(Element) * 8);
3254 destElem = ~destElem;
# NVqmovuns: smallSignedTypes counterpart of NVqmovun, using vqmovunsCode;
# it reuses the mnemonic string "vqmovun".
# NOTE(review): confirm against the decoder that sharing the mnemonic string
# is intended.
3258 twoRegNarrowMiscInst("vqmovun", "NVqmovuns",
3259 "SimdMiscOp", smallSignedTypes, vqmovunsCode)
3261 def buildVext(name, Name, opClass, types, rCount, op):
3262 global header_output, exec_output
3264 RegVect srcReg1, srcReg2, destReg;
3266 for reg in range(rCount):
3267 eWalkCode += simdEnabledCheckCode + '''
3268 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
3269 srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
3270 ''' % { "reg" : reg }
3272 for reg in range(rCount):
3274 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
3275 ''' % { "reg" : reg }
3276 iop = InstObjParams(name, Name,
3278 { "code": eWalkCode,
3280 "predicate_test": predicateTest,
3281 "op_class": opClass }, [])
3282 header_output += NeonRegRegRegImmOpDeclare.subst(iop)
3283 exec_output += NeonEqualRegExecute.subst(iop)
3285 substDict = { "targs" : type,
3286 "class_name" : Name }
3287 exec_output += NeonExecDeclare.subst(substDict)
3290 for (unsigned i = 0; i < eCount; i++) {
3291 unsigned index = i + imm;
3292 if (index < eCount) {
3293 destReg.elements[i] = srcReg1.elements[index];
3296 if (index >= eCount) {
3298 fault = new UndefinedInstruction;
3300 fault = new UndefinedInstruction(false, mnemonic);
3302 destReg.elements[i] = srcReg2.elements[index];
# vext: NVextD / NVextQ over ("uint8_t",) with rCount = 2 and 4 (fifth
# parameter of buildVext), using vextCode. The visible snippet computes
# index = i + imm per destination element and reads srcReg1.elements[index]
# while index < eCount; the remaining elements come from srcReg2, and an
# UndefinedInstruction fault is raised on a further "index >= eCount" check.
3307 buildVext("vext", "NVextD", "SimdMiscOp", ("uint8_t",), 2, vextCode)
3308 buildVext("vext", "NVextQ", "SimdMiscOp", ("uint8_t",), 4, vextCode)
3310 def buildVtbxl(name, Name, opClass, length, isVtbl):
3311 global header_output, decoder_output, exec_output
3316 FloatRegBits regs[8];
3322 FloatRegBits regs[2];
3325 const unsigned length = %(length)d;
3326 const bool isVtbl = %(isVtbl)s;
3328 srcReg2.regs[0] = htog(FpOp2P0_uw);
3329 srcReg2.regs[1] = htog(FpOp2P1_uw);
3331 destReg.regs[0] = htog(FpDestP0_uw);
3332 destReg.regs[1] = htog(FpDestP1_uw);
3333 ''' % { "length" : length, "isVtbl" : isVtbl }
3334 for reg in range(8):
3335 if reg < length * 2:
3336 code += 'table.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);\n' % \
3339 code += 'table.regs[%(reg)d] = 0;\n' % { "reg" : reg }
3341 for (unsigned i = 0; i < sizeof(destReg); i++) {
3342 uint8_t index = srcReg2.bytes[i];
3343 if (index < 8 * length) {
3344 destReg.bytes[i] = table.bytes[index];
3347 destReg.bytes[i] = 0;
3348 // else destReg.bytes[i] unchanged
3352 FpDestP0_uw = gtoh(destReg.regs[0]);
3353 FpDestP1_uw = gtoh(destReg.regs[1]);
3355 iop = InstObjParams(name, Name,
3358 "predicate_test": predicateTest,
3359 "op_class": opClass }, [])
3360 header_output += RegRegRegOpDeclare.subst(iop)
3361 decoder_output += RegRegRegOpConstructor.subst(iop)
3362 exec_output += PredOpExecute.subst(iop)
# vtbl / vtbx: one class per table length 1..4 (fourth parameter "length" of
# buildVtbxl). The last argument is the *string* "true" or "false" because
# buildVtbxl substitutes it textually into the generated C++ line
# "const bool isVtbl = %(isVtbl)s;".
# In the visible generator code, table registers below length * 2 are loaded
# from FpOp1P<reg>_uw and the rest are zeroed; each index byte below
# 8 * length selects a table byte. For other indices the visible lines show
# a "destReg.bytes[i] = 0" store and an "else ... unchanged" comment,
# presumably the vtbl and vtbx cases respectively -- the selecting condition
# is not visible in this chunk.
3364 buildVtbxl("vtbl", "NVtbl1", "SimdMiscOp", 1, "true")
3365 buildVtbxl("vtbl", "NVtbl2", "SimdMiscOp", 2, "true")
3366 buildVtbxl("vtbl", "NVtbl3", "SimdMiscOp", 3, "true")
3367 buildVtbxl("vtbl", "NVtbl4", "SimdMiscOp", 4, "true")
3369 buildVtbxl("vtbx", "NVtbx1", "SimdMiscOp", 1, "false")
3370 buildVtbxl("vtbx", "NVtbx2", "SimdMiscOp", 2, "false")
3371 buildVtbxl("vtbx", "NVtbx3", "SimdMiscOp", 3, "false")
3372 buildVtbxl("vtbx", "NVtbx4", "SimdMiscOp", 4, "false")