3 // Copyright (c) 2010 ARM Limited
6 // The license below extends only to copyright in the software and shall
7 // not be construed as granting a license to any other intellectual
8 // property including but not limited to intellectual property relating
9 // to a hardware implementation of the functionality of the software
10 // licensed hereunder. You may use the software subject to the license
11 // terms below provided that you ensure that this notice is replicated
12 // unmodified and in its entirety in all distributions of the software,
13 // modified or unmodified, in source code or in binary form.
15 // Redistribution and use in source and binary forms, with or without
16 // modification, are permitted provided that the following conditions are
17 // met: redistributions of source code must retain the above copyright
18 // notice, this list of conditions and the following disclaimer;
19 // redistributions in binary form must reproduce the above copyright
20 // notice, this list of conditions and the following disclaimer in the
21 // documentation and/or other materials provided with the distribution;
22 // neither the name of the copyright holders nor the names of its
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
26 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 // Authors: Gabe Black
// Decode helper for NEON forms taking a destination and two source
// registers with unsigned elements: yields Base<uint8_t>, Base<uint16_t>,
// Base<uint32_t> or Base<uint64_t> built from (machInst, dest, op1, op2),
// or an Unknown instruction.
// NOTE(review): the dispatch between these returns (presumably a switch on
// `size`) is not visible in this excerpt -- confirm against the full file.
41 template <template <typename T> class Base>
43 decodeNeonUThreeUReg(unsigned size,
44 ExtMachInst machInst, IntRegIndex dest,
45 IntRegIndex op1, IntRegIndex op2)
49 return new Base<uint8_t>(machInst, dest, op1, op2);
51 return new Base<uint16_t>(machInst, dest, op1, op2);
53 return new Base<uint32_t>(machInst, dest, op1, op2);
55 return new Base<uint64_t>(machInst, dest, op1, op2);
57 return new Unknown(machInst);
// Signed counterpart of decodeNeonUThreeUReg: yields Base<int8_t>,
// Base<int16_t>, Base<int32_t> or Base<int64_t> built from
// (machInst, dest, op1, op2), or an Unknown instruction.
// NOTE(review): the dispatch between these returns (presumably a switch on
// `size`) is not visible in this excerpt -- confirm against the full file.
61 template <template <typename T> class Base>
63 decodeNeonSThreeUReg(unsigned size,
64 ExtMachInst machInst, IntRegIndex dest,
65 IntRegIndex op1, IntRegIndex op2)
69 return new Base<int8_t>(machInst, dest, op1, op2);
71 return new Base<int16_t>(machInst, dest, op1, op2);
73 return new Base<int32_t>(machInst, dest, op1, op2);
75 return new Base<int64_t>(machInst, dest, op1, op2);
77 return new Unknown(machInst);
// Forwards to decodeNeonUThreeUReg<Base> or decodeNeonSThreeUReg<Base> with
// the same (size, machInst, dest, op1, op2) arguments.
// NOTE(review): the condition choosing between the two calls (presumably
// `notSigned`) is not visible in this excerpt.
81 template <template <typename T> class Base>
83 decodeNeonUSThreeUReg(bool notSigned, unsigned size,
84 ExtMachInst machInst, IntRegIndex dest,
85 IntRegIndex op1, IntRegIndex op2)
88 return decodeNeonUThreeUReg<Base>(size, machInst, dest, op1, op2);
90 return decodeNeonSThreeUReg<Base>(size, machInst, dest, op1, op2);
// Like decodeNeonUThreeUReg but without a 64-bit case: yields
// Base<uint8_t>, Base<uint16_t> or Base<uint32_t> built from
// (machInst, dest, op1, op2), or an Unknown instruction.
// NOTE(review): the dispatch between these returns (presumably a switch on
// `size`) is not visible in this excerpt.
94 template <template <typename T> class Base>
96 decodeNeonUThreeUSReg(unsigned size,
97 ExtMachInst machInst, IntRegIndex dest,
98 IntRegIndex op1, IntRegIndex op2)
102 return new Base<uint8_t>(machInst, dest, op1, op2);
104 return new Base<uint16_t>(machInst, dest, op1, op2);
106 return new Base<uint32_t>(machInst, dest, op1, op2);
108 return new Unknown(machInst);
// Signed counterpart of decodeNeonUThreeUSReg: yields Base<int8_t>,
// Base<int16_t> or Base<int32_t> built from (machInst, dest, op1, op2),
// or an Unknown instruction. There is no 64-bit case.
// NOTE(review): the dispatch between these returns (presumably a switch on
// `size`) is not visible in this excerpt.
112 template <template <typename T> class Base>
114 decodeNeonSThreeUSReg(unsigned size,
115 ExtMachInst machInst, IntRegIndex dest,
116 IntRegIndex op1, IntRegIndex op2)
120 return new Base<int8_t>(machInst, dest, op1, op2);
122 return new Base<int16_t>(machInst, dest, op1, op2);
124 return new Base<int32_t>(machInst, dest, op1, op2);
126 return new Unknown(machInst);
// Forwards to decodeNeonUThreeUSReg<Base> or decodeNeonSThreeUSReg<Base>
// with the same (size, machInst, dest, op1, op2) arguments.
// NOTE(review): the condition choosing between the two calls (presumably
// `notSigned`) is not visible in this excerpt.
130 template <template <typename T> class Base>
132 decodeNeonUSThreeUSReg(bool notSigned, unsigned size,
133 ExtMachInst machInst, IntRegIndex dest,
134 IntRegIndex op1, IntRegIndex op2)
137 return decodeNeonUThreeUSReg<Base>(
138 size, machInst, dest, op1, op2);
140 return decodeNeonSThreeUSReg<Base>(
141 size, machInst, dest, op1, op2);
// Forwards to decodeNeonUThreeUSReg (unsigned, no 64-bit case),
// instantiated with either BaseQ or BaseD.
// NOTE(review): the condition choosing the template (presumably BaseQ when
// `q` is set) is not visible in this excerpt.
145 template <template <typename T> class BaseD,
146 template <typename T> class BaseQ>
148 decodeNeonUThreeSReg(bool q, unsigned size,
149 ExtMachInst machInst, IntRegIndex dest,
150 IntRegIndex op1, IntRegIndex op2)
153 return decodeNeonUThreeUSReg<BaseQ>(
154 size, machInst, dest, op1, op2);
156 return decodeNeonUThreeUSReg<BaseD>(
157 size, machInst, dest, op1, op2);
// Forwards to decodeNeonSThreeUSReg (signed, no 64-bit case), instantiated
// with either BaseQ or BaseD.
// NOTE(review): the condition choosing the template (presumably BaseQ when
// `q` is set) is not visible in this excerpt.
161 template <template <typename T> class BaseD,
162 template <typename T> class BaseQ>
164 decodeNeonSThreeSReg(bool q, unsigned size,
165 ExtMachInst machInst, IntRegIndex dest,
166 IntRegIndex op1, IntRegIndex op2)
169 return decodeNeonSThreeUSReg<BaseQ>(
170 size, machInst, dest, op1, op2);
172 return decodeNeonSThreeUSReg<BaseD>(
173 size, machInst, dest, op1, op2);
// Forwards to decodeNeonUThreeSReg<BaseD, BaseQ> or
// decodeNeonSThreeSReg<BaseD, BaseQ> with (q, size, machInst, dest, op1,
// op2).
// NOTE(review): the condition choosing between the two calls (presumably
// `notSigned`) is not visible in this excerpt.
177 template <template <typename T> class BaseD,
178 template <typename T> class BaseQ>
180 decodeNeonUSThreeSReg(bool q, bool notSigned, unsigned size,
181 ExtMachInst machInst, IntRegIndex dest,
182 IntRegIndex op1, IntRegIndex op2)
185 return decodeNeonUThreeSReg<BaseD, BaseQ>(
186 q, size, machInst, dest, op1, op2);
188 return decodeNeonSThreeSReg<BaseD, BaseQ>(
189 q, size, machInst, dest, op1, op2);
// Forwards to decodeNeonUThreeUReg (unsigned, including the 64-bit case),
// instantiated with either BaseQ or BaseD.
// NOTE(review): the condition choosing the template (presumably BaseQ when
// `q` is set) is not visible in this excerpt.
193 template <template <typename T> class BaseD,
194 template <typename T> class BaseQ>
196 decodeNeonUThreeReg(bool q, unsigned size,
197 ExtMachInst machInst, IntRegIndex dest,
198 IntRegIndex op1, IntRegIndex op2)
201 return decodeNeonUThreeUReg<BaseQ>(
202 size, machInst, dest, op1, op2);
204 return decodeNeonUThreeUReg<BaseD>(
205 size, machInst, dest, op1, op2);
// Forwards to decodeNeonSThreeUReg (signed, including the 64-bit case),
// instantiated with either BaseQ or BaseD.
// NOTE(review): the condition choosing the template (presumably BaseQ when
// `q` is set) is not visible in this excerpt.
209 template <template <typename T> class BaseD,
210 template <typename T> class BaseQ>
212 decodeNeonSThreeReg(bool q, unsigned size,
213 ExtMachInst machInst, IntRegIndex dest,
214 IntRegIndex op1, IntRegIndex op2)
217 return decodeNeonSThreeUReg<BaseQ>(
218 size, machInst, dest, op1, op2);
220 return decodeNeonSThreeUReg<BaseD>(
221 size, machInst, dest, op1, op2);
// Forwards to decodeNeonUThreeReg<BaseD, BaseQ> or
// decodeNeonSThreeReg<BaseD, BaseQ> with (q, size, machInst, dest, op1,
// op2).
// NOTE(review): the condition choosing between the two calls (presumably
// `notSigned`) is not visible in this excerpt.
225 template <template <typename T> class BaseD,
226 template <typename T> class BaseQ>
228 decodeNeonUSThreeReg(bool q, bool notSigned, unsigned size,
229 ExtMachInst machInst, IntRegIndex dest,
230 IntRegIndex op1, IntRegIndex op2)
233 return decodeNeonUThreeReg<BaseD, BaseQ>(
234 q, size, machInst, dest, op1, op2);
236 return decodeNeonSThreeReg<BaseD, BaseQ>(
237 q, size, machInst, dest, op1, op2);
// Decode helper for NEON forms taking a destination, one source register
// and an immediate, with unsigned elements. Yields BaseQ<T> or BaseD<T>
// for T in uint8_t/uint16_t/uint32_t/uint64_t, built from
// (machInst, dest, op1, imm), or an Unknown instruction.
// NOTE(review): the q test and the dispatch between element types
// (presumably `if (q)` around a switch on `size`) are not visible in this
// excerpt.
241 template <template <typename T> class BaseD,
242 template <typename T> class BaseQ>
244 decodeNeonUTwoShiftReg(bool q, unsigned size,
245 ExtMachInst machInst, IntRegIndex dest,
246 IntRegIndex op1, uint64_t imm)
251 return new BaseQ<uint8_t>(machInst, dest, op1, imm);
253 return new BaseQ<uint16_t>(machInst, dest, op1, imm);
255 return new BaseQ<uint32_t>(machInst, dest, op1, imm);
257 return new BaseQ<uint64_t>(machInst, dest, op1, imm);
259 return new Unknown(machInst);
264 return new BaseD<uint8_t>(machInst, dest, op1, imm);
266 return new BaseD<uint16_t>(machInst, dest, op1, imm);
268 return new BaseD<uint32_t>(machInst, dest, op1, imm);
270 return new BaseD<uint64_t>(machInst, dest, op1, imm);
272 return new Unknown(machInst);
// Signed counterpart of decodeNeonUTwoShiftReg. Yields BaseQ<T> or
// BaseD<T> for T in int8_t/int16_t/int32_t/int64_t, built from
// (machInst, dest, op1, imm), or an Unknown instruction.
// NOTE(review): the q test and the dispatch between element types
// (presumably `if (q)` around a switch on `size`) are not visible in this
// excerpt.
277 template <template <typename T> class BaseD,
278 template <typename T> class BaseQ>
280 decodeNeonSTwoShiftReg(bool q, unsigned size,
281 ExtMachInst machInst, IntRegIndex dest,
282 IntRegIndex op1, uint64_t imm)
287 return new BaseQ<int8_t>(machInst, dest, op1, imm);
289 return new BaseQ<int16_t>(machInst, dest, op1, imm);
291 return new BaseQ<int32_t>(machInst, dest, op1, imm);
293 return new BaseQ<int64_t>(machInst, dest, op1, imm);
295 return new Unknown(machInst);
300 return new BaseD<int8_t>(machInst, dest, op1, imm);
302 return new BaseD<int16_t>(machInst, dest, op1, imm);
304 return new BaseD<int32_t>(machInst, dest, op1, imm);
306 return new BaseD<int64_t>(machInst, dest, op1, imm);
308 return new Unknown(machInst);
// Forwards to decodeNeonUTwoShiftReg<BaseD, BaseQ> or
// decodeNeonSTwoShiftReg<BaseD, BaseQ> with (q, size, machInst, dest, op1,
// imm).
// NOTE(review): the condition choosing between the two calls (presumably
// `notSigned`) is not visible in this excerpt.
314 template <template <typename T> class BaseD,
315 template <typename T> class BaseQ>
317 decodeNeonUSTwoShiftReg(bool q, bool notSigned, unsigned size,
318 ExtMachInst machInst, IntRegIndex dest,
319 IntRegIndex op1, uint64_t imm)
322 return decodeNeonUTwoShiftReg<BaseD, BaseQ>(
323 q, size, machInst, dest, op1, imm);
325 return decodeNeonSTwoShiftReg<BaseD, BaseQ>(
326 q, size, machInst, dest, op1, imm);
// Register-plus-immediate decode helper with unsigned elements and no
// 64-bit case: yields Base<uint8_t>, Base<uint16_t> or Base<uint32_t>
// built from (machInst, dest, op1, imm), or an Unknown instruction.
// NOTE(review): the dispatch between these returns (presumably a switch on
// `size`) is not visible in this excerpt.
330 template <template <typename T> class Base>
332 decodeNeonUTwoShiftUSReg(unsigned size,
333 ExtMachInst machInst, IntRegIndex dest,
334 IntRegIndex op1, uint64_t imm)
338 return new Base<uint8_t>(machInst, dest, op1, imm);
340 return new Base<uint16_t>(machInst, dest, op1, imm);
342 return new Base<uint32_t>(machInst, dest, op1, imm);
344 return new Unknown(machInst);
// Forwards to decodeNeonUTwoShiftUSReg (unsigned, no 64-bit case),
// instantiated with either BaseQ or BaseD.
// NOTE(review): the condition choosing the template (presumably BaseQ when
// `q` is set) is not visible in this excerpt.
348 template <template <typename T> class BaseD,
349 template <typename T> class BaseQ>
351 decodeNeonUTwoShiftSReg(bool q, unsigned size,
352 ExtMachInst machInst, IntRegIndex dest,
353 IntRegIndex op1, uint64_t imm)
356 return decodeNeonUTwoShiftUSReg<BaseQ>(
357 size, machInst, dest, op1, imm);
359 return decodeNeonUTwoShiftUSReg<BaseD>(
360 size, machInst, dest, op1, imm);
// Signed counterpart of decodeNeonUTwoShiftUSReg: yields Base<int8_t>,
// Base<int16_t> or Base<int32_t> built from (machInst, dest, op1, imm),
// or an Unknown instruction. There is no 64-bit case.
// NOTE(review): the dispatch between these returns (presumably a switch on
// `size`) is not visible in this excerpt.
364 template <template <typename T> class Base>
366 decodeNeonSTwoShiftUSReg(unsigned size,
367 ExtMachInst machInst, IntRegIndex dest,
368 IntRegIndex op1, uint64_t imm)
372 return new Base<int8_t>(machInst, dest, op1, imm);
374 return new Base<int16_t>(machInst, dest, op1, imm);
376 return new Base<int32_t>(machInst, dest, op1, imm);
378 return new Unknown(machInst);
// Forwards to decodeNeonSTwoShiftUSReg (signed, no 64-bit case),
// instantiated with either BaseQ or BaseD.
// NOTE(review): the condition choosing the template (presumably BaseQ when
// `q` is set) is not visible in this excerpt.
382 template <template <typename T> class BaseD,
383 template <typename T> class BaseQ>
385 decodeNeonSTwoShiftSReg(bool q, unsigned size,
386 ExtMachInst machInst, IntRegIndex dest,
387 IntRegIndex op1, uint64_t imm)
390 return decodeNeonSTwoShiftUSReg<BaseQ>(
391 size, machInst, dest, op1, imm);
393 return decodeNeonSTwoShiftUSReg<BaseD>(
394 size, machInst, dest, op1, imm);
// Forwards to decodeNeonUTwoShiftSReg<BaseD, BaseQ> or
// decodeNeonSTwoShiftSReg<BaseD, BaseQ> with (q, size, machInst, dest, op1,
// imm).
// NOTE(review): the condition choosing between the two calls (presumably
// `notSigned`) is not visible in this excerpt.
398 template <template <typename T> class BaseD,
399 template <typename T> class BaseQ>
401 decodeNeonUSTwoShiftSReg(bool q, bool notSigned, unsigned size,
402 ExtMachInst machInst, IntRegIndex dest,
403 IntRegIndex op1, uint64_t imm)
406 return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
407 q, size, machInst, dest, op1, imm);
409 return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
410 q, size, machInst, dest, op1, imm);
// Decode helper for NEON forms taking a destination and a single source
// register, unsigned elements, no 64-bit case: yields Base<uint8_t>,
// Base<uint16_t> or Base<uint32_t> built from (machInst, dest, op1), or an
// Unknown instruction.
// NOTE(review): the `op1` parameter line and the dispatch (presumably a
// switch on `size`) are not visible in this excerpt.
414 template <template <typename T> class Base>
416 decodeNeonUTwoMiscUSReg(unsigned size,
417 ExtMachInst machInst, IntRegIndex dest,
422 return new Base<uint8_t>(machInst, dest, op1);
424 return new Base<uint16_t>(machInst, dest, op1);
426 return new Base<uint32_t>(machInst, dest, op1);
428 return new Unknown(machInst);
// Signed counterpart of decodeNeonUTwoMiscUSReg: yields Base<int8_t>,
// Base<int16_t> or Base<int32_t> built from (machInst, dest, op1), or an
// Unknown instruction. There is no 64-bit case.
// NOTE(review): the `op1` parameter line and the dispatch (presumably a
// switch on `size`) are not visible in this excerpt.
432 template <template <typename T> class Base>
434 decodeNeonSTwoMiscUSReg(unsigned size,
435 ExtMachInst machInst, IntRegIndex dest,
440 return new Base<int8_t>(machInst, dest, op1);
442 return new Base<int16_t>(machInst, dest, op1);
444 return new Base<int32_t>(machInst, dest, op1);
446 return new Unknown(machInst);
// Forwards to decodeNeonUTwoMiscUSReg (unsigned, no 64-bit case),
// instantiated with either BaseQ or BaseD.
// NOTE(review): the `op1` parameter line and the condition choosing the
// template (presumably BaseQ when `q` is set) are not visible here.
450 template <template <typename T> class BaseD,
451 template <typename T> class BaseQ>
453 decodeNeonUTwoMiscSReg(bool q, unsigned size,
454 ExtMachInst machInst, IntRegIndex dest,
458 return decodeNeonUTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
460 return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
// Forwards to decodeNeonSTwoMiscUSReg (signed, no 64-bit case),
// instantiated with either BaseQ or BaseD.
// NOTE(review): the `op1` parameter line and the condition choosing the
// template (presumably BaseQ when `q` is set) are not visible here.
464 template <template <typename T> class BaseD,
465 template <typename T> class BaseQ>
467 decodeNeonSTwoMiscSReg(bool q, unsigned size,
468 ExtMachInst machInst, IntRegIndex dest,
472 return decodeNeonSTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
474 return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
// Single-source decode helper with unsigned elements including 64-bit:
// yields Base<uint8_t>, Base<uint16_t>, Base<uint32_t> or Base<uint64_t>
// built from (machInst, dest, op1), or an Unknown instruction.
// NOTE(review): the `op1` parameter line and the dispatch (presumably a
// switch on `size`) are not visible in this excerpt.
478 template <template <typename T> class Base>
480 decodeNeonUTwoMiscUReg(unsigned size,
481 ExtMachInst machInst, IntRegIndex dest,
486 return new Base<uint8_t>(machInst, dest, op1);
488 return new Base<uint16_t>(machInst, dest, op1);
490 return new Base<uint32_t>(machInst, dest, op1);
492 return new Base<uint64_t>(machInst, dest, op1);
494 return new Unknown(machInst);
// Signed counterpart of decodeNeonUTwoMiscUReg: yields Base<int8_t>,
// Base<int16_t>, Base<int32_t> or Base<int64_t> built from
// (machInst, dest, op1), or an Unknown instruction.
// NOTE(review): the `op1` parameter line and the dispatch (presumably a
// switch on `size`) are not visible in this excerpt.
498 template <template <typename T> class Base>
500 decodeNeonSTwoMiscUReg(unsigned size,
501 ExtMachInst machInst, IntRegIndex dest,
506 return new Base<int8_t>(machInst, dest, op1);
508 return new Base<int16_t>(machInst, dest, op1);
510 return new Base<int32_t>(machInst, dest, op1);
512 return new Base<int64_t>(machInst, dest, op1);
514 return new Unknown(machInst);
// Forwards to decodeNeonSTwoMiscUReg (signed, including the 64-bit case),
// instantiated with either BaseQ or BaseD.
// NOTE(review): the `op1` parameter line and the condition choosing the
// template (presumably BaseQ when `q` is set) are not visible here.
518 template <template <typename T> class BaseD,
519 template <typename T> class BaseQ>
521 decodeNeonSTwoMiscReg(bool q, unsigned size,
522 ExtMachInst machInst, IntRegIndex dest,
526 return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
528 return decodeNeonSTwoMiscUReg<BaseD>(size, machInst, dest, op1);
// Forwards to decodeNeonUTwoMiscUReg (unsigned, including the 64-bit
// case), instantiated with either BaseQ or BaseD.
// NOTE(review): the `op1` parameter line and the condition choosing the
// template (presumably BaseQ when `q` is set) are not visible here.
532 template <template <typename T> class BaseD,
533 template <typename T> class BaseQ>
535 decodeNeonUTwoMiscReg(bool q, unsigned size,
536 ExtMachInst machInst, IntRegIndex dest,
540 return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
542 return decodeNeonUTwoMiscUReg<BaseD>(size, machInst, dest, op1);
// Forwards to decodeNeonUTwoMiscSReg<BaseD, BaseQ> or
// decodeNeonSTwoMiscSReg<BaseD, BaseQ> with (q, size, machInst, dest, op1).
// Fix: the calls previously named the *TwoShiftSReg helpers, which take an
// extra `uint64_t imm` argument; this function has no immediate and passes
// five arguments, so any instantiation would not have compiled. The
// *TwoMiscSReg helpers are the five-argument siblings.
// NOTE(review): the `op1` parameter line and the condition choosing between
// the two calls (presumably `notSigned`) are not visible in this excerpt.
546 template <template <typename T> class BaseD,
547 template <typename T> class BaseQ>
549 decodeNeonUSTwoMiscSReg(bool q, bool notSigned, unsigned size,
550 ExtMachInst machInst, IntRegIndex dest,
554 return decodeNeonUTwoMiscSReg<BaseD, BaseQ>(
555 q, size, machInst, dest, op1);
557 return decodeNeonSTwoMiscSReg<BaseD, BaseQ>(
558 q, size, machInst, dest, op1);
// Greater-than comparison of two floats. Note the sense of the result:
// 0.0 when op1 > op2, 1.0 otherwise.
// NOTE(review): the value returned when either operand is a signalling NaN
// is on a line not visible in this excerpt.
566 vcgtFunc(float op1, float op2)
568 if (isSnan(op1) || isSnan(op2))
570 return (op1 > op2) ? 0.0 : 1.0;
// Greater-or-equal comparison of two floats. Note the sense of the result:
// 0.0 when op1 >= op2, 1.0 otherwise.
// NOTE(review): the value returned when either operand is a signalling NaN
// is on a line not visible in this excerpt.
574 vcgeFunc(float op1, float op2)
576 if (isSnan(op1) || isSnan(op2))
578 return (op1 >= op2) ? 0.0 : 1.0;
// Equality comparison of two floats. Note the sense of the result:
// 0.0 when op1 == op2, 1.0 otherwise.
// NOTE(review): the value returned when either operand is a signalling NaN
// is on a line not visible in this excerpt.
582 vceqFunc(float op1, float op2)
584 if (isSnan(op1) || isSnan(op2))
586 return (op1 == op2) ? 0.0 : 1.0;
// Less-or-equal comparison of two floats. Note the sense of the result:
// 0.0 when op1 <= op2, 1.0 otherwise.
// NOTE(review): the value returned when either operand is a signalling NaN
// is on a line not visible in this excerpt.
590 vcleFunc(float op1, float op2)
592 if (isSnan(op1) || isSnan(op2))
594 return (op1 <= op2) ? 0.0 : 1.0;
// Less-than comparison of two floats. Note the sense of the result:
// 0.0 when op1 < op2, 1.0 otherwise.
// NOTE(review): the value returned when either operand is a signalling NaN
// is on a line not visible in this excerpt.
598 vcltFunc(float op1, float op2)
600 if (isSnan(op1) || isSnan(op2))
602 return (op1 < op2) ? 0.0 : 1.0;
// Absolute-value greater-than comparison: compares fabsf(op1) with
// fabsf(op2) and yields 0.0 when the first is larger, 1.0 otherwise.
// NOTE(review): the value returned when either operand is a signalling NaN
// is on a line not visible in this excerpt.
606 vacgtFunc(float op1, float op2)
608 if (isSnan(op1) || isSnan(op2))
610 return (fabsf(op1) > fabsf(op2)) ? 0.0 : 1.0;
// Absolute-value greater-or-equal comparison: compares fabsf(op1) with
// fabsf(op2) and yields 0.0 when the first is at least as large, 1.0
// otherwise.
// NOTE(review): the value returned when either operand is a signalling NaN
// is on a line not visible in this excerpt.
614 vacgeFunc(float op1, float op2)
616 if (isSnan(op1) || isSnan(op2))
618 return (fabsf(op1) >= fabsf(op2)) ? 0.0 : 1.0;
# Tuples of C integer type names (as strings), grouped by signedness and
# width. The "small" groups stop at 32 bits; the unqualified groups append
# the 64-bit type. smallTypes and allTypes concatenate the unsigned group
# followed by the signed group.
627 smallUnsignedTypes = ("uint8_t", "uint16_t", "uint32_t")
628 unsignedTypes = smallUnsignedTypes + ("uint64_t",)
629 smallSignedTypes = ("int8_t", "int16_t", "int32_t")
630 signedTypes = smallSignedTypes + ("int64_t",)
631 smallTypes = smallUnsignedTypes + smallSignedTypes
632 allTypes = unsignedTypes + signedTypes
# Generate a NEON instruction with two source vectors and a destination of
# the same register count (rCount).
#
# The generated C++ loads FpOp1P<n>/FpOp2P<n> into srcReg1/srcReg2, walks
# eCount elements applying `op`, and stores destReg back to FpDestP<n>.
# Two element walks are visible: one reads adjacent element pairs, taking
# them from srcReg1 while 2 * i < eCount and from srcReg2 afterwards, and
# one reads srcReg1/srcReg2 at the same index i.
# NOTE(review): which walk is used (presumably selected by `pairwise`), the
# readDest guard, and the loop over `types` are on lines not visible here.
#
# Appends the NeonRegRegRegOpDeclare substitution to header_output, and the
# NeonEqualRegExecute and NeonExecDeclare substitutions to exec_output.
634 def threeEqualRegInst(name, Name, opClass, types, rCount, op,
635 readDest=False, pairwise=False):
636 global header_output, exec_output
637 eWalkCode = simdEnabledCheckCode + '''
638 RegVect srcReg1, srcReg2, destReg;
640 for reg in range(rCount):
642 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
643 srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
644 ''' % { "reg" : reg }
647 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
648 ''' % { "reg" : reg }
651 readDestCode = 'destElem = gtoh(destReg.elements[i]);'
654 for (unsigned i = 0; i < eCount; i++) {
655 Element srcElem1 = gtoh(2 * i < eCount ?
656 srcReg1.elements[2 * i] :
657 srcReg2.elements[2 * i - eCount]);
658 Element srcElem2 = gtoh(2 * i < eCount ?
659 srcReg1.elements[2 * i + 1] :
660 srcReg2.elements[2 * i + 1 - eCount]);
664 destReg.elements[i] = htog(destElem);
666 ''' % { "op" : op, "readDest" : readDestCode }
669 for (unsigned i = 0; i < eCount; i++) {
670 Element srcElem1 = gtoh(srcReg1.elements[i]);
671 Element srcElem2 = gtoh(srcReg2.elements[i]);
675 destReg.elements[i] = htog(destElem);
677 ''' % { "op" : op, "readDest" : readDestCode }
678 for reg in range(rCount):
680 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
681 ''' % { "reg" : reg }
682 iop = InstObjParams(name, Name,
686 "predicate_test": predicateTest,
687 "op_class": opClass }, [])
688 header_output += NeonRegRegRegOpDeclare.subst(iop)
689 exec_output += NeonEqualRegExecute.subst(iop)
691 substDict = { "targs" : type,
692 "class_name" : Name }
693 exec_output += NeonExecDeclare.subst(substDict)
# Floating-point variant of the three-register generator: the generated C++
# walks whole registers (loop index r over rCount) instead of elements.
#
# Sources are loaded as FloatReg values from FpOp1P<n>/FpOp2P<n>. The
# destination is held either as a FloatVect (written back through
# FpDestP<n>) or as a RegVect of raw bits (written back through
# FpDestP<n>.uw), with destType/writeDest switched to match.
# Two register walks are visible: one reads adjacent register pairs, taking
# them from srcRegs1 while 2 * r < rCount and from srcRegs2 afterwards, and
# one reads srcRegs1/srcRegs2 at the same index r.
# NOTE(review): the guards choosing between these alternatives (presumably
# `toInt`, `readDest` and `pairwise`) and the loop over `types` are on
# lines not visible in this excerpt.
#
# Appends the NeonRegRegRegOpDeclare substitution to header_output, and the
# NeonEqualRegExecute and NeonExecDeclare substitutions to exec_output.
695 def threeEqualRegInstFp(name, Name, opClass, types, rCount, op,
696 readDest=False, pairwise=False, toInt=False):
697 global header_output, exec_output
698 eWalkCode = simdEnabledCheckCode + '''
699 typedef FloatReg FloatVect[rCount];
700 FloatVect srcRegs1, srcRegs2;
703 eWalkCode += 'RegVect destRegs;\n'
705 eWalkCode += 'FloatVect destRegs;\n'
706 for reg in range(rCount):
708 srcRegs1[%(reg)d] = FpOp1P%(reg)d;
709 srcRegs2[%(reg)d] = FpOp2P%(reg)d;
710 ''' % { "reg" : reg }
714 destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
715 ''' % { "reg" : reg }
718 destRegs[%(reg)d] = FpDestP%(reg)d;
719 ''' % { "reg" : reg }
722 readDestCode = 'destReg = destRegs[r];'
723 destType = 'FloatReg'
724 writeDest = 'destRegs[r] = destReg;'
726 destType = 'FloatRegBits'
727 writeDest = 'destRegs.regs[r] = destReg;'
730 for (unsigned r = 0; r < rCount; r++) {
731 FloatReg srcReg1 = (2 * r < rCount) ?
732 srcRegs1[2 * r] : srcRegs2[2 * r - rCount];
733 FloatReg srcReg2 = (2 * r < rCount) ?
734 srcRegs1[2 * r + 1] : srcRegs2[2 * r + 1 - rCount];
735 %(destType)s destReg;
741 "readDest" : readDestCode,
742 "destType" : destType,
743 "writeDest" : writeDest }
746 for (unsigned r = 0; r < rCount; r++) {
747 FloatReg srcReg1 = srcRegs1[r];
748 FloatReg srcReg2 = srcRegs2[r];
749 %(destType)s destReg;
755 "readDest" : readDestCode,
756 "destType" : destType,
757 "writeDest" : writeDest }
758 for reg in range(rCount):
761 FpDestP%(reg)d.uw = destRegs.regs[%(reg)d];
762 ''' % { "reg" : reg }
765 FpDestP%(reg)d = destRegs[%(reg)d];
766 ''' % { "reg" : reg }
767 iop = InstObjParams(name, Name,
771 "predicate_test": predicateTest,
772 "op_class": opClass }, [])
773 header_output += NeonRegRegRegOpDeclare.subst(iop)
774 exec_output += NeonEqualRegExecute.subst(iop)
776 substDict = { "targs" : type,
777 "class_name" : Name }
778 exec_output += NeonExecDeclare.subst(substDict)
# Generate a three-register NEON instruction whose operands may differ in
# width. Register counts start at 2 and type prefixes start empty for both
# sources and the destination.
# NOTE(review): the lines that adjust the counts/prefixes (presumably from
# bigSrc1/bigSrc2/bigDest), the register vector declarations, the readDest
# guard and the loop over `types` are not visible in this excerpt.
#
# The generated C++ loads the sources from FpOp1P<n>/FpOp2P<n>, walks eCount
# elements applying `op`, and stores destReg back to FpDestP<n>.
# NOTE(review): srcElem2 is declared with %(src1Prefix)s although a
# "src2Prefix" entry is supplied and otherwise unused in the visible
# template -- looks like a copy/paste slip; confirm before changing, since
# it alters the generated type of srcElem2 when bigSrc1 != bigSrc2.
#
# Appends the NeonRegRegRegOpDeclare substitution to header_output, and the
# NeonUnequalRegExecute and NeonExecDeclare substitutions to exec_output.
780 def threeUnequalRegInst(name, Name, opClass, types, op,
781 bigSrc1, bigSrc2, bigDest, readDest):
782 global header_output, exec_output
783 src1Cnt = src2Cnt = destCnt = 2
784 src1Prefix = src2Prefix = destPrefix = ''
794 eWalkCode = simdEnabledCheckCode + '''
798 ''' % (src1Prefix, src2Prefix, destPrefix)
799 for reg in range(src1Cnt):
801 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
802 ''' % { "reg" : reg }
803 for reg in range(src2Cnt):
805 srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
806 ''' % { "reg" : reg }
808 for reg in range(destCnt):
810 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
811 ''' % { "reg" : reg }
814 readDestCode = 'destElem = gtoh(destReg.elements[i]);'
816 for (unsigned i = 0; i < eCount; i++) {
817 %(src1Prefix)sElement srcElem1 = gtoh(srcReg1.elements[i]);
818 %(src1Prefix)sElement srcElem2 = gtoh(srcReg2.elements[i]);
819 %(destPrefix)sElement destElem;
822 destReg.elements[i] = htog(destElem);
824 ''' % { "op" : op, "readDest" : readDestCode,
825 "src1Prefix" : src1Prefix, "src2Prefix" : src2Prefix,
826 "destPrefix" : destPrefix }
827 for reg in range(destCnt):
829 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
830 ''' % { "reg" : reg }
831 iop = InstObjParams(name, Name,
835 "predicate_test": predicateTest,
836 "op_class": opClass }, [])
837 header_output += NeonRegRegRegOpDeclare.subst(iop)
838 exec_output += NeonUnequalRegExecute.subst(iop)
840 substDict = { "targs" : type,
841 "class_name" : Name }
842 exec_output += NeonExecDeclare.subst(substDict)
# Narrowing form: calls threeUnequalRegInst with bigSrc1=True, bigSrc2=True
# and bigDest=False, passing readDest through.
844 def threeRegNarrowInst(name, Name, opClass, types, op, readDest=False):
845 threeUnequalRegInst(name, Name, opClass, types, op,
846 True, True, False, readDest)
# Long form: calls threeUnequalRegInst with bigSrc1=False, bigSrc2=False and
# bigDest=True, passing readDest through.
848 def threeRegLongInst(name, Name, opClass, types, op, readDest=False):
849 threeUnequalRegInst(name, Name, opClass, types, op,
850 False, False, True, readDest)
# Wide form: calls threeUnequalRegInst with bigSrc1=True, bigSrc2=False and
# bigDest=True, passing readDest through.
852 def threeRegWideInst(name, Name, opClass, types, op, readDest=False):
853 threeUnequalRegInst(name, Name, opClass, types, op,
854 True, False, True, readDest)
# Generate a NEON instruction that combines each element of the first
# source with one fixed element of the second source.
#
# The generated C++ loads FpOp1P<n>/FpOp2P<n> into srcReg1/srcReg2, asserts
# 0 <= imm < eCount, then for every i applies `op` to srcReg1.elements[i]
# and srcReg2.elements[imm], storing destReg back to FpDestP<n>.
# NOTE(review): the readDest guard and the loop over `types` are on lines
# not visible in this excerpt.
#
# Appends the NeonRegRegRegImmOpDeclare substitution to header_output, and
# the NeonEqualRegExecute and NeonExecDeclare substitutions to exec_output.
856 def twoEqualRegInst(name, Name, opClass, types, rCount, op, readDest=False):
857 global header_output, exec_output
858 eWalkCode = simdEnabledCheckCode + '''
859 RegVect srcReg1, srcReg2, destReg;
861 for reg in range(rCount):
863 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
864 srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
865 ''' % { "reg" : reg }
868 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
869 ''' % { "reg" : reg }
872 readDestCode = 'destElem = gtoh(destReg.elements[i]);'
874 assert(imm >= 0 && imm < eCount);
875 for (unsigned i = 0; i < eCount; i++) {
876 Element srcElem1 = gtoh(srcReg1.elements[i]);
877 Element srcElem2 = gtoh(srcReg2.elements[imm]);
881 destReg.elements[i] = htog(destElem);
883 ''' % { "op" : op, "readDest" : readDestCode }
884 for reg in range(rCount):
886 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
887 ''' % { "reg" : reg }
888 iop = InstObjParams(name, Name,
892 "predicate_test": predicateTest,
893 "op_class": opClass }, [])
894 header_output += NeonRegRegRegImmOpDeclare.subst(iop)
895 exec_output += NeonEqualRegExecute.subst(iop)
897 substDict = { "targs" : type,
898 "class_name" : Name }
899 exec_output += NeonExecDeclare.subst(substDict)
# Long (widening) counterpart of twoEqualRegInst: the destination spans
# 2 * rCount registers while each source spans rCount.
#
# The generated C++ asserts 0 <= imm < eCount, then for every i applies `op`
# to srcReg1.elements[i] and srcReg2.elements[imm], storing destReg back to
# FpDestP<n>.
# NOTE(review): the assignment of rCount, the destReg declaration, the
# readDest guard and the loop over `types` are on lines not visible in this
# excerpt. The stray second ';' on the FpOp2P load is harmless.
#
# Appends the NeonRegRegRegImmOpDeclare substitution to header_output, and
# the NeonUnequalRegExecute and NeonExecDeclare substitutions to
# exec_output.
901 def twoRegLongInst(name, Name, opClass, types, op, readDest=False):
902 global header_output, exec_output
904 eWalkCode = simdEnabledCheckCode + '''
905 RegVect srcReg1, srcReg2;
908 for reg in range(rCount):
910 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
911 srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);;
912 ''' % { "reg" : reg }
914 for reg in range(2 * rCount):
916 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
917 ''' % { "reg" : reg }
920 readDestCode = 'destElem = gtoh(destReg.elements[i]);'
922 assert(imm >= 0 && imm < eCount);
923 for (unsigned i = 0; i < eCount; i++) {
924 Element srcElem1 = gtoh(srcReg1.elements[i]);
925 Element srcElem2 = gtoh(srcReg2.elements[imm]);
929 destReg.elements[i] = htog(destElem);
931 ''' % { "op" : op, "readDest" : readDestCode }
932 for reg in range(2 * rCount):
934 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
935 ''' % { "reg" : reg }
936 iop = InstObjParams(name, Name,
940 "predicate_test": predicateTest,
941 "op_class": opClass }, [])
942 header_output += NeonRegRegRegImmOpDeclare.subst(iop)
943 exec_output += NeonUnequalRegExecute.subst(iop)
945 substDict = { "targs" : type,
946 "class_name" : Name }
947 exec_output += NeonExecDeclare.subst(substDict)
# Floating-point counterpart of twoEqualRegInst, working on whole FloatReg
# registers rather than elements.
#
# The generated C++ loads FpOp1P<n>/FpOp2P<n> into srcRegs1/srcRegs2,
# asserts 0 <= imm < rCount, then for every i applies `op` to srcRegs1[i]
# and srcRegs2[imm], storing destRegs back to FpDestP<n>.
# NOTE(review): the readDest guard and the loop over `types` are on lines
# not visible in this excerpt.
#
# Appends the NeonRegRegRegImmOpDeclare substitution to header_output, and
# the NeonEqualRegExecute and NeonExecDeclare substitutions to exec_output.
949 def twoEqualRegInstFp(name, Name, opClass, types, rCount, op, readDest=False):
950 global header_output, exec_output
951 eWalkCode = simdEnabledCheckCode + '''
952 typedef FloatReg FloatVect[rCount];
953 FloatVect srcRegs1, srcRegs2, destRegs;
955 for reg in range(rCount):
957 srcRegs1[%(reg)d] = FpOp1P%(reg)d;
958 srcRegs2[%(reg)d] = FpOp2P%(reg)d;
959 ''' % { "reg" : reg }
962 destRegs[%(reg)d] = FpDestP%(reg)d;
963 ''' % { "reg" : reg }
966 readDestCode = 'destReg = destRegs[i];'
968 assert(imm >= 0 && imm < rCount);
969 for (unsigned i = 0; i < rCount; i++) {
970 FloatReg srcReg1 = srcRegs1[i];
971 FloatReg srcReg2 = srcRegs2[imm];
975 destRegs[i] = destReg;
977 ''' % { "op" : op, "readDest" : readDestCode }
978 for reg in range(rCount):
980 FpDestP%(reg)d = destRegs[%(reg)d];
981 ''' % { "reg" : reg }
982 iop = InstObjParams(name, Name,
986 "predicate_test": predicateTest,
987 "op_class": opClass }, [])
988 header_output += NeonRegRegRegImmOpDeclare.subst(iop)
989 exec_output += NeonEqualRegExecute.subst(iop)
991 substDict = { "targs" : type,
992 "class_name" : Name }
993 exec_output += NeonExecDeclare.subst(substDict)
# Generate a one-source NEON instruction that takes an immediate, with
# source and destination of the same register count (rCount).
#
# The generated C++ loads FpOp1P<n> into srcRegs1, walks eCount items and
# stores destRegs back to FpDestP<n>. Both the source read and the
# destination declaration/write come in two flavours: per element
# (Element / destRegs.elements[i]) or per raw register
# (FloatRegBits / destRegs.regs[i]).
# NOTE(review): the guards choosing the flavours (presumably `toInt` and
# `fromInt`), the readDest guard, the placement of `op` in the loop body
# and the loop over `types` are on lines not visible in this excerpt.
#
# Appends the NeonRegRegImmOpDeclare substitution to header_output, and the
# NeonEqualRegExecute and NeonExecDeclare substitutions to exec_output.
995 def twoRegShiftInst(name, Name, opClass, types, rCount, op,
996 readDest=False, toInt=False, fromInt=False):
997 global header_output, exec_output
998 eWalkCode = simdEnabledCheckCode + '''
999 RegVect srcRegs1, destRegs;
1001 for reg in range(rCount):
1003 srcRegs1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1004 ''' % { "reg" : reg }
1007 destRegs.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1008 ''' % { "reg" : reg }
1011 readDestCode = 'destElem = gtoh(destRegs.elements[i]);'
1013 readDestCode = 'destReg = gtoh(destRegs.regs[i]);'
1014 readOpCode = 'Element srcElem1 = gtoh(srcRegs1.elements[i]);'
1016 readOpCode = 'FloatRegBits srcReg1 = gtoh(srcRegs1.regs[i]);'
1017 declDest = 'Element destElem;'
1018 writeDestCode = 'destRegs.elements[i] = htog(destElem);'
1020 declDest = 'FloatRegBits destReg;'
1021 writeDestCode = 'destRegs.regs[i] = htog(destReg);'
1023 for (unsigned i = 0; i < eCount; i++) {
1030 ''' % { "readOp" : readOpCode,
1031 "declDest" : declDest,
1032 "readDest" : readDestCode,
1034 "writeDest" : writeDestCode }
1035 for reg in range(rCount):
1037 FpDestP%(reg)d.uw = gtoh(destRegs.regs[%(reg)d]);
1038 ''' % { "reg" : reg }
1039 iop = InstObjParams(name, Name,
1041 { "code": eWalkCode,
1043 "predicate_test": predicateTest,
1044 "op_class": opClass }, [])
1045 header_output += NeonRegRegImmOpDeclare.subst(iop)
1046 exec_output += NeonEqualRegExecute.subst(iop)
1048 substDict = { "targs" : type,
1049 "class_name" : Name }
1050 exec_output += NeonExecDeclare.subst(substDict)
# Narrowing one-source generator with an immediate: the source spans four
# registers (FpOp1P0..3) and is read as BigElement values, while the
# destination spans two registers (FpDestP0..1).
#
# The generated C++ walks eCount elements applying `op` and stores destReg
# back to FpDestP<n>.
# NOTE(review): the register vector declarations, the destElem declaration,
# the readDest guard and the loop over `types` are on lines not visible in
# this excerpt.
#
# Appends the NeonRegRegImmOpDeclare substitution to header_output, and the
# NeonUnequalRegExecute and NeonExecDeclare substitutions to exec_output.
1052 def twoRegNarrowShiftInst(name, Name, opClass, types, op, readDest=False):
1053 global header_output, exec_output
1054 eWalkCode = simdEnabledCheckCode + '''
1058 for reg in range(4):
1060 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1061 ''' % { "reg" : reg }
1063 for reg in range(2):
1065 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1066 ''' % { "reg" : reg }
1069 readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1071 for (unsigned i = 0; i < eCount; i++) {
1072 BigElement srcElem1 = gtoh(srcReg1.elements[i]);
1076 destReg.elements[i] = htog(destElem);
1078 ''' % { "op" : op, "readDest" : readDestCode }
1079 for reg in range(2):
1081 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1082 ''' % { "reg" : reg }
1083 iop = InstObjParams(name, Name,
1085 { "code": eWalkCode,
1087 "predicate_test": predicateTest,
1088 "op_class": opClass }, [])
1089 header_output += NeonRegRegImmOpDeclare.subst(iop)
1090 exec_output += NeonUnequalRegExecute.subst(iop)
1092 substDict = { "targs" : type,
1093 "class_name" : Name }
1094 exec_output += NeonExecDeclare.subst(substDict)
1096 def twoRegLongShiftInst(name, Name, opClass, types, op, readDest=False):
1097 global header_output, exec_output
1098 eWalkCode = simdEnabledCheckCode + '''
1102 for reg in range(2):
1104 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1105 ''' % { "reg" : reg }
1107 for reg in range(4):
1109 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1110 ''' % { "reg" : reg }
1113 readDestCode = 'destReg = gtoh(destReg.elements[i]);'
1115 for (unsigned i = 0; i < eCount; i++) {
1116 Element srcElem1 = gtoh(srcReg1.elements[i]);
1117 BigElement destElem;
1120 destReg.elements[i] = htog(destElem);
1122 ''' % { "op" : op, "readDest" : readDestCode }
1123 for reg in range(4):
1125 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1126 ''' % { "reg" : reg }
1127 iop = InstObjParams(name, Name,
1129 { "code": eWalkCode,
1131 "predicate_test": predicateTest,
1132 "op_class": opClass }, [])
1133 header_output += NeonRegRegImmOpDeclare.subst(iop)
1134 exec_output += NeonUnequalRegExecute.subst(iop)
1136 substDict = { "targs" : type,
1137 "class_name" : Name }
1138 exec_output += NeonExecDeclare.subst(substDict)
# Generate a one-source NEON instruction with source and destination of the
# same register count (rCount) and no immediate.
#
# The generated C++ loads FpOp1P<n> into srcReg1, walks eCount elements
# applying `op`, and stores destReg back to FpDestP<n>. The result is
# written to destReg.elements[j].
# NOTE(review): the declaration of `j`, the readDest guard and the loop over
# `types` are on lines not visible in this excerpt.
#
# Appends the NeonRegRegOpDeclare substitution to header_output, and the
# NeonEqualRegExecute and NeonExecDeclare substitutions to exec_output.
1140 def twoRegMiscInst(name, Name, opClass, types, rCount, op, readDest=False):
1141 global header_output, exec_output
1142 eWalkCode = simdEnabledCheckCode + '''
1143 RegVect srcReg1, destReg;
1145 for reg in range(rCount):
1147 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1148 ''' % { "reg" : reg }
1151 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1152 ''' % { "reg" : reg }
1155 readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1157 for (unsigned i = 0; i < eCount; i++) {
1159 Element srcElem1 = gtoh(srcReg1.elements[i]);
1163 destReg.elements[j] = htog(destElem);
1165 ''' % { "op" : op, "readDest" : readDestCode }
1166 for reg in range(rCount):
1168 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1169 ''' % { "reg" : reg }
1170 iop = InstObjParams(name, Name,
1172 { "code": eWalkCode,
1174 "predicate_test": predicateTest,
1175 "op_class": opClass }, [])
1176 header_output += NeonRegRegOpDeclare.subst(iop)
1177 exec_output += NeonEqualRegExecute.subst(iop)
1179 substDict = { "targs" : type,
1180 "class_name" : Name }
1181 exec_output += NeonExecDeclare.subst(substDict)
# Generate a one-source NEON instruction in which every destination element
# is computed from the single source element selected by `imm`
# (srcReg1.elements[imm]).
#
# The generated C++ loads FpOp1P<n> into srcReg1, walks eCount elements
# applying `op`, and stores destReg back to FpDestP<n>.
# NOTE(review): unlike twoEqualRegInst, no bounds assert on `imm` is visible
# here; the readDest guard and the loop over `types` are also on lines not
# visible in this excerpt.
#
# Appends the NeonRegRegImmOpDeclare substitution to header_output, and the
# NeonEqualRegExecute and NeonExecDeclare substitutions to exec_output.
1183 def twoRegMiscScInst(name, Name, opClass, types, rCount, op, readDest=False):
1184 global header_output, exec_output
1185 eWalkCode = simdEnabledCheckCode + '''
1186 RegVect srcReg1, destReg;
1188 for reg in range(rCount):
1190 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1191 ''' % { "reg" : reg }
1194 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1195 ''' % { "reg" : reg }
1198 readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1200 for (unsigned i = 0; i < eCount; i++) {
1201 Element srcElem1 = gtoh(srcReg1.elements[imm]);
1205 destReg.elements[i] = htog(destElem);
1207 ''' % { "op" : op, "readDest" : readDestCode }
1208 for reg in range(rCount):
1210 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1211 ''' % { "reg" : reg }
1212 iop = InstObjParams(name, Name,
1214 { "code": eWalkCode,
1216 "predicate_test": predicateTest,
1217 "op_class": opClass }, [])
1218 header_output += NeonRegRegImmOpDeclare.subst(iop)
1219 exec_output += NeonEqualRegExecute.subst(iop)
1221 substDict = { "targs" : type,
1222 "class_name" : Name }
1223 exec_output += NeonExecDeclare.subst(substDict)
# Generate a two-register NEON instruction that updates both of its
# operands: the generated C++ loads FpOp1P<n> and FpDestP<n> into
# srcReg1/destReg up front and, after the operation, writes destReg back to
# FpDestP<n> and srcReg1 back to FpOp1P<n>.
# NOTE(review): the code that applies `op` between the load and the
# write-back, the readDest guard and the loop over `types` are on lines not
# visible in this excerpt.
#
# Appends the NeonRegRegOpDeclare substitution to header_output, and the
# NeonEqualRegExecute and NeonExecDeclare substitutions to exec_output.
1225 def twoRegMiscScramble(name, Name, opClass, types, rCount, op, readDest=False):
1226 global header_output, exec_output
1227 eWalkCode = simdEnabledCheckCode + '''
1228 RegVect srcReg1, destReg;
1230 for reg in range(rCount):
1232 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1233 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1234 ''' % { "reg" : reg }
1237 ''' % { "reg" : reg }
1240 readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1242 for reg in range(rCount):
1244 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1245 FpOp1P%(reg)d.uw = gtoh(srcReg1.regs[%(reg)d]);
1246 ''' % { "reg" : reg }
1247 iop = InstObjParams(name, Name,
1249 { "code": eWalkCode,
1251 "predicate_test": predicateTest,
1252 "op_class": opClass }, [])
1253 header_output += NeonRegRegOpDeclare.subst(iop)
1254 exec_output += NeonEqualRegExecute.subst(iop)
1256 substDict = { "targs" : type,
1257 "class_name" : Name }
1258 exec_output += NeonExecDeclare.subst(substDict)
1260 def twoRegMiscInstFp(name, Name, opClass, types, rCount, op,
1261 readDest=False, toInt=False):
1262 global header_output, exec_output
1263 eWalkCode = simdEnabledCheckCode + '''
1264 typedef FloatReg FloatVect[rCount];
1268 eWalkCode += 'RegVect destRegs;\n'
1270 eWalkCode += 'FloatVect destRegs;\n'
1271 for reg in range(rCount):
1273 srcRegs1[%(reg)d] = FpOp1P%(reg)d;
1274 ''' % { "reg" : reg }
1278 destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
1279 ''' % { "reg" : reg }
1282 destRegs[%(reg)d] = FpDestP%(reg)d;
1283 ''' % { "reg" : reg }
1286 readDestCode = 'destReg = destRegs[i];'
1287 destType = 'FloatReg'
1288 writeDest = 'destRegs[r] = destReg;'
1290 destType = 'FloatRegBits'
1291 writeDest = 'destRegs.regs[r] = destReg;'
1293 for (unsigned r = 0; r < rCount; r++) {
1294 FloatReg srcReg1 = srcRegs1[r];
1295 %(destType)s destReg;
1301 "readDest" : readDestCode,
1302 "destType" : destType,
1303 "writeDest" : writeDest }
1304 for reg in range(rCount):
1307 FpDestP%(reg)d.uw = destRegs.regs[%(reg)d];
1308 ''' % { "reg" : reg }
1311 FpDestP%(reg)d = destRegs[%(reg)d];
1312 ''' % { "reg" : reg }
1313 iop = InstObjParams(name, Name,
1315 { "code": eWalkCode,
1317 "predicate_test": predicateTest,
1318 "op_class": opClass }, [])
1319 header_output += NeonRegRegOpDeclare.subst(iop)
1320 exec_output += NeonEqualRegExecute.subst(iop)
1322 substDict = { "targs" : type,
1323 "class_name" : Name }
1324 exec_output += NeonExecDeclare.subst(substDict)
# Generator for two-register "condense" Neon instructions.
#
# The generated loop runs over eCount / 2 positions, reads the adjacent
# source elements 2*i and 2*i+1 as srcElem1 / srcElem2, and stores a
# BigElement destElem into destReg.elements[i]. The result is emitted via
# NeonRegRegOpDeclare (header_output) and NeonUnequalRegExecute /
# NeonExecDeclare (exec_output).
#
#   name, Name - forwarded to InstObjParams; Name is also "class_name".
#   opClass    - stored under "op_class".
#   types      - NOTE(review): presumably the element types each given a
#                NeonExecDeclare; the loop header is not visible -- confirm.
#   rCount     - number of FpOp1P<n> / FpDestP<n> registers walked.
#   op         - C++ snippet substituted as %(op)s into the element loop.
#   readDest   - NOTE(review): presumably gates readDestCode -- confirm.
1326 def twoRegCondenseInst(name, Name, opClass, types, rCount, op, readDest=False):
# Both accumulators are module-level strings that are appended to below.
1327 global header_output, exec_output
1328 eWalkCode = simdEnabledCheckCode + '''
# Load the source registers, converting with htog.
1332 for reg in range(rCount):
1334 srcRegs.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1335 ''' % { "reg" : reg }
1338 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1339 ''' % { "reg" : reg }
# Snippet that pre-loads destElem from the current destination element.
1342 readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1344 for (unsigned i = 0; i < eCount / 2; i++) {
1345 Element srcElem1 = gtoh(srcRegs.elements[2 * i]);
1346 Element srcElem2 = gtoh(srcRegs.elements[2 * i + 1]);
1347 BigElement destElem;
1350 destReg.elements[i] = htog(destElem);
1352 ''' % { "op" : op, "readDest" : readDestCode }
# Write the destination registers back, converting with gtoh.
1353 for reg in range(rCount):
1355 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1356 ''' % { "reg" : reg }
# Package the assembled code for template substitution.
1357 iop = InstObjParams(name, Name,
1359 { "code": eWalkCode,
1361 "predicate_test": predicateTest,
1362 "op_class": opClass }, [])
1363 header_output += NeonRegRegOpDeclare.subst(iop)
1364 exec_output += NeonUnequalRegExecute.subst(iop)
# "targs" carries the current element type, "class_name" the class.
1366 substDict = { "targs" : type,
1367 "class_name" : Name }
1368 exec_output += NeonExecDeclare.subst(substDict)
# Generator for two-register narrowing "misc" Neon instructions.
#
# Register counts are fixed: four source registers (FpOp1P0..3) and two
# destination registers (FpDestP0..1). The generated loop reads each
# source element as a BigElement and stores destElem into
# destReg.elements[i]. Output goes through NeonRegRegOpDeclare,
# NeonUnequalRegExecute and NeonExecDeclare.
#
#   name, Name - forwarded to InstObjParams; Name is also "class_name".
#   opClass    - stored under "op_class".
#   types      - NOTE(review): presumably the element types each given a
#                NeonExecDeclare; the loop header is not visible -- confirm.
#   op         - C++ snippet substituted as %(op)s into the element loop.
#   readDest   - NOTE(review): presumably gates the destination pre-load
#                and readDestCode -- confirm.
1370 def twoRegNarrowMiscInst(name, Name, opClass, types, op, readDest=False):
# Both accumulators are module-level strings that are appended to below.
1371 global header_output, exec_output
1372 eWalkCode = simdEnabledCheckCode + '''
# Load the four source registers.
1376 for reg in range(4):
1378 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1379 ''' % { "reg" : reg }
# Load the two destination registers.
1381 for reg in range(2):
1383 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1384 ''' % { "reg" : reg }
# Snippet that pre-loads destElem from the current destination element.
1387 readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1389 for (unsigned i = 0; i < eCount; i++) {
1390 BigElement srcElem1 = gtoh(srcReg1.elements[i]);
1394 destReg.elements[i] = htog(destElem);
1396 ''' % { "op" : op, "readDest" : readDestCode }
# Write the two destination registers back.
1397 for reg in range(2):
1399 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1400 ''' % { "reg" : reg }
# Package the assembled code for template substitution.
1401 iop = InstObjParams(name, Name,
1403 { "code": eWalkCode,
1405 "predicate_test": predicateTest,
1406 "op_class": opClass }, [])
1407 header_output += NeonRegRegOpDeclare.subst(iop)
1408 exec_output += NeonUnequalRegExecute.subst(iop)
# "targs" carries the current element type, "class_name" the class.
1410 substDict = { "targs" : type,
1411 "class_name" : Name }
1412 exec_output += NeonExecDeclare.subst(substDict)
# Generator for one-register-plus-immediate Neon instructions.
#
# Only destination registers (FpDestP<n>) are touched; there is no source
# register load. The generated loop runs over eCount elements and stores
# destElem into destReg.elements[i]. Unlike its siblings this generator
# declares the class through NeonRegImmOpDeclare; execution still uses
# NeonEqualRegExecute and NeonExecDeclare.
#
#   name, Name - forwarded to InstObjParams; Name is also "class_name".
#   opClass    - stored under "op_class".
#   types      - NOTE(review): presumably the element types each given a
#                NeonExecDeclare; the loop header is not visible -- confirm.
#   rCount     - number of FpDestP<n> registers walked.
#   op         - C++ snippet substituted as %(op)s into the element loop.
#   readDest   - NOTE(review): presumably gates the destination pre-load
#                and readDestCode -- confirm.
1414 def oneRegImmInst(name, Name, opClass, types, rCount, op, readDest=False):
# Both accumulators are module-level strings that are appended to below.
1415 global header_output, exec_output
1416 eWalkCode = simdEnabledCheckCode + '''
# Load the destination registers.
1420 for reg in range(rCount):
1422 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1423 ''' % { "reg" : reg }
# Snippet that pre-loads destElem from the current destination element.
1426 readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1428 for (unsigned i = 0; i < eCount; i++) {
1432 destReg.elements[i] = htog(destElem);
1434 ''' % { "op" : op, "readDest" : readDestCode }
# Write the destination registers back.
1435 for reg in range(rCount):
1437 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1438 ''' % { "reg" : reg }
# Package the assembled code for template substitution.
1439 iop = InstObjParams(name, Name,
1441 { "code": eWalkCode,
1443 "predicate_test": predicateTest,
1444 "op_class": opClass }, [])
1445 header_output += NeonRegImmOpDeclare.subst(iop)
1446 exec_output += NeonEqualRegExecute.subst(iop)
# "targs" carries the current element type, "class_name" the class.
1448 substDict = { "targs" : type,
1449 "class_name" : Name }
1450 exec_output += NeonExecDeclare.subst(substDict)
1452 def twoRegLongMiscInst(name, Name, opClass, types, op, readDest=False):
1453 global header_output, exec_output
1454 eWalkCode = simdEnabledCheckCode + '''
1458 for reg in range(2):
1460 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1461 ''' % { "reg" : reg }
1463 for reg in range(4):
1465 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1466 ''' % { "reg" : reg }
1469 readDestCode = 'destReg = gtoh(destReg.elements[i]);'
1471 for (unsigned i = 0; i < eCount; i++) {
1472 Element srcElem1 = gtoh(srcReg1.elements[i]);
1473 BigElement destElem;
1476 destReg.elements[i] = htog(destElem);
1478 ''' % { "op" : op, "readDest" : readDestCode }
1479 for reg in range(4):
1481 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1482 ''' % { "reg" : reg }
1483 iop = InstObjParams(name, Name,
1485 { "code": eWalkCode,
1487 "predicate_test": predicateTest,
1488 "op_class": opClass }, [])
1489 header_output += NeonRegRegOpDeclare.subst(iop)
1490 exec_output += NeonUnequalRegExecute.subst(iop)
1492 substDict = { "targs" : type,
1493 "class_name" : Name }
1494 exec_output += NeonExecDeclare.subst(substDict)
1498 (((unsigned)srcElem1 & 0x1) +
1499 ((unsigned)srcElem2 & 0x1)) >> 1;
1500 // Use division instead of a shift to ensure the sign extension works
1501 // right. The compiler will figure out if it can be a shift. Mask the
1502 // inputs so they get truncated correctly.
1503 destElem = (((srcElem1 & ~(Element)1) / 2) +
1504 ((srcElem2 & ~(Element)1) / 2)) + carryBit;
# vhadd: halve each operand (low bit masked off), add the halves, then add
# carryBit. The D variant passes 2 and the Q variant 4 (presumably the
# register count, going by the rCount parameter of the visible generators).
1506 threeEqualRegInst("vhadd", "VhaddD", "SimdAddOp", allTypes, 2, vhaddCode)
1507 threeEqualRegInst("vhadd", "VhaddQ", "SimdAddOp", allTypes, 4, vhaddCode)
1511 (((unsigned)srcElem1 & 0x1) +
1512 ((unsigned)srcElem2 & 0x1) + 1) >> 1;
1513 // Use division instead of a shift to ensure the sign extension works
1514 // right. The compiler will figure out if it can be a shift. Mask the
1515 // inputs so they get truncated correctly.
1516 destElem = (((srcElem1 & ~(Element)1) / 2) +
1517 ((srcElem2 & ~(Element)1) / 2)) + carryBit;
# vrhadd: same as vhadd except that the low-bit sum gets an extra +1
# before being shifted down.
1519 threeEqualRegInst("vrhadd", "VrhaddD", "SimdAddOp", allTypes, 2, vrhaddCode)
1520 threeEqualRegInst("vrhadd", "VrhaddQ", "SimdAddOp", allTypes, 4, vrhaddCode)
1524 (((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1;
1525 // Use division instead of a shift to ensure the sign extension works
1526 // right. The compiler will figure out if it can be a shift. Mask the
1527 // inputs so they get truncated correctly.
1528 destElem = (((srcElem1 & ~(Element)1) / 2) -
1529 ((srcElem2 & ~(Element)1) / 2)) - barrowBit;
# vhsub: subtract the halved operands, then subtract the borrow.
# NOTE(review): "barrowBit" in the snippet looks like a typo for
# "borrowBit"; a rename must also cover its declaration, which is not
# visible here.
1531 threeEqualRegInst("vhsub", "VhsubD", "SimdAddOp", allTypes, 2, vhsubCode)
1532 threeEqualRegInst("vhsub", "VhsubQ", "SimdAddOp", allTypes, 4, vhsubCode)
1535 destElem = srcElem1 & srcElem2;
# vand: bitwise AND. All bitwise instructions in this group are
# instantiated over unsignedTypes, with 2 for the D and 4 for the Q form.
1537 threeEqualRegInst("vand", "VandD", "SimdAluOp", unsignedTypes, 2, vandCode)
1538 threeEqualRegInst("vand", "VandQ", "SimdAluOp", unsignedTypes, 4, vandCode)
1541 destElem = srcElem1 & ~srcElem2;
# vbic: AND with the complement of the second operand.
1543 threeEqualRegInst("vbic", "VbicD", "SimdAluOp", unsignedTypes, 2, vbicCode)
1544 threeEqualRegInst("vbic", "VbicQ", "SimdAluOp", unsignedTypes, 4, vbicCode)
1547 destElem = srcElem1 | srcElem2;
# vorr: bitwise OR.
1549 threeEqualRegInst("vorr", "VorrD", "SimdAluOp", unsignedTypes, 2, vorrCode)
1550 threeEqualRegInst("vorr", "VorrQ", "SimdAluOp", unsignedTypes, 4, vorrCode)
# vmov reuses vorrCode but is registered under the SimdMiscOp op class.
1552 threeEqualRegInst("vmov", "VmovD", "SimdMiscOp", unsignedTypes, 2, vorrCode)
1553 threeEqualRegInst("vmov", "VmovQ", "SimdMiscOp", unsignedTypes, 4, vorrCode)
1556 destElem = srcElem1 | ~srcElem2;
# vorn: OR with the complement of the second operand.
1558 threeEqualRegInst("vorn", "VornD", "SimdAluOp", unsignedTypes, 2, vornCode)
1559 threeEqualRegInst("vorn", "VornQ", "SimdAluOp", unsignedTypes, 4, vornCode)
1562 destElem = srcElem1 ^ srcElem2;
# veor: bitwise exclusive OR.
1564 threeEqualRegInst("veor", "VeorD", "SimdAluOp", unsignedTypes, 2, veorCode)
1565 threeEqualRegInst("veor", "VeorQ", "SimdAluOp", unsignedTypes, 4, veorCode)
1568 destElem = (destElem & srcElem2) | (srcElem1 & ~srcElem2);
# vbif: keep destElem bits where srcElem2 is set, take srcElem1 bits where
# it is clear. The snippet reads destElem; the trailing True is presumably
# the readDest flag.
1570 threeEqualRegInst("vbif", "VbifD", "SimdAluOp", unsignedTypes, 2, vbifCode, True)
1571 threeEqualRegInst("vbif", "VbifQ", "SimdAluOp", unsignedTypes, 4, vbifCode, True)
1573 destElem = (srcElem1 & srcElem2) | (destElem & ~srcElem2);
# vbit: take srcElem1 bits where srcElem2 is set, keep destElem bits
# elsewhere.
1575 threeEqualRegInst("vbit", "VbitD", "SimdAluOp", unsignedTypes, 2, vbitCode, True)
1576 threeEqualRegInst("vbit", "VbitQ", "SimdAluOp", unsignedTypes, 4, vbitCode, True)
1578 destElem = (srcElem1 & destElem) | (srcElem2 & ~destElem);
# vbsl: destElem acts as the selector between srcElem1 (bit set) and
# srcElem2 (bit clear).
1580 threeEqualRegInst("vbsl", "VbslD", "SimdAluOp", unsignedTypes, 2, vbslCode, True)
1581 threeEqualRegInst("vbsl", "VbslQ", "SimdAluOp", unsignedTypes, 4, vbslCode, True)
1584 destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2;
# vmax: element-wise maximum over allTypes (2 for the D form, 4 for Q).
1586 threeEqualRegInst("vmax", "VmaxD", "SimdCmpOp", allTypes, 2, vmaxCode)
1587 threeEqualRegInst("vmax", "VmaxQ", "SimdCmpOp", allTypes, 4, vmaxCode)
1590 destElem = (srcElem1 < srcElem2) ? srcElem1 : srcElem2;
# vmin: element-wise minimum over allTypes.
1592 threeEqualRegInst("vmin", "VminD", "SimdCmpOp", allTypes, 2, vminCode)
1593 threeEqualRegInst("vmin", "VminQ", "SimdCmpOp", allTypes, 4, vminCode)
1596 destElem = srcElem1 + srcElem2;
# vadd: plain element add, instantiated over unsignedTypes.
1598 threeEqualRegInst("vadd", "NVaddD", "SimdAddOp", unsignedTypes, 2, vaddCode)
1599 threeEqualRegInst("vadd", "NVaddQ", "SimdAddOp", unsignedTypes, 4, vaddCode)
# vpadd reuses vaddCode with pairwise=True.
1601 threeEqualRegInst("vpadd", "NVpaddD", "SimdAddOp", unsignedTypes,
1602 2, vaddCode, pairwise=True)
1603 threeEqualRegInst("vpadd", "NVpaddQ", "SimdAddOp", unsignedTypes,
1604 4, vaddCode, pairwise=True)
1606 destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
# vaddl / vaddw share one snippet that adds after widening both operands
# to BigElement.
1608 threeRegLongInst("vaddl", "Vaddl", "SimdAddOp", smallTypes, vaddlwCode)
1609 threeRegWideInst("vaddw", "Vaddw", "SimdAddOp", smallTypes, vaddlwCode)
1611 destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >>
1612 (sizeof(Element) * 8);
# vaddhn: widened sum shifted right by the Element bit width.
1614 threeRegNarrowInst("vaddhn", "Vaddhn", "SimdAddOp", smallTypes, vaddhnCode)
1616 destElem = ((BigElement)srcElem1 + (BigElement)srcElem2 +
1617 ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
1618 (sizeof(Element) * 8);
# vraddhn: as vaddhn, with 1 << (Element bits - 1) added before the shift.
1620 threeRegNarrowInst("vraddhn", "Vraddhn", "SimdAddOp", smallTypes, vraddhnCode)
1623 destElem = srcElem1 - srcElem2;
# vsub: plain element subtract, instantiated over unsignedTypes.
1625 threeEqualRegInst("vsub", "NVsubD", "SimdAddOp", unsignedTypes, 2, vsubCode)
1626 threeEqualRegInst("vsub", "NVsubQ", "SimdAddOp", unsignedTypes, 4, vsubCode)
1628 destElem = (BigElement)srcElem1 - (BigElement)srcElem2;
# vsubl / vsubw share one snippet that subtracts after widening to
# BigElement.
1630 threeRegLongInst("vsubl", "Vsubl", "SimdAddOp", smallTypes, vsublwCode)
1631 threeRegWideInst("vsubw", "Vsubw", "SimdAddOp", smallTypes, vsublwCode)
1634 destElem = srcElem1 + srcElem2;
1635 FPSCR fpscr = (FPSCR) FpscrQc;
1636 if (destElem < srcElem1 || destElem < srcElem2) {
1637 destElem = (Element)(-1);
# vqadd (unsigned): if the sum is smaller than either operand, destElem is
# replaced with (Element)(-1). The snippet reads FpscrQc into fpscr.
1642 threeEqualRegInst("vqadd", "VqaddUD", "SimdAddOp", unsignedTypes, 2, vqaddUCode)
1643 threeEqualRegInst("vqadd", "VqaddUQ", "SimdAddOp", unsignedTypes, 4, vqaddUCode)
1645 destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >>
1646 (sizeof(Element) * 8);
# vsubhn: widened difference shifted right by the Element bit width.
1648 threeRegNarrowInst("vsubhn", "Vsubhn", "SimdAddOp", smallTypes, vsubhnCode)
1650 destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 +
1651 ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
1652 (sizeof(Element) * 8);
# vrsubhn: as vsubhn, with 1 << (Element bits - 1) added before the shift.
1654 threeRegNarrowInst("vrsubhn", "Vrsubhn", "SimdAddOp", smallTypes, vrsubhnCode)
1657 destElem = srcElem1 + srcElem2;
1658 FPSCR fpscr = (FPSCR) FpscrQc;
1659 bool negDest = (destElem < 0);
1660 bool negSrc1 = (srcElem1 < 0);
1661 bool negSrc2 = (srcElem2 < 0);
1662 if ((negDest != negSrc1) && (negSrc1 == negSrc2)) {
1663 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
# vqadd (signed): the special case fires when both sources share a sign
# and the sum's sign differs from it.
1670 threeEqualRegInst("vqadd", "VqaddSD", "SimdAddOp", signedTypes, 2, vqaddSCode)
1671 threeEqualRegInst("vqadd", "VqaddSQ", "SimdAddOp", signedTypes, 4, vqaddSCode)
1674 destElem = srcElem1 - srcElem2;
1675 FPSCR fpscr = (FPSCR) FpscrQc;
1676 if (destElem > srcElem1) {
# vqsub (unsigned): the special case fires when the difference is larger
# than the first operand.
1682 threeEqualRegInst("vqsub", "VqsubUD", "SimdAddOp", unsignedTypes, 2, vqsubUCode)
1683 threeEqualRegInst("vqsub", "VqsubUQ", "SimdAddOp", unsignedTypes, 4, vqsubUCode)
1686 destElem = srcElem1 - srcElem2;
1687 FPSCR fpscr = (FPSCR) FpscrQc;
1688 bool negDest = (destElem < 0);
1689 bool negSrc1 = (srcElem1 < 0);
1690 bool posSrc2 = (srcElem2 >= 0);
1691 if ((negDest != negSrc1) && (negSrc1 == posSrc2)) {
1692 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
# vqsub (signed): same sign test as signed vqadd, with the second
# operand's sign inverted (posSrc2).
1699 threeEqualRegInst("vqsub", "VqsubSD", "SimdAddOp", signedTypes, 2, vqsubSCode)
1700 threeEqualRegInst("vqsub", "VqsubSQ", "SimdAddOp", signedTypes, 4, vqsubSCode)
1703 destElem = (srcElem1 > srcElem2) ? (Element)(-1) : 0;
# vcgt: (Element)(-1) when srcElem1 > srcElem2, otherwise 0.
1705 threeEqualRegInst("vcgt", "VcgtD", "SimdCmpOp", allTypes, 2, vcgtCode)
1706 threeEqualRegInst("vcgt", "VcgtQ", "SimdCmpOp", allTypes, 4, vcgtCode)
1709 destElem = (srcElem1 >= srcElem2) ? (Element)(-1) : 0;
# vcge: (Element)(-1) when srcElem1 >= srcElem2, otherwise 0.
1711 threeEqualRegInst("vcge", "VcgeD", "SimdCmpOp", allTypes, 2, vcgeCode)
1712 threeEqualRegInst("vcge", "VcgeQ", "SimdCmpOp", allTypes, 4, vcgeCode)
1715 destElem = (srcElem1 == srcElem2) ? (Element)(-1) : 0;
# vceq: (Element)(-1) on equality, otherwise 0; instantiated over
# unsignedTypes only.
1717 threeEqualRegInst("vceq", "VceqD", "SimdCmpOp", unsignedTypes, 2, vceqCode)
1718 threeEqualRegInst("vceq", "VceqQ", "SimdCmpOp", unsignedTypes, 4, vceqCode)
1721 int16_t shiftAmt = (int8_t)srcElem2;
1723 shiftAmt = -shiftAmt;
1724 if (shiftAmt >= sizeof(Element) * 8) {
1725 shiftAmt = sizeof(Element) * 8 - 1;
1728 destElem = (srcElem1 >> shiftAmt);
1730 // Make sure the right shift sign extended when it should.
1731 if (ltz(srcElem1) && !ltz(destElem)) {
1732 destElem |= -((Element)1 << (sizeof(Element) * 8 -
1736 if (shiftAmt >= sizeof(Element) * 8) {
1739 destElem = srcElem1 << shiftAmt;
# vshl (register): the shift amount is the low byte of srcElem2 read as a
# signed value; the snippet has a right-shift path (amount negated, sign
# extension patched up by hand) and a left-shift path.
1743 threeEqualRegInst("vshl", "VshlD", "SimdAluOp", allTypes, 2, vshlCode)
1744 threeEqualRegInst("vshl", "VshlQ", "SimdAluOp", allTypes, 4, vshlCode)
1747 int16_t shiftAmt = (int8_t)srcElem2;
1749 shiftAmt = -shiftAmt;
1751 if (shiftAmt <= sizeof(Element) * 8)
1752 rBit = bits(srcElem1, shiftAmt - 1);
1753 if (shiftAmt > sizeof(Element) * 8 && ltz(srcElem1))
1755 if (shiftAmt >= sizeof(Element) * 8) {
1756 shiftAmt = sizeof(Element) * 8 - 1;
1759 destElem = (srcElem1 >> shiftAmt);
1761 // Make sure the right shift sign extended when it should.
1762 if (ltz(srcElem1) && !ltz(destElem)) {
1763 destElem |= -((Element)1 << (sizeof(Element) * 8 -
1767 } else if (shiftAmt > 0) {
1768 if (shiftAmt >= sizeof(Element) * 8) {
1771 destElem = srcElem1 << shiftAmt;
1774 destElem = srcElem1;
# vrshl: like vshl, but the right-shift path first captures rBit, the
# last bit shifted out (bit shiftAmt - 1 of srcElem1).
1777 threeEqualRegInst("vrshl", "VrshlD", "SimdAluOp", allTypes, 2, vrshlCode)
1778 threeEqualRegInst("vrshl", "VrshlQ", "SimdAluOp", allTypes, 4, vrshlCode)
1781 int16_t shiftAmt = (int8_t)srcElem2;
1782 FPSCR fpscr = (FPSCR) FpscrQc;
1784 shiftAmt = -shiftAmt;
1785 if (shiftAmt >= sizeof(Element) * 8) {
1786 shiftAmt = sizeof(Element) * 8 - 1;
1789 destElem = (srcElem1 >> shiftAmt);
1791 } else if (shiftAmt > 0) {
1792 if (shiftAmt >= sizeof(Element) * 8) {
1793 if (srcElem1 != 0) {
1794 destElem = mask(sizeof(Element) * 8);
1800 if (bits(srcElem1, sizeof(Element) * 8 - 1,
1801 sizeof(Element) * 8 - shiftAmt)) {
1802 destElem = mask(sizeof(Element) * 8);
1805 destElem = srcElem1 << shiftAmt;
1809 destElem = srcElem1;
# vqshl (unsigned): on a left shift, destElem becomes the all-ones mask
# when any of the bits that would be shifted out is set.
1813 threeEqualRegInst("vqshl", "VqshlUD", "SimdAluOp", unsignedTypes, 2, vqshlUCode)
1814 threeEqualRegInst("vqshl", "VqshlUQ", "SimdAluOp", unsignedTypes, 4, vqshlUCode)
1817 int16_t shiftAmt = (int8_t)srcElem2;
1818 FPSCR fpscr = (FPSCR) FpscrQc;
1820 shiftAmt = -shiftAmt;
1821 if (shiftAmt >= sizeof(Element) * 8) {
1822 shiftAmt = sizeof(Element) * 8 - 1;
1825 destElem = (srcElem1 >> shiftAmt);
1827 // Make sure the right shift sign extended when it should.
1828 if (srcElem1 < 0 && destElem >= 0) {
1829 destElem |= -((Element)1 << (sizeof(Element) * 8 -
1832 } else if (shiftAmt > 0) {
1834 if (shiftAmt >= sizeof(Element) * 8) {
1840 if (bits(srcElem1, sizeof(Element) * 8 - 1,
1841 sizeof(Element) * 8 - 1 - shiftAmt) !=
1842 ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
1845 destElem = srcElem1 << shiftAmt;
1850 destElem = mask(sizeof(Element) * 8 - 1);
1852 destElem = ~destElem;
1855 destElem = srcElem1;
# vqshl (signed): the left-shift check compares the top shiftAmt + 1 bits
# of srcElem1 against all-ones (negative input) or zero.
# NOTE(review): this signed form is registered as "SimdCmpOp" while the
# unsigned vqshl above uses "SimdAluOp" -- confirm the op class is
# intentional.
1859 threeEqualRegInst("vqshl", "VqshlSD", "SimdCmpOp", signedTypes, 2, vqshlSCode)
1860 threeEqualRegInst("vqshl", "VqshlSQ", "SimdCmpOp", signedTypes, 4, vqshlSCode)
1863 int16_t shiftAmt = (int8_t)srcElem2;
1864 FPSCR fpscr = (FPSCR) FpscrQc;
1866 shiftAmt = -shiftAmt;
1868 if (shiftAmt <= sizeof(Element) * 8)
1869 rBit = bits(srcElem1, shiftAmt - 1);
1870 if (shiftAmt >= sizeof(Element) * 8) {
1871 shiftAmt = sizeof(Element) * 8 - 1;
1874 destElem = (srcElem1 >> shiftAmt);
1878 if (shiftAmt >= sizeof(Element) * 8) {
1879 if (srcElem1 != 0) {
1880 destElem = mask(sizeof(Element) * 8);
1886 if (bits(srcElem1, sizeof(Element) * 8 - 1,
1887 sizeof(Element) * 8 - shiftAmt)) {
1888 destElem = mask(sizeof(Element) * 8);
1891 destElem = srcElem1 << shiftAmt;
# vqrshl (unsigned): combines the rBit capture of vrshl with the all-ones
# replacement of unsigned vqshl.
1897 threeEqualRegInst("vqrshl", "VqrshlUD", "SimdCmpOp", unsignedTypes, 2, vqrshlUCode)
1898 threeEqualRegInst("vqrshl", "VqrshlUQ", "SimdCmpOp", unsignedTypes, 4, vqrshlUCode)
1901 int16_t shiftAmt = (int8_t)srcElem2;
1902 FPSCR fpscr = (FPSCR) FpscrQc;
1904 shiftAmt = -shiftAmt;
1906 if (shiftAmt <= sizeof(Element) * 8)
1907 rBit = bits(srcElem1, shiftAmt - 1);
1908 if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
1910 if (shiftAmt >= sizeof(Element) * 8) {
1911 shiftAmt = sizeof(Element) * 8 - 1;
1914 destElem = (srcElem1 >> shiftAmt);
1916 // Make sure the right shift sign extended when it should.
1917 if (srcElem1 < 0 && destElem >= 0) {
1918 destElem |= -((Element)1 << (sizeof(Element) * 8 -
1922 } else if (shiftAmt > 0) {
1924 if (shiftAmt >= sizeof(Element) * 8) {
1930 if (bits(srcElem1, sizeof(Element) * 8 - 1,
1931 sizeof(Element) * 8 - 1 - shiftAmt) !=
1932 ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
1935 destElem = srcElem1 << shiftAmt;
1940 destElem = mask(sizeof(Element) * 8 - 1);
1942 destElem = ~destElem;
1945 destElem = srcElem1;
# vqrshl (signed): combines the rBit capture of vrshl with the top-bit
# comparison of signed vqshl.
1949 threeEqualRegInst("vqrshl", "VqrshlSD", "SimdCmpOp", signedTypes, 2, vqrshlSCode)
1950 threeEqualRegInst("vqrshl", "VqrshlSQ", "SimdCmpOp", signedTypes, 4, vqrshlSCode)
1953 destElem += (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
1954 (srcElem2 - srcElem1);
# vaba: add the larger-minus-smaller difference onto destElem. The
# trailing True is presumably the readDest flag, since destElem is
# accumulated into.
1956 threeEqualRegInst("vaba", "VabaD", "SimdAddAccOp", allTypes, 2, vabaCode, True)
1957 threeEqualRegInst("vaba", "VabaQ", "SimdAddAccOp", allTypes, 4, vabaCode, True)
1959 destElem += (srcElem1 > srcElem2) ?
1960 ((BigElement)srcElem1 - (BigElement)srcElem2) :
1961 ((BigElement)srcElem2 - (BigElement)srcElem1);
# vabal: as vaba, with both operands widened to BigElement before the
# subtraction.
1963 threeRegLongInst("vabal", "Vabal", "SimdAddAccOp", smallTypes, vabalCode, True)
1966 destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
1967 (srcElem2 - srcElem1);
# vabd: larger-minus-smaller difference, without accumulation.
1969 threeEqualRegInst("vabd", "VabdD", "SimdAddOp", allTypes, 2, vabdCode)
1970 threeEqualRegInst("vabd", "VabdQ", "SimdAddOp", allTypes, 4, vabdCode)
1972 destElem = (srcElem1 > srcElem2) ?
1973 ((BigElement)srcElem1 - (BigElement)srcElem2) :
1974 ((BigElement)srcElem2 - (BigElement)srcElem1);
# vabdl: as vabd, with both operands widened to BigElement.
1976 threeRegLongInst("vabdl", "Vabdl", "SimdAddOp", smallTypes, vabdlCode)
1979 destElem = (srcElem1 & srcElem2) ? (Element)(-1) : 0;
# vtst: (Element)(-1) when the operands share any set bit, otherwise 0.
1981 threeEqualRegInst("vtst", "VtstD", "SimdAluOp", unsignedTypes, 2, vtstCode)
1982 threeEqualRegInst("vtst", "VtstQ", "SimdAluOp", unsignedTypes, 4, vtstCode)
1985 destElem = srcElem1 * srcElem2;
# vmul: element multiply over allTypes.
1987 threeEqualRegInst("vmul", "NVmulD", "SimdMultOp", allTypes, 2, vmulCode)
1988 threeEqualRegInst("vmul", "NVmulQ", "SimdMultOp", allTypes, 4, vmulCode)
1990 destElem = (BigElement)srcElem1 * (BigElement)srcElem2;
# vmull: multiply after widening both operands to BigElement.
1992 threeRegLongInst("vmull", "Vmull", "SimdMultOp", smallTypes, vmullCode)
1995 destElem = destElem + srcElem1 * srcElem2;
# vmla: multiply-accumulate into destElem. The trailing True is
# presumably the readDest flag, since the snippet reads destElem.
1997 threeEqualRegInst("vmla", "NVmlaD", "SimdMultAccOp", allTypes, 2, vmlaCode, True)
1998 threeEqualRegInst("vmla", "NVmlaQ", "SimdMultAccOp", allTypes, 4, vmlaCode, True)
2000 destElem = destElem + (BigElement)srcElem1 * (BigElement)srcElem2;
# vmlal: widening multiply-accumulate.
2002 threeRegLongInst("vmlal", "Vmlal", "SimdMultAccOp", smallTypes, vmlalCode, True)
2005 FPSCR fpscr = (FPSCR) FpscrQc;
2006 BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2007 Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2008 Element halfNeg = maxNeg / 2;
2009 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2010 (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2011 (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2012 midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
2015 bool negPreDest = ltz(destElem);
2016 destElem += midElem;
2017 bool negDest = ltz(destElem);
2018 bool negMid = ltz(midElem);
2019 if (negPreDest == negMid && negMid != negDest) {
2020 destElem = mask(sizeof(BigElement) * 8 - 1);
2022 destElem = ~destElem;
# vqdmlal: the doubled product midElem is added to destElem; the special
# case fires when destElem and midElem had the same sign and the sum's
# sign differs.
2027 threeRegLongInst("vqdmlal", "Vqdmlal", "SimdMultAccOp", smallTypes, vqdmlalCode, True)
2030 FPSCR fpscr = (FPSCR) FpscrQc;
2031 BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2032 Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2033 Element halfNeg = maxNeg / 2;
2034 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2035 (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2036 (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2037 midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
2040 bool negPreDest = ltz(destElem);
2041 destElem -= midElem;
2042 bool negDest = ltz(destElem);
2043 bool posMid = ltz((BigElement)-midElem);
2044 if (negPreDest == posMid && posMid != negDest) {
2045 destElem = mask(sizeof(BigElement) * 8 - 1);
2047 destElem = ~destElem;
# vqdmlsl: as vqdmlal, but midElem is subtracted and the sign test uses
# the negated midElem.
2052 threeRegLongInst("vqdmlsl", "Vqdmlsl", "SimdMultAccOp", smallTypes, vqdmlslCode, True)
2055 FPSCR fpscr = (FPSCR) FpscrQc;
2056 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2057 if (srcElem1 == srcElem2 &&
2058 srcElem1 == (Element)((Element)1 <<
2059 (Element)(sizeof(Element) * 8 - 1))) {
2060 destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8));
# vqdmull: doubled widened product; the special case is both operands
# equal to 1 << (Element bits - 1).
# NOTE(review): registered as "SimdMultAccOp" although the snippet does
# not accumulate into destElem -- confirm the op class is intentional.
2065 threeRegLongInst("vqdmull", "Vqdmull", "SimdMultAccOp", smallTypes, vqdmullCode)
2068 destElem = destElem - srcElem1 * srcElem2;
# vmls: multiply-subtract from destElem.
2070 threeEqualRegInst("vmls", "NVmlsD", "SimdMultAccOp", allTypes, 2, vmlsCode, True)
2071 threeEqualRegInst("vmls", "NVmlsQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True)
2073 destElem = destElem - (BigElement)srcElem1 * (BigElement)srcElem2;
# vmlsl: widening multiply-subtract.
2075 threeRegLongInst("vmlsl", "Vmlsl", "SimdMultAccOp", smallTypes, vmlslCode, True)
2079 for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2080 if (bits(srcElem2, j))
2081 destElem ^= srcElem1 << j;
# vmul (NVmulp classes): for every set bit j of srcElem2, XOR
# srcElem1 << j into destElem.
2084 threeEqualRegInst("vmul", "NVmulpD", "SimdMultOp", unsignedTypes, 2, vmulpCode)
2085 threeEqualRegInst("vmul", "NVmulpQ", "SimdMultOp", unsignedTypes, 4, vmulpCode)
2088 for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2089 if (bits(srcElem2, j))
2090 destElem ^= (BigElement)srcElem1 << j;
# vmull (Vmullp class): same XOR accumulation with srcElem1 widened to
# BigElement before shifting.
2093 threeRegLongInst("vmull", "Vmullp", "SimdMultOp", smallUnsignedTypes, vmullpCode)
# vpmax / vpmin reuse vmaxCode / vminCode with pairwise=True.
2095 threeEqualRegInst("vpmax", "VpmaxD", "SimdCmpOp", allTypes, 2, vmaxCode, pairwise=True)
2096 threeEqualRegInst("vpmax", "VpmaxQ", "SimdCmpOp", allTypes, 4, vmaxCode, pairwise=True)
2098 threeEqualRegInst("vpmin", "VpminD", "SimdCmpOp", allTypes, 2, vminCode, pairwise=True)
2099 threeEqualRegInst("vpmin", "VpminQ", "SimdCmpOp", allTypes, 4, vminCode, pairwise=True)
2102 FPSCR fpscr = (FPSCR) FpscrQc;
2103 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >>
2104 (sizeof(Element) * 8);
2105 if (srcElem1 == srcElem2 &&
2106 srcElem1 == (Element)((Element)1 <<
2107 (sizeof(Element) * 8 - 1))) {
2108 destElem = ~srcElem1;
# vqdmulh: doubled product shifted right by the Element bit width; when
# both operands equal 1 << (Element bits - 1), destElem becomes ~srcElem1.
2113 threeEqualRegInst("vqdmulh", "VqdmulhD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)
2114 threeEqualRegInst("vqdmulh", "VqdmulhQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)
2117 FPSCR fpscr = (FPSCR) FpscrQc;
2118 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
2119 ((int64_t)1 << (sizeof(Element) * 8 - 1))) >>
2120 (sizeof(Element) * 8);
2121 Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2122 Element halfNeg = maxNeg / 2;
2123 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2124 (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2125 (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2127 destElem = mask(sizeof(Element) * 8 - 1);
2129 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
# vqrdmulh: as vqdmulh, with 1 << (Element bits - 1) added before the
# shift and the maxNeg / halfNeg operand pairs special-cased.
2135 threeEqualRegInst("vqrdmulh", "VqrdmulhD",
2136 "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
2137 threeEqualRegInst("vqrdmulh", "VqrdmulhQ",
2138 "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
2141 FPSCR fpscr = (FPSCR) FpscrExc;
2143 destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2145 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMaxS,
2146 true, true, VfpRoundNearest);
2147 } else if (flushToZero(srcReg1, srcReg2)) {
# vmax (float): the snippet calls processNans first and uses binaryOp
# with fpMaxS; all floating-point instantiations in this group pass the
# single-entry type tuple ("float",).
2152 threeEqualRegInstFp("vmax", "VmaxDFp", "SimdFloatCmpOp", ("float",), 2, vmaxfpCode)
2153 threeEqualRegInstFp("vmax", "VmaxQFp", "SimdFloatCmpOp", ("float",), 4, vmaxfpCode)
2156 FPSCR fpscr = (FPSCR) FpscrExc;
2158 destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2160 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMinS,
2161 true, true, VfpRoundNearest);
2162 } else if (flushToZero(srcReg1, srcReg2)) {
# vmin (float): same structure as vmax, using fpMinS.
2167 threeEqualRegInstFp("vmin", "VminDFp", "SimdFloatCmpOp", ("float",), 2, vminfpCode)
2168 threeEqualRegInstFp("vmin", "VminQFp", "SimdFloatCmpOp", ("float",), 4, vminfpCode)
# vpmax / vpmin (float) reuse the max/min snippets with pairwise=True.
2170 threeEqualRegInstFp("vpmax", "VpmaxDFp", "SimdFloatCmpOp", ("float",),
2171 2, vmaxfpCode, pairwise=True)
2172 threeEqualRegInstFp("vpmax", "VpmaxQFp", "SimdFloatCmpOp", ("float",),
2173 4, vmaxfpCode, pairwise=True)
2175 threeEqualRegInstFp("vpmin", "VpminDFp", "SimdFloatCmpOp", ("float",),
2176 2, vminfpCode, pairwise=True)
2177 threeEqualRegInstFp("vpmin", "VpminQFp", "SimdFloatCmpOp", ("float",),
2178 4, vminfpCode, pairwise=True)
2181 FPSCR fpscr = (FPSCR) FpscrExc;
2182 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpAddS,
2183 true, true, VfpRoundNearest);
# vadd (float): binaryOp with fpAddS.
2186 threeEqualRegInstFp("vadd", "VaddDFp", "SimdFloatAddOp", ("float",), 2, vaddfpCode)
2187 threeEqualRegInstFp("vadd", "VaddQFp", "SimdFloatAddOp", ("float",), 4, vaddfpCode)
# vpadd (float) reuses vaddfpCode with pairwise=True.
2189 threeEqualRegInstFp("vpadd", "VpaddDFp", "SimdFloatAddOp", ("float",),
2190 2, vaddfpCode, pairwise=True)
2191 threeEqualRegInstFp("vpadd", "VpaddQFp", "SimdFloatAddOp", ("float",),
2192 4, vaddfpCode, pairwise=True)
2195 FPSCR fpscr = (FPSCR) FpscrExc;
2196 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2197 true, true, VfpRoundNearest);
# vsub (float): binaryOp with fpSubS.
2200 threeEqualRegInstFp("vsub", "VsubDFp", "SimdFloatAddOp", ("float",), 2, vsubfpCode)
2201 threeEqualRegInstFp("vsub", "VsubQFp", "SimdFloatAddOp", ("float",), 4, vsubfpCode)
2204 FPSCR fpscr = (FPSCR) FpscrExc;
2205 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2206 true, true, VfpRoundNearest);
# vmul (float): binaryOp with fpMulS.
2209 threeEqualRegInstFp("vmul", "NVmulDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode)
2210 threeEqualRegInstFp("vmul", "NVmulQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode)
2213 FPSCR fpscr = (FPSCR) FpscrExc;
2214 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2215 true, true, VfpRoundNearest);
2216 destReg = binaryOp(fpscr, mid, destReg, fpAddS,
2217 true, true, VfpRoundNearest);
# vmla (float): two binaryOp calls -- multiply into mid, then add mid to
# destReg. The trailing True is presumably the readDest flag.
2220 threeEqualRegInstFp("vmla", "NVmlaDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True)
2221 threeEqualRegInstFp("vmla", "NVmlaQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True)
2224 FPSCR fpscr = (FPSCR) FpscrExc;
2225 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2226 true, true, VfpRoundNearest);
2227 destReg = binaryOp(fpscr, destReg, mid, fpSubS,
2228 true, true, VfpRoundNearest);
# vmls (float): multiply into mid, then subtract mid from destReg.
2231 threeEqualRegInstFp("vmls", "NVmlsDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True)
2232 threeEqualRegInstFp("vmls", "NVmlsQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True)
2235 FPSCR fpscr = (FPSCR) FpscrExc;
2236 float res = binaryOp(fpscr, srcReg1, srcReg2, vcgtFunc,
2237 true, true, VfpRoundNearest);
2238 destReg = (res == 0) ? -1 : 0;
# vcgt (float): destReg is -1 when binaryOp(vcgtFunc) returns 0, else 0;
# the comparison instantiations below all pass toInt = True.
2243 threeEqualRegInstFp("vcgt", "VcgtDFp", "SimdFloatCmpOp", ("float",),
2244 2, vcgtfpCode, toInt = True)
2245 threeEqualRegInstFp("vcgt", "VcgtQFp", "SimdFloatCmpOp", ("float",),
2246 4, vcgtfpCode, toInt = True)
2249 FPSCR fpscr = (FPSCR) FpscrExc;
2250 float res = binaryOp(fpscr, srcReg1, srcReg2, vcgeFunc,
2251 true, true, VfpRoundNearest);
2252 destReg = (res == 0) ? -1 : 0;
# vcge (float): same result mapping, using vcgeFunc.
2257 threeEqualRegInstFp("vcge", "VcgeDFp", "SimdFloatCmpOp", ("float",),
2258 2, vcgefpCode, toInt = True)
2259 threeEqualRegInstFp("vcge", "VcgeQFp", "SimdFloatCmpOp", ("float",),
2260 4, vcgefpCode, toInt = True)
2263 FPSCR fpscr = (FPSCR) FpscrExc;
2264 float res = binaryOp(fpscr, srcReg1, srcReg2, vacgtFunc,
2265 true, true, VfpRoundNearest);
2266 destReg = (res == 0) ? -1 : 0;
# vacgt (float): same result mapping, using vacgtFunc.
2271 threeEqualRegInstFp("vacgt", "VacgtDFp", "SimdFloatCmpOp", ("float",),
2272 2, vacgtfpCode, toInt = True)
2273 threeEqualRegInstFp("vacgt", "VacgtQFp", "SimdFloatCmpOp", ("float",),
2274 4, vacgtfpCode, toInt = True)
2277 FPSCR fpscr = (FPSCR) FpscrExc;
2278 float res = binaryOp(fpscr, srcReg1, srcReg2, vacgeFunc,
2279 true, true, VfpRoundNearest);
2280 destReg = (res == 0) ? -1 : 0;
# vacge (float): same result mapping, using vacgeFunc.
2285 threeEqualRegInstFp("vacge", "VacgeDFp", "SimdFloatCmpOp", ("float",),
2286 2, vacgefpCode, toInt = True)
2287 threeEqualRegInstFp("vacge", "VacgeQFp", "SimdFloatCmpOp", ("float",),
2288 4, vacgefpCode, toInt = True)
2291 FPSCR fpscr = (FPSCR) FpscrExc;
2292 float res = binaryOp(fpscr, srcReg1, srcReg2, vceqFunc,
2293 true, true, VfpRoundNearest);
2294 destReg = (res == 0) ? -1 : 0;
# vceq (float): same result mapping, using vceqFunc.
2299 threeEqualRegInstFp("vceq", "VceqDFp", "SimdFloatCmpOp", ("float",),
2300 2, vceqfpCode, toInt = True)
2301 threeEqualRegInstFp("vceq", "VceqQFp", "SimdFloatCmpOp", ("float",),
2302 4, vceqfpCode, toInt = True)
2305 FPSCR fpscr = (FPSCR) FpscrExc;
2306 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRecpsS,
2307 true, true, VfpRoundNearest);
# vrecps: binaryOp with fpRecpsS.
2310 threeEqualRegInstFp("vrecps", "VrecpsDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpsCode)
2311 threeEqualRegInstFp("vrecps", "VrecpsQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpsCode)
2314 FPSCR fpscr = (FPSCR) FpscrExc;
2315 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRSqrtsS,
2316 true, true, VfpRoundNearest);
# vrsqrts: binaryOp with fpRSqrtsS.
2319 threeEqualRegInstFp("vrsqrts", "VrsqrtsDFp", "SimdFloatMiscOp", ("float",), 2, vrsqrtsCode)
2320 threeEqualRegInstFp("vrsqrts", "VrsqrtsQFp", "SimdFloatMiscOp", ("float",), 4, vrsqrtsCode)
2323 FPSCR fpscr = (FPSCR) FpscrExc;
2324 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2325 true, true, VfpRoundNearest);
2326 destReg = fabs(mid);
# vabd (float): subtract with fpSubS, then take fabs of the difference.
2329 threeEqualRegInstFp("vabd", "VabdDFp", "SimdFloatAddOp", ("float",), 2, vabdfpCode)
2330 threeEqualRegInstFp("vabd", "VabdQFp", "SimdFloatAddOp", ("float",), 4, vabdfpCode)
# Second set of instantiations that reuse the snippets defined for the
# three-register forms, through the twoEqualRegInst / twoEqualRegInstFp /
# twoRegLongInst generators. Class names carry an extra "s" (e.g. VmlasD).
# NOTE(review): presumably the by-scalar encodings -- confirm against the
# generator definitions, which are not visible here.
#
# vmla.
# NOTE(review): the integer vmla forms use unsignedTypes while the vmls
# and vmul forms below use allTypes -- confirm this is intentional.
2332 twoEqualRegInst("vmla", "VmlasD", "SimdMultAccOp", unsignedTypes, 2, vmlaCode, True)
2333 twoEqualRegInst("vmla", "VmlasQ", "SimdMultAccOp", unsignedTypes, 4, vmlaCode, True)
2334 twoEqualRegInstFp("vmla", "VmlasDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True)
2335 twoEqualRegInstFp("vmla", "VmlasQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True)
2336 twoRegLongInst("vmlal", "Vmlals", "SimdMultAccOp", smallTypes, vmlalCode, True)
# vmls.
2338 twoEqualRegInst("vmls", "VmlssD", "SimdMultAccOp", allTypes, 2, vmlsCode, True)
2339 twoEqualRegInst("vmls", "VmlssQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True)
2340 twoEqualRegInstFp("vmls", "VmlssDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True)
2341 twoEqualRegInstFp("vmls", "VmlssQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True)
2342 twoRegLongInst("vmlsl", "Vmlsls", "SimdMultAccOp", smallTypes, vmlslCode, True)
# vmul.
2344 twoEqualRegInst("vmul", "VmulsD", "SimdMultOp", allTypes, 2, vmulCode)
2345 twoEqualRegInst("vmul", "VmulsQ", "SimdMultOp", allTypes, 4, vmulCode)
2346 twoEqualRegInstFp("vmul", "VmulsDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode)
2347 twoEqualRegInstFp("vmul", "VmulsQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode)
2348 twoRegLongInst("vmull", "Vmulls", "SimdMultOp", smallTypes, vmullCode)
# Saturating doubling multiplies.
2350 twoRegLongInst("vqdmull", "Vqdmulls", "SimdMultOp", smallTypes, vqdmullCode)
2351 twoRegLongInst("vqdmlal", "Vqdmlals", "SimdMultAccOp", smallTypes, vqdmlalCode, True)
2352 twoRegLongInst("vqdmlsl", "Vqdmlsls", "SimdMultAccOp", smallTypes, vqdmlslCode, True)
2353 twoEqualRegInst("vqdmulh", "VqdmulhsD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)
2354 twoEqualRegInst("vqdmulh", "VqdmulhsQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)
2355 twoEqualRegInst("vqrdmulh", "VqrdmulhsD",
2356 "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
2357 twoEqualRegInst("vqrdmulh", "VqrdmulhsQ",
2358 "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
2361 if (imm >= sizeof(srcElem1) * 8) {
2367 destElem = srcElem1 >> imm;
# vshr: the fragment above shifts srcElem1 right by imm and has a separate
# branch for imm at or above the element width in bits (that branch's body is
# not part of this excerpt). Registered as a D form (2) and a Q form (4) over
# allTypes with op class SimdShiftOp.
2370 twoRegShiftInst("vshr", "NVshrD", "SimdShiftOp", allTypes, 2, vshrCode)
2371 twoRegShiftInst("vshr", "NVshrQ", "SimdShiftOp", allTypes, 4, vshrCode)
2375 if (imm >= sizeof(srcElem1) * 8) {
2376 mid = ltz(srcElem1) ? -1 : 0;
2378 mid = srcElem1 >> imm;
2379 if (ltz(srcElem1) && !ltz(mid)) {
2380 mid |= -(mid & ((Element)1 <<
2381 (sizeof(Element) * 8 - 1 - imm)));
# vsra: the fragment above builds an intermediate `mid`. For imm at or above
# the element width it is -1 or 0 depending on ltz(srcElem1); otherwise it is
# srcElem1 >> imm, with the upper bits filled in by hand when the source is
# negative but the shifted value is not. How `mid` is folded into destElem is
# not visible here. Registered with op class SimdShiftAccOp and a trailing
# True.
2386 twoRegShiftInst("vsra", "NVsraD", "SimdShiftAccOp", allTypes, 2, vsraCode, True)
2387 twoRegShiftInst("vsra", "NVsraQ", "SimdShiftAccOp", allTypes, 4, vsraCode, True)
2390 if (imm > sizeof(srcElem1) * 8) {
2393 Element rBit = bits(srcElem1, imm - 1);
2394 destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2396 destElem = srcElem1;
# vrshr: the fragment above adds rBit, bit (imm - 1) of the source, to the
# shifted value. The shift is written in two steps, by (imm - 1) and then by
# 1. The final `destElem = srcElem1` belongs to a separate branch whose
# condition is not shown (presumably imm == 0 -- confirm). Note that the
# out-of-range test here is `>` while vshr uses `>=`.
2399 twoRegShiftInst("vrshr", "NVrshrD", "SimdShiftOp", allTypes, 2, vrshrCode)
2400 twoRegShiftInst("vrshr", "NVrshrQ", "SimdShiftOp", allTypes, 4, vrshrCode)
2403 if (imm > sizeof(srcElem1) * 8) {
2406 Element rBit = bits(srcElem1, imm - 1);
2407 destElem += ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2409 destElem += srcElem1;
# vrsra: same computation as vrshr, but the result is added to destElem
# (`+=`) instead of assigned. Registered with SimdShiftAccOp and a trailing
# True.
2412 twoRegShiftInst("vrsra", "NVrsraD", "SimdShiftAccOp", allTypes, 2, vrsraCode, True)
2413 twoRegShiftInst("vrsra", "NVrsraQ", "SimdShiftAccOp", allTypes, 4, vrsraCode, True)
2416 if (imm >= sizeof(Element) * 8)
2417 destElem = destElem;
2419 destElem = (srcElem1 >> imm) |
2420 (destElem & ~mask(sizeof(Element) * 8 - imm));
# vsri: for imm at or above the element width destElem is left as it is (the
# self-assignment). The other assignment ORs srcElem1 >> imm with the bits of
# destElem above the low (width - imm) bits. Registered over unsignedTypes
# with a trailing True.
2422 twoRegShiftInst("vsri", "NVsriD", "SimdShiftOp", unsignedTypes, 2, vsriCode, True)
2423 twoRegShiftInst("vsri", "NVsriQ", "SimdShiftOp", unsignedTypes, 4, vsriCode, True)
2426 if (imm >= sizeof(Element) * 8)
2427 destElem = (srcElem1 << (sizeof(Element) * 8 - 1)) << 1;
2429 destElem = srcElem1 << imm;
# vshl: left shift by imm. For imm at or above the element width the shift is
# done in two steps, by (width - 1) and then by 1.
2431 twoRegShiftInst("vshl", "NVshlD", "SimdShiftOp", unsignedTypes, 2, vshlCode)
2432 twoRegShiftInst("vshl", "NVshlQ", "SimdShiftOp", unsignedTypes, 4, vshlCode)
2435 if (imm >= sizeof(Element) * 8)
2436 destElem = destElem;
2438 destElem = (srcElem1 << imm) | (destElem & mask(imm));
# vsli: mirror image of vsri. srcElem1 << imm is ORed with the low imm bits
# of destElem, and destElem is left alone for imm at or above the element
# width.
2440 twoRegShiftInst("vsli", "NVsliD", "SimdShiftOp", unsignedTypes, 2, vsliCode, True)
2441 twoRegShiftInst("vsli", "NVsliQ", "SimdShiftOp", unsignedTypes, 4, vsliCode, True)
2444 FPSCR fpscr = (FPSCR) FpscrQc;
2445 if (imm >= sizeof(Element) * 8) {
2446 if (srcElem1 != 0) {
2447 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2449 destElem = ~destElem;
2455 destElem = (srcElem1 << imm);
2456 uint64_t topBits = bits((uint64_t)srcElem1,
2457 sizeof(Element) * 8 - 1,
2458 sizeof(Element) * 8 - 1 - imm);
2459 if (topBits != 0 && topBits != mask(imm + 1)) {
2460 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2462 destElem = ~destElem;
2466 destElem = srcElem1;
# vqshl over signedTypes. The fragment above takes a local FPSCR copy from
# FpscrQc. The result is replaced by the top-bit-only pattern, or by its
# complement under a condition that is not shown, in two cases: imm is at or
# above the element width and the source is non-zero, or the top (imm + 1)
# source bits are neither all zero nor all one.
2470 twoRegShiftInst("vqshl", "NVqshlD", "SimdShiftOp", signedTypes, 2, vqshlCode)
2471 twoRegShiftInst("vqshl", "NVqshlQ", "SimdShiftOp", signedTypes, 4, vqshlCode)
2474 FPSCR fpscr = (FPSCR) FpscrQc;
2475 if (imm >= sizeof(Element) * 8) {
2476 if (srcElem1 != 0) {
2477 destElem = mask(sizeof(Element) * 8);
2483 destElem = (srcElem1 << imm);
2484 uint64_t topBits = bits((uint64_t)srcElem1,
2485 sizeof(Element) * 8 - 1,
2486 sizeof(Element) * 8 - imm);
2488 destElem = mask(sizeof(Element) * 8);
2492 destElem = srcElem1;
# The "vqshl" mnemonic again, this time over unsignedTypes under the class
# names NVqshluD / NVqshluQ. The replacement value in this fragment is the
# all-ones mask of the element width.
2496 twoRegShiftInst("vqshlu", "NVqshluD", "SimdShiftOp", unsignedTypes, 2, vqshluCode)
2497 twoRegShiftInst("vqshlu", "NVqshluQ", "SimdShiftOp", unsignedTypes, 4, vqshluCode)
2500 FPSCR fpscr = (FPSCR) FpscrQc;
2501 if (imm >= sizeof(Element) * 8) {
2505 } else if (srcElem1 > 0) {
2506 destElem = mask(sizeof(Element) * 8);
2512 destElem = (srcElem1 << imm);
2513 uint64_t topBits = bits((uint64_t)srcElem1,
2514 sizeof(Element) * 8 - 1,
2515 sizeof(Element) * 8 - imm);
2519 } else if (topBits != 0) {
2520 destElem = mask(sizeof(Element) * 8);
2528 destElem = srcElem1;
# vqshlus over signedTypes. The fragment above uses the all-ones mask when
# the source is positive (or topBits is non-zero); the branches taken before
# each `else if` are not part of this excerpt.
2533 twoRegShiftInst("vqshlus", "NVqshlusD", "SimdShiftOp", signedTypes, 2, vqshlusCode)
2534 twoRegShiftInst("vqshlus", "NVqshlusQ", "SimdShiftOp", signedTypes, 4, vqshlusCode)
2537 if (imm >= sizeof(srcElem1) * 8) {
2540 destElem = srcElem1 >> imm;
# vshrn: same visible shape as vshr (right shift by imm with an out-of-range
# branch), but registered through twoRegNarrowShiftInst over
# smallUnsignedTypes and without a 2/4 argument.
2543 twoRegNarrowShiftInst("vshrn", "NVshrn", "SimdShiftOp", smallUnsignedTypes, vshrnCode)
2546 if (imm > sizeof(srcElem1) * 8) {
2549 Element rBit = bits(srcElem1, imm - 1);
2550 destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2552 destElem = srcElem1;
# vrshrn: same visible shape as vrshr (bit (imm - 1) of the source is added
# to the shifted value), registered through twoRegNarrowShiftInst.
2555 twoRegNarrowShiftInst("vrshrn", "NVrshrn", "SimdShiftOp", smallUnsignedTypes, vrshrnCode)
2558 FPSCR fpscr = (FPSCR) FpscrQc;
2559 if (imm > sizeof(srcElem1) * 8) {
2560 if (srcElem1 != 0 && srcElem1 != -1)
2564 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2565 mid |= -(mid & ((BigElement)1 <<
2566 (sizeof(BigElement) * 8 - 1 - imm)));
2567 if (mid != (Element)mid) {
2568 destElem = mask(sizeof(Element) * 8 - 1);
2570 destElem = ~destElem;
2576 destElem = srcElem1;
# vqshrn over smallSignedTypes. The fragment above shifts in BigElement
# precision, fills the upper bits of `mid` by hand, and checks whether `mid`
# survives a cast to Element. If it does not, destElem becomes
# mask(width - 1), or its complement under a condition that is not shown.
2580 twoRegNarrowShiftInst("vqshrn", "NVqshrn", "SimdShiftOp", smallSignedTypes, vqshrnCode)
2583 FPSCR fpscr = (FPSCR) FpscrQc;
2584 if (imm > sizeof(srcElem1) * 8) {
2589 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2590 if (mid != (Element)mid) {
2591 destElem = mask(sizeof(Element) * 8);
2597 destElem = srcElem1;
# vqshrun over smallUnsignedTypes: same cast check, with the all-ones mask of
# the element width as the replacement value.
2601 twoRegNarrowShiftInst("vqshrun", "NVqshrun",
2602 "SimdShiftOp", smallUnsignedTypes, vqshrunCode)
2605 FPSCR fpscr = (FPSCR) FpscrQc;
2606 if (imm > sizeof(srcElem1) * 8) {
2611 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2612 if (bits(mid, sizeof(BigElement) * 8 - 1,
2613 sizeof(Element) * 8) != 0) {
2617 destElem = mask(sizeof(Element) * 8);
2624 destElem = srcElem1;
# Second class for the same "vqshrun" mnemonic, NVqshruns, over
# smallSignedTypes. Its fragment tests the bits of `mid` above the Element
# width instead of comparing against a cast.
2628 twoRegNarrowShiftInst("vqshrun", "NVqshruns",
2629 "SimdShiftOp", smallSignedTypes, vqshrunsCode)
2632 FPSCR fpscr = (FPSCR) FpscrQc;
2633 if (imm > sizeof(srcElem1) * 8) {
2634 if (srcElem1 != 0 && srcElem1 != -1)
2638 BigElement mid = (srcElem1 >> (imm - 1));
2639 uint64_t rBit = mid & 0x1;
2641 mid |= -(mid & ((BigElement)1 <<
2642 (sizeof(BigElement) * 8 - 1 - imm)));
2644 if (mid != (Element)mid) {
2645 destElem = mask(sizeof(Element) * 8 - 1);
2647 destElem = ~destElem;
2653 if (srcElem1 != (Element)srcElem1) {
2654 destElem = mask(sizeof(Element) * 8 - 1);
2656 destElem = ~destElem;
2659 destElem = srcElem1;
# vqrshrn over smallSignedTypes. Compared with vqshrn, the fragment above
# also captures rBit, the lowest bit of srcElem1 >> (imm - 1). The statements
# that use rBit are not part of this excerpt.
2664 twoRegNarrowShiftInst("vqrshrn", "NVqrshrn",
2665 "SimdShiftOp", smallSignedTypes, vqrshrnCode)
2668 FPSCR fpscr = (FPSCR) FpscrQc;
2669 if (imm > sizeof(srcElem1) * 8) {
2674 BigElement mid = (srcElem1 >> (imm - 1));
2675 uint64_t rBit = mid & 0x1;
2678 if (mid != (Element)mid) {
2679 destElem = mask(sizeof(Element) * 8);
2685 if (srcElem1 != (Element)srcElem1) {
2686 destElem = mask(sizeof(Element) * 8 - 1);
2689 destElem = srcElem1;
# vqrshrun over smallUnsignedTypes.
# NOTE(review): the first replacement value in the fragment above is the
# full-width mask, while the second one (taken when srcElem1 does not survive
# the cast to Element) is mask(width - 1). Confirm that the narrower mask is
# intended for the unsigned types.
2694 twoRegNarrowShiftInst("vqrshrun", "NVqrshrun",
2695 "SimdShiftOp", smallUnsignedTypes, vqrshrunCode)
2698 FPSCR fpscr = (FPSCR) FpscrQc;
2699 if (imm > sizeof(srcElem1) * 8) {
2704 BigElement mid = (srcElem1 >> (imm - 1));
2705 uint64_t rBit = mid & 0x1;
2707 mid |= -(mid & ((BigElement)1 <<
2708 (sizeof(BigElement) * 8 - 1 - imm)));
2710 if (bits(mid, sizeof(BigElement) * 8 - 1,
2711 sizeof(Element) * 8) != 0) {
2715 destElem = mask(sizeof(Element) * 8);
2726 destElem = srcElem1;
# Second class for the same "vqrshrun" mnemonic, NVqrshruns, over
# smallSignedTypes.
2731 twoRegNarrowShiftInst("vqrshrun", "NVqrshruns",
2732 "SimdShiftOp", smallSignedTypes, vqrshrunsCode)
2735 if (imm >= sizeof(destElem) * 8) {
2738 destElem = (BigElement)srcElem1 << imm;
# vshll: the source element is cast to BigElement before the left shift. The
# out-of-range test is against the width of destElem, not of the source.
2741 twoRegLongShiftInst("vshll", "NVshll", "SimdShiftOp", smallTypes, vshllCode)
2744 destElem = srcElem1;
# vmovl: plain element copy, registered through the same
# twoRegLongShiftInst helper but with op class SimdMiscOp.
2746 twoRegLongShiftInst("vmovl", "NVmovl", "SimdMiscOp", smallTypes, vmovlCode)
2749 FPSCR fpscr = (FPSCR) FpscrExc;
2750 if (flushToZero(srcElem1))
2752 VfpSavedState state = prepFpState(VfpRoundNearest);
2753 __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
2754 destReg = vfpFpSToFixed(srcElem1, false, false, imm);
2755 __asm__ __volatile__("" :: "m" (destReg));
2756 finishVfp(fpscr, state, true);
# vcvt, float to fixed point, class names NVcvt2ufx*. The fragment above
# brackets vfpFpSToFixed(srcElem1, false, false, imm) between
# prepFpState(VfpRoundNearest) and finishVfp(). The empty asm statements with
# "m" constraints sit on both sides of the conversion (presumably to stop the
# compiler moving it across the FP state changes -- confirm). Registered with
# toInt = True.
2759 twoRegShiftInst("vcvt", "NVcvt2ufxD", "SimdCvtOp", ("float",),
2760 2, vcvt2ufxCode, toInt = True)
2761 twoRegShiftInst("vcvt", "NVcvt2ufxQ", "SimdCvtOp", ("float",),
2762 4, vcvt2ufxCode, toInt = True)
2765 FPSCR fpscr = (FPSCR) FpscrExc;
2766 if (flushToZero(srcElem1))
2768 VfpSavedState state = prepFpState(VfpRoundNearest);
2769 __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
2770 destReg = vfpFpSToFixed(srcElem1, true, false, imm);
2771 __asm__ __volatile__("" :: "m" (destReg));
2772 finishVfp(fpscr, state, true);
# vcvt, class names NVcvt2sfx*. Identical to the fragment before it except
# that the second argument of vfpFpSToFixed is true instead of false.
2775 twoRegShiftInst("vcvt", "NVcvt2sfxD", "SimdCvtOp", ("float",),
2776 2, vcvt2sfxCode, toInt = True)
2777 twoRegShiftInst("vcvt", "NVcvt2sfxQ", "SimdCvtOp", ("float",),
2778 4, vcvt2sfxCode, toInt = True)
2781 FPSCR fpscr = (FPSCR) FpscrExc;
2782 VfpSavedState state = prepFpState(VfpRoundNearest);
2783 __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
2784 destElem = vfpUFixedToFpS(true, true, srcReg1, false, imm);
2785 __asm__ __volatile__("" :: "m" (destElem));
2786 finishVfp(fpscr, state, true);
# vcvt, fixed point to float, class names NVcvtu2fp*. The conversion is done
# by vfpUFixedToFpS and the registration passes fromInt = True. No
# flushToZero check appears in this fragment.
2789 twoRegShiftInst("vcvt", "NVcvtu2fpD", "SimdCvtOp", ("float",),
2790 2, vcvtu2fpCode, fromInt = True)
2791 twoRegShiftInst("vcvt", "NVcvtu2fpQ", "SimdCvtOp", ("float",),
2792 4, vcvtu2fpCode, fromInt = True)
2795 FPSCR fpscr = (FPSCR) FpscrExc;
2796 VfpSavedState state = prepFpState(VfpRoundNearest);
2797 __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
2798 destElem = vfpSFixedToFpS(true, true, srcReg1, false, imm);
2799 __asm__ __volatile__("" :: "m" (destElem));
2800 finishVfp(fpscr, state, true);
# vcvt, class names NVcvts2fp*. Same as the fragment before it, but calling
# vfpSFixedToFpS.
2803 twoRegShiftInst("vcvt", "NVcvts2fpD", "SimdCvtOp", ("float",),
2804 2, vcvts2fpCode, fromInt = True)
2805 twoRegShiftInst("vcvt", "NVcvts2fpQ", "SimdCvtOp", ("float",),
2806 4, vcvts2fpCode, fromInt = True)
2809 FPSCR fpscr = (FPSCR) FpscrExc;
2810 float srcFp1 = bitsToFp(srcElem1, (float)0.0);
2811 if (flushToZero(srcFp1))
2813 VfpSavedState state = prepFpState(VfpRoundNearest);
2814 __asm__ __volatile__("" : "=m" (srcFp1), "=m" (destElem)
2815 : "m" (srcFp1), "m" (destElem));
2816 destElem = vcvtFpSFpH(fpscr, true, true, VfpRoundNearest,
2818 __asm__ __volatile__("" :: "m" (destElem));
2819 finishVfp(fpscr, state, true);
# vcvt, class NVcvts2h. The fragment above reinterprets the source element
# bits as a float with bitsToFp and converts with vcvtFpSFpH (the rest of
# that call's argument list is not part of this excerpt). Registered through
# twoRegNarrowMiscInst with ("uint16_t",).
2822 twoRegNarrowMiscInst("vcvt", "NVcvts2h", "SimdCvtOp", ("uint16_t",), vcvts2hCode)
2825 FPSCR fpscr = (FPSCR) FpscrExc;
2826 VfpSavedState state = prepFpState(VfpRoundNearest);
2827 __asm__ __volatile__("" : "=m" (srcElem1), "=m" (destElem)
2828 : "m" (srcElem1), "m" (destElem));
2829 destElem = fpToBits(vcvtFpHFpS(fpscr, true, fpscr.ahp, srcElem1));
2830 __asm__ __volatile__("" :: "m" (destElem));
2831 finishVfp(fpscr, state, true);
# vcvt, class NVcvth2s: the opposite direction. vcvtFpHFpS is given
# fpscr.ahp and its result is stored as raw bits via fpToBits. Registered
# through twoRegLongMiscInst.
2834 twoRegLongMiscInst("vcvt", "NVcvth2s", "SimdCvtOp", ("uint16_t",), vcvth2sCode)
2837 destElem = unsignedRSqrtEstimate(srcElem1);
# vrsqrte, integer form: a single call to unsignedRSqrtEstimate, registered
# for ("uint32_t",) only.
2839 twoRegMiscInst("vrsqrte", "NVrsqrteD", "SimdSqrtOp", ("uint32_t",), 2, vrsqrteCode)
2840 twoRegMiscInst("vrsqrte", "NVrsqrteQ", "SimdSqrtOp", ("uint32_t",), 4, vrsqrteCode)
2843 FPSCR fpscr = (FPSCR) FpscrExc;
2844 if (flushToZero(srcReg1))
2846 destReg = fprSqrtEstimate(fpscr, srcReg1);
# vrsqrte, floating point form: fprSqrtEstimate is called with a local FPSCR
# copy taken from FpscrExc, after a flushToZero check on the source.
2849 twoRegMiscInstFp("vrsqrte", "NVrsqrteDFp", "SimdFloatSqrtOp", ("float",), 2, vrsqrtefpCode)
2850 twoRegMiscInstFp("vrsqrte", "NVrsqrteQFp", "SimdFloatSqrtOp", ("float",), 4, vrsqrtefpCode)
2853 destElem = unsignedRecipEstimate(srcElem1);
# vrecpe, integer form: a single call to unsignedRecipEstimate. Note that the
# op class used for vrecpe is SimdMultAccOp (SimdFloatMultAccOp for the
# floating point form).
2855 twoRegMiscInst("vrecpe", "NVrecpeD", "SimdMultAccOp", ("uint32_t",), 2, vrecpeCode)
2856 twoRegMiscInst("vrecpe", "NVrecpeQ", "SimdMultAccOp", ("uint32_t",), 4, vrecpeCode)
2859 FPSCR fpscr = (FPSCR) FpscrExc;
2860 if (flushToZero(srcReg1))
2862 destReg = fpRecipEstimate(fpscr, srcReg1);
# vrecpe, floating point form: same shape as the floating point vrsqrte
# fragment, calling fpRecipEstimate.
2865 twoRegMiscInstFp("vrecpe", "NVrecpeDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpefpCode)
2866 twoRegMiscInstFp("vrecpe", "NVrecpeQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpefpCode)
2869 destElem = srcElem1;
2870 unsigned groupSize = ((1 << 1) / sizeof(Element));
2871 unsigned reverseMask = (groupSize - 1);
2872 j = i ^ reverseMask;
# vrev16: the fragment above copies the element unchanged and computes an
# index j by XORing i with (groupSize - 1), where groupSize is the number of
# elements in a 2-byte ((1 << 1)) group. i and j are not declared in the
# fragment (presumably they are the element indices supplied by
# twoRegMiscInst -- confirm). Registered for ("uint8_t",) only.
2874 twoRegMiscInst("vrev16", "NVrev16D", "SimdAluOp", ("uint8_t",), 2, vrev16Code)
2875 twoRegMiscInst("vrev16", "NVrev16Q", "SimdAluOp", ("uint8_t",), 4, vrev16Code)
2877 destElem = srcElem1;
2878 unsigned groupSize = ((1 << 2) / sizeof(Element));
2879 unsigned reverseMask = (groupSize - 1);
2880 j = i ^ reverseMask;
# vrev32: same computation with a 4-byte ((1 << 2)) group, registered for
# "uint8_t" and "uint16_t".
2882 twoRegMiscInst("vrev32", "NVrev32D",
2883 "SimdAluOp", ("uint8_t", "uint16_t"), 2, vrev32Code)
2884 twoRegMiscInst("vrev32", "NVrev32Q",
2885 "SimdAluOp", ("uint8_t", "uint16_t"), 4, vrev32Code)
2887 destElem = srcElem1;
2888 unsigned groupSize = ((1 << 3) / sizeof(Element));
2889 unsigned reverseMask = (groupSize - 1);
2890 j = i ^ reverseMask;
# vrev64: same computation with an 8-byte ((1 << 3)) group, registered over
# smallUnsignedTypes.
2892 twoRegMiscInst("vrev64", "NVrev64D", "SimdAluOp", smallUnsignedTypes, 2, vrev64Code)
2893 twoRegMiscInst("vrev64", "NVrev64Q", "SimdAluOp", smallUnsignedTypes, 4, vrev64Code)
2896 destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
# vpaddl: both source elements are cast to BigElement before being added.
# Registered through twoRegCondenseInst over smallTypes.
2898 twoRegCondenseInst("vpaddl", "NVpaddlD", "SimdAddOp", smallTypes, 2, vpaddlCode)
2899 twoRegCondenseInst("vpaddl", "NVpaddlQ", "SimdAddOp", smallTypes, 4, vpaddlCode)
2902 destElem += (BigElement)srcElem1 + (BigElement)srcElem2;
# vpadal: the same sum, added to destElem (`+=`). Registered with
# SimdAddAccOp and a trailing True.
2904 twoRegCondenseInst("vpadal", "NVpadalD", "SimdAddAccOp", smallTypes, 2, vpadalCode, True)
2905 twoRegCondenseInst("vpadal", "NVpadalQ", "SimdAddAccOp", smallTypes, 4, vpadalCode, True)
2911 while (srcElem1 < 0 && count < sizeof(Element) * 8 - 1) {
2917 while (srcElem1 >= 0 && count < sizeof(Element) * 8 - 1) {
# vcls: only the two loop headers are visible above. One loop runs while the
# source is negative and the other while it is non-negative, and both are
# capped at (width - 1) iterations. Registered over signedTypes.
2924 twoRegMiscInst("vcls", "NVclsD", "SimdAluOp", signedTypes, 2, vclsCode)
2925 twoRegMiscInst("vcls", "NVclsQ", "SimdAluOp", signedTypes, 4, vclsCode)
2929 while (srcElem1 >= 0 && count < sizeof(Element) * 8) {
# vclz: a single loop that runs while the source is non-negative, capped at
# the full element width. It is registered over signedTypes, which is what
# makes the `>= 0` test meaningful.
2935 twoRegMiscInst("vclz", "NVclzD", "SimdAluOp", signedTypes, 2, vclzCode)
2936 twoRegMiscInst("vclz", "NVclzQ", "SimdAluOp", signedTypes, 4, vclzCode)
2940 while (srcElem1 && count < sizeof(Element) * 8) {
2941 count += srcElem1 & 0x1;
# vcnt: the loop above adds the low bit of srcElem1 to count while srcElem1
# is non-zero (the statement that advances srcElem1 is not part of this
# excerpt). Registered over unsignedTypes.
2947 twoRegMiscInst("vcnt", "NVcntD", "SimdAluOp", unsignedTypes, 2, vcntCode)
2948 twoRegMiscInst("vcnt", "NVcntQ", "SimdAluOp", unsignedTypes, 4, vcntCode)
2951 destElem = ~srcElem1;
# vmvn, register form: bitwise complement of the source element, registered
# for ("uint64_t",) only.
2953 twoRegMiscInst("vmvn", "NVmvnD", "SimdAluOp", ("uint64_t",), 2, vmvnCode)
2954 twoRegMiscInst("vmvn", "NVmvnQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode)
2957 FPSCR fpscr = (FPSCR) FpscrQc;
2958 if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
2960 destElem = ~srcElem1;
2961 } else if (srcElem1 < 0) {
2962 destElem = -srcElem1;
2964 destElem = srcElem1;
# vqabs: when the source is the value with only its top bit set, the result
# is the bitwise complement of the source. Otherwise negative sources are
# negated and the rest are copied. The fragment takes a local FPSCR copy from
# FpscrQc; the statements that update it are not part of this excerpt.
2968 twoRegMiscInst("vqabs", "NVqabsD", "SimdAluOp", signedTypes, 2, vqabsCode)
2969 twoRegMiscInst("vqabs", "NVqabsQ", "SimdAluOp", signedTypes, 4, vqabsCode)
2972 FPSCR fpscr = (FPSCR) FpscrQc;
2973 if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
2975 destElem = ~srcElem1;
2977 destElem = -srcElem1;
# vqneg: same special case for the top-bit-only value, otherwise a plain
# negation.
2981 twoRegMiscInst("vqneg", "NVqnegD", "SimdAluOp", signedTypes, 2, vqnegCode)
2982 twoRegMiscInst("vqneg", "NVqnegQ", "SimdAluOp", signedTypes, 4, vqnegCode)
2986 destElem = -srcElem1;
2988 destElem = srcElem1;
# vabs, integer form: either the negated or the unchanged source. The
# condition that selects between the two is not part of this excerpt.
2992 twoRegMiscInst("vabs", "NVabsD", "SimdAluOp", signedTypes, 2, vabsCode)
2993 twoRegMiscInst("vabs", "NVabsQ", "SimdAluOp", signedTypes, 4, vabsCode)
3000 cStruct.f = srcReg1;
3001 cStruct.i &= mask(sizeof(Element) * 8 - 1);
3002 destReg = cStruct.f;
# vabs, floating point form: the value is stored through cStruct.f, the top
# bit is cleared through cStruct.i, and the result is read back through
# cStruct.f. The declaration of cStruct is not part of this excerpt.
3004 twoRegMiscInstFp("vabs", "NVabsDFp", "SimdFloatAluOp", ("float",), 2, vabsfpCode)
3005 twoRegMiscInstFp("vabs", "NVabsQFp", "SimdFloatAluOp", ("float",), 4, vabsfpCode)
3008 destElem = -srcElem1;
# vneg, integer form: plain negation with no special case, registered over
# signedTypes.
3010 twoRegMiscInst("vneg", "NVnegD", "SimdAluOp", signedTypes, 2, vnegCode)
3011 twoRegMiscInst("vneg", "NVnegQ", "SimdAluOp", signedTypes, 4, vnegCode)
# vneg, floating point form. The definition of vnegfpCode is not part of this
# excerpt.
3015 twoRegMiscInstFp("vneg", "NVnegDFp", "SimdFloatAluOp", ("float",), 2, vnegfpCode)
3016 twoRegMiscInstFp("vneg", "NVnegQFp", "SimdFloatAluOp", ("float",), 4, vnegfpCode)
# Compare-against-zero instructions. Each integer form sets destElem to the
# all-ones mask of the element width when the comparison of srcElem1 with 0
# holds, and to 0 otherwise. Each floating point fragment passes srcReg1 and
# (FloatReg)0.0 to binaryOp together with the matching vc??Func, and writes
# -1 to destReg when the returned value equals 0, and 0 otherwise. All
# floating point forms are registered with toInt = True.
# vcgt: srcElem1 > 0.
3018 vcgtCode = 'destElem = (srcElem1 > 0) ? mask(sizeof(Element) * 8) : 0;'
3019 twoRegMiscInst("vcgt", "NVcgtD", "SimdCmpOp", signedTypes, 2, vcgtCode)
3020 twoRegMiscInst("vcgt", "NVcgtQ", "SimdCmpOp", signedTypes, 4, vcgtCode)
3022 FPSCR fpscr = (FPSCR) FpscrExc;
3023 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgtFunc,
3024 true, true, VfpRoundNearest);
3025 destReg = (res == 0) ? -1 : 0;
# vcgt, floating point form (uses vcgtFunc).
3030 twoRegMiscInstFp("vcgt", "NVcgtDFp", "SimdFloatCmpOp", ("float",),
3031 2, vcgtfpCode, toInt = True)
3032 twoRegMiscInstFp("vcgt", "NVcgtQFp", "SimdFloatCmpOp", ("float",),
3033 4, vcgtfpCode, toInt = True)
# vcge: srcElem1 >= 0.
3035 vcgeCode = 'destElem = (srcElem1 >= 0) ? mask(sizeof(Element) * 8) : 0;'
3036 twoRegMiscInst("vcge", "NVcgeD", "SimdCmpOp", signedTypes, 2, vcgeCode)
3037 twoRegMiscInst("vcge", "NVcgeQ", "SimdCmpOp", signedTypes, 4, vcgeCode)
3039 FPSCR fpscr = (FPSCR) FpscrExc;
3040 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgeFunc,
3041 true, true, VfpRoundNearest);
3042 destReg = (res == 0) ? -1 : 0;
# vcge, floating point form (uses vcgeFunc).
3047 twoRegMiscInstFp("vcge", "NVcgeDFp", "SimdFloatCmpOp", ("float",),
3048 2, vcgefpCode, toInt = True)
3049 twoRegMiscInstFp("vcge", "NVcgeQFp", "SimdFloatCmpOp", ("float",),
3050 4, vcgefpCode, toInt = True)
# vceq: srcElem1 == 0.
3052 vceqCode = 'destElem = (srcElem1 == 0) ? mask(sizeof(Element) * 8) : 0;'
3053 twoRegMiscInst("vceq", "NVceqD", "SimdCmpOp", signedTypes, 2, vceqCode)
3054 twoRegMiscInst("vceq", "NVceqQ", "SimdCmpOp", signedTypes, 4, vceqCode)
3056 FPSCR fpscr = (FPSCR) FpscrExc;
3057 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vceqFunc,
3058 true, true, VfpRoundNearest);
3059 destReg = (res == 0) ? -1 : 0;
# vceq, floating point form (uses vceqFunc).
3064 twoRegMiscInstFp("vceq", "NVceqDFp", "SimdFloatCmpOp", ("float",),
3065 2, vceqfpCode, toInt = True)
3066 twoRegMiscInstFp("vceq", "NVceqQFp", "SimdFloatCmpOp", ("float",),
3067 4, vceqfpCode, toInt = True)
# vcle: srcElem1 <= 0.
3069 vcleCode = 'destElem = (srcElem1 <= 0) ? mask(sizeof(Element) * 8) : 0;'
3070 twoRegMiscInst("vcle", "NVcleD", "SimdCmpOp", signedTypes, 2, vcleCode)
3071 twoRegMiscInst("vcle", "NVcleQ", "SimdCmpOp", signedTypes, 4, vcleCode)
3073 FPSCR fpscr = (FPSCR) FpscrExc;
3074 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcleFunc,
3075 true, true, VfpRoundNearest);
3076 destReg = (res == 0) ? -1 : 0;
# vcle, floating point form (uses vcleFunc).
3081 twoRegMiscInstFp("vcle", "NVcleDFp", "SimdFloatCmpOp", ("float",),
3082 2, vclefpCode, toInt = True)
3083 twoRegMiscInstFp("vcle", "NVcleQFp", "SimdFloatCmpOp", ("float",),
3084 4, vclefpCode, toInt = True)
# vclt: srcElem1 < 0.
3086 vcltCode = 'destElem = (srcElem1 < 0) ? mask(sizeof(Element) * 8) : 0;'
3087 twoRegMiscInst("vclt", "NVcltD", "SimdCmpOp", signedTypes, 2, vcltCode)
3088 twoRegMiscInst("vclt", "NVcltQ", "SimdCmpOp", signedTypes, 4, vcltCode)
3090 FPSCR fpscr = (FPSCR) FpscrExc;
3091 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcltFunc,
3092 true, true, VfpRoundNearest);
3093 destReg = (res == 0) ? -1 : 0;
# vclt, floating point form (uses vcltFunc).
3098 twoRegMiscInstFp("vclt", "NVcltDFp", "SimdFloatCmpOp", ("float",),
3099 2, vcltfpCode, toInt = True)
3100 twoRegMiscInstFp("vclt", "NVcltQFp", "SimdFloatCmpOp", ("float",),
3101 4, vcltfpCode, toInt = True)
3105 for (unsigned r = 0; r < rCount; r++) {
3106 mid = srcReg1.regs[r];
3107 srcReg1.regs[r] = destReg.regs[r];
3108 destReg.regs[r] = mid;
# vswp: the loop above exchanges srcReg1.regs[r] and destReg.regs[r] through
# the temporary `mid` for each of the rCount registers. Both operands are
# written, and the registration goes through twoRegMiscScramble.
3111 twoRegMiscScramble("vswp", "NVswpD", "SimdAluOp", ("uint64_t",), 2, vswpCode)
3112 twoRegMiscScramble("vswp", "NVswpQ", "SimdAluOp", ("uint64_t",), 4, vswpCode)
3116 for (unsigned i = 0; i < eCount; i += 2) {
3117 mid = srcReg1.elements[i];
3118 srcReg1.elements[i] = destReg.elements[i + 1];
3119 destReg.elements[i + 1] = mid;
# vtrn: stepping through the elements two at a time, the loop above exchanges
# srcReg1.elements[i] with destReg.elements[i + 1].
3122 twoRegMiscScramble("vtrn", "NVtrnD", "SimdAluOp", unsignedTypes, 2, vtrnCode)
3123 twoRegMiscScramble("vtrn", "NVtrnQ", "SimdAluOp", unsignedTypes, 4, vtrnCode)
3126 Element mid[eCount];
3127 memcpy(&mid, &srcReg1, sizeof(srcReg1));
3128 for (unsigned i = 0; i < eCount / 2; i++) {
3129 srcReg1.elements[i] = destReg.elements[2 * i + 1];
3130 srcReg1.elements[eCount / 2 + i] = mid[2 * i + 1];
3131 destReg.elements[i] = destReg.elements[2 * i];
3133 for (unsigned i = 0; i < eCount / 2; i++) {
3134 destReg.elements[eCount / 2 + i] = mid[2 * i];
# vuzp: `mid` is a copy of srcReg1 taken before anything is overwritten. The
# first loop gathers the odd-indexed elements of destReg and of the saved
# srcReg1 into srcReg1, and packs the even-indexed elements of destReg into
# its own low half. The second loop puts the even-indexed elements of the
# saved srcReg1 into the high half of destReg.
3137 twoRegMiscScramble("vuzp", "NVuzpD", "SimdAluOp", unsignedTypes, 2, vuzpCode)
3138 twoRegMiscScramble("vuzp", "NVuzpQ", "SimdAluOp", unsignedTypes, 4, vuzpCode)
3141 Element mid[eCount];
3142 memcpy(&mid, &destReg, sizeof(destReg));
3143 for (unsigned i = 0; i < eCount / 2; i++) {
3144 destReg.elements[2 * i] = mid[i];
3145 destReg.elements[2 * i + 1] = srcReg1.elements[i];
3147 for (int i = 0; i < eCount / 2; i++) {
3148 srcReg1.elements[2 * i] = mid[eCount / 2 + i];
3149 srcReg1.elements[2 * i + 1] = srcReg1.elements[eCount / 2 + i];
# vzip: `mid` is a copy of destReg. The first loop interleaves the low halves
# of the saved destReg and of srcReg1 into destReg. The second loop
# interleaves the two high halves into srcReg1.
# NOTE(review): the second loop declares its index as `int i` while the first
# uses `unsigned i`; both are compared against eCount / 2. Confirm whether
# the signedness difference is intentional.
3152 twoRegMiscScramble("vzip", "NVzipD", "SimdAluOp", unsignedTypes, 2, vzipCode)
3153 twoRegMiscScramble("vzip", "NVzipQ", "SimdAluOp", unsignedTypes, 4, vzipCode)
# vmovn: plain element copy, registered through twoRegNarrowMiscInst over
# smallUnsignedTypes.
3155 vmovnCode = 'destElem = srcElem1;'
3156 twoRegNarrowMiscInst("vmovn", "NVmovn", "SimdMiscOp", smallUnsignedTypes, vmovnCode)
# vdup: the same one-line copy, registered through twoRegMiscScInst in a D
# form (2) and a Q form (4).
3158 vdupCode = 'destElem = srcElem1;'
3159 twoRegMiscScInst("vdup", "NVdupD", "SimdAluOp", smallUnsignedTypes, 2, vdupCode)
3160 twoRegMiscScInst("vdup", "NVdupQ", "SimdAluOp", smallUnsignedTypes, 4, vdupCode)
# vdupGprInst(name, Name, opClass, types, rCount)
# Generates an instruction whose code fills every element of destReg with
# htog((Element)Op1) and then, for each of the rCount registers, writes
# destReg.regs[<reg>] back to FpDestP<reg>.uw through gtoh.
# The resulting InstObjParams is substituted into NeonRegRegOpDeclare
# (appended to header_output) and NeonEqualRegExecute (appended to
# exec_output). NeonExecDeclare is substituted with "targs" and "class_name"
# and also appended to exec_output.
# Several lines of the body (the start of the code string, the remaining
# InstObjParams arguments and the statement that introduces `type`) are not
# part of this excerpt.
3162 def vdupGprInst(name, Name, opClass, types, rCount):
3163 global header_output, exec_output
3166 for (unsigned i = 0; i < eCount; i++) {
3167 destReg.elements[i] = htog((Element)Op1);
3170 for reg in range(rCount):
3172 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
3173 ''' % { "reg" : reg }
3174 iop = InstObjParams(name, Name,
3176 { "code": eWalkCode,
3178 "predicate_test": predicateTest,
3179 "op_class": opClass }, [])
3180 header_output += NeonRegRegOpDeclare.subst(iop)
3181 exec_output += NeonEqualRegExecute.subst(iop)
3183 substDict = { "targs" : type,
3184 "class_name" : Name }
3185 exec_output += NeonExecDeclare.subst(substDict)
# vdup from a general purpose register: D form (rCount 2) and Q form
# (rCount 4), both over smallUnsignedTypes.
3186 vdupGprInst("vdup", "NVdupDGpr", "SimdAluOp", smallUnsignedTypes, 2)
3187 vdupGprInst("vdup", "NVdupQGpr", "SimdAluOp", smallUnsignedTypes, 4)
# One-register-plus-immediate forms, all registered for ("uint64_t",) only.
# vmov: the destination element becomes the immediate.
3189 vmovCode = 'destElem = imm;'
3190 oneRegImmInst("vmov", "NVmoviD", "SimdMiscOp", ("uint64_t",), 2, vmovCode)
3191 oneRegImmInst("vmov", "NVmoviQ", "SimdMiscOp", ("uint64_t",), 4, vmovCode)
# vorr: the immediate is ORed into the destination element. The existing
# destination value is used, and a trailing True is passed.
3193 vorrCode = 'destElem |= imm;'
3194 oneRegImmInst("vorr", "NVorriD", "SimdAluOp", ("uint64_t",), 2, vorrCode, True)
3195 oneRegImmInst("vorr", "NVorriQ", "SimdAluOp", ("uint64_t",), 4, vorrCode, True)
# vmvn: the destination element becomes the complement of the immediate.
# This assigns the name vmvnCode with the immediate-form code string.
3197 vmvnCode = 'destElem = ~imm;'
3198 oneRegImmInst("vmvn", "NVmvniD", "SimdAluOp", ("uint64_t",), 2, vmvnCode)
3199 oneRegImmInst("vmvn", "NVmvniQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode)
# vbic: the bits set in the immediate are cleared in the destination element.
# A trailing True is passed, as for vorr.
3201 vbicCode = 'destElem &= ~imm;'
3202 oneRegImmInst("vbic", "NVbiciD", "SimdAluOp", ("uint64_t",), 2, vbicCode, True)
3203 oneRegImmInst("vbic", "NVbiciQ", "SimdAluOp", ("uint64_t",), 4, vbicCode, True)
3206 FPSCR fpscr = (FPSCR) FpscrQc;
3207 destElem = srcElem1;
3208 if ((BigElement)destElem != srcElem1) {
3210 destElem = mask(sizeof(Element) * 8 - 1);
3212 destElem = ~destElem;
# vqmovn over smallSignedTypes: the source is assigned to the destination
# element and then widened back to BigElement. If the round trip does not
# reproduce the source, destElem becomes mask(width - 1), or its complement
# under a condition that is not part of this excerpt.
3216 twoRegNarrowMiscInst("vqmovn", "NVqmovn", "SimdMiscOp", smallSignedTypes, vqmovnCode)
3219 FPSCR fpscr = (FPSCR) FpscrQc;
3220 destElem = srcElem1;
3221 if ((BigElement)destElem != srcElem1) {
3223 destElem = mask(sizeof(Element) * 8);
# vqmovun over smallUnsignedTypes: same round-trip check, with the all-ones
# mask of the element width as the replacement value.
3227 twoRegNarrowMiscInst("vqmovun", "NVqmovun",
3228 "SimdMiscOp", smallUnsignedTypes, vqmovunCode)
3231 FPSCR fpscr = (FPSCR) FpscrQc;
3232 destElem = srcElem1;
3234 ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) {
3236 destElem = mask(sizeof(Element) * 8);
3238 destElem = ~destElem;
# Second class for the same "vqmovun" mnemonic, NVqmovuns, over
# smallSignedTypes. Its check masks the widened destination to the element
# width before comparing; the first part of that condition is not part of
# this excerpt.
3242 twoRegNarrowMiscInst("vqmovun", "NVqmovuns",
3243 "SimdMiscOp", smallSignedTypes, vqmovunsCode)
# buildVext(name, Name, opClass, types, rCount, op)
# Generates a NEON instruction declared with NeonRegRegRegImmOpDeclare and
# executed with NeonEqualRegExecute.
# The generated code declares RegVect srcReg1, srcReg2 and destReg. For each
# of the rCount registers it emits simdEnabledCheckCode followed by loads of
# srcReg1.regs[<reg>] and srcReg2.regs[<reg>] from FpOp1P<reg>.uw and
# FpOp2P<reg>.uw through htog. A second loop over the registers writes
# destReg.regs[<reg>] back to FpDestP<reg>.uw through gtoh.
# NeonExecDeclare is substituted with "targs" and "class_name". Both
# header_output and exec_output are appended to.
# The lines that splice `op` into the code, the remaining InstObjParams
# arguments and the statement that introduces `type` are not part of this
# excerpt.
3245 def buildVext(name, Name, opClass, types, rCount, op):
3246 global header_output, exec_output
3248 RegVect srcReg1, srcReg2, destReg;
3250 for reg in range(rCount):
3251 eWalkCode += simdEnabledCheckCode + '''
3252 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
3253 srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
3254 ''' % { "reg" : reg }
3256 for reg in range(rCount):
3258 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
3259 ''' % { "reg" : reg }
3260 iop = InstObjParams(name, Name,
3262 { "code": eWalkCode,
3264 "predicate_test": predicateTest,
3265 "op_class": opClass }, [])
3266 header_output += NeonRegRegRegImmOpDeclare.subst(iop)
3267 exec_output += NeonEqualRegExecute.subst(iop)
3269 substDict = { "targs" : type,
3270 "class_name" : Name }
3271 exec_output += NeonExecDeclare.subst(substDict)
3274 for (unsigned i = 0; i < eCount; i++) {
3275 unsigned index = i + imm;
3276 if (index < eCount) {
3277 destReg.elements[i] = srcReg1.elements[index];
3280 assert(index < eCount);
3281 destReg.elements[i] = srcReg2.elements[index];
# vext: for each destination element i the fragment above computes
# index = i + imm. While index is below eCount the element is taken from
# srcReg1; otherwise it is taken from srcReg2, after an assert that index is
# below eCount. The statement that adjusts index between those two points is
# not part of this excerpt. Registered for ("uint8_t",) only, as a D form (2)
# and a Q form (4).
3285 buildVext("vext", "NVextD", "SimdAluOp", ("uint8_t",), 2, vextCode)
3286 buildVext("vext", "NVextQ", "SimdAluOp", ("uint8_t",), 4, vextCode)
# buildVtbxl(name, Name, opClass, length, isVtbl)
# Generates a table lookup instruction declared with RegRegRegOpDeclare,
# constructed with RegRegRegOpConstructor and executed with PredOpExecute.
# `length` and `isVtbl` are substituted into the generated C++ as the
# constants `length` (via %(length)d) and `isVtbl` (via %(isVtbl)s, so the
# caller passes the C++ literal as a string).
# Generated code, as far as it is visible here:
#   - srcReg2.regs[0..1] and destReg.regs[0..1] are loaded from FpOp2P0/1.uw
#     and FpDestP0/1.uw through htog;
#   - for each of 8 table registers, table.regs[<reg>] is loaded from
#     FpOp1P<reg>.uw when reg < length * 2 and set to 0 otherwise;
#   - for every byte i of destReg, index = srcReg2.bytes[i]; when
#     index < 8 * length the byte is taken from table.bytes[index];
#     otherwise one path stores 0 and, per the embedded comment, the other
#     leaves the byte unchanged (the test that picks between them is not part
#     of this excerpt -- presumably isVtbl, confirm);
#   - destReg.regs[0..1] are written back to FpDestP0/1.uw through gtoh.
# header_output, decoder_output and exec_output are all appended to.
3288 def buildVtbxl(name, Name, opClass, length, isVtbl):
3289 global header_output, decoder_output, exec_output
3294 FloatRegBits regs[8];
3300 FloatRegBits regs[2];
3303 const unsigned length = %(length)d;
3304 const bool isVtbl = %(isVtbl)s;
3306 srcReg2.regs[0] = htog(FpOp2P0.uw);
3307 srcReg2.regs[1] = htog(FpOp2P1.uw);
3309 destReg.regs[0] = htog(FpDestP0.uw);
3310 destReg.regs[1] = htog(FpDestP1.uw);
3311 ''' % { "length" : length, "isVtbl" : isVtbl }
3312 for reg in range(8):
3313 if reg < length * 2:
3314 code += 'table.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);\n' % \
3317 code += 'table.regs[%(reg)d] = 0;\n' % { "reg" : reg }
3319 for (unsigned i = 0; i < sizeof(destReg); i++) {
3320 uint8_t index = srcReg2.bytes[i];
3321 if (index < 8 * length) {
3322 destReg.bytes[i] = table.bytes[index];
3325 destReg.bytes[i] = 0;
3326 // else destReg.bytes[i] unchanged
3330 FpDestP0.uw = gtoh(destReg.regs[0]);
3331 FpDestP1.uw = gtoh(destReg.regs[1]);
3333 iop = InstObjParams(name, Name,
3336 "predicate_test": predicateTest,
3337 "op_class": opClass }, [])
3338 header_output += RegRegRegOpDeclare.subst(iop)
3339 decoder_output += RegRegRegOpConstructor.subst(iop)
3340 exec_output += PredOpExecute.subst(iop)
# vtbl: one class per table length 1..4. isVtbl is passed as the string
# "true" because buildVtbxl substitutes it into the generated C++ text.
3342 buildVtbxl("vtbl", "NVtbl1", "SimdAluOp", 1, "true")
3343 buildVtbxl("vtbl", "NVtbl2", "SimdAluOp", 2, "true")
3344 buildVtbxl("vtbl", "NVtbl3", "SimdAluOp", 3, "true")
3345 buildVtbxl("vtbl", "NVtbl4", "SimdAluOp", 4, "true")
# vtbx: the same four table lengths with isVtbl passed as "false".
3347 buildVtbxl("vtbx", "NVtbx1", "SimdAluOp", 1, "false")
3348 buildVtbxl("vtbx", "NVtbx2", "SimdAluOp", 2, "false")
3349 buildVtbxl("vtbx", "NVtbx3", "SimdAluOp", 3, "false")
3350 buildVtbxl("vtbx", "NVtbx4", "SimdAluOp", 4, "false")