3 // Copyright (c) 2010-2011 ARM Limited
6 // The license below extends only to copyright in the software and shall
7 // not be construed as granting a license to any other intellectual
8 // property including but not limited to intellectual property relating
9 // to a hardware implementation of the functionality of the software
10 // licensed hereunder. You may use the software subject to the license
11 // terms below provided that you ensure that this notice is replicated
12 // unmodified and in its entirety in all distributions of the software,
13 // modified or unmodified, in source code or in binary form.
15 // Redistribution and use in source and binary forms, with or without
16 // modification, are permitted provided that the following conditions are
17 // met: redistributions of source code must retain the above copyright
18 // notice, this list of conditions and the following disclaimer;
19 // redistributions in binary form must reproduce the above copyright
20 // notice, this list of conditions and the following disclaimer in the
21 // documentation and/or other materials provided with the distribution;
22 // neither the name of the copyright holders nor the names of its
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
26 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 // Authors: Gabe Black
// decodeNeonUThreeUReg: builds Base<T>(machInst, dest, op1, op2) for an
// unsigned element type T (uint8_t, uint16_t, uint32_t or uint64_t), with
// Unknown(machInst) as the fallback.
// NOTE(review): the return type and the selection on `size` are not visible
// in this extract -- presumably a switch over size; confirm.
41 template <template <typename T> class Base>
43 decodeNeonUThreeUReg(unsigned size,
44 ExtMachInst machInst, IntRegIndex dest,
45 IntRegIndex op1, IntRegIndex op2)
49 return new Base<uint8_t>(machInst, dest, op1, op2);
51 return new Base<uint16_t>(machInst, dest, op1, op2);
53 return new Base<uint32_t>(machInst, dest, op1, op2);
55 return new Base<uint64_t>(machInst, dest, op1, op2);
57 return new Unknown(machInst);
// decodeNeonSThreeUReg: signed counterpart of the three-register decode --
// builds Base<T>(machInst, dest, op1, op2) for T in int8_t, int16_t,
// int32_t or int64_t, with Unknown(machInst) as the fallback.
// NOTE(review): the selection on `size` is not visible in this extract.
61 template <template <typename T> class Base>
63 decodeNeonSThreeUReg(unsigned size,
64 ExtMachInst machInst, IntRegIndex dest,
65 IntRegIndex op1, IntRegIndex op2)
69 return new Base<int8_t>(machInst, dest, op1, op2);
71 return new Base<int16_t>(machInst, dest, op1, op2);
73 return new Base<int32_t>(machInst, dest, op1, op2);
75 return new Base<int64_t>(machInst, dest, op1, op2);
77 return new Unknown(machInst);
// decodeNeonUSThreeUReg: forwards the operands unchanged to either
// decodeNeonUThreeUReg<Base> or decodeNeonSThreeUReg<Base>.
// NOTE(review): presumably chosen by `notSigned`; the branch itself is not
// visible in this extract.
81 template <template <typename T> class Base>
83 decodeNeonUSThreeUReg(bool notSigned, unsigned size,
84 ExtMachInst machInst, IntRegIndex dest,
85 IntRegIndex op1, IntRegIndex op2)
88 return decodeNeonUThreeUReg<Base>(size, machInst, dest, op1, op2);
90 return decodeNeonSThreeUReg<Base>(size, machInst, dest, op1, op2);
// decodeNeonUThreeUSReg: like decodeNeonUThreeUReg but without a 64-bit
// case -- builds Base<T> for T in uint8_t, uint16_t or uint32_t, with
// Unknown(machInst) as the fallback.
// NOTE(review): the selection on `size` is not visible in this extract.
94 template <template <typename T> class Base>
96 decodeNeonUThreeUSReg(unsigned size,
97 ExtMachInst machInst, IntRegIndex dest,
98 IntRegIndex op1, IntRegIndex op2)
102 return new Base<uint8_t>(machInst, dest, op1, op2);
104 return new Base<uint16_t>(machInst, dest, op1, op2);
106 return new Base<uint32_t>(machInst, dest, op1, op2);
108 return new Unknown(machInst);
// decodeNeonSThreeUSReg: builds Base<T>(machInst, dest, op1, op2) for T in
// int8_t, int16_t or int32_t (no 64-bit case), with Unknown(machInst) as
// the fallback.
// NOTE(review): the selection on `size` is not visible in this extract.
112 template <template <typename T> class Base>
114 decodeNeonSThreeUSReg(unsigned size,
115 ExtMachInst machInst, IntRegIndex dest,
116 IntRegIndex op1, IntRegIndex op2)
120 return new Base<int8_t>(machInst, dest, op1, op2);
122 return new Base<int16_t>(machInst, dest, op1, op2);
124 return new Base<int32_t>(machInst, dest, op1, op2);
126 return new Unknown(machInst);
130 template <template <typename T> class Base>
132 decodeNeonSThreeHAndWReg(unsigned size, ExtMachInst machInst,
133 IntRegIndex dest, IntRegIndex op1,
138 return new Base<int16_t>(machInst, dest, op1, op2);
140 return new Base<int32_t>(machInst, dest, op1, op2);
142 return new Unknown(machInst);
// decodeNeonSThreeImmHAndWReg (single-Base form): builds Base<int16_t> or
// Base<int32_t> from (machInst, dest, op1, op2, imm), with Unknown(machInst)
// as the fallback.
// NOTE(review): the selection on `size` is not visible in this extract.
146 template <template <typename T> class Base>
148 decodeNeonSThreeImmHAndWReg(unsigned size, ExtMachInst machInst,
149 IntRegIndex dest, IntRegIndex op1,
150 IntRegIndex op2, uint64_t imm)
154 return new Base<int16_t>(machInst, dest, op1, op2, imm);
156 return new Base<int32_t>(machInst, dest, op1, op2, imm);
158 return new Unknown(machInst);
// decodeNeonUSThreeUSReg: forwards the operands unchanged to either
// decodeNeonUThreeUSReg<Base> or decodeNeonSThreeUSReg<Base>.
// NOTE(review): presumably chosen by `notSigned`; the branch itself is not
// visible in this extract.
162 template <template <typename T> class Base>
164 decodeNeonUSThreeUSReg(bool notSigned, unsigned size,
165 ExtMachInst machInst, IntRegIndex dest,
166 IntRegIndex op1, IntRegIndex op2)
169 return decodeNeonUThreeUSReg<Base>(
170 size, machInst, dest, op1, op2);
172 return decodeNeonSThreeUSReg<Base>(
173 size, machInst, dest, op1, op2);
// decodeNeonUThreeSReg: forwards to decodeNeonUThreeUSReg with either BaseQ
// or BaseD as the instruction template.
// NOTE(review): presumably BaseQ when `q` is set; the branch itself is not
// visible in this extract.
177 template <template <typename T> class BaseD,
178 template <typename T> class BaseQ>
180 decodeNeonUThreeSReg(bool q, unsigned size,
181 ExtMachInst machInst, IntRegIndex dest,
182 IntRegIndex op1, IntRegIndex op2)
185 return decodeNeonUThreeUSReg<BaseQ>(
186 size, machInst, dest, op1, op2);
188 return decodeNeonUThreeUSReg<BaseD>(
189 size, machInst, dest, op1, op2);
// decodeNeonSThreeSReg: forwards to decodeNeonSThreeUSReg with either BaseQ
// or BaseD as the instruction template.
// NOTE(review): presumably BaseQ when `q` is set; the branch itself is not
// visible in this extract.
193 template <template <typename T> class BaseD,
194 template <typename T> class BaseQ>
196 decodeNeonSThreeSReg(bool q, unsigned size,
197 ExtMachInst machInst, IntRegIndex dest,
198 IntRegIndex op1, IntRegIndex op2)
201 return decodeNeonSThreeUSReg<BaseQ>(
202 size, machInst, dest, op1, op2);
204 return decodeNeonSThreeUSReg<BaseD>(
205 size, machInst, dest, op1, op2);
// decodeNeonSThreeXReg: the BaseQ path goes through decodeNeonSThreeUReg
// (which has an int64_t case) while the BaseD path goes through
// decodeNeonSThreeUSReg (which does not).
// NOTE(review): presumably BaseQ when `q` is set; the branch itself is not
// visible in this extract.
209 template <template <typename T> class BaseD,
210 template <typename T> class BaseQ>
212 decodeNeonSThreeXReg(bool q, unsigned size,
213 ExtMachInst machInst, IntRegIndex dest,
214 IntRegIndex op1, IntRegIndex op2)
217 return decodeNeonSThreeUReg<BaseQ>(
218 size, machInst, dest, op1, op2);
220 return decodeNeonSThreeUSReg<BaseD>(
221 size, machInst, dest, op1, op2);
// decodeNeonUThreeXReg: the BaseQ path goes through decodeNeonUThreeUReg
// (which has a uint64_t case) while the BaseD path goes through
// decodeNeonUThreeUSReg (which does not).
// NOTE(review): presumably BaseQ when `q` is set; the branch itself is not
// visible in this extract.
225 template <template <typename T> class BaseD,
226 template <typename T> class BaseQ>
228 decodeNeonUThreeXReg(bool q, unsigned size,
229 ExtMachInst machInst, IntRegIndex dest,
230 IntRegIndex op1, IntRegIndex op2)
233 return decodeNeonUThreeUReg<BaseQ>(
234 size, machInst, dest, op1, op2);
236 return decodeNeonUThreeUSReg<BaseD>(
237 size, machInst, dest, op1, op2);
// decodeNeonUSThreeSReg: forwards (q, size, machInst, dest, op1, op2) to
// decodeNeonUThreeSReg or decodeNeonSThreeSReg with the same BaseD/BaseQ.
// NOTE(review): presumably chosen by `notSigned`; the branch itself is not
// visible in this extract.
241 template <template <typename T> class BaseD,
242 template <typename T> class BaseQ>
244 decodeNeonUSThreeSReg(bool q, bool notSigned, unsigned size,
245 ExtMachInst machInst, IntRegIndex dest,
246 IntRegIndex op1, IntRegIndex op2)
249 return decodeNeonUThreeSReg<BaseD, BaseQ>(
250 q, size, machInst, dest, op1, op2);
252 return decodeNeonSThreeSReg<BaseD, BaseQ>(
253 q, size, machInst, dest, op1, op2);
// decodeNeonUThreeReg: forwards to decodeNeonUThreeUReg with either BaseQ
// or BaseD, so both paths include the uint64_t case.
// NOTE(review): presumably BaseQ when `q` is set; the branch itself is not
// visible in this extract.
257 template <template <typename T> class BaseD,
258 template <typename T> class BaseQ>
260 decodeNeonUThreeReg(bool q, unsigned size,
261 ExtMachInst machInst, IntRegIndex dest,
262 IntRegIndex op1, IntRegIndex op2)
265 return decodeNeonUThreeUReg<BaseQ>(
266 size, machInst, dest, op1, op2);
268 return decodeNeonUThreeUReg<BaseD>(
269 size, machInst, dest, op1, op2);
// decodeNeonSThreeReg: forwards to decodeNeonSThreeUReg with either BaseQ
// or BaseD, so both paths include the int64_t case.
// NOTE(review): presumably BaseQ when `q` is set; the branch itself is not
// visible in this extract.
273 template <template <typename T> class BaseD,
274 template <typename T> class BaseQ>
276 decodeNeonSThreeReg(bool q, unsigned size,
277 ExtMachInst machInst, IntRegIndex dest,
278 IntRegIndex op1, IntRegIndex op2)
281 return decodeNeonSThreeUReg<BaseQ>(
282 size, machInst, dest, op1, op2);
284 return decodeNeonSThreeUReg<BaseD>(
285 size, machInst, dest, op1, op2);
// decodeNeonUSThreeReg: forwards (q, size, machInst, dest, op1, op2) to
// decodeNeonUThreeReg or decodeNeonSThreeReg with the same BaseD/BaseQ.
// NOTE(review): presumably chosen by `notSigned`; the branch itself is not
// visible in this extract.
289 template <template <typename T> class BaseD,
290 template <typename T> class BaseQ>
292 decodeNeonUSThreeReg(bool q, bool notSigned, unsigned size,
293 ExtMachInst machInst, IntRegIndex dest,
294 IntRegIndex op1, IntRegIndex op2)
297 return decodeNeonUThreeReg<BaseD, BaseQ>(
298 q, size, machInst, dest, op1, op2);
300 return decodeNeonSThreeReg<BaseD, BaseQ>(
301 q, size, machInst, dest, op1, op2);
// decodeNeonUThreeFpReg: possible results are BaseQ<uint64_t>,
// BaseQ<uint32_t>, BaseD<uint32_t> or Unknown(machInst); there is no
// BaseD<uint64_t> variant.
// NOTE(review): the tests on `q` and `size` that pick among these are not
// visible in this extract; confirm against the full file.
305 template <template <typename T> class BaseD,
306 template <typename T> class BaseQ>
308 decodeNeonUThreeFpReg(bool q, unsigned size, ExtMachInst machInst,
309 IntRegIndex dest, IntRegIndex op1, IntRegIndex op2)
313 return new BaseQ<uint64_t>(machInst, dest, op1, op2);
315 return new BaseQ<uint32_t>(machInst, dest, op1, op2);
318 return new Unknown(machInst);
320 return new BaseD<uint32_t>(machInst, dest, op1, op2);
// decodeNeonUThreeScFpReg: builds Base<uint64_t> or Base<uint32_t> from
// (machInst, dest, op1, op2). Note that `size` is a bool here.
// NOTE(review): presumably uint64_t when `size` is true; the test itself is
// not visible in this extract.
324 template <template <typename T> class Base>
326 decodeNeonUThreeScFpReg(bool size, ExtMachInst machInst,
327 IntRegIndex dest, IntRegIndex op1, IntRegIndex op2)
330 return new Base<uint64_t>(machInst, dest, op1, op2);
332 return new Base<uint32_t>(machInst, dest, op1, op2);
// decodeNeonUThreeImmScFpReg: builds Base<uint64_t> or Base<uint32_t> from
// (machInst, dest, op1, op2, imm). Note that `size` is a bool here.
// NOTE(review): presumably uint64_t when `size` is true; the test itself is
// not visible in this extract.
335 template <template <typename T> class Base>
337 decodeNeonUThreeImmScFpReg(bool size, ExtMachInst machInst,
338 IntRegIndex dest, IntRegIndex op1,
339 IntRegIndex op2, uint64_t imm)
342 return new Base<uint64_t>(machInst, dest, op1, op2, imm);
344 return new Base<uint32_t>(machInst, dest, op1, op2, imm);
// decodeNeonUThreeImmHAndWReg: builds BaseQ<T> or BaseD<T> from
// (machInst, dest, op1, op2, imm) for T in uint16_t or uint32_t; each group
// falls back to Unknown(machInst).
// NOTE(review): the tests on `q` and `size` are not visible in this extract.
347 template <template <typename T> class BaseD,
348 template <typename T> class BaseQ>
350 decodeNeonUThreeImmHAndWReg(bool q, unsigned size, ExtMachInst machInst,
351 IntRegIndex dest, IntRegIndex op1,
352 IntRegIndex op2, uint64_t imm)
357 return new BaseQ<uint16_t>(machInst, dest, op1, op2, imm);
359 return new BaseQ<uint32_t>(machInst, dest, op1, op2, imm);
361 return new Unknown(machInst);
366 return new BaseD<uint16_t>(machInst, dest, op1, op2, imm);
368 return new BaseD<uint32_t>(machInst, dest, op1, op2, imm);
370 return new Unknown(machInst);
// decodeNeonSThreeImmHAndWReg (BaseD/BaseQ overload): builds BaseQ<T> or
// BaseD<T> from (machInst, dest, op1, op2, imm) for T in int16_t or int32_t;
// each group falls back to Unknown(machInst).
// NOTE(review): the tests on `q` and `size` are not visible in this extract.
375 template <template <typename T> class BaseD,
376 template <typename T> class BaseQ>
378 decodeNeonSThreeImmHAndWReg(bool q, unsigned size, ExtMachInst machInst,
379 IntRegIndex dest, IntRegIndex op1,
380 IntRegIndex op2, uint64_t imm)
385 return new BaseQ<int16_t>(machInst, dest, op1, op2, imm);
387 return new BaseQ<int32_t>(machInst, dest, op1, op2, imm);
389 return new Unknown(machInst);
394 return new BaseD<int16_t>(machInst, dest, op1, op2, imm);
396 return new BaseD<int32_t>(machInst, dest, op1, op2, imm);
398 return new Unknown(machInst);
// decodeNeonUThreeImmFpReg: possible results are BaseQ<uint64_t>,
// BaseQ<uint32_t>, BaseD<uint32_t> or Unknown(machInst), each built from
// (machInst, dest, op1, op2, imm); there is no BaseD<uint64_t> variant.
// NOTE(review): the tests on `q` and `size` are not visible in this extract.
403 template <template <typename T> class BaseD,
404 template <typename T> class BaseQ>
406 decodeNeonUThreeImmFpReg(bool q, unsigned size, ExtMachInst machInst,
407 IntRegIndex dest, IntRegIndex op1,
408 IntRegIndex op2, uint64_t imm)
412 return new BaseQ<uint64_t>(machInst, dest, op1, op2, imm);
414 return new BaseQ<uint32_t>(machInst, dest, op1, op2, imm);
417 return new Unknown(machInst);
419 return new BaseD<uint32_t>(machInst, dest, op1, op2, imm);
// decodeNeonUTwoShiftReg: builds BaseQ<T> or BaseD<T> from
// (machInst, dest, op1, imm) for T in uint8_t, uint16_t, uint32_t or
// uint64_t; each group falls back to Unknown(machInst).
// NOTE(review): the tests on `q` and `size` are not visible in this extract.
423 template <template <typename T> class BaseD,
424 template <typename T> class BaseQ>
426 decodeNeonUTwoShiftReg(bool q, unsigned size,
427 ExtMachInst machInst, IntRegIndex dest,
428 IntRegIndex op1, uint64_t imm)
433 return new BaseQ<uint8_t>(machInst, dest, op1, imm);
435 return new BaseQ<uint16_t>(machInst, dest, op1, imm);
437 return new BaseQ<uint32_t>(machInst, dest, op1, imm);
439 return new BaseQ<uint64_t>(machInst, dest, op1, imm);
441 return new Unknown(machInst);
446 return new BaseD<uint8_t>(machInst, dest, op1, imm);
448 return new BaseD<uint16_t>(machInst, dest, op1, imm);
450 return new BaseD<uint32_t>(machInst, dest, op1, imm);
452 return new BaseD<uint64_t>(machInst, dest, op1, imm);
454 return new Unknown(machInst);
// decodeNeonSTwoShiftReg: builds BaseQ<T> or BaseD<T> from
// (machInst, dest, op1, imm) for T in int8_t, int16_t, int32_t or int64_t;
// each group falls back to Unknown(machInst).
// NOTE(review): the tests on `q` and `size` are not visible in this extract.
459 template <template <typename T> class BaseD,
460 template <typename T> class BaseQ>
462 decodeNeonSTwoShiftReg(bool q, unsigned size,
463 ExtMachInst machInst, IntRegIndex dest,
464 IntRegIndex op1, uint64_t imm)
469 return new BaseQ<int8_t>(machInst, dest, op1, imm);
471 return new BaseQ<int16_t>(machInst, dest, op1, imm);
473 return new BaseQ<int32_t>(machInst, dest, op1, imm);
475 return new BaseQ<int64_t>(machInst, dest, op1, imm);
477 return new Unknown(machInst);
482 return new BaseD<int8_t>(machInst, dest, op1, imm);
484 return new BaseD<int16_t>(machInst, dest, op1, imm);
486 return new BaseD<int32_t>(machInst, dest, op1, imm);
488 return new BaseD<int64_t>(machInst, dest, op1, imm);
490 return new Unknown(machInst);
// decodeNeonUSTwoShiftReg: forwards (q, size, machInst, dest, op1, imm) to
// decodeNeonUTwoShiftReg or decodeNeonSTwoShiftReg with the same BaseD/BaseQ.
// NOTE(review): presumably chosen by `notSigned`; the branch itself is not
// visible in this extract.
496 template <template <typename T> class BaseD,
497 template <typename T> class BaseQ>
499 decodeNeonUSTwoShiftReg(bool q, bool notSigned, unsigned size,
500 ExtMachInst machInst, IntRegIndex dest,
501 IntRegIndex op1, uint64_t imm)
504 return decodeNeonUTwoShiftReg<BaseD, BaseQ>(
505 q, size, machInst, dest, op1, imm);
507 return decodeNeonSTwoShiftReg<BaseD, BaseQ>(
508 q, size, machInst, dest, op1, imm);
// decodeNeonUTwoShiftUSReg: builds Base<T>(machInst, dest, op1, imm) for T
// in uint8_t, uint16_t or uint32_t (no 64-bit case), with Unknown(machInst)
// as the fallback.
// NOTE(review): the selection on `size` is not visible in this extract.
512 template <template <typename T> class Base>
514 decodeNeonUTwoShiftUSReg(unsigned size,
515 ExtMachInst machInst, IntRegIndex dest,
516 IntRegIndex op1, uint64_t imm)
520 return new Base<uint8_t>(machInst, dest, op1, imm);
522 return new Base<uint16_t>(machInst, dest, op1, imm);
524 return new Base<uint32_t>(machInst, dest, op1, imm);
526 return new Unknown(machInst);
// decodeNeonUTwoShiftUReg: builds Base<T>(machInst, dest, op1, imm) for T in
// uint8_t, uint16_t, uint32_t or uint64_t, with Unknown(machInst) as the
// fallback.
// NOTE(review): the selection on `size` is not visible in this extract.
530 template <template <typename T> class Base>
532 decodeNeonUTwoShiftUReg(unsigned size,
533 ExtMachInst machInst, IntRegIndex dest,
534 IntRegIndex op1, uint64_t imm)
538 return new Base<uint8_t>(machInst, dest, op1, imm);
540 return new Base<uint16_t>(machInst, dest, op1, imm);
542 return new Base<uint32_t>(machInst, dest, op1, imm);
544 return new Base<uint64_t>(machInst, dest, op1, imm);
546 return new Unknown(machInst);
// decodeNeonSTwoShiftUReg: builds Base<T>(machInst, dest, op1, imm) for T in
// int8_t, int16_t, int32_t or int64_t, with Unknown(machInst) as the
// fallback.
// NOTE(review): the selection on `size` is not visible in this extract.
550 template <template <typename T> class Base>
552 decodeNeonSTwoShiftUReg(unsigned size,
553 ExtMachInst machInst, IntRegIndex dest,
554 IntRegIndex op1, uint64_t imm)
558 return new Base<int8_t>(machInst, dest, op1, imm);
560 return new Base<int16_t>(machInst, dest, op1, imm);
562 return new Base<int32_t>(machInst, dest, op1, imm);
564 return new Base<int64_t>(machInst, dest, op1, imm);
566 return new Unknown(machInst);
// decodeNeonUTwoShiftSReg: forwards (size, machInst, dest, op1, imm) to
// decodeNeonUTwoShiftUSReg with either BaseQ or BaseD.
// NOTE(review): presumably BaseQ when `q` is set; the branch itself is not
// visible in this extract.
570 template <template <typename T> class BaseD,
571 template <typename T> class BaseQ>
573 decodeNeonUTwoShiftSReg(bool q, unsigned size,
574 ExtMachInst machInst, IntRegIndex dest,
575 IntRegIndex op1, uint64_t imm)
578 return decodeNeonUTwoShiftUSReg<BaseQ>(
579 size, machInst, dest, op1, imm);
581 return decodeNeonUTwoShiftUSReg<BaseD>(
582 size, machInst, dest, op1, imm);
// decodeNeonSTwoShiftUSReg: builds Base<T>(machInst, dest, op1, imm) for T
// in int8_t, int16_t or int32_t (no 64-bit case), with Unknown(machInst) as
// the fallback.
// NOTE(review): the selection on `size` is not visible in this extract.
586 template <template <typename T> class Base>
588 decodeNeonSTwoShiftUSReg(unsigned size,
589 ExtMachInst machInst, IntRegIndex dest,
590 IntRegIndex op1, uint64_t imm)
594 return new Base<int8_t>(machInst, dest, op1, imm);
596 return new Base<int16_t>(machInst, dest, op1, imm);
598 return new Base<int32_t>(machInst, dest, op1, imm);
600 return new Unknown(machInst);
// decodeNeonSTwoShiftSReg: forwards (size, machInst, dest, op1, imm) to
// decodeNeonSTwoShiftUSReg with either BaseQ or BaseD.
// NOTE(review): presumably BaseQ when `q` is set; the branch itself is not
// visible in this extract.
604 template <template <typename T> class BaseD,
605 template <typename T> class BaseQ>
607 decodeNeonSTwoShiftSReg(bool q, unsigned size,
608 ExtMachInst machInst, IntRegIndex dest,
609 IntRegIndex op1, uint64_t imm)
612 return decodeNeonSTwoShiftUSReg<BaseQ>(
613 size, machInst, dest, op1, imm);
615 return decodeNeonSTwoShiftUSReg<BaseD>(
616 size, machInst, dest, op1, imm);
// decodeNeonUSTwoShiftSReg: forwards (q, size, machInst, dest, op1, imm) to
// decodeNeonUTwoShiftSReg or decodeNeonSTwoShiftSReg with the same
// BaseD/BaseQ.
// NOTE(review): presumably chosen by `notSigned`; the branch itself is not
// visible in this extract.
620 template <template <typename T> class BaseD,
621 template <typename T> class BaseQ>
623 decodeNeonUSTwoShiftSReg(bool q, bool notSigned, unsigned size,
624 ExtMachInst machInst, IntRegIndex dest,
625 IntRegIndex op1, uint64_t imm)
628 return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
629 q, size, machInst, dest, op1, imm);
631 return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
632 q, size, machInst, dest, op1, imm);
// decodeNeonUTwoShiftXReg: the BaseQ path goes through
// decodeNeonUTwoShiftUReg (which has a uint64_t case) while the BaseD path
// goes through decodeNeonUTwoShiftUSReg (which does not).
// NOTE(review): presumably BaseQ when `q` is set; the branch itself is not
// visible in this extract.
636 template <template <typename T> class BaseD,
637 template <typename T> class BaseQ>
639 decodeNeonUTwoShiftXReg(bool q, unsigned size, ExtMachInst machInst,
640 IntRegIndex dest, IntRegIndex op1, uint64_t imm)
643 return decodeNeonUTwoShiftUReg<BaseQ>(
644 size, machInst, dest, op1, imm);
646 return decodeNeonUTwoShiftUSReg<BaseD>(
647 size, machInst, dest, op1, imm);
// decodeNeonSTwoShiftXReg: the BaseQ path goes through
// decodeNeonSTwoShiftUReg (which has an int64_t case) while the BaseD path
// goes through decodeNeonSTwoShiftUSReg (which does not).
// NOTE(review): presumably BaseQ when `q` is set; the branch itself is not
// visible in this extract.
651 template <template <typename T> class BaseD,
652 template <typename T> class BaseQ>
654 decodeNeonSTwoShiftXReg(bool q, unsigned size, ExtMachInst machInst,
655 IntRegIndex dest, IntRegIndex op1, uint64_t imm)
658 return decodeNeonSTwoShiftUReg<BaseQ>(
659 size, machInst, dest, op1, imm);
661 return decodeNeonSTwoShiftUSReg<BaseD>(
662 size, machInst, dest, op1, imm);
// decodeNeonUTwoShiftUFpReg: builds Base<uint64_t> or Base<uint32_t> from
// (machInst, dest, op1, imm); no Unknown fallback is visible.
// NOTE(review): presumably uint64_t when `size` is non-zero; the test itself
// is not visible in this extract.
666 template <template <typename T> class Base>
668 decodeNeonUTwoShiftUFpReg(unsigned size, ExtMachInst machInst,
669 IntRegIndex dest, IntRegIndex op1, uint64_t imm)
672 return new Base<uint64_t>(machInst, dest, op1, imm);
674 return new Base<uint32_t>(machInst, dest, op1, imm);
// decodeNeonUTwoShiftFpReg: possible results are BaseQ<uint64_t>,
// BaseQ<uint32_t>, BaseD<uint32_t> or Unknown(machInst), each built from
// (machInst, dest, op1, imm); there is no BaseD<uint64_t> variant.
// NOTE(review): the tests on `q` and `size` are not visible in this extract.
677 template <template <typename T> class BaseD,
678 template <typename T> class BaseQ>
680 decodeNeonUTwoShiftFpReg(bool q, unsigned size, ExtMachInst machInst,
681 IntRegIndex dest, IntRegIndex op1, uint64_t imm)
685 return new BaseQ<uint64_t>(machInst, dest, op1, imm);
687 return new BaseQ<uint32_t>(machInst, dest, op1, imm);
690 return new Unknown(machInst);
692 return new BaseD<uint32_t>(machInst, dest, op1, imm);
// decodeNeonUTwoMiscUSReg: builds Base<T>(machInst, dest, op1) for T in
// uint8_t, uint16_t or uint32_t (no 64-bit case), with Unknown(machInst) as
// the fallback.
// NOTE(review): the `op1` parameter line and the selection on `size` are not
// visible in this extract.
696 template <template <typename T> class Base>
698 decodeNeonUTwoMiscUSReg(unsigned size,
699 ExtMachInst machInst, IntRegIndex dest,
704 return new Base<uint8_t>(machInst, dest, op1);
706 return new Base<uint16_t>(machInst, dest, op1);
708 return new Base<uint32_t>(machInst, dest, op1);
710 return new Unknown(machInst);
// decodeNeonSTwoMiscUSReg: builds Base<T>(machInst, dest, op1) for T in
// int8_t, int16_t or int32_t (no 64-bit case), with Unknown(machInst) as the
// fallback.
// NOTE(review): the `op1` parameter line and the selection on `size` are not
// visible in this extract.
714 template <template <typename T> class Base>
716 decodeNeonSTwoMiscUSReg(unsigned size,
717 ExtMachInst machInst, IntRegIndex dest,
722 return new Base<int8_t>(machInst, dest, op1);
724 return new Base<int16_t>(machInst, dest, op1);
726 return new Base<int32_t>(machInst, dest, op1);
728 return new Unknown(machInst);
// decodeNeonUTwoMiscSReg: forwards (size, machInst, dest, op1) to
// decodeNeonUTwoMiscUSReg with either BaseQ or BaseD.
// NOTE(review): presumably BaseQ when `q` is set; the branch and the `op1`
// parameter line are not visible in this extract.
732 template <template <typename T> class BaseD,
733 template <typename T> class BaseQ>
735 decodeNeonUTwoMiscSReg(bool q, unsigned size,
736 ExtMachInst machInst, IntRegIndex dest,
740 return decodeNeonUTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
742 return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
// decodeNeonSTwoMiscSReg: forwards (size, machInst, dest, op1) to
// decodeNeonSTwoMiscUSReg with either BaseQ or BaseD.
// NOTE(review): presumably BaseQ when `q` is set; the branch and the `op1`
// parameter line are not visible in this extract.
746 template <template <typename T> class BaseD,
747 template <typename T> class BaseQ>
749 decodeNeonSTwoMiscSReg(bool q, unsigned size,
750 ExtMachInst machInst, IntRegIndex dest,
754 return decodeNeonSTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
756 return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
// decodeNeonUTwoMiscUReg: builds Base<T>(machInst, dest, op1) for T in
// uint8_t, uint16_t, uint32_t or uint64_t, with Unknown(machInst) as the
// fallback.
// NOTE(review): the `op1` parameter line and the selection on `size` are not
// visible in this extract.
760 template <template <typename T> class Base>
762 decodeNeonUTwoMiscUReg(unsigned size,
763 ExtMachInst machInst, IntRegIndex dest,
768 return new Base<uint8_t>(machInst, dest, op1);
770 return new Base<uint16_t>(machInst, dest, op1);
772 return new Base<uint32_t>(machInst, dest, op1);
774 return new Base<uint64_t>(machInst, dest, op1);
776 return new Unknown(machInst);
// decodeNeonSTwoMiscUReg: builds Base<T>(machInst, dest, op1) for T in
// int8_t, int16_t, int32_t or int64_t, with Unknown(machInst) as the
// fallback.
// NOTE(review): the `op1` parameter line and the selection on `size` are not
// visible in this extract.
780 template <template <typename T> class Base>
782 decodeNeonSTwoMiscUReg(unsigned size,
783 ExtMachInst machInst, IntRegIndex dest,
788 return new Base<int8_t>(machInst, dest, op1);
790 return new Base<int16_t>(machInst, dest, op1);
792 return new Base<int32_t>(machInst, dest, op1);
794 return new Base<int64_t>(machInst, dest, op1);
796 return new Unknown(machInst);
// decodeNeonSTwoMiscReg: forwards (size, machInst, dest, op1) to
// decodeNeonSTwoMiscUReg with either BaseQ or BaseD.
// NOTE(review): presumably BaseQ when `q` is set; the branch and the `op1`
// parameter line are not visible in this extract.
800 template <template <typename T> class BaseD,
801 template <typename T> class BaseQ>
803 decodeNeonSTwoMiscReg(bool q, unsigned size,
804 ExtMachInst machInst, IntRegIndex dest,
808 return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
810 return decodeNeonSTwoMiscUReg<BaseD>(size, machInst, dest, op1);
// decodeNeonUTwoMiscReg: forwards (size, machInst, dest, op1) to
// decodeNeonUTwoMiscUReg with either BaseQ or BaseD.
// NOTE(review): presumably BaseQ when `q` is set; the branch and the `op1`
// parameter line are not visible in this extract.
814 template <template <typename T> class BaseD,
815 template <typename T> class BaseQ>
817 decodeNeonUTwoMiscReg(bool q, unsigned size,
818 ExtMachInst machInst, IntRegIndex dest,
822 return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
824 return decodeNeonUTwoMiscUReg<BaseD>(size, machInst, dest, op1);
828 template <template <typename T> class BaseD,
829 template <typename T> class BaseQ>
831 decodeNeonUSTwoMiscSReg(bool q, bool notSigned, unsigned size,
832 ExtMachInst machInst, IntRegIndex dest,
836 return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
837 q, size, machInst, dest, op1);
839 return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
840 q, size, machInst, dest, op1);
// decodeNeonUTwoMiscXReg: the BaseQ path goes through decodeNeonUTwoMiscUReg
// (which has a uint64_t case) while the BaseD path goes through
// decodeNeonUTwoMiscUSReg (which does not).
// NOTE(review): presumably BaseQ when `q` is set; the branch itself is not
// visible in this extract.
844 template <template <typename T> class BaseD,
845 template <typename T> class BaseQ>
847 decodeNeonUTwoMiscXReg(bool q, unsigned size, ExtMachInst machInst,
848 IntRegIndex dest, IntRegIndex op1)
851 return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
853 return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
// decodeNeonSTwoMiscXReg: the BaseQ path goes through decodeNeonSTwoMiscUReg
// (which has an int64_t case) while the BaseD path goes through
// decodeNeonSTwoMiscUSReg (which does not).
// NOTE(review): presumably BaseQ when `q` is set; the branch itself is not
// visible in this extract.
857 template <template <typename T> class BaseD,
858 template <typename T> class BaseQ>
860 decodeNeonSTwoMiscXReg(bool q, unsigned size, ExtMachInst machInst,
861 IntRegIndex dest, IntRegIndex op1)
864 return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
866 return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
// decodeNeonUTwoMiscFpReg: possible results are BaseQ<uint64_t>,
// BaseQ<uint32_t>, BaseD<uint32_t> or Unknown(machInst), each built from
// (machInst, dest, op1); there is no BaseD<uint64_t> variant.
// NOTE(review): the tests on `q` and `size` are not visible in this extract.
870 template <template <typename T> class BaseD,
871 template <typename T> class BaseQ>
873 decodeNeonUTwoMiscFpReg(bool q, unsigned size, ExtMachInst machInst,
874 IntRegIndex dest, IntRegIndex op1)
878 return new BaseQ<uint64_t>(machInst, dest, op1);
880 return new BaseQ<uint32_t>(machInst, dest, op1);
883 return new Unknown(machInst);
885 return new BaseD<uint32_t>(machInst, dest, op1);
// decodeNeonUTwoMiscPwiseScFpReg: builds BaseQ<uint64_t> or BaseD<uint32_t>
// from (machInst, dest, op1) -- the template and the element width change
// together; no Unknown fallback is visible.
// NOTE(review): presumably the 64-bit form when `size` is non-zero; the test
// itself is not visible in this extract.
889 template <template <typename T> class BaseD,
890 template <typename T> class BaseQ>
892 decodeNeonUTwoMiscPwiseScFpReg(unsigned size, ExtMachInst machInst,
893 IntRegIndex dest, IntRegIndex op1)
896 return new BaseQ<uint64_t>(machInst, dest, op1);
898 return new BaseD<uint32_t>(machInst, dest, op1);
// decodeNeonUTwoMiscScFpReg: builds Base<uint64_t> or Base<uint32_t> from
// (machInst, dest, op1); no Unknown fallback is visible.
// NOTE(review): presumably uint64_t when `size` is non-zero; the test itself
// is not visible in this extract.
901 template <template <typename T> class Base>
903 decodeNeonUTwoMiscScFpReg(unsigned size, ExtMachInst machInst,
904 IntRegIndex dest, IntRegIndex op1)
907 return new Base<uint64_t>(machInst, dest, op1);
909 return new Base<uint32_t>(machInst, dest, op1);
// decodeNeonUAcrossLanesReg (two-template form): BaseQ is built for uint8_t,
// uint16_t or uint32_t, BaseD only for uint8_t or uint16_t; each group falls
// back to Unknown(machInst).
// NOTE(review): the tests on `q` and `size` are not visible in this extract.
912 template <template <typename T> class BaseD,
913 template <typename T> class BaseQ>
915 decodeNeonUAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst,
916 IntRegIndex dest, IntRegIndex op1)
921 return new BaseQ<uint8_t>(machInst, dest, op1);
923 return new BaseQ<uint16_t>(machInst, dest, op1);
925 return new BaseQ<uint32_t>(machInst, dest, op1);
927 return new Unknown(machInst);
932 return new BaseD<uint8_t>(machInst, dest, op1);
934 return new BaseD<uint16_t>(machInst, dest, op1);
936 return new Unknown(machInst);
// decodeNeonUAcrossLanesReg (three-template overload): same shape as the
// two-template form, except that the uint32_t case is built from BaseBQ
// instead of BaseQ. BaseD covers uint8_t and uint16_t only; each group falls
// back to Unknown(machInst).
// NOTE(review): the tests on `q` and `size` are not visible in this extract.
941 template <template <typename T> class BaseD,
942 template <typename T> class BaseQ,
943 template <typename T> class BaseBQ>
945 decodeNeonUAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst,
946 IntRegIndex dest, IntRegIndex op1)
951 return new BaseQ<uint8_t>(machInst, dest, op1);
953 return new BaseQ<uint16_t>(machInst, dest, op1);
955 return new BaseBQ<uint32_t>(machInst, dest, op1);
957 return new Unknown(machInst);
962 return new BaseD<uint8_t>(machInst, dest, op1);
964 return new BaseD<uint16_t>(machInst, dest, op1);
966 return new Unknown(machInst);
// decodeNeonSAcrossLanesReg: BaseQ is built for int8_t, int16_t or int32_t,
// BaseD only for int8_t or int16_t; each group falls back to
// Unknown(machInst).
// NOTE(review): the tests on `q` and `size` are not visible in this extract.
971 template <template <typename T> class BaseD,
972 template <typename T> class BaseQ>
974 decodeNeonSAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst,
975 IntRegIndex dest, IntRegIndex op1)
980 return new BaseQ<int8_t>(machInst, dest, op1);
982 return new BaseQ<int16_t>(machInst, dest, op1);
984 return new BaseQ<int32_t>(machInst, dest, op1);
986 return new Unknown(machInst);
991 return new BaseD<int8_t>(machInst, dest, op1);
993 return new BaseD<int16_t>(machInst, dest, op1);
995 return new Unknown(machInst);
// decodeNeonUAcrossLanesLongReg: BaseQ is built for uint8_t or uint16_t,
// BaseBQ for uint32_t, and BaseD for uint8_t or uint16_t; each group falls
// back to Unknown(machInst).
// NOTE(review): the tests on `q` and `size` are not visible in this extract.
1000 template <template <typename T> class BaseD,
1001 template <typename T> class BaseQ,
1002 template <typename T> class BaseBQ>
1004 decodeNeonUAcrossLanesLongReg(bool q, unsigned size, ExtMachInst machInst,
1005 IntRegIndex dest, IntRegIndex op1)
1010 return new BaseQ<uint8_t>(machInst, dest, op1);
1012 return new BaseQ<uint16_t>(machInst, dest, op1);
1014 return new BaseBQ<uint32_t>(machInst, dest, op1);
1016 return new Unknown(machInst);
1021 return new BaseD<uint8_t>(machInst, dest, op1);
1023 return new BaseD<uint16_t>(machInst, dest, op1);
1025 return new Unknown(machInst);
// decodeNeonSAcrossLanesLongReg: BaseQ is built for int8_t or int16_t,
// BaseBQ for int32_t, and BaseD for int8_t or int16_t; each group falls back
// to Unknown(machInst).
// NOTE(review): the tests on `q` and `size` are not visible in this extract.
1030 template <template <typename T> class BaseD,
1031 template <typename T> class BaseQ,
1032 template <typename T> class BaseBQ>
1034 decodeNeonSAcrossLanesLongReg(bool q, unsigned size, ExtMachInst machInst,
1035 IntRegIndex dest, IntRegIndex op1)
1040 return new BaseQ<int8_t>(machInst, dest, op1);
1042 return new BaseQ<int16_t>(machInst, dest, op1);
1044 return new BaseBQ<int32_t>(machInst, dest, op1);
1046 return new Unknown(machInst);
1051 return new BaseD<int8_t>(machInst, dest, op1);
1053 return new BaseD<int16_t>(machInst, dest, op1);
1055 return new Unknown(machInst);
// vcgtFunc: greater-than test on two floats. Visible encoding of the result:
// 0.0 when op1 > op2 holds, 1.0 when it does not. NaN operands take a
// separate early branch whose value is not visible in this extract.
1063 vcgtFunc(float op1, float op2)
1065 if (std::isnan(op1) || std::isnan(op2))
1067 return (op1 > op2) ? 0.0 : 1.0;
// vcgeFunc: greater-or-equal test on two floats. Visible encoding of the
// result: 0.0 when op1 >= op2 holds, 1.0 when it does not. NaN operands take
// a separate early branch whose value is not visible in this extract.
1071 vcgeFunc(float op1, float op2)
1073 if (std::isnan(op1) || std::isnan(op2))
1075 return (op1 >= op2) ? 0.0 : 1.0;
// vceqFunc: equality test on two floats. Visible encoding of the result:
// 0.0 when op1 == op2 holds, 1.0 when it does not. Unlike the ordering
// helpers, the early branch is guarded by isSnan() rather than std::isnan();
// the value it produces is not visible in this extract.
1079 vceqFunc(float op1, float op2)
1081 if (isSnan(op1) || isSnan(op2))
1083 return (op1 == op2) ? 0.0 : 1.0;
// vcleFunc: less-or-equal test on two floats. Visible encoding of the
// result: 0.0 when op1 <= op2 holds, 1.0 when it does not. NaN operands take
// a separate early branch whose value is not visible in this extract.
1087 vcleFunc(float op1, float op2)
1089 if (std::isnan(op1) || std::isnan(op2))
1091 return (op1 <= op2) ? 0.0 : 1.0;
// vcltFunc: less-than test on two floats. Visible encoding of the result:
// 0.0 when op1 < op2 holds, 1.0 when it does not. NaN operands take a
// separate early branch whose value is not visible in this extract.
1095 vcltFunc(float op1, float op2)
1097 if (std::isnan(op1) || std::isnan(op2))
1099 return (op1 < op2) ? 0.0 : 1.0;
// vacgtFunc: greater-than test on the absolute values (fabsf) of two floats.
// Visible encoding of the result: 0.0 when |op1| > |op2| holds, 1.0 when it
// does not. NaN operands take a separate early branch whose value is not
// visible in this extract.
1103 vacgtFunc(float op1, float op2)
1105 if (std::isnan(op1) || std::isnan(op2))
1107 return (fabsf(op1) > fabsf(op2)) ? 0.0 : 1.0;
// vacgeFunc: greater-or-equal test on the absolute values (fabsf) of two
// floats. Visible encoding of the result: 0.0 when |op1| >= |op2| holds, 1.0
// when it does not. NaN operands take a separate early branch whose value is
// not visible in this extract.
1111 vacgeFunc(float op1, float op2)
1113 if (std::isnan(op1) || std::isnan(op2))
1115 return (fabsf(op1) >= fabsf(op2)) ? 0.0 : 1.0;
# Element type names handed to the instruction generators below. The "small"
# groups stop at 32 bits; the full groups add the 64-bit type at the end.
smallUnsignedTypes = tuple("uint%d_t" % bits for bits in (8, 16, 32))
unsignedTypes = smallUnsignedTypes + ("uint64_t",)
# Signed names are the unsigned ones without the leading "u".
smallSignedTypes = tuple(typeName[1:] for typeName in smallUnsignedTypes)
signedTypes = smallSignedTypes + ("int64_t",)
smallTypes = smallUnsignedTypes + smallSignedTypes
allTypes = unsignedTypes + signedTypes
# threeEqualRegInst: assembles the C++ execute snippet (eWalkCode) for a NEON
# instruction whose two sources and destination are all RegVect values spread
# over rCount registers, wraps it in an InstObjParams, and appends the
# NeonRegRegRegOpDeclare / NeonEqualRegExecute / NeonExecDeclare expansions
# to header_output and exec_output.
#   op       - C++ text substituted into the per-element loop via %(op)s.
#   readDest - emits code that loads the destination registers and seeds
#              destElem from them. NOTE(review): presumably only when the
#              flag is set; the guarding `if` lines are not visible here.
#   pairwise - one loop form reads elements 2*i and 2*i+1, taken from
#              srcReg1 while 2*i < eCount and from srcReg2 afterwards; the
#              other form reads element i of each source. NOTE(review):
#              presumably the first form is the pairwise one; confirm.
#   types    - NOTE(review): presumably iterated to emit one NeonExecDeclare
#              per element type; the loop header is not visible here.
1131 def threeEqualRegInst(name, Name, opClass, types, rCount, op,
1132 readDest=False, pairwise=False):
1133 global header_output, exec_output
1134 eWalkCode = simdEnabledCheckCode + '''
1135 RegVect srcReg1, srcReg2, destReg;
1137 for reg in range(rCount):
1139 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1140 srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
1141 ''' % { "reg" : reg }
1144 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1145 ''' % { "reg" : reg }
1148 readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1151 for (unsigned i = 0; i < eCount; i++) {
1152 Element srcElem1 = gtoh(2 * i < eCount ?
1153 srcReg1.elements[2 * i] :
1154 srcReg2.elements[2 * i - eCount]);
1155 Element srcElem2 = gtoh(2 * i < eCount ?
1156 srcReg1.elements[2 * i + 1] :
1157 srcReg2.elements[2 * i + 1 - eCount]);
1161 destReg.elements[i] = htog(destElem);
1163 ''' % { "op" : op, "readDest" : readDestCode }
1166 for (unsigned i = 0; i < eCount; i++) {
1167 Element srcElem1 = gtoh(srcReg1.elements[i]);
1168 Element srcElem2 = gtoh(srcReg2.elements[i]);
1172 destReg.elements[i] = htog(destElem);
1174 ''' % { "op" : op, "readDest" : readDestCode }
1175 for reg in range(rCount):
1177 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1178 ''' % { "reg" : reg }
1179 iop = InstObjParams(name, Name,
1181 { "code": eWalkCode,
1183 "predicate_test": predicateTest,
1184 "op_class": opClass }, [])
1185 header_output += NeonRegRegRegOpDeclare.subst(iop)
1186 exec_output += NeonEqualRegExecute.subst(iop)
1188 substDict = { "targs" : type,
1189 "class_name" : Name }
1190 exec_output += NeonExecDeclare.subst(substDict)
# threeEqualRegInstFp: floating-point variant of threeEqualRegInst. Sources
# are held in a FloatVect (an array of rCount FloatReg values) and the
# generated loop walks whole registers (index r) rather than elements.
#   toInt    - the destination is declared either as RegVect (written back
#              through FpDestP<n>_uw with destType 'FloatRegBits') or as
#              FloatVect (written back through FpDestP<n> with destType
#              'FloatReg'). NOTE(review): presumably the RegVect form is the
#              toInt one; the guarding `if`/`else` lines are not visible.
#   readDest - emits code that preloads the destination and sets destReg
#              from destRegs[r] before %(op)s runs.
#   pairwise - one loop form takes registers 2*r and 2*r+1 from srcRegs1
#              while 2*r < rCount and from srcRegs2 afterwards; the other
#              form takes register r of each source.
# NOTE(review): the RegVect preload reads `FpDestP<n>.bits` while the
# write-back uses `FpDestP<n>_uw`; check whether the read should also use the
# `_uw` operand.
# NOTE(review): readDestCode is always 'destReg = destRegs[r];', which does
# not match the `destRegs.regs[r]` access used for the RegVect destination.
1192 def threeEqualRegInstFp(name, Name, opClass, types, rCount, op,
1193 readDest=False, pairwise=False, toInt=False):
1194 global header_output, exec_output
1195 eWalkCode = simdEnabledCheckCode + '''
1196 typedef FloatReg FloatVect[rCount];
1197 FloatVect srcRegs1, srcRegs2;
1200 eWalkCode += 'RegVect destRegs;\n'
1202 eWalkCode += 'FloatVect destRegs;\n'
1203 for reg in range(rCount):
1205 srcRegs1[%(reg)d] = FpOp1P%(reg)d;
1206 srcRegs2[%(reg)d] = FpOp2P%(reg)d;
1207 ''' % { "reg" : reg }
1211 destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
1212 ''' % { "reg" : reg }
1215 destRegs[%(reg)d] = FpDestP%(reg)d;
1216 ''' % { "reg" : reg }
1219 readDestCode = 'destReg = destRegs[r];'
1220 destType = 'FloatReg'
1221 writeDest = 'destRegs[r] = destReg;'
1223 destType = 'FloatRegBits'
1224 writeDest = 'destRegs.regs[r] = destReg;'
1227 for (unsigned r = 0; r < rCount; r++) {
1228 FloatReg srcReg1 = (2 * r < rCount) ?
1229 srcRegs1[2 * r] : srcRegs2[2 * r - rCount];
1230 FloatReg srcReg2 = (2 * r < rCount) ?
1231 srcRegs1[2 * r + 1] : srcRegs2[2 * r + 1 - rCount];
1232 %(destType)s destReg;
1238 "readDest" : readDestCode,
1239 "destType" : destType,
1240 "writeDest" : writeDest }
1243 for (unsigned r = 0; r < rCount; r++) {
1244 FloatReg srcReg1 = srcRegs1[r];
1245 FloatReg srcReg2 = srcRegs2[r];
1246 %(destType)s destReg;
1252 "readDest" : readDestCode,
1253 "destType" : destType,
1254 "writeDest" : writeDest }
1255 for reg in range(rCount):
1258 FpDestP%(reg)d_uw = destRegs.regs[%(reg)d];
1259 ''' % { "reg" : reg }
1262 FpDestP%(reg)d = destRegs[%(reg)d];
1263 ''' % { "reg" : reg }
1264 iop = InstObjParams(name, Name,
1266 { "code": eWalkCode,
1268 "predicate_test": predicateTest,
1269 "op_class": opClass }, [])
1270 header_output += NeonRegRegRegOpDeclare.subst(iop)
1271 exec_output += NeonEqualRegExecute.subst(iop)
1273 substDict = { "targs" : type,
1274 "class_name" : Name }
1275 exec_output += NeonExecDeclare.subst(substDict)
def threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1, bigSrc2, bigDest, readDest):
    '''Generate a three-register NEON instruction whose operands may differ
    in width.

    Each of bigSrc1 / bigSrc2 / bigDest marks the matching operand as "Big":
    it is then declared as BigRegVect, spans 4 registers instead of 2, and
    its per-element temporary is a BigElement instead of an Element.

    op is the C++ text substituted into the per-element loop. When readDest
    is set, the destination registers are loaded first and destElem is seeded
    from the current destination element.

    The expansions are appended to header_output and exec_output, with one
    NeonExecDeclare per entry of types.
    '''
    global header_output, exec_output
    src1Cnt = src2Cnt = destCnt = 2
    src1Prefix = src2Prefix = destPrefix = ''
    if bigSrc1:
        src1Cnt = 4
        src1Prefix = 'Big'
    if bigSrc2:
        src2Cnt = 4
        src2Prefix = 'Big'
    if bigDest:
        destCnt = 4
        destPrefix = 'Big'
    eWalkCode = simdEnabledCheckCode + '''
        %sRegVect srcReg1;
        %sRegVect srcReg2;
        %sRegVect destReg;
    ''' % (src1Prefix, src2Prefix, destPrefix)
    for reg in range(src1Cnt):
        eWalkCode += '''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : reg }
    for reg in range(src2Cnt):
        eWalkCode += '''
            srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
        ''' % { "reg" : reg }
    if readDest:
        for reg in range(destCnt):
            eWalkCode += '''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    # srcElem2 takes its width from src2Prefix. It used to reuse src1Prefix,
    # which gave the second operand the wrong element type whenever only one
    # of the two sources was "Big".
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        %(src1Prefix)sElement srcElem1 = gtoh(srcReg1.elements[i]);
        %(src2Prefix)sElement srcElem2 = gtoh(srcReg2.elements[i]);
        %(destPrefix)sElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode,
            "src1Prefix" : src1Prefix, "src2Prefix" : src2Prefix,
            "destPrefix" : destPrefix }
    for reg in range(destCnt):
        eWalkCode += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegRegOp",
                        { "code": eWalkCode,
                          "r_count": 2,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for elemType in types:
        substDict = { "targs" : elemType,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
def threeRegNarrowInst(name, Name, opClass, types, op, readDest=False):
    '''Narrowing form: both sources are "big", the destination is not.'''
    threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1=True, bigSrc2=True, bigDest=False,
                        readDest=readDest)
def threeRegLongInst(name, Name, opClass, types, op, readDest=False):
    '''Long form: neither source is "big", the destination is.'''
    threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1=False, bigSrc2=False, bigDest=True,
                        readDest=readDest)
def threeRegWideInst(name, Name, opClass, types, op, readDest=False):
    '''Wide form: the first source and the destination are "big", the
    second source is not.'''
    threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1=True, bigSrc2=False, bigDest=True,
                        readDest=readDest)
def twoEqualRegInst(name, Name, opClass, types, rCount, op, readDest=False):
    '''Generate a NEON instruction that combines every element of the first
    source with one fixed element of the second source, selected by imm.

    Sources and destination are RegVect values spread over rCount registers.
    op is the C++ text substituted into the per-element loop. When readDest
    is set, the destination registers are loaded first and destElem is seeded
    from the current destination element.

    The generated code raises UndefinedInstruction when imm does not index a
    valid element. The expansions are appended to header_output and
    exec_output, with one NeonExecDeclare per entry of types.
    '''
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1, srcReg2, destReg;
    '''
    for reg in range(rCount):
        eWalkCode += '''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
            srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
        ''' % { "reg" : reg }
        if readDest:
            eWalkCode += '''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    # The index check must use ||. The earlier `imm < 0 && imm >= eCount`
    # could never be true, so an out-of-range imm fell through to
    # srcReg2.elements[imm] instead of faulting.
    eWalkCode += '''
    if (imm < 0 || imm >= eCount) {
        fault = new UndefinedInstruction(machInst, false, mnemonic);
    } else {
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = gtoh(srcReg1.elements[i]);
            Element srcElem2 = gtoh(srcReg2.elements[imm]);
            Element destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(rCount):
        eWalkCode += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        substDict = { "targs" : elemType,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
def twoRegLongInst(name, Name, opClass, types, op, readDest=False):
    """Generate a widening NEON instruction with an indexed second operand.

    Two register slots are read per source; the result occupies
    2 * rCount slots and its elements are BigElement.  For every element i,
    `op` sees srcElem1 = element i of the first source and
    srcElem2 = element `imm` of the second source, and must set destElem.

    name, Name -- mnemonic and class name handed to InstObjParams.
    opClass    -- value of the "op_class" template entry.
    types      -- one NeonExecDeclare expansion is emitted per entry.
    op         -- C++ snippet that computes destElem.
    readDest   -- when true the destination is loaded first and destElem
                  is pre-set to its old value before `op` runs.

    NOTE(review): the listing this was rebuilt from had lost its short
    lines (`rCount = 2`, the BigRegVect declaration, template delimiters,
    guards, base-class name, "r_count"); confirm against version control.
    """
    global header_output, exec_output
    rCount = 2
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1, srcReg2;
    BigRegVect destReg;
    '''
    for reg in range(rCount):
        # The stray second ';' after the srcReg2 load has been dropped.
        eWalkCode += '''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
            srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
        ''' % { "reg" : reg }
    if readDest:
        for reg in range(2 * rCount):
            eWalkCode += '''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    # The range check must use ||: with && the condition can never hold
    # and an out-of-range index would read past srcReg2.elements.
    eWalkCode += '''
    if (imm < 0 || imm >= eCount) {
        fault = new UndefinedInstruction(machInst, false, mnemonic);
    } else {
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = gtoh(srcReg1.elements[i]);
            Element srcElem2 = gtoh(srcReg2.elements[imm]);
            BigElement destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(2 * rCount):
        eWalkCode += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    # Loop variable renamed from `type` so the builtin is not shadowed.
    for elemType in types:
        substDict = { "targs" : elemType,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
def twoEqualRegInstFp(name, Name, opClass, types, rCount, op, readDest=False):
    """Generate a floating-point NEON instruction with an indexed operand.

    Operands are copied register by register into FloatReg arrays of
    length rCount.  For every register i, `op` sees srcReg1 = register i of
    the first source and srcReg2 = register `imm` of the second source, and
    must set destReg.

    name, Name -- mnemonic and class name handed to InstObjParams.
    opClass    -- value of the "op_class" template entry.
    types      -- one NeonExecDeclare expansion is emitted per entry.
    rCount     -- number of registers per operand.
    op         -- C++ snippet that computes destReg.
    readDest   -- when true the destination is loaded first and destReg is
                  pre-set to its old value before `op` runs.

    NOTE(review): the listing this was rebuilt from had lost its short
    lines (template delimiters, guards, the destReg declaration, the
    base-class name and "r_count"); confirm against version control.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    typedef FloatReg FloatVect[rCount];
    FloatVect srcRegs1, srcRegs2, destRegs;
    '''
    for reg in range(rCount):
        eWalkCode += '''
            srcRegs1[%(reg)d] = FpOp1P%(reg)d;
            srcRegs2[%(reg)d] = FpOp2P%(reg)d;
        ''' % { "reg" : reg }
        if readDest:
            eWalkCode += '''
                destRegs[%(reg)d] = FpDestP%(reg)d;
            ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destReg = destRegs[i];'
    # The range check must use ||: with && the condition can never hold
    # and an out-of-range index would read past srcRegs2.
    eWalkCode += '''
    if (imm < 0 || imm >= eCount) {
        fault = new UndefinedInstruction(machInst, false, mnemonic);
    } else {
        for (unsigned i = 0; i < rCount; i++) {
            FloatReg srcReg1 = srcRegs1[i];
            FloatReg srcReg2 = srcRegs2[imm];
            FloatReg destReg;
            %(readDest)s
            %(op)s
            destRegs[i] = destReg;
        }
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(rCount):
        eWalkCode += '''
        FpDestP%(reg)d = destRegs[%(reg)d];
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "FpRegRegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    # Loop variable renamed from `type` so the builtin is not shadowed.
    for elemType in types:
        substDict = { "targs" : elemType,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
def twoRegShiftInst(name, Name, opClass, types, rCount, op,
        readDest=False, toInt=False, fromInt=False):
    """Generate a two-register NEON instruction with an immediate operand.

    The generated loop runs `op` once per element index i.

    name, Name -- mnemonic and class name handed to InstObjParams.
    opClass    -- value of the "op_class" template entry.
    types      -- one NeonExecDeclare expansion is emitted per entry.
    rCount     -- number of register slots per operand.
    op         -- C++ snippet run inside the loop.
    readDest   -- when true the destination is loaded first and its old
                  value is made available to `op`.
    toInt      -- when true the result is declared as `FloatRegBits destReg`
                  and written to destRegs.regs[i]; otherwise it is
                  `Element destElem` written to destRegs.elements[i].
    fromInt    -- when true the input is `FloatRegBits srcReg1` read from
                  srcRegs1.regs[i]; otherwise `Element srcElem1` read from
                  srcRegs1.elements[i].

    NOTE(review): the listing this was rebuilt from had lost its short
    lines (template delimiters, the `if` guards, the %(...)s placeholders,
    the base-class name and "r_count"); confirm against version control.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcRegs1, destRegs;
    '''
    for reg in range(rCount):
        eWalkCode += '''
            srcRegs1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : reg }
        if readDest:
            eWalkCode += '''
                destRegs.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destRegs.elements[i]);'
        if toInt:
            readDestCode = 'destReg = gtoh(destRegs.regs[i]);'
    readOpCode = 'Element srcElem1 = gtoh(srcRegs1.elements[i]);'
    if fromInt:
        readOpCode = 'FloatRegBits srcReg1 = gtoh(srcRegs1.regs[i]);'
    declDest = 'Element destElem;'
    writeDestCode = 'destRegs.elements[i] = htog(destElem);'
    if toInt:
        declDest = 'FloatRegBits destReg;'
        writeDestCode = 'destRegs.regs[i] = htog(destReg);'
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        %(readOp)s
        %(declDest)s
        %(readDest)s
        %(op)s
        %(writeDest)s
    }
    ''' % { "readOp" : readOpCode,
            "declDest" : declDest,
            "readDest" : readDestCode,
            "op" : op,
            "writeDest" : writeDestCode }
    for reg in range(rCount):
        eWalkCode += '''
        FpDestP%(reg)d_uw = gtoh(destRegs.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    # Loop variable renamed from `type` so the builtin is not shadowed.
    for elemType in types:
        substDict = { "targs" : elemType,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
def twoRegNarrowShiftInst(name, Name, opClass, types, op, readDest=False):
    """Generate a narrowing two-register NEON instruction with an immediate.

    Four register slots are read for the source (BigElement elements) and
    two are written for the destination (Element elements).  For every
    element i, `op` sees srcElem1 and must set destElem.

    name, Name -- mnemonic and class name handed to InstObjParams.
    opClass    -- value of the "op_class" template entry.
    types      -- one NeonExecDeclare expansion is emitted per entry.
    op         -- C++ snippet that computes destElem.
    readDest   -- when true the destination is loaded first and destElem
                  is pre-set to its old value before `op` runs.

    NOTE(review): the listing this was rebuilt from had lost its short
    lines (vector declarations, template delimiters, guards, the destElem
    declaration, base-class name, "r_count"); confirm against version
    control.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    BigRegVect srcReg1;
    RegVect destReg;
    '''
    for reg in range(4):
        eWalkCode += '''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : reg }
    if readDest:
        for reg in range(2):
            eWalkCode += '''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        BigElement srcElem1 = gtoh(srcReg1.elements[i]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(2):
        eWalkCode += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": 2,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    # Loop variable renamed from `type` so the builtin is not shadowed.
    for elemType in types:
        substDict = { "targs" : elemType,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
def twoRegLongShiftInst(name, Name, opClass, types, op, readDest=False):
    """Generate a widening two-register NEON instruction with an immediate.

    Two register slots are read for the source (Element elements) and four
    are written for the destination (BigElement elements).  For every
    element i, `op` sees srcElem1 and must set destElem.

    name, Name -- mnemonic and class name handed to InstObjParams.
    opClass    -- value of the "op_class" template entry.
    types      -- one NeonExecDeclare expansion is emitted per entry.
    op         -- C++ snippet that computes destElem.
    readDest   -- when true the destination is loaded first and destElem
                  is pre-set to its old value before `op` runs.

    NOTE(review): the listing this was rebuilt from had lost its short
    lines (vector declarations, template delimiters, guards, base-class
    name, "r_count"); confirm against version control.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1;
    BigRegVect destReg;
    '''
    for reg in range(2):
        eWalkCode += '''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : reg }
    if readDest:
        for reg in range(4):
            eWalkCode += '''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        # The old value belongs in the per-element temporary; the previous
        # text assigned it to the whole vector `destReg`.
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        BigElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(4):
        eWalkCode += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": 2,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    # Loop variable renamed from `type` so the builtin is not shadowed.
    for elemType in types:
        substDict = { "targs" : elemType,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
def twoRegMiscInst(name, Name, opClass, types, rCount, op, readDest=False):
    """Generate a two-register element-wise NEON instruction.

    For every element i the generated loop sets j = i, reads srcElem1 from
    element i of the source, runs `op`, and stores destElem into element j
    of the result.  Because the store uses j, `op` may presumably redirect
    the write by changing j -- confirm against the snippets passed in.

    name, Name -- mnemonic and class name handed to InstObjParams.
    opClass    -- value of the "op_class" template entry.
    types      -- one NeonExecDeclare expansion is emitted per entry.
    rCount     -- number of register slots per operand.
    op         -- C++ snippet that computes destElem.
    readDest   -- when true the destination is loaded first and destElem
                  is pre-set to its old value (element i) before `op` runs.

    NOTE(review): the listing this was rebuilt from had lost its short
    lines (`unsigned j = i;`, template delimiters, guards, the destElem
    declaration, base-class name, "r_count"); confirm against version
    control.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1, destReg;
    '''
    for reg in range(rCount):
        eWalkCode += '''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : reg }
        if readDest:
            eWalkCode += '''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        unsigned j = i;
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[j] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(rCount):
        eWalkCode += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    # Loop variable renamed from `type` so the builtin is not shadowed.
    for elemType in types:
        substDict = { "targs" : elemType,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
def twoRegMiscScInst(name, Name, opClass, types, rCount, op, readDest=False):
    """Generate a two-register NEON instruction that reads one fixed element.

    Every iteration of the generated loop reads the same source element,
    srcReg1.elements[imm], as srcElem1, runs `op`, and stores destElem into
    element i of the result.

    name, Name -- mnemonic and class name handed to InstObjParams.
    opClass    -- value of the "op_class" template entry.
    types      -- one NeonExecDeclare expansion is emitted per entry.
    rCount     -- number of register slots per operand.
    op         -- C++ snippet that computes destElem.
    readDest   -- when true the destination is loaded first and destElem
                  is pre-set to its old value before `op` runs.

    NOTE(review): the listing this was rebuilt from had lost its short
    lines (template delimiters, guards, the destElem declaration,
    base-class name, "r_count"); confirm against version control.  No
    range check on `imm` is visible in this generator.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1, destReg;
    '''
    for reg in range(rCount):
        eWalkCode += '''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : reg }
        if readDest:
            eWalkCode += '''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[imm]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(rCount):
        eWalkCode += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    # Loop variable renamed from `type` so the builtin is not shadowed.
    for elemType in types:
        substDict = { "targs" : elemType,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
def twoRegMiscScramble(name, Name, opClass, types, rCount, op, readDest=False):
    """Generate a NEON instruction that rewrites both of its registers.

    Both srcReg1 and destReg are always loaded from the register file, `op`
    is appended verbatim (it is not wrapped in an element loop), and both
    vectors are written back afterwards.

    name, Name -- mnemonic and class name handed to InstObjParams.
    opClass    -- value of the "op_class" template entry.
    types      -- one NeonExecDeclare expansion is emitted per entry.
    rCount     -- number of register slots per operand.
    op         -- complete C++ snippet operating on srcReg1 and destReg.
    readDest   -- accepted for signature compatibility only.  The previous
                  text used it to append an empty template and to build a
                  readDestCode string that was never substituted; both were
                  dead code and have been removed.

    NOTE(review): the listing this was rebuilt from had lost its short
    lines (template delimiters, base-class name, "r_count"); confirm
    against version control.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1, destReg;
    '''
    for reg in range(rCount):
        eWalkCode += '''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
            destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
        ''' % { "reg" : reg }
    eWalkCode += op
    for reg in range(rCount):
        eWalkCode += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        FpOp1P%(reg)d_uw = gtoh(srcReg1.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    # Loop variable renamed from `type` so the builtin is not shadowed.
    for elemType in types:
        substDict = { "targs" : elemType,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
def twoRegMiscInstFp(name, Name, opClass, types, rCount, op,
        readDest=False, toInt=False):
    """Generate a two-register floating-point NEON instruction.

    The source is copied into a FloatReg array.  For every register r, `op`
    sees srcReg1 and must set destReg.

    name, Name -- mnemonic and class name handed to InstObjParams.
    opClass    -- value of the "op_class" template entry.
    types      -- one NeonExecDeclare expansion is emitted per entry.
    rCount     -- number of registers per operand.
    op         -- C++ snippet that computes destReg.
    readDest   -- when true the destination is loaded first and destReg is
                  pre-set to its old value before `op` runs.
    toInt      -- when true the destination is a RegVect whose slots are
                  FloatRegBits and it is written back through the `_uw`
                  accessor; otherwise it is a FloatReg array.

    Fixes relative to the previous text, all on the readDest path:
      * the preload of destReg indexed with `i`, but the generated loop
        variable is `r`;
      * with toInt the preload indexed the RegVect directly instead of
        going through `.regs`;
      * with toInt the register read used `FpDestP<n>.bits`, unlike the
        `_uw` accessor used for the write-back.
        NOTE(review): `_uw` is chosen for symmetry with the write-back --
        confirm it is the intended accessor.

    NOTE(review): the listing this was rebuilt from had lost its short
    lines (template delimiters, the `if`/`else` guards, placeholders,
    base-class name, "r_count"); confirm against version control.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    typedef FloatReg FloatVect[rCount];
    FloatVect srcRegs1;
    '''
    if toInt:
        eWalkCode += 'RegVect destRegs;\n'
    else:
        eWalkCode += 'FloatVect destRegs;\n'
    for reg in range(rCount):
        eWalkCode += '''
            srcRegs1[%(reg)d] = FpOp1P%(reg)d;
        ''' % { "reg" : reg }
        if readDest:
            if toInt:
                eWalkCode += '''
                    destRegs.regs[%(reg)d] = FpDestP%(reg)d_uw;
                ''' % { "reg" : reg }
            else:
                eWalkCode += '''
                    destRegs[%(reg)d] = FpDestP%(reg)d;
                ''' % { "reg" : reg }
    destType = 'FloatReg'
    writeDest = 'destRegs[r] = destReg;'
    readDestCode = ''
    if readDest:
        readDestCode = 'destReg = destRegs[r];'
    if toInt:
        destType = 'FloatRegBits'
        writeDest = 'destRegs.regs[r] = destReg;'
        if readDest:
            readDestCode = 'destReg = destRegs.regs[r];'
    eWalkCode += '''
    for (unsigned r = 0; r < rCount; r++) {
        FloatReg srcReg1 = srcRegs1[r];
        %(destType)s destReg;
        %(readDest)s
        %(op)s
        %(writeDest)s
    }
    ''' % { "op" : op,
            "readDest" : readDestCode,
            "destType" : destType,
            "writeDest" : writeDest }
    for reg in range(rCount):
        if toInt:
            eWalkCode += '''
            FpDestP%(reg)d_uw = destRegs.regs[%(reg)d];
            ''' % { "reg" : reg }
        else:
            eWalkCode += '''
            FpDestP%(reg)d = destRegs[%(reg)d];
            ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "FpRegRegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    # Loop variable renamed from `type` so the builtin is not shadowed.
    for elemType in types:
        substDict = { "targs" : elemType,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
def twoRegCondenseInst(name, Name, opClass, types, rCount, op, readDest=False):
    """Generate a NEON instruction that folds adjacent element pairs.

    The generated loop runs eCount / 2 times.  Iteration i reads source
    elements 2*i and 2*i+1 as srcElem1 and srcElem2, runs `op`, and stores
    the BigElement destElem into element i of the result.

    name, Name -- mnemonic and class name handed to InstObjParams.
    opClass    -- value of the "op_class" template entry.
    types      -- one NeonExecDeclare expansion is emitted per entry.
    rCount     -- number of register slots per operand.
    op         -- C++ snippet that computes destElem.
    readDest   -- when true the destination is loaded first and destElem
                  is pre-set to its old value before `op` runs.

    NOTE(review): the listing this was rebuilt from had lost its short
    lines (vector declarations, template delimiters, guards, base-class
    name, "r_count"); confirm against version control.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcRegs;
    BigRegVect destReg;
    '''
    for reg in range(rCount):
        eWalkCode += '''
            srcRegs.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : reg }
        if readDest:
            eWalkCode += '''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    eWalkCode += '''
    for (unsigned i = 0; i < eCount / 2; i++) {
        Element srcElem1 = gtoh(srcRegs.elements[2 * i]);
        Element srcElem2 = gtoh(srcRegs.elements[2 * i + 1]);
        BigElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(rCount):
        eWalkCode += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    # Loop variable renamed from `type` so the builtin is not shadowed.
    for elemType in types:
        substDict = { "targs" : elemType,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
def twoRegNarrowMiscInst(name, Name, opClass, types, op, readDest=False):
    """Generate a narrowing two-register NEON instruction.

    Four register slots are read for the source (BigElement elements) and
    two are written for the destination (Element elements).  For every
    element i, `op` sees srcElem1 and must set destElem.

    name, Name -- mnemonic and class name handed to InstObjParams.
    opClass    -- value of the "op_class" template entry.
    types      -- one NeonExecDeclare expansion is emitted per entry.
    op         -- C++ snippet that computes destElem.
    readDest   -- when true the destination is loaded first and destElem
                  is pre-set to its old value before `op` runs.

    NOTE(review): the listing this was rebuilt from had lost its short
    lines (vector declarations, template delimiters, guards, the destElem
    declaration, base-class name, "r_count"); confirm against version
    control.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    BigRegVect srcReg1;
    RegVect destReg;
    '''
    for reg in range(4):
        eWalkCode += '''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : reg }
    if readDest:
        for reg in range(2):
            eWalkCode += '''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        BigElement srcElem1 = gtoh(srcReg1.elements[i]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(2):
        eWalkCode += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code": eWalkCode,
                          "r_count": 2,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    # Loop variable renamed from `type` so the builtin is not shadowed.
    for elemType in types:
        substDict = { "targs" : elemType,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
def oneRegImmInst(name, Name, opClass, types, rCount, op, readDest=False):
    """Generate a NEON instruction with one register and an immediate.

    The generated loop has no register source: for every element i, `op`
    must set destElem, which is stored into element i of the result.

    name, Name -- mnemonic and class name handed to InstObjParams.
    opClass    -- value of the "op_class" template entry.
    types      -- one NeonExecDeclare expansion is emitted per entry.
    rCount     -- number of register slots in the destination.
    op         -- C++ snippet that computes destElem.
    readDest   -- when true the destination is loaded first and destElem
                  is pre-set to its old value before `op` runs.

    NOTE(review): the listing this was rebuilt from had lost its short
    lines (the RegVect declaration, template delimiters, guards, the
    destElem declaration, base-class name, "r_count"); confirm against
    version control.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect destReg;
    '''
    if readDest:
        for reg in range(rCount):
            eWalkCode += '''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(rCount):
        eWalkCode += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    # Loop variable renamed from `type` so the builtin is not shadowed.
    for elemType in types:
        substDict = { "targs" : elemType,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
def twoRegLongMiscInst(name, Name, opClass, types, op, readDest=False):
    """Generate a widening two-register NEON instruction.

    Two register slots are read for the source (Element elements) and four
    are written for the destination (BigElement elements).  For every
    element i, `op` sees srcElem1 and must set destElem.

    name, Name -- mnemonic and class name handed to InstObjParams.
    opClass    -- value of the "op_class" template entry.
    types      -- one NeonExecDeclare expansion is emitted per entry.
    op         -- C++ snippet that computes destElem.
    readDest   -- when true the destination is loaded first and destElem
                  is pre-set to its old value before `op` runs.

    NOTE(review): the listing this was rebuilt from had lost its short
    lines (vector declarations, template delimiters, guards, base-class
    name, "r_count"); confirm against version control.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1;
    BigRegVect destReg;
    '''
    for reg in range(2):
        eWalkCode += '''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : reg }
    if readDest:
        for reg in range(4):
            eWalkCode += '''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        # The old value belongs in the per-element temporary; the previous
        # text assigned it to the whole vector `destReg`.
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        BigElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(4):
        eWalkCode += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code": eWalkCode,
                          "r_count": 2,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    # Loop variable renamed from `type` so the builtin is not shadowed.
    for elemType in types:
        substDict = { "targs" : elemType,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
# vhadd: halves each operand separately and adds back the carry produced by
# the two discarded low bits.
# NOTE(review): the assignment line, the `Element carryBit =` line and the
# closing quotes were absent from the damaged listing and are restored.
vhaddCode = '''
    Element carryBit =
        (((unsigned)srcElem1 & 0x1) +
         ((unsigned)srcElem2 & 0x1)) >> 1;
    // Use division instead of a shift to ensure the sign extension works
    // right. The compiler will figure out if it can be a shift. Mask the
    // inputs so they get truncated correctly.
    destElem = (((srcElem1 & ~(Element)1) / 2) +
                ((srcElem2 & ~(Element)1) / 2)) + carryBit;
'''
threeEqualRegInst("vhadd", "VhaddD", "SimdAddOp", allTypes, 2, vhaddCode)
threeEqualRegInst("vhadd", "VhaddQ", "SimdAddOp", allTypes, 4, vhaddCode)
# vrhadd: same as vhadd but with an extra 1 added to the low-bit sum, which
# rounds the halved result.
# NOTE(review): the assignment line, the `Element carryBit =` line and the
# closing quotes were absent from the damaged listing and are restored.
vrhaddCode = '''
    Element carryBit =
        (((unsigned)srcElem1 & 0x1) +
         ((unsigned)srcElem2 & 0x1) + 1) >> 1;
    // Use division instead of a shift to ensure the sign extension works
    // right. The compiler will figure out if it can be a shift. Mask the
    // inputs so they get truncated correctly.
    destElem = (((srcElem1 & ~(Element)1) / 2) +
                ((srcElem2 & ~(Element)1) / 2)) + carryBit;
'''
threeEqualRegInst("vrhadd", "VrhaddD", "SimdAddOp", allTypes, 2, vrhaddCode)
threeEqualRegInst("vrhadd", "VrhaddQ", "SimdAddOp", allTypes, 4, vrhaddCode)
# vhsub: halves each operand separately and subtracts the borrow produced
# by the two discarded low bits.
# The local was spelled `barrowBit`; it is renamed to `borrowBit` at both
# its declaration and its use.
# NOTE(review): the assignment line, the declaration line and the closing
# quotes were absent from the damaged listing and are restored.
vhsubCode = '''
    Element borrowBit =
        (((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1;
    // Use division instead of a shift to ensure the sign extension works
    // right. The compiler will figure out if it can be a shift. Mask the
    // inputs so they get truncated correctly.
    destElem = (((srcElem1 & ~(Element)1) / 2) -
                ((srcElem2 & ~(Element)1) / 2)) - borrowBit;
'''
threeEqualRegInst("vhsub", "VhsubD", "SimdAddOp", allTypes, 2, vhsubCode)
threeEqualRegInst("vhsub", "VhsubQ", "SimdAddOp", allTypes, 4, vhsubCode)
# Bitwise logic instructions.  Each snippet is a single expression over
# srcElem1 and srcElem2.
# NOTE(review): the `xxxCode = ...` assignment lines and closing quotes
# were absent from the damaged listing; the variable names are taken from
# the instantiation calls below.
vandCode = '''
    destElem = srcElem1 & srcElem2;
'''
threeEqualRegInst("vand", "VandD", "SimdAluOp", unsignedTypes, 2, vandCode)
threeEqualRegInst("vand", "VandQ", "SimdAluOp", unsignedTypes, 4, vandCode)

vbicCode = '''
    destElem = srcElem1 & ~srcElem2;
'''
threeEqualRegInst("vbic", "VbicD", "SimdAluOp", unsignedTypes, 2, vbicCode)
threeEqualRegInst("vbic", "VbicQ", "SimdAluOp", unsignedTypes, 4, vbicCode)

vorrCode = '''
    destElem = srcElem1 | srcElem2;
'''
threeEqualRegInst("vorr", "VorrD", "SimdAluOp", unsignedTypes, 2, vorrCode)
threeEqualRegInst("vorr", "VorrQ", "SimdAluOp", unsignedTypes, 4, vorrCode)

# vmov reuses the OR snippet under a different mnemonic and op class.
threeEqualRegInst("vmov", "VmovD", "SimdMiscOp", unsignedTypes, 2, vorrCode)
threeEqualRegInst("vmov", "VmovQ", "SimdMiscOp", unsignedTypes, 4, vorrCode)

vornCode = '''
    destElem = srcElem1 | ~srcElem2;
'''
threeEqualRegInst("vorn", "VornD", "SimdAluOp", unsignedTypes, 2, vornCode)
threeEqualRegInst("vorn", "VornQ", "SimdAluOp", unsignedTypes, 4, vornCode)

veorCode = '''
    destElem = srcElem1 ^ srcElem2;
'''
threeEqualRegInst("veor", "VeorD", "SimdAluOp", unsignedTypes, 2, veorCode)
threeEqualRegInst("veor", "VeorQ", "SimdAluOp", unsignedTypes, 4, veorCode)
# Bitwise select/insert instructions.  All three read the old destination
# (the trailing True argument), so destElem appears on both sides.
# NOTE(review): the assignment lines and closing quotes were absent from
# the damaged listing; names are taken from the instantiation calls.
vbifCode = '''
    destElem = (destElem & srcElem2) | (srcElem1 & ~srcElem2);
'''
threeEqualRegInst("vbif", "VbifD", "SimdAluOp", unsignedTypes, 2, vbifCode, True)
threeEqualRegInst("vbif", "VbifQ", "SimdAluOp", unsignedTypes, 4, vbifCode, True)
vbitCode = '''
    destElem = (srcElem1 & srcElem2) | (destElem & ~srcElem2);
'''
threeEqualRegInst("vbit", "VbitD", "SimdAluOp", unsignedTypes, 2, vbitCode, True)
threeEqualRegInst("vbit", "VbitQ", "SimdAluOp", unsignedTypes, 4, vbitCode, True)
vbslCode = '''
    destElem = (srcElem1 & destElem) | (srcElem2 & ~destElem);
'''
threeEqualRegInst("vbsl", "VbslD", "SimdAluOp", unsignedTypes, 2, vbslCode, True)
threeEqualRegInst("vbsl", "VbslQ", "SimdAluOp", unsignedTypes, 4, vbslCode, True)
# Element-wise maximum and minimum.
# NOTE(review): the assignment lines and closing quotes were absent from
# the damaged listing; names are taken from the instantiation calls.
vmaxCode = '''
    destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2;
'''
threeEqualRegInst("vmax", "VmaxD", "SimdCmpOp", allTypes, 2, vmaxCode)
threeEqualRegInst("vmax", "VmaxQ", "SimdCmpOp", allTypes, 4, vmaxCode)

vminCode = '''
    destElem = (srcElem1 < srcElem2) ? srcElem1 : srcElem2;
'''
threeEqualRegInst("vmin", "VminD", "SimdCmpOp", allTypes, 2, vminCode)
threeEqualRegInst("vmin", "VminQ", "SimdCmpOp", allTypes, 4, vminCode)
# Addition family: plain, pairwise, widening (long/wide) and the two
# "high half, narrow" forms.
# NOTE(review): the assignment lines and closing quotes were absent from
# the damaged listing; names are taken from the instantiation calls.
vaddCode = '''
    destElem = srcElem1 + srcElem2;
'''
threeEqualRegInst("vadd", "NVaddD", "SimdAddOp", unsignedTypes, 2, vaddCode)
threeEqualRegInst("vadd", "NVaddQ", "SimdAddOp", unsignedTypes, 4, vaddCode)

threeEqualRegInst("vpadd", "NVpaddD", "SimdAddOp", smallUnsignedTypes,
                  2, vaddCode, pairwise=True)
# Shared by vaddl and vaddw: both operands are widened before the add.
vaddlwCode = '''
    destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
'''
threeRegLongInst("vaddl", "Vaddl", "SimdAddOp", smallTypes, vaddlwCode)
threeRegWideInst("vaddw", "Vaddw", "SimdAddOp", smallTypes, vaddlwCode)
# Keeps the upper half of the wide sum.
vaddhnCode = '''
    destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >>
               (sizeof(Element) * 8);
'''
threeRegNarrowInst("vaddhn", "Vaddhn", "SimdAddOp", smallTypes, vaddhnCode)
# As vaddhn, but adds half of the discarded range first so the result
# is rounded.
vraddhnCode = '''
    destElem = ((BigElement)srcElem1 + (BigElement)srcElem2 +
                ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
               (sizeof(Element) * 8);
'''
threeRegNarrowInst("vraddhn", "Vraddhn", "SimdAddOp", smallTypes, vraddhnCode)
# Subtraction: plain and widening (long/wide).
# NOTE(review): the assignment lines and closing quotes were absent from
# the damaged listing; names are taken from the instantiation calls.
vsubCode = '''
    destElem = srcElem1 - srcElem2;
'''
threeEqualRegInst("vsub", "NVsubD", "SimdAddOp", unsignedTypes, 2, vsubCode)
threeEqualRegInst("vsub", "NVsubQ", "SimdAddOp", unsignedTypes, 4, vsubCode)
# Shared by vsubl and vsubw: both operands are widened before the subtract.
vsublwCode = '''
    destElem = (BigElement)srcElem1 - (BigElement)srcElem2;
'''
threeRegLongInst("vsubl", "Vsubl", "SimdAddOp", smallTypes, vsublwCode)
threeRegWideInst("vsubw", "Vsubw", "SimdAddOp", smallTypes, vsublwCode)
# Unsigned saturating add: a wrapped sum is smaller than an operand, in
# which case the result is clamped to all ones.
# NOTE(review): the assignment line, `fpscr.qc = 1;`, the closing brace,
# `FpscrQc = fpscr;` and the closing quotes were absent from the damaged
# listing and are restored -- confirm against version control.
vqaddUCode = '''
    destElem = srcElem1 + srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (destElem < srcElem1 || destElem < srcElem2) {
        destElem = (Element)(-1);
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeEqualRegInst("vqadd", "VqaddUD", "SimdAddOp", unsignedTypes, 2, vqaddUCode)
threeEqualRegInst("vqadd", "VqaddUQ", "SimdAddOp", unsignedTypes, 4, vqaddUCode)
# "Subtract, keep high half" in plain and rounding form.
# NOTE(review): the assignment lines and closing quotes were absent from
# the damaged listing; names are taken from the instantiation calls.
vsubhnCode = '''
    destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >>
               (sizeof(Element) * 8);
'''
threeRegNarrowInst("vsubhn", "Vsubhn", "SimdAddOp", smallTypes, vsubhnCode)
# As vsubhn, but adds half of the discarded range first so the result
# is rounded.
vrsubhnCode = '''
    destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 +
                ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
               (sizeof(Element) * 8);
'''
threeRegNarrowInst("vrsubhn", "Vrsubhn", "SimdAddOp", smallTypes, vrsubhnCode)
# Signed saturating add: overflow is detected when both operands share a
# sign that the sum does not.
# NOTE(review): the assignment line, the `if (negDest) destElem -= 1;`
# adjustment, `fpscr.qc = 1;`, the closing brace, `FpscrQc = fpscr;` and
# the closing quotes were absent from the damaged listing and are
# restored -- confirm against version control.
vqaddSCode = '''
    destElem = srcElem1 + srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    bool negDest = (destElem < 0);
    bool negSrc1 = (srcElem1 < 0);
    bool negSrc2 = (srcElem2 < 0);
    if ((negDest != negSrc1) && (negSrc1 == negSrc2)) {
        destElem = (Element)1 << (sizeof(Element) * 8 - 1);
        if (negDest)
            destElem -= 1;
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeEqualRegInst("vqadd", "VqaddSD", "SimdAddOp", signedTypes, 2, vqaddSCode)
threeEqualRegInst("vqadd", "VqaddSQ", "SimdAddOp", signedTypes, 4, vqaddSCode)
# Unsigned saturating subtract: a wrapped difference is larger than the
# minuend, in which case the result is clamped to zero.
# NOTE(review): the assignment line, `destElem = 0;`, `fpscr.qc = 1;`, the
# closing brace, `FpscrQc = fpscr;` and the closing quotes were absent
# from the damaged listing and are restored -- confirm against version
# control.
vqsubUCode = '''
    destElem = srcElem1 - srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (destElem > srcElem1) {
        destElem = 0;
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeEqualRegInst("vqsub", "VqsubUD", "SimdAddOp", unsignedTypes, 2, vqsubUCode)
threeEqualRegInst("vqsub", "VqsubUQ", "SimdAddOp", unsignedTypes, 4, vqsubUCode)
# Signed saturating subtract: overflow is detected when the operands have
# opposite signs and the difference does not keep the sign of srcElem1.
# NOTE(review): the assignment line, the `if (negDest) destElem -= 1;`
# adjustment, `fpscr.qc = 1;`, the closing brace, `FpscrQc = fpscr;` and
# the closing quotes were absent from the damaged listing and are
# restored -- confirm against version control.
vqsubSCode = '''
    destElem = srcElem1 - srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    bool negDest = (destElem < 0);
    bool negSrc1 = (srcElem1 < 0);
    bool posSrc2 = (srcElem2 >= 0);
    if ((negDest != negSrc1) && (negSrc1 == posSrc2)) {
        destElem = (Element)1 << (sizeof(Element) * 8 - 1);
        if (negDest)
            destElem -= 1;
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeEqualRegInst("vqsub", "VqsubSD", "SimdAddOp", signedTypes, 2, vqsubSCode)
threeEqualRegInst("vqsub", "VqsubSQ", "SimdAddOp", signedTypes, 4, vqsubSCode)
# Element-wise comparisons: a true result is all ones, a false result zero.
# NOTE(review): the assignment lines and closing quotes were absent from
# the damaged listing; names are taken from the instantiation calls.
vcgtCode = '''
    destElem =  (srcElem1 > srcElem2) ? (Element)(-1) : 0;
'''
threeEqualRegInst("vcgt", "VcgtD", "SimdCmpOp", allTypes, 2, vcgtCode)
threeEqualRegInst("vcgt", "VcgtQ", "SimdCmpOp", allTypes, 4, vcgtCode)

vcgeCode = '''
    destElem =  (srcElem1 >= srcElem2) ? (Element)(-1) : 0;
'''
threeEqualRegInst("vcge", "VcgeD", "SimdCmpOp", allTypes, 2, vcgeCode)
threeEqualRegInst("vcge", "VcgeQ", "SimdCmpOp", allTypes, 4, vcgeCode)

vceqCode = '''
    destElem =  (srcElem1 == srcElem2) ? (Element)(-1) : 0;
'''
threeEqualRegInst("vceq", "VceqD", "SimdCmpOp", unsignedTypes, 2, vceqCode)
threeEqualRegInst("vceq", "VceqQ", "SimdCmpOp", unsignedTypes, 4, vceqCode)
# vshl: shift by a signed per-element amount taken from the low byte of
# srcElem2; a negative amount shifts right.
# NOTE(review): the assignment line, the `if (shiftAmt < 0) {` test, the
# `destElem = 0;` assignments, the `else` arms, closing braces, the second
# half of the sign-extension expression and the closing quotes were absent
# from the damaged listing and are restored -- confirm against version
# control.
vshlCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        // Make sure the right shift sign extended when it should.
        if (ltz(srcElem1) && !ltz(destElem)) {
            destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                         1 - shiftAmt));
        }
    } else {
        if (shiftAmt >= sizeof(Element) * 8) {
            destElem = 0;
        } else {
            destElem = srcElem1 << shiftAmt;
        }
    }
'''
threeEqualRegInst("vshl", "VshlD", "SimdShiftOp", allTypes, 2, vshlCode)
threeEqualRegInst("vshl", "VshlQ", "SimdShiftOp", allTypes, 4, vshlCode)
# vrshl: as vshl, but a right shift adds back the last bit shifted out
# (rBit) so the result is rounded.
# NOTE(review): the assignment line, the `if (shiftAmt < 0) {` test, the
# rBit declaration, `rBit = 1;`, the `destElem = 0;` assignments,
# `destElem += rBit;`, the `else` arms, closing braces and the closing
# quotes were absent from the damaged listing and are restored -- confirm
# against version control.
vrshlCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        Element rBit = 0;
        if (shiftAmt <= sizeof(Element) * 8)
            rBit = bits(srcElem1, shiftAmt - 1);
        if (shiftAmt > sizeof(Element) * 8 && ltz(srcElem1))
            rBit = 1;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        // Make sure the right shift sign extended when it should.
        if (ltz(srcElem1) && !ltz(destElem)) {
            destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                         1 - shiftAmt));
        }
        destElem += rBit;
    } else if (shiftAmt > 0) {
        if (shiftAmt >= sizeof(Element) * 8) {
            destElem = 0;
        } else {
            destElem = srcElem1 << shiftAmt;
        }
    } else {
        destElem = srcElem1;
    }
'''
threeEqualRegInst("vrshl", "VrshlD", "SimdAluOp", allTypes, 2, vrshlCode)
threeEqualRegInst("vrshl", "VrshlQ", "SimdAluOp", allTypes, 4, vrshlCode)
# Unsigned saturating shift: a left shift that would drop set bits clamps
# the result to all ones and sets the QC flag.
# NOTE(review): the assignment line, the `if (shiftAmt < 0) {` test, the
# `destElem = 0;` assignments, every `fpscr.qc = 1;`, the `else` arms,
# closing braces, `FpscrQc = fpscr;` and the closing quotes were absent
# from the damaged listing and are restored -- confirm against version
# control.
vqshlUCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
    } else if (shiftAmt > 0) {
        if (shiftAmt >= sizeof(Element) * 8) {
            if (srcElem1 != 0) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = 0;
            }
        } else {
            if (bits(srcElem1, sizeof(Element) * 8 - 1,
                        sizeof(Element) * 8 - shiftAmt)) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        }
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
threeEqualRegInst("vqshl", "VqshlUD", "SimdAluOp", unsignedTypes, 2, vqshlUCode)
threeEqualRegInst("vqshl", "VqshlUQ", "SimdAluOp", unsignedTypes, 4, vqshlUCode)
# Signed saturating shift: a left shift saturates when the bits shifted
# out, together with the new sign bit, are not all copies of the old sign.
# NOTE(review): the assignment line, the `if (shiftAmt < 0) {` test, the
# `sat` flag and its assignments, the `destElem = 0;` assignments,
# `if (sat) {`, `fpscr.qc = 1;`, `if (srcElem1 < 0)`, the `else` arms,
# closing braces, `FpscrQc = fpscr;` and the closing quotes were absent
# from the damaged listing and are restored -- confirm against version
# control.
vqshlSCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        // Make sure the right shift sign extended when it should.
        if (srcElem1 < 0 && destElem >= 0) {
            destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                         1 - shiftAmt));
        }
    } else if (shiftAmt > 0) {
        bool sat = false;
        if (shiftAmt >= sizeof(Element) * 8) {
            if (srcElem1 != 0)
                sat = true;
            else
                destElem = 0;
        } else {
            if (bits(srcElem1, sizeof(Element) * 8 - 1,
                        sizeof(Element) * 8 - 1 - shiftAmt) !=
                    ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
                sat = true;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        }
        if (sat) {
            fpscr.qc = 1;
            destElem = mask(sizeof(Element) * 8 - 1);
            if (srcElem1 < 0)
                destElem = ~destElem;
        }
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
threeEqualRegInst("vqshl", "VqshlSD", "SimdCmpOp", signedTypes, 2, vqshlSCode)
threeEqualRegInst("vqshl", "VqshlSQ", "SimdCmpOp", signedTypes, 4, vqshlSCode)
# Unsigned saturating rounding shift.
#
# Fix: a shift amount of zero used to fall into the left-shift branch,
# where the overflow test evaluates
#     bits(srcElem1, sizeof(Element) * 8 - 1, sizeof(Element) * 8 - 0)
# i.e. with a low index one past the top bit.  The left-shift branch is
# now guarded by `shiftAmt > 0` and a zero shift copies the source
# unchanged, matching the structure of the other saturating shift snippets.
# NOTE(review): the assignment line, the `if (shiftAmt < 0) {` test, the
# rBit declaration, the `destElem = 0;` assignments, `destElem += rBit;`,
# every `fpscr.qc = 1;`, the `else` arms, closing braces,
# `FpscrQc = fpscr;` and the closing quotes were absent from the damaged
# listing and are restored -- confirm against version control.
vqrshlUCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        Element rBit = 0;
        if (shiftAmt <= sizeof(Element) * 8)
            rBit = bits(srcElem1, shiftAmt - 1);
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        destElem += rBit;
    } else if (shiftAmt > 0) {
        if (shiftAmt >= sizeof(Element) * 8) {
            if (srcElem1 != 0) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = 0;
            }
        } else {
            if (bits(srcElem1, sizeof(Element) * 8 - 1,
                        sizeof(Element) * 8 - shiftAmt)) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        }
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
threeEqualRegInst("vqrshl", "VqrshlUD", "SimdCmpOp", unsignedTypes, 2, vqrshlUCode)
threeEqualRegInst("vqrshl", "VqrshlUQ", "SimdCmpOp", unsignedTypes, 4, vqrshlUCode)
# Signed saturating rounding shift: right shifts round with rBit, left
# shifts saturate as in the signed vqshl snippet.
# NOTE(review): the assignment line, the `if (shiftAmt < 0) {` test, the
# rBit declaration, `rBit = 1;`, the `sat` flag and its assignments, the
# `destElem = 0;` assignments, `destElem += rBit;`, `if (sat) {`,
# `fpscr.qc = 1;`, `if (srcElem1 < 0)`, the `else` arms, closing braces,
# `FpscrQc = fpscr;` and the closing quotes were absent from the damaged
# listing and are restored -- confirm against version control.
vqrshlSCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        Element rBit = 0;
        if (shiftAmt <= sizeof(Element) * 8)
            rBit = bits(srcElem1, shiftAmt - 1);
        if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
            rBit = 1;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        // Make sure the right shift sign extended when it should.
        if (srcElem1 < 0 && destElem >= 0) {
            destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                         1 - shiftAmt));
        }
        destElem += rBit;
    } else if (shiftAmt > 0) {
        bool sat = false;
        if (shiftAmt >= sizeof(Element) * 8) {
            if (srcElem1 != 0)
                sat = true;
            else
                destElem = 0;
        } else {
            if (bits(srcElem1, sizeof(Element) * 8 - 1,
                        sizeof(Element) * 8 - 1 - shiftAmt) !=
                    ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
                sat = true;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        }
        if (sat) {
            fpscr.qc = 1;
            destElem = mask(sizeof(Element) * 8 - 1);
            if (srcElem1 < 0)
                destElem = ~destElem;
        }
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
threeEqualRegInst("vqrshl", "VqrshlSD", "SimdCmpOp", signedTypes, 2, vqrshlSCode)
threeEqualRegInst("vqrshl", "VqrshlSQ", "SimdCmpOp", signedTypes, 4, vqrshlSCode)
# Absolute-difference family.  The accumulating forms (vaba, vabal) read
# the old destination (trailing True argument) and add to it.
# NOTE(review): the assignment lines and closing quotes were absent from
# the damaged listing; names are taken from the instantiation calls.
vabaCode = '''
    destElem += (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
                                        (srcElem2 - srcElem1);
'''
threeEqualRegInst("vaba", "VabaD", "SimdAddAccOp", allTypes, 2, vabaCode, True)
threeEqualRegInst("vaba", "VabaQ", "SimdAddAccOp", allTypes, 4, vabaCode, True)
vabalCode = '''
    destElem += (srcElem1 > srcElem2) ?
        ((BigElement)srcElem1 - (BigElement)srcElem2) :
        ((BigElement)srcElem2 - (BigElement)srcElem1);
'''
threeRegLongInst("vabal", "Vabal", "SimdAddAccOp", smallTypes, vabalCode, True)

vabdCode = '''
    destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
                                       (srcElem2 - srcElem1);
'''
threeEqualRegInst("vabd", "VabdD", "SimdAddOp", allTypes, 2, vabdCode)
threeEqualRegInst("vabd", "VabdQ", "SimdAddOp", allTypes, 4, vabdCode)
vabdlCode = '''
    destElem = (srcElem1 > srcElem2) ?
        ((BigElement)srcElem1 - (BigElement)srcElem2) :
        ((BigElement)srcElem2 - (BigElement)srcElem1);
'''
threeRegLongInst("vabdl", "Vabdl", "SimdAddOp", smallTypes, vabdlCode)
# vtst: all ones when the operands share any set bit, otherwise zero.
# NOTE(review): the assignment line and closing quotes were absent from
# the damaged listing; the name is taken from the instantiation calls.
vtstCode = '''
    destElem = (srcElem1 & srcElem2) ? (Element)(-1) : 0;
'''
threeEqualRegInst("vtst", "VtstD", "SimdAluOp", unsignedTypes, 2, vtstCode)
threeEqualRegInst("vtst", "VtstQ", "SimdAluOp", unsignedTypes, 4, vtstCode)
# Multiply and multiply-accumulate, in same-width and widening forms.  The
# accumulating forms read the old destination (trailing True argument).
# NOTE(review): the assignment lines and closing quotes were absent from
# the damaged listing; names are taken from the instantiation calls.
vmulCode = '''
    destElem = srcElem1 * srcElem2;
'''
threeEqualRegInst("vmul", "NVmulD", "SimdMultOp", allTypes, 2, vmulCode)
threeEqualRegInst("vmul", "NVmulQ", "SimdMultOp", allTypes, 4, vmulCode)
vmullCode = '''
    destElem = (BigElement)srcElem1 * (BigElement)srcElem2;
'''
threeRegLongInst("vmull", "Vmull", "SimdMultOp", smallTypes, vmullCode)

vmlaCode = '''
    destElem = destElem + srcElem1 * srcElem2;
'''
threeEqualRegInst("vmla", "NVmlaD", "SimdMultAccOp", allTypes, 2, vmlaCode, True)
threeEqualRegInst("vmla", "NVmlaQ", "SimdMultAccOp", allTypes, 4, vmlaCode, True)
vmlalCode = '''
    destElem = destElem + (BigElement)srcElem1 * (BigElement)srcElem2;
'''
threeRegLongInst("vmlal", "Vmlal", "SimdMultAccOp", smallTypes, vmlalCode, True)
# vqdmlal: doubles the widened product, saturating it for the listed
# operand combinations, then adds it to the accumulator with a second
# saturation check on the sum.
# NOTE(review): the assignment line, both `fpscr.qc = 1;` statements, the
# `if (negPreDest)` test, closing braces, `FpscrQc = fpscr;` and the
# closing quotes were absent from the damaged listing and are restored --
# confirm against version control.
vqdmlalCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
    Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
    Element halfNeg = maxNeg / 2;
    if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
        (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
        (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
        midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
        fpscr.qc = 1;
    }
    bool negPreDest = ltz(destElem);
    destElem += midElem;
    bool negDest = ltz(destElem);
    bool negMid = ltz(midElem);
    if (negPreDest == negMid && negMid != negDest) {
        destElem = mask(sizeof(BigElement) * 8 - 1);
        if (negPreDest)
            destElem = ~destElem;
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeRegLongInst("vqdmlal", "Vqdmlal", "SimdMultAccOp", smallTypes, vqdmlalCode, True)
# vqdmlsl: as vqdmlal, but the doubled product is subtracted from the
# accumulator.
# NOTE(review): the assignment line, both `fpscr.qc = 1;` statements, the
# `if (negPreDest)` test, closing braces, `FpscrQc = fpscr;` and the
# closing quotes were absent from the damaged listing and are restored --
# confirm against version control.
vqdmlslCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
    Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
    Element halfNeg = maxNeg / 2;
    if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
        (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
        (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
        midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
        fpscr.qc = 1;
    }
    bool negPreDest = ltz(destElem);
    destElem -= midElem;
    bool negDest = ltz(destElem);
    bool posMid = ltz((BigElement)-midElem);
    if (negPreDest == posMid && posMid != negDest) {
        destElem = mask(sizeof(BigElement) * 8 - 1);
        if (negPreDest)
            destElem = ~destElem;
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeRegLongInst("vqdmlsl", "Vqdmlsl", "SimdMultAccOp", smallTypes, vqdmlslCode, True)
# vqdmull: doubled widening multiply; when both operands equal the most
# negative Element value the result is replaced by the inverted
# shifted operand.
# NOTE(review): the assignment line, `fpscr.qc = 1;`, the closing brace,
# `FpscrQc = fpscr;` and the closing quotes were absent from the damaged
# listing and are restored -- confirm against version control.
vqdmullCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
    if (srcElem1 == srcElem2 &&
            srcElem1 == (Element)((Element)1 <<
                (Element)(sizeof(Element) * 8 - 1))) {
        destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8));
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeRegLongInst("vqdmull", "Vqdmull", "SimdMultAccOp", smallTypes, vqdmullCode)
// vmls element code: subtract the product of the two sources from the
// existing destElem. Registered below as NVmlsD and NVmlsQ over allTypes.
2572 destElem = destElem - srcElem1 * srcElem2;
2574 threeEqualRegInst("vmls", "NVmlsD", "SimdMultAccOp", allTypes, 2, vmlsCode, True)
2575 threeEqualRegInst("vmls", "NVmlsQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True)
// vmlsl element code: both sources are widened to BigElement before the
// multiply, and the product is subtracted from the existing destElem.
2577 destElem = destElem - (BigElement)srcElem1 * (BigElement)srcElem2;
2579 threeRegLongInst("vmlsl", "Vmlsl", "SimdMultAccOp", smallTypes, vmlslCode, True)
// vmulp element code (fragment): for every set bit j of srcElem2, srcElem1
// shifted left by j is XORed into destElem (shift-and-XOR multiply).
// The initialisation of destElem is not in the visible lines.
2583 for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2584 if (bits(srcElem2, j))
2585 destElem ^= srcElem1 << j;
2588 threeEqualRegInst("vmul", "NVmulpD", "SimdMultOp", unsignedTypes, 2, vmulpCode)
2589 threeEqualRegInst("vmul", "NVmulpQ", "SimdMultOp", unsignedTypes, 4, vmulpCode)
// vmullp element code (fragment): same shift-and-XOR multiply as the
// non-widening form, but srcElem1 is widened to BigElement before shifting so
// the shifted-out bits are kept. The initialisation of destElem is not in the
// visible lines.
2592 for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2593 if (bits(srcElem2, j))
2594 destElem ^= (BigElement)srcElem1 << j;
2597 threeRegLongInst("vmull", "Vmullp", "SimdMultOp", smallUnsignedTypes, vmullpCode)
# Pairwise max/min: reuse vmaxCode / vminCode (defined outside the visible
# lines) with pairwise=True. Only a 2-register class is registered for each.
2599 threeEqualRegInst("vpmax", "VpmaxD", "SimdCmpOp", smallTypes, 2, vmaxCode, pairwise=True)
2601 threeEqualRegInst("vpmin", "VpminD", "SimdCmpOp", smallTypes, 2, vminCode, pairwise=True)
// vqdmulh element code (fragment): destElem is twice the product of the
// sources shifted right by one Element width, i.e. the upper half of the
// doubled product. When both sources equal the value with only the top
// Element bit set, destElem is replaced by ~srcElem1.
2604 FPSCR fpscr = (FPSCR) FpscrQc;
2605 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >>
2606 (sizeof(Element) * 8);
2607 if (srcElem1 == srcElem2 &&
2608 srcElem1 == (Element)((Element)1 <<
2609 (sizeof(Element) * 8 - 1))) {
2610 destElem = ~srcElem1;
2615 threeEqualRegInst("vqdmulh", "VqdmulhD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)
2616 threeEqualRegInst("vqdmulh", "VqdmulhQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)
// vqrdmulh element code (fragment): like the non-rounding form, but
// 1 << (Element width - 1) is added to the doubled product before the shift
// right by one Element width, which rounds the result.
2619 FPSCR fpscr = (FPSCR) FpscrQc;
2620 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
2621 ((int64_t)1 << (sizeof(Element) * 8 - 1))) >>
2622 (sizeof(Element) * 8);
2623 Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2624 Element halfNeg = maxNeg / 2;
// Special-cased operand pairs; destElem is overwritten with one of the two
// constants below. The conditions choosing between them are not visible here.
2625 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2626 (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2627 (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2629 destElem = mask(sizeof(Element) * 8 - 1);
2631 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2637 threeEqualRegInst("vqrdmulh", "VqrdmulhD",
2638 "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
2639 threeEqualRegInst("vqrdmulh", "VqrdmulhQ",
2640 "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
// vmax (float) element code (fragment; the branch structure is only partly
// visible). destReg is first taken from processNans(); a binaryOp() call with
// fpMax<float> supplies the result on another path, and a further branch is
// taken when flushToZero(srcReg1, srcReg2) is true.
2643 FPSCR fpscr = (FPSCR) FpscrExc;
2645 destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2647 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMax<float>,
2648 true, true, VfpRoundNearest);
2649 } else if (flushToZero(srcReg1, srcReg2)) {
2654 threeEqualRegInstFp("vmax", "VmaxDFp", "SimdFloatCmpOp", ("float",), 2, vmaxfpCode)
2655 threeEqualRegInstFp("vmax", "VmaxQFp", "SimdFloatCmpOp", ("float",), 4, vmaxfpCode)
// vmin (float) element code (fragment; the branch structure is only partly
// visible). destReg is first taken from processNans(); a binaryOp() call with
// fpMin<float> supplies the result on another path, and a further branch is
// taken when flushToZero(srcReg1, srcReg2) is true.
2658 FPSCR fpscr = (FPSCR) FpscrExc;
2660 destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2662 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMin<float>,
2663 true, true, VfpRoundNearest);
2664 } else if (flushToZero(srcReg1, srcReg2)) {
2669 threeEqualRegInstFp("vmin", "VminDFp", "SimdFloatCmpOp", ("float",), 2, vminfpCode)
2670 threeEqualRegInstFp("vmin", "VminQFp", "SimdFloatCmpOp", ("float",), 4, vminfpCode)
# Pairwise float max/min: the same vmaxfpCode / vminfpCode strings used by the
# vmax / vmin float classes are registered again with pairwise=True, for both
# the 2- and 4-register variants.
2672 threeEqualRegInstFp("vpmax", "VpmaxDFp", "SimdFloatCmpOp", ("float",),
2673 2, vmaxfpCode, pairwise=True)
2674 threeEqualRegInstFp("vpmax", "VpmaxQFp", "SimdFloatCmpOp", ("float",),
2675 4, vmaxfpCode, pairwise=True)
2677 threeEqualRegInstFp("vpmin", "VpminDFp", "SimdFloatCmpOp", ("float",),
2678 2, vminfpCode, pairwise=True)
2679 threeEqualRegInstFp("vpmin", "VpminQFp", "SimdFloatCmpOp", ("float",),
2680 4, vminfpCode, pairwise=True)
// vadd (float) element code (fragment): destReg is binaryOp() applied to the
// two sources with fpAddS under VfpRoundNearest. The same code string is
// registered for vadd and, with pairwise=True, for vpadd.
2683 FPSCR fpscr = (FPSCR) FpscrExc;
2684 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpAddS,
2685 true, true, VfpRoundNearest);
2688 threeEqualRegInstFp("vadd", "VaddDFp", "SimdFloatAddOp", ("float",), 2, vaddfpCode)
2689 threeEqualRegInstFp("vadd", "VaddQFp", "SimdFloatAddOp", ("float",), 4, vaddfpCode)
2691 threeEqualRegInstFp("vpadd", "VpaddDFp", "SimdFloatAddOp", ("float",),
2692 2, vaddfpCode, pairwise=True)
2693 threeEqualRegInstFp("vpadd", "VpaddQFp", "SimdFloatAddOp", ("float",),
2694 4, vaddfpCode, pairwise=True)
// vsub (float) element code (fragment): destReg is binaryOp() applied to the
// two sources with fpSubS under VfpRoundNearest.
2697 FPSCR fpscr = (FPSCR) FpscrExc;
2698 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2699 true, true, VfpRoundNearest);
2702 threeEqualRegInstFp("vsub", "VsubDFp", "SimdFloatAddOp", ("float",), 2, vsubfpCode)
2703 threeEqualRegInstFp("vsub", "VsubQFp", "SimdFloatAddOp", ("float",), 4, vsubfpCode)
// vmul (float) element code (fragment): destReg is binaryOp() applied to the
// two sources with fpMulS under VfpRoundNearest.
2706 FPSCR fpscr = (FPSCR) FpscrExc;
2707 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2708 true, true, VfpRoundNearest);
2711 threeEqualRegInstFp("vmul", "NVmulDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode)
2712 threeEqualRegInstFp("vmul", "NVmulQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode)
// vmla (float) element code (fragment): two separate binaryOp() steps -- the
// product goes into the float temporary mid, then mid is added to destReg.
2715 FPSCR fpscr = (FPSCR) FpscrExc;
2716 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2717 true, true, VfpRoundNearest);
2718 destReg = binaryOp(fpscr, mid, destReg, fpAddS,
2719 true, true, VfpRoundNearest);
2722 threeEqualRegInstFp("vmla", "NVmlaDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True)
2723 threeEqualRegInstFp("vmla", "NVmlaQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True)
// vfma (float) element code (fragment): a single ternaryOp() call with
// fpMulAdd<float> over srcReg1, srcReg2 and the existing destReg, in contrast
// to the two-step multiply-then-add used by the vmla float code.
2726 FPSCR fpscr = (FPSCR) FpscrExc;
2727 destReg = ternaryOp(fpscr, srcReg1, srcReg2, destReg, fpMulAdd<float>,
2728 true, true, VfpRoundNearest);
2731 threeEqualRegInstFp("vfma", "NVfmaDFp", "SimdFloatMultAccOp", ("float",), 2, vfmafpCode, True)
2732 threeEqualRegInstFp("vfma", "NVfmaQFp", "SimdFloatMultAccOp", ("float",), 4, vfmafpCode, True)
// vfms (float) element code (fragment): the same single ternaryOp() call as
// the vfma code, except that the first source is negated (-srcReg1) before
// being passed in.
2735 FPSCR fpscr = (FPSCR) FpscrExc;
2736 destReg = ternaryOp(fpscr, -srcReg1, srcReg2, destReg, fpMulAdd<float>,
2737 true, true, VfpRoundNearest);
2740 threeEqualRegInstFp("vfms", "NVfmsDFp", "SimdFloatMultAccOp", ("float",), 2, vfmsfpCode, True)
2741 threeEqualRegInstFp("vfms", "NVfmsQFp", "SimdFloatMultAccOp", ("float",), 4, vfmsfpCode, True)
// vmls (float) element code (fragment): two separate binaryOp() steps -- the
// product goes into the float temporary mid, then mid is subtracted from
// destReg (destReg is the first operand of fpSubS).
2744 FPSCR fpscr = (FPSCR) FpscrExc;
2745 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2746 true, true, VfpRoundNearest);
2747 destReg = binaryOp(fpscr, destReg, mid, fpSubS,
2748 true, true, VfpRoundNearest);
2751 threeEqualRegInstFp("vmls", "NVmlsDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True)
2752 threeEqualRegInstFp("vmls", "NVmlsQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True)
// vcgt (float, register-register) element code (fragment): res is whatever
// binaryOp() returns for vcgtFunc; destReg becomes -1 when res == 0 and 0
// otherwise. The meaning of vcgtFunc's return value is defined outside the
// visible lines. Registered with toInt = True.
2755 FPSCR fpscr = (FPSCR) FpscrExc;
2756 float res = binaryOp(fpscr, srcReg1, srcReg2, vcgtFunc,
2757 true, true, VfpRoundNearest);
2758 destReg = (res == 0) ? -1 : 0;
2763 threeEqualRegInstFp("vcgt", "VcgtDFp", "SimdFloatCmpOp", ("float",),
2764 2, vcgtfpCode, toInt = True)
2765 threeEqualRegInstFp("vcgt", "VcgtQFp", "SimdFloatCmpOp", ("float",),
2766 4, vcgtfpCode, toInt = True)
// vcge (float, register-register) element code (fragment): res is whatever
// binaryOp() returns for vcgeFunc; destReg becomes -1 when res == 0 and 0
// otherwise. Registered with toInt = True.
2769 FPSCR fpscr = (FPSCR) FpscrExc;
2770 float res = binaryOp(fpscr, srcReg1, srcReg2, vcgeFunc,
2771 true, true, VfpRoundNearest);
2772 destReg = (res == 0) ? -1 : 0;
2777 threeEqualRegInstFp("vcge", "VcgeDFp", "SimdFloatCmpOp", ("float",),
2778 2, vcgefpCode, toInt = True)
2779 threeEqualRegInstFp("vcge", "VcgeQFp", "SimdFloatCmpOp", ("float",),
2780 4, vcgefpCode, toInt = True)
// vacgt (float) element code (fragment): res is whatever binaryOp() returns
// for vacgtFunc; destReg becomes -1 when res == 0 and 0 otherwise.
// Registered with toInt = True.
2783 FPSCR fpscr = (FPSCR) FpscrExc;
2784 float res = binaryOp(fpscr, srcReg1, srcReg2, vacgtFunc,
2785 true, true, VfpRoundNearest);
2786 destReg = (res == 0) ? -1 : 0;
2791 threeEqualRegInstFp("vacgt", "VacgtDFp", "SimdFloatCmpOp", ("float",),
2792 2, vacgtfpCode, toInt = True)
2793 threeEqualRegInstFp("vacgt", "VacgtQFp", "SimdFloatCmpOp", ("float",),
2794 4, vacgtfpCode, toInt = True)
// vacge (float) element code (fragment): res is whatever binaryOp() returns
// for vacgeFunc; destReg becomes -1 when res == 0 and 0 otherwise.
// Registered with toInt = True.
2797 FPSCR fpscr = (FPSCR) FpscrExc;
2798 float res = binaryOp(fpscr, srcReg1, srcReg2, vacgeFunc,
2799 true, true, VfpRoundNearest);
2800 destReg = (res == 0) ? -1 : 0;
2805 threeEqualRegInstFp("vacge", "VacgeDFp", "SimdFloatCmpOp", ("float",),
2806 2, vacgefpCode, toInt = True)
2807 threeEqualRegInstFp("vacge", "VacgeQFp", "SimdFloatCmpOp", ("float",),
2808 4, vacgefpCode, toInt = True)
// vceq (float, register-register) element code (fragment): res is whatever
// binaryOp() returns for vceqFunc; destReg becomes -1 when res == 0 and 0
// otherwise. Registered with toInt = True.
2811 FPSCR fpscr = (FPSCR) FpscrExc;
2812 float res = binaryOp(fpscr, srcReg1, srcReg2, vceqFunc,
2813 true, true, VfpRoundNearest);
2814 destReg = (res == 0) ? -1 : 0;
2819 threeEqualRegInstFp("vceq", "VceqDFp", "SimdFloatCmpOp", ("float",),
2820 2, vceqfpCode, toInt = True)
2821 threeEqualRegInstFp("vceq", "VceqQFp", "SimdFloatCmpOp", ("float",),
2822 4, vceqfpCode, toInt = True)
// vrecps element code (fragment): destReg is binaryOp() applied to the two
// sources with fpRecpsS under VfpRoundNearest.
2825 FPSCR fpscr = (FPSCR) FpscrExc;
2826 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRecpsS,
2827 true, true, VfpRoundNearest);
2830 threeEqualRegInstFp("vrecps", "VrecpsDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpsCode)
2831 threeEqualRegInstFp("vrecps", "VrecpsQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpsCode)
// vrsqrts element code (fragment): destReg is binaryOp() applied to the two
// sources with fpRSqrtsS under VfpRoundNearest.
2834 FPSCR fpscr = (FPSCR) FpscrExc;
2835 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRSqrtsS,
2836 true, true, VfpRoundNearest);
2839 threeEqualRegInstFp("vrsqrts", "VrsqrtsDFp", "SimdFloatMiscOp", ("float",), 2, vrsqrtsCode)
2840 threeEqualRegInstFp("vrsqrts", "VrsqrtsQFp", "SimdFloatMiscOp", ("float",), 4, vrsqrtsCode)
// vabd (float) element code (fragment): the difference of the sources is
// computed through binaryOp() with fpSubS, and destReg is its absolute value.
2843 FPSCR fpscr = (FPSCR) FpscrExc;
2844 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2845 true, true, VfpRoundNearest);
2846 destReg = fabs(mid);
2849 threeEqualRegInstFp("vabd", "VabdDFp", "SimdFloatAddOp", ("float",), 2, vabdfpCode)
2850 threeEqualRegInstFp("vabd", "VabdQFp", "SimdFloatAddOp", ("float",), 4, vabdfpCode)
# Two-register instantiations whose class names carry an extra "s"
# (presumably the by-scalar encodings -- TODO confirm). Each one reuses a code
# string that is also passed to a three-register helper elsewhere in the file.
# NOTE(review): the integer vmla forms are registered for unsignedTypes while
# the matching vmls forms use allTypes -- confirm the difference is intended.
2852 twoEqualRegInst("vmla", "VmlasD", "SimdMultAccOp", unsignedTypes, 2, vmlaCode, True)
2853 twoEqualRegInst("vmla", "VmlasQ", "SimdMultAccOp", unsignedTypes, 4, vmlaCode, True)
2854 twoEqualRegInstFp("vmla", "VmlasDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True)
2855 twoEqualRegInstFp("vmla", "VmlasQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True)
2856 twoRegLongInst("vmlal", "Vmlals", "SimdMultAccOp", smallTypes, vmlalCode, True)
# Multiply-subtract forms.
2858 twoEqualRegInst("vmls", "VmlssD", "SimdMultAccOp", allTypes, 2, vmlsCode, True)
2859 twoEqualRegInst("vmls", "VmlssQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True)
2860 twoEqualRegInstFp("vmls", "VmlssDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True)
2861 twoEqualRegInstFp("vmls", "VmlssQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True)
2862 twoRegLongInst("vmlsl", "Vmlsls", "SimdMultAccOp", smallTypes, vmlslCode, True)
# Plain multiply forms (no trailing True).
2864 twoEqualRegInst("vmul", "VmulsD", "SimdMultOp", allTypes, 2, vmulCode)
2865 twoEqualRegInst("vmul", "VmulsQ", "SimdMultOp", allTypes, 4, vmulCode)
2866 twoEqualRegInstFp("vmul", "VmulsDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode)
2867 twoEqualRegInstFp("vmul", "VmulsQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode)
2868 twoRegLongInst("vmull", "Vmulls", "SimdMultOp", smallTypes, vmullCode)
# Saturating doubling multiply forms.
2870 twoRegLongInst("vqdmull", "Vqdmulls", "SimdMultOp", smallTypes, vqdmullCode)
2871 twoRegLongInst("vqdmlal", "Vqdmlals", "SimdMultAccOp", smallTypes, vqdmlalCode, True)
2872 twoRegLongInst("vqdmlsl", "Vqdmlsls", "SimdMultAccOp", smallTypes, vqdmlslCode, True)
2873 twoEqualRegInst("vqdmulh", "VqdmulhsD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)
2874 twoEqualRegInst("vqdmulh", "VqdmulhsQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)
2875 twoEqualRegInst("vqrdmulh", "VqrdmulhsD",
2876 "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
2877 twoEqualRegInst("vqrdmulh", "VqrdmulhsQ",
2878 "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
// vshr element code (fragment): shifts of at least the source width take a
// separate branch whose body is not visible here; the remaining visible
// statement sets destElem to srcElem1 shifted right by imm.
2881 if (imm >= sizeof(srcElem1) * 8) {
2887 destElem = srcElem1 >> imm;
2890 twoRegShiftInst("vshr", "NVshrD", "SimdShiftOp", allTypes, 2, vshrCode)
2891 twoRegShiftInst("vshr", "NVshrQ", "SimdShiftOp", allTypes, 4, vshrCode)
// vsra element code (fragment): builds mid, the right-shifted source. The
// declaration of mid and the accumulation into destElem are not visible here.
// Shifts of at least the source width give -1 or 0 depending on ltz(srcElem1).
2895 if (imm >= sizeof(srcElem1) * 8) {
2896 mid = ltz(srcElem1) ? -1 : 0;
2898 mid = srcElem1 >> imm;
// If the source is negative but the shifted value is not, isolate the bit at
// position (width - 1 - imm) and OR in its negation, which sets every bit
// from that position upward.
2899 if (ltz(srcElem1) && !ltz(mid)) {
2900 mid |= -(mid & ((Element)1 <<
2901 (sizeof(Element) * 8 - 1 - imm)));
2906 twoRegShiftInst("vsra", "NVsraD", "SimdShiftAccOp", allTypes, 2, vsraCode, True)
2907 twoRegShiftInst("vsra", "NVsraQ", "SimdShiftAccOp", allTypes, 4, vsraCode, True)
// vrshr element code (fragment): rounding shift right. rBit is bit (imm - 1)
// of the source, i.e. the last bit shifted out, and is added to the shifted
// value. The shift is done as (imm - 1) followed by 1, presumably so that
// neither step reaches the full element width when imm equals it.
// The bodies of the over-wide branch and the branch structure around the
// final assignment are not visible here.
2910 if (imm > sizeof(srcElem1) * 8) {
2913 Element rBit = bits(srcElem1, imm - 1);
2914 destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2916 destElem = srcElem1;
2919 twoRegShiftInst("vrshr", "NVrshrD", "SimdShiftOp", allTypes, 2, vrshrCode)
2920 twoRegShiftInst("vrshr", "NVrshrQ", "SimdShiftOp", allTypes, 4, vrshrCode)
// vrsra element code (fragment): same rounding shift right as the vrshr code
// (shifted value plus the last bit shifted out), but the result is added to
// destElem instead of replacing it. The over-wide branch body is not visible.
2923 if (imm > sizeof(srcElem1) * 8) {
2926 Element rBit = bits(srcElem1, imm - 1);
2927 destElem += ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2929 destElem += srcElem1;
2932 twoRegShiftInst("vrsra", "NVrsraD", "SimdShiftAccOp", allTypes, 2, vrsraCode, True)
2933 twoRegShiftInst("vrsra", "NVrsraQ", "SimdShiftAccOp", allTypes, 4, vrsraCode, True)
// vsri element code (fragment): shift right and insert. For shifts of the
// full width or more destElem is left as it is (self-assignment). Otherwise
// the shifted source is ORed with the bits of destElem selected by
// ~mask(width - imm) -- assuming mask(n) yields the low n bits set, those are
// the top imm bits of the old destination.
2936 if (imm >= sizeof(Element) * 8)
2937 destElem = destElem;
2939 destElem = (srcElem1 >> imm) |
2940 (destElem & ~mask(sizeof(Element) * 8 - imm));
2942 twoRegShiftInst("vsri", "NVsriD", "SimdShiftOp", unsignedTypes, 2, vsriCode, True)
2943 twoRegShiftInst("vsri", "NVsriQ", "SimdShiftOp", unsignedTypes, 4, vsriCode, True)
// vshl element code (fragment): for shifts of the full width or more the
// source is shifted by (width - 1) and then by 1, so no single shift reaches
// the element width; otherwise destElem is srcElem1 shifted left by imm.
2946 if (imm >= sizeof(Element) * 8)
2947 destElem = (srcElem1 << (sizeof(Element) * 8 - 1)) << 1;
2949 destElem = srcElem1 << imm;
2951 twoRegShiftInst("vshl", "NVshlD", "SimdShiftOp", unsignedTypes, 2, vshlCode)
2952 twoRegShiftInst("vshl", "NVshlQ", "SimdShiftOp", unsignedTypes, 4, vshlCode)
// vsli element code (fragment): shift left and insert. For shifts of the
// full width or more destElem is left as it is (self-assignment). Otherwise
// the shifted source is ORed with destElem & mask(imm) -- assuming mask(n)
// yields the low n bits set, those are the low imm bits of the old
// destination.
2955 if (imm >= sizeof(Element) * 8)
2956 destElem = destElem;
2958 destElem = (srcElem1 << imm) | (destElem & mask(imm));
2960 twoRegShiftInst("vsli", "NVsliD", "SimdShiftOp", unsignedTypes, 2, vsliCode, True)
2961 twoRegShiftInst("vsli", "NVsliQ", "SimdShiftOp", unsignedTypes, 4, vsliCode, True)
// vqshl (signed) element code (fragment; several lines are not present in
// this view). In both the over-wide and the in-range path, an overflowing
// result is replaced by 1 << (width - 1), which a following statement may
// complement; the conditions on that complement are not visible here.
2964 FPSCR fpscr = (FPSCR) FpscrQc;
2965 if (imm >= sizeof(Element) * 8) {
2966 if (srcElem1 != 0) {
2967 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2969 destElem = ~destElem;
2975 destElem = (srcElem1 << imm);
// topBits is the source's bit range [width - 1 : width - 1 - imm]. Overflow
// is reported unless those bits are all zero or all one.
2976 uint64_t topBits = bits((uint64_t)srcElem1,
2977 sizeof(Element) * 8 - 1,
2978 sizeof(Element) * 8 - 1 - imm);
2979 if (topBits != 0 && topBits != mask(imm + 1)) {
2980 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2982 destElem = ~destElem;
2986 destElem = srcElem1;
2990 twoRegShiftInst("vqshl", "NVqshlD", "SimdShiftOp", signedTypes, 2, vqshlCode)
2991 twoRegShiftInst("vqshl", "NVqshlQ", "SimdShiftOp", signedTypes, 4, vqshlCode)
// vqshlu (unsigned) element code (fragment; several lines are not present in
// this view). An overflowing result is replaced by mask(width). topBits is
// the source's bit range [width - 1 : width - imm]; the test on it is not
// visible here.
2994 FPSCR fpscr = (FPSCR) FpscrQc;
2995 if (imm >= sizeof(Element) * 8) {
2996 if (srcElem1 != 0) {
2997 destElem = mask(sizeof(Element) * 8);
3003 destElem = (srcElem1 << imm);
3004 uint64_t topBits = bits((uint64_t)srcElem1,
3005 sizeof(Element) * 8 - 1,
3006 sizeof(Element) * 8 - imm);
3008 destElem = mask(sizeof(Element) * 8);
3012 destElem = srcElem1;
3016 twoRegShiftInst("vqshlu", "NVqshluD", "SimdShiftOp", unsignedTypes, 2, vqshluCode)
3017 twoRegShiftInst("vqshlu", "NVqshluQ", "SimdShiftOp", unsignedTypes, 4, vqshluCode)
// vqshlus element code for signed inputs (fragment; several lines are not
// present in this view). Positive sources that overflow are replaced by
// mask(width); the handling that precedes each visible "else if" is not
// visible here.
3020 FPSCR fpscr = (FPSCR) FpscrQc;
3021 if (imm >= sizeof(Element) * 8) {
3025 } else if (srcElem1 > 0) {
3026 destElem = mask(sizeof(Element) * 8);
3032 destElem = (srcElem1 << imm);
3033 uint64_t topBits = bits((uint64_t)srcElem1,
3034 sizeof(Element) * 8 - 1,
3035 sizeof(Element) * 8 - imm);
3039 } else if (topBits != 0) {
3040 destElem = mask(sizeof(Element) * 8);
3048 destElem = srcElem1;
3053 twoRegShiftInst("vqshlus", "NVqshlusD", "SimdShiftOp", signedTypes, 2, vqshlusCode)
3054 twoRegShiftInst("vqshlus", "NVqshlusQ", "SimdShiftOp", signedTypes, 4, vqshlusCode)
// vshrn element code (fragment): shifts of at least the source width take a
// separate branch whose body is not visible here; the remaining visible
// statement sets destElem to srcElem1 shifted right by imm.
3057 if (imm >= sizeof(srcElem1) * 8) {
3060 destElem = srcElem1 >> imm;
3063 twoRegNarrowShiftInst("vshrn", "NVshrn", "SimdShiftOp", smallUnsignedTypes, vshrnCode)
// vrshrn element code (fragment): rounding shift right. rBit is bit (imm - 1)
// of the source, i.e. the last bit shifted out, and is added to the shifted
// value. The over-wide branch body is not visible here.
3066 if (imm > sizeof(srcElem1) * 8) {
3069 Element rBit = bits(srcElem1, imm - 1);
3070 destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
3072 destElem = srcElem1;
3075 twoRegNarrowShiftInst("vrshrn", "NVrshrn", "SimdShiftOp", smallUnsignedTypes, vrshrnCode)
// vqshrn (signed) element code (fragment; several lines are not present in
// this view). mid is the source shifted right by imm, in two steps, with the
// bit at position (BigElement width - 1 - imm) negated and ORed in to fill
// the upper bits. If mid does not survive a round trip through Element, the
// result is replaced by mask(Element width - 1), optionally complemented
// (the guarding condition is not visible here).
3078 FPSCR fpscr = (FPSCR) FpscrQc;
3079 if (imm > sizeof(srcElem1) * 8) {
3080 if (srcElem1 != 0 && srcElem1 != -1)
3084 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
3085 mid |= -(mid & ((BigElement)1 <<
3086 (sizeof(BigElement) * 8 - 1 - imm)));
3087 if (mid != (Element)mid) {
3088 destElem = mask(sizeof(Element) * 8 - 1);
3090 destElem = ~destElem;
3096 destElem = srcElem1;
3100 twoRegNarrowShiftInst("vqshrn", "NVqshrn", "SimdShiftOp", smallSignedTypes, vqshrnCode)
// vqshrun element code for unsigned inputs (fragment; several lines are not
// present in this view). mid is the source shifted right by imm in two steps;
// if it does not survive a round trip through Element the result is replaced
// by mask(Element width).
3103 FPSCR fpscr = (FPSCR) FpscrQc;
3104 if (imm > sizeof(srcElem1) * 8) {
3109 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
3110 if (mid != (Element)mid) {
3111 destElem = mask(sizeof(Element) * 8);
3117 destElem = srcElem1;
3121 twoRegNarrowShiftInst("vqshrun", "NVqshrun",
3122 "SimdShiftOp", smallUnsignedTypes, vqshrunCode)
// vqshrun element code for signed inputs (fragment; several lines are not
// present in this view). mid is the source shifted right by imm in two steps;
// overflow is detected by any set bit in mid's range
// [BigElement width - 1 : Element width], and one of the overflow outcomes is
// mask(Element width).
3125 FPSCR fpscr = (FPSCR) FpscrQc;
3126 if (imm > sizeof(srcElem1) * 8) {
3131 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
3132 if (bits(mid, sizeof(BigElement) * 8 - 1,
3133 sizeof(Element) * 8) != 0) {
3137 destElem = mask(sizeof(Element) * 8);
3144 destElem = srcElem1;
3148 twoRegNarrowShiftInst("vqshrun", "NVqshruns",
3149 "SimdShiftOp", smallSignedTypes, vqshrunsCode)
// vqrshrn (signed) element code (fragment; several lines are not present in
// this view). mid starts as the source shifted right by (imm - 1); rBit is
// its lowest bit, i.e. the rounding bit. The remaining shift and the addition
// of rBit are not visible here.
3152 FPSCR fpscr = (FPSCR) FpscrQc;
3153 if (imm > sizeof(srcElem1) * 8) {
3154 if (srcElem1 != 0 && srcElem1 != -1)
3158 BigElement mid = (srcElem1 >> (imm - 1));
3159 uint64_t rBit = mid & 0x1;
3161 mid |= -(mid & ((BigElement)1 <<
3162 (sizeof(BigElement) * 8 - 1 - imm)));
// Overflow: the value does not survive a round trip through Element. The
// result is replaced by mask(Element width - 1), optionally complemented
// (the guarding condition is not visible here).
3164 if (mid != (Element)mid) {
3165 destElem = mask(sizeof(Element) * 8 - 1);
3167 destElem = ~destElem;
// Same overflow handling applied directly to the unshifted source.
3173 if (srcElem1 != (Element)srcElem1) {
3174 destElem = mask(sizeof(Element) * 8 - 1);
3176 destElem = ~destElem;
3179 destElem = srcElem1;
3184 twoRegNarrowShiftInst("vqrshrn", "NVqrshrn",
3185 "SimdShiftOp", smallSignedTypes, vqrshrnCode)
// vqrshrun element code for unsigned inputs (fragment; several lines are not
// present in this view). mid starts as the source shifted right by (imm - 1);
// rBit is its lowest bit, i.e. the rounding bit. The remaining shift and the
// addition of rBit are not visible here.
3188 FPSCR fpscr = (FPSCR) FpscrQc;
3189 if (imm > sizeof(srcElem1) * 8) {
3194 BigElement mid = (srcElem1 >> (imm - 1));
3195 uint64_t rBit = mid & 0x1;
// Overflow: the value does not fit in Element, so saturate to all ones.
3198 if (mid != (Element)mid) {
3199 destElem = mask(sizeof(Element) * 8);
// Unshifted path. This code is instantiated for unsigned types only, so an
// overflow here must saturate to the same all-ones value as the shifted path;
// mask(width - 1) is the signed maximum and was inconsistent with it.
3205 if (srcElem1 != (Element)srcElem1) {
3206 destElem = mask(sizeof(Element) * 8);
3209 destElem = srcElem1;
3214 twoRegNarrowShiftInst("vqrshrun", "NVqrshrun",
3215 "SimdShiftOp", smallUnsignedTypes, vqrshrunCode)
// vqrshrun element code for signed inputs (fragment; several lines are not
// present in this view). mid starts as the source shifted right by (imm - 1);
// rBit is its lowest bit, i.e. the rounding bit. Overflow is detected by any
// set bit in mid's range [BigElement width - 1 : Element width], and one of
// the overflow outcomes is mask(Element width).
3218 FPSCR fpscr = (FPSCR) FpscrQc;
3219 if (imm > sizeof(srcElem1) * 8) {
3224 BigElement mid = (srcElem1 >> (imm - 1));
3225 uint64_t rBit = mid & 0x1;
3227 mid |= -(mid & ((BigElement)1 <<
3228 (sizeof(BigElement) * 8 - 1 - imm)));
3230 if (bits(mid, sizeof(BigElement) * 8 - 1,
3231 sizeof(Element) * 8) != 0) {
3235 destElem = mask(sizeof(Element) * 8);
3246 destElem = srcElem1;
3251 twoRegNarrowShiftInst("vqrshrun", "NVqrshruns",
3252 "SimdShiftOp", smallSignedTypes, vqrshrunsCode)
// vshll element code (fragment): shifts of at least the destination width
// take a separate branch whose body is not visible here; otherwise the source
// is widened to BigElement and shifted left by imm.
3255 if (imm >= sizeof(destElem) * 8) {
3258 destElem = (BigElement)srcElem1 << imm;
3261 twoRegLongShiftInst("vshll", "NVshll", "SimdShiftOp", smallTypes, vshllCode)
// vmovl element code: plain copy of the source element into the destination
// element; any widening comes from the element types the helper supplies.
3264 destElem = srcElem1;
3266 twoRegLongShiftInst("vmovl", "NVmovl", "SimdMiscOp", smallTypes, vmovlCode)
// vcvt float -> fixed-point element code (fragment), calling
// vfpFpToFixed<float> with second argument false, width 32 and imm. From the
// class names this is presumably the unsigned variant -- TODO confirm.
// The statement guarded by flushToZero() is not visible here.
// The empty asm statements name srcElem1 / destReg as memory operands,
// presumably to keep the conversion between prepFpState() and finishVfp().
3269 FPSCR fpscr = (FPSCR) FpscrExc;
3270 if (flushToZero(srcElem1))
3272 VfpSavedState state = prepFpState(VfpRoundNearest);
3273 __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
3274 destReg = vfpFpToFixed<float>(srcElem1, false, 32, imm);
3275 __asm__ __volatile__("" :: "m" (destReg));
3276 finishVfp(fpscr, state, true);
3279 twoRegShiftInst("vcvt", "NVcvt2ufxD", "SimdCvtOp", ("float",),
3280 2, vcvt2ufxCode, toInt = True)
3281 twoRegShiftInst("vcvt", "NVcvt2ufxQ", "SimdCvtOp", ("float",),
3282 4, vcvt2ufxCode, toInt = True)
// vcvt float -> fixed-point element code (fragment), calling
// vfpFpToFixed<float> with second argument true, width 32 and imm. From the
// class names this is presumably the signed variant -- TODO confirm.
// The statement guarded by flushToZero() is not visible here.
// The empty asm statements name srcElem1 / destReg as memory operands,
// presumably to keep the conversion between prepFpState() and finishVfp().
3285 FPSCR fpscr = (FPSCR) FpscrExc;
3286 if (flushToZero(srcElem1))
3288 VfpSavedState state = prepFpState(VfpRoundNearest);
3289 __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
3290 destReg = vfpFpToFixed<float>(srcElem1, true, 32, imm);
3291 __asm__ __volatile__("" :: "m" (destReg));
3292 finishVfp(fpscr, state, true);
3295 twoRegShiftInst("vcvt", "NVcvt2sfxD", "SimdCvtOp", ("float",),
3296 2, vcvt2sfxCode, toInt = True)
3297 twoRegShiftInst("vcvt", "NVcvt2sfxQ", "SimdCvtOp", ("float",),
3298 4, vcvt2sfxCode, toInt = True)
// vcvt fixed-point -> float element code (fragment): destElem is the result
// of vfpUFixedToFpS() on srcReg1 with width 32 and imm, evaluated between
// prepFpState() and finishVfp(). Registered with fromInt = True.
3301 FPSCR fpscr = (FPSCR) FpscrExc;
3302 VfpSavedState state = prepFpState(VfpRoundNearest);
3303 __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
3304 destElem = vfpUFixedToFpS(true, true, srcReg1, 32, imm);
3305 __asm__ __volatile__("" :: "m" (destElem));
3306 finishVfp(fpscr, state, true);
3309 twoRegShiftInst("vcvt", "NVcvtu2fpD", "SimdCvtOp", ("float",),
3310 2, vcvtu2fpCode, fromInt = True)
3311 twoRegShiftInst("vcvt", "NVcvtu2fpQ", "SimdCvtOp", ("float",),
3312 4, vcvtu2fpCode, fromInt = True)
// vcvt fixed-point -> float element code (fragment): destElem is the result
// of vfpSFixedToFpS() on srcReg1 with width 32 and imm, evaluated between
// prepFpState() and finishVfp(). Registered with fromInt = True.
3315 FPSCR fpscr = (FPSCR) FpscrExc;
3316 VfpSavedState state = prepFpState(VfpRoundNearest);
3317 __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
3318 destElem = vfpSFixedToFpS(true, true, srcReg1, 32, imm);
3319 __asm__ __volatile__("" :: "m" (destElem));
3320 finishVfp(fpscr, state, true);
3323 twoRegShiftInst("vcvt", "NVcvts2fpD", "SimdCvtOp", ("float",),
3324 2, vcvts2fpCode, fromInt = True)
3325 twoRegShiftInst("vcvt", "NVcvts2fpQ", "SimdCvtOp", ("float",),
3326 4, vcvts2fpCode, fromInt = True)
// vcvt narrowing conversion element code (fragment; the remaining arguments
// of the vcvtFpSFpH() call and the statement guarded by flushToZero() are not
// visible here). The source bits are first reinterpreted as a float via
// bitsToFp(), then converted with vcvtFpSFpH() between prepFpState() and
// finishVfp().
3330 FPSCR fpscr = (FPSCR) FpscrExc;
3331 float srcFp1 = bitsToFp(srcElem1, (float)0.0);
3332 if (flushToZero(srcFp1))
3334 VfpSavedState state = prepFpState(VfpRoundNearest);
3335 __asm__ __volatile__("" : "=m" (srcFp1), "=m" (destElem)
3336 : "m" (srcFp1), "m" (destElem));
3337 destElem = vcvtFpSFpH(fpscr, true, true, VfpRoundNearest,
3339 __asm__ __volatile__("" :: "m" (destElem));
3340 finishVfp(fpscr, state, true);
3343 twoRegNarrowMiscInst("vcvt", "NVcvts2h", "SimdCvtOp", ("uint16_t",), vcvts2hCode)
// vcvt widening conversion element code (fragment): vcvtFpHFpS() converts the
// source element (passing fpscr.ahp through), and fpToBits() stores the
// resulting float's bit pattern in destElem. Evaluated between prepFpState()
// and finishVfp().
3347 FPSCR fpscr = (FPSCR) FpscrExc;
3348 VfpSavedState state = prepFpState(VfpRoundNearest);
3349 __asm__ __volatile__("" : "=m" (srcElem1), "=m" (destElem)
3350 : "m" (srcElem1), "m" (destElem));
3351 destElem = fpToBits(vcvtFpHFpS(fpscr, true, fpscr.ahp, srcElem1));
3352 __asm__ __volatile__("" :: "m" (destElem));
3353 finishVfp(fpscr, state, true);
3356 twoRegLongMiscInst("vcvt", "NVcvth2s", "SimdCvtOp", ("uint16_t",), vcvth2sCode)
// vrsqrte (integer) element code: destElem is unsignedRSqrtEstimate() of the
// source. Registered for uint32_t elements only.
3359 destElem = unsignedRSqrtEstimate(srcElem1);
3361 twoRegMiscInst("vrsqrte", "NVrsqrteD", "SimdSqrtOp", ("uint32_t",), 2, vrsqrteCode)
3362 twoRegMiscInst("vrsqrte", "NVrsqrteQ", "SimdSqrtOp", ("uint32_t",), 4, vrsqrteCode)
// vrsqrte (float) element code (fragment): destReg is fprSqrtEstimate() of
// the source. The statement guarded by flushToZero() is not visible here.
3365 FPSCR fpscr = (FPSCR) FpscrExc;
3366 if (flushToZero(srcReg1))
3368 destReg = fprSqrtEstimate(fpscr, srcReg1);
3371 twoRegMiscInstFp("vrsqrte", "NVrsqrteDFp", "SimdFloatSqrtOp", ("float",), 2, vrsqrtefpCode)
3372 twoRegMiscInstFp("vrsqrte", "NVrsqrteQFp", "SimdFloatSqrtOp", ("float",), 4, vrsqrtefpCode)
// vrecpe (integer) element code: destElem is unsignedRecipEstimate() of the
// source. Registered for uint32_t elements only.
3375 destElem = unsignedRecipEstimate(srcElem1);
3377 twoRegMiscInst("vrecpe", "NVrecpeD", "SimdMultAccOp", ("uint32_t",), 2, vrecpeCode)
3378 twoRegMiscInst("vrecpe", "NVrecpeQ", "SimdMultAccOp", ("uint32_t",), 4, vrecpeCode)
// vrecpe (float) element code (fragment): destReg is fpRecipEstimate() of the
// source. The statement guarded by flushToZero() is not visible here.
3381 FPSCR fpscr = (FPSCR) FpscrExc;
3382 if (flushToZero(srcReg1))
3384 destReg = fpRecipEstimate(fpscr, srcReg1);
3387 twoRegMiscInstFp("vrecpe", "NVrecpeDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpefpCode)
3388 twoRegMiscInstFp("vrecpe", "NVrecpeQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpefpCode)
// vrev16 element code: the element value is copied unchanged; the reversal is
// expressed through the index j. groupSize is the number of elements in a
// 2-byte group (1 << 1 bytes), and XORing the index i with (groupSize - 1)
// mirrors the position within that group. How the surrounding template uses
// i and j is not visible here.
3391 destElem = srcElem1;
3392 unsigned groupSize = ((1 << 1) / sizeof(Element));
3393 unsigned reverseMask = (groupSize - 1);
3394 j = i ^ reverseMask;
3396 twoRegMiscInst("vrev16", "NVrev16D", "SimdAluOp", ("uint8_t",), 2, vrev16Code)
3397 twoRegMiscInst("vrev16", "NVrev16Q", "SimdAluOp", ("uint8_t",), 4, vrev16Code)
// vrev32 element code: the element value is copied unchanged; the reversal is
// expressed through the index j. groupSize is the number of elements in a
// 4-byte group (1 << 2 bytes), and XORing the index i with (groupSize - 1)
// mirrors the position within that group.
3399 destElem = srcElem1;
3400 unsigned groupSize = ((1 << 2) / sizeof(Element));
3401 unsigned reverseMask = (groupSize - 1);
3402 j = i ^ reverseMask;
3404 twoRegMiscInst("vrev32", "NVrev32D",
3405 "SimdAluOp", ("uint8_t", "uint16_t"), 2, vrev32Code)
3406 twoRegMiscInst("vrev32", "NVrev32Q",
3407 "SimdAluOp", ("uint8_t", "uint16_t"), 4, vrev32Code)
// vrev64 element code: the element value is copied unchanged; the reversal is
// expressed through the index j. groupSize is the number of elements in an
// 8-byte group (1 << 3 bytes), and XORing the index i with (groupSize - 1)
// mirrors the position within that group.
3409 destElem = srcElem1;
3410 unsigned groupSize = ((1 << 3) / sizeof(Element));
3411 unsigned reverseMask = (groupSize - 1);
3412 j = i ^ reverseMask;
3414 twoRegMiscInst("vrev64", "NVrev64D", "SimdAluOp", smallUnsignedTypes, 2, vrev64Code)
3415 twoRegMiscInst("vrev64", "NVrev64Q", "SimdAluOp", smallUnsignedTypes, 4, vrev64Code)
// vpaddl element code: both source elements are widened to BigElement before
// being added, so the sum is formed at the wider width.
3418 destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
3420 twoRegCondenseInst("vpaddl", "NVpaddlD", "SimdAddOp", smallTypes, 2, vpaddlCode)
3421 twoRegCondenseInst("vpaddl", "NVpaddlQ", "SimdAddOp", smallTypes, 4, vpaddlCode)
// vpadal element code: the widened sum of the two source elements is added to
// the existing destElem.
3424 destElem += (BigElement)srcElem1 + (BigElement)srcElem2;
3426 twoRegCondenseInst("vpadal", "NVpadalD", "SimdAddAccOp", smallTypes, 2, vpadalCode, True)
3427 twoRegCondenseInst("vpadal", "NVpadalQ", "SimdAddAccOp", smallTypes, 4, vpadalCode, True)
// vcls element code (fragment): only the two loop headers are visible. One
// loop runs while the source is negative and the other while it is
// non-negative, each capped at (width - 1) iterations of count. The loop
// bodies and the final assignment are not visible here.
3433 while (srcElem1 < 0 && count < sizeof(Element) * 8 - 1) {
3439 while (srcElem1 >= 0 && count < sizeof(Element) * 8 - 1) {
3446 twoRegMiscInst("vcls", "NVclsD", "SimdAluOp", signedTypes, 2, vclsCode)
3447 twoRegMiscInst("vcls", "NVclsQ", "SimdAluOp", signedTypes, 4, vclsCode)
// vclz element code (fragment): only the loop header is visible. The loop
// runs while the source is non-negative, capped at width iterations of count.
// The loop body and the final assignment are not visible here.
3451 while (srcElem1 >= 0 && count < sizeof(Element) * 8) {
3457 twoRegMiscInst("vclz", "NVclzD", "SimdAluOp", signedTypes, 2, vclzCode)
3458 twoRegMiscInst("vclz", "NVclzQ", "SimdAluOp", signedTypes, 4, vclzCode)
// vcnt element code (fragment): while the source is non-zero, its lowest bit
// is added to count. The statement that advances srcElem1 and the final
// assignment to destElem are not visible here.
3462 while (srcElem1 && count < sizeof(Element) * 8) {
3463 count += srcElem1 & 0x1;
3469 twoRegMiscInst("vcnt", "NVcntD", "SimdAluOp", unsignedTypes, 2, vcntCode)
3470 twoRegMiscInst("vcnt", "NVcntQ", "SimdAluOp", unsignedTypes, 4, vcntCode)
// vmvn (register) element code: bitwise complement of the source. Registered
// for uint64_t elements only.
3473 destElem = ~srcElem1;
3475 twoRegMiscInst("vmvn", "NVmvnD", "SimdAluOp", ("uint64_t",), 2, vmvnCode)
3476 twoRegMiscInst("vmvn", "NVmvnQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode)
// vqabs element code (fragment): if the source is the value with only the top
// bit set (whose negation does not fit), the result is its complement;
// otherwise negative sources are negated and the rest are copied.
3479 FPSCR fpscr = (FPSCR) FpscrQc;
3480 if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
3482 destElem = ~srcElem1;
3483 } else if (srcElem1 < 0) {
3484 destElem = -srcElem1;
3486 destElem = srcElem1;
3490 twoRegMiscInst("vqabs", "NVqabsD", "SimdAluOp", signedTypes, 2, vqabsCode)
3491 twoRegMiscInst("vqabs", "NVqabsQ", "SimdAluOp", signedTypes, 4, vqabsCode)
// vqneg element code (fragment): if the source is the value with only the top
// bit set (whose negation does not fit), the result is its complement; the
// other visible statement negates the source.
3494 FPSCR fpscr = (FPSCR) FpscrQc;
3495 if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
3497 destElem = ~srcElem1;
3499 destElem = -srcElem1;
3503 twoRegMiscInst("vqneg", "NVqnegD", "SimdAluOp", signedTypes, 2, vqnegCode)
3504 twoRegMiscInst("vqneg", "NVqnegQ", "SimdAluOp", signedTypes, 4, vqnegCode)
// vabs (integer) element code (fragment): the two visible statements assign
// either the negated source or the source itself; the condition choosing
// between them is not visible here.
3508 destElem = -srcElem1;
3510 destElem = srcElem1;
3514 twoRegMiscInst("vabs", "NVabsD", "SimdAluOp", signedTypes, 2, vabsCode)
3515 twoRegMiscInst("vabs", "NVabsQ", "SimdAluOp", signedTypes, 4, vabsCode)
// vabs (float) element code (fragment; the declaration of cStruct is not
// visible here). The source is stored through cStruct.f, cStruct.i is ANDed
// with mask(width - 1), and the value is read back through cStruct.f.
// Assuming mask(n) yields the low n bits set, this clears the top bit.
3522 cStruct.f = srcReg1;
3523 cStruct.i &= mask(sizeof(Element) * 8 - 1);
3524 destReg = cStruct.f;
3526 twoRegMiscInstFp("vabs", "NVabsDFp", "SimdFloatAluOp", ("float",), 2, vabsfpCode)
3527 twoRegMiscInstFp("vabs", "NVabsQFp", "SimdFloatAluOp", ("float",), 4, vabsfpCode)
// vneg (integer) element code: arithmetic negation of the source, with no
// special handling of any value.
3530 destElem = -srcElem1;
3532 twoRegMiscInst("vneg", "NVnegD", "SimdAluOp", signedTypes, 2, vnegCode)
3533 twoRegMiscInst("vneg", "NVnegQ", "SimdAluOp", signedTypes, 4, vnegCode)
# Register the float "vneg" classes NVnegDFp / NVnegQFp. The definition of
# vnegfpCode is not present in the visible lines.
3537 twoRegMiscInstFp("vneg", "NVnegDFp", "SimdFloatAluOp", ("float",), 2, vnegfpCode)
3538 twoRegMiscInstFp("vneg", "NVnegQFp", "SimdFloatAluOp", ("float",), 4, vnegfpCode)
# Integer compare-with-zero: destElem is mask(element width) when the source
# is greater than zero, otherwise 0. Registered for signedTypes.
3540 vcgtCode = 'destElem = (srcElem1 > 0) ? mask(sizeof(Element) * 8) : 0;'
3541 twoRegMiscInst("vcgt", "NVcgtD", "SimdCmpOp", signedTypes, 2, vcgtCode)
3542 twoRegMiscInst("vcgt", "NVcgtQ", "SimdCmpOp", signedTypes, 4, vcgtCode)
// vcgt (float, compare with zero) element code (fragment): the second operand
// of binaryOp() is the constant (FloatReg)0.0. destReg becomes -1 when the
// vcgtFunc result compares equal to 0, and 0 otherwise.
3544 FPSCR fpscr = (FPSCR) FpscrExc;
3545 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgtFunc,
3546 true, true, VfpRoundNearest);
3547 destReg = (res == 0) ? -1 : 0;
3552 twoRegMiscInstFp("vcgt", "NVcgtDFp", "SimdFloatCmpOp", ("float",),
3553 2, vcgtfpCode, toInt = True)
3554 twoRegMiscInstFp("vcgt", "NVcgtQFp", "SimdFloatCmpOp", ("float",),
3555 4, vcgtfpCode, toInt = True)
# Integer compare-with-zero: destElem is mask(element width) when the source
# is greater than or equal to zero, otherwise 0. Registered for signedTypes.
3557 vcgeCode = 'destElem = (srcElem1 >= 0) ? mask(sizeof(Element) * 8) : 0;'
3558 twoRegMiscInst("vcge", "NVcgeD", "SimdCmpOp", signedTypes, 2, vcgeCode)
3559 twoRegMiscInst("vcge", "NVcgeQ", "SimdCmpOp", signedTypes, 4, vcgeCode)
// vcge (float, compare with zero) element code (fragment): the second operand
// of binaryOp() is the constant (FloatReg)0.0. destReg becomes -1 when the
// vcgeFunc result compares equal to 0, and 0 otherwise.
3561 FPSCR fpscr = (FPSCR) FpscrExc;
3562 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgeFunc,
3563 true, true, VfpRoundNearest);
3564 destReg = (res == 0) ? -1 : 0;
3569 twoRegMiscInstFp("vcge", "NVcgeDFp", "SimdFloatCmpOp", ("float",),
3570 2, vcgefpCode, toInt = True)
3571 twoRegMiscInstFp("vcge", "NVcgeQFp", "SimdFloatCmpOp", ("float",),
3572 4, vcgefpCode, toInt = True)
# Integer compare-with-zero: destElem is mask(element width) when the source
# equals zero, otherwise 0. Registered for signedTypes.
3574 vceqCode = 'destElem = (srcElem1 == 0) ? mask(sizeof(Element) * 8) : 0;'
3575 twoRegMiscInst("vceq", "NVceqD", "SimdCmpOp", signedTypes, 2, vceqCode)
3576 twoRegMiscInst("vceq", "NVceqQ", "SimdCmpOp", signedTypes, 4, vceqCode)
// vceq (float, compare with zero) element code (fragment): the second operand
// of binaryOp() is the constant (FloatReg)0.0. destReg becomes -1 when the
// vceqFunc result compares equal to 0, and 0 otherwise.
3578 FPSCR fpscr = (FPSCR) FpscrExc;
3579 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vceqFunc,
3580 true, true, VfpRoundNearest);
3581 destReg = (res == 0) ? -1 : 0;
3586 twoRegMiscInstFp("vceq", "NVceqDFp", "SimdFloatCmpOp", ("float",),
3587 2, vceqfpCode, toInt = True)
3588 twoRegMiscInstFp("vceq", "NVceqQFp", "SimdFloatCmpOp", ("float",),
3589 4, vceqfpCode, toInt = True)
# Integer compare-with-zero: destElem is mask(element width) when the source
# is less than or equal to zero, otherwise 0. Registered for signedTypes.
3591 vcleCode = 'destElem = (srcElem1 <= 0) ? mask(sizeof(Element) * 8) : 0;'
3592 twoRegMiscInst("vcle", "NVcleD", "SimdCmpOp", signedTypes, 2, vcleCode)
3593 twoRegMiscInst("vcle", "NVcleQ", "SimdCmpOp", signedTypes, 4, vcleCode)
// vcle (float, compare with zero) element code (fragment): the second operand
// of binaryOp() is the constant (FloatReg)0.0. destReg becomes -1 when the
// vcleFunc result compares equal to 0, and 0 otherwise.
3595 FPSCR fpscr = (FPSCR) FpscrExc;
3596 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcleFunc,
3597 true, true, VfpRoundNearest);
3598 destReg = (res == 0) ? -1 : 0;
3603 twoRegMiscInstFp("vcle", "NVcleDFp", "SimdFloatCmpOp", ("float",),
3604 2, vclefpCode, toInt = True)
3605 twoRegMiscInstFp("vcle", "NVcleQFp", "SimdFloatCmpOp", ("float",),
3606 4, vclefpCode, toInt = True)
# Integer compare-with-zero: destElem is mask(element width) when the source
# is less than zero, otherwise 0. Registered for signedTypes.
3608 vcltCode = 'destElem = (srcElem1 < 0) ? mask(sizeof(Element) * 8) : 0;'
3609 twoRegMiscInst("vclt", "NVcltD", "SimdCmpOp", signedTypes, 2, vcltCode)
3610 twoRegMiscInst("vclt", "NVcltQ", "SimdCmpOp", signedTypes, 4, vcltCode)
// vclt (float, compare with zero) element code (fragment): the second operand
// of binaryOp() is the constant (FloatReg)0.0. destReg becomes -1 when the
// vcltFunc result compares equal to 0, and 0 otherwise.
3612 FPSCR fpscr = (FPSCR) FpscrExc;
3613 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcltFunc,
3614 true, true, VfpRoundNearest);
3615 destReg = (res == 0) ? -1 : 0;
3620 twoRegMiscInstFp("vclt", "NVcltDFp", "SimdFloatCmpOp", ("float",),
3621 2, vcltfpCode, toInt = True)
3622 twoRegMiscInstFp("vclt", "NVcltQFp", "SimdFloatCmpOp", ("float",),
3623 4, vcltfpCode, toInt = True)
// vswp code (fragment; the declaration of mid is not visible here): swaps
// srcReg1 and destReg register by register through the temporary mid.
3627 for (unsigned r = 0; r < rCount; r++) {
3628 mid = srcReg1.regs[r];
3629 srcReg1.regs[r] = destReg.regs[r];
3630 destReg.regs[r] = mid;
3633 twoRegMiscScramble("vswp", "NVswpD", "SimdAluOp", ("uint64_t",), 2, vswpCode)
3634 twoRegMiscScramble("vswp", "NVswpQ", "SimdAluOp", ("uint64_t",), 4, vswpCode)
// vtrn code (fragment; the declaration of mid is not visible here): walking
// the elements two at a time, the even-indexed element of srcReg1 is
// exchanged with the following odd-indexed element of destReg.
3638 for (unsigned i = 0; i < eCount; i += 2) {
3639 mid = srcReg1.elements[i];
3640 srcReg1.elements[i] = destReg.elements[i + 1];
3641 destReg.elements[i + 1] = mid;
3644 twoRegMiscScramble("vtrn", "NVtrnD", "SimdAluOp",
3645 smallUnsignedTypes, 2, vtrnCode)
3646 twoRegMiscScramble("vtrn", "NVtrnQ", "SimdAluOp",
3647 smallUnsignedTypes, 4, vtrnCode)
// vuzp code (fragment): de-interleaves the two registers. mid is a copy of
// the original srcReg1. After both loops destReg holds the even-indexed
// elements (old destReg in its lower half, old srcReg1 in its upper half) and
// srcReg1 holds the odd-indexed elements in the same arrangement.
3650 Element mid[eCount];
3651 memcpy(&mid, &srcReg1, sizeof(srcReg1));
// In-place compaction of destReg is safe: index 2 * i is read before any
// index above i has been overwritten.
3652 for (unsigned i = 0; i < eCount / 2; i++) {
3653 srcReg1.elements[i] = destReg.elements[2 * i + 1];
3654 srcReg1.elements[eCount / 2 + i] = mid[2 * i + 1];
3655 destReg.elements[i] = destReg.elements[2 * i];
// Done in a second pass so the first loop has finished reading destReg's
// upper half before it is overwritten.
3657 for (unsigned i = 0; i < eCount / 2; i++) {
3658 destReg.elements[eCount / 2 + i] = mid[2 * i];
3661 twoRegMiscScramble("vuzp", "NVuzpD", "SimdAluOp", unsignedTypes, 2, vuzpCode)
3662 twoRegMiscScramble("vuzp", "NVuzpQ", "SimdAluOp", unsignedTypes, 4, vuzpCode)
// vzip code (fragment): interleaves the two registers. mid is a copy of the
// original destReg. After both loops destReg holds the interleaved lower
// halves of (old destReg, old srcReg1) and srcReg1 holds the interleaved
// upper halves.
3665 Element mid[eCount];
3666 memcpy(&mid, &destReg, sizeof(destReg));
3667 for (unsigned i = 0; i < eCount / 2; i++) {
3668 destReg.elements[2 * i] = mid[i];
3669 destReg.elements[2 * i + 1] = srcReg1.elements[i];
// srcReg1 is rewritten in place: element eCount / 2 + i is always read before
// it is overwritten. The index is unsigned, matching the loop above and the
// comparison against eCount.
3671 for (unsigned i = 0; i < eCount / 2; i++) {
3672 srcReg1.elements[2 * i] = mid[eCount / 2 + i];
3673 srcReg1.elements[2 * i + 1] = srcReg1.elements[eCount / 2 + i];
3676 twoRegMiscScramble("vzip", "NVzipD", "SimdAluOp", unsignedTypes, 2, vzipCode)
3677 twoRegMiscScramble("vzip", "NVzipQ", "SimdAluOp", unsignedTypes, 4, vzipCode)
# vmovn: plain element copy; any narrowing comes from the element types the
# twoRegNarrowMiscInst helper supplies.
3679 vmovnCode = 'destElem = srcElem1;'
3680 twoRegNarrowMiscInst("vmovn", "NVmovn", "SimdMiscOp", smallUnsignedTypes, vmovnCode)
# vdup (register source): plain element copy; which source element is used is
# decided by the twoRegMiscScInst helper, not by this code string.
3682 vdupCode = 'destElem = srcElem1;'
3683 twoRegMiscScInst("vdup", "NVdupD", "SimdAluOp", smallUnsignedTypes, 2, vdupCode)
3684 twoRegMiscScInst("vdup", "NVdupQ", "SimdAluOp", smallUnsignedTypes, 4, vdupCode)
# vdupGprInst(name, Name, opClass, types, rCount)
#
# Generates a vdup variant whose source is the operand Op1 instead of a
# vector register, and appends the generated declarations/definitions to the
# module-level header_output and exec_output.
#
#   name    -- mnemonic string handed to InstObjParams.
#   Name    -- C++ class name handed to InstObjParams and used as
#              "class_name" in the NeonExecDeclare substitution.
#   opClass -- value stored under "op_class".
#   types   -- element types to instantiate for; it is not referenced in the
#              lines visible here (see the note further down).
#   rCount  -- number of destination registers written back.
#
# NOTE(review): Op1 is presumably a general-purpose register operand (the
# "Gpr" in the function name) -- confirm against the operand definitions.
3686 def vdupGprInst(name, Name, opClass, types, rCount):
3687 global header_output, exec_output
3690 for (unsigned i = 0; i < eCount; i++) {
3691 destReg.elements[i] = htog((Element)Op1);
# The C++ loop above stores Op1, cast to Element and passed through htog(),
# into every element of destReg. The Python loop below appends one write-back
# statement per destination register: FpDestP<reg>_uw receives
# gtoh(destReg.regs[<reg>]).
3694 for reg in range(rCount):
3696 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
3697 ''' % { "reg" : reg }
# Bundle the assembled code with the predicate test and op class; the trailing
# [] is passed as the last positional argument of InstObjParams.
3698 iop = InstObjParams(name, Name,
3700 { "code": eWalkCode,
3702 "predicate_test": predicateTest,
3703 "op_class": opClass }, [])
# Emit the class declaration and the execute() template for this instruction.
3704 header_output += NeonRegRegOpDeclare.subst(iop)
3705 exec_output += NeonEqualRegExecute.subst(iop)
# Emit one NeonExecDeclare substitution for class Name with template argument
# `type`.
# NOTE(review): `type` is not bound in the visible lines -- presumably the
# loop variable of an iteration over `types`; confirm.
3707 substDict = { "targs" : type,
3708 "class_name" : Name }
3709 exec_output += NeonExecDeclare.subst(substDict)
# Instantiate the Op1-sourced vdup for both widths: NVdupDGpr with rCount = 2
# and NVdupQGpr with rCount = 4, over smallUnsignedTypes.
3710 vdupGprInst("vdup", "NVdupDGpr", "SimdMiscOp", smallUnsignedTypes, 2)
3711 vdupGprInst("vdup", "NVdupQGpr", "SimdMiscOp", smallUnsignedTypes, 4)
# Immediate-operand instructions. Each group defines a one-statement
# per-element snippet and instantiates it through oneRegImmInst for the two
# widths (fifth argument 2 -> ...D class, 4 -> ...Q class), always with the
# single element type "uint64_t".
#
# vmov (immediate): destElem is overwritten with imm.
3713 vmovCode = 'destElem = imm;'
3714 oneRegImmInst("vmov", "NVmoviD", "SimdMiscOp", ("uint64_t",), 2, vmovCode)
3715 oneRegImmInst("vmov", "NVmoviQ", "SimdMiscOp", ("uint64_t",), 4, vmovCode)
# vorr (immediate): imm is OR-ed into the existing destElem.
# NOTE(review): this snippet reads destElem before writing it, and these calls
# pass an extra trailing True -- presumably the flag that makes oneRegImmInst
# load the old destination value; confirm against oneRegImmInst.
3717 vorrCode = 'destElem |= imm;'
3718 oneRegImmInst("vorr", "NVorriD", "SimdAluOp", ("uint64_t",), 2, vorrCode, True)
3719 oneRegImmInst("vorr", "NVorriQ", "SimdAluOp", ("uint64_t",), 4, vorrCode, True)
# vmvn (immediate): destElem is overwritten with the bitwise complement of imm.
3721 vmvnCode = 'destElem = ~imm;'
3722 oneRegImmInst("vmvn", "NVmvniD", "SimdAluOp", ("uint64_t",), 2, vmvnCode)
3723 oneRegImmInst("vmvn", "NVmvniQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode)
# vbic (immediate): the bits set in imm are cleared from the existing destElem.
# Like vorr this is a read-modify-write of destElem and passes the trailing
# True (same NOTE(review) as for vorr applies).
3725 vbicCode = 'destElem &= ~imm;'
3726 oneRegImmInst("vbic", "NVbiciD", "SimdAluOp", ("uint64_t",), 2, vbicCode, True)
3727 oneRegImmInst("vbic", "NVbiciQ", "SimdAluOp", ("uint64_t",), 4, vbicCode, True)
3730 FPSCR fpscr = (FPSCR) FpscrQc;
3731 destElem = srcElem1;
3732 if ((BigElement)destElem != srcElem1) {
3734 destElem = mask(sizeof(Element) * 8 - 1);
3736 destElem = ~destElem;
# vqmovn: the snippet above reads FpscrQc into a local FPSCR, assigns srcElem1
# to destElem, and then compares destElem converted back to BigElement with
# the source. A mismatch means the value did not survive the assignment; in
# that case destElem is replaced by mask(sizeof(Element) * 8 - 1), i.e. a mask
# one bit narrower than Element.
# NOTE(review): the later `destElem = ~destElem;` is presumably guarded by a
# sign test on the source (giving the most negative instead of the most
# positive value), and the QC flag is presumably set and written back to
# FpscrQc -- those statements are not visible here; confirm in the full file.
# Instantiated once, as NVqmovn, over smallSignedTypes.
3740 twoRegNarrowMiscInst("vqmovn", "NVqmovn", "SimdMiscOp", smallSignedTypes, vqmovnCode)
3743 FPSCR fpscr = (FPSCR) FpscrQc;
3744 destElem = srcElem1;
3745 if ((BigElement)destElem != srcElem1) {
3747 destElem = mask(sizeof(Element) * 8);
# Unsigned saturating narrow: same round-trip check as the signed variant
# (assign srcElem1 to destElem, convert back to BigElement, compare). On a
# mismatch destElem is replaced by mask(sizeof(Element) * 8), a mask covering
# the full width of Element; there is no complement step in this variant.
# NOTE(review): the QC flag update / write-back to FpscrQc is presumably part
# of this snippet but is not visible here -- confirm.
# NOTE(review): the mnemonic string passed below is "vqmovun" although the
# snippet variable and class name suggest an unsigned-to-unsigned narrow;
# check that this is the intended disassembly name.
3751 twoRegNarrowMiscInst("vqmovun", "NVqmovun",
3752 "SimdMiscOp", smallUnsignedTypes, vqmovunCode)
3755 FPSCR fpscr = (FPSCR) FpscrQc;
3756 destElem = srcElem1;
3758 ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) {
3760 destElem = mask(sizeof(Element) * 8);
3762 destElem = ~destElem;
# Signed-source variant registered under the mnemonic "vqmovun" (class
# NVqmovuns, instantiated over smallSignedTypes). The visible part of the
# condition masks the narrowed value to sizeof(Element) * 8 bits before
# comparing it with srcElem1, so bits above the element width in the converted
# value do not take part in the comparison. When the condition holds, destElem
# is replaced by a full-width mask of Element.
# NOTE(review): the beginning of the condition and the guard in front of
# `destElem = ~destElem;` are not visible here; presumably both test for a
# negative source (the complement of the full-width mask is zero in the low
# bits) -- confirm in the full file.
3766 twoRegNarrowMiscInst("vqmovun", "NVqmovuns",
3767 "SimdMiscOp", smallSignedTypes, vqmovunsCode)
# buildVext(name, Name, opClass, types, rCount, op)
#
# Generates a three-register-plus-immediate instruction (declared through
# NeonRegRegRegImmOpDeclare) and appends the result to the module-level
# header_output and exec_output.
#
#   name    -- mnemonic string handed to InstObjParams.
#   Name    -- C++ class name; also the "class_name" of the NeonExecDeclare
#              substitution.
#   opClass -- value stored under "op_class".
#   types   -- element types to instantiate for (not referenced in the lines
#              visible here).
#   rCount  -- number of registers read from each source and written to the
#              destination.
#   op      -- C++ snippet for the operation itself.
#              NOTE(review): `op` is not used in the visible lines; presumably
#              it is appended to eWalkCode between the load loop and the
#              write-back loop -- confirm.
3769 def buildVext(name, Name, opClass, types, rCount, op):
3770 global header_output, exec_output
3772 RegVect srcReg1, srcReg2, destReg;
# Load phase: for every register index, copy FpOp1P<reg>_uw and FpOp2P<reg>_uw
# (through htog) into srcReg1.regs / srcReg2.regs.
3774 for reg in range(rCount):
# NOTE(review): simdEnabledCheckCode is concatenated inside the loop, so the
# check is emitted rCount times in the generated code; once ahead of the loop
# would presumably be sufficient.
3775 eWalkCode += simdEnabledCheckCode + '''
3776 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
3777 srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
3778 ''' % { "reg" : reg }
# Write-back phase: FpDestP<reg>_uw receives gtoh(destReg.regs[<reg>]) for
# every register index.
3780 for reg in range(rCount):
3782 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
3783 ''' % { "reg" : reg }
# Bundle the assembled code with the predicate test and op class; the trailing
# [] is passed as the last positional argument of InstObjParams.
3784 iop = InstObjParams(name, Name,
3786 { "code": eWalkCode,
3788 "predicate_test": predicateTest,
3789 "op_class": opClass }, [])
# Emit the class declaration and the execute() template.
3790 header_output += NeonRegRegRegImmOpDeclare.subst(iop)
3791 exec_output += NeonEqualRegExecute.subst(iop)
# Emit one NeonExecDeclare substitution for class Name with template argument
# `type`.
# NOTE(review): `type` is not bound in the visible lines -- presumably the
# loop variable of an iteration over `types`; confirm.
3793 substDict = { "targs" : type,
3794 "class_name" : Name }
3795 exec_output += NeonExecDeclare.subst(substDict)
3798 for (unsigned i = 0; i < eCount; i++) {
3799 unsigned index = i + imm;
3800 if (index < eCount) {
3801 destReg.elements[i] = srcReg1.elements[index];
3804 if (index >= eCount) {
3805 fault = new UndefinedInstruction(machInst, false, mnemonic);
3807 destReg.elements[i] = srcReg2.elements[index];
# vext: for each destination element i the snippet above computes
# index = i + imm. While index is below eCount the element is taken from
# srcReg1.elements[index]. Otherwise the element comes from
# srcReg2.elements[index], unless a second range check (index >= eCount)
# fails, in which case `fault` is set to a new UndefinedInstruction.
# NOTE(review): for the second range check to be meaningful, index is
# presumably reduced by eCount between the two checks; that statement is not
# visible here -- confirm in the full file.
# Instantiated over the single element type "uint8_t" as NVextD (rCount 2)
# and NVextQ (rCount 4).
3812 buildVext("vext", "NVextD", "SimdMiscOp", ("uint8_t",), 2, vextCode)
3813 buildVext("vext", "NVextQ", "SimdMiscOp", ("uint8_t",), 4, vextCode)
# buildVtbxl(name, Name, opClass, length, isVtbl)
#
# Generates one table-lookup instruction and appends its declaration,
# constructor and execute() definition to the module-level header_output,
# decoder_output and exec_output.
#
#   name    -- mnemonic string handed to InstObjParams.
#   Name    -- C++ class name handed to InstObjParams.
#   opClass -- value stored under "op_class".
#   length  -- substituted into the generated C++ as `const unsigned length`.
#              The first length * 2 entries of table.regs are loaded from the
#              FpOp1P<reg>_uw operands and the rest are zeroed; an index is
#              valid when it is below 8 * length.
#   isVtbl  -- substituted with %(isVtbl)s into `const bool isVtbl`, so it
#              must be the text of a C++ boolean literal (callers pass the
#              strings "true" / "false").
#
# Generated behaviour, as far as the visible lines show: the index bytes come
# from srcReg2 (FpOp2P0_uw / FpOp2P1_uw) and destReg is preloaded from
# FpDestP0_uw / FpDestP1_uw. For each byte i of destReg, an in-range index
# selects table.bytes[index]; for an out-of-range index the byte is either
# zeroed or left unchanged.
# NOTE(review): the condition choosing between "zero" and "unchanged" is not
# visible here; presumably it is `isVtbl` -- confirm.
3815 def buildVtbxl(name, Name, opClass, length, isVtbl):
3816 global header_output, decoder_output, exec_output
3821 FloatRegBits regs[8];
3827 FloatRegBits regs[2];
3830 const unsigned length = %(length)d;
3831 const bool isVtbl = %(isVtbl)s;
3833 srcReg2.regs[0] = htog(FpOp2P0_uw);
3834 srcReg2.regs[1] = htog(FpOp2P1_uw);
3836 destReg.regs[0] = htog(FpDestP0_uw);
3837 destReg.regs[1] = htog(FpDestP1_uw);
3838 ''' % { "length" : length, "isVtbl" : isVtbl }
# Fill all eight table.regs slots: the first length * 2 are loaded from the
# corresponding FpOp1P<reg>_uw operand, the remaining ones are set to 0.
3839 for reg in range(8):
3840 if reg < length * 2:
3841 code += 'table.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);\n' % \
3844 code += 'table.regs[%(reg)d] = 0;\n' % { "reg" : reg }
3846 for (unsigned i = 0; i < sizeof(destReg); i++) {
3847 uint8_t index = srcReg2.bytes[i];
3848 if (index < 8 * length) {
3849 destReg.bytes[i] = table.bytes[index];
3852 destReg.bytes[i] = 0;
3853 // else destReg.bytes[i] unchanged
3857 FpDestP0_uw = gtoh(destReg.regs[0]);
3858 FpDestP1_uw = gtoh(destReg.regs[1]);
# Bundle the generated code with the predicate test and op class; the trailing
# [] is passed as the last positional argument of InstObjParams.
3860 iop = InstObjParams(name, Name,
3863 "predicate_test": predicateTest,
3864 "op_class": opClass }, [])
# Unlike the element-typed builders in this file, this one emits a
# constructor into decoder_output and uses the PredOpExecute template.
3865 header_output += RegRegRegOpDeclare.subst(iop)
3866 decoder_output += RegRegRegOpConstructor.subst(iop)
3867 exec_output += PredOpExecute.subst(iop)
# Table lookups with `length` values 1 through 4. The vtbl classes pass
# "true" for isVtbl.
3869 buildVtbxl("vtbl", "NVtbl1", "SimdMiscOp", 1, "true")
3870 buildVtbxl("vtbl", "NVtbl2", "SimdMiscOp", 2, "true")
3871 buildVtbxl("vtbl", "NVtbl3", "SimdMiscOp", 3, "true")
3872 buildVtbxl("vtbl", "NVtbl4", "SimdMiscOp", 4, "true")
# The vtbx classes use the same `length` values but pass "false" for isVtbl.
3874 buildVtbxl("vtbx", "NVtbx1", "SimdMiscOp", 1, "false")
3875 buildVtbxl("vtbx", "NVtbx2", "SimdMiscOp", 2, "false")
3876 buildVtbxl("vtbx", "NVtbx3", "SimdMiscOp", 3, "false")
3877 buildVtbxl("vtbx", "NVtbx4", "SimdMiscOp", 4, "false")