arm: fix some fp comparisons that worked by accident.
[gem5.git] / src / arch / arm / isa / insts / neon.isa
1 // -*- mode:c++ -*-
2
3 // Copyright (c) 2010 ARM Limited
4 // All rights reserved
5 //
6 // The license below extends only to copyright in the software and shall
7 // not be construed as granting a license to any other intellectual
8 // property including but not limited to intellectual property relating
9 // to a hardware implementation of the functionality of the software
10 // licensed hereunder. You may use the software subject to the license
11 // terms below provided that you ensure that this notice is replicated
12 // unmodified and in its entirety in all distributions of the software,
13 // modified or unmodified, in source code or in binary form.
14 //
15 // Redistribution and use in source and binary forms, with or without
16 // modification, are permitted provided that the following conditions are
17 // met: redistributions of source code must retain the above copyright
18 // notice, this list of conditions and the following disclaimer;
19 // redistributions in binary form must reproduce the above copyright
20 // notice, this list of conditions and the following disclaimer in the
21 // documentation and/or other materials provided with the distribution;
22 // neither the name of the copyright holders nor the names of its
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Authors: Gabe Black
39
40 output header {{
41 template <template <typename T> class Base>
42 StaticInstPtr
43 decodeNeonUThreeUReg(unsigned size,
44 ExtMachInst machInst, IntRegIndex dest,
45 IntRegIndex op1, IntRegIndex op2)
46 {
47 switch (size) {
48 case 0:
49 return new Base<uint8_t>(machInst, dest, op1, op2);
50 case 1:
51 return new Base<uint16_t>(machInst, dest, op1, op2);
52 case 2:
53 return new Base<uint32_t>(machInst, dest, op1, op2);
54 case 3:
55 return new Base<uint64_t>(machInst, dest, op1, op2);
56 default:
57 return new Unknown(machInst);
58 }
59 }
60
61 template <template <typename T> class Base>
62 StaticInstPtr
63 decodeNeonSThreeUReg(unsigned size,
64 ExtMachInst machInst, IntRegIndex dest,
65 IntRegIndex op1, IntRegIndex op2)
66 {
67 switch (size) {
68 case 0:
69 return new Base<int8_t>(machInst, dest, op1, op2);
70 case 1:
71 return new Base<int16_t>(machInst, dest, op1, op2);
72 case 2:
73 return new Base<int32_t>(machInst, dest, op1, op2);
74 case 3:
75 return new Base<int64_t>(machInst, dest, op1, op2);
76 default:
77 return new Unknown(machInst);
78 }
79 }
80
81 template <template <typename T> class Base>
82 StaticInstPtr
83 decodeNeonUSThreeUReg(bool notSigned, unsigned size,
84 ExtMachInst machInst, IntRegIndex dest,
85 IntRegIndex op1, IntRegIndex op2)
86 {
87 if (notSigned) {
88 return decodeNeonUThreeUReg<Base>(size, machInst, dest, op1, op2);
89 } else {
90 return decodeNeonSThreeUReg<Base>(size, machInst, dest, op1, op2);
91 }
92 }
93
94 template <template <typename T> class Base>
95 StaticInstPtr
96 decodeNeonUThreeUSReg(unsigned size,
97 ExtMachInst machInst, IntRegIndex dest,
98 IntRegIndex op1, IntRegIndex op2)
99 {
100 switch (size) {
101 case 0:
102 return new Base<uint8_t>(machInst, dest, op1, op2);
103 case 1:
104 return new Base<uint16_t>(machInst, dest, op1, op2);
105 case 2:
106 return new Base<uint32_t>(machInst, dest, op1, op2);
107 default:
108 return new Unknown(machInst);
109 }
110 }
111
112 template <template <typename T> class Base>
113 StaticInstPtr
114 decodeNeonSThreeUSReg(unsigned size,
115 ExtMachInst machInst, IntRegIndex dest,
116 IntRegIndex op1, IntRegIndex op2)
117 {
118 switch (size) {
119 case 0:
120 return new Base<int8_t>(machInst, dest, op1, op2);
121 case 1:
122 return new Base<int16_t>(machInst, dest, op1, op2);
123 case 2:
124 return new Base<int32_t>(machInst, dest, op1, op2);
125 default:
126 return new Unknown(machInst);
127 }
128 }
129
130 template <template <typename T> class Base>
131 StaticInstPtr
132 decodeNeonUSThreeUSReg(bool notSigned, unsigned size,
133 ExtMachInst machInst, IntRegIndex dest,
134 IntRegIndex op1, IntRegIndex op2)
135 {
136 if (notSigned) {
137 return decodeNeonUThreeUSReg<Base>(
138 size, machInst, dest, op1, op2);
139 } else {
140 return decodeNeonSThreeUSReg<Base>(
141 size, machInst, dest, op1, op2);
142 }
143 }
144
145 template <template <typename T> class BaseD,
146 template <typename T> class BaseQ>
147 StaticInstPtr
148 decodeNeonUThreeSReg(bool q, unsigned size,
149 ExtMachInst machInst, IntRegIndex dest,
150 IntRegIndex op1, IntRegIndex op2)
151 {
152 if (q) {
153 return decodeNeonUThreeUSReg<BaseQ>(
154 size, machInst, dest, op1, op2);
155 } else {
156 return decodeNeonUThreeUSReg<BaseD>(
157 size, machInst, dest, op1, op2);
158 }
159 }
160
161 template <template <typename T> class BaseD,
162 template <typename T> class BaseQ>
163 StaticInstPtr
164 decodeNeonSThreeSReg(bool q, unsigned size,
165 ExtMachInst machInst, IntRegIndex dest,
166 IntRegIndex op1, IntRegIndex op2)
167 {
168 if (q) {
169 return decodeNeonSThreeUSReg<BaseQ>(
170 size, machInst, dest, op1, op2);
171 } else {
172 return decodeNeonSThreeUSReg<BaseD>(
173 size, machInst, dest, op1, op2);
174 }
175 }
176
177 template <template <typename T> class BaseD,
178 template <typename T> class BaseQ>
179 StaticInstPtr
180 decodeNeonUSThreeSReg(bool q, bool notSigned, unsigned size,
181 ExtMachInst machInst, IntRegIndex dest,
182 IntRegIndex op1, IntRegIndex op2)
183 {
184 if (notSigned) {
185 return decodeNeonUThreeSReg<BaseD, BaseQ>(
186 q, size, machInst, dest, op1, op2);
187 } else {
188 return decodeNeonSThreeSReg<BaseD, BaseQ>(
189 q, size, machInst, dest, op1, op2);
190 }
191 }
192
193 template <template <typename T> class BaseD,
194 template <typename T> class BaseQ>
195 StaticInstPtr
196 decodeNeonUThreeReg(bool q, unsigned size,
197 ExtMachInst machInst, IntRegIndex dest,
198 IntRegIndex op1, IntRegIndex op2)
199 {
200 if (q) {
201 return decodeNeonUThreeUReg<BaseQ>(
202 size, machInst, dest, op1, op2);
203 } else {
204 return decodeNeonUThreeUReg<BaseD>(
205 size, machInst, dest, op1, op2);
206 }
207 }
208
209 template <template <typename T> class BaseD,
210 template <typename T> class BaseQ>
211 StaticInstPtr
212 decodeNeonSThreeReg(bool q, unsigned size,
213 ExtMachInst machInst, IntRegIndex dest,
214 IntRegIndex op1, IntRegIndex op2)
215 {
216 if (q) {
217 return decodeNeonSThreeUReg<BaseQ>(
218 size, machInst, dest, op1, op2);
219 } else {
220 return decodeNeonSThreeUReg<BaseD>(
221 size, machInst, dest, op1, op2);
222 }
223 }
224
225 template <template <typename T> class BaseD,
226 template <typename T> class BaseQ>
227 StaticInstPtr
228 decodeNeonUSThreeReg(bool q, bool notSigned, unsigned size,
229 ExtMachInst machInst, IntRegIndex dest,
230 IntRegIndex op1, IntRegIndex op2)
231 {
232 if (notSigned) {
233 return decodeNeonUThreeReg<BaseD, BaseQ>(
234 q, size, machInst, dest, op1, op2);
235 } else {
236 return decodeNeonSThreeReg<BaseD, BaseQ>(
237 q, size, machInst, dest, op1, op2);
238 }
239 }
240
241 template <template <typename T> class BaseD,
242 template <typename T> class BaseQ>
243 StaticInstPtr
244 decodeNeonUTwoShiftReg(bool q, unsigned size,
245 ExtMachInst machInst, IntRegIndex dest,
246 IntRegIndex op1, uint64_t imm)
247 {
248 if (q) {
249 switch (size) {
250 case 0:
251 return new BaseQ<uint8_t>(machInst, dest, op1, imm);
252 case 1:
253 return new BaseQ<uint16_t>(machInst, dest, op1, imm);
254 case 2:
255 return new BaseQ<uint32_t>(machInst, dest, op1, imm);
256 case 3:
257 return new BaseQ<uint64_t>(machInst, dest, op1, imm);
258 default:
259 return new Unknown(machInst);
260 }
261 } else {
262 switch (size) {
263 case 0:
264 return new BaseD<uint8_t>(machInst, dest, op1, imm);
265 case 1:
266 return new BaseD<uint16_t>(machInst, dest, op1, imm);
267 case 2:
268 return new BaseD<uint32_t>(machInst, dest, op1, imm);
269 case 3:
270 return new BaseD<uint64_t>(machInst, dest, op1, imm);
271 default:
272 return new Unknown(machInst);
273 }
274 }
275 }
276
277 template <template <typename T> class BaseD,
278 template <typename T> class BaseQ>
279 StaticInstPtr
280 decodeNeonSTwoShiftReg(bool q, unsigned size,
281 ExtMachInst machInst, IntRegIndex dest,
282 IntRegIndex op1, uint64_t imm)
283 {
284 if (q) {
285 switch (size) {
286 case 0:
287 return new BaseQ<int8_t>(machInst, dest, op1, imm);
288 case 1:
289 return new BaseQ<int16_t>(machInst, dest, op1, imm);
290 case 2:
291 return new BaseQ<int32_t>(machInst, dest, op1, imm);
292 case 3:
293 return new BaseQ<int64_t>(machInst, dest, op1, imm);
294 default:
295 return new Unknown(machInst);
296 }
297 } else {
298 switch (size) {
299 case 0:
300 return new BaseD<int8_t>(machInst, dest, op1, imm);
301 case 1:
302 return new BaseD<int16_t>(machInst, dest, op1, imm);
303 case 2:
304 return new BaseD<int32_t>(machInst, dest, op1, imm);
305 case 3:
306 return new BaseD<int64_t>(machInst, dest, op1, imm);
307 default:
308 return new Unknown(machInst);
309 }
310 }
311 }
312
313
314 template <template <typename T> class BaseD,
315 template <typename T> class BaseQ>
316 StaticInstPtr
317 decodeNeonUSTwoShiftReg(bool q, bool notSigned, unsigned size,
318 ExtMachInst machInst, IntRegIndex dest,
319 IntRegIndex op1, uint64_t imm)
320 {
321 if (notSigned) {
322 return decodeNeonUTwoShiftReg<BaseD, BaseQ>(
323 q, size, machInst, dest, op1, imm);
324 } else {
325 return decodeNeonSTwoShiftReg<BaseD, BaseQ>(
326 q, size, machInst, dest, op1, imm);
327 }
328 }
329
330 template <template <typename T> class Base>
331 StaticInstPtr
332 decodeNeonUTwoShiftUSReg(unsigned size,
333 ExtMachInst machInst, IntRegIndex dest,
334 IntRegIndex op1, uint64_t imm)
335 {
336 switch (size) {
337 case 0:
338 return new Base<uint8_t>(machInst, dest, op1, imm);
339 case 1:
340 return new Base<uint16_t>(machInst, dest, op1, imm);
341 case 2:
342 return new Base<uint32_t>(machInst, dest, op1, imm);
343 default:
344 return new Unknown(machInst);
345 }
346 }
347
348 template <template <typename T> class BaseD,
349 template <typename T> class BaseQ>
350 StaticInstPtr
351 decodeNeonUTwoShiftSReg(bool q, unsigned size,
352 ExtMachInst machInst, IntRegIndex dest,
353 IntRegIndex op1, uint64_t imm)
354 {
355 if (q) {
356 return decodeNeonUTwoShiftUSReg<BaseQ>(
357 size, machInst, dest, op1, imm);
358 } else {
359 return decodeNeonUTwoShiftUSReg<BaseD>(
360 size, machInst, dest, op1, imm);
361 }
362 }
363
364 template <template <typename T> class Base>
365 StaticInstPtr
366 decodeNeonSTwoShiftUSReg(unsigned size,
367 ExtMachInst machInst, IntRegIndex dest,
368 IntRegIndex op1, uint64_t imm)
369 {
370 switch (size) {
371 case 0:
372 return new Base<int8_t>(machInst, dest, op1, imm);
373 case 1:
374 return new Base<int16_t>(machInst, dest, op1, imm);
375 case 2:
376 return new Base<int32_t>(machInst, dest, op1, imm);
377 default:
378 return new Unknown(machInst);
379 }
380 }
381
382 template <template <typename T> class BaseD,
383 template <typename T> class BaseQ>
384 StaticInstPtr
385 decodeNeonSTwoShiftSReg(bool q, unsigned size,
386 ExtMachInst machInst, IntRegIndex dest,
387 IntRegIndex op1, uint64_t imm)
388 {
389 if (q) {
390 return decodeNeonSTwoShiftUSReg<BaseQ>(
391 size, machInst, dest, op1, imm);
392 } else {
393 return decodeNeonSTwoShiftUSReg<BaseD>(
394 size, machInst, dest, op1, imm);
395 }
396 }
397
398 template <template <typename T> class BaseD,
399 template <typename T> class BaseQ>
400 StaticInstPtr
401 decodeNeonUSTwoShiftSReg(bool q, bool notSigned, unsigned size,
402 ExtMachInst machInst, IntRegIndex dest,
403 IntRegIndex op1, uint64_t imm)
404 {
405 if (notSigned) {
406 return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
407 q, size, machInst, dest, op1, imm);
408 } else {
409 return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
410 q, size, machInst, dest, op1, imm);
411 }
412 }
413
414 template <template <typename T> class Base>
415 StaticInstPtr
416 decodeNeonUTwoMiscUSReg(unsigned size,
417 ExtMachInst machInst, IntRegIndex dest,
418 IntRegIndex op1)
419 {
420 switch (size) {
421 case 0:
422 return new Base<uint8_t>(machInst, dest, op1);
423 case 1:
424 return new Base<uint16_t>(machInst, dest, op1);
425 case 2:
426 return new Base<uint32_t>(machInst, dest, op1);
427 default:
428 return new Unknown(machInst);
429 }
430 }
431
432 template <template <typename T> class Base>
433 StaticInstPtr
434 decodeNeonSTwoMiscUSReg(unsigned size,
435 ExtMachInst machInst, IntRegIndex dest,
436 IntRegIndex op1)
437 {
438 switch (size) {
439 case 0:
440 return new Base<int8_t>(machInst, dest, op1);
441 case 1:
442 return new Base<int16_t>(machInst, dest, op1);
443 case 2:
444 return new Base<int32_t>(machInst, dest, op1);
445 default:
446 return new Unknown(machInst);
447 }
448 }
449
450 template <template <typename T> class BaseD,
451 template <typename T> class BaseQ>
452 StaticInstPtr
453 decodeNeonUTwoMiscSReg(bool q, unsigned size,
454 ExtMachInst machInst, IntRegIndex dest,
455 IntRegIndex op1)
456 {
457 if (q) {
458 return decodeNeonUTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
459 } else {
460 return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
461 }
462 }
463
464 template <template <typename T> class BaseD,
465 template <typename T> class BaseQ>
466 StaticInstPtr
467 decodeNeonSTwoMiscSReg(bool q, unsigned size,
468 ExtMachInst machInst, IntRegIndex dest,
469 IntRegIndex op1)
470 {
471 if (q) {
472 return decodeNeonSTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
473 } else {
474 return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
475 }
476 }
477
478 template <template <typename T> class Base>
479 StaticInstPtr
480 decodeNeonUTwoMiscUReg(unsigned size,
481 ExtMachInst machInst, IntRegIndex dest,
482 IntRegIndex op1)
483 {
484 switch (size) {
485 case 0:
486 return new Base<uint8_t>(machInst, dest, op1);
487 case 1:
488 return new Base<uint16_t>(machInst, dest, op1);
489 case 2:
490 return new Base<uint32_t>(machInst, dest, op1);
491 case 3:
492 return new Base<uint64_t>(machInst, dest, op1);
493 default:
494 return new Unknown(machInst);
495 }
496 }
497
498 template <template <typename T> class Base>
499 StaticInstPtr
500 decodeNeonSTwoMiscUReg(unsigned size,
501 ExtMachInst machInst, IntRegIndex dest,
502 IntRegIndex op1)
503 {
504 switch (size) {
505 case 0:
506 return new Base<int8_t>(machInst, dest, op1);
507 case 1:
508 return new Base<int16_t>(machInst, dest, op1);
509 case 2:
510 return new Base<int32_t>(machInst, dest, op1);
511 case 3:
512 return new Base<int64_t>(machInst, dest, op1);
513 default:
514 return new Unknown(machInst);
515 }
516 }
517
518 template <template <typename T> class BaseD,
519 template <typename T> class BaseQ>
520 StaticInstPtr
521 decodeNeonSTwoMiscReg(bool q, unsigned size,
522 ExtMachInst machInst, IntRegIndex dest,
523 IntRegIndex op1)
524 {
525 if (q) {
526 return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
527 } else {
528 return decodeNeonSTwoMiscUReg<BaseD>(size, machInst, dest, op1);
529 }
530 }
531
532 template <template <typename T> class BaseD,
533 template <typename T> class BaseQ>
534 StaticInstPtr
535 decodeNeonUTwoMiscReg(bool q, unsigned size,
536 ExtMachInst machInst, IntRegIndex dest,
537 IntRegIndex op1)
538 {
539 if (q) {
540 return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
541 } else {
542 return decodeNeonUTwoMiscUReg<BaseD>(size, machInst, dest, op1);
543 }
544 }
545
546 template <template <typename T> class BaseD,
547 template <typename T> class BaseQ>
548 StaticInstPtr
549 decodeNeonUSTwoMiscSReg(bool q, bool notSigned, unsigned size,
550 ExtMachInst machInst, IntRegIndex dest,
551 IntRegIndex op1)
552 {
553 if (notSigned) {
554 return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
555 q, size, machInst, dest, op1);
556 } else {
557 return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
558 q, size, machInst, dest, op1);
559 }
560 }
561
562 }};
563
564 output exec {{
// Greater-than comparison encoded as a float code: 2.0 when either operand
// is NaN, 0.0 when op1 > op2 holds, 1.0 when it does not.
static float
vcgtFunc(float op1, float op2)
{
    const bool anyNan = std::isnan(op1) || std::isnan(op2);
    if (anyNan) {
        return 2.0;
    }
    if (op1 > op2) {
        return 0.0;
    }
    return 1.0;
}
572
// Greater-or-equal comparison encoded as a float code: 2.0 when either
// operand is NaN, 0.0 when op1 >= op2 holds, 1.0 when it does not.
static float
vcgeFunc(float op1, float op2)
{
    const bool anyNan = std::isnan(op1) || std::isnan(op2);
    if (anyNan) {
        return 2.0;
    }
    if (op1 >= op2) {
        return 0.0;
    }
    return 1.0;
}
580
581 static float
582 vceqFunc(float op1, float op2)
583 {
584 if (isSnan(op1) || isSnan(op2))
585 return 2.0;
586 return (op1 == op2) ? 0.0 : 1.0;
587 }
588
// Less-or-equal comparison encoded as a float code: 2.0 when either operand
// is NaN, 0.0 when op1 <= op2 holds, 1.0 when it does not.
static float
vcleFunc(float op1, float op2)
{
    const bool anyNan = std::isnan(op1) || std::isnan(op2);
    if (anyNan) {
        return 2.0;
    }
    if (op1 <= op2) {
        return 0.0;
    }
    return 1.0;
}
596
// Less-than comparison encoded as a float code: 2.0 when either operand is
// NaN, 0.0 when op1 < op2 holds, 1.0 when it does not.
static float
vcltFunc(float op1, float op2)
{
    const bool anyNan = std::isnan(op1) || std::isnan(op2);
    if (anyNan) {
        return 2.0;
    }
    if (op1 < op2) {
        return 0.0;
    }
    return 1.0;
}
604
// Absolute-value greater-than comparison encoded as a float code: 2.0 when
// either operand is NaN, 0.0 when |op1| > |op2| holds, 1.0 when it does not.
static float
vacgtFunc(float op1, float op2)
{
    const bool anyNan = std::isnan(op1) || std::isnan(op2);
    if (anyNan) {
        return 2.0;
    }
    const float mag1 = fabsf(op1);
    const float mag2 = fabsf(op2);
    if (mag1 > mag2) {
        return 0.0;
    }
    return 1.0;
}
612
// Absolute-value greater-or-equal comparison encoded as a float code: 2.0
// when either operand is NaN, 0.0 when |op1| >= |op2| holds, 1.0 when it
// does not.
static float
vacgeFunc(float op1, float op2)
{
    const bool anyNan = std::isnan(op1) || std::isnan(op2);
    if (anyNan) {
        return 2.0;
    }
    const float mag1 = fabsf(op1);
    const float mag2 = fabsf(op2);
    if (mag1 >= mag2) {
        return 0.0;
    }
    return 1.0;
}
620 }};
621
622 let {{
623
# Text accumulated by the generator functions below.
header_output = exec_output = ""

# Element type names handed to the generators as their `types` argument.
# The "small" groups stop at 32 bits; the full groups add the 64 bit type.
smallSignedTypes = ("int8_t", "int16_t", "int32_t")
smallUnsignedTypes = tuple("u" + typeName for typeName in smallSignedTypes)
signedTypes = smallSignedTypes + ("int64_t",)
unsignedTypes = smallUnsignedTypes + ("uint64_t",)
smallTypes = smallUnsignedTypes + smallSignedTypes
allTypes = unsignedTypes + signedTypes
633
# Emit a three-operand instruction whose two sources and destination each
# span rCount registers.
#
# The generated execute code loads rCount words of both sources (and of the
# destination when readDest is set), runs the C++ snippet `op` once per
# element with srcElem1, srcElem2 and destElem in scope, and then writes the
# destination words back. With pairwise set, element i takes its inputs from
# elements 2i and 2i+1 of srcReg1, continuing into srcReg2 once 2i reaches
# eCount. The class is declared through NeonRegRegRegOpDeclare, its execute
# method through NeonEqualRegExecute, and NeonExecDeclare is substituted once
# for every entry of `types`.
def threeEqualRegInst(name, Name, opClass, types, rCount, op,
                      readDest=False, pairwise=False):
    global header_output, exec_output

    # Per-register fragments, filled in with the register index.
    loadSrcs = '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
    '''
    loadDest = '''
            destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
    '''
    storeDest = '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    '''

    # Element loop; the two variants differ only in where the inputs come
    # from.
    if pairwise:
        elemLoop = '''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(2 * i < eCount ?
                                srcReg1.elements[2 * i] :
                                srcReg2.elements[2 * i - eCount]);
        Element srcElem2 = gtoh(2 * i < eCount ?
                                srcReg1.elements[2 * i + 1] :
                                srcReg2.elements[2 * i + 1 - eCount]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    '''
    else:
        elemLoop = '''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        Element srcElem2 = gtoh(srcReg2.elements[i]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    '''

    # Statement that seeds destElem with the old destination value.
    seedDest = ''
    if readDest:
        seedDest = 'destElem = gtoh(destReg.elements[i]);'

    walk = simdEnabledCheckCode + '''
    RegVect srcReg1, srcReg2, destReg;
    '''
    for idx in range(rCount):
        walk += loadSrcs % { "reg" : idx }
        if readDest:
            walk += loadDest % { "reg" : idx }
    walk += elemLoop % { "op" : op, "readDest" : seedDest }
    for idx in range(rCount):
        walk += storeDest % { "reg" : idx }

    iop = InstObjParams(name, Name,
                        "RegRegRegOp",
                        { "code": walk,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
694
# Emit a three-operand floating point instruction that works one FloatReg at
# a time over rCount registers.
#
# The generated execute code copies the sources into FloatVect arrays, runs
# the C++ snippet `op` once per register with srcReg1, srcReg2 and destReg in
# scope, and writes the results back. With pairwise set, iteration r takes
# its inputs from entries 2r and 2r+1 of srcRegs1, continuing into srcRegs2
# once 2r reaches rCount. With toInt set, the destination is held as
# FloatRegBits in a RegVect and written back through the _uw operand.
# With readDest set, the old destination is loaded before the loop and
# destReg is seeded from it.
def threeEqualRegInstFp(name, Name, opClass, types, rCount, op,
                        readDest=False, pairwise=False, toInt=False):
    global header_output, exec_output

    # Choose every destination-dependent fragment in one place.
    if toInt:
        declDest = 'RegVect destRegs;\n'
        loadDest = '''
                destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
        '''
        destType = 'FloatRegBits'
        writeDest = 'destRegs.regs[r] = destReg;'
        storeDest = '''
            FpDestP%(reg)d_uw = destRegs.regs[%(reg)d];
        '''
    else:
        declDest = 'FloatVect destRegs;\n'
        loadDest = '''
                destRegs[%(reg)d] = FpDestP%(reg)d;
        '''
        destType = 'FloatReg'
        writeDest = 'destRegs[r] = destReg;'
        storeDest = '''
            FpDestP%(reg)d = destRegs[%(reg)d];
        '''

    # NOTE(review): this seed indexes destRegs directly even when toInt made
    # it a RegVect (accessed elsewhere through .regs) -- confirm whether the
    # toInt + readDest combination is ever requested.
    seedDest = ''
    if readDest:
        seedDest = 'destReg = destRegs[r];'

    if pairwise:
        regLoop = '''
    for (unsigned r = 0; r < rCount; r++) {
        FloatReg srcReg1 = (2 * r < rCount) ?
            srcRegs1[2 * r] : srcRegs2[2 * r - rCount];
        FloatReg srcReg2 = (2 * r < rCount) ?
            srcRegs1[2 * r + 1] : srcRegs2[2 * r + 1 - rCount];
        %(destType)s destReg;
        %(readDest)s
        %(op)s
        %(writeDest)s
    }
    '''
    else:
        regLoop = '''
    for (unsigned r = 0; r < rCount; r++) {
        FloatReg srcReg1 = srcRegs1[r];
        FloatReg srcReg2 = srcRegs2[r];
        %(destType)s destReg;
        %(readDest)s
        %(op)s
        %(writeDest)s
    }
    '''

    walk = simdEnabledCheckCode + '''
    typedef FloatReg FloatVect[rCount];
    FloatVect srcRegs1, srcRegs2;
    '''
    walk += declDest
    for idx in range(rCount):
        walk += '''
        srcRegs1[%(reg)d] = FpOp1P%(reg)d;
        srcRegs2[%(reg)d] = FpOp2P%(reg)d;
        ''' % { "reg" : idx }
        if readDest:
            walk += loadDest % { "reg" : idx }
    walk += regLoop % { "op" : op,
                        "readDest" : seedDest,
                        "destType" : destType,
                        "writeDest" : writeDest }
    for idx in range(rCount):
        walk += storeDest % { "reg" : idx }

    iop = InstObjParams(name, Name,
                        "FpRegRegRegOp",
                        { "code": walk,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
779
# Emit a three-operand instruction whose operands may differ in width.
#
# Each of bigSrc1, bigSrc2 and bigDest switches the matching operand from a
# 2 register RegVect to a 4 register BigRegVect. The generated execute code
# loads the sources (and the destination when readDest is set), runs the C++
# snippet `op` once per element with srcElem1, srcElem2 and destElem in
# scope, and writes the destination registers back. The instruction is
# always created with r_count 2 and executed through NeonUnequalRegExecute.
def threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1, bigSrc2, bigDest, readDest):
    global header_output, exec_output

    # (register count, type name prefix) for a normal or a "Big" operand.
    def operandShape(isBig):
        if isBig:
            return 4, 'Big'
        return 2, ''

    src1Cnt, src1Prefix = operandShape(bigSrc1)
    src2Cnt, src2Prefix = operandShape(bigSrc2)
    destCnt, destPrefix = operandShape(bigDest)

    walk = simdEnabledCheckCode + '''
    %sRegVect srcReg1;
    %sRegVect srcReg2;
    %sRegVect destReg;
    ''' % (src1Prefix, src2Prefix, destPrefix)
    for idx in range(src1Cnt):
        walk += '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : idx }
    for idx in range(src2Cnt):
        walk += '''
        srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
        ''' % { "reg" : idx }

    seedDest = ''
    if readDest:
        for idx in range(destCnt):
            walk += '''
            destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : idx }
        seedDest = 'destElem = gtoh(destReg.elements[i]);'

    # NOTE(review): srcElem2 is declared with src1Prefix; src2Prefix is
    # supplied to the substitution but not referenced by this template --
    # confirm that is intended.
    walk += '''
    for (unsigned i = 0; i < eCount; i++) {
        %(src1Prefix)sElement srcElem1 = gtoh(srcReg1.elements[i]);
        %(src1Prefix)sElement srcElem2 = gtoh(srcReg2.elements[i]);
        %(destPrefix)sElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : seedDest,
            "src1Prefix" : src1Prefix, "src2Prefix" : src2Prefix,
            "destPrefix" : destPrefix }
    for idx in range(destCnt):
        walk += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : idx }

    iop = InstObjParams(name, Name,
                        "RegRegRegOp",
                        { "code": walk,
                          "r_count": 2,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
843
# Unequal-width variant with both sources "Big" and a normal destination.
def threeRegNarrowInst(name, Name, opClass, types, op, readDest=False):
    threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1=True, bigSrc2=True, bigDest=False,
                        readDest=readDest)
847
# Unequal-width variant with both sources normal and a "Big" destination.
def threeRegLongInst(name, Name, opClass, types, op, readDest=False):
    threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1=False, bigSrc2=False, bigDest=True,
                        readDest=readDest)
851
# Unequal-width variant with a "Big" first source, a normal second source
# and a "Big" destination.
def threeRegWideInst(name, Name, opClass, types, op, readDest=False):
    threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1=True, bigSrc2=False, bigDest=True,
                        readDest=readDest)
855
# Emit an instruction that combines every element of the first source with
# one selected element of the second source.
#
# The generated execute code loads rCount words of both sources (and of the
# destination when readDest is set). For each element i it runs the C++
# snippet `op` with srcElem1 = srcReg1.elements[i] and
# srcElem2 = srcReg2.elements[imm], then writes the destination words back.
# An imm outside [0, eCount) raises UndefinedInstruction rather than
# indexing past the element array.
def twoEqualRegInst(name, Name, opClass, types, rCount, op, readDest=False):
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1, srcReg2, destReg;
    '''
    for reg in range(rCount):
        eWalkCode += '''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
            srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
        ''' % { "reg" : reg }
        if readDest:
            eWalkCode += '''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    # The range test must use ||: no value is both below zero and at least
    # eCount, so an && test could never reject a bad index. (If imm is an
    # unsigned type the first comparison is simply always false.)
    eWalkCode += '''
    if (imm < 0 || imm >= eCount) {
        if (FullSystem)
            fault = new UndefinedInstruction;
        else
            fault = new UndefinedInstruction(false, mnemonic);
    } else {
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = gtoh(srcReg1.elements[i]);
            Element srcElem2 = gtoh(srcReg2.elements[imm]);
            Element destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(rCount):
        eWalkCode += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
906
# Emit an instruction that combines every element of the first source with
# one selected element of the second source and produces a "Big" result.
#
# Both sources span 2 registers (RegVect); the destination is a BigRegVect
# spanning 4. For each element i the generated code runs the C++ snippet
# `op` with srcElem1 = srcReg1.elements[i], srcElem2 = srcReg2.elements[imm]
# and a BigElement destElem, seeded from the old destination when readDest
# is set. An imm outside [0, eCount) raises UndefinedInstruction rather than
# indexing past the element array.
def twoRegLongInst(name, Name, opClass, types, op, readDest=False):
    global header_output, exec_output
    rCount = 2
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1, srcReg2;
    BigRegVect destReg;
    '''
    for reg in range(rCount):
        eWalkCode += '''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
            srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
        ''' % { "reg" : reg }
    if readDest:
        # The destination is twice as wide as a source.
        for reg in range(2 * rCount):
            eWalkCode += '''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    # The range test must use ||: no value is both below zero and at least
    # eCount, so an && test could never reject a bad index. (If imm is an
    # unsigned type the first comparison is simply always false.)
    eWalkCode += '''
    if (imm < 0 || imm >= eCount) {
        if (FullSystem)
            fault = new UndefinedInstruction;
        else
            fault = new UndefinedInstruction(false, mnemonic);
    } else {
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = gtoh(srcReg1.elements[i]);
            Element srcElem2 = gtoh(srcReg2.elements[imm]);
            BigElement destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(2 * rCount):
        eWalkCode += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
960
# Emit a floating point instruction that combines every register of the
# first source with one selected register of the second source.
#
# The generated execute code copies rCount FloatRegs of both sources (and of
# the destination when readDest is set) into FloatVect arrays. For each
# register i it runs the C++ snippet `op` with srcReg1 = srcRegs1[i],
# srcReg2 = srcRegs2[imm] and destReg in scope, then writes the results
# back. An imm outside [0, eCount) raises UndefinedInstruction.
def twoEqualRegInstFp(name, Name, opClass, types, rCount, op, readDest=False):
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    typedef FloatReg FloatVect[rCount];
    FloatVect srcRegs1, srcRegs2, destRegs;
    '''
    for reg in range(rCount):
        eWalkCode += '''
            srcRegs1[%(reg)d] = FpOp1P%(reg)d;
            srcRegs2[%(reg)d] = FpOp2P%(reg)d;
        ''' % { "reg" : reg }
        if readDest:
            eWalkCode += '''
                destRegs[%(reg)d] = FpDestP%(reg)d;
            ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destReg = destRegs[i];'
    # The range test must use ||: no value is both below zero and at least
    # eCount, so an && test could never reject a bad index. (If imm is an
    # unsigned type the first comparison is simply always false.)
    # NOTE(review): the bound is eCount while srcRegs2 holds rCount entries;
    # presumably the two agree for the element types used here -- confirm.
    eWalkCode += '''
    if (imm < 0 || imm >= eCount) {
        if (FullSystem)
            fault = new UndefinedInstruction;
        else
            fault = new UndefinedInstruction(false, mnemonic);
    } else {
        for (unsigned i = 0; i < rCount; i++) {
            FloatReg srcReg1 = srcRegs1[i];
            FloatReg srcReg2 = srcRegs2[imm];
            FloatReg destReg;
            %(readDest)s
            %(op)s
            destRegs[i] = destReg;
        }
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(rCount):
        eWalkCode += '''
        FpDestP%(reg)d = destRegs[%(reg)d];
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "FpRegRegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1012
# Emit an instruction with one register source, an immediate, and a
# destination of the same register count (rCount).
#
# The generated execute code loads the source words (and the destination
# words when readDest is set), runs the C++ snippet `op` once per iteration
# of a loop over eCount, and writes the destination words back.
# fromInt makes the loop read the source as `FloatRegBits srcReg1` from
# srcRegs1.regs[i] instead of `Element srcElem1` from srcRegs1.elements[i].
# toInt makes the loop declare and store `FloatRegBits destReg` through
# destRegs.regs[i] instead of `Element destElem` through
# destRegs.elements[i].
# The class is declared through NeonRegRegImmOpDeclare and executed through
# NeonEqualRegExecute; NeonExecDeclare is substituted once per entry of
# `types`.
1013 def twoRegShiftInst(name, Name, opClass, types, rCount, op,
1014 readDest=False, toInt=False, fromInt=False):
1015 global header_output, exec_output
1016 eWalkCode = simdEnabledCheckCode + '''
1017 RegVect srcRegs1, destRegs;
1018 '''
# Load the source registers, and the old destination if requested.
1019 for reg in range(rCount):
1020 eWalkCode += '''
1021 srcRegs1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1022 ''' % { "reg" : reg }
1023 if readDest:
1024 eWalkCode += '''
1025 destRegs.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1026 ''' % { "reg" : reg }
# Statement that seeds the destination variable before `op` runs; the toInt
# form seeds destReg from .regs rather than destElem from .elements.
1027 readDestCode = ''
1028 if readDest:
1029 readDestCode = 'destElem = gtoh(destRegs.elements[i]);'
1030 if toInt:
1031 readDestCode = 'destReg = gtoh(destRegs.regs[i]);'
# How the source operand is declared and read inside the loop.
1032 readOpCode = 'Element srcElem1 = gtoh(srcRegs1.elements[i]);'
1033 if fromInt:
1034 readOpCode = 'FloatRegBits srcReg1 = gtoh(srcRegs1.regs[i]);'
# How the destination is declared and stored inside the loop.
1035 declDest = 'Element destElem;'
1036 writeDestCode = 'destRegs.elements[i] = htog(destElem);'
1037 if toInt:
1038 declDest = 'FloatRegBits destReg;'
1039 writeDestCode = 'destRegs.regs[i] = htog(destReg);'
# NOTE(review): the loop bound is eCount even when fromInt/toInt index
# .regs[i]; presumably eCount equals the register count for the types used
# with those flags -- confirm.
1040 eWalkCode += '''
1041 for (unsigned i = 0; i < eCount; i++) {
1042 %(readOp)s
1043 %(declDest)s
1044 %(readDest)s
1045 %(op)s
1046 %(writeDest)s
1047 }
1048 ''' % { "readOp" : readOpCode,
1049 "declDest" : declDest,
1050 "readDest" : readDestCode,
1051 "op" : op,
1052 "writeDest" : writeDestCode }
# Write the destination registers back.
1053 for reg in range(rCount):
1054 eWalkCode += '''
1055 FpDestP%(reg)d_uw = gtoh(destRegs.regs[%(reg)d]);
1056 ''' % { "reg" : reg }
1057 iop = InstObjParams(name, Name,
1058 "RegRegImmOp",
1059 { "code": eWalkCode,
1060 "r_count": rCount,
1061 "predicate_test": predicateTest,
1062 "op_class": opClass }, [])
1063 header_output += NeonRegRegImmOpDeclare.subst(iop)
1064 exec_output += NeonEqualRegExecute.subst(iop)
1065 for type in types:
1066 substDict = { "targs" : type,
1067 "class_name" : Name }
1068 exec_output += NeonExecDeclare.subst(substDict)
1069
def twoRegNarrowShiftInst(name, Name, opClass, types, op, readDest=False):
    # Generate a "RegRegImmOp" NEON instruction that reads a four-register
    # source vector of BigElement values and produces a two-register
    # destination vector of Element values.
    #
    #   name, Name - mnemonic and generated class name for InstObjParams.
    #   opClass    - value stored under "op_class".
    #   types      - each entry produces one NeonExecDeclare instantiation.
    #   op         - C++ snippet computing destElem from srcElem1.
    #   readDest   - load the old destination and preload destElem from it.
    global header_output, exec_output

    loadSrc = '''
    srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
    '''
    loadDest = '''
    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
    '''
    storeDest = '''
    FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    '''

    pieces = [simdEnabledCheckCode, '''
    BigRegVect srcReg1;
    RegVect destReg;
    ''']
    # Four source registers in, optionally two destination registers in.
    pieces.extend(loadSrc % { "reg" : idx } for idx in range(4))
    if readDest:
        pieces.extend(loadDest % { "reg" : idx } for idx in range(2))
        preload = 'destElem = gtoh(destReg.elements[i]);'
    else:
        preload = ''
    pieces.append('''
    for (unsigned i = 0; i < eCount; i++) {
        BigElement srcElem1 = gtoh(srcReg1.elements[i]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : preload })
    # Two destination registers out.
    pieces.extend(storeDest % { "reg" : idx } for idx in range(2))

    params = { "code": ''.join(pieces),
               "r_count": 2,
               "predicate_test": predicateTest,
               "op_class": opClass }
    iop = InstObjParams(name, Name, "RegRegImmOp", params, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1113
def twoRegLongShiftInst(name, Name, opClass, types, op, readDest=False):
    # Generate a "RegRegImmOp" NEON instruction that reads a two-register
    # source vector of Element values and produces a four-register
    # destination vector of BigElement values.
    #
    #   name, Name - mnemonic and generated class name for InstObjParams.
    #   opClass    - value stored under "op_class".
    #   types      - each entry produces one NeonExecDeclare instantiation.
    #   op         - C++ snippet computing destElem from srcElem1.
    #   readDest   - load the old destination and preload destElem from it.
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1;
    BigRegVect destReg;
    '''
    for reg in range(2):
        eWalkCode += '''
    srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
    ''' % { "reg" : reg }
    if readDest:
        for reg in range(4):
            eWalkCode += '''
    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
    ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        # The preload must target the per-element temporary. It used to
        # assign to destReg (the whole vector), which left destElem
        # uninitialised for accumulating ops.
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        BigElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(4):
        eWalkCode += '''
    FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": 2,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    # One explicit instantiation per requested element type.
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1157
def twoRegMiscInst(name, Name, opClass, types, rCount, op, readDest=False):
    # Generate a "RegRegOp" NEON instruction whose source and destination
    # vectors both span rCount registers. Inside the element loop the op
    # snippet sees the read index i and a write index j (initialised to i)
    # that selects the destination element.
    #
    #   name, Name - mnemonic and generated class name for InstObjParams.
    #   opClass    - value stored under "op_class".
    #   types      - each entry produces one NeonExecDeclare instantiation.
    #   rCount     - number of registers copied in and written back.
    #   op         - C++ snippet computing destElem from srcElem1.
    #   readDest   - load the old destination and preload destElem from it.
    global header_output, exec_output

    loadSrc = '''
    srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
    '''
    loadDest = '''
    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
    '''
    storeDest = '''
    FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    '''

    pieces = [simdEnabledCheckCode, '''
    RegVect srcReg1, destReg;
    ''']
    # Source and (optional) destination loads are interleaved per register.
    for idx in range(rCount):
        pieces.append(loadSrc % { "reg" : idx })
        if readDest:
            pieces.append(loadDest % { "reg" : idx })
    if readDest:
        preload = 'destElem = gtoh(destReg.elements[i]);'
    else:
        preload = ''
    pieces.append('''
    for (unsigned i = 0; i < eCount; i++) {
        unsigned j = i;
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[j] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : preload })
    pieces.extend(storeDest % { "reg" : idx } for idx in range(rCount))

    params = { "code": ''.join(pieces),
               "r_count": rCount,
               "predicate_test": predicateTest,
               "op_class": opClass }
    iop = InstObjParams(name, Name, "RegRegOp", params, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1200
def twoRegMiscScInst(name, Name, opClass, types, rCount, op, readDest=False):
    # Generate a "RegRegImmOp" NEON instruction in which every destination
    # element is computed from the single source element selected by imm
    # (srcReg1.elements[imm]).
    #
    #   name, Name - mnemonic and generated class name for InstObjParams.
    #   opClass    - value stored under "op_class".
    #   types      - each entry produces one NeonExecDeclare instantiation.
    #   rCount     - number of registers copied in and written back.
    #   op         - C++ snippet computing destElem from srcElem1.
    #   readDest   - load the old destination and preload destElem from it.
    global header_output, exec_output

    loadSrc = '''
    srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
    '''
    loadDest = '''
    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
    '''
    storeDest = '''
    FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    '''

    pieces = [simdEnabledCheckCode, '''
    RegVect srcReg1, destReg;
    ''']
    # Source and (optional) destination loads are interleaved per register.
    for idx in range(rCount):
        pieces.append(loadSrc % { "reg" : idx })
        if readDest:
            pieces.append(loadDest % { "reg" : idx })
    if readDest:
        preload = 'destElem = gtoh(destReg.elements[i]);'
    else:
        preload = ''
    pieces.append('''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[imm]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : preload })
    pieces.extend(storeDest % { "reg" : idx } for idx in range(rCount))

    params = { "code": ''.join(pieces),
               "r_count": rCount,
               "predicate_test": predicateTest,
               "op_class": opClass }
    iop = InstObjParams(name, Name, "RegRegImmOp", params, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1242
def twoRegMiscScramble(name, Name, opClass, types, rCount, op, readDest=False):
    # Generate a "RegRegOp" NEON instruction that rewrites both of its
    # register operands: srcReg1 and destReg are always loaded, op is
    # emitted verbatim (no per-element loop is wrapped around it), and both
    # vectors are written back afterwards.
    #
    #   name, Name - mnemonic and generated class name for InstObjParams.
    #   opClass    - value stored under "op_class".
    #   types      - each entry produces one NeonExecDeclare instantiation.
    #   rCount     - number of registers in each vector.
    #   op         - complete C++ body operating on srcReg1 / destReg.
    #   readDest   - accepted for signature compatibility only; the
    #                destination is loaded unconditionally. The former
    #                readDest handling appended only whitespace and built a
    #                string that was never used, so it has been dropped.
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1, destReg;
    '''
    for reg in range(rCount):
        eWalkCode += '''
    srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
    ''' % { "reg" : reg }
    eWalkCode += op
    # Both operands are outputs of a scramble, so write both back.
    for reg in range(rCount):
        eWalkCode += '''
    FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    FpOp1P%(reg)d_uw = gtoh(srcReg1.regs[%(reg)d]);
    ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    # One explicit instantiation per requested element type.
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1277
def twoRegMiscInstFp(name, Name, opClass, types, rCount, op,
                     readDest=False, toInt=False):
    # Generate an "FpRegRegOp" NEON instruction that works on whole
    # floating point registers: the generated loop runs r over rCount
    # registers rather than over elements.
    #
    #   name, Name - mnemonic and generated class name for InstObjParams.
    #   opClass    - value stored under "op_class".
    #   types      - each entry produces one NeonExecDeclare instantiation.
    #   rCount     - number of registers processed.
    #   op         - C++ snippet computing destReg from srcReg1.
    #   readDest   - load the old destination and preload destReg from it.
    #   toInt      - destination is held in a RegVect and handled as
    #                FloatRegBits instead of FloatReg.
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    typedef FloatReg FloatVect[rCount];
    FloatVect srcRegs1;
    '''
    if toInt:
        eWalkCode += 'RegVect destRegs;\n'
    else:
        eWalkCode += 'FloatVect destRegs;\n'
    for reg in range(rCount):
        eWalkCode += '''
    srcRegs1[%(reg)d] = FpOp1P%(reg)d;
    ''' % { "reg" : reg }
        if readDest:
            if toInt:
                eWalkCode += '''
    destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
    ''' % { "reg" : reg }
            else:
                eWalkCode += '''
    destRegs[%(reg)d] = FpDestP%(reg)d;
    ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        # The generated loop variable is r (not i), and a RegVect
        # destination has to be read through its regs[] member, mirroring
        # the write-back statement chosen below.
        if toInt:
            readDestCode = 'destReg = destRegs.regs[r];'
        else:
            readDestCode = 'destReg = destRegs[r];'
    destType = 'FloatReg'
    writeDest = 'destRegs[r] = destReg;'
    if toInt:
        destType = 'FloatRegBits'
        writeDest = 'destRegs.regs[r] = destReg;'
    eWalkCode += '''
    for (unsigned r = 0; r < rCount; r++) {
        FloatReg srcReg1 = srcRegs1[r];
        %(destType)s destReg;
        %(readDest)s
        %(op)s
        %(writeDest)s
    }
    ''' % { "op" : op,
            "readDest" : readDestCode,
            "destType" : destType,
            "writeDest" : writeDest }
    for reg in range(rCount):
        if toInt:
            eWalkCode += '''
    FpDestP%(reg)d_uw = destRegs.regs[%(reg)d];
    ''' % { "reg" : reg }
        else:
            eWalkCode += '''
    FpDestP%(reg)d = destRegs[%(reg)d];
    ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "FpRegRegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    # One explicit instantiation per requested element type.
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1343
def twoRegCondenseInst(name, Name, opClass, types, rCount, op, readDest=False):
    # Generate a "RegRegOp" NEON instruction that folds each adjacent pair
    # of source elements (srcElem1 = element 2*i, srcElem2 = element 2*i+1)
    # into one BigElement destination element; the loop therefore runs over
    # eCount / 2 results.
    #
    #   name, Name - mnemonic and generated class name for InstObjParams.
    #   opClass    - value stored under "op_class".
    #   types      - each entry produces one NeonExecDeclare instantiation.
    #   rCount     - number of registers copied in and written back.
    #   op         - C++ snippet computing destElem from the element pair.
    #   readDest   - load the old destination and preload destElem from it.
    global header_output, exec_output

    loadSrc = '''
    srcRegs.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
    '''
    loadDest = '''
    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
    '''
    storeDest = '''
    FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    '''

    pieces = [simdEnabledCheckCode, '''
    RegVect srcRegs;
    BigRegVect destReg;
    ''']
    # Source and (optional) destination loads are interleaved per register.
    for idx in range(rCount):
        pieces.append(loadSrc % { "reg" : idx })
        if readDest:
            pieces.append(loadDest % { "reg" : idx })
    if readDest:
        preload = 'destElem = gtoh(destReg.elements[i]);'
    else:
        preload = ''
    pieces.append('''
    for (unsigned i = 0; i < eCount / 2; i++) {
        Element srcElem1 = gtoh(srcRegs.elements[2 * i]);
        Element srcElem2 = gtoh(srcRegs.elements[2 * i + 1]);
        BigElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : preload })
    pieces.extend(storeDest % { "reg" : idx } for idx in range(rCount))

    params = { "code": ''.join(pieces),
               "r_count": rCount,
               "predicate_test": predicateTest,
               "op_class": opClass }
    iop = InstObjParams(name, Name, "RegRegOp", params, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1387
def twoRegNarrowMiscInst(name, Name, opClass, types, op, readDest=False):
    # Generate a "RegRegOp" NEON instruction that reads a four-register
    # source vector of BigElement values and produces a two-register
    # destination vector of Element values.
    #
    #   name, Name - mnemonic and generated class name for InstObjParams.
    #   opClass    - value stored under "op_class".
    #   types      - each entry produces one NeonExecDeclare instantiation.
    #   op         - C++ snippet computing destElem from srcElem1.
    #   readDest   - load the old destination and preload destElem from it.
    global header_output, exec_output

    loadSrc = '''
    srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
    '''
    loadDest = '''
    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
    '''
    storeDest = '''
    FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    '''

    pieces = [simdEnabledCheckCode, '''
    BigRegVect srcReg1;
    RegVect destReg;
    ''']
    # Four source registers in, optionally two destination registers in.
    pieces.extend(loadSrc % { "reg" : idx } for idx in range(4))
    if readDest:
        pieces.extend(loadDest % { "reg" : idx } for idx in range(2))
        preload = 'destElem = gtoh(destReg.elements[i]);'
    else:
        preload = ''
    pieces.append('''
    for (unsigned i = 0; i < eCount; i++) {
        BigElement srcElem1 = gtoh(srcReg1.elements[i]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : preload })
    # Two destination registers out.
    pieces.extend(storeDest % { "reg" : idx } for idx in range(2))

    params = { "code": ''.join(pieces),
               "r_count": 2,
               "predicate_test": predicateTest,
               "op_class": opClass }
    iop = InstObjParams(name, Name, "RegRegOp", params, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1431
def oneRegImmInst(name, Name, opClass, types, rCount, op, readDest=False):
    # Generate a "RegImmOp" NEON instruction that has no register source:
    # op computes every destination element on its own (optionally
    # starting from the element's previous value).
    #
    #   name, Name - mnemonic and generated class name for InstObjParams.
    #   opClass    - value stored under "op_class".
    #   types      - each entry produces one NeonExecDeclare instantiation.
    #   rCount     - number of destination registers.
    #   op         - C++ snippet computing destElem.
    #   readDest   - load the old destination and preload destElem from it.
    global header_output, exec_output

    loadDest = '''
    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
    '''
    storeDest = '''
    FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    '''

    pieces = [simdEnabledCheckCode, '''
    RegVect destReg;
    ''']
    if readDest:
        pieces.extend(loadDest % { "reg" : idx } for idx in range(rCount))
        preload = 'destElem = gtoh(destReg.elements[i]);'
    else:
        preload = ''
    pieces.append('''
    for (unsigned i = 0; i < eCount; i++) {
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : preload })
    pieces.extend(storeDest % { "reg" : idx } for idx in range(rCount))

    params = { "code": ''.join(pieces),
               "r_count": rCount,
               "predicate_test": predicateTest,
               "op_class": opClass }
    iop = InstObjParams(name, Name, "RegImmOp", params, [])
    header_output += NeonRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1469
def twoRegLongMiscInst(name, Name, opClass, types, op, readDest=False):
    # Generate a "RegRegOp" NEON instruction that reads a two-register
    # source vector of Element values and produces a four-register
    # destination vector of BigElement values.
    #
    #   name, Name - mnemonic and generated class name for InstObjParams.
    #   opClass    - value stored under "op_class".
    #   types      - each entry produces one NeonExecDeclare instantiation.
    #   op         - C++ snippet computing destElem from srcElem1.
    #   readDest   - load the old destination and preload destElem from it.
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1;
    BigRegVect destReg;
    '''
    for reg in range(2):
        eWalkCode += '''
    srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
    ''' % { "reg" : reg }
    if readDest:
        for reg in range(4):
            eWalkCode += '''
    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
    ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        # The preload must target the per-element temporary. It used to
        # assign to destReg (the whole vector), which left destElem
        # uninitialised for accumulating ops.
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        BigElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(4):
        eWalkCode += '''
    FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code": eWalkCode,
                          "r_count": 2,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    # One explicit instantiation per requested element type.
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1513
# vhadd: halving add. Each operand has its low bit cleared and is halved on
# its own; carryBit restores the carry produced by adding the two low bits.
# Each snippet is registered twice, with 2 and with 4 as the argument that
# precedes the code string (the D and Q class names respectively).
1514 vhaddCode = '''
1515 Element carryBit =
1516 (((unsigned)srcElem1 & 0x1) +
1517 ((unsigned)srcElem2 & 0x1)) >> 1;
1518 // Use division instead of a shift to ensure the sign extension works
1519 // right. The compiler will figure out if it can be a shift. Mask the
1520 // inputs so they get truncated correctly.
1521 destElem = (((srcElem1 & ~(Element)1) / 2) +
1522 ((srcElem2 & ~(Element)1) / 2)) + carryBit;
1523 '''
1524 threeEqualRegInst("vhadd", "VhaddD", "SimdAddOp", allTypes, 2, vhaddCode)
1525 threeEqualRegInst("vhadd", "VhaddQ", "SimdAddOp", allTypes, 4, vhaddCode)
1526
# vrhadd: same as vhadd except that 1 is added to the low-bit sum before it
# is shifted, so the halved result rounds up.
1527 vrhaddCode = '''
1528 Element carryBit =
1529 (((unsigned)srcElem1 & 0x1) +
1530 ((unsigned)srcElem2 & 0x1) + 1) >> 1;
1531 // Use division instead of a shift to ensure the sign extension works
1532 // right. The compiler will figure out if it can be a shift. Mask the
1533 // inputs so they get truncated correctly.
1534 destElem = (((srcElem1 & ~(Element)1) / 2) +
1535 ((srcElem2 & ~(Element)1) / 2)) + carryBit;
1536 '''
1537 threeEqualRegInst("vrhadd", "VrhaddD", "SimdAddOp", allTypes, 2, vrhaddCode)
1538 threeEqualRegInst("vrhadd", "VrhaddQ", "SimdAddOp", allTypes, 4, vrhaddCode)
1539
# vhsub: halving subtract. "barrowBit" holds the borrow out of the low-bit
# subtraction (1 only when srcElem1's low bit is 0 and srcElem2's is 1) and
# is subtracted from the difference of the halved operands.
1540 vhsubCode = '''
1541 Element barrowBit =
1542 (((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1;
1543 // Use division instead of a shift to ensure the sign extension works
1544 // right. The compiler will figure out if it can be a shift. Mask the
1545 // inputs so they get truncated correctly.
1546 destElem = (((srcElem1 & ~(Element)1) / 2) -
1547 ((srcElem2 & ~(Element)1) / 2)) - barrowBit;
1548 '''
1549 threeEqualRegInst("vhsub", "VhsubD", "SimdAddOp", allTypes, 2, vhsubCode)
1550 threeEqualRegInst("vhsub", "VhsubQ", "SimdAddOp", allTypes, 4, vhsubCode)
1551
# Element-wise bitwise operations, all registered for unsignedTypes.
# vand: AND of the two sources.
1552 vandCode = '''
1553 destElem = srcElem1 & srcElem2;
1554 '''
1555 threeEqualRegInst("vand", "VandD", "SimdAluOp", unsignedTypes, 2, vandCode)
1556 threeEqualRegInst("vand", "VandQ", "SimdAluOp", unsignedTypes, 4, vandCode)
1557
# vbic: first source ANDed with the complement of the second.
1558 vbicCode = '''
1559 destElem = srcElem1 & ~srcElem2;
1560 '''
1561 threeEqualRegInst("vbic", "VbicD", "SimdAluOp", unsignedTypes, 2, vbicCode)
1562 threeEqualRegInst("vbic", "VbicQ", "SimdAluOp", unsignedTypes, 4, vbicCode)
1563
# vorr: OR of the two sources.
1564 vorrCode = '''
1565 destElem = srcElem1 | srcElem2;
1566 '''
1567 threeEqualRegInst("vorr", "VorrD", "SimdAluOp", unsignedTypes, 2, vorrCode)
1568 threeEqualRegInst("vorr", "VorrQ", "SimdAluOp", unsignedTypes, 4, vorrCode)
1569
# vmov is registered with the same OR snippet as vorr, under SimdMiscOp.
1570 threeEqualRegInst("vmov", "VmovD", "SimdMiscOp", unsignedTypes, 2, vorrCode)
1571 threeEqualRegInst("vmov", "VmovQ", "SimdMiscOp", unsignedTypes, 4, vorrCode)
1572
# vorn: first source ORed with the complement of the second.
1573 vornCode = '''
1574 destElem = srcElem1 | ~srcElem2;
1575 '''
1576 threeEqualRegInst("vorn", "VornD", "SimdAluOp", unsignedTypes, 2, vornCode)
1577 threeEqualRegInst("vorn", "VornQ", "SimdAluOp", unsignedTypes, 4, vornCode)
1578
# veor: XOR of the two sources.
1579 veorCode = '''
1580 destElem = srcElem1 ^ srcElem2;
1581 '''
1582 threeEqualRegInst("veor", "VeorD", "SimdAluOp", unsignedTypes, 2, veorCode)
1583 threeEqualRegInst("veor", "VeorQ", "SimdAluOp", unsignedTypes, 4, veorCode)
1584
# The three bit-select variants below read the previous destElem; the
# trailing True is presumably threeEqualRegInst's readDest argument (that
# helper is defined outside this section -- confirm).
# vbif: keep destination bits where srcElem2 is 1, take srcElem1 where 0.
1585 vbifCode = '''
1586 destElem = (destElem & srcElem2) | (srcElem1 & ~srcElem2);
1587 '''
1588 threeEqualRegInst("vbif", "VbifD", "SimdAluOp", unsignedTypes, 2, vbifCode, True)
1589 threeEqualRegInst("vbif", "VbifQ", "SimdAluOp", unsignedTypes, 4, vbifCode, True)
# vbit: take srcElem1 where srcElem2 is 1, keep destination bits where 0.
1590 vbitCode = '''
1591 destElem = (srcElem1 & srcElem2) | (destElem & ~srcElem2);
1592 '''
1593 threeEqualRegInst("vbit", "VbitD", "SimdAluOp", unsignedTypes, 2, vbitCode, True)
1594 threeEqualRegInst("vbit", "VbitQ", "SimdAluOp", unsignedTypes, 4, vbitCode, True)
# vbsl: the old destination is the selector -- srcElem1 where it is 1,
# srcElem2 where it is 0.
1595 vbslCode = '''
1596 destElem = (srcElem1 & destElem) | (srcElem2 & ~destElem);
1597 '''
1598 threeEqualRegInst("vbsl", "VbslD", "SimdAluOp", unsignedTypes, 2, vbslCode, True)
1599 threeEqualRegInst("vbsl", "VbslQ", "SimdAluOp", unsignedTypes, 4, vbslCode, True)
1600
# vmax: the larger of the two sources (srcElem2 when they are equal).
1601 vmaxCode = '''
1602 destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2;
1603 '''
1604 threeEqualRegInst("vmax", "VmaxD", "SimdCmpOp", allTypes, 2, vmaxCode)
1605 threeEqualRegInst("vmax", "VmaxQ", "SimdCmpOp", allTypes, 4, vmaxCode)
1606
# vmin: the smaller of the two sources (srcElem2 when they are equal).
1607 vminCode = '''
1608 destElem = (srcElem1 < srcElem2) ? srcElem1 : srcElem2;
1609 '''
1610 threeEqualRegInst("vmin", "VminD", "SimdCmpOp", allTypes, 2, vminCode)
1611 threeEqualRegInst("vmin", "VminQ", "SimdCmpOp", allTypes, 4, vminCode)
1612
# vadd: plain element-wise sum.
1613 vaddCode = '''
1614 destElem = srcElem1 + srcElem2;
1615 '''
1616 threeEqualRegInst("vadd", "NVaddD", "SimdAddOp", unsignedTypes, 2, vaddCode)
1617 threeEqualRegInst("vadd", "NVaddQ", "SimdAddOp", unsignedTypes, 4, vaddCode)
1618
# vpadd reuses the vadd snippet with pairwise=True; only a D form is
# registered here.
1619 threeEqualRegInst("vpadd", "NVpaddD", "SimdAddOp", smallUnsignedTypes,
1620 2, vaddCode, pairwise=True)
# vaddl / vaddw: both operands are cast to BigElement before adding; the
# same snippet serves the long and the wide helper.
1621 vaddlwCode = '''
1622 destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
1623 '''
1624 threeRegLongInst("vaddl", "Vaddl", "SimdAddOp", smallTypes, vaddlwCode)
1625 threeRegWideInst("vaddw", "Vaddw", "SimdAddOp", smallTypes, vaddlwCode)
# vaddhn: the sum is shifted right by the width of Element in bits, so the
# upper part of the wide sum is what gets stored.
1626 vaddhnCode = '''
1627 destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >>
1628 (sizeof(Element) * 8);
1629 '''
1630 threeRegNarrowInst("vaddhn", "Vaddhn", "SimdAddOp", smallTypes, vaddhnCode)
# vraddhn: as vaddhn, with the rounding constant 1 << (Element bits - 1)
# added before the shift.
1631 vraddhnCode = '''
1632 destElem = ((BigElement)srcElem1 + (BigElement)srcElem2 +
1633 ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
1634 (sizeof(Element) * 8);
1635 '''
1636 threeRegNarrowInst("vraddhn", "Vraddhn", "SimdAddOp", smallTypes, vraddhnCode)
1637
# vsub: plain element-wise difference.
1638 vsubCode = '''
1639 destElem = srcElem1 - srcElem2;
1640 '''
1641 threeEqualRegInst("vsub", "NVsubD", "SimdAddOp", unsignedTypes, 2, vsubCode)
1642 threeEqualRegInst("vsub", "NVsubQ", "SimdAddOp", unsignedTypes, 4, vsubCode)
# vsubl / vsubw: both operands are cast to BigElement before subtracting.
1643 vsublwCode = '''
1644 destElem = (BigElement)srcElem1 - (BigElement)srcElem2;
1645 '''
1646 threeRegLongInst("vsubl", "Vsubl", "SimdAddOp", smallTypes, vsublwCode)
1647 threeRegWideInst("vsubw", "Vsubw", "SimdAddOp", smallTypes, vsublwCode)
1648
# vqadd (unsigned types): wrap-around is detected by the sum being smaller
# than an operand; the result is then forced to all ones and fpscr.qc set.
1649 vqaddUCode = '''
1650 destElem = srcElem1 + srcElem2;
1651 FPSCR fpscr = (FPSCR) FpscrQc;
1652 if (destElem < srcElem1 || destElem < srcElem2) {
1653 destElem = (Element)(-1);
1654 fpscr.qc = 1;
1655 }
1656 FpscrQc = fpscr;
1657 '''
1658 threeEqualRegInst("vqadd", "VqaddUD", "SimdAddOp", unsignedTypes, 2, vqaddUCode)
1659 threeEqualRegInst("vqadd", "VqaddUQ", "SimdAddOp", unsignedTypes, 4, vqaddUCode)
# vsubhn: the difference is shifted right by the width of Element in bits.
1660 vsubhnCode = '''
1661 destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >>
1662 (sizeof(Element) * 8);
1663 '''
1664 threeRegNarrowInst("vsubhn", "Vsubhn", "SimdAddOp", smallTypes, vsubhnCode)
# vrsubhn: as vsubhn, with the rounding constant 1 << (Element bits - 1)
# added before the shift.
1665 vrsubhnCode = '''
1666 destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 +
1667 ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
1668 (sizeof(Element) * 8);
1669 '''
1670 threeRegNarrowInst("vrsubhn", "Vrsubhn", "SimdAddOp", smallTypes, vrsubhnCode)
1671
# vqadd (signed types): overflow is flagged when both operands have the
# same sign and the sum's sign differs. The result becomes the value with
# only the top bit set, or one less than that when the wrapped sum was
# negative, and fpscr.qc is set.
1672 vqaddSCode = '''
1673 destElem = srcElem1 + srcElem2;
1674 FPSCR fpscr = (FPSCR) FpscrQc;
1675 bool negDest = (destElem < 0);
1676 bool negSrc1 = (srcElem1 < 0);
1677 bool negSrc2 = (srcElem2 < 0);
1678 if ((negDest != negSrc1) && (negSrc1 == negSrc2)) {
1679 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
1680 if (negDest)
1681 destElem -= 1;
1682 fpscr.qc = 1;
1683 }
1684 FpscrQc = fpscr;
1685 '''
1686 threeEqualRegInst("vqadd", "VqaddSD", "SimdAddOp", signedTypes, 2, vqaddSCode)
1687 threeEqualRegInst("vqadd", "VqaddSQ", "SimdAddOp", signedTypes, 4, vqaddSCode)
1688
# vqsub (unsigned types): a difference larger than srcElem1 means the
# subtraction wrapped; the result is clamped to 0 and fpscr.qc set.
1689 vqsubUCode = '''
1690 destElem = srcElem1 - srcElem2;
1691 FPSCR fpscr = (FPSCR) FpscrQc;
1692 if (destElem > srcElem1) {
1693 destElem = 0;
1694 fpscr.qc = 1;
1695 }
1696 FpscrQc = fpscr;
1697 '''
1698 threeEqualRegInst("vqsub", "VqsubUD", "SimdAddOp", unsignedTypes, 2, vqsubUCode)
1699 threeEqualRegInst("vqsub", "VqsubUQ", "SimdAddOp", unsignedTypes, 4, vqsubUCode)
1700
# vqsub (signed types): overflow is flagged when srcElem1 is negative
# exactly when srcElem2 is non-negative and the difference's sign differs
# from srcElem1's. Saturation values are chosen as in vqaddSCode.
1701 vqsubSCode = '''
1702 destElem = srcElem1 - srcElem2;
1703 FPSCR fpscr = (FPSCR) FpscrQc;
1704 bool negDest = (destElem < 0);
1705 bool negSrc1 = (srcElem1 < 0);
1706 bool posSrc2 = (srcElem2 >= 0);
1707 if ((negDest != negSrc1) && (negSrc1 == posSrc2)) {
1708 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
1709 if (negDest)
1710 destElem -= 1;
1711 fpscr.qc = 1;
1712 }
1713 FpscrQc = fpscr;
1714 '''
1715 threeEqualRegInst("vqsub", "VqsubSD", "SimdAddOp", signedTypes, 2, vqsubSCode)
1716 threeEqualRegInst("vqsub", "VqsubSQ", "SimdAddOp", signedTypes, 4, vqsubSCode)
1717
# Comparisons: the destination element is all ones ((Element)(-1)) when the
# condition holds and 0 otherwise.
# vcgt: srcElem1 > srcElem2.
1718 vcgtCode = '''
1719 destElem = (srcElem1 > srcElem2) ? (Element)(-1) : 0;
1720 '''
1721 threeEqualRegInst("vcgt", "VcgtD", "SimdCmpOp", allTypes, 2, vcgtCode)
1722 threeEqualRegInst("vcgt", "VcgtQ", "SimdCmpOp", allTypes, 4, vcgtCode)
1723
# vcge: srcElem1 >= srcElem2.
1724 vcgeCode = '''
1725 destElem = (srcElem1 >= srcElem2) ? (Element)(-1) : 0;
1726 '''
1727 threeEqualRegInst("vcge", "VcgeD", "SimdCmpOp", allTypes, 2, vcgeCode)
1728 threeEqualRegInst("vcge", "VcgeQ", "SimdCmpOp", allTypes, 4, vcgeCode)
1729
# vceq: srcElem1 == srcElem2; registered for unsignedTypes only.
1730 vceqCode = '''
1731 destElem = (srcElem1 == srcElem2) ? (Element)(-1) : 0;
1732 '''
1733 threeEqualRegInst("vceq", "VceqD", "SimdCmpOp", unsignedTypes, 2, vceqCode)
1734 threeEqualRegInst("vceq", "VceqQ", "SimdCmpOp", unsignedTypes, 4, vceqCode)
1735
# vshl: shift by a per-element register count. The count is srcElem2
# truncated to a signed 8-bit value; a negative count shifts right by its
# magnitude. Right shifts of at least the element width produce 0 before
# the manual sign fill, which ORs in the upper bits whenever ltz(srcElem1)
# holds but the shifted value is not negative. Left shifts of at least the
# element width produce 0.
1736 vshlCode = '''
1737 int16_t shiftAmt = (int8_t)srcElem2;
1738 if (shiftAmt < 0) {
1739 shiftAmt = -shiftAmt;
1740 if (shiftAmt >= sizeof(Element) * 8) {
1741 shiftAmt = sizeof(Element) * 8 - 1;
1742 destElem = 0;
1743 } else {
1744 destElem = (srcElem1 >> shiftAmt);
1745 }
1746 // Make sure the right shift sign extended when it should.
1747 if (ltz(srcElem1) && !ltz(destElem)) {
1748 destElem |= -((Element)1 << (sizeof(Element) * 8 -
1749 1 - shiftAmt));
1750 }
1751 } else {
1752 if (shiftAmt >= sizeof(Element) * 8) {
1753 destElem = 0;
1754 } else {
1755 destElem = srcElem1 << shiftAmt;
1756 }
1757 }
1758 '''
1759 threeEqualRegInst("vshl", "VshlD", "SimdShiftOp", allTypes, 2, vshlCode)
1760 threeEqualRegInst("vshl", "VshlQ", "SimdShiftOp", allTypes, 4, vshlCode)
1761
# vrshl: like vshl, but right shifts add rBit afterwards. rBit is bit
# (shiftAmt - 1) of the source when the count does not exceed the element
# width, and 1 when the count exceeds the width and ltz(srcElem1) holds.
# A zero count copies the source unchanged.
1762 vrshlCode = '''
1763 int16_t shiftAmt = (int8_t)srcElem2;
1764 if (shiftAmt < 0) {
1765 shiftAmt = -shiftAmt;
1766 Element rBit = 0;
1767 if (shiftAmt <= sizeof(Element) * 8)
1768 rBit = bits(srcElem1, shiftAmt - 1);
1769 if (shiftAmt > sizeof(Element) * 8 && ltz(srcElem1))
1770 rBit = 1;
1771 if (shiftAmt >= sizeof(Element) * 8) {
1772 shiftAmt = sizeof(Element) * 8 - 1;
1773 destElem = 0;
1774 } else {
1775 destElem = (srcElem1 >> shiftAmt);
1776 }
1777 // Make sure the right shift sign extended when it should.
1778 if (ltz(srcElem1) && !ltz(destElem)) {
1779 destElem |= -((Element)1 << (sizeof(Element) * 8 -
1780 1 - shiftAmt));
1781 }
1782 destElem += rBit;
1783 } else if (shiftAmt > 0) {
1784 if (shiftAmt >= sizeof(Element) * 8) {
1785 destElem = 0;
1786 } else {
1787 destElem = srcElem1 << shiftAmt;
1788 }
1789 } else {
1790 destElem = srcElem1;
1791 }
1792 '''
1793 threeEqualRegInst("vrshl", "VrshlD", "SimdAluOp", allTypes, 2, vrshlCode)
1794 threeEqualRegInst("vrshl", "VrshlQ", "SimdAluOp", allTypes, 4, vrshlCode)
1795
# vqshl (unsigned types): saturating shift by a per-element register count
# (srcElem2 truncated to a signed 8-bit value). Negative counts shift
# right. A left shift saturates to mask(element bits) and sets fpscr.qc
# when any of the bits that would be shifted out is set, or when the count
# is at least the element width and the source is non-zero. A zero count
# copies the source unchanged.
1796 vqshlUCode = '''
1797 int16_t shiftAmt = (int8_t)srcElem2;
1798 FPSCR fpscr = (FPSCR) FpscrQc;
1799 if (shiftAmt < 0) {
1800 shiftAmt = -shiftAmt;
1801 if (shiftAmt >= sizeof(Element) * 8) {
1802 shiftAmt = sizeof(Element) * 8 - 1;
1803 destElem = 0;
1804 } else {
1805 destElem = (srcElem1 >> shiftAmt);
1806 }
1807 } else if (shiftAmt > 0) {
1808 if (shiftAmt >= sizeof(Element) * 8) {
1809 if (srcElem1 != 0) {
1810 destElem = mask(sizeof(Element) * 8);
1811 fpscr.qc = 1;
1812 } else {
1813 destElem = 0;
1814 }
1815 } else {
1816 if (bits(srcElem1, sizeof(Element) * 8 - 1,
1817 sizeof(Element) * 8 - shiftAmt)) {
1818 destElem = mask(sizeof(Element) * 8);
1819 fpscr.qc = 1;
1820 } else {
1821 destElem = srcElem1 << shiftAmt;
1822 }
1823 }
1824 } else {
1825 destElem = srcElem1;
1826 }
1827 FpscrQc = fpscr;
1828 '''
1829 threeEqualRegInst("vqshl", "VqshlUD", "SimdAluOp", unsignedTypes, 2, vqshlUCode)
1830 threeEqualRegInst("vqshl", "VqshlUQ", "SimdAluOp", unsignedTypes, 4, vqshlUCode)
1831
# vqshl (signed types): right shifts are sign-filled by hand. A left shift
# saturates when the top (shiftAmt + 1) bits of the source are not all
# equal to the sign (all ones for a negative source, all zeros otherwise),
# or when the count is at least the element width and the source is
# non-zero. The saturated value is mask(element bits - 1), complemented for
# a negative source; fpscr.qc is set.
1832 vqshlSCode = '''
1833 int16_t shiftAmt = (int8_t)srcElem2;
1834 FPSCR fpscr = (FPSCR) FpscrQc;
1835 if (shiftAmt < 0) {
1836 shiftAmt = -shiftAmt;
1837 if (shiftAmt >= sizeof(Element) * 8) {
1838 shiftAmt = sizeof(Element) * 8 - 1;
1839 destElem = 0;
1840 } else {
1841 destElem = (srcElem1 >> shiftAmt);
1842 }
1843 // Make sure the right shift sign extended when it should.
1844 if (srcElem1 < 0 && destElem >= 0) {
1845 destElem |= -((Element)1 << (sizeof(Element) * 8 -
1846 1 - shiftAmt));
1847 }
1848 } else if (shiftAmt > 0) {
1849 bool sat = false;
1850 if (shiftAmt >= sizeof(Element) * 8) {
1851 if (srcElem1 != 0)
1852 sat = true;
1853 else
1854 destElem = 0;
1855 } else {
1856 if (bits(srcElem1, sizeof(Element) * 8 - 1,
1857 sizeof(Element) * 8 - 1 - shiftAmt) !=
1858 ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
1859 sat = true;
1860 } else {
1861 destElem = srcElem1 << shiftAmt;
1862 }
1863 }
1864 if (sat) {
1865 fpscr.qc = 1;
1866 destElem = mask(sizeof(Element) * 8 - 1);
1867 if (srcElem1 < 0)
1868 destElem = ~destElem;
1869 }
1870 } else {
1871 destElem = srcElem1;
1872 }
1873 FpscrQc = fpscr;
1874 '''
1875 threeEqualRegInst("vqshl", "VqshlSD", "SimdCmpOp", signedTypes, 2, vqshlSCode)
1876 threeEqualRegInst("vqshl", "VqshlSQ", "SimdCmpOp", signedTypes, 4, vqshlSCode)
1877
# vqrshl (unsigned types): saturating, rounding shift by a per-element
# register count (srcElem2 truncated to a signed 8-bit value).
#   count < 0  - right shift by the magnitude, then add rBit, the last bit
#                shifted out (taken only while the magnitude does not exceed
#                the element width).
#   count > 0  - left shift; saturates to mask(element bits) and sets
#                fpscr.qc when a set bit would be shifted out.
#   count == 0 - the source is copied unchanged.
vqrshlUCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        Element rBit = 0;
        if (shiftAmt <= sizeof(Element) * 8)
            rBit = bits(srcElem1, shiftAmt - 1);
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        destElem += rBit;
    } else if (shiftAmt > 0) {
        if (shiftAmt >= sizeof(Element) * 8) {
            if (srcElem1 != 0) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = 0;
            }
        } else {
            if (bits(srcElem1, sizeof(Element) * 8 - 1,
                        sizeof(Element) * 8 - shiftAmt)) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        }
    } else {
        // A zero count is handled explicitly, as in the other saturating
        // shifts. It used to fall into the left shift path and evaluate
        // bits(srcElem1, N - 1, N), an empty range, so the result was only
        // right by accident.
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
threeEqualRegInst("vqrshl", "VqrshlUD", "SimdCmpOp", unsignedTypes, 2, vqrshlUCode)
threeEqualRegInst("vqrshl", "VqrshlUQ", "SimdCmpOp", unsignedTypes, 4, vqrshlUCode)
1915
# vqrshl (signed types): saturating, rounding shift by a per-element
# register count (srcElem2 truncated to a signed 8-bit value). Right shifts
# are sign-filled by hand and then have rBit added: bit (shiftAmt - 1) of
# the source while the count does not exceed the element width, or 1 when
# it exceeds the width and the source is negative. Left shifts saturate
# under the same conditions and to the same values as vqshlSCode, setting
# fpscr.qc. A zero count copies the source unchanged.
1916 vqrshlSCode = '''
1917 int16_t shiftAmt = (int8_t)srcElem2;
1918 FPSCR fpscr = (FPSCR) FpscrQc;
1919 if (shiftAmt < 0) {
1920 shiftAmt = -shiftAmt;
1921 Element rBit = 0;
1922 if (shiftAmt <= sizeof(Element) * 8)
1923 rBit = bits(srcElem1, shiftAmt - 1);
1924 if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
1925 rBit = 1;
1926 if (shiftAmt >= sizeof(Element) * 8) {
1927 shiftAmt = sizeof(Element) * 8 - 1;
1928 destElem = 0;
1929 } else {
1930 destElem = (srcElem1 >> shiftAmt);
1931 }
1932 // Make sure the right shift sign extended when it should.
1933 if (srcElem1 < 0 && destElem >= 0) {
1934 destElem |= -((Element)1 << (sizeof(Element) * 8 -
1935 1 - shiftAmt));
1936 }
1937 destElem += rBit;
1938 } else if (shiftAmt > 0) {
1939 bool sat = false;
1940 if (shiftAmt >= sizeof(Element) * 8) {
1941 if (srcElem1 != 0)
1942 sat = true;
1943 else
1944 destElem = 0;
1945 } else {
1946 if (bits(srcElem1, sizeof(Element) * 8 - 1,
1947 sizeof(Element) * 8 - 1 - shiftAmt) !=
1948 ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
1949 sat = true;
1950 } else {
1951 destElem = srcElem1 << shiftAmt;
1952 }
1953 }
1954 if (sat) {
1955 fpscr.qc = 1;
1956 destElem = mask(sizeof(Element) * 8 - 1);
1957 if (srcElem1 < 0)
1958 destElem = ~destElem;
1959 }
1960 } else {
1961 destElem = srcElem1;
1962 }
1963 FpscrQc = fpscr;
1964 '''
1965 threeEqualRegInst("vqrshl", "VqrshlSD", "SimdCmpOp", signedTypes, 2, vqrshlSCode)
1966 threeEqualRegInst("vqrshl", "VqrshlSQ", "SimdCmpOp", signedTypes, 4, vqrshlSCode)
1967
1968 vabaCode = '''
1969 destElem += (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
1970 (srcElem2 - srcElem1);
1971 '''
1972 threeEqualRegInst("vaba", "VabaD", "SimdAddAccOp", allTypes, 2, vabaCode, True)
1973 threeEqualRegInst("vaba", "VabaQ", "SimdAddAccOp", allTypes, 4, vabaCode, True)
1974 vabalCode = '''
1975 destElem += (srcElem1 > srcElem2) ?
1976 ((BigElement)srcElem1 - (BigElement)srcElem2) :
1977 ((BigElement)srcElem2 - (BigElement)srcElem1);
1978 '''
1979 threeRegLongInst("vabal", "Vabal", "SimdAddAccOp", smallTypes, vabalCode, True)
1980
1981 vabdCode = '''
1982 destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
1983 (srcElem2 - srcElem1);
1984 '''
1985 threeEqualRegInst("vabd", "VabdD", "SimdAddOp", allTypes, 2, vabdCode)
1986 threeEqualRegInst("vabd", "VabdQ", "SimdAddOp", allTypes, 4, vabdCode)
1987 vabdlCode = '''
1988 destElem = (srcElem1 > srcElem2) ?
1989 ((BigElement)srcElem1 - (BigElement)srcElem2) :
1990 ((BigElement)srcElem2 - (BigElement)srcElem1);
1991 '''
1992 threeRegLongInst("vabdl", "Vabdl", "SimdAddOp", smallTypes, vabdlCode)
1993
1994 vtstCode = '''
1995 destElem = (srcElem1 & srcElem2) ? (Element)(-1) : 0;
1996 '''
1997 threeEqualRegInst("vtst", "VtstD", "SimdAluOp", unsignedTypes, 2, vtstCode)
1998 threeEqualRegInst("vtst", "VtstQ", "SimdAluOp", unsignedTypes, 4, vtstCode)
1999
2000 vmulCode = '''
2001 destElem = srcElem1 * srcElem2;
2002 '''
2003 threeEqualRegInst("vmul", "NVmulD", "SimdMultOp", allTypes, 2, vmulCode)
2004 threeEqualRegInst("vmul", "NVmulQ", "SimdMultOp", allTypes, 4, vmulCode)
2005 vmullCode = '''
2006 destElem = (BigElement)srcElem1 * (BigElement)srcElem2;
2007 '''
2008 threeRegLongInst("vmull", "Vmull", "SimdMultOp", smallTypes, vmullCode)
2009
# VMLA (integer): add the product of the sources to the destination.
vmlaCode = '''
    destElem += srcElem1 * srcElem2;
'''
threeEqualRegInst("vmla", "NVmlaD", "SimdMultAccOp", allTypes, 2,
                  vmlaCode, True)
threeEqualRegInst("vmla", "NVmlaQ", "SimdMultAccOp", allTypes, 4,
                  vmlaCode, True)

# VMLAL: as above, with the product formed from BigElement casts of the
# sources.
vmlalCode = '''
    destElem += (BigElement)srcElem1 * (BigElement)srcElem2;
'''
threeRegLongInst("vmlal", "Vmlal", "SimdMultAccOp", smallTypes,
                 vmlalCode, True)
2019
# VQDMLAL: doubling multiply, then add into the destination element.
# The doubled product is replaced by a fixed value (and QC is set) for
# the listed maxNeg/halfNeg operand pairs; the add is then checked for
# overflow by comparing the ltz() results of accumulator, product and
# sum, and the destination is clamped when they disagree.
# NOTE(review): the halfNeg pairings take the same clamping path as
# maxNeg * maxNeg -- confirm against the architecture pseudocode.
vqdmlalCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    const Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
    const Element halfNeg = maxNeg / 2;
    const bool src1Max = (srcElem1 == maxNeg);
    const bool src2Max = (srcElem2 == maxNeg);
    BigElement product = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
    if ((src1Max && (src2Max || srcElem2 == halfNeg)) ||
        (srcElem1 == halfNeg && src2Max)) {
        product = ~((BigElement)maxNeg << (sizeof(Element) * 8));
        fpscr.qc = 1;
    }
    const bool negAcc = ltz(destElem);
    const bool negProduct = ltz(product);
    destElem += product;
    const bool negSum = ltz(destElem);
    if (negAcc == negProduct && negProduct != negSum) {
        destElem = mask(sizeof(BigElement) * 8 - 1);
        if (negAcc) {
            destElem = ~destElem;
        }
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeRegLongInst("vqdmlal", "Vqdmlal", "SimdMultAccOp", smallTypes,
                 vqdmlalCode, True)
2044
# VQDMLSL: doubling multiply, then subtract from the destination
# element. The product is clamped for the same operand pairs as in the
# accumulate form; the subtraction is checked for overflow using the
# ltz() results of the accumulator, the negated product and the
# difference.
# NOTE(review): the halfNeg pairings take the same clamping path as
# maxNeg * maxNeg -- confirm against the architecture pseudocode.
vqdmlslCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    const Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
    const Element halfNeg = maxNeg / 2;
    const bool src1Max = (srcElem1 == maxNeg);
    const bool src2Max = (srcElem2 == maxNeg);
    BigElement product = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
    if ((src1Max && (src2Max || srcElem2 == halfNeg)) ||
        (srcElem1 == halfNeg && src2Max)) {
        product = ~((BigElement)maxNeg << (sizeof(Element) * 8));
        fpscr.qc = 1;
    }
    const bool negAcc = ltz(destElem);
    const bool negSubtrahend = ltz((BigElement)-product);
    destElem -= product;
    const bool negDiff = ltz(destElem);
    if (negAcc == negSubtrahend && negSubtrahend != negDiff) {
        destElem = mask(sizeof(BigElement) * 8 - 1);
        if (negAcc) {
            destElem = ~destElem;
        }
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeRegLongInst("vqdmlsl", "Vqdmlsl", "SimdMultAccOp", smallTypes,
                 vqdmlslCode, True)
2069
# VQDMULL: doubling multiply long. When both sources equal the value
# with only the top Element bit set, the result is replaced by the
# complement of that value shifted into the upper half of BigElement and
# QC is set.
vqdmullCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
    if (srcElem1 == srcElem2 &&
            srcElem1 == (Element)((Element)1 <<
                                  (Element)(sizeof(Element) * 8 - 1))) {
        destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8));
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
# This snippet never reads the destination, so it is classed as a plain
# multiply ("SimdMultOp"), matching the other registration of the same
# snippet in this file.
threeRegLongInst("vqdmull", "Vqdmull", "SimdMultOp", smallTypes,
                 vqdmullCode)
2082
# VMLS (integer): subtract the product of the sources from the
# destination.
vmlsCode = '''
    destElem -= srcElem1 * srcElem2;
'''
threeEqualRegInst("vmls", "NVmlsD", "SimdMultAccOp", allTypes, 2,
                  vmlsCode, True)
threeEqualRegInst("vmls", "NVmlsQ", "SimdMultAccOp", allTypes, 4,
                  vmlsCode, True)

# VMLSL: as above, with the product formed from BigElement casts of the
# sources.
vmlslCode = '''
    destElem -= (BigElement)srcElem1 * (BigElement)srcElem2;
'''
threeRegLongInst("vmlsl", "Vmlsl", "SimdMultAccOp", smallTypes,
                 vmlslCode, True)
2092
# Polynomial VMUL: carry-less multiply. For every set bit of the second
# source, XOR in the first source shifted left by that bit position.
vmulpCode = '''
    destElem = 0;
    for (unsigned bitPos = 0; bitPos < sizeof(Element) * 8; bitPos++) {
        if (bits(srcElem2, bitPos)) {
            destElem ^= srcElem1 << bitPos;
        }
    }
'''
threeEqualRegInst("vmul", "NVmulpD", "SimdMultOp", unsignedTypes, 2,
                  vmulpCode)
threeEqualRegInst("vmul", "NVmulpQ", "SimdMultOp", unsignedTypes, 4,
                  vmulpCode)

# Polynomial VMULL: same loop, with the first source cast to BigElement
# before each shift.
vmullpCode = '''
    destElem = 0;
    for (unsigned bitPos = 0; bitPos < sizeof(Element) * 8; bitPos++) {
        if (bits(srcElem2, bitPos)) {
            destElem ^= (BigElement)srcElem1 << bitPos;
        }
    }
'''
threeRegLongInst("vmull", "Vmullp", "SimdMultOp", smallUnsignedTypes,
                 vmullpCode)
2110
# Pairwise integer max/min: reuse the vmaxCode / vminCode snippets with
# pairwise=True.
threeEqualRegInst("vpmax", "VpmaxD", "SimdCmpOp", smallTypes, 2,
                  vmaxCode, pairwise=True)

threeEqualRegInst("vpmin", "VpminD", "SimdCmpOp", smallTypes, 2,
                  vminCode, pairwise=True)
2114
# VQDMULH: high half of the doubled product. When both sources equal
# the value with only the top Element bit set, the result is the
# complement of that value and QC is set.
vqdmulhCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    const Element topBitOnly =
        (Element)((Element)1 << (sizeof(Element) * 8 - 1));
    destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >>
               (sizeof(Element) * 8);
    if (srcElem1 == topBitOnly && srcElem2 == topBitOnly) {
        destElem = ~srcElem1;
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeEqualRegInst("vqdmulh", "VqdmulhD", "SimdMultOp",
                  smallSignedTypes, 2, vqdmulhCode)
threeEqualRegInst("vqdmulh", "VqdmulhQ", "SimdMultOp",
                  smallSignedTypes, 4, vqdmulhCode)
2129
# VQRDMULH: high half of the doubled product after adding a rounding
# constant of half an Element. For the listed maxNeg/halfNeg operand
# pairs the result is overwritten (based on its computed sign) and QC is
# set.
# NOTE(review): the halfNeg pairings take the same clamping path as
# maxNeg * maxNeg -- confirm against the architecture pseudocode.
vqrdmulhCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    const Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
    const Element halfNeg = maxNeg / 2;
    const bool src1Max = (srcElem1 == maxNeg);
    const bool src2Max = (srcElem2 == maxNeg);
    destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
                ((int64_t)1 << (sizeof(Element) * 8 - 1))) >>
               (sizeof(Element) * 8);
    if ((src1Max && (src2Max || srcElem2 == halfNeg)) ||
        (srcElem1 == halfNeg && src2Max)) {
        if (destElem < 0) {
            destElem = mask(sizeof(Element) * 8 - 1);
        } else {
            destElem = maxNeg;
        }
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeEqualRegInst("vqrdmulh", "VqrdmulhD", "SimdMultOp",
                  smallSignedTypes, 2, vqrdmulhCode)
threeEqualRegInst("vqrdmulh", "VqrdmulhQ", "SimdMultOp",
                  smallSignedTypes, 4, vqrdmulhCode)
2153
# Floating-point VMAX. processNans() gets the first shot at producing
# the result; if it reports done, only the flush-to-zero check is run
# (setting IDC), otherwise the max is computed through binaryOp().
vmaxfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    bool done;
    destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
    if (done) {
        if (flushToZero(srcReg1, srcReg2)) {
            fpscr.idc = 1;
        }
    } else {
        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMaxS,
                           true, true, VfpRoundNearest);
    }
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vmax", "VmaxDFp", "SimdFloatCmpOp", ("float",),
                    2, vmaxfpCode)
threeEqualRegInstFp("vmax", "VmaxQFp", "SimdFloatCmpOp", ("float",),
                    4, vmaxfpCode)
2168
# Floating-point VMIN. processNans() gets the first shot at producing
# the result; if it reports done, only the flush-to-zero check is run
# (setting IDC), otherwise the min is computed through binaryOp().
vminfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    bool done;
    destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
    if (done) {
        if (flushToZero(srcReg1, srcReg2)) {
            fpscr.idc = 1;
        }
    } else {
        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMinS,
                           true, true, VfpRoundNearest);
    }
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vmin", "VminDFp", "SimdFloatCmpOp", ("float",),
                    2, vminfpCode)
threeEqualRegInstFp("vmin", "VminQFp", "SimdFloatCmpOp", ("float",),
                    4, vminfpCode)
2183
# Pairwise floating-point max/min: reuse the vmaxfpCode / vminfpCode
# snippets with pairwise=True.
threeEqualRegInstFp("vpmax", "VpmaxDFp", "SimdFloatCmpOp",
                    ("float",), 2, vmaxfpCode, pairwise=True)
threeEqualRegInstFp("vpmax", "VpmaxQFp", "SimdFloatCmpOp",
                    ("float",), 4, vmaxfpCode, pairwise=True)

threeEqualRegInstFp("vpmin", "VpminDFp", "SimdFloatCmpOp",
                    ("float",), 2, vminfpCode, pairwise=True)
threeEqualRegInstFp("vpmin", "VpminQFp", "SimdFloatCmpOp",
                    ("float",), 4, vminfpCode, pairwise=True)
2193
# Floating-point VADD: a single fpAddS through binaryOp(); the local
# FPSCR copy is written back to FpscrExc afterwards.
vaddfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    destReg = binaryOp(fpscr, srcReg1, srcReg2,
                       fpAddS, true, true, VfpRoundNearest);
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vadd", "VaddDFp", "SimdFloatAddOp", ("float",),
                    2, vaddfpCode)
threeEqualRegInstFp("vadd", "VaddQFp", "SimdFloatAddOp", ("float",),
                    4, vaddfpCode)

# Pairwise floating-point add: same snippet with pairwise=True.
threeEqualRegInstFp("vpadd", "VpaddDFp", "SimdFloatAddOp", ("float",),
                    2, vaddfpCode, pairwise=True)
threeEqualRegInstFp("vpadd", "VpaddQFp", "SimdFloatAddOp", ("float",),
                    4, vaddfpCode, pairwise=True)
2207
# Floating-point VSUB: a single fpSubS through binaryOp().
vsubfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    destReg = binaryOp(fpscr, srcReg1, srcReg2,
                       fpSubS, true, true, VfpRoundNearest);
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vsub", "VsubDFp", "SimdFloatAddOp", ("float",),
                    2, vsubfpCode)
threeEqualRegInstFp("vsub", "VsubQFp", "SimdFloatAddOp", ("float",),
                    4, vsubfpCode)

# Floating-point VMUL: a single fpMulS through binaryOp().
vmulfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    destReg = binaryOp(fpscr, srcReg1, srcReg2,
                       fpMulS, true, true, VfpRoundNearest);
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vmul", "NVmulDFp", "SimdFloatMultOp", ("float",),
                    2, vmulfpCode)
threeEqualRegInstFp("vmul", "NVmulQFp", "SimdFloatMultOp", ("float",),
                    4, vmulfpCode)
2225
# Floating-point VMLA: multiply the sources, then add the product to
# the destination. Two separate binaryOp() calls, not a fused operation.
vmlafpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float product = binaryOp(fpscr, srcReg1, srcReg2,
                             fpMulS, true, true, VfpRoundNearest);
    destReg = binaryOp(fpscr, product, destReg,
                       fpAddS, true, true, VfpRoundNearest);
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vmla", "NVmlaDFp", "SimdFloatMultAccOp",
                    ("float",), 2, vmlafpCode, True)
threeEqualRegInstFp("vmla", "NVmlaQFp", "SimdFloatMultAccOp",
                    ("float",), 4, vmlafpCode, True)

# Floating-point VMLS: multiply the sources, then subtract the product
# from the destination. Two separate binaryOp() calls.
vmlsfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float product = binaryOp(fpscr, srcReg1, srcReg2,
                             fpMulS, true, true, VfpRoundNearest);
    destReg = binaryOp(fpscr, destReg, product,
                       fpSubS, true, true, VfpRoundNearest);
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vmls", "NVmlsDFp", "SimdFloatMultAccOp",
                    ("float",), 2, vmlsfpCode, True)
threeEqualRegInstFp("vmls", "NVmlsQFp", "SimdFloatMultAccOp",
                    ("float",), 4, vmlsfpCode, True)
2247
# Floating-point VCGT. The comparison helper's result is decoded here:
# a result of 0 yields an all-ones destination, anything else yields
# zero, and a result of 2.0 additionally sets IOC.
vcgtfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float cmp = binaryOp(fpscr, srcReg1, srcReg2, vcgtFunc,
                         true, true, VfpRoundNearest);
    if (cmp == 0) {
        destReg = -1;
    } else {
        destReg = 0;
    }
    if (cmp == 2.0) {
        fpscr.ioc = 1;
    }
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vcgt", "VcgtDFp", "SimdFloatCmpOp", ("float",),
                    2, vcgtfpCode, toInt=True)
threeEqualRegInstFp("vcgt", "VcgtQFp", "SimdFloatCmpOp", ("float",),
                    4, vcgtfpCode, toInt=True)

# Floating-point VCGE: same decoding of the helper result as above.
vcgefpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float cmp = binaryOp(fpscr, srcReg1, srcReg2, vcgeFunc,
                         true, true, VfpRoundNearest);
    if (cmp == 0) {
        destReg = -1;
    } else {
        destReg = 0;
    }
    if (cmp == 2.0) {
        fpscr.ioc = 1;
    }
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vcge", "VcgeDFp", "SimdFloatCmpOp", ("float",),
                    2, vcgefpCode, toInt=True)
threeEqualRegInstFp("vcge", "VcgeQFp", "SimdFloatCmpOp", ("float",),
                    4, vcgefpCode, toInt=True)
2275
# Floating-point VACGT. A helper result of 0 yields an all-ones
# destination, anything else yields zero; a result of 2.0 also sets IOC.
vacgtfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float cmp = binaryOp(fpscr, srcReg1, srcReg2, vacgtFunc,
                         true, true, VfpRoundNearest);
    if (cmp == 0) {
        destReg = -1;
    } else {
        destReg = 0;
    }
    if (cmp == 2.0) {
        fpscr.ioc = 1;
    }
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vacgt", "VacgtDFp", "SimdFloatCmpOp", ("float",),
                    2, vacgtfpCode, toInt=True)
threeEqualRegInstFp("vacgt", "VacgtQFp", "SimdFloatCmpOp", ("float",),
                    4, vacgtfpCode, toInt=True)

# Floating-point VACGE: same decoding of the helper result as above.
vacgefpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float cmp = binaryOp(fpscr, srcReg1, srcReg2, vacgeFunc,
                         true, true, VfpRoundNearest);
    if (cmp == 0) {
        destReg = -1;
    } else {
        destReg = 0;
    }
    if (cmp == 2.0) {
        fpscr.ioc = 1;
    }
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vacge", "VacgeDFp", "SimdFloatCmpOp", ("float",),
                    2, vacgefpCode, toInt=True)
threeEqualRegInstFp("vacge", "VacgeQFp", "SimdFloatCmpOp", ("float",),
                    4, vacgefpCode, toInt=True)
2303
# Floating-point VCEQ. A helper result of 0 yields an all-ones
# destination, anything else yields zero; a result of 2.0 also sets IOC.
vceqfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float cmp = binaryOp(fpscr, srcReg1, srcReg2, vceqFunc,
                         true, true, VfpRoundNearest);
    if (cmp == 0) {
        destReg = -1;
    } else {
        destReg = 0;
    }
    if (cmp == 2.0) {
        fpscr.ioc = 1;
    }
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vceq", "VceqDFp", "SimdFloatCmpOp", ("float",),
                    2, vceqfpCode, toInt=True)
threeEqualRegInstFp("vceq", "VceqQFp", "SimdFloatCmpOp", ("float",),
                    4, vceqfpCode, toInt=True)
2317
# VRECPS: delegates to the fpRecpsS helper through binaryOp().
vrecpsCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    destReg = binaryOp(fpscr, srcReg1, srcReg2,
                       fpRecpsS, true, true, VfpRoundNearest);
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vrecps", "VrecpsDFp", "SimdFloatMultAccOp",
                    ("float",), 2, vrecpsCode)
threeEqualRegInstFp("vrecps", "VrecpsQFp", "SimdFloatMultAccOp",
                    ("float",), 4, vrecpsCode)

# VRSQRTS: delegates to the fpRSqrtsS helper through binaryOp().
vrsqrtsCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    destReg = binaryOp(fpscr, srcReg1, srcReg2,
                       fpRSqrtsS, true, true, VfpRoundNearest);
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vrsqrts", "VrsqrtsDFp", "SimdFloatMiscOp",
                    ("float",), 2, vrsqrtsCode)
threeEqualRegInstFp("vrsqrts", "VrsqrtsQFp", "SimdFloatMiscOp",
                    ("float",), 4, vrsqrtsCode)
2335
# Floating-point VABD: subtract through binaryOp(), then take the
# absolute value of the difference.
vabdfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float diff = binaryOp(fpscr, srcReg1, srcReg2,
                          fpSubS, true, true, VfpRoundNearest);
    destReg = fabs(diff);
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vabd", "VabdDFp", "SimdFloatAddOp", ("float",),
                    2, vabdfpCode)
threeEqualRegInstFp("vabd", "VabdQFp", "SimdFloatAddOp", ("float",),
                    4, vabdfpCode)
2345
# Additional registrations that reuse the snippets defined above through
# the twoEqualRegInst / twoEqualRegInstFp / twoRegLongInst generators.
# NOTE(review): presumably the by-scalar encodings -- confirm against
# the generator definitions.

# Multiply-accumulate.
twoEqualRegInst("vmla", "VmlasD", "SimdMultAccOp", unsignedTypes, 2,
                vmlaCode, True)
twoEqualRegInst("vmla", "VmlasQ", "SimdMultAccOp", unsignedTypes, 4,
                vmlaCode, True)
twoEqualRegInstFp("vmla", "VmlasDFp", "SimdFloatMultAccOp",
                  ("float",), 2, vmlafpCode, True)
twoEqualRegInstFp("vmla", "VmlasQFp", "SimdFloatMultAccOp",
                  ("float",), 4, vmlafpCode, True)
twoRegLongInst("vmlal", "Vmlals", "SimdMultAccOp", smallTypes,
               vmlalCode, True)

# Multiply-subtract.
twoEqualRegInst("vmls", "VmlssD", "SimdMultAccOp", allTypes, 2,
                vmlsCode, True)
twoEqualRegInst("vmls", "VmlssQ", "SimdMultAccOp", allTypes, 4,
                vmlsCode, True)
twoEqualRegInstFp("vmls", "VmlssDFp", "SimdFloatMultAccOp",
                  ("float",), 2, vmlsfpCode, True)
twoEqualRegInstFp("vmls", "VmlssQFp", "SimdFloatMultAccOp",
                  ("float",), 4, vmlsfpCode, True)
twoRegLongInst("vmlsl", "Vmlsls", "SimdMultAccOp", smallTypes,
               vmlslCode, True)

# Multiply.
twoEqualRegInst("vmul", "VmulsD", "SimdMultOp", allTypes, 2, vmulCode)
twoEqualRegInst("vmul", "VmulsQ", "SimdMultOp", allTypes, 4, vmulCode)
twoEqualRegInstFp("vmul", "VmulsDFp", "SimdFloatMultOp", ("float",),
                  2, vmulfpCode)
twoEqualRegInstFp("vmul", "VmulsQFp", "SimdFloatMultOp", ("float",),
                  4, vmulfpCode)
twoRegLongInst("vmull", "Vmulls", "SimdMultOp", smallTypes, vmullCode)

# Saturating doubling multiplies.
twoRegLongInst("vqdmull", "Vqdmulls", "SimdMultOp", smallTypes,
               vqdmullCode)
twoRegLongInst("vqdmlal", "Vqdmlals", "SimdMultAccOp", smallTypes,
               vqdmlalCode, True)
twoRegLongInst("vqdmlsl", "Vqdmlsls", "SimdMultAccOp", smallTypes,
               vqdmlslCode, True)
twoEqualRegInst("vqdmulh", "VqdmulhsD", "SimdMultOp",
                smallSignedTypes, 2, vqdmulhCode)
twoEqualRegInst("vqdmulh", "VqdmulhsQ", "SimdMultOp",
                smallSignedTypes, 4, vqdmulhCode)
twoEqualRegInst("vqrdmulh", "VqrdmulhsD", "SimdMultOp",
                smallSignedTypes, 2, vqrdmulhCode)
twoEqualRegInst("vqrdmulh", "VqrdmulhsQ", "SimdMultOp",
                smallSignedTypes, 4, vqrdmulhCode)
2373
# VSHR (immediate): plain right shift. A shift count of the full element
# width or more is handled explicitly: the result is -1 when ltz()
# reports the source as negative and 0 otherwise.
vshrCode = '''
    if (imm < sizeof(srcElem1) * 8) {
        destElem = srcElem1 >> imm;
    } else {
        destElem = ltz(srcElem1) ? -1 : 0;
    }
'''
twoRegShiftInst("vshr", "NVshrD", "SimdShiftOp", allTypes, 2, vshrCode)
twoRegShiftInst("vshr", "NVshrQ", "SimdShiftOp", allTypes, 4, vshrCode)
2386
# VSRA (immediate): shift the source right and add it to the
# destination. A shift count of the full element width or more
# contributes -1 when ltz() reports the source as negative and 0
# otherwise.
vsraCode = '''
    Element mid;
    if (imm >= sizeof(srcElem1) * 8) {
        mid = ltz(srcElem1) ? -1 : 0;
    } else {
        mid = srcElem1 >> imm;
        // If the source was negative but the shifted value is not,
        // fill in the upper bits by hand.
        if (ltz(srcElem1) && !ltz(mid)) {
            mid |= -(mid & ((Element)1 <<
                            (sizeof(Element) * 8 - 1 - imm)));
        }
    }
    destElem += mid;
'''
twoRegShiftInst("vsra", "NVsraD", "SimdShiftAccOp", allTypes, 2,
                vsraCode, True)
twoRegShiftInst("vsra", "NVsraQ", "SimdShiftAccOp", allTypes, 4,
                vsraCode, True)
2402
# VRSHR (immediate): rounding right shift. The bit just below the
# shifted-out boundary (bit imm - 1) is added back in as the rounding
# increment. A zero shift copies the source; a shift larger than the
# element width yields zero.
vrshrCode = '''
    if (imm == 0) {
        destElem = srcElem1;
    } else if (imm > sizeof(srcElem1) * 8) {
        destElem = 0;
    } else {
        Element roundBit = bits(srcElem1, imm - 1);
        destElem = ((srcElem1 >> (imm - 1)) >> 1) + roundBit;
    }
'''
twoRegShiftInst("vrshr", "NVrshrD", "SimdShiftOp", allTypes, 2,
                vrshrCode)
twoRegShiftInst("vrshr", "NVrshrQ", "SimdShiftOp", allTypes, 4,
                vrshrCode)
2415
# VRSRA (immediate): rounding right shift, accumulated into the
# destination. A zero shift adds the source unchanged; a shift larger
# than the element width adds nothing.
vrsraCode = '''
    if (imm == 0) {
        destElem += srcElem1;
    } else if (imm <= sizeof(srcElem1) * 8) {
        Element roundBit = bits(srcElem1, imm - 1);
        destElem += ((srcElem1 >> (imm - 1)) >> 1) + roundBit;
    }
'''
twoRegShiftInst("vrsra", "NVrsraD", "SimdShiftAccOp", allTypes, 2,
                vrsraCode, True)
twoRegShiftInst("vrsra", "NVrsraQ", "SimdShiftAccOp", allTypes, 4,
                vrsraCode, True)
2428
# VSRI: shift right and insert. The shifted source replaces the low
# bits of the destination; the top imm bits of the destination are
# kept. A shift count of the full element width or more leaves the
# destination untouched, so no assignment is made in that case.
vsriCode = '''
    if (imm < sizeof(Element) * 8) {
        destElem = (srcElem1 >> imm) |
                   (destElem & ~mask(sizeof(Element) * 8 - imm));
    }
'''
twoRegShiftInst("vsri", "NVsriD", "SimdShiftOp", unsignedTypes, 2,
                vsriCode, True)
twoRegShiftInst("vsri", "NVsriQ", "SimdShiftOp", unsignedTypes, 4,
                vsriCode, True)
2438
# VSHL (immediate): left shift. A shift count of the full element width
# or more is performed as two smaller shifts, so no single shift count
# reaches the element width.
vshlCode = '''
    if (imm < sizeof(Element) * 8) {
        destElem = srcElem1 << imm;
    } else {
        destElem = (srcElem1 << (sizeof(Element) * 8 - 1)) << 1;
    }
'''
twoRegShiftInst("vshl", "NVshlD", "SimdShiftOp", unsignedTypes, 2,
                vshlCode)
twoRegShiftInst("vshl", "NVshlQ", "SimdShiftOp", unsignedTypes, 4,
                vshlCode)
2447
# VSLI: shift left and insert. The shifted source replaces the high
# bits of the destination; the low imm bits of the destination are
# kept. A shift count of the full element width or more leaves the
# destination untouched, so no assignment is made in that case.
vsliCode = '''
    if (imm < sizeof(Element) * 8) {
        destElem = (srcElem1 << imm) | (destElem & mask(imm));
    }
'''
twoRegShiftInst("vsli", "NVsliD", "SimdShiftOp", unsignedTypes, 2,
                vsliCode, True)
twoRegShiftInst("vsli", "NVsliQ", "SimdShiftOp", unsignedTypes, 4,
                vsliCode, True)
2456
# VQSHL (immediate, signed types): saturating left shift. Overflow is
# detected from the top imm + 1 bits of the source, which must be all
# zeros or all ones for the shifted value to be kept. On overflow the
# result is the value with only the top bit set, complemented when the
# source is positive, and QC is set.
vqshlCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    const size_t elemBits = sizeof(Element) * 8;
    bool overflow = false;
    if (imm >= elemBits) {
        overflow = (srcElem1 != 0);
        destElem = 0;
    } else if (imm) {
        uint64_t topBits = bits((uint64_t)srcElem1,
                                elemBits - 1,
                                elemBits - 1 - imm);
        overflow = (topBits != 0 && topBits != mask(imm + 1));
        destElem = (srcElem1 << imm);
    } else {
        destElem = srcElem1;
    }
    if (overflow) {
        destElem = (Element)1 << (elemBits - 1);
        if (srcElem1 > 0) {
            destElem = ~destElem;
        }
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
twoRegShiftInst("vqshl", "NVqshlD", "SimdShiftOp", signedTypes, 2,
                vqshlCode)
twoRegShiftInst("vqshl", "NVqshlQ", "SimdShiftOp", signedTypes, 4,
                vqshlCode)
2486
# Saturating left shift registered for the unsigned types. Overflow
# means any of the top imm bits of the source is set; the result is
# then all ones and QC is set.
vqshluCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    const size_t elemBits = sizeof(Element) * 8;
    bool overflow = false;
    if (imm >= elemBits) {
        overflow = (srcElem1 != 0);
        destElem = 0;
    } else if (imm) {
        uint64_t topBits = bits((uint64_t)srcElem1,
                                elemBits - 1,
                                elemBits - imm);
        overflow = (topBits != 0);
        destElem = (srcElem1 << imm);
    } else {
        destElem = srcElem1;
    }
    if (overflow) {
        destElem = mask(elemBits);
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
twoRegShiftInst("vqshlu", "NVqshluD", "SimdShiftOp", unsignedTypes, 2,
                vqshluCode)
twoRegShiftInst("vqshlu", "NVqshluQ", "SimdShiftOp", unsignedTypes, 4,
                vqshluCode)
2512
# Saturating left shift registered for the signed types, clamping to
# the range [0, all ones]. A negative source always yields zero with QC
# set, whatever the shift count. A non-negative source yields all ones
# with QC set when bits would be shifted out of the top.
vqshlusCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    const size_t elemBits = sizeof(Element) * 8;
    if (srcElem1 < 0) {
        destElem = 0;
        fpscr.qc = 1;
    } else if (imm >= elemBits) {
        if (srcElem1 > 0) {
            destElem = mask(elemBits);
            fpscr.qc = 1;
        } else {
            destElem = 0;
        }
    } else if (imm) {
        uint64_t topBits = bits((uint64_t)srcElem1,
                                elemBits - 1,
                                elemBits - imm);
        if (topBits != 0) {
            destElem = mask(elemBits);
            fpscr.qc = 1;
        } else {
            destElem = (srcElem1 << imm);
        }
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
twoRegShiftInst("vqshlus", "NVqshlusD", "SimdShiftOp", signedTypes, 2,
                vqshlusCode)
twoRegShiftInst("vqshlus", "NVqshlusQ", "SimdShiftOp", signedTypes, 4,
                vqshlusCode)
2549
# VSHRN: right shift for the narrowing generator. A shift count of the
# full source width or more yields zero.
vshrnCode = '''
    if (imm < sizeof(srcElem1) * 8) {
        destElem = srcElem1 >> imm;
    } else {
        destElem = 0;
    }
'''
twoRegNarrowShiftInst("vshrn", "NVshrn", "SimdShiftOp",
                      smallUnsignedTypes, vshrnCode)
2558
# VRSHRN: rounding right shift for the narrowing generator. Bit imm - 1
# of the source is added back in as the rounding increment. A zero
# shift copies the source; a shift larger than the source width yields
# zero.
vrshrnCode = '''
    if (imm == 0) {
        destElem = srcElem1;
    } else if (imm > sizeof(srcElem1) * 8) {
        destElem = 0;
    } else {
        Element roundBit = bits(srcElem1, imm - 1);
        destElem = ((srcElem1 >> (imm - 1)) >> 1) + roundBit;
    }
'''
twoRegNarrowShiftInst("vrshrn", "NVrshrn", "SimdShiftOp",
                      smallUnsignedTypes, vrshrnCode)
2570
# VQSHRN (signed types): saturating narrowing right shift. The shifted
# value is sign-extended by hand, then checked for surviving a round
# trip through Element. If it does not, the result is the largest
# positive Element pattern (complemented for a negative source) and QC
# is set.
vqshrnCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm > sizeof(srcElem1) * 8) {
        if (!(srcElem1 == 0 || srcElem1 == -1)) {
            fpscr.qc = 1;
        }
        destElem = 0;
    } else if (imm) {
        BigElement shifted = ((srcElem1 >> (imm - 1)) >> 1);
        shifted |= -(shifted & ((BigElement)1 <<
                                (sizeof(BigElement) * 8 - 1 - imm)));
        if (shifted == (Element)shifted) {
            destElem = shifted;
        } else {
            destElem = mask(sizeof(Element) * 8 - 1);
            if (srcElem1 < 0) {
                destElem = ~destElem;
            }
            fpscr.qc = 1;
        }
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
twoRegNarrowShiftInst("vqshrn", "NVqshrn", "SimdShiftOp",
                      smallSignedTypes, vqshrnCode)
2595
# Saturating narrowing right shift registered for the unsigned types.
# If the shifted value does not survive a round trip through Element,
# the result is all ones and QC is set.
vqshrunCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm > sizeof(srcElem1) * 8) {
        if (srcElem1 != 0) {
            fpscr.qc = 1;
        }
        destElem = 0;
    } else if (imm) {
        BigElement shifted = ((srcElem1 >> (imm - 1)) >> 1);
        if (shifted == (Element)shifted) {
            destElem = shifted;
        } else {
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        }
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
twoRegNarrowShiftInst("vqshrun", "NVqshrun", "SimdShiftOp",
                      smallUnsignedTypes, vqshrunCode)
2617
# Saturating narrowing right shift registered for the signed types,
# clamping to the range [0, all ones]. If any bit above the Element
# width remains set after the shift, the result is 0 for a negative
# source and all ones otherwise, and QC is set.
vqshrunsCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm > sizeof(srcElem1) * 8) {
        if (srcElem1 != 0) {
            fpscr.qc = 1;
        }
        destElem = 0;
    } else if (imm) {
        BigElement shifted = ((srcElem1 >> (imm - 1)) >> 1);
        const bool fits = (bits(shifted, sizeof(BigElement) * 8 - 1,
                                sizeof(Element) * 8) == 0);
        if (fits) {
            destElem = shifted;
        } else {
            if (srcElem1 < 0) {
                destElem = 0;
            } else {
                destElem = mask(sizeof(Element) * 8);
            }
            fpscr.qc = 1;
        }
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
twoRegNarrowShiftInst("vqshrun", "NVqshruns", "SimdShiftOp",
                      smallSignedTypes, vqshrunsCode)
2644
# VQRSHRN (signed types): saturating, rounding, narrowing right shift.
# The last bit shifted out is saved and added back after the value has
# been sign-extended by hand. A value that does not survive a round
# trip through Element is replaced by the largest positive Element
# pattern (complemented for a negative source), and QC is set.
vqrshrnCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm > sizeof(srcElem1) * 8) {
        if (!(srcElem1 == 0 || srcElem1 == -1)) {
            fpscr.qc = 1;
        }
        destElem = 0;
    } else if (imm) {
        BigElement shifted = (srcElem1 >> (imm - 1));
        uint64_t roundBit = shifted & 0x1;
        shifted >>= 1;
        shifted |= -(shifted & ((BigElement)1 <<
                                (sizeof(BigElement) * 8 - 1 - imm)));
        shifted += roundBit;
        if (shifted == (Element)shifted) {
            destElem = shifted;
        } else {
            destElem = mask(sizeof(Element) * 8 - 1);
            if (srcElem1 < 0) {
                destElem = ~destElem;
            }
            fpscr.qc = 1;
        }
    } else {
        if (srcElem1 == (Element)srcElem1) {
            destElem = srcElem1;
        } else {
            destElem = mask(sizeof(Element) * 8 - 1);
            if (srcElem1 < 0) {
                destElem = ~destElem;
            }
            fpscr.qc = 1;
        }
    }
    FpscrQc = fpscr;
'''
twoRegNarrowShiftInst("vqrshrn", "NVqrshrn", "SimdShiftOp",
                      smallSignedTypes, vqrshrnCode)
2680
# Saturating, rounding, narrowing right shift registered for the
# unsigned types. The last bit shifted out is added back as the
# rounding increment. A value that does not survive a round trip
# through Element saturates to all ones (every Element bit set) and QC
# is set. The zero-shift path uses that same all-ones saturation value,
# so both saturating paths agree.
vqrshrunCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm > sizeof(srcElem1) * 8) {
        if (srcElem1 != 0)
            fpscr.qc = 1;
        destElem = 0;
    } else if (imm) {
        BigElement mid = (srcElem1 >> (imm - 1));
        uint64_t rBit = mid & 0x1;
        mid >>= 1;
        mid += rBit;
        if (mid != (Element)mid) {
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        } else {
            destElem = mid;
        }
    } else {
        if (srcElem1 != (Element)srcElem1) {
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        } else {
            destElem = srcElem1;
        }
    }
    FpscrQc = fpscr;
'''
twoRegNarrowShiftInst("vqrshrun", "NVqrshrun",
                      "SimdShiftOp", smallUnsignedTypes, vqrshrunCode)
2710
# Saturating, rounding, narrowing right shift registered for the signed
# types, clamping to the range [0, all ones]. After the rounded shift,
# any bit left above the Element width forces 0 for a negative source
# or all ones otherwise, with QC set. With a zero shift, a negative
# source yields 0 with QC set.
vqrshrunsCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm > sizeof(srcElem1) * 8) {
        if (srcElem1 != 0) {
            fpscr.qc = 1;
        }
        destElem = 0;
    } else if (imm) {
        BigElement shifted = (srcElem1 >> (imm - 1));
        uint64_t roundBit = shifted & 0x1;
        shifted >>= 1;
        shifted |= -(shifted & ((BigElement)1 <<
                                (sizeof(BigElement) * 8 - 1 - imm)));
        shifted += roundBit;
        const bool fits = (bits(shifted, sizeof(BigElement) * 8 - 1,
                                sizeof(Element) * 8) == 0);
        if (fits) {
            destElem = shifted;
        } else {
            if (srcElem1 < 0) {
                destElem = 0;
            } else {
                destElem = mask(sizeof(Element) * 8);
            }
            fpscr.qc = 1;
        }
    } else {
        if (srcElem1 >= 0) {
            destElem = srcElem1;
        } else {
            fpscr.qc = 1;
            destElem = 0;
        }
    }
    FpscrQc = fpscr;
'''
twoRegNarrowShiftInst("vqrshrun", "NVqrshruns", "SimdShiftOp",
                      smallSignedTypes, vqrshrunsCode)
2747
# VSHLL: the source is cast to BigElement, then shifted left. A shift
# count of the full destination width or more yields zero.
vshllCode = '''
    if (imm < sizeof(destElem) * 8) {
        destElem = (BigElement)srcElem1 << imm;
    } else {
        destElem = 0;
    }
'''
twoRegLongShiftInst("vshll", "NVshll", "SimdShiftOp", smallTypes,
                    vshllCode)

# VMOVL: plain copy of the source element into the destination element.
vmovlCode = '''
    destElem = (srcElem1);
'''
twoRegLongShiftInst("vmovl", "NVmovl", "SimdMiscOp", smallTypes,
                    vmovlCode)
2761
# VCVT float -> fixed point (second argument of vfpFpSToFixed false).
# IDC is set when flushToZero() reports true for the source. The
# conversion runs between prepFpState() and finishVfp(); the empty asm
# statements with memory constraints sit on either side of the
# conversion call and their position must not change.
vcvt2ufxCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    if (flushToZero(srcElem1)) {
        fpscr.idc = 1;
    }
    VfpSavedState state = prepFpState(VfpRoundNearest);
    __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
    destReg = vfpFpSToFixed(srcElem1, false, false, imm);
    __asm__ __volatile__("" :: "m" (destReg));
    finishVfp(fpscr, state, true);
    FpscrExc = fpscr;
'''
twoRegShiftInst("vcvt", "NVcvt2ufxD", "SimdCvtOp", ("float",),
                2, vcvt2ufxCode, toInt=True)
twoRegShiftInst("vcvt", "NVcvt2ufxQ", "SimdCvtOp", ("float",),
                4, vcvt2ufxCode, toInt=True)

# VCVT float -> fixed point (second argument of vfpFpSToFixed true).
# Same sequence as above.
vcvt2sfxCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    if (flushToZero(srcElem1)) {
        fpscr.idc = 1;
    }
    VfpSavedState state = prepFpState(VfpRoundNearest);
    __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
    destReg = vfpFpSToFixed(srcElem1, true, false, imm);
    __asm__ __volatile__("" :: "m" (destReg));
    finishVfp(fpscr, state, true);
    FpscrExc = fpscr;
'''
twoRegShiftInst("vcvt", "NVcvt2sfxD", "SimdCvtOp", ("float",),
                2, vcvt2sfxCode, toInt=True)
twoRegShiftInst("vcvt", "NVcvt2sfxQ", "SimdCvtOp", ("float",),
                4, vcvt2sfxCode, toInt=True)
2793
# VCVT fixed point -> float through vfpUFixedToFpS. The conversion runs
# between prepFpState() and finishVfp(); the empty asm statements with
# memory constraints sit on either side of the conversion call and
# their position must not change.
vcvtu2fpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    VfpSavedState state = prepFpState(VfpRoundNearest);
    __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
    destElem = vfpUFixedToFpS(true, true, srcReg1, false, imm);
    __asm__ __volatile__("" :: "m" (destElem));
    finishVfp(fpscr, state, true);
    FpscrExc = fpscr;
'''
twoRegShiftInst("vcvt", "NVcvtu2fpD", "SimdCvtOp", ("float",),
                2, vcvtu2fpCode, fromInt=True)
twoRegShiftInst("vcvt", "NVcvtu2fpQ", "SimdCvtOp", ("float",),
                4, vcvtu2fpCode, fromInt=True)

# VCVT fixed point -> float through vfpSFixedToFpS. Same sequence as
# above.
vcvts2fpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    VfpSavedState state = prepFpState(VfpRoundNearest);
    __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
    destElem = vfpSFixedToFpS(true, true, srcReg1, false, imm);
    __asm__ __volatile__("" :: "m" (destElem));
    finishVfp(fpscr, state, true);
    FpscrExc = fpscr;
'''
twoRegShiftInst("vcvt", "NVcvts2fpD", "SimdCvtOp", ("float",),
                2, vcvts2fpCode, fromInt=True)
twoRegShiftInst("vcvt", "NVcvts2fpQ", "SimdCvtOp", ("float",),
                4, vcvts2fpCode, fromInt=True)
2821
# VCVT through vcvtFpSFpH. The source bits are reinterpreted as a float
# first; IDC is set when flushToZero() reports true for that value. The
# conversion runs between prepFpState() and finishVfp(), with the empty
# asm statements kept on either side of it.
vcvts2hCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float srcFp1 = bitsToFp(srcElem1, (float)0.0);
    if (flushToZero(srcFp1)) {
        fpscr.idc = 1;
    }
    VfpSavedState state = prepFpState(VfpRoundNearest);
    __asm__ __volatile__("" : "=m" (srcFp1), "=m" (destElem)
                            : "m" (srcFp1), "m" (destElem));
    destElem = vcvtFpSFpH(fpscr, true, true, VfpRoundNearest,
                          fpscr.ahp, srcFp1);
    __asm__ __volatile__("" :: "m" (destElem));
    finishVfp(fpscr, state, true);
    FpscrExc = fpscr;
'''
twoRegNarrowMiscInst("vcvt", "NVcvts2h", "SimdCvtOp", ("uint16_t",),
                     vcvts2hCode)
2837
# VCVT through vcvtFpHFpS; the float result is stored back as raw bits
# via fpToBits(). The conversion runs between prepFpState() and
# finishVfp(), with the empty asm statements kept on either side of it.
vcvth2sCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    VfpSavedState state = prepFpState(VfpRoundNearest);
    __asm__ __volatile__("" : "=m" (srcElem1), "=m" (destElem)
                            : "m" (srcElem1), "m" (destElem));
    destElem = fpToBits(vcvtFpHFpS(fpscr, true, fpscr.ahp,
                                   srcElem1));
    __asm__ __volatile__("" :: "m" (destElem));
    finishVfp(fpscr, state, true);
    FpscrExc = fpscr;
'''
twoRegLongMiscInst("vcvt", "NVcvth2s", "SimdCvtOp", ("uint16_t",),
                   vcvth2sCode)
2849
# VRSQRTE (integer form): delegates to unsignedRSqrtEstimate().
vrsqrteCode = '''
    destElem = unsignedRSqrtEstimate(srcElem1);
'''
twoRegMiscInst("vrsqrte", "NVrsqrteD", "SimdSqrtOp", ("uint32_t",), 2,
               vrsqrteCode)
twoRegMiscInst("vrsqrte", "NVrsqrteQ", "SimdSqrtOp", ("uint32_t",), 4,
               vrsqrteCode)

# VRSQRTE (floating-point form): sets IDC when flushToZero() reports
# true for the source, then delegates to fprSqrtEstimate().
vrsqrtefpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    if (flushToZero(srcReg1)) {
        fpscr.idc = 1;
    }
    destReg = fprSqrtEstimate(fpscr, srcReg1);
    FpscrExc = fpscr;
'''
twoRegMiscInstFp("vrsqrte", "NVrsqrteDFp", "SimdFloatSqrtOp",
                 ("float",), 2, vrsqrtefpCode)
twoRegMiscInstFp("vrsqrte", "NVrsqrteQFp", "SimdFloatSqrtOp",
                 ("float",), 4, vrsqrtefpCode)
2865
# VRECPE (integer form): delegates to unsignedRecipEstimate().
vrecpeCode = '''
    destElem = unsignedRecipEstimate(srcElem1);
'''
twoRegMiscInst("vrecpe", "NVrecpeD", "SimdMultAccOp", ("uint32_t",), 2,
               vrecpeCode)
twoRegMiscInst("vrecpe", "NVrecpeQ", "SimdMultAccOp", ("uint32_t",), 4,
               vrecpeCode)

# VRECPE (floating-point form): sets IDC when flushToZero() reports
# true for the source, then delegates to fpRecipEstimate().
vrecpefpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    if (flushToZero(srcReg1)) {
        fpscr.idc = 1;
    }
    destReg = fpRecipEstimate(fpscr, srcReg1);
    FpscrExc = fpscr;
'''
twoRegMiscInstFp("vrecpe", "NVrecpeDFp", "SimdFloatMultAccOp",
                 ("float",), 2, vrecpefpCode)
twoRegMiscInstFp("vrecpe", "NVrecpeQFp", "SimdFloatMultAccOp",
                 ("float",), 4, vrecpefpCode)
2881
# VREV16/32/64: the element value is copied unchanged; the snippet
# redirects the index j to i with its low bits flipped, where the
# number of flipped bits follows from how many elements fit in a
# 2-, 4- or 8-byte group.
vrev16Code = '''
    unsigned elemsPerGroup = ((1 << 1) / sizeof(Element));
    j = i ^ (elemsPerGroup - 1);
    destElem = srcElem1;
'''
twoRegMiscInst("vrev16", "NVrev16D", "SimdAluOp", ("uint8_t",), 2,
               vrev16Code)
twoRegMiscInst("vrev16", "NVrev16Q", "SimdAluOp", ("uint8_t",), 4,
               vrev16Code)

vrev32Code = '''
    unsigned elemsPerGroup = ((1 << 2) / sizeof(Element));
    j = i ^ (elemsPerGroup - 1);
    destElem = srcElem1;
'''
twoRegMiscInst("vrev32", "NVrev32D", "SimdAluOp",
               ("uint8_t", "uint16_t"), 2, vrev32Code)
twoRegMiscInst("vrev32", "NVrev32Q", "SimdAluOp",
               ("uint8_t", "uint16_t"), 4, vrev32Code)

vrev64Code = '''
    unsigned elemsPerGroup = ((1 << 3) / sizeof(Element));
    j = i ^ (elemsPerGroup - 1);
    destElem = srcElem1;
'''
twoRegMiscInst("vrev64", "NVrev64D", "SimdAluOp", smallUnsignedTypes,
               2, vrev64Code)
twoRegMiscInst("vrev64", "NVrev64Q", "SimdAluOp", smallUnsignedTypes,
               4, vrev64Code)
2908
# VPADDL: sum of the two source elements, each cast to BigElement
# first.
vpaddlCode = '''
    destElem = ((BigElement)srcElem1) + ((BigElement)srcElem2);
'''
twoRegCondenseInst("vpaddl", "NVpaddlD", "SimdAddOp", smallTypes, 2,
                   vpaddlCode)
twoRegCondenseInst("vpaddl", "NVpaddlQ", "SimdAddOp", smallTypes, 4,
                   vpaddlCode)

# VPADAL: the same sum, added to the destination element.
vpadalCode = '''
    destElem += ((BigElement)srcElem1) + ((BigElement)srcElem2);
'''
twoRegCondenseInst("vpadal", "NVpadalD", "SimdAddAccOp", smallTypes, 2,
                   vpadalCode, True)
twoRegCondenseInst("vpadal", "NVpadalQ", "SimdAddAccOp", smallTypes, 4,
                   vpadalCode, True)
2920
# VCLS: counts how many bits directly below the top bit have the same value
# as the top bit.  The value is shifted left once to drop the top bit; the
# loop then keeps shifting while the new top bit still matches the original
# sign, stopping at (element width - 1) at the latest.
vclsCode = '''
    unsigned count = 0;
    if (srcElem1 < 0) {
        srcElem1 <<= 1;
        while (srcElem1 < 0 && count < sizeof(Element) * 8 - 1) {
            count++;
            srcElem1 <<= 1;
        }
    } else {
        srcElem1 <<= 1;
        while (srcElem1 >= 0 && count < sizeof(Element) * 8 - 1) {
            count++;
            srcElem1 <<= 1;
        }
    }
    destElem = count;
'''
twoRegMiscInst("vcls", "NVclsD", "SimdAluOp", signedTypes, 2, vclsCode)
twoRegMiscInst("vcls", "NVclsQ", "SimdAluOp", signedTypes, 4, vclsCode)

# VCLZ: counts left shifts until the value turns negative (top bit set) or
# the count reaches the element width.  The sign test is why this is
# instantiated with signedTypes.
vclzCode = '''
    unsigned count = 0;
    while (srcElem1 >= 0 && count < sizeof(Element) * 8) {
        count++;
        srcElem1 <<= 1;
    }
    destElem = count;
'''
twoRegMiscInst("vclz", "NVclzD", "SimdAluOp", signedTypes, 2, vclzCode)
twoRegMiscInst("vclz", "NVclzQ", "SimdAluOp", signedTypes, 4, vclzCode)

# VCNT: adds up the low bit while shifting right until the value is zero,
# i.e. the number of set bits in the element.
vcntCode = '''
    unsigned count = 0;
    while (srcElem1 && count < sizeof(Element) * 8) {
        count += srcElem1 & 0x1;
        srcElem1 >>= 1;
    }
    destElem = count;
'''

twoRegMiscInst("vcnt", "NVcntD", "SimdAluOp", unsignedTypes, 2, vcntCode)
twoRegMiscInst("vcnt", "NVcntQ", "SimdAluOp", unsignedTypes, 4, vcntCode)
2963
# VMVN (register form): bitwise complement of the source element.
vmvnCode = '''
    destElem = ~srcElem1;
'''
twoRegMiscInst("vmvn", "NVmvnD", "SimdAluOp", ("uint64_t",), 2, vmvnCode)
twoRegMiscInst("vmvn", "NVmvnQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode)

# VQABS: absolute value with saturation.  The only input singled out is the
# one with just the top bit set; for it FPSCR.qc is set and the result is
# the complement of the source.  Every other input gets a plain absolute
# value.  The FPSCR copy is always written back to FpscrQc.
vqabsCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
        fpscr.qc = 1;
        destElem = ~srcElem1;
    } else if (srcElem1 < 0) {
        destElem = -srcElem1;
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
twoRegMiscInst("vqabs", "NVqabsD", "SimdAluOp", signedTypes, 2, vqabsCode)
twoRegMiscInst("vqabs", "NVqabsQ", "SimdAluOp", signedTypes, 4, vqabsCode)

# VQNEG: negation with the same special case as VQABS for the value that
# has only its top bit set (qc set, result is the complement of the source).
vqnegCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
        fpscr.qc = 1;
        destElem = ~srcElem1;
    } else {
        destElem = -srcElem1;
    }
    FpscrQc = fpscr;
'''
twoRegMiscInst("vqneg", "NVqnegD", "SimdAluOp", signedTypes, 2, vqnegCode)
twoRegMiscInst("vqneg", "NVqnegQ", "SimdAluOp", signedTypes, 4, vqnegCode)
2997
# VABS (integer): negate negative elements, pass the rest through.  There is
# no special handling of the most negative value here.
vabsCode = '''
    if (srcElem1 < 0) {
        destElem = -srcElem1;
    } else {
        destElem = srcElem1;
    }
'''

twoRegMiscInst("vabs", "NVabsD", "SimdAluOp", signedTypes, 2, vabsCode)
twoRegMiscInst("vabs", "NVabsQ", "SimdAluOp", signedTypes, 4, vabsCode)
# VABS (floating point): the float is reinterpreted as a uint32_t through a
# union and AND-ed with mask(sizeof(Element) * 8 - 1), then read back as a
# float.
# NOTE(review): mask(n) presumably yields the low n bits set, which would
# clear only the sign bit -- confirm against its definition.
vabsfpCode = '''
    union
    {
        uint32_t i;
        float f;
    } cStruct;
    cStruct.f = srcReg1;
    cStruct.i &= mask(sizeof(Element) * 8 - 1);
    destReg = cStruct.f;
'''
twoRegMiscInstFp("vabs", "NVabsDFp", "SimdFloatAluOp", ("float",), 2, vabsfpCode)
twoRegMiscInstFp("vabs", "NVabsQFp", "SimdFloatAluOp", ("float",), 4, vabsfpCode)

# VNEG (integer): arithmetic negation of each element.
vnegCode = '''
    destElem = -srcElem1;
'''
twoRegMiscInst("vneg", "NVnegD", "SimdAluOp", signedTypes, 2, vnegCode)
twoRegMiscInst("vneg", "NVnegQ", "SimdAluOp", signedTypes, 4, vnegCode)
# VNEG (floating point): unary minus applied to the float value.
vnegfpCode = '''
    destReg = -srcReg1;
'''
twoRegMiscInstFp("vneg", "NVnegDFp", "SimdFloatAluOp", ("float",), 2, vnegfpCode)
twoRegMiscInstFp("vneg", "NVnegQFp", "SimdFloatAluOp", ("float",), 4, vnegfpCode)
3031
# Compare-against-zero family (VCGT, VCGE, VCEQ, VCLE, VCLT with a zero
# operand).
#
# Integer forms: the element becomes mask(sizeof(Element) * 8) when the
# comparison with 0 holds and 0 otherwise.
#
# Floating point forms: binaryOp() is run with the source, a 0.0 constant
# and the matching vc*Func helper.  The float it returns is treated as a
# status code rather than a numeric result:
#   res == 0    -> destReg is -1 (the instructions are built with
#                  toInt = True), otherwise destReg is 0
#   res == 2.0  -> FPSCR.ioc is additionally set
# NOTE(review): this implies the vc*Func helpers return 0 for "comparison
# true" and 2.0 for an invalid operand -- confirm against their definitions,
# which are not in this part of the file.
vcgtCode = 'destElem = (srcElem1 > 0) ? mask(sizeof(Element) * 8) : 0;'
twoRegMiscInst("vcgt", "NVcgtD", "SimdCmpOp", signedTypes, 2, vcgtCode)
twoRegMiscInst("vcgt", "NVcgtQ", "SimdCmpOp", signedTypes, 4, vcgtCode)
vcgtfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgtFunc,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    FpscrExc = fpscr;
'''
twoRegMiscInstFp("vcgt", "NVcgtDFp", "SimdFloatCmpOp", ("float",),
                 2, vcgtfpCode, toInt = True)
twoRegMiscInstFp("vcgt", "NVcgtQFp", "SimdFloatCmpOp", ("float",),
                 4, vcgtfpCode, toInt = True)

# Greater than or equal to zero.
vcgeCode = 'destElem = (srcElem1 >= 0) ? mask(sizeof(Element) * 8) : 0;'
twoRegMiscInst("vcge", "NVcgeD", "SimdCmpOp", signedTypes, 2, vcgeCode)
twoRegMiscInst("vcge", "NVcgeQ", "SimdCmpOp", signedTypes, 4, vcgeCode)
vcgefpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgeFunc,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    FpscrExc = fpscr;
'''
twoRegMiscInstFp("vcge", "NVcgeDFp", "SimdFloatCmpOp", ("float",),
                 2, vcgefpCode, toInt = True)
twoRegMiscInstFp("vcge", "NVcgeQFp", "SimdFloatCmpOp", ("float",),
                 4, vcgefpCode, toInt = True)

# Equal to zero.
vceqCode = 'destElem = (srcElem1 == 0) ? mask(sizeof(Element) * 8) : 0;'
twoRegMiscInst("vceq", "NVceqD", "SimdCmpOp", signedTypes, 2, vceqCode)
twoRegMiscInst("vceq", "NVceqQ", "SimdCmpOp", signedTypes, 4, vceqCode)
vceqfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vceqFunc,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    FpscrExc = fpscr;
'''
twoRegMiscInstFp("vceq", "NVceqDFp", "SimdFloatCmpOp", ("float",),
                 2, vceqfpCode, toInt = True)
twoRegMiscInstFp("vceq", "NVceqQFp", "SimdFloatCmpOp", ("float",),
                 4, vceqfpCode, toInt = True)

# Less than or equal to zero.
vcleCode = 'destElem = (srcElem1 <= 0) ? mask(sizeof(Element) * 8) : 0;'
twoRegMiscInst("vcle", "NVcleD", "SimdCmpOp", signedTypes, 2, vcleCode)
twoRegMiscInst("vcle", "NVcleQ", "SimdCmpOp", signedTypes, 4, vcleCode)
vclefpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcleFunc,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    FpscrExc = fpscr;
'''
twoRegMiscInstFp("vcle", "NVcleDFp", "SimdFloatCmpOp", ("float",),
                 2, vclefpCode, toInt = True)
twoRegMiscInstFp("vcle", "NVcleQFp", "SimdFloatCmpOp", ("float",),
                 4, vclefpCode, toInt = True)

# Less than zero.
vcltCode = 'destElem = (srcElem1 < 0) ? mask(sizeof(Element) * 8) : 0;'
twoRegMiscInst("vclt", "NVcltD", "SimdCmpOp", signedTypes, 2, vcltCode)
twoRegMiscInst("vclt", "NVcltQ", "SimdCmpOp", signedTypes, 4, vcltCode)
vcltfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcltFunc,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    FpscrExc = fpscr;
'''
twoRegMiscInstFp("vclt", "NVcltDFp", "SimdFloatCmpOp", ("float",),
                 2, vcltfpCode, toInt = True)
twoRegMiscInstFp("vclt", "NVcltQFp", "SimdFloatCmpOp", ("float",),
                 4, vcltfpCode, toInt = True)
3116
# VSWP: exchanges srcReg1.regs[r] and destReg.regs[r] for every r below
# rCount, using mid as the temporary.  Both operands are modified.
vswpCode = '''
    FloatRegBits mid;
    for (unsigned r = 0; r < rCount; r++) {
        mid = srcReg1.regs[r];
        srcReg1.regs[r] = destReg.regs[r];
        destReg.regs[r] = mid;
    }
'''
twoRegMiscScramble("vswp", "NVswpD", "SimdAluOp", ("uint64_t",), 2, vswpCode)
twoRegMiscScramble("vswp", "NVswpQ", "SimdAluOp", ("uint64_t",), 4, vswpCode)
3127
# VTRN: walking the elements two at a time, the even-indexed element of
# srcReg1 is exchanged with the following odd-indexed element of destReg.
# The remaining elements of both operands are left as they are.
vtrnCode = '''
    Element mid;
    for (unsigned i = 0; i < eCount; i += 2) {
        mid = srcReg1.elements[i];
        srcReg1.elements[i] = destReg.elements[i + 1];
        destReg.elements[i + 1] = mid;
    }
'''
twoRegMiscScramble("vtrn", "NVtrnD", "SimdAluOp",
                   smallUnsignedTypes, 2, vtrnCode)
twoRegMiscScramble("vtrn", "NVtrnQ", "SimdAluOp",
                   smallUnsignedTypes, 4, vtrnCode)
3140
# VUZP: de-interleaves the two operands.
#   destReg ends up with its own even-indexed elements in the lower half
#   and srcReg1's even-indexed elements in the upper half;
#   srcReg1 ends up with destReg's odd-indexed elements in the lower half
#   and its own odd-indexed elements in the upper half.
# srcReg1 is copied to mid first because it is overwritten while its old
# contents are still needed.  destReg is compacted in place in the first
# loop (index i is written only after indices 2i and 2i + 1 were read), so
# its upper half must be filled in a separate, later loop.
vuzpCode = '''
    Element mid[eCount];
    memcpy(&mid, &srcReg1, sizeof(srcReg1));
    for (unsigned i = 0; i < eCount / 2; i++) {
        srcReg1.elements[i] = destReg.elements[2 * i + 1];
        srcReg1.elements[eCount / 2 + i] = mid[2 * i + 1];
        destReg.elements[i] = destReg.elements[2 * i];
    }
    for (unsigned i = 0; i < eCount / 2; i++) {
        destReg.elements[eCount / 2 + i] = mid[2 * i];
    }
'''
twoRegMiscScramble("vuzp", "NVuzpD", "SimdAluOp", unsignedTypes, 2, vuzpCode)
twoRegMiscScramble("vuzp", "NVuzpQ", "SimdAluOp", unsignedTypes, 4, vuzpCode)
3155
# VZIP: interleaves the two operands.
#   destReg receives the lower halves of the old destReg and srcReg1,
#   alternating dest, src, dest, src, ...;
#   srcReg1 receives the upper halves of the old destReg and srcReg1 in the
#   same alternating order.
# The old destReg is saved in mid because the first loop overwrites it
# before its upper half has been consumed.  srcReg1 is rewritten in place in
# the second loop; that is safe because index eCount / 2 + i is always read
# before any write reaches it.
# Both loops use an unsigned index so the comparison against eCount / 2 has
# the same signedness in each of them.
vzipCode = '''
    Element mid[eCount];
    memcpy(&mid, &destReg, sizeof(destReg));
    for (unsigned i = 0; i < eCount / 2; i++) {
        destReg.elements[2 * i] = mid[i];
        destReg.elements[2 * i + 1] = srcReg1.elements[i];
    }
    for (unsigned i = 0; i < eCount / 2; i++) {
        srcReg1.elements[2 * i] = mid[eCount / 2 + i];
        srcReg1.elements[2 * i + 1] = srcReg1.elements[eCount / 2 + i];
    }
'''
twoRegMiscScramble("vzip", "NVzipD", "SimdAluOp", unsignedTypes, 2, vzipCode)
twoRegMiscScramble("vzip", "NVzipQ", "SimdAluOp", unsignedTypes, 4, vzipCode)
3170
# VMOVN: plain assignment of the source element to the destination element,
# registered through the narrowing helper.
vmovnCode = 'destElem = srcElem1;'
twoRegNarrowMiscInst("vmovn", "NVmovn", "SimdMiscOp", smallUnsignedTypes, vmovnCode)

# VDUP (from a vector element): also a plain assignment; which source
# element is used is decided inside twoRegMiscScInst, not here.
vdupCode = 'destElem = srcElem1;'
twoRegMiscScInst("vdup", "NVdupD", "SimdAluOp", smallUnsignedTypes, 2, vdupCode)
twoRegMiscScInst("vdup", "NVdupQ", "SimdAluOp", smallUnsignedTypes, 4, vdupCode)
3177
def vdupGprInst(name, Name, opClass, types, rCount):
    # Emits the declaration and execute code for a VDUP that replicates Op1
    # into every element of the destination, then adds one NeonExecDeclare
    # instantiation per entry in types.
    global header_output, exec_output

    # Fill a local RegVect with copies of Op1 converted to Element.
    fillCode = '''
    RegVect destReg;
    for (unsigned i = 0; i < eCount; i++) {
        destReg.elements[i] = htog((Element)Op1);
    }
    '''
    # Write-back of one destination register; "reg" is the register index.
    storeTemplate = '''
    FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    '''
    eWalkCode = fillCode
    for regIdx in range(rCount):
        eWalkCode += storeTemplate % { "reg" : regIdx }

    params = { "code": eWalkCode,
               "r_count": rCount,
               "predicate_test": predicateTest,
               "op_class": opClass }
    iop = InstObjParams(name, Name, "RegRegOp", params, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)

    # One explicit instantiation per supported element type.
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
# VDUP from a general purpose register, D and Q variants.
vdupGprInst("vdup", "NVdupDGpr", "SimdMiscOp", smallUnsignedTypes, 2)
vdupGprInst("vdup", "NVdupQGpr", "SimdMiscOp", smallUnsignedTypes, 4)

# Immediate forms.  All four are instantiated for uint64_t only and operate
# on the immediate value imm.

# VMOV (immediate): destination element is set to imm.
vmovCode = 'destElem = imm;'
oneRegImmInst("vmov", "NVmoviD", "SimdMiscOp", ("uint64_t",), 2, vmovCode)
oneRegImmInst("vmov", "NVmoviQ", "SimdMiscOp", ("uint64_t",), 4, vmovCode)

# VORR (immediate): imm is OR-ed into the existing destination value.
# NOTE(review): the trailing True is presumably what makes the helper read
# the old destination -- confirm against oneRegImmInst.
vorrCode = 'destElem |= imm;'
oneRegImmInst("vorr", "NVorriD", "SimdAluOp", ("uint64_t",), 2, vorrCode, True)
oneRegImmInst("vorr", "NVorriQ", "SimdAluOp", ("uint64_t",), 4, vorrCode, True)

# VMVN (immediate): destination element is set to the complement of imm.
# This rebinds vmvnCode, which was used earlier for the register form.
vmvnCode = 'destElem = ~imm;'
oneRegImmInst("vmvn", "NVmvniD", "SimdAluOp", ("uint64_t",), 2, vmvnCode)
oneRegImmInst("vmvn", "NVmvniQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode)

# VBIC (immediate): the bits set in imm are cleared in the destination.
vbicCode = 'destElem &= ~imm;'
oneRegImmInst("vbic", "NVbiciD", "SimdAluOp", ("uint64_t",), 2, vbicCode, True)
oneRegImmInst("vbic", "NVbiciQ", "SimdAluOp", ("uint64_t",), 4, vbicCode, True)
3220
# VQMOVN (signed): the source is assigned to the narrower destination and
# converted back to BigElement; if that round trip does not reproduce the
# source, FPSCR.qc is set and the destination is replaced by
# mask(sizeof(Element) * 8 - 1), complemented when the source is negative.
vqmovnCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    destElem = srcElem1;
    if ((BigElement)destElem != srcElem1) {
        fpscr.qc = 1;
        destElem = mask(sizeof(Element) * 8 - 1);
        if (srcElem1 < 0)
            destElem = ~destElem;
    }
    FpscrQc = fpscr;
'''
twoRegNarrowMiscInst("vqmovn", "NVqmovn", "SimdMiscOp", smallSignedTypes, vqmovnCode)

# Unsigned variant: same round-trip test, but on failure the destination is
# simply mask(sizeof(Element) * 8).  Registered under the "vqmovun" mnemonic
# for the unsigned element types.
vqmovunCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    destElem = srcElem1;
    if ((BigElement)destElem != srcElem1) {
        fpscr.qc = 1;
        destElem = mask(sizeof(Element) * 8);
    }
    FpscrQc = fpscr;
'''
twoRegNarrowMiscInst("vqmovun", "NVqmovun",
                     "SimdMiscOp", smallUnsignedTypes, vqmovunCode)

# Signed-source variant of "vqmovun": a negative source, or one whose low
# sizeof(Element) * 8 bits do not equal the whole value, sets FPSCR.qc.  The
# destination then becomes mask(sizeof(Element) * 8), complemented when the
# source is negative.
vqmovunsCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    destElem = srcElem1;
    if (srcElem1 < 0 ||
            ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) {
        fpscr.qc = 1;
        destElem = mask(sizeof(Element) * 8);
        if (srcElem1 < 0)
            destElem = ~destElem;
    }
    FpscrQc = fpscr;
'''
twoRegNarrowMiscInst("vqmovun", "NVqmovuns",
                     "SimdMiscOp", smallSignedTypes, vqmovunsCode)
3260
def buildVext(name, Name, opClass, types, rCount, op):
    # Emits a three-register-plus-immediate NEON instruction: both sources
    # are loaded register by register, the caller's snippet (op) runs, and
    # destReg is written back.  One NeonExecDeclare instantiation is added
    # per entry in types.
    global header_output, exec_output

    # Per-register templates; "reg" is replaced by the register index.
    loadTemplate = '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
    '''
    storeTemplate = '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    '''

    pieces = ['''
    RegVect srcReg1, srcReg2, destReg;
    ''']
    # The formatting is applied to the load template only, never to
    # simdEnabledCheckCode, which is prepended unmodified for each register.
    for regIdx in range(rCount):
        pieces.append(simdEnabledCheckCode +
                      loadTemplate % { "reg" : regIdx })
    pieces.append(op)
    for regIdx in range(rCount):
        pieces.append(storeTemplate % { "reg" : regIdx })
    eWalkCode = "".join(pieces)

    params = { "code": eWalkCode,
               "r_count": rCount,
               "predicate_test": predicateTest,
               "op_class": opClass }
    iop = InstObjParams(name, Name, "RegRegRegImmOp", params, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)

    # One explicit instantiation per supported element type.
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
3288
# VEXT: destination element i is taken from position i + imm of the two
# sources laid end to end: from srcReg1 while the position is below eCount,
# otherwise from srcReg2 at (position - eCount).  If the position is still
# out of range after that subtraction, an UndefinedInstruction fault is
# raised instead of writing the element; the fault is constructed with
# different arguments depending on FullSystem.
vextCode = '''
    for (unsigned i = 0; i < eCount; i++) {
        unsigned index = i + imm;
        if (index < eCount) {
            destReg.elements[i] = srcReg1.elements[index];
        } else {
            index -= eCount;
            if (index >= eCount) {
                if (FullSystem)
                    fault = new UndefinedInstruction;
                else
                    fault = new UndefinedInstruction(false, mnemonic);
            } else {
                destReg.elements[i] = srcReg2.elements[index];
            }
        }
    }
'''
buildVext("vext", "NVextD", "SimdMiscOp", ("uint8_t",), 2, vextCode)
buildVext("vext", "NVextQ", "SimdMiscOp", ("uint8_t",), 4, vextCode)
3309
def buildVtbxl(name, Name, opClass, length, isVtbl):
    # Emits a byte table-lookup instruction (shared by vtbl and vtbx).
    #
    # length -- substituted into the C++ code as the constant "length".
    #           The first 2 * length words of the 8-word table are loaded
    #           from FpOp1P<n>_uw, the rest are zeroed, and an index is in
    #           range when it is below 8 * length.
    # isVtbl -- text substituted as the initialiser of the C++ bool
    #           "isVtbl".  When true, an out-of-range index stores 0 in the
    #           destination byte; when false, that byte keeps the value read
    #           from the destination register.
    global header_output, decoder_output, exec_output
    # Declarations plus loading of the index operand (srcReg2) and of the
    # current destination contents, which are needed for the "unchanged"
    # case.
    code = '''
    union
    {
        uint8_t bytes[32];
        FloatRegBits regs[8];
    } table;

    union
    {
        uint8_t bytes[8];
        FloatRegBits regs[2];
    } destReg, srcReg2;

    const unsigned length = %(length)d;
    const bool isVtbl = %(isVtbl)s;

    srcReg2.regs[0] = htog(FpOp2P0_uw);
    srcReg2.regs[1] = htog(FpOp2P1_uw);

    destReg.regs[0] = htog(FpDestP0_uw);
    destReg.regs[1] = htog(FpDestP1_uw);
    ''' % { "length" : length, "isVtbl" : isVtbl }
    # Only the table words that belong to the instruction are read from
    # registers; the remaining ones are given a defined value of zero.
    for reg in range(8):
        if reg < length * 2:
            code += 'table.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);\n' % \
                    { "reg" : reg }
        else:
            code += 'table.regs[%(reg)d] = 0;\n' % { "reg" : reg }
    # Per-byte lookup followed by the write-back of both destination words.
    code += '''
    for (unsigned i = 0; i < sizeof(destReg); i++) {
        uint8_t index = srcReg2.bytes[i];
        if (index < 8 * length) {
            destReg.bytes[i] = table.bytes[index];
        } else {
            if (isVtbl)
                destReg.bytes[i] = 0;
            // else destReg.bytes[i] unchanged
        }
    }

    FpDestP0_uw = gtoh(destReg.regs[0]);
    FpDestP1_uw = gtoh(destReg.regs[1]);
    '''
    iop = InstObjParams(name, Name,
                        "RegRegRegOp",
                        { "code": code,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += RegRegRegOpDeclare.subst(iop)
    decoder_output += RegRegRegOpConstructor.subst(iop)
    exec_output += PredOpExecute.subst(iop)
3363
# vtbl with a length argument of 1 to 4; isVtbl is passed as the text "true"
# because it is pasted into the generated C++ as a bool initialiser.
buildVtbxl("vtbl", "NVtbl1", "SimdMiscOp", 1, "true")
buildVtbxl("vtbl", "NVtbl2", "SimdMiscOp", 2, "true")
buildVtbxl("vtbl", "NVtbl3", "SimdMiscOp", 3, "true")
buildVtbxl("vtbl", "NVtbl4", "SimdMiscOp", 4, "true")

# vtbx with the same four lengths; isVtbl is "false" here.
buildVtbxl("vtbx", "NVtbx1", "SimdMiscOp", 1, "false")
buildVtbxl("vtbx", "NVtbx2", "SimdMiscOp", 2, "false")
buildVtbxl("vtbx", "NVtbx3", "SimdMiscOp", 3, "false")
buildVtbxl("vtbx", "NVtbx4", "SimdMiscOp", 4, "false")
3373 }};