ARM: Fix VFP enabled checks for mem instructions
[gem5.git] / src / arch / arm / isa / insts / neon.isa
1 // -*- mode:c++ -*-
2
3 // Copyright (c) 2010 ARM Limited
4 // All rights reserved
5 //
6 // The license below extends only to copyright in the software and shall
7 // not be construed as granting a license to any other intellectual
8 // property including but not limited to intellectual property relating
9 // to a hardware implementation of the functionality of the software
10 // licensed hereunder. You may use the software subject to the license
11 // terms below provided that you ensure that this notice is replicated
12 // unmodified and in its entirety in all distributions of the software,
13 // modified or unmodified, in source code or in binary form.
14 //
15 // Redistribution and use in source and binary forms, with or without
16 // modification, are permitted provided that the following conditions are
17 // met: redistributions of source code must retain the above copyright
18 // notice, this list of conditions and the following disclaimer;
19 // redistributions in binary form must reproduce the above copyright
20 // notice, this list of conditions and the following disclaimer in the
21 // documentation and/or other materials provided with the distribution;
22 // neither the name of the copyright holders nor the names of its
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Authors: Gabe Black
39
40 output header {{
41 template <template <typename T> class Base>
42 StaticInstPtr
43 decodeNeonUThreeUReg(unsigned size,
44 ExtMachInst machInst, IntRegIndex dest,
45 IntRegIndex op1, IntRegIndex op2)
46 {
47 switch (size) {
48 case 0:
49 return new Base<uint8_t>(machInst, dest, op1, op2);
50 case 1:
51 return new Base<uint16_t>(machInst, dest, op1, op2);
52 case 2:
53 return new Base<uint32_t>(machInst, dest, op1, op2);
54 case 3:
55 return new Base<uint64_t>(machInst, dest, op1, op2);
56 default:
57 return new Unknown(machInst);
58 }
59 }
60
61 template <template <typename T> class Base>
62 StaticInstPtr
63 decodeNeonSThreeUReg(unsigned size,
64 ExtMachInst machInst, IntRegIndex dest,
65 IntRegIndex op1, IntRegIndex op2)
66 {
67 switch (size) {
68 case 0:
69 return new Base<int8_t>(machInst, dest, op1, op2);
70 case 1:
71 return new Base<int16_t>(machInst, dest, op1, op2);
72 case 2:
73 return new Base<int32_t>(machInst, dest, op1, op2);
74 case 3:
75 return new Base<int64_t>(machInst, dest, op1, op2);
76 default:
77 return new Unknown(machInst);
78 }
79 }
80
81 template <template <typename T> class Base>
82 StaticInstPtr
83 decodeNeonUSThreeUReg(bool notSigned, unsigned size,
84 ExtMachInst machInst, IntRegIndex dest,
85 IntRegIndex op1, IntRegIndex op2)
86 {
87 if (notSigned) {
88 return decodeNeonUThreeUReg<Base>(size, machInst, dest, op1, op2);
89 } else {
90 return decodeNeonSThreeUReg<Base>(size, machInst, dest, op1, op2);
91 }
92 }
93
94 template <template <typename T> class Base>
95 StaticInstPtr
96 decodeNeonUThreeUSReg(unsigned size,
97 ExtMachInst machInst, IntRegIndex dest,
98 IntRegIndex op1, IntRegIndex op2)
99 {
100 switch (size) {
101 case 0:
102 return new Base<uint8_t>(machInst, dest, op1, op2);
103 case 1:
104 return new Base<uint16_t>(machInst, dest, op1, op2);
105 case 2:
106 return new Base<uint32_t>(machInst, dest, op1, op2);
107 default:
108 return new Unknown(machInst);
109 }
110 }
111
112 template <template <typename T> class Base>
113 StaticInstPtr
114 decodeNeonSThreeUSReg(unsigned size,
115 ExtMachInst machInst, IntRegIndex dest,
116 IntRegIndex op1, IntRegIndex op2)
117 {
118 switch (size) {
119 case 0:
120 return new Base<int8_t>(machInst, dest, op1, op2);
121 case 1:
122 return new Base<int16_t>(machInst, dest, op1, op2);
123 case 2:
124 return new Base<int32_t>(machInst, dest, op1, op2);
125 default:
126 return new Unknown(machInst);
127 }
128 }
129
130 template <template <typename T> class Base>
131 StaticInstPtr
132 decodeNeonUSThreeUSReg(bool notSigned, unsigned size,
133 ExtMachInst machInst, IntRegIndex dest,
134 IntRegIndex op1, IntRegIndex op2)
135 {
136 if (notSigned) {
137 return decodeNeonUThreeUSReg<Base>(
138 size, machInst, dest, op1, op2);
139 } else {
140 return decodeNeonSThreeUSReg<Base>(
141 size, machInst, dest, op1, op2);
142 }
143 }
144
145 template <template <typename T> class BaseD,
146 template <typename T> class BaseQ>
147 StaticInstPtr
148 decodeNeonUThreeSReg(bool q, unsigned size,
149 ExtMachInst machInst, IntRegIndex dest,
150 IntRegIndex op1, IntRegIndex op2)
151 {
152 if (q) {
153 return decodeNeonUThreeUSReg<BaseQ>(
154 size, machInst, dest, op1, op2);
155 } else {
156 return decodeNeonUThreeUSReg<BaseD>(
157 size, machInst, dest, op1, op2);
158 }
159 }
160
161 template <template <typename T> class BaseD,
162 template <typename T> class BaseQ>
163 StaticInstPtr
164 decodeNeonSThreeSReg(bool q, unsigned size,
165 ExtMachInst machInst, IntRegIndex dest,
166 IntRegIndex op1, IntRegIndex op2)
167 {
168 if (q) {
169 return decodeNeonSThreeUSReg<BaseQ>(
170 size, machInst, dest, op1, op2);
171 } else {
172 return decodeNeonSThreeUSReg<BaseD>(
173 size, machInst, dest, op1, op2);
174 }
175 }
176
177 template <template <typename T> class BaseD,
178 template <typename T> class BaseQ>
179 StaticInstPtr
180 decodeNeonUSThreeSReg(bool q, bool notSigned, unsigned size,
181 ExtMachInst machInst, IntRegIndex dest,
182 IntRegIndex op1, IntRegIndex op2)
183 {
184 if (notSigned) {
185 return decodeNeonUThreeSReg<BaseD, BaseQ>(
186 q, size, machInst, dest, op1, op2);
187 } else {
188 return decodeNeonSThreeSReg<BaseD, BaseQ>(
189 q, size, machInst, dest, op1, op2);
190 }
191 }
192
193 template <template <typename T> class BaseD,
194 template <typename T> class BaseQ>
195 StaticInstPtr
196 decodeNeonUThreeReg(bool q, unsigned size,
197 ExtMachInst machInst, IntRegIndex dest,
198 IntRegIndex op1, IntRegIndex op2)
199 {
200 if (q) {
201 return decodeNeonUThreeUReg<BaseQ>(
202 size, machInst, dest, op1, op2);
203 } else {
204 return decodeNeonUThreeUReg<BaseD>(
205 size, machInst, dest, op1, op2);
206 }
207 }
208
209 template <template <typename T> class BaseD,
210 template <typename T> class BaseQ>
211 StaticInstPtr
212 decodeNeonSThreeReg(bool q, unsigned size,
213 ExtMachInst machInst, IntRegIndex dest,
214 IntRegIndex op1, IntRegIndex op2)
215 {
216 if (q) {
217 return decodeNeonSThreeUReg<BaseQ>(
218 size, machInst, dest, op1, op2);
219 } else {
220 return decodeNeonSThreeUReg<BaseD>(
221 size, machInst, dest, op1, op2);
222 }
223 }
224
225 template <template <typename T> class BaseD,
226 template <typename T> class BaseQ>
227 StaticInstPtr
228 decodeNeonUSThreeReg(bool q, bool notSigned, unsigned size,
229 ExtMachInst machInst, IntRegIndex dest,
230 IntRegIndex op1, IntRegIndex op2)
231 {
232 if (notSigned) {
233 return decodeNeonUThreeReg<BaseD, BaseQ>(
234 q, size, machInst, dest, op1, op2);
235 } else {
236 return decodeNeonSThreeReg<BaseD, BaseQ>(
237 q, size, machInst, dest, op1, op2);
238 }
239 }
240
241 template <template <typename T> class BaseD,
242 template <typename T> class BaseQ>
243 StaticInstPtr
244 decodeNeonUTwoShiftReg(bool q, unsigned size,
245 ExtMachInst machInst, IntRegIndex dest,
246 IntRegIndex op1, uint64_t imm)
247 {
248 if (q) {
249 switch (size) {
250 case 0:
251 return new BaseQ<uint8_t>(machInst, dest, op1, imm);
252 case 1:
253 return new BaseQ<uint16_t>(machInst, dest, op1, imm);
254 case 2:
255 return new BaseQ<uint32_t>(machInst, dest, op1, imm);
256 case 3:
257 return new BaseQ<uint64_t>(machInst, dest, op1, imm);
258 default:
259 return new Unknown(machInst);
260 }
261 } else {
262 switch (size) {
263 case 0:
264 return new BaseD<uint8_t>(machInst, dest, op1, imm);
265 case 1:
266 return new BaseD<uint16_t>(machInst, dest, op1, imm);
267 case 2:
268 return new BaseD<uint32_t>(machInst, dest, op1, imm);
269 case 3:
270 return new BaseD<uint64_t>(machInst, dest, op1, imm);
271 default:
272 return new Unknown(machInst);
273 }
274 }
275 }
276
277 template <template <typename T> class BaseD,
278 template <typename T> class BaseQ>
279 StaticInstPtr
280 decodeNeonSTwoShiftReg(bool q, unsigned size,
281 ExtMachInst machInst, IntRegIndex dest,
282 IntRegIndex op1, uint64_t imm)
283 {
284 if (q) {
285 switch (size) {
286 case 0:
287 return new BaseQ<int8_t>(machInst, dest, op1, imm);
288 case 1:
289 return new BaseQ<int16_t>(machInst, dest, op1, imm);
290 case 2:
291 return new BaseQ<int32_t>(machInst, dest, op1, imm);
292 case 3:
293 return new BaseQ<int64_t>(machInst, dest, op1, imm);
294 default:
295 return new Unknown(machInst);
296 }
297 } else {
298 switch (size) {
299 case 0:
300 return new BaseD<int8_t>(machInst, dest, op1, imm);
301 case 1:
302 return new BaseD<int16_t>(machInst, dest, op1, imm);
303 case 2:
304 return new BaseD<int32_t>(machInst, dest, op1, imm);
305 case 3:
306 return new BaseD<int64_t>(machInst, dest, op1, imm);
307 default:
308 return new Unknown(machInst);
309 }
310 }
311 }
312
313
314 template <template <typename T> class BaseD,
315 template <typename T> class BaseQ>
316 StaticInstPtr
317 decodeNeonUSTwoShiftReg(bool q, bool notSigned, unsigned size,
318 ExtMachInst machInst, IntRegIndex dest,
319 IntRegIndex op1, uint64_t imm)
320 {
321 if (notSigned) {
322 return decodeNeonUTwoShiftReg<BaseD, BaseQ>(
323 q, size, machInst, dest, op1, imm);
324 } else {
325 return decodeNeonSTwoShiftReg<BaseD, BaseQ>(
326 q, size, machInst, dest, op1, imm);
327 }
328 }
329
330 template <template <typename T> class Base>
331 StaticInstPtr
332 decodeNeonUTwoShiftUSReg(unsigned size,
333 ExtMachInst machInst, IntRegIndex dest,
334 IntRegIndex op1, uint64_t imm)
335 {
336 switch (size) {
337 case 0:
338 return new Base<uint8_t>(machInst, dest, op1, imm);
339 case 1:
340 return new Base<uint16_t>(machInst, dest, op1, imm);
341 case 2:
342 return new Base<uint32_t>(machInst, dest, op1, imm);
343 default:
344 return new Unknown(machInst);
345 }
346 }
347
348 template <template <typename T> class BaseD,
349 template <typename T> class BaseQ>
350 StaticInstPtr
351 decodeNeonUTwoShiftSReg(bool q, unsigned size,
352 ExtMachInst machInst, IntRegIndex dest,
353 IntRegIndex op1, uint64_t imm)
354 {
355 if (q) {
356 return decodeNeonUTwoShiftUSReg<BaseQ>(
357 size, machInst, dest, op1, imm);
358 } else {
359 return decodeNeonUTwoShiftUSReg<BaseD>(
360 size, machInst, dest, op1, imm);
361 }
362 }
363
364 template <template <typename T> class Base>
365 StaticInstPtr
366 decodeNeonSTwoShiftUSReg(unsigned size,
367 ExtMachInst machInst, IntRegIndex dest,
368 IntRegIndex op1, uint64_t imm)
369 {
370 switch (size) {
371 case 0:
372 return new Base<int8_t>(machInst, dest, op1, imm);
373 case 1:
374 return new Base<int16_t>(machInst, dest, op1, imm);
375 case 2:
376 return new Base<int32_t>(machInst, dest, op1, imm);
377 default:
378 return new Unknown(machInst);
379 }
380 }
381
382 template <template <typename T> class BaseD,
383 template <typename T> class BaseQ>
384 StaticInstPtr
385 decodeNeonSTwoShiftSReg(bool q, unsigned size,
386 ExtMachInst machInst, IntRegIndex dest,
387 IntRegIndex op1, uint64_t imm)
388 {
389 if (q) {
390 return decodeNeonSTwoShiftUSReg<BaseQ>(
391 size, machInst, dest, op1, imm);
392 } else {
393 return decodeNeonSTwoShiftUSReg<BaseD>(
394 size, machInst, dest, op1, imm);
395 }
396 }
397
398 template <template <typename T> class BaseD,
399 template <typename T> class BaseQ>
400 StaticInstPtr
401 decodeNeonUSTwoShiftSReg(bool q, bool notSigned, unsigned size,
402 ExtMachInst machInst, IntRegIndex dest,
403 IntRegIndex op1, uint64_t imm)
404 {
405 if (notSigned) {
406 return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
407 q, size, machInst, dest, op1, imm);
408 } else {
409 return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
410 q, size, machInst, dest, op1, imm);
411 }
412 }
413
414 template <template <typename T> class Base>
415 StaticInstPtr
416 decodeNeonUTwoMiscUSReg(unsigned size,
417 ExtMachInst machInst, IntRegIndex dest,
418 IntRegIndex op1)
419 {
420 switch (size) {
421 case 0:
422 return new Base<uint8_t>(machInst, dest, op1);
423 case 1:
424 return new Base<uint16_t>(machInst, dest, op1);
425 case 2:
426 return new Base<uint32_t>(machInst, dest, op1);
427 default:
428 return new Unknown(machInst);
429 }
430 }
431
432 template <template <typename T> class Base>
433 StaticInstPtr
434 decodeNeonSTwoMiscUSReg(unsigned size,
435 ExtMachInst machInst, IntRegIndex dest,
436 IntRegIndex op1)
437 {
438 switch (size) {
439 case 0:
440 return new Base<int8_t>(machInst, dest, op1);
441 case 1:
442 return new Base<int16_t>(machInst, dest, op1);
443 case 2:
444 return new Base<int32_t>(machInst, dest, op1);
445 default:
446 return new Unknown(machInst);
447 }
448 }
449
450 template <template <typename T> class BaseD,
451 template <typename T> class BaseQ>
452 StaticInstPtr
453 decodeNeonUTwoMiscSReg(bool q, unsigned size,
454 ExtMachInst machInst, IntRegIndex dest,
455 IntRegIndex op1)
456 {
457 if (q) {
458 return decodeNeonUTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
459 } else {
460 return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
461 }
462 }
463
464 template <template <typename T> class BaseD,
465 template <typename T> class BaseQ>
466 StaticInstPtr
467 decodeNeonSTwoMiscSReg(bool q, unsigned size,
468 ExtMachInst machInst, IntRegIndex dest,
469 IntRegIndex op1)
470 {
471 if (q) {
472 return decodeNeonSTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
473 } else {
474 return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
475 }
476 }
477
478 template <template <typename T> class Base>
479 StaticInstPtr
480 decodeNeonUTwoMiscUReg(unsigned size,
481 ExtMachInst machInst, IntRegIndex dest,
482 IntRegIndex op1)
483 {
484 switch (size) {
485 case 0:
486 return new Base<uint8_t>(machInst, dest, op1);
487 case 1:
488 return new Base<uint16_t>(machInst, dest, op1);
489 case 2:
490 return new Base<uint32_t>(machInst, dest, op1);
491 case 3:
492 return new Base<uint64_t>(machInst, dest, op1);
493 default:
494 return new Unknown(machInst);
495 }
496 }
497
498 template <template <typename T> class Base>
499 StaticInstPtr
500 decodeNeonSTwoMiscUReg(unsigned size,
501 ExtMachInst machInst, IntRegIndex dest,
502 IntRegIndex op1)
503 {
504 switch (size) {
505 case 0:
506 return new Base<int8_t>(machInst, dest, op1);
507 case 1:
508 return new Base<int16_t>(machInst, dest, op1);
509 case 2:
510 return new Base<int32_t>(machInst, dest, op1);
511 case 3:
512 return new Base<int64_t>(machInst, dest, op1);
513 default:
514 return new Unknown(machInst);
515 }
516 }
517
518 template <template <typename T> class BaseD,
519 template <typename T> class BaseQ>
520 StaticInstPtr
521 decodeNeonSTwoMiscReg(bool q, unsigned size,
522 ExtMachInst machInst, IntRegIndex dest,
523 IntRegIndex op1)
524 {
525 if (q) {
526 return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
527 } else {
528 return decodeNeonSTwoMiscUReg<BaseD>(size, machInst, dest, op1);
529 }
530 }
531
532 template <template <typename T> class BaseD,
533 template <typename T> class BaseQ>
534 StaticInstPtr
535 decodeNeonUTwoMiscReg(bool q, unsigned size,
536 ExtMachInst machInst, IntRegIndex dest,
537 IntRegIndex op1)
538 {
539 if (q) {
540 return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
541 } else {
542 return decodeNeonUTwoMiscUReg<BaseD>(size, machInst, dest, op1);
543 }
544 }
545
546 template <template <typename T> class BaseD,
547 template <typename T> class BaseQ>
548 StaticInstPtr
549 decodeNeonUSTwoMiscSReg(bool q, bool notSigned, unsigned size,
550 ExtMachInst machInst, IntRegIndex dest,
551 IntRegIndex op1)
552 {
553 if (notSigned) {
554 return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
555 q, size, machInst, dest, op1);
556 } else {
557 return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
558 q, size, machInst, dest, op1);
559 }
560 }
561
562 }};
563
564 output exec {{
565 static float
566 vcgtFunc(float op1, float op2)
567 {
568 if (isSnan(op1) || isSnan(op2))
569 return 2.0;
570 return (op1 > op2) ? 0.0 : 1.0;
571 }
572
573 static float
574 vcgeFunc(float op1, float op2)
575 {
576 if (isSnan(op1) || isSnan(op2))
577 return 2.0;
578 return (op1 >= op2) ? 0.0 : 1.0;
579 }
580
581 static float
582 vceqFunc(float op1, float op2)
583 {
584 if (isSnan(op1) || isSnan(op2))
585 return 2.0;
586 return (op1 == op2) ? 0.0 : 1.0;
587 }
588
589 static float
590 vcleFunc(float op1, float op2)
591 {
592 if (isSnan(op1) || isSnan(op2))
593 return 2.0;
594 return (op1 <= op2) ? 0.0 : 1.0;
595 }
596
597 static float
598 vcltFunc(float op1, float op2)
599 {
600 if (isSnan(op1) || isSnan(op2))
601 return 2.0;
602 return (op1 < op2) ? 0.0 : 1.0;
603 }
604
605 static float
606 vacgtFunc(float op1, float op2)
607 {
608 if (isSnan(op1) || isSnan(op2))
609 return 2.0;
610 return (fabsf(op1) > fabsf(op2)) ? 0.0 : 1.0;
611 }
612
613 static float
614 vacgeFunc(float op1, float op2)
615 {
616 if (isSnan(op1) || isSnan(op2))
617 return 2.0;
618 return (fabsf(op1) >= fabsf(op2)) ? 0.0 : 1.0;
619 }
620 }};
621
622 let {{
623
# Text accumulators that the generator functions below append to.
header_output = exec_output = ""

# Element type names handed to the generators. The "small" groups stop at
# 32 bits; the full groups add the 64 bit type.
smallSignedTypes = ("int8_t", "int16_t", "int32_t")
signedTypes = smallSignedTypes + ("int64_t",)
smallUnsignedTypes = tuple("u" + typeName for typeName in smallSignedTypes)
unsignedTypes = tuple("u" + typeName for typeName in signedTypes)
smallTypes = smallUnsignedTypes + smallSignedTypes
allTypes = unsignedTypes + signedTypes
633
def threeEqualRegInst(name, Name, types, rCount, op,
                      readDest=False, pairwise=False):
    """Generate a three-operand integer instruction whose two sources and
    destination each span rCount registers.

    op is a C++ snippet executed once per element. It can use srcElem1
    and srcElem2 and is expected to assign destElem. When readDest is
    set, the destination registers are loaded first and destElem starts
    out as the current destination element. When pairwise is set,
    element i is given elements 2*i and 2*i+1 taken from srcReg1
    followed by srcReg2, instead of element i of each source.

    The class declaration is appended to header_output; the execute
    method and one NeonExecDeclare expansion per entry in types are
    appended to exec_output.
    """
    global header_output, exec_output

    # Per-register snippets, expanded once for each of the rCount
    # registers.
    loadSources = '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
        srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
    '''
    loadDest = '''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
    '''
    storeDest = '''
        FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
    '''

    if pairwise:
        elementLoop = '''
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = gtoh(2 * i < eCount ?
                                    srcReg1.elements[2 * i] :
                                    srcReg2.elements[2 * i - eCount]);
            Element srcElem2 = gtoh(2 * i < eCount ?
                                    srcReg1.elements[2 * i + 1] :
                                    srcReg2.elements[2 * i + 1 - eCount]);
            Element destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
        '''
    else:
        elementLoop = '''
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = gtoh(srcReg1.elements[i]);
            Element srcElem2 = gtoh(srcReg2.elements[i]);
            Element destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
        '''

    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'

    eWalkCode = simdEnabledCheckCode + '''
        RegVect srcReg1, srcReg2, destReg;
    '''
    for regIdx in range(rCount):
        regSubst = { "reg" : regIdx }
        eWalkCode += loadSources % regSubst
        if readDest:
            eWalkCode += loadDest % regSubst
    eWalkCode += elementLoop % { "op" : op, "readDest" : readDestCode }
    for regIdx in range(rCount):
        eWalkCode += storeDest % { "reg" : regIdx }

    iop = InstObjParams(name, Name,
                        "RegRegRegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest }, [])
    header_output += NeonRegRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst(
            { "targs" : elemType, "class_name" : Name })
693
def threeEqualRegInstFp(name, Name, types, rCount, op,
                        readDest=False, pairwise=False, toInt=False):
    """Generate a three-operand floating point instruction whose two
    sources and destination each span rCount registers.

    op is a C++ snippet executed once per register. It can use srcReg1
    and srcReg2 and is expected to assign destReg. When readDest is set,
    destReg starts out as the current destination value. When pairwise
    is set, register r is given registers 2*r and 2*r+1 taken from
    srcRegs1 followed by srcRegs2. When toInt is set, the destination is
    held as a RegVect of FloatRegBits rather than an array of FloatReg.

    The class declaration is appended to header_output; the execute
    method and one NeonExecDeclare expansion per entry in types are
    appended to exec_output.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
        typedef FloatReg FloatVect[rCount];
        FloatVect srcRegs1, srcRegs2;
    '''
    if toInt:
        eWalkCode += 'RegVect destRegs;\n'
    else:
        eWalkCode += 'FloatVect destRegs;\n'
    for reg in range(rCount):
        eWalkCode += '''
            srcRegs1[%(reg)d] = FpOp1P%(reg)d;
            srcRegs2[%(reg)d] = FpOp2P%(reg)d;
        ''' % { "reg" : reg }
        if readDest:
            if toInt:
                eWalkCode += '''
                    destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
                ''' % { "reg" : reg }
            else:
                eWalkCode += '''
                    destRegs[%(reg)d] = FpDestP%(reg)d;
                ''' % { "reg" : reg }
    # How the loop body declares, reads and writes the destination. With
    # toInt the destination lives in a RegVect, so both the read and the
    # write have to go through its .regs member.
    destType = 'FloatReg'
    readDestCode = ''
    writeDest = 'destRegs[r] = destReg;'
    if readDest:
        readDestCode = 'destReg = destRegs[r];'
    if toInt:
        destType = 'FloatRegBits'
        writeDest = 'destRegs.regs[r] = destReg;'
        if readDest:
            readDestCode = 'destReg = destRegs.regs[r];'
    if pairwise:
        eWalkCode += '''
        for (unsigned r = 0; r < rCount; r++) {
            FloatReg srcReg1 = (2 * r < rCount) ?
                srcRegs1[2 * r] : srcRegs2[2 * r - rCount];
            FloatReg srcReg2 = (2 * r < rCount) ?
                srcRegs1[2 * r + 1] : srcRegs2[2 * r + 1 - rCount];
            %(destType)s destReg;
            %(readDest)s
            %(op)s
            %(writeDest)s
        }
        ''' % { "op" : op,
                "readDest" : readDestCode,
                "destType" : destType,
                "writeDest" : writeDest }
    else:
        eWalkCode += '''
        for (unsigned r = 0; r < rCount; r++) {
            FloatReg srcReg1 = srcRegs1[r];
            FloatReg srcReg2 = srcRegs2[r];
            %(destType)s destReg;
            %(readDest)s
            %(op)s
            %(writeDest)s
        }
        ''' % { "op" : op,
                "readDest" : readDestCode,
                "destType" : destType,
                "writeDest" : writeDest }
    for reg in range(rCount):
        if toInt:
            eWalkCode += '''
            FpDestP%(reg)d.uw = destRegs.regs[%(reg)d];
            ''' % { "reg" : reg }
        else:
            eWalkCode += '''
            FpDestP%(reg)d = destRegs[%(reg)d];
            ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "FpRegRegRegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest }, [])
    header_output += NeonRegRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        substDict = { "targs" : elemType,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
777
def threeUnequalRegInst(name, Name, types, op,
                        bigSrc1, bigSrc2, bigDest, readDest):
    """Generate a three-operand integer instruction whose operands may
    differ in width.

    Each of bigSrc1, bigSrc2 and bigDest marks the matching operand as
    "big": it then spans 4 registers and is held in a BigRegVect instead
    of 2 registers in a RegVect. op is a C++ snippet executed once per
    element; it can use srcElem1 and srcElem2 and is expected to assign
    destElem. When readDest is set, the destination registers are loaded
    first and destElem starts out as the current destination element.

    The class declaration is appended to header_output; the execute
    method and one NeonExecDeclare expansion per entry in types are
    appended to exec_output.
    """
    global header_output, exec_output

    # (register count, type name prefix) for each operand.
    src1Cnt, src1Prefix = 2, ''
    src2Cnt, src2Prefix = 2, ''
    destCnt, destPrefix = 2, ''
    if bigSrc1:
        src1Cnt, src1Prefix = 4, 'Big'
    if bigSrc2:
        src2Cnt, src2Prefix = 4, 'Big'
    if bigDest:
        destCnt, destPrefix = 4, 'Big'

    eWalkCode = simdEnabledCheckCode + '''
        %sRegVect srcReg1;
        %sRegVect srcReg2;
        %sRegVect destReg;
    ''' % (src1Prefix, src2Prefix, destPrefix)
    for regIdx in range(src1Cnt):
        eWalkCode += '''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
        ''' % { "reg" : regIdx }
    for regIdx in range(src2Cnt):
        eWalkCode += '''
            srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
        ''' % { "reg" : regIdx }
    readDestCode = ''
    if readDest:
        for regIdx in range(destCnt):
            eWalkCode += '''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
            ''' % { "reg" : regIdx }
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'

    # NOTE(review): srcElem2 is declared with src1Prefix, and src2Prefix
    # is supplied below but never referenced by the template. This looks
    # like a copy/paste slip, but narrowing the declared type could
    # affect op snippets written against it -- confirm before changing.
    loopSubst = { "op" : op, "readDest" : readDestCode,
                  "src1Prefix" : src1Prefix, "src2Prefix" : src2Prefix,
                  "destPrefix" : destPrefix }
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        %(src1Prefix)sElement srcElem1 = gtoh(srcReg1.elements[i]);
        %(src1Prefix)sElement srcElem2 = gtoh(srcReg2.elements[i]);
        %(destPrefix)sElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % loopSubst
    for regIdx in range(destCnt):
        eWalkCode += '''
        FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : regIdx }

    iop = InstObjParams(name, Name,
                        "RegRegRegOp",
                        { "code": eWalkCode,
                          "r_count": 2,
                          "predicate_test": predicateTest }, [])
    header_output += NeonRegRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst(
            { "targs" : elemType, "class_name" : Name })
840
def threeRegNarrowInst(name, Name, types, op, readDest=False):
    """Three-operand instruction with both sources big and a normal
    sized destination."""
    threeUnequalRegInst(name, Name, types, op,
                        bigSrc1=True, bigSrc2=True, bigDest=False,
                        readDest=readDest)
844
def threeRegLongInst(name, Name, types, op, readDest=False):
    """Three-operand instruction with normal sized sources and a big
    destination."""
    threeUnequalRegInst(name, Name, types, op,
                        bigSrc1=False, bigSrc2=False, bigDest=True,
                        readDest=readDest)
848
def threeRegWideInst(name, Name, types, op, readDest=False):
    """Three-operand instruction with a big first source, a normal sized
    second source and a big destination."""
    threeUnequalRegInst(name, Name, types, op,
                        bigSrc1=True, bigSrc2=False, bigDest=True,
                        readDest=readDest)
852
def twoEqualRegInst(name, Name, types, rCount, op, readDest=False):
    """Generate an integer instruction that combines every element of the
    first source with one fixed element of the second source.

    Both sources and the destination span rCount registers. The
    generated code asserts that imm is a valid element index and uses
    srcReg2.elements[imm] as srcElem2 on every iteration. op is a C++
    snippet that can use srcElem1 and srcElem2 and is expected to assign
    destElem. When readDest is set, the destination registers are loaded
    first and destElem starts out as the current destination element.

    The class declaration is appended to header_output; the execute
    method and one NeonExecDeclare expansion per entry in types are
    appended to exec_output.
    """
    global header_output, exec_output

    loadSources = '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
        srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
    '''
    loadDest = '''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
    '''
    storeDest = '''
        FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
    '''

    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'

    eWalkCode = simdEnabledCheckCode + '''
        RegVect srcReg1, srcReg2, destReg;
    '''
    for regIdx in range(rCount):
        regSubst = { "reg" : regIdx }
        eWalkCode += loadSources % regSubst
        if readDest:
            eWalkCode += loadDest % regSubst
    eWalkCode += '''
    assert(imm >= 0 && imm < eCount);
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        Element srcElem2 = gtoh(srcReg2.elements[imm]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for regIdx in range(rCount):
        eWalkCode += storeDest % { "reg" : regIdx }

    iop = InstObjParams(name, Name,
                        "RegRegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest }, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst(
            { "targs" : elemType, "class_name" : Name })
896
def twoRegLongInst(name, Name, types, op, readDest=False):
    """Generate an integer instruction that combines every element of the
    first source with one fixed element of the second source and
    produces a big destination.

    Both sources span 2 registers (RegVect); the destination spans 4
    (BigRegVect). The generated code asserts that imm is a valid element
    index and uses srcReg2.elements[imm] as srcElem2 on every iteration.
    op is a C++ snippet that can use srcElem1 and srcElem2 and is
    expected to assign the BigElement destElem. When readDest is set,
    the destination registers are loaded first and destElem starts out
    as the current destination element.

    The class declaration is appended to header_output; the execute
    method and one NeonExecDeclare expansion per entry in types are
    appended to exec_output.
    """
    global header_output, exec_output
    rCount = 2
    destRegCount = 2 * rCount

    eWalkCode = simdEnabledCheckCode + '''
        RegVect srcReg1, srcReg2;
        BigRegVect destReg;
    '''
    for regIdx in range(rCount):
        eWalkCode += '''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
            srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);;
        ''' % { "reg" : regIdx }
    readDestCode = ''
    if readDest:
        for regIdx in range(destRegCount):
            eWalkCode += '''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
            ''' % { "reg" : regIdx }
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    eWalkCode += '''
    assert(imm >= 0 && imm < eCount);
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        Element srcElem2 = gtoh(srcReg2.elements[imm]);
        BigElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for regIdx in range(destRegCount):
        eWalkCode += '''
        FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : regIdx }

    iop = InstObjParams(name, Name,
                        "RegRegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest }, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst(
            { "targs" : elemType, "class_name" : Name })
943
def twoEqualRegInstFp(name, Name, types, rCount, op, readDest=False):
    """Generate a floating point instruction that combines every register
    of the first source with one fixed register of the second source.

    Both sources and the destination span rCount registers. The
    generated code asserts that imm is a valid register index and uses
    srcRegs2[imm] as srcReg2 on every iteration. op is a C++ snippet
    that can use srcReg1 and srcReg2 and is expected to assign destReg.
    When readDest is set, destReg starts out as the current destination
    value.

    The class declaration is appended to header_output; the execute
    method and one NeonExecDeclare expansion per entry in types are
    appended to exec_output.
    """
    global header_output, exec_output

    loadSources = '''
        srcRegs1[%(reg)d] = FpOp1P%(reg)d;
        srcRegs2[%(reg)d] = FpOp2P%(reg)d;
    '''
    loadDest = '''
        destRegs[%(reg)d] = FpDestP%(reg)d;
    '''
    storeDest = '''
        FpDestP%(reg)d = destRegs[%(reg)d];
    '''

    readDestCode = ''
    if readDest:
        readDestCode = 'destReg = destRegs[i];'

    eWalkCode = simdEnabledCheckCode + '''
        typedef FloatReg FloatVect[rCount];
        FloatVect srcRegs1, srcRegs2, destRegs;
    '''
    for regIdx in range(rCount):
        regSubst = { "reg" : regIdx }
        eWalkCode += loadSources % regSubst
        if readDest:
            eWalkCode += loadDest % regSubst
    eWalkCode += '''
    assert(imm >= 0 && imm < rCount);
    for (unsigned i = 0; i < rCount; i++) {
        FloatReg srcReg1 = srcRegs1[i];
        FloatReg srcReg2 = srcRegs2[imm];
        FloatReg destReg;
        %(readDest)s
        %(op)s
        destRegs[i] = destReg;
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for regIdx in range(rCount):
        eWalkCode += storeDest % { "reg" : regIdx }

    iop = InstObjParams(name, Name,
                        "FpRegRegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest }, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst(
            { "targs" : elemType, "class_name" : Name })
988
def twoRegShiftInst(name, Name, types, rCount, op,
                    readDest=False, toInt=False, fromInt=False):
    """Generate a one-source-plus-immediate instruction whose source and
    destination each span rCount registers.

    op is a C++ snippet executed once per loop iteration. By default it
    can use srcElem1 and is expected to assign destElem. With fromInt
    the source is instead presented as the FloatRegBits srcReg1, read
    from srcRegs1.regs[i]; with toInt the result is instead the
    FloatRegBits destReg, written to destRegs.regs[i]. When readDest is
    set, the destination registers are loaded first and the destination
    variable starts out as the current destination value.

    The class declaration is appended to header_output; the execute
    method and one NeonExecDeclare expansion per entry in types are
    appended to exec_output.
    """
    global header_output, exec_output

    # Select the per-iteration snippets. The toInt flavour of the
    # destination read only applies when the destination is read at all.
    if fromInt:
        readOpCode = 'FloatRegBits srcReg1 = gtoh(srcRegs1.regs[i]);'
    else:
        readOpCode = 'Element srcElem1 = gtoh(srcRegs1.elements[i]);'
    if toInt:
        declDest = 'FloatRegBits destReg;'
        writeDestCode = 'destRegs.regs[i] = htog(destReg);'
    else:
        declDest = 'Element destElem;'
        writeDestCode = 'destRegs.elements[i] = htog(destElem);'
    if not readDest:
        readDestCode = ''
    elif toInt:
        readDestCode = 'destReg = gtoh(destRegs.regs[i]);'
    else:
        readDestCode = 'destElem = gtoh(destRegs.elements[i]);'

    eWalkCode = simdEnabledCheckCode + '''
        RegVect srcRegs1, destRegs;
    '''
    for regIdx in range(rCount):
        regSubst = { "reg" : regIdx }
        eWalkCode += '''
            srcRegs1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
        ''' % regSubst
        if readDest:
            eWalkCode += '''
                destRegs.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
            ''' % regSubst
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        %(readOp)s
        %(declDest)s
        %(readDest)s
        %(op)s
        %(writeDest)s
    }
    ''' % { "readOp" : readOpCode,
            "declDest" : declDest,
            "readDest" : readDestCode,
            "op" : op,
            "writeDest" : writeDestCode }
    for regIdx in range(rCount):
        eWalkCode += '''
        FpDestP%(reg)d.uw = gtoh(destRegs.regs[%(reg)d]);
        ''' % { "reg" : regIdx }

    iop = InstObjParams(name, Name,
                        "RegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest }, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst(
            { "targs" : elemType, "class_name" : Name })
1044
def twoRegNarrowShiftInst(name, Name, types, op, readDest=False):
    """Generate a one-source-plus-immediate instruction with a big source
    and a normal sized destination.

    The source spans 4 registers (BigRegVect) and the destination 2
    (RegVect). op is a C++ snippet executed once per element; it can use
    the BigElement srcElem1 and is expected to assign the Element
    destElem. When readDest is set, the destination registers are loaded
    first and destElem starts out as the current destination element.

    The class declaration is appended to header_output; the execute
    method and one NeonExecDeclare expansion per entry in types are
    appended to exec_output.
    """
    global header_output, exec_output
    srcRegCount = 4
    destRegCount = 2

    eWalkCode = simdEnabledCheckCode + '''
        BigRegVect srcReg1;
        RegVect destReg;
    '''
    for regIdx in range(srcRegCount):
        eWalkCode += '''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
        ''' % { "reg" : regIdx }
    readDestCode = ''
    if readDest:
        for regIdx in range(destRegCount):
            eWalkCode += '''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
            ''' % { "reg" : regIdx }
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        BigElement srcElem1 = gtoh(srcReg1.elements[i]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for regIdx in range(destRegCount):
        eWalkCode += '''
        FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : regIdx }

    iop = InstObjParams(name, Name,
                        "RegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": 2,
                          "predicate_test": predicateTest }, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst(
            { "targs" : elemType, "class_name" : Name })
1087
def twoRegLongShiftInst(name, Name, types, op, readDest=False):
    """Generate a NEON shift-by-immediate instruction with a widened result.

    The emitted C++ loads two source registers into a RegVect, runs ``op``
    once per element (``srcElem1`` is an Element, ``destElem`` a
    BigElement) and stores four destination registers from a BigRegVect.

    name, Name -- mnemonic and class name passed to InstObjParams.
    types      -- one NeonExecDeclare instantiation is emitted per entry.
    op         -- C++ snippet that computes destElem.
    readDest   -- if True, the four destination registers are loaded first
                  and destElem starts out as the old destination element.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1;
    BigRegVect destReg;
    '''
    for reg in range(2):
        eWalkCode += '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
        ''' % { "reg" : reg }
    if readDest:
        for reg in range(4):
            eWalkCode += '''
            destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
            ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        # The old value must land in the per-element temporary.  Assigning
        # it to destReg (the whole register vector) does not compile and
        # would leave destElem uninitialised for accumulate-style ops.
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        BigElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(4):
        eWalkCode += '''
        FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": 2,
                          "predicate_test": predicateTest }, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1130
def twoRegMiscInst(name, Name, types, rCount, op, readDest=False):
    """Generate a two-register NEON instruction with equal-sized operands.

    The emitted C++ loads ``rCount`` source registers, runs ``op`` once per
    element and stores ``rCount`` destination registers.  Inside ``op`` the
    index ``j`` starts equal to ``i`` and selects the destination element
    that is written, so a snippet may redirect the store by changing ``j``.

    name, Name -- mnemonic and class name passed to InstObjParams.
    types      -- one NeonExecDeclare instantiation is emitted per entry.
    rCount     -- number of registers loaded and stored.
    op         -- C++ snippet that computes destElem from srcElem1.
    readDest   -- if True, each destination register is loaded as well and
                  destElem starts out as the old destination element.
    """
    global header_output, exec_output

    pieces = [simdEnabledCheckCode, '''
    RegVect srcReg1, destReg;
    ''']
    # Source and (optional) destination loads are interleaved per register.
    for idx in range(rCount):
        regArg = {"reg": idx}
        pieces.append('''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
        ''' % regArg)
        if readDest:
            pieces.append('''
            destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
            ''' % regArg)
    seedDest = 'destElem = gtoh(destReg.elements[i]);' if readDest else ''
    pieces.append('''
    for (unsigned i = 0; i < eCount; i++) {
        unsigned j = i;
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[j] = htog(destElem);
    }
    ''' % {"op": op, "readDest": seedDest})
    pieces.extend('''
        FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
    ''' % {"reg": idx} for idx in range(rCount))

    params = InstObjParams(name, Name,
                           "RegRegOp",
                           {"code": "".join(pieces),
                            "r_count": rCount,
                            "predicate_test": predicateTest}, [])
    header_output += NeonRegRegOpDeclare.subst(params)
    exec_output += NeonEqualRegExecute.subst(params)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({"targs": elemType,
                                              "class_name": Name})
1172
def twoRegMiscScInst(name, Name, types, rCount, op, readDest=False):
    """Generate a two-register NEON instruction fed by one selected element.

    The emitted C++ loads ``rCount`` source registers and, for every
    destination element, runs ``op`` with ``srcElem1`` taken from the
    source element at index ``imm``; ``rCount`` destination registers are
    then stored.

    name, Name -- mnemonic and class name passed to InstObjParams.
    types      -- one NeonExecDeclare instantiation is emitted per entry.
    rCount     -- number of registers loaded and stored.
    op         -- C++ snippet that computes destElem from srcElem1.
    readDest   -- if True, each destination register is loaded as well and
                  destElem starts out as the old destination element.
    """
    global header_output, exec_output

    pieces = [simdEnabledCheckCode, '''
    RegVect srcReg1, destReg;
    ''']
    # Source and (optional) destination loads are interleaved per register.
    for idx in range(rCount):
        regArg = {"reg": idx}
        pieces.append('''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
        ''' % regArg)
        if readDest:
            pieces.append('''
            destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
            ''' % regArg)
    seedDest = 'destElem = gtoh(destReg.elements[i]);' if readDest else ''
    pieces.append('''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[imm]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % {"op": op, "readDest": seedDest})
    pieces.extend('''
        FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
    ''' % {"reg": idx} for idx in range(rCount))

    params = InstObjParams(name, Name,
                           "RegRegImmOp",
                           {"code": "".join(pieces),
                            "r_count": rCount,
                            "predicate_test": predicateTest}, [])
    header_output += NeonRegRegImmOpDeclare.subst(params)
    exec_output += NeonEqualRegExecute.subst(params)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({"targs": elemType,
                                              "class_name": Name})
1213
def twoRegMiscScramble(name, Name, types, rCount, op, readDest=False):
    """Generate a two-register NEON instruction that rewrites both operands.

    The emitted C++ loads ``rCount`` registers of both the source and the
    destination into RegVects, runs ``op`` once (there is no per-element
    loop; the snippet works on srcReg1/destReg directly) and then stores
    both vectors back to FpDestP* and FpOp1P*.

    name, Name -- mnemonic and class name passed to InstObjParams.
    types      -- one NeonExecDeclare instantiation is emitted per entry.
    rCount     -- number of registers loaded and stored for each operand.
    op         -- C++ snippet operating on srcReg1 and destReg.
    readDest   -- accepted for signature compatibility only.  The
                  destination is always loaded, so this flag has no
                  effect on the generated code.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1, destReg;
    '''
    for reg in range(rCount):
        eWalkCode += '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
        ''' % { "reg" : reg }
    eWalkCode += op
    # Both operands are written back because op may have modified either.
    for reg in range(rCount):
        eWalkCode += '''
        FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
        FpOp1P%(reg)d.uw = gtoh(srcReg1.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1247
def twoRegMiscInstFp(name, Name, types, rCount, op,
                     readDest=False, toInt=False):
    """Generate a two-register NEON floating point instruction.

    The emitted C++ copies ``rCount`` source registers into a FloatReg
    array and runs ``op`` once per register (loop variable ``r``) with
    ``srcReg1`` as input and ``destReg`` as output.

    name, Name -- mnemonic and class name passed to InstObjParams.
    types      -- one NeonExecDeclare instantiation is emitted per entry.
    rCount     -- number of registers processed.
    op         -- C++ snippet that computes destReg from srcReg1.
    readDest   -- if True, the destination registers are loaded first and
                  destReg starts out as the old destination register.
    toInt      -- if True, the destination is handled as raw register
                  bits (RegVect / FloatRegBits) instead of FloatReg.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    typedef FloatReg FloatVect[rCount];
    FloatVect srcRegs1;
    '''
    if toInt:
        eWalkCode += 'RegVect destRegs;\n'
    else:
        eWalkCode += 'FloatVect destRegs;\n'
    for reg in range(rCount):
        eWalkCode += '''
        srcRegs1[%(reg)d] = FpOp1P%(reg)d;
        ''' % { "reg" : reg }
        if readDest:
            if toInt:
                # Load through .uw, the same view the write-back below
                # stores through.
                eWalkCode += '''
                destRegs.regs[%(reg)d] = FpDestP%(reg)d.uw;
                ''' % { "reg" : reg }
            else:
                eWalkCode += '''
                destRegs[%(reg)d] = FpDestP%(reg)d;
                ''' % { "reg" : reg }
    readDestCode = ''
    destType = 'FloatReg'
    writeDest = 'destRegs[r] = destReg;'
    if readDest:
        # The generated loop iterates with 'r'; there is no 'i' in scope.
        readDestCode = 'destReg = destRegs[r];'
    if toInt:
        destType = 'FloatRegBits'
        writeDest = 'destRegs.regs[r] = destReg;'
        if readDest:
            # destRegs is a RegVect here, so index its regs member just
            # like the write-back does.
            readDestCode = 'destReg = destRegs.regs[r];'
    eWalkCode += '''
    for (unsigned r = 0; r < rCount; r++) {
        FloatReg srcReg1 = srcRegs1[r];
        %(destType)s destReg;
        %(readDest)s
        %(op)s
        %(writeDest)s
    }
    ''' % { "op" : op,
            "readDest" : readDestCode,
            "destType" : destType,
            "writeDest" : writeDest }
    for reg in range(rCount):
        if toInt:
            eWalkCode += '''
            FpDestP%(reg)d.uw = destRegs.regs[%(reg)d];
            ''' % { "reg" : reg }
        else:
            eWalkCode += '''
            FpDestP%(reg)d = destRegs[%(reg)d];
            ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "FpRegRegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1312
def twoRegCondenseInst(name, Name, types, rCount, op, readDest=False):
    """Generate a NEON instruction that folds adjacent element pairs.

    The emitted C++ loads ``rCount`` source registers and, for each of the
    ``eCount / 2`` results, runs ``op`` with ``srcElem1``/``srcElem2`` set
    to source elements ``2 * i`` and ``2 * i + 1``.  ``destElem`` is a
    BigElement; ``rCount`` destination registers are stored afterwards.

    name, Name -- mnemonic and class name passed to InstObjParams.
    types      -- one NeonExecDeclare instantiation is emitted per entry.
    rCount     -- number of registers loaded and stored.
    op         -- C++ snippet that computes destElem.
    readDest   -- if True, each destination register is loaded as well and
                  destElem starts out as the old destination element.
    """
    global header_output, exec_output

    pieces = [simdEnabledCheckCode, '''
    RegVect srcRegs;
    BigRegVect destReg;
    ''']
    # Source and (optional) destination loads are interleaved per register.
    for idx in range(rCount):
        regArg = {"reg": idx}
        pieces.append('''
        srcRegs.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
        ''' % regArg)
        if readDest:
            pieces.append('''
            destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
            ''' % regArg)
    seedDest = 'destElem = gtoh(destReg.elements[i]);' if readDest else ''
    pieces.append('''
    for (unsigned i = 0; i < eCount / 2; i++) {
        Element srcElem1 = gtoh(srcRegs.elements[2 * i]);
        Element srcElem2 = gtoh(srcRegs.elements[2 * i + 1]);
        BigElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % {"op": op, "readDest": seedDest})
    pieces.extend('''
        FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
    ''' % {"reg": idx} for idx in range(rCount))

    params = InstObjParams(name, Name,
                           "RegRegOp",
                           {"code": "".join(pieces),
                            "r_count": rCount,
                            "predicate_test": predicateTest}, [])
    header_output += NeonRegRegOpDeclare.subst(params)
    exec_output += NeonUnequalRegExecute.subst(params)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({"targs": elemType,
                                              "class_name": Name})
1355
def twoRegNarrowMiscInst(name, Name, types, op, readDest=False):
    """Generate a two-register NEON instruction with a narrowed result.

    The emitted C++ loads four source registers into a BigRegVect, runs
    ``op`` once per element (``srcElem1`` is a BigElement, ``destElem`` an
    Element) and stores two destination registers from a RegVect.

    name, Name -- mnemonic and class name passed to InstObjParams.
    types      -- one NeonExecDeclare instantiation is emitted per entry.
    op         -- C++ snippet that computes destElem.
    readDest   -- if True, the two destination registers are loaded first
                  and destElem starts out as the old destination element.
    """
    global header_output, exec_output

    srcLoad = '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
    '''
    destLoad = '''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
    '''
    destStore = '''
        FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
    '''
    elemLoop = '''
    for (unsigned i = 0; i < eCount; i++) {
        BigElement srcElem1 = gtoh(srcReg1.elements[i]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    '''

    # Assemble the snippet in emission order: declarations, register
    # loads, the element loop, then the write-back.
    pieces = [simdEnabledCheckCode, '''
    BigRegVect srcReg1;
    RegVect destReg;
    ''']
    pieces.extend(srcLoad % {"reg": idx} for idx in range(4))
    if readDest:
        pieces.extend(destLoad % {"reg": idx} for idx in range(2))
    seedDest = 'destElem = gtoh(destReg.elements[i]);' if readDest else ''
    pieces.append(elemLoop % {"op": op, "readDest": seedDest})
    pieces.extend(destStore % {"reg": idx} for idx in range(2))

    params = InstObjParams(name, Name,
                           "RegRegOp",
                           {"code": "".join(pieces),
                            "r_count": 2,
                            "predicate_test": predicateTest}, [])
    header_output += NeonRegRegOpDeclare.subst(params)
    exec_output += NeonUnequalRegExecute.subst(params)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({"targs": elemType,
                                              "class_name": Name})
1398
def oneRegImmInst(name, Name, types, rCount, op, readDest=False):
    """Generate a NEON instruction with one register operand.

    The emitted C++ runs ``op`` once per destination element and stores
    ``rCount`` destination registers; no source register is loaded.

    name, Name -- mnemonic and class name passed to InstObjParams.
    types      -- one NeonExecDeclare instantiation is emitted per entry.
    rCount     -- number of destination registers.
    op         -- C++ snippet that computes destElem.
    readDest   -- if True, the destination registers are loaded first and
                  destElem starts out as the old destination element.
    """
    global header_output, exec_output

    pieces = [simdEnabledCheckCode, '''
    RegVect destReg;
    ''']
    if readDest:
        pieces.extend('''
            destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
        ''' % {"reg": idx} for idx in range(rCount))
        seedDest = 'destElem = gtoh(destReg.elements[i]);'
    else:
        seedDest = ''
    pieces.append('''
    for (unsigned i = 0; i < eCount; i++) {
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % {"op": op, "readDest": seedDest})
    pieces.extend('''
        FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
    ''' % {"reg": idx} for idx in range(rCount))

    params = InstObjParams(name, Name,
                           "RegImmOp",
                           {"code": "".join(pieces),
                            "r_count": rCount,
                            "predicate_test": predicateTest}, [])
    header_output += NeonRegImmOpDeclare.subst(params)
    exec_output += NeonEqualRegExecute.subst(params)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({"targs": elemType,
                                              "class_name": Name})
1435
def twoRegLongMiscInst(name, Name, types, op, readDest=False):
    """Generate a two-register NEON instruction with a widened result.

    The emitted C++ loads two source registers into a RegVect, runs ``op``
    once per element (``srcElem1`` is an Element, ``destElem`` a
    BigElement) and stores four destination registers from a BigRegVect.

    name, Name -- mnemonic and class name passed to InstObjParams.
    types      -- one NeonExecDeclare instantiation is emitted per entry.
    op         -- C++ snippet that computes destElem.
    readDest   -- if True, the four destination registers are loaded first
                  and destElem starts out as the old destination element.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1;
    BigRegVect destReg;
    '''
    for reg in range(2):
        eWalkCode += '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
        ''' % { "reg" : reg }
    if readDest:
        for reg in range(4):
            eWalkCode += '''
            destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
            ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        # The old value must land in the per-element temporary.  Assigning
        # it to destReg (the whole register vector) does not compile and
        # would leave destElem uninitialised for accumulate-style ops.
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        BigElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(4):
        eWalkCode += '''
        FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code": eWalkCode,
                          "r_count": 2,
                          "predicate_test": predicateTest }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1478
# Halving add: (srcElem1 + srcElem2) / 2 formed without a wider
# intermediate.  The two low bits are added on their own to produce the
# carry that is added to the sum of the halved operands.
vhaddCode = '''
    Element carryBit =
        (((unsigned)srcElem1 & 0x1) +
         ((unsigned)srcElem2 & 0x1)) >> 1;
    // Use division instead of a shift to ensure the sign extension works
    // right. The compiler will figure out if it can be a shift. Mask the
    // inputs so they get truncated correctly.
    destElem = (((srcElem1 & ~(Element)1) / 2) +
                ((srcElem2 & ~(Element)1) / 2)) + carryBit;
'''
# The 2 / 4 argument presumably is the register count of the "D" and "Q"
# forms named by the class -- confirm against threeEqualRegInst.
threeEqualRegInst("vhadd", "VhaddD", allTypes, 2, vhaddCode)
threeEqualRegInst("vhadd", "VhaddQ", allTypes, 4, vhaddCode)

# Rounding halving add: same as vhaddCode except that 1 is added to the
# low-bit sum before it is shifted down, so a set low bit in either
# operand rounds the result up.
vrhaddCode = '''
    Element carryBit =
        (((unsigned)srcElem1 & 0x1) +
         ((unsigned)srcElem2 & 0x1) + 1) >> 1;
    // Use division instead of a shift to ensure the sign extension works
    // right. The compiler will figure out if it can be a shift. Mask the
    // inputs so they get truncated correctly.
    destElem = (((srcElem1 & ~(Element)1) / 2) +
                ((srcElem2 & ~(Element)1) / 2)) + carryBit;
'''
threeEqualRegInst("vrhadd", "VrhaddD", allTypes, 2, vrhaddCode)
threeEqualRegInst("vrhadd", "VrhaddQ", allTypes, 4, vrhaddCode)

# Halving subtract: the low bits are subtracted on their own and bit 1 of
# that difference ("barrowBit", i.e. the borrow) is subtracted from the
# difference of the halved operands.
vhsubCode = '''
    Element barrowBit =
        (((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1;
    // Use division instead of a shift to ensure the sign extension works
    // right. The compiler will figure out if it can be a shift. Mask the
    // inputs so they get truncated correctly.
    destElem = (((srcElem1 & ~(Element)1) / 2) -
                ((srcElem2 & ~(Element)1) / 2)) - barrowBit;
'''
threeEqualRegInst("vhsub", "VhsubD", allTypes, 2, vhsubCode)
threeEqualRegInst("vhsub", "VhsubQ", allTypes, 4, vhsubCode)
1516
# Bitwise AND of the two source elements.
vandCode = '''
    destElem = srcElem1 & srcElem2;
'''
threeEqualRegInst("vand", "VandD", unsignedTypes, 2, vandCode)
threeEqualRegInst("vand", "VandQ", unsignedTypes, 4, vandCode)

# Bit clear: srcElem1 with every bit that is set in srcElem2 cleared.
vbicCode = '''
    destElem = srcElem1 & ~srcElem2;
'''
threeEqualRegInst("vbic", "VbicD", unsignedTypes, 2, vbicCode)
threeEqualRegInst("vbic", "VbicQ", unsignedTypes, 4, vbicCode)

# Bitwise OR of the two source elements.
vorrCode = '''
    destElem = srcElem1 | srcElem2;
'''
threeEqualRegInst("vorr", "VorrD", unsignedTypes, 2, vorrCode)
threeEqualRegInst("vorr", "VorrQ", unsignedTypes, 4, vorrCode)

# The register-to-register "vmov" classes reuse the OR snippet.
threeEqualRegInst("vmov", "VmovD", unsignedTypes, 2, vorrCode)
threeEqualRegInst("vmov", "VmovQ", unsignedTypes, 4, vorrCode)

# OR of srcElem1 with the complement of srcElem2.
vornCode = '''
    destElem = srcElem1 | ~srcElem2;
'''
threeEqualRegInst("vorn", "VornD", unsignedTypes, 2, vornCode)
threeEqualRegInst("vorn", "VornQ", unsignedTypes, 4, vornCode)

# Bitwise exclusive OR of the two source elements.
veorCode = '''
    destElem = srcElem1 ^ srcElem2;
'''
threeEqualRegInst("veor", "VeorD", unsignedTypes, 2, veorCode)
threeEqualRegInst("veor", "VeorQ", unsignedTypes, 4, veorCode)

# The three snippets below read the old destElem.  The trailing True is
# presumably the readDest flag of threeEqualRegInst -- confirm there.
#
# vbif: keep the destination bit where srcElem2 is set, otherwise take
# the bit from srcElem1.
vbifCode = '''
    destElem = (destElem & srcElem2) | (srcElem1 & ~srcElem2);
'''
threeEqualRegInst("vbif", "VbifD", unsignedTypes, 2, vbifCode, True)
threeEqualRegInst("vbif", "VbifQ", unsignedTypes, 4, vbifCode, True)
# vbit: take the bit from srcElem1 where srcElem2 is set, otherwise keep
# the destination bit.
vbitCode = '''
    destElem = (srcElem1 & srcElem2) | (destElem & ~srcElem2);
'''
threeEqualRegInst("vbit", "VbitD", unsignedTypes, 2, vbitCode, True)
threeEqualRegInst("vbit", "VbitQ", unsignedTypes, 4, vbitCode, True)
# vbsl: the old destination is the selector; a set bit picks srcElem1,
# a clear bit picks srcElem2.
vbslCode = '''
    destElem = (srcElem1 & destElem) | (srcElem2 & ~destElem);
'''
threeEqualRegInst("vbsl", "VbslD", unsignedTypes, 2, vbslCode, True)
threeEqualRegInst("vbsl", "VbslQ", unsignedTypes, 4, vbslCode, True)
1565
# Element-wise maximum; srcElem2 is chosen when the operands are equal.
vmaxCode = '''
    destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2;
'''
threeEqualRegInst("vmax", "VmaxD", allTypes, 2, vmaxCode)
threeEqualRegInst("vmax", "VmaxQ", allTypes, 4, vmaxCode)

# Element-wise minimum; srcElem2 is chosen when the operands are equal.
vminCode = '''
    destElem = (srcElem1 < srcElem2) ? srcElem1 : srcElem2;
'''
threeEqualRegInst("vmin", "VminD", allTypes, 2, vminCode)
threeEqualRegInst("vmin", "VminQ", allTypes, 4, vminCode)
1577
# Element-wise sum of the two sources.
vaddCode = '''
    destElem = srcElem1 + srcElem2;
'''
threeEqualRegInst("vadd", "NVaddD", unsignedTypes, 2, vaddCode)
threeEqualRegInst("vadd", "NVaddQ", unsignedTypes, 4, vaddCode)

# The pairwise add reuses the add snippet; pairwise=True is handled by
# threeEqualRegInst, which is defined outside this section.
threeEqualRegInst("vpadd", "NVpaddD", unsignedTypes,
                  2, vaddCode, pairwise=True)
threeEqualRegInst("vpadd", "NVpaddQ", unsignedTypes,
                  4, vaddCode, pairwise=True)
# Sum with both operands cast to BigElement first; shared by the long
# (vaddl) and wide (vaddw) forms.
vaddlwCode = '''
    destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
'''
threeRegLongInst("vaddl", "Vaddl", smallTypes, vaddlwCode)
threeRegWideInst("vaddw", "Vaddw", smallTypes, vaddlwCode)
# Add and keep the high half: the BigElement sum is shifted right by the
# bit width of Element.
vaddhnCode = '''
    destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >>
               (sizeof(Element) * 8);
'''
threeRegNarrowInst("vaddhn", "Vaddhn", smallTypes, vaddhnCode)
# Rounding variant of vaddhnCode: 1 << (Element bit width - 1) is added
# before the shift.
vraddhnCode = '''
    destElem = ((BigElement)srcElem1 + (BigElement)srcElem2 +
                ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
               (sizeof(Element) * 8);
'''
threeRegNarrowInst("vraddhn", "Vraddhn", smallTypes, vraddhnCode)
1604
# Element-wise difference srcElem1 - srcElem2.
vsubCode = '''
    destElem = srcElem1 - srcElem2;
'''
threeEqualRegInst("vsub", "NVsubD", unsignedTypes, 2, vsubCode)
threeEqualRegInst("vsub", "NVsubQ", unsignedTypes, 4, vsubCode)
# Difference with both operands cast to BigElement first; shared by the
# long (vsubl) and wide (vsubw) forms.
vsublwCode = '''
    destElem = (BigElement)srcElem1 - (BigElement)srcElem2;
'''
threeRegLongInst("vsubl", "Vsubl", smallTypes, vsublwCode)
threeRegWideInst("vsubw", "Vsubw", smallTypes, vsublwCode)
1615
# Unsigned saturating add.  Wrap-around is detected by the sum being
# smaller than either operand; the result is then forced to all ones and
# fpscr.qc is set.  Fpscr is read at the start and written back at the end.
vqaddUCode = '''
    destElem = srcElem1 + srcElem2;
    FPSCR fpscr = (FPSCR)Fpscr;
    if (destElem < srcElem1 || destElem < srcElem2) {
        destElem = (Element)(-1);
        fpscr.qc = 1;
    }
    Fpscr = fpscr;
'''
threeEqualRegInst("vqadd", "VqaddUD", unsignedTypes, 2, vqaddUCode)
threeEqualRegInst("vqadd", "VqaddUQ", unsignedTypes, 4, vqaddUCode)
# Subtract and keep the high half: the BigElement difference is shifted
# right by the bit width of Element.
vsubhnCode = '''
    destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >>
               (sizeof(Element) * 8);
'''
threeRegNarrowInst("vsubhn", "Vsubhn", smallTypes, vsubhnCode)
# Rounding variant of vsubhnCode: 1 << (Element bit width - 1) is added
# before the shift.
vrsubhnCode = '''
    destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 +
                ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
               (sizeof(Element) * 8);
'''
threeRegNarrowInst("vrsubhn", "Vrsubhn", smallTypes, vrsubhnCode)
1638
# Signed saturating add.  Overflow is flagged when both sources have the
# same sign and the wrapped sum has the other one.  The saturated value
# starts as 1 << (Element bit width - 1); when the wrapped sum came out
# negative 1 is subtracted from it.  fpscr.qc is set on saturation.
vqaddSCode = '''
    destElem = srcElem1 + srcElem2;
    FPSCR fpscr = (FPSCR)Fpscr;
    bool negDest = (destElem < 0);
    bool negSrc1 = (srcElem1 < 0);
    bool negSrc2 = (srcElem2 < 0);
    if ((negDest != negSrc1) && (negSrc1 == negSrc2)) {
        destElem = (Element)1 << (sizeof(Element) * 8 - 1);
        if (negDest)
            destElem -= 1;
        fpscr.qc = 1;
    }
    Fpscr = fpscr;
'''
threeEqualRegInst("vqadd", "VqaddSD", signedTypes, 2, vqaddSCode)
threeEqualRegInst("vqadd", "VqaddSQ", signedTypes, 4, vqaddSCode)
1655
# Unsigned saturating subtract.  A difference larger than srcElem1 means
# the subtraction wrapped; the result is then forced to 0 and fpscr.qc is
# set.
vqsubUCode = '''
    destElem = srcElem1 - srcElem2;
    FPSCR fpscr = (FPSCR)Fpscr;
    if (destElem > srcElem1) {
        destElem = 0;
        fpscr.qc = 1;
    }
    Fpscr = fpscr;
'''
threeEqualRegInst("vqsub", "VqsubUD", unsignedTypes, 2, vqsubUCode)
threeEqualRegInst("vqsub", "VqsubUQ", unsignedTypes, 4, vqsubUCode)

# Signed saturating subtract.  Overflow is flagged when srcElem1 is
# negative exactly when srcElem2 is non-negative (negSrc1 == posSrc2) and
# the wrapped difference has a different sign from srcElem1.  The
# saturated value is built the same way as in vqaddSCode.
vqsubSCode = '''
    destElem = srcElem1 - srcElem2;
    FPSCR fpscr = (FPSCR)Fpscr;
    bool negDest = (destElem < 0);
    bool negSrc1 = (srcElem1 < 0);
    bool posSrc2 = (srcElem2 >= 0);
    if ((negDest != negSrc1) && (negSrc1 == posSrc2)) {
        destElem = (Element)1 << (sizeof(Element) * 8 - 1);
        if (negDest)
            destElem -= 1;
        fpscr.qc = 1;
    }
    Fpscr = fpscr;
'''
threeEqualRegInst("vqsub", "VqsubSD", signedTypes, 2, vqsubSCode)
threeEqualRegInst("vqsub", "VqsubSQ", signedTypes, 4, vqsubSCode)
1684
# Comparisons produce an all-ones element when the test holds and 0
# otherwise.
#
# Greater than.
vcgtCode = '''
    destElem = (srcElem1 > srcElem2) ? (Element)(-1) : 0;
'''
threeEqualRegInst("vcgt", "VcgtD", allTypes, 2, vcgtCode)
threeEqualRegInst("vcgt", "VcgtQ", allTypes, 4, vcgtCode)

# Greater than or equal.
vcgeCode = '''
    destElem = (srcElem1 >= srcElem2) ? (Element)(-1) : 0;
'''
threeEqualRegInst("vcge", "VcgeD", allTypes, 2, vcgeCode)
threeEqualRegInst("vcge", "VcgeQ", allTypes, 4, vcgeCode)

# Equal.
vceqCode = '''
    destElem = (srcElem1 == srcElem2) ? (Element)(-1) : 0;
'''
threeEqualRegInst("vceq", "VceqD", unsignedTypes, 2, vceqCode)
threeEqualRegInst("vceq", "VceqQ", unsignedTypes, 4, vceqCode)
1702
# Shift by a per-element register amount.  The amount is srcElem2 cast to
# int8_t, so it is signed: a negative value shifts right by its
# magnitude, anything else shifts left.  A shift of at least the element
# width yields 0; for the right shift the sign bits are then ORed back in
# when srcElem1 is negative (ltz) but the shifted value is not.
vshlCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        // Make sure the right shift sign extended when it should.
        if (ltz(srcElem1) && !ltz(destElem)) {
            destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                         1 - shiftAmt));
        }
    } else {
        if (shiftAmt >= sizeof(Element) * 8) {
            destElem = 0;
        } else {
            destElem = srcElem1 << shiftAmt;
        }
    }
'''
threeEqualRegInst("vshl", "VshlD", allTypes, 2, vshlCode)
threeEqualRegInst("vshl", "VshlQ", allTypes, 4, vshlCode)
1728
# Rounding shift by a per-element register amount.  Same structure as the
# plain shift, but for right shifts a rounding bit is added to the
# result: bits(srcElem1, shiftAmt - 1) when the shift is at most the
# element width, or 1 when it is larger and srcElem1 is negative.  A
# shift amount of exactly 0 copies srcElem1 unchanged.
vrshlCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        Element rBit = 0;
        if (shiftAmt <= sizeof(Element) * 8)
            rBit = bits(srcElem1, shiftAmt - 1);
        if (shiftAmt > sizeof(Element) * 8 && ltz(srcElem1))
            rBit = 1;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        // Make sure the right shift sign extended when it should.
        if (ltz(srcElem1) && !ltz(destElem)) {
            destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                         1 - shiftAmt));
        }
        destElem += rBit;
    } else if (shiftAmt > 0) {
        if (shiftAmt >= sizeof(Element) * 8) {
            destElem = 0;
        } else {
            destElem = srcElem1 << shiftAmt;
        }
    } else {
        destElem = srcElem1;
    }
'''
threeEqualRegInst("vrshl", "VrshlD", allTypes, 2, vrshlCode)
threeEqualRegInst("vrshl", "VrshlQ", allTypes, 4, vrshlCode)
1762
# Unsigned saturating shift by a per-element register amount.  Negative
# amounts shift right (0 once the shift reaches the element width).
# Positive amounts shift left; if any bit would be shifted out -- the
# whole value for shifts of at least the element width, otherwise the top
# shiftAmt bits as read by bits() -- the result becomes
# mask(element width) and fpscr.qc is set.  A zero amount copies srcElem1.
vqshlUCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    FPSCR fpscr = (FPSCR)Fpscr;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
    } else if (shiftAmt > 0) {
        if (shiftAmt >= sizeof(Element) * 8) {
            if (srcElem1 != 0) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = 0;
            }
        } else {
            if (bits(srcElem1, sizeof(Element) * 8 - 1,
                        sizeof(Element) * 8 - shiftAmt)) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        }
    } else {
        destElem = srcElem1;
    }
    Fpscr = fpscr;
'''
threeEqualRegInst("vqshl", "VqshlUD", unsignedTypes, 2, vqshlUCode)
threeEqualRegInst("vqshl", "VqshlUQ", unsignedTypes, 4, vqshlUCode)
1798
# Signed saturating shift by a per-element register amount.  Negative
# amounts shift right with the sign bits ORed back in.  Positive amounts
# shift left unless that would overflow: the shift is at least the
# element width and srcElem1 is non-zero, or the top shiftAmt + 1 bits
# (as read by bits()) are not all copies of the sign.  On overflow the
# result is mask(element width - 1), complemented when srcElem1 is
# negative, and fpscr.qc is set.  A zero amount copies srcElem1.
vqshlSCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    FPSCR fpscr = (FPSCR)Fpscr;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        // Make sure the right shift sign extended when it should.
        if (srcElem1 < 0 && destElem >= 0) {
            destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                         1 - shiftAmt));
        }
    } else if (shiftAmt > 0) {
        bool sat = false;
        if (shiftAmt >= sizeof(Element) * 8) {
            if (srcElem1 != 0)
                sat = true;
            else
                destElem = 0;
        } else {
            if (bits(srcElem1, sizeof(Element) * 8 - 1,
                        sizeof(Element) * 8 - 1 - shiftAmt) !=
                    ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
                sat = true;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        }
        if (sat) {
            fpscr.qc = 1;
            destElem = mask(sizeof(Element) * 8 - 1);
            if (srcElem1 < 0)
                destElem = ~destElem;
        }
    } else {
        destElem = srcElem1;
    }
    Fpscr = fpscr;
'''
threeEqualRegInst("vqshl", "VqshlSD", signedTypes, 2, vqshlSCode)
threeEqualRegInst("vqshl", "VqshlSQ", signedTypes, 4, vqshlSCode)
1844
# Unsigned saturating rounding shift by a per-element register amount.
# Negative amounts shift right and add the rounding bit
# bits(srcElem1, shiftAmt - 1) (taken only when the shift is at most the
# element width).  All other amounts go through the saturating left
# shift: the result becomes mask(element width) with fpscr.qc set when
# bits would be shifted out.
# NOTE(review): unlike vqshlUCode there is no separate branch for a shift
# amount of 0, so that case calls bits() with a low index one above the
# high index -- confirm bits() returns 0 for such a range.
vqrshlUCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    FPSCR fpscr = (FPSCR)Fpscr;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        Element rBit = 0;
        if (shiftAmt <= sizeof(Element) * 8)
            rBit = bits(srcElem1, shiftAmt - 1);
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        destElem += rBit;
    } else {
        if (shiftAmt >= sizeof(Element) * 8) {
            if (srcElem1 != 0) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = 0;
            }
        } else {
            if (bits(srcElem1, sizeof(Element) * 8 - 1,
                        sizeof(Element) * 8 - shiftAmt)) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        }
    }
    Fpscr = fpscr;
'''
threeEqualRegInst("vqrshl", "VqrshlUD", unsignedTypes, 2, vqrshlUCode)
threeEqualRegInst("vqrshl", "VqrshlUQ", unsignedTypes, 4, vqrshlUCode)
1882
# Signed saturating rounding shift by a per-element register amount.
# Negative amounts shift right, restore the sign bits and add a rounding
# bit: bits(srcElem1, shiftAmt - 1) when the shift is at most the element
# width, or 1 when it is larger and srcElem1 is negative.  Positive
# amounts shift left with the same overflow test and saturated value as
# vqshlSCode.  A zero amount copies srcElem1.
vqrshlSCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    FPSCR fpscr = (FPSCR)Fpscr;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        Element rBit = 0;
        if (shiftAmt <= sizeof(Element) * 8)
            rBit = bits(srcElem1, shiftAmt - 1);
        if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
            rBit = 1;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        // Make sure the right shift sign extended when it should.
        if (srcElem1 < 0 && destElem >= 0) {
            destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                         1 - shiftAmt));
        }
        destElem += rBit;
    } else if (shiftAmt > 0) {
        bool sat = false;
        if (shiftAmt >= sizeof(Element) * 8) {
            if (srcElem1 != 0)
                sat = true;
            else
                destElem = 0;
        } else {
            if (bits(srcElem1, sizeof(Element) * 8 - 1,
                        sizeof(Element) * 8 - 1 - shiftAmt) !=
                    ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
                sat = true;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        }
        if (sat) {
            fpscr.qc = 1;
            destElem = mask(sizeof(Element) * 8 - 1);
            if (srcElem1 < 0)
                destElem = ~destElem;
        }
    } else {
        destElem = srcElem1;
    }
    Fpscr = fpscr;
'''
threeEqualRegInst("vqrshl", "VqrshlSD", signedTypes, 2, vqrshlSCode)
threeEqualRegInst("vqrshl", "VqrshlSQ", signedTypes, 4, vqrshlSCode)
1934
# Absolute difference and accumulate: |srcElem1 - srcElem2| is added to
# the old destElem.  The trailing True is presumably the readDest flag of
# threeEqualRegInst -- confirm there.
vabaCode = '''
    destElem += (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
                                        (srcElem2 - srcElem1);
'''
threeEqualRegInst("vaba", "VabaD", allTypes, 2, vabaCode, True)
threeEqualRegInst("vaba", "VabaQ", allTypes, 4, vabaCode, True)
# Long form of the accumulate: the difference is computed on operands
# cast to BigElement.
vabalCode = '''
    destElem += (srcElem1 > srcElem2) ?
        ((BigElement)srcElem1 - (BigElement)srcElem2) :
        ((BigElement)srcElem2 - (BigElement)srcElem1);
'''
threeRegLongInst("vabal", "Vabal", smallTypes, vabalCode, True)

# Absolute difference: the larger operand minus the smaller one.
vabdCode = '''
    destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
                                       (srcElem2 - srcElem1);
'''
threeEqualRegInst("vabd", "VabdD", allTypes, 2, vabdCode)
threeEqualRegInst("vabd", "VabdQ", allTypes, 4, vabdCode)
# Long form of the absolute difference, computed on operands cast to
# BigElement.
vabdlCode = '''
    destElem = (srcElem1 > srcElem2) ?
        ((BigElement)srcElem1 - (BigElement)srcElem2) :
        ((BigElement)srcElem2 - (BigElement)srcElem1);
'''
threeRegLongInst("vabdl", "Vabdl", smallTypes, vabdlCode)
1960
# Bit test: all ones when the two sources share at least one set bit,
# otherwise 0.
vtstCode = '''
    destElem = (srcElem1 & srcElem2) ? (Element)(-1) : 0;
'''
threeEqualRegInst("vtst", "VtstD", unsignedTypes, 2, vtstCode)
threeEqualRegInst("vtst", "VtstQ", unsignedTypes, 4, vtstCode)
1966
# Element-wise product of the two sources.
vmulCode = '''
    destElem = srcElem1 * srcElem2;
'''
threeEqualRegInst("vmul", "NVmulD", allTypes, 2, vmulCode)
threeEqualRegInst("vmul", "NVmulQ", allTypes, 4, vmulCode)
# Long multiply: both operands are cast to BigElement before multiplying.
vmullCode = '''
    destElem = (BigElement)srcElem1 * (BigElement)srcElem2;
'''
threeRegLongInst("vmull", "Vmull", smallTypes, vmullCode)

# Multiply-accumulate: the product is added to the old destElem.  The
# trailing True is presumably the readDest flag -- confirm against
# threeEqualRegInst / threeRegLongInst.
vmlaCode = '''
    destElem = destElem + srcElem1 * srcElem2;
'''
threeEqualRegInst("vmla", "NVmlaD", allTypes, 2, vmlaCode, True)
threeEqualRegInst("vmla", "NVmlaQ", allTypes, 4, vmlaCode, True)
# Long multiply-accumulate, with the product formed in BigElement.
vmlalCode = '''
    destElem = destElem + (BigElement)srcElem1 * (BigElement)srcElem2;
'''
threeRegLongInst("vmlal", "Vmlal", smallTypes, vmlalCode, True)
1986
# Saturating doubling multiply-accumulate long.
#
# Step 1: midElem = 2 * srcElem1 * srcElem2, saturated to BigElement.
# Step 2: destElem += midElem, saturated again on signed overflow.
# fpscr.qc is set if either step saturates.
#
# With n-bit elements the doubled product only leaves the signed 2n-bit
# range when both operands are the most negative value (-2^(n-1)), which
# gives 2^(2n-1).  A half-magnitude operand (-2^(n-2)) times the most
# negative one gives 2^(2n-2), which still fits, so only the first case
# is treated as saturating.
vqdmlalCode = '''
    FPSCR fpscr = (FPSCR)Fpscr;
    BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
    Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
    // Only maxNeg * maxNeg overflows once doubled.
    if (srcElem1 == maxNeg && srcElem2 == maxNeg) {
        midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
        fpscr.qc = 1;
    }
    bool negPreDest = ltz(destElem);
    destElem += midElem;
    bool negDest = ltz(destElem);
    bool negMid = ltz(midElem);
    // Adding two values of the same sign overflowed if the sign flipped.
    if (negPreDest == negMid && negMid != negDest) {
        destElem = mask(sizeof(BigElement) * 8 - 1);
        if (negPreDest)
            destElem = ~destElem;
        fpscr.qc = 1;
    }
    Fpscr = fpscr;
'''
# The trailing True is presumably the readDest flag, since the snippet
# accumulates into the old destElem -- confirm against threeRegLongInst.
threeRegLongInst("vqdmlal", "Vqdmlal", smallTypes, vqdmlalCode, True)
2011
# Saturating doubling multiply-subtract long.
#
# Step 1: midElem = 2 * srcElem1 * srcElem2, saturated to BigElement.
# Step 2: destElem -= midElem, saturated again on signed overflow.
# fpscr.qc is set if either step saturates.
#
# With n-bit elements the doubled product only leaves the signed 2n-bit
# range when both operands are the most negative value (-2^(n-1)), which
# gives 2^(2n-1).  A half-magnitude operand (-2^(n-2)) times the most
# negative one gives 2^(2n-2), which still fits, so only the first case
# is treated as saturating.
vqdmlslCode = '''
    FPSCR fpscr = (FPSCR)Fpscr;
    BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
    Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
    // Only maxNeg * maxNeg overflows once doubled.
    if (srcElem1 == maxNeg && srcElem2 == maxNeg) {
        midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
        fpscr.qc = 1;
    }
    bool negPreDest = ltz(destElem);
    destElem -= midElem;
    bool negDest = ltz(destElem);
    // posMid is true when the negated product is negative, i.e. when
    // midElem itself is positive.
    bool posMid = ltz((BigElement)-midElem);
    if (negPreDest == posMid && posMid != negDest) {
        destElem = mask(sizeof(BigElement) * 8 - 1);
        if (negPreDest)
            destElem = ~destElem;
        fpscr.qc = 1;
    }
    Fpscr = fpscr;
'''
# The trailing True is presumably the readDest flag, since the snippet
# accumulates into the old destElem -- confirm against threeRegLongInst.
threeRegLongInst("vqdmlsl", "Vqdmlsl", smallTypes, vqdmlslCode, True)
2036
# "vqdmull": destElem = 2 * srcElem1 * srcElem2. When both sources equal
# (Element)1 << (element-bits - 1), destElem is replaced by
# ~((BigElement)srcElem1 << element-bits) and fpscr.qc is set. Unlike the
# accumulate forms above, no trailing True is passed to threeRegLongInst.
2037 vqdmullCode = '''
2038 FPSCR fpscr = (FPSCR)Fpscr;
2039 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2040 if (srcElem1 == srcElem2 &&
2041 srcElem1 == (Element)((Element)1 <<
2042 (Element)(sizeof(Element) * 8 - 1))) {
2043 destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8));
2044 fpscr.qc = 1;
2045 }
2046 Fpscr = fpscr;
2047 '''
2048 threeRegLongInst("vqdmull", "Vqdmull", smallTypes, vqdmullCode)
2049
# "vmls": destElem minus the product of the two sources, all at Element width.
2050 vmlsCode = '''
2051 destElem = destElem - srcElem1 * srcElem2;
2052 '''
2053 threeEqualRegInst("vmls", "NVmlsD", allTypes, 2, vmlsCode, True)
2054 threeEqualRegInst("vmls", "NVmlsQ", allTypes, 4, vmlsCode, True)
# "vmlsl": as above, but both sources are cast to BigElement before the
# multiply.
2055 vmlslCode = '''
2056 destElem = destElem - (BigElement)srcElem1 * (BigElement)srcElem2;
2057 '''
2058 threeRegLongInst("vmlsl", "Vmlsl", smallTypes, vmlslCode, True)
2059
# "vmul" (NVmulp*): carry-less multiply. For every set bit j of srcElem2,
# srcElem1 << j is XORed into destElem, which starts at 0.
2060 vmulpCode = '''
2061 destElem = 0;
2062 for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2063 if (bits(srcElem2, j))
2064 destElem ^= srcElem1 << j;
2065 }
2066 '''
2067 threeEqualRegInst("vmul", "NVmulpD", unsignedTypes, 2, vmulpCode)
2068 threeEqualRegInst("vmul", "NVmulpQ", unsignedTypes, 4, vmulpCode)
# "vmull" (Vmullp): same XOR-accumulate loop, but srcElem1 is cast to
# BigElement before shifting so the shifted-out bits are kept.
2069 vmullpCode = '''
2070 destElem = 0;
2071 for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2072 if (bits(srcElem2, j))
2073 destElem ^= (BigElement)srcElem1 << j;
2074 }
2075 '''
2076 threeRegLongInst("vmull", "Vmullp", smallUnsignedTypes, vmullpCode)
2077
# Integer "vpmax" / "vpmin": reuse vmaxCode / vminCode (bound outside this
# excerpt) with pairwise=True.
2078 threeEqualRegInst("vpmax", "VpmaxD", allTypes, 2, vmaxCode, pairwise=True)
2079 threeEqualRegInst("vpmax", "VpmaxQ", allTypes, 4, vmaxCode, pairwise=True)
2080
2081 threeEqualRegInst("vpmin", "VpminD", allTypes, 2, vminCode, pairwise=True)
2082 threeEqualRegInst("vpmin", "VpminQ", allTypes, 4, vminCode, pairwise=True)
2083
# "vqdmulh": destElem is the doubled 64-bit product shifted right by the
# element width. When both sources equal (Element)1 << (element-bits - 1),
# destElem is replaced by ~srcElem1 and fpscr.qc is set.
2084 vqdmulhCode = '''
2085 FPSCR fpscr = (FPSCR)Fpscr;
2086 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >>
2087 (sizeof(Element) * 8);
2088 if (srcElem1 == srcElem2 &&
2089 srcElem1 == (Element)((Element)1 <<
2090 (sizeof(Element) * 8 - 1))) {
2091 destElem = ~srcElem1;
2092 fpscr.qc = 1;
2093 }
2094 Fpscr = fpscr;
2095 '''
2096 threeEqualRegInst("vqdmulh", "VqdmulhD", smallSignedTypes, 2, vqdmulhCode)
2097 threeEqualRegInst("vqdmulh", "VqdmulhQ", smallSignedTypes, 4, vqdmulhCode)
2098
# "vqrdmulh": like vqdmulhCode, but 1 << (element-bits - 1) is added to the
# doubled product before the right shift (rounding). For the listed
# maxNeg / halfNeg operand pairs the result is overridden: a negative destElem
# becomes mask(element-bits - 1), otherwise it becomes
# (Element)1 << (element-bits - 1); fpscr.qc is set in both cases.
# NOTE(review): for halfNeg * maxNeg the rounded result looks representable and
# positive, yet this path rewrites it to the 1 << (bits - 1) pattern -- confirm
# against the architecture pseudocode.
2099 vqrdmulhCode = '''
2100 FPSCR fpscr = (FPSCR)Fpscr;
2101 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
2102 ((int64_t)1 << (sizeof(Element) * 8 - 1))) >>
2103 (sizeof(Element) * 8);
2104 Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2105 Element halfNeg = maxNeg / 2;
2106 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2107 (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2108 (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2109 if (destElem < 0) {
2110 destElem = mask(sizeof(Element) * 8 - 1);
2111 } else {
2112 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2113 }
2114 fpscr.qc = 1;
2115 }
2116 Fpscr = fpscr;
2117 '''
2118 threeEqualRegInst("vqrdmulh", "VqrdmulhD",
2119 smallSignedTypes, 2, vqrdmulhCode)
2120 threeEqualRegInst("vqrdmulh", "VqrdmulhQ",
2121 smallSignedTypes, 4, vqrdmulhCode)
2122
# Floating-point "vmax": processNans() runs first and reports through `done`
# whether it already produced the result. If not, binaryOp() with fpMaxS
# computes it; if it did and flushToZero() reports true for the sources,
# fpscr.idc is set. The local FPSCR copy is written back to Fpscr.
2123 vmaxfpCode = '''
2124 FPSCR fpscr = (FPSCR)Fpscr;
2125 bool done;
2126 destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2127 if (!done) {
2128 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMaxS,
2129 true, true, VfpRoundNearest);
2130 } else if (flushToZero(srcReg1, srcReg2)) {
2131 fpscr.idc = 1;
2132 }
2133 Fpscr = fpscr;
2134 '''
2135 threeEqualRegInstFp("vmax", "VmaxDFp", ("float",), 2, vmaxfpCode)
2136 threeEqualRegInstFp("vmax", "VmaxQFp", ("float",), 4, vmaxfpCode)
2137
# Floating-point "vmin": same structure as the fp max snippet, with fpMinS as
# the operation handed to binaryOp().
2138 vminfpCode = '''
2139 FPSCR fpscr = (FPSCR)Fpscr;
2140 bool done;
2141 destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2142 if (!done) {
2143 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMinS,
2144 true, true, VfpRoundNearest);
2145 } else if (flushToZero(srcReg1, srcReg2)) {
2146 fpscr.idc = 1;
2147 }
2148 Fpscr = fpscr;
2149 '''
2150 threeEqualRegInstFp("vmin", "VminDFp", ("float",), 2, vminfpCode)
2151 threeEqualRegInstFp("vmin", "VminQFp", ("float",), 4, vminfpCode)
2152
# Floating-point "vpmax" / "vpmin": the vmaxfpCode / vminfpCode snippets
# instantiated again with pairwise=True.
2153 threeEqualRegInstFp("vpmax", "VpmaxDFp", ("float",),
2154 2, vmaxfpCode, pairwise=True)
2155 threeEqualRegInstFp("vpmax", "VpmaxQFp", ("float",),
2156 4, vmaxfpCode, pairwise=True)
2157
2158 threeEqualRegInstFp("vpmin", "VpminDFp", ("float",),
2159 2, vminfpCode, pairwise=True)
2160 threeEqualRegInstFp("vpmin", "VpminQFp", ("float",),
2161 4, vminfpCode, pairwise=True)
2162
# Floating-point "vadd": binaryOp() with fpAddS on the two sources; the FPSCR
# copy passed to binaryOp() is written back afterwards. The same snippet also
# backs the pairwise "vpadd" instantiations below.
2163 vaddfpCode = '''
2164 FPSCR fpscr = Fpscr;
2165 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpAddS,
2166 true, true, VfpRoundNearest);
2167 Fpscr = fpscr;
2168 '''
2169 threeEqualRegInstFp("vadd", "VaddDFp", ("float",), 2, vaddfpCode)
2170 threeEqualRegInstFp("vadd", "VaddQFp", ("float",), 4, vaddfpCode)
2171
2172 threeEqualRegInstFp("vpadd", "VpaddDFp", ("float",),
2173 2, vaddfpCode, pairwise=True)
2174 threeEqualRegInstFp("vpadd", "VpaddQFp", ("float",),
2175 4, vaddfpCode, pairwise=True)
2176
# Floating-point "vsub": binaryOp() with fpSubS (srcReg1, srcReg2 in that
# order); FPSCR is read before and written back after.
2177 vsubfpCode = '''
2178 FPSCR fpscr = Fpscr;
2179 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2180 true, true, VfpRoundNearest);
2181 Fpscr = fpscr;
2182 '''
2183 threeEqualRegInstFp("vsub", "VsubDFp", ("float",), 2, vsubfpCode)
2184 threeEqualRegInstFp("vsub", "VsubQFp", ("float",), 4, vsubfpCode)
2185
# Floating-point "vmul": binaryOp() with fpMulS; FPSCR is read before and
# written back after.
2186 vmulfpCode = '''
2187 FPSCR fpscr = Fpscr;
2188 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2189 true, true, VfpRoundNearest);
2190 Fpscr = fpscr;
2191 '''
2192 threeEqualRegInstFp("vmul", "NVmulDFp", ("float",), 2, vmulfpCode)
2193 threeEqualRegInstFp("vmul", "NVmulQFp", ("float",), 4, vmulfpCode)
2194
# Floating-point "vmla": two separate binaryOp() steps -- the product of the
# sources (fpMulS) is stored in `mid`, then mid and the old destReg are added
# (fpAddS). Both steps share the same local FPSCR copy.
2195 vmlafpCode = '''
2196 FPSCR fpscr = Fpscr;
2197 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2198 true, true, VfpRoundNearest);
2199 destReg = binaryOp(fpscr, mid, destReg, fpAddS,
2200 true, true, VfpRoundNearest);
2201 Fpscr = fpscr;
2202 '''
2203 threeEqualRegInstFp("vmla", "NVmlaDFp", ("float",), 2, vmlafpCode, True)
2204 threeEqualRegInstFp("vmla", "NVmlaQFp", ("float",), 4, vmlafpCode, True)
2205
# Floating-point "vmls": the product of the sources (fpMulS) is stored in
# `mid`, then fpSubS is applied to (destReg, mid) in that operand order.
2206 vmlsfpCode = '''
2207 FPSCR fpscr = Fpscr;
2208 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2209 true, true, VfpRoundNearest);
2210 destReg = binaryOp(fpscr, destReg, mid, fpSubS,
2211 true, true, VfpRoundNearest);
2212 Fpscr = fpscr;
2213 '''
2214 threeEqualRegInstFp("vmls", "NVmlsDFp", ("float",), 2, vmlsfpCode, True)
2215 threeEqualRegInstFp("vmls", "NVmlsQFp", ("float",), 4, vmlsfpCode, True)
2216
# Floating-point "vcgt" (register form): binaryOp() runs vcgtFunc on the two
# sources. destReg becomes -1 when the result equals 0 and 0 otherwise; a
# result of 2.0 additionally sets fpscr.ioc. (Presumably vcgtFunc returns 0
# for "true" and 2.0 for an invalid operand -- confirm in its definition.)
2217 vcgtfpCode = '''
2218 FPSCR fpscr = (FPSCR)Fpscr;
2219 float res = binaryOp(fpscr, srcReg1, srcReg2, vcgtFunc,
2220 true, true, VfpRoundNearest);
2221 destReg = (res == 0) ? -1 : 0;
2222 if (res == 2.0)
2223 fpscr.ioc = 1;
2224 Fpscr = fpscr;
2225 '''
2226 threeEqualRegInstFp("vcgt", "VcgtDFp", ("float",),
2227 2, vcgtfpCode, toInt = True)
2228 threeEqualRegInstFp("vcgt", "VcgtQFp", ("float",),
2229 4, vcgtfpCode, toInt = True)
2230
# Floating-point "vcge" (register form): binaryOp() with vcgeFunc; destReg is
# -1 when the result equals 0, else 0, and a result of 2.0 sets fpscr.ioc.
2231 vcgefpCode = '''
2232 FPSCR fpscr = (FPSCR)Fpscr;
2233 float res = binaryOp(fpscr, srcReg1, srcReg2, vcgeFunc,
2234 true, true, VfpRoundNearest);
2235 destReg = (res == 0) ? -1 : 0;
2236 if (res == 2.0)
2237 fpscr.ioc = 1;
2238 Fpscr = fpscr;
2239 '''
2240 threeEqualRegInstFp("vcge", "VcgeDFp", ("float",),
2241 2, vcgefpCode, toInt = True)
2242 threeEqualRegInstFp("vcge", "VcgeQFp", ("float",),
2243 4, vcgefpCode, toInt = True)
2244
# Floating-point "vacgt": binaryOp() with vacgtFunc; destReg is -1 when the
# result equals 0, else 0, and a result of 2.0 sets fpscr.ioc.
2245 vacgtfpCode = '''
2246 FPSCR fpscr = (FPSCR)Fpscr;
2247 float res = binaryOp(fpscr, srcReg1, srcReg2, vacgtFunc,
2248 true, true, VfpRoundNearest);
2249 destReg = (res == 0) ? -1 : 0;
2250 if (res == 2.0)
2251 fpscr.ioc = 1;
2252 Fpscr = fpscr;
2253 '''
2254 threeEqualRegInstFp("vacgt", "VacgtDFp", ("float",),
2255 2, vacgtfpCode, toInt = True)
2256 threeEqualRegInstFp("vacgt", "VacgtQFp", ("float",),
2257 4, vacgtfpCode, toInt = True)
2258
# Floating-point "vacge": binaryOp() with vacgeFunc; destReg is -1 when the
# result equals 0, else 0, and a result of 2.0 sets fpscr.ioc.
2259 vacgefpCode = '''
2260 FPSCR fpscr = (FPSCR)Fpscr;
2261 float res = binaryOp(fpscr, srcReg1, srcReg2, vacgeFunc,
2262 true, true, VfpRoundNearest);
2263 destReg = (res == 0) ? -1 : 0;
2264 if (res == 2.0)
2265 fpscr.ioc = 1;
2266 Fpscr = fpscr;
2267 '''
2268 threeEqualRegInstFp("vacge", "VacgeDFp", ("float",),
2269 2, vacgefpCode, toInt = True)
2270 threeEqualRegInstFp("vacge", "VacgeQFp", ("float",),
2271 4, vacgefpCode, toInt = True)
2272
# Floating-point "vceq" (register form): binaryOp() with vceqFunc; destReg is
# -1 when the result equals 0, else 0, and a result of 2.0 sets fpscr.ioc.
2273 vceqfpCode = '''
2274 FPSCR fpscr = (FPSCR)Fpscr;
2275 float res = binaryOp(fpscr, srcReg1, srcReg2, vceqFunc,
2276 true, true, VfpRoundNearest);
2277 destReg = (res == 0) ? -1 : 0;
2278 if (res == 2.0)
2279 fpscr.ioc = 1;
2280 Fpscr = fpscr;
2281 '''
2282 threeEqualRegInstFp("vceq", "VceqDFp", ("float",),
2283 2, vceqfpCode, toInt = True)
2284 threeEqualRegInstFp("vceq", "VceqQFp", ("float",),
2285 4, vceqfpCode, toInt = True)
2286
# "vrecps": binaryOp() with fpRecpsS on the two sources; FPSCR is read before
# and written back after.
2287 vrecpsCode = '''
2288 FPSCR fpscr = Fpscr;
2289 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRecpsS,
2290 true, true, VfpRoundNearest);
2291 Fpscr = fpscr;
2292 '''
2293 threeEqualRegInstFp("vrecps", "VrecpsDFp", ("float",), 2, vrecpsCode)
2294 threeEqualRegInstFp("vrecps", "VrecpsQFp", ("float",), 4, vrecpsCode)
2295
# "vrsqrts": binaryOp() with fpRSqrtsS on the two sources; FPSCR is read
# before and written back after.
2296 vrsqrtsCode = '''
2297 FPSCR fpscr = Fpscr;
2298 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRSqrtsS,
2299 true, true, VfpRoundNearest);
2300 Fpscr = fpscr;
2301 '''
2302 threeEqualRegInstFp("vrsqrts", "VrsqrtsDFp", ("float",), 2, vrsqrtsCode)
2303 threeEqualRegInstFp("vrsqrts", "VrsqrtsQFp", ("float",), 4, vrsqrtsCode)
2304
# Floating-point "vabd": the difference of the sources is computed through
# binaryOp() with fpSubS, then fabs() of that difference becomes destReg.
2305 vabdfpCode = '''
2306 FPSCR fpscr = Fpscr;
2307 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2308 true, true, VfpRoundNearest);
2309 destReg = fabs(mid);
2310 Fpscr = fpscr;
2311 '''
2312 threeEqualRegInstFp("vabd", "VabdDFp", ("float",), 2, vabdfpCode)
2313 threeEqualRegInstFp("vabd", "VabdQFp", ("float",), 4, vabdfpCode)
2314
# Second family of multiply instantiations: the snippets bound above (plus
# vmlaCode, vmulCode and vmullCode from outside this excerpt) are reused with
# the twoEqualRegInst / twoEqualRegInstFp / twoRegLongInst generators and
# class names carrying an extra "s" (presumably the by-scalar encodings --
# confirm against the generator definitions).
# NOTE(review): "vmla" is instantiated over unsignedTypes here while "vmls"
# and "vmul" use allTypes -- confirm the asymmetry is intended.
2315 twoEqualRegInst("vmla", "VmlasD", unsignedTypes, 2, vmlaCode, True)
2316 twoEqualRegInst("vmla", "VmlasQ", unsignedTypes, 4, vmlaCode, True)
2317 twoEqualRegInstFp("vmla", "VmlasDFp", ("float",), 2, vmlafpCode, True)
2318 twoEqualRegInstFp("vmla", "VmlasQFp", ("float",), 4, vmlafpCode, True)
2319 twoRegLongInst("vmlal", "Vmlals", smallTypes, vmlalCode, True)
2320
2321 twoEqualRegInst("vmls", "VmlssD", allTypes, 2, vmlsCode, True)
2322 twoEqualRegInst("vmls", "VmlssQ", allTypes, 4, vmlsCode, True)
2323 twoEqualRegInstFp("vmls", "VmlssDFp", ("float",), 2, vmlsfpCode, True)
2324 twoEqualRegInstFp("vmls", "VmlssQFp", ("float",), 4, vmlsfpCode, True)
2325 twoRegLongInst("vmlsl", "Vmlsls", smallTypes, vmlslCode, True)
2326
2327 twoEqualRegInst("vmul", "VmulsD", allTypes, 2, vmulCode)
2328 twoEqualRegInst("vmul", "VmulsQ", allTypes, 4, vmulCode)
2329 twoEqualRegInstFp("vmul", "VmulsDFp", ("float",), 2, vmulfpCode)
2330 twoEqualRegInstFp("vmul", "VmulsQFp", ("float",), 4, vmulfpCode)
2331 twoRegLongInst("vmull", "Vmulls", smallTypes, vmullCode)
2332
2333 twoRegLongInst("vqdmull", "Vqdmulls", smallTypes, vqdmullCode)
2334 twoRegLongInst("vqdmlal", "Vqdmlals", smallTypes, vqdmlalCode, True)
2335 twoRegLongInst("vqdmlsl", "Vqdmlsls", smallTypes, vqdmlslCode, True)
2336 twoEqualRegInst("vqdmulh", "VqdmulhsD", smallSignedTypes, 2, vqdmulhCode)
2337 twoEqualRegInst("vqdmulh", "VqdmulhsQ", smallSignedTypes, 4, vqdmulhCode)
2338 twoEqualRegInst("vqrdmulh", "VqrdmulhsD",
2339 smallSignedTypes, 2, vqrdmulhCode)
2340 twoEqualRegInst("vqrdmulh", "VqrdmulhsQ",
2341 smallSignedTypes, 4, vqrdmulhCode)
2342
# "vshr": shift right by imm. A shift of at least the operand width is handled
# explicitly: the result is -1 when ltz(srcElem1) holds and 0 otherwise.
2343 vshrCode = '''
2344 if (imm >= sizeof(srcElem1) * 8) {
2345 if (ltz(srcElem1))
2346 destElem = -1;
2347 else
2348 destElem = 0;
2349 } else {
2350 destElem = srcElem1 >> imm;
2351 }
2352 '''
2353 twoRegShiftInst("vshr", "NVshrD", allTypes, 2, vshrCode)
2354 twoRegShiftInst("vshr", "NVshrQ", allTypes, 4, vshrCode)
2355
2356 vsraCode = '''
2357 Element mid;;
2358 if (imm >= sizeof(srcElem1) * 8) {
2359 mid = ltz(srcElem1) ? -1 : 0;
2360 } else {
2361 mid = srcElem1 >> imm;
2362 if (ltz(srcElem1) && !ltz(mid)) {
2363 mid |= -(mid & ((Element)1 <<
2364 (sizeof(Element) * 8 - 1 - imm)));
2365 }
2366 }
2367 destElem += mid;
2368 '''
2369 twoRegShiftInst("vsra", "NVsraD", allTypes, 2, vsraCode, True)
2370 twoRegShiftInst("vsra", "NVsraQ", allTypes, 4, vsraCode, True)
2371
# "vrshr": rounding shift right. For a non-zero imm the bit just below the
# cut (bit imm - 1) is added to the shifted value; the shift is split into
# (imm - 1) and 1 so that imm equal to the operand width stays a valid C++
# shift. imm greater than the operand width yields 0; imm == 0 copies the
# source.
2372 vrshrCode = '''
2373 if (imm > sizeof(srcElem1) * 8) {
2374 destElem = 0;
2375 } else if (imm) {
2376 Element rBit = bits(srcElem1, imm - 1);
2377 destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2378 } else {
2379 destElem = srcElem1;
2380 }
2381 '''
2382 twoRegShiftInst("vrshr", "NVrshrD", allTypes, 2, vrshrCode)
2383 twoRegShiftInst("vrshr", "NVrshrQ", allTypes, 4, vrshrCode)
2384
# "vrsra": the rounding shift right of vrshrCode, with the result added to
# destElem instead of replacing it. The "+= 0" branch leaves destElem
# unchanged for imm greater than the operand width.
2385 vrsraCode = '''
2386 if (imm > sizeof(srcElem1) * 8) {
2387 destElem += 0;
2388 } else if (imm) {
2389 Element rBit = bits(srcElem1, imm - 1);
2390 destElem += ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2391 } else {
2392 destElem += srcElem1;
2393 }
2394 '''
2395 twoRegShiftInst("vrsra", "NVrsraD", allTypes, 2, vrsraCode, True)
2396 twoRegShiftInst("vrsra", "NVrsraQ", allTypes, 4, vrsraCode, True)
2397
# "vsri": shift right and insert. The shifted source is ORed with the bits of
# the old destElem selected by ~mask(element-bits - imm). A shift of the full
# element width or more leaves destElem as it was.
2398 vsriCode = '''
2399 if (imm >= sizeof(Element) * 8)
2400 destElem = destElem;
2401 else
2402 destElem = (srcElem1 >> imm) |
2403 (destElem & ~mask(sizeof(Element) * 8 - imm));
2404 '''
2405 twoRegShiftInst("vsri", "NVsriD", unsignedTypes, 2, vsriCode, True)
2406 twoRegShiftInst("vsri", "NVsriQ", unsignedTypes, 4, vsriCode, True)
2407
# "vshl" (immediate): shift left by imm. For imm at or above the element width
# the shift is performed as (element-bits - 1) followed by 1, avoiding a
# single over-wide C++ shift.
2408 vshlCode = '''
2409 if (imm >= sizeof(Element) * 8)
2410 destElem = (srcElem1 << (sizeof(Element) * 8 - 1)) << 1;
2411 else
2412 destElem = srcElem1 << imm;
2413 '''
2414 twoRegShiftInst("vshl", "NVshlD", unsignedTypes, 2, vshlCode)
2415 twoRegShiftInst("vshl", "NVshlQ", unsignedTypes, 4, vshlCode)
2416
# "vsli": shift left and insert. The shifted source is ORed with the bits of
# the old destElem selected by mask(imm). A shift of the full element width or
# more leaves destElem as it was.
2417 vsliCode = '''
2418 if (imm >= sizeof(Element) * 8)
2419 destElem = destElem;
2420 else
2421 destElem = (srcElem1 << imm) | (destElem & mask(imm));
2422 '''
2423 twoRegShiftInst("vsli", "NVsliD", unsignedTypes, 2, vsliCode, True)
2424 twoRegShiftInst("vsli", "NVsliQ", unsignedTypes, 4, vsliCode, True)
2425
# "vqshl" (immediate, instantiated over signedTypes): shift left with clamping.
#   - imm >= element width: any non-zero source clamps to
#     (Element)1 << (element-bits - 1), complemented when the source is
#     positive, and sets fpscr.qc; a zero source gives 0.
#   - 0 < imm < width: the top imm + 1 bits of the source are extracted; unless
#     they are all zero or equal mask(imm + 1), the result clamps the same way
#     and qc is set.
#   - imm == 0: plain copy.
2426 vqshlCode = '''
2427 FPSCR fpscr = (FPSCR)Fpscr;
2428 if (imm >= sizeof(Element) * 8) {
2429 if (srcElem1 != 0) {
2430 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2431 if (srcElem1 > 0)
2432 destElem = ~destElem;
2433 fpscr.qc = 1;
2434 } else {
2435 destElem = 0;
2436 }
2437 } else if (imm) {
2438 destElem = (srcElem1 << imm);
2439 uint64_t topBits = bits((uint64_t)srcElem1,
2440 sizeof(Element) * 8 - 1,
2441 sizeof(Element) * 8 - 1 - imm);
2442 if (topBits != 0 && topBits != mask(imm + 1)) {
2443 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2444 if (srcElem1 > 0)
2445 destElem = ~destElem;
2446 fpscr.qc = 1;
2447 }
2448 } else {
2449 destElem = srcElem1;
2450 }
2451 Fpscr = fpscr;
2452 '''
2453 twoRegShiftInst("vqshl", "NVqshlD", signedTypes, 2, vqshlCode)
2454 twoRegShiftInst("vqshl", "NVqshlQ", signedTypes, 4, vqshlCode)
2455
# "vqshlu" (instantiated over unsignedTypes): shift left, clamping to
# mask(element-bits) and setting fpscr.qc when the source is non-zero and
# imm >= element width, or when any of the top imm source bits is set.
# imm == 0 copies the source.
2456 vqshluCode = '''
2457 FPSCR fpscr = (FPSCR)Fpscr;
2458 if (imm >= sizeof(Element) * 8) {
2459 if (srcElem1 != 0) {
2460 destElem = mask(sizeof(Element) * 8);
2461 fpscr.qc = 1;
2462 } else {
2463 destElem = 0;
2464 }
2465 } else if (imm) {
2466 destElem = (srcElem1 << imm);
2467 uint64_t topBits = bits((uint64_t)srcElem1,
2468 sizeof(Element) * 8 - 1,
2469 sizeof(Element) * 8 - imm);
2470 if (topBits != 0) {
2471 destElem = mask(sizeof(Element) * 8);
2472 fpscr.qc = 1;
2473 }
2474 } else {
2475 destElem = srcElem1;
2476 }
2477 Fpscr = fpscr;
2478 '''
2479 twoRegShiftInst("vqshlu", "NVqshluD", unsignedTypes, 2, vqshluCode)
2480 twoRegShiftInst("vqshlu", "NVqshluQ", unsignedTypes, 4, vqshluCode)
2481
# "vqshlus" (instantiated over signedTypes): shift left with clamping to the
# range [0, mask(element-bits)]. In every imm case a negative source gives 0
# with fpscr.qc set; a positive source that loses set bits in the shift (or any
# positive source when imm >= element width) gives mask(element-bits) with qc
# set.
2482 vqshlusCode = '''
2483 FPSCR fpscr = (FPSCR)Fpscr;
2484 if (imm >= sizeof(Element) * 8) {
2485 if (srcElem1 < 0) {
2486 destElem = 0;
2487 fpscr.qc = 1;
2488 } else if (srcElem1 > 0) {
2489 destElem = mask(sizeof(Element) * 8);
2490 fpscr.qc = 1;
2491 } else {
2492 destElem = 0;
2493 }
2494 } else if (imm) {
2495 destElem = (srcElem1 << imm);
2496 uint64_t topBits = bits((uint64_t)srcElem1,
2497 sizeof(Element) * 8 - 1,
2498 sizeof(Element) * 8 - imm);
2499 if (srcElem1 < 0) {
2500 destElem = 0;
2501 fpscr.qc = 1;
2502 } else if (topBits != 0) {
2503 destElem = mask(sizeof(Element) * 8);
2504 fpscr.qc = 1;
2505 }
2506 } else {
2507 if (srcElem1 < 0) {
2508 fpscr.qc = 1;
2509 destElem = 0;
2510 } else {
2511 destElem = srcElem1;
2512 }
2513 }
2514 Fpscr = fpscr;
2515 '''
2516 twoRegShiftInst("vqshlus", "NVqshlusD", signedTypes, 2, vqshlusCode)
2517 twoRegShiftInst("vqshlus", "NVqshlusQ", signedTypes, 4, vqshlusCode)
2518
# "vshrn": shift right by imm through the narrowing generator; a shift of the
# full source width or more gives 0. (srcElem1 is presumably the wide
# BigElement here -- confirm in twoRegNarrowShiftInst.)
2519 vshrnCode = '''
2520 if (imm >= sizeof(srcElem1) * 8) {
2521 destElem = 0;
2522 } else {
2523 destElem = srcElem1 >> imm;
2524 }
2525 '''
2526 twoRegNarrowShiftInst("vshrn", "NVshrn", smallUnsignedTypes, vshrnCode)
2527
# "vrshrn": narrowing shift right with rounding -- bit (imm - 1) of the source
# is added to the shifted value. imm greater than the source width gives 0 and
# imm == 0 copies the source.
2528 vrshrnCode = '''
2529 if (imm > sizeof(srcElem1) * 8) {
2530 destElem = 0;
2531 } else if (imm) {
2532 Element rBit = bits(srcElem1, imm - 1);
2533 destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2534 } else {
2535 destElem = srcElem1;
2536 }
2537 '''
2538 twoRegNarrowShiftInst("vrshrn", "NVrshrn", smallUnsignedTypes, vrshrnCode)
2539
# "vqshrn" (instantiated over smallSignedTypes): narrowing shift right with
# clamping.
#   - imm > source width: result 0; fpscr.qc is set unless the source is 0 or -1.
#   - imm != 0: the shifted value is sign-extended by hand from bit
#     (BigElement-bits - 1 - imm). If it does not survive a round trip through
#     Element, destElem becomes mask(element-bits - 1), complemented for a
#     negative source, and qc is set.
#   - imm == 0: plain copy.
2540 vqshrnCode = '''
2541 FPSCR fpscr = (FPSCR)Fpscr;
2542 if (imm > sizeof(srcElem1) * 8) {
2543 if (srcElem1 != 0 && srcElem1 != -1)
2544 fpscr.qc = 1;
2545 destElem = 0;
2546 } else if (imm) {
2547 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2548 mid |= -(mid & ((BigElement)1 <<
2549 (sizeof(BigElement) * 8 - 1 - imm)));
2550 if (mid != (Element)mid) {
2551 destElem = mask(sizeof(Element) * 8 - 1);
2552 if (srcElem1 < 0)
2553 destElem = ~destElem;
2554 fpscr.qc = 1;
2555 } else {
2556 destElem = mid;
2557 }
2558 } else {
2559 destElem = srcElem1;
2560 }
2561 Fpscr = fpscr;
2562 '''
2563 twoRegNarrowShiftInst("vqshrn", "NVqshrn", smallSignedTypes, vqshrnCode)
2564
# "vqshrun" over smallUnsignedTypes (class NVqshrun): narrowing shift right;
# when the shifted value does not fit in Element the result is
# mask(element-bits) and fpscr.qc is set. imm greater than the source width
# gives 0 (qc set for a non-zero source); imm == 0 copies the source.
2565 vqshrunCode = '''
2566 FPSCR fpscr = (FPSCR)Fpscr;
2567 if (imm > sizeof(srcElem1) * 8) {
2568 if (srcElem1 != 0)
2569 fpscr.qc = 1;
2570 destElem = 0;
2571 } else if (imm) {
2572 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2573 if (mid != (Element)mid) {
2574 destElem = mask(sizeof(Element) * 8);
2575 fpscr.qc = 1;
2576 } else {
2577 destElem = mid;
2578 }
2579 } else {
2580 destElem = srcElem1;
2581 }
2582 Fpscr = fpscr;
2583 '''
2584 twoRegNarrowShiftInst("vqshrun", "NVqshrun",
2585 smallUnsignedTypes, vqshrunCode)
2586
# "vqshrun" over smallSignedTypes (class NVqshruns): narrowing shift right
# where the result is clamped to [0, mask(element-bits)]. If any bit of the
# shifted value above the Element width is set, a negative source gives 0 and
# any other source gives mask(element-bits); fpscr.qc is set in both cases.
2587 vqshrunsCode = '''
2588 FPSCR fpscr = (FPSCR)Fpscr;
2589 if (imm > sizeof(srcElem1) * 8) {
2590 if (srcElem1 != 0)
2591 fpscr.qc = 1;
2592 destElem = 0;
2593 } else if (imm) {
2594 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2595 if (bits(mid, sizeof(BigElement) * 8 - 1,
2596 sizeof(Element) * 8) != 0) {
2597 if (srcElem1 < 0) {
2598 destElem = 0;
2599 } else {
2600 destElem = mask(sizeof(Element) * 8);
2601 }
2602 fpscr.qc = 1;
2603 } else {
2604 destElem = mid;
2605 }
2606 } else {
2607 destElem = srcElem1;
2608 }
2609 Fpscr = fpscr;
2610 '''
2611 twoRegNarrowShiftInst("vqshrun", "NVqshruns",
2612 smallSignedTypes, vqshrunsCode)
2613
# "vqrshrn" (instantiated over smallSignedTypes): narrowing shift right with
# rounding and clamping. The bit shifted out last (rBit) is added after the
# shifted value has been sign-extended by hand. A value that does not survive
# a round trip through Element becomes mask(element-bits - 1), complemented
# for a negative source, with fpscr.qc set. The imm == 0 branch applies the
# same range check directly to the source.
2614 vqrshrnCode = '''
2615 FPSCR fpscr = (FPSCR)Fpscr;
2616 if (imm > sizeof(srcElem1) * 8) {
2617 if (srcElem1 != 0 && srcElem1 != -1)
2618 fpscr.qc = 1;
2619 destElem = 0;
2620 } else if (imm) {
2621 BigElement mid = (srcElem1 >> (imm - 1));
2622 uint64_t rBit = mid & 0x1;
2623 mid >>= 1;
2624 mid |= -(mid & ((BigElement)1 <<
2625 (sizeof(BigElement) * 8 - 1 - imm)));
2626 mid += rBit;
2627 if (mid != (Element)mid) {
2628 destElem = mask(sizeof(Element) * 8 - 1);
2629 if (srcElem1 < 0)
2630 destElem = ~destElem;
2631 fpscr.qc = 1;
2632 } else {
2633 destElem = mid;
2634 }
2635 } else {
2636 if (srcElem1 != (Element)srcElem1) {
2637 destElem = mask(sizeof(Element) * 8 - 1);
2638 if (srcElem1 < 0)
2639 destElem = ~destElem;
2640 fpscr.qc = 1;
2641 } else {
2642 destElem = srcElem1;
2643 }
2644 }
2645 Fpscr = fpscr;
2646 '''
2647 twoRegNarrowShiftInst("vqrshrn", "NVqrshrn",
2648 smallSignedTypes, vqrshrnCode)
2649
2650 vqrshrunCode = '''
2651 FPSCR fpscr = (FPSCR)Fpscr;
2652 if (imm > sizeof(srcElem1) * 8) {
2653 if (srcElem1 != 0)
2654 fpscr.qc = 1;
2655 destElem = 0;
2656 } else if (imm) {
2657 BigElement mid = (srcElem1 >> (imm - 1));
2658 uint64_t rBit = mid & 0x1;
2659 mid >>= 1;
2660 mid += rBit;
2661 if (mid != (Element)mid) {
2662 destElem = mask(sizeof(Element) * 8);
2663 fpscr.qc = 1;
2664 } else {
2665 destElem = mid;
2666 }
2667 } else {
2668 if (srcElem1 != (Element)srcElem1) {
2669 destElem = mask(sizeof(Element) * 8 - 1);
2670 fpscr.qc = 1;
2671 } else {
2672 destElem = srcElem1;
2673 }
2674 }
2675 Fpscr = fpscr;
2676 '''
2677 twoRegNarrowShiftInst("vqrshrun", "NVqrshrun",
2678 smallUnsignedTypes, vqrshrunCode)
2679
# "vqrshrun" over smallSignedTypes (class NVqrshruns): narrowing shift right
# with rounding, result clamped to [0, mask(element-bits)]. After manual sign
# extension and adding rBit, any set bit above the Element width makes the
# result 0 (negative source) or mask(element-bits) (otherwise) with fpscr.qc
# set. With imm == 0 only a negative source is clamped (to 0, with qc).
2680 vqrshrunsCode = '''
2681 FPSCR fpscr = (FPSCR)Fpscr;
2682 if (imm > sizeof(srcElem1) * 8) {
2683 if (srcElem1 != 0)
2684 fpscr.qc = 1;
2685 destElem = 0;
2686 } else if (imm) {
2687 BigElement mid = (srcElem1 >> (imm - 1));
2688 uint64_t rBit = mid & 0x1;
2689 mid >>= 1;
2690 mid |= -(mid & ((BigElement)1 <<
2691 (sizeof(BigElement) * 8 - 1 - imm)));
2692 mid += rBit;
2693 if (bits(mid, sizeof(BigElement) * 8 - 1,
2694 sizeof(Element) * 8) != 0) {
2695 if (srcElem1 < 0) {
2696 destElem = 0;
2697 } else {
2698 destElem = mask(sizeof(Element) * 8);
2699 }
2700 fpscr.qc = 1;
2701 } else {
2702 destElem = mid;
2703 }
2704 } else {
2705 if (srcElem1 < 0) {
2706 fpscr.qc = 1;
2707 destElem = 0;
2708 } else {
2709 destElem = srcElem1;
2710 }
2711 }
2712 Fpscr = fpscr;
2713 '''
2714 twoRegNarrowShiftInst("vqrshrun", "NVqrshruns",
2715 smallSignedTypes, vqrshrunsCode)
2716
# "vshll": the source is cast to BigElement and shifted left by imm; a shift of
# the full destination width or more gives 0.
2717 vshllCode = '''
2718 if (imm >= sizeof(destElem) * 8) {
2719 destElem = 0;
2720 } else {
2721 destElem = (BigElement)srcElem1 << imm;
2722 }
2723 '''
2724 twoRegLongShiftInst("vshll", "NVshll", smallTypes, vshllCode)
2725
# "vmovl": plain assignment of the source element to the destination element,
# instantiated through the long-shift generator (so the widening presumably
# comes from the differing Element / BigElement types -- confirm there).
2726 vmovlCode = '''
2727 destElem = srcElem1;
2728 '''
2729 twoRegLongShiftInst("vmovl", "NVmovl", smallTypes, vmovlCode)
2730
# "vcvt" float -> fixed point via vfpFpSToFixed(srcElem1, false, false, imm)
# (class names NVcvt2ufx*, presumably the unsigned result form). fpscr.idc is
# set when flushToZero() reports true for the source. The conversion is
# bracketed by prepFpState() / finishVfp(); the empty asm statements with "m"
# constraints presumably keep the compiler from moving the FP work across
# those calls -- confirm.
2731 vcvt2ufxCode = '''
2732 FPSCR fpscr = Fpscr;
2733 if (flushToZero(srcElem1))
2734 fpscr.idc = 1;
2735 VfpSavedState state = prepFpState(VfpRoundNearest);
2736 __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
2737 destReg = vfpFpSToFixed(srcElem1, false, false, imm);
2738 __asm__ __volatile__("" :: "m" (destReg));
2739 finishVfp(fpscr, state, true);
2740 Fpscr = fpscr;
2741 '''
2742 twoRegShiftInst("vcvt", "NVcvt2ufxD", ("float",),
2743 2, vcvt2ufxCode, toInt = True)
2744 twoRegShiftInst("vcvt", "NVcvt2ufxQ", ("float",),
2745 4, vcvt2ufxCode, toInt = True)
2746
# "vcvt" float -> fixed point, identical to the NVcvt2ufx snippet except that
# vfpFpSToFixed() is called with true as its second argument (class names
# NVcvt2sfx*, presumably the signed result form).
2747 vcvt2sfxCode = '''
2748 FPSCR fpscr = Fpscr;
2749 if (flushToZero(srcElem1))
2750 fpscr.idc = 1;
2751 VfpSavedState state = prepFpState(VfpRoundNearest);
2752 __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
2753 destReg = vfpFpSToFixed(srcElem1, true, false, imm);
2754 __asm__ __volatile__("" :: "m" (destReg));
2755 finishVfp(fpscr, state, true);
2756 Fpscr = fpscr;
2757 '''
2758 twoRegShiftInst("vcvt", "NVcvt2sfxD", ("float",),
2759 2, vcvt2sfxCode, toInt = True)
2760 twoRegShiftInst("vcvt", "NVcvt2sfxQ", ("float",),
2761 4, vcvt2sfxCode, toInt = True)
2762
# "vcvt" fixed point -> float via vfpUFixedToFpS(true, true, srcReg1, false,
# imm), bracketed by prepFpState() / finishVfp(). Instantiated with
# fromInt = True.
2763 vcvtu2fpCode = '''
2764 FPSCR fpscr = Fpscr;
2765 VfpSavedState state = prepFpState(VfpRoundNearest);
2766 __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
2767 destElem = vfpUFixedToFpS(true, true, srcReg1, false, imm);
2768 __asm__ __volatile__("" :: "m" (destElem));
2769 finishVfp(fpscr, state, true);
2770 Fpscr = fpscr;
2771 '''
2772 twoRegShiftInst("vcvt", "NVcvtu2fpD", ("float",),
2773 2, vcvtu2fpCode, fromInt = True)
2774 twoRegShiftInst("vcvt", "NVcvtu2fpQ", ("float",),
2775 4, vcvtu2fpCode, fromInt = True)
2776
# "vcvt" fixed point -> float via vfpSFixedToFpS(true, true, srcReg1, false,
# imm); otherwise the same shape as the NVcvtu2fp snippet.
2777 vcvts2fpCode = '''
2778 FPSCR fpscr = Fpscr;
2779 VfpSavedState state = prepFpState(VfpRoundNearest);
2780 __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
2781 destElem = vfpSFixedToFpS(true, true, srcReg1, false, imm);
2782 __asm__ __volatile__("" :: "m" (destElem));
2783 finishVfp(fpscr, state, true);
2784 Fpscr = fpscr;
2785 '''
2786 twoRegShiftInst("vcvt", "NVcvts2fpD", ("float",),
2787 2, vcvts2fpCode, fromInt = True)
2788 twoRegShiftInst("vcvt", "NVcvts2fpQ", ("float",),
2789 4, vcvts2fpCode, fromInt = True)
2790
# "vcvt" (NVcvts2h): the source bits are reinterpreted as a float with
# bitsToFp(), fpscr.idc is set if flushToZero() reports true for it, and
# vcvtFpSFpH() produces the destination element using fpscr.ahp. Instantiated
# through the narrowing generator with a uint16_t element type.
2791 vcvts2hCode = '''
2792 FPSCR fpscr = Fpscr;
2793 float srcFp1 = bitsToFp(srcElem1, (float)0.0);
2794 if (flushToZero(srcFp1))
2795 fpscr.idc = 1;
2796 VfpSavedState state = prepFpState(VfpRoundNearest);
2797 __asm__ __volatile__("" : "=m" (srcFp1), "=m" (destElem)
2798 : "m" (srcFp1), "m" (destElem));
2799 destElem = vcvtFpSFpH(fpscr, true, true, VfpRoundNearest,
2800 fpscr.ahp, srcFp1);
2801 __asm__ __volatile__("" :: "m" (destElem));
2802 finishVfp(fpscr, state, true);
2803 Fpscr = fpscr;
2804 '''
2805 twoRegNarrowMiscInst("vcvt", "NVcvts2h", ("uint16_t",), vcvts2hCode)
2806
# "vcvt" (NVcvth2s): vcvtFpHFpS() converts the source element using fpscr.ahp
# and fpToBits() stores the resulting float's bit pattern in destElem.
# Instantiated through the long (widening) generator with a uint16_t element
# type.
2807 vcvth2sCode = '''
2808 FPSCR fpscr = Fpscr;
2809 VfpSavedState state = prepFpState(VfpRoundNearest);
2810 __asm__ __volatile__("" : "=m" (srcElem1), "=m" (destElem)
2811 : "m" (srcElem1), "m" (destElem));
2812 destElem = fpToBits(vcvtFpHFpS(fpscr, true, fpscr.ahp, srcElem1));
2813 __asm__ __volatile__("" :: "m" (destElem));
2814 finishVfp(fpscr, state, true);
2815 Fpscr = fpscr;
2816 '''
2817 twoRegLongMiscInst("vcvt", "NVcvth2s", ("uint16_t",), vcvth2sCode)
2818
# Integer "vrsqrte": delegates to unsignedRSqrtEstimate(); instantiated for
# uint32_t only.
2819 vrsqrteCode = '''
2820 destElem = unsignedRSqrtEstimate(srcElem1);
2821 '''
2822 twoRegMiscInst("vrsqrte", "NVrsqrteD", ("uint32_t",), 2, vrsqrteCode)
2823 twoRegMiscInst("vrsqrte", "NVrsqrteQ", ("uint32_t",), 4, vrsqrteCode)
2824
# Floating-point "vrsqrte": sets fpscr.idc when flushToZero() reports true for
# the source, then delegates to fprSqrtEstimate().
2825 vrsqrtefpCode = '''
2826 FPSCR fpscr = Fpscr;
2827 if (flushToZero(srcReg1))
2828 fpscr.idc = 1;
2829 destReg = fprSqrtEstimate(fpscr, srcReg1);
2830 Fpscr = fpscr;
2831 '''
2832 twoRegMiscInstFp("vrsqrte", "NVrsqrteDFp", ("float",), 2, vrsqrtefpCode)
2833 twoRegMiscInstFp("vrsqrte", "NVrsqrteQFp", ("float",), 4, vrsqrtefpCode)
2834
# Integer "vrecpe": delegates to unsignedRecipEstimate(); instantiated for
# uint32_t only.
2835 vrecpeCode = '''
2836 destElem = unsignedRecipEstimate(srcElem1);
2837 '''
2838 twoRegMiscInst("vrecpe", "NVrecpeD", ("uint32_t",), 2, vrecpeCode)
2839 twoRegMiscInst("vrecpe", "NVrecpeQ", ("uint32_t",), 4, vrecpeCode)
2840
# Floating-point "vrecpe": sets fpscr.idc when flushToZero() reports true for
# the source, then delegates to fpRecipEstimate().
2841 vrecpefpCode = '''
2842 FPSCR fpscr = Fpscr;
2843 if (flushToZero(srcReg1))
2844 fpscr.idc = 1;
2845 destReg = fpRecipEstimate(fpscr, srcReg1);
2846 Fpscr = fpscr;
2847 '''
2848 twoRegMiscInstFp("vrecpe", "NVrecpeDFp", ("float",), 2, vrecpefpCode)
2849 twoRegMiscInstFp("vrecpe", "NVrecpeQFp", ("float",), 4, vrecpefpCode)
2850
# "vrev16" / "vrev32" / "vrev64": the element value is copied unchanged and
# the index j is set to i XOR (groupSize - 1), where groupSize is the group
# size in bytes (2, 4 or 8) divided by sizeof(Element). i and j are not
# declared in the snippets; presumably the generator template supplies them as
# the source and destination element indices -- confirm in twoRegMiscInst.
2851 vrev16Code = '''
2852 destElem = srcElem1;
2853 unsigned groupSize = ((1 << 1) / sizeof(Element));
2854 unsigned reverseMask = (groupSize - 1);
2855 j = i ^ reverseMask;
2856 '''
2857 twoRegMiscInst("vrev16", "NVrev16D", ("uint8_t",), 2, vrev16Code)
2858 twoRegMiscInst("vrev16", "NVrev16Q", ("uint8_t",), 4, vrev16Code)
2859 vrev32Code = '''
2860 destElem = srcElem1;
2861 unsigned groupSize = ((1 << 2) / sizeof(Element));
2862 unsigned reverseMask = (groupSize - 1);
2863 j = i ^ reverseMask;
2864 '''
2865 twoRegMiscInst("vrev32", "NVrev32D",
2866 ("uint8_t", "uint16_t"), 2, vrev32Code)
2867 twoRegMiscInst("vrev32", "NVrev32Q",
2868 ("uint8_t", "uint16_t"), 4, vrev32Code)
2869 vrev64Code = '''
2870 destElem = srcElem1;
2871 unsigned groupSize = ((1 << 3) / sizeof(Element));
2872 unsigned reverseMask = (groupSize - 1);
2873 j = i ^ reverseMask;
2874 '''
2875 twoRegMiscInst("vrev64", "NVrev64D", smallUnsignedTypes, 2, vrev64Code)
2876 twoRegMiscInst("vrev64", "NVrev64Q", smallUnsignedTypes, 4, vrev64Code)
2877
# "vpaddl": the two source elements are each cast to BigElement and summed
# into destElem.
2878 vpaddlCode = '''
2879 destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
2880 '''
2881 twoRegCondenseInst("vpaddl", "NVpaddlD", smallTypes, 2, vpaddlCode)
2882 twoRegCondenseInst("vpaddl", "NVpaddlQ", smallTypes, 4, vpaddlCode)
2883
# "vpadal": the widened sum of the two source elements is added to the
# existing destElem.
2884 vpadalCode = '''
2885 destElem += (BigElement)srcElem1 + (BigElement)srcElem2;
2886 '''
2887 twoRegCondenseInst("vpadal", "NVpadalD", smallTypes, 2, vpadalCode, True)
2888 twoRegCondenseInst("vpadal", "NVpadalQ", smallTypes, 4, vpadalCode, True)
2889
# "vcls": counts how many bits directly below the top bit match the top bit.
# The source is shifted left once up front, then repeatedly while its sign
# still matches the original sign, with the count capped at
# element-bits - 1.
2890 vclsCode = '''
2891 unsigned count = 0;
2892 if (srcElem1 < 0) {
2893 srcElem1 <<= 1;
2894 while (srcElem1 < 0 && count < sizeof(Element) * 8 - 1) {
2895 count++;
2896 srcElem1 <<= 1;
2897 }
2898 } else {
2899 srcElem1 <<= 1;
2900 while (srcElem1 >= 0 && count < sizeof(Element) * 8 - 1) {
2901 count++;
2902 srcElem1 <<= 1;
2903 }
2904 }
2905 destElem = count;
2906 '''
2907 twoRegMiscInst("vcls", "NVclsD", signedTypes, 2, vclsCode)
2908 twoRegMiscInst("vcls", "NVclsQ", signedTypes, 4, vclsCode)
2909
# "vclz": counts leading zero bits by shifting the (signed) source left until
# it becomes negative, i.e. until a set bit reaches the top; the count is
# capped at the element width.
2910 vclzCode = '''
2911 unsigned count = 0;
2912 while (srcElem1 >= 0 && count < sizeof(Element) * 8) {
2913 count++;
2914 srcElem1 <<= 1;
2915 }
2916 destElem = count;
2917 '''
2918 twoRegMiscInst("vclz", "NVclzD", signedTypes, 2, vclzCode)
2919 twoRegMiscInst("vclz", "NVclzQ", signedTypes, 4, vclzCode)
2920
# "vcnt": counts set bits by adding the lowest bit to the count and shifting
# the source right until it is zero.
2921 vcntCode = '''
2922 unsigned count = 0;
2923 while (srcElem1 && count < sizeof(Element) * 8) {
2924 count += srcElem1 & 0x1;
2925 srcElem1 >>= 1;
2926 }
2927 destElem = count;
2928 '''
2929 twoRegMiscInst("vcnt", "NVcntD", unsignedTypes, 2, vcntCode)
2930 twoRegMiscInst("vcnt", "NVcntQ", unsignedTypes, 4, vcntCode)
2931
# "vmvn": bitwise complement of the source; instantiated for uint64_t only.
2932 vmvnCode = '''
2933 destElem = ~srcElem1;
2934 '''
2935 twoRegMiscInst("vmvn", "NVmvnD", ("uint64_t",), 2, vmvnCode)
2936 twoRegMiscInst("vmvn", "NVmvnQ", ("uint64_t",), 4, vmvnCode)
2937
# "vqabs": absolute value with a special case -- when the source equals
# (Element)1 << (element-bits - 1) the result is ~srcElem1 and fpscr.qc is
# set; otherwise negative sources are negated and others copied.
2938 vqabsCode = '''
2939 FPSCR fpscr = (FPSCR)Fpscr;
2940 if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
2941 fpscr.qc = 1;
2942 destElem = ~srcElem1;
2943 } else if (srcElem1 < 0) {
2944 destElem = -srcElem1;
2945 } else {
2946 destElem = srcElem1;
2947 }
2948 Fpscr = fpscr;
2949 '''
2950 twoRegMiscInst("vqabs", "NVqabsD", signedTypes, 2, vqabsCode)
2951 twoRegMiscInst("vqabs", "NVqabsQ", signedTypes, 4, vqabsCode)
2952
# "vqneg": negation with the same special case as vqabsCode -- a source equal
# to (Element)1 << (element-bits - 1) yields ~srcElem1 with fpscr.qc set.
2953 vqnegCode = '''
2954 FPSCR fpscr = (FPSCR)Fpscr;
2955 if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
2956 fpscr.qc = 1;
2957 destElem = ~srcElem1;
2958 } else {
2959 destElem = -srcElem1;
2960 }
2961 Fpscr = fpscr;
2962 '''
2963 twoRegMiscInst("vqneg", "NVqnegD", signedTypes, 2, vqnegCode)
2964 twoRegMiscInst("vqneg", "NVqnegQ", signedTypes, 4, vqnegCode)
2965
# Integer "vabs": negates negative sources and copies the rest; there is no
# special handling of the most negative value and FPSCR is not touched.
2966 vabsCode = '''
2967 if (srcElem1 < 0) {
2968 destElem = -srcElem1;
2969 } else {
2970 destElem = srcElem1;
2971 }
2972 '''
2973 twoRegMiscInst("vabs", "NVabsD", signedTypes, 2, vabsCode)
2974 twoRegMiscInst("vabs", "NVabsQ", signedTypes, 4, vabsCode)
# Floating-point "vabs": the float is viewed as a uint32_t through a union and
# ANDed with mask(element-bits - 1), i.e. the top bit is cleared without going
# through floating-point arithmetic.
2975 vabsfpCode = '''
2976 union
2977 {
2978 uint32_t i;
2979 float f;
2980 } cStruct;
2981 cStruct.f = srcReg1;
2982 cStruct.i &= mask(sizeof(Element) * 8 - 1);
2983 destReg = cStruct.f;
2984 '''
2985 twoRegMiscInstFp("vabs", "NVabsDFp", ("float",), 2, vabsfpCode)
2986 twoRegMiscInstFp("vabs", "NVabsQFp", ("float",), 4, vabsfpCode)
2987
# Integer "vneg": plain arithmetic negation of the source element.
2988 vnegCode = '''
2989 destElem = -srcElem1;
2990 '''
2991 twoRegMiscInst("vneg", "NVnegD", signedTypes, 2, vnegCode)
2992 twoRegMiscInst("vneg", "NVnegQ", signedTypes, 4, vnegCode)
# Floating-point "vneg": plain negation of the source register value; FPSCR is
# not read or written.
2993 vnegfpCode = '''
2994 destReg = -srcReg1;
2995 '''
2996 twoRegMiscInstFp("vneg", "NVnegDFp", ("float",), 2, vnegfpCode)
2997 twoRegMiscInstFp("vneg", "NVnegQFp", ("float",), 4, vnegfpCode)
2998
# "vcgt" against zero (two-register form): destElem is mask(element-bits) when
# the source is greater than 0, else 0.
2999 vcgtCode = 'destElem = (srcElem1 > 0) ? mask(sizeof(Element) * 8) : 0;'
3000 twoRegMiscInst("vcgt", "NVcgtD", signedTypes, 2, vcgtCode)
3001 twoRegMiscInst("vcgt", "NVcgtQ", signedTypes, 4, vcgtCode)
# Floating-point variant: rebinds vcgtfpCode (used earlier for the
# three-register form) to a snippet that compares srcReg1 with (FloatReg)0.0
# through binaryOp() / vcgtFunc. destReg is -1 when the result equals 0, else
# 0; a result of 2.0 sets fpscr.ioc.
3002 vcgtfpCode = '''
3003 FPSCR fpscr = (FPSCR)Fpscr;
3004 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgtFunc,
3005 true, true, VfpRoundNearest);
3006 destReg = (res == 0) ? -1 : 0;
3007 if (res == 2.0)
3008 fpscr.ioc = 1;
3009 Fpscr = fpscr;
3010 '''
3011 twoRegMiscInstFp("vcgt", "NVcgtDFp", ("float",),
3012 2, vcgtfpCode, toInt = True)
3013 twoRegMiscInstFp("vcgt", "NVcgtQFp", ("float",),
3014 4, vcgtfpCode, toInt = True)
3015
# "vcge" against zero: destElem is mask(element-bits) when the source is
# greater than or equal to 0, else 0.
3016 vcgeCode = 'destElem = (srcElem1 >= 0) ? mask(sizeof(Element) * 8) : 0;'
3017 twoRegMiscInst("vcge", "NVcgeD", signedTypes, 2, vcgeCode)
3018 twoRegMiscInst("vcge", "NVcgeQ", signedTypes, 4, vcgeCode)
# Floating-point variant: rebinds vcgefpCode to compare srcReg1 with
# (FloatReg)0.0 through binaryOp() / vcgeFunc; same -1 / 0 result mapping and
# fpscr.ioc handling as the other fp compares.
3019 vcgefpCode = '''
3020 FPSCR fpscr = (FPSCR)Fpscr;
3021 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgeFunc,
3022 true, true, VfpRoundNearest);
3023 destReg = (res == 0) ? -1 : 0;
3024 if (res == 2.0)
3025 fpscr.ioc = 1;
3026 Fpscr = fpscr;
3027 '''
3028 twoRegMiscInstFp("vcge", "NVcgeDFp", ("float",),
3029 2, vcgefpCode, toInt = True)
3030 twoRegMiscInstFp("vcge", "NVcgeQFp", ("float",),
3031 4, vcgefpCode, toInt = True)
3032
# Integer VCEQ against zero: an element equal to 0 yields an all-ones mask
# of the element width, anything else yields 0.
vceqCode = "destElem = (srcElem1 == 0) ? mask(sizeof(Element) * 8) : 0;"
twoRegMiscInst("vceq", "NVceqD", signedTypes, 2, vceqCode)
twoRegMiscInst("vceq", "NVceqQ", signedTypes, 4, vceqCode)

# Floating point VCEQ against 0.0 via binaryOp/vceqFunc. A result of 0 is
# turned into -1 in the destination, any other result into 0; a result of
# 2.0 additionally sets FPSCR.IOC.
# NOTE(review): the 0 / 2.0 encoding is defined by vceqFunc, which is not
# visible here -- confirm its meaning there.
vceqfpCode = '''
    FPSCR fpscr = (FPSCR)Fpscr;
    float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vceqFunc,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    Fpscr = fpscr;
'''
twoRegMiscInstFp("vceq", "NVceqDFp", ("float",),
                 2, vceqfpCode, toInt=True)
twoRegMiscInstFp("vceq", "NVceqQFp", ("float",),
                 4, vceqfpCode, toInt=True)
3049
# Integer VCLE against zero: an element <= 0 yields an all-ones mask of the
# element width, anything else yields 0.
vcleCode = "destElem = (srcElem1 <= 0) ? mask(sizeof(Element) * 8) : 0;"
twoRegMiscInst("vcle", "NVcleD", signedTypes, 2, vcleCode)
twoRegMiscInst("vcle", "NVcleQ", signedTypes, 4, vcleCode)

# Floating point VCLE against 0.0 via binaryOp/vcleFunc. A result of 0 is
# turned into -1 in the destination, any other result into 0; a result of
# 2.0 additionally sets FPSCR.IOC.
# NOTE(review): the 0 / 2.0 encoding is defined by vcleFunc, which is not
# visible here -- confirm its meaning there.
vclefpCode = '''
    FPSCR fpscr = (FPSCR)Fpscr;
    float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcleFunc,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    Fpscr = fpscr;
'''
twoRegMiscInstFp("vcle", "NVcleDFp", ("float",),
                 2, vclefpCode, toInt=True)
twoRegMiscInstFp("vcle", "NVcleQFp", ("float",),
                 4, vclefpCode, toInt=True)
3066
# Integer VCLT against zero: an element < 0 yields an all-ones mask of the
# element width, anything else yields 0.
vcltCode = "destElem = (srcElem1 < 0) ? mask(sizeof(Element) * 8) : 0;"
twoRegMiscInst("vclt", "NVcltD", signedTypes, 2, vcltCode)
twoRegMiscInst("vclt", "NVcltQ", signedTypes, 4, vcltCode)

# Floating point VCLT against 0.0 via binaryOp/vcltFunc. A result of 0 is
# turned into -1 in the destination, any other result into 0; a result of
# 2.0 additionally sets FPSCR.IOC.
# NOTE(review): the 0 / 2.0 encoding is defined by vcltFunc, which is not
# visible here -- confirm its meaning there.
vcltfpCode = '''
    FPSCR fpscr = (FPSCR)Fpscr;
    float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcltFunc,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    Fpscr = fpscr;
'''
twoRegMiscInstFp("vclt", "NVcltDFp", ("float",),
                 2, vcltfpCode, toInt=True)
twoRegMiscInstFp("vclt", "NVcltQFp", ("float",),
                 4, vcltfpCode, toInt=True)
3083
# VSWP: exchange srcReg1 and destReg one register word at a time, using
# `mid` as the temporary.
vswpCode = '''
    FloatRegBits mid;
    for (unsigned r = 0; r < rCount; r++) {
        mid = srcReg1.regs[r];
        srcReg1.regs[r] = destReg.regs[r];
        destReg.regs[r] = mid;
    }
'''
twoRegMiscScramble("vswp", "NVswpD", ("uint64_t",), 2, vswpCode)
twoRegMiscScramble("vswp", "NVswpQ", ("uint64_t",), 4, vswpCode)
3094
# VTRN: for every even index i, swap element i of srcReg1 with element
# i + 1 of destReg.
vtrnCode = '''
    Element mid;
    for (unsigned i = 0; i < eCount; i += 2) {
        mid = srcReg1.elements[i];
        srcReg1.elements[i] = destReg.elements[i + 1];
        destReg.elements[i + 1] = mid;
    }
'''
twoRegMiscScramble("vtrn", "NVtrnD", unsignedTypes, 2, vtrnCode)
twoRegMiscScramble("vtrn", "NVtrnQ", unsignedTypes, 4, vtrnCode)
3105
# VUZP: de-interleave the two vectors. `mid` holds a copy of the original
# srcReg1. Afterwards srcReg1 holds the odd-indexed elements (those of
# destReg in its low half, its own in the high half) and destReg holds the
# even-indexed elements (its own in the low half, srcReg1's in the high
# half). The high half of destReg is filled in a second pass so the first
# loop can still read the original destReg elements.
vuzpCode = '''
    Element mid[eCount];
    memcpy(&mid, &srcReg1, sizeof(srcReg1));
    for (unsigned i = 0; i < eCount / 2; i++) {
        srcReg1.elements[i] = destReg.elements[2 * i + 1];
        srcReg1.elements[eCount / 2 + i] = mid[2 * i + 1];
        destReg.elements[i] = destReg.elements[2 * i];
    }
    for (unsigned i = 0; i < eCount / 2; i++) {
        destReg.elements[eCount / 2 + i] = mid[2 * i];
    }
'''
twoRegMiscScramble("vuzp", "NVuzpD", unsignedTypes, 2, vuzpCode)
twoRegMiscScramble("vuzp", "NVuzpQ", unsignedTypes, 4, vuzpCode)
3120
# VZIP: interleave the two vectors. `mid` holds a copy of the original
# destReg. Afterwards destReg holds the interleaved low halves
# (destReg element, srcReg1 element, ...) and srcReg1 holds the interleaved
# high halves. The second loop reads srcReg1 at index eCount / 2 + i and
# writes at 2 * i and 2 * i + 1, so no element is overwritten before a later
# iteration reads it.
# Both loops use an unsigned index so the comparison against eCount is not a
# mixed signed/unsigned one, matching the other loops over eCount in this
# file section.
vzipCode = '''
    Element mid[eCount];
    memcpy(&mid, &destReg, sizeof(destReg));
    for (unsigned i = 0; i < eCount / 2; i++) {
        destReg.elements[2 * i] = mid[i];
        destReg.elements[2 * i + 1] = srcReg1.elements[i];
    }
    for (unsigned i = 0; i < eCount / 2; i++) {
        srcReg1.elements[2 * i] = mid[eCount / 2 + i];
        srcReg1.elements[2 * i + 1] = srcReg1.elements[eCount / 2 + i];
    }
'''
twoRegMiscScramble("vzip", "NVzipD", unsignedTypes, 2, vzipCode)
twoRegMiscScramble("vzip", "NVzipQ", unsignedTypes, 4, vzipCode)
3135
# VMOVN: plain element copy through the narrowing helper.
vmovnCode = "destElem = srcElem1;"
twoRegNarrowMiscInst("vmovn", "NVmovn", smallUnsignedTypes, vmovnCode)

# VDUP (scalar source): plain element copy, D and Q variants.
vdupCode = "destElem = srcElem1;"
twoRegMiscScInst("vdup", "NVdupD", smallUnsignedTypes, 2, vdupCode)
twoRegMiscScInst("vdup", "NVdupQ", smallUnsignedTypes, 4, vdupCode)
3142
def vdupGprInst(name, Name, types, rCount):
    """Emit a VDUP variant whose source is the Op1 operand.

    The generated code casts Op1 to Element, stores it in every element of
    a local RegVect and then writes that vector back to FpDestP0 through
    FpDestP<rCount - 1>.

    name   -- mnemonic passed to InstObjParams
    Name   -- class name passed to InstObjParams and NeonExecDeclare
    types  -- element type names; one NeonExecDeclare is emitted per entry
    rCount -- number of destination register words to write back

    Appends to the file-level header_output and exec_output strings.
    """
    global header_output, exec_output
    # NOTE(review): unlike buildVext in this file, no simdEnabledCheckCode
    # is emitted here -- confirm whether that is intentional.
    fillCode = '''
    RegVect destReg;
    for (unsigned i = 0; i < eCount; i++) {
        destReg.elements[i] = htog((Element)Op1);
    }
    '''
    writeBack = '''
    FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
    '''
    walkCode = fillCode + "".join(writeBack % { "reg" : idx }
                                  for idx in range(rCount))
    params = { "code": walkCode,
               "r_count": rCount,
               "predicate_test": predicateTest }
    iop = InstObjParams(name, Name, "RegRegOp", params, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
# D and Q variants of the Op1-sourced VDUP.
vdupGprInst("vdup", "NVdupDGpr", smallUnsignedTypes, rCount=2)
vdupGprInst("vdup", "NVdupQGpr", smallUnsignedTypes, rCount=4)
3168
# One-register immediate forms. VORR and VBIC combine the immediate with the
# existing destination value (|= and &= ~) and pass an extra True argument
# to oneRegImmInst; VMOV and VMVN overwrite the destination.
# NOTE(review): the meaning of that trailing True is defined by
# oneRegImmInst, which is not visible here -- presumably "also read the
# destination"; confirm.
vmovCode = "destElem = imm;"
oneRegImmInst("vmov", "NVmoviD", ("uint64_t",), 2, vmovCode)
oneRegImmInst("vmov", "NVmoviQ", ("uint64_t",), 4, vmovCode)

vorrCode = "destElem |= imm;"
oneRegImmInst("vorr", "NVorriD", ("uint64_t",), 2, vorrCode, True)
oneRegImmInst("vorr", "NVorriQ", ("uint64_t",), 4, vorrCode, True)

vmvnCode = "destElem = ~imm;"
oneRegImmInst("vmvn", "NVmvniD", ("uint64_t",), 2, vmvnCode)
oneRegImmInst("vmvn", "NVmvniQ", ("uint64_t",), 4, vmvnCode)

vbicCode = "destElem &= ~imm;"
oneRegImmInst("vbic", "NVbiciD", ("uint64_t",), 2, vbicCode, True)
oneRegImmInst("vbic", "NVbiciQ", ("uint64_t",), 4, vbicCode, True)
3184
# VQMOVN (signed): narrow the element; if widening the result back to
# BigElement does not reproduce the source, set FPSCR.QC and saturate to
# mask(bits - 1), or to its bitwise complement when the source is negative.
vqmovnCode = '''
    FPSCR fpscr = (FPSCR)Fpscr;
    destElem = srcElem1;
    if ((BigElement)destElem != srcElem1) {
        fpscr.qc = 1;
        destElem = mask(sizeof(Element) * 8 - 1);
        if (srcElem1 < 0)
            destElem = ~destElem;
    }
    Fpscr = fpscr;
'''
twoRegNarrowMiscInst("vqmovn", "NVqmovn", smallSignedTypes, vqmovnCode)
3197
# Narrowing with saturation for the unsigned element types: if widening the
# narrowed value back to BigElement does not reproduce the source, set
# FPSCR.QC and saturate to an all-ones mask of the element width.
vqmovunCode = '''
    FPSCR fpscr = (FPSCR)Fpscr;
    destElem = srcElem1;
    if ((BigElement)destElem != srcElem1) {
        fpscr.qc = 1;
        destElem = mask(sizeof(Element) * 8);
    }
    Fpscr = fpscr;
'''
twoRegNarrowMiscInst("vqmovun", "NVqmovun",
                     smallUnsignedTypes, vqmovunCode)
3209
# VQMOVUN for the signed element types: a negative source, or one whose low
# element-width bits do not reproduce it, sets FPSCR.QC. The result is then
# an all-ones mask of the element width, or the bitwise complement of that
# mask when the source is negative.
vqmovunsCode = '''
    FPSCR fpscr = (FPSCR)Fpscr;
    destElem = srcElem1;
    if (srcElem1 < 0 ||
            ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) {
        fpscr.qc = 1;
        destElem = mask(sizeof(Element) * 8);
        if (srcElem1 < 0)
            destElem = ~destElem;
    }
    Fpscr = fpscr;
'''
twoRegNarrowMiscInst("vqmovun", "NVqmovuns",
                     smallSignedTypes, vqmovunsCode)
3224
def buildVext(name, Name, types, rCount, op):
    """Emit a three-register-plus-immediate NEON instruction (used for VEXT).

    The generated code loads FpOp1P<n> / FpOp2P<n> into the local vectors
    srcReg1 / srcReg2 for n in [0, rCount), runs the caller supplied `op`
    snippet, and writes destReg back to FpDestP<n>.

    name   -- mnemonic passed to InstObjParams
    Name   -- class name passed to InstObjParams and NeonExecDeclare
    types  -- element type names; one NeonExecDeclare is emitted per entry
    rCount -- number of register words per operand
    op     -- C++ snippet that fills destReg from srcReg1 / srcReg2

    Appends to the file-level header_output and exec_output strings.
    """
    global header_output, exec_output
    # The SIMD-enabled check does not depend on the register index, so it is
    # emitted once at the top of the generated code instead of once per
    # source register.
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1, srcReg2, destReg;
    '''
    for reg in range(rCount):
        eWalkCode += '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
        srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
        ''' % { "reg" : reg }
    eWalkCode += op
    for reg in range(rCount):
        eWalkCode += '''
        FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest }, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    # Loop variable renamed so it no longer shadows the builtin `type`.
    for elemType in types:
        substDict = { "targs" : elemType,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
3251
# VEXT: destination element i comes from position i + imm of the sequence
# srcReg1 followed by srcReg2. The assert guards against an offset that
# would run past the end of srcReg2.
vextCode = '''
    for (unsigned i = 0; i < eCount; i++) {
        unsigned index = i + imm;
        if (index < eCount) {
            destReg.elements[i] = srcReg1.elements[index];
        } else {
            index -= eCount;
            assert(index < eCount);
            destReg.elements[i] = srcReg2.elements[index];
        }
    }
'''
buildVext("vext", "NVextD", ("uint8_t",), rCount=2, op=vextCode)
buildVext("vext", "NVextQ", ("uint8_t",), rCount=4, op=vextCode)
3266
def buildVtbxl(name, Name, length, isVtbl):
    """Emit a byte table lookup instruction (used for VTBL and VTBX).

    The generated code builds a 32 byte table from FpOp1P0 through
    FpOp1P<2 * length - 1> (remaining table words are zeroed), then uses
    every byte of FpOp2P0/FpOp2P1 as an index into it. An index below
    8 * length selects a table byte; otherwise the destination byte is
    zeroed when isVtbl is true and left unchanged when it is false.

    name   -- mnemonic passed to InstObjParams
    Name   -- class name passed to InstObjParams
    length -- table size in units of 8 bytes
    isVtbl -- C++ boolean literal ("true" or "false") pasted into the code

    Appends to the file-level header_output, decoder_output and exec_output
    strings.
    """
    global header_output, decoder_output, exec_output
    setupCode = '''
    union
    {
        uint8_t bytes[32];
        FloatRegBits regs[8];
    } table;

    union
    {
        uint8_t bytes[8];
        FloatRegBits regs[2];
    } destReg, srcReg2;

    const unsigned length = %(length)d;
    const bool isVtbl = %(isVtbl)s;

    srcReg2.regs[0] = htog(FpOp2P0.uw);
    srcReg2.regs[1] = htog(FpOp2P1.uw);

    destReg.regs[0] = htog(FpDestP0.uw);
    destReg.regs[1] = htog(FpDestP1.uw);
    ''' % { "length" : length, "isVtbl" : isVtbl }

    # One statement per table word: a register load for words that belong to
    # the table, a zero fill for the rest.
    tableLoads = []
    for reg in range(8):
        if reg >= length * 2:
            stmt = 'table.regs[%(reg)d] = 0;\n' % { "reg" : reg }
        else:
            stmt = 'table.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);\n' % \
                   { "reg" : reg }
        tableLoads.append(stmt)

    lookupCode = '''
    for (unsigned i = 0; i < sizeof(destReg); i++) {
        uint8_t index = srcReg2.bytes[i];
        if (index < 8 * length) {
            destReg.bytes[i] = table.bytes[index];
        } else {
            if (isVtbl)
                destReg.bytes[i] = 0;
            // else destReg.bytes[i] unchanged
        }
    }

    FpDestP0.uw = gtoh(destReg.regs[0]);
    FpDestP1.uw = gtoh(destReg.regs[1]);
    '''
    code = setupCode + "".join(tableLoads) + lookupCode
    iop = InstObjParams(name, Name,
                        "RegRegRegOp",
                        { "code": code,
                          "predicate_test": predicateTest }, [])
    header_output += RegRegRegOpDeclare.subst(iop)
    decoder_output += RegRegRegOpConstructor.subst(iop)
    exec_output += PredOpExecute.subst(iop)
3319
# VTBL: out-of-range indices produce zero bytes.
buildVtbxl("vtbl", "NVtbl1", length=1, isVtbl="true")
buildVtbxl("vtbl", "NVtbl2", length=2, isVtbl="true")
buildVtbxl("vtbl", "NVtbl3", length=3, isVtbl="true")
buildVtbxl("vtbl", "NVtbl4", length=4, isVtbl="true")

# VTBX: out-of-range indices leave the destination byte unchanged.
buildVtbxl("vtbx", "NVtbx1", length=1, isVtbl="false")
buildVtbxl("vtbx", "NVtbx2", length=2, isVtbl="false")
buildVtbxl("vtbx", "NVtbx3", length=3, isVtbl="false")
buildVtbxl("vtbx", "NVtbx4", length=4, isVtbl="false")
3329 }};