1 // Copyright (c) 2017-2019 ARM Limited
4 // The license below extends only to copyright in the software and shall
5 // not be construed as granting a license to any other intellectual
6 // property including but not limited to intellectual property relating
7 // to a hardware implementation of the functionality of the software
8 // licensed hereunder. You may use the software subject to the license
9 // terms below provided that you ensure that this notice is replicated
10 // unmodified and in its entirety in all distributions of the software,
11 // modified or unmodified, in source code or in binary form.
13 // Redistribution and use in source and binary forms, with or without
14 // modification, are permitted provided that the following conditions are
15 // met: redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer;
17 // redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution;
20 // neither the name of the copyright holders nor the names of its
21 // contributors may be used to endorse or promote products derived from
22 // this software without specific prior written permission.
24 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
27 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
28 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
29 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
30 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
31 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
32 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
33 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
34 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 // Authors: Giacomo Gabrielli
38 // @file Definition of SVE instructions.
42 // Decodes unary, constructive, predicated (merging) SVE instructions,
43 // handling signed and unsigned variants.
// Instantiates BaseU<uintN_t> when 'u' selects unsigned, BaseS<intN_t>
// otherwise. NOTE(review): 'size' appears to select the element width in
// the order listed below (8/16/32/64-bit) — the selection statements are
// not visible here; unhandled encodings decode to Unknown64.
44 template <template <typename T> class BaseS,
45 template <typename T> class BaseU>
47 decodeSveUnaryPred(unsigned size, unsigned u, ExtMachInst machInst,
48 IntRegIndex dest, IntRegIndex op1, IntRegIndex gp)
53 return new BaseU<uint8_t>(machInst, dest, op1, gp);
55 return new BaseS<int8_t>(machInst, dest, op1, gp);
59 return new BaseU<uint16_t>(machInst, dest, op1, gp);
61 return new BaseS<int16_t>(machInst, dest, op1, gp);
65 return new BaseU<uint32_t>(machInst, dest, op1, gp);
67 return new BaseS<int32_t>(machInst, dest, op1, gp);
71 return new BaseU<uint64_t>(machInst, dest, op1, gp);
73 return new BaseS<int64_t>(machInst, dest, op1, gp);
// Fallback for unrecognized size encodings: undefined instruction.
76 return new Unknown64(machInst);
80 // Decodes SVE widening reductions,
81 // handling signed and unsigned variants.
// All variants accumulate into a 64-bit result element (second template
// argument), regardless of the source element width selected by 'size'.
// Note there is no signed 64-bit variant below: the 64-bit source case
// only instantiates the unsigned form.
82 template <template <typename T1, typename T2> class BaseS,
83 template <typename T1, typename T2> class BaseU>
85 decodeSveWideningReduc(unsigned size, unsigned u, ExtMachInst machInst,
86 IntRegIndex dest, IntRegIndex op1, IntRegIndex gp)
91 return new BaseU<uint8_t, uint64_t>(machInst, dest, op1, gp);
93 return new BaseS<int8_t, int64_t>(machInst, dest, op1, gp);
97 return new BaseU<uint16_t, uint64_t>(machInst, dest, op1, gp);
99 return new BaseS<int16_t, int64_t>(machInst, dest, op1, gp);
103 return new BaseU<uint32_t, uint64_t>(machInst, dest, op1, gp);
105 return new BaseS<int32_t, int64_t>(machInst, dest, op1, gp);
109 return new BaseU<uint64_t, uint64_t>(machInst, dest, op1, gp);
// Fallback for unrecognized size encodings: undefined instruction.
111 return new Unknown64(machInst);
115 // Decodes unary, constructive, predicated (merging) SVE instructions,
116 // handling signed variants only.
// NOTE(review): 'size' appears to select 8/16/32/64-bit elements in the
// order listed; unhandled encodings decode to Unknown64.
117 template <template <typename T> class Base>
119 decodeSveUnaryPredS(unsigned size, ExtMachInst machInst,
120 IntRegIndex dest, IntRegIndex op1, IntRegIndex gp)
124 return new Base<int8_t>(machInst, dest, op1, gp);
126 return new Base<int16_t>(machInst, dest, op1, gp);
128 return new Base<int32_t>(machInst, dest, op1, gp);
130 return new Base<int64_t>(machInst, dest, op1, gp);
132 return new Unknown64(machInst);
136 // Decodes unary, constructive, predicated (merging) SVE instructions,
137 // handling unsigned variants only.
// Unsigned counterpart of decodeSveUnaryPredS above.
138 template <template <typename T> class Base>
140 decodeSveUnaryPredU(unsigned size, ExtMachInst machInst,
141 IntRegIndex dest, IntRegIndex op1, IntRegIndex gp)
145 return new Base<uint8_t>(machInst, dest, op1, gp);
147 return new Base<uint16_t>(machInst, dest, op1, gp);
149 return new Base<uint32_t>(machInst, dest, op1, gp);
151 return new Base<uint64_t>(machInst, dest, op1, gp);
153 return new Unknown64(machInst);
157 // Decodes unary, constructive, predicated (merging) SVE instructions,
158 // handling signed and unsigned variants, for small element sizes (8- to
// 32-bit only — note there is no 64-bit instantiation below).
160 template <template <typename T> class BaseS,
161 template <typename T> class BaseU>
163 decodeSveUnaryPredSmall(unsigned size, unsigned u, ExtMachInst machInst,
164 IntRegIndex dest, IntRegIndex op1, IntRegIndex gp)
169 return new BaseU<uint8_t>(machInst, dest, op1, gp);
171 return new BaseS<int8_t>(machInst, dest, op1, gp);
175 return new BaseU<uint16_t>(machInst, dest, op1, gp);
177 return new BaseS<int16_t>(machInst, dest, op1, gp);
181 return new BaseU<uint32_t>(machInst, dest, op1, gp);
183 return new BaseS<int32_t>(machInst, dest, op1, gp);
// 64-bit and unrecognized size encodings: undefined instruction.
186 return new Unknown64(machInst);
190 // Decodes unary, constructive, predicated (merging) SVE instructions,
191 // handling floating point variants only.
// FP element widths are 16/32/64-bit (no 8-bit FP); the unsigned integer
// template parameters carry the raw FP bit patterns.
192 template <template <typename T> class Base>
194 decodeSveUnaryPredF(unsigned size, ExtMachInst machInst,
195 IntRegIndex dest, IntRegIndex op1, IntRegIndex gp)
199 return new Base<uint16_t>(machInst, dest, op1, gp);
201 return new Base<uint32_t>(machInst, dest, op1, gp);
203 return new Base<uint64_t>(machInst, dest, op1, gp);
205 return new Unknown64(machInst);
209 // Decodes unary, constructive, unpredicated SVE instructions, handling
210 // unsigned variants only.
// No governing predicate operand (unpredicated form).
211 template <template <typename T> class Base>
213 decodeSveUnaryUnpredU(unsigned size, ExtMachInst machInst,
214 IntRegIndex dest, IntRegIndex op1)
218 return new Base<uint8_t>(machInst, dest, op1);
220 return new Base<uint16_t>(machInst, dest, op1);
222 return new Base<uint32_t>(machInst, dest, op1);
224 return new Base<uint64_t>(machInst, dest, op1);
226 return new Unknown64(machInst);
230 // Decodes unary, constructive, unpredicated SVE instructions, handling
231 // floating-point variants only.
// 16/32/64-bit FP element widths only (no 8-bit FP).
232 template <template <typename T> class Base>
234 decodeSveUnaryUnpredF(unsigned size, ExtMachInst machInst,
235 IntRegIndex dest, IntRegIndex op1)
239 return new Base<uint16_t>(machInst, dest, op1);
241 return new Base<uint32_t>(machInst, dest, op1);
243 return new Base<uint64_t>(machInst, dest, op1);
245 return new Unknown64(machInst);
249 // Decodes binary, destructive, predicated (merging) SVE instructions,
250 // handling signed and unsigned variants.
// Destructive: 'dest' doubles as the first source operand, so only the
// second source register ('op2') is passed explicitly.
251 template <template <typename T> class BaseS,
252 template <typename T> class BaseU>
254 decodeSveBinDestrPred(unsigned size, unsigned u, ExtMachInst machInst,
255 IntRegIndex dest, IntRegIndex op2, IntRegIndex gp)
260 return new BaseU<uint8_t>(machInst, dest, op2, gp);
262 return new BaseS<int8_t>(machInst, dest, op2, gp);
266 return new BaseU<uint16_t>(machInst, dest, op2, gp);
268 return new BaseS<int16_t>(machInst, dest, op2, gp);
272 return new BaseU<uint32_t>(machInst, dest, op2, gp);
274 return new BaseS<int32_t>(machInst, dest, op2, gp);
278 return new BaseU<uint64_t>(machInst, dest, op2, gp);
280 return new BaseS<int64_t>(machInst, dest, op2, gp);
283 return new Unknown64(machInst);
287 // Decodes binary with immediate operand, constructive, unpredicated
288 // SVE instructions, handling signed variants only.
// The immediate is forwarded unmodified to the instruction constructor.
289 template <template <typename T> class Base>
291 decodeSveBinImmUnpredS(unsigned size, ExtMachInst machInst,
292 IntRegIndex dest, IntRegIndex op1, unsigned immediate)
296 return new Base<int8_t>(machInst, dest, op1, immediate);
298 return new Base<int16_t>(machInst, dest, op1, immediate);
300 return new Base<int32_t>(machInst, dest, op1, immediate);
302 return new Base<int64_t>(machInst, dest, op1, immediate);
304 return new Unknown64(machInst);
309 // Decodes binary with immediate operand, constructive, unpredicated
310 // SVE instructions, handling unsigned variants only.
// Unsigned counterpart of decodeSveBinImmUnpredS above.
311 template <template <typename T> class Base>
313 decodeSveBinImmUnpredU(unsigned size, ExtMachInst machInst,
314 IntRegIndex dest, IntRegIndex op1, unsigned immediate)
318 return new Base<uint8_t>(machInst, dest, op1, immediate);
320 return new Base<uint16_t>(machInst, dest, op1, immediate);
322 return new Base<uint32_t>(machInst, dest, op1, immediate);
324 return new Base<uint64_t>(machInst, dest, op1, immediate);
326 return new Unknown64(machInst);
330 // Decodes binary with immediate operand, destructive, predicated (merging)
331 // SVE instructions, handling unsigned variants only.
// Destructive form: 'dest' is both destination and implicit first source.
332 template <template <typename T> class Base>
334 decodeSveBinImmPredU(unsigned size, ExtMachInst machInst, IntRegIndex dest,
335 unsigned immediate, IntRegIndex gp)
339 return new Base<uint8_t>(machInst, dest, immediate, gp);
341 return new Base<uint16_t>(machInst, dest, immediate, gp);
343 return new Base<uint32_t>(machInst, dest, immediate, gp);
345 return new Base<uint64_t>(machInst, dest, immediate, gp);
347 return new Unknown64(machInst);
351 // Decodes binary with immediate operand, destructive, predicated (merging)
352 // SVE instructions, handling signed variants only.
353 template <template <typename T> class Base>
355 decodeSveBinImmPredS(unsigned size, ExtMachInst machInst, IntRegIndex dest,
356 unsigned immediate, IntRegIndex gp)
360 return new Base<int8_t>(machInst, dest, immediate, gp);
362 return new Base<int16_t>(machInst, dest, immediate, gp);
364 return new Base<int32_t>(machInst, dest, immediate, gp);
366 return new Base<int64_t>(machInst, dest, immediate, gp);
368 return new Unknown64(machInst);
372 // Decodes binary with immediate operand, destructive, predicated (merging)
373 // SVE instructions, handling floating-point variants only.
// FP variant takes a 64-bit immediate (raw FP encoding); 16/32/64-bit
// element widths only.
374 template <template <typename T> class Base>
376 decodeSveBinImmPredF(unsigned size, ExtMachInst machInst, IntRegIndex dest,
377 uint64_t immediate, IntRegIndex gp)
381 return new Base<uint16_t>(machInst, dest, immediate, gp);
383 return new Base<uint32_t>(machInst, dest, immediate, gp);
385 return new Base<uint64_t>(machInst, dest, immediate, gp);
387 return new Unknown64(machInst);
391 // Decodes unary/binary with wide immediate operand, destructive,
392 // unpredicated SVE instructions, handling unsigned variants only.
// "Wide immediate": the immediate arrives as a full uint64_t and is
// forwarded unmodified to the instruction constructor.
393 template <template <typename T> class Base>
395 decodeSveWideImmUnpredU(unsigned size, ExtMachInst machInst,
396 IntRegIndex dest, uint64_t immediate)
400 return new Base<uint8_t>(machInst, dest, immediate);
402 return new Base<uint16_t>(machInst, dest, immediate);
404 return new Base<uint32_t>(machInst, dest, immediate);
406 return new Base<uint64_t>(machInst, dest, immediate);
408 return new Unknown64(machInst);
412 // Decodes unary/binary with wide immediate operand, destructive,
413 // unpredicated SVE instructions, handling signed variants only.
414 template <template <typename T> class Base>
416 decodeSveWideImmUnpredS(unsigned size, ExtMachInst machInst,
417 IntRegIndex dest, uint64_t immediate)
421 return new Base<int8_t>(machInst, dest, immediate);
423 return new Base<int16_t>(machInst, dest, immediate);
425 return new Base<int32_t>(machInst, dest, immediate);
427 return new Base<int64_t>(machInst, dest, immediate);
429 return new Unknown64(machInst);
433 // Decodes unary/binary with wide immediate operand, destructive,
434 // unpredicated SVE instructions, handling floating-point variants only.
// 16/32/64-bit FP element widths only (no 8-bit FP).
435 template <template <typename T> class Base>
437 decodeSveWideImmUnpredF(unsigned size, ExtMachInst machInst,
438 IntRegIndex dest, uint64_t immediate)
442 return new Base<uint16_t>(machInst, dest, immediate);
444 return new Base<uint32_t>(machInst, dest, immediate);
446 return new Base<uint64_t>(machInst, dest, immediate);
448 return new Unknown64(machInst);
452 // Decodes unary/binary with wide immediate operand, destructive,
453 // predicated SVE instructions, handling unsigned variants only.
// 'isMerging' selects merging (true, the default) vs. zeroing predication;
// NOTE(review): the constructor calls below are continued on lines not
// visible in this extract — presumably forwarding 'isMerging'.
454 template <template <typename T> class Base>
456 decodeSveWideImmPredU(unsigned size, ExtMachInst machInst,
457 IntRegIndex dest, uint64_t immediate, IntRegIndex gp,
458 bool isMerging = true)
462 return new Base<uint8_t>(machInst, dest, immediate, gp,
465 return new Base<uint16_t>(machInst, dest, immediate, gp,
468 return new Base<uint32_t>(machInst, dest, immediate, gp,
471 return new Base<uint64_t>(machInst, dest, immediate, gp,
474 return new Unknown64(machInst);
478 // Decodes unary/binary with wide immediate operand, destructive,
479 // predicated SVE instructions, handling floating-point variants only.
// 16/32/64-bit FP element widths only; always-merging (no isMerging flag).
480 template <template <typename T> class Base>
482 decodeSveWideImmPredF(unsigned size, ExtMachInst machInst,
483 IntRegIndex dest, uint64_t immediate, IntRegIndex gp)
487 return new Base<uint16_t>(machInst, dest, immediate, gp);
489 return new Base<uint32_t>(machInst, dest, immediate, gp);
491 return new Base<uint64_t>(machInst, dest, immediate, gp);
493 return new Unknown64(machInst);
497 // Decodes binary, destructive, predicated (merging) SVE instructions,
498 // handling unsigned variants only.
// Destructive: 'dest' doubles as the first source operand.
499 template <template <typename T> class Base>
501 decodeSveBinDestrPredU(unsigned size, ExtMachInst machInst,
502 IntRegIndex dest, IntRegIndex op2, IntRegIndex gp)
506 return new Base<uint8_t>(machInst, dest, op2, gp);
508 return new Base<uint16_t>(machInst, dest, op2, gp);
510 return new Base<uint32_t>(machInst, dest, op2, gp);
512 return new Base<uint64_t>(machInst, dest, op2, gp);
514 return new Unknown64(machInst);
518 // Decodes binary, destructive, predicated (merging) SVE instructions,
519 // handling signed variants only.
520 template <template <typename T> class Base>
522 decodeSveBinDestrPredS(unsigned size, ExtMachInst machInst,
523 IntRegIndex dest, IntRegIndex op2, IntRegIndex gp)
527 return new Base<int8_t>(machInst, dest, op2, gp);
529 return new Base<int16_t>(machInst, dest, op2, gp);
531 return new Base<int32_t>(machInst, dest, op2, gp);
533 return new Base<int64_t>(machInst, dest, op2, gp);
535 return new Unknown64(machInst);
539 // Decodes binary, destructive, predicated (merging) SVE instructions,
540 // handling floating-point variants only.
// 16/32/64-bit FP element widths only (no 8-bit FP).
541 template <template <typename T> class Base>
543 decodeSveBinDestrPredF(unsigned size, ExtMachInst machInst,
544 IntRegIndex dest, IntRegIndex op2, IntRegIndex gp)
548 return new Base<uint16_t>(machInst, dest, op2, gp);
550 return new Base<uint32_t>(machInst, dest, op2, gp);
552 return new Base<uint64_t>(machInst, dest, op2, gp);
554 return new Unknown64(machInst);
558 // Decodes binary, constructive, predicated SVE instructions, handling
559 // unsigned variants only.
// 'predType' conveys the predication behavior (e.g. merging vs. zeroing)
// to the instruction itself rather than being resolved at decode time.
560 template <template <typename T> class Base>
562 decodeSveBinConstrPredU(unsigned size, ExtMachInst machInst,
563 IntRegIndex dest, IntRegIndex op1, IntRegIndex op2,
564 IntRegIndex gp, SvePredType predType)
568 return new Base<uint8_t>(machInst, dest, op1, op2, gp, predType);
570 return new Base<uint16_t>(machInst, dest, op1, op2, gp, predType);
572 return new Base<uint32_t>(machInst, dest, op1, op2, gp, predType);
574 return new Base<uint64_t>(machInst, dest, op1, op2, gp, predType);
576 return new Unknown64(machInst);
580 // Decodes binary, constructive, unpredicated SVE instructions.
// Handles both signednesses through a single Base template: 'u' presumably
// picks the unsigned (uintN_t) or signed (intN_t) instantiation, and
// 'size' the element width; unhandled encodings decode to Unknown64.
581 template <template <typename T> class Base>
583 decodeSveBinUnpred(unsigned size, unsigned u, ExtMachInst machInst,
584 IntRegIndex dest, IntRegIndex op1, IntRegIndex op2)
589 return new Base<uint8_t>(machInst, dest, op1, op2);
591 return new Base<int8_t>(machInst, dest, op1, op2);
595 return new Base<uint16_t>(machInst, dest, op1, op2);
597 return new Base<int16_t>(machInst, dest, op1, op2);
601 return new Base<uint32_t>(machInst, dest, op1, op2);
603 return new Base<int32_t>(machInst, dest, op1, op2);
607 return new Base<uint64_t>(machInst, dest, op1, op2);
609 return new Base<int64_t>(machInst, dest, op1, op2);
612 return new Unknown64(machInst);
616 // Decodes binary, constructive, unpredicated SVE instructions.
617 // Unsigned instructions only.
618 template <template <typename T> class Base>
620 decodeSveBinUnpredU(unsigned size, ExtMachInst machInst, IntRegIndex dest,
621 IntRegIndex op1, IntRegIndex op2)
625 return new Base<uint8_t>(machInst, dest, op1, op2);
627 return new Base<uint16_t>(machInst, dest, op1, op2);
629 return new Base<uint32_t>(machInst, dest, op1, op2);
631 return new Base<uint64_t>(machInst, dest, op1, op2);
633 return new Unknown64(machInst);
637 // Decodes binary, constructive, unpredicated SVE instructions.
638 // Signed instructions only.
639 template <template <typename T> class Base>
641 decodeSveBinUnpredS(unsigned size, ExtMachInst machInst, IntRegIndex dest,
642 IntRegIndex op1, IntRegIndex op2)
646 return new Base<int8_t>(machInst, dest, op1, op2);
648 return new Base<int16_t>(machInst, dest, op1, op2);
650 return new Base<int32_t>(machInst, dest, op1, op2);
652 return new Base<int64_t>(machInst, dest, op1, op2);
654 return new Unknown64(machInst);
658 // Decodes binary, constructive, unpredicated SVE instructions, handling
659 // floating-point variants only.
// 16/32/64-bit FP element widths only (no 8-bit FP).
660 template <template <typename T> class Base>
662 decodeSveBinUnpredF(unsigned size, ExtMachInst machInst, IntRegIndex dest,
663 IntRegIndex op1, IntRegIndex op2)
667 return new Base<uint16_t>(machInst, dest, op1, op2);
669 return new Base<uint32_t>(machInst, dest, op1, op2);
671 return new Base<uint64_t>(machInst, dest, op1, op2);
673 return new Unknown64(machInst);
677 // Decodes SVE compare instructions - binary, predicated (zeroing),
678 // generating a predicate - handling floating-point variants only.
// 'dest' here is a predicate register index; 16/32/64-bit FP widths only.
679 template <template <typename T> class Base>
681 decodeSveCmpF(unsigned size, ExtMachInst machInst,
682 IntRegIndex dest, IntRegIndex op1, IntRegIndex op2,
687 return new Base<uint16_t>(machInst, dest, op1, op2, gp);
689 return new Base<uint32_t>(machInst, dest, op1, op2, gp);
691 return new Base<uint64_t>(machInst, dest, op1, op2, gp);
693 return new Unknown64(machInst);
697 // Decodes SVE compare-with-immediate instructions - binary, predicated
698 // (zeroing), generating a predicate - handling floating-point variants
699 // only.
700 template <template <typename T> class Base>
702 decodeSveCmpImmF(unsigned size, ExtMachInst machInst,
703 IntRegIndex dest, IntRegIndex op1, uint64_t imm,
708 return new Base<uint16_t>(machInst, dest, op1, imm, gp);
710 return new Base<uint32_t>(machInst, dest, op1, imm, gp);
712 return new Base<uint64_t>(machInst, dest, op1, imm, gp);
714 return new Unknown64(machInst);
718 // Decodes ternary, destructive, predicated (merging) SVE instructions.
// Third source operand is implicit in 'dest' (destructive form); 'u'
// presumably selects the unsigned vs. signed instantiation and 'size'
// the element width; unhandled encodings decode to Unknown64.
719 template <template <typename T> class Base>
721 decodeSveTerPred(unsigned size, unsigned u, ExtMachInst machInst,
722 IntRegIndex dest, IntRegIndex op1, IntRegIndex op2,
728 return new Base<uint8_t>(machInst, dest, op1, op2, gp);
730 return new Base<int8_t>(machInst, dest, op1, op2, gp);
734 return new Base<uint16_t>(machInst, dest, op1, op2, gp);
736 return new Base<int16_t>(machInst, dest, op1, op2, gp);
740 return new Base<uint32_t>(machInst, dest, op1, op2, gp);
742 return new Base<int32_t>(machInst, dest, op1, op2, gp);
746 return new Base<uint64_t>(machInst, dest, op1, op2, gp);
748 return new Base<int64_t>(machInst, dest, op1, op2, gp);
751 return new Unknown64(machInst);
755 // Decodes ternary, destructive, predicated (merging) SVE instructions,
756 // handling wide signed variants only. XXX: zeroing for CMP instructions.
// "Wide" variants stop at 32-bit source elements (no 64-bit case below),
// consistent with a doubled-width second operand.
757 template <template <typename T> class Base>
759 decodeSveTerPredWS(unsigned size, ExtMachInst machInst,
760 IntRegIndex dest, IntRegIndex op1, IntRegIndex op2,
765 return new Base<int8_t>(machInst, dest, op1, op2, gp);
767 return new Base<int16_t>(machInst, dest, op1, op2, gp);
769 return new Base<int32_t>(machInst, dest, op1, op2, gp);
771 return new Unknown64(machInst);
775 // Decodes ternary, destructive, predicated (merging) SVE instructions,
776 // handling wide unsigned variants only. XXX: zeroing for CMP instructions.
777 template <template <typename T> class Base>
779 decodeSveTerPredWU(unsigned size, ExtMachInst machInst,
780 IntRegIndex dest, IntRegIndex op1, IntRegIndex op2,
785 return new Base<uint8_t>(machInst, dest, op1, op2, gp);
787 return new Base<uint16_t>(machInst, dest, op1, op2, gp);
789 return new Base<uint32_t>(machInst, dest, op1, op2, gp);
791 return new Unknown64(machInst);
795 // Decodes ternary, destructive, predicated (merging) SVE instructions,
796 // handling signed variants only. XXX: zeroing for CMP instructions.
797 template <template <typename T> class Base>
799 decodeSveTerPredS(unsigned size, ExtMachInst machInst,
800 IntRegIndex dest, IntRegIndex op1, IntRegIndex op2,
805 return new Base<int8_t>(machInst, dest, op1, op2, gp);
807 return new Base<int16_t>(machInst, dest, op1, op2, gp);
809 return new Base<int32_t>(machInst, dest, op1, op2, gp);
811 return new Base<int64_t>(machInst, dest, op1, op2, gp);
813 return new Unknown64(machInst);
817 // Decodes ternary, destructive, predicated (merging) SVE instructions,
818 // handling unsigned variants only. XXX: zeroing for CMP instructions.
819 template <template <typename T> class Base>
821 decodeSveTerPredU(unsigned size, ExtMachInst machInst,
822 IntRegIndex dest, IntRegIndex op1, IntRegIndex op2,
827 return new Base<uint8_t>(machInst, dest, op1, op2, gp);
829 return new Base<uint16_t>(machInst, dest, op1, op2, gp);
831 return new Base<uint32_t>(machInst, dest, op1, op2, gp);
833 return new Base<uint64_t>(machInst, dest, op1, op2, gp);
835 return new Unknown64(machInst);
839 // Decodes SVE signed unary extension instructions (8-bit source element
// size, sign-extended to the 16/32/64-bit destination size selected by
// 'dsize').
841 template <template <typename TS, typename TD> class Base>
843 decodeSveUnaryExtendFromBPredS(unsigned dsize, ExtMachInst machInst,
844 IntRegIndex dest, IntRegIndex op1,
849 return new Base<int8_t, int16_t>(machInst, dest, op1, gp);
851 return new Base<int8_t, int32_t>(machInst, dest, op1, gp);
853 return new Base<int8_t, int64_t>(machInst, dest, op1, gp);
855 return new Unknown64(machInst);
858 // Decodes SVE unsigned unary extension instructions (8-bit source element
// size, zero-extended to the 16/32/64-bit destination size).
860 template <template <typename TS, typename TD> class Base>
862 decodeSveUnaryExtendFromBPredU(unsigned dsize, ExtMachInst machInst,
863 IntRegIndex dest, IntRegIndex op1,
868 return new Base<uint8_t, uint16_t>(machInst, dest, op1, gp);
870 return new Base<uint8_t, uint32_t>(machInst, dest, op1, gp);
872 return new Base<uint8_t, uint64_t>(machInst, dest, op1, gp);
874 return new Unknown64(machInst);
877 // Decodes SVE signed unary extension instructions (16-bit source element
// size, sign-extended to the 32/64-bit destination size).
879 template <template <typename TS, typename TD> class Base>
881 decodeSveUnaryExtendFromHPredS(unsigned dsize, ExtMachInst machInst,
882 IntRegIndex dest, IntRegIndex op1,
887 return new Base<int16_t, int32_t>(machInst, dest, op1, gp);
889 return new Base<int16_t, int64_t>(machInst, dest, op1, gp);
891 return new Unknown64(machInst);
894 // Decodes SVE unsigned unary extension instructions (16-bit source element
// size, zero-extended to the 32/64-bit destination size).
896 template <template <typename TS, typename TD> class Base>
898 decodeSveUnaryExtendFromHPredU(unsigned dsize, ExtMachInst machInst,
899 IntRegIndex dest, IntRegIndex op1,
904 return new Base<uint16_t, uint32_t>(machInst, dest, op1, gp);
906 return new Base<uint16_t, uint64_t>(machInst, dest, op1, gp);
908 return new Unknown64(machInst);
911 // Decodes ternary, destructive, predicated (merging) SVE instructions,
912 // handling floating-point variants only.
// 16/32/64-bit FP element widths only (no 8-bit FP).
913 template <template <typename T> class Base>
915 decodeSveTerPredF(unsigned size, ExtMachInst machInst,
916 IntRegIndex dest, IntRegIndex op1, IntRegIndex op2,
921 return new Base<uint16_t>(machInst, dest, op1, op2, gp);
923 return new Base<uint32_t>(machInst, dest, op1, op2, gp);
925 return new Base<uint64_t>(machInst, dest, op1, op2, gp);
927 return new Unknown64(machInst);
931 // Decodes ternary with immediate operand, destructive, unpredicated SVE
932 // instructions handling floating-point variants only.
933 template <template <typename T> class Base>
935 decodeSveTerImmUnpredF(unsigned size, ExtMachInst machInst,
936 IntRegIndex dest, IntRegIndex op2, uint8_t imm)
940 return new Base<uint16_t>(machInst, dest, op2, imm);
942 return new Base<uint32_t>(machInst, dest, op2, imm);
944 return new Base<uint64_t>(machInst, dest, op2, imm);
946 return new Unknown64(machInst);
950 // Decodes SVE PTRUE(S) instructions.
// 'imm' is the predicate-constraint pattern immediate; 'dest' is the
// destination predicate register.
951 template <template <typename T> class Base>
953 decodeSvePtrue(unsigned size, ExtMachInst machInst,
954 IntRegIndex dest, uint8_t imm)
958 return new Base<uint8_t>(machInst, dest, imm);
960 return new Base<uint16_t>(machInst, dest, imm);
962 return new Base<uint32_t>(machInst, dest, imm);
964 return new Base<uint64_t>(machInst, dest, imm);
966 return new Unknown64(machInst);
970 // Decodes SVE predicate count instructions, scalar signed variant only
971 template <template <typename T> class Base>
973 decodeSvePredCountS(unsigned size, ExtMachInst machInst,
974 IntRegIndex dest, IntRegIndex op1)
978 return new Base<int8_t>(machInst, dest, op1);
980 return new Base<int16_t>(machInst, dest, op1);
982 return new Base<int32_t>(machInst, dest, op1);
984 return new Base<int64_t>(machInst, dest, op1);
986 return new Unknown64(machInst);
990 // Decodes SVE predicate count instructions, scalar unsigned variant only
991 template <template <typename T> class Base>
993 decodeSvePredCountU(unsigned size, ExtMachInst machInst,
994 IntRegIndex dest, IntRegIndex op1)
998 return new Base<uint8_t>(machInst, dest, op1);
1000 return new Base<uint16_t>(machInst, dest, op1);
1002 return new Base<uint32_t>(machInst, dest, op1);
1004 return new Base<uint64_t>(machInst, dest, op1);
1006 return new Unknown64(machInst);
1010 // Decodes SVE predicate count instructions, vector signed variant only
// Vector variants start at 16-bit elements (no 8-bit instantiation below).
1011 template <template <typename T> class Base>
1013 decodeSvePredCountVS(unsigned size, ExtMachInst machInst,
1014 IntRegIndex dest, IntRegIndex op1)
1018 return new Base<int16_t>(machInst, dest, op1);
1020 return new Base<int32_t>(machInst, dest, op1);
1022 return new Base<int64_t>(machInst, dest, op1);
1024 return new Unknown64(machInst);
1028 // Decodes SVE predicate count instructions, vector unsigned variant only
// Vector variants start at 16-bit elements (no 8-bit instantiation below).
1029 template <template <typename T> class Base>
1031 decodeSvePredCountVU(unsigned size, ExtMachInst machInst,
1032 IntRegIndex dest, IntRegIndex op1)
1036 return new Base<uint16_t>(machInst, dest, op1);
1038 return new Base<uint32_t>(machInst, dest, op1);
1040 return new Base<uint64_t>(machInst, dest, op1);
1042 return new Unknown64(machInst);
1046 // Decodes ternary with immediate operand, predicated SVE
1047 // instructions handling unsigned variants only.
// 'imm' is signed (int64_t) even for the unsigned element variants.
1048 template <template <typename T> class Base>
1050 decodeSveTerImmPredU(unsigned size, ExtMachInst machInst,
1051 IntRegIndex dest, IntRegIndex op1, int64_t imm, IntRegIndex gp)
1055 return new Base<uint8_t>(machInst, dest, op1, imm, gp);
1057 return new Base<uint16_t>(machInst, dest, op1, imm, gp);
1059 return new Base<uint32_t>(machInst, dest, op1, imm, gp);
1061 return new Base<uint64_t>(machInst, dest, op1, imm, gp);
1063 return new Unknown64(machInst);
1067 // Decodes ternary with immediate operand, predicated SVE
1068 // instructions handling signed variants only.
1069 template <template <typename T> class Base>
1071 decodeSveTerImmPredS(unsigned size, ExtMachInst machInst,
1072 IntRegIndex dest, IntRegIndex op1, int64_t imm, IntRegIndex gp)
1076 return new Base<int8_t>(machInst, dest, op1, imm, gp);
1078 return new Base<int16_t>(machInst, dest, op1, imm, gp);
1080 return new Base<int32_t>(machInst, dest, op1, imm, gp);
1082 return new Base<int64_t>(machInst, dest, op1, imm, gp);
1084 return new Unknown64(machInst);
1088 // Decodes integer element count SVE instructions, handling
1089 // signed variants only.
// 'pattern' is the predicate-constraint pattern and 'imm4' the 4-bit
// multiplier immediate, both forwarded unmodified.
1090 template <template <typename T> class Base>
1092 decodeSveElemIntCountS(unsigned size, ExtMachInst machInst,
1093 IntRegIndex dest, uint8_t pattern, uint8_t imm4)
1097 return new Base<int8_t>(machInst, dest, pattern, imm4);
1099 return new Base<int16_t>(machInst, dest, pattern, imm4);
1101 return new Base<int32_t>(machInst, dest, pattern, imm4);
1103 return new Base<int64_t>(machInst, dest, pattern, imm4);
1105 return new Unknown64(machInst);
1109 // Decodes integer element count SVE instructions, handling
1110 // unsigned variants only.
1111 template <template <typename T> class Base>
1113 decodeSveElemIntCountU(unsigned size, ExtMachInst machInst,
1114 IntRegIndex dest, uint8_t pattern, uint8_t imm4)
1118 return new Base<uint8_t>(machInst, dest, pattern, imm4);
1120 return new Base<uint16_t>(machInst, dest, pattern, imm4);
1122 return new Base<uint32_t>(machInst, dest, pattern, imm4);
1124 return new Base<uint64_t>(machInst, dest, pattern, imm4);
1126 return new Unknown64(machInst);
1130 // Decodes integer element count SVE instructions, handling
1131 // signed variants from 16 to 64 bits only.
1132 template <template <typename T> class Base>
1134 decodeSveElemIntCountLS(unsigned size, ExtMachInst machInst,
1135 IntRegIndex dest, uint8_t pattern, uint8_t imm4)
1139 return new Base<int16_t>(machInst, dest, pattern, imm4);
1141 return new Base<int32_t>(machInst, dest, pattern, imm4);
1143 return new Base<int64_t>(machInst, dest, pattern, imm4);
1145 return new Unknown64(machInst);
1149 // Decodes integer element count SVE instructions, handling
1150 // unsigned variants from 16 to 64 bits only.
1151 template <template <typename T> class Base>
1153 decodeSveElemIntCountLU(unsigned size, ExtMachInst machInst,
1154 IntRegIndex dest, uint8_t pattern, uint8_t imm4)
1158 return new Base<uint16_t>(machInst, dest, pattern, imm4);
1160 return new Base<uint32_t>(machInst, dest, pattern, imm4);
1162 return new Base<uint64_t>(machInst, dest, pattern, imm4);
1164 return new Unknown64(machInst);
1168 // Decodes SVE unpack instructions. Handling signed variants.
// Each variant widens source elements to double width (T1 -> T2, sign-
// extending); 'size' presumably names the destination element size.
1169 template <template <typename T1, typename T2> class Base>
1171 decodeSveUnpackS(unsigned size, ExtMachInst machInst,
1172 IntRegIndex dest, IntRegIndex op1)
1176 return new Base<int8_t, int16_t>(machInst, dest, op1);
1178 return new Base<int16_t, int32_t>(machInst, dest, op1);
1180 return new Base<int32_t, int64_t>(machInst, dest, op1);
1182 return new Unknown64(machInst);
1186 // Decodes SVE unpack instructions. Handling unsigned variants.
// Zero-extending counterpart of decodeSveUnpackS above.
1187 template <template <typename T1, typename T2> class Base>
1189 decodeSveUnpackU(unsigned size, ExtMachInst machInst,
1190 IntRegIndex dest, IntRegIndex op1)
1194 return new Base<uint8_t, uint16_t>(machInst, dest, op1);
1196 return new Base<uint16_t, uint32_t>(machInst, dest, op1);
1198 return new Base<uint32_t, uint64_t>(machInst, dest, op1);
1200 return new Unknown64(machInst);
# Per-decoder dict of generated decode snippets; presumably filled in by the
# instruction-generation functions below — confirm against the ISA parser.
1209 decoders = { 'Generic': {} }
# The following classes act as named constant holders used as arguments to
# the generator functions (their bodies are not shown in this extract).
1221 class IndexFormat(object):
1227 class SrcRegType(object):
1233 class DstRegType(object):
1239 class DestType(object):
1243 class SrcSize(object):
1247 class Break(object):
1251 class Unpack(object):
1255 # Generates definitions for SVE ADR instructions
1256 def sveAdrInst(name, Name, opClass, types, op):
    """Emit the C++ class declaration and exec implementation for an SVE
    ADR instruction, appending to the module-level header_output /
    exec_output accumulators.  'op' is spliced into the per-element loop
    (presumably computing destElem from srcElem1/srcElem2 — the splice
    site is not visible in this extract; confirm)."""
1257 global header_output, exec_output, decoders
1258 code = sveEnabledCheckCode + '''
1259 unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
1261 for (unsigned i = 0; i < eCount; i++) {
1262 const Element& srcElem1 = AA64FpOp1_x[i];
1263 Element srcElem2 = AA64FpOp2_x[i];
1264 Element destElem = 0;
1266 AA64FpDest_x[i] = destElem;
1268 iop = InstObjParams(name, 'Sve' + Name, 'SveAdrOp',
1269 {'code': code, 'op_class': opClass}, [])
1270 header_output += SveAdrOpDeclare.subst(iop)
1271 exec_output += SveOpExecute.subst(iop)
1273 substDict = {'targs' : type,
1274 'class_name' : 'Sve' + Name}
1275 exec_output += SveOpExecDeclare.subst(substDict)
1277 # Generates definition for SVE while predicate generation instructions
# Builds WHILE{LT,LE,LO,LS,...}: compares two scalar sources element by
# element, writes the resulting predicate, and sets NZCV from
# first/none/last activity. `srcSize` picks 32- vs 64-bit sources and
# signedness follows the element types requested.
1278 def sveWhileInst(name, Name, opClass, types, op,
1279 srcSize = SrcSize.Src64bit):
1280 global header_output, exec_output, decoders
1281 extraPrologCode = '''
1282 auto& destPred = PDest;'''
# Signed element types imply signed scalar sources; otherwise unsigned.
1283 if 'int32_t' in types:
1284 srcType = 'int64_t' if srcSize == SrcSize.Src64bit else 'int32_t'
1286 srcType = 'uint64_t' if srcSize == SrcSize.Src64bit else 'uint32_t'
1287 code = sveEnabledCheckCode + '''
1288 unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
1290 %(stype)s srcElem1 = static_cast<%(stype)s>(XOp1);
1291 %(stype)s srcElem2 = static_cast<%(stype)s>(XOp2);
1292 bool cond, first = false, none = true, last = true;
1294 for (unsigned i = 0; i < eCount; i++) {
1296 last = last && cond;
1297 none = none && !cond;
1298 first = first || (i == 0 && cond);
# NZCV: N=first, Z=none; C/V handling is elided in this chunk.
1302 CondCodesNZ = (first << 1) | none;
1305 '''%{'op': op, 'stype': srcType}
1306 iop = InstObjParams(name, 'Sve' + Name, 'SveWhileOp',
1307 {'code': code, 'op_class': opClass, 'srcIs32b': srcSize}, [])
1308 iop.snippets['code'] = extraPrologCode + iop.snippets['code']
1309 header_output += SveWhileOpDeclare.subst(iop)
1310 exec_output += SveOpExecute.subst(iop)
1312 substDict = {'targs' : type, 'class_name' : 'Sve' + Name}
1313 exec_output += SveOpExecDeclare.subst(substDict);
1315 # Generate definition for SVE compare & terminate instructions
# CTERMEQ/CTERMNE: compare two scalars with `op` (elided) and update the
# Z flag accordingly; V is set to the inverse of C.
1316 def sveCompTermInst(name, Name, opClass, types, op):
1317 global header_output, exec_output, decoders
1318 code = sveEnabledCheckCode + '''
1320 Element srcElem1 = static_cast<Element>(XOp1);
1321 Element srcElem2 = static_cast<Element>(XOp2);
1324 CondCodesNZ = CondCodesNZ | 0x2;
1327 CondCodesNZ = CondCodesNZ & ~0x2;
1328 CondCodesV = !CondCodesC;
1331 iop = InstObjParams(name, 'Sve' + Name, 'SveCompTermOp',
1332 {'code': code, 'op_class': opClass}, [])
1333 header_output += SveCompTermOpDeclare.subst(iop)
1334 exec_output += SveOpExecute.subst(iop)
1336 substDict = {'targs' : type, 'class_name': 'Sve' + Name}
1337 exec_output += SveOpExecDeclare.subst(substDict);
1339 # Generates definition for SVE predicate count instructions
# Counts active predicate elements and either accumulates into each
# vector element (destType == Vector) or into a scalar destination.
# Vector destinations require 64-bit sources (asserted below).
1340 def svePredCountInst(name, Name, opClass, types, op,
1341 destType=DestType.Vector,
1342 srcSize=SrcSize.Src64bit):
1343 global header_output, exec_output, decoders
1344 assert not (destType == DestType.Vector and
1345 srcSize != SrcSize.Src64bit)
1346 code = sveEnabledCheckCode + '''
1347 unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
# First loop computes the active-element count (body elided here).
1350 for (unsigned i = 0; i < eCount; i++) {
1355 if destType == DestType.Vector:
1357 for (unsigned i = 0; i < eCount; i++) {
1358 Element destElem = 0;
1359 const Element& srcElem = AA64FpDestMerge_x[i];
1361 AA64FpDest_x[i] = destElem;
1365 %(op)s''' % {'op': op}
1366 iop = InstObjParams(name, 'Sve' + Name, 'SvePredCountOp',
1367 {'code': code, 'op_class': opClass, 'srcIs32b': srcSize,
1368 'destIsVec': destType}, [])
1369 header_output += SvePredCountOpDeclare.subst(iop)
1370 exec_output += SveOpExecute.subst(iop)
1372 substDict = {'targs' : type, 'class_name' : 'Sve' + Name}
1373 exec_output += SveOpExecDeclare.subst(substDict);
1375 # Generates definition for SVE predicate count instructions (predicated)
# Counts elements that are active in both the operand predicate and the
# governing predicate; result handling (elided) goes to the destination.
1376 def svePredCountPredInst(name, Name, opClass, types):
1377 global header_output, exec_output, decoders
1378 code = sveEnabledCheckCode + '''
1379 unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
1382 for (unsigned i = 0; i < eCount; i++) {
1383 if (POp1_x[i] && GpOp_x[i]) {
1389 iop = InstObjParams(name, 'Sve' + Name, 'SvePredCountPredOp',
1390 {'code': code, 'op_class': opClass}, [])
1391 header_output += SvePredCountPredOpDeclare.subst(iop)
1392 exec_output += SveOpExecute.subst(iop)
1394 substDict = {'targs' : type, 'class_name' : 'Sve' + Name}
1395 exec_output += SveOpExecDeclare.subst(substDict)
1397 # Generates definition for SVE Index generation instructions
# INDEX: fills each destination element with srcElem1 + i * srcElem2,
# where each source is an immediate or a scalar register depending on
# `fmt` (ImmImm / ImmReg / RegImm / RegReg).
# NOTE(review): the `def` line itself is elided in this chunk; `fmt`
# presumably arrives as a parameter — confirm against the full file.
1399 global header_output, exec_output, decoders
1400 code = sveEnabledCheckCode + '''
1401 unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
1403 if fmt == IndexFormat.ImmReg or fmt == IndexFormat.ImmImm:
1405 const Element& srcElem1 = imm1;'''
1406 if fmt == IndexFormat.RegImm or fmt == IndexFormat.RegReg:
1408 const Element& srcElem1 = XOp1;'''
1409 if fmt == IndexFormat.RegImm or fmt == IndexFormat.ImmImm:
1411 const Element& srcElem2 = imm2;'''
1412 if fmt == IndexFormat.ImmReg or fmt == IndexFormat.RegReg:
1414 const Element& srcElem2 = XOp2;'''
1416 for (unsigned i = 0; i < eCount; i++) {
1417 AA64FpDest_x[i] = srcElem1 + i * srcElem2;
1419 iop = InstObjParams('index', 'SveIndex'+fmt, 'SveIndex'+fmt+'Op',
1420 {'code': code, 'op_class': 'SimdAluOp'})
# Each operand format has its own declare template.
1421 if fmt == IndexFormat.ImmImm:
1422 header_output += SveIndexIIOpDeclare.subst(iop)
1423 elif fmt == IndexFormat.ImmReg:
1424 header_output += SveIndexIROpDeclare.subst(iop)
1425 elif fmt == IndexFormat.RegImm:
1426 header_output += SveIndexRIOpDeclare.subst(iop)
1427 elif fmt == IndexFormat.RegReg:
1428 header_output += SveIndexRROpDeclare.subst(iop)
1429 exec_output += SveOpExecute.subst(iop)
# INDEX is always generated for all four signed element widths.
1430 for type in ['int8_t', 'int16_t', 'int32_t', 'int64_t']:
1431 substDict = {'targs': type, 'class_name': 'SveIndex'+fmt}
1432 exec_output += SveOpExecDeclare.subst(substDict)
1434 # Generates definitions for widening unary SVE instructions
1435 # (always constructive)
# Reads SElement sources and writes DElement (wider) results; merging
# predication keeps AA64FpDestMerge for inactive elements, otherwise
# the inactive value is taken from the elided else-branch.
1436 def sveWidenUnaryInst(name, Name, opClass, types, op,
1437 predType=PredType.NONE, decoder='Generic'):
1438 global header_output, exec_output, decoders
1439 code = sveEnabledCheckCode + '''
1440 unsigned eCount = ArmStaticInst::getCurSveVecLen<DElement>(
1442 for (unsigned i = 0; i < eCount; i++) {
1443 SElement srcElem1 = AA64FpOp1_xd[i];
1444 DElement destElem = 0;'''
1445 if predType != PredType.NONE:
1450 destElem = %(dest_elem)s;
1452 'dest_elem': 'AA64FpDestMerge_xd[i]'
1453 if predType == PredType.MERGE
1457 %(op)s''' % {'op': op}
1459 AA64FpDest_xd[i] = destElem;
1461 iop = InstObjParams(name, 'Sve' + Name,
1462 'SveUnaryPredOp' if predType != PredType.NONE
1463 else 'SveUnaryUnpredOp',
1464 {'code': code, 'op_class': opClass}, [])
1465 if predType != PredType.NONE:
1466 header_output += SveWideningUnaryPredOpDeclare.subst(iop)
1468 header_output += SveWideningUnaryUnpredOpDeclare.subst(iop)
1469 exec_output += SveWideningOpExecute.subst(iop)
1471 substDict = {'targs' : type,
1472 'class_name' : 'Sve' + Name}
1473 exec_output += SveOpExecDeclare.subst(substDict)
1475 # Generates definitions for unary SVE instructions (always constructive)
# The single source can be a vector element, a scalar register, or the
# first element of a vector (SimdFpScalar), selected by srcRegType.
1476 def sveUnaryInst(name, Name, opClass, types, op, predType=PredType.NONE,
1477 srcRegType=SrcRegType.Vector, decoder='Generic'):
1478 global header_output, exec_output, decoders
1479 op1 = ('AA64FpOp1_x[i]' if srcRegType == SrcRegType.Vector
1480 else 'XOp1' if srcRegType == SrcRegType.Scalar
1481 else 'AA64FpOp1_x[0]')
1482 code = sveEnabledCheckCode + '''
1483 unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
1485 for (unsigned i = 0; i < eCount; i++) {
1486 Element srcElem1 = %s;
1487 Element destElem = 0;''' % op1
1488 if predType != PredType.NONE:
1493 destElem = %(dest_elem)s;
1495 'dest_elem': 'AA64FpDestMerge_x[i]'
1496 if predType == PredType.MERGE
1500 %(op)s''' % {'op': op}
1502 AA64FpDest_x[i] = destElem;
1504 iop = InstObjParams(name, 'Sve' + Name,
1505 'SveUnaryPredOp' if predType != PredType.NONE
1506 else 'SveUnaryUnpredOp',
1507 {'code': code, 'op_class': opClass}, [])
1508 if predType != PredType.NONE:
1509 header_output += SveUnaryPredOpDeclare.subst(iop)
1511 header_output += SveUnaryUnpredOpDeclare.subst(iop)
1512 exec_output += SveOpExecute.subst(iop)
1514 substDict = {'targs' : type,
1515 'class_name' : 'Sve' + Name}
1516 exec_output += SveOpExecDeclare.subst(substDict)
1518 # Generates definitions for SVE floating-point conversions (always
1519 # unary, constructive, merging
# Narrowing uses the source element type to size the loop, widening the
# destination type; inactive elements keep the merge value.
1520 def sveCvtInst(name, Name, opClass, types, op, direction=CvtDir.Narrow,
1522 global header_output, exec_output, decoders
1523 code = sveEnabledCheckCode + '''
1524 unsigned eCount = ArmStaticInst::getCurSveVecLen<%(bigElemType)s>(
1526 for (unsigned i = 0; i < eCount; i++) {
1527 SElement srcElem1 = AA64FpOp1_x%(bigElemSuffix)s[i] &
1528 mask(sizeof(SElement) * 8);
1529 DElement destElem = 0;
1530 if (GpOp_x%(bigElemSuffix)s[i]) {
1532 AA64FpDest_x%(bigElemSuffix)s[i] = destElem;
1534 AA64FpDest_x%(bigElemSuffix)s[i] =
1535 AA64FpDestMerge_x%(bigElemSuffix)s[i];
1539 'bigElemType': 'SElement' if direction == CvtDir.Narrow
1541 'bigElemSuffix': 's' if direction == CvtDir.Narrow else 'd'}
1542 iop = InstObjParams(name, 'Sve' + Name, 'SveUnaryPredOp',
1543 {'code': code, 'op_class': opClass}, [])
1544 header_output += SveWideningUnaryPredOpDeclare.subst(iop)
1545 exec_output += SveWideningOpExecute.subst(iop)
1547 substDict = {'targs' : type,
1548 'class_name' : 'Sve' + Name}
1549 exec_output += SveOpExecDeclare.subst(substDict)
1551 # Generates definitions for associative SVE reductions
# Copies the source vector into a temporary (so dest == src aliasing is
# safe), folds `op` (elided) over it starting from `identity`, zeroes
# the upper part of the destination and writes the result to lane 0.
1552 def sveAssocReducInst(name, Name, opClass, types, op, identity,
1554 global header_output, exec_output, decoders
1555 code = sveEnabledCheckCode + '''
1556 unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
1558 ArmISA::VecRegContainer tmpVecC;
1559 auto auxOp1 = tmpVecC.as<Element>();
1560 for (unsigned i = 0; i < eCount; ++i) {
1561 auxOp1[i] = AA64FpOp1_x[i];
1563 Element destElem = %(identity)s;
1564 for (unsigned i = 0; i < eCount; i++) {
1565 AA64FpDest_x[i] = 0; // zero upper part
1567 const Element& srcElem1 = auxOp1[i];
1571 AA64FpDest_x[0] = destElem;
1572 ''' % {'op': op, 'identity': identity}
1573 iop = InstObjParams(name, 'Sve' + Name, 'SveReducOp',
1574 {'code': code, 'op_class': opClass}, [])
1575 header_output += SveReducOpDeclare.subst(iop)
1576 exec_output += SveOpExecute.subst(iop)
1578 substDict = {'targs' : type,
1579 'class_name' : 'Sve' + Name}
1580 exec_output += SveOpExecDeclare.subst(substDict)
1582 # Generates definitions for widening associative SVE reductions
# Like sveAssocReducInst but accumulates SElement sources into a wider
# DElement; lane 0 of the wide destination receives the result and the
# remaining wide lanes are zeroed.
1583 def sveWideningAssocReducInst(name, Name, opClass, types, op, identity,
1585 global header_output, exec_output, decoders
1586 code = sveEnabledCheckCode + '''
1587 unsigned eCount = ArmStaticInst::getCurSveVecLen<SElement>(
1589 unsigned eWideCount = ArmStaticInst::getCurSveVecLen<DElement>(
1591 DElement destElem = %(identity)s;
1592 for (unsigned i = 0; i < eCount; i++) {
1594 DElement srcElem1 = AA64FpOp1_xs[i];
1598 AA64FpDest_xd[0] = destElem;
1599 for (int i = 1; i < eWideCount; i++) {
1600 AA64FpDest_xd[i] = 0;
1602 ''' % {'op': op, 'identity': identity}
1603 iop = InstObjParams(name, 'Sve' + Name, 'SveReducOp',
1604 {'code': code, 'op_class': opClass}, [])
1605 header_output += SveWideningReducOpDeclare.subst(iop)
1606 exec_output += SveWideningOpExecute.subst(iop)
1608 substDict = {'targs' : type,
1609 'class_name' : 'Sve' + Name}
1610 exec_output += SveOpExecDeclare.subst(substDict)
1612 # Generates definitions for non-associative SVE reductions
# Pads the active elements up to a power-of-two count with `identity`,
# then reduces pairwise (tree reduction) so the evaluation order matches
# the architected order for non-associative ops (e.g. FP min/max).
1613 def sveNonAssocReducInst(name, Name, opClass, types, op, identity,
1615 global header_output, exec_output, decoders
1616 code = sveEnabledCheckCode + '''
1617 unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
1619 ArmISA::VecRegContainer tmpVecC;
1620 auto tmpVec = tmpVecC.as<Element>();
# ePow2Count computation start is elided; loop grows it to >= eCount.
1622 while (ePow2Count < eCount) {
1626 for (unsigned i = 0; i < ePow2Count; i++) {
1627 if (i < eCount && GpOp_x[i]) {
1628 tmpVec[i] = AA64FpOp1_x[i];
1630 tmpVec[i] = %(identity)s;
1634 unsigned n = ePow2Count;
1638 for (unsigned i = 0; i < max; i += 2) {
1639 Element srcElem1 = tmpVec[i];
1640 Element srcElem2 = tmpVec[i + 1];
1641 Element destElem = 0;
1643 tmpVec[n] = destElem;
1647 AA64FpDest_x[0] = tmpVec[0];
1648 for (unsigned i = 1; i < eCount; i++) {
1649 AA64FpDest_x[i] = 0; // zero upper part
1651 ''' % {'op': op, 'identity': identity}
1652 iop = InstObjParams(name, 'Sve' + Name, 'SveReducOp',
1653 {'code': code, 'op_class': opClass}, [])
1654 header_output += SveReducOpDeclare.subst(iop)
1655 exec_output += SveOpExecute.subst(iop)
1657 substDict = {'targs' : type,
1658 'class_name' : 'Sve' + Name}
1659 exec_output += SveOpExecDeclare.subst(substDict)
1661 # Generates definitions for binary SVE instructions with immediate operand
# srcElem1 is the destructive operand (dest-merge when predicated), the
# immediate is srcElem2; inactive elements keep merge value or zero.
1662 def sveBinImmInst(name, Name, opClass, types, op, predType=PredType.NONE,
1664 global header_output, exec_output, decoders
1665 code = sveEnabledCheckCode + '''
1666 unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
1668 for (unsigned i = 0; i < eCount; i++) {'''
1669 if predType != PredType.NONE:
1671 const Element& srcElem1 = %s;''' % (
1672 'AA64FpDestMerge_x[i]' if predType == PredType.MERGE else '0')
1675 const Element& srcElem1 = AA64FpOp1_x[i];'''
1677 Element srcElem2 = imm;
1678 Element destElem = 0;'''
1679 if predType != PredType.NONE:
1684 destElem = %(dest_elem)s;
1686 'dest_elem': 'AA64FpDestMerge_x[i]'
1687 if predType == PredType.MERGE else '0'}
1690 %(op)s''' % {'op': op}
1692 AA64FpDest_x[i] = destElem;
1694 iop = InstObjParams(name, 'Sve' + Name,
1695 'SveBinImmPredOp' if predType != PredType.NONE
1696 else 'SveBinImmUnpredConstrOp',
1697 {'code': code, 'op_class': opClass}, [])
1698 if predType != PredType.NONE:
1699 header_output += SveBinImmPredOpDeclare.subst(iop)
1701 header_output += SveBinImmUnpredOpDeclare.subst(iop)
1702 exec_output += SveOpExecute.subst(iop)
1704 substDict = {'targs' : type,
1705 'class_name' : 'Sve' + Name}
1706 exec_output += SveOpExecDeclare.subst(substDict)
1708 # Generates definitions for unary and binary SVE instructions with wide
# Wide-immediate variant: unary form takes only the immediate; binary
# form is destructive on AA64FpDestMerge with the immediate as srcElem2.
1710 def sveWideImmInst(name, Name, opClass, types, op, predType=PredType.NONE,
1711 isUnary=False, decoder='Generic'):
1712 global header_output, exec_output, decoders
1713 code = sveEnabledCheckCode + '''
1714 unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
1716 for (unsigned i = 0; i < eCount; i++) {'''
1717 # TODO: handle unsigned-to-signed conversion properly...
1720 Element srcElem1 = imm;'''
1723 const Element& srcElem1 = AA64FpDestMerge_x[i];
1724 Element srcElem2 = imm;'''
1726 Element destElem = 0;'''
1727 if predType != PredType.NONE:
1732 destElem = %(dest_elem)s;
1734 'dest_elem': 'AA64FpDestMerge_x[i]'
1735 if predType == PredType.MERGE else '0'}
1738 %(op)s''' % {'op': op}
1740 AA64FpDest_x[i] = destElem;
1742 iop = InstObjParams(name, 'Sve' + Name,
1743 'Sve%sWideImm%sOp' % (
1744 'Unary' if isUnary else 'Bin',
1745 'Unpred' if predType == PredType.NONE else 'Pred'),
1746 {'code': code, 'op_class': opClass}, [])
1747 if predType == PredType.NONE:
1748 header_output += SveWideImmUnpredOpDeclare.subst(iop)
1750 header_output += SveWideImmPredOpDeclare.subst(iop)
1751 exec_output += SveOpExecute.subst(iop)
1753 substDict = {'targs' : type,
1754 'class_name' : 'Sve' + Name}
1755 exec_output += SveOpExecDeclare.subst(substDict)
1757 # Generates definitions for shift SVE instructions with wide elements
# The shift amount comes from 64-bit elements of Op2; each narrow
# element indexes the 64-bit lane covering it ((i * elemBits) / 64).
# Op2 is copied to a temporary first so dest == op2 aliasing is safe.
1758 def sveShiftByWideElemsInst(name, Name, opClass, types, op,
1759 predType=PredType.NONE, decoder='Generic'):
1760 global header_output, exec_output, decoders
1761 code = sveEnabledCheckCode + '''
1762 unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
1764 ArmISA::VecRegContainer tmpVecC;
1765 auto auxOp2 = tmpVecC.as<Element>();
1766 for (unsigned i = 0; i < eCount; i++) {
1767 auxOp2[i] = AA64FpOp2_ud[i];
1769 for (unsigned i = 0; i < eCount; i++) {'''
1770 if predType != PredType.NONE:
1772 const Element& srcElem1 = AA64FpDestMerge_x[i];'''
1775 const Element& srcElem1 = AA64FpOp1_x[i];'''
1777 const auto& srcElem2 = auxOp2[
1778 (i * sizeof(Element) * 8) / 64];
1779 Element destElem = 0;'''
1780 if predType != PredType.NONE:
1785 destElem = %(dest_elem)s;
1787 'dest_elem': 'AA64FpDestMerge_x[i]'
1788 if predType == PredType.MERGE else '0'}
1791 %(op)s''' % {'op': op}
1793 AA64FpDest_x[i] = destElem;
1795 iop = InstObjParams(name, 'Sve' + Name,
1796 'SveBinDestrPredOp' if predType != PredType.NONE
1797 else 'SveBinUnpredOp',
1798 {'code': code, 'op_class': opClass}, [])
1799 if predType != PredType.NONE:
1800 header_output += SveBinDestrPredOpDeclare.subst(iop)
1802 header_output += SveBinUnpredOpDeclare.subst(iop)
1803 exec_output += SveOpExecute.subst(iop)
1805 substDict = {'targs' : type,
1806 'class_name' : 'Sve' + Name}
1807 exec_output += SveOpExecDeclare.subst(substDict)
1809 # Generates definitions for binary indexed SVE instructions
1810 # (always unpredicated)
# Op2 is read per 128-bit segment: every element in a segment uses the
# same indexed lane (segmentBase + index) of Op2.
1811 def sveBinIdxInst(name, Name, opClass, types, op, decoder='Generic'):
1812 global header_output, exec_output, decoders
1813 code = sveEnabledCheckCode + '''
1814 unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
1817 // Number of elements in a 128 bit segment
1818 constexpr unsigned ePerSegment = 128 / sizeof(Element);
1823 for (unsigned i = 0; i < eCount; i++) {
1824 const auto segmentBase = i - i % ePerSegment;
1825 const auto segmentIdx = segmentBase + index;
1827 const Element& srcElem1 = AA64FpOp1_x[i];
1828 const Element& srcElem2 = AA64FpOp2_x[segmentIdx];
1829 Element destElem = 0;
1835 AA64FpDest_x[i] = destElem;
1839 baseClass = 'SveBinIdxUnpredOp'
1841 iop = InstObjParams(name, 'Sve' + Name, baseClass,
1842 {'code': code, 'op_class': opClass}, [])
1843 header_output += SveBinIdxUnpredOpDeclare.subst(iop)
1844 exec_output += SveOpExecute.subst(iop)
1846 substDict = {'targs' : type,
1847 'class_name' : 'Sve' + Name}
1848 exec_output += SveOpExecDeclare.subst(substDict)
1850 # Generates definitions for binary SVE instructions
# General binary generator: MERGE predication makes the op destructive
# (srcElem1 = dest-merge); a custom iteration body may replace the
# default per-element loop entirely.
1851 def sveBinInst(name, Name, opClass, types, op, predType=PredType.NONE,
1852 isDestructive=False, customIterCode=None,
1854 assert not (predType in (PredType.NONE, PredType.SELECT) and
1856 global header_output, exec_output, decoders
1857 code = sveEnabledCheckCode + '''
1858 unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
1860 if customIterCode is None:
1862 for (unsigned i = 0; i < eCount; i++) {'''
1863 if predType == PredType.MERGE:
1865 const Element& srcElem1 = AA64FpDestMerge_x[i];'''
1868 const Element& srcElem1 = AA64FpOp1_x[i];'''
1870 const Element& srcElem2 = AA64FpOp2_x[i];
1871 Element destElem = 0;'''
1872 if predType != PredType.NONE:
1877 destElem = %(dest_elem)s;
# Inactive-element value depends on predication flavor.
1880 'AA64FpDestMerge_x[i]' if predType == PredType.MERGE
1881 else '0' if predType == PredType.ZERO
1885 %(op)s''' % {'op': op}
1887 AA64FpDest_x[i] = destElem;
1890 code += customIterCode
1891 if predType == PredType.NONE:
1892 baseClass = 'SveBinUnpredOp'
1894 baseClass = 'SveBinDestrPredOp'
1896 baseClass = 'SveBinConstrPredOp'
1897 iop = InstObjParams(name, 'Sve' + Name, baseClass,
1898 {'code': code, 'op_class': opClass}, [])
1899 if predType == PredType.NONE:
1900 header_output += SveBinUnpredOpDeclare.subst(iop)
1902 header_output += SveBinDestrPredOpDeclare.subst(iop)
1904 header_output += SveBinConstrPredOpDeclare.subst(iop)
1905 exec_output += SveOpExecute.subst(iop)
1907 substDict = {'targs' : type,
1908 'class_name' : 'Sve' + Name}
1909 exec_output += SveOpExecDeclare.subst(substDict)
1911 # Generates definitions for predicate logical instructions
# Combines two predicate sources element-wise; the governing predicate
# is snapshotted into a temporary so it survives PDest aliasing. Flag
# setting (the isFlagSetting branch lines are elided) updates NZC from
# first/none/last activity of the result.
1912 def svePredLogicalInst(name, Name, opClass, types, op,
1913 predType=PredType.ZERO, isFlagSetting=False,
1915 global header_output, exec_output, decoders
1916 assert predType in (PredType.ZERO, PredType.SELECT)
1917 code = sveEnabledCheckCode + '''
1918 unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
1920 ArmISA::VecPredRegContainer tmpPredC;
1921 auto auxGpOp = tmpPredC.as<Element>();
1922 for (unsigned i = 0; i < eCount; i++) {
1923 auxGpOp[i] = GpOp_x[i];
1925 for (unsigned i = 0; i < eCount; i++) {
1926 bool srcElem1 = POp1_x[i];
1927 bool srcElem2 = POp2_x[i];
1928 bool destElem = false;
1932 destElem = %(dest_elem)s;
1934 PDest_x[i] = destElem;
1936 'dest_elem': 'false' if predType == PredType.ZERO
1938 extraPrologCode = ''
1941 CondCodesNZ = (destPred.firstActive(auxGpOp, eCount) << 1) |
1942 destPred.noneActive(auxGpOp, eCount);
1943 CondCodesC = !destPred.lastActive(auxGpOp, eCount);
1945 extraPrologCode += '''
1946 auto& destPred = PDest;'''
1947 iop = InstObjParams(name, 'Sve' + Name, 'SvePredLogicalOp',
1948 {'code': code, 'op_class': opClass}, [])
1949 iop.snippets['code'] = extraPrologCode + iop.snippets['code']
1950 header_output += SvePredLogicalOpDeclare.subst(iop)
1951 exec_output += SveOpExecute.subst(iop)
1953 substDict = {'targs' : type,
1954 'class_name' : 'Sve' + Name}
1955 exec_output += SveOpExecDeclare.subst(substDict)
1957 # Generates definitions for predicate permute instructions
# The caller supplies the full iteration body (`iterCode`, appended in
# elided lines); this wrapper only emits the boilerplate around it.
1958 def svePredBinPermInst(name, Name, opClass, types, iterCode,
1960 global header_output, exec_output, decoders
1961 code = sveEnabledCheckCode + '''
1962 unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
1965 iop = InstObjParams(name, 'Sve' + Name, 'SvePredBinPermOp',
1966 {'code': code, 'op_class': opClass}, [])
1967 header_output += SveBinUnpredOpDeclare.subst(iop)
1968 exec_output += SveOpExecute.subst(iop)
1970 substDict = {'targs' : type,
1971 'class_name' : 'Sve' + Name}
1972 exec_output += SveOpExecDeclare.subst(substDict)
1974 # Generates definitions for SVE compare instructions
1975 # NOTE: compares are all predicated zeroing
# Vector-vs-vector or vector-vs-immediate compares writing a predicate;
# the governing predicate is copied to a temporary before the loop so
# that PDest aliasing the governing register is safe.
1976 def sveCmpInst(name, Name, opClass, types, op, isImm=False,
1978 global header_output, exec_output, decoders
1979 extraPrologCode = '''
1980 auto& destPred = PDest;'''
1981 code = sveEnabledCheckCode + '''
1982 unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
1984 ArmISA::VecPredRegContainer tmpPredC;
1985 auto tmpPred = tmpPredC.as<Element>();
1986 for (unsigned i = 0; i < eCount; ++i)
1987 tmpPred[i] = GpOp_x[i];
1989 for (unsigned i = 0; i < eCount; i++) {
1990 const Element& srcElem1 = AA64FpOp1_x[i];
1991 %(src_elem_2_ty)s srcElem2 __attribute__((unused)) =
1993 bool destElem = false;
1999 PDest_x[i] = destElem;
2001 'src_elem_2_ty': 'Element' if isImm else 'const Element&',
2002 'src_elem_2': 'imm' if isImm else 'AA64FpOp2_x[i]'}
2003 iop = InstObjParams(name, 'Sve' + Name,
2004 'SveCmpImmOp' if isImm else 'SveCmpOp',
2005 {'code': code, 'op_class': opClass}, [])
2006 iop.snippets['code'] = extraPrologCode + iop.snippets['code']
2008 header_output += SveCmpImmOpDeclare.subst(iop)
2010 header_output += SveCmpOpDeclare.subst(iop)
2011 exec_output += SveOpExecute.subst(iop)
2013 substDict = {'targs' : type,
2014 'class_name' : 'Sve' + Name}
2015 exec_output += SveOpExecDeclare.subst(substDict)
2017 # Generates definitions for ternary SVE intructions (always predicated -
# Destructive three-operand form: destElem starts as the dest-merge
# value and `op` (elided) combines it with the two vector sources.
2019 def sveTerInst(name, Name, opClass, types, op, decoder='Generic'):
2020 global header_output, exec_output, decoders
2021 code = sveEnabledCheckCode + '''
2022 unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
2024 for (unsigned i = 0; i < eCount; i++) {
2025 const Element& srcElem1 = AA64FpOp1_x[i];
2026 const Element& srcElem2 = AA64FpOp2_x[i];
2027 Element destElem = AA64FpDestMerge_x[i];
2031 AA64FpDest_x[i] = destElem;
2033 iop = InstObjParams(name, 'Sve' + Name, 'SveTerPredOp',
2034 {'code': code, 'op_class': opClass}, [])
2035 header_output += SveTerPredOpDeclare.subst(iop)
2036 exec_output += SveOpExecute.subst(iop)
2038 substDict = {'targs' : type,
2039 'class_name' : 'Sve' + Name}
2040 exec_output += SveOpExecDeclare.subst(substDict)
2042 # Generates definitions for ternary SVE instructions with indexed operand
# Like sveTerInst but Op2 is read per 128-bit segment through the
# indexed lane (segmentBase + index), as in sveBinIdxInst.
2043 def sveTerIdxInst(name, Name, opClass, types, op, decoder='Generic'):
2044 global header_output, exec_output, decoders
2045 code = sveEnabledCheckCode + '''
2046 unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
2049 // Number of elements in a 128 bit segment
2050 constexpr unsigned ePerSegment = 128 / sizeof(Element);
2052 for (unsigned i = 0; i < eCount; i++) {
2053 const auto segmentBase = i - i % ePerSegment;
2054 const auto segmentIdx = segmentBase + index;
2056 const Element& srcElem1 = AA64FpOp1_x[i];
2057 const Element& srcElem2 = AA64FpOp2_x[segmentIdx];
2058 Element destElem = AA64FpDestMerge_x[i];
2063 AA64FpDest_x[i] = destElem;
2066 iop = InstObjParams(name, 'Sve' + Name, 'SveBinIdxUnpredOp',
2067 {'code': code, 'op_class': opClass}, [])
2068 header_output += SveBinIdxUnpredOpDeclare.subst(iop)
2069 exec_output += SveOpExecute.subst(iop)
2071 substDict = {'targs' : type,
2072 'class_name' : 'Sve' + Name}
2073 exec_output += SveOpExecDeclare.subst(substDict)
2075 # Generates definitions for ternary SVE intructions with immediate operand
2076 # (always unpredicated)
# Destructive on dest-merge with the third operand taken from `imm`.
2077 def sveTerImmInst(name, Name, opClass, types, op, decoder='Generic'):
2078 global header_output, exec_output, decoders
2079 code = sveEnabledCheckCode + '''
2080 unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
2082 for (unsigned i = 0; i < eCount; i++) {
2083 const Element& srcElem2 = AA64FpOp2_x[i];
2084 Element srcElem3 = imm;
2085 Element destElem = AA64FpDestMerge_x[i];
2087 AA64FpDest_x[i] = destElem;
2089 iop = InstObjParams(name, 'Sve' + Name, 'SveTerImmUnpredOp',
2090 {'code': code, 'op_class': opClass}, [])
2091 header_output += SveTerImmUnpredOpDeclare.subst(iop)
2092 exec_output += SveOpExecute.subst(iop)
2094 substDict = {'targs' : type,
2095 'class_name' : 'Sve' + Name}
2096 exec_output += SveOpExecDeclare.subst(substDict)
2098 # Generates definitions for PTRUE and PTRUES instructions.
# Sets the first `predCount` elements (decoded from the pattern imm)
# true and the rest false; PTRUES additionally updates NZC from the
# destination predicate (the flag-setting guard line is elided).
2099 def svePtrueInst(name, Name, opClass, types, isFlagSetting=False,
2101 global header_output, exec_output, decoders
2102 extraPrologCode = '''
2103 auto& destPred = PDest;'''
2104 code = sveEnabledCheckCode + '''
2105 unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
2107 unsigned predCount = sveDecodePredCount(imm, eCount);
2109 for (unsigned i = 0; i < eCount; i++) {
2110 PDest_x[i] = (i < predCount);
2114 CondCodesNZ = (destPred.firstActive(destPred, eCount) << 1) |
2115 destPred.noneActive(destPred, eCount);
2116 CondCodesC = !destPred.lastActive(destPred, eCount);
2118 iop = InstObjParams(name, 'Sve' + Name, 'SvePtrueOp',
2119 {'code': code, 'op_class': opClass}, [])
2120 iop.snippets['code'] = extraPrologCode + iop.snippets['code']
2121 header_output += SvePtrueOpDeclare.subst(iop)
2122 exec_output += SveOpExecute.subst(iop)
2124 substDict = {'targs' : type,
2125 'class_name' : 'Sve' + Name}
2126 exec_output += SveOpExecDeclare.subst(substDict)
2128 # Generate definitions for integer CMP<cc> instructions
# Integer compares writing a predicate and NZCV. `wideop` selects the
# wide (64-bit second operand) form, where each narrow element indexes
# the covering 64-bit lane of Op2. Signedness is inferred from `types`.
2129 def sveIntCmpInst(name, Name, opClass, types, op, wideop = False,
2130 decoder = 'Generic'):
2131 global header_output, exec_output, decoders
2132 signed = 'int8_t' in types
2136 srcType = 'int64_t' if signed else 'uint64_t'
2137 op2Suffix = 'sd' if signed else 'ud'
2138 extraPrologCode = '''
2139 auto& destPred = PDest;'''
2140 code = sveEnabledCheckCode + '''
2141 unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
2143 ArmISA::VecPredRegContainer tmpPredC;
2144 auto tmpPred = tmpPredC.as<Element>();
2145 for (unsigned i = 0; i < eCount; ++i)
2146 tmpPred[i] = GpOp_x[i];
2148 for (unsigned i = 0; i < eCount; ++i) {
2149 %(srcType)s srcElem1 = (%(srcType)s) AA64FpOp1_x[i];
2150 %(srcType)s srcElem2 = AA64FpOp2_%(op2Suffix)s[%(op2Index)s];
2151 bool destElem = false;
2155 PDest_x[i] = destElem;
2157 CondCodesNZ = (destPred.firstActive(tmpPred, eCount) << 1) |
2158 destPred.noneActive(tmpPred, eCount);
2159 CondCodesC = !destPred.lastActive(tmpPred, eCount);
2160 CondCodesV = 0;''' % {
2163 'op2Suffix': op2Suffix,
2164 'op2Index': '(i * sizeof(Element)) / 8' if wideop else 'i'
2166 iop = InstObjParams(name, 'Sve' + Name, 'SveIntCmpOp',
2169 'op_class': opClass,
2170 'op2IsWide': 'true' if wideop else 'false',
2172 iop.snippets['code'] = extraPrologCode + iop.snippets['code']
2173 header_output += SveIntCmpOpDeclare.subst(iop)
2174 exec_output += SveOpExecute.subst(iop)
2176 substDict = {'targs' : type,
2177 'class_name' : 'Sve' + Name}
2178 exec_output += SveOpExecDeclare.subst(substDict)
2180 # Generate definitions for integer CMP<cc> instructions (with immediate)
# Immediate form of sveIntCmpInst: srcElem2 is the sign/zero-extended
# immediate; predicate result plus NZC from activity, V cleared.
2181 def sveIntCmpImmInst(name, Name, opClass, types, op, decoder = 'Generic'):
2182 global header_output, exec_output, decoders
2183 extraPrologCode = '''
2184 auto& destPred = PDest;'''
2185 code = sveEnabledCheckCode + '''
2186 unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
2188 ArmISA::VecPredRegContainer tmpPredC;
2189 auto tmpPred = tmpPredC.as<Element>();
2190 for (unsigned i = 0; i < eCount; ++i)
2191 tmpPred[i] = GpOp_x[i];
2193 for (unsigned i = 0; i < eCount; ++i) {
2194 Element srcElem1 = AA64FpOp1_x[i];
2195 Element srcElem2 = static_cast<Element>(imm);
2196 bool destElem = false;
2200 PDest_x[i] = destElem;
2202 CondCodesNZ = (destPred.firstActive(tmpPred, eCount) << 1) |
2203 destPred.noneActive(tmpPred, eCount);
2204 CondCodesC = !destPred.lastActive(tmpPred, eCount);
2205 CondCodesV = 0;'''%{'op': op}
2206 iop = InstObjParams(name, 'Sve' + Name, 'SveIntCmpImmOp',
2207 {'code': code, 'op_class': opClass,}, [])
2208 iop.snippets['code'] = extraPrologCode + iop.snippets['code']
2209 header_output += SveIntCmpImmOpDeclare.subst(iop)
2210 exec_output += SveOpExecute.subst(iop)
2212 substDict = {'targs' : type,
2213 'class_name' : 'Sve' + Name}
2214 exec_output += SveOpExecDeclare.subst(substDict)
2216 # Generate definitions for SVE element count instructions
# CNTB/CNTH/... and INC/DEC variants: decodes the pattern to a count,
# then either updates each vector element (Vector dest) or a scalar
# destination sized per the element type (32- or 64-bit accumulator).
2217 def sveElemCountInst(name, Name, opClass, types, op,
2218 destType = DestType.Scalar, dstIs32b = False,
2219 dstAcc = True, decoder = 'Generic'):
2220 global header_output, exec_output, decoders
2221 code = sveEnabledCheckCode + '''
2222 unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
2224 unsigned count = sveDecodePredCount(pattern, eCount);
2226 if destType == DestType.Vector:
2228 for (unsigned i = 0; i < eCount; ++i) {
2229 Element srcElem1 = AA64FpDestMerge_x[i];
2230 Element destElem = 0;
2232 AA64FpDest_x[i] = destElem;
# Scalar path: destination width follows the element set.
2235 if 'uint16_t' in types:
2237 dstType = 'uint32_t'
2239 dstType = 'uint64_t'
2247 %(dstType)s srcElem1 = XDest;
2248 '''%{'dstType': dstType}
2250 %(dstType)s destElem = 0;
2253 '''%{'op': op, 'dstType': dstType}
2254 iop = InstObjParams(name, 'Sve' + Name, 'SveElemCountOp',
2255 {'code': code, 'op_class': opClass, 'dstIsVec': destType,
2256 'dstIs32b': 'true' if dstIs32b else 'false'}, [])
2257 header_output += SveElemCountOpDeclare.subst(iop)
2258 exec_output += SveOpExecute.subst(iop)
2260 substDict = {'targs' : type,
2261 'class_name' : 'Sve' + Name}
2262 exec_output += SveOpExecDeclare.subst(substDict);
# Generates BRKA/BRKB (partition break) definitions: scans the 8-bit
# predicate elements, sets a break once an active true element is seen,
# and clears/merges/keeps destination elements per `predType` and
# `whenBrk` (break before vs after the triggering element). Flag-setting
# variants update NZC from the snapshotted governing predicate.
2264 def svePartBrkInst(name, Name, opClass, isFlagSetting, predType, whenBrk,
2265 decoder = 'Generic'):
2266 global header_output, exec_output, decoders
2267 code = sveEnabledCheckCode + '''
2268 unsigned eCount = ArmStaticInst::getCurSveVecLen<uint8_t>(
2270 bool dobreak = false;
2271 ArmISA::VecPredRegContainer tmpPredC;
2272 auto auxGpOp = tmpPredC.as<uint8_t>();
2273 for (unsigned i = 0; i < eCount; ++i) {
2274 auxGpOp[i] = GpOp_ub[i];
2276 for (unsigned i = 0; i < eCount; ++i) {
2277 bool element = POp1_ub[i] == 1;
2278 if (auxGpOp[i]) {'''
2280 dobreak = dobreak || element;'''
2281 if whenBrk == Break.Before:
2284 PDest_ub[i] = !dobreak;'''
2285 if whenBrk == Break.After:
2289 if predType == PredType.ZERO:
2293 elif predType == PredType.MERGE:
2295 PDest_ub[i] = PDestMerge_ub[i];
2299 extraPrologCode = ''
2302 CondCodesNZ = (destPred.firstActive(auxGpOp, eCount) << 1) |
2303 destPred.noneActive(auxGpOp, eCount);
2304 CondCodesC = !destPred.lastActive(auxGpOp, eCount);
2306 extraPrologCode += '''
2307 auto& destPred = PDest;'''
2308 iop = InstObjParams(name, 'Sve' + Name, 'SvePartBrkOp',
2309 {'code': code, 'op_class': opClass,
2310 'isMerging': 'true' if predType == PredType.MERGE
2312 iop.snippets['code'] = extraPrologCode + iop.snippets['code']
2313 header_output += SvePartBrkOpDeclare.subst(iop)
2314 exec_output += SveNonTemplatedOpExecute.subst(iop)
# Generate definitions for SVE propagate-break-from-previous-partition
# instructions (BRKPA, BRKPAS, BRKPB, BRKPBS): the break condition is
# seeded from the last active element of the first source predicate.
2316 def svePartBrkPropPrevInst(name, Name, opClass, isFlagSetting, whenBrk,
2317 decoder = 'Generic'):
2318 global header_output, exec_output, decoders
2319 code = sveEnabledCheckCode + '''
2320 unsigned eCount = ArmStaticInst::getCurSveVecLen<uint8_t>(
2322 bool last = POp1_ub.lastActive(GpOp_ub, eCount);
2323 ArmISA::VecPredRegContainer tmpPredC;
2324 auto auxGpOp = tmpPredC.as<uint8_t>();
2325 for (unsigned i = 0; i < eCount; ++i) {
2326 auxGpOp[i] = GpOp_ub[i];
2328 for (unsigned i = 0; i < eCount; ++i) {
2329 if (auxGpOp[i]) {'''
2331 last = last && (POp2_ub[i] == 0);'''
2332 if whenBrk == Break.Before:
2335 PDest_ub[i] = last;'''
2336 if whenBrk == Break.After:
2343 extraPrologCode = ''
2346 CondCodesNZ = (destPred.firstActive(auxGpOp, eCount) << 1) |
2347 destPred.noneActive(auxGpOp, eCount);
2348 CondCodesC = !destPred.lastActive(auxGpOp, eCount);
# Build the instruction object and emit the declaration/exec templates.
2350 extraPrologCode += '''
2351 auto& destPred = PDest;'''
2352 iop = InstObjParams(name, 'Sve' + Name, 'SvePartBrkPropOp',
2353 {'code': code, 'op_class': opClass}, [])
2354 iop.snippets['code'] = extraPrologCode + iop.snippets['code']
2355 header_output += SvePartBrkPropOpDeclare.subst(iop)
2356 exec_output += SveNonTemplatedOpExecute.subst(iop)
# Generate definitions for SVE propagate-break-to-next-partition
# instructions (BRKN, BRKNS): if the break carried out of the first
# source, the destination keeps its merge value; flag-setting variants
# compute NZCV against an all-true governing predicate.
2358 def svePartBrkPropNextInst(name, Name, opClass, isFlagSetting,
2359 decoder = 'Generic'):
2360 global header_output, exec_output, decoders
2361 code = sveEnabledCheckCode + '''
2362 unsigned eCount = ArmStaticInst::getCurSveVecLen<uint8_t>(
2364 bool last = POp1_ub.lastActive(GpOp_ub, eCount);
2365 for (unsigned i = 0; i < eCount; i++) {
2369 PDest_ub[i] = PDestMerge_ub[i];
2372 extraPrologCode = ''
2375 VecPredRegT<uint8_t, MaxSveVecLenInBytes, false, false>::Container c;
2376 VecPredRegT<uint8_t, MaxSveVecLenInBytes, false, false> predOnes(c);
2377 for (unsigned i = 0; i < eCount; i++) {
2380 CondCodesNZ = (destPred.firstActive(predOnes, eCount) << 1) |
2381 destPred.noneActive(predOnes, eCount);
2382 CondCodesC = !destPred.lastActive(predOnes, eCount);
# Build the instruction object and emit the declaration/exec templates.
2384 extraPrologCode += '''
2385 auto& destPred = PDest;'''
2386 iop = InstObjParams(name, 'Sve' + Name, 'SvePartBrkPropOp',
2387 {'code': code, 'op_class': opClass}, [])
2388 iop.snippets['code'] = extraPrologCode + iop.snippets['code']
2389 header_output += SvePartBrkPropOpDeclare.subst(iop)
2390 exec_output += SveNonTemplatedOpExecute.subst(iop)
2392 # Generate definitions for scalar select instructions
# Used for the CLASTA/CLASTB family (see calls below): locate the
# first/last active element (per `op`) and copy it to a scalar, a
# SIMD&FP scalar, or broadcast it to a full vector (`destType`).
# When no element is active, the destination keeps/merges its old value.
2393 def sveSelectInst(name, Name, opClass, types, op, isCond,
2394 destType = DstRegType.Scalar, decoder = 'Generic'):
2395 global header_output, exec_output, decoders
2396 code = sveEnabledCheckCode + '''
2397 unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
2400 for (last = eCount - 1; last >= 0; --last) {
2411 %(op)s'''%{'op': op}
2412 if destType == DstRegType.Vector:
2414 for (unsigned i = 0; i < eCount; ++i)
2415 AA64FpDest_x[i] = destElem;'''
2416 elif destType == DstRegType.Scalar:
2418 XDest = destElem;'''
2419 elif destType == DstRegType.SimdFpScalar:
2421 AA64FpDest_x[0] = destElem;'''
2425 if destType == DstRegType.Scalar:
2427 XDest = (Element) XDest;
2429 elif destType == DstRegType.Vector:
2431 for (unsigned i = 0; i < eCount; ++i)
2432 AA64FpDest_x[i] = AA64FpDestMerge_x[i];
2434 elif destType == DstRegType.SimdFpScalar:
2436 AA64FpDest_x[0] = AA64FpDestMerge_x[0];
2438 iop = InstObjParams(name, 'Sve' + Name, 'SveSelectOp',
2439 {'code': code, 'op_class': opClass,
2440 'isCond': 'true' if isCond else 'false',
2442 if destType == DstRegType.Scalar else 'false',
2444 if destType == DstRegType.SimdFpScalar
2447 header_output += SveSelectOpDeclare.subst(iop)
2448 exec_output += SveOpExecute.subst(iop)
2450 substDict = {'targs' : type,
2451 'class_name' : 'Sve' + Name}
2452 exec_output += SveOpExecDeclare.subst(substDict)
2454 # Generate definitions for PNEXT (find next active predicate)
# Scans past the last-true element of the destination and sets the next
# element that is active under the governing predicate; NZCV is updated
# from the resulting predicate.
2456 def svePNextInst(name, Name, opClass, types, decoder = 'Generic'):
2457 global header_output, exec_output, decoders
2458 code = sveEnabledCheckCode + '''
2459 unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
2461 ArmISA::VecPredRegContainer tmpPredC;
2462 auto auxGpOp = tmpPredC.as<Element>();
2463 for (unsigned i = 0; i < eCount; ++i) {
2464 auxGpOp[i] = GpOp_x[i];
2467 for (last = eCount - 1; last >= 0; --last) {
2472 int next = last + 1;
2473 while (next < eCount && GpOp_x[next] == 0) {
2477 if (next < eCount) {
2480 CondCodesNZ = (destPred.firstActive(auxGpOp, eCount) << 1) |
2481 destPred.noneActive(auxGpOp, eCount);
2482 CondCodesC = !destPred.lastActive(auxGpOp, eCount);
# Build the instruction object and emit the declaration/exec templates.
2484 extraPrologCode = '''
2485 auto& destPred = PDest;'''
2486 iop = InstObjParams(name, 'Sve' + Name, 'SveUnaryPredPredOp',
2487 {'code': code, 'op_class': opClass}, [])
2488 iop.snippets['code'] = extraPrologCode + iop.snippets['code']
2489 header_output += SveUnaryPredOpDeclare.subst(iop)
2490 exec_output += SveOpExecute.subst(iop)
2492 substDict = {'targs' : type,
2493 'class_name' : 'Sve' + Name}
2494 exec_output += SveOpExecDeclare.subst(substDict)
2496 # Generate definitions for PFIRST (set first active predicate)
# Sets the first element active under the governing predicate and merges
# the remaining elements from the destination; NZCV is updated from the
# result. Only the byte-granule (uint8_t) variant is instantiated.
2498 def svePFirstInst(name, Name, opClass, decoder = 'Generic'):
2499 global header_output, exec_output, decoders
2500 code = sveEnabledCheckCode + '''
2501 unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
2503 ArmISA::VecPredRegContainer tmpPredC;
2504 auto auxGpOp = tmpPredC.as<Element>();
2505 for (unsigned i = 0; i < eCount; ++i)
2506 auxGpOp[i] = GpOp_x[i];
2508 for (int i = 0; i < eCount; ++i) {
2509 if (auxGpOp[i] && first == -1) {
2513 for (int i = 0; i < eCount; ++i) {
2514 PDest_x[i] = PDestMerge_x[i];
2519 CondCodesNZ = (destPred.firstActive(auxGpOp, eCount) << 1) |
2520 destPred.noneActive(auxGpOp, eCount);
2521 CondCodesC = !destPred.lastActive(auxGpOp, eCount);
# Build the instruction object and emit the declaration/exec templates.
2523 extraPrologCode = '''
2524 auto& destPred = PDest;'''
2525 iop = InstObjParams(name, 'Sve' + Name, 'SveUnaryPredPredOp',
2526 {'code': code, 'op_class': opClass}, [])
2527 iop.snippets['code'] = extraPrologCode + iop.snippets['code']
2528 header_output += SveUnaryPredOpDeclare.subst(iop)
2529 exec_output += SveOpExecute.subst(iop)
2530 substDict = {'targs' : 'uint8_t',
2531 'class_name' : 'Sve' + Name}
2532 exec_output += SveOpExecDeclare.subst(substDict)
2534 # Generate definitions for SVE TBL instructions
# Table lookup: each destination element is the first-source element
# indexed by the corresponding second-source element (out-of-range
# indices yield zero — NOTE(review): zeroing path is in elided lines;
# confirm against the template).
2535 def sveTblInst(name, Name, opClass, decoder = 'Generic'):
2536 global header_output, exec_output, decoders
2537 code = sveEnabledCheckCode + '''
2538 unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
2540 for (int i = 0; i < eCount; ++i) {
2541 Element idx = AA64FpOp2_x[i];
2544 val = AA64FpOp1_x[idx];
2548 AA64FpDest_x[i] = val;
2550 iop = InstObjParams(name, 'Sve' + Name, 'SveTblOp',
2551 {'code': code, 'op_class': opClass}, [])
2552 header_output += SveBinUnpredOpDeclare.subst(iop)
2553 exec_output += SveOpExecute.subst(iop)
2554 for type in unsignedTypes:
2555 substDict = {'targs' : type,
2556 'class_name' : 'Sve' + Name}
2557 exec_output += SveOpExecDeclare.subst(substDict)
2559 # Generate definitions for SVE Unpack instructions
# Widening unpack of the low or high half of a vector or predicate
# source (SUNPKLO/HI, UUNPKLO/HI, PUNPKLO/HI): each narrow source
# element (SElement) is widened into a destination element (DElement).
# The low half is copied to a temporary first so a same-register
# destination does not clobber unread source elements.
2560 def sveUnpackInst(name, Name, opClass, sdtypes, unpackHalf,
2561 regType, decoder = 'Generic'):
2562 global header_output, exec_output, decoders
2563 extraPrologCode = '''
2564 auto& destPred = PDest;'''
2565 code = sveEnabledCheckCode + '''
2566 unsigned eCount = ArmStaticInst::getCurSveVecLen<DElement>(
2568 if unpackHalf == Unpack.Low:
2569 if regType == SrcRegType.Predicate:
2571 ArmISA::VecPredRegContainer tmpPredC;
2572 auto auxPOp1 = tmpPredC.as<SElement>();
2573 for (int i = 0; i < eCount; ++i) {
2574 auxPOp1[i] = POp1_xs[i];
2578 ArmISA::VecRegContainer tmpVecC;
2579 auto auxOp1 = tmpVecC.as<SElement>();
2580 for (int i = 0; i < eCount; ++i) {
2581 auxOp1[i] = AA64FpOp1_xs[i];
2584 for (int i = 0; i < eCount; ++i) {'''
2585 if regType == SrcRegType.Predicate:
2586 if unpackHalf == Unpack.High:
2588 const SElement& srcElem1 = POp1_xs[i + eCount];'''
2591 const SElement& srcElem1 = auxPOp1[i];'''
2593 destPred.set_raw(i, 0);
2594 PDest_xd[i] = srcElem1;'''
2596 if unpackHalf == Unpack.High:
2598 const SElement& srcElem1 = AA64FpOp1_xs[i + eCount];'''
2601 const SElement& srcElem1 = auxOp1[i];'''
2603 AA64FpDest_xd[i] = static_cast<DElement>(srcElem1);'''
2607 iop = InstObjParams(name, 'Sve' + Name, 'SveUnpackOp',
2608 {'code': code, 'op_class': opClass}, [])
2609 if regType == SrcRegType.Predicate:
2610 iop.snippets['code'] = extraPrologCode + iop.snippets['code']
2611 header_output += SveUnpackOpDeclare.subst(iop)
2612 exec_output += SveWideningOpExecute.subst(iop)
2613 for srcType, dstType in sdtypes:
2614 substDict = {'targs': srcType + ', ' + dstType,
2615 'class_name': 'Sve' + Name}
2616 exec_output += SveOpExecDeclare.subst(substDict)
2618 # Generate definition for SVE predicate test instructions
# PTEST: sets NZCV from the tested predicate under the governing
# predicate, without writing any destination register.
2619 def svePredTestInst(name, Name, opClass, decoder = 'Generic'):
2620 global header_output, exec_output, decoders
2621 code = sveEnabledCheckCode + '''
2622 unsigned eCount = ArmStaticInst::getCurSveVecLen<uint8_t>(
2624 CondCodesNZ = (POp1_ub.firstActive(GpOp_ub, eCount) << 1) |
2625 POp1_ub.noneActive(GpOp_ub, eCount);
2626 CondCodesC = !POp1_ub.lastActive(GpOp_ub, eCount);
2628 iop = InstObjParams(name, 'Sve' + Name, 'SvePredTestOp',
2629 {'code': code, 'op_class': opClass}, [])
2630 header_output += SvePredicateTestOpDeclare.subst(iop)
2631 exec_output += SveNonTemplatedOpExecute.subst(iop)
2633 # Generate definition for SVE predicate compact operations
# COMPACT: packs the active source elements into the low end of the
# destination and zero-fills the rest. The source is copied to a
# temporary first so the destructive case is safe.
2634 def sveCompactInst(name, Name, opClass, types, decoder = 'Generic'):
2635 global header_output, exec_output, decoders
2636 code = sveEnabledCheckCode + '''
2637 unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
2639 ArmISA::VecRegContainer tmpVecC;
2640 auto auxOp1 = tmpVecC.as<Element>();
2641 for (unsigned i = 0; i < eCount; ++i) {
2642 auxOp1[i] = AA64FpOp1_x[i];
2645 for (unsigned i = 0; i < eCount; ++i) {
2646 AA64FpDest_x[i] = 0;
2648 AA64FpDest_x[x] = auxOp1[i];
2652 iop = InstObjParams(name, 'Sve' + Name, 'SveUnaryPredOp',
2653 {'code': code, 'op_class': opClass}, [])
2654 header_output += SveUnaryPredOpDeclare.subst(iop)
2655 exec_output += SveOpExecute.subst(iop)
2657 substDict = {'targs': type, 'class_name': 'Sve' + Name}
2658 exec_output += SveOpExecDeclare.subst(substDict)
2660 # Generate definition for unary SVE predicate instructions with implicit
2661 # source operand (PFALSE, RDFFR(S))
# The operation body is supplied via `op`; flag-setting variants update
# NZCV from the destination predicate under the governing predicate.
2662 def svePredUnaryWImplicitSrcInst(name, Name, opClass, op,
2663 predType=PredType.NONE, isFlagSetting=False, decoder='Generic'):
2664 global header_output, exec_output, decoders
2665 code = sveEnabledCheckCode + op
2668 CondCodesNZ = (destPred.firstActive(GpOp, eCount) << 1) |
2669 destPred.noneActive(GpOp, eCount);
2670 CondCodesC = !destPred.lastActive(GpOp, eCount);
# Base class choice depends on whether a governing predicate is read.
2672 extraPrologCode = '''
2673 auto& destPred M5_VAR_USED = PDest;'''
2674 baseClass = ('SvePredUnaryWImplicitSrcOp' if predType == PredType.NONE
2675 else 'SvePredUnaryWImplicitSrcPredOp')
2676 iop = InstObjParams(name, 'Sve' + Name, baseClass,
2677 {'code': code, 'op_class': opClass}, [])
2678 iop.snippets['code'] = extraPrologCode + iop.snippets['code']
2679 if predType == PredType.NONE:
2680 header_output += SvePredUnaryOpWImplicitSrcDeclare.subst(iop)
2682 header_output += SvePredUnaryPredOpWImplicitSrcDeclare.subst(iop)
2683 exec_output += SveNonTemplatedOpExecute.subst(iop)
2685 # Generate definition for SVE instructions writing to the FFR (SETFFR,
# WRFFR): `isSetFfr` selects the implicit-source/destination base class
# used for SETFFR, otherwise the implicit-destination class (WRFFR).
2687 def svePredWriteFfrInst(name, Name, opClass, op, isSetFfr,
2689 global header_output, exec_output, decoders
2690 code = sveEnabledCheckCode + op
2691 extraPrologCode = '''
2692 auto& destPred M5_VAR_USED = Ffr;'''
2693 baseClass = ('SveWImplicitSrcDstOp' if isSetFfr
2694 else 'SvePredUnaryWImplicitDstOp')
2695 iop = InstObjParams(name, 'Sve' + Name, baseClass,
2696 {'code': code, 'op_class': opClass}, [])
2697 iop.snippets['code'] = extraPrologCode + iop.snippets['code']
2699 header_output += SveOpWImplicitSrcDstDeclare.subst(iop)
2701 header_output += SvePredUnaryOpWImplicitDstDeclare.subst(iop)
2702 exec_output += SveNonTemplatedOpExecute.subst(iop)
2704 # Generate definition for SVE Ext instruction
# EXT: extracts a vector from the concatenation of the (destructive)
# destination and the first source, starting at an immediate element
# position; only the byte (uint8_t) variant is instantiated.
2705 def sveExtInst(name, Name, opClass, decoder = 'Generic'):
2706 global header_output, exec_output, decoders
2707 code = sveEnabledCheckCode + '''
2708 unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
2710 ArmISA::VecRegContainer tmpVecC;
2711 auto auxOp1 = tmpVecC.as<Element>();
2712 for (unsigned i = 0; i < eCount; ++i) {
2713 auxOp1[i] = AA64FpOp1_x[i];
2718 for (int i = 0; i < eCount; ++i, ++pos)
2721 AA64FpDest_x[i] = AA64FpDestMerge_x[pos];
2723 AA64FpDest_x[i] = auxOp1[pos-eCount];
2726 iop = InstObjParams(name, 'Sve' + Name, 'SveBinImmUnpredDestrOp',
2727 {'code': code, 'op_class': opClass}, [])
2728 header_output += SveBinImmUnpredOpDeclare.subst(iop);
2729 exec_output += SveOpExecute.subst(iop)
2730 substDict = {'targs': 'uint8_t', 'class_name': 'Sve' + Name}
2731 exec_output += SveOpExecDeclare.subst(substDict)
2733 # Generate definition for SVE SPLICE instruction
# Copies the active segment [firstelem, lastelem] of the (destructive)
# first source, then fills the remainder from the second source; the
# result is staged in a temporary and copied out at the end.
2734 def sveSpliceInst(name, Name, opClass, types, decoder = 'Generic'):
2735 global header_output, exec_output, decoders
2736 code = sveEnabledCheckCode + '''
2737 unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
2739 ArmISA::VecRegContainer tmpVecC;
2740 auto auxDest = tmpVecC.as<Element>();
2741 int firstelem = -1, lastelem = -2;
2742 for (int i = 0; i < eCount; ++i) {
2750 for (int i = firstelem; i <= lastelem; ++i, ++x) {
2751 auxDest[x] = AA64FpDestMerge_x[i];
2753 int remaining = eCount - x;
2754 for (int i = 0; i < remaining; ++i, ++x) {
2755 auxDest[x] = AA64FpOp2_x[i];
2757 for (int i = 0; i < eCount; ++i) {
2758 AA64FpDest_x[i] = auxDest[i];
2761 iop = InstObjParams(name, 'Sve' + Name, 'SveBinDestrPredOp',
2762 {'code': code, 'op_class': opClass}, [])
2763 header_output += SveBinDestrPredOpDeclare.subst(iop)
2764 exec_output += SveOpExecute.subst(iop)
2766 substDict = {'targs': type, 'class_name': 'Sve' + Name}
2767 exec_output += SveOpExecDeclare.subst(substDict)
2769 # Generate definition for SVE DUP (index) instruction
# Broadcasts the source element selected by an immediate index to every
# destination element (out-of-range index leaves srcElem1 = 0).
2770 def sveDupIndexInst(name, Name, opClass, types, decoder = 'Generic'):
2771 global header_output, exec_output, decoders
2772 code = sveEnabledCheckCode + '''
2773 unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
2775 Element srcElem1 = 0;
2777 srcElem1 = AA64FpOp1_x[imm];
2779 for (int i = 0; i < eCount; ++i) {
2780 AA64FpDest_x[i] = srcElem1;
2782 iop = InstObjParams(name, 'Sve' + Name, 'SveBinImmIdxUnpredOp',
2783 {'code': code, 'op_class': opClass}, [])
2784 header_output += SveBinImmUnpredOpDeclare.subst(iop)
2785 exec_output += SveOpExecute.subst(iop)
2787 substDict = {'targs': type, 'class_name': 'Sve' + Name}
2788 exec_output += SveOpExecDeclare.subst(substDict)
2790 # Generate definition for SVE reverse elements instructions
# REV (vector or predicate): destination element i takes source element
# eCount-1-i. The source is copied to a temporary first so the same
# register can be both source and destination.
2791 def sveReverseElementsInst(name, Name, opClass, types,
2792 srcType = SrcRegType.Vector, decoder = 'Generic'):
2793 assert srcType in (SrcRegType.Vector, SrcRegType.Predicate)
2794 global header_output, exec_output, decoders
2795 extraPrologCode = '''
2796 auto& destPred = PDest;'''
2797 code = sveEnabledCheckCode + '''
2798 unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
2800 if srcType == SrcRegType.Predicate:
2802 ArmISA::VecPredRegContainer tmpPredC;
2803 auto auxPOp1 = tmpPredC.as<Element>();
2804 for (unsigned i = 0; i < eCount; ++i) {
2805 uint8_t v = POp1_x.get_raw(i);
2806 auxPOp1.set_raw(i, v);
2811 ArmISA::VecRegContainer tmpRegC;
2812 auto auxOp1 = tmpRegC.as<Element>();
2813 for (unsigned i = 0; i < eCount; ++i) {
2814 auxOp1[i] = AA64FpOp1_x[i];
2817 for (int i = 0; i < eCount; ++i) {'''
2818 if srcType == SrcRegType.Vector:
2820 AA64FpDest_x[i] = auxOp1[eCount - i - 1];'''
2823 destPred.set_raw(i, auxPOp1.get_raw(eCount - i - 1));'''
2826 iop = InstObjParams(name, 'Sve' + Name, 'SveUnaryUnpredOp',
2827 {'code': code, 'op_class': opClass}, [])
2828 if srcType == SrcRegType.Predicate:
2829 iop.snippets['code'] = extraPrologCode + iop.snippets['code']
2830 header_output += SveUnaryUnpredOpDeclare.subst(iop)
2831 exec_output += SveOpExecute.subst(iop)
2833 substDict = {'targs': type, 'class_name': 'Sve' + Name}
2834 exec_output += SveOpExecDeclare.subst(substDict)
2836 # Generate definition for shift & insert instructions
# INSR: shifts all destination elements up by one and inserts a scalar
# (general-purpose or SIMD&FP, per `srcType`) into element 0.
2837 def sveShiftAndInsertInst(name, Name, opClass, types,
2838 srcType = SrcRegType.Scalar, decoder = 'Generic'):
2839 assert srcType in (SrcRegType.SimdFpScalar, SrcRegType.Scalar)
2840 global header_output, exec_output, decoders
2841 code = sveEnabledCheckCode + '''
2842 unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
2844 if srcType == SrcRegType.Scalar:
2846 auto& srcElem1 = XOp1;'''
2847 elif srcType == SrcRegType.SimdFpScalar:
2849 auto& srcElem1 = AA64FpOp1_x[0];'''
2851 for (int i = eCount - 1; i > 0; --i) {
2852 AA64FpDest_x[i] = AA64FpDestMerge_x[i-1];
2854 AA64FpDest_x[0] = srcElem1;'''
2855 iop = InstObjParams(name, 'Sve' + Name, 'SveUnarySca2VecUnpredOp',
2856 {'code': code, 'op_class': opClass,
2857 'isSimdFp': 'true' if srcType == SrcRegType.SimdFpScalar
2859 header_output += SveShiftAndInsertOpDeclare.subst(iop)
2860 exec_output += SveOpExecute.subst(iop)
2862 substDict = {'targs': type, 'class_name': 'Sve' + Name}
2863 exec_output += SveOpExecDeclare.subst(substDict)
2865 # Generate definition for DOT instructions
2866 def sveDotInst(name, Name, opClass, types, isIndexed = True):
2867 global header_output, exec_output, decoders
2868 code = sveEnabledCheckCode + '''
2869 unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
2871 for (int i = 0; i < eCount; ++i) {'''
2874 int segbase = i - i % (16 / sizeof(Element));
2875 int s = segbase + imm;'''
2877 DElement res = AA64FpDest_xd[i];
2878 DElement srcElem1, srcElem2;
2879 for (int j = 0; j <= 3; ++j) {
2880 srcElem1 = static_cast<DElement>(AA64FpOp1_xs[4 * i + j]);'''
2883 srcElem2 = static_cast<DElement>(AA64FpOp2_xs[4 * s + j]);'''
2886 srcElem2 = static_cast<DElement>(AA64FpOp2_xs[4 * i + j]);'''
2888 res += srcElem1 * srcElem2;
2890 AA64FpDestMerge_xd[i] = res;
2892 iop = InstObjParams(name, 'Sve' + Name,
2893 'SveDotProdIdxOp' if isIndexed else
2895 {'code': code, 'op_class': opClass}, [])
2897 header_output += SveWideningTerImmOpDeclare.subst(iop)
2899 header_output += SveWideningTerOpDeclare.subst(iop)
2900 exec_output += SveWideningOpExecute.subst(iop)
2902 substDict = {'targs': type, 'class_name': 'Sve' + Name}
2903 exec_output += SveOpExecDeclare.subst(substDict)
2905 # Generate definition for ordered reduction
# FADDA-style strictly-ordered reduction: seeds from destination element
# 0 and folds each active source element in order via `op`; elements
# 1..eCount-1 of the destination are zeroed.
2906 def sveOrderedReduction(name, Name, opClass, types, op,
2907 decoder = 'Generic'):
2908 global header_output, exec_output, decoders
2909 code = sveEnabledCheckCode + '''
2910 unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
2913 Element destElem = AA64FpDestMerge_x[0];
2914 for (int i = 0; i < eCount; ++i) {
2916 Element srcElem1 = AA64FpOp1_x[i];
2920 for (int i = 1; i < eCount; ++i) {
2921 AA64FpDest_x[i] = 0;
2923 AA64FpDest_x[0] = destElem;'''%{'op': op}
2924 iop = InstObjParams(name, 'Sve' + Name, 'SveOrdReducOp',
2925 {'code': code, 'op_class': opClass}, [])
2926 header_output += SveReducOpDeclare.subst(iop)
2927 exec_output += SveOpExecute.subst(iop)
2929 substDict = {'targs' : type,
2930 'class_name' : 'Sve' + Name}
2931 exec_output += SveOpExecDeclare.subst(substDict)
2933 # Generate definitions for complex addition instructions
# FCADD: element pairs (real, imag) with the second operand rotated by
# 90 or 270 degrees (rot selects which component is negated); additions
# go through fplibAdd and are guarded per-element by the predicate.
2934 def sveComplexAddInst(name, Name, opClass, types,
2935 decoder = 'Generic'):
2936 global header_output, exec_output, decoders
2937 code = sveEnabledCheckCode + '''
2938 unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
2941 bool sub_i = (rot == 1);
2942 bool sub_r = (rot == 3);
2943 for (int i = 0; i < eCount / 2; ++i) {
2944 Element acc_r = AA64FpOp1_x[2 * i];
2945 Element acc_i = AA64FpOp1_x[2 * i + 1];
2946 Element elt2_r = AA64FpOp2_x[2 * i];
2947 Element elt2_i = AA64FpOp2_x[2 * i + 1];
2950 if (GpOp_x[2 * i]) {
2952 elt2_i = fplibNeg<Element>(elt2_i);
2954 fpscr = (FPSCR) FpscrExc;
2955 acc_r = fplibAdd<Element>(acc_r, elt2_i, fpscr);
2958 if (GpOp_x[2 * i + 1]) {
2960 elt2_r = fplibNeg<Element>(elt2_r);
2962 fpscr = (FPSCR) FpscrExc;
2963 acc_i = fplibAdd<Element>(acc_i, elt2_r, fpscr);
2967 AA64FpDest_x[2 * i] = acc_r;
2968 AA64FpDest_x[2 * i + 1] = acc_i;
2971 iop = InstObjParams(name, 'Sve' + Name, 'SveComplexOp',
2972 {'code': code, 'op_class': opClass}, [])
2973 header_output += SveComplexOpDeclare.subst(iop)
2974 exec_output += SveOpExecute.subst(iop)
2976 substDict = {'targs' : type,
2977 'class_name' : 'Sve' + Name}
2978 exec_output += SveOpExecDeclare.subst(substDict)
2980 # Generate definitions for complex multiply and accumulate instructions
# FCMLA (predicated and indexed forms): `rot` selects operand component
# and sign per the SVE pseudocode; the indexed (PredType.NONE) form
# picks the second-operand pair from a 128-bit segment via `imm`.
# Multiply-adds go through fplibMulAdd with FPSCR round-tripping.
2981 def sveComplexMulAddInst(name, Name, opClass, types,
2982 predType=PredType.NONE, decoder='Generic'):
2983 assert predType in (PredType.NONE, PredType.MERGE)
2984 global header_output, exec_output, decoders
2985 code = sveEnabledCheckCode + '''
2986 unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
2989 uint32_t sel_a = bits(rot, 0);
2990 uint32_t sel_b = sel_a ? 0 : 1;
2991 bool neg_i = bits(rot, 1);
2992 bool neg_r = bits(rot, 0) != bits(rot, 1);'''
2993 if predType == PredType.NONE:
2995 uint32_t eltspersegment = 16 / (2 * sizeof(Element));'''
2997 for (int i = 0; i < eCount / 2; ++i) {'''
2998 if predType == PredType.NONE:
3000 uint32_t segmentbase = i - (i % eltspersegment);
3001 uint32_t s = segmentbase + imm;'''
3006 Element addend_r = AA64FpDestMerge_x[2 * i];
3007 Element addend_i = AA64FpDestMerge_x[2 * i + 1];
3008 Element elt1_a = AA64FpOp1_x[2 * i + sel_a];
3009 Element elt2_a = AA64FpOp2_x[2 * s + sel_a];
3010 Element elt2_b = AA64FpOp2_x[2 * s + sel_b];
3013 if predType != PredType.NONE:
3015 if (GpOp_x[2 * i]) {'''
3018 elt2_a = fplibNeg<Element>(elt2_a);
3020 fpscr = (FPSCR) FpscrExc;
3021 addend_r = fplibMulAdd<Element>(addend_r, elt1_a, elt2_a, fpscr);
3022 FpscrExc = fpscr;'''
3023 if predType != PredType.NONE:
3026 if predType != PredType.NONE:
3028 if (GpOp_x[2 * i + 1]) {'''
3031 elt2_b = fplibNeg<Element>(elt2_b);
3033 fpscr = (FPSCR) FpscrExc;
3034 addend_i = fplibMulAdd<Element>(addend_i, elt1_a, elt2_b, fpscr);
3035 FpscrExc = fpscr;'''
3036 if predType != PredType.NONE:
3040 AA64FpDest_x[2 * i] = addend_r;
3041 AA64FpDest_x[2 * i + 1] = addend_i;
3043 iop = InstObjParams(name, 'Sve' + Name,
3044 'SveComplexIdxOp' if predType == PredType.NONE
3045 else 'SveComplexOp',
3046 {'code': code, 'op_class': opClass}, [])
3047 if predType == PredType.NONE:
3048 header_output += SveComplexIndexOpDeclare.subst(iop)
3050 header_output += SveComplexOpDeclare.subst(iop)
3051 exec_output += SveOpExecute.subst(iop)
3053 substDict = {'targs' : type,
3054 'class_name' : 'Sve' + Name}
3055 exec_output += SveOpExecDeclare.subst(substDict)
# Element-type tuples used to instantiate the generators below for each
# supported SVE element size (and source/destination pairs for widening
# instructions).
3057 fpTypes = ('uint16_t', 'uint32_t', 'uint64_t')
3058 signedTypes = ('int8_t', 'int16_t', 'int32_t', 'int64_t')
3059 unsignedTypes = ('uint8_t', 'uint16_t', 'uint32_t', 'uint64_t')
3061 smallSignedTypes = ('int8_t', 'int16_t', 'int32_t')
3062 bigSignedTypes = ('int16_t', 'int32_t', 'int64_t')
3063 smallUnsignedTypes = ('uint8_t', 'uint16_t', 'uint32_t')
3064 bigUnsignedTypes = ('uint16_t', 'uint32_t', 'uint64_t')
3066 unsignedWideSDTypes = (('uint8_t', 'uint16_t'),
3067 ('uint16_t', 'uint32_t'), ('uint32_t', 'uint64_t'))
3068 signedWideSDTypes = (('int8_t', 'int16_t'),
3069 ('int16_t', 'int32_t'), ('int32_t', 'int64_t'))
3072 absCode = 'destElem = (Element) std::abs(srcElem1);'
3073 sveUnaryInst('abs', 'Abs', 'SimdAluOp', signedTypes, absCode,
3076 sveWideImmInst('add', 'AddImm', 'SimdAddOp', unsignedTypes, addCode, False)
3077 # ADD (vectors, predicated)
3078 addCode = 'destElem = srcElem1 + srcElem2;'
3079 sveBinInst('add', 'AddPred', 'SimdAddOp', unsignedTypes, addCode,
3080 PredType.MERGE, True)
3081 # ADD (vectors, unpredicated)
3082 addCode = 'destElem = srcElem1 + srcElem2;'
3083 sveBinInst('add', 'AddUnpred', 'SimdAddOp', unsignedTypes, addCode)
3085 addvlCode = sveEnabledCheckCode + '''
3086 unsigned eCount = ArmStaticInst::getCurSveVecLen<uint%d_t>(
3088 XDest = XOp1 + eCount * (int64_t) imm;
3090 buildXImmDataInst('addpl', addvlCode % 64, buildCc=False)
3092 buildXImmDataInst('addvl', addvlCode % 8, buildCc=False)
3095 if (offsetFormat == SveAdrOffsetUnpackedSigned) {
3096 srcElem2 = sext<32>(srcElem2 & mask(32));
3097 } else if (offsetFormat == SveAdrOffsetUnpackedUnsigned) {
3098 srcElem2 = srcElem2 & mask(32);
3100 destElem = srcElem1 + srcElem2 * mult;
3102 sveAdrInst('adr', 'Adr', 'SimdAddOp', ('uint32_t', 'uint64_t'), adrCode)
3104 andCode = 'destElem = srcElem1 & srcElem2;'
3105 sveWideImmInst('and', 'AndImm', 'SimdAluOp', ('uint64_t',), andCode)
3106 # AND (vectors, predicated)
3107 sveBinInst('and', 'AndPred', 'SimdAluOp', unsignedTypes, andCode,
3108 PredType.MERGE, True)
3109 # AND (vectors, unpredicated)
3110 andCode = 'destElem = srcElem1 & srcElem2;'
3111 sveBinInst('and', 'AndUnpred', 'SimdAluOp', ('uint64_t',), andCode)
3112 # AND, ANDS (predicates)
3113 svePredLogicalInst('and', 'PredAnd', 'SimdPredAluOp', ('uint8_t',),
3115 svePredLogicalInst('ands', 'PredAnds', 'SimdPredAluOp', ('uint8_t',),
3116 andCode, isFlagSetting=True)
3118 andvCode = 'destElem &= srcElem1;'
3119 sveAssocReducInst('andv', 'Andv', 'SimdReduceAluOp', unsignedTypes,
3120 andvCode, 'std::numeric_limits<Element>::max()')
3121 # ASR (immediate, predicated)
3123 int sign_bit = bits(srcElem1, sizeof(Element) * 8 - 1);
3124 if (srcElem2 == 0) {
3125 destElem = srcElem1;
3126 } else if (srcElem2 >= sizeof(Element) * 8) {
3127 destElem = sign_bit ? std::numeric_limits<Element>::max() : 0;
3129 destElem = srcElem1 >> srcElem2;
3131 destElem |= ~mask(sizeof(Element) * 8 - srcElem2);
3135 sveBinImmInst('asr', 'AsrImmPred', 'SimdAluOp', unsignedTypes, asrCode,
3137 # ASR (immediate, unpredicated)
3138 sveBinImmInst('asr', 'AsrImmUnpred', 'SimdAluOp', unsignedTypes, asrCode)
3140 sveBinInst('asr', 'AsrPred', 'SimdAluOp', unsignedTypes, asrCode,
3141 PredType.MERGE, True)
3142 # ASR (wide elements, predicated)
3143 sveShiftByWideElemsInst('asr', 'AsrWidePred', 'SimdAluOp', unsignedTypes,
3144 asrCode, PredType.MERGE)
3145 # ASR (wide elements, unpredicated)
3146 sveShiftByWideElemsInst('asr', 'AsrWideUnpred', 'SimdAluOp', unsignedTypes,
3150 Element element1 = srcElem1;
3151 Element shift = srcElem2;
3153 Element tmp = ((1L << shift) - 1L);
3157 element1 = element1 + tmp;
3160 destElem = (element1 >> shift);
3162 sveBinImmInst('asrd', 'Asrd', 'SimdAluOp', signedTypes, asrdCode,
3166 int sign_bit = bits(srcElem2, sizeof(Element) * 8 - 1);
3167 if (srcElem1 == 0) {
3168 destElem = srcElem2;
3169 } else if (srcElem1 >= sizeof(Element) * 8) {
3170 destElem = sign_bit ? std::numeric_limits<Element>::max() : 0;
3172 destElem = srcElem2 >> srcElem1;
3174 destElem |= ~mask(sizeof(Element) * 8 - srcElem1);
3178 sveBinInst('asrr', 'Asrr', 'SimdAluOp', unsignedTypes, asrrCode,
3179 PredType.MERGE, True)
3180 # BIC (vectors, predicated)
3181 bicCode = 'destElem = srcElem1 & ~srcElem2;'
3182 sveBinInst('bic', 'BicPred', 'SimdAluOp', unsignedTypes, bicCode,
3183 PredType.MERGE, True)
3184 # BIC (vectors, unpredicated)
3185 sveBinInst('bic', 'BicUnpred', 'SimdAluOp', unsignedTypes, bicCode)
3186 # BIC, BICS (predicates)
3187 bicCode = 'destElem = srcElem1 && !srcElem2;'
3188 svePredLogicalInst('bic', 'PredBic', 'SimdPredAluOp', ('uint8_t',),
3190 svePredLogicalInst('bics', 'PredBics', 'SimdPredAluOp', ('uint8_t',),
3191 bicCode, isFlagSetting=True)
3193 svePartBrkInst('brka', 'Brkam', 'SimdPredAluOp', isFlagSetting = False,
3194 predType = PredType.MERGE, whenBrk = Break.After)
3196 svePartBrkInst('brka', 'Brkaz', 'SimdPredAluOp', isFlagSetting = False,
3197 predType = PredType.ZERO, whenBrk = Break.After)
3199 svePartBrkInst('brkas', 'Brkas', 'SimdPredAluOp', isFlagSetting = True,
3200 predType = PredType.ZERO, whenBrk = Break.After)
3202 svePartBrkInst('brkb', 'Brkbm', 'SimdPredAluOp', isFlagSetting = False,
3203 predType = PredType.MERGE, whenBrk = Break.Before)
3205 svePartBrkInst('brkb', 'Brkbz', 'SimdPredAluOp', isFlagSetting = False,
3206 predType = PredType.ZERO, whenBrk = Break.Before)
3208 svePartBrkInst('brkbs', 'Brkbs', 'SimdPredAluOp', isFlagSetting = True,
3209 predType = PredType.ZERO, whenBrk = Break.Before)
3211 svePartBrkPropNextInst('brkn', 'Brkn', 'SimdPredAluOp',
3212 isFlagSetting = False)
3214 svePartBrkPropNextInst('brkns', 'Brkns', 'SimdPredAluOp',
3215 isFlagSetting = True)
3217 svePartBrkPropPrevInst('brkpa', 'Brkpa', 'SimdPredAluOp',
3218 isFlagSetting = False, whenBrk = Break.After)
3220 svePartBrkPropPrevInst('brkpas', 'Brkpas', 'SimdPredAluOp',
3221 isFlagSetting = True, whenBrk = Break.After)
3223 svePartBrkPropPrevInst('brkpb', 'Brkpb', 'SimdPredAluOp',
3224 isFlagSetting = False, whenBrk = Break.Before)
3226 svePartBrkPropPrevInst('brkpbs', 'Brkpbs', 'SimdPredAluOp',
3227 isFlagSetting = True, whenBrk = Break.Before)
3233 destElem = AA64FpOp1_x[last];'''
3234 sveSelectInst('clasta', 'Clasta', 'SimdAluOp', unsignedTypes, clastaCode,
3235 isCond = True, destType = DstRegType.Scalar)
3236 # CLASTA (SIMD&FP scalar)
3237 sveSelectInst('clasta', 'Clastaf', 'SimdAluOp', unsignedTypes, clastaCode,
3238 isCond = True, destType = DstRegType.SimdFpScalar)
3240 sveSelectInst('clasta', 'Clastav', 'SimdAluOp', unsignedTypes, clastaCode,
3241 isCond = True, destType = DstRegType.Vector)
3244 destElem = AA64FpOp1_x[last];'''
3245 sveSelectInst('clastb', 'Clastb', 'SimdAluOp', unsignedTypes, clastbCode,
3246 isCond = True, destType = DstRegType.Scalar)
3247 # CLASTB (SIMD&FP scalar)
3248 sveSelectInst('clastb', 'Clastbf', 'SimdAluOp', unsignedTypes, clastbCode,
3249 isCond = True, destType = DstRegType.SimdFpScalar)
3251 sveSelectInst('clastb', 'Clastbv', 'SimdAluOp', unsignedTypes, clastbCode,
3252 isCond = True, destType = DstRegType.Vector)
3256 Element val = srcElem1;
3265 while (val >= 0 && destElem < sizeof(Element) * 8 - 1) {
3271 sveUnaryInst('cls', 'Cls', 'SimdAluOp', signedTypes, clsCode,
3276 Element val = srcElem1;
3277 while (val >= 0 && destElem < sizeof(Element) * 8) {
3282 sveUnaryInst('clz', 'Clz', 'SimdAluOp', signedTypes, clzCode,
3286 destElem = (srcElem1 == srcElem2);
3288 sveIntCmpImmInst('cmpeq', 'Cmpeqi', 'SimdCmpOp', unsignedTypes, cmpeqCode)
3290 sveIntCmpInst('cmpeq', 'Cmpeq', 'SimdCmpOp', unsignedTypes, cmpeqCode)
3291 # CMPEQ (wide elements)
3292 sveIntCmpInst('cmpeq', 'Cmpeqw', 'SimdCmpOp', smallUnsignedTypes,
3296 destElem = (srcElem1 >= srcElem2);
3298 sveIntCmpImmInst('cmpge', 'Cmpgei', 'SimdCmpOp', signedTypes, cmpgeCode)
3300 sveIntCmpInst('cmpge', 'Cmpge', 'SimdCmpOp', signedTypes, cmpgeCode)
3301 # CMPGE (wide elements)
3302 sveIntCmpInst('cmpge', 'Cmpgew', 'SimdCmpOp', smallSignedTypes,
3306 destElem = (srcElem1 > srcElem2);
3308 sveIntCmpImmInst('cmpge', 'Cmpgti', 'SimdCmpOp', signedTypes, cmpgtCode)
3310 sveIntCmpInst('cmpge', 'Cmpgt', 'SimdCmpOp', signedTypes, cmpgtCode)
3311 # CMPGT (wide elements)
3312 sveIntCmpInst('cmpge', 'Cmpgtw', 'SimdCmpOp', smallSignedTypes,
3315 sveIntCmpImmInst('cmphi', 'Cmphii', 'SimdCmpOp', unsignedTypes, cmpgtCode)
3317 sveIntCmpInst('cmphi', 'Cmphi', 'SimdCmpOp', unsignedTypes, cmpgtCode)
3318 # CMPHI (wide elements)
3319 sveIntCmpInst('cmphi', 'Cmphiw', 'SimdCmpOp', smallUnsignedTypes,
3322 sveIntCmpImmInst('cmphs', 'Cmphsi', 'SimdCmpOp', unsignedTypes, cmpgeCode)
3324 sveIntCmpInst('cmphs', 'Cmphs', 'SimdCmpOp', unsignedTypes, cmpgeCode)
3325 # CMPHS (wide elements)
3326 sveIntCmpInst('cmphs', 'Cmphsw', 'SimdCmpOp', smallUnsignedTypes,
3330 destElem = (srcElem1 <= srcElem2);
3332 sveIntCmpImmInst('cmple', 'Cmplei', 'SimdCmpOp', signedTypes, cmpleCode)
3333 # CMPLE (wide elements)
3334 sveIntCmpInst('cmple', 'Cmplew', 'SimdCmpOp', smallSignedTypes,
3338 destElem = (srcElem1 < srcElem2);
3340 sveIntCmpImmInst('cmplo', 'Cmploi', 'SimdCmpOp', unsignedTypes, cmpltCode)
3341 # CMPLO (wide elements)
3342 sveIntCmpInst('cmplo', 'Cmplow', 'SimdCmpOp', smallUnsignedTypes,
3345 sveIntCmpImmInst('cmpls', 'Cmplsi', 'SimdCmpOp', unsignedTypes, cmpleCode)
3346 # CMPLS (wide elements)
3347 sveIntCmpInst('cmpls', 'Cmplsw', 'SimdCmpOp', smallUnsignedTypes,
3350 sveIntCmpImmInst('cmplt', 'Cmplti', 'SimdCmpOp', signedTypes, cmpltCode)
3351 # CMPLT (wide elements)
3352 sveIntCmpInst('cmplt', 'Cmpltw', 'SimdCmpOp', smallSignedTypes,
3356 destElem = (srcElem1 != srcElem2);
3358 sveIntCmpImmInst('cmpeq', 'Cmpnei', 'SimdCmpOp', unsignedTypes, cmpneCode)
3360 sveIntCmpInst('cmpeq', 'Cmpne', 'SimdCmpOp', unsignedTypes, cmpneCode)
3361 # CMPNE (wide elements)
3362 sveIntCmpInst('cmpeq', 'Cmpnew', 'SimdCmpOp', smallUnsignedTypes,
3366 destElem = srcElem1?0:1;
3368 sveUnaryInst('cnot', 'Cnot', 'SimdAluOp', unsignedTypes, cnotCode,
3373 Element val = srcElem1;
3375 destElem += val & 0x1;
3379 sveUnaryInst('cnt', 'Cnt', 'SimdAluOp', unsignedTypes, cntCode,
3381 # CNTB, CNTD, CNTH, CNTW
3383 destElem = (count * imm);
3385 sveElemCountInst('cnt', 'Cntx', 'SimdAluOp', unsignedTypes, cntxCode,
3386 destType = DestType.Scalar, dstIs32b = False, dstAcc = False)
3388 sveCompactInst('compact', 'Compact', 'SimdPredAluOp',
3389 ('uint32_t', 'uint64_t'))
3391 dupCode = 'destElem = srcElem1;'
3392 sveWideImmInst('cpy', 'CpyImmMerge', 'SimdAluOp', unsignedTypes, dupCode,
3393 predType=PredType.MERGE, isUnary=True)
3394 sveWideImmInst('cpy', 'CpyImmZero', 'SimdAluOp', unsignedTypes, dupCode,
3395 predType=PredType.ZERO, isUnary=True)
3397 sveUnaryInst('cpy', 'CpyScalar', 'SimdAluOp', unsignedTypes, dupCode,
3398 PredType.MERGE, srcRegType=SrcRegType.Scalar)
3399 # CPY (SIMD&FP scalar)
3400 sveUnaryInst('cpy', 'CpySimdFpScalar', 'SimdAluOp', unsignedTypes, dupCode,
3401 PredType.MERGE, srcRegType=SrcRegType.SimdFpScalar)
3403 svePredCountPredInst('cntp', 'Cntp', 'SimdAluOp', unsignedTypes)
3406 destElem = srcElem1 == srcElem2;
3408 sveCompTermInst('ctermeq', 'Ctermeq', 'IntAluOp',
3409 ['uint32_t', 'uint64_t'], cteqCode)
3412 destElem = srcElem1 != srcElem2;
3414 sveCompTermInst('ctermne', 'Ctermne', 'IntAluOp',
3415 ['uint32_t', 'uint64_t'], ctneCode)
3416 # DECB, DECH, DECW, DECD (scalar)
3418 destElem = srcElem1 - (count * imm);
3420 sveElemCountInst('dec', 'Dec', 'SimdAluOp', unsignedTypes, decxCode,
3421 destType = DestType.Scalar, dstIs32b = False)
3422 # DECH, DECW, DECD (vector)
3423 sveElemCountInst('dec', 'Decv', 'SimdAluOp', bigUnsignedTypes, decxCode,
3424 destType = DestType.Vector, dstIs32b = False)
3427 XDest = XDest - count;
3429 svePredCountInst('decp', 'Decp', 'SimdAluOp', unsignedTypes, decpCode,
3430 DestType.Scalar, SrcSize.Src64bit)
3433 destElem = srcElem - count;
3435 svePredCountInst('decp', 'Decpv', 'SimdAluOp', unsignedTypes, decpvCode,
3438 sveWideImmInst('dup', 'DupImm', 'SimdAluOp', unsignedTypes, dupCode,
3441 sveDupIndexInst('mov', 'DupIdx', 'SimdAluOp',
3442 list(unsignedTypes) + ['__uint128_t'])
3444 sveUnaryInst('dup', 'DupScalar', 'SimdAluOp', unsignedTypes, dupCode,
3445 PredType.NONE, srcRegType=SrcRegType.Scalar)
3447 sveWideImmInst('dupm', 'Dupm', 'SimdAluOp', unsignedTypes, dupCode,
3450 eorCode = 'destElem = srcElem1 ^ srcElem2;'
3451 sveWideImmInst('eor', 'EorImm', 'SimdAluOp', ('uint64_t',), eorCode)
3452 # EOR (vectors, predicated)
3453 sveBinInst('eor', 'EorPred', 'SimdAluOp', unsignedTypes, eorCode,
3454 PredType.MERGE, True)
3455 # EOR (vectors, unpredicated)
3456 eorCode = 'destElem = srcElem1 ^ srcElem2;'
3457 sveBinInst('eor', 'EorUnpred', 'SimdAluOp', ('uint64_t',), eorCode)
3458 # EOR, EORS (predicates)
3459 svePredLogicalInst('eor', 'PredEor', 'SimdPredAluOp', ('uint8_t',),
3461 svePredLogicalInst('eors', 'PredEors', 'SimdPredAluOp', ('uint8_t',),
3462 eorCode, isFlagSetting=True)
3464 eorvCode = 'destElem ^= srcElem1;'
3465 sveAssocReducInst('eorv', 'Eorv', 'SimdReduceAluOp', unsignedTypes,
3468 sveExtInst('ext', 'Ext', 'SimdAluOp')
3471 FPSCR fpscr = (FPSCR) FpscrExc;
3475 fabdCode = fpOp % 'fplibAbs<Element>(fplibSub(srcElem1, srcElem2, fpscr))'
3476 sveBinInst('fabd', 'Fabd', 'SimdFloatAddOp', floatTypes, fabdCode,
3477 PredType.MERGE, True)
3479 fabsCode = 'destElem = fplibAbs<Element>(srcElem1);'
3480 sveUnaryInst('fabs', 'Fabs', 'SimdFloatAluOp', fpTypes, fabsCode,
3483 fpCmpAbsOp = fpOp % ('fplibCompare%s<Element>(fplibAbs<Element>(srcElem1),'
3484 ' fplibAbs<Element>(srcElem2), fpscr)')
3485 facgeCode = fpCmpAbsOp % 'GE'
3486 sveCmpInst('facge', 'Facge', 'SimdFloatCmpOp', fpTypes, facgeCode)
3488 facgtCode = fpCmpAbsOp % 'GT'
3489 sveCmpInst('facgt', 'Facgt', 'SimdFloatCmpOp', fpTypes, facgtCode)
3491 fpBinOp = fpOp % 'fplib%s<Element>(srcElem1, srcElem2, fpscr)'
3492 faddCode = fpBinOp % 'Add'
3493 sveBinImmInst('fadd', 'FaddImm', 'SimdFloatAddOp', floatTypes, faddCode,
3495 # FADD (vectors, predicated)
3496 sveBinInst('fadd', 'FaddPred', 'SimdFloatAddOp', floatTypes, faddCode,
3497 PredType.MERGE, True)
3498 # FADD (vectors, unpredicated)
3499 sveBinInst('fadd', 'FaddUnpred', 'SimdFloatAddOp', floatTypes, faddCode)
3502 FPSCR fpscr = (FPSCR) FpscrExc;
3503 destElem = fplibAdd<Element>(destElem, srcElem1, fpscr);
3504 FpscrExc = FpscrExc | fpscr;
3506 sveOrderedReduction('fadda', 'Fadda', 'SimdFloatReduceAddOp', floatTypes,
3510 FPSCR fpscr = (FPSCR) FpscrExc;
3511 destElem = fplib%s<Element>(srcElem1, srcElem2, fpscr);
3512 FpscrExc = FpscrExc | fpscr;
3514 faddvCode = fpReduceOp % 'Add'
3515 sveNonAssocReducInst('faddv', 'Faddv', 'SimdFloatReduceAddOp', floatTypes,
3518 sveComplexAddInst('fcadd','Fcadd', 'SimdFloatAddOp', fpTypes)
3520 fpCmpOp = fpOp % ('fplibCompare%s<Element>(srcElem1, srcElem2, fpscr)')
3521 fcmeqCode = fpCmpOp % 'EQ'
3522 sveCmpInst('fcmeq', 'Fcmeq', 'SimdFloatCmpOp', fpTypes, fcmeqCode)
3524 fpCmpZeroOp = fpOp % 'fplibCompare%s<Element>(srcElem1, 0, fpscr)'
3525 fcmeqZeroCode = fpCmpZeroOp % 'EQ'
3526 sveCmpInst('fcmeq', 'FcmeqZero', 'SimdFloatCmpOp', fpTypes, fcmeqZeroCode,
3529 fcmgeCode = fpCmpOp % 'GE'
3530 sveCmpInst('fcmge', 'Fcmge', 'SimdFloatCmpOp', fpTypes, fcmgeCode)
3532 fcmgeZeroCode = fpCmpZeroOp % 'GE'
3533 sveCmpInst('fcmge', 'FcmgeZero', 'SimdFloatCmpOp', fpTypes, fcmgeZeroCode,
3536 fcmgtCode = fpCmpOp % 'GT'
3537 sveCmpInst('fcmgt', 'Fcmgt', 'SimdFloatCmpOp', fpTypes, fcmgtCode)
3539 fcmgtZeroCode = fpCmpZeroOp % 'GT'
3540 sveCmpInst('fcmgt', 'FcmgtZero', 'SimdFloatCmpOp', fpTypes, fcmgtZeroCode,
3543 fpCmpRevZeroOp = fpOp % ('fplibCompare%s<Element>(0, srcElem1, fpscr)')
3544 fcmleZeroCode = fpCmpRevZeroOp % 'GE'
3545 sveCmpInst('fcmle', 'FcmleZero', 'SimdFloatCmpOp', fpTypes, fcmleZeroCode,
3548 fcmltZeroCode = fpCmpRevZeroOp % 'GT'
3549 sveCmpInst('fcmlt', 'FcmltZero', 'SimdFloatCmpOp', fpTypes, fcmltZeroCode,
3552 fcmneCode = fpOp % ('!fplibCompareEQ<Element>(srcElem1, srcElem2, fpscr)')
3553 sveCmpInst('fcmne', 'Fcmne', 'SimdFloatCmpOp', fpTypes, fcmneCode)
3555 fcmneZeroCode = fpOp % ('!fplibCompareEQ<Element>(srcElem1, 0, fpscr)')
3556 sveCmpInst('fcmne', 'FcmneZero', 'SimdFloatCmpOp', fpTypes, fcmneZeroCode,
3559 fcmuoCode = fpCmpOp % 'UN'
3560 sveCmpInst('fcmuo', 'Fcmuo', 'SimdFloatCmpOp', fpTypes, fcmuoCode)
3562 sveComplexMulAddInst('fcmla', 'Fcmlai', 'SimdFloatMultAccOp',
3563 fpTypes[1:], predType = PredType.NONE)
3565 sveComplexMulAddInst('fcmla', 'Fcmlav', 'SimdFloatMultAccOp',
3566 fpTypes, predType = PredType.MERGE)
3568 sveWideImmInst('fcpy', 'Fcpy', 'SimdAluOp', unsignedTypes, dupCode,
3569 predType=PredType.MERGE, isUnary=True)
3571 fcvtCode = fpOp % ('fplibConvert<SElement, DElement>('
3572 'srcElem1, FPCRRounding(fpscr), fpscr)')
3573 sveCvtInst('fcvt', 'FcvtNarrow', 'SimdCvtOp',
3574 ('uint32_t, uint16_t',
3575 'uint64_t, uint16_t',
3576 'uint64_t, uint32_t'),
3577 fcvtCode, CvtDir.Narrow)
3578 sveCvtInst('fcvt', 'FcvtWiden', 'SimdCvtOp',
3579 ('uint16_t, uint32_t',
3580 'uint16_t, uint64_t',
3581 'uint32_t, uint64_t'),
3582 fcvtCode, CvtDir.Widen)
3584 fcvtIntCode = fpOp % ('fplibFPToFixed<SElement, DElement>('
3585 'srcElem1, %s, %s, %s, fpscr)')
3586 fcvtzsCode = fcvtIntCode % ('0', 'false', 'FPRounding_ZERO')
3587 sveCvtInst('fcvtzs', 'FcvtzsNarrow', 'SimdCvtOp',
3588 ('uint16_t, uint16_t',
3589 'uint32_t, uint32_t',
3590 'uint64_t, uint32_t',
3591 'uint64_t, uint64_t'),
3592 fcvtzsCode, CvtDir.Narrow)
3593 sveCvtInst('fcvtzs', 'FcvtzsWiden', 'SimdCvtOp',
3594 ('uint16_t, uint32_t',
3595 'uint16_t, uint64_t',
3596 'uint32_t, uint64_t'),
3597 fcvtzsCode, CvtDir.Widen)
3599 fcvtzuCode = fcvtIntCode % ('0', 'true', 'FPRounding_ZERO')
3600 sveCvtInst('fcvtzu', 'FcvtzuNarrow', 'SimdCvtOp',
3601 ('uint16_t, uint16_t',
3602 'uint32_t, uint32_t',
3603 'uint64_t, uint32_t',
3604 'uint64_t, uint64_t'),
3605 fcvtzuCode, CvtDir.Narrow)
3606 sveCvtInst('fcvtzu', 'FcvtzuWiden', 'SimdCvtOp',
3607 ('uint16_t, uint32_t',
3608 'uint16_t, uint64_t',
3609 'uint32_t, uint64_t'),
3610 fcvtzuCode, CvtDir.Widen)
3612 fdivCode = fpBinOp % 'Div'
3613 sveBinInst('fdiv', 'Fdiv', 'SimdFloatDivOp', floatTypes, fdivCode,
3614 PredType.MERGE, True)
3616 fpBinRevOp = fpOp % 'fplib%s<Element>(srcElem2, srcElem1, fpscr)'
3617 fdivrCode = fpBinRevOp % 'Div'
3618 sveBinInst('fdivr', 'Fdivr', 'SimdFloatDivOp', floatTypes, fdivrCode,
3619 PredType.MERGE, True)
3621 sveWideImmInst('fdup', 'Fdup', 'SimdFloatAluOp', floatTypes, dupCode,
3624 fexpaCode = 'destElem = fplibExpA<Element>(srcElem1);'
3625 sveUnaryInst('fexpa', 'Fexpa', 'SimdFloatAluOp', fpTypes, fexpaCode)
3627 fmadCode = fpOp % ('fplibMulAdd<Element>('
3628 'srcElem1, destElem, srcElem2, fpscr)')
3629 sveTerInst('fmad', 'Fmad', 'SimdFloatMultAccOp', floatTypes, fmadCode,
3632 fmaxCode = fpBinOp % 'Max'
3633 sveBinImmInst('fmax', 'FmaxImm', 'SimdFloatCmpOp', floatTypes, fmaxCode,
3636 sveBinInst('fmax', 'Fmax', 'SimdFloatCmpOp', floatTypes, fmaxCode,
3637 PredType.MERGE, True)
3638 # FMAXNM (immediate)
3639 fmaxnmCode = fpBinOp % 'MaxNum'
3640 sveBinImmInst('fmaxnm', 'FmaxnmImm', 'SimdFloatCmpOp', floatTypes,
3641 fmaxnmCode, PredType.MERGE)
3643 sveBinInst('fmaxnm', 'Fmaxnm', 'SimdFloatCmpOp', floatTypes, fmaxnmCode,
3644 PredType.MERGE, True)
3646 fmaxnmvCode = fpReduceOp % 'MaxNum'
3647 sveNonAssocReducInst('fmaxnmv', 'Fmaxnmv', 'SimdFloatReduceCmpOp',
3648 floatTypes, fmaxnmvCode, 'fplibDefaultNaN<Element>()')
3650 fmaxvCode = fpReduceOp % 'Max'
3651 sveNonAssocReducInst('fmaxv', 'Fmaxv', 'SimdFloatReduceCmpOp', floatTypes,
3652 fmaxvCode, 'fplibInfinity<Element>(1)')
3654 fminCode = fpBinOp % 'Min'
3655 sveBinImmInst('fmin', 'FminImm', 'SimdFloatCmpOp', floatTypes, fminCode,
3658 sveBinInst('fmin', 'Fmin', 'SimdFloatCmpOp', floatTypes, fminCode,
3659 PredType.MERGE, True)
3660 # FMINNM (immediate)
3661 fminnmCode = fpBinOp % 'MinNum'
3662 sveBinImmInst('fminnm', 'FminnmImm', 'SimdFloatCmpOp', floatTypes,
3663 fminnmCode, PredType.MERGE)
3665 sveBinInst('fminnm', 'Fminnm', 'SimdFloatCmpOp', floatTypes, fminnmCode,
3666 PredType.MERGE, True)
3668 fminnmvCode = fpReduceOp % 'MinNum'
3669 sveNonAssocReducInst('fminnmv', 'Fminnmv', 'SimdFloatReduceCmpOp',
3670 floatTypes, fminnmvCode, 'fplibDefaultNaN<Element>()')
3672 fminvCode = fpReduceOp % 'Min'
3673 sveNonAssocReducInst('fminv', 'Fminv', 'SimdFloatReduceCmpOp', floatTypes,
3674 fminvCode, 'fplibInfinity<Element>(0)')
3675 fmlaCode = fpOp % ('fplibMulAdd<Element>('
3676 'destElem, srcElem1, srcElem2, fpscr)')
3678 sveTerIdxInst('fmla', 'FmlaIdx', 'SimdFloatMultAccOp', floatTypes,
3679 fmlaCode, PredType.MERGE)
3681 sveTerInst('fmla', 'Fmla', 'SimdFloatMultAccOp', floatTypes, fmlaCode,
3683 fmlsCode = fpOp % ('fplibMulAdd<Element>(destElem, '
3684 'fplibNeg<Element>(srcElem1), srcElem2, fpscr)')
3686 sveTerIdxInst('fmls', 'FmlsIdx', 'SimdFloatMultAccOp', floatTypes,
3687 fmlsCode, PredType.MERGE)
3689 sveTerInst('fmls', 'Fmls', 'SimdFloatMultAccOp', floatTypes, fmlsCode,
3692 fmsbCode = fpOp % ('fplibMulAdd<Element>(srcElem1, '
3693 'fplibNeg<Element>(destElem), srcElem2, fpscr)')
3694 sveTerInst('fmsb', 'Fmsb', 'SimdFloatMultAccOp', floatTypes, fmsbCode,
3697 fpBinOp = fpOp % 'fplib%s<Element>(srcElem1, srcElem2, fpscr)'
3698 fmulCode = fpBinOp % 'Mul'
3699 sveBinImmInst('fmul', 'FmulImm', 'SimdFloatMultOp', floatTypes, fmulCode,
3701 # TODO: FMUL (indexed)
3702 # FMUL (vectors, predicated)
3703 fmulCode = fpBinOp % 'Mul'
3704 sveBinInst('fmul', 'FmulPred', 'SimdFloatMultOp', floatTypes, fmulCode,
3705 PredType.MERGE, True)
3706 # FMUL (vectors, unpredicated)
3707 sveBinInst('fmul', 'FmulUnpred', 'SimdFloatMultOp', floatTypes, fmulCode)
3709 sveBinIdxInst('fmul', 'FmulIdx', 'SimdFloatMultOp', floatTypes, fmulCode)
3712 fmulxCode = fpBinOp % 'MulX'
3713 sveBinInst('fmulx', 'Fmulx', 'SimdFloatMultOp', floatTypes, fmulxCode,
3714 PredType.MERGE, True)
3716 fnegCode = 'destElem = fplibNeg<Element>(srcElem1);'
3717 sveUnaryInst('fneg', 'Fneg', 'SimdFloatAluOp', fpTypes, fnegCode,
3720 fnmadCode = fpOp % ('fplibMulAdd<Element>('
3721 'fplibNeg<Element>(srcElem1), '
3722 'fplibNeg<Element>(destElem), srcElem2, fpscr)')
3723 sveTerInst('fnmad', 'Fnmad', 'SimdFloatMultAccOp', floatTypes, fnmadCode,
3726 fnmlaCode = fpOp % ('fplibMulAdd<Element>('
3727 'fplibNeg<Element>(destElem), '
3728 'fplibNeg<Element>(srcElem1), srcElem2, fpscr)')
3729 sveTerInst('fnmla', 'Fnmla', 'SimdFloatMultAccOp', floatTypes, fnmlaCode,
3732 fnmlsCode = fpOp % ('fplibMulAdd<Element>('
3733 'fplibNeg<Element>(destElem), srcElem1, srcElem2, '
3735 sveTerInst('fnmls', 'Fnmls', 'SimdFloatMultAccOp', floatTypes, fnmlsCode,
3738 fnmsbCode = fpOp % ('fplibMulAdd<Element>('
3739 'fplibNeg<Element>(srcElem1), destElem, srcElem2, '
3741 sveTerInst('fnmsb', 'Fnmsb', 'SimdFloatMultAccOp', floatTypes, fnmsbCode,
3744 frecpeCode = fpOp % 'fplibRecipEstimate<Element>(srcElem1, fpscr)'
3745 sveUnaryInst('frecpe', 'Frecpe', 'SimdFloatMultAccOp', floatTypes,
3748 frecpsCode = fpBinOp % 'RecipStepFused'
3749 sveBinInst('frecps', 'Frecps', 'SimdFloatMultAccOp', floatTypes,
3752 frecpxCode = fpOp % "fplibRecpX<Element>(srcElem1, fpscr)"
3753 sveUnaryInst('frecpx', 'Frecpx', 'SimdFloatMultAccOp', floatTypes,
3754 frecpxCode, PredType.MERGE)
3756 frintCode = fpOp % 'fplibRoundInt<Element>(srcElem1, %s, %s, fpscr)'
3757 frintaCode = frintCode % ('FPRounding_TIEAWAY', 'false')
3758 sveUnaryInst('frinta', 'Frinta', 'SimdCvtOp', floatTypes, frintaCode,
3761 frintiCode = frintCode % ('FPCRRounding(fpscr)', 'false')
3762 sveUnaryInst('frinti', 'Frinti', 'SimdCvtOp', floatTypes, frintiCode,
3765 frintmCode = frintCode % ('FPRounding_NEGINF', 'false')
3766 sveUnaryInst('frintm', 'Frintm', 'SimdCvtOp', floatTypes, frintmCode,
3769 frintnCode = frintCode % ('FPRounding_TIEEVEN', 'false')
3770 sveUnaryInst('frintn', 'Frintn', 'SimdCvtOp', floatTypes, frintnCode,
3773 frintpCode = frintCode % ('FPRounding_POSINF', 'false')
3774 sveUnaryInst('frintp', 'Frintp', 'SimdCvtOp', floatTypes, frintpCode,
3777 frintxCode = frintCode % ('FPCRRounding(fpscr)', 'true')
3778 sveUnaryInst('frintx', 'Frintx', 'SimdCvtOp', floatTypes, frintxCode,
3781 frintzCode = frintCode % ('FPRounding_ZERO', 'false')
3782 sveUnaryInst('frintz', 'Frintz', 'SimdCvtOp', floatTypes, frintzCode,
3785 frsqrteCode = fpOp % 'fplibRSqrtEstimate<Element>(srcElem1, fpscr)'
3786 sveUnaryInst('frsqrte', 'Frsqrte', 'SimdFloatSqrtOp', floatTypes,
3789 frsqrtsCode = fpBinOp % 'RSqrtStepFused'
3790 sveBinInst('frsqrts', 'Frsqrts', 'SimdFloatMiscOp', floatTypes,
3793 fscaleCode = fpBinOp % 'Scale'
3794 sveBinInst('fscale', 'Fscale', 'SimdFloatMiscOp', floatTypes, fscaleCode,
3795 PredType.MERGE, True)
3797 fsqrtCode = fpOp % "fplibSqrt<Element>(srcElem1, fpscr)"
3798 sveUnaryInst('fsqrt', 'Fsqrt', 'SimdFloatSqrtOp', floatTypes, fsqrtCode,
3801 fsubCode = fpBinOp % 'Sub'
3802 sveBinImmInst('fsub', 'FsubImm', 'SimdFloatAddOp', floatTypes, fsubCode,
3804 # FSUB (vectors, predicated)
3805 sveBinInst('fsub', 'FsubPred', 'SimdFloatAddOp', floatTypes, fsubCode,
3806 PredType.MERGE, True)
3807 # FSUB (vectors, unpredicated)
3808 sveBinInst('fsub', 'FsubUnpred', 'SimdFloatAddOp', floatTypes, fsubCode)
3810 fsubrCode = fpBinRevOp % 'Sub'
3811 sveBinImmInst('fsubr', 'FsubrImm', 'SimdFloatAddOp', floatTypes, fsubrCode,
3814 sveBinInst('fsubr', 'Fsubr', 'SimdFloatAddOp', floatTypes, fsubrCode,
3815 PredType.MERGE, True)
3817 ftmadCode = fpOp % ('fplibTrigMulAdd<Element>('
3818 'srcElem3, destElem, srcElem2, fpscr)')
3819 sveTerImmInst('ftmad', 'Ftmad', 'SimdFloatMultAccOp', floatTypes,
3822 ftsmulCode = fpBinOp % 'TrigSMul'
3823 sveBinInst('ftsmul', 'Ftsmul', 'SimdFloatMiscOp', floatTypes, ftsmulCode)
3825 ftsselCode = fpBinOp % 'TrigSSel'
3826 sveBinInst('ftssel', 'Ftssel', 'SimdFloatMultOp', floatTypes, ftsselCode)
3827 # INCB, INCH, INCW, INCD (scalar)
3829 destElem = srcElem1 + (count * imm);
3831 sveElemCountInst('inc', 'Inc', 'SimdAluOp', unsignedTypes, incxCode,
3832 destType = DestType.Scalar, dstIs32b = False)
3833 # INCH, INCW, INCD (vector)
3834 sveElemCountInst('inc', 'Incv', 'SimdAluOp', bigUnsignedTypes, incxCode,
3835 destType = DestType.Vector, dstIs32b = False)
3838 XDest = XDest + count;
3840 svePredCountInst('incp', 'Incp', 'SimdAluOp', unsignedTypes, incpCode,
3841 DestType.Scalar, SrcSize.Src64bit)
3844 destElem = srcElem + count;
3846 svePredCountInst('incp', 'Incpv', 'SimdAluOp', unsignedTypes, incpvCode,
3848 # INDEX (immediate, scalar)
3849 sveIndex(IndexFormat.ImmReg)
3850 # INDEX (immediates)
3851 sveIndex(IndexFormat.ImmImm)
3852 # INDEX (scalar, immediate)
3853 sveIndex(IndexFormat.RegImm)
3855 sveIndex(IndexFormat.RegReg)
3857 sveShiftAndInsertInst('insr', 'Insr', 'SimdAluOp', unsignedTypes,
3858 srcType = SrcRegType.Scalar)
3859 # INSR (SIMD&FP scalar)
3860 sveShiftAndInsertInst('insr', 'Insrf', 'SimdAluOp', unsignedTypes,
3861 srcType = SrcRegType.SimdFpScalar)
3865 if (last >= eCount) {
3868 destElem = AA64FpOp1_x[last];'''
3869 sveSelectInst('lasta', 'Lasta', 'SimdAluOp', unsignedTypes, lastaCode,
3871 # LASTA (SIMD&FP scalar)
3872 sveSelectInst('lasta', 'Lastaf', 'SimdAluOp', unsignedTypes, lastaCode,
3873 isCond = False, destType = DstRegType.SimdFpScalar)
3879 destElem = AA64FpOp1_x[last];'''
3880 sveSelectInst('lastb', 'Lastb', 'SimdAluOp', unsignedTypes, lastbCode,
3882 # LASTB (SIMD&FP scalar)
3883 sveSelectInst('lastb', 'Lastbf', 'SimdAluOp', unsignedTypes, lastbCode,
3884 isCond = False, destType = DstRegType.SimdFpScalar)
3885 # LSL (immediate, predicated)
3887 if (srcElem2 == 0) {
3888 destElem = srcElem1;
3889 } else if (srcElem2 >= sizeof(Element) * 8) {
3892 destElem = srcElem1 << srcElem2;
3895 sveBinImmInst('lsl', 'LslImmPred', 'SimdAluOp', unsignedTypes, lslCode,
3897 # LSL (immediate, unpredicated)
3898 sveBinImmInst('lsl', 'LslImmUnpred', 'SimdAluOp', unsignedTypes, lslCode)
3900 sveBinInst('lsl', 'LslPred', 'SimdAluOp', unsignedTypes, lslCode,
3901 PredType.MERGE, True)
3902 # LSL (wide elements, predicated)
3903 sveShiftByWideElemsInst('lsl', 'LslWidePred', 'SimdAluOp', unsignedTypes,
3904 lslCode, PredType.MERGE)
3905 # LSL (wide elements, unpredicated)
3906 sveShiftByWideElemsInst('lsl', 'LslWideUnpred', 'SimdAluOp', unsignedTypes,
3910 if (srcElem1 == 0) {
3911 destElem = srcElem2;
3912 } else if (srcElem1 >= sizeof(Element) * 8) {
3915 destElem = srcElem2 << srcElem1;
3918 sveBinInst('lslr', 'Lslr', 'SimdAluOp', unsignedTypes, lslrCode,
3919 PredType.MERGE, True)
3920 # LSR (immediate, predicated)
3922 if (srcElem2 >= sizeof(Element) * 8) {
3925 destElem = srcElem1 >> srcElem2;
3928 sveBinImmInst('lsr', 'LsrImmPred', 'SimdAluOp', unsignedTypes, lsrCode,
3930 # LSR (immediate, unpredicated)
3931 sveBinImmInst('lsr', 'LsrImmUnpred', 'SimdAluOp', unsignedTypes, lsrCode)
3933 sveBinInst('lsr', 'LsrPred', 'SimdAluOp', unsignedTypes, lsrCode,
3934 PredType.MERGE, True)
3935 # LSR (wide elements, predicated)
3936 sveShiftByWideElemsInst('lsr', 'LsrWidePred', 'SimdAluOp', unsignedTypes,
3937 lsrCode, PredType.MERGE)
3938 # LSR (wide elements, unpredicated)
3939 sveShiftByWideElemsInst('lsr', 'LsrWideUnpred', 'SimdAluOp', unsignedTypes,
3943 if (srcElem1 >= sizeof(Element) * 8) {
3946 destElem = srcElem2 >> srcElem1;
3949 sveBinInst('lsrr', 'Lsrr', 'SimdAluOp', unsignedTypes, lsrrCode,
3950 PredType.MERGE, True)
3952 madCode = 'destElem = srcElem1 + destElem * srcElem2;'
3953 sveTerInst('mad', 'Mad', 'SimdMultAccOp', signedTypes, madCode)
3955 mlaCode = 'destElem += srcElem1 * srcElem2;'
3956 sveTerInst('mla', 'Mla', 'SimdMultAccOp', signedTypes, mlaCode)
3958 mlsCode = 'destElem -= srcElem1 * srcElem2;'
3959 sveTerInst('mls', 'Mls', 'SimdMultAccOp', signedTypes, mlsCode)
3960 # MOVPRFX (predicated)
3961 movCode = 'destElem = srcElem1;'
3962 sveUnaryInst('movprfx', 'MovprfxPredM', 'SimdMiscOp', unsignedTypes,
3963 movCode, PredType.MERGE)
3964 sveUnaryInst('movprfx', 'MovprfxPredZ', 'SimdMiscOp', unsignedTypes,
3965 movCode, PredType.ZERO)
3966 # MOVPRFX (unpredicated)
3967 sveUnaryInst('movprfx', 'MovprfxUnpred', 'SimdMiscOp', ('uint64_t',),
3970 msbCode = 'destElem = srcElem1 - destElem * srcElem2;'
3971 sveTerInst('msb', 'Msb', 'SimdMultAccOp', signedTypes, msbCode)
3973 mulCode = 'destElem = srcElem1 * srcElem2;'
3974 sveWideImmInst('mul', 'MulImm', 'SimdMultOp', unsignedTypes, mulCode)
3976 sveBinInst('mul', 'Mul', 'SimdMultOp', unsignedTypes, mulCode,
3977 PredType.MERGE, True)
3979 nandCode = 'destElem = !(srcElem1 & srcElem2);';
3980 svePredLogicalInst('nand', 'PredNand', 'SimdPredAluOp', ('uint8_t',),
3982 svePredLogicalInst('nands', 'PredNands', 'SimdPredAluOp', ('uint8_t',),
3983 nandCode, isFlagSetting=True)
3985 negCode = 'destElem = -srcElem1;'
3986 sveUnaryInst('neg', 'Neg', 'SimdAluOp', signedTypes, negCode,
3989 norCode = 'destElem = !(srcElem1 | srcElem2);';
3990 svePredLogicalInst('nor', 'PredNor', 'SimdPredAluOp', ('uint8_t',),
3992 svePredLogicalInst('nors', 'PredNors', 'SimdPredAluOp', ('uint8_t',),
3993 norCode, isFlagSetting=True)
3995 notCode = 'destElem = ~srcElem1;'
3996 sveUnaryInst('not', 'Not', 'SimdAluOp', unsignedTypes, notCode,
3998 # ORN, ORNS (predicates)
3999 ornCode = 'destElem = srcElem1 | !srcElem2;';
4000 svePredLogicalInst('orn', 'PredOrn', 'SimdPredAluOp', ('uint8_t',),
4002 svePredLogicalInst('orns', 'PredOrns', 'SimdPredAluOp', ('uint8_t',),
4003 ornCode, isFlagSetting=True)
4005 orCode = 'destElem = srcElem1 | srcElem2;'
4006 sveWideImmInst('orr', 'OrrImm', 'SimdAluOp', ('uint64_t',), orCode)
4007 # ORR (vectors, predicated)
4008 sveBinInst('orr', 'OrrPred', 'SimdAluOp', unsignedTypes, orCode,
4009 PredType.MERGE, True)
4010 # ORR (vectors, unpredicated)
4011 orCode = 'destElem = srcElem1 | srcElem2;'
4012 sveBinInst('orr', 'OrrUnpred', 'SimdAluOp', ('uint64_t',), orCode)
4013 # ORR, ORRS (predicates)
4014 svePredLogicalInst('orr', 'PredOrr', 'SimdPredAluOp', ('uint8_t',), orCode)
4015 svePredLogicalInst('orrs', 'PredOrrs', 'SimdPredAluOp', ('uint8_t',),
4016 orCode, isFlagSetting=True)
4018 orvCode = 'destElem |= srcElem1;'
4019 sveAssocReducInst('orv', 'Orv', 'SimdReduceAluOp', unsignedTypes,
4026 svePredUnaryWImplicitSrcInst('pfalse', 'Pfalse', 'SimdPredAluOp',
4029 svePFirstInst('pfirst', 'Pfirst', 'SimdPredAluOp')
4031 svePNextInst('pnext', 'Pnext', 'SimdPredAluOp', unsignedTypes)
4033 svePredTestInst('ptest', 'Ptest', 'SimdPredAluOp')
4035 svePtrueInst('ptrue', 'Ptrue', 'SimdPredAluOp', unsignedTypes, False)
4037 svePtrueInst('ptrues', 'Ptrues', 'SimdPredAluOp', unsignedTypes, True)
4039 sveUnpackInst('punpkhi', 'Punpkhi', 'SimdPredAluOp', unsignedWideSDTypes,
4040 unpackHalf = Unpack.High, regType = SrcRegType.Predicate)
4042 sveUnpackInst('punpklo', 'Punpklo', 'SimdPredAluOp', unsignedWideSDTypes,
4043 unpackHalf = Unpack.Low, regType = SrcRegType.Predicate)
4046 destElem = reverseBits(srcElem1);'''
4047 sveUnaryInst('rbit', 'Rbit', 'SimdAluOp', unsignedTypes, rbitCode,
4048 predType=PredType.MERGE, srcRegType=SrcRegType.Vector)
4049 # RDFFR (unpredicated)
4050 rdffrUnpredCode = '''
4051 unsigned eCount = ArmStaticInst::getCurSveVecLen<uint8_t>(
4053 for (unsigned i = 0; i < eCount; i++) {
4054 PDest_ub[i] = Ffr_ub[i];
4056 svePredUnaryWImplicitSrcInst('rdffr', 'RdffrUnpred', 'SimdPredAluOp',
4058 # RDFFR, RDFFRS (predicated)
4060 unsigned eCount = ArmStaticInst::getCurSveVecLen<uint8_t>(
4062 for (unsigned i = 0; i < eCount; i++) {
4064 PDest_ub[i] = Ffr_ub[i];
4066 PDest_ub[i] = false;
4069 svePredUnaryWImplicitSrcInst('rdffr', 'RdffrPred', 'SimdPredAluOp',
4070 rdffrPredCode, PredType.ZERO, False)
4071 svePredUnaryWImplicitSrcInst('rdffrs', 'RdffrsPred', 'SimdPredAluOp',
4072 rdffrPredCode, PredType.ZERO, True)
4074 rdvlCode = sveEnabledCheckCode + '''
4075 unsigned eCount = ArmStaticInst::getCurSveVecLen<uint8_t>(
4077 XDest = eCount * (int64_t) imm;
4079 rdvlIop = InstObjParams('rdvl', 'SveRdvl', 'RegImmOp', rdvlCode, [])
4080 header_output += RegImmOpDeclare.subst(rdvlIop)
4081 decoder_output += RegImmOpConstructor.subst(rdvlIop)
4082 exec_output += BasicExecute.subst(rdvlIop)
4084 sveReverseElementsInst('rev', 'Revp', 'SimdPredAluOp', unsignedTypes,
4085 srcType = SrcRegType.Predicate)
4087 sveReverseElementsInst('rev', 'Revv', 'SimdAluOp', unsignedTypes,
4088 srcType = SrcRegType.Vector)
4091 %(revtype)s* srcPtr = reinterpret_cast<%(revtype)s*>(&srcElem1);
4092 %(revtype)s* dstPtr = reinterpret_cast<%(revtype)s*>(&destElem);
4093 uint8_t subelements = sizeof(Element) / sizeof(%(revtype)s);
4094 for(int i = 0; i < subelements; ++i) {
4095 dstPtr[subelements - i - 1] = srcPtr[i];
4097 sveUnaryInst('revb', 'Revb', 'SimdAluOp',
4098 ['uint16_t', 'uint32_t', 'uint64_t'],
4099 revCode % {'revtype' : 'uint8_t'}, predType=PredType.MERGE,
4100 srcRegType=SrcRegType.Vector, decoder='Generic')
4102 sveUnaryInst('revh', 'Revh', 'SimdAluOp', ['uint32_t', 'uint64_t'],
4103 revCode % {'revtype' : 'uint16_t'}, predType=PredType.MERGE,
4104 srcRegType=SrcRegType.Vector, decoder='Generic')
4106 sveUnaryInst('revw', 'Revw', 'SimdAluOp', ['uint64_t'],
4107 revCode % {'revtype' : 'uint32_t'}, predType=PredType.MERGE,
4108 srcRegType=SrcRegType.Vector, decoder='Generic')
4111 destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
4112 (srcElem2 - srcElem1);
4114 sveBinInst('sabd', 'Sabd', 'SimdAddOp', signedTypes, abdCode,
4115 PredType.MERGE, True)
4117 addvCode = 'destElem += srcElem1;'
4118 sveWideningAssocReducInst('saddv', 'Saddv', 'SimdReduceAddOp',
4119 ['int8_t, int64_t', 'int16_t, int64_t', 'int32_t, int64_t'],
4122 scvtfCode = fpOp % ('fplibFixedToFP<DElement>('
4123 'sext<sizeof(SElement) * 8>(srcElem1), 0,'
4124 ' false, FPCRRounding(fpscr), fpscr)')
4125 sveCvtInst('scvtf', 'ScvtfNarrow', 'SimdCvtOp',
4126 ('uint16_t, uint16_t',
4127 'uint32_t, uint16_t',
4128 'uint64_t, uint16_t',
4129 'uint32_t, uint32_t',
4130 'uint64_t, uint32_t',
4131 'uint64_t, uint64_t'),
4132 scvtfCode, CvtDir.Narrow)
4133 sveCvtInst('scvtf', 'ScvtfWiden', 'SimdCvtOp', ('uint32_t, uint64_t',),
4134 scvtfCode, CvtDir.Widen)
4137 constexpr Element ELEM_MIN = std::numeric_limits<Element>::min();
4138 destElem = (srcElem2 == 0) ? 0 :
4139 (srcElem2 == -1 && srcElem1 == ELEM_MIN) ? ELEM_MIN :
4140 (srcElem1 / srcElem2);
4142 sveBinInst('sdiv', 'Sdiv', 'SimdDivOp', signedTypes, sdivCode,
4143 PredType.MERGE, True)
4146 constexpr Element ELEM_MIN = std::numeric_limits<Element>::min();
4147 destElem = (srcElem1 == 0) ? 0 :
4148 (srcElem1 == -1 && srcElem2 == ELEM_MIN) ? ELEM_MIN :
4149 (srcElem2 / srcElem1);
4151 sveBinInst('sdivr', 'Sdivr', 'SimdDivOp', signedTypes, sdivrCode,
4152 PredType.MERGE, True)
4154 sveDotInst('sdot', 'Sdoti', 'SimdAluOp', ['int8_t, int32_t',
4155 'int16_t, int64_t'], isIndexed = True)
4157 sveDotInst('sdot', 'Sdotv', 'SimdAluOp', ['int8_t, int32_t',
4158 'int16_t, int64_t'], isIndexed = False)
4160 selCode = 'destElem = srcElem1;'
4161 svePredLogicalInst('sel', 'PredSel', 'SimdPredAluOp', ('uint8_t',),
4162 selCode, PredType.SELECT)
4164 sveBinInst('sel', 'Sel', 'SimdAluOp', unsignedTypes, selCode,
4165 PredType.SELECT, False)
4170 svePredWriteFfrInst('setffr', 'Setffr', 'SimdPredAluOp', setffrCode, True)
4172 maxCode = 'destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2;'
4173 sveWideImmInst('smax', 'SmaxImm', 'SimdCmpOp', signedTypes, maxCode)
4175 sveBinInst('smax', 'Smax', 'SimdCmpOp', signedTypes, maxCode,
4176 PredType.MERGE, True)
4179 if (srcElem1 > destElem)
4180 destElem = srcElem1;
4182 sveAssocReducInst('smaxv', 'Smaxv', 'SimdReduceCmpOp', signedTypes,
4183 maxvCode, 'std::numeric_limits<Element>::min()')
4185 minCode = 'destElem = (srcElem1 < srcElem2) ? srcElem1 : srcElem2;'
4186 sveWideImmInst('smin', 'SminImm', 'SimdCmpOp', signedTypes, minCode)
4188 sveBinInst('smin', 'Smin', 'SimdCmpOp', signedTypes, minCode,
4189 PredType.MERGE, True)
4192 if (srcElem1 < destElem)
4193 destElem = srcElem1;
4195 sveAssocReducInst('sminv', 'Sminv', 'SimdReduceCmpOp', signedTypes,
4196 minvCode, 'std::numeric_limits<Element>::max()')
4200 T do_mulh(T srcElem1, T srcElem2)
4202 return ((int64_t)srcElem1 * (int64_t)srcElem2) >> sizeof(T) * 8;
4205 int64_t do_mulh(int64_t srcElem1, int64_t srcElem2)
4207 uint64_t x = (uint64_t) llabs(srcElem1);
4208 uint64_t y = (uint64_t) llabs(srcElem2);
4210 uint64_t a = x >> 32;
4211 uint64_t b = x & 0xFFFFFFFF;
4212 uint64_t c = y >> 32;
4213 uint64_t d = y & 0xFFFFFFFF;
4215 uint64_t hi = a * c;
4216 uint64_t lo = b * d;
4218 hi += (a * d) >> 32;
4220 lo += ((a * d) & 0xFFFFFFFF) << 32;
4224 hi += (b * c) >> 32;
4226 lo += ((b * c) & 0xFFFFFFFF) << 32;
4230 uint64_t destElem = hi;
4231 if ((srcElem1 < 0) ^ (srcElem2 < 0)) {
4232 uint64_t tmp = lo = ~lo;
4241 uint64_t do_mulh(uint64_t srcElem1, uint64_t srcElem2)
4243 uint64_t x = srcElem1;
4244 uint64_t y = srcElem2;
4246 uint64_t a = x >> 32;
4247 uint64_t b = x & 0xFFFFFFFF;
4248 uint64_t c = y >> 32;
4249 uint64_t d = y & 0xFFFFFFFF;
4251 uint64_t hi = a * c;
4252 uint64_t lo = b * d;
4254 hi += (a * d) >> 32;
4256 lo += ((a * d) & 0xFFFFFFFF) << 32;
4260 hi += (b * c) >> 32;
4262 lo += ((b * c) & 0xFFFFFFFF) << 32;
4269 destElem = do_mulh(srcElem1, srcElem2);'''
4270 sveBinInst('smulh', 'Smulh', 'SimdMultOp', signedTypes, mulhCode,
4271 PredType.MERGE, True)
4273 sveSpliceInst('splice', 'Splice', 'SimdAluOp', unsignedTypes)
4276 destElem = srcElem1 + srcElem2;
4277 bool negDest = (destElem < 0);
4278 bool negSrc1 = (srcElem1 < 0);
4279 bool negSrc2 = (srcElem2 < 0);
4280 if ((negDest != negSrc1) && (negSrc1 == negSrc2)) {
4281 destElem = static_cast<Element>(
4282 (Element)1 << (sizeof(Element) * 8 - 1)
4288 sveWideImmInst('sqadd', 'SqaddImm', 'SimdAddOp', signedTypes, sqaddCode)
4290 sveBinInst('sqadd', 'Sqadd', 'SimdAddOp', signedTypes, sqaddCode)
4291 # SQDECB, SQDECH, SQDECW, SQDECD (scalar, 32-bit)
# Signed saturating decrement by (element count * multiplier). Overflow
# check mirrors SQADD: result sign flipped relative to the source while
# the subtrahend's sign would not explain it.
4293 destElem = srcElem1 - (count * imm);
4294 bool negDest = (destElem < 0);
4295 bool negSrc = (srcElem1 < 0);
4296 bool posCount = ((count * imm) >= 0);
4297 if ((negDest != negSrc) && (negSrc == posCount)) {
# %(dstType)s is substituted per variant below (int32_t / int64_t / Element).
4298 destElem = static_cast<%(dstType)s>(
4299 (%(dstType)s)1 << (sizeof(%(dstType)s) * 8 - 1)
4305 sveElemCountInst('sqdec', 'Sqdec32', 'SimdAluOp', signedTypes,
4306 sqdecCode%{'dstType':'int32_t'}, destType = DestType.Scalar,
4308 # SQDECB, SQDECH, SQDECW, SQDECD (scalar, 64-bit)
4309 sveElemCountInst('sqdec', 'Sqdec', 'SimdAluOp', signedTypes,
4310 sqdecCode%{'dstType':'int64_t'}, destType = DestType.Scalar,
4312 # SQDECH, SQDECW, SQDECD (vector)
4313 sveElemCountInst('sqdec', 'Sqdecv', 'SimdAluOp', bigSignedTypes,
4314 sqdecCode%{'dstType':'Element'}, destType = DestType.Vector,
4316 # SQDECP (scalar, 32-bit)
# SQDECP: decrement by the count of active predicate elements, saturating
# (here visibly toward the type's minimum on negative overflow).
4318 destElem = srcElem - count;
4319 bool negDest = (destElem < 0);
4320 bool negSrc = (srcElem < 0);
4321 bool posCount = (count >= 0);
4322 if ((negDest != negSrc) && (negSrc == posCount)) {
4323 destElem = std::numeric_limits<%s>::min();
4329 int32_t srcElem = WDest;
4330 int32_t destElem;''' + (sqdecpCode % 'int32_t') + '''
# 32-bit result is written into XDest; ~mask(32) fills the upper half —
# presumably sign extension, guarded by elided lines — TODO confirm.
4332 XDest = static_cast<uint32_t>(destElem) | ~mask(32);
4337 svePredCountInst('sqdecp', 'Sqdecp32', 'SimdAluOp', signedTypes,
4338 sqdecp32Code, DestType.Scalar, SrcSize.Src32bit)
4339 # SQDECP (scalar, 64-bit)
4341 int64_t srcElem = XDest;
4342 int64_t destElem;''' + (sqdecpCode % 'int64_t') + '''
4345 svePredCountInst('sqdecp', 'Sqdecp64', 'SimdAluOp', signedTypes,
4346 sqdecp64Code, DestType.Scalar, SrcSize.Src64bit)
4348 svePredCountInst('sqdecp', 'Sqdecpv', 'SimdAluOp', signedTypes,
4349 sqdecpCode % 'Element', DestType.Vector)
4350 # SQINCB, SQINCH, SQINCW, SQINCD (scalar, 32-bit)
# Signed saturating increment — mirror image of sqdecCode above, with the
# same sign-based overflow detection.
4352 destElem = srcElem1 + (count * imm);
4353 bool negDest = (destElem < 0);
4354 bool negSrc = (srcElem1 < 0);
4355 bool negCount = ((count * imm) < 0);
4356 if ((negDest != negSrc) && (negSrc == negCount)) {
4357 destElem = static_cast<%(dstType)s>(
4358 (%(dstType)s)1 << (sizeof(%(dstType)s) * 8 - 1)
4364 sveElemCountInst('sqinc', 'Sqinc32', 'SimdAluOp', signedTypes,
4365 sqincCode%{'dstType':'int32_t'}, destType = DestType.Scalar,
4367 # SQINCB, SQINCH, SQINCW, SQINCD (scalar, 64-bit)
4368 sveElemCountInst('sqinc', 'Sqinc', 'SimdAluOp', signedTypes,
4369 sqincCode%{'dstType':'int64_t'}, destType = DestType.Scalar,
4371 # SQINCH, SQINCW, SQINCD (vector)
4372 sveElemCountInst('sqinc', 'Sqincv', 'SimdAluOp', bigSignedTypes,
4373 sqincCode%{'dstType':'Element'}, destType = DestType.Vector,
4375 # SQINCP (scalar, 32-bit)
# SQINCP: increment by active predicate element count, saturating.
4377 destElem = srcElem + count;
4378 bool negDest = (destElem < 0);
4379 bool negSrc = (srcElem < 0);
4380 bool negCount = (count < 0);
4381 if ((negDest != negSrc) && (negSrc == negCount)) {
4382 destElem = std::numeric_limits<%s>::min();
4388 int32_t srcElem = WDest;
4389 int32_t destElem;''' + (sqincpCode % 'int32_t') + '''
# Upper-half fill of XDest — presumably sign extension guarded by elided
# lines, as in the SQDECP 32-bit variant — TODO confirm.
4391 XDest = static_cast<uint32_t>(destElem) | ~mask(32);
4396 svePredCountInst('sqincp', 'Sqincp32', 'SimdAluOp', signedTypes,
4397 sqincp32Code, DestType.Scalar, SrcSize.Src32bit)
4398 # SQINCP (scalar, 64-bit)
4400 int64_t srcElem = XDest;
4401 int64_t destElem;''' + (sqincpCode % 'int64_t') + '''
4404 svePredCountInst('sqincp', 'Sqincp64', 'SimdAluOp', signedTypes,
4405 sqincp64Code, DestType.Scalar, SrcSize.Src64bit)
4407 svePredCountInst('sqincp', 'Sqincpv', 'SimdAluOp', signedTypes,
4408 sqincpCode % 'Element', DestType.Vector)
# Signed saturating subtract: overflow when the result's sign flips
# relative to srcElem1 while srcElem2's sign cannot account for it.
4411 destElem = srcElem1 - srcElem2;
4412 bool negDest = (destElem < 0);
4413 bool negSrc1 = (srcElem1 < 0);
4414 bool posSrc2 = (srcElem2 >= 0);
4415 if ((negDest != negSrc1) && (negSrc1 == posSrc2)) {
4416 destElem = static_cast<Element>(
4417 (Element)1 << (sizeof(Element) * 8 - 1)
4423 sveWideImmInst('sqsub', 'SqsubImm', 'SimdAddOp', signedTypes, sqsubCode)
4425 sveBinInst('sqsub', 'Sqsub', 'SimdAddOp', signedTypes, sqsubCode)
# Plain (non-saturating, modular) SUB and reversed SUBR.
4427 subCode = 'destElem = srcElem1 - srcElem2;'
4428 sveWideImmInst('sub', 'SubImm', 'SimdAddOp', unsignedTypes, subCode)
4429 # SUB (vectors, predicated)
4430 sveBinInst('sub', 'SubPred', 'SimdAddOp', unsignedTypes, subCode,
4431 PredType.MERGE, True)
4432 # SUB (vectors, unpredicated)
4433 subCode = 'destElem = srcElem1 - srcElem2;'
4434 sveBinInst('sub', 'SubUnpred', 'SimdAddOp', unsignedTypes, subCode)
# SUBR computes srcElem2 - srcElem1 (operands reversed).
4436 subrCode = 'destElem = srcElem2 - srcElem1;'
4437 sveWideImmInst('subr', 'SubrImm', 'SimdAddOp', unsignedTypes, subrCode)
4439 sveBinInst('subr', 'Subr', 'SimdAddOp', unsignedTypes, subrCode,
4440 PredType.MERGE, True)
# SUNPKHI/SUNPKLO: signed unpack of the high/low half of a vector to the
# next wider element size.
4442 sveUnpackInst('sunpkhi', 'Sunpkhi', 'SimdAluOp', signedWideSDTypes,
4443 unpackHalf = Unpack.High, regType = SrcRegType.Vector)
4445 sveUnpackInst('sunpklo', 'Sunpklo', 'SimdAluOp', signedWideSDTypes,
4446 unpackHalf = Unpack.Low, regType = SrcRegType.Vector)
# SXTB/SXTH/SXTW: sign-extend from the narrow source element width
# (sext over 8 * sizeof(SElement) bits), merging predication.
4448 sxtCode = 'destElem = sext<8 * sizeof(SElement)>(srcElem1);'
4449 sveWidenUnaryInst('sxtb', 'Sxtb', 'SimdAluOp',
4450 ['uint8_t, uint16_t', 'uint8_t, uint32_t', 'uint8_t, uint64_t'],
4451 sxtCode, PredType.MERGE)
4453 sveWidenUnaryInst('sxth', 'Sxth', 'SimdAluOp',
4454 ['uint16_t, uint32_t', 'uint16_t, uint64_t'],
4455 sxtCode, PredType.MERGE)
4457 sveWidenUnaryInst('sxtw', 'Sxtw', 'SimdAluOp',
4458 ['uint32_t, uint64_t'],
4459 sxtCode, PredType.MERGE)
4461 sveTblInst('tbl', 'Tbl', 'SimdAluOp')
4462 # TRN1, TRN2 (predicates)
# Transpose interleave: element pairs are taken from matching positions of
# the two sources; iteration code is %-formatted with 0 (TRN1) or 1 (TRN2)
# (the line using that substitution is elided from this listing).
4463 trnPredIterCode = '''
4464 constexpr unsigned sz = sizeof(Element);
# Results are staged in a temporary predicate so the destination can alias
# a source; copied out in the final loop.
4467 ArmISA::VecPredRegContainer tmpPredC;
4468 auto auxPDest = tmpPredC.as<uint8_t>();
4469 for (unsigned i = 0; i < eCount / 2; i++) {
4471 for (unsigned j = 0; j < sz; j++) {
4472 auxPDest[(2 * i) * sz + j] = POp1_pb[s * sz + j];
4473 auxPDest[(2 * i + 1) * sz + j] = POp2_pb[s * sz + j];
4476 for (unsigned i = 0; i < eCount * sz; i++) {
4477 PDest_pb[i] = auxPDest[i];
4480 svePredBinPermInst('trn1', 'Trn1Pred', 'SimdPredAluOp', unsignedTypes,
4481 trnPredIterCode % 0)
4482 svePredBinPermInst('trn2', 'Trn2Pred', 'SimdPredAluOp', unsignedTypes,
4483 trnPredIterCode % 1)
4484 # TRN1, TRN2 (vectors)
# Vector form of the same transpose, staged through a temporary vector.
4488 ArmISA::VecRegContainer tmpVecC;
4489 auto auxDest = tmpVecC.as<Element>();
4490 for (unsigned i = 0; i < eCount / 2; i++) {
4492 auxDest[2 * i] = AA64FpOp1_x[s];
4493 auxDest[2 * i + 1] = AA64FpOp2_x[s];
4495 for (unsigned i = 0; i < eCount; i++) {
4496 AA64FpDest_x[i] = auxDest[i];
4499 sveBinInst('trn1', 'Trn1', 'SimdAluOp', unsignedTypes, '',
4500 customIterCode=trnIterCode % 0)
4501 sveBinInst('trn2', 'Trn2', 'SimdAluOp', unsignedTypes, '',
4502 customIterCode=trnIterCode % 1)
# UABD: unsigned absolute difference (reuses abdCode defined earlier,
# outside this listing).
4504 sveBinInst('uabd', 'Uabd', 'SimdAddOp', unsignedTypes, abdCode,
4505 PredType.MERGE, True)
# UADDV: unsigned widening add reduction — all element sizes reduce into
# a uint64_t accumulator.
4507 sveWideningAssocReducInst('uaddv', 'Uaddv', 'SimdReduceAddOp',
4508 ['uint8_t, uint64_t', 'uint16_t, uint64_t', 'uint32_t, uint64_t',
4509 'uint64_t, uint64_t'],
# UCVTF: unsigned fixed-point to floating-point conversion via fplib,
# honoring the FPCR rounding mode.
4512 ucvtfCode = fpOp % ('fplibFixedToFP<DElement>(srcElem1, 0, true,'
4513 ' FPCRRounding(fpscr), fpscr)')
4514 sveCvtInst('ucvtf', 'UcvtfNarrow', 'SimdCvtOp',
4515 ('uint16_t, uint16_t',
4516 'uint32_t, uint16_t',
4517 'uint64_t, uint16_t',
4518 'uint32_t, uint32_t',
4519 'uint64_t, uint32_t',
4520 'uint64_t, uint64_t'),
4521 ucvtfCode, CvtDir.Narrow)
4522 sveCvtInst('ucvtf', 'UcvtfWiden', 'SimdCvtOp', ('uint32_t, uint64_t',),
4523 ucvtfCode, CvtDir.Widen)
# UDIV/UDIVR: unsigned divide; division by zero yields 0 (per SVE
# semantics) rather than trapping. UDIVR reverses the operands.
4525 udivCode = 'destElem = (srcElem2 == 0) ? 0 : (srcElem1 / srcElem2);'
4526 sveBinInst('udiv', 'Udiv', 'SimdDivOp', unsignedTypes, udivCode,
4527 PredType.MERGE, True)
4529 udivrCode = 'destElem = (srcElem1 == 0) ? 0 : (srcElem2 / srcElem1);'
4530 sveBinInst('udivr', 'Udivr', 'SimdDivOp', unsignedTypes, udivrCode,
4531 PredType.MERGE, True)
# UDOT: unsigned dot product, indexed and vector forms.
4533 sveDotInst('udot', 'Udoti', 'SimdAluOp', ['uint8_t, uint32_t',
4534 'uint16_t, uint64_t'], isIndexed = True)
4536 sveDotInst('udot', 'Udotv', 'SimdAluOp', ['uint8_t, uint32_t',
4537 'uint16_t, uint64_t'], isIndexed = False)
# UMAX/UMAXV and UMIN/UMINV: reductions seed with the identity element
# (min() for max-reduction, max() for min-reduction).
4539 sveWideImmInst('umax', 'UmaxImm', 'SimdCmpOp', unsignedTypes, maxCode)
4541 sveBinInst('umax', 'Umax', 'SimdCmpOp', unsignedTypes, maxCode,
4542 PredType.MERGE, True)
4544 sveAssocReducInst('umaxv', 'Umaxv', 'SimdReduceCmpOp', unsignedTypes,
4545 maxvCode, 'std::numeric_limits<Element>::min()')
4547 sveWideImmInst('umin', 'UminImm', 'SimdCmpOp', unsignedTypes, minCode)
4549 sveBinInst('umin', 'Umin', 'SimdCmpOp', unsignedTypes, minCode,
4550 PredType.MERGE, True)
4552 sveAssocReducInst('uminv', 'Uminv', 'SimdReduceCmpOp', unsignedTypes,
4553 minvCode, 'std::numeric_limits<Element>::max()')
4555 sveBinInst('umulh', 'Umulh', 'SimdMultOp', unsignedTypes, mulhCode,
4556 PredType.MERGE, True)
# Unsigned saturating add: wrap-around detected when the modular sum is
# smaller than either operand; saturate to all-ones ((Element)(-1)).
4559 destElem = srcElem1 + srcElem2;
4560 if (destElem < srcElem1 || destElem < srcElem2) {
4561 destElem = (Element)(-1);
4564 sveWideImmInst('uqadd', 'UqaddImm', 'SimdAddOp', unsignedTypes, uqaddCode)
4566 sveBinInst('uqadd', 'Uqadd', 'SimdAddOp', unsignedTypes, uqaddCode)
4567 # UQDECB, UQDECH, UQDECW, UQDECD (scalar, 32-bit)
# Unsigned saturating decrement: underflow detected when the result
# exceeds the original value (saturation value is on an elided line,
# presumably 0 — TODO confirm).
4569 destElem = srcElem1 - (imm * count);
4570 if (destElem > srcElem1) {
4574 sveElemCountInst('uqdec', 'Uqdec32', 'SimdAluOp', unsignedTypes,
4575 uqdecCode, destType = DestType.Scalar, dstIs32b = True)
4576 # UQDECB, UQDECH, UQDECW, UQDECD (scalar, 64-bit)
4577 sveElemCountInst('uqdec', 'Uqdec', 'SimdAluOp', unsignedTypes,
4578 uqdecCode, destType = DestType.Scalar, dstIs32b = False)
4579 # UQDECH, UQDECW, UQDECD (vector)
4580 sveElemCountInst('uqdec', 'Uqdecv', 'SimdAluOp', bigUnsignedTypes,
4581 uqdecCode, destType = DestType.Vector, dstIs32b = False)
4582 # UQDECP (scalar, 32-bit)
# UQDECP: decrement by active predicate element count, same underflow test.
4584 destElem = srcElem - count;
4585 if (destElem > srcElem) {
4590 uint32_t srcElem = WDest;
4591 uint32_t destElem;''' + uqdecpCode + '''
4594 svePredCountInst('uqdecp', 'Uqdecp32', 'SimdAluOp', unsignedTypes,
4595 uqdecp32Code, DestType.Scalar, SrcSize.Src32bit)
4596 # UQDECP (scalar, 64-bit)
4598 uint64_t srcElem = XDest;
4599 uint64_t destElem;''' + uqdecpCode + '''
4602 svePredCountInst('uqdecp', 'Uqdecp64', 'SimdAluOp', unsignedTypes,
4603 uqdecp64Code, DestType.Scalar, SrcSize.Src64bit)
4605 svePredCountInst('uqdecp', 'Uqdecpv', 'SimdAluOp', unsignedTypes,
4606 uqdecpCode, DestType.Vector)
4607 # UQINCB, UQINCH, UQINCW, UQINCD (scalar, 32-bit)
# (Section comments corrected: the original listing said "UQDEC…" here,
# a copy-paste slip — the code below defines the uqinc variants.)
# Unsigned saturating increment: overflow when the modular sum is smaller
# than either addend; saturate to all-ones of the destination type.
4609 destElem = srcElem1 + (imm * count);
4610 if (destElem < srcElem1 || destElem < (imm * count)) {
4611 destElem = static_cast<%(destType)s>(-1);
4614 sveElemCountInst('uqinc', 'Uqinc32', 'SimdAluOp', unsignedTypes,
4615 uqincCode%{'destType': 'uint32_t'}, destType = DestType.Scalar,
4617 # UQINCB, UQINCH, UQINCW, UQINCD (scalar, 64-bit)
4618 sveElemCountInst('uqinc', 'Uqinc', 'SimdAluOp', unsignedTypes,
4619 uqincCode%{'destType': 'uint64_t'}, destType = DestType.Scalar,
4621 # UQINCH, UQINCW, UQINCD (vector)
4622 sveElemCountInst('uqinc', 'Uqincv', 'SimdAluOp', bigUnsignedTypes,
4623 uqincCode%{'destType': 'Element'}, destType = DestType.Vector,
4625 # UQINCP (scalar, 32-bit)
# UQINCP: increment by active predicate element count, saturating to max().
4627 destElem = srcElem + count;
4628 if (destElem < srcElem || destElem < count) {
4629 destElem = std::numeric_limits<%s>::max();
4633 uint32_t srcElem = WDest;
4634 uint32_t destElem;''' + (uqincpCode % 'uint32_t') + '''
4637 svePredCountInst('uqincp', 'Uqincp32', 'SimdAluOp', unsignedTypes,
4638 uqincp32Code, DestType.Scalar, SrcSize.Src32bit)
4639 # UQINCP (scalar, 64-bit)
4641 uint64_t srcElem = XDest;
4642 uint64_t destElem;''' + (uqincpCode % 'uint64_t') + '''
4645 svePredCountInst('uqincp', 'Uqincp64', 'SimdAluOp', unsignedTypes,
4646 uqincp64Code, DestType.Scalar, SrcSize.Src64bit)
4648 svePredCountInst('uqincp', 'Uqincpv', 'SimdAluOp', unsignedTypes,
4649 uqincpCode % 'Element', DestType.Vector)
# Unsigned saturating subtract: underflow when the result exceeds the
# minuend (saturation value is on an elided line, presumably 0 — TODO
# confirm).
4652 destElem = srcElem1 - srcElem2;
4653 if (destElem > srcElem1) {
4657 sveWideImmInst('uqsub', 'UqsubImm', 'SimdAddOp', unsignedTypes, uqsubCode)
4659 sveBinInst('uqsub', 'Uqsub', 'SimdAddOp', unsignedTypes, uqsubCode)
# UUNPKHI/UUNPKLO: unsigned unpack of high/low vector half to wider elements.
4661 sveUnpackInst('uunpkhi', 'Uunpkhi', 'SimdAluOp', unsignedWideSDTypes,
4662 unpackHalf = Unpack.High, regType = SrcRegType.Vector)
4664 sveUnpackInst('uunpklo', 'Uunpklo', 'SimdAluOp', unsignedWideSDTypes,
4665 unpackHalf = Unpack.Low, regType = SrcRegType.Vector)
# UXTB/UXTH/UXTW: zero-extend — a plain copy suffices because the source
# element types are unsigned.
4667 uxtCode = 'destElem = srcElem1;'
4668 sveWidenUnaryInst('uxtb', 'Uxtb', 'SimdAluOp',
4669 ['uint8_t, uint16_t', 'uint8_t, uint32_t', 'uint8_t, uint64_t'],
4670 uxtCode, PredType.MERGE)
4672 sveWidenUnaryInst('uxth', 'Uxth', 'SimdAluOp',
4673 ['uint16_t, uint32_t', 'uint16_t, uint64_t'],
4674 uxtCode, PredType.MERGE)
4676 sveWidenUnaryInst('uxtw', 'Uxtw', 'SimdAluOp',
4677 ['uint32_t, uint64_t'],
4678 uxtCode, PredType.MERGE)
4679 # UZP1, UZP2 (predicates)
# Unzip: concatenate even (UZP1) or odd (UZP2) numbered elements of the
# two sources; the %-substituted odd/even selector line is elided from
# this listing.
4680 uzpPredIterCode = '''
4681 constexpr unsigned sz = sizeof(Element);
# Staged through a temporary predicate so the destination may alias a source.
4684 ArmISA::VecPredRegContainer tmpPredC;
4685 auto auxPDest = tmpPredC.as<uint8_t>();
4686 for (unsigned i = 0; i < eCount; i++) {
4688 for (unsigned j = 0; j < sz; j++) {
# Source index s < eCount reads from operand 1, otherwise from operand 2
# (rebased by eCount).
4690 auxPDest[i * sz + j] = POp1_pb[s * sz + j];
4692 auxPDest[i * sz + j] = POp2_pb[(s - eCount) * sz + j];
4696 for (unsigned i = 0; i < eCount * sz; i++) {
4697 PDest_pb[i] = auxPDest[i];
4700 svePredBinPermInst('uzp1', 'Uzp1Pred', 'SimdPredAluOp', unsignedTypes,
4701 uzpPredIterCode % 0)
4702 svePredBinPermInst('uzp2', 'Uzp2Pred', 'SimdPredAluOp', unsignedTypes,
4703 uzpPredIterCode % 1)
4704 # UZP1, UZP2 (vectors)
# Vector form of the same unzip, staged through a temporary vector.
4708 ArmISA::VecRegContainer tmpVecC;
4709 auto auxDest = tmpVecC.as<Element>();
4710 for (unsigned i = 0; i < eCount; i++) {
4713 auxDest[i] = AA64FpOp1_x[s];
4715 auxDest[i] = AA64FpOp2_x[s - eCount];
4718 for (unsigned i = 0; i < eCount; i++) {
4719 AA64FpDest_x[i] = auxDest[i];
4722 sveBinInst('uzp1', 'Uzp1', 'SimdAluOp', unsignedTypes, '',
4723 customIterCode=uzpIterCode % 0)
4724 sveBinInst('uzp2', 'Uzp2', 'SimdAluOp', unsignedTypes, '',
4725 customIterCode=uzpIterCode % 1)
# WHILE* predicate-generation loops: each variant plugs a comparison into
# a shared while-loop template (whileLECode uses <=, whileLTCode uses <);
# signedness and 32/64-bit source size select the instruction flavor.
4728 cond = srcElem1 <= srcElem2;
4730 sveWhileInst('whilele', 'Whilele32', 'SimdCmpOp', signedTypes, whileLECode,
4733 sveWhileInst('whilele', 'Whilele64', 'SimdCmpOp', signedTypes, whileLECode,
4737 cond = srcElem1 < srcElem2;
# WHILELO/WHILELS are the unsigned counterparts of WHILELT/WHILELE.
4739 sveWhileInst('whilelo', 'Whilelo32', 'SimdCmpOp', unsignedTypes,
4740 whileLTCode, SrcSize.Src32bit)
4742 sveWhileInst('whilelo', 'Whilelo64', 'SimdCmpOp', unsignedTypes,
4743 whileLTCode, SrcSize.Src64bit)
4745 sveWhileInst('whilels', 'Whilels32', 'SimdCmpOp', unsignedTypes,
4746 whileLECode, SrcSize.Src32bit)
4748 sveWhileInst('whilels', 'Whilels64', 'SimdCmpOp', unsignedTypes,
4749 whileLECode, SrcSize.Src64bit)
4751 sveWhileInst('whilelt', 'Whilelt32', 'SimdCmpOp', signedTypes,
4752 whileLTCode, SrcSize.Src32bit)
4754 sveWhileInst('whilelt', 'Whilelt64', 'SimdCmpOp', signedTypes,
4755 whileLTCode, SrcSize.Src64bit)
# WRFFR: copy the source predicate byte-for-byte into the FFR register.
4758 unsigned eCount = ArmStaticInst::getCurSveVecLen<uint8_t>(
4760 for (unsigned i = 0; i < eCount; i++) {
4761 Ffr_ub[i] = POp1_ub[i];
4763 svePredWriteFfrInst('wrffr', 'Wrffr', 'SimdPredAluOp', wrffrCode, False)
4764 # ZIP1, ZIP2 (predicates)
# Zip interleave: take the low (ZIP1, part=0) or high (ZIP2, part=1) half
# of each source and interleave their elements pairwise.
4765 zipPredIterCode = '''
4766 constexpr unsigned sz = sizeof(Element);
# Staged through a temporary predicate so the destination may alias a source.
4769 ArmISA::VecPredRegContainer tmpPredC;
4770 auto auxPDest = tmpPredC.as<uint8_t>();
4771 for (unsigned i = 0; i < eCount / 2; i++) {
4772 s = i + (part * (eCount / 2));
4773 for (unsigned j = 0; j < sz; j++) {
4774 auxPDest[(2 * i) * sz + j] = POp1_pb[s * sz + j];
4775 auxPDest[(2 * i + 1) * sz + j] = POp2_pb[s * sz + j];
4778 for (unsigned i = 0; i < eCount * sz; i++) {
4779 PDest_pb[i] = auxPDest[i];
4782 svePredBinPermInst('zip1', 'Zip1Pred', 'SimdPredAluOp', unsignedTypes,
4783 zipPredIterCode % 0)
4784 svePredBinPermInst('zip2', 'Zip2Pred', 'SimdPredAluOp', unsignedTypes,
4785 zipPredIterCode % 1)
4786 # ZIP1, ZIP2 (vectors)
# Vector form of the same interleave, staged through a temporary vector.
4790 ArmISA::VecRegContainer tmpVecC;
4791 auto auxDest = tmpVecC.as<Element>();
4792 for (unsigned i = 0; i < eCount / 2; i++) {
4793 s = i + (part * (eCount / 2));
4794 auxDest[2 * i] = AA64FpOp1_x[s];
4795 auxDest[2 * i + 1] = AA64FpOp2_x[s];
4797 for (unsigned i = 0; i < eCount; i++) {
4798 AA64FpDest_x[i] = auxDest[i];
4801 sveBinInst('zip1', 'Zip1', 'SimdAluOp', unsignedTypes, '',
4802 customIterCode=zipIterCode % 0)
4803 sveBinInst('zip2', 'Zip2', 'SimdAluOp', unsignedTypes, '',
4804 customIterCode=zipIterCode % 1)