1 /// Copyright (c) 2009 The Regents of The University of Michigan
2 // All rights reserved.
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met: redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer;
8 // redistributions in binary form must reproduce the above copyright
9 // notice, this list of conditions and the following disclaimer in the
10 // documentation and/or other materials provided with the distribution;
11 // neither the name of the copyright holders nor the names of its
12 // contributors may be used to endorse or promote products derived from
13 // this software without specific prior written permission.
15 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
19 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
21 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 // Authors: Gabe Black
// Template for a generated class's execute() method. %(class_name)s and
// %(CPU_exec_context)s are filled in by template substitution; the method is
// const and receives the execution context pointer and a Trace::InstRecord
// pointer.
29 def template MediaOpExecute {{
30 Fault %(class_name)s::execute(%(CPU_exec_context)s *xc,
31 Trace::InstRecord *traceData) const
// The fault status starts out as NoFault.
33 Fault fault = NoFault;
40 //Write the resulting state to the execution context
// Class declaration template for the register-operand form of a media
// microop: the generated class derives from %(base_class)s and declares two
// constructors that both take two source register indices and a destination.
49 def template MediaOpRegDeclare {{
50 class %(class_name)s : public %(base_class)s
// Constructor with explicit isMicro/isDelayed/isFirst/isLast flags.
56 %(class_name)s(ExtMachInst _machInst,
57 const char * instMnem,
58 bool isMicro, bool isDelayed, bool isFirst, bool isLast,
59 InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
60 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
// Constructor without the four flag arguments.
62 %(class_name)s(ExtMachInst _machInst,
63 const char * instMnem,
64 InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
65 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
// Class declaration template for the immediate-operand form of a media
// microop. It mirrors the register form, except that the second operand is
// an immediate; note that the parameter named _imm8 is declared as uint16_t.
71 def template MediaOpImmDeclare {{
73 class %(class_name)s : public %(base_class)s
// Constructor with explicit isMicro/isDelayed/isFirst/isLast flags.
79 %(class_name)s(ExtMachInst _machInst,
80 const char * instMnem,
81 bool isMicro, bool isDelayed, bool isFirst, bool isLast,
82 InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
83 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
// Constructor without the four flag arguments.
85 %(class_name)s(ExtMachInst _machInst,
86 const char * instMnem,
87 InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
88 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
// Definitions of buildMe() and the two constructors for the register-operand
// form. Both constructors forward their arguments to %(base_class)s together
// with the "%(mnemonic)s" string.
94 def template MediaOpRegConstructor {{
96 inline void %(class_name)s::buildMe()
// Flag-less constructor: passes false for all four of the base class's
// isMicro/isDelayed/isFirst/isLast arguments.
101 inline %(class_name)s::%(class_name)s(
102 ExtMachInst machInst, const char * instMnem,
103 InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
104 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
105 %(base_class)s(machInst, "%(mnemonic)s", instMnem,
106 false, false, false, false,
107 _src1, _src2, _dest, _srcSize, _destSize, _ext,
// Constructor that passes the caller-supplied flags through unchanged.
113 inline %(class_name)s::%(class_name)s(
114 ExtMachInst machInst, const char * instMnem,
115 bool isMicro, bool isDelayed, bool isFirst, bool isLast,
116 InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
117 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
118 %(base_class)s(machInst, "%(mnemonic)s", instMnem,
119 isMicro, isDelayed, isFirst, isLast,
120 _src1, _src2, _dest, _srcSize, _destSize, _ext,
// Definitions of buildMe() and the two constructors for the immediate-operand
// form. Both constructors forward their arguments, with _imm8 in the second
// operand position, to %(base_class)s together with the "%(mnemonic)s" string.
127 def template MediaOpImmConstructor {{
129 inline void %(class_name)s::buildMe()
// Flag-less constructor: passes false for all four of the base class's
// isMicro/isDelayed/isFirst/isLast arguments.
134 inline %(class_name)s::%(class_name)s(
135 ExtMachInst machInst, const char * instMnem,
136 InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
137 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
138 %(base_class)s(machInst, "%(mnemonic)s", instMnem,
139 false, false, false, false,
140 _src1, _imm8, _dest, _srcSize, _destSize, _ext,
// Constructor that passes the caller-supplied flags through unchanged.
146 inline %(class_name)s::%(class_name)s(
147 ExtMachInst machInst, const char * instMnem,
148 bool isMicro, bool isDelayed, bool isFirst, bool isLast,
149 InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
150 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
151 %(base_class)s(machInst, "%(mnemonic)s", instMnem,
152 isMicro, isDelayed, isFirst, isLast,
153 _src1, _imm8, _dest, _srcSize, _destSize, _ext,
161 # Make these empty strings so that concatenating onto
162 # them will always work.
# NOTE(review): the two entries below look like members of the immTemplates /
# regTemplates sequences that buildCppClasses indexes as templates[0..2]; the
# enclosing assignments are not visible here -- confirm before reordering.
169 MediaOpImmConstructor,
174 MediaOpRegConstructor,
177 class MediaOpMeta(type):
178 def buildCppClasses(self, name, Name, suffix, code):
180 # Globals to stick the output in
182 global decoder_output
185 # If op2 is used anywhere, make register and immediate versions
187 matcher = re.compile("(?<!\\w)(?P<prefix>s?)op2(?P<typeQual>\\.\\w+)?")
188 match = matcher.search(code)
191 if match.group("typeQual"):
192 typeQual = match.group("typeQual")
193 src2_name = "%sFpSrcReg2%s" % (match.group("prefix"), typeQual)
194 self.buildCppClasses(name, Name, suffix,
195 matcher.sub(src2_name, code))
196 self.buildCppClasses(name + "i", Name, suffix + "Imm",
197 matcher.sub("imm8", code))
200 base = "X86ISA::MediaOp"
202 # If imm8 shows up in the code, use the immediate templates, if
203 # not, hopefully the register ones will be correct.
204 matcher = re.compile("(?<!\w)imm8(?!\w)")
205 if matcher.search(code):
207 templates = immTemplates
210 templates = regTemplates
212 # Get everything ready for the substitution
213 iop = InstObjParams(name, Name + suffix, base, {"code" : code})
215 # Generate the actual code (finally!)
216 header_output += templates[0].subst(iop)
217 decoder_output += templates[1].subst(iop)
218 exec_output += templates[2].subst(iop)
221 def __new__(mcls, Name, bases, dict):
224 if "abstract" in dict:
225 abstract = dict['abstract']
228 cls = super(MediaOpMeta, mcls).__new__(mcls, Name, bases, dict)
231 cls.base_mnemonic = name
234 # Set up the C++ classes
235 mcls.buildCppClasses(cls, name, Name, "", code)
237 # Hook into the microassembler dict
238 global microopClasses
239 microopClasses[name] = cls
241 # If op2 is used anywhere, make register and immediate versions
243 matcher = re.compile("op2(?P<typeQual>\\.\\w+)?")
244 if matcher.search(code):
245 microopClasses[name + 'i'] = cls
249 class MediaOp(X86Microop):
250 __metaclass__ = MediaOpMeta
251 # This class itself doesn't act as a microop
254 def __init__(self, dest, src1, op2,
255 size = None, destSize = None, srcSize = None, ext = None):
262 if srcSize is not None:
263 self.srcSize = srcSize
264 if destSize is not None:
265 self.destSize = destSize
266 if self.srcSize is None:
267 raise Exception, "Source size not set."
268 if self.destSize is None:
269 raise Exception, "Dest size not set."
275 def getAllocator(self, *microFlags):
276 className = self.className
277 if self.mnemonic == self.base_mnemonic + 'i':
279 allocator = '''new %(class_name)s(machInst, macrocodeBlock
280 %(flags)s, %(src1)s, %(op2)s, %(dest)s,
281 %(srcSize)s, %(destSize)s, %(ext)s)''' % {
282 "class_name" : className,
283 "flags" : self.microFlagsText(microFlags),
284 "src1" : self.src1, "op2" : self.op2,
286 "srcSize" : self.srcSize,
287 "destSize" : self.destSize,
# Single-source microop: the constructor takes only dest and src and passes
# the literal string "InstRegIndex(0)" to MediaOp.__init__ as op2. The C++
# body takes the low srcSize * 8 bits of FpSrcReg1 and merges them into
# DestReg using destSize.
291 class Mov2int(MediaOp):
292 def __init__(self, dest, src, \
293 size = None, destSize = None, srcSize = None, ext = None):
294 super(Mov2int, self).__init__(dest, src,\
295 "InstRegIndex(0)", size, destSize, srcSize, ext)
297 uint64_t fpSrcReg1 = bits(FpSrcReg1.uqw, srcSize * 8 - 1, 0);
298 DestReg = merge(DestReg, fpSrcReg1, destSize);
# Single-source microop: the constructor takes only dest and src and passes
# the literal string "InstRegIndex(0)" to MediaOp.__init__ as op2. The C++
# body picks srcSize bytes from SrcReg1 and inserts them into the low
# destSize * 8 bits of FpDestReg.
301 class Mov2fp(MediaOp):
302 def __init__(self, dest, src, \
303 size = None, destSize = None, srcSize = None, ext = None):
304 super(Mov2fp, self).__init__(dest, src,\
305 "InstRegIndex(0)", size, destSize, srcSize, ext)
307 uint64_t srcReg1 = pick(SrcReg1, 0, srcSize);
309 insertBits(FpDestReg.uqw, destSize * 8 - 1, 0, srcReg1);
# Interleaves elements of the two sources. `items` is half the number of
# size-byte elements in a FloatRegBits; element (i + offset) of FpSrcReg1 is
# written to result slot 2 * i and the same element of FpSrcReg2 to slot
# 2 * i + 1. offset is `items` when ext is non-zero (the upper half of each
# source) and 0 otherwise (the lower half).
312 class Unpack(MediaOp):
314 assert(srcSize == destSize);
316 int items = (sizeof(FloatRegBits) / size) / 2;
317 int offset = ext ? items : 0;
319 for (int i = 0; i < items; i++) {
321 bits(FpSrcReg1.uqw, (i + offset + 1) * 8 * size - 1,
322 (i + offset) * 8 * size);
323 result = insertBits(result,
324 (2 * i + 1) * 8 * size - 1,
325 (2 * i + 0) * 8 * size,
327 uint64_t pickedHigh =
328 bits(FpSrcReg2.uqw, (i + offset + 1) * 8 * size - 1,
329 (i + offset) * 8 * size);
330 result = insertBits(result,
331 (2 * i + 2) * 8 * size - 1,
332 (2 * i + 1) * 8 * size,
335 FpDestReg.uqw = result;
340 assert(srcSize == destSize * 2);
// Narrows srcBits-wide elements to destBits: the first items / 2 results are
// taken from FpSrcReg1 and the remaining ones from FpSrcReg2, each passing
// through the saturation handling below.
341 int items = (sizeof(FloatRegBits) / destSize);
342 int destBits = destSize * 8;
343 int srcBits = srcSize * 8;
346 for (i = 0; i < items / 2; i++) {
348 bits(FpSrcReg1.uqw, (i + 1) * srcBits - 1,
350 unsigned signBit = bits(picked, srcBits - 1);
351 uint64_t overflow = bits(picked, srcBits - 1, destBits - 1);
353 // Handle saturation.
// NOTE(review): srcSize == 2 * destSize is asserted above, so
// destBits - srcBits + 1 is never positive, while `overflow` holds
// srcBits - destBits + 1 bits. The mask width looks wrong, but the
// surrounding sign/signedness conditions are not visible here, so it is
// left unchanged -- confirm against the full saturation logic.
355 if (overflow != mask(destBits - srcBits + 1)) {
// Build the minimum value from a 64-bit one, not an int.
357 picked = ((uint64_t)1 << (destBits - 1));
364 picked = mask(destBits - 1);
366 picked = mask(destBits);
369 result = insertBits(result,
370 (i + 1) * destBits - 1,
374 for (;i < items; i++) {
// i runs from items / 2 to items - 1 here, so the element index within
// FpSrcReg2 is i - items / 2. Rebasing on `items` produced negative bit
// positions, i.e. a negative shift count inside bits().
376 bits(FpSrcReg2.uqw, (i - items / 2 + 1) * srcBits - 1,
377 (i - items / 2 + 0) * srcBits);
378 unsigned signBit = bits(picked, srcBits - 1);
379 uint64_t overflow = bits(picked, srcBits - 1, destBits - 1);
381 // Handle saturation.
// NOTE(review): same non-positive mask width as in the first loop.
383 if (overflow != mask(destBits - srcBits + 1)) {
385 picked = ((uint64_t)1 << (destBits - 1));
392 picked = mask(destBits - 1);
394 picked = mask(destBits);
397 result = insertBits(result,
398 (i + 1) * destBits - 1,
402 FpDestReg.uqw = result;
# Takes only dest/src1/src2 and passes 1 as the size argument of
# MediaOp.__init__. The C++ body XORs the two source quadwords into the
# destination.
406 def __init__(self, dest, src1, src2):
407 super(Mxor, self).__init__(dest, src1, src2, 1)
409 FpDestReg.uqw = FpSrcReg1.uqw ^ FpSrcReg2.uqw;
# Takes only dest/src1/src2 and passes 1 as the size argument of
# MediaOp.__init__. The C++ body ORs the two source quadwords into the
# destination.
413 def __init__(self, dest, src1, src2):
414 super(Mor, self).__init__(dest, src1, src2, 1)
416 FpDestReg.uqw = FpSrcReg1.uqw | FpSrcReg2.uqw;
# Takes only dest/src1/src2 and passes 1 as the size argument of
# MediaOp.__init__. The C++ body ANDs the two source quadwords into the
# destination.
420 def __init__(self, dest, src1, src2):
421 super(Mand, self).__init__(dest, src1, src2, 1)
423 FpDestReg.uqw = FpSrcReg1.uqw & FpSrcReg2.uqw;
# AND-NOT microop: the constructor passes 1 as the size argument of
# MediaOp.__init__, and the C++ body writes (~FpSrcReg1) & FpSrcReg2 -- only
# the first source is inverted.
426 class Mandn(MediaOp):
427 def __init__(self, dest, src1, src2):
428 super(Mandn, self).__init__(dest, src1, src2, 1)
430 FpDestReg.uqw = ~FpSrcReg1.uqw & FpSrcReg2.uqw;
# Element-wise selection between FpSrcReg1 and FpSrcReg2 for 4- or 8-byte
# elements (both asserted). One element is processed when ext bit 0 is set,
# otherwise sizeof(FloatRegBits) / size of them. Each slot of the result,
# which starts out as the old FpDestReg value, receives either arg1Bits or
# arg2Bits. The comparison that chooses between them is not visible in this
# excerpt; presumably it picks the minimum, per the class name.
433 class Mminf(MediaOp):
446 assert(srcSize == destSize);
448 int sizeBits = size * 8;
449 assert(srcSize == 4 || srcSize == 8);
450 int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
451 uint64_t result = FpDestReg.uqw;
453 for (int i = 0; i < items; i++) {
455 int hiIndex = (i + 1) * sizeBits - 1;
456 int loIndex = (i + 0) * sizeBits;
457 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
458 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
475 result = insertBits(result, hiIndex, loIndex, arg1Bits);
477 result = insertBits(result, hiIndex, loIndex, arg2Bits);
480 FpDestReg.uqw = result;
# Element-wise selection between FpSrcReg1 and FpSrcReg2 for 4- or 8-byte
# elements (both asserted). One element is processed when ext bit 0 is set,
# otherwise sizeof(FloatRegBits) / size of them. Each slot of the result,
# which starts out as the old FpDestReg value, receives either arg1Bits or
# arg2Bits. The comparison that chooses between them is not visible in this
# excerpt; presumably it picks the maximum, per the class name.
483 class Mmaxf(MediaOp):
496 assert(srcSize == destSize);
498 int sizeBits = size * 8;
499 assert(srcSize == 4 || srcSize == 8);
500 int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
501 uint64_t result = FpDestReg.uqw;
503 for (int i = 0; i < items; i++) {
505 int hiIndex = (i + 1) * sizeBits - 1;
506 int loIndex = (i + 0) * sizeBits;
507 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
508 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
525 result = insertBits(result, hiIndex, loIndex, arg1Bits);
527 result = insertBits(result, hiIndex, loIndex, arg2Bits);
530 FpDestReg.uqw = result;
533 class Mmini(MediaOp):
536 assert(srcSize == destSize);
538 int sizeBits = size * 8;
539 int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
540 uint64_t result = FpDestReg.uqw;
542 for (int i = 0; i < items; i++) {
543 int hiIndex = (i + 1) * sizeBits - 1;
544 int loIndex = (i + 0) * sizeBits;
545 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
546 int64_t arg1 = arg1Bits |
547 (0 - (arg1Bits & (1 << (sizeBits - 1))));
548 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
549 int64_t arg2 = arg2Bits |
550 (0 - (arg2Bits & (1 << (sizeBits - 1))));
560 if (arg1Bits < arg2Bits) {
566 result = insertBits(result, hiIndex, loIndex, resBits);
568 FpDestReg.uqw = result;
571 class Mmaxi(MediaOp):
574 assert(srcSize == destSize);
576 int sizeBits = size * 8;
577 int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
578 uint64_t result = FpDestReg.uqw;
580 for (int i = 0; i < items; i++) {
581 int hiIndex = (i + 1) * sizeBits - 1;
582 int loIndex = (i + 0) * sizeBits;
583 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
584 int64_t arg1 = arg1Bits |
585 (0 - (arg1Bits & (1 << (sizeBits - 1))));
586 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
587 int64_t arg2 = arg2Bits |
588 (0 - (arg2Bits & (1 << (sizeBits - 1))));
598 if (arg1Bits > arg2Bits) {
604 result = insertBits(result, hiIndex, loIndex, resBits);
606 FpDestReg.uqw = result;
# Single-source microop: the constructor passes the literal string
# "InstRegIndex(0)" to MediaOp.__init__ as op2. The C++ body walks the 4- or
# 8-byte elements of FpSrcReg1 (one element when ext bit 0 is set) and writes
# argBits back into the matching slot of the result. The step that transforms
# argBits is not visible in this excerpt; presumably a square root, per the
# class name.
609 class Msqrt(MediaOp):
610 def __init__(self, dest, src, \
611 size = None, destSize = None, srcSize = None, ext = None):
612 super(Msqrt, self).__init__(dest, src,\
613 "InstRegIndex(0)", size, destSize, srcSize, ext)
626 assert(srcSize == destSize);
628 int sizeBits = size * 8;
629 assert(srcSize == 4 || srcSize == 8);
630 int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
631 uint64_t result = FpDestReg.uqw;
633 for (int i = 0; i < items; i++) {
634 int hiIndex = (i + 1) * sizeBits - 1;
635 int loIndex = (i + 0) * sizeBits;
636 uint64_t argBits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
649 result = insertBits(result, hiIndex, loIndex, argBits);
651 FpDestReg.uqw = result;
# Element-wise floating-point addition of FpSrcReg1 and FpSrcReg2 for 4- or
# 8-byte elements (both asserted). The float path adds the .f members of
# floatInt values and the double path adds the .d members of doubleInt
# values. One element is processed when ext bit 0 is set, otherwise
# sizeof(FloatRegBits) / size; the result starts out as the old FpDestReg.
654 class Maddf(MediaOp):
667 assert(srcSize == destSize);
669 int sizeBits = size * 8;
670 assert(srcSize == 4 || srcSize == 8);
671 int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
672 uint64_t result = FpDestReg.uqw;
674 for (int i = 0; i < items; i++) {
675 int hiIndex = (i + 1) * sizeBits - 1;
676 int loIndex = (i + 0) * sizeBits;
677 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
678 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
682 floatInt arg1, arg2, res;
685 res.f = arg1.f + arg2.f;
688 doubleInt arg1, arg2, res;
691 res.d = arg1.d + arg2.d;
695 result = insertBits(result, hiIndex, loIndex, resBits);
697 FpDestReg.uqw = result;
# Element-wise floating-point subtraction (FpSrcReg1 element minus FpSrcReg2
# element) for 4- or 8-byte elements (both asserted). The float path uses the
# .f members of floatInt values and the double path the .d members of
# doubleInt values. One element is processed when ext bit 0 is set, otherwise
# sizeof(FloatRegBits) / size; the result starts out as the old FpDestReg.
700 class Msubf(MediaOp):
713 assert(srcSize == destSize);
715 int sizeBits = size * 8;
716 assert(srcSize == 4 || srcSize == 8);
717 int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
718 uint64_t result = FpDestReg.uqw;
720 for (int i = 0; i < items; i++) {
721 int hiIndex = (i + 1) * sizeBits - 1;
722 int loIndex = (i + 0) * sizeBits;
723 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
724 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
728 floatInt arg1, arg2, res;
731 res.f = arg1.f - arg2.f;
734 doubleInt arg1, arg2, res;
737 res.d = arg1.d - arg2.d;
741 result = insertBits(result, hiIndex, loIndex, resBits);
743 FpDestReg.uqw = result;
# Element-wise floating-point multiplication of FpSrcReg1 and FpSrcReg2 for
# 4- or 8-byte elements (both asserted). The float path multiplies the .f
# members of floatInt values and the double path the .d members of doubleInt
# values. One element is processed when ext bit 0 is set, otherwise
# sizeof(FloatRegBits) / size; the result starts out as the old FpDestReg.
746 class Mmulf(MediaOp):
759 assert(srcSize == destSize);
761 int sizeBits = size * 8;
762 assert(srcSize == 4 || srcSize == 8);
763 int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
764 uint64_t result = FpDestReg.uqw;
766 for (int i = 0; i < items; i++) {
767 int hiIndex = (i + 1) * sizeBits - 1;
768 int loIndex = (i + 0) * sizeBits;
769 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
770 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
774 floatInt arg1, arg2, res;
777 res.f = arg1.f * arg2.f;
780 doubleInt arg1, arg2, res;
783 res.d = arg1.d * arg2.d;
787 result = insertBits(result, hiIndex, loIndex, resBits);
789 FpDestReg.uqw = result;
# Element-wise floating-point division (FpSrcReg1 element divided by FpSrcReg2
# element) for 4- or 8-byte elements (both asserted). The float path uses the
# .f members of floatInt values and the double path the .d members of
# doubleInt values. One element is processed when ext bit 0 is set, otherwise
# sizeof(FloatRegBits) / size; the result starts out as the old FpDestReg.
792 class Mdivf(MediaOp):
805 assert(srcSize == destSize);
807 int sizeBits = size * 8;
808 assert(srcSize == 4 || srcSize == 8);
809 int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
810 uint64_t result = FpDestReg.uqw;
812 for (int i = 0; i < items; i++) {
813 int hiIndex = (i + 1) * sizeBits - 1;
814 int loIndex = (i + 0) * sizeBits;
815 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
816 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
820 floatInt arg1, arg2, res;
823 res.f = arg1.f / arg2.f;
826 doubleInt arg1, arg2, res;
829 res.d = arg1.d / arg2.d;
833 result = insertBits(result, hiIndex, loIndex, resBits);
835 FpDestReg.uqw = result;
838 class Maddi(MediaOp):
840 assert(srcSize == destSize);
842 int sizeBits = size * 8;
843 int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
844 uint64_t result = FpDestReg.uqw;
846 for (int i = 0; i < items; i++) {
847 int hiIndex = (i + 1) * sizeBits - 1;
848 int loIndex = (i + 0) * sizeBits;
849 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
850 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
851 uint64_t resBits = arg1Bits + arg2Bits;
854 if (findCarry(sizeBits, resBits, arg1Bits, arg2Bits))
855 resBits = mask(sizeBits);
856 } else if (ext & 0x4) {
857 int arg1Sign = bits(arg1Bits, sizeBits - 1);
858 int arg2Sign = bits(arg2Bits, sizeBits - 1);
859 int resSign = bits(resBits, sizeBits - 1);
860 if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
862 resBits = (1 << (sizeBits - 1));
864 resBits = mask(sizeBits - 1);
868 result = insertBits(result, hiIndex, loIndex, resBits);
870 FpDestReg.uqw = result;
873 class Msubi(MediaOp):
875 assert(srcSize == destSize);
877 int sizeBits = size * 8;
878 int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
879 uint64_t result = FpDestReg.uqw;
881 for (int i = 0; i < items; i++) {
882 int hiIndex = (i + 1) * sizeBits - 1;
883 int loIndex = (i + 0) * sizeBits;
884 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
885 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
886 uint64_t resBits = arg1Bits - arg2Bits;
889 if (arg2Bits > arg1Bits) {
891 } else if (!findCarry(sizeBits, resBits,
892 arg1Bits, ~arg2Bits)) {
893 resBits = mask(sizeBits);
895 } else if (ext & 0x4) {
896 int arg1Sign = bits(arg1Bits, sizeBits - 1);
897 int arg2Sign = !bits(arg2Bits, sizeBits - 1);
898 int resSign = bits(resBits, sizeBits - 1);
899 if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
901 resBits = (1 << (sizeBits - 1));
903 resBits = mask(sizeBits - 1);
907 result = insertBits(result, hiIndex, loIndex, resBits);
909 FpDestReg.uqw = result;
912 class Mmuli(MediaOp):
914 int srcBits = srcSize * 8;
915 int destBits = destSize * 8;
916 assert(destBits <= 64);
917 assert(destSize >= srcSize);
918 int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / destSize);
919 uint64_t result = FpDestReg.uqw;
921 for (int i = 0; i < items; i++) {
925 offset = i * (destBits - srcBits);
927 offset = i * (destBits - srcBits) + srcBits;
929 int srcHiIndex = (i + 1) * srcBits - 1 + offset;
930 int srcLoIndex = (i + 0) * srcBits + offset;
931 uint64_t arg1Bits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
932 uint64_t arg2Bits = bits(FpSrcReg2.uqw, srcHiIndex, srcLoIndex);
936 int64_t arg1 = arg1Bits |
937 (0 - (arg1Bits & (1 << (srcBits - 1))));
938 int64_t arg2 = arg2Bits |
939 (0 - (arg2Bits & (1 << (srcBits - 1))));
940 resBits = (uint64_t)(arg1 * arg2);
942 resBits = arg1Bits * arg2Bits;
946 resBits += (1 << (destBits - 1));
949 resBits >>= destBits;
951 int destHiIndex = (i + 1) * destBits - 1;
952 int destLoIndex = (i + 0) * destBits;
953 result = insertBits(result, destHiIndex, destLoIndex, resBits);
955 FpDestReg.uqw = result;
960 assert(srcSize == destSize);
// Element-wise average of FpSrcReg1 and FpSrcReg2, rounded up: each result
// element is (a + b + 1) / 2 computed on the unsigned element bits. One
// element is processed when ext bit 0 is set, otherwise
// sizeof(FloatRegBits) / size; the result starts out as the old FpDestReg.
962 int sizeBits = size * 8;
963 int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
964 uint64_t result = FpDestReg.uqw;
966 for (int i = 0; i < items; i++) {
967 int hiIndex = (i + 1) * sizeBits - 1;
968 int loIndex = (i + 0) * sizeBits;
969 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
970 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
971 uint64_t resBits = (arg1Bits + arg2Bits + 1) / 2;
973 result = insertBits(result, hiIndex, loIndex, resBits);
975 FpDestReg.uqw = result;
980 int srcBits = srcSize * 8;
// Walks every srcSize-byte element of both sources and takes the signed
// difference of each pair; the final value written is `sum` truncated to
// destSize * 8 bits. How each difference is folded into `sum` is not visible
// in this excerpt.
981 int items = sizeof(FloatRegBits) / srcSize;
984 for (int i = 0; i < items; i++) {
985 int hiIndex = (i + 1) * srcBits - 1;
986 int loIndex = (i + 0) * srcBits;
987 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
988 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
989 int64_t resBits = arg1Bits - arg2Bits;
994 FpDestReg.uqw = sum & mask(destSize * 8);
1000 assert(srcSize == destSize);
// Element-wise right shift of FpSrcReg1 by the amount in op2. Shift amounts
// of sizeBits or more take a separate branch (body not visible here);
// otherwise the shifted value is masked to the sizeBits - shiftAmt bits that
// remain, so the vacated high bits are zero.
1002 int sizeBits = size * 8;
1003 int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
1004 uint64_t shiftAmt = op2.uqw;
1005 uint64_t result = FpDestReg.uqw;
1007 for (int i = 0; i < items; i++) {
1008 int hiIndex = (i + 1) * sizeBits - 1;
1009 int loIndex = (i + 0) * sizeBits;
1010 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
1012 if (shiftAmt >= sizeBits) {
1015 resBits = (arg1Bits >> shiftAmt) &
1016 mask(sizeBits - shiftAmt);
1019 result = insertBits(result, hiIndex, loIndex, resBits);
1021 FpDestReg.uqw = result;
1024 class Msra(MediaOp):
1027 assert(srcSize == destSize);
1029 int sizeBits = size * 8;
1030 int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
1031 uint64_t shiftAmt = op2.uqw;
1032 uint64_t result = FpDestReg.uqw;
1034 for (int i = 0; i < items; i++) {
1035 int hiIndex = (i + 1) * sizeBits - 1;
1036 int loIndex = (i + 0) * sizeBits;
1037 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
1039 if (shiftAmt >= sizeBits) {
1040 if (bits(arg1Bits, sizeBits - 1))
1041 resBits = mask(sizeBits);
1045 resBits = (arg1Bits >> shiftAmt);
1047 (0 - (resBits & (1 << (sizeBits - 1 - shiftAmt))));
1050 result = insertBits(result, hiIndex, loIndex, resBits);
1052 FpDestReg.uqw = result;
# Element-wise left shift of FpSrcReg1 by the amount in op2. Shift amounts of
# sizeBits or more take a separate branch (body not visible in this excerpt);
# otherwise the element is shifted left and inserted into the result slot.
# One element is processed when ext bit 0 is set, otherwise
# sizeof(FloatRegBits) / size.
1055 class Msll(MediaOp):
1058 assert(srcSize == destSize);
1060 int sizeBits = size * 8;
1061 int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
1062 uint64_t shiftAmt = op2.uqw;
1063 uint64_t result = FpDestReg.uqw;
1065 for (int i = 0; i < items; i++) {
1066 int hiIndex = (i + 1) * sizeBits - 1;
1067 int loIndex = (i + 0) * sizeBits;
1068 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
1070 if (shiftAmt >= sizeBits) {
1073 resBits = (arg1Bits << shiftAmt);
1076 result = insertBits(result, hiIndex, loIndex, resBits);
1078 FpDestReg.uqw = result;
1081 class Cvti2f(MediaOp):
1082 def __init__(self, dest, src, \
1083 size = None, destSize = None, srcSize = None, ext = None):
1084 super(Cvti2f, self).__init__(dest, src,\
1085 "InstRegIndex(0)", size, destSize, srcSize, ext)
1098 assert(destSize == 4 || destSize == 8);
1099 assert(srcSize == 4 || srcSize == 8);
1100 int srcSizeBits = srcSize * 8;
1101 int destSizeBits = destSize * 8;
1105 if (srcSize == 2 * destSize) {
1106 items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / srcSize;
1108 destStart = destSizeBits * items;
1109 } else if (destSize == 2 * srcSize) {
1110 items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
1112 srcStart = srcSizeBits * items;
1114 items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
1116 uint64_t result = FpDestReg.uqw;
1118 for (int i = 0; i < items; i++) {
1119 int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
1120 int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
1121 uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
1122 int64_t sArg = argBits | (0 - (argBits & (1 << srcHiIndex)));
1125 if (destSize == 4) {
1134 int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
1135 int destLoIndex = destStart + (i + 0) * destSizeBits;
1136 result = insertBits(result, destHiIndex, destLoIndex, argBits);
1138 FpDestReg.uqw = result;
# Single-source conversion microop: the constructor passes the literal string
# "InstRegIndex(0)" to MediaOp.__init__ as op2. The C++ body reads 4- or
# 8-byte elements from FpSrcReg1 and writes argBits into FpDestReg at
# destStart-relative positions. The conversion arithmetic itself is not
# visible in this excerpt. When source elements are twice the destination
# width the element count is based on srcSize; otherwise it is based on
# destSize. One element is processed when ext bit 0 is set.
1141 class Cvtf2f(MediaOp):
1142 def __init__(self, dest, src, \
1143 size = None, destSize = None, srcSize = None, ext = None):
1144 super(Cvtf2f, self).__init__(dest, src,\
1145 "InstRegIndex(0)", size, destSize, srcSize, ext)
1158 assert(destSize == 4 || destSize == 8);
1159 assert(srcSize == 4 || srcSize == 8);
1160 int srcSizeBits = srcSize * 8;
1161 int destSizeBits = destSize * 8;
1165 if (srcSize == 2 * destSize) {
1166 items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / srcSize;
1168 destStart = destSizeBits * items;
1169 } else if (destSize == 2 * srcSize) {
1170 items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
1172 srcStart = srcSizeBits * items;
1174 items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
1176 uint64_t result = FpDestReg.uqw;
1178 for (int i = 0; i < items; i++) {
1179 int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
1180 int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
1181 uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
1193 if (destSize == 4) {
1202 int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
1203 int destLoIndex = destStart + (i + 0) * destSizeBits;
1204 result = insertBits(result, destHiIndex, destLoIndex, argBits);
1206 FpDestReg.uqw = result;
1209 class Mcmpi2r(MediaOp):
1222 assert(srcSize == destSize);
1224 int sizeBits = size * 8;
1225 int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
1226 uint64_t result = FpDestReg.uqw;
1228 for (int i = 0; i < items; i++) {
1229 int hiIndex = (i + 1) * sizeBits - 1;
1230 int loIndex = (i + 0) * sizeBits;
1231 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
1232 int64_t arg1 = arg1Bits |
1233 (0 - (arg1Bits & (1 << (sizeBits - 1))));
1234 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
1235 int64_t arg2 = arg2Bits |
1236 (0 - (arg2Bits & (1 << (sizeBits - 1))));
1238 uint64_t resBits = 0;
1239 if ((ext & 0x2) == 0 && arg1 == arg2 ||
1240 (ext & 0x2) == 0x2 && arg1 > arg2)
1241 resBits = mask(sizeBits);
1243 result = insertBits(result, hiIndex, loIndex, resBits);
1245 FpDestReg.uqw = result;