1 // Copyright (c) 2009 The Regents of The University of Michigan
2 // All rights reserved.
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met: redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer;
8 // redistributions in binary form must reproduce the above copyright
9 // notice, this list of conditions and the following disclaimer in the
10 // documentation and/or other materials provided with the distribution;
11 // neither the name of the copyright holders nor the names of its
12 // contributors may be used to endorse or promote products derived from
13 // this software without specific prior written permission.
15 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
19 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
21 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 // Authors: Gabe Black
// Template for the execute() method generated for each media microop class.
// NOTE(review): only the signature, the fault initialisation and one body
// comment are visible in this excerpt; the rest of the body is not shown.
29 def template MediaOpExecute {{
30 Fault %(class_name)s::execute(%(CPU_exec_context)s *xc,
31 Trace::InstRecord *traceData) const
33 Fault fault = NoFault;
40 //Write the resulting state to the execution context
// Declaration template for the register-operand flavour of a media microop:
// a class derived from %(base_class)s whose constructors take two register
// sources (_src1, _src2), a destination, source/dest sizes and an ext field.
// Two constructors are declared; they differ only in whether the
// isMicro/isDelayed/isFirst/isLast flags are passed explicitly.
49 def template MediaOpRegDeclare {{
50 class %(class_name)s : public %(base_class)s
56 %(class_name)s(ExtMachInst _machInst,
57 const char * instMnem,
58 bool isMicro, bool isDelayed, bool isFirst, bool isLast,
59 InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
60 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
62 %(class_name)s(ExtMachInst _machInst,
63 const char * instMnem,
64 InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
65 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
// Declaration template for the immediate-operand flavour of a media microop.
// Same shape as the register flavour, except that the second operand is a
// uint16_t immediate (_imm8) instead of a register index.
71 def template MediaOpImmDeclare {{
73 class %(class_name)s : public %(base_class)s
79 %(class_name)s(ExtMachInst _machInst,
80 const char * instMnem,
81 bool isMicro, bool isDelayed, bool isFirst, bool isLast,
82 InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
83 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
85 %(class_name)s(ExtMachInst _machInst,
86 const char * instMnem,
87 InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
88 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
// Definition template for the register-operand flavour: buildMe() plus the
// two constructors, each forwarding its arguments to %(base_class)s together
// with the "%(mnemonic)s" string.
94 def template MediaOpRegConstructor {{
96 inline void %(class_name)s::buildMe()
// Constructor without flag parameters: all four flags are passed as false.
101 inline %(class_name)s::%(class_name)s(
102 ExtMachInst machInst, const char * instMnem,
103 InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
104 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
105 %(base_class)s(machInst, "%(mnemonic)s", instMnem,
106 false, false, false, false,
107 _src1, _src2, _dest, _srcSize, _destSize, _ext,
// Constructor with explicit flags: they are forwarded to the base unchanged.
113 inline %(class_name)s::%(class_name)s(
114 ExtMachInst machInst, const char * instMnem,
115 bool isMicro, bool isDelayed, bool isFirst, bool isLast,
116 InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
117 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
118 %(base_class)s(machInst, "%(mnemonic)s", instMnem,
119 isMicro, isDelayed, isFirst, isLast,
120 _src1, _src2, _dest, _srcSize, _destSize, _ext,
// Definition template for the immediate-operand flavour: buildMe() plus the
// two constructors, each forwarding its arguments (with _imm8 as the second
// operand) to %(base_class)s together with the "%(mnemonic)s" string.
127 def template MediaOpImmConstructor {{
129 inline void %(class_name)s::buildMe()
// Constructor without flag parameters: all four flags are passed as false.
134 inline %(class_name)s::%(class_name)s(
135 ExtMachInst machInst, const char * instMnem,
136 InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
137 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
138 %(base_class)s(machInst, "%(mnemonic)s", instMnem,
139 false, false, false, false,
140 _src1, _imm8, _dest, _srcSize, _destSize, _ext,
// Constructor with explicit flags: they are forwarded to the base unchanged.
146 inline %(class_name)s::%(class_name)s(
147 ExtMachInst machInst, const char * instMnem,
148 bool isMicro, bool isDelayed, bool isFirst, bool isLast,
149 InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
150 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
151 %(base_class)s(machInst, "%(mnemonic)s", instMnem,
152 isMicro, isDelayed, isFirst, isLast,
153 _src1, _imm8, _dest, _srcSize, _destSize, _ext,
161 # Make these empty strings so that concatenating onto
162 # them will always work.
169 MediaOpImmConstructor,
174 MediaOpRegConstructor,
# Metaclass for media microops. From the lines visible here: creating a class
# runs buildCppClasses() on its code and registers the class in the
# microopClasses dict (also under name + 'i' when the code mentions op2).
# NOTE(review): several lines of this class are not visible in this excerpt.
177 class MediaOpMeta(type):
# Expand `code` through one of the template sets and append the results to
# the header/decoder/exec output globals. When the code refers to op2, the
# method recurses twice instead: once with op2 replaced by the FpSrcReg2
# register operand and once (name + "i", suffix + "Imm") with op2 replaced
# by imm8.
178 def buildCppClasses(self, name, Name, suffix, code):
180 # Globals to stick the output in
182 global decoder_output
185 # If op2 is used anywhere, make register and immediate versions
# The pattern captures an optional "s" prefix and an optional ".qualifier"
# suffix so both can be carried over onto the FpSrcReg2 replacement name.
187 matcher = re.compile("(?<!\\w)(?P<prefix>s?)op2(?P<typeQual>\\.\\w+)?")
188 match = matcher.search(code)
191 if match.group("typeQual"):
192 typeQual = match.group("typeQual")
193 src2_name = "%sFpSrcReg2%s" % (match.group("prefix"), typeQual)
194 self.buildCppClasses(name, Name, suffix,
195 matcher.sub(src2_name, code))
196 self.buildCppClasses(name + "i", Name, suffix + "Imm",
197 matcher.sub("imm8", code))
200 base = "X86ISA::MediaOp"
202 # If imm8 shows up in the code, use the immediate templates, if
203 # not, hopefully the register ones will be correct.
204 matcher = re.compile("(?<!\w)imm8(?!\w)")
205 if matcher.search(code):
207 templates = immTemplates
210 templates = regTemplates
212 # Get everything ready for the substitution
213 iop = InstObjParams(name, Name + suffix, base, {"code" : code})
215 # Generate the actual code (finally!)
# Template order in the chosen set: [0] declaration, [1] decoder, [2] exec.
216 header_output += templates[0].subst(iop)
217 decoder_output += templates[1].subst(iop)
218 exec_output += templates[2].subst(iop)
# Class-creation hook: builds the Python class, records its base mnemonic,
# generates the C++ classes and registers the class with the microassembler.
# NOTE(review): where `name` and `code` come from is not visible here.
221 def __new__(mcls, Name, bases, dict):
224 if "abstract" in dict:
225 abstract = dict['abstract']
228 cls = super(MediaOpMeta, mcls).__new__(mcls, Name, bases, dict)
231 cls.base_mnemonic = name
234 # Set up the C++ classes
235 mcls.buildCppClasses(cls, name, Name, "", code)
237 # Hook into the microassembler dict
238 global microopClasses
239 microopClasses[name] = cls
241 # If op2 is used anywhere, make register and immediate versions
# NOTE(review): unlike the pattern in buildCppClasses, this one has no
# (?<!\w) guard, so it also matches "op2" inside a longer identifier; the
# two checks can disagree about whether an immediate variant exists.
243 matcher = re.compile("op2(?P<typeQual>\\.\\w+)?")
244 if matcher.search(code):
245 microopClasses[name + 'i'] = cls
# Python-side base class for media microops; MediaOpMeta (its metaclass)
# does the C++ code generation and registration for each subclass.
249 class MediaOp(X86Microop):
250 __metaclass__ = MediaOpMeta
251 # This class itself doesn't act as a microop
# Record the operands and sizes of one microop instance. Explicit srcSize /
# destSize arguments override whatever was set before them; both must be
# set by the end, otherwise an Exception is raised.
# NOTE(review): how `size` and `ext` are handled is not visible here.
254 def __init__(self, dest, src1, op2,
255 size = None, destSize = None, srcSize = None, ext = None):
262 if srcSize is not None:
263 self.srcSize = srcSize
264 if destSize is not None:
265 self.destSize = destSize
266 if self.srcSize is None:
267 raise Exception, "Source size not set."
268 if self.destSize is None:
269 raise Exception, "Dest size not set."
# Build the text of the C++ `new <class>(...)` expression that allocates this
# microop, filling in flags, operands, sizes and ext from the instance.
275 def getAllocator(self, *microFlags):
276 className = self.className
# The immediate variant is recognised by the trailing 'i' on the mnemonic.
# NOTE(review): the body of this branch is not visible in this excerpt.
277 if self.mnemonic == self.base_mnemonic + 'i':
279 allocator = '''new %(class_name)s(machInst, macrocodeBlock
280 %(flags)s, %(src1)s, %(op2)s, %(dest)s,
281 %(srcSize)s, %(destSize)s, %(ext)s)''' % {
282 "class_name" : className,
283 "flags" : self.microFlagsText(microFlags),
284 "src1" : self.src1, "op2" : self.op2,
286 "srcSize" : self.srcSize,
287 "destSize" : self.destSize,
291 class Mov2int(MediaOp):
292 def __init__(self, dest, src1, src2 = 0, \
293 size = None, destSize = None, srcSize = None, ext = None):
294 super(Mov2int, self).__init__(dest, src1,\
295 src2, size, destSize, srcSize, ext)
297 int items = sizeof(FloatRegBits) / srcSize;
// items is the number of srcSize-byte elements in one FloatRegBits value.
// NOTE(review): the statement guarded by the test below is not visible here.
299 if (bits(src1, 0) && (ext & 0x1))
// Only an in-range offset selects an element; the bit range computed below
// covers element number `offset`, counted in srcSize-byte units.
301 if (offset >= 0 && offset < items) {
304 (offset + 1) * srcSize * 8 - 1,
305 (offset + 0) * srcSize * 8);
// Write to the integer destination through merge() at destSize width;
// fpSrcReg1 is presumably the element extracted just above.
306 DestReg = merge(0, fpSrcReg1, destSize);
312 class Mov2fp(MediaOp):
313 def __init__(self, dest, src1, src2 = 0, \
314 size = None, destSize = None, srcSize = None, ext = None):
315 super(Mov2fp, self).__init__(dest, src1,\
316 src2, size, destSize, srcSize, ext)
318 int items = sizeof(FloatRegBits) / destSize;
// items is the number of destSize-byte elements in one FloatRegBits value.
// NOTE(review): the statement guarded by the test below is not visible here.
320 if (bits(dest, 0) && (ext & 0x1))
// For an in-range offset, take srcSize bytes of SrcReg1 (via pick) and
// insert them as element number `offset` of the FP destination.
322 if (offset >= 0 && offset < items) {
323 uint64_t srcReg1 = pick(SrcReg1, 0, srcSize);
325 insertBits(FpDestReg.uqw,
326 (offset + 1) * destSize * 8 - 1,
327 (offset + 0) * destSize * 8, srcReg1);
// NOTE(review): self-assignment; presumably the out-of-range path, leaving
// the destination value unchanged while still writing it. Confirm.
329 FpDestReg.uqw = FpDestReg.uqw;
333 class Movsign(MediaOp):
334 def __init__(self, dest, src, \
335 size = None, destSize = None, srcSize = None, ext = None):
336 super(Movsign, self).__init__(dest, src,\
337 "InstRegIndex(0)", size, destSize, srcSize, ext)
339 int items = sizeof(FloatRegBits) / srcSize;
// Gathers the top (sign) bit of every srcSize-byte element of FpSrcReg1
// into consecutive bits of `result`. With ext bit 0 set the bits are placed
// `items` positions higher, i.e. in the next group of result bits.
341 int offset = (ext & 0x1) ? items : 0;
342 for (int i = 0; i < items; i++) {
344 bits(FpSrcReg1.uqw, (i + 1) * 8 * srcSize - 1);
345 result = insertBits(result, i + offset, i + offset, picked);
// OR into the existing destination, so bits already set there are kept.
347 DestReg = DestReg | result;
350 class Maskmov(MediaOp):
352 assert(srcSize == destSize);
354 int sizeBits = size * 8;
// ext bit 0 selects the scalar form (one lane); otherwise all lanes are done.
355 int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
// Start from the current destination so unselected lanes keep their value.
356 uint64_t result = FpDestReg.uqw;
// Lane-wise masked move: a lane of FpSrcReg1 is copied into the result only
// when the top bit of the same lane in FpSrcReg2 is set.
358 for (int i = 0; i < items; i++) {
359 int hiIndex = (i + 1) * sizeBits - 1;
360 int loIndex = (i + 0) * sizeBits;
361 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
362 if (bits(FpSrcReg2.uqw, hiIndex))
363 result = insertBits(result, hiIndex, loIndex, arg1Bits);
365 FpDestReg.uqw = result;
368 class shuffle(MediaOp):
370 assert(srcSize == destSize);
372 int sizeBits = size * 8;
373 int items = sizeof(FloatRegBits) / size;
// For each destination lane, a per-lane selector picks the source lane.
// NOTE(review): how `sel` and `optionBits` are derived is not visible here.
387 for (int i = 0; i < items; i++) {
389 uint8_t lsel = sel & mask(optionBits);
// Selectors that index past the end of one register read from FpSrcReg2;
// the others read from FpSrcReg1.
390 if (lsel * size >= sizeof(FloatRegBits)) {
392 resBits = bits(FpSrcReg2.uqw,
393 (lsel + 1) * sizeBits - 1,
394 (lsel + 0) * sizeBits);
396 resBits = bits(FpSrcReg1.uqw,
397 (lsel + 1) * sizeBits - 1,
398 (lsel + 0) * sizeBits);
// Place the chosen lane at position i of the result.
403 int hiIndex = (i + 1) * sizeBits - 1;
404 int loIndex = (i + 0) * sizeBits;
405 result = insertBits(result, hiIndex, loIndex, resBits);
407 FpDestReg.uqw = result;
410 class Unpack(MediaOp):
412 assert(srcSize == destSize);
// Only half of each source register is consumed: `items` lanes per source.
414 int items = (sizeof(FloatRegBits) / size) / 2;
// A non-zero ext selects the upper half of the sources instead of the lower.
415 int offset = ext ? items : 0;
// Interleave: lane (i + offset) of FpSrcReg1 goes to result lane 2*i and
// the same lane of FpSrcReg2 goes to result lane 2*i + 1.
417 for (int i = 0; i < items; i++) {
419 bits(FpSrcReg1.uqw, (i + offset + 1) * 8 * size - 1,
420 (i + offset) * 8 * size);
421 result = insertBits(result,
422 (2 * i + 1) * 8 * size - 1,
423 (2 * i + 0) * 8 * size,
425 uint64_t pickedHigh =
426 bits(FpSrcReg2.uqw, (i + offset + 1) * 8 * size - 1,
427 (i + offset) * 8 * size);
428 result = insertBits(result,
429 (2 * i + 2) * 8 * size - 1,
430 (2 * i + 1) * 8 * size,
433 FpDestReg.uqw = result;
438 assert(srcSize == destSize * 2);
439 int items = (sizeof(FloatRegBits) / destSize);
440 int destBits = destSize * 8;
441 int srcBits = srcSize * 8;
444 for (i = 0; i < items / 2; i++) {
446 bits(FpSrcReg1.uqw, (i + 1) * srcBits - 1,
448 unsigned signBit = bits(picked, srcBits - 1);
449 uint64_t overflow = bits(picked, srcBits - 1, destBits - 1);
451 // Handle saturation.
453 if (overflow != mask(destBits - srcBits + 1)) {
455 picked = (ULL(1) << (destBits - 1));
462 picked = mask(destBits - 1);
464 picked = mask(destBits);
467 result = insertBits(result,
468 (i + 1) * destBits - 1,
472 for (;i < items; i++) {
474 bits(FpSrcReg2.uqw, (i - items + 1) * srcBits - 1,
475 (i - items + 0) * srcBits);
476 unsigned signBit = bits(picked, srcBits - 1);
477 uint64_t overflow = bits(picked, srcBits - 1, destBits - 1);
479 // Handle saturation.
481 if (overflow != mask(destBits - srcBits + 1)) {
483 picked = (ULL(1) << (destBits - 1));
490 picked = mask(destBits - 1);
492 picked = mask(destBits);
495 result = insertBits(result,
496 (i + 1) * destBits - 1,
500 FpDestReg.uqw = result;
504 def __init__(self, dest, src1, src2):
505 super(Mxor, self).__init__(dest, src1, src2, 1)
507 FpDestReg.uqw = FpSrcReg1.uqw ^ FpSrcReg2.uqw;
511 def __init__(self, dest, src1, src2):
512 super(Mor, self).__init__(dest, src1, src2, 1)
514 FpDestReg.uqw = FpSrcReg1.uqw | FpSrcReg2.uqw;
518 def __init__(self, dest, src1, src2):
519 super(Mand, self).__init__(dest, src1, src2, 1)
521 FpDestReg.uqw = FpSrcReg1.uqw & FpSrcReg2.uqw;
# Media microop whose code ANDs the bitwise complement of FpSrcReg1 with
# FpSrcReg2 over the whole register. __init__ takes no size arguments and
# always passes 1 as the size to MediaOp.__init__.
524 class Mandn(MediaOp):
525 def __init__(self, dest, src1, src2):
526 super(Mandn, self).__init__(dest, src1, src2, 1)
528 FpDestReg.uqw = ~FpSrcReg1.uqw & FpSrcReg2.uqw;
531 class Mminf(MediaOp):
544 assert(srcSize == destSize);
546 int sizeBits = size * 8;
// Only 4-byte and 8-byte lanes are accepted.
547 assert(srcSize == 4 || srcSize == 8);
// ext bit 0 selects the scalar form (one lane); otherwise all lanes are done.
548 int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
// Start from the current destination so lanes beyond `items` are preserved.
549 uint64_t result = FpDestReg.uqw;
// Per lane, one of the two source lanes is copied into the result.
// NOTE(review): the comparison that chooses between them is not visible
// here; presumably the smaller floating-point value wins. Confirm.
551 for (int i = 0; i < items; i++) {
553 int hiIndex = (i + 1) * sizeBits - 1;
554 int loIndex = (i + 0) * sizeBits;
555 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
556 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
573 result = insertBits(result, hiIndex, loIndex, arg1Bits);
575 result = insertBits(result, hiIndex, loIndex, arg2Bits);
578 FpDestReg.uqw = result;
581 class Mmaxf(MediaOp):
594 assert(srcSize == destSize);
596 int sizeBits = size * 8;
// Only 4-byte and 8-byte lanes are accepted.
597 assert(srcSize == 4 || srcSize == 8);
// ext bit 0 selects the scalar form (one lane); otherwise all lanes are done.
598 int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
// Start from the current destination so lanes beyond `items` are preserved.
599 uint64_t result = FpDestReg.uqw;
// Per lane, one of the two source lanes is copied into the result.
// NOTE(review): the comparison that chooses between them is not visible
// here; presumably the larger floating-point value wins. Confirm.
601 for (int i = 0; i < items; i++) {
603 int hiIndex = (i + 1) * sizeBits - 1;
604 int loIndex = (i + 0) * sizeBits;
605 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
606 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
623 result = insertBits(result, hiIndex, loIndex, arg1Bits);
625 result = insertBits(result, hiIndex, loIndex, arg2Bits);
628 FpDestReg.uqw = result;
631 class Mmini(MediaOp):
634 assert(srcSize == destSize);
636 int sizeBits = size * 8;
// ext bit 0 selects the scalar form (one lane); otherwise all lanes are done.
637 int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
// Start from the current destination so lanes beyond `items` are preserved.
638 uint64_t result = FpDestReg.uqw;
// Lane-wise integer selection between the two sources.
640 for (int i = 0; i < items; i++) {
641 int hiIndex = (i + 1) * sizeBits - 1;
642 int loIndex = (i + 0) * sizeBits;
643 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
// arg1/arg2 are the lanes sign-extended from bit (sizeBits - 1) to 64 bits.
644 int64_t arg1 = arg1Bits |
645 (0 - (arg1Bits & (ULL(1) << (sizeBits - 1))));
646 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
647 int64_t arg2 = arg2Bits |
648 (0 - (arg2Bits & (ULL(1) << (sizeBits - 1))));
// Unsigned comparison on the raw lane bits.
// NOTE(review): the signed path using arg1/arg2 and the assignments to
// resBits are in lines not visible here.
658 if (arg1Bits < arg2Bits) {
664 result = insertBits(result, hiIndex, loIndex, resBits);
666 FpDestReg.uqw = result;
669 class Mmaxi(MediaOp):
672 assert(srcSize == destSize);
674 int sizeBits = size * 8;
// ext bit 0 selects the scalar form (one lane); otherwise all lanes are done.
675 int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
// Start from the current destination so lanes beyond `items` are preserved.
676 uint64_t result = FpDestReg.uqw;
// Lane-wise integer selection between the two sources.
678 for (int i = 0; i < items; i++) {
679 int hiIndex = (i + 1) * sizeBits - 1;
680 int loIndex = (i + 0) * sizeBits;
681 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
// arg1/arg2 are the lanes sign-extended from bit (sizeBits - 1) to 64 bits.
682 int64_t arg1 = arg1Bits |
683 (0 - (arg1Bits & (ULL(1) << (sizeBits - 1))));
684 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
685 int64_t arg2 = arg2Bits |
686 (0 - (arg2Bits & (ULL(1) << (sizeBits - 1))));
// Unsigned comparison on the raw lane bits.
// NOTE(review): the signed path using arg1/arg2 and the assignments to
// resBits are in lines not visible here.
696 if (arg1Bits > arg2Bits) {
702 result = insertBits(result, hiIndex, loIndex, resBits);
704 FpDestReg.uqw = result;
707 class Msqrt(MediaOp):
708 def __init__(self, dest, src, \
709 size = None, destSize = None, srcSize = None, ext = None):
710 super(Msqrt, self).__init__(dest, src,\
711 "InstRegIndex(0)", size, destSize, srcSize, ext)
724 assert(srcSize == destSize);
726 int sizeBits = size * 8;
// Only 4-byte and 8-byte lanes are accepted.
727 assert(srcSize == 4 || srcSize == 8);
// ext bit 0 selects the scalar form (one lane); otherwise all lanes are done.
728 int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
// Start from the current destination so lanes beyond `items` are preserved.
729 uint64_t result = FpDestReg.uqw;
// Single-source lane-wise operation: each lane of FpSrcReg1 is read into
// argBits and argBits is written back to the same lane of the result.
// NOTE(review): the transformation applied to argBits in between
// (presumably a square root) is not visible here.
731 for (int i = 0; i < items; i++) {
732 int hiIndex = (i + 1) * sizeBits - 1;
733 int loIndex = (i + 0) * sizeBits;
734 uint64_t argBits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
747 result = insertBits(result, hiIndex, loIndex, argBits);
749 FpDestReg.uqw = result;
752 class Maddf(MediaOp):
765 assert(srcSize == destSize);
767 int sizeBits = size * 8;
// Only 4-byte and 8-byte lanes are accepted.
768 assert(srcSize == 4 || srcSize == 8);
// ext bit 0 selects the scalar form (one lane); otherwise all lanes are done.
769 int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
// Start from the current destination so lanes beyond `items` are preserved.
770 uint64_t result = FpDestReg.uqw;
// Lane-wise floating-point addition of FpSrcReg1 and FpSrcReg2.
772 for (int i = 0; i < items; i++) {
773 int hiIndex = (i + 1) * sizeBits - 1;
774 int loIndex = (i + 0) * sizeBits;
775 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
776 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
// floatInt / doubleInt hold the lane for the float (.f) and double (.d)
// cases; how the lane bits are loaded into them is not visible here.
780 floatInt arg1, arg2, res;
783 res.f = arg1.f + arg2.f;
786 doubleInt arg1, arg2, res;
789 res.d = arg1.d + arg2.d;
793 result = insertBits(result, hiIndex, loIndex, resBits);
795 FpDestReg.uqw = result;
798 class Msubf(MediaOp):
811 assert(srcSize == destSize);
813 int sizeBits = size * 8;
// Only 4-byte and 8-byte lanes are accepted.
814 assert(srcSize == 4 || srcSize == 8);
// ext bit 0 selects the scalar form (one lane); otherwise all lanes are done.
815 int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
// Start from the current destination so lanes beyond `items` are preserved.
816 uint64_t result = FpDestReg.uqw;
// Lane-wise floating-point subtraction: FpSrcReg1 lane minus FpSrcReg2 lane.
818 for (int i = 0; i < items; i++) {
819 int hiIndex = (i + 1) * sizeBits - 1;
820 int loIndex = (i + 0) * sizeBits;
821 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
822 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
// floatInt / doubleInt hold the lane for the float (.f) and double (.d)
// cases; how the lane bits are loaded into them is not visible here.
826 floatInt arg1, arg2, res;
829 res.f = arg1.f - arg2.f;
832 doubleInt arg1, arg2, res;
835 res.d = arg1.d - arg2.d;
839 result = insertBits(result, hiIndex, loIndex, resBits);
841 FpDestReg.uqw = result;
844 class Mmulf(MediaOp):
857 assert(srcSize == destSize);
859 int sizeBits = size * 8;
// Only 4-byte and 8-byte lanes are accepted.
860 assert(srcSize == 4 || srcSize == 8);
// ext bit 0 selects the scalar form (one lane); otherwise all lanes are done.
861 int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
// Start from the current destination so lanes beyond `items` are preserved.
862 uint64_t result = FpDestReg.uqw;
// Lane-wise floating-point multiplication of FpSrcReg1 and FpSrcReg2.
864 for (int i = 0; i < items; i++) {
865 int hiIndex = (i + 1) * sizeBits - 1;
866 int loIndex = (i + 0) * sizeBits;
867 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
868 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
// floatInt / doubleInt hold the lane for the float (.f) and double (.d)
// cases; how the lane bits are loaded into them is not visible here.
872 floatInt arg1, arg2, res;
875 res.f = arg1.f * arg2.f;
878 doubleInt arg1, arg2, res;
881 res.d = arg1.d * arg2.d;
885 result = insertBits(result, hiIndex, loIndex, resBits);
887 FpDestReg.uqw = result;
890 class Mdivf(MediaOp):
903 assert(srcSize == destSize);
905 int sizeBits = size * 8;
// Only 4-byte and 8-byte lanes are accepted.
906 assert(srcSize == 4 || srcSize == 8);
// ext bit 0 selects the scalar form (one lane); otherwise all lanes are done.
907 int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
// Start from the current destination so lanes beyond `items` are preserved.
908 uint64_t result = FpDestReg.uqw;
// Lane-wise floating-point division: FpSrcReg1 lane over FpSrcReg2 lane.
910 for (int i = 0; i < items; i++) {
911 int hiIndex = (i + 1) * sizeBits - 1;
912 int loIndex = (i + 0) * sizeBits;
913 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
914 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
// floatInt / doubleInt hold the lane for the float (.f) and double (.d)
// cases; how the lane bits are loaded into them is not visible here.
918 floatInt arg1, arg2, res;
921 res.f = arg1.f / arg2.f;
924 doubleInt arg1, arg2, res;
927 res.d = arg1.d / arg2.d;
931 result = insertBits(result, hiIndex, loIndex, resBits);
933 FpDestReg.uqw = result;
936 class Maddi(MediaOp):
938 assert(srcSize == destSize);
940 int sizeBits = size * 8;
// ext bit 0 selects the scalar form (one lane); otherwise all lanes are done.
941 int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
// Start from the current destination so lanes beyond `items` are preserved.
942 uint64_t result = FpDestReg.uqw;
// Lane-wise integer addition with optional saturation.
944 for (int i = 0; i < items; i++) {
945 int hiIndex = (i + 1) * sizeBits - 1;
946 int loIndex = (i + 0) * sizeBits;
947 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
948 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
949 uint64_t resBits = arg1Bits + arg2Bits;
// Unsigned saturation: a carry out of the lane clamps to all ones.
// NOTE(review): the condition that enables this branch is not visible here.
952 if (findCarry(sizeBits, resBits, arg1Bits, arg2Bits))
953 resBits = mask(sizeBits);
// Signed saturation (ext bit 2): overflow happened when both operands have
// the same sign and the result's sign differs; the result is then clamped
// to the most negative or most positive lane value.
954 } else if (ext & 0x4) {
955 int arg1Sign = bits(arg1Bits, sizeBits - 1);
956 int arg2Sign = bits(arg2Bits, sizeBits - 1);
957 int resSign = bits(resBits, sizeBits - 1);
958 if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
960 resBits = (ULL(1) << (sizeBits - 1));
962 resBits = mask(sizeBits - 1);
966 result = insertBits(result, hiIndex, loIndex, resBits);
968 FpDestReg.uqw = result;
971 class Msubi(MediaOp):
973 assert(srcSize == destSize);
975 int sizeBits = size * 8;
// ext bit 0 selects the scalar form (one lane); otherwise all lanes are done.
976 int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
// Start from the current destination so lanes beyond `items` are preserved.
977 uint64_t result = FpDestReg.uqw;
// Lane-wise integer subtraction (FpSrcReg1 - FpSrcReg2) with optional
// saturation.
979 for (int i = 0; i < items; i++) {
980 int hiIndex = (i + 1) * sizeBits - 1;
981 int loIndex = (i + 0) * sizeBits;
982 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
983 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
984 uint64_t resBits = arg1Bits - arg2Bits;
// Unsigned saturation path.
// NOTE(review): the enabling condition and the value assigned when
// arg2Bits > arg1Bits are not visible here.
987 if (arg2Bits > arg1Bits) {
989 } else if (!findCarry(sizeBits, resBits,
990 arg1Bits, ~arg2Bits)) {
991 resBits = mask(sizeBits);
// Signed saturation (ext bit 2). arg2's sign is inverted because the
// subtraction is treated as adding the negated operand; overflow is then
// "same signs in, different sign out", clamped to the lane's extremes.
993 } else if (ext & 0x4) {
994 int arg1Sign = bits(arg1Bits, sizeBits - 1);
995 int arg2Sign = !bits(arg2Bits, sizeBits - 1);
996 int resSign = bits(resBits, sizeBits - 1);
997 if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
999 resBits = (ULL(1) << (sizeBits - 1));
1001 resBits = mask(sizeBits - 1);
1005 result = insertBits(result, hiIndex, loIndex, resBits);
1007 FpDestReg.uqw = result;
1010 class Mmuli(MediaOp):
1012 int srcBits = srcSize * 8;
1013 int destBits = destSize * 8;
// The product lane must fit in 64 bits and be at least as wide as a source.
1014 assert(destBits <= 64);
1015 assert(destSize >= srcSize);
// ext bit 0 selects the scalar form (one lane); otherwise all lanes are done.
1016 int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / destSize);
// Start from the current destination so lanes beyond `items` are preserved.
1017 uint64_t result = FpDestReg.uqw;
// Lane-wise integer multiply; source lanes may be narrower than dest lanes.
1019 for (int i = 0; i < items; i++) {
// `offset` picks which srcBits-wide slice of each destBits-wide slot is
// read; the second form reads the slice one srcBits higher.
// NOTE(review): the condition choosing between the two is not visible here.
1023 offset = i * (destBits - srcBits);
1025 offset = i * (destBits - srcBits) + srcBits;
1027 int srcHiIndex = (i + 1) * srcBits - 1 + offset;
1028 int srcLoIndex = (i + 0) * srcBits + offset;
1029 uint64_t arg1Bits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
1030 uint64_t arg2Bits = bits(FpSrcReg2.uqw, srcHiIndex, srcLoIndex);
// Signed multiply: operands are sign-extended from bit (srcBits - 1) first.
1034 int64_t arg1 = arg1Bits |
1035 (0 - (arg1Bits & (ULL(1) << (srcBits - 1))));
1036 int64_t arg2 = arg2Bits |
1037 (0 - (arg2Bits & (ULL(1) << (srcBits - 1))));
1038 resBits = (uint64_t)(arg1 * arg2);
// Unsigned multiply on the raw lane bits.
1040 resBits = arg1Bits * arg2Bits;
// Optional rounding add and optional shift down by destBits.
// NOTE(review): the conditions enabling each step are not visible here.
1044 resBits += (ULL(1) << (destBits - 1));
1047 resBits >>= destBits;
1049 int destHiIndex = (i + 1) * destBits - 1;
1050 int destLoIndex = (i + 0) * destBits;
1051 result = insertBits(result, destHiIndex, destLoIndex, resBits);
1053 FpDestReg.uqw = result;
1056 class Mavg(MediaOp):
1058 assert(srcSize == destSize);
1060 int sizeBits = size * 8;
// ext bit 0 selects the scalar form (one lane); otherwise all lanes are done.
1061 int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
// Start from the current destination so lanes beyond `items` are preserved.
1062 uint64_t result = FpDestReg.uqw;
// Lane-wise unsigned average of the two sources, rounded up on ties: the
// +1 before the division by two implements the rounding.
1064 for (int i = 0; i < items; i++) {
1065 int hiIndex = (i + 1) * sizeBits - 1;
1066 int loIndex = (i + 0) * sizeBits;
1067 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
1068 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
1069 uint64_t resBits = (arg1Bits + arg2Bits + 1) / 2;
1071 result = insertBits(result, hiIndex, loIndex, resBits);
1073 FpDestReg.uqw = result;
1076 class Msad(MediaOp):
1078 int srcBits = srcSize * 8;
1079 int items = sizeof(FloatRegBits) / srcSize;
// For every srcSize-byte lane, form the difference of the two source lanes.
// NOTE(review): how each difference is folded into `sum` (presumably as an
// absolute value) is in lines not visible here.
1082 for (int i = 0; i < items; i++) {
1083 int hiIndex = (i + 1) * srcBits - 1;
1084 int loIndex = (i + 0) * srcBits;
1085 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
1086 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
1087 int64_t resBits = arg1Bits - arg2Bits;
// The destination receives the sum truncated to destSize bytes.
1092 FpDestReg.uqw = sum & mask(destSize * 8);
1095 class Msrl(MediaOp):
1098 assert(srcSize == destSize);
1100 int sizeBits = size * 8;
// ext bit 0 selects the scalar form (one lane); otherwise all lanes are done.
1101 int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
// One shift count, taken from op2, is applied to every lane.
1102 uint64_t shiftAmt = op2.uqw;
1103 uint64_t result = FpDestReg.uqw;
// Lane-wise logical right shift of FpSrcReg1.
1105 for (int i = 0; i < items; i++) {
1106 int hiIndex = (i + 1) * sizeBits - 1;
1107 int loIndex = (i + 0) * sizeBits;
1108 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
// Counts of a full lane width or more are handled separately.
// NOTE(review): the value assigned in that case is not visible here.
1110 if (shiftAmt >= sizeBits) {
// In-range count: shift, then keep only the bits that remain in the lane.
1113 resBits = (arg1Bits >> shiftAmt) &
1114 mask(sizeBits - shiftAmt);
1117 result = insertBits(result, hiIndex, loIndex, resBits);
1119 FpDestReg.uqw = result;
1122 class Msra(MediaOp):
1125 assert(srcSize == destSize);
1127 int sizeBits = size * 8;
// ext bit 0 selects the scalar form (one lane); otherwise all lanes are done.
1128 int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
// One shift count, taken from op2, is applied to every lane.
1129 uint64_t shiftAmt = op2.uqw;
1130 uint64_t result = FpDestReg.uqw;
// Lane-wise arithmetic (sign-propagating) right shift of FpSrcReg1.
1132 for (int i = 0; i < items; i++) {
1133 int hiIndex = (i + 1) * sizeBits - 1;
1134 int loIndex = (i + 0) * sizeBits;
1135 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
// Counts of a full lane width or more: a negative lane becomes all ones.
// NOTE(review): the non-negative case is in lines not visible here.
1137 if (shiftAmt >= sizeBits) {
1138 if (bits(arg1Bits, sizeBits - 1))
1139 resBits = mask(sizeBits);
// In-range count: after the shift the original sign bit sits at position
// (sizeBits - 1 - shiftAmt); the term below extends it upwards.
1143 resBits = (arg1Bits >> shiftAmt);
1145 (0 - (resBits & (ULL(1) << (sizeBits - 1 - shiftAmt))));
1148 result = insertBits(result, hiIndex, loIndex, resBits);
1150 FpDestReg.uqw = result;
1153 class Msll(MediaOp):
1156 assert(srcSize == destSize);
1158 int sizeBits = size * 8;
// ext bit 0 selects the scalar form (one lane); otherwise all lanes are done.
1159 int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
// One shift count, taken from op2, is applied to every lane.
1160 uint64_t shiftAmt = op2.uqw;
1161 uint64_t result = FpDestReg.uqw;
// Lane-wise left shift of FpSrcReg1.
1163 for (int i = 0; i < items; i++) {
1164 int hiIndex = (i + 1) * sizeBits - 1;
1165 int loIndex = (i + 0) * sizeBits;
1166 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
// Counts of a full lane width or more are handled separately.
// NOTE(review): the value assigned in that case is not visible here.
1168 if (shiftAmt >= sizeBits) {
1171 resBits = (arg1Bits << shiftAmt);
1174 result = insertBits(result, hiIndex, loIndex, resBits);
1176 FpDestReg.uqw = result;
1179 class Cvtf2i(MediaOp):
1180 def __init__(self, dest, src, \
1181 size = None, destSize = None, srcSize = None, ext = None):
1182 super(Cvtf2i, self).__init__(dest, src,\
1183 "InstRegIndex(0)", size, destSize, srcSize, ext)
// Source and destination lanes may each be 4 or 8 bytes wide.
1196 assert(destSize == 4 || destSize == 8);
1197 assert(srcSize == 4 || srcSize == 8);
1198 int srcSizeBits = srcSize * 8;
1199 int destSizeBits = destSize * 8;
// The lane count follows the wider of the two sizes; ext bit 0 forces a
// single lane. When the sizes differ, srcStart/destStart can move the
// narrower side to a later group of lanes.
// NOTE(review): the conditions guarding the srcStart/destStart assignments
// are not visible here.
1203 if (srcSize == 2 * destSize) {
1204 items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / srcSize;
1206 destStart = destSizeBits * items;
1207 } else if (destSize == 2 * srcSize) {
1208 items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
1210 srcStart = srcSizeBits * items;
1212 items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
// Start from the current destination so untouched lanes are preserved.
1214 uint64_t result = FpDestReg.uqw;
1216 for (int i = 0; i < items; i++) {
1217 int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
1218 int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
1219 uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
// Convert `arg` to an integer of the destination width.
// NOTE(review): arg's declaration is not visible here. If it is a
// floating-point value, casting a negative value to an unsigned type is
// undefined behaviour in C++; confirm whether a signed cast is intended.
1239 if (destSize == 4) {
1240 argBits = (uint32_t)arg;
1242 argBits = (uint64_t)arg;
1244 int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
1245 int destLoIndex = destStart + (i + 0) * destSizeBits;
1246 result = insertBits(result, destHiIndex, destLoIndex, argBits);
1248 FpDestReg.uqw = result;
1251 class Cvti2f(MediaOp):
1252 def __init__(self, dest, src, \
1253 size = None, destSize = None, srcSize = None, ext = None):
1254 super(Cvti2f, self).__init__(dest, src,\
1255 "InstRegIndex(0)", size, destSize, srcSize, ext)
// Source and destination lanes may each be 4 or 8 bytes wide.
1268 assert(destSize == 4 || destSize == 8);
1269 assert(srcSize == 4 || srcSize == 8);
1270 int srcSizeBits = srcSize * 8;
1271 int destSizeBits = destSize * 8;
// The lane count follows the wider of the two sizes; ext bit 0 forces a
// single lane. When the sizes differ, srcStart/destStart can move the
// narrower side to a later group of lanes.
// NOTE(review): the conditions guarding the srcStart/destStart assignments
// are not visible here.
1275 if (srcSize == 2 * destSize) {
1276 items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / srcSize;
1278 destStart = destSizeBits * items;
1279 } else if (destSize == 2 * srcSize) {
1280 items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
1282 srcStart = srcSizeBits * items;
1284 items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
// Start from the current destination so untouched lanes are preserved.
1286 uint64_t result = FpDestReg.uqw;
1288 for (int i = 0; i < items; i++) {
1289 int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
1290 int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
1291 uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
// Sign-extend the lane to 64 bits. bits() returns the lane right-aligned,
// so its sign bit is bit (srcSizeBits - 1) of argBits. Testing bit
// srcHiIndex (the lane's position inside the register) was only correct for
// the lane at bit offset 0; every other lane lost its sign and negative
// integers were converted as large positive values.
1293 int64_t sArg = argBits | (0 - (argBits & (ULL(1) << (srcSizeBits - 1))));
// NOTE(review): the conversion of sArg to float/double bits for each
// destination size is in lines not visible here.
1296 if (destSize == 4) {
1305 int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
1306 int destLoIndex = destStart + (i + 0) * destSizeBits;
1307 result = insertBits(result, destHiIndex, destLoIndex, argBits);
1309 FpDestReg.uqw = result;
1312 class Cvtf2f(MediaOp):
1313 def __init__(self, dest, src, \
1314 size = None, destSize = None, srcSize = None, ext = None):
1315 super(Cvtf2f, self).__init__(dest, src,\
1316 "InstRegIndex(0)", size, destSize, srcSize, ext)
// Source and destination lanes may each be 4 or 8 bytes wide.
1329 assert(destSize == 4 || destSize == 8);
1330 assert(srcSize == 4 || srcSize == 8);
1331 int srcSizeBits = srcSize * 8;
1332 int destSizeBits = destSize * 8;
// The lane count follows the wider of the two sizes; ext bit 0 forces a
// single lane. When the sizes differ, srcStart/destStart can move the
// narrower side to a later group of lanes.
// NOTE(review): the conditions guarding the srcStart/destStart assignments
// are not visible here.
1336 if (srcSize == 2 * destSize) {
1337 items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / srcSize;
1339 destStart = destSizeBits * items;
1340 } else if (destSize == 2 * srcSize) {
1341 items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
1343 srcStart = srcSizeBits * items;
1345 items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
// Start from the current destination so untouched lanes are preserved.
1347 uint64_t result = FpDestReg.uqw;
1349 for (int i = 0; i < items; i++) {
1350 int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
1351 int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
1352 uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
// NOTE(review): the re-encoding of the lane for each destination size
// (presumably float <-> double) is in lines not visible here.
1364 if (destSize == 4) {
1373 int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
1374 int destLoIndex = destStart + (i + 0) * destSizeBits;
1375 result = insertBits(result, destHiIndex, destLoIndex, argBits);
1377 FpDestReg.uqw = result;
1380 class Mcmpi2r(MediaOp):
1393 assert(srcSize == destSize);
1395 int sizeBits = size * 8;
// ext bit 0 selects the scalar form (one lane); otherwise all lanes are done.
1396 int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
// Start from the current destination so lanes beyond `items` are preserved.
1397 uint64_t result = FpDestReg.uqw;
// Lane-wise signed integer compare producing an all-ones / all-zeros mask.
1399 for (int i = 0; i < items; i++) {
1400 int hiIndex = (i + 1) * sizeBits - 1;
1401 int loIndex = (i + 0) * sizeBits;
1402 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
// arg1/arg2 are the lanes sign-extended from bit (sizeBits - 1) to 64 bits.
1403 int64_t arg1 = arg1Bits |
1404 (0 - (arg1Bits & (ULL(1) << (sizeBits - 1))));
1405 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
1406 int64_t arg2 = arg2Bits |
1407 (0 - (arg2Bits & (ULL(1) << (sizeBits - 1))));
// ext bit 1 picks the predicate: clear means equality, set means signed
// greater-than. A true predicate fills the lane with ones.
1409 uint64_t resBits = 0;
1410 if (((ext & 0x2) == 0 && arg1 == arg2) ||
1411 ((ext & 0x2) == 0x2 && arg1 > arg2))
1412 resBits = mask(sizeBits);
1414 result = insertBits(result, hiIndex, loIndex, resBits);
1416 FpDestReg.uqw = result;
1419 class Mcmpf2r(MediaOp):
1432 assert(srcSize == destSize);
1434 int sizeBits = size * 8;
// Here the scalar flag is ext bit 3, because the low three bits of ext
// carry the comparison predicate (see the switch below).
1435 int items = (ext & 0x8) ? 1: (sizeof(FloatRegBits) / size);
// Start from the current destination so lanes beyond `items` are preserved.
1436 uint64_t result = FpDestReg.uqw;
// Lane-wise floating-point compare producing an all-ones / all-zeros mask.
1438 for (int i = 0; i < items; i++) {
1439 int hiIndex = (i + 1) * sizeBits - 1;
1440 int loIndex = (i + 0) * sizeBits;
1441 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
1442 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
// NOTE(review): the decoding of the lane bits into arg1/arg2 is in lines
// not visible here.
1459 uint64_t resBits = 0;
// nanop is true when either operand is NaN. The first group of predicates
// below is false for NaN operands, the second group is true for them.
1460 bool nanop = isnan(arg1) || isnan(arg2);
// NOTE(review): the case labels are not visible here; only the predicate
// bodies are.
1461 switch (ext & mask(3)) {
1463 if (arg1 == arg2 && !nanop)
1464 resBits = mask(sizeBits);
1467 if (arg1 < arg2 && !nanop)
1468 resBits = mask(sizeBits);
1471 if (arg1 <= arg2 && !nanop)
1472 resBits = mask(sizeBits);
1476 resBits = mask(sizeBits);
1479 if (arg1 != arg2 || nanop)
1480 resBits = mask(sizeBits);
1483 if (!(arg1 < arg2) || nanop)
1484 resBits = mask(sizeBits);
1487 if (!(arg1 <= arg2) || nanop)
1488 resBits = mask(sizeBits);
1492 resBits = mask(sizeBits);
1496 result = insertBits(result, hiIndex, loIndex, resBits);
1498 FpDestReg.uqw = result;
1501 class Mcmpf2rf(MediaOp):
1502 def __init__(self, src1, src2,\
1503 size = None, destSize = None, srcSize = None, ext = None):
1504 super(Mcmpf2rf, self).__init__("InstRegIndex(0)", src1,\
1505 src2, size, destSize, srcSize, ext)
1518 assert(srcSize == destSize);
1519 assert(srcSize == 4 || srcSize == 8);
1521 int sizeBits = size * 8;
1524 uint64_t arg1Bits = bits(FpSrcReg1.uqw, sizeBits - 1, 0);
1525 uint64_t arg2Bits = bits(FpSrcReg2.uqw, sizeBits - 1, 0);
1542 // Greater than 0 0 0
1546 ccFlagBits = ccFlagBits & ~(OFBit | SFBit | AFBit |
1547 ZFBit | PFBit | CFBit);
1548 if (isnan(arg1) || isnan(arg2))
1549 ccFlagBits = ccFlagBits | (ZFBit | PFBit | CFBit);
1550 else if(arg1 < arg2)
1551 ccFlagBits = ccFlagBits | CFBit;
1552 else if(arg1 == arg2)
1553 ccFlagBits = ccFlagBits | ZFBit;