X86: Implement a media average microop.
[gem5.git] / src / arch / x86 / isa / microops / mediaop.isa
1 /// Copyright (c) 2009 The Regents of The University of Michigan
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met: redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer;
8 // redistributions in binary form must reproduce the above copyright
9 // notice, this list of conditions and the following disclaimer in the
10 // documentation and/or other materials provided with the distribution;
11 // neither the name of the copyright holders nor the names of its
12 // contributors may be used to endorse or promote products derived from
13 // this software without specific prior written permission.
14 //
15 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
19 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
21 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 //
27 // Authors: Gabe Black
28
// Template for the execute() method of a generated media microop class.
// The op_decl and op_rd substitutions are emitted first (presumably the
// operand declarations and reads), then the microop's own code snippet.
// The op_wb substitution only runs when the snippet left "fault" as NoFault.
29 def template MediaOpExecute {{
30 Fault %(class_name)s::execute(%(CPU_exec_context)s *xc,
31 Trace::InstRecord *traceData) const
32 {
33 Fault fault = NoFault;
34
35 %(op_decl)s;
36 %(op_rd)s;
37
38 %(code)s;
39
40 //Write the resulting state to the execution context
41 if(fault == NoFault)
42 {
43 %(op_wb)s;
44 }
45 return fault;
46 }
47 }};
48
// Class declaration template for the register form of a media microop,
// i.e. the form whose second source operand is a register index (_src2).
// Two constructors are declared: one taking the four microop flags
// explicitly and one without them.
49 def template MediaOpRegDeclare {{
50 class %(class_name)s : public %(base_class)s
51 {
52 protected:
53 void buildMe();
54
55 public:
56 %(class_name)s(ExtMachInst _machInst,
57 const char * instMnem,
58 bool isMicro, bool isDelayed, bool isFirst, bool isLast,
59 InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
60 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
61
62 %(class_name)s(ExtMachInst _machInst,
63 const char * instMnem,
64 InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
65 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
66
67 %(BasicExecDeclare)s
68 };
69 }};
70
// Class declaration template for the immediate form of a media microop.
// It mirrors the register form, except that the second source operand is
// the immediate value _imm8 (declared as uint16_t) instead of a register
// index.
71 def template MediaOpImmDeclare {{
72
73 class %(class_name)s : public %(base_class)s
74 {
75 protected:
76 void buildMe();
77
78 public:
79 %(class_name)s(ExtMachInst _machInst,
80 const char * instMnem,
81 bool isMicro, bool isDelayed, bool isFirst, bool isLast,
82 InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
83 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
84
85 %(class_name)s(ExtMachInst _machInst,
86 const char * instMnem,
87 InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
88 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
89
90 %(BasicExecDeclare)s
91 };
92 }};
93
// Constructor definitions for the register form of a media microop.
// buildMe() holds the generated constructor body and is called by both
// constructors. The constructor without flag parameters forwards "false"
// for isMicro, isDelayed, isFirst and isLast to the base class.
94 def template MediaOpRegConstructor {{
95
96 inline void %(class_name)s::buildMe()
97 {
98 %(constructor)s;
99 }
100
101 inline %(class_name)s::%(class_name)s(
102 ExtMachInst machInst, const char * instMnem,
103 InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
104 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
105 %(base_class)s(machInst, "%(mnemonic)s", instMnem,
106 false, false, false, false,
107 _src1, _src2, _dest, _srcSize, _destSize, _ext,
108 %(op_class)s)
109 {
110 buildMe();
111 }
112
113 inline %(class_name)s::%(class_name)s(
114 ExtMachInst machInst, const char * instMnem,
115 bool isMicro, bool isDelayed, bool isFirst, bool isLast,
116 InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
117 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
118 %(base_class)s(machInst, "%(mnemonic)s", instMnem,
119 isMicro, isDelayed, isFirst, isLast,
120 _src1, _src2, _dest, _srcSize, _destSize, _ext,
121 %(op_class)s)
122 {
123 buildMe();
124 }
125 }};
126
// Constructor definitions for the immediate form of a media microop.
// Same layout as the register form, but _imm8 is forwarded to the base
// class in place of a second source register index.
127 def template MediaOpImmConstructor {{
128
129 inline void %(class_name)s::buildMe()
130 {
131 %(constructor)s;
132 }
133
134 inline %(class_name)s::%(class_name)s(
135 ExtMachInst machInst, const char * instMnem,
136 InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
137 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
138 %(base_class)s(machInst, "%(mnemonic)s", instMnem,
139 false, false, false, false,
140 _src1, _imm8, _dest, _srcSize, _destSize, _ext,
141 %(op_class)s)
142 {
143 buildMe();
144 }
145
146 inline %(class_name)s::%(class_name)s(
147 ExtMachInst machInst, const char * instMnem,
148 bool isMicro, bool isDelayed, bool isFirst, bool isLast,
149 InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
150 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
151 %(base_class)s(machInst, "%(mnemonic)s", instMnem,
152 isMicro, isDelayed, isFirst, isLast,
153 _src1, _imm8, _dest, _srcSize, _destSize, _ext,
154 %(op_class)s)
155 {
156 buildMe();
157 }
158 }};
159
160 let {{
161 # Make these empty strings so that concatenating onto
162 # them will always work.
163 header_output = ""
164 decoder_output = ""
165 exec_output = ""
166
# Template triples used by MediaOpMeta.buildCppClasses. The order matters:
# index 0 is appended to header_output, index 1 to decoder_output and
# index 2 to exec_output.
# Templates for microops whose second operand is an immediate.
167 immTemplates = (
168 MediaOpImmDeclare,
169 MediaOpImmConstructor,
170 MediaOpExecute)
171
# Templates for microops whose second operand is a register.
172 regTemplates = (
173 MediaOpRegDeclare,
174 MediaOpRegConstructor,
175 MediaOpExecute)
176
class MediaOpMeta(type):
    # Metaclass for media microops. Creating a non-abstract class with
    # this metaclass generates the C++ text for the microop from the
    # class's "code" attribute and registers the class with the
    # microassembler under its lower-cased name.

    # A use of the generic second operand: "op2" or "sop2", optionally
    # followed by a type qualifier such as ".uqw". The lookbehind keeps
    # identifiers that merely end in "op2" from matching. The same pattern
    # is used for code generation and for mnemonic registration so the two
    # can never disagree about whether an immediate variant exists.
    _op2Matcher = re.compile(
            r"(?<!\w)(?P<prefix>s?)op2(?P<typeQual>\.\w+)?")

    # A use of the immediate operand as a whole word.
    _imm8Matcher = re.compile(r"(?<!\w)imm8(?!\w)")

    def buildCppClasses(self, name, Name, suffix, code):
        # Append the C++ declaration, constructor and execute() text for
        # one microop to the output globals.
        #
        # name   -- mnemonic handed to InstObjParams
        # Name   -- base of the C++ class name
        # suffix -- appended to Name to form the C++ class name
        # code   -- C++ snippet implementing the microop

        # Globals to stick the output in
        global header_output
        global decoder_output
        global exec_output

        # If op2 is used anywhere, make register and immediate versions
        # of this code.
        if self._op2Matcher.search(code):
            def toRegister(match):
                # Keep each occurrence's own prefix and type qualifier.
                return "%sFpSrcReg2%s" % (match.group("prefix"),
                                          match.group("typeQual") or "")
            self.buildCppClasses(name, Name, suffix,
                                 self._op2Matcher.sub(toRegister, code))
            self.buildCppClasses(name + "i", Name, suffix + "Imm",
                                 self._op2Matcher.sub("imm8", code))
            return

        base = "X86ISA::MediaOp"

        # If imm8 shows up in the code, use the immediate templates, if
        # not, hopefully the register ones will be correct.
        if self._imm8Matcher.search(code):
            base += "Imm"
            templates = immTemplates
        else:
            base += "Reg"
            templates = regTemplates

        # Get everything ready for the substitution
        iop = InstObjParams(name, Name + suffix, base, {"code" : code})

        # Generate the actual code (finally!)
        header_output += templates[0].subst(iop)
        decoder_output += templates[1].subst(iop)
        exec_output += templates[2].subst(iop)

    def __new__(mcls, Name, bases, dict):
        # Create the Python class and, unless it is marked abstract,
        # generate its C++ classes and register its mnemonic(s).
        abstract = False
        name = Name.lower()
        if "abstract" in dict:
            # "abstract" only applies to the class that sets it, so it is
            # removed rather than being inherited by subclasses.
            abstract = dict['abstract']
            del dict['abstract']

        cls = super(MediaOpMeta, mcls).__new__(mcls, Name, bases, dict)
        if not abstract:
            cls.className = Name
            cls.base_mnemonic = name
            code = cls.code

            # Set up the C++ classes
            mcls.buildCppClasses(cls, name, Name, "", code)

            # Hook into the microassembler dict
            global microopClasses
            microopClasses[name] = cls

            # If op2 is used anywhere, buildCppClasses also made an
            # immediate version, so register its mnemonic too.
            if mcls._op2Matcher.search(code):
                microopClasses[name + 'i'] = cls
        return cls
247
248
class MediaOp(X86Microop):
    # Base class for media microops as seen by the microassembler.
    # Concrete subclasses provide a "code" attribute; MediaOpMeta turns it
    # into the C++ implementation.
    __metaclass__ = MediaOpMeta
    # This class itself doesn't act as a microop
    abstract = True

    def __init__(self, dest, src1, op2,
            size = None, destSize = None, srcSize = None, ext = None):
        # dest, src1, op2 -- operand expressions pasted into the allocator
        # size             -- sets both the source and destination size
        # srcSize/destSize -- override the corresponding size if given
        # ext              -- extension bits for the microop, 0 if omitted
        #
        # Raises Exception if the source or destination size ends up unset.
        self.dest = dest
        self.src1 = src1
        self.op2 = op2
        if size is not None:
            self.srcSize = size
            self.destSize = size
        if srcSize is not None:
            self.srcSize = srcSize
        if destSize is not None:
            self.destSize = destSize
        # The attributes may never have been assigned above, so look them
        # up with a default instead of failing with an AttributeError.
        if getattr(self, "srcSize", None) is None:
            raise Exception("Source size not set.")
        if getattr(self, "destSize", None) is None:
            raise Exception("Dest size not set.")
        if ext is None:
            self.ext = 0
        else:
            self.ext = ext

    def getAllocator(self, *microFlags):
        # Return the C++ "new" expression that constructs this microop.
        className = self.className
        # The immediate variant is assembled under the base mnemonic plus
        # 'i' and lives in the C++ class with the "Imm" suffix.
        if self.mnemonic == self.base_mnemonic + 'i':
            className += "Imm"
        allocator = '''new %(class_name)s(machInst, macrocodeBlock
                %(flags)s, %(src1)s, %(op2)s, %(dest)s,
                %(srcSize)s, %(destSize)s, %(ext)s)''' % {
            "class_name" : className,
            "flags" : self.microFlagsText(microFlags),
            "src1" : self.src1, "op2" : self.op2,
            "dest" : self.dest,
            "srcSize" : self.srcSize,
            "destSize" : self.destSize,
            "ext" : self.ext}
        return allocator
290
# Media microop with a single source operand: takes the low srcSize bytes
# of FpSrcReg1 and merges them into DestReg using destSize. The unused
# second operand slot is filled with "InstRegIndex(0)".
291 class Mov2int(MediaOp):
292 def __init__(self, dest, src, \
293 size = None, destSize = None, srcSize = None, ext = None):
294 super(Mov2int, self).__init__(dest, src,\
295 "InstRegIndex(0)", size, destSize, srcSize, ext)
296 code = '''
297 uint64_t fpSrcReg1 = bits(FpSrcReg1.uqw, srcSize * 8 - 1, 0);
298 DestReg = merge(DestReg, fpSrcReg1, destSize);
299 '''
300
# Media microop with a single source operand: picks srcSize bytes from
# SrcReg1 and inserts them into the low destSize * 8 bits of FpDestReg,
# leaving the remaining destination bits as they were. The unused second
# operand slot is filled with "InstRegIndex(0)".
301 class Mov2fp(MediaOp):
302 def __init__(self, dest, src, \
303 size = None, destSize = None, srcSize = None, ext = None):
304 super(Mov2fp, self).__init__(dest, src,\
305 "InstRegIndex(0)", size, destSize, srcSize, ext)
306 code = '''
307 uint64_t srcReg1 = pick(SrcReg1, 0, srcSize);
308 FpDestReg.uqw =
309 insertBits(FpDestReg.uqw, destSize * 8 - 1, 0, srcReg1);
310 '''
311
# Interleaves elements of the two sources. Half a register's worth of
# elements is taken from each source: element (i + offset) of FpSrcReg1
# lands in result slot 2i and the same element of FpSrcReg2 in slot 2i + 1.
# A non-zero ext selects the upper half of the sources (offset = items),
# zero selects the lower half. Requires srcSize == destSize.
312 class Unpack(MediaOp):
313 code = '''
314 assert(srcSize == destSize);
315 int size = destSize;
316 int items = (sizeof(FloatRegBits) / size) / 2;
317 int offset = ext ? items : 0;
318 uint64_t result = 0;
319 for (int i = 0; i < items; i++) {
320 uint64_t pickedLow =
321 bits(FpSrcReg1.uqw, (i + offset + 1) * 8 * size - 1,
322 (i + offset) * 8 * size);
323 result = insertBits(result,
324 (2 * i + 1) * 8 * size - 1,
325 (2 * i + 0) * 8 * size,
326 pickedLow);
327 uint64_t pickedHigh =
328 bits(FpSrcReg2.uqw, (i + offset + 1) * 8 * size - 1,
329 (i + offset) * 8 * size);
330 result = insertBits(result,
331 (2 * i + 2) * 8 * size - 1,
332 (2 * i + 1) * 8 * size,
333 pickedHigh);
334 }
335 FpDestReg.uqw = result;
336 '''
337
class Pack(MediaOp):
    # Narrows signed elements of srcSize bytes to destSize bytes
    # (srcSize == 2 * destSize) with saturation. The low half of the result
    # is packed from FpSrcReg1 and the high half from FpSrcReg2. Bit 0 of
    # ext selects signed saturation; otherwise the result saturates to the
    # unsigned range of the destination element.
    code = '''
        assert(srcSize == destSize * 2);
        int items = (sizeof(FloatRegBits) / destSize);
        int destBits = destSize * 8;
        int srcBits = srcSize * 8;
        uint64_t result = 0;
        for (int i = 0; i < items; i++) {
            // Each source supplies items / 2 elements and is indexed from
            // its own element zero.
            bool fromSecond = (i >= items / 2);
            int srcItem = fromSecond ? (i - items / 2) : i;
            uint64_t srcVal = fromSecond ? FpSrcReg2.uqw : FpSrcReg1.uqw;
            uint64_t picked =
                bits(srcVal, (srcItem + 1) * srcBits - 1,
                             (srcItem + 0) * srcBits);
            unsigned signBit = bits(picked, srcBits - 1);

            // Handle saturation.
            if (ext & 0x1) {
                // Signed: the value fits when every source bit from the
                // destination's sign position upwards equals the sign.
                uint64_t upper = bits(picked, srcBits - 1, destBits - 1);
                if (signBit) {
                    if (upper != mask(srcBits - destBits + 1))
                        picked = (1ULL << (destBits - 1));
                } else if (upper != 0) {
                    picked = mask(destBits - 1);
                }
            } else {
                // Unsigned: negative inputs clamp to zero, inputs with
                // bits above the destination width clamp to all ones.
                if (signBit)
                    picked = 0;
                else if (bits(picked, srcBits - 1, destBits) != 0)
                    picked = mask(destBits);
            }
            result = insertBits(result,
                                (i + 1) * destBits - 1,
                                (i + 0) * destBits,
                                picked);
        }
        FpDestReg.uqw = result;
    '''
404
# Bitwise XOR of the two 64 bit source registers. The element size is
# irrelevant to the operation, so the constructor passes a size of 1.
405 class Mxor(MediaOp):
406 def __init__(self, dest, src1, src2):
407 super(Mxor, self).__init__(dest, src1, src2, 1)
408 code = '''
409 FpDestReg.uqw = FpSrcReg1.uqw ^ FpSrcReg2.uqw;
410 '''
411
# Bitwise OR of the two 64 bit source registers. The element size is
# irrelevant to the operation, so the constructor passes a size of 1.
412 class Mor(MediaOp):
413 def __init__(self, dest, src1, src2):
414 super(Mor, self).__init__(dest, src1, src2, 1)
415 code = '''
416 FpDestReg.uqw = FpSrcReg1.uqw | FpSrcReg2.uqw;
417 '''
418
# Bitwise AND of the two 64 bit source registers. The element size is
# irrelevant to the operation, so the constructor passes a size of 1.
419 class Mand(MediaOp):
420 def __init__(self, dest, src1, src2):
421 super(Mand, self).__init__(dest, src1, src2, 1)
422 code = '''
423 FpDestReg.uqw = FpSrcReg1.uqw & FpSrcReg2.uqw;
424 '''
425
# Bitwise AND-NOT: the complement of the first source ANDed with the
# second source. The element size is irrelevant to the operation, so the
# constructor passes a size of 1.
426 class Mandn(MediaOp):
427 def __init__(self, dest, src1, src2):
428 super(Mandn, self).__init__(dest, src1, src2, 1)
429 code = '''
430 FpDestReg.uqw = ~FpSrcReg1.uqw & FpSrcReg2.uqw;
431 '''
432
# Element-wise floating point minimum. srcSize must equal destSize and be
# 4 (float) or 8 (double). With bit 0 of ext set only the lowest element is
# processed; the other bits of FpDestReg keep their previous value.
# The first operand is chosen only when "arg1 < arg2" is true, so the
# second operand is the result whenever that comparison is false.
433 class Mminf(MediaOp):
434 code = '''
435 union floatInt
436 {
437 float f;
438 uint32_t i;
439 };
440 union doubleInt
441 {
442 double d;
443 uint64_t i;
444 };
445
446 assert(srcSize == destSize);
447 int size = srcSize;
448 int sizeBits = size * 8;
449 assert(srcSize == 4 || srcSize == 8);
450 int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
451 uint64_t result = FpDestReg.uqw;
452
453 for (int i = 0; i < items; i++) {
454 double arg1, arg2;
455 int hiIndex = (i + 1) * sizeBits - 1;
456 int loIndex = (i + 0) * sizeBits;
457 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
458 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
459
460 if (size == 4) {
461 floatInt fi;
462 fi.i = arg1Bits;
463 arg1 = fi.f;
464 fi.i = arg2Bits;
465 arg2 = fi.f;
466 } else {
467 doubleInt di;
468 di.i = arg1Bits;
469 arg1 = di.d;
470 di.i = arg2Bits;
471 arg2 = di.d;
472 }
473
474 if (arg1 < arg2) {
475 result = insertBits(result, hiIndex, loIndex, arg1Bits);
476 } else {
477 result = insertBits(result, hiIndex, loIndex, arg2Bits);
478 }
479 }
480 FpDestReg.uqw = result;
481 '''
482
# Element-wise floating point maximum. srcSize must equal destSize and be
# 4 (float) or 8 (double). With bit 0 of ext set only the lowest element is
# processed; the other bits of FpDestReg keep their previous value.
# The first operand is chosen only when "arg1 > arg2" is true, so the
# second operand is the result whenever that comparison is false.
483 class Mmaxf(MediaOp):
484 code = '''
485 union floatInt
486 {
487 float f;
488 uint32_t i;
489 };
490 union doubleInt
491 {
492 double d;
493 uint64_t i;
494 };
495
496 assert(srcSize == destSize);
497 int size = srcSize;
498 int sizeBits = size * 8;
499 assert(srcSize == 4 || srcSize == 8);
500 int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
501 uint64_t result = FpDestReg.uqw;
502
503 for (int i = 0; i < items; i++) {
504 double arg1, arg2;
505 int hiIndex = (i + 1) * sizeBits - 1;
506 int loIndex = (i + 0) * sizeBits;
507 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
508 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
509
510 if (size == 4) {
511 floatInt fi;
512 fi.i = arg1Bits;
513 arg1 = fi.f;
514 fi.i = arg2Bits;
515 arg2 = fi.f;
516 } else {
517 doubleInt di;
518 di.i = arg1Bits;
519 arg1 = di.d;
520 di.i = arg2Bits;
521 arg2 = di.d;
522 }
523
524 if (arg1 > arg2) {
525 result = insertBits(result, hiIndex, loIndex, arg1Bits);
526 } else {
527 result = insertBits(result, hiIndex, loIndex, arg2Bits);
528 }
529 }
530 FpDestReg.uqw = result;
531 '''
532
class Mmini(MediaOp):
    # Element-wise integer minimum. Requires srcSize == destSize. With bit
    # 0 of ext set only the lowest element is processed and the rest of
    # FpDestReg is preserved. Bit 1 of ext selects a signed comparison;
    # otherwise the elements are compared as unsigned values.
    code = '''

        assert(srcSize == destSize);
        int size = srcSize;
        int sizeBits = size * 8;
        int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
        uint64_t result = FpDestReg.uqw;
        // Built from a 64 bit one so the shift is well defined for every
        // element width, including 32 and 64 bit elements.
        uint64_t signMask = 1ULL << (sizeBits - 1);

        for (int i = 0; i < items; i++) {
            int hiIndex = (i + 1) * sizeBits - 1;
            int loIndex = (i + 0) * sizeBits;
            uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
            // Sign extend the element to 64 bits.
            int64_t arg1 = arg1Bits | (0 - (arg1Bits & signMask));
            uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
            int64_t arg2 = arg2Bits | (0 - (arg2Bits & signMask));
            uint64_t resBits;

            if (ext & 0x2) {
                if (arg1 < arg2) {
                    resBits = arg1Bits;
                } else {
                    resBits = arg2Bits;
                }
            } else {
                if (arg1Bits < arg2Bits) {
                    resBits = arg1Bits;
                } else {
                    resBits = arg2Bits;
                }
            }
            result = insertBits(result, hiIndex, loIndex, resBits);
        }
        FpDestReg.uqw = result;
    '''
570
class Mmaxi(MediaOp):
    # Element-wise integer maximum. Requires srcSize == destSize. With bit
    # 0 of ext set only the lowest element is processed and the rest of
    # FpDestReg is preserved. Bit 1 of ext selects a signed comparison;
    # otherwise the elements are compared as unsigned values.
    code = '''

        assert(srcSize == destSize);
        int size = srcSize;
        int sizeBits = size * 8;
        int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
        uint64_t result = FpDestReg.uqw;
        // Built from a 64 bit one so the shift is well defined for every
        // element width, including 32 and 64 bit elements.
        uint64_t signMask = 1ULL << (sizeBits - 1);

        for (int i = 0; i < items; i++) {
            int hiIndex = (i + 1) * sizeBits - 1;
            int loIndex = (i + 0) * sizeBits;
            uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
            // Sign extend the element to 64 bits.
            int64_t arg1 = arg1Bits | (0 - (arg1Bits & signMask));
            uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
            int64_t arg2 = arg2Bits | (0 - (arg2Bits & signMask));
            uint64_t resBits;

            if (ext & 0x2) {
                if (arg1 > arg2) {
                    resBits = arg1Bits;
                } else {
                    resBits = arg2Bits;
                }
            } else {
                if (arg1Bits > arg2Bits) {
                    resBits = arg1Bits;
                } else {
                    resBits = arg2Bits;
                }
            }
            result = insertBits(result, hiIndex, loIndex, resBits);
        }
        FpDestReg.uqw = result;
    '''
608
# Element-wise floating point square root of the single source operand.
# srcSize must equal destSize and be 4 (float) or 8 (double). With bit 0 of
# ext set only the lowest element is processed; the other bits of FpDestReg
# keep their previous value. The unused second operand slot is filled with
# "InstRegIndex(0)".
609 class Msqrt(MediaOp):
610 def __init__(self, dest, src, \
611 size = None, destSize = None, srcSize = None, ext = None):
612 super(Msqrt, self).__init__(dest, src,\
613 "InstRegIndex(0)", size, destSize, srcSize, ext)
614 code = '''
615 union floatInt
616 {
617 float f;
618 uint32_t i;
619 };
620 union doubleInt
621 {
622 double d;
623 uint64_t i;
624 };
625
626 assert(srcSize == destSize);
627 int size = srcSize;
628 int sizeBits = size * 8;
629 assert(srcSize == 4 || srcSize == 8);
630 int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
631 uint64_t result = FpDestReg.uqw;
632
633 for (int i = 0; i < items; i++) {
634 int hiIndex = (i + 1) * sizeBits - 1;
635 int loIndex = (i + 0) * sizeBits;
636 uint64_t argBits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
637
638 if (size == 4) {
639 floatInt fi;
640 fi.i = argBits;
641 fi.f = sqrt(fi.f);
642 argBits = fi.i;
643 } else {
644 doubleInt di;
645 di.i = argBits;
646 di.d = sqrt(di.d);
647 argBits = di.i;
648 }
649 result = insertBits(result, hiIndex, loIndex, argBits);
650 }
651 FpDestReg.uqw = result;
652 '''
653
# Element-wise floating point addition (FpSrcReg1 + FpSrcReg2).
# srcSize must equal destSize and be 4 (float) or 8 (double). With bit 0 of
# ext set only the lowest element is processed; the other bits of FpDestReg
# keep their previous value.
654 class Maddf(MediaOp):
655 code = '''
656 union floatInt
657 {
658 float f;
659 uint32_t i;
660 };
661 union doubleInt
662 {
663 double d;
664 uint64_t i;
665 };
666
667 assert(srcSize == destSize);
668 int size = srcSize;
669 int sizeBits = size * 8;
670 assert(srcSize == 4 || srcSize == 8);
671 int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
672 uint64_t result = FpDestReg.uqw;
673
674 for (int i = 0; i < items; i++) {
675 int hiIndex = (i + 1) * sizeBits - 1;
676 int loIndex = (i + 0) * sizeBits;
677 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
678 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
679 uint64_t resBits;
680
681 if (size == 4) {
682 floatInt arg1, arg2, res;
683 arg1.i = arg1Bits;
684 arg2.i = arg2Bits;
685 res.f = arg1.f + arg2.f;
686 resBits = res.i;
687 } else {
688 doubleInt arg1, arg2, res;
689 arg1.i = arg1Bits;
690 arg2.i = arg2Bits;
691 res.d = arg1.d + arg2.d;
692 resBits = res.i;
693 }
694
695 result = insertBits(result, hiIndex, loIndex, resBits);
696 }
697 FpDestReg.uqw = result;
698 '''
699
# Element-wise floating point subtraction (FpSrcReg1 - FpSrcReg2).
# srcSize must equal destSize and be 4 (float) or 8 (double). With bit 0 of
# ext set only the lowest element is processed; the other bits of FpDestReg
# keep their previous value.
700 class Msubf(MediaOp):
701 code = '''
702 union floatInt
703 {
704 float f;
705 uint32_t i;
706 };
707 union doubleInt
708 {
709 double d;
710 uint64_t i;
711 };
712
713 assert(srcSize == destSize);
714 int size = srcSize;
715 int sizeBits = size * 8;
716 assert(srcSize == 4 || srcSize == 8);
717 int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
718 uint64_t result = FpDestReg.uqw;
719
720 for (int i = 0; i < items; i++) {
721 int hiIndex = (i + 1) * sizeBits - 1;
722 int loIndex = (i + 0) * sizeBits;
723 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
724 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
725 uint64_t resBits;
726
727 if (size == 4) {
728 floatInt arg1, arg2, res;
729 arg1.i = arg1Bits;
730 arg2.i = arg2Bits;
731 res.f = arg1.f - arg2.f;
732 resBits = res.i;
733 } else {
734 doubleInt arg1, arg2, res;
735 arg1.i = arg1Bits;
736 arg2.i = arg2Bits;
737 res.d = arg1.d - arg2.d;
738 resBits = res.i;
739 }
740
741 result = insertBits(result, hiIndex, loIndex, resBits);
742 }
743 FpDestReg.uqw = result;
744 '''
745
# Element-wise floating point multiplication (FpSrcReg1 * FpSrcReg2).
# srcSize must equal destSize and be 4 (float) or 8 (double). With bit 0 of
# ext set only the lowest element is processed; the other bits of FpDestReg
# keep their previous value.
746 class Mmulf(MediaOp):
747 code = '''
748 union floatInt
749 {
750 float f;
751 uint32_t i;
752 };
753 union doubleInt
754 {
755 double d;
756 uint64_t i;
757 };
758
759 assert(srcSize == destSize);
760 int size = srcSize;
761 int sizeBits = size * 8;
762 assert(srcSize == 4 || srcSize == 8);
763 int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
764 uint64_t result = FpDestReg.uqw;
765
766 for (int i = 0; i < items; i++) {
767 int hiIndex = (i + 1) * sizeBits - 1;
768 int loIndex = (i + 0) * sizeBits;
769 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
770 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
771 uint64_t resBits;
772
773 if (size == 4) {
774 floatInt arg1, arg2, res;
775 arg1.i = arg1Bits;
776 arg2.i = arg2Bits;
777 res.f = arg1.f * arg2.f;
778 resBits = res.i;
779 } else {
780 doubleInt arg1, arg2, res;
781 arg1.i = arg1Bits;
782 arg2.i = arg2Bits;
783 res.d = arg1.d * arg2.d;
784 resBits = res.i;
785 }
786
787 result = insertBits(result, hiIndex, loIndex, resBits);
788 }
789 FpDestReg.uqw = result;
790 '''
791
# Element-wise floating point division (FpSrcReg1 / FpSrcReg2).
# srcSize must equal destSize and be 4 (float) or 8 (double). With bit 0 of
# ext set only the lowest element is processed; the other bits of FpDestReg
# keep their previous value.
792 class Mdivf(MediaOp):
793 code = '''
794 union floatInt
795 {
796 float f;
797 uint32_t i;
798 };
799 union doubleInt
800 {
801 double d;
802 uint64_t i;
803 };
804
805 assert(srcSize == destSize);
806 int size = srcSize;
807 int sizeBits = size * 8;
808 assert(srcSize == 4 || srcSize == 8);
809 int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
810 uint64_t result = FpDestReg.uqw;
811
812 for (int i = 0; i < items; i++) {
813 int hiIndex = (i + 1) * sizeBits - 1;
814 int loIndex = (i + 0) * sizeBits;
815 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
816 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
817 uint64_t resBits;
818
819 if (size == 4) {
820 floatInt arg1, arg2, res;
821 arg1.i = arg1Bits;
822 arg2.i = arg2Bits;
823 res.f = arg1.f / arg2.f;
824 resBits = res.i;
825 } else {
826 doubleInt arg1, arg2, res;
827 arg1.i = arg1Bits;
828 arg2.i = arg2Bits;
829 res.d = arg1.d / arg2.d;
830 resBits = res.i;
831 }
832
833 result = insertBits(result, hiIndex, loIndex, resBits);
834 }
835 FpDestReg.uqw = result;
836 '''
837
class Maddi(MediaOp):
    # Element-wise integer addition. Requires srcSize == destSize. With bit
    # 0 of ext set only the lowest element is processed and the rest of
    # FpDestReg is preserved. Bit 1 of ext selects unsigned saturation, bit
    # 2 signed saturation; with neither set the sum wraps around.
    code = '''
        assert(srcSize == destSize);
        int size = srcSize;
        int sizeBits = size * 8;
        int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
        uint64_t result = FpDestReg.uqw;

        for (int i = 0; i < items; i++) {
            int hiIndex = (i + 1) * sizeBits - 1;
            int loIndex = (i + 0) * sizeBits;
            uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
            uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
            uint64_t resBits = arg1Bits + arg2Bits;

            if (ext & 0x2) {
                if (findCarry(sizeBits, resBits, arg1Bits, arg2Bits))
                    resBits = mask(sizeBits);
            } else if (ext & 0x4) {
                int arg1Sign = bits(arg1Bits, sizeBits - 1);
                int arg2Sign = bits(arg2Bits, sizeBits - 1);
                int resSign = bits(resBits, sizeBits - 1);
                // Signed overflow: the operands agree in sign but the
                // sum does not.
                if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
                    if (resSign == 0)
                        // Two negatives wrapped to a positive: clamp to
                        // the most negative value. A 64 bit one keeps the
                        // shift defined for every element width.
                        resBits = (1ULL << (sizeBits - 1));
                    else
                        resBits = mask(sizeBits - 1);
                }
            }

            result = insertBits(result, hiIndex, loIndex, resBits);
        }
        FpDestReg.uqw = result;
    '''
872
class Msubi(MediaOp):
    # Element-wise integer subtraction (FpSrcReg1 - FpSrcReg2). Requires
    # srcSize == destSize. With bit 0 of ext set only the lowest element is
    # processed and the rest of FpDestReg is preserved. Bit 1 of ext
    # selects unsigned saturation, bit 2 signed saturation; with neither
    # set the difference wraps around.
    code = '''
        assert(srcSize == destSize);
        int size = srcSize;
        int sizeBits = size * 8;
        int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
        uint64_t result = FpDestReg.uqw;

        for (int i = 0; i < items; i++) {
            int hiIndex = (i + 1) * sizeBits - 1;
            int loIndex = (i + 0) * sizeBits;
            uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
            uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
            uint64_t resBits = arg1Bits - arg2Bits;

            if (ext & 0x2) {
                if (arg2Bits > arg1Bits) {
                    resBits = 0;
                } else if (!findCarry(sizeBits, resBits,
                                     arg1Bits, ~arg2Bits)) {
                    resBits = mask(sizeBits);
                }
            } else if (ext & 0x4) {
                int arg1Sign = bits(arg1Bits, sizeBits - 1);
                // Subtracting is adding the negation, so the second
                // operand's sign is inverted for the overflow check.
                int arg2Sign = !bits(arg2Bits, sizeBits - 1);
                int resSign = bits(resBits, sizeBits - 1);
                if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
                    if (resSign == 0)
                        // Clamp to the most negative value. A 64 bit one
                        // keeps the shift defined for every element width.
                        resBits = (1ULL << (sizeBits - 1));
                    else
                        resBits = mask(sizeBits - 1);
                }
            }

            result = insertBits(result, hiIndex, loIndex, resBits);
        }
        FpDestReg.uqw = result;
    '''
911
class Mmuli(MediaOp):
    # Element-wise integer multiplication producing destSize byte results
    # from srcSize byte inputs (destSize >= srcSize, at most 64 bits).
    # ext bits as used by the code below:
    #   0x01 - process only the lowest element
    #   0x02 - treat the inputs as signed
    #   0x04 - add half of the destination range before the final shift
    #   0x08 - shift the product right by the destination width
    #   0x10 - read inputs at an offset; with 0x20 also set element i is
    #          read from the low part of destination-sized slot i,
    #          otherwise from srcBits above that
    code = '''
        int srcBits = srcSize * 8;
        int destBits = destSize * 8;
        assert(destBits <= 64);
        assert(destSize >= srcSize);
        int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / destSize);
        uint64_t result = FpDestReg.uqw;
        // Built from a 64 bit one so the shifts are well defined for every
        // width and never sign extend into the upper bits.
        uint64_t srcSignMask = 1ULL << (srcBits - 1);
        uint64_t roundingBit = 1ULL << (destBits - 1);

        for (int i = 0; i < items; i++) {
            int offset = 0;
            if (ext & 16) {
                if (ext & 32)
                    offset = i * (destBits - srcBits);
                else
                    offset = i * (destBits - srcBits) + srcBits;
            }
            int srcHiIndex = (i + 1) * srcBits - 1 + offset;
            int srcLoIndex = (i + 0) * srcBits + offset;
            uint64_t arg1Bits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
            uint64_t arg2Bits = bits(FpSrcReg2.uqw, srcHiIndex, srcLoIndex);
            uint64_t resBits;

            if (ext & 0x2) {
                // Sign extend both inputs to 64 bits before multiplying.
                int64_t arg1 = arg1Bits | (0 - (arg1Bits & srcSignMask));
                int64_t arg2 = arg2Bits | (0 - (arg2Bits & srcSignMask));
                resBits = (uint64_t)(arg1 * arg2);
            } else {
                resBits = arg1Bits * arg2Bits;
            }

            if (ext & 0x4)
                resBits += roundingBit;

            if (ext & 0x8)
                resBits >>= destBits;

            int destHiIndex = (i + 1) * destBits - 1;
            int destLoIndex = (i + 0) * destBits;
            result = insertBits(result, destHiIndex, destLoIndex, resBits);
        }
        FpDestReg.uqw = result;
    '''
957
# Element-wise unsigned average, rounded up: (a + b + 1) / 2.
# Requires srcSize == destSize. With bit 0 of ext set only the lowest
# element is processed; the other bits of FpDestReg keep their value.
958 class Mavg(MediaOp):
959 code = '''
960 assert(srcSize == destSize);
961 int size = srcSize;
962 int sizeBits = size * 8;
963 int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
964 uint64_t result = FpDestReg.uqw;
965
966 for (int i = 0; i < items; i++) {
967 int hiIndex = (i + 1) * sizeBits - 1;
968 int loIndex = (i + 0) * sizeBits;
969 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
970 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
971 uint64_t resBits = (arg1Bits + arg2Bits + 1) / 2;
972
973 result = insertBits(result, hiIndex, loIndex, resBits);
974 }
975 FpDestReg.uqw = result;
976 '''
977
# Sum of absolute differences: for every srcSize byte element the absolute
# value of (FpSrcReg1 element - FpSrcReg2 element) is accumulated, and the
# total, truncated to destSize * 8 bits, replaces the whole of FpDestReg.
# The elements are extracted as unsigned values; ext is not consulted.
978 class Msad(MediaOp):
979 code = '''
980 int srcBits = srcSize * 8;
981 int items = sizeof(FloatRegBits) / srcSize;
982
983 uint64_t sum = 0;
984 for (int i = 0; i < items; i++) {
985 int hiIndex = (i + 1) * srcBits - 1;
986 int loIndex = (i + 0) * srcBits;
987 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
988 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
989 int64_t resBits = arg1Bits - arg2Bits;
990 if (resBits < 0)
991 resBits = -resBits;
992 sum += resBits;
993 }
994 FpDestReg.uqw = sum & mask(destSize * 8);
995 '''
996
# Element-wise logical right shift of FpSrcReg1 by the amount in op2.
# "op2" is a placeholder: MediaOpMeta generates a register and an immediate
# version of this code from it. Requires srcSize == destSize. A shift
# amount of at least the element width yields zero. With bit 0 of ext set
# only the lowest element is processed.
997 class Msrl(MediaOp):
998 code = '''
999
1000 assert(srcSize == destSize);
1001 int size = srcSize;
1002 int sizeBits = size * 8;
1003 int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
1004 uint64_t shiftAmt = op2.uqw;
1005 uint64_t result = FpDestReg.uqw;
1006
1007 for (int i = 0; i < items; i++) {
1008 int hiIndex = (i + 1) * sizeBits - 1;
1009 int loIndex = (i + 0) * sizeBits;
1010 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
1011 uint64_t resBits;
1012 if (shiftAmt >= sizeBits) {
1013 resBits = 0;
1014 } else {
1015 resBits = (arg1Bits >> shiftAmt) &
1016 mask(sizeBits - shiftAmt);
1017 }
1018
1019 result = insertBits(result, hiIndex, loIndex, resBits);
1020 }
1021 FpDestReg.uqw = result;
1022 '''
1023
class Msra(MediaOp):
    # Element-wise arithmetic right shift of FpSrcReg1 by the amount in
    # op2. "op2" is a placeholder: MediaOpMeta generates a register and an
    # immediate version of this code from it. Requires srcSize == destSize.
    # A shift amount of at least the element width fills the element with
    # its sign bit. With bit 0 of ext set only the lowest element is
    # processed and the rest of FpDestReg is preserved.
    code = '''

        assert(srcSize == destSize);
        int size = srcSize;
        int sizeBits = size * 8;
        int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
        uint64_t shiftAmt = op2.uqw;
        uint64_t result = FpDestReg.uqw;

        for (int i = 0; i < items; i++) {
            int hiIndex = (i + 1) * sizeBits - 1;
            int loIndex = (i + 0) * sizeBits;
            uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
            uint64_t resBits;
            if (shiftAmt >= sizeBits) {
                if (bits(arg1Bits, sizeBits - 1))
                    resBits = mask(sizeBits);
                else
                    resBits = 0;
            } else {
                resBits = (arg1Bits >> shiftAmt);
                // After the shift the original sign bit sits at position
                // sizeBits - 1 - shiftAmt; replicate it upwards. A 64 bit
                // one keeps the shift defined for every element width.
                uint64_t movedSign = 1ULL << (sizeBits - 1 - shiftAmt);
                resBits = resBits | (0 - (resBits & movedSign));
            }

            result = insertBits(result, hiIndex, loIndex, resBits);
        }
        FpDestReg.uqw = result;
    '''
1054
# Element-wise left shift of FpSrcReg1 by the amount in op2. "op2" is a
# placeholder: MediaOpMeta generates a register and an immediate version of
# this code from it. Requires srcSize == destSize. A shift amount of at
# least the element width yields zero. With bit 0 of ext set only the
# lowest element is processed.
1055 class Msll(MediaOp):
1056 code = '''
1057
1058 assert(srcSize == destSize);
1059 int size = srcSize;
1060 int sizeBits = size * 8;
1061 int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
1062 uint64_t shiftAmt = op2.uqw;
1063 uint64_t result = FpDestReg.uqw;
1064
1065 for (int i = 0; i < items; i++) {
1066 int hiIndex = (i + 1) * sizeBits - 1;
1067 int loIndex = (i + 0) * sizeBits;
1068 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
1069 uint64_t resBits;
1070 if (shiftAmt >= sizeBits) {
1071 resBits = 0;
1072 } else {
1073 resBits = (arg1Bits << shiftAmt);
1074 }
1075
1076 result = insertBits(result, hiIndex, loIndex, resBits);
1077 }
1078 FpDestReg.uqw = result;
1079 '''
1080
class Cvti2f(MediaOp):
    # Converts signed integer elements of the single source operand to
    # floating point. Source and destination elements are 4 or 8 bytes.
    # With bit 0 of ext set only one element is converted. When the sizes
    # differ, bit 1 of ext selects the upper half of the narrower side:
    # the destination when narrowing, the source when widening. The unused
    # second operand slot is filled with "InstRegIndex(0)".
    def __init__(self, dest, src, \
            size = None, destSize = None, srcSize = None, ext = None):
        super(Cvti2f, self).__init__(dest, src,\
                "InstRegIndex(0)", size, destSize, srcSize, ext)
    code = '''
        union floatInt
        {
            float f;
            uint32_t i;
        };
        union doubleInt
        {
            double d;
            uint64_t i;
        };

        assert(destSize == 4 || destSize == 8);
        assert(srcSize == 4 || srcSize == 8);
        int srcSizeBits = srcSize * 8;
        int destSizeBits = destSize * 8;
        int items;
        int srcStart = 0;
        int destStart = 0;
        if (srcSize == 2 * destSize) {
            items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / srcSize;
            if (ext & 0x2)
                destStart = destSizeBits * items;
        } else if (destSize == 2 * srcSize) {
            items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
            if (ext & 0x2)
                srcStart = srcSizeBits * items;
        } else {
            items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
        }
        uint64_t result = FpDestReg.uqw;
        // bits() returns the element shifted down to bit zero, so its sign
        // bit is always at srcSizeBits - 1 regardless of where the element
        // sat in the register.
        uint64_t signMask = 1ULL << (srcSizeBits - 1);

        for (int i = 0; i < items; i++) {
            int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
            int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
            uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
            // Sign extend the element to 64 bits.
            int64_t sArg = argBits | (0 - (argBits & signMask));
            double arg = sArg;

            if (destSize == 4) {
                floatInt fi;
                fi.f = arg;
                argBits = fi.i;
            } else {
                doubleInt di;
                di.d = arg;
                argBits = di.i;
            }
            int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
            int destLoIndex = destStart + (i + 0) * destSizeBits;
            result = insertBits(result, destHiIndex, destLoIndex, argBits);
        }
        FpDestReg.uqw = result;
    '''
1140
# Converts floating point elements of the single source operand between
# float (4 bytes) and double (8 bytes). With bit 0 of ext set only one
# element is converted. When the sizes differ, bit 1 of ext selects the
# upper half of the narrower side: the destination when narrowing, the
# source when widening. The unused second operand slot is filled with
# "InstRegIndex(0)".
1141 class Cvtf2f(MediaOp):
1142 def __init__(self, dest, src, \
1143 size = None, destSize = None, srcSize = None, ext = None):
1144 super(Cvtf2f, self).__init__(dest, src,\
1145 "InstRegIndex(0)", size, destSize, srcSize, ext)
1146 code = '''
1147 union floatInt
1148 {
1149 float f;
1150 uint32_t i;
1151 };
1152 union doubleInt
1153 {
1154 double d;
1155 uint64_t i;
1156 };
1157
1158 assert(destSize == 4 || destSize == 8);
1159 assert(srcSize == 4 || srcSize == 8);
1160 int srcSizeBits = srcSize * 8;
1161 int destSizeBits = destSize * 8;
1162 int items;
1163 int srcStart = 0;
1164 int destStart = 0;
1165 if (srcSize == 2 * destSize) {
1166 items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / srcSize;
1167 if (ext & 0x2)
1168 destStart = destSizeBits * items;
1169 } else if (destSize == 2 * srcSize) {
1170 items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
1171 if (ext & 0x2)
1172 srcStart = srcSizeBits * items;
1173 } else {
1174 items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
1175 }
1176 uint64_t result = FpDestReg.uqw;
1177
1178 for (int i = 0; i < items; i++) {
1179 int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
1180 int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
1181 uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
1182 double arg;
1183
1184 if (srcSize == 4) {
1185 floatInt fi;
1186 fi.i = argBits;
1187 arg = fi.f;
1188 } else {
1189 doubleInt di;
1190 di.i = argBits;
1191 arg = di.d;
1192 }
1193 if (destSize == 4) {
1194 floatInt fi;
1195 fi.f = arg;
1196 argBits = fi.i;
1197 } else {
1198 doubleInt di;
1199 di.d = arg;
1200 argBits = di.i;
1201 }
1202 int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
1203 int destLoIndex = destStart + (i + 0) * destSizeBits;
1204 result = insertBits(result, destHiIndex, destLoIndex, argBits);
1205 }
1206 FpDestReg.uqw = result;
1207 '''
1208
class Mcmpi2r(MediaOp):
    # Element-wise signed integer compare that writes a mask to the
    # destination: an element becomes all ones when the comparison holds
    # and zero otherwise. With bit 1 of ext clear the test is
    # "arg1 == arg2", with it set the test is "arg1 > arg2". Requires
    # srcSize == destSize. With bit 0 of ext set only the lowest element is
    # processed and the rest of FpDestReg is preserved.
    code = '''
        assert(srcSize == destSize);
        int size = srcSize;
        int sizeBits = size * 8;
        int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
        uint64_t result = FpDestReg.uqw;
        // Built from a 64 bit one so the shift is well defined for every
        // element width, including 32 and 64 bit elements.
        uint64_t signMask = 1ULL << (sizeBits - 1);

        for (int i = 0; i < items; i++) {
            int hiIndex = (i + 1) * sizeBits - 1;
            int loIndex = (i + 0) * sizeBits;
            uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
            // Sign extend the element to 64 bits.
            int64_t arg1 = arg1Bits | (0 - (arg1Bits & signMask));
            uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
            int64_t arg2 = arg2Bits | (0 - (arg2Bits & signMask));

            bool equalHit = ((ext & 0x2) == 0) && (arg1 == arg2);
            bool greaterHit = ((ext & 0x2) == 0x2) && (arg1 > arg2);

            uint64_t resBits = 0;
            if (equalHit || greaterHit)
                resBits = mask(sizeBits);

            result = insertBits(result, hiIndex, loIndex, resBits);
        }
        FpDestReg.uqw = result;
    '''
1247 }};