X86: Implement a media square root microop.
[gem5.git] / src / arch / x86 / isa / microops / mediaop.isa
1 /// Copyright (c) 2009 The Regents of The University of Michigan
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met: redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer;
8 // redistributions in binary form must reproduce the above copyright
9 // notice, this list of conditions and the following disclaimer in the
10 // documentation and/or other materials provided with the distribution;
11 // neither the name of the copyright holders nor the names of its
12 // contributors may be used to endorse or promote products derived from
13 // this software without specific prior written permission.
14 //
15 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
19 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
21 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 //
27 // Authors: Gabe Black
28
29 def template MediaOpExecute {{
30 Fault %(class_name)s::execute(%(CPU_exec_context)s *xc,
31 Trace::InstRecord *traceData) const
32 {
33 Fault fault = NoFault;
34
35 %(op_decl)s;
36 %(op_rd)s;
37
38 %(code)s;
39
40 //Write the resulting state to the execution context
41 if(fault == NoFault)
42 {
43 %(op_wb)s;
44 }
45 return fault;
46 }
47 }};
48
49 def template MediaOpRegDeclare {{
50 class %(class_name)s : public %(base_class)s
51 {
52 protected:
53 void buildMe();
54
55 public:
56 %(class_name)s(ExtMachInst _machInst,
57 const char * instMnem,
58 bool isMicro, bool isDelayed, bool isFirst, bool isLast,
59 InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
60 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
61
62 %(class_name)s(ExtMachInst _machInst,
63 const char * instMnem,
64 InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
65 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
66
67 %(BasicExecDeclare)s
68 };
69 }};
70
71 def template MediaOpImmDeclare {{
72
73 class %(class_name)s : public %(base_class)s
74 {
75 protected:
76 void buildMe();
77
78 public:
79 %(class_name)s(ExtMachInst _machInst,
80 const char * instMnem,
81 bool isMicro, bool isDelayed, bool isFirst, bool isLast,
82 InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
83 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
84
85 %(class_name)s(ExtMachInst _machInst,
86 const char * instMnem,
87 InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
88 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
89
90 %(BasicExecDeclare)s
91 };
92 }};
93
94 def template MediaOpRegConstructor {{
95
96 inline void %(class_name)s::buildMe()
97 {
98 %(constructor)s;
99 }
100
101 inline %(class_name)s::%(class_name)s(
102 ExtMachInst machInst, const char * instMnem,
103 InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
104 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
105 %(base_class)s(machInst, "%(mnemonic)s", instMnem,
106 false, false, false, false,
107 _src1, _src2, _dest, _srcSize, _destSize, _ext,
108 %(op_class)s)
109 {
110 buildMe();
111 }
112
113 inline %(class_name)s::%(class_name)s(
114 ExtMachInst machInst, const char * instMnem,
115 bool isMicro, bool isDelayed, bool isFirst, bool isLast,
116 InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
117 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
118 %(base_class)s(machInst, "%(mnemonic)s", instMnem,
119 isMicro, isDelayed, isFirst, isLast,
120 _src1, _src2, _dest, _srcSize, _destSize, _ext,
121 %(op_class)s)
122 {
123 buildMe();
124 }
125 }};
126
127 def template MediaOpImmConstructor {{
128
129 inline void %(class_name)s::buildMe()
130 {
131 %(constructor)s;
132 }
133
134 inline %(class_name)s::%(class_name)s(
135 ExtMachInst machInst, const char * instMnem,
136 InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
137 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
138 %(base_class)s(machInst, "%(mnemonic)s", instMnem,
139 false, false, false, false,
140 _src1, _imm8, _dest, _srcSize, _destSize, _ext,
141 %(op_class)s)
142 {
143 buildMe();
144 }
145
146 inline %(class_name)s::%(class_name)s(
147 ExtMachInst machInst, const char * instMnem,
148 bool isMicro, bool isDelayed, bool isFirst, bool isLast,
149 InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
150 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
151 %(base_class)s(machInst, "%(mnemonic)s", instMnem,
152 isMicro, isDelayed, isFirst, isLast,
153 _src1, _imm8, _dest, _srcSize, _destSize, _ext,
154 %(op_class)s)
155 {
156 buildMe();
157 }
158 }};
159
160 let {{
# Output accumulators. They start out as empty strings so that
# appending to them with += always works.
header_output = decoder_output = exec_output = ""

# Template triples, in the order (declaration, constructor, execute).
# The register and immediate flavours share the execute template.
regTemplates = (MediaOpRegDeclare, MediaOpRegConstructor, MediaOpExecute)

immTemplates = (MediaOpImmDeclare, MediaOpImmConstructor, MediaOpExecute)
176
class MediaOpMeta(type):
    # Metaclass for media microops. Creating a non-abstract class with this
    # metaclass generates the C++ classes for its "code" attribute and
    # registers the Python class in microopClasses under its lower-cased
    # name (and under name + 'i' when the code uses the generic second
    # operand).

    # A use of the generic second operand: "op2", optionally prefixed with
    # "s" and optionally followed by a ".type" qualifier. The lookbehind
    # keeps identifiers that merely end in "op2" from matching. The same
    # pattern is used for code generation and for registration so the two
    # can never disagree about whether an immediate variant exists.
    _op2Matcher = re.compile(
        r"(?<!\w)(?P<prefix>s?)op2(?P<typeQual>\.\w+)?")

    # A use of the immediate operand as a whole word.
    _imm8Matcher = re.compile(r"(?<!\w)imm8(?!\w)")

    def buildCppClasses(self, name, Name, suffix, code):
        # Append the C++ declaration, constructor and execute code for one
        # microop to the global output strings.
        #
        # name   -- mnemonic handed to InstObjParams
        # Name   -- base of the C++ class name
        # suffix -- appended to Name to form the C++ class name
        # code   -- C++ body of the microop

        # Globals to stick the output in
        global header_output
        global decoder_output
        global exec_output

        # If op2 is used anywhere, make register and immediate versions
        # of this code.
        matcher = MediaOpMeta._op2Matcher
        match = matcher.search(code)
        if match:
            # The prefix and qualifier of the FIRST use decide the
            # replacement text applied to every use.
            typeQual = match.group("typeQual") or ""
            src2_name = "%spsrc2%s" % (match.group("prefix"), typeQual)
            self.buildCppClasses(name, Name, suffix,
                    matcher.sub(src2_name, code))
            self.buildCppClasses(name + "i", Name, suffix + "Imm",
                    matcher.sub("imm8", code))
            return

        # If imm8 shows up in the code, use the immediate templates, if
        # not, hopefully the register ones will be correct.
        if MediaOpMeta._imm8Matcher.search(code):
            base = "X86ISA::MediaOpImm"
            templates = immTemplates
        else:
            base = "X86ISA::MediaOpReg"
            templates = regTemplates

        # Get everything ready for the substitution
        iop = InstObjParams(name, Name + suffix, base, {"code" : code})

        # Generate the actual code (finally!)
        header_output += templates[0].subst(iop)
        decoder_output += templates[1].subst(iop)
        exec_output += templates[2].subst(iop)

    def __new__(mcls, Name, bases, dict):
        # Build the Python class and, unless its class body set
        # "abstract" to a true value, generate and register the microop.
        global microopClasses

        # "abstract" is consumed here so it is not inherited by subclasses.
        abstract = dict.pop('abstract', False)
        name = Name.lower()

        cls = super(MediaOpMeta, mcls).__new__(mcls, Name, bases, dict)
        if not abstract:
            cls.className = Name
            cls.base_mnemonic = name
            code = cls.code

            # Set up the C++ classes
            mcls.buildCppClasses(cls, name, Name, "", code)

            # Hook into the microassembler dict
            microopClasses[name] = cls

            # Register the immediate mnemonic only when buildCppClasses
            # generated an immediate class, i.e. under exactly the same
            # pattern.
            if MediaOpMeta._op2Matcher.search(code):
                microopClasses[name + 'i'] = cls
        return cls
247
248
class MediaOp(X86Microop):
    # Base class of the media microops as seen by the microassembler. It
    # records the operands and sizes of one microop instance and produces
    # the C++ expression that allocates the matching StaticInst.
    __metaclass__ = MediaOpMeta
    # This class itself doesn't act as a microop
    abstract = True

    def __init__(self, dest, src1, op2,
            size = None, destSize = None, srcSize = None, ext = None):
        # dest, src1, op2 -- operand texts placed verbatim in the allocator
        # size            -- sets both the source and destination size
        # srcSize         -- overrides the source size
        # destSize        -- overrides the destination size
        # ext             -- extension value, 0 when omitted
        #
        # Raises Exception when no source or destination size was given.
        self.dest = dest
        self.src1 = src1
        self.op2 = op2
        if size is not None:
            self.srcSize = size
            self.destSize = size
        if srcSize is not None:
            self.srcSize = srcSize
        if destSize is not None:
            self.destSize = destSize
        # The sizes are only assigned above when an argument supplied
        # them, so look them up defensively: a plain attribute read would
        # fail with AttributeError before the intended message could be
        # raised (unless a base or subclass provides a default).
        if getattr(self, "srcSize", None) is None:
            raise Exception("Source size not set.")
        if getattr(self, "destSize", None) is None:
            raise Exception("Dest size not set.")
        if ext is None:
            self.ext = 0
        else:
            self.ext = ext

    def getAllocator(self, *microFlags):
        # Return the C++ "new ..." expression for this microop. The
        # immediate C++ class is chosen when the instance was created
        # under the mnemonic with the trailing 'i'.
        className = self.className
        if self.mnemonic == self.base_mnemonic + 'i':
            className += "Imm"
        allocator = '''new %(class_name)s(machInst, macrocodeBlock
                %(flags)s, %(src1)s, %(op2)s, %(dest)s,
                %(srcSize)s, %(destSize)s, %(ext)s)''' % {
            "class_name" : className,
            "flags" : self.microFlagsText(microFlags),
            "src1" : self.src1, "op2" : self.op2,
            "dest" : self.dest,
            "srcSize" : self.srcSize,
            "destSize" : self.destSize,
            "ext" : self.ext}
        return allocator
290
class Mov2int(MediaOp):
    # Copies the low srcSize bytes of a media register into an integer
    # register, merged at destSize.
    def __init__(self, dest, src,
            size = None, destSize = None, srcSize = None, ext = None):
        # There is no second source; a placeholder register index fills
        # that slot of the generic constructor.
        unusedSrc2 = "InstRegIndex(0)"
        super(Mov2int, self).__init__(dest, src, unusedSrc2,
                size, destSize, srcSize, ext)
    code = '''
        uint64_t lowSrcBits = bits(FpSrcReg1.uqw, srcSize * 8 - 1, 0);
        DestReg = merge(DestReg, lowSrcBits, destSize);
    '''
300
class Mov2fp(MediaOp):
    # Copies an integer register value into the low destSize bytes of a
    # media register, leaving the remaining bits as they were.
    def __init__(self, dest, src,
            size = None, destSize = None, srcSize = None, ext = None):
        # There is no second source; a placeholder register index fills
        # that slot of the generic constructor.
        unusedSrc2 = "InstRegIndex(0)"
        super(Mov2fp, self).__init__(dest, src, unusedSrc2,
                size, destSize, srcSize, ext)
    code = '''
        uint64_t intValue = pick(SrcReg1, 0, srcSize);
        FpDestReg.uqw =
            insertBits(FpDestReg.uqw, destSize * 8 - 1, 0, intValue);
    '''
311
class Unpack(MediaOp):
    # Interleaves items from the two sources: result item 2*i comes from
    # the first source and item 2*i + 1 from the second. A non-zero ext
    # selects the upper half of the source items instead of the lower.
    code = '''
        assert(srcSize == destSize);
        int size = destSize;
        int itemBits = 8 * size;
        int items = (sizeof(FloatRegBits) / size) / 2;
        int firstItem = ext ? items : 0;
        uint64_t interleaved = 0;
        for (int i = 0; i < items; i++) {
            // Both sources are read at the same item position.
            int srcLo = (i + firstItem) * itemBits;
            int srcHi = srcLo + itemBits - 1;
            int destLo = 2 * i * itemBits;

            uint64_t evenItem = bits(FpSrcReg1.uqw, srcHi, srcLo);
            interleaved = insertBits(interleaved,
                                     destLo + itemBits - 1,
                                     destLo,
                                     evenItem);

            uint64_t oddItem = bits(FpSrcReg2.uqw, srcHi, srcLo);
            interleaved = insertBits(interleaved,
                                     destLo + 2 * itemBits - 1,
                                     destLo + itemBits,
                                     oddItem);
        }
        FpDestReg.uqw = interleaved;
    '''
337
class Pack(MediaOp):
    # Narrows signed srcSize-byte items to destSize-byte items with
    # saturation. The low half of the result is packed from the first
    # source and the high half from the second. Bit 0 of ext selects
    # signed saturation; otherwise the result saturates to the unsigned
    # range of the destination item.
    code = '''
        assert(srcSize == destSize * 2);
        int items = (sizeof(FloatRegBits) / destSize);
        int halfItems = items / 2;
        int destBits = destSize * 8;
        int srcBits = srcSize * 8;
        bool signedSat = (ext & 0x1);
        uint64_t result = 0;
        for (int i = 0; i < items; i++) {
            // Each source supplies halfItems items, so the item index
            // within the second source restarts at zero. (Indexing it
            // with i - items produced negative bit positions.)
            bool fromFirst = (i < halfItems);
            int srcItem = fromFirst ? i : (i - halfItems);
            uint64_t srcData = fromFirst ? FpSrcReg1.uqw : FpSrcReg2.uqw;
            uint64_t picked =
                bits(srcData, (srcItem + 1) * srcBits - 1,
                              (srcItem + 0) * srcBits);
            unsigned signBit = bits(picked, srcBits - 1);

            // Handle saturation.
            if (signedSat) {
                // The value fits when the discarded bits and the new
                // sign bit are all copies of the source sign bit. That
                // field is srcBits - destBits + 1 bits wide.
                uint64_t overflow = bits(picked, srcBits - 1, destBits - 1);
                if (signBit) {
                    if (overflow != mask(srcBits - destBits + 1)) {
                        // Most negative value; shifted as 64 bits so a
                        // 32 bit item cannot overflow an int.
                        picked = ((uint64_t)1 << (destBits - 1));
                    }
                } else {
                    if (overflow != 0)
                        picked = mask(destBits - 1);
                }
            } else {
                // Unsigned saturation of a signed source: negative
                // values clamp to zero, and only the bits above the
                // destination width signal a too-large value.
                if (signBit) {
                    picked = 0;
                } else if (bits(picked, srcBits - 1, destBits) != 0) {
                    picked = mask(destBits);
                }
            }
            result = insertBits(result,
                                (i + 1) * destBits - 1,
                                (i + 0) * destBits,
                                picked);
        }
        FpDestReg.uqw = result;
    '''
404
class Mxor(MediaOp):
    # Bitwise XOR of the two source registers.
    def __init__(self, dest, src1, src2):
        # The operation is size agnostic; a size of 1 is passed to
        # satisfy the base class size check.
        super(Mxor, self).__init__(dest, src1, src2, size = 1)
    code = '''
        FpDestReg.uqw = FpSrcReg1.uqw ^ FpSrcReg2.uqw;
    '''
411
class Mor(MediaOp):
    # Bitwise OR of the two source registers.
    def __init__(self, dest, src1, src2):
        # The operation is size agnostic; a size of 1 is passed to
        # satisfy the base class size check.
        super(Mor, self).__init__(dest, src1, src2, size = 1)
    code = '''
        FpDestReg.uqw = FpSrcReg1.uqw | FpSrcReg2.uqw;
    '''
418
class Mand(MediaOp):
    # Bitwise AND of the two source registers.
    def __init__(self, dest, src1, src2):
        # The operation is size agnostic; a size of 1 is passed to
        # satisfy the base class size check.
        super(Mand, self).__init__(dest, src1, src2, size = 1)
    code = '''
        FpDestReg.uqw = FpSrcReg1.uqw & FpSrcReg2.uqw;
    '''
425
class Mandn(MediaOp):
    # Bitwise AND of the complement of the first source with the second.
    def __init__(self, dest, src1, src2):
        # The operation is size agnostic; a size of 1 is passed to
        # satisfy the base class size check.
        super(Mandn, self).__init__(dest, src1, src2, size = 1)
    code = '''
        FpDestReg.uqw = ~FpSrcReg1.uqw & FpSrcReg2.uqw;
    '''
432
class Mminf(MediaOp):
    # Floating point minimum of 4 or 8 byte items. Bit 0 of ext restricts
    # the operation to the lowest item; the other destination bits are
    # then left unchanged.
    code = '''
        // Views used to reinterpret raw register bits as floating point.
        union floatInt
        {
            float f;
            uint32_t i;
        };
        union doubleInt
        {
            double d;
            uint64_t i;
        };

        assert(srcSize == destSize);
        int size = srcSize;
        int sizeBits = size * 8;
        assert(srcSize == 4 || srcSize == 8);
        int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
        uint64_t result = FpDestReg.uqw;

        for (int i = 0; i < items; i++) {
            int loIndex = i * sizeBits;
            int hiIndex = loIndex + sizeBits - 1;
            uint64_t firstBits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
            uint64_t secondBits = bits(FpSrcReg2.uqw, hiIndex, loIndex);

            double first, second;
            if (size == 4) {
                floatInt conv;
                conv.i = firstBits;
                first = conv.f;
                conv.i = secondBits;
                second = conv.f;
            } else {
                doubleInt conv;
                conv.i = firstBits;
                first = conv.d;
                conv.i = secondBits;
                second = conv.d;
            }

            // Whenever the comparison is false, including when it
            // involves a NaN, the second source is selected.
            uint64_t chosen = (first < second) ? firstBits : secondBits;
            result = insertBits(result, hiIndex, loIndex, chosen);
        }
        FpDestReg.uqw = result;
    '''
482
class Mmaxf(MediaOp):
    # Floating point maximum of 4 or 8 byte items. Bit 0 of ext restricts
    # the operation to the lowest item; the other destination bits are
    # then left unchanged.
    code = '''
        // Views used to reinterpret raw register bits as floating point.
        union floatInt
        {
            float f;
            uint32_t i;
        };
        union doubleInt
        {
            double d;
            uint64_t i;
        };

        assert(srcSize == destSize);
        int size = srcSize;
        int sizeBits = size * 8;
        assert(srcSize == 4 || srcSize == 8);
        int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
        uint64_t result = FpDestReg.uqw;

        for (int i = 0; i < items; i++) {
            int loIndex = i * sizeBits;
            int hiIndex = loIndex + sizeBits - 1;
            uint64_t firstBits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
            uint64_t secondBits = bits(FpSrcReg2.uqw, hiIndex, loIndex);

            double first, second;
            if (size == 4) {
                floatInt conv;
                conv.i = firstBits;
                first = conv.f;
                conv.i = secondBits;
                second = conv.f;
            } else {
                doubleInt conv;
                conv.i = firstBits;
                first = conv.d;
                conv.i = secondBits;
                second = conv.d;
            }

            // Whenever the comparison is false, including when it
            // involves a NaN, the second source is selected.
            uint64_t chosen = (first > second) ? firstBits : secondBits;
            result = insertBits(result, hiIndex, loIndex, chosen);
        }
        FpDestReg.uqw = result;
    '''
532
class Msqrt(MediaOp):
    # Floating point square root of 4 or 8 byte items of the source.
    # Bit 0 of ext restricts the operation to the lowest item; the other
    # destination bits are then left unchanged.
    def __init__(self, dest, src,
            size = None, destSize = None, srcSize = None, ext = None):
        # There is no second source; a placeholder register index fills
        # that slot of the generic constructor.
        unusedSrc2 = "InstRegIndex(0)"
        super(Msqrt, self).__init__(dest, src, unusedSrc2,
                size, destSize, srcSize, ext)
    code = '''
        // Views used to reinterpret raw register bits as floating point.
        union floatInt
        {
            float f;
            uint32_t i;
        };
        union doubleInt
        {
            double d;
            uint64_t i;
        };

        assert(srcSize == destSize);
        int size = srcSize;
        int sizeBits = size * 8;
        assert(srcSize == 4 || srcSize == 8);
        int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
        uint64_t result = FpDestReg.uqw;

        for (int i = 0; i < items; i++) {
            int loIndex = i * sizeBits;
            int hiIndex = loIndex + sizeBits - 1;
            uint64_t itemBits = bits(FpSrcReg1.uqw, hiIndex, loIndex);

            // Reinterpret the item, take the root, and convert it back
            // to raw bits.
            if (size == 4) {
                floatInt conv;
                conv.i = itemBits;
                conv.f = sqrt(conv.f);
                itemBits = conv.i;
            } else {
                doubleInt conv;
                conv.i = itemBits;
                conv.d = sqrt(conv.d);
                itemBits = conv.i;
            }
            result = insertBits(result, hiIndex, loIndex, itemBits);
        }
        FpDestReg.uqw = result;
    '''
577 }};