X86: Implement a media square root microop.
author Gabe Black <gblack@eecs.umich.edu>
Tue, 18 Aug 2009 01:34:16 +0000 (18:34 -0700)
committer Gabe Black <gblack@eecs.umich.edu>
Tue, 18 Aug 2009 01:34:16 +0000 (18:34 -0700)
src/arch/x86/isa/microops/mediaop.isa

index 5cd7bd4f57f5554562b48605ede2522f194fc04b..f61c8234f885028f744591299424944b7ccbe6a7 100644 (file)
@@ -529,4 +529,49 @@ let {{
             }
             FpDestReg.uqw = result;
         '''
+
+    class Msqrt(MediaOp):
+        # Media square root microop: takes the floating point square root
+        # of each processed element of the source register and stores the
+        # results in the matching element positions of the destination.
+        #
+        # Constructor arguments:
+        #   dest, src                    -- destination and source operands,
+        #                                   forwarded to MediaOp.__init__.
+        #   size, destSize, srcSize, ext -- optional (default None) and
+        #                                   forwarded unchanged.
+        # The literal "InstRegIndex(0)" fills the third positional slot of
+        # the base constructor; presumably a placeholder for a second source
+        # operand this microop does not read -- TODO confirm against
+        # MediaOp.__init__.
+        def __init__(self, dest, src, \
+                size = None, destSize = None, srcSize = None, ext = None):
+            super(Msqrt, self).__init__(dest, src,\
+                    "InstRegIndex(0)", size, destSize, srcSize, ext)
+        # Microop body, kept as a string of C-like code. What it does:
+        #   * Asserts that srcSize equals destSize and is either 4 or 8
+        #     (bytes); 4 selects the float path, 8 the double path.
+        #   * If bit 0 of ext is set, only one element (index 0, the lowest
+        #     bits) is processed; otherwise sizeof(FloatRegBits) / size
+        #     elements are processed.
+        #   * result starts out as the current FpDestReg.uqw, so any bits
+        #     outside the processed elements keep the destination's previous
+        #     value (assuming insertBits only replaces the given bit range).
+        #   * Each element is extracted from FpSrcReg1.uqw with bits(),
+        #     reinterpreted as float/double through the floatInt/doubleInt
+        #     unions, passed through sqrt(), converted back to its raw bit
+        #     pattern, and merged into result with insertBits().
+        #   * result is finally written back to FpDestReg.uqw.
+        code = '''
+            union floatInt
+            {
+                float f;
+                uint32_t i;
+            };
+            union doubleInt
+            {
+                double d;
+                uint64_t i;
+            };
+
+            assert(srcSize == destSize);
+            int size = srcSize;
+            int sizeBits = size * 8;
+            assert(srcSize == 4 || srcSize == 8);
+            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
+            uint64_t result = FpDestReg.uqw;
+
+            for (int i = 0; i < items; i++) {
+                int hiIndex = (i + 1) * sizeBits - 1;
+                int loIndex = (i + 0) * sizeBits;
+                uint64_t argBits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
+
+                if (size == 4) {
+                    floatInt fi;
+                    fi.i = argBits;
+                    fi.f = sqrt(fi.f);
+                    argBits = fi.i;
+                } else {
+                    doubleInt di;
+                    di.i = argBits;
+                    di.d = sqrt(di.d);
+                    argBits = di.i;
+                }
+                result = insertBits(result, hiIndex, loIndex, argBits);
+            }
+            FpDestReg.uqw = result;
+        '''
 }};