x86: implement rcpps and rcpss SSE insts

author Steve Reinhardt <steve.reinhardt@amd.com>

Wed, 7 Oct 2015 00:26:50 +0000 (17:26 -0700)

committer Steve Reinhardt <steve.reinhardt@amd.com>

Wed, 7 Oct 2015 00:26:50 +0000 (17:26 -0700)
author Steve Reinhardt <steve.reinhardt@amd.com>
Wed, 7 Oct 2015 00:26:50 +0000 (17:26 -0700)
committer Steve Reinhardt <steve.reinhardt@amd.com>
Wed, 7 Oct 2015 00:26:50 +0000 (17:26 -0700)
diff --git a/src/arch/x86/isa/decoder/two_byte_opcodes.isa b/src/arch/x86/isa/decoder/two_byte_opcodes.isa

index 0ba7434e8a853264a26917a065f9dd5eff3fbaa9..4a21e2900761210a3e83cbda3ac19f967a3fa321 100644 (file)
--- a/src/arch/x86/isa/decoder/two_byte_opcodes.isa
+++ b/src/arch/x86/isa/decoder/two_byte_opcodes.isa
@@ -463,7 +463,7 @@
                      0x0: MOVMSKPS(Gd,VRo);
                      0x1: SQRTPS(Vo,Wo);
                      0x2: WarnUnimpl::rqsrtps_Vo_Wo();
-                    0x3: WarnUnimpl::rcpps_Vo_Wo();
+                    0x3: RCPPS(Vo,Wo);
                      0x4: ANDPS(Vo,Wo);
                      0x5: ANDNPS(Vo,Wo);
                      0x6: ORPS(Vo,Wo);
@@ -473,7 +473,7 @@
                  0x4: decode OPCODE_OP_BOTTOM3 {
                      0x1: SQRTSS(Vd,Wd);
                      0x2: WarnUnimpl::rsqrtss_Vd_Wd();
-                    0x3: WarnUnimpl::rcpss_Vd_Wd();
+                    0x3: RCPSS(Vd,Wd);
                      default: UD2();
                  }
                  // operand size (0x66)
diff --git a/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/reciprocal_estimation.py b/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/reciprocal_estimation.py

index 6e0d7fbb635e20e1f9b7a8707acdcdf6e3b48871..666c45ca12ecc13bef5ea109a2c4631123dc0499 100644 (file)
--- a/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/reciprocal_estimation.py
+++ b/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/reciprocal_estimation.py
@@ -1,4 +1,6 @@
  # Copyright (c) 2007 The Hewlett-Packard Development Company
+# Copyright (c) 2015 Advanced Micro Devices, Inc.
+#
  # All rights reserved.
  #
  # The license below extends only to copyright in the software and shall
@@ -34,8 +36,41 @@
  # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  #
  # Authors: Gabe Black
+#          Steve Reinhardt
  
  microcode = '''
-# RCPPS
-# RCPSS
+# RCPSS, register-source form: a single mrcp microop over 4-byte elements
+# with ext=Scalar, reading xmmlm and writing xmml.
+# NOTE(review): presumably ext=Scalar limits mrcp to the lowest element and
+# leaves the rest of the destination untouched -- confirm against numItems().
+def macroop RCPSS_XMM_XMM {
+    mrcp xmml, xmmlm, size=4, ext=Scalar
+};
+
+# RCPSS, memory-source form (sib addressing): load 8 bytes into the
+# temporary ufp1, then run the scalar 4-byte mrcp on it into xmml.
+# NOTE(review): the load is 8 bytes wide although the op uses size=4;
+# presumably only the low 4 bytes are consumed -- confirm.
+def macroop RCPSS_XMM_M {
+    ldfp ufp1, seg, sib, disp, dataSize=8
+    mrcp xmml, ufp1, size=4, ext=Scalar
+};
+
+# RCPSS, memory-source form (riprel addressing): rdip into t7, load 8 bytes
+# into the temporary ufp1, then run the scalar 4-byte mrcp on it into xmml.
+# NOTE(review): presumably t7 supplies the instruction pointer that the
+# riprel address computation uses -- confirm.
+def macroop RCPSS_XMM_P {
+    rdip t7
+    ldfp ufp1, seg, riprel, disp, dataSize=8
+    mrcp xmml, ufp1, size=4, ext=Scalar
+};
+
+# RCPPS, register-source form: two mrcp microops over 4-byte elements with
+# ext=0, one from xmmlm to xmml and one from xmmhm to xmmh.
+# NOTE(review): presumably xmml/xmmh are the low and high 64-bit halves of
+# the 128-bit register, so the pair covers all four floats -- confirm.
+def macroop RCPPS_XMM_XMM {
+    mrcp xmml, xmmlm, size=4, ext=0
+    mrcp xmmh, xmmhm, size=4, ext=0
+};
+
+# RCPPS, memory-source form (sib addressing): two 8-byte loads, at
+# DISPLACEMENT and DISPLACEMENT + 8, fill the temporaries ufp1 and ufp2;
+# each is then passed through a 4-byte-element mrcp into xmml and xmmh
+# respectively.
+def macroop RCPPS_XMM_M {
+    ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+    ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+    mrcp xmml, ufp1, size=4, ext=0
+    mrcp xmmh, ufp2, size=4, ext=0
+};
+
+# RCPPS, memory-source form (riprel addressing): rdip into t7, then two
+# 8-byte loads, at DISPLACEMENT and DISPLACEMENT + 8, fill the temporaries
+# ufp1 and ufp2; each is then passed through a 4-byte-element mrcp into
+# xmml and xmmh respectively.
+# NOTE(review): presumably t7 supplies the instruction pointer that the
+# riprel address computation uses -- confirm.
+def macroop RCPPS_XMM_P {
+    rdip t7
+    ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+    ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+    mrcp xmml, ufp1, size=4, ext=0
+    mrcp xmmh, ufp2, size=4, ext=0
+};
  '''
diff --git a/src/arch/x86/isa/microops/mediaop.isa b/src/arch/x86/isa/microops/mediaop.isa

index e382151efc0385fead2ba719475ed57425006e25..e5f04109f9ab756e586c9ea9bca63b1340da9894 100644 (file)
--- a/src/arch/x86/isa/microops/mediaop.isa
+++ b/src/arch/x86/isa/microops/mediaop.isa
@@ -1,4 +1,6 @@
-/// Copyright (c) 2009 The Regents of The University of Michigan
+// Copyright (c) 2009 The Regents of The University of Michigan
+// Copyright (c) 2015 Advanced Micro Devices, Inc.
+//
  // All rights reserved.
  //
  // Redistribution and use in source and binary forms, with or without
@@ -691,6 +693,41 @@ let {{
              FpDestReg_uqw = result;
          '''
  
+    # compute approximate reciprocal --- single-precision only
+    #
+    # Media microop used by the RCPSS/RCPPS macroops. For every 4-byte
+    # element selected by numItems(), the generated code reads the element
+    # from FpSrcReg1_uqw, reinterprets its bits as a float through a union,
+    # replaces it with 1.0 / value, and inserts the resulting bits at the
+    # same bit position of the destination.
+    class Mrcp(MediaOp):
+        # Single-source op: only dest and src are taken from the caller.
+        # The literal "InstRegIndex(0)" is passed in the position that
+        # follows src in the base-class constructor.
+        # NOTE(review): presumably that position is the unused second
+        # source register -- confirm against MediaOp.__init__.
+        def __init__(self, dest, src, \
+                size = None, destSize = None, srcSize = None, ext = None):
+            super(Mrcp, self).__init__(dest, src,\
+                    "InstRegIndex(0)", size, destSize, srcSize, ext)
+        # C++ body of the microop:
+        #  - asserts srcSize == 4 and srcSize == destSize, then works with
+        #    a fixed element size of 4 bytes (32 bits);
+        #  - starts from the current FpDestReg_uqw value, so any bits not
+        #    overwritten by the loop keep their previous contents;
+        #  - 1.0 is a double literal, so the division is carried out in
+        #    double precision and narrowed to float on assignment; per the
+        #    in-code comment this is more accurate than what hardware
+        #    provides.
+        code = '''
+            union floatInt
+            {
+                float f;
+                uint32_t i;
+            };
+
+            assert(srcSize == 4);  // ISA defines single-precision only
+            assert(srcSize == destSize);
+            const int size = 4;
+            const int sizeBits = size * 8;
+            int items = numItems(size);
+            uint64_t result = FpDestReg_uqw;
+
+            for (int i = 0; i < items; i++) {
+                int hiIndex = (i + 1) * sizeBits - 1;
+                int loIndex = (i + 0) * sizeBits;
+                uint64_t argBits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
+
+                floatInt fi;
+                fi.i = argBits;
+                // This is more accuracy than HW provides, but oh well
+                fi.f = 1.0 / fi.f;
+                argBits = fi.i;
+                result = insertBits(result, hiIndex, loIndex, argBits);
+            }
+            FpDestReg_uqw = result;
+        '''
+
      class Maddf(MediaOp):
          code = '''
              union floatInt
author	Steve Reinhardt <steve.reinhardt@amd.com>
	Wed, 7 Oct 2015 00:26:50 +0000 (17:26 -0700)
committer	Steve Reinhardt <steve.reinhardt@amd.com>
	Wed, 7 Oct 2015 00:26:50 +0000 (17:26 -0700)
src/arch/x86/isa/decoder/two_byte_opcodes.isa		patch \| blob \| history
src/arch/x86/isa/insts/simd128/floating_point/arithmetic/reciprocal_estimation.py		patch \| blob \| history
src/arch/x86/isa/microops/mediaop.isa		patch \| blob \| history