arch: ISA parser additions of vector registers

author Rekai Gonzalez-Alberquilla <Rekai.GonzalezAlberquilla@arm.com>

Wed, 5 Apr 2017 18:24:23 +0000 (13:24 -0500)

committer Andreas Sandberg <andreas.sandberg@arm.com>

Wed, 5 Jul 2017 14:43:49 +0000 (14:43 +0000)
author Rekai Gonzalez-Alberquilla <Rekai.GonzalezAlberquilla@arm.com>
Wed, 5 Apr 2017 18:24:23 +0000 (13:24 -0500)
committer Andreas Sandberg <andreas.sandberg@arm.com>
Wed, 5 Jul 2017 14:43:49 +0000 (14:43 +0000)
diff --git a/src/arch/alpha/faults.cc b/src/arch/alpha/faults.cc

index 8a6e469f29c9d03a9fb9fbbb111600f2f43eb078..59d95000b2c4975f1b76b169cc1f9ea9d3b3afe7 100644 (file)
--- a/src/arch/alpha/faults.cc
+++ b/src/arch/alpha/faults.cc
@@ -98,6 +98,13 @@ FaultName FloatEnableFault::_name = "fen";
  FaultVect FloatEnableFault::_vect = 0x0581;
  FaultStat FloatEnableFault::_count;
  
+/* We use the same fault vector as FloatEnableFault: for the guest system
+ * the two should be the same, but on the host side keeping them distinct is
+ * helpful for debugging/monitoring purposes. */
+FaultName VectorEnableFault::_name = "ven";
+FaultVect VectorEnableFault::_vect = 0x0581;
+FaultStat VectorEnableFault::_count;
+
  FaultName PalFault::_name = "pal";
  FaultVect PalFault::_vect = 0x2001;
  FaultStat PalFault::_count;
diff --git a/src/arch/alpha/faults.hh b/src/arch/alpha/faults.hh

index 80e3ae5e185d9e36fc28d6a9b0a24c9476811c62..07789a22ec0f44e41645d969041102d8093be565 100644 (file)
--- a/src/arch/alpha/faults.hh
+++ b/src/arch/alpha/faults.hh
@@ -299,6 +299,19 @@ class FloatEnableFault : public AlphaFault
      FaultStat & countStat() {return _count;}
  };
  
+/**
+ * Fault type with its own name, fault vector and statistics counter, each
+ * held in a class-wide static and exposed through the accessors below.
+ */
+class VectorEnableFault : public AlphaFault
+{
+  public:
+    /** Name associated with this fault class. */
+    FaultName
+    name() const
+    {
+        return _name;
+    }
+
+    /** Fault vector associated with this fault class. */
+    FaultVect
+    vect()
+    {
+        return _vect;
+    }
+
+    /** Statistics counter shared by all faults of this class. */
+    FaultStat &
+    countStat()
+    {
+        return _count;
+    }
+
+  private:
+    static FaultName _name;
+    static FaultVect _vect;
+    static FaultStat _count;
+};
+
  class PalFault : public AlphaFault
  {
    private:
diff --git a/src/arch/alpha/isa/fp.isa b/src/arch/alpha/isa/fp.isa

index 6213c8e08b8304aefee8a1f982418c13a87519f4..ea692aeef544216c0d7a46c5689dd4b160a372dc 100644 (file)
--- a/src/arch/alpha/isa/fp.isa
+++ b/src/arch/alpha/isa/fp.isa
@@ -50,6 +50,9 @@ output exec {{
          }
          return fault;
      }
+    /// Unconditionally builds and returns a VectorEnableFault; the
+    /// execution context argument is not consulted.
+    inline Fault checkVectorEnableFault(CPU_EXEC_CONTEXT *xc) {
+        Fault fault = std::make_shared<VectorEnableFault>();
+        return fault;
+    }
  }};
  
  output header {{
diff --git a/src/arch/arm/isa/insts/fp64.isa b/src/arch/arm/isa/insts/fp64.isa

index 706f77fb012ddccef77f04a199a8121769df9c7a..a5e1085ded23caf9eccbba905aeea3b7fe6d7753 100644 (file)
--- a/src/arch/arm/isa/insts/fp64.isa
+++ b/src/arch/arm/isa/insts/fp64.isa
@@ -1,6 +1,6 @@
  // -*- mode:c++ -*-
  
-// Copyright (c) 2012-2013 ARM Limited
+// Copyright (c) 2012-2013, 2016 ARM Limited
  // All rights reserved
  //
  // The license below extends only to copyright in the software and shall
@@ -123,9 +123,11 @@ let {{
      exec_output    += BasicExecute.subst(fmovCoreRegXIop);
  
      fmovUCoreRegXCode = vfp64EnabledCheckCode + '''
+        /* Explicitly merge with previous value */
+        AA64FpDestP0_uw = AA64FpDestP0_uw;
+        AA64FpDestP1_uw = AA64FpDestP1_uw;
          AA64FpDestP2_uw = XOp1_ud;
-        AA64FpDestP3_uw = XOp1_ud >> 32;
-    '''
+        AA64FpDestP3_uw = XOp1_ud >> 32;'''
      fmovUCoreRegXIop = InstObjParams("fmov", "FmovUCoreRegX", "FpRegRegOp",
                                      { "code": fmovUCoreRegXCode,
                                        "op_class": "FloatMiscOp" }, [])
diff --git a/src/arch/arm/isa/insts/neon64.isa b/src/arch/arm/isa/insts/neon64.isa

index 7c9040be30984801a524be029056f66383d749ab..4897e7c91a940f6b792af4424692e931a484064f 100644 (file)
--- a/src/arch/arm/isa/insts/neon64.isa
+++ b/src/arch/arm/isa/insts/neon64.isa
@@ -1,6 +1,6 @@
  // -*- mode: c++ -*-
  
-// Copyright (c) 2012-2013, 2015 ARM Limited
+// Copyright (c) 2012-2013, 2015-2016 ARM Limited
  // All rights reserved
  //
  // The license below extends only to copyright in the software and shall
@@ -225,11 +225,16 @@ let {{
          AA64FpDestP%(destReg)d_uw = gtoh(destReg.regs[%(reg)d]);
          ''' % { "reg" : reg, "destReg": destReg }
              destReg += 1
-        if destCnt < 4 and not hi:  # zero upper half
-            for reg in range(destCnt, 4):
-                eWalkCode += '''
-        AA64FpDestP%(reg)d_uw = 0;
-        ''' % { "reg" : reg }
+        if destCnt < 4:
+            if hi:  # Explicitly merge with lower half
+                for reg in range(0, destCnt):
+                    eWalkCode += '''
+        AA64FpDestP%(reg)d_uw = AA64FpDestP%(reg)d_uw;''' % { "reg" : reg }
+            else:  # zero upper half
+                for reg in range(destCnt, 4):
+                    eWalkCode += '''
+        AA64FpDestP%(reg)d_uw = 0;''' % { "reg" : reg }
+
          iop = InstObjParams(name, Name,
                              "DataX2RegImmOp" if byElem else "DataX2RegOp",
                              { "code": eWalkCode,
@@ -429,11 +434,16 @@ let {{
          AA64FpDestP%(destReg)d_uw = gtoh(destReg.regs[%(reg)d]);
          ''' % { "reg" : reg, "destReg": destReg }
              destReg += 1
-        if not hi:
+        if hi:
+            for reg in range(0, 2):  # Explicitly merge with the lower half
+                eWalkCode += '''
+        AA64FpDestP%(reg)d_uw = AA64FpDestP%(reg)d_uw;''' % { "reg" : reg }
+        else:
              for reg in range(2, 4):  # zero upper half
                  eWalkCode += '''
          AA64FpDestP%(reg)d_uw = 0;
          ''' % { "reg" : reg }
+
          iop = InstObjParams(name, Name,
                              "DataX1RegImmOp" if hasImm else "DataX1RegOp",
                              { "code": eWalkCode,
diff --git a/src/arch/arm/isa/operands.isa b/src/arch/arm/isa/operands.isa

index e48c154d44b83cd0884e6a67a18bbf62434d0a8a..5898075ab2d984820b5bedcb475b3f810973048b 100644 (file)
--- a/src/arch/arm/isa/operands.isa
+++ b/src/arch/arm/isa/operands.isa
@@ -1,5 +1,5 @@
  // -*- mode:c++ -*-
-// Copyright (c) 2010-2014 ARM Limited
+// Copyright (c) 2010-2014, 2016 ARM Limited
  // All rights reserved
  //
  // The license below extends only to copyright in the software and shall
@@ -49,7 +49,10 @@ def operand_types {{
      'ud' : 'uint64_t',
      'tud' : 'Twin64_t',
      'sf' : 'float',
-    'df' : 'double'
+    'df' : 'double',
+    'vc' : 'TheISA::VecRegContainer',
+    # For operations that are implemented as a template
+    'x' : 'TPElem',
  }};
  
  let {{
@@ -117,6 +120,15 @@ let {{
      srtMode = 1
      srtEPC = 0
  
+    def vectorElem(idx, elem):
+        return ('VecElem', 'sf', (idx, elem), 'IsVectorElem', srtNormal)
+
+    def vectorReg(idx, elems = None):
+        return ('VecReg', 'vc', (idx, elems) , 'IsVector', srtNormal)
+
+    def vectorRegElem(elem, ext = 'sf', zeroing = False):
+        return (elem, ext, zeroing)
+
      def floatReg(idx):
          return ('FloatReg', 'sf', idx, 'IsFloating', srtNormal)
  
@@ -297,86 +309,203 @@ def operands {{
      'FpOp2P3': floatReg('(op2 + 3)'),
  
      # Create AArch64 unpacked view of the FP registers
-    'AA64FpOp1P0':   floatReg('((op1 * 4) + 0)'),
-    'AA64FpOp1P1':   floatReg('((op1 * 4) + 1)'),
-    'AA64FpOp1P2':   floatReg('((op1 * 4) + 2)'),
-    'AA64FpOp1P3':   floatReg('((op1 * 4) + 3)'),
-    'AA64FpOp2P0':   floatReg('((op2 * 4) + 0)'),
-    'AA64FpOp2P1':   floatReg('((op2 * 4) + 1)'),
-    'AA64FpOp2P2':   floatReg('((op2 * 4) + 2)'),
-    'AA64FpOp2P3':   floatReg('((op2 * 4) + 3)'),
-    'AA64FpOp3P0':   floatReg('((op3 * 4) + 0)'),
-    'AA64FpOp3P1':   floatReg('((op3 * 4) + 1)'),
-    'AA64FpOp3P2':   floatReg('((op3 * 4) + 2)'),
-    'AA64FpOp3P3':   floatReg('((op3 * 4) + 3)'),
-    'AA64FpDestP0':  floatReg('((dest * 4) + 0)'),
-    'AA64FpDestP1':  floatReg('((dest * 4) + 1)'),
-    'AA64FpDestP2':  floatReg('((dest * 4) + 2)'),
-    'AA64FpDestP3':  floatReg('((dest * 4) + 3)'),
-    'AA64FpDest2P0': floatReg('((dest2 * 4) + 0)'),
-    'AA64FpDest2P1': floatReg('((dest2 * 4) + 1)'),
-    'AA64FpDest2P2': floatReg('((dest2 * 4) + 2)'),
-    'AA64FpDest2P3': floatReg('((dest2 * 4) + 3)'),
-
-    'AA64FpOp1P0V0':   floatReg('((((op1+0)) * 4) + 0)'),
-    'AA64FpOp1P1V0':   floatReg('((((op1+0)) * 4) + 1)'),
-    'AA64FpOp1P2V0':   floatReg('((((op1+0)) * 4) + 2)'),
-    'AA64FpOp1P3V0':   floatReg('((((op1+0)) * 4) + 3)'),
-
-    'AA64FpOp1P0V1':   floatReg('((((op1+1)) * 4) + 0)'),
-    'AA64FpOp1P1V1':   floatReg('((((op1+1)) * 4) + 1)'),
-    'AA64FpOp1P2V1':   floatReg('((((op1+1)) * 4) + 2)'),
-    'AA64FpOp1P3V1':   floatReg('((((op1+1)) * 4) + 3)'),
-
-    'AA64FpOp1P0V2':   floatReg('((((op1+2)) * 4) + 0)'),
-    'AA64FpOp1P1V2':   floatReg('((((op1+2)) * 4) + 1)'),
-    'AA64FpOp1P2V2':   floatReg('((((op1+2)) * 4) + 2)'),
-    'AA64FpOp1P3V2':   floatReg('((((op1+2)) * 4) + 3)'),
-
-    'AA64FpOp1P0V3':   floatReg('((((op1+3)) * 4) + 0)'),
-    'AA64FpOp1P1V3':   floatReg('((((op1+3)) * 4) + 1)'),
-    'AA64FpOp1P2V3':   floatReg('((((op1+3)) * 4) + 2)'),
-    'AA64FpOp1P3V3':   floatReg('((((op1+3)) * 4) + 3)'),
-
-    'AA64FpOp1P0V0S':   floatReg('((((op1+0)%32) * 4) + 0)'),
-    'AA64FpOp1P1V0S':   floatReg('((((op1+0)%32) * 4) + 1)'),
-    'AA64FpOp1P2V0S':   floatReg('((((op1+0)%32) * 4) + 2)'),
-    'AA64FpOp1P3V0S':   floatReg('((((op1+0)%32) * 4) + 3)'),
-
-    'AA64FpOp1P0V1S':   floatReg('((((op1+1)%32) * 4) + 0)'),
-    'AA64FpOp1P1V1S':   floatReg('((((op1+1)%32) * 4) + 1)'),
-    'AA64FpOp1P2V1S':   floatReg('((((op1+1)%32) * 4) + 2)'),
-    'AA64FpOp1P3V1S':   floatReg('((((op1+1)%32) * 4) + 3)'),
-
-    'AA64FpOp1P0V2S':   floatReg('((((op1+2)%32) * 4) + 0)'),
-    'AA64FpOp1P1V2S':   floatReg('((((op1+2)%32) * 4) + 1)'),
-    'AA64FpOp1P2V2S':   floatReg('((((op1+2)%32) * 4) + 2)'),
-    'AA64FpOp1P3V2S':   floatReg('((((op1+2)%32) * 4) + 3)'),
-
-    'AA64FpOp1P0V3S':   floatReg('((((op1+3)%32) * 4) + 0)'),
-    'AA64FpOp1P1V3S':   floatReg('((((op1+3)%32) * 4) + 1)'),
-    'AA64FpOp1P2V3S':   floatReg('((((op1+3)%32) * 4) + 2)'),
-    'AA64FpOp1P3V3S':   floatReg('((((op1+3)%32) * 4) + 3)'),
-
-    'AA64FpDestP0V0':   floatReg('((((dest+0)) * 4) + 0)'),
-    'AA64FpDestP1V0':   floatReg('((((dest+0)) * 4) + 1)'),
-    'AA64FpDestP2V0':   floatReg('((((dest+0)) * 4) + 2)'),
-    'AA64FpDestP3V0':   floatReg('((((dest+0)) * 4) + 3)'),
-
-    'AA64FpDestP0V1':   floatReg('((((dest+1)) * 4) + 0)'),
-    'AA64FpDestP1V1':   floatReg('((((dest+1)) * 4) + 1)'),
-    'AA64FpDestP2V1':   floatReg('((((dest+1)) * 4) + 2)'),
-    'AA64FpDestP3V1':   floatReg('((((dest+1)) * 4) + 3)'),
-
-    'AA64FpDestP0V0L':   floatReg('((((dest+0)%32) * 4) + 0)'),
-    'AA64FpDestP1V0L':   floatReg('((((dest+0)%32) * 4) + 1)'),
-    'AA64FpDestP2V0L':   floatReg('((((dest+0)%32) * 4) + 2)'),
-    'AA64FpDestP3V0L':   floatReg('((((dest+0)%32) * 4) + 3)'),
-
-    'AA64FpDestP0V1L':   floatReg('((((dest+1)%32) * 4) + 0)'),
-    'AA64FpDestP1V1L':   floatReg('((((dest+1)%32) * 4) + 1)'),
-    'AA64FpDestP2V1L':   floatReg('((((dest+1)%32) * 4) + 2)'),
-    'AA64FpDestP3V1L':   floatReg('((((dest+1)%32) * 4) + 3)'),
+    # Name   ::= 'AA64Vec' OpSpec [LaneSpec]
+    # OpSpec ::= IOSpec [Index] [Plus]
+    # IOSpec ::= 'S' | 'D'
+    # Index  ::= '0' | ... | '9'
+    # Plus  ::= [PlusAmount] ['l']
+    # PlusAmount ::= 'p' [PlusAmount]
+    # LaneSpec ::= 'L' Index
+    #
+    # All the constituents are hierarchically defined as part of the Vector
+    # Register they belong to
+
+    'AA64FpOp1':       vectorReg('op1',
+    {
+        'AA64FpOp1P0': vectorRegElem('0'),
+        'AA64FpOp1P1': vectorRegElem('1'),
+        'AA64FpOp1P2': vectorRegElem('2'),
+        'AA64FpOp1P3': vectorRegElem('3'),
+        'AA64FpOp1S':  vectorRegElem('0', 'sf', zeroing = True),
+        'AA64FpOp1D':  vectorRegElem('0', 'df', zeroing = True),
+        'AA64FpOp1Q':  vectorRegElem('0', 'tud', zeroing = True)
+    }),
+
+    'AA64FpOp2':       vectorReg('op2',
+    {
+        'AA64FpOp2P0': vectorRegElem('0'),
+        'AA64FpOp2P1': vectorRegElem('1'),
+        'AA64FpOp2P2': vectorRegElem('2'),
+        'AA64FpOp2P3': vectorRegElem('3'),
+        'AA64FpOp2S':  vectorRegElem('0', 'sf', zeroing = True),
+        'AA64FpOp2D':  vectorRegElem('0', 'df', zeroing = True),
+        'AA64FpOp2Q':  vectorRegElem('0', 'tud', zeroing = True)
+    }),
+
+    'AA64FpOp3':       vectorReg('op3',
+    {
+        'AA64FpOp3P0': vectorRegElem('0'),
+        'AA64FpOp3P1': vectorRegElem('1'),
+        'AA64FpOp3P2': vectorRegElem('2'),
+        'AA64FpOp3P3': vectorRegElem('3'),
+        'AA64FpOp3S':  vectorRegElem('0', 'sf', zeroing = True),
+        'AA64FpOp3D':  vectorRegElem('0', 'df', zeroing = True),
+        'AA64FpOp3Q':  vectorRegElem('0', 'tud', zeroing = True)
+    }),
+
+    'AA64FpDest':       vectorReg('dest',
+    {
+        'AA64FpDestP0': vectorRegElem('0'),
+        'AA64FpDestP1': vectorRegElem('1'),
+        'AA64FpDestP2': vectorRegElem('2'),
+        'AA64FpDestP3': vectorRegElem('3'),
+        'AA64FpDestS':  vectorRegElem('0', 'sf', zeroing = True),
+        'AA64FpDestD':  vectorRegElem('0', 'df', zeroing = True),
+        'AA64FpDestQ':  vectorRegElem('0', 'tud', zeroing = True)
+    }),
+
+    'AA64FpDest2':       vectorReg('dest2',
+    {
+        'AA64FpDest2P0': vectorRegElem('0'),
+        'AA64FpDest2P1': vectorRegElem('1'),
+        'AA64FpDest2P2': vectorRegElem('2'),
+        'AA64FpDest2P3': vectorRegElem('3'),
+        'AA64FpDest2S':  vectorRegElem('0', 'sf', zeroing = True),
+        'AA64FpDest2D':  vectorRegElem('0', 'df', zeroing = True),
+        'AA64FpDest2Q':  vectorRegElem('0', 'tud', zeroing = True)
+    }),
+
+    'AA64FpOp1V0':       vectorReg('op1',
+    {
+        'AA64FpOp1P0V0': vectorRegElem('0'),
+        'AA64FpOp1P1V0': vectorRegElem('1'),
+        'AA64FpOp1P2V0': vectorRegElem('2'),
+        'AA64FpOp1P3V0': vectorRegElem('3'),
+        'AA64FpOp1SV0':  vectorRegElem('0', 'sf', zeroing = True),
+        'AA64FpOp1DV0':  vectorRegElem('0', 'df', zeroing = True),
+        'AA64FpOp1QV0':  vectorRegElem('0', 'tud', zeroing = True)
+    }),
+
+    'AA64FpOp1V1':       vectorReg('op1+1',
+    {
+        'AA64FpOp1P0V1': vectorRegElem('0'),
+        'AA64FpOp1P1V1': vectorRegElem('1'),
+        'AA64FpOp1P2V1': vectorRegElem('2'),
+        'AA64FpOp1P3V1': vectorRegElem('3'),
+        'AA64FpOp1SV1':  vectorRegElem('0', 'sf', zeroing = True),
+        'AA64FpOp1DV1':  vectorRegElem('0', 'df', zeroing = True),
+        'AA64FpOp1QV1':  vectorRegElem('0', 'tud', zeroing = True)
+    }),
+
+    'AA64FpOp1V2':       vectorReg('op1+2',
+    {
+        'AA64FpOp1P0V2': vectorRegElem('0'),
+        'AA64FpOp1P1V2': vectorRegElem('1'),
+        'AA64FpOp1P2V2': vectorRegElem('2'),
+        'AA64FpOp1P3V2': vectorRegElem('3'),
+        'AA64FpOp1SV2':  vectorRegElem('0', 'sf', zeroing = True),
+        'AA64FpOp1DV2':  vectorRegElem('0', 'df', zeroing = True),
+        'AA64FpOp1QV2':  vectorRegElem('0', 'tud', zeroing = True)
+    }),
+
+    'AA64FpOp1V3':       vectorReg('op1+3',
+    {
+        'AA64FpOp1P0V3': vectorRegElem('0'),
+        'AA64FpOp1P1V3': vectorRegElem('1'),
+        'AA64FpOp1P2V3': vectorRegElem('2'),
+        'AA64FpOp1P3V3': vectorRegElem('3'),
+        'AA64FpOp1SV3':  vectorRegElem('0', 'sf', zeroing = True),
+        'AA64FpOp1DV3':  vectorRegElem('0', 'df', zeroing = True),
+        'AA64FpOp1QV3':  vectorRegElem('0', 'tud', zeroing = True)
+    }),
+
+    'AA64FpOp1V0S':       vectorReg('(op1+0)%32',
+    {
+        'AA64FpOp1P0V0S': vectorRegElem('0'),
+        'AA64FpOp1P1V0S': vectorRegElem('1'),
+        'AA64FpOp1P2V0S': vectorRegElem('2'),
+        'AA64FpOp1P3V0S': vectorRegElem('3'),
+        'AA64FpOp1SV0S':  vectorRegElem('0', 'sf', zeroing = True),
+        'AA64FpOp1DV0S':  vectorRegElem('0', 'df', zeroing = True),
+        'AA64FpOp1QV0S':  vectorRegElem('0', 'tud', zeroing = True)
+    }),
+
+    'AA64FpOp1V1S':       vectorReg('(op1+1)%32',
+    {
+        'AA64FpOp1P0V1S': vectorRegElem('0'),
+        'AA64FpOp1P1V1S': vectorRegElem('1'),
+        'AA64FpOp1P2V1S': vectorRegElem('2'),
+        'AA64FpOp1P3V1S': vectorRegElem('3'),
+        'AA64FpOp1SV1S':  vectorRegElem('0', 'sf', zeroing = True),
+        'AA64FpOp1DV1S':  vectorRegElem('0', 'df', zeroing = True),
+        'AA64FpOp1QV1S':  vectorRegElem('0', 'tud', zeroing = True)
+    }),
+
+    'AA64FpOp1V2S':       vectorReg('(op1+2)%32',
+    {
+        'AA64FpOp1P0V2S': vectorRegElem('0'),
+        'AA64FpOp1P1V2S': vectorRegElem('1'),
+        'AA64FpOp1P2V2S': vectorRegElem('2'),
+        'AA64FpOp1P3V2S': vectorRegElem('3'),
+        'AA64FpOp1SV2S':  vectorRegElem('0', 'sf', zeroing = True),
+        'AA64FpOp1DV2S':  vectorRegElem('0', 'df', zeroing = True),
+        'AA64FpOp1QV2S':  vectorRegElem('0', 'tud', zeroing = True)
+    }),
+
+    'AA64FpOp1V3S':       vectorReg('(op1+3)%32',
+    {
+        'AA64FpOp1P0V3S': vectorRegElem('0'),
+        'AA64FpOp1P1V3S': vectorRegElem('1'),
+        'AA64FpOp1P2V3S': vectorRegElem('2'),
+        'AA64FpOp1P3V3S': vectorRegElem('3'),
+        'AA64FpOp1SV3S':  vectorRegElem('0', 'sf', zeroing = True),
+        'AA64FpOp1DV3S':  vectorRegElem('0', 'df', zeroing = True),
+        'AA64FpOp1QV3S':  vectorRegElem('0', 'tud', zeroing = True)
+    }),
+
+    'AA64FpDestV0':       vectorReg('(dest+0)',
+    {
+        'AA64FpDestP0V0': vectorRegElem('0'),
+        'AA64FpDestP1V0': vectorRegElem('1'),
+        'AA64FpDestP2V0': vectorRegElem('2'),
+        'AA64FpDestP3V0': vectorRegElem('3'),
+        'AA64FpDestSV0':  vectorRegElem('0', 'sf', zeroing = True),
+        'AA64FpDestDV0':  vectorRegElem('0', 'df', zeroing = True),
+        'AA64FpDestQV0':  vectorRegElem('0', 'tud', zeroing = True)
+    }),
+
+    'AA64FpDestV1':       vectorReg('(dest+1)',
+    {
+        'AA64FpDestP0V1': vectorRegElem('0'),
+        'AA64FpDestP1V1': vectorRegElem('1'),
+        'AA64FpDestP2V1': vectorRegElem('2'),
+        'AA64FpDestP3V1': vectorRegElem('3'),
+        'AA64FpDestSV1':  vectorRegElem('0', 'sf', zeroing = True),
+        'AA64FpDestDV1':  vectorRegElem('0', 'df', zeroing = True),
+        'AA64FpDestQV1':  vectorRegElem('0', 'tud', zeroing = True)
+    }),
+
+    'AA64FpDestV0L':       vectorReg('(dest+0)%32',
+    {
+        'AA64FpDestP0V0L': vectorRegElem('0'),
+        'AA64FpDestP1V0L': vectorRegElem('1'),
+        'AA64FpDestP2V0L': vectorRegElem('2'),
+        'AA64FpDestP3V0L': vectorRegElem('3'),
+        'AA64FpDestSV0L':  vectorRegElem('0', 'sf', zeroing = True),
+        'AA64FpDestDV0L':  vectorRegElem('0', 'df', zeroing = True),
+        'AA64FpDestQV0L':  vectorRegElem('0', 'tud', zeroing = True)
+    }),
+
+    'AA64FpDestV1L':       vectorReg('(dest+1)%32',
+    {
+        'AA64FpDestP0V1L': vectorRegElem('0'),
+        'AA64FpDestP1V1L': vectorRegElem('1'),
+        'AA64FpDestP2V1L': vectorRegElem('2'),
+        'AA64FpDestP3V1L': vectorRegElem('3'),
+        'AA64FpDestSV1L':  vectorRegElem('0', 'sf', zeroing = True),
+        'AA64FpDestDV1L':  vectorRegElem('0', 'df', zeroing = True),
+        'AA64FpDestQV1L':  vectorRegElem('0', 'tud', zeroing = True)
+    }),
  
      #Abstracted control reg operands
      'MiscDest': cntrlReg('dest'),
diff --git a/src/arch/arm/isa/templates/mem.isa b/src/arch/arm/isa/templates/mem.isa

index 51f598f5046434abd784d2b1c49ca3b3f950d1e2..a0942d15153f46a9b00c86e0183a19fb0627f695 100644 (file)
--- a/src/arch/arm/isa/templates/mem.isa
+++ b/src/arch/arm/isa/templates/mem.isa
@@ -1,6 +1,6 @@
  // -*- mode:c++ -*-
  
-// Copyright (c) 2010, 2012, 2014 ARM Limited
+// Copyright (c) 2010, 2012, 2014, 2016 ARM Limited
  // All rights reserved
  //
  // The license below extends only to copyright in the software and shall
@@ -1150,7 +1150,7 @@ def template LoadRegConstructor {{
  #if %(use_uops)d
          assert(numMicroops >= 2);
          uops = new StaticInstPtr[numMicroops];
-        if (_dest == INTREG_PC && !isFloating()) {
+        if (_dest == INTREG_PC && !isFloating() && !isVector()) {
              IntRegIndex wbIndexReg = index;
              uops[0] = new %(acc_name)s(machInst, INTREG_UREG0, _base, _add,
                                         _shiftAmt, _shiftType, _index);
@@ -1187,7 +1187,7 @@ def template LoadRegConstructor {{
  
          }
  #else
-        if (_dest == INTREG_PC && !isFloating()) {
+        if (_dest == INTREG_PC && !isFloating() && !isVector()) {
              flags[IsControl] = true;
              flags[IsIndirectControl] = true;
              if (conditional)
@@ -1216,7 +1216,7 @@ def template LoadImmConstructor {{
  #if %(use_uops)d
          assert(numMicroops >= 2);
          uops = new StaticInstPtr[numMicroops];
-        if (_dest == INTREG_PC && !isFloating()) {
+        if (_dest == INTREG_PC && !isFloating() && !isVector()) {
              uops[0] = new %(acc_name)s(machInst, INTREG_UREG0, _base, _add,
                                     _imm);
              uops[0]->setDelayedCommit();
@@ -1250,7 +1250,7 @@ def template LoadImmConstructor {{
              uops[1]->setLastMicroop();
          }
  #else
-        if (_dest == INTREG_PC && !isFloating()) {
+        if (_dest == INTREG_PC && !isFloating() && !isVector()) {
              flags[IsControl] = true;
              flags[IsIndirectControl] = true;
              if (conditional)
diff --git a/src/arch/arm/isa/templates/pred.isa b/src/arch/arm/isa/templates/pred.isa

index 752ab8d1e8fb7cd5560b7faeb100e5d1d92d2888..7b372bdee133d054412bd28235b910398a772e81 100644 (file)
--- a/src/arch/arm/isa/templates/pred.isa
+++ b/src/arch/arm/isa/templates/pred.isa
@@ -1,6 +1,6 @@
  // -*- mode:c++ -*-
  
-// Copyright (c) 2010 ARM Limited
+// Copyright (c) 2010, 2016 ARM Limited
  // All rights reserved
  //
  // The license below extends only to copyright in the software and shall
@@ -77,7 +77,7 @@ def template DataImmConstructor {{
              }
          }
  
-        if (%(is_branch)s && !isFloating()){
+        if (%(is_branch)s && !isFloating() && !isVector()){
              flags[IsControl] = true;
              flags[IsIndirectControl] = true;
              if (condCode == COND_AL || condCode == COND_UC)
@@ -117,7 +117,7 @@ def template DataRegConstructor {{
              }
          }
  
-        if (%(is_branch)s && !isFloating()){
+        if (%(is_branch)s && !isFloating() && !isVector()){
              flags[IsControl] = true;
              flags[IsIndirectControl] = true;
              if (condCode == COND_AL || condCode == COND_UC)
diff --git a/src/arch/isa_parser.py b/src/arch/isa_parser.py

index 759b50c0d1a9e8071d9f764d625c7ad1af451b8e..ac639b41309a3cf4358ae7c83e9628e4f318aff5 100755 (executable)
--- a/src/arch/isa_parser.py
+++ b/src/arch/isa_parser.py
@@ -493,6 +493,12 @@ class Operand(object):
      def isControlReg(self):
          return 0
  
+    def isVecReg(self):
+        # Default answer for operands in general; VecRegOperand overrides
+        # this to return 1.
+        return 0
+
+    def isVecElem(self):
+        # Default answer for operands in general; VecElemOperand overrides
+        # this to return 1.
+        return 0
+
      def isPCState(self):
          return 0
  
@@ -658,6 +664,200 @@ class FloatRegOperand(Operand):
          }''' % (self.ctype, self.base_name, wp)
          return wb
  
+class VecRegOperand(Operand):
+    reg_class = 'VecRegClass'
+
+    def __init__(self, parser, full_name, ext, is_src, is_dest):
+        Operand.__init__(self, parser, full_name, ext, is_src, is_dest)
+        self.elemExt = None
+        self.parser = parser
+
+    def isReg(self):
+        return 1
+
+    def isVecReg(self):
+        return 1
+
+    def makeDeclElem(self, elem_op):
+        (elem_name, elem_ext) = elem_op
+        (elem_spec, dflt_elem_ext, zeroing) = self.elems[elem_name]
+        if elem_ext:
+            ext = elem_ext
+        else:
+            ext = dflt_elem_ext
+        ctype = self.parser.operandTypeMap[ext]
+        return '\n\t%s %s = 0;' % (ctype, elem_name)
+
+    def makeDecl(self):
+        if not self.is_dest and self.is_src:
+            c_decl = '\t/* Vars for %s*/' % (self.base_name)
+            if hasattr(self, 'active_elems'):
+                if self.active_elems:
+                    for elem in self.active_elems:
+                        c_decl += self.makeDeclElem(elem)
+            return c_decl + '\t/* End vars for %s */\n' % (self.base_name)
+        else:
+            return ''
+
+    def makeConstructor(self, predRead, predWrite):
+        c_src = ''
+        c_dest = ''
+
+        numAccessNeeded = 1
+
+        if self.is_src:
+            c_src = src_reg_constructor % (self.reg_class, self.reg_spec)
+
+        if self.is_dest:
+            c_dest = dst_reg_constructor % (self.reg_class, self.reg_spec)
+            c_dest += '\n\t_numVecDestRegs++;'
+
+        return c_src + c_dest
+
+    # Read destination register to write
+    def makeReadWElem(self, elem_op):
+        (elem_name, elem_ext) = elem_op
+        (elem_spec, dflt_elem_ext, zeroing) = self.elems[elem_name]
+        if elem_ext:
+            ext = elem_ext
+        else:
+            ext = dflt_elem_ext
+        ctype = self.parser.operandTypeMap[ext]
+        c_read = '\t\t%s& %s = %s[%s];\n' % \
+                  (ctype, elem_name, self.base_name, elem_spec)
+        return c_read
+
+    def makeReadW(self, predWrite):
+        func = 'getWritableVecRegOperand'
+        if self.read_code != None:
+            return self.buildReadCode(func)
+
+        if predWrite:
+            rindex = '_destIndex++'
+        else:
+            rindex = '%d' % self.dest_reg_idx
+
+        c_readw = '\t\t%s& tmp_d%s = xc->%s(this, %s);\n'\
+                % ('TheISA::VecRegContainer', rindex, func, rindex)
+        if self.elemExt:
+            c_readw += '\t\tauto %s = tmp_d%s.as<%s>();\n' % (self.base_name,
+                        rindex, self.parser.operandTypeMap[self.elemExt])
+        if self.ext:
+            c_readw += '\t\tauto %s = tmp_d%s.as<%s>();\n' % (self.base_name,
+                        rindex, self.parser.operandTypeMap[self.ext])
+        if hasattr(self, 'active_elems'):
+            if self.active_elems:
+                for elem in self.active_elems:
+                    c_readw += self.makeReadWElem(elem)
+        return c_readw
+
+    # Normal source operand read
+    def makeReadElem(self, elem_op, name):
+        (elem_name, elem_ext) = elem_op
+        (elem_spec, dflt_elem_ext, zeroing) = self.elems[elem_name]
+
+        if elem_ext:
+            ext = elem_ext
+        else:
+            ext = dflt_elem_ext
+        ctype = self.parser.operandTypeMap[ext]
+        c_read = '\t\t%s = %s[%s];\n' % \
+                  (elem_name, name, elem_spec)
+        return c_read
+
+    def makeRead(self, predRead):
+        func = 'readVecRegOperand'
+        if self.read_code != None:
+            return self.buildReadCode(func)
+
+        if predRead:
+            rindex = '_sourceIndex++'
+        else:
+            rindex = '%d' % self.src_reg_idx
+
+        name = self.base_name
+        if self.is_dest and self.is_src:
+            name += '_merger'
+
+        c_read =  '\t\t%s& tmp_s%s = xc->%s(this, %s);\n' \
+                % ('const TheISA::VecRegContainer', rindex, func, rindex)
+        # If the parser has detected that elements are being access, create
+        # the appropriate view
+        if self.elemExt:
+            c_read += '\t\tauto %s = tmp_s%s.as<%s>();\n' % \
+                 (name, rindex, self.parser.operandTypeMap[self.elemExt])
+        if self.ext:
+            c_read += '\t\tauto %s = tmp_s%s.as<%s>();\n' % \
+                 (name, rindex, self.parser.operandTypeMap[self.ext])
+        if hasattr(self, 'active_elems'):
+            if self.active_elems:
+                for elem in self.active_elems:
+                    c_read += self.makeReadElem(elem, name)
+        return c_read
+
+    def makeWrite(self, predWrite):
+        func = 'setVecRegOperand'
+        if self.write_code != None:
+            return self.buildWriteCode(func)
+
+        wb = '''
+        if (traceData) {
+            panic("Vectors not supported yet in tracedata");
+            /*traceData->setData(final_val);*/
+        }
+        '''
+        return wb
+
+    def finalize(self, predRead, predWrite):
+        super(VecRegOperand, self).finalize(predRead, predWrite)
+        if self.is_dest:
+            self.op_rd = self.makeReadW(predWrite) + self.op_rd
+
+class VecElemOperand(Operand):
+    reg_class = 'VectorElemClass'
+
+    def isReg(self):
+        return 1
+
+    def isVecElem(self):
+        return 1
+
+    def makeDecl(self):
+        if self.is_dest and not self.is_src:
+            return '\n\t%s %s;' % (self.ctype, self.base_name)
+        else:
+            return ''
+
+    def makeConstructor(self, predRead, predWrite):
+        c_src = ''
+        c_dest = ''
+
+        numAccessNeeded = 1
+        regId = 'RegId(%s, %s * numVecElemPerVecReg + elemIdx, %s)' % \
+                (self.reg_class, self.reg_spec)
+
+        if self.is_src:
+            c_src = ('\n\t_srcRegIdx[_numSrcRegs++] = RegId(%s, %s, %s);' %
+                    (self.reg_class, self.reg_spec, self.elem_spec))
+
+        if self.is_dest:
+            c_dest = ('\n\t_destRegIdx[_numDestRegs++] = RegId(%s, %s, %s);' %
+                    (self.reg_class, self.reg_spec, self.elem_spec))
+            c_dest += '\n\t_numVecElemDestRegs++;'
+        return c_src + c_dest
+
+    def makeRead(self, predRead):
+        c_read = ('\n/* Elem is kept inside the operand description */' +
+                  '\n\tVecElem %s = xc->readVecElemOperand(this, %d);' %
+                  (self.base_name, self.src_reg_idx))
+        return c_read
+
+    def makeWrite(self, predWrite):
+        c_write = ('\n/* Elem is kept inside the operand description */' +
+                   '\n\txc->setVecElemOperand(this, %d, %s);' %
+                   (self.dest_reg_idx, self.base_name))
+        return c_write
+
  class CCRegOperand(Operand):
      reg_class = 'CCRegClass'
  
@@ -857,22 +1057,49 @@ class OperandList(object):
              op = match.groups()
              # regexp groups are operand full name, base, and extension
              (op_full, op_base, op_ext) = op
+            # If this is an elem operand, define or update the corresponding
+            # vector operand instead
+            isElem = False
+            if op_base in parser.elemToVector:
+                isElem = True
+                elem_op = (op_base, op_ext)
+                op_base = parser.elemToVector[op_base]
+                op_ext = '' # use the default one
              # if the token following the operand is an assignment, this is
              # a destination (LHS), else it's a source (RHS)
              is_dest = (assignRE.match(code, match.end()) != None)
              is_src = not is_dest
+
              # see if we've already seen this one
              op_desc = self.find_base(op_base)
              if op_desc:
-                if op_desc.ext != op_ext:
-                    error ('Inconsistent extensions for operand %s' % \
-                            op_base)
+                if op_ext and op_ext != '' and op_desc.ext != op_ext:
+                    error ('Inconsistent extensions for operand %s: %s - %s' \
+                            % (op_base, op_desc.ext, op_ext))
                  op_desc.is_src = op_desc.is_src or is_src
                  op_desc.is_dest = op_desc.is_dest or is_dest
+                if isElem:
+                    (elem_base, elem_ext) = elem_op
+                    found = False
+                    for ae in op_desc.active_elems:
+                        (ae_base, ae_ext) = ae
+                        if ae_base == elem_base:
+                            if ae_ext != elem_ext:
+                                error('Inconsistent extensions for elem'
+                                      ' operand %s' % elem_base)
+                            else:
+                                found = True
+                    if not found:
+                        op_desc.active_elems.append(elem_op)
              else:
                  # new operand: create new descriptor
                  op_desc = parser.operandNameMap[op_base](parser,
                      op_full, op_ext, is_src, is_dest)
+                # if operand is a vector elem, add the corresponding vector
+                # operand if not already done
+                if isElem:
+                    op_desc.elemExt = elem_op[1]
+                    op_desc.active_elems = [elem_op]
                  self.append(op_desc)
              # start next search after end of current match
              next_pos = match.end()
@@ -883,6 +1110,7 @@ class OperandList(object):
          self.numDestRegs = 0
          self.numFPDestRegs = 0
          self.numIntDestRegs = 0
+        self.numVecDestRegs = 0
          self.numCCDestRegs = 0
          self.numMiscDestRegs = 0
          self.memOperand = None
@@ -904,6 +1132,8 @@ class OperandList(object):
                          self.numFPDestRegs += 1
                      elif op_desc.isIntReg():
                          self.numIntDestRegs += 1
+                    elif op_desc.isVecReg():
+                        self.numVecDestRegs += 1
                      elif op_desc.isCCReg():
                          self.numCCDestRegs += 1
                      elif op_desc.isControlReg():
@@ -994,6 +1224,11 @@ class SubOperandList(OperandList):
              op = match.groups()
              # regexp groups are operand full name, base, and extension
              (op_full, op_base, op_ext) = op
+            # If this is an elem operand, define or update the corresponding
+            # vector operand
+            if op_base in parser.elemToVector:
+                elem_op = op_base
+                op_base = parser.elemToVector[elem_op]
              # find this op in the master list
              op_desc = master_list.find_base(op_base)
              if not op_desc:
@@ -1105,6 +1340,8 @@ class InstObjParams(object):
          header += '\n\t_numSrcRegs = 0;'
          header += '\n\t_numDestRegs = 0;'
          header += '\n\t_numFPDestRegs = 0;'
+        header += '\n\t_numVecDestRegs = 0;'
+        header += '\n\t_numVecElemDestRegs = 0;'
          header += '\n\t_numIntDestRegs = 0;'
          header += '\n\t_numCCDestRegs = 0;'
  
@@ -1149,6 +1386,8 @@ class InstObjParams(object):
                      self.op_class = 'MemReadOp'
              elif 'IsFloating' in self.flags:
                  self.op_class = 'FloatAddOp'
+            elif 'IsVector' in self.flags:
+                self.op_class = 'SimdAddOp'
              else:
                  self.op_class = 'IntAluOp'
  
@@ -1158,8 +1397,12 @@ class InstObjParams(object):
  
          # if 'IsFloating' is set, add call to the FP enable check
          # function (which should be provided by isa_desc via a declare)
+        # if 'IsVector' is set, add call to the Vector enable check
+        # function (which should be provided by isa_desc via a declare)
          if 'IsFloating' in self.flags:
              self.fp_enable_check = 'fault = checkFpEnableFault(xc);'
+        elif 'IsVector' in self.flags:
+            self.fp_enable_check = 'fault = checkVecEnableFault(xc);'
          else:
              self.fp_enable_check = ''
  
@@ -2300,6 +2543,16 @@ StaticInstPtr
              if dflt_ext:
                  dflt_ctype = self.operandTypeMap[dflt_ext]
                  attrList.extend(['dflt_ctype', 'dflt_ext'])
+            # reg_spec is either just a string or a (reg_spec, elem_spec)
+            # tuple (for elems of vector), where elem_spec is a str or a dict
+            if isinstance(reg_spec, tuple):
+                (reg_spec, elem_spec) = reg_spec
+                if isinstance(elem_spec, str):
+                    attrList.append('elem_spec')
+                else:
+                    assert(isinstance(elem_spec, dict))
+                    elems = elem_spec
+                    attrList.append('elems')
              for attr in attrList:
                  tmp_dict[attr] = eval(attr)
              tmp_dict['base_name'] = op_name
@@ -2323,6 +2576,15 @@ StaticInstPtr
  
          # Define operand variables.
          operands = user_dict.keys()
+        # Add the elems defined in the vector operands and
+        # build a map elem -> vector (used in OperandList)
+        elem_to_vec = {}
+        for op in user_dict.keys():
+            if hasattr(self.operandNameMap[op], 'elems'):
+                for elem in self.operandNameMap[op].elems.keys():
+                    operands.append(elem)
+                    elem_to_vec[elem] = op
+        self.elemToVector = elem_to_vec
          extensions = self.operandTypeMap.keys()
  
          operandsREString = r'''
diff --git a/src/arch/sparc/faults.cc b/src/arch/sparc/faults.cc

index c09bd0da2bbbeb4c344af59340898df46f21c535..13e9c19f6ec6ce114897bebcfeed31410f737e38 100644 (file)
--- a/src/arch/sparc/faults.cc
+++ b/src/arch/sparc/faults.cc
@@ -108,6 +108,12 @@ template<> SparcFaultBase::FaultVals
      SparcFault<FpDisabled>::vals =
  {"fp_disabled", 0x020, 800, {P, P, H}, FaultStat()};
  
+/* SPARCv8 and SPARCv9 define only the fp_disabled trap; SIMD is not treated
+ * as a separate unit. Therefore, we use the same code and TT */
+template<> SparcFaultBase::FaultVals
+    SparcFault<VecDisabled>::vals =
+{"fp_disabled", 0x020, 800, {P, P, H}, FaultStat()};
+
  template<> SparcFaultBase::FaultVals
      SparcFault<FpExceptionIEEE754>::vals =
  {"fp_exception_ieee_754", 0x021, 1110, {P, P, H}, FaultStat()};
diff --git a/src/arch/sparc/faults.hh b/src/arch/sparc/faults.hh

index 42c8b71491a72fecc78f7ebd0345b701a4dc66e7..aa270fa31031e1fb1a771f412a83919d31894538 100644 (file)
--- a/src/arch/sparc/faults.hh
+++ b/src/arch/sparc/faults.hh
@@ -122,6 +122,7 @@ class PrivilegedOpcode : public SparcFault<PrivilegedOpcode> {};
  // class UnimplementedSTD : public SparcFault<UnimplementedSTD> {};
  
  class FpDisabled : public SparcFault<FpDisabled> {};
+class VecDisabled : public SparcFault<VecDisabled> {};
  
  class FpExceptionIEEE754 : public SparcFault<FpExceptionIEEE754> {};
  
diff --git a/src/arch/sparc/isa/base.isa b/src/arch/sparc/isa/base.isa

index b517d462cad8adda571e2bee780bd9aebdfa6b3f..4b61c940ce3f327c6d5bce7004a66e278ea1c403 100644 (file)
--- a/src/arch/sparc/isa/base.isa
+++ b/src/arch/sparc/isa/base.isa
@@ -578,6 +578,11 @@ output exec {{
              return NoFault;
          }
      }
+    static inline Fault
+    checkVecEnableFault(CPU_EXEC_CONTEXT *xc)
+    {
+        return std::make_shared<VecDisabled>();
+    }
  }};
  
  
diff --git a/src/cpu/StaticInstFlags.py b/src/cpu/StaticInstFlags.py

index ef29726fc41907579d90adb95c62d196daaf2d8d..55ef456ce4a218af6303aa34908e5a5297e73589 100644 (file)
--- a/src/cpu/StaticInstFlags.py
+++ b/src/cpu/StaticInstFlags.py
@@ -58,6 +58,8 @@ class StaticInstFlags(Enum):
          'IsInteger',        # References integer regs.
          'IsFloating',       # References FP regs.
          'IsCC',             # References CC regs.
+        'IsVector',         # References Vector regs.
+        'IsVectorElem',     # References Vector reg elems.
  
          'IsMemRef',         # References memory (load, store, or prefetch)
          'IsLoad',           # Reads from memory (load or prefetch).
diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh

index 132c390b370120d5a7b3141f0ec0609e6fa2fa87..d7d32e6296b1392067332771e0fb91b546fc33e2 100644 (file)
--- a/src/cpu/base_dyn_inst.hh
+++ b/src/cpu/base_dyn_inst.hh
@@ -517,6 +517,7 @@ class BaseDynInst : public ExecContext, public RefCounted
      bool isDataPrefetch() const { return staticInst->isDataPrefetch(); }
      bool isInteger()      const { return staticInst->isInteger(); }
      bool isFloating()     const { return staticInst->isFloating(); }
+    bool isVector()       const { return staticInst->isVector(); }
      bool isControl()      const { return staticInst->isControl(); }
      bool isCall()         const { return staticInst->isCall(); }
      bool isReturn()       const { return staticInst->isReturn(); }
diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh

index 3cce7f69c00e8d6849cf3bf5c307c7c75ef51114..5977f94f3d791ecd52cc185cf05e886734266afd 100644 (file)
--- a/src/cpu/o3/commit.hh
+++ b/src/cpu/o3/commit.hh
@@ -517,6 +517,8 @@ class DefaultCommit
      Stats::Vector statComMembars;
      /** Total number of committed branches. */
      Stats::Vector statComBranches;
+    /** Total number of vector instructions */
+    Stats::Vector statComVector;
      /** Total number of floating point instructions */
      Stats::Vector statComFloating;
      /** Total number of integer instructions */
diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh

index ea77f18fb54613c916130c09c55284f70aec3aba..aba2696c2c351cb9ad8c2c7c1f292f569e1cda17 100644 (file)
--- a/src/cpu/o3/commit_impl.hh
+++ b/src/cpu/o3/commit_impl.hh
@@ -260,6 +260,13 @@ DefaultCommit<Impl>::regStats()
          .flags(total)
          ;
  
+    statComVector
+        .init(cpu->numThreads)
+        .name(name() + ".vec_insts")
+        .desc("Number of committed Vector instructions.")
+        .flags(total)
+        ;
+
      statComInteger
          .init(cpu->numThreads)
          .name(name()+".int_insts")
@@ -1404,6 +1411,9 @@ DefaultCommit<Impl>::updateComInstStats(DynInstPtr &inst)
      // Floating Point Instruction
      if (inst->isFloating())
          statComFloating[tid]++;
+    // Vector Instruction
+    if (inst->isVector())
+        statComVector[tid]++;
  
      // Function Calls
      if (inst->isCall())
diff --git a/src/cpu/o3/inst_queue.hh b/src/cpu/o3/inst_queue.hh

index 23d8d416c5244c149ada8a3557049d5727873754..64f8aa1be8fa8493bbd5105367e4dc5e2e18d742 100644 (file)
--- a/src/cpu/o3/inst_queue.hh
+++ b/src/cpu/o3/inst_queue.hh
@@ -543,10 +543,14 @@ class InstructionQueue
      Stats::Scalar intInstQueueWakeupAccesses;
      Stats::Scalar fpInstQueueReads;
      Stats::Scalar fpInstQueueWrites;
-    Stats::Scalar fpInstQueueWakeupQccesses;
+    Stats::Scalar fpInstQueueWakeupAccesses;
+    Stats::Scalar vecInstQueueReads;
+    Stats::Scalar vecInstQueueWrites;
+    Stats::Scalar vecInstQueueWakeupAccesses;
  
      Stats::Scalar intAluAccesses;
      Stats::Scalar fpAluAccesses;
+    Stats::Scalar vecAluAccesses;
  };
  
  #endif //__CPU_O3_INST_QUEUE_HH__
diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh

index 2b113ae04518b70a1e21399513e44848f3a15fb7..3da72fd863ea451c623a22c9c683e11cfaff3c0c 100644 (file)
--- a/src/cpu/o3/inst_queue_impl.hh
+++ b/src/cpu/o3/inst_queue_impl.hh
@@ -364,7 +364,7 @@ InstructionQueue<Impl>::regStats()
          .desc("Number of floating instruction queue writes")
          .flags(total);
  
-    fpInstQueueWakeupQccesses
+    fpInstQueueWakeupAccesses
          .name(name() + ".fp_inst_queue_wakeup_accesses")
          .desc("Number of floating instruction queue wakeup accesses")
          .flags(total);
@@ -567,7 +567,13 @@ template <class Impl>
  void
  InstructionQueue<Impl>::insert(DynInstPtr &new_inst)
  {
-    new_inst->isFloating() ? fpInstQueueWrites++ : intInstQueueWrites++;
+    if (new_inst->isFloating()) {
+        fpInstQueueWrites++;
+    } else if (new_inst->isVector()) {
+        vecInstQueueWrites++;
+    } else {
+        intInstQueueWrites++;
+    }
      // Make sure the instruction is valid
      assert(new_inst);
  
@@ -609,7 +615,13 @@ InstructionQueue<Impl>::insertNonSpec(DynInstPtr &new_inst)
  {
      // @todo: Clean up this code; can do it by setting inst as unable
      // to issue, then calling normal insert on the inst.
-    new_inst->isFloating() ? fpInstQueueWrites++ : intInstQueueWrites++;
+    if (new_inst->isFloating()) {
+        fpInstQueueWrites++;
+    } else if (new_inst->isVector()) {
+        vecInstQueueWrites++;
+    } else {
+        intInstQueueWrites++;
+    }
  
      assert(new_inst);
  
@@ -660,8 +672,10 @@ InstructionQueue<Impl>::getInstToExecute()
      assert(!instsToExecute.empty());
      DynInstPtr inst = instsToExecute.front();
      instsToExecute.pop_front();
-    if (inst->isFloating()){
+    if (inst->isFloating()) {
          fpInstQueueReads++;
+    } else if (inst->isVector()) {
+        vecInstQueueReads++;
      } else {
          intInstQueueReads++;
      }
@@ -783,7 +797,13 @@ InstructionQueue<Impl>::scheduleReadyInsts()
  
          DynInstPtr issuing_inst = readyInsts[op_class].top();
  
-        issuing_inst->isFloating() ? fpInstQueueReads++ : intInstQueueReads++;
+        if (issuing_inst->isFloating()) {
+            fpInstQueueReads++;
+        } else if (issuing_inst->isVector()) {
+            vecInstQueueReads++;
+        } else {
+            intInstQueueReads++;
+        }
  
          assert(issuing_inst->seqNum == (*order_it).oldestInst);
  
@@ -810,7 +830,13 @@ InstructionQueue<Impl>::scheduleReadyInsts()
  
          if (op_class != No_OpClass) {
              idx = fuPool->getUnit(op_class);
-            issuing_inst->isFloating() ? fpAluAccesses++ : intAluAccesses++;
+            if (issuing_inst->isFloating()) {
+                fpAluAccesses++;
+            } else if (issuing_inst->isVector()) {
+                vecAluAccesses++;
+            } else {
+                intAluAccesses++;
+            }
              if (idx > FUPool::NoFreeFU) {
                  op_latency = fuPool->getOpLatency(op_class);
              }
@@ -955,7 +981,9 @@ InstructionQueue<Impl>::wakeDependents(DynInstPtr &completed_inst)
  
      // The instruction queue here takes care of both floating and int ops
      if (completed_inst->isFloating()) {
-        fpInstQueueWakeupQccesses++;
+        fpInstQueueWakeupAccesses++;
+    } else if (completed_inst->isVector()) {
+        vecInstQueueWakeupAccesses++;
      } else {
          intInstQueueWakeupAccesses++;
      }
@@ -1189,7 +1217,13 @@ InstructionQueue<Impl>::doSquash(ThreadID tid)
             (*squash_it)->seqNum > squashedSeqNum[tid]) {
  
          DynInstPtr squashed_inst = (*squash_it);
-        squashed_inst->isFloating() ? fpInstQueueWrites++ : intInstQueueWrites++;
+        if (squashed_inst->isFloating()) {
+            fpInstQueueWrites++;
+        } else if (squashed_inst->isVector()) {
+            vecInstQueueWrites++;
+        } else {
+            intInstQueueWrites++;
+        }
  
          // Only handle the instruction if it actually is in the IQ and
          // hasn't already been squashed in the IQ.
diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc

index 57cea4ba769ebc7c905fc8d2aaa56300216cbf65..7839676027948a8051845b653bdd2392d60d99d9 100644 (file)
--- a/src/cpu/simple/base.cc
+++ b/src/cpu/simple/base.cc
@@ -252,6 +252,11 @@ BaseSimpleCPU::regStats()
              .desc("Number of float alu accesses")
              ;
  
+        t_info.numVecAluAccesses
+            .name(thread_str + ".num_vec_alu_accesses")
+            .desc("Number of vector alu accesses")
+            ;
+
          t_info.numCallsReturns
              .name(thread_str + ".num_func_calls")
              .desc("number of times a function call or return occured")
@@ -272,6 +277,11 @@ BaseSimpleCPU::regStats()
              .desc("number of float instructions")
              ;
  
+        t_info.numVecInsts
+            .name(thread_str + ".num_vec_insts")
+            .desc("number of vector instructions")
+            ;
+
          t_info.numIntRegReads
              .name(thread_str + ".num_int_register_reads")
              .desc("number of times the integer registers were read")
@@ -613,6 +623,12 @@ BaseSimpleCPU::postExecute()
          t_info.numFpInsts++;
      }
  
+    //vector alu accesses
+    if (curStaticInst->isVector()){
+        t_info.numVecAluAccesses++;
+        t_info.numVecInsts++;
+    }
+
      //number of function calls/returns to get window accesses
      if (curStaticInst->isCall() || curStaticInst->isReturn()){
          t_info.numCallsReturns++;
diff --git a/src/cpu/simple/exec_context.hh b/src/cpu/simple/exec_context.hh

index 0f546407d59368c444de143c83bc34d889dcb571..6d51e5ed908d43cebbd68622b11348e5389de424 100644 (file)
--- a/src/cpu/simple/exec_context.hh
+++ b/src/cpu/simple/exec_context.hh
@@ -94,6 +94,9 @@ class SimpleExecContext : public ExecContext {
      // Number of float alu accesses
      Stats::Scalar numFpAluAccesses;
  
+    // Number of vector alu accesses
+    Stats::Scalar numVecAluAccesses;
+
      // Number of function calls/returns
      Stats::Scalar numCallsReturns;
  
@@ -106,6 +109,9 @@ class SimpleExecContext : public ExecContext {
      // Number of float instructions
      Stats::Scalar numFpInsts;
  
+    // Number of vector instructions
+    Stats::Scalar numVecInsts;
+
      // Number of integer register file accesses
      Stats::Scalar numIntRegReads;
      Stats::Scalar numIntRegWrites;
diff --git a/src/cpu/static_inst.hh b/src/cpu/static_inst.hh

index e7507c6a63ec7457f30371d21077c37787c88706..883c532ac19cf1a3bd5d5c7f8117a2888f71af0d 100644 (file)
--- a/src/cpu/static_inst.hh
+++ b/src/cpu/static_inst.hh
@@ -150,6 +150,7 @@ class StaticInst : public RefCounted, public StaticInstFlags
  
      bool isInteger()      const { return flags[IsInteger]; }
      bool isFloating()     const { return flags[IsFloating]; }
+    bool isVector()       const { return flags[IsVector]; }
      bool isCC()           const { return flags[IsCC]; }
  
      bool isControl()      const { return flags[IsControl]; }
author	Rekai Gonzalez-Alberquilla <Rekai.GonzalezAlberquilla@arm.com>
	Wed, 5 Apr 2017 18:24:23 +0000 (13:24 -0500)
committer	Andreas Sandberg <andreas.sandberg@arm.com>
	Wed, 5 Jul 2017 14:43:49 +0000 (14:43 +0000)
src/arch/alpha/faults.cc		patch \| blob \| history
src/arch/alpha/faults.hh		patch \| blob \| history
src/arch/alpha/isa/fp.isa		patch \| blob \| history
src/arch/arm/isa/insts/fp64.isa		patch \| blob \| history
src/arch/arm/isa/insts/neon64.isa		patch \| blob \| history
src/arch/arm/isa/operands.isa		patch \| blob \| history
src/arch/arm/isa/templates/mem.isa		patch \| blob \| history
src/arch/arm/isa/templates/pred.isa		patch \| blob \| history
src/arch/isa_parser.py		patch \| blob \| history
src/arch/sparc/faults.cc		patch \| blob \| history
src/arch/sparc/faults.hh		patch \| blob \| history
src/arch/sparc/isa/base.isa		patch \| blob \| history
src/cpu/StaticInstFlags.py		patch \| blob \| history
src/cpu/base_dyn_inst.hh		patch \| blob \| history
src/cpu/o3/commit.hh		patch \| blob \| history
src/cpu/o3/commit_impl.hh		patch \| blob \| history
src/cpu/o3/inst_queue.hh		patch \| blob \| history
src/cpu/o3/inst_queue_impl.hh		patch \| blob \| history
src/cpu/simple/base.cc		patch \| blob \| history
src/cpu/simple/exec_context.hh		patch \| blob \| history
src/cpu/static_inst.hh		patch \| blob \| history