arm: Fix v8 neon latency issue for loads/stores

author Mitch Hayenga <mitch.hayenga@arm.com>
Wed, 3 Sep 2014 11:42:44 +0000 (07:42 -0400)
committer Mitch Hayenga <mitch.hayenga@arm.com>
Wed, 3 Sep 2014 11:42:44 +0000 (07:42 -0400)
diff --git a/src/arch/arm/insts/macromem.cc b/src/arch/arm/insts/macromem.cc
index 2ada2953927de15ddb766cd1bfbe290bfd652a5b..65cd2c3b7fd7f47c99536b7f6077717b69541c68 100644
--- a/src/arch/arm/insts/macromem.cc
+++ b/src/arch/arm/insts/macromem.cc
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2010-2013 ARM Limited
+ * Copyright (c) 2010-2014 ARM Limited
   * All rights reserved
   *
   * The license below extends only to copyright in the software and shall
@@ -1107,9 +1107,26 @@ VldMultOp64::VldMultOp64(const char *mnem, ExtMachInst machInst,
      }
  
      for (int i = 0; i < numMarshalMicroops; ++i) {
-        microOps[uopIdx++] = new MicroDeintNeon64(
-            machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
-            numStructElems, numRegs, i /* step */);
+        switch(numRegs) {
+            case 1: microOps[uopIdx++] = new MicroDeintNeon64_1Reg(
+                        machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
+                        numStructElems, 1, i /* step */);
+                    break;
+            case 2: microOps[uopIdx++] = new MicroDeintNeon64_2Reg(
+                        machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
+                        numStructElems, 2, i /* step */);
+                    break;
+            case 3: microOps[uopIdx++] = new MicroDeintNeon64_3Reg(
+                        machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
+                        numStructElems, 3, i /* step */);
+                    break;
+            case 4: microOps[uopIdx++] = new MicroDeintNeon64_4Reg(
+                        machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
+                        numStructElems, 4, i /* step */);
+                    break;
+            default: panic("Invalid number of registers");
+        }
+
      }
  
      assert(uopIdx == numMicroops);
@@ -1150,9 +1167,25 @@ VstMultOp64::VstMultOp64(const char *mnem, ExtMachInst machInst,
      unsigned uopIdx = 0;
  
      for(int i = 0; i < numMarshalMicroops; ++i) {
-        microOps[uopIdx++] = new MicroIntNeon64(
-            machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
-            numStructElems, numRegs, i /* step */);
+        switch (numRegs) {
+            case 1: microOps[uopIdx++] = new MicroIntNeon64_1Reg(
+                        machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
+                        numStructElems, 1, i /* step */);
+                    break;
+            case 2: microOps[uopIdx++] = new MicroIntNeon64_2Reg(
+                        machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
+                        numStructElems, 2, i /* step */);
+                    break;
+            case 3: microOps[uopIdx++] = new MicroIntNeon64_3Reg(
+                        machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
+                        numStructElems, 3, i /* step */);
+                    break;
+            case 4: microOps[uopIdx++] = new MicroIntNeon64_4Reg(
+                        machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
+                        numStructElems, 4, i /* step */);
+                    break;
+            default: panic("Invalid number of registers");
+        }
      }
  
      uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
diff --git a/src/arch/arm/isa/insts/neon64_mem.isa b/src/arch/arm/isa/insts/neon64_mem.isa
index 32a37f87ed0de75ce6a25122900935f9716b70ef..91fb4fa349defed8f7bbb13c0c909c44b0f9f887 100644
--- a/src/arch/arm/isa/insts/neon64_mem.isa
+++ b/src/arch/arm/isa/insts/neon64_mem.isa
@@ -1,6 +1,6 @@
  // -*- mode: c++ -*-
  
-// Copyright (c) 2012-2013 ARM Limited
+// Copyright (c) 2012-2014 ARM Limited
  // All rights reserved
  //
  // The license below extends only to copyright in the software and shall
@@ -163,11 +163,11 @@ let {{
          header_output += MicroNeonMemDeclare64.subst(loadIop) + \
              MicroNeonMemDeclare64.subst(storeIop)
  
-    def mkMarshalMicroOp(name, Name):
+    def mkMarshalMicroOp(name, Name, numRegs=4):
          global header_output, decoder_output, exec_output
  
          getInputCodeOp1L = ''
-        for v in range(4):
+        for v in range(numRegs):
              for p in range(4):
                  getInputCodeOp1L += '''
              writeVecElem(&input[%(v)d], (XReg) AA64FpOp1P%(p)dV%(v)d_uw,
@@ -175,7 +175,7 @@ let {{
              ''' % { 'v' : v, 'p' : p }
  
          getInputCodeOp1S = ''
-        for v in range(4):
+        for v in range(numRegs):
              for p in range(4):
                  getInputCodeOp1S += '''
              writeVecElem(&input[%(v)d], (XReg) AA64FpOp1P%(p)dV%(v)dS_uw,
@@ -262,7 +262,8 @@ let {{
              '''
  
              iop = InstObjParams(name, Name, 'MicroNeonMixOp64',
-                                { 'code' : eCode }, ['IsMicroop'])
+                                { 'code' : eCode, 'op_class' : 'No_OpClass' },
+                                ['IsMicroop'])
              header_output += MicroNeonMixDeclare64.subst(iop)
              exec_output += MicroNeonMixExecute64.subst(iop)
  
@@ -323,7 +324,8 @@ let {{
                  ''' % { 'v': v, 'p': p}
  
              iop = InstObjParams(name, Name, 'MicroNeonMixOp64',
-                                { 'code' : eCode }, ['IsMicroop'])
+                                { 'code' : eCode, 'op_class' : 'No_OpClass' },
+                                ['IsMicroop'])
              header_output += MicroNeonMixDeclare64.subst(iop)
              exec_output += MicroNeonMixExecute64.subst(iop)
  
@@ -443,8 +445,14 @@ let {{
  
      # Generate instructions
      mkMemAccMicroOp('mem_neon_uop')
-    mkMarshalMicroOp('deint_neon_uop', 'MicroDeintNeon64')
-    mkMarshalMicroOp('int_neon_uop', 'MicroIntNeon64')
+    mkMarshalMicroOp('deint_neon_uop', 'MicroDeintNeon64_1Reg', numRegs=1)
+    mkMarshalMicroOp('deint_neon_uop', 'MicroDeintNeon64_2Reg', numRegs=2)
+    mkMarshalMicroOp('deint_neon_uop', 'MicroDeintNeon64_3Reg', numRegs=3)
+    mkMarshalMicroOp('deint_neon_uop', 'MicroDeintNeon64_4Reg', numRegs=4)
+    mkMarshalMicroOp('int_neon_uop', 'MicroIntNeon64_1Reg', numRegs=1)
+    mkMarshalMicroOp('int_neon_uop', 'MicroIntNeon64_2Reg', numRegs=2)
+    mkMarshalMicroOp('int_neon_uop', 'MicroIntNeon64_3Reg', numRegs=3)
+    mkMarshalMicroOp('int_neon_uop', 'MicroIntNeon64_4Reg', numRegs=4)
      mkMarshalMicroOp('unpack_neon_uop', 'MicroUnpackNeon64')
      mkMarshalMicroOp('pack_neon_uop', 'MicroPackNeon64')
author	Mitch Hayenga <mitch.hayenga@arm.com>
	Wed, 3 Sep 2014 11:42:44 +0000 (07:42 -0400)
committer	Mitch Hayenga <mitch.hayenga@arm.com>
	Wed, 3 Sep 2014 11:42:44 +0000 (07:42 -0400)
src/arch/arm/insts/macromem.cc		patch | blob | history
src/arch/arm/isa/insts/neon64_mem.isa		patch | blob | history