cpu: o3: replace issueLatency with bool pipelined
author Nilay Vaish <nilay@cs.wisc.edu>
Thu, 30 Apr 2015 03:35:22 +0000 (22:35 -0500)
committer Nilay Vaish <nilay@cs.wisc.edu>
Thu, 30 Apr 2015 03:35:22 +0000 (22:35 -0500)
Currently, each op class has a parameter issueLat that denotes the number of
cycles after which another op of the same class can be issued.  As of now, this
latency can be either one cycle (fully pipelined) or the same as the execution
latency of the op (not pipelined at all).  The fact that issueLat is a parameter
of type Cycles suggests that it can be set to any value.  To avoid this
confusion, the parameter is replaced by a boolean parameter named 'pipelined'.
If set to true, the op executes in a fully pipelined fashion.  Otherwise, it
executes in an unpipelined fashion.

configs/common/O3_ARM_v7a.py
src/cpu/FuncUnit.py
src/cpu/func_unit.cc
src/cpu/func_unit.hh
src/cpu/o3/FuncUnitConfig.py
src/cpu/o3/fu_pool.cc
src/cpu/o3/fu_pool.hh
src/cpu/o3/inst_queue_impl.hh

index 7d4987d7f2180392d29ea6931a869f6fb9dcd4ba..1bb2b4a5e3bb5b789b0ae72084e45dead9108df7 100644 (file)
@@ -36,9 +36,9 @@ class O3_ARM_v7a_Simple_Int(FUDesc):
 
 # Complex ALU instructions have a variable latencies
 class O3_ARM_v7a_Complex_Int(FUDesc):
-    opList = [ OpDesc(opClass='IntMult', opLat=3, issueLat=1),
-               OpDesc(opClass='IntDiv', opLat=12, issueLat=12),
-               OpDesc(opClass='IprAccess', opLat=3, issueLat=1) ]
+    opList = [ OpDesc(opClass='IntMult', opLat=3, pipelined=True),
+               OpDesc(opClass='IntDiv', opLat=12, pipelined=False),
+               OpDesc(opClass='IprAccess', opLat=3, pipelined=True) ]
     count = 1
 
 
@@ -67,8 +67,8 @@ class O3_ARM_v7a_FP(FUDesc):
                OpDesc(opClass='FloatAdd', opLat=5),
                OpDesc(opClass='FloatCmp', opLat=5),
                OpDesc(opClass='FloatCvt', opLat=5),
-               OpDesc(opClass='FloatDiv', opLat=9, issueLat=9),
-               OpDesc(opClass='FloatSqrt', opLat=33, issueLat=33),
+               OpDesc(opClass='FloatDiv', opLat=9, pipelined=False),
+               OpDesc(opClass='FloatSqrt', opLat=33, pipelined=False),
                OpDesc(opClass='FloatMult', opLat=4) ]
     count = 2
 
index 0bb23e876d0690aff21720905d99ad7ff44b7634..d4493ecf25ec48eae4f0fcac005eb9b41d5c67f4 100644 (file)
@@ -54,9 +54,10 @@ class OpClass(Enum):
 class OpDesc(SimObject):
     type = 'OpDesc'
     cxx_header = "cpu/func_unit.hh"
-    issueLat = Param.Cycles(1, "cycles until another can be issued")
     opClass = Param.OpClass("type of operation")
     opLat = Param.Cycles(1, "cycles until result is available")
+    pipelined = Param.Bool(True, "set to true when the functional unit for"
+        "this op is fully pipelined. False means not pipelined at all.")
 
 class FUDesc(SimObject):
     type = 'FUDesc'
index bb7427da59551124b8c20ae5c5d94d930a2a9b7b..6d009a440093572ceb25bee452207f7132adce4d 100644 (file)
@@ -52,7 +52,7 @@ FuncUnit::FuncUnit(const FuncUnit &fu)
 
     for (int i = 0; i < Num_OpClasses; ++i) {
         opLatencies[i] = fu.opLatencies[i];
-        issueLatencies[i] = fu.issueLatencies[i];
+        pipelined[i] = fu.pipelined[i];
     }
 
     capabilityList = fu.capabilityList;
@@ -60,15 +60,15 @@ FuncUnit::FuncUnit(const FuncUnit &fu)
 
 
 void
-FuncUnit::addCapability(OpClass cap, unsigned oplat, unsigned issuelat)
+FuncUnit::addCapability(OpClass cap, unsigned oplat, bool pipeline)
 {
-    if (issuelat == 0 || oplat == 0)
+    if (oplat == 0)
         panic("FuncUnit:  you don't really want a zero-cycle latency do you?");
 
     capabilityList.set(cap);
 
     opLatencies[cap] = oplat;
-    issueLatencies[cap] = issuelat;
+    pipelined[cap] = pipeline;
 }
 
 bool
@@ -89,10 +89,10 @@ FuncUnit::opLatency(OpClass cap)
     return opLatencies[cap];
 }
 
-unsigned
-FuncUnit::issueLatency(OpClass capability)
+bool
+FuncUnit::isPipelined(OpClass capability)
 {
-    return issueLatencies[capability];
+    return pipelined[capability];
 }
 
 ////////////////////////////////////////////////////////////////////////////
index 51e2011f8776d25ee14d98805086787bf29ccc08..721a69df1149b798d5bec1473e3f550bbc46da06 100644 (file)
@@ -52,11 +52,11 @@ class OpDesc : public SimObject
   public:
     OpClass opClass;
     Cycles opLat;
-    Cycles issueLat;
+    bool pipelined;
 
     OpDesc(const OpDescParams *p)
         : SimObject(p), opClass(p->opClass), opLat(p->opLat),
-          issueLat(p->issueLat) {};
+          pipelined(p->pipelined) {};
 };
 
 class FUDesc : public SimObject
@@ -85,7 +85,7 @@ class FuncUnit
 {
   private:
     unsigned opLatencies[Num_OpClasses];
-    unsigned issueLatencies[Num_OpClasses];
+    bool pipelined[Num_OpClasses];
     std::bitset<Num_OpClasses> capabilityList;
 
   public:
@@ -94,13 +94,13 @@ class FuncUnit
 
     std::string name;
 
-    void addCapability(OpClass cap, unsigned oplat, unsigned issuelat);
+    void addCapability(OpClass cap, unsigned oplat, bool pipelined);
 
     bool provides(OpClass capability);
     std::bitset<Num_OpClasses> capabilities();
 
     unsigned &opLatency(OpClass capability);
-    unsigned issueLatency(OpClass capability);
+    bool isPipelined(OpClass capability);
 };
 
 #endif // __FU_POOL_HH__
index 0f5efb7767baa99a2f99ae51a662ad2ca80257e6..b8be400b5ee46049f7d9d1b775dc0dfc8d994e5e 100644 (file)
@@ -49,7 +49,7 @@ class IntALU(FUDesc):
 
 class IntMultDiv(FUDesc):
     opList = [ OpDesc(opClass='IntMult', opLat=3),
-               OpDesc(opClass='IntDiv', opLat=20, issueLat=19) ]
+               OpDesc(opClass='IntDiv', opLat=20, pipelined=False) ]
 
     # DIV and IDIV instructions in x86 are implemented using a loop which
     # issues division microops.  The latency of these microops should really be
@@ -57,7 +57,6 @@ class IntMultDiv(FUDesc):
     # of the quotient.
     if buildEnv['TARGET_ISA'] in ('x86'):
         opList[1].opLat=1
-        opList[1].issueLat=1
 
     count=2
 
@@ -69,8 +68,8 @@ class FP_ALU(FUDesc):
 
 class FP_MultDiv(FUDesc):
     opList = [ OpDesc(opClass='FloatMult', opLat=4),
-               OpDesc(opClass='FloatDiv', opLat=12, issueLat=12),
-               OpDesc(opClass='FloatSqrt', opLat=24, issueLat=24) ]
+               OpDesc(opClass='FloatDiv', opLat=12, pipelined=False),
+               OpDesc(opClass='FloatSqrt', opLat=24, pipelined=False) ]
     count = 2
 
 class SIMD_Unit(FUDesc):
@@ -109,6 +108,6 @@ class RdWrPort(FUDesc):
     count = 4
 
 class IprPort(FUDesc):
-    opList = [ OpDesc(opClass='IprAccess', opLat = 3, issueLat = 3) ]
+    opList = [ OpDesc(opClass='IprAccess', opLat = 3, pipelined = False) ]
     count = 1
 
index 016b171bc613e491e101078c2bf9d05f33da9050..dab7dbed2c5aedfed7dfce4933cbadeeec8b15be 100644 (file)
@@ -89,7 +89,7 @@ FUPool::FUPool(const Params *p)
 
     for (int i = 0; i < Num_OpClasses; ++i) {
         maxOpLatencies[i] = Cycles(0);
-        maxIssueLatencies[i] = Cycles(0);
+        pipelined[i] = true;
     }
 
     //
@@ -123,13 +123,13 @@ FUPool::FUPool(const Params *p)
                     fuPerCapList[(*j)->opClass].addFU(numFU + k);
 
                 // indicate that this FU has the capability
-                fu->addCapability((*j)->opClass, (*j)->opLat, (*j)->issueLat);
+                fu->addCapability((*j)->opClass, (*j)->opLat, (*j)->pipelined);
 
                 if ((*j)->opLat > maxOpLatencies[(*j)->opClass])
                     maxOpLatencies[(*j)->opClass] = (*j)->opLat;
 
-                if ((*j)->issueLat > maxIssueLatencies[(*j)->opClass])
-                    maxIssueLatencies[(*j)->opClass] = (*j)->issueLat;
+                if (!(*j)->pipelined)
+                    pipelined[(*j)->opClass] = false;
             }
 
             numFU++;
index 2e1b71dad1994ca32c2887029760d847c9e575a7..8b501fc81231d32a7cf48e29baec6c615771313c 100644 (file)
@@ -72,8 +72,8 @@ class FUPool : public SimObject
   private:
     /** Maximum op execution latencies, per op class. */
     Cycles maxOpLatencies[Num_OpClasses];
-    /** Maximum issue latencies, per op class. */
-    Cycles maxIssueLatencies[Num_OpClasses];
+    /** Whether op is pipelined or not. */
+    bool pipelined[Num_OpClasses];
 
     /** Bitvector listing capabilities of this FU pool. */
     std::bitset<Num_OpClasses> capabilityList;
@@ -160,8 +160,8 @@ class FUPool : public SimObject
     }
 
     /** Returns the issue latency of the given capability. */
-    Cycles getIssueLatency(OpClass capability) {
-        return maxIssueLatencies[capability];
+    bool isPipelined(OpClass capability) {
+        return pipelined[capability];
     }
 
     /** Have all the FUs drained? */
index fa621ffbf273f73141015398192f8af6697f4dd4..7d359b9922046040f1c1be5a2ba5974cd723a73f 100644 (file)
@@ -825,7 +825,7 @@ InstructionQueue<Impl>::scheduleReadyInsts()
                 if (idx >= 0)
                     fuPool->freeUnitNextCycle(idx);
             } else {
-                Cycles issue_latency = fuPool->getIssueLatency(op_class);
+                bool pipelined = fuPool->isPipelined(op_class);
                 // Generate completion event for the FU
                 ++wbOutstanding;
                 FUCompletion *execution = new FUCompletion(issuing_inst,
@@ -834,8 +834,7 @@ InstructionQueue<Impl>::scheduleReadyInsts()
                 cpu->schedule(execution,
                               cpu->clockEdge(Cycles(op_latency - 1)));
 
-                // @todo: Enforce that issue_latency == 1 or op_latency
-                if (issue_latency > Cycles(1)) {
+                if (!pipelined) {
                     // If FU isn't pipelined, then it must be freed
                     // upon the execution completing.
                     execution->setFreeFU();