/*
 * Copyright (c) 2013 - 2016 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder. You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "cpu/trace/trace_cpu.hh"

#include "sim/sim_exit.hh"

// Declare and initialize the static counter for number of trace CPUs.
int TraceCPU::numTraceCPUs = 0;

TraceCPU::TraceCPU(const TraceCPUParams &params)
    : BaseCPU(params),
      icachePort(this),
      dcachePort(this),
      instRequestorID(params.system->getRequestorId(this, "inst")),
      dataRequestorID(params.system->getRequestorId(this, "data")),
      instTraceFile(params.instTraceFile),
      dataTraceFile(params.dataTraceFile),
      icacheGen(*this, ".iside", icachePort, instRequestorID, instTraceFile),
      dcacheGen(*this, ".dside", dcachePort, dataRequestorID, dataTraceFile,
                params),
      icacheNextEvent([this]{ schedIcacheNext(); }, name()),
      dcacheNextEvent([this]{ schedDcacheNext(); }, name()),
      oneTraceComplete(false),
      traceOffset(0),
      execCompleteEvent(nullptr),
      enableEarlyExit(params.enableEarlyExit),
      progressMsgInterval(params.progressMsgInterval),
      progressMsgThreshold(params.progressMsgInterval), traceStats(this)
{
    // Increment static counter for number of Trace CPUs.
    ++TraceCPU::numTraceCPUs;

    // Check that the python parameters for sizes of ROB, store buffer and
    // load buffer do not overflow the corresponding C++ variables.
    fatal_if(params.sizeROB > UINT16_MAX,
             "ROB size set to %d exceeds the max. value of %d.",
             params.sizeROB, UINT16_MAX);
    fatal_if(params.sizeStoreBuffer > UINT16_MAX,
             "Store buffer size set to %d exceeds the max. value of %d.",
             params.sizeStoreBuffer, UINT16_MAX);
    fatal_if(params.sizeLoadBuffer > UINT16_MAX,
             "Load buffer size set to %d exceeds the max. value of %d.",
             params.sizeLoadBuffer, UINT16_MAX);
}

void
TraceCPU::updateNumOps(uint64_t rob_num)
{
    traceStats.numOps = rob_num;
    if (progressMsgInterval != 0 &&
        traceStats.numOps.value() >= progressMsgThreshold) {
        inform("%s: %i insts committed\n", name(), progressMsgThreshold);
        progressMsgThreshold += progressMsgInterval;
    }
}

void
TraceCPU::takeOverFrom(BaseCPU *oldCPU)
{
    // Unbind the ports of the old CPU and bind the ports of the TraceCPU.
    getInstPort().takeOverFrom(&oldCPU->getInstPort());
    getDataPort().takeOverFrom(&oldCPU->getDataPort());
}

void
TraceCPU::init()
{
    DPRINTF(TraceCPUInst, "Instruction fetch request trace file is \"%s\".\n",
            instTraceFile);
    DPRINTF(TraceCPUData, "Data memory request trace file is \"%s\".\n",
            dataTraceFile);

    BaseCPU::init();

    // Get the send tick of the first instruction read request
    Tick first_icache_tick = icacheGen.init();

    // Get the send tick of the first data read/write request
    Tick first_dcache_tick = dcacheGen.init();

    // Set the trace offset as the minimum of that in both traces
    traceOffset = std::min(first_icache_tick, first_dcache_tick);
    inform("%s: Time offset (tick) found as min of both traces is %lli.",
           name(), traceOffset);

    // Schedule next icache and dcache event by subtracting the offset
    schedule(icacheNextEvent, first_icache_tick - traceOffset);
    schedule(dcacheNextEvent, first_dcache_tick - traceOffset);

    // Adjust the trace offset for the dcache generator's ready nodes
    // We don't need to do this for the icache generator as it will
    // send its first request at the first event and schedule subsequent
    // events using a relative tick delta
    dcacheGen.adjustInitTraceOffset(traceOffset);

    // If the Trace CPU simulation is configured to exit on any one trace
    // completion then we don't need a counted event to count down all Trace
    // CPUs in the system. If not then instantiate a counted event.
    if (!enableEarlyExit) {
        // The static counter for number of Trace CPUs is correctly set at
        // this point so create an event and pass it.
        execCompleteEvent = new CountedExitEvent("end of all traces reached.",
                                                 numTraceCPUs);
    }

}

void
TraceCPU::schedIcacheNext()
{
    DPRINTF(TraceCPUInst, "IcacheGen event.\n");

    // Try to send the current packet or a retry packet if there is one
    bool sched_next = icacheGen.tryNext();
    // If packet sent successfully, schedule next event
    if (sched_next) {
        DPRINTF(TraceCPUInst,
                "Scheduling next icacheGen event at %d.\n",
                curTick() + icacheGen.tickDelta());
        schedule(icacheNextEvent, curTick() + icacheGen.tickDelta());
        ++traceStats.numSchedIcacheEvent;
    } else {
        // Check if the trace is complete. If not, do nothing because sending
        // failed and the next event will be scheduled via recvReqRetry().
        if (icacheGen.isTraceComplete()) {
            // If this is the first trace to complete, set the variable. If it
            // is already set then both traces are complete and the simulation
            // can exit.
            checkAndSchedExitEvent();
        }
    }
    return;
}

void
TraceCPU::schedDcacheNext()
{
    DPRINTF(TraceCPUData, "DcacheGen event.\n");

    // Update stat for numCycles
    baseStats.numCycles = clockEdge() / clockPeriod();

    dcacheGen.execute();
    if (dcacheGen.isExecComplete()) {
        checkAndSchedExitEvent();
    }
}

void
TraceCPU::checkAndSchedExitEvent()
{
    if (!oneTraceComplete) {
        oneTraceComplete = true;
    } else {
        // Schedule event to indicate execution is complete as both
        // instruction and data access traces have been played back.
        inform("%s: Execution complete.", name());
        // If the replay is configured to exit early, that is when any one
        // execution is complete then exit immediately and return. Otherwise,
        // schedule the counted exit that counts down completion of each Trace
        // CPU.
        if (enableEarlyExit) {
            exitSimLoop("End of trace reached");
        } else {
            schedule(*execCompleteEvent, curTick());
        }
    }
}

TraceCPU::TraceStats::TraceStats(TraceCPU *trace) :
    Stats::Group(trace),
    ADD_STAT(numSchedDcacheEvent,
             "Number of events scheduled to trigger data request generator"),
    ADD_STAT(numSchedIcacheEvent,
             "Number of events scheduled to trigger instruction request "
             "generator"),
    ADD_STAT(numOps, "Number of micro-ops simulated by the Trace CPU"),
    ADD_STAT(cpi, "Cycles per micro-op used as a proxy for CPI",
             trace->baseStats.numCycles / numOps)
{
    cpi.precision(6);
}

TraceCPU::ElasticDataGen::
ElasticDataGenStatGroup::ElasticDataGenStatGroup(Stats::Group *parent,
                                                 const std::string& _name) :
    Stats::Group(parent, _name.c_str()),
    ADD_STAT(maxDependents, "Max number of dependents observed on a node"),
    ADD_STAT(maxReadyListSize, "Max size of the ready list observed"),
    ADD_STAT(numSendAttempted, "Number of first attempts to send a request"),
    ADD_STAT(numSendSucceeded, "Number of successful first attempts"),
    ADD_STAT(numSendFailed, "Number of failed first attempts"),
    ADD_STAT(numRetrySucceeded, "Number of successful retries"),
    ADD_STAT(numSplitReqs, "Number of split requests"),
    ADD_STAT(numSOLoads, "Number of strictly ordered loads"),
    ADD_STAT(numSOStores, "Number of strictly ordered stores"),
    ADD_STAT(dataLastTick, "Last tick simulated from the elastic data trace")
{
}

Tick
TraceCPU::ElasticDataGen::init()
{
    DPRINTF(TraceCPUData, "Initializing data memory request generator "
            "DcacheGen: elastic issue with retry.\n");

    panic_if(!readNextWindow(),
             "Trace has %d elements. It must have at least %d elements.",
             depGraph.size(), 2 * windowSize);
    DPRINTF(TraceCPUData, "After 1st read, depGraph size:%d.\n",
            depGraph.size());

    panic_if(!readNextWindow(),
             "Trace has %d elements. It must have at least %d elements.",
             depGraph.size(), 2 * windowSize);
250 DPRINTF(TraceCPUData, "After 2st read, depGraph size:%d.\n",
            depGraph.size());

    // Print readyList
    if (DTRACE(TraceCPUData)) {
        printReadyList();
    }
    auto free_itr = readyList.begin();
    DPRINTF(TraceCPUData,
            "Execute tick of the first dependency free node %lli is %d.\n",
            free_itr->seqNum, free_itr->execTick);
    // Return the execute tick of the earliest ready node so that an event
    // can be scheduled to call execute()
    return (free_itr->execTick);
}

void
TraceCPU::ElasticDataGen::adjustInitTraceOffset(Tick& offset)
{
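    // The ready nodes were given absolute execute ticks from the trace during
    // init(); shift them by the common trace offset so that replay
    // effectively starts at tick 0.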
269 for (auto& free_node : readyList) {
270 free_node.execTick -= offset;
271 }
272 }
273
274 void
275 TraceCPU::ElasticDataGen::exit()
276 {
277 trace.reset();
278 }
279
280 bool
281 TraceCPU::ElasticDataGen::readNextWindow()
282 {
283 // Read and add next window
284 DPRINTF(TraceCPUData, "Reading next window from file.\n");
285
286 if (traceComplete) {
287 // We are at the end of the file, thus we have no more records.
288 // Return false.
289 return false;
290 }
291
292 DPRINTF(TraceCPUData, "Start read: Size of depGraph is %d.\n",
293 depGraph.size());
294
295 uint32_t num_read = 0;
296 while (num_read != windowSize) {
297
298 // Create a new graph node
299 GraphNode* new_node = new GraphNode;
300
301 // Read the next line to get the next record. If that fails then end of
302 // trace has been reached and traceComplete needs to be set in addition
303 // to returning false.
304 if (!trace.read(new_node)) {
305 DPRINTF(TraceCPUData, "\tTrace complete!\n");
306 traceComplete = true;
307 return false;
308 }
309
310 // Annotate the ROB dependencies of the new node onto the parent nodes.
311 addDepsOnParent(new_node, new_node->robDep, new_node->numRobDep);
312 // Annotate the register dependencies of the new node onto the parent
313 // nodes.
314 addDepsOnParent(new_node, new_node->regDep, new_node->numRegDep);
315
316 num_read++;
317 // Add to map
318 depGraph[new_node->seqNum] = new_node;
319 if (new_node->numRobDep == 0 && new_node->numRegDep == 0) {
320 // Source dependencies are already complete, check if resources
321 // are available and issue. The execution time is approximated
322 // to current time plus the computational delay.
323 checkAndIssue(new_node);
324 }
325 }
326
327 DPRINTF(TraceCPUData, "End read: Size of depGraph is %d.\n",
328 depGraph.size());
329 return true;
330 }
331
332 template<typename T>
333 void
334 TraceCPU::ElasticDataGen::addDepsOnParent(GraphNode *new_node,
335 T& dep_array, uint8_t& num_dep)
336 {
337 for (auto& a_dep : dep_array) {
        // The convention is to set the dependencies starting with the first
        // index in the ROB and register dependency arrays. Thus, when we
        // reach a dependency equal to the initialisation value of zero, we
        // know we have iterated over all dependencies and can break.
        if (a_dep == 0)
            break;
        // We look up the valid dependency, i.e. the parent of this node
        auto parent_itr = depGraph.find(a_dep);
        if (parent_itr != depGraph.end()) {
            // If the parent is found, it is yet to be executed. Append a
            // pointer to the new node to the dependents list of the parent
            // node.
            parent_itr->second->dependents.push_back(new_node);
            auto num_depts = parent_itr->second->dependents.size();
            elasticStats.maxDependents = std::max<double>(num_depts,
                elasticStats.maxDependents.value());
        } else {
            // The dependency is not found in the graph. So consider
            // the execution of the parent is complete, i.e. remove this
            // dependency.
            a_dep = 0;
            num_dep--;
        }
    }
}

void
TraceCPU::ElasticDataGen::execute()
{
    DPRINTF(TraceCPUData, "Execute start occupancy:\n");
    DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, "
             "depFreeQueue = %d ,", depGraph.size(), readyList.size(),
             depFreeQueue.size());
    hwResource.printOccupancy();

    // Read next window to make sure that dependents of all dep-free nodes
    // are in the depGraph
    if (nextRead) {
        readNextWindow();
        nextRead = false;
    }

    // First attempt to issue the pending dependency-free nodes held
    // in depFreeQueue. If resources have become available for a node,
    // then issue it, i.e. add the node to readyList.
    while (!depFreeQueue.empty()) {
        if (checkAndIssue(depFreeQueue.front(), false)) {
            DPRINTF(TraceCPUData,
                    "Removing from depFreeQueue: seq. num %lli.\n",
                    (depFreeQueue.front())->seqNum);
            depFreeQueue.pop();
        } else {
            break;
        }
    }
    // Proceed to execute from readyList
    auto graph_itr = depGraph.begin();
    auto free_itr = readyList.begin();
    // Iterate through readyList until the next free node has its execute
    // tick later than curTick or the end of readyList is reached
398 while (free_itr->execTick <= curTick() && free_itr != readyList.end()) {
399
        // Get pointer to the node to be executed
        graph_itr = depGraph.find(free_itr->seqNum);
        assert(graph_itr != depGraph.end());
        GraphNode* node_ptr = graph_itr->second;

        // If there is a retryPkt send that else execute the load
        if (retryPkt) {
            // The retryPkt must be the request that was created by the
            // first node in the readyList.
            if (retryPkt->req->getReqInstSeqNum() != node_ptr->seqNum) {
410 panic("Retry packet's seqence number does not match "
411 "the first node in the readyList.\n");
            }
            if (port.sendTimingReq(retryPkt)) {
                ++elasticStats.numRetrySucceeded;
                retryPkt = nullptr;
            }
        } else if (node_ptr->isLoad() || node_ptr->isStore()) {
            // If there is no retryPkt, attempt to send a memory request in
            // case of a load or store node. If the send fails, executeMemReq()
            // returns a packet pointer, which we save in retryPkt. In case of
            // a comp node we don't do anything and simply continue as if the
            // execution of the comp node succeeded.
            retryPkt = executeMemReq(node_ptr);
        }
        // If the retryPkt or a new load/store node failed, we exit from here
        // as a retry from cache will bring the control to execute(). The
        // first node in readyList then, will be the failed node.
        if (retryPkt) {
            break;
        }

        // Proceed to remove dependencies for the successfully executed node.
        // If it is a load which is not strictly ordered and we sent a
        // request for it successfully, we do not yet mark any register
        // dependencies complete. But, as per the dependency model, the ROB
        // dependencies that load and non-load/store dependents have on this
        // load are satisfied by sending it successfully, so mark those
        // complete now.
438 if (node_ptr->isLoad() && !node_ptr->isStrictlyOrdered()) {
439 // If execute succeeded mark its dependents as complete
440 DPRINTF(TraceCPUData,
441 "Node seq. num %lli sent. Waking up dependents..\n",
442 node_ptr->seqNum);
443
444 auto child_itr = (node_ptr->dependents).begin();
445 while (child_itr != (node_ptr->dependents).end()) {
446 // ROB dependency of a store on a load must not be removed
447 // after load is sent but after response is received
448 if (!(*child_itr)->isStore() &&
449 (*child_itr)->removeRobDep(node_ptr->seqNum)) {
450
451 // Check if the child node has become dependency free
452 if ((*child_itr)->numRobDep == 0 &&
453 (*child_itr)->numRegDep == 0) {
454
455 // Source dependencies are complete, check if
456 // resources are available and issue
457 checkAndIssue(*child_itr);
458 }
459 // Remove this child for the sent load and point to new
460 // location of the element following the erased element
461 child_itr = node_ptr->dependents.erase(child_itr);
462 } else {
463 // This child is not dependency-free, point to the next
464 // child
465 child_itr++;
466 }
467 }
468 } else {
469 // If it is a strictly ordered load mark its dependents as complete
470 // as we do not send a request for this case. If it is a store or a
471 // comp node we also mark all its dependents complete.
472 DPRINTF(TraceCPUData, "Node seq. num %lli done. Waking"
473 " up dependents..\n", node_ptr->seqNum);
474
475 for (auto child : node_ptr->dependents) {
476 // If the child node is dependency free removeDepOnInst()
477 // returns true.
478 if (child->removeDepOnInst(node_ptr->seqNum)) {
479 // Source dependencies are complete, check if resources
480 // are available and issue
481 checkAndIssue(child);
482 }
483 }
484 }
485
        // After executing the node, remove it from readyList and, where
        // safe, delete the node.
        readyList.erase(free_itr);
        // If it is a cacheable load which was sent, don't delete
        // just yet. Delete it in completeMemAccess() after the
        // response is received. If it is a strictly ordered
        // load, it was not sent and all dependencies were simply
        // marked complete. Thus it is safe to delete it. For
        // stores and non load/store nodes all dependencies were
        // marked complete so it is safe to delete it.
        if (!node_ptr->isLoad() || node_ptr->isStrictlyOrdered()) {
            // Release all resources occupied by the completed node
            hwResource.release(node_ptr);
            // clear the dynamically allocated set of dependents
            (node_ptr->dependents).clear();
            // Update the stat for numOps simulated
            owner.updateNumOps(node_ptr->robNum);
            // delete node
            delete node_ptr;
            // remove from graph
            depGraph.erase(graph_itr);
        }
        // Point to first node to continue to next iteration of while loop
        free_itr = readyList.begin();
    } // end of while loop

    // Print readyList, sizes of queues and resource status after updating
    if (DTRACE(TraceCPUData)) {
        printReadyList();
        DPRINTF(TraceCPUData, "Execute end occupancy:\n");
        DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, "
                 "depFreeQueue = %d ,", depGraph.size(), readyList.size(),
                 depFreeQueue.size());
        hwResource.printOccupancy();
    }

    if (retryPkt) {
522 DPRINTF(TraceCPUData, "Not scheduling an event as expecting a retry"
523 "event from the cache for seq. num %lli.\n",
                retryPkt->req->getReqInstSeqNum());
        return;
    }
    // If the size of the dependency graph is less than the dependency window
    // then read from the trace file to populate the graph next time we are in
    // execute.
    if (depGraph.size() < windowSize && !traceComplete)
        nextRead = true;

    // If cache is not blocked, schedule an event for the first execTick in
    // readyList else retry from cache will schedule the event. If the ready
    // list is empty then check if the next pending node has resources
    // available to issue. If yes, then schedule an event for the next cycle.
    if (!readyList.empty()) {
        Tick next_event_tick = std::max(readyList.begin()->execTick,
                                        curTick());
        DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
                next_event_tick);
        owner.schedDcacheNextEvent(next_event_tick);
    } else if (readyList.empty() && !depFreeQueue.empty() &&
               hwResource.isAvailable(depFreeQueue.front())) {
        DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
                owner.clockEdge(Cycles(1)));
        owner.schedDcacheNextEvent(owner.clockEdge(Cycles(1)));
    }

    // If trace is completely read, readyList is empty and depGraph is empty,
    // set execComplete to true
    if (depGraph.empty() && readyList.empty() && traceComplete &&
        !hwResource.awaitingResponse()) {
        DPRINTF(TraceCPUData, "\tExecution Complete!\n");
        execComplete = true;
        elasticStats.dataLastTick = curTick();
    }
}

PacketPtr
TraceCPU::ElasticDataGen::executeMemReq(GraphNode* node_ptr)
{
    DPRINTF(TraceCPUData, "Executing memory request %lli (phys addr %d, "
            "virt addr %d, pc %#x, size %d, flags %d).\n",
            node_ptr->seqNum, node_ptr->physAddr, node_ptr->virtAddr,
            node_ptr->pc, node_ptr->size, node_ptr->flags);

    // If the request is strictly ordered, do not send it. Just return nullptr
    // as if it was successfully sent.
570 if (node_ptr->isStrictlyOrdered()) {
571 node_ptr->isLoad() ? ++elasticStats.numSOLoads :
572 ++elasticStats.numSOStores;
573 DPRINTF(TraceCPUData, "Skipping strictly ordered request %lli.\n",
574 node_ptr->seqNum);
575 return nullptr;
576 }
577
578 // Check if the request spans two cache lines as this condition triggers
579 // an assert fail in the L1 cache. If it does then truncate the size to
580 // access only until the end of that line and ignore the remainder. The
581 // stat counting this is useful to keep a check on how frequently this
    // happens. If required the code could be revised to mimic splitting such
    // a request into two.
    unsigned blk_size = owner.cacheLineSize();
    Addr blk_offset = (node_ptr->physAddr & (Addr)(blk_size - 1));
    if (!(blk_offset + node_ptr->size <= blk_size)) {
        node_ptr->size = blk_size - blk_offset;
        ++elasticStats.numSplitReqs;
    }

    // Create a request and the packet containing request
    auto req = std::make_shared<Request>(
        node_ptr->physAddr, node_ptr->size, node_ptr->flags, requestorId);
    req->setReqInstSeqNum(node_ptr->seqNum);

    // If this is not done it triggers assert in L1 cache for invalid contextId
    req->setContext(ContextID(0));

    req->setPC(node_ptr->pc);
    // If virtual address is valid, set the virtual address field
    // of the request.
    if (node_ptr->virtAddr != 0) {
        req->setVirt(node_ptr->virtAddr, node_ptr->size,
                     node_ptr->flags, requestorId, node_ptr->pc);
        req->setPaddr(node_ptr->physAddr);
        req->setReqInstSeqNum(node_ptr->seqNum);
    }

    PacketPtr pkt;
    uint8_t* pkt_data = new uint8_t[req->getSize()];
    if (node_ptr->isLoad()) {
        pkt = Packet::createRead(req);
    } else {
        pkt = Packet::createWrite(req);
        memset(pkt_data, 0xA, req->getSize());
    }
    pkt->dataDynamic(pkt_data);

    // Call RequestPort method to send a timing request for this packet
    bool success = port.sendTimingReq(pkt);
    ++elasticStats.numSendAttempted;

    if (!success) {
        // If it fails, return the packet to retry when a retry is signalled by
        // the cache
        ++elasticStats.numSendFailed;
        DPRINTF(TraceCPUData, "Send failed. Saving packet for retry.\n");
        return pkt;
    } else {
        // If it succeeds, return nullptr
        ++elasticStats.numSendSucceeded;
        return nullptr;
    }
}

bool
TraceCPU::ElasticDataGen::checkAndIssue(const GraphNode* node_ptr, bool first)
{
    // Assert the node is dependency-free
    assert(node_ptr->numRobDep == 0 && node_ptr->numRegDep == 0);

    // If this is the first attempt, print a debug message to indicate this.
    if (first) {
        DPRINTFR(TraceCPUData, "\t\tseq. num %lli(%s) with rob num %lli is now"
                 " dependency free.\n", node_ptr->seqNum, node_ptr->typeToStr(),
                 node_ptr->robNum);
    }

    // Check if resources are available to issue the specific node
    if (hwResource.isAvailable(node_ptr)) {
        // If resources are free only then add to readyList
        DPRINTFR(TraceCPUData, "\t\tResources available for seq. num %lli. "
                 "Adding to readyList, occupying resources.\n",
                 node_ptr->seqNum);
        // Compute the execute tick by adding the compute delay for the node
        // and add the ready node to the ready list
        addToSortedReadyList(node_ptr->seqNum,
                             owner.clockEdge() + node_ptr->compDelay);
        // Account for the resources taken up by this issued node.
        hwResource.occupy(node_ptr);
        return true;
    } else {
        if (first) {
            // Although dependencies are complete, resources are not available.
            DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num "
                     "%lli. Adding to depFreeQueue.\n", node_ptr->seqNum);
            depFreeQueue.push(node_ptr);
        } else {
            DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num "
                     "%lli. Still pending issue.\n", node_ptr->seqNum);
        }
        return false;
    }
}

void
TraceCPU::ElasticDataGen::completeMemAccess(PacketPtr pkt)
{
    // Release the resources for this completed node.
    if (pkt->isWrite()) {
        // Consider store complete.
        hwResource.releaseStoreBuffer();
        // If it is a store response then do nothing since we do not model
        // dependencies on store completion in the trace. But if we were
        // blocking execution due to store buffer fullness, we need to schedule
        // an event and attempt to progress.
    } else {
        // If it is a load response then release the dependents waiting on it.
        // Get pointer to the completed load
        auto graph_itr = depGraph.find(pkt->req->getReqInstSeqNum());
        assert(graph_itr != depGraph.end());
        GraphNode* node_ptr = graph_itr->second;

        // Release resources occupied by the load
        hwResource.release(node_ptr);

        DPRINTF(TraceCPUData, "Load seq. num %lli response received. Waking up"
                " dependents..\n", node_ptr->seqNum);

        for (auto child : node_ptr->dependents) {
            if (child->removeDepOnInst(node_ptr->seqNum)) {
                checkAndIssue(child);
            }
        }

        // clear the dynamically allocated set of dependents
        (node_ptr->dependents).clear();
        // Update the stat for numOps completed
        owner.updateNumOps(node_ptr->robNum);
        // delete node
        delete node_ptr;
        // remove from graph
        depGraph.erase(graph_itr);
    }

    if (DTRACE(TraceCPUData)) {
        printReadyList();
    }

    // If the size of the dependency graph is less than the dependency window
    // then read from the trace file to populate the graph next time we are in
    // execute.
    if (depGraph.size() < windowSize && !traceComplete)
        nextRead = true;

    // If not waiting for retry, attempt to schedule next event
    if (!retryPkt) {
        // We might have new dep-free nodes in the list which will have execute
        // tick greater than or equal to curTick. But a new dep-free node might
        // have its execute tick earlier. Therefore, attempt to reschedule. It
        // could happen that the readyList is empty and we got here via a
        // last remaining response. So, either the trace is complete or there
        // are pending nodes in the depFreeQueue. The checking is done in the
        // execute() control flow, so schedule an event to go via that flow.
        Tick next_event_tick = readyList.empty() ? owner.clockEdge(Cycles(1)) :
            std::max(readyList.begin()->execTick, owner.clockEdge(Cycles(1)));
        DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
                next_event_tick);
        owner.schedDcacheNextEvent(next_event_tick);
    }
}

void
TraceCPU::ElasticDataGen::addToSortedReadyList(NodeSeqNum seq_num,
                                               Tick exec_tick)
{
    ReadyNode ready_node;
    ready_node.seqNum = seq_num;
    ready_node.execTick = exec_tick;

    // Iterator to readyList
    auto itr = readyList.begin();

    // If the readyList is empty, simply insert the new node at the beginning
    // and return
    if (itr == readyList.end()) {
        readyList.insert(itr, ready_node);
        elasticStats.maxReadyListSize =
            std::max<double>(readyList.size(),
                             elasticStats.maxReadyListSize.value());
        return;
    }

    // If the first node in the list is the one that failed to execute and is
    // awaiting a retry, keep it at the front: skip past it before searching
    // for the insertion position.
    if (retryPkt) {
        if (retryPkt->req->getReqInstSeqNum() == itr->seqNum)
            itr++;
    }

    // Increment the iterator and compare the node pointed to by it to the new
    // node till the position to insert the new node is found.
    bool found = false;
    while (!found && itr != readyList.end()) {
        // If the execution tick of the new node is less than the node then
        // this is the position to insert
        if (exec_tick < itr->execTick) {
            found = true;
        // If the execution tick of the new node is equal to the node then
        // sort in ascending order of sequence numbers
        } else if (exec_tick == itr->execTick) {
            // If the sequence number of the new node is less than the node
            // then this is the position to insert
            if (seq_num < itr->seqNum) {
                found = true;
            // Else go to next node
            } else {
                itr++;
            }
        } else {
            // If the execution tick of the new node is greater than the node
            // then go to the next node.
            itr++;
        }
    }
    readyList.insert(itr, ready_node);
    // Update the stat for max size reached of the readyList
    elasticStats.maxReadyListSize = std::max<double>(readyList.size(),
        elasticStats.maxReadyListSize.value());
}

void
TraceCPU::ElasticDataGen::printReadyList()
{
    auto itr = readyList.begin();
    if (itr == readyList.end()) {
        DPRINTF(TraceCPUData, "readyList is empty.\n");
        return;
    }
    DPRINTF(TraceCPUData, "Printing readyList:\n");
    while (itr != readyList.end()) {
        auto graph_itr = depGraph.find(itr->seqNum);
        M5_VAR_USED GraphNode* node_ptr = graph_itr->second;
        DPRINTFR(TraceCPUData, "\t%lld(%s), %lld\n", itr->seqNum,
                 node_ptr->typeToStr(), itr->execTick);
        itr++;
    }
}

TraceCPU::ElasticDataGen::HardwareResource::HardwareResource(
    uint16_t max_rob, uint16_t max_stores, uint16_t max_loads) :
    sizeROB(max_rob),
    sizeStoreBuffer(max_stores),
    sizeLoadBuffer(max_loads),
    oldestInFlightRobNum(UINT64_MAX),
    numInFlightLoads(0),
    numInFlightStores(0)
{}

void
TraceCPU::ElasticDataGen::HardwareResource::occupy(const GraphNode* new_node)
{
    // Occupy ROB entry for the issued node
    // Merely maintain the oldest node, i.e. numerically least robNum by saving
    // it in the variable oldestInFlightRobNum.
837 inFlightNodes[new_node->seqNum] = new_node->robNum;
838 oldestInFlightRobNum = inFlightNodes.begin()->second;
839
840 // Occupy Load/Store Buffer entry for the issued node if applicable
841 if (new_node->isLoad()) {
842 ++numInFlightLoads;
843 } else if (new_node->isStore()) {
844 ++numInFlightStores;
845 } // else if it is a non load/store node, no buffer entry is occupied
846
847 printOccupancy();
848 }
849
850 void
851 TraceCPU::ElasticDataGen::HardwareResource::release(const GraphNode* done_node)
852 {
853 assert(!inFlightNodes.empty());
854 DPRINTFR(TraceCPUData,
855 "\tClearing done seq. num %d from inFlightNodes..\n",
856 done_node->seqNum);
857
858 assert(inFlightNodes.find(done_node->seqNum) != inFlightNodes.end());
859 inFlightNodes.erase(done_node->seqNum);
860
    if (inFlightNodes.empty()) {
        // If we deleted the only in-flight node, reset oldestInFlightRobNum
        // to its initialized (max) value.
        oldestInFlightRobNum = UINT64_MAX;
    } else {
        // Set the oldest in-flight node rob number equal to the first node in
        // the inFlightNodes since that will have the numerically least value.
        oldestInFlightRobNum = inFlightNodes.begin()->second;
    }

    DPRINTFR(TraceCPUData,
             "\tCleared. inFlightNodes.size() = %d, "
             "oldestInFlightRobNum = %d\n", inFlightNodes.size(),
             oldestInFlightRobNum);

    // A store is considered complete when a request is sent, thus ROB entry is
    // freed. But it occupies an entry in the Store Buffer until its response
    // is received. A load is considered complete when a response is received,
    // thus both ROB and Load Buffer entries can be released.
    if (done_node->isLoad()) {
        assert(numInFlightLoads != 0);
        --numInFlightLoads;
    }
    // For normal writes, we send the requests out and clear a store buffer
    // entry on response. For writes which are strictly ordered, for e.g.
    // writes to device registers, we do that within release() which is called
    // when node is executed and taken off from readyList.
    if (done_node->isStore() && done_node->isStrictlyOrdered()) {
        releaseStoreBuffer();
    }
}

void
TraceCPU::ElasticDataGen::HardwareResource::releaseStoreBuffer()
{
    assert(numInFlightStores != 0);
    --numInFlightStores;
}

bool
TraceCPU::ElasticDataGen::HardwareResource::isAvailable(
    const GraphNode* new_node) const
{
    uint16_t num_in_flight_nodes;
    if (inFlightNodes.empty()) {
        num_in_flight_nodes = 0;
        DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
                 " #in-flight nodes = 0", new_node->seqNum);
    } else if (new_node->robNum > oldestInFlightRobNum) {
        // This is the intuitive case where new dep-free node is younger
        // instruction than the oldest instruction in-flight. Thus we make sure
        // in_flight_nodes does not overflow.
        num_in_flight_nodes = new_node->robNum - oldestInFlightRobNum;
        DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
                 " #in-flight nodes = %d - %d = %d", new_node->seqNum,
                 new_node->robNum, oldestInFlightRobNum, num_in_flight_nodes);
    } else {
        // This is the case where an instruction older than the oldest in-
        // flight instruction becomes dep-free. Thus we must have already
        // accounted for the entry in ROB for this new dep-free node.
        // Immediately after this check returns true, oldestInFlightRobNum will
        // be updated in occupy(). We simply let this node issue now.
        num_in_flight_nodes = 0;
        DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
                 " new oldestInFlightRobNum = %d, #in-flight nodes ignored",
                 new_node->seqNum, new_node->robNum);
    }
    DPRINTFR(TraceCPUData, ", LQ = %d/%d, SQ = %d/%d.\n",
             numInFlightLoads, sizeLoadBuffer,
             numInFlightStores, sizeStoreBuffer);
    // Check if resources are available to issue the specific node
    if (num_in_flight_nodes >= sizeROB) {
        return false;
    }
    if (new_node->isLoad() && numInFlightLoads >= sizeLoadBuffer) {
        return false;
    }
    if (new_node->isStore() && numInFlightStores >= sizeStoreBuffer) {
        return false;
    }
    return true;
}

bool
TraceCPU::ElasticDataGen::HardwareResource::awaitingResponse() const
{
    // Return true if there is at least one read or write request in flight
    return (numInFlightStores != 0 || numInFlightLoads != 0);
}

void
TraceCPU::ElasticDataGen::HardwareResource::printOccupancy()
{
    DPRINTFR(TraceCPUData, "oldestInFlightRobNum = %d, "
             "LQ = %d/%d, SQ = %d/%d.\n",
             oldestInFlightRobNum,
             numInFlightLoads, sizeLoadBuffer,
             numInFlightStores, sizeStoreBuffer);
}

TraceCPU::FixedRetryGen::FixedRetryGenStatGroup::FixedRetryGenStatGroup(
    Stats::Group *parent, const std::string& _name) :
    Stats::Group(parent, _name.c_str()),
    ADD_STAT(numSendAttempted, "Number of first attempts to send a request"),
    ADD_STAT(numSendSucceeded, "Number of successful first attempts"),
    ADD_STAT(numSendFailed, "Number of failed first attempts"),
    ADD_STAT(numRetrySucceeded, "Number of successful retries"),
    ADD_STAT(instLastTick, "Last tick simulated from the fixed inst trace")
{

}

Tick
TraceCPU::FixedRetryGen::init()
{
    DPRINTF(TraceCPUInst, "Initializing instruction fetch request generator"
            " IcacheGen: fixed issue with retry.\n");

    if (nextExecute()) {
        DPRINTF(TraceCPUInst, "\tFirst tick = %d.\n", currElement.tick);
        return currElement.tick;
    } else {
        panic("Read of first message in the trace failed.\n");
        return MaxTick;
    }
}

bool
TraceCPU::FixedRetryGen::tryNext()
{
    // If there is a retry packet, try to send it
    if (retryPkt) {
        DPRINTF(TraceCPUInst, "Trying to send retry packet.\n");

        if (!port.sendTimingReq(retryPkt)) {
            // Still blocked! This should never occur.
            DPRINTF(TraceCPUInst, "Retry packet sending failed.\n");
            return false;
        }
        ++fixedStats.numRetrySucceeded;
    } else {
        DPRINTF(TraceCPUInst, "Trying to send packet for currElement.\n");

        // try sending current element
        assert(currElement.isValid());

        ++fixedStats.numSendAttempted;

        if (!send(currElement.addr, currElement.blocksize,
                  currElement.cmd, currElement.flags, currElement.pc)) {
            DPRINTF(TraceCPUInst, "currElement sending failed.\n");
            ++fixedStats.numSendFailed;
            // return false to indicate not to schedule next event
            return false;
        } else {
            ++fixedStats.numSendSucceeded;
        }
    }
    // If packet was sent successfully, either retryPkt or currElement, return
    // true to indicate to schedule event at current Tick plus delta. If packet
    // was sent successfully and there is no next packet to send, return false.
    DPRINTF(TraceCPUInst, "Packet sent successfully, trying to read next "
            "element.\n");
    retryPkt = nullptr;
    // Read next element into currElement, currElement gets cleared so save the
    // tick to calculate delta
    Tick last_tick = currElement.tick;
    if (nextExecute()) {
        assert(currElement.tick >= last_tick);
        delta = currElement.tick - last_tick;
    }
    return !traceComplete;
}

void
TraceCPU::FixedRetryGen::exit()
{
    trace.reset();
}

bool
TraceCPU::FixedRetryGen::nextExecute()
{
    if (traceComplete)
        // We are at the end of the file, thus we have no more messages.
        // Return false.
        return false;


    //Reset the currElement to the default values
    currElement.clear();

    // Read the next line to get the next message. If that fails then end of
    // trace has been reached and traceComplete needs to be set in addition
    // to returning false. If successful then next message is in currElement.
    if (!trace.read(&currElement)) {
        traceComplete = true;
        fixedStats.instLastTick = curTick();
        return false;
    }

    DPRINTF(TraceCPUInst, "inst fetch: %c addr %d pc %#x size %d tick %d\n",
            currElement.cmd.isRead() ? 'r' : 'w',
            currElement.addr,
            currElement.pc,
            currElement.blocksize,
            currElement.tick);

    return true;
}

bool
TraceCPU::FixedRetryGen::send(Addr addr, unsigned size, const MemCmd& cmd,
                              Request::FlagsType flags, Addr pc)
{

    // Create new request
    auto req = std::make_shared<Request>(addr, size, flags, requestorId);
    req->setPC(pc);

    // If this is not done it triggers assert in L1 cache for invalid contextId
    req->setContext(ContextID(0));

    // Embed it in a packet
    PacketPtr pkt = new Packet(req, cmd);

    uint8_t* pkt_data = new uint8_t[req->getSize()];
    pkt->dataDynamic(pkt_data);

    if (cmd.isWrite()) {
        memset(pkt_data, 0xA, req->getSize());
    }

    // Call RequestPort method to send a timing request for this packet
    bool success = port.sendTimingReq(pkt);
    if (!success) {
        // If it fails, save the packet to retry when a retry is signalled by
        // the cache
        retryPkt = pkt;
    }
    return success;
}

void
TraceCPU::icacheRetryRecvd()
{
    // Schedule an event to go through the control flow in the same tick as
    // retry is received
    DPRINTF(TraceCPUInst, "Icache retry received. Scheduling next IcacheGen"
            " event @%lli.\n", curTick());
    schedule(icacheNextEvent, curTick());
}

void
TraceCPU::dcacheRetryRecvd()
{
    // Schedule an event to go through the execute flow in the same tick as
    // retry is received
    DPRINTF(TraceCPUData, "Dcache retry received. Scheduling next DcacheGen"
            " event @%lli.\n", curTick());
    schedule(dcacheNextEvent, curTick());
}

void
TraceCPU::schedDcacheNextEvent(Tick when)
{
    if (!dcacheNextEvent.scheduled()) {
        DPRINTF(TraceCPUData, "Scheduling next DcacheGen event at %lli.\n",
                when);
        schedule(dcacheNextEvent, when);
        ++traceStats.numSchedDcacheEvent;
    } else if (when < dcacheNextEvent.when()) {
        DPRINTF(TraceCPUData, "Re-scheduling next dcache event from %lli"
                " to %lli.\n", dcacheNextEvent.when(), when);
        reschedule(dcacheNextEvent, when);
    }

}

bool
TraceCPU::IcachePort::recvTimingResp(PacketPtr pkt)
{
    // All responses on the instruction fetch side are ignored. Simply delete
    // the packet to free allocated memory
    delete pkt;

    return true;
}

void
TraceCPU::IcachePort::recvReqRetry()
{
    owner->icacheRetryRecvd();
}

void
TraceCPU::dcacheRecvTimingResp(PacketPtr pkt)
{
    DPRINTF(TraceCPUData, "Received timing response from Dcache.\n");
    dcacheGen.completeMemAccess(pkt);
}

bool
TraceCPU::DcachePort::recvTimingResp(PacketPtr pkt)
{
    // Handle the responses for data memory requests which is done inside the
    // elastic data generator
    owner->dcacheRecvTimingResp(pkt);
    // After processing the response delete the packet to free
    // memory
    delete pkt;

    return true;
}

void
TraceCPU::DcachePort::recvReqRetry()
{
    owner->dcacheRetryRecvd();
}

TraceCPU::ElasticDataGen::InputStream::InputStream(
    const std::string& filename, const double time_multiplier) :
    trace(filename),
    timeMultiplier(time_multiplier),
    microOpCount(0)
{
    // Create a protobuf message for the header and read it from the stream
    ProtoMessage::InstDepRecordHeader header_msg;
    if (!trace.read(header_msg)) {
        panic("Failed to read packet header from %s\n", filename);
    }

    if (header_msg.tick_freq() != SimClock::Frequency) {
        panic("Trace %s was recorded with a different tick frequency %d\n",
              header_msg.tick_freq());
    }

    // Assign window size equal to the field in the trace that was recorded
    // when the data dependency trace was captured in the o3cpu model
    windowSize = header_msg.window_size();
}

void
TraceCPU::ElasticDataGen::InputStream::reset()
{
    trace.reset();
}

bool
TraceCPU::ElasticDataGen::InputStream::read(GraphNode* element)
{
    ProtoMessage::InstDepRecord pkt_msg;
    if (trace.read(pkt_msg)) {
        // Required fields
        element->seqNum = pkt_msg.seq_num();
        element->type = pkt_msg.type();
        // Scale the compute delay to effectively scale the Trace CPU frequency
        element->compDelay = pkt_msg.comp_delay() * timeMultiplier;

        // Repeated field robDepList
        element->clearRobDep();
        assert((pkt_msg.rob_dep()).size() <= element->maxRobDep);
        for (int i = 0; i < (pkt_msg.rob_dep()).size(); i++) {
            element->robDep[element->numRobDep] = pkt_msg.rob_dep(i);
            element->numRobDep += 1;
        }

        // Repeated field
        element->clearRegDep();
        assert((pkt_msg.reg_dep()).size() <= TheISA::MaxInstSrcRegs);
        for (int i = 0; i < (pkt_msg.reg_dep()).size(); i++) {
            // There is a possibility that an instruction has both, a register
            // and order dependency on an instruction. In such a case, the
            // register dependency is omitted
            bool duplicate = false;
            for (int j = 0; j < element->numRobDep; j++) {
                duplicate |= (pkt_msg.reg_dep(i) == element->robDep[j]);
            }
            if (!duplicate) {
                element->regDep[element->numRegDep] = pkt_msg.reg_dep(i);
                element->numRegDep += 1;
            }
        }

        // Optional fields
        if (pkt_msg.has_p_addr())
            element->physAddr = pkt_msg.p_addr();
        else
            element->physAddr = 0;

        if (pkt_msg.has_v_addr())
            element->virtAddr = pkt_msg.v_addr();
        else
            element->virtAddr = 0;

        if (pkt_msg.has_size())
            element->size = pkt_msg.size();
        else
            element->size = 0;

        if (pkt_msg.has_flags())
            element->flags = pkt_msg.flags();
        else
            element->flags = 0;

        if (pkt_msg.has_pc())
            element->pc = pkt_msg.pc();
        else
            element->pc = 0;

        // ROB occupancy number
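        // Note: the optional weight field below additionally advances the
        // micro-op count, presumably accounting for micro-ops that have no
        // separate node of their own in the trace, so that robNum tracks
        // ROB occupancy across them.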
        ++microOpCount;
        if (pkt_msg.has_weight()) {
            microOpCount += pkt_msg.weight();
        }
        element->robNum = microOpCount;
        return true;
    }

    // We have reached the end of the file
    return false;
}

bool
TraceCPU::ElasticDataGen::GraphNode::removeRegDep(NodeSeqNum reg_dep)
{
    for (auto& own_reg_dep : regDep) {
        if (own_reg_dep == reg_dep) {
            // If register dependency is found, make it zero and return true
            own_reg_dep = 0;
            assert(numRegDep > 0);
            --numRegDep;
            DPRINTFR(TraceCPUData, "\tFor %lli: Marking register dependency "
                     "%lli done.\n", seqNum, reg_dep);
            return true;
        }
    }

    // Return false if the dependency is not found
    return false;
}

bool
TraceCPU::ElasticDataGen::GraphNode::removeRobDep(NodeSeqNum rob_dep)
{
    for (auto& own_rob_dep : robDep) {
        if (own_rob_dep == rob_dep) {
            // If the rob dependency is found, make it zero and return true
            own_rob_dep = 0;
            assert(numRobDep > 0);
            --numRobDep;
            DPRINTFR(TraceCPUData,
                     "\tFor %lli: Marking ROB dependency %lli done.\n",
                     seqNum, rob_dep);
            return true;
        }
    }
    return false;
}

void
TraceCPU::ElasticDataGen::GraphNode::clearRegDep()
{
    for (auto& own_reg_dep : regDep) {
        own_reg_dep = 0;
    }
    numRegDep = 0;
}

void
TraceCPU::ElasticDataGen::GraphNode::clearRobDep()
{
    for (auto& own_rob_dep : robDep) {
        own_rob_dep = 0;
    }
    numRobDep = 0;
}

bool
TraceCPU::ElasticDataGen::GraphNode::removeDepOnInst(NodeSeqNum done_seq_num)
{
    // If it is an rob dependency then remove it
    if (!removeRobDep(done_seq_num)) {
        // If it is not an rob dependency then it must be a register dependency
        // If the register dependency is not found, it violates an assumption
        // and must be caught by assert.
        M5_VAR_USED bool regdep_found = removeRegDep(done_seq_num);
        assert(regdep_found);
    }
    // Return true if the node is dependency free
    return (numRobDep == 0 && numRegDep == 0);
}

void
TraceCPU::ElasticDataGen::GraphNode::writeElementAsTrace() const
{
    DPRINTFR(TraceCPUData, "%lli", seqNum);
    DPRINTFR(TraceCPUData, ",%s", typeToStr());
    if (isLoad() || isStore()) {
        DPRINTFR(TraceCPUData, ",%i", physAddr);
        DPRINTFR(TraceCPUData, ",%i", size);
        DPRINTFR(TraceCPUData, ",%i", flags);
    }
    DPRINTFR(TraceCPUData, ",%lli", compDelay);
    int i = 0;
    DPRINTFR(TraceCPUData, "robDep:");
    while (robDep[i] != 0) {
        DPRINTFR(TraceCPUData, ",%lli", robDep[i]);
        i++;
    }
    i = 0;
    DPRINTFR(TraceCPUData, "regDep:");
    while (regDep[i] != 0) {
        DPRINTFR(TraceCPUData, ",%lli", regDep[i]);
        i++;
    }
    auto child_itr = dependents.begin();
    DPRINTFR(TraceCPUData, "dependents:");
    while (child_itr != dependents.end()) {
        DPRINTFR(TraceCPUData, ":%lli", (*child_itr)->seqNum);
        child_itr++;
    }

    DPRINTFR(TraceCPUData, "\n");
}

std::string
TraceCPU::ElasticDataGen::GraphNode::typeToStr() const
{
    return Record::RecordType_Name(type);
}

TraceCPU::FixedRetryGen::InputStream::InputStream(const std::string& filename)
    : trace(filename)
{
    // Create a protobuf message for the header and read it from the stream
    ProtoMessage::PacketHeader header_msg;
1400 panic("Failed to read packet header from %s\n", filename);
1401
1402 if (header_msg.tick_freq() != SimClock::Frequency) {
1403 panic("Trace %s was recorded with a different tick frequency %d\n",
1404 header_msg.tick_freq());
1405 }
1406 }
1407 }
1408
1409 void
1410 TraceCPU::FixedRetryGen::InputStream::reset()
1411 {
1412 trace.reset();
1413 }
1414
1415 bool
1416 TraceCPU::FixedRetryGen::InputStream::read(TraceElement* element)
1417 {
1418 ProtoMessage::Packet pkt_msg;
1419 if (trace.read(pkt_msg)) {
1420 element->cmd = pkt_msg.cmd();
1421 element->addr = pkt_msg.addr();
1422 element->blocksize = pkt_msg.size();
1423 element->tick = pkt_msg.tick();
1424 element->flags = pkt_msg.has_flags() ? pkt_msg.flags() : 0;
1425 element->pc = pkt_msg.has_pc() ? pkt_msg.pc() : 0;
1426 return true;
1427 }
1428
1429 // We have reached the end of the file
1430 return false;
1431 }