2 * Copyright (c) 2013 - 2015 ARM Limited
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 * Authors: Radhika Jagtap
42 #include "cpu/trace/trace_cpu.hh"
44 #include "sim/sim_exit.hh"
46 // Declare and initialize the static counter for number of trace CPUs.
47 int TraceCPU::numTraceCPUs
= 0;
49 TraceCPU::TraceCPU(TraceCPUParams
*params
)
53 instMasterID(params
->system
->getMasterId(name() + ".inst")),
54 dataMasterID(params
->system
->getMasterId(name() + ".data")),
55 instTraceFile(params
->instTraceFile
),
56 dataTraceFile(params
->dataTraceFile
),
57 icacheGen(*this, ".iside", icachePort
, instMasterID
, instTraceFile
),
58 dcacheGen(*this, ".dside", dcachePort
, dataMasterID
, dataTraceFile
,
59 params
->sizeROB
, params
->sizeStoreBuffer
,
60 params
->sizeLoadBuffer
),
61 icacheNextEvent(this),
62 dcacheNextEvent(this),
63 oneTraceComplete(false),
65 execCompleteEvent(nullptr)
67 // Increment static counter for number of Trace CPUs.
68 ++TraceCPU::numTraceCPUs
;
70 // Check that the python parameters for sizes of ROB, store buffer and load
71 // buffer do not overflow the corresponding C++ variables.
72 fatal_if(params
->sizeROB
> UINT16_MAX
, "ROB size set to %d exceeds the "
73 "max. value of %d.\n", params
->sizeROB
, UINT16_MAX
);
74 fatal_if(params
->sizeStoreBuffer
> UINT16_MAX
, "ROB size set to %d "
75 "exceeds the max. value of %d.\n", params
->sizeROB
,
77 fatal_if(params
->sizeLoadBuffer
> UINT16_MAX
, "Load buffer size set to"
78 " %d exceeds the max. value of %d.\n",
79 params
->sizeLoadBuffer
, UINT16_MAX
);
88 TraceCPUParams::create()
90 return new TraceCPU(this);
94 TraceCPU::takeOverFrom(BaseCPU
*oldCPU
)
96 // Unbind the ports of the old CPU and bind the ports of the TraceCPU.
97 assert(!getInstPort().isConnected());
98 assert(oldCPU
->getInstPort().isConnected());
99 BaseSlavePort
&inst_peer_port
= oldCPU
->getInstPort().getSlavePort();
100 oldCPU
->getInstPort().unbind();
101 getInstPort().bind(inst_peer_port
);
103 assert(!getDataPort().isConnected());
104 assert(oldCPU
->getDataPort().isConnected());
105 BaseSlavePort
&data_peer_port
= oldCPU
->getDataPort().getSlavePort();
106 oldCPU
->getDataPort().unbind();
107 getDataPort().bind(data_peer_port
);
113 DPRINTF(TraceCPUInst
, "Instruction fetch request trace file is \"%s\"."
114 "\n", instTraceFile
);
115 DPRINTF(TraceCPUData
, "Data memory request trace file is \"%s\".\n",
120 // Get the send tick of the first instruction read request and schedule
121 // icacheNextEvent at that tick.
122 Tick first_icache_tick
= icacheGen
.init();
123 schedule(icacheNextEvent
, first_icache_tick
);
125 // Get the send tick of the first data read/write request and schedule
126 // dcacheNextEvent at that tick.
127 Tick first_dcache_tick
= dcacheGen
.init();
128 schedule(dcacheNextEvent
, first_dcache_tick
);
130 // The static counter for number of Trace CPUs is correctly set at this
131 // point so create an event and pass it.
132 execCompleteEvent
= new CountedExitEvent("end of all traces reached.",
134 // Save the first fetch request tick to dump it as tickOffset
135 firstFetchTick
= first_icache_tick
;
139 TraceCPU::schedIcacheNext()
141 DPRINTF(TraceCPUInst
, "IcacheGen event.\n");
143 // Try to send the current packet or a retry packet if there is one
144 bool sched_next
= icacheGen
.tryNext();
145 // If packet sent successfully, schedule next event
147 DPRINTF(TraceCPUInst
, "Scheduling next icacheGen event "
148 "at %d.\n", curTick() + icacheGen
.tickDelta());
149 schedule(icacheNextEvent
, curTick() + icacheGen
.tickDelta());
150 ++numSchedIcacheEvent
;
152 // check if traceComplete. If not, do nothing because sending failed
153 // and next event will be scheduled via RecvRetry()
154 if (icacheGen
.isTraceComplete()) {
155 // If this is the first trace to complete, set the variable. If it
156 // is already set then both traces are complete to exit sim.
157 checkAndSchedExitEvent();
164 TraceCPU::schedDcacheNext()
166 DPRINTF(TraceCPUData
, "DcacheGen event.\n");
169 if (dcacheGen
.isExecComplete()) {
170 checkAndSchedExitEvent();
175 TraceCPU::checkAndSchedExitEvent()
177 if (!oneTraceComplete
) {
178 oneTraceComplete
= true;
180 // Schedule event to indicate execution is complete as both
181 // instruction and data access traces have been played back.
182 inform("%s: Execution complete.\n", name());
184 // Record stats which are computed at the end of simulation
185 tickOffset
= firstFetchTick
;
186 numCycles
= (clockEdge() - firstFetchTick
) / clockPeriod();
187 numOps
= dcacheGen
.getMicroOpCount();
188 schedule(*execCompleteEvent
, curTick());
199 .name(name() + ".numSchedDcacheEvent")
200 .desc("Number of events scheduled to trigger data request generator")
204 .name(name() + ".numSchedIcacheEvent")
205 .desc("Number of events scheduled to trigger instruction request generator")
209 .name(name() + ".numOps")
210 .desc("Number of micro-ops simulated by the Trace CPU")
214 .name(name() + ".cpi")
215 .desc("Cycles per micro-op used as a proxy for CPI")
218 cpi
= numCycles
/numOps
;
221 .name(name() + ".tickOffset")
222 .desc("The first execution tick for the root node of elastic traces")
225 icacheGen
.regStats();
226 dcacheGen
.regStats();
230 TraceCPU::ElasticDataGen::regStats()
232 using namespace Stats
;
235 .name(name() + ".maxDependents")
236 .desc("Max number of dependents observed on a node")
240 .name(name() + ".maxReadyListSize")
241 .desc("Max size of the ready list observed")
245 .name(name() + ".numSendAttempted")
246 .desc("Number of first attempts to send a request")
250 .name(name() + ".numSendSucceeded")
251 .desc("Number of successful first attempts")
255 .name(name() + ".numSendFailed")
256 .desc("Number of failed first attempts")
260 .name(name() + ".numRetrySucceeded")
261 .desc("Number of successful retries")
265 .name(name() + ".numSplitReqs")
266 .desc("Number of split requests")
270 .name(name() + ".numSOLoads")
271 .desc("Number of strictly ordered loads")
275 .name(name() + ".numSOStores")
276 .desc("Number of strictly ordered stores")
280 .name(name() + ".dataLastTick")
281 .desc("Last tick simulated from the elastic data trace")
286 TraceCPU::ElasticDataGen::init()
288 DPRINTF(TraceCPUData
, "Initializing data memory request generator "
289 "DcacheGen: elastic issue with retry.\n");
291 if (!readNextWindow())
292 panic("Trace has %d elements. It must have at least %d elements.\n",
293 depGraph
.size(), 2 * windowSize
);
294 DPRINTF(TraceCPUData
, "After 1st read, depGraph size:%d.\n",
297 if (!readNextWindow())
298 panic("Trace has %d elements. It must have at least %d elements.\n",
299 depGraph
.size(), 2 * windowSize
);
300 DPRINTF(TraceCPUData
, "After 2st read, depGraph size:%d.\n",
304 if (DTRACE(TraceCPUData
)) {
307 auto free_itr
= readyList
.begin();
308 DPRINTF(TraceCPUData
, "Execute tick of the first dependency free node %lli"
309 " is %d.\n", free_itr
->seqNum
, free_itr
->execTick
);
310 // Return the execute tick of the earliest ready node so that an event
311 // can be scheduled to call execute()
312 return (free_itr
->execTick
);
316 TraceCPU::ElasticDataGen::exit()
322 TraceCPU::ElasticDataGen::readNextWindow()
325 // Read and add next window
326 DPRINTF(TraceCPUData
, "Reading next window from file.\n");
329 // We are at the end of the file, thus we have no more records.
334 DPRINTF(TraceCPUData
, "Start read: Size of depGraph is %d.\n",
337 uint32_t num_read
= 0;
338 while (num_read
!= windowSize
) {
340 // Create a new graph node
341 GraphNode
* new_node
= new GraphNode
;
343 // Read the next line to get the next record. If that fails then end of
344 // trace has been reached and traceComplete needs to be set in addition
345 // to returning false.
346 if (!trace
.read(new_node
)) {
347 DPRINTF(TraceCPUData
, "\tTrace complete!\n");
348 traceComplete
= true;
352 // Annotate the ROB dependencies of the new node onto the parent nodes.
353 addDepsOnParent(new_node
, new_node
->robDep
, new_node
->numRobDep
);
354 // Annotate the register dependencies of the new node onto the parent
356 addDepsOnParent(new_node
, new_node
->regDep
, new_node
->numRegDep
);
360 depGraph
[new_node
->seqNum
] = new_node
;
361 if (new_node
->numRobDep
== 0 && new_node
->numRegDep
== 0) {
362 // Source dependencies are already complete, check if resources
363 // are available and issue. The execution time is approximated
364 // to current time plus the computational delay.
365 checkAndIssue(new_node
);
369 DPRINTF(TraceCPUData
, "End read: Size of depGraph is %d.\n",
374 template<typename T
> void
375 TraceCPU::ElasticDataGen::addDepsOnParent(GraphNode
*new_node
,
376 T
& dep_array
, uint8_t& num_dep
)
378 for (auto& a_dep
: dep_array
) {
379 // The convention is to set the dependencies starting with the first
380 // index in the ROB and register dependency arrays. Thus, when we reach
381 // a dependency equal to the initialisation value of zero, we know have
382 // iterated over all dependencies and can break.
385 // We look up the valid dependency, i.e. the parent of this node
386 auto parent_itr
= depGraph
.find(a_dep
);
387 if (parent_itr
!= depGraph
.end()) {
388 // If the parent is found, it is yet to be executed. Append a
389 // pointer to the new node to the dependents list of the parent
391 parent_itr
->second
->dependents
.push_back(new_node
);
392 auto num_depts
= parent_itr
->second
->dependents
.size();
393 maxDependents
= std::max
<double>(num_depts
, maxDependents
.value());
395 // The dependency is not found in the graph. So consider
396 // the execution of the parent is complete, i.e. remove this
405 TraceCPU::ElasticDataGen::execute()
407 DPRINTF(TraceCPUData
, "Execute start occupancy:\n");
408 DPRINTFR(TraceCPUData
, "\tdepGraph = %d, readyList = %d, "
409 "depFreeQueue = %d ,", depGraph
.size(), readyList
.size(),
410 depFreeQueue
.size());
411 hwResource
.printOccupancy();
413 // Read next window to make sure that dependents of all dep-free nodes
414 // are in the depGraph
420 // First attempt to issue the pending dependency-free nodes held
421 // in depFreeQueue. If resources have become available for a node,
422 // then issue it, i.e. add the node to readyList.
423 while (!depFreeQueue
.empty()) {
424 if (checkAndIssue(depFreeQueue
.front(), false)) {
425 DPRINTF(TraceCPUData
, "Removing from depFreeQueue: seq. num "
426 "%lli.\n", (depFreeQueue
.front())->seqNum
);
432 // Proceed to execute from readyList
433 auto graph_itr
= depGraph
.begin();
434 auto free_itr
= readyList
.begin();
435 // Iterate through readyList until the next free node has its execute
436 // tick later than curTick or the end of readyList is reached
437 while (free_itr
->execTick
<= curTick() && free_itr
!= readyList
.end()) {
439 // Get pointer to the node to be executed
440 graph_itr
= depGraph
.find(free_itr
->seqNum
);
441 assert(graph_itr
!= depGraph
.end());
442 GraphNode
* node_ptr
= graph_itr
->second
;
444 // If there is a retryPkt send that else execute the load
446 // The retryPkt must be the request that was created by the
447 // first node in the readyList.
448 if (retryPkt
->req
->getReqInstSeqNum() != node_ptr
->seqNum
) {
449 panic("Retry packet's seqence number does not match "
450 "the first node in the readyList.\n");
452 if (port
.sendTimingReq(retryPkt
)) {
456 } else if (node_ptr
->isLoad() || node_ptr
->isStore()) {
457 // If there is no retryPkt, attempt to send a memory request in
458 // case of a load or store node. If the send fails, executeMemReq()
459 // returns a packet pointer, which we save in retryPkt. In case of
460 // a comp node we don't do anything and simply continue as if the
461 // execution of the comp node succedded.
462 retryPkt
= executeMemReq(node_ptr
);
464 // If the retryPkt or a new load/store node failed, we exit from here
465 // as a retry from cache will bring the control to execute(). The
466 // first node in readyList then, will be the failed node.
471 // Proceed to remove dependencies for the successfully executed node.
472 // If it is a load which is not strictly ordered and we sent a
473 // request for it successfully, we do not yet mark any register
474 // dependencies complete. But as per dependency modelling we need
475 // to mark ROB dependencies of load and non load/store nodes which
476 // are based on successful sending of the load as complete.
477 if (node_ptr
->isLoad() && !node_ptr
->isStrictlyOrdered()) {
478 // If execute succeeded mark its dependents as complete
479 DPRINTF(TraceCPUData
, "Node seq. num %lli sent. Waking up "
480 "dependents..\n", node_ptr
->seqNum
);
482 auto child_itr
= (node_ptr
->dependents
).begin();
483 while (child_itr
!= (node_ptr
->dependents
).end()) {
484 // ROB dependency of a store on a load must not be removed
485 // after load is sent but after response is received
486 if (!(*child_itr
)->isStore() &&
487 (*child_itr
)->removeRobDep(node_ptr
->seqNum
)) {
489 // Check if the child node has become dependency free
490 if ((*child_itr
)->numRobDep
== 0 &&
491 (*child_itr
)->numRegDep
== 0) {
493 // Source dependencies are complete, check if
494 // resources are available and issue
495 checkAndIssue(*child_itr
);
497 // Remove this child for the sent load and point to new
498 // location of the element following the erased element
499 child_itr
= node_ptr
->dependents
.erase(child_itr
);
501 // This child is not dependency-free, point to the next
507 // If it is a strictly ordered load mark its dependents as complete
508 // as we do not send a request for this case. If it is a store or a
509 // comp node we also mark all its dependents complete.
510 DPRINTF(TraceCPUData
, "Node seq. num %lli done. Waking"
511 " up dependents..\n", node_ptr
->seqNum
);
513 for (auto child
: node_ptr
->dependents
) {
514 // If the child node is dependency free removeDepOnInst()
516 if (child
->removeDepOnInst(node_ptr
->seqNum
)) {
517 // Source dependencies are complete, check if resources
518 // are available and issue
519 checkAndIssue(child
);
524 // After executing the node, remove from readyList and delete node.
525 readyList
.erase(free_itr
);
526 // If it is a cacheable load which was sent, don't delete
527 // just yet. Delete it in completeMemAccess() after the
528 // response is received. If it is an strictly ordered
529 // load, it was not sent and all dependencies were simply
530 // marked complete. Thus it is safe to delete it. For
531 // stores and non load/store nodes all dependencies were
532 // marked complete so it is safe to delete it.
533 if (!node_ptr
->isLoad() || node_ptr
->isStrictlyOrdered()) {
534 // Release all resources occupied by the completed node
535 hwResource
.release(node_ptr
);
536 // clear the dynamically allocated set of dependents
537 (node_ptr
->dependents
).clear();
541 depGraph
.erase(graph_itr
);
543 // Point to first node to continue to next iteration of while loop
544 free_itr
= readyList
.begin();
545 } // end of while loop
547 // Print readyList, sizes of queues and resource status after updating
548 if (DTRACE(TraceCPUData
)) {
550 DPRINTF(TraceCPUData
, "Execute end occupancy:\n");
551 DPRINTFR(TraceCPUData
, "\tdepGraph = %d, readyList = %d, "
552 "depFreeQueue = %d ,", depGraph
.size(), readyList
.size(),
553 depFreeQueue
.size());
554 hwResource
.printOccupancy();
558 DPRINTF(TraceCPUData
, "Not scheduling an event as expecting a retry"
559 "event from the cache for seq. num %lli.\n",
560 retryPkt
->req
->getReqInstSeqNum());
563 // If the size of the dependency graph is less than the dependency window
564 // then read from the trace file to populate the graph next time we are in
566 if (depGraph
.size() < windowSize
&& !traceComplete
)
569 // If cache is not blocked, schedule an event for the first execTick in
570 // readyList else retry from cache will schedule the event. If the ready
571 // list is empty then check if the next pending node has resources
572 // available to issue. If yes, then schedule an event for the next cycle.
573 if (!readyList
.empty()) {
574 Tick next_event_tick
= std::max(readyList
.begin()->execTick
,
576 DPRINTF(TraceCPUData
, "Attempting to schedule @%lli.\n",
578 owner
.schedDcacheNextEvent(next_event_tick
);
579 } else if (readyList
.empty() && !depFreeQueue
.empty() &&
580 hwResource
.isAvailable(depFreeQueue
.front())) {
581 DPRINTF(TraceCPUData
, "Attempting to schedule @%lli.\n",
582 owner
.clockEdge(Cycles(1)));
583 owner
.schedDcacheNextEvent(owner
.clockEdge(Cycles(1)));
586 // If trace is completely read, readyList is empty and depGraph is empty,
587 // set execComplete to true
588 if (depGraph
.empty() && readyList
.empty() && traceComplete
&&
589 !hwResource
.awaitingResponse()) {
590 DPRINTF(TraceCPUData
, "\tExecution Complete!\n");
592 dataLastTick
= curTick();
597 TraceCPU::ElasticDataGen::executeMemReq(GraphNode
* node_ptr
)
600 DPRINTF(TraceCPUData
, "Executing memory request %lli (phys addr %d, "
601 "virt addr %d, pc %#x, size %d, flags %d).\n",
602 node_ptr
->seqNum
, node_ptr
->physAddr
, node_ptr
->virtAddr
,
603 node_ptr
->pc
, node_ptr
->size
, node_ptr
->flags
);
605 // If the request is strictly ordered, do not send it. Just return nullptr
606 // as if it was succesfully sent.
607 if (node_ptr
->isStrictlyOrdered()) {
608 node_ptr
->isLoad() ? ++numSOLoads
: ++numSOStores
;
609 DPRINTF(TraceCPUData
, "Skipping strictly ordered request %lli.\n",
614 // Check if the request spans two cache lines as this condition triggers
615 // an assert fail in the L1 cache. If it does then truncate the size to
616 // access only until the end of that line and ignore the remainder. The
617 // stat counting this is useful to keep a check on how frequently this
618 // happens. If required the code could be revised to mimick splitting such
619 // a request into two.
620 unsigned blk_size
= owner
.cacheLineSize();
621 Addr blk_offset
= (node_ptr
->physAddr
& (Addr
)(blk_size
- 1));
622 if (!(blk_offset
+ node_ptr
->size
<= blk_size
)) {
623 node_ptr
->size
= blk_size
- blk_offset
;
627 // Create a request and the packet containing request
628 Request
* req
= new Request(node_ptr
->physAddr
, node_ptr
->size
,
629 node_ptr
->flags
, masterID
, node_ptr
->seqNum
,
631 req
->setPC(node_ptr
->pc
);
632 // If virtual address is valid, set the asid and virtual address fields
634 if (node_ptr
->virtAddr
!= 0) {
635 req
->setVirt(node_ptr
->asid
, node_ptr
->virtAddr
, node_ptr
->size
,
636 node_ptr
->flags
, masterID
, node_ptr
->pc
);
637 req
->setPaddr(node_ptr
->physAddr
);
638 req
->setReqInstSeqNum(node_ptr
->seqNum
);
642 uint8_t* pkt_data
= new uint8_t[req
->getSize()];
643 if (node_ptr
->isLoad()) {
644 pkt
= Packet::createRead(req
);
646 pkt
= Packet::createWrite(req
);
647 memset(pkt_data
, 0xA, req
->getSize());
649 pkt
->dataDynamic(pkt_data
);
651 // Call MasterPort method to send a timing request for this packet
652 bool success
= port
.sendTimingReq(pkt
);
656 // If it fails, return the packet to retry when a retry is signalled by
659 DPRINTF(TraceCPUData
, "Send failed. Saving packet for retry.\n");
662 // It is succeeds, return nullptr
669 TraceCPU::ElasticDataGen::checkAndIssue(const GraphNode
* node_ptr
, bool first
)
671 // Assert the node is dependency-free
672 assert(node_ptr
->numRobDep
== 0 && node_ptr
->numRegDep
== 0);
674 // If this is the first attempt, print a debug message to indicate this.
676 DPRINTFR(TraceCPUData
, "\t\tseq. num %lli(%s) with rob num %lli is now"
677 " dependency free.\n", node_ptr
->seqNum
, node_ptr
->typeToStr(),
681 // Check if resources are available to issue the specific node
682 if (hwResource
.isAvailable(node_ptr
)) {
683 // If resources are free only then add to readyList
684 DPRINTFR(TraceCPUData
, "\t\tResources available for seq. num %lli. Adding"
685 " to readyList, occupying resources.\n", node_ptr
->seqNum
);
686 // Compute the execute tick by adding the compute delay for the node
687 // and add the ready node to the ready list
688 addToSortedReadyList(node_ptr
->seqNum
,
689 owner
.clockEdge() + node_ptr
->compDelay
);
690 // Account for the resources taken up by this issued node.
691 hwResource
.occupy(node_ptr
);
696 // Although dependencies are complete, resources are not available.
697 DPRINTFR(TraceCPUData
, "\t\tResources unavailable for seq. num %lli."
698 " Adding to depFreeQueue.\n", node_ptr
->seqNum
);
699 depFreeQueue
.push(node_ptr
);
701 DPRINTFR(TraceCPUData
, "\t\tResources unavailable for seq. num %lli. "
702 "Still pending issue.\n", node_ptr
->seqNum
);
709 TraceCPU::ElasticDataGen::completeMemAccess(PacketPtr pkt
)
711 // Release the resources for this completed node.
712 if (pkt
->isWrite()) {
713 // Consider store complete.
714 hwResource
.releaseStoreBuffer();
715 // If it is a store response then do nothing since we do not model
716 // dependencies on store completion in the trace. But if we were
717 // blocking execution due to store buffer fullness, we need to schedule
718 // an event and attempt to progress.
720 // If it is a load response then release the dependents waiting on it.
721 // Get pointer to the completed load
722 auto graph_itr
= depGraph
.find(pkt
->req
->getReqInstSeqNum());
723 assert(graph_itr
!= depGraph
.end());
724 GraphNode
* node_ptr
= graph_itr
->second
;
726 // Release resources occupied by the load
727 hwResource
.release(node_ptr
);
729 DPRINTF(TraceCPUData
, "Load seq. num %lli response received. Waking up"
730 " dependents..\n", node_ptr
->seqNum
);
732 for (auto child
: node_ptr
->dependents
) {
733 if (child
->removeDepOnInst(node_ptr
->seqNum
)) {
734 checkAndIssue(child
);
738 // clear the dynamically allocated set of dependents
739 (node_ptr
->dependents
).clear();
743 depGraph
.erase(graph_itr
);
746 if (DTRACE(TraceCPUData
)) {
750 // If the size of the dependency graph is less than the dependency window
751 // then read from the trace file to populate the graph next time we are in
753 if (depGraph
.size() < windowSize
&& !traceComplete
)
756 // If not waiting for retry, attempt to schedule next event
758 // We might have new dep-free nodes in the list which will have execute
759 // tick greater than or equal to curTick. But a new dep-free node might
760 // have its execute tick earlier. Therefore, attempt to reschedule. It
761 // could happen that the readyList is empty and we got here via a
762 // last remaining response. So, either the trace is complete or there
763 // are pending nodes in the depFreeQueue. The checking is done in the
764 // execute() control flow, so schedule an event to go via that flow.
765 Tick next_event_tick
= readyList
.empty() ? owner
.clockEdge(Cycles(1)) :
766 std::max(readyList
.begin()->execTick
, owner
.clockEdge(Cycles(1)));
767 DPRINTF(TraceCPUData
, "Attempting to schedule @%lli.\n",
769 owner
.schedDcacheNextEvent(next_event_tick
);
774 TraceCPU::ElasticDataGen::addToSortedReadyList(NodeSeqNum seq_num
,
777 ReadyNode ready_node
;
778 ready_node
.seqNum
= seq_num
;
779 ready_node
.execTick
= exec_tick
;
781 // Iterator to readyList
782 auto itr
= readyList
.begin();
784 // If the readyList is empty, simply insert the new node at the beginning
786 if (itr
== readyList
.end()) {
787 readyList
.insert(itr
, ready_node
);
788 maxReadyListSize
= std::max
<double>(readyList
.size(),
789 maxReadyListSize
.value());
793 // If the new node has its execution tick equal to the first node in the
794 // list then go to the next node. If the first node in the list failed
795 // to execute, its position as the first is thus maintained.
797 if (retryPkt
->req
->getReqInstSeqNum() == itr
->seqNum
)
800 // Increment the iterator and compare the node pointed to by it to the new
801 // node till the position to insert the new node is found.
803 while (!found
&& itr
!= readyList
.end()) {
804 // If the execution tick of the new node is less than the node then
805 // this is the position to insert
806 if (exec_tick
< itr
->execTick
)
808 // If the execution tick of the new node is equal to the node then
809 // sort in ascending order of sequence numbers
810 else if (exec_tick
== itr
->execTick
) {
811 // If the sequence number of the new node is less than the node
812 // then this is the position to insert
813 if (seq_num
< itr
->seqNum
)
815 // Else go to next node
819 // If the execution tick of the new node is greater than the node then
820 // go to the next node
824 readyList
.insert(itr
, ready_node
);
825 // Update the stat for max size reached of the readyList
826 maxReadyListSize
= std::max
<double>(readyList
.size(),
827 maxReadyListSize
.value());
831 TraceCPU::ElasticDataGen::printReadyList() {
833 auto itr
= readyList
.begin();
834 if (itr
== readyList
.end()) {
835 DPRINTF(TraceCPUData
, "readyList is empty.\n");
838 DPRINTF(TraceCPUData
, "Printing readyList:\n");
839 while (itr
!= readyList
.end()) {
840 auto graph_itr
= depGraph
.find(itr
->seqNum
);
841 GraphNode
* node_ptr M5_VAR_USED
= graph_itr
->second
;
842 DPRINTFR(TraceCPUData
, "\t%lld(%s), %lld\n", itr
->seqNum
,
843 node_ptr
->typeToStr(), itr
->execTick
);
848 TraceCPU::ElasticDataGen::HardwareResource::HardwareResource(
849 uint16_t max_rob
, uint16_t max_stores
, uint16_t max_loads
)
851 sizeStoreBuffer(max_stores
),
852 sizeLoadBuffer(max_loads
),
853 oldestInFlightRobNum(UINT64_MAX
),
859 TraceCPU::ElasticDataGen::HardwareResource::occupy(const GraphNode
* new_node
)
861 // Occupy ROB entry for the issued node
862 // Merely maintain the oldest node, i.e. numerically least robNum by saving
863 // it in the variable oldestInFLightRobNum.
864 inFlightNodes
[new_node
->seqNum
] = new_node
->robNum
;
865 oldestInFlightRobNum
= inFlightNodes
.begin()->second
;
867 // Occupy Load/Store Buffer entry for the issued node if applicable
868 if (new_node
->isLoad()) {
870 } else if (new_node
->isStore()) {
872 } // else if it is a non load/store node, no buffer entry is occupied
878 TraceCPU::ElasticDataGen::HardwareResource::release(const GraphNode
* done_node
)
880 assert(!inFlightNodes
.empty());
881 DPRINTFR(TraceCPUData
, "\tClearing done seq. num %d from inFlightNodes..\n",
884 assert(inFlightNodes
.find(done_node
->seqNum
) != inFlightNodes
.end());
885 inFlightNodes
.erase(done_node
->seqNum
);
887 if (inFlightNodes
.empty()) {
888 // If we delete the only in-flight node and then the
889 // oldestInFlightRobNum is set to it's initialized (max) value.
890 oldestInFlightRobNum
= UINT64_MAX
;
892 // Set the oldest in-flight node rob number equal to the first node in
893 // the inFlightNodes since that will have the numerically least value.
894 oldestInFlightRobNum
= inFlightNodes
.begin()->second
;
897 DPRINTFR(TraceCPUData
, "\tCleared. inFlightNodes.size() = %d, "
898 "oldestInFlightRobNum = %d\n", inFlightNodes
.size(),
899 oldestInFlightRobNum
);
901 // A store is considered complete when a request is sent, thus ROB entry is
902 // freed. But it occupies an entry in the Store Buffer until its response
903 // is received. A load is considered complete when a response is received,
904 // thus both ROB and Load Buffer entries can be released.
905 if (done_node
->isLoad()) {
906 assert(numInFlightLoads
!= 0);
909 // For normal writes, we send the requests out and clear a store buffer
910 // entry on response. For writes which are strictly ordered, for e.g.
911 // writes to device registers, we do that within release() which is called
912 // when node is executed and taken off from readyList.
913 if (done_node
->isStore() && done_node
->isStrictlyOrdered()) {
914 releaseStoreBuffer();
919 TraceCPU::ElasticDataGen::HardwareResource::releaseStoreBuffer()
921 assert(numInFlightStores
!= 0);
926 TraceCPU::ElasticDataGen::HardwareResource::isAvailable(
927 const GraphNode
* new_node
) const
929 uint16_t num_in_flight_nodes
;
930 if (inFlightNodes
.empty()) {
931 num_in_flight_nodes
= 0;
932 DPRINTFR(TraceCPUData
, "\t\tChecking resources to issue seq. num %lli:"
933 " #in-flight nodes = 0", new_node
->seqNum
);
934 } else if (new_node
->robNum
> oldestInFlightRobNum
) {
935 // This is the intuitive case where new dep-free node is younger
936 // instruction than the oldest instruction in-flight. Thus we make sure
937 // in_flight_nodes does not overflow.
938 num_in_flight_nodes
= new_node
->robNum
- oldestInFlightRobNum
;
939 DPRINTFR(TraceCPUData
, "\t\tChecking resources to issue seq. num %lli:"
940 " #in-flight nodes = %d - %d = %d", new_node
->seqNum
,
941 new_node
->robNum
, oldestInFlightRobNum
, num_in_flight_nodes
);
943 // This is the case where an instruction older than the oldest in-
944 // flight instruction becomes dep-free. Thus we must have already
945 // accounted for the entry in ROB for this new dep-free node.
946 // Immediately after this check returns true, oldestInFlightRobNum will
947 // be updated in occupy(). We simply let this node issue now.
948 num_in_flight_nodes
= 0;
949 DPRINTFR(TraceCPUData
, "\t\tChecking resources to issue seq. num %lli:"
950 " new oldestInFlightRobNum = %d, #in-flight nodes ignored",
951 new_node
->seqNum
, new_node
->robNum
);
953 DPRINTFR(TraceCPUData
, ", LQ = %d/%d, SQ = %d/%d.\n",
954 numInFlightLoads
, sizeLoadBuffer
,
955 numInFlightStores
, sizeStoreBuffer
);
956 // Check if resources are available to issue the specific node
957 if (num_in_flight_nodes
>= sizeROB
) {
960 if (new_node
->isLoad() && numInFlightLoads
>= sizeLoadBuffer
) {
963 if (new_node
->isStore() && numInFlightStores
>= sizeStoreBuffer
) {
970 TraceCPU::ElasticDataGen::HardwareResource::awaitingResponse() const {
971 // Return true if there is at least one read or write request in flight
972 return (numInFlightStores
!= 0 || numInFlightLoads
!= 0);
976 TraceCPU::ElasticDataGen::HardwareResource::printOccupancy() {
977 DPRINTFR(TraceCPUData
, "oldestInFlightRobNum = %d, "
978 "LQ = %d/%d, SQ = %d/%d.\n",
979 oldestInFlightRobNum
,
980 numInFlightLoads
, sizeLoadBuffer
,
981 numInFlightStores
, sizeStoreBuffer
);
985 TraceCPU::FixedRetryGen::regStats()
987 using namespace Stats
;
990 .name(name() + ".numSendAttempted")
991 .desc("Number of first attempts to send a request")
995 .name(name() + ".numSendSucceeded")
996 .desc("Number of successful first attempts")
1000 .name(name() + ".numSendFailed")
1001 .desc("Number of failed first attempts")
1005 .name(name() + ".numRetrySucceeded")
1006 .desc("Number of successful retries")
1010 .name(name() + ".instLastTick")
1011 .desc("Last tick simulated from the fixed inst trace")
1016 TraceCPU::FixedRetryGen::init()
1018 DPRINTF(TraceCPUInst
, "Initializing instruction fetch request generator"
1019 " IcacheGen: fixed issue with retry.\n");
1021 if (nextExecute()) {
1022 DPRINTF(TraceCPUInst
, "\tFirst tick = %d.\n", currElement
.tick
);
1023 return currElement
.tick
;
1025 panic("Read of first message in the trace failed.\n");
// Attempt to make forward progress on instruction fetch: first resend a
// previously-rejected packet if one is pending, otherwise send a request
// for the current trace element, then advance to the next element and
// compute the tick delta for scheduling the next event.
// NOTE(review): several interleaved lines (braces, returns and stat
// updates) were lost in extraction; comments describe only visible code.
1031 TraceCPU::FixedRetryGen::tryNext()
1033 // If there is a retry packet, try to send it
1036 DPRINTF(TraceCPUInst
, "Trying to send retry packet.\n");
1038 if (!port
.sendTimingReq(retryPkt
)) {
1039 // Still blocked! This should never occur.
1040 DPRINTF(TraceCPUInst
, "Retry packet sending failed.\n");
// Retry went through: count it as a successful retry.
1043 ++numRetrySucceeded
;
1046 DPRINTF(TraceCPUInst
, "Trying to send packet for currElement.\n");
1048 // try sending current element
1049 assert(currElement
.isValid());
// Issue a request built from the current trace element's fields.
1053 if (!send(currElement
.addr
, currElement
.blocksize
,
1054 currElement
.cmd
, currElement
.flags
, currElement
.pc
)) {
1055 DPRINTF(TraceCPUInst
, "currElement sending failed.\n");
1057 // return false to indicate not to schedule next event
1063 // If packet was sent successfully, either retryPkt or currElement, return
1064 // true to indicate to schedule event at current Tick plus delta. If packet
1065 // was sent successfully and there is no next packet to send, return false.
1066 DPRINTF(TraceCPUInst
, "Packet sent successfully, trying to read next "
1069 // Read next element into currElement, currElement gets cleared so save the
1070 // tick to calculate delta
1071 Tick last_tick
= currElement
.tick
;
1072 if (nextExecute()) {
// Trace ticks must be monotonically non-decreasing.
1073 assert(currElement
.tick
>= last_tick
);
// delta is the gap until the next element's tick.
1074 delta
= currElement
.tick
- last_tick
;
// Keep scheduling events while the trace has not completed.
1076 return !traceComplete
;
// Shutdown hook for the fixed-retry generator. Only the signature is
// visible in this chunk; the body (presumably trace/stream cleanup --
// TODO confirm against full source) was lost in extraction.
1080 TraceCPU::FixedRetryGen::exit()
// Advance to the next message of the instruction trace. Clears
// currElement, reads the next protobuf message into it, and on read
// failure marks the trace complete and records the last simulated tick.
// NOTE(review): the early-return for an already-complete trace and the
// tail of the function were lost in extraction.
1086 TraceCPU::FixedRetryGen::nextExecute()
1089 // We are at the end of the file, thus we have no more messages.
1094 //Reset the currElement to the default values
1095 currElement
.clear();
1097 // Read the next line to get the next message. If that fails then end of
1098 // trace has been reached and traceComplete needs to be set in addition
1099 // to returning false. If successful then next message is in currElement.
1100 if (!trace
.read(&currElement
)) {
1101 traceComplete
= true;
// Record when the instruction trace ran out, for the instLastTick stat.
1102 instLastTick
= curTick();
// Log the element just read: r/w, address, pc, size and tick.
1106 DPRINTF(TraceCPUInst
, "inst fetch: %c addr %d pc %#x size %d tick %d\n",
1107 currElement
.cmd
.isRead() ? 'r' : 'w',
1110 currElement
.blocksize
,
// Build and issue a timing memory request for one trace element.
// Allocates a Request and Packet, attaches a dynamically-owned data
// buffer (filled with a dummy pattern for writes), and sends it on the
// instruction port. NOTE(review): the tail (saving the packet as
// retryPkt on failure and returning success) was lost in extraction.
1117 TraceCPU::FixedRetryGen::send(Addr addr
, unsigned size
, const MemCmd
& cmd
,
1118 Request::FlagsType flags
, Addr pc
)
1121 // Create new request
1122 Request
* req
= new Request(addr
, size
, flags
, masterID
);
1125 // If this is not done it triggers assert in L1 cache for invalid contextId
1126 req
->setContext(ContextID(0));
1128 // Embed it in a packet
1129 PacketPtr pkt
= new Packet(req
, cmd
);
// The packet owns this buffer and frees it when deleted.
1131 uint8_t* pkt_data
= new uint8_t[req
->getSize()];
1132 pkt
->dataDynamic(pkt_data
);
// Writes carry dummy data (0xA pattern); trace replay ignores values.
1134 if (cmd
.isWrite()) {
1135 memset(pkt_data
, 0xA, req
->getSize());
1138 // Call MasterPort method to send a timing request for this packet
1139 bool success
= port
.sendTimingReq(pkt
);
1141 // If it fails, save the packet to retry when a retry is signalled by
1149 TraceCPU::icacheRetryRecvd()
1151 // Schedule an event to go through the control flow in the same tick as
1152 // retry is received
1153 DPRINTF(TraceCPUInst
, "Icache retry received. Scheduling next IcacheGen"
1154 " event @%lli.\n", curTick());
1155 schedule(icacheNextEvent
, curTick());
1159 TraceCPU::dcacheRetryRecvd()
1161 // Schedule an event to go through the execute flow in the same tick as
1162 // retry is received
1163 DPRINTF(TraceCPUData
, "Dcache retry received. Scheduling next DcacheGen"
1164 " event @%lli.\n", curTick());
1165 schedule(dcacheNextEvent
, curTick());
1169 TraceCPU::schedDcacheNextEvent(Tick when
)
1171 if (!dcacheNextEvent
.scheduled()) {
1172 DPRINTF(TraceCPUData
, "Scheduling next DcacheGen event at %lli.\n",
1174 schedule(dcacheNextEvent
, when
);
1175 ++numSchedDcacheEvent
;
1176 } else if (when
< dcacheNextEvent
.when()) {
1177 DPRINTF(TraceCPUData
, "Re-scheduling next dcache event from %lli"
1178 " to %lli.\n", dcacheNextEvent
.when(), when
);
1179 reschedule(dcacheNextEvent
, when
);
// Response handler for the instruction-fetch port. Fetch responses carry
// no useful payload for trace replay, so they are only cleaned up.
// NOTE(review): the cleanup statements (freeing the request/packet) and
// the return were lost in extraction.
1185 TraceCPU::IcachePort::recvTimingResp(PacketPtr pkt
)
1187 // All responses on the instruction fetch side are ignored. Simply delete
1188 // the request and packet to free allocated memory
1196 TraceCPU::IcachePort::recvReqRetry()
1198 owner
->icacheRetryRecvd();
1202 TraceCPU::dcacheRecvTimingResp(PacketPtr pkt
)
1204 DPRINTF(TraceCPUData
, "Received timing response from Dcache.\n");
1205 dcacheGen
.completeMemAccess(pkt
);
// Response handler for the data port: delegate to the owning TraceCPU,
// which routes the response into the elastic data generator.
// NOTE(review): the trailing cleanup statements and return were lost in
// extraction.
1209 TraceCPU::DcachePort::recvTimingResp(PacketPtr pkt
)
1211 // Handle the responses for data memory requests which is done inside the
1212 // elastic data generator
1213 owner
->dcacheRecvTimingResp(pkt
);
1214 // After processing the response delete the request and packet to free
1223 TraceCPU::DcachePort::recvReqRetry()
1225 owner
->dcacheRetryRecvd();
1228 TraceCPU::ElasticDataGen::InputStream::InputStream(const std::string
& filename
)
1232 // Create a protobuf message for the header and read it from the stream
1233 ProtoMessage::InstDepRecordHeader header_msg
;
1234 if (!trace
.read(header_msg
)) {
1235 panic("Failed to read packet header from %s\n", filename
);
1237 if (header_msg
.tick_freq() != SimClock::Frequency
) {
1238 panic("Trace %s was recorded with a different tick frequency %d\n",
1239 header_msg
.tick_freq());
1242 // Assign window size equal to the field in the trace that was recorded
1243 // when the data dependency trace was captured in the o3cpu model
1244 windowSize
= header_msg
.window_size();
// Rewind the elastic trace stream to its beginning. Only the signature is
// visible in this chunk; the body (presumably trace.reset() -- TODO
// confirm against full source) was lost in extraction.
1249 TraceCPU::ElasticDataGen::InputStream::reset()
// Read the next instruction-dependency record from the protobuf trace
// into the supplied graph node. Populates required fields (sequence
// number, type, computation delay), both dependency lists (dropping
// register deps that duplicate ROB deps), optional fields with defaults,
// and the node's running ROB occupancy number.
// NOTE(review): branch scaffolding (closing braces, else keywords, the
// duplicate guard, and the return statements) was lost in extraction.
1255 TraceCPU::ElasticDataGen::InputStream::read(GraphNode
* element
)
1257 ProtoMessage::InstDepRecord pkt_msg
;
1258 if (trace
.read(pkt_msg
)) {
// Required protobuf fields: always present in every record.
1260 element
->seqNum
= pkt_msg
.seq_num();
1261 element
->type
= pkt_msg
.type();
1262 element
->compDelay
= pkt_msg
.comp_delay();
1264 // Repeated field robDepList
1265 element
->clearRobDep();
// The trace must not carry more ROB deps than the node can store.
1266 assert((pkt_msg
.rob_dep()).size() <= element
->maxRobDep
);
1267 for (int i
= 0; i
< (pkt_msg
.rob_dep()).size(); i
++) {
1268 element
->robDep
[element
->numRobDep
] = pkt_msg
.rob_dep(i
);
1269 element
->numRobDep
+= 1;
// Repeated field regDepList: bounded by the ISA's max source registers.
1273 element
->clearRegDep();
1274 assert((pkt_msg
.reg_dep()).size() <= TheISA::MaxInstSrcRegs
);
1275 for (int i
= 0; i
< (pkt_msg
.reg_dep()).size(); i
++) {
1276 // There is a possibility that an instruction has both, a register
1277 // and order dependency on an instruction. In such a case, the
1278 // register dependency is omitted
1279 bool duplicate
= false;
1280 for (int j
= 0; j
< element
->numRobDep
; j
++) {
1281 duplicate
|= (pkt_msg
.reg_dep(i
) == element
->robDep
[j
]);
// Only non-duplicate register dependencies are recorded.
1284 element
->regDep
[element
->numRegDep
] = pkt_msg
.reg_dep(i
);
1285 element
->numRegDep
+= 1;
// Optional fields default to zero when absent from the record.
1290 if (pkt_msg
.has_p_addr())
1291 element
->physAddr
= pkt_msg
.p_addr();
1293 element
->physAddr
= 0;
1295 if (pkt_msg
.has_v_addr())
1296 element
->virtAddr
= pkt_msg
.v_addr();
1298 element
->virtAddr
= 0;
1300 if (pkt_msg
.has_asid())
1301 element
->asid
= pkt_msg
.asid();
1305 if (pkt_msg
.has_size())
1306 element
->size
= pkt_msg
.size();
1310 if (pkt_msg
.has_flags())
1311 element
->flags
= pkt_msg
.flags();
1315 if (pkt_msg
.has_pc())
1316 element
->pc
= pkt_msg
.pc();
1320 // ROB occupancy number
// The weight field counts micro-ops; accumulate it into the running
// micro-op count that serves as the node's ROB occupancy number.
1322 if (pkt_msg
.has_weight()) {
1323 microOpCount
+= pkt_msg
.weight();
1325 element
->robNum
= microOpCount
;
1329 // We have reached the end of the file
// Search this node's register-dependency list for the given sequence
// number. Per the inline comment, a found entry is zeroed and true is
// returned; false is returned when the dependency is absent.
// NOTE(review): the zeroing/decrement statements and the returns were
// lost in extraction.
1334 TraceCPU::ElasticDataGen::GraphNode::removeRegDep(NodeSeqNum reg_dep
)
1336 for (auto& own_reg_dep
: regDep
) {
1337 if (own_reg_dep
== reg_dep
) {
1338 // If register dependency is found, make it zero and return true
// The count of pending register deps must be positive before removal.
1340 assert(numRegDep
> 0);
1342 DPRINTFR(TraceCPUData
, "\tFor %lli: Marking register dependency %lli "
1343 "done.\n", seqNum
, reg_dep
);
1348 // Return false if the dependency is not found
// Search this node's ROB (order) dependency list for the given sequence
// number. Per the inline comment, a found entry is zeroed and true is
// returned. NOTE(review): the zeroing/decrement statements and the
// returns were lost in extraction.
1353 TraceCPU::ElasticDataGen::GraphNode::removeRobDep(NodeSeqNum rob_dep
)
1355 for (auto& own_rob_dep
: robDep
) {
1356 if (own_rob_dep
== rob_dep
) {
1357 // If the rob dependency is found, make it zero and return true
// The count of pending ROB deps must be positive before removal.
1359 assert(numRobDep
> 0);
1361 DPRINTFR(TraceCPUData
, "\tFor %lli: Marking ROB dependency %lli "
1362 "done.\n", seqNum
, rob_dep
);
// Reset this node's register-dependency state by iterating over the
// regDep array. NOTE(review): the loop body (presumably zeroing each
// entry and the counter -- TODO confirm) was lost in extraction.
1370 TraceCPU::ElasticDataGen::GraphNode::clearRegDep() {
1371 for (auto& own_reg_dep
: regDep
) {
// Reset this node's ROB-dependency state by iterating over the robDep
// array. NOTE(review): the loop body (presumably zeroing each entry and
// the counter -- TODO confirm) was lost in extraction.
1378 TraceCPU::ElasticDataGen::GraphNode::clearRobDep() {
1379 for (auto& own_rob_dep
: robDep
) {
1386 TraceCPU::ElasticDataGen::GraphNode::removeDepOnInst(NodeSeqNum done_seq_num
)
1388 // If it is an rob dependency then remove it
1389 if (!removeRobDep(done_seq_num
)) {
1390 // If it is not an rob dependency then it must be a register dependency
1391 // If the register dependency is not found, it violates an assumption
1392 // and must be caught by assert.
1393 bool regdep_found M5_VAR_USED
= removeRegDep(done_seq_num
);
1394 assert(regdep_found
);
1396 // Return true if the node is dependency free
1397 return (numRobDep
== 0 && numRegDep
== 0);
// Dump this graph node in a comma-separated textual form to the debug
// log: sequence number, type, memory fields for loads/stores,
// computation delay, then the non-zero ROB deps, register deps and the
// dependent nodes' sequence numbers. NOTE(review): the index variable
// declarations/increments and loop-closing braces were lost in
// extraction.
1401 TraceCPU::ElasticDataGen::GraphNode::writeElementAsTrace() const
1403 DPRINTFR(TraceCPUData
, "%lli", seqNum
);
1404 DPRINTFR(TraceCPUData
, ",%s", typeToStr());
// Memory fields are only meaningful for loads and stores.
1405 if (isLoad() || isStore()) {
1406 DPRINTFR(TraceCPUData
, ",%i", physAddr
);
1407 DPRINTFR(TraceCPUData
, ",%i", size
);
1408 DPRINTFR(TraceCPUData
, ",%i", flags
);
1410 DPRINTFR(TraceCPUData
, ",%lli", compDelay
);
// ROB dependencies: zero terminates the list (cleared slots are zero).
1412 DPRINTFR(TraceCPUData
, "robDep:");
1413 while (robDep
[i
] != 0) {
1414 DPRINTFR(TraceCPUData
, ",%lli", robDep
[i
]);
// Register dependencies, also zero-terminated.
1418 DPRINTFR(TraceCPUData
, "regDep:");
1419 while (regDep
[i
] != 0) {
1420 DPRINTFR(TraceCPUData
, ",%lli", regDep
[i
]);
// Walk the list of dependent nodes and print their sequence numbers.
1423 auto child_itr
= dependents
.begin();
1424 DPRINTFR(TraceCPUData
, "dependents:");
1425 while (child_itr
!= dependents
.end()) {
1426 DPRINTFR(TraceCPUData
, ":%lli", (*child_itr
)->seqNum
);
1430 DPRINTFR(TraceCPUData
, "\n");
1434 TraceCPU::ElasticDataGen::GraphNode::typeToStr() const
1436 return Record::RecordType_Name(type
);
1439 TraceCPU::FixedRetryGen::InputStream::InputStream(const std::string
& filename
)
1442 // Create a protobuf message for the header and read it from the stream
1443 ProtoMessage::PacketHeader header_msg
;
1444 if (!trace
.read(header_msg
)) {
1445 panic("Failed to read packet header from %s\n", filename
);
1447 if (header_msg
.tick_freq() != SimClock::Frequency
) {
1448 panic("Trace %s was recorded with a different tick frequency %d\n",
1449 header_msg
.tick_freq());
// Rewind the fixed-retry trace stream to its beginning. Only the
// signature is visible in this chunk; the body (presumably trace.reset()
// -- TODO confirm against full source) was lost in extraction.
1455 TraceCPU::FixedRetryGen::InputStream::reset()
1461 TraceCPU::FixedRetryGen::InputStream::read(TraceElement
* element
)
1463 ProtoMessage::Packet pkt_msg
;
1464 if (trace
.read(pkt_msg
)) {
1465 element
->cmd
= pkt_msg
.cmd();
1466 element
->addr
= pkt_msg
.addr();
1467 element
->blocksize
= pkt_msg
.size();
1468 element
->tick
= pkt_msg
.tick();
1469 element
->flags
= pkt_msg
.has_flags() ? pkt_msg
.flags() : 0;
1470 element
->pc
= pkt_msg
.has_pc() ? pkt_msg
.pc() : 0;
1474 // We have reached the end of the file