arch: Make and use endian specific versions of the mem helpers.
[gem5.git] / src / cpu / trace / trace_cpu.cc
1 /*
2 * Copyright (c) 2013 - 2016 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 *
37 * Authors: Radhika Jagtap
38 * Andreas Hansson
39 * Thomas Grass
40 */
41
42 #include "cpu/trace/trace_cpu.hh"
43
44 #include "sim/sim_exit.hh"
45
46 // Declare and initialize the static counter for number of trace CPUs.
47 int TraceCPU::numTraceCPUs = 0;
48
49 TraceCPU::TraceCPU(TraceCPUParams *params)
50 : BaseCPU(params),
51 icachePort(this),
52 dcachePort(this),
53 instMasterID(params->system->getMasterId(this, "inst")),
54 dataMasterID(params->system->getMasterId(this, "data")),
55 instTraceFile(params->instTraceFile),
56 dataTraceFile(params->dataTraceFile),
57 icacheGen(*this, ".iside", icachePort, instMasterID, instTraceFile),
58 dcacheGen(*this, ".dside", dcachePort, dataMasterID, dataTraceFile,
59 params),
60 icacheNextEvent([this]{ schedIcacheNext(); }, name()),
61 dcacheNextEvent([this]{ schedDcacheNext(); }, name()),
62 oneTraceComplete(false),
63 traceOffset(0),
64 execCompleteEvent(nullptr),
65 enableEarlyExit(params->enableEarlyExit),
66 progressMsgInterval(params->progressMsgInterval),
67 progressMsgThreshold(params->progressMsgInterval)
68 {
69 // Increment static counter for number of Trace CPUs.
70 ++TraceCPU::numTraceCPUs;
71
72 // Check that the python parameters for sizes of ROB, store buffer and
73 // load buffer do not overflow the corresponding C++ variables.
74 fatal_if(params->sizeROB > UINT16_MAX, "ROB size set to %d exceeds the "
75 "max. value of %d.\n", params->sizeROB, UINT16_MAX);
76 fatal_if(params->sizeStoreBuffer > UINT16_MAX, "Store buffer size set to"
77 " %d exceeds the max. value of %d.\n", params->sizeStoreBuffer,
78 UINT16_MAX);
79 fatal_if(params->sizeLoadBuffer > UINT16_MAX, "Load buffer size set to"
80 " %d exceeds the max. value of %d.\n",
81 params->sizeLoadBuffer, UINT16_MAX);
82 }
83
84 TraceCPU::~TraceCPU()
85 {
86
87 }
88
89 TraceCPU*
90 TraceCPUParams::create()
91 {
92 return new TraceCPU(this);
93 }
94
95 void
96 TraceCPU::updateNumOps(uint64_t rob_num)
97 {
98 numOps = rob_num;
99 if (progressMsgInterval != 0 && numOps.value() >= progressMsgThreshold) {
100 inform("%s: %i insts committed\n", name(), progressMsgThreshold);
101 progressMsgThreshold += progressMsgInterval;
102 }
103 }
104
105 void
106 TraceCPU::takeOverFrom(BaseCPU *oldCPU)
107 {
108 // Unbind the ports of the old CPU and bind the ports of the TraceCPU.
109 getInstPort().takeOverFrom(&oldCPU->getInstPort());
110 getDataPort().takeOverFrom(&oldCPU->getDataPort());
111 }
112
113 void
114 TraceCPU::init()
115 {
116 DPRINTF(TraceCPUInst, "Instruction fetch request trace file is \"%s\"."
117 "\n", instTraceFile);
118 DPRINTF(TraceCPUData, "Data memory request trace file is \"%s\".\n",
119 dataTraceFile);
120
121 BaseCPU::init();
122
123 // Get the send tick of the first instruction read request
124 Tick first_icache_tick = icacheGen.init();
125
126 // Get the send tick of the first data read/write request
127 Tick first_dcache_tick = dcacheGen.init();
128
129 // Set the trace offset as the minimum of that in both traces
130 traceOffset = std::min(first_icache_tick, first_dcache_tick);
131 inform("%s: Time offset (tick) found as min of both traces is %lli.\n",
132 name(), traceOffset);
133
134 // Schedule next icache and dcache event by subtracting the offset
135 schedule(icacheNextEvent, first_icache_tick - traceOffset);
136 schedule(dcacheNextEvent, first_dcache_tick - traceOffset);
137
138 // Adjust the trace offset for the dcache generator's ready nodes
139 // We don't need to do this for the icache generator as it will
140 // send its first request at the first event and schedule subsequent
141 // events using a relative tick delta
142 dcacheGen.adjustInitTraceOffset(traceOffset);
143
144 // If the Trace CPU simulation is configured to exit on any one trace
145 // completion then we don't need a counted event to count down all Trace
146 // CPUs in the system. If not then instantiate a counted event.
147 if (!enableEarlyExit) {
148 // The static counter for number of Trace CPUs is correctly set at
149 // this point so create an event and pass it.
150 execCompleteEvent = new CountedExitEvent("end of all traces reached.",
151 numTraceCPUs);
152 }
153
154 }
155
156 void
157 TraceCPU::schedIcacheNext()
158 {
159 DPRINTF(TraceCPUInst, "IcacheGen event.\n");
160
161 // Try to send the current packet or a retry packet if there is one
162 bool sched_next = icacheGen.tryNext();
163 // If packet sent successfully, schedule next event
164 if (sched_next) {
165 DPRINTF(TraceCPUInst, "Scheduling next icacheGen event "
166 "at %d.\n", curTick() + icacheGen.tickDelta());
167 schedule(icacheNextEvent, curTick() + icacheGen.tickDelta());
168 ++numSchedIcacheEvent;
169 } else {
170 // Check if the trace is complete. If not, do nothing because sending
171 // failed and the next event will be scheduled via recvReqRetry()
172 if (icacheGen.isTraceComplete()) {
173 // If this is the first trace to complete, set the variable. If it
174 // is already set then both traces are complete and the simulation can exit.
175 checkAndSchedExitEvent();
176 }
177 }
178 return;
179 }
180
181 void
182 TraceCPU::schedDcacheNext()
183 {
184 DPRINTF(TraceCPUData, "DcacheGen event.\n");
185
186 // Update stat for numCycles
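// clockEdge() divided by clockPeriod() gives the number of whole clock
// cycles elapsed since tick 0, so numCycles records total elapsed cycles
// rather than being incremented event by event.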
187 numCycles = clockEdge() / clockPeriod();
188
189 dcacheGen.execute();
190 if (dcacheGen.isExecComplete()) {
191 checkAndSchedExitEvent();
192 }
193 }
194
195 void
196 TraceCPU::checkAndSchedExitEvent()
197 {
198 if (!oneTraceComplete) {
199 oneTraceComplete = true;
200 } else {
201 // Schedule event to indicate execution is complete as both
202 // instruction and data access traces have been played back.
203 inform("%s: Execution complete.\n", name());
204 // If the replay is configured to exit early, that is when any one
205 // execution is complete then exit immediately and return. Otherwise,
206 // schedule the counted exit that counts down completion of each Trace
207 // CPU.
208 if (enableEarlyExit) {
209 exitSimLoop("End of trace reached");
210 } else {
211 schedule(*execCompleteEvent, curTick());
212 }
213 }
214 }
215
216 void
217 TraceCPU::regStats()
218 {
219
220 BaseCPU::regStats();
221
222 numSchedDcacheEvent
223 .name(name() + ".numSchedDcacheEvent")
224 .desc("Number of events scheduled to trigger data request generator")
225 ;
226
227 numSchedIcacheEvent
228 .name(name() + ".numSchedIcacheEvent")
229 .desc("Number of events scheduled to trigger instruction request generator")
230 ;
231
232 numOps
233 .name(name() + ".numOps")
234 .desc("Number of micro-ops simulated by the Trace CPU")
235 ;
236
237 cpi
238 .name(name() + ".cpi")
239 .desc("Cycles per micro-op used as a proxy for CPI")
240 .precision(6)
241 ;
242 cpi = numCycles/numOps;
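// cpi is a stats formula, so the division above is recorded symbolically
// and evaluated whenever stats are dumped rather than once at registration.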
243
244 icacheGen.regStats();
245 dcacheGen.regStats();
246 }
247
248 void
249 TraceCPU::ElasticDataGen::regStats()
250 {
251 using namespace Stats;
252
253 maxDependents
254 .name(name() + ".maxDependents")
255 .desc("Max number of dependents observed on a node")
256 ;
257
258 maxReadyListSize
259 .name(name() + ".maxReadyListSize")
260 .desc("Max size of the ready list observed")
261 ;
262
263 numSendAttempted
264 .name(name() + ".numSendAttempted")
265 .desc("Number of first attempts to send a request")
266 ;
267
268 numSendSucceeded
269 .name(name() + ".numSendSucceeded")
270 .desc("Number of successful first attempts")
271 ;
272
273 numSendFailed
274 .name(name() + ".numSendFailed")
275 .desc("Number of failed first attempts")
276 ;
277
278 numRetrySucceeded
279 .name(name() + ".numRetrySucceeded")
280 .desc("Number of successful retries")
281 ;
282
283 numSplitReqs
284 .name(name() + ".numSplitReqs")
285 .desc("Number of split requests")
286 ;
287
288 numSOLoads
289 .name(name() + ".numSOLoads")
290 .desc("Number of strictly ordered loads")
291 ;
292
293 numSOStores
294 .name(name() + ".numSOStores")
295 .desc("Number of strictly ordered stores")
296 ;
297
298 dataLastTick
299 .name(name() + ".dataLastTick")
300 .desc("Last tick simulated from the elastic data trace")
301 ;
302 }
303
304 Tick
305 TraceCPU::ElasticDataGen::init()
306 {
307 DPRINTF(TraceCPUData, "Initializing data memory request generator "
308 "DcacheGen: elastic issue with retry.\n");
309
310 if (!readNextWindow())
311 panic("Trace has %d elements. It must have at least %d elements.\n",
312 depGraph.size(), 2 * windowSize);
313 DPRINTF(TraceCPUData, "After 1st read, depGraph size:%d.\n",
314 depGraph.size());
315
316 if (!readNextWindow())
317 panic("Trace has %d elements. It must have at least %d elements.\n",
318 depGraph.size(), 2 * windowSize);
319 DPRINTF(TraceCPUData, "After 2nd read, depGraph size:%d.\n",
320 depGraph.size());
321
322 // Print readyList
323 if (DTRACE(TraceCPUData)) {
324 printReadyList();
325 }
326 auto free_itr = readyList.begin();
327 DPRINTF(TraceCPUData, "Execute tick of the first dependency free node %lli"
328 " is %d.\n", free_itr->seqNum, free_itr->execTick);
329 // Return the execute tick of the earliest ready node so that an event
330 // can be scheduled to call execute()
331 return (free_itr->execTick);
332 }
333
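// The windows read up front in init() carry absolute trace ticks for the
// ready nodes' execute ticks; subtract the common trace offset so that
// replay effectively starts at tick 0 (see TraceCPU::init()).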
334 void
335 TraceCPU::ElasticDataGen::adjustInitTraceOffset(Tick& offset) {
336 for (auto& free_node : readyList) {
337 free_node.execTick -= offset;
338 }
339 }
340
341 void
342 TraceCPU::ElasticDataGen::exit()
343 {
344 trace.reset();
345 }
346
347 bool
348 TraceCPU::ElasticDataGen::readNextWindow()
349 {
350
351 // Read and add next window
352 DPRINTF(TraceCPUData, "Reading next window from file.\n");
353
354 if (traceComplete) {
355 // We are at the end of the file, thus we have no more records.
356 // Return false.
357 return false;
358 }
359
360 DPRINTF(TraceCPUData, "Start read: Size of depGraph is %d.\n",
361 depGraph.size());
362
363 uint32_t num_read = 0;
364 while (num_read != windowSize) {
365
366 // Create a new graph node
367 GraphNode* new_node = new GraphNode;
368
369 // Read the next line to get the next record. If that fails then end of
370 // trace has been reached and traceComplete needs to be set in addition
371 // to returning false.
372 if (!trace.read(new_node)) {
373 DPRINTF(TraceCPUData, "\tTrace complete!\n");
374 traceComplete = true;
375 return false;
376 }
377
378 // Annotate the ROB dependencies of the new node onto the parent nodes.
379 addDepsOnParent(new_node, new_node->robDep, new_node->numRobDep);
380 // Annotate the register dependencies of the new node onto the parent
381 // nodes.
382 addDepsOnParent(new_node, new_node->regDep, new_node->numRegDep);
383
384 num_read++;
385 // Add to map
386 depGraph[new_node->seqNum] = new_node;
387 if (new_node->numRobDep == 0 && new_node->numRegDep == 0) {
388 // Source dependencies are already complete, check if resources
389 // are available and issue. The execution time is approximated
390 // to current time plus the computational delay.
391 checkAndIssue(new_node);
392 }
393 }
394
395 DPRINTF(TraceCPUData, "End read: Size of depGraph is %d.\n",
396 depGraph.size());
397 return true;
398 }
399
400 template<typename T> void
401 TraceCPU::ElasticDataGen::addDepsOnParent(GraphNode *new_node,
402 T& dep_array, uint8_t& num_dep)
403 {
404 for (auto& a_dep : dep_array) {
405 // The convention is to set the dependencies starting with the first
406 // index in the ROB and register dependency arrays. Thus, when we reach
407 // a dependency equal to the initialisation value of zero, we know we have
408 // iterated over all dependencies and can break.
409 if (a_dep == 0)
410 break;
411 // We look up the valid dependency, i.e. the parent of this node
412 auto parent_itr = depGraph.find(a_dep);
413 if (parent_itr != depGraph.end()) {
414 // If the parent is found, it is yet to be executed. Append a
415 // pointer to the new node to the dependents list of the parent
416 // node.
417 parent_itr->second->dependents.push_back(new_node);
418 auto num_depts = parent_itr->second->dependents.size();
419 maxDependents = std::max<double>(num_depts, maxDependents.value());
420 } else {
421 // The dependency is not found in the graph. So consider
422 // the execution of the parent as complete, i.e. remove this
423 // dependency.
424 a_dep = 0;
425 num_dep--;
426 }
427 }
428 }
429
430 void
431 TraceCPU::ElasticDataGen::execute()
432 {
433 DPRINTF(TraceCPUData, "Execute start occupancy:\n");
434 DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, "
435 "depFreeQueue = %d ,", depGraph.size(), readyList.size(),
436 depFreeQueue.size());
437 hwResource.printOccupancy();
438
439 // Read next window to make sure that dependents of all dep-free nodes
440 // are in the depGraph
441 if (nextRead) {
442 readNextWindow();
443 nextRead = false;
444 }
445
446 // First attempt to issue the pending dependency-free nodes held
447 // in depFreeQueue. If resources have become available for a node,
448 // then issue it, i.e. add the node to readyList.
449 while (!depFreeQueue.empty()) {
450 if (checkAndIssue(depFreeQueue.front(), false)) {
451 DPRINTF(TraceCPUData, "Removing from depFreeQueue: seq. num "
452 "%lli.\n", (depFreeQueue.front())->seqNum);
453 depFreeQueue.pop();
454 } else {
455 break;
456 }
457 }
458 // Proceed to execute from readyList
459 auto graph_itr = depGraph.begin();
460 auto free_itr = readyList.begin();
461 // Iterate through readyList until the next free node has its execute
462 // tick later than curTick or the end of readyList is reached
463 while (free_itr != readyList.end() && free_itr->execTick <= curTick()) {
464
465 // Get pointer to the node to be executed
466 graph_itr = depGraph.find(free_itr->seqNum);
467 assert(graph_itr != depGraph.end());
468 GraphNode* node_ptr = graph_itr->second;
469
470 // If there is a retryPkt, send it; otherwise execute the load or store
471 if (retryPkt) {
472 // The retryPkt must be the request that was created by the
473 // first node in the readyList.
474 if (retryPkt->req->getReqInstSeqNum() != node_ptr->seqNum) {
475 panic("Retry packet's sequence number does not match "
476 "the first node in the readyList.\n");
477 }
478 if (port.sendTimingReq(retryPkt)) {
479 ++numRetrySucceeded;
480 retryPkt = nullptr;
481 }
482 } else if (node_ptr->isLoad() || node_ptr->isStore()) {
483 // If there is no retryPkt, attempt to send a memory request in
484 // case of a load or store node. If the send fails, executeMemReq()
485 // returns a packet pointer, which we save in retryPkt. In case of
486 // a comp node we don't do anything and simply continue as if the
487 // execution of the comp node succeeded.
488 retryPkt = executeMemReq(node_ptr);
489 }
490 // If the retryPkt or a new load/store node failed, we exit from here
491 // as a retry from cache will bring the control to execute(). The
492 // first node in readyList then, will be the failed node.
493 if (retryPkt) {
494 break;
495 }
496
497 // Proceed to remove dependencies for the successfully executed node.
498 // If it is a load which is not strictly ordered and we sent a
499 // request for it successfully, we do not yet mark any register
500 // dependencies complete. But as per the dependency model, the ROB
501 // dependencies that load and non load/store dependents have on this
502 // load are marked complete once the load is successfully sent.
503 if (node_ptr->isLoad() && !node_ptr->isStrictlyOrdered()) {
504 // If execute succeeded mark its dependents as complete
505 DPRINTF(TraceCPUData, "Node seq. num %lli sent. Waking up "
506 "dependents..\n", node_ptr->seqNum);
507
508 auto child_itr = (node_ptr->dependents).begin();
509 while (child_itr != (node_ptr->dependents).end()) {
510 // ROB dependency of a store on a load must not be removed
511 // after load is sent but after response is received
512 if (!(*child_itr)->isStore() &&
513 (*child_itr)->removeRobDep(node_ptr->seqNum)) {
514
515 // Check if the child node has become dependency free
516 if ((*child_itr)->numRobDep == 0 &&
517 (*child_itr)->numRegDep == 0) {
518
519 // Source dependencies are complete, check if
520 // resources are available and issue
521 checkAndIssue(*child_itr);
522 }
523 // Remove this child for the sent load and point to new
524 // location of the element following the erased element
525 child_itr = node_ptr->dependents.erase(child_itr);
526 } else {
527 // This child is not dependency-free, point to the next
528 // child
529 child_itr++;
530 }
531 }
532 } else {
533 // If it is a strictly ordered load mark its dependents as complete
534 // as we do not send a request for this case. If it is a store or a
535 // comp node we also mark all its dependents complete.
536 DPRINTF(TraceCPUData, "Node seq. num %lli done. Waking"
537 " up dependents..\n", node_ptr->seqNum);
538
539 for (auto child : node_ptr->dependents) {
540 // If the child node is dependency free removeDepOnInst()
541 // returns true.
542 if (child->removeDepOnInst(node_ptr->seqNum)) {
543 // Source dependencies are complete, check if resources
544 // are available and issue
545 checkAndIssue(child);
546 }
547 }
548 }
549
550 // After executing the node, remove from readyList and delete node.
551 readyList.erase(free_itr);
552 // If it is a cacheable load which was sent, don't delete
553 // just yet. Delete it in completeMemAccess() after the
554 // response is received. If it is an strictly ordered
555 // load, it was not sent and all dependencies were simply
556 // marked complete. Thus it is safe to delete it. For
557 // stores and non load/store nodes all dependencies were
558 // marked complete so it is safe to delete it.
559 if (!node_ptr->isLoad() || node_ptr->isStrictlyOrdered()) {
560 // Release all resources occupied by the completed node
561 hwResource.release(node_ptr);
562 // clear the dynamically allocated set of dependents
563 (node_ptr->dependents).clear();
564 // Update the stat for numOps simulated
565 owner.updateNumOps(node_ptr->robNum);
566 // delete node
567 delete node_ptr;
568 // remove from graph
569 depGraph.erase(graph_itr);
570 }
571 // Point to first node to continue to next iteration of while loop
572 free_itr = readyList.begin();
573 } // end of while loop
574
575 // Print readyList, sizes of queues and resource status after updating
576 if (DTRACE(TraceCPUData)) {
577 printReadyList();
578 DPRINTF(TraceCPUData, "Execute end occupancy:\n");
579 DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, "
580 "depFreeQueue = %d ,", depGraph.size(), readyList.size(),
581 depFreeQueue.size());
582 hwResource.printOccupancy();
583 }
584
585 if (retryPkt) {
586 DPRINTF(TraceCPUData, "Not scheduling an event as expecting a retry"
587 " event from the cache for seq. num %lli.\n",
588 retryPkt->req->getReqInstSeqNum());
589 return;
590 }
591 // If the size of the dependency graph is less than the dependency window
592 // then read from the trace file to populate the graph next time we are in
593 // execute.
594 if (depGraph.size() < windowSize && !traceComplete)
595 nextRead = true;
596
597 // If cache is not blocked, schedule an event for the first execTick in
598 // readyList else retry from cache will schedule the event. If the ready
599 // list is empty then check if the next pending node has resources
600 // available to issue. If yes, then schedule an event for the next cycle.
601 if (!readyList.empty()) {
602 Tick next_event_tick = std::max(readyList.begin()->execTick,
603 curTick());
604 DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
605 next_event_tick);
606 owner.schedDcacheNextEvent(next_event_tick);
607 } else if (readyList.empty() && !depFreeQueue.empty() &&
608 hwResource.isAvailable(depFreeQueue.front())) {
609 DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
610 owner.clockEdge(Cycles(1)));
611 owner.schedDcacheNextEvent(owner.clockEdge(Cycles(1)));
612 }
613
614 // If trace is completely read, readyList is empty and depGraph is empty,
615 // set execComplete to true
616 if (depGraph.empty() && readyList.empty() && traceComplete &&
617 !hwResource.awaitingResponse()) {
618 DPRINTF(TraceCPUData, "\tExecution Complete!\n");
619 execComplete = true;
620 dataLastTick = curTick();
621 }
622 }
623
624 PacketPtr
625 TraceCPU::ElasticDataGen::executeMemReq(GraphNode* node_ptr)
626 {
627
628 DPRINTF(TraceCPUData, "Executing memory request %lli (phys addr %d, "
629 "virt addr %d, pc %#x, size %d, flags %d).\n",
630 node_ptr->seqNum, node_ptr->physAddr, node_ptr->virtAddr,
631 node_ptr->pc, node_ptr->size, node_ptr->flags);
632
633 // If the request is strictly ordered, do not send it. Just return nullptr
634 // as if it was successfully sent.
635 if (node_ptr->isStrictlyOrdered()) {
636 node_ptr->isLoad() ? ++numSOLoads : ++numSOStores;
637 DPRINTF(TraceCPUData, "Skipping strictly ordered request %lli.\n",
638 node_ptr->seqNum);
639 return nullptr;
640 }
641
642 // Check if the request spans two cache lines as this condition triggers
643 // an assert fail in the L1 cache. If it does then truncate the size to
644 // access only until the end of that line and ignore the remainder. The
645 // stat counting this is useful to keep a check on how frequently this
646 // happens. If required the code could be revised to mimic splitting such
647 // a request into two.
648 unsigned blk_size = owner.cacheLineSize();
649 Addr blk_offset = (node_ptr->physAddr & (Addr)(blk_size - 1));
650 if (!(blk_offset + node_ptr->size <= blk_size)) {
651 node_ptr->size = blk_size - blk_offset;
652 ++numSplitReqs;
653 }
654
655 // Create a request and the packet containing request
656 auto req = std::make_shared<Request>(
657 node_ptr->physAddr, node_ptr->size,
658 node_ptr->flags, masterID, node_ptr->seqNum,
659 ContextID(0));
660
661 req->setPC(node_ptr->pc);
662 // If virtual address is valid, set the asid and virtual address fields
663 // of the request.
664 if (node_ptr->virtAddr != 0) {
665 req->setVirt(node_ptr->asid, node_ptr->virtAddr, node_ptr->size,
666 node_ptr->flags, masterID, node_ptr->pc);
667 req->setPaddr(node_ptr->physAddr);
668 req->setReqInstSeqNum(node_ptr->seqNum);
669 }
670
671 PacketPtr pkt;
672 uint8_t* pkt_data = new uint8_t[req->getSize()];
673 if (node_ptr->isLoad()) {
674 pkt = Packet::createRead(req);
675 } else {
676 pkt = Packet::createWrite(req);
677 memset(pkt_data, 0xA, req->getSize());
678 }
679 pkt->dataDynamic(pkt_data);
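// dataDynamic() passes ownership of pkt_data to the packet, which frees it
// when the packet is destroyed, so no explicit delete[] is needed here.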
680
681 // Call MasterPort method to send a timing request for this packet
682 bool success = port.sendTimingReq(pkt);
683 ++numSendAttempted;
684
685 if (!success) {
686 // If it fails, return the packet to retry when a retry is signalled by
687 // the cache
688 ++numSendFailed;
689 DPRINTF(TraceCPUData, "Send failed. Saving packet for retry.\n");
690 return pkt;
691 } else {
692 // If it succeeds, return nullptr
693 ++numSendSucceeded;
694 return nullptr;
695 }
696 }
697
698 bool
699 TraceCPU::ElasticDataGen::checkAndIssue(const GraphNode* node_ptr, bool first)
700 {
701 // Assert the node is dependency-free
702 assert(node_ptr->numRobDep == 0 && node_ptr->numRegDep == 0);
703
704 // If this is the first attempt, print a debug message to indicate this.
705 if (first) {
706 DPRINTFR(TraceCPUData, "\t\tseq. num %lli(%s) with rob num %lli is now"
707 " dependency free.\n", node_ptr->seqNum, node_ptr->typeToStr(),
708 node_ptr->robNum);
709 }
710
711 // Check if resources are available to issue the specific node
712 if (hwResource.isAvailable(node_ptr)) {
713 // If resources are free only then add to readyList
714 DPRINTFR(TraceCPUData, "\t\tResources available for seq. num %lli. Adding"
715 " to readyList, occupying resources.\n", node_ptr->seqNum);
716 // Compute the execute tick by adding the compute delay for the node
717 // and add the ready node to the ready list
718 addToSortedReadyList(node_ptr->seqNum,
719 owner.clockEdge() + node_ptr->compDelay);
720 // Account for the resources taken up by this issued node.
721 hwResource.occupy(node_ptr);
722 return true;
723
724 } else {
725 if (first) {
726 // Although dependencies are complete, resources are not available.
727 DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num %lli."
728 " Adding to depFreeQueue.\n", node_ptr->seqNum);
729 depFreeQueue.push(node_ptr);
730 } else {
731 DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num %lli. "
732 "Still pending issue.\n", node_ptr->seqNum);
733 }
734 return false;
735 }
736 }
737
738 void
739 TraceCPU::ElasticDataGen::completeMemAccess(PacketPtr pkt)
740 {
741 // Release the resources for this completed node.
742 if (pkt->isWrite()) {
743 // Consider store complete.
744 hwResource.releaseStoreBuffer();
745 // If it is a store response then do nothing since we do not model
746 // dependencies on store completion in the trace. But if we were
747 // blocking execution due to store buffer fullness, we need to schedule
748 // an event and attempt to progress.
749 } else {
750 // If it is a load response then release the dependents waiting on it.
751 // Get pointer to the completed load
752 auto graph_itr = depGraph.find(pkt->req->getReqInstSeqNum());
753 assert(graph_itr != depGraph.end());
754 GraphNode* node_ptr = graph_itr->second;
755
756 // Release resources occupied by the load
757 hwResource.release(node_ptr);
758
759 DPRINTF(TraceCPUData, "Load seq. num %lli response received. Waking up"
760 " dependents..\n", node_ptr->seqNum);
761
762 for (auto child : node_ptr->dependents) {
763 if (child->removeDepOnInst(node_ptr->seqNum)) {
764 checkAndIssue(child);
765 }
766 }
767
768 // clear the dynamically allocated set of dependents
769 (node_ptr->dependents).clear();
770 // Update the stat for numOps completed
771 owner.updateNumOps(node_ptr->robNum);
772 // delete node
773 delete node_ptr;
774 // remove from graph
775 depGraph.erase(graph_itr);
776 }
777
778 if (DTRACE(TraceCPUData)) {
779 printReadyList();
780 }
781
782 // If the size of the dependency graph is less than the dependency window
783 // then read from the trace file to populate the graph next time we are in
784 // execute.
785 if (depGraph.size() < windowSize && !traceComplete)
786 nextRead = true;
787
788 // If not waiting for retry, attempt to schedule next event
789 if (!retryPkt) {
790 // We might have new dep-free nodes in the list which will have execute
791 // tick greater than or equal to curTick. But a new dep-free node might
792 // have its execute tick earlier. Therefore, attempt to reschedule. It
793 // could happen that the readyList is empty and we got here via a
794 // last remaining response. So, either the trace is complete or there
795 // are pending nodes in the depFreeQueue. The checking is done in the
796 // execute() control flow, so schedule an event to go via that flow.
797 Tick next_event_tick = readyList.empty() ? owner.clockEdge(Cycles(1)) :
798 std::max(readyList.begin()->execTick, owner.clockEdge(Cycles(1)));
799 DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
800 next_event_tick);
801 owner.schedDcacheNextEvent(next_event_tick);
802 }
803 }
804
805 void
806 TraceCPU::ElasticDataGen::addToSortedReadyList(NodeSeqNum seq_num,
807 Tick exec_tick)
808 {
809 ReadyNode ready_node;
810 ready_node.seqNum = seq_num;
811 ready_node.execTick = exec_tick;
812
813 // Iterator to readyList
814 auto itr = readyList.begin();
815
816 // If the readyList is empty, simply insert the new node at the beginning
817 // and return
818 if (itr == readyList.end()) {
819 readyList.insert(itr, ready_node);
820 maxReadyListSize = std::max<double>(readyList.size(),
821 maxReadyListSize.value());
822 return;
823 }
824
825 // If the first node in the list is the one that failed to send and is
826 // awaiting a retry (retryPkt), skip past it so that it keeps its position
827 // at the head of the list regardless of the new node's tick.
828 if (retryPkt)
829 if (retryPkt->req->getReqInstSeqNum() == itr->seqNum)
830 itr++;
831
832 // Increment the iterator and compare the node pointed to by it to the new
833 // node till the position to insert the new node is found.
834 bool found = false;
835 while (!found && itr != readyList.end()) {
836 // If the execution tick of the new node is less than the node then
837 // this is the position to insert
838 if (exec_tick < itr->execTick)
839 found = true;
840 // If the execution tick of the new node is equal to the node then
841 // sort in ascending order of sequence numbers
842 else if (exec_tick == itr->execTick) {
843 // If the sequence number of the new node is less than the node
844 // then this is the position to insert
845 if (seq_num < itr->seqNum)
846 found = true;
847 // Else go to next node
848 else
849 itr++;
850 }
851 // If the execution tick of the new node is greater than the node then
852 // go to the next node
853 else
854 itr++;
855 }
856 readyList.insert(itr, ready_node);
857 // Update the stat for max size reached of the readyList
858 maxReadyListSize = std::max<double>(readyList.size(),
859 maxReadyListSize.value());
860 }
861
862 void
863 TraceCPU::ElasticDataGen::printReadyList() {
864
865 auto itr = readyList.begin();
866 if (itr == readyList.end()) {
867 DPRINTF(TraceCPUData, "readyList is empty.\n");
868 return;
869 }
870 DPRINTF(TraceCPUData, "Printing readyList:\n");
871 while (itr != readyList.end()) {
872 auto graph_itr = depGraph.find(itr->seqNum);
873 GraphNode* node_ptr M5_VAR_USED = graph_itr->second;
874 DPRINTFR(TraceCPUData, "\t%lld(%s), %lld\n", itr->seqNum,
875 node_ptr->typeToStr(), itr->execTick);
876 itr++;
877 }
878 }
879
880 TraceCPU::ElasticDataGen::HardwareResource::HardwareResource(
881 uint16_t max_rob, uint16_t max_stores, uint16_t max_loads)
882 : sizeROB(max_rob),
883 sizeStoreBuffer(max_stores),
884 sizeLoadBuffer(max_loads),
885 oldestInFlightRobNum(UINT64_MAX),
886 numInFlightLoads(0),
887 numInFlightStores(0)
888 {}
889
890 void
891 TraceCPU::ElasticDataGen::HardwareResource::occupy(const GraphNode* new_node)
892 {
893 // Occupy ROB entry for the issued node
894 // Merely maintain the oldest node, i.e. the numerically least robNum, by
895 // saving it in the variable oldestInFlightRobNum.
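// inFlightNodes is ordered by sequence number, so its first element always
// corresponds to the oldest in-flight node.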
896 inFlightNodes[new_node->seqNum] = new_node->robNum;
897 oldestInFlightRobNum = inFlightNodes.begin()->second;
898
899 // Occupy Load/Store Buffer entry for the issued node if applicable
900 if (new_node->isLoad()) {
901 ++numInFlightLoads;
902 } else if (new_node->isStore()) {
903 ++numInFlightStores;
904 } // else if it is a non load/store node, no buffer entry is occupied
905
906 printOccupancy();
907 }
908
909 void
910 TraceCPU::ElasticDataGen::HardwareResource::release(const GraphNode* done_node)
911 {
912 assert(!inFlightNodes.empty());
913 DPRINTFR(TraceCPUData, "\tClearing done seq. num %d from inFlightNodes..\n",
914 done_node->seqNum);
915
916 assert(inFlightNodes.find(done_node->seqNum) != inFlightNodes.end());
917 inFlightNodes.erase(done_node->seqNum);
918
919 if (inFlightNodes.empty()) {
920 // If we deleted the only in-flight node, then reset
921 // oldestInFlightRobNum to its initialized (max) value.
922 oldestInFlightRobNum = UINT64_MAX;
923 } else {
924 // Set the oldest in-flight node rob number equal to the first node in
925 // the inFlightNodes since that will have the numerically least value.
926 oldestInFlightRobNum = inFlightNodes.begin()->second;
927 }
928
929 DPRINTFR(TraceCPUData, "\tCleared. inFlightNodes.size() = %d, "
930 "oldestInFlightRobNum = %d\n", inFlightNodes.size(),
931 oldestInFlightRobNum);
932
933 // A store is considered complete when a request is sent, thus ROB entry is
934 // freed. But it occupies an entry in the Store Buffer until its response
935 // is received. A load is considered complete when a response is received,
936 // thus both ROB and Load Buffer entries can be released.
937 if (done_node->isLoad()) {
938 assert(numInFlightLoads != 0);
939 --numInFlightLoads;
940 }
941 // For normal writes, we send the requests out and clear a store buffer
942 // entry on response. For writes which are strictly ordered, e.g.
943 // writes to device registers, we do that within release() which is called
944 // when node is executed and taken off from readyList.
945 if (done_node->isStore() && done_node->isStrictlyOrdered()) {
946 releaseStoreBuffer();
947 }
948 }
949
950 void
951 TraceCPU::ElasticDataGen::HardwareResource::releaseStoreBuffer()
952 {
953 assert(numInFlightStores != 0);
954 --numInFlightStores;
955 }
956
957 bool
958 TraceCPU::ElasticDataGen::HardwareResource::isAvailable(
959 const GraphNode* new_node) const
960 {
961 uint16_t num_in_flight_nodes;
962 if (inFlightNodes.empty()) {
963 num_in_flight_nodes = 0;
964 DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
965 " #in-flight nodes = 0", new_node->seqNum);
966 } else if (new_node->robNum > oldestInFlightRobNum) {
967 // This is the intuitive case where the new dep-free node is a younger
968 // instruction than the oldest in-flight instruction. Thus we make sure
969 // num_in_flight_nodes does not overflow.
970 num_in_flight_nodes = new_node->robNum - oldestInFlightRobNum;
971 DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
972 " #in-flight nodes = %d - %d = %d", new_node->seqNum,
973 new_node->robNum, oldestInFlightRobNum, num_in_flight_nodes);
974 } else {
975 // This is the case where an instruction older than the oldest in-
976 // flight instruction becomes dep-free. Thus we must have already
977 // accounted for the entry in ROB for this new dep-free node.
978 // Immediately after this check returns true, oldestInFlightRobNum will
979 // be updated in occupy(). We simply let this node issue now.
980 num_in_flight_nodes = 0;
981 DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
982 " new oldestInFlightRobNum = %d, #in-flight nodes ignored",
983 new_node->seqNum, new_node->robNum);
984 }
985 DPRINTFR(TraceCPUData, ", LQ = %d/%d, SQ = %d/%d.\n",
986 numInFlightLoads, sizeLoadBuffer,
987 numInFlightStores, sizeStoreBuffer);
988 // Check if resources are available to issue the specific node
989 if (num_in_flight_nodes >= sizeROB) {
990 return false;
991 }
992 if (new_node->isLoad() && numInFlightLoads >= sizeLoadBuffer) {
993 return false;
994 }
995 if (new_node->isStore() && numInFlightStores >= sizeStoreBuffer) {
996 return false;
997 }
998 return true;
999 }
1000
1001 bool
1002 TraceCPU::ElasticDataGen::HardwareResource::awaitingResponse() const {
1003 // Return true if there is at least one read or write request in flight
1004 return (numInFlightStores != 0 || numInFlightLoads != 0);
1005 }
1006
1007 void
1008 TraceCPU::ElasticDataGen::HardwareResource::printOccupancy() {
1009 DPRINTFR(TraceCPUData, "oldestInFlightRobNum = %d, "
1010 "LQ = %d/%d, SQ = %d/%d.\n",
1011 oldestInFlightRobNum,
1012 numInFlightLoads, sizeLoadBuffer,
1013 numInFlightStores, sizeStoreBuffer);
1014 }
1015
1016 void
1017 TraceCPU::FixedRetryGen::regStats()
1018 {
1019 using namespace Stats;
1020
1021 numSendAttempted
1022 .name(name() + ".numSendAttempted")
1023 .desc("Number of first attempts to send a request")
1024 ;
1025
1026 numSendSucceeded
1027 .name(name() + ".numSendSucceeded")
1028 .desc("Number of successful first attempts")
1029 ;
1030
1031 numSendFailed
1032 .name(name() + ".numSendFailed")
1033 .desc("Number of failed first attempts")
1034 ;
1035
1036 numRetrySucceeded
1037 .name(name() + ".numRetrySucceeded")
1038 .desc("Number of successful retries")
1039 ;
1040
1041 instLastTick
1042 .name(name() + ".instLastTick")
1043 .desc("Last tick simulated from the fixed inst trace")
1044 ;
1045 }
1046
1047 Tick
1048 TraceCPU::FixedRetryGen::init()
1049 {
1050 DPRINTF(TraceCPUInst, "Initializing instruction fetch request generator"
1051 " IcacheGen: fixed issue with retry.\n");
1052
1053 if (nextExecute()) {
1054 DPRINTF(TraceCPUInst, "\tFirst tick = %d.\n", currElement.tick);
1055 return currElement.tick;
1056 } else {
1057 panic("Read of first message in the trace failed.\n");
1058 return MaxTick;
1059 }
1060 }
1061
1062 bool
1063 TraceCPU::FixedRetryGen::tryNext()
1064 {
1065 // If there is a retry packet, try to send it
1066 if (retryPkt) {
1067
1068 DPRINTF(TraceCPUInst, "Trying to send retry packet.\n");
1069
1070 if (!port.sendTimingReq(retryPkt)) {
1071 // Still blocked! This should never occur.
1072 DPRINTF(TraceCPUInst, "Retry packet sending failed.\n");
1073 return false;
1074 }
1075 ++numRetrySucceeded;
1076 } else {
1077
1078 DPRINTF(TraceCPUInst, "Trying to send packet for currElement.\n");
1079
1080 // try sending current element
1081 assert(currElement.isValid());
1082
1083 ++numSendAttempted;
1084
1085 if (!send(currElement.addr, currElement.blocksize,
1086 currElement.cmd, currElement.flags, currElement.pc)) {
1087 DPRINTF(TraceCPUInst, "currElement sending failed.\n");
1088 ++numSendFailed;
1089 // return false to indicate not to schedule next event
1090 return false;
1091 } else {
1092 ++numSendSucceeded;
1093 }
1094 }
1095 // If packet was sent successfully, either retryPkt or currElement, return
1096 // true to indicate to schedule event at current Tick plus delta. If packet
1097 // was sent successfully and there is no next packet to send, return false.
1098 DPRINTF(TraceCPUInst, "Packet sent successfully, trying to read next "
1099 "element.\n");
1100 retryPkt = nullptr;
1101 // Read the next element into currElement. currElement gets cleared, so
1102 // save the tick first to calculate the delta.
1103 Tick last_tick = currElement.tick;
1104 if (nextExecute()) {
1105 assert(currElement.tick >= last_tick);
1106 delta = currElement.tick - last_tick;
1107 }
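// delta is consumed by the owner via tickDelta() when scheduling the next
// fetch event relative to curTick().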
1108 return !traceComplete;
1109 }
1110
1111 void
1112 TraceCPU::FixedRetryGen::exit()
1113 {
1114 trace.reset();
1115 }
1116
1117 bool
1118 TraceCPU::FixedRetryGen::nextExecute()
1119 {
1120 if (traceComplete)
1121 // We are at the end of the file, thus we have no more messages.
1122 // Return false.
1123 return false;
1124
1125
1126 // Reset the currElement to the default values
1127 currElement.clear();
1128
1129 // Read the next line to get the next message. If that fails then end of
1130 // trace has been reached and traceComplete needs to be set in addition
1131 // to returning false. If successful then next message is in currElement.
1132 if (!trace.read(&currElement)) {
1133 traceComplete = true;
1134 instLastTick = curTick();
1135 return false;
1136 }
1137
1138 DPRINTF(TraceCPUInst, "inst fetch: %c addr %d pc %#x size %d tick %d\n",
1139 currElement.cmd.isRead() ? 'r' : 'w',
1140 currElement.addr,
1141 currElement.pc,
1142 currElement.blocksize,
1143 currElement.tick);
1144
1145 return true;
1146 }
1147
1148 bool
1149 TraceCPU::FixedRetryGen::send(Addr addr, unsigned size, const MemCmd& cmd,
1150 Request::FlagsType flags, Addr pc)
1151 {
1152
1153 // Create new request
1154 auto req = std::make_shared<Request>(addr, size, flags, masterID);
1155 req->setPC(pc);
1156
1157 // If this is not done it triggers assert in L1 cache for invalid contextId
1158 req->setContext(ContextID(0));
1159
1160 // Embed it in a packet
1161 PacketPtr pkt = new Packet(req, cmd);
1162
1163 uint8_t* pkt_data = new uint8_t[req->getSize()];
1164 pkt->dataDynamic(pkt_data);
1165
1166 if (cmd.isWrite()) {
1167 memset(pkt_data, 0xA, req->getSize());
1168 }
1169
1170 // Call MasterPort method to send a timing request for this packet
1171 bool success = port.sendTimingReq(pkt);
1172 if (!success) {
1173 // If it fails, save the packet to retry when a retry is signalled by
1174 // the cache
1175 retryPkt = pkt;
1176 }
1177 return success;
1178 }
1179
1180 void
1181 TraceCPU::icacheRetryRecvd()
1182 {
1183 // Schedule an event to go through the control flow in the same tick as
1184 // retry is received
1185 DPRINTF(TraceCPUInst, "Icache retry received. Scheduling next IcacheGen"
1186 " event @%lli.\n", curTick());
1187 schedule(icacheNextEvent, curTick());
1188 }
1189
1190 void
1191 TraceCPU::dcacheRetryRecvd()
1192 {
1193 // Schedule an event to go through the execute flow in the same tick as
1194 // retry is received
1195 DPRINTF(TraceCPUData, "Dcache retry received. Scheduling next DcacheGen"
1196 " event @%lli.\n", curTick());
1197 schedule(dcacheNextEvent, curTick());
1198 }
1199
1200 void
1201 TraceCPU::schedDcacheNextEvent(Tick when)
1202 {
1203 if (!dcacheNextEvent.scheduled()) {
1204 DPRINTF(TraceCPUData, "Scheduling next DcacheGen event at %lli.\n",
1205 when);
1206 schedule(dcacheNextEvent, when);
1207 ++numSchedDcacheEvent;
1208 } else if (when < dcacheNextEvent.when()) {
1209 DPRINTF(TraceCPUData, "Re-scheduling next dcache event from %lli"
1210 " to %lli.\n", dcacheNextEvent.when(), when);
1211 reschedule(dcacheNextEvent, when);
1212 }
1213
1214 }
1215
1216 bool
1217 TraceCPU::IcachePort::recvTimingResp(PacketPtr pkt)
1218 {
1219 // All responses on the instruction fetch side are ignored. Simply delete
1220 // the packet to free allocated memory
1221 delete pkt;
1222
1223 return true;
1224 }
1225
1226 void
1227 TraceCPU::IcachePort::recvReqRetry()
1228 {
1229 owner->icacheRetryRecvd();
1230 }
1231
1232 void
1233 TraceCPU::dcacheRecvTimingResp(PacketPtr pkt)
1234 {
1235 DPRINTF(TraceCPUData, "Received timing response from Dcache.\n");
1236 dcacheGen.completeMemAccess(pkt);
1237 }
1238
1239 bool
1240 TraceCPU::DcachePort::recvTimingResp(PacketPtr pkt)
1241 {
1242 // Handle the responses for data memory requests which is done inside the
1243 // elastic data generator
1244 owner->dcacheRecvTimingResp(pkt);
1245 // After processing the response delete the packet to free
1246 // memory
1247 delete pkt;
1248
1249 return true;
1250 }
1251
1252 void
1253 TraceCPU::DcachePort::recvReqRetry()
1254 {
1255 owner->dcacheRetryRecvd();
1256 }
1257
1258 TraceCPU::ElasticDataGen::InputStream::InputStream(
1259 const std::string& filename,
1260 const double time_multiplier)
1261 : trace(filename),
1262 timeMultiplier(time_multiplier),
1263 microOpCount(0)
1264 {
1265 // Create a protobuf message for the header and read it from the stream
1266 ProtoMessage::InstDepRecordHeader header_msg;
1267 if (!trace.read(header_msg)) {
1268 panic("Failed to read packet header from %s\n", filename);
1269 }
1270
1271 if (header_msg.tick_freq() != SimClock::Frequency) {
1272 panic("Trace %s was recorded with a different tick frequency %d\n",
1273 filename, header_msg.tick_freq());
1274 }
1275
1276 // Assign window size equal to the field in the trace that was recorded
1277 // when the data dependency trace was captured in the o3cpu model
1278 windowSize = header_msg.window_size();
1279 }
1280
1281 void
1282 TraceCPU::ElasticDataGen::InputStream::reset()
1283 {
1284 trace.reset();
1285 }
1286
1287 bool
1288 TraceCPU::ElasticDataGen::InputStream::read(GraphNode* element)
1289 {
1290 ProtoMessage::InstDepRecord pkt_msg;
1291 if (trace.read(pkt_msg)) {
1292 // Required fields
1293 element->seqNum = pkt_msg.seq_num();
1294 element->type = pkt_msg.type();
1295 // Scale the compute delay to effectively scale the Trace CPU frequency
1296 element->compDelay = pkt_msg.comp_delay() * timeMultiplier;
1297
1298 // Repeated field robDepList
1299 element->clearRobDep();
1300 assert((pkt_msg.rob_dep()).size() <= element->maxRobDep);
1301 for (int i = 0; i < (pkt_msg.rob_dep()).size(); i++) {
1302 element->robDep[element->numRobDep] = pkt_msg.rob_dep(i);
1303 element->numRobDep += 1;
1304 }
1305
1306 // Repeated field
1307 element->clearRegDep();
1308 assert((pkt_msg.reg_dep()).size() <= TheISA::MaxInstSrcRegs);
1309 for (int i = 0; i < (pkt_msg.reg_dep()).size(); i++) {
1310 // There is a possibility that an instruction has both a register
1311 // and order dependency on an instruction. In such a case, the
1312 // register dependency is omitted
1313 bool duplicate = false;
1314 for (int j = 0; j < element->numRobDep; j++) {
1315 duplicate |= (pkt_msg.reg_dep(i) == element->robDep[j]);
1316 }
1317 if (!duplicate) {
1318 element->regDep[element->numRegDep] = pkt_msg.reg_dep(i);
1319 element->numRegDep += 1;
1320 }
1321 }
1322
1323 // Optional fields
1324 if (pkt_msg.has_p_addr())
1325 element->physAddr = pkt_msg.p_addr();
1326 else
1327 element->physAddr = 0;
1328
1329 if (pkt_msg.has_v_addr())
1330 element->virtAddr = pkt_msg.v_addr();
1331 else
1332 element->virtAddr = 0;
1333
1334 if (pkt_msg.has_asid())
1335 element->asid = pkt_msg.asid();
1336 else
1337 element->asid = 0;
1338
1339 if (pkt_msg.has_size())
1340 element->size = pkt_msg.size();
1341 else
1342 element->size = 0;
1343
1344 if (pkt_msg.has_flags())
1345 element->flags = pkt_msg.flags();
1346 else
1347 element->flags = 0;
1348
1349 if (pkt_msg.has_pc())
1350 element->pc = pkt_msg.pc();
1351 else
1352 element->pc = 0;
1353
1354 // ROB occupancy number
1355 ++microOpCount;
1356 if (pkt_msg.has_weight()) {
1357 microOpCount += pkt_msg.weight();
1358 }
1359 element->robNum = microOpCount;
1360 return true;
1361 }
1362
1363 // We have reached the end of the file
1364 return false;
1365 }
1366
1367 bool
1368 TraceCPU::ElasticDataGen::GraphNode::removeRegDep(NodeSeqNum reg_dep)
1369 {
1370 for (auto& own_reg_dep : regDep) {
1371 if (own_reg_dep == reg_dep) {
1372 // If register dependency is found, make it zero and return true
1373 own_reg_dep = 0;
1374 assert(numRegDep > 0);
1375 --numRegDep;
1376 DPRINTFR(TraceCPUData, "\tFor %lli: Marking register dependency %lli "
1377 "done.\n", seqNum, reg_dep);
1378 return true;
1379 }
1380 }
1381
1382 // Return false if the dependency is not found
1383 return false;
1384 }
1385
1386 bool
1387 TraceCPU::ElasticDataGen::GraphNode::removeRobDep(NodeSeqNum rob_dep)
1388 {
1389 for (auto& own_rob_dep : robDep) {
1390 if (own_rob_dep == rob_dep) {
1391 // If the rob dependency is found, make it zero and return true
1392 own_rob_dep = 0;
1393 assert(numRobDep > 0);
1394 --numRobDep;
1395 DPRINTFR(TraceCPUData, "\tFor %lli: Marking ROB dependency %lli "
1396 "done.\n", seqNum, rob_dep);
1397 return true;
1398 }
1399 }
1400 return false;
1401 }
1402
1403 void
1404 TraceCPU::ElasticDataGen::GraphNode::clearRegDep() {
1405 for (auto& own_reg_dep : regDep) {
1406 own_reg_dep = 0;
1407 }
1408 numRegDep = 0;
1409 }
1410
1411 void
1412 TraceCPU::ElasticDataGen::GraphNode::clearRobDep() {
1413 for (auto& own_rob_dep : robDep) {
1414 own_rob_dep = 0;
1415 }
1416 numRobDep = 0;
1417 }
1418
1419 bool
1420 TraceCPU::ElasticDataGen::GraphNode::removeDepOnInst(NodeSeqNum done_seq_num)
1421 {
1422 // If it is an rob dependency then remove it
1423 if (!removeRobDep(done_seq_num)) {
1424 // If it is not an rob dependency then it must be a register dependency
1425 // If the register dependency is not found, it violates an assumption
1426 // and must be caught by assert.
1427 bool regdep_found M5_VAR_USED = removeRegDep(done_seq_num);
1428 assert(regdep_found);
1429 }
1430 // Return true if the node is dependency free
1431 return (numRobDep == 0 && numRegDep == 0);
1432 }
1433
1434 void
1435 TraceCPU::ElasticDataGen::GraphNode::writeElementAsTrace() const
1436 {
1437 DPRINTFR(TraceCPUData, "%lli", seqNum);
1438 DPRINTFR(TraceCPUData, ",%s", typeToStr());
1439 if (isLoad() || isStore()) {
1440 DPRINTFR(TraceCPUData, ",%i", physAddr);
1441 DPRINTFR(TraceCPUData, ",%i", size);
1442 DPRINTFR(TraceCPUData, ",%i", flags);
1443 }
1444 DPRINTFR(TraceCPUData, ",%lli", compDelay);
1445 int i = 0;
1446 DPRINTFR(TraceCPUData, "robDep:");
1447 while (robDep[i] != 0) {
1448 DPRINTFR(TraceCPUData, ",%lli", robDep[i]);
1449 i++;
1450 }
1451 i = 0;
1452 DPRINTFR(TraceCPUData, "regDep:");
1453 while (regDep[i] != 0) {
1454 DPRINTFR(TraceCPUData, ",%lli", regDep[i]);
1455 i++;
1456 }
1457 auto child_itr = dependents.begin();
1458 DPRINTFR(TraceCPUData, "dependents:");
1459 while (child_itr != dependents.end()) {
1460 DPRINTFR(TraceCPUData, ":%lli", (*child_itr)->seqNum);
1461 child_itr++;
1462 }
1463
1464 DPRINTFR(TraceCPUData, "\n");
1465 }
1466
1467 std::string
1468 TraceCPU::ElasticDataGen::GraphNode::typeToStr() const
1469 {
1470 return Record::RecordType_Name(type);
1471 }
1472
1473 TraceCPU::FixedRetryGen::InputStream::InputStream(const std::string& filename)
1474 : trace(filename)
1475 {
1476 // Create a protobuf message for the header and read it from the stream
1477 ProtoMessage::PacketHeader header_msg;
1478 if (!trace.read(header_msg)) {
1479 panic("Failed to read packet header from %s\n", filename);
1480 }
1481
1482 if (header_msg.tick_freq() != SimClock::Frequency) {
1483 panic("Trace %s was recorded with a different tick frequency %d\n",
1484 filename, header_msg.tick_freq());
1485 }
1486 }
1487
1488 void
1489 TraceCPU::FixedRetryGen::InputStream::reset()
1490 {
1491 trace.reset();
1492 }
1493
1494 bool
1495 TraceCPU::FixedRetryGen::InputStream::read(TraceElement* element)
1496 {
1497 ProtoMessage::Packet pkt_msg;
1498 if (trace.read(pkt_msg)) {
1499 element->cmd = pkt_msg.cmd();
1500 element->addr = pkt_msg.addr();
1501 element->blocksize = pkt_msg.size();
1502 element->tick = pkt_msg.tick();
1503 element->flags = pkt_msg.has_flags() ? pkt_msg.flags() : 0;
1504 element->pc = pkt_msg.has_pc() ? pkt_msg.pc() : 0;
1505 return true;
1506 }
1507
1508 // We have reached the end of the file
1509 return false;
1510 }