1 /*
2 * Copyright (c) 2013 - 2016 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 #include "cpu/trace/trace_cpu.hh"
39
40 #include "sim/sim_exit.hh"
41
42 // Declare and initialize the static counter for number of trace CPUs.
43 int TraceCPU::numTraceCPUs = 0;
44
45 TraceCPU::TraceCPU(TraceCPUParams *params)
46 : BaseCPU(params),
47 icachePort(this),
48 dcachePort(this),
49 instMasterID(params->system->getMasterId(this, "inst")),
50 dataMasterID(params->system->getMasterId(this, "data")),
51 instTraceFile(params->instTraceFile),
52 dataTraceFile(params->dataTraceFile),
53 icacheGen(*this, "iside", icachePort, instMasterID, instTraceFile),
54 dcacheGen(*this, "dside", dcachePort, dataMasterID, dataTraceFile,
55 params),
56 icacheNextEvent([this]{ schedIcacheNext(); }, name()),
57 dcacheNextEvent([this]{ schedDcacheNext(); }, name()),
58 oneTraceComplete(false),
59 traceOffset(0),
60 execCompleteEvent(nullptr),
61 enableEarlyExit(params->enableEarlyExit),
62 progressMsgInterval(params->progressMsgInterval),
63 progressMsgThreshold(params->progressMsgInterval), traceStats(this)
64 {
65 // Increment static counter for number of Trace CPUs.
66 ++TraceCPU::numTraceCPUs;
67
68 // Check that the python parameters for sizes of ROB, store buffer and
69 // load buffer do not overflow the corresponding C++ variables.
70 fatal_if(params->sizeROB > UINT16_MAX, "ROB size set to %d exceeds the "
71 "max. value of %d.\n", params->sizeROB, UINT16_MAX);
72 fatal_if(params->sizeStoreBuffer > UINT16_MAX, "Store buffer size set "
73 "to %d exceeds the max. value of %d.\n", params->sizeStoreBuffer,
74 UINT16_MAX);
75 fatal_if(params->sizeLoadBuffer > UINT16_MAX, "Load buffer size set to"
76 " %d exceeds the max. value of %d.\n",
77 params->sizeLoadBuffer, UINT16_MAX);
78 }
79
80 TraceCPU::~TraceCPU()
81 {
82
83 }
84
85 TraceCPU*
86 TraceCPUParams::create()
87 {
88 return new TraceCPU(this);
89 }
90
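// Update the stat for simulated micro-ops from the ROB number of the last
// completed node and print a progress message each time the configured
// commit interval is crossed.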
91 void
92 TraceCPU::updateNumOps(uint64_t rob_num)
93 {
94 traceStats.numOps = rob_num;
95 if (progressMsgInterval != 0 &&
96 traceStats.numOps.value() >= progressMsgThreshold) {
97 inform("%s: %i insts committed\n", name(), progressMsgThreshold);
98 progressMsgThreshold += progressMsgInterval;
99 }
100 }
101
102 void
103 TraceCPU::takeOverFrom(BaseCPU *oldCPU)
104 {
105 // Unbind the ports of the old CPU and bind the ports of the TraceCPU.
106 getInstPort().takeOverFrom(&oldCPU->getInstPort());
107 getDataPort().takeOverFrom(&oldCPU->getDataPort());
108 }
109
110 void
111 TraceCPU::init()
112 {
113 DPRINTF(TraceCPUInst, "Instruction fetch request trace file is \"%s\"."
114 "\n", instTraceFile);
115 DPRINTF(TraceCPUData, "Data memory request trace file is \"%s\".\n",
116 dataTraceFile);
117
118 BaseCPU::init();
119
120 // Get the send tick of the first instruction read request
121 Tick first_icache_tick = icacheGen.init();
122
123 // Get the send tick of the first data read/write request
124 Tick first_dcache_tick = dcacheGen.init();
125
126 // Set the trace offset as the minimum of that in both traces
127 traceOffset = std::min(first_icache_tick, first_dcache_tick);
128 inform("%s: Time offset (tick) found as min of both traces is %lli.\n",
129 name(), traceOffset);
130
131 // Schedule next icache and dcache event by subtracting the offset
132 schedule(icacheNextEvent, first_icache_tick - traceOffset);
133 schedule(dcacheNextEvent, first_dcache_tick - traceOffset);
134
135 // Adjust the trace offset for the dcache generator's ready nodes
136 // We don't need to do this for the icache generator as it will
137 // send its first request at the first event and schedule subsequent
138 // events using a relative tick delta
139 dcacheGen.adjustInitTraceOffset(traceOffset);
140
141 // If the Trace CPU simulation is configured to exit on any one trace
142 // completion then we don't need a counted event to count down all Trace
143 // CPUs in the system. If not then instantiate a counted event.
144 if (!enableEarlyExit) {
145 // The static counter for number of Trace CPUs is correctly set at
146 // this point so create an event and pass it.
147 execCompleteEvent = new CountedExitEvent("end of all traces reached.",
148 numTraceCPUs);
149 }
150
151 }
152
153 void
154 TraceCPU::schedIcacheNext()
155 {
156 DPRINTF(TraceCPUInst, "IcacheGen event.\n");
157
158 // Try to send the current packet or a retry packet if there is one
159 bool sched_next = icacheGen.tryNext();
160 // If packet sent successfully, schedule next event
161 if (sched_next) {
162 DPRINTF(TraceCPUInst, "Scheduling next icacheGen event "
163 "at %d.\n", curTick() + icacheGen.tickDelta());
164 schedule(icacheNextEvent, curTick() + icacheGen.tickDelta());
165 ++traceStats.numSchedIcacheEvent;
166 } else {
167 // check if traceComplete. If not, do nothing because sending failed
168 // and next event will be scheduled via RecvRetry()
169 if (icacheGen.isTraceComplete()) {
170 // If this is the first trace to complete, set the variable. If it
171 // is already set then both traces are complete and the simulation can exit.
172 checkAndSchedExitEvent();
173 }
174 }
175 return;
176 }
177
178 void
179 TraceCPU::schedDcacheNext()
180 {
181 DPRINTF(TraceCPUData, "DcacheGen event.\n");
182
183 // Update stat for numCycles
184 numCycles = clockEdge() / clockPeriod();
185
186 dcacheGen.execute();
187 if (dcacheGen.isExecComplete()) {
188 checkAndSchedExitEvent();
189 }
190 }
191
192 void
193 TraceCPU::checkAndSchedExitEvent()
194 {
195 if (!oneTraceComplete) {
196 oneTraceComplete = true;
197 } else {
198 // Schedule event to indicate execution is complete as both
199 // instruction and data access traces have been played back.
200 inform("%s: Execution complete.\n", name());
201 // If the replay is configured to exit early, i.e. as soon as any one
202 // trace completes, then exit immediately. Otherwise, schedule the
203 // counted exit event that counts down the completion of each Trace
204 // CPU.
205 if (enableEarlyExit) {
206 exitSimLoop("End of trace reached");
207 } else {
208 schedule(*execCompleteEvent, curTick());
209 }
210 }
211 }
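
// Top-level statistics group for the Trace CPU.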
212 TraceCPU::TraceStats::TraceStats(TraceCPU *trace)
213 : Stats::Group(trace),
214 ADD_STAT(numSchedDcacheEvent,
215 "Number of events scheduled to trigger data request generator"),
216 ADD_STAT(numSchedIcacheEvent,
217 "Number of events scheduled to trigger instruction request generator"),
218 ADD_STAT(numOps, "Number of micro-ops simulated by the Trace CPU"),
219 ADD_STAT(cpi, "Cycles per micro-op used as a proxy for CPI",
220 trace->numCycles / numOps)
221 {
222 cpi.precision(6);
223 }
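
// Statistics for the elastic data-side request generator.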
224 TraceCPU::ElasticDataGen::
225 ElasticDataGenStatGroup::ElasticDataGenStatGroup(Stats::Group *parent,
226 const std::string& _name)
227 : Stats::Group(parent, _name.c_str()),
228 ADD_STAT(maxDependents, "Max number of dependents observed on a node"),
229 ADD_STAT(maxReadyListSize, "Max size of the ready list observed"),
230 ADD_STAT(numSendAttempted, "Number of first attempts to send a request"),
231 ADD_STAT(numSendSucceeded, "Number of successful first attempts"),
232 ADD_STAT(numSendFailed, "Number of failed first attempts"),
233 ADD_STAT(numRetrySucceeded, "Number of successful retries"),
234 ADD_STAT(numSplitReqs, "Number of split requests"),
235 ADD_STAT(numSOLoads, "Number of strictly ordered loads"),
236 ADD_STAT(numSOStores, "Number of strictly ordered stores"),
237 ADD_STAT(dataLastTick, "Last tick simulated from the elastic data trace")
238 {
239 }
240
241 Tick
242 TraceCPU::ElasticDataGen::init()
243 {
244 DPRINTF(TraceCPUData, "Initializing data memory request generator "
245 "DcacheGen: elastic issue with retry.\n");
246
247 if (!readNextWindow())
248 panic("Trace has %d elements. It must have at least %d elements.\n",
249 depGraph.size(), 2 * windowSize);
250 DPRINTF(TraceCPUData, "After 1st read, depGraph size:%d.\n",
251 depGraph.size());
252
253 if (!readNextWindow())
254 panic("Trace has %d elements. It must have at least %d elements.\n",
255 depGraph.size(), 2 * windowSize);
256 DPRINTF(TraceCPUData, "After 2st read, depGraph size:%d.\n",
257 depGraph.size());
258
259 // Print readyList
260 if (DTRACE(TraceCPUData)) {
261 printReadyList();
262 }
263 auto free_itr = readyList.begin();
264 DPRINTF(TraceCPUData, "Execute tick of the first dependency free node %lli"
265 " is %d.\n", free_itr->seqNum, free_itr->execTick);
266 // Return the execute tick of the earliest ready node so that an event
267 // can be scheduled to call execute()
268 return (free_itr->execTick);
269 }
270
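// Subtract the common trace offset from the execute tick of nodes already
// in the readyList so that replay starts relative to tick 0.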
271 void
272 TraceCPU::ElasticDataGen::adjustInitTraceOffset(Tick& offset) {
273 for (auto& free_node : readyList) {
274 free_node.execTick -= offset;
275 }
276 }
277
278 void
279 TraceCPU::ElasticDataGen::exit()
280 {
281 trace.reset();
282 }
283
284 bool
285 TraceCPU::ElasticDataGen::readNextWindow()
286 {
287
288 // Read and add next window
289 DPRINTF(TraceCPUData, "Reading next window from file.\n");
290
291 if (traceComplete) {
292 // We are at the end of the file, thus we have no more records.
293 // Return false.
294 return false;
295 }
296
297 DPRINTF(TraceCPUData, "Start read: Size of depGraph is %d.\n",
298 depGraph.size());
299
300 uint32_t num_read = 0;
301 while (num_read != windowSize) {
302
303 // Create a new graph node
304 GraphNode* new_node = new GraphNode;
305
306 // Read the next line to get the next record. If that fails then end of
307 // trace has been reached and traceComplete needs to be set in addition
308 // to returning false.
309 if (!trace.read(new_node)) {
310 DPRINTF(TraceCPUData, "\tTrace complete!\n");
311 traceComplete = true;
312 delete new_node;
313 return false;
314 }
315 // Annotate the ROB dependencies of the new node onto the parent nodes.
316 addDepsOnParent(new_node, new_node->robDep, new_node->numRobDep);
317 // Annotate the register dependencies of the new node onto the parent
318 // nodes.
319 addDepsOnParent(new_node, new_node->regDep, new_node->numRegDep);
320
321 num_read++;
322 // Add to map
323 depGraph[new_node->seqNum] = new_node;
324 if (new_node->numRobDep == 0 && new_node->numRegDep == 0) {
325 // Source dependencies are already complete, check if resources
326 // are available and issue. The execution time is approximated
327 // to current time plus the computational delay.
328 checkAndIssue(new_node);
329 }
330 }
331
332 DPRINTF(TraceCPUData, "End read: Size of depGraph is %d.\n",
333 depGraph.size());
334 return true;
335 }
336
337 template<typename T> void
338 TraceCPU::ElasticDataGen::addDepsOnParent(GraphNode *new_node,
339 T& dep_array, uint8_t& num_dep)
340 {
341 for (auto& a_dep : dep_array) {
342 // The convention is to set the dependencies starting with the first
343 // index in the ROB and register dependency arrays. Thus, when we reach
344 // a dependency equal to the initialisation value of zero, we know we
345 // have iterated over all dependencies and can break.
346 if (a_dep == 0)
347 break;
348 // We look up the valid dependency, i.e. the parent of this node
349 auto parent_itr = depGraph.find(a_dep);
350 if (parent_itr != depGraph.end()) {
351 // If the parent is found, it is yet to be executed. Append a
352 // pointer to the new node to the dependents list of the parent
353 // node.
354 parent_itr->second->dependents.push_back(new_node);
355 auto num_depts = parent_itr->second->dependents.size();
356 elasticStats.maxDependents = std::max<double>(num_depts,
357 elasticStats.maxDependents.value());
358 } else {
359 // The dependency is not found in the graph. So consider
360 // the execution of the parent is complete, i.e. remove this
361 // dependency.
362 a_dep = 0;
363 num_dep--;
364 }
365 }
366 }
367
368 void
369 TraceCPU::ElasticDataGen::execute()
370 {
371 DPRINTF(TraceCPUData, "Execute start occupancy:\n");
372 DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, "
373 "depFreeQueue = %d ,", depGraph.size(), readyList.size(),
374 depFreeQueue.size());
375 hwResource.printOccupancy();
376
377 // Read next window to make sure that dependents of all dep-free nodes
378 // are in the depGraph
379 if (nextRead) {
380 readNextWindow();
381 nextRead = false;
382 }
383
384 // First attempt to issue the pending dependency-free nodes held
385 // in depFreeQueue. If resources have become available for a node,
386 // then issue it, i.e. add the node to readyList.
387 while (!depFreeQueue.empty()) {
388 if (checkAndIssue(depFreeQueue.front(), false)) {
389 DPRINTF(TraceCPUData, "Removing from depFreeQueue: seq. num "
390 "%lli.\n", (depFreeQueue.front())->seqNum);
391 depFreeQueue.pop();
392 } else {
393 break;
394 }
395 }
396 // Proceed to execute from readyList
397 auto graph_itr = depGraph.begin();
398 auto free_itr = readyList.begin();
399 // Iterate through readyList until the next free node has its execute
400 // tick later than curTick or the end of readyList is reached
401 while (free_itr != readyList.end() && free_itr->execTick <= curTick()) {
402
403 // Get pointer to the node to be executed
404 graph_itr = depGraph.find(free_itr->seqNum);
405 assert(graph_itr != depGraph.end());
406 GraphNode* node_ptr = graph_itr->second;
407
408 // If there is a retryPkt send that else execute the load
409 if (retryPkt) {
410 // The retryPkt must be the request that was created by the
411 // first node in the readyList.
412 if (retryPkt->req->getReqInstSeqNum() != node_ptr->seqNum) {
413 panic("Retry packet's seqence number does not match "
414 "the first node in the readyList.\n");
415 }
416 if (port.sendTimingReq(retryPkt)) {
417 ++elasticStats.numRetrySucceeded;
418 retryPkt = nullptr;
419 }
420 } else if (node_ptr->isLoad() || node_ptr->isStore()) {
421 // If there is no retryPkt, attempt to send a memory request in
422 // case of a load or store node. If the send fails, executeMemReq()
423 // returns a packet pointer, which we save in retryPkt. In case of
424 // a comp node we don't do anything and simply continue as if the
425 // execution of the comp node succeeded.
426 retryPkt = executeMemReq(node_ptr);
427 }
428 // If the retryPkt or a new load/store node failed, we exit from here
429 // as a retry from cache will bring the control to execute(). The
430 // first node in readyList then, will be the failed node.
431 if (retryPkt) {
432 break;
433 }
434
435 // Proceed to remove dependencies for the successfully executed node.
436 // If it is a load which is not strictly ordered and we sent a
437 // request for it successfully, we do not yet mark any register
438 // dependencies complete. But, as per the dependency modelling, the
439 // ROB dependencies of load and non load/store dependents are satisfied
440 // by successfully sending the load, so they are marked complete here.
441 if (node_ptr->isLoad() && !node_ptr->isStrictlyOrdered()) {
442 // If execute succeeded mark its dependents as complete
443 DPRINTF(TraceCPUData, "Node seq. num %lli sent. Waking up "
444 "dependents..\n", node_ptr->seqNum);
445
446 auto child_itr = (node_ptr->dependents).begin();
447 while (child_itr != (node_ptr->dependents).end()) {
448 // ROB dependency of a store on a load must not be removed
449 // after load is sent but after response is received
450 if (!(*child_itr)->isStore() &&
451 (*child_itr)->removeRobDep(node_ptr->seqNum)) {
452
453 // Check if the child node has become dependency free
454 if ((*child_itr)->numRobDep == 0 &&
455 (*child_itr)->numRegDep == 0) {
456
457 // Source dependencies are complete, check if
458 // resources are available and issue
459 checkAndIssue(*child_itr);
460 }
461 // Remove this child for the sent load and point to new
462 // location of the element following the erased element
463 child_itr = node_ptr->dependents.erase(child_itr);
464 } else {
465 // This child is not dependency-free, point to the next
466 // child
467 child_itr++;
468 }
469 }
470 } else {
471 // If it is a strictly ordered load mark its dependents as complete
472 // as we do not send a request for this case. If it is a store or a
473 // comp node we also mark all its dependents complete.
474 DPRINTF(TraceCPUData, "Node seq. num %lli done. Waking"
475 " up dependents..\n", node_ptr->seqNum);
476
477 for (auto child : node_ptr->dependents) {
478 // If the child node is dependency free removeDepOnInst()
479 // returns true.
480 if (child->removeDepOnInst(node_ptr->seqNum)) {
481 // Source dependencies are complete, check if resources
482 // are available and issue
483 checkAndIssue(child);
484 }
485 }
486 }
487
488 // After executing the node, remove from readyList and delete node.
489 readyList.erase(free_itr);
490 // If it is a cacheable load which was sent, don't delete
491 // just yet. Delete it in completeMemAccess() after the
492 // response is received. If it is a strictly ordered
493 // load, it was not sent and all dependencies were simply
494 // marked complete. Thus it is safe to delete it. For
495 // stores and non load/store nodes all dependencies were
496 // marked complete so it is safe to delete it.
497 if (!node_ptr->isLoad() || node_ptr->isStrictlyOrdered()) {
498 // Release all resources occupied by the completed node
499 hwResource.release(node_ptr);
500 // clear the dynamically allocated set of dependents
501 (node_ptr->dependents).clear();
502 // Update the stat for numOps simulated
503 owner.updateNumOps(node_ptr->robNum);
504 // delete node
505 delete node_ptr;
506 // remove from graph
507 depGraph.erase(graph_itr);
508 }
509 // Point to first node to continue to next iteration of while loop
510 free_itr = readyList.begin();
511 } // end of while loop
512
513 // Print readyList, sizes of queues and resource status after updating
514 if (DTRACE(TraceCPUData)) {
515 printReadyList();
516 DPRINTF(TraceCPUData, "Execute end occupancy:\n");
517 DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, "
518 "depFreeQueue = %d ,", depGraph.size(), readyList.size(),
519 depFreeQueue.size());
520 hwResource.printOccupancy();
521 }
522
523 if (retryPkt) {
524 DPRINTF(TraceCPUData, "Not scheduling an event as expecting a retry"
525 "event from the cache for seq. num %lli.\n",
526 retryPkt->req->getReqInstSeqNum());
527 return;
528 }
529 // If the size of the dependency graph is less than the dependency window
530 // then read from the trace file to populate the graph next time we are in
531 // execute.
532 if (depGraph.size() < windowSize && !traceComplete)
533 nextRead = true;
534
535 // If cache is not blocked, schedule an event for the first execTick in
536 // readyList else retry from cache will schedule the event. If the ready
537 // list is empty then check if the next pending node has resources
538 // available to issue. If yes, then schedule an event for the next cycle.
539 if (!readyList.empty()) {
540 Tick next_event_tick = std::max(readyList.begin()->execTick,
541 curTick());
542 DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
543 next_event_tick);
544 owner.schedDcacheNextEvent(next_event_tick);
545 } else if (readyList.empty() && !depFreeQueue.empty() &&
546 hwResource.isAvailable(depFreeQueue.front())) {
547 DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
548 owner.clockEdge(Cycles(1)));
549 owner.schedDcacheNextEvent(owner.clockEdge(Cycles(1)));
550 }
551
552 // If trace is completely read, readyList is empty and depGraph is empty,
553 // set execComplete to true
554 if (depGraph.empty() && readyList.empty() && traceComplete &&
555 !hwResource.awaitingResponse()) {
556 DPRINTF(TraceCPUData, "\tExecution Complete!\n");
557 execComplete = true;
558 elasticStats.dataLastTick = curTick();
559 }
560 }
561
562 PacketPtr
563 TraceCPU::ElasticDataGen::executeMemReq(GraphNode* node_ptr)
564 {
565
566 DPRINTF(TraceCPUData, "Executing memory request %lli (phys addr %d, "
567 "virt addr %d, pc %#x, size %d, flags %d).\n",
568 node_ptr->seqNum, node_ptr->physAddr, node_ptr->virtAddr,
569 node_ptr->pc, node_ptr->size, node_ptr->flags);
570
571 // If the request is strictly ordered, do not send it. Just return nullptr
572 // as if it was successfully sent.
573 if (node_ptr->isStrictlyOrdered()) {
574 node_ptr->isLoad() ? ++elasticStats.numSOLoads :
575 ++elasticStats.numSOStores;
576 DPRINTF(TraceCPUData, "Skipping strictly ordered request %lli.\n",
577 node_ptr->seqNum);
578 return nullptr;
579 }
580
581 // Check if the request spans two cache lines as this condition triggers
582 // an assert fail in the L1 cache. If it does then truncate the size to
583 // access only until the end of that line and ignore the remainder. The
584 // stat counting this is useful to keep a check on how frequently this
585 // happens. If required the code could be revised to mimic splitting such
586 // a request into two.
587 unsigned blk_size = owner.cacheLineSize();
588 Addr blk_offset = (node_ptr->physAddr & (Addr)(blk_size - 1));
589 if (!(blk_offset + node_ptr->size <= blk_size)) {
590 node_ptr->size = blk_size - blk_offset;
591 ++elasticStats.numSplitReqs;
592 }
593
594 // Create a request and the packet containing request
595 auto req = std::make_shared<Request>(
596 node_ptr->physAddr, node_ptr->size, node_ptr->flags, masterID);
597 req->setReqInstSeqNum(node_ptr->seqNum);
598
599 // If this is not done it triggers assert in L1 cache for invalid contextId
600 req->setContext(ContextID(0));
601
602 req->setPC(node_ptr->pc);
603 // If virtual address is valid, set the virtual address field
604 // of the request.
605 if (node_ptr->virtAddr != 0) {
606 req->setVirt(node_ptr->virtAddr, node_ptr->size,
607 node_ptr->flags, masterID, node_ptr->pc);
608 req->setPaddr(node_ptr->physAddr);
609 req->setReqInstSeqNum(node_ptr->seqNum);
610 }
611
612 PacketPtr pkt;
613 uint8_t* pkt_data = new uint8_t[req->getSize()];
614 if (node_ptr->isLoad()) {
615 pkt = Packet::createRead(req);
616 } else {
617 pkt = Packet::createWrite(req);
618 memset(pkt_data, 0xA, req->getSize());
619 }
620 pkt->dataDynamic(pkt_data);
621
622 // Call MasterPort method to send a timing request for this packet
623 bool success = port.sendTimingReq(pkt);
624 ++elasticStats.numSendAttempted;
625
626 if (!success) {
627 // If it fails, return the packet to retry when a retry is signalled by
628 // the cache
629 ++elasticStats.numSendFailed;
630 DPRINTF(TraceCPUData, "Send failed. Saving packet for retry.\n");
631 return pkt;
632 } else {
633 // If it succeeds, return nullptr
634 ++elasticStats.numSendSucceeded;
635 return nullptr;
636 }
637 }
638
639 bool
640 TraceCPU::ElasticDataGen::checkAndIssue(const GraphNode* node_ptr, bool first)
641 {
642 // Assert the node is dependency-free
643 assert(node_ptr->numRobDep == 0 && node_ptr->numRegDep == 0);
644
645 // If this is the first attempt, print a debug message to indicate this.
646 if (first) {
647 DPRINTFR(TraceCPUData, "\t\tseq. num %lli(%s) with rob num %lli is now"
648 " dependency free.\n", node_ptr->seqNum, node_ptr->typeToStr(),
649 node_ptr->robNum);
650 }
651
652 // Check if resources are available to issue the specific node
653 if (hwResource.isAvailable(node_ptr)) {
654 // If resources are free only then add to readyList
655 DPRINTFR(TraceCPUData, "\t\tResources available for seq. num %lli. Adding"
656 " to readyList, occupying resources.\n", node_ptr->seqNum);
657 // Compute the execute tick by adding the compute delay for the node
658 // and add the ready node to the ready list
659 addToSortedReadyList(node_ptr->seqNum,
660 owner.clockEdge() + node_ptr->compDelay);
661 // Account for the resources taken up by this issued node.
662 hwResource.occupy(node_ptr);
663 return true;
664
665 } else {
666 if (first) {
667 // Although dependencies are complete, resources are not available.
668 DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num %lli."
669 " Adding to depFreeQueue.\n", node_ptr->seqNum);
670 depFreeQueue.push(node_ptr);
671 } else {
672 DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num %lli. "
673 "Still pending issue.\n", node_ptr->seqNum);
674 }
675 return false;
676 }
677 }
678
679 void
680 TraceCPU::ElasticDataGen::completeMemAccess(PacketPtr pkt)
681 {
682 // Release the resources for this completed node.
683 if (pkt->isWrite()) {
684 // Consider store complete.
685 hwResource.releaseStoreBuffer();
686 // If it is a store response then do nothing since we do not model
687 // dependencies on store completion in the trace. But if we were
688 // blocking execution due to store buffer fullness, we need to schedule
689 // an event and attempt to progress.
690 } else {
691 // If it is a load response then release the dependents waiting on it.
692 // Get pointer to the completed load
693 auto graph_itr = depGraph.find(pkt->req->getReqInstSeqNum());
694 assert(graph_itr != depGraph.end());
695 GraphNode* node_ptr = graph_itr->second;
696
697 // Release resources occupied by the load
698 hwResource.release(node_ptr);
699
700 DPRINTF(TraceCPUData, "Load seq. num %lli response received. Waking up"
701 " dependents..\n", node_ptr->seqNum);
702
703 for (auto child : node_ptr->dependents) {
704 if (child->removeDepOnInst(node_ptr->seqNum)) {
705 checkAndIssue(child);
706 }
707 }
708
709 // clear the dynamically allocated set of dependents
710 (node_ptr->dependents).clear();
711 // Update the stat for numOps completed
712 owner.updateNumOps(node_ptr->robNum);
713 // delete node
714 delete node_ptr;
715 // remove from graph
716 depGraph.erase(graph_itr);
717 }
718
719 if (DTRACE(TraceCPUData)) {
720 printReadyList();
721 }
722
723 // If the size of the dependency graph is less than the dependency window
724 // then read from the trace file to populate the graph next time we are in
725 // execute.
726 if (depGraph.size() < windowSize && !traceComplete)
727 nextRead = true;
728
729 // If not waiting for retry, attempt to schedule next event
730 if (!retryPkt) {
731 // We might have new dep-free nodes in the list which will have execute
732 // tick greater than or equal to curTick. But a new dep-free node might
733 // have its execute tick earlier. Therefore, attempt to reschedule. It
734 // could happen that the readyList is empty and we got here via a
735 // last remaining response. So, either the trace is complete or there
736 // are pending nodes in the depFreeQueue. The checking is done in the
737 // execute() control flow, so schedule an event to go via that flow.
738 Tick next_event_tick = readyList.empty() ? owner.clockEdge(Cycles(1)) :
739 std::max(readyList.begin()->execTick, owner.clockEdge(Cycles(1)));
740 DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
741 next_event_tick);
742 owner.schedDcacheNextEvent(next_event_tick);
743 }
744 }
745
746 void
747 TraceCPU::ElasticDataGen::addToSortedReadyList(NodeSeqNum seq_num,
748 Tick exec_tick)
749 {
750 ReadyNode ready_node;
751 ready_node.seqNum = seq_num;
752 ready_node.execTick = exec_tick;
753
754 // Iterator to readyList
755 auto itr = readyList.begin();
756
757 // If the readyList is empty, simply insert the new node at the beginning
758 // and return
759 if (itr == readyList.end()) {
760 readyList.insert(itr, ready_node);
761 elasticStats.maxReadyListSize = std::max<double>(readyList.size(),
762 elasticStats.maxReadyListSize.value());
763 return;
764 }
765
766 // If the first node in the list is the one whose packet is awaiting a
767 // retry, skip past it so that it keeps its position at the front of the
768 // list even though the new node may have an earlier or equal exec tick.
769 if (retryPkt)
770 if (retryPkt->req->getReqInstSeqNum() == itr->seqNum)
771 itr++;
772
773 // Increment the iterator and compare the node pointed to by it to the new
774 // node till the position to insert the new node is found.
775 bool found = false;
776 while (!found && itr != readyList.end()) {
777 // If the execution tick of the new node is less than the node then
778 // this is the position to insert
779 if (exec_tick < itr->execTick)
780 found = true;
781 // If the execution tick of the new node is equal to the node then
782 // sort in ascending order of sequence numbers
783 else if (exec_tick == itr->execTick) {
784 // If the sequence number of the new node is less than the node
785 // then this is the position to insert
786 if (seq_num < itr->seqNum)
787 found = true;
788 // Else go to next node
789 else
790 itr++;
791 }
792 // If the execution tick of the new node is greater than the node then
793 // go to the next node
794 else
795 itr++;
796 }
797 readyList.insert(itr, ready_node);
798 // Update the stat for max size reached of the readyList
799 elasticStats.maxReadyListSize = std::max<double>(readyList.size(),
800 elasticStats.maxReadyListSize.value());
801 }
802
803 void
804 TraceCPU::ElasticDataGen::printReadyList() {
805
806 auto itr = readyList.begin();
807 if (itr == readyList.end()) {
808 DPRINTF(TraceCPUData, "readyList is empty.\n");
809 return;
810 }
811 DPRINTF(TraceCPUData, "Printing readyList:\n");
812 while (itr != readyList.end()) {
813 auto graph_itr = depGraph.find(itr->seqNum);
814 GraphNode* node_ptr M5_VAR_USED = graph_itr->second;
815 DPRINTFR(TraceCPUData, "\t%lld(%s), %lld\n", itr->seqNum,
816 node_ptr->typeToStr(), itr->execTick);
817 itr++;
818 }
819 }
820
821 TraceCPU::ElasticDataGen::HardwareResource::HardwareResource(
822 uint16_t max_rob, uint16_t max_stores, uint16_t max_loads)
823 : sizeROB(max_rob),
824 sizeStoreBuffer(max_stores),
825 sizeLoadBuffer(max_loads),
826 oldestInFlightRobNum(UINT64_MAX),
827 numInFlightLoads(0),
828 numInFlightStores(0)
829 {}
830
831 void
832 TraceCPU::ElasticDataGen::HardwareResource::occupy(const GraphNode* new_node)
833 {
834 // Occupy ROB entry for the issued node
835 // Merely maintain the oldest node, i.e. numerically least robNum by saving
836 // it in the variable oldestInFlightRobNum.
837 inFlightNodes[new_node->seqNum] = new_node->robNum;
838 oldestInFlightRobNum = inFlightNodes.begin()->second;
839
840 // Occupy Load/Store Buffer entry for the issued node if applicable
841 if (new_node->isLoad()) {
842 ++numInFlightLoads;
843 } else if (new_node->isStore()) {
844 ++numInFlightStores;
845 } // else if it is a non load/store node, no buffer entry is occupied
846
847 printOccupancy();
848 }
849
850 void
851 TraceCPU::ElasticDataGen::HardwareResource::release(const GraphNode* done_node)
852 {
853 assert(!inFlightNodes.empty());
854 DPRINTFR(TraceCPUData, "\tClearing done seq. num %d from inFlightNodes..\n",
855 done_node->seqNum);
856
857 assert(inFlightNodes.find(done_node->seqNum) != inFlightNodes.end());
858 inFlightNodes.erase(done_node->seqNum);
859
860 if (inFlightNodes.empty()) {
861 // If the only in-flight node was deleted, then reset
862 // oldestInFlightRobNum to its initialized (max) value.
863 oldestInFlightRobNum = UINT64_MAX;
864 } else {
865 // Set the oldest in-flight node rob number equal to the first node in
866 // the inFlightNodes since that will have the numerically least value.
867 oldestInFlightRobNum = inFlightNodes.begin()->second;
868 }
869
870 DPRINTFR(TraceCPUData, "\tCleared. inFlightNodes.size() = %d, "
871 "oldestInFlightRobNum = %d\n", inFlightNodes.size(),
872 oldestInFlightRobNum);
873
874 // A store is considered complete when a request is sent, thus ROB entry is
875 // freed. But it occupies an entry in the Store Buffer until its response
876 // is received. A load is considered complete when a response is received,
877 // thus both ROB and Load Buffer entries can be released.
878 if (done_node->isLoad()) {
879 assert(numInFlightLoads != 0);
880 --numInFlightLoads;
881 }
882 // For normal writes, we send the requests out and clear a store buffer
883 // entry on response. For writes which are strictly ordered, e.g.
884 // writes to device registers, we do that within release() which is called
885 // when node is executed and taken off from readyList.
886 if (done_node->isStore() && done_node->isStrictlyOrdered()) {
887 releaseStoreBuffer();
888 }
889 }
890
891 void
892 TraceCPU::ElasticDataGen::HardwareResource::releaseStoreBuffer()
893 {
894 assert(numInFlightStores != 0);
895 --numInFlightStores;
896 }
897
898 bool
899 TraceCPU::ElasticDataGen::HardwareResource::isAvailable(
900 const GraphNode* new_node) const
901 {
902 uint16_t num_in_flight_nodes;
903 if (inFlightNodes.empty()) {
904 num_in_flight_nodes = 0;
905 DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
906 " #in-flight nodes = 0", new_node->seqNum);
907 } else if (new_node->robNum > oldestInFlightRobNum) {
908 // This is the intuitive case where new dep-free node is younger
909 // instruction than the oldest instruction in-flight. Thus we make sure
910 // in_flight_nodes does not overflow.
911 num_in_flight_nodes = new_node->robNum - oldestInFlightRobNum;
912 DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
913 " #in-flight nodes = %d - %d = %d", new_node->seqNum,
914 new_node->robNum, oldestInFlightRobNum, num_in_flight_nodes);
915 } else {
916 // This is the case where an instruction older than the oldest in-
917 // flight instruction becomes dep-free. Thus we must have already
918 // accounted for the entry in ROB for this new dep-free node.
919 // Immediately after this check returns true, oldestInFlightRobNum will
920 // be updated in occupy(). We simply let this node issue now.
921 num_in_flight_nodes = 0;
922 DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
923 " new oldestInFlightRobNum = %d, #in-flight nodes ignored",
924 new_node->seqNum, new_node->robNum);
925 }
926 DPRINTFR(TraceCPUData, ", LQ = %d/%d, SQ = %d/%d.\n",
927 numInFlightLoads, sizeLoadBuffer,
928 numInFlightStores, sizeStoreBuffer);
929 // Check if resources are available to issue the specific node
930 if (num_in_flight_nodes >= sizeROB) {
931 return false;
932 }
933 if (new_node->isLoad() && numInFlightLoads >= sizeLoadBuffer) {
934 return false;
935 }
936 if (new_node->isStore() && numInFlightStores >= sizeStoreBuffer) {
937 return false;
938 }
939 return true;
940 }
941
942 bool
943 TraceCPU::ElasticDataGen::HardwareResource::awaitingResponse() const {
944 // Return true if there is at least one read or write request in flight
945 return (numInFlightStores != 0 || numInFlightLoads != 0);
946 }
947
948 void
949 TraceCPU::ElasticDataGen::HardwareResource::printOccupancy() {
950 DPRINTFR(TraceCPUData, "oldestInFlightRobNum = %d, "
951 "LQ = %d/%d, SQ = %d/%d.\n",
952 oldestInFlightRobNum,
953 numInFlightLoads, sizeLoadBuffer,
954 numInFlightStores, sizeStoreBuffer);
955 }
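
// Statistics for the fixed-latency instruction-side request generator.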
956 TraceCPU::FixedRetryGen::
957 FixedRetryGenStatGroup::FixedRetryGenStatGroup(Stats::Group *parent,
958 const std::string& _name)
959 : Stats::Group(parent, _name.c_str()),
960 ADD_STAT(numSendAttempted, "Number of first attempts to send a request"),
961 ADD_STAT(numSendSucceeded, "Number of successful first attempts"),
962 ADD_STAT(numSendFailed, "Number of failed first attempts"),
963 ADD_STAT(numRetrySucceeded, "Number of successful retries"),
964 ADD_STAT(instLastTick, "Last tick simulated from the fixed inst trace")
965 {
966
967 }
968
969 Tick
970 TraceCPU::FixedRetryGen::init()
971 {
972 DPRINTF(TraceCPUInst, "Initializing instruction fetch request generator"
973 " IcacheGen: fixed issue with retry.\n");
974
975 if (nextExecute()) {
976 DPRINTF(TraceCPUInst, "\tFirst tick = %d.\n", currElement.tick);
977 return currElement.tick;
978 } else {
979 panic("Read of first message in the trace failed.\n");
980 return MaxTick;
981 }
982 }
983
984 bool
985 TraceCPU::FixedRetryGen::tryNext()
986 {
987 // If there is a retry packet, try to send it
988 if (retryPkt) {
989
990 DPRINTF(TraceCPUInst, "Trying to send retry packet.\n");
991
992 if (!port.sendTimingReq(retryPkt)) {
993 // Still blocked! This should never occur.
994 DPRINTF(TraceCPUInst, "Retry packet sending failed.\n");
995 return false;
996 }
997 ++fixedStats.numRetrySucceeded;
998 } else {
999
1000 DPRINTF(TraceCPUInst, "Trying to send packet for currElement.\n");
1001
1002 // try sending current element
1003 assert(currElement.isValid());
1004
1005 ++fixedStats.numSendAttempted;
1006
1007 if (!send(currElement.addr, currElement.blocksize,
1008 currElement.cmd, currElement.flags, currElement.pc)) {
1009 DPRINTF(TraceCPUInst, "currElement sending failed.\n");
1010 ++fixedStats.numSendFailed;
1011 // return false to indicate not to schedule next event
1012 return false;
1013 } else {
1014 ++fixedStats.numSendSucceeded;
1015 }
1016 }
1017 // The packet, either retryPkt or currElement, was sent successfully.
1018 // Return true to indicate that the next event should be scheduled at
1019 // the current tick plus delta, or false if there is no next packet.
1020 DPRINTF(TraceCPUInst, "Packet sent successfully, trying to read next "
1021 "element.\n");
1022 retryPkt = nullptr;
1023 // Read next element into currElement, currElement gets cleared so save the
1024 // tick to calculate delta
1025 Tick last_tick = currElement.tick;
1026 if (nextExecute()) {
1027 assert(currElement.tick >= last_tick);
1028 delta = currElement.tick - last_tick;
1029 }
1030 return !traceComplete;
1031 }
1032
1033 void
1034 TraceCPU::FixedRetryGen::exit()
1035 {
1036 trace.reset();
1037 }
1038
1039 bool
1040 TraceCPU::FixedRetryGen::nextExecute()
1041 {
1042 if (traceComplete)
1043 // We are at the end of the file, thus we have no more messages.
1044 // Return false.
1045 return false;
1046
1047
1048 // Reset the currElement to the default values
1049 currElement.clear();
1050
1051 // Read the next line to get the next message. If that fails then end of
1052 // trace has been reached and traceComplete needs to be set in addition
1053 // to returning false. If successful then next message is in currElement.
1054 if (!trace.read(&currElement)) {
1055 traceComplete = true;
1056 fixedStats.instLastTick = curTick();
1057 return false;
1058 }
1059
1060 DPRINTF(TraceCPUInst, "inst fetch: %c addr %d pc %#x size %d tick %d\n",
1061 currElement.cmd.isRead() ? 'r' : 'w',
1062 currElement.addr,
1063 currElement.pc,
1064 currElement.blocksize,
1065 currElement.tick);
1066
1067 return true;
1068 }
1069
1070 bool
1071 TraceCPU::FixedRetryGen::send(Addr addr, unsigned size, const MemCmd& cmd,
1072 Request::FlagsType flags, Addr pc)
1073 {
1074
1075 // Create new request
1076 auto req = std::make_shared<Request>(addr, size, flags, masterID);
1077 req->setPC(pc);
1078
1079 // If this is not done it triggers assert in L1 cache for invalid contextId
1080 req->setContext(ContextID(0));
1081
1082 // Embed it in a packet
1083 PacketPtr pkt = new Packet(req, cmd);
1084
1085 uint8_t* pkt_data = new uint8_t[req->getSize()];
1086 pkt->dataDynamic(pkt_data);
1087
1088 if (cmd.isWrite()) {
1089 memset(pkt_data, 0xA, req->getSize());
1090 }
1091
1092 // Call MasterPort method to send a timing request for this packet
1093 bool success = port.sendTimingReq(pkt);
1094 if (!success) {
1095 // If it fails, save the packet to retry when a retry is signalled by
1096 // the cache
1097 retryPkt = pkt;
1098 }
1099 return success;
1100 }
1101
1102 void
1103 TraceCPU::icacheRetryRecvd()
1104 {
1105 // Schedule an event to go through the control flow in the same tick as
1106 // retry is received
1107 DPRINTF(TraceCPUInst, "Icache retry received. Scheduling next IcacheGen"
1108 " event @%lli.\n", curTick());
1109 schedule(icacheNextEvent, curTick());
1110 }
1111
1112 void
1113 TraceCPU::dcacheRetryRecvd()
1114 {
1115 // Schedule an event to go through the execute flow in the same tick as
1116 // retry is received
1117 DPRINTF(TraceCPUData, "Dcache retry received. Scheduling next DcacheGen"
1118 " event @%lli.\n", curTick());
1119 schedule(dcacheNextEvent, curTick());
1120 }
1121
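// Schedule the next data-side generator event at the requested tick, or
// pull an already scheduled event earlier if the requested tick precedes it.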
1122 void
1123 TraceCPU::schedDcacheNextEvent(Tick when)
1124 {
1125 if (!dcacheNextEvent.scheduled()) {
1126 DPRINTF(TraceCPUData, "Scheduling next DcacheGen event at %lli.\n",
1127 when);
1128 schedule(dcacheNextEvent, when);
1129 ++traceStats.numSchedDcacheEvent;
1130 } else if (when < dcacheNextEvent.when()) {
1131 DPRINTF(TraceCPUData, "Re-scheduling next dcache event from %lli"
1132 " to %lli.\n", dcacheNextEvent.when(), when);
1133 reschedule(dcacheNextEvent, when);
1134 }
1135
1136 }
1137
1138 bool
1139 TraceCPU::IcachePort::recvTimingResp(PacketPtr pkt)
1140 {
1141 // All responses on the instruction fetch side are ignored. Simply delete
1142 // the packet to free allocated memory
1143 delete pkt;
1144
1145 return true;
1146 }
1147
1148 void
1149 TraceCPU::IcachePort::recvReqRetry()
1150 {
1151 owner->icacheRetryRecvd();
1152 }
1153
1154 void
1155 TraceCPU::dcacheRecvTimingResp(PacketPtr pkt)
1156 {
1157 DPRINTF(TraceCPUData, "Received timing response from Dcache.\n");
1158 dcacheGen.completeMemAccess(pkt);
1159 }
1160
1161 bool
1162 TraceCPU::DcachePort::recvTimingResp(PacketPtr pkt)
1163 {
1164 // Handle the responses for data memory requests which is done inside the
1165 // elastic data generator
1166 owner->dcacheRecvTimingResp(pkt);
1167 // After processing the response delete the packet to free
1168 // memory
1169 delete pkt;
1170
1171 return true;
1172 }
1173
1174 void
1175 TraceCPU::DcachePort::recvReqRetry()
1176 {
1177 owner->dcacheRetryRecvd();
1178 }
1179
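// Open the elastic data dependency trace and read its header to validate
// the tick frequency and obtain the window size.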
1180 TraceCPU::ElasticDataGen::InputStream::InputStream(
1181 const std::string& filename,
1182 const double time_multiplier)
1183 : trace(filename),
1184 timeMultiplier(time_multiplier),
1185 microOpCount(0)
1186 {
1187 // Create a protobuf message for the header and read it from the stream
1188 ProtoMessage::InstDepRecordHeader header_msg;
1189 if (!trace.read(header_msg)) {
1190 panic("Failed to read packet header from %s\n", filename);
1191 }
1192
1193 if (header_msg.tick_freq() != SimClock::Frequency) {
1194 panic("Trace %s was recorded with a different tick frequency %d\n",
1195 filename, header_msg.tick_freq());
1196 }
1197
1198 // Assign window size equal to the field in the trace that was recorded
1199 // when the data dependency trace was captured in the o3cpu model
1200 windowSize = header_msg.window_size();
1201 }
1202
1203 void
1204 TraceCPU::ElasticDataGen::InputStream::reset()
1205 {
1206 trace.reset();
1207 }
1208
1209 bool
1210 TraceCPU::ElasticDataGen::InputStream::read(GraphNode* element)
1211 {
1212 ProtoMessage::InstDepRecord pkt_msg;
1213 if (trace.read(pkt_msg)) {
1214 // Required fields
1215 element->seqNum = pkt_msg.seq_num();
1216 element->type = pkt_msg.type();
1217 // Scale the compute delay to effectively scale the Trace CPU frequency
1218 element->compDelay = pkt_msg.comp_delay() * timeMultiplier;
1219
1220 // Repeated field robDepList
1221 element->clearRobDep();
1222 assert((pkt_msg.rob_dep()).size() <= element->maxRobDep);
1223 for (int i = 0; i < (pkt_msg.rob_dep()).size(); i++) {
1224 element->robDep[element->numRobDep] = pkt_msg.rob_dep(i);
1225 element->numRobDep += 1;
1226 }
1227
1228 // Repeated field
1229 element->clearRegDep();
1230 assert((pkt_msg.reg_dep()).size() <= TheISA::MaxInstSrcRegs);
1231 for (int i = 0; i < (pkt_msg.reg_dep()).size(); i++) {
1232 // There is a possibility that an instruction has both a register
1233 // and an order (ROB) dependency on the same instruction. In such a
1234 // case, the register dependency is omitted.
1235 bool duplicate = false;
1236 for (int j = 0; j < element->numRobDep; j++) {
1237 duplicate |= (pkt_msg.reg_dep(i) == element->robDep[j]);
1238 }
1239 if (!duplicate) {
1240 element->regDep[element->numRegDep] = pkt_msg.reg_dep(i);
1241 element->numRegDep += 1;
1242 }
1243 }
1244
1245 // Optional fields
1246 if (pkt_msg.has_p_addr())
1247 element->physAddr = pkt_msg.p_addr();
1248 else
1249 element->physAddr = 0;
1250
1251 if (pkt_msg.has_v_addr())
1252 element->virtAddr = pkt_msg.v_addr();
1253 else
1254 element->virtAddr = 0;
1255
1256 if (pkt_msg.has_size())
1257 element->size = pkt_msg.size();
1258 else
1259 element->size = 0;
1260
1261 if (pkt_msg.has_flags())
1262 element->flags = pkt_msg.flags();
1263 else
1264 element->flags = 0;
1265
1266 if (pkt_msg.has_pc())
1267 element->pc = pkt_msg.pc();
1268 else
1269 element->pc = 0;
1270
1271 // ROB occupancy number
1272 ++microOpCount;
1273 if (pkt_msg.has_weight()) {
1274 microOpCount += pkt_msg.weight();
1275 }
1276 element->robNum = microOpCount;
1277 return true;
1278 }
1279
1280 // We have reached the end of the file
1281 return false;
1282 }
1283
1284 bool
1285 TraceCPU::ElasticDataGen::GraphNode::removeRegDep(NodeSeqNum reg_dep)
1286 {
1287 for (auto& own_reg_dep : regDep) {
1288 if (own_reg_dep == reg_dep) {
1289 // If register dependency is found, make it zero and return true
1290 own_reg_dep = 0;
1291 assert(numRegDep > 0);
1292 --numRegDep;
1293 DPRINTFR(TraceCPUData, "\tFor %lli: Marking register dependency %lli "
1294 "done.\n", seqNum, reg_dep);
1295 return true;
1296 }
1297 }
1298
1299 // Return false if the dependency is not found
1300 return false;
1301 }
1302
1303 bool
1304 TraceCPU::ElasticDataGen::GraphNode::removeRobDep(NodeSeqNum rob_dep)
1305 {
1306 for (auto& own_rob_dep : robDep) {
1307 if (own_rob_dep == rob_dep) {
1308 // If the rob dependency is found, make it zero and return true
1309 own_rob_dep = 0;
1310 assert(numRobDep > 0);
1311 --numRobDep;
1312 DPRINTFR(TraceCPUData, "\tFor %lli: Marking ROB dependency %lli "
1313 "done.\n", seqNum, rob_dep);
1314 return true;
1315 }
1316 }
1317 return false;
1318 }
1319
1320 void
1321 TraceCPU::ElasticDataGen::GraphNode::clearRegDep() {
1322 for (auto& own_reg_dep : regDep) {
1323 own_reg_dep = 0;
1324 }
1325 numRegDep = 0;
1326 }
1327
1328 void
1329 TraceCPU::ElasticDataGen::GraphNode::clearRobDep() {
1330 for (auto& own_rob_dep : robDep) {
1331 own_rob_dep = 0;
1332 }
1333 numRobDep = 0;
1334 }
1335
1336 bool
1337 TraceCPU::ElasticDataGen::GraphNode::removeDepOnInst(NodeSeqNum done_seq_num)
1338 {
1339 // If it is an rob dependency then remove it
1340 if (!removeRobDep(done_seq_num)) {
1341 // If it is not an rob dependency then it must be a register dependency
1342 // If the register dependency is not found, it violates an assumption
1343 // and must be caught by assert.
1344 bool regdep_found M5_VAR_USED = removeRegDep(done_seq_num);
1345 assert(regdep_found);
1346 }
1347 // Return true if the node is dependency free
1348 return (numRobDep == 0 && numRegDep == 0);
1349 }
1350
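// Debug helper: print the node's fields and its dependency lists in a
// comma-separated, trace-like format.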
1351 void
1352 TraceCPU::ElasticDataGen::GraphNode::writeElementAsTrace() const
1353 {
1354 DPRINTFR(TraceCPUData, "%lli", seqNum);
1355 DPRINTFR(TraceCPUData, ",%s", typeToStr());
1356 if (isLoad() || isStore()) {
1357 DPRINTFR(TraceCPUData, ",%i", physAddr);
1358 DPRINTFR(TraceCPUData, ",%i", size);
1359 DPRINTFR(TraceCPUData, ",%i", flags);
1360 }
1361 DPRINTFR(TraceCPUData, ",%lli", compDelay);
1362 int i = 0;
1363 DPRINTFR(TraceCPUData, "robDep:");
1364 while (robDep[i] != 0) {
1365 DPRINTFR(TraceCPUData, ",%lli", robDep[i]);
1366 i++;
1367 }
1368 i = 0;
1369 DPRINTFR(TraceCPUData, "regDep:");
1370 while (regDep[i] != 0) {
1371 DPRINTFR(TraceCPUData, ",%lli", regDep[i]);
1372 i++;
1373 }
1374 auto child_itr = dependents.begin();
1375 DPRINTFR(TraceCPUData, "dependents:");
1376 while (child_itr != dependents.end()) {
1377 DPRINTFR(TraceCPUData, ":%lli", (*child_itr)->seqNum);
1378 child_itr++;
1379 }
1380
1381 DPRINTFR(TraceCPUData, "\n");
1382 }
1383
1384 std::string
1385 TraceCPU::ElasticDataGen::GraphNode::typeToStr() const
1386 {
1387 return Record::RecordType_Name(type);
1388 }
1389
1390 TraceCPU::FixedRetryGen::InputStream::InputStream(const std::string& filename)
1391 : trace(filename)
1392 {
1393 // Create a protobuf message for the header and read it from the stream
1394 ProtoMessage::PacketHeader header_msg;
1395 if (!trace.read(header_msg)) {
1396 panic("Failed to read packet header from %s\n", filename);
1397 }
1398
1399 if (header_msg.tick_freq() != SimClock::Frequency) {
1400 panic("Trace %s was recorded with a different tick frequency %d\n",
1401 filename, header_msg.tick_freq());
1402 }
1403 }
1404
1405 void
1406 TraceCPU::FixedRetryGen::InputStream::reset()
1407 {
1408 trace.reset();
1409 }
1410
1411 bool
1412 TraceCPU::FixedRetryGen::InputStream::read(TraceElement* element)
1413 {
1414 ProtoMessage::Packet pkt_msg;
1415 if (trace.read(pkt_msg)) {
1416 element->cmd = pkt_msg.cmd();
1417 element->addr = pkt_msg.addr();
1418 element->blocksize = pkt_msg.size();
1419 element->tick = pkt_msg.tick();
1420 element->flags = pkt_msg.has_flags() ? pkt_msg.flags() : 0;
1421 element->pc = pkt_msg.has_pc() ? pkt_msg.pc() : 0;
1422 return true;
1423 }
1424
1425 // We have reached the end of the file
1426 return false;
1427 }