1 /*
2 * Copyright (c) 2013 - 2016 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 #include "cpu/trace/trace_cpu.hh"
39
40 #include "sim/sim_exit.hh"
41
42 // Declare and initialize the static counter for number of trace CPUs.
43 int TraceCPU::numTraceCPUs = 0;
44
45 TraceCPU::TraceCPU(TraceCPUParams *params)
46 : BaseCPU(params),
47 icachePort(this),
48 dcachePort(this),
49 instMasterID(params->system->getMasterId(this, "inst")),
50 dataMasterID(params->system->getMasterId(this, "data")),
51 instTraceFile(params->instTraceFile),
52 dataTraceFile(params->dataTraceFile),
53 icacheGen(*this, "iside", icachePort, instMasterID, instTraceFile),
54 dcacheGen(*this, "dside", dcachePort, dataMasterID, dataTraceFile,
55 params),
56 icacheNextEvent([this]{ schedIcacheNext(); }, name()),
57 dcacheNextEvent([this]{ schedDcacheNext(); }, name()),
58 oneTraceComplete(false),
59 traceOffset(0),
60 execCompleteEvent(nullptr),
61 enableEarlyExit(params->enableEarlyExit),
62 progressMsgInterval(params->progressMsgInterval),
63 progressMsgThreshold(params->progressMsgInterval), traceStats(this)
64 {
65 // Increment static counter for number of Trace CPUs.
66 ++TraceCPU::numTraceCPUs;
67
68 // Check that the python parameters for sizes of ROB, store buffer and
69 // load buffer do not overflow the corresponding C++ variables.
70 fatal_if(params->sizeROB > UINT16_MAX, "ROB size set to %d exceeds the "
71 "max. value of %d.\n", params->sizeROB, UINT16_MAX);
72 fatal_if(params->sizeStoreBuffer > UINT16_MAX, "Store buffer size set "
73 "to %d exceeds the max. value of %d.\n", params->sizeStoreBuffer,
74 UINT16_MAX);
75 fatal_if(params->sizeLoadBuffer > UINT16_MAX, "Load buffer size set to"
76 " %d exceeds the max. value of %d.\n",
77 params->sizeLoadBuffer, UINT16_MAX);
78 }
79
80 TraceCPU::~TraceCPU()
81 {
82
83 }
84
85 TraceCPU*
86 TraceCPUParams::create()
87 {
88 return new TraceCPU(this);
89 }
90
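// Update the stat for simulated micro-ops from the ROB number of the last
// completed node and print a progress message each time the configured
// commit interval is crossed.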
91 void
92 TraceCPU::updateNumOps(uint64_t rob_num)
93 {
94 traceStats.numOps = rob_num;
95 if (progressMsgInterval != 0 &&
96 traceStats.numOps.value() >= progressMsgThreshold) {
97 inform("%s: %i insts committed\n", name(), progressMsgThreshold);
98 progressMsgThreshold += progressMsgInterval;
99 }
100 }
101
102 void
103 TraceCPU::takeOverFrom(BaseCPU *oldCPU)
104 {
105 // Unbind the ports of the old CPU and bind the ports of the TraceCPU.
106 getInstPort().takeOverFrom(&oldCPU->getInstPort());
107 getDataPort().takeOverFrom(&oldCPU->getDataPort());
108 }
109
110 void
111 TraceCPU::init()
112 {
113 DPRINTF(TraceCPUInst, "Instruction fetch request trace file is \"%s\"."
114 "\n", instTraceFile);
115 DPRINTF(TraceCPUData, "Data memory request trace file is \"%s\".\n",
116 dataTraceFile);
117
118 BaseCPU::init();
119
120 // Get the send tick of the first instruction read request
121 Tick first_icache_tick = icacheGen.init();
122
123 // Get the send tick of the first data read/write request
124 Tick first_dcache_tick = dcacheGen.init();
125
126 // Set the trace offset as the minimum of that in both traces
127 traceOffset = std::min(first_icache_tick, first_dcache_tick);
128 inform("%s: Time offset (tick) found as min of both traces is %lli.\n",
129 name(), traceOffset);
130
131 // Schedule next icache and dcache event by subtracting the offset
132 schedule(icacheNextEvent, first_icache_tick - traceOffset);
133 schedule(dcacheNextEvent, first_dcache_tick - traceOffset);
134
135 // Adjust the trace offset for the dcache generator's ready nodes
136 // We don't need to do this for the icache generator as it will
137 // send its first request at the first event and schedule subsequent
138 // events using a relative tick delta
139 dcacheGen.adjustInitTraceOffset(traceOffset);
140
141 // If the Trace CPU simulation is configured to exit on any one trace
142 // completion then we don't need a counted event to count down all Trace
143 // CPUs in the system. If not then instantiate a counted event.
144 if (!enableEarlyExit) {
145 // The static counter for number of Trace CPUs is correctly set at
146 // this point so create an event and pass it.
147 execCompleteEvent = new CountedExitEvent("end of all traces reached.",
148 numTraceCPUs);
149 }
150
151 }
152
153 void
154 TraceCPU::schedIcacheNext()
155 {
156 DPRINTF(TraceCPUInst, "IcacheGen event.\n");
157
158 // Try to send the current packet or a retry packet if there is one
159 bool sched_next = icacheGen.tryNext();
160 // If packet sent successfully, schedule next event
161 if (sched_next) {
162 DPRINTF(TraceCPUInst, "Scheduling next icacheGen event "
163 "at %d.\n", curTick() + icacheGen.tickDelta());
164 schedule(icacheNextEvent, curTick() + icacheGen.tickDelta());
165 ++traceStats.numSchedIcacheEvent;
166 } else {
167 // check if traceComplete. If not, do nothing because sending failed
168 // and next event will be scheduled via RecvRetry()
169 if (icacheGen.isTraceComplete()) {
170 // If this is the first trace to complete, set the variable. If it
171 // is already set then both traces are complete and the simulation can exit.
172 checkAndSchedExitEvent();
173 }
174 }
175 return;
176 }
177
178 void
179 TraceCPU::schedDcacheNext()
180 {
181 DPRINTF(TraceCPUData, "DcacheGen event.\n");
182
183 // Update stat for numCycles
184 numCycles = clockEdge() / clockPeriod();
185
186 dcacheGen.execute();
187 if (dcacheGen.isExecComplete()) {
188 checkAndSchedExitEvent();
189 }
190 }
191
192 void
193 TraceCPU::checkAndSchedExitEvent()
194 {
195 if (!oneTraceComplete) {
196 oneTraceComplete = true;
197 } else {
198 // Schedule event to indicate execution is complete as both
199 // instruction and data access traces have been played back.
200 inform("%s: Execution complete.\n", name());
201 // If the replay is configured to exit early, i.e. as soon as any one
202 // trace completes, then exit immediately. Otherwise, schedule the
203 // counted exit event that counts down the completion of each Trace
204 // CPU.
205 if (enableEarlyExit) {
206 exitSimLoop("End of trace reached");
207 } else {
208 schedule(*execCompleteEvent, curTick());
209 }
210 }
211 }
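
// Top-level statistics group for the Trace CPU.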
212 TraceCPU::TraceStats::TraceStats(TraceCPU *trace)
213 : Stats::Group(trace),
214 ADD_STAT(numSchedDcacheEvent,
215 "Number of events scheduled to trigger data request generator"),
216 ADD_STAT(numSchedIcacheEvent,
217 "Number of events scheduled to trigger instruction request generator"),
218 ADD_STAT(numOps, "Number of micro-ops simulated by the Trace CPU"),
219 ADD_STAT(cpi, "Cycles per micro-op used as a proxy for CPI",
220 trace->numCycles / numOps)
221 {
222 cpi.precision(6);
223 }
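
// Statistics for the elastic data-side request generator.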
224 TraceCPU::ElasticDataGen::
225 ElasticDataGenStatGroup::ElasticDataGenStatGroup(Stats::Group *parent,
226 const std::string& _name)
227 : Stats::Group(parent, _name.c_str()),
228 ADD_STAT(maxDependents, "Max number of dependents observed on a node"),
229 ADD_STAT(maxReadyListSize, "Max size of the ready list observed"),
230 ADD_STAT(numSendAttempted, "Number of first attempts to send a request"),
231 ADD_STAT(numSendSucceeded, "Number of successful first attempts"),
232 ADD_STAT(numSendFailed, "Number of failed first attempts"),
233 ADD_STAT(numRetrySucceeded, "Number of successful retries"),
234 ADD_STAT(numSplitReqs, "Number of split requests"),
235 ADD_STAT(numSOLoads, "Number of strictly ordered loads"),
236 ADD_STAT(numSOStores, "Number of strictly ordered stores"),
237 ADD_STAT(dataLastTick, "Last tick simulated from the elastic data trace")
238 {
239 }
240
241 Tick
242 TraceCPU::ElasticDataGen::init()
243 {
244 DPRINTF(TraceCPUData, "Initializing data memory request generator "
245 "DcacheGen: elastic issue with retry.\n");
246
247 if (!readNextWindow())
248 panic("Trace has %d elements. It must have at least %d elements.\n",
249 depGraph.size(), 2 * windowSize);
250 DPRINTF(TraceCPUData, "After 1st read, depGraph size:%d.\n",
251 depGraph.size());
252
253 if (!readNextWindow())
254 panic("Trace has %d elements. It must have at least %d elements.\n",
255 depGraph.size(), 2 * windowSize);
256 DPRINTF(TraceCPUData, "After 2st read, depGraph size:%d.\n",
257 depGraph.size());
258
259 // Print readyList
260 if (DTRACE(TraceCPUData)) {
261 printReadyList();
262 }
263 auto free_itr = readyList.begin();
264 DPRINTF(TraceCPUData, "Execute tick of the first dependency free node %lli"
265 " is %d.\n", free_itr->seqNum, free_itr->execTick);
266 // Return the execute tick of the earliest ready node so that an event
267 // can be scheduled to call execute()
268 return (free_itr->execTick);
269 }
270
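// Subtract the common trace offset from the execute tick of nodes already
// in the readyList so that replay starts relative to tick 0.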
271 void
272 TraceCPU::ElasticDataGen::adjustInitTraceOffset(Tick& offset) {
273 for (auto& free_node : readyList) {
274 free_node.execTick -= offset;
275 }
276 }
277
278 void
279 TraceCPU::ElasticDataGen::exit()
280 {
281 trace.reset();
282 }
283
284 bool
285 TraceCPU::ElasticDataGen::readNextWindow()
286 {
287
288 // Read and add next window
289 DPRINTF(TraceCPUData, "Reading next window from file.\n");
290
291 if (traceComplete) {
292 // We are at the end of the file, thus we have no more records.
293 // Return false.
294 return false;
295 }
296
297 DPRINTF(TraceCPUData, "Start read: Size of depGraph is %d.\n",
298 depGraph.size());
299
300 uint32_t num_read = 0;
301 while (num_read != windowSize) {
302
303 // Create a new graph node
304 GraphNode* new_node = new GraphNode;
305
306 // Read the next line to get the next record. If that fails then end of
307 // trace has been reached and traceComplete needs to be set in addition
308 // to returning false.
309 if (!trace.read(new_node)) {
310 DPRINTF(TraceCPUData, "\tTrace complete!\n");
311 traceComplete = true;
312 delete new_node;
313 return false;
314 }
315 // Annotate the ROB dependencies of the new node onto the parent nodes.
316 addDepsOnParent(new_node, new_node->robDep, new_node->numRobDep);
317 // Annotate the register dependencies of the new node onto the parent
318 // nodes.
319 addDepsOnParent(new_node, new_node->regDep, new_node->numRegDep);
320
321 num_read++;
322 // Add to map
323 depGraph[new_node->seqNum] = new_node;
324 if (new_node->numRobDep == 0 && new_node->numRegDep == 0) {
325 // Source dependencies are already complete, check if resources
326 // are available and issue. The execution time is approximated
327 // to current time plus the computational delay.
328 checkAndIssue(new_node);
329 }
330 }
331
332 DPRINTF(TraceCPUData, "End read: Size of depGraph is %d.\n",
333 depGraph.size());
334 return true;
335 }
336
337 template<typename T> void
338 TraceCPU::ElasticDataGen::addDepsOnParent(GraphNode *new_node,
339 T& dep_array, uint8_t& num_dep)
340 {
341 for (auto& a_dep : dep_array) {
342 // The convention is to set the dependencies starting with the first
343 // index in the ROB and register dependency arrays. Thus, when we reach
344 // a dependency equal to the initialisation value of zero, we know we
345 // have iterated over all dependencies and can break.
346 if (a_dep == 0)
347 break;
348 // We look up the valid dependency, i.e. the parent of this node
349 auto parent_itr = depGraph.find(a_dep);
350 if (parent_itr != depGraph.end()) {
351 // If the parent is found, it is yet to be executed. Append a
352 // pointer to the new node to the dependents list of the parent
353 // node.
354 parent_itr->second->dependents.push_back(new_node);
355 auto num_depts = parent_itr->second->dependents.size();
356 elasticStats.maxDependents = std::max<double>(num_depts,
357 elasticStats.maxDependents.value());
358 } else {
359 // The dependency is not found in the graph. So consider
360 // the execution of the parent is complete, i.e. remove this
361 // dependency.
362 a_dep = 0;
363 num_dep--;
364 }
365 }
366 }
367
368 void
369 TraceCPU::ElasticDataGen::execute()
370 {
371 DPRINTF(TraceCPUData, "Execute start occupancy:\n");
372 DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, "
373 "depFreeQueue = %d ,", depGraph.size(), readyList.size(),
374 depFreeQueue.size());
375 hwResource.printOccupancy();
376
377 // Read next window to make sure that dependents of all dep-free nodes
378 // are in the depGraph
379 if (nextRead) {
380 readNextWindow();
381 nextRead = false;
382 }
383
384 // First attempt to issue the pending dependency-free nodes held
385 // in depFreeQueue. If resources have become available for a node,
386 // then issue it, i.e. add the node to readyList.
387 while (!depFreeQueue.empty()) {
388 if (checkAndIssue(depFreeQueue.front(), false)) {
389 DPRINTF(TraceCPUData, "Removing from depFreeQueue: seq. num "
390 "%lli.\n", (depFreeQueue.front())->seqNum);
391 depFreeQueue.pop();
392 } else {
393 break;
394 }
395 }
396 // Proceed to execute from readyList
397 auto graph_itr = depGraph.begin();
398 auto free_itr = readyList.begin();
399 // Iterate through readyList until the next free node has its execute
400 // tick later than curTick or the end of readyList is reached
401 while (free_itr != readyList.end() && free_itr->execTick <= curTick()) {
402
403 // Get pointer to the node to be executed
404 graph_itr = depGraph.find(free_itr->seqNum);
405 assert(graph_itr != depGraph.end());
406 GraphNode* node_ptr = graph_itr->second;
407
408 // If there is a retryPkt send that else execute the load
409 if (retryPkt) {
410 // The retryPkt must be the request that was created by the
411 // first node in the readyList.
412 if (retryPkt->req->getReqInstSeqNum() != node_ptr->seqNum) {
413 panic("Retry packet's seqence number does not match "
414 "the first node in the readyList.\n");
415 }
416 if (port.sendTimingReq(retryPkt)) {
417 ++elasticStats.numRetrySucceeded;
418 retryPkt = nullptr;
419 }
420 } else if (node_ptr->isLoad() || node_ptr->isStore()) {
421 // If there is no retryPkt, attempt to send a memory request in
422 // case of a load or store node. If the send fails, executeMemReq()
423 // returns a packet pointer, which we save in retryPkt. In case of
424 // a comp node we don't do anything and simply continue as if the
425 // execution of the comp node succeeded.
426 retryPkt = executeMemReq(node_ptr);
427 }
428 // If the retryPkt or a new load/store node failed, we exit from here
429 // as a retry from cache will bring the control to execute(). The
430 // first node in readyList then, will be the failed node.
431 if (retryPkt) {
432 break;
433 }
434
435 // Proceed to remove dependencies for the successfully executed node.
436 // If it is a load which is not strictly ordered and we sent a
437 // request for it successfully, we do not yet mark any register
438 // dependencies complete. But, as per the dependency modelling, the
439 // ROB dependencies of load and non load/store dependents are satisfied
440 // by successfully sending the load, so they are marked complete here.
441 if (node_ptr->isLoad() && !node_ptr->isStrictlyOrdered()) {
442 // If execute succeeded mark its dependents as complete
443 DPRINTF(TraceCPUData, "Node seq. num %lli sent. Waking up "
444 "dependents..\n", node_ptr->seqNum);
445
446 auto child_itr = (node_ptr->dependents).begin();
447 while (child_itr != (node_ptr->dependents).end()) {
448 // ROB dependency of a store on a load must not be removed
449 // after load is sent but after response is received
450 if (!(*child_itr)->isStore() &&
451 (*child_itr)->removeRobDep(node_ptr->seqNum)) {
452
453 // Check if the child node has become dependency free
454 if ((*child_itr)->numRobDep == 0 &&
455 (*child_itr)->numRegDep == 0) {
456
457 // Source dependencies are complete, check if
458 // resources are available and issue
459 checkAndIssue(*child_itr);
460 }
461 // Remove this child for the sent load and point to new
462 // location of the element following the erased element
463 child_itr = node_ptr->dependents.erase(child_itr);
464 } else {
465 // This child is not dependency-free, point to the next
466 // child
467 child_itr++;
468 }
469 }
470 } else {
471 // If it is a strictly ordered load mark its dependents as complete
472 // as we do not send a request for this case. If it is a store or a
473 // comp node we also mark all its dependents complete.
474 DPRINTF(TraceCPUData, "Node seq. num %lli done. Waking"
475 " up dependents..\n", node_ptr->seqNum);
476
477 for (auto child : node_ptr->dependents) {
478 // If the child node is dependency free removeDepOnInst()
479 // returns true.
480 if (child->removeDepOnInst(node_ptr->seqNum)) {
481 // Source dependencies are complete, check if resources
482 // are available and issue
483 checkAndIssue(child);
484 }
485 }
486 }
487
488 // After executing the node, remove from readyList and delete node.
489 readyList.erase(free_itr);
490 // If it is a cacheable load which was sent, don't delete
491 // just yet. Delete it in completeMemAccess() after the
492 // response is received. If it is a strictly ordered
493 // load, it was not sent and all dependencies were simply
494 // marked complete. Thus it is safe to delete it. For
495 // stores and non load/store nodes all dependencies were
496 // marked complete so it is safe to delete it.
497 if (!node_ptr->isLoad() || node_ptr->isStrictlyOrdered()) {
498 // Release all resources occupied by the completed node
499 hwResource.release(node_ptr);
500 // clear the dynamically allocated set of dependents
501 (node_ptr->dependents).clear();
502 // Update the stat for numOps simulated
503 owner.updateNumOps(node_ptr->robNum);
504 // delete node
505 delete node_ptr;
506 // remove from graph
507 depGraph.erase(graph_itr);
508 }
509 // Point to first node to continue to next iteration of while loop
510 free_itr = readyList.begin();
511 } // end of while loop
512
513 // Print readyList, sizes of queues and resource status after updating
514 if (DTRACE(TraceCPUData)) {
515 printReadyList();
516 DPRINTF(TraceCPUData, "Execute end occupancy:\n");
517 DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, "
518 "depFreeQueue = %d ,", depGraph.size(), readyList.size(),
519 depFreeQueue.size());
520 hwResource.printOccupancy();
521 }
522
523 if (retryPkt) {
524 DPRINTF(TraceCPUData, "Not scheduling an event as expecting a retry"
525 "event from the cache for seq. num %lli.\n",
526 retryPkt->req->getReqInstSeqNum());
527 return;
528 }
529 // If the size of the dependency graph is less than the dependency window
530 // then read from the trace file to populate the graph next time we are in
531 // execute.
532 if (depGraph.size() < windowSize && !traceComplete)
533 nextRead = true;
534
535 // If cache is not blocked, schedule an event for the first execTick in
536 // readyList else retry from cache will schedule the event. If the ready
537 // list is empty then check if the next pending node has resources
538 // available to issue. If yes, then schedule an event for the next cycle.
539 if (!readyList.empty()) {
540 Tick next_event_tick = std::max(readyList.begin()->execTick,
541 curTick());
542 DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
543 next_event_tick);
544 owner.schedDcacheNextEvent(next_event_tick);
545 } else if (readyList.empty() && !depFreeQueue.empty() &&
546 hwResource.isAvailable(depFreeQueue.front())) {
547 DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
548 owner.clockEdge(Cycles(1)));
549 owner.schedDcacheNextEvent(owner.clockEdge(Cycles(1)));
550 }
551
552 // If trace is completely read, readyList is empty and depGraph is empty,
553 // set execComplete to true
554 if (depGraph.empty() && readyList.empty() && traceComplete &&
555 !hwResource.awaitingResponse()) {
556 DPRINTF(TraceCPUData, "\tExecution Complete!\n");
557 execComplete = true;
558 elasticStats.dataLastTick = curTick();
559 }
560 }
561
562 PacketPtr
563 TraceCPU::ElasticDataGen::executeMemReq(GraphNode* node_ptr)
564 {
565
566 DPRINTF(TraceCPUData, "Executing memory request %lli (phys addr %d, "
567 "virt addr %d, pc %#x, size %d, flags %d).\n",
568 node_ptr->seqNum, node_ptr->physAddr, node_ptr->virtAddr,
569 node_ptr->pc, node_ptr->size, node_ptr->flags);
570
571 // If the request is strictly ordered, do not send it. Just return nullptr
572 // as if it was successfully sent.
573 if (node_ptr->isStrictlyOrdered()) {
574 node_ptr->isLoad() ? ++elasticStats.numSOLoads :
575 ++elasticStats.numSOStores;
576 DPRINTF(TraceCPUData, "Skipping strictly ordered request %lli.\n",
577 node_ptr->seqNum);
578 return nullptr;
579 }
580
581 // Check if the request spans two cache lines as this condition triggers
582 // an assert fail in the L1 cache. If it does then truncate the size to
583 // access only until the end of that line and ignore the remainder. The
584 // stat counting this is useful to keep a check on how frequently this
585 // happens. If required the code could be revised to mimic splitting such
586 // a request into two.
587 unsigned blk_size = owner.cacheLineSize();
588 Addr blk_offset = (node_ptr->physAddr & (Addr)(blk_size - 1));
589 if (!(blk_offset + node_ptr->size <= blk_size)) {
590 node_ptr->size = blk_size - blk_offset;
591 ++elasticStats.numSplitReqs;
592 }
593
594 // Create a request and the packet containing request
595 auto req = std::make_shared<Request>(
596 node_ptr->physAddr, node_ptr->size, node_ptr->flags, masterID);
597 req->setReqInstSeqNum(node_ptr->seqNum);
598
599 // If this is not done it triggers assert in L1 cache for invalid contextId
600 req->setContext(ContextID(0));
601
602 req->setPC(node_ptr->pc);
603 // If virtual address is valid, set the virtual address field
604 // of the request.
605 if (node_ptr->virtAddr != 0) {
606 req->setVirt(node_ptr->virtAddr, node_ptr->size,
607 node_ptr->flags, masterID, node_ptr->pc);
608 req->setPaddr(node_ptr->physAddr);
609 req->setReqInstSeqNum(node_ptr->seqNum);
610 }
611
612 PacketPtr pkt;
613 uint8_t* pkt_data = new uint8_t[req->getSize()];
614 if (node_ptr->isLoad()) {
615 pkt = Packet::createRead(req);
616 } else {
617 pkt = Packet::createWrite(req);
618 memset(pkt_data, 0xA, req->getSize());
619 }
620 pkt->dataDynamic(pkt_data);
621
622 // Call MasterPort method to send a timing request for this packet
623 bool success = port.sendTimingReq(pkt);
624 ++elasticStats.numSendAttempted;
625
626 if (!success) {
627 // If it fails, return the packet to retry when a retry is signalled by
628 // the cache
629 ++elasticStats.numSendFailed;
630 DPRINTF(TraceCPUData, "Send failed. Saving packet for retry.\n");
631 return pkt;
632 } else {
633 // If it succeeds, return nullptr
634 ++elasticStats.numSendSucceeded;
635 return nullptr;
636 }
637 }
638
639 bool
640 TraceCPU::ElasticDataGen::checkAndIssue(const GraphNode* node_ptr, bool first)
641 {
642 // Assert the node is dependency-free
643 assert(node_ptr->numRobDep == 0 && node_ptr->numRegDep == 0);
644
645 // If this is the first attempt, print a debug message to indicate this.
646 if (first) {
647 DPRINTFR(TraceCPUData, "\t\tseq. num %lli(%s) with rob num %lli is now"
648 " dependency free.\n", node_ptr->seqNum, node_ptr->typeToStr(),
649 node_ptr->robNum);
650 }
651
652 // Check if resources are available to issue the specific node
653 if (hwResource.isAvailable(node_ptr)) {
654 // If resources are free only then add to readyList
655 DPRINTFR(TraceCPUData, "\t\tResources available for seq. num %lli. Adding"
656 " to readyList, occupying resources.\n", node_ptr->seqNum);
657 // Compute the execute tick by adding the compute delay for the node
658 // and add the ready node to the ready list
659 addToSortedReadyList(node_ptr->seqNum,
660 owner.clockEdge() + node_ptr->compDelay);
661 // Account for the resources taken up by this issued node.
662 hwResource.occupy(node_ptr);
663 return true;
664
665 } else {
666 if (first) {
667 // Although dependencies are complete, resources are not available.
668 DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num %lli."
669 " Adding to depFreeQueue.\n", node_ptr->seqNum);
670 depFreeQueue.push(node_ptr);
671 } else {
672 DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num %lli. "
673 "Still pending issue.\n", node_ptr->seqNum);
674 }
675 return false;
676 }
677 }
678
679 void
680 TraceCPU::ElasticDataGen::completeMemAccess(PacketPtr pkt)
681 {
682 // Release the resources for this completed node.
683 if (pkt->isWrite()) {
684 // Consider store complete.
685 hwResource.releaseStoreBuffer();
686 // If it is a store response then do nothing since we do not model
687 // dependencies on store completion in the trace. But if we were
688 // blocking execution due to store buffer fullness, we need to schedule
689 // an event and attempt to progress.
690 } else {
691 // If it is a load response then release the dependents waiting on it.
692 // Get pointer to the completed load
693 auto graph_itr = depGraph.find(pkt->req->getReqInstSeqNum());
694 assert(graph_itr != depGraph.end());
695 GraphNode* node_ptr = graph_itr->second;
696
697 // Release resources occupied by the load
698 hwResource.release(node_ptr);
699
700 DPRINTF(TraceCPUData, "Load seq. num %lli response received. Waking up"
701 " dependents..\n", node_ptr->seqNum);
702
703 for (auto child : node_ptr->dependents) {
704 if (child->removeDepOnInst(node_ptr->seqNum)) {
705 checkAndIssue(child);
706 }
707 }
708
709 // clear the dynamically allocated set of dependents
710 (node_ptr->dependents).clear();
711 // Update the stat for numOps completed
712 owner.updateNumOps(node_ptr->robNum);
713 // delete node
714 delete node_ptr;
715 // remove from graph
716 depGraph.erase(graph_itr);
717 }
718
719 if (DTRACE(TraceCPUData)) {
720 printReadyList();
721 }
722
723 // If the size of the dependency graph is less than the dependency window
724 // then read from the trace file to populate the graph next time we are in
725 // execute.
726 if (depGraph.size() < windowSize && !traceComplete)
727 nextRead = true;
728
729 // If not waiting for retry, attempt to schedule next event
730 if (!retryPkt) {
731 // We might have new dep-free nodes in the list which will have execute
732 // tick greater than or equal to curTick. But a new dep-free node might
733 // have its execute tick earlier. Therefore, attempt to reschedule. It
734 // could happen that the readyList is empty and we got here via a
735 // last remaining response. So, either the trace is complete or there
736 // are pending nodes in the depFreeQueue. The checking is done in the
737 // execute() control flow, so schedule an event to go via that flow.
738 Tick next_event_tick = readyList.empty() ? owner.clockEdge(Cycles(1)) :
739 std::max(readyList.begin()->execTick, owner.clockEdge(Cycles(1)));
740 DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
741 next_event_tick);
742 owner.schedDcacheNextEvent(next_event_tick);
743 }
744 }
745
746 void
747 TraceCPU::ElasticDataGen::addToSortedReadyList(NodeSeqNum seq_num,
748 Tick exec_tick)
749 {
750 ReadyNode ready_node;
751 ready_node.seqNum = seq_num;
752 ready_node.execTick = exec_tick;
753
754 // Iterator to readyList
755 auto itr = readyList.begin();
756
757 // If the readyList is empty, simply insert the new node at the beginning
758 // and return
759 if (itr == readyList.end()) {
760 readyList.insert(itr, ready_node);
761 elasticStats.maxReadyListSize = std::max<double>(readyList.size(),
762 elasticStats.maxReadyListSize.value());
763 return;
764 }
765
766 // If the first node in the list is the one whose packet is awaiting a
767 // retry, skip past it so that it keeps its position at the front of the
768 // list even though the new node may have an earlier or equal exec tick.
769 if (retryPkt)
770 if (retryPkt->req->getReqInstSeqNum() == itr->seqNum)
771 itr++;
772
773 // Increment the iterator and compare the node pointed to by it to the new
774 // node till the position to insert the new node is found.
775 bool found = false;
776 while (!found && itr != readyList.end()) {
777 // If the execution tick of the new node is less than the node then
778 // this is the position to insert
779 if (exec_tick < itr->execTick)
780 found = true;
781 // If the execution tick of the new node is equal to the node then
782 // sort in ascending order of sequence numbers
783 else if (exec_tick == itr->execTick) {
784 // If the sequence number of the new node is less than the node
785 // then this is the position to insert
786 if (seq_num < itr->seqNum)
787 found = true;
788 // Else go to next node
789 else
790 itr++;
791 }
792 // If the execution tick of the new node is greater than the node then
793 // go to the next node
794 else
795 itr++;
796 }
797 readyList.insert(itr, ready_node);
798 // Update the stat for max size reached of the readyList
799 elasticStats.maxReadyListSize = std::max<double>(readyList.size(),
800 elasticStats.maxReadyListSize.value());
801 }
802
803 void
804 TraceCPU::ElasticDataGen::printReadyList() {
805
806 auto itr = readyList.begin();
807 if (itr == readyList.end()) {
808 DPRINTF(TraceCPUData, "readyList is empty.\n");
809 return;
810 }
811 DPRINTF(TraceCPUData, "Printing readyList:\n");
812 while (itr != readyList.end()) {
813 auto graph_itr = depGraph.find(itr->seqNum);
814 GraphNode* node_ptr M5_VAR_USED = graph_itr->second;
815 DPRINTFR(TraceCPUData, "\t%lld(%s), %lld\n", itr->seqNum,
816 node_ptr->typeToStr(), itr->execTick);
817 itr++;
818 }
819 }
820
821 TraceCPU::ElasticDataGen::HardwareResource::HardwareResource(
822 uint16_t max_rob, uint16_t max_stores, uint16_t max_loads)
823 : sizeROB(max_rob),
824 sizeStoreBuffer(max_stores),
825 sizeLoadBuffer(max_loads),
826 oldestInFlightRobNum(UINT64_MAX),
827 numInFlightLoads(0),
828 numInFlightStores(0)
829 {}
830
831 void
832 TraceCPU::ElasticDataGen::HardwareResource::occupy(const GraphNode* new_node)
833 {
834 // Occupy ROB entry for the issued node
835 // Merely maintain the oldest node, i.e. numerically least robNum by saving
836 // it in the variable oldestInFlightRobNum.
837 inFlightNodes[new_node->seqNum] = new_node->robNum;
838 oldestInFlightRobNum = inFlightNodes.begin()->second;
839
840 // Occupy Load/Store Buffer entry for the issued node if applicable
841 if (new_node->isLoad()) {
842 ++numInFlightLoads;
843 } else if (new_node->isStore()) {
844 ++numInFlightStores;
845 } // else if it is a non load/store node, no buffer entry is occupied
846
847 printOccupancy();
848 }
849
850 void
851 TraceCPU::ElasticDataGen::HardwareResource::release(const GraphNode* done_node)
852 {
853 assert(!inFlightNodes.empty());
854 DPRINTFR(TraceCPUData, "\tClearing done seq. num %d from inFlightNodes..\n",
855 done_node->seqNum);
856
857 assert(inFlightNodes.find(done_node->seqNum) != inFlightNodes.end());
858 inFlightNodes.erase(done_node->seqNum);
859
860 if (inFlightNodes.empty()) {
861 // If the only in-flight node was deleted, then reset
862 // oldestInFlightRobNum to its initialized (max) value.
863 oldestInFlightRobNum = UINT64_MAX;
864 } else {
865 // Set the oldest in-flight node rob number equal to the first node in
866 // the inFlightNodes since that will have the numerically least value.
867 oldestInFlightRobNum = inFlightNodes.begin()->second;
868 }
869
870 DPRINTFR(TraceCPUData, "\tCleared. inFlightNodes.size() = %d, "
871 "oldestInFlightRobNum = %d\n", inFlightNodes.size(),
872 oldestInFlightRobNum);
873
874 // A store is considered complete when a request is sent, thus ROB entry is
875 // freed. But it occupies an entry in the Store Buffer until its response
876 // is received. A load is considered complete when a response is received,
877 // thus both ROB and Load Buffer entries can be released.
878 if (done_node->isLoad()) {
879 assert(numInFlightLoads != 0);
880 --numInFlightLoads;
881 }
882 // For normal writes, we send the requests out and clear a store buffer
883 // entry on response. For writes which are strictly ordered, e.g.
884 // writes to device registers, we do that within release() which is called
885 // when node is executed and taken off from readyList.
886 if (done_node->isStore() && done_node->isStrictlyOrdered()) {
887 releaseStoreBuffer();
888 }
889 }
890
891 void
892 TraceCPU::ElasticDataGen::HardwareResource::releaseStoreBuffer()
893 {
894 assert(numInFlightStores != 0);
895 --numInFlightStores;
896 }
897
898 bool
899 TraceCPU::ElasticDataGen::HardwareResource::isAvailable(
900 const GraphNode* new_node) const
901 {
902 uint16_t num_in_flight_nodes;
903 if (inFlightNodes.empty()) {
904 num_in_flight_nodes = 0;
905 DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
906 " #in-flight nodes = 0", new_node->seqNum);
907 } else if (new_node->robNum > oldestInFlightRobNum) {
908 // This is the intuitive case where new dep-free node is younger
909 // instruction than the oldest instruction in-flight. Thus we make sure
910 // in_flight_nodes does not overflow.
911 num_in_flight_nodes = new_node->robNum - oldestInFlightRobNum;
912 DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
913 " #in-flight nodes = %d - %d = %d", new_node->seqNum,
914 new_node->robNum, oldestInFlightRobNum, num_in_flight_nodes);
915 } else {
916 // This is the case where an instruction older than the oldest in-
917 // flight instruction becomes dep-free. Thus we must have already
918 // accounted for the entry in ROB for this new dep-free node.
919 // Immediately after this check returns true, oldestInFlightRobNum will
920 // be updated in occupy(). We simply let this node issue now.
921 num_in_flight_nodes = 0;
922 DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
923 " new oldestInFlightRobNum = %d, #in-flight nodes ignored",
924 new_node->seqNum, new_node->robNum);
925 }
926 DPRINTFR(TraceCPUData, ", LQ = %d/%d, SQ = %d/%d.\n",
927 numInFlightLoads, sizeLoadBuffer,
928 numInFlightStores, sizeStoreBuffer);
929 // Check if resources are available to issue the specific node
930 if (num_in_flight_nodes >= sizeROB) {
931 return false;
932 }
933 if (new_node->isLoad() && numInFlightLoads >= sizeLoadBuffer) {
934 return false;
935 }
936 if (new_node->isStore() && numInFlightStores >= sizeStoreBuffer) {
937 return false;
938 }
939 return true;
940 }
941
942 bool
943 TraceCPU::ElasticDataGen::HardwareResource::awaitingResponse() const {
944 // Return true if there is at least one read or write request in flight
945 return (numInFlightStores != 0 || numInFlightLoads != 0);
946 }
947
948 void
949 TraceCPU::ElasticDataGen::HardwareResource::printOccupancy() {
950 DPRINTFR(TraceCPUData, "oldestInFlightRobNum = %d, "
951 "LQ = %d/%d, SQ = %d/%d.\n",
952 oldestInFlightRobNum,
953 numInFlightLoads, sizeLoadBuffer,
954 numInFlightStores, sizeStoreBuffer);
955 }
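
// Statistics for the fixed-latency instruction-side request generator.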
956 TraceCPU::FixedRetryGen::
957 FixedRetryGenStatGroup::FixedRetryGenStatGroup(Stats::Group *parent,
958 const std::string& _name)
959 : Stats::Group(parent, _name.c_str()),
960 ADD_STAT(numSendAttempted, "Number of first attempts to send a request"),
961 ADD_STAT(numSendSucceeded, "Number of successful first attempts"),
962 ADD_STAT(numSendFailed, "Number of failed first attempts"),
963 ADD_STAT(numRetrySucceeded, "Number of successful retries"),
964 ADD_STAT(instLastTick, "Last tick simulated from the fixed inst trace")
965 {
966
967 }
968
969 Tick
970 TraceCPU::FixedRetryGen::init()
971 {
972 DPRINTF(TraceCPUInst, "Initializing instruction fetch request generator"
973 " IcacheGen: fixed issue with retry.\n");
974
975 if (nextExecute()) {
976 DPRINTF(TraceCPUInst, "\tFirst tick = %d.\n", currElement.tick);
977 return currElement.tick;
978 } else {
979 panic("Read of first message in the trace failed.\n");
980 return MaxTick;
981 }
982 }
983
984 bool
985 TraceCPU::FixedRetryGen::tryNext()
986 {
987 // If there is a retry packet, try to send it
988 if (retryPkt) {
989
990 DPRINTF(TraceCPUInst, "Trying to send retry packet.\n");
991
992 if (!port.sendTimingReq(retryPkt)) {
993 // Still blocked! This should never occur.
994 DPRINTF(TraceCPUInst, "Retry packet sending failed.\n");
995 return false;
996 }
997 ++fixedStats.numRetrySucceeded;
998 } else {
999
1000 DPRINTF(TraceCPUInst, "Trying to send packet for currElement.\n");
1001
1002 // try sending current element
1003 assert(currElement.isValid());
1004
1005 ++fixedStats.numSendAttempted;
1006
1007 if (!send(currElement.addr, currElement.blocksize,
1008 currElement.cmd, currElement.flags, currElement.pc)) {
1009 DPRINTF(TraceCPUInst, "currElement sending failed.\n");
1010 ++fixedStats.numSendFailed;
1011 // return false to indicate not to schedule next event
1012 return false;
1013 } else {
1014 ++fixedStats.numSendSucceeded;
1015 }
1016 }
1017 // The packet, either retryPkt or currElement, was sent successfully.
1018 // Return true to indicate that the next event should be scheduled at
1019 // the current tick plus delta, or false if there is no next packet.
1020 DPRINTF(TraceCPUInst, "Packet sent successfully, trying to read next "
1021 "element.\n");
1022 retryPkt = nullptr;
1023 // Read next element into currElement, currElement gets cleared so save the
1024 // tick to calculate delta
1025 Tick last_tick = currElement.tick;
1026 if (nextExecute()) {
1027 assert(currElement.tick >= last_tick);
1028 delta = currElement.tick - last_tick;
1029 }
1030 return !traceComplete;
1031 }
1032
1033 void
1034 TraceCPU::FixedRetryGen::exit()
1035 {
1036 trace.reset();
1037 }
1038
1039 bool
1040 TraceCPU::FixedRetryGen::nextExecute()
1041 {
1042 if (traceComplete)
1043 // We are at the end of the file, thus we have no more messages.
1044 // Return false.
1045 return false;
1046
1047
1048 // Reset the currElement to the default values
1049 currElement.clear();
1050
1051 // Read the next line to get the next message. If that fails then end of
1052 // trace has been reached and traceComplete needs to be set in addition
1053 // to returning false. If successful then next message is in currElement.
1054 if (!trace.read(&currElement)) {
1055 traceComplete = true;
1056 fixedStats.instLastTick = curTick();
1057 return false;
1058 }
1059
1060 DPRINTF(TraceCPUInst, "inst fetch: %c addr %d pc %#x size %d tick %d\n",
1061 currElement.cmd.isRead() ? 'r' : 'w',
1062 currElement.addr,
1063 currElement.pc,
1064 currElement.blocksize,
1065 currElement.tick);
1066
1067 return true;
1068 }
1069
1070 bool
1071 TraceCPU::FixedRetryGen::send(Addr addr, unsigned size, const MemCmd& cmd,
1072 Request::FlagsType flags, Addr pc)
1073 {
1074
1075 // Create new request
1076 auto req = std::make_shared<Request>(addr, size, flags, masterID);
1077 req->setPC(pc);
1078
1079 // If this is not done it triggers assert in L1 cache for invalid contextId
1080 req->setContext(ContextID(0));
1081
1082 // Embed it in a packet
1083 PacketPtr pkt = new Packet(req, cmd);
1084
1085 uint8_t* pkt_data = new uint8_t[req->getSize()];
1086 pkt->dataDynamic(pkt_data);
1087
1088 if (cmd.isWrite()) {
1089 memset(pkt_data, 0xA, req->getSize());
1090 }
1091
1092 // Call MasterPort method to send a timing request for this packet
1093 bool success = port.sendTimingReq(pkt);
1094 if (!success) {
1095 // If it fails, save the packet to retry when a retry is signalled by
1096 // the cache
1097 retryPkt = pkt;
1098 }
1099 return success;
1100 }
1101
1102 void
1103 TraceCPU::icacheRetryRecvd()
1104 {
1105 // Schedule an event to go through the control flow in the same tick as
1106 // retry is received
1107 DPRINTF(TraceCPUInst, "Icache retry received. Scheduling next IcacheGen"
1108 " event @%lli.\n", curTick());
1109 schedule(icacheNextEvent, curTick());
1110 }
1111
1112 void
1113 TraceCPU::dcacheRetryRecvd()
1114 {
1115 // Schedule an event to go through the execute flow in the same tick as
1116 // retry is received
1117 DPRINTF(TraceCPUData, "Dcache retry received. Scheduling next DcacheGen"
1118 " event @%lli.\n", curTick());
1119 schedule(dcacheNextEvent, curTick());
1120 }
1121
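// Schedule the next data-side generator event at the requested tick, or
// pull an already scheduled event earlier if the requested tick precedes it.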
1122 void
1123 TraceCPU::schedDcacheNextEvent(Tick when)
1124 {
1125 if (!dcacheNextEvent.scheduled()) {
1126 DPRINTF(TraceCPUData, "Scheduling next DcacheGen event at %lli.\n",
1127 when);
1128 schedule(dcacheNextEvent, when);
1129 ++traceStats.numSchedDcacheEvent;
1130 } else if (when < dcacheNextEvent.when()) {
1131 DPRINTF(TraceCPUData, "Re-scheduling next dcache event from %lli"
1132 " to %lli.\n", dcacheNextEvent.when(), when);
1133 reschedule(dcacheNextEvent, when);
1134 }
1135
1136 }
1137
1138 bool
1139 TraceCPU::IcachePort::recvTimingResp(PacketPtr pkt)
1140 {
1141 // All responses on the instruction fetch side are ignored. Simply delete
1142 // the packet to free allocated memory
1143 delete pkt;
1144
1145 return true;
1146 }
1147
1148 void
1149 TraceCPU::IcachePort::recvReqRetry()
1150 {
1151 owner->icacheRetryRecvd();
1152 }
1153
1154 void
1155 TraceCPU::dcacheRecvTimingResp(PacketPtr pkt)
1156 {
1157 DPRINTF(TraceCPUData, "Received timing response from Dcache.\n");
1158 dcacheGen.completeMemAccess(pkt);
1159 }
1160
1161 bool
1162 TraceCPU::DcachePort::recvTimingResp(PacketPtr pkt)
1163 {
1164 // Handle the responses for data memory requests which is done inside the
1165 // elastic data generator
1166 owner->dcacheRecvTimingResp(pkt);
1167 // After processing the response delete the packet to free
1168 // memory
1169 delete pkt;
1170
1171 return true;
1172 }
1173
1174 void
1175 TraceCPU::DcachePort::recvReqRetry()
1176 {
1177 owner->dcacheRetryRecvd();
1178 }
1179
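// Open the elastic data dependency trace and read its header to validate
// the tick frequency and obtain the window size.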
1180 TraceCPU::ElasticDataGen::InputStream::InputStream(
1181 const std::string& filename,
1182 const double time_multiplier)
1183 : trace(filename),
1184 timeMultiplier(time_multiplier),
1185 microOpCount(0)
1186 {
1187 // Create a protobuf message for the header and read it from the stream
1188 ProtoMessage::InstDepRecordHeader header_msg;
1189 if (!trace.read(header_msg)) {
1190 panic("Failed to read packet header from %s\n", filename);
1191 }
1192
1193 if (header_msg.tick_freq() != SimClock::Frequency) {
1194 panic("Trace %s was recorded with a different tick frequency %d\n",
1195 filename, header_msg.tick_freq());
1196 }
1197
1198 // Assign window size equal to the field in the trace that was recorded
1199 // when the data dependency trace was captured in the o3cpu model
1200 windowSize = header_msg.window_size();
1201 }
1202
1203 void
1204 TraceCPU::ElasticDataGen::InputStream::reset()
1205 {
1206 trace.reset();
1207 }
1208
1209 bool
1210 TraceCPU::ElasticDataGen::InputStream::read(GraphNode* element)
1211 {
1212 ProtoMessage::InstDepRecord pkt_msg;
1213 if (trace.read(pkt_msg)) {
1214 // Required fields
1215 element->seqNum = pkt_msg.seq_num();
1216 element->type = pkt_msg.type();
1217 // Scale the compute delay to effectively scale the Trace CPU frequency
1218 element->compDelay = pkt_msg.comp_delay() * timeMultiplier;
1219
1220 // Repeated field robDepList
1221 element->clearRobDep();
1222 assert((pkt_msg.rob_dep()).size() <= element->maxRobDep);
1223 for (int i = 0; i < (pkt_msg.rob_dep()).size(); i++) {
1224 element->robDep[element->numRobDep] = pkt_msg.rob_dep(i);
1225 element->numRobDep += 1;
1226 }
1227
1228 // Repeated field
1229 element->clearRegDep();
1230 assert((pkt_msg.reg_dep()).size() <= TheISA::MaxInstSrcRegs);
1231 for (int i = 0; i < (pkt_msg.reg_dep()).size(); i++) {
1232 // There is a possibility that an instruction has both a register
1233 // and an order (ROB) dependency on the same instruction. In such a
1234 // case, the register dependency is omitted.
1235 bool duplicate = false;
1236 for (int j = 0; j < element->numRobDep; j++) {
1237 duplicate |= (pkt_msg.reg_dep(i) == element->robDep[j]);
1238 }
1239 if (!duplicate) {
1240 element->regDep[element->numRegDep] = pkt_msg.reg_dep(i);
1241 element->numRegDep += 1;
1242 }
1243 }
1244
1245 // Optional fields
1246 if (pkt_msg.has_p_addr())
1247 element->physAddr = pkt_msg.p_addr();
1248 else
1249 element->physAddr = 0;
1250
1251 if (pkt_msg.has_v_addr())
1252 element->virtAddr = pkt_msg.v_addr();
1253 else
1254 element->virtAddr = 0;
1255
1256 if (pkt_msg.has_size())
1257 element->size = pkt_msg.size();
1258 else
1259 element->size = 0;
1260
1261 if (pkt_msg.has_flags())
1262 element->flags = pkt_msg.flags();
1263 else
1264 element->flags = 0;
1265
1266 if (pkt_msg.has_pc())
1267 element->pc = pkt_msg.pc();
1268 else
1269 element->pc = 0;
1270
1271 // ROB occupancy number
1272 ++microOpCount;
1273 if (pkt_msg.has_weight()) {
1274 microOpCount += pkt_msg.weight();
1275 }
1276 element->robNum = microOpCount;
1277 return true;
1278 }
1279
1280 // We have reached the end of the file
1281 return false;
1282 }
1283
1284 bool
1285 TraceCPU::ElasticDataGen::GraphNode::removeRegDep(NodeSeqNum reg_dep)
1286 {
1287 for (auto& own_reg_dep : regDep) {
1288 if (own_reg_dep == reg_dep) {
1289 // If register dependency is found, make it zero and return true
1290 own_reg_dep = 0;
1291 assert(numRegDep > 0);
1292 --numRegDep;
1293 DPRINTFR(TraceCPUData, "\tFor %lli: Marking register dependency %lli "
1294 "done.\n", seqNum, reg_dep);
1295 return true;
1296 }
1297 }
1298
1299 // Return false if the dependency is not found
1300 return false;
1301 }
1302
1303 bool
1304 TraceCPU::ElasticDataGen::GraphNode::removeRobDep(NodeSeqNum rob_dep)
1305 {
1306 for (auto& own_rob_dep : robDep) {
1307 if (own_rob_dep == rob_dep) {
1308 // If the rob dependency is found, make it zero and return true
1309 own_rob_dep = 0;
1310 assert(numRobDep > 0);
1311 --numRobDep;
1312 DPRINTFR(TraceCPUData, "\tFor %lli: Marking ROB dependency %lli "
1313 "done.\n", seqNum, rob_dep);
1314 return true;
1315 }
1316 }
1317 return false;
1318 }
1319
1320 void
1321 TraceCPU::ElasticDataGen::GraphNode::clearRegDep() {
1322 for (auto& own_reg_dep : regDep) {
1323 own_reg_dep = 0;
1324 }
1325 numRegDep = 0;
1326 }
1327
1328 void
1329 TraceCPU::ElasticDataGen::GraphNode::clearRobDep() {
1330 for (auto& own_rob_dep : robDep) {
1331 own_rob_dep = 0;
1332 }
1333 numRobDep = 0;
1334 }
1335
1336 bool
1337 TraceCPU::ElasticDataGen::GraphNode::removeDepOnInst(NodeSeqNum done_seq_num)
1338 {
1339 // If it is an rob dependency then remove it
1340 if (!removeRobDep(done_seq_num)) {
1341 // If it is not an rob dependency then it must be a register dependency
1342 // If the register dependency is not found, it violates an assumption
1343 // and must be caught by assert.
1344 bool regdep_found M5_VAR_USED = removeRegDep(done_seq_num);
1345 assert(regdep_found);
1346 }
1347 // Return true if the node is dependency free
1348 return (numRobDep == 0 && numRegDep == 0);
1349 }
1350
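// Debug helper: print the node's fields and its dependency lists in a
// comma-separated, trace-like format.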
1351 void
1352 TraceCPU::ElasticDataGen::GraphNode::writeElementAsTrace() const
1353 {
1354 DPRINTFR(TraceCPUData, "%lli", seqNum);
1355 DPRINTFR(TraceCPUData, ",%s", typeToStr());
1356 if (isLoad() || isStore()) {
1357 DPRINTFR(TraceCPUData, ",%i", physAddr);
1358 DPRINTFR(TraceCPUData, ",%i", size);
1359 DPRINTFR(TraceCPUData, ",%i", flags);
1360 }
1361 DPRINTFR(TraceCPUData, ",%lli", compDelay);
1362 int i = 0;
1363 DPRINTFR(TraceCPUData, "robDep:");
1364 while (robDep[i] != 0) {
1365 DPRINTFR(TraceCPUData, ",%lli", robDep[i]);
1366 i++;
1367 }
1368 i = 0;
1369 DPRINTFR(TraceCPUData, "regDep:");
1370 while (regDep[i] != 0) {
1371 DPRINTFR(TraceCPUData, ",%lli", regDep[i]);
1372 i++;
1373 }
1374 auto child_itr = dependents.begin();
1375 DPRINTFR(TraceCPUData, "dependents:");
1376 while (child_itr != dependents.end()) {
1377 DPRINTFR(TraceCPUData, ":%lli", (*child_itr)->seqNum);
1378 child_itr++;
1379 }
1380
1381 DPRINTFR(TraceCPUData, "\n");
1382 }
1383
1384 std::string
1385 TraceCPU::ElasticDataGen::GraphNode::typeToStr() const
1386 {
1387 return Record::RecordType_Name(type);
1388 }
1389
1390 TraceCPU::FixedRetryGen::InputStream::InputStream(const std::string& filename)
1391 : trace(filename)
1392 {
1393 // Create a protobuf message for the header and read it from the stream
1394 ProtoMessage::PacketHeader header_msg;
1395 if (!trace.read(header_msg)) {
1396 panic("Failed to read packet header from %s\n", filename);
1397 }
1398
1399 if (header_msg.tick_freq() != SimClock::Frequency) {
1400 panic("Trace %s was recorded with a different tick frequency %d\n",
1401 filename, header_msg.tick_freq());
1402 }
1403 }
1404
1405 void
1406 TraceCPU::FixedRetryGen::InputStream::reset()
1407 {
1408 trace.reset();
1409 }
1410
1411 bool
1412 TraceCPU::FixedRetryGen::InputStream::read(TraceElement* element)
1413 {
1414 ProtoMessage::Packet pkt_msg;
1415 if (trace.read(pkt_msg)) {
1416 element->cmd = pkt_msg.cmd();
1417 element->addr = pkt_msg.addr();
1418 element->blocksize = pkt_msg.size();
1419 element->tick = pkt_msg.tick();
1420 element->flags = pkt_msg.has_flags() ? pkt_msg.flags() : 0;
1421 element->pc = pkt_msg.has_pc() ? pkt_msg.pc() : 0;
1422 return true;
1423 }
1424
1425 // We have reached the end of the file
1426 return false;
1427 }