/*
 * Copyright (c) 2013 - 2016 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder. You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "cpu/trace/trace_cpu.hh"

#include "sim/sim_exit.hh"

// Declare and initialize the static counter for number of trace CPUs.
int TraceCPU::numTraceCPUs = 0;

TraceCPU::TraceCPU(const TraceCPUParams &params)
    : BaseCPU(params),
      icachePort(this),
      dcachePort(this),
      instRequestorID(params.system->getRequestorId(this, "inst")),
      dataRequestorID(params.system->getRequestorId(this, "data")),
      instTraceFile(params.instTraceFile),
      dataTraceFile(params.dataTraceFile),
      icacheGen(*this, ".iside", icachePort, instRequestorID, instTraceFile),
      dcacheGen(*this, ".dside", dcachePort, dataRequestorID, dataTraceFile,
                params),
      icacheNextEvent([this]{ schedIcacheNext(); }, name()),
      dcacheNextEvent([this]{ schedDcacheNext(); }, name()),
      oneTraceComplete(false),
      traceOffset(0),
      execCompleteEvent(nullptr),
      enableEarlyExit(params.enableEarlyExit),
      progressMsgInterval(params.progressMsgInterval),
      progressMsgThreshold(params.progressMsgInterval), traceStats(this)
{
    // Increment static counter for number of Trace CPUs.
    ++TraceCPU::numTraceCPUs;

    // Check that the python parameters for sizes of ROB, store buffer and
    // load buffer do not overflow the corresponding C++ variables.
    fatal_if(params.sizeROB > UINT16_MAX,
             "ROB size set to %d exceeds the max. value of %d.",
             params.sizeROB, UINT16_MAX);
    fatal_if(params.sizeStoreBuffer > UINT16_MAX,
             "Store buffer size set to %d exceeds the max. value of %d.",
             params.sizeStoreBuffer, UINT16_MAX);
    fatal_if(params.sizeLoadBuffer > UINT16_MAX,
             "Load buffer size set to %d exceeds the max. value of %d.",
             params.sizeLoadBuffer, UINT16_MAX);
}

void
TraceCPU::updateNumOps(uint64_t rob_num)
{
    traceStats.numOps = rob_num;
    if (progressMsgInterval != 0 &&
        traceStats.numOps.value() >= progressMsgThreshold) {
        inform("%s: %i insts committed\n", name(), progressMsgThreshold);
        progressMsgThreshold += progressMsgInterval;
    }
}

void
TraceCPU::takeOverFrom(BaseCPU *oldCPU)
{
    // Unbind the ports of the old CPU and bind the ports of the TraceCPU.
    getInstPort().takeOverFrom(&oldCPU->getInstPort());
    getDataPort().takeOverFrom(&oldCPU->getDataPort());
}

void
TraceCPU::init()
{
    DPRINTF(TraceCPUInst, "Instruction fetch request trace file is \"%s\".\n",
            instTraceFile);
    DPRINTF(TraceCPUData, "Data memory request trace file is \"%s\".\n",
            dataTraceFile);

    BaseCPU::init();

    // Get the send tick of the first instruction read request
    Tick first_icache_tick = icacheGen.init();

    // Get the send tick of the first data read/write request
    Tick first_dcache_tick = dcacheGen.init();

    // Set the trace offset as the minimum of that in both traces
    traceOffset = std::min(first_icache_tick, first_dcache_tick);
    inform("%s: Time offset (tick) found as min of both traces is %lli.",
           name(), traceOffset);

    // Schedule next icache and dcache event by subtracting the offset
    schedule(icacheNextEvent, first_icache_tick - traceOffset);
    schedule(dcacheNextEvent, first_dcache_tick - traceOffset);

    // Adjust the trace offset for the dcache generator's ready nodes
    // We don't need to do this for the icache generator as it will
    // send its first request at the first event and schedule subsequent
    // events using a relative tick delta
    dcacheGen.adjustInitTraceOffset(traceOffset);

    // If the Trace CPU simulation is configured to exit on any one trace
    // completion then we don't need a counted event to count down all Trace
    // CPUs in the system. If not then instantiate a counted event.
    if (!enableEarlyExit) {
        // The static counter for number of Trace CPUs is correctly set at
        // this point so create an event and pass it.
        execCompleteEvent = new CountedExitEvent("end of all traces reached.",
                                                 numTraceCPUs);
    }

}

void
TraceCPU::schedIcacheNext()
{
    DPRINTF(TraceCPUInst, "IcacheGen event.\n");

    // Try to send the current packet or a retry packet if there is one
    bool sched_next = icacheGen.tryNext();
    // If packet sent successfully, schedule next event
    if (sched_next) {
        DPRINTF(TraceCPUInst,
                "Scheduling next icacheGen event at %d.\n",
                curTick() + icacheGen.tickDelta());
        schedule(icacheNextEvent, curTick() + icacheGen.tickDelta());
        ++traceStats.numSchedIcacheEvent;
    } else {
        // Check if the trace is complete. If not, do nothing because sending
        // failed and the next event will be scheduled via recvReqRetry().
        if (icacheGen.isTraceComplete()) {
            // If this is the first trace to complete, set the variable. If it
            // is already set then both traces are complete and the simulation
            // can exit.
            checkAndSchedExitEvent();
        }
    }
    return;
}

void
TraceCPU::schedDcacheNext()
{
    DPRINTF(TraceCPUData, "DcacheGen event.\n");

    // Update stat for numCycles
    baseStats.numCycles = clockEdge() / clockPeriod();

    dcacheGen.execute();
    if (dcacheGen.isExecComplete()) {
        checkAndSchedExitEvent();
    }
}

void
TraceCPU::checkAndSchedExitEvent()
{
    if (!oneTraceComplete) {
        oneTraceComplete = true;
    } else {
        // Schedule event to indicate execution is complete as both
        // instruction and data access traces have been played back.
        inform("%s: Execution complete.", name());
        // If the replay is configured to exit early, that is when any one
        // execution is complete then exit immediately and return. Otherwise,
        // schedule the counted exit that counts down completion of each Trace
        // CPU.
        if (enableEarlyExit) {
            exitSimLoop("End of trace reached");
        } else {
            schedule(*execCompleteEvent, curTick());
        }
    }
}

TraceCPU::TraceStats::TraceStats(TraceCPU *trace) :
    Stats::Group(trace),
    ADD_STAT(numSchedDcacheEvent,
             "Number of events scheduled to trigger data request generator"),
    ADD_STAT(numSchedIcacheEvent,
             "Number of events scheduled to trigger instruction request "
             "generator"),
    ADD_STAT(numOps, "Number of micro-ops simulated by the Trace CPU"),
    ADD_STAT(cpi, "Cycles per micro-op used as a proxy for CPI",
             trace->baseStats.numCycles / numOps)
{
    cpi.precision(6);
}

TraceCPU::ElasticDataGen::
ElasticDataGenStatGroup::ElasticDataGenStatGroup(Stats::Group *parent,
                                                 const std::string& _name) :
    Stats::Group(parent, _name.c_str()),
    ADD_STAT(maxDependents, "Max number of dependents observed on a node"),
    ADD_STAT(maxReadyListSize, "Max size of the ready list observed"),
    ADD_STAT(numSendAttempted, "Number of first attempts to send a request"),
    ADD_STAT(numSendSucceeded, "Number of successful first attempts"),
    ADD_STAT(numSendFailed, "Number of failed first attempts"),
    ADD_STAT(numRetrySucceeded, "Number of successful retries"),
    ADD_STAT(numSplitReqs, "Number of split requests"),
    ADD_STAT(numSOLoads, "Number of strictly ordered loads"),
    ADD_STAT(numSOStores, "Number of strictly ordered stores"),
    ADD_STAT(dataLastTick, "Last tick simulated from the elastic data trace")
{
}

Tick
TraceCPU::ElasticDataGen::init()
{
    DPRINTF(TraceCPUData, "Initializing data memory request generator "
            "DcacheGen: elastic issue with retry.\n");

    panic_if(!readNextWindow(),
             "Trace has %d elements. It must have at least %d elements.",
             depGraph.size(), 2 * windowSize);
    DPRINTF(TraceCPUData, "After 1st read, depGraph size:%d.\n",
            depGraph.size());

    panic_if(!readNextWindow(),
             "Trace has %d elements. It must have at least %d elements.",
             depGraph.size(), 2 * windowSize);
250 DPRINTF(TraceCPUData, "After 2st read, depGraph size:%d.\n",
            depGraph.size());

    // Print readyList
    if (DTRACE(TraceCPUData)) {
        printReadyList();
    }
    auto free_itr = readyList.begin();
    DPRINTF(TraceCPUData,
            "Execute tick of the first dependency free node %lli is %d.\n",
            free_itr->seqNum, free_itr->execTick);
    // Return the execute tick of the earliest ready node so that an event
    // can be scheduled to call execute()
    return (free_itr->execTick);
}

void
TraceCPU::ElasticDataGen::adjustInitTraceOffset(Tick& offset)
{
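    // The ready nodes were given absolute execute ticks from the trace during
    // init(); shift them by the common trace offset so that replay
    // effectively starts at tick 0.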
269 for (auto& free_node : readyList) {
270 free_node.execTick -= offset;
271 }
272 }
273
274 void
275 TraceCPU::ElasticDataGen::exit()
276 {
277 trace.reset();
278 }
279
280 bool
281 TraceCPU::ElasticDataGen::readNextWindow()
282 {
283 // Read and add next window
284 DPRINTF(TraceCPUData, "Reading next window from file.\n");
285
286 if (traceComplete) {
287 // We are at the end of the file, thus we have no more records.
288 // Return false.
289 return false;
290 }
291
292 DPRINTF(TraceCPUData, "Start read: Size of depGraph is %d.\n",
293 depGraph.size());
294
295 uint32_t num_read = 0;
296 while (num_read != windowSize) {
297
298 // Create a new graph node
299 GraphNode* new_node = new GraphNode;
300
301 // Read the next line to get the next record. If that fails then end of
302 // trace has been reached and traceComplete needs to be set in addition
303 // to returning false.
304 if (!trace.read(new_node)) {
305 DPRINTF(TraceCPUData, "\tTrace complete!\n");
306 traceComplete = true;
307 return false;
308 }
309
310 // Annotate the ROB dependencies of the new node onto the parent nodes.
311 addDepsOnParent(new_node, new_node->robDep, new_node->numRobDep);
312 // Annotate the register dependencies of the new node onto the parent
313 // nodes.
314 addDepsOnParent(new_node, new_node->regDep, new_node->numRegDep);
315
316 num_read++;
317 // Add to map
318 depGraph[new_node->seqNum] = new_node;
319 if (new_node->numRobDep == 0 && new_node->numRegDep == 0) {
320 // Source dependencies are already complete, check if resources
321 // are available and issue. The execution time is approximated
322 // to current time plus the computational delay.
323 checkAndIssue(new_node);
324 }
325 }
326
327 DPRINTF(TraceCPUData, "End read: Size of depGraph is %d.\n",
328 depGraph.size());
329 return true;
330 }
331
332 template<typename T>
333 void
334 TraceCPU::ElasticDataGen::addDepsOnParent(GraphNode *new_node,
335 T& dep_array, uint8_t& num_dep)
336 {
337 for (auto& a_dep : dep_array) {
        // The convention is to set the dependencies starting with the first
        // index in the ROB and register dependency arrays. Thus, when we
        // reach a dependency equal to the initialisation value of zero, we
        // know we have iterated over all dependencies and can break.
        if (a_dep == 0)
            break;
        // We look up the valid dependency, i.e. the parent of this node
        auto parent_itr = depGraph.find(a_dep);
        if (parent_itr != depGraph.end()) {
            // If the parent is found, it is yet to be executed. Append a
            // pointer to the new node to the dependents list of the parent
            // node.
            parent_itr->second->dependents.push_back(new_node);
            auto num_depts = parent_itr->second->dependents.size();
            elasticStats.maxDependents = std::max<double>(num_depts,
                elasticStats.maxDependents.value());
        } else {
            // The dependency is not found in the graph. So consider
            // the execution of the parent is complete, i.e. remove this
            // dependency.
            a_dep = 0;
            num_dep--;
        }
    }
}

void
TraceCPU::ElasticDataGen::execute()
{
    DPRINTF(TraceCPUData, "Execute start occupancy:\n");
    DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, "
             "depFreeQueue = %d ,", depGraph.size(), readyList.size(),
             depFreeQueue.size());
    hwResource.printOccupancy();

    // Read next window to make sure that dependents of all dep-free nodes
    // are in the depGraph
    if (nextRead) {
        readNextWindow();
        nextRead = false;
    }

    // First attempt to issue the pending dependency-free nodes held
    // in depFreeQueue. If resources have become available for a node,
    // then issue it, i.e. add the node to readyList.
    while (!depFreeQueue.empty()) {
        if (checkAndIssue(depFreeQueue.front(), false)) {
            DPRINTF(TraceCPUData,
                    "Removing from depFreeQueue: seq. num %lli.\n",
                    (depFreeQueue.front())->seqNum);
            depFreeQueue.pop();
        } else {
            break;
        }
    }
    // Proceed to execute from readyList
    auto graph_itr = depGraph.begin();
    auto free_itr = readyList.begin();
    // Iterate through readyList until the next free node has its execute
    // tick later than curTick or the end of readyList is reached
398 while (free_itr->execTick <= curTick() && free_itr != readyList.end()) {
399
        // Get pointer to the node to be executed
        graph_itr = depGraph.find(free_itr->seqNum);
        assert(graph_itr != depGraph.end());
        GraphNode* node_ptr = graph_itr->second;

        // If there is a retryPkt send that else execute the load
        if (retryPkt) {
            // The retryPkt must be the request that was created by the
            // first node in the readyList.
            if (retryPkt->req->getReqInstSeqNum() != node_ptr->seqNum) {
410 panic("Retry packet's seqence number does not match "
411 "the first node in the readyList.\n");
            }
            if (port.sendTimingReq(retryPkt)) {
                ++elasticStats.numRetrySucceeded;
                retryPkt = nullptr;
            }
        } else if (node_ptr->isLoad() || node_ptr->isStore()) {
            // If there is no retryPkt, attempt to send a memory request in
            // case of a load or store node. If the send fails, executeMemReq()
            // returns a packet pointer, which we save in retryPkt. In case of
            // a comp node we don't do anything and simply continue as if the
            // execution of the comp node succeeded.
            retryPkt = executeMemReq(node_ptr);
        }
        // If the retryPkt or a new load/store node failed, we exit from here
        // as a retry from cache will bring the control to execute(). The
        // first node in readyList then, will be the failed node.
        if (retryPkt) {
            break;
        }

        // Proceed to remove dependencies for the successfully executed node.
        // If it is a load which is not strictly ordered and we sent a
        // request for it successfully, we do not yet mark any register
        // dependencies complete. But, as per the dependency model, the ROB
        // dependencies that load and non-load/store dependents have on this
        // load are satisfied by sending it successfully, so mark those
        // complete now.
438 if (node_ptr->isLoad() && !node_ptr->isStrictlyOrdered()) {
439 // If execute succeeded mark its dependents as complete
440 DPRINTF(TraceCPUData,
441 "Node seq. num %lli sent. Waking up dependents..\n",
442 node_ptr->seqNum);
443
444 auto child_itr = (node_ptr->dependents).begin();
445 while (child_itr != (node_ptr->dependents).end()) {
446 // ROB dependency of a store on a load must not be removed
447 // after load is sent but after response is received
448 if (!(*child_itr)->isStore() &&
449 (*child_itr)->removeRobDep(node_ptr->seqNum)) {
450
451 // Check if the child node has become dependency free
452 if ((*child_itr)->numRobDep == 0 &&
453 (*child_itr)->numRegDep == 0) {
454
455 // Source dependencies are complete, check if
456 // resources are available and issue
457 checkAndIssue(*child_itr);
458 }
459 // Remove this child for the sent load and point to new
460 // location of the element following the erased element
461 child_itr = node_ptr->dependents.erase(child_itr);
462 } else {
463 // This child is not dependency-free, point to the next
464 // child
465 child_itr++;
466 }
467 }
468 } else {
469 // If it is a strictly ordered load mark its dependents as complete
470 // as we do not send a request for this case. If it is a store or a
471 // comp node we also mark all its dependents complete.
472 DPRINTF(TraceCPUData, "Node seq. num %lli done. Waking"
473 " up dependents..\n", node_ptr->seqNum);
474
475 for (auto child : node_ptr->dependents) {
476 // If the child node is dependency free removeDepOnInst()
477 // returns true.
478 if (child->removeDepOnInst(node_ptr->seqNum)) {
479 // Source dependencies are complete, check if resources
480 // are available and issue
481 checkAndIssue(child);
482 }
483 }
484 }
485
        // After executing the node, remove it from readyList and, where
        // safe, delete the node.
        readyList.erase(free_itr);
        // If it is a cacheable load which was sent, don't delete
        // just yet. Delete it in completeMemAccess() after the
        // response is received. If it is a strictly ordered
        // load, it was not sent and all dependencies were simply
        // marked complete. Thus it is safe to delete it. For
        // stores and non load/store nodes all dependencies were
        // marked complete so it is safe to delete it.
        if (!node_ptr->isLoad() || node_ptr->isStrictlyOrdered()) {
            // Release all resources occupied by the completed node
            hwResource.release(node_ptr);
            // clear the dynamically allocated set of dependents
            (node_ptr->dependents).clear();
            // Update the stat for numOps simulated
            owner.updateNumOps(node_ptr->robNum);
            // delete node
            delete node_ptr;
            // remove from graph
            depGraph.erase(graph_itr);
        }
        // Point to first node to continue to next iteration of while loop
        free_itr = readyList.begin();
    } // end of while loop

    // Print readyList, sizes of queues and resource status after updating
    if (DTRACE(TraceCPUData)) {
        printReadyList();
        DPRINTF(TraceCPUData, "Execute end occupancy:\n");
        DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, "
                 "depFreeQueue = %d ,", depGraph.size(), readyList.size(),
                 depFreeQueue.size());
        hwResource.printOccupancy();
    }

    if (retryPkt) {
522 DPRINTF(TraceCPUData, "Not scheduling an event as expecting a retry"
523 "event from the cache for seq. num %lli.\n",
                retryPkt->req->getReqInstSeqNum());
        return;
    }
    // If the size of the dependency graph is less than the dependency window
    // then read from the trace file to populate the graph next time we are in
    // execute.
    if (depGraph.size() < windowSize && !traceComplete)
        nextRead = true;

    // If cache is not blocked, schedule an event for the first execTick in
    // readyList else retry from cache will schedule the event. If the ready
    // list is empty then check if the next pending node has resources
    // available to issue. If yes, then schedule an event for the next cycle.
    if (!readyList.empty()) {
        Tick next_event_tick = std::max(readyList.begin()->execTick,
                                        curTick());
        DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
                next_event_tick);
        owner.schedDcacheNextEvent(next_event_tick);
    } else if (readyList.empty() && !depFreeQueue.empty() &&
               hwResource.isAvailable(depFreeQueue.front())) {
        DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
                owner.clockEdge(Cycles(1)));
        owner.schedDcacheNextEvent(owner.clockEdge(Cycles(1)));
    }

    // If trace is completely read, readyList is empty and depGraph is empty,
    // set execComplete to true
    if (depGraph.empty() && readyList.empty() && traceComplete &&
        !hwResource.awaitingResponse()) {
        DPRINTF(TraceCPUData, "\tExecution Complete!\n");
        execComplete = true;
        elasticStats.dataLastTick = curTick();
    }
}

PacketPtr
TraceCPU::ElasticDataGen::executeMemReq(GraphNode* node_ptr)
{
    DPRINTF(TraceCPUData, "Executing memory request %lli (phys addr %d, "
            "virt addr %d, pc %#x, size %d, flags %d).\n",
            node_ptr->seqNum, node_ptr->physAddr, node_ptr->virtAddr,
            node_ptr->pc, node_ptr->size, node_ptr->flags);

    // If the request is strictly ordered, do not send it. Just return nullptr
    // as if it was successfully sent.
570 if (node_ptr->isStrictlyOrdered()) {
571 node_ptr->isLoad() ? ++elasticStats.numSOLoads :
572 ++elasticStats.numSOStores;
573 DPRINTF(TraceCPUData, "Skipping strictly ordered request %lli.\n",
574 node_ptr->seqNum);
575 return nullptr;
576 }
577
578 // Check if the request spans two cache lines as this condition triggers
579 // an assert fail in the L1 cache. If it does then truncate the size to
580 // access only until the end of that line and ignore the remainder. The
581 // stat counting this is useful to keep a check on how frequently this
    // happens. If required the code could be revised to mimic splitting such
    // a request into two.
    unsigned blk_size = owner.cacheLineSize();
    Addr blk_offset = (node_ptr->physAddr & (Addr)(blk_size - 1));
    if (!(blk_offset + node_ptr->size <= blk_size)) {
        node_ptr->size = blk_size - blk_offset;
        ++elasticStats.numSplitReqs;
    }

    // Create a request and the packet containing request
    auto req = std::make_shared<Request>(
        node_ptr->physAddr, node_ptr->size, node_ptr->flags, requestorId);
    req->setReqInstSeqNum(node_ptr->seqNum);

    // If this is not done it triggers assert in L1 cache for invalid contextId
    req->setContext(ContextID(0));

    req->setPC(node_ptr->pc);
    // If virtual address is valid, set the virtual address field
    // of the request.
    if (node_ptr->virtAddr != 0) {
        req->setVirt(node_ptr->virtAddr, node_ptr->size,
                     node_ptr->flags, requestorId, node_ptr->pc);
        req->setPaddr(node_ptr->physAddr);
        req->setReqInstSeqNum(node_ptr->seqNum);
    }

    PacketPtr pkt;
    uint8_t* pkt_data = new uint8_t[req->getSize()];
    if (node_ptr->isLoad()) {
        pkt = Packet::createRead(req);
    } else {
        pkt = Packet::createWrite(req);
        memset(pkt_data, 0xA, req->getSize());
    }
    pkt->dataDynamic(pkt_data);

    // Call RequestPort method to send a timing request for this packet
    bool success = port.sendTimingReq(pkt);
    ++elasticStats.numSendAttempted;

    if (!success) {
        // If it fails, return the packet to retry when a retry is signalled by
        // the cache
        ++elasticStats.numSendFailed;
        DPRINTF(TraceCPUData, "Send failed. Saving packet for retry.\n");
        return pkt;
    } else {
        // If it succeeds, return nullptr
        ++elasticStats.numSendSucceeded;
        return nullptr;
    }
}

bool
TraceCPU::ElasticDataGen::checkAndIssue(const GraphNode* node_ptr, bool first)
{
    // Assert the node is dependency-free
    assert(node_ptr->numRobDep == 0 && node_ptr->numRegDep == 0);

    // If this is the first attempt, print a debug message to indicate this.
    if (first) {
        DPRINTFR(TraceCPUData, "\t\tseq. num %lli(%s) with rob num %lli is now"
                 " dependency free.\n", node_ptr->seqNum, node_ptr->typeToStr(),
                 node_ptr->robNum);
    }

    // Check if resources are available to issue the specific node
    if (hwResource.isAvailable(node_ptr)) {
        // If resources are free only then add to readyList
        DPRINTFR(TraceCPUData, "\t\tResources available for seq. num %lli. "
                 "Adding to readyList, occupying resources.\n",
                 node_ptr->seqNum);
        // Compute the execute tick by adding the compute delay for the node
        // and add the ready node to the ready list
        addToSortedReadyList(node_ptr->seqNum,
                             owner.clockEdge() + node_ptr->compDelay);
        // Account for the resources taken up by this issued node.
        hwResource.occupy(node_ptr);
        return true;
    } else {
        if (first) {
            // Although dependencies are complete, resources are not available.
            DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num "
                     "%lli. Adding to depFreeQueue.\n", node_ptr->seqNum);
            depFreeQueue.push(node_ptr);
        } else {
            DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num "
                     "%lli. Still pending issue.\n", node_ptr->seqNum);
        }
        return false;
    }
}

void
TraceCPU::ElasticDataGen::completeMemAccess(PacketPtr pkt)
{
    // Release the resources for this completed node.
    if (pkt->isWrite()) {
        // Consider store complete.
        hwResource.releaseStoreBuffer();
        // If it is a store response then do nothing since we do not model
        // dependencies on store completion in the trace. But if we were
        // blocking execution due to store buffer fullness, we need to schedule
        // an event and attempt to progress.
    } else {
        // If it is a load response then release the dependents waiting on it.
        // Get pointer to the completed load
        auto graph_itr = depGraph.find(pkt->req->getReqInstSeqNum());
        assert(graph_itr != depGraph.end());
        GraphNode* node_ptr = graph_itr->second;

        // Release resources occupied by the load
        hwResource.release(node_ptr);

        DPRINTF(TraceCPUData, "Load seq. num %lli response received. Waking up"
                " dependents..\n", node_ptr->seqNum);

        for (auto child : node_ptr->dependents) {
            if (child->removeDepOnInst(node_ptr->seqNum)) {
                checkAndIssue(child);
            }
        }

        // clear the dynamically allocated set of dependents
        (node_ptr->dependents).clear();
        // Update the stat for numOps completed
        owner.updateNumOps(node_ptr->robNum);
        // delete node
        delete node_ptr;
        // remove from graph
        depGraph.erase(graph_itr);
    }

    if (DTRACE(TraceCPUData)) {
        printReadyList();
    }

    // If the size of the dependency graph is less than the dependency window
    // then read from the trace file to populate the graph next time we are in
    // execute.
    if (depGraph.size() < windowSize && !traceComplete)
        nextRead = true;

    // If not waiting for retry, attempt to schedule next event
    if (!retryPkt) {
        // We might have new dep-free nodes in the list which will have execute
        // tick greater than or equal to curTick. But a new dep-free node might
        // have its execute tick earlier. Therefore, attempt to reschedule. It
        // could happen that the readyList is empty and we got here via a
        // last remaining response. So, either the trace is complete or there
        // are pending nodes in the depFreeQueue. The checking is done in the
        // execute() control flow, so schedule an event to go via that flow.
        Tick next_event_tick = readyList.empty() ? owner.clockEdge(Cycles(1)) :
            std::max(readyList.begin()->execTick, owner.clockEdge(Cycles(1)));
        DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
                next_event_tick);
        owner.schedDcacheNextEvent(next_event_tick);
    }
}

void
TraceCPU::ElasticDataGen::addToSortedReadyList(NodeSeqNum seq_num,
                                               Tick exec_tick)
{
    ReadyNode ready_node;
    ready_node.seqNum = seq_num;
    ready_node.execTick = exec_tick;

    // Iterator to readyList
    auto itr = readyList.begin();

    // If the readyList is empty, simply insert the new node at the beginning
    // and return
    if (itr == readyList.end()) {
        readyList.insert(itr, ready_node);
        elasticStats.maxReadyListSize =
            std::max<double>(readyList.size(),
                             elasticStats.maxReadyListSize.value());
        return;
    }

    // If the first node in the list is the one that failed to execute and is
    // awaiting a retry, keep it at the front: skip past it before searching
    // for the insertion position.
    if (retryPkt) {
        if (retryPkt->req->getReqInstSeqNum() == itr->seqNum)
            itr++;
    }

    // Increment the iterator and compare the node pointed to by it to the new
    // node till the position to insert the new node is found.
    bool found = false;
    while (!found && itr != readyList.end()) {
        // If the execution tick of the new node is less than the node then
        // this is the position to insert
        if (exec_tick < itr->execTick) {
            found = true;
        // If the execution tick of the new node is equal to the node then
        // sort in ascending order of sequence numbers
        } else if (exec_tick == itr->execTick) {
            // If the sequence number of the new node is less than the node
            // then this is the position to insert
            if (seq_num < itr->seqNum) {
                found = true;
            // Else go to next node
            } else {
                itr++;
            }
        } else {
            // If the execution tick of the new node is greater than the node
            // then go to the next node.
            itr++;
        }
    }
    readyList.insert(itr, ready_node);
    // Update the stat for max size reached of the readyList
    elasticStats.maxReadyListSize = std::max<double>(readyList.size(),
        elasticStats.maxReadyListSize.value());
}

void
TraceCPU::ElasticDataGen::printReadyList()
{
    auto itr = readyList.begin();
    if (itr == readyList.end()) {
        DPRINTF(TraceCPUData, "readyList is empty.\n");
        return;
    }
    DPRINTF(TraceCPUData, "Printing readyList:\n");
    while (itr != readyList.end()) {
        auto graph_itr = depGraph.find(itr->seqNum);
        M5_VAR_USED GraphNode* node_ptr = graph_itr->second;
        DPRINTFR(TraceCPUData, "\t%lld(%s), %lld\n", itr->seqNum,
                 node_ptr->typeToStr(), itr->execTick);
        itr++;
    }
}

TraceCPU::ElasticDataGen::HardwareResource::HardwareResource(
    uint16_t max_rob, uint16_t max_stores, uint16_t max_loads) :
    sizeROB(max_rob),
    sizeStoreBuffer(max_stores),
    sizeLoadBuffer(max_loads),
    oldestInFlightRobNum(UINT64_MAX),
    numInFlightLoads(0),
    numInFlightStores(0)
{}

void
TraceCPU::ElasticDataGen::HardwareResource::occupy(const GraphNode* new_node)
{
    // Occupy ROB entry for the issued node
    // Merely maintain the oldest node, i.e. numerically least robNum by saving
    // it in the variable oldestInFlightRobNum.
837 inFlightNodes[new_node->seqNum] = new_node->robNum;
838 oldestInFlightRobNum = inFlightNodes.begin()->second;
839
840 // Occupy Load/Store Buffer entry for the issued node if applicable
841 if (new_node->isLoad()) {
842 ++numInFlightLoads;
843 } else if (new_node->isStore()) {
844 ++numInFlightStores;
845 } // else if it is a non load/store node, no buffer entry is occupied
846
847 printOccupancy();
848 }
849
850 void
851 TraceCPU::ElasticDataGen::HardwareResource::release(const GraphNode* done_node)
852 {
853 assert(!inFlightNodes.empty());
854 DPRINTFR(TraceCPUData,
855 "\tClearing done seq. num %d from inFlightNodes..\n",
856 done_node->seqNum);
857
858 assert(inFlightNodes.find(done_node->seqNum) != inFlightNodes.end());
859 inFlightNodes.erase(done_node->seqNum);
860
    if (inFlightNodes.empty()) {
        // If we deleted the only in-flight node, reset oldestInFlightRobNum
        // to its initialized (max) value.
        oldestInFlightRobNum = UINT64_MAX;
    } else {
        // Set the oldest in-flight node rob number equal to the first node in
        // the inFlightNodes since that will have the numerically least value.
        oldestInFlightRobNum = inFlightNodes.begin()->second;
    }

    DPRINTFR(TraceCPUData,
             "\tCleared. inFlightNodes.size() = %d, "
             "oldestInFlightRobNum = %d\n", inFlightNodes.size(),
             oldestInFlightRobNum);

    // A store is considered complete when a request is sent, thus ROB entry is
    // freed. But it occupies an entry in the Store Buffer until its response
    // is received. A load is considered complete when a response is received,
    // thus both ROB and Load Buffer entries can be released.
    if (done_node->isLoad()) {
        assert(numInFlightLoads != 0);
        --numInFlightLoads;
    }
    // For normal writes, we send the requests out and clear a store buffer
    // entry on response. For writes which are strictly ordered, for e.g.
    // writes to device registers, we do that within release() which is called
    // when node is executed and taken off from readyList.
    if (done_node->isStore() && done_node->isStrictlyOrdered()) {
        releaseStoreBuffer();
    }
}

void
TraceCPU::ElasticDataGen::HardwareResource::releaseStoreBuffer()
{
    assert(numInFlightStores != 0);
    --numInFlightStores;
}

bool
TraceCPU::ElasticDataGen::HardwareResource::isAvailable(
    const GraphNode* new_node) const
{
    uint16_t num_in_flight_nodes;
    if (inFlightNodes.empty()) {
        num_in_flight_nodes = 0;
        DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
                 " #in-flight nodes = 0", new_node->seqNum);
    } else if (new_node->robNum > oldestInFlightRobNum) {
        // This is the intuitive case where new dep-free node is younger
        // instruction than the oldest instruction in-flight. Thus we make sure
        // in_flight_nodes does not overflow.
        num_in_flight_nodes = new_node->robNum - oldestInFlightRobNum;
        DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
                 " #in-flight nodes = %d - %d = %d", new_node->seqNum,
                 new_node->robNum, oldestInFlightRobNum, num_in_flight_nodes);
    } else {
        // This is the case where an instruction older than the oldest in-
        // flight instruction becomes dep-free. Thus we must have already
        // accounted for the entry in ROB for this new dep-free node.
        // Immediately after this check returns true, oldestInFlightRobNum will
        // be updated in occupy(). We simply let this node issue now.
        num_in_flight_nodes = 0;
        DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
                 " new oldestInFlightRobNum = %d, #in-flight nodes ignored",
                 new_node->seqNum, new_node->robNum);
    }
    DPRINTFR(TraceCPUData, ", LQ = %d/%d, SQ = %d/%d.\n",
             numInFlightLoads, sizeLoadBuffer,
             numInFlightStores, sizeStoreBuffer);
    // Check if resources are available to issue the specific node
    if (num_in_flight_nodes >= sizeROB) {
        return false;
    }
    if (new_node->isLoad() && numInFlightLoads >= sizeLoadBuffer) {
        return false;
    }
    if (new_node->isStore() && numInFlightStores >= sizeStoreBuffer) {
        return false;
    }
    return true;
}

bool
TraceCPU::ElasticDataGen::HardwareResource::awaitingResponse() const
{
    // Return true if there is at least one read or write request in flight
    return (numInFlightStores != 0 || numInFlightLoads != 0);
}

void
TraceCPU::ElasticDataGen::HardwareResource::printOccupancy()
{
    DPRINTFR(TraceCPUData, "oldestInFlightRobNum = %d, "
             "LQ = %d/%d, SQ = %d/%d.\n",
             oldestInFlightRobNum,
             numInFlightLoads, sizeLoadBuffer,
             numInFlightStores, sizeStoreBuffer);
}

TraceCPU::FixedRetryGen::FixedRetryGenStatGroup::FixedRetryGenStatGroup(
    Stats::Group *parent, const std::string& _name) :
    Stats::Group(parent, _name.c_str()),
    ADD_STAT(numSendAttempted, "Number of first attempts to send a request"),
    ADD_STAT(numSendSucceeded, "Number of successful first attempts"),
    ADD_STAT(numSendFailed, "Number of failed first attempts"),
    ADD_STAT(numRetrySucceeded, "Number of successful retries"),
    ADD_STAT(instLastTick, "Last tick simulated from the fixed inst trace")
{

}

Tick
TraceCPU::FixedRetryGen::init()
{
    DPRINTF(TraceCPUInst, "Initializing instruction fetch request generator"
            " IcacheGen: fixed issue with retry.\n");

    if (nextExecute()) {
        DPRINTF(TraceCPUInst, "\tFirst tick = %d.\n", currElement.tick);
        return currElement.tick;
    } else {
        panic("Read of first message in the trace failed.\n");
        return MaxTick;
    }
}

bool
TraceCPU::FixedRetryGen::tryNext()
{
    // If there is a retry packet, try to send it
    if (retryPkt) {
        DPRINTF(TraceCPUInst, "Trying to send retry packet.\n");

        if (!port.sendTimingReq(retryPkt)) {
            // Still blocked! This should never occur.
            DPRINTF(TraceCPUInst, "Retry packet sending failed.\n");
            return false;
        }
        ++fixedStats.numRetrySucceeded;
    } else {
        DPRINTF(TraceCPUInst, "Trying to send packet for currElement.\n");

        // try sending current element
        assert(currElement.isValid());

        ++fixedStats.numSendAttempted;

        if (!send(currElement.addr, currElement.blocksize,
                  currElement.cmd, currElement.flags, currElement.pc)) {
            DPRINTF(TraceCPUInst, "currElement sending failed.\n");
            ++fixedStats.numSendFailed;
            // return false to indicate not to schedule next event
            return false;
        } else {
            ++fixedStats.numSendSucceeded;
        }
    }
    // If packet was sent successfully, either retryPkt or currElement, return
    // true to indicate to schedule event at current Tick plus delta. If packet
    // was sent successfully and there is no next packet to send, return false.
    DPRINTF(TraceCPUInst, "Packet sent successfully, trying to read next "
            "element.\n");
    retryPkt = nullptr;
    // Read next element into currElement, currElement gets cleared so save the
    // tick to calculate delta
    Tick last_tick = currElement.tick;
    if (nextExecute()) {
        assert(currElement.tick >= last_tick);
        delta = currElement.tick - last_tick;
    }
    return !traceComplete;
}

void
TraceCPU::FixedRetryGen::exit()
{
    trace.reset();
}

bool
TraceCPU::FixedRetryGen::nextExecute()
{
    if (traceComplete)
        // We are at the end of the file, thus we have no more messages.
        // Return false.
        return false;


    //Reset the currElement to the default values
    currElement.clear();

    // Read the next line to get the next message. If that fails then end of
    // trace has been reached and traceComplete needs to be set in addition
    // to returning false. If successful then next message is in currElement.
    if (!trace.read(&currElement)) {
        traceComplete = true;
        fixedStats.instLastTick = curTick();
        return false;
    }

    DPRINTF(TraceCPUInst, "inst fetch: %c addr %d pc %#x size %d tick %d\n",
            currElement.cmd.isRead() ? 'r' : 'w',
            currElement.addr,
            currElement.pc,
            currElement.blocksize,
            currElement.tick);

    return true;
}

bool
TraceCPU::FixedRetryGen::send(Addr addr, unsigned size, const MemCmd& cmd,
                              Request::FlagsType flags, Addr pc)
{

    // Create new request
    auto req = std::make_shared<Request>(addr, size, flags, requestorId);
    req->setPC(pc);

    // If this is not done it triggers assert in L1 cache for invalid contextId
    req->setContext(ContextID(0));

    // Embed it in a packet
    PacketPtr pkt = new Packet(req, cmd);

    uint8_t* pkt_data = new uint8_t[req->getSize()];
    pkt->dataDynamic(pkt_data);

    if (cmd.isWrite()) {
        memset(pkt_data, 0xA, req->getSize());
    }

    // Call RequestPort method to send a timing request for this packet
    bool success = port.sendTimingReq(pkt);
    if (!success) {
        // If it fails, save the packet to retry when a retry is signalled by
        // the cache
        retryPkt = pkt;
    }
    return success;
}

void
TraceCPU::icacheRetryRecvd()
{
    // Schedule an event to go through the control flow in the same tick as
    // retry is received
    DPRINTF(TraceCPUInst, "Icache retry received. Scheduling next IcacheGen"
            " event @%lli.\n", curTick());
    schedule(icacheNextEvent, curTick());
}

void
TraceCPU::dcacheRetryRecvd()
{
    // Schedule an event to go through the execute flow in the same tick as
    // retry is received
    DPRINTF(TraceCPUData, "Dcache retry received. Scheduling next DcacheGen"
            " event @%lli.\n", curTick());
    schedule(dcacheNextEvent, curTick());
}

void
TraceCPU::schedDcacheNextEvent(Tick when)
{
    if (!dcacheNextEvent.scheduled()) {
        DPRINTF(TraceCPUData, "Scheduling next DcacheGen event at %lli.\n",
                when);
        schedule(dcacheNextEvent, when);
        ++traceStats.numSchedDcacheEvent;
    } else if (when < dcacheNextEvent.when()) {
        DPRINTF(TraceCPUData, "Re-scheduling next dcache event from %lli"
                " to %lli.\n", dcacheNextEvent.when(), when);
        reschedule(dcacheNextEvent, when);
    }

}

bool
TraceCPU::IcachePort::recvTimingResp(PacketPtr pkt)
{
    // All responses on the instruction fetch side are ignored. Simply delete
    // the packet to free allocated memory
    delete pkt;

    return true;
}

void
TraceCPU::IcachePort::recvReqRetry()
{
    owner->icacheRetryRecvd();
}

void
TraceCPU::dcacheRecvTimingResp(PacketPtr pkt)
{
    DPRINTF(TraceCPUData, "Received timing response from Dcache.\n");
    dcacheGen.completeMemAccess(pkt);
}

bool
TraceCPU::DcachePort::recvTimingResp(PacketPtr pkt)
{
    // Handle the responses for data memory requests which is done inside the
    // elastic data generator
    owner->dcacheRecvTimingResp(pkt);
    // After processing the response delete the packet to free
    // memory
    delete pkt;

    return true;
}

void
TraceCPU::DcachePort::recvReqRetry()
{
    owner->dcacheRetryRecvd();
}

TraceCPU::ElasticDataGen::InputStream::InputStream(
    const std::string& filename, const double time_multiplier) :
    trace(filename),
    timeMultiplier(time_multiplier),
    microOpCount(0)
{
    // Create a protobuf message for the header and read it from the stream
    ProtoMessage::InstDepRecordHeader header_msg;
    if (!trace.read(header_msg)) {
        panic("Failed to read packet header from %s\n", filename);
    }

    if (header_msg.tick_freq() != SimClock::Frequency) {
        panic("Trace %s was recorded with a different tick frequency %d\n",
              header_msg.tick_freq());
    }

    // Assign window size equal to the field in the trace that was recorded
    // when the data dependency trace was captured in the o3cpu model
    windowSize = header_msg.window_size();
}

void
TraceCPU::ElasticDataGen::InputStream::reset()
{
    trace.reset();
}

bool
TraceCPU::ElasticDataGen::InputStream::read(GraphNode* element)
{
    ProtoMessage::InstDepRecord pkt_msg;
    if (trace.read(pkt_msg)) {
        // Required fields
        element->seqNum = pkt_msg.seq_num();
        element->type = pkt_msg.type();
        // Scale the compute delay to effectively scale the Trace CPU frequency
        element->compDelay = pkt_msg.comp_delay() * timeMultiplier;

        // Repeated field robDepList
        element->clearRobDep();
        assert((pkt_msg.rob_dep()).size() <= element->maxRobDep);
        for (int i = 0; i < (pkt_msg.rob_dep()).size(); i++) {
            element->robDep[element->numRobDep] = pkt_msg.rob_dep(i);
            element->numRobDep += 1;
        }

        // Repeated field
        element->clearRegDep();
        assert((pkt_msg.reg_dep()).size() <= TheISA::MaxInstSrcRegs);
        for (int i = 0; i < (pkt_msg.reg_dep()).size(); i++) {
            // There is a possibility that an instruction has both, a register
            // and order dependency on an instruction. In such a case, the
            // register dependency is omitted
            bool duplicate = false;
            for (int j = 0; j < element->numRobDep; j++) {
                duplicate |= (pkt_msg.reg_dep(i) == element->robDep[j]);
            }
            if (!duplicate) {
                element->regDep[element->numRegDep] = pkt_msg.reg_dep(i);
                element->numRegDep += 1;
            }
        }

        // Optional fields
        if (pkt_msg.has_p_addr())
            element->physAddr = pkt_msg.p_addr();
        else
            element->physAddr = 0;

        if (pkt_msg.has_v_addr())
            element->virtAddr = pkt_msg.v_addr();
        else
            element->virtAddr = 0;

        if (pkt_msg.has_size())
            element->size = pkt_msg.size();
        else
            element->size = 0;

        if (pkt_msg.has_flags())
            element->flags = pkt_msg.flags();
        else
            element->flags = 0;

        if (pkt_msg.has_pc())
            element->pc = pkt_msg.pc();
        else
            element->pc = 0;

        // ROB occupancy number
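        // Note: the optional weight field below additionally advances the
        // micro-op count, presumably accounting for micro-ops that have no
        // separate node of their own in the trace, so that robNum tracks
        // ROB occupancy across them.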
        ++microOpCount;
        if (pkt_msg.has_weight()) {
            microOpCount += pkt_msg.weight();
        }
        element->robNum = microOpCount;
        return true;
    }

    // We have reached the end of the file
    return false;
}

bool
TraceCPU::ElasticDataGen::GraphNode::removeRegDep(NodeSeqNum reg_dep)
{
    for (auto& own_reg_dep : regDep) {
        if (own_reg_dep == reg_dep) {
            // If register dependency is found, make it zero and return true
            own_reg_dep = 0;
            assert(numRegDep > 0);
            --numRegDep;
            DPRINTFR(TraceCPUData, "\tFor %lli: Marking register dependency "
                     "%lli done.\n", seqNum, reg_dep);
            return true;
        }
    }

    // Return false if the dependency is not found
    return false;
}

bool
TraceCPU::ElasticDataGen::GraphNode::removeRobDep(NodeSeqNum rob_dep)
{
    for (auto& own_rob_dep : robDep) {
        if (own_rob_dep == rob_dep) {
            // If the rob dependency is found, make it zero and return true
            own_rob_dep = 0;
            assert(numRobDep > 0);
            --numRobDep;
            DPRINTFR(TraceCPUData,
                     "\tFor %lli: Marking ROB dependency %lli done.\n",
                     seqNum, rob_dep);
            return true;
        }
    }
    return false;
}

void
TraceCPU::ElasticDataGen::GraphNode::clearRegDep()
{
    for (auto& own_reg_dep : regDep) {
        own_reg_dep = 0;
    }
    numRegDep = 0;
}

void
TraceCPU::ElasticDataGen::GraphNode::clearRobDep()
{
    for (auto& own_rob_dep : robDep) {
        own_rob_dep = 0;
    }
    numRobDep = 0;
}

bool
TraceCPU::ElasticDataGen::GraphNode::removeDepOnInst(NodeSeqNum done_seq_num)
{
    // If it is an rob dependency then remove it
    if (!removeRobDep(done_seq_num)) {
        // If it is not an rob dependency then it must be a register dependency
        // If the register dependency is not found, it violates an assumption
        // and must be caught by assert.
        M5_VAR_USED bool regdep_found = removeRegDep(done_seq_num);
        assert(regdep_found);
    }
    // Return true if the node is dependency free
    return (numRobDep == 0 && numRegDep == 0);
}

void
TraceCPU::ElasticDataGen::GraphNode::writeElementAsTrace() const
{
    DPRINTFR(TraceCPUData, "%lli", seqNum);
    DPRINTFR(TraceCPUData, ",%s", typeToStr());
    if (isLoad() || isStore()) {
        DPRINTFR(TraceCPUData, ",%i", physAddr);
        DPRINTFR(TraceCPUData, ",%i", size);
        DPRINTFR(TraceCPUData, ",%i", flags);
    }
    DPRINTFR(TraceCPUData, ",%lli", compDelay);
    int i = 0;
    DPRINTFR(TraceCPUData, "robDep:");
    while (robDep[i] != 0) {
        DPRINTFR(TraceCPUData, ",%lli", robDep[i]);
        i++;
    }
    i = 0;
    DPRINTFR(TraceCPUData, "regDep:");
    while (regDep[i] != 0) {
        DPRINTFR(TraceCPUData, ",%lli", regDep[i]);
        i++;
    }
    auto child_itr = dependents.begin();
    DPRINTFR(TraceCPUData, "dependents:");
    while (child_itr != dependents.end()) {
        DPRINTFR(TraceCPUData, ":%lli", (*child_itr)->seqNum);
        child_itr++;
    }

    DPRINTFR(TraceCPUData, "\n");
}

std::string
TraceCPU::ElasticDataGen::GraphNode::typeToStr() const
{
    return Record::RecordType_Name(type);
}

TraceCPU::FixedRetryGen::InputStream::InputStream(const std::string& filename)
    : trace(filename)
{
    // Create a protobuf message for the header and read it from the stream
    ProtoMessage::PacketHeader header_msg;
1400 panic("Failed to read packet header from %s\n", filename);
1401
1402 if (header_msg.tick_freq() != SimClock::Frequency) {
1403 panic("Trace %s was recorded with a different tick frequency %d\n",
1404 header_msg.tick_freq());
1405 }
1406 }
1407 }
1408
1409 void
1410 TraceCPU::FixedRetryGen::InputStream::reset()
1411 {
1412 trace.reset();
1413 }
1414
1415 bool
1416 TraceCPU::FixedRetryGen::InputStream::read(TraceElement* element)
1417 {
1418 ProtoMessage::Packet pkt_msg;
1419 if (trace.read(pkt_msg)) {
1420 element->cmd = pkt_msg.cmd();
1421 element->addr = pkt_msg.addr();
1422 element->blocksize = pkt_msg.size();
1423 element->tick = pkt_msg.tick();
1424 element->flags = pkt_msg.has_flags() ? pkt_msg.flags() : 0;
1425 element->pc = pkt_msg.has_pc() ? pkt_msg.pc() : 0;
1426 return true;
1427 }
1428
1429 // We have reached the end of the file
1430 return false;
1431 }