arch: Make and use endian specific versions of the mem helpers.
[gem5.git] / src / cpu / trace / trace_cpu.cc
1 /*
2 * Copyright (c) 2013 - 2016 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 *
37 * Authors: Radhika Jagtap
38 * Andreas Hansson
39 * Thomas Grass
40 */
41
42 #include "cpu/trace/trace_cpu.hh"
43
44 #include "sim/sim_exit.hh"
45
46 // Declare and initialize the static counter for number of trace CPUs.
47 int TraceCPU::numTraceCPUs = 0;
48
49 TraceCPU::TraceCPU(TraceCPUParams *params)
50 : BaseCPU(params),
51 icachePort(this),
52 dcachePort(this),
53 instMasterID(params->system->getMasterId(this, "inst")),
54 dataMasterID(params->system->getMasterId(this, "data")),
55 instTraceFile(params->instTraceFile),
56 dataTraceFile(params->dataTraceFile),
57 icacheGen(*this, ".iside", icachePort, instMasterID, instTraceFile),
58 dcacheGen(*this, ".dside", dcachePort, dataMasterID, dataTraceFile,
59 params),
60 icacheNextEvent([this]{ schedIcacheNext(); }, name()),
61 dcacheNextEvent([this]{ schedDcacheNext(); }, name()),
62 oneTraceComplete(false),
63 traceOffset(0),
64 execCompleteEvent(nullptr),
65 enableEarlyExit(params->enableEarlyExit),
66 progressMsgInterval(params->progressMsgInterval),
67 progressMsgThreshold(params->progressMsgInterval)
68 {
69 // Increment static counter for number of Trace CPUs.
70 ++TraceCPU::numTraceCPUs;
71
72 // Check that the python parameters for sizes of ROB, store buffer and
73 // load buffer do not overflow the corresponding C++ variables.
74 fatal_if(params->sizeROB > UINT16_MAX, "ROB size set to %d exceeds the "
75 "max. value of %d.\n", params->sizeROB, UINT16_MAX);
76 fatal_if(params->sizeStoreBuffer > UINT16_MAX, "Store buffer size set to"
77 " %d exceeds the max. value of %d.\n", params->sizeStoreBuffer,
78 UINT16_MAX);
79 fatal_if(params->sizeLoadBuffer > UINT16_MAX, "Load buffer size set to"
80 " %d exceeds the max. value of %d.\n",
81 params->sizeLoadBuffer, UINT16_MAX);
82 }
83
84 TraceCPU::~TraceCPU()
85 {
86
87 }
88
89 TraceCPU*
90 TraceCPUParams::create()
91 {
92 return new TraceCPU(this);
93 }
94
95 void
96 TraceCPU::updateNumOps(uint64_t rob_num)
97 {
98 numOps = rob_num;
99 if (progressMsgInterval != 0 && numOps.value() >= progressMsgThreshold) {
100 inform("%s: %i insts committed\n", name(), progressMsgThreshold);
101 progressMsgThreshold += progressMsgInterval;
102 }
103 }
104
105 void
106 TraceCPU::takeOverFrom(BaseCPU *oldCPU)
107 {
108 // Unbind the ports of the old CPU and bind the ports of the TraceCPU.
109 getInstPort().takeOverFrom(&oldCPU->getInstPort());
110 getDataPort().takeOverFrom(&oldCPU->getDataPort());
111 }
112
113 void
114 TraceCPU::init()
115 {
116 DPRINTF(TraceCPUInst, "Instruction fetch request trace file is \"%s\"."
117 "\n", instTraceFile);
118 DPRINTF(TraceCPUData, "Data memory request trace file is \"%s\".\n",
119 dataTraceFile);
120
121 BaseCPU::init();
122
123 // Get the send tick of the first instruction read request
124 Tick first_icache_tick = icacheGen.init();
125
126 // Get the send tick of the first data read/write request
127 Tick first_dcache_tick = dcacheGen.init();
128
129 // Set the trace offset as the minimum of that in both traces
130 traceOffset = std::min(first_icache_tick, first_dcache_tick);
131 inform("%s: Time offset (tick) found as min of both traces is %lli.\n",
132 name(), traceOffset);
133
134 // Schedule next icache and dcache event by subtracting the offset
135 schedule(icacheNextEvent, first_icache_tick - traceOffset);
136 schedule(dcacheNextEvent, first_dcache_tick - traceOffset);
137
138 // Adjust the trace offset for the dcache generator's ready nodes
139 // We don't need to do this for the icache generator as it will
140 // send its first request at the first event and schedule subsequent
141 // events using a relative tick delta
142 dcacheGen.adjustInitTraceOffset(traceOffset);
143
144 // If the Trace CPU simulation is configured to exit on any one trace
145 // completion then we don't need a counted event to count down all Trace
146 // CPUs in the system. If not then instantiate a counted event.
147 if (!enableEarlyExit) {
148 // The static counter for number of Trace CPUs is correctly set at
149 // this point so create an event and pass it.
150 execCompleteEvent = new CountedExitEvent("end of all traces reached.",
151 numTraceCPUs);
152 }
153
154 }
155
156 void
157 TraceCPU::schedIcacheNext()
158 {
159 DPRINTF(TraceCPUInst, "IcacheGen event.\n");
160
161 // Try to send the current packet or a retry packet if there is one
162 bool sched_next = icacheGen.tryNext();
163 // If packet sent successfully, schedule next event
164 if (sched_next) {
165 DPRINTF(TraceCPUInst, "Scheduling next icacheGen event "
166 "at %d.\n", curTick() + icacheGen.tickDelta());
167 schedule(icacheNextEvent, curTick() + icacheGen.tickDelta());
168 ++numSchedIcacheEvent;
169 } else {
170 // Check if the trace is complete. If not, do nothing because sending
171 // failed and the next event will be scheduled via recvReqRetry()
172 if (icacheGen.isTraceComplete()) {
173 // If this is the first trace to complete, set the variable. If it
174 // is already set then both traces are complete and the simulation can exit.
175 checkAndSchedExitEvent();
176 }
177 }
178 return;
179 }
180
181 void
182 TraceCPU::schedDcacheNext()
183 {
184 DPRINTF(TraceCPUData, "DcacheGen event.\n");
185
186 // Update stat for numCycles
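// clockEdge() divided by clockPeriod() gives the number of whole clock
// cycles elapsed since tick 0, so numCycles records total elapsed cycles
// rather than being incremented event by event.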
187 numCycles = clockEdge() / clockPeriod();
188
189 dcacheGen.execute();
190 if (dcacheGen.isExecComplete()) {
191 checkAndSchedExitEvent();
192 }
193 }
194
195 void
196 TraceCPU::checkAndSchedExitEvent()
197 {
198 if (!oneTraceComplete) {
199 oneTraceComplete = true;
200 } else {
201 // Schedule event to indicate execution is complete as both
202 // instruction and data access traces have been played back.
203 inform("%s: Execution complete.\n", name());
204 // If the replay is configured to exit early, that is when any one
205 // execution is complete then exit immediately and return. Otherwise,
206 // schedule the counted exit that counts down completion of each Trace
207 // CPU.
208 if (enableEarlyExit) {
209 exitSimLoop("End of trace reached");
210 } else {
211 schedule(*execCompleteEvent, curTick());
212 }
213 }
214 }
215
216 void
217 TraceCPU::regStats()
218 {
219
220 BaseCPU::regStats();
221
222 numSchedDcacheEvent
223 .name(name() + ".numSchedDcacheEvent")
224 .desc("Number of events scheduled to trigger data request generator")
225 ;
226
227 numSchedIcacheEvent
228 .name(name() + ".numSchedIcacheEvent")
229 .desc("Number of events scheduled to trigger instruction request generator")
230 ;
231
232 numOps
233 .name(name() + ".numOps")
234 .desc("Number of micro-ops simulated by the Trace CPU")
235 ;
236
237 cpi
238 .name(name() + ".cpi")
239 .desc("Cycles per micro-op used as a proxy for CPI")
240 .precision(6)
241 ;
242 cpi = numCycles/numOps;
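// cpi is a stats formula, so the division above is recorded symbolically
// and evaluated whenever stats are dumped rather than once at registration.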
243
244 icacheGen.regStats();
245 dcacheGen.regStats();
246 }
247
248 void
249 TraceCPU::ElasticDataGen::regStats()
250 {
251 using namespace Stats;
252
253 maxDependents
254 .name(name() + ".maxDependents")
255 .desc("Max number of dependents observed on a node")
256 ;
257
258 maxReadyListSize
259 .name(name() + ".maxReadyListSize")
260 .desc("Max size of the ready list observed")
261 ;
262
263 numSendAttempted
264 .name(name() + ".numSendAttempted")
265 .desc("Number of first attempts to send a request")
266 ;
267
268 numSendSucceeded
269 .name(name() + ".numSendSucceeded")
270 .desc("Number of successful first attempts")
271 ;
272
273 numSendFailed
274 .name(name() + ".numSendFailed")
275 .desc("Number of failed first attempts")
276 ;
277
278 numRetrySucceeded
279 .name(name() + ".numRetrySucceeded")
280 .desc("Number of successful retries")
281 ;
282
283 numSplitReqs
284 .name(name() + ".numSplitReqs")
285 .desc("Number of split requests")
286 ;
287
288 numSOLoads
289 .name(name() + ".numSOLoads")
290 .desc("Number of strictly ordered loads")
291 ;
292
293 numSOStores
294 .name(name() + ".numSOStores")
295 .desc("Number of strictly ordered stores")
296 ;
297
298 dataLastTick
299 .name(name() + ".dataLastTick")
300 .desc("Last tick simulated from the elastic data trace")
301 ;
302 }
303
304 Tick
305 TraceCPU::ElasticDataGen::init()
306 {
307 DPRINTF(TraceCPUData, "Initializing data memory request generator "
308 "DcacheGen: elastic issue with retry.\n");
309
310 if (!readNextWindow())
311 panic("Trace has %d elements. It must have at least %d elements.\n",
312 depGraph.size(), 2 * windowSize);
313 DPRINTF(TraceCPUData, "After 1st read, depGraph size:%d.\n",
314 depGraph.size());
315
316 if (!readNextWindow())
317 panic("Trace has %d elements. It must have at least %d elements.\n",
318 depGraph.size(), 2 * windowSize);
319 DPRINTF(TraceCPUData, "After 2nd read, depGraph size:%d.\n",
320 depGraph.size());
321
322 // Print readyList
323 if (DTRACE(TraceCPUData)) {
324 printReadyList();
325 }
326 auto free_itr = readyList.begin();
327 DPRINTF(TraceCPUData, "Execute tick of the first dependency free node %lli"
328 " is %d.\n", free_itr->seqNum, free_itr->execTick);
329 // Return the execute tick of the earliest ready node so that an event
330 // can be scheduled to call execute()
331 return (free_itr->execTick);
332 }
333
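// The windows read up front in init() carry absolute trace ticks for the
// ready nodes' execute ticks; subtract the common trace offset so that
// replay effectively starts at tick 0 (see TraceCPU::init()).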
334 void
335 TraceCPU::ElasticDataGen::adjustInitTraceOffset(Tick& offset) {
336 for (auto& free_node : readyList) {
337 free_node.execTick -= offset;
338 }
339 }
340
341 void
342 TraceCPU::ElasticDataGen::exit()
343 {
344 trace.reset();
345 }
346
347 bool
348 TraceCPU::ElasticDataGen::readNextWindow()
349 {
350
351 // Read and add next window
352 DPRINTF(TraceCPUData, "Reading next window from file.\n");
353
354 if (traceComplete) {
355 // We are at the end of the file, thus we have no more records.
356 // Return false.
357 return false;
358 }
359
360 DPRINTF(TraceCPUData, "Start read: Size of depGraph is %d.\n",
361 depGraph.size());
362
363 uint32_t num_read = 0;
364 while (num_read != windowSize) {
365
366 // Create a new graph node
367 GraphNode* new_node = new GraphNode;
368
369 // Read the next line to get the next record. If that fails then end of
370 // trace has been reached and traceComplete needs to be set in addition
371 // to returning false.
372 if (!trace.read(new_node)) {
373 DPRINTF(TraceCPUData, "\tTrace complete!\n");
374 traceComplete = true;
375 return false;
376 }
377
378 // Annotate the ROB dependencies of the new node onto the parent nodes.
379 addDepsOnParent(new_node, new_node->robDep, new_node->numRobDep);
380 // Annotate the register dependencies of the new node onto the parent
381 // nodes.
382 addDepsOnParent(new_node, new_node->regDep, new_node->numRegDep);
383
384 num_read++;
385 // Add to map
386 depGraph[new_node->seqNum] = new_node;
387 if (new_node->numRobDep == 0 && new_node->numRegDep == 0) {
388 // Source dependencies are already complete, check if resources
389 // are available and issue. The execution time is approximated
390 // to current time plus the computational delay.
391 checkAndIssue(new_node);
392 }
393 }
394
395 DPRINTF(TraceCPUData, "End read: Size of depGraph is %d.\n",
396 depGraph.size());
397 return true;
398 }
399
400 template<typename T> void
401 TraceCPU::ElasticDataGen::addDepsOnParent(GraphNode *new_node,
402 T& dep_array, uint8_t& num_dep)
403 {
404 for (auto& a_dep : dep_array) {
405 // The convention is to set the dependencies starting with the first
406 // index in the ROB and register dependency arrays. Thus, when we reach
407 // a dependency equal to the initialisation value of zero, we know we have
408 // iterated over all dependencies and can break.
409 if (a_dep == 0)
410 break;
411 // We look up the valid dependency, i.e. the parent of this node
412 auto parent_itr = depGraph.find(a_dep);
413 if (parent_itr != depGraph.end()) {
414 // If the parent is found, it is yet to be executed. Append a
415 // pointer to the new node to the dependents list of the parent
416 // node.
417 parent_itr->second->dependents.push_back(new_node);
418 auto num_depts = parent_itr->second->dependents.size();
419 maxDependents = std::max<double>(num_depts, maxDependents.value());
420 } else {
421 // The dependency is not found in the graph. So consider
422 // the execution of the parent as complete, i.e. remove this
423 // dependency.
424 a_dep = 0;
425 num_dep--;
426 }
427 }
428 }
429
430 void
431 TraceCPU::ElasticDataGen::execute()
432 {
433 DPRINTF(TraceCPUData, "Execute start occupancy:\n");
434 DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, "
435 "depFreeQueue = %d ,", depGraph.size(), readyList.size(),
436 depFreeQueue.size());
437 hwResource.printOccupancy();
438
439 // Read next window to make sure that dependents of all dep-free nodes
440 // are in the depGraph
441 if (nextRead) {
442 readNextWindow();
443 nextRead = false;
444 }
445
446 // First attempt to issue the pending dependency-free nodes held
447 // in depFreeQueue. If resources have become available for a node,
448 // then issue it, i.e. add the node to readyList.
449 while (!depFreeQueue.empty()) {
450 if (checkAndIssue(depFreeQueue.front(), false)) {
451 DPRINTF(TraceCPUData, "Removing from depFreeQueue: seq. num "
452 "%lli.\n", (depFreeQueue.front())->seqNum);
453 depFreeQueue.pop();
454 } else {
455 break;
456 }
457 }
458 // Proceed to execute from readyList
459 auto graph_itr = depGraph.begin();
460 auto free_itr = readyList.begin();
461 // Iterate through readyList until the next free node has its execute
462 // tick later than curTick or the end of readyList is reached
463 while (free_itr != readyList.end() && free_itr->execTick <= curTick()) {
464
465 // Get pointer to the node to be executed
466 graph_itr = depGraph.find(free_itr->seqNum);
467 assert(graph_itr != depGraph.end());
468 GraphNode* node_ptr = graph_itr->second;
469
470 // If there is a retryPkt, send it; otherwise execute the load or store
471 if (retryPkt) {
472 // The retryPkt must be the request that was created by the
473 // first node in the readyList.
474 if (retryPkt->req->getReqInstSeqNum() != node_ptr->seqNum) {
475 panic("Retry packet's sequence number does not match "
476 "the first node in the readyList.\n");
477 }
478 if (port.sendTimingReq(retryPkt)) {
479 ++numRetrySucceeded;
480 retryPkt = nullptr;
481 }
482 } else if (node_ptr->isLoad() || node_ptr->isStore()) {
483 // If there is no retryPkt, attempt to send a memory request in
484 // case of a load or store node. If the send fails, executeMemReq()
485 // returns a packet pointer, which we save in retryPkt. In case of
486 // a comp node we don't do anything and simply continue as if the
487 // execution of the comp node succeeded.
488 retryPkt = executeMemReq(node_ptr);
489 }
490 // If the retryPkt or a new load/store node failed, we exit from here
491 // as a retry from cache will bring the control to execute(). The
492 // first node in readyList then, will be the failed node.
493 if (retryPkt) {
494 break;
495 }
496
497 // Proceed to remove dependencies for the successfully executed node.
498 // If it is a load which is not strictly ordered and we sent a
499 // request for it successfully, we do not yet mark any register
500 // dependencies complete. But as per the dependency model, the ROB
501 // dependencies that load and non load/store dependents have on this
502 // load are marked complete once the load is successfully sent.
503 if (node_ptr->isLoad() && !node_ptr->isStrictlyOrdered()) {
504 // If execute succeeded mark its dependents as complete
505 DPRINTF(TraceCPUData, "Node seq. num %lli sent. Waking up "
506 "dependents..\n", node_ptr->seqNum);
507
508 auto child_itr = (node_ptr->dependents).begin();
509 while (child_itr != (node_ptr->dependents).end()) {
510 // ROB dependency of a store on a load must not be removed
511 // after load is sent but after response is received
512 if (!(*child_itr)->isStore() &&
513 (*child_itr)->removeRobDep(node_ptr->seqNum)) {
514
515 // Check if the child node has become dependency free
516 if ((*child_itr)->numRobDep == 0 &&
517 (*child_itr)->numRegDep == 0) {
518
519 // Source dependencies are complete, check if
520 // resources are available and issue
521 checkAndIssue(*child_itr);
522 }
523 // Remove this child for the sent load and point to new
524 // location of the element following the erased element
525 child_itr = node_ptr->dependents.erase(child_itr);
526 } else {
527 // This child is not dependency-free, point to the next
528 // child
529 child_itr++;
530 }
531 }
532 } else {
533 // If it is a strictly ordered load mark its dependents as complete
534 // as we do not send a request for this case. If it is a store or a
535 // comp node we also mark all its dependents complete.
536 DPRINTF(TraceCPUData, "Node seq. num %lli done. Waking"
537 " up dependents..\n", node_ptr->seqNum);
538
539 for (auto child : node_ptr->dependents) {
540 // If the child node is dependency free removeDepOnInst()
541 // returns true.
542 if (child->removeDepOnInst(node_ptr->seqNum)) {
543 // Source dependencies are complete, check if resources
544 // are available and issue
545 checkAndIssue(child);
546 }
547 }
548 }
549
550 // After executing the node, remove from readyList and delete node.
551 readyList.erase(free_itr);
552 // If it is a cacheable load which was sent, don't delete
553 // just yet. Delete it in completeMemAccess() after the
554 // response is received. If it is an strictly ordered
555 // load, it was not sent and all dependencies were simply
556 // marked complete. Thus it is safe to delete it. For
557 // stores and non load/store nodes all dependencies were
558 // marked complete so it is safe to delete it.
559 if (!node_ptr->isLoad() || node_ptr->isStrictlyOrdered()) {
560 // Release all resources occupied by the completed node
561 hwResource.release(node_ptr);
562 // clear the dynamically allocated set of dependents
563 (node_ptr->dependents).clear();
564 // Update the stat for numOps simulated
565 owner.updateNumOps(node_ptr->robNum);
566 // delete node
567 delete node_ptr;
568 // remove from graph
569 depGraph.erase(graph_itr);
570 }
571 // Point to first node to continue to next iteration of while loop
572 free_itr = readyList.begin();
573 } // end of while loop
574
575 // Print readyList, sizes of queues and resource status after updating
576 if (DTRACE(TraceCPUData)) {
577 printReadyList();
578 DPRINTF(TraceCPUData, "Execute end occupancy:\n");
579 DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, "
580 "depFreeQueue = %d ,", depGraph.size(), readyList.size(),
581 depFreeQueue.size());
582 hwResource.printOccupancy();
583 }
584
585 if (retryPkt) {
586 DPRINTF(TraceCPUData, "Not scheduling an event as expecting a retry"
587 " event from the cache for seq. num %lli.\n",
588 retryPkt->req->getReqInstSeqNum());
589 return;
590 }
591 // If the size of the dependency graph is less than the dependency window
592 // then read from the trace file to populate the graph next time we are in
593 // execute.
594 if (depGraph.size() < windowSize && !traceComplete)
595 nextRead = true;
596
597 // If cache is not blocked, schedule an event for the first execTick in
598 // readyList else retry from cache will schedule the event. If the ready
599 // list is empty then check if the next pending node has resources
600 // available to issue. If yes, then schedule an event for the next cycle.
601 if (!readyList.empty()) {
602 Tick next_event_tick = std::max(readyList.begin()->execTick,
603 curTick());
604 DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
605 next_event_tick);
606 owner.schedDcacheNextEvent(next_event_tick);
607 } else if (readyList.empty() && !depFreeQueue.empty() &&
608 hwResource.isAvailable(depFreeQueue.front())) {
609 DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
610 owner.clockEdge(Cycles(1)));
611 owner.schedDcacheNextEvent(owner.clockEdge(Cycles(1)));
612 }
613
614 // If trace is completely read, readyList is empty and depGraph is empty,
615 // set execComplete to true
616 if (depGraph.empty() && readyList.empty() && traceComplete &&
617 !hwResource.awaitingResponse()) {
618 DPRINTF(TraceCPUData, "\tExecution Complete!\n");
619 execComplete = true;
620 dataLastTick = curTick();
621 }
622 }
623
624 PacketPtr
625 TraceCPU::ElasticDataGen::executeMemReq(GraphNode* node_ptr)
626 {
627
628 DPRINTF(TraceCPUData, "Executing memory request %lli (phys addr %d, "
629 "virt addr %d, pc %#x, size %d, flags %d).\n",
630 node_ptr->seqNum, node_ptr->physAddr, node_ptr->virtAddr,
631 node_ptr->pc, node_ptr->size, node_ptr->flags);
632
633 // If the request is strictly ordered, do not send it. Just return nullptr
634 // as if it was successfully sent.
635 if (node_ptr->isStrictlyOrdered()) {
636 node_ptr->isLoad() ? ++numSOLoads : ++numSOStores;
637 DPRINTF(TraceCPUData, "Skipping strictly ordered request %lli.\n",
638 node_ptr->seqNum);
639 return nullptr;
640 }
641
642 // Check if the request spans two cache lines as this condition triggers
643 // an assert fail in the L1 cache. If it does then truncate the size to
644 // access only until the end of that line and ignore the remainder. The
645 // stat counting this is useful to keep a check on how frequently this
646 // happens. If required the code could be revised to mimic splitting such
647 // a request into two.
648 unsigned blk_size = owner.cacheLineSize();
649 Addr blk_offset = (node_ptr->physAddr & (Addr)(blk_size - 1));
650 if (!(blk_offset + node_ptr->size <= blk_size)) {
651 node_ptr->size = blk_size - blk_offset;
652 ++numSplitReqs;
653 }
654
655 // Create a request and the packet containing request
656 auto req = std::make_shared<Request>(
657 node_ptr->physAddr, node_ptr->size,
658 node_ptr->flags, masterID, node_ptr->seqNum,
659 ContextID(0));
660
661 req->setPC(node_ptr->pc);
662 // If virtual address is valid, set the asid and virtual address fields
663 // of the request.
664 if (node_ptr->virtAddr != 0) {
665 req->setVirt(node_ptr->asid, node_ptr->virtAddr, node_ptr->size,
666 node_ptr->flags, masterID, node_ptr->pc);
667 req->setPaddr(node_ptr->physAddr);
668 req->setReqInstSeqNum(node_ptr->seqNum);
669 }
670
671 PacketPtr pkt;
672 uint8_t* pkt_data = new uint8_t[req->getSize()];
673 if (node_ptr->isLoad()) {
674 pkt = Packet::createRead(req);
675 } else {
676 pkt = Packet::createWrite(req);
677 memset(pkt_data, 0xA, req->getSize());
678 }
679 pkt->dataDynamic(pkt_data);
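// dataDynamic() passes ownership of pkt_data to the packet, which frees it
// when the packet is destroyed, so no explicit delete[] is needed here.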
680
681 // Call MasterPort method to send a timing request for this packet
682 bool success = port.sendTimingReq(pkt);
683 ++numSendAttempted;
684
685 if (!success) {
686 // If it fails, return the packet to retry when a retry is signalled by
687 // the cache
688 ++numSendFailed;
689 DPRINTF(TraceCPUData, "Send failed. Saving packet for retry.\n");
690 return pkt;
691 } else {
692 // If it succeeds, return nullptr
693 ++numSendSucceeded;
694 return nullptr;
695 }
696 }
697
698 bool
699 TraceCPU::ElasticDataGen::checkAndIssue(const GraphNode* node_ptr, bool first)
700 {
701 // Assert the node is dependency-free
702 assert(node_ptr->numRobDep == 0 && node_ptr->numRegDep == 0);
703
704 // If this is the first attempt, print a debug message to indicate this.
705 if (first) {
706 DPRINTFR(TraceCPUData, "\t\tseq. num %lli(%s) with rob num %lli is now"
707 " dependency free.\n", node_ptr->seqNum, node_ptr->typeToStr(),
708 node_ptr->robNum);
709 }
710
711 // Check if resources are available to issue the specific node
712 if (hwResource.isAvailable(node_ptr)) {
713 // If resources are free only then add to readyList
714 DPRINTFR(TraceCPUData, "\t\tResources available for seq. num %lli. Adding"
715 " to readyList, occupying resources.\n", node_ptr->seqNum);
716 // Compute the execute tick by adding the compute delay for the node
717 // and add the ready node to the ready list
718 addToSortedReadyList(node_ptr->seqNum,
719 owner.clockEdge() + node_ptr->compDelay);
720 // Account for the resources taken up by this issued node.
721 hwResource.occupy(node_ptr);
722 return true;
723
724 } else {
725 if (first) {
726 // Although dependencies are complete, resources are not available.
727 DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num %lli."
728 " Adding to depFreeQueue.\n", node_ptr->seqNum);
729 depFreeQueue.push(node_ptr);
730 } else {
731 DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num %lli. "
732 "Still pending issue.\n", node_ptr->seqNum);
733 }
734 return false;
735 }
736 }
737
738 void
739 TraceCPU::ElasticDataGen::completeMemAccess(PacketPtr pkt)
740 {
741 // Release the resources for this completed node.
742 if (pkt->isWrite()) {
743 // Consider store complete.
744 hwResource.releaseStoreBuffer();
745 // If it is a store response then do nothing since we do not model
746 // dependencies on store completion in the trace. But if we were
747 // blocking execution due to store buffer fullness, we need to schedule
748 // an event and attempt to progress.
749 } else {
750 // If it is a load response then release the dependents waiting on it.
751 // Get pointer to the completed load
752 auto graph_itr = depGraph.find(pkt->req->getReqInstSeqNum());
753 assert(graph_itr != depGraph.end());
754 GraphNode* node_ptr = graph_itr->second;
755
756 // Release resources occupied by the load
757 hwResource.release(node_ptr);
758
759 DPRINTF(TraceCPUData, "Load seq. num %lli response received. Waking up"
760 " dependents..\n", node_ptr->seqNum);
761
762 for (auto child : node_ptr->dependents) {
763 if (child->removeDepOnInst(node_ptr->seqNum)) {
764 checkAndIssue(child);
765 }
766 }
767
768 // clear the dynamically allocated set of dependents
769 (node_ptr->dependents).clear();
770 // Update the stat for numOps completed
771 owner.updateNumOps(node_ptr->robNum);
772 // delete node
773 delete node_ptr;
774 // remove from graph
775 depGraph.erase(graph_itr);
776 }
777
778 if (DTRACE(TraceCPUData)) {
779 printReadyList();
780 }
781
782 // If the size of the dependency graph is less than the dependency window
783 // then read from the trace file to populate the graph next time we are in
784 // execute.
785 if (depGraph.size() < windowSize && !traceComplete)
786 nextRead = true;
787
788 // If not waiting for retry, attempt to schedule next event
789 if (!retryPkt) {
790 // We might have new dep-free nodes in the list which will have execute
791 // tick greater than or equal to curTick. But a new dep-free node might
792 // have its execute tick earlier. Therefore, attempt to reschedule. It
793 // could happen that the readyList is empty and we got here via a
794 // last remaining response. So, either the trace is complete or there
795 // are pending nodes in the depFreeQueue. The checking is done in the
796 // execute() control flow, so schedule an event to go via that flow.
797 Tick next_event_tick = readyList.empty() ? owner.clockEdge(Cycles(1)) :
798 std::max(readyList.begin()->execTick, owner.clockEdge(Cycles(1)));
799 DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
800 next_event_tick);
801 owner.schedDcacheNextEvent(next_event_tick);
802 }
803 }
804
805 void
806 TraceCPU::ElasticDataGen::addToSortedReadyList(NodeSeqNum seq_num,
807 Tick exec_tick)
808 {
809 ReadyNode ready_node;
810 ready_node.seqNum = seq_num;
811 ready_node.execTick = exec_tick;
812
813 // Iterator to readyList
814 auto itr = readyList.begin();
815
816 // If the readyList is empty, simply insert the new node at the beginning
817 // and return
818 if (itr == readyList.end()) {
819 readyList.insert(itr, ready_node);
820 maxReadyListSize = std::max<double>(readyList.size(),
821 maxReadyListSize.value());
822 return;
823 }
824
825 // If the first node in the list is the one that failed to send and is
826 // awaiting a retry (retryPkt), skip past it so that it keeps its position
827 // at the head of the list regardless of the new node's tick.
828 if (retryPkt)
829 if (retryPkt->req->getReqInstSeqNum() == itr->seqNum)
830 itr++;
831
832 // Increment the iterator and compare the node pointed to by it to the new
833 // node till the position to insert the new node is found.
834 bool found = false;
835 while (!found && itr != readyList.end()) {
836 // If the execution tick of the new node is less than the node then
837 // this is the position to insert
838 if (exec_tick < itr->execTick)
839 found = true;
840 // If the execution tick of the new node is equal to the node then
841 // sort in ascending order of sequence numbers
842 else if (exec_tick == itr->execTick) {
843 // If the sequence number of the new node is less than the node
844 // then this is the position to insert
845 if (seq_num < itr->seqNum)
846 found = true;
847 // Else go to next node
848 else
849 itr++;
850 }
851 // If the execution tick of the new node is greater than the node then
852 // go to the next node
853 else
854 itr++;
855 }
856 readyList.insert(itr, ready_node);
857 // Update the stat for max size reached of the readyList
858 maxReadyListSize = std::max<double>(readyList.size(),
859 maxReadyListSize.value());
860 }
861
862 void
863 TraceCPU::ElasticDataGen::printReadyList() {
864
865 auto itr = readyList.begin();
866 if (itr == readyList.end()) {
867 DPRINTF(TraceCPUData, "readyList is empty.\n");
868 return;
869 }
870 DPRINTF(TraceCPUData, "Printing readyList:\n");
871 while (itr != readyList.end()) {
872 auto graph_itr = depGraph.find(itr->seqNum);
873 GraphNode* node_ptr M5_VAR_USED = graph_itr->second;
874 DPRINTFR(TraceCPUData, "\t%lld(%s), %lld\n", itr->seqNum,
875 node_ptr->typeToStr(), itr->execTick);
876 itr++;
877 }
878 }
879
880 TraceCPU::ElasticDataGen::HardwareResource::HardwareResource(
881 uint16_t max_rob, uint16_t max_stores, uint16_t max_loads)
882 : sizeROB(max_rob),
883 sizeStoreBuffer(max_stores),
884 sizeLoadBuffer(max_loads),
885 oldestInFlightRobNum(UINT64_MAX),
886 numInFlightLoads(0),
887 numInFlightStores(0)
888 {}
889
890 void
891 TraceCPU::ElasticDataGen::HardwareResource::occupy(const GraphNode* new_node)
892 {
893 // Occupy ROB entry for the issued node
894 // Merely maintain the oldest node, i.e. the numerically least robNum, by
895 // saving it in the variable oldestInFlightRobNum.
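// inFlightNodes is ordered by sequence number, so its first element always
// corresponds to the oldest in-flight node.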
896 inFlightNodes[new_node->seqNum] = new_node->robNum;
897 oldestInFlightRobNum = inFlightNodes.begin()->second;
898
899 // Occupy Load/Store Buffer entry for the issued node if applicable
900 if (new_node->isLoad()) {
901 ++numInFlightLoads;
902 } else if (new_node->isStore()) {
903 ++numInFlightStores;
904 } // else if it is a non load/store node, no buffer entry is occupied
905
906 printOccupancy();
907 }
908
909 void
910 TraceCPU::ElasticDataGen::HardwareResource::release(const GraphNode* done_node)
911 {
912 assert(!inFlightNodes.empty());
913 DPRINTFR(TraceCPUData, "\tClearing done seq. num %d from inFlightNodes..\n",
914 done_node->seqNum);
915
916 assert(inFlightNodes.find(done_node->seqNum) != inFlightNodes.end());
917 inFlightNodes.erase(done_node->seqNum);
918
919 if (inFlightNodes.empty()) {
920 // If we deleted the only in-flight node, then reset
921 // oldestInFlightRobNum to its initialized (max) value.
922 oldestInFlightRobNum = UINT64_MAX;
923 } else {
924 // Set the oldest in-flight node rob number equal to the first node in
925 // the inFlightNodes since that will have the numerically least value.
926 oldestInFlightRobNum = inFlightNodes.begin()->second;
927 }
928
929 DPRINTFR(TraceCPUData, "\tCleared. inFlightNodes.size() = %d, "
930 "oldestInFlightRobNum = %d\n", inFlightNodes.size(),
931 oldestInFlightRobNum);
932
933 // A store is considered complete when a request is sent, thus ROB entry is
934 // freed. But it occupies an entry in the Store Buffer until its response
935 // is received. A load is considered complete when a response is received,
936 // thus both ROB and Load Buffer entries can be released.
937 if (done_node->isLoad()) {
938 assert(numInFlightLoads != 0);
939 --numInFlightLoads;
940 }
941 // For normal writes, we send the requests out and clear a store buffer
942 // entry on response. For writes which are strictly ordered, e.g.
943 // writes to device registers, we do that within release() which is called
944 // when node is executed and taken off from readyList.
945 if (done_node->isStore() && done_node->isStrictlyOrdered()) {
946 releaseStoreBuffer();
947 }
948 }
949
950 void
951 TraceCPU::ElasticDataGen::HardwareResource::releaseStoreBuffer()
952 {
953 assert(numInFlightStores != 0);
954 --numInFlightStores;
955 }
956
957 bool
958 TraceCPU::ElasticDataGen::HardwareResource::isAvailable(
959 const GraphNode* new_node) const
960 {
961 uint16_t num_in_flight_nodes;
962 if (inFlightNodes.empty()) {
963 num_in_flight_nodes = 0;
964 DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
965 " #in-flight nodes = 0", new_node->seqNum);
966 } else if (new_node->robNum > oldestInFlightRobNum) {
967 // This is the intuitive case where the new dep-free node is a younger
968 // instruction than the oldest in-flight instruction. Thus we make sure
969 // num_in_flight_nodes does not overflow.
970 num_in_flight_nodes = new_node->robNum - oldestInFlightRobNum;
971 DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
972 " #in-flight nodes = %d - %d = %d", new_node->seqNum,
973 new_node->robNum, oldestInFlightRobNum, num_in_flight_nodes);
974 } else {
975 // This is the case where an instruction older than the oldest in-
976 // flight instruction becomes dep-free. Thus we must have already
977 // accounted for the entry in ROB for this new dep-free node.
978 // Immediately after this check returns true, oldestInFlightRobNum will
979 // be updated in occupy(). We simply let this node issue now.
980 num_in_flight_nodes = 0;
981 DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
982 " new oldestInFlightRobNum = %d, #in-flight nodes ignored",
983 new_node->seqNum, new_node->robNum);
984 }
985 DPRINTFR(TraceCPUData, ", LQ = %d/%d, SQ = %d/%d.\n",
986 numInFlightLoads, sizeLoadBuffer,
987 numInFlightStores, sizeStoreBuffer);
988 // Check if resources are available to issue the specific node
989 if (num_in_flight_nodes >= sizeROB) {
990 return false;
991 }
992 if (new_node->isLoad() && numInFlightLoads >= sizeLoadBuffer) {
993 return false;
994 }
995 if (new_node->isStore() && numInFlightStores >= sizeStoreBuffer) {
996 return false;
997 }
998 return true;
999 }
1000
1001 bool
1002 TraceCPU::ElasticDataGen::HardwareResource::awaitingResponse() const {
1003 // Return true if there is at least one read or write request in flight
1004 return (numInFlightStores != 0 || numInFlightLoads != 0);
1005 }
1006
1007 void
1008 TraceCPU::ElasticDataGen::HardwareResource::printOccupancy() {
1009 DPRINTFR(TraceCPUData, "oldestInFlightRobNum = %d, "
1010 "LQ = %d/%d, SQ = %d/%d.\n",
1011 oldestInFlightRobNum,
1012 numInFlightLoads, sizeLoadBuffer,
1013 numInFlightStores, sizeStoreBuffer);
1014 }
1015
1016 void
1017 TraceCPU::FixedRetryGen::regStats()
1018 {
1019 using namespace Stats;
1020
1021 numSendAttempted
1022 .name(name() + ".numSendAttempted")
1023 .desc("Number of first attempts to send a request")
1024 ;
1025
1026 numSendSucceeded
1027 .name(name() + ".numSendSucceeded")
1028 .desc("Number of successful first attempts")
1029 ;
1030
1031 numSendFailed
1032 .name(name() + ".numSendFailed")
1033 .desc("Number of failed first attempts")
1034 ;
1035
1036 numRetrySucceeded
1037 .name(name() + ".numRetrySucceeded")
1038 .desc("Number of successful retries")
1039 ;
1040
1041 instLastTick
1042 .name(name() + ".instLastTick")
1043 .desc("Last tick simulated from the fixed inst trace")
1044 ;
1045 }
1046
1047 Tick
1048 TraceCPU::FixedRetryGen::init()
1049 {
1050 DPRINTF(TraceCPUInst, "Initializing instruction fetch request generator"
1051 " IcacheGen: fixed issue with retry.\n");
1052
1053 if (nextExecute()) {
1054 DPRINTF(TraceCPUInst, "\tFirst tick = %d.\n", currElement.tick);
1055 return currElement.tick;
1056 } else {
1057 panic("Read of first message in the trace failed.\n");
1058 return MaxTick;
1059 }
1060 }
1061
1062 bool
1063 TraceCPU::FixedRetryGen::tryNext()
1064 {
1065 // If there is a retry packet, try to send it
1066 if (retryPkt) {
1067
1068 DPRINTF(TraceCPUInst, "Trying to send retry packet.\n");
1069
1070 if (!port.sendTimingReq(retryPkt)) {
1071 // Still blocked! This should never occur.
1072 DPRINTF(TraceCPUInst, "Retry packet sending failed.\n");
1073 return false;
1074 }
1075 ++numRetrySucceeded;
1076 } else {
1077
1078 DPRINTF(TraceCPUInst, "Trying to send packet for currElement.\n");
1079
1080 // try sending current element
1081 assert(currElement.isValid());
1082
1083 ++numSendAttempted;
1084
1085 if (!send(currElement.addr, currElement.blocksize,
1086 currElement.cmd, currElement.flags, currElement.pc)) {
1087 DPRINTF(TraceCPUInst, "currElement sending failed.\n");
1088 ++numSendFailed;
1089 // return false to indicate not to schedule next event
1090 return false;
1091 } else {
1092 ++numSendSucceeded;
1093 }
1094 }
1095 // If packet was sent successfully, either retryPkt or currElement, return
1096 // true to indicate to schedule event at current Tick plus delta. If packet
1097 // was sent successfully and there is no next packet to send, return false.
1098 DPRINTF(TraceCPUInst, "Packet sent successfully, trying to read next "
1099 "element.\n");
1100 retryPkt = nullptr;
1101 // Read the next element into currElement. currElement gets cleared, so
1102 // save the tick first to calculate the delta.
1103 Tick last_tick = currElement.tick;
1104 if (nextExecute()) {
1105 assert(currElement.tick >= last_tick);
1106 delta = currElement.tick - last_tick;
1107 }
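// delta is consumed by the owner via tickDelta() when scheduling the next
// fetch event relative to curTick().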
1108 return !traceComplete;
1109 }
1110
1111 void
1112 TraceCPU::FixedRetryGen::exit()
1113 {
1114 trace.reset();
1115 }
1116
1117 bool
1118 TraceCPU::FixedRetryGen::nextExecute()
1119 {
1120 if (traceComplete)
1121 // We are at the end of the file, thus we have no more messages.
1122 // Return false.
1123 return false;
1124
1125
1126 // Reset the currElement to the default values
1127 currElement.clear();
1128
1129 // Read the next line to get the next message. If that fails then end of
1130 // trace has been reached and traceComplete needs to be set in addition
1131 // to returning false. If successful then next message is in currElement.
1132 if (!trace.read(&currElement)) {
1133 traceComplete = true;
1134 instLastTick = curTick();
1135 return false;
1136 }
1137
1138 DPRINTF(TraceCPUInst, "inst fetch: %c addr %d pc %#x size %d tick %d\n",
1139 currElement.cmd.isRead() ? 'r' : 'w',
1140 currElement.addr,
1141 currElement.pc,
1142 currElement.blocksize,
1143 currElement.tick);
1144
1145 return true;
1146 }
1147
1148 bool
1149 TraceCPU::FixedRetryGen::send(Addr addr, unsigned size, const MemCmd& cmd,
1150 Request::FlagsType flags, Addr pc)
1151 {
1152
1153 // Create new request
1154 auto req = std::make_shared<Request>(addr, size, flags, masterID);
1155 req->setPC(pc);
1156
1157 // If this is not done it triggers assert in L1 cache for invalid contextId
1158 req->setContext(ContextID(0));
1159
1160 // Embed it in a packet
1161 PacketPtr pkt = new Packet(req, cmd);
1162
1163 uint8_t* pkt_data = new uint8_t[req->getSize()];
1164 pkt->dataDynamic(pkt_data);
1165
1166 if (cmd.isWrite()) {
1167 memset(pkt_data, 0xA, req->getSize());
1168 }
1169
1170 // Call MasterPort method to send a timing request for this packet
1171 bool success = port.sendTimingReq(pkt);
1172 if (!success) {
1173 // If it fails, save the packet to retry when a retry is signalled by
1174 // the cache
1175 retryPkt = pkt;
1176 }
1177 return success;
1178 }
1179
1180 void
1181 TraceCPU::icacheRetryRecvd()
1182 {
1183 // Schedule an event to go through the control flow in the same tick as
1184 // retry is received
1185 DPRINTF(TraceCPUInst, "Icache retry received. Scheduling next IcacheGen"
1186 " event @%lli.\n", curTick());
1187 schedule(icacheNextEvent, curTick());
1188 }
1189
1190 void
1191 TraceCPU::dcacheRetryRecvd()
1192 {
1193 // Schedule an event to go through the execute flow in the same tick as
1194 // retry is received
1195 DPRINTF(TraceCPUData, "Dcache retry received. Scheduling next DcacheGen"
1196 " event @%lli.\n", curTick());
1197 schedule(dcacheNextEvent, curTick());
1198 }
1199
1200 void
1201 TraceCPU::schedDcacheNextEvent(Tick when)
1202 {
1203 if (!dcacheNextEvent.scheduled()) {
1204 DPRINTF(TraceCPUData, "Scheduling next DcacheGen event at %lli.\n",
1205 when);
1206 schedule(dcacheNextEvent, when);
1207 ++numSchedDcacheEvent;
1208 } else if (when < dcacheNextEvent.when()) {
1209 DPRINTF(TraceCPUData, "Re-scheduling next dcache event from %lli"
1210 " to %lli.\n", dcacheNextEvent.when(), when);
1211 reschedule(dcacheNextEvent, when);
1212 }
1213
1214 }
1215
1216 bool
1217 TraceCPU::IcachePort::recvTimingResp(PacketPtr pkt)
1218 {
1219 // All responses on the instruction fetch side are ignored. Simply delete
1220 // the packet to free allocated memory
1221 delete pkt;
1222
1223 return true;
1224 }
1225
1226 void
1227 TraceCPU::IcachePort::recvReqRetry()
1228 {
1229 owner->icacheRetryRecvd();
1230 }
1231
1232 void
1233 TraceCPU::dcacheRecvTimingResp(PacketPtr pkt)
1234 {
1235 DPRINTF(TraceCPUData, "Received timing response from Dcache.\n");
1236 dcacheGen.completeMemAccess(pkt);
1237 }
1238
1239 bool
1240 TraceCPU::DcachePort::recvTimingResp(PacketPtr pkt)
1241 {
1242 // Handle the responses for data memory requests which is done inside the
1243 // elastic data generator
1244 owner->dcacheRecvTimingResp(pkt);
1245 // After processing the response delete the packet to free
1246 // memory
1247 delete pkt;
1248
1249 return true;
1250 }
1251
1252 void
1253 TraceCPU::DcachePort::recvReqRetry()
1254 {
1255 owner->dcacheRetryRecvd();
1256 }
1257
1258 TraceCPU::ElasticDataGen::InputStream::InputStream(
1259 const std::string& filename,
1260 const double time_multiplier)
1261 : trace(filename),
1262 timeMultiplier(time_multiplier),
1263 microOpCount(0)
1264 {
1265 // Create a protobuf message for the header and read it from the stream
1266 ProtoMessage::InstDepRecordHeader header_msg;
1267 if (!trace.read(header_msg)) {
1268 panic("Failed to read packet header from %s\n", filename);
1269 }
1270
1271 if (header_msg.tick_freq() != SimClock::Frequency) {
1272 panic("Trace %s was recorded with a different tick frequency %d\n",
1273 filename, header_msg.tick_freq());
1274 }
1275
1276 // Assign window size equal to the field in the trace that was recorded
1277 // when the data dependency trace was captured in the o3cpu model
1278 windowSize = header_msg.window_size();
1279 }
1280
1281 void
1282 TraceCPU::ElasticDataGen::InputStream::reset()
1283 {
1284 trace.reset();
1285 }
1286
1287 bool
1288 TraceCPU::ElasticDataGen::InputStream::read(GraphNode* element)
1289 {
1290 ProtoMessage::InstDepRecord pkt_msg;
1291 if (trace.read(pkt_msg)) {
1292 // Required fields
1293 element->seqNum = pkt_msg.seq_num();
1294 element->type = pkt_msg.type();
1295 // Scale the compute delay to effectively scale the Trace CPU frequency
1296 element->compDelay = pkt_msg.comp_delay() * timeMultiplier;
1297
1298 // Repeated field robDepList
1299 element->clearRobDep();
1300 assert((pkt_msg.rob_dep()).size() <= element->maxRobDep);
1301 for (int i = 0; i < (pkt_msg.rob_dep()).size(); i++) {
1302 element->robDep[element->numRobDep] = pkt_msg.rob_dep(i);
1303 element->numRobDep += 1;
1304 }
1305
1306 // Repeated field
1307 element->clearRegDep();
1308 assert((pkt_msg.reg_dep()).size() <= TheISA::MaxInstSrcRegs);
1309 for (int i = 0; i < (pkt_msg.reg_dep()).size(); i++) {
1310 // There is a possibility that an instruction has both a register
1311 // and order dependency on an instruction. In such a case, the
1312 // register dependency is omitted
1313 bool duplicate = false;
1314 for (int j = 0; j < element->numRobDep; j++) {
1315 duplicate |= (pkt_msg.reg_dep(i) == element->robDep[j]);
1316 }
1317 if (!duplicate) {
1318 element->regDep[element->numRegDep] = pkt_msg.reg_dep(i);
1319 element->numRegDep += 1;
1320 }
1321 }
1322
1323 // Optional fields
1324 if (pkt_msg.has_p_addr())
1325 element->physAddr = pkt_msg.p_addr();
1326 else
1327 element->physAddr = 0;
1328
1329 if (pkt_msg.has_v_addr())
1330 element->virtAddr = pkt_msg.v_addr();
1331 else
1332 element->virtAddr = 0;
1333
1334 if (pkt_msg.has_asid())
1335 element->asid = pkt_msg.asid();
1336 else
1337 element->asid = 0;
1338
1339 if (pkt_msg.has_size())
1340 element->size = pkt_msg.size();
1341 else
1342 element->size = 0;
1343
1344 if (pkt_msg.has_flags())
1345 element->flags = pkt_msg.flags();
1346 else
1347 element->flags = 0;
1348
1349 if (pkt_msg.has_pc())
1350 element->pc = pkt_msg.pc();
1351 else
1352 element->pc = 0;
1353
1354 // ROB occupancy number
1355 ++microOpCount;
1356 if (pkt_msg.has_weight()) {
1357 microOpCount += pkt_msg.weight();
1358 }
1359 element->robNum = microOpCount;
1360 return true;
1361 }
1362
1363 // We have reached the end of the file
1364 return false;
1365 }
1366
1367 bool
1368 TraceCPU::ElasticDataGen::GraphNode::removeRegDep(NodeSeqNum reg_dep)
1369 {
1370 for (auto& own_reg_dep : regDep) {
1371 if (own_reg_dep == reg_dep) {
1372 // If register dependency is found, make it zero and return true
1373 own_reg_dep = 0;
1374 assert(numRegDep > 0);
1375 --numRegDep;
1376 DPRINTFR(TraceCPUData, "\tFor %lli: Marking register dependency %lli "
1377 "done.\n", seqNum, reg_dep);
1378 return true;
1379 }
1380 }
1381
1382 // Return false if the dependency is not found
1383 return false;
1384 }
1385
1386 bool
1387 TraceCPU::ElasticDataGen::GraphNode::removeRobDep(NodeSeqNum rob_dep)
1388 {
1389 for (auto& own_rob_dep : robDep) {
1390 if (own_rob_dep == rob_dep) {
1391 // If the rob dependency is found, make it zero and return true
1392 own_rob_dep = 0;
1393 assert(numRobDep > 0);
1394 --numRobDep;
1395 DPRINTFR(TraceCPUData, "\tFor %lli: Marking ROB dependency %lli "
1396 "done.\n", seqNum, rob_dep);
1397 return true;
1398 }
1399 }
1400 return false;
1401 }
1402
1403 void
1404 TraceCPU::ElasticDataGen::GraphNode::clearRegDep() {
1405 for (auto& own_reg_dep : regDep) {
1406 own_reg_dep = 0;
1407 }
1408 numRegDep = 0;
1409 }
1410
1411 void
1412 TraceCPU::ElasticDataGen::GraphNode::clearRobDep() {
1413 for (auto& own_rob_dep : robDep) {
1414 own_rob_dep = 0;
1415 }
1416 numRobDep = 0;
1417 }
1418
1419 bool
1420 TraceCPU::ElasticDataGen::GraphNode::removeDepOnInst(NodeSeqNum done_seq_num)
1421 {
1422 // If it is an rob dependency then remove it
1423 if (!removeRobDep(done_seq_num)) {
1424 // If it is not an rob dependency then it must be a register dependency
1425 // If the register dependency is not found, it violates an assumption
1426 // and must be caught by assert.
1427 bool regdep_found M5_VAR_USED = removeRegDep(done_seq_num);
1428 assert(regdep_found);
1429 }
1430 // Return true if the node is dependency free
1431 return (numRobDep == 0 && numRegDep == 0);
1432 }
1433
1434 void
1435 TraceCPU::ElasticDataGen::GraphNode::writeElementAsTrace() const
1436 {
1437 DPRINTFR(TraceCPUData, "%lli", seqNum);
1438 DPRINTFR(TraceCPUData, ",%s", typeToStr());
1439 if (isLoad() || isStore()) {
1440 DPRINTFR(TraceCPUData, ",%i", physAddr);
1441 DPRINTFR(TraceCPUData, ",%i", size);
1442 DPRINTFR(TraceCPUData, ",%i", flags);
1443 }
1444 DPRINTFR(TraceCPUData, ",%lli", compDelay);
1445 int i = 0;
1446 DPRINTFR(TraceCPUData, "robDep:");
1447 while (robDep[i] != 0) {
1448 DPRINTFR(TraceCPUData, ",%lli", robDep[i]);
1449 i++;
1450 }
1451 i = 0;
1452 DPRINTFR(TraceCPUData, "regDep:");
1453 while (regDep[i] != 0) {
1454 DPRINTFR(TraceCPUData, ",%lli", regDep[i]);
1455 i++;
1456 }
1457 auto child_itr = dependents.begin();
1458 DPRINTFR(TraceCPUData, "dependents:");
1459 while (child_itr != dependents.end()) {
1460 DPRINTFR(TraceCPUData, ":%lli", (*child_itr)->seqNum);
1461 child_itr++;
1462 }
1463
1464 DPRINTFR(TraceCPUData, "\n");
1465 }
1466
1467 std::string
1468 TraceCPU::ElasticDataGen::GraphNode::typeToStr() const
1469 {
1470 return Record::RecordType_Name(type);
1471 }
1472
1473 TraceCPU::FixedRetryGen::InputStream::InputStream(const std::string& filename)
1474 : trace(filename)
1475 {
1476 // Create a protobuf message for the header and read it from the stream
1477 ProtoMessage::PacketHeader header_msg;
1478 if (!trace.read(header_msg)) {
1479 panic("Failed to read packet header from %s\n", filename);
1480 }
1481
1482 if (header_msg.tick_freq() != SimClock::Frequency) {
1483 panic("Trace %s was recorded with a different tick frequency %d\n",
1484 filename, header_msg.tick_freq());
1485 }
1486 }
1487
1488 void
1489 TraceCPU::FixedRetryGen::InputStream::reset()
1490 {
1491 trace.reset();
1492 }
1493
1494 bool
1495 TraceCPU::FixedRetryGen::InputStream::read(TraceElement* element)
1496 {
1497 ProtoMessage::Packet pkt_msg;
1498 if (trace.read(pkt_msg)) {
1499 element->cmd = pkt_msg.cmd();
1500 element->addr = pkt_msg.addr();
1501 element->blocksize = pkt_msg.size();
1502 element->tick = pkt_msg.tick();
1503 element->flags = pkt_msg.has_flags() ? pkt_msg.flags() : 0;
1504 element->pc = pkt_msg.has_pc() ? pkt_msg.pc() : 0;
1505 return true;
1506 }
1507
1508 // We have reached the end of the file
1509 return false;
1510 }