2 * Copyright (c) 2013 - 2015 ARM Limited
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 * Authors: Radhika Jagtap
42 #include "cpu/trace/trace_cpu.hh"
44 #include "sim/sim_exit.hh"
46 // Declare and initialize the static counter for number of trace CPUs.
47 int TraceCPU::numTraceCPUs
= 0;
49 TraceCPU::TraceCPU(TraceCPUParams
*params
)
53 instMasterID(params
->system
->getMasterId(name() + ".inst")),
54 dataMasterID(params
->system
->getMasterId(name() + ".data")),
55 instTraceFile(params
->instTraceFile
),
56 dataTraceFile(params
->dataTraceFile
),
57 icacheGen(*this, ".iside", icachePort
, instMasterID
, instTraceFile
),
58 dcacheGen(*this, ".dside", dcachePort
, dataMasterID
, dataTraceFile
,
59 params
->sizeROB
, params
->sizeStoreBuffer
,
60 params
->sizeLoadBuffer
),
61 icacheNextEvent(this),
62 dcacheNextEvent(this),
63 oneTraceComplete(false),
65 execCompleteEvent(nullptr)
67 // Increment static counter for number of Trace CPUs.
68 ++TraceCPU::numTraceCPUs
;
70 // Check that the python parameters for sizes of ROB, store buffer and load
71 // buffer do not overflow the corresponding C++ variables.
72 fatal_if(params
->sizeROB
> UINT16_MAX
, "ROB size set to %d exceeds the "
73 "max. value of %d.\n", params
->sizeROB
, UINT16_MAX
);
74 fatal_if(params
->sizeStoreBuffer
> UINT16_MAX
, "ROB size set to %d "
75 "exceeds the max. value of %d.\n", params
->sizeROB
,
77 fatal_if(params
->sizeLoadBuffer
> UINT16_MAX
, "Load buffer size set to"
78 " %d exceeds the max. value of %d.\n",
79 params
->sizeLoadBuffer
, UINT16_MAX
);
88 TraceCPUParams::create()
90 return new TraceCPU(this);
94 TraceCPU::takeOverFrom(BaseCPU
*oldCPU
)
96 // Unbind the ports of the old CPU and bind the ports of the TraceCPU.
97 assert(!getInstPort().isConnected());
98 assert(oldCPU
->getInstPort().isConnected());
99 BaseSlavePort
&inst_peer_port
= oldCPU
->getInstPort().getSlavePort();
100 oldCPU
->getInstPort().unbind();
101 getInstPort().bind(inst_peer_port
);
103 assert(!getDataPort().isConnected());
104 assert(oldCPU
->getDataPort().isConnected());
105 BaseSlavePort
&data_peer_port
= oldCPU
->getDataPort().getSlavePort();
106 oldCPU
->getDataPort().unbind();
107 getDataPort().bind(data_peer_port
);
113 DPRINTF(TraceCPUInst
, "Instruction fetch request trace file is \"%s\"."
114 "\n", instTraceFile
);
115 DPRINTF(TraceCPUData
, "Data memory request trace file is \"%s\".\n",
120 // Get the send tick of the first instruction read request and schedule
121 // icacheNextEvent at that tick.
122 Tick first_icache_tick
= icacheGen
.init();
123 schedule(icacheNextEvent
, first_icache_tick
);
125 // Get the send tick of the first data read/write request and schedule
126 // dcacheNextEvent at that tick.
127 Tick first_dcache_tick
= dcacheGen
.init();
128 schedule(dcacheNextEvent
, first_dcache_tick
);
130 // The static counter for number of Trace CPUs is correctly set at this
131 // point so create an event and pass it.
132 execCompleteEvent
= new CountedExitEvent("end of all traces reached.",
134 // Save the first fetch request tick to dump it as tickOffset
135 firstFetchTick
= first_icache_tick
;
139 TraceCPU::schedIcacheNext()
141 DPRINTF(TraceCPUInst
, "IcacheGen event.\n");
143 // Try to send the current packet or a retry packet if there is one
144 bool sched_next
= icacheGen
.tryNext();
145 // If packet sent successfully, schedule next event
147 DPRINTF(TraceCPUInst
, "Scheduling next icacheGen event "
148 "at %d.\n", curTick() + icacheGen
.tickDelta());
149 schedule(icacheNextEvent
, curTick() + icacheGen
.tickDelta());
150 ++numSchedIcacheEvent
;
152 // check if traceComplete. If not, do nothing because sending failed
153 // and next event will be scheduled via RecvRetry()
154 if (icacheGen
.isTraceComplete()) {
155 // If this is the first trace to complete, set the variable. If it
156 // is already set then both traces are complete to exit sim.
157 checkAndSchedExitEvent();
164 TraceCPU::schedDcacheNext()
166 DPRINTF(TraceCPUData
, "DcacheGen event.\n");
169 if (dcacheGen
.isExecComplete()) {
170 checkAndSchedExitEvent();
175 TraceCPU::checkAndSchedExitEvent()
177 if (!oneTraceComplete
) {
178 oneTraceComplete
= true;
180 // Schedule event to indicate execution is complete as both
181 // instruction and data access traces have been played back.
182 inform("%s: Execution complete.\n", name());
184 // Record stats which are computed at the end of simulation
185 tickOffset
= firstFetchTick
;
186 numCycles
= (clockEdge() - firstFetchTick
) / clockPeriod();
187 numOps
= dcacheGen
.getMicroOpCount();
188 schedule(*execCompleteEvent
, curTick());
199 .name(name() + ".numSchedDcacheEvent")
200 .desc("Number of events scheduled to trigger data request generator")
204 .name(name() + ".numSchedIcacheEvent")
205 .desc("Number of events scheduled to trigger instruction request generator")
209 .name(name() + ".numOps")
210 .desc("Number of micro-ops simulated by the Trace CPU")
214 .name(name() + ".cpi")
215 .desc("Cycles per micro-op used as a proxy for CPI")
218 cpi
= numCycles
/numOps
;
221 .name(name() + ".tickOffset")
222 .desc("The first execution tick for the root node of elastic traces")
225 icacheGen
.regStats();
226 dcacheGen
.regStats();
230 TraceCPU::ElasticDataGen::regStats()
232 using namespace Stats
;
235 .name(name() + ".maxDependents")
236 .desc("Max number of dependents observed on a node")
240 .name(name() + ".maxReadyListSize")
241 .desc("Max size of the ready list observed")
245 .name(name() + ".numSendAttempted")
246 .desc("Number of first attempts to send a request")
250 .name(name() + ".numSendSucceeded")
251 .desc("Number of successful first attempts")
255 .name(name() + ".numSendFailed")
256 .desc("Number of failed first attempts")
260 .name(name() + ".numRetrySucceeded")
261 .desc("Number of successful retries")
265 .name(name() + ".numSplitReqs")
266 .desc("Number of split requests")
270 .name(name() + ".numSOLoads")
271 .desc("Number of strictly ordered loads")
275 .name(name() + ".numSOStores")
276 .desc("Number of strictly ordered stores")
280 .name(name() + ".dataLastTick")
281 .desc("Last tick simulated from the elastic data trace")
286 TraceCPU::ElasticDataGen::init()
288 DPRINTF(TraceCPUData
, "Initializing data memory request generator "
289 "DcacheGen: elastic issue with retry.\n");
291 if (!readNextWindow())
292 panic("Trace has %d elements. It must have at least %d elements.\n",
293 depGraph
.size(), 2 * windowSize
);
294 DPRINTF(TraceCPUData
, "After 1st read, depGraph size:%d.\n",
297 if (!readNextWindow())
298 panic("Trace has %d elements. It must have at least %d elements.\n",
299 depGraph
.size(), 2 * windowSize
);
300 DPRINTF(TraceCPUData
, "After 2st read, depGraph size:%d.\n",
304 if (DTRACE(TraceCPUData
)) {
307 auto free_itr
= readyList
.begin();
308 DPRINTF(TraceCPUData
, "Execute tick of the first dependency free node %lli"
309 " is %d.\n", free_itr
->seqNum
, free_itr
->execTick
);
310 // Return the execute tick of the earliest ready node so that an event
311 // can be scheduled to call execute()
312 return (free_itr
->execTick
);
316 TraceCPU::ElasticDataGen::exit()
322 TraceCPU::ElasticDataGen::readNextWindow()
325 // Read and add next window
326 DPRINTF(TraceCPUData
, "Reading next window from file.\n");
329 // We are at the end of the file, thus we have no more records.
334 DPRINTF(TraceCPUData
, "Start read: Size of depGraph is %d.\n",
337 uint32_t num_read
= 0;
338 while (num_read
!= windowSize
) {
340 // Create a new graph node
341 GraphNode
* new_node
= new GraphNode
;
343 // Read the next line to get the next record. If that fails then end of
344 // trace has been reached and traceComplete needs to be set in addition
345 // to returning false.
346 if (!trace
.read(new_node
)) {
347 DPRINTF(TraceCPUData
, "\tTrace complete!\n");
348 traceComplete
= true;
352 // Annotate the ROB dependencies of the new node onto the parent nodes.
353 addDepsOnParent(new_node
, new_node
->robDep
, new_node
->numRobDep
);
354 // Annotate the register dependencies of the new node onto the parent
356 addDepsOnParent(new_node
, new_node
->regDep
, new_node
->numRegDep
);
360 depGraph
[new_node
->seqNum
] = new_node
;
361 if (new_node
->numRobDep
== 0 && new_node
->numRegDep
== 0) {
362 // Source dependencies are already complete, check if resources
363 // are available and issue. The execution time is approximated
364 // to current time plus the computational delay.
365 checkAndIssue(new_node
);
369 DPRINTF(TraceCPUData
, "End read: Size of depGraph is %d.\n",
374 template<typename T
> void
375 TraceCPU::ElasticDataGen::addDepsOnParent(GraphNode
*new_node
,
376 T
& dep_array
, uint8_t& num_dep
)
378 for (auto& a_dep
: dep_array
) {
379 // The convention is to set the dependencies starting with the first
380 // index in the ROB and register dependency arrays. Thus, when we reach
381 // a dependency equal to the initialisation value of zero, we know have
382 // iterated over all dependencies and can break.
385 // We look up the valid dependency, i.e. the parent of this node
386 auto parent_itr
= depGraph
.find(a_dep
);
387 if (parent_itr
!= depGraph
.end()) {
388 // If the parent is found, it is yet to be executed. Append a
389 // pointer to the new node to the dependents list of the parent
391 parent_itr
->second
->dependents
.push_back(new_node
);
392 auto num_depts
= parent_itr
->second
->dependents
.size();
393 maxDependents
= std::max
<double>(num_depts
, maxDependents
.value());
395 // The dependency is not found in the graph. So consider
396 // the execution of the parent is complete, i.e. remove this
405 TraceCPU::ElasticDataGen::execute()
407 DPRINTF(TraceCPUData
, "Execute start occupancy:\n");
408 DPRINTFR(TraceCPUData
, "\tdepGraph = %d, readyList = %d, "
409 "depFreeQueue = %d ,", depGraph
.size(), readyList
.size(),
410 depFreeQueue
.size());
411 hwResource
.printOccupancy();
413 // Read next window to make sure that dependents of all dep-free nodes
414 // are in the depGraph
420 // First attempt to issue the pending dependency-free nodes held
421 // in depFreeQueue. If resources have become available for a node,
422 // then issue it, i.e. add the node to readyList.
423 while (!depFreeQueue
.empty()) {
424 if (checkAndIssue(depFreeQueue
.front(), false)) {
425 DPRINTF(TraceCPUData
, "Removing from depFreeQueue: seq. num "
426 "%lli.\n", (depFreeQueue
.front())->seqNum
);
432 // Proceed to execute from readyList
433 auto graph_itr
= depGraph
.begin();
434 auto free_itr
= readyList
.begin();
435 // Iterate through readyList until the next free node has its execute
436 // tick later than curTick or the end of readyList is reached
437 while (free_itr
->execTick
<= curTick() && free_itr
!= readyList
.end()) {
439 // Get pointer to the node to be executed
440 graph_itr
= depGraph
.find(free_itr
->seqNum
);
441 assert(graph_itr
!= depGraph
.end());
442 GraphNode
* node_ptr
= graph_itr
->second
;
444 // If there is a retryPkt send that else execute the load
446 // The retryPkt must be the request that was created by the
447 // first node in the readyList.
448 if (retryPkt
->req
->getReqInstSeqNum() != node_ptr
->seqNum
) {
449 panic("Retry packet's seqence number does not match "
450 "the first node in the readyList.\n");
452 if (port
.sendTimingReq(retryPkt
)) {
456 } else if (node_ptr
->isLoad() || node_ptr
->isStore()) {
457 // If there is no retryPkt, attempt to send a memory request in
458 // case of a load or store node. If the send fails, executeMemReq()
459 // returns a packet pointer, which we save in retryPkt. In case of
460 // a comp node we don't do anything and simply continue as if the
461 // execution of the comp node succedded.
462 retryPkt
= executeMemReq(node_ptr
);
464 // If the retryPkt or a new load/store node failed, we exit from here
465 // as a retry from cache will bring the control to execute(). The
466 // first node in readyList then, will be the failed node.
471 // Proceed to remove dependencies for the successfully executed node.
472 // If it is a load which is not strictly ordered and we sent a
473 // request for it successfully, we do not yet mark any register
474 // dependencies complete. But as per dependency modelling we need
475 // to mark ROB dependencies of load and non load/store nodes which
476 // are based on successful sending of the load as complete.
477 if (node_ptr
->isLoad() && !node_ptr
->isStrictlyOrdered()) {
478 // If execute succeeded mark its dependents as complete
479 DPRINTF(TraceCPUData
, "Node seq. num %lli sent. Waking up "
480 "dependents..\n", node_ptr
->seqNum
);
482 auto child_itr
= (node_ptr
->dependents
).begin();
483 while (child_itr
!= (node_ptr
->dependents
).end()) {
484 // ROB dependency of a store on a load must not be removed
485 // after load is sent but after response is received
486 if (!(*child_itr
)->isStore() &&
487 (*child_itr
)->removeRobDep(node_ptr
->seqNum
)) {
489 // Check if the child node has become dependency free
490 if ((*child_itr
)->numRobDep
== 0 &&
491 (*child_itr
)->numRegDep
== 0) {
493 // Source dependencies are complete, check if
494 // resources are available and issue
495 checkAndIssue(*child_itr
);
497 // Remove this child for the sent load and point to new
498 // location of the element following the erased element
499 child_itr
= node_ptr
->dependents
.erase(child_itr
);
501 // This child is not dependency-free, point to the next
507 // If it is a strictly ordered load mark its dependents as complete
508 // as we do not send a request for this case. If it is a store or a
509 // comp node we also mark all its dependents complete.
510 DPRINTF(TraceCPUData
, "Node seq. num %lli done. Waking"
511 " up dependents..\n", node_ptr
->seqNum
);
513 for (auto child
: node_ptr
->dependents
) {
514 // If the child node is dependency free removeDepOnInst()
516 if (child
->removeDepOnInst(node_ptr
->seqNum
)) {
517 // Source dependencies are complete, check if resources
518 // are available and issue
519 checkAndIssue(child
);
524 // After executing the node, remove from readyList and delete node.
525 readyList
.erase(free_itr
);
526 // If it is a cacheable load which was sent, don't delete
527 // just yet. Delete it in completeMemAccess() after the
528 // response is received. If it is an strictly ordered
529 // load, it was not sent and all dependencies were simply
530 // marked complete. Thus it is safe to delete it. For
531 // stores and non load/store nodes all dependencies were
532 // marked complete so it is safe to delete it.
533 if (!node_ptr
->isLoad() || node_ptr
->isStrictlyOrdered()) {
534 // Release all resources occupied by the completed node
535 hwResource
.release(node_ptr
);
536 // clear the dynamically allocated set of dependents
537 (node_ptr
->dependents
).clear();
541 depGraph
.erase(graph_itr
);
543 // Point to first node to continue to next iteration of while loop
544 free_itr
= readyList
.begin();
545 } // end of while loop
547 // Print readyList, sizes of queues and resource status after updating
548 if (DTRACE(TraceCPUData
)) {
550 DPRINTF(TraceCPUData
, "Execute end occupancy:\n");
551 DPRINTFR(TraceCPUData
, "\tdepGraph = %d, readyList = %d, "
552 "depFreeQueue = %d ,", depGraph
.size(), readyList
.size(),
553 depFreeQueue
.size());
554 hwResource
.printOccupancy();
558 DPRINTF(TraceCPUData
, "Not scheduling an event as expecting a retry"
559 "event from the cache for seq. num %lli.\n",
560 retryPkt
->req
->getReqInstSeqNum());
563 // If the size of the dependency graph is less than the dependency window
564 // then read from the trace file to populate the graph next time we are in
566 if (depGraph
.size() < windowSize
&& !traceComplete
)
569 // If cache is not blocked, schedule an event for the first execTick in
570 // readyList else retry from cache will schedule the event. If the ready
571 // list is empty then check if the next pending node has resources
572 // available to issue. If yes, then schedule an event for the next cycle.
573 if (!readyList
.empty()) {
574 Tick next_event_tick
= std::max(readyList
.begin()->execTick
,
576 DPRINTF(TraceCPUData
, "Attempting to schedule @%lli.\n",
578 owner
.schedDcacheNextEvent(next_event_tick
);
579 } else if (readyList
.empty() && !depFreeQueue
.empty() &&
580 hwResource
.isAvailable(depFreeQueue
.front())) {
581 DPRINTF(TraceCPUData
, "Attempting to schedule @%lli.\n",
582 owner
.clockEdge(Cycles(1)));
583 owner
.schedDcacheNextEvent(owner
.clockEdge(Cycles(1)));
586 // If trace is completely read, readyList is empty and depGraph is empty,
587 // set execComplete to true
588 if (depGraph
.empty() && readyList
.empty() && traceComplete
&&
589 !hwResource
.awaitingResponse()) {
590 DPRINTF(TraceCPUData
, "\tExecution Complete!\n");
592 dataLastTick
= curTick();
597 TraceCPU::ElasticDataGen::executeMemReq(GraphNode
* node_ptr
)
600 DPRINTF(TraceCPUData
, "Executing memory request %lli (phys addr %d, "
601 "virt addr %d, pc %#x, size %d, flags %d).\n",
602 node_ptr
->seqNum
, node_ptr
->physAddr
, node_ptr
->virtAddr
,
603 node_ptr
->pc
, node_ptr
->size
, node_ptr
->flags
);
605 // If the request is strictly ordered, do not send it. Just return nullptr
606 // as if it was succesfully sent.
607 if (node_ptr
->isStrictlyOrdered()) {
608 node_ptr
->isLoad() ? ++numSOLoads
: ++numSOStores
;
609 DPRINTF(TraceCPUData
, "Skipping strictly ordered request %lli.\n",
614 // Check if the request spans two cache lines as this condition triggers
615 // an assert fail in the L1 cache. If it does then truncate the size to
616 // access only until the end of that line and ignore the remainder. The
617 // stat counting this is useful to keep a check on how frequently this
618 // happens. If required the code could be revised to mimick splitting such
619 // a request into two.
620 unsigned blk_size
= owner
.cacheLineSize();
621 Addr blk_offset
= (node_ptr
->physAddr
& (Addr
)(blk_size
- 1));
622 if (!(blk_offset
+ node_ptr
->size
<= blk_size
)) {
623 node_ptr
->size
= blk_size
- blk_offset
;
627 // Create a request and the packet containing request
628 Request
* req
= new Request(node_ptr
->physAddr
, node_ptr
->size
,
629 node_ptr
->flags
, masterID
, node_ptr
->seqNum
,
631 req
->setPC(node_ptr
->pc
);
632 // If virtual address is valid, set the asid and virtual address fields
634 if (node_ptr
->virtAddr
!= 0) {
635 req
->setVirt(node_ptr
->asid
, node_ptr
->virtAddr
, node_ptr
->size
,
636 node_ptr
->flags
, masterID
, node_ptr
->pc
);
637 req
->setPaddr(node_ptr
->physAddr
);
638 req
->setReqInstSeqNum(node_ptr
->seqNum
);
642 uint8_t* pkt_data
= new uint8_t[req
->getSize()];
643 if (node_ptr
->isLoad()) {
644 pkt
= Packet::createRead(req
);
646 pkt
= Packet::createWrite(req
);
647 memset(pkt_data
, 0xA, req
->getSize());
649 pkt
->dataDynamic(pkt_data
);
651 // Call MasterPort method to send a timing request for this packet
652 bool success
= port
.sendTimingReq(pkt
);
656 // If it fails, return the packet to retry when a retry is signalled by
659 DPRINTF(TraceCPUData
, "Send failed. Saving packet for retry.\n");
662 // It is succeeds, return nullptr
669 TraceCPU::ElasticDataGen::checkAndIssue(const GraphNode
* node_ptr
, bool first
)
671 // Assert the node is dependency-free
672 assert(node_ptr
->numRobDep
== 0 && node_ptr
->numRegDep
== 0);
674 // If this is the first attempt, print a debug message to indicate this.
676 DPRINTFR(TraceCPUData
, "\t\tseq. num %lli(%s) with rob num %lli is now"
677 " dependency free.\n", node_ptr
->seqNum
, node_ptr
->typeToStr(),
681 // Check if resources are available to issue the specific node
682 if (hwResource
.isAvailable(node_ptr
)) {
683 // If resources are free only then add to readyList
684 DPRINTFR(TraceCPUData
, "\t\tResources available for seq. num %lli. Adding"
685 " to readyList, occupying resources.\n", node_ptr
->seqNum
);
686 // Compute the execute tick by adding the compute delay for the node
687 // and add the ready node to the ready list
688 addToSortedReadyList(node_ptr
->seqNum
,
689 owner
.clockEdge() + node_ptr
->compDelay
);
690 // Account for the resources taken up by this issued node.
691 hwResource
.occupy(node_ptr
);
696 // Although dependencies are complete, resources are not available.
697 DPRINTFR(TraceCPUData
, "\t\tResources unavailable for seq. num %lli."
698 " Adding to depFreeQueue.\n", node_ptr
->seqNum
);
699 depFreeQueue
.push(node_ptr
);
701 DPRINTFR(TraceCPUData
, "\t\tResources unavailable for seq. num %lli. "
702 "Still pending issue.\n", node_ptr
->seqNum
);
709 TraceCPU::ElasticDataGen::completeMemAccess(PacketPtr pkt
)
711 // Release the resources for this completed node.
712 if (pkt
->isWrite()) {
713 // Consider store complete.
714 hwResource
.releaseStoreBuffer();
715 // If it is a store response then do nothing since we do not model
716 // dependencies on store completion in the trace. But if we were
717 // blocking execution due to store buffer fullness, we need to schedule
718 // an event and attempt to progress.
720 // If it is a load response then release the dependents waiting on it.
721 // Get pointer to the completed load
722 auto graph_itr
= depGraph
.find(pkt
->req
->getReqInstSeqNum());
723 assert(graph_itr
!= depGraph
.end());
724 GraphNode
* node_ptr
= graph_itr
->second
;
726 // Release resources occupied by the load
727 hwResource
.release(node_ptr
);
729 DPRINTF(TraceCPUData
, "Load seq. num %lli response received. Waking up"
730 " dependents..\n", node_ptr
->seqNum
);
732 for (auto child
: node_ptr
->dependents
) {
733 if (child
->removeDepOnInst(node_ptr
->seqNum
)) {
734 checkAndIssue(child
);
738 // clear the dynamically allocated set of dependents
739 (node_ptr
->dependents
).clear();
743 depGraph
.erase(graph_itr
);
746 if (DTRACE(TraceCPUData
)) {
750 // If the size of the dependency graph is less than the dependency window
751 // then read from the trace file to populate the graph next time we are in
753 if (depGraph
.size() < windowSize
&& !traceComplete
)
756 // If not waiting for retry, attempt to schedule next event
758 // We might have new dep-free nodes in the list which will have execute
759 // tick greater than or equal to curTick. But a new dep-free node might
760 // have its execute tick earlier. Therefore, attempt to reschedule. It
761 // could happen that the readyList is empty and we got here via a
762 // last remaining response. So, either the trace is complete or there
763 // are pending nodes in the depFreeQueue. The checking is done in the
764 // execute() control flow, so schedule an event to go via that flow.
765 Tick next_event_tick
= readyList
.empty() ? owner
.clockEdge(Cycles(1)) :
766 std::max(readyList
.begin()->execTick
, owner
.clockEdge(Cycles(1)));
767 DPRINTF(TraceCPUData
, "Attempting to schedule @%lli.\n",
769 owner
.schedDcacheNextEvent(next_event_tick
);
774 TraceCPU::ElasticDataGen::addToSortedReadyList(NodeSeqNum seq_num
,
777 ReadyNode ready_node
;
778 ready_node
.seqNum
= seq_num
;
779 ready_node
.execTick
= exec_tick
;
781 // Iterator to readyList
782 auto itr
= readyList
.begin();
784 // If the readyList is empty, simply insert the new node at the beginning
786 if (itr
== readyList
.end()) {
787 readyList
.insert(itr
, ready_node
);
788 maxReadyListSize
= std::max
<double>(readyList
.size(),
789 maxReadyListSize
.value());
793 // If the new node has its execution tick equal to the first node in the
794 // list then go to the next node. If the first node in the list failed
795 // to execute, its position as the first is thus maintained.
797 if (retryPkt
->req
->getReqInstSeqNum() == itr
->seqNum
)
800 // Increment the iterator and compare the node pointed to by it to the new
801 // node till the position to insert the new node is found.
803 while (!found
&& itr
!= readyList
.end()) {
804 // If the execution tick of the new node is less than the node then
805 // this is the position to insert
806 if (exec_tick
< itr
->execTick
)
808 // If the execution tick of the new node is equal to the node then
809 // sort in ascending order of sequence numbers
810 else if (exec_tick
== itr
->execTick
) {
811 // If the sequence number of the new node is less than the node
812 // then this is the position to insert
813 if (seq_num
< itr
->seqNum
)
815 // Else go to next node
819 // If the execution tick of the new node is greater than the node then
820 // go to the next node
824 readyList
.insert(itr
, ready_node
);
825 // Update the stat for max size reached of the readyList
826 maxReadyListSize
= std::max
<double>(readyList
.size(),
827 maxReadyListSize
.value());
831 TraceCPU::ElasticDataGen::printReadyList() {
833 auto itr
= readyList
.begin();
834 if (itr
== readyList
.end()) {
835 DPRINTF(TraceCPUData
, "readyList is empty.\n");
838 DPRINTF(TraceCPUData
, "Printing readyList:\n");
839 while (itr
!= readyList
.end()) {
840 auto graph_itr
= depGraph
.find(itr
->seqNum
);
841 GraphNode
* node_ptr M5_VAR_USED
= graph_itr
->second
;
842 DPRINTFR(TraceCPUData
, "\t%lld(%s), %lld\n", itr
->seqNum
,
843 node_ptr
->typeToStr(), itr
->execTick
);
848 TraceCPU::ElasticDataGen::HardwareResource::HardwareResource(
849 uint16_t max_rob
, uint16_t max_stores
, uint16_t max_loads
)
851 sizeStoreBuffer(max_stores
),
852 sizeLoadBuffer(max_loads
),
853 oldestInFlightRobNum(UINT64_MAX
),
859 TraceCPU::ElasticDataGen::HardwareResource::occupy(const GraphNode
* new_node
)
861 // Occupy ROB entry for the issued node
862 // Merely maintain the oldest node, i.e. numerically least robNum by saving
863 // it in the variable oldestInFLightRobNum.
864 inFlightNodes
[new_node
->seqNum
] = new_node
->robNum
;
865 oldestInFlightRobNum
= inFlightNodes
.begin()->second
;
867 // Occupy Load/Store Buffer entry for the issued node if applicable
868 if (new_node
->isLoad()) {
870 } else if (new_node
->isStore()) {
872 } // else if it is a non load/store node, no buffer entry is occupied
878 TraceCPU::ElasticDataGen::HardwareResource::release(const GraphNode
* done_node
)
880 assert(!inFlightNodes
.empty());
881 DPRINTFR(TraceCPUData
, "\tClearing done seq. num %d from inFlightNodes..\n",
884 assert(inFlightNodes
.find(done_node
->seqNum
) != inFlightNodes
.end());
885 inFlightNodes
.erase(done_node
->seqNum
);
887 if (inFlightNodes
.empty()) {
888 // If we delete the only in-flight node and then the
889 // oldestInFlightRobNum is set to it's initialized (max) value.
890 oldestInFlightRobNum
= UINT64_MAX
;
892 // Set the oldest in-flight node rob number equal to the first node in
893 // the inFlightNodes since that will have the numerically least value.
894 oldestInFlightRobNum
= inFlightNodes
.begin()->second
;
897 DPRINTFR(TraceCPUData
, "\tCleared. inFlightNodes.size() = %d, "
898 "oldestInFlightRobNum = %d\n", inFlightNodes
.size(),
899 oldestInFlightRobNum
);
901 // A store is considered complete when a request is sent, thus ROB entry is
902 // freed. But it occupies an entry in the Store Buffer until its response
903 // is received. A load is considered complete when a response is received,
904 // thus both ROB and Load Buffer entries can be released.
905 if (done_node
->isLoad()) {
906 assert(numInFlightLoads
!= 0);
909 // For normal writes, we send the requests out and clear a store buffer
910 // entry on response. For writes which are strictly ordered, for e.g.
911 // writes to device registers, we do that within release() which is called
912 // when node is executed and taken off from readyList.
913 if (done_node
->isStore() && done_node
->isStrictlyOrdered()) {
914 releaseStoreBuffer();
919 TraceCPU::ElasticDataGen::HardwareResource::releaseStoreBuffer()
921 assert(numInFlightStores
!= 0);
926 TraceCPU::ElasticDataGen::HardwareResource::isAvailable(
927 const GraphNode
* new_node
) const
929 uint16_t num_in_flight_nodes
;
930 if (inFlightNodes
.empty()) {
931 num_in_flight_nodes
= 0;
932 DPRINTFR(TraceCPUData
, "\t\tChecking resources to issue seq. num %lli:"
933 " #in-flight nodes = 0", new_node
->seqNum
);
934 } else if (new_node
->robNum
> oldestInFlightRobNum
) {
935 // This is the intuitive case where new dep-free node is younger
936 // instruction than the oldest instruction in-flight. Thus we make sure
937 // in_flight_nodes does not overflow.
938 num_in_flight_nodes
= new_node
->robNum
- oldestInFlightRobNum
;
939 DPRINTFR(TraceCPUData
, "\t\tChecking resources to issue seq. num %lli:"
940 " #in-flight nodes = %d - %d = %d", new_node
->seqNum
,
941 new_node
->robNum
, oldestInFlightRobNum
, num_in_flight_nodes
);
943 // This is the case where an instruction older than the oldest in-
944 // flight instruction becomes dep-free. Thus we must have already
945 // accounted for the entry in ROB for this new dep-free node.
946 // Immediately after this check returns true, oldestInFlightRobNum will
947 // be updated in occupy(). We simply let this node issue now.
948 num_in_flight_nodes
= 0;
949 DPRINTFR(TraceCPUData
, "\t\tChecking resources to issue seq. num %lli:"
950 " new oldestInFlightRobNum = %d, #in-flight nodes ignored",
951 new_node
->seqNum
, new_node
->robNum
);
953 DPRINTFR(TraceCPUData
, ", LQ = %d/%d, SQ = %d/%d.\n",
954 numInFlightLoads
, sizeLoadBuffer
,
955 numInFlightStores
, sizeStoreBuffer
);
956 // Check if resources are available to issue the specific node
957 if (num_in_flight_nodes
>= sizeROB
) {
960 if (new_node
->isLoad() && numInFlightLoads
>= sizeLoadBuffer
) {
963 if (new_node
->isStore() && numInFlightStores
>= sizeStoreBuffer
) {
970 TraceCPU::ElasticDataGen::HardwareResource::awaitingResponse() const {
971 // Return true if there is at least one read or write request in flight
972 return (numInFlightStores
!= 0 || numInFlightLoads
!= 0);
976 TraceCPU::ElasticDataGen::HardwareResource::printOccupancy() {
977 DPRINTFR(TraceCPUData
, "oldestInFlightRobNum = %d, "
978 "LQ = %d/%d, SQ = %d/%d.\n",
979 oldestInFlightRobNum
,
980 numInFlightLoads
, sizeLoadBuffer
,
981 numInFlightStores
, sizeStoreBuffer
);
985 TraceCPU::FixedRetryGen::regStats()
987 using namespace Stats
;
990 .name(name() + ".numSendAttempted")
991 .desc("Number of first attempts to send a request")
995 .name(name() + ".numSendSucceeded")
996 .desc("Number of successful first attempts")
1000 .name(name() + ".numSendFailed")
1001 .desc("Number of failed first attempts")
1005 .name(name() + ".numRetrySucceeded")
1006 .desc("Number of successful retries")
1010 .name(name() + ".instLastTick")
1011 .desc("Last tick simulated from the fixed inst trace")
1016 TraceCPU::FixedRetryGen::init()
1018 DPRINTF(TraceCPUInst
, "Initializing instruction fetch request generator"
1019 " IcacheGen: fixed issue with retry.\n");
1021 if (nextExecute()) {
1022 DPRINTF(TraceCPUInst
, "\tFirst tick = %d.\n", currElement
.tick
);
1023 return currElement
.tick
;
1025 panic("Read of first message in the trace failed.\n");
// Attempt to make forward progress on instruction fetch: first resend a
// previously-rejected packet if one is pending, otherwise send a request
// for the current trace element, then advance to the next element and
// compute the tick delta for scheduling the next event.
// NOTE(review): several interleaved lines (braces, returns and stat
// updates) were lost in extraction; comments describe only visible code.
1031 TraceCPU::FixedRetryGen::tryNext()
1033 // If there is a retry packet, try to send it
1036 DPRINTF(TraceCPUInst
, "Trying to send retry packet.\n");
1038 if (!port
.sendTimingReq(retryPkt
)) {
1039 // Still blocked! This should never occur.
1040 DPRINTF(TraceCPUInst
, "Retry packet sending failed.\n");
// Retry went through: count it as a successful retry.
1043 ++numRetrySucceeded
;
1046 DPRINTF(TraceCPUInst
, "Trying to send packet for currElement.\n");
1048 // try sending current element
1049 assert(currElement
.isValid());
// Issue a request built from the current trace element's fields.
1053 if (!send(currElement
.addr
, currElement
.blocksize
,
1054 currElement
.cmd
, currElement
.flags
, currElement
.pc
)) {
1055 DPRINTF(TraceCPUInst
, "currElement sending failed.\n");
1057 // return false to indicate not to schedule next event
1063 // If packet was sent successfully, either retryPkt or currElement, return
1064 // true to indicate to schedule event at current Tick plus delta. If packet
1065 // was sent successfully and there is no next packet to send, return false.
1066 DPRINTF(TraceCPUInst
, "Packet sent successfully, trying to read next "
1069 // Read next element into currElement, currElement gets cleared so save the
1070 // tick to calculate delta
1071 Tick last_tick
= currElement
.tick
;
1072 if (nextExecute()) {
// Trace ticks must be monotonically non-decreasing.
1073 assert(currElement
.tick
>= last_tick
);
// delta is the gap until the next element's tick.
1074 delta
= currElement
.tick
- last_tick
;
// Keep scheduling events while the trace has not completed.
1076 return !traceComplete
;
// Shutdown hook for the fixed-retry generator. Only the signature is
// visible in this chunk; the body (presumably trace/stream cleanup --
// TODO confirm against full source) was lost in extraction.
1080 TraceCPU::FixedRetryGen::exit()
// Advance to the next message of the instruction trace. Clears
// currElement, reads the next protobuf message into it, and on read
// failure marks the trace complete and records the last simulated tick.
// NOTE(review): the early-return for an already-complete trace and the
// tail of the function were lost in extraction.
1086 TraceCPU::FixedRetryGen::nextExecute()
1089 // We are at the end of the file, thus we have no more messages.
1094 //Reset the currElement to the default values
1095 currElement
.clear();
1097 // Read the next line to get the next message. If that fails then end of
1098 // trace has been reached and traceComplete needs to be set in addition
1099 // to returning false. If successful then next message is in currElement.
1100 if (!trace
.read(&currElement
)) {
1101 traceComplete
= true;
// Record when the instruction trace ran out, for the instLastTick stat.
1102 instLastTick
= curTick();
// Log the element just read: r/w, address, pc, size and tick.
1106 DPRINTF(TraceCPUInst
, "inst fetch: %c addr %d pc %#x size %d tick %d\n",
1107 currElement
.cmd
.isRead() ? 'r' : 'w',
1110 currElement
.blocksize
,
// Build and issue a timing memory request for one trace element.
// Allocates a Request and Packet, attaches a dynamically-owned data
// buffer (filled with a dummy pattern for writes), and sends it on the
// instruction port. NOTE(review): the tail (saving the packet as
// retryPkt on failure and returning success) was lost in extraction.
1117 TraceCPU::FixedRetryGen::send(Addr addr
, unsigned size
, const MemCmd
& cmd
,
1118 Request::FlagsType flags
, Addr pc
)
1121 // Create new request
1122 Request
* req
= new Request(addr
, size
, flags
, masterID
);
1125 // If this is not done it triggers assert in L1 cache for invalid contextId
1126 req
->setContext(ContextID(0));
1128 // Embed it in a packet
1129 PacketPtr pkt
= new Packet(req
, cmd
);
// The packet owns this buffer and frees it when deleted.
1131 uint8_t* pkt_data
= new uint8_t[req
->getSize()];
1132 pkt
->dataDynamic(pkt_data
);
// Writes carry dummy data (0xA pattern); trace replay ignores values.
1134 if (cmd
.isWrite()) {
1135 memset(pkt_data
, 0xA, req
->getSize());
1138 // Call MasterPort method to send a timing request for this packet
1139 bool success
= port
.sendTimingReq(pkt
);
1141 // If it fails, save the packet to retry when a retry is signalled by
1149 TraceCPU::icacheRetryRecvd()
1151 // Schedule an event to go through the control flow in the same tick as
1152 // retry is received
1153 DPRINTF(TraceCPUInst
, "Icache retry received. Scheduling next IcacheGen"
1154 " event @%lli.\n", curTick());
1155 schedule(icacheNextEvent
, curTick());
1159 TraceCPU::dcacheRetryRecvd()
1161 // Schedule an event to go through the execute flow in the same tick as
1162 // retry is received
1163 DPRINTF(TraceCPUData
, "Dcache retry received. Scheduling next DcacheGen"
1164 " event @%lli.\n", curTick());
1165 schedule(dcacheNextEvent
, curTick());
1169 TraceCPU::schedDcacheNextEvent(Tick when
)
1171 if (!dcacheNextEvent
.scheduled()) {
1172 DPRINTF(TraceCPUData
, "Scheduling next DcacheGen event at %lli.\n",
1174 schedule(dcacheNextEvent
, when
);
1175 ++numSchedDcacheEvent
;
1176 } else if (when
< dcacheNextEvent
.when()) {
1177 DPRINTF(TraceCPUData
, "Re-scheduling next dcache event from %lli"
1178 " to %lli.\n", dcacheNextEvent
.when(), when
);
1179 reschedule(dcacheNextEvent
, when
);
// Response handler for the instruction-fetch port. Fetch responses carry
// no useful payload for trace replay, so they are only cleaned up.
// NOTE(review): the cleanup statements (freeing the request/packet) and
// the return were lost in extraction.
1185 TraceCPU::IcachePort::recvTimingResp(PacketPtr pkt
)
1187 // All responses on the instruction fetch side are ignored. Simply delete
1188 // the request and packet to free allocated memory
1196 TraceCPU::IcachePort::recvReqRetry()
1198 owner
->icacheRetryRecvd();
1202 TraceCPU::dcacheRecvTimingResp(PacketPtr pkt
)
1204 DPRINTF(TraceCPUData
, "Received timing response from Dcache.\n");
1205 dcacheGen
.completeMemAccess(pkt
);
// Response handler for the data port: delegate to the owning TraceCPU,
// which routes the response into the elastic data generator.
// NOTE(review): the trailing cleanup statements and return were lost in
// extraction.
1209 TraceCPU::DcachePort::recvTimingResp(PacketPtr pkt
)
1211 // Handle the responses for data memory requests which is done inside the
1212 // elastic data generator
1213 owner
->dcacheRecvTimingResp(pkt
);
1214 // After processing the response delete the request and packet to free
1223 TraceCPU::DcachePort::recvReqRetry()
1225 owner
->dcacheRetryRecvd();
1228 TraceCPU::ElasticDataGen::InputStream::InputStream(const std::string
& filename
)
1232 // Create a protobuf message for the header and read it from the stream
1233 ProtoMessage::InstDepRecordHeader header_msg
;
1234 if (!trace
.read(header_msg
)) {
1235 panic("Failed to read packet header from %s\n", filename
);
1237 if (header_msg
.tick_freq() != SimClock::Frequency
) {
1238 panic("Trace %s was recorded with a different tick frequency %d\n",
1239 header_msg
.tick_freq());
1242 // Assign window size equal to the field in the trace that was recorded
1243 // when the data dependency trace was captured in the o3cpu model
1244 windowSize
= header_msg
.window_size();
// Rewind the elastic trace stream to its beginning. Only the signature is
// visible in this chunk; the body (presumably trace.reset() -- TODO
// confirm against full source) was lost in extraction.
1249 TraceCPU::ElasticDataGen::InputStream::reset()
// Read the next instruction-dependency record from the protobuf trace
// into the supplied graph node. Populates required fields (sequence
// number, type, computation delay), both dependency lists (dropping
// register deps that duplicate ROB deps), optional fields with defaults,
// and the node's running ROB occupancy number.
// NOTE(review): branch scaffolding (closing braces, else keywords, the
// duplicate guard, and the return statements) was lost in extraction.
1255 TraceCPU::ElasticDataGen::InputStream::read(GraphNode
* element
)
1257 ProtoMessage::InstDepRecord pkt_msg
;
1258 if (trace
.read(pkt_msg
)) {
// Required protobuf fields: always present in every record.
1260 element
->seqNum
= pkt_msg
.seq_num();
1261 element
->type
= pkt_msg
.type();
1262 element
->compDelay
= pkt_msg
.comp_delay();
1264 // Repeated field robDepList
1265 element
->clearRobDep();
// The trace must not carry more ROB deps than the node can store.
1266 assert((pkt_msg
.rob_dep()).size() <= element
->maxRobDep
);
1267 for (int i
= 0; i
< (pkt_msg
.rob_dep()).size(); i
++) {
1268 element
->robDep
[element
->numRobDep
] = pkt_msg
.rob_dep(i
);
1269 element
->numRobDep
+= 1;
// Repeated field regDepList: bounded by the ISA's max source registers.
1273 element
->clearRegDep();
1274 assert((pkt_msg
.reg_dep()).size() <= TheISA::MaxInstSrcRegs
);
1275 for (int i
= 0; i
< (pkt_msg
.reg_dep()).size(); i
++) {
1276 // There is a possibility that an instruction has both, a register
1277 // and order dependency on an instruction. In such a case, the
1278 // register dependency is omitted
1279 bool duplicate
= false;
1280 for (int j
= 0; j
< element
->numRobDep
; j
++) {
1281 duplicate
|= (pkt_msg
.reg_dep(i
) == element
->robDep
[j
]);
// Only non-duplicate register dependencies are recorded.
1284 element
->regDep
[element
->numRegDep
] = pkt_msg
.reg_dep(i
);
1285 element
->numRegDep
+= 1;
// Optional fields default to zero when absent from the record.
1290 if (pkt_msg
.has_p_addr())
1291 element
->physAddr
= pkt_msg
.p_addr();
1293 element
->physAddr
= 0;
1295 if (pkt_msg
.has_v_addr())
1296 element
->virtAddr
= pkt_msg
.v_addr();
1298 element
->virtAddr
= 0;
1300 if (pkt_msg
.has_asid())
1301 element
->asid
= pkt_msg
.asid();
1305 if (pkt_msg
.has_size())
1306 element
->size
= pkt_msg
.size();
1310 if (pkt_msg
.has_flags())
1311 element
->flags
= pkt_msg
.flags();
1315 if (pkt_msg
.has_pc())
1316 element
->pc
= pkt_msg
.pc();
1320 // ROB occupancy number
// The weight field counts micro-ops; accumulate it into the running
// micro-op count that serves as the node's ROB occupancy number.
1322 if (pkt_msg
.has_weight()) {
1323 microOpCount
+= pkt_msg
.weight();
1325 element
->robNum
= microOpCount
;
1329 // We have reached the end of the file
// Search this node's register-dependency list for the given sequence
// number. Per the inline comment, a found entry is zeroed and true is
// returned; false is returned when the dependency is absent.
// NOTE(review): the zeroing/decrement statements and the returns were
// lost in extraction.
1334 TraceCPU::ElasticDataGen::GraphNode::removeRegDep(NodeSeqNum reg_dep
)
1336 for (auto& own_reg_dep
: regDep
) {
1337 if (own_reg_dep
== reg_dep
) {
1338 // If register dependency is found, make it zero and return true
// The count of pending register deps must be positive before removal.
1340 assert(numRegDep
> 0);
1342 DPRINTFR(TraceCPUData
, "\tFor %lli: Marking register dependency %lli "
1343 "done.\n", seqNum
, reg_dep
);
1348 // Return false if the dependency is not found
// Search this node's ROB (order) dependency list for the given sequence
// number. Per the inline comment, a found entry is zeroed and true is
// returned. NOTE(review): the zeroing/decrement statements and the
// returns were lost in extraction.
1353 TraceCPU::ElasticDataGen::GraphNode::removeRobDep(NodeSeqNum rob_dep
)
1355 for (auto& own_rob_dep
: robDep
) {
1356 if (own_rob_dep
== rob_dep
) {
1357 // If the rob dependency is found, make it zero and return true
// The count of pending ROB deps must be positive before removal.
1359 assert(numRobDep
> 0);
1361 DPRINTFR(TraceCPUData
, "\tFor %lli: Marking ROB dependency %lli "
1362 "done.\n", seqNum
, rob_dep
);
// Reset this node's register-dependency state by iterating over the
// regDep array. NOTE(review): the loop body (presumably zeroing each
// entry and the counter -- TODO confirm) was lost in extraction.
1370 TraceCPU::ElasticDataGen::GraphNode::clearRegDep() {
1371 for (auto& own_reg_dep
: regDep
) {
// Reset this node's ROB-dependency state by iterating over the robDep
// array. NOTE(review): the loop body (presumably zeroing each entry and
// the counter -- TODO confirm) was lost in extraction.
1378 TraceCPU::ElasticDataGen::GraphNode::clearRobDep() {
1379 for (auto& own_rob_dep
: robDep
) {
1386 TraceCPU::ElasticDataGen::GraphNode::removeDepOnInst(NodeSeqNum done_seq_num
)
1388 // If it is an rob dependency then remove it
1389 if (!removeRobDep(done_seq_num
)) {
1390 // If it is not an rob dependency then it must be a register dependency
1391 // If the register dependency is not found, it violates an assumption
1392 // and must be caught by assert.
1393 bool regdep_found M5_VAR_USED
= removeRegDep(done_seq_num
);
1394 assert(regdep_found
);
1396 // Return true if the node is dependency free
1397 return (numRobDep
== 0 && numRegDep
== 0);
// Dump this graph node in a comma-separated textual form to the debug
// log: sequence number, type, memory fields for loads/stores,
// computation delay, then the non-zero ROB deps, register deps and the
// dependent nodes' sequence numbers. NOTE(review): the index variable
// declarations/increments and loop-closing braces were lost in
// extraction.
1401 TraceCPU::ElasticDataGen::GraphNode::writeElementAsTrace() const
1403 DPRINTFR(TraceCPUData
, "%lli", seqNum
);
1404 DPRINTFR(TraceCPUData
, ",%s", typeToStr());
// Memory fields are only meaningful for loads and stores.
1405 if (isLoad() || isStore()) {
1406 DPRINTFR(TraceCPUData
, ",%i", physAddr
);
1407 DPRINTFR(TraceCPUData
, ",%i", size
);
1408 DPRINTFR(TraceCPUData
, ",%i", flags
);
1410 DPRINTFR(TraceCPUData
, ",%lli", compDelay
);
// ROB dependencies: zero terminates the list (cleared slots are zero).
1412 DPRINTFR(TraceCPUData
, "robDep:");
1413 while (robDep
[i
] != 0) {
1414 DPRINTFR(TraceCPUData
, ",%lli", robDep
[i
]);
// Register dependencies, also zero-terminated.
1418 DPRINTFR(TraceCPUData
, "regDep:");
1419 while (regDep
[i
] != 0) {
1420 DPRINTFR(TraceCPUData
, ",%lli", regDep
[i
]);
// Walk the list of dependent nodes and print their sequence numbers.
1423 auto child_itr
= dependents
.begin();
1424 DPRINTFR(TraceCPUData
, "dependents:");
1425 while (child_itr
!= dependents
.end()) {
1426 DPRINTFR(TraceCPUData
, ":%lli", (*child_itr
)->seqNum
);
1430 DPRINTFR(TraceCPUData
, "\n");
1434 TraceCPU::ElasticDataGen::GraphNode::typeToStr() const
1436 return Record::RecordType_Name(type
);
1439 TraceCPU::FixedRetryGen::InputStream::InputStream(const std::string
& filename
)
1442 // Create a protobuf message for the header and read it from the stream
1443 ProtoMessage::PacketHeader header_msg
;
1444 if (!trace
.read(header_msg
)) {
1445 panic("Failed to read packet header from %s\n", filename
);
1447 if (header_msg
.tick_freq() != SimClock::Frequency
) {
1448 panic("Trace %s was recorded with a different tick frequency %d\n",
1449 header_msg
.tick_freq());
// Rewind the fixed-retry trace stream to its beginning. Only the
// signature is visible in this chunk; the body (presumably trace.reset()
// -- TODO confirm against full source) was lost in extraction.
1455 TraceCPU::FixedRetryGen::InputStream::reset()
1461 TraceCPU::FixedRetryGen::InputStream::read(TraceElement
* element
)
1463 ProtoMessage::Packet pkt_msg
;
1464 if (trace
.read(pkt_msg
)) {
1465 element
->cmd
= pkt_msg
.cmd();
1466 element
->addr
= pkt_msg
.addr();
1467 element
->blocksize
= pkt_msg
.size();
1468 element
->tick
= pkt_msg
.tick();
1469 element
->flags
= pkt_msg
.has_flags() ? pkt_msg
.flags() : 0;
1470 element
->pc
= pkt_msg
.has_pc() ? pkt_msg
.pc() : 0;
1474 // We have reached the end of the file