2 * Copyright (c) 2013 - 2015 ARM Limited
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 * Authors: Radhika Jagtap
42 #ifndef __CPU_TRACE_TRACE_CPU_HH__
43 #define __CPU_TRACE_TRACE_CPU_HH__
49 #include <unordered_map>
51 #include "arch/registers.hh"
52 #include "base/statistics.hh"
53 #include "cpu/base.hh"
54 #include "debug/TraceCPUData.hh"
55 #include "debug/TraceCPUInst.hh"
56 #include "params/TraceCPU.hh"
57 #include "proto/inst_dep_record.pb.h"
58 #include "proto/packet.pb.h"
59 #include "proto/protoio.hh"
60 #include "sim/sim_events.hh"
63 * The trace cpu replays traces generated using the elastic trace probe
64 * attached to the O3 CPU model. The elastic trace is an execution trace with
65 * register data dependencies and ordering dependencies annotated to it. The
66 * trace cpu also replays a fixed timestamp fetch trace that is also generated
67 * by the elastic trace probe. This trace cpu model aims at achieving faster
68 * simulation compared to the detailed cpu model and good correlation when the
69 * same trace is used for playback on different memory sub-systems.
71 * The TraceCPU inherits from BaseCPU so some virtual methods need to be
72 * defined. It has two port subclasses inherited from MasterPort for
73 * instruction and data ports. It issues the memory requests deducing the
74 * timing from the trace and without performing real execution of micro-ops. As
75 * soon as the last dependency for an instruction is complete, its
76 * computational delay, also provided in the input trace is added. The
77 * dependency-free nodes are maintained in a list, called 'ReadyList', ordered
78 * by ready time. Instructions which depend on load stall until the responses
79 * for read requests are received thus achieving elastic replay. If the
80 * dependency is not found when adding a new node, it is assumed complete.
81 * Thus, if this node is found to be completely dependency-free its issue time
82 * is calculated and it is added to the ready list immediately. This is
83 * encapsulated in the subclass ElasticDataGen.
85 * If ready nodes are issued in an unconstrained way there can be more nodes
86 * outstanding which results in divergence in timing compared to the O3CPU.
87 * Therefore, the Trace CPU also models hardware resources. A sub-class to
88 * model hardware resources contains the maximum sizes of load buffer, store
89 * buffer and ROB. If resources are not available, the node is not issued. Such
90 * nodes that are pending issue are held in the 'depFreeQueue' structure.
92 * Modeling the ROB size in the Trace CPU as a resource limitation is arguably
93 * the most important parameter of all resources. The ROB occupancy is
94 * estimated using the newly added field 'robNum'. We need to use ROB number as
95 * sequence number is at times much higher due to squashing and trace replay is
96 * focused on correct path modeling.
98 * A map called 'inFlightNodes' is added to track nodes that are not only in
99 * the readyList but also load nodes that are executed (and thus removed from
100 * readyList) but are not complete. ReadyList handles what and when to execute
101 * next node while the inFlightNodes is used for resource modelling. The oldest
102 * ROB number is updated when any node occupies the ROB or when an entry in the
103 * ROB is released. The ROB occupancy is equal to the difference in the ROB
104 * number of the newly dependency-free node and the oldest ROB number in
107 * If no node depends on a non load/store node then there is no reason to
108 * track it in the dependency graph. We filter out such nodes but count them
109 * and add a weight field to the subsequent node that we do include in the
110 * trace. The weight field is used to model ROB occupancy during replay.
112 * The depFreeQueue is chosen to be FIFO so that child nodes which are in
113 * program order get pushed into it in that order and thus issued in program
114 * order, like in the O3CPU. This is also why the dependents is made a
115 * sequential container, std::set to std::vector. We only check head of the
116 * depFreeQueue as nodes are issued in order and blocking on head models that
117 * better than looping the entire queue. An alternative choice would be to
118 * inspect top N pending nodes where N is the issue-width. This is left for
119 * future as the timing correlation looks good as it is.
121 * At the start of an execution event, first we attempt to issue such pending
122 * nodes by checking if appropriate resources have become available. If yes, we
123 * compute the execute tick with respect to the time then. Then we proceed to
124 * complete nodes from the readyList.
126 * When a read response is received, sometimes a dependency on it that was
127 * supposed to be released when it was issued is still not released. This
128 * occurs because the dependent gets added to the graph after the read was
129 * sent. So the check is made less strict and the dependency is marked complete
130 * on read response instead of insisting that it should have been removed on
133 * There is a check for requests spanning two cache lines as this condition
134 * triggers an assert fail in the L1 cache. If it does then truncate the size
135 * to access only until the end of that line and ignore the remainder.
136 * Strictly-ordered requests are skipped and the dependencies on such requests
137 * are handled by simply marking them complete immediately.
139 * The simulated seconds can be calculated as the difference between the
140 * final_tick stat and the tickOffset stat. A CountedExitEvent that contains a
141 * static int belonging to the Trace CPU class as a down counter is used to
142 * implement multi Trace CPU simulation exit.
145 class TraceCPU : public BaseCPU
149 TraceCPU(TraceCPUParams *params);
155 * This is a pure virtual function in BaseCPU. As we don't know how many
156 * insts are in the trace but only know how many micro-ops are we
157 * cannot count this stat.
161 Counter totalInsts() const
167 * Return totalOps as the number of committed micro-ops plus the
168 * speculatively issued loads that are modelled in the TraceCPU replay.
170 * @return number of micro-ops i.e. nodes in the elastic data generator
172 Counter totalOps() const
174 return dcacheGen.getMicroOpCount();
177 /* Pure virtual function in BaseCPU. Do nothing. */
178 void wakeup(ThreadID tid = 0)
184 * When resuming from checkpoint in FS mode, the TraceCPU takes over from
185 * the old cpu. This function overrides the takeOverFrom() function in the
186 * BaseCPU. It unbinds the ports of the old CPU and binds the ports of the
189 void takeOverFrom(BaseCPU *oldCPU);
192 * When instruction cache port receives a retry, schedule event
195 void icacheRetryRecvd();
198 * When data cache port receives a retry, schedule event
201 void dcacheRetryRecvd();
204 * When data cache port receives a response, this calls the dcache
205 * generator method handle to complete the load writeback.
207 * @param pkt Pointer to packet received
209 void dcacheRecvTimingResp(PacketPtr pkt);
212 * Schedule event dcacheNextEvent at the given tick
214 * @param when Tick at which to schedule event
216 void schedDcacheNextEvent(Tick when);
221 * IcachePort class that interfaces with L1 Instruction Cache.
223 class IcachePort : public MasterPort
226 /** Default constructor. */
227 IcachePort(TraceCPU* _cpu)
228 : MasterPort(_cpu->name() + ".icache_port", _cpu),
234 * Receive the timing response and simply delete the packet since
235 * instruction fetch requests are issued as per the timing in the trace
236 * and responses are ignored.
238 * @param pkt Pointer to packet received
241 bool recvTimingResp(PacketPtr pkt);
244 * Required functionally but do nothing.
246 * @param pkt Pointer to packet received
248 void recvTimingSnoopReq(PacketPtr pkt) { }
251 * Handle a retry signalled by the cache if instruction read failed in
261 * DcachePort class that interfaces with L1 Data Cache.
263 class DcachePort : public MasterPort
267 /** Default constructor. */
268 DcachePort(TraceCPU* _cpu)
269 : MasterPort(_cpu->name() + ".dcache_port", _cpu),
276 * Receive the timing response and call dcacheRecvTimingResp() method
277 * of the dcacheGen to handle completing the load
279 * @param pkt Pointer to packet received
282 bool recvTimingResp(PacketPtr pkt);
285 * Required functionally but do nothing.
287 * @param pkt Pointer to packet received
289 void recvTimingSnoopReq(PacketPtr pkt)
293 * Required functionally but do nothing.
295 * @param pkt Pointer to packet received
297 void recvFunctionalSnoop(PacketPtr pkt)
301 * Handle a retry signalled by the cache if data access failed in the
307 * Required functionally.
309 * @return true since we have to snoop
311 bool isSnooping() const { return true; }
317 /** Port to connect to L1 instruction cache. */
318 IcachePort icachePort;
320 /** Port to connect to L1 data cache. */
321 DcachePort dcachePort;
323 /** Master id for instruction read requests. */
324 const MasterID instMasterID;
326 /** Master id for data read and write requests. */
327 const MasterID dataMasterID;
329 /** File names for input instruction and data traces. */
330 std::string instTraceFile, dataTraceFile;
333 * Generator to read protobuf trace containing memory requests at fixed
334 * timestamps, perform flow control and issue memory requests. If L1 cache
335 * port sends packet successfully, determine the tick to send the next
336 * packet else wait for retry from cache.
344 * This struct stores a line in the trace file.
346 struct TraceElement {
348 /** Specifies if the request is to be a read or a write */
351 /** The address for the request */
354 /** The size of the access for the request */
357 /** The time at which the request should be sent */
360 /** Potential request flags to use */
361 Request::FlagsType flags;
363 /** Instruction PC */
367 * Check validity of this element.
369 * @return if this element is valid
371 bool isValid() const {
372 return cmd != MemCmd::InvalidCmd;
376 * Make this element invalid.
379 cmd = MemCmd::InvalidCmd;
384 * The InputStream encapsulates a trace file and the
385 * internal buffers and populates TraceElements based on
393 // Input file stream for the protobuf trace
394 ProtoInputStream trace;
399 * Create a trace input stream for a given file name.
401 * @param filename Path to the file to read from
403 InputStream(const std::string& filename);
406 * Reset the stream such that it can be played once
412 * Attempt to read a trace element from the stream,
413 * and also notify the caller if the end of the file
416 * @param element Trace element to populate
417 * @return True if an element could be read successfully
419 bool read(TraceElement* element);
424 FixedRetryGen(TraceCPU& _owner, const std::string& _name,
425 MasterPort& _port, MasterID master_id,
426 const std::string& trace_file)
431 genName(owner.name() + ".fixedretry" + _name),
439 * Called from TraceCPU init(). Reads the first message from the
440 * input trace file and returns the send tick.
442 * @return Tick when first packet must be sent
447 * This tries to send current or retry packet and returns true if
448 * successful. It calls nextExecute() to read next message.
450 * @return bool true if packet is sent successfully
454 /** Returns name of the FixedRetryGen instance. */
455 const std::string& name() const { return genName; }
458 * Creates a new request assigning the request parameters passed by the
459 * arguments. Calls the port's sendTimingReq() and returns true if
460 * the packet was sent successfully. It is called by tryNext()
462 * @param addr address of request
463 * @param size size of request
464 * @param cmd if it is a read or write request
465 * @param flags associated request flags
466 * @param pc instruction PC that generated the request
468 * @return true if packet was sent successfully
470 bool send(Addr addr, unsigned size, const MemCmd& cmd,
471 Request::FlagsType flags, Addr pc);
473 /** Exit the FixedRetryGen. */
477 * Reads a line of the trace file. Returns the tick
478 * when the next request should be generated. If the end
479 * of the file has been reached, it returns false.
481 * @return bool false if end of file has been reached
486 * Returns the traceComplete variable which is set when end of the
487 * input trace file is reached.
489 * @return bool true if traceComplete is set, false otherwise.
491 bool isTraceComplete() { return traceComplete; }
493 int64_t tickDelta() { return delta; }
499 /** Reference of the TraceCPU. */
502 /** Reference of the port to be used to issue memory requests. */
505 /** MasterID used for the requests being sent. */
506 const MasterID masterID;
508 /** Input stream used for reading the input trace file. */
511 /** String to store the name of the FixedRetryGen. */
514 /** PacketPtr used to store the packet to retry. */
518 * Stores the difference in the send ticks of the current and last
519 * packets. Keeping this signed to check overflow to a negative value
520 * which will be caught by assert(delta > 0)
525 * Set to true when end of trace is reached.
529 /** Store an element read from the trace to send as the next packet. */
530 TraceElement currElement;
532 /** Stats for instruction accesses replayed. */
533 Stats::Scalar numSendAttempted;
534 Stats::Scalar numSendSucceeded;
535 Stats::Scalar numSendFailed;
536 Stats::Scalar numRetrySucceeded;
537 /** Last simulated tick by the FixedRetryGen */
538 Stats::Scalar instLastTick;
543 * The elastic data memory request generator to read protobuf trace
544 * containing execution trace annotated with data and ordering
545 * dependencies. It deduces the time at which to send a load/store request
546 * by tracking the dependencies. It attempts to send a memory request for a
547 * load/store without performing real execution of micro-ops. If L1 cache
548 * port sends packet successfully, the generator checks which instructions
549 * became dependency free as a result of this and schedules an event
550 * accordingly. If it fails to send the packet, it waits for a retry from
558 /** Node sequence number type. */
559 typedef uint64_t NodeSeqNum;
561 /** Node ROB number type. */
562 typedef uint64_t NodeRobNum;
565 * The struct GraphNode stores an instruction in the trace file. The
566 * format of the trace file favours constructing a dependency graph of
567 * the execution and this struct is used to encapsulate the request
568 * data as well as pointers to its dependent GraphNodes.
574 * The maximum no. of ROB dependencies. There can be at most 2
575 * order dependencies which could exist for a store. For a load
576 * and comp node there can be at most one order dependency.
578 static const uint8_t maxRobDep = 2;
580 /** Typedef for the array containing the ROB dependencies */
581 typedef std::array<NodeSeqNum, maxRobDep> RobDepArray;
583 /** Typedef for the array containing the register dependencies */
584 typedef std::array<NodeSeqNum, TheISA::MaxInstSrcRegs> RegDepArray;
586 /** Instruction sequence number */
589 /** ROB occupancy number */
592 /** If instruction is a load */
595 /** If instruction is a store */
598 /** The address for the request if any */
601 /** Size of request if any */
604 /** Request flags if any */
605 Request::Flags flags;
607 /** Instruction PC */
610 /** Array of order dependencies. */
613 /** Number of order dependencies */
616 /** Computational delay */
620 * Array of register dependencies (incoming) if any. Maximum number
621 * of source registers used to set maximum size of the array
625 /** Number of register dependencies */
629 * A vector of nodes dependent (outgoing) on this node. A
630 * sequential container is chosen because when dependents become
631 * free, they attempt to issue in program order.
633 std::vector<GraphNode *> dependents;
635 /** Initialize register dependency array to all zeroes */
638 /** Initialize ROB dependency array to all zeroes */
641 /** Remove completed instruction from register dependency array */
642 bool removeRegDep(NodeSeqNum reg_dep);
644 /** Remove completed instruction from order dependency array */
645 bool removeRobDep(NodeSeqNum rob_dep);
647 /** Check for all dependencies on completed inst */
648 bool removeDepOnInst(NodeSeqNum done_seq_num);
650 /** Return true if node has a request which is strictly ordered */
651 bool isStrictlyOrdered() const {
652 return (flags.isSet(Request::STRICT_ORDER));
655 * Write out element in trace-compatible format using debug flag
658 void writeElementAsTrace() const;
661 /** Struct to store a ready-to-execute node and its execution tick. */
664 /** The sequence number of the ready node */
667 /** The tick at which the ready node must be executed */
672 * The HardwareResource class models structures that hold the in-flight
673 * nodes. When a node becomes dependency free, first check if resources
674 * are available to issue it.
676 class HardwareResource
680 * Constructor that initializes the sizes of the structures.
682 * @param max_rob size of the Reorder Buffer
683 * @param max_stores size of Store Buffer
684 * @param max_loads size of Load Buffer
686 HardwareResource(uint16_t max_rob, uint16_t max_stores,
690 * Occupy appropriate structures for an issued node.
692 * @param node_ptr pointer to the issued node
694 void occupy(const GraphNode* new_node);
697 * Release appropriate structures for a completed node.
699 * @param node_ptr pointer to the completed node
701 void release(const GraphNode* done_node);
703 /** Release store buffer entry for a completed store */
704 void releaseStoreBuffer();
707 * Check if structures required to issue a node are free.
709 * @param node_ptr pointer to the node ready to issue
710 * @return true if resources are available
712 bool isAvailable(const GraphNode* new_node) const;
715 * Check if there are any outstanding requests, i.e. requests for
716 * which we are yet to receive a response.
718 * @return true if there is at least one read or write request
721 bool awaitingResponse() const;
723 /** Print resource occupancy for debugging */
724 void printOccupancy();
728 * The size of the ROB used to throttle the max. number of in-flight
731 const uint16_t sizeROB;
734 * The size of store buffer. This is used to throttle the max. number
735 * of in-flight stores.
737 const uint16_t sizeStoreBuffer;
740 * The size of load buffer. This is used to throttle the max. number
741 * of in-flight loads.
743 const uint16_t sizeLoadBuffer;
746 * A map from the sequence number to the ROB number of the in-
747 * flight nodes. This includes all nodes that are in the readyList
748 * plus the loads for which a request has been sent which are not
749 * present in the readyList. But such loads are not yet complete
750 * and thus occupy resources. We need to query the oldest in-flight
751 * node and since a map container keeps all its keys sorted using
752 * the less than criterion, the first element is the in-flight node
753 * with the least sequence number, i.e. the oldest in-flight node.
755 std::map<NodeSeqNum, NodeRobNum> inFlightNodes;
757 /** The ROB number of the oldest in-flight node */
758 NodeRobNum oldestInFlightRobNum;
760 /** Number of ready loads for which request may or may not be sent */
761 uint16_t numInFlightLoads;
763 /** Number of ready stores for which request may or may not be sent */
764 uint16_t numInFlightStores;
768 * The InputStream encapsulates a trace file and the
769 * internal buffers and populates GraphNodes based on
777 /** Input file stream for the protobuf trace */
778 ProtoInputStream trace;
780 /** Count of committed ops read from trace plus the filtered ops */
781 uint64_t microOpCount;
784 * The window size that is read from the header of the protobuf
785 * trace and used to process the dependency trace
791 * Create a trace input stream for a given file name.
793 * @param filename Path to the file to read from
795 InputStream(const std::string& filename);
798 * Reset the stream such that it can be played once
804 * Attempt to read a trace element from the stream,
805 * and also notify the caller if the end of the file
808 * @param element Trace element to populate
809 * @param size of register dependency array stored in the element
810 * @return True if an element could be read successfully
812 bool read(GraphNode* element);
814 /** Get window size from trace */
815 uint32_t getWindowSize() const { return windowSize; }
817 /** Get number of micro-ops modelled in the TraceCPU replay */
818 uint64_t getMicroOpCount() const { return microOpCount; }
823 ElasticDataGen(TraceCPU& _owner, const std::string& _name,
824 MasterPort& _port, MasterID master_id,
825 const std::string& trace_file, uint16_t max_rob,
826 uint16_t max_stores, uint16_t max_loads)
831 genName(owner.name() + ".elastic" + _name),
833 traceComplete(false),
836 windowSize(trace.getWindowSize()),
837 hwResource(max_rob, max_stores, max_loads)
839 DPRINTF(TraceCPUData, "Window size in the trace is %d.\n",
844 * Called from TraceCPU init(). Reads the first message from the
845 * input trace file and returns the send tick.
847 * @return Tick when first packet must be sent
851 /** Returns name of the ElasticDataGen instance. */
852 const std::string& name() const { return genName; }
854 /** Exit the ElasticDataGen. */
858 * Reads a line of the trace file. Returns the tick when the next
859 * request should be generated. If the end of the file has been
860 * reached, it returns false.
862 * @return bool false if end of file has been reached else true
864 bool readNextWindow();
867 * Iterate over the dependencies of a new node and add the new node
868 * to the list of dependents of the parent node.
870 * @param new_node new node to add to the graph
871 * @tparam dep_array the dependency array of type rob or register,
872 * that is to be iterated, and may get modified
873 * @param num_dep the number of dependencies set in the array
874 * which may get modified during iteration
876 template<typename T> void addDepsOnParent(GraphNode *new_node,
881 * This is the main execute function which consumes nodes from the
882 * sorted readyList. First attempt to issue the pending dependency-free
883 * nodes held in the depFreeQueue. Insert the ready-to-issue nodes into
884 * the readyList. Then iterate through the readyList and when a node
885 * has its execute tick equal to curTick(), execute it. If the node is
886 * a load or a store call executeMemReq() and if it is neither, simply
892 * Creates a new request for a load or store assigning the request
893 * parameters. Calls the port's sendTimingReq() and returns a packet
894 * if the send failed so that it can be saved for a retry.
896 * @param node_ptr pointer to the load or store node to be executed
898 * @return packet pointer if the request failed and nullptr if it was
901 PacketPtr executeMemReq(GraphNode* node_ptr);
904 * Add a ready node to the readyList. When inserting, ensure the nodes
905 * are sorted in ascending order of their execute ticks.
907 * @param seq_num seq. num of ready node
908 * @param exec_tick the execute tick of the ready node
910 void addToSortedReadyList(NodeSeqNum seq_num, Tick exec_tick);
912 /** Print readyList for debugging using debug flag TraceCPUData. */
913 void printReadyList();
916 * When a load writeback is received, that is when the load completes,
917 * release the dependents on it. This is called from the dcache port
920 void completeMemAccess(PacketPtr pkt);
923 * Returns the execComplete variable which is set when the last
926 * @return bool true if execComplete is set, false otherwise.
928 bool isExecComplete() const { return execComplete; }
931 * Attempts to issue a node once the node's source dependencies are
932 * complete. If resources are available then add it to the readyList,
933 * otherwise the node is not issued and is stored in depFreeQueue
934 * until resources become available.
936 * @param node_ptr pointer to node to be issued
937 * @param first true if this is the first attempt to issue this node
938 * @return true if node was added to readyList
940 bool checkAndIssue(const GraphNode* node_ptr, bool first = true);
942 /** Get number of micro-ops modelled in the TraceCPU replay */
943 uint64_t getMicroOpCount() const { return trace.getMicroOpCount(); }
949 /** Reference of the TraceCPU. */
952 /** Reference of the port to be used to issue memory requests. */
955 /** MasterID used for the requests being sent. */
956 const MasterID masterID;
958 /** Input stream used for reading the input trace file. */
961 /** String to store the name of the ElasticDataGen. */
964 /** PacketPtr used to store the packet to retry. */
967 /** Set to true when end of trace is reached. */
970 /** Set to true when the next window of instructions need to be read */
973 /** Set true when execution of trace is complete */
977 * Window size within which to check for dependencies. Its value is
978 * made equal to the window size used to generate the trace which is
979 * recorded in the trace header. The dependency graph must be
980 * populated enough such that when a node completes, its potential
981 * child node must be found and the dependency removed before the
982 * completed node itself is removed. Thus as soon as the graph shrinks
983 * to become smaller than this window, we read in the next window.
985 const uint32_t windowSize;
988 * Hardware resources required to contain in-flight nodes and to
989 * throttle issuing of new nodes when resources are not available.
991 HardwareResource hwResource;
993 /** Store the depGraph of GraphNodes */
994 std::unordered_map<NodeSeqNum, GraphNode*> depGraph;
997 * Queue of dependency-free nodes that are pending issue because
998 * resources are not available. This is chosen to be FIFO so that
999 * dependent nodes which become free in program order get pushed
1000 * into the queue in that order. Thus nodes are more likely to
1001 * issue in program order.
1003 std::queue<const GraphNode*> depFreeQueue;
1005 /** List of nodes that are ready to execute */
1006 std::list<ReadyNode> readyList;
1008 /** Stats for data memory accesses replayed. */
1009 Stats::Scalar maxDependents;
1010 Stats::Scalar maxReadyListSize;
1011 Stats::Scalar numSendAttempted;
1012 Stats::Scalar numSendSucceeded;
1013 Stats::Scalar numSendFailed;
1014 Stats::Scalar numRetrySucceeded;
1015 Stats::Scalar numSplitReqs;
1016 Stats::Scalar numSOLoads;
1017 Stats::Scalar numSOStores;
1018 /** Tick when ElasticDataGen completes execution */
1019 Stats::Scalar dataLastTick;
1022 /** Instance of FixedRetryGen to replay instruction read requests. */
1023 FixedRetryGen icacheGen;
1025 /** Instance of ElasticDataGen to replay data read and write requests. */
1026 ElasticDataGen dcacheGen;
1029 * This is the control flow that uses the functionality of the icacheGen to
1030 * replay the trace. It calls tryNext(). If it returns true then next event
1031 * is scheduled at curTick() plus delta. If it returns false then delta is
1032 * ignored and control is brought back via recvRetry().
1034 void schedIcacheNext();
1037 * This is the control flow that uses the functionality of the dcacheGen to
1038 * replay the trace. It calls execute(). It checks if execution is complete
1039 * and schedules an event to exit simulation accordingly.
1041 void schedDcacheNext();
1043 /** Event for the control flow method schedIcacheNext() */
1044 EventWrapper<TraceCPU, &TraceCPU::schedIcacheNext> icacheNextEvent;
1046 /** Event for the control flow method schedDcacheNext() */
1047 EventWrapper<TraceCPU, &TraceCPU::schedDcacheNext> dcacheNextEvent;
1049 /** This is called when either generator finishes executing from the trace */
1050 void checkAndSchedExitEvent();
1052 /** Set to true when one of the generators finishes replaying its trace. */
1053 bool oneTraceComplete;
1056 * This stores the tick of the first instruction fetch request
1057 * which is later used for dumping the tickOffset stat.
1059 Tick firstFetchTick;
1062 * Number of Trace CPUs in the system used as a shared variable and passed
1063 * to the CountedExitEvent event used for counting down exit events. It is
1064 * incremented in the constructor call so that the total is arrived at
1067 static int numTraceCPUs;
1070 * A CountedExitEvent which when serviced decrements the counter. A sim
1071 * exit event is scheduled when the counter equals zero, that is all
1072 * instances of Trace CPU have had their execCompleteEvent serviced.
1074 CountedExitEvent *execCompleteEvent;
1076 Stats::Scalar numSchedDcacheEvent;
1077 Stats::Scalar numSchedIcacheEvent;
1079 /** Stat for number of simulated micro-ops. */
1080 Stats::Scalar numOps;
1081 /** Stat for the CPI. This is really cycles per micro-op and not inst. */
1085 * The first execution tick is dumped as a stat so that the simulated
1086 * seconds for a trace replay can be calculated as a difference between the
1087 * final_tick stat and the tickOffset stat
1089 Stats::Scalar tickOffset;
1093 /** Used to get a reference to the icache port. */
1094 MasterPort &getInstPort() { return icachePort; }
1096 /** Used to get a reference to the dcache port. */
1097 MasterPort &getDataPort() { return dcachePort; }
1101 #endif // __CPU_TRACE_TRACE_CPU_HH__