2 * Copyright (c) 2013 - 2015 ARM Limited
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 #include "cpu/o3/probe/elastic_trace.hh"
40 #include "base/callback.hh"
41 #include "base/output.hh"
42 #include "base/trace.hh"
43 #include "cpu/reg_class.hh"
44 #include "debug/ElasticTrace.hh"
45 #include "mem/packet.hh"
47 ElasticTrace::ElasticTrace(const ElasticTraceParams
* params
)
48 : ProbeListenerObject(params
),
49 regEtraceListenersEvent([this]{ regEtraceListeners(); }, name()),
52 depWindowSize(params
->depWindowSize
),
53 dataTraceStream(nullptr),
54 instTraceStream(nullptr),
55 startTraceInst(params
->startTraceInst
),
57 traceVirtAddr(params
->traceVirtAddr
)
59 cpu
= dynamic_cast<FullO3CPU
<O3CPUImpl
>*>(params
->manager
);
60 fatal_if(!cpu
, "Manager of %s is not of type O3CPU and thus does not "\
61 "support dependency tracing.\n", name());
63 fatal_if(depWindowSize
== 0, "depWindowSize parameter must be non-zero. "\
64 "Recommended size is 3x ROB size in the O3CPU.\n");
66 fatal_if(cpu
->numThreads
> 1, "numThreads = %i, %s supports tracing for"\
67 "single-threaded workload only", cpu
->numThreads
, name());
68 // Initialize the protobuf output stream
69 fatal_if(params
->instFetchTraceFile
== "", "Assign instruction fetch "\
70 "trace file path to instFetchTraceFile");
71 fatal_if(params
->dataDepTraceFile
== "", "Assign data dependency "\
72 "trace file path to dataDepTraceFile");
73 std::string filename
= simout
.resolve(name() + "." +
74 params
->instFetchTraceFile
);
75 instTraceStream
= new ProtoOutputStream(filename
);
76 filename
= simout
.resolve(name() + "." + params
->dataDepTraceFile
);
77 dataTraceStream
= new ProtoOutputStream(filename
);
78 // Create a protobuf message for the header and write it to the stream
79 ProtoMessage::PacketHeader inst_pkt_header
;
80 inst_pkt_header
.set_obj_id(name());
81 inst_pkt_header
.set_tick_freq(SimClock::Frequency
);
82 instTraceStream
->write(inst_pkt_header
);
83 // Create a protobuf message for the header and write it to
85 ProtoMessage::InstDepRecordHeader data_rec_header
;
86 data_rec_header
.set_obj_id(name());
87 data_rec_header
.set_tick_freq(SimClock::Frequency
);
88 data_rec_header
.set_window_size(depWindowSize
);
89 dataTraceStream
->write(data_rec_header
);
90 // Register a callback to flush trace records and close the output streams.
91 Callback
* cb
= new MakeCallback
<ElasticTrace
,
92 &ElasticTrace::flushTraces
>(this);
93 registerExitCallback(cb
);
97 ElasticTrace::regProbeListeners()
99 inform("@%llu: regProbeListeners() called, startTraceInst = %llu",
100 curTick(), startTraceInst
);
101 if (startTraceInst
== 0) {
102 // If we want to start tracing from the start of the simulation,
103 // register all elastic trace probes now.
104 regEtraceListeners();
106 // Schedule an event to register all elastic trace probes when
107 // specified no. of instructions are committed.
108 cpu
->getContext(0)->scheduleInstCountEvent(
109 ®EtraceListenersEvent
, startTraceInst
);
114 ElasticTrace::regEtraceListeners()
116 assert(!allProbesReg
);
117 inform("@%llu: No. of instructions committed = %llu, registering elastic"
118 " probe listeners", curTick(), cpu
->numSimulatedInsts());
119 // Create new listeners: provide method to be called upon a notify() for
121 listeners
.push_back(new ProbeListenerArg
<ElasticTrace
, RequestPtr
>(this,
122 "FetchRequest", &ElasticTrace::fetchReqTrace
));
123 listeners
.push_back(new ProbeListenerArg
<ElasticTrace
,
124 DynInstConstPtr
>(this, "Execute",
125 &ElasticTrace::recordExecTick
));
126 listeners
.push_back(new ProbeListenerArg
<ElasticTrace
,
127 DynInstConstPtr
>(this, "ToCommit",
128 &ElasticTrace::recordToCommTick
));
129 listeners
.push_back(new ProbeListenerArg
<ElasticTrace
,
130 DynInstConstPtr
>(this, "Rename",
131 &ElasticTrace::updateRegDep
));
132 listeners
.push_back(new ProbeListenerArg
<ElasticTrace
, SeqNumRegPair
>(this,
133 "SquashInRename", &ElasticTrace::removeRegDepMapEntry
));
134 listeners
.push_back(new ProbeListenerArg
<ElasticTrace
,
135 DynInstConstPtr
>(this, "Squash",
136 &ElasticTrace::addSquashedInst
));
137 listeners
.push_back(new ProbeListenerArg
<ElasticTrace
,
138 DynInstConstPtr
>(this, "Commit",
139 &ElasticTrace::addCommittedInst
));
144 ElasticTrace::fetchReqTrace(const RequestPtr
&req
)
147 DPRINTFR(ElasticTrace
, "Fetch Req %i,(%lli,%lli,%lli),%i,%i,%lli\n",
149 req
->getPC(), req
->getVaddr(), req
->getPaddr(),
150 req
->getFlags(), req
->getSize(), curTick());
152 // Create a protobuf message including the request fields necessary to
153 // recreate the request in the TraceCPU.
154 ProtoMessage::Packet inst_fetch_pkt
;
155 inst_fetch_pkt
.set_tick(curTick());
156 inst_fetch_pkt
.set_cmd(MemCmd::ReadReq
);
157 inst_fetch_pkt
.set_pc(req
->getPC());
158 inst_fetch_pkt
.set_flags(req
->getFlags());
159 inst_fetch_pkt
.set_addr(req
->getPaddr());
160 inst_fetch_pkt
.set_size(req
->getSize());
161 // Write the message to the stream.
162 instTraceStream
->write(inst_fetch_pkt
);
166 ElasticTrace::recordExecTick(const DynInstConstPtr
& dyn_inst
)
169 // In a corner case, a retired instruction is propagated backward to the
170 // IEW instruction queue to handle some side-channel information. But we
171 // must not process an instruction again. So we test the sequence number
172 // against the lastClearedSeqNum and skip adding the instruction for such
174 if (dyn_inst
->seqNum
<= lastClearedSeqNum
) {
175 DPRINTFR(ElasticTrace
, "[sn:%lli] Ignoring in execute as instruction \
176 has already retired (mostly squashed)", dyn_inst
->seqNum
);
177 // Do nothing as program has proceeded and this inst has been
178 // propagated backwards to handle something.
182 DPRINTFR(ElasticTrace
, "[sn:%lli] Execute Tick = %i\n", dyn_inst
->seqNum
,
184 // Either the execution info object will already exist if this
185 // instruction had a register dependency recorded in the rename probe
186 // listener before entering execute stage or it will not exist and will
187 // need to be created here.
188 InstExecInfo
* exec_info_ptr
;
189 auto itr_exec_info
= tempStore
.find(dyn_inst
->seqNum
);
190 if (itr_exec_info
!= tempStore
.end()) {
191 exec_info_ptr
= itr_exec_info
->second
;
193 exec_info_ptr
= new InstExecInfo
;
194 tempStore
[dyn_inst
->seqNum
] = exec_info_ptr
;
197 exec_info_ptr
->executeTick
= curTick();
198 maxTempStoreSize
= std::max(tempStore
.size(),
199 (std::size_t)maxTempStoreSize
.value());
203 ElasticTrace::recordToCommTick(const DynInstConstPtr
& dyn_inst
)
205 // If tracing has just been enabled then the instruction at this stage of
206 // execution is far enough that we cannot gather info about its past like
207 // the tick it started execution. Simply return until we see an instruction
208 // that is found in the tempStore.
209 auto itr_exec_info
= tempStore
.find(dyn_inst
->seqNum
);
210 if (itr_exec_info
== tempStore
.end()) {
211 DPRINTFR(ElasticTrace
, "recordToCommTick: [sn:%lli] Not in temp store,"
212 " skipping.\n", dyn_inst
->seqNum
);
216 DPRINTFR(ElasticTrace
, "[sn:%lli] To Commit Tick = %i\n", dyn_inst
->seqNum
,
218 InstExecInfo
* exec_info_ptr
= itr_exec_info
->second
;
219 exec_info_ptr
->toCommitTick
= curTick();
224 ElasticTrace::updateRegDep(const DynInstConstPtr
& dyn_inst
)
226 // Get the sequence number of the instruction
227 InstSeqNum seq_num
= dyn_inst
->seqNum
;
229 assert(dyn_inst
->seqNum
> lastClearedSeqNum
);
231 // Since this is the first probe activated in the pipeline, create
232 // a new execution info object to track this instruction as it
233 // progresses through the pipeline.
234 InstExecInfo
* exec_info_ptr
= new InstExecInfo
;
235 tempStore
[seq_num
] = exec_info_ptr
;
237 // Loop through the source registers and look up the dependency map. If
238 // the source register entry is found in the dependency map, add a
239 // dependency on the last writer.
240 int8_t max_regs
= dyn_inst
->numSrcRegs();
241 for (int src_idx
= 0; src_idx
< max_regs
; src_idx
++) {
243 const RegId
& src_reg
= dyn_inst
->srcRegIdx(src_idx
);
244 if (!src_reg
.isMiscReg() &&
245 !src_reg
.isZeroReg()) {
246 // Get the physical register index of the i'th source register.
247 PhysRegIdPtr phys_src_reg
= dyn_inst
->renamedSrcRegIdx(src_idx
);
248 DPRINTFR(ElasticTrace
, "[sn:%lli] Check map for src reg"
249 " %i (%s)\n", seq_num
,
250 phys_src_reg
->flatIndex(), phys_src_reg
->className());
251 auto itr_writer
= physRegDepMap
.find(phys_src_reg
->flatIndex());
252 if (itr_writer
!= physRegDepMap
.end()) {
253 InstSeqNum last_writer
= itr_writer
->second
;
254 // Additionally the dependency distance is kept less than the
255 // window size parameter to limit the memory allocation to
256 // nodes in the graph. If the window were tending to infinite
257 // we would have to load a large number of node objects during
259 if (seq_num
- last_writer
< depWindowSize
) {
260 // Record a physical register dependency.
261 exec_info_ptr
->physRegDepSet
.insert(last_writer
);
269 // Loop through the destination registers of this instruction and update
270 // the physical register dependency map for last writers to registers.
271 max_regs
= dyn_inst
->numDestRegs();
272 for (int dest_idx
= 0; dest_idx
< max_regs
; dest_idx
++) {
273 // For data dependency tracking the register must be an int, float or
274 // CC register and not a Misc register.
275 const RegId
& dest_reg
= dyn_inst
->destRegIdx(dest_idx
);
276 if (!dest_reg
.isMiscReg() &&
277 !dest_reg
.isZeroReg()) {
278 // Get the physical register index of the i'th destination
280 PhysRegIdPtr phys_dest_reg
= dyn_inst
->renamedDestRegIdx(dest_idx
);
281 DPRINTFR(ElasticTrace
, "[sn:%lli] Update map for dest reg"
282 " %i (%s)\n", seq_num
, phys_dest_reg
->flatIndex(),
283 dest_reg
.className());
284 physRegDepMap
[phys_dest_reg
->flatIndex()] = seq_num
;
287 maxPhysRegDepMapSize
= std::max(physRegDepMap
.size(),
288 (std::size_t)maxPhysRegDepMapSize
.value());
292 ElasticTrace::removeRegDepMapEntry(const SeqNumRegPair
&inst_reg_pair
)
294 DPRINTFR(ElasticTrace
, "Remove Map entry for Reg %i\n",
295 inst_reg_pair
.second
);
296 auto itr_regdep_map
= physRegDepMap
.find(inst_reg_pair
.second
);
297 if (itr_regdep_map
!= physRegDepMap
.end())
298 physRegDepMap
.erase(itr_regdep_map
);
302 ElasticTrace::addSquashedInst(const DynInstConstPtr
& head_inst
)
304 // If the squashed instruction was squashed before being processed by
305 // execute stage then it will not be in the temporary store. In this case
306 // do nothing and return.
307 auto itr_exec_info
= tempStore
.find(head_inst
->seqNum
);
308 if (itr_exec_info
== tempStore
.end())
311 // If there is a squashed load for which a read request was
312 // sent before it got squashed then add it to the trace.
313 DPRINTFR(ElasticTrace
, "Attempt to add squashed inst [sn:%lli]\n",
315 // Get pointer to the execution info object corresponding to the inst.
316 InstExecInfo
* exec_info_ptr
= itr_exec_info
->second
;
317 if (head_inst
->isLoad() && exec_info_ptr
->executeTick
!= MaxTick
&&
318 exec_info_ptr
->toCommitTick
!= MaxTick
&&
319 head_inst
->hasRequest() &&
320 head_inst
->getFault() == NoFault
) {
321 // Add record to depTrace with commit parameter as false.
322 addDepTraceRecord(head_inst
, exec_info_ptr
, false);
324 // As the information contained is no longer needed, remove the execution
325 // info object from the temporary store.
326 clearTempStoreUntil(head_inst
);
330 ElasticTrace::addCommittedInst(const DynInstConstPtr
& head_inst
)
332 DPRINTFR(ElasticTrace
, "Attempt to add committed inst [sn:%lli]\n",
335 // Add the instruction to the depTrace.
336 if (!head_inst
->isNop()) {
338 // If tracing has just been enabled then the instruction at this stage
339 // of execution is far enough that we cannot gather info about its past
340 // like the tick it started execution. Simply return until we see an
341 // instruction that is found in the tempStore.
342 auto itr_temp_store
= tempStore
.find(head_inst
->seqNum
);
343 if (itr_temp_store
== tempStore
.end()) {
344 DPRINTFR(ElasticTrace
, "addCommittedInst: [sn:%lli] Not in temp "
345 "store, skipping.\n", head_inst
->seqNum
);
349 // Get pointer to the execution info object corresponding to the inst.
350 InstExecInfo
* exec_info_ptr
= itr_temp_store
->second
;
351 assert(exec_info_ptr
->executeTick
!= MaxTick
);
352 assert(exec_info_ptr
->toCommitTick
!= MaxTick
);
354 // Check if the instruction had a fault, if it predicated false and
355 // thus previous register values were restored or if it was a
356 // load/store that did not have a request (e.g. when the size of the
357 // request is zero). In all these cases the instruction is set as
358 // executed and is picked up by the commit probe listener. But a
359 // request is not issued and registers are not written. So practically,
360 // skipping these should not hurt as execution would not stall on them.
361 // Alternatively, these could be included merely as a compute node in
362 // the graph. Removing these for now. If correlation accuracy needs to
363 // be improved in future these can be turned into comp nodes at the
364 // cost of bigger traces.
365 if (head_inst
->getFault() != NoFault
) {
366 DPRINTF(ElasticTrace
, "%s [sn:%lli] has faulted so "
367 "skip adding it to the trace\n",
368 (head_inst
->isMemRef() ? "Load/store" : "Comp inst."),
370 } else if (head_inst
->isMemRef() && !head_inst
->hasRequest()) {
371 DPRINTF(ElasticTrace
, "Load/store [sn:%lli] has no request so "
372 "skip adding it to the trace\n", head_inst
->seqNum
);
373 } else if (!head_inst
->readPredicate()) {
374 DPRINTF(ElasticTrace
, "%s [sn:%lli] is predicated false so "
375 "skip adding it to the trace\n",
376 (head_inst
->isMemRef() ? "Load/store" : "Comp inst."),
379 // Add record to depTrace with commit parameter as true.
380 addDepTraceRecord(head_inst
, exec_info_ptr
, true);
383 // As the information contained is no longer needed, remove the execution
384 // info object from the temporary store.
385 clearTempStoreUntil(head_inst
);
389 ElasticTrace::addDepTraceRecord(const DynInstConstPtr
& head_inst
,
390 InstExecInfo
* exec_info_ptr
, bool commit
)
392 // Create a record to assign dynamic intruction related fields.
393 TraceInfo
* new_record
= new TraceInfo
;
394 // Add to map for sequence number look up to retrieve the TraceInfo pointer
395 traceInfoMap
[head_inst
->seqNum
] = new_record
;
397 // Assign fields from the instruction
398 new_record
->instNum
= head_inst
->seqNum
;
399 new_record
->commit
= commit
;
400 new_record
->type
= head_inst
->isLoad() ? Record::LOAD
:
401 (head_inst
->isStore() ? Record::STORE
:
404 // Assign fields for creating a request in case of a load/store
405 new_record
->reqFlags
= head_inst
->memReqFlags
;
406 new_record
->virtAddr
= head_inst
->effAddr
;
407 new_record
->asid
= head_inst
->asid
;
408 new_record
->physAddr
= head_inst
->physEffAddr
;
409 // Currently the tracing does not support split requests.
410 new_record
->size
= head_inst
->effSize
;
411 new_record
->pc
= head_inst
->instAddr();
413 // Assign the timing information stored in the execution info object
414 new_record
->executeTick
= exec_info_ptr
->executeTick
;
415 new_record
->toCommitTick
= exec_info_ptr
->toCommitTick
;
416 new_record
->commitTick
= curTick();
418 // Assign initial values for number of dependents and computational delay
419 new_record
->numDepts
= 0;
420 new_record
->compDelay
= -1;
422 // The physical register dependency set of the first instruction is
423 // empty. Since there are no records in the depTrace at this point, the
424 // case of adding an ROB dependency by using a reverse iterator is not
425 // applicable. Thus, populate the fields of the record corresponding to the
426 // first instruction and return.
427 if (depTrace
.empty()) {
428 // Store the record in depTrace.
429 depTrace
.push_back(new_record
);
430 DPRINTF(ElasticTrace
, "Added first inst record %lli to DepTrace.\n",
431 new_record
->instNum
);
435 // Clear register dependencies for squashed loads as they may be dependent
436 // on squashed instructions and we do not add those to the trace.
437 if (head_inst
->isLoad() && !commit
) {
438 (exec_info_ptr
->physRegDepSet
).clear();
441 // Assign the register dependencies stored in the execution info object
442 std::set
<InstSeqNum
>::const_iterator dep_set_it
;
443 for (dep_set_it
= (exec_info_ptr
->physRegDepSet
).begin();
444 dep_set_it
!= (exec_info_ptr
->physRegDepSet
).end();
446 auto trace_info_itr
= traceInfoMap
.find(*dep_set_it
);
447 if (trace_info_itr
!= traceInfoMap
.end()) {
448 // The register dependency is valid. Assign it and calculate
449 // computational delay
450 new_record
->physRegDepList
.push_back(*dep_set_it
);
451 DPRINTF(ElasticTrace
, "Inst %lli has register dependency on "
452 "%lli\n", new_record
->instNum
, *dep_set_it
);
453 TraceInfo
* reg_dep
= trace_info_itr
->second
;
455 compDelayPhysRegDep(reg_dep
, new_record
);
458 // The instruction that this has a register dependency on was
459 // not added to the trace because of one of the following
460 // 1. it was an instruction that had a fault
461 // 2. it was an instruction that was predicated false and
462 // previous register values were restored
463 // 3. it was load/store that did not have a request (e.g. when
464 // the size of the request is zero but this may not be a fault)
465 // In all these cases the instruction is set as executed and is
466 // picked up by the commit probe listener. But a request is not
467 // issued and registers are not written to in these cases.
468 DPRINTF(ElasticTrace
, "Inst %lli has register dependency on "
469 "%lli is skipped\n",new_record
->instNum
, *dep_set_it
);
473 // Check for and assign an ROB dependency in addition to register
474 // dependency before adding the record to the trace.
475 // As stores have to commit in order a store is dependent on the last
476 // committed load/store. This is recorded in the ROB dependency.
477 if (head_inst
->isStore()) {
478 // Look up store-after-store order dependency
479 updateCommitOrderDep(new_record
, false);
480 // Look up store-after-load order dependency
481 updateCommitOrderDep(new_record
, true);
484 // In case a node is dependency-free or its dependency got discarded
485 // because it was outside the window, it is marked ready in the ROB at the
486 // time of issue. A request is sent as soon as possible. To model this, a
487 // node is assigned an issue order dependency on a committed instruction
488 // that completed earlier than it. This is done to avoid the problem of
489 // determining the issue times of such dependency-free nodes during replay
490 // which could lead to too much parallelism, thinking conservatively.
491 if (new_record
->robDepList
.empty() && new_record
->physRegDepList
.empty()) {
492 updateIssueOrderDep(new_record
);
495 // Store the record in depTrace.
496 depTrace
.push_back(new_record
);
497 DPRINTF(ElasticTrace
, "Added %s inst %lli to DepTrace.\n",
498 (commit
? "committed" : "squashed"), new_record
->instNum
);
500 // To process the number of records specified by depWindowSize in the
501 // forward direction, the depTrace must have twice as many records
502 // to check for dependencies.
503 if (depTrace
.size() == 2 * depWindowSize
) {
505 DPRINTF(ElasticTrace
, "Writing out trace...\n");
507 // Write out the records which have been processed to the trace
508 // and remove them from the depTrace.
509 writeDepTrace(depWindowSize
);
511 // After the first window, writeDepTrace() must check for valid
518 ElasticTrace::updateCommitOrderDep(TraceInfo
* new_record
,
519 bool find_load_not_store
)
521 assert(new_record
->isStore());
522 // Iterate in reverse direction to search for the last committed
523 // load/store that completed earlier than the new record
524 depTraceRevItr
from_itr(depTrace
.end());
525 depTraceRevItr
until_itr(depTrace
.begin());
526 TraceInfo
* past_record
= *from_itr
;
527 uint32_t num_go_back
= 0;
529 // The execution time of this store is when it is sent, that is committed
530 Tick execute_tick
= curTick();
531 // Search for store-after-load or store-after-store order dependency
532 while (num_go_back
< depWindowSize
&& from_itr
!= until_itr
) {
533 if (find_load_not_store
) {
534 // Check if previous inst is a load completed earlier by comparing
536 if (hasLoadCompleted(past_record
, execute_tick
)) {
537 // Assign rob dependency and calculate the computational delay
538 assignRobDep(past_record
, new_record
);
543 // Check if previous inst is a store sent earlier by comparing with
545 if (hasStoreCommitted(past_record
, execute_tick
)) {
546 // Assign rob dependency and calculate the computational delay
547 assignRobDep(past_record
, new_record
);
553 past_record
= *from_itr
;
559 ElasticTrace::updateIssueOrderDep(TraceInfo
* new_record
)
561 // Interate in reverse direction to search for the last committed
562 // record that completed earlier than the new record
563 depTraceRevItr
from_itr(depTrace
.end());
564 depTraceRevItr
until_itr(depTrace
.begin());
565 TraceInfo
* past_record
= *from_itr
;
567 uint32_t num_go_back
= 0;
568 Tick execute_tick
= 0;
570 if (new_record
->isLoad()) {
571 // The execution time of a load is when a request is sent
572 execute_tick
= new_record
->executeTick
;
573 ++numIssueOrderDepLoads
;
574 } else if (new_record
->isStore()) {
575 // The execution time of a store is when it is sent, i.e. committed
576 execute_tick
= curTick();
577 ++numIssueOrderDepStores
;
579 // The execution time of a non load/store is when it completes
580 execute_tick
= new_record
->toCommitTick
;
581 ++numIssueOrderDepOther
;
584 // We search if this record has an issue order dependency on a past record.
585 // Once we find it, we update both the new record and the record it depends
587 while (num_go_back
< depWindowSize
&& from_itr
!= until_itr
) {
588 // Check if a previous inst is a load sent earlier, or a store sent
589 // earlier, or a comp inst completed earlier by comparing with execute
591 if (hasLoadBeenSent(past_record
, execute_tick
) ||
592 hasStoreCommitted(past_record
, execute_tick
) ||
593 hasCompCompleted(past_record
, execute_tick
)) {
594 // Assign rob dependency and calculate the computational delay
595 assignRobDep(past_record
, new_record
);
599 past_record
= *from_itr
;
605 ElasticTrace::assignRobDep(TraceInfo
* past_record
, TraceInfo
* new_record
) {
606 DPRINTF(ElasticTrace
, "%s %lli has ROB dependency on %lli\n",
607 new_record
->typeToStr(), new_record
->instNum
,
608 past_record
->instNum
);
609 // Add dependency on past record
610 new_record
->robDepList
.push_back(past_record
->instNum
);
611 // Update new_record's compute delay with respect to the past record
612 compDelayRob(past_record
, new_record
);
613 // Increment number of dependents of the past record
614 ++(past_record
->numDepts
);
615 // Update stat to log max number of dependents
616 maxNumDependents
= std::max(past_record
->numDepts
,
617 (uint32_t)maxNumDependents
.value());
621 ElasticTrace::hasStoreCommitted(TraceInfo
* past_record
,
622 Tick execute_tick
) const
624 return (past_record
->isStore() && past_record
->commitTick
<= execute_tick
);
628 ElasticTrace::hasLoadCompleted(TraceInfo
* past_record
,
629 Tick execute_tick
) const
631 return(past_record
->isLoad() && past_record
->commit
&&
632 past_record
->toCommitTick
<= execute_tick
);
636 ElasticTrace::hasLoadBeenSent(TraceInfo
* past_record
,
637 Tick execute_tick
) const
639 // Check if previous inst is a load sent earlier than this
640 return (past_record
->isLoad() && past_record
->commit
&&
641 past_record
->executeTick
<= execute_tick
);
645 ElasticTrace::hasCompCompleted(TraceInfo
* past_record
,
646 Tick execute_tick
) const
648 return(past_record
->isComp() && past_record
->toCommitTick
<= execute_tick
);
652 ElasticTrace::clearTempStoreUntil(const DynInstConstPtr
& head_inst
)
654 // Clear from temp store starting with the execution info object
655 // corresponding the head_inst and continue clearing by decrementing the
656 // sequence number until the last cleared sequence number.
657 InstSeqNum temp_sn
= (head_inst
->seqNum
);
658 while (temp_sn
> lastClearedSeqNum
) {
659 auto itr_exec_info
= tempStore
.find(temp_sn
);
660 if (itr_exec_info
!= tempStore
.end()) {
661 InstExecInfo
* exec_info_ptr
= itr_exec_info
->second
;
662 // Free allocated memory for the info object
663 delete exec_info_ptr
;
664 // Remove entry from temporary store
665 tempStore
.erase(itr_exec_info
);
669 // Update the last cleared sequence number to that of the head_inst
670 lastClearedSeqNum
= head_inst
->seqNum
;
674 ElasticTrace::compDelayRob(TraceInfo
* past_record
, TraceInfo
* new_record
)
676 // The computation delay is the delay between the completion tick of the
677 // inst. pointed to by past_record and the execution tick of its dependent
678 // inst. pointed to by new_record.
679 int64_t comp_delay
= -1;
680 Tick execution_tick
= 0, completion_tick
= 0;
682 DPRINTF(ElasticTrace
, "Seq num %lli has ROB dependency on seq num %lli.\n",
683 new_record
->instNum
, past_record
->instNum
);
685 // Get the tick when the node is executed as per the modelling of
687 execution_tick
= new_record
->getExecuteTick();
689 if (past_record
->isLoad()) {
690 if (new_record
->isStore()) {
691 completion_tick
= past_record
->toCommitTick
;
693 completion_tick
= past_record
->executeTick
;
695 } else if (past_record
->isStore()) {
696 completion_tick
= past_record
->commitTick
;
697 } else if (past_record
->isComp()){
698 completion_tick
= past_record
->toCommitTick
;
700 assert(execution_tick
>= completion_tick
);
701 comp_delay
= execution_tick
- completion_tick
;
703 DPRINTF(ElasticTrace
, "Computational delay is %lli - %lli = %lli\n",
704 execution_tick
, completion_tick
, comp_delay
);
706 // Assign the computational delay with respect to the dependency which
707 // completes the latest.
708 if (new_record
->compDelay
== -1)
709 new_record
->compDelay
= comp_delay
;
711 new_record
->compDelay
= std::min(comp_delay
, new_record
->compDelay
);
712 DPRINTF(ElasticTrace
, "Final computational delay = %lli.\n",
713 new_record
->compDelay
);
717 ElasticTrace::compDelayPhysRegDep(TraceInfo
* past_record
,
718 TraceInfo
* new_record
)
720 // The computation delay is the delay between the completion tick of the
721 // inst. pointed to by past_record and the execution tick of its dependent
722 // inst. pointed to by new_record.
723 int64_t comp_delay
= -1;
724 Tick execution_tick
= 0, completion_tick
= 0;
726 DPRINTF(ElasticTrace
, "Seq. num %lli has register dependency on seq. num"
727 " %lli.\n", new_record
->instNum
, past_record
->instNum
);
729 // Get the tick when the node is executed as per the modelling of
731 execution_tick
= new_record
->getExecuteTick();
733 // When there is a physical register dependency on an instruction, the
734 // completion tick of that instruction is when it wrote to the register,
735 // that is toCommitTick. In case, of a store updating a destination
736 // register, this is approximated to commitTick instead
737 if (past_record
->isStore()) {
738 completion_tick
= past_record
->commitTick
;
740 completion_tick
= past_record
->toCommitTick
;
742 assert(execution_tick
>= completion_tick
);
743 comp_delay
= execution_tick
- completion_tick
;
744 DPRINTF(ElasticTrace
, "Computational delay is %lli - %lli = %lli\n",
745 execution_tick
, completion_tick
, comp_delay
);
747 // Assign the computational delay with respect to the dependency which
748 // completes the latest.
749 if (new_record
->compDelay
== -1)
750 new_record
->compDelay
= comp_delay
;
752 new_record
->compDelay
= std::min(comp_delay
, new_record
->compDelay
);
753 DPRINTF(ElasticTrace
, "Final computational delay = %lli.\n",
754 new_record
->compDelay
);
758 ElasticTrace::TraceInfo::getExecuteTick() const
761 // Execution tick for a load instruction is when the request was sent,
762 // that is executeTick.
764 } else if (isStore()) {
765 // Execution tick for a store instruction is when the request was sent,
766 // that is commitTick.
769 // Execution tick for a non load/store instruction is when the register
770 // value was written to, that is commitTick.
// Walks depTrace from its beginning while num_to_write > 0 and emits the
// visited records to dataTraceStream as ProtoMessage::InstDepRecord
// messages, then erases the visited range from depTrace. Comp records with
// no dependents (numDepts == 0) are not written; they are counted and the
// count is stored as the weight of the next record that is written.
// NOTE(review): the numbering embedded in this listing skips several lines
// (e.g. 807, 815, 832-833, 868, 870-871, 874, 876-877), so braces, else
// branches and some statements are presumably not shown here - confirm
// against the original file before relying on the control flow as displayed.
776 ElasticTrace::writeDepTrace(uint32_t num_to_write
)
778 // Write the trace with fields as follows:
779 // Instruction sequence number
780 // If instruction was a load
781 // If instruction was a store
782 // If instruction has addr
783 // If instruction has size
784 // If instruction has flags
785 // List of order dependencies - optional, repeated
786 // Computational delay with respect to last completed dependency
787 // List of physical register RAW dependencies - optional, repeated
788 // Weight of a node equal to no. of filtered nodes before it - optional
789 uint16_t num_filtered_nodes
= 0;
// Keep the starting position so that the visited range can be erased from
// depTrace in one call after the loop.
790 depTraceItr
dep_trace_itr(depTrace
.begin());
791 depTraceItr dep_trace_itr_start
= dep_trace_itr
;
792 while (num_to_write
> 0) {
793 TraceInfo
* temp_ptr
= *dep_trace_itr
;
794 assert(temp_ptr
->type
!= Record::INVALID
);
795 // If no node depends on a comp node then there is no reason to
796 // track the comp node in the dependency graph. We filter out such
797 // nodes but count them and add a weight field to the subsequent node
798 // that we do include in the trace.
799 if (!temp_ptr
->isComp() || temp_ptr
->numDepts
!= 0) {
800 DPRINTFR(ElasticTrace
, "Instruction with seq. num %lli "
801 "is as follows:\n", temp_ptr
->instNum
);
802 if (temp_ptr
->isLoad() || temp_ptr
->isStore()) {
803 DPRINTFR(ElasticTrace
, "\tis a %s\n", temp_ptr
->typeToStr());
804 DPRINTFR(ElasticTrace
, "\thas a request with phys addr %i, "
805 "size %i, flags %i\n", temp_ptr
->physAddr
,
806 temp_ptr
->size
, temp_ptr
->reqFlags
);
808 DPRINTFR(ElasticTrace
, "\tis a %s\n", temp_ptr
->typeToStr());
// While firstWin is set, a compDelay still equal to -1 is replaced by one
// of the record's own tick fields, chosen by record type. The assert that
// follows requires compDelay to differ from -1 before it is traced.
810 if (firstWin
&& temp_ptr
->compDelay
== -1) {
811 if (temp_ptr
->isLoad()) {
812 temp_ptr
->compDelay
= temp_ptr
->executeTick
;
813 } else if (temp_ptr
->isStore()) {
814 temp_ptr
->compDelay
= temp_ptr
->commitTick
;
816 temp_ptr
->compDelay
= temp_ptr
->toCommitTick
;
819 assert(temp_ptr
->compDelay
!= -1);
820 DPRINTFR(ElasticTrace
, "\thas computational delay %lli\n",
821 temp_ptr
->compDelay
);
823 // Create a protobuf message for the dependency record
824 ProtoMessage::InstDepRecord dep_pkt
;
825 dep_pkt
.set_seq_num(temp_ptr
->instNum
);
826 dep_pkt
.set_type(temp_ptr
->type
);
827 dep_pkt
.set_pc(temp_ptr
->pc
);
// The request fields below are only filled in for load and store records.
828 if (temp_ptr
->isLoad() || temp_ptr
->isStore()) {
829 dep_pkt
.set_flags(temp_ptr
->reqFlags
);
830 dep_pkt
.set_p_addr(temp_ptr
->physAddr
);
831 // If tracing of virtual addresses is enabled, set the optional
// NOTE(review): embedded numbering skips 832-833 here; the comment above
// suggests the next two setters are guarded by a virtual-address tracing
// option that is not visible in this listing - confirm.
834 dep_pkt
.set_v_addr(temp_ptr
->virtAddr
);
835 dep_pkt
.set_asid(temp_ptr
->asid
);
837 dep_pkt
.set_size(temp_ptr
->size
);
839 dep_pkt
.set_comp_delay(temp_ptr
->compDelay
);
840 if (temp_ptr
->robDepList
.empty()) {
841 DPRINTFR(ElasticTrace
, "\thas no order (rob) dependencies\n");
// Each order dependency is copied into the message and then popped, so
// robDepList is left empty once this loop finishes.
843 while (!temp_ptr
->robDepList
.empty()) {
844 DPRINTFR(ElasticTrace
, "\thas order (rob) dependency on %lli\n",
845 temp_ptr
->robDepList
.front());
846 dep_pkt
.add_rob_dep(temp_ptr
->robDepList
.front());
847 temp_ptr
->robDepList
.pop_front();
849 if (temp_ptr
->physRegDepList
.empty()) {
850 DPRINTFR(ElasticTrace
, "\thas no register dependencies\n");
// Same draining pattern for the register dependencies: physRegDepList is
// left empty once its entries have been added to the message.
852 while (!temp_ptr
->physRegDepList
.empty()) {
853 DPRINTFR(ElasticTrace
, "\thas register dependency on %lli\n",
854 temp_ptr
->physRegDepList
.front());
855 dep_pkt
.add_reg_dep(temp_ptr
->physRegDepList
.front());
856 temp_ptr
->physRegDepList
.pop_front();
858 if (num_filtered_nodes
!= 0) {
859 // Set the weight of this node as the no. of filtered nodes
860 // between this node and the last node that we wrote to output
861 // stream. The weight will be used during replay to model ROB
862 // occupancy of filtered nodes.
863 dep_pkt
.set_weight(num_filtered_nodes
);
864 num_filtered_nodes
= 0;
866 // Write the message to the protobuf output stream
867 dataTraceStream
->write(dep_pkt
);
869 // Don't write the node to the trace but note that we have filtered
872 ++num_filtered_nodes
;
// Drop the traceInfoMap entry keyed by this record's sequence number.
// NOTE(review): no iterator advance, num_to_write decrement or release of
// temp_ptr is visible in this listing (embedded numbering skips 874 and
// 876-877) - confirm against the original file.
875 traceInfoMap
.erase(temp_ptr
->instNum
);
// Remove every visited entry from depTrace in a single range erase.
879 depTrace
.erase(dep_trace_itr_start
, dep_trace_itr
);
// Registers the statistics of this object: first delegates to
// ProbeListenerObject::regStats(), then gives each stat a name of the form
// name() + ".<statName>" together with a description string.
// NOTE(review): for several stats the identifier that should precede the
// .name(...) chain is not shown in this listing (embedded numbering skips
// 887, 892, 916, 921, 926 and 931) - presumably it matches the suffix used
// in the name string; confirm against the original file.
883 ElasticTrace::regStats() {
884 ProbeListenerObject::regStats();
886 using namespace Stats
;
888 .name(name() + ".numRegDep")
889 .desc("Number of register dependencies recorded during tracing")
893 .name(name() + ".numOrderDepStores")
894 .desc("Number of commit order (rob) dependencies for a store recorded"
895 " on a past load/store during tracing")
898 numIssueOrderDepLoads
899 .name(name() + ".numIssueOrderDepLoads")
900 .desc("Number of loads that got assigned issue order dependency"
901 " because they were dependency-free")
904 numIssueOrderDepStores
905 .name(name() + ".numIssueOrderDepStores")
906 .desc("Number of stores that got assigned issue order dependency"
907 " because they were dependency-free")
910 numIssueOrderDepOther
911 .name(name() + ".numIssueOrderDepOther")
912 .desc("Number of non load/store insts that got assigned issue order"
913 " dependency because they were dependency-free")
917 .name(name() + ".numFilteredNodes")
918 .desc("No. of nodes filtered out before writing the output trace")
// NOTE(review): the description below reads "number or dependents", which
// looks like a typo for "number of dependents". It is left unchanged here
// because the string is emitted at runtime.
922 .name(name() + ".maxNumDependents")
923 .desc("Maximum number or dependents on any instruction")
927 .name(name() + ".maxTempStoreSize")
928 .desc("Maximum size of the temporary store during the run")
932 .name(name() + ".maxPhysRegDepMapSize")
933 .desc("Maximum size of register dependency map")
// Returns the string that Record::RecordType_Name() yields for the type
// field of this record. The trace code uses it to print the record type in
// debug output.
// NOTE(review): the return-type line is not shown in this listing (embedded
// numbering skips 937) - confirm it against the original file.
938 ElasticTrace::TraceInfo::typeToStr() const
940 return Record::RecordType_Name(type
);
// Returns the object's name by forwarding to ProbeListenerObject::name().
944 ElasticTrace::name() const
946 return ProbeListenerObject::name();
// Flushes the remaining trace state: writes out every record still held in
// depTrace and then destroys both trace stream objects.
// NOTE(review): dataTraceStream and instTraceStream are not reset after the
// delete in the lines visible here, so a second call would delete them
// again - presumably this is only invoked once; verify against the caller.
950 ElasticTrace::flushTraces()
952 // Write to trace all records in the depTrace.
953 writeDepTrace(depTrace
.size());
954 // Delete the stream objects
955 delete dataTraceStream
;
956 delete instTraceStream
;
// Allocates a new ElasticTrace with operator new, passing this params
// object to its constructor, and returns the resulting pointer.
// NOTE(review): who releases the returned object is not visible from this
// listing - confirm against the caller.
960 ElasticTraceParams::create()
962 return new ElasticTrace(this);