misc: merge branch 'release-staging-v19.0.0.0' into develop
[gem5.git] / src / cpu / o3 / probe / elastic_trace.cc
1 /*
2 * Copyright (c) 2013 - 2015 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 #include "cpu/o3/probe/elastic_trace.hh"
39
40 #include "base/callback.hh"
41 #include "base/output.hh"
42 #include "base/trace.hh"
43 #include "cpu/reg_class.hh"
44 #include "debug/ElasticTrace.hh"
45 #include "mem/packet.hh"
46
/**
 * Construct the elastic trace probe listener.
 *
 * Validates that the probe manager is an O3 CPU running a single thread,
 * opens the two protobuf output streams (instruction fetch trace and data
 * dependency trace), writes a header message to each, and registers an
 * exit callback that flushes and closes the streams at simulation end.
 *
 * @param params Generated parameter struct; supplies the dependency window
 *               size, the first instruction to trace, and the two trace
 *               file names (both mandatory).
 */
ElasticTrace::ElasticTrace(const ElasticTraceParams* params)
    :  ProbeListenerObject(params),
       regEtraceListenersEvent([this]{ regEtraceListeners(); }, name()),
       firstWin(true),
       lastClearedSeqNum(0),
       depWindowSize(params->depWindowSize),
       dataTraceStream(nullptr),
       instTraceStream(nullptr),
       startTraceInst(params->startTraceInst),
       allProbesReg(false),
       traceVirtAddr(params->traceVirtAddr)
{
    // Dependency tracing relies on probe points that only the O3 CPU
    // provides, so the manager must downcast successfully.
    cpu = dynamic_cast<FullO3CPU<O3CPUImpl>*>(params->manager);
    fatal_if(!cpu, "Manager of %s is not of type O3CPU and thus does not "\
                "support dependency tracing.\n", name());

    fatal_if(depWindowSize == 0, "depWindowSize parameter must be non-zero. "\
                "Recommended size is 3x ROB size in the O3CPU.\n");

    fatal_if(cpu->numThreads > 1, "numThreads = %i, %s supports tracing for"\
                "single-threaded workload only", cpu->numThreads, name());
    // Initialize the protobuf output stream
    fatal_if(params->instFetchTraceFile == "", "Assign instruction fetch "\
                "trace file path to instFetchTraceFile");
    fatal_if(params->dataDepTraceFile == "", "Assign data dependency "\
                "trace file path to dataDepTraceFile");
    // Trace files are resolved relative to the simulation output
    // directory and prefixed with this object's name.
    std::string filename = simout.resolve(name() + "." +
                                            params->instFetchTraceFile);
    instTraceStream = new ProtoOutputStream(filename);
    filename = simout.resolve(name() + "." + params->dataDepTraceFile);
    dataTraceStream = new ProtoOutputStream(filename);
    // Create a protobuf message for the header and write it to the stream
    ProtoMessage::PacketHeader inst_pkt_header;
    inst_pkt_header.set_obj_id(name());
    inst_pkt_header.set_tick_freq(SimClock::Frequency);
    instTraceStream->write(inst_pkt_header);
    // Create a protobuf message for the header and write it to
    // the stream. The dependency trace header additionally records the
    // window size so replay can size its in-flight node bookkeeping.
    ProtoMessage::InstDepRecordHeader data_rec_header;
    data_rec_header.set_obj_id(name());
    data_rec_header.set_tick_freq(SimClock::Frequency);
    data_rec_header.set_window_size(depWindowSize);
    dataTraceStream->write(data_rec_header);
    // Register a callback to flush trace records and close the output streams.
    Callback* cb = new MakeCallback<ElasticTrace,
        &ElasticTrace::flushTraces>(this);
    registerExitCallback(cb);
}
95
96 void
97 ElasticTrace::regProbeListeners()
98 {
99 inform("@%llu: regProbeListeners() called, startTraceInst = %llu",
100 curTick(), startTraceInst);
101 if (startTraceInst == 0) {
102 // If we want to start tracing from the start of the simulation,
103 // register all elastic trace probes now.
104 regEtraceListeners();
105 } else {
106 // Schedule an event to register all elastic trace probes when
107 // specified no. of instructions are committed.
108 cpu->getContext(0)->scheduleInstCountEvent(
109 &regEtraceListenersEvent, startTraceInst);
110 }
111 }
112
/**
 * Attach a probe listener for every O3 pipeline event the tracer needs:
 * fetch requests, execute/to-commit timestamps, rename dependencies,
 * rename squashes, full squashes, and commits. Called either at
 * construction time (startTraceInst == 0) or from the scheduled
 * instruction-count event. Must run at most once per object.
 */
void
ElasticTrace::regEtraceListeners()
{
    // Double registration would duplicate every trace record.
    assert(!allProbesReg);
    inform("@%llu: No. of instructions committed = %llu, registering elastic"
        " probe listeners", curTick(), cpu->numSimulatedInsts());
    // Create new listeners: provide method to be called upon a notify() for
    // each probe point. The probe names must match those notified by the
    // O3 CPU pipeline stages.
    listeners.push_back(new ProbeListenerArg<ElasticTrace, RequestPtr>(this,
        "FetchRequest", &ElasticTrace::fetchReqTrace));
    listeners.push_back(new ProbeListenerArg<ElasticTrace,
            DynInstConstPtr>(this, "Execute",
                &ElasticTrace::recordExecTick));
    listeners.push_back(new ProbeListenerArg<ElasticTrace,
            DynInstConstPtr>(this, "ToCommit",
                &ElasticTrace::recordToCommTick));
    listeners.push_back(new ProbeListenerArg<ElasticTrace,
            DynInstConstPtr>(this, "Rename",
                &ElasticTrace::updateRegDep));
    listeners.push_back(new ProbeListenerArg<ElasticTrace, SeqNumRegPair>(this,
        "SquashInRename", &ElasticTrace::removeRegDepMapEntry));
    listeners.push_back(new ProbeListenerArg<ElasticTrace,
            DynInstConstPtr>(this, "Squash",
                &ElasticTrace::addSquashedInst));
    listeners.push_back(new ProbeListenerArg<ElasticTrace,
            DynInstConstPtr>(this, "Commit",
                &ElasticTrace::addCommittedInst));
    allProbesReg = true;
}
142
143 void
144 ElasticTrace::fetchReqTrace(const RequestPtr &req)
145 {
146
147 DPRINTFR(ElasticTrace, "Fetch Req %i,(%lli,%lli,%lli),%i,%i,%lli\n",
148 (MemCmd::ReadReq),
149 req->getPC(), req->getVaddr(), req->getPaddr(),
150 req->getFlags(), req->getSize(), curTick());
151
152 // Create a protobuf message including the request fields necessary to
153 // recreate the request in the TraceCPU.
154 ProtoMessage::Packet inst_fetch_pkt;
155 inst_fetch_pkt.set_tick(curTick());
156 inst_fetch_pkt.set_cmd(MemCmd::ReadReq);
157 inst_fetch_pkt.set_pc(req->getPC());
158 inst_fetch_pkt.set_flags(req->getFlags());
159 inst_fetch_pkt.set_addr(req->getPaddr());
160 inst_fetch_pkt.set_size(req->getSize());
161 // Write the message to the stream.
162 instTraceStream->write(inst_fetch_pkt);
163 }
164
165 void
166 ElasticTrace::recordExecTick(const DynInstConstPtr& dyn_inst)
167 {
168
169 // In a corner case, a retired instruction is propagated backward to the
170 // IEW instruction queue to handle some side-channel information. But we
171 // must not process an instruction again. So we test the sequence number
172 // against the lastClearedSeqNum and skip adding the instruction for such
173 // corner cases.
174 if (dyn_inst->seqNum <= lastClearedSeqNum) {
175 DPRINTFR(ElasticTrace, "[sn:%lli] Ignoring in execute as instruction \
176 has already retired (mostly squashed)", dyn_inst->seqNum);
177 // Do nothing as program has proceeded and this inst has been
178 // propagated backwards to handle something.
179 return;
180 }
181
182 DPRINTFR(ElasticTrace, "[sn:%lli] Execute Tick = %i\n", dyn_inst->seqNum,
183 curTick());
184 // Either the execution info object will already exist if this
185 // instruction had a register dependency recorded in the rename probe
186 // listener before entering execute stage or it will not exist and will
187 // need to be created here.
188 InstExecInfo* exec_info_ptr;
189 auto itr_exec_info = tempStore.find(dyn_inst->seqNum);
190 if (itr_exec_info != tempStore.end()) {
191 exec_info_ptr = itr_exec_info->second;
192 } else {
193 exec_info_ptr = new InstExecInfo;
194 tempStore[dyn_inst->seqNum] = exec_info_ptr;
195 }
196
197 exec_info_ptr->executeTick = curTick();
198 maxTempStoreSize = std::max(tempStore.size(),
199 (std::size_t)maxTempStoreSize.value());
200 }
201
202 void
203 ElasticTrace::recordToCommTick(const DynInstConstPtr& dyn_inst)
204 {
205 // If tracing has just been enabled then the instruction at this stage of
206 // execution is far enough that we cannot gather info about its past like
207 // the tick it started execution. Simply return until we see an instruction
208 // that is found in the tempStore.
209 auto itr_exec_info = tempStore.find(dyn_inst->seqNum);
210 if (itr_exec_info == tempStore.end()) {
211 DPRINTFR(ElasticTrace, "recordToCommTick: [sn:%lli] Not in temp store,"
212 " skipping.\n", dyn_inst->seqNum);
213 return;
214 }
215
216 DPRINTFR(ElasticTrace, "[sn:%lli] To Commit Tick = %i\n", dyn_inst->seqNum,
217 curTick());
218 InstExecInfo* exec_info_ptr = itr_exec_info->second;
219 exec_info_ptr->toCommitTick = curTick();
220
221 }
222
223 void
224 ElasticTrace::updateRegDep(const DynInstConstPtr& dyn_inst)
225 {
226 // Get the sequence number of the instruction
227 InstSeqNum seq_num = dyn_inst->seqNum;
228
229 assert(dyn_inst->seqNum > lastClearedSeqNum);
230
231 // Since this is the first probe activated in the pipeline, create
232 // a new execution info object to track this instruction as it
233 // progresses through the pipeline.
234 InstExecInfo* exec_info_ptr = new InstExecInfo;
235 tempStore[seq_num] = exec_info_ptr;
236
237 // Loop through the source registers and look up the dependency map. If
238 // the source register entry is found in the dependency map, add a
239 // dependency on the last writer.
240 int8_t max_regs = dyn_inst->numSrcRegs();
241 for (int src_idx = 0; src_idx < max_regs; src_idx++) {
242
243 const RegId& src_reg = dyn_inst->srcRegIdx(src_idx);
244 if (!src_reg.isMiscReg() &&
245 !src_reg.isZeroReg()) {
246 // Get the physical register index of the i'th source register.
247 PhysRegIdPtr phys_src_reg = dyn_inst->renamedSrcRegIdx(src_idx);
248 DPRINTFR(ElasticTrace, "[sn:%lli] Check map for src reg"
249 " %i (%s)\n", seq_num,
250 phys_src_reg->flatIndex(), phys_src_reg->className());
251 auto itr_writer = physRegDepMap.find(phys_src_reg->flatIndex());
252 if (itr_writer != physRegDepMap.end()) {
253 InstSeqNum last_writer = itr_writer->second;
254 // Additionally the dependency distance is kept less than the
255 // window size parameter to limit the memory allocation to
256 // nodes in the graph. If the window were tending to infinite
257 // we would have to load a large number of node objects during
258 // replay.
259 if (seq_num - last_writer < depWindowSize) {
260 // Record a physical register dependency.
261 exec_info_ptr->physRegDepSet.insert(last_writer);
262 }
263 }
264
265 }
266
267 }
268
269 // Loop through the destination registers of this instruction and update
270 // the physical register dependency map for last writers to registers.
271 max_regs = dyn_inst->numDestRegs();
272 for (int dest_idx = 0; dest_idx < max_regs; dest_idx++) {
273 // For data dependency tracking the register must be an int, float or
274 // CC register and not a Misc register.
275 const RegId& dest_reg = dyn_inst->destRegIdx(dest_idx);
276 if (!dest_reg.isMiscReg() &&
277 !dest_reg.isZeroReg()) {
278 // Get the physical register index of the i'th destination
279 // register.
280 PhysRegIdPtr phys_dest_reg = dyn_inst->renamedDestRegIdx(dest_idx);
281 DPRINTFR(ElasticTrace, "[sn:%lli] Update map for dest reg"
282 " %i (%s)\n", seq_num, phys_dest_reg->flatIndex(),
283 dest_reg.className());
284 physRegDepMap[phys_dest_reg->flatIndex()] = seq_num;
285 }
286 }
287 maxPhysRegDepMapSize = std::max(physRegDepMap.size(),
288 (std::size_t)maxPhysRegDepMapSize.value());
289 }
290
291 void
292 ElasticTrace::removeRegDepMapEntry(const SeqNumRegPair &inst_reg_pair)
293 {
294 DPRINTFR(ElasticTrace, "Remove Map entry for Reg %i\n",
295 inst_reg_pair.second);
296 auto itr_regdep_map = physRegDepMap.find(inst_reg_pair.second);
297 if (itr_regdep_map != physRegDepMap.end())
298 physRegDepMap.erase(itr_regdep_map);
299 }
300
301 void
302 ElasticTrace::addSquashedInst(const DynInstConstPtr& head_inst)
303 {
304 // If the squashed instruction was squashed before being processed by
305 // execute stage then it will not be in the temporary store. In this case
306 // do nothing and return.
307 auto itr_exec_info = tempStore.find(head_inst->seqNum);
308 if (itr_exec_info == tempStore.end())
309 return;
310
311 // If there is a squashed load for which a read request was
312 // sent before it got squashed then add it to the trace.
313 DPRINTFR(ElasticTrace, "Attempt to add squashed inst [sn:%lli]\n",
314 head_inst->seqNum);
315 // Get pointer to the execution info object corresponding to the inst.
316 InstExecInfo* exec_info_ptr = itr_exec_info->second;
317 if (head_inst->isLoad() && exec_info_ptr->executeTick != MaxTick &&
318 exec_info_ptr->toCommitTick != MaxTick &&
319 head_inst->hasRequest() &&
320 head_inst->getFault() == NoFault) {
321 // Add record to depTrace with commit parameter as false.
322 addDepTraceRecord(head_inst, exec_info_ptr, false);
323 }
324 // As the information contained is no longer needed, remove the execution
325 // info object from the temporary store.
326 clearTempStoreUntil(head_inst);
327 }
328
/**
 * Probe listener for the "Commit" probe: add a committed instruction to the
 * dependency trace unless it is a nop, faulted, was predicated false, or is
 * a memory reference that never issued a request. Cleans the temporary
 * store up to this instruction afterwards.
 *
 * @param head_inst The dynamic instruction retiring at the head of the ROB.
 */
void
ElasticTrace::addCommittedInst(const DynInstConstPtr& head_inst)
{
    DPRINTFR(ElasticTrace, "Attempt to add committed inst [sn:%lli]\n",
             head_inst->seqNum);

    // Add the instruction to the depTrace.
    if (!head_inst->isNop()) {

        // If tracing has just been enabled then the instruction at this stage
        // of execution is far enough that we cannot gather info about its past
        // like the tick it started execution. Simply return until we see an
        // instruction that is found in the tempStore.
        // NOTE(review): this early return also skips clearTempStoreUntil()
        // below — presumably fine since the store holds nothing at or
        // before this sequence number, but worth confirming.
        auto itr_temp_store = tempStore.find(head_inst->seqNum);
        if (itr_temp_store == tempStore.end()) {
            DPRINTFR(ElasticTrace, "addCommittedInst: [sn:%lli] Not in temp "
                "store, skipping.\n", head_inst->seqNum);
            return;
        }

        // Get pointer to the execution info object corresponding to the inst.
        InstExecInfo* exec_info_ptr = itr_temp_store->second;
        // Both timestamps must have been recorded by the Execute and
        // ToCommit probes before commit.
        assert(exec_info_ptr->executeTick != MaxTick);
        assert(exec_info_ptr->toCommitTick != MaxTick);

        // Check if the instruction had a fault, if it predicated false and
        // thus previous register values were restored or if it was a
        // load/store that did not have a request (e.g. when the size of the
        // request is zero). In all these cases the instruction is set as
        // executed and is picked up by the commit probe listener. But a
        // request is not issued and registers are not written. So practically,
        // skipping these should not hurt as execution would not stall on them.
        // Alternatively, these could be included merely as a compute node in
        // the graph. Removing these for now. If correlation accuracy needs to
        // be improved in future these can be turned into comp nodes at the
        // cost of bigger traces.
        if (head_inst->getFault() != NoFault) {
            DPRINTF(ElasticTrace, "%s [sn:%lli] has faulted so "
                    "skip adding it to the trace\n",
                    (head_inst->isMemRef() ? "Load/store" : "Comp inst."),
                    head_inst->seqNum);
        } else if (head_inst->isMemRef() && !head_inst->hasRequest()) {
            DPRINTF(ElasticTrace, "Load/store [sn:%lli] has no request so "
                    "skip adding it to the trace\n", head_inst->seqNum);
        } else if (!head_inst->readPredicate()) {
            DPRINTF(ElasticTrace, "%s [sn:%lli] is predicated false so "
                    "skip adding it to the trace\n",
                    (head_inst->isMemRef() ? "Load/store" : "Comp inst."),
                    head_inst->seqNum);
        } else {
            // Add record to depTrace with commit parameter as true.
            addDepTraceRecord(head_inst, exec_info_ptr, true);
        }
    }
    // As the information contained is no longer needed, remove the execution
    // info object from the temporary store.
    clearTempStoreUntil(head_inst);
}
387
/**
 * Build a TraceInfo record for an instruction and append it to the in-memory
 * dependency trace, resolving its register and order (ROB) dependencies
 * against earlier records. When the trace grows to twice the dependency
 * window, the older half is written out to the protobuf stream.
 *
 * @param head_inst     The instruction being recorded.
 * @param exec_info_ptr Its temporary execution info (timestamps, reg deps).
 * @param commit        True for a committed instruction, false for a
 *                      squashed load that issued its request.
 */
void
ElasticTrace::addDepTraceRecord(const DynInstConstPtr& head_inst,
                                InstExecInfo* exec_info_ptr, bool commit)
{
    // Create a record to assign dynamic instruction related fields.
    TraceInfo* new_record = new TraceInfo;
    // Add to map for sequence number look up to retrieve the TraceInfo pointer
    traceInfoMap[head_inst->seqNum] = new_record;

    // Assign fields from the instruction
    new_record->instNum = head_inst->seqNum;
    new_record->commit = commit;
    new_record->type = head_inst->isLoad() ? Record::LOAD :
                        (head_inst->isStore() ? Record::STORE :
                        Record::COMP);

    // Assign fields for creating a request in case of a load/store
    new_record->reqFlags = head_inst->memReqFlags;
    new_record->virtAddr = head_inst->effAddr;
    new_record->asid = head_inst->asid;
    new_record->physAddr = head_inst->physEffAddr;
    // Currently the tracing does not support split requests.
    new_record->size = head_inst->effSize;
    new_record->pc = head_inst->instAddr();

    // Assign the timing information stored in the execution info object
    new_record->executeTick = exec_info_ptr->executeTick;
    new_record->toCommitTick = exec_info_ptr->toCommitTick;
    new_record->commitTick = curTick();

    // Assign initial values for number of dependents and computational delay
    new_record->numDepts = 0;
    new_record->compDelay = -1;

    // The physical register dependency set of the first instruction is
    // empty. Since there are no records in the depTrace at this point, the
    // case of adding an ROB dependency by using a reverse iterator is not
    // applicable. Thus, populate the fields of the record corresponding to the
    // first instruction and return.
    if (depTrace.empty()) {
        // Store the record in depTrace.
        depTrace.push_back(new_record);
        DPRINTF(ElasticTrace, "Added first inst record %lli to DepTrace.\n",
                new_record->instNum);
        return;
    }

    // Clear register dependencies for squashed loads as they may be dependent
    // on squashed instructions and we do not add those to the trace.
    if (head_inst->isLoad() && !commit) {
        (exec_info_ptr->physRegDepSet).clear();
    }

    // Assign the register dependencies stored in the execution info object.
    // A dependency is only valid if its producer made it into traceInfoMap,
    // i.e. the producer itself was added to the trace.
    std::set<InstSeqNum>::const_iterator dep_set_it;
    for (dep_set_it = (exec_info_ptr->physRegDepSet).begin();
         dep_set_it != (exec_info_ptr->physRegDepSet).end();
         ++dep_set_it) {
        auto trace_info_itr = traceInfoMap.find(*dep_set_it);
        if (trace_info_itr != traceInfoMap.end()) {
            // The register dependency is valid. Assign it and calculate
            // computational delay
            new_record->physRegDepList.push_back(*dep_set_it);
            DPRINTF(ElasticTrace, "Inst %lli has register dependency on "
                    "%lli\n", new_record->instNum, *dep_set_it);
            TraceInfo* reg_dep = trace_info_itr->second;
            reg_dep->numDepts++;
            compDelayPhysRegDep(reg_dep, new_record);
            ++numRegDep;
        } else {
            // The instruction that this has a register dependency on was
            // not added to the trace because of one of the following
            // 1. it was an instruction that had a fault
            // 2. it was an instruction that was predicated false and
            // previous register values were restored
            // 3. it was load/store that did not have a request (e.g. when
            // the size of the request is zero but this may not be a fault)
            // In all these cases the instruction is set as executed and is
            // picked up by the commit probe listener. But a request is not
            // issued and registers are not written to in these cases.
            DPRINTF(ElasticTrace, "Inst %lli has register dependency on "
                    "%lli is skipped\n",new_record->instNum, *dep_set_it);
        }
    }

    // Check for and assign an ROB dependency in addition to register
    // dependency before adding the record to the trace.
    // As stores have to commit in order a store is dependent on the last
    // committed load/store. This is recorded in the ROB dependency.
    if (head_inst->isStore()) {
        // Look up store-after-store order dependency
        updateCommitOrderDep(new_record, false);
        // Look up store-after-load order dependency
        updateCommitOrderDep(new_record, true);
    }

    // In case a node is dependency-free or its dependency got discarded
    // because it was outside the window, it is marked ready in the ROB at the
    // time of issue. A request is sent as soon as possible. To model this, a
    // node is assigned an issue order dependency on a committed instruction
    // that completed earlier than it. This is done to avoid the problem of
    // determining the issue times of such dependency-free nodes during replay
    // which could lead to too much parallelism, thinking conservatively.
    if (new_record->robDepList.empty() && new_record->physRegDepList.empty()) {
        updateIssueOrderDep(new_record);
    }

    // Store the record in depTrace.
    depTrace.push_back(new_record);
    DPRINTF(ElasticTrace, "Added %s inst %lli to DepTrace.\n",
            (commit ? "committed" : "squashed"), new_record->instNum);

    // To process the number of records specified by depWindowSize in the
    // forward direction, the depTrace must have twice as many records
    // to check for dependencies.
    if (depTrace.size() == 2 * depWindowSize) {

        DPRINTF(ElasticTrace, "Writing out trace...\n");

        // Write out the records which have been processed to the trace
        // and remove them from the depTrace.
        writeDepTrace(depWindowSize);

        // After the first window, writeDepTrace() must check for valid
        // compDelay.
        firstWin = false;
    }
}
516
517 void
518 ElasticTrace::updateCommitOrderDep(TraceInfo* new_record,
519 bool find_load_not_store)
520 {
521 assert(new_record->isStore());
522 // Iterate in reverse direction to search for the last committed
523 // load/store that completed earlier than the new record
524 depTraceRevItr from_itr(depTrace.end());
525 depTraceRevItr until_itr(depTrace.begin());
526 TraceInfo* past_record = *from_itr;
527 uint32_t num_go_back = 0;
528
529 // The execution time of this store is when it is sent, that is committed
530 Tick execute_tick = curTick();
531 // Search for store-after-load or store-after-store order dependency
532 while (num_go_back < depWindowSize && from_itr != until_itr) {
533 if (find_load_not_store) {
534 // Check if previous inst is a load completed earlier by comparing
535 // with execute tick
536 if (hasLoadCompleted(past_record, execute_tick)) {
537 // Assign rob dependency and calculate the computational delay
538 assignRobDep(past_record, new_record);
539 ++numOrderDepStores;
540 return;
541 }
542 } else {
543 // Check if previous inst is a store sent earlier by comparing with
544 // execute tick
545 if (hasStoreCommitted(past_record, execute_tick)) {
546 // Assign rob dependency and calculate the computational delay
547 assignRobDep(past_record, new_record);
548 ++numOrderDepStores;
549 return;
550 }
551 }
552 ++from_itr;
553 past_record = *from_itr;
554 ++num_go_back;
555 }
556 }
557
558 void
559 ElasticTrace::updateIssueOrderDep(TraceInfo* new_record)
560 {
561 // Interate in reverse direction to search for the last committed
562 // record that completed earlier than the new record
563 depTraceRevItr from_itr(depTrace.end());
564 depTraceRevItr until_itr(depTrace.begin());
565 TraceInfo* past_record = *from_itr;
566
567 uint32_t num_go_back = 0;
568 Tick execute_tick = 0;
569
570 if (new_record->isLoad()) {
571 // The execution time of a load is when a request is sent
572 execute_tick = new_record->executeTick;
573 ++numIssueOrderDepLoads;
574 } else if (new_record->isStore()) {
575 // The execution time of a store is when it is sent, i.e. committed
576 execute_tick = curTick();
577 ++numIssueOrderDepStores;
578 } else {
579 // The execution time of a non load/store is when it completes
580 execute_tick = new_record->toCommitTick;
581 ++numIssueOrderDepOther;
582 }
583
584 // We search if this record has an issue order dependency on a past record.
585 // Once we find it, we update both the new record and the record it depends
586 // on and return.
587 while (num_go_back < depWindowSize && from_itr != until_itr) {
588 // Check if a previous inst is a load sent earlier, or a store sent
589 // earlier, or a comp inst completed earlier by comparing with execute
590 // tick
591 if (hasLoadBeenSent(past_record, execute_tick) ||
592 hasStoreCommitted(past_record, execute_tick) ||
593 hasCompCompleted(past_record, execute_tick)) {
594 // Assign rob dependency and calculate the computational delay
595 assignRobDep(past_record, new_record);
596 return;
597 }
598 ++from_itr;
599 past_record = *from_itr;
600 ++num_go_back;
601 }
602 }
603
604 void
605 ElasticTrace::assignRobDep(TraceInfo* past_record, TraceInfo* new_record) {
606 DPRINTF(ElasticTrace, "%s %lli has ROB dependency on %lli\n",
607 new_record->typeToStr(), new_record->instNum,
608 past_record->instNum);
609 // Add dependency on past record
610 new_record->robDepList.push_back(past_record->instNum);
611 // Update new_record's compute delay with respect to the past record
612 compDelayRob(past_record, new_record);
613 // Increment number of dependents of the past record
614 ++(past_record->numDepts);
615 // Update stat to log max number of dependents
616 maxNumDependents = std::max(past_record->numDepts,
617 (uint32_t)maxNumDependents.value());
618 }
619
620 bool
621 ElasticTrace::hasStoreCommitted(TraceInfo* past_record,
622 Tick execute_tick) const
623 {
624 return (past_record->isStore() && past_record->commitTick <= execute_tick);
625 }
626
627 bool
628 ElasticTrace::hasLoadCompleted(TraceInfo* past_record,
629 Tick execute_tick) const
630 {
631 return(past_record->isLoad() && past_record->commit &&
632 past_record->toCommitTick <= execute_tick);
633 }
634
635 bool
636 ElasticTrace::hasLoadBeenSent(TraceInfo* past_record,
637 Tick execute_tick) const
638 {
639 // Check if previous inst is a load sent earlier than this
640 return (past_record->isLoad() && past_record->commit &&
641 past_record->executeTick <= execute_tick);
642 }
643
644 bool
645 ElasticTrace::hasCompCompleted(TraceInfo* past_record,
646 Tick execute_tick) const
647 {
648 return(past_record->isComp() && past_record->toCommitTick <= execute_tick);
649 }
650
651 void
652 ElasticTrace::clearTempStoreUntil(const DynInstConstPtr& head_inst)
653 {
654 // Clear from temp store starting with the execution info object
655 // corresponding the head_inst and continue clearing by decrementing the
656 // sequence number until the last cleared sequence number.
657 InstSeqNum temp_sn = (head_inst->seqNum);
658 while (temp_sn > lastClearedSeqNum) {
659 auto itr_exec_info = tempStore.find(temp_sn);
660 if (itr_exec_info != tempStore.end()) {
661 InstExecInfo* exec_info_ptr = itr_exec_info->second;
662 // Free allocated memory for the info object
663 delete exec_info_ptr;
664 // Remove entry from temporary store
665 tempStore.erase(itr_exec_info);
666 }
667 temp_sn--;
668 }
669 // Update the last cleared sequence number to that of the head_inst
670 lastClearedSeqNum = head_inst->seqNum;
671 }
672
/**
 * Compute the computational delay contributed by an ROB dependency: the gap
 * between the completion tick of past_record and the modelled execution
 * tick of new_record. The record keeps the minimum delay over all its
 * dependencies, i.e. the delay relative to whichever dependency completes
 * last.
 *
 * @param past_record The record being depended upon.
 * @param new_record  The dependent record whose compDelay is updated.
 */
void
ElasticTrace::compDelayRob(TraceInfo* past_record, TraceInfo* new_record)
{
    // The computation delay is the delay between the completion tick of the
    // inst. pointed to by past_record and the execution tick of its dependent
    // inst. pointed to by new_record.
    int64_t comp_delay = -1;
    Tick execution_tick = 0, completion_tick = 0;

    DPRINTF(ElasticTrace, "Seq num %lli has ROB dependency on seq num %lli.\n",
            new_record->instNum, past_record->instNum);

    // Get the tick when the node is executed as per the modelling of
    // computation delay
    execution_tick = new_record->getExecuteTick();

    // Completion tick of the producer depends on its type: a load completes
    // at write-back (or at request-send when the dependent is not a store),
    // a store at commit, a compute node at write-back.
    if (past_record->isLoad()) {
        if (new_record->isStore()) {
            completion_tick = past_record->toCommitTick;
        } else {
            completion_tick = past_record->executeTick;
        }
    } else if (past_record->isStore()) {
        completion_tick = past_record->commitTick;
    } else if (past_record->isComp()){
        completion_tick = past_record->toCommitTick;
    }
    // The dependent cannot execute before its producer completes.
    assert(execution_tick >= completion_tick);
    comp_delay = execution_tick - completion_tick;

    DPRINTF(ElasticTrace, "Computational delay is %lli - %lli = %lli\n",
            execution_tick, completion_tick, comp_delay);

    // Assign the computational delay with respect to the dependency which
    // completes the latest.
    if (new_record->compDelay == -1)
        new_record->compDelay = comp_delay;
    else
        new_record->compDelay = std::min(comp_delay, new_record->compDelay);
    DPRINTF(ElasticTrace, "Final computational delay = %lli.\n",
            new_record->compDelay);
}
715
716 void
717 ElasticTrace::compDelayPhysRegDep(TraceInfo* past_record,
718 TraceInfo* new_record)
719 {
720 // The computation delay is the delay between the completion tick of the
721 // inst. pointed to by past_record and the execution tick of its dependent
722 // inst. pointed to by new_record.
723 int64_t comp_delay = -1;
724 Tick execution_tick = 0, completion_tick = 0;
725
726 DPRINTF(ElasticTrace, "Seq. num %lli has register dependency on seq. num"
727 " %lli.\n", new_record->instNum, past_record->instNum);
728
729 // Get the tick when the node is executed as per the modelling of
730 // computation delay
731 execution_tick = new_record->getExecuteTick();
732
733 // When there is a physical register dependency on an instruction, the
734 // completion tick of that instruction is when it wrote to the register,
735 // that is toCommitTick. In case, of a store updating a destination
736 // register, this is approximated to commitTick instead
737 if (past_record->isStore()) {
738 completion_tick = past_record->commitTick;
739 } else {
740 completion_tick = past_record->toCommitTick;
741 }
742 assert(execution_tick >= completion_tick);
743 comp_delay = execution_tick - completion_tick;
744 DPRINTF(ElasticTrace, "Computational delay is %lli - %lli = %lli\n",
745 execution_tick, completion_tick, comp_delay);
746
747 // Assign the computational delay with respect to the dependency which
748 // completes the latest.
749 if (new_record->compDelay == -1)
750 new_record->compDelay = comp_delay;
751 else
752 new_record->compDelay = std::min(comp_delay, new_record->compDelay);
753 DPRINTF(ElasticTrace, "Final computational delay = %lli.\n",
754 new_record->compDelay);
755 }
756
757 Tick
758 ElasticTrace::TraceInfo::getExecuteTick() const
759 {
760 if (isLoad()) {
761 // Execution tick for a load instruction is when the request was sent,
762 // that is executeTick.
763 return executeTick;
764 } else if (isStore()) {
765 // Execution tick for a store instruction is when the request was sent,
766 // that is commitTick.
767 return commitTick;
768 } else {
769 // Execution tick for a non load/store instruction is when the register
770 // value was written to, that is commitTick.
771 return toCommitTick;
772 }
773 }
774
void
ElasticTrace::writeDepTrace(uint32_t num_to_write)
{
    // Write the trace with fields as follows:
    // Instruction sequence number
    // If instruction was a load
    // If instruction was a store
    // If instruction has addr
    // If instruction has size
    // If instruction has flags
    // List of order dependencies - optional, repeated
    // Computational delay with respect to last completed dependency
    // List of physical register RAW dependencies - optional, repeated
    // Weight of a node equal to no. of filtered nodes before it - optional
    //
    // Consumes (and deletes) the first num_to_write records of depTrace,
    // serialising those that pass the filter into the protobuf output
    // stream. Caller must ensure depTrace holds at least num_to_write
    // entries.
    uint16_t num_filtered_nodes = 0;
    depTraceItr dep_trace_itr(depTrace.begin());
    depTraceItr dep_trace_itr_start = dep_trace_itr;
    while (num_to_write > 0) {
        TraceInfo* temp_ptr = *dep_trace_itr;
        assert(temp_ptr->type != Record::INVALID);
        // If no node depends on a comp node then there is no reason to
        // track the comp node in the dependency graph. We filter out such
        // nodes but count them and add a weight field to the subsequent node
        // that we do include in the trace.
        if (!temp_ptr->isComp() || temp_ptr->numDepts != 0) {
            DPRINTFR(ElasticTrace, "Instruction with seq. num %lli "
                     "is as follows:\n", temp_ptr->instNum);
            if (temp_ptr->isLoad() || temp_ptr->isStore()) {
                DPRINTFR(ElasticTrace, "\tis a %s\n", temp_ptr->typeToStr());
                DPRINTFR(ElasticTrace, "\thas a request with phys addr %i, "
                         "size %i, flags %i\n", temp_ptr->physAddr,
                         temp_ptr->size, temp_ptr->reqFlags);
            } else {
                DPRINTFR(ElasticTrace, "\tis a %s\n", temp_ptr->typeToStr());
            }
            // In the first window of the trace, a node may still carry the
            // sentinel delay (-1) because it had no dependencies at all;
            // approximate its computational delay with its own execution
            // tick (trace time starts at 0).
            if (firstWin && temp_ptr->compDelay == -1) {
                if (temp_ptr->isLoad()) {
                    temp_ptr->compDelay = temp_ptr->executeTick;
                } else if (temp_ptr->isStore()) {
                    temp_ptr->compDelay = temp_ptr->commitTick;
                } else {
                    temp_ptr->compDelay = temp_ptr->toCommitTick;
                }
            }
            assert(temp_ptr->compDelay != -1);
            DPRINTFR(ElasticTrace, "\thas computational delay %lli\n",
                     temp_ptr->compDelay);

            // Create a protobuf message for the dependency record
            ProtoMessage::InstDepRecord dep_pkt;
            dep_pkt.set_seq_num(temp_ptr->instNum);
            dep_pkt.set_type(temp_ptr->type);
            dep_pkt.set_pc(temp_ptr->pc);
            if (temp_ptr->isLoad() || temp_ptr->isStore()) {
                dep_pkt.set_flags(temp_ptr->reqFlags);
                dep_pkt.set_p_addr(temp_ptr->physAddr);
                // If tracing of virtual addresses is enabled, set the optional
                // field for it
                if (traceVirtAddr) {
                    dep_pkt.set_v_addr(temp_ptr->virtAddr);
                    dep_pkt.set_asid(temp_ptr->asid);
                }
                dep_pkt.set_size(temp_ptr->size);
            }
            dep_pkt.set_comp_delay(temp_ptr->compDelay);
            if (temp_ptr->robDepList.empty()) {
                DPRINTFR(ElasticTrace, "\thas no order (rob) dependencies\n");
            }
            // Drain the order-dependency list into the message; the record
            // is deleted below, so destructively popping is fine.
            while (!temp_ptr->robDepList.empty()) {
                DPRINTFR(ElasticTrace, "\thas order (rob) dependency on %lli\n",
                         temp_ptr->robDepList.front());
                dep_pkt.add_rob_dep(temp_ptr->robDepList.front());
                temp_ptr->robDepList.pop_front();
            }
            if (temp_ptr->physRegDepList.empty()) {
                DPRINTFR(ElasticTrace, "\thas no register dependencies\n");
            }
            // Likewise drain the register RAW dependency list.
            while (!temp_ptr->physRegDepList.empty()) {
                DPRINTFR(ElasticTrace, "\thas register dependency on %lli\n",
                         temp_ptr->physRegDepList.front());
                dep_pkt.add_reg_dep(temp_ptr->physRegDepList.front());
                temp_ptr->physRegDepList.pop_front();
            }
            if (num_filtered_nodes != 0) {
                // Set the weight of this node as the no. of filtered nodes
                // between this node and the last node that we wrote to output
                // stream. The weight will be used during replay to model ROB
                // occupancy of filtered nodes.
                dep_pkt.set_weight(num_filtered_nodes);
                num_filtered_nodes = 0;
            }
            // Write the message to the protobuf output stream
            dataTraceStream->write(dep_pkt);
        } else {
            // Don't write the node to the trace but note that we have filtered
            // out a node.
            ++numFilteredNodes;
            ++num_filtered_nodes;
        }
        dep_trace_itr++;
        // The record is fully consumed: remove its lookup entry and free it.
        traceInfoMap.erase(temp_ptr->instNum);
        delete temp_ptr;
        num_to_write--;
    }
    // Erase the consumed prefix from the buffered trace in one sweep.
    depTrace.erase(dep_trace_itr_start, dep_trace_itr);
}
881
882 void
883 ElasticTrace::regStats() {
884 ProbeListenerObject::regStats();
885
886 using namespace Stats;
887 numRegDep
888 .name(name() + ".numRegDep")
889 .desc("Number of register dependencies recorded during tracing")
890 ;
891
892 numOrderDepStores
893 .name(name() + ".numOrderDepStores")
894 .desc("Number of commit order (rob) dependencies for a store recorded"
895 " on a past load/store during tracing")
896 ;
897
898 numIssueOrderDepLoads
899 .name(name() + ".numIssueOrderDepLoads")
900 .desc("Number of loads that got assigned issue order dependency"
901 " because they were dependency-free")
902 ;
903
904 numIssueOrderDepStores
905 .name(name() + ".numIssueOrderDepStores")
906 .desc("Number of stores that got assigned issue order dependency"
907 " because they were dependency-free")
908 ;
909
910 numIssueOrderDepOther
911 .name(name() + ".numIssueOrderDepOther")
912 .desc("Number of non load/store insts that got assigned issue order"
913 " dependency because they were dependency-free")
914 ;
915
916 numFilteredNodes
917 .name(name() + ".numFilteredNodes")
918 .desc("No. of nodes filtered out before writing the output trace")
919 ;
920
921 maxNumDependents
922 .name(name() + ".maxNumDependents")
923 .desc("Maximum number or dependents on any instruction")
924 ;
925
926 maxTempStoreSize
927 .name(name() + ".maxTempStoreSize")
928 .desc("Maximum size of the temporary store during the run")
929 ;
930
931 maxPhysRegDepMapSize
932 .name(name() + ".maxPhysRegDepMapSize")
933 .desc("Maximum size of register dependency map")
934 ;
935 }
936
937 const std::string&
938 ElasticTrace::TraceInfo::typeToStr() const
939 {
940 return Record::RecordType_Name(type);
941 }
942
const std::string
ElasticTrace::name() const
{
    // The probe listener reports the name of its base listener object.
    return ProbeListenerObject::name();
}
948
949 void
950 ElasticTrace::flushTraces()
951 {
952 // Write to trace all records in the depTrace.
953 writeDepTrace(depTrace.size());
954 // Delete the stream objects
955 delete dataTraceStream;
956 delete instTraceStream;
957 }
958
959 ElasticTrace*
960 ElasticTraceParams::create()
961 {
962 return new ElasticTrace(this);
963 }