cpu-o3: MemDepUnit tracks load-acquire/store-release
[gem5.git] / src / cpu / minor / execute.cc
1 /*
2 * Copyright (c) 2013-2014,2018-2019 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 #include "cpu/minor/execute.hh"
39
40 #include "arch/locked_mem.hh"
41 #include "arch/registers.hh"
42 #include "arch/utility.hh"
43 #include "cpu/minor/cpu.hh"
44 #include "cpu/minor/exec_context.hh"
45 #include "cpu/minor/fetch1.hh"
46 #include "cpu/minor/lsq.hh"
47 #include "cpu/op_class.hh"
48 #include "debug/Activity.hh"
49 #include "debug/Branch.hh"
50 #include "debug/Drain.hh"
51 #include "debug/MinorExecute.hh"
52 #include "debug/MinorInterrupt.hh"
53 #include "debug/MinorMem.hh"
54 #include "debug/MinorTrace.hh"
55 #include "debug/PCEvent.hh"
56
57 namespace Minor
58 {
59
60 Execute::Execute(const std::string &name_,
61 MinorCPU &cpu_,
62 MinorCPUParams &params,
63 Latch<ForwardInstData>::Output inp_,
64 Latch<BranchData>::Input out_) :
65 Named(name_),
66 inp(inp_),
67 out(out_),
68 cpu(cpu_),
69 issueLimit(params.executeIssueLimit),
70 memoryIssueLimit(params.executeMemoryIssueLimit),
71 commitLimit(params.executeCommitLimit),
72 memoryCommitLimit(params.executeMemoryCommitLimit),
73 processMoreThanOneInput(params.executeCycleInput),
74 fuDescriptions(*params.executeFuncUnits),
75 numFuncUnits(fuDescriptions.funcUnits.size()),
76 setTraceTimeOnCommit(params.executeSetTraceTimeOnCommit),
77 setTraceTimeOnIssue(params.executeSetTraceTimeOnIssue),
78 allowEarlyMemIssue(params.executeAllowEarlyMemoryIssue),
79 noCostFUIndex(fuDescriptions.funcUnits.size() + 1),
80 lsq(name_ + ".lsq", name_ + ".dcache_port",
81 cpu_, *this,
82 params.executeMaxAccessesInMemory,
83 params.executeMemoryWidth,
84 params.executeLSQRequestsQueueSize,
85 params.executeLSQTransfersQueueSize,
86 params.executeLSQStoreBufferSize,
87 params.executeLSQMaxStoreBufferStoresPerCycle),
88 executeInfo(params.numThreads, ExecuteThreadInfo(params.executeCommitLimit)),
89 interruptPriority(0),
90 issuePriority(0),
91 commitPriority(0)
92 {
93 if (commitLimit < 1) {
94 fatal("%s: executeCommitLimit must be >= 1 (%d)\n", name_,
95 commitLimit);
96 }
97
98 if (issueLimit < 1) {
99 fatal("%s: executeCommitLimit must be >= 1 (%d)\n", name_,
100 issueLimit);
101 }
102
103 if (memoryIssueLimit < 1) {
104 fatal("%s: executeMemoryIssueLimit must be >= 1 (%d)\n", name_,
105 memoryIssueLimit);
106 }
107
108 if (memoryCommitLimit > commitLimit) {
109 fatal("%s: executeMemoryCommitLimit (%d) must be <="
110 " executeCommitLimit (%d)\n",
111 name_, memoryCommitLimit, commitLimit);
112 }
113
114 if (params.executeInputBufferSize < 1) {
115 fatal("%s: executeInputBufferSize must be >= 1 (%d)\n", name_,
116 params.executeInputBufferSize);
117 }
118
119 if (params.executeInputBufferSize < 1) {
120 fatal("%s: executeInputBufferSize must be >= 1 (%d)\n", name_,
121 params.executeInputBufferSize);
122 }
123
124 /* This should be large enough to count all the in-FU instructions
125 * which need to be accounted for in the inFlightInsts
126 * queue */
127 unsigned int total_slots = 0;
128
129 /* Make FUPipelines for each MinorFU */
130 for (unsigned int i = 0; i < numFuncUnits; i++) {
131 std::ostringstream fu_name;
132 MinorFU *fu_description = fuDescriptions.funcUnits[i];
133
134 /* Note the total number of instruction slots (for sizing
135 * the inFlightInst queue) and the maximum latency of any FU
136 * (for sizing the activity recorder) */
137 total_slots += fu_description->opLat;
138
139 fu_name << name_ << ".fu." << i;
140
141 FUPipeline *fu = new FUPipeline(fu_name.str(), *fu_description, cpu);
142
143 funcUnits.push_back(fu);
144 }
145
146 /** Check that there is a functional unit for all operation classes */
147 for (int op_class = No_OpClass + 1; op_class < Num_OpClasses; op_class++) {
148 bool found_fu = false;
149 unsigned int fu_index = 0;
150
151 while (fu_index < numFuncUnits && !found_fu)
152 {
153 if (funcUnits[fu_index]->provides(
154 static_cast<OpClass>(op_class)))
155 {
156 found_fu = true;
157 }
158 fu_index++;
159 }
160
161 if (!found_fu) {
162 warn("No functional unit for OpClass %s\n",
163 Enums::OpClassStrings[op_class]);
164 }
165 }
166
167 /* Per-thread structures */
168 for (ThreadID tid = 0; tid < params.numThreads; tid++) {
169 std::string tid_str = std::to_string(tid);
170
171 /* Input Buffers */
172 inputBuffer.push_back(
173 InputBuffer<ForwardInstData>(
174 name_ + ".inputBuffer" + tid_str, "insts",
175 params.executeInputBufferSize));
176
177 /* Scoreboards */
178 scoreboard.push_back(Scoreboard(name_ + ".scoreboard" + tid_str));
179
180 /* In-flight instruction records */
181 executeInfo[tid].inFlightInsts = new Queue<QueuedInst,
182 ReportTraitsAdaptor<QueuedInst> >(
183 name_ + ".inFlightInsts" + tid_str, "insts", total_slots);
184
185 executeInfo[tid].inFUMemInsts = new Queue<QueuedInst,
186 ReportTraitsAdaptor<QueuedInst> >(
187 name_ + ".inFUMemInsts" + tid_str, "insts", total_slots);
188 }
189 }
190
191 const ForwardInstData *
192 Execute::getInput(ThreadID tid)
193 {
194 /* Get a line from the inputBuffer to work with */
195 if (!inputBuffer[tid].empty()) {
196 const ForwardInstData &head = inputBuffer[tid].front();
197
198 return (head.isBubble() ? NULL : &(inputBuffer[tid].front()));
199 } else {
200 return NULL;
201 }
202 }
203
204 void
205 Execute::popInput(ThreadID tid)
206 {
207 if (!inputBuffer[tid].empty())
208 inputBuffer[tid].pop();
209
210 executeInfo[tid].inputIndex = 0;
211 }
212
/* Compare the thread's PC after executing `inst` against the expected
 * next PC (and the instruction's branch prediction, if any) and classify
 * the result into a BranchData reason which is signalled to Fetch via
 * `branch` (through updateBranchData).  Called after every committed
 * instruction and after faults. */
void
Execute::tryToBranch(MinorDynInstPtr inst, Fault fault, BranchData &branch)
{
    ThreadContext *thread = cpu.getContext(inst->id.threadId);
    const TheISA::PCState &pc_before = inst->pc;
    /* PC the thread now holds: faults/branching insts will already have
     * set this to their target */
    TheISA::PCState target = thread->pcState();

    /* Force a branch for SerializeAfter/SquashAfter instructions
     * at the end of micro-op sequence when we're not suspended */
    bool force_branch = thread->status() != ThreadContext::Suspended &&
        !inst->isFault() &&
        inst->isLastOpInInst() &&
        (inst->staticInst->isSerializeAfter() ||
            inst->staticInst->isSquashAfter() ||
            inst->staticInst->isIprAccess());

    DPRINTF(Branch, "tryToBranch before: %s after: %s%s\n",
        pc_before, target, (force_branch ? " (forcing)" : ""));

    /* Will we change the PC to something other than the next instruction? */
    bool must_branch = pc_before != target ||
        fault != NoFault ||
        force_branch;

    /* The reason for the branch data we're about to generate, set below */
    BranchData::Reason reason = BranchData::NoBranch;

    if (fault == NoFault)
    {
        /* Advance the thread PC past this instruction; for a taken branch
         * `target` already differs from the advanced pc_before */
        TheISA::advancePC(target, inst->staticInst);
        thread->pcState(target);

        DPRINTF(Branch, "Advancing current PC from: %s to: %s\n",
            pc_before, target);
    }

    if (inst->predictedTaken && !force_branch) {
        /* Predicted to branch */
        if (!must_branch) {
            /* No branch was taken, change stream to get us back to the
             * intended PC value */
            DPRINTF(Branch, "Predicted a branch from 0x%x to 0x%x but"
                " none happened inst: %s\n",
                inst->pc.instAddr(), inst->predictedTarget.instAddr(), *inst);

            reason = BranchData::BadlyPredictedBranch;
        } else if (inst->predictedTarget == target) {
            /* Branch prediction got the right target, kill the branch and
             * carry on.
             * Note that this information to the branch predictor might get
             * overwritten by a "real" branch during this cycle */
            DPRINTF(Branch, "Predicted a branch from 0x%x to 0x%x correctly"
                " inst: %s\n",
                inst->pc.instAddr(), inst->predictedTarget.instAddr(), *inst);

            reason = BranchData::CorrectlyPredictedBranch;
        } else {
            /* Branch prediction got the wrong target */
            DPRINTF(Branch, "Predicted a branch from 0x%x to 0x%x"
                " but got the wrong target (actual: 0x%x) inst: %s\n",
                inst->pc.instAddr(), inst->predictedTarget.instAddr(),
                target.instAddr(), *inst);

            reason = BranchData::BadlyPredictedBranchTarget;
        }
    } else if (must_branch) {
        /* Unpredicted branch */
        DPRINTF(Branch, "Unpredicted branch from 0x%x to 0x%x inst: %s\n",
            inst->pc.instAddr(), target.instAddr(), *inst);

        reason = BranchData::UnpredictedBranch;
    } else {
        /* No branch at all */
        reason = BranchData::NoBranch;
    }

    updateBranchData(inst->id.threadId, reason, inst, target, branch);
}
291
292 void
293 Execute::updateBranchData(
294 ThreadID tid,
295 BranchData::Reason reason,
296 MinorDynInstPtr inst, const TheISA::PCState &target,
297 BranchData &branch)
298 {
299 if (reason != BranchData::NoBranch) {
300 /* Bump up the stream sequence number on a real branch*/
301 if (BranchData::isStreamChange(reason))
302 executeInfo[tid].streamSeqNum++;
303
304 /* Branches (even mis-predictions) don't change the predictionSeqNum,
305 * just the streamSeqNum */
306 branch = BranchData(reason, tid,
307 executeInfo[tid].streamSeqNum,
308 /* Maintaining predictionSeqNum if there's no inst is just a
309 * courtesy and looks better on minorview */
310 (inst->isBubble() ? executeInfo[tid].lastPredictionSeqNum
311 : inst->id.predictionSeqNum),
312 target, inst);
313
314 DPRINTF(Branch, "Branch data signalled: %s\n", branch);
315 }
316 }
317
/* Complete a memory access whose response has come back from the LSQ:
 * invoke any translation fault, run the instruction's completeAcc, push
 * completed stores on to the store buffer, pop the LSQ response and do
 * commit accounting/branch generation for the instruction. */
void
Execute::handleMemResponse(MinorDynInstPtr inst,
    LSQ::LSQRequestPtr response, BranchData &branch, Fault &fault)
{
    ThreadID thread_id = inst->id.threadId;
    ThreadContext *thread = cpu.getContext(thread_id);

    ExecContext context(cpu, *cpu.threads[thread_id], *this, inst);

    PacketPtr packet = response->packet;

    bool is_load = inst->staticInst->isLoad();
    bool is_store = inst->staticInst->isStore();
    bool is_atomic = inst->staticInst->isAtomic();
    bool is_prefetch = inst->staticInst->isDataPrefetch();

    /* If true, the trace's predicate value will be taken from the exec
     * context predicate, otherwise, it will be set to false */
    bool use_context_predicate = true;

    if (inst->translationFault != NoFault) {
        /* Invoke memory faults. */
        DPRINTF(MinorMem, "Completing fault from DTLB access: %s\n",
            inst->translationFault->name());

        if (inst->staticInst->isPrefetch()) {
            DPRINTF(MinorMem, "Not taking fault on prefetch: %s\n",
                inst->translationFault->name());

            /* Don't assign to fault */
        } else {
            /* Take the fault raised during the TLB/memory access */
            fault = inst->translationFault;

            fault->invoke(thread, inst->staticInst);
        }
    } else if (!packet) {
        /* The request produced no packet, e.g. a predicated-false access */
        DPRINTF(MinorMem, "Completing failed request inst: %s\n",
            *inst);
        use_context_predicate = false;
        if (!context.readMemAccPredicate())
            inst->staticInst->completeAcc(nullptr, &context, inst->traceData);
    } else if (packet->isError()) {
        DPRINTF(MinorMem, "Trying to commit error response: %s\n",
            *inst);

        fatal("Received error response packet for inst: %s\n", *inst);
    } else if (is_store || is_load || is_prefetch || is_atomic) {
        assert(packet);

        DPRINTF(MinorMem, "Memory response inst: %s addr: 0x%x size: %d\n",
            *inst, packet->getAddr(), packet->getSize());

        if (is_load && packet->getSize() > 0) {
            DPRINTF(MinorMem, "Memory data[0]: 0x%x\n",
                static_cast<unsigned int>(packet->getConstPtr<uint8_t>()[0]));
        }

        /* Complete the memory access instruction */
        fault = inst->staticInst->completeAcc(packet, &context,
            inst->traceData);

        if (fault != NoFault) {
            /* Invoke fault created by instruction completion */
            DPRINTF(MinorMem, "Fault in memory completeAcc: %s\n",
                fault->name());
            fault->invoke(thread, inst->staticInst);
        } else {
            /* Stores need to be pushed into the store buffer to finish
             * them off */
            if (response->needsToBeSentToStoreBuffer())
                lsq.sendStoreToStoreBuffer(response);
        }
    } else {
        fatal("There should only ever be reads, "
            "writes or faults at this point\n");
    }

    lsq.popResponse(response);

    if (inst->traceData) {
        inst->traceData->setPredicate((use_context_predicate ?
            context.readPredicate() : false));
    }

    doInstCommitAccounting(inst);

    /* Generate output to account for branches */
    tryToBranch(inst, fault, branch);
}
408
409 bool
410 Execute::isInterrupted(ThreadID thread_id) const
411 {
412 return cpu.checkInterrupts(cpu.getContext(thread_id));
413 }
414
415 bool
416 Execute::takeInterrupt(ThreadID thread_id, BranchData &branch)
417 {
418 DPRINTF(MinorInterrupt, "Considering interrupt status from PC: %s\n",
419 cpu.getContext(thread_id)->pcState());
420
421 Fault interrupt = cpu.getInterruptController(thread_id)->getInterrupt
422 (cpu.getContext(thread_id));
423
424 if (interrupt != NoFault) {
425 /* The interrupt *must* set pcState */
426 cpu.getInterruptController(thread_id)->updateIntrInfo
427 (cpu.getContext(thread_id));
428 interrupt->invoke(cpu.getContext(thread_id));
429
430 assert(!lsq.accessesInFlight());
431
432 DPRINTF(MinorInterrupt, "Invoking interrupt: %s to PC: %s\n",
433 interrupt->name(), cpu.getContext(thread_id)->pcState());
434
435 /* Assume that an interrupt *must* cause a branch. Assert this? */
436
437 updateBranchData(thread_id, BranchData::Interrupt,
438 MinorDynInst::bubble(), cpu.getContext(thread_id)->pcState(),
439 branch);
440 }
441
442 return interrupt != NoFault;
443 }
444
/* Initiate a memory reference instruction: compute its effective address
 * and present the access to the LSQ via initiateAcc.  Returns true when
 * the instruction was acted upon (issued to the LSQ, pushed as a failed
 * request, or faulted); false when the LSQ is full and we must retry.
 * `passed_predicate` reports whether the instruction's predicate passed;
 * `fault` carries back any non-translation fault from initiateAcc. */
bool
Execute::executeMemRefInst(MinorDynInstPtr inst, BranchData &branch,
    bool &passed_predicate, Fault &fault)
{
    bool issued = false;

    /* Set to true if the mem op. is issued and sent to the mem system */
    passed_predicate = false;

    if (!lsq.canRequest()) {
        /* Not acting on instruction yet as the memory
         * queues are full */
        issued = false;
    } else {
        ThreadContext *thread = cpu.getContext(inst->id.threadId);
        /* initiateAcc may disturb the thread PC; remember it so it can be
         * restored below */
        TheISA::PCState old_pc = thread->pcState();

        ExecContext context(cpu, *cpu.threads[inst->id.threadId],
            *this, inst);

        DPRINTF(MinorExecute, "Initiating memRef inst: %s\n", *inst);

        Fault init_fault = inst->staticInst->initiateAcc(&context,
            inst->traceData);

        if (inst->inLSQ) {
            if (init_fault != NoFault) {
                assert(inst->translationFault != NoFault);
                // Translation faults are dealt with in handleMemResponse()
                init_fault = NoFault;
            } else {
                // If we have a translation fault then it got suppressed by
                // initateAcc()
                inst->translationFault = NoFault;
            }
        }

        if (init_fault != NoFault) {
            DPRINTF(MinorExecute, "Fault on memory inst: %s"
                " initiateAcc: %s\n", *inst, init_fault->name());
            fault = init_fault;
        } else {
            /* Only set this if the instruction passed its
             * predicate */
            if (!context.readMemAccPredicate()) {
                DPRINTF(MinorMem, "No memory access for inst: %s\n", *inst);
                assert(context.readPredicate());
            }
            passed_predicate = context.readPredicate();

            /* Set predicate in tracing */
            if (inst->traceData)
                inst->traceData->setPredicate(passed_predicate);

            /* If the instruction didn't pass its predicate
             * or it is a predicated vector instruction and the
             * associated predicate register is all-false (and so will not
             * progress from here) Try to branch to correct and branch
             * mis-prediction. */
            if (!inst->inLSQ) {
                /* Leave it up to commit to handle the fault */
                lsq.pushFailedRequest(inst);
                inst->inLSQ = true;
            }
        }

        /* Restore thread PC */
        thread->pcState(old_pc);
        issued = true;
    }

    return issued;
}
518
/** Increment a cyclic buffer index for indices [0, cycle_size-1],
 *  wrapping back to 0 past the end */
inline unsigned int
cyclicIndexInc(unsigned int index, unsigned int cycle_size)
{
    const unsigned int next = index + 1;

    return (next == cycle_size ? 0 : next);
}
530
/** Decrement a cyclic buffer index for indices [0, cycle_size-1],
 *  wrapping from 0 to cycle_size-1 */
inline unsigned int
cyclicIndexDec(unsigned int index, unsigned int cycle_size)
{
    return (index == 0 ? cycle_size - 1 : index - 1);
}
542
/* Issue instructions from this thread's input buffer into functional
 * units, strictly in order, until an instruction fails to issue or a
 * per-cycle limit (issueLimit / memoryIssueLimit) is reached.
 * Returns the number of (non-bubble, non-discarded) instructions issued
 * this cycle. */
unsigned int
Execute::issue(ThreadID thread_id)
{
    const ForwardInstData *insts_in = getInput(thread_id);
    ExecuteThreadInfo &thread = executeInfo[thread_id];

    /* Early termination if we have no instructions */
    if (!insts_in)
        return 0;

    /* Start from the first FU */
    unsigned int fu_index = 0;

    /* Remains true while instructions are still being issued. If any
     * instruction fails to issue, this is set to false and we exit issue.
     * This strictly enforces in-order issue. For other issue behaviours,
     * a more complicated test in the outer while loop below is needed. */
    bool issued = true;

    /* Number of insts issues this cycle to check for issueLimit */
    unsigned num_insts_issued = 0;

    /* Number of memory ops issues this cycle to check for memoryIssueLimit */
    unsigned num_mem_insts_issued = 0;

    /* Number of instructions discarded this cycle in order to enforce a
     * discardLimit. @todo, add that parameter? */
    unsigned num_insts_discarded = 0;

    do {
        MinorDynInstPtr inst = insts_in->insts[thread.inputIndex];
        Fault fault = inst->fault;
        bool discarded = false;
        bool issued_mem_ref = false;

        if (inst->isBubble()) {
            /* Skip */
            issued = true;
        } else if (cpu.getContext(thread_id)->status() ==
            ThreadContext::Suspended)
        {
            DPRINTF(MinorExecute, "Discarding inst: %s from suspended"
                " thread\n", *inst);

            issued = true;
            discarded = true;
        } else if (inst->id.streamSeqNum != thread.streamSeqNum) {
            /* Instruction belongs to a stream that has since been
             * changed (by a branch or fault); discard it rather than
             * issue it */
            DPRINTF(MinorExecute, "Discarding inst: %s as its stream"
                " state was unexpected, expected: %d\n",
                *inst, thread.streamSeqNum);
            issued = true;
            discarded = true;
        } else {
            /* Try and issue an instruction into an FU, assume we didn't and
             * fix that in the loop */
            issued = false;

            /* Try FU from 0 each instruction */
            fu_index = 0;

            /* Try and issue a single instruction stepping through the
             * available FUs */
            do {
                FUPipeline *fu = funcUnits[fu_index];

                DPRINTF(MinorExecute, "Trying to issue inst: %s to FU: %d\n",
                    *inst, fu_index);

                /* Does the examined fu have the OpClass-related capability
                 * needed to execute this instruction?  Faults can always
                 * issue to any FU but probably should just 'live' in the
                 * inFlightInsts queue rather than having an FU. */
                bool fu_is_capable = (!inst->isFault() ?
                    fu->provides(inst->staticInst->opClass()) : true);

                if (inst->isNoCostInst()) {
                    /* Issue free insts. to a fake numbered FU */
                    fu_index = noCostFUIndex;

                    /* And start the countdown on activity to allow
                     * this instruction to get to the end of its FU */
                    cpu.activityRecorder->activity();

                    /* Mark the destinations for this instruction as
                     * busy */
                    scoreboard[thread_id].markupInstDests(inst, cpu.curCycle() +
                        Cycles(0), cpu.getContext(thread_id), false);

                    DPRINTF(MinorExecute, "Issuing %s to %d\n", inst->id, noCostFUIndex);
                    inst->fuIndex = noCostFUIndex;
                    inst->extraCommitDelay = Cycles(0);
                    inst->extraCommitDelayExpr = NULL;

                    /* Push the instruction onto the inFlight queue so
                     * it can be committed in order */
                    QueuedInst fu_inst(inst);
                    thread.inFlightInsts->push(fu_inst);

                    issued = true;

                } else if (!fu_is_capable || fu->alreadyPushed()) {
                    /* Skip */
                    if (!fu_is_capable) {
                        DPRINTF(MinorExecute, "Can't issue as FU: %d isn't"
                            " capable\n", fu_index);
                    } else {
                        DPRINTF(MinorExecute, "Can't issue as FU: %d is"
                            " already busy\n", fu_index);
                    }
                } else if (fu->stalled) {
                    DPRINTF(MinorExecute, "Can't issue inst: %s into FU: %d,"
                        " it's stalled\n",
                        *inst, fu_index);
                } else if (!fu->canInsert()) {
                    DPRINTF(MinorExecute, "Can't issue inst: %s to busy FU"
                        " for another: %d cycles\n",
                        *inst, fu->cyclesBeforeInsert());
                } else {
                    /* Look up any decode-supplied extra timing for this
                     * instruction on this FU */
                    MinorFUTiming *timing = (!inst->isFault() ?
                        fu->findTiming(inst->staticInst) : NULL);

                    const std::vector<Cycles> *src_latencies =
                        (timing ? &(timing->srcRegsRelativeLats)
                            : NULL);

                    const std::vector<bool> *cant_forward_from_fu_indices =
                        &(fu->cantForwardFromFUIndices);

                    if (timing && timing->suppress) {
                        DPRINTF(MinorExecute, "Can't issue inst: %s as extra"
                            " decoding is suppressing it\n",
                            *inst);
                    } else if (!scoreboard[thread_id].canInstIssue(inst,
                        src_latencies, cant_forward_from_fu_indices,
                        cpu.curCycle(), cpu.getContext(thread_id)))
                    {
                        DPRINTF(MinorExecute, "Can't issue inst: %s yet\n",
                            *inst);
                    } else {
                        /* Can insert the instruction into this FU */
                        DPRINTF(MinorExecute, "Issuing inst: %s"
                            " into FU %d\n", *inst,
                            fu_index);

                        Cycles extra_dest_retire_lat = Cycles(0);
                        TimingExpr *extra_dest_retire_lat_expr = NULL;
                        Cycles extra_assumed_lat = Cycles(0);

                        /* Add the extraCommitDelay and extraAssumeLat to
                         * the FU pipeline timings */
                        if (timing) {
                            extra_dest_retire_lat =
                                timing->extraCommitLat;
                            extra_dest_retire_lat_expr =
                                timing->extraCommitLatExpr;
                            extra_assumed_lat =
                                timing->extraAssumedLat;
                        }

                        issued_mem_ref = inst->isMemRef();

                        QueuedInst fu_inst(inst);

                        /* Decorate the inst with FU details */
                        inst->fuIndex = fu_index;
                        inst->extraCommitDelay = extra_dest_retire_lat;
                        inst->extraCommitDelayExpr =
                            extra_dest_retire_lat_expr;

                        if (issued_mem_ref) {
                            /* Remember which instruction this memory op
                             * depends on so that initiateAcc can be called
                             * early */
                            if (allowEarlyMemIssue) {
                                inst->instToWaitFor =
                                    scoreboard[thread_id].execSeqNumToWaitFor(inst,
                                        cpu.getContext(thread_id));

                                if (lsq.getLastMemBarrier(thread_id) >
                                    inst->instToWaitFor)
                                {
                                    DPRINTF(MinorExecute, "A barrier will"
                                        " cause a delay in mem ref issue of"
                                        " inst: %s until after inst"
                                        " %d(exec)\n", *inst,
                                        lsq.getLastMemBarrier(thread_id));

                                    inst->instToWaitFor =
                                        lsq.getLastMemBarrier(thread_id);
                                } else {
                                    DPRINTF(MinorExecute, "Memory ref inst:"
                                        " %s must wait for inst %d(exec)"
                                        " before issuing\n",
                                        *inst, inst->instToWaitFor);
                                }

                                inst->canEarlyIssue = true;
                            }
                            /* Also queue this instruction in the memory ref
                             * queue to ensure in-order issue to the LSQ */
                            DPRINTF(MinorExecute, "Pushing mem inst: %s\n",
                                *inst);
                            thread.inFUMemInsts->push(fu_inst);
                        }

                        /* Issue to FU */
                        fu->push(fu_inst);
                        /* And start the countdown on activity to allow
                         * this instruction to get to the end of its FU */
                        cpu.activityRecorder->activity();

                        /* Mark the destinations for this instruction as
                         * busy */
                        scoreboard[thread_id].markupInstDests(inst, cpu.curCycle() +
                            fu->description.opLat +
                            extra_dest_retire_lat +
                            extra_assumed_lat,
                            cpu.getContext(thread_id),
                            issued_mem_ref && extra_assumed_lat == Cycles(0));

                        /* Push the instruction onto the inFlight queue so
                         * it can be committed in order */
                        thread.inFlightInsts->push(fu_inst);

                        issued = true;
                    }
                }

                fu_index++;
            } while (fu_index != numFuncUnits && !issued);

            if (!issued)
                DPRINTF(MinorExecute, "Didn't issue inst: %s\n", *inst);
        }

        if (issued) {
            /* Generate MinorTrace's MinorInst lines.  Do this at commit
             * to allow better instruction annotation? */
            if (DTRACE(MinorTrace) && !inst->isBubble())
                inst->minorTraceInst(*this);

            /* Mark up barriers in the LSQ */
            if (!discarded && inst->isInst() &&
                inst->staticInst->isMemBarrier())
            {
                DPRINTF(MinorMem, "Issuing memory barrier inst: %s\n", *inst);
                lsq.issuedMemBarrierInst(inst);
            }

            if (inst->traceData && setTraceTimeOnIssue) {
                inst->traceData->setWhen(curTick());
            }

            if (issued_mem_ref)
                num_mem_insts_issued++;

            if (discarded) {
                num_insts_discarded++;
            } else if (!inst->isBubble()) {
                num_insts_issued++;

                if (num_insts_issued == issueLimit)
                    DPRINTF(MinorExecute, "Reached inst issue limit\n");
            }

            thread.inputIndex++;
            DPRINTF(MinorExecute, "Stepping to next inst inputIndex: %d\n",
                thread.inputIndex);
        }

        /* Got to the end of a line */
        if (thread.inputIndex == insts_in->width()) {
            popInput(thread_id);
            /* Set insts_in to null to force us to leave the surrounding
             * loop */
            insts_in = NULL;

            if (processMoreThanOneInput) {
                DPRINTF(MinorExecute, "Wrapping\n");
                insts_in = getInput(thread_id);
            }
        }
    } while (insts_in && thread.inputIndex < insts_in->width() &&
        /* We still have instructions */
        fu_index != numFuncUnits && /* Not visited all FUs */
        issued && /* We've not yet failed to issue an instruction */
        num_insts_issued != issueLimit && /* Still allowed to issue */
        num_mem_insts_issued != memoryIssueLimit);

    return num_insts_issued;
}
834
835 bool
836 Execute::tryPCEvents(ThreadID thread_id)
837 {
838 ThreadContext *thread = cpu.getContext(thread_id);
839 unsigned int num_pc_event_checks = 0;
840
841 /* Handle PC events on instructions */
842 Addr oldPC;
843 do {
844 oldPC = thread->instAddr();
845 cpu.threads[thread_id]->pcEventQueue.service(oldPC, thread);
846 num_pc_event_checks++;
847 } while (oldPC != thread->instAddr());
848
849 if (num_pc_event_checks > 1) {
850 DPRINTF(PCEvent, "Acting on PC Event to PC: %s\n",
851 thread->pcState());
852 }
853
854 return num_pc_event_checks > 1;
855 }
856
857 void
858 Execute::doInstCommitAccounting(MinorDynInstPtr inst)
859 {
860 assert(!inst->isFault());
861
862 MinorThread *thread = cpu.threads[inst->id.threadId];
863
864 /* Increment the many and various inst and op counts in the
865 * thread and system */
866 if (!inst->staticInst->isMicroop() || inst->staticInst->isLastMicroop())
867 {
868 thread->numInst++;
869 thread->numInsts++;
870 cpu.stats.numInsts++;
871 cpu.system->totalNumInsts++;
872
873 /* Act on events related to instruction counts */
874 thread->comInstEventQueue.serviceEvents(thread->numInst);
875 }
876 thread->numOp++;
877 thread->numOps++;
878 cpu.stats.numOps++;
879 cpu.stats.committedInstType[inst->id.threadId]
880 [inst->staticInst->opClass()]++;
881
882 /* Set the CP SeqNum to the numOps commit number */
883 if (inst->traceData)
884 inst->traceData->setCPSeq(thread->numOp);
885
886 cpu.probeInstCommit(inst->staticInst, inst->pc.instAddr());
887 }
888
/* Try to commit a single instruction that has reached the end of its FU:
 * invoke faults, initiate memory accesses (part one of a two-part memory
 * commit; handleMemResponse is part two), or execute ordinary
 * instructions.  Returns true when the instruction completed this cycle;
 * false when it must be retried.  `committed` reports whether execute()
 * was actually called; `completed_mem_issue` whether a memory access was
 * presented to the LSQ. */
bool
Execute::commitInst(MinorDynInstPtr inst, bool early_memory_issue,
    BranchData &branch, Fault &fault, bool &committed,
    bool &completed_mem_issue)
{
    ThreadID thread_id = inst->id.threadId;
    ThreadContext *thread = cpu.getContext(thread_id);

    bool completed_inst = true;
    fault = NoFault;

    /* Is the thread for this instruction suspended?  In that case, just
     * stall as long as there are no pending interrupts */
    if (thread->status() == ThreadContext::Suspended &&
        !isInterrupted(thread_id))
    {
        panic("We should never hit the case where we try to commit from a "
            "suspended thread as the streamSeqNum should not match");
    } else if (inst->isFault()) {
        ExecContext context(cpu, *cpu.threads[thread_id], *this, inst);

        DPRINTF(MinorExecute, "Fault inst reached Execute: %s\n",
            inst->fault->name());

        fault = inst->fault;
        inst->fault->invoke(thread, NULL);

        tryToBranch(inst, fault, branch);
    } else if (inst->staticInst->isMemRef()) {
        /* Memory accesses are executed in two parts:
         *  executeMemRefInst -- calculates the EA and issues the access
         *      to memory.  This is done here.
         *  handleMemResponse -- handles the response packet, done by
         *      Execute::commit
         *
         *  While the memory access is in its FU, the EA is being
         *  calculated.  At the end of the FU, when it is ready to
         *  'commit' (in this function), the access is presented to the
         *  memory queues.  When a response comes back from memory,
         *  Execute::commit will commit it.
         */
        bool predicate_passed = false;
        bool completed_mem_inst = executeMemRefInst(inst, branch,
            predicate_passed, fault);

        if (completed_mem_inst && fault != NoFault) {
            if (early_memory_issue) {
                DPRINTF(MinorExecute, "Fault in early executing inst: %s\n",
                    fault->name());
                /* Don't execute the fault, just stall the instruction
                 * until it gets to the head of inFlightInsts */
                inst->canEarlyIssue = false;
                /* Not completed as we'll come here again to pick up
                 * the fault when we get to the end of the FU */
                completed_inst = false;
            } else {
                DPRINTF(MinorExecute, "Fault in execute: %s\n",
                    fault->name());
                fault->invoke(thread, NULL);

                tryToBranch(inst, fault, branch);
                completed_inst = true;
            }
        } else {
            completed_inst = completed_mem_inst;
        }
        completed_mem_issue = completed_inst;
    } else if (inst->isInst() && inst->staticInst->isMemBarrier() &&
        !lsq.canPushIntoStoreBuffer())
    {
        /* Barriers need a store buffer slot; retry until one is free */
        DPRINTF(MinorExecute, "Can't commit data barrier inst: %s yet as"
            " there isn't space in the store buffer\n", *inst);

        completed_inst = false;
    } else if (inst->isInst() && inst->staticInst->isQuiesce()
        && !branch.isBubble()){
        /* This instruction can suspend, need to be able to communicate
         * backwards, so no other branches may evaluate this cycle*/
        completed_inst = false;
    } else {
        /* The common case: execute an ordinary (non-memory) instruction */
        ExecContext context(cpu, *cpu.threads[thread_id], *this, inst);

        DPRINTF(MinorExecute, "Committing inst: %s\n", *inst);

        fault = inst->staticInst->execute(&context,
            inst->traceData);

        /* Set the predicate for tracing and dump */
        if (inst->traceData)
            inst->traceData->setPredicate(context.readPredicate());

        committed = true;

        if (fault != NoFault) {
            DPRINTF(MinorExecute, "Fault in execute of inst: %s fault: %s\n",
                *inst, fault->name());
            fault->invoke(thread, inst->staticInst);
        }

        doInstCommitAccounting(inst);
        tryToBranch(inst, fault, branch);
    }

    if (completed_inst) {
        /* Keep a copy of this instruction's predictionSeqNum just in case
         * we need to issue a branch without an instruction (such as an
         * interrupt) */
        executeInfo[thread_id].lastPredictionSeqNum = inst->id.predictionSeqNum;

        /* Check to see if this instruction suspended the current thread. */
        if (!inst->isFault() &&
            thread->status() == ThreadContext::Suspended &&
            branch.isBubble() && /* It didn't branch too */
            !isInterrupted(thread_id)) /* Don't suspend if we have
                interrupts */
        {
            TheISA::PCState resume_pc = cpu.getContext(thread_id)->pcState();

            assert(resume_pc.microPC() == 0);

            DPRINTF(MinorInterrupt, "Suspending thread: %d from Execute"
                " inst: %s\n", thread_id, *inst);

            cpu.stats.numFetchSuspends++;

            updateBranchData(thread_id, BranchData::SuspendThread, inst,
                resume_pc, branch);
        }
    }

    return completed_inst;
}
1021
/* Commit/discard instructions for thread thread_id from the ends of the FU
 * pipelines and from LSQ responses.  only_commit_microops restricts commit
 * to the remaining microops of the current macroop (used ahead of
 * interrupts/drain, see Execute::evaluate); discard completes instructions
 * without applying their effects.  Any stream-changing branch generated
 * during commit is returned through branch. */
void
Execute::commit(ThreadID thread_id, bool only_commit_microops, bool discard,
    BranchData &branch)
{
    Fault fault = NoFault;
    Cycles now = cpu.curCycle();
    ExecuteThreadInfo &ex_info = executeInfo[thread_id];

    /**
     * Try and execute as many instructions from the end of FU pipelines as
     * possible.  This *doesn't* include actually advancing the pipelines.
     *
     * We do this by looping on the front of the inFlightInsts queue for as
     * long as we can find the desired instruction at the end of the
     * functional unit it was issued to without seeing a branch or a fault.
     * In this function, these terms are used:
     *  complete -- The instruction has finished its passage through
     *      its functional unit and its fate has been decided
     *      (committed, discarded, issued to the memory system)
     *  commit -- The instruction is complete(d), not discarded and has
     *      its effects applied to the CPU state
     *  discard(ed) -- The instruction is complete but not committed
     *      as its streamSeqNum disagrees with the current
     *      Execute::streamSeqNum
     *
     *  Commits are also possible from two other places:
     *
     *  1) Responses returning from the LSQ
     *  2) Mem ops issued to the LSQ ('committed' from the FUs) earlier
     *     than their position in the inFlightInsts queue, but after all
     *     their dependencies are resolved.
     */

    /* Has an instruction been completed?  Once this becomes false, we stop
     *  trying to complete instructions. */
    bool completed_inst = true;

    /* Number of insts committed this cycle to check against commitLimit */
    unsigned int num_insts_committed = 0;

    /* Number of memory access instructions committed to check against
     *  memCommitLimit */
    unsigned int num_mem_refs_committed = 0;

    if (only_commit_microops && !ex_info.inFlightInsts->empty()) {
        DPRINTF(MinorInterrupt, "Only commit microops %s %d\n",
            *(ex_info.inFlightInsts->front().inst),
            ex_info.lastCommitWasEndOfMacroop);
    }

    while (!ex_info.inFlightInsts->empty() && /* Some more instructions to process */
        !branch.isStreamChange() && /* No real branch */
        fault == NoFault && /* No faults */
        completed_inst && /* Still finding instructions to execute */
        num_insts_committed != commitLimit /* Not reached commit limit */
        )
    {
        if (only_commit_microops) {
            DPRINTF(MinorInterrupt, "Committing tail of insts before"
                " interrupt: %s\n",
                *(ex_info.inFlightInsts->front().inst));
        }

        QueuedInst *head_inflight_inst = &(ex_info.inFlightInsts->front());

        InstSeqNum head_exec_seq_num =
            head_inflight_inst->inst->id.execSeqNum;

        /* The instruction we actually process if completed_inst
         *  remains true to the end of the loop body.
         *  Start by considering the head of the in flight insts queue */
        MinorDynInstPtr inst = head_inflight_inst->inst;

        bool committed_inst = false;
        bool discard_inst = false;
        bool completed_mem_ref = false;
        bool issued_mem_ref = false;
        bool early_memory_issue = false;

        /* Must set this again to go around the loop */
        completed_inst = false;

        /* If we're just completing a macroop before an interrupt or drain,
         * can we still commit another microop (rather than a memory response)
         * without crossing into the next full instruction? */
        bool can_commit_insts = !ex_info.inFlightInsts->empty() &&
            !(only_commit_microops && ex_info.lastCommitWasEndOfMacroop);

        /* Can we find a mem response for this inst */
        LSQ::LSQRequestPtr mem_response =
            (inst->inLSQ ? lsq.findResponse(inst) : NULL);

        DPRINTF(MinorExecute, "Trying to commit canCommitInsts: %d\n",
            can_commit_insts);

        /* Test for PC events after every instruction */
        if (isInbetweenInsts(thread_id) && tryPCEvents(thread_id)) {
            ThreadContext *thread = cpu.getContext(thread_id);

            /* Branch as there was a change in PC */
            updateBranchData(thread_id, BranchData::UnpredictedBranch,
                MinorDynInst::bubble(), thread->pcState(), branch);
        } else if (mem_response &&
            num_mem_refs_committed < memoryCommitLimit)
        {
            /* Try to commit from the memory responses next */
            discard_inst = inst->id.streamSeqNum !=
                           ex_info.streamSeqNum || discard;

            DPRINTF(MinorExecute, "Trying to commit mem response: %s\n",
                *inst);

            /* Complete or discard the response */
            if (discard_inst) {
                DPRINTF(MinorExecute, "Discarding mem inst: %s as its"
                    " stream state was unexpected, expected: %d\n",
                    *inst, ex_info.streamSeqNum);

                lsq.popResponse(mem_response);
            } else {
                handleMemResponse(inst, mem_response, branch, fault);
                committed_inst = true;
            }

            completed_mem_ref = true;
            completed_inst = true;
        } else if (can_commit_insts) {
            /* If true, this instruction will, subject to timing tweaks,
             *  be considered for completion.  try_to_commit flattens
             *  the `if' tree a bit and allows other tests for inst
             *  commit to be inserted here. */
            bool try_to_commit = false;

            /* Try and issue memory ops early if they:
             *  - Can push a request into the LSQ
             *  - Have reached the end of their FUs
             *  - Have had all their dependencies satisfied
             *  - Are from the right stream
             *
             *  For any other case, leave it to the normal instruction
             *  issue below to handle them.
             */
            if (!ex_info.inFUMemInsts->empty() && lsq.canRequest()) {
                DPRINTF(MinorExecute, "Trying to commit from mem FUs\n");

                const MinorDynInstPtr head_mem_ref_inst =
                    ex_info.inFUMemInsts->front().inst;
                FUPipeline *fu = funcUnits[head_mem_ref_inst->fuIndex];
                const MinorDynInstPtr &fu_inst = fu->front().inst;

                /* Use this, possibly out of order, inst as the one
                 *  to 'commit'/send to the LSQ */
                if (!fu_inst->isBubble() &&
                    !fu_inst->inLSQ &&
                    fu_inst->canEarlyIssue &&
                    ex_info.streamSeqNum == fu_inst->id.streamSeqNum &&
                    head_exec_seq_num > fu_inst->instToWaitFor)
                {
                    DPRINTF(MinorExecute, "Issuing mem ref early"
                        " inst: %s instToWaitFor: %d\n",
                        *(fu_inst), fu_inst->instToWaitFor);

                    inst = fu_inst;
                    try_to_commit = true;
                    early_memory_issue = true;
                    completed_inst = true;
                }
            }

            /* Try and commit FU-less insts */
            if (!completed_inst && inst->isNoCostInst()) {
                DPRINTF(MinorExecute, "Committing no cost inst: %s", *inst);

                try_to_commit = true;
                completed_inst = true;
            }

            /* Try to issue from the ends of FUs and the inFlightInsts
             *  queue */
            if (!completed_inst && !inst->inLSQ) {
                DPRINTF(MinorExecute, "Trying to commit from FUs\n");

                /* Try to commit from a functional unit */
                /* Is the head inst of the expected inst's FU actually the
                 *  expected inst? */
                QueuedInst &fu_inst =
                    funcUnits[inst->fuIndex]->front();
                InstSeqNum fu_inst_seq_num = fu_inst.inst->id.execSeqNum;

                if (fu_inst.inst->isBubble()) {
                    /* No instruction ready */
                    completed_inst = false;
                } else if (fu_inst_seq_num != head_exec_seq_num) {
                    /* Past instruction: we must have already executed it
                     * in the same cycle and so the head inst isn't
                     * actually at the end of its pipeline
                     * Future instruction: handled above and only for
                     * mem refs on their way to the LSQ */
                } else if (fu_inst.inst->id == inst->id)  {
                    /* All instructions can be committed if they have the
                     *  right execSeqNum and there are no in-flight
                     *  mem insts before us */
                    try_to_commit = true;
                    completed_inst = true;
                }
            }

            if (try_to_commit) {
                discard_inst = inst->id.streamSeqNum !=
                    ex_info.streamSeqNum || discard;

                /* Is this instruction discardable as its streamSeqNum
                 *  doesn't match? */
                if (!discard_inst) {
                    /* Try to commit or discard a non-memory instruction.
                     *  Memory ops are actually 'committed' from this FUs
                     *  and 'issued' into the memory system so we need to
                     *  account for them later (commit_was_mem_issue gets
                     *  set) */
                    if (inst->extraCommitDelayExpr) {
                        DPRINTF(MinorExecute, "Evaluating expression for"
                            " extra commit delay inst: %s\n", *inst);

                        ThreadContext *thread = cpu.getContext(thread_id);

                        TimingExprEvalContext context(inst->staticInst,
                            thread, NULL);

                        uint64_t extra_delay = inst->extraCommitDelayExpr->
                            eval(context);

                        DPRINTF(MinorExecute, "Extra commit delay expr"
                            " result: %d\n", extra_delay);

                        /* Cap the stall at 128 cycles so a bad expression
                         *  can't wedge commit indefinitely */
                        if (extra_delay < 128) {
                            inst->extraCommitDelay += Cycles(extra_delay);
                        } else {
                            DPRINTF(MinorExecute, "Extra commit delay was"
                                " very long: %d\n", extra_delay);
                        }
                        /* Only evaluate the expression once per inst */
                        inst->extraCommitDelayExpr = NULL;
                    }

                    /* Move the extraCommitDelay from the instruction
                     *  into the minimumCommitCycle */
                    if (inst->extraCommitDelay != Cycles(0)) {
                        inst->minimumCommitCycle = cpu.curCycle() +
                            inst->extraCommitDelay;
                        inst->extraCommitDelay = Cycles(0);
                    }

                    /* @todo Think about making lastMemBarrier be
                     *  MAX_UINT_64 to avoid using 0 as a marker value */
                    if (!inst->isFault() && inst->isMemRef() &&
                        lsq.getLastMemBarrier(thread_id) <
                            inst->id.execSeqNum &&
                        lsq.getLastMemBarrier(thread_id) != 0)
                    {
                        DPRINTF(MinorExecute, "Not committing inst: %s yet"
                            " as there are incomplete barriers in flight\n",
                            *inst);
                        completed_inst = false;
                    } else if (inst->minimumCommitCycle > now) {
                        DPRINTF(MinorExecute, "Not committing inst: %s yet"
                            " as it wants to be stalled for %d more cycles\n",
                            *inst, inst->minimumCommitCycle - now);
                        completed_inst = false;
                    } else {
                        completed_inst = commitInst(inst,
                            early_memory_issue, branch, fault,
                            committed_inst, issued_mem_ref);
                    }
                } else {
                    /* Discard instruction */
                    completed_inst = true;
                }

                if (completed_inst) {
                    /* Allow the pipeline to advance.  If the FU head
                     *  instruction wasn't the inFlightInsts head
                     *  but had already been committed, it would have
                     *  unstalled the pipeline before here */
                    if (inst->fuIndex != noCostFUIndex) {
                        DPRINTF(MinorExecute, "Unstalling %d for inst %s\n", inst->fuIndex, inst->id);
                        funcUnits[inst->fuIndex]->stalled = false;
                    }
                }
            }
        } else {
            DPRINTF(MinorExecute, "No instructions to commit\n");
            completed_inst = false;
        }

        /* All discardable instructions must also be 'completed' by now */
        assert(!(discard_inst && !completed_inst));

        /* Instruction committed but was discarded due to streamSeqNum
         *  mismatch */
        if (discard_inst) {
            DPRINTF(MinorExecute, "Discarding inst: %s as its stream"
                " state was unexpected, expected: %d\n",
                *inst, ex_info.streamSeqNum);

            if (fault == NoFault)
                cpu.stats.numDiscardedOps++;
        }

        /* Mark the mem inst as being in the LSQ */
        if (issued_mem_ref) {
            inst->fuIndex = 0;
            inst->inLSQ = true;
        }

        /* Pop issued (to LSQ) and discarded mem refs from the inFUMemInsts
         *  as they've *definitely* exited the FUs */
        if (completed_inst && inst->isMemRef()) {
            /* The MemRef could have been discarded from the FU or the memory
             *  queue, so just check an FU instruction */
            if (!ex_info.inFUMemInsts->empty() &&
                ex_info.inFUMemInsts->front().inst == inst)
            {
                ex_info.inFUMemInsts->pop();
            }
        }

        /* Issued-to-LSQ mem refs stay in inFlightInsts until their response
         *  returns, so only retire non-issued completions here */
        if (completed_inst && !(issued_mem_ref && fault == NoFault)) {
            /* Note that this includes discarded insts */
            DPRINTF(MinorExecute, "Completed inst: %s\n", *inst);

            /* Got to the end of a full instruction? */
            ex_info.lastCommitWasEndOfMacroop = inst->isFault() ||
                inst->isLastOpInInst();

            /* lastPredictionSeqNum is kept as a convenience to prevent its
             *  value from changing too much on the minorview display */
            ex_info.lastPredictionSeqNum = inst->id.predictionSeqNum;

            /* Finished with the inst, remove it from the inst queue and
             *  clear its dependencies */
            ex_info.inFlightInsts->pop();

            /* Complete barriers in the LSQ/move to store buffer */
            if (inst->isInst() && inst->staticInst->isMemBarrier()) {
                DPRINTF(MinorMem, "Completing memory barrier"
                    " inst: %s committed: %d\n", *inst, committed_inst);
                lsq.completeMemBarrierInst(inst, committed_inst);
            }

            scoreboard[thread_id].clearInstDests(inst, inst->isMemRef());
        }

        /* Handle per-cycle instruction counting */
        if (committed_inst) {
            bool is_no_cost_inst = inst->isNoCostInst();

            /* Don't show no cost instructions as having taken a commit
             *  slot */
            if (DTRACE(MinorTrace) && !is_no_cost_inst)
                ex_info.instsBeingCommitted.insts[num_insts_committed] = inst;

            if (!is_no_cost_inst)
                num_insts_committed++;

            if (num_insts_committed == commitLimit)
                DPRINTF(MinorExecute, "Reached inst commit limit\n");

            /* Re-set the time of the instruction if that's required for
             * tracing */
            if (inst->traceData) {
                if (setTraceTimeOnCommit)
                    inst->traceData->setWhen(curTick());
                inst->traceData->dump();
            }

            if (completed_mem_ref)
                num_mem_refs_committed++;

            if (num_mem_refs_committed == memoryCommitLimit)
                DPRINTF(MinorExecute, "Reached mem ref commit limit\n");
        }
    }
}
1404
1405 bool
1406 Execute::isInbetweenInsts(ThreadID thread_id) const
1407 {
1408 return executeInfo[thread_id].lastCommitWasEndOfMacroop &&
1409 !lsq.accessesInFlight();
1410 }
1411
/* One cycle's worth of work for the Execute stage: latch input, step the
 *  LSQ, check/take interrupts, commit then issue for the chosen threads,
 *  advance the FU pipelines and decide whether this stage needs to be
 *  ticked again next cycle. */
void
Execute::evaluate()
{
    /* Latch new input at the buffer tail; pushTail at the bottom of this
     *  function makes it permanent */
    if (!inp.outputWire->isBubble())
        inputBuffer[inp.outputWire->threadId].setTail(*inp.outputWire);

    BranchData &branch = *out.inputWire;

    unsigned int num_issued = 0;

    /* Do all the cycle-wise activities for dcachePort here to potentially
     *  free up input spaces in the LSQ's requests queue */
    lsq.step();

    /* Check interrupts first.  Will halt commit if interrupt found */
    bool interrupted = false;
    ThreadID interrupt_tid = checkInterrupts(branch, interrupted);

    if (interrupt_tid != InvalidThreadID) {
        /* Signalling an interrupt this cycle, not issuing/committing from
         * any other threads */
    } else if (!branch.isBubble()) {
        /* It's important that this is here to carry Fetch1 wakeups to Fetch1
         *  without overwriting them */
        DPRINTF(MinorInterrupt, "Execute skipping a cycle to allow old"
            " branch to complete\n");
    } else {
        ThreadID commit_tid = getCommittingThread();

        if (commit_tid != InvalidThreadID) {
            ExecuteThreadInfo& commit_info = executeInfo[commit_tid];

            DPRINTF(MinorExecute, "Attempting to commit [tid:%d]\n",
                    commit_tid);
            /* commit can set stalled flags observable to issue and so *must* be
             *  called first */
            if (commit_info.drainState != NotDraining) {
                if (commit_info.drainState == DrainCurrentInst) {
                    /* Commit only micro-ops, don't kill anything else */
                    commit(commit_tid, true, false, branch);

                    if (isInbetweenInsts(commit_tid))
                        setDrainState(commit_tid, DrainHaltFetch);

                    /* Discard any generated branch */
                    branch = BranchData::bubble();
                } else if (commit_info.drainState == DrainAllInsts) {
                    /* Kill all instructions */
                    while (getInput(commit_tid))
                        popInput(commit_tid);
                    commit(commit_tid, false, true, branch);
                }
            } else {
                /* Commit micro-ops only if interrupted.  Otherwise, commit
                 *  anything you like */
                DPRINTF(MinorExecute, "Committing micro-ops for interrupt[tid:%d]\n",
                        commit_tid);
                bool only_commit_microops = interrupted &&
                                            hasInterrupt(commit_tid);
                commit(commit_tid, only_commit_microops, false, branch);
            }

            /* Halt fetch, but don't do it until we have the current instruction in
             *  the bag */
            if (commit_info.drainState == DrainHaltFetch) {
                updateBranchData(commit_tid, BranchData::HaltFetch,
                        MinorDynInst::bubble(), TheISA::PCState(0), branch);

                cpu.wakeupOnEvent(Pipeline::ExecuteStageId);
                setDrainState(commit_tid, DrainAllInsts);
            }
        }
        ThreadID issue_tid = getIssuingThread();
        /* This will issue merrily even when interrupted in the sure and
         *  certain knowledge that the interrupt will change the stream */
        if (issue_tid != InvalidThreadID) {
            DPRINTF(MinorExecute, "Attempting to issue [tid:%d]\n",
                    issue_tid);
            num_issued = issue(issue_tid);
        }

    }

    /* Run logic to step functional units + decide if we are active on the next
     * clock cycle */
    std::vector<MinorDynInstPtr> next_issuable_insts;
    bool can_issue_next = false;

    for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
        /* Find the next issuable instruction for each thread and see if it can
           be issued */
        if (getInput(tid)) {
            unsigned int input_index = executeInfo[tid].inputIndex;
            MinorDynInstPtr inst = getInput(tid)->insts[input_index];
            if (inst->isFault()) {
                /* Faults are always 'issuable' so the stage must stay awake */
                can_issue_next = true;
            } else if (!inst->isBubble()) {
                next_issuable_insts.push_back(inst);
            }
        }
    }

    bool becoming_stalled = true;

    /* Advance the pipelines and note whether they still need to be
     * advanced */
    for (unsigned int i = 0; i < numFuncUnits; i++) {
        FUPipeline *fu = funcUnits[i];
        fu->advance();

        /* If we need to tick again, the pipeline will have been left or set
         * to be unstalled */
        if (fu->occupancy !=0 && !fu->stalled)
            becoming_stalled = false;

        /* Could we possibly issue the next instruction from any thread?
         * This is quite an expensive test and is only used to determine
         * if the CPU should remain active, only run it if we aren't sure
         * we are active next cycle yet */
        for (auto inst : next_issuable_insts) {
            if (!fu->stalled && fu->provides(inst->staticInst->opClass()) &&
                scoreboard[inst->id.threadId].canInstIssue(inst,
                    NULL, NULL, cpu.curCycle() + Cycles(1),
                    cpu.getContext(inst->id.threadId))) {
                can_issue_next = true;
                break;
            }
        }
    }

    bool head_inst_might_commit = false;

    /* Could the head in flight insts be committed */
    for (auto const &info : executeInfo) {
        if (!info.inFlightInsts->empty()) {
            const QueuedInst &head_inst = info.inFlightInsts->front();

            if (head_inst.inst->isNoCostInst()) {
                head_inst_might_commit = true;
            } else {
                FUPipeline *fu = funcUnits[head_inst.inst->fuIndex];
                /* Head inst could commit if it sits stalled at the end of
                 *  its FU or if the LSQ already has its response */
                if ((fu->stalled &&
                     fu->front().inst->id == head_inst.inst->id) ||
                     lsq.findResponse(head_inst.inst))
                {
                    head_inst_might_commit = true;
                    break;
                }
            }
        }
    }

    DPRINTF(Activity, "Need to tick num issued insts: %s%s%s%s%s%s\n",
       (num_issued != 0 ? " (issued some insts)" : ""),
       (becoming_stalled ? "(becoming stalled)" : "(not becoming stalled)"),
       (can_issue_next ? " (can issued next inst)" : ""),
       (head_inst_might_commit ? "(head inst might commit)" : ""),
       (lsq.needsToTick() ? " (LSQ needs to tick)" : ""),
       (interrupted ? " (interrupted)" : ""));

    bool need_to_tick =
       num_issued != 0 || /* Issued some insts this cycle */
       !becoming_stalled || /* Some FU pipelines can still move */
       can_issue_next || /* Can still issue a new inst */
       head_inst_might_commit || /* Could possibly commit the next inst */
       lsq.needsToTick() || /* Must step the dcache port */
       interrupted; /* There are pending interrupts */

    if (!need_to_tick) {
        DPRINTF(Activity, "The next cycle might be skippable as there are no"
            " advanceable FUs\n");
    }

    /* Wake up if we need to tick again */
    if (need_to_tick)
        cpu.wakeupOnEvent(Pipeline::ExecuteStageId);

    /* Note activity of following buffer */
    if (!branch.isBubble())
        cpu.activityRecorder->activity();

    /* Make sure the input (if any left) is pushed */
    if (!inp.outputWire->isBubble())
        inputBuffer[inp.outputWire->threadId].pushTail();
}
1597
/* Round-robin over the threads (starting from interruptPriority) looking
 *  for a signalled interrupt that can be taken now.  Returns the thread
 *  whose interrupt was taken, or InvalidThreadID.  interrupted is set true
 *  if any inspected thread had an interrupt pending, whether or not it was
 *  acted on this cycle. */
ThreadID
Execute::checkInterrupts(BranchData& branch, bool& interrupted)
{
    ThreadID tid = interruptPriority;
    /* Evaluate interrupts in round-robin based upon service */
    do {
        /* Has an interrupt been signalled?  This may not be acted on
         *  straightaway so this is different from took_interrupt */
        bool thread_interrupted = false;

        if (FullSystem && cpu.getInterruptController(tid)) {
            /* This is here because it seems that after drainResume the
             * interrupt controller isn't always set */
            thread_interrupted = executeInfo[tid].drainState == NotDraining &&
                isInterrupted(tid);
            interrupted = interrupted || thread_interrupted;
        } else {
            DPRINTF(MinorInterrupt, "No interrupt controller\n");
        }
        DPRINTF(MinorInterrupt, "[tid:%d] thread_interrupted?=%d isInbetweenInsts?=%d\n",
            tid, thread_interrupted, isInbetweenInsts(tid));
        /* Act on interrupts */
        if (thread_interrupted && isInbetweenInsts(tid)) {
            if (takeInterrupt(tid, branch)) {
                interruptPriority = tid;
                return tid;
            }
            /* takeInterrupt failed: tid is left unchanged so the loop
             *  condition below terminates the search */
        } else {
            tid = (tid + 1) % cpu.numThreads;
        }
    } while (tid != interruptPriority);

    return InvalidThreadID;
}
1632
1633 bool
1634 Execute::hasInterrupt(ThreadID thread_id)
1635 {
1636 if (FullSystem && cpu.getInterruptController(thread_id)) {
1637 return executeInfo[thread_id].drainState == NotDraining &&
1638 isInterrupted(thread_id);
1639 }
1640
1641 return false;
1642 }
1643
1644 void
1645 Execute::minorTrace() const
1646 {
1647 std::ostringstream insts;
1648 std::ostringstream stalled;
1649
1650 executeInfo[0].instsBeingCommitted.reportData(insts);
1651 lsq.minorTrace();
1652 inputBuffer[0].minorTrace();
1653 scoreboard[0].minorTrace();
1654
1655 /* Report functional unit stalling in one string */
1656 unsigned int i = 0;
1657 while (i < numFuncUnits)
1658 {
1659 stalled << (funcUnits[i]->stalled ? '1' : 'E');
1660 i++;
1661 if (i != numFuncUnits)
1662 stalled << ',';
1663 }
1664
1665 MINORTRACE("insts=%s inputIndex=%d streamSeqNum=%d"
1666 " stalled=%s drainState=%d isInbetweenInsts=%d\n",
1667 insts.str(), executeInfo[0].inputIndex, executeInfo[0].streamSeqNum,
1668 stalled.str(), executeInfo[0].drainState, isInbetweenInsts(0));
1669
1670 std::for_each(funcUnits.begin(), funcUnits.end(),
1671 std::mem_fun(&FUPipeline::minorTrace));
1672
1673 executeInfo[0].inFlightInsts->minorTrace();
1674 executeInfo[0].inFUMemInsts->minorTrace();
1675 }
1676
/* Choose the thread to commit from this cycle according to the CPU's
 *  thread policy.  Returns InvalidThreadID when no thread's head in-flight
 *  instruction could make progress this cycle. */
inline ThreadID
Execute::getCommittingThread()
{
    std::vector<ThreadID> priority_list;

    switch (cpu.threadPolicy) {
      case Enums::SingleThreaded:
        return 0;
      case Enums::RoundRobin:
        priority_list = cpu.roundRobinPriority(commitPriority);
        break;
      case Enums::Random:
        priority_list = cpu.randomPriority();
        break;
      default:
        panic("Invalid thread policy");
    }

    for (auto tid : priority_list) {
        ExecuteThreadInfo &ex_info = executeInfo[tid];
        bool can_commit_insts = !ex_info.inFlightInsts->empty();
        if (can_commit_insts) {
            QueuedInst *head_inflight_inst = &(ex_info.inFlightInsts->front());
            MinorDynInstPtr inst = head_inflight_inst->inst;

            /* An inst already issued to the LSQ can only commit once its
             *  memory response is available */
            can_commit_insts = can_commit_insts &&
                (!inst->inLSQ || (lsq.findResponse(inst) != NULL));

            if (!inst->inLSQ) {
                /* Could a mem ref be issued to the LSQ early?  This mirrors
                 *  the early-issue test in Execute::commit */
                bool can_transfer_mem_inst = false;
                if (!ex_info.inFUMemInsts->empty() && lsq.canRequest()) {
                    const MinorDynInstPtr head_mem_ref_inst =
                        ex_info.inFUMemInsts->front().inst;
                    FUPipeline *fu = funcUnits[head_mem_ref_inst->fuIndex];
                    const MinorDynInstPtr &fu_inst = fu->front().inst;
                    can_transfer_mem_inst =
                        !fu_inst->isBubble() &&
                         fu_inst->id.threadId == tid &&
                         !fu_inst->inLSQ &&
                         fu_inst->canEarlyIssue &&
                         inst->id.execSeqNum > fu_inst->instToWaitFor;
                }

                /* Otherwise the head inst must either be no-cost or be
                 *  sitting at the end of its functional unit */
                bool can_execute_fu_inst = inst->fuIndex == noCostFUIndex;
                if (can_commit_insts && !can_transfer_mem_inst &&
                    inst->fuIndex != noCostFUIndex)
                {
                    QueuedInst& fu_inst = funcUnits[inst->fuIndex]->front();
                    can_execute_fu_inst = !fu_inst.inst->isBubble() &&
                        fu_inst.inst->id == inst->id;
                }

                can_commit_insts = can_commit_insts &&
                    (can_transfer_mem_inst || can_execute_fu_inst);
            }
        }


        if (can_commit_insts) {
            commitPriority = tid;
            return tid;
        }
    }

    return InvalidThreadID;
}
1743
1744 inline ThreadID
1745 Execute::getIssuingThread()
1746 {
1747 std::vector<ThreadID> priority_list;
1748
1749 switch (cpu.threadPolicy) {
1750 case Enums::SingleThreaded:
1751 return 0;
1752 case Enums::RoundRobin:
1753 priority_list = cpu.roundRobinPriority(issuePriority);
1754 break;
1755 case Enums::Random:
1756 priority_list = cpu.randomPriority();
1757 break;
1758 default:
1759 panic("Invalid thread scheduling policy.");
1760 }
1761
1762 for (auto tid : priority_list) {
1763 if (getInput(tid)) {
1764 issuePriority = tid;
1765 return tid;
1766 }
1767 }
1768
1769 return InvalidThreadID;
1770 }
1771
1772 void
1773 Execute::drainResume()
1774 {
1775 DPRINTF(Drain, "MinorExecute drainResume\n");
1776
1777 for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
1778 setDrainState(tid, NotDraining);
1779 }
1780
1781 cpu.wakeupOnEvent(Pipeline::ExecuteStageId);
1782 }
1783
1784 std::ostream &operator <<(std::ostream &os, Execute::DrainState state)
1785 {
1786 switch (state)
1787 {
1788 case Execute::NotDraining:
1789 os << "NotDraining";
1790 break;
1791 case Execute::DrainCurrentInst:
1792 os << "DrainCurrentInst";
1793 break;
1794 case Execute::DrainHaltFetch:
1795 os << "DrainHaltFetch";
1796 break;
1797 case Execute::DrainAllInsts:
1798 os << "DrainAllInsts";
1799 break;
1800 default:
1801 os << "Drain-" << static_cast<int>(state);
1802 break;
1803 }
1804
1805 return os;
1806 }
1807
1808 void
1809 Execute::setDrainState(ThreadID thread_id, DrainState state)
1810 {
1811 DPRINTF(Drain, "setDrainState[%d]: %s\n", thread_id, state);
1812 executeInfo[thread_id].drainState = state;
1813 }
1814
1815 unsigned int
1816 Execute::drain()
1817 {
1818 DPRINTF(Drain, "MinorExecute drain\n");
1819
1820 for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
1821 if (executeInfo[tid].drainState == NotDraining) {
1822 cpu.wakeupOnEvent(Pipeline::ExecuteStageId);
1823
1824 /* Go to DrainCurrentInst if we're between microops
1825 * or waiting on an unbufferable memory operation.
1826 * Otherwise we can go straight to DrainHaltFetch
1827 */
1828 if (isInbetweenInsts(tid))
1829 setDrainState(tid, DrainHaltFetch);
1830 else
1831 setDrainState(tid, DrainCurrentInst);
1832 }
1833 }
1834 return (isDrained() ? 0 : 1);
1835 }
1836
1837 bool
1838 Execute::isDrained()
1839 {
1840 if (!lsq.isDrained())
1841 return false;
1842
1843 for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
1844 if (!inputBuffer[tid].empty() ||
1845 !executeInfo[tid].inFlightInsts->empty()) {
1846
1847 return false;
1848 }
1849 }
1850
1851 return true;
1852 }
1853
1854 Execute::~Execute()
1855 {
1856 for (unsigned int i = 0; i < numFuncUnits; i++)
1857 delete funcUnits[i];
1858
1859 for (ThreadID tid = 0; tid < cpu.numThreads; tid++)
1860 delete executeInfo[tid].inFlightInsts;
1861 }
1862
1863 bool
1864 Execute::instIsRightStream(MinorDynInstPtr inst)
1865 {
1866 return inst->id.streamSeqNum == executeInfo[inst->id.threadId].streamSeqNum;
1867 }
1868
1869 bool
1870 Execute::instIsHeadInst(MinorDynInstPtr inst)
1871 {
1872 bool ret = false;
1873
1874 if (!executeInfo[inst->id.threadId].inFlightInsts->empty())
1875 ret = executeInfo[inst->id.threadId].inFlightInsts->front().inst->id == inst->id;
1876
1877 return ret;
1878 }
1879
1880 MinorCPU::MinorCPUPort &
1881 Execute::getDcachePort()
1882 {
1883 return lsq.getDcachePort();
1884 }
1885
1886 }