/*
 * Copyright (c) 2013-2014,2016 ARM Limited
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder.  You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Andrew Bardsley
 */
#include "cpu/minor/fetch2.hh"

#include <string>

#include "arch/decoder.hh"
#include "arch/utility.hh"
#include "cpu/minor/pipeline.hh"
#include "cpu/pred/bpred_unit.hh"
#include "debug/Branch.hh"
#include "debug/Fetch.hh"
#include "debug/MinorTrace.hh"
55 Fetch2::Fetch2(const std::string
&name
,
57 MinorCPUParams
¶ms
,
58 Latch
<ForwardLineData
>::Output inp_
,
59 Latch
<BranchData
>::Output branchInp_
,
60 Latch
<BranchData
>::Input predictionOut_
,
61 Latch
<ForwardInstData
>::Input out_
,
62 std::vector
<InputBuffer
<ForwardInstData
>> &next_stage_input_buffer
) :
66 branchInp(branchInp_
),
67 predictionOut(predictionOut_
),
69 nextStageReserve(next_stage_input_buffer
),
70 outputWidth(params
.decodeInputWidth
),
71 processMoreThanOneInput(params
.fetch2CycleInput
),
72 branchPredictor(*params
.branchPred
),
73 fetchInfo(params
.numThreads
),
77 fatal("%s: decodeInputWidth must be >= 1 (%d)\n", name
, outputWidth
);
79 if (params
.fetch2InputBufferSize
< 1) {
80 fatal("%s: fetch2InputBufferSize must be >= 1 (%d)\n", name
,
81 params
.fetch2InputBufferSize
);
84 /* Per-thread input buffers */
85 for (ThreadID tid
= 0; tid
< params
.numThreads
; tid
++) {
86 inputBuffer
.push_back(
87 InputBuffer
<ForwardLineData
>(
88 name
+ ".inputBuffer" + std::to_string(tid
), "lines",
89 params
.fetch2InputBufferSize
));
93 const ForwardLineData
*
94 Fetch2::getInput(ThreadID tid
)
96 /* Get a line from the inputBuffer to work with */
97 if (!inputBuffer
[tid
].empty()) {
98 return &(inputBuffer
[tid
].front());
105 Fetch2::popInput(ThreadID tid
)
107 if (!inputBuffer
[tid
].empty()) {
108 inputBuffer
[tid
].front().freeLine();
109 inputBuffer
[tid
].pop();
112 fetchInfo
[tid
].inputIndex
= 0;
116 Fetch2::dumpAllInput(ThreadID tid
)
118 DPRINTF(Fetch
, "Dumping whole input buffer\n");
119 while (!inputBuffer
[tid
].empty())
122 fetchInfo
[tid
].inputIndex
= 0;
123 fetchInfo
[tid
].havePC
= false;
127 Fetch2::updateBranchPrediction(const BranchData
&branch
)
129 MinorDynInstPtr inst
= branch
.inst
;
131 /* Don't even consider instructions we didn't try to predict or faults */
132 if (inst
->isFault() || !inst
->triedToPredict
)
135 switch (branch
.reason
) {
136 case BranchData::NoBranch
:
137 /* No data to update */
139 case BranchData::Interrupt
:
140 /* Never try to predict interrupts */
142 case BranchData::SuspendThread
:
143 /* Don't need to act on suspends */
145 case BranchData::HaltFetch
:
146 /* Don't need to act on fetch wakeup */
148 case BranchData::BranchPrediction
:
149 /* Shouldn't happen. Fetch2 is the only source of
150 * BranchPredictions */
152 case BranchData::UnpredictedBranch
:
153 /* Unpredicted branch or barrier */
154 DPRINTF(Branch
, "Unpredicted branch seen inst: %s\n", *inst
);
155 branchPredictor
.squash(inst
->id
.fetchSeqNum
,
156 branch
.target
, true, inst
->id
.threadId
);
157 // Update after squashing to accomodate O3CPU
158 // using the branch prediction code.
159 branchPredictor
.update(inst
->id
.fetchSeqNum
,
162 case BranchData::CorrectlyPredictedBranch
:
163 /* Predicted taken, was taken */
164 DPRINTF(Branch
, "Branch predicted correctly inst: %s\n", *inst
);
165 branchPredictor
.update(inst
->id
.fetchSeqNum
,
168 case BranchData::BadlyPredictedBranch
:
169 /* Predicted taken, not taken */
170 DPRINTF(Branch
, "Branch mis-predicted inst: %s\n", *inst
);
171 branchPredictor
.squash(inst
->id
.fetchSeqNum
,
172 branch
.target
/* Not used */, false, inst
->id
.threadId
);
173 // Update after squashing to accomodate O3CPU
174 // using the branch prediction code.
175 branchPredictor
.update(inst
->id
.fetchSeqNum
,
178 case BranchData::BadlyPredictedBranchTarget
:
179 /* Predicted taken, was taken but to a different target */
180 DPRINTF(Branch
, "Branch mis-predicted target inst: %s target: %s\n",
181 *inst
, branch
.target
);
182 branchPredictor
.squash(inst
->id
.fetchSeqNum
,
183 branch
.target
, true, inst
->id
.threadId
);
189 Fetch2::predictBranch(MinorDynInstPtr inst
, BranchData
&branch
)
191 Fetch2ThreadInfo
&thread
= fetchInfo
[inst
->id
.threadId
];
192 TheISA::PCState inst_pc
= inst
->pc
;
194 assert(!inst
->predictedTaken
);
196 /* Skip non-control/sys call instructions */
197 if (inst
->staticInst
->isControl() ||
198 inst
->staticInst
->isSyscall())
200 /* Tried to predict */
201 inst
->triedToPredict
= true;
203 DPRINTF(Branch
, "Trying to predict for inst: %s\n", *inst
);
205 if (branchPredictor
.predict(inst
->staticInst
,
206 inst
->id
.fetchSeqNum
, inst_pc
,
209 inst
->predictedTaken
= true;
210 inst
->predictedTarget
= inst_pc
;
211 branch
.target
= inst_pc
;
214 DPRINTF(Branch
, "Not attempting prediction for inst: %s\n", *inst
);
217 /* If we predict taken, set branch and update sequence numbers */
218 if (inst
->predictedTaken
) {
219 /* Update the predictionSeqNum and remember the streamSeqNum that it
220 * was associated with */
221 thread
.expectedStreamSeqNum
= inst
->id
.streamSeqNum
;
223 BranchData new_branch
= BranchData(BranchData::BranchPrediction
,
225 inst
->id
.streamSeqNum
, thread
.predictionSeqNum
+ 1,
226 inst
->predictedTarget
, inst
);
228 /* Mark with a new prediction number by the stream number of the
229 * instruction causing the prediction */
230 thread
.predictionSeqNum
++;
233 DPRINTF(Branch
, "Branch predicted taken inst: %s target: %s"
234 " new predictionSeqNum: %d\n",
235 *inst
, inst
->predictedTarget
, thread
.predictionSeqNum
);
242 /* Push input onto appropriate input buffer */
243 if (!inp
.outputWire
->isBubble())
244 inputBuffer
[inp
.outputWire
->id
.threadId
].setTail(*inp
.outputWire
);
246 ForwardInstData
&insts_out
= *out
.inputWire
;
247 BranchData prediction
;
248 BranchData
&branch_inp
= *branchInp
.outputWire
;
250 assert(insts_out
.isBubble());
252 /* React to branches from Execute to update local branch prediction
254 updateBranchPrediction(branch_inp
);
256 /* If a branch arrives, don't try and do anything about it. Only
257 * react to your own predictions */
258 if (branch_inp
.isStreamChange()) {
259 DPRINTF(Fetch
, "Dumping all input as a stream changing branch"
261 dumpAllInput(branch_inp
.threadId
);
262 fetchInfo
[branch_inp
.threadId
].havePC
= false;
265 assert(insts_out
.isBubble());
266 /* Even when blocked, clear out input lines with the wrong
267 * prediction sequence number */
268 for (ThreadID tid
= 0; tid
< cpu
.numThreads
; tid
++) {
269 Fetch2ThreadInfo
&thread
= fetchInfo
[tid
];
271 thread
.blocked
= !nextStageReserve
[tid
].canReserve();
273 const ForwardLineData
*line_in
= getInput(tid
);
276 thread
.expectedStreamSeqNum
== line_in
->id
.streamSeqNum
&&
277 thread
.predictionSeqNum
!= line_in
->id
.predictionSeqNum
)
279 DPRINTF(Fetch
, "Discarding line %s"
280 " due to predictionSeqNum mismatch (expected: %d)\n",
281 line_in
->id
, thread
.predictionSeqNum
);
284 fetchInfo
[tid
].havePC
= false;
286 if (processMoreThanOneInput
) {
287 DPRINTF(Fetch
, "Wrapping\n");
288 line_in
= getInput(tid
);
295 ThreadID tid
= getScheduledThread();
296 DPRINTF(Fetch
, "Scheduled Thread: %d\n", tid
);
298 assert(insts_out
.isBubble());
299 if (tid
!= InvalidThreadID
) {
300 Fetch2ThreadInfo
&fetch_info
= fetchInfo
[tid
];
302 const ForwardLineData
*line_in
= getInput(tid
);
304 unsigned int output_index
= 0;
306 /* Pack instructions into the output while we can. This may involve
307 * using more than one input line. Note that lineWidth will be 0
308 * for faulting lines */
310 (line_in
->isFault() ||
311 fetch_info
.inputIndex
< line_in
->lineWidth
) && /* More input */
312 output_index
< outputWidth
&& /* More output to fill */
313 prediction
.isBubble() /* No predicted branch */)
315 ThreadContext
*thread
= cpu
.getContext(line_in
->id
.threadId
);
316 TheISA::Decoder
*decoder
= thread
->getDecoderPtr();
318 /* Discard line due to prediction sequence number being wrong but
319 * without the streamSeqNum number having changed */
321 fetch_info
.expectedStreamSeqNum
== line_in
->id
.streamSeqNum
&&
322 fetch_info
.predictionSeqNum
!= line_in
->id
.predictionSeqNum
;
324 /* Set the PC if the stream changes. Setting havePC to false in
325 * a previous cycle handles all other change of flow of control
327 bool set_pc
= fetch_info
.lastStreamSeqNum
!= line_in
->id
.streamSeqNum
;
329 if (!discard_line
&& (!fetch_info
.havePC
|| set_pc
)) {
330 /* Set the inputIndex to be the MachInst-aligned offset
331 * from lineBaseAddr of the new PC value */
332 fetch_info
.inputIndex
=
333 (line_in
->pc
.instAddr() & BaseCPU::PCMask
) -
334 line_in
->lineBaseAddr
;
335 DPRINTF(Fetch
, "Setting new PC value: %s inputIndex: 0x%x"
336 " lineBaseAddr: 0x%x lineWidth: 0x%x\n",
337 line_in
->pc
, fetch_info
.inputIndex
, line_in
->lineBaseAddr
,
339 fetch_info
.pc
= line_in
->pc
;
340 fetch_info
.havePC
= true;
344 /* The generated instruction. Leave as NULL if no instruction
345 * is to be packed into the output */
346 MinorDynInstPtr dyn_inst
= NULL
;
349 /* Rest of line was from an older prediction in the same
351 DPRINTF(Fetch
, "Discarding line %s (from inputIndex: %d)"
352 " due to predictionSeqNum mismatch (expected: %d)\n",
353 line_in
->id
, fetch_info
.inputIndex
,
354 fetch_info
.predictionSeqNum
);
355 } else if (line_in
->isFault()) {
356 /* Pack a fault as a MinorDynInst with ->fault set */
358 /* Make a new instruction and pick up the line, stream,
359 * prediction, thread ids from the incoming line */
360 dyn_inst
= new MinorDynInst(line_in
->id
);
362 /* Fetch and prediction sequence numbers originate here */
363 dyn_inst
->id
.fetchSeqNum
= fetch_info
.fetchSeqNum
;
364 dyn_inst
->id
.predictionSeqNum
= fetch_info
.predictionSeqNum
;
365 /* To complete the set, test that exec sequence number has
367 assert(dyn_inst
->id
.execSeqNum
== 0);
369 dyn_inst
->pc
= fetch_info
.pc
;
371 /* Pack a faulting instruction but allow other
372 * instructions to be generated. (Fetch2 makes no
373 * immediate judgement about streamSeqNum) */
374 dyn_inst
->fault
= line_in
->fault
;
375 DPRINTF(Fetch
, "Fault being passed output_index: "
376 "%d: %s\n", output_index
, dyn_inst
->fault
->name());
378 uint8_t *line
= line_in
->line
;
380 TheISA::MachInst inst_word
;
381 /* The instruction is wholly in the line, can just
383 inst_word
= TheISA::gtoh(
384 *(reinterpret_cast<TheISA::MachInst
*>
385 (line
+ fetch_info
.inputIndex
)));
387 if (!decoder
->instReady()) {
388 decoder
->moreBytes(fetch_info
.pc
,
389 line_in
->lineBaseAddr
+ fetch_info
.inputIndex
,
391 DPRINTF(Fetch
, "Offering MachInst to decoder addr: 0x%x\n",
392 line_in
->lineBaseAddr
+ fetch_info
.inputIndex
);
395 /* Maybe make the above a loop to accomodate ISAs with
396 * instructions longer than sizeof(MachInst) */
398 if (decoder
->instReady()) {
399 /* Make a new instruction and pick up the line, stream,
400 * prediction, thread ids from the incoming line */
401 dyn_inst
= new MinorDynInst(line_in
->id
);
403 /* Fetch and prediction sequence numbers originate here */
404 dyn_inst
->id
.fetchSeqNum
= fetch_info
.fetchSeqNum
;
405 dyn_inst
->id
.predictionSeqNum
= fetch_info
.predictionSeqNum
;
406 /* To complete the set, test that exec sequence number
407 * has not been set */
408 assert(dyn_inst
->id
.execSeqNum
== 0);
410 /* Note that the decoder can update the given PC.
411 * Remember not to assign it until *after* calling
413 StaticInstPtr decoded_inst
= decoder
->decode(fetch_info
.pc
);
414 dyn_inst
->staticInst
= decoded_inst
;
416 dyn_inst
->pc
= fetch_info
.pc
;
417 DPRINTF(Fetch
, "decoder inst %s\n", *dyn_inst
);
419 // Collect some basic inst class stats
420 if (decoded_inst
->isLoad())
422 else if (decoded_inst
->isStore())
424 else if (decoded_inst
->isVector())
426 else if (decoded_inst
->isFloating())
428 else if (decoded_inst
->isInteger())
431 DPRINTF(Fetch
, "Instruction extracted from line %s"
432 " lineWidth: %d output_index: %d inputIndex: %d"
433 " pc: %s inst: %s\n",
435 line_in
->lineWidth
, output_index
, fetch_info
.inputIndex
,
436 fetch_info
.pc
, *dyn_inst
);
438 #if THE_ISA == X86_ISA || THE_ISA == ARM_ISA
439 /* In SE mode, it's possible to branch to a microop when
440 * replaying faults such as page faults (or simply
441 * intra-microcode branches in X86). Unfortunately,
442 * as Minor has micro-op decomposition in a separate
443 * pipeline stage from instruction decomposition, the
444 * following advancePC (which may follow a branch with
445 * microPC() != 0) *must* see a fresh macroop. This
446 * kludge should be improved with an addition to PCState
447 * but I offer it in this form for the moment
449 * X86 can branch within microops so we need to deal with
450 * the case that, after a branch, the first un-advanced PC
451 * may be pointing to a microop other than 0. Once
452 * advanced, however, the microop number *must* be 0 */
453 fetch_info
.pc
.upc(0);
454 fetch_info
.pc
.nupc(1);
457 /* Advance PC for the next instruction */
458 TheISA::advancePC(fetch_info
.pc
, decoded_inst
);
460 /* Predict any branches and issue a branch if
462 predictBranch(dyn_inst
, prediction
);
464 DPRINTF(Fetch
, "Inst not ready yet\n");
467 /* Step on the pointer into the line if there's no
468 * complete instruction waiting */
469 if (decoder
->needMoreBytes()) {
470 fetch_info
.inputIndex
+= sizeof(TheISA::MachInst
);
472 DPRINTF(Fetch
, "Updated inputIndex value PC: %s"
473 " inputIndex: 0x%x lineBaseAddr: 0x%x lineWidth: 0x%x\n",
474 line_in
->pc
, fetch_info
.inputIndex
, line_in
->lineBaseAddr
,
480 /* Step to next sequence number */
481 fetch_info
.fetchSeqNum
++;
483 /* Correctly size the output before writing */
484 if (output_index
== 0) {
485 insts_out
.resize(outputWidth
);
487 /* Pack the generated dynamic instruction into the output */
488 insts_out
.insts
[output_index
] = dyn_inst
;
491 /* Output MinorTrace instruction info for
492 * pre-microop decomposition macroops */
493 if (DTRACE(MinorTrace
) && !dyn_inst
->isFault() &&
494 dyn_inst
->staticInst
->isMacroop())
496 dyn_inst
->minorTraceInst(*this);
500 /* Remember the streamSeqNum of this line so we can tell when
501 * we change stream */
502 fetch_info
.lastStreamSeqNum
= line_in
->id
.streamSeqNum
;
504 /* Asked to discard line or there was a branch or fault */
505 if (!prediction
.isBubble() || /* The remains of a
506 line with a prediction in it */
507 line_in
->isFault() /* A line which is just a fault */)
509 DPRINTF(Fetch
, "Discarding all input on branch/fault\n");
511 fetch_info
.havePC
= false;
513 } else if (discard_line
) {
514 /* Just discard one line, one's behind it may have new
515 * stream sequence numbers. There's a DPRINTF above
518 fetch_info
.havePC
= false;
520 } else if (fetch_info
.inputIndex
== line_in
->lineWidth
) {
521 /* Got to end of a line, pop the line but keep PC
522 * in case this is a line-wrapping inst. */
527 if (!line_in
&& processMoreThanOneInput
) {
528 DPRINTF(Fetch
, "Wrapping\n");
529 line_in
= getInput(tid
);
533 /* The rest of the output (if any) should already have been packed
534 * with bubble instructions by insts_out's initialisation */
536 if (tid
== InvalidThreadID
) {
537 assert(insts_out
.isBubble());
539 /** Reserve a slot in the next stage and output data */
540 *predictionOut
.inputWire
= prediction
;
542 /* If we generated output, reserve space for the result in the next stage
543 * and mark the stage as being active this cycle */
544 if (!insts_out
.isBubble()) {
545 /* Note activity of following buffer */
546 cpu
.activityRecorder
->activity();
547 insts_out
.threadId
= tid
;
548 nextStageReserve
[tid
].reserve();
551 /* If we still have input to process and somewhere to put it,
552 * mark stage as active */
553 for (ThreadID i
= 0; i
< cpu
.numThreads
; i
++)
555 if (getInput(i
) && nextStageReserve
[i
].canReserve()) {
556 cpu
.activityRecorder
->activateStage(Pipeline::Fetch2StageId
);
561 /* Make sure the input (if any left) is pushed */
562 if (!inp
.outputWire
->isBubble())
563 inputBuffer
[inp
.outputWire
->id
.threadId
].pushTail();
567 Fetch2::getScheduledThread()
569 /* Select thread via policy. */
570 std::vector
<ThreadID
> priority_list
;
572 switch (cpu
.threadPolicy
) {
573 case Enums::SingleThreaded
:
574 priority_list
.push_back(0);
576 case Enums::RoundRobin
:
577 priority_list
= cpu
.roundRobinPriority(threadPriority
);
580 priority_list
= cpu
.randomPriority();
583 panic("Unknown fetch policy");
586 for (auto tid
: priority_list
) {
587 if (cpu
.getContext(tid
)->status() == ThreadContext::Active
&&
589 !fetchInfo
[tid
].blocked
) {
590 threadPriority
= tid
;
595 return InvalidThreadID
;
601 for (const auto &buffer
: inputBuffer
) {
606 return (*inp
.outputWire
).isBubble() &&
607 (*predictionOut
.inputWire
).isBubble();
613 using namespace Stats
;
616 .name(name() + ".int_instructions")
617 .desc("Number of integer instructions successfully decoded")
621 .name(name() + ".fp_instructions")
622 .desc("Number of floating point instructions successfully decoded")
626 .name(name() + ".vec_instructions")
627 .desc("Number of SIMD instructions successfully decoded")
631 .name(name() + ".load_instructions")
632 .desc("Number of memory load instructions successfully decoded")
636 .name(name() + ".store_instructions")
637 .desc("Number of memory store instructions successfully decoded")
642 Fetch2::minorTrace() const
644 std::ostringstream data
;
646 if (fetchInfo
[0].blocked
)
649 (*out
.inputWire
).reportData(data
);
651 MINORTRACE("inputIndex=%d havePC=%d predictionSeqNum=%d insts=%s\n",
652 fetchInfo
[0].inputIndex
, fetchInfo
[0].havePC
, fetchInfo
[0].predictionSeqNum
, data
.str());
653 inputBuffer
[0].minorTrace();