inorder: squash from memory stall
[gem5.git] / src / cpu / inorder / resources / fetch_seq_unit.cc
1 /*
2 * Copyright (c) 2007 MIPS Technologies, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * Authors: Korey Sewell
29 *
30 */
31
32 #include "config/the_isa.hh"
33 #include "cpu/inorder/resources/fetch_seq_unit.hh"
34 #include "cpu/inorder/resource_pool.hh"
35
36 using namespace std;
37 using namespace TheISA;
38 using namespace ThePipeline;
39
40 FetchSeqUnit::FetchSeqUnit(std::string res_name, int res_id, int res_width,
41 int res_latency, InOrderCPU *_cpu, ThePipeline::Params *params)
42 : Resource(res_name, res_id, res_width, res_latency, _cpu),
43 instSize(sizeof(MachInst))
44 {
45 for (ThreadID tid = 0; tid < ThePipeline::MaxThreads; tid++) {
46 delaySlotInfo[tid].numInsts = 0;
47 delaySlotInfo[tid].targetReady = false;
48
49 pcValid[tid] = false;
50 pcBlockStage[tid] = 0;
51
52 squashSeqNum[tid] = (InstSeqNum)-1;
53 lastSquashCycle[tid] = 0;
54 }
55 }
56
57 FetchSeqUnit::~FetchSeqUnit()
58 {
59 delete [] resourceEvent;
60 }
61
62 void
63 FetchSeqUnit::init()
64 {
65 resourceEvent = new FetchSeqEvent[width];
66
67 initSlots();
68 }
69
70 void
71 FetchSeqUnit::execute(int slot_num)
72 {
73 // After this is working, change this to a reinterpret cast
74 // for performance considerations
75 ResourceRequest* fs_req = reqMap[slot_num];
76 DynInstPtr inst = fs_req->inst;
77 ThreadID tid = inst->readTid();
78 int stage_num = fs_req->getStageNum();
79 int seq_num = inst->seqNum;
80
81 fs_req->fault = NoFault;
82
83 switch (fs_req->cmd)
84 {
85 case AssignNextPC:
86 {
87 if (pcValid[tid]) {
88
89 if (delaySlotInfo[tid].targetReady &&
90 delaySlotInfo[tid].numInsts == 0) {
91 // Set PC to target
92 PC[tid] = delaySlotInfo[tid].targetAddr; //next_PC
93 nextPC[tid] = PC[tid] + instSize; //next_NPC
94 nextNPC[tid] = PC[tid] + (2 * instSize);
95
96 delaySlotInfo[tid].targetReady = false;
97
98 DPRINTF(InOrderFetchSeq, "[tid:%i]: Setting PC to delay slot target\n",tid);
99 }
100
101 inst->setPC(PC[tid]);
102 inst->setNextPC(PC[tid] + instSize);
103 inst->setNextNPC(PC[tid] + (instSize * 2));
104
105 #if ISA_HAS_DELAY_SLOT
106 inst->setPredTarg(inst->readNextNPC());
107 #else
108 inst->setPredTarg(inst->readNextPC());
109 #endif
110 inst->setMemAddr(PC[tid]);
111 inst->setSeqNum(cpu->getAndIncrementInstSeq(tid));
112
113 DPRINTF(InOrderFetchSeq, "[tid:%i]: Assigning [sn:%i] to PC %08p, NPC %08p, NNPC %08p\n", tid,
114 inst->seqNum, inst->readPC(), inst->readNextPC(), inst->readNextNPC());
115
116 if (delaySlotInfo[tid].numInsts > 0) {
117 --delaySlotInfo[tid].numInsts;
118
119 // It's OK to set PC to target of branch
120 if (delaySlotInfo[tid].numInsts == 0) {
121 delaySlotInfo[tid].targetReady = true;
122 }
123
124 DPRINTF(InOrderFetchSeq, "[tid:%i]: %i delay slot inst(s) left to"
125 " process.\n", tid, delaySlotInfo[tid].numInsts);
126 }
127
128 PC[tid] = nextPC[tid];
129 nextPC[tid] = nextNPC[tid];
130 nextNPC[tid] += instSize;
131
132 fs_req->done();
133 } else {
134 DPRINTF(InOrderStall, "STALL: [tid:%i]: NPC not valid\n", tid);
135 fs_req->setCompleted(false);
136 }
137 }
138 break;
139
140 case UpdateTargetPC:
141 {
142 if (inst->isControl()) {
143 // If it's a return, then we must wait for resolved address.
144 if (inst->isReturn() && !inst->predTaken()) {
145 cpu->pipelineStage[stage_num]->toPrevStages->stageBlock[stage_num][tid] = true;
146 pcValid[tid] = false;
147 pcBlockStage[tid] = stage_num;
148 } else if (inst->isCondDelaySlot() && !inst->predTaken()) {
149 // Not-Taken AND Conditional Control
150 DPRINTF(InOrderFetchSeq, "[tid:%i]: [sn:%i]: [PC:%08p] Predicted Not-Taken Cond. "
151 "Delay inst. Skipping delay slot and Updating PC to %08p\n",
152 tid, inst->seqNum, inst->readPC(), inst->readPredTarg());
153
154 DPRINTF(InOrderFetchSeq, "[tid:%i] Setting up squash to start from stage %i, after [sn:%i].\n",
155 tid, stage_num, seq_num);
156
157 inst->bdelaySeqNum = seq_num;
158 inst->squashingStage = stage_num;
159
160 squashAfterInst(inst, stage_num, tid);
161 } else if (!inst->isCondDelaySlot() && !inst->predTaken()) {
162 // Not-Taken Control
163 DPRINTF(InOrderFetchSeq, "[tid:%i]: [sn:%i]: Predicted Not-Taken Control "
164 "inst. updating PC to %08p\n", tid, inst->seqNum,
165 inst->readNextPC());
166 #if ISA_HAS_DELAY_SLOT
167 ++delaySlotInfo[tid].numInsts;
168 delaySlotInfo[tid].targetReady = false;
169 delaySlotInfo[tid].targetAddr = inst->readNextNPC();
170 #else
171 assert(delaySlotInfo[tid].numInsts == 0);
172 #endif
173 } else if (inst->predTaken()) {
174 // Taken Control
175 #if ISA_HAS_DELAY_SLOT
176 ++delaySlotInfo[tid].numInsts;
177 delaySlotInfo[tid].targetReady = false;
178 delaySlotInfo[tid].targetAddr = inst->readPredTarg();
179
180 DPRINTF(InOrderFetchSeq, "[tid:%i]: [sn:%i] Updating delay slot target "
181 "to PC %08p\n", tid, inst->seqNum, inst->readPredTarg());
182 inst->bdelaySeqNum = seq_num + 1;
183 #else
184 inst->bdelaySeqNum = seq_num;
185 assert(delaySlotInfo[tid].numInsts == 0);
186 #endif
187
188 inst->squashingStage = stage_num;
189
190 DPRINTF(InOrderFetchSeq, "[tid:%i] Setting up squash to start from stage %i, after [sn:%i].\n",
191 tid, stage_num, inst->bdelaySeqNum);
192
193 // Do Squashing
194 squashAfterInst(inst, stage_num, tid);
195 }
196 } else {
197 DPRINTF(InOrderFetchSeq, "[tid:%i]: [sn:%i]: Ignoring branch target update "
198 "since then is not a control instruction.\n", tid, inst->seqNum);
199 }
200
201 fs_req->done();
202 }
203 break;
204
205 default:
206 fatal("Unrecognized command to %s", resName);
207 }
208 }
209
210 inline void
211 FetchSeqUnit::squashAfterInst(DynInstPtr inst, int stage_num, ThreadID tid)
212 {
213 // Squash In Pipeline Stage
214 cpu->pipelineStage[stage_num]->squashDueToBranch(inst, tid);
215
216 // Squash inside current resource, so if there needs to be fetching on same cycle
217 // the fetch information will be correct.
218 // squash(inst, stage_num, inst->bdelaySeqNum, tid);
219
220 // Schedule Squash Through-out Resource Pool
221 cpu->resPool->scheduleEvent((InOrderCPU::CPUEventType)ResourcePool::SquashAll, inst, 0);
222 }
223 void
224 FetchSeqUnit::squash(DynInstPtr inst, int squash_stage,
225 InstSeqNum squash_seq_num, ThreadID tid)
226 {
227 DPRINTF(InOrderFetchSeq, "[tid:%i]: Updating due to squash from stage %i.\n",
228 tid, squash_stage);
229
230 InstSeqNum done_seq_num = inst->bdelaySeqNum;
231
232 // Handles the case where we are squashing because of something that is
233 // not a branch...like a memory stall
234 Addr new_PC = (inst->isControl()) ?
235 inst->readPredTarg() : inst->readPC() + instSize;
236
237 if (squashSeqNum[tid] <= done_seq_num &&
238 lastSquashCycle[tid] == curTick) {
239 DPRINTF(InOrderFetchSeq, "[tid:%i]: Ignoring squash from stage %i, since"
240 "there is an outstanding squash that is older.\n",
241 tid, squash_stage);
242 } else {
243 squashSeqNum[tid] = done_seq_num;
244 lastSquashCycle[tid] = curTick;
245
246 // If The very next instruction number is the done seq. num,
247 // then we haven't seen the delay slot yet ... if it isn't
248 // the last done_seq_num then this is the delay slot inst.
249 if (cpu->nextInstSeqNum(tid) != done_seq_num &&
250 !inst->procDelaySlotOnMispred) {
251 delaySlotInfo[tid].numInsts = 0;
252 delaySlotInfo[tid].targetReady = false;
253
254 // Reset PC
255 PC[tid] = new_PC;
256 nextPC[tid] = new_PC + instSize;
257 nextNPC[tid] = new_PC + (2 * instSize);
258
259 DPRINTF(InOrderFetchSeq, "[tid:%i]: Setting PC to %08p.\n",
260 tid, PC[tid]);
261 } else {
262 #if !ISA_HAS_DELAY_SLOT
263 assert(0);
264 #endif
265
266 delaySlotInfo[tid].numInsts = 1;
267 delaySlotInfo[tid].targetReady = false;
268 delaySlotInfo[tid].targetAddr = (inst->procDelaySlotOnMispred) ? inst->branchTarget() : new_PC;
269
270 // Reset PC to Delay Slot Instruction
271 if (inst->procDelaySlotOnMispred) {
272 PC[tid] = new_PC;
273 nextPC[tid] = new_PC + instSize;
274 nextNPC[tid] = new_PC + (2 * instSize);
275 }
276
277 }
278
279 // Unblock Any Stages Waiting for this information to be updated ...
280 if (!pcValid[tid]) {
281 cpu->pipelineStage[pcBlockStage[tid]]->toPrevStages->stageUnblock[pcBlockStage[tid]][tid] = true;
282 }
283
284 pcValid[tid] = true;
285 }
286
287 Resource::squash(inst, squash_stage, squash_seq_num, tid);
288 }
289
290 FetchSeqUnit::FetchSeqEvent::FetchSeqEvent()
291 : ResourceEvent()
292 { }
293
294 void
295 FetchSeqUnit::FetchSeqEvent::process()
296 {
297 FetchSeqUnit* fs_res = dynamic_cast<FetchSeqUnit*>(resource);
298 assert(fs_res);
299
300 for (int i=0; i < MaxThreads; i++) {
301 fs_res->PC[i] = fs_res->cpu->readPC(i);
302 fs_res->nextPC[i] = fs_res->cpu->readNextPC(i);
303 fs_res->nextNPC[i] = fs_res->cpu->readNextNPC(i);
304 DPRINTF(InOrderFetchSeq, "[tid:%i]: Setting PC:%08p NPC:%08p NNPC:%08p.\n",
305 fs_res->PC[i], fs_res->nextPC[i], fs_res->nextNPC[i]);
306
307 fs_res->pcValid[i] = true;
308 }
309
310 //cpu->fetchPriorityList.push_back(tid);
311 }
312
313
314 void
315 FetchSeqUnit::activateThread(ThreadID tid)
316 {
317 pcValid[tid] = true;
318
319 PC[tid] = cpu->readPC(tid);
320 nextPC[tid] = cpu->readNextPC(tid);
321 nextNPC[tid] = cpu->readNextNPC(tid);
322
323 cpu->fetchPriorityList.push_back(tid);
324
325 DPRINTF(InOrderFetchSeq, "[tid:%i]: Reading PC:%08p NPC:%08p NNPC:%08p.\n",
326 tid, PC[tid], nextPC[tid], nextNPC[tid]);
327 }
328
329 void
330 FetchSeqUnit::deactivateThread(ThreadID tid)
331 {
332 delaySlotInfo[tid].numInsts = 0;
333 delaySlotInfo[tid].targetReady = false;
334
335 pcValid[tid] = false;
336 pcBlockStage[tid] = 0;
337
338 squashSeqNum[tid] = (InstSeqNum)-1;
339 lastSquashCycle[tid] = 0;
340
341 list<ThreadID>::iterator thread_it = find(cpu->fetchPriorityList.begin(),
342 cpu->fetchPriorityList.end(),
343 tid);
344
345 if (thread_it != cpu->fetchPriorityList.end())
346 cpu->fetchPriorityList.erase(thread_it);
347 }
348
349 void
350 FetchSeqUnit::suspendThread(ThreadID tid)
351 {
352 deactivateThread(tid);
353 }
354
355 void
356 FetchSeqUnit::updateAfterContextSwitch(DynInstPtr inst, ThreadID tid)
357 {
358 pcValid[tid] = true;
359
360 if (cpu->thread[tid]->lastGradIsBranch) {
361 /** This function assumes that the instruction causing the context
362 * switch was right after the branch. Thus, if it's not, then
363 * we are updating incorrectly here
364 */
365 assert(cpu->thread[tid]->lastBranchNextPC == inst->readPC());
366
367 PC[tid] = cpu->thread[tid]->lastBranchNextNPC;
368 nextPC[tid] = PC[tid] + instSize;
369 nextNPC[tid] = nextPC[tid] + instSize;
370 } else {
371 PC[tid] = inst->readNextPC();
372 nextPC[tid] = inst->readNextNPC();
373 nextNPC[tid] = inst->readNextNPC() + instSize;
374 }
375
376 DPRINTF(InOrderFetchSeq, "[tid:%i]: Updating PCs due to Context Switch."
377 "Assigning PC:%08p NPC:%08p NNPC:%08p.\n", tid, PC[tid],
378 nextPC[tid], nextNPC[tid]);
379 }