inorder-bpred: edits to handle non-delay-slot ISAs
[gem5.git] / src / cpu / inorder / resources / fetch_seq_unit.cc
1 /*
2 * Copyright (c) 2007 MIPS Technologies, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * Authors: Korey Sewell
29 *
30 */
31
32 #include "cpu/inorder/resources/fetch_seq_unit.hh"
33 #include "cpu/inorder/resource_pool.hh"
34
35 using namespace std;
36 using namespace TheISA;
37 using namespace ThePipeline;
38
39 FetchSeqUnit::FetchSeqUnit(std::string res_name, int res_id, int res_width,
40 int res_latency, InOrderCPU *_cpu, ThePipeline::Params *params)
41 : Resource(res_name, res_id, res_width, res_latency, _cpu),
42 instSize(sizeof(MachInst))
43 {
44 for (int tid = 0; tid < ThePipeline::MaxThreads; tid++) {
45 delaySlotInfo[tid].numInsts = 0;
46 delaySlotInfo[tid].targetReady = false;
47
48 pcValid[tid] = false;
49 pcBlockStage[tid] = 0;
50
51 squashSeqNum[tid] = (InstSeqNum)-1;
52 lastSquashCycle[tid] = 0;
53 }
54 }
55
56 void
57 FetchSeqUnit::init()
58 {
59 resourceEvent = new FetchSeqEvent[width];
60
61 initSlots();
62 }
63
64 void
65 FetchSeqUnit::execute(int slot_num)
66 {
67 // After this is working, change this to a reinterpret cast
68 // for performance considerations
69 ResourceRequest* fs_req = reqMap[slot_num];
70 DynInstPtr inst = fs_req->inst;
71 int tid = inst->readTid();
72 int stage_num = fs_req->getStageNum();
73 int seq_num = inst->seqNum;
74
75 fs_req->fault = NoFault;
76
77 switch (fs_req->cmd)
78 {
79 case AssignNextPC:
80 {
81 if (pcValid[tid]) {
82
83 if (delaySlotInfo[tid].targetReady &&
84 delaySlotInfo[tid].numInsts == 0) {
85 // Set PC to target
86 PC[tid] = delaySlotInfo[tid].targetAddr; //next_PC
87 nextPC[tid] = PC[tid] + instSize; //next_NPC
88 nextNPC[tid] = PC[tid] + (2 * instSize);
89
90 delaySlotInfo[tid].targetReady = false;
91
92 DPRINTF(InOrderFetchSeq, "[tid:%i]: Setting PC to delay slot target\n",tid);
93 }
94
95 inst->setPC(PC[tid]);
96 inst->setNextPC(PC[tid] + instSize);
97 inst->setNextNPC(PC[tid] + (instSize * 2));
98
99 #if ISA_HAS_DELAY_SLOT
100 inst->setPredTarg(inst->readNextNPC());
101 #else
102 inst->setPredTarg(inst->readNextPC());
103 #endif
104 inst->setMemAddr(PC[tid]);
105 inst->setSeqNum(cpu->getAndIncrementInstSeq(tid));
106
107 DPRINTF(InOrderFetchSeq, "[tid:%i]: Assigning [sn:%i] to PC %08p, NPC %08p, NNPC %08p\n", tid,
108 inst->seqNum, inst->readPC(), inst->readNextPC(), inst->readNextNPC());
109
110 if (delaySlotInfo[tid].numInsts > 0) {
111 --delaySlotInfo[tid].numInsts;
112
113 // It's OK to set PC to target of branch
114 if (delaySlotInfo[tid].numInsts == 0) {
115 delaySlotInfo[tid].targetReady = true;
116 }
117
118 DPRINTF(InOrderFetchSeq, "[tid:%i]: %i delay slot inst(s) left to"
119 " process.\n", tid, delaySlotInfo[tid].numInsts);
120 }
121
122 PC[tid] = nextPC[tid];
123 nextPC[tid] = nextNPC[tid];
124 nextNPC[tid] += instSize;
125
126 fs_req->done();
127 } else {
128 DPRINTF(InOrderStall, "STALL: [tid:%i]: NPC not valid\n", tid);
129 fs_req->setCompleted(false);
130 }
131 }
132 break;
133
134 case UpdateTargetPC:
135 {
136 if (inst->isControl()) {
137 // If it's a return, then we must wait for resolved address.
138 if (inst->isReturn() && !inst->predTaken()) {
139 cpu->pipelineStage[stage_num]->toPrevStages->stageBlock[stage_num][tid] = true;
140 pcValid[tid] = false;
141 pcBlockStage[tid] = stage_num;
142 } else if (inst->isCondDelaySlot() && !inst->predTaken()) {
143 // Not-Taken AND Conditional Control
144 DPRINTF(InOrderFetchSeq, "[tid:%i]: [sn:%i]: [PC:%08p] Predicted Not-Taken Cond. "
145 "Delay inst. Skipping delay slot and Updating PC to %08p\n",
146 tid, inst->seqNum, inst->readPC(), inst->readPredTarg());
147
148 DPRINTF(InOrderFetchSeq, "[tid:%i] Setting up squash to start from stage %i, after [sn:%i].\n",
149 tid, stage_num, seq_num);
150
151 inst->bdelaySeqNum = seq_num;
152 inst->squashingStage = stage_num;
153
154 squashAfterInst(inst, stage_num, tid);
155 } else if (!inst->isCondDelaySlot() && !inst->predTaken()) {
156 // Not-Taken Control
157 DPRINTF(InOrderFetchSeq, "[tid:%i]: [sn:%i]: Predicted Not-Taken Control "
158 "inst. updating PC to %08p\n", tid, inst->seqNum,
159 inst->readNextPC());
160 #if ISA_HAS_DELAY_SLOT
161 ++delaySlotInfo[tid].numInsts;
162 delaySlotInfo[tid].targetReady = false;
163 delaySlotInfo[tid].targetAddr = inst->readNextNPC();
164 #else
165 assert(delaySlotInfo[tid].numInsts == 0);
166 #endif
167 } else if (inst->predTaken()) {
168 // Taken Control
169 #if ISA_HAS_DELAY_SLOT
170 ++delaySlotInfo[tid].numInsts;
171 delaySlotInfo[tid].targetReady = false;
172 delaySlotInfo[tid].targetAddr = inst->readPredTarg();
173
174 DPRINTF(InOrderFetchSeq, "[tid:%i]: [sn:%i] Updating delay slot target "
175 "to PC %08p\n", tid, inst->seqNum, inst->readPredTarg());
176 inst->bdelaySeqNum = seq_num + 1;
177 #else
178 inst->bdelaySeqNum = seq_num;
179 assert(delaySlotInfo[tid].numInsts == 0);
180 #endif
181
182 inst->squashingStage = stage_num;
183
184 DPRINTF(InOrderFetchSeq, "[tid:%i] Setting up squash to start from stage %i, after [sn:%i].\n",
185 tid, stage_num, inst->bdelaySeqNum);
186
187 // Do Squashing
188 squashAfterInst(inst, stage_num, tid);
189 }
190 } else {
191 DPRINTF(InOrderFetchSeq, "[tid:%i]: [sn:%i]: Ignoring branch target update "
192 "since then is not a control instruction.\n", tid, inst->seqNum);
193 }
194
195 fs_req->done();
196 }
197 break;
198
199 default:
200 fatal("Unrecognized command to %s", resName);
201 }
202 }
203
204 inline void
205 FetchSeqUnit::squashAfterInst(DynInstPtr inst, int stage_num, unsigned tid)
206 {
207 // Squash In Pipeline Stage
208 cpu->pipelineStage[stage_num]->squashDueToBranch(inst, tid);
209
210 // Squash inside current resource, so if there needs to be fetching on same cycle
211 // the fetch information will be correct.
212 // squash(inst, stage_num, inst->bdelaySeqNum, tid);
213
214 // Schedule Squash Through-out Resource Pool
215 cpu->resPool->scheduleEvent((InOrderCPU::CPUEventType)ResourcePool::SquashAll, inst, 0);
216 }
217 void
218 FetchSeqUnit::squash(DynInstPtr inst, int squash_stage,
219 InstSeqNum squash_seq_num, unsigned tid)
220 {
221 DPRINTF(InOrderFetchSeq, "[tid:%i]: Updating due to squash from stage %i.\n",
222 tid, squash_stage);
223
224 InstSeqNum done_seq_num = inst->bdelaySeqNum;
225 Addr new_PC = inst->readPredTarg();
226
227 if (squashSeqNum[tid] <= done_seq_num &&
228 lastSquashCycle[tid] == curTick) {
229 DPRINTF(InOrderFetchSeq, "[tid:%i]: Ignoring squash from stage %i, since"
230 "there is an outstanding squash that is older.\n",
231 tid, squash_stage);
232 } else {
233 squashSeqNum[tid] = done_seq_num;
234 lastSquashCycle[tid] = curTick;
235
236 // If The very next instruction number is the done seq. num,
237 // then we haven't seen the delay slot yet ... if it isn't
238 // the last done_seq_num then this is the delay slot inst.
239 if (cpu->nextInstSeqNum(tid) != done_seq_num &&
240 !inst->procDelaySlotOnMispred) {
241 delaySlotInfo[tid].numInsts = 0;
242 delaySlotInfo[tid].targetReady = false;
243
244 // Reset PC
245 PC[tid] = new_PC;
246 nextPC[tid] = new_PC + instSize;
247 nextNPC[tid] = new_PC + (2 * instSize);
248
249 DPRINTF(InOrderFetchSeq, "[tid:%i]: Setting PC to %08p.\n",
250 tid, PC[tid]);
251 } else {
252 #if !ISA_HAS_DELAY_SLOT
253 assert(0);
254 #endif
255
256 delaySlotInfo[tid].numInsts = 1;
257 delaySlotInfo[tid].targetReady = false;
258 delaySlotInfo[tid].targetAddr = (inst->procDelaySlotOnMispred) ? inst->branchTarget() : new_PC;
259
260 // Reset PC to Delay Slot Instruction
261 if (inst->procDelaySlotOnMispred) {
262 PC[tid] = new_PC;
263 nextPC[tid] = new_PC + instSize;
264 nextNPC[tid] = new_PC + (2 * instSize);
265 }
266
267 }
268
269 // Unblock Any Stages Waiting for this information to be updated ...
270 if (!pcValid[tid]) {
271 cpu->pipelineStage[pcBlockStage[tid]]->toPrevStages->stageUnblock[pcBlockStage[tid]][tid] = true;
272 }
273
274 pcValid[tid] = true;
275 }
276
277 Resource::squash(inst, squash_stage, squash_seq_num, tid);
278 }
279
280 FetchSeqUnit::FetchSeqEvent::FetchSeqEvent()
281 : ResourceEvent()
282 { }
283
284 void
285 FetchSeqUnit::FetchSeqEvent::process()
286 {
287 FetchSeqUnit* fs_res = dynamic_cast<FetchSeqUnit*>(resource);
288 assert(fs_res);
289
290 for (int i=0; i < MaxThreads; i++) {
291 fs_res->PC[i] = fs_res->cpu->readPC(i);
292 fs_res->nextPC[i] = fs_res->cpu->readNextPC(i);
293 fs_res->nextNPC[i] = fs_res->cpu->readNextNPC(i);
294 DPRINTF(InOrderFetchSeq, "[tid:%i]: Setting PC:%08p NPC:%08p NNPC:%08p.\n",
295 fs_res->PC[i], fs_res->nextPC[i], fs_res->nextNPC[i]);
296
297 fs_res->pcValid[i] = true;
298 }
299
300 //cpu->fetchPriorityList.push_back(tid);
301 }
302
303
304 void
305 FetchSeqUnit::activateThread(unsigned tid)
306 {
307 pcValid[tid] = true;
308
309 PC[tid] = cpu->readPC(tid);
310 nextPC[tid] = cpu->readNextPC(tid);
311 nextNPC[tid] = cpu->readNextNPC(tid);
312
313 cpu->fetchPriorityList.push_back(tid);
314
315 DPRINTF(InOrderFetchSeq, "[tid:%i]: Reading PC:%08p NPC:%08p NNPC:%08p.\n",
316 tid, PC[tid], nextPC[tid], nextNPC[tid]);
317 }
318
319 void
320 FetchSeqUnit::deactivateThread(unsigned tid)
321 {
322 delaySlotInfo[tid].numInsts = 0;
323 delaySlotInfo[tid].targetReady = false;
324
325 pcValid[tid] = false;
326 pcBlockStage[tid] = 0;
327
328 squashSeqNum[tid] = (InstSeqNum)-1;
329 lastSquashCycle[tid] = 0;
330
331 std::list<unsigned>::iterator thread_it = find(cpu->fetchPriorityList.begin(),
332 cpu->fetchPriorityList.end(),
333 tid);
334
335 if (thread_it != cpu->fetchPriorityList.end())
336 cpu->fetchPriorityList.erase(thread_it);
337 }