gem5.git: src/cpu/o3/fetch_impl.hh
/*
 * Copyright (c) 2004-2005 The Regents of The University of Michigan
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

// Remove this later; used only for debugging.
#define OPCODE(X) (((X) >> 26) & 0x3f)
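// (Assumes a fixed 32-bit encoding with the primary opcode in bits 31:26,
// as on Alpha, which the inPalMode() check below suggests is the target ISA.)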

#include "arch/isa_traits.hh"
#include "sim/byteswap.hh"
#include "cpu/exetrace.hh"
#include "mem/base_mem.hh"
#include "mem/mem_interface.hh"
#include "mem/mem_req.hh"
#include "cpu/o3/fetch.hh"

#include "sim/root.hh"

template<class Impl>
SimpleFetch<Impl>::CacheCompletionEvent
::CacheCompletionEvent(SimpleFetch *_fetch)
    : Event(&mainEventQueue),
      fetch(_fetch)
{
}

template<class Impl>
void
SimpleFetch<Impl>::CacheCompletionEvent::process()
{
    fetch->processCacheCompletion();
}

template<class Impl>
const char *
SimpleFetch<Impl>::CacheCompletionEvent::description()
{
    return "SimpleFetch cache completion event";
}

template<class Impl>
SimpleFetch<Impl>::SimpleFetch(Params &params)
    : icacheInterface(params.icacheInterface),
      branchPred(params),
      decodeToFetchDelay(params.decodeToFetchDelay),
      renameToFetchDelay(params.renameToFetchDelay),
      iewToFetchDelay(params.iewToFetchDelay),
      commitToFetchDelay(params.commitToFetchDelay),
      fetchWidth(params.fetchWidth)
{
    DPRINTF(Fetch, "Fetch: Fetch constructor called\n");

    // Set status to idle.
    _status = Idle;

    // Create a new memory request.
    memReq = new MemReq();
    // Not sure of this parameter. I think it should be based on the
    // thread number.
#if !FULL_SYSTEM
    memReq->asid = 0;
#else
    memReq->asid = 0;
#endif // FULL_SYSTEM
    memReq->data = new uint8_t[64];

    // Size of cache block.
    cacheBlkSize = icacheInterface ? icacheInterface->getBlockSize() : 64;

    // Create mask to get rid of offset bits.
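    // (Assumes the block size is a power of two, so PC & cacheBlkMask
    // yields the byte offset within a block.)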
    cacheBlkMask = (cacheBlkSize - 1);

    // Get the size of an instruction.
    instSize = sizeof(MachInst);

    // Create space to store a cache line.
    cacheData = new uint8_t[cacheBlkSize];
}

template <class Impl>
void
SimpleFetch<Impl>::regStats()
{
    icacheStallCycles
        .name(name() + ".icacheStallCycles")
        .desc("Number of cycles fetch is stalled on an Icache miss")
        .prereq(icacheStallCycles);

    fetchedInsts
        .name(name() + ".fetchedInsts")
        .desc("Number of instructions fetch has processed")
        .prereq(fetchedInsts);
    predictedBranches
        .name(name() + ".predictedBranches")
        .desc("Number of branches that fetch has predicted taken")
        .prereq(predictedBranches);
    fetchCycles
        .name(name() + ".fetchCycles")
        .desc("Number of cycles fetch has run and was not squashing or"
              " blocked")
        .prereq(fetchCycles);
    fetchSquashCycles
        .name(name() + ".fetchSquashCycles")
        .desc("Number of cycles fetch has spent squashing")
        .prereq(fetchSquashCycles);
    fetchBlockedCycles
        .name(name() + ".fetchBlockedCycles")
        .desc("Number of cycles fetch has spent blocked")
        .prereq(fetchBlockedCycles);
    fetchedCacheLines
        .name(name() + ".fetchedCacheLines")
        .desc("Number of cache lines fetched")
        .prereq(fetchedCacheLines);

    fetch_nisn_dist
        .init(/* base value */ 0,
              /* last value */ fetchWidth,
              /* bucket size */ 1)
        .name(name() + ".FETCH:rate_dist")
        .desc("Number of instructions fetched each cycle (Total)")
        .flags(Stats::pdf)
        ;

    branchPred.regStats();
}

template<class Impl>
void
SimpleFetch<Impl>::setCPU(FullCPU *cpu_ptr)
{
    DPRINTF(Fetch, "Fetch: Setting the CPU pointer.\n");
    cpu = cpu_ptr;
    // This line will be removed eventually.
    memReq->xc = cpu->xcBase();
}

template<class Impl>
void
SimpleFetch<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer)
{
    DPRINTF(Fetch, "Fetch: Setting the time buffer pointer.\n");
    timeBuffer = time_buffer;

    // Create wires to get information from proper places in time buffer.
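    // (A negative wire index reads what the corresponding stage wrote
    // that many cycles earlier, modeling the inter-stage delay.)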
    fromDecode = timeBuffer->getWire(-decodeToFetchDelay);
    fromRename = timeBuffer->getWire(-renameToFetchDelay);
    fromIEW = timeBuffer->getWire(-iewToFetchDelay);
    fromCommit = timeBuffer->getWire(-commitToFetchDelay);
}

template<class Impl>
void
SimpleFetch<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr)
{
    DPRINTF(Fetch, "Fetch: Setting the fetch queue pointer.\n");
    fetchQueue = fq_ptr;

    // Create wire to write information to proper place in fetch queue.
    toDecode = fetchQueue->getWire(0);
}

template<class Impl>
void
SimpleFetch<Impl>::processCacheCompletion()
{
    DPRINTF(Fetch, "Fetch: Waking up from cache miss.\n");

    // Only change the status if it's still waiting on the icache access
    // to return.
    // Can keep track of how many cache accesses go unused due to
    // misspeculation here.
    if (_status == IcacheMissStall)
        _status = IcacheMissComplete;
}

template <class Impl>
bool
SimpleFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC)
{
    // Do branch prediction check here.
    // A bit of a misnomer...next_PC is actually the current PC until
    // this function updates it.
    bool predict_taken;

    if (!inst->isControl()) {
        next_PC = next_PC + instSize;
        inst->setPredTarg(next_PC);
        return false;
    }

    predict_taken = branchPred.predict(inst, next_PC);

    if (predict_taken) {
        ++predictedBranches;
    }

    return predict_taken;
}

template <class Impl>
Fault
SimpleFetch<Impl>::fetchCacheLine(Addr fetch_PC)
{
    // Check if the instruction exists within the cache.
    // If it does, then proceed on to read the instruction and the rest
    // of the instructions in the cache line until either the end of the
    // cache line or a predicted taken branch is encountered.

#if FULL_SYSTEM
    // Flag to say whether or not address is physical addr.
    unsigned flags = cpu->inPalMode() ? PHYSICAL : 0;
#else
    unsigned flags = 0;
#endif // FULL_SYSTEM

    Fault fault = NoFault;

    // Align the fetch PC so it's at the start of a cache block.
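    // (icacheBlockAlignPC() presumably just masks off the low-order bits,
    // i.e. fetch_PC & ~cacheBlkMask, given the mask set up in the constructor.)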
    fetch_PC = icacheBlockAlignPC(fetch_PC);

    // Set up the memReq to do a read of the first instruction's address.
    // Set the appropriate read size and flags as well.
    memReq->cmd = Read;
    memReq->reset(fetch_PC, cacheBlkSize, flags);

    // Translate the instruction request.
    // Should this function be in the CPU class?  Probably...the ITB/DTB
    // should exist within the CPU.
    fault = cpu->translateInstReq(memReq);

    // In the case of faults, the fetch stage may need to stall and wait
    // on whatever caused the fault (ITB or Icache miss).

    // If translation was successful, attempt to read the first
    // instruction.
    if (fault == NoFault) {
        DPRINTF(Fetch, "Fetch: Doing instruction read.\n");
        fault = cpu->mem->read(memReq, cacheData);
        // This read may change when the mem interface changes.

        fetchedCacheLines++;
    }

    // Now do the timing access to see whether or not the instruction
    // exists within the cache.
    if (icacheInterface && fault == NoFault) {
        DPRINTF(Fetch, "Fetch: Doing timing memory access.\n");
        memReq->completionEvent = NULL;

        memReq->time = curTick;

        MemAccessResult result = icacheInterface->access(memReq);

        // If the cache missed (in this model functional and timing
        // memories are different), then schedule an event to wake
        // up this stage once the cache miss completes.
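        // (doEvents() presumably indicates whether the memory system will
        // actually schedule the completion event; if not, there is nothing
        // to wait on.)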
        if (result != MA_HIT && icacheInterface->doEvents()) {
            memReq->completionEvent = new CacheCompletionEvent(this);

            // How does current model work as far as individual
            // stages scheduling/unscheduling?
            // Perhaps have only the main CPU scheduled/unscheduled,
            // and have it choose what stages to run appropriately.

            DPRINTF(Fetch, "Fetch: Stalling due to icache miss.\n");
            _status = IcacheMissStall;
        }
    }

    return fault;
}

template <class Impl>
inline void
SimpleFetch<Impl>::doSquash(const Addr &new_PC)
{
    DPRINTF(Fetch, "Fetch: Squashing, setting PC to: %#x.\n", new_PC);

    cpu->setNextPC(new_PC + instSize);
    cpu->setPC(new_PC);

    // Clear the icache miss if it's outstanding.
    if (_status == IcacheMissStall && icacheInterface) {
        DPRINTF(Fetch, "Fetch: Squashing outstanding Icache miss.\n");
        // @todo: Use an actual thread number here.
        icacheInterface->squash(0);
    }

    _status = Squashing;

    ++fetchSquashCycles;
}

template<class Impl>
void
SimpleFetch<Impl>::squashFromDecode(const Addr &new_PC,
                                    const InstSeqNum &seq_num)
{
    DPRINTF(Fetch, "Fetch: Squashing from decode.\n");

    doSquash(new_PC);

    // Tell the CPU to remove any instructions that are in flight between
    // fetch and decode.
    cpu->removeInstsUntil(seq_num);
}

template <class Impl>
void
SimpleFetch<Impl>::squash(const Addr &new_PC)
{
    DPRINTF(Fetch, "Fetch: Squash from commit.\n");

    doSquash(new_PC);

    // Tell the CPU to remove any instructions that are not in the ROB.
    cpu->removeInstsNotInROB();
}

template<class Impl>
void
SimpleFetch<Impl>::tick()
{
    // Check squash signals from commit.
    if (fromCommit->commitInfo.squash) {
        DPRINTF(Fetch, "Fetch: Squashing instructions due to squash "
                "from commit.\n");

        // In any case, squash.
        squash(fromCommit->commitInfo.nextPC);

        // Also check if there's a mispredict that happened.
        if (fromCommit->commitInfo.branchMispredict) {
            branchPred.squash(fromCommit->commitInfo.doneSeqNum,
                              fromCommit->commitInfo.nextPC,
                              fromCommit->commitInfo.branchTaken);
        } else {
            branchPred.squash(fromCommit->commitInfo.doneSeqNum);
        }

        return;
    } else if (fromCommit->commitInfo.doneSeqNum) {
        // Update the branch predictor if it wasn't a squashed instruction
        // that was broadcast.
        branchPred.update(fromCommit->commitInfo.doneSeqNum);
    }

    // Check ROB squash signals from commit.
    if (fromCommit->commitInfo.robSquashing) {
        DPRINTF(Fetch, "Fetch: ROB is still squashing.\n");

        // Continue to squash.
        _status = Squashing;

        ++fetchSquashCycles;
        return;
    }

    // Check squash signals from decode.
    if (fromDecode->decodeInfo.squash) {
        DPRINTF(Fetch, "Fetch: Squashing instructions due to squash "
                "from decode.\n");

        // Update the branch predictor.
        if (fromDecode->decodeInfo.branchMispredict) {
            branchPred.squash(fromDecode->decodeInfo.doneSeqNum,
                              fromDecode->decodeInfo.nextPC,
                              fromDecode->decodeInfo.branchTaken);
        } else {
            branchPred.squash(fromDecode->decodeInfo.doneSeqNum);
        }

        if (_status != Squashing) {
            // Squash unless we're already squashing?
            squashFromDecode(fromDecode->decodeInfo.nextPC,
                             fromDecode->decodeInfo.doneSeqNum);
            return;
        }
    }

    // Check if any of the stall signals are high.
    if (fromDecode->decodeInfo.stall ||
        fromRename->renameInfo.stall ||
        fromIEW->iewInfo.stall ||
        fromCommit->commitInfo.stall)
    {
        // Block stage, regardless of current status.

        DPRINTF(Fetch, "Fetch: Stalling stage.\n");
        DPRINTF(Fetch, "Fetch: Statuses: Decode: %i Rename: %i IEW: %i "
                "Commit: %i\n",
                fromDecode->decodeInfo.stall,
                fromRename->renameInfo.stall,
                fromIEW->iewInfo.stall,
                fromCommit->commitInfo.stall);

        _status = Blocked;

        ++fetchBlockedCycles;
        return;
    } else if (_status == Blocked) {
        // Unblock stage if status is currently blocked and none of the
        // stall signals are being held high.
        _status = Running;

        ++fetchBlockedCycles;
        return;
    }

    // If fetch has reached this point, then there are no squash signals
    // still being held high.  Check if fetch is in the squashing state;
    // if so, fetch can switch to running.
    // Similarly, there are no blocked signals still being held high.
    // Check if fetch is in the blocked state; if so, fetch can switch to
    // running.
    if (_status == Squashing) {
        DPRINTF(Fetch, "Fetch: Done squashing, switching to running.\n");

        // Switch status to running
        _status = Running;

        ++fetchCycles;

        fetch();
    } else if (_status != IcacheMissStall) {
        DPRINTF(Fetch, "Fetch: Running stage.\n");

        ++fetchCycles;

        fetch();
    }
}

template<class Impl>
void
SimpleFetch<Impl>::fetch()
{
    //////////////////////////////////////////
    // Start actual fetch
    //////////////////////////////////////////

    // The current PC.
    Addr fetch_PC = cpu->readPC();

    // Fault code for memory access.
    Fault fault = NoFault;

    // If returning from the delay of a cache miss, then update the status
    // to running, otherwise do the cache access.  Possibly move this up
    // to the tick() function.
    if (_status == IcacheMissComplete) {
        DPRINTF(Fetch, "Fetch: Icache miss is complete.\n");

        // Reset the completion event to NULL.
        memReq->completionEvent = NULL;

        _status = Running;
    } else {
        DPRINTF(Fetch, "Fetch: Attempting to translate and read "
                "instruction, starting at PC %08p.\n",
                fetch_PC);

        fault = fetchCacheLine(fetch_PC);
    }

    // If we had a stall due to an icache miss, then return.  It'd
    // be nicer if this were handled through the kind of fault that
    // is returned by the function.
    if (_status == IcacheMissStall) {
        return;
    }

    // As far as timing goes, the CPU will need to send an event through
    // the MemReq in order to be woken up once the memory access completes.
    // Probably have a status on a per thread basis so each thread can
    // block independently and be woken up independently.

    Addr next_PC = fetch_PC;
    InstSeqNum inst_seq;
    MachInst inst;
    unsigned offset = fetch_PC & cacheBlkMask;
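    // offset is the byte offset of fetch_PC within the cache block just
    // read into cacheData.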
    unsigned fetched;

    if (fault == NoFault) {
        // If the read of the first instruction was successful, then grab the
        // instructions from the rest of the cache line and put them into the
        // queue heading to decode.

        DPRINTF(Fetch, "Fetch: Adding instructions to queue to decode.\n");

        //////////////////////////
        // Fetch first instruction
        //////////////////////////

        // Need to keep track of whether or not a predicted branch
        // ended this fetch block.
        bool predicted_branch = false;

        for (fetched = 0;
             offset < cacheBlkSize &&
             fetched < fetchWidth &&
             !predicted_branch;
             ++fetched)
        {
            // Get a sequence number.
            inst_seq = cpu->getAndIncrementInstSeq();

            // Make sure this is a valid index.
            assert(offset <= cacheBlkSize - instSize);

            // Get the instruction from the array of the cache line.
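            // (gtoh() byte-swaps the raw bits from guest (simulated ISA)
            // byte order to host byte order.)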
            inst = gtoh(*reinterpret_cast<MachInst *>
                        (&cacheData[offset]));

            // Create a new DynInst from the instruction fetched.
            DynInstPtr instruction = new DynInst(inst, fetch_PC, next_PC,
                                                 inst_seq, cpu);

            DPRINTF(Fetch, "Fetch: Instruction %i created, with PC %#x\n",
                    inst_seq, instruction->readPC());

            DPRINTF(Fetch, "Fetch: Instruction opcode is: %03p\n",
                    OPCODE(inst));

            instruction->traceData =
                Trace::getInstRecord(curTick, cpu->xcBase(), cpu,
                                     instruction->staticInst,
                                     instruction->readPC(), 0);

            predicted_branch = lookupAndUpdateNextPC(instruction, next_PC);

            // Add instruction to the CPU's list of instructions.
            cpu->addInst(instruction);

            // Write the instruction to the next available slot in the
            // queue that heads to decode.
            toDecode->insts[fetched] = instruction;

            toDecode->size++;

            // Increment stat of fetched instructions.
            ++fetchedInsts;

            // Move to the next instruction, unless we have a branch.
            fetch_PC = next_PC;

            offset += instSize;
        }

        fetch_nisn_dist.sample(fetched);
    }

    // Now that fetching is completed, update the PC to signify what the next
    // cycle will be.  Might want to move this to the beginning of this
    // function so that the PC updates at the beginning of everything.
    // Or might want to leave setting the PC to the main CPU, with fetch
    // only changing the nextPC (will require correct determination of
    // next PC).
    if (fault == NoFault) {
        DPRINTF(Fetch, "Fetch: Setting PC to %08p.\n", next_PC);
        cpu->setPC(next_PC);
        cpu->setNextPC(next_PC + instSize);
    } else {
        // If the issue was an icache miss, then we can just return and
        // wait until it is handled.
        if (_status == IcacheMissStall) {
            return;
        }

        // Handle the fault.
        // This stage will not be able to continue until all the ROB
        // slots are empty, at which point the fault can be handled.
        // The only other way it can wake up is if a squash comes along
        // and changes the PC.  Not sure how to handle that case...perhaps
        // have it handled by the upper level CPU class which peeks into the
        // time buffer and sees if a squash comes along, in which case it
        // changes the status.

        DPRINTF(Fetch, "Fetch: Blocked, need to handle the trap.\n");

        _status = Blocked;
#if FULL_SYSTEM
        // cpu->trap(fault);
        // Send a signal to the ROB indicating that there's a trap from the
        // fetch stage that needs to be handled.  Need to indicate that
        // there's a fault, and the fault type.
#else // !FULL_SYSTEM
        fatal("fault (%d) detected @ PC %08p", fault, cpu->readPC());
#endif // FULL_SYSTEM
    }
}