// Merge ktlim@zizzer.eecs.umich.edu:/bk/m5
// [gem5.git] / cpu / ooo_cpu / ooo_cpu.hh
1 /*
2 * Copyright (c) 2002-2005 The Regents of The University of Michigan
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 #ifndef __CPU_OOO_CPU_OOO_CPU_HH__
30 #define __CPU_OOO_CPU_OOO_CPU_HH__
31
32 #include "base/statistics.hh"
33 #include "cpu/base_cpu.hh"
34 #include "cpu/exec_context.hh"
35 #include "cpu/full_cpu/fu_pool.hh"
36 #include "cpu/ooo_cpu/ea_list.hh"
37 #include "cpu/pc_event.hh"
38 #include "cpu/static_inst.hh"
39 #include "mem/mem_interface.hh"
40 #include "sim/eventq.hh"
41
42 // forward declarations
43 #ifdef FULL_SYSTEM
44 class Processor;
45 class AlphaITB;
46 class AlphaDTB;
47 class PhysicalMemory;
48
49 class RemoteGDB;
50 class GDBListener;
51
52 #else
53
54 class Process;
55
56 #endif // FULL_SYSTEM
57
58 class Checkpoint;
59 class MemInterface;
60
61 namespace Trace {
62 class InstRecord;
63 }
64
65 /**
66 * Declaration of Out-of-Order CPU class. Basically it is a SimpleCPU with
67 * simple out-of-order capabilities added to it. It is still a 1 CPI machine
68 * (?), but is capable of handling cache misses. Basically it models having
69 * a ROB/IQ by only allowing a certain amount of instructions to execute while
70 * the cache miss is outstanding.
71 */
72
73 template <class Impl>
74 class OoOCPU : public BaseCPU
75 {
76 private:
77 typedef typename Impl::DynInst DynInst;
78 typedef typename Impl::DynInstPtr DynInstPtr;
79 typedef typename Impl::ISA ISA;
80
81 public:
82 // main simulation loop (one cycle)
83 void tick();
84
85 private:
86 struct TickEvent : public Event
87 {
88 OoOCPU *cpu;
89 int width;
90
91 TickEvent(OoOCPU *c, int w);
92 void process();
93 const char *description();
94 };
95
96 TickEvent tickEvent;
97
98 /// Schedule tick event, regardless of its current state.
99 void scheduleTickEvent(int delay)
100 {
101 if (tickEvent.squashed())
102 tickEvent.reschedule(curTick + delay);
103 else if (!tickEvent.scheduled())
104 tickEvent.schedule(curTick + delay);
105 }
106
107 /// Unschedule tick event, regardless of its current state.
108 void unscheduleTickEvent()
109 {
110 if (tickEvent.scheduled())
111 tickEvent.squash();
112 }
113
114 private:
115 Trace::InstRecord *traceData;
116
117 template<typename T>
118 void trace_data(T data);
119
120 public:
121 //
122 enum Status {
123 Running,
124 Idle,
125 IcacheMissStall,
126 IcacheMissComplete,
127 DcacheMissStall,
128 SwitchedOut
129 };
130
131 private:
132 Status _status;
133
134 public:
135 void post_interrupt(int int_num, int index);
136
137 void zero_fill_64(Addr addr) {
138 static int warned = 0;
139 if (!warned) {
140 warn ("WH64 is not implemented");
141 warned = 1;
142 }
143 };
144
145 struct Params : public BaseCPU::Params
146 {
147 MemInterface *icache_interface;
148 MemInterface *dcache_interface;
149 int width;
150 #ifdef FULL_SYSTEM
151 AlphaITB *itb;
152 AlphaDTB *dtb;
153 FunctionalMemory *mem;
154 #else
155 Process *process;
156 #endif
157 int issueWidth;
158 };
159
160 OoOCPU(Params *params);
161
162 virtual ~OoOCPU();
163
164 private:
165 void copyFromXC();
166
167 public:
168 // execution context
169 ExecContext *xc;
170
171 void switchOut();
172 void takeOverFrom(BaseCPU *oldCPU);
173
174 #ifdef FULL_SYSTEM
175 Addr dbg_vtophys(Addr addr);
176
177 bool interval_stats;
178 #endif
179
180 // L1 instruction cache
181 MemInterface *icacheInterface;
182
183 // L1 data cache
184 MemInterface *dcacheInterface;
185
186 FuncUnitPool *fuPool;
187
188 // Refcounted pointer to the one memory request.
189 MemReqPtr cacheMemReq;
190
191 class ICacheCompletionEvent : public Event
192 {
193 private:
194 OoOCPU *cpu;
195
196 public:
197 ICacheCompletionEvent(OoOCPU *_cpu);
198
199 virtual void process();
200 virtual const char *description();
201 };
202
203 // Will need to create a cache completion event upon any memory miss.
204 ICacheCompletionEvent iCacheCompletionEvent;
205
206 class DCacheCompletionEvent : public Event
207 {
208 private:
209 OoOCPU *cpu;
210 DynInstPtr inst;
211
212 public:
213 DCacheCompletionEvent(OoOCPU *_cpu, DynInstPtr &_inst);
214
215 virtual void process();
216 virtual const char *description();
217 };
218
219 friend class DCacheCompletionEvent;
220
221 Status status() const { return _status; }
222
223 virtual void activateContext(int thread_num, int delay);
224 virtual void suspendContext(int thread_num);
225 virtual void deallocateContext(int thread_num);
226 virtual void haltContext(int thread_num);
227
228 // statistics
229 virtual void regStats();
230 virtual void resetStats();
231
232 // number of simulated instructions
233 Counter numInst;
234 Counter startNumInst;
235 Stats::Scalar<> numInsts;
236
237 virtual Counter totalInstructions() const
238 {
239 return numInst - startNumInst;
240 }
241
242 // number of simulated memory references
243 Stats::Scalar<> numMemRefs;
244
245 // number of simulated loads
246 Counter numLoad;
247 Counter startNumLoad;
248
249 // number of idle cycles
250 Stats::Average<> notIdleFraction;
251 Stats::Formula idleFraction;
252
253 // number of cycles stalled for I-cache misses
254 Stats::Scalar<> icacheStallCycles;
255 Counter lastIcacheStall;
256
257 // number of cycles stalled for D-cache misses
258 Stats::Scalar<> dcacheStallCycles;
259 Counter lastDcacheStall;
260
261 void processICacheCompletion();
262
263 virtual void serialize(std::ostream &os);
264 virtual void unserialize(Checkpoint *cp, const std::string &section);
265
266 #ifdef FULL_SYSTEM
267 bool validInstAddr(Addr addr) { return true; }
268 bool validDataAddr(Addr addr) { return true; }
269 int getInstAsid() { return xc->regs.instAsid(); }
270 int getDataAsid() { return xc->regs.dataAsid(); }
271
272 Fault translateInstReq(MemReqPtr &req)
273 {
274 return itb->translate(req);
275 }
276
277 Fault translateDataReadReq(MemReqPtr &req)
278 {
279 return dtb->translate(req, false);
280 }
281
282 Fault translateDataWriteReq(MemReqPtr &req)
283 {
284 return dtb->translate(req, true);
285 }
286
287 #else
288 bool validInstAddr(Addr addr)
289 { return xc->validInstAddr(addr); }
290
291 bool validDataAddr(Addr addr)
292 { return xc->validDataAddr(addr); }
293
294 int getInstAsid() { return xc->asid; }
295 int getDataAsid() { return xc->asid; }
296
297 Fault dummyTranslation(MemReqPtr &req)
298 {
299 #if 0
300 assert((req->vaddr >> 48 & 0xffff) == 0);
301 #endif
302
303 // put the asid in the upper 16 bits of the paddr
304 req->paddr = req->vaddr & ~((Addr)0xffff << sizeof(Addr) * 8 - 16);
305 req->paddr = req->paddr | (Addr)req->asid << sizeof(Addr) * 8 - 16;
306 return No_Fault;
307 }
308 Fault translateInstReq(MemReqPtr &req)
309 {
310 return dummyTranslation(req);
311 }
312 Fault translateDataReadReq(MemReqPtr &req)
313 {
314 return dummyTranslation(req);
315 }
316 Fault translateDataWriteReq(MemReqPtr &req)
317 {
318 return dummyTranslation(req);
319 }
320
321 #endif
322
323 template <class T>
324 Fault read(Addr addr, T &data, unsigned flags, DynInstPtr inst);
325
326 template <class T>
327 Fault write(T data, Addr addr, unsigned flags,
328 uint64_t *res, DynInstPtr inst);
329
330 void prefetch(Addr addr, unsigned flags)
331 {
332 // need to do this...
333 }
334
335 void writeHint(Addr addr, int size, unsigned flags)
336 {
337 // need to do this...
338 }
339
340 Fault copySrcTranslate(Addr src);
341
342 Fault copy(Addr dest);
343
344 private:
345 bool executeInst(DynInstPtr &inst);
346
347 void renameInst(DynInstPtr &inst);
348
349 void addInst(DynInstPtr &inst);
350
351 void commitHeadInst();
352
353 bool grabInst();
354
355 Fault fetchCacheLine();
356
357 InstSeqNum getAndIncrementInstSeq();
358
359 bool ambigMemAddr;
360
361 private:
362 InstSeqNum globalSeqNum;
363
364 DynInstPtr renameTable[ISA::TotalNumRegs];
365 DynInstPtr commitTable[ISA::TotalNumRegs];
366
367 // Might need a table of the shadow registers as well.
368 #ifdef FULL_SYSTEM
369 DynInstPtr palShadowTable[ISA::NumIntRegs];
370 #endif
371
372 public:
373 // The register accessor methods provide the index of the
374 // instruction's operand (e.g., 0 or 1), not the architectural
375 // register index, to simplify the implementation of register
376 // renaming. We find the architectural register index by indexing
377 // into the instruction's own operand index table. Note that a
378 // raw pointer to the StaticInst is provided instead of a
379 // ref-counted StaticInstPtr to redice overhead. This is fine as
380 // long as these methods don't copy the pointer into any long-term
381 // storage (which is pretty hard to imagine they would have reason
382 // to do).
383
384 // In the OoO case these shouldn't read from the XC but rather from the
385 // rename table of DynInsts. Also these likely shouldn't be called very
386 // often, other than when adding things into the xc during say a syscall.
387
388 uint64_t readIntReg(StaticInst<TheISA> *si, int idx)
389 {
390 return xc->readIntReg(si->srcRegIdx(idx));
391 }
392
393 float readFloatRegSingle(StaticInst<TheISA> *si, int idx)
394 {
395 int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag;
396 return xc->readFloatRegSingle(reg_idx);
397 }
398
399 double readFloatRegDouble(StaticInst<TheISA> *si, int idx)
400 {
401 int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag;
402 return xc->readFloatRegDouble(reg_idx);
403 }
404
405 uint64_t readFloatRegInt(StaticInst<TheISA> *si, int idx)
406 {
407 int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag;
408 return xc->readFloatRegInt(reg_idx);
409 }
410
411 void setIntReg(StaticInst<TheISA> *si, int idx, uint64_t val)
412 {
413 xc->setIntReg(si->destRegIdx(idx), val);
414 }
415
416 void setFloatRegSingle(StaticInst<TheISA> *si, int idx, float val)
417 {
418 int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag;
419 xc->setFloatRegSingle(reg_idx, val);
420 }
421
422 void setFloatRegDouble(StaticInst<TheISA> *si, int idx, double val)
423 {
424 int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag;
425 xc->setFloatRegDouble(reg_idx, val);
426 }
427
428 void setFloatRegInt(StaticInst<TheISA> *si, int idx, uint64_t val)
429 {
430 int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag;
431 xc->setFloatRegInt(reg_idx, val);
432 }
433
434 uint64_t readPC() { return PC; }
435 void setNextPC(Addr val) { nextPC = val; }
436
437 private:
438 Addr PC;
439 Addr nextPC;
440
441 unsigned issueWidth;
442
443 bool fetchRedirExcp;
444 bool fetchRedirBranch;
445
446 /** Mask to get a cache block's address. */
447 Addr cacheBlkMask;
448
449 unsigned cacheBlkSize;
450
451 Addr cacheBlkPC;
452
453 /** The cache line being fetched. */
454 uint8_t *cacheData;
455
456 protected:
457 bool cacheBlkValid;
458
459 private:
460
461 // Align an address (typically a PC) to the start of an I-cache block.
462 // We fold in the PISA 64- to 32-bit conversion here as well.
463 Addr icacheBlockAlignPC(Addr addr)
464 {
465 addr = ISA::realPCToFetchPC(addr);
466 return (addr & ~(cacheBlkMask));
467 }
468
469 unsigned instSize;
470
471 // ROB tracking stuff.
472 DynInstPtr robHeadPtr;
473 DynInstPtr robTailPtr;
474 unsigned robInsts;
475
476 // List of outstanding EA instructions.
477 protected:
478 EAList eaList;
479
480 public:
481 void branchToTarget(Addr val)
482 {
483 if (!fetchRedirExcp) {
484 fetchRedirBranch = true;
485 PC = val;
486 }
487 }
488
489 // ISA stuff:
490 uint64_t readUniq() { return xc->readUniq(); }
491 void setUniq(uint64_t val) { xc->setUniq(val); }
492
493 uint64_t readFpcr() { return xc->readFpcr(); }
494 void setFpcr(uint64_t val) { xc->setFpcr(val); }
495
496 #ifdef FULL_SYSTEM
497 uint64_t readIpr(int idx, Fault &fault) { return xc->readIpr(idx, fault); }
498 Fault setIpr(int idx, uint64_t val) { return xc->setIpr(idx, val); }
499 Fault hwrei() { return xc->hwrei(); }
500 int readIntrFlag() { return xc->readIntrFlag(); }
501 void setIntrFlag(int val) { xc->setIntrFlag(val); }
502 bool inPalMode() { return xc->inPalMode(); }
503 void ev5_trap(Fault fault) { xc->ev5_trap(fault); }
504 bool simPalCheck(int palFunc) { return xc->simPalCheck(palFunc); }
505 #else
506 void syscall() { xc->syscall(); }
507 #endif
508
509 ExecContext *xcBase() { return xc; }
510 };
511
512
513 // precise architected memory state accessor macros
514 template <class Impl>
515 template <class T>
516 Fault
517 OoOCPU<Impl>::read(Addr addr, T &data, unsigned flags, DynInstPtr inst)
518 {
519 MemReqPtr readReq = new MemReq();
520 readReq->xc = xc;
521 readReq->asid = 0;
522 readReq->data = new uint8_t[64];
523
524 readReq->reset(addr, sizeof(T), flags);
525
526 // translate to physical address - This might be an ISA impl call
527 Fault fault = translateDataReadReq(readReq);
528
529 // do functional access
530 if (fault == No_Fault)
531 fault = xc->mem->read(readReq, data);
532 #if 0
533 if (traceData) {
534 traceData->setAddr(addr);
535 if (fault == No_Fault)
536 traceData->setData(data);
537 }
538 #endif
539
540 // if we have a cache, do cache access too
541 if (fault == No_Fault && dcacheInterface) {
542 readReq->cmd = Read;
543 readReq->completionEvent = NULL;
544 readReq->time = curTick;
545 /*MemAccessResult result = */dcacheInterface->access(readReq);
546
547 if (dcacheInterface->doEvents()) {
548 readReq->completionEvent = new DCacheCompletionEvent(this, inst);
549 lastDcacheStall = curTick;
550 unscheduleTickEvent();
551 _status = DcacheMissStall;
552 }
553 }
554
555 if (!dcacheInterface && (readReq->flags & UNCACHEABLE))
556 recordEvent("Uncached Read");
557
558 return fault;
559 }
560
561 template <class Impl>
562 template <class T>
563 Fault
564 OoOCPU<Impl>::write(T data, Addr addr, unsigned flags,
565 uint64_t *res, DynInstPtr inst)
566 {
567 MemReqPtr writeReq = new MemReq();
568 writeReq->xc = xc;
569 writeReq->asid = 0;
570 writeReq->data = new uint8_t[64];
571
572 #if 0
573 if (traceData) {
574 traceData->setAddr(addr);
575 traceData->setData(data);
576 }
577 #endif
578
579 writeReq->reset(addr, sizeof(T), flags);
580
581 // translate to physical address
582 Fault fault = xc->translateDataWriteReq(writeReq);
583
584 // do functional access
585 if (fault == No_Fault)
586 fault = xc->write(writeReq, data);
587
588 if (fault == No_Fault && dcacheInterface) {
589 writeReq->cmd = Write;
590 memcpy(writeReq->data,(uint8_t *)&data,writeReq->size);
591 writeReq->completionEvent = NULL;
592 writeReq->time = curTick;
593 /*MemAccessResult result = */dcacheInterface->access(writeReq);
594
595 if (dcacheInterface->doEvents()) {
596 writeReq->completionEvent = new DCacheCompletionEvent(this, inst);
597 lastDcacheStall = curTick;
598 unscheduleTickEvent();
599 _status = DcacheMissStall;
600 }
601 }
602
603 if (res && (fault == No_Fault))
604 *res = writeReq->result;
605
606 if (!dcacheInterface && (writeReq->flags & UNCACHEABLE))
607 recordEvent("Uncached Write");
608
609 return fault;
610 }
611
612
613 #endif // __CPU_OOO_CPU_OOO_CPU_HH__