8debd277dfa43c49831f1dd932235c0fa3123b60
[gem5.git] / src / cpu / ozone / back_end.hh
1 /*
2 * Copyright (c) 2006 The Regents of The University of Michigan
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * Authors: Kevin Lim
29 */
30
31 #ifndef __CPU_OZONE_BACK_END_HH__
32 #define __CPU_OZONE_BACK_END_HH__
33
34 #include <list>
35 #include <queue>
36 #include <string>
37
38 #include "arch/faults.hh"
39 #include "base/timebuf.hh"
40 #include "cpu/inst_seq.hh"
41 #include "cpu/ozone/rename_table.hh"
42 #include "cpu/ozone/thread_state.hh"
43 #include "mem/request.hh"
44 #include "sim/eventq.hh"
45
46 class ThreadContext;
47
/**
 * The back end of the Ozone CPU model.  Receives instructions from the
 * front end and carries them through dispatch, issue, execute,
 * writeback, and commit, using an instruction queue (IQ) for scheduling
 * and a load/store queue (LSQ) for memory instructions.  Parameterized
 * on Impl, which supplies the CPU, front end, dynamic instruction, and
 * LSQ types.
 */
template <class Impl>
class BackEnd
{
  public:
    /** Per-thread state (rename tables, arch state) for this CPU model. */
    typedef OzoneThreadState<Impl> Thread;

    typedef typename Impl::Params Params;
    typedef typename Impl::DynInst DynInst;
    typedef typename Impl::DynInstPtr DynInstPtr;
    typedef typename Impl::FullCPU FullCPU;
    typedef typename Impl::FrontEnd FrontEnd;
    /** Struct used to communicate with the rest of the CPU (commit). */
    typedef typename Impl::FullCPU::CommStruct CommStruct;

    /** Minimal inter-stage payload: just an instruction count. */
    struct SizeStruct {
        int size;
    };

    // All inter-stage time buffers carry only a count.
    typedef SizeStruct DispatchToIssue;
    typedef SizeStruct IssueToExec;
    typedef SizeStruct ExecToCommit;
    typedef SizeStruct Writeback;

    /** Dispatch-to-issue delay queue and its access wire. */
    TimeBuffer<DispatchToIssue> d2i;
    typename TimeBuffer<DispatchToIssue>::wire instsToDispatch;
    /** Issue-to-execute delay queue and its access wire. */
    TimeBuffer<IssueToExec> i2e;
    typename TimeBuffer<IssueToExec>::wire instsToExecute;
    /** Execute-to-commit delay queue. */
    TimeBuffer<ExecToCommit> e2c;
    /** Queue of counts of instructions awaiting writeback. */
    TimeBuffer<Writeback> numInstsToWB;

    /** Communication buffer shared with the rest of the CPU; set via
     * setCommBuffer().  toIEW writes forward, fromCommit reads back. */
    TimeBuffer<CommStruct> *comm;
    typename TimeBuffer<CommStruct>::wire toIEW;
    typename TimeBuffer<CommStruct>::wire fromCommit;

    /**
     * Instruction queue for the back end.  Tracks instructions on
     * several internal lists (non-speculative, waiting, ready, replay)
     * and hands ready instructions to execute through the issue-to-
     * execute time buffer.
     */
    class InstQueue {
        /** Identifies which internal list an instruction resides on
         * (used with find()). */
        enum queue {
            NonSpec,
            IQ,
            ToBeScheduled,
            ReadyList,
            ReplayList
        };
        /** Comparator for the ready priority queue: greater-than on
         * seqNum makes the OLDEST (lowest seqNum) instruction come out
         * of the std::priority_queue first. */
        struct pqCompare {
            bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const
            {
                return lhs->seqNum > rhs->seqNum;
            }
        };
      public:
        InstQueue(Params *params);

        /** Returns the name of the queue, for tracing/stats. */
        std::string name() const;

        /** Registers statistics. */
        void regStats();

        /** Provides the issue-to-execute time buffer to write counts into. */
        void setIssueExecQueue(TimeBuffer<IssueToExec> *i2e_queue);

        /** Sets the back-pointer to the owning back end. */
        void setBE(BackEnd *_be) { be = _be; }

        /** Inserts an instruction into the queue. */
        void insert(DynInstPtr &inst);

        /** Schedules instructions whose operands are ready. */
        void scheduleReadyInsts();

        /** Allows a non-speculative instruction (by seq num) to execute. */
        void scheduleNonSpec(const InstSeqNum &sn);

        /** Returns the next ready instruction, if any. */
        DynInstPtr getReadyInst();

        /** Commit notification; nothing to do in this queue. */
        void commit(const InstSeqNum &sn) {}

        /** Squashes all instructions younger than the given seq num. */
        void squash(const InstSeqNum &sn);

        /** Wakes consumers of this instruction's results; returns the
         * number of dependents woken. */
        int wakeDependents(DynInstPtr &inst);

        /** Tells memory dependence unit that a memory instruction needs to be
         * rescheduled. It will re-execute once replayMemInst() is called.
         */
        void rescheduleMemInst(DynInstPtr &inst);

        /** Re-executes all rescheduled memory instructions. */
        void replayMemInst(DynInstPtr &inst);

        /** Completes memory instruction. */
        void completeMemInst(DynInstPtr &inst);

        /** Memory-order violation notification; intentionally a no-op here. */
        void violation(DynInstPtr &inst, DynInstPtr &violation) { }

        /** True when the queue holds at least `size` instructions. */
        bool isFull() { return numInsts >= size; }

        /** Debug dump of all queued instructions. */
        void dumpInsts();

      private:
        /** Checks whether the given iterator is on the given list. */
        bool find(queue q, typename std::list<DynInstPtr>::iterator it);
        /** Owning back end. */
        BackEnd *be;
        /** Issue-to-execute time buffer (not owned). */
        TimeBuffer<IssueToExec> *i2e;
        typename TimeBuffer<IssueToExec>::wire numIssued;
        typedef typename std::list<DynInstPtr> InstList;
        typedef typename std::list<DynInstPtr>::iterator InstListIt;
        typedef typename std::priority_queue<DynInstPtr, std::vector<DynInstPtr>, pqCompare> ReadyInstQueue;
        // Not sure I need the IQ list; it just needs to be a count.
        InstList iq;
        InstList toBeScheduled;
        InstList readyList;
        InstList nonSpec;
        InstList replayList;
        /** Ready instructions ordered oldest-first (see pqCompare). */
        ReadyInstQueue readyQueue;
      public:
        /** Capacity of the queue. */
        int size;
        /** Current number of instructions in the queue. */
        int numInsts;
        /** Maximum instructions issued per cycle. */
        int width;

        /** Occupancy distribution. */
        Stats::VectorDistribution<> occ_dist;

        Stats::Vector<> inst_count;
        Stats::Vector<> peak_inst_count;
        Stats::Scalar<> empty_count;
        Stats::Scalar<> current_count;
        Stats::Scalar<> fullCount;

        Stats::Formula occ_rate;
        Stats::Formula avg_residency;
        Stats::Formula empty_rate;
        Stats::Formula full_rate;
    };

    /** LdWriteback event for a load completion. */
    class LdWritebackEvent : public Event {
      private:
        /** Instruction that is writing back data to the register file. */
        DynInstPtr inst;
        /** Pointer to IEW stage. */
        BackEnd *be;

      public:
        /** Constructs a load writeback event. */
        LdWritebackEvent(DynInstPtr &_inst, BackEnd *be);

        /** Processes writeback event. */
        virtual void process();
        /** Returns the description of the writeback event. */
        virtual const char *description();
    };

    BackEnd(Params *params);

    /** Returns the name of the back end, for tracing/stats. */
    std::string name() const;

    /** Registers statistics. */
    void regStats();

    /** Sets the pointer to the owning CPU. */
    void setCPU(FullCPU *cpu_ptr)
    { cpu = cpu_ptr; }

    /** Sets the pointer to the front end feeding this back end. */
    void setFrontEnd(FrontEnd *front_end_ptr)
    { frontEnd = front_end_ptr; }

    /** Sets the thread context pointer. */
    void setTC(ThreadContext *tc_ptr)
    { tc = tc_ptr; }

    /** Sets the per-thread state pointer. */
    void setThreadState(Thread *thread_ptr)
    { thread = thread_ptr; }

    /** Sets the communication buffer shared with the rest of the CPU. */
    void setCommBuffer(TimeBuffer<CommStruct> *_comm);

    /** Ticks the back end: runs one cycle of all back-end stages. */
    void tick();
    /** Squashes all in-flight back-end state. */
    void squash();
    /** Squashes due to an external thread-context change. */
    void squashFromTC();
    /** Set when a TC-initiated squash is pending. */
    bool tcSquash;

    /** Initiates a load through the LSQ; see definition below. */
    template <class T>
    Fault read(RequestPtr req, T &data, int load_idx);

    /** Initiates a store through the LSQ; see definition below. */
    template <class T>
    Fault write(RequestPtr req, T &data, int store_idx);

    /** Returns the PC of the instruction currently committing. */
    Addr readCommitPC() { return commitPC; }

    Addr commitPC;

    /** True when no instructions remain in the reorder buffer. */
    bool robEmpty() { return instList.empty(); }

    /** True when the ROB has no free entries. */
    bool isFull() { return numInsts >= numROBEntries; }
    /** True when either the back end or dispatch is blocked. */
    bool isBlocked() { return status == Blocked || dispatchStatus == Blocked; }

    /** Tells memory dependence unit that a memory instruction needs to be
     * rescheduled. It will re-execute once replayMemInst() is called.
     */
    void rescheduleMemInst(DynInstPtr &inst)
    { IQ.rescheduleMemInst(inst); }

    /** Re-executes all rescheduled memory instructions. */
    void replayMemInst(DynInstPtr &inst)
    { IQ.replayMemInst(inst); }

    /** Completes memory instruction. */
    void completeMemInst(DynInstPtr &inst)
    { IQ.completeMemInst(inst); }

    /** Records a fault delivered from the front end (fetch). */
    void fetchFault(Fault &fault);

  private:
    // One private helper per back-end pipeline activity; called from tick().
    void updateStructures();
    void dispatchInsts();
    void dispatchStall();
    void checkDispatchStatus();
    void scheduleReadyInsts();
    void executeInsts();
    void commitInsts();
    void addToIQ(DynInstPtr &inst);
    void addToLSQ(DynInstPtr &inst);
    void instToCommit(DynInstPtr &inst);
    void writebackInsts();
    bool commitInst(int inst_num);
    void squash(const InstSeqNum &sn);
    void squashDueToBranch(DynInstPtr &inst);
    void squashDueToMemBlocked(DynInstPtr &inst);
    void updateExeInstStats(DynInstPtr &inst);
    void updateComInstStats(DynInstPtr &inst);

  public:
    /** Owning CPU. */
    FullCPU *cpu;

    /** Front end that feeds instructions to this back end. */
    FrontEnd *frontEnd;

    /** Thread context of the running thread. */
    ThreadContext *tc;

    /** Per-thread state. */
    Thread *thread;

    /** Overall back-end status values. */
    enum Status {
        Running,
        Idle,
        DcacheMissStall,
        DcacheMissComplete,
        Blocked
    };

    /** Status of the back end as a whole. */
    Status status;

    /** Status of the dispatch stage specifically. */
    Status dispatchStatus;

    /** Count of functionally executed instructions. */
    Counter funcExeInst;

  private:
    // typedef typename Impl::InstQueue InstQueue;

    /** Instruction queue used for scheduling. */
    InstQueue IQ;

    typedef typename Impl::LdstQueue LdstQueue;

    /** Load/store queue; owns the entire memory-access path (see
     * read()/write() below). */
    LdstQueue LSQ;
  public:
    /** Rename table reflecting committed (architectural) state. */
    RenameTable<Impl> commitRenameTable;

    /** Rename table reflecting speculative state. */
    RenameTable<Impl> renameTable;
  private:
    /** Event scheduled when an outstanding D-cache access completes. */
    class DCacheCompletionEvent : public Event
    {
      private:
        BackEnd *be;

      public:
        DCacheCompletionEvent(BackEnd *_be);

        virtual void process();
        virtual const char *description();
    };

    friend class DCacheCompletionEvent;

    DCacheCompletionEvent cacheCompletionEvent;

    MemInterface *dcacheInterface;

    Request *memReq;

    // General back end width. Used if the more specific isn't given.
    int width;

    // Dispatch width.
    int dispatchWidth;
    int numDispatchEntries;
    int dispatchSize;

    int issueWidth;

    // Writeback width
    int wbWidth;

    // Commit width
    int commitWidth;

    /** Index into queue of instructions being written back. */
    unsigned wbNumInst;

    /** Cycle number within the queue of instructions being written
     * back. Used in case there are too many instructions writing
     * back at the current cycle and writesbacks need to be scheduled
     * for the future. See comments in instToCommit().
     */
    unsigned wbCycle;

    /** ROB capacity and current occupancy. */
    int numROBEntries;
    int numInsts;

    /** Pending squash information, applied when the squash is handled. */
    bool squashPending;
    InstSeqNum squashSeqNum;
    Addr squashNextPC;

    /** Fault passed in from fetch via fetchFault(). */
    Fault faultFromFetch;

  private:
    typedef typename std::list<DynInstPtr>::iterator InstListIt;

    /** In-flight instructions, in program order (acts as the ROB). */
    std::list<DynInstPtr> instList;
    /** Instructions waiting to dispatch. */
    std::list<DynInstPtr> dispatch;
    /** Instructions waiting to write back. */
    std::list<DynInstPtr> writeback;

    int latency;

    int squashLatency;

    bool exactFullStall;

    bool fetchRedirect[Impl::MaxThreads];

    // number of cycles stalled for D-cache misses
    /*  Stats::Scalar<> dcacheStallCycles;
    Counter lastDcacheStall;
    */
    Stats::Vector<> rob_cap_events;
    Stats::Vector<> rob_cap_inst_count;
    Stats::Vector<> iq_cap_events;
    Stats::Vector<> iq_cap_inst_count;
    // total number of instructions executed
    Stats::Vector<> exe_inst;
    Stats::Vector<> exe_swp;
    Stats::Vector<> exe_nop;
    Stats::Vector<> exe_refs;
    Stats::Vector<> exe_loads;
    Stats::Vector<> exe_branches;

    Stats::Vector<> issued_ops;

    // total number of loads forwaded from LSQ stores
    Stats::Vector<> lsq_forw_loads;

    // total number of loads ignored due to invalid addresses
    Stats::Vector<> inv_addr_loads;

    // total number of software prefetches ignored due to invalid addresses
    Stats::Vector<> inv_addr_swpfs;
    // ready loads blocked due to memory disambiguation
    Stats::Vector<> lsq_blocked_loads;

    Stats::Scalar<> lsqInversion;

    Stats::Vector<> n_issued_dist;
    Stats::VectorDistribution<> issue_delay_dist;

    Stats::VectorDistribution<> queue_res_dist;
/*
    Stats::Vector<> stat_fu_busy;
    Stats::Vector2d<> stat_fuBusy;
    Stats::Vector<> dist_unissued;
    Stats::Vector2d<> stat_issued_inst_type;

    Stats::Formula misspec_cnt;
    Stats::Formula misspec_ipc;
    Stats::Formula issue_rate;
    Stats::Formula issue_stores;
    Stats::Formula issue_op_rate;
    Stats::Formula fu_busy_rate;
    Stats::Formula commit_stores;
    Stats::Formula commit_ipc;
    Stats::Formula commit_ipb;
    Stats::Formula lsq_inv_rate;
*/
    Stats::Vector<> writeback_count;
    Stats::Vector<> producer_inst;
    Stats::Vector<> consumer_inst;
    Stats::Vector<> wb_penalized;

    Stats::Formula wb_rate;
    Stats::Formula wb_fanout;
    Stats::Formula wb_penalized_rate;

    // total number of instructions committed
    Stats::Vector<> stat_com_inst;
    Stats::Vector<> stat_com_swp;
    Stats::Vector<> stat_com_refs;
    Stats::Vector<> stat_com_loads;
    Stats::Vector<> stat_com_membars;
    Stats::Vector<> stat_com_branches;

    Stats::Distribution<> n_committed_dist;

    Stats::Scalar<> commit_eligible_samples;
    Stats::Vector<> commit_eligible;

    Stats::Scalar<> ROB_fcount;
    Stats::Formula ROB_full_rate;

    Stats::Vector<> ROB_count;   // cumulative ROB occupancy
    Stats::Formula ROB_occ_rate;
    Stats::VectorDistribution<> ROB_occ_dist;
  public:
    /** Debug dump of all in-flight instructions. */
    void dumpInsts();
};
456
457 template <class Impl>
458 template <class T>
459 Fault
460 BackEnd<Impl>::read(RequestPtr req, T &data, int load_idx)
461 {
462 /* memReq->reset(addr, sizeof(T), flags);
463
464 // translate to physical address
465 Fault fault = cpu->translateDataReadReq(memReq);
466
467 // if we have a cache, do cache access too
468 if (fault == NoFault && dcacheInterface) {
469 memReq->cmd = Read;
470 memReq->completionEvent = NULL;
471 memReq->time = curTick;
472 memReq->flags &= ~INST_READ;
473 MemAccessResult result = dcacheInterface->access(memReq);
474
475 // Ugly hack to get an event scheduled *only* if the access is
476 // a miss. We really should add first-class support for this
477 // at some point.
478 if (result != MA_HIT && dcacheInterface->doEvents()) {
479 // Fix this hack for keeping funcExeInst correct with loads that
480 // are executed twice.
481 --funcExeInst;
482
483 memReq->completionEvent = &cacheCompletionEvent;
484 lastDcacheStall = curTick;
485 // unscheduleTickEvent();
486 // status = DcacheMissStall;
487 DPRINTF(OzoneCPU, "Dcache miss stall!\n");
488 } else {
489 // do functional access
490 fault = thread->mem->read(memReq, data);
491
492 }
493 }
494 */
495 /*
496 if (!dcacheInterface && (memReq->isUncacheable()))
497 recordEvent("Uncached Read");
498 */
499 return LSQ.read(req, data, load_idx);
500 }
501
502 template <class Impl>
503 template <class T>
504 Fault
505 BackEnd<Impl>::write(RequestPtr req, T &data, int store_idx)
506 {
507 /*
508 memReq->reset(addr, sizeof(T), flags);
509
510 // translate to physical address
511 Fault fault = cpu->translateDataWriteReq(memReq);
512
513 if (fault == NoFault && dcacheInterface) {
514 memReq->cmd = Write;
515 memcpy(memReq->data,(uint8_t *)&data,memReq->size);
516 memReq->completionEvent = NULL;
517 memReq->time = curTick;
518 memReq->flags &= ~INST_READ;
519 MemAccessResult result = dcacheInterface->access(memReq);
520
521 // Ugly hack to get an event scheduled *only* if the access is
522 // a miss. We really should add first-class support for this
523 // at some point.
524 if (result != MA_HIT && dcacheInterface->doEvents()) {
525 memReq->completionEvent = &cacheCompletionEvent;
526 lastDcacheStall = curTick;
527 // unscheduleTickEvent();
528 // status = DcacheMissStall;
529 DPRINTF(OzoneCPU, "Dcache miss stall!\n");
530 }
531 }
532
533 if (res && (fault == NoFault))
534 *res = memReq->result;
535 */
536 /*
537 if (!dcacheInterface && (memReq->isUncacheable()))
538 recordEvent("Uncached Write");
539 */
540 return LSQ.write(req, data, store_idx);
541 }
542
543 #endif // __CPU_OZONE_BACK_END_HH__