2 * Copyright (c) 2006 The Regents of The University of Michigan
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 #ifndef __CPU_OZONE_BACK_END_HH__
32 #define __CPU_OZONE_BACK_END_HH__
38 #include "cpu/ozone/rename_table.hh"
39 #include "cpu/ozone/thread_state.hh"
40 #include "cpu/inst_seq.hh"
41 #include "cpu/timebuf.hh"
42 #include "mem/request.hh"
43 #include "sim/eventq.hh"
44 #include "sim/faults.hh"
// Forward declaration. The type is used below as OzoneThreadState<Impl>.
// NOTE(review): the template header that presumably precedes this line is
// not visible in this excerpt.
49 class OzoneThreadState;
// Local aliases for the types supplied through the Impl template parameter.
// Thread is the OzoneThreadState instantiation for this Impl; the others are
// nested types looked up on Impl (CommStruct is looked up on Impl::FullCPU).
55 typedef OzoneThreadState<Impl> Thread;
57 typedef typename Impl::Params Params;
58 typedef typename Impl::DynInst DynInst;
59 typedef typename Impl::DynInstPtr DynInstPtr;
60 typedef typename Impl::FullCPU FullCPU;
61 typedef typename Impl::FrontEnd FrontEnd;
62 typedef typename Impl::FullCPU::CommStruct CommStruct;
// Element types for the TimeBuffers declared below. All four are aliases of
// the same SizeStruct type.
// NOTE(review): SizeStruct is defined outside this excerpt.
68 typedef SizeStruct DispatchToIssue;
69 typedef SizeStruct IssueToExec;
70 typedef SizeStruct ExecToCommit;
71 typedef SizeStruct Writeback;
// Time buffers held by value; d2i and i2e are each followed by a wire of the
// same buffer type.
73 TimeBuffer<DispatchToIssue> d2i;
74 typename TimeBuffer<DispatchToIssue>::wire instsToDispatch;
75 TimeBuffer<IssueToExec> i2e;
76 typename TimeBuffer<IssueToExec>::wire instsToExecute;
77 TimeBuffer<ExecToCommit> e2c;
78 TimeBuffer<Writeback> numInstsToWB;
// The CommStruct buffer is referenced through a pointer rather than held by
// value; toIEW and fromCommit are wires of that buffer type.
// NOTE(review): presumably installed by setCommBuffer() - confirm in its
// definition.
80 TimeBuffer<CommStruct> *comm;
81 typename TimeBuffer<CommStruct>::wire toIEW;
82 typename TimeBuffer<CommStruct>::wire fromCommit;
// Ordering predicate: yields true when lhs has a larger seqNum than rhs.
// NOTE(review): presumably the call operator of pqCompare (the enclosing
// struct header is not visible in this excerpt). If so, the
// std::priority_queue typedef'd as ReadyInstQueue keeps the instruction with
// the smallest seqNum at its top, because priority_queue surfaces the element
// that compares "greatest" under its comparator.
93 bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const
95 return lhs->seqNum > rhs->seqNum;
// Constructor taking a Params pointer (definition not in this excerpt).
// NOTE(review): not declared explicit, so a Params* converts implicitly.
99 InstQueue(Params *params);
// Returns a std::string; presumably the queue's name - definition not visible.
101 std::string name() const;
// Receives a pointer to an issue-to-execute time buffer.
// NOTE(review): presumably stored in the i2e member - confirm in the definition.
105 void setIssueExecQueue(TimeBuffer<IssueToExec> *i2e_queue);
// Stores the given BackEnd pointer in the be member (plain pointer assignment).
107 void setBE(BackEnd *_be) { be = _be; }
// Declarations only; the definitions live outside this excerpt, so the notes
// below describe signatures rather than behavior.
// insert: takes the instruction handle by non-const reference.
109 void insert(DynInstPtr &inst);
111 void scheduleReadyInsts();
// scheduleNonSpec: identified by sequence number rather than by instruction.
113 void scheduleNonSpec(const InstSeqNum &sn);
// getReadyInst: returns a DynInstPtr by value.
115 DynInstPtr getReadyInst();
// Empty body: the sequence number argument is ignored.
117 void commit(const InstSeqNum &sn) {}
// Declarations only; definitions are outside this excerpt.
// squash: takes a sequence number by const reference.
119 void squash(const InstSeqNum &sn);
// wakeDependents: returns an int.
// NOTE(review): presumably a count of woken instructions - confirm.
121 int wakeDependents(DynInstPtr &inst);
123 /** Tells memory dependence unit that a memory instruction needs to be
124 * rescheduled. It will re-execute once replayMemInst() is called. */
126 void rescheduleMemInst(DynInstPtr &inst);  // declaration only; defined outside this excerpt
128 /** Re-executes all rescheduled memory instructions. */
// NOTE(review): the signature takes a single instruction; confirm against the
// definition whether "all" in the comment above is accurate.
129 void replayMemInst(DynInstPtr &inst);
131 /** Completes memory instruction. */
132 void completeMemInst(DynInstPtr &inst);
// Empty body: both instruction arguments are ignored.
// NOTE(review): the second parameter has the same name as the function.
134 void violation(DynInstPtr &inst, DynInstPtr &violation) { }
// True when numInsts is greater than or equal to size.
136 bool isFull() { return numInsts >= size; }
// Declaration only.
// NOTE(review): the `queue` parameter type is declared outside this excerpt.
141 bool find(queue q, typename std::list<DynInstPtr>::iterator it);
// Issue-to-execute buffer, referenced by pointer, and a wire of that buffer type.
143 TimeBuffer<IssueToExec> *i2e;
144 typename TimeBuffer<IssueToExec>::wire numIssued;
// Container aliases: a std::list of instructions, its iterator, and a
// std::priority_queue backed by std::vector and ordered by pqCompare.
145 typedef typename std::list<DynInstPtr> InstList;
146 typedef typename std::list<DynInstPtr>::iterator InstListIt;
147 typedef typename std::priority_queue<DynInstPtr, std::vector<DynInstPtr>, pqCompare> ReadyInstQueue;
148 // Not sure I need the IQ list; it just needs to be a count.
150 InstList toBeScheduled;
154 ReadyInstQueue readyQueue;
// Statistics objects, presumably belonging to the instruction queue.
// NOTE(review): their registration and the Formula definitions are not
// visible in this excerpt, so the meaning of each counter must be confirmed
// where it is initialized.
160 Stats::VectorDistribution occ_dist;
162 Stats::Vector inst_count;
163 Stats::Vector peak_inst_count;
164 Stats::Scalar empty_count;
165 Stats::Scalar current_count;
166 Stats::Scalar fullCount;
// Derived values (Stats::Formula) as opposed to the raw counters above.
168 Stats::Formula occ_rate;
169 Stats::Formula avg_residency;
170 Stats::Formula empty_rate;
171 Stats::Formula full_rate;
174 /** LdWriteback event for a load completion. */
175 class LdWritebackEvent : public Event {
// NOTE(review): the member declarations that the next two comments describe
// are not visible in this excerpt.
177 /** Instruction that is writing back data to the register file. */
179 /** Pointer to IEW stage. */
183 /** Constructs a load writeback event. */
// Takes the instruction by non-const reference and a BackEnd pointer.
184 LdWritebackEvent(DynInstPtr &_inst, BackEnd *be);
186 /** Processes writeback event. */
187 virtual void process();
188 /** Returns the description of the writeback event. */
189 virtual const char *description() const;
// Constructor taking a Params pointer (definition not in this excerpt).
// NOTE(review): not declared explicit, so a Params* converts implicitly.
192 BackEnd(Params *params);
// Returns a std::string; presumably the back end's name - definition not visible.
194 std::string name() const;
// Receives a pointer to the FullCPU.
// NOTE(review): the body of this setter is not visible in this excerpt.
198 void setCPU(FullCPU *cpu_ptr)
// Stores front_end_ptr in the frontEnd member (plain pointer assignment).
201 void setFrontEnd(FrontEnd *front_end_ptr)
202 { frontEnd = front_end_ptr; }
// Receives a ThreadContext pointer.
// NOTE(review): the body of this setter is not visible in this excerpt.
204 void setTC(ThreadContext *tc_ptr)
// Stores thread_ptr in the thread member (plain pointer assignment).
207 void setThreadState(Thread *thread_ptr)
208 { thread = thread_ptr; }
// Declaration only; receives a pointer to a CommStruct time buffer.
210 void setCommBuffer(TimeBuffer<CommStruct> *_comm);
// Memory access entry points returning a Fault. Their definitions at the end
// of this header return LSQ.read(...) and LSQ.write(...) respectively.
// NOTE(review): T is not declared on the visible lines; presumably each of
// these is preceded by a member-template header - confirm.
218 Fault read(RequestPtr req, T &data, int load_idx);
221 Fault write(RequestPtr req, T &data, int store_idx);
// Returns the current value of commitPC by value.
223 Addr readCommitPC() { return commitPC; }
// True when instList holds no instructions.
227 bool robEmpty() { return instList.empty(); }
// True when numInsts is greater than or equal to numROBEntries.
229 bool isFull() { return numInsts >= numROBEntries; }
// True when either status or dispatchStatus equals Blocked.
230 bool isBlocked() { return status == Blocked || dispatchStatus == Blocked; }
232 /** Tells memory dependence unit that a memory instruction needs to be
233 * rescheduled. It will re-execute once replayMemInst() is called. */
235 void rescheduleMemInst(DynInstPtr &inst)
// Thin wrapper: passes inst straight through to IQ.rescheduleMemInst().
236 { IQ.rescheduleMemInst(inst); }
238 /** Re-executes all rescheduled memory instructions. */
// Thin wrapper: passes the single instruction through to IQ.replayMemInst().
// NOTE(review): the comment above says "all" but one instruction is passed;
// confirm the intended scope against IQ's definition.
239 void replayMemInst(DynInstPtr &inst)
240 { IQ.replayMemInst(inst); }
242 /** Completes memory instruction. */
// Thin wrapper: passes inst straight through to IQ.completeMemInst().
243 void completeMemInst(DynInstPtr &inst)
244 { IQ.completeMemInst(inst); }
// Member function declarations; every definition lives outside this excerpt.
// NOTE(review): the grouping remarks below are inferred from names and
// signatures only - confirm against the implementation file.
// Takes the fault by non-const reference.
246 void fetchFault(Fault &fault);
// Parameterless steps; presumably invoked from the per-cycle driver - TODO confirm.
249 void updateStructures();
250 void dispatchInsts();
251 void dispatchStall();
252 void checkDispatchStatus();
253 void scheduleReadyInsts();
// Per-instruction operations; each takes the instruction by non-const reference.
256 void addToIQ(DynInstPtr &inst);
257 void addToLSQ(DynInstPtr &inst);
258 void instToCommit(DynInstPtr &inst);
259 void writebackInsts();
// Returns bool; the argument is an int index.
260 bool commitInst(int inst_num);
// Squash entry points: one keyed by sequence number, two keyed by instruction.
261 void squash(const InstSeqNum &sn);
262 void squashDueToBranch(DynInstPtr &inst);
263 void squashDueToMemBlocked(DynInstPtr &inst);
// Statistics hooks taking the instruction by non-const reference.
264 void updateExeInstStats(DynInstPtr &inst);
265 void updateComInstStats(DynInstPtr &inst);
// Compared against Blocked by isBlocked().
// NOTE(review): the Status type is declared outside this excerpt.
286 Status dispatchStatus;
// The Impl-provided InstQueue alias is disabled; an InstQueue constructor is
// declared directly in this header.
291 // typedef typename Impl::InstQueue InstQueue;
// The load/store queue type, by contrast, is taken from Impl.
295 typedef typename Impl::LdstQueue LdstQueue;
// Two rename tables of the same RenameTable<Impl> type.
299 RenameTable<Impl> commitRenameTable;
301 RenameTable<Impl> renameTable;
// Event subclass constructed from a BackEnd pointer; declares process() and
// description() as virtual.
// NOTE(review): the class braces and any data members are not visible in
// this excerpt.
303 class DCacheCompletionEvent : public Event
309 DCacheCompletionEvent(BackEnd *_be);
311 virtual void process();
312 virtual const char *description() const;
// Grants the event class access to BackEnd's non-public members.
315 friend class DCacheCompletionEvent;
// A single event instance is embedded by value.
317 DCacheCompletionEvent cacheCompletionEvent;
// Data cache interface, referenced through a raw pointer.
319 MemInterface *dcacheInterface;
323 // General back end width. Used if the more specific isn't given.
// NOTE(review): the declaration that the comment above describes is not
// visible in this excerpt; the comment does not appear to describe
// numDispatchEntries.
328 int numDispatchEntries;
339 /** Index into queue of instructions being written back. */
342 /** Cycle number within the queue of instructions being written
343 * back. Used in case there are too many instructions writing
344 * back at the current cycle and writebacks need to be scheduled
345 * for the future. See comments in instToCommit(). */
// NOTE(review): presumably the sequence number of a pending squash - confirm.
353 InstSeqNum squashSeqNum;
// NOTE(review): presumably recorded by fetchFault() - confirm.
356 Fault faultFromFetch;
359 typedef typename std::list<DynInstPtr>::iterator InstListIt;
// Three instruction lists; robEmpty() reports whether instList is empty.
361 std::list<DynInstPtr> instList;
362 std::list<DynInstPtr> dispatch;
363 std::list<DynInstPtr> writeback;
// One flag per thread slot, sized by Impl::MaxThreads.
371 bool fetchRedirect[Impl::MaxThreads];
373 // number of cycles stalled for D-cache misses
374 /* Stats::Scalar dcacheStallCycles;
375 Counter lastDcacheStall; */
// Statistics members. NOTE(review): only the existing inline comments say
// what each counter means; registration code is outside this excerpt.
377 Stats::Vector rob_cap_events;
378 Stats::Vector rob_cap_inst_count;
379 Stats::Vector iq_cap_events;
380 Stats::Vector iq_cap_inst_count;
381 // total number of instructions executed
382 Stats::Vector exe_inst;
383 Stats::Vector exe_swp;
384 Stats::Vector exe_nop;
385 Stats::Vector exe_refs;
386 Stats::Vector exe_loads;
387 Stats::Vector exe_branches;
389 Stats::Vector issued_ops;
391 // total number of loads forwarded from LSQ stores
392 Stats::Vector lsq_forw_loads;
394 // total number of loads ignored due to invalid addresses
395 Stats::Vector inv_addr_loads;
397 // total number of software prefetches ignored due to invalid addresses
398 Stats::Vector inv_addr_swpfs;
399 // ready loads blocked due to memory disambiguation
400 Stats::Vector lsq_blocked_loads;
402 Stats::Scalar lsqInversion;
404 Stats::Vector n_issued_dist;
405 Stats::VectorDistribution issue_delay_dist;
407 Stats::VectorDistribution queue_res_dist;
409 Stats::Vector stat_fu_busy;
410 Stats::Vector2d stat_fuBusy;
411 Stats::Vector dist_unissued;
412 Stats::Vector2d stat_issued_inst_type;
// Derived statistics (Stats::Formula).
// NOTE(review): the expressions they evaluate are assigned outside this
// excerpt.
414 Stats::Formula misspec_cnt;
415 Stats::Formula misspec_ipc;
416 Stats::Formula issue_rate;
417 Stats::Formula issue_stores;
418 Stats::Formula issue_op_rate;
419 Stats::Formula fu_busy_rate;
420 Stats::Formula commit_stores;
421 Stats::Formula commit_ipc;
422 Stats::Formula commit_ipb;
423 Stats::Formula lsq_inv_rate;
// Per-index counters, presumably related to writeback - TODO confirm where
// they are incremented.
425 Stats::Vector writeback_count;
426 Stats::Vector producer_inst;
427 Stats::Vector consumer_inst;
428 Stats::Vector wb_penalized;
// Formulas, presumably derived from the writeback counters above - confirm.
430 Stats::Formula wb_rate;
431 Stats::Formula wb_fanout;
432 Stats::Formula wb_penalized_rate;
434 // total number of instructions committed
435 Stats::Vector stat_com_inst;
436 Stats::Vector stat_com_swp;
437 Stats::Vector stat_com_refs;
438 Stats::Vector stat_com_loads;
439 Stats::Vector stat_com_membars;
440 Stats::Vector stat_com_branches;
// A single (non-vector) distribution.
442 Stats::Distribution n_committed_dist;
444 Stats::Scalar commit_eligible_samples;
445 Stats::Vector commit_eligible;
// ROB statistics: a scalar counter and a formula.
// NOTE(review): the formula's expression is assigned outside this excerpt.
447 Stats::Scalar ROB_fcount;
448 Stats::Formula ROB_full_rate;
450 Stats::Vector ROB_count; // cumulative ROB occupancy
451 Stats::Formula ROB_occ_rate;
452 Stats::VectorDistribution ROB_occ_dist;
// Out-of-class definition of BackEnd<Impl>::read.
// The only statement outside the block comment is the final return, which
// hands req, data and load_idx to LSQ.read() and returns its result.
// The block comment holds an older in-line path (address translation, dcache
// access, functional read) that is disabled.
// NOTE(review): the second template header, the return type, the function
// braces and the block comment's closing delimiter are not visible in this
// excerpt.
457 template <class Impl>
460 BackEnd<Impl>::read(RequestPtr req, T &data, int load_idx)
462 /* memReq->reset(addr, sizeof(T), flags);
464 // translate to physical address
465 Fault fault = cpu->translateDataReadReq(memReq);
467 // if we have a cache, do cache access too
468 if (fault == NoFault && dcacheInterface) {
470 memReq->completionEvent = NULL;
471 memReq->time = curTick();
472 memReq->flags &= ~INST_READ;
473 MemAccessResult result = dcacheInterface->access(memReq);
475 // Ugly hack to get an event scheduled *only* if the access is
476 // a miss. We really should add first-class support for this
478 if (result != MA_HIT && dcacheInterface->doEvents()) {
479 // Fix this hack for keeping funcExeInst correct with loads that
480 // are executed twice.
483 memReq->completionEvent = &cacheCompletionEvent;
484 lastDcacheStall = curTick();
485 // unscheduleTickEvent();
486 // status = DcacheMissStall;
487 DPRINTF(OzoneCPU, "Dcache miss stall!\n");
489 // do functional access
490 fault = thread->mem->read(memReq, data);
495 return LSQ.read(req, data, load_idx);
// Out-of-class definition of BackEnd<Impl>::write.
// The final statement hands req, data and store_idx to LSQ.write() and
// returns its result.
// NOTE(review): the statements between the signature and that return use
// names (memReq, addr, flags, res) that are not parameters of this function.
// They mirror the disabled legacy path in read() and appear to be
// commented-out code whose comment delimiters are not visible in this
// excerpt - confirm before treating them as live.
498 template <class Impl>
501 BackEnd<Impl>::write(RequestPtr req, T &data, int store_idx)
504 memReq->reset(addr, sizeof(T), flags);
506 // translate to physical address
507 Fault fault = cpu->translateDataWriteReq(memReq);
509 if (fault == NoFault && dcacheInterface) {
511 memcpy(memReq->data,(uint8_t *)&data,memReq->size);
512 memReq->completionEvent = NULL;
513 memReq->time = curTick();
514 memReq->flags &= ~INST_READ;
515 MemAccessResult result = dcacheInterface->access(memReq);
517 // Ugly hack to get an event scheduled *only* if the access is
518 // a miss. We really should add first-class support for this
520 if (result != MA_HIT && dcacheInterface->doEvents()) {
521 memReq->completionEvent = &cacheCompletionEvent;
522 lastDcacheStall = curTick();
523 // unscheduleTickEvent();
524 // status = DcacheMissStall;
525 DPRINTF(OzoneCPU, "Dcache miss stall!\n");
529 if (res && (fault == NoFault))
530 *res = memReq->result;
532 return LSQ.write(req, data, store_idx);
535 #endif // __CPU_OZONE_BACK_END_HH__