Merge ktlim@zamp:./local/clean/o3-merge/m5
[gem5.git] / src / cpu / ozone / lw_back_end.hh
1 /*
2 * Copyright (c) 2006 The Regents of The University of Michigan
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * Authors: Kevin Lim
29 */
30
31 #ifndef __CPU_OZONE_LW_BACK_END_HH__
32 #define __CPU_OZONE_LW_BACK_END_HH__
33
34 #include <list>
35 #include <queue>
36 #include <set>
37 #include <string>
38
39 #include "arch/faults.hh"
40 #include "base/timebuf.hh"
41 #include "cpu/inst_seq.hh"
42 #include "cpu/ozone/rename_table.hh"
43 #include "cpu/ozone/thread_state.hh"
44 #include "mem/request.hh"
45 #include "sim/eventq.hh"
46
// Forward declarations.  This header only names these types through
// pointers or as the target of a typedef, so complete definitions are not
// needed here.
class Port;
class ThreadContext;

template <class> class Checker;
template <class Impl> class OzoneThreadState;
55
56 template <class Impl>
57 class LWBackEnd
58 {
59 public:
60 typedef OzoneThreadState<Impl> Thread;
61
62 typedef typename Impl::Params Params;
63 typedef typename Impl::DynInst DynInst;
64 typedef typename Impl::DynInstPtr DynInstPtr;
65 typedef typename Impl::OzoneCPU OzoneCPU;
66 typedef typename Impl::FrontEnd FrontEnd;
67 typedef typename Impl::OzoneCPU::CommStruct CommStruct;
68
69 struct SizeStruct {
70 int size;
71 };
72
73 typedef SizeStruct DispatchToIssue;
74 typedef SizeStruct IssueToExec;
75 typedef SizeStruct ExecToCommit;
76 typedef SizeStruct Writeback;
77
78 TimeBuffer<DispatchToIssue> d2i;
79 typename TimeBuffer<DispatchToIssue>::wire instsToDispatch;
80 TimeBuffer<IssueToExec> i2e;
81 typename TimeBuffer<IssueToExec>::wire instsToExecute;
82 TimeBuffer<ExecToCommit> e2c;
83 TimeBuffer<int> numInstsToWB;
84
85 TimeBuffer<CommStruct> *comm;
86 typename TimeBuffer<CommStruct>::wire toIEW;
87 typename TimeBuffer<CommStruct>::wire fromCommit;
88
89 class TrapEvent : public Event {
90 private:
91 LWBackEnd<Impl> *be;
92
93 public:
94 TrapEvent(LWBackEnd<Impl> *_be);
95
96 void process();
97 const char *description();
98 };
99
100 LWBackEnd(Params *params);
101
102 std::string name() const;
103
104 void regStats();
105
106 void setCPU(OzoneCPU *cpu_ptr);
107
108 void setFrontEnd(FrontEnd *front_end_ptr)
109 { frontEnd = front_end_ptr; }
110
111 void setTC(ThreadContext *tc_ptr)
112 { tc = tc_ptr; }
113
114 void setThreadState(Thread *thread_ptr)
115 { thread = thread_ptr; }
116
117 void setCommBuffer(TimeBuffer<CommStruct> *_comm);
118
119 Port *getDcachePort() { return LSQ.getDcachePort(); }
120
121 void tick();
122 void squash();
123 void generateTCEvent() { tcSquash = true; }
124 void squashFromTC();
125 void squashFromTrap();
126 void checkInterrupts();
127 bool trapSquash;
128 bool tcSquash;
129
130 template <class T>
131 Fault read(RequestPtr req, T &data, int load_idx);
132
133 template <class T>
134 Fault write(RequestPtr req, T &data, int store_idx);
135
136 Addr readCommitPC() { return commitPC; }
137
138 Addr commitPC;
139
140 Tick lastCommitCycle;
141
142 bool robEmpty() { return numInsts == 0; }
143
144 bool isFull() { return numInsts >= numROBEntries; }
145 bool isBlocked() { return status == Blocked || dispatchStatus == Blocked; }
146
147 void fetchFault(Fault &fault);
148
149 int wakeDependents(DynInstPtr &inst, bool memory_deps = false);
150
151 /** Tells memory dependence unit that a memory instruction needs to be
152 * rescheduled. It will re-execute once replayMemInst() is called.
153 */
154 void rescheduleMemInst(DynInstPtr &inst);
155
156 /** Re-executes all rescheduled memory instructions. */
157 void replayMemInst(DynInstPtr &inst);
158
159 /** Completes memory instruction. */
160 void completeMemInst(DynInstPtr &inst) { }
161
162 void addDcacheMiss(DynInstPtr &inst)
163 {
164 waitingMemOps.insert(inst->seqNum);
165 numWaitingMemOps++;
166 DPRINTF(BE, "Adding a Dcache miss mem op [sn:%lli], total %i\n",
167 inst->seqNum, numWaitingMemOps);
168 }
169
170 void removeDcacheMiss(DynInstPtr &inst)
171 {
172 assert(waitingMemOps.find(inst->seqNum) != waitingMemOps.end());
173 waitingMemOps.erase(inst->seqNum);
174 numWaitingMemOps--;
175 DPRINTF(BE, "Removing a Dcache miss mem op [sn:%lli], total %i\n",
176 inst->seqNum, numWaitingMemOps);
177 }
178
179 void addWaitingMemOp(DynInstPtr &inst)
180 {
181 waitingMemOps.insert(inst->seqNum);
182 numWaitingMemOps++;
183 DPRINTF(BE, "Adding a waiting mem op [sn:%lli], total %i\n",
184 inst->seqNum, numWaitingMemOps);
185 }
186
187 void removeWaitingMemOp(DynInstPtr &inst)
188 {
189 assert(waitingMemOps.find(inst->seqNum) != waitingMemOps.end());
190 waitingMemOps.erase(inst->seqNum);
191 numWaitingMemOps--;
192 DPRINTF(BE, "Removing a waiting mem op [sn:%lli], total %i\n",
193 inst->seqNum, numWaitingMemOps);
194 }
195
196 void instToCommit(DynInstPtr &inst);
197 void readyInstsForCommit();
198
199 void switchOut();
200 void doSwitchOut();
201 void takeOverFrom(ThreadContext *old_tc = NULL);
202
203 bool isSwitchedOut() { return switchedOut; }
204
205 private:
206 void generateTrapEvent(Tick latency = 0);
207 void handleFault(Fault &fault, Tick latency = 0);
208 void updateStructures();
209 void dispatchInsts();
210 void dispatchStall();
211 void checkDispatchStatus();
212 void executeInsts();
213 void commitInsts();
214 void addToLSQ(DynInstPtr &inst);
215 void writebackInsts();
216 bool commitInst(int inst_num);
217 void squash(const InstSeqNum &sn);
218 void squashDueToBranch(DynInstPtr &inst);
219 void squashDueToMemViolation(DynInstPtr &inst);
220 void squashDueToMemBlocked(DynInstPtr &inst);
221 void updateExeInstStats(DynInstPtr &inst);
222 void updateComInstStats(DynInstPtr &inst);
223
224 public:
225 OzoneCPU *cpu;
226
227 FrontEnd *frontEnd;
228
229 ThreadContext *tc;
230
231 Thread *thread;
232
233 enum Status {
234 Running,
235 Idle,
236 DcacheMissStall,
237 DcacheMissComplete,
238 Blocked,
239 TrapPending
240 };
241
242 Status status;
243
244 Status dispatchStatus;
245
246 Status commitStatus;
247
248 Counter funcExeInst;
249
250 private:
251 typedef typename Impl::LdstQueue LdstQueue;
252
253 LdstQueue LSQ;
254 public:
255 RenameTable<Impl> commitRenameTable;
256
257 RenameTable<Impl> renameTable;
258 private:
259 int latency;
260
261 // General back end width. Used if the more specific isn't given.
262 int width;
263
264 // Dispatch width.
265 int dispatchWidth;
266 int dispatchSize;
267
268 int waitingInsts;
269
270 int issueWidth;
271
272 // Writeback width
273 int wbWidth;
274
275 // Commit width
276 int commitWidth;
277
278 /** Index into queue of instructions being written back. */
279 unsigned wbNumInst;
280
281 /** Cycle number within the queue of instructions being written
282 * back. Used in case there are too many instructions writing
283 * back at the current cycle and writesbacks need to be scheduled
284 * for the future. See comments in instToCommit().
285 */
286 unsigned wbCycle;
287
288 int numROBEntries;
289 int numInsts;
290 bool lsqLimits;
291
292 std::set<InstSeqNum> waitingMemOps;
293 typedef std::set<InstSeqNum>::iterator MemIt;
294 int numWaitingMemOps;
295 unsigned maxOutstandingMemOps;
296
297 bool squashPending;
298 InstSeqNum squashSeqNum;
299 Addr squashNextPC;
300
301 bool switchedOut;
302 bool switchPending;
303
304 DynInstPtr memBarrier;
305
306 private:
307 struct pqCompare {
308 bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const
309 {
310 return lhs->seqNum > rhs->seqNum;
311 }
312 };
313
314 typedef typename std::priority_queue<DynInstPtr, std::vector<DynInstPtr>, pqCompare> ReadyInstQueue;
315 ReadyInstQueue exeList;
316
317 typedef typename std::list<DynInstPtr>::iterator InstListIt;
318
319 std::list<DynInstPtr> instList;
320 std::list<DynInstPtr> waitingList;
321 std::list<DynInstPtr> replayList;
322 std::list<DynInstPtr> writeback;
323
324 int squashLatency;
325
326 bool exactFullStall;
327
328 // number of cycles stalled for D-cache misses
329 /* Stats::Scalar<> dcacheStallCycles;
330 Counter lastDcacheStall;
331 */
332 Stats::Vector<> robCapEvents;
333 Stats::Vector<> robCapInstCount;
334 Stats::Vector<> iqCapEvents;
335 Stats::Vector<> iqCapInstCount;
336 // total number of instructions executed
337 Stats::Vector<> exeInst;
338 Stats::Vector<> exeSwp;
339 Stats::Vector<> exeNop;
340 Stats::Vector<> exeRefs;
341 Stats::Vector<> exeLoads;
342 Stats::Vector<> exeBranches;
343
344 Stats::Vector<> issuedOps;
345
346 // total number of loads forwaded from LSQ stores
347 Stats::Vector<> lsqForwLoads;
348
349 // total number of loads ignored due to invalid addresses
350 Stats::Vector<> invAddrLoads;
351
352 // total number of software prefetches ignored due to invalid addresses
353 Stats::Vector<> invAddrSwpfs;
354 // ready loads blocked due to memory disambiguation
355 Stats::Vector<> lsqBlockedLoads;
356
357 Stats::Scalar<> lsqInversion;
358
359 Stats::Vector<> nIssuedDist;
360 /*
361 Stats::VectorDistribution<> issueDelayDist;
362
363 Stats::VectorDistribution<> queueResDist;
364 */
365 /*
366 Stats::Vector<> stat_fu_busy;
367 Stats::Vector2d<> stat_fuBusy;
368 Stats::Vector<> dist_unissued;
369 Stats::Vector2d<> stat_issued_inst_type;
370
371 Stats::Formula misspec_cnt;
372 Stats::Formula misspec_ipc;
373 Stats::Formula issue_rate;
374 Stats::Formula issue_stores;
375 Stats::Formula issue_op_rate;
376 Stats::Formula fu_busy_rate;
377 Stats::Formula commit_stores;
378 Stats::Formula commit_ipc;
379 Stats::Formula commit_ipb;
380 Stats::Formula lsq_inv_rate;
381 */
382 Stats::Vector<> writebackCount;
383 Stats::Vector<> producerInst;
384 Stats::Vector<> consumerInst;
385 Stats::Vector<> wbPenalized;
386
387 Stats::Formula wbRate;
388 Stats::Formula wbFanout;
389 Stats::Formula wbPenalizedRate;
390
391 // total number of instructions committed
392 Stats::Vector<> statComInst;
393 Stats::Vector<> statComSwp;
394 Stats::Vector<> statComRefs;
395 Stats::Vector<> statComLoads;
396 Stats::Vector<> statComMembars;
397 Stats::Vector<> statComBranches;
398
399 Stats::Distribution<> nCommittedDist;
400
401 Stats::Scalar<> commitEligibleSamples;
402 Stats::Vector<> commitEligible;
403
404 Stats::Vector<> squashedInsts;
405 Stats::Vector<> ROBSquashedInsts;
406
407 Stats::Scalar<> ROBFcount;
408 Stats::Formula ROBFullRate;
409
410 Stats::Vector<> ROBCount; // cumulative ROB occupancy
411 Stats::Formula ROBOccRate;
412 // Stats::VectorDistribution<> ROBOccDist;
413 public:
414 void dumpInsts();
415
416 Checker<DynInstPtr> *checker;
417 };
418
419 template <class Impl>
420 template <class T>
421 Fault
422 LWBackEnd<Impl>::read(RequestPtr req, T &data, int load_idx)
423 {
424 return LSQ.read(req, data, load_idx);
425 }
426
427 template <class Impl>
428 template <class T>
429 Fault
430 LWBackEnd<Impl>::write(RequestPtr req, T &data, int store_idx)
431 {
432 return LSQ.write(req, data, store_idx);
433 }
434
435 #endif // __CPU_OZONE_LW_BACK_END_HH__