Merge ktlim@zizzer:/bk/newmem
[gem5.git] / src / cpu / ozone / lw_back_end.hh
1 /*
2 * Copyright (c) 2006 The Regents of The University of Michigan
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * Authors: Kevin Lim
29 */
30
31 #ifndef __CPU_OZONE_LW_BACK_END_HH__
32 #define __CPU_OZONE_LW_BACK_END_HH__
33
34 #include <list>
35 #include <queue>
36 #include <set>
37 #include <string>
38
39 #include "arch/faults.hh"
40 #include "base/timebuf.hh"
41 #include "cpu/inst_seq.hh"
42 #include "cpu/ozone/rename_table.hh"
43 #include "cpu/ozone/thread_state.hh"
44 #include "mem/request.hh"
45 #include "sim/eventq.hh"
46
// Forward declarations. LWBackEnd below only stores pointers to
// Checker<...> and ThreadContext, so their complete types are not needed
// in this header.
47 template <class>
48 class Checker;
49 class ThreadContext;
50
// NOTE(review): cpu/ozone/thread_state.hh is already included above, so
// this forward declaration of OzoneThreadState may be redundant - confirm.
51 template <class Impl>
52 class OzoneThreadState;
53
/**
 * Back end of the Ozone CPU model, templated on an Impl policy class that
 * supplies the Params, DynInst, DynInstPtr, OzoneCPU, FrontEnd and
 * LdstQueue types used below.
 *
 * The class declares the dispatch, execute, writeback, commit and squash
 * entry points, the containers that hold in-flight instructions, and the
 * statistics for those stages. Apart from the small inline accessors and
 * the read()/write() templates defined at the end of this header, the
 * member functions are only declared here.
 *
 * NOTE(review): data members are constructed in declaration order; the
 * constructor's initializer list is not visible in this header, so do not
 * reorder members without checking it.
 */
54 template <class Impl>
55 class LWBackEnd
56 {
57 public:
    // Thread state type, specialized for this Impl.
58 typedef OzoneThreadState<Impl> Thread;
59
    // Concrete types pulled out of the Impl policy class.
60 typedef typename Impl::Params Params;
61 typedef typename Impl::DynInst DynInst;
62 typedef typename Impl::DynInstPtr DynInstPtr;
63 typedef typename Impl::OzoneCPU OzoneCPU;
64 typedef typename Impl::FrontEnd FrontEnd;
65 typedef typename Impl::OzoneCPU::CommStruct CommStruct;
66
    /** Payload type of the inter-stage time buffers: a single int field. */
67 struct SizeStruct {
68 int size;
69 };
70
    // All four stage-to-stage payload types currently alias SizeStruct.
71 typedef SizeStruct DispatchToIssue;
72 typedef SizeStruct IssueToExec;
73 typedef SizeStruct ExecToCommit;
74 typedef SizeStruct Writeback;
75
    // Time buffers named after the stage pairs, plus wire handles of the
    // matching buffer types.
    // NOTE(review): presumably instsToDispatch is bound to d2i and
    // instsToExecute to i2e; the binding is not visible here - confirm.
76 TimeBuffer<DispatchToIssue> d2i;
77 typename TimeBuffer<DispatchToIssue>::wire instsToDispatch;
78 TimeBuffer<IssueToExec> i2e;
79 typename TimeBuffer<IssueToExec>::wire instsToExecute;
80 TimeBuffer<ExecToCommit> e2c;
81 TimeBuffer<Writeback> numInstsToWB;
82
    // Pointer to a CommStruct time buffer and two wires of that buffer type.
    // NOTE(review): presumably installed by setCommBuffer() - confirm.
83 TimeBuffer<CommStruct> *comm;
84 typename TimeBuffer<CommStruct>::wire toIEW;
85 typename TimeBuffer<CommStruct>::wire fromCommit;
86
    /**
     * Event subclass that keeps a pointer to a LWBackEnd. Its constructor,
     * process() and description() are only declared here.
     */
87 class TrapEvent : public Event {
88 private:
89 LWBackEnd<Impl> *be;
90
91 public:
92 TrapEvent(LWBackEnd<Impl> *_be);
93
94 void process();
95 const char *description();
96 };
97
    /** Constructs the back end from a Params object (declared only). */
98 LWBackEnd(Params *params);
99
100 std::string name() const;
101
102 void regStats();
103
104 void setCPU(OzoneCPU *cpu_ptr);
105
    /** Stores the given front end pointer in frontEnd. */
106 void setFrontEnd(FrontEnd *front_end_ptr)
107 { frontEnd = front_end_ptr; }
108
    /** Stores the given thread context pointer in tc. */
109 void setTC(ThreadContext *tc_ptr)
110 { tc = tc_ptr; }
111
    /** Stores the given thread state pointer in thread. */
112 void setThreadState(Thread *thread_ptr)
113 { thread = thread_ptr; }
114
115 void setCommBuffer(TimeBuffer<CommStruct> *_comm);
116
117 void tick();
118 void squash();
    /** Only sets the tcSquash flag; no other work is done in this call. */
119 void generateTCEvent() { tcSquash = true; }
120 void squashFromTC();
121 void squashFromTrap();
122 void checkInterrupts();
    // Squash request flags. tcSquash is raised by generateTCEvent().
    // NOTE(review): trapSquash is presumably raised by the trap event path
    // - confirm.
123 bool trapSquash;
124 bool tcSquash;
125
    /**
     * Memory read; the definition at the end of this header forwards the
     * arguments to LSQ.read() and returns its Fault.
     */
126 template <class T>
127 Fault read(RequestPtr req, T &data, int load_idx);
128
    /**
     * Memory write; the definition at the end of this header forwards the
     * arguments to LSQ.write() and returns its Fault.
     */
129 template <class T>
130 Fault write(RequestPtr req, T &data, int store_idx);
131
    /** Returns the current value of commitPC. */
132 Addr readCommitPC() { return commitPC; }
133
134 Addr commitPC;
135
    // NOTE(review): presumably the tick at which the last commit happened
    // - confirm.
136 Tick lastCommitCycle;
137
    /** True when instList holds no instructions. */
138 bool robEmpty() { return instList.empty(); }
139
    /** True when numInsts has reached (or exceeded) numROBEntries. */
140 bool isFull() { return numInsts >= numROBEntries; }
    /** True when either status or dispatchStatus equals Blocked. */
141 bool isBlocked() { return status == Blocked || dispatchStatus == Blocked; }
142
    // NOTE(review): presumably records a fault reported by fetch (see
    // faultFromFetch / fetchHasFault below) - confirm.
143 void fetchFault(Fault &fault);
144
    // memory_deps defaults to false.
    // NOTE(review): the int return is presumably the number of dependents
    // woken - confirm.
145 int wakeDependents(DynInstPtr &inst, bool memory_deps = false);
146
147 /** Tells memory dependence unit that a memory instruction needs to be
148 * rescheduled. It will re-execute once replayMemInst() is called.
149 */
150 void rescheduleMemInst(DynInstPtr &inst);
151
152 /** Re-executes all rescheduled memory instructions. */
    // NOTE(review): the signature takes a single instruction although the
    // comment above says "all" - confirm which is intended.
153 void replayMemInst(DynInstPtr &inst);
154
155 /** Completes memory instruction. */
    // The body is empty in this back end.
156 void completeMemInst(DynInstPtr &inst) { }
157
    /**
     * Registers inst as an outstanding D-cache miss: inserts its seqNum
     * into waitingMemOps, increments numWaitingMemOps and traces the new
     * total under the BE flag.
     *
     * Shares the set and the counter with addWaitingMemOp(). std::set
     * ignores a duplicate insert while the counter is always incremented,
     * so adding the same seqNum twice makes the counter exceed the set's
     * size.
     */
158 void addDcacheMiss(DynInstPtr &inst)
159 {
160 waitingMemOps.insert(inst->seqNum);
161 numWaitingMemOps++;
162 DPRINTF(BE, "Adding a Dcache miss mem op [sn:%lli], total %i\n",
163 inst->seqNum, numWaitingMemOps);
164 }
165
    /**
     * Reverses addDcacheMiss(): asserts that inst's seqNum is in
     * waitingMemOps, erases it, decrements numWaitingMemOps and traces the
     * new total. If the assert is compiled out, the counter is still
     * decremented even when the seqNum was not present.
     */
166 void removeDcacheMiss(DynInstPtr &inst)
167 {
168 assert(waitingMemOps.find(inst->seqNum) != waitingMemOps.end());
169 waitingMemOps.erase(inst->seqNum);
170 numWaitingMemOps--;
171 DPRINTF(BE, "Removing a Dcache miss mem op [sn:%lli], total %i\n",
172 inst->seqNum, numWaitingMemOps);
173 }
174
    /**
     * Registers inst as a waiting memory op. Same bookkeeping as
     * addDcacheMiss() (same set, same counter); only the trace message
     * differs.
     */
175 void addWaitingMemOp(DynInstPtr &inst)
176 {
177 waitingMemOps.insert(inst->seqNum);
178 numWaitingMemOps++;
179 DPRINTF(BE, "Adding a waiting mem op [sn:%lli], total %i\n",
180 inst->seqNum, numWaitingMemOps);
181 }
182
    /**
     * Reverses addWaitingMemOp(). Same bookkeeping as removeDcacheMiss();
     * only the trace message differs.
     */
183 void removeWaitingMemOp(DynInstPtr &inst)
184 {
185 assert(waitingMemOps.find(inst->seqNum) != waitingMemOps.end());
186 waitingMemOps.erase(inst->seqNum);
187 numWaitingMemOps--;
188 DPRINTF(BE, "Removing a waiting mem op [sn:%lli], total %i\n",
189 inst->seqNum, numWaitingMemOps);
190 }
191
192 void instToCommit(DynInstPtr &inst);
193
    // CPU switch-out / take-over hooks; only declared here. old_tc
    // defaults to NULL.
194 void switchOut();
195 void doSwitchOut();
196 void takeOverFrom(ThreadContext *old_tc = NULL);
197
    /** Returns the switchedOut flag. */
198 bool isSwitchedOut() { return switchedOut; }
199
200 private:
    // Internal helpers for trap/fault handling, the per-stage work
    // (dispatch, execute, writeback, commit) and the different squash
    // causes. All are only declared here. latency defaults to 0 where
    // present.
201 void generateTrapEvent(Tick latency = 0);
202 void handleFault(Fault &fault, Tick latency = 0);
203 void updateStructures();
204 void dispatchInsts();
205 void dispatchStall();
206 void checkDispatchStatus();
207 void executeInsts();
208 void commitInsts();
209 void addToLSQ(DynInstPtr &inst);
210 void writebackInsts();
211 bool commitInst(int inst_num);
212 void squash(const InstSeqNum &sn);
213 void squashDueToBranch(DynInstPtr &inst);
214 void squashDueToMemViolation(DynInstPtr &inst);
215 void squashDueToMemBlocked(DynInstPtr &inst);
216 void updateExeInstStats(DynInstPtr &inst);
217 void updateComInstStats(DynInstPtr &inst);
218
219 public:
    // Non-owning-looking raw pointers to collaborating objects. frontEnd,
    // tc and thread are assigned by the inline setters above; cpu is
    // presumably assigned by setCPU() - confirm.
220 OzoneCPU *cpu;
221
222 FrontEnd *frontEnd;
223
224 ThreadContext *tc;
225
226 Thread *thread;
227
    /** State values shared by status, dispatchStatus and commitStatus. */
228 enum Status {
229 Running,
230 Idle,
231 DcacheMissStall,
232 DcacheMissComplete,
233 Blocked,
234 TrapPending
235 };
236
    // Three independent state variables of the same enum type; isBlocked()
    // consults status and dispatchStatus.
237 Status status;
238
239 Status dispatchStatus;
240
241 Status commitStatus;
242
243 Counter funcExeInst;
244
245 private:
246 typedef typename Impl::LdstQueue LdstQueue;
247
    // Load/store queue instance; read() and write() forward to it.
248 LdstQueue LSQ;
249 public:
    // Two rename tables, both public.
    // NOTE(review): presumably commitRenameTable holds committed mappings
    // and renameTable the speculative ones - confirm.
250 RenameTable<Impl> commitRenameTable;
251
252 RenameTable<Impl> renameTable;
253 private:
254 // General back end width. Used if the more specific isn't given.
255 int width;
256
257 // Dispatch width.
258 int dispatchWidth;
259 int numDispatchEntries;
260 int dispatchSize;
261
262 int waitingInsts;
263
264 int issueWidth;
265
266 // Writeback width
267 int wbWidth;
268
269 // Commit width
270 int commitWidth;
271
272 /** Index into queue of instructions being written back. */
273 unsigned wbNumInst;
274
275 /** Cycle number within the queue of instructions being written
276 * back. Used in case there are too many instructions writing
277 * back at the current cycle and writebacks need to be scheduled
278 * for the future. See comments in instToCommit().
279 */
280 unsigned wbCycle;
281
    // isFull() compares numInsts against numROBEntries.
282 int numROBEntries;
283 int numInsts;
284
    // Sequence numbers registered through addDcacheMiss() and
    // addWaitingMemOp(), with a separately maintained counter (see the
    // note on addDcacheMiss() about how the two can diverge).
285 std::set<InstSeqNum> waitingMemOps;
286 typedef std::set<InstSeqNum>::iterator MemIt;
287 int numWaitingMemOps;
288 unsigned maxOutstandingMemOps;
289
    // Pending-squash bookkeeping.
    // NOTE(review): presumably consumed by squash(const InstSeqNum &) -
    // confirm.
290 bool squashPending;
291 InstSeqNum squashSeqNum;
292 Addr squashNextPC;
293
294 Fault faultFromFetch;
295 bool fetchHasFault;
296
    // switchedOut is what isSwitchedOut() returns.
297 bool switchedOut;
298 bool switchPending;
299
300 DynInstPtr memBarrier;
301
302 private:
    /**
     * Comparator for ReadyInstQueue. It reports lhs as lower priority when
     * its seqNum is larger, so std::priority_queue::top() yields the
     * instruction with the smallest seqNum.
     */
303 struct pqCompare {
304 bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const
305 {
306 return lhs->seqNum > rhs->seqNum;
307 }
308 };
309
310 typedef typename std::priority_queue<DynInstPtr, std::vector<DynInstPtr>, pqCompare> ReadyInstQueue;
311 ReadyInstQueue exeList;
312
313 typedef typename std::list<DynInstPtr>::iterator InstListIt;
314
    // Instruction lists. robEmpty() reports whether instList is empty.
315 std::list<DynInstPtr> instList;
316 std::list<DynInstPtr> waitingList;
317 std::list<DynInstPtr> replayList;
318 std::list<DynInstPtr> writeback;
319
320 int latency;
321
322 int squashLatency;
323
324 bool exactFullStall;
325
    // Statistics. The blocks wrapped in /* ... */ below are disabled
    // declarations kept in the original source.
326 // number of cycles stalled for D-cache misses
327 /* Stats::Scalar<> dcacheStallCycles;
328 Counter lastDcacheStall;
329 */
330 Stats::Vector<> rob_cap_events;
331 Stats::Vector<> rob_cap_inst_count;
332 Stats::Vector<> iq_cap_events;
333 Stats::Vector<> iq_cap_inst_count;
334 // total number of instructions executed
335 Stats::Vector<> exe_inst;
336 Stats::Vector<> exe_swp;
337 Stats::Vector<> exe_nop;
338 Stats::Vector<> exe_refs;
339 Stats::Vector<> exe_loads;
340 Stats::Vector<> exe_branches;
341
342 Stats::Vector<> issued_ops;
343
344 // total number of loads forwarded from LSQ stores
345 Stats::Vector<> lsq_forw_loads;
346
347 // total number of loads ignored due to invalid addresses
348 Stats::Vector<> inv_addr_loads;
349
350 // total number of software prefetches ignored due to invalid addresses
351 Stats::Vector<> inv_addr_swpfs;
352 // ready loads blocked due to memory disambiguation
353 Stats::Vector<> lsq_blocked_loads;
354
355 Stats::Scalar<> lsqInversion;
356
357 Stats::Vector<> n_issued_dist;
358 Stats::VectorDistribution<> issue_delay_dist;
359
360 Stats::VectorDistribution<> queue_res_dist;
361 /*
362 Stats::Vector<> stat_fu_busy;
363 Stats::Vector2d<> stat_fuBusy;
364 Stats::Vector<> dist_unissued;
365 Stats::Vector2d<> stat_issued_inst_type;
366
367 Stats::Formula misspec_cnt;
368 Stats::Formula misspec_ipc;
369 Stats::Formula issue_rate;
370 Stats::Formula issue_stores;
371 Stats::Formula issue_op_rate;
372 Stats::Formula fu_busy_rate;
373 Stats::Formula commit_stores;
374 Stats::Formula commit_ipc;
375 Stats::Formula commit_ipb;
376 Stats::Formula lsq_inv_rate;
377 */
    // Writeback statistics.
378 Stats::Vector<> writeback_count;
379 Stats::Vector<> producer_inst;
380 Stats::Vector<> consumer_inst;
381 Stats::Vector<> wb_penalized;
382
383 Stats::Formula wb_rate;
384 Stats::Formula wb_fanout;
385 Stats::Formula wb_penalized_rate;
386
387 // total number of instructions committed
388 Stats::Vector<> stat_com_inst;
389 Stats::Vector<> stat_com_swp;
390 Stats::Vector<> stat_com_refs;
391 Stats::Vector<> stat_com_loads;
392 Stats::Vector<> stat_com_membars;
393 Stats::Vector<> stat_com_branches;
394
395 Stats::Distribution<> n_committed_dist;
396
397 Stats::Scalar<> commit_eligible_samples;
398 Stats::Vector<> commit_eligible;
399
400 Stats::Vector<> squashedInsts;
401 Stats::Vector<> ROBSquashedInsts;
402
403 Stats::Scalar<> ROB_fcount;
404 Stats::Formula ROB_full_rate;
405
406 Stats::Vector<> ROB_count; // cumulative ROB occupancy
407 Stats::Formula ROB_occ_rate;
408 Stats::VectorDistribution<> ROB_occ_dist;
409 public:
    // Debug dump of instructions; only declared here.
410 void dumpInsts();
411
    // Pointer to a Checker instantiated on DynInstPtr.
    // NOTE(review): presumably null when no checker is attached - confirm.
412 Checker<DynInstPtr> *checker;
413 };
414
/**
 * Forwards a read to the load/store queue.
 *
 * @param req Request handed through to LSQ.read() unchanged.
 * @param data Reference handed through to LSQ.read(); presumably receives
 *             the loaded value - TODO confirm against the LSQ.
 * @param load_idx Handed through unchanged; presumably the load's index
 *                 in the LSQ - TODO confirm.
 * @return The Fault returned by LSQ.read().
 */
415 template <class Impl>
416 template <class T>
417 Fault
418 LWBackEnd<Impl>::read(RequestPtr req, T &data, int load_idx)
419 {
420 return LSQ.read(req, data, load_idx);
421 }
422
/**
 * Forwards a write to the load/store queue.
 *
 * @param req Request handed through to LSQ.write() unchanged.
 * @param data Reference handed through to LSQ.write(); presumably the
 *             value to be stored - TODO confirm against the LSQ.
 * @param store_idx Handed through unchanged; presumably the store's index
 *                  in the LSQ - TODO confirm.
 * @return The Fault returned by LSQ.write().
 */
423 template <class Impl>
424 template <class T>
425 Fault
426 LWBackEnd<Impl>::write(RequestPtr req, T &data, int store_idx)
427 {
428 return LSQ.write(req, data, store_idx);
429 }
430
431 #endif // __CPU_OZONE_LW_BACK_END_HH__