ruby: Refactor some Event subclasses to lambdas
[gem5.git] / src / mem / ruby / system / GPUCoalescer.hh
1 /*
2 * Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the copyright holder nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Author: Sooraj Puthoor
34 */
35
36 #ifndef __MEM_RUBY_SYSTEM_GPU_COALESCER_HH__
37 #define __MEM_RUBY_SYSTEM_GPU_COALESCER_HH__
38
39 #include <iostream>
40 #include <unordered_map>
41
42 #include "base/statistics.hh"
43 #include "mem/protocol/HSAScope.hh"
44 #include "mem/protocol/HSASegment.hh"
45 #include "mem/protocol/PrefetchBit.hh"
46 #include "mem/protocol/RubyAccessMode.hh"
47 #include "mem/protocol/RubyRequestType.hh"
48 #include "mem/protocol/SequencerRequestType.hh"
49 #include "mem/request.hh"
50 #include "mem/ruby/common/Address.hh"
51 #include "mem/ruby/common/Consumer.hh"
52 #include "mem/ruby/system/Sequencer.hh"
53
// Forward declarations: these types are only named in declarations in this
// header, so their full definitions are not required here.
class DataBlock;
class CacheMsg;
class MachineID;
class CacheMemory;

// Parameter type exposed as GPUCoalescer::Params.
class RubyGPUCoalescerParams;
60
61 HSAScope reqScopeToHSAScope(Request* req);
62 HSASegment reqSegmentToHSASegment(Request* req);
63
64 struct GPUCoalescerRequest
65 {
66 PacketPtr pkt;
67 RubyRequestType m_type;
68 Cycles issue_time;
69
70 GPUCoalescerRequest(PacketPtr _pkt, RubyRequestType _m_type,
71 Cycles _issue_time)
72 : pkt(_pkt), m_type(_m_type), issue_time(_issue_time)
73 {}
74 };
75
76 class RequestDesc
77 {
78 public:
79 RequestDesc(PacketPtr pkt, RubyRequestType p_type, RubyRequestType s_type)
80 : pkt(pkt), primaryType(p_type), secondaryType(s_type)
81 {
82 }
83
84 RequestDesc() : pkt(nullptr), primaryType(RubyRequestType_NULL),
85 secondaryType(RubyRequestType_NULL)
86 {
87 }
88
89 PacketPtr pkt;
90 RubyRequestType primaryType;
91 RubyRequestType secondaryType;
92 };
93
94 std::ostream& operator<<(std::ostream& out, const GPUCoalescerRequest& obj);
95
96 class GPUCoalescer : public RubyPort
97 {
98 public:
99 typedef RubyGPUCoalescerParams Params;
100 GPUCoalescer(const Params *);
101 ~GPUCoalescer();
102
103 // Public Methods
104 void wakeup(); // Used only for deadlock detection
105
106 void printProgress(std::ostream& out) const;
107 void resetStats();
108 void collateStats();
109 void regStats();
110
111 void writeCallback(Addr address, DataBlock& data);
112
113 void writeCallback(Addr address,
114 MachineType mach,
115 DataBlock& data);
116
117 void writeCallback(Addr address,
118 MachineType mach,
119 DataBlock& data,
120 Cycles initialRequestTime,
121 Cycles forwardRequestTime,
122 Cycles firstResponseTime,
123 bool isRegion);
124
125 void writeCallback(Addr address,
126 MachineType mach,
127 DataBlock& data,
128 Cycles initialRequestTime,
129 Cycles forwardRequestTime,
130 Cycles firstResponseTime);
131
132 void readCallback(Addr address, DataBlock& data);
133
134 void readCallback(Addr address,
135 MachineType mach,
136 DataBlock& data);
137
138 void readCallback(Addr address,
139 MachineType mach,
140 DataBlock& data,
141 Cycles initialRequestTime,
142 Cycles forwardRequestTime,
143 Cycles firstResponseTime);
144
145 void readCallback(Addr address,
146 MachineType mach,
147 DataBlock& data,
148 Cycles initialRequestTime,
149 Cycles forwardRequestTime,
150 Cycles firstResponseTime,
151 bool isRegion);
152 /* atomics need their own callback because the data
153 might be const coming from SLICC */
154 void atomicCallback(Addr address,
155 MachineType mach,
156 const DataBlock& data);
157
158 void recordCPReadCallBack(MachineID myMachID, MachineID senderMachID);
159 void recordCPWriteCallBack(MachineID myMachID, MachineID senderMachID);
160
161 // Alternate implementations in VIPER Coalescer
162 virtual RequestStatus makeRequest(PacketPtr pkt);
163
164 int outstandingCount() const { return m_outstanding_count; }
165
166 bool
167 isDeadlockEventScheduled() const
168 {
169 return deadlockCheckEvent.scheduled();
170 }
171
172 void
173 descheduleDeadlockEvent()
174 {
175 deschedule(deadlockCheckEvent);
176 }
177
178 bool empty() const;
179
180 void print(std::ostream& out) const;
181 void checkCoherence(Addr address);
182
183 void markRemoved();
184 void removeRequest(GPUCoalescerRequest* request);
185 void evictionCallback(Addr address);
186 void completeIssue();
187
188 void insertKernel(int wavefront_id, PacketPtr pkt);
189
190 void recordRequestType(SequencerRequestType requestType);
191 Stats::Histogram& getOutstandReqHist() { return m_outstandReqHist; }
192
193 Stats::Histogram& getLatencyHist() { return m_latencyHist; }
194 Stats::Histogram& getTypeLatencyHist(uint32_t t)
195 { return *m_typeLatencyHist[t]; }
196
197 Stats::Histogram& getMissLatencyHist()
198 { return m_missLatencyHist; }
199 Stats::Histogram& getMissTypeLatencyHist(uint32_t t)
200 { return *m_missTypeLatencyHist[t]; }
201
202 Stats::Histogram& getMissMachLatencyHist(uint32_t t) const
203 { return *m_missMachLatencyHist[t]; }
204
205 Stats::Histogram&
206 getMissTypeMachLatencyHist(uint32_t r, uint32_t t) const
207 { return *m_missTypeMachLatencyHist[r][t]; }
208
209 Stats::Histogram& getIssueToInitialDelayHist(uint32_t t) const
210 { return *m_IssueToInitialDelayHist[t]; }
211
212 Stats::Histogram&
213 getInitialToForwardDelayHist(const MachineType t) const
214 { return *m_InitialToForwardDelayHist[t]; }
215
216 Stats::Histogram&
217 getForwardRequestToFirstResponseHist(const MachineType t) const
218 { return *m_ForwardToFirstResponseDelayHist[t]; }
219
220 Stats::Histogram&
221 getFirstResponseToCompletionDelayHist(const MachineType t) const
222 { return *m_FirstResponseToCompletionDelayHist[t]; }
223
224 // Changed to protected to enable inheritance by VIPER Coalescer
225 protected:
226 bool tryCacheAccess(Addr addr, RubyRequestType type,
227 Addr pc, RubyAccessMode access_mode,
228 int size, DataBlock*& data_ptr);
229 // Alternate implementations in VIPER Coalescer
230 virtual void issueRequest(PacketPtr pkt, RubyRequestType type);
231
232 void kernelCallback(int wavfront_id);
233
234 void hitCallback(GPUCoalescerRequest* request,
235 MachineType mach,
236 DataBlock& data,
237 bool success,
238 Cycles initialRequestTime,
239 Cycles forwardRequestTime,
240 Cycles firstResponseTime,
241 bool isRegion);
242 void recordMissLatency(GPUCoalescerRequest* request,
243 MachineType mach,
244 Cycles initialRequestTime,
245 Cycles forwardRequestTime,
246 Cycles firstResponseTime,
247 bool success, bool isRegion);
248 void completeHitCallback(std::vector<PacketPtr> & mylist, int len);
249 PacketPtr mapAddrToPkt(Addr address);
250
251
252 RequestStatus getRequestStatus(PacketPtr pkt,
253 RubyRequestType request_type);
254 bool insertRequest(PacketPtr pkt, RubyRequestType request_type);
255
256 bool handleLlsc(Addr address, GPUCoalescerRequest* request);
257
258 EventFunctionWrapper issueEvent;
259
260
261 // Changed to protected to enable inheritance by VIPER Coalescer
262 protected:
263 int m_max_outstanding_requests;
264 int m_deadlock_threshold;
265
266 CacheMemory* m_dataCache_ptr;
267 CacheMemory* m_instCache_ptr;
268
269 // The cache access latency for this GPU data cache. This is assessed at the
270 // beginning of each access. This should be very similar to the
271 // implementation in Sequencer() as this is very much like a Sequencer
272 Cycles m_data_cache_hit_latency;
273
274 // We need to track both the primary and secondary request types.
275 // The secondary request type comprises a subset of RubyRequestTypes that
276 // are understood by the L1 Controller. A primary request type can be any
277 // RubyRequestType.
278 typedef std::unordered_map<Addr, std::vector<RequestDesc>> CoalescingTable;
279 CoalescingTable reqCoalescer;
280 std::vector<Addr> newRequests;
281
282 typedef std::unordered_map<Addr, GPUCoalescerRequest*> RequestTable;
283 RequestTable m_writeRequestTable;
284 RequestTable m_readRequestTable;
285 // Global outstanding request count, across all request tables
286 int m_outstanding_count;
287 bool m_deadlock_check_scheduled;
288 std::unordered_map<int, PacketPtr> kernelEndList;
289 std::vector<int> newKernelEnds;
290
291 int m_store_waiting_on_load_cycles;
292 int m_store_waiting_on_store_cycles;
293 int m_load_waiting_on_store_cycles;
294 int m_load_waiting_on_load_cycles;
295
296 bool m_runningGarnetStandalone;
297
298 EventFunctionWrapper deadlockCheckEvent;
299 bool assumingRfOCoherence;
300
301 // m5 style stats for TCP hit/miss counts
302 Stats::Scalar GPU_TCPLdHits;
303 Stats::Scalar GPU_TCPLdTransfers;
304 Stats::Scalar GPU_TCCLdHits;
305 Stats::Scalar GPU_LdMiss;
306
307 Stats::Scalar GPU_TCPStHits;
308 Stats::Scalar GPU_TCPStTransfers;
309 Stats::Scalar GPU_TCCStHits;
310 Stats::Scalar GPU_StMiss;
311
312 Stats::Scalar CP_TCPLdHits;
313 Stats::Scalar CP_TCPLdTransfers;
314 Stats::Scalar CP_TCCLdHits;
315 Stats::Scalar CP_LdMiss;
316
317 Stats::Scalar CP_TCPStHits;
318 Stats::Scalar CP_TCPStTransfers;
319 Stats::Scalar CP_TCCStHits;
320 Stats::Scalar CP_StMiss;
321
322 //! Histogram for number of outstanding requests per cycle.
323 Stats::Histogram m_outstandReqHist;
324
325 //! Histogram for holding latency profile of all requests.
326 Stats::Histogram m_latencyHist;
327 std::vector<Stats::Histogram *> m_typeLatencyHist;
328
329 //! Histogram for holding latency profile of all requests that
330 //! miss in the controller connected to this sequencer.
331 Stats::Histogram m_missLatencyHist;
332 std::vector<Stats::Histogram *> m_missTypeLatencyHist;
333
334 //! Histograms for profiling the latencies for requests that
335 //! required external messages.
336 std::vector<Stats::Histogram *> m_missMachLatencyHist;
337 std::vector< std::vector<Stats::Histogram *> > m_missTypeMachLatencyHist;
338
339 //! Histograms for recording the breakdown of miss latency
340 std::vector<Stats::Histogram *> m_IssueToInitialDelayHist;
341 std::vector<Stats::Histogram *> m_InitialToForwardDelayHist;
342 std::vector<Stats::Histogram *> m_ForwardToFirstResponseDelayHist;
343 std::vector<Stats::Histogram *> m_FirstResponseToCompletionDelayHist;
344
345 private:
346 // Private copy constructor and assignment operator
347 GPUCoalescer(const GPUCoalescer& obj);
348 GPUCoalescer& operator=(const GPUCoalescer& obj);
349 };
350
351 inline std::ostream&
352 operator<<(std::ostream& out, const GPUCoalescer& obj)
353 {
354 obj.print(out);
355 out << std::flush;
356 return out;
357 }
358
359 #endif // __MEM_RUBY_SYSTEM_GPU_COALESCER_HH__