2 * Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
5 * For use for simulation and test purposes only
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
17 * 3. Neither the name of the copyright holder nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
33 * Author: Sooraj Puthoor
36 #ifndef __MEM_RUBY_SYSTEM_GPU_COALESCER_HH__
37 #define __MEM_RUBY_SYSTEM_GPU_COALESCER_HH__
40 #include <unordered_map>
42 #include "base/statistics.hh"
43 #include "mem/protocol/HSAScope.hh"
44 #include "mem/protocol/HSASegment.hh"
45 #include "mem/protocol/PrefetchBit.hh"
46 #include "mem/protocol/RubyAccessMode.hh"
47 #include "mem/protocol/RubyRequestType.hh"
48 #include "mem/protocol/SequencerRequestType.hh"
49 #include "mem/request.hh"
50 #include "mem/ruby/common/Address.hh"
51 #include "mem/ruby/common/Consumer.hh"
52 #include "mem/ruby/system/Sequencer.hh"
// Forward declaration of the SimObject parameter class consumed by the
// GPUCoalescer constructor below.
59 class RubyGPUCoalescerParams;
// Translate a gem5 memory Request's flags into the HSA memory scope /
// segment enums used by the Ruby protocol (enums come from
// mem/protocol/HSAScope.hh and HSASegment.hh included above).
// Definitions live in the corresponding .cc file.
61 HSAScope reqScopeToHSAScope(Request* req);
62 HSASegment reqSegmentToHSASegment(Request* req);
// One outstanding (already issued) memory request tracked by the
// coalescer's read/write request tables.
// NOTE(review): this copy of the header is missing lines here (braces and
// the 'pkt'/'issue_time' member declarations that the constructor's
// initializer list references) -- compare against the upstream file.
64 struct GPUCoalescerRequest
// Ruby-level request type (load/store/atomic/...) for this entry.
67 RubyRequestType m_type;
70 GPUCoalescerRequest(PacketPtr _pkt, RubyRequestType _m_type,
72 : pkt(_pkt), m_type(_m_type), issue_time(_issue_time)
// Descriptor for one not-yet-issued request awaiting coalescing: the gem5
// packet plus its primary and secondary Ruby request types (see the
// comment above CoalescingTable further down in this header).
// NOTE(review): the enclosing 'struct RequestDesc' line and its 'pkt'
// member declaration are not visible in this copy of the header.
79 RequestDesc(PacketPtr pkt, RubyRequestType p_type, RubyRequestType s_type)
80 : pkt(pkt), primaryType(p_type), secondaryType(s_type)
// Default descriptor: no packet, NULL request types.
84 RequestDesc() : pkt(nullptr), primaryType(RubyRequestType_NULL),
85 secondaryType(RubyRequestType_NULL)
90 RubyRequestType primaryType;
91 RubyRequestType secondaryType;
// Debug/trace pretty-printer for a tracked request (defined in the .cc).
94 std::ostream& operator<<(std::ostream& out, const GPUCoalescerRequest& obj);
// GPUCoalescer: a RubyPort that accepts memory requests from GPU compute
// units, coalesces same-cache-line requests into a single Ruby request,
// and issues them to the Ruby memory system.  It plays the role for the
// GPU that Sequencer (mem/ruby/system/Sequencer.hh) plays for the CPU.
// NOTE(review): this copy of the header is missing a number of original
// lines (braces, public/protected/private specifiers, some parameter
// lines); the comments below describe only what is visible here.
96 class GPUCoalescer : public RubyPort
99 typedef RubyGPUCoalescerParams Params;
100 GPUCoalescer(const Params *);
// Periodic check for stuck outstanding requests (see deadlockCheckEvent).
104 void wakeup(); // Used only for deadlock detection
106 void printProgress(std::ostream& out) const;
// Completion callbacks invoked by the (SLICC-generated) cache controller
// when a write finishes.  The overloads carrying Cycles parameters also
// receive the issue/forward/first-response timestamps used to build the
// latency histograms declared at the bottom of this class.
111 void writeCallback(Addr address, DataBlock& data);
113 void writeCallback(Addr address,
117 void writeCallback(Addr address,
120 Cycles initialRequestTime,
121 Cycles forwardRequestTime,
122 Cycles firstResponseTime,
125 void writeCallback(Addr address,
128 Cycles initialRequestTime,
129 Cycles forwardRequestTime,
130 Cycles firstResponseTime);
// Same overload pattern as writeCallback, for completed reads.
132 void readCallback(Addr address, DataBlock& data);
134 void readCallback(Addr address,
138 void readCallback(Addr address,
141 Cycles initialRequestTime,
142 Cycles forwardRequestTime,
143 Cycles firstResponseTime);
145 void readCallback(Addr address,
148 Cycles initialRequestTime,
149 Cycles forwardRequestTime,
150 Cycles firstResponseTime,
152 /* atomics need their own callback because the data
153 might be const coming from SLICC */
154 void atomicCallback(Addr address,
156 const DataBlock& data);
// Stat bookkeeping for command-processor (CP) read/write completions.
158 void recordCPReadCallBack(MachineID myMachID, MachineID senderMachID);
159 void recordCPWriteCallBack(MachineID myMachID, MachineID senderMachID);
161 // Alternate implementations in VIPER Coalescer
162 virtual RequestStatus makeRequest(PacketPtr pkt);
164 int outstandingCount() const { return m_outstanding_count; }
167 isDeadlockEventScheduled() const
169 return deadlockCheckEvent.scheduled();
173 descheduleDeadlockEvent()
175 deschedule(deadlockCheckEvent);
180 void print(std::ostream& out) const;
181 void checkCoherence(Addr address);
// Remove a completed request from its request table.
184 void removeRequest(GPUCoalescerRequest* request);
185 void evictionCallback(Addr address);
// Drains reqCoalescer/newRequests and issues the coalesced requests.
186 void completeIssue();
// Hold a kernel-end packet for the given wavefront until all of its
// outstanding memory requests have completed (see kernelEndList).
188 void insertKernel(int wavefront_id, PacketPtr pkt);
190 void recordRequestType(SequencerRequestType requestType);
// Accessors used by the protocol/stats machinery for the histograms
// declared at the bottom of this class.
191 Stats::Histogram& getOutstandReqHist() { return m_outstandReqHist; }
193 Stats::Histogram& getLatencyHist() { return m_latencyHist; }
194 Stats::Histogram& getTypeLatencyHist(uint32_t t)
195 { return *m_typeLatencyHist[t]; }
197 Stats::Histogram& getMissLatencyHist()
198 { return m_missLatencyHist; }
199 Stats::Histogram& getMissTypeLatencyHist(uint32_t t)
200 { return *m_missTypeLatencyHist[t]; }
202 Stats::Histogram& getMissMachLatencyHist(uint32_t t) const
203 { return *m_missMachLatencyHist[t]; }
206 getMissTypeMachLatencyHist(uint32_t r, uint32_t t) const
207 { return *m_missTypeMachLatencyHist[r][t]; }
209 Stats::Histogram& getIssueToInitialDelayHist(uint32_t t) const
210 { return *m_IssueToInitialDelayHist[t]; }
213 getInitialToForwardDelayHist(const MachineType t) const
214 { return *m_InitialToForwardDelayHist[t]; }
217 getForwardRequestToFirstResponseHist(const MachineType t) const
218 { return *m_ForwardToFirstResponseDelayHist[t]; }
221 getFirstResponseToCompletionDelayHist(const MachineType t) const
222 { return *m_FirstResponseToCompletionDelayHist[t]; }
224 // Changed to protected to enable inheritance by VIPER Coalescer
// Probe the cache without issuing a Ruby request; on a hit data_ptr is
// pointed at the cached block.
226 bool tryCacheAccess(Addr addr, RubyRequestType type,
227 Addr pc, RubyAccessMode access_mode,
228 int size, DataBlock*& data_ptr);
229 // Alternate implementations in VIPER Coalescer
230 virtual void issueRequest(PacketPtr pkt, RubyRequestType type);
// NOTE(review): parameter name 'wavfront_id' is misspelled (should be
// 'wavefront_id', cf. insertKernel above); harmless in a declaration
// since only the definition's name matters.
232 void kernelCallback(int wavfront_id);
234 void hitCallback(GPUCoalescerRequest* request,
238 Cycles initialRequestTime,
239 Cycles forwardRequestTime,
240 Cycles firstResponseTime,
// Record the latency breakdown of a request that missed; 'isRegion'
// presumably relates to region-coherence protocols -- TODO confirm
// against the .cc file.
242 void recordMissLatency(GPUCoalescerRequest* request,
244 Cycles initialRequestTime,
245 Cycles forwardRequestTime,
246 Cycles firstResponseTime,
247 bool success, bool isRegion);
248 void completeHitCallback(std::vector<PacketPtr> & mylist, int len);
// Synthesize a packet for the given address.
249 PacketPtr mapAddrToPkt(Addr address);
252 RequestStatus getRequestStatus(PacketPtr pkt,
253 RubyRequestType request_type);
254 bool insertRequest(PacketPtr pkt, RubyRequestType request_type);
// Load-linked / store-conditional handling for this address.
256 bool handleLlsc(Addr address, GPUCoalescerRequest* request);
// Event whose handler performs the deferred issue (completeIssue).
258 EventFunctionWrapper issueEvent;
261 // Changed to protected to enable inheritance by VIPER Coalescer
263 int m_max_outstanding_requests;
// Threshold (in cycles) after which an un-completed request is treated
// as a deadlock by the wakeup() check.
264 int m_deadlock_threshold;
266 CacheMemory* m_dataCache_ptr;
267 CacheMemory* m_instCache_ptr;
269 // The cache access latency for this GPU data cache. This is assessed at the
270 // beginning of each access. This should be very similar to the
271 // implementation in Sequencer() as this is very much like a Sequencer
272 Cycles m_data_cache_hit_latency;
274 // We need to track both the primary and secondary request types.
275 // The secondary request type comprises a subset of RubyRequestTypes that
276 // are understood by the L1 Controller. A primary request type can be any
278 typedef std::unordered_map<Addr, std::vector<RequestDesc>> CoalescingTable;
// Per-line lists of not-yet-issued requests awaiting coalescing, plus
// the addresses newly touched since the last issue pass.
279 CoalescingTable reqCoalescer;
280 std::vector<Addr> newRequests;
// Outstanding (already issued) requests, keyed by address.
282 typedef std::unordered_map<Addr, GPUCoalescerRequest*> RequestTable;
283 RequestTable m_writeRequestTable;
284 RequestTable m_readRequestTable;
285 // Global outstanding request count, across all request tables
286 int m_outstanding_count;
287 bool m_deadlock_check_scheduled;
// Kernel-end packets held (by wavefront id) until that wavefront's
// memory requests drain; see insertKernel()/kernelCallback().
288 std::unordered_map<int, PacketPtr> kernelEndList;
289 std::vector<int> newKernelEnds;
// Counters for cycles spent stalled behind a prior conflicting access.
291 int m_store_waiting_on_load_cycles;
292 int m_store_waiting_on_store_cycles;
293 int m_load_waiting_on_store_cycles;
294 int m_load_waiting_on_load_cycles;
296 bool m_runningGarnetStandalone;
298 EventFunctionWrapper deadlockCheckEvent;
// If set, release-for-ownership-style coherence is assumed -- TODO
// confirm the exact RfO semantics against the .cc file.
299 bool assumingRfOCoherence;
301 // m5 style stats for TCP hit/miss counts
// TCP/TCC presumably follow AMD naming for the GPU L1/L2 caches --
// verify against the protocol files.
302 Stats::Scalar GPU_TCPLdHits;
303 Stats::Scalar GPU_TCPLdTransfers;
304 Stats::Scalar GPU_TCCLdHits;
305 Stats::Scalar GPU_LdMiss;
307 Stats::Scalar GPU_TCPStHits;
308 Stats::Scalar GPU_TCPStTransfers;
309 Stats::Scalar GPU_TCCStHits;
310 Stats::Scalar GPU_StMiss;
// Same counters, for command-processor (CP) initiated accesses.
312 Stats::Scalar CP_TCPLdHits;
313 Stats::Scalar CP_TCPLdTransfers;
314 Stats::Scalar CP_TCCLdHits;
315 Stats::Scalar CP_LdMiss;
317 Stats::Scalar CP_TCPStHits;
318 Stats::Scalar CP_TCPStTransfers;
319 Stats::Scalar CP_TCCStHits;
320 Stats::Scalar CP_StMiss;
322 //! Histogram for number of outstanding requests per cycle.
323 Stats::Histogram m_outstandReqHist;
325 //! Histogram for holding latency profile of all requests.
326 Stats::Histogram m_latencyHist;
327 std::vector<Stats::Histogram *> m_typeLatencyHist;
329 //! Histogram for holding latency profile of all requests that
330 //! miss in the controller connected to this sequencer.
331 Stats::Histogram m_missLatencyHist;
332 std::vector<Stats::Histogram *> m_missTypeLatencyHist;
334 //! Histograms for profiling the latencies for requests that
335 //! required external messages.
336 std::vector<Stats::Histogram *> m_missMachLatencyHist;
337 std::vector< std::vector<Stats::Histogram *> > m_missTypeMachLatencyHist;
339 //! Histograms for recording the breakdown of miss latency
340 std::vector<Stats::Histogram *> m_IssueToInitialDelayHist;
341 std::vector<Stats::Histogram *> m_InitialToForwardDelayHist;
342 std::vector<Stats::Histogram *> m_ForwardToFirstResponseDelayHist;
343 std::vector<Stats::Histogram *> m_FirstResponseToCompletionDelayHist;
346 // Private copy constructor and assignment operator
// Declared but never defined: copying a coalescer is disallowed
// (pre-C++11 idiom; '= delete' would be the modern equivalent).
347 GPUCoalescer(const GPUCoalescer& obj);
348 GPUCoalescer& operator=(const GPUCoalescer& obj);
// Stream insertion for GPUCoalescer.
// NOTE(review): the surrounding definition (return type line and the
// function body) is not visible in this copy of the header.
352 operator<<(std::ostream& out, const GPUCoalescer& obj)
359 #endif // __MEM_RUBY_SYSTEM_GPU_COALESCER_HH__