misc: Fix db_offset calculation
[gem5.git] / src / dev / hsa / hw_scheduler.cc
1 /*
2 * Copyright (c) 2016-2017 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the copyright holder nor the names of its
18 * contributors may be used to endorse or promote products derived from this
19 * software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Authors: Sooraj Puthoor
34 */
35
36 #include "dev/hsa/hw_scheduler.hh"
37
38 #include "debug/HSAPacketProcessor.hh"
39 #include "mem/packet_access.hh"
40
41 #define HWSCHDLR_EVENT_DESCRIPTION_GENERATOR(XEVENT) \
42 const char* \
43 HWScheduler::XEVENT::description() const \
44 { \
45 return #XEVENT; \
46 }
47
48 HWSCHDLR_EVENT_DESCRIPTION_GENERATOR(SchedulerWakeupEvent)
49
50 void
51 HWScheduler::SchedulerWakeupEvent::process()
52 {
53 hwSchdlr->wakeup();
54 }
55
56 void
57 HWScheduler::wakeup()
58 {
59 // The scheduler unmaps an idle queue from the
60 // registered qList and maps a new queue
61 // to the registered list from the active list.
62 // For this implementation, an idle queue means
63 // a queue that does not have any outstanding dispatch
64 // at the time of this scheduler's wakeup
65
66 contextSwitchQ();
67 schedWakeup();
68 }
69
70 void
71 HWScheduler::schedWakeup()
72 {
73 // If atleast there is one queue that is not registered
74 // then wakeup again
75 if (!schedWakeupEvent.scheduled() &&
76 regdListMap.size() < activeList.size()) {
77 hsaPP->schedule(&schedWakeupEvent, curTick() + wakeupDelay);
78 DPRINTF(HSAPacketProcessor,
79 "Scheduling wakeup at %lu\n", (curTick() + wakeupDelay));
80 }
81 }
82
83 void
84 HWScheduler::registerNewQueue(uint64_t hostReadIndexPointer,
85 uint64_t basePointer,
86 uint64_t queue_id,
87 uint32_t size)
88 {
89 assert(queue_id < MAX_ACTIVE_QUEUES);
90 // Map queue ID to doorbell.
91 // We are only using offset to pio base address as doorbell
92 // We use the same mapping function used by hsa runtime to do this mapping
93 //
94 // Originally
95 // #define VOID_PTR_ADD32(ptr,n)
96 // (void*)((uint32_t*)(ptr) + n)/*ptr + offset*/
97 // (Addr)VOID_PTR_ADD32(0, queue_id)
98 Addr db_offset = sizeof(uint32_t)*queue_id;
99 if (dbMap.find(db_offset) != dbMap.end()) {
100 panic("Creating an already existing queue (queueID %d)", queue_id);
101 }
102
103 // Populate doorbell map
104 dbMap[db_offset] = queue_id;
105
106 if (queue_id >= MAX_ACTIVE_QUEUES) {
107 panic("Attempting to create a queue (queueID %d)" \
108 " beyond PIO range", queue_id);
109 }
110
111 HSAQueueDescriptor* q_desc =
112 new HSAQueueDescriptor(basePointer, db_offset,
113 hostReadIndexPointer, size);
114 AQLRingBuffer* aql_buf =
115 new AQLRingBuffer(NUM_DMA_BUFS, hsaPP->name());
116 QCntxt q_cntxt(q_desc, aql_buf);
117 activeList[dbMap[db_offset]] = q_cntxt;
118
119 // Check if this newly created queue can be directly mapped
120 // to registered queue list
121 bool M5_VAR_USED register_q = mapQIfSlotAvlbl(queue_id, aql_buf, q_desc);
122 schedWakeup();
123 DPRINTF(HSAPacketProcessor,
124 "%s: offset = %p, qID = %d, is_regd = %s, AL size %d\n",
125 __FUNCTION__, db_offset, queue_id,
126 (register_q) ? "true" : "false", dbMap.size());
127 }
128
129 bool
130 HWScheduler::findEmptyHWQ()
131 {
132 DPRINTF(HSAPacketProcessor,
133 "Trying to find empty HW queue, @ %s\n", __FUNCTION__);
134 if (regdListMap.size() < hsaPP->numHWQueues) {
135 for (int emptyQId = 0; emptyQId < hsaPP->numHWQueues; emptyQId++) {
136 HSAQueueDescriptor* qDesc =
137 hsaPP->getRegdListEntry(nextRLId)->qCntxt.qDesc;
138 // If qDesc is empty, we find an empty HW queue
139 if (qDesc == NULL) {
140 return true;
141 }
142 nextRLId = (nextRLId + 1) % hsaPP->numHWQueues;
143 }
144 // We should be able to find an empty slot in registered list
145 // So, we should not reach here
146 panic("Cannot find empty queue\n");
147 }
148 return false;
149 }
150
151 bool
152 HWScheduler::mapQIfSlotAvlbl(uint32_t q_id, AQLRingBuffer* aql_buf,
153 HSAQueueDescriptor* q_desc)
154 {
155 DPRINTF(HSAPacketProcessor,
156 "Trying to map new queue, @ %s\n", __FUNCTION__);
157 if (!findEmptyHWQ()) {
158 return false;
159 }
160 addQCntxt(q_id, aql_buf, q_desc);
161 scheduleAndWakeupMappedQ();
162 updateRRVars(q_id, nextRLId);
163 return true;
164 }
165
166 void
167 HWScheduler::scheduleAndWakeupMappedQ()
168 {
169 // There maybe AQL packets in the mapped queue waiting
170 // to be fetched. Invoke the logic to fetch AQL packets
171 hsaPP->getCommandsFromHost(0, nextRLId);
172 // Schedule the newly mapped queue
173 if (hsaPP->regdQList[nextRLId]->dispPending())
174 hsaPP->schedAQLProcessing(nextRLId);
175 }
176
177 void
178 HWScheduler::addQCntxt(uint32_t al_idx, AQLRingBuffer* aql_buf,
179 HSAQueueDescriptor* q_desc)
180 {
181 assert(hsaPP->getRegdListEntry(nextRLId)->qCntxt.qDesc == NULL);
182 assert(hsaPP->getRegdListEntry(nextRLId)->qCntxt.aqlBuf == NULL);
183 // Move the context
184 hsaPP->getRegdListEntry(nextRLId)->qCntxt.qDesc = q_desc;
185 hsaPP->getRegdListEntry(nextRLId)->qCntxt.aqlBuf = aql_buf;
186 // Add the mapping to registered list map
187 regdListMap[al_idx] = nextRLId;
188 DPRINTF(HSAPacketProcessor, "Mapped HSA queue %d to hw queue %d: @ %s\n",
189 al_idx, nextRLId, __FUNCTION__);
190 }
191
192 bool
193 HWScheduler::contextSwitchQ()
194 {
195 DPRINTF(HSAPacketProcessor,
196 "Trying to map next queue, @ %s", __FUNCTION__);
197 // Identify the next queue, if there is nothing to
198 // map, return false
199 if (!findNextActiveALQ()) {
200 return false;
201 }
202 HSAQueueDescriptor* q_desc = activeList[nextALId].qDesc;
203 AQLRingBuffer* aql_buf = activeList[nextALId].aqlBuf;
204 // If there is empty slot available, use that slot
205 if(mapQIfSlotAvlbl(nextALId, aql_buf, q_desc)) {
206 return true;
207 }
208 // There is no empty slot to map this queue. So, we need to
209 // unmap a queue from registered list and find a slot.
210 // If nothing can be unmapped now, return false
211 if (!unmapQFromRQ()) {
212 return false;
213 }
214 // One queue is unmapped from registered list and that queueID
215 // is stored in nextRLId. We will map this queue to that unmapped slot
216 addQCntxt(nextALId, aql_buf, q_desc);
217 scheduleAndWakeupMappedQ();
218 updateRRVars(nextALId, nextRLId);
219 return true;
220 }
221
222 void
223 HWScheduler::updateRRVars(uint32_t al_idx, uint32_t rl_idx)
224 {
225 nextALId = (al_idx + 1) % MAX_ACTIVE_QUEUES;
226 nextRLId = (rl_idx + 1) % hsaPP->numHWQueues;
227 }
228
229 bool
230 HWScheduler::unmapQFromRQ()
231 {
232 // Identify the next idle queue, if there is no
233 // idle queue, we cannot unmap
234 if (!findNextIdleRLQ()) {
235 return false;
236 }
237 removeQCntxt();
238 return true;
239 }
240
241 void
242 HWScheduler::removeQCntxt()
243 {
244 // The nextRLId gives the registered queue that is to be unmapped.
245 // We can find the corresponding queue_id from the doorbellPointer
246 Addr db_offset =
247 hsaPP->getRegdListEntry(nextRLId)->qCntxt.qDesc->doorbellPointer;
248 hsaPP->getRegdListEntry(nextRLId)->qCntxt.qDesc = NULL;
249 hsaPP->getRegdListEntry(nextRLId)->qCntxt.aqlBuf = NULL;
250 // Here, we are unmappping a queue wihtout waiting for the outstanding
251 // dependency signal reads to complete. We will discard any outstanding
252 // reads and will reset the signal values here.
253 hsaPP->getRegdListEntry(nextRLId)->depSignalRdState.discardRead = true;
254 hsaPP->getRegdListEntry(nextRLId)->depSignalRdState.resetSigVals();
255 uint32_t al_idx = dbMap[db_offset];
256 assert(regdListMap[al_idx] == nextRLId);
257 // Unmap from regdListMap.
258 regdListMap.erase(al_idx);
259 }
260
261 bool
262 HWScheduler::findNextActiveALQ()
263 {
264 for (int activeQId = 0; activeQId < MAX_ACTIVE_QUEUES; activeQId++) {
265 uint32_t al_id = (nextALId + activeQId) % MAX_ACTIVE_QUEUES;
266 auto aqlmap_iter = activeList.find(al_id);
267 if (aqlmap_iter != activeList.end()) {
268 // If this queue is already mapped
269 if (regdListMap.find(al_id) != regdListMap.end()) {
270 continue;
271 } else {
272 DPRINTF(HSAPacketProcessor,
273 "Next Active ALQ %d (current %d), max ALQ %d\n",
274 al_id, nextALId, MAX_ACTIVE_QUEUES);
275 nextALId = al_id;
276 return true;
277 }
278 }
279 }
280 return false;
281 }
282
283 bool
284 HWScheduler::findNextIdleRLQ()
285 {
286 for (int regdQId = 0; regdQId < hsaPP->numHWQueues; regdQId++) {
287 uint32_t rl_idx = (nextRLId + regdQId) % hsaPP->numHWQueues;
288 if (isRLQIdle(rl_idx)) {
289 nextRLId = rl_idx;
290 return true;
291 }
292 }
293 return false;
294 }
295
296 // This function could be moved to packet processor
297 bool
298 HWScheduler::isRLQIdle(uint32_t rl_idx)
299 {
300 DPRINTF(HSAPacketProcessor,
301 "@ %s, analyzing hw queue %d\n", __FUNCTION__, rl_idx);
302 HSAQueueDescriptor* qDesc = hsaPP->getRegdListEntry(rl_idx)->qCntxt.qDesc;
303 AQLRingBuffer* aql_buf = hsaPP->getRegdListEntry(rl_idx)->qCntxt.aqlBuf;
304
305 // If there a pending DMA to this registered queue
306 // then the queue is not idle
307 if (qDesc->dmaInProgress) {
308 return false;
309 }
310
311 // Since packet completion stage happens only after kernel completion
312 // we need to keep the queue mapped till all the outstanding kernels
313 // from that queue are finished
314 if (aql_buf->rdIdx() != aql_buf->dispIdx()) {
315 return false;
316 }
317
318 return true;
319 }
320
321 void
322 HWScheduler::write(Addr db_addr, uint32_t doorbell_reg)
323 {
324 auto dbmap_iter = dbMap.find(db_addr);
325 if (dbmap_iter == dbMap.end()) {
326 panic("Writing to a non-existing queue (db_offset %x)", db_addr);
327 }
328 uint32_t al_idx = dbMap[db_addr];
329 // Modify the write pointer
330 activeList[al_idx].qDesc->writeIndex = doorbell_reg;
331 // If this queue is mapped, then start DMA to fetch the
332 // AQL packet
333 if (regdListMap.find(al_idx) != regdListMap.end()) {
334 hsaPP->getCommandsFromHost(0, regdListMap[al_idx]);
335 }
336 }
337
338 void
339 HWScheduler::unregisterQueue(uint64_t queue_id)
340 {
341 // Pointer arithmetic on a null pointer is undefined behavior. Clang
342 // compilers therefore complain if the following reads:
343 // `(Addr)(VOID_PRT_ADD32(0, queue_id))`
344 //
345 // Originally
346 // #define VOID_PTR_ADD32(ptr,n)
347 // (void*)((uint32_t*)(ptr) + n)/*ptr + offset*/
348 // (Addr)VOID_PTR_ADD32(0, queue_id)
349 Addr db_offset = sizeof(uint32_t)*queue_id;
350 auto dbmap_iter = dbMap.find(db_offset);
351 if (dbmap_iter == dbMap.end()) {
352 panic("Destroying a non-existing queue (db_offset %x)",
353 db_offset);
354 }
355 uint32_t al_idx = dbMap[db_offset];
356 assert(dbMap[db_offset] == dbmap_iter->second);
357 if (!activeList[al_idx].qDesc->isEmpty()) {
358 // According to HSA runtime specification says, deleting
359 // a queue before it is fully processed can lead to undefined
360 // behavior and it is the application's responsibility to
361 // avoid this situation.
362 // Even completion signal is not a sufficient indication for a
363 // fully processed queue; for example completion signal may be
364 // asserted when a read pointer update is in progress
365 warn("Destroying a non-empty queue");
366 }
367 delete activeList[al_idx].qDesc;
368 delete activeList[al_idx].aqlBuf;
369 activeList.erase(al_idx);
370 // Unmap doorbell from doorbell map
371 dbMap.erase(db_offset);
372 if (regdListMap.find(al_idx) != regdListMap.end()) {
373 uint32_t rl_idx = regdListMap[al_idx];
374 hsaPP->getRegdListEntry(rl_idx)->qCntxt.aqlBuf = NULL;
375 hsaPP->getRegdListEntry(rl_idx)->qCntxt.qDesc = NULL;
376 hsaPP->getRegdListEntry(rl_idx)->depSignalRdState.discardRead = true;
377 hsaPP->getRegdListEntry(rl_idx)->depSignalRdState.resetSigVals();
378 assert(!hsaPP->getRegdListEntry(rl_idx)->aqlProcessEvent.scheduled());
379 regdListMap.erase(al_idx);
380 // A registered queue is released, let us try to map
381 // a queue to that slot
382 contextSwitchQ();
383 }
384 schedWakeup();
385 }