7d8fb909950c004bd9c3811c31c234d91e6a5b57
2 * Copyright (c) 2016-2017 Advanced Micro Devices, Inc.
5 * For use for simulation and test purposes only
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
17 * 3. Neither the name of the copyright holder nor the names of its
18 * contributors may be used to endorse or promote products derived from this
19 * software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
33 * Authors: Sooraj Puthoor
36 #include "dev/hsa/hw_scheduler.hh"
38 #include "debug/HSAPacketProcessor.hh"
39 #include "mem/packet_access.hh"
// Helper macro: for an event class XEVENT nested in HWScheduler it expands
// to the qualified signature HWScheduler::XEVENT::description() const.
// NOTE(review): only the signature line of the macro body is visible in this
// listing; the return type and function body are not shown here.
// The macro is instantiated once below, for SchedulerWakeupEvent.
41 #define HWSCHDLR_EVENT_DESCRIPTION_GENERATOR(XEVENT) \
43 HWScheduler::XEVENT::description() const \
48 HWSCHDLR_EVENT_DESCRIPTION_GENERATOR(SchedulerWakeupEvent
)
// process() member of HWScheduler::SchedulerWakeupEvent.
// NOTE(review): the statements of this handler are not visible in this
// listing. The comment that follows describes the scheduler's wakeup policy
// (swap an idle registered queue for an unmapped active queue).
51 HWScheduler::SchedulerWakeupEvent::process()
59 // The scheduler unmaps an idle queue from the
60 // registered qList and maps a new queue
61 // to the registered list from the active list.
62 // For this implementation, an idle queue means
63 // a queue that does not have any outstanding dispatch
64 // at the time of this scheduler's wakeup
// Arms the scheduler wakeup event.
// When schedWakeupEvent is not already scheduled and regdListMap holds fewer
// entries than activeList, the event is scheduled through hsaPP for
// curTick() + wakeupDelay and the target tick is logged.
71 HWScheduler::schedWakeup()
73 // If there is at least one queue that is not registered
75 if (!schedWakeupEvent
.scheduled() &&
76 regdListMap
.size() < activeList
.size()) {
77 hsaPP
->schedule(&schedWakeupEvent
, curTick() + wakeupDelay
);
78 DPRINTF(HSAPacketProcessor
,
79 "Scheduling wakeup at %lu\n", (curTick() + wakeupDelay
));
// Registers a new HSA queue with the scheduler.
// Visible steps:
//  1. assert that queue_id is below MAX_ACTIVE_QUEUES;
//  2. derive the doorbell offset as sizeof(uint32_t) * queue_id and panic if
//     that offset is already present in dbMap;
//  3. record offset -> queue_id in dbMap;
//  4. allocate an HSAQueueDescriptor and an AQLRingBuffer with new, wrap them
//     in a QCntxt and store it in activeList (unregisterQueue() deletes both);
//  5. try to map the queue onto a hardware queue with mapQIfSlotAvlbl() and
//     log the outcome.
// NOTE(review): part of the parameter list is not visible in this listing;
// basePointer, queue_id and size are used below and are presumably
// parameters - confirm against the header.
84 HWScheduler::registerNewQueue(uint64_t hostReadIndexPointer
,
89 assert(queue_id
< MAX_ACTIVE_QUEUES
);
90 // Map queue ID to doorbell.
91 // We are only using offset to pio base address as doorbell
92 // We use the same mapping function used by hsa runtime to do this mapping
95 // #define VOID_PTR_ADD32(ptr,n)
96 // (void*)((uint32_t*)(ptr) + n)/*ptr + offset*/
97 // (Addr)VOID_PTR_ADD32(0, queue_id)
98 Addr db_offset
= sizeof(uint32_t)*queue_id
;
99 if (dbMap
.find(db_offset
) != dbMap
.end()) {
100 panic("Creating an already existing queue (queueID %d)", queue_id
);
103 // Populate doorbell map
104 dbMap
[db_offset
] = queue_id
;
// NOTE(review): this range check runs after dbMap has already been updated
// above, and it tests the same condition that the assert at the top of the
// function rejects when asserts are enabled.
106 if (queue_id
>= MAX_ACTIVE_QUEUES
) {
107 panic("Attempting to create a queue (queueID %d)" \
108 " beyond PIO range", queue_id
);
111 HSAQueueDescriptor
* q_desc
=
112 new HSAQueueDescriptor(basePointer
, db_offset
,
113 hostReadIndexPointer
, size
);
114 AQLRingBuffer
* aql_buf
=
115 new AQLRingBuffer(NUM_DMA_BUFS
, hsaPP
->name());
116 QCntxt
q_cntxt(q_desc
, aql_buf
);
117 activeList
[dbMap
[db_offset
]] = q_cntxt
;
119 // Check if this newly created queue can be directly mapped
120 // to registered queue list
121 M5_VAR_USED
bool register_q
= mapQIfSlotAvlbl(queue_id
, aql_buf
, q_desc
);
// NOTE(review): the message labels its last value "AL size" but the argument
// passed is dbMap.size(), not activeList.size() - confirm which is intended.
123 DPRINTF(HSAPacketProcessor
,
124 "%s: offset = %p, qID = %d, is_regd = %s, AL size %d\n",
125 __FUNCTION__
, db_offset
, queue_id
,
126 (register_q
) ? "true" : "false", dbMap
.size());
// Looks for a hardware queue slot whose queue descriptor is empty.
// The scan only runs while regdListMap holds fewer than hsaPP->numHWQueues
// entries. It reads the qDesc of the registered-list entry at nextRLId and
// advances nextRLId modulo numHWQueues, for at most numHWQueues iterations.
// Per the comment below, the panic is expected to be unreachable.
// NOTE(review): the empty-slot test and the return statements are not
// visible in this listing; mapQIfSlotAvlbl() uses the result as a boolean.
130 HWScheduler::findEmptyHWQ()
132 DPRINTF(HSAPacketProcessor
,
133 "Trying to find empty HW queue, @ %s\n", __FUNCTION__
);
134 if (regdListMap
.size() < hsaPP
->numHWQueues
) {
135 for (int emptyQId
= 0; emptyQId
< hsaPP
->numHWQueues
; emptyQId
++) {
136 HSAQueueDescriptor
* qDesc
=
137 hsaPP
->getRegdListEntry(nextRLId
)->qCntxt
.qDesc
;
138 // If qDesc is empty, we find an empty HW queue
142 nextRLId
= (nextRLId
+ 1) % hsaPP
->numHWQueues
;
144 // We should be able to find an empty slot in registered list
145 // So, we should not reach here
146 panic("Cannot find empty queue\n");
// Maps active queue q_id onto a hardware queue if a slot is available.
// findEmptyHWQ() is consulted first; the statements executed when it fails
// are not visible in this listing. On the visible path the queue context is
// installed with addQCntxt(), the mapped queue is started with
// scheduleAndWakeupMappedQ(), and the round-robin cursors are advanced with
// updateRRVars(q_id, nextRLId).
// Callers (registerNewQueue(), contextSwitchQ()) use the result as a boolean.
152 HWScheduler::mapQIfSlotAvlbl(uint32_t q_id
, AQLRingBuffer
* aql_buf
,
153 HSAQueueDescriptor
* q_desc
)
155 DPRINTF(HSAPacketProcessor
,
156 "Trying to map new queue, @ %s\n", __FUNCTION__
);
157 if (!findEmptyHWQ()) {
160 addQCntxt(q_id
, aql_buf
, q_desc
);
161 scheduleAndWakeupMappedQ();
162 updateRRVars(q_id
, nextRLId
);
// Starts work on the hardware queue at nextRLId: asks hsaPP to fetch
// commands from the host for that queue and, if the registered-list entry
// reports a pending dispatch, schedules AQL processing for it.
167 HWScheduler::scheduleAndWakeupMappedQ()
169 // There may be AQL packets in the mapped queue waiting
170 // to be fetched. Invoke the logic to fetch AQL packets
171 hsaPP
->getCommandsFromHost(0, nextRLId
);
172 // Schedule the newly mapped queue
173 if (hsaPP
->regdQList
[nextRLId
]->dispPending())
174 hsaPP
->schedAQLProcessing(nextRLId
);
// Installs a queue context into the registered-list entry at nextRLId.
// Asserts that the entry's qDesc and aqlBuf are both NULL, stores q_desc and
// aql_buf into it, records al_idx -> nextRLId in regdListMap and logs the
// mapping.
178 HWScheduler::addQCntxt(uint32_t al_idx
, AQLRingBuffer
* aql_buf
,
179 HSAQueueDescriptor
* q_desc
)
181 assert(hsaPP
->getRegdListEntry(nextRLId
)->qCntxt
.qDesc
== NULL
);
182 assert(hsaPP
->getRegdListEntry(nextRLId
)->qCntxt
.aqlBuf
== NULL
);
184 hsaPP
->getRegdListEntry(nextRLId
)->qCntxt
.qDesc
= q_desc
;
185 hsaPP
->getRegdListEntry(nextRLId
)->qCntxt
.aqlBuf
= aql_buf
;
186 // Add the mapping to registered list map
187 regdListMap
[al_idx
] = nextRLId
;
188 DPRINTF(HSAPacketProcessor
, "Mapped HSA queue %d to hw queue %d: @ %s\n",
189 al_idx
, nextRLId
, __FUNCTION__
);
// Tries to map the next unmapped active queue onto a hardware queue.
// Visible flow:
//  1. findNextActiveALQ() selects the candidate, read afterwards via nextALId;
//  2. mapQIfSlotAvlbl() is tried first, using a free slot if there is one;
//  3. otherwise unmapQFromRQ() must free a slot (left in nextRLId), after
//     which the candidate is installed with addQCntxt(), started with
//     scheduleAndWakeupMappedQ(), and the cursors advanced by updateRRVars().
// NOTE(review): the statements inside the failure/success branches (including
// any return statements) are not visible in this listing.
// NOTE(review): the DPRINTF format string below has no trailing newline,
// unlike the other DPRINTF calls in this file.
193 HWScheduler::contextSwitchQ()
195 DPRINTF(HSAPacketProcessor
,
196 "Trying to map next queue, @ %s", __FUNCTION__
);
197 // Identify the next queue, if there is nothing to
199 if (!findNextActiveALQ()) {
202 HSAQueueDescriptor
* q_desc
= activeList
[nextALId
].qDesc
;
203 AQLRingBuffer
* aql_buf
= activeList
[nextALId
].aqlBuf
;
204 // If there is empty slot available, use that slot
205 if(mapQIfSlotAvlbl(nextALId
, aql_buf
, q_desc
)) {
208 // There is no empty slot to map this queue. So, we need to
209 // unmap a queue from registered list and find a slot.
210 // If nothing can be unmapped now, return false
211 if (!unmapQFromRQ()) {
214 // One queue is unmapped from registered list and that queueID
215 // is stored in nextRLId. We will map this queue to that unmapped slot
216 addQCntxt(nextALId
, aql_buf
, q_desc
);
217 scheduleAndWakeupMappedQ();
218 updateRRVars(nextALId
, nextRLId
);
// Advances the round-robin cursors past the queue that was just mapped:
// nextALId becomes (al_idx + 1) modulo MAX_ACTIVE_QUEUES and nextRLId becomes
// (rl_idx + 1) modulo hsaPP->numHWQueues.
223 HWScheduler::updateRRVars(uint32_t al_idx
, uint32_t rl_idx
)
225 nextALId
= (al_idx
+ 1) % MAX_ACTIVE_QUEUES
;
226 nextRLId
= (rl_idx
+ 1) % hsaPP
->numHWQueues
;
// Unmaps a queue from the registered list.
// NOTE(review): only the initial check is visible in this listing -
// findNextIdleRLQ() has to succeed before anything can be unmapped. The
// remaining statements are not shown. contextSwitchQ() uses the result as a
// boolean.
230 HWScheduler::unmapQFromRQ()
232 // Identify the next idle queue, if there is no
233 // idle queue, we cannot unmap
234 if (!findNextIdleRLQ()) {
// Clears the registered-list entry at nextRLId and drops its mapping.
// Reads the doorbellPointer of the entry's queue descriptor, sets the entry's
// qDesc and aqlBuf to NULL, marks its dependency-signal read state as
// discardRead and resets the signal values, then looks up the active-list
// index through dbMap[db_offset], asserts that regdListMap maps it to
// nextRLId, and erases that regdListMap entry.
// NOTE(review): the declaration of db_offset is not visible in this listing;
// it is presumably initialised from the doorbellPointer expression below.
242 HWScheduler::removeQCntxt()
244 // The nextRLId gives the registered queue that is to be unmapped.
245 // We can find the corresponding queue_id from the doorbellPointer
247 hsaPP
->getRegdListEntry(nextRLId
)->qCntxt
.qDesc
->doorbellPointer
;
248 hsaPP
->getRegdListEntry(nextRLId
)->qCntxt
.qDesc
= NULL
;
249 hsaPP
->getRegdListEntry(nextRLId
)->qCntxt
.aqlBuf
= NULL
;
250 // Here, we are unmapping a queue without waiting for the outstanding
251 // dependency signal reads to complete. We will discard any outstanding
252 // reads and will reset the signal values here.
253 hsaPP
->getRegdListEntry(nextRLId
)->depSignalRdState
.discardRead
= true;
254 hsaPP
->getRegdListEntry(nextRLId
)->depSignalRdState
.resetSigVals();
255 uint32_t al_idx
= dbMap
[db_offset
];
256 assert(regdListMap
[al_idx
] == nextRLId
);
257 // Unmap from regdListMap.
258 regdListMap
.erase(al_idx
);
// Scans up to MAX_ACTIVE_QUEUES active-list ids, starting at nextALId and
// wrapping around, for an id that is present in activeList. Ids that are
// also present in regdListMap (already mapped) are handled by a separate
// branch; the chosen id is logged together with nextALId.
// NOTE(review): the statements inside the already-mapped branch, any update
// of nextALId and the return statements are not visible in this listing;
// contextSwitchQ() uses the result as a boolean.
262 HWScheduler::findNextActiveALQ()
264 for (int activeQId
= 0; activeQId
< MAX_ACTIVE_QUEUES
; activeQId
++) {
265 uint32_t al_id
= (nextALId
+ activeQId
) % MAX_ACTIVE_QUEUES
;
266 auto aqlmap_iter
= activeList
.find(al_id
);
267 if (aqlmap_iter
!= activeList
.end()) {
268 // If this queue is already mapped
269 if (regdListMap
.find(al_id
) != regdListMap
.end()) {
272 DPRINTF(HSAPacketProcessor
,
273 "Next Active ALQ %d (current %d), max ALQ %d\n",
274 al_id
, nextALId
, MAX_ACTIVE_QUEUES
);
// Scans the hsaPP->numHWQueues registered-list slots, starting at nextRLId
// and wrapping around, testing each slot with isRLQIdle().
// NOTE(review): what happens once an idle slot is found, and the return
// statements, are not visible in this listing; unmapQFromRQ() uses the result
// as a boolean.
284 HWScheduler::findNextIdleRLQ()
286 for (int regdQId
= 0; regdQId
< hsaPP
->numHWQueues
; regdQId
++) {
287 uint32_t rl_idx
= (nextRLId
+ regdQId
) % hsaPP
->numHWQueues
;
288 if (isRLQIdle(rl_idx
)) {
// Decides whether the queue in registered-list entry rl_idx is idle.
// Two conditions are examined, and per the comments below each one means the
// queue is not idle: the descriptor's dmaInProgress flag is set, or the ring
// buffer's rdIdx() differs from its dispIdx().
// NOTE(review): qDesc and aql_buf are dereferenced and no null check is
// visible in this listing; the return statements are not visible either.
296 // This function could be moved to packet processor
298 HWScheduler::isRLQIdle(uint32_t rl_idx
)
300 DPRINTF(HSAPacketProcessor
,
301 "@ %s, analyzing hw queue %d\n", __FUNCTION__
, rl_idx
);
302 HSAQueueDescriptor
* qDesc
= hsaPP
->getRegdListEntry(rl_idx
)->qCntxt
.qDesc
;
303 AQLRingBuffer
* aql_buf
= hsaPP
->getRegdListEntry(rl_idx
)->qCntxt
.aqlBuf
;
305 // If there is a pending DMA to this registered queue
306 // then the queue is not idle
307 if (qDesc
->dmaInProgress
) {
311 // Since packet completion stage happens only after kernel completion
312 // we need to keep the queue mapped till all the outstanding kernels
313 // from that queue are finished
314 if (aql_buf
->rdIdx() != aql_buf
->dispIdx()) {
// Handles a doorbell write for the queue whose doorbell offset is db_addr.
// Panics if db_addr is not in dbMap. Otherwise stores doorbell_reg as the
// writeIndex of the corresponding active-list queue descriptor and, when that
// queue is present in regdListMap, asks hsaPP to fetch commands from the host
// for the hardware queue it is mapped to.
// NOTE(review): dbMap is looked up twice (find() and then operator[]);
// dbmap_iter->second holds the same value.
322 HWScheduler::write(Addr db_addr
, uint32_t doorbell_reg
)
324 auto dbmap_iter
= dbMap
.find(db_addr
);
325 if (dbmap_iter
== dbMap
.end()) {
326 panic("Writing to a non-existing queue (db_offset %x)", db_addr
);
328 uint32_t al_idx
= dbMap
[db_addr
];
329 // Modify the write pointer
330 activeList
[al_idx
].qDesc
->writeIndex
= doorbell_reg
;
331 // If this queue is mapped, then start DMA to fetch the
333 if (regdListMap
.find(al_idx
) != regdListMap
.end()) {
334 hsaPP
->getCommandsFromHost(0, regdListMap
[al_idx
]);
339 HWScheduler::unregisterQueue(uint64_t queue_id
)
341 // Pointer arithmetic on a null pointer is undefined behavior. Clang
342 // compilers therefore complain if the following reads:
343 // `(Addr)(VOID_PTR_ADD32(0, queue_id))`
346 // #define VOID_PTR_ADD32(ptr,n)
347 // (void*)((uint32_t*)(ptr) + n)/*ptr + offset*/
348 // (Addr)VOID_PTR_ADD32(0, queue_id)
349 Addr db_offset
= sizeof(uint32_t)*queue_id
;
350 auto dbmap_iter
= dbMap
.find(db_offset
);
351 if (dbmap_iter
== dbMap
.end()) {
352 panic("Destroying a non-existing queue (db_offset %x)",
355 uint32_t al_idx
= dbMap
[db_offset
];
356 assert(dbMap
[db_offset
] == dbmap_iter
->second
);
357 if (!activeList
[al_idx
].qDesc
->isEmpty()) {
358 // According to the HSA runtime specification, deleting
359 // a queue before it is fully processed can lead to undefined
360 // behavior and it is the application's responsibility to
361 // avoid this situation.
362 // Even completion signal is not a sufficient indication for a
363 // fully processed queue; for example completion signal may be
364 // asserted when a read pointer update is in progress
365 warn("Destroying a non-empty queue");
367 delete activeList
[al_idx
].qDesc
;
368 delete activeList
[al_idx
].aqlBuf
;
369 activeList
.erase(al_idx
);
370 // Unmap doorbell from doorbell map
371 dbMap
.erase(db_offset
);
372 if (regdListMap
.find(al_idx
) != regdListMap
.end()) {
373 uint32_t rl_idx
= regdListMap
[al_idx
];
374 hsaPP
->getRegdListEntry(rl_idx
)->qCntxt
.aqlBuf
= NULL
;
375 hsaPP
->getRegdListEntry(rl_idx
)->qCntxt
.qDesc
= NULL
;
376 hsaPP
->getRegdListEntry(rl_idx
)->depSignalRdState
.discardRead
= true;
377 hsaPP
->getRegdListEntry(rl_idx
)->depSignalRdState
.resetSigVals();
378 assert(!hsaPP
->getRegdListEntry(rl_idx
)->aqlProcessEvent
.scheduled());
379 regdListMap
.erase(al_idx
);
380 // A registered queue is released, let us try to map
381 // a queue to that slot