misc: Delete the now unnecessary create methods.
[gem5.git] / src / dev / hsa / hsa_packet_processor.cc
1 /*
2 * Copyright (c) 2015-2018 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the copyright holder nor the names of its
18 * contributors may be used to endorse or promote products derived from this
19 * software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Authors: Eric van Tassell
34 */
35
36 #include "dev/hsa/hsa_packet_processor.hh"
37
38 #include <cstring>
39
40 #include "base/chunk_generator.hh"
41 #include "base/compiler.hh"
42 #include "debug/HSAPacketProcessor.hh"
43 #include "dev/dma_device.hh"
44 #include "dev/hsa/hsa_device.hh"
45 #include "dev/hsa/hsa_packet.hh"
46 #include "dev/hsa/hw_scheduler.hh"
47 #include "mem/packet_access.hh"
48 #include "mem/page_table.hh"
49 #include "sim/process.hh"
50 #include "sim/proxy_ptr.hh"
51 #include "sim/system.hh"
52
// Expands to the out-of-line definition of
// HSAPacketProcessor::<XEVENT>::description(), which returns the event
// class name (the stringified macro argument).
#define HSAPP_EVENT_DESCRIPTION_GENERATOR(XEVENT)                   \
    const char *HSAPacketProcessor::XEVENT::description() const     \
    {                                                               \
        return #XEVENT;                                             \
    }
59
// Extracts the packet-type field from an AQL packet header: shift the field
// down to the LSB, then mask it to its width. The *_WIDTH_* constants are
// field widths in bits (per the HSA runtime headers), so the mask has to be
// built as (1 << width) - 1 rather than (width - 1). The macro argument is
// parenthesized so that expressions such as `PKT_TYPE(p + 1)` expand safely.
#define PKT_TYPE(PKT) ((hsa_packet_type_t)((((PKT)->header) >> \
    HSA_PACKET_HEADER_TYPE) & ((1 << HSA_PACKET_HEADER_WIDTH_TYPE) - 1)))
62
// Checks if the barrier bit is set in the header: shift the barrier field
// down to the LSB, then bitwise "and" with a mask derived from the field
// width to drop all other bits. The mask is built as (1 << width) - 1 so it
// stays correct independent of the width value, and the macro argument is
// parenthesized so that pointer expressions expand safely.
#define IS_BARRIER(PKT) ((hsa_packet_header_t)((((PKT)->header) >> \
    HSA_PACKET_HEADER_BARRIER) & \
    ((1 << HSA_PACKET_HEADER_WIDTH_BARRIER) - 1)))
67
68 HSAPP_EVENT_DESCRIPTION_GENERATOR(UpdateReadDispIdDmaEvent)
69 HSAPP_EVENT_DESCRIPTION_GENERATOR(CmdQueueCmdDmaEvent)
70 HSAPP_EVENT_DESCRIPTION_GENERATOR(QueueProcessEvent)
71 HSAPP_EVENT_DESCRIPTION_GENERATOR(DepSignalsReadDmaEvent)
72
73 HSAPacketProcessor::HSAPacketProcessor(const Params &p)
74 : DmaDevice(p), numHWQueues(p.numHWQueues), pioAddr(p.pioAddr),
75 pioSize(PAGE_SIZE), pioDelay(10), pktProcessDelay(p.pktProcessDelay)
76 {
77 DPRINTF(HSAPacketProcessor, "%s:\n", __FUNCTION__);
78 hwSchdlr = new HWScheduler(this, p.wakeupDelay);
79 regdQList.resize(numHWQueues);
80 for (int i = 0; i < numHWQueues; i++) {
81 regdQList[i] = new RQLEntry(this, i);
82 }
83 }
84
85 HSAPacketProcessor::~HSAPacketProcessor()
86 {
87 for (auto &queue : regdQList) {
88 delete queue;
89 }
90 }
91
92 void
93 HSAPacketProcessor::unsetDeviceQueueDesc(uint64_t queue_id)
94 {
95 hwSchdlr->unregisterQueue(queue_id);
96 }
97
98 void
99 HSAPacketProcessor::setDeviceQueueDesc(uint64_t hostReadIndexPointer,
100 uint64_t basePointer,
101 uint64_t queue_id,
102 uint32_t size)
103 {
104 DPRINTF(HSAPacketProcessor,
105 "%s:base = %p, qID = %d, ze = %d\n", __FUNCTION__,
106 (void *)basePointer, queue_id, size);
107 hwSchdlr->registerNewQueue(hostReadIndexPointer,
108 basePointer, queue_id, size);
109 }
110
111 AddrRangeList
112 HSAPacketProcessor::getAddrRanges() const
113 {
114 assert(pioSize != 0);
115
116 AddrRangeList ranges;
117 ranges.push_back(RangeSize(pioAddr, pioSize));
118
119 return ranges;
120 }
121
122 // Basically only processes writes to the queue doorbell register.
123 Tick
124 HSAPacketProcessor::write(Packet *pkt)
125 {
126 assert(pkt->getAddr() >= pioAddr && pkt->getAddr() < pioAddr + pioSize);
127
128 // TODO: How to get pid??
129 M5_VAR_USED Addr daddr = pkt->getAddr() - pioAddr;
130
131 DPRINTF(HSAPacketProcessor,
132 "%s: write of size %d to reg-offset %d (0x%x)\n",
133 __FUNCTION__, pkt->getSize(), daddr, daddr);
134
135 uint32_t doorbell_reg = pkt->getLE<uint32_t>();
136
137 DPRINTF(HSAPacketProcessor,
138 "%s: write data 0x%x to offset %d (0x%x)\n",
139 __FUNCTION__, doorbell_reg, daddr, daddr);
140 hwSchdlr->write(daddr, doorbell_reg);
141 pkt->makeAtomicResponse();
142 return pioDelay;
143 }
144
145 Tick
146 HSAPacketProcessor::read(Packet *pkt)
147 {
148 pkt->makeAtomicResponse();
149 pkt->setBadAddress();
150 return pioDelay;
151 }
152
153 void
154 HSAPacketProcessor::translateOrDie(Addr vaddr, Addr &paddr)
155 {
156 // Grab the process and try to translate the virtual address with it; with
157 // new extensions, it will likely be wrong to just arbitrarily grab context
158 // zero.
159 auto process = sys->threads[0]->getProcessPtr();
160
161 if (!process->pTable->translate(vaddr, paddr))
162 fatal("failed translation: vaddr 0x%x\n", vaddr);
163 }
164
165 void
166 HSAPacketProcessor::dmaVirt(DmaFnPtr dmaFn, Addr addr, unsigned size,
167 Event *event, void *data, Tick delay)
168 {
169 if (size == 0) {
170 schedule(event, curTick() + delay);
171 return;
172 }
173
174 // move the buffer data pointer with the chunks
175 uint8_t *loc_data = (uint8_t*)data;
176
177 for (ChunkGenerator gen(addr, size, PAGE_SIZE); !gen.done(); gen.next()) {
178 Addr phys;
179
180 // translate pages into their corresponding frames
181 translateOrDie(gen.addr(), phys);
182
183 // only send event on last transfer; transfers complete in-order
184 Event *ev = gen.last() ? event : NULL;
185
186 (this->*dmaFn)(phys, gen.size(), ev, loc_data, delay);
187
188 loc_data += gen.size();
189 }
190 }
191
192 void
193 HSAPacketProcessor::dmaReadVirt(Addr host_addr, unsigned size,
194 Event *event, void *data, Tick delay)
195 {
196 DPRINTF(HSAPacketProcessor,
197 "%s:host_addr = 0x%lx, size = %d\n", __FUNCTION__, host_addr, size);
198 dmaVirt(&DmaDevice::dmaRead, host_addr, size, event, data, delay);
199 }
200
201 void
202 HSAPacketProcessor::dmaWriteVirt(Addr host_addr, unsigned size,
203 Event *event, void *data, Tick delay)
204 {
205 dmaVirt(&DmaDevice::dmaWrite, host_addr, size, event, data, delay);
206 }
207
208 HSAPacketProcessor::UpdateReadDispIdDmaEvent::
209 UpdateReadDispIdDmaEvent()
210 : Event(Default_Pri, AutoDelete)
211 {
212 DPRINTF(HSAPacketProcessor, "%s:\n", __FUNCTION__);
213 setFlags(AutoDelete);
214 }
215
216 void
217 HSAPacketProcessor::updateReadIndex(int pid, uint32_t rl_idx)
218 {
219 AQLRingBuffer* aqlbuf = regdQList[rl_idx]->qCntxt.aqlBuf;
220 HSAQueueDescriptor* qDesc = regdQList[rl_idx]->qCntxt.qDesc;
221 auto *dmaEvent = new UpdateReadDispIdDmaEvent();
222
223 DPRINTF(HSAPacketProcessor,
224 "%s: read-pointer offset [0x%x]\n", __FUNCTION__, aqlbuf->rdIdx());
225
226 dmaWriteVirt((Addr)qDesc->hostReadIndexPtr,
227 sizeof(aqlbuf->rdIdx()),
228 dmaEvent, aqlbuf->rdIdxPtr());
229
230 DPRINTF(HSAPacketProcessor,
231 "%s: rd-ptr offset [0x%x], wr-ptr offset [0x%x], space used = %d," \
232 " q size = %d, is_empty = %s, active list ID = %d\n", __FUNCTION__,
233 qDesc->readIndex, qDesc->writeIndex, qDesc->spaceUsed(),
234 qDesc->numElts, qDesc->isEmpty()? "true" : "false", rl_idx);
235 if (qDesc->writeIndex != aqlbuf->wrIdx()) {
236 getCommandsFromHost(pid, rl_idx);
237 }
238 }
239
240 HSAPacketProcessor::CmdQueueCmdDmaEvent::
241 CmdQueueCmdDmaEvent(HSAPacketProcessor *_hsaPP, int _pid, bool _isRead,
242 uint32_t _ix_start, unsigned _num_pkts,
243 dma_series_ctx *_series_ctx, void *_dest_4debug)
244 : Event(Default_Pri, AutoDelete), hsaPP(_hsaPP), pid(_pid), isRead(_isRead),
245 ix_start(_ix_start), num_pkts(_num_pkts), series_ctx(_series_ctx),
246 dest_4debug(_dest_4debug)
247 {
248 setFlags(AutoDelete);
249
250 DPRINTF(HSAPacketProcessor, "%s, ix = %d, npkts = %d," \
251 "active list ID = %d\n", __FUNCTION__,
252 _ix_start, num_pkts, series_ctx->rl_idx);
253 }
254
255 void
256 HSAPacketProcessor::CmdQueueCmdDmaEvent::process()
257 {
258 uint32_t rl_idx = series_ctx->rl_idx;
259 M5_VAR_USED AQLRingBuffer *aqlRingBuffer =
260 hsaPP->regdQList[rl_idx]->qCntxt.aqlBuf;
261 HSAQueueDescriptor* qDesc =
262 hsaPP->regdQList[rl_idx]->qCntxt.qDesc;
263 DPRINTF(HSAPacketProcessor, ">%s, ix = %d, npkts = %d," \
264 " pktsRemaining = %d, active list ID = %d\n", __FUNCTION__,
265 ix_start, num_pkts, series_ctx->pkts_2_go,
266 rl_idx);
267 if (isRead) {
268 series_ctx->pkts_2_go -= num_pkts;
269 if (series_ctx->pkts_2_go == 0) {
270 // Mark DMA as completed
271 qDesc->dmaInProgress = false;
272 DPRINTF(HSAPacketProcessor,
273 "%s: schedule Qwakeup next cycle, rdIdx %d, wrIdx %d," \
274 " dispIdx %d, active list ID = %d\n",
275 __FUNCTION__, aqlRingBuffer->rdIdx(),
276 aqlRingBuffer->wrIdx(), aqlRingBuffer->dispIdx(), rl_idx);
277 // schedule queue wakeup
278 hsaPP->schedAQLProcessing(rl_idx);
279 delete series_ctx;
280 }
281 }
282 }
283
284 void
285 HSAPacketProcessor::schedAQLProcessing(uint32_t rl_idx, Tick delay)
286 {
287 RQLEntry *queue = regdQList[rl_idx];
288 if (!queue->aqlProcessEvent.scheduled()) {
289 Tick processingTick = curTick() + delay;
290 schedule(queue->aqlProcessEvent, processingTick);
291 DPRINTF(HSAPacketProcessor, "AQL processing scheduled at tick: %d\n",
292 processingTick);
293 } else {
294 DPRINTF(HSAPacketProcessor, "AQL processing already scheduled\n");
295 }
296 }
297
298 void
299 HSAPacketProcessor::schedAQLProcessing(uint32_t rl_idx)
300 {
301 schedAQLProcessing(rl_idx, pktProcessDelay);
302 }
303
304 Q_STATE
305 HSAPacketProcessor::processPkt(void* pkt, uint32_t rl_idx, Addr host_pkt_addr)
306 {
307 Q_STATE is_submitted = BLOCKED_BPKT;
308 SignalState *dep_sgnl_rd_st = &(regdQList[rl_idx]->depSignalRdState);
309 // Dependency signals are not read yet. And this can only be a retry.
310 // The retry logic will schedule the packet processor wakeup
311 if (dep_sgnl_rd_st->pendingReads != 0) {
312 return BLOCKED_BPKT;
313 }
314 // `pkt` can be typecasted to any type of AQL packet since they all
315 // have header information at offset zero
316 auto disp_pkt = (_hsa_dispatch_packet_t *)pkt;
317 hsa_packet_type_t pkt_type = PKT_TYPE(disp_pkt);
318 if (IS_BARRIER(disp_pkt) &&
319 regdQList[rl_idx]->compltnPending() > 0) {
320 // If this packet is using the "barrier bit" to enforce ordering with
321 // previous packets, and if there are outstanding packets, set the
322 // barrier bit for this queue and block the queue.
323 DPRINTF(HSAPacketProcessor, "%s: setting barrier bit for active" \
324 " list ID = %d\n", __FUNCTION__, rl_idx);
325 regdQList[rl_idx]->setBarrierBit(true);
326 return BLOCKED_BBIT;
327 }
328 if (pkt_type == HSA_PACKET_TYPE_VENDOR_SPECIFIC) {
329 DPRINTF(HSAPacketProcessor, "%s: submitting vendor specific pkt" \
330 " active list ID = %d\n", __FUNCTION__, rl_idx);
331 // Submit packet to HSA device (dispatcher)
332 hsa_device->submitVendorPkt((void *)disp_pkt, rl_idx, host_pkt_addr);
333 is_submitted = UNBLOCKED;
334 } else if (pkt_type == HSA_PACKET_TYPE_KERNEL_DISPATCH) {
335 DPRINTF(HSAPacketProcessor, "%s: submitting kernel dispatch pkt" \
336 " active list ID = %d\n", __FUNCTION__, rl_idx);
337 // Submit packet to HSA device (dispatcher)
338 hsa_device->submitDispatchPkt((void *)disp_pkt, rl_idx, host_pkt_addr);
339 is_submitted = UNBLOCKED;
340 } else if (pkt_type == HSA_PACKET_TYPE_BARRIER_AND) {
341 DPRINTF(HSAPacketProcessor, "%s: Processing barrier packet" \
342 " active list ID = %d\n", __FUNCTION__, rl_idx);
343 auto bar_and_pkt = (_hsa_barrier_and_packet_t *)pkt;
344 bool isReady = true;
345 // Loop thorugh all the completion signals to see if this barrier
346 // packet is ready.
347 for (int i = 0; i < NumSignalsPerBarrier; i++) {
348 // dep_signal = zero imply no signal connected
349 if (bar_and_pkt->dep_signal[i]) {
350 // The signal value is aligned 8 bytes from
351 // the actual handle in the runtime
352 uint64_t signal_addr =
353 (uint64_t) (((uint64_t *) bar_and_pkt->dep_signal[i]) + 1);
354 hsa_signal_value_t *signal_val =
355 &(dep_sgnl_rd_st->values[i]);
356 DPRINTF(HSAPacketProcessor, "%s: Barrier pkt dep sgnl[%d]" \
357 " , sig addr %x, value %d active list ID = %d\n",
358 __FUNCTION__, i, signal_addr,
359 *signal_val, rl_idx);
360 // The if condition will be executed everytime except the
361 // very first time this barrier packet is encounteresd.
362 if (dep_sgnl_rd_st->allRead) {
363 if (*signal_val != 0) {
364 // This signal is not yet ready, read it again
365 isReady = false;
366 DepSignalsReadDmaEvent *sgnl_rd_evnt =
367 new DepSignalsReadDmaEvent(dep_sgnl_rd_st);
368 dmaReadVirt(signal_addr, sizeof(hsa_signal_value_t),
369 sgnl_rd_evnt, signal_val);
370 dep_sgnl_rd_st->pendingReads++;
371 DPRINTF(HSAPacketProcessor, "%s: Pending reads %d," \
372 " active list %d\n", __FUNCTION__,
373 dep_sgnl_rd_st->pendingReads, rl_idx);
374 }
375 } else {
376 // This signal is not yet ready, read it again
377 isReady = false;
378 DepSignalsReadDmaEvent *sgnl_rd_evnt =
379 new DepSignalsReadDmaEvent(dep_sgnl_rd_st);
380 dmaReadVirt(signal_addr, sizeof(hsa_signal_value_t),
381 sgnl_rd_evnt, signal_val);
382 dep_sgnl_rd_st->pendingReads++;
383 DPRINTF(HSAPacketProcessor, "%s: Pending reads %d," \
384 " active list %d\n", __FUNCTION__,
385 dep_sgnl_rd_st->pendingReads, rl_idx);
386 }
387 }
388 }
389 if (isReady) {
390 assert(dep_sgnl_rd_st->pendingReads == 0);
391 DPRINTF(HSAPacketProcessor, "%s: Barrier packet completed" \
392 " active list ID = %d\n", __FUNCTION__, rl_idx);
393 // TODO: Completion signal of barrier packet to be
394 // atomically decremented here
395 finishPkt((void*)bar_and_pkt, rl_idx);
396 is_submitted = UNBLOCKED;
397 // Reset signal values
398 dep_sgnl_rd_st->resetSigVals();
399 // The completion signal is connected
400 if (bar_and_pkt->completion_signal != 0) {
401 // The signal value is aligned 8 bytes
402 // from the actual handle in the runtime
403 uint64_t signal_addr =
404 (uint64_t) (((uint64_t *)
405 bar_and_pkt->completion_signal) + 1);
406 DPRINTF(HSAPacketProcessor, "Triggering barrier packet" \
407 " completion signal: %x!\n", signal_addr);
408 /**
409 * HACK: The semantics of the HSA signal is to
410 * decrement the current signal value.
411 * I'm going to cheat here and read out
412 * the value from main memory using functional
413 * access, and then just DMA the decremented value.
414 * The reason for this is that the DMASequencer does
415 * not support atomic operations.
416 */
417 VPtr<uint64_t> prev_signal(signal_addr, sys->threads[0]);
418
419 hsa_signal_value_t *new_signal = new hsa_signal_value_t;
420 *new_signal = (hsa_signal_value_t)*prev_signal - 1;
421
422 dmaWriteVirt(signal_addr,
423 sizeof(hsa_signal_value_t), NULL, new_signal, 0);
424 }
425 }
426 if (dep_sgnl_rd_st->pendingReads > 0) {
427 // Atleast one DepSignalsReadDmaEvent is scheduled this cycle
428 dep_sgnl_rd_st->allRead = false;
429 dep_sgnl_rd_st->discardRead = false;
430 }
431 } else if (pkt_type == HSA_PACKET_TYPE_BARRIER_OR) {
432 fatal("Unsupported packet type HSA_PACKET_TYPE_BARRIER_OR");
433 } else if (pkt_type == HSA_PACKET_TYPE_INVALID) {
434 fatal("Unsupported packet type HSA_PACKET_TYPE_INVALID");
435 } else {
436 fatal("Unsupported packet type %d\n", pkt_type);
437 }
438 return is_submitted;
439 }
440
441 // Wakes up every fixed time interval (pktProcessDelay) and processes a single
442 // packet from the queue that scheduled this wakeup. If there are more
443 // packets in that queue, the next wakeup is scheduled.
444 void
445 HSAPacketProcessor::QueueProcessEvent::process()
446 {
447 AQLRingBuffer *aqlRingBuffer = hsaPP->regdQList[rqIdx]->qCntxt.aqlBuf;
448 DPRINTF(HSAPacketProcessor,
449 "%s: Qwakeup , rdIdx %d, wrIdx %d," \
450 " dispIdx %d, active list ID = %d\n",
451 __FUNCTION__, aqlRingBuffer->rdIdx(),
452 aqlRingBuffer->wrIdx(), aqlRingBuffer->dispIdx(), rqIdx);
453 // If barrier bit is set, then this wakeup is a dummy wakeup
454 // just to model the processing time. Do nothing.
455 if (hsaPP->regdQList[rqIdx]->getBarrierBit()) {
456 DPRINTF(HSAPacketProcessor,
457 "Dummy wakeup with barrier bit for rdIdx %d\n", rqIdx);
458 return;
459 }
460 // In the future, we may support batch processing of packets.
461 // Then, we can just remove the break statements and the code
462 // will support batch processing. That is why we are using a
463 // "while loop" here instead on an "if" condition.
464 while (hsaPP->regdQList[rqIdx]->dispPending()) {
465 void *pkt = aqlRingBuffer->ptr(aqlRingBuffer->dispIdx());
466 DPRINTF(HSAPacketProcessor, "%s: Attempting dispatch @ dispIdx[%d]\n",
467 __FUNCTION__, aqlRingBuffer->dispIdx());
468 Addr host_addr = aqlRingBuffer->hostDispAddr();
469 Q_STATE q_state = hsaPP->processPkt(pkt, rqIdx, host_addr);
470 if (q_state == UNBLOCKED) {
471 aqlRingBuffer->incDispIdx(1);
472 DPRINTF(HSAPacketProcessor, "%s: Increment dispIdx[%d]\n",
473 __FUNCTION__, aqlRingBuffer->dispIdx());
474 if (hsaPP->regdQList[rqIdx]->dispPending()) {
475 hsaPP->schedAQLProcessing(rqIdx);
476 }
477 break;
478 } else if (q_state == BLOCKED_BPKT) {
479 // This queue is blocked by barrier packet,
480 // schedule a processing event
481 hsaPP->schedAQLProcessing(rqIdx);
482 break;
483 } else if (q_state == BLOCKED_BBIT) {
484 // This queue is blocked by barrier bit, and processing event
485 // should be scheduled from finishPkt(). However, to elapse
486 // "pktProcessDelay" processing time, let us schedule a dummy
487 // wakeup once which will just wakeup and will do nothing.
488 hsaPP->schedAQLProcessing(rqIdx);
489 break;
490 } else {
491 panic("Unknown queue state\n");
492 }
493 }
494 }
495
496 void
497 HSAPacketProcessor::SignalState::handleReadDMA()
498 {
499 assert(pendingReads > 0);
500 pendingReads--;
501 if (pendingReads == 0) {
502 allRead = true;
503 if (discardRead) {
504 resetSigVals();
505 }
506 }
507 }
508
509 void
510 HSAPacketProcessor::getCommandsFromHost(int pid, uint32_t rl_idx)
511 {
512 HSAQueueDescriptor* qDesc = regdQList[rl_idx]->qCntxt.qDesc;
513 AQLRingBuffer *aqlRingBuffer = regdQList[rl_idx]->qCntxt.aqlBuf;
514
515 DPRINTF(HSAPacketProcessor,
516 "%s: read-pointer offset[0x%x], write-pointer offset[0x%x]"
517 " doorbell(%d)[0x%x] \n",
518 __FUNCTION__, qDesc->readIndex,
519 qDesc->writeIndex, pid, qDesc->doorbellPointer);
520
521 if (qDesc->dmaInProgress) {
522 // we'll try again when this dma transfer completes in updateReadIndex
523 return;
524 }
525 uint32_t num_umq = qDesc->spaceUsed();
526 if (num_umq == 0)
527 return; // nothing to be gotten
528 uint32_t umq_nxt = qDesc->readIndex;
529 // Total AQL buffer size
530 uint32_t ttl_aql_buf = aqlRingBuffer->numObjs();
531 // Available AQL buffer size. If the available buffer is less than
532 // demanded, number of available buffer is returned
533 uint32_t got_aql_buf = aqlRingBuffer->allocEntry(num_umq);
534 qDesc->readIndex += got_aql_buf;
535 uint32_t dma_start_ix = (aqlRingBuffer->wrIdx() - got_aql_buf) %
536 ttl_aql_buf;
537 dma_series_ctx *series_ctx = NULL;
538
539 DPRINTF(HSAPacketProcessor, "%s: umq_nxt = %d, ttl_aql_buf = %d, "
540 "dma_start_ix = %d, num_umq = %d\n", __FUNCTION__, umq_nxt,
541 ttl_aql_buf, dma_start_ix, num_umq);
542
543 if (got_aql_buf == 0) {
544 // we'll try again when some dma bufs are freed in freeEntry
545 qDesc->stalledOnDmaBufAvailability = true;
546 return;
547 } else {
548 qDesc->stalledOnDmaBufAvailability = false;
549 }
550
551 uint32_t dma_b4_wrap = ttl_aql_buf - dma_start_ix;
552 while (got_aql_buf != 0 && num_umq != 0) {
553 uint32_t umq_b4_wrap = qDesc->numObjs() -
554 (umq_nxt % qDesc->objSize());
555 uint32_t num_2_xfer
556 = std::min({umq_b4_wrap, dma_b4_wrap, num_umq, got_aql_buf});
557 if (!series_ctx) {
558 qDesc->dmaInProgress = true;
559 series_ctx = new dma_series_ctx(got_aql_buf, got_aql_buf,
560 dma_start_ix, rl_idx);
561 }
562
563 void *aql_buf = aqlRingBuffer->ptr(dma_start_ix);
564 CmdQueueCmdDmaEvent *dmaEvent
565 = new CmdQueueCmdDmaEvent(this, pid, true, dma_start_ix,
566 num_2_xfer, series_ctx, aql_buf);
567 DPRINTF(HSAPacketProcessor,
568 "%s: aql_buf = %p, umq_nxt = %d, dma_ix = %d, num2xfer = %d\n",
569 __FUNCTION__, aql_buf, umq_nxt, dma_start_ix, num_2_xfer);
570
571 dmaReadVirt(qDesc->ptr(umq_nxt), num_2_xfer * qDesc->objSize(),
572 dmaEvent, aql_buf);
573
574 aqlRingBuffer->saveHostDispAddr(qDesc->ptr(umq_nxt), num_2_xfer,
575 dma_start_ix);
576
577 num_umq -= num_2_xfer;
578 got_aql_buf -= num_2_xfer;
579 dma_start_ix = (dma_start_ix + num_2_xfer) % ttl_aql_buf;
580 umq_nxt = (umq_nxt + num_2_xfer) % qDesc->numObjs();
581 if (got_aql_buf == 0 && num_umq != 0) {
582 // There are more packets in the queue but
583 // not enough DMA buffers. Set the stalledOnDmaBufAvailability,
584 // we will try again in freeEntry
585 qDesc->stalledOnDmaBufAvailability = true;
586 }
587 }
588 }
589
590 void
591 HSAPacketProcessor::displayQueueDescriptor(int pid, uint32_t rl_idx)
592 {
593 M5_VAR_USED HSAQueueDescriptor* qDesc = regdQList[rl_idx]->qCntxt.qDesc;
594 DPRINTF(HSAPacketProcessor,
595 "%s: pid[%d], basePointer[0x%lx], dBPointer[0x%lx], "
596 "writeIndex[0x%x], readIndex[0x%x], size(bytes)[0x%x]\n",
597 __FUNCTION__, pid, qDesc->basePointer,
598 qDesc->doorbellPointer, qDesc->writeIndex,
599 qDesc->readIndex, qDesc->numElts);
600 }
601
602 AQLRingBuffer::AQLRingBuffer(uint32_t size,
603 const std::string name)
604 : _name(name), _wrIdx(0), _rdIdx(0), _dispIdx(0)
605 {
606 _aqlBuf.resize(size);
607 _aqlComplete.resize(size);
608 _hostDispAddresses.resize(size);
609 // Mark all packets as invalid and incomplete
610 for (auto& it : _aqlBuf)
611 it.header = HSA_PACKET_TYPE_INVALID;
612 std::fill(_aqlComplete.begin(), _aqlComplete.end(), false);
613 }
614
615 bool
616 AQLRingBuffer::freeEntry(void *pkt)
617 {
618 _aqlComplete[(hsa_kernel_dispatch_packet_t *) pkt - _aqlBuf.data()] = true;
619 DPRINTF(HSAPacketProcessor, "%s: pkt_ix = %d; "\
620 " # free entries = %d, wrIdx = %d, rdIdx = %d\n", __FUNCTION__,
621 (hsa_kernel_dispatch_packet_t *) pkt - _aqlBuf.data(),
622 nFree(), wrIdx(), rdIdx());
623 // Packets can complete out-of-order. This code "retires" packets in-order
624 // by updating the read pointer in the MQD when a contiguous chunk of
625 // packets have finished.
626 uint32_t old_rdIdx = rdIdx();
627 while (_aqlComplete[rdIdx() % numObjs()]) {
628 _aqlComplete[rdIdx() % numObjs()] = false;
629 _aqlBuf[rdIdx() % numObjs()].header = HSA_PACKET_TYPE_INVALID;
630 incRdIdx(1);
631 }
632 return (old_rdIdx != rdIdx());
633 }
634
635 void
636 HSAPacketProcessor::setDevice(HSADevice *dev)
637 {
638 this->hsa_device = dev;
639 }
640
641 int
642 AQLRingBuffer::allocEntry(uint32_t nBufReq)
643 {
644 DPRINTF(HSAPacketProcessor, "%s: nReq = %d\n", __FUNCTION__, nBufReq);
645 if (nFree() == 0) {
646 DPRINTF(HSAPacketProcessor, "%s: return = %d\n", __FUNCTION__, 0);
647 return 0;
648 }
649
650 if (nBufReq > nFree())
651 nBufReq = nFree();
652
653 DPRINTF(HSAPacketProcessor, "%s: ix1stFree = %d\n", __FUNCTION__, wrIdx());
654 incWrIdx(nBufReq);
655 DPRINTF(HSAPacketProcessor, "%s: return = %d, wrIdx = %d\n",
656 __FUNCTION__, nBufReq, wrIdx());
657 return nBufReq;
658 }
659
660 void
661 HSAPacketProcessor::finishPkt(void *pvPkt, uint32_t rl_idx)
662 {
663 HSAQueueDescriptor* qDesc = regdQList[rl_idx]->qCntxt.qDesc;
664
665 // if barrier bit was set and this is the last
666 // outstanding packet from that queue,
667 // unset it here
668 if (regdQList[rl_idx]->getBarrierBit() &&
669 regdQList[rl_idx]->isLastOutstandingPkt()) {
670 DPRINTF(HSAPacketProcessor,
671 "Unset barrier bit for active list ID %d\n", rl_idx);
672 regdQList[rl_idx]->setBarrierBit(false);
673 panic_if(!regdQList[rl_idx]->dispPending(),
674 "There should be pending kernels in this queue\n");
675 DPRINTF(HSAPacketProcessor,
676 "Rescheduling active list ID %d after unsetting barrier "
677 "bit\n", rl_idx);
678 // Try to schedule wakeup in the next cycle. There is a minimum
679 // pktProcessDelay for queue wake up. If that processing delay is
680 // elapsed, schedAQLProcessing will wakeup next tick.
681 schedAQLProcessing(rl_idx, 1);
682 }
683
684 // If set, then blocked schedule, so need to reschedule
685 if (regdQList[rl_idx]->qCntxt.aqlBuf->freeEntry(pvPkt))
686 updateReadIndex(0, rl_idx);
687 DPRINTF(HSAPacketProcessor,
688 "%s: rd-ptr offset [0x%x], wr-ptr offset [0x%x], space used = %d," \
689 " q size = %d, stalled = %s, empty = %s, active list ID = %d\n",
690 __FUNCTION__, qDesc->readIndex, qDesc->writeIndex,
691 qDesc->spaceUsed(), qDesc->numElts,
692 qDesc->stalledOnDmaBufAvailability? "true" : "false",
693 qDesc->isEmpty()? "true" : "false", rl_idx);
694 // DMA buffer is freed, check the queue to see if there are DMA
695 // accesses blocked becasue of non-availability of DMA buffer
696 if (qDesc->stalledOnDmaBufAvailability) {
697 assert(!qDesc->isEmpty());
698 getCommandsFromHost(0, rl_idx); // TODO:assign correct pid
699 // when implementing
700 // multi-process support
701 }
702 }