misc: Merge branch 'release-staging-v20.0.0.0' into develop
[gem5.git] / src / dev / hsa / hsa_packet_processor.cc
1 /*
2 * Copyright (c) 2015-2018 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the copyright holder nor the names of its
18 * contributors may be used to endorse or promote products derived from this
19 * software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Authors: Eric van Tassell
34 */
35
36 #include "dev/hsa/hsa_packet_processor.hh"
37
38 #include <cstring>
39
40 #include "base/chunk_generator.hh"
41 #include "base/compiler.hh"
42 #include "debug/HSAPacketProcessor.hh"
43 #include "dev/dma_device.hh"
44 #include "dev/hsa/hsa_device.hh"
45 #include "dev/hsa/hsa_packet.hh"
46 #include "dev/hsa/hw_scheduler.hh"
47 #include "mem/packet_access.hh"
48 #include "mem/page_table.hh"
49 #include "sim/process.hh"
50 #include "sim/syscall_emul_buf.hh"
51 #include "sim/system.hh"
52
// Stamps out the out-of-line description() member for one of the event
// classes nested inside HSAPacketProcessor. The string returned is simply
// the event class name, obtained by stringizing the macro argument.
#define HSAPP_EVENT_DESCRIPTION_GENERATOR(XEVENT)                     \
    const char *HSAPacketProcessor::XEVENT::description() const       \
    {                                                                 \
        return #XEVENT;                                               \
    }
59
// Extracts the packet-type field from the `header` member of an AQL packet.
// PKT is a pointer to any packet structure exposing `header`.
//
// The field starts at bit HSA_PACKET_HEADER_TYPE. The mask is built as
// ((1 << width) - 1), on the assumption (to be confirmed against hsa.h) that
// HSA_PACKET_HEADER_WIDTH_TYPE is the width of the field in bits, as in the
// HSA runtime headers. The former mask, (width - 1), only kept part of the
// field. The argument is parenthesised so that expressions such as `&pkt`
// can be passed safely.
#define PKT_TYPE(PKT) ((hsa_packet_type_t)((((PKT)->header) >> \
    HSA_PACKET_HEADER_TYPE) & ((1 << HSA_PACKET_HEADER_WIDTH_TYPE) - 1)))
62
63 HSAPP_EVENT_DESCRIPTION_GENERATOR(UpdateReadDispIdDmaEvent)
64 HSAPP_EVENT_DESCRIPTION_GENERATOR(CmdQueueCmdDmaEvent)
65 HSAPP_EVENT_DESCRIPTION_GENERATOR(QueueProcessEvent)
66 HSAPP_EVENT_DESCRIPTION_GENERATOR(DepSignalsReadDmaEvent)
67
68 HSAPacketProcessor::HSAPacketProcessor(const Params *p)
69 : DmaDevice(p), numHWQueues(p->numHWQueues), pioAddr(p->pioAddr),
70 pioSize(PAGE_SIZE), pioDelay(10), pktProcessDelay(p->pktProcessDelay)
71 {
72 DPRINTF(HSAPacketProcessor, "%s:\n", __FUNCTION__);
73 hwSchdlr = new HWScheduler(this, p->wakeupDelay);
74 regdQList.resize(numHWQueues);
75 for (int i = 0; i < numHWQueues; i++) {
76 regdQList[i] = new RQLEntry(this, i);
77 }
78 }
79
80 HSAPacketProcessor::~HSAPacketProcessor()
81 {
82 for (auto &queue : regdQList) {
83 delete queue;
84 }
85 }
86
87 void
88 HSAPacketProcessor::unsetDeviceQueueDesc(uint64_t queue_id)
89 {
90 hwSchdlr->unregisterQueue(queue_id);
91 }
92
93 void
94 HSAPacketProcessor::setDeviceQueueDesc(uint64_t hostReadIndexPointer,
95 uint64_t basePointer,
96 uint64_t queue_id,
97 uint32_t size)
98 {
99 DPRINTF(HSAPacketProcessor,
100 "%s:base = %p, qID = %d, ze = %d\n", __FUNCTION__,
101 (void *)basePointer, queue_id, size);
102 hwSchdlr->registerNewQueue(hostReadIndexPointer,
103 basePointer, queue_id, size);
104 }
105
106 AddrRangeList
107 HSAPacketProcessor::getAddrRanges() const
108 {
109 assert(pioSize != 0);
110
111 AddrRangeList ranges;
112 ranges.push_back(RangeSize(pioAddr, pioSize));
113
114 return ranges;
115 }
116
117 // Basically only processes writes to the queue doorbell register.
118 Tick
119 HSAPacketProcessor::write(Packet *pkt)
120 {
121 assert(pkt->getAddr() >= pioAddr && pkt->getAddr() < pioAddr + pioSize);
122
123 // TODO: How to get pid??
124 Addr M5_VAR_USED daddr = pkt->getAddr() - pioAddr;
125
126 DPRINTF(HSAPacketProcessor,
127 "%s: write of size %d to reg-offset %d (0x%x)\n",
128 __FUNCTION__, pkt->getSize(), daddr, daddr);
129
130 uint32_t doorbell_reg = pkt->getLE<uint32_t>();
131
132 DPRINTF(HSAPacketProcessor,
133 "%s: write data 0x%x to offset %d (0x%x)\n",
134 __FUNCTION__, doorbell_reg, daddr, daddr);
135 hwSchdlr->write(daddr, doorbell_reg);
136 pkt->makeAtomicResponse();
137 return pioDelay;
138 }
139
140 Tick
141 HSAPacketProcessor::read(Packet *pkt)
142 {
143 pkt->makeAtomicResponse();
144 pkt->setBadAddress();
145 return pioDelay;
146 }
147
148 void
149 HSAPacketProcessor::translateOrDie(Addr vaddr, Addr &paddr)
150 {
151 // Grab the process and try to translate the virtual address with it; with
152 // new extensions, it will likely be wrong to just arbitrarily grab context
153 // zero.
154 auto process = sys->getThreadContext(0)->getProcessPtr();
155
156 if (!process->pTable->translate(vaddr, paddr))
157 fatal("failed translation: vaddr 0x%x\n", vaddr);
158 }
159
160 void
161 HSAPacketProcessor::dmaVirt(DmaFnPtr dmaFn, Addr addr, unsigned size,
162 Event *event, void *data, Tick delay)
163 {
164 if (size == 0) {
165 schedule(event, curTick() + delay);
166 return;
167 }
168
169 // move the buffer data pointer with the chunks
170 uint8_t *loc_data = (uint8_t*)data;
171
172 for (ChunkGenerator gen(addr, size, PAGE_SIZE); !gen.done(); gen.next()) {
173 Addr phys;
174
175 // translate pages into their corresponding frames
176 translateOrDie(gen.addr(), phys);
177
178 // only send event on last transfer; transfers complete in-order
179 Event *ev = gen.last() ? event : NULL;
180
181 (this->*dmaFn)(phys, gen.size(), ev, loc_data, delay);
182
183 loc_data += gen.size();
184 }
185 }
186
187 void
188 HSAPacketProcessor::dmaReadVirt(Addr host_addr, unsigned size,
189 Event *event, void *data, Tick delay)
190 {
191 DPRINTF(HSAPacketProcessor,
192 "%s:host_addr = 0x%lx, size = %d\n", __FUNCTION__, host_addr, size);
193 dmaVirt(&DmaDevice::dmaRead, host_addr, size, event, data, delay);
194 }
195
196 void
197 HSAPacketProcessor::dmaWriteVirt(Addr host_addr, unsigned size,
198 Event *event, void *data, Tick delay)
199 {
200 dmaVirt(&DmaDevice::dmaWrite, host_addr, size, event, data, delay);
201 }
202
203 HSAPacketProcessor::UpdateReadDispIdDmaEvent::
204 UpdateReadDispIdDmaEvent()
205 : Event(Default_Pri, AutoDelete)
206 {
207 DPRINTF(HSAPacketProcessor, "%s:\n", __FUNCTION__);
208 setFlags(AutoDelete);
209 }
210
211 void
212 HSAPacketProcessor::updateReadIndex(int pid, uint32_t rl_idx)
213 {
214 AQLRingBuffer* aqlbuf = regdQList[rl_idx]->qCntxt.aqlBuf;
215 HSAQueueDescriptor* qDesc = regdQList[rl_idx]->qCntxt.qDesc;
216 auto *dmaEvent = new UpdateReadDispIdDmaEvent();
217
218 DPRINTF(HSAPacketProcessor,
219 "%s: read-pointer offset [0x%x]\n", __FUNCTION__, aqlbuf->rdIdx());
220
221 dmaWriteVirt((Addr)qDesc->hostReadIndexPtr,
222 sizeof(aqlbuf->rdIdx()),
223 dmaEvent, aqlbuf->rdIdxPtr());
224
225 DPRINTF(HSAPacketProcessor,
226 "%s: rd-ptr offset [0x%x], wr-ptr offset [0x%x], space used = %d," \
227 " q size = %d, is_empty = %s, active list ID = %d\n", __FUNCTION__,
228 qDesc->readIndex, qDesc->writeIndex, qDesc->spaceUsed(),
229 qDesc->numElts, qDesc->isEmpty()? "true" : "false", rl_idx);
230 if (qDesc->writeIndex != aqlbuf->wrIdx()) {
231 getCommandsFromHost(pid, rl_idx);
232 }
233 }
234
235 HSAPacketProcessor::CmdQueueCmdDmaEvent::
236 CmdQueueCmdDmaEvent(HSAPacketProcessor *_hsaPP, int _pid, bool _isRead,
237 uint32_t _ix_start, unsigned _num_pkts,
238 dma_series_ctx *_series_ctx, void *_dest_4debug)
239 : Event(Default_Pri, AutoDelete), hsaPP(_hsaPP), pid(_pid), isRead(_isRead),
240 ix_start(_ix_start), num_pkts(_num_pkts), series_ctx(_series_ctx),
241 dest_4debug(_dest_4debug)
242 {
243 setFlags(AutoDelete);
244
245 DPRINTF(HSAPacketProcessor, "%s, ix = %d, npkts = %d," \
246 "active list ID = %d\n", __FUNCTION__,
247 _ix_start, num_pkts, series_ctx->rl_idx);
248 }
249
250 void
251 HSAPacketProcessor::CmdQueueCmdDmaEvent::process()
252 {
253 uint32_t rl_idx = series_ctx->rl_idx;
254 AQLRingBuffer *aqlRingBuffer M5_VAR_USED =
255 hsaPP->regdQList[rl_idx]->qCntxt.aqlBuf;
256 HSAQueueDescriptor* qDesc =
257 hsaPP->regdQList[rl_idx]->qCntxt.qDesc;
258 DPRINTF(HSAPacketProcessor, ">%s, ix = %d, npkts = %d," \
259 " pktsRemaining = %d, active list ID = %d\n", __FUNCTION__,
260 ix_start, num_pkts, series_ctx->pkts_2_go,
261 rl_idx);
262 if (isRead) {
263 series_ctx->pkts_2_go -= num_pkts;
264 if (series_ctx->pkts_2_go == 0) {
265 // Mark DMA as completed
266 qDesc->dmaInProgress = false;
267 DPRINTF(HSAPacketProcessor,
268 "%s: schedule Qwakeup next cycle, rdIdx %d, wrIdx %d," \
269 " dispIdx %d, active list ID = %d\n",
270 __FUNCTION__, aqlRingBuffer->rdIdx(),
271 aqlRingBuffer->wrIdx(), aqlRingBuffer->dispIdx(), rl_idx);
272 // schedule queue wakeup
273 hsaPP->schedAQLProcessing(rl_idx);
274 delete series_ctx;
275 }
276 }
277 }
278
279 void
280 HSAPacketProcessor::schedAQLProcessing(uint32_t rl_idx)
281 {
282 RQLEntry *queue = regdQList[rl_idx];
283 if (!queue->aqlProcessEvent.scheduled()) {
284 Tick processingTick = curTick() + pktProcessDelay;
285 schedule(queue->aqlProcessEvent, processingTick);
286 DPRINTF(HSAPacketProcessor, "AQL processing scheduled at tick: %d\n",
287 processingTick);
288 } else {
289 DPRINTF(HSAPacketProcessor, "AQL processing already scheduled\n");
290 }
291 }
292
293 bool
294 HSAPacketProcessor::processPkt(void* pkt, uint32_t rl_idx, Addr host_pkt_addr)
295 {
296 bool is_submitted = false;
297 SignalState *dep_sgnl_rd_st = &(regdQList[rl_idx]->depSignalRdState);
298 // Dependency signals are not read yet. And this can only be a retry.
299 // The retry logic will schedule the packet processor wakeup
300 if (dep_sgnl_rd_st->pendingReads != 0) {
301 return false;
302 }
303 // `pkt` can be typecasted to any type of AQL packet since they all
304 // have header information at offset zero
305 auto disp_pkt = (_hsa_dispatch_packet_t *)pkt;
306 hsa_packet_type_t pkt_type = PKT_TYPE(disp_pkt);
307 if (pkt_type == HSA_PACKET_TYPE_VENDOR_SPECIFIC) {
308 DPRINTF(HSAPacketProcessor, "%s: submitting vendor specific pkt" \
309 " active list ID = %d\n", __FUNCTION__, rl_idx);
310 // Submit packet to HSA device (dispatcher)
311 hsa_device->submitVendorPkt((void *)disp_pkt, rl_idx, host_pkt_addr);
312 is_submitted = true;
313 } else if (pkt_type == HSA_PACKET_TYPE_KERNEL_DISPATCH) {
314 DPRINTF(HSAPacketProcessor, "%s: submitting kernel dispatch pkt" \
315 " active list ID = %d\n", __FUNCTION__, rl_idx);
316 // Submit packet to HSA device (dispatcher)
317 hsa_device->submitDispatchPkt((void *)disp_pkt, rl_idx, host_pkt_addr);
318 is_submitted = true;
319 } else if (pkt_type == HSA_PACKET_TYPE_BARRIER_AND) {
320 DPRINTF(HSAPacketProcessor, "%s: Processing barrier packet" \
321 " active list ID = %d\n", __FUNCTION__, rl_idx);
322 auto bar_and_pkt = (_hsa_barrier_and_packet_t *)pkt;
323 bool isReady = true;
324 // Loop thorugh all the completion signals to see if this barrier
325 // packet is ready.
326 for (int i = 0; i < NumSignalsPerBarrier; i++) {
327 // dep_signal = zero imply no signal connected
328 if (bar_and_pkt->dep_signal[i]) {
329 // The signal value is aligned 8 bytes from
330 // the actual handle in the runtime
331 uint64_t signal_addr =
332 (uint64_t) (((uint64_t *) bar_and_pkt->dep_signal[i]) + 1);
333 hsa_signal_value_t *signal_val =
334 &(dep_sgnl_rd_st->values[i]);
335 DPRINTF(HSAPacketProcessor, "%s: Barrier pkt dep sgnl[%d]" \
336 " , sig addr %x, value %d active list ID = %d\n",
337 __FUNCTION__, i, signal_addr,
338 *signal_val, rl_idx);
339 // The if condition will be executed everytime except the
340 // very first time this barrier packet is encounteresd.
341 if (dep_sgnl_rd_st->allRead) {
342 if (*signal_val != 0) {
343 // This signal is not yet ready, read it again
344 isReady = false;
345 DepSignalsReadDmaEvent *sgnl_rd_evnt =
346 new DepSignalsReadDmaEvent(dep_sgnl_rd_st);
347 dmaReadVirt(signal_addr, sizeof(hsa_signal_value_t),
348 sgnl_rd_evnt, signal_val);
349 dep_sgnl_rd_st->pendingReads++;
350 DPRINTF(HSAPacketProcessor, "%s: Pending reads %d," \
351 " active list %d\n", __FUNCTION__,
352 dep_sgnl_rd_st->pendingReads, rl_idx);
353 }
354 } else {
355 // This signal is not yet ready, read it again
356 isReady = false;
357 DepSignalsReadDmaEvent *sgnl_rd_evnt =
358 new DepSignalsReadDmaEvent(dep_sgnl_rd_st);
359 dmaReadVirt(signal_addr, sizeof(hsa_signal_value_t),
360 sgnl_rd_evnt, signal_val);
361 dep_sgnl_rd_st->pendingReads++;
362 DPRINTF(HSAPacketProcessor, "%s: Pending reads %d," \
363 " active list %d\n", __FUNCTION__,
364 dep_sgnl_rd_st->pendingReads, rl_idx);
365 }
366 }
367 }
368 if (isReady) {
369 assert(dep_sgnl_rd_st->pendingReads == 0);
370 DPRINTF(HSAPacketProcessor, "%s: Barrier packet completed" \
371 " active list ID = %d\n", __FUNCTION__, rl_idx);
372 // TODO: Completion signal of barrier packet to be
373 // atomically decremented here
374 finishPkt((void*)bar_and_pkt, rl_idx);
375 is_submitted = true;
376 // Reset signal values
377 dep_sgnl_rd_st->resetSigVals();
378 // The completion signal is connected
379 if (bar_and_pkt->completion_signal != 0) {
380 // The signal value is aligned 8 bytes
381 // from the actual handle in the runtime
382 uint64_t signal_addr =
383 (uint64_t) (((uint64_t *)
384 bar_and_pkt->completion_signal) + 1);
385 DPRINTF(HSAPacketProcessor, "Triggering barrier packet" \
386 " completion signal: %x!\n", signal_addr);
387 /**
388 * HACK: The semantics of the HSA signal is to
389 * decrement the current signal value.
390 * I'm going to cheat here and read out
391 * the value from main memory using functional
392 * access, and then just DMA the decremented value.
393 * The reason for this is that the DMASequencer does
394 * not support atomic operations.
395 */
396 auto tc = sys->getThreadContext(0);
397 auto &virt_proxy = tc->getVirtProxy();
398 TypedBufferArg<uint64_t> prev_signal(signal_addr);
399 prev_signal.copyIn(virt_proxy);
400
401 hsa_signal_value_t *new_signal = new hsa_signal_value_t;
402 *new_signal = (hsa_signal_value_t) *prev_signal - 1;
403
404 dmaWriteVirt(signal_addr,
405 sizeof(hsa_signal_value_t), NULL, new_signal, 0);
406 }
407 }
408 if (dep_sgnl_rd_st->pendingReads > 0) {
409 // Atleast one DepSignalsReadDmaEvent is scheduled this cycle
410 dep_sgnl_rd_st->allRead = false;
411 dep_sgnl_rd_st->discardRead = false;
412 }
413 } else if (pkt_type == HSA_PACKET_TYPE_BARRIER_OR) {
414 fatal("Unsupported packet type HSA_PACKET_TYPE_BARRIER_OR");
415 } else if (pkt_type == HSA_PACKET_TYPE_INVALID) {
416 fatal("Unsupported packet type HSA_PACKET_TYPE_INVALID");
417 } else {
418 fatal("Unsupported packet type %d\n", pkt_type);
419 }
420 return is_submitted;
421 }
422
423 // Wakes up every fixed time interval (pktProcessDelay) and processes a single
424 // packet from the queue that scheduled this wakeup. If there are more
425 // packets in that queue, the next wakeup is scheduled.
426 void
427 HSAPacketProcessor::QueueProcessEvent::process()
428 {
429 AQLRingBuffer *aqlRingBuffer = hsaPP->regdQList[rqIdx]->qCntxt.aqlBuf;
430 DPRINTF(HSAPacketProcessor,
431 "%s: Qwakeup , rdIdx %d, wrIdx %d," \
432 " dispIdx %d, active list ID = %d\n",
433 __FUNCTION__, aqlRingBuffer->rdIdx(),
434 aqlRingBuffer->wrIdx(), aqlRingBuffer->dispIdx(), rqIdx);
435 // In the future, we may support batch processing of packets.
436 // Then, we can just remove the break statements and the code
437 // will support batch processing. That is why we are using a
438 // "while loop" here instead on an "if" condition.
439 while (hsaPP->regdQList[rqIdx]->dispPending()) {
440 void *pkt = aqlRingBuffer->ptr(aqlRingBuffer->dispIdx());
441 DPRINTF(HSAPacketProcessor, "%s: Attempting dispatch @ dispIdx[%d]\n",
442 __FUNCTION__, aqlRingBuffer->dispIdx());
443 Addr host_addr = aqlRingBuffer->hostDispAddr();
444 if (hsaPP->processPkt(pkt, rqIdx, host_addr)) {
445 aqlRingBuffer->incDispIdx(1);
446 DPRINTF(HSAPacketProcessor, "%s: Increment dispIdx[%d]\n",
447 __FUNCTION__, aqlRingBuffer->dispIdx());
448 if (hsaPP->regdQList[rqIdx]->dispPending()) {
449 hsaPP->schedAQLProcessing(rqIdx);
450 }
451 break;
452 } else {
453 // This queue is blocked, scheduled a processing event
454 hsaPP->schedAQLProcessing(rqIdx);
455 break;
456 }
457 }
458 }
459
460 void
461 HSAPacketProcessor::SignalState::handleReadDMA()
462 {
463 assert(pendingReads > 0);
464 pendingReads--;
465 if (pendingReads == 0) {
466 allRead = true;
467 if (discardRead) {
468 resetSigVals();
469 }
470 }
471 }
472
473 void
474 HSAPacketProcessor::getCommandsFromHost(int pid, uint32_t rl_idx)
475 {
476 HSAQueueDescriptor* qDesc = regdQList[rl_idx]->qCntxt.qDesc;
477 AQLRingBuffer *aqlRingBuffer = regdQList[rl_idx]->qCntxt.aqlBuf;
478
479 DPRINTF(HSAPacketProcessor,
480 "%s: read-pointer offset[0x%x], write-pointer offset[0x%x]"
481 " doorbell(%d)[0x%x] \n",
482 __FUNCTION__, qDesc->readIndex,
483 qDesc->writeIndex, pid, qDesc->doorbellPointer);
484
485 if (qDesc->dmaInProgress) {
486 // we'll try again when this dma transfer completes in updateReadIndex
487 return;
488 }
489 uint32_t num_umq = qDesc->spaceUsed();
490 if (num_umq == 0)
491 return; // nothing to be gotten
492 uint32_t umq_nxt = qDesc->readIndex;
493 // Total AQL buffer size
494 uint32_t ttl_aql_buf = aqlRingBuffer->numObjs();
495 // Available AQL buffer size. If the available buffer is less than
496 // demanded, number of available buffer is returned
497 uint32_t got_aql_buf = aqlRingBuffer->allocEntry(num_umq);
498 qDesc->readIndex += got_aql_buf;
499 uint32_t dma_start_ix = (aqlRingBuffer->wrIdx() - got_aql_buf) %
500 ttl_aql_buf;
501 dma_series_ctx *series_ctx = NULL;
502
503 DPRINTF(HSAPacketProcessor, "%s: umq_nxt = %d, ttl_aql_buf = %d, "
504 "dma_start_ix = %d, num_umq = %d\n", __FUNCTION__, umq_nxt,
505 ttl_aql_buf, dma_start_ix, num_umq);
506
507 if (got_aql_buf == 0) {
508 // we'll try again when some dma bufs are freed in freeEntry
509 qDesc->stalledOnDmaBufAvailability = true;
510 return;
511 } else {
512 qDesc->stalledOnDmaBufAvailability = false;
513 }
514
515 uint32_t dma_b4_wrap = ttl_aql_buf - dma_start_ix;
516 while (got_aql_buf != 0 && num_umq != 0) {
517 uint32_t umq_b4_wrap = qDesc->numObjs() -
518 (umq_nxt % qDesc->objSize());
519 uint32_t num_2_xfer
520 = std::min({umq_b4_wrap, dma_b4_wrap, num_umq, got_aql_buf});
521 if (!series_ctx) {
522 qDesc->dmaInProgress = true;
523 series_ctx = new dma_series_ctx(got_aql_buf, got_aql_buf,
524 dma_start_ix, rl_idx);
525 }
526
527 void *aql_buf = aqlRingBuffer->ptr(dma_start_ix);
528 CmdQueueCmdDmaEvent *dmaEvent
529 = new CmdQueueCmdDmaEvent(this, pid, true, dma_start_ix,
530 num_2_xfer, series_ctx, aql_buf);
531 DPRINTF(HSAPacketProcessor,
532 "%s: aql_buf = %p, umq_nxt = %d, dma_ix = %d, num2xfer = %d\n",
533 __FUNCTION__, aql_buf, umq_nxt, dma_start_ix, num_2_xfer);
534
535 dmaReadVirt(qDesc->ptr(umq_nxt), num_2_xfer * qDesc->objSize(),
536 dmaEvent, aql_buf);
537
538 aqlRingBuffer->saveHostDispAddr(qDesc->ptr(umq_nxt), num_2_xfer,
539 dma_start_ix);
540
541 num_umq -= num_2_xfer;
542 got_aql_buf -= num_2_xfer;
543 dma_start_ix = (dma_start_ix + num_2_xfer) % ttl_aql_buf;
544 umq_nxt = (umq_nxt + num_2_xfer) % qDesc->numObjs();
545 if (got_aql_buf == 0 && num_umq != 0) {
546 // There are more packets in the queue but
547 // not enough DMA buffers. Set the stalledOnDmaBufAvailability,
548 // we will try again in freeEntry
549 qDesc->stalledOnDmaBufAvailability = true;
550 }
551 }
552 }
553
554 void
555 HSAPacketProcessor::displayQueueDescriptor(int pid, uint32_t rl_idx)
556 {
557 HSAQueueDescriptor* M5_VAR_USED qDesc = regdQList[rl_idx]->qCntxt.qDesc;
558 DPRINTF(HSAPacketProcessor,
559 "%s: pid[%d], basePointer[0x%lx], dBPointer[0x%lx], "
560 "writeIndex[0x%x], readIndex[0x%x], size(bytes)[0x%x]\n",
561 __FUNCTION__, pid, qDesc->basePointer,
562 qDesc->doorbellPointer, qDesc->writeIndex,
563 qDesc->readIndex, qDesc->numElts);
564 }
565
566 AQLRingBuffer::AQLRingBuffer(uint32_t size,
567 const std::string name)
568 : _name(name), _wrIdx(0), _rdIdx(0), _dispIdx(0)
569 {
570 _aqlBuf.resize(size);
571 _aqlComplete.resize(size);
572 _hostDispAddresses.resize(size);
573 // Mark all packets as invalid and incomplete
574 for (auto& it : _aqlBuf)
575 it.header = HSA_PACKET_TYPE_INVALID;
576 std::fill(_aqlComplete.begin(), _aqlComplete.end(), false);
577 }
578
579 bool
580 AQLRingBuffer::freeEntry(void *pkt)
581 {
582 _aqlComplete[(hsa_kernel_dispatch_packet_t *) pkt - _aqlBuf.data()] = true;
583 DPRINTF(HSAPacketProcessor, "%s: pkt_ix = %d; "\
584 " # free entries = %d, wrIdx = %d, rdIdx = %d\n", __FUNCTION__,
585 (hsa_kernel_dispatch_packet_t *) pkt - _aqlBuf.data(),
586 nFree(), wrIdx(), rdIdx());
587 // Packets can complete out-of-order. This code "retires" packets in-order
588 // by updating the read pointer in the MQD when a contiguous chunk of
589 // packets have finished.
590 uint32_t old_rdIdx = rdIdx();
591 while (_aqlComplete[rdIdx() % numObjs()]) {
592 _aqlComplete[rdIdx() % numObjs()] = false;
593 _aqlBuf[rdIdx() % numObjs()].header = HSA_PACKET_TYPE_INVALID;
594 incRdIdx(1);
595 }
596 return (old_rdIdx != rdIdx());
597 }
598
599 void
600 HSAPacketProcessor::setDevice(HSADevice *dev)
601 {
602 this->hsa_device = dev;
603 }
604
605 int
606 AQLRingBuffer::allocEntry(uint32_t nBufReq)
607 {
608 DPRINTF(HSAPacketProcessor, "%s: nReq = %d\n", __FUNCTION__, nBufReq);
609 if (nFree() == 0) {
610 DPRINTF(HSAPacketProcessor, "%s: return = %d\n", __FUNCTION__, 0);
611 return 0;
612 }
613
614 if (nBufReq > nFree())
615 nBufReq = nFree();
616
617 DPRINTF(HSAPacketProcessor, "%s: ix1stFree = %d\n", __FUNCTION__, wrIdx());
618 incWrIdx(nBufReq);
619 DPRINTF(HSAPacketProcessor, "%s: return = %d, wrIdx = %d\n",
620 __FUNCTION__, nBufReq, wrIdx());
621 return nBufReq;
622 }
623
624 HSAPacketProcessor *
625 HSAPacketProcessorParams::create()
626 {
627 return new HSAPacketProcessor(this);
628 }
629
630 void
631 HSAPacketProcessor::finishPkt(void *pvPkt, uint32_t rl_idx)
632 {
633 HSAQueueDescriptor* qDesc = regdQList[rl_idx]->qCntxt.qDesc;
634 if (regdQList[rl_idx]->qCntxt.aqlBuf->freeEntry(pvPkt))
635 updateReadIndex(0, rl_idx);
636 DPRINTF(HSAPacketProcessor,
637 "%s: rd-ptr offset [0x%x], wr-ptr offset [0x%x], space used = %d," \
638 " q size = %d, stalled = %s, empty = %s, active list ID = %d\n",
639 __FUNCTION__, qDesc->readIndex, qDesc->writeIndex,
640 qDesc->spaceUsed(), qDesc->numElts,
641 qDesc->stalledOnDmaBufAvailability? "true" : "false",
642 qDesc->isEmpty()? "true" : "false", rl_idx);
643 // DMA buffer is freed, check the queue to see if there are DMA
644 // accesses blocked becasue of non-availability of DMA buffer
645 if (qDesc->stalledOnDmaBufAvailability) {
646 assert(!qDesc->isEmpty());
647 getCommandsFromHost(0, rl_idx); // TODO:assign correct pid
648 // when implementing
649 // multi-process support
650 }
651 }