2 * Copyright (c) 2015-2018 Advanced Micro Devices, Inc.
5 * For use for simulation and test purposes only
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
17 * 3. Neither the name of the copyright holder nor the names of its
18 * contributors may be used to endorse or promote products derived from this
19 * software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
33 * Authors: Eric van Tassell
36 #include "dev/hsa/hsa_packet_processor.hh"
40 #include "base/chunk_generator.hh"
41 #include "base/compiler.hh"
42 #include "debug/HSAPacketProcessor.hh"
43 #include "dev/dma_device.hh"
44 #include "dev/hsa/hsa_device.hh"
45 #include "dev/hsa/hsa_packet.hh"
46 #include "dev/hsa/hw_scheduler.hh"
47 #include "mem/packet_access.hh"
48 #include "mem/page_table.hh"
49 #include "sim/process.hh"
50 #include "sim/proxy_ptr.hh"
51 #include "sim/system.hh"
// Helper macros for this file.
//  - HSAPP_EVENT_DESCRIPTION_GENERATOR(XEVENT): emits the definition of
//    HSAPacketProcessor::XEVENT::description() const for the event class
//    named XEVENT.
//  - PKT_TYPE(PKT): extracts the packet type from PKT->header by shifting
//    right by HSA_PACKET_HEADER_TYPE and masking with
//    (HSA_PACKET_HEADER_WIDTH_TYPE - 1); the result is cast to
//    hsa_packet_type_t. PKT must be a pointer to something with a `header`
//    member.
53 #define HSAPP_EVENT_DESCRIPTION_GENERATOR(XEVENT) \
55 HSAPacketProcessor::XEVENT::description() const \
60 #define PKT_TYPE(PKT) ((hsa_packet_type_t)(((PKT->header) >> \
61 HSA_PACKET_HEADER_TYPE) & (HSA_PACKET_HEADER_WIDTH_TYPE - 1)))
63 // checks if the barrier bit is set in the header -- shift the barrier bit
64 // to LSB, then bitwise "and" to mask off all other bits
65 #define IS_BARRIER(PKT) ((hsa_packet_header_t)(((PKT->header) >> \
66 HSA_PACKET_HEADER_BARRIER) & HSA_PACKET_HEADER_WIDTH_BARRIER))
// Emit description() for each of the packet processor's event classes:
// the read-index update DMA event, the command-queue DMA event, the queue
// processing event and the dependency-signal read DMA event.
68 HSAPP_EVENT_DESCRIPTION_GENERATOR(UpdateReadDispIdDmaEvent
)
69 HSAPP_EVENT_DESCRIPTION_GENERATOR(CmdQueueCmdDmaEvent
)
70 HSAPP_EVENT_DESCRIPTION_GENERATOR(QueueProcessEvent
)
71 HSAPP_EVENT_DESCRIPTION_GENERATOR(DepSignalsReadDmaEvent
)
// Construct the packet processor from its parameter object.
//
// Initializes the DmaDevice base, copies numHWQueues / pioAddr /
// pktProcessDelay from the parameters, fixes the PIO window size to
// PAGE_SIZE and the PIO delay to 10, then creates the hardware scheduler
// and one RQLEntry per hardware queue.
73 HSAPacketProcessor::HSAPacketProcessor(const Params
&p
)
74 : DmaDevice(p
), numHWQueues(p
.numHWQueues
), pioAddr(p
.pioAddr
),
75 pioSize(PAGE_SIZE
), pioDelay(10), pktProcessDelay(p
.pktProcessDelay
)
77 DPRINTF(HSAPacketProcessor
, "%s:\n", __FUNCTION__
);
// The scheduler receives a back-pointer to this processor and the
// configured wakeup delay.
78 hwSchdlr
= new HWScheduler(this, p
.wakeupDelay
);
// One registered-queue-list slot per hardware queue; each slot is told
// its own index.
79 regdQList
.resize(numHWQueues
);
80 for (int i
= 0; i
< numHWQueues
; i
++) {
81 regdQList
[i
] = new RQLEntry(this, i
);
// Destructor: walks every entry of regdQList.
// NOTE(review): presumably releases the RQLEntry objects allocated with
// `new` in the constructor -- confirm against the loop body.
85 HSAPacketProcessor::~HSAPacketProcessor()
87 for (auto &queue
: regdQList
) {
// Remove the queue identified by `queue_id` by forwarding the request to
// the hardware scheduler's unregisterQueue().
93 HSAPacketProcessor::unsetDeviceQueueDesc(uint64_t queue_id
)
95 hwSchdlr
->unregisterQueue(queue_id
);
// Register a new queue with the hardware scheduler.
//
// hostReadIndexPointer and basePointer are passed through unchanged to
// HWScheduler::registerNewQueue() together with queue_id and size; the
// same values are logged first. The debug message previously labelled the
// size argument "ze"; it now reads "size".
99 HSAPacketProcessor::setDeviceQueueDesc(uint64_t hostReadIndexPointer
,
100 uint64_t basePointer
,
104 DPRINTF(HSAPacketProcessor
,
105 "%s:base = %p, qID = %d, size = %d\n", __FUNCTION__
,
106 (void *)basePointer
, queue_id
, size
);
107 hwSchdlr
->registerNewQueue(hostReadIndexPointer
,
108 basePointer
, queue_id
, size
);
// Build the list of address ranges this device responds to: a single
// range of pioSize bytes starting at pioAddr. pioSize must be non-zero.
112 HSAPacketProcessor::getAddrRanges() const
114 assert(pioSize
!= 0);
116 AddrRangeList ranges
;
117 ranges
.push_back(RangeSize(pioAddr
, pioSize
));
122 // Basically only processes writes to the queue doorbell register.
//
// Handles a PIO write: the packet address must fall inside
// [pioAddr, pioAddr + pioSize). The offset into the window and the 32-bit
// little-endian payload are handed to the hardware scheduler, and the
// packet is then converted into an atomic response.
124 HSAPacketProcessor::write(Packet
*pkt
)
126 assert(pkt
->getAddr() >= pioAddr
&& pkt
->getAddr() < pioAddr
+ pioSize
);
128 // TODO: How to get pid??
// Offset of the written register relative to the start of the PIO window.
129 M5_VAR_USED Addr daddr
= pkt
->getAddr() - pioAddr
;
131 DPRINTF(HSAPacketProcessor
,
132 "%s: write of size %d to reg-offset %d (0x%x)\n",
133 __FUNCTION__
, pkt
->getSize(), daddr
, daddr
);
// The doorbell value is read as a little-endian 32-bit quantity.
135 uint32_t doorbell_reg
= pkt
->getLE
<uint32_t>();
137 DPRINTF(HSAPacketProcessor
,
138 "%s: write data 0x%x to offset %d (0x%x)\n",
139 __FUNCTION__
, doorbell_reg
, daddr
, daddr
);
140 hwSchdlr
->write(daddr
, doorbell_reg
);
141 pkt
->makeAtomicResponse();
// Handle a PIO read: no register is read back; the packet is turned into
// an atomic response and flagged as a bad address.
146 HSAPacketProcessor::read(Packet
*pkt
)
148 pkt
->makeAtomicResponse();
149 pkt
->setBadAddress();
// Translate virtual address `vaddr` into `paddr` (output parameter) using
// the page table of the process attached to thread 0 of the system.
// A failed translation terminates the simulation via fatal().
154 HSAPacketProcessor::translateOrDie(Addr vaddr
, Addr
&paddr
)
156 // Grab the process and try to translate the virtual address with it; with
157 // new extensions, it will likely be wrong to just arbitrarily grab context
159 auto process
= sys
->threads
[0]->getProcessPtr();
161 if (!process
->pTable
->translate(vaddr
, paddr
))
162 fatal("failed translation: vaddr 0x%x\n", vaddr
);
// Perform a DMA on a virtually-addressed buffer.
//
//   dmaFn - member function (e.g. DmaDevice::dmaRead / dmaWrite) invoked
//           once per chunk with a physical address
//   addr  - starting virtual address
//   size  - total number of bytes
//   event - completion event, attached to the final chunk only
//   data  - local buffer; advanced chunk by chunk
//   delay - forwarded to dmaFn (and used when scheduling `event` directly)
//
// The transfer is cut at PAGE_SIZE boundaries with a ChunkGenerator so that
// each chunk can be translated independently with translateOrDie().
// NOTE(review): the direct schedule(event, ...) call below is presumably
// guarded by a condition (e.g. zero-length transfer) -- confirm.
166 HSAPacketProcessor::dmaVirt(DmaFnPtr dmaFn
, Addr addr
, unsigned size
,
167 Event
*event
, void *data
, Tick delay
)
170 schedule(event
, curTick() + delay
);
174 // move the buffer data pointer with the chunks
175 uint8_t *loc_data
= (uint8_t*)data
;
177 for (ChunkGenerator
gen(addr
, size
, PAGE_SIZE
); !gen
.done(); gen
.next()) {
180 // translate pages into their corresponding frames
181 translateOrDie(gen
.addr(), phys
);
183 // only send event on last transfer; transfers complete in-order
184 Event
*ev
= gen
.last() ? event
: NULL
;
186 (this->*dmaFn
)(phys
, gen
.size(), ev
, loc_data
, delay
);
188 loc_data
+= gen
.size();
// DMA-read `size` bytes from virtual address `host_addr` into `data`.
// Logs the request and delegates to dmaVirt() with DmaDevice::dmaRead;
// `event` and `delay` are passed through unchanged.
193 HSAPacketProcessor::dmaReadVirt(Addr host_addr
, unsigned size
,
194 Event
*event
, void *data
, Tick delay
)
196 DPRINTF(HSAPacketProcessor
,
197 "%s:host_addr = 0x%lx, size = %d\n", __FUNCTION__
, host_addr
, size
);
198 dmaVirt(&DmaDevice::dmaRead
, host_addr
, size
, event
, data
, delay
);
// DMA-write `size` bytes from `data` to virtual address `host_addr`.
// Delegates to dmaVirt() with DmaDevice::dmaWrite; `event` and `delay` are
// passed through unchanged.
202 HSAPacketProcessor::dmaWriteVirt(Addr host_addr
, unsigned size
,
203 Event
*event
, void *data
, Tick delay
)
205 dmaVirt(&DmaDevice::dmaWrite
, host_addr
, size
, event
, data
, delay
);
// Completion event for the read-index write-back DMA.
// The event is created with default priority and the AutoDelete flag; the
// flag is passed to the Event constructor and set again via setFlags().
208 HSAPacketProcessor::UpdateReadDispIdDmaEvent::
209 UpdateReadDispIdDmaEvent()
210 : Event(Default_Pri
, AutoDelete
)
212 DPRINTF(HSAPacketProcessor
, "%s:\n", __FUNCTION__
);
213 setFlags(AutoDelete
);
// Publish the ring buffer's read index for queue `rl_idx` to the host and
// fetch more commands if the host has written new ones.
//
//   pid    - forwarded to getCommandsFromHost()
//   rl_idx - index into regdQList
//
// The current read index (aqlbuf->rdIdxPtr(), sizeof(rdIdx()) bytes) is
// DMA-written to qDesc->hostReadIndexPtr with a freshly allocated
// UpdateReadDispIdDmaEvent as completion event.
217 HSAPacketProcessor::updateReadIndex(int pid
, uint32_t rl_idx
)
219 AQLRingBuffer
* aqlbuf
= regdQList
[rl_idx
]->qCntxt
.aqlBuf
;
220 HSAQueueDescriptor
* qDesc
= regdQList
[rl_idx
]->qCntxt
.qDesc
;
221 auto *dmaEvent
= new UpdateReadDispIdDmaEvent();
223 DPRINTF(HSAPacketProcessor
,
224 "%s: read-pointer offset [0x%x]\n", __FUNCTION__
, aqlbuf
->rdIdx());
226 dmaWriteVirt((Addr
)qDesc
->hostReadIndexPtr
,
227 sizeof(aqlbuf
->rdIdx()),
228 dmaEvent
, aqlbuf
->rdIdxPtr());
230 DPRINTF(HSAPacketProcessor
,
231 "%s: rd-ptr offset [0x%x], wr-ptr offset [0x%x], space used = %d," \
232 " q size = %d, is_empty = %s, active list ID = %d\n", __FUNCTION__
,
233 qDesc
->readIndex
, qDesc
->writeIndex
, qDesc
->spaceUsed(),
234 qDesc
->numElts
, qDesc
->isEmpty()? "true" : "false", rl_idx
);
// The descriptor's write index is ahead of what the ring buffer has
// fetched so far: pull the outstanding commands from the host.
235 if (qDesc
->writeIndex
!= aqlbuf
->wrIdx()) {
236 getCommandsFromHost(pid
, rl_idx
);
// Completion event for one DMA transfer of command packets.
//
//   _hsaPP       - owning packet processor
//   _pid         - stored in `pid`
//   _isRead      - stored in `isRead`
//   _ix_start    - ring-buffer index of the first packet of this transfer
//   _num_pkts    - number of packets covered by this transfer
//   _series_ctx  - context shared by the transfers of one series
//   _dest_4debug - destination buffer pointer, stored in `dest_4debug`
//
// The event is created with default priority and AutoDelete (set both via
// the Event constructor and setFlags()).
240 HSAPacketProcessor::CmdQueueCmdDmaEvent::
241 CmdQueueCmdDmaEvent(HSAPacketProcessor
*_hsaPP
, int _pid
, bool _isRead
,
242 uint32_t _ix_start
, unsigned _num_pkts
,
243 dma_series_ctx
*_series_ctx
, void *_dest_4debug
)
244 : Event(Default_Pri
, AutoDelete
), hsaPP(_hsaPP
), pid(_pid
), isRead(_isRead
),
245 ix_start(_ix_start
), num_pkts(_num_pkts
), series_ctx(_series_ctx
),
246 dest_4debug(_dest_4debug
)
248 setFlags(AutoDelete
);
250 DPRINTF(HSAPacketProcessor
, "%s, ix = %d, npkts = %d," \
251 "active list ID = %d\n", __FUNCTION__
,
252 _ix_start
, num_pkts
, series_ctx
->rl_idx
);
// Runs when one command-packet DMA transfer has completed.
//
// Subtracts this transfer's packet count from the series' remaining count
// (series_ctx->pkts_2_go). When the count reaches zero the queue
// descriptor's dmaInProgress flag is cleared and AQL processing is
// scheduled for the queue.
256 HSAPacketProcessor::CmdQueueCmdDmaEvent::process()
258 uint32_t rl_idx
= series_ctx
->rl_idx
;
259 M5_VAR_USED AQLRingBuffer
*aqlRingBuffer
=
260 hsaPP
->regdQList
[rl_idx
]->qCntxt
.aqlBuf
;
261 HSAQueueDescriptor
* qDesc
=
262 hsaPP
->regdQList
[rl_idx
]->qCntxt
.qDesc
;
263 DPRINTF(HSAPacketProcessor
, ">%s, ix = %d, npkts = %d," \
264 " pktsRemaining = %d, active list ID = %d\n", __FUNCTION__
,
265 ix_start
, num_pkts
, series_ctx
->pkts_2_go
,
268 series_ctx
->pkts_2_go
-= num_pkts
;
269 if (series_ctx
->pkts_2_go
== 0) {
270 // Mark DMA as completed
271 qDesc
->dmaInProgress
= false;
272 DPRINTF(HSAPacketProcessor
,
273 "%s: schedule Qwakeup next cycle, rdIdx %d, wrIdx %d," \
274 " dispIdx %d, active list ID = %d\n",
275 __FUNCTION__
, aqlRingBuffer
->rdIdx(),
276 aqlRingBuffer
->wrIdx(), aqlRingBuffer
->dispIdx(), rl_idx
);
277 // schedule queue wakeup
278 hsaPP
->schedAQLProcessing(rl_idx
);
// Schedule the AQL processing event of queue `rl_idx` to fire `delay`
// ticks from now. If that event is already scheduled nothing is
// rescheduled; only a debug message is emitted.
285 HSAPacketProcessor::schedAQLProcessing(uint32_t rl_idx
, Tick delay
)
287 RQLEntry
*queue
= regdQList
[rl_idx
];
288 if (!queue
->aqlProcessEvent
.scheduled()) {
289 Tick processingTick
= curTick() + delay
;
290 schedule(queue
->aqlProcessEvent
, processingTick
);
291 DPRINTF(HSAPacketProcessor
, "AQL processing scheduled at tick: %d\n",
294 DPRINTF(HSAPacketProcessor
, "AQL processing already scheduled\n");
// Convenience overload: schedule AQL processing for queue `rl_idx` using
// the configured pktProcessDelay as the delay.
299 HSAPacketProcessor::schedAQLProcessing(uint32_t rl_idx
)
301 schedAQLProcessing(rl_idx
, pktProcessDelay
);
// Try to process one AQL packet of queue `rl_idx`.
//
//   pkt           - pointer to the packet in the local ring buffer
//   rl_idx        - index into regdQList
//   host_pkt_addr - host address of the packet, forwarded to the HSA device
//
// The resulting queue state is tracked in `is_submitted`, which starts as
// BLOCKED_BPKT and becomes UNBLOCKED when the packet is handed to the HSA
// device (vendor-specific / kernel-dispatch packets) or when a barrier-AND
// packet completes. Barrier-AND packets issue DMA reads for their
// dependency signals and are retried until the signal values read as zero.
// BARRIER_OR, INVALID and any other packet type are fatal.
305 HSAPacketProcessor::processPkt(void* pkt
, uint32_t rl_idx
, Addr host_pkt_addr
)
307 Q_STATE is_submitted
= BLOCKED_BPKT
;
308 SignalState
*dep_sgnl_rd_st
= &(regdQList
[rl_idx
]->depSignalRdState
);
309 // Dependency signals are not read yet. And this can only be a retry.
310 // The retry logic will schedule the packet processor wakeup
311 if (dep_sgnl_rd_st
->pendingReads
!= 0) {
314 // `pkt` can be typecasted to any type of AQL packet since they all
315 // have header information at offset zero
316 auto disp_pkt
= (_hsa_dispatch_packet_t
*)pkt
;
317 hsa_packet_type_t pkt_type
= PKT_TYPE(disp_pkt
);
318 if (IS_BARRIER(disp_pkt
) &&
319 regdQList
[rl_idx
]->compltnPending() > 0) {
320 // If this packet is using the "barrier bit" to enforce ordering with
321 // previous packets, and if there are outstanding packets, set the
322 // barrier bit for this queue and block the queue.
323 DPRINTF(HSAPacketProcessor
, "%s: setting barrier bit for active" \
324 " list ID = %d\n", __FUNCTION__
, rl_idx
);
325 regdQList
[rl_idx
]->setBarrierBit(true);
328 if (pkt_type
== HSA_PACKET_TYPE_VENDOR_SPECIFIC
) {
329 DPRINTF(HSAPacketProcessor
, "%s: submitting vendor specific pkt" \
330 " active list ID = %d\n", __FUNCTION__
, rl_idx
);
331 // Submit packet to HSA device (dispatcher)
332 hsa_device
->submitVendorPkt((void *)disp_pkt
, rl_idx
, host_pkt_addr
);
333 is_submitted
= UNBLOCKED
;
334 } else if (pkt_type
== HSA_PACKET_TYPE_KERNEL_DISPATCH
) {
335 DPRINTF(HSAPacketProcessor
, "%s: submitting kernel dispatch pkt" \
336 " active list ID = %d\n", __FUNCTION__
, rl_idx
);
337 // Submit packet to HSA device (dispatcher)
338 hsa_device
->submitDispatchPkt((void *)disp_pkt
, rl_idx
, host_pkt_addr
);
339 is_submitted
= UNBLOCKED
;
340 } else if (pkt_type
== HSA_PACKET_TYPE_BARRIER_AND
) {
341 DPRINTF(HSAPacketProcessor
, "%s: Processing barrier packet" \
342 " active list ID = %d\n", __FUNCTION__
, rl_idx
);
343 auto bar_and_pkt
= (_hsa_barrier_and_packet_t
*)pkt
;
345 // Loop through all the completion signals to see if this barrier
347 for (int i
= 0; i
< NumSignalsPerBarrier
; i
++) {
348 // dep_signal = zero implies no signal connected
349 if (bar_and_pkt
->dep_signal
[i
]) {
350 // The signal value is aligned 8 bytes from
351 // the actual handle in the runtime
352 uint64_t signal_addr
=
353 (uint64_t) (((uint64_t *) bar_and_pkt
->dep_signal
[i
]) + 1);
354 hsa_signal_value_t
*signal_val
=
355 &(dep_sgnl_rd_st
->values
[i
]);
356 DPRINTF(HSAPacketProcessor
, "%s: Barrier pkt dep sgnl[%d]" \
357 " , sig addr %x, value %d active list ID = %d\n",
358 __FUNCTION__
, i
, signal_addr
,
359 *signal_val
, rl_idx
);
360 // The if condition will be executed every time except the
361 // very first time this barrier packet is encountered.
362 if (dep_sgnl_rd_st
->allRead
) {
363 if (*signal_val
!= 0) {
364 // This signal is not yet ready, read it again
366 DepSignalsReadDmaEvent
*sgnl_rd_evnt
=
367 new DepSignalsReadDmaEvent(dep_sgnl_rd_st
);
368 dmaReadVirt(signal_addr
, sizeof(hsa_signal_value_t
),
369 sgnl_rd_evnt
, signal_val
);
370 dep_sgnl_rd_st
->pendingReads
++;
371 DPRINTF(HSAPacketProcessor
, "%s: Pending reads %d," \
372 " active list %d\n", __FUNCTION__
,
373 dep_sgnl_rd_st
->pendingReads
, rl_idx
);
376 // This signal is not yet ready, read it again
378 DepSignalsReadDmaEvent
*sgnl_rd_evnt
=
379 new DepSignalsReadDmaEvent(dep_sgnl_rd_st
);
380 dmaReadVirt(signal_addr
, sizeof(hsa_signal_value_t
),
381 sgnl_rd_evnt
, signal_val
);
382 dep_sgnl_rd_st
->pendingReads
++;
383 DPRINTF(HSAPacketProcessor
, "%s: Pending reads %d," \
384 " active list %d\n", __FUNCTION__
,
385 dep_sgnl_rd_st
->pendingReads
, rl_idx
);
390 assert(dep_sgnl_rd_st
->pendingReads
== 0);
391 DPRINTF(HSAPacketProcessor
, "%s: Barrier packet completed" \
392 " active list ID = %d\n", __FUNCTION__
, rl_idx
);
393 // TODO: Completion signal of barrier packet to be
394 // atomically decremented here
395 finishPkt((void*)bar_and_pkt
, rl_idx
);
396 is_submitted
= UNBLOCKED
;
397 // Reset signal values
398 dep_sgnl_rd_st
->resetSigVals();
399 // The completion signal is connected
400 if (bar_and_pkt
->completion_signal
!= 0) {
401 // The signal value is aligned 8 bytes
402 // from the actual handle in the runtime
403 uint64_t signal_addr
=
404 (uint64_t) (((uint64_t *)
405 bar_and_pkt
->completion_signal
) + 1);
406 DPRINTF(HSAPacketProcessor
, "Triggering barrier packet" \
407 " completion signal: %x!\n", signal_addr
);
409 * HACK: The semantics of the HSA signal is to
410 * decrement the current signal value.
411 * I'm going to cheat here and read out
412 * the value from main memory using functional
413 * access, and then just DMA the decremented value.
414 * The reason for this is that the DMASequencer does
415 * not support atomic operations.
417 VPtr
<uint64_t> prev_signal(signal_addr
, sys
->threads
[0]);
// NOTE(review): new_signal is heap-allocated and handed to dmaWriteVirt()
// with a NULL completion event; nothing visible in this function frees
// it -- confirm who owns the buffer once the DMA has completed.
419 hsa_signal_value_t
*new_signal
= new hsa_signal_value_t
;
420 *new_signal
= (hsa_signal_value_t
)*prev_signal
- 1;
422 dmaWriteVirt(signal_addr
,
423 sizeof(hsa_signal_value_t
), NULL
, new_signal
, 0);
426 if (dep_sgnl_rd_st
->pendingReads
> 0) {
427 // At least one DepSignalsReadDmaEvent is scheduled this cycle
428 dep_sgnl_rd_st
->allRead
= false;
429 dep_sgnl_rd_st
->discardRead
= false;
431 } else if (pkt_type
== HSA_PACKET_TYPE_BARRIER_OR
) {
432 fatal("Unsupported packet type HSA_PACKET_TYPE_BARRIER_OR");
433 } else if (pkt_type
== HSA_PACKET_TYPE_INVALID
) {
434 fatal("Unsupported packet type HSA_PACKET_TYPE_INVALID");
436 fatal("Unsupported packet type %d\n", pkt_type
);
441 // Wakes up every fixed time interval (pktProcessDelay) and processes a single
442 // packet from the queue that scheduled this wakeup. If there are more
443 // packets in that queue, the next wakeup is scheduled.
//
// The packet at the ring buffer's dispatch index is passed to
// HSAPacketProcessor::processPkt(); the returned Q_STATE decides whether
// the dispatch index advances (UNBLOCKED) or the queue is simply woken up
// again later (BLOCKED_BPKT / BLOCKED_BBIT). Any other state is a panic.
445 HSAPacketProcessor::QueueProcessEvent::process()
447 AQLRingBuffer
*aqlRingBuffer
= hsaPP
->regdQList
[rqIdx
]->qCntxt
.aqlBuf
;
448 DPRINTF(HSAPacketProcessor
,
449 "%s: Qwakeup , rdIdx %d, wrIdx %d," \
450 " dispIdx %d, active list ID = %d\n",
451 __FUNCTION__
, aqlRingBuffer
->rdIdx(),
452 aqlRingBuffer
->wrIdx(), aqlRingBuffer
->dispIdx(), rqIdx
);
453 // If barrier bit is set, then this wakeup is a dummy wakeup
454 // just to model the processing time. Do nothing.
455 if (hsaPP
->regdQList
[rqIdx
]->getBarrierBit()) {
456 DPRINTF(HSAPacketProcessor
,
457 "Dummy wakeup with barrier bit for rdIdx %d\n", rqIdx
);
460 // In the future, we may support batch processing of packets.
461 // Then, we can just remove the break statements and the code
462 // will support batch processing. That is why we are using a
463 // "while loop" here instead of an "if" condition.
464 while (hsaPP
->regdQList
[rqIdx
]->dispPending()) {
465 void *pkt
= aqlRingBuffer
->ptr(aqlRingBuffer
->dispIdx());
466 DPRINTF(HSAPacketProcessor
, "%s: Attempting dispatch @ dispIdx[%d]\n",
467 __FUNCTION__
, aqlRingBuffer
->dispIdx());
468 Addr host_addr
= aqlRingBuffer
->hostDispAddr();
469 Q_STATE q_state
= hsaPP
->processPkt(pkt
, rqIdx
, host_addr
);
470 if (q_state
== UNBLOCKED
) {
471 aqlRingBuffer
->incDispIdx(1);
472 DPRINTF(HSAPacketProcessor
, "%s: Increment dispIdx[%d]\n",
473 __FUNCTION__
, aqlRingBuffer
->dispIdx());
474 if (hsaPP
->regdQList
[rqIdx
]->dispPending()) {
475 hsaPP
->schedAQLProcessing(rqIdx
);
478 } else if (q_state
== BLOCKED_BPKT
) {
479 // This queue is blocked by barrier packet,
480 // schedule a processing event
481 hsaPP
->schedAQLProcessing(rqIdx
);
483 } else if (q_state
== BLOCKED_BBIT
) {
484 // This queue is blocked by barrier bit, and processing event
485 // should be scheduled from finishPkt(). However, to elapse
486 // "pktProcessDelay" processing time, let us schedule a dummy
487 // wakeup once which will just wakeup and will do nothing.
488 hsaPP
->schedAQLProcessing(rqIdx
);
491 panic("Unknown queue state\n");
// Bookkeeping for a dependency-signal read: at least one read must be
// pending on entry, after which the code checks whether none remain.
// NOTE(review): a decrement of pendingReads between the assert and the
// test is presumably part of the full function -- confirm.
497 HSAPacketProcessor::SignalState::handleReadDMA()
499 assert(pendingReads
> 0);
501 if (pendingReads
== 0) {
// Fetch newly written AQL packets of queue `rl_idx` from the host queue
// into the local AQL ring buffer using DMA reads.
//
//   pid    - passed to the DMA completion events and printed in the log
//   rl_idx - index into regdQList
//
// Nothing is fetched while a previous DMA series is still in progress.
// Otherwise entries are reserved in the ring buffer with allocEntry() and
// the descriptor's readIndex is advanced by the number obtained. The copy
// is issued as one or more transfers, each limited so that it wraps
// neither the host queue nor the ring buffer. If the ring buffer cannot
// take all outstanding packets, stalledOnDmaBufAvailability is set so the
// fetch can be retried once entries are freed.
510 HSAPacketProcessor::getCommandsFromHost(int pid
, uint32_t rl_idx
)
512 HSAQueueDescriptor
* qDesc
= regdQList
[rl_idx
]->qCntxt
.qDesc
;
513 AQLRingBuffer
*aqlRingBuffer
= regdQList
[rl_idx
]->qCntxt
.aqlBuf
;
515 DPRINTF(HSAPacketProcessor
,
516 "%s: read-pointer offset[0x%x], write-pointer offset[0x%x]"
517 " doorbell(%d)[0x%x] \n",
518 __FUNCTION__
, qDesc
->readIndex
,
519 qDesc
->writeIndex
, pid
, qDesc
->doorbellPointer
);
521 if (qDesc
->dmaInProgress
) {
522 // we'll try again when this dma transfer completes in updateReadIndex
525 uint32_t num_umq
= qDesc
->spaceUsed();
527 return; // nothing to be gotten
528 uint32_t umq_nxt
= qDesc
->readIndex
;
529 // Total AQL buffer size
530 uint32_t ttl_aql_buf
= aqlRingBuffer
->numObjs();
531 // Available AQL buffer size. If the available buffer is less than
532 // demanded, number of available buffer is returned
533 uint32_t got_aql_buf
= aqlRingBuffer
->allocEntry(num_umq
);
534 qDesc
->readIndex
+= got_aql_buf
;
535 uint32_t dma_start_ix
= (aqlRingBuffer
->wrIdx() - got_aql_buf
) %
537 dma_series_ctx
*series_ctx
= NULL
;
539 DPRINTF(HSAPacketProcessor
, "%s: umq_nxt = %d, ttl_aql_buf = %d, "
540 "dma_start_ix = %d, num_umq = %d\n", __FUNCTION__
, umq_nxt
,
541 ttl_aql_buf
, dma_start_ix
, num_umq
);
543 if (got_aql_buf
== 0) {
544 // we'll try again when some dma bufs are freed in freeEntry
545 qDesc
->stalledOnDmaBufAvailability
= true;
548 qDesc
->stalledOnDmaBufAvailability
= false;
551 uint32_t dma_b4_wrap
= ttl_aql_buf
- dma_start_ix
;
552 while (got_aql_buf
!= 0 && num_umq
!= 0) {
// NOTE(review): the host-queue index is reduced modulo objSize() here,
// while the update of umq_nxt further down wraps modulo numObjs() --
// confirm that objSize() is the intended modulus.
553 uint32_t umq_b4_wrap
= qDesc
->numObjs() -
554 (umq_nxt
% qDesc
->objSize());
556 = std::min({umq_b4_wrap
, dma_b4_wrap
, num_umq
, got_aql_buf
});
558 qDesc
->dmaInProgress
= true;
559 series_ctx
= new dma_series_ctx(got_aql_buf
, got_aql_buf
,
560 dma_start_ix
, rl_idx
);
563 void *aql_buf
= aqlRingBuffer
->ptr(dma_start_ix
);
564 CmdQueueCmdDmaEvent
*dmaEvent
565 = new CmdQueueCmdDmaEvent(this, pid
, true, dma_start_ix
,
566 num_2_xfer
, series_ctx
, aql_buf
);
567 DPRINTF(HSAPacketProcessor
,
568 "%s: aql_buf = %p, umq_nxt = %d, dma_ix = %d, num2xfer = %d\n",
569 __FUNCTION__
, aql_buf
, umq_nxt
, dma_start_ix
, num_2_xfer
);
571 dmaReadVirt(qDesc
->ptr(umq_nxt
), num_2_xfer
* qDesc
->objSize(),
574 aqlRingBuffer
->saveHostDispAddr(qDesc
->ptr(umq_nxt
), num_2_xfer
,
// Account for the packets just issued and advance both cursors,
// wrapping each at the size of its own buffer.
577 num_umq
-= num_2_xfer
;
578 got_aql_buf
-= num_2_xfer
;
579 dma_start_ix
= (dma_start_ix
+ num_2_xfer
) % ttl_aql_buf
;
580 umq_nxt
= (umq_nxt
+ num_2_xfer
) % qDesc
->numObjs();
581 if (got_aql_buf
== 0 && num_umq
!= 0) {
582 // There are more packets in the queue but
583 // not enough DMA buffers. Set the stalledOnDmaBufAvailability,
584 // we will try again in freeEntry
585 qDesc
->stalledOnDmaBufAvailability
= true;
// Debug helper: print the fields of the queue descriptor of queue `rl_idx`
// (base pointer, doorbell pointer, write/read index, numElts) together
// with `pid` through DPRINTF. No state is modified.
591 HSAPacketProcessor::displayQueueDescriptor(int pid
, uint32_t rl_idx
)
593 M5_VAR_USED HSAQueueDescriptor
* qDesc
= regdQList
[rl_idx
]->qCntxt
.qDesc
;
594 DPRINTF(HSAPacketProcessor
,
595 "%s: pid[%d], basePointer[0x%lx], dBPointer[0x%lx], "
596 "writeIndex[0x%x], readIndex[0x%x], size(bytes)[0x%x]\n",
597 __FUNCTION__
, pid
, qDesc
->basePointer
,
598 qDesc
->doorbellPointer
, qDesc
->writeIndex
,
599 qDesc
->readIndex
, qDesc
->numElts
);
// Create a ring buffer with `size` packet slots named `name`.
// The write, read and dispatch indices start at zero; the packet storage,
// the per-slot completion flags and the per-slot host dispatch addresses
// are all sized to `size`.
602 AQLRingBuffer::AQLRingBuffer(uint32_t size
,
603 const std::string name
)
604 : _name(name
), _wrIdx(0), _rdIdx(0), _dispIdx(0)
606 _aqlBuf
.resize(size
);
607 _aqlComplete
.resize(size
);
608 _hostDispAddresses
.resize(size
);
609 // Mark all packets as invalid and incomplete
610 for (auto& it
: _aqlBuf
)
611 it
.header
= HSA_PACKET_TYPE_INVALID
;
612 std::fill(_aqlComplete
.begin(), _aqlComplete
.end(), false);
// Mark the slot that holds `pkt` as completed and retire completed slots
// in order from the read index.
//
// `pkt` must point into _aqlBuf; the slot index is the pointer difference
// against _aqlBuf.data(). The function reports whether the read index
// changed as a result (old_rdIdx != rdIdx()).
616 AQLRingBuffer::freeEntry(void *pkt
)
618 _aqlComplete
[(hsa_kernel_dispatch_packet_t
*) pkt
- _aqlBuf
.data()] = true;
619 DPRINTF(HSAPacketProcessor
, "%s: pkt_ix = %d; "\
620 " # free entries = %d, wrIdx = %d, rdIdx = %d\n", __FUNCTION__
,
621 (hsa_kernel_dispatch_packet_t
*) pkt
- _aqlBuf
.data(),
622 nFree(), wrIdx(), rdIdx());
623 // Packets can complete out-of-order. This code "retires" packets in-order
624 // by updating the read pointer in the MQD when a contiguous chunk of
625 // packets have finished.
626 uint32_t old_rdIdx
= rdIdx();
627 while (_aqlComplete
[rdIdx() % numObjs()]) {
628 _aqlComplete
[rdIdx() % numObjs()] = false;
629 _aqlBuf
[rdIdx() % numObjs()].header
= HSA_PACKET_TYPE_INVALID
;
632 return (old_rdIdx
!= rdIdx());
// Attach the HSA device that packets are submitted to; the pointer is
// stored in hsa_device without further checks.
636 HSAPacketProcessor::setDevice(HSADevice
*dev
)
638 this->hsa_device
= dev
;
// Reserve up to `nBufReq` entries in the ring buffer.
// The request, the first free index (wrIdx()) and the granted count are
// logged via DPRINTF.
// NOTE(review): when the request exceeds nFree() it is presumably clamped
// to the number of free entries, and a full buffer presumably yields 0 --
// confirm against the complete function body.
642 AQLRingBuffer::allocEntry(uint32_t nBufReq
)
644 DPRINTF(HSAPacketProcessor
, "%s: nReq = %d\n", __FUNCTION__
, nBufReq
);
646 DPRINTF(HSAPacketProcessor
, "%s: return = %d\n", __FUNCTION__
, 0);
650 if (nBufReq
> nFree())
653 DPRINTF(HSAPacketProcessor
, "%s: ix1stFree = %d\n", __FUNCTION__
, wrIdx());
655 DPRINTF(HSAPacketProcessor
, "%s: return = %d, wrIdx = %d\n",
656 __FUNCTION__
, nBufReq
, wrIdx());
661 HSAPacketProcessor::finishPkt(void *pvPkt
, uint32_t rl_idx
)
663 HSAQueueDescriptor
* qDesc
= regdQList
[rl_idx
]->qCntxt
.qDesc
;
665 // if barrier bit was set and this is the last
666 // outstanding packet from that queue,
668 if (regdQList
[rl_idx
]->getBarrierBit() &&
669 regdQList
[rl_idx
]->isLastOutstandingPkt()) {
670 DPRINTF(HSAPacketProcessor
,
671 "Unset barrier bit for active list ID %d\n", rl_idx
);
672 regdQList
[rl_idx
]->setBarrierBit(false);
673 panic_if(!regdQList
[rl_idx
]->dispPending(),
674 "There should be pending kernels in this queue\n");
675 DPRINTF(HSAPacketProcessor
,
676 "Rescheduling active list ID %d after unsetting barrier "
678 // Try to schedule wakeup in the next cycle. There is a minimum
679 // pktProcessDelay for queue wake up. If that processing delay is
680 // elapsed, schedAQLProcessing will wakeup next tick.
681 schedAQLProcessing(rl_idx
, 1);
684 // If set, then blocked schedule, so need to reschedule
685 if (regdQList
[rl_idx
]->qCntxt
.aqlBuf
->freeEntry(pvPkt
))
686 updateReadIndex(0, rl_idx
);
687 DPRINTF(HSAPacketProcessor
,
688 "%s: rd-ptr offset [0x%x], wr-ptr offset [0x%x], space used = %d," \
689 " q size = %d, stalled = %s, empty = %s, active list ID = %d\n",
690 __FUNCTION__
, qDesc
->readIndex
, qDesc
->writeIndex
,
691 qDesc
->spaceUsed(), qDesc
->numElts
,
692 qDesc
->stalledOnDmaBufAvailability
? "true" : "false",
693 qDesc
->isEmpty()? "true" : "false", rl_idx
);
694 // DMA buffer is freed, check the queue to see if there are DMA
695 // accesses blocked because of non-availability of DMA buffer
696 if (qDesc
->stalledOnDmaBufAvailability
) {
697 assert(!qDesc
->isEmpty());
698 getCommandsFromHost(0, rl_idx
); // TODO:assign correct pid
700 // multi-process support