gpuCmdProc->hsaPacketProc()
.finishPkt(task->dispPktPtr(), task->queueId());
if (task->completionSignal()) {
- // The signal value is aligned 8 bytes from
- // the actual handle in the runtime
- Addr signal_addr = task->completionSignal() + sizeof(Addr);
- DPRINTF(GPUDisp, "HSA AQL Kernel Complete! Triggering "
- "completion signal: %x!\n", signal_addr);
-
/**
- * HACK: The semantics of the HSA signal is to decrement
- * the current signal value. We cheat here and read out
- * he value from main memory using functional access and
- * then just DMA the decremented value. This is because
- * the DMA controller does not currently support GPU
- * atomics.
- */
- auto *tc = gpuCmdProc->system()->threads[0];
- auto &virt_proxy = tc->getVirtProxy();
- TypedBufferArg<Addr> prev_signal(signal_addr);
- prev_signal.copyIn(virt_proxy);
-
- Addr *new_signal = new Addr;
- *new_signal = (Addr)*prev_signal - 1;
-
- gpuCmdProc->dmaWriteVirt(signal_addr, sizeof(Addr), nullptr,
- new_signal, 0);
+ * HACK: The semantics of the HSA signal is to decrement
+ * the current signal value. We cheat here and read out
+ * he value from main memory using functional access and
+ * then just DMA the decremented value.
+ */
+ uint64_t signal_value =
+ gpuCmdProc->functionalReadHsaSignal(task->completionSignal());
+
+ DPRINTF(GPUDisp, "HSA AQL Kernel Complete with completion "
+ "signal! Addr: %d\n", task->completionSignal());
+
+ gpuCmdProc->updateHsaSignal(task->completionSignal(),
+ signal_value - 1);
} else {
DPRINTF(GPUDisp, "HSA AQL Kernel Complete! No completion "
"signal\n");