"/usr/lib/x86_64-linux-gnu"
]),
'HOME=%s' % os.getenv('HOME','/'),
- "HSA_ENABLE_INTERRUPT=0"]
+ "HSA_ENABLE_INTERRUPT=1"]
process = Process(executable = executable, cmd = [options.cmd]
+ options.options.split(), drivers = [gpu_driver], env = env)
#include "dev/hsa/hsa_packet_processor.hh"
#include "params/HSADevice.hh"
+class HSADriver;
+
class HSADevice : public DmaDevice
{
public:
typedef HSADeviceParams Params;
+ typedef std::function<void(const uint64_t &)> HsaSignalCallbackFunction;
HSADevice(const Params &p) : DmaDevice(p), hsaPP(p.hsapp)
{
{
fatal("%s does not accept vendor specific packets\n", name());
}
-
+ virtual void
+ attachDriver(HSADriver *driver)
+ {
+ fatal("%s does not need HSA driver\n", name());
+ }
+ virtual void
+ updateHsaSignal(Addr signal_handle, uint64_t signal_value)
+ {
+ fatal("%s does not have HSA signal update functionality.\n", name());
+ }
+ virtual uint64_t
+ functionalReadHsaSignal(Addr signal_handle)
+ {
+ fatal("%s does not have HSA signal read functionality.\n", name());
+ }
void dmaReadVirt(Addr host_addr, unsigned size, DmaCallback *cb,
void *data, Tick delay = 0);
void dmaWriteVirt(Addr host_addr, unsigned size, DmaCallback *cb,
#include "cpu/thread_context.hh"
#include "debug/HSADriver.hh"
#include "dev/hsa/hsa_device.hh"
+#include "dev/hsa/hsa_packet_processor.hh"
+#include "dev/hsa/kfd_event_defines.h"
#include "dev/hsa/kfd_ioctl.h"
#include "params/HSADriver.hh"
#include "sim/process.hh"
/**
* Currently, mmap() will simply set up a mapping for the associated
- * device's packet processor's doorbells.
+ * device's packet processor's doorbells and create the event page.
*/
Addr
HSADriver::mmap(ThreadContext *tc, Addr start, uint64_t length, int prot,
- int tgt_flags, int tgt_fd, int offset)
+ int tgt_flags, int tgt_fd, off_t offset)
{
- DPRINTF(HSADriver, "amdkfd doorbell mmap (start: %p, length: 0x%x,"
- "offset: 0x%x)\n", start, length, offset);
-
- auto process = tc->getProcessPtr();
- auto mem_state = process->memState;
+ // Is this mmap for the signal event page?
+ bool is_event_mmap = false;
+ // If start == 0, then we may need to do mmap.
+ bool should_mmap = (start == 0);
+ auto process = tc->getProcessPtr();
+ auto mem_state = process->memState;
+ // Check if mmap is for signal events first
+ if (((offset >> PAGE_SHIFT) & KFD_MMAP_TYPE_MASK) ==
+ KFD_MMAP_TYPE_EVENTS) {
+ is_event_mmap = true;
+ DPRINTF(HSADriver, "amdkfd mmap for events(start: %p, length: 0x%x,"
+ "offset: 0x%x, )\n", start, length, offset);
+ panic_if(start != 0,
+ "Start address should be provided by KFD\n");
+ panic_if(length != 8 * KFD_SIGNAL_EVENT_LIMIT,
+ "Requested length %d, expected length %d; length mismatch\n",
+ length, 8 * KFD_SIGNAL_EVENT_LIMIT);
+ // For signal events, do mmap only if eventPage is uninitialized
+ should_mmap = (!eventPage);
+ } else {
+ DPRINTF(HSADriver, "amdkfd doorbell mmap (start: %p, length: 0x%x,"
+ "offset: 0x%x)\n", start, length, offset);
+ }
// Extend global mmap region if necessary.
- if (start == 0) {
- // Assume mmap grows down, as in x86 Linux.
+ if (should_mmap) {
+ // Assume mmap grows down, as in x86 Linux
start = mem_state->getMmapEnd() - length;
mem_state->setMmapEnd(start);
}
- /**
- * Now map this virtual address to our PIO doorbell interface
- * in the page tables (non-cacheable).
- */
- process->pTable->map(start, device->hsaPacketProc().pioAddr,
- length, false);
- DPRINTF(HSADriver, "amdkfd doorbell mapped to %xp\n", start);
+ if (is_event_mmap) {
+ if (should_mmap) {
+ eventPage = start;
+ }
+ } else {
+ // Now map this virtual address to our PIO doorbell interface
+ // in the page tables (non-cacheable)
+ process->pTable->map(start, device->hsaPacketProc().pioAddr,
+ length, false);
+
+ DPRINTF(HSADriver, "amdkfd doorbell mapped to %xp\n", start);
+ }
+
return start;
}
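For reference, here is a minimal standalone sketch (illustrative only, not part of the patch) of the offset encoding the check above relies on; the constants mirror kfd_event_defines.h, added later in this change:

#include <cassert>
#include <cstdint>

// Constants mirroring kfd_event_defines.h (assumed values).
constexpr int PAGE_SHIFT = 12;
constexpr uint64_t KFD_MMAP_TYPE_SHIFT = 62 - PAGE_SHIFT;
constexpr uint64_t KFD_MMAP_TYPE_MASK = 0x3ULL << KFD_MMAP_TYPE_SHIFT;
constexpr uint64_t KFD_MMAP_TYPE_EVENTS = 0x2ULL << KFD_MMAP_TYPE_SHIFT;

// The type bits sit above the page number, so shifting the byte offset
// down by PAGE_SHIFT and masking recovers the mapping type.
bool isEventMmap(uint64_t offset)
{
    return ((offset >> PAGE_SHIFT) & KFD_MMAP_TYPE_MASK) ==
        KFD_MMAP_TYPE_EVENTS;
}

int main()
{
    // An event-page offset as AMDKFD_IOC_CREATE_EVENT hands it out
    // (page_index 0).
    uint64_t offset = KFD_MMAP_TYPE_EVENTS << PAGE_SHIFT;
    assert(isEventMmap(offset));
    return 0;
}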
args->ring_base_address, args->queue_id,
args->ring_size);
}
+
+const char*
+HSADriver::DriverWakeupEvent::description() const
+{
+ return "DriverWakeupEvent";
+}
+
+void
+HSADriver::DriverWakeupEvent::scheduleWakeup(Tick wakeup_delay)
+{
+ assert(driver);
+ driver->schedule(this, curTick() + wakeup_delay);
+}
+
+void
+HSADriver::signalWakeupEvent(uint32_t event_id)
+{
+ panic_if(event_id >= eventSlotIndex,
+ "Trying to wake up on an event that has not been created\n");
+ if (ETable[event_id].threadWaiting) {
+ panic_if(!ETable[event_id].tc,
+ "No thread context to wake up\n");
+ ThreadContext *tc = ETable[event_id].tc;
+ DPRINTF(HSADriver,
+ "Signal event: Waking up CPU %d\n", tc->cpuId());
+ // Wake up this thread
+ tc->activate();
+ // Remove events that can wake up this thread
+ TCEvents[tc].clearEvents();
+ } else {
+ // This may be a race between an ioctl call asking to wait on this
+ // event and this signalWakeupEvent. We handle that race by setting
+ // the event here; the ioctl call should take the necessary action
+ // when it finds the event already set. This may also be a genuine
+ // case in which the runtime decided not to wait on this event, but
+ // since we cannot distinguish that from the race, we set the event
+ // either way.
+ ETable[event_id].setEvent = true;
+ }
+}
+
+void
+HSADriver::DriverWakeupEvent::process()
+{
+ DPRINTF(HSADriver,
+ "Timer event: Waking up CPU %d\n", tc->cpuId());
+ // Wake up this thread
+ tc->activate();
+ // Remove events that can wake up this thread
+ driver->TCEvents[tc].clearEvents();
+}
#ifndef __DEV_HSA_HSA_DRIVER_HH__
#define __DEV_HSA_HSA_DRIVER_HH__
+#include <unordered_map>
+
#include "base/types.hh"
#include "sim/emul_driver.hh"
int open(ThreadContext *tc, int mode, int flags);
Addr mmap(ThreadContext *tc, Addr start, uint64_t length,
- int prot, int tgtFlags, int tgtFd, int offset);
+ int prot, int tgt_flags, int tgt_fd, off_t offset);
+ virtual void signalWakeupEvent(uint32_t event_id);
+ class DriverWakeupEvent : public Event
+ {
+ public:
+ DriverWakeupEvent(HSADriver *hsa_driver, ThreadContext *thrd_cntxt)
+ : driver(hsa_driver), tc(thrd_cntxt) {}
+ void process() override;
+ const char *description() const override;
+ void scheduleWakeup(Tick wakeup_delay);
+ private:
+ HSADriver *driver;
+ ThreadContext *tc;
+ };
+ class EventTableEntry {
+ public:
+ EventTableEntry() :
+ mailBoxPtr(0), tc(nullptr), threadWaiting(false), setEvent(false)
+ {}
+ // Mail box pointer for this event. The current implementation does
+ // not use mailBoxPtr to notify events; instead, the dispatcher (GPU)
+ // calls signalWakeupEvent directly. A future implementation may
+ // communicate with the driver through mailBoxPtr.
+ Addr mailBoxPtr;
+ // Thread context waiting on this event. We do not support multiple
+ // threads waiting on an event currently.
+ ThreadContext *tc;
+ // threadWaiting = true, if some thread context is waiting on this
+ // event. A thread context waiting on this event is put to sleep.
+ bool threadWaiting;
+ // setEvent = true if this event was triggered while no thread
+ // context was waiting on it. When a thread context later tries to
+ // wait on this event, we will not put it to sleep, since the event
+ // has already happened. This scenario arises when a waiting thread
+ // and a wakeup thread race on this event and the wakeup thread
+ // beats the waiting thread at the driver.
+ bool setEvent;
+ };
+ typedef class EventTableEntry ETEntry;
+
protected:
+ Addr eventPage;
+ uint32_t eventSlotIndex;
+ // Event table that keeps track of events. It is indexed with event ID.
+ std::unordered_map<uint32_t, ETEntry> ETable;
+
+ // The TCEvents map keeps track of the events that can wake up a
+ // thread. When multiple events can wake up the same thread, this
+ // data structure lets us reset all of them once one of those events
+ // wakes the thread. The signal events that can wake up the thread
+ // are stored in signalEvents; the timer wakeup event is stored in
+ // timerEvent.
+ class EventList {
+ public:
+ EventList() : driver(nullptr), timerEvent(nullptr, nullptr) {}
+ EventList(HSADriver *hsa_driver, ThreadContext *thrd_cntxt)
+ : driver(hsa_driver), timerEvent(hsa_driver, thrd_cntxt)
+ { }
+ void clearEvents() {
+ assert(driver);
+ for (auto event : signalEvents) {
+ assert(event < driver->eventSlotIndex);
+ panic_if(driver->ETable[event].tc->status() ==
+ ThreadContext::Suspended,
+ "Thread should not be suspended\n");
+ driver->ETable[event].tc = nullptr;
+ driver->ETable[event].threadWaiting = false;
+ }
+ signalEvents.clear();
+ if (timerEvent.scheduled()) {
+ driver->deschedule(timerEvent);
+ }
+ }
+ HSADriver *driver;
+ DriverWakeupEvent timerEvent;
+ // The set of events that can wake up the same thread.
+ std::set<uint32_t> signalEvents;
+ };
+ std::unordered_map<ThreadContext *, EventList> TCEvents;
+
/**
* HSA agent (device) that is controlled by this driver.
*/
dep_sgnl_rd_st->resetSigVals();
// The completion signal is connected
if (bar_and_pkt->completion_signal != 0) {
- // The signal value is aligned 8 bytes
- // from the actual handle in the runtime
- uint64_t signal_addr =
- (uint64_t) (((uint64_t *)
- bar_and_pkt->completion_signal) + 1);
+ // HACK: The semantics of an HSA signal are to decrement the
+ // current signal value. We cheat here: read the value out of main
+ // memory using a functional access, then just DMA the decremented
+ // value back, since the DMASequencer does not support atomic
+ // operations.
+ uint64_t signal_value = hsa_device->functionalReadHsaSignal(
+ bar_and_pkt->completion_signal);
+
DPRINTF(HSAPacketProcessor, "Triggering barrier packet" \
- " completion signal: %x!\n", signal_addr);
- /**
- * HACK: The semantics of the HSA signal is to
- * decrement the current signal value.
- * I'm going to cheat here and read out
- * the value from main memory using functional
- * access, and then just DMA the decremented value.
- * The reason for this is that the DMASequencer does
- * not support atomic operations.
- */
- VPtr<uint64_t> prev_signal(signal_addr, sys->threads[0]);
-
- hsa_signal_value_t *new_signal = new hsa_signal_value_t;
- *new_signal = (hsa_signal_value_t)*prev_signal - 1;
-
- dmaWriteVirt(signal_addr,
- sizeof(hsa_signal_value_t), NULL, new_signal, 0);
+ " completion signal! Addr: %x\n",
+ bar_and_pkt->completion_signal);
+
+ hsa_device->updateHsaSignal(bar_and_pkt->completion_signal,
+ signal_value - 1);
}
}
if (dep_sgnl_rd_st->pendingReads > 0) {
return regdQList.at(queId);
}
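+ // Number of packets in this queue that have been dispatched to the
+ // packet processor but not yet retired, i.e., the distance between
+ // the dispatch index and the read index of the AQL ring buffer.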
+ uint64_t
+ inFlightPkts(uint32_t queId)
+ {
+ auto aqlBuf = regdQList.at(queId)->qCntxt.aqlBuf;
+ return aqlBuf->dispIdx() - aqlBuf->rdIdx();
+ }
+
int numHWQueues;
Addr pioAddr;
Addr pioSize;
--- /dev/null
+/*
+ * Copyright (c) 2016-2019 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef DEV_HSA_HSA_SIGNAL_H
+#define DEV_HSA_HSA_SIGNAL_H
+
+// AMD Signal Kind Enumeration Values.
+typedef int64_t amd_signal_kind64_t;
+enum amd_signal_kind_t {
+ AMD_SIGNAL_KIND_INVALID = 0,
+ AMD_SIGNAL_KIND_USER = 1,
+ AMD_SIGNAL_KIND_DOORBELL = -1,
+ AMD_SIGNAL_KIND_LEGACY_DOORBELL = -2
+};
+
+// AMD Signal.
+typedef struct amd_signal_s {
+ amd_signal_kind64_t kind;
+ union {
+ volatile int64_t value;
+ volatile uint32_t* legacy_hardware_doorbell_ptr;
+ volatile uint64_t* hardware_doorbell_ptr;
+ };
+ uint64_t event_mailbox_ptr;
+ uint32_t event_id;
+ uint32_t reserved1;
+ uint64_t start_ts;
+ uint64_t end_ts;
+ union {
+ uint64_t queue_ptr;
+ uint64_t reserved2;
+ };
+ uint32_t reserved3[2];
+} amd_signal_t;
+
+#endif // DEV_HSA_HSA_SIGNAL_H
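As an aside, a small sketch (editorial, not in the patch) of how field addresses are derived from a signal handle, i.e. the guest address of an amd_signal_t; GPUCommandProcessor uses the same offsetof pattern later in this change:

#include <cstddef>
#include <cstdint>

// Assumes amd_signal_t from the header above; Addr stands in for
// gem5's address type.
using Addr = uint64_t;

// kind is 64 bits wide, so value sits 8 bytes past the handle,
// the offset the old code hard-coded as "signal handle + 8".
Addr signalValueAddr(Addr handle)
{
    return handle + offsetof(amd_signal_t, value);
}

Addr signalMailboxAddr(Addr handle)
{
    return handle + offsetof(amd_signal_t, event_mailbox_ptr);
}

Addr signalEventIdAddr(Addr handle)
{
    return handle + offsetof(amd_signal_t, event_id);
}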
DPRINTF(HSAPacketProcessor,
"@ %s, analyzing hw queue %d\n", __FUNCTION__, rl_idx);
HSAQueueDescriptor* qDesc = hsaPP->getRegdListEntry(rl_idx)->qCntxt.qDesc;
- AQLRingBuffer* aql_buf = hsaPP->getRegdListEntry(rl_idx)->qCntxt.aqlBuf;
// If there is a pending DMA to this registered queue,
// then the queue is not idle.
// Since the packet completion stage happens only after kernel
// completion, we need to keep the queue mapped until all outstanding
// kernels from that queue have finished.
- if (aql_buf->rdIdx() != aql_buf->dispIdx()) {
+ if (hsaPP->inFlightPkts(rl_idx)) {
return false;
}
--- /dev/null
+/*
+ * Copyright (c) 2016-2019 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef KFD_EVENT_DEFINES_H_INCLUDED
+#define KFD_EVENT_DEFINES_H_INCLUDED
+
+#include "dev/hsa/kfd_ioctl.h"
+
+#define PAGE_SHIFT 12
+#define KFD_MMAP_TYPE_SHIFT (62 - PAGE_SHIFT)
+#define KFD_MMAP_TYPE_MASK (0x3ULL << KFD_MMAP_TYPE_SHIFT)
+#define KFD_MMAP_TYPE_DOORBELL (0x3ULL << KFD_MMAP_TYPE_SHIFT)
+#define KFD_MMAP_TYPE_EVENTS (0x2ULL << KFD_MMAP_TYPE_SHIFT)
+#define SLOTS_PER_PAGE KFD_SIGNAL_EVENT_LIMIT
+
+#endif
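Conversely to the decode check in HSADriver::mmap() above, this is roughly how AMDKFD_IOC_CREATE_EVENT packs the event page offset it returns to user space (hypothetical helper, not in the patch):

#include <cstdint>

// Uses PAGE_SHIFT and KFD_MMAP_TYPE_EVENTS from the header above.
// Only page_index 0 is ever produced, since the driver allocates a
// single event page.
uint64_t makeEventPageOffset(uint64_t page_index)
{
    return (page_index | KFD_MMAP_TYPE_EVENTS) << PAGE_SHIFT;
}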
gpuCmdProc->hsaPacketProc()
.finishPkt(task->dispPktPtr(), task->queueId());
if (task->completionSignal()) {
- // The signal value is aligned 8 bytes from
- // the actual handle in the runtime
- Addr signal_addr = task->completionSignal() + sizeof(Addr);
- DPRINTF(GPUDisp, "HSA AQL Kernel Complete! Triggering "
- "completion signal: %x!\n", signal_addr);
-
/**
- * HACK: The semantics of the HSA signal is to decrement
- * the current signal value. We cheat here and read out
- * he value from main memory using functional access and
- * then just DMA the decremented value. This is because
- * the DMA controller does not currently support GPU
- * atomics.
- */
- auto *tc = gpuCmdProc->system()->threads[0];
- auto &virt_proxy = tc->getVirtProxy();
- TypedBufferArg<Addr> prev_signal(signal_addr);
- prev_signal.copyIn(virt_proxy);
-
- Addr *new_signal = new Addr;
- *new_signal = (Addr)*prev_signal - 1;
-
- gpuCmdProc->dmaWriteVirt(signal_addr, sizeof(Addr), nullptr,
- new_signal, 0);
+ * HACK: The semantics of an HSA signal are to decrement
+ * the current signal value. We cheat here and read out
+ * the value from main memory using functional access and
+ * then just DMA the decremented value.
+ */
+ uint64_t signal_value =
+ gpuCmdProc->functionalReadHsaSignal(task->completionSignal());
+
+ DPRINTF(GPUDisp, "HSA AQL Kernel Complete with completion "
+ "signal! Addr: %d\n", task->completionSignal());
+
+ gpuCmdProc->updateHsaSignal(task->completionSignal(),
+ signal_value - 1);
} else {
DPRINTF(GPUDisp, "HSA AQL Kernel Complete! No completion "
"signal\n");
#include "debug/GPUKernelInfo.hh"
#include "gpu-compute/dispatcher.hh"
#include "params/GPUCommandProcessor.hh"
+#include "sim/process.hh"
+#include "sim/proxy_ptr.hh"
+#include "sim/syscall_emul_buf.hh"
GPUCommandProcessor::GPUCommandProcessor(const Params &p)
: HSADevice(p), dispatcher(*p.dispatcher)
++dynamic_task_id;
}
+uint64_t
+GPUCommandProcessor::functionalReadHsaSignal(Addr signal_handle)
+{
+ Addr value_addr = getHsaSignalValueAddr(signal_handle);
+ auto tc = system()->threads[0];
+ ConstVPtr<Addr> prev_value(value_addr, tc);
+ return *prev_value;
+}
+
+void
+GPUCommandProcessor::updateHsaSignal(Addr signal_handle, uint64_t signal_value)
+{
+ // The signal's value field sits 8 bytes past the signal handle; the
+ // field addresses below are computed with offsetof on amd_signal_t.
+ Addr value_addr = getHsaSignalValueAddr(signal_handle);
+ Addr mailbox_addr = getHsaSignalMailboxAddr(signal_handle);
+ Addr event_addr = getHsaSignalEventAddr(signal_handle);
+ DPRINTF(GPUCommandProc, "Triggering completion signal: %x!\n", value_addr);
+
+ Addr *new_signal = new Addr;
+ *new_signal = signal_value;
+
+ dmaWriteVirt(value_addr, sizeof(Addr), nullptr, new_signal, 0);
+
+ auto tc = system()->threads[0];
+ ConstVPtr<uint64_t> mailbox_ptr(mailbox_addr, tc);
+
+ // Notifying an event with its mailbox pointer is
+ // not supported in the current implementation. Just use
+ // mailbox pointer to distinguish between interruptible
+ // and default signal. Interruptible signal will have
+ // a valid mailbox pointer.
+ if (*mailbox_ptr != 0) {
+ // This is an interruptible signal. Now, read the
+ // event ID and directly communicate with the driver
+ // about that event notification.
+ ConstVPtr<uint32_t> event_val(event_addr, tc);
+
+ DPRINTF(GPUCommandProc, "Calling signal wakeup event on "
+ "signal event value %d\n", *event_val);
+ signalWakeupEvent(*event_val);
+ }
+}
+
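Both the dispatcher and the packet processor pair these two calls into a read-then-write decrement of a completion signal, since the DMA path has no atomic operations. A minimal sketch of that pattern (hypothetical helper name; cp is any HSADevice implementing the interface):

// Functionally read the current value, then DMA back value - 1.
// Not atomic; this is safe only because the simulator serializes
// these updates.
void decrementHsaSignal(HSADevice *cp, Addr signal_handle)
{
    uint64_t value = cp->functionalReadHsaSignal(signal_handle);
    cp->updateHsaSignal(signal_handle, value - 1);
}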
+void
+GPUCommandProcessor::attachDriver(HSADriver *hsa_driver)
+{
+ fatal_if(driver, "Should not overwrite driver.");
+ driver = hsa_driver;
+}
+
/**
* submitVendorPkt() is for accepting vendor-specific packets from
* the HSAPP. Vendor-specific packets may be used by the runtime to
dispatcher.dispatch(task);
}
+void
+GPUCommandProcessor::signalWakeupEvent(uint32_t event_id)
+{
+ driver->signalWakeupEvent(event_id);
+}
+
/**
* The CP is responsible for traversing all HSA-ABI-related data
* structures from memory and initializing the ABI state.
#define __DEV_HSA_GPU_COMMAND_PROCESSOR_HH__
#include "dev/hsa/hsa_device.hh"
+#include "dev/hsa/hsa_signal.hh"
+#include "gpu-compute/gpu_compute_driver.hh"
#include "gpu-compute/hsa_queue_entry.hh"
struct GPUCommandProcessorParams;
Addr host_pkt_addr) override;
void submitVendorPkt(void *raw_pkt, uint32_t queue_id,
Addr host_pkt_addr) override;
+ void attachDriver(HSADriver *driver) override;
void dispatchPkt(HSAQueueEntry *task);
+ void signalWakeupEvent(uint32_t event_id);
Tick write(PacketPtr pkt) override { return 0; }
Tick read(PacketPtr pkt) override { return 0; }
AddrRangeList getAddrRanges() const override;
System *system();
+ void updateHsaSignal(Addr signal_handle, uint64_t signal_value) override;
+
+ uint64_t functionalReadHsaSignal(Addr signal_handle) override;
+
+ Addr getHsaSignalValueAddr(Addr signal_handle)
+ {
+ return signal_handle + offsetof(amd_signal_t, value);
+ }
+
+ Addr getHsaSignalMailboxAddr(Addr signal_handle)
+ {
+ return signal_handle + offsetof(amd_signal_t, event_mailbox_ptr);
+ }
+
+ Addr getHsaSignalEventAddr(Addr signal_handle)
+ {
+ return signal_handle + offsetof(amd_signal_t, event_id);
+ }
+
private:
Shader *_shader;
GPUDispatcher &dispatcher;
+ HSADriver *driver = nullptr;
void initABI(HSAQueueEntry *task);
#include "debug/GPUDriver.hh"
#include "dev/hsa/hsa_device.hh"
#include "dev/hsa/hsa_packet_processor.hh"
+#include "dev/hsa/kfd_event_defines.h"
#include "dev/hsa/kfd_ioctl.h"
#include "params/GPUComputeDriver.hh"
#include "sim/syscall_emul_buf.hh"
GPUComputeDriver::GPUComputeDriver(const Params &p)
: HSADriver(p)
{
+ device->attachDriver(this);
DPRINTF(GPUDriver, "Constructing KFD: device\n");
}
DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_GET_VERSION\n");
TypedBufferArg<kfd_ioctl_get_version_args> args(ioc_buf);
- args->major_version = 1;
- args->minor_version = 0;
+ args->major_version = KFD_IOCTL_MAJOR_VERSION;
+ args->minor_version = KFD_IOCTL_MINOR_VERSION;
args.copyOut(virt_proxy);
}
break;
case AMDKFD_IOC_CREATE_EVENT:
{
- warn("unimplemented ioctl: AMDKFD_IOC_CREATE_EVENT\n");
+ DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_CREATE_EVENT\n");
+
+ TypedBufferArg<kfd_ioctl_create_event_args> args(ioc_buf);
+ args.copyIn(virt_proxy);
+ if (args->event_type != KFD_IOC_EVENT_SIGNAL) {
+ fatal("Signal events are only supported currently\n");
+ } else if (eventSlotIndex == SLOTS_PER_PAGE) {
+ fatal("Signal event wasn't created; signal limit reached\n");
+ }
+ // Currently, we allocate only one signal_page for events.
+ // Note that this signal page is of size 8 * KFD_SIGNAL_EVENT_LIMIT
+ uint64_t page_index = 0;
+ args->event_page_offset = (page_index | KFD_MMAP_TYPE_EVENTS);
+ args->event_page_offset <<= PAGE_SHIFT;
+ // TODO: Currently we support only signal events, hence using
+ // the same ID for both signal slot and event slot
+ args->event_slot_index = eventSlotIndex;
+ args->event_id = eventSlotIndex++;
+ args->event_trigger_data = args->event_id;
+ DPRINTF(GPUDriver, "amdkfd create events"
+ "(event_id: 0x%x, offset: 0x%x)\n",
+ args->event_id, args->event_page_offset);
+ // Since eventSlotIndex is increased every time a new event is
+ // created, ETable at eventSlotIndex (event_id) is guaranteed to be
+ // empty. A future implementation that reuses deleted event IDs
+ // should check that the event table entry for this event_id is
+ // empty before inserting a new entry.
+ ETable.emplace(std::pair<uint32_t, ETEntry>(args->event_id, {}));
+ args.copyOut(virt_proxy);
}
break;
case AMDKFD_IOC_DESTROY_EVENT:
{
- warn("unimplemented ioctl: AMDKFD_IOC_DESTROY_EVENT\n");
+ DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_DESTROY_EVENT\n");
+ TypedBufferArg<kfd_ioctl_destroy_event_args> args(ioc_buf);
+ args.copyIn(virt_proxy);
+ DPRINTF(GPUDriver, "amdkfd destroying event %d\n", args->event_id);
+ fatal_if(ETable.count(args->event_id) == 0,
+ "Event ID invalid, cannot destroy this event\n");
+ ETable.erase(args->event_id);
}
break;
case AMDKFD_IOC_SET_EVENT:
{
- warn("unimplemented ioctl: AMDKFD_IOC_SET_EVENT\n");
+ DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_SET_EVENTS\n");
+ TypedBufferArg<kfd_ioctl_set_event_args> args(ioc_buf);
+ args.copyIn(virt_proxy);
+ DPRINTF(GPUDriver, "amdkfd set event %d\n", args->event_id);
+ fatal_if(ETable.count(args->event_id) == 0,
+ "Event ID invlaid, cannot set this event\n");
+ ETable[args->event_id].setEvent = true;
+ signalWakeupEvent(args->event_id);
}
break;
case AMDKFD_IOC_RESET_EVENT:
break;
case AMDKFD_IOC_WAIT_EVENTS:
{
- warn("unimplemented ioctl: AMDKFD_IOC_WAIT_EVENTS\n");
+ DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_WAIT_EVENTS\n");
+ TypedBufferArg<kfd_ioctl_wait_events_args> args(ioc_buf);
+ args.copyIn(virt_proxy);
+ kfd_event_data *events =
+ (kfd_event_data *)args->events_ptr;
+ DPRINTF(GPUDriver, "amdkfd wait for events"
+ "(wait on all: %d, timeout : %d, num_events: %s)\n",
+ args->wait_for_all, args->timeout, args->num_events);
+ panic_if(args->wait_for_all != 0 && args->num_events > 1,
+ "Wait for all events not supported\n");
+ bool should_sleep = true;
+ if (TCEvents.count(tc) == 0) {
+ // This is the first time this thread context waits on an event;
+ // initialize its event list.
+ TCEvents.emplace(std::piecewise_construct, std::make_tuple(tc),
+ std::make_tuple(this, tc));
+ DPRINTF(GPUDriver, "\tamdkfd creating event list"
+ " for thread %d\n", tc->cpuId());
+ }
+ panic_if(TCEvents[tc].signalEvents.size() != 0,
+ "There are %d events that put this thread to sleep,"
+ " this thread should not be running\n",
+ TCEvents[tc].signalEvents.size());
+ for (int i = 0; i < args->num_events; i++) {
+ panic_if(!events,
+ "Event pointer invalid\n");
+ Addr eventDataAddr = (Addr)(events + i);
+ TypedBufferArg<kfd_event_data> EventData(
+ eventDataAddr, sizeof(kfd_event_data));
+ EventData.copyIn(virt_proxy);
+ DPRINTF(GPUDriver,
+ "\tamdkfd wait for event %d\n", EventData->event_id);
+ panic_if(ETable.count(EventData->event_id) == 0,
+ "Event ID invalid, cannot set this event\n");
+ panic_if(ETable[EventData->event_id].threadWaiting,
+ "Multiple threads waiting on the same event\n");
+ if (ETable[EventData->event_id].setEvent) {
+ // If the event is already set, it has already happened. Just
+ // unset the event and don't put this thread to sleep.
+ ETable[EventData->event_id].setEvent = false;
+ should_sleep = false;
+ }
+ if (should_sleep) {
+ // Put this thread to sleep
+ ETable[EventData->event_id].threadWaiting = true;
+ ETable[EventData->event_id].tc = tc;
+ TCEvents[tc].signalEvents.insert(EventData->event_id);
+ }
+ }
+
+ // TODO: Return the correct wait_result back. Currently, returning
+ // success for both KFD_WAIT_TIMEOUT and KFD_WAIT_COMPLETE.
+ // Ideally, this needs to be done after the event is triggered and
+ // after the thread is woken up.
+ args->wait_result = 0;
+ args.copyOut(virt_proxy);
+ if (should_sleep) {
+ // Put this thread to sleep
+ sleepCPU(tc, args->timeout);
+ } else {
+ // Remove events that tried to put this thread to sleep
+ TCEvents[tc].clearEvents();
+ }
}
break;
case AMDKFD_IOC_DBG_REGISTER:
return 0;
}
+void
+GPUComputeDriver::sleepCPU(ThreadContext *tc, uint32_t milliSecTimeout)
+{
+ // Convert milliseconds to ticks, assuming gem5's default
+ // 1 ps tick resolution (1 ms == 1e9 ticks)
+ Tick wakeup_delay((uint64_t)milliSecTimeout * 1000000000);
+ assert(TCEvents.count(tc) == 1);
+ TCEvents[tc].timerEvent.scheduleWakeup(wakeup_delay);
+ tc->suspend();
+ DPRINTF(GPUDriver,
+ "CPU %d is put to sleep\n", tc->cpuId());
+}
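For orientation, a short summary (editorial) of the two paths that wake a thread suspended by sleepCPU(); both end by activating the thread context and clearing its event list:

// Path 1: the GPU signals an event the thread waits on:
//   HSADriver::signalWakeupEvent(event_id)
//     -> tc->activate(); TCEvents[tc].clearEvents();
// Path 2: the timeout expires first:
//   DriverWakeupEvent::process()
//     -> tc->activate(); driver->TCEvents[tc].clearEvents();
// clearEvents() also deschedules a still-pending timerEvent, so the
// losing path cannot trigger a second wakeup.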
+
Addr
GPUComputeDriver::gpuVmApeBase(int gpuNum) const
{
typedef GPUComputeDriverParams Params;
GPUComputeDriver(const Params &p);
int ioctl(ThreadContext *tc, unsigned req, Addr ioc_buf) override;
+ void sleepCPU(ThreadContext *tc, uint32_t milliSecTimeout);
private:
/**
* (see the SyscallReturn class).
*/
virtual Addr mmap(ThreadContext *tc, Addr start, uint64_t length,
- int prot, int tgtFlags, int tgtFd, int offset)
+ int prot, int tgtFlags, int tgtFd, off_t offset)
{ return -EBADF; }
};