2 * Copyright (c) 2018 Advanced Micro Devices, Inc.
5 * For use for simulation and test purposes only
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
17 * 3. Neither the name of the copyright holder nor the names of its
18 * contributors may be used to endorse or promote products derived from this
19 * software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
33 * Authors: Anthony Gutierrez
38 * The GPUCommandProcessor (CP) is responsible for accepting commands, in
39 * the form of HSA AQL packets, from the HSA packet processor (HSAPP). The CP
40 * works with several components, including the HSAPP and the dispatcher.
41 * When the HSAPP sends a ready task to the CP, the CP performs the necessary
42 * operations to extract relevant data structures from memory, such as the
43 * AQL queue descriptor and AQL packet, and initializes register state for the
47 #ifndef __DEV_HSA_GPU_COMMAND_PROCESSOR_HH__
48 #define __DEV_HSA_GPU_COMMAND_PROCESSOR_HH__
50 #include "dev/hsa/hsa_device.hh"
51 #include "gpu-compute/hsa_queue_entry.hh"
53 struct GPUCommandProcessorParams;
// GPUCommandProcessor derives from HSADevice and declares the packet
// submission and dispatch entry points used by the nested DMA callbacks
// further down in this header.
57 class GPUCommandProcessor : public HSADevice
// Params is an alias for the forward-declared GPUCommandProcessorParams.
60 typedef GPUCommandProcessorParams Params;
// Default construction is disabled; the only constructor declared here
// takes a const reference to Params.
62 GPUCommandProcessor() = delete;
63 GPUCommandProcessor(const Params &p);
// Takes a Shader pointer. The definition is not in this header.
// NOTE(review): presumably stores the pointer for later use -- confirm
// against the definition.
65 void setShader(Shader *shader);
// The two submit functions are marked override. Both take an untyped packet
// pointer, a queue id and an address named host_pkt_addr.
68 void submitDispatchPkt(void *raw_pkt, uint32_t queue_id,
69 Addr host_pkt_addr) override;
70 void submitVendorPkt(void *raw_pkt, uint32_t queue_id,
71 Addr host_pkt_addr) override;
// Called by the MQDDmaEvent code below with the task it was constructed with.
72 void dispatchPkt(HSAQueueEntry *task);
// write() and read() are defined inline, ignore their packet argument and
// always return 0.
74 Tick write(PacketPtr pkt) override { return 0; }
75 Tick read(PacketPtr pkt) override { return 0; }
// Declared here only; defined elsewhere.
76 AddrRangeList getAddrRanges() const override;
// Reference member, so it has to be bound when the object is constructed.
81 GPUDispatcher &dispatcher;
// Declared here only; the body is not visible in this header.
// NOTE(review): name suggests per-task ABI/register setup -- confirm.
83 void initABI(HSAQueueEntry *task);
86 * Perform a DMA read of the read_dispatch_id_field_base_byte_offset
87 * field, which follows directly after the read_dispatch_id (the read
88 * pointer) in the amd_hsa_queue_t struct (aka memory queue descriptor
89 * (MQD)), to find the base address of the MQD. The MQD is the runtime's
90 * soft representation of a HW queue descriptor (HQD).
92 * Any fields below the read dispatch ID in the amd_hsa_queue_t should
93 * not change according to the HSA standard; therefore we should be able
94 * to get them based on their known relative position to the read dispatch
// Callback type derived from DmaCallback. Its readDispIdOffset member is
// later subtracted from a queue descriptor hostReadIndexPtr to produce the
// task hostAMDQueueAddr.
97 class ReadDispIdOffsetDmaEvent : public DmaCallback
// The constructor receives the owning command processor by reference.
100 ReadDispIdOffsetDmaEvent(GPUCommandProcessor &gpu_cmd_proc,
// readDispIdOffset starts at 0 and gpuCmdProc is bound to gpu_cmd_proc.
// NOTE(review): presumably the DMA read overwrites readDispIdOffset before
// it is consumed -- confirm at the call site.
102 : DmaCallback(), readDispIdOffset(0), gpuCmdProc(gpu_cmd_proc),
111 * Now that the read pointer's offset from the base of
112 * the MQD is known, we can use that to calculate the
113 * address of the MQD itself. The dispatcher will
114 * DMA that into the HSAQueueEntry when a kernel is
// hostAMDQueueAddr = hostReadIndexPtr of the queue descriptor looked up by
// the task queue id, minus readDispIdOffset.
117 _task->hostAMDQueueAddr
118 = gpuCmdProc.hsaPP->getQueueDesc(_task->queueId())
119 ->hostReadIndexPtr - readDispIdOffset;
122 * DMA a copy of the MQD into the task. Some fields of
123 * the MQD will be used to initialize register state.
// The MQDDmaEvent is allocated with new and passed to dmaReadVirt; nothing
// in the visible code deletes it.
// NOTE(review): confirm that the DMA machinery takes ownership and frees it.
125 auto *mqdDmaEvent = new MQDDmaEvent(gpuCmdProc, _task);
// The request passes the address computed above, sizeof(_amd_queue_t) and
// the event object.
126 gpuCmdProc.dmaReadVirt(_task->hostAMDQueueAddr,
127 sizeof(_amd_queue_t), mqdDmaEvent,
// Offset used in the subtraction above; set to 0 by the constructor.
131 uint32_t readDispIdOffset;
// Command processor used for the queue descriptor lookup and the DMA read.
134 GPUCommandProcessor &gpuCmdProc;
// Task whose hostAMDQueueAddr field is written above.
135 HSAQueueEntry *_task;
139 * Perform a DMA read of the MQD that corresponds to a hardware
140 * queue descriptor (HQD). We store a copy of the MQD in the
141 * HSAQueueEntry object so we can send a copy of it along with
142 * a dispatch packet, which is needed to initialize register
// Callback type derived from DmaCallback that passes its stored task to
// GPUCommandProcessor::dispatchPkt.
145 class MQDDmaEvent : public DmaCallback
// The constructor only records the command processor reference and the task
// pointer.
148 MQDDmaEvent(GPUCommandProcessor &gpu_cmd_proc, HSAQueueEntry *task)
149 : DmaCallback(), gpuCmdProc(gpu_cmd_proc), _task(task)
// Forward the stored task to the command processor.
156 gpuCmdProc.dispatchPkt(_task);
// Command processor that receives the dispatchPkt call.
160 GPUCommandProcessor &gpuCmdProc;
// Task passed to dispatchPkt; stored as a raw pointer.
161 HSAQueueEntry *_task;
165 #endif // __DEV_HSA_GPU_COMMAND_PROCESSOR_HH__