misc: Standardize the way create() constructs SimObjects.
[gem5.git] / src / gpu-compute / gpu_command_processor.hh
1 /*
2 * Copyright (c) 2018 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the copyright holder nor the names of its
18 * contributors may be used to endorse or promote products derived from this
19 * software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Authors: Anthony Gutierrez
34 */
35
36 /**
37 * @file
38 * The GPUCommandProcessor (CP) is responsible for accepting commands, in
39 * the form of HSA AQL packets, from the HSA packet processor (HSAPP). The CP
40 * works with several components, including the HSAPP and the dispatcher.
41 * When the HSAPP sends a ready task to the CP, it will perform the necessary
42 * operations to extract relevant data structures from memory, such as the
43 * AQL queue descriptor and AQL packet, and initialize register state for the
44 * task's wavefronts.
45 */
46
47 #ifndef __DEV_HSA_GPU_COMMAND_PROCESSOR_HH__
48 #define __DEV_HSA_GPU_COMMAND_PROCESSOR_HH__
49
50 #include "dev/hsa/hsa_device.hh"
51 #include "gpu-compute/hsa_queue_entry.hh"
52
53 struct GPUCommandProcessorParams;
54 class GPUDispatcher;
55 class Shader;
56
57 class GPUCommandProcessor : public HSADevice
58 {
59 public:
60 typedef GPUCommandProcessorParams Params;
61
62 GPUCommandProcessor() = delete;
63 GPUCommandProcessor(const Params &p);
64
65 void setShader(Shader *shader);
66 Shader* shader();
67
68 void submitDispatchPkt(void *raw_pkt, uint32_t queue_id,
69 Addr host_pkt_addr) override;
70 void submitVendorPkt(void *raw_pkt, uint32_t queue_id,
71 Addr host_pkt_addr) override;
72 void dispatchPkt(HSAQueueEntry *task);
73
74 Tick write(PacketPtr pkt) override { return 0; }
75 Tick read(PacketPtr pkt) override { return 0; }
76 AddrRangeList getAddrRanges() const override;
77 System *system();
78
79 private:
80 Shader *_shader;
81 GPUDispatcher &dispatcher;
82
83 void initABI(HSAQueueEntry *task);
84
85 /**
86 * Perform a DMA read of the read_dispatch_id_field_base_byte_offset
87 * field, which follows directly after the read_dispatch_id (the read
88 * pointer) in the amd_hsa_queue_t struct (aka memory queue descriptor
89 * (MQD)), to find the base address of the MQD. The MQD is the runtime's
90 * soft representation of a HW queue descriptor (HQD).
91 *
92 * Any fields below the read dispatch ID in the amd_hsa_queue_t should
93 * not change according to the HSA standard, therefore we should be able
94 * to get them based on their known relative position to the read dispatch
95 * ID.
96 */
97 class ReadDispIdOffsetDmaEvent : public DmaCallback
98 {
99 public:
100 ReadDispIdOffsetDmaEvent(GPUCommandProcessor &gpu_cmd_proc,
101 HSAQueueEntry *task)
102 : DmaCallback(), readDispIdOffset(0), gpuCmdProc(gpu_cmd_proc),
103 _task(task)
104 {
105 }
106
107 void
108 process() override
109 {
110 /**
111 * Now that the read pointer's offset from the base of
112 * the MQD is known, we can use that to calculate the
113 * the address of the MQD itself, the dispatcher will
114 * DMA that into the HSAQueueEntry when a kernel is
115 * launched.
116 */
117 _task->hostAMDQueueAddr
118 = gpuCmdProc.hsaPP->getQueueDesc(_task->queueId())
119 ->hostReadIndexPtr - readDispIdOffset;
120
121 /**
122 * DMA a copy of the MQD into the task. Some fields of
123 * the MQD will be used to initialize register state.
124 */
125 auto *mqdDmaEvent = new MQDDmaEvent(gpuCmdProc, _task);
126 gpuCmdProc.dmaReadVirt(_task->hostAMDQueueAddr,
127 sizeof(_amd_queue_t), mqdDmaEvent,
128 &_task->amdQueue);
129 }
130
131 uint32_t readDispIdOffset;
132
133 private:
134 GPUCommandProcessor &gpuCmdProc;
135 HSAQueueEntry *_task;
136 };
137
138 /**
139 * Perform a DMA read of the MQD that corresponds to a hardware
140 * queue descriptor (HQD). We store a copy of the MQD in the
141 * HSAQueueEntry object so we can send a copy of it along with
142 * a dispatch packet, which is needed to initialize register
143 * state.
144 */
145 class MQDDmaEvent : public DmaCallback
146 {
147 public:
148 MQDDmaEvent(GPUCommandProcessor &gpu_cmd_proc, HSAQueueEntry *task)
149 : DmaCallback(), gpuCmdProc(gpu_cmd_proc), _task(task)
150 {
151 }
152
153 void
154 process() override
155 {
156 gpuCmdProc.dispatchPkt(_task);
157 }
158
159 private:
160 GPUCommandProcessor &gpuCmdProc;
161 HSAQueueEntry *_task;
162 };
163 };
164
165 #endif // __DEV_HSA_GPU_COMMAND_PROCESSOR_HH__