gpu-compute, arch-gcn3: refactor barriers
[gem5.git] / src / gpu-compute / gpu_compute_driver.cc
1 /*
2 * Copyright (c) 2015-2018 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the copyright holder nor the names of its
18 * contributors may be used to endorse or promote products derived from this
19 * software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Authors: Sooraj Puthoor
34 * Anthony Gutierrez
35 */
36
37 #include "gpu-compute/gpu_compute_driver.hh"
38
39 #include "cpu/thread_context.hh"
40 #include "debug/GPUDriver.hh"
41 #include "dev/hsa/hsa_device.hh"
42 #include "dev/hsa/hsa_packet_processor.hh"
43 #include "dev/hsa/kfd_ioctl.h"
44 #include "params/GPUComputeDriver.hh"
45 #include "sim/syscall_emul_buf.hh"
46
47 GPUComputeDriver::GPUComputeDriver(Params *p)
48 : HSADriver(p)
49 {
50 DPRINTF(GPUDriver, "Constructing KFD: device\n");
51 }
52
53 int
54 GPUComputeDriver::ioctl(ThreadContext *tc, unsigned req, Addr ioc_buf)
55 {
56 auto &virt_proxy = tc->getVirtProxy();
57
58 switch (req) {
59 case AMDKFD_IOC_GET_VERSION:
60 {
61 DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_GET_VERSION\n");
62
63 TypedBufferArg<kfd_ioctl_get_version_args> args(ioc_buf);
64 args->major_version = 1;
65 args->minor_version = 0;
66
67 args.copyOut(virt_proxy);
68 }
69 break;
70 case AMDKFD_IOC_CREATE_QUEUE:
71 {
72 DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_CREATE_QUEUE\n");
73
74 allocateQueue(virt_proxy, ioc_buf);
75
76 DPRINTF(GPUDriver, "Creating queue %d\n", queueId);
77 }
78 break;
79 case AMDKFD_IOC_DESTROY_QUEUE:
80 {
81 TypedBufferArg<kfd_ioctl_destroy_queue_args> args(ioc_buf);
82 args.copyIn(virt_proxy);
83 DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_DESTROY_QUEUE;" \
84 "queue offset %d\n", args->queue_id);
85 device->hsaPacketProc().unsetDeviceQueueDesc(args->queue_id);
86 }
87 break;
88 case AMDKFD_IOC_SET_MEMORY_POLICY:
89 {
90 warn("unimplemented ioctl: AMDKFD_IOC_SET_MEMORY_POLICY\n");
91 }
92 break;
93 case AMDKFD_IOC_GET_CLOCK_COUNTERS:
94 {
95 DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_GET_CLOCK_COUNTERS\n");
96
97 TypedBufferArg<kfd_ioctl_get_clock_counters_args> args(ioc_buf);
98 args.copyIn(virt_proxy);
99
100 // Set nanosecond resolution
101 args->system_clock_freq = 1000000000;
102
103 /**
104 * Derive all clock counters based on the tick. All
105 * device clocks are identical and perfectly in sync.
106 */
107 uint64_t elapsed_nsec = curTick() / SimClock::Int::ns;
108 args->gpu_clock_counter = elapsed_nsec;
109 args->cpu_clock_counter = elapsed_nsec;
110 args->system_clock_counter = elapsed_nsec;
111
112 args.copyOut(virt_proxy);
113 }
114 break;
115 case AMDKFD_IOC_GET_PROCESS_APERTURES:
116 {
117 DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_GET_PROCESS_APERTURES\n");
118
119 TypedBufferArg<kfd_ioctl_get_process_apertures_args> args(ioc_buf);
120 args->num_of_nodes = 1;
121
122 /**
123 * Set the GPUVM/LDS/Scratch APEs exactly as they
124 * are in the real driver, see the KFD driver
125 * in the ROCm Linux kernel source:
126 * drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
127 */
128 for (int i = 0; i < args->num_of_nodes; ++i) {
129 /**
130 * While the GPU node numbers start at 0, we add 1
131 * to force the count to start at 1. This is to
132 * ensure that the base/limit addresses are
133 * calculated correctly.
134 */
135 args->process_apertures[i].scratch_base
136 = scratchApeBase(i + 1);
137 args->process_apertures[i].scratch_limit =
138 scratchApeLimit(args->process_apertures[i].scratch_base);
139
140 args->process_apertures[i].lds_base = ldsApeBase(i + 1);
141 args->process_apertures[i].lds_limit =
142 ldsApeLimit(args->process_apertures[i].lds_base);
143
144 args->process_apertures[i].gpuvm_base = gpuVmApeBase(i + 1);
145 args->process_apertures[i].gpuvm_limit =
146 gpuVmApeLimit(args->process_apertures[i].gpuvm_base);
147
148 // NOTE: Must match ID populated by hsaTopology.py
149 args->process_apertures[i].gpu_id = 2765;
150
151 DPRINTF(GPUDriver, "GPUVM base for node[%i] = %#x\n", i,
152 args->process_apertures[i].gpuvm_base);
153 DPRINTF(GPUDriver, "GPUVM limit for node[%i] = %#x\n", i,
154 args->process_apertures[i].gpuvm_limit);
155
156 DPRINTF(GPUDriver, "LDS base for node[%i] = %#x\n", i,
157 args->process_apertures[i].lds_base);
158 DPRINTF(GPUDriver, "LDS limit for node[%i] = %#x\n", i,
159 args->process_apertures[i].lds_limit);
160
161 DPRINTF(GPUDriver, "Scratch base for node[%i] = %#x\n", i,
162 args->process_apertures[i].scratch_base);
163 DPRINTF(GPUDriver, "Scratch limit for node[%i] = %#x\n", i,
164 args->process_apertures[i].scratch_limit);
165
166 /**
167 * The CPU's 64b address space can only use the
168 * areas with VA[63:47] == 0x1ffff or VA[63:47] == 0,
169 * therefore we must ensure that the apertures do not
170 * fall in the CPU's address space.
171 */
172 assert(bits<Addr>(args->process_apertures[i].scratch_base, 63,
173 47) != 0x1ffff);
174 assert(bits<Addr>(args->process_apertures[i].scratch_base, 63,
175 47) != 0);
176 assert(bits<Addr>(args->process_apertures[i].scratch_limit, 63,
177 47) != 0x1ffff);
178 assert(bits<Addr>(args->process_apertures[i].scratch_limit, 63,
179 47) != 0);
180 assert(bits<Addr>(args->process_apertures[i].lds_base, 63,
181 47) != 0x1ffff);
182 assert(bits<Addr>(args->process_apertures[i].lds_base, 63,
183 47) != 0);
184 assert(bits<Addr>(args->process_apertures[i].lds_limit, 63,
185 47) != 0x1ffff);
186 assert(bits<Addr>(args->process_apertures[i].lds_limit, 63,
187 47) != 0);
188 assert(bits<Addr>(args->process_apertures[i].gpuvm_base, 63,
189 47) != 0x1ffff);
190 assert(bits<Addr>(args->process_apertures[i].gpuvm_base, 63,
191 47) != 0);
192 assert(bits<Addr>(args->process_apertures[i].gpuvm_limit, 63,
193 47) != 0x1ffff);
194 assert(bits<Addr>(args->process_apertures[i].gpuvm_limit, 63,
195 47) != 0);
196 }
197
198 args.copyOut(virt_proxy);
199 }
200 break;
201 case AMDKFD_IOC_UPDATE_QUEUE:
202 {
203 warn("unimplemented ioctl: AMDKFD_IOC_UPDATE_QUEUE\n");
204 }
205 break;
206 case AMDKFD_IOC_CREATE_EVENT:
207 {
208 warn("unimplemented ioctl: AMDKFD_IOC_CREATE_EVENT\n");
209 }
210 break;
211 case AMDKFD_IOC_DESTROY_EVENT:
212 {
213 warn("unimplemented ioctl: AMDKFD_IOC_DESTROY_EVENT\n");
214 }
215 break;
216 case AMDKFD_IOC_SET_EVENT:
217 {
218 warn("unimplemented ioctl: AMDKFD_IOC_SET_EVENT\n");
219 }
220 break;
221 case AMDKFD_IOC_RESET_EVENT:
222 {
223 warn("unimplemented ioctl: AMDKFD_IOC_RESET_EVENT\n");
224 }
225 break;
226 case AMDKFD_IOC_WAIT_EVENTS:
227 {
228 warn("unimplemented ioctl: AMDKFD_IOC_WAIT_EVENTS\n");
229 }
230 break;
231 case AMDKFD_IOC_DBG_REGISTER:
232 {
233 warn("unimplemented ioctl: AMDKFD_IOC_DBG_REGISTER\n");
234 }
235 break;
236 case AMDKFD_IOC_DBG_UNREGISTER:
237 {
238 warn("unimplemented ioctl: AMDKFD_IOC_DBG_UNREGISTER\n");
239 }
240 break;
241 case AMDKFD_IOC_DBG_ADDRESS_WATCH:
242 {
243 warn("unimplemented ioctl: AMDKFD_IOC_DBG_ADDRESS_WATCH\n");
244 }
245 break;
246 case AMDKFD_IOC_DBG_WAVE_CONTROL:
247 {
248 warn("unimplemented ioctl: AMDKFD_IOC_DBG_WAVE_CONTROL\n");
249 }
250 break;
251 case AMDKFD_IOC_ALLOC_MEMORY_OF_GPU:
252 {
253 warn("unimplemented ioctl: AMDKFD_IOC_ALLOC_MEMORY_OF_GPU\n");
254 }
255 break;
256 case AMDKFD_IOC_FREE_MEMORY_OF_GPU:
257 {
258 warn("unimplemented ioctl: AMDKFD_IOC_FREE_MEMORY_OF_GPU\n");
259 }
260 break;
261 case AMDKFD_IOC_MAP_MEMORY_TO_GPU:
262 {
263 warn("unimplemented ioctl: AMDKFD_IOC_MAP_MEMORY_TO_GPU\n");
264 }
265 break;
266 case AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU:
267 {
268 warn("unimplemented ioctl: AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU\n");
269 }
270 case AMDKFD_IOC_ALLOC_MEMORY_OF_SCRATCH:
271 {
272 warn("unimplemented ioctl: AMDKFD_IOC_ALLOC_MEMORY_OF_SCRATCH\n");
273 }
274 break;
275 case AMDKFD_IOC_SET_CU_MASK:
276 {
277 warn("unimplemented ioctl: AMDKFD_IOC_SET_CU_MASK\n");
278 }
279 break;
280 case AMDKFD_IOC_SET_PROCESS_DGPU_APERTURE:
281 {
282 warn("unimplemented ioctl: AMDKFD_IOC_SET_PROCESS_DGPU_APERTURE"
283 "\n");
284 }
285 break;
286 case AMDKFD_IOC_SET_TRAP_HANDLER:
287 {
288 warn("unimplemented ioctl: AMDKFD_IOC_SET_TRAP_HANDLER\n");
289 }
290 break;
291 case AMDKFD_IOC_GET_PROCESS_APERTURES_NEW:
292 {
293 DPRINTF(GPUDriver,
294 "ioctl: AMDKFD_IOC_GET_PROCESS_APERTURES_NEW\n");
295
296 TypedBufferArg<kfd_ioctl_get_process_apertures_new_args>
297 ioc_args(ioc_buf);
298
299 ioc_args.copyIn(virt_proxy);
300 ioc_args->num_of_nodes = 1;
301
302 for (int i = 0; i < ioc_args->num_of_nodes; ++i) {
303 TypedBufferArg<kfd_process_device_apertures> ape_args
304 (ioc_args->kfd_process_device_apertures_ptr);
305
306 ape_args->scratch_base = scratchApeBase(i + 1);
307 ape_args->scratch_limit =
308 scratchApeLimit(ape_args->scratch_base);
309 ape_args->lds_base = ldsApeBase(i + 1);
310 ape_args->lds_limit = ldsApeLimit(ape_args->lds_base);
311 ape_args->gpuvm_base = gpuVmApeBase(i + 1);
312 ape_args->gpuvm_limit = gpuVmApeLimit(ape_args->gpuvm_base);
313
314 ape_args->gpu_id = 2765;
315
316 assert(bits<Addr>(ape_args->scratch_base, 63, 47) != 0x1ffff);
317 assert(bits<Addr>(ape_args->scratch_base, 63, 47) != 0);
318 assert(bits<Addr>(ape_args->scratch_limit, 63, 47) != 0x1ffff);
319 assert(bits<Addr>(ape_args->scratch_limit, 63, 47) != 0);
320 assert(bits<Addr>(ape_args->lds_base, 63, 47) != 0x1ffff);
321 assert(bits<Addr>(ape_args->lds_base, 63, 47) != 0);
322 assert(bits<Addr>(ape_args->lds_limit, 63, 47) != 0x1ffff);
323 assert(bits<Addr>(ape_args->lds_limit, 63, 47) != 0);
324 assert(bits<Addr>(ape_args->gpuvm_base, 63, 47) != 0x1ffff);
325 assert(bits<Addr>(ape_args->gpuvm_base, 63, 47) != 0);
326 assert(bits<Addr>(ape_args->gpuvm_limit, 63, 47) != 0x1ffff);
327 assert(bits<Addr>(ape_args->gpuvm_limit, 63, 47) != 0);
328
329 ape_args.copyOut(virt_proxy);
330 }
331
332 ioc_args.copyOut(virt_proxy);
333 }
334 break;
335 case AMDKFD_IOC_GET_DMABUF_INFO:
336 {
337 warn("unimplemented ioctl: AMDKFD_IOC_GET_DMABUF_INFO\n");
338 }
339 break;
340 case AMDKFD_IOC_IMPORT_DMABUF:
341 {
342 warn("unimplemented ioctl: AMDKFD_IOC_IMPORT_DMABUF\n");
343 }
344 break;
345 case AMDKFD_IOC_GET_TILE_CONFIG:
346 {
347 warn("unimplemented ioctl: AMDKFD_IOC_GET_TILE_CONFIG\n");
348 }
349 break;
350 case AMDKFD_IOC_IPC_IMPORT_HANDLE:
351 {
352 warn("unimplemented ioctl: AMDKFD_IOC_IPC_IMPORT_HANDLE\n");
353 }
354 break;
355 case AMDKFD_IOC_IPC_EXPORT_HANDLE:
356 {
357 warn("unimplemented ioctl: AMDKFD_IOC_IPC_EXPORT_HANDLE\n");
358 }
359 break;
360 case AMDKFD_IOC_CROSS_MEMORY_COPY:
361 {
362 warn("unimplemented ioctl: AMDKFD_IOC_CROSS_MEMORY_COPY\n");
363 }
364 break;
365 case AMDKFD_IOC_OPEN_GRAPHIC_HANDLE:
366 {
367 warn("unimplemented ioctl: AMDKFD_IOC_OPEN_GRAPHIC_HANDLE\n");
368 }
369 break;
370 default:
371 fatal("%s: bad ioctl %d\n", req);
372 break;
373 }
374 return 0;
375 }
376
377 Addr
378 GPUComputeDriver::gpuVmApeBase(int gpuNum) const
379 {
380 return ((Addr)gpuNum << 61) + 0x1000000000000L;
381 }
382
383 Addr
384 GPUComputeDriver::gpuVmApeLimit(Addr apeBase) const
385 {
386 return (apeBase & 0xFFFFFF0000000000UL) | 0xFFFFFFFFFFL;
387 }
388
389 Addr
390 GPUComputeDriver::scratchApeBase(int gpuNum) const
391 {
392 return ((Addr)gpuNum << 61) + 0x100000000L;
393 }
394
395 Addr
396 GPUComputeDriver::scratchApeLimit(Addr apeBase) const
397 {
398 return (apeBase & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF;
399 }
400
401 Addr
402 GPUComputeDriver::ldsApeBase(int gpuNum) const
403 {
404 return ((Addr)gpuNum << 61) + 0x0;
405 }
406
407 Addr
408 GPUComputeDriver::ldsApeLimit(Addr apeBase) const
409 {
410 return (apeBase & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF;
411 }
412
413 GPUComputeDriver*
414 GPUComputeDriverParams::create()
415 {
416 return new GPUComputeDriver(this);
417 }