/*
 * Copyright (c) 2015-2018 Advanced Micro Devices, Inc.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Sooraj Puthoor
 */
37 #include "gpu-compute/gpu_compute_driver.hh"
39 #include "cpu/thread_context.hh"
40 #include "debug/GPUDriver.hh"
41 #include "dev/hsa/hsa_device.hh"
42 #include "dev/hsa/hsa_packet_processor.hh"
43 #include "dev/hsa/kfd_ioctl.h"
44 #include "params/GPUComputeDriver.hh"
45 #include "sim/syscall_emul_buf.hh"
47 GPUComputeDriver::GPUComputeDriver(Params
*p
)
50 DPRINTF(GPUDriver
, "Constructing KFD: device\n");
54 GPUComputeDriver::ioctl(ThreadContext
*tc
, unsigned req
, Addr ioc_buf
)
56 auto &virt_proxy
= tc
->getVirtProxy();
59 case AMDKFD_IOC_GET_VERSION
:
61 DPRINTF(GPUDriver
, "ioctl: AMDKFD_IOC_GET_VERSION\n");
63 TypedBufferArg
<kfd_ioctl_get_version_args
> args(ioc_buf
);
64 args
->major_version
= 1;
65 args
->minor_version
= 0;
67 args
.copyOut(virt_proxy
);
70 case AMDKFD_IOC_CREATE_QUEUE
:
72 DPRINTF(GPUDriver
, "ioctl: AMDKFD_IOC_CREATE_QUEUE\n");
74 allocateQueue(virt_proxy
, ioc_buf
);
76 DPRINTF(GPUDriver
, "Creating queue %d\n", queueId
);
79 case AMDKFD_IOC_DESTROY_QUEUE
:
81 TypedBufferArg
<kfd_ioctl_destroy_queue_args
> args(ioc_buf
);
82 args
.copyIn(virt_proxy
);
83 DPRINTF(GPUDriver
, "ioctl: AMDKFD_IOC_DESTROY_QUEUE;" \
84 "queue offset %d\n", args
->queue_id
);
85 device
->hsaPacketProc().unsetDeviceQueueDesc(args
->queue_id
);
88 case AMDKFD_IOC_SET_MEMORY_POLICY
:
90 warn("unimplemented ioctl: AMDKFD_IOC_SET_MEMORY_POLICY\n");
93 case AMDKFD_IOC_GET_CLOCK_COUNTERS
:
95 DPRINTF(GPUDriver
, "ioctl: AMDKFD_IOC_GET_CLOCK_COUNTERS\n");
97 TypedBufferArg
<kfd_ioctl_get_clock_counters_args
> args(ioc_buf
);
98 args
.copyIn(virt_proxy
);
100 // Set nanosecond resolution
101 args
->system_clock_freq
= 1000000000;
104 * Derive all clock counters based on the tick. All
105 * device clocks are identical and perfectly in sync.
107 uint64_t elapsed_nsec
= curTick() / SimClock::Int::ns
;
108 args
->gpu_clock_counter
= elapsed_nsec
;
109 args
->cpu_clock_counter
= elapsed_nsec
;
110 args
->system_clock_counter
= elapsed_nsec
;
112 args
.copyOut(virt_proxy
);
115 case AMDKFD_IOC_GET_PROCESS_APERTURES
:
117 DPRINTF(GPUDriver
, "ioctl: AMDKFD_IOC_GET_PROCESS_APERTURES\n");
119 TypedBufferArg
<kfd_ioctl_get_process_apertures_args
> args(ioc_buf
);
120 args
->num_of_nodes
= 1;
123 * Set the GPUVM/LDS/Scratch APEs exactly as they
124 * are in the real driver, see the KFD driver
125 * in the ROCm Linux kernel source:
126 * drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
128 for (int i
= 0; i
< args
->num_of_nodes
; ++i
) {
130 * While the GPU node numbers start at 0, we add 1
131 * to force the count to start at 1. This is to
132 * ensure that the base/limit addresses are
133 * calculated correctly.
135 args
->process_apertures
[i
].scratch_base
136 = scratchApeBase(i
+ 1);
137 args
->process_apertures
[i
].scratch_limit
=
138 scratchApeLimit(args
->process_apertures
[i
].scratch_base
);
140 args
->process_apertures
[i
].lds_base
= ldsApeBase(i
+ 1);
141 args
->process_apertures
[i
].lds_limit
=
142 ldsApeLimit(args
->process_apertures
[i
].lds_base
);
144 args
->process_apertures
[i
].gpuvm_base
= gpuVmApeBase(i
+ 1);
145 args
->process_apertures
[i
].gpuvm_limit
=
146 gpuVmApeLimit(args
->process_apertures
[i
].gpuvm_base
);
148 // NOTE: Must match ID populated by hsaTopology.py
149 args
->process_apertures
[i
].gpu_id
= 2765;
151 DPRINTF(GPUDriver
, "GPUVM base for node[%i] = %#x\n", i
,
152 args
->process_apertures
[i
].gpuvm_base
);
153 DPRINTF(GPUDriver
, "GPUVM limit for node[%i] = %#x\n", i
,
154 args
->process_apertures
[i
].gpuvm_limit
);
156 DPRINTF(GPUDriver
, "LDS base for node[%i] = %#x\n", i
,
157 args
->process_apertures
[i
].lds_base
);
158 DPRINTF(GPUDriver
, "LDS limit for node[%i] = %#x\n", i
,
159 args
->process_apertures
[i
].lds_limit
);
161 DPRINTF(GPUDriver
, "Scratch base for node[%i] = %#x\n", i
,
162 args
->process_apertures
[i
].scratch_base
);
163 DPRINTF(GPUDriver
, "Scratch limit for node[%i] = %#x\n", i
,
164 args
->process_apertures
[i
].scratch_limit
);
167 * The CPU's 64b address space can only use the
168 * areas with VA[63:47] == 0x1ffff or VA[63:47] == 0,
169 * therefore we must ensure that the apertures do not
170 * fall in the CPU's address space.
172 assert(bits
<Addr
>(args
->process_apertures
[i
].scratch_base
, 63,
174 assert(bits
<Addr
>(args
->process_apertures
[i
].scratch_base
, 63,
176 assert(bits
<Addr
>(args
->process_apertures
[i
].scratch_limit
, 63,
178 assert(bits
<Addr
>(args
->process_apertures
[i
].scratch_limit
, 63,
180 assert(bits
<Addr
>(args
->process_apertures
[i
].lds_base
, 63,
182 assert(bits
<Addr
>(args
->process_apertures
[i
].lds_base
, 63,
184 assert(bits
<Addr
>(args
->process_apertures
[i
].lds_limit
, 63,
186 assert(bits
<Addr
>(args
->process_apertures
[i
].lds_limit
, 63,
188 assert(bits
<Addr
>(args
->process_apertures
[i
].gpuvm_base
, 63,
190 assert(bits
<Addr
>(args
->process_apertures
[i
].gpuvm_base
, 63,
192 assert(bits
<Addr
>(args
->process_apertures
[i
].gpuvm_limit
, 63,
194 assert(bits
<Addr
>(args
->process_apertures
[i
].gpuvm_limit
, 63,
198 args
.copyOut(virt_proxy
);
201 case AMDKFD_IOC_UPDATE_QUEUE
:
203 warn("unimplemented ioctl: AMDKFD_IOC_UPDATE_QUEUE\n");
206 case AMDKFD_IOC_CREATE_EVENT
:
208 warn("unimplemented ioctl: AMDKFD_IOC_CREATE_EVENT\n");
211 case AMDKFD_IOC_DESTROY_EVENT
:
213 warn("unimplemented ioctl: AMDKFD_IOC_DESTROY_EVENT\n");
216 case AMDKFD_IOC_SET_EVENT
:
218 warn("unimplemented ioctl: AMDKFD_IOC_SET_EVENT\n");
221 case AMDKFD_IOC_RESET_EVENT
:
223 warn("unimplemented ioctl: AMDKFD_IOC_RESET_EVENT\n");
226 case AMDKFD_IOC_WAIT_EVENTS
:
228 warn("unimplemented ioctl: AMDKFD_IOC_WAIT_EVENTS\n");
231 case AMDKFD_IOC_DBG_REGISTER
:
233 warn("unimplemented ioctl: AMDKFD_IOC_DBG_REGISTER\n");
236 case AMDKFD_IOC_DBG_UNREGISTER
:
238 warn("unimplemented ioctl: AMDKFD_IOC_DBG_UNREGISTER\n");
241 case AMDKFD_IOC_DBG_ADDRESS_WATCH
:
243 warn("unimplemented ioctl: AMDKFD_IOC_DBG_ADDRESS_WATCH\n");
246 case AMDKFD_IOC_DBG_WAVE_CONTROL
:
248 warn("unimplemented ioctl: AMDKFD_IOC_DBG_WAVE_CONTROL\n");
251 case AMDKFD_IOC_ALLOC_MEMORY_OF_GPU
:
253 warn("unimplemented ioctl: AMDKFD_IOC_ALLOC_MEMORY_OF_GPU\n");
256 case AMDKFD_IOC_FREE_MEMORY_OF_GPU
:
258 warn("unimplemented ioctl: AMDKFD_IOC_FREE_MEMORY_OF_GPU\n");
261 case AMDKFD_IOC_MAP_MEMORY_TO_GPU
:
263 warn("unimplemented ioctl: AMDKFD_IOC_MAP_MEMORY_TO_GPU\n");
266 case AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU
:
268 warn("unimplemented ioctl: AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU\n");
270 case AMDKFD_IOC_ALLOC_MEMORY_OF_SCRATCH
:
272 warn("unimplemented ioctl: AMDKFD_IOC_ALLOC_MEMORY_OF_SCRATCH\n");
275 case AMDKFD_IOC_SET_CU_MASK
:
277 warn("unimplemented ioctl: AMDKFD_IOC_SET_CU_MASK\n");
280 case AMDKFD_IOC_SET_PROCESS_DGPU_APERTURE
:
282 warn("unimplemented ioctl: AMDKFD_IOC_SET_PROCESS_DGPU_APERTURE"
286 case AMDKFD_IOC_SET_TRAP_HANDLER
:
288 warn("unimplemented ioctl: AMDKFD_IOC_SET_TRAP_HANDLER\n");
291 case AMDKFD_IOC_GET_PROCESS_APERTURES_NEW
:
294 "ioctl: AMDKFD_IOC_GET_PROCESS_APERTURES_NEW\n");
296 TypedBufferArg
<kfd_ioctl_get_process_apertures_new_args
>
299 ioc_args
.copyIn(virt_proxy
);
300 ioc_args
->num_of_nodes
= 1;
302 for (int i
= 0; i
< ioc_args
->num_of_nodes
; ++i
) {
303 TypedBufferArg
<kfd_process_device_apertures
> ape_args
304 (ioc_args
->kfd_process_device_apertures_ptr
);
306 ape_args
->scratch_base
= scratchApeBase(i
+ 1);
307 ape_args
->scratch_limit
=
308 scratchApeLimit(ape_args
->scratch_base
);
309 ape_args
->lds_base
= ldsApeBase(i
+ 1);
310 ape_args
->lds_limit
= ldsApeLimit(ape_args
->lds_base
);
311 ape_args
->gpuvm_base
= gpuVmApeBase(i
+ 1);
312 ape_args
->gpuvm_limit
= gpuVmApeLimit(ape_args
->gpuvm_base
);
314 ape_args
->gpu_id
= 2765;
316 assert(bits
<Addr
>(ape_args
->scratch_base
, 63, 47) != 0x1ffff);
317 assert(bits
<Addr
>(ape_args
->scratch_base
, 63, 47) != 0);
318 assert(bits
<Addr
>(ape_args
->scratch_limit
, 63, 47) != 0x1ffff);
319 assert(bits
<Addr
>(ape_args
->scratch_limit
, 63, 47) != 0);
320 assert(bits
<Addr
>(ape_args
->lds_base
, 63, 47) != 0x1ffff);
321 assert(bits
<Addr
>(ape_args
->lds_base
, 63, 47) != 0);
322 assert(bits
<Addr
>(ape_args
->lds_limit
, 63, 47) != 0x1ffff);
323 assert(bits
<Addr
>(ape_args
->lds_limit
, 63, 47) != 0);
324 assert(bits
<Addr
>(ape_args
->gpuvm_base
, 63, 47) != 0x1ffff);
325 assert(bits
<Addr
>(ape_args
->gpuvm_base
, 63, 47) != 0);
326 assert(bits
<Addr
>(ape_args
->gpuvm_limit
, 63, 47) != 0x1ffff);
327 assert(bits
<Addr
>(ape_args
->gpuvm_limit
, 63, 47) != 0);
329 ape_args
.copyOut(virt_proxy
);
332 ioc_args
.copyOut(virt_proxy
);
335 case AMDKFD_IOC_GET_DMABUF_INFO
:
337 warn("unimplemented ioctl: AMDKFD_IOC_GET_DMABUF_INFO\n");
340 case AMDKFD_IOC_IMPORT_DMABUF
:
342 warn("unimplemented ioctl: AMDKFD_IOC_IMPORT_DMABUF\n");
345 case AMDKFD_IOC_GET_TILE_CONFIG
:
347 warn("unimplemented ioctl: AMDKFD_IOC_GET_TILE_CONFIG\n");
350 case AMDKFD_IOC_IPC_IMPORT_HANDLE
:
352 warn("unimplemented ioctl: AMDKFD_IOC_IPC_IMPORT_HANDLE\n");
355 case AMDKFD_IOC_IPC_EXPORT_HANDLE
:
357 warn("unimplemented ioctl: AMDKFD_IOC_IPC_EXPORT_HANDLE\n");
360 case AMDKFD_IOC_CROSS_MEMORY_COPY
:
362 warn("unimplemented ioctl: AMDKFD_IOC_CROSS_MEMORY_COPY\n");
365 case AMDKFD_IOC_OPEN_GRAPHIC_HANDLE
:
367 warn("unimplemented ioctl: AMDKFD_IOC_OPEN_GRAPHIC_HANDLE\n");
371 fatal("%s: bad ioctl %d\n", req
);
378 GPUComputeDriver::gpuVmApeBase(int gpuNum
) const
380 return ((Addr
)gpuNum
<< 61) + 0x1000000000000L
;
384 GPUComputeDriver::gpuVmApeLimit(Addr apeBase
) const
386 return (apeBase
& 0xFFFFFF0000000000UL
) | 0xFFFFFFFFFFL
;
390 GPUComputeDriver::scratchApeBase(int gpuNum
) const
392 return ((Addr
)gpuNum
<< 61) + 0x100000000L
;
396 GPUComputeDriver::scratchApeLimit(Addr apeBase
) const
398 return (apeBase
& 0xFFFFFFFF00000000UL
) | 0xFFFFFFFF;
402 GPUComputeDriver::ldsApeBase(int gpuNum
) const
404 return ((Addr
)gpuNum
<< 61) + 0x0;
408 GPUComputeDriver::ldsApeLimit(Addr apeBase
) const
410 return (apeBase
& 0xFFFFFFFF00000000UL
) | 0xFFFFFFFF;
414 GPUComputeDriverParams::create()
416 return new GPUComputeDriver(this);