syscall_emul: [patch 10/22] refactor fdentry and add fdarray class
[gem5.git] / src / gpu-compute / cl_driver.cc
1 /*
2 * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the copyright holder nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Author: Anthony Gutierrez
34 */
35
36 #include "gpu-compute/cl_driver.hh"
37
38 #include <memory>
39
40 #include "base/intmath.hh"
41 #include "cpu/thread_context.hh"
42 #include "gpu-compute/dispatcher.hh"
43 #include "gpu-compute/hsa_code.hh"
44 #include "gpu-compute/hsa_kernel_info.hh"
45 #include "gpu-compute/hsa_object.hh"
46 #include "params/ClDriver.hh"
47 #include "sim/process.hh"
48 #include "sim/syscall_emul_buf.hh"
49
50 ClDriver::ClDriver(ClDriverParams *p)
51 : EmulatedDriver(p), hsaCode(0)
52 {
53 for (const auto &codeFile : p->codefile)
54 codeFiles.push_back(&codeFile);
55
56 maxFuncArgsSize = 0;
57
58 for (int i = 0; i < codeFiles.size(); ++i) {
59 HsaObject *obj = HsaObject::createHsaObject(*codeFiles[i]);
60
61 for (int k = 0; k < obj->numKernels(); ++k) {
62 assert(obj->getKernel(k));
63 kernels.push_back(obj->getKernel(k));
64 kernels.back()->setReadonlyData((uint8_t*)obj->readonlyData);
65 int kern_funcargs_size = kernels.back()->funcarg_size;
66 maxFuncArgsSize = maxFuncArgsSize < kern_funcargs_size ?
67 kern_funcargs_size : maxFuncArgsSize;
68 }
69 }
70
71 int name_offs = 0;
72 int code_offs = 0;
73
74 for (int i = 0; i < kernels.size(); ++i) {
75 kernelInfo.push_back(HsaKernelInfo());
76 HsaCode *k = kernels[i];
77
78 k->generateHsaKernelInfo(&kernelInfo[i]);
79
80 kernelInfo[i].name_offs = name_offs;
81 kernelInfo[i].code_offs = code_offs;
82
83 name_offs += k->name().size() + 1;
84 code_offs += k->numInsts() * sizeof(TheGpuISA::RawMachInst);
85 }
86 }
87
88 void
89 ClDriver::handshake(GpuDispatcher *_dispatcher)
90 {
91 dispatcher = _dispatcher;
92 dispatcher->setFuncargsSize(maxFuncArgsSize);
93 }
94
95 int
96 ClDriver::open(Process *p, ThreadContext *tc, int mode, int flags)
97 {
98 std::shared_ptr<DeviceFDEntry> fdp;
99 fdp = std::make_shared<DeviceFDEntry>(this, filename);
100 int tgt_fd = p->fds->allocFD(fdp);
101 return tgt_fd;
102 }
103
104 int
105 ClDriver::ioctl(Process *process, ThreadContext *tc, unsigned req)
106 {
107 int index = 2;
108 Addr buf_addr = process->getSyscallArg(tc, index);
109
110 switch (req) {
111 case HSA_GET_SIZES:
112 {
113 TypedBufferArg<HsaDriverSizes> sizes(buf_addr);
114 sizes->num_kernels = kernels.size();
115 sizes->string_table_size = 0;
116 sizes->code_size = 0;
117 sizes->readonly_size = 0;
118
119 if (kernels.size() > 0) {
120 // all kernels will share the same read-only memory
121 sizes->readonly_size =
122 kernels[0]->getSize(HsaCode::MemorySegment::READONLY);
123 // check our assumption
124 for (int i = 1; i<kernels.size(); ++i) {
125 assert(sizes->readonly_size ==
126 kernels[i]->getSize(HsaCode::MemorySegment::READONLY));
127 }
128 }
129
130 for (int i = 0; i < kernels.size(); ++i) {
131 HsaCode *k = kernels[i];
132 // add one for terminating '\0'
133 sizes->string_table_size += k->name().size() + 1;
134 sizes->code_size +=
135 k->numInsts() * sizeof(TheGpuISA::RawMachInst);
136 }
137
138 sizes.copyOut(tc->getMemProxy());
139 }
140 break;
141
142 case HSA_GET_KINFO:
143 {
144 TypedBufferArg<HsaKernelInfo>
145 kinfo(buf_addr, sizeof(HsaKernelInfo) * kernels.size());
146
147 for (int i = 0; i < kernels.size(); ++i) {
148 HsaKernelInfo *ki = &kinfo[i];
149 ki->name_offs = kernelInfo[i].name_offs;
150 ki->code_offs = kernelInfo[i].code_offs;
151 ki->sRegCount = kernelInfo[i].sRegCount;
152 ki->dRegCount = kernelInfo[i].dRegCount;
153 ki->cRegCount = kernelInfo[i].cRegCount;
154 ki->static_lds_size = kernelInfo[i].static_lds_size;
155 ki->private_mem_size = kernelInfo[i].private_mem_size;
156 ki->spill_mem_size = kernelInfo[i].spill_mem_size;
157 }
158
159 kinfo.copyOut(tc->getMemProxy());
160 }
161 break;
162
163 case HSA_GET_STRINGS:
164 {
165 int string_table_size = 0;
166 for (int i = 0; i < kernels.size(); ++i) {
167 HsaCode *k = kernels[i];
168 string_table_size += k->name().size() + 1;
169 }
170
171 BufferArg buf(buf_addr, string_table_size);
172 char *bufp = (char*)buf.bufferPtr();
173
174 for (int i = 0; i < kernels.size(); ++i) {
175 HsaCode *k = kernels[i];
176 const char *n = k->name().c_str();
177
178 // idiomatic string copy
179 while ((*bufp++ = *n++));
180 }
181
182 assert(bufp - (char *)buf.bufferPtr() == string_table_size);
183
184 buf.copyOut(tc->getMemProxy());
185 }
186 break;
187
188 case HSA_GET_READONLY_DATA:
189 {
190 // we can pick any kernel --- they share the same
191 // readonly segment (this assumption is checked in GET_SIZES)
192 uint64_t size =
193 kernels.back()->getSize(HsaCode::MemorySegment::READONLY);
194 BufferArg data(buf_addr, size);
195 char *datap = (char *)data.bufferPtr();
196 memcpy(datap,
197 kernels.back()->readonly_data,
198 size);
199 data.copyOut(tc->getMemProxy());
200 }
201 break;
202
203 case HSA_GET_CODE:
204 {
205 // set hsaCode pointer
206 hsaCode = buf_addr;
207 int code_size = 0;
208
209 for (int i = 0; i < kernels.size(); ++i) {
210 HsaCode *k = kernels[i];
211 code_size += k->numInsts() * sizeof(TheGpuISA::RawMachInst);
212 }
213
214 TypedBufferArg<TheGpuISA::RawMachInst> buf(buf_addr, code_size);
215 TheGpuISA::RawMachInst *bufp = buf;
216
217 int buf_idx = 0;
218
219 for (int i = 0; i < kernels.size(); ++i) {
220 HsaCode *k = kernels[i];
221
222 for (int j = 0; j < k->numInsts(); ++j) {
223 bufp[buf_idx] = k->insts()->at(j);
224 ++buf_idx;
225 }
226 }
227
228 buf.copyOut(tc->getMemProxy());
229 }
230 break;
231
232 case HSA_GET_CU_CNT:
233 {
234 BufferArg buf(buf_addr, sizeof(uint32_t));
235 *((uint32_t*)buf.bufferPtr()) = dispatcher->getNumCUs();
236 buf.copyOut(tc->getMemProxy());
237 }
238 break;
239
240 case HSA_GET_VSZ:
241 {
242 BufferArg buf(buf_addr, sizeof(uint32_t));
243 *((uint32_t*)buf.bufferPtr()) = dispatcher->wfSize();
244 buf.copyOut(tc->getMemProxy());
245 }
246 break;
247 case HSA_GET_HW_STATIC_CONTEXT_SIZE:
248 {
249 BufferArg buf(buf_addr, sizeof(uint32_t));
250 *((uint32_t*)buf.bufferPtr()) = dispatcher->getStaticContextSize();
251 buf.copyOut(tc->getMemProxy());
252 }
253 break;
254
255 default:
256 fatal("ClDriver: bad ioctl %d\n", req);
257 }
258
259 return 0;
260 }
261
262 const char*
263 ClDriver::codeOffToKernelName(uint64_t code_ptr)
264 {
265 assert(hsaCode);
266 uint32_t code_offs = code_ptr - hsaCode;
267
268 for (int i = 0; i < kernels.size(); ++i) {
269 if (code_offs == kernelInfo[i].code_offs) {
270 return kernels[i]->name().c_str();
271 }
272 }
273
274 return nullptr;
275 }
276
277 ClDriver*
278 ClDriverParams::create()
279 {
280 return new ClDriver(this);
281 }