2 * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
5 * For use for simulation and test purposes only
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
17 * 3. Neither the name of the copyright holder nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
33 * Author: Anthony Gutierrez
36 #include "gpu-compute/cl_driver.hh"
40 #include "base/intmath.hh"
41 #include "cpu/thread_context.hh"
42 #include "gpu-compute/dispatcher.hh"
43 #include "gpu-compute/hsa_code.hh"
44 #include "gpu-compute/hsa_kernel_info.hh"
45 #include "gpu-compute/hsa_object.hh"
46 #include "params/ClDriver.hh"
47 #include "sim/process.hh"
48 #include "sim/syscall_emul_buf.hh"
50 ClDriver::ClDriver(ClDriverParams
*p
)
51 : EmulatedDriver(p
), hsaCode(0)
53 for (const auto &codeFile
: p
->codefile
)
54 codeFiles
.push_back(&codeFile
);
58 for (int i
= 0; i
< codeFiles
.size(); ++i
) {
59 HsaObject
*obj
= HsaObject::createHsaObject(*codeFiles
[i
]);
61 for (int k
= 0; k
< obj
->numKernels(); ++k
) {
62 assert(obj
->getKernel(k
));
63 kernels
.push_back(obj
->getKernel(k
));
64 kernels
.back()->setReadonlyData((uint8_t*)obj
->readonlyData
);
65 int kern_funcargs_size
= kernels
.back()->funcarg_size
;
66 maxFuncArgsSize
= maxFuncArgsSize
< kern_funcargs_size
?
67 kern_funcargs_size
: maxFuncArgsSize
;
74 for (int i
= 0; i
< kernels
.size(); ++i
) {
75 kernelInfo
.push_back(HsaKernelInfo());
76 HsaCode
*k
= kernels
[i
];
78 k
->generateHsaKernelInfo(&kernelInfo
[i
]);
80 kernelInfo
[i
].name_offs
= name_offs
;
81 kernelInfo
[i
].code_offs
= code_offs
;
83 name_offs
+= k
->name().size() + 1;
84 code_offs
+= k
->numInsts() * sizeof(TheGpuISA::RawMachInst
);
89 ClDriver::handshake(GpuDispatcher
*_dispatcher
)
91 dispatcher
= _dispatcher
;
92 dispatcher
->setFuncargsSize(maxFuncArgsSize
);
96 ClDriver::open(Process
*p
, ThreadContext
*tc
, int mode
, int flags
)
98 std::shared_ptr
<DeviceFDEntry
> fdp
;
99 fdp
= std::make_shared
<DeviceFDEntry
>(this, filename
);
100 int tgt_fd
= p
->fds
->allocFD(fdp
);
// ioctl handler for the emulated CL driver. Every visible request fills a
// BufferArg/TypedBufferArg located at buf_addr (read from the syscall
// arguments) and copies it out through tc->getMemProxy().
// NOTE(review): this listing appears to be missing lines -- e.g. the
// switch header, several case labels, braces, and the declarations of
// `index`, `size`, `code_size` and `buf_idx`. Confirm every note below
// against the complete file.
105 ClDriver::ioctl(Process
*process
, ThreadContext
*tc
, unsigned req
)
// address of the user buffer, fetched from the syscall argument list
108 Addr buf_addr
= process
->getSyscallArg(tc
, index
);
// Size report: kernel count plus string-table, code and read-only sizes.
// All three sizes start at zero and are filled in below.
113 TypedBufferArg
<HsaDriverSizes
> sizes(buf_addr
);
114 sizes
->num_kernels
= kernels
.size();
115 sizes
->string_table_size
= 0;
116 sizes
->code_size
= 0;
117 sizes
->readonly_size
= 0;
119 if (kernels
.size() > 0) {
120 // all kernels will share the same read-only memory
121 sizes
->readonly_size
=
122 kernels
[0]->getSize(HsaCode::MemorySegment::READONLY
);
123 // check our assumption
124 for (int i
= 1; i
<kernels
.size(); ++i
) {
125 assert(sizes
->readonly_size
==
126 kernels
[i
]->getSize(HsaCode::MemorySegment::READONLY
));
// accumulate the per-kernel contributions to the reported sizes
130 for (int i
= 0; i
< kernels
.size(); ++i
) {
131 HsaCode
*k
= kernels
[i
];
132 // add one for terminating '\0'
133 sizes
->string_table_size
+= k
->name().size() + 1;
// NOTE(review): the left-hand side of the next expression is not visible
// here; presumably it is added to sizes->code_size -- confirm.
135 k
->numInsts() * sizeof(TheGpuISA::RawMachInst
);
138 sizes
.copyOut(tc
->getMemProxy());
// Kernel-info report: one HsaKernelInfo per kernel, copied field by field
// from the driver's kernelInfo table.
144 TypedBufferArg
<HsaKernelInfo
>
145 kinfo(buf_addr
, sizeof(HsaKernelInfo
) * kernels
.size());
147 for (int i
= 0; i
< kernels
.size(); ++i
) {
148 HsaKernelInfo
*ki
= &kinfo
[i
];
149 ki
->name_offs
= kernelInfo
[i
].name_offs
;
150 ki
->code_offs
= kernelInfo
[i
].code_offs
;
151 ki
->sRegCount
= kernelInfo
[i
].sRegCount
;
152 ki
->dRegCount
= kernelInfo
[i
].dRegCount
;
153 ki
->cRegCount
= kernelInfo
[i
].cRegCount
;
154 ki
->static_lds_size
= kernelInfo
[i
].static_lds_size
;
155 ki
->private_mem_size
= kernelInfo
[i
].private_mem_size
;
156 ki
->spill_mem_size
= kernelInfo
[i
].spill_mem_size
;
159 kinfo
.copyOut(tc
->getMemProxy());
// String table: all kernel names written back to back, each followed by
// its '\0' terminator.
163 case HSA_GET_STRINGS
:
// first pass: total bytes needed (name length + 1 per kernel)
165 int string_table_size
= 0;
166 for (int i
= 0; i
< kernels
.size(); ++i
) {
167 HsaCode
*k
= kernels
[i
];
168 string_table_size
+= k
->name().size() + 1;
171 BufferArg
buf(buf_addr
, string_table_size
);
172 char *bufp
= (char*)buf
.bufferPtr();
// second pass: copy each name, terminator included, advancing bufp
174 for (int i
= 0; i
< kernels
.size(); ++i
) {
175 HsaCode
*k
= kernels
[i
];
176 const char *n
= k
->name().c_str();
178 // idiomatic string copy
179 while ((*bufp
++ = *n
++));
// the write cursor must have advanced by exactly the computed table size
182 assert(bufp
- (char *)buf
.bufferPtr() == string_table_size
);
184 buf
.copyOut(tc
->getMemProxy());
188 case HSA_GET_READONLY_DATA
:
190 // we can pick any kernel --- they share the same
191 // readonly segment (this assumption is checked in GET_SIZES)
// NOTE(review): the declaration that receives this getSize() result is
// not visible here; presumably it initialises `size` used below -- confirm.
193 kernels
.back()->getSize(HsaCode::MemorySegment::READONLY
);
194 BufferArg
data(buf_addr
, size
);
195 char *datap
= (char *)data
.bufferPtr();
// NOTE(review): the call consuming readonly_data is cut off in this
// listing; presumably a copy of `size` bytes into datap -- confirm.
197 kernels
.back()->readonly_data
,
199 data
.copyOut(tc
->getMemProxy());
205 // set hsaCode pointer
// total size in bytes of every kernel's instructions
209 for (int i
= 0; i
< kernels
.size(); ++i
) {
210 HsaCode
*k
= kernels
[i
];
211 code_size
+= k
->numInsts() * sizeof(TheGpuISA::RawMachInst
);
214 TypedBufferArg
<TheGpuISA::RawMachInst
> buf(buf_addr
, code_size
);
215 TheGpuISA::RawMachInst
*bufp
= buf
;
// write each kernel's instructions into the output buffer, in kernel order
// NOTE(review): buf_idx's declaration and update are not visible here.
219 for (int i
= 0; i
< kernels
.size(); ++i
) {
220 HsaCode
*k
= kernels
[i
];
222 for (int j
= 0; j
< k
->numInsts(); ++j
) {
223 bufp
[buf_idx
] = k
->insts()->at(j
);
228 buf
.copyOut(tc
->getMemProxy());
// report dispatcher->getNumCUs() as a single uint32_t
234 BufferArg
buf(buf_addr
, sizeof(uint32_t));
235 *((uint32_t*)buf
.bufferPtr()) = dispatcher
->getNumCUs();
236 buf
.copyOut(tc
->getMemProxy());
// report dispatcher->wfSize() as a single uint32_t
242 BufferArg
buf(buf_addr
, sizeof(uint32_t));
243 *((uint32_t*)buf
.bufferPtr()) = dispatcher
->wfSize();
244 buf
.copyOut(tc
->getMemProxy());
// report dispatcher->getStaticContextSize() as a single uint32_t
247 case HSA_GET_HW_STATIC_CONTEXT_SIZE
:
249 BufferArg
buf(buf_addr
, sizeof(uint32_t));
250 *((uint32_t*)buf
.bufferPtr()) = dispatcher
->getStaticContextSize();
251 buf
.copyOut(tc
->getMemProxy());
// unrecognised request: abort via fatal() (presumably the switch's default
// arm -- its label is not visible in this listing)
256 fatal("ClDriver: bad ioctl %d\n", req
);
263 ClDriver::codeOffToKernelName(uint64_t code_ptr
)
266 uint32_t code_offs
= code_ptr
- hsaCode
;
268 for (int i
= 0; i
< kernels
.size(); ++i
) {
269 if (code_offs
== kernelInfo
[i
].code_offs
) {
270 return kernels
[i
]->name().c_str();
278 ClDriverParams::create()
280 return new ClDriver(this);