gallium: add PIPE_CAP_RESOURCE_FROM_USER_MEMORY_COMPUTE_ONLY
[mesa.git] / src / gallium / frontends / clover / core / device.cpp
1 //
2 // Copyright 2012 Francisco Jerez
3 //
4 // Permission is hereby granted, free of charge, to any person obtaining a
5 // copy of this software and associated documentation files (the "Software"),
6 // to deal in the Software without restriction, including without limitation
7 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 // and/or sell copies of the Software, and to permit persons to whom the
9 // Software is furnished to do so, subject to the following conditions:
10 //
11 // The above copyright notice and this permission notice shall be included in
12 // all copies or substantial portions of the Software.
13 //
14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 // OTHER DEALINGS IN THE SOFTWARE.
21 //
22
23 #include <algorithm>
24 #include <unistd.h>
25 #include "core/device.hpp"
26 #include "core/platform.hpp"
27 #include "pipe/p_screen.h"
28 #include "pipe/p_state.h"
29 #include "util/bitscan.h"
30 #include "util/u_debug.h"
31
32 using namespace clover;
33
34 namespace {
35 template<typename T>
36 std::vector<T>
37 get_compute_param(pipe_screen *pipe, pipe_shader_ir ir_format,
38 pipe_compute_cap cap) {
39 int sz = pipe->get_compute_param(pipe, ir_format, cap, NULL);
40 std::vector<T> v(sz / sizeof(T));
41
42 pipe->get_compute_param(pipe, ir_format, cap, &v.front());
43 return v;
44 }
45 }
46
47 device::device(clover::platform &platform, pipe_loader_device *ldev) :
48 platform(platform), ldev(ldev) {
49 pipe = pipe_loader_create_screen(ldev);
50 if (pipe && pipe->get_param(pipe, PIPE_CAP_COMPUTE)) {
51 if (supports_ir(PIPE_SHADER_IR_NATIVE))
52 return;
53 #ifdef HAVE_CLOVER_SPIRV
54 if (supports_ir(PIPE_SHADER_IR_NIR_SERIALIZED))
55 return;
56 #endif
57 }
58 if (pipe)
59 pipe->destroy(pipe);
60 throw error(CL_INVALID_DEVICE);
61 }
62
device::~device() {
   // Destroy the screen before releasing the loader device that created it.
   if (pipe)
      pipe->destroy(pipe);
   if (ldev)
      pipe_loader_release(&ldev, 1);
}
69
70 bool
71 device::operator==(const device &dev) const {
72 return this == &dev;
73 }
74
75 cl_device_type
76 device::type() const {
77 switch (ldev->type) {
78 case PIPE_LOADER_DEVICE_SOFTWARE:
79 return CL_DEVICE_TYPE_CPU;
80 case PIPE_LOADER_DEVICE_PCI:
81 case PIPE_LOADER_DEVICE_PLATFORM:
82 return CL_DEVICE_TYPE_GPU;
83 default:
84 unreachable("Unknown device type.");
85 }
86 }
87
88 cl_uint
89 device::vendor_id() const {
90 switch (ldev->type) {
91 case PIPE_LOADER_DEVICE_SOFTWARE:
92 case PIPE_LOADER_DEVICE_PLATFORM:
93 return 0;
94 case PIPE_LOADER_DEVICE_PCI:
95 return ldev->u.pci.vendor_id;
96 default:
97 unreachable("Unknown device type.");
98 }
99 }
100
size_t
device::max_images_read() const {
   // Bounded by the number of shader image slots gallium provides.
   return PIPE_MAX_SHADER_IMAGES;
}
105
size_t
device::max_images_write() const {
   // Same gallium shader-image slot limit as for reads.
   return PIPE_MAX_SHADER_IMAGES;
}
110
111 size_t
112 device::max_image_buffer_size() const {
113 return pipe->get_param(pipe, PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE);
114 }
115
116 cl_uint
117 device::max_image_levels_2d() const {
118 return util_last_bit(pipe->get_param(pipe, PIPE_CAP_MAX_TEXTURE_2D_SIZE));
119 }
120
121 cl_uint
122 device::max_image_levels_3d() const {
123 return pipe->get_param(pipe, PIPE_CAP_MAX_TEXTURE_3D_LEVELS);
124 }
125
126 size_t
127 device::max_image_array_number() const {
128 return pipe->get_param(pipe, PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS);
129 }
130
131 cl_uint
132 device::max_samplers() const {
133 return pipe->get_shader_param(pipe, PIPE_SHADER_COMPUTE,
134 PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS);
135 }
136
137 cl_ulong
138 device::max_mem_global() const {
139 return get_compute_param<uint64_t>(pipe, ir_format(),
140 PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE)[0];
141 }
142
143 cl_ulong
144 device::max_mem_local() const {
145 return get_compute_param<uint64_t>(pipe, ir_format(),
146 PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE)[0];
147 }
148
149 cl_ulong
150 device::max_mem_input() const {
151 return get_compute_param<uint64_t>(pipe, ir_format(),
152 PIPE_COMPUTE_CAP_MAX_INPUT_SIZE)[0];
153 }
154
155 cl_ulong
156 device::max_const_buffer_size() const {
157 return pipe->get_shader_param(pipe, PIPE_SHADER_COMPUTE,
158 PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE);
159 }
160
161 cl_uint
162 device::max_const_buffers() const {
163 return pipe->get_shader_param(pipe, PIPE_SHADER_COMPUTE,
164 PIPE_SHADER_CAP_MAX_CONST_BUFFERS);
165 }
166
167 size_t
168 device::max_threads_per_block() const {
169 return get_compute_param<uint64_t>(
170 pipe, ir_format(), PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK)[0];
171 }
172
173 cl_ulong
174 device::max_mem_alloc_size() const {
175 return get_compute_param<uint64_t>(pipe, ir_format(),
176 PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE)[0];
177 }
178
179 cl_uint
180 device::max_clock_frequency() const {
181 return get_compute_param<uint32_t>(pipe, ir_format(),
182 PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY)[0];
183 }
184
185 cl_uint
186 device::max_compute_units() const {
187 return get_compute_param<uint32_t>(pipe, ir_format(),
188 PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS)[0];
189 }
190
191 bool
192 device::image_support() const {
193 return get_compute_param<uint32_t>(pipe, ir_format(),
194 PIPE_COMPUTE_CAP_IMAGES_SUPPORTED)[0];
195 }
196
197 bool
198 device::has_doubles() const {
199 return pipe->get_param(pipe, PIPE_CAP_DOUBLES);
200 }
201
202 bool
203 device::has_halves() const {
204 return pipe->get_shader_param(pipe, PIPE_SHADER_COMPUTE,
205 PIPE_SHADER_CAP_FP16);
206 }
207
208 bool
209 device::has_int64_atomics() const {
210 return pipe->get_shader_param(pipe, PIPE_SHADER_COMPUTE,
211 PIPE_SHADER_CAP_INT64_ATOMICS);
212 }
213
214 bool
215 device::has_unified_memory() const {
216 return pipe->get_param(pipe, PIPE_CAP_UMA);
217 }
218
219 size_t
220 device::mem_base_addr_align() const {
221 return std::max((size_t)sysconf(_SC_PAGESIZE), sizeof(cl_long) * 16);
222 }
223
cl_device_svm_capabilities
device::svm_support() const {
   // Without CAP_RESOURCE_FROM_USER_MEMORY, SVM and CL_MEM_USE_HOST_PTR
   // interactions won't work according to spec, as clover manages a GPU-side
   // copy of the host data.
   //
   // The biggest problem is memory buffers created with CL_MEM_USE_HOST_PTR
   // where the application and/or the kernel updates the memory via SVM and
   // not the cl_mem buffer.
   // We can't even do proper tracking on what memory might have been accessed
   // as the host ptr to the buffer could be within an SVM region, where through
   // the CL API there is no reliable way of knowing if a certain cl_mem buffer
   // was accessed by a kernel or not, and the runtime can't reliably know from
   // which side the GPU buffer content needs to be updated.
   //
   // Another unsolvable scenario is a cl_mem object passed by cl_mem reference
   // and SVM pointer into the same kernel at the same time.
   if (allows_user_pointers() && pipe->get_param(pipe, PIPE_CAP_SYSTEM_SVM))
      // We can emulate all lower SVM levels if we support fine-grain system.
      return CL_DEVICE_SVM_FINE_GRAIN_SYSTEM |
             CL_DEVICE_SVM_COARSE_GRAIN_BUFFER |
             CL_DEVICE_SVM_FINE_GRAIN_BUFFER;
   return 0;
}
248
249 bool
250 device::allows_user_pointers() const {
251 return pipe->get_param(pipe, PIPE_CAP_RESOURCE_FROM_USER_MEMORY) ||
252 pipe->get_param(pipe, PIPE_CAP_RESOURCE_FROM_USER_MEMORY_COMPUTE_ONLY);
253 }
254
255 std::vector<size_t>
256 device::max_block_size() const {
257 auto v = get_compute_param<uint64_t>(pipe, ir_format(),
258 PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE);
259 return { v.begin(), v.end() };
260 }
261
262 cl_uint
263 device::subgroup_size() const {
264 return get_compute_param<uint32_t>(pipe, ir_format(),
265 PIPE_COMPUTE_CAP_SUBGROUP_SIZE)[0];
266 }
267
268 cl_uint
269 device::address_bits() const {
270 return get_compute_param<uint32_t>(pipe, ir_format(),
271 PIPE_COMPUTE_CAP_ADDRESS_BITS)[0];
272 }
273
274 std::string
275 device::device_name() const {
276 return pipe->get_name(pipe);
277 }
278
279 std::string
280 device::vendor_name() const {
281 return pipe->get_device_vendor(pipe);
282 }
283
284 enum pipe_shader_ir
285 device::ir_format() const {
286 if (supports_ir(PIPE_SHADER_IR_NATIVE))
287 return PIPE_SHADER_IR_NATIVE;
288
289 assert(supports_ir(PIPE_SHADER_IR_NIR_SERIALIZED));
290 return PIPE_SHADER_IR_NIR_SERIALIZED;
291 }
292
293 std::string
294 device::ir_target() const {
295 std::vector<char> target = get_compute_param<char>(
296 pipe, ir_format(), PIPE_COMPUTE_CAP_IR_TARGET);
297 return { target.data() };
298 }
299
300 enum pipe_endian
301 device::endianness() const {
302 return (enum pipe_endian)pipe->get_param(pipe, PIPE_CAP_ENDIANNESS);
303 }
304
305 std::string
306 device::device_version() const {
307 static const std::string device_version =
308 debug_get_option("CLOVER_DEVICE_VERSION_OVERRIDE", "1.1");
309 return device_version;
310 }
311
312 std::string
313 device::device_clc_version() const {
314 static const std::string device_clc_version =
315 debug_get_option("CLOVER_DEVICE_CLC_VERSION_OVERRIDE", "1.1");
316 return device_clc_version;
317 }
318
319 bool
320 device::supports_ir(enum pipe_shader_ir ir) const {
321 return pipe->get_shader_param(pipe, PIPE_SHADER_COMPUTE,
322 PIPE_SHADER_CAP_SUPPORTED_IRS) & (1 << ir);
323 }
324
325 std::string
326 device::supported_extensions() const {
327 return
328 "cl_khr_byte_addressable_store"
329 " cl_khr_global_int32_base_atomics"
330 " cl_khr_global_int32_extended_atomics"
331 " cl_khr_local_int32_base_atomics"
332 " cl_khr_local_int32_extended_atomics"
333 + std::string(has_int64_atomics() ? " cl_khr_int64_base_atomics" : "")
334 + std::string(has_int64_atomics() ? " cl_khr_int64_extended_atomics" : "")
335 + std::string(has_doubles() ? " cl_khr_fp64" : "")
336 + std::string(has_halves() ? " cl_khr_fp16" : "")
337 + std::string(svm_support() ? " cl_arm_shared_virtual_memory" : "");
338 }
339
const void *
device::get_compiler_options(enum pipe_shader_ir ir) const {
   // Forward to the screen; the pointee's concrete type depends on the IR
   // format requested — presumably nir_shader_compiler_options for NIR
   // (TODO confirm against the driver side).
   return pipe->get_compiler_options(pipe, ir, PIPE_SHADER_COMPUTE);
}