vc4: Only call nir_lower_io on shader_in/out
[mesa.git] / src / gallium / drivers / vc4 / vc4_screen.c
1 /*
2 * Copyright © 2014 Broadcom
3 * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 * IN THE SOFTWARE.
23 */
24
25 #include "util/os_misc.h"
26 #include "pipe/p_defines.h"
27 #include "pipe/p_screen.h"
28 #include "pipe/p_state.h"
29
30 #include "util/u_cpu_detect.h"
31 #include "util/u_debug.h"
32 #include "util/u_memory.h"
33 #include "util/format/u_format.h"
34 #include "util/u_hash_table.h"
35 #include "util/u_screen.h"
36 #include "util/u_transfer_helper.h"
37 #include "util/ralloc.h"
38
39 #include <xf86drm.h>
40 #include "drm-uapi/drm_fourcc.h"
41 #include "drm-uapi/vc4_drm.h"
42 #include "vc4_screen.h"
43 #include "vc4_context.h"
44 #include "vc4_resource.h"
45
46 static const struct debug_named_value debug_options[] = {
47 { "cl", VC4_DEBUG_CL,
48 "Dump command list during creation" },
49 { "surf", VC4_DEBUG_SURFACE,
50 "Dump surface layouts" },
51 { "qpu", VC4_DEBUG_QPU,
52 "Dump generated QPU instructions" },
53 { "qir", VC4_DEBUG_QIR,
54 "Dump QPU IR during program compile" },
55 { "nir", VC4_DEBUG_NIR,
56 "Dump NIR during program compile" },
57 { "tgsi", VC4_DEBUG_TGSI,
58 "Dump TGSI during program compile" },
59 { "shaderdb", VC4_DEBUG_SHADERDB,
60 "Dump program compile information for shader-db analysis" },
61 { "perf", VC4_DEBUG_PERF,
62 "Print during performance-related events" },
63 { "norast", VC4_DEBUG_NORAST,
64 "Skip actual hardware execution of commands" },
65 { "always_flush", VC4_DEBUG_ALWAYS_FLUSH,
66 "Flush after each draw call" },
67 { "always_sync", VC4_DEBUG_ALWAYS_SYNC,
68 "Wait for finish after each flush" },
69 #ifdef USE_VC4_SIMULATOR
70 { "dump", VC4_DEBUG_DUMP,
71 "Write a GPU command stream trace file" },
72 #endif
73 { NULL }
74 };
75
76 DEBUG_GET_ONCE_FLAGS_OPTION(vc4_debug, "VC4_DEBUG", debug_options, 0)
77 uint32_t vc4_debug;
78
79 static const char *
80 vc4_screen_get_name(struct pipe_screen *pscreen)
81 {
82 struct vc4_screen *screen = vc4_screen(pscreen);
83
84 if (!screen->name) {
85 screen->name = ralloc_asprintf(screen,
86 "VC4 V3D %d.%d",
87 screen->v3d_ver / 10,
88 screen->v3d_ver % 10);
89 }
90
91 return screen->name;
92 }
93
/**
 * pipe_screen::get_vendor / get_device_vendor hook.
 *
 * The answer does not depend on the screen, so the argument is ignored.
 */
static const char *
vc4_screen_get_vendor(struct pipe_screen *pscreen)
{
        (void) pscreen;

        return "Broadcom";
}
99
100 static void
101 vc4_screen_destroy(struct pipe_screen *pscreen)
102 {
103 struct vc4_screen *screen = vc4_screen(pscreen);
104
105 _mesa_hash_table_destroy(screen->bo_handles, NULL);
106 vc4_bufmgr_destroy(pscreen);
107 slab_destroy_parent(&screen->transfer_pool);
108 free(screen->ro);
109
110 #ifdef USE_VC4_SIMULATOR
111 vc4_simulator_destroy(screen);
112 #endif
113
114 u_transfer_helper_destroy(pscreen->transfer_helper);
115
116 close(screen->fd);
117 ralloc_free(pscreen);
118 }
119
120 static bool
121 vc4_has_feature(struct vc4_screen *screen, uint32_t feature)
122 {
123 struct drm_vc4_get_param p = {
124 .param = feature,
125 };
126 int ret = vc4_ioctl(screen->fd, DRM_IOCTL_VC4_GET_PARAM, &p);
127
128 if (ret != 0)
129 return false;
130
131 return p.value;
132 }
133
134 static int
135 vc4_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
136 {
137 struct vc4_screen *screen = vc4_screen(pscreen);
138
139 switch (param) {
140 /* Supported features (boolean caps). */
141 case PIPE_CAP_VERTEX_COLOR_CLAMPED:
142 case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
143 case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
144 case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
145 case PIPE_CAP_NPOT_TEXTURES:
146 case PIPE_CAP_SHAREABLE_SHADERS:
147 case PIPE_CAP_BLEND_EQUATION_SEPARATE:
148 case PIPE_CAP_TEXTURE_MULTISAMPLE:
149 case PIPE_CAP_TEXTURE_SWIZZLE:
150 case PIPE_CAP_TEXTURE_BARRIER:
151 case PIPE_CAP_TGSI_TEXCOORD:
152 return 1;
153
154 case PIPE_CAP_NATIVE_FENCE_FD:
155 return screen->has_syncobj;
156
157 case PIPE_CAP_TILE_RASTER_ORDER:
158 return vc4_has_feature(screen,
159 DRM_VC4_PARAM_SUPPORTS_FIXED_RCL_ORDER);
160
161 /* lying for GL 2.0 */
162 case PIPE_CAP_OCCLUSION_QUERY:
163 case PIPE_CAP_POINT_SPRITE:
164 return 1;
165
166 case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
167 case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
168 case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
169 return 1;
170
171 case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:
172 case PIPE_CAP_MIXED_COLOR_DEPTH_BITS:
173 return 1;
174
175 /* Texturing. */
176 case PIPE_CAP_MAX_TEXTURE_2D_SIZE:
177 return 2048;
178 case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
179 return VC4_MAX_MIP_LEVELS;
180 case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
181 /* Note: Not supported in hardware, just faking it. */
182 return 5;
183
184 case PIPE_CAP_MAX_VARYINGS:
185 return 8;
186
187 case PIPE_CAP_VENDOR_ID:
188 return 0x14E4;
189 case PIPE_CAP_ACCELERATED:
190 return 1;
191 case PIPE_CAP_VIDEO_MEMORY: {
192 uint64_t system_memory;
193
194 if (!os_get_total_physical_memory(&system_memory))
195 return 0;
196
197 return (int)(system_memory >> 20);
198 }
199 case PIPE_CAP_UMA:
200 return 1;
201
202 default:
203 return u_pipe_screen_get_param_defaults(pscreen, param);
204 }
205 }
206
207 static float
208 vc4_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
209 {
210 switch (param) {
211 case PIPE_CAPF_MAX_LINE_WIDTH:
212 case PIPE_CAPF_MAX_LINE_WIDTH_AA:
213 return 32;
214
215 case PIPE_CAPF_MAX_POINT_WIDTH:
216 case PIPE_CAPF_MAX_POINT_WIDTH_AA:
217 return 512.0f;
218
219 case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
220 return 0.0f;
221 case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
222 return 0.0f;
223
224 case PIPE_CAPF_MIN_CONSERVATIVE_RASTER_DILATE:
225 case PIPE_CAPF_MAX_CONSERVATIVE_RASTER_DILATE:
226 case PIPE_CAPF_CONSERVATIVE_RASTER_DILATE_GRANULARITY:
227 return 0.0f;
228 default:
229 fprintf(stderr, "unknown paramf %d\n", param);
230 return 0;
231 }
232 }
233
234 static int
235 vc4_screen_get_shader_param(struct pipe_screen *pscreen,
236 enum pipe_shader_type shader,
237 enum pipe_shader_cap param)
238 {
239 if (shader != PIPE_SHADER_VERTEX &&
240 shader != PIPE_SHADER_FRAGMENT) {
241 return 0;
242 }
243
244 /* this is probably not totally correct.. but it's a start: */
245 switch (param) {
246 case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
247 case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
248 case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
249 case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
250 return 16384;
251
252 case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
253 return vc4_screen(pscreen)->has_control_flow;
254
255 case PIPE_SHADER_CAP_MAX_INPUTS:
256 return 8;
257 case PIPE_SHADER_CAP_MAX_OUTPUTS:
258 return shader == PIPE_SHADER_FRAGMENT ? 1 : 8;
259 case PIPE_SHADER_CAP_MAX_TEMPS:
260 return 256; /* GL_MAX_PROGRAM_TEMPORARIES_ARB */
261 case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
262 return 16 * 1024 * sizeof(float);
263 case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
264 return 1;
265 case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
266 return 0;
267 case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
268 case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
269 case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
270 return 0;
271 case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
272 return 1;
273 case PIPE_SHADER_CAP_SUBROUTINES:
274 return 0;
275 case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
276 return 0;
277 case PIPE_SHADER_CAP_INTEGERS:
278 return 1;
279 case PIPE_SHADER_CAP_INT64_ATOMICS:
280 case PIPE_SHADER_CAP_FP16:
281 case PIPE_SHADER_CAP_FP16_DERIVATIVES:
282 case PIPE_SHADER_CAP_INT16:
283 case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
284 case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
285 case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED:
286 case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
287 case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
288 return 0;
289 case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
290 case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
291 return VC4_MAX_TEXTURE_SAMPLERS;
292 case PIPE_SHADER_CAP_PREFERRED_IR:
293 return PIPE_SHADER_IR_NIR;
294 case PIPE_SHADER_CAP_SUPPORTED_IRS:
295 return 0;
296 case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
297 return 32;
298 case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
299 case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
300 case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD:
301 case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS:
302 case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS:
303 case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS:
304 return 0;
305 default:
306 fprintf(stderr, "unknown shader param %d\n", param);
307 return 0;
308 }
309 return 0;
310 }
311
312 static bool
313 vc4_screen_is_format_supported(struct pipe_screen *pscreen,
314 enum pipe_format format,
315 enum pipe_texture_target target,
316 unsigned sample_count,
317 unsigned storage_sample_count,
318 unsigned usage)
319 {
320 struct vc4_screen *screen = vc4_screen(pscreen);
321
322 if (MAX2(1, sample_count) != MAX2(1, storage_sample_count))
323 return false;
324
325 if (sample_count > 1 && sample_count != VC4_MAX_SAMPLES)
326 return false;
327
328 if (target >= PIPE_MAX_TEXTURE_TYPES) {
329 return false;
330 }
331
332 if (usage & PIPE_BIND_VERTEX_BUFFER) {
333 switch (format) {
334 case PIPE_FORMAT_R32G32B32A32_FLOAT:
335 case PIPE_FORMAT_R32G32B32_FLOAT:
336 case PIPE_FORMAT_R32G32_FLOAT:
337 case PIPE_FORMAT_R32_FLOAT:
338 case PIPE_FORMAT_R32G32B32A32_SNORM:
339 case PIPE_FORMAT_R32G32B32_SNORM:
340 case PIPE_FORMAT_R32G32_SNORM:
341 case PIPE_FORMAT_R32_SNORM:
342 case PIPE_FORMAT_R32G32B32A32_SSCALED:
343 case PIPE_FORMAT_R32G32B32_SSCALED:
344 case PIPE_FORMAT_R32G32_SSCALED:
345 case PIPE_FORMAT_R32_SSCALED:
346 case PIPE_FORMAT_R16G16B16A16_UNORM:
347 case PIPE_FORMAT_R16G16B16_UNORM:
348 case PIPE_FORMAT_R16G16_UNORM:
349 case PIPE_FORMAT_R16_UNORM:
350 case PIPE_FORMAT_R16G16B16A16_SNORM:
351 case PIPE_FORMAT_R16G16B16_SNORM:
352 case PIPE_FORMAT_R16G16_SNORM:
353 case PIPE_FORMAT_R16_SNORM:
354 case PIPE_FORMAT_R16G16B16A16_USCALED:
355 case PIPE_FORMAT_R16G16B16_USCALED:
356 case PIPE_FORMAT_R16G16_USCALED:
357 case PIPE_FORMAT_R16_USCALED:
358 case PIPE_FORMAT_R16G16B16A16_SSCALED:
359 case PIPE_FORMAT_R16G16B16_SSCALED:
360 case PIPE_FORMAT_R16G16_SSCALED:
361 case PIPE_FORMAT_R16_SSCALED:
362 case PIPE_FORMAT_R8G8B8A8_UNORM:
363 case PIPE_FORMAT_R8G8B8_UNORM:
364 case PIPE_FORMAT_R8G8_UNORM:
365 case PIPE_FORMAT_R8_UNORM:
366 case PIPE_FORMAT_R8G8B8A8_SNORM:
367 case PIPE_FORMAT_R8G8B8_SNORM:
368 case PIPE_FORMAT_R8G8_SNORM:
369 case PIPE_FORMAT_R8_SNORM:
370 case PIPE_FORMAT_R8G8B8A8_USCALED:
371 case PIPE_FORMAT_R8G8B8_USCALED:
372 case PIPE_FORMAT_R8G8_USCALED:
373 case PIPE_FORMAT_R8_USCALED:
374 case PIPE_FORMAT_R8G8B8A8_SSCALED:
375 case PIPE_FORMAT_R8G8B8_SSCALED:
376 case PIPE_FORMAT_R8G8_SSCALED:
377 case PIPE_FORMAT_R8_SSCALED:
378 break;
379 default:
380 return false;
381 }
382 }
383
384 if ((usage & PIPE_BIND_RENDER_TARGET) &&
385 !vc4_rt_format_supported(format)) {
386 return false;
387 }
388
389 if ((usage & PIPE_BIND_SAMPLER_VIEW) &&
390 (!vc4_tex_format_supported(format) ||
391 (format == PIPE_FORMAT_ETC1_RGB8 && !screen->has_etc1))) {
392 return false;
393 }
394
395 if ((usage & PIPE_BIND_DEPTH_STENCIL) &&
396 format != PIPE_FORMAT_S8_UINT_Z24_UNORM &&
397 format != PIPE_FORMAT_X8Z24_UNORM) {
398 return false;
399 }
400
401 if ((usage & PIPE_BIND_INDEX_BUFFER) &&
402 format != PIPE_FORMAT_I8_UINT &&
403 format != PIPE_FORMAT_I16_UINT) {
404 return false;
405 }
406
407 return true;
408 }
409
410 static void
411 vc4_screen_query_dmabuf_modifiers(struct pipe_screen *pscreen,
412 enum pipe_format format, int max,
413 uint64_t *modifiers,
414 unsigned int *external_only,
415 int *count)
416 {
417 int m, i;
418 bool tex_will_lower;
419 uint64_t available_modifiers[] = {
420 DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED,
421 DRM_FORMAT_MOD_LINEAR,
422 };
423 struct vc4_screen *screen = vc4_screen(pscreen);
424 int num_modifiers = screen->has_tiling_ioctl ? 2 : 1;
425
426 if (!modifiers) {
427 *count = num_modifiers;
428 return;
429 }
430
431 *count = MIN2(max, num_modifiers);
432 m = screen->has_tiling_ioctl ? 0 : 1;
433 tex_will_lower = !vc4_tex_format_supported(format);
434 /* We support both modifiers (tiled and linear) for all sampler
435 * formats, but if we don't have the DRM_VC4_GET_TILING ioctl
436 * we shouldn't advertise the tiled formats.
437 */
438 for (i = 0; i < *count; i++) {
439 modifiers[i] = available_modifiers[m++];
440 if (external_only)
441 external_only[i] = tex_will_lower;
442 }
443 }
444
445 static bool
446 vc4_get_chip_info(struct vc4_screen *screen)
447 {
448 struct drm_vc4_get_param ident0 = {
449 .param = DRM_VC4_PARAM_V3D_IDENT0,
450 };
451 struct drm_vc4_get_param ident1 = {
452 .param = DRM_VC4_PARAM_V3D_IDENT1,
453 };
454 int ret;
455
456 ret = vc4_ioctl(screen->fd, DRM_IOCTL_VC4_GET_PARAM, &ident0);
457 if (ret != 0) {
458 if (errno == EINVAL) {
459 /* Backwards compatibility with 2835 kernels which
460 * only do V3D 2.1.
461 */
462 screen->v3d_ver = 21;
463 return true;
464 } else {
465 fprintf(stderr, "Couldn't get V3D IDENT0: %s\n",
466 strerror(errno));
467 return false;
468 }
469 }
470 ret = vc4_ioctl(screen->fd, DRM_IOCTL_VC4_GET_PARAM, &ident1);
471 if (ret != 0) {
472 fprintf(stderr, "Couldn't get V3D IDENT1: %s\n",
473 strerror(errno));
474 return false;
475 }
476
477 uint32_t major = (ident0.value >> 24) & 0xff;
478 uint32_t minor = (ident1.value >> 0) & 0xf;
479 screen->v3d_ver = major * 10 + minor;
480
481 if (screen->v3d_ver != 21 && screen->v3d_ver != 26) {
482 fprintf(stderr,
483 "V3D %d.%d not supported by this version of Mesa.\n",
484 screen->v3d_ver / 10,
485 screen->v3d_ver % 10);
486 return false;
487 }
488
489 return true;
490 }
491
492 struct pipe_screen *
493 vc4_screen_create(int fd, struct renderonly *ro)
494 {
495 struct vc4_screen *screen = rzalloc(NULL, struct vc4_screen);
496 uint64_t syncobj_cap = 0;
497 struct pipe_screen *pscreen;
498 int err;
499
500 pscreen = &screen->base;
501
502 pscreen->destroy = vc4_screen_destroy;
503 pscreen->get_param = vc4_screen_get_param;
504 pscreen->get_paramf = vc4_screen_get_paramf;
505 pscreen->get_shader_param = vc4_screen_get_shader_param;
506 pscreen->context_create = vc4_context_create;
507 pscreen->is_format_supported = vc4_screen_is_format_supported;
508
509 screen->fd = fd;
510 if (ro) {
511 screen->ro = renderonly_dup(ro);
512 if (!screen->ro) {
513 fprintf(stderr, "Failed to dup renderonly object\n");
514 ralloc_free(screen);
515 return NULL;
516 }
517 }
518
519 list_inithead(&screen->bo_cache.time_list);
520 (void) mtx_init(&screen->bo_handles_mutex, mtx_plain);
521 screen->bo_handles = util_hash_table_create_ptr_keys();
522
523 screen->has_control_flow =
524 vc4_has_feature(screen, DRM_VC4_PARAM_SUPPORTS_BRANCHES);
525 screen->has_etc1 =
526 vc4_has_feature(screen, DRM_VC4_PARAM_SUPPORTS_ETC1);
527 screen->has_threaded_fs =
528 vc4_has_feature(screen, DRM_VC4_PARAM_SUPPORTS_THREADED_FS);
529 screen->has_madvise =
530 vc4_has_feature(screen, DRM_VC4_PARAM_SUPPORTS_MADVISE);
531 screen->has_perfmon_ioctl =
532 vc4_has_feature(screen, DRM_VC4_PARAM_SUPPORTS_PERFMON);
533
534 err = drmGetCap(fd, DRM_CAP_SYNCOBJ, &syncobj_cap);
535 if (err == 0 && syncobj_cap)
536 screen->has_syncobj = true;
537
538 if (!vc4_get_chip_info(screen))
539 goto fail;
540
541 util_cpu_detect();
542
543 slab_create_parent(&screen->transfer_pool, sizeof(struct vc4_transfer), 16);
544
545 vc4_fence_screen_init(screen);
546
547 vc4_debug = debug_get_option_vc4_debug();
548 if (vc4_debug & VC4_DEBUG_SHADERDB)
549 vc4_debug |= VC4_DEBUG_NORAST;
550
551 #ifdef USE_VC4_SIMULATOR
552 vc4_simulator_init(screen);
553 #endif
554
555 vc4_resource_screen_init(pscreen);
556
557 pscreen->get_name = vc4_screen_get_name;
558 pscreen->get_vendor = vc4_screen_get_vendor;
559 pscreen->get_device_vendor = vc4_screen_get_vendor;
560 pscreen->get_compiler_options = vc4_screen_get_compiler_options;
561 pscreen->query_dmabuf_modifiers = vc4_screen_query_dmabuf_modifiers;
562
563 if (screen->has_perfmon_ioctl) {
564 pscreen->get_driver_query_group_info = vc4_get_driver_query_group_info;
565 pscreen->get_driver_query_info = vc4_get_driver_query_info;
566 }
567
568 return pscreen;
569
570 fail:
571 close(fd);
572 ralloc_free(pscreen);
573 return NULL;
574 }