iris: Always emit at least one BLEND_STATE
[mesa.git] / src / gallium / drivers / iris / iris_program.c
/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * @file iris_program.c
 *
 * This file contains the driver interface for compiling shaders.
 *
 * See iris_program_cache.c for the in-memory program cache where the
 * compiled shaders are stored.
 */

#include <stdio.h>
#include <errno.h>
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "util/u_atomic.h"
#include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h"
#include "intel/compiler/brw_compiler.h"
#include "intel/compiler/brw_nir.h"
#include "iris_context.h"

#define ALL_SAMPLERS_XYZW .tex.swizzles[0 ... MAX_SAMPLERS - 1] = 0x688
#define KEY_INIT .program_string_id = ish->program_id, ALL_SAMPLERS_XYZW

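/* Illustrative note (not driver code; relies on the GNU designated range
 * initializer extension): for the vertex shader, KEY_INIT expands roughly to
 *
 *    struct brw_vs_prog_key key = {
 *       .program_string_id = ish->program_id,
 *       .tex.swizzles[0 ... MAX_SAMPLERS - 1] = 0x688,
 *    };
 *
 * where 0x688 packs the identity swizzle (X,Y,Z,W) at three bits per
 * component, so every sampler starts out unswizzled.
 */
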
static unsigned
get_new_program_id(struct iris_screen *screen)
{
   return p_atomic_inc_return(&screen->program_id);
}

/**
 * An uncompiled, API-facing shader.  This is the Gallium CSO for shaders.
 * It primarily contains the NIR for the shader.
 *
 * Each API-facing shader can be compiled into multiple shader variants,
 * based on non-orthogonal state dependencies, recorded in the shader key.
 *
 * See iris_compiled_shader, which represents a compiled shader variant.
 */
struct iris_uncompiled_shader {
   nir_shader *nir;

   struct pipe_stream_output_info stream_output;

   unsigned program_id;

   /** Bitfield of (1 << IRIS_NOS_*) flags. */
   unsigned nos;

   /** Have any shader variants been compiled yet? */
   bool compiled_once;
};

static nir_ssa_def *
get_aoa_deref_offset(nir_builder *b,
                     nir_deref_instr *deref,
                     unsigned elem_size)
{
   unsigned array_size = elem_size;
   nir_ssa_def *offset = nir_imm_int(b, 0);

   while (deref->deref_type != nir_deref_type_var) {
      assert(deref->deref_type == nir_deref_type_array);

      /* This level's element size is the previous level's array size */
      assert(deref->arr.index.ssa);
      nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
      offset = nir_iadd(b, offset,
                        nir_imul(b, index, nir_imm_int(b, array_size)));

      deref = nir_deref_instr_parent(deref);
      assert(glsl_type_is_array(deref->type));
      array_size *= glsl_get_length(deref->type);
   }

   /* Accessing an invalid surface index with the dataport can result in a
    * hang.  According to the spec "if the index used to select an individual
    * element is negative or greater than or equal to the size of the array,
    * the results of the operation are undefined but may not lead to
    * termination" -- which is one of the possible outcomes of the hang.
    * Clamp the index to prevent access outside of the array bounds.
    */
   return nir_umin(b, offset, nir_imm_int(b, array_size - elem_size));
}
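
/* Worked example (illustrative sketch, hypothetical declaration): for
 *
 *    uniform image2D img[3][2];
 *
 * a deref chain img[i][j] walks two array levels.  With elem_size == 1,
 * the first iteration adds j * 1, the second adds i * 2 (the inner array
 * length), giving offset = i * 2 + j, clamped to the last valid element
 * (3 * 2 - 1 = 5).
 */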

static void
iris_lower_storage_image_derefs(nir_shader *nir)
{
   nir_function_impl *impl = nir_shader_get_entrypoint(nir);

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_image_deref_load:
         case nir_intrinsic_image_deref_store:
         case nir_intrinsic_image_deref_atomic_add:
         case nir_intrinsic_image_deref_atomic_min:
         case nir_intrinsic_image_deref_atomic_max:
         case nir_intrinsic_image_deref_atomic_and:
         case nir_intrinsic_image_deref_atomic_or:
         case nir_intrinsic_image_deref_atomic_xor:
         case nir_intrinsic_image_deref_atomic_exchange:
         case nir_intrinsic_image_deref_atomic_comp_swap:
         case nir_intrinsic_image_deref_size:
         case nir_intrinsic_image_deref_samples:
         case nir_intrinsic_image_deref_load_raw_intel:
         case nir_intrinsic_image_deref_store_raw_intel: {
            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
            nir_variable *var = nir_deref_instr_get_variable(deref);

            b.cursor = nir_before_instr(&intrin->instr);
            nir_ssa_def *index =
               nir_iadd(&b, nir_imm_int(&b, var->data.driver_location),
                        get_aoa_deref_offset(&b, deref, 1));
            brw_nir_rewrite_image_intrinsic(intrin, index);
            break;
         }

         default:
            break;
         }
      }
   }
}

// XXX: need unify_interfaces() at link time...

/**
 * Fix an uncompiled shader's stream output info.
 *
 * Core Gallium stores output->register_index as a "slot" number, where
 * slots are assigned consecutively to all outputs in info->outputs_written.
 * This naive packing of outputs doesn't work for us - we too have slots,
 * but the layout is defined by the VUE map, which we won't have until we
 * compile a specific shader variant.  So, we remap these and simply store
 * VARYING_SLOT_* in our copy's output->register_index fields.
 *
 * We also fix up VARYING_SLOT_{LAYER,VIEWPORT,PSIZ} to select the Y/Z/W
 * components of our VUE header.  See brw_vue_map.c for the layout.
 */
static void
update_so_info(struct pipe_stream_output_info *so_info,
               uint64_t outputs_written)
{
   uint8_t reverse_map[64] = {};
   unsigned slot = 0;
   while (outputs_written) {
      reverse_map[slot++] = u_bit_scan64(&outputs_written);
   }

   for (unsigned i = 0; i < so_info->num_outputs; i++) {
      struct pipe_stream_output *output = &so_info->output[i];

      /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
      output->register_index = reverse_map[output->register_index];

      /* The VUE header contains three scalar fields packed together:
       * - gl_PointSize is stored in VARYING_SLOT_PSIZ.w
       * - gl_Layer is stored in VARYING_SLOT_PSIZ.y
       * - gl_ViewportIndex is stored in VARYING_SLOT_PSIZ.z
       */
      switch (output->register_index) {
      case VARYING_SLOT_LAYER:
         assert(output->num_components == 1);
         output->register_index = VARYING_SLOT_PSIZ;
         output->start_component = 1;
         break;
      case VARYING_SLOT_VIEWPORT:
         assert(output->num_components == 1);
         output->register_index = VARYING_SLOT_PSIZ;
         output->start_component = 2;
         break;
      case VARYING_SLOT_PSIZ:
         assert(output->num_components == 1);
         output->start_component = 3;
         break;
      }

      //info->outputs_written |= 1ull << output->register_index;
   }
}
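
/* Illustrative sketch (hypothetical shader): for a VS whose outputs_written
 * covers POS, PSIZ, LAYER, and VAR0, Gallium numbers its stream outputs
 * 0..3 in that bit order; reverse_map[] restores the VARYING_SLOT_* values,
 * and a gl_Layer output is then redirected to VARYING_SLOT_PSIZ with
 * start_component = 1 (the Y channel of the VUE header).
 */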

/**
 * Sets up the starting offsets for the groups of binding table entries
 * common to all pipeline stages.
 *
 * Unused groups are initialized to 0xd0d0d0d0 to make it obvious that they
 * are unused, and also so that adding small offsets to them will trigger
 * some of our asserts that surface indices are < BRW_MAX_SURFACES.
 */
static uint32_t
assign_common_binding_table_offsets(const struct gen_device_info *devinfo,
                                    const struct nir_shader *nir,
                                    struct brw_stage_prog_data *prog_data,
                                    uint32_t next_binding_table_offset,
                                    unsigned num_system_values,
                                    unsigned num_cbufs)
{
   const struct shader_info *info = &nir->info;

   unsigned num_textures = util_last_bit(info->textures_used);

   if (num_textures) {
      prog_data->binding_table.texture_start = next_binding_table_offset;
      prog_data->binding_table.gather_texture_start = next_binding_table_offset;
      next_binding_table_offset += num_textures;
   } else {
      prog_data->binding_table.texture_start = 0xd0d0d0d0;
      prog_data->binding_table.gather_texture_start = 0xd0d0d0d0;
   }

   if (info->num_images) {
      prog_data->binding_table.image_start = next_binding_table_offset;
      next_binding_table_offset += info->num_images;
   } else {
      prog_data->binding_table.image_start = 0xd0d0d0d0;
   }

   if (num_cbufs) {
      //assert(info->num_ubos <= BRW_MAX_UBO);
      prog_data->binding_table.ubo_start = next_binding_table_offset;
      next_binding_table_offset += num_cbufs;
   } else {
      prog_data->binding_table.ubo_start = 0xd0d0d0d0;
   }

   if (info->num_ssbos || info->num_abos) {
      prog_data->binding_table.ssbo_start = next_binding_table_offset;
      // XXX: see iris_state "wasting 16 binding table slots for ABOs" comment
      next_binding_table_offset += IRIS_MAX_ABOS + info->num_ssbos;
   } else {
      prog_data->binding_table.ssbo_start = 0xd0d0d0d0;
   }

   prog_data->binding_table.shader_time_start = 0xd0d0d0d0;

   /* Plane 0 is just the regular texture section */
   prog_data->binding_table.plane_start[0] = prog_data->binding_table.texture_start;

   prog_data->binding_table.plane_start[1] = next_binding_table_offset;
   next_binding_table_offset += num_textures;

   prog_data->binding_table.plane_start[2] = next_binding_table_offset;
   next_binding_table_offset += num_textures;

   /* Set the binding table size */
   prog_data->binding_table.size_bytes = next_binding_table_offset * 4;

   return next_binding_table_offset;
}
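
/* Example layout (illustrative, hypothetical counts): a fragment shader
 * with 2 render targets, 3 textures, no images, and 1 cbuf, starting at
 * next_binding_table_offset = 2 (one slot per render target), ends up with
 *
 *    texture_start  = 2   (slots 2..4)
 *    ubo_start      = 5   (slot 5)
 *    plane_start[1] = 6, plane_start[2] = 9
 *
 * for a final offset of 12 (size_bytes = 48), with every unused group
 * parked at 0xd0d0d0d0.
 */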

static void
setup_vec4_image_sysval(uint32_t *sysvals, uint32_t idx,
                        unsigned offset, unsigned n)
{
   assert(offset % sizeof(uint32_t) == 0);

   for (unsigned i = 0; i < n; ++i)
      sysvals[i] = BRW_PARAM_IMAGE(idx, offset / sizeof(uint32_t) + i);

   for (unsigned i = n; i < 4; ++i)
      sysvals[i] = BRW_PARAM_BUILTIN_ZERO;
}
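
/* Sketch (hypothetical call): setup_vec4_image_sysval(sv, 2,
 * offsetof(struct brw_image_param, size), 3) fills sv[0..2] with
 * BRW_PARAM_IMAGE(2, size_dword + i) and pads sv[3] with
 * BRW_PARAM_BUILTIN_ZERO, so each image field uploads as a full vec4.
 */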

/**
 * Associate NIR uniform variables with the prog_data->param[] mechanism
 * used by the backend.  Also, decide which UBOs we'd like to push in an
 * ideal situation (though the backend can reduce this).
 */
static void
iris_setup_uniforms(const struct brw_compiler *compiler,
                    void *mem_ctx,
                    nir_shader *nir,
                    struct brw_stage_prog_data *prog_data,
                    enum brw_param_builtin **out_system_values,
                    unsigned *out_num_system_values,
                    unsigned *out_num_cbufs)
{
   const struct gen_device_info *devinfo = compiler->devinfo;

   /* The intel compiler assumes that num_uniforms is in bytes.  For
    * scalar that means 4 bytes per uniform slot.
    *
    * Ref: brw_nir_lower_uniforms, type_size_scalar_bytes.
    */
   nir->num_uniforms *= 4;

   const unsigned IRIS_MAX_SYSTEM_VALUES =
      PIPE_MAX_SHADER_IMAGES * BRW_IMAGE_PARAM_SIZE;
   enum brw_param_builtin *system_values =
      rzalloc_array(mem_ctx, enum brw_param_builtin, IRIS_MAX_SYSTEM_VALUES);
   unsigned num_system_values = 0;

   unsigned patch_vert_idx = -1;
   unsigned ucp_idx[IRIS_MAX_CLIP_PLANES];
   unsigned img_idx[PIPE_MAX_SHADER_IMAGES];
   memset(ucp_idx, -1, sizeof(ucp_idx));
   memset(img_idx, -1, sizeof(img_idx));

   nir_function_impl *impl = nir_shader_get_entrypoint(nir);

   nir_builder b;
   nir_builder_init(&b, impl);

   b.cursor = nir_before_block(nir_start_block(impl));
   nir_ssa_def *temp_ubo_name = nir_ssa_undef(&b, 1, 32);

   /* Turn system value intrinsics into uniforms */
   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         nir_ssa_def *offset;

         switch (intrin->intrinsic) {
         case nir_intrinsic_load_user_clip_plane: {
            unsigned ucp = nir_intrinsic_ucp_id(intrin);

            if (ucp_idx[ucp] == -1) {
               ucp_idx[ucp] = num_system_values;
               num_system_values += 4;
            }

            for (int i = 0; i < 4; i++) {
               system_values[ucp_idx[ucp] + i] =
                  BRW_PARAM_BUILTIN_CLIP_PLANE(ucp, i);
            }

            b.cursor = nir_before_instr(instr);
            offset = nir_imm_int(&b, ucp_idx[ucp] * sizeof(uint32_t));
            break;
         }
         case nir_intrinsic_load_patch_vertices_in:
            if (patch_vert_idx == -1)
               patch_vert_idx = num_system_values++;

            system_values[patch_vert_idx] =
               BRW_PARAM_BUILTIN_PATCH_VERTICES_IN;

            b.cursor = nir_before_instr(instr);
            offset = nir_imm_int(&b, patch_vert_idx * sizeof(uint32_t));
            break;
         case nir_intrinsic_image_deref_load_param_intel: {
            assert(devinfo->gen < 9);
            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
            nir_variable *var = nir_deref_instr_get_variable(deref);

            /* XXX: var->data.binding is not set properly.  We need to run
             * some form of gl_nir_lower_samplers_as_deref() to get it.
             * This breaks tests which use more than one image.
             */
            if (img_idx[var->data.binding] == -1) {
               /* GL only allows arrays of arrays of images. */
               assert(glsl_type_is_image(glsl_without_array(var->type)));
               unsigned num_images = MAX2(1, glsl_get_aoa_size(var->type));

               for (int i = 0; i < num_images; i++) {
                  const unsigned img = var->data.binding + i;

                  img_idx[img] = num_system_values;
                  num_system_values += BRW_IMAGE_PARAM_SIZE;

                  uint32_t *img_sv = &system_values[img_idx[img]];

                  setup_vec4_image_sysval(
                     img_sv + BRW_IMAGE_PARAM_OFFSET_OFFSET, img,
                     offsetof(struct brw_image_param, offset), 2);
                  setup_vec4_image_sysval(
                     img_sv + BRW_IMAGE_PARAM_SIZE_OFFSET, img,
                     offsetof(struct brw_image_param, size), 3);
                  setup_vec4_image_sysval(
                     img_sv + BRW_IMAGE_PARAM_STRIDE_OFFSET, img,
                     offsetof(struct brw_image_param, stride), 4);
                  setup_vec4_image_sysval(
                     img_sv + BRW_IMAGE_PARAM_TILING_OFFSET, img,
                     offsetof(struct brw_image_param, tiling), 3);
                  setup_vec4_image_sysval(
                     img_sv + BRW_IMAGE_PARAM_SWIZZLING_OFFSET, img,
                     offsetof(struct brw_image_param, swizzling), 2);
               }
            }

            b.cursor = nir_before_instr(instr);
            offset = nir_iadd(&b,
               get_aoa_deref_offset(&b, deref, BRW_IMAGE_PARAM_SIZE * 4),
               nir_imm_int(&b, img_idx[var->data.binding] * 4 +
                               nir_intrinsic_base(intrin) * 16));
            break;
         }
         default:
            continue;
         }

         unsigned comps = nir_intrinsic_dest_components(intrin);

         nir_intrinsic_instr *load =
            nir_intrinsic_instr_create(nir, nir_intrinsic_load_ubo);
         load->num_components = comps;
         load->src[0] = nir_src_for_ssa(temp_ubo_name);
         load->src[1] = nir_src_for_ssa(offset);
         nir_ssa_dest_init(&load->instr, &load->dest, comps, 32, NULL);
         nir_builder_instr_insert(&b, &load->instr);
         nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                                  nir_src_for_ssa(&load->dest.ssa));
         nir_instr_remove(instr);
      }
   }

   nir_validate_shader(nir, "before remapping");

   /* Place the new params at the front of constant buffer 0. */
   if (num_system_values > 0) {
      nir->num_uniforms += num_system_values * sizeof(uint32_t);

      system_values = reralloc(mem_ctx, system_values, enum brw_param_builtin,
                               num_system_values);

      nir_foreach_block(block, impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);

            if (load->intrinsic != nir_intrinsic_load_ubo)
               continue;

            b.cursor = nir_before_instr(instr);

            assert(load->src[0].is_ssa);

            if (load->src[0].ssa == temp_ubo_name) {
               nir_instr_rewrite_src(instr, &load->src[0],
                                     nir_src_for_ssa(nir_imm_int(&b, 0)));
            } else if (nir_src_as_uint(load->src[0]) == 0) {
               nir_ssa_def *offset =
                  nir_iadd(&b, load->src[1].ssa,
                           nir_imm_int(&b, 4 * num_system_values));
               nir_instr_rewrite_src(instr, &load->src[1],
                                     nir_src_for_ssa(offset));
            }
         }
      }

      /* We need to fold the new iadds for brw_nir_analyze_ubo_ranges */
      nir_opt_constant_folding(nir);
   } else {
      ralloc_free(system_values);
      system_values = NULL;
   }

   nir_validate_shader(nir, "after remap");

   if (nir->info.stage != MESA_SHADER_COMPUTE)
      brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);

   /* We don't use params[], but fs_visitor::nir_setup_uniforms() asserts
    * about it for compute shaders, so go ahead and make some fake ones
    * which the backend will dead code eliminate.
    */
   prog_data->nr_params = nir->num_uniforms / 4;
   prog_data->param = rzalloc_array(mem_ctx, uint32_t, prog_data->nr_params);

   /* System values and uniforms are stored in constant buffer 0, and the
    * user-facing UBOs are shifted up by one.  So if any constant buffer is
    * needed, constant buffer 0 will be needed, so account for it.
    */
   unsigned num_cbufs = nir->info.num_ubos;
   if (num_cbufs || num_system_values || nir->num_uniforms)
      num_cbufs++;

   *out_system_values = system_values;
   *out_num_system_values = num_system_values;
   *out_num_cbufs = num_cbufs;
}
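
/* Resulting cbuf0 layout (illustrative sketch, hypothetical counts): with
 * two system values and a shader using 16 bytes of uniforms, constant
 * buffer 0 holds [sysval0, sysval1, uniform dwords...], user UBO binding 0
 * becomes cbuf 1, and pre-existing load_ubo offsets into cbuf 0 are shifted
 * by 4 * num_system_values = 8 bytes.
 */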

/**
 * Compile a vertex shader, and upload the assembly.
 */
static struct iris_compiled_shader *
iris_compile_vs(struct iris_context *ice,
                struct iris_uncompiled_shader *ish,
                const struct brw_vs_prog_key *key)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   const struct brw_compiler *compiler = screen->compiler;
   const struct gen_device_info *devinfo = &screen->devinfo;
   void *mem_ctx = ralloc_context(NULL);
   struct brw_vs_prog_data *vs_prog_data =
      rzalloc(mem_ctx, struct brw_vs_prog_data);
   struct brw_vue_prog_data *vue_prog_data = &vs_prog_data->base;
   struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
   enum brw_param_builtin *system_values;
   unsigned num_system_values;
   unsigned num_cbufs;

   nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);

   if (key->nr_userclip_plane_consts) {
      nir_function_impl *impl = nir_shader_get_entrypoint(nir);
      nir_lower_clip_vs(nir, (1 << key->nr_userclip_plane_consts) - 1, true);
      nir_lower_io_to_temporaries(nir, impl, true, false);
      nir_lower_global_vars_to_local(nir);
      nir_lower_vars_to_ssa(nir);
      nir_shader_gather_info(nir, impl);
   }

   if (nir->info.name && strncmp(nir->info.name, "ARB", 3) == 0)
      prog_data->use_alt_mode = true;

   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
                       &num_system_values, &num_cbufs);

   assign_common_binding_table_offsets(devinfo, nir, prog_data, 0,
                                       num_system_values, num_cbufs);

   brw_compute_vue_map(devinfo,
                       &vue_prog_data->vue_map, nir->info.outputs_written,
                       nir->info.separate_shader);

   /* Don't tell the backend about our clip plane constants, we've already
    * lowered them in NIR and we don't want it doing it again.
    */
   struct brw_vs_prog_key key_no_ucp = *key;
   key_no_ucp.nr_userclip_plane_consts = 0;

   char *error_str = NULL;
   const unsigned *program =
      brw_compile_vs(compiler, &ice->dbg, mem_ctx, &key_no_ucp, vs_prog_data,
                     nir, -1, &error_str);
   if (program == NULL) {
      dbg_printf("Failed to compile vertex shader: %s\n", error_str);
      ralloc_free(mem_ctx);
      return NULL;
   }

   uint32_t *so_decls =
      ice->vtbl.create_so_decl_list(&ish->stream_output,
                                    &vue_prog_data->vue_map);

   struct iris_compiled_shader *shader =
      iris_upload_shader(ice, IRIS_CACHE_VS, sizeof(*key), key, program,
                         prog_data, so_decls, system_values, num_system_values,
                         num_cbufs);

   if (ish->compiled_once) {
      perf_debug(&ice->dbg, "Recompiling vertex shader\n");
   } else {
      ish->compiled_once = true;
   }

   ralloc_free(mem_ctx);
   return shader;
}

/**
 * Update the current vertex shader variant.
 *
 * Fill out the key, look in the cache, compile and bind if needed.
 */
static void
iris_update_compiled_vs(struct iris_context *ice)
{
   struct iris_uncompiled_shader *ish =
      ice->shaders.uncompiled[MESA_SHADER_VERTEX];

   struct brw_vs_prog_key key = { KEY_INIT };
   ice->vtbl.populate_vs_key(ice, &ish->nir->info, &key);

   struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_VS];
   struct iris_compiled_shader *shader =
      iris_find_cached_shader(ice, IRIS_CACHE_VS, sizeof(key), &key);

   if (!shader)
      shader = iris_compile_vs(ice, ish, &key);

   if (old != shader) {
      ice->shaders.prog[IRIS_CACHE_VS] = shader;
      ice->state.dirty |= IRIS_DIRTY_VS |
                          IRIS_DIRTY_BINDINGS_VS |
                          IRIS_DIRTY_CONSTANTS_VS |
                          IRIS_DIRTY_VF_SGVS;
   }
}

/**
 * Get the shader_info for a given stage, or NULL if the stage is disabled.
 */
const struct shader_info *
iris_get_shader_info(const struct iris_context *ice, gl_shader_stage stage)
{
   const struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[stage];

   if (!ish)
      return NULL;

   const nir_shader *nir = ish->nir;
   return &nir->info;
}

/**
 * Get the union of TCS output and TES input slots.
 *
 * TCS and TES need to agree on a common URB entry layout.  In particular,
 * the data for all patch vertices is stored in a single URB entry (unlike
 * GS which has one entry per input vertex).  This means that per-vertex
 * array indexing needs a stride.
 *
 * SSO requires locations to match, but doesn't require the number of
 * outputs/inputs to match (in fact, the TCS often has extra outputs).
 * So, we need to take the extra step of unifying these on the fly.
 */
static void
get_unified_tess_slots(const struct iris_context *ice,
                       uint64_t *per_vertex_slots,
                       uint32_t *per_patch_slots)
{
   const struct shader_info *tcs =
      iris_get_shader_info(ice, MESA_SHADER_TESS_CTRL);
   const struct shader_info *tes =
      iris_get_shader_info(ice, MESA_SHADER_TESS_EVAL);

   *per_vertex_slots = tes->inputs_read;
   *per_patch_slots = tes->patch_inputs_read;

   if (tcs) {
      *per_vertex_slots |= tcs->outputs_written;
      *per_patch_slots |= tcs->patch_outputs_written;
   }
}
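
/* Illustrative example (hypothetical interface): if the TCS writes POS and
 * VAR0 per-vertex but the TES only reads POS, the unified per-vertex slots
 * are POS | VAR0.  Both stages then compile against the same URB layout,
 * which is what makes SSO mix-and-match work.
 */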

/**
 * Compile a tessellation control shader, and upload the assembly.
 */
static struct iris_compiled_shader *
iris_compile_tcs(struct iris_context *ice,
                 struct iris_uncompiled_shader *ish,
                 const struct brw_tcs_prog_key *key)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   const struct brw_compiler *compiler = screen->compiler;
   const struct nir_shader_compiler_options *options =
      compiler->glsl_compiler_options[MESA_SHADER_TESS_CTRL].NirOptions;
   const struct gen_device_info *devinfo = &screen->devinfo;
   void *mem_ctx = ralloc_context(NULL);
   struct brw_tcs_prog_data *tcs_prog_data =
      rzalloc(mem_ctx, struct brw_tcs_prog_data);
   struct brw_vue_prog_data *vue_prog_data = &tcs_prog_data->base;
   struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
   enum brw_param_builtin *system_values = NULL;
   unsigned num_system_values = 0;
   unsigned num_cbufs = 0;

   nir_shader *nir;

   if (ish) {
      nir = nir_shader_clone(mem_ctx, ish->nir);

      iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
                          &num_system_values, &num_cbufs);
      assign_common_binding_table_offsets(devinfo, nir, prog_data, 0,
                                          num_system_values, num_cbufs);
   } else {
      nir = brw_nir_create_passthrough_tcs(mem_ctx, compiler, options, key);

      /* Reserve space for passing the default tess levels as constants. */
      prog_data->param = rzalloc_array(mem_ctx, uint32_t, 8);
      prog_data->nr_params = 8;
      prog_data->ubo_ranges[0].length = 1;
   }

   char *error_str = NULL;
   const unsigned *program =
      brw_compile_tcs(compiler, &ice->dbg, mem_ctx, key, tcs_prog_data, nir,
                      -1, &error_str);
   if (program == NULL) {
      dbg_printf("Failed to compile tessellation control shader: %s\n",
                 error_str);
      ralloc_free(mem_ctx);
      return NULL;
   }

   struct iris_compiled_shader *shader =
      iris_upload_shader(ice, IRIS_CACHE_TCS, sizeof(*key), key, program,
                         prog_data, NULL, system_values, num_system_values,
                         num_cbufs);

   if (ish) {
      if (ish->compiled_once) {
         perf_debug(&ice->dbg, "Recompiling tessellation control shader\n");
      } else {
         ish->compiled_once = true;
      }
   }

   ralloc_free(mem_ctx);
   return shader;
}

/**
 * Update the current tessellation control shader variant.
 *
 * Fill out the key, look in the cache, compile and bind if needed.
 */
static void
iris_update_compiled_tcs(struct iris_context *ice)
{
   struct iris_uncompiled_shader *tcs =
      ice->shaders.uncompiled[MESA_SHADER_TESS_CTRL];

   const struct shader_info *tes_info =
      iris_get_shader_info(ice, MESA_SHADER_TESS_EVAL);
   struct brw_tcs_prog_key key = {
      ALL_SAMPLERS_XYZW,
      .program_string_id = tcs ? tcs->program_id : 0,
      .tes_primitive_mode = tes_info->tess.primitive_mode,
      .input_vertices = ice->state.vertices_per_patch,
   };
   get_unified_tess_slots(ice, &key.outputs_written,
                          &key.patch_outputs_written);
   ice->vtbl.populate_tcs_key(ice, &key);

   struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_TCS];
   struct iris_compiled_shader *shader =
      iris_find_cached_shader(ice, IRIS_CACHE_TCS, sizeof(key), &key);

   if (!shader)
      shader = iris_compile_tcs(ice, tcs, &key);

   if (old != shader) {
      ice->shaders.prog[IRIS_CACHE_TCS] = shader;
      ice->state.dirty |= IRIS_DIRTY_TCS |
                          IRIS_DIRTY_BINDINGS_TCS |
                          IRIS_DIRTY_CONSTANTS_TCS;
   }
}

/**
 * Compile a tessellation evaluation shader, and upload the assembly.
 */
static struct iris_compiled_shader *
iris_compile_tes(struct iris_context *ice,
                 struct iris_uncompiled_shader *ish,
                 const struct brw_tes_prog_key *key)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   const struct brw_compiler *compiler = screen->compiler;
   const struct gen_device_info *devinfo = &screen->devinfo;
   void *mem_ctx = ralloc_context(NULL);
   struct brw_tes_prog_data *tes_prog_data =
      rzalloc(mem_ctx, struct brw_tes_prog_data);
   struct brw_vue_prog_data *vue_prog_data = &tes_prog_data->base;
   struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
   enum brw_param_builtin *system_values;
   unsigned num_system_values;
   unsigned num_cbufs;

   nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);

   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
                       &num_system_values, &num_cbufs);

   assign_common_binding_table_offsets(devinfo, nir, prog_data, 0,
                                       num_system_values, num_cbufs);

   struct brw_vue_map input_vue_map;
   brw_compute_tess_vue_map(&input_vue_map, key->inputs_read,
                            key->patch_inputs_read);

   char *error_str = NULL;
   const unsigned *program =
      brw_compile_tes(compiler, &ice->dbg, mem_ctx, key, &input_vue_map,
                      tes_prog_data, nir, NULL, -1, &error_str);
   if (program == NULL) {
      dbg_printf("Failed to compile tessellation evaluation shader: %s\n",
                 error_str);
      ralloc_free(mem_ctx);
      return NULL;
   }

   uint32_t *so_decls =
      ice->vtbl.create_so_decl_list(&ish->stream_output,
                                    &vue_prog_data->vue_map);

   struct iris_compiled_shader *shader =
      iris_upload_shader(ice, IRIS_CACHE_TES, sizeof(*key), key, program,
                         prog_data, so_decls, system_values, num_system_values,
                         num_cbufs);

   if (ish->compiled_once) {
      perf_debug(&ice->dbg, "Recompiling tessellation evaluation shader\n");
   } else {
      ish->compiled_once = true;
   }

   ralloc_free(mem_ctx);
   return shader;
}

/**
 * Update the current tessellation evaluation shader variant.
 *
 * Fill out the key, look in the cache, compile and bind if needed.
 */
static void
iris_update_compiled_tes(struct iris_context *ice)
{
   struct iris_uncompiled_shader *ish =
      ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL];

   struct brw_tes_prog_key key = { KEY_INIT };
   get_unified_tess_slots(ice, &key.inputs_read, &key.patch_inputs_read);
   ice->vtbl.populate_tes_key(ice, &key);

   struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_TES];
   struct iris_compiled_shader *shader =
      iris_find_cached_shader(ice, IRIS_CACHE_TES, sizeof(key), &key);

   if (!shader)
      shader = iris_compile_tes(ice, ish, &key);

   if (old != shader) {
      ice->shaders.prog[IRIS_CACHE_TES] = shader;
      ice->state.dirty |= IRIS_DIRTY_TES |
                          IRIS_DIRTY_BINDINGS_TES |
                          IRIS_DIRTY_CONSTANTS_TES;
   }
}

/**
 * Compile a geometry shader, and upload the assembly.
 */
static struct iris_compiled_shader *
iris_compile_gs(struct iris_context *ice,
                struct iris_uncompiled_shader *ish,
                const struct brw_gs_prog_key *key)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   const struct brw_compiler *compiler = screen->compiler;
   const struct gen_device_info *devinfo = &screen->devinfo;
   void *mem_ctx = ralloc_context(NULL);
   struct brw_gs_prog_data *gs_prog_data =
      rzalloc(mem_ctx, struct brw_gs_prog_data);
   struct brw_vue_prog_data *vue_prog_data = &gs_prog_data->base;
   struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
   enum brw_param_builtin *system_values;
   unsigned num_system_values;
   unsigned num_cbufs;

   nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);

   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
                       &num_system_values, &num_cbufs);

   assign_common_binding_table_offsets(devinfo, nir, prog_data, 0,
                                       num_system_values, num_cbufs);

   brw_compute_vue_map(devinfo,
                       &vue_prog_data->vue_map, nir->info.outputs_written,
                       nir->info.separate_shader);

   char *error_str = NULL;
   const unsigned *program =
      brw_compile_gs(compiler, &ice->dbg, mem_ctx, key, gs_prog_data, nir,
                     NULL, -1, &error_str);
   if (program == NULL) {
      dbg_printf("Failed to compile geometry shader: %s\n", error_str);
      ralloc_free(mem_ctx);
      return NULL;
   }

   uint32_t *so_decls =
      ice->vtbl.create_so_decl_list(&ish->stream_output,
                                    &vue_prog_data->vue_map);

   struct iris_compiled_shader *shader =
      iris_upload_shader(ice, IRIS_CACHE_GS, sizeof(*key), key, program,
                         prog_data, so_decls, system_values, num_system_values,
                         num_cbufs);

   if (ish->compiled_once) {
      perf_debug(&ice->dbg, "Recompiling geometry shader\n");
   } else {
      ish->compiled_once = true;
   }

   ralloc_free(mem_ctx);
   return shader;
}

/**
 * Update the current geometry shader variant.
 *
 * Fill out the key, look in the cache, compile and bind if needed.
 */
static void
iris_update_compiled_gs(struct iris_context *ice)
{
   struct iris_uncompiled_shader *ish =
      ice->shaders.uncompiled[MESA_SHADER_GEOMETRY];
   struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_GS];
   struct iris_compiled_shader *shader = NULL;

   if (ish) {
      struct brw_gs_prog_key key = { KEY_INIT };
      ice->vtbl.populate_gs_key(ice, &key);

      shader =
         iris_find_cached_shader(ice, IRIS_CACHE_GS, sizeof(key), &key);

      if (!shader)
         shader = iris_compile_gs(ice, ish, &key);
   }

   if (old != shader) {
      ice->shaders.prog[IRIS_CACHE_GS] = shader;
      ice->state.dirty |= IRIS_DIRTY_GS |
                          IRIS_DIRTY_BINDINGS_GS |
                          IRIS_DIRTY_CONSTANTS_GS;
   }
}

/**
 * Compile a fragment (pixel) shader, and upload the assembly.
 */
static struct iris_compiled_shader *
iris_compile_fs(struct iris_context *ice,
                struct iris_uncompiled_shader *ish,
                const struct brw_wm_prog_key *key,
                struct brw_vue_map *vue_map)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   const struct brw_compiler *compiler = screen->compiler;
   const struct gen_device_info *devinfo = &screen->devinfo;
   void *mem_ctx = ralloc_context(NULL);
   struct brw_wm_prog_data *fs_prog_data =
      rzalloc(mem_ctx, struct brw_wm_prog_data);
   struct brw_stage_prog_data *prog_data = &fs_prog_data->base;
   enum brw_param_builtin *system_values;
   unsigned num_system_values;
   unsigned num_cbufs;

   nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);

   if (nir->info.name && strncmp(nir->info.name, "ARB", 3) == 0)
      prog_data->use_alt_mode = true;

   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
                       &num_system_values, &num_cbufs);

   assign_common_binding_table_offsets(devinfo, nir, prog_data,
                                       MAX2(key->nr_color_regions, 1),
                                       num_system_values, num_cbufs);

   char *error_str = NULL;
   const unsigned *program =
      brw_compile_fs(compiler, &ice->dbg, mem_ctx, key, fs_prog_data,
                     nir, NULL, -1, -1, -1, true, false, vue_map, &error_str);
   if (program == NULL) {
      dbg_printf("Failed to compile fragment shader: %s\n", error_str);
      ralloc_free(mem_ctx);
      return NULL;
   }

   struct iris_compiled_shader *shader =
      iris_upload_shader(ice, IRIS_CACHE_FS, sizeof(*key), key, program,
                         prog_data, NULL, system_values, num_system_values,
                         num_cbufs);

   if (ish->compiled_once) {
      perf_debug(&ice->dbg, "Recompiling fragment shader\n");
   } else {
      ish->compiled_once = true;
   }

   ralloc_free(mem_ctx);
   return shader;
}

/**
 * Update the current fragment shader variant.
 *
 * Fill out the key, look in the cache, compile and bind if needed.
 */
static void
iris_update_compiled_fs(struct iris_context *ice)
{
   struct iris_uncompiled_shader *ish =
      ice->shaders.uncompiled[MESA_SHADER_FRAGMENT];
   struct brw_wm_prog_key key = { KEY_INIT };
   ice->vtbl.populate_fs_key(ice, &key);

   if (ish->nos & (1ull << IRIS_NOS_LAST_VUE_MAP))
      key.input_slots_valid = ice->shaders.last_vue_map->slots_valid;

   struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_FS];
   struct iris_compiled_shader *shader =
      iris_find_cached_shader(ice, IRIS_CACHE_FS, sizeof(key), &key);

   if (!shader)
      shader = iris_compile_fs(ice, ish, &key, ice->shaders.last_vue_map);

   if (old != shader) {
      // XXX: only need to flag CLIP if barycentric has NONPERSPECTIVE
      // toggles.  might be able to avoid flagging SBE too.
      ice->shaders.prog[IRIS_CACHE_FS] = shader;
      ice->state.dirty |= IRIS_DIRTY_FS |
                          IRIS_DIRTY_BINDINGS_FS |
                          IRIS_DIRTY_CONSTANTS_FS |
                          IRIS_DIRTY_WM |
                          IRIS_DIRTY_CLIP |
                          IRIS_DIRTY_SBE;
   }
}
/**
 * Get the last enabled geometry pipeline stage.
 *
 * This stage is the one which will feed stream output and the rasterizer.
 */
static gl_shader_stage
last_vue_stage(struct iris_context *ice)
{
   if (ice->shaders.prog[MESA_SHADER_GEOMETRY])
      return MESA_SHADER_GEOMETRY;

   if (ice->shaders.prog[MESA_SHADER_TESS_EVAL])
      return MESA_SHADER_TESS_EVAL;

   return MESA_SHADER_VERTEX;
}

/**
 * Update the last enabled stage's VUE map.
 *
 * When the shader feeding the rasterizer's output interface changes, we
 * need to re-emit various packets.
 */
static void
update_last_vue_map(struct iris_context *ice,
                    struct brw_stage_prog_data *prog_data)
{
   struct brw_vue_prog_data *vue_prog_data = (void *) prog_data;
   struct brw_vue_map *vue_map = &vue_prog_data->vue_map;
   struct brw_vue_map *old_map = ice->shaders.last_vue_map;
   const uint64_t changed_slots =
      (old_map ? old_map->slots_valid : 0ull) ^ vue_map->slots_valid;

   if (changed_slots & VARYING_BIT_VIEWPORT) {
      // XXX: could use ctx->Const.MaxViewports for old API efficiency
      ice->state.num_viewports =
         (vue_map->slots_valid & VARYING_BIT_VIEWPORT) ? IRIS_MAX_VIEWPORTS : 1;
      ice->state.dirty |= IRIS_DIRTY_CLIP |
                          IRIS_DIRTY_SF_CL_VIEWPORT |
                          IRIS_DIRTY_CC_VIEWPORT |
                          IRIS_DIRTY_SCISSOR_RECT |
                          IRIS_DIRTY_UNCOMPILED_FS |
                          ice->state.dirty_for_nos[IRIS_NOS_LAST_VUE_MAP];
      // XXX: CC_VIEWPORT?
   }

   if (changed_slots || (old_map && old_map->separate != vue_map->separate)) {
      ice->state.dirty |= IRIS_DIRTY_SBE;
   }

   ice->shaders.last_vue_map = &vue_prog_data->vue_map;
}
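
/* Illustrative note: changed_slots XORs the old and new slots_valid
 * bitfields, so a bit is set exactly when a varying appears or disappears.
 * E.g. (hypothetical values) old = POS | PSIZ, new = POS | VIEWPORT gives
 * changed_slots = PSIZ | VIEWPORT, which includes VARYING_BIT_VIEWPORT and
 * therefore triggers the viewport-related re-emits above.
 */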

/**
 * Get the prog_data for a given stage, or NULL if the stage is disabled.
 */
static struct brw_vue_prog_data *
get_vue_prog_data(struct iris_context *ice, gl_shader_stage stage)
{
   if (!ice->shaders.prog[stage])
      return NULL;

   return (void *) ice->shaders.prog[stage]->prog_data;
}

// XXX: iris_compiled_shaders are space-leaking :(
// XXX: do remember to unbind them if deleting them.

/**
 * Update the current shader variants for the given state.
 *
 * This should be called on every draw call to ensure that the correct
 * shaders are bound.  It will also flag any dirty state triggered by
 * swapping out those shaders.
 */
void
iris_update_compiled_shaders(struct iris_context *ice)
{
   const uint64_t dirty = ice->state.dirty;

   struct brw_vue_prog_data *old_prog_datas[4];
   if (!(dirty & IRIS_DIRTY_URB)) {
      for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++)
         old_prog_datas[i] = get_vue_prog_data(ice, i);
   }

   if (dirty & (IRIS_DIRTY_UNCOMPILED_TCS | IRIS_DIRTY_UNCOMPILED_TES)) {
      struct iris_uncompiled_shader *tes =
         ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL];
      if (tes) {
         iris_update_compiled_tcs(ice);
         iris_update_compiled_tes(ice);
      } else {
         ice->shaders.prog[IRIS_CACHE_TCS] = NULL;
         ice->shaders.prog[IRIS_CACHE_TES] = NULL;
         ice->state.dirty |=
            IRIS_DIRTY_TCS | IRIS_DIRTY_TES |
            IRIS_DIRTY_BINDINGS_TCS | IRIS_DIRTY_BINDINGS_TES |
            IRIS_DIRTY_CONSTANTS_TCS | IRIS_DIRTY_CONSTANTS_TES;
      }
   }

   if (dirty & IRIS_DIRTY_UNCOMPILED_VS)
      iris_update_compiled_vs(ice);
   if (dirty & IRIS_DIRTY_UNCOMPILED_GS)
      iris_update_compiled_gs(ice);

   gl_shader_stage last_stage = last_vue_stage(ice);
   struct iris_compiled_shader *shader = ice->shaders.prog[last_stage];
   struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[last_stage];
   update_last_vue_map(ice, shader->prog_data);
   if (ice->state.streamout != shader->streamout) {
      ice->state.streamout = shader->streamout;
      ice->state.dirty |= IRIS_DIRTY_SO_DECL_LIST | IRIS_DIRTY_STREAMOUT;
   }

   if (ice->state.streamout_active) {
      for (int i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
         struct iris_stream_output_target *so =
            (void *) ice->state.so_target[i];
         if (so)
            so->stride = ish->stream_output.stride[i];
      }
   }

   if (dirty & IRIS_DIRTY_UNCOMPILED_FS)
      iris_update_compiled_fs(ice);
   // ...

   /* Changing shader interfaces may require a URB reconfiguration. */
   if (!(dirty & IRIS_DIRTY_URB)) {
      for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
         struct brw_vue_prog_data *old = old_prog_datas[i];
         struct brw_vue_prog_data *new = get_vue_prog_data(ice, i);
         if (!!old != !!new ||
             (new && new->urb_entry_size != old->urb_entry_size)) {
            ice->state.dirty |= IRIS_DIRTY_URB;
            break;
         }
      }
   }
}

static struct iris_compiled_shader *
iris_compile_cs(struct iris_context *ice,
                struct iris_uncompiled_shader *ish,
                const struct brw_cs_prog_key *key)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   const struct brw_compiler *compiler = screen->compiler;
   const struct gen_device_info *devinfo = &screen->devinfo;
   void *mem_ctx = ralloc_context(NULL);
   struct brw_cs_prog_data *cs_prog_data =
      rzalloc(mem_ctx, struct brw_cs_prog_data);
   struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
   enum brw_param_builtin *system_values;
   unsigned num_system_values;
   unsigned num_cbufs;

   nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);

   cs_prog_data->binding_table.work_groups_start = 0;

   prog_data->total_shared = nir->info.cs.shared_size;

   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
                       &num_system_values, &num_cbufs);

   assign_common_binding_table_offsets(devinfo, nir, prog_data, 1,
                                       num_system_values, num_cbufs);

   char *error_str = NULL;
   const unsigned *program =
      brw_compile_cs(compiler, &ice->dbg, mem_ctx, key, cs_prog_data,
                     nir, -1, &error_str);
   if (program == NULL) {
      dbg_printf("Failed to compile compute shader: %s\n", error_str);
      ralloc_free(mem_ctx);
      return NULL;
   }

   struct iris_compiled_shader *shader =
      iris_upload_shader(ice, IRIS_CACHE_CS, sizeof(*key), key, program,
                         prog_data, NULL, system_values, num_system_values,
                         num_cbufs);

   if (ish->compiled_once) {
      perf_debug(&ice->dbg, "Recompiling compute shader\n");
   } else {
      ish->compiled_once = true;
   }

   ralloc_free(mem_ctx);
   return shader;
}

void
iris_update_compiled_compute_shader(struct iris_context *ice)
{
   struct iris_uncompiled_shader *ish =
      ice->shaders.uncompiled[MESA_SHADER_COMPUTE];

   struct brw_cs_prog_key key = { KEY_INIT };
   ice->vtbl.populate_cs_key(ice, &key);

   struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_CS];
   struct iris_compiled_shader *shader =
      iris_find_cached_shader(ice, IRIS_CACHE_CS, sizeof(key), &key);

   if (!shader)
      shader = iris_compile_cs(ice, ish, &key);

   if (old != shader) {
      ice->shaders.prog[IRIS_CACHE_CS] = shader;
      ice->state.dirty |= IRIS_DIRTY_CS |
                          IRIS_DIRTY_BINDINGS_CS |
                          IRIS_DIRTY_CONSTANTS_CS;
   }
}

void
iris_fill_cs_push_const_buffer(struct brw_cs_prog_data *cs_prog_data,
                               uint32_t *dst)
{
   struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
   assert(cs_prog_data->push.total.size > 0);
   assert(cs_prog_data->push.cross_thread.size == 0);
   assert(cs_prog_data->push.per_thread.dwords == 1);
   assert(prog_data->param[0] == BRW_PARAM_BUILTIN_SUBGROUP_ID);
   for (unsigned t = 0; t < cs_prog_data->threads; t++)
      dst[8 * t] = t;
}
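
/* Sketch of the resulting buffer (illustrative): with per_thread.dwords
 * == 1, each thread's push constant block is one 32-byte register
 * (8 dwords), and only dword 0 carries a payload, the thread's subgroup ID:
 *
 *    dst[0]  = 0   // thread 0
 *    dst[8]  = 1   // thread 1
 *    dst[16] = 2   // thread 2, and so on
 */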

/**
 * Allocate scratch BOs as needed for the given per-thread size and stage.
 */
struct iris_bo *
iris_get_scratch_space(struct iris_context *ice,
                       unsigned per_thread_scratch,
                       gl_shader_stage stage)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   struct iris_bufmgr *bufmgr = screen->bufmgr;
   const struct gen_device_info *devinfo = &screen->devinfo;

   unsigned encoded_size = ffs(per_thread_scratch) - 11;
   assert(encoded_size < (1 << 16));

   struct iris_bo **bop = &ice->shaders.scratch_bos[encoded_size][stage];

   /* The documentation for 3DSTATE_PS "Scratch Space Base Pointer" says:
    *
    *    "Scratch Space per slice is computed based on 4 sub-slices.  SW
    *     must allocate scratch space enough so that each slice has 4
    *     slices allowed."
    *
    * According to the other driver team, this applies to compute shaders
    * as well.  This is not currently documented at all.
    *
    * This hack is no longer necessary on Gen11+.
    */
   unsigned subslice_total = screen->subslice_total;
   if (devinfo->gen < 11)
      subslice_total = 4 * devinfo->num_slices;
   assert(subslice_total >= screen->subslice_total);

   if (!*bop) {
      unsigned scratch_ids_per_subslice = devinfo->max_cs_threads;
      uint32_t max_threads[] = {
         [MESA_SHADER_VERTEX] = devinfo->max_vs_threads,
         [MESA_SHADER_TESS_CTRL] = devinfo->max_tcs_threads,
         [MESA_SHADER_TESS_EVAL] = devinfo->max_tes_threads,
         [MESA_SHADER_GEOMETRY] = devinfo->max_gs_threads,
         [MESA_SHADER_FRAGMENT] = devinfo->max_wm_threads,
         [MESA_SHADER_COMPUTE] = scratch_ids_per_subslice * subslice_total,
      };

      uint32_t size = per_thread_scratch * max_threads[stage];

      *bop = iris_bo_alloc(bufmgr, "scratch", size, IRIS_MEMZONE_SHADER);
   }

   return *bop;
}
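
/* Note on encoded_size (illustrative, assuming per_thread_scratch is always
 * a power of two of at least 1KB): ffs(per_thread_scratch) - 11 maps
 * 1KB -> 0, 2KB -> 1, 4KB -> 2, and so on, giving a compact index into the
 * scratch_bos[][] cache.
 */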

/* ------------------------------------------------------------------- */

/**
 * The pipe->create_[stage]_state() driver hooks.
 *
 * Performs basic NIR preprocessing, records any state dependencies, and
 * returns an iris_uncompiled_shader as the Gallium CSO.
 *
 * Actual shader compilation to assembly happens later, at first use.
 */
static void *
iris_create_uncompiled_shader(struct pipe_context *ctx,
                              nir_shader *nir,
                              const struct pipe_stream_output_info *so_info)
{
   struct iris_screen *screen = (struct iris_screen *)ctx->screen;
   const struct gen_device_info *devinfo = &screen->devinfo;

   struct iris_uncompiled_shader *ish =
      calloc(1, sizeof(struct iris_uncompiled_shader));
   if (!ish)
      return NULL;

   nir = brw_preprocess_nir(screen->compiler, nir);

   NIR_PASS_V(nir, brw_nir_lower_image_load_store, devinfo);
   NIR_PASS_V(nir, iris_lower_storage_image_derefs);

   ish->program_id = get_new_program_id(screen);
   ish->nir = nir;
   if (so_info) {
      memcpy(&ish->stream_output, so_info, sizeof(*so_info));
      update_so_info(&ish->stream_output, nir->info.outputs_written);
   }

   return ish;
}

static struct iris_uncompiled_shader *
iris_create_shader_state(struct pipe_context *ctx,
                         const struct pipe_shader_state *state)
{
   assert(state->type == PIPE_SHADER_IR_NIR);

   return iris_create_uncompiled_shader(ctx, state->ir.nir,
                                        &state->stream_output);
}

static void *
iris_create_vs_state(struct pipe_context *ctx,
                     const struct pipe_shader_state *state)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_screen *screen = (void *) ctx->screen;
   struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);

   /* User clip plane enables are rasterizer state, and they affect the key
    * unless the shader writes gl_ClipDistance itself.
    */
   if (ish->nir->info.clip_distance_array_size == 0)
      ish->nos |= (1ull << IRIS_NOS_RASTERIZER);

   if (screen->precompile) {
      struct brw_vs_prog_key key = { KEY_INIT };

      iris_compile_vs(ice, ish, &key);
   }

   return ish;
}

static void *
iris_create_tcs_state(struct pipe_context *ctx,
                      const struct pipe_shader_state *state)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_screen *screen = (void *) ctx->screen;
   struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);
   struct shader_info *info = &ish->nir->info;

   // XXX: NOS?

   if (screen->precompile) {
      const unsigned _GL_TRIANGLES = 0x0004;
      struct brw_tcs_prog_key key = {
         KEY_INIT,
         // XXX: make sure the linker fills this out from the TES...
         .tes_primitive_mode =
            info->tess.primitive_mode ? info->tess.primitive_mode
                                      : _GL_TRIANGLES,
         .outputs_written = info->outputs_written,
         .patch_outputs_written = info->patch_outputs_written,
      };

      iris_compile_tcs(ice, ish, &key);
   }

   return ish;
}

static void *
iris_create_tes_state(struct pipe_context *ctx,
                      const struct pipe_shader_state *state)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_screen *screen = (void *) ctx->screen;
   struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);
   struct shader_info *info = &ish->nir->info;

   // XXX: NOS?

   if (screen->precompile) {
      struct brw_tes_prog_key key = {
         KEY_INIT,
         // XXX: not ideal, need TCS output/TES input unification
         .inputs_read = info->inputs_read,
         .patch_inputs_read = info->patch_inputs_read,
      };

      iris_compile_tes(ice, ish, &key);
   }

   return ish;
}

static void *
iris_create_gs_state(struct pipe_context *ctx,
                     const struct pipe_shader_state *state)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_screen *screen = (void *) ctx->screen;
   struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);

   // XXX: NOS?

   if (screen->precompile) {
      struct brw_gs_prog_key key = { KEY_INIT };

      iris_compile_gs(ice, ish, &key);
   }

   return ish;
}

static void *
iris_create_fs_state(struct pipe_context *ctx,
                     const struct pipe_shader_state *state)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_screen *screen = (void *) ctx->screen;
   struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);
   struct shader_info *info = &ish->nir->info;

   ish->nos |= (1ull << IRIS_NOS_FRAMEBUFFER) |
               (1ull << IRIS_NOS_DEPTH_STENCIL_ALPHA) |
               (1ull << IRIS_NOS_RASTERIZER) |
               (1ull << IRIS_NOS_BLEND);

   /* The program key needs the VUE map if there are > 16 inputs */
   if (util_bitcount64(ish->nir->info.inputs_read &
                       BRW_FS_VARYING_INPUT_MASK) > 16) {
      ish->nos |= (1ull << IRIS_NOS_LAST_VUE_MAP);
   }

   if (screen->precompile) {
      const uint64_t color_outputs = info->outputs_written &
         ~(BITFIELD64_BIT(FRAG_RESULT_DEPTH) |
           BITFIELD64_BIT(FRAG_RESULT_STENCIL) |
           BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK));

      bool can_rearrange_varyings =
         util_bitcount64(info->inputs_read & BRW_FS_VARYING_INPUT_MASK) <= 16;

      struct brw_wm_prog_key key = {
         KEY_INIT,
         .nr_color_regions = util_bitcount(color_outputs),
         .coherent_fb_fetch = true,
         .input_slots_valid =
            can_rearrange_varyings ? 0 : info->inputs_read | VARYING_BIT_POS,
      };

      iris_compile_fs(ice, ish, &key, NULL);
   }

   return ish;
}

static void *
iris_create_compute_state(struct pipe_context *ctx,
                          const struct pipe_compute_state *state)
{
   assert(state->ir_type == PIPE_SHADER_IR_NIR);

   struct iris_context *ice = (void *) ctx;
   struct iris_screen *screen = (void *) ctx->screen;
   struct iris_uncompiled_shader *ish =
      iris_create_uncompiled_shader(ctx, (void *) state->prog, NULL);

   // XXX: disallow more than 64KB of shared variables

   if (screen->precompile) {
      struct brw_cs_prog_key key = { KEY_INIT };

      iris_compile_cs(ice, ish, &key);
   }

   return ish;
}

/**
 * The pipe->delete_[stage]_state() driver hooks.
 *
 * Frees the iris_uncompiled_shader.
 */
static void
iris_delete_shader_state(struct pipe_context *ctx, void *state)
{
   struct iris_uncompiled_shader *ish = state;

   ralloc_free(ish->nir);
   free(ish);
}

/**
 * The pipe->bind_[stage]_state() driver hook.
 *
 * Binds an uncompiled shader as the current one for a particular stage.
 * Updates dirty tracking to account for the shader's NOS.
 */
static void
bind_state(struct iris_context *ice,
           struct iris_uncompiled_shader *ish,
           gl_shader_stage stage)
{
   uint64_t dirty_bit = IRIS_DIRTY_UNCOMPILED_VS << stage;
   const uint64_t nos = ish ? ish->nos : 0;

   ice->shaders.uncompiled[stage] = ish;
   ice->state.dirty |= dirty_bit;

   /* Record that CSOs need to mark IRIS_DIRTY_UNCOMPILED_XS when they change
    * (or that they no longer need to do so).
    */
   for (int i = 0; i < IRIS_NOS_COUNT; i++) {
      if (nos & (1 << i))
         ice->state.dirty_for_nos[i] |= dirty_bit;
      else
         ice->state.dirty_for_nos[i] &= ~dirty_bit;
   }
}
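
/* Illustrative example (hypothetical state): a fragment shader with
 * IRIS_NOS_BLEND set records IRIS_DIRTY_UNCOMPILED_FS in
 * dirty_for_nos[IRIS_NOS_BLEND], so a later blend-state bind re-runs
 * iris_update_compiled_fs() and can pick (or compile) a different variant.
 */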

static void
iris_bind_vs_state(struct pipe_context *ctx, void *state)
{
   bind_state((void *) ctx, state, MESA_SHADER_VERTEX);
}

static void
iris_bind_tcs_state(struct pipe_context *ctx, void *state)
{
   bind_state((void *) ctx, state, MESA_SHADER_TESS_CTRL);
}

static void
iris_bind_tes_state(struct pipe_context *ctx, void *state)
{
   struct iris_context *ice = (struct iris_context *)ctx;

   /* Enabling/disabling optional stages requires a URB reconfiguration. */
   if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL])
      ice->state.dirty |= IRIS_DIRTY_URB;

   bind_state((void *) ctx, state, MESA_SHADER_TESS_EVAL);
}

static void
iris_bind_gs_state(struct pipe_context *ctx, void *state)
{
   struct iris_context *ice = (struct iris_context *)ctx;

   /* Enabling/disabling optional stages requires a URB reconfiguration. */
   if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_GEOMETRY])
      ice->state.dirty |= IRIS_DIRTY_URB;

   bind_state((void *) ctx, state, MESA_SHADER_GEOMETRY);
}

static void
iris_bind_fs_state(struct pipe_context *ctx, void *state)
{
   struct iris_context *ice = (struct iris_context *) ctx;
   struct iris_uncompiled_shader *old_ish =
      ice->shaders.uncompiled[MESA_SHADER_FRAGMENT];
   struct iris_uncompiled_shader *new_ish = state;

   const unsigned color_bits =
      BITFIELD64_BIT(FRAG_RESULT_COLOR) |
      BITFIELD64_RANGE(FRAG_RESULT_DATA0, BRW_MAX_DRAW_BUFFERS);

   /* Fragment shader outputs influence HasWriteableRT */
   if (!old_ish || !new_ish ||
       (old_ish->nir->info.outputs_written & color_bits) !=
       (new_ish->nir->info.outputs_written & color_bits))
      ice->state.dirty |= IRIS_DIRTY_PS_BLEND;

   bind_state((void *) ctx, state, MESA_SHADER_FRAGMENT);
}

static void
iris_bind_cs_state(struct pipe_context *ctx, void *state)
{
   bind_state((void *) ctx, state, MESA_SHADER_COMPUTE);
}

void
iris_init_program_functions(struct pipe_context *ctx)
{
   ctx->create_vs_state = iris_create_vs_state;
   ctx->create_tcs_state = iris_create_tcs_state;
   ctx->create_tes_state = iris_create_tes_state;
   ctx->create_gs_state = iris_create_gs_state;
   ctx->create_fs_state = iris_create_fs_state;
   ctx->create_compute_state = iris_create_compute_state;

   ctx->delete_vs_state = iris_delete_shader_state;
   ctx->delete_tcs_state = iris_delete_shader_state;
   ctx->delete_tes_state = iris_delete_shader_state;
   ctx->delete_gs_state = iris_delete_shader_state;
   ctx->delete_fs_state = iris_delete_shader_state;
   ctx->delete_compute_state = iris_delete_shader_state;

   ctx->bind_vs_state = iris_bind_vs_state;
   ctx->bind_tcs_state = iris_bind_tcs_state;
   ctx->bind_tes_state = iris_bind_tes_state;
   ctx->bind_gs_state = iris_bind_gs_state;
   ctx->bind_fs_state = iris_bind_fs_state;
   ctx->bind_compute_state = iris_bind_cs_state;
}