iris: Cache assembly shaders in the on-disk shader cache
src/gallium/drivers/iris/iris_program.c
1 /*
2 * Copyright © 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23 /**
24 * @file iris_program.c
25 *
26 * This file contains the driver interface for compiling shaders.
27 *
28 * See iris_program_cache.c for the in-memory program cache where the
29 * compiled shaders are stored.
30 */
31
32 #include <stdio.h>
33 #include <errno.h>
34 #include "pipe/p_defines.h"
35 #include "pipe/p_state.h"
36 #include "pipe/p_context.h"
37 #include "pipe/p_screen.h"
38 #include "util/u_atomic.h"
39 #include "compiler/nir/nir.h"
40 #include "compiler/nir/nir_builder.h"
41 #include "compiler/nir/nir_serialize.h"
42 #include "intel/compiler/brw_compiler.h"
43 #include "intel/compiler/brw_nir.h"
44 #include "iris_context.h"
45 #include "nir/tgsi_to_nir.h"
46
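/* Default key fields shared by all shader stages. 0x688 is the identity
 * texture swizzle XYZW packed three bits per channel:
 * 0 | (1 << 3) | (2 << 6) | (3 << 9) == 0x688. 16x MSAA only exists on
 * Gen9+, so msaa_16 is only set there.
 */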
47 #define KEY_INIT_NO_ID(gen) \
48 .tex.swizzles[0 ... MAX_SAMPLERS - 1] = 0x688, \
49 .tex.compressed_multisample_layout_mask = ~0, \
50 .tex.msaa_16 = (gen >= 9 ? ~0 : 0)
51 #define KEY_INIT(gen) .program_string_id = ish->program_id, KEY_INIT_NO_ID(gen)
52
53 static unsigned
54 get_new_program_id(struct iris_screen *screen)
55 {
56 return p_atomic_inc_return(&screen->program_id);
57 }
58
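/**
 * Walk an array-of-arrays deref chain and compute a flattened offset
 * (the accumulated index times elem_size), clamped to the array bounds
 * so an out-of-bounds index can't select a surface past the end of the
 * array (see the comment at the bottom of the function).
 */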
59 static nir_ssa_def *
60 get_aoa_deref_offset(nir_builder *b,
61 nir_deref_instr *deref,
62 unsigned elem_size)
63 {
64 unsigned array_size = elem_size;
65 nir_ssa_def *offset = nir_imm_int(b, 0);
66
67 while (deref->deref_type != nir_deref_type_var) {
68 assert(deref->deref_type == nir_deref_type_array);
69
70 /* This level's element size is the previous level's array size */
71 assert(deref->arr.index.ssa);
72 nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
73 offset = nir_iadd(b, offset,
74 nir_imul(b, index, nir_imm_int(b, array_size)));
75
76 deref = nir_deref_instr_parent(deref);
77 assert(glsl_type_is_array(deref->type));
78 array_size *= glsl_get_length(deref->type);
79 }
80
81 /* Accessing an invalid surface index with the dataport can result in a
82 * hang. According to the spec "if the index used to select an individual
83 * element is negative or greater than or equal to the size of the array,
84 * the results of the operation are undefined but may not lead to
85 * termination" -- which is one of the possible outcomes of the hang.
86 * Clamp the index to prevent access outside of the array bounds.
87 */
88 return nir_umin(b, offset, nir_imm_int(b, array_size - elem_size));
89 }
90
91 static void
92 iris_lower_storage_image_derefs(nir_shader *nir)
93 {
94 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
95
96 nir_builder b;
97 nir_builder_init(&b, impl);
98
99 nir_foreach_block(block, impl) {
100 nir_foreach_instr_safe(instr, block) {
101 if (instr->type != nir_instr_type_intrinsic)
102 continue;
103
104 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
105 switch (intrin->intrinsic) {
106 case nir_intrinsic_image_deref_load:
107 case nir_intrinsic_image_deref_store:
108 case nir_intrinsic_image_deref_atomic_add:
109 case nir_intrinsic_image_deref_atomic_min:
110 case nir_intrinsic_image_deref_atomic_max:
111 case nir_intrinsic_image_deref_atomic_and:
112 case nir_intrinsic_image_deref_atomic_or:
113 case nir_intrinsic_image_deref_atomic_xor:
114 case nir_intrinsic_image_deref_atomic_exchange:
115 case nir_intrinsic_image_deref_atomic_comp_swap:
116 case nir_intrinsic_image_deref_size:
117 case nir_intrinsic_image_deref_samples:
118 case nir_intrinsic_image_deref_load_raw_intel:
119 case nir_intrinsic_image_deref_store_raw_intel: {
120 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
121 nir_variable *var = nir_deref_instr_get_variable(deref);
122
123 b.cursor = nir_before_instr(&intrin->instr);
124 nir_ssa_def *index =
125 nir_iadd(&b, nir_imm_int(&b, var->data.driver_location),
126 get_aoa_deref_offset(&b, deref, 1));
127 nir_rewrite_image_intrinsic(intrin, index, false);
128 break;
129 }
130
131 default:
132 break;
133 }
134 }
135 }
136 }
137
138 // XXX: need unify_interfaces() at link time...
139
140 /**
141 * Fix an uncompiled shader's stream output info.
142 *
143 * Core Gallium stores output->register_index as a "slot" number, where
144 * slots are assigned consecutively to all outputs in info->outputs_written.
145 * This naive packing of outputs doesn't work for us: we too have slots,
146 * but the layout is defined by the VUE map, which we won't have until we
147 * compile a specific shader variant. So, we remap these and simply store
148 * VARYING_SLOT_* in our copy's output->register_index fields.
149 *
150 * We also fix up VARYING_SLOT_{LAYER,VIEWPORT,PSIZ} to select the Y/Z/W
151 * components of our VUE header. See brw_vue_map.c for the layout.
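 *
 * For example, if outputs_written contains only POS, PSIZ, and VAR0, then
 * Gallium slots 0/1/2 map back to VARYING_SLOT_POS, VARYING_SLOT_PSIZ, and
 * VARYING_SLOT_VAR0, respectively.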
152 */
153 static void
154 update_so_info(struct pipe_stream_output_info *so_info,
155 uint64_t outputs_written)
156 {
157 uint8_t reverse_map[64] = {};
158 unsigned slot = 0;
159 while (outputs_written) {
160 reverse_map[slot++] = u_bit_scan64(&outputs_written);
161 }
162
163 for (unsigned i = 0; i < so_info->num_outputs; i++) {
164 struct pipe_stream_output *output = &so_info->output[i];
165
166 /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
167 output->register_index = reverse_map[output->register_index];
168
169 /* The VUE header contains three scalar fields packed together:
170 * - gl_PointSize is stored in VARYING_SLOT_PSIZ.w
171 * - gl_Layer is stored in VARYING_SLOT_PSIZ.y
172 * - gl_ViewportIndex is stored in VARYING_SLOT_PSIZ.z
173 */
174 switch (output->register_index) {
175 case VARYING_SLOT_LAYER:
176 assert(output->num_components == 1);
177 output->register_index = VARYING_SLOT_PSIZ;
178 output->start_component = 1;
179 break;
180 case VARYING_SLOT_VIEWPORT:
181 assert(output->num_components == 1);
182 output->register_index = VARYING_SLOT_PSIZ;
183 output->start_component = 2;
184 break;
185 case VARYING_SLOT_PSIZ:
186 assert(output->num_components == 1);
187 output->start_component = 3;
188 break;
189 }
190
191 //info->outputs_written |= 1ull << output->register_index;
192 }
193 }
194
195 /**
196 * Sets up the starting offsets for the groups of binding table entries
197 * common to all pipeline stages.
198 *
199 * Unused groups are initialized to 0xd0d0d0d0 to make it obvious that they're
200 * unused, and also to ensure that adding small offsets to them will still
201 * trigger our asserts that surface indices are < BRW_MAX_SURFACES.
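 *
 * For example, a stage using two textures, one image, and one cbuf (and
 * no SSBOs) gets texture_start = 0, image_start = 2, ubo_start = 3, and
 * ssbo_start poisoned to 0xd0d0d0d0.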
202 */
203 static uint32_t
204 assign_common_binding_table_offsets(const struct gen_device_info *devinfo,
205 const struct nir_shader *nir,
206 struct brw_stage_prog_data *prog_data,
207 uint32_t next_binding_table_offset,
208 unsigned num_system_values,
209 unsigned num_cbufs)
210 {
211 const struct shader_info *info = &nir->info;
212
213 unsigned num_textures = util_last_bit(info->textures_used);
214
215 if (num_textures) {
216 prog_data->binding_table.texture_start = next_binding_table_offset;
217 prog_data->binding_table.gather_texture_start = next_binding_table_offset;
218 next_binding_table_offset += num_textures;
219 } else {
220 prog_data->binding_table.texture_start = 0xd0d0d0d0;
221 prog_data->binding_table.gather_texture_start = 0xd0d0d0d0;
222 }
223
224 if (info->num_images) {
225 prog_data->binding_table.image_start = next_binding_table_offset;
226 next_binding_table_offset += info->num_images;
227 } else {
228 prog_data->binding_table.image_start = 0xd0d0d0d0;
229 }
230
231 if (num_cbufs) {
232 //assert(info->num_ubos <= BRW_MAX_UBO);
233 prog_data->binding_table.ubo_start = next_binding_table_offset;
234 next_binding_table_offset += num_cbufs;
235 } else {
236 prog_data->binding_table.ubo_start = 0xd0d0d0d0;
237 }
238
239 if (info->num_ssbos || info->num_abos) {
240 prog_data->binding_table.ssbo_start = next_binding_table_offset;
241 // XXX: see iris_state "wasting 16 binding table slots for ABOs" comment
242 next_binding_table_offset += IRIS_MAX_ABOS + info->num_ssbos;
243 } else {
244 prog_data->binding_table.ssbo_start = 0xd0d0d0d0;
245 }
246
247 prog_data->binding_table.shader_time_start = 0xd0d0d0d0;
248
249 /* Plane 0 is just the regular texture section */
250 prog_data->binding_table.plane_start[0] = prog_data->binding_table.texture_start;
251
252 prog_data->binding_table.plane_start[1] = next_binding_table_offset;
253 next_binding_table_offset += num_textures;
254
255 prog_data->binding_table.plane_start[2] = next_binding_table_offset;
256 next_binding_table_offset += num_textures;
257
258 /* Set the binding table size */
259 prog_data->binding_table.size_bytes = next_binding_table_offset * 4;
260
261 return next_binding_table_offset;
262 }
263
264 static void
265 setup_vec4_image_sysval(uint32_t *sysvals, uint32_t idx,
266 unsigned offset, unsigned n)
267 {
268 assert(offset % sizeof(uint32_t) == 0);
269
270 for (unsigned i = 0; i < n; ++i)
271 sysvals[i] = BRW_PARAM_IMAGE(idx, offset / sizeof(uint32_t) + i);
272
273 for (unsigned i = n; i < 4; ++i)
274 sysvals[i] = BRW_PARAM_BUILTIN_ZERO;
275 }
276
277 /**
278 * Associate NIR uniform variables with the prog_data->param[] mechanism
279 * used by the backend. Also, decide which UBOs we'd like to push in an
280 * ideal situation (though the backend can reduce this).
281 */
282 static void
283 iris_setup_uniforms(const struct brw_compiler *compiler,
284 void *mem_ctx,
285 nir_shader *nir,
286 struct brw_stage_prog_data *prog_data,
287 enum brw_param_builtin **out_system_values,
288 unsigned *out_num_system_values,
289 unsigned *out_num_cbufs)
290 {
291 UNUSED const struct gen_device_info *devinfo = compiler->devinfo;
292
293 /* The intel compiler assumes that num_uniforms is in bytes. For
294 * scalar that means 4 bytes per uniform slot.
295 *
296 * Ref: brw_nir_lower_uniforms, type_size_scalar_bytes.
297 */
298 nir->num_uniforms *= 4;
299
300 const unsigned IRIS_MAX_SYSTEM_VALUES =
301 PIPE_MAX_SHADER_IMAGES * BRW_IMAGE_PARAM_SIZE;
302 enum brw_param_builtin *system_values =
303 rzalloc_array(mem_ctx, enum brw_param_builtin, IRIS_MAX_SYSTEM_VALUES);
304 unsigned num_system_values = 0;
305
306 unsigned patch_vert_idx = -1;
307 unsigned ucp_idx[IRIS_MAX_CLIP_PLANES];
308 unsigned img_idx[PIPE_MAX_SHADER_IMAGES];
309 memset(ucp_idx, -1, sizeof(ucp_idx));
310 memset(img_idx, -1, sizeof(img_idx));
311
312 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
313
314 nir_builder b;
315 nir_builder_init(&b, impl);
316
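/* System value loads below are emitted as UBO loads from a placeholder
 * buffer index: the undef "temp_ubo_name". Once we know how many system
 * values we ended up with, the remapping pass further down rewrites the
 * placeholder to constant buffer 0.
 */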
317 b.cursor = nir_before_block(nir_start_block(impl));
318 nir_ssa_def *temp_ubo_name = nir_ssa_undef(&b, 1, 32);
319
320 /* Turn system value intrinsics into uniforms */
321 nir_foreach_block(block, impl) {
322 nir_foreach_instr_safe(instr, block) {
323 if (instr->type != nir_instr_type_intrinsic)
324 continue;
325
326 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
327 nir_ssa_def *offset;
328
329 switch (intrin->intrinsic) {
330 case nir_intrinsic_load_user_clip_plane: {
331 unsigned ucp = nir_intrinsic_ucp_id(intrin);
332
333 if (ucp_idx[ucp] == -1) {
334 ucp_idx[ucp] = num_system_values;
335 num_system_values += 4;
336 }
337
338 for (int i = 0; i < 4; i++) {
339 system_values[ucp_idx[ucp] + i] =
340 BRW_PARAM_BUILTIN_CLIP_PLANE(ucp, i);
341 }
342
343 b.cursor = nir_before_instr(instr);
344 offset = nir_imm_int(&b, ucp_idx[ucp] * sizeof(uint32_t));
345 break;
346 }
347 case nir_intrinsic_load_patch_vertices_in:
348 if (patch_vert_idx == -1)
349 patch_vert_idx = num_system_values++;
350
351 system_values[patch_vert_idx] =
352 BRW_PARAM_BUILTIN_PATCH_VERTICES_IN;
353
354 b.cursor = nir_before_instr(instr);
355 offset = nir_imm_int(&b, patch_vert_idx * sizeof(uint32_t));
356 break;
357 case nir_intrinsic_image_deref_load_param_intel: {
358 assert(devinfo->gen < 9);
359 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
360 nir_variable *var = nir_deref_instr_get_variable(deref);
361
362 /* XXX: var->data.binding is not set properly. We need to run
363 * some form of gl_nir_lower_samplers_as_deref() to get it.
364 * This breaks tests which use more than one image.
365 */
366 if (img_idx[var->data.binding] == -1) {
367 /* GL only allows arrays of arrays of images. */
368 assert(glsl_type_is_image(glsl_without_array(var->type)));
369 unsigned num_images = MAX2(1, glsl_get_aoa_size(var->type));
370
371 for (int i = 0; i < num_images; i++) {
372 const unsigned img = var->data.binding + i;
373
374 img_idx[img] = num_system_values;
375 num_system_values += BRW_IMAGE_PARAM_SIZE;
376
377 uint32_t *img_sv = &system_values[img_idx[img]];
378
379 setup_vec4_image_sysval(
380 img_sv + BRW_IMAGE_PARAM_OFFSET_OFFSET, img,
381 offsetof(struct brw_image_param, offset), 2);
382 setup_vec4_image_sysval(
383 img_sv + BRW_IMAGE_PARAM_SIZE_OFFSET, img,
384 offsetof(struct brw_image_param, size), 3);
385 setup_vec4_image_sysval(
386 img_sv + BRW_IMAGE_PARAM_STRIDE_OFFSET, img,
387 offsetof(struct brw_image_param, stride), 4);
388 setup_vec4_image_sysval(
389 img_sv + BRW_IMAGE_PARAM_TILING_OFFSET, img,
390 offsetof(struct brw_image_param, tiling), 3);
391 setup_vec4_image_sysval(
392 img_sv + BRW_IMAGE_PARAM_SWIZZLING_OFFSET, img,
393 offsetof(struct brw_image_param, swizzling), 2);
394 }
395 }
396
397 b.cursor = nir_before_instr(instr);
398 offset = nir_iadd(&b,
399 get_aoa_deref_offset(&b, deref, BRW_IMAGE_PARAM_SIZE * 4),
400 nir_imm_int(&b, img_idx[var->data.binding] * 4 +
401 nir_intrinsic_base(intrin) * 16));
402 break;
403 }
404 default:
405 continue;
406 }
407
408 unsigned comps = nir_intrinsic_dest_components(intrin);
409
410 nir_intrinsic_instr *load =
411 nir_intrinsic_instr_create(nir, nir_intrinsic_load_ubo);
412 load->num_components = comps;
413 load->src[0] = nir_src_for_ssa(temp_ubo_name);
414 load->src[1] = nir_src_for_ssa(offset);
415 nir_ssa_dest_init(&load->instr, &load->dest, comps, 32, NULL);
416 nir_builder_instr_insert(&b, &load->instr);
417 nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
418 nir_src_for_ssa(&load->dest.ssa));
419 nir_instr_remove(instr);
420 }
421 }
422
423 nir_validate_shader(nir, "before remapping");
424
425 /* Place the new params at the front of constant buffer 0. */
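/* The resulting constant buffer 0 layout is:
 *
 *   [0, 4 * num_system_values)   - system values
 *   [4 * num_system_values, ...) - the original user uniforms
 *
 * so existing loads from buffer 0 get their offsets bumped, and the
 * placeholder loads emitted above get buffer index 0.
 */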
426 if (num_system_values > 0) {
427 nir->num_uniforms += num_system_values * sizeof(uint32_t);
428
429 system_values = reralloc(mem_ctx, system_values, enum brw_param_builtin,
430 num_system_values);
431
432 nir_foreach_block(block, impl) {
433 nir_foreach_instr_safe(instr, block) {
434 if (instr->type != nir_instr_type_intrinsic)
435 continue;
436
437 nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);
438
439 if (load->intrinsic != nir_intrinsic_load_ubo)
440 continue;
441
442 b.cursor = nir_before_instr(instr);
443
444 assert(load->src[0].is_ssa);
445
446 if (load->src[0].ssa == temp_ubo_name) {
447 nir_instr_rewrite_src(instr, &load->src[0],
448 nir_src_for_ssa(nir_imm_int(&b, 0)));
449 } else if (nir_src_as_uint(load->src[0]) == 0) {
450 nir_ssa_def *offset =
451 nir_iadd(&b, load->src[1].ssa,
452 nir_imm_int(&b, 4 * num_system_values));
453 nir_instr_rewrite_src(instr, &load->src[1],
454 nir_src_for_ssa(offset));
455 }
456 }
457 }
458
459 /* We need to fold the new iadds for brw_nir_analyze_ubo_ranges */
460 nir_opt_constant_folding(nir);
461 } else {
462 ralloc_free(system_values);
463 system_values = NULL;
464 }
465
466 nir_validate_shader(nir, "after remap");
467
468 if (nir->info.stage != MESA_SHADER_COMPUTE)
469 brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
470
471 /* We don't use params[], but fs_visitor::nir_setup_uniforms() asserts
472 * about it for compute shaders, so go ahead and make some fake ones
473 * which the backend will dead code eliminate.
474 */
475 prog_data->nr_params = nir->num_uniforms / 4;
476 prog_data->param = rzalloc_array(mem_ctx, uint32_t, prog_data->nr_params);
477
478 /* System values and uniforms are stored in constant buffer 0, so the
479 * user-facing UBOs are all shifted up by one index. If any constant
480 * buffer is needed at all, constant buffer 0 is needed; account for it.
481 */
482 unsigned num_cbufs = nir->info.num_ubos;
483 if (num_cbufs || num_system_values || nir->num_uniforms)
484 num_cbufs++;
485
486 *out_system_values = system_values;
487 *out_num_system_values = num_system_values;
488 *out_num_cbufs = num_cbufs;
489 }
490
491 static void
492 iris_debug_recompile(struct iris_context *ice,
493 struct shader_info *info,
494 unsigned program_string_id,
495 const void *key)
496 {
497 struct iris_screen *screen = (struct iris_screen *) ice->ctx.screen;
498 const struct brw_compiler *c = screen->compiler;
499
500 if (!info)
501 return;
502
503 c->shader_perf_log(&ice->dbg, "Recompiling %s shader for program %s: %s\n",
504 _mesa_shader_stage_to_string(info->stage),
505 info->name ? info->name : "(no identifier)",
506 info->label ? info->label : "");
507
508 const void *old_key =
509 iris_find_previous_compile(ice, info->stage, program_string_id);
510
511 brw_debug_key_recompile(c, &ice->dbg, info->stage, old_key, key);
512 }
513
514
515 /**
516 * Compile a vertex shader, and upload the assembly.
517 */
518 static struct iris_compiled_shader *
519 iris_compile_vs(struct iris_context *ice,
520 struct iris_uncompiled_shader *ish,
521 const struct brw_vs_prog_key *key)
522 {
523 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
524 const struct brw_compiler *compiler = screen->compiler;
525 const struct gen_device_info *devinfo = &screen->devinfo;
526 void *mem_ctx = ralloc_context(NULL);
527 struct brw_vs_prog_data *vs_prog_data =
528 rzalloc(mem_ctx, struct brw_vs_prog_data);
529 struct brw_vue_prog_data *vue_prog_data = &vs_prog_data->base;
530 struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
531 enum brw_param_builtin *system_values;
532 unsigned num_system_values;
533 unsigned num_cbufs;
534
535 nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
536
537 if (key->nr_userclip_plane_consts) {
538 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
539 nir_lower_clip_vs(nir, (1 << key->nr_userclip_plane_consts) - 1, true);
540 nir_lower_io_to_temporaries(nir, impl, true, false);
541 nir_lower_global_vars_to_local(nir);
542 nir_lower_vars_to_ssa(nir);
543 nir_shader_gather_info(nir, impl);
544 }
545
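/* Shaders named "ARB..." are ARB assembly programs; those need the legacy
 * ALT floating-point mode rather than IEEE mode.
 */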
546 if (nir->info.name && strncmp(nir->info.name, "ARB", 3) == 0)
547 prog_data->use_alt_mode = true;
548
549 iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
550 &num_system_values, &num_cbufs);
551
552 assign_common_binding_table_offsets(devinfo, nir, prog_data, 0,
553 num_system_values, num_cbufs);
554
555 brw_compute_vue_map(devinfo,
556 &vue_prog_data->vue_map, nir->info.outputs_written,
557 nir->info.separate_shader);
558
559 /* Don't tell the backend about our clip plane constants, we've already
560 * lowered them in NIR and we don't want it doing it again.
561 */
562 struct brw_vs_prog_key key_no_ucp = *key;
563 key_no_ucp.nr_userclip_plane_consts = 0;
564
565 char *error_str = NULL;
566 const unsigned *program =
567 brw_compile_vs(compiler, &ice->dbg, mem_ctx, &key_no_ucp, vs_prog_data,
568 nir, -1, &error_str);
569 if (program == NULL) {
570 dbg_printf("Failed to compile vertex shader: %s\n", error_str);
571 ralloc_free(mem_ctx);
572 return NULL;
573 }
574
575 if (ish->compiled_once) {
576 iris_debug_recompile(ice, &nir->info, key->program_string_id, key);
577 } else {
578 ish->compiled_once = true;
579 }
580
581 uint32_t *so_decls =
582 ice->vtbl.create_so_decl_list(&ish->stream_output,
583 &vue_prog_data->vue_map);
584
585 struct iris_compiled_shader *shader =
586 iris_upload_shader(ice, IRIS_CACHE_VS, sizeof(*key), key, program,
587 prog_data, so_decls, system_values, num_system_values,
588 num_cbufs);
589
590 iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
591
592 ralloc_free(mem_ctx);
593 return shader;
594 }
595
596 /**
597 * Update the current vertex shader variant.
598 *
599 * Fill out the key, look in the cache, compile and bind if needed.
600 */
601 static void
602 iris_update_compiled_vs(struct iris_context *ice)
603 {
604 struct iris_uncompiled_shader *ish =
605 ice->shaders.uncompiled[MESA_SHADER_VERTEX];
606 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
607 const struct gen_device_info *devinfo = &screen->devinfo;
608
609 struct brw_vs_prog_key key = { KEY_INIT(devinfo->gen) };
610 ice->vtbl.populate_vs_key(ice, &ish->nir->info, &key);
611
612 struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_VS];
613 struct iris_compiled_shader *shader =
614 iris_find_cached_shader(ice, IRIS_CACHE_VS, sizeof(key), &key);
615
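/* Variant lookup order: the in-memory program cache (above), then the
 * on-disk shader cache, and only compile from NIR as a last resort.
 */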
616 if (!shader)
617 shader = iris_disk_cache_retrieve(ice, ish, &key, sizeof(key));
618
619 if (!shader)
620 shader = iris_compile_vs(ice, ish, &key);
621
622 if (old != shader) {
623 ice->shaders.prog[IRIS_CACHE_VS] = shader;
624 ice->state.dirty |= IRIS_DIRTY_VS |
625 IRIS_DIRTY_BINDINGS_VS |
626 IRIS_DIRTY_CONSTANTS_VS |
627 IRIS_DIRTY_VF_SGVS;
628 const struct brw_vs_prog_data *vs_prog_data =
629 (void *) shader->prog_data;
630 const bool uses_draw_params = vs_prog_data->uses_firstvertex ||
631 vs_prog_data->uses_baseinstance;
632 const bool uses_derived_draw_params = vs_prog_data->uses_drawid ||
633 vs_prog_data->uses_is_indexed_draw;
634 const bool needs_sgvs_element = uses_draw_params ||
635 vs_prog_data->uses_instanceid ||
636 vs_prog_data->uses_vertexid;
637 bool needs_edge_flag = false;
638 nir_foreach_variable(var, &ish->nir->inputs) {
639 if (var->data.location == VERT_ATTRIB_EDGEFLAG)
640 needs_edge_flag = true;
641 }
642
643 if (ice->state.vs_uses_draw_params != uses_draw_params ||
644 ice->state.vs_uses_derived_draw_params != uses_derived_draw_params ||
645 ice->state.vs_needs_edge_flag != needs_edge_flag) {
646 ice->state.dirty |= IRIS_DIRTY_VERTEX_BUFFERS |
647 IRIS_DIRTY_VERTEX_ELEMENTS;
648 }
649 ice->state.vs_uses_draw_params = uses_draw_params;
650 ice->state.vs_uses_derived_draw_params = uses_derived_draw_params;
651 ice->state.vs_needs_sgvs_element = needs_sgvs_element;
652 ice->state.vs_needs_edge_flag = needs_edge_flag;
653 }
654 }
655
656 /**
657 * Get the shader_info for a given stage, or NULL if the stage is disabled.
658 */
659 const struct shader_info *
660 iris_get_shader_info(const struct iris_context *ice, gl_shader_stage stage)
661 {
662 const struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[stage];
663
664 if (!ish)
665 return NULL;
666
667 const nir_shader *nir = ish->nir;
668 return &nir->info;
669 }
670
671 /**
672 * Get the union of TCS output and TES input slots.
673 *
674 * TCS and TES need to agree on a common URB entry layout. In particular,
675 * the data for all patch vertices is stored in a single URB entry (unlike
676 * GS which has one entry per input vertex). This means that per-vertex
677 * array indexing needs a stride.
678 *
679 * SSO requires locations to match, but doesn't require the number of
680 * outputs/inputs to match (in fact, the TCS often has extra outputs).
681 * So, we need to take the extra step of unifying these on the fly.
682 */
683 static void
684 get_unified_tess_slots(const struct iris_context *ice,
685 uint64_t *per_vertex_slots,
686 uint32_t *per_patch_slots)
687 {
688 const struct shader_info *tcs =
689 iris_get_shader_info(ice, MESA_SHADER_TESS_CTRL);
690 const struct shader_info *tes =
691 iris_get_shader_info(ice, MESA_SHADER_TESS_EVAL);
692
693 *per_vertex_slots = tes->inputs_read;
694 *per_patch_slots = tes->patch_inputs_read;
695
696 if (tcs) {
697 *per_vertex_slots |= tcs->outputs_written;
698 *per_patch_slots |= tcs->patch_outputs_written;
699 }
700 }
701
702 /**
703 * Compile a tessellation control shader, and upload the assembly.
704 */
705 static struct iris_compiled_shader *
706 iris_compile_tcs(struct iris_context *ice,
707 struct iris_uncompiled_shader *ish,
708 const struct brw_tcs_prog_key *key)
709 {
710 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
711 const struct brw_compiler *compiler = screen->compiler;
712 const struct nir_shader_compiler_options *options =
713 compiler->glsl_compiler_options[MESA_SHADER_TESS_CTRL].NirOptions;
714 const struct gen_device_info *devinfo = &screen->devinfo;
715 void *mem_ctx = ralloc_context(NULL);
716 struct brw_tcs_prog_data *tcs_prog_data =
717 rzalloc(mem_ctx, struct brw_tcs_prog_data);
718 struct brw_vue_prog_data *vue_prog_data = &tcs_prog_data->base;
719 struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
720 enum brw_param_builtin *system_values = NULL;
721 unsigned num_system_values = 0;
722 unsigned num_cbufs = 0;
723
724 nir_shader *nir;
725
726 if (ish) {
727 nir = nir_shader_clone(mem_ctx, ish->nir);
728
729 iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
730 &num_system_values, &num_cbufs);
731 assign_common_binding_table_offsets(devinfo, nir, prog_data, 0,
732 num_system_values, num_cbufs);
733 } else {
734 nir = brw_nir_create_passthrough_tcs(mem_ctx, compiler, options, key);
735
736 /* Reserve space for passing the default tess levels as constants. */
737 num_system_values = 8;
738 system_values =
739 rzalloc_array(mem_ctx, enum brw_param_builtin, num_system_values);
740 prog_data->param = rzalloc_array(mem_ctx, uint32_t, num_system_values);
741 prog_data->nr_params = num_system_values;
742
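/* The eight constants hold the default outer/inner tess levels, filled
 * from the top slot (7) downward; which slots are used depends on the
 * tessellation domain, as the cases below show.
 */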
743 if (key->tes_primitive_mode == GL_QUADS) {
744 for (int i = 0; i < 4; i++)
745 system_values[7 - i] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i;
746
747 system_values[3] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X;
748 system_values[2] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_Y;
749 } else if (key->tes_primitive_mode == GL_TRIANGLES) {
750 for (int i = 0; i < 3; i++)
751 system_values[7 - i] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i;
752
753 system_values[4] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X;
754 } else {
755 assert(key->tes_primitive_mode == GL_ISOLINES);
756 system_values[7] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_Y;
757 system_values[6] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X;
758 }
759
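/* Push the first range of constant buffer 0 (UBO ranges are in 32-byte
 * units) so those default tess level constants actually reach the shader.
 */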
760 prog_data->ubo_ranges[0].length = 1;
761 }
762
763 char *error_str = NULL;
764 const unsigned *program =
765 brw_compile_tcs(compiler, &ice->dbg, mem_ctx, key, tcs_prog_data, nir,
766 -1, &error_str);
767 if (program == NULL) {
768 dbg_printf("Failed to compile control shader: %s\n", error_str);
769 ralloc_free(mem_ctx);
770 return NULL;
771 }
772
773 if (ish) {
774 if (ish->compiled_once) {
775 iris_debug_recompile(ice, &nir->info, key->program_string_id, key);
776 } else {
777 ish->compiled_once = true;
778 }
779 }
780
781 struct iris_compiled_shader *shader =
782 iris_upload_shader(ice, IRIS_CACHE_TCS, sizeof(*key), key, program,
783 prog_data, NULL, system_values, num_system_values,
784 num_cbufs);
785
786 if (ish)
787 iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
788
789 ralloc_free(mem_ctx);
790 return shader;
791 }
792
793 /**
794 * Update the current tessellation control shader variant.
795 *
796 * Fill out the key, look in the cache, compile and bind if needed.
797 */
798 static void
799 iris_update_compiled_tcs(struct iris_context *ice)
800 {
801 struct iris_uncompiled_shader *tcs =
802 ice->shaders.uncompiled[MESA_SHADER_TESS_CTRL];
803 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
804 const struct gen_device_info *devinfo = &screen->devinfo;
805
806 const struct shader_info *tes_info =
807 iris_get_shader_info(ice, MESA_SHADER_TESS_EVAL);
808 struct brw_tcs_prog_key key = {
809 KEY_INIT_NO_ID(devinfo->gen),
810 .program_string_id = tcs ? tcs->program_id : 0,
811 .tes_primitive_mode = tes_info->tess.primitive_mode,
812 .input_vertices = ice->state.vertices_per_patch,
813 };
814 get_unified_tess_slots(ice, &key.outputs_written,
815 &key.patch_outputs_written);
816 ice->vtbl.populate_tcs_key(ice, &key);
817
818 struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_TCS];
819 struct iris_compiled_shader *shader =
820 iris_find_cached_shader(ice, IRIS_CACHE_TCS, sizeof(key), &key);
821
822 if (tcs && !shader)
823 shader = iris_disk_cache_retrieve(ice, tcs, &key, sizeof(key));
824
825 if (!shader)
826 shader = iris_compile_tcs(ice, tcs, &key);
827
828 if (old != shader) {
829 ice->shaders.prog[IRIS_CACHE_TCS] = shader;
830 ice->state.dirty |= IRIS_DIRTY_TCS |
831 IRIS_DIRTY_BINDINGS_TCS |
832 IRIS_DIRTY_CONSTANTS_TCS;
833 }
834 }
835
836 /**
837 * Compile a tessellation evaluation shader, and upload the assembly.
838 */
839 static struct iris_compiled_shader *
840 iris_compile_tes(struct iris_context *ice,
841 struct iris_uncompiled_shader *ish,
842 const struct brw_tes_prog_key *key)
843 {
844 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
845 const struct brw_compiler *compiler = screen->compiler;
846 const struct gen_device_info *devinfo = &screen->devinfo;
847 void *mem_ctx = ralloc_context(NULL);
848 struct brw_tes_prog_data *tes_prog_data =
849 rzalloc(mem_ctx, struct brw_tes_prog_data);
850 struct brw_vue_prog_data *vue_prog_data = &tes_prog_data->base;
851 struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
852 enum brw_param_builtin *system_values;
853 unsigned num_system_values;
854 unsigned num_cbufs;
855
856 nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
857
858 iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
859 &num_system_values, &num_cbufs);
860
861 assign_common_binding_table_offsets(devinfo, nir, prog_data, 0,
862 num_system_values, num_cbufs);
863
864 struct brw_vue_map input_vue_map;
865 brw_compute_tess_vue_map(&input_vue_map, key->inputs_read,
866 key->patch_inputs_read);
867
868 char *error_str = NULL;
869 const unsigned *program =
870 brw_compile_tes(compiler, &ice->dbg, mem_ctx, key, &input_vue_map,
871 tes_prog_data, nir, NULL, -1, &error_str);
872 if (program == NULL) {
873 dbg_printf("Failed to compile evaluation shader: %s\n", error_str);
874 ralloc_free(mem_ctx);
875 return NULL;
876 }
877
878 if (ish->compiled_once) {
879 iris_debug_recompile(ice, &nir->info, key->program_string_id, key);
880 } else {
881 ish->compiled_once = true;
882 }
883
884 uint32_t *so_decls =
885 ice->vtbl.create_so_decl_list(&ish->stream_output,
886 &vue_prog_data->vue_map);
887
888
889 struct iris_compiled_shader *shader =
890 iris_upload_shader(ice, IRIS_CACHE_TES, sizeof(*key), key, program,
891 prog_data, so_decls, system_values, num_system_values,
892 num_cbufs);
893
894 iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
895
896 ralloc_free(mem_ctx);
897 return shader;
898 }
899
900 /**
901 * Update the current tessellation evaluation shader variant.
902 *
903 * Fill out the key, look in the cache, compile and bind if needed.
904 */
905 static void
906 iris_update_compiled_tes(struct iris_context *ice)
907 {
908 struct iris_uncompiled_shader *ish =
909 ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL];
910 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
911 const struct gen_device_info *devinfo = &screen->devinfo;
912
913 struct brw_tes_prog_key key = { KEY_INIT(devinfo->gen) };
914 get_unified_tess_slots(ice, &key.inputs_read, &key.patch_inputs_read);
915 ice->vtbl.populate_tes_key(ice, &key);
916
917 struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_TES];
918 struct iris_compiled_shader *shader =
919 iris_find_cached_shader(ice, IRIS_CACHE_TES, sizeof(key), &key);
920
921 if (!shader)
922 shader = iris_disk_cache_retrieve(ice, ish, &key, sizeof(key));
923
924 if (!shader)
925 shader = iris_compile_tes(ice, ish, &key);
926
927 if (old != shader) {
928 ice->shaders.prog[IRIS_CACHE_TES] = shader;
929 ice->state.dirty |= IRIS_DIRTY_TES |
930 IRIS_DIRTY_BINDINGS_TES |
931 IRIS_DIRTY_CONSTANTS_TES;
932 }
933
934 /* TODO: Could compare and avoid flagging this. */
935 const struct shader_info *tes_info = &ish->nir->info;
936 if (tes_info->system_values_read & (1ull << SYSTEM_VALUE_VERTICES_IN)) {
937 ice->state.dirty |= IRIS_DIRTY_CONSTANTS_TES;
938 ice->state.shaders[MESA_SHADER_TESS_EVAL].cbuf0_needs_upload = true;
939 }
940 }
941
942 /**
943 * Compile a geometry shader, and upload the assembly.
944 */
945 static struct iris_compiled_shader *
946 iris_compile_gs(struct iris_context *ice,
947 struct iris_uncompiled_shader *ish,
948 const struct brw_gs_prog_key *key)
949 {
950 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
951 const struct brw_compiler *compiler = screen->compiler;
952 const struct gen_device_info *devinfo = &screen->devinfo;
953 void *mem_ctx = ralloc_context(NULL);
954 struct brw_gs_prog_data *gs_prog_data =
955 rzalloc(mem_ctx, struct brw_gs_prog_data);
956 struct brw_vue_prog_data *vue_prog_data = &gs_prog_data->base;
957 struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
958 enum brw_param_builtin *system_values;
959 unsigned num_system_values;
960 unsigned num_cbufs;
961
962 nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
963
964 iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
965 &num_system_values, &num_cbufs);
966
967 assign_common_binding_table_offsets(devinfo, nir, prog_data, 0,
968 num_system_values, num_cbufs);
969
970 brw_compute_vue_map(devinfo,
971 &vue_prog_data->vue_map, nir->info.outputs_written,
972 nir->info.separate_shader);
973
974 char *error_str = NULL;
975 const unsigned *program =
976 brw_compile_gs(compiler, &ice->dbg, mem_ctx, key, gs_prog_data, nir,
977 NULL, -1, &error_str);
978 if (program == NULL) {
979 dbg_printf("Failed to compile geometry shader: %s\n", error_str);
980 ralloc_free(mem_ctx);
981 return NULL;
982 }
983
984 if (ish->compiled_once) {
985 iris_debug_recompile(ice, &nir->info, key->program_string_id, key);
986 } else {
987 ish->compiled_once = true;
988 }
989
990 uint32_t *so_decls =
991 ice->vtbl.create_so_decl_list(&ish->stream_output,
992 &vue_prog_data->vue_map);
993
994 struct iris_compiled_shader *shader =
995 iris_upload_shader(ice, IRIS_CACHE_GS, sizeof(*key), key, program,
996 prog_data, so_decls, system_values, num_system_values,
997 num_cbufs);
998
999 iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
1000
1001 ralloc_free(mem_ctx);
1002 return shader;
1003 }
1004
1005 /**
1006 * Update the current geometry shader variant.
1007 *
1008 * Fill out the key, look in the cache, compile and bind if needed.
1009 */
1010 static void
1011 iris_update_compiled_gs(struct iris_context *ice)
1012 {
1013 struct iris_uncompiled_shader *ish =
1014 ice->shaders.uncompiled[MESA_SHADER_GEOMETRY];
1015 struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_GS];
1016 struct iris_compiled_shader *shader = NULL;
1017
1018 if (ish) {
1019 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1020 const struct gen_device_info *devinfo = &screen->devinfo;
1021 struct brw_gs_prog_key key = { KEY_INIT(devinfo->gen) };
1022 ice->vtbl.populate_gs_key(ice, &key);
1023
1024 shader =
1025 iris_find_cached_shader(ice, IRIS_CACHE_GS, sizeof(key), &key);
1026
1027 if (!shader)
1028 shader = iris_disk_cache_retrieve(ice, ish, &key, sizeof(key));
1029
1030 if (!shader)
1031 shader = iris_compile_gs(ice, ish, &key);
1032 }
1033
1034 if (old != shader) {
1035 ice->shaders.prog[IRIS_CACHE_GS] = shader;
1036 ice->state.dirty |= IRIS_DIRTY_GS |
1037 IRIS_DIRTY_BINDINGS_GS |
1038 IRIS_DIRTY_CONSTANTS_GS;
1039 }
1040 }
1041
1042 /**
1043 * Compile a fragment (pixel) shader, and upload the assembly.
1044 */
1045 static struct iris_compiled_shader *
1046 iris_compile_fs(struct iris_context *ice,
1047 struct iris_uncompiled_shader *ish,
1048 const struct brw_wm_prog_key *key,
1049 struct brw_vue_map *vue_map)
1050 {
1051 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1052 const struct brw_compiler *compiler = screen->compiler;
1053 const struct gen_device_info *devinfo = &screen->devinfo;
1054 void *mem_ctx = ralloc_context(NULL);
1055 struct brw_wm_prog_data *fs_prog_data =
1056 rzalloc(mem_ctx, struct brw_wm_prog_data);
1057 struct brw_stage_prog_data *prog_data = &fs_prog_data->base;
1058 enum brw_param_builtin *system_values;
1059 unsigned num_system_values;
1060 unsigned num_cbufs;
1061
1062 nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
1063
1064 if (nir->info.name && strncmp(nir->info.name, "ARB", 3) == 0)
1065 prog_data->use_alt_mode = true;
1066
1067 iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
1068 &num_system_values, &num_cbufs);
1069
1070 assign_common_binding_table_offsets(devinfo, nir, prog_data,
1071 MAX2(key->nr_color_regions, 1),
1072 num_system_values, num_cbufs);
1073 char *error_str = NULL;
1074 const unsigned *program =
1075 brw_compile_fs(compiler, &ice->dbg, mem_ctx, key, fs_prog_data,
1076 nir, NULL, -1, -1, -1, true, false, vue_map, &error_str);
1077 if (program == NULL) {
1078 dbg_printf("Failed to compile fragment shader: %s\n", error_str);
1079 ralloc_free(mem_ctx);
1080 return NULL;
1081 }
1082
1083 if (ish->compiled_once) {
1084 iris_debug_recompile(ice, &nir->info, key->program_string_id, key);
1085 } else {
1086 ish->compiled_once = true;
1087 }
1088
1089 struct iris_compiled_shader *shader =
1090 iris_upload_shader(ice, IRIS_CACHE_FS, sizeof(*key), key, program,
1091 prog_data, NULL, system_values, num_system_values,
1092 num_cbufs);
1093
1094 iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
1095
1096 ralloc_free(mem_ctx);
1097 return shader;
1098 }
1099
1100 /**
1101 * Update the current fragment shader variant.
1102 *
1103 * Fill out the key, look in the cache, compile and bind if needed.
1104 */
1105 static void
1106 iris_update_compiled_fs(struct iris_context *ice)
1107 {
1108 struct iris_uncompiled_shader *ish =
1109 ice->shaders.uncompiled[MESA_SHADER_FRAGMENT];
1110 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1111 const struct gen_device_info *devinfo = &screen->devinfo;
1112 struct brw_wm_prog_key key = { KEY_INIT(devinfo->gen) };
1113 ice->vtbl.populate_fs_key(ice, &key);
1114
1115 if (ish->nos & (1ull << IRIS_NOS_LAST_VUE_MAP))
1116 key.input_slots_valid = ice->shaders.last_vue_map->slots_valid;
1117
1118 struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_FS];
1119 struct iris_compiled_shader *shader =
1120 iris_find_cached_shader(ice, IRIS_CACHE_FS, sizeof(key), &key);
1121
1122 if (!shader)
1123 shader = iris_disk_cache_retrieve(ice, ish, &key, sizeof(key));
1124
1125 if (!shader)
1126 shader = iris_compile_fs(ice, ish, &key, ice->shaders.last_vue_map);
1127
1128 if (old != shader) {
1129 // XXX: only need to flag CLIP if barycentric has NONPERSPECTIVE
1130 // toggles. might be able to avoid flagging SBE too.
1131 ice->shaders.prog[IRIS_CACHE_FS] = shader;
1132 ice->state.dirty |= IRIS_DIRTY_FS |
1133 IRIS_DIRTY_BINDINGS_FS |
1134 IRIS_DIRTY_CONSTANTS_FS |
1135 IRIS_DIRTY_WM |
1136 IRIS_DIRTY_CLIP |
1137 IRIS_DIRTY_SBE;
1138 }
1139 }
1140
1141 /**
1142 * Get the last enabled shader stage in the geometry pipeline (VS, TES, or GS).
1143 *
1144 * This stage is the one which will feed stream output and the rasterizer.
1145 */
1146 static gl_shader_stage
1147 last_vue_stage(struct iris_context *ice)
1148 {
1149 if (ice->shaders.prog[MESA_SHADER_GEOMETRY])
1150 return MESA_SHADER_GEOMETRY;
1151
1152 if (ice->shaders.prog[MESA_SHADER_TESS_EVAL])
1153 return MESA_SHADER_TESS_EVAL;
1154
1155 return MESA_SHADER_VERTEX;
1156 }
1157
1158 /**
1159 * Update the last enabled stage's VUE map.
1160 *
1161 * When the shader feeding the rasterizer's output interface changes, we
1162 * need to re-emit various packets.
1163 */
1164 static void
1165 update_last_vue_map(struct iris_context *ice,
1166 struct brw_stage_prog_data *prog_data)
1167 {
1168 struct brw_vue_prog_data *vue_prog_data = (void *) prog_data;
1169 struct brw_vue_map *vue_map = &vue_prog_data->vue_map;
1170 struct brw_vue_map *old_map = ice->shaders.last_vue_map;
1171 const uint64_t changed_slots =
1172 (old_map ? old_map->slots_valid : 0ull) ^ vue_map->slots_valid;
1173
1174 if (changed_slots & VARYING_BIT_VIEWPORT) {
1175 // XXX: could use ctx->Const.MaxViewports for old API efficiency
1176 ice->state.num_viewports =
1177 (vue_map->slots_valid & VARYING_BIT_VIEWPORT) ? IRIS_MAX_VIEWPORTS : 1;
1178 ice->state.dirty |= IRIS_DIRTY_CLIP |
1179 IRIS_DIRTY_SF_CL_VIEWPORT |
1180 IRIS_DIRTY_CC_VIEWPORT |
1181 IRIS_DIRTY_SCISSOR_RECT |
1182 IRIS_DIRTY_UNCOMPILED_FS |
1183 ice->state.dirty_for_nos[IRIS_NOS_LAST_VUE_MAP];
1184 // XXX: CC_VIEWPORT?
1185 }
1186
1187 if (changed_slots || (old_map && old_map->separate != vue_map->separate)) {
1188 ice->state.dirty |= IRIS_DIRTY_SBE;
1189 }
1190
1191 ice->shaders.last_vue_map = &vue_prog_data->vue_map;
1192 }
1193
1194 /**
1195 * Get the prog_data for a given stage, or NULL if the stage is disabled.
1196 */
1197 static struct brw_vue_prog_data *
1198 get_vue_prog_data(struct iris_context *ice, gl_shader_stage stage)
1199 {
1200 if (!ice->shaders.prog[stage])
1201 return NULL;
1202
1203 return (void *) ice->shaders.prog[stage]->prog_data;
1204 }
1205
1206 // XXX: iris_compiled_shaders are space-leaking :(
1207 // XXX: do remember to unbind them if deleting them.
1208
1209 /**
1210 * Update the current shader variants for the given state.
1211 *
1212 * This should be called on every draw call to ensure that the correct
1213 * shaders are bound. It will also flag any dirty state triggered by
1214 * swapping out those shaders.
1215 */
1216 void
1217 iris_update_compiled_shaders(struct iris_context *ice)
1218 {
1219 const uint64_t dirty = ice->state.dirty;
1220
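/* Snapshot the old VUE prog_datas so that, after updating the shaders,
 * we can tell whether any URB entry sizes changed and flag IRIS_DIRTY_URB
 * (see the bottom of this function).
 */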
1221 struct brw_vue_prog_data *old_prog_datas[4];
1222 if (!(dirty & IRIS_DIRTY_URB)) {
1223 for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++)
1224 old_prog_datas[i] = get_vue_prog_data(ice, i);
1225 }
1226
1227 if (dirty & (IRIS_DIRTY_UNCOMPILED_TCS | IRIS_DIRTY_UNCOMPILED_TES)) {
1228 struct iris_uncompiled_shader *tes =
1229 ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL];
1230 if (tes) {
1231 iris_update_compiled_tcs(ice);
1232 iris_update_compiled_tes(ice);
1233 } else {
1234 ice->shaders.prog[IRIS_CACHE_TCS] = NULL;
1235 ice->shaders.prog[IRIS_CACHE_TES] = NULL;
1236 ice->state.dirty |=
1237 IRIS_DIRTY_TCS | IRIS_DIRTY_TES |
1238 IRIS_DIRTY_BINDINGS_TCS | IRIS_DIRTY_BINDINGS_TES |
1239 IRIS_DIRTY_CONSTANTS_TCS | IRIS_DIRTY_CONSTANTS_TES;
1240 }
1241 }
1242
1243 if (dirty & IRIS_DIRTY_UNCOMPILED_VS)
1244 iris_update_compiled_vs(ice);
1245 if (dirty & IRIS_DIRTY_UNCOMPILED_GS)
1246 iris_update_compiled_gs(ice);
1247
1248 if (dirty & (IRIS_DIRTY_UNCOMPILED_GS | IRIS_DIRTY_UNCOMPILED_TES)) {
1249 const struct iris_compiled_shader *gs =
1250 ice->shaders.prog[MESA_SHADER_GEOMETRY];
1251 const struct iris_compiled_shader *tes =
1252 ice->shaders.prog[MESA_SHADER_TESS_EVAL];
1253
1254 bool points_or_lines = false;
1255
1256 if (gs) {
1257 const struct brw_gs_prog_data *gs_prog_data = (void *) gs->prog_data;
1258 points_or_lines =
1259 gs_prog_data->output_topology == _3DPRIM_POINTLIST ||
1260 gs_prog_data->output_topology == _3DPRIM_LINESTRIP;
1261 } else if (tes) {
1262 const struct brw_tes_prog_data *tes_data = (void *) tes->prog_data;
1263 points_or_lines =
1264 tes_data->output_topology == BRW_TESS_OUTPUT_TOPOLOGY_LINE ||
1265 tes_data->output_topology == BRW_TESS_OUTPUT_TOPOLOGY_POINT;
1266 }
1267
1268 if (ice->shaders.output_topology_is_points_or_lines != points_or_lines) {
1269 /* Outbound to XY Clip enables */
1270 ice->shaders.output_topology_is_points_or_lines = points_or_lines;
1271 ice->state.dirty |= IRIS_DIRTY_CLIP;
1272 }
1273 }
1274
1275 gl_shader_stage last_stage = last_vue_stage(ice);
1276 struct iris_compiled_shader *shader = ice->shaders.prog[last_stage];
1277 struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[last_stage];
1278 update_last_vue_map(ice, shader->prog_data);
1279 if (ice->state.streamout != shader->streamout) {
1280 ice->state.streamout = shader->streamout;
1281 ice->state.dirty |= IRIS_DIRTY_SO_DECL_LIST | IRIS_DIRTY_STREAMOUT;
1282 }
1283
1284 if (ice->state.streamout_active) {
1285 for (int i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
1286 struct iris_stream_output_target *so =
1287 (void *) ice->state.so_target[i];
1288 if (so)
1289 so->stride = ish->stream_output.stride[i];
1290 }
1291 }
1292
1293 if (dirty & IRIS_DIRTY_UNCOMPILED_FS)
1294 iris_update_compiled_fs(ice);
1295
1296 /* Changing shader interfaces may require a new URB configuration. */
1297 if (!(dirty & IRIS_DIRTY_URB)) {
1298 for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
1299 struct brw_vue_prog_data *old = old_prog_datas[i];
1300 struct brw_vue_prog_data *new = get_vue_prog_data(ice, i);
1301 if (!!old != !!new ||
1302 (new && new->urb_entry_size != old->urb_entry_size)) {
1303 ice->state.dirty |= IRIS_DIRTY_URB;
1304 break;
1305 }
1306 }
1307 }
1308 }
1309
1310 static struct iris_compiled_shader *
1311 iris_compile_cs(struct iris_context *ice,
1312 struct iris_uncompiled_shader *ish,
1313 const struct brw_cs_prog_key *key)
1314 {
1315 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1316 const struct brw_compiler *compiler = screen->compiler;
1317 const struct gen_device_info *devinfo = &screen->devinfo;
1318 void *mem_ctx = ralloc_context(NULL);
1319 struct brw_cs_prog_data *cs_prog_data =
1320 rzalloc(mem_ctx, struct brw_cs_prog_data);
1321 struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
1322 enum brw_param_builtin *system_values;
1323 unsigned num_system_values;
1324 unsigned num_cbufs;
1325
1326 nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
1327
1328 cs_prog_data->binding_table.work_groups_start = 0;
1329
1330 prog_data->total_shared = nir->info.cs.shared_size;
1331
1332 iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
1333 &num_system_values, &num_cbufs);
1334
1335 assign_common_binding_table_offsets(devinfo, nir, prog_data, 1,
1336 num_system_values, num_cbufs);
1337
1338 char *error_str = NULL;
1339 const unsigned *program =
1340 brw_compile_cs(compiler, &ice->dbg, mem_ctx, key, cs_prog_data,
1341 nir, -1, &error_str);
1342 if (program == NULL) {
1343 dbg_printf("Failed to compile compute shader: %s\n", error_str);
1344 ralloc_free(mem_ctx);
1345 return NULL;
1346 }
1347
1348 if (ish->compiled_once) {
1349 iris_debug_recompile(ice, &nir->info, key->program_string_id, key);
1350 } else {
1351 ish->compiled_once = true;
1352 }
1353
1354 struct iris_compiled_shader *shader =
1355 iris_upload_shader(ice, IRIS_CACHE_CS, sizeof(*key), key, program,
1356 prog_data, NULL, system_values, num_system_values,
1357 num_cbufs);
1358
1359 iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
1360
1361 ralloc_free(mem_ctx);
1362 return shader;
1363 }
1364
1365 void
1366 iris_update_compiled_compute_shader(struct iris_context *ice)
1367 {
1368 struct iris_uncompiled_shader *ish =
1369 ice->shaders.uncompiled[MESA_SHADER_COMPUTE];
1370
1371 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1372 const struct gen_device_info *devinfo = &screen->devinfo;
1373 struct brw_cs_prog_key key = { KEY_INIT(devinfo->gen) };
1374 ice->vtbl.populate_cs_key(ice, &key);
1375
1376 struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_CS];
1377 struct iris_compiled_shader *shader =
1378 iris_find_cached_shader(ice, IRIS_CACHE_CS, sizeof(key), &key);
1379
1380 if (!shader)
1381 shader = iris_disk_cache_retrieve(ice, ish, &key, sizeof(key));
1382
1383 if (!shader)
1384 shader = iris_compile_cs(ice, ish, &key);
1385
1386 if (old != shader) {
1387 ice->shaders.prog[IRIS_CACHE_CS] = shader;
1388 ice->state.dirty |= IRIS_DIRTY_CS |
1389 IRIS_DIRTY_BINDINGS_CS |
1390 IRIS_DIRTY_CONSTANTS_CS;
1391 }
1392 }
1393
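/**
 * Fill out the per-thread push constant data for a compute shader.
 *
 * The only per-thread datum is the subgroup ID: each thread's block is
 * padded to 32 bytes (8 dwords), with its thread index stored in dword 0,
 * matching the asserts below.
 */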
1394 void
1395 iris_fill_cs_push_const_buffer(struct brw_cs_prog_data *cs_prog_data,
1396 uint32_t *dst)
1397 {
1398 assert(cs_prog_data->push.total.size > 0);
1399 assert(cs_prog_data->push.cross_thread.size == 0);
1400 assert(cs_prog_data->push.per_thread.dwords == 1);
1401 assert(cs_prog_data->base.param[0] == BRW_PARAM_BUILTIN_SUBGROUP_ID);
1402 for (unsigned t = 0; t < cs_prog_data->threads; t++)
1403 dst[8 * t] = t;
1404 }
1405
1406 /**
1407 * Allocate scratch BOs as needed for the given per-thread size and stage.
1408 */
1409 struct iris_bo *
1410 iris_get_scratch_space(struct iris_context *ice,
1411 unsigned per_thread_scratch,
1412 gl_shader_stage stage)
1413 {
1414 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1415 struct iris_bufmgr *bufmgr = screen->bufmgr;
1416 const struct gen_device_info *devinfo = &screen->devinfo;
1417
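/* per_thread_scratch must be a power of two, at least 1KB; encoded_size
 * then matches the hardware's per-thread scratch space encoding of
 * log2(size in bytes) - 10 (ffs() is one-based, hence the -11).
 */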
1418 unsigned encoded_size = ffs(per_thread_scratch) - 11;
1419 assert(encoded_size < (1 << 16));
1420
1421 struct iris_bo **bop = &ice->shaders.scratch_bos[encoded_size][stage];
1422
1423 /* The documentation for 3DSTATE_PS "Scratch Space Base Pointer" says:
1424 *
1425 * "Scratch Space per slice is computed based on 4 sub-slices. SW
1426 * must allocate scratch space enough so that each slice has 4
1427 * slices allowed."
1428 *
1429 * According to the other driver team, this applies to compute shaders
1430 * as well. This is not currently documented at all.
1431 *
1432 * This hack is no longer necessary on Gen11+.
1433 */
1434 unsigned subslice_total = screen->subslice_total;
1435 if (devinfo->gen < 11)
1436 subslice_total = 4 * devinfo->num_slices;
1437 assert(subslice_total >= screen->subslice_total);
1438
1439 if (!*bop) {
1440 unsigned scratch_ids_per_subslice = devinfo->max_cs_threads;
1441 uint32_t max_threads[] = {
1442 [MESA_SHADER_VERTEX] = devinfo->max_vs_threads,
1443 [MESA_SHADER_TESS_CTRL] = devinfo->max_tcs_threads,
1444 [MESA_SHADER_TESS_EVAL] = devinfo->max_tes_threads,
1445 [MESA_SHADER_GEOMETRY] = devinfo->max_gs_threads,
1446 [MESA_SHADER_FRAGMENT] = devinfo->max_wm_threads,
1447 [MESA_SHADER_COMPUTE] = scratch_ids_per_subslice * subslice_total,
1448 };
1449
1450 uint32_t size = per_thread_scratch * max_threads[stage];
1451
1452 *bop = iris_bo_alloc(bufmgr, "scratch", size, IRIS_MEMZONE_SHADER);
1453 }
1454
1455 return *bop;
1456 }
1457
1458 /* ------------------------------------------------------------------- */
1459
1460 /**
1461 * The pipe->create_[stage]_state() driver hooks.
1462 *
1463 * Performs basic NIR preprocessing, records any state dependencies, and
1464 * returns an iris_uncompiled_shader as the Gallium CSO.
1465 *
1466 * Actual shader compilation to assembly happens later, at first use.
1467 */
1468 static void *
1469 iris_create_uncompiled_shader(struct pipe_context *ctx,
1470 nir_shader *nir,
1471 const struct pipe_stream_output_info *so_info)
1472 {
1473 struct iris_screen *screen = (struct iris_screen *)ctx->screen;
1474 const struct gen_device_info *devinfo = &screen->devinfo;
1475
1476 struct iris_uncompiled_shader *ish =
1477 calloc(1, sizeof(struct iris_uncompiled_shader));
1478 if (!ish)
1479 return NULL;
1480
1481 nir = brw_preprocess_nir(screen->compiler, nir, NULL);
1482
1483 NIR_PASS_V(nir, brw_nir_lower_image_load_store, devinfo);
1484 NIR_PASS_V(nir, iris_lower_storage_image_derefs);
1485
1486 ish->program_id = get_new_program_id(screen);
1487 ish->nir = nir;
1488 if (so_info) {
1489 memcpy(&ish->stream_output, so_info, sizeof(*so_info));
1490 update_so_info(&ish->stream_output, nir->info.outputs_written);
1491 }
1492
1493 if (screen->disk_cache) {
1494 /* Serialize the NIR to a binary blob that we can hash for the disk
1495 * cache. First, drop unnecessary information (like variable names)
1496 * so the serialized NIR is smaller, and also to let us detect more
1497 * isomorphic shaders when hashing, increasing cache hits.
1498 *
1499 * We skip this step when not using the disk cache, as variable names
1500 * are useful for inspecting and debugging shaders.
1501 */
1502 nir_strip(nir);
1503
1504 struct blob blob;
1505 blob_init(&blob);
1506 nir_serialize(&blob, ish->nir);
1507 ish->ir_cache_binary = malloc(blob.size);
1508 ish->ir_cache_binary_size = blob.size;
1509 memcpy(ish->ir_cache_binary, blob.data, blob.size);
1510 blob_finish(&blob);
1511 }
1512
1513 return ish;
1514 }
1515
1516 static struct iris_uncompiled_shader *
1517 iris_create_shader_state(struct pipe_context *ctx,
1518 const struct pipe_shader_state *state)
1519 {
1520 struct nir_shader *nir;
1521
1522 if (state->type == PIPE_SHADER_IR_TGSI)
1523 nir = tgsi_to_nir(state->tokens, ctx->screen);
1524 else
1525 nir = state->ir.nir;
1526
1527 return iris_create_uncompiled_shader(ctx, nir, &state->stream_output);
1528 }
1529
1530 static void *
1531 iris_create_vs_state(struct pipe_context *ctx,
1532 const struct pipe_shader_state *state)
1533 {
1534 struct iris_context *ice = (void *) ctx;
1535 struct iris_screen *screen = (void *) ctx->screen;
1536 struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);
1537
1538 /* User clip planes */
1539 if (ish->nir->info.clip_distance_array_size == 0)
1540 ish->nos |= (1ull << IRIS_NOS_RASTERIZER);
1541
1542 if (screen->precompile) {
1543 const struct gen_device_info *devinfo = &screen->devinfo;
1544 struct brw_vs_prog_key key = { KEY_INIT(devinfo->gen) };
1545
1546 if (!iris_disk_cache_retrieve(ice, ish, &key, sizeof(key)))
1547 iris_compile_vs(ice, ish, &key);
1548 }
1549
1550 return ish;
1551 }
1552
1553 static void *
1554 iris_create_tcs_state(struct pipe_context *ctx,
1555 const struct pipe_shader_state *state)
1556 {
1557 struct iris_context *ice = (void *) ctx;
1558 struct iris_screen *screen = (void *) ctx->screen;
1559 const struct brw_compiler *compiler = screen->compiler;
1560 struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);
1561 struct shader_info *info = &ish->nir->info;
1562
1563 // XXX: NOS?
1564
1565 if (screen->precompile) {
1566 const unsigned _GL_TRIANGLES = 0x0004;
1567 const struct gen_device_info *devinfo = &screen->devinfo;
1568 struct brw_tcs_prog_key key = {
1569 KEY_INIT(devinfo->gen),
1570 // XXX: make sure the linker fills this out from the TES...
1571 .tes_primitive_mode =
1572 info->tess.primitive_mode ? info->tess.primitive_mode
1573 : _GL_TRIANGLES,
1574 .outputs_written = info->outputs_written,
1575 .patch_outputs_written = info->patch_outputs_written,
1576 };
1577
1578 /* 8_PATCH mode needs the key to contain the input patch dimensionality.
1579 * We don't have that information, so we randomly guess that the input
1580 * and output patches are the same size. This is a bad guess, but we
1581 * can't do much better.
1582 */
1583 if (compiler->use_tcs_8_patch)
1584 key.input_vertices = info->tess.tcs_vertices_out;
1585
1586 if (!iris_disk_cache_retrieve(ice, ish, &key, sizeof(key)))
1587 iris_compile_tcs(ice, ish, &key);
1588 }
1589
1590 return ish;
1591 }
1592
1593 static void *
1594 iris_create_tes_state(struct pipe_context *ctx,
1595 const struct pipe_shader_state *state)
1596 {
1597 struct iris_context *ice = (void *) ctx;
1598 struct iris_screen *screen = (void *) ctx->screen;
1599 struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);
1600 struct shader_info *info = &ish->nir->info;
1601
1602 // XXX: NOS?
1603
1604 if (screen->precompile) {
1605 const struct gen_device_info *devinfo = &screen->devinfo;
1606 struct brw_tes_prog_key key = {
1607 KEY_INIT(devinfo->gen),
1608 // XXX: not ideal, need TCS output/TES input unification
1609 .inputs_read = info->inputs_read,
1610 .patch_inputs_read = info->patch_inputs_read,
1611 };
1612
1613 if (!iris_disk_cache_retrieve(ice, ish, &key, sizeof(key)))
1614 iris_compile_tes(ice, ish, &key);
1615 }
1616
1617 return ish;
1618 }
1619
static void *
iris_create_gs_state(struct pipe_context *ctx,
                     const struct pipe_shader_state *state)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_screen *screen = (void *) ctx->screen;
   struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);

   // XXX: NOS?

   if (screen->precompile) {
      const struct gen_device_info *devinfo = &screen->devinfo;
      struct brw_gs_prog_key key = { KEY_INIT(devinfo->gen) };

      if (!iris_disk_cache_retrieve(ice, ish, &key, sizeof(key)))
         iris_compile_gs(ice, ish, &key);
   }

   return ish;
}

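/**
 * The pipe->create_fs_state() driver hook.
 *
 * Fragment shaders depend on the most non-orthogonal state of any stage,
 * so this hook also records the NOS bits and builds a larger default key.
 */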
static void *
iris_create_fs_state(struct pipe_context *ctx,
                     const struct pipe_shader_state *state)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_screen *screen = (void *) ctx->screen;
   struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);
   struct shader_info *info = &ish->nir->info;

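   /* The FS compile depends on several other CSOs: the framebuffer,
    * depth/stencil/alpha, rasterizer, and blend state can all change the
    * generated code, so record them as non-orthogonal state (NOS).
    */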
   ish->nos |= (1ull << IRIS_NOS_FRAMEBUFFER) |
               (1ull << IRIS_NOS_DEPTH_STENCIL_ALPHA) |
               (1ull << IRIS_NOS_RASTERIZER) |
               (1ull << IRIS_NOS_BLEND);

   /* The program key needs the VUE map if there are > 16 inputs */
   if (util_bitcount64(ish->nir->info.inputs_read &
                       BRW_FS_VARYING_INPUT_MASK) > 16) {
      ish->nos |= (1ull << IRIS_NOS_LAST_VUE_MAP);
   }

   if (screen->precompile) {
      const uint64_t color_outputs = info->outputs_written &
         ~(BITFIELD64_BIT(FRAG_RESULT_DEPTH) |
           BITFIELD64_BIT(FRAG_RESULT_STENCIL) |
           BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK));

      bool can_rearrange_varyings =
         util_bitcount64(info->inputs_read & BRW_FS_VARYING_INPUT_MASK) <= 16;

      const struct gen_device_info *devinfo = &screen->devinfo;
      struct brw_wm_prog_key key = {
         KEY_INIT(devinfo->gen),
         .nr_color_regions = util_bitcount(color_outputs),
         .coherent_fb_fetch = true,
         .input_slots_valid =
            can_rearrange_varyings ? 0 : info->inputs_read | VARYING_BIT_POS,
      };

      if (!iris_disk_cache_retrieve(ice, ish, &key, sizeof(key)))
         iris_compile_fs(ice, ish, &key, NULL);
   }

   return ish;
}

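/**
 * The pipe->create_compute_state() driver hook.
 *
 * We only accept NIR compute shaders here, as the assert enforces;
 * otherwise this mirrors the graphics create hooks.
 */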
static void *
iris_create_compute_state(struct pipe_context *ctx,
                          const struct pipe_compute_state *state)
{
   assert(state->ir_type == PIPE_SHADER_IR_NIR);

   struct iris_context *ice = (void *) ctx;
   struct iris_screen *screen = (void *) ctx->screen;
   struct iris_uncompiled_shader *ish =
      iris_create_uncompiled_shader(ctx, (void *) state->prog, NULL);

   // XXX: disallow more than 64KB of shared variables

   if (screen->precompile) {
      const struct gen_device_info *devinfo = &screen->devinfo;
      struct brw_cs_prog_key key = { KEY_INIT(devinfo->gen) };

      if (!iris_disk_cache_retrieve(ice, ish, &key, sizeof(key)))
         iris_compile_cs(ice, ish, &key);
   }

   return ish;
}

/**
 * The pipe->delete_[stage]_state() driver hooks.
 *
 * Frees the iris_uncompiled_shader.
 */
static void
iris_delete_shader_state(struct pipe_context *ctx, void *state,
                         gl_shader_stage stage)
{
   struct iris_uncompiled_shader *ish = state;
   struct iris_context *ice = (void *) ctx;

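   /* If the shader being deleted is currently bound, unbind it first and
    * flag the stage dirty so nothing keeps referencing the freed shader.
    */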
   if (ice->shaders.uncompiled[stage] == ish) {
      ice->shaders.uncompiled[stage] = NULL;
      ice->state.dirty |= IRIS_DIRTY_UNCOMPILED_VS << stage;
   }

   ralloc_free(ish->nir);
   free(ish);
}

static void
iris_delete_vs_state(struct pipe_context *ctx, void *state)
{
   iris_delete_shader_state(ctx, state, MESA_SHADER_VERTEX);
}

static void
iris_delete_tcs_state(struct pipe_context *ctx, void *state)
{
   iris_delete_shader_state(ctx, state, MESA_SHADER_TESS_CTRL);
}

static void
iris_delete_tes_state(struct pipe_context *ctx, void *state)
{
   iris_delete_shader_state(ctx, state, MESA_SHADER_TESS_EVAL);
}

static void
iris_delete_gs_state(struct pipe_context *ctx, void *state)
{
   iris_delete_shader_state(ctx, state, MESA_SHADER_GEOMETRY);
}

static void
iris_delete_fs_state(struct pipe_context *ctx, void *state)
{
   iris_delete_shader_state(ctx, state, MESA_SHADER_FRAGMENT);
}

static void
iris_delete_cs_state(struct pipe_context *ctx, void *state)
{
   iris_delete_shader_state(ctx, state, MESA_SHADER_COMPUTE);
}

/**
 * The pipe->bind_[stage]_state() driver hook.
 *
 * Binds an uncompiled shader as the current one for a particular stage.
 * Updates dirty tracking to account for the shader's NOS.
 */
static void
bind_state(struct iris_context *ice,
           struct iris_uncompiled_shader *ish,
           gl_shader_stage stage)
{
   uint64_t dirty_bit = IRIS_DIRTY_UNCOMPILED_VS << stage;
   const uint64_t nos = ish ? ish->nos : 0;

   const struct shader_info *old_info = iris_get_shader_info(ice, stage);
   const struct shader_info *new_info = ish ? &ish->nir->info : NULL;

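   /* How much SAMPLER_STATE we need to upload depends on how many textures
    * the shader uses, so flag the sampler states for re-upload whenever
    * that count changes.
    */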
   if ((old_info ? util_last_bit(old_info->textures_used) : 0) !=
       (new_info ? util_last_bit(new_info->textures_used) : 0)) {
      ice->state.dirty |= IRIS_DIRTY_SAMPLER_STATES_VS << stage;
   }

   ice->shaders.uncompiled[stage] = ish;
   ice->state.dirty |= dirty_bit;

   /* Record that CSOs need to mark IRIS_DIRTY_UNCOMPILED_XS when they change
    * (or that they no longer need to do so).
    */
   for (int i = 0; i < IRIS_NOS_COUNT; i++) {
      if (nos & (1 << i))
         ice->state.dirty_for_nos[i] |= dirty_bit;
      else
         ice->state.dirty_for_nos[i] &= ~dirty_bit;
   }
}


static void
iris_bind_vs_state(struct pipe_context *ctx, void *state)
{
   bind_state((void *) ctx, state, MESA_SHADER_VERTEX);
}

static void
iris_bind_tcs_state(struct pipe_context *ctx, void *state)
{
   bind_state((void *) ctx, state, MESA_SHADER_TESS_CTRL);
}

static void
iris_bind_tes_state(struct pipe_context *ctx, void *state)
{
   struct iris_context *ice = (struct iris_context *)ctx;

   /* Enabling/disabling optional stages requires a URB reconfiguration. */
   if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL])
      ice->state.dirty |= IRIS_DIRTY_URB;

   bind_state((void *) ctx, state, MESA_SHADER_TESS_EVAL);
}

static void
iris_bind_gs_state(struct pipe_context *ctx, void *state)
{
   struct iris_context *ice = (struct iris_context *)ctx;

   /* Enabling/disabling optional stages requires a URB reconfiguration. */
   if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_GEOMETRY])
      ice->state.dirty |= IRIS_DIRTY_URB;

   bind_state((void *) ctx, state, MESA_SHADER_GEOMETRY);
}

static void
iris_bind_fs_state(struct pipe_context *ctx, void *state)
{
   struct iris_context *ice = (struct iris_context *) ctx;
   struct iris_uncompiled_shader *old_ish =
      ice->shaders.uncompiled[MESA_SHADER_FRAGMENT];
   struct iris_uncompiled_shader *new_ish = state;

   const unsigned color_bits =
      BITFIELD64_BIT(FRAG_RESULT_COLOR) |
      BITFIELD64_RANGE(FRAG_RESULT_DATA0, BRW_MAX_DRAW_BUFFERS);

   /* Fragment shader outputs influence HasWriteableRT */
   if (!old_ish || !new_ish ||
       (old_ish->nir->info.outputs_written & color_bits) !=
       (new_ish->nir->info.outputs_written & color_bits))
      ice->state.dirty |= IRIS_DIRTY_PS_BLEND;

   bind_state((void *) ctx, state, MESA_SHADER_FRAGMENT);
}

static void
iris_bind_cs_state(struct pipe_context *ctx, void *state)
{
   bind_state((void *) ctx, state, MESA_SHADER_COMPUTE);
}

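/**
 * Plugs our shader-related driver hooks into the pipe_context, wiring up
 * the create/delete/bind CSO hooks for every shader stage.
 */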
void
iris_init_program_functions(struct pipe_context *ctx)
{
   ctx->create_vs_state = iris_create_vs_state;
   ctx->create_tcs_state = iris_create_tcs_state;
   ctx->create_tes_state = iris_create_tes_state;
   ctx->create_gs_state = iris_create_gs_state;
   ctx->create_fs_state = iris_create_fs_state;
   ctx->create_compute_state = iris_create_compute_state;

   ctx->delete_vs_state = iris_delete_vs_state;
   ctx->delete_tcs_state = iris_delete_tcs_state;
   ctx->delete_tes_state = iris_delete_tes_state;
   ctx->delete_gs_state = iris_delete_gs_state;
   ctx->delete_fs_state = iris_delete_fs_state;
   ctx->delete_compute_state = iris_delete_cs_state;

   ctx->bind_vs_state = iris_bind_vs_state;
   ctx->bind_tcs_state = iris_bind_tcs_state;
   ctx->bind_tes_state = iris_bind_tes_state;
   ctx->bind_gs_state = iris_bind_gs_state;
   ctx->bind_fs_state = iris_bind_fs_state;
   ctx->bind_compute_state = iris_bind_cs_state;
}