iris: Move upload_ubo_ssbo_surf_state to iris_program.c
[mesa.git] src/gallium/drivers/iris/iris_program.c
/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * @file iris_program.c
 *
 * This file contains the driver interface for compiling shaders.
 *
 * See iris_program_cache.c for the in-memory program cache where the
 * compiled shaders are stored.
 */

#include <stdio.h>
#include <errno.h>
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "util/u_atomic.h"
#include "util/u_upload_mgr.h"
#include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_serialize.h"
#include "intel/compiler/brw_compiler.h"
#include "intel/compiler/brw_nir.h"
#include "iris_context.h"
#include "nir/tgsi_to_nir.h"

#define KEY_INIT_NO_ID(gen)                              \
   .tex.swizzles[0 ... MAX_SAMPLERS - 1] = 0x688,        \
   .tex.compressed_multisample_layout_mask = ~0,         \
   .tex.msaa_16 = (gen >= 9 ? ~0 : 0)
#define KEY_INIT(gen) .program_string_id = ish->program_id, KEY_INIT_NO_ID(gen)
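
/* A note on the 0x688 default above (sketch, assuming mesa's standard
 * 3-bit-per-component swizzle packing): it is SWIZZLE_XYZW, the identity
 * swizzle, packed as
 *
 *    0 | (1 << 3) | (2 << 6) | (3 << 9) = 0x688
 *
 * so keys start out requesting no texture swizzling, and only differ when
 * state actually demands a non-identity swizzle.
 */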

static unsigned
get_new_program_id(struct iris_screen *screen)
{
   return p_atomic_inc_return(&screen->program_id);
}

static void *
upload_state(struct u_upload_mgr *uploader,
             struct iris_state_ref *ref,
             unsigned size,
             unsigned alignment)
{
   void *p = NULL;
   u_upload_alloc(uploader, 0, size, alignment, &ref->offset, &ref->res, &p);
   return p;
}
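
/* Convenience wrapper: u_upload_alloc() suballocates `size` bytes (aligned
 * to `alignment`) from the uploader's current buffer, records the GPU
 * buffer and offset in `ref`, and hands back a CPU-visible pointer for
 * filling in the contents. On failure the returned pointer is NULL and
 * `ref` must not be used.
 */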

void
iris_upload_ubo_ssbo_surf_state(struct iris_context *ice,
                                struct pipe_shader_buffer *buf,
                                struct iris_state_ref *surf_state,
                                bool ssbo)
{
   struct pipe_context *ctx = &ice->ctx;
   struct iris_screen *screen = (struct iris_screen *) ctx->screen;

   // XXX: these are not retained forever, use a separate uploader?
   void *map =
      upload_state(ice->state.surface_uploader, surf_state,
                   screen->isl_dev.ss.size, 64);
   if (unlikely(!map)) {
      surf_state->res = NULL;
      return;
   }

   struct iris_resource *res = (void *) buf->buffer;
   struct iris_bo *surf_bo = iris_resource_bo(surf_state->res);
   surf_state->offset += iris_bo_offset_from_base_address(surf_bo);

   isl_buffer_fill_state(&screen->isl_dev, map,
                         .address = res->bo->gtt_offset + res->offset +
                                    buf->buffer_offset,
                         .size_B = buf->buffer_size - res->offset,
                         .format = ssbo ? ISL_FORMAT_RAW
                                        : ISL_FORMAT_R32G32B32A32_FLOAT,
                         .swizzle = ISL_SWIZZLE_IDENTITY,
                         .stride_B = 1,
                         .mocs = ice->vtbl.mocs(res->bo));
}
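
/* Hypothetical caller sketch (the real callers are the constant/shader
 * buffer binding hooks in iris_state.c; the local names here are
 * illustrative only):
 *
 *    struct pipe_shader_buffer buf = {
 *       .buffer = resource,
 *       .buffer_offset = 0,
 *       .buffer_size = size,
 *    };
 *    iris_upload_ubo_ssbo_surf_state(ice, &buf, &surf_state, false);
 *
 * Afterwards, surf_state.offset is already relative to Surface State Base
 * Address, ready to be written into a binding table entry.
 */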

static nir_ssa_def *
get_aoa_deref_offset(nir_builder *b,
                     nir_deref_instr *deref,
                     unsigned elem_size)
{
   unsigned array_size = elem_size;
   nir_ssa_def *offset = nir_imm_int(b, 0);

   while (deref->deref_type != nir_deref_type_var) {
      assert(deref->deref_type == nir_deref_type_array);

      /* This level's element size is the previous level's array size */
      assert(deref->arr.index.ssa);
      nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
      offset = nir_iadd(b, offset,
                        nir_imul(b, index, nir_imm_int(b, array_size)));

      deref = nir_deref_instr_parent(deref);
      assert(glsl_type_is_array(deref->type));
      array_size *= glsl_get_length(deref->type);
   }

   /* Accessing an invalid surface index with the dataport can result in a
    * hang. According to the spec "if the index used to select an individual
    * element is negative or greater than or equal to the size of the array,
    * the results of the operation are undefined but may not lead to
    * termination" -- which is one of the possible outcomes of the hang.
    * Clamp the index to prevent access outside of the array bounds.
    */
   return nir_umin(b, offset, nir_imm_int(b, array_size - elem_size));
}
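
/* Worked example (sketch): for a deref chain img[2][3] where img is
 * declared T img[5][4] and elem_size == 1:
 *
 *    innermost level:  offset  = 3 * 1          (array_size becomes 4)
 *    next level:       offset += 2 * 4  -> 11   (array_size becomes 20)
 *
 * and the final clamp limits the result to 20 - 1 = 19, the last valid
 * element, even if the indices were out of range.
 */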

static void
iris_lower_storage_image_derefs(nir_shader *nir)
{
   nir_function_impl *impl = nir_shader_get_entrypoint(nir);

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_image_deref_load:
         case nir_intrinsic_image_deref_store:
         case nir_intrinsic_image_deref_atomic_add:
         case nir_intrinsic_image_deref_atomic_min:
         case nir_intrinsic_image_deref_atomic_max:
         case nir_intrinsic_image_deref_atomic_and:
         case nir_intrinsic_image_deref_atomic_or:
         case nir_intrinsic_image_deref_atomic_xor:
         case nir_intrinsic_image_deref_atomic_exchange:
         case nir_intrinsic_image_deref_atomic_comp_swap:
         case nir_intrinsic_image_deref_size:
         case nir_intrinsic_image_deref_samples:
         case nir_intrinsic_image_deref_load_raw_intel:
         case nir_intrinsic_image_deref_store_raw_intel: {
            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
            nir_variable *var = nir_deref_instr_get_variable(deref);

            b.cursor = nir_before_instr(&intrin->instr);
            nir_ssa_def *index =
               nir_iadd(&b, nir_imm_int(&b, var->data.driver_location),
                            get_aoa_deref_offset(&b, deref, 1));
            nir_rewrite_image_intrinsic(intrin, index, false);
            break;
         }

         default:
            break;
         }
      }
   }
}

// XXX: need unify_interfaces() at link time...

/**
 * Fix an uncompiled shader's stream output info.
 *
 * Core Gallium stores output->register_index as a "slot" number, where
 * slots are assigned consecutively to all outputs in info->outputs_written.
 * This naive packing of outputs doesn't work for us - we too have slots,
 * but the layout is defined by the VUE map, which we won't have until we
 * compile a specific shader variant. So, we remap these and simply store
 * VARYING_SLOT_* in our copy's output->register_index fields.
 *
 * We also fix up VARYING_SLOT_{LAYER,VIEWPORT,PSIZ} to select the Y/Z/W
 * components of our VUE header. See brw_vue_map.c for the layout.
 */
static void
update_so_info(struct pipe_stream_output_info *so_info,
               uint64_t outputs_written)
{
   uint8_t reverse_map[64] = {};
   unsigned slot = 0;
   while (outputs_written) {
      reverse_map[slot++] = u_bit_scan64(&outputs_written);
   }

   for (unsigned i = 0; i < so_info->num_outputs; i++) {
      struct pipe_stream_output *output = &so_info->output[i];

      /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
      output->register_index = reverse_map[output->register_index];

      /* The VUE header contains three scalar fields packed together:
       * - gl_PointSize is stored in VARYING_SLOT_PSIZ.w
       * - gl_Layer is stored in VARYING_SLOT_PSIZ.y
       * - gl_ViewportIndex is stored in VARYING_SLOT_PSIZ.z
       */
      switch (output->register_index) {
      case VARYING_SLOT_LAYER:
         assert(output->num_components == 1);
         output->register_index = VARYING_SLOT_PSIZ;
         output->start_component = 1;
         break;
      case VARYING_SLOT_VIEWPORT:
         assert(output->num_components == 1);
         output->register_index = VARYING_SLOT_PSIZ;
         output->start_component = 2;
         break;
      case VARYING_SLOT_PSIZ:
         assert(output->num_components == 1);
         output->start_component = 3;
         break;
      }

      //info->outputs_written |= 1ull << output->register_index;
   }
}
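
/* Remapping sketch: if outputs_written = POS | PSIZ | VAR0 (bits 0, 1, and
 * 31), the loop above builds reverse_map = { 0 -> VARYING_SLOT_POS,
 * 1 -> VARYING_SLOT_PSIZ, 2 -> VARYING_SLOT_VAR0 }, so a stream output
 * recorded by Gallium with register_index 2 now points at VARYING_SLOT_VAR0.
 * Likewise, a gl_Layer output is redirected to PSIZ with start_component 1,
 * per the VUE header packing described above.
 */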

/**
 * Sets up the starting offsets for the groups of binding table entries
 * common to all pipeline stages.
 *
 * Unused groups are initialized to 0xd0d0d0d0 to make it obvious that they're
 * unused but also make sure that addition of small offsets to them will
 * trigger some of our asserts that surface indices are < BRW_MAX_SURFACES.
 */
static uint32_t
assign_common_binding_table_offsets(const struct gen_device_info *devinfo,
                                    const struct nir_shader *nir,
                                    struct brw_stage_prog_data *prog_data,
                                    uint32_t next_binding_table_offset,
                                    unsigned num_system_values,
                                    unsigned num_cbufs)
{
   const struct shader_info *info = &nir->info;

   unsigned num_textures = util_last_bit(info->textures_used);

   if (num_textures) {
      prog_data->binding_table.texture_start = next_binding_table_offset;
      prog_data->binding_table.gather_texture_start = next_binding_table_offset;
      next_binding_table_offset += num_textures;
   } else {
      prog_data->binding_table.texture_start = 0xd0d0d0d0;
      prog_data->binding_table.gather_texture_start = 0xd0d0d0d0;
   }

   if (info->num_images) {
      prog_data->binding_table.image_start = next_binding_table_offset;
      next_binding_table_offset += info->num_images;
   } else {
      prog_data->binding_table.image_start = 0xd0d0d0d0;
   }

   if (num_cbufs) {
      //assert(info->num_ubos <= BRW_MAX_UBO);
      prog_data->binding_table.ubo_start = next_binding_table_offset;
      next_binding_table_offset += num_cbufs;
   } else {
      prog_data->binding_table.ubo_start = 0xd0d0d0d0;
   }

   if (info->num_ssbos || info->num_abos) {
      prog_data->binding_table.ssbo_start = next_binding_table_offset;
      // XXX: see iris_state "wasting 16 binding table slots for ABOs" comment
      next_binding_table_offset += IRIS_MAX_ABOS + info->num_ssbos;
   } else {
      prog_data->binding_table.ssbo_start = 0xd0d0d0d0;
   }

   prog_data->binding_table.shader_time_start = 0xd0d0d0d0;

   /* Plane 0 is just the regular texture section */
   prog_data->binding_table.plane_start[0] = prog_data->binding_table.texture_start;

   prog_data->binding_table.plane_start[1] = next_binding_table_offset;
   next_binding_table_offset += num_textures;

   prog_data->binding_table.plane_start[2] = next_binding_table_offset;
   next_binding_table_offset += num_textures;

   /* Set the binding table size */
   prog_data->binding_table.size_bytes = next_binding_table_offset * 4;

   return next_binding_table_offset;
}
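
/* Layout sketch: a fragment shader with 2 textures, 1 image, and 2 cbufs,
 * called with next_binding_table_offset = 1 (slot 0 reserved for the render
 * target), ends up with:
 *
 *    texture_start = 1            (slots 1-2)
 *    image_start   = 3
 *    ubo_start     = 4            (slots 4-5)
 *    ssbo_start    = 0xd0d0d0d0   (unused)
 *    plane_start   = { 1, 6, 8 }
 *
 * for a total size_bytes of 10 * 4 = 40.
 */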

static void
setup_vec4_image_sysval(uint32_t *sysvals, uint32_t idx,
                        unsigned offset, unsigned n)
{
   assert(offset % sizeof(uint32_t) == 0);

   for (unsigned i = 0; i < n; ++i)
      sysvals[i] = BRW_PARAM_IMAGE(idx, offset / sizeof(uint32_t) + i);

   for (unsigned i = n; i < 4; ++i)
      sysvals[i] = BRW_PARAM_BUILTIN_ZERO;
}
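
/* For example, a call with offset = offsetof(struct brw_image_param, size)
 * and n = 3 fills sysvals[0..2] with BRW_PARAM_IMAGE handles for the three
 * size dwords and pads sysvals[3] with BRW_PARAM_BUILTIN_ZERO, so every
 * image parameter occupies a full, predictable vec4 of push constant space.
 */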

/**
 * Associate NIR uniform variables with the prog_data->param[] mechanism
 * used by the backend. Also, decide which UBOs we'd like to push in an
 * ideal situation (though the backend can reduce this).
 */
static void
iris_setup_uniforms(const struct brw_compiler *compiler,
                    void *mem_ctx,
                    nir_shader *nir,
                    struct brw_stage_prog_data *prog_data,
                    enum brw_param_builtin **out_system_values,
                    unsigned *out_num_system_values,
                    unsigned *out_num_cbufs)
{
   UNUSED const struct gen_device_info *devinfo = compiler->devinfo;

   /* The intel compiler assumes that num_uniforms is in bytes. For
    * scalar that means 4 bytes per uniform slot.
    *
    * Ref: brw_nir_lower_uniforms, type_size_scalar_bytes.
    */
   nir->num_uniforms *= 4;

   const unsigned IRIS_MAX_SYSTEM_VALUES =
      PIPE_MAX_SHADER_IMAGES * BRW_IMAGE_PARAM_SIZE;
   enum brw_param_builtin *system_values =
      rzalloc_array(mem_ctx, enum brw_param_builtin, IRIS_MAX_SYSTEM_VALUES);
   unsigned num_system_values = 0;

   unsigned patch_vert_idx = -1;
   unsigned ucp_idx[IRIS_MAX_CLIP_PLANES];
   unsigned img_idx[PIPE_MAX_SHADER_IMAGES];
   memset(ucp_idx, -1, sizeof(ucp_idx));
   memset(img_idx, -1, sizeof(img_idx));

   nir_function_impl *impl = nir_shader_get_entrypoint(nir);

   nir_builder b;
   nir_builder_init(&b, impl);

   b.cursor = nir_before_block(nir_start_block(impl));
   nir_ssa_def *temp_ubo_name = nir_ssa_undef(&b, 1, 32);

   /* Turn system value intrinsics into uniforms */
   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         nir_ssa_def *offset;

         switch (intrin->intrinsic) {
         case nir_intrinsic_load_user_clip_plane: {
            unsigned ucp = nir_intrinsic_ucp_id(intrin);

            if (ucp_idx[ucp] == -1) {
               ucp_idx[ucp] = num_system_values;
               num_system_values += 4;
            }

            for (int i = 0; i < 4; i++) {
               system_values[ucp_idx[ucp] + i] =
                  BRW_PARAM_BUILTIN_CLIP_PLANE(ucp, i);
            }

            b.cursor = nir_before_instr(instr);
            offset = nir_imm_int(&b, ucp_idx[ucp] * sizeof(uint32_t));
            break;
         }
         case nir_intrinsic_load_patch_vertices_in:
            if (patch_vert_idx == -1)
               patch_vert_idx = num_system_values++;

            system_values[patch_vert_idx] =
               BRW_PARAM_BUILTIN_PATCH_VERTICES_IN;

            b.cursor = nir_before_instr(instr);
            offset = nir_imm_int(&b, patch_vert_idx * sizeof(uint32_t));
            break;
         case nir_intrinsic_image_deref_load_param_intel: {
            assert(devinfo->gen < 9);
            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
            nir_variable *var = nir_deref_instr_get_variable(deref);

            /* XXX: var->data.binding is not set properly. We need to run
             * some form of gl_nir_lower_samplers_as_deref() to get it.
             * This breaks tests which use more than one image.
             */
            if (img_idx[var->data.binding] == -1) {
               /* GL only allows arrays of arrays of images. */
               assert(glsl_type_is_image(glsl_without_array(var->type)));
               unsigned num_images = MAX2(1, glsl_get_aoa_size(var->type));

               for (int i = 0; i < num_images; i++) {
                  const unsigned img = var->data.binding + i;

                  img_idx[img] = num_system_values;
                  num_system_values += BRW_IMAGE_PARAM_SIZE;

                  uint32_t *img_sv = &system_values[img_idx[img]];

                  setup_vec4_image_sysval(
                     img_sv + BRW_IMAGE_PARAM_OFFSET_OFFSET, img,
                     offsetof(struct brw_image_param, offset), 2);
                  setup_vec4_image_sysval(
                     img_sv + BRW_IMAGE_PARAM_SIZE_OFFSET, img,
                     offsetof(struct brw_image_param, size), 3);
                  setup_vec4_image_sysval(
                     img_sv + BRW_IMAGE_PARAM_STRIDE_OFFSET, img,
                     offsetof(struct brw_image_param, stride), 4);
                  setup_vec4_image_sysval(
                     img_sv + BRW_IMAGE_PARAM_TILING_OFFSET, img,
                     offsetof(struct brw_image_param, tiling), 3);
                  setup_vec4_image_sysval(
                     img_sv + BRW_IMAGE_PARAM_SWIZZLING_OFFSET, img,
                     offsetof(struct brw_image_param, swizzling), 2);
               }
            }

            b.cursor = nir_before_instr(instr);
            offset = nir_iadd(&b,
               get_aoa_deref_offset(&b, deref, BRW_IMAGE_PARAM_SIZE * 4),
               nir_imm_int(&b, img_idx[var->data.binding] * 4 +
                               nir_intrinsic_base(intrin) * 16));
            break;
         }
         default:
            continue;
         }

         unsigned comps = nir_intrinsic_dest_components(intrin);

         nir_intrinsic_instr *load =
            nir_intrinsic_instr_create(nir, nir_intrinsic_load_ubo);
         load->num_components = comps;
         load->src[0] = nir_src_for_ssa(temp_ubo_name);
         load->src[1] = nir_src_for_ssa(offset);
         nir_ssa_dest_init(&load->instr, &load->dest, comps, 32, NULL);
         nir_builder_instr_insert(&b, &load->instr);
         nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                                  nir_src_for_ssa(&load->dest.ssa));
         nir_instr_remove(instr);
      }
   }

   nir_validate_shader(nir, "before remapping");

   /* Place the new params at the front of constant buffer 0. */
   if (num_system_values > 0) {
      nir->num_uniforms += num_system_values * sizeof(uint32_t);

      system_values = reralloc(mem_ctx, system_values, enum brw_param_builtin,
                               num_system_values);

      nir_foreach_block(block, impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);

            if (load->intrinsic != nir_intrinsic_load_ubo)
               continue;

            b.cursor = nir_before_instr(instr);

            assert(load->src[0].is_ssa);

            if (load->src[0].ssa == temp_ubo_name) {
               nir_instr_rewrite_src(instr, &load->src[0],
                                     nir_src_for_ssa(nir_imm_int(&b, 0)));
            } else if (nir_src_as_uint(load->src[0]) == 0) {
               nir_ssa_def *offset =
                  nir_iadd(&b, load->src[1].ssa,
                           nir_imm_int(&b, 4 * num_system_values));
               nir_instr_rewrite_src(instr, &load->src[1],
                                     nir_src_for_ssa(offset));
            }
         }
      }

      /* We need to fold the new iadds for brw_nir_analyze_ubo_ranges */
      nir_opt_constant_folding(nir);
   } else {
      ralloc_free(system_values);
      system_values = NULL;
   }

   nir_validate_shader(nir, "after remap");

   if (nir->info.stage != MESA_SHADER_COMPUTE)
      brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);

   /* We don't use params[], but fs_visitor::nir_setup_uniforms() asserts
    * about it for compute shaders, so go ahead and make some fake ones
    * which the backend will dead code eliminate.
    */
   prog_data->nr_params = nir->num_uniforms / 4;
   prog_data->param = rzalloc_array(mem_ctx, uint32_t, prog_data->nr_params);

   /* System values and uniforms are stored in constant buffer 0, and the
    * user-facing UBOs start at index one. So if any constant buffer is
    * needed, constant buffer 0 will be needed, so account for it.
    */
   unsigned num_cbufs = nir->info.num_ubos;
   if (num_cbufs || num_system_values || nir->num_uniforms)
      num_cbufs++;

   *out_system_values = system_values;
   *out_num_system_values = num_system_values;
   *out_num_cbufs = num_cbufs;
}
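
/* Resulting layout (sketch): after this pass, constant buffer 0 is
 *
 *    [ system values: num_system_values dwords | default uniform block ]
 *
 * which is why loads from the original uniform buffer (index 0) had their
 * offsets bumped by 4 * num_system_values above, while loads tagged with
 * temp_ubo_name were redirected to buffer index 0.
 */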

static void
iris_debug_recompile(struct iris_context *ice,
                     struct shader_info *info,
                     unsigned program_string_id,
                     const void *key)
{
   struct iris_screen *screen = (struct iris_screen *) ice->ctx.screen;
   const struct brw_compiler *c = screen->compiler;

   if (!info)
      return;

   c->shader_perf_log(&ice->dbg, "Recompiling %s shader for program %s: %s\n",
                      _mesa_shader_stage_to_string(info->stage),
                      info->name ? info->name : "(no identifier)",
                      info->label ? info->label : "");

   const void *old_key =
      iris_find_previous_compile(ice, info->stage, program_string_id);

   brw_debug_key_recompile(c, &ice->dbg, info->stage, old_key, key);
}

/**
 * Compile a vertex shader, and upload the assembly.
 */
static struct iris_compiled_shader *
iris_compile_vs(struct iris_context *ice,
                struct iris_uncompiled_shader *ish,
                const struct brw_vs_prog_key *key)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   const struct brw_compiler *compiler = screen->compiler;
   const struct gen_device_info *devinfo = &screen->devinfo;
   void *mem_ctx = ralloc_context(NULL);
   struct brw_vs_prog_data *vs_prog_data =
      rzalloc(mem_ctx, struct brw_vs_prog_data);
   struct brw_vue_prog_data *vue_prog_data = &vs_prog_data->base;
   struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
   enum brw_param_builtin *system_values;
   unsigned num_system_values;
   unsigned num_cbufs;

   nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);

   if (key->nr_userclip_plane_consts) {
      nir_function_impl *impl = nir_shader_get_entrypoint(nir);
      nir_lower_clip_vs(nir, (1 << key->nr_userclip_plane_consts) - 1, true);
      nir_lower_io_to_temporaries(nir, impl, true, false);
      nir_lower_global_vars_to_local(nir);
      nir_lower_vars_to_ssa(nir);
      nir_shader_gather_info(nir, impl);
   }

   prog_data->use_alt_mode = ish->use_alt_mode;

   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
                       &num_system_values, &num_cbufs);

   assign_common_binding_table_offsets(devinfo, nir, prog_data, 0,
                                       num_system_values, num_cbufs);

   brw_compute_vue_map(devinfo,
                       &vue_prog_data->vue_map, nir->info.outputs_written,
                       nir->info.separate_shader);

   /* Don't tell the backend about our clip plane constants, we've already
    * lowered them in NIR and we don't want it doing it again.
    */
   struct brw_vs_prog_key key_no_ucp = *key;
   key_no_ucp.nr_userclip_plane_consts = 0;

   char *error_str = NULL;
   const unsigned *program =
      brw_compile_vs(compiler, &ice->dbg, mem_ctx, &key_no_ucp, vs_prog_data,
                     nir, -1, &error_str);
   if (program == NULL) {
      dbg_printf("Failed to compile vertex shader: %s\n", error_str);
      ralloc_free(mem_ctx);
      return NULL;
   }

   if (ish->compiled_once) {
      iris_debug_recompile(ice, &nir->info, key->program_string_id, key);
   } else {
      ish->compiled_once = true;
   }

   uint32_t *so_decls =
      ice->vtbl.create_so_decl_list(&ish->stream_output,
                                    &vue_prog_data->vue_map);

   struct iris_compiled_shader *shader =
      iris_upload_shader(ice, IRIS_CACHE_VS, sizeof(*key), key, program,
                         prog_data, so_decls, system_values, num_system_values,
                         num_cbufs);

   iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));

   ralloc_free(mem_ctx);
   return shader;
}

/**
 * Update the current vertex shader variant.
 *
 * Fill out the key, look in the cache, compile and bind if needed.
 */
static void
iris_update_compiled_vs(struct iris_context *ice)
{
   struct iris_uncompiled_shader *ish =
      ice->shaders.uncompiled[MESA_SHADER_VERTEX];
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   const struct gen_device_info *devinfo = &screen->devinfo;

   struct brw_vs_prog_key key = { KEY_INIT(devinfo->gen) };
   ice->vtbl.populate_vs_key(ice, &ish->nir->info, &key);

   struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_VS];
   struct iris_compiled_shader *shader =
      iris_find_cached_shader(ice, IRIS_CACHE_VS, sizeof(key), &key);

   if (!shader)
      shader = iris_disk_cache_retrieve(ice, ish, &key, sizeof(key));

   if (!shader)
      shader = iris_compile_vs(ice, ish, &key);

   if (old != shader) {
      ice->shaders.prog[IRIS_CACHE_VS] = shader;
      ice->state.dirty |= IRIS_DIRTY_VS |
                          IRIS_DIRTY_BINDINGS_VS |
                          IRIS_DIRTY_CONSTANTS_VS |
                          IRIS_DIRTY_VF_SGVS;
      const struct brw_vs_prog_data *vs_prog_data =
         (void *) shader->prog_data;
      const bool uses_draw_params = vs_prog_data->uses_firstvertex ||
                                    vs_prog_data->uses_baseinstance;
      const bool uses_derived_draw_params = vs_prog_data->uses_drawid ||
                                            vs_prog_data->uses_is_indexed_draw;
      const bool needs_sgvs_element = uses_draw_params ||
                                      vs_prog_data->uses_instanceid ||
                                      vs_prog_data->uses_vertexid;
      bool needs_edge_flag = false;
      nir_foreach_variable(var, &ish->nir->inputs) {
         if (var->data.location == VERT_ATTRIB_EDGEFLAG)
            needs_edge_flag = true;
      }

      if (ice->state.vs_uses_draw_params != uses_draw_params ||
          ice->state.vs_uses_derived_draw_params != uses_derived_draw_params ||
          ice->state.vs_needs_edge_flag != needs_edge_flag) {
         ice->state.dirty |= IRIS_DIRTY_VERTEX_BUFFERS |
                             IRIS_DIRTY_VERTEX_ELEMENTS;
      }
      ice->state.vs_uses_draw_params = uses_draw_params;
      ice->state.vs_uses_derived_draw_params = uses_derived_draw_params;
      ice->state.vs_needs_sgvs_element = needs_sgvs_element;
      ice->state.vs_needs_edge_flag = needs_edge_flag;
   }
}

/**
 * Get the shader_info for a given stage, or NULL if the stage is disabled.
 */
const struct shader_info *
iris_get_shader_info(const struct iris_context *ice, gl_shader_stage stage)
{
   const struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[stage];

   if (!ish)
      return NULL;

   const nir_shader *nir = ish->nir;
   return &nir->info;
}

/**
 * Get the union of TCS output and TES input slots.
 *
 * TCS and TES need to agree on a common URB entry layout. In particular,
 * the data for all patch vertices is stored in a single URB entry (unlike
 * GS which has one entry per input vertex). This means that per-vertex
 * array indexing needs a stride.
 *
 * SSO requires locations to match, but doesn't require the number of
 * outputs/inputs to match (in fact, the TCS often has extra outputs).
 * So, we need to take the extra step of unifying these on the fly.
 */
static void
get_unified_tess_slots(const struct iris_context *ice,
                       uint64_t *per_vertex_slots,
                       uint32_t *per_patch_slots)
{
   const struct shader_info *tcs =
      iris_get_shader_info(ice, MESA_SHADER_TESS_CTRL);
   const struct shader_info *tes =
      iris_get_shader_info(ice, MESA_SHADER_TESS_EVAL);

   *per_vertex_slots = tes->inputs_read;
   *per_patch_slots = tes->patch_inputs_read;

   if (tcs) {
      *per_vertex_slots |= tcs->outputs_written;
      *per_patch_slots |= tcs->patch_outputs_written;
   }
}

/**
 * Compile a tessellation control shader, and upload the assembly.
 */
static struct iris_compiled_shader *
iris_compile_tcs(struct iris_context *ice,
                 struct iris_uncompiled_shader *ish,
                 const struct brw_tcs_prog_key *key)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   const struct brw_compiler *compiler = screen->compiler;
   const struct nir_shader_compiler_options *options =
      compiler->glsl_compiler_options[MESA_SHADER_TESS_CTRL].NirOptions;
   const struct gen_device_info *devinfo = &screen->devinfo;
   void *mem_ctx = ralloc_context(NULL);
   struct brw_tcs_prog_data *tcs_prog_data =
      rzalloc(mem_ctx, struct brw_tcs_prog_data);
   struct brw_vue_prog_data *vue_prog_data = &tcs_prog_data->base;
   struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
   enum brw_param_builtin *system_values = NULL;
   unsigned num_system_values = 0;
   unsigned num_cbufs = 0;

   nir_shader *nir;

   if (ish) {
      nir = nir_shader_clone(mem_ctx, ish->nir);

      iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
                          &num_system_values, &num_cbufs);
      assign_common_binding_table_offsets(devinfo, nir, prog_data, 0,
                                          num_system_values, num_cbufs);
   } else {
      nir = brw_nir_create_passthrough_tcs(mem_ctx, compiler, options, key);

      /* Reserve space for passing the default tess levels as constants. */
      num_system_values = 8;
      system_values =
         rzalloc_array(mem_ctx, enum brw_param_builtin, num_system_values);
      prog_data->param = rzalloc_array(mem_ctx, uint32_t, num_system_values);
      prog_data->nr_params = num_system_values;

      if (key->tes_primitive_mode == GL_QUADS) {
         for (int i = 0; i < 4; i++)
            system_values[7 - i] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i;

         system_values[3] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X;
         system_values[2] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_Y;
      } else if (key->tes_primitive_mode == GL_TRIANGLES) {
         for (int i = 0; i < 3; i++)
            system_values[7 - i] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i;

         system_values[4] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X;
      } else {
         assert(key->tes_primitive_mode == GL_ISOLINES);
         system_values[7] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_Y;
         system_values[6] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X;
      }

      prog_data->ubo_ranges[0].length = 1;
   }
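
   /* Layout note (sketch, inferred from the assignments above): these eight
    * param slots land in constant buffer 0 as dwords 0..7, with the outer
    * levels stored in reverse component order in the high slots and the
    * inner levels just below them; iris_state.c fills in the actual default
    * tess level values when it uploads cbuf0.
    */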

   char *error_str = NULL;
   const unsigned *program =
      brw_compile_tcs(compiler, &ice->dbg, mem_ctx, key, tcs_prog_data, nir,
                      -1, &error_str);
   if (program == NULL) {
      dbg_printf("Failed to compile tessellation control shader: %s\n",
                 error_str);
      ralloc_free(mem_ctx);
      return NULL;
   }

   if (ish) {
      if (ish->compiled_once) {
         iris_debug_recompile(ice, &nir->info, key->program_string_id, key);
      } else {
         ish->compiled_once = true;
      }
   }

   struct iris_compiled_shader *shader =
      iris_upload_shader(ice, IRIS_CACHE_TCS, sizeof(*key), key, program,
                         prog_data, NULL, system_values, num_system_values,
                         num_cbufs);

   if (ish)
      iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));

   ralloc_free(mem_ctx);
   return shader;
}

/**
 * Update the current tessellation control shader variant.
 *
 * Fill out the key, look in the cache, compile and bind if needed.
 */
static void
iris_update_compiled_tcs(struct iris_context *ice)
{
   struct iris_uncompiled_shader *tcs =
      ice->shaders.uncompiled[MESA_SHADER_TESS_CTRL];
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   const struct gen_device_info *devinfo = &screen->devinfo;

   const struct shader_info *tes_info =
      iris_get_shader_info(ice, MESA_SHADER_TESS_EVAL);
   struct brw_tcs_prog_key key = {
      KEY_INIT_NO_ID(devinfo->gen),
      .program_string_id = tcs ? tcs->program_id : 0,
      .tes_primitive_mode = tes_info->tess.primitive_mode,
      .input_vertices = ice->state.vertices_per_patch,
   };
   get_unified_tess_slots(ice, &key.outputs_written,
                          &key.patch_outputs_written);
   ice->vtbl.populate_tcs_key(ice, &key);

   struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_TCS];
   struct iris_compiled_shader *shader =
      iris_find_cached_shader(ice, IRIS_CACHE_TCS, sizeof(key), &key);

   if (tcs && !shader)
      shader = iris_disk_cache_retrieve(ice, tcs, &key, sizeof(key));

   if (!shader)
      shader = iris_compile_tcs(ice, tcs, &key);

   if (old != shader) {
      ice->shaders.prog[IRIS_CACHE_TCS] = shader;
      ice->state.dirty |= IRIS_DIRTY_TCS |
                          IRIS_DIRTY_BINDINGS_TCS |
                          IRIS_DIRTY_CONSTANTS_TCS;
   }
}

/**
 * Compile a tessellation evaluation shader, and upload the assembly.
 */
static struct iris_compiled_shader *
iris_compile_tes(struct iris_context *ice,
                 struct iris_uncompiled_shader *ish,
                 const struct brw_tes_prog_key *key)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   const struct brw_compiler *compiler = screen->compiler;
   const struct gen_device_info *devinfo = &screen->devinfo;
   void *mem_ctx = ralloc_context(NULL);
   struct brw_tes_prog_data *tes_prog_data =
      rzalloc(mem_ctx, struct brw_tes_prog_data);
   struct brw_vue_prog_data *vue_prog_data = &tes_prog_data->base;
   struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
   enum brw_param_builtin *system_values;
   unsigned num_system_values;
   unsigned num_cbufs;

   nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);

   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
                       &num_system_values, &num_cbufs);

   assign_common_binding_table_offsets(devinfo, nir, prog_data, 0,
                                       num_system_values, num_cbufs);

   struct brw_vue_map input_vue_map;
   brw_compute_tess_vue_map(&input_vue_map, key->inputs_read,
                            key->patch_inputs_read);

   char *error_str = NULL;
   const unsigned *program =
      brw_compile_tes(compiler, &ice->dbg, mem_ctx, key, &input_vue_map,
                      tes_prog_data, nir, NULL, -1, &error_str);
   if (program == NULL) {
      dbg_printf("Failed to compile tessellation evaluation shader: %s\n",
                 error_str);
      ralloc_free(mem_ctx);
      return NULL;
   }

   if (ish->compiled_once) {
      iris_debug_recompile(ice, &nir->info, key->program_string_id, key);
   } else {
      ish->compiled_once = true;
   }

   uint32_t *so_decls =
      ice->vtbl.create_so_decl_list(&ish->stream_output,
                                    &vue_prog_data->vue_map);

   struct iris_compiled_shader *shader =
      iris_upload_shader(ice, IRIS_CACHE_TES, sizeof(*key), key, program,
                         prog_data, so_decls, system_values, num_system_values,
                         num_cbufs);

   iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));

   ralloc_free(mem_ctx);
   return shader;
}

/**
 * Update the current tessellation evaluation shader variant.
 *
 * Fill out the key, look in the cache, compile and bind if needed.
 */
static void
iris_update_compiled_tes(struct iris_context *ice)
{
   struct iris_uncompiled_shader *ish =
      ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL];
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   const struct gen_device_info *devinfo = &screen->devinfo;

   struct brw_tes_prog_key key = { KEY_INIT(devinfo->gen) };
   get_unified_tess_slots(ice, &key.inputs_read, &key.patch_inputs_read);
   ice->vtbl.populate_tes_key(ice, &key);

   struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_TES];
   struct iris_compiled_shader *shader =
      iris_find_cached_shader(ice, IRIS_CACHE_TES, sizeof(key), &key);

   if (!shader)
      shader = iris_disk_cache_retrieve(ice, ish, &key, sizeof(key));

   if (!shader)
      shader = iris_compile_tes(ice, ish, &key);

   if (old != shader) {
      ice->shaders.prog[IRIS_CACHE_TES] = shader;
      ice->state.dirty |= IRIS_DIRTY_TES |
                          IRIS_DIRTY_BINDINGS_TES |
                          IRIS_DIRTY_CONSTANTS_TES;
   }

   /* TODO: Could compare and avoid flagging this. */
   const struct shader_info *tes_info = &ish->nir->info;
   if (tes_info->system_values_read & (1ull << SYSTEM_VALUE_VERTICES_IN)) {
      ice->state.dirty |= IRIS_DIRTY_CONSTANTS_TES;
      ice->state.shaders[MESA_SHADER_TESS_EVAL].cbuf0_needs_upload = true;
   }
}

/**
 * Compile a geometry shader, and upload the assembly.
 */
static struct iris_compiled_shader *
iris_compile_gs(struct iris_context *ice,
                struct iris_uncompiled_shader *ish,
                const struct brw_gs_prog_key *key)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   const struct brw_compiler *compiler = screen->compiler;
   const struct gen_device_info *devinfo = &screen->devinfo;
   void *mem_ctx = ralloc_context(NULL);
   struct brw_gs_prog_data *gs_prog_data =
      rzalloc(mem_ctx, struct brw_gs_prog_data);
   struct brw_vue_prog_data *vue_prog_data = &gs_prog_data->base;
   struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
   enum brw_param_builtin *system_values;
   unsigned num_system_values;
   unsigned num_cbufs;

   nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);

   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
                       &num_system_values, &num_cbufs);

   assign_common_binding_table_offsets(devinfo, nir, prog_data, 0,
                                       num_system_values, num_cbufs);

   brw_compute_vue_map(devinfo,
                       &vue_prog_data->vue_map, nir->info.outputs_written,
                       nir->info.separate_shader);

   char *error_str = NULL;
   const unsigned *program =
      brw_compile_gs(compiler, &ice->dbg, mem_ctx, key, gs_prog_data, nir,
                     NULL, -1, &error_str);
   if (program == NULL) {
      dbg_printf("Failed to compile geometry shader: %s\n", error_str);
      ralloc_free(mem_ctx);
      return NULL;
   }

   if (ish->compiled_once) {
      iris_debug_recompile(ice, &nir->info, key->program_string_id, key);
   } else {
      ish->compiled_once = true;
   }

   uint32_t *so_decls =
      ice->vtbl.create_so_decl_list(&ish->stream_output,
                                    &vue_prog_data->vue_map);

   struct iris_compiled_shader *shader =
      iris_upload_shader(ice, IRIS_CACHE_GS, sizeof(*key), key, program,
                         prog_data, so_decls, system_values, num_system_values,
                         num_cbufs);

   iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));

   ralloc_free(mem_ctx);
   return shader;
}

/**
 * Update the current geometry shader variant.
 *
 * Fill out the key, look in the cache, compile and bind if needed.
 */
static void
iris_update_compiled_gs(struct iris_context *ice)
{
   struct iris_uncompiled_shader *ish =
      ice->shaders.uncompiled[MESA_SHADER_GEOMETRY];
   struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_GS];
   struct iris_compiled_shader *shader = NULL;

   if (ish) {
      struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
      const struct gen_device_info *devinfo = &screen->devinfo;
      struct brw_gs_prog_key key = { KEY_INIT(devinfo->gen) };
      ice->vtbl.populate_gs_key(ice, &key);

      shader =
         iris_find_cached_shader(ice, IRIS_CACHE_GS, sizeof(key), &key);

      if (!shader)
         shader = iris_disk_cache_retrieve(ice, ish, &key, sizeof(key));

      if (!shader)
         shader = iris_compile_gs(ice, ish, &key);
   }

   if (old != shader) {
      ice->shaders.prog[IRIS_CACHE_GS] = shader;
      ice->state.dirty |= IRIS_DIRTY_GS |
                          IRIS_DIRTY_BINDINGS_GS |
                          IRIS_DIRTY_CONSTANTS_GS;
   }
}

/**
 * Compile a fragment (pixel) shader, and upload the assembly.
 */
static struct iris_compiled_shader *
iris_compile_fs(struct iris_context *ice,
                struct iris_uncompiled_shader *ish,
                const struct brw_wm_prog_key *key,
                struct brw_vue_map *vue_map)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   const struct brw_compiler *compiler = screen->compiler;
   const struct gen_device_info *devinfo = &screen->devinfo;
   void *mem_ctx = ralloc_context(NULL);
   struct brw_wm_prog_data *fs_prog_data =
      rzalloc(mem_ctx, struct brw_wm_prog_data);
   struct brw_stage_prog_data *prog_data = &fs_prog_data->base;
   enum brw_param_builtin *system_values;
   unsigned num_system_values;
   unsigned num_cbufs;

   nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);

   prog_data->use_alt_mode = ish->use_alt_mode;

   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
                       &num_system_values, &num_cbufs);

   assign_common_binding_table_offsets(devinfo, nir, prog_data,
                                       MAX2(key->nr_color_regions, 1),
                                       num_system_values, num_cbufs);

   char *error_str = NULL;
   const unsigned *program =
      brw_compile_fs(compiler, &ice->dbg, mem_ctx, key, fs_prog_data,
                     nir, NULL, -1, -1, -1, true, false, vue_map, &error_str);
   if (program == NULL) {
      dbg_printf("Failed to compile fragment shader: %s\n", error_str);
      ralloc_free(mem_ctx);
      return NULL;
   }

   if (ish->compiled_once) {
      iris_debug_recompile(ice, &nir->info, key->program_string_id, key);
   } else {
      ish->compiled_once = true;
   }

   struct iris_compiled_shader *shader =
      iris_upload_shader(ice, IRIS_CACHE_FS, sizeof(*key), key, program,
                         prog_data, NULL, system_values, num_system_values,
                         num_cbufs);

   iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));

   ralloc_free(mem_ctx);
   return shader;
}

/**
 * Update the current fragment shader variant.
 *
 * Fill out the key, look in the cache, compile and bind if needed.
 */
static void
iris_update_compiled_fs(struct iris_context *ice)
{
   struct iris_uncompiled_shader *ish =
      ice->shaders.uncompiled[MESA_SHADER_FRAGMENT];
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   const struct gen_device_info *devinfo = &screen->devinfo;
   struct brw_wm_prog_key key = { KEY_INIT(devinfo->gen) };
   ice->vtbl.populate_fs_key(ice, &key);

   if (ish->nos & (1ull << IRIS_NOS_LAST_VUE_MAP))
      key.input_slots_valid = ice->shaders.last_vue_map->slots_valid;

   struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_FS];
   struct iris_compiled_shader *shader =
      iris_find_cached_shader(ice, IRIS_CACHE_FS, sizeof(key), &key);

   if (!shader)
      shader = iris_disk_cache_retrieve(ice, ish, &key, sizeof(key));

   if (!shader)
      shader = iris_compile_fs(ice, ish, &key, ice->shaders.last_vue_map);

   if (old != shader) {
      // XXX: only need to flag CLIP if barycentric has NONPERSPECTIVE
      // toggles. might be able to avoid flagging SBE too.
      ice->shaders.prog[IRIS_CACHE_FS] = shader;
      ice->state.dirty |= IRIS_DIRTY_FS |
                          IRIS_DIRTY_BINDINGS_FS |
                          IRIS_DIRTY_CONSTANTS_FS |
                          IRIS_DIRTY_WM |
                          IRIS_DIRTY_CLIP |
                          IRIS_DIRTY_SBE;
   }
}

/**
 * Get the last enabled geometry stage - the one that feeds stream output
 * and the rasterizer.
 */
static gl_shader_stage
last_vue_stage(struct iris_context *ice)
{
   if (ice->shaders.prog[MESA_SHADER_GEOMETRY])
      return MESA_SHADER_GEOMETRY;

   if (ice->shaders.prog[MESA_SHADER_TESS_EVAL])
      return MESA_SHADER_TESS_EVAL;

   return MESA_SHADER_VERTEX;
}

/**
 * Update the last enabled stage's VUE map.
 *
 * When the shader feeding the rasterizer's output interface changes, we
 * need to re-emit various packets.
 */
static void
update_last_vue_map(struct iris_context *ice,
                    struct brw_stage_prog_data *prog_data)
{
   struct brw_vue_prog_data *vue_prog_data = (void *) prog_data;
   struct brw_vue_map *vue_map = &vue_prog_data->vue_map;
   struct brw_vue_map *old_map = ice->shaders.last_vue_map;
   const uint64_t changed_slots =
      (old_map ? old_map->slots_valid : 0ull) ^ vue_map->slots_valid;

   if (changed_slots & VARYING_BIT_VIEWPORT) {
      // XXX: could use ctx->Const.MaxViewports for old API efficiency
      ice->state.num_viewports =
         (vue_map->slots_valid & VARYING_BIT_VIEWPORT) ? IRIS_MAX_VIEWPORTS : 1;
      ice->state.dirty |= IRIS_DIRTY_CLIP |
                          IRIS_DIRTY_SF_CL_VIEWPORT |
                          IRIS_DIRTY_CC_VIEWPORT |
                          IRIS_DIRTY_SCISSOR_RECT |
                          IRIS_DIRTY_UNCOMPILED_FS |
                          ice->state.dirty_for_nos[IRIS_NOS_LAST_VUE_MAP];
      // XXX: CC_VIEWPORT?
   }

   if (changed_slots || (old_map && old_map->separate != vue_map->separate)) {
      ice->state.dirty |= IRIS_DIRTY_SBE;
   }

   ice->shaders.last_vue_map = &vue_prog_data->vue_map;
}

/**
 * Get the prog_data for a given stage, or NULL if the stage is disabled.
 */
static struct brw_vue_prog_data *
get_vue_prog_data(struct iris_context *ice, gl_shader_stage stage)
{
   if (!ice->shaders.prog[stage])
      return NULL;

   return (void *) ice->shaders.prog[stage]->prog_data;
}

// XXX: iris_compiled_shaders are space-leaking :(
// XXX: do remember to unbind them if deleting them.

/**
 * Update the current shader variants for the given state.
 *
 * This should be called on every draw call to ensure that the correct
 * shaders are bound. It will also flag any dirty state triggered by
 * swapping out those shaders.
 */
void
iris_update_compiled_shaders(struct iris_context *ice)
{
   const uint64_t dirty = ice->state.dirty;

   struct brw_vue_prog_data *old_prog_datas[4];
   if (!(dirty & IRIS_DIRTY_URB)) {
      for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++)
         old_prog_datas[i] = get_vue_prog_data(ice, i);
   }

   if (dirty & (IRIS_DIRTY_UNCOMPILED_TCS | IRIS_DIRTY_UNCOMPILED_TES)) {
      struct iris_uncompiled_shader *tes =
         ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL];
      if (tes) {
         iris_update_compiled_tcs(ice);
         iris_update_compiled_tes(ice);
      } else {
         ice->shaders.prog[IRIS_CACHE_TCS] = NULL;
         ice->shaders.prog[IRIS_CACHE_TES] = NULL;
         ice->state.dirty |=
            IRIS_DIRTY_TCS | IRIS_DIRTY_TES |
            IRIS_DIRTY_BINDINGS_TCS | IRIS_DIRTY_BINDINGS_TES |
            IRIS_DIRTY_CONSTANTS_TCS | IRIS_DIRTY_CONSTANTS_TES;
      }
   }

   if (dirty & IRIS_DIRTY_UNCOMPILED_VS)
      iris_update_compiled_vs(ice);
   if (dirty & IRIS_DIRTY_UNCOMPILED_GS)
      iris_update_compiled_gs(ice);

   if (dirty & (IRIS_DIRTY_UNCOMPILED_GS | IRIS_DIRTY_UNCOMPILED_TES)) {
      const struct iris_compiled_shader *gs =
         ice->shaders.prog[MESA_SHADER_GEOMETRY];
      const struct iris_compiled_shader *tes =
         ice->shaders.prog[MESA_SHADER_TESS_EVAL];

      bool points_or_lines = false;

      if (gs) {
         const struct brw_gs_prog_data *gs_prog_data = (void *) gs->prog_data;
         points_or_lines =
            gs_prog_data->output_topology == _3DPRIM_POINTLIST ||
            gs_prog_data->output_topology == _3DPRIM_LINESTRIP;
      } else if (tes) {
         const struct brw_tes_prog_data *tes_data = (void *) tes->prog_data;
         points_or_lines =
            tes_data->output_topology == BRW_TESS_OUTPUT_TOPOLOGY_LINE ||
            tes_data->output_topology == BRW_TESS_OUTPUT_TOPOLOGY_POINT;
      }

      if (ice->shaders.output_topology_is_points_or_lines != points_or_lines) {
         /* Outbound to XY Clip enables */
         ice->shaders.output_topology_is_points_or_lines = points_or_lines;
         ice->state.dirty |= IRIS_DIRTY_CLIP;
      }
   }

   gl_shader_stage last_stage = last_vue_stage(ice);
   struct iris_compiled_shader *shader = ice->shaders.prog[last_stage];
   struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[last_stage];
   update_last_vue_map(ice, shader->prog_data);
   if (ice->state.streamout != shader->streamout) {
      ice->state.streamout = shader->streamout;
      ice->state.dirty |= IRIS_DIRTY_SO_DECL_LIST | IRIS_DIRTY_STREAMOUT;
   }

   if (ice->state.streamout_active) {
      for (int i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
         struct iris_stream_output_target *so =
            (void *) ice->state.so_target[i];
         if (so)
            so->stride = ish->stream_output.stride[i];
      }
   }

   if (dirty & IRIS_DIRTY_UNCOMPILED_FS)
      iris_update_compiled_fs(ice);

   /* Changing shader interfaces may require a URB reconfiguration. */
   if (!(dirty & IRIS_DIRTY_URB)) {
      for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
         struct brw_vue_prog_data *old = old_prog_datas[i];
         struct brw_vue_prog_data *new = get_vue_prog_data(ice, i);
         if (!!old != !!new ||
             (new && new->urb_entry_size != old->urb_entry_size)) {
            ice->state.dirty |= IRIS_DIRTY_URB;
            break;
         }
      }
   }
}

static struct iris_compiled_shader *
iris_compile_cs(struct iris_context *ice,
                struct iris_uncompiled_shader *ish,
                const struct brw_cs_prog_key *key)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   const struct brw_compiler *compiler = screen->compiler;
   const struct gen_device_info *devinfo = &screen->devinfo;
   void *mem_ctx = ralloc_context(NULL);
   struct brw_cs_prog_data *cs_prog_data =
      rzalloc(mem_ctx, struct brw_cs_prog_data);
   struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
   enum brw_param_builtin *system_values;
   unsigned num_system_values;
   unsigned num_cbufs;

   nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);

   cs_prog_data->binding_table.work_groups_start = 0;

   prog_data->total_shared = nir->info.cs.shared_size;

   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
                       &num_system_values, &num_cbufs);

   assign_common_binding_table_offsets(devinfo, nir, prog_data, 1,
                                       num_system_values, num_cbufs);

   char *error_str = NULL;
   const unsigned *program =
      brw_compile_cs(compiler, &ice->dbg, mem_ctx, key, cs_prog_data,
                     nir, -1, &error_str);
   if (program == NULL) {
      dbg_printf("Failed to compile compute shader: %s\n", error_str);
      ralloc_free(mem_ctx);
      return NULL;
   }

   if (ish->compiled_once) {
      iris_debug_recompile(ice, &nir->info, key->program_string_id, key);
   } else {
      ish->compiled_once = true;
   }

   struct iris_compiled_shader *shader =
      iris_upload_shader(ice, IRIS_CACHE_CS, sizeof(*key), key, program,
                         prog_data, NULL, system_values, num_system_values,
                         num_cbufs);

   iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));

   ralloc_free(mem_ctx);
   return shader;
}

void
iris_update_compiled_compute_shader(struct iris_context *ice)
{
   struct iris_uncompiled_shader *ish =
      ice->shaders.uncompiled[MESA_SHADER_COMPUTE];

   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   const struct gen_device_info *devinfo = &screen->devinfo;
   struct brw_cs_prog_key key = { KEY_INIT(devinfo->gen) };
   ice->vtbl.populate_cs_key(ice, &key);

   struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_CS];
   struct iris_compiled_shader *shader =
      iris_find_cached_shader(ice, IRIS_CACHE_CS, sizeof(key), &key);

   if (!shader)
      shader = iris_disk_cache_retrieve(ice, ish, &key, sizeof(key));

   if (!shader)
      shader = iris_compile_cs(ice, ish, &key);

   if (old != shader) {
      ice->shaders.prog[IRIS_CACHE_CS] = shader;
      ice->state.dirty |= IRIS_DIRTY_CS |
                          IRIS_DIRTY_BINDINGS_CS |
                          IRIS_DIRTY_CONSTANTS_CS;
   }
}

void
iris_fill_cs_push_const_buffer(struct brw_cs_prog_data *cs_prog_data,
                               uint32_t *dst)
{
   assert(cs_prog_data->push.total.size > 0);
   assert(cs_prog_data->push.cross_thread.size == 0);
   assert(cs_prog_data->push.per_thread.dwords == 1);
   assert(cs_prog_data->base.param[0] == BRW_PARAM_BUILTIN_SUBGROUP_ID);
   for (unsigned t = 0; t < cs_prog_data->threads; t++)
      dst[8 * t] = t;
}
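
/* Sketch of the result: the per-thread push constant block is one 32-byte
 * register (8 dwords) per thread, and the only per-thread value is the
 * subgroup ID, so a 4-thread dispatch fills dst[] as:
 *
 *    dst[0] = 0, dst[8] = 1, dst[16] = 2, dst[24] = 3
 *
 * leaving the other 7 dwords of each register unused.
 */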

/**
 * Allocate scratch BOs as needed for the given per-thread size and stage.
 */
struct iris_bo *
iris_get_scratch_space(struct iris_context *ice,
                       unsigned per_thread_scratch,
                       gl_shader_stage stage)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   struct iris_bufmgr *bufmgr = screen->bufmgr;
   const struct gen_device_info *devinfo = &screen->devinfo;

   unsigned encoded_size = ffs(per_thread_scratch) - 11;
   assert(encoded_size < (1 << 16));

   struct iris_bo **bop = &ice->shaders.scratch_bos[encoded_size][stage];

   /* The documentation for 3DSTATE_PS "Scratch Space Base Pointer" says:
    *
    *    "Scratch Space per slice is computed based on 4 sub-slices. SW
    *     must allocate scratch space enough so that each slice has 4
    *     slices allowed."
    *
    * According to the other driver team, this applies to compute shaders
    * as well. This is not currently documented at all.
    *
    * This hack is no longer necessary on Gen11+.
    */
   unsigned subslice_total = screen->subslice_total;
   if (devinfo->gen < 11)
      subslice_total = 4 * devinfo->num_slices;
   assert(subslice_total >= screen->subslice_total);

   if (!*bop) {
      unsigned scratch_ids_per_subslice = devinfo->max_cs_threads;
      uint32_t max_threads[] = {
         [MESA_SHADER_VERTEX]    = devinfo->max_vs_threads,
         [MESA_SHADER_TESS_CTRL] = devinfo->max_tcs_threads,
         [MESA_SHADER_TESS_EVAL] = devinfo->max_tes_threads,
         [MESA_SHADER_GEOMETRY]  = devinfo->max_gs_threads,
         [MESA_SHADER_FRAGMENT]  = devinfo->max_wm_threads,
         [MESA_SHADER_COMPUTE]   = scratch_ids_per_subslice * subslice_total,
      };

      uint32_t size = per_thread_scratch * max_threads[stage];

      *bop = iris_bo_alloc(bufmgr, "scratch", size, IRIS_MEMZONE_SHADER);
   }

   return *bop;
}
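
/* Encoding sketch (assuming the usual brw scratch sizing, which rounds
 * per-thread scratch up to a power of two with a 1KB minimum):
 * ffs(per_thread_scratch) - 11 maps 1KB -> 0, 2KB -> 1, 4KB -> 2, and so
 * on, giving a compact index into scratch_bos[]. For example, a shader
 * needing 16KB per thread lands in slot ffs(16384) - 11 = 15 - 11 = 4.
 */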

/* ------------------------------------------------------------------- */

/**
 * The pipe->create_[stage]_state() driver hooks.
 *
 * Performs basic NIR preprocessing, records any state dependencies, and
 * returns an iris_uncompiled_shader as the Gallium CSO.
 *
 * Actual shader compilation to assembly happens later, at first use.
 */
static void *
iris_create_uncompiled_shader(struct pipe_context *ctx,
                              nir_shader *nir,
                              const struct pipe_stream_output_info *so_info)
{
   struct iris_screen *screen = (struct iris_screen *)ctx->screen;
   const struct gen_device_info *devinfo = &screen->devinfo;

   struct iris_uncompiled_shader *ish =
      calloc(1, sizeof(struct iris_uncompiled_shader));
   if (!ish)
      return NULL;

   nir = brw_preprocess_nir(screen->compiler, nir, NULL);

   NIR_PASS_V(nir, brw_nir_lower_image_load_store, devinfo);
   NIR_PASS_V(nir, iris_lower_storage_image_derefs);

   ish->program_id = get_new_program_id(screen);
   ish->nir = nir;
   if (so_info) {
      memcpy(&ish->stream_output, so_info, sizeof(*so_info));
      update_so_info(&ish->stream_output, nir->info.outputs_written);
   }

   /* Save this now before potentially dropping nir->info.name */
   if (nir->info.name && strncmp(nir->info.name, "ARB", 3) == 0)
      ish->use_alt_mode = true;

   if (screen->disk_cache) {
      /* Serialize the NIR to a binary blob that we can hash for the disk
       * cache. First, drop unnecessary information (like variable names)
       * so the serialized NIR is smaller, and also to let us detect more
       * isomorphic shaders when hashing, increasing cache hits. We clone
       * the NIR before stripping away this info because it can be useful
       * when inspecting and debugging shaders.
       */
      nir_shader *clone = nir_shader_clone(NULL, nir);
      nir_strip(clone);

      struct blob blob;
      blob_init(&blob);
      nir_serialize(&blob, clone);
      _mesa_sha1_compute(blob.data, blob.size, ish->nir_sha1);
      blob_finish(&blob);

      ralloc_free(clone);
   }

   return ish;
}
1562
1563 static struct iris_uncompiled_shader *
1564 iris_create_shader_state(struct pipe_context *ctx,
1565 const struct pipe_shader_state *state)
1566 {
1567 struct nir_shader *nir;
1568
1569 if (state->type == PIPE_SHADER_IR_TGSI)
1570 nir = tgsi_to_nir(state->tokens, ctx->screen);
1571 else
1572 nir = state->ir.nir;
1573
1574 return iris_create_uncompiled_shader(ctx, nir, &state->stream_output);
1575 }
1576
1577 static void *
1578 iris_create_vs_state(struct pipe_context *ctx,
1579 const struct pipe_shader_state *state)
1580 {
1581 struct iris_context *ice = (void *) ctx;
1582 struct iris_screen *screen = (void *) ctx->screen;
1583 struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);
1584
1585 /* User clip planes */
1586 if (ish->nir->info.clip_distance_array_size == 0)
1587 ish->nos |= (1ull << IRIS_NOS_RASTERIZER);
1588
1589 if (screen->precompile) {
1590 const struct gen_device_info *devinfo = &screen->devinfo;
1591 struct brw_vs_prog_key key = { KEY_INIT(devinfo->gen) };
1592
1593 if (!iris_disk_cache_retrieve(ice, ish, &key, sizeof(key)))
1594 iris_compile_vs(ice, ish, &key);
1595 }
1596
1597 return ish;
1598 }
1599
static void *
iris_create_tcs_state(struct pipe_context *ctx,
                      const struct pipe_shader_state *state)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_screen *screen = (void *) ctx->screen;
   const struct brw_compiler *compiler = screen->compiler;
   struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);
   struct shader_info *info = &ish->nir->info;

   // XXX: NOS?

   if (screen->precompile) {
      const unsigned _GL_TRIANGLES = 0x0004;
      const struct gen_device_info *devinfo = &screen->devinfo;
      struct brw_tcs_prog_key key = {
         KEY_INIT(devinfo->gen),
         // XXX: make sure the linker fills this out from the TES...
         .tes_primitive_mode =
            info->tess.primitive_mode ? info->tess.primitive_mode
                                      : _GL_TRIANGLES,
         .outputs_written = info->outputs_written,
         .patch_outputs_written = info->patch_outputs_written,
      };

      /* 8_PATCH mode needs the key to contain the input patch dimensionality.
       * We don't have that information, so we guess that the input and
       * output patches are the same size.  This is a bad guess, but we
       * can't do much better.
       */
      if (compiler->use_tcs_8_patch)
         key.input_vertices = info->tess.tcs_vertices_out;

      if (!iris_disk_cache_retrieve(ice, ish, &key, sizeof(key)))
         iris_compile_tcs(ice, ish, &key);
   }

   return ish;
}

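/**
 * The pipe->create_tes_state() driver hook.
 */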
static void *
iris_create_tes_state(struct pipe_context *ctx,
                      const struct pipe_shader_state *state)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_screen *screen = (void *) ctx->screen;
   struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);
   struct shader_info *info = &ish->nir->info;

   // XXX: NOS?

   if (screen->precompile) {
      const struct gen_device_info *devinfo = &screen->devinfo;
      struct brw_tes_prog_key key = {
         KEY_INIT(devinfo->gen),
         // XXX: not ideal, need TCS output/TES input unification
         .inputs_read = info->inputs_read,
         .patch_inputs_read = info->patch_inputs_read,
      };

      if (!iris_disk_cache_retrieve(ice, ish, &key, sizeof(key)))
         iris_compile_tes(ice, ish, &key);
   }

   return ish;
}

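/**
 * The pipe->create_gs_state() driver hook.
 */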
static void *
iris_create_gs_state(struct pipe_context *ctx,
                     const struct pipe_shader_state *state)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_screen *screen = (void *) ctx->screen;
   struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);

   // XXX: NOS?

   if (screen->precompile) {
      const struct gen_device_info *devinfo = &screen->devinfo;
      struct brw_gs_prog_key key = { KEY_INIT(devinfo->gen) };

      if (!iris_disk_cache_retrieve(ice, ish, &key, sizeof(key)))
         iris_compile_gs(ice, ish, &key);
   }

   return ish;
}

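/**
 * The pipe->create_fs_state() driver hook.
 *
 * Fragment shaders depend on several pieces of non-orthogonal state
 * (framebuffer, depth/stencil/alpha, rasterizer, blend), so those are
 * flagged in ish->nos to trigger re-selection when they change.
 */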
static void *
iris_create_fs_state(struct pipe_context *ctx,
                     const struct pipe_shader_state *state)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_screen *screen = (void *) ctx->screen;
   struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);
   struct shader_info *info = &ish->nir->info;

   ish->nos |= (1ull << IRIS_NOS_FRAMEBUFFER) |
               (1ull << IRIS_NOS_DEPTH_STENCIL_ALPHA) |
               (1ull << IRIS_NOS_RASTERIZER) |
               (1ull << IRIS_NOS_BLEND);

   /* The program key needs the VUE map if there are > 16 inputs */
   if (util_bitcount64(ish->nir->info.inputs_read &
                       BRW_FS_VARYING_INPUT_MASK) > 16) {
      ish->nos |= (1ull << IRIS_NOS_LAST_VUE_MAP);
   }

   if (screen->precompile) {
      const uint64_t color_outputs = info->outputs_written &
         ~(BITFIELD64_BIT(FRAG_RESULT_DEPTH) |
           BITFIELD64_BIT(FRAG_RESULT_STENCIL) |
           BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK));

      bool can_rearrange_varyings =
         util_bitcount64(info->inputs_read & BRW_FS_VARYING_INPUT_MASK) <= 16;

      const struct gen_device_info *devinfo = &screen->devinfo;
      struct brw_wm_prog_key key = {
         KEY_INIT(devinfo->gen),
         .nr_color_regions = util_bitcount(color_outputs),
         .coherent_fb_fetch = true,
         .input_slots_valid =
            can_rearrange_varyings ? 0 : info->inputs_read | VARYING_BIT_POS,
      };

      if (!iris_disk_cache_retrieve(ice, ish, &key, sizeof(key)))
         iris_compile_fs(ice, ish, &key, NULL);
   }

   return ish;
}

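/**
 * The pipe->create_compute_state() driver hook.
 *
 * Compute shaders are expected to arrive as NIR (see the assert below).
 */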
static void *
iris_create_compute_state(struct pipe_context *ctx,
                          const struct pipe_compute_state *state)
{
   assert(state->ir_type == PIPE_SHADER_IR_NIR);

   struct iris_context *ice = (void *) ctx;
   struct iris_screen *screen = (void *) ctx->screen;
   struct iris_uncompiled_shader *ish =
      iris_create_uncompiled_shader(ctx, (void *) state->prog, NULL);

   // XXX: disallow more than 64KB of shared variables

   if (screen->precompile) {
      const struct gen_device_info *devinfo = &screen->devinfo;
      struct brw_cs_prog_key key = { KEY_INIT(devinfo->gen) };

      if (!iris_disk_cache_retrieve(ice, ish, &key, sizeof(key)))
         iris_compile_cs(ice, ish, &key);
   }

   return ish;
}

/**
 * The pipe->delete_[stage]_state() driver hooks.
 *
 * Frees the iris_uncompiled_shader.
 */
static void
iris_delete_shader_state(struct pipe_context *ctx, void *state,
                         gl_shader_stage stage)
{
   struct iris_uncompiled_shader *ish = state;
   struct iris_context *ice = (void *) ctx;

   /* If the shader is currently bound, unbind it and flag the stage dirty
    * so nothing keeps pointing at freed memory.
    */
   if (ice->shaders.uncompiled[stage] == ish) {
      ice->shaders.uncompiled[stage] = NULL;
      ice->state.dirty |= IRIS_DIRTY_UNCOMPILED_VS << stage;
   }

   ralloc_free(ish->nir);
   free(ish);
}

static void
iris_delete_vs_state(struct pipe_context *ctx, void *state)
{
   iris_delete_shader_state(ctx, state, MESA_SHADER_VERTEX);
}

static void
iris_delete_tcs_state(struct pipe_context *ctx, void *state)
{
   iris_delete_shader_state(ctx, state, MESA_SHADER_TESS_CTRL);
}

static void
iris_delete_tes_state(struct pipe_context *ctx, void *state)
{
   iris_delete_shader_state(ctx, state, MESA_SHADER_TESS_EVAL);
}

static void
iris_delete_gs_state(struct pipe_context *ctx, void *state)
{
   iris_delete_shader_state(ctx, state, MESA_SHADER_GEOMETRY);
}

static void
iris_delete_fs_state(struct pipe_context *ctx, void *state)
{
   iris_delete_shader_state(ctx, state, MESA_SHADER_FRAGMENT);
}

static void
iris_delete_cs_state(struct pipe_context *ctx, void *state)
{
   iris_delete_shader_state(ctx, state, MESA_SHADER_COMPUTE);
}

/**
 * The pipe->bind_[stage]_state() driver hook.
 *
 * Binds an uncompiled shader as the current one for a particular stage.
 * Updates dirty tracking to account for the shader's NOS.
 */
static void
bind_state(struct iris_context *ice,
           struct iris_uncompiled_shader *ish,
           gl_shader_stage stage)
{
   uint64_t dirty_bit = IRIS_DIRTY_UNCOMPILED_VS << stage;
   const uint64_t nos = ish ? ish->nos : 0;

   const struct shader_info *old_info = iris_get_shader_info(ice, stage);
   const struct shader_info *new_info = ish ? &ish->nir->info : NULL;

   /* Flag the stage's sampler states as dirty if the number of used
    * textures changes.
    */
   if ((old_info ? util_last_bit(old_info->textures_used) : 0) !=
       (new_info ? util_last_bit(new_info->textures_used) : 0)) {
      ice->state.dirty |= IRIS_DIRTY_SAMPLER_STATES_VS << stage;
   }

   ice->shaders.uncompiled[stage] = ish;
   ice->state.dirty |= dirty_bit;

   /* Record that CSOs need to mark IRIS_DIRTY_UNCOMPILED_XS when they change
    * (or that they no longer need to do so).
    */
   for (int i = 0; i < IRIS_NOS_COUNT; i++) {
      if (nos & (1 << i))
         ice->state.dirty_for_nos[i] |= dirty_bit;
      else
         ice->state.dirty_for_nos[i] &= ~dirty_bit;
   }
}

static void
iris_bind_vs_state(struct pipe_context *ctx, void *state)
{
   bind_state((void *) ctx, state, MESA_SHADER_VERTEX);
}

static void
iris_bind_tcs_state(struct pipe_context *ctx, void *state)
{
   bind_state((void *) ctx, state, MESA_SHADER_TESS_CTRL);
}

static void
iris_bind_tes_state(struct pipe_context *ctx, void *state)
{
   struct iris_context *ice = (struct iris_context *) ctx;

   /* Enabling/disabling optional stages requires a URB reconfiguration. */
   if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL])
      ice->state.dirty |= IRIS_DIRTY_URB;

   bind_state((void *) ctx, state, MESA_SHADER_TESS_EVAL);
}

static void
iris_bind_gs_state(struct pipe_context *ctx, void *state)
{
   struct iris_context *ice = (struct iris_context *) ctx;

   /* Enabling/disabling optional stages requires a URB reconfiguration. */
   if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_GEOMETRY])
      ice->state.dirty |= IRIS_DIRTY_URB;

   bind_state((void *) ctx, state, MESA_SHADER_GEOMETRY);
}

static void
iris_bind_fs_state(struct pipe_context *ctx, void *state)
{
   struct iris_context *ice = (struct iris_context *) ctx;
   struct iris_uncompiled_shader *old_ish =
      ice->shaders.uncompiled[MESA_SHADER_FRAGMENT];
   struct iris_uncompiled_shader *new_ish = state;

   const unsigned color_bits =
      BITFIELD64_BIT(FRAG_RESULT_COLOR) |
      BITFIELD64_RANGE(FRAG_RESULT_DATA0, BRW_MAX_DRAW_BUFFERS);

   /* Fragment shader outputs influence HasWriteableRT */
   if (!old_ish || !new_ish ||
       (old_ish->nir->info.outputs_written & color_bits) !=
       (new_ish->nir->info.outputs_written & color_bits))
      ice->state.dirty |= IRIS_DIRTY_PS_BLEND;

   bind_state((void *) ctx, state, MESA_SHADER_FRAGMENT);
}

static void
iris_bind_cs_state(struct pipe_context *ctx, void *state)
{
   bind_state((void *) ctx, state, MESA_SHADER_COMPUTE);
}

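/**
 * Installs the create/delete/bind shader hooks defined above into the
 * pipe_context function table.
 */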
void
iris_init_program_functions(struct pipe_context *ctx)
{
   ctx->create_vs_state = iris_create_vs_state;
   ctx->create_tcs_state = iris_create_tcs_state;
   ctx->create_tes_state = iris_create_tes_state;
   ctx->create_gs_state = iris_create_gs_state;
   ctx->create_fs_state = iris_create_fs_state;
   ctx->create_compute_state = iris_create_compute_state;

   ctx->delete_vs_state = iris_delete_vs_state;
   ctx->delete_tcs_state = iris_delete_tcs_state;
   ctx->delete_tes_state = iris_delete_tes_state;
   ctx->delete_gs_state = iris_delete_gs_state;
   ctx->delete_fs_state = iris_delete_fs_state;
   ctx->delete_compute_state = iris_delete_cs_state;

   ctx->bind_vs_state = iris_bind_vs_state;
   ctx->bind_tcs_state = iris_bind_tcs_state;
   ctx->bind_tes_state = iris_bind_tes_state;
   ctx->bind_gs_state = iris_bind_gs_state;
   ctx->bind_fs_state = iris_bind_fs_state;
   ctx->bind_compute_state = iris_bind_cs_state;
}