[mesa.git] / src / gallium / drivers / iris / iris_program.c
1 /*
2 * Copyright © 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23 /**
24 * @file iris_program.c
25 *
26 * This file contains the driver interface for compiling shaders.
27 *
28 * See iris_program_cache.c for the in-memory program cache where the
29 * compiled shaders are stored.
30 */
31
32 #include <stdio.h>
33 #include <errno.h>
34 #include "pipe/p_defines.h"
35 #include "pipe/p_state.h"
36 #include "pipe/p_context.h"
37 #include "pipe/p_screen.h"
38 #include "util/u_atomic.h"
39 #include "util/u_upload_mgr.h"
40 #include "util/debug.h"
41 #include "compiler/nir/nir.h"
42 #include "compiler/nir/nir_builder.h"
43 #include "compiler/nir/nir_serialize.h"
44 #include "intel/compiler/brw_compiler.h"
45 #include "intel/compiler/brw_nir.h"
46 #include "iris_context.h"
47 #include "nir/tgsi_to_nir.h"
48
49 #define KEY_ID(prefix) .prefix.program_string_id = ish->program_id
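/* Shared defaults for converting iris keys to brw keys below: 0x688 is the
 * packed identity texture swizzle (SWIZZLE_XYZW), and the msaa_16 mask is
 * only set on Gen9+, which introduced 16x MSAA.
 */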
50 #define BRW_KEY_INIT(gen, prog_id) \
51 .base.program_string_id = prog_id, \
52 .base.subgroup_size_type = BRW_SUBGROUP_SIZE_UNIFORM, \
53 .base.tex.swizzles[0 ... MAX_SAMPLERS - 1] = 0x688, \
54 .base.tex.compressed_multisample_layout_mask = ~0, \
55 .base.tex.msaa_16 = (gen >= 9 ? ~0 : 0)
56
57 static unsigned
58 get_new_program_id(struct iris_screen *screen)
59 {
60 return p_atomic_inc_return(&screen->program_id);
61 }
62
63 static struct brw_vs_prog_key
64 iris_to_brw_vs_key(const struct gen_device_info *devinfo,
65 const struct iris_vs_prog_key *key)
66 {
67 return (struct brw_vs_prog_key) {
68 BRW_KEY_INIT(devinfo->gen, key->vue.base.program_string_id),
69
70 /* Don't tell the backend about our clip plane constants; we've
71 * already lowered them in NIR and don't want it doing it again.
72 */
73 .nr_userclip_plane_consts = 0,
74 };
75 }
76
77 static struct brw_tcs_prog_key
78 iris_to_brw_tcs_key(const struct gen_device_info *devinfo,
79 const struct iris_tcs_prog_key *key)
80 {
81 return (struct brw_tcs_prog_key) {
82 BRW_KEY_INIT(devinfo->gen, key->vue.base.program_string_id),
83 .tes_primitive_mode = key->tes_primitive_mode,
84 .input_vertices = key->input_vertices,
85 .patch_outputs_written = key->patch_outputs_written,
86 .outputs_written = key->outputs_written,
87 .quads_workaround = key->quads_workaround,
88 };
89 }
90
91 static struct brw_tes_prog_key
92 iris_to_brw_tes_key(const struct gen_device_info *devinfo,
93 const struct iris_tes_prog_key *key)
94 {
95 return (struct brw_tes_prog_key) {
96 BRW_KEY_INIT(devinfo->gen, key->vue.base.program_string_id),
97 .patch_inputs_read = key->patch_inputs_read,
98 .inputs_read = key->inputs_read,
99 };
100 }
101
102 static struct brw_gs_prog_key
103 iris_to_brw_gs_key(const struct gen_device_info *devinfo,
104 const struct iris_gs_prog_key *key)
105 {
106 return (struct brw_gs_prog_key) {
107 BRW_KEY_INIT(devinfo->gen, key->vue.base.program_string_id),
108 };
109 }
110
111 static struct brw_wm_prog_key
112 iris_to_brw_fs_key(const struct gen_device_info *devinfo,
113 const struct iris_fs_prog_key *key)
114 {
115 return (struct brw_wm_prog_key) {
116 BRW_KEY_INIT(devinfo->gen, key->base.program_string_id),
117 .nr_color_regions = key->nr_color_regions,
118 .flat_shade = key->flat_shade,
119 .alpha_test_replicate_alpha = key->alpha_test_replicate_alpha,
120 .alpha_to_coverage = key->alpha_to_coverage,
121 .clamp_fragment_color = key->clamp_fragment_color,
122 .persample_interp = key->persample_interp,
123 .multisample_fbo = key->multisample_fbo,
124 .force_dual_color_blend = key->force_dual_color_blend,
125 .coherent_fb_fetch = key->coherent_fb_fetch,
126 .color_outputs_valid = key->color_outputs_valid,
127 .input_slots_valid = key->input_slots_valid,
128 };
129 }
130
131 static struct brw_cs_prog_key
132 iris_to_brw_cs_key(const struct gen_device_info *devinfo,
133 const struct iris_cs_prog_key *key)
134 {
135 return (struct brw_cs_prog_key) {
136 BRW_KEY_INIT(devinfo->gen, key->base.program_string_id),
137 };
138 }
139
140 static void *
141 upload_state(struct u_upload_mgr *uploader,
142 struct iris_state_ref *ref,
143 unsigned size,
144 unsigned alignment)
145 {
146 void *p = NULL;
147 u_upload_alloc(uploader, 0, size, alignment, &ref->offset, &ref->res, &p);
148 return p;
149 }
150
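/**
 * Upload a SURFACE_STATE for a UBO or SSBO buffer range.
 *
 * Allocates space from the surface uploader, fills it via isl, and records
 * the result in surf_state.  On allocation failure, surf_state->res is set
 * to NULL so callers can detect the error.
 */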
151 void
152 iris_upload_ubo_ssbo_surf_state(struct iris_context *ice,
153 struct pipe_shader_buffer *buf,
154 struct iris_state_ref *surf_state,
155 bool ssbo)
156 {
157 struct pipe_context *ctx = &ice->ctx;
158 struct iris_screen *screen = (struct iris_screen *) ctx->screen;
159
160 void *map =
161 upload_state(ice->state.surface_uploader, surf_state,
162 screen->isl_dev.ss.size, 64);
163 if (unlikely(!map)) {
164 surf_state->res = NULL;
165 return;
166 }
167
168 struct iris_resource *res = (void *) buf->buffer;
169 struct iris_bo *surf_bo = iris_resource_bo(surf_state->res);
170 surf_state->offset += iris_bo_offset_from_base_address(surf_bo);
171
172 isl_buffer_fill_state(&screen->isl_dev, map,
173 .address = res->bo->gtt_offset + res->offset +
174 buf->buffer_offset,
175 .size_B = buf->buffer_size - res->offset,
176 .format = ssbo ? ISL_FORMAT_RAW
177 : ISL_FORMAT_R32G32B32A32_FLOAT,
178 .swizzle = ISL_SWIZZLE_IDENTITY,
179 .stride_B = 1,
180 .mocs = iris_mocs(res->bo, &screen->isl_dev));
181 }
182
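/**
 * Compute the flattened offset of an array-of-arrays deref, in units of
 * elem_size.  Walking from the innermost array deref outward, each level
 * contributes index * (product of the inner array lengths) * elem_size;
 * e.g. for img[1][3] where img is a [2][4] array and elem_size is 1, this
 * yields 1 * 4 + 3 = 7.  The result is clamped to the array bounds (see
 * the comment at the bottom of the function).
 */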
183 static nir_ssa_def *
184 get_aoa_deref_offset(nir_builder *b,
185 nir_deref_instr *deref,
186 unsigned elem_size)
187 {
188 unsigned array_size = elem_size;
189 nir_ssa_def *offset = nir_imm_int(b, 0);
190
191 while (deref->deref_type != nir_deref_type_var) {
192 assert(deref->deref_type == nir_deref_type_array);
193
194 /* This level's element size is the previous level's array size */
195 nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
196 assert(deref->arr.index.ssa);
197 offset = nir_iadd(b, offset,
198 nir_imul(b, index, nir_imm_int(b, array_size)));
199
200 deref = nir_deref_instr_parent(deref);
201 assert(glsl_type_is_array(deref->type));
202 array_size *= glsl_get_length(deref->type);
203 }
204
205 /* Accessing an invalid surface index with the dataport can result in a
206 * hang. According to the spec "if the index used to select an individual
207 * element is negative or greater than or equal to the size of the array,
208 * the results of the operation are undefined but may not lead to
209 * termination" -- which is one of the possible outcomes of the hang.
210 * Clamp the index to prevent access outside of the array bounds.
211 */
212 return nir_umin(b, offset, nir_imm_int(b, array_size - elem_size));
213 }
214
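/**
 * Rewrite storage image deref intrinsics to use flat image indices:
 * the variable's driver_location plus the clamped array-of-arrays offset.
 */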
215 static void
216 iris_lower_storage_image_derefs(nir_shader *nir)
217 {
218 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
219
220 nir_builder b;
221 nir_builder_init(&b, impl);
222
223 nir_foreach_block(block, impl) {
224 nir_foreach_instr_safe(instr, block) {
225 if (instr->type != nir_instr_type_intrinsic)
226 continue;
227
228 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
229 switch (intrin->intrinsic) {
230 case nir_intrinsic_image_deref_load:
231 case nir_intrinsic_image_deref_store:
232 case nir_intrinsic_image_deref_atomic_add:
233 case nir_intrinsic_image_deref_atomic_imin:
234 case nir_intrinsic_image_deref_atomic_umin:
235 case nir_intrinsic_image_deref_atomic_imax:
236 case nir_intrinsic_image_deref_atomic_umax:
237 case nir_intrinsic_image_deref_atomic_and:
238 case nir_intrinsic_image_deref_atomic_or:
239 case nir_intrinsic_image_deref_atomic_xor:
240 case nir_intrinsic_image_deref_atomic_exchange:
241 case nir_intrinsic_image_deref_atomic_comp_swap:
242 case nir_intrinsic_image_deref_size:
243 case nir_intrinsic_image_deref_samples:
244 case nir_intrinsic_image_deref_load_raw_intel:
245 case nir_intrinsic_image_deref_store_raw_intel: {
246 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
247 nir_variable *var = nir_deref_instr_get_variable(deref);
248
249 b.cursor = nir_before_instr(&intrin->instr);
250 nir_ssa_def *index =
251 nir_iadd(&b, nir_imm_int(&b, var->data.driver_location),
252 get_aoa_deref_offset(&b, deref, 1));
253 nir_rewrite_image_intrinsic(intrin, index, false);
254 break;
255 }
256
257 default:
258 break;
259 }
260 }
261 }
262 }
263
264 /**
265 * Undo nir_lower_passthrough_edgeflags but keep the inputs_read flag.
266 */
267 static bool
268 iris_fix_edge_flags(nir_shader *nir)
269 {
270 if (nir->info.stage != MESA_SHADER_VERTEX)
271 return false;
272
273 nir_variable *var = NULL;
274 nir_foreach_variable(v, &nir->outputs) {
275 if (v->data.location == VARYING_SLOT_EDGE) {
276 var = v;
277 break;
278 }
279 }
280
281 if (!var)
282 return false;
283
284 exec_node_remove(&var->node);
285 var->data.mode = nir_var_shader_temp;
286 exec_list_push_tail(&nir->globals, &var->node);
287 nir->info.outputs_written &= ~VARYING_BIT_EDGE;
288 nir->info.inputs_read &= ~VERT_BIT_EDGEFLAG;
289 nir_fixup_deref_modes(nir);
290
291 nir_foreach_function(f, nir) {
292 if (f->impl) {
293 nir_metadata_preserve(f->impl, nir_metadata_block_index |
294 nir_metadata_dominance |
295 nir_metadata_live_ssa_defs |
296 nir_metadata_loop_analysis);
297 }
298 }
299
300 return true;
301 }
302
303 /**
304 * Fix an uncompiled shader's stream output info.
305 *
306 * Core Gallium stores output->register_index as a "slot" number, where
307 * slots are assigned consecutively to all outputs in info->outputs_written.
308 * This naive packing of outputs doesn't work for us - we too have slots,
309 * but the layout is defined by the VUE map, which we won't have until we
310 * compile a specific shader variant. So, we remap these and simply store
311 * VARYING_SLOT_* in our copy's output->register_index fields.
312 *
313 * We also fix up VARYING_SLOT_{LAYER,VIEWPORT,PSIZ} to select the Y/Z/W
314 * components of our VUE header. See brw_vue_map.c for the layout.
315 */
316 static void
317 update_so_info(struct pipe_stream_output_info *so_info,
318 uint64_t outputs_written)
319 {
320 uint8_t reverse_map[64] = {};
321 unsigned slot = 0;
322 while (outputs_written) {
323 reverse_map[slot++] = u_bit_scan64(&outputs_written);
324 }
325
326 for (unsigned i = 0; i < so_info->num_outputs; i++) {
327 struct pipe_stream_output *output = &so_info->output[i];
328
329 /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
330 output->register_index = reverse_map[output->register_index];
331
332 /* The VUE header contains three scalar fields packed together:
333 * - gl_PointSize is stored in VARYING_SLOT_PSIZ.w
334 * - gl_Layer is stored in VARYING_SLOT_PSIZ.y
335 * - gl_ViewportIndex is stored in VARYING_SLOT_PSIZ.z
336 */
337 switch (output->register_index) {
338 case VARYING_SLOT_LAYER:
339 assert(output->num_components == 1);
340 output->register_index = VARYING_SLOT_PSIZ;
341 output->start_component = 1;
342 break;
343 case VARYING_SLOT_VIEWPORT:
344 assert(output->num_components == 1);
345 output->register_index = VARYING_SLOT_PSIZ;
346 output->start_component = 2;
347 break;
348 case VARYING_SLOT_PSIZ:
349 assert(output->num_components == 1);
350 output->start_component = 3;
351 break;
352 }
353
354 //info->outputs_written |= 1ull << output->register_index;
355 }
356 }
357
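/**
 * Fill one vec4's worth of system value slots with BRW_PARAM_IMAGE params
 * pointing at consecutive dwords of a brw_image_param field, zero-padding
 * any unused components.
 */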
358 static void
359 setup_vec4_image_sysval(uint32_t *sysvals, uint32_t idx,
360 unsigned offset, unsigned n)
361 {
362 assert(offset % sizeof(uint32_t) == 0);
363
364 for (unsigned i = 0; i < n; ++i)
365 sysvals[i] = BRW_PARAM_IMAGE(idx, offset / sizeof(uint32_t) + i);
366
367 for (unsigned i = n; i < 4; ++i)
368 sysvals[i] = BRW_PARAM_BUILTIN_ZERO;
369 }
370
371 /**
372 * Associate NIR uniform variables with the prog_data->param[] mechanism
373 * used by the backend. Also, decide which UBOs we'd like to push in an
374 * ideal situation (though the backend can reduce this).
375 */
376 static void
377 iris_setup_uniforms(const struct brw_compiler *compiler,
378 void *mem_ctx,
379 nir_shader *nir,
380 struct brw_stage_prog_data *prog_data,
381 enum brw_param_builtin **out_system_values,
382 unsigned *out_num_system_values,
383 unsigned *out_num_cbufs)
384 {
385 UNUSED const struct gen_device_info *devinfo = compiler->devinfo;
386
387 const unsigned IRIS_MAX_SYSTEM_VALUES =
388 PIPE_MAX_SHADER_IMAGES * BRW_IMAGE_PARAM_SIZE;
389 enum brw_param_builtin *system_values =
390 rzalloc_array(mem_ctx, enum brw_param_builtin, IRIS_MAX_SYSTEM_VALUES);
391 unsigned num_system_values = 0;
392
393 unsigned patch_vert_idx = -1;
394 unsigned ucp_idx[IRIS_MAX_CLIP_PLANES];
395 unsigned img_idx[PIPE_MAX_SHADER_IMAGES];
396 memset(ucp_idx, -1, sizeof(ucp_idx));
397 memset(img_idx, -1, sizeof(img_idx));
398
399 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
400
401 nir_builder b;
402 nir_builder_init(&b, impl);
403
404 b.cursor = nir_before_block(nir_start_block(impl));
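   /* temp_ubo_name is a placeholder UBO index: the system value loads below
    * are emitted as load_ubo from it, and once the final cbuf layout is
    * known, it is rewritten to the system value cbuf index (see below).
    */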
405 nir_ssa_def *temp_ubo_name = nir_ssa_undef(&b, 1, 32);
406 nir_ssa_def *temp_const_ubo_name = NULL;
407
408 /* Turn system value intrinsics into uniforms */
409 nir_foreach_block(block, impl) {
410 nir_foreach_instr_safe(instr, block) {
411 if (instr->type != nir_instr_type_intrinsic)
412 continue;
413
414 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
415 nir_ssa_def *offset;
416
417 switch (intrin->intrinsic) {
418 case nir_intrinsic_load_constant: {
419 /* This one is special because it reads from the shader constant
420 * data and not cbuf0 which gallium uploads for us.
421 */
422 b.cursor = nir_before_instr(instr);
423 nir_ssa_def *offset =
424 nir_iadd_imm(&b, nir_ssa_for_src(&b, intrin->src[0], 1),
425 nir_intrinsic_base(intrin));
426
427 if (temp_const_ubo_name == NULL)
428 temp_const_ubo_name = nir_imm_int(&b, 0);
429
430 nir_intrinsic_instr *load_ubo =
431 nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_ubo);
432 load_ubo->num_components = intrin->num_components;
433 load_ubo->src[0] = nir_src_for_ssa(temp_const_ubo_name);
434 load_ubo->src[1] = nir_src_for_ssa(offset);
435 nir_intrinsic_set_align(load_ubo,
436 nir_intrinsic_align_mul(intrin),
437 nir_intrinsic_align_offset(intrin));
438 nir_ssa_dest_init(&load_ubo->instr, &load_ubo->dest,
439 intrin->dest.ssa.num_components,
440 intrin->dest.ssa.bit_size,
441 intrin->dest.ssa.name);
442 nir_builder_instr_insert(&b, &load_ubo->instr);
443
444 nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
445 nir_src_for_ssa(&load_ubo->dest.ssa));
446 nir_instr_remove(&intrin->instr);
447 continue;
448 }
449 case nir_intrinsic_load_user_clip_plane: {
450 unsigned ucp = nir_intrinsic_ucp_id(intrin);
451
452 if (ucp_idx[ucp] == -1) {
453 ucp_idx[ucp] = num_system_values;
454 num_system_values += 4;
455 }
456
457 for (int i = 0; i < 4; i++) {
458 system_values[ucp_idx[ucp] + i] =
459 BRW_PARAM_BUILTIN_CLIP_PLANE(ucp, i);
460 }
461
462 b.cursor = nir_before_instr(instr);
463 offset = nir_imm_int(&b, ucp_idx[ucp] * sizeof(uint32_t));
464 break;
465 }
466 case nir_intrinsic_load_patch_vertices_in:
467 if (patch_vert_idx == -1)
468 patch_vert_idx = num_system_values++;
469
470 system_values[patch_vert_idx] =
471 BRW_PARAM_BUILTIN_PATCH_VERTICES_IN;
472
473 b.cursor = nir_before_instr(instr);
474 offset = nir_imm_int(&b, patch_vert_idx * sizeof(uint32_t));
475 break;
476 case nir_intrinsic_image_deref_load_param_intel: {
477 assert(devinfo->gen < 9);
478 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
479 nir_variable *var = nir_deref_instr_get_variable(deref);
480
481 if (img_idx[var->data.binding] == -1) {
482 /* GL only allows arrays of arrays of images. */
483 assert(glsl_type_is_image(glsl_without_array(var->type)));
484 unsigned num_images = MAX2(1, glsl_get_aoa_size(var->type));
485
486 for (int i = 0; i < num_images; i++) {
487 const unsigned img = var->data.binding + i;
488
489 img_idx[img] = num_system_values;
490 num_system_values += BRW_IMAGE_PARAM_SIZE;
491
492 uint32_t *img_sv = &system_values[img_idx[img]];
493
494 setup_vec4_image_sysval(
495 img_sv + BRW_IMAGE_PARAM_OFFSET_OFFSET, img,
496 offsetof(struct brw_image_param, offset), 2);
497 setup_vec4_image_sysval(
498 img_sv + BRW_IMAGE_PARAM_SIZE_OFFSET, img,
499 offsetof(struct brw_image_param, size), 3);
500 setup_vec4_image_sysval(
501 img_sv + BRW_IMAGE_PARAM_STRIDE_OFFSET, img,
502 offsetof(struct brw_image_param, stride), 4);
503 setup_vec4_image_sysval(
504 img_sv + BRW_IMAGE_PARAM_TILING_OFFSET, img,
505 offsetof(struct brw_image_param, tiling), 3);
506 setup_vec4_image_sysval(
507 img_sv + BRW_IMAGE_PARAM_SWIZZLING_OFFSET, img,
508 offsetof(struct brw_image_param, swizzling), 2);
509 }
510 }
511
512 b.cursor = nir_before_instr(instr);
513 offset = nir_iadd(&b,
514 get_aoa_deref_offset(&b, deref, BRW_IMAGE_PARAM_SIZE * 4),
515 nir_imm_int(&b, img_idx[var->data.binding] * 4 +
516 nir_intrinsic_base(intrin) * 16));
517 break;
518 }
519 default:
520 continue;
521 }
522
523 unsigned comps = nir_intrinsic_dest_components(intrin);
524
525 nir_intrinsic_instr *load =
526 nir_intrinsic_instr_create(nir, nir_intrinsic_load_ubo);
527 load->num_components = comps;
528 load->src[0] = nir_src_for_ssa(temp_ubo_name);
529 load->src[1] = nir_src_for_ssa(offset);
530 nir_intrinsic_set_align(load, 4, 0);
531 nir_ssa_dest_init(&load->instr, &load->dest, comps, 32, NULL);
532 nir_builder_instr_insert(&b, &load->instr);
533 nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
534 nir_src_for_ssa(&load->dest.ssa));
535 nir_instr_remove(instr);
536 }
537 }
538
539 nir_validate_shader(nir, "before remapping");
540
541 /* Uniforms are stored in constant buffer 0, and the
542 * user-facing UBOs start at index 1. So if any constant buffer is
543 * needed, constant buffer 0 will be needed too, so account for it.
544 */
545 unsigned num_cbufs = nir->info.num_ubos;
546 if (num_cbufs || nir->num_uniforms)
547 num_cbufs++;
548
549 /* Place the new params in a new cbuf. */
550 if (num_system_values > 0) {
551 unsigned sysval_cbuf_index = num_cbufs;
552 num_cbufs++;
553
554 system_values = reralloc(mem_ctx, system_values, enum brw_param_builtin,
555 num_system_values);
556
557 nir_foreach_block(block, impl) {
558 nir_foreach_instr_safe(instr, block) {
559 if (instr->type != nir_instr_type_intrinsic)
560 continue;
561
562 nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);
563
564 if (load->intrinsic != nir_intrinsic_load_ubo)
565 continue;
566
567 b.cursor = nir_before_instr(instr);
568
569 assert(load->src[0].is_ssa);
570
571 if (load->src[0].ssa == temp_ubo_name) {
572 nir_ssa_def *imm = nir_imm_int(&b, sysval_cbuf_index);
573 nir_instr_rewrite_src(instr, &load->src[0],
574 nir_src_for_ssa(imm));
575 }
576 }
577 }
578
579 /* We need to fold the new iadds for brw_nir_analyze_ubo_ranges */
580 nir_opt_constant_folding(nir);
581 } else {
582 ralloc_free(system_values);
583 system_values = NULL;
584 }
585
586 assert(num_cbufs < PIPE_MAX_CONSTANT_BUFFERS);
587 nir_validate_shader(nir, "after remap");
588
589 /* We don't use params[], but gallium leaves num_uniforms set. We use it
590 * to detect when cbuf0 exists, but we don't need it anymore by the time we
591 * get here. Zero it out so that the back-end doesn't get confused
592 * when num_uniforms doesn't match nr_params * 4.
593 */
594 nir->num_uniforms = 0;
595
596 /* Constant loads (if any) need to go at the end of the constant buffers so
597 * we need to know num_cbufs before we can lower to them.
598 */
599 if (temp_const_ubo_name != NULL) {
600 nir_load_const_instr *const_ubo_index =
601 nir_instr_as_load_const(temp_const_ubo_name->parent_instr);
602 assert(const_ubo_index->def.bit_size == 32);
603 const_ubo_index->value[0].u32 = num_cbufs;
604 }
605
606 *out_system_values = system_values;
607 *out_num_system_values = num_system_values;
608 *out_num_cbufs = num_cbufs;
609 }
610
611 static const char *surface_group_names[] = {
612 [IRIS_SURFACE_GROUP_RENDER_TARGET] = "render target",
613 [IRIS_SURFACE_GROUP_RENDER_TARGET_READ] = "non-coherent render target read",
614 [IRIS_SURFACE_GROUP_CS_WORK_GROUPS] = "CS work groups",
615 [IRIS_SURFACE_GROUP_TEXTURE] = "texture",
616 [IRIS_SURFACE_GROUP_UBO] = "ubo",
617 [IRIS_SURFACE_GROUP_SSBO] = "ssbo",
618 [IRIS_SURFACE_GROUP_IMAGE] = "image",
619 };
620
621 static void
622 iris_print_binding_table(FILE *fp, const char *name,
623 const struct iris_binding_table *bt)
624 {
625 STATIC_ASSERT(ARRAY_SIZE(surface_group_names) == IRIS_SURFACE_GROUP_COUNT);
626
627 uint32_t total = 0;
628 uint32_t compacted = 0;
629
630 for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) {
631 uint32_t size = bt->sizes[i];
632 total += size;
633 if (size)
634 compacted += util_bitcount64(bt->used_mask[i]);
635 }
636
637 if (total == 0) {
638 fprintf(fp, "Binding table for %s is empty\n\n", name);
639 return;
640 }
641
642 if (total != compacted) {
643 fprintf(fp, "Binding table for %s "
644 "(compacted to %u entries from %u entries)\n",
645 name, compacted, total);
646 } else {
647 fprintf(fp, "Binding table for %s (%u entries)\n", name, total);
648 }
649
650 uint32_t entry = 0;
651 for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) {
652 uint64_t mask = bt->used_mask[i];
653 while (mask) {
654 int index = u_bit_scan64(&mask);
655 fprintf(fp, " [%u] %s #%d\n", entry++, surface_group_names[i], index);
656 }
657 }
658 fprintf(fp, "\n");
659 }
660
661 enum {
662 /* Max elements in a surface group. */
663 SURFACE_GROUP_MAX_ELEMENTS = 64,
664 };
665
666 /**
667 * Map a <group, index> pair to a binding table index.
668 *
669 * For example: <UBO, 5> => binding table index 12
670 */
671 uint32_t
672 iris_group_index_to_bti(const struct iris_binding_table *bt,
673 enum iris_surface_group group, uint32_t index)
674 {
675 assert(index < bt->sizes[group]);
676 uint64_t mask = bt->used_mask[group];
677 uint64_t bit = 1ull << index;
678 if (bit & mask) {
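      /* Compact: count how many lower-indexed surfaces in this group are
       * actually used and add that to the group's base offset.
       */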
679 return bt->offsets[group] + util_bitcount64((bit - 1) & mask);
680 } else {
681 return IRIS_SURFACE_NOT_USED;
682 }
683 }
684
685 /**
686 * Map a binding table index back to a <group, index> pair.
687 *
688 * For example: binding table index 12 => <UBO, 5>
689 */
690 uint32_t
691 iris_bti_to_group_index(const struct iris_binding_table *bt,
692 enum iris_surface_group group, uint32_t bti)
693 {
694 uint64_t used_mask = bt->used_mask[group];
695 assert(bti >= bt->offsets[group]);
696
697 uint32_t c = bti - bt->offsets[group];
698 while (used_mask) {
699 int i = u_bit_scan64(&used_mask);
700 if (c == 0)
701 return i;
702 c--;
703 }
704
705 return IRIS_SURFACE_NOT_USED;
706 }
707
708 static void
709 rewrite_src_with_bti(nir_builder *b, struct iris_binding_table *bt,
710 nir_instr *instr, nir_src *src,
711 enum iris_surface_group group)
712 {
713 assert(bt->sizes[group] > 0);
714
715 b->cursor = nir_before_instr(instr);
716 nir_ssa_def *bti;
717 if (nir_src_is_const(*src)) {
718 uint32_t index = nir_src_as_uint(*src);
719 bti = nir_imm_intN_t(b, iris_group_index_to_bti(bt, group, index),
720 src->ssa->bit_size);
721 } else {
722 /* Indirect usage makes all the surfaces of the group available,
723 * so we can just add the base.
724 */
725 assert(bt->used_mask[group] == BITFIELD64_MASK(bt->sizes[group]));
726 bti = nir_iadd_imm(b, src->ssa, bt->offsets[group]);
727 }
728 nir_instr_rewrite_src(instr, src, nir_src_for_ssa(bti));
729 }
730
731 static void
732 mark_used_with_src(struct iris_binding_table *bt, nir_src *src,
733 enum iris_surface_group group)
734 {
735 assert(bt->sizes[group] > 0);
736
737 if (nir_src_is_const(*src)) {
738 uint64_t index = nir_src_as_uint(*src);
739 assert(index < bt->sizes[group]);
740 bt->used_mask[group] |= 1ull << index;
741 } else {
742 /* There's an indirect usage, we need all the surfaces. */
743 bt->used_mask[group] = BITFIELD64_MASK(bt->sizes[group]);
744 }
745 }
746
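/**
 * Debug switch: INTEL_DISABLE_COMPACT_BINDING_TABLE=1 keeps a binding table
 * slot for every surface, whether or not it is actually used.
 */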
747 static bool
748 skip_compacting_binding_tables(void)
749 {
750 static int skip = -1;
751 if (skip < 0)
752 skip = env_var_as_boolean("INTEL_DISABLE_COMPACT_BINDING_TABLE", false);
753 return skip;
754 }
755
756 /**
757 * Set up the binding table indices and apply to the shader.
758 */
759 static void
760 iris_setup_binding_table(const struct gen_device_info *devinfo,
761 struct nir_shader *nir,
762 struct iris_binding_table *bt,
763 unsigned num_render_targets,
764 unsigned num_system_values,
765 unsigned num_cbufs)
766 {
767 const struct shader_info *info = &nir->info;
768
769 memset(bt, 0, sizeof(*bt));
770
771 /* Set the sizes for each surface group. For some groups, we already know
772 * upfront how many will be used, so mark them.
773 */
774 if (info->stage == MESA_SHADER_FRAGMENT) {
775 bt->sizes[IRIS_SURFACE_GROUP_RENDER_TARGET] = num_render_targets;
776 /* All render targets used. */
777 bt->used_mask[IRIS_SURFACE_GROUP_RENDER_TARGET] =
778 BITFIELD64_MASK(num_render_targets);
779
780 /* Set up the render target read surface group in order to support
781 * non-coherent framebuffer fetch on Gen8.
782 */
783 if (devinfo->gen == 8 && info->outputs_read) {
784 bt->sizes[IRIS_SURFACE_GROUP_RENDER_TARGET_READ] = num_render_targets;
785 bt->used_mask[IRIS_SURFACE_GROUP_RENDER_TARGET_READ] =
786 BITFIELD64_MASK(num_render_targets);
787 }
788 } else if (info->stage == MESA_SHADER_COMPUTE) {
789 bt->sizes[IRIS_SURFACE_GROUP_CS_WORK_GROUPS] = 1;
790 }
791
792 bt->sizes[IRIS_SURFACE_GROUP_TEXTURE] = util_last_bit(info->textures_used);
793 bt->used_mask[IRIS_SURFACE_GROUP_TEXTURE] = info->textures_used;
794
795 bt->sizes[IRIS_SURFACE_GROUP_IMAGE] = info->num_images;
796
797 /* Allocate an extra slot in the UBO section for NIR constants.
798 * Binding table compaction will remove it if unnecessary.
799 *
800 * We don't include them in iris_compiled_shader::num_cbufs because
801 * they are uploaded separately from shs->constbuf[], but from a shader
802 * point of view, they're another UBO (at the end of the section).
803 */
804 bt->sizes[IRIS_SURFACE_GROUP_UBO] = num_cbufs + 1;
805
806 bt->sizes[IRIS_SURFACE_GROUP_SSBO] = info->num_ssbos;
807
808 for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++)
809 assert(bt->sizes[i] <= SURFACE_GROUP_MAX_ELEMENTS);
810
811 /* Mark the surfaces used, for the cases where we don't have the
812 * information available upfront.
813 */
814 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
815 nir_foreach_block (block, impl) {
816 nir_foreach_instr (instr, block) {
817 if (instr->type != nir_instr_type_intrinsic)
818 continue;
819
820 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
821 switch (intrin->intrinsic) {
822 case nir_intrinsic_load_num_work_groups:
823 bt->used_mask[IRIS_SURFACE_GROUP_CS_WORK_GROUPS] = 1;
824 break;
825
826 case nir_intrinsic_load_output:
827 if (devinfo->gen == 8) {
828 mark_used_with_src(bt, &intrin->src[0],
829 IRIS_SURFACE_GROUP_RENDER_TARGET_READ);
830 }
831 break;
832
833 case nir_intrinsic_image_size:
834 case nir_intrinsic_image_load:
835 case nir_intrinsic_image_store:
836 case nir_intrinsic_image_atomic_add:
837 case nir_intrinsic_image_atomic_imin:
838 case nir_intrinsic_image_atomic_umin:
839 case nir_intrinsic_image_atomic_imax:
840 case nir_intrinsic_image_atomic_umax:
841 case nir_intrinsic_image_atomic_and:
842 case nir_intrinsic_image_atomic_or:
843 case nir_intrinsic_image_atomic_xor:
844 case nir_intrinsic_image_atomic_exchange:
845 case nir_intrinsic_image_atomic_comp_swap:
846 case nir_intrinsic_image_load_raw_intel:
847 case nir_intrinsic_image_store_raw_intel:
848 mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_IMAGE);
849 break;
850
851 case nir_intrinsic_load_ubo:
852 mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_UBO);
853 break;
854
855 case nir_intrinsic_store_ssbo:
856 mark_used_with_src(bt, &intrin->src[1], IRIS_SURFACE_GROUP_SSBO);
857 break;
858
859 case nir_intrinsic_get_buffer_size:
860 case nir_intrinsic_ssbo_atomic_add:
861 case nir_intrinsic_ssbo_atomic_imin:
862 case nir_intrinsic_ssbo_atomic_umin:
863 case nir_intrinsic_ssbo_atomic_imax:
864 case nir_intrinsic_ssbo_atomic_umax:
865 case nir_intrinsic_ssbo_atomic_and:
866 case nir_intrinsic_ssbo_atomic_or:
867 case nir_intrinsic_ssbo_atomic_xor:
868 case nir_intrinsic_ssbo_atomic_exchange:
869 case nir_intrinsic_ssbo_atomic_comp_swap:
870 case nir_intrinsic_ssbo_atomic_fmin:
871 case nir_intrinsic_ssbo_atomic_fmax:
872 case nir_intrinsic_ssbo_atomic_fcomp_swap:
873 case nir_intrinsic_load_ssbo:
874 mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_SSBO);
875 break;
876
877 default:
878 break;
879 }
880 }
881 }
882
883 /* When disabled, we just mark everything as used. */
884 if (unlikely(skip_compacting_binding_tables())) {
885 for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++)
886 bt->used_mask[i] = BITFIELD64_MASK(bt->sizes[i]);
887 }
888
889 /* Calculate the offsets and the binding table size based on the used
890 * surfaces. After this point, the functions to go between "group indices"
891 * and binding table indices can be used.
892 */
893 uint32_t next = 0;
894 for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) {
895 if (bt->used_mask[i] != 0) {
896 bt->offsets[i] = next;
897 next += util_bitcount64(bt->used_mask[i]);
898 }
899 }
900 bt->size_bytes = next * 4;
901
902 if (unlikely(INTEL_DEBUG & DEBUG_BT)) {
903 iris_print_binding_table(stderr, gl_shader_stage_name(info->stage), bt);
904 }
905
906 /* Apply the binding table indices. The backend compiler is not expected
907 * to change those, as we haven't set any of the *_start entries in brw
908 * binding_table.
909 */
910 nir_builder b;
911 nir_builder_init(&b, impl);
912
913 nir_foreach_block (block, impl) {
914 nir_foreach_instr (instr, block) {
915 if (instr->type == nir_instr_type_tex) {
916 nir_tex_instr *tex = nir_instr_as_tex(instr);
917 tex->texture_index =
918 iris_group_index_to_bti(bt, IRIS_SURFACE_GROUP_TEXTURE,
919 tex->texture_index);
920 continue;
921 }
922
923 if (instr->type != nir_instr_type_intrinsic)
924 continue;
925
926 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
927 switch (intrin->intrinsic) {
928 case nir_intrinsic_image_size:
929 case nir_intrinsic_image_load:
930 case nir_intrinsic_image_store:
931 case nir_intrinsic_image_atomic_add:
932 case nir_intrinsic_image_atomic_imin:
933 case nir_intrinsic_image_atomic_umin:
934 case nir_intrinsic_image_atomic_imax:
935 case nir_intrinsic_image_atomic_umax:
936 case nir_intrinsic_image_atomic_and:
937 case nir_intrinsic_image_atomic_or:
938 case nir_intrinsic_image_atomic_xor:
939 case nir_intrinsic_image_atomic_exchange:
940 case nir_intrinsic_image_atomic_comp_swap:
941 case nir_intrinsic_image_load_raw_intel:
942 case nir_intrinsic_image_store_raw_intel:
943 rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
944 IRIS_SURFACE_GROUP_IMAGE);
945 break;
946
947 case nir_intrinsic_load_ubo:
948 rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
949 IRIS_SURFACE_GROUP_UBO);
950 break;
951
952 case nir_intrinsic_store_ssbo:
953 rewrite_src_with_bti(&b, bt, instr, &intrin->src[1],
954 IRIS_SURFACE_GROUP_SSBO);
955 break;
956
957 case nir_intrinsic_load_output:
958 if (devinfo->gen == 8) {
959 rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
960 IRIS_SURFACE_GROUP_RENDER_TARGET_READ);
961 }
962 break;
963
964 case nir_intrinsic_get_buffer_size:
965 case nir_intrinsic_ssbo_atomic_add:
966 case nir_intrinsic_ssbo_atomic_imin:
967 case nir_intrinsic_ssbo_atomic_umin:
968 case nir_intrinsic_ssbo_atomic_imax:
969 case nir_intrinsic_ssbo_atomic_umax:
970 case nir_intrinsic_ssbo_atomic_and:
971 case nir_intrinsic_ssbo_atomic_or:
972 case nir_intrinsic_ssbo_atomic_xor:
973 case nir_intrinsic_ssbo_atomic_exchange:
974 case nir_intrinsic_ssbo_atomic_comp_swap:
975 case nir_intrinsic_ssbo_atomic_fmin:
976 case nir_intrinsic_ssbo_atomic_fmax:
977 case nir_intrinsic_ssbo_atomic_fcomp_swap:
978 case nir_intrinsic_load_ssbo:
979 rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
980 IRIS_SURFACE_GROUP_SSBO);
981 break;
982
983 default:
984 break;
985 }
986 }
987 }
988 }
989
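/**
 * Log a shader recompile to the shader performance log, including which
 * key fields changed relative to the previous compile of this program.
 */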
990 static void
991 iris_debug_recompile(struct iris_context *ice,
992 struct shader_info *info,
993 const struct brw_base_prog_key *key)
994 {
995 struct iris_screen *screen = (struct iris_screen *) ice->ctx.screen;
996 const struct gen_device_info *devinfo = &screen->devinfo;
997 const struct brw_compiler *c = screen->compiler;
998
999 if (!info)
1000 return;
1001
1002 c->shader_perf_log(&ice->dbg, "Recompiling %s shader for program %s: %s\n",
1003 _mesa_shader_stage_to_string(info->stage),
1004 info->name ? info->name : "(no identifier)",
1005 info->label ? info->label : "");
1006
1007 const void *old_iris_key =
1008 iris_find_previous_compile(ice, info->stage, key->program_string_id);
1009
1010 union brw_any_prog_key old_key;
1011
1012 switch (info->stage) {
1013 case MESA_SHADER_VERTEX:
1014 old_key.vs = iris_to_brw_vs_key(devinfo, old_iris_key);
1015 break;
1016 case MESA_SHADER_TESS_CTRL:
1017 old_key.tcs = iris_to_brw_tcs_key(devinfo, old_iris_key);
1018 break;
1019 case MESA_SHADER_TESS_EVAL:
1020 old_key.tes = iris_to_brw_tes_key(devinfo, old_iris_key);
1021 break;
1022 case MESA_SHADER_GEOMETRY:
1023 old_key.gs = iris_to_brw_gs_key(devinfo, old_iris_key);
1024 break;
1025 case MESA_SHADER_FRAGMENT:
1026 old_key.wm = iris_to_brw_fs_key(devinfo, old_iris_key);
1027 break;
1028 case MESA_SHADER_COMPUTE:
1029 old_key.cs = iris_to_brw_cs_key(devinfo, old_iris_key);
1030 break;
1031 default:
1032 unreachable("invalid shader stage");
1033 }
1034
1035 brw_debug_key_recompile(c, &ice->dbg, info->stage, &old_key.base, key);
1036 }
1037
1038 /**
1039 * Get the shader for the last enabled geometry stage.
1040 *
1041 * This stage is the one which will feed stream output and the rasterizer.
1042 */
1043 static gl_shader_stage
1044 last_vue_stage(struct iris_context *ice)
1045 {
1046 if (ice->shaders.uncompiled[MESA_SHADER_GEOMETRY])
1047 return MESA_SHADER_GEOMETRY;
1048
1049 if (ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL])
1050 return MESA_SHADER_TESS_EVAL;
1051
1052 return MESA_SHADER_VERTEX;
1053 }
1054
1055 /**
1056 * Compile a vertex shader, and upload the assembly.
1057 */
1058 static struct iris_compiled_shader *
1059 iris_compile_vs(struct iris_context *ice,
1060 struct iris_uncompiled_shader *ish,
1061 const struct iris_vs_prog_key *key)
1062 {
1063 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1064 const struct brw_compiler *compiler = screen->compiler;
1065 const struct gen_device_info *devinfo = &screen->devinfo;
1066 void *mem_ctx = ralloc_context(NULL);
1067 struct brw_vs_prog_data *vs_prog_data =
1068 rzalloc(mem_ctx, struct brw_vs_prog_data);
1069 struct brw_vue_prog_data *vue_prog_data = &vs_prog_data->base;
1070 struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
1071 enum brw_param_builtin *system_values;
1072 unsigned num_system_values;
1073 unsigned num_cbufs;
1074
1075 nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
1076
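   /* The key asks us to lower user clip planes in NIR, writing the clip
    * distances ourselves; the brw key then reports zero clip plane consts
    * (see iris_to_brw_vs_key above).
    */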
1077 if (key->vue.nr_userclip_plane_consts) {
1078 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
1079 nir_lower_clip_vs(nir, (1 << key->vue.nr_userclip_plane_consts) - 1,
1080 true, false, NULL);
1081 nir_lower_io_to_temporaries(nir, impl, true, false);
1082 nir_lower_global_vars_to_local(nir);
1083 nir_lower_vars_to_ssa(nir);
1084 nir_shader_gather_info(nir, impl);
1085 }
1086
1087 prog_data->use_alt_mode = ish->use_alt_mode;
1088
1089 iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
1090 &num_system_values, &num_cbufs);
1091
1092 struct iris_binding_table bt;
1093 iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
1094 num_system_values, num_cbufs);
1095
1096 brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
1097
1098 brw_compute_vue_map(devinfo,
1099 &vue_prog_data->vue_map, nir->info.outputs_written,
1100 nir->info.separate_shader, /* pos_slots */ 1);
1101
1102 struct brw_vs_prog_key brw_key = iris_to_brw_vs_key(devinfo, key);
1103
1104 char *error_str = NULL;
1105 const unsigned *program =
1106 brw_compile_vs(compiler, &ice->dbg, mem_ctx, &brw_key, vs_prog_data,
1107 nir, -1, NULL, &error_str);
1108 if (program == NULL) {
1109 dbg_printf("Failed to compile vertex shader: %s\n", error_str);
1110 ralloc_free(mem_ctx);
1111 return NULL;
1112 }
1113
1114 if (ish->compiled_once) {
1115 iris_debug_recompile(ice, &nir->info, &brw_key.base);
1116 } else {
1117 ish->compiled_once = true;
1118 }
1119
1120 uint32_t *so_decls =
1121 screen->vtbl.create_so_decl_list(&ish->stream_output,
1122 &vue_prog_data->vue_map);
1123
1124 struct iris_compiled_shader *shader =
1125 iris_upload_shader(ice, IRIS_CACHE_VS, sizeof(*key), key, program,
1126 prog_data, so_decls, system_values, num_system_values,
1127 num_cbufs, &bt);
1128
1129 iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
1130
1131 ralloc_free(mem_ctx);
1132 return shader;
1133 }
1134
1135 /**
1136 * Update the current vertex shader variant.
1137 *
1138 * Fill out the key, look in the cache, compile and bind if needed.
1139 */
1140 static void
1141 iris_update_compiled_vs(struct iris_context *ice)
1142 {
1143 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1144 struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_VERTEX];
1145 struct iris_uncompiled_shader *ish =
1146 ice->shaders.uncompiled[MESA_SHADER_VERTEX];
1147
1148 struct iris_vs_prog_key key = { KEY_ID(vue.base) };
1149 screen->vtbl.populate_vs_key(ice, &ish->nir->info, last_vue_stage(ice), &key);
1150
1151 struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_VS];
1152 struct iris_compiled_shader *shader =
1153 iris_find_cached_shader(ice, IRIS_CACHE_VS, sizeof(key), &key);
1154
1155 if (!shader)
1156 shader = iris_disk_cache_retrieve(ice, ish, &key, sizeof(key));
1157
1158 if (!shader)
1159 shader = iris_compile_vs(ice, ish, &key);
1160
1161 if (old != shader) {
1162 ice->shaders.prog[IRIS_CACHE_VS] = shader;
1163 ice->state.dirty |= IRIS_DIRTY_VS |
1164 IRIS_DIRTY_BINDINGS_VS |
1165 IRIS_DIRTY_CONSTANTS_VS |
1166 IRIS_DIRTY_VF_SGVS;
1167 shs->sysvals_need_upload = true;
1168
1169 const struct brw_vs_prog_data *vs_prog_data =
1170 (void *) shader->prog_data;
1171 const bool uses_draw_params = vs_prog_data->uses_firstvertex ||
1172 vs_prog_data->uses_baseinstance;
1173 const bool uses_derived_draw_params = vs_prog_data->uses_drawid ||
1174 vs_prog_data->uses_is_indexed_draw;
1175 const bool needs_sgvs_element = uses_draw_params ||
1176 vs_prog_data->uses_instanceid ||
1177 vs_prog_data->uses_vertexid;
1178
1179 if (ice->state.vs_uses_draw_params != uses_draw_params ||
1180 ice->state.vs_uses_derived_draw_params != uses_derived_draw_params ||
1181 ice->state.vs_needs_edge_flag != ish->needs_edge_flag) {
1182 ice->state.dirty |= IRIS_DIRTY_VERTEX_BUFFERS |
1183 IRIS_DIRTY_VERTEX_ELEMENTS;
1184 }
1185 ice->state.vs_uses_draw_params = uses_draw_params;
1186 ice->state.vs_uses_derived_draw_params = uses_derived_draw_params;
1187 ice->state.vs_needs_sgvs_element = needs_sgvs_element;
1188 ice->state.vs_needs_edge_flag = ish->needs_edge_flag;
1189 }
1190 }
1191
1192 /**
1193 * Get the shader_info for a given stage, or NULL if the stage is disabled.
1194 */
1195 const struct shader_info *
1196 iris_get_shader_info(const struct iris_context *ice, gl_shader_stage stage)
1197 {
1198 const struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[stage];
1199
1200 if (!ish)
1201 return NULL;
1202
1203 const nir_shader *nir = ish->nir;
1204 return &nir->info;
1205 }
1206
1207 /**
1208 * Get the union of TCS output and TES input slots.
1209 *
1210 * TCS and TES need to agree on a common URB entry layout. In particular,
1211 * the data for all patch vertices is stored in a single URB entry (unlike
1212 * GS which has one entry per input vertex). This means that per-vertex
1213 * array indexing needs a stride.
1214 *
1215 * SSO requires locations to match, but doesn't require the number of
1216 * outputs/inputs to match (in fact, the TCS often has extra outputs).
1217 * So, we need to take the extra step of unifying these on the fly.
1218 */
1219 static void
1220 get_unified_tess_slots(const struct iris_context *ice,
1221 uint64_t *per_vertex_slots,
1222 uint32_t *per_patch_slots)
1223 {
1224 const struct shader_info *tcs =
1225 iris_get_shader_info(ice, MESA_SHADER_TESS_CTRL);
1226 const struct shader_info *tes =
1227 iris_get_shader_info(ice, MESA_SHADER_TESS_EVAL);
1228
1229 *per_vertex_slots = tes->inputs_read;
1230 *per_patch_slots = tes->patch_inputs_read;
1231
1232 if (tcs) {
1233 *per_vertex_slots |= tcs->outputs_written;
1234 *per_patch_slots |= tcs->patch_outputs_written;
1235 }
1236 }
1237
1238 /**
1239 * Compile a tessellation control shader, and upload the assembly.
1240 */
1241 static struct iris_compiled_shader *
1242 iris_compile_tcs(struct iris_context *ice,
1243 struct iris_uncompiled_shader *ish,
1244 const struct iris_tcs_prog_key *key)
1245 {
1246 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1247 const struct brw_compiler *compiler = screen->compiler;
1248 const struct nir_shader_compiler_options *options =
1249 compiler->glsl_compiler_options[MESA_SHADER_TESS_CTRL].NirOptions;
1250 void *mem_ctx = ralloc_context(NULL);
1251 struct brw_tcs_prog_data *tcs_prog_data =
1252 rzalloc(mem_ctx, struct brw_tcs_prog_data);
1253 struct brw_vue_prog_data *vue_prog_data = &tcs_prog_data->base;
1254 struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
1255 const struct gen_device_info *devinfo = &screen->devinfo;
1256 enum brw_param_builtin *system_values = NULL;
1257 unsigned num_system_values = 0;
1258 unsigned num_cbufs = 0;
1259
1260 nir_shader *nir;
1261
1262 struct iris_binding_table bt;
1263
1264 struct brw_tcs_prog_key brw_key = iris_to_brw_tcs_key(devinfo, key);
1265
1266 if (ish) {
1267 nir = nir_shader_clone(mem_ctx, ish->nir);
1268
1269 iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
1270 &num_system_values, &num_cbufs);
1271 iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
1272 num_system_values, num_cbufs);
1273 brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
1274 } else {
1275 nir =
1276 brw_nir_create_passthrough_tcs(mem_ctx, compiler, options, &brw_key);
1277
1278 /* Reserve space for passing the default tess levels as constants. */
1279 num_cbufs = 1;
1280 num_system_values = 8;
1281 system_values =
1282 rzalloc_array(mem_ctx, enum brw_param_builtin, num_system_values);
1283 prog_data->param = rzalloc_array(mem_ctx, uint32_t, num_system_values);
1284 prog_data->nr_params = num_system_values;
1285
1286 if (key->tes_primitive_mode == GL_QUADS) {
1287 for (int i = 0; i < 4; i++)
1288 system_values[7 - i] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i;
1289
1290 system_values[3] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X;
1291 system_values[2] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_Y;
1292 } else if (key->tes_primitive_mode == GL_TRIANGLES) {
1293 for (int i = 0; i < 3; i++)
1294 system_values[7 - i] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i;
1295
1296 system_values[4] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X;
1297 } else {
1298 assert(key->tes_primitive_mode == GL_ISOLINES);
1299 system_values[7] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_Y;
1300 system_values[6] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X;
1301 }
1302
1303 /* Manually setup the TCS binding table. */
1304 memset(&bt, 0, sizeof(bt));
1305 bt.sizes[IRIS_SURFACE_GROUP_UBO] = 1;
1306 bt.used_mask[IRIS_SURFACE_GROUP_UBO] = 1;
1307 bt.size_bytes = 4;
1308
1309 prog_data->ubo_ranges[0].length = 1;
1310 }
1311
1312 char *error_str = NULL;
1313 const unsigned *program =
1314 brw_compile_tcs(compiler, &ice->dbg, mem_ctx, &brw_key, tcs_prog_data,
1315 nir, -1, NULL, &error_str);
1316 if (program == NULL) {
1317 dbg_printf("Failed to compile control shader: %s\n", error_str);
1318 ralloc_free(mem_ctx);
1319 return NULL;
1320 }
1321
1322 if (ish) {
1323 if (ish->compiled_once) {
1324 iris_debug_recompile(ice, &nir->info, &brw_key.base);
1325 } else {
1326 ish->compiled_once = true;
1327 }
1328 }
1329
1330 struct iris_compiled_shader *shader =
1331 iris_upload_shader(ice, IRIS_CACHE_TCS, sizeof(*key), key, program,
1332 prog_data, NULL, system_values, num_system_values,
1333 num_cbufs, &bt);
1334
1335 if (ish)
1336 iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
1337
1338 ralloc_free(mem_ctx);
1339 return shader;
1340 }
1341
1342 /**
1343 * Update the current tessellation control shader variant.
1344 *
1345 * Fill out the key, look in the cache, compile and bind if needed.
1346 */
1347 static void
1348 iris_update_compiled_tcs(struct iris_context *ice)
1349 {
1350 struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_TESS_CTRL];
1351 struct iris_uncompiled_shader *tcs =
1352 ice->shaders.uncompiled[MESA_SHADER_TESS_CTRL];
1353 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1354 const struct brw_compiler *compiler = screen->compiler;
1355 const struct gen_device_info *devinfo = &screen->devinfo;
1356
1357 const struct shader_info *tes_info =
1358 iris_get_shader_info(ice, MESA_SHADER_TESS_EVAL);
1359 struct iris_tcs_prog_key key = {
1360 .vue.base.program_string_id = tcs ? tcs->program_id : 0,
1361 .tes_primitive_mode = tes_info->tess.primitive_mode,
1362 .input_vertices =
1363 !tcs || compiler->use_tcs_8_patch ? ice->state.vertices_per_patch : 0,
1364 .quads_workaround = devinfo->gen < 9 &&
1365 tes_info->tess.primitive_mode == GL_QUADS &&
1366 tes_info->tess.spacing == TESS_SPACING_EQUAL,
1367 };
1368 get_unified_tess_slots(ice, &key.outputs_written,
1369 &key.patch_outputs_written);
1370 screen->vtbl.populate_tcs_key(ice, &key);
1371
1372 struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_TCS];
1373 struct iris_compiled_shader *shader =
1374 iris_find_cached_shader(ice, IRIS_CACHE_TCS, sizeof(key), &key);
1375
1376 if (tcs && !shader)
1377 shader = iris_disk_cache_retrieve(ice, tcs, &key, sizeof(key));
1378
1379 if (!shader)
1380 shader = iris_compile_tcs(ice, tcs, &key);
1381
1382 if (old != shader) {
1383 ice->shaders.prog[IRIS_CACHE_TCS] = shader;
1384 ice->state.dirty |= IRIS_DIRTY_TCS |
1385 IRIS_DIRTY_BINDINGS_TCS |
1386 IRIS_DIRTY_CONSTANTS_TCS;
1387 shs->sysvals_need_upload = true;
1388 }
1389 }
1390
1391 /**
1392 * Compile a tessellation evaluation shader, and upload the assembly.
1393 */
1394 static struct iris_compiled_shader *
1395 iris_compile_tes(struct iris_context *ice,
1396 struct iris_uncompiled_shader *ish,
1397 const struct iris_tes_prog_key *key)
1398 {
1399 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1400 const struct brw_compiler *compiler = screen->compiler;
1401 void *mem_ctx = ralloc_context(NULL);
1402 struct brw_tes_prog_data *tes_prog_data =
1403 rzalloc(mem_ctx, struct brw_tes_prog_data);
1404 struct brw_vue_prog_data *vue_prog_data = &tes_prog_data->base;
1405 struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
1406 enum brw_param_builtin *system_values;
1407 const struct gen_device_info *devinfo = &screen->devinfo;
1408 unsigned num_system_values;
1409 unsigned num_cbufs;
1410
1411 nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
1412
1413 if (key->vue.nr_userclip_plane_consts) {
1414 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
1415 nir_lower_clip_vs(nir, (1 << key->vue.nr_userclip_plane_consts) - 1,
1416 true, false, NULL);
1417 nir_lower_io_to_temporaries(nir, impl, true, false);
1418 nir_lower_global_vars_to_local(nir);
1419 nir_lower_vars_to_ssa(nir);
1420 nir_shader_gather_info(nir, impl);
1421 }
1422
1423 iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
1424 &num_system_values, &num_cbufs);
1425
1426 struct iris_binding_table bt;
1427 iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
1428 num_system_values, num_cbufs);
1429
1430 brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
1431
1432 struct brw_vue_map input_vue_map;
1433 brw_compute_tess_vue_map(&input_vue_map, key->inputs_read,
1434 key->patch_inputs_read);
1435
1436 struct brw_tes_prog_key brw_key = iris_to_brw_tes_key(devinfo, key);
1437
1438 char *error_str = NULL;
1439 const unsigned *program =
1440 brw_compile_tes(compiler, &ice->dbg, mem_ctx, &brw_key, &input_vue_map,
1441 tes_prog_data, nir, -1, NULL, &error_str);
1442 if (program == NULL) {
1443 dbg_printf("Failed to compile evaluation shader: %s\n", error_str);
1444 ralloc_free(mem_ctx);
1445 return NULL;
1446 }
1447
1448 if (ish->compiled_once) {
1449 iris_debug_recompile(ice, &nir->info, &brw_key.base);
1450 } else {
1451 ish->compiled_once = true;
1452 }
1453
1454 uint32_t *so_decls =
1455 screen->vtbl.create_so_decl_list(&ish->stream_output,
1456 &vue_prog_data->vue_map);
1457
1458
1459 struct iris_compiled_shader *shader =
1460 iris_upload_shader(ice, IRIS_CACHE_TES, sizeof(*key), key, program,
1461 prog_data, so_decls, system_values, num_system_values,
1462 num_cbufs, &bt);
1463
1464 iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
1465
1466 ralloc_free(mem_ctx);
1467 return shader;
1468 }
1469
1470 /**
1471 * Update the current tessellation evaluation shader variant.
1472 *
1473 * Fill out the key, look in the cache, compile and bind if needed.
1474 */
1475 static void
1476 iris_update_compiled_tes(struct iris_context *ice)
1477 {
1478 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1479 struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_TESS_EVAL];
1480 struct iris_uncompiled_shader *ish =
1481 ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL];
1482
1483 struct iris_tes_prog_key key = { KEY_ID(vue.base) };
1484 get_unified_tess_slots(ice, &key.inputs_read, &key.patch_inputs_read);
1485 screen->vtbl.populate_tes_key(ice, &ish->nir->info, last_vue_stage(ice), &key);
1486
1487 struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_TES];
1488 struct iris_compiled_shader *shader =
1489 iris_find_cached_shader(ice, IRIS_CACHE_TES, sizeof(key), &key);
1490
1491 if (!shader)
1492 shader = iris_disk_cache_retrieve(ice, ish, &key, sizeof(key));
1493
1494 if (!shader)
1495 shader = iris_compile_tes(ice, ish, &key);
1496
1497 if (old != shader) {
1498 ice->shaders.prog[IRIS_CACHE_TES] = shader;
1499 ice->state.dirty |= IRIS_DIRTY_TES |
1500 IRIS_DIRTY_BINDINGS_TES |
1501 IRIS_DIRTY_CONSTANTS_TES;
1502 shs->sysvals_need_upload = true;
1503 }
1504
1505 /* TODO: Could compare and avoid flagging this. */
1506 const struct shader_info *tes_info = &ish->nir->info;
1507 if (tes_info->system_values_read & (1ull << SYSTEM_VALUE_VERTICES_IN)) {
1508 ice->state.dirty |= IRIS_DIRTY_CONSTANTS_TES;
1509 ice->state.shaders[MESA_SHADER_TESS_EVAL].sysvals_need_upload = true;
1510 }
1511 }
1512
1513 /**
1514 * Compile a geometry shader, and upload the assembly.
1515 */
1516 static struct iris_compiled_shader *
1517 iris_compile_gs(struct iris_context *ice,
1518 struct iris_uncompiled_shader *ish,
1519 const struct iris_gs_prog_key *key)
1520 {
1521 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1522 const struct brw_compiler *compiler = screen->compiler;
1523 const struct gen_device_info *devinfo = &screen->devinfo;
1524 void *mem_ctx = ralloc_context(NULL);
1525 struct brw_gs_prog_data *gs_prog_data =
1526 rzalloc(mem_ctx, struct brw_gs_prog_data);
1527 struct brw_vue_prog_data *vue_prog_data = &gs_prog_data->base;
1528 struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
1529 enum brw_param_builtin *system_values;
1530 unsigned num_system_values;
1531 unsigned num_cbufs;
1532
1533 nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
1534
1535 if (key->vue.nr_userclip_plane_consts) {
1536 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
1537 nir_lower_clip_gs(nir, (1 << key->vue.nr_userclip_plane_consts) - 1,
1538 false, NULL);
1539 nir_lower_io_to_temporaries(nir, impl, true, false);
1540 nir_lower_global_vars_to_local(nir);
1541 nir_lower_vars_to_ssa(nir);
1542 nir_shader_gather_info(nir, impl);
1543 }
1544
1545 iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
1546 &num_system_values, &num_cbufs);
1547
1548 struct iris_binding_table bt;
1549 iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
1550 num_system_values, num_cbufs);
1551
1552 brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
1553
1554 brw_compute_vue_map(devinfo,
1555 &vue_prog_data->vue_map, nir->info.outputs_written,
1556 nir->info.separate_shader, /* pos_slots */ 1);
1557
1558 struct brw_gs_prog_key brw_key = iris_to_brw_gs_key(devinfo, key);
1559
1560 char *error_str = NULL;
1561 const unsigned *program =
1562 brw_compile_gs(compiler, &ice->dbg, mem_ctx, &brw_key, gs_prog_data,
1563 nir, NULL, -1, NULL, &error_str);
1564 if (program == NULL) {
1565 dbg_printf("Failed to compile geometry shader: %s\n", error_str);
1566 ralloc_free(mem_ctx);
1567 return NULL;
1568 }
1569
1570 if (ish->compiled_once) {
1571 iris_debug_recompile(ice, &nir->info, &brw_key.base);
1572 } else {
1573 ish->compiled_once = true;
1574 }
1575
1576 uint32_t *so_decls =
1577 screen->vtbl.create_so_decl_list(&ish->stream_output,
1578 &vue_prog_data->vue_map);
1579
1580 struct iris_compiled_shader *shader =
1581 iris_upload_shader(ice, IRIS_CACHE_GS, sizeof(*key), key, program,
1582 prog_data, so_decls, system_values, num_system_values,
1583 num_cbufs, &bt);
1584
1585 iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
1586
1587 ralloc_free(mem_ctx);
1588 return shader;
1589 }
1590
1591 /**
1592 * Update the current geometry shader variant.
1593 *
1594 * Fill out the key, look in the cache, compile and bind if needed.
1595 */
1596 static void
1597 iris_update_compiled_gs(struct iris_context *ice)
1598 {
1599 struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_GEOMETRY];
1600 struct iris_uncompiled_shader *ish =
1601 ice->shaders.uncompiled[MESA_SHADER_GEOMETRY];
1602 struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_GS];
1603 struct iris_compiled_shader *shader = NULL;
1604 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1605
1606 if (ish) {
1607 struct iris_gs_prog_key key = { KEY_ID(vue.base) };
1608 screen->vtbl.populate_gs_key(ice, &ish->nir->info, last_vue_stage(ice), &key);
1609
1610 shader =
1611 iris_find_cached_shader(ice, IRIS_CACHE_GS, sizeof(key), &key);
1612
1613 if (!shader)
1614 shader = iris_disk_cache_retrieve(ice, ish, &key, sizeof(key));
1615
1616 if (!shader)
1617 shader = iris_compile_gs(ice, ish, &key);
1618 }
1619
1620 if (old != shader) {
1621 ice->shaders.prog[IRIS_CACHE_GS] = shader;
1622 ice->state.dirty |= IRIS_DIRTY_GS |
1623 IRIS_DIRTY_BINDINGS_GS |
1624 IRIS_DIRTY_CONSTANTS_GS;
1625 shs->sysvals_need_upload = true;
1626 }
1627 }
1628
1629 /**
1630 * Compile a fragment (pixel) shader, and upload the assembly.
1631 */
1632 static struct iris_compiled_shader *
1633 iris_compile_fs(struct iris_context *ice,
1634 struct iris_uncompiled_shader *ish,
1635 const struct iris_fs_prog_key *key,
1636 struct brw_vue_map *vue_map)
1637 {
1638 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1639 const struct brw_compiler *compiler = screen->compiler;
1640 void *mem_ctx = ralloc_context(NULL);
1641 struct brw_wm_prog_data *fs_prog_data =
1642 rzalloc(mem_ctx, struct brw_wm_prog_data);
1643 struct brw_stage_prog_data *prog_data = &fs_prog_data->base;
1644 enum brw_param_builtin *system_values;
1645 const struct gen_device_info *devinfo = &screen->devinfo;
1646 unsigned num_system_values;
1647 unsigned num_cbufs;
1648
1649 nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
1650
1651 prog_data->use_alt_mode = ish->use_alt_mode;
1652
1653 iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
1654 &num_system_values, &num_cbufs);
1655
1656 /* Lower output variables to load_output intrinsics before setting up
1657 * binding tables, so iris_setup_binding_table can map any load_output
1658 * intrinsics to IRIS_SURFACE_GROUP_RENDER_TARGET_READ on Gen8 for
1659 * non-coherent framebuffer fetches.
1660 */
1661 brw_nir_lower_fs_outputs(nir);
1662
1663 /* On Gen11+, shader RT write messages have a "Null Render Target" bit
1664 * and do not need a binding table entry with a null surface. Earlier
1665 * generations need an entry for a null surface.
1666 */
1667 int null_rts = devinfo->gen < 11 ? 1 : 0;
1668
1669 struct iris_binding_table bt;
1670 iris_setup_binding_table(devinfo, nir, &bt,
1671 MAX2(key->nr_color_regions, null_rts),
1672 num_system_values, num_cbufs);
1673
1674 brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
1675
1676 struct brw_wm_prog_key brw_key = iris_to_brw_fs_key(devinfo, key);
1677
1678 char *error_str = NULL;
1679 const unsigned *program =
1680 brw_compile_fs(compiler, &ice->dbg, mem_ctx, &brw_key, fs_prog_data,
1681 nir, -1, -1, -1, true, false, vue_map,
1682 NULL, &error_str);
1683 if (program == NULL) {
1684 dbg_printf("Failed to compile fragment shader: %s\n", error_str);
1685 ralloc_free(mem_ctx);
1686          return NULL;
1687 }
1688
1689 if (ish->compiled_once) {
1690 iris_debug_recompile(ice, &nir->info, &brw_key.base);
1691 } else {
1692 ish->compiled_once = true;
1693 }
1694
1695 struct iris_compiled_shader *shader =
1696 iris_upload_shader(ice, IRIS_CACHE_FS, sizeof(*key), key, program,
1697 prog_data, NULL, system_values, num_system_values,
1698 num_cbufs, &bt);
1699
1700 iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
1701
1702 ralloc_free(mem_ctx);
1703 return shader;
1704 }
1705
1706 /**
1707 * Update the current fragment shader variant.
1708 *
1709 * Fill out the key, look in the cache, compile and bind if needed.
1710 */
1711 static void
1712 iris_update_compiled_fs(struct iris_context *ice)
1713 {
1714 struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_FRAGMENT];
1715 struct iris_uncompiled_shader *ish =
1716 ice->shaders.uncompiled[MESA_SHADER_FRAGMENT];
1717 struct iris_fs_prog_key key = { KEY_ID(base) };
1718 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1719 screen->vtbl.populate_fs_key(ice, &ish->nir->info, &key);
1720
1721 if (ish->nos & (1ull << IRIS_NOS_LAST_VUE_MAP))
1722 key.input_slots_valid = ice->shaders.last_vue_map->slots_valid;
1723
1724 struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_FS];
1725 struct iris_compiled_shader *shader =
1726 iris_find_cached_shader(ice, IRIS_CACHE_FS, sizeof(key), &key);
1727
1728 if (!shader)
1729 shader = iris_disk_cache_retrieve(ice, ish, &key, sizeof(key));
1730
1731 if (!shader)
1732 shader = iris_compile_fs(ice, ish, &key, ice->shaders.last_vue_map);
1733
1734 if (old != shader) {
1735 // XXX: only need to flag CLIP if barycentric has NONPERSPECTIVE
1736 // toggles. might be able to avoid flagging SBE too.
1737 ice->shaders.prog[IRIS_CACHE_FS] = shader;
1738 ice->state.dirty |= IRIS_DIRTY_FS |
1739 IRIS_DIRTY_BINDINGS_FS |
1740 IRIS_DIRTY_CONSTANTS_FS |
1741 IRIS_DIRTY_WM |
1742 IRIS_DIRTY_CLIP |
1743 IRIS_DIRTY_SBE;
1744 shs->sysvals_need_upload = true;
1745 }
1746 }
1747
1748 /**
1749 * Update the last enabled stage's VUE map.
1750 *
1751  * When the output interface of the shader feeding the rasterizer changes,
1752  * we need to re-emit various packets.
1753 */
1754 static void
1755 update_last_vue_map(struct iris_context *ice,
1756 struct brw_stage_prog_data *prog_data)
1757 {
1758 struct brw_vue_prog_data *vue_prog_data = (void *) prog_data;
1759 struct brw_vue_map *vue_map = &vue_prog_data->vue_map;
1760 struct brw_vue_map *old_map = ice->shaders.last_vue_map;
1761 const uint64_t changed_slots =
1762 (old_map ? old_map->slots_valid : 0ull) ^ vue_map->slots_valid;
1763
1764 if (changed_slots & VARYING_BIT_VIEWPORT) {
1765 ice->state.num_viewports =
1766 (vue_map->slots_valid & VARYING_BIT_VIEWPORT) ? IRIS_MAX_VIEWPORTS : 1;
1767 ice->state.dirty |= IRIS_DIRTY_CLIP |
1768 IRIS_DIRTY_SF_CL_VIEWPORT |
1769 IRIS_DIRTY_CC_VIEWPORT |
1770 IRIS_DIRTY_SCISSOR_RECT |
1771 IRIS_DIRTY_UNCOMPILED_FS |
1772 ice->state.dirty_for_nos[IRIS_NOS_LAST_VUE_MAP];
1773 }
1774
1775 if (changed_slots || (old_map && old_map->separate != vue_map->separate)) {
1776 ice->state.dirty |= IRIS_DIRTY_SBE;
1777 }
1778
1779 ice->shaders.last_vue_map = &vue_prog_data->vue_map;
1780 }
1781
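/**
 * Upload surface states for any newly-bound pull constant buffers for the
 * given stage, and flag its bindings dirty if new descriptors were created.
 */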
1782 static void
1783 iris_update_pull_constant_descriptors(struct iris_context *ice,
1784 gl_shader_stage stage)
1785 {
1786 struct iris_compiled_shader *shader = ice->shaders.prog[stage];
1787
1788 if (!shader || !shader->prog_data->has_ubo_pull)
1789 return;
1790
1791 struct iris_shader_state *shs = &ice->state.shaders[stage];
1792 bool any_new_descriptors =
1793 shader->num_system_values > 0 && shs->sysvals_need_upload;
1794
1795 unsigned bound_cbufs = shs->bound_cbufs;
1796
1797 while (bound_cbufs) {
1798 const int i = u_bit_scan(&bound_cbufs);
1799 struct pipe_shader_buffer *cbuf = &shs->constbuf[i];
1800 struct iris_state_ref *surf_state = &shs->constbuf_surf_state[i];
1801 if (!surf_state->res && cbuf->buffer) {
1802 iris_upload_ubo_ssbo_surf_state(ice, cbuf, surf_state, false);
1803 any_new_descriptors = true;
1804 }
1805 }
1806
1807 if (any_new_descriptors)
1808 ice->state.dirty |= IRIS_DIRTY_BINDINGS_VS << stage;
1809 }
1810
1811 /**
1812 * Get the prog_data for a given stage, or NULL if the stage is disabled.
1813 */
1814 static struct brw_vue_prog_data *
1815 get_vue_prog_data(struct iris_context *ice, gl_shader_stage stage)
1816 {
1817 if (!ice->shaders.prog[stage])
1818 return NULL;
1819
1820 return (void *) ice->shaders.prog[stage]->prog_data;
1821 }
1822
1823 // XXX: iris_compiled_shaders are space-leaking :(
1824 // XXX: do remember to unbind them if deleting them.
1825
1826 /**
1827 * Update the current shader variants for the given state.
1828 *
1829 * This should be called on every draw call to ensure that the correct
1830 * shaders are bound. It will also flag any dirty state triggered by
1831 * swapping out those shaders.
1832 */
1833 void
1834 iris_update_compiled_shaders(struct iris_context *ice)
1835 {
1836 const uint64_t dirty = ice->state.dirty;
1837
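/* Snapshot the VUE stages' prog data so we can detect URB entry size
 * changes after updating the shader variants below.
 */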
1838 struct brw_vue_prog_data *old_prog_datas[4];
1839 if (!(dirty & IRIS_DIRTY_URB)) {
1840 for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++)
1841 old_prog_datas[i] = get_vue_prog_data(ice, i);
1842 }
1843
1844 if (dirty & (IRIS_DIRTY_UNCOMPILED_TCS | IRIS_DIRTY_UNCOMPILED_TES)) {
1845 struct iris_uncompiled_shader *tes =
1846 ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL];
1847 if (tes) {
1848 iris_update_compiled_tcs(ice);
1849 iris_update_compiled_tes(ice);
1850 } else {
1851 ice->shaders.prog[IRIS_CACHE_TCS] = NULL;
1852 ice->shaders.prog[IRIS_CACHE_TES] = NULL;
1853 ice->state.dirty |=
1854 IRIS_DIRTY_TCS | IRIS_DIRTY_TES |
1855 IRIS_DIRTY_BINDINGS_TCS | IRIS_DIRTY_BINDINGS_TES |
1856 IRIS_DIRTY_CONSTANTS_TCS | IRIS_DIRTY_CONSTANTS_TES;
1857 }
1858 }
1859
1860 if (dirty & IRIS_DIRTY_UNCOMPILED_VS)
1861 iris_update_compiled_vs(ice);
1862 if (dirty & IRIS_DIRTY_UNCOMPILED_GS)
1863 iris_update_compiled_gs(ice);
1864
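/* Work out whether the final geometry stage emits points or lines, which
 * feeds into the clipper's XY Clip Test enables.
 */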
1865 if (dirty & (IRIS_DIRTY_UNCOMPILED_GS | IRIS_DIRTY_UNCOMPILED_TES)) {
1866 const struct iris_compiled_shader *gs =
1867 ice->shaders.prog[MESA_SHADER_GEOMETRY];
1868 const struct iris_compiled_shader *tes =
1869 ice->shaders.prog[MESA_SHADER_TESS_EVAL];
1870
1871 bool points_or_lines = false;
1872
1873 if (gs) {
1874 const struct brw_gs_prog_data *gs_prog_data = (void *) gs->prog_data;
1875 points_or_lines =
1876 gs_prog_data->output_topology == _3DPRIM_POINTLIST ||
1877 gs_prog_data->output_topology == _3DPRIM_LINESTRIP;
1878 } else if (tes) {
1879 const struct brw_tes_prog_data *tes_data = (void *) tes->prog_data;
1880 points_or_lines =
1881 tes_data->output_topology == BRW_TESS_OUTPUT_TOPOLOGY_LINE ||
1882 tes_data->output_topology == BRW_TESS_OUTPUT_TOPOLOGY_POINT;
1883 }
1884
1885 if (ice->shaders.output_topology_is_points_or_lines != points_or_lines) {
1886 /* Outbound to XY Clip enables */
1887 ice->shaders.output_topology_is_points_or_lines = points_or_lines;
1888 ice->state.dirty |= IRIS_DIRTY_CLIP;
1889 }
1890 }
1891
1892 gl_shader_stage last_stage = last_vue_stage(ice);
1893 struct iris_compiled_shader *shader = ice->shaders.prog[last_stage];
1894 struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[last_stage];
1895 update_last_vue_map(ice, shader->prog_data);
1896 if (ice->state.streamout != shader->streamout) {
1897 ice->state.streamout = shader->streamout;
1898 ice->state.dirty |= IRIS_DIRTY_SO_DECL_LIST | IRIS_DIRTY_STREAMOUT;
1899 }
1900
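/* Refresh the stream output target strides from the last VUE stage's
 * stream output info, converting dword strides to bytes.
 */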
1901 if (ice->state.streamout_active) {
1902 for (int i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
1903 struct iris_stream_output_target *so =
1904 (void *) ice->state.so_target[i];
1905 if (so)
1906 so->stride = ish->stream_output.stride[i] * sizeof(uint32_t);
1907 }
1908 }
1909
1910 if (dirty & IRIS_DIRTY_UNCOMPILED_FS)
1911 iris_update_compiled_fs(ice);
1912
1913    /* Changing shader interfaces may require a URB reconfiguration. */
1914 if (!(dirty & IRIS_DIRTY_URB)) {
1915 for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
1916 struct brw_vue_prog_data *old = old_prog_datas[i];
1917 struct brw_vue_prog_data *new = get_vue_prog_data(ice, i);
1918 if (!!old != !!new ||
1919 (new && new->urb_entry_size != old->urb_entry_size)) {
1920 ice->state.dirty |= IRIS_DIRTY_URB;
1921 break;
1922 }
1923 }
1924 }
1925
1926 for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_FRAGMENT; i++) {
1927 if (ice->state.dirty & (IRIS_DIRTY_CONSTANTS_VS << i))
1928 iris_update_pull_constant_descriptors(ice, i);
1929 }
1930 }
1931
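/**
 * Compile a compute shader, and upload the assembly.
 */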
1932 static struct iris_compiled_shader *
1933 iris_compile_cs(struct iris_context *ice,
1934 struct iris_uncompiled_shader *ish,
1935 const struct iris_cs_prog_key *key)
1936 {
1937 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1938 const struct brw_compiler *compiler = screen->compiler;
1939 void *mem_ctx = ralloc_context(NULL);
1940 struct brw_cs_prog_data *cs_prog_data =
1941 rzalloc(mem_ctx, struct brw_cs_prog_data);
1942 struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
1943 enum brw_param_builtin *system_values;
1944 const struct gen_device_info *devinfo = &screen->devinfo;
1945 unsigned num_system_values;
1946 unsigned num_cbufs;
1947
1948 nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
1949
1950 iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
1951 &num_system_values, &num_cbufs);
1952
1953 struct iris_binding_table bt;
1954 iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
1955 num_system_values, num_cbufs);
1956
1957 struct brw_cs_prog_key brw_key = iris_to_brw_cs_key(devinfo, key);
1958
1959 char *error_str = NULL;
1960 const unsigned *program =
1961 brw_compile_cs(compiler, &ice->dbg, mem_ctx, &brw_key, cs_prog_data,
1962 nir, -1, NULL, &error_str);
1963 if (program == NULL) {
1964 dbg_printf("Failed to compile compute shader: %s\n", error_str);
1965 ralloc_free(mem_ctx);
1966          return NULL;
1967 }
1968
1969 if (ish->compiled_once) {
1970 iris_debug_recompile(ice, &nir->info, &brw_key.base);
1971 } else {
1972 ish->compiled_once = true;
1973 }
1974
1975 struct iris_compiled_shader *shader =
1976 iris_upload_shader(ice, IRIS_CACHE_CS, sizeof(*key), key, program,
1977 prog_data, NULL, system_values, num_system_values,
1978 num_cbufs, &bt);
1979
1980 iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
1981
1982 ralloc_free(mem_ctx);
1983 return shader;
1984 }
1985
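/**
 * Update the current compute shader variant.
 *
 * Fill out the key, look in the cache, compile and bind if needed.
 */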
1986 static void
1987 iris_update_compiled_cs(struct iris_context *ice)
1988 {
1989 struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_COMPUTE];
1990 struct iris_uncompiled_shader *ish =
1991 ice->shaders.uncompiled[MESA_SHADER_COMPUTE];
1992
1993 struct iris_cs_prog_key key = { KEY_ID(base) };
1994 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1995 screen->vtbl.populate_cs_key(ice, &key);
1996
1997 struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_CS];
1998 struct iris_compiled_shader *shader =
1999 iris_find_cached_shader(ice, IRIS_CACHE_CS, sizeof(key), &key);
2000
2001 if (!shader)
2002 shader = iris_disk_cache_retrieve(ice, ish, &key, sizeof(key));
2003
2004 if (!shader)
2005 shader = iris_compile_cs(ice, ish, &key);
2006
2007 if (old != shader) {
2008 ice->shaders.prog[IRIS_CACHE_CS] = shader;
2009 ice->state.dirty |= IRIS_DIRTY_CS |
2010 IRIS_DIRTY_BINDINGS_CS |
2011 IRIS_DIRTY_CONSTANTS_CS;
2012 shs->sysvals_need_upload = true;
2013 }
2014 }
2015
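/**
 * Update the compute shader variant and its pull constant descriptors
 * whenever the relevant dirty bits are set.
 */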
2016 void
2017 iris_update_compiled_compute_shader(struct iris_context *ice)
2018 {
2019 if (ice->state.dirty & IRIS_DIRTY_UNCOMPILED_CS)
2020 iris_update_compiled_cs(ice);
2021
2022 if (ice->state.dirty & IRIS_DIRTY_CONSTANTS_CS)
2023 iris_update_pull_constant_descriptors(ice, MESA_SHADER_COMPUTE);
2024 }
2025
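/**
 * Fill out the per-thread push constant data for a compute shader.
 *
 * Each thread's push constants occupy one 32-byte register (8 dwords), and
 * the only per-thread builtin is the subgroup ID, so thread t's ID is
 * written at dst[8 * t].  For example, threads == 2 writes dst[0] = 0 and
 * dst[8] = 1.
 */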
2026 void
2027 iris_fill_cs_push_const_buffer(struct brw_cs_prog_data *cs_prog_data,
2028 unsigned threads,
2029 uint32_t *dst)
2030 {
2031 assert(brw_cs_push_const_total_size(cs_prog_data, threads) > 0);
2032 assert(cs_prog_data->push.cross_thread.size == 0);
2033 assert(cs_prog_data->push.per_thread.dwords == 1);
2034 assert(cs_prog_data->base.param[0] == BRW_PARAM_BUILTIN_SUBGROUP_ID);
2035 for (unsigned t = 0; t < threads; t++)
2036 dst[8 * t] = t;
2037 }
2038
2039 /**
2040 * Allocate scratch BOs as needed for the given per-thread size and stage.
2041 */
2042 struct iris_bo *
2043 iris_get_scratch_space(struct iris_context *ice,
2044 unsigned per_thread_scratch,
2045 gl_shader_stage stage)
2046 {
2047 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
2048 struct iris_bufmgr *bufmgr = screen->bufmgr;
2049 const struct gen_device_info *devinfo = &screen->devinfo;
2050
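/* Per-thread scratch sizes are powers of two, starting at 1KB.  The
 * hardware encodes them as a power-of-two exponent relative to 1KB, so
 * ffs() - 11 maps 1KB -> 0, 2KB -> 1, 4KB -> 2, and so on.
 */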
2051 unsigned encoded_size = ffs(per_thread_scratch) - 11;
2052 assert(encoded_size < (1 << 16));
2053
2054 struct iris_bo **bop = &ice->shaders.scratch_bos[encoded_size][stage];
2055
2056 /* The documentation for 3DSTATE_PS "Scratch Space Base Pointer" says:
2057 *
2058 * "Scratch Space per slice is computed based on 4 sub-slices. SW
2059 * must allocate scratch space enough so that each slice has 4
2060 * slices allowed."
2061 *
2062 * According to the other driver team, this applies to compute shaders
2063 * as well. This is not currently documented at all.
2064 *
2065 * This hack is no longer necessary on Gen11+.
2066 *
2067  * For Gen11+, scratch space allocation is based on the number of threads
2068 * in the base configuration.
2069 */
2070 unsigned subslice_total = screen->subslice_total;
2071 if (devinfo->gen >= 12)
2072 subslice_total = devinfo->num_subslices[0];
2073 else if (devinfo->gen == 11)
2074 subslice_total = 8;
2075 else if (devinfo->gen < 11)
2076 subslice_total = 4 * devinfo->num_slices;
2077 assert(subslice_total >= screen->subslice_total);
2078
2079 if (!*bop) {
2080 unsigned scratch_ids_per_subslice = devinfo->max_cs_threads;
2081
2082 if (devinfo->gen >= 12) {
2083 /* Same as ICL below, but with 16 EUs. */
2084 scratch_ids_per_subslice = 16 * 8;
2085 } else if (devinfo->gen == 11) {
2086 /* The MEDIA_VFE_STATE docs say:
2087 *
2088 * "Starting with this configuration, the Maximum Number of
2089 * Threads must be set to (#EU * 8) for GPGPU dispatches.
2090 *
2091 * Although there are only 7 threads per EU in the configuration,
2092 * the FFTID is calculated as if there are 8 threads per EU,
2093 * which in turn requires a larger amount of Scratch Space to be
2094 * allocated by the driver."
2095 */
2096 scratch_ids_per_subslice = 8 * 8;
2097 }
2098
2099 uint32_t max_threads[] = {
2100 [MESA_SHADER_VERTEX] = devinfo->max_vs_threads,
2101 [MESA_SHADER_TESS_CTRL] = devinfo->max_tcs_threads,
2102 [MESA_SHADER_TESS_EVAL] = devinfo->max_tes_threads,
2103 [MESA_SHADER_GEOMETRY] = devinfo->max_gs_threads,
2104 [MESA_SHADER_FRAGMENT] = devinfo->max_wm_threads,
2105 [MESA_SHADER_COMPUTE] = scratch_ids_per_subslice * subslice_total,
2106 };
2107
2108 uint32_t size = per_thread_scratch * max_threads[stage];
2109
2110 *bop = iris_bo_alloc(bufmgr, "scratch", size, IRIS_MEMZONE_SHADER);
2111 }
2112
2113 return *bop;
2114 }
2115
2116 /* ------------------------------------------------------------------- */
2117
2118 /**
2119 * The pipe->create_[stage]_state() driver hooks.
2120 *
2121 * Performs basic NIR preprocessing, records any state dependencies, and
2122 * returns an iris_uncompiled_shader as the Gallium CSO.
2123 *
2124 * Actual shader compilation to assembly happens later, at first use.
2125 */
2126 static void *
2127 iris_create_uncompiled_shader(struct pipe_context *ctx,
2128 nir_shader *nir,
2129 const struct pipe_stream_output_info *so_info)
2130 {
2131 struct iris_context *ice = (void *)ctx;
2132 struct iris_screen *screen = (struct iris_screen *)ctx->screen;
2133 const struct gen_device_info *devinfo = &screen->devinfo;
2134
2135 struct iris_uncompiled_shader *ish =
2136 calloc(1, sizeof(struct iris_uncompiled_shader));
2137 if (!ish)
2138 return NULL;
2139
2140 NIR_PASS(ish->needs_edge_flag, nir, iris_fix_edge_flags);
2141
2142 brw_preprocess_nir(screen->compiler, nir, NULL);
2143
2144 NIR_PASS_V(nir, brw_nir_lower_image_load_store, devinfo,
2145 &ish->uses_atomic_load_store);
2146 NIR_PASS_V(nir, iris_lower_storage_image_derefs);
2147
2148 nir_sweep(nir);
2149
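/* If the shader has constant data, upload it once now so that every
 * compiled variant of this shader can read it through a surface binding.
 */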
2150 if (nir->constant_data_size > 0) {
2151 unsigned data_offset;
2152 u_upload_data(ice->shaders.uploader, 0, nir->constant_data_size,
2153 32, nir->constant_data, &data_offset, &ish->const_data);
2154
2155 struct pipe_shader_buffer psb = {
2156 .buffer = ish->const_data,
2157 .buffer_offset = data_offset,
2158 .buffer_size = nir->constant_data_size,
2159 };
2160 iris_upload_ubo_ssbo_surf_state(ice, &psb, &ish->const_data_state, false);
2161 }
2162
2163 ish->program_id = get_new_program_id(screen);
2164 ish->nir = nir;
2165 if (so_info) {
2166 memcpy(&ish->stream_output, so_info, sizeof(*so_info));
2167 update_so_info(&ish->stream_output, nir->info.outputs_written);
2168 }
2169
2170 /* Save this now before potentially dropping nir->info.name */
2171 if (nir->info.name && strncmp(nir->info.name, "ARB", 3) == 0)
2172 ish->use_alt_mode = true;
2173
2174 if (screen->disk_cache) {
2175 /* Serialize the NIR to a binary blob that we can hash for the disk
2176 * cache. Drop unnecessary information (like variable names)
2177 * so the serialized NIR is smaller, and also to let us detect more
2178 * isomorphic shaders when hashing, increasing cache hits.
2179 */
2180 struct blob blob;
2181 blob_init(&blob);
2182 nir_serialize(&blob, nir, true);
2183 _mesa_sha1_compute(blob.data, blob.size, ish->nir_sha1);
2184 blob_finish(&blob);
2185 }
2186
2187 return ish;
2188 }
2189
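/**
 * Helper for the create_[stage]_state hooks: converts TGSI to NIR if
 * necessary, then wraps the NIR in an iris_uncompiled_shader.
 */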
2190 static struct iris_uncompiled_shader *
2191 iris_create_shader_state(struct pipe_context *ctx,
2192 const struct pipe_shader_state *state)
2193 {
2194 struct nir_shader *nir;
2195
2196 if (state->type == PIPE_SHADER_IR_TGSI)
2197 nir = tgsi_to_nir(state->tokens, ctx->screen);
2198 else
2199 nir = state->ir.nir;
2200
2201 return iris_create_uncompiled_shader(ctx, nir, &state->stream_output);
2202 }
2203
2204 static void *
2205 iris_create_vs_state(struct pipe_context *ctx,
2206 const struct pipe_shader_state *state)
2207 {
2208 struct iris_context *ice = (void *) ctx;
2209 struct iris_screen *screen = (void *) ctx->screen;
2210 struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);
2211
2212 /* User clip planes */
2213 if (ish->nir->info.clip_distance_array_size == 0)
2214 ish->nos |= (1ull << IRIS_NOS_RASTERIZER);
2215
2216 if (screen->precompile) {
2217 struct iris_vs_prog_key key = { KEY_ID(vue.base) };
2218
2219 if (!iris_disk_cache_retrieve(ice, ish, &key, sizeof(key)))
2220 iris_compile_vs(ice, ish, &key);
2221 }
2222
2223 return ish;
2224 }
2225
2226 static void *
2227 iris_create_tcs_state(struct pipe_context *ctx,
2228 const struct pipe_shader_state *state)
2229 {
2230 struct iris_context *ice = (void *) ctx;
2231 struct iris_screen *screen = (void *) ctx->screen;
2232 const struct brw_compiler *compiler = screen->compiler;
2233 struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);
2234 struct shader_info *info = &ish->nir->info;
2235
2236 if (screen->precompile) {
2237 const unsigned _GL_TRIANGLES = 0x0004;
2238 struct iris_tcs_prog_key key = {
2239 KEY_ID(vue.base),
2240 // XXX: make sure the linker fills this out from the TES...
2241 .tes_primitive_mode =
2242 info->tess.primitive_mode ? info->tess.primitive_mode
2243 : _GL_TRIANGLES,
2244 .outputs_written = info->outputs_written,
2245 .patch_outputs_written = info->patch_outputs_written,
2246 };
2247
2248 /* 8_PATCH mode needs the key to contain the input patch dimensionality.
2249 * We don't have that information, so we randomly guess that the input
2250 * and output patches are the same size. This is a bad guess, but we
2251 * can't do much better.
2252 */
2253 if (compiler->use_tcs_8_patch)
2254 key.input_vertices = info->tess.tcs_vertices_out;
2255
2256 if (!iris_disk_cache_retrieve(ice, ish, &key, sizeof(key)))
2257 iris_compile_tcs(ice, ish, &key);
2258 }
2259
2260 return ish;
2261 }
2262
2263 static void *
2264 iris_create_tes_state(struct pipe_context *ctx,
2265 const struct pipe_shader_state *state)
2266 {
2267 struct iris_context *ice = (void *) ctx;
2268 struct iris_screen *screen = (void *) ctx->screen;
2269 struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);
2270 struct shader_info *info = &ish->nir->info;
2271
2272 /* User clip planes */
2273 if (ish->nir->info.clip_distance_array_size == 0)
2274 ish->nos |= (1ull << IRIS_NOS_RASTERIZER);
2275
2276 if (screen->precompile) {
2277 struct iris_tes_prog_key key = {
2278 KEY_ID(vue.base),
2279 // XXX: not ideal, need TCS output/TES input unification
2280 .inputs_read = info->inputs_read,
2281 .patch_inputs_read = info->patch_inputs_read,
2282 };
2283
2284 if (!iris_disk_cache_retrieve(ice, ish, &key, sizeof(key)))
2285 iris_compile_tes(ice, ish, &key);
2286 }
2287
2288 return ish;
2289 }
2290
2291 static void *
2292 iris_create_gs_state(struct pipe_context *ctx,
2293 const struct pipe_shader_state *state)
2294 {
2295 struct iris_context *ice = (void *) ctx;
2296 struct iris_screen *screen = (void *) ctx->screen;
2297 struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);
2298
2299 /* User clip planes */
2300 if (ish->nir->info.clip_distance_array_size == 0)
2301 ish->nos |= (1ull << IRIS_NOS_RASTERIZER);
2302
2303 if (screen->precompile) {
2304 struct iris_gs_prog_key key = { KEY_ID(vue.base) };
2305
2306 if (!iris_disk_cache_retrieve(ice, ish, &key, sizeof(key)))
2307 iris_compile_gs(ice, ish, &key);
2308 }
2309
2310 return ish;
2311 }
2312
2313 static void *
2314 iris_create_fs_state(struct pipe_context *ctx,
2315 const struct pipe_shader_state *state)
2316 {
2317 struct iris_context *ice = (void *) ctx;
2318 struct iris_screen *screen = (void *) ctx->screen;
2319 struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);
2320 struct shader_info *info = &ish->nir->info;
2321
2322 ish->nos |= (1ull << IRIS_NOS_FRAMEBUFFER) |
2323 (1ull << IRIS_NOS_DEPTH_STENCIL_ALPHA) |
2324 (1ull << IRIS_NOS_RASTERIZER) |
2325 (1ull << IRIS_NOS_BLEND);
2326
2327 /* The program key needs the VUE map if there are > 16 inputs */
2328 if (util_bitcount64(ish->nir->info.inputs_read &
2329 BRW_FS_VARYING_INPUT_MASK) > 16) {
2330 ish->nos |= (1ull << IRIS_NOS_LAST_VUE_MAP);
2331 }
2332
2333 if (screen->precompile) {
2334 const uint64_t color_outputs = info->outputs_written &
2335 ~(BITFIELD64_BIT(FRAG_RESULT_DEPTH) |
2336 BITFIELD64_BIT(FRAG_RESULT_STENCIL) |
2337 BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK));
2338
2339 bool can_rearrange_varyings =
2340 util_bitcount64(info->inputs_read & BRW_FS_VARYING_INPUT_MASK) <= 16;
2341
2342 const struct gen_device_info *devinfo = &screen->devinfo;
2343 struct iris_fs_prog_key key = {
2344 KEY_ID(base),
2345 .nr_color_regions = util_bitcount(color_outputs),
2346 .coherent_fb_fetch = devinfo->gen >= 9,
2347 .input_slots_valid =
2348 can_rearrange_varyings ? 0 : info->inputs_read | VARYING_BIT_POS,
2349 };
2350
2351 if (!iris_disk_cache_retrieve(ice, ish, &key, sizeof(key)))
2352 iris_compile_fs(ice, ish, &key, NULL);
2353 }
2354
2355 return ish;
2356 }
2357
2358 static void *
2359 iris_create_compute_state(struct pipe_context *ctx,
2360 const struct pipe_compute_state *state)
2361 {
2362 assert(state->ir_type == PIPE_SHADER_IR_NIR);
2363
2364 struct iris_context *ice = (void *) ctx;
2365 struct iris_screen *screen = (void *) ctx->screen;
2366 struct iris_uncompiled_shader *ish =
2367 iris_create_uncompiled_shader(ctx, (void *) state->prog, NULL);
2368
2369 // XXX: disallow more than 64KB of shared variables
2370
2371 if (screen->precompile) {
2372 struct iris_cs_prog_key key = { KEY_ID(base) };
2373
2374 if (!iris_disk_cache_retrieve(ice, ish, &key, sizeof(key)))
2375 iris_compile_cs(ice, ish, &key);
2376 }
2377
2378 return ish;
2379 }
2380
2381 /**
2382 * The pipe->delete_[stage]_state() driver hooks.
2383 *
2384 * Frees the iris_uncompiled_shader.
2385 */
2386 static void
2387 iris_delete_shader_state(struct pipe_context *ctx, void *state, gl_shader_stage stage)
2388 {
2389 struct iris_uncompiled_shader *ish = state;
2390 struct iris_context *ice = (void *) ctx;
2391
2392 if (ice->shaders.uncompiled[stage] == ish) {
2393 ice->shaders.uncompiled[stage] = NULL;
2394 ice->state.dirty |= IRIS_DIRTY_UNCOMPILED_VS << stage;
2395 }
2396
2397 if (ish->const_data) {
2398 pipe_resource_reference(&ish->const_data, NULL);
2399 pipe_resource_reference(&ish->const_data_state.res, NULL);
2400 }
2401
2402 ralloc_free(ish->nir);
2403 free(ish);
2404 }
2405
2406 static void
2407 iris_delete_vs_state(struct pipe_context *ctx, void *state)
2408 {
2409 iris_delete_shader_state(ctx, state, MESA_SHADER_VERTEX);
2410 }
2411
2412 static void
2413 iris_delete_tcs_state(struct pipe_context *ctx, void *state)
2414 {
2415 iris_delete_shader_state(ctx, state, MESA_SHADER_TESS_CTRL);
2416 }
2417
2418 static void
2419 iris_delete_tes_state(struct pipe_context *ctx, void *state)
2420 {
2421 iris_delete_shader_state(ctx, state, MESA_SHADER_TESS_EVAL);
2422 }
2423
2424 static void
2425 iris_delete_gs_state(struct pipe_context *ctx, void *state)
2426 {
2427 iris_delete_shader_state(ctx, state, MESA_SHADER_GEOMETRY);
2428 }
2429
2430 static void
2431 iris_delete_fs_state(struct pipe_context *ctx, void *state)
2432 {
2433 iris_delete_shader_state(ctx, state, MESA_SHADER_FRAGMENT);
2434 }
2435
2436 static void
2437 iris_delete_cs_state(struct pipe_context *ctx, void *state)
2438 {
2439 iris_delete_shader_state(ctx, state, MESA_SHADER_COMPUTE);
2440 }
2441
2442 /**
2443 * The pipe->bind_[stage]_state() driver hook.
2444 *
2445 * Binds an uncompiled shader as the current one for a particular stage.
2446 * Updates dirty tracking to account for the shader's NOS.
2447 */
2448 static void
2449 bind_shader_state(struct iris_context *ice,
2450 struct iris_uncompiled_shader *ish,
2451 gl_shader_stage stage)
2452 {
2453 uint64_t dirty_bit = IRIS_DIRTY_UNCOMPILED_VS << stage;
2454 const uint64_t nos = ish ? ish->nos : 0;
2455
2456 const struct shader_info *old_info = iris_get_shader_info(ice, stage);
2457 const struct shader_info *new_info = ish ? &ish->nir->info : NULL;
2458
2459 if ((old_info ? util_last_bit(old_info->textures_used) : 0) !=
2460 (new_info ? util_last_bit(new_info->textures_used) : 0)) {
2461 ice->state.dirty |= IRIS_DIRTY_SAMPLER_STATES_VS << stage;
2462 }
2463
2464 ice->shaders.uncompiled[stage] = ish;
2465 ice->state.dirty |= dirty_bit;
2466
2467 /* Record that CSOs need to mark IRIS_DIRTY_UNCOMPILED_XS when they change
2468 * (or that they no longer need to do so).
2469 */
2470 for (int i = 0; i < IRIS_NOS_COUNT; i++) {
2471 if (nos & (1 << i))
2472 ice->state.dirty_for_nos[i] |= dirty_bit;
2473 else
2474 ice->state.dirty_for_nos[i] &= ~dirty_bit;
2475 }
2476 }
2477
2478 static void
2479 iris_bind_vs_state(struct pipe_context *ctx, void *state)
2480 {
2481 struct iris_context *ice = (struct iris_context *)ctx;
2482 struct iris_uncompiled_shader *new_ish = state;
2483
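/* Window-space positions bypass the viewport transform and clipping, so
 * toggling that mode requires re-emitting clip, raster, and viewport state.
 */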
2484 if (new_ish &&
2485 ice->state.window_space_position !=
2486 new_ish->nir->info.vs.window_space_position) {
2487 ice->state.window_space_position =
2488 new_ish->nir->info.vs.window_space_position;
2489
2490 ice->state.dirty |= IRIS_DIRTY_CLIP |
2491 IRIS_DIRTY_RASTER |
2492 IRIS_DIRTY_CC_VIEWPORT;
2493 }
2494
2495 bind_shader_state((void *) ctx, state, MESA_SHADER_VERTEX);
2496 }
2497
2498 static void
2499 iris_bind_tcs_state(struct pipe_context *ctx, void *state)
2500 {
2501 bind_shader_state((void *) ctx, state, MESA_SHADER_TESS_CTRL);
2502 }
2503
2504 static void
2505 iris_bind_tes_state(struct pipe_context *ctx, void *state)
2506 {
2507 struct iris_context *ice = (struct iris_context *)ctx;
2508
2509 /* Enabling/disabling optional stages requires a URB reconfiguration. */
2510 if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL])
2511 ice->state.dirty |= IRIS_DIRTY_URB;
2512
2513 bind_shader_state((void *) ctx, state, MESA_SHADER_TESS_EVAL);
2514 }
2515
2516 static void
2517 iris_bind_gs_state(struct pipe_context *ctx, void *state)
2518 {
2519 struct iris_context *ice = (struct iris_context *)ctx;
2520
2521 /* Enabling/disabling optional stages requires a URB reconfiguration. */
2522 if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_GEOMETRY])
2523 ice->state.dirty |= IRIS_DIRTY_URB;
2524
2525 bind_shader_state((void *) ctx, state, MESA_SHADER_GEOMETRY);
2526 }
2527
2528 static void
2529 iris_bind_fs_state(struct pipe_context *ctx, void *state)
2530 {
2531 struct iris_context *ice = (struct iris_context *) ctx;
2532 struct iris_screen *screen = (struct iris_screen *) ctx->screen;
2533 const struct gen_device_info *devinfo = &screen->devinfo;
2534 struct iris_uncompiled_shader *old_ish =
2535 ice->shaders.uncompiled[MESA_SHADER_FRAGMENT];
2536 struct iris_uncompiled_shader *new_ish = state;
2537
2538 const unsigned color_bits =
2539 BITFIELD64_BIT(FRAG_RESULT_COLOR) |
2540 BITFIELD64_RANGE(FRAG_RESULT_DATA0, BRW_MAX_DRAW_BUFFERS);
2541
2542 /* Fragment shader outputs influence HasWriteableRT */
2543 if (!old_ish || !new_ish ||
2544 (old_ish->nir->info.outputs_written & color_bits) !=
2545 (new_ish->nir->info.outputs_written & color_bits))
2546 ice->state.dirty |= IRIS_DIRTY_PS_BLEND;
2547
2548 if (devinfo->gen == 8)
2549 ice->state.dirty |= IRIS_DIRTY_PMA_FIX;
2550
2551 bind_shader_state((void *) ctx, state, MESA_SHADER_FRAGMENT);
2552 }
2553
2554 static void
2555 iris_bind_cs_state(struct pipe_context *ctx, void *state)
2556 {
2557 bind_shader_state((void *) ctx, state, MESA_SHADER_COMPUTE);
2558 }
2559
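/**
 * Plug the shader CSO create/delete/bind hooks into the pipe_context.
 */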
2560 void
2561 iris_init_program_functions(struct pipe_context *ctx)
2562 {
2563 ctx->create_vs_state = iris_create_vs_state;
2564 ctx->create_tcs_state = iris_create_tcs_state;
2565 ctx->create_tes_state = iris_create_tes_state;
2566 ctx->create_gs_state = iris_create_gs_state;
2567 ctx->create_fs_state = iris_create_fs_state;
2568 ctx->create_compute_state = iris_create_compute_state;
2569
2570 ctx->delete_vs_state = iris_delete_vs_state;
2571 ctx->delete_tcs_state = iris_delete_tcs_state;
2572 ctx->delete_tes_state = iris_delete_tes_state;
2573 ctx->delete_gs_state = iris_delete_gs_state;
2574 ctx->delete_fs_state = iris_delete_fs_state;
2575 ctx->delete_compute_state = iris_delete_cs_state;
2576
2577 ctx->bind_vs_state = iris_bind_vs_state;
2578 ctx->bind_tcs_state = iris_bind_tcs_state;
2579 ctx->bind_tes_state = iris_bind_tes_state;
2580 ctx->bind_gs_state = iris_bind_gs_state;
2581 ctx->bind_fs_state = iris_bind_fs_state;
2582 ctx->bind_compute_state = iris_bind_cs_state;
2583 }