intel/fs: Allow multiple slots for position
[mesa.git] src/gallium/drivers/iris/iris_program.c
1 /*
2 * Copyright © 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23 /**
24 * @file iris_program.c
25 *
26 * This file contains the driver interface for compiling shaders.
27 *
28 * See iris_program_cache.c for the in-memory program cache where the
29 * compiled shaders are stored.
30 */
31
32 #include <stdio.h>
33 #include <errno.h>
34 #include "pipe/p_defines.h"
35 #include "pipe/p_state.h"
36 #include "pipe/p_context.h"
37 #include "pipe/p_screen.h"
38 #include "util/u_atomic.h"
39 #include "util/u_upload_mgr.h"
40 #include "util/debug.h"
41 #include "compiler/nir/nir.h"
42 #include "compiler/nir/nir_builder.h"
43 #include "compiler/nir/nir_serialize.h"
44 #include "intel/compiler/brw_compiler.h"
45 #include "intel/compiler/brw_nir.h"
46 #include "iris_context.h"
47 #include "nir/tgsi_to_nir.h"
48
49 #define KEY_ID(prefix) .prefix.program_string_id = ish->program_id
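/* Note: 0x688 below is the identity texture swizzle (X/Y/Z/W packed three
 * bits per component), i.e. "no swizzling".
 */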
50 #define BRW_KEY_INIT(gen, prog_id) \
51 .base.program_string_id = prog_id, \
52 .base.subgroup_size_type = BRW_SUBGROUP_SIZE_UNIFORM, \
53 .base.tex.swizzles[0 ... MAX_SAMPLERS - 1] = 0x688, \
54 .base.tex.compressed_multisample_layout_mask = ~0, \
55 .base.tex.msaa_16 = (gen >= 9 ? ~0 : 0)
56
57 static unsigned
58 get_new_program_id(struct iris_screen *screen)
59 {
60 return p_atomic_inc_return(&screen->program_id);
61 }
62
63 static struct brw_vs_prog_key
64 iris_to_brw_vs_key(const struct gen_device_info *devinfo,
65 const struct iris_vs_prog_key *key)
66 {
67 return (struct brw_vs_prog_key) {
68 BRW_KEY_INIT(devinfo->gen, key->vue.base.program_string_id),
69
70 /* Don't tell the backend about our clip plane constants, we've
71 * already lowered them in NIR and don't want it doing it again.
72 */
73 .nr_userclip_plane_consts = 0,
74 };
75 }
76
77 static struct brw_tcs_prog_key
78 iris_to_brw_tcs_key(const struct gen_device_info *devinfo,
79 const struct iris_tcs_prog_key *key)
80 {
81 return (struct brw_tcs_prog_key) {
82 BRW_KEY_INIT(devinfo->gen, key->vue.base.program_string_id),
83 .tes_primitive_mode = key->tes_primitive_mode,
84 .input_vertices = key->input_vertices,
85 .patch_outputs_written = key->patch_outputs_written,
86 .outputs_written = key->outputs_written,
87 .quads_workaround = key->quads_workaround,
88 };
89 }
90
91 static struct brw_tes_prog_key
92 iris_to_brw_tes_key(const struct gen_device_info *devinfo,
93 const struct iris_tes_prog_key *key)
94 {
95 return (struct brw_tes_prog_key) {
96 BRW_KEY_INIT(devinfo->gen, key->vue.base.program_string_id),
97 .patch_inputs_read = key->patch_inputs_read,
98 .inputs_read = key->inputs_read,
99 };
100 }
101
102 static struct brw_gs_prog_key
103 iris_to_brw_gs_key(const struct gen_device_info *devinfo,
104 const struct iris_gs_prog_key *key)
105 {
106 return (struct brw_gs_prog_key) {
107 BRW_KEY_INIT(devinfo->gen, key->vue.base.program_string_id),
108 };
109 }
110
111 static struct brw_wm_prog_key
112 iris_to_brw_fs_key(const struct gen_device_info *devinfo,
113 const struct iris_fs_prog_key *key)
114 {
115 return (struct brw_wm_prog_key) {
116 BRW_KEY_INIT(devinfo->gen, key->base.program_string_id),
117 .nr_color_regions = key->nr_color_regions,
118 .flat_shade = key->flat_shade,
119 .alpha_test_replicate_alpha = key->alpha_test_replicate_alpha,
120 .alpha_to_coverage = key->alpha_to_coverage,
121 .clamp_fragment_color = key->clamp_fragment_color,
122 .persample_interp = key->persample_interp,
123 .multisample_fbo = key->multisample_fbo,
124 .force_dual_color_blend = key->force_dual_color_blend,
125 .coherent_fb_fetch = key->coherent_fb_fetch,
126 .color_outputs_valid = key->color_outputs_valid,
127 .input_slots_valid = key->input_slots_valid,
128 };
129 }
130
131 static struct brw_cs_prog_key
132 iris_to_brw_cs_key(const struct gen_device_info *devinfo,
133 const struct iris_cs_prog_key *key)
134 {
135 return (struct brw_cs_prog_key) {
136 BRW_KEY_INIT(devinfo->gen, key->base.program_string_id),
137 };
138 }
139
140 static void *
141 upload_state(struct u_upload_mgr *uploader,
142 struct iris_state_ref *ref,
143 unsigned size,
144 unsigned alignment)
145 {
146 void *p = NULL;
147 u_upload_alloc(uploader, 0, size, alignment, &ref->offset, &ref->res, &p);
148 return p;
149 }
150
151 void
152 iris_upload_ubo_ssbo_surf_state(struct iris_context *ice,
153 struct pipe_shader_buffer *buf,
154 struct iris_state_ref *surf_state,
155 bool ssbo)
156 {
157 struct pipe_context *ctx = &ice->ctx;
158 struct iris_screen *screen = (struct iris_screen *) ctx->screen;
159
160 void *map =
161 upload_state(ice->state.surface_uploader, surf_state,
162 screen->isl_dev.ss.size, 64);
163    if (unlikely(!map)) {
164 surf_state->res = NULL;
165 return;
166 }
167
168 struct iris_resource *res = (void *) buf->buffer;
169 struct iris_bo *surf_bo = iris_resource_bo(surf_state->res);
170 surf_state->offset += iris_bo_offset_from_base_address(surf_bo);
171
172 isl_buffer_fill_state(&screen->isl_dev, map,
173 .address = res->bo->gtt_offset + res->offset +
174 buf->buffer_offset,
175 .size_B = buf->buffer_size - res->offset,
176 .format = ssbo ? ISL_FORMAT_RAW
177 : ISL_FORMAT_R32G32B32A32_FLOAT,
178 .swizzle = ISL_SWIZZLE_IDENTITY,
179 .stride_B = 1,
180 .mocs = iris_mocs(res->bo, &screen->isl_dev));
181 }
182
183 static nir_ssa_def *
184 get_aoa_deref_offset(nir_builder *b,
185 nir_deref_instr *deref,
186 unsigned elem_size)
187 {
188 unsigned array_size = elem_size;
189 nir_ssa_def *offset = nir_imm_int(b, 0);
190
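   /* Walk from the innermost array deref outward.  For a two-level access
    * like img[i][j] with an inner array length of L, this accumulates
    * offset = (j + i * L) * elem_size before the clamp below.
    */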
191 while (deref->deref_type != nir_deref_type_var) {
192 assert(deref->deref_type == nir_deref_type_array);
193
194 /* This level's element size is the previous level's array size */
195 nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
196 assert(deref->arr.index.ssa);
197 offset = nir_iadd(b, offset,
198 nir_imul(b, index, nir_imm_int(b, array_size)));
199
200 deref = nir_deref_instr_parent(deref);
201 assert(glsl_type_is_array(deref->type));
202 array_size *= glsl_get_length(deref->type);
203 }
204
205 /* Accessing an invalid surface index with the dataport can result in a
206 * hang. According to the spec "if the index used to select an individual
207 * element is negative or greater than or equal to the size of the array,
208 * the results of the operation are undefined but may not lead to
209 * termination" -- which is one of the possible outcomes of the hang.
210 * Clamp the index to prevent access outside of the array bounds.
211 */
212 return nir_umin(b, offset, nir_imm_int(b, array_size - elem_size));
213 }
214
215 static void
216 iris_lower_storage_image_derefs(nir_shader *nir)
217 {
218 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
219
220 nir_builder b;
221 nir_builder_init(&b, impl);
222
223 nir_foreach_block(block, impl) {
224 nir_foreach_instr_safe(instr, block) {
225 if (instr->type != nir_instr_type_intrinsic)
226 continue;
227
228 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
229 switch (intrin->intrinsic) {
230 case nir_intrinsic_image_deref_load:
231 case nir_intrinsic_image_deref_store:
232 case nir_intrinsic_image_deref_atomic_add:
233 case nir_intrinsic_image_deref_atomic_imin:
234 case nir_intrinsic_image_deref_atomic_umin:
235 case nir_intrinsic_image_deref_atomic_imax:
236 case nir_intrinsic_image_deref_atomic_umax:
237 case nir_intrinsic_image_deref_atomic_and:
238 case nir_intrinsic_image_deref_atomic_or:
239 case nir_intrinsic_image_deref_atomic_xor:
240 case nir_intrinsic_image_deref_atomic_exchange:
241 case nir_intrinsic_image_deref_atomic_comp_swap:
242 case nir_intrinsic_image_deref_size:
243 case nir_intrinsic_image_deref_samples:
244 case nir_intrinsic_image_deref_load_raw_intel:
245 case nir_intrinsic_image_deref_store_raw_intel: {
246 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
247 nir_variable *var = nir_deref_instr_get_variable(deref);
248
249 b.cursor = nir_before_instr(&intrin->instr);
250 nir_ssa_def *index =
251 nir_iadd(&b, nir_imm_int(&b, var->data.driver_location),
252 get_aoa_deref_offset(&b, deref, 1));
253 nir_rewrite_image_intrinsic(intrin, index, false);
254 break;
255 }
256
257 default:
258 break;
259 }
260 }
261 }
262 }
263
264 /**
265 * Undo nir_lower_passthrough_edgeflags but keep the inputs_read flag.
266 */
267 static bool
268 iris_fix_edge_flags(nir_shader *nir)
269 {
270 if (nir->info.stage != MESA_SHADER_VERTEX)
271 return false;
272
273 nir_variable *var = NULL;
274 nir_foreach_variable(v, &nir->outputs) {
275 if (v->data.location == VARYING_SLOT_EDGE) {
276 var = v;
277 break;
278 }
279 }
280
281 if (!var)
282 return false;
283
284 exec_node_remove(&var->node);
285 var->data.mode = nir_var_shader_temp;
286 exec_list_push_tail(&nir->globals, &var->node);
287 nir->info.outputs_written &= ~VARYING_BIT_EDGE;
288 nir->info.inputs_read &= ~VERT_BIT_EDGEFLAG;
289 nir_fixup_deref_modes(nir);
290
291 nir_foreach_function(f, nir) {
292 if (f->impl) {
293 nir_metadata_preserve(f->impl, nir_metadata_block_index |
294 nir_metadata_dominance |
295 nir_metadata_live_ssa_defs |
296 nir_metadata_loop_analysis);
297 }
298 }
299
300 return true;
301 }
302
303 /**
304 * Fix an uncompiled shader's stream output info.
305 *
306 * Core Gallium stores output->register_index as a "slot" number, where
307 * slots are assigned consecutively to all outputs in info->outputs_written.
308 * This naive packing of outputs doesn't work for us - we too have slots,
309 * but the layout is defined by the VUE map, which we won't have until we
310 * compile a specific shader variant. So, we remap these and simply store
311 * VARYING_SLOT_* in our copy's output->register_index fields.
312 *
313 * We also fix up VARYING_SLOT_{LAYER,VIEWPORT,PSIZ} to select the Y/Z/W
314 * components of our VUE header. See brw_vue_map.c for the layout.
315 */
316 static void
317 update_so_info(struct pipe_stream_output_info *so_info,
318 uint64_t outputs_written)
319 {
320 uint8_t reverse_map[64] = {};
321 unsigned slot = 0;
322 while (outputs_written) {
323 reverse_map[slot++] = u_bit_scan64(&outputs_written);
324 }
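   /* For example, if outputs_written contained POS, PSIZ, and VAR0, Gallium
    * numbered them slots 0, 1, and 2, so reverse_map[0..2] now holds
    * VARYING_SLOT_POS, VARYING_SLOT_PSIZ, and VARYING_SLOT_VAR0.
    */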
325
326 for (unsigned i = 0; i < so_info->num_outputs; i++) {
327 struct pipe_stream_output *output = &so_info->output[i];
328
329 /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
330 output->register_index = reverse_map[output->register_index];
331
332 /* The VUE header contains three scalar fields packed together:
333 * - gl_PointSize is stored in VARYING_SLOT_PSIZ.w
334 * - gl_Layer is stored in VARYING_SLOT_PSIZ.y
335 * - gl_ViewportIndex is stored in VARYING_SLOT_PSIZ.z
336 */
337 switch (output->register_index) {
338 case VARYING_SLOT_LAYER:
339 assert(output->num_components == 1);
340 output->register_index = VARYING_SLOT_PSIZ;
341 output->start_component = 1;
342 break;
343 case VARYING_SLOT_VIEWPORT:
344 assert(output->num_components == 1);
345 output->register_index = VARYING_SLOT_PSIZ;
346 output->start_component = 2;
347 break;
348 case VARYING_SLOT_PSIZ:
349 assert(output->num_components == 1);
350 output->start_component = 3;
351 break;
352 }
353
354 //info->outputs_written |= 1ull << output->register_index;
355 }
356 }
357
358 static void
359 setup_vec4_image_sysval(uint32_t *sysvals, uint32_t idx,
360 unsigned offset, unsigned n)
361 {
362 assert(offset % sizeof(uint32_t) == 0);
363
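   /* Each brw_image_param field gets a full vec4 worth of params; any
    * components beyond the field's actual size are padded with zeros.
    */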
364 for (unsigned i = 0; i < n; ++i)
365 sysvals[i] = BRW_PARAM_IMAGE(idx, offset / sizeof(uint32_t) + i);
366
367 for (unsigned i = n; i < 4; ++i)
368 sysvals[i] = BRW_PARAM_BUILTIN_ZERO;
369 }
370
371 /**
372 * Associate NIR uniform variables with the prog_data->param[] mechanism
373 * used by the backend. Also, decide which UBOs we'd like to push in an
374 * ideal situation (though the backend can reduce this).
375 */
376 static void
377 iris_setup_uniforms(const struct brw_compiler *compiler,
378 void *mem_ctx,
379 nir_shader *nir,
380 struct brw_stage_prog_data *prog_data,
381 enum brw_param_builtin **out_system_values,
382 unsigned *out_num_system_values,
383 unsigned *out_num_cbufs)
384 {
385 UNUSED const struct gen_device_info *devinfo = compiler->devinfo;
386
387 const unsigned IRIS_MAX_SYSTEM_VALUES =
388 PIPE_MAX_SHADER_IMAGES * BRW_IMAGE_PARAM_SIZE;
389 enum brw_param_builtin *system_values =
390 rzalloc_array(mem_ctx, enum brw_param_builtin, IRIS_MAX_SYSTEM_VALUES);
391 unsigned num_system_values = 0;
392
393 unsigned patch_vert_idx = -1;
394 unsigned ucp_idx[IRIS_MAX_CLIP_PLANES];
395 unsigned img_idx[PIPE_MAX_SHADER_IMAGES];
396 memset(ucp_idx, -1, sizeof(ucp_idx));
397 memset(img_idx, -1, sizeof(img_idx));
398
399 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
400
401 nir_builder b;
402 nir_builder_init(&b, impl);
403
404 b.cursor = nir_before_block(nir_start_block(impl));
405 nir_ssa_def *temp_ubo_name = nir_ssa_undef(&b, 1, 32);
406 nir_ssa_def *temp_const_ubo_name = NULL;
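   /* Both of these are placeholders: temp_ubo_name stands in for the index
    * of the system-value cbuf, and temp_const_ubo_name for the constant-data
    * cbuf.  They are patched to real indices once num_cbufs is known below.
    */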
407
408 /* Turn system value intrinsics into uniforms */
409 nir_foreach_block(block, impl) {
410 nir_foreach_instr_safe(instr, block) {
411 if (instr->type != nir_instr_type_intrinsic)
412 continue;
413
414 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
415 nir_ssa_def *offset;
416
417 switch (intrin->intrinsic) {
418 case nir_intrinsic_load_constant: {
419 /* This one is special because it reads from the shader constant
420 * data and not cbuf0 which gallium uploads for us.
421 */
422 b.cursor = nir_before_instr(instr);
423 nir_ssa_def *offset =
424 nir_iadd_imm(&b, nir_ssa_for_src(&b, intrin->src[0], 1),
425 nir_intrinsic_base(intrin));
426
427 if (temp_const_ubo_name == NULL)
428 temp_const_ubo_name = nir_imm_int(&b, 0);
429
430 nir_intrinsic_instr *load_ubo =
431 nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_ubo);
432 load_ubo->num_components = intrin->num_components;
433 load_ubo->src[0] = nir_src_for_ssa(temp_const_ubo_name);
434 load_ubo->src[1] = nir_src_for_ssa(offset);
435 nir_intrinsic_set_align(load_ubo,
436 nir_intrinsic_align_mul(intrin),
437 nir_intrinsic_align_offset(intrin));
438 nir_ssa_dest_init(&load_ubo->instr, &load_ubo->dest,
439 intrin->dest.ssa.num_components,
440 intrin->dest.ssa.bit_size,
441 intrin->dest.ssa.name);
442 nir_builder_instr_insert(&b, &load_ubo->instr);
443
444 nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
445 nir_src_for_ssa(&load_ubo->dest.ssa));
446 nir_instr_remove(&intrin->instr);
447 continue;
448 }
449 case nir_intrinsic_load_user_clip_plane: {
450 unsigned ucp = nir_intrinsic_ucp_id(intrin);
451
452 if (ucp_idx[ucp] == -1) {
453 ucp_idx[ucp] = num_system_values;
454 num_system_values += 4;
455 }
456
457 for (int i = 0; i < 4; i++) {
458 system_values[ucp_idx[ucp] + i] =
459 BRW_PARAM_BUILTIN_CLIP_PLANE(ucp, i);
460 }
461
462 b.cursor = nir_before_instr(instr);
463 offset = nir_imm_int(&b, ucp_idx[ucp] * sizeof(uint32_t));
464 break;
465 }
466 case nir_intrinsic_load_patch_vertices_in:
467 if (patch_vert_idx == -1)
468 patch_vert_idx = num_system_values++;
469
470 system_values[patch_vert_idx] =
471 BRW_PARAM_BUILTIN_PATCH_VERTICES_IN;
472
473 b.cursor = nir_before_instr(instr);
474 offset = nir_imm_int(&b, patch_vert_idx * sizeof(uint32_t));
475 break;
476 case nir_intrinsic_image_deref_load_param_intel: {
477 assert(devinfo->gen < 9);
478 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
479 nir_variable *var = nir_deref_instr_get_variable(deref);
480
481 if (img_idx[var->data.binding] == -1) {
482 /* GL only allows arrays of arrays of images. */
483 assert(glsl_type_is_image(glsl_without_array(var->type)));
484 unsigned num_images = MAX2(1, glsl_get_aoa_size(var->type));
485
486 for (int i = 0; i < num_images; i++) {
487 const unsigned img = var->data.binding + i;
488
489 img_idx[img] = num_system_values;
490 num_system_values += BRW_IMAGE_PARAM_SIZE;
491
492 uint32_t *img_sv = &system_values[img_idx[img]];
493
494 setup_vec4_image_sysval(
495 img_sv + BRW_IMAGE_PARAM_OFFSET_OFFSET, img,
496 offsetof(struct brw_image_param, offset), 2);
497 setup_vec4_image_sysval(
498 img_sv + BRW_IMAGE_PARAM_SIZE_OFFSET, img,
499 offsetof(struct brw_image_param, size), 3);
500 setup_vec4_image_sysval(
501 img_sv + BRW_IMAGE_PARAM_STRIDE_OFFSET, img,
502 offsetof(struct brw_image_param, stride), 4);
503 setup_vec4_image_sysval(
504 img_sv + BRW_IMAGE_PARAM_TILING_OFFSET, img,
505 offsetof(struct brw_image_param, tiling), 3);
506 setup_vec4_image_sysval(
507 img_sv + BRW_IMAGE_PARAM_SWIZZLING_OFFSET, img,
508 offsetof(struct brw_image_param, swizzling), 2);
509 }
510 }
511
512 b.cursor = nir_before_instr(instr);
513 offset = nir_iadd(&b,
514 get_aoa_deref_offset(&b, deref, BRW_IMAGE_PARAM_SIZE * 4),
515 nir_imm_int(&b, img_idx[var->data.binding] * 4 +
516 nir_intrinsic_base(intrin) * 16));
517 break;
518 }
519 default:
520 continue;
521 }
522
523 unsigned comps = nir_intrinsic_dest_components(intrin);
524
525 nir_intrinsic_instr *load =
526 nir_intrinsic_instr_create(nir, nir_intrinsic_load_ubo);
527 load->num_components = comps;
528 load->src[0] = nir_src_for_ssa(temp_ubo_name);
529 load->src[1] = nir_src_for_ssa(offset);
530 nir_intrinsic_set_align(load, 4, 0);
531 nir_ssa_dest_init(&load->instr, &load->dest, comps, 32, NULL);
532 nir_builder_instr_insert(&b, &load->instr);
533 nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
534 nir_src_for_ssa(&load->dest.ssa));
535 nir_instr_remove(instr);
536 }
537 }
538
539 nir_validate_shader(nir, "before remapping");
540
541    /* Uniforms are stored in constant buffer 0, so the user-facing
542     * UBOs are shifted up by one. If any constant buffer is
543     * needed, constant buffer 0 will be needed too, so account for it.
544     */
545 unsigned num_cbufs = nir->info.num_ubos;
546 if (num_cbufs || nir->num_uniforms)
547 num_cbufs++;
548
549 /* Place the new params in a new cbuf. */
550 if (num_system_values > 0) {
551 unsigned sysval_cbuf_index = num_cbufs;
552 num_cbufs++;
553
554 system_values = reralloc(mem_ctx, system_values, enum brw_param_builtin,
555 num_system_values);
556
557 nir_foreach_block(block, impl) {
558 nir_foreach_instr_safe(instr, block) {
559 if (instr->type != nir_instr_type_intrinsic)
560 continue;
561
562 nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);
563
564 if (load->intrinsic != nir_intrinsic_load_ubo)
565 continue;
566
567 b.cursor = nir_before_instr(instr);
568
569 assert(load->src[0].is_ssa);
570
571 if (load->src[0].ssa == temp_ubo_name) {
572 nir_ssa_def *imm = nir_imm_int(&b, sysval_cbuf_index);
573 nir_instr_rewrite_src(instr, &load->src[0],
574 nir_src_for_ssa(imm));
575 }
576 }
577 }
578
579 /* We need to fold the new iadds for brw_nir_analyze_ubo_ranges */
580 nir_opt_constant_folding(nir);
581 } else {
582 ralloc_free(system_values);
583 system_values = NULL;
584 }
585
586 assert(num_cbufs < PIPE_MAX_CONSTANT_BUFFERS);
587 nir_validate_shader(nir, "after remap");
588
589    /* We don't use params[], but gallium leaves num_uniforms set. We use
590     * this to detect when cbuf0 exists but is no longer needed by the time
591     * we get here. Zero it out so that the back-end doesn't get confused
592     * when num_uniforms != nr_params * 4.
593     */
594 nir->num_uniforms = 0;
595
596 /* Constant loads (if any) need to go at the end of the constant buffers so
597 * we need to know num_cbufs before we can lower to them.
598 */
599 if (temp_const_ubo_name != NULL) {
600 nir_load_const_instr *const_ubo_index =
601 nir_instr_as_load_const(temp_const_ubo_name->parent_instr);
602 assert(const_ubo_index->def.bit_size == 32);
603 const_ubo_index->value[0].u32 = num_cbufs;
604 }
605
606 *out_system_values = system_values;
607 *out_num_system_values = num_system_values;
608 *out_num_cbufs = num_cbufs;
609 }
610
611 static const char *surface_group_names[] = {
612 [IRIS_SURFACE_GROUP_RENDER_TARGET] = "render target",
613 [IRIS_SURFACE_GROUP_RENDER_TARGET_READ] = "non-coherent render target read",
614 [IRIS_SURFACE_GROUP_CS_WORK_GROUPS] = "CS work groups",
615 [IRIS_SURFACE_GROUP_TEXTURE] = "texture",
616 [IRIS_SURFACE_GROUP_UBO] = "ubo",
617 [IRIS_SURFACE_GROUP_SSBO] = "ssbo",
618 [IRIS_SURFACE_GROUP_IMAGE] = "image",
619 };
620
621 static void
622 iris_print_binding_table(FILE *fp, const char *name,
623 const struct iris_binding_table *bt)
624 {
625 STATIC_ASSERT(ARRAY_SIZE(surface_group_names) == IRIS_SURFACE_GROUP_COUNT);
626
627 uint32_t total = 0;
628 uint32_t compacted = 0;
629
630 for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) {
631 uint32_t size = bt->sizes[i];
632 total += size;
633 if (size)
634 compacted += util_bitcount64(bt->used_mask[i]);
635 }
636
637 if (total == 0) {
638 fprintf(fp, "Binding table for %s is empty\n\n", name);
639 return;
640 }
641
642 if (total != compacted) {
643 fprintf(fp, "Binding table for %s "
644 "(compacted to %u entries from %u entries)\n",
645 name, compacted, total);
646 } else {
647 fprintf(fp, "Binding table for %s (%u entries)\n", name, total);
648 }
649
650 uint32_t entry = 0;
651 for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) {
652 uint64_t mask = bt->used_mask[i];
653 while (mask) {
654 int index = u_bit_scan64(&mask);
655 fprintf(fp, " [%u] %s #%d\n", entry++, surface_group_names[i], index);
656 }
657 }
658 fprintf(fp, "\n");
659 }
660
661 enum {
662 /* Max elements in a surface group. */
663 SURFACE_GROUP_MAX_ELEMENTS = 64,
664 };
665
666 /**
667 * Map a <group, index> pair to a binding table index.
668 *
669 * For example: <UBO, 5> => binding table index 12
670 */
671 uint32_t
672 iris_group_index_to_bti(const struct iris_binding_table *bt,
673 enum iris_surface_group group, uint32_t index)
674 {
675 assert(index < bt->sizes[group]);
676 uint64_t mask = bt->used_mask[group];
677 uint64_t bit = 1ull << index;
678 if (bit & mask) {
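      /* The compacted index is the group's base offset plus the number of
       * lower-indexed surfaces in this group that are actually used.
       */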
679 return bt->offsets[group] + util_bitcount64((bit - 1) & mask);
680 } else {
681 return IRIS_SURFACE_NOT_USED;
682 }
683 }
684
685 /**
686 * Map a binding table index back to a <group, index> pair.
687 *
688 * For example: binding table index 12 => <UBO, 5>
689 */
690 uint32_t
691 iris_bti_to_group_index(const struct iris_binding_table *bt,
692 enum iris_surface_group group, uint32_t bti)
693 {
694 uint64_t used_mask = bt->used_mask[group];
695 assert(bti >= bt->offsets[group]);
696
697 uint32_t c = bti - bt->offsets[group];
698 while (used_mask) {
699 int i = u_bit_scan64(&used_mask);
700 if (c == 0)
701 return i;
702 c--;
703 }
704
705 return IRIS_SURFACE_NOT_USED;
706 }
707
708 static void
709 rewrite_src_with_bti(nir_builder *b, struct iris_binding_table *bt,
710 nir_instr *instr, nir_src *src,
711 enum iris_surface_group group)
712 {
713 assert(bt->sizes[group] > 0);
714
715 b->cursor = nir_before_instr(instr);
716 nir_ssa_def *bti;
717 if (nir_src_is_const(*src)) {
718 uint32_t index = nir_src_as_uint(*src);
719 bti = nir_imm_intN_t(b, iris_group_index_to_bti(bt, group, index),
720 src->ssa->bit_size);
721 } else {
722       /* Indirect usage makes all the surfaces of the group available,
723 * so we can just add the base.
724 */
725 assert(bt->used_mask[group] == BITFIELD64_MASK(bt->sizes[group]));
726 bti = nir_iadd_imm(b, src->ssa, bt->offsets[group]);
727 }
728 nir_instr_rewrite_src(instr, src, nir_src_for_ssa(bti));
729 }
730
731 static void
732 mark_used_with_src(struct iris_binding_table *bt, nir_src *src,
733 enum iris_surface_group group)
734 {
735 assert(bt->sizes[group] > 0);
736
737 if (nir_src_is_const(*src)) {
738 uint64_t index = nir_src_as_uint(*src);
739 assert(index < bt->sizes[group]);
740 bt->used_mask[group] |= 1ull << index;
741 } else {
742       /* There's an indirect usage, so we need all the surfaces. */
743 bt->used_mask[group] = BITFIELD64_MASK(bt->sizes[group]);
744 }
745 }
746
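/* Setting INTEL_DISABLE_COMPACT_BINDING_TABLE to a true value keeps every
 * surface group at its full size, which can help when debugging binding
 * table problems.
 */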
747 static bool
748 skip_compacting_binding_tables(void)
749 {
750 static int skip = -1;
751 if (skip < 0)
752 skip = env_var_as_boolean("INTEL_DISABLE_COMPACT_BINDING_TABLE", false);
753 return skip;
754 }
755
756 /**
757 * Set up the binding table indices and apply to the shader.
758 */
759 static void
760 iris_setup_binding_table(const struct gen_device_info *devinfo,
761 struct nir_shader *nir,
762 struct iris_binding_table *bt,
763 unsigned num_render_targets,
764 unsigned num_system_values,
765 unsigned num_cbufs)
766 {
767 const struct shader_info *info = &nir->info;
768
769 memset(bt, 0, sizeof(*bt));
770
771 /* Set the sizes for each surface group. For some groups, we already know
772 * upfront how many will be used, so mark them.
773 */
774 if (info->stage == MESA_SHADER_FRAGMENT) {
775 bt->sizes[IRIS_SURFACE_GROUP_RENDER_TARGET] = num_render_targets;
776 /* All render targets used. */
777 bt->used_mask[IRIS_SURFACE_GROUP_RENDER_TARGET] =
778 BITFIELD64_MASK(num_render_targets);
779
780       /* Set up the render target read surface group in order to support
781        * non-coherent framebuffer fetch on Gen8.
782 */
783 if (devinfo->gen == 8 && info->outputs_read) {
784 bt->sizes[IRIS_SURFACE_GROUP_RENDER_TARGET_READ] = num_render_targets;
785 bt->used_mask[IRIS_SURFACE_GROUP_RENDER_TARGET_READ] =
786 BITFIELD64_MASK(num_render_targets);
787 }
788 } else if (info->stage == MESA_SHADER_COMPUTE) {
789 bt->sizes[IRIS_SURFACE_GROUP_CS_WORK_GROUPS] = 1;
790 }
791
792 bt->sizes[IRIS_SURFACE_GROUP_TEXTURE] = util_last_bit(info->textures_used);
793 bt->used_mask[IRIS_SURFACE_GROUP_TEXTURE] = info->textures_used;
794
795 bt->sizes[IRIS_SURFACE_GROUP_IMAGE] = info->num_images;
796
797 /* Allocate an extra slot in the UBO section for NIR constants.
798 * Binding table compaction will remove it if unnecessary.
799 *
800 * We don't include them in iris_compiled_shader::num_cbufs because
801 * they are uploaded separately from shs->constbuf[], but from a shader
802 * point of view, they're another UBO (at the end of the section).
803 */
804 bt->sizes[IRIS_SURFACE_GROUP_UBO] = num_cbufs + 1;
805
806 bt->sizes[IRIS_SURFACE_GROUP_SSBO] = info->num_ssbos;
807
808 for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++)
809 assert(bt->sizes[i] <= SURFACE_GROUP_MAX_ELEMENTS);
810
811    /* Mark surfaces used for the cases where we don't have the information available
812 * upfront.
813 */
814 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
815 nir_foreach_block (block, impl) {
816 nir_foreach_instr (instr, block) {
817 if (instr->type != nir_instr_type_intrinsic)
818 continue;
819
820 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
821 switch (intrin->intrinsic) {
822 case nir_intrinsic_load_num_work_groups:
823 bt->used_mask[IRIS_SURFACE_GROUP_CS_WORK_GROUPS] = 1;
824 break;
825
826 case nir_intrinsic_load_output:
827 if (devinfo->gen == 8) {
828 mark_used_with_src(bt, &intrin->src[0],
829 IRIS_SURFACE_GROUP_RENDER_TARGET_READ);
830 }
831 break;
832
833 case nir_intrinsic_image_size:
834 case nir_intrinsic_image_load:
835 case nir_intrinsic_image_store:
836 case nir_intrinsic_image_atomic_add:
837 case nir_intrinsic_image_atomic_imin:
838 case nir_intrinsic_image_atomic_umin:
839 case nir_intrinsic_image_atomic_imax:
840 case nir_intrinsic_image_atomic_umax:
841 case nir_intrinsic_image_atomic_and:
842 case nir_intrinsic_image_atomic_or:
843 case nir_intrinsic_image_atomic_xor:
844 case nir_intrinsic_image_atomic_exchange:
845 case nir_intrinsic_image_atomic_comp_swap:
846 case nir_intrinsic_image_load_raw_intel:
847 case nir_intrinsic_image_store_raw_intel:
848 mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_IMAGE);
849 break;
850
851 case nir_intrinsic_load_ubo:
852 mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_UBO);
853 break;
854
855 case nir_intrinsic_store_ssbo:
856 mark_used_with_src(bt, &intrin->src[1], IRIS_SURFACE_GROUP_SSBO);
857 break;
858
859 case nir_intrinsic_get_buffer_size:
860 case nir_intrinsic_ssbo_atomic_add:
861 case nir_intrinsic_ssbo_atomic_imin:
862 case nir_intrinsic_ssbo_atomic_umin:
863 case nir_intrinsic_ssbo_atomic_imax:
864 case nir_intrinsic_ssbo_atomic_umax:
865 case nir_intrinsic_ssbo_atomic_and:
866 case nir_intrinsic_ssbo_atomic_or:
867 case nir_intrinsic_ssbo_atomic_xor:
868 case nir_intrinsic_ssbo_atomic_exchange:
869 case nir_intrinsic_ssbo_atomic_comp_swap:
870 case nir_intrinsic_ssbo_atomic_fmin:
871 case nir_intrinsic_ssbo_atomic_fmax:
872 case nir_intrinsic_ssbo_atomic_fcomp_swap:
873 case nir_intrinsic_load_ssbo:
874 mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_SSBO);
875 break;
876
877 default:
878 break;
879 }
880 }
881 }
882
883    /* When disabled, we just mark everything as used. */
884 if (unlikely(skip_compacting_binding_tables())) {
885 for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++)
886 bt->used_mask[i] = BITFIELD64_MASK(bt->sizes[i]);
887 }
888
889 /* Calculate the offsets and the binding table size based on the used
890 * surfaces. After this point, the functions to go between "group indices"
891 * and binding table indices can be used.
892 */
893 uint32_t next = 0;
894 for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) {
895 if (bt->used_mask[i] != 0) {
896 bt->offsets[i] = next;
897 next += util_bitcount64(bt->used_mask[i]);
898 }
899 }
900 bt->size_bytes = next * 4;
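   /* Each binding table entry is a single dword, so e.g. five used surfaces
    * across all groups make for a 20-byte table.
    */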
901
902 if (unlikely(INTEL_DEBUG & DEBUG_BT)) {
903 iris_print_binding_table(stderr, gl_shader_stage_name(info->stage), bt);
904 }
905
906 /* Apply the binding table indices. The backend compiler is not expected
907 * to change those, as we haven't set any of the *_start entries in brw
908 * binding_table.
909 */
910 nir_builder b;
911 nir_builder_init(&b, impl);
912
913 nir_foreach_block (block, impl) {
914 nir_foreach_instr (instr, block) {
915 if (instr->type == nir_instr_type_tex) {
916 nir_tex_instr *tex = nir_instr_as_tex(instr);
917 tex->texture_index =
918 iris_group_index_to_bti(bt, IRIS_SURFACE_GROUP_TEXTURE,
919 tex->texture_index);
920 continue;
921 }
922
923 if (instr->type != nir_instr_type_intrinsic)
924 continue;
925
926 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
927 switch (intrin->intrinsic) {
928 case nir_intrinsic_image_size:
929 case nir_intrinsic_image_load:
930 case nir_intrinsic_image_store:
931 case nir_intrinsic_image_atomic_add:
932 case nir_intrinsic_image_atomic_imin:
933 case nir_intrinsic_image_atomic_umin:
934 case nir_intrinsic_image_atomic_imax:
935 case nir_intrinsic_image_atomic_umax:
936 case nir_intrinsic_image_atomic_and:
937 case nir_intrinsic_image_atomic_or:
938 case nir_intrinsic_image_atomic_xor:
939 case nir_intrinsic_image_atomic_exchange:
940 case nir_intrinsic_image_atomic_comp_swap:
941 case nir_intrinsic_image_load_raw_intel:
942 case nir_intrinsic_image_store_raw_intel:
943 rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
944 IRIS_SURFACE_GROUP_IMAGE);
945 break;
946
947 case nir_intrinsic_load_ubo:
948 rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
949 IRIS_SURFACE_GROUP_UBO);
950 break;
951
952 case nir_intrinsic_store_ssbo:
953 rewrite_src_with_bti(&b, bt, instr, &intrin->src[1],
954 IRIS_SURFACE_GROUP_SSBO);
955 break;
956
957 case nir_intrinsic_load_output:
958 if (devinfo->gen == 8) {
959 rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
960 IRIS_SURFACE_GROUP_RENDER_TARGET_READ);
961 }
962 break;
963
964 case nir_intrinsic_get_buffer_size:
965 case nir_intrinsic_ssbo_atomic_add:
966 case nir_intrinsic_ssbo_atomic_imin:
967 case nir_intrinsic_ssbo_atomic_umin:
968 case nir_intrinsic_ssbo_atomic_imax:
969 case nir_intrinsic_ssbo_atomic_umax:
970 case nir_intrinsic_ssbo_atomic_and:
971 case nir_intrinsic_ssbo_atomic_or:
972 case nir_intrinsic_ssbo_atomic_xor:
973 case nir_intrinsic_ssbo_atomic_exchange:
974 case nir_intrinsic_ssbo_atomic_comp_swap:
975 case nir_intrinsic_ssbo_atomic_fmin:
976 case nir_intrinsic_ssbo_atomic_fmax:
977 case nir_intrinsic_ssbo_atomic_fcomp_swap:
978 case nir_intrinsic_load_ssbo:
979 rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
980 IRIS_SURFACE_GROUP_SSBO);
981 break;
982
983 default:
984 break;
985 }
986 }
987 }
988 }
989
990 static void
991 iris_debug_recompile(struct iris_context *ice,
992 struct shader_info *info,
993 const struct brw_base_prog_key *key)
994 {
995 struct iris_screen *screen = (struct iris_screen *) ice->ctx.screen;
996 const struct gen_device_info *devinfo = &screen->devinfo;
997 const struct brw_compiler *c = screen->compiler;
998
999 if (!info)
1000 return;
1001
1002 c->shader_perf_log(&ice->dbg, "Recompiling %s shader for program %s: %s\n",
1003 _mesa_shader_stage_to_string(info->stage),
1004 info->name ? info->name : "(no identifier)",
1005 info->label ? info->label : "");
1006
1007 const void *old_iris_key =
1008 iris_find_previous_compile(ice, info->stage, key->program_string_id);
1009
1010 union brw_any_prog_key old_key;
1011
1012 switch (info->stage) {
1013 case MESA_SHADER_VERTEX:
1014 old_key.vs = iris_to_brw_vs_key(devinfo, old_iris_key);
1015 break;
1016 case MESA_SHADER_TESS_CTRL:
1017 old_key.tcs = iris_to_brw_tcs_key(devinfo, old_iris_key);
1018 break;
1019 case MESA_SHADER_TESS_EVAL:
1020 old_key.tes = iris_to_brw_tes_key(devinfo, old_iris_key);
1021 break;
1022 case MESA_SHADER_GEOMETRY:
1023 old_key.gs = iris_to_brw_gs_key(devinfo, old_iris_key);
1024 break;
1025 case MESA_SHADER_FRAGMENT:
1026 old_key.wm = iris_to_brw_fs_key(devinfo, old_iris_key);
1027 break;
1028 case MESA_SHADER_COMPUTE:
1029 old_key.cs = iris_to_brw_cs_key(devinfo, old_iris_key);
1030 break;
1031 default:
1032 unreachable("invalid shader stage");
1033 }
1034
1035 brw_debug_key_recompile(c, &ice->dbg, info->stage, &old_key.base, key);
1036 }
1037
1038 /**
1039 * Get the shader for the last enabled geometry stage.
1040 *
1041 * This stage is the one which will feed stream output and the rasterizer.
1042 */
1043 static gl_shader_stage
1044 last_vue_stage(struct iris_context *ice)
1045 {
1046 if (ice->shaders.uncompiled[MESA_SHADER_GEOMETRY])
1047 return MESA_SHADER_GEOMETRY;
1048
1049 if (ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL])
1050 return MESA_SHADER_TESS_EVAL;
1051
1052 return MESA_SHADER_VERTEX;
1053 }
1054
1055 /**
1056 * Compile a vertex shader, and upload the assembly.
1057 */
1058 static struct iris_compiled_shader *
1059 iris_compile_vs(struct iris_context *ice,
1060 struct iris_uncompiled_shader *ish,
1061 const struct iris_vs_prog_key *key)
1062 {
1063 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1064 const struct brw_compiler *compiler = screen->compiler;
1065 const struct gen_device_info *devinfo = &screen->devinfo;
1066 void *mem_ctx = ralloc_context(NULL);
1067 struct brw_vs_prog_data *vs_prog_data =
1068 rzalloc(mem_ctx, struct brw_vs_prog_data);
1069 struct brw_vue_prog_data *vue_prog_data = &vs_prog_data->base;
1070 struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
1071 enum brw_param_builtin *system_values;
1072 unsigned num_system_values;
1073 unsigned num_cbufs;
1074
1075 nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
1076
1077 if (key->vue.nr_userclip_plane_consts) {
1078 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
1079 nir_lower_clip_vs(nir, (1 << key->vue.nr_userclip_plane_consts) - 1,
1080 true, false, NULL);
1081 nir_lower_io_to_temporaries(nir, impl, true, false);
1082 nir_lower_global_vars_to_local(nir);
1083 nir_lower_vars_to_ssa(nir);
1084 nir_shader_gather_info(nir, impl);
1085 }
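   /* The clip plane values themselves arrive through the user clip plane
    * system values handled by iris_setup_uniforms(), which is why the brw
    * key's nr_userclip_plane_consts is left at zero.
    */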
1086
1087 prog_data->use_alt_mode = ish->use_alt_mode;
1088
1089 iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
1090 &num_system_values, &num_cbufs);
1091
1092 struct iris_binding_table bt;
1093 iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
1094 num_system_values, num_cbufs);
1095
1096 brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
1097
1098 brw_compute_vue_map(devinfo,
1099 &vue_prog_data->vue_map, nir->info.outputs_written,
1100 nir->info.separate_shader, /* pos_slots */ 1);
1101
1102 struct brw_vs_prog_key brw_key = iris_to_brw_vs_key(devinfo, key);
1103
1104 char *error_str = NULL;
1105 const unsigned *program =
1106 brw_compile_vs(compiler, &ice->dbg, mem_ctx, &brw_key, vs_prog_data,
1107 nir, -1, NULL, &error_str);
1108 if (program == NULL) {
1109 dbg_printf("Failed to compile vertex shader: %s\n", error_str);
1110 ralloc_free(mem_ctx);
1111       return NULL;
1112 }
1113
1114 if (ish->compiled_once) {
1115 iris_debug_recompile(ice, &nir->info, &brw_key.base);
1116 } else {
1117 ish->compiled_once = true;
1118 }
1119
1120 uint32_t *so_decls =
1121 ice->vtbl.create_so_decl_list(&ish->stream_output,
1122 &vue_prog_data->vue_map);
1123
1124 struct iris_compiled_shader *shader =
1125 iris_upload_shader(ice, IRIS_CACHE_VS, sizeof(*key), key, program,
1126 prog_data, so_decls, system_values, num_system_values,
1127 num_cbufs, &bt);
1128
1129 iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
1130
1131 ralloc_free(mem_ctx);
1132 return shader;
1133 }
1134
1135 /**
1136 * Update the current vertex shader variant.
1137 *
1138 * Fill out the key, look in the cache, compile and bind if needed.
1139 */
1140 static void
1141 iris_update_compiled_vs(struct iris_context *ice)
1142 {
1143 struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_VERTEX];
1144 struct iris_uncompiled_shader *ish =
1145 ice->shaders.uncompiled[MESA_SHADER_VERTEX];
1146
1147 struct iris_vs_prog_key key = { KEY_ID(vue.base) };
1148 ice->vtbl.populate_vs_key(ice, &ish->nir->info, last_vue_stage(ice), &key);
1149
1150 struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_VS];
1151 struct iris_compiled_shader *shader =
1152 iris_find_cached_shader(ice, IRIS_CACHE_VS, sizeof(key), &key);
1153
1154 if (!shader)
1155 shader = iris_disk_cache_retrieve(ice, ish, &key, sizeof(key));
1156
1157 if (!shader)
1158 shader = iris_compile_vs(ice, ish, &key);
1159
1160 if (old != shader) {
1161 ice->shaders.prog[IRIS_CACHE_VS] = shader;
1162 ice->state.dirty |= IRIS_DIRTY_VS |
1163 IRIS_DIRTY_BINDINGS_VS |
1164 IRIS_DIRTY_CONSTANTS_VS |
1165 IRIS_DIRTY_VF_SGVS;
1166 shs->sysvals_need_upload = true;
1167
1168 const struct brw_vs_prog_data *vs_prog_data =
1169 (void *) shader->prog_data;
1170 const bool uses_draw_params = vs_prog_data->uses_firstvertex ||
1171 vs_prog_data->uses_baseinstance;
1172 const bool uses_derived_draw_params = vs_prog_data->uses_drawid ||
1173 vs_prog_data->uses_is_indexed_draw;
1174 const bool needs_sgvs_element = uses_draw_params ||
1175 vs_prog_data->uses_instanceid ||
1176 vs_prog_data->uses_vertexid;
1177
1178 if (ice->state.vs_uses_draw_params != uses_draw_params ||
1179 ice->state.vs_uses_derived_draw_params != uses_derived_draw_params ||
1180 ice->state.vs_needs_edge_flag != ish->needs_edge_flag) {
1181 ice->state.dirty |= IRIS_DIRTY_VERTEX_BUFFERS |
1182 IRIS_DIRTY_VERTEX_ELEMENTS;
1183 }
1184 ice->state.vs_uses_draw_params = uses_draw_params;
1185 ice->state.vs_uses_derived_draw_params = uses_derived_draw_params;
1186 ice->state.vs_needs_sgvs_element = needs_sgvs_element;
1187 ice->state.vs_needs_edge_flag = ish->needs_edge_flag;
1188 }
1189 }
1190
1191 /**
1192 * Get the shader_info for a given stage, or NULL if the stage is disabled.
1193 */
1194 const struct shader_info *
1195 iris_get_shader_info(const struct iris_context *ice, gl_shader_stage stage)
1196 {
1197 const struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[stage];
1198
1199 if (!ish)
1200 return NULL;
1201
1202 const nir_shader *nir = ish->nir;
1203 return &nir->info;
1204 }
1205
1206 /**
1207 * Get the union of TCS output and TES input slots.
1208 *
1209 * TCS and TES need to agree on a common URB entry layout. In particular,
1210 * the data for all patch vertices is stored in a single URB entry (unlike
1211 * GS which has one entry per input vertex). This means that per-vertex
1212 * array indexing needs a stride.
1213 *
1214 * SSO requires locations to match, but doesn't require the number of
1215 * outputs/inputs to match (in fact, the TCS often has extra outputs).
1216 * So, we need to take the extra step of unifying these on the fly.
1217 */
1218 static void
1219 get_unified_tess_slots(const struct iris_context *ice,
1220 uint64_t *per_vertex_slots,
1221 uint32_t *per_patch_slots)
1222 {
1223 const struct shader_info *tcs =
1224 iris_get_shader_info(ice, MESA_SHADER_TESS_CTRL);
1225 const struct shader_info *tes =
1226 iris_get_shader_info(ice, MESA_SHADER_TESS_EVAL);
1227
1228 *per_vertex_slots = tes->inputs_read;
1229 *per_patch_slots = tes->patch_inputs_read;
1230
1231 if (tcs) {
1232 *per_vertex_slots |= tcs->outputs_written;
1233 *per_patch_slots |= tcs->patch_outputs_written;
1234 }
1235 }
1236
1237 /**
1238 * Compile a tessellation control shader, and upload the assembly.
1239 */
1240 static struct iris_compiled_shader *
1241 iris_compile_tcs(struct iris_context *ice,
1242 struct iris_uncompiled_shader *ish,
1243 const struct iris_tcs_prog_key *key)
1244 {
1245 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1246 const struct brw_compiler *compiler = screen->compiler;
1247 const struct nir_shader_compiler_options *options =
1248 compiler->glsl_compiler_options[MESA_SHADER_TESS_CTRL].NirOptions;
1249 void *mem_ctx = ralloc_context(NULL);
1250 struct brw_tcs_prog_data *tcs_prog_data =
1251 rzalloc(mem_ctx, struct brw_tcs_prog_data);
1252 struct brw_vue_prog_data *vue_prog_data = &tcs_prog_data->base;
1253 struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
1254 const struct gen_device_info *devinfo = &screen->devinfo;
1255 enum brw_param_builtin *system_values = NULL;
1256 unsigned num_system_values = 0;
1257 unsigned num_cbufs = 0;
1258
1259 nir_shader *nir;
1260
1261 struct iris_binding_table bt;
1262
1263 struct brw_tcs_prog_key brw_key = iris_to_brw_tcs_key(devinfo, key);
1264
1265 if (ish) {
1266 nir = nir_shader_clone(mem_ctx, ish->nir);
1267
1268 iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
1269 &num_system_values, &num_cbufs);
1270 iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
1271 num_system_values, num_cbufs);
1272 brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
1273 } else {
1274 nir =
1275 brw_nir_create_passthrough_tcs(mem_ctx, compiler, options, &brw_key);
1276
1277 /* Reserve space for passing the default tess levels as constants. */
1278 num_cbufs = 1;
1279 num_system_values = 8;
1280 system_values =
1281 rzalloc_array(mem_ctx, enum brw_param_builtin, num_system_values);
1282 prog_data->param = rzalloc_array(mem_ctx, uint32_t, num_system_values);
1283 prog_data->nr_params = num_system_values;
1284
1285 if (key->tes_primitive_mode == GL_QUADS) {
1286 for (int i = 0; i < 4; i++)
1287 system_values[7 - i] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i;
1288
1289 system_values[3] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X;
1290 system_values[2] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_Y;
1291 } else if (key->tes_primitive_mode == GL_TRIANGLES) {
1292 for (int i = 0; i < 3; i++)
1293 system_values[7 - i] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i;
1294
1295 system_values[4] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X;
1296 } else {
1297 assert(key->tes_primitive_mode == GL_ISOLINES);
1298 system_values[7] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_Y;
1299 system_values[6] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X;
1300 }
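      /* These eight slots hold the default tessellation levels for the
       * passthrough TCS; the reversed ordering presumably mirrors the layout
       * it expects in its push constants.
       */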
1301
1302       /* Manually set up the TCS binding table. */
1303 memset(&bt, 0, sizeof(bt));
1304 bt.sizes[IRIS_SURFACE_GROUP_UBO] = 1;
1305 bt.used_mask[IRIS_SURFACE_GROUP_UBO] = 1;
1306 bt.size_bytes = 4;
1307
1308 prog_data->ubo_ranges[0].length = 1;
1309 }
1310
1311 char *error_str = NULL;
1312 const unsigned *program =
1313 brw_compile_tcs(compiler, &ice->dbg, mem_ctx, &brw_key, tcs_prog_data,
1314 nir, -1, NULL, &error_str);
1315 if (program == NULL) {
1316 dbg_printf("Failed to compile control shader: %s\n", error_str);
1317 ralloc_free(mem_ctx);
1318       return NULL;
1319 }
1320
1321 if (ish) {
1322 if (ish->compiled_once) {
1323 iris_debug_recompile(ice, &nir->info, &brw_key.base);
1324 } else {
1325 ish->compiled_once = true;
1326 }
1327 }
1328
1329 struct iris_compiled_shader *shader =
1330 iris_upload_shader(ice, IRIS_CACHE_TCS, sizeof(*key), key, program,
1331 prog_data, NULL, system_values, num_system_values,
1332 num_cbufs, &bt);
1333
1334 if (ish)
1335 iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
1336
1337 ralloc_free(mem_ctx);
1338 return shader;
1339 }
1340
1341 /**
1342 * Update the current tessellation control shader variant.
1343 *
1344 * Fill out the key, look in the cache, compile and bind if needed.
1345 */
1346 static void
1347 iris_update_compiled_tcs(struct iris_context *ice)
1348 {
1349 struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_TESS_CTRL];
1350 struct iris_uncompiled_shader *tcs =
1351 ice->shaders.uncompiled[MESA_SHADER_TESS_CTRL];
1352 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1353 const struct brw_compiler *compiler = screen->compiler;
1354 const struct gen_device_info *devinfo = &screen->devinfo;
1355
1356 const struct shader_info *tes_info =
1357 iris_get_shader_info(ice, MESA_SHADER_TESS_EVAL);
1358 struct iris_tcs_prog_key key = {
1359 .vue.base.program_string_id = tcs ? tcs->program_id : 0,
1360 .tes_primitive_mode = tes_info->tess.primitive_mode,
1361 .input_vertices =
1362 !tcs || compiler->use_tcs_8_patch ? ice->state.vertices_per_patch : 0,
1363 .quads_workaround = devinfo->gen < 9 &&
1364 tes_info->tess.primitive_mode == GL_QUADS &&
1365 tes_info->tess.spacing == TESS_SPACING_EQUAL,
1366 };
1367 get_unified_tess_slots(ice, &key.outputs_written,
1368 &key.patch_outputs_written);
1369 ice->vtbl.populate_tcs_key(ice, &key);
1370
1371 struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_TCS];
1372 struct iris_compiled_shader *shader =
1373 iris_find_cached_shader(ice, IRIS_CACHE_TCS, sizeof(key), &key);
1374
1375 if (tcs && !shader)
1376 shader = iris_disk_cache_retrieve(ice, tcs, &key, sizeof(key));
1377
1378 if (!shader)
1379 shader = iris_compile_tcs(ice, tcs, &key);
1380
1381 if (old != shader) {
1382 ice->shaders.prog[IRIS_CACHE_TCS] = shader;
1383 ice->state.dirty |= IRIS_DIRTY_TCS |
1384 IRIS_DIRTY_BINDINGS_TCS |
1385 IRIS_DIRTY_CONSTANTS_TCS;
1386 shs->sysvals_need_upload = true;
1387 }
1388 }
1389
1390 /**
1391 * Compile a tessellation evaluation shader, and upload the assembly.
1392 */
1393 static struct iris_compiled_shader *
1394 iris_compile_tes(struct iris_context *ice,
1395 struct iris_uncompiled_shader *ish,
1396 const struct iris_tes_prog_key *key)
1397 {
1398 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1399 const struct brw_compiler *compiler = screen->compiler;
1400 void *mem_ctx = ralloc_context(NULL);
1401 struct brw_tes_prog_data *tes_prog_data =
1402 rzalloc(mem_ctx, struct brw_tes_prog_data);
1403 struct brw_vue_prog_data *vue_prog_data = &tes_prog_data->base;
1404 struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
1405 enum brw_param_builtin *system_values;
1406 const struct gen_device_info *devinfo = &screen->devinfo;
1407 unsigned num_system_values;
1408 unsigned num_cbufs;
1409
1410 nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
1411
1412 if (key->vue.nr_userclip_plane_consts) {
1413 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
1414 nir_lower_clip_vs(nir, (1 << key->vue.nr_userclip_plane_consts) - 1,
1415 true, false, NULL);
1416 nir_lower_io_to_temporaries(nir, impl, true, false);
1417 nir_lower_global_vars_to_local(nir);
1418 nir_lower_vars_to_ssa(nir);
1419 nir_shader_gather_info(nir, impl);
1420 }
1421
1422 iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
1423 &num_system_values, &num_cbufs);
1424
1425 struct iris_binding_table bt;
1426 iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
1427 num_system_values, num_cbufs);
1428
1429 brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
1430
1431 struct brw_vue_map input_vue_map;
1432 brw_compute_tess_vue_map(&input_vue_map, key->inputs_read,
1433 key->patch_inputs_read);
1434
1435 struct brw_tes_prog_key brw_key = iris_to_brw_tes_key(devinfo, key);
1436
1437 char *error_str = NULL;
1438 const unsigned *program =
1439 brw_compile_tes(compiler, &ice->dbg, mem_ctx, &brw_key, &input_vue_map,
1440 tes_prog_data, nir, -1, NULL, &error_str);
1441 if (program == NULL) {
1442 dbg_printf("Failed to compile evaluation shader: %s\n", error_str);
1443 ralloc_free(mem_ctx);
1444       return NULL;
1445 }
1446
1447 if (ish->compiled_once) {
1448 iris_debug_recompile(ice, &nir->info, &brw_key.base);
1449 } else {
1450 ish->compiled_once = true;
1451 }
1452
1453 uint32_t *so_decls =
1454 ice->vtbl.create_so_decl_list(&ish->stream_output,
1455 &vue_prog_data->vue_map);
1456
1457
1458 struct iris_compiled_shader *shader =
1459 iris_upload_shader(ice, IRIS_CACHE_TES, sizeof(*key), key, program,
1460 prog_data, so_decls, system_values, num_system_values,
1461 num_cbufs, &bt);
1462
1463 iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
1464
1465 ralloc_free(mem_ctx);
1466 return shader;
1467 }
1468
1469 /**
1470 * Update the current tessellation evaluation shader variant.
1471 *
1472 * Fill out the key, look in the cache, compile and bind if needed.
1473 */
1474 static void
1475 iris_update_compiled_tes(struct iris_context *ice)
1476 {
1477 struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_TESS_EVAL];
1478 struct iris_uncompiled_shader *ish =
1479 ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL];
1480
1481 struct iris_tes_prog_key key = { KEY_ID(vue.base) };
1482 get_unified_tess_slots(ice, &key.inputs_read, &key.patch_inputs_read);
1483 ice->vtbl.populate_tes_key(ice, &ish->nir->info, last_vue_stage(ice), &key);
1484
1485 struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_TES];
1486 struct iris_compiled_shader *shader =
1487 iris_find_cached_shader(ice, IRIS_CACHE_TES, sizeof(key), &key);
1488
1489 if (!shader)
1490 shader = iris_disk_cache_retrieve(ice, ish, &key, sizeof(key));
1491
1492 if (!shader)
1493 shader = iris_compile_tes(ice, ish, &key);
1494
1495 if (old != shader) {
1496 ice->shaders.prog[IRIS_CACHE_TES] = shader;
1497 ice->state.dirty |= IRIS_DIRTY_TES |
1498 IRIS_DIRTY_BINDINGS_TES |
1499 IRIS_DIRTY_CONSTANTS_TES;
1500 shs->sysvals_need_upload = true;
1501 }
1502
1503 /* TODO: Could compare and avoid flagging this. */
1504 const struct shader_info *tes_info = &ish->nir->info;
1505 if (tes_info->system_values_read & (1ull << SYSTEM_VALUE_VERTICES_IN)) {
1506 ice->state.dirty |= IRIS_DIRTY_CONSTANTS_TES;
1507 ice->state.shaders[MESA_SHADER_TESS_EVAL].sysvals_need_upload = true;
1508 }
1509 }
1510
1511 /**
1512 * Compile a geometry shader, and upload the assembly.
1513 */
1514 static struct iris_compiled_shader *
1515 iris_compile_gs(struct iris_context *ice,
1516 struct iris_uncompiled_shader *ish,
1517 const struct iris_gs_prog_key *key)
1518 {
1519 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1520 const struct brw_compiler *compiler = screen->compiler;
1521 const struct gen_device_info *devinfo = &screen->devinfo;
1522 void *mem_ctx = ralloc_context(NULL);
1523 struct brw_gs_prog_data *gs_prog_data =
1524 rzalloc(mem_ctx, struct brw_gs_prog_data);
1525 struct brw_vue_prog_data *vue_prog_data = &gs_prog_data->base;
1526 struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
1527 enum brw_param_builtin *system_values;
1528 unsigned num_system_values;
1529 unsigned num_cbufs;
1530
1531 nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
1532
1533 if (key->vue.nr_userclip_plane_consts) {
1534 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
1535 nir_lower_clip_gs(nir, (1 << key->vue.nr_userclip_plane_consts) - 1,
1536 false, NULL);
1537 nir_lower_io_to_temporaries(nir, impl, true, false);
1538 nir_lower_global_vars_to_local(nir);
1539 nir_lower_vars_to_ssa(nir);
1540 nir_shader_gather_info(nir, impl);
1541 }
1542
1543 iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
1544 &num_system_values, &num_cbufs);
1545
1546 struct iris_binding_table bt;
1547 iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
1548 num_system_values, num_cbufs);
1549
1550 brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
1551
1552 brw_compute_vue_map(devinfo,
1553 &vue_prog_data->vue_map, nir->info.outputs_written,
1554 nir->info.separate_shader, /* pos_slots */ 1);
1555
1556 struct brw_gs_prog_key brw_key = iris_to_brw_gs_key(devinfo, key);
1557
1558 char *error_str = NULL;
1559 const unsigned *program =
1560 brw_compile_gs(compiler, &ice->dbg, mem_ctx, &brw_key, gs_prog_data,
1561 nir, NULL, -1, NULL, &error_str);
1562 if (program == NULL) {
1563 dbg_printf("Failed to compile geometry shader: %s\n", error_str);
1564 ralloc_free(mem_ctx);
1565       return NULL;
1566 }
1567
1568 if (ish->compiled_once) {
1569 iris_debug_recompile(ice, &nir->info, &brw_key.base);
1570 } else {
1571 ish->compiled_once = true;
1572 }
1573
1574 uint32_t *so_decls =
1575 ice->vtbl.create_so_decl_list(&ish->stream_output,
1576 &vue_prog_data->vue_map);
1577
1578 struct iris_compiled_shader *shader =
1579 iris_upload_shader(ice, IRIS_CACHE_GS, sizeof(*key), key, program,
1580 prog_data, so_decls, system_values, num_system_values,
1581 num_cbufs, &bt);
1582
1583 iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
1584
1585 ralloc_free(mem_ctx);
1586 return shader;
1587 }
1588
1589 /**
1590 * Update the current geometry shader variant.
1591 *
1592 * Fill out the key, look in the cache, compile and bind if needed.
1593 */
1594 static void
1595 iris_update_compiled_gs(struct iris_context *ice)
1596 {
1597 struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_GEOMETRY];
1598 struct iris_uncompiled_shader *ish =
1599 ice->shaders.uncompiled[MESA_SHADER_GEOMETRY];
1600 struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_GS];
1601 struct iris_compiled_shader *shader = NULL;
1602
1603 if (ish) {
1604 struct iris_gs_prog_key key = { KEY_ID(vue.base) };
1605 ice->vtbl.populate_gs_key(ice, &ish->nir->info, last_vue_stage(ice), &key);
1606
1607 shader =
1608 iris_find_cached_shader(ice, IRIS_CACHE_GS, sizeof(key), &key);
1609
1610 if (!shader)
1611 shader = iris_disk_cache_retrieve(ice, ish, &key, sizeof(key));
1612
1613 if (!shader)
1614 shader = iris_compile_gs(ice, ish, &key);
1615 }
1616
1617 if (old != shader) {
1618 ice->shaders.prog[IRIS_CACHE_GS] = shader;
1619 ice->state.dirty |= IRIS_DIRTY_GS |
1620 IRIS_DIRTY_BINDINGS_GS |
1621 IRIS_DIRTY_CONSTANTS_GS;
1622 shs->sysvals_need_upload = true;
1623 }
1624 }
1625
1626 /**
1627 * Compile a fragment (pixel) shader, and upload the assembly.
1628 */
1629 static struct iris_compiled_shader *
1630 iris_compile_fs(struct iris_context *ice,
1631 struct iris_uncompiled_shader *ish,
1632 const struct iris_fs_prog_key *key,
1633 struct brw_vue_map *vue_map)
1634 {
1635 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1636 const struct brw_compiler *compiler = screen->compiler;
1637 void *mem_ctx = ralloc_context(NULL);
1638 struct brw_wm_prog_data *fs_prog_data =
1639 rzalloc(mem_ctx, struct brw_wm_prog_data);
1640 struct brw_stage_prog_data *prog_data = &fs_prog_data->base;
1641 enum brw_param_builtin *system_values;
1642 const struct gen_device_info *devinfo = &screen->devinfo;
1643 unsigned num_system_values;
1644 unsigned num_cbufs;
1645
1646 nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
1647
1648 prog_data->use_alt_mode = ish->use_alt_mode;
1649
1650 iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
1651 &num_system_values, &num_cbufs);
1652
1653 /* Lower output variables to load_output intrinsics before setting up
1654 * binding tables, so iris_setup_binding_table can map any load_output
1655 * intrinsics to IRIS_SURFACE_GROUP_RENDER_TARGET_READ on Gen8 for
1656 * non-coherent framebuffer fetches.
1657 */
1658 brw_nir_lower_fs_outputs(nir);
1659
1660 /* On Gen11+, shader RT write messages have a "Null Render Target" bit
1661 * and do not need a binding table entry with a null surface. Earlier
1662 * generations need an entry for a null surface.
1663 */
1664 int null_rts = devinfo->gen < 11 ? 1 : 0;
1665
1666 struct iris_binding_table bt;
1667 iris_setup_binding_table(devinfo, nir, &bt,
1668 MAX2(key->nr_color_regions, null_rts),
1669 num_system_values, num_cbufs);
1670
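/* Find UBO ranges the backend can promote to push constants instead of
 * pulling them at run time.
 */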
1671 brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
1672
1673 struct brw_wm_prog_key brw_key = iris_to_brw_fs_key(devinfo, key);
1674
1675 char *error_str = NULL;
1676 const unsigned *program =
1677 brw_compile_fs(compiler, &ice->dbg, mem_ctx, &brw_key, fs_prog_data,
1678 nir, -1, -1, -1, true, false, vue_map,
1679 NULL, &error_str);
1680 if (program == NULL) {
1681 dbg_printf("Failed to compile fragment shader: %s\n", error_str);
1682 ralloc_free(mem_ctx);
1683 return NULL;
1684 }
1685
1686 if (ish->compiled_once) {
1687 iris_debug_recompile(ice, &nir->info, &brw_key.base);
1688 } else {
1689 ish->compiled_once = true;
1690 }
1691
1692 struct iris_compiled_shader *shader =
1693 iris_upload_shader(ice, IRIS_CACHE_FS, sizeof(*key), key, program,
1694 prog_data, NULL, system_values, num_system_values,
1695 num_cbufs, &bt);
1696
1697 iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
1698
1699 ralloc_free(mem_ctx);
1700 return shader;
1701 }
1702
1703 /**
1704 * Update the current fragment shader variant.
1705 *
1706 * Fill out the key, look in the cache, compile and bind if needed.
1707 */
1708 static void
1709 iris_update_compiled_fs(struct iris_context *ice)
1710 {
1711 struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_FRAGMENT];
1712 struct iris_uncompiled_shader *ish =
1713 ice->shaders.uncompiled[MESA_SHADER_FRAGMENT];
1714 struct iris_fs_prog_key key = { KEY_ID(base) };
1715 ice->vtbl.populate_fs_key(ice, &ish->nir->info, &key);
1716
1717 if (ish->nos & (1ull << IRIS_NOS_LAST_VUE_MAP))
1718 key.input_slots_valid = ice->shaders.last_vue_map->slots_valid;
1719
1720 struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_FS];
1721 struct iris_compiled_shader *shader =
1722 iris_find_cached_shader(ice, IRIS_CACHE_FS, sizeof(key), &key);
1723
1724 if (!shader)
1725 shader = iris_disk_cache_retrieve(ice, ish, &key, sizeof(key));
1726
1727 if (!shader)
1728 shader = iris_compile_fs(ice, ish, &key, ice->shaders.last_vue_map);
1729
1730 if (old != shader) {
1731 // XXX: only need to flag CLIP if barycentric has NONPERSPECTIVE
1732 // toggles. might be able to avoid flagging SBE too.
1733 ice->shaders.prog[IRIS_CACHE_FS] = shader;
1734 ice->state.dirty |= IRIS_DIRTY_FS |
1735 IRIS_DIRTY_BINDINGS_FS |
1736 IRIS_DIRTY_CONSTANTS_FS |
1737 IRIS_DIRTY_WM |
1738 IRIS_DIRTY_CLIP |
1739 IRIS_DIRTY_SBE;
1740 shs->sysvals_need_upload = true;
1741 }
1742 }
1743
1744 /**
1745 * Update the last enabled stage's VUE map.
1746 *
1747 * When the shader feeding the rasterizer's output interface changes, we
1748 * need to re-emit various packets.
1749 */
1750 static void
1751 update_last_vue_map(struct iris_context *ice,
1752 struct brw_stage_prog_data *prog_data)
1753 {
1754 struct brw_vue_prog_data *vue_prog_data = (void *) prog_data;
1755 struct brw_vue_map *vue_map = &vue_prog_data->vue_map;
1756 struct brw_vue_map *old_map = ice->shaders.last_vue_map;
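/* XOR the old and new valid-slot bitfields to find varyings that were
 * added or removed by this change.
 */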
1757 const uint64_t changed_slots =
1758 (old_map ? old_map->slots_valid : 0ull) ^ vue_map->slots_valid;
1759
1760 if (changed_slots & VARYING_BIT_VIEWPORT) {
1761 ice->state.num_viewports =
1762 (vue_map->slots_valid & VARYING_BIT_VIEWPORT) ? IRIS_MAX_VIEWPORTS : 1;
1763 ice->state.dirty |= IRIS_DIRTY_CLIP |
1764 IRIS_DIRTY_SF_CL_VIEWPORT |
1765 IRIS_DIRTY_CC_VIEWPORT |
1766 IRIS_DIRTY_SCISSOR_RECT |
1767 IRIS_DIRTY_UNCOMPILED_FS |
1768 ice->state.dirty_for_nos[IRIS_NOS_LAST_VUE_MAP];
1769 }
1770
1771 if (changed_slots || (old_map && old_map->separate != vue_map->separate)) {
1772 ice->state.dirty |= IRIS_DIRTY_SBE;
1773 }
1774
1775 ice->shaders.last_vue_map = &vue_prog_data->vue_map;
1776 }
1777
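/**
 * Upload any missing pull constant (UBO) surface states for a stage.
 *
 * This only matters for shaders that actually pull UBO data through
 * surface reads; if any descriptors changed, flag the stage's bindings
 * dirty so the binding table gets re-emitted.
 */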
1778 static void
1779 iris_update_pull_constant_descriptors(struct iris_context *ice,
1780 gl_shader_stage stage)
1781 {
1782 struct iris_compiled_shader *shader = ice->shaders.prog[stage];
1783
1784 if (!shader || !shader->prog_data->has_ubo_pull)
1785 return;
1786
1787 struct iris_shader_state *shs = &ice->state.shaders[stage];
1788 bool any_new_descriptors =
1789 shader->num_system_values > 0 && shs->sysvals_need_upload;
1790
1791 unsigned bound_cbufs = shs->bound_cbufs;
1792
1793 while (bound_cbufs) {
1794 const int i = u_bit_scan(&bound_cbufs);
1795 struct pipe_shader_buffer *cbuf = &shs->constbuf[i];
1796 struct iris_state_ref *surf_state = &shs->constbuf_surf_state[i];
1797 if (!surf_state->res && cbuf->buffer) {
1798 iris_upload_ubo_ssbo_surf_state(ice, cbuf, surf_state, false);
1799 any_new_descriptors = true;
1800 }
1801 }
1802
1803 if (any_new_descriptors)
1804 ice->state.dirty |= IRIS_DIRTY_BINDINGS_VS << stage;
1805 }
1806
1807 /**
1808 * Get the prog_data for a given stage, or NULL if the stage is disabled.
1809 */
1810 static struct brw_vue_prog_data *
1811 get_vue_prog_data(struct iris_context *ice, gl_shader_stage stage)
1812 {
1813 if (!ice->shaders.prog[stage])
1814 return NULL;
1815
1816 return (void *) ice->shaders.prog[stage]->prog_data;
1817 }
1818
1819 // XXX: iris_compiled_shaders are space-leaking :(
1820 // XXX: do remember to unbind them if deleting them.
1821
1822 /**
1823 * Update the current shader variants for the given state.
1824 *
1825 * This should be called on every draw call to ensure that the correct
1826 * shaders are bound. It will also flag any dirty state triggered by
1827 * swapping out those shaders.
1828 */
1829 void
1830 iris_update_compiled_shaders(struct iris_context *ice)
1831 {
1832 const uint64_t dirty = ice->state.dirty;
1833
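/* Snapshot the current VUE prog_data so that, after picking new variants,
 * we can tell whether any URB entry sizes changed (see the URB check near
 * the end of this function).
 */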
1834 struct brw_vue_prog_data *old_prog_datas[4];
1835 if (!(dirty & IRIS_DIRTY_URB)) {
1836 for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++)
1837 old_prog_datas[i] = get_vue_prog_data(ice, i);
1838 }
1839
1840 if (dirty & (IRIS_DIRTY_UNCOMPILED_TCS | IRIS_DIRTY_UNCOMPILED_TES)) {
1841 struct iris_uncompiled_shader *tes =
1842 ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL];
1843 if (tes) {
1844 iris_update_compiled_tcs(ice);
1845 iris_update_compiled_tes(ice);
1846 } else {
1847 ice->shaders.prog[IRIS_CACHE_TCS] = NULL;
1848 ice->shaders.prog[IRIS_CACHE_TES] = NULL;
1849 ice->state.dirty |=
1850 IRIS_DIRTY_TCS | IRIS_DIRTY_TES |
1851 IRIS_DIRTY_BINDINGS_TCS | IRIS_DIRTY_BINDINGS_TES |
1852 IRIS_DIRTY_CONSTANTS_TCS | IRIS_DIRTY_CONSTANTS_TES;
1853 }
1854 }
1855
1856 if (dirty & IRIS_DIRTY_UNCOMPILED_VS)
1857 iris_update_compiled_vs(ice);
1858 if (dirty & IRIS_DIRTY_UNCOMPILED_GS)
1859 iris_update_compiled_gs(ice);
1860
1861 if (dirty & (IRIS_DIRTY_UNCOMPILED_GS | IRIS_DIRTY_UNCOMPILED_TES)) {
1862 const struct iris_compiled_shader *gs =
1863 ice->shaders.prog[MESA_SHADER_GEOMETRY];
1864 const struct iris_compiled_shader *tes =
1865 ice->shaders.prog[MESA_SHADER_TESS_EVAL];
1866
1867 bool points_or_lines = false;
1868
1869 if (gs) {
1870 const struct brw_gs_prog_data *gs_prog_data = (void *) gs->prog_data;
1871 points_or_lines =
1872 gs_prog_data->output_topology == _3DPRIM_POINTLIST ||
1873 gs_prog_data->output_topology == _3DPRIM_LINESTRIP;
1874 } else if (tes) {
1875 const struct brw_tes_prog_data *tes_data = (void *) tes->prog_data;
1876 points_or_lines =
1877 tes_data->output_topology == BRW_TESS_OUTPUT_TOPOLOGY_LINE ||
1878 tes_data->output_topology == BRW_TESS_OUTPUT_TOPOLOGY_POINT;
1879 }
1880
1881 if (ice->shaders.output_topology_is_points_or_lines != points_or_lines) {
1882 /* This feeds the Viewport XY Clip enables in 3DSTATE_CLIP. */
1883 ice->shaders.output_topology_is_points_or_lines = points_or_lines;
1884 ice->state.dirty |= IRIS_DIRTY_CLIP;
1885 }
1886 }
1887
1888 gl_shader_stage last_stage = last_vue_stage(ice);
1889 struct iris_compiled_shader *shader = ice->shaders.prog[last_stage];
1890 struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[last_stage];
1891 update_last_vue_map(ice, shader->prog_data);
1892 if (ice->state.streamout != shader->streamout) {
1893 ice->state.streamout = shader->streamout;
1894 ice->state.dirty |= IRIS_DIRTY_SO_DECL_LIST | IRIS_DIRTY_STREAMOUT;
1895 }
1896
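/* Update stream output target strides.  Gallium expresses stream output
 * strides in dwords; convert them to bytes here.
 */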
1897 if (ice->state.streamout_active) {
1898 for (int i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
1899 struct iris_stream_output_target *so =
1900 (void *) ice->state.so_target[i];
1901 if (so)
1902 so->stride = ish->stream_output.stride[i] * sizeof(uint32_t);
1903 }
1904 }
1905
1906 if (dirty & IRIS_DIRTY_UNCOMPILED_FS)
1907 iris_update_compiled_fs(ice);
1908
1909 /* Changing shader interfaces may require a URB reconfiguration. */
1910 if (!(dirty & IRIS_DIRTY_URB)) {
1911 for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
1912 struct brw_vue_prog_data *old = old_prog_datas[i];
1913 struct brw_vue_prog_data *new = get_vue_prog_data(ice, i);
1914 if (!!old != !!new ||
1915 (new && new->urb_entry_size != old->urb_entry_size)) {
1916 ice->state.dirty |= IRIS_DIRTY_URB;
1917 break;
1918 }
1919 }
1920 }
1921
1922 for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_FRAGMENT; i++) {
1923 if (ice->state.dirty & (IRIS_DIRTY_CONSTANTS_VS << i))
1924 iris_update_pull_constant_descriptors(ice, i);
1925 }
1926 }
1927
1928 static struct iris_compiled_shader *
1929 iris_compile_cs(struct iris_context *ice,
1930 struct iris_uncompiled_shader *ish,
1931 const struct iris_cs_prog_key *key)
1932 {
1933 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1934 const struct brw_compiler *compiler = screen->compiler;
1935 void *mem_ctx = ralloc_context(NULL);
1936 struct brw_cs_prog_data *cs_prog_data =
1937 rzalloc(mem_ctx, struct brw_cs_prog_data);
1938 struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
1939 enum brw_param_builtin *system_values;
1940 const struct gen_device_info *devinfo = &screen->devinfo;
1941 unsigned num_system_values;
1942 unsigned num_cbufs;
1943
1944 nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
1945
1946 iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
1947 &num_system_values, &num_cbufs);
1948
1949 struct iris_binding_table bt;
1950 iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
1951 num_system_values, num_cbufs);
1952
1953 struct brw_cs_prog_key brw_key = iris_to_brw_cs_key(devinfo, key);
1954
1955 char *error_str = NULL;
1956 const unsigned *program =
1957 brw_compile_cs(compiler, &ice->dbg, mem_ctx, &brw_key, cs_prog_data,
1958 nir, -1, NULL, &error_str);
1959 if (program == NULL) {
1960 dbg_printf("Failed to compile compute shader: %s\n", error_str);
1961 ralloc_free(mem_ctx);
1962 return NULL;
1963 }
1964
1965 if (ish->compiled_once) {
1966 iris_debug_recompile(ice, &nir->info, &brw_key.base);
1967 } else {
1968 ish->compiled_once = true;
1969 }
1970
1971 struct iris_compiled_shader *shader =
1972 iris_upload_shader(ice, IRIS_CACHE_CS, sizeof(*key), key, program,
1973 prog_data, NULL, system_values, num_system_values,
1974 num_cbufs, &bt);
1975
1976 iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
1977
1978 ralloc_free(mem_ctx);
1979 return shader;
1980 }
1981
1982 static void
1983 iris_update_compiled_cs(struct iris_context *ice)
1984 {
1985 struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_COMPUTE];
1986 struct iris_uncompiled_shader *ish =
1987 ice->shaders.uncompiled[MESA_SHADER_COMPUTE];
1988
1989 struct iris_cs_prog_key key = { KEY_ID(base) };
1990 ice->vtbl.populate_cs_key(ice, &key);
1991
1992 struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_CS];
1993 struct iris_compiled_shader *shader =
1994 iris_find_cached_shader(ice, IRIS_CACHE_CS, sizeof(key), &key);
1995
1996 if (!shader)
1997 shader = iris_disk_cache_retrieve(ice, ish, &key, sizeof(key));
1998
1999 if (!shader)
2000 shader = iris_compile_cs(ice, ish, &key);
2001
2002 if (old != shader) {
2003 ice->shaders.prog[IRIS_CACHE_CS] = shader;
2004 ice->state.dirty |= IRIS_DIRTY_CS |
2005 IRIS_DIRTY_BINDINGS_CS |
2006 IRIS_DIRTY_CONSTANTS_CS;
2007 shs->sysvals_need_upload = true;
2008 }
2009 }
2010
2011 void
2012 iris_update_compiled_compute_shader(struct iris_context *ice)
2013 {
2014 if (ice->state.dirty & IRIS_DIRTY_UNCOMPILED_CS)
2015 iris_update_compiled_cs(ice);
2016
2017 if (ice->state.dirty & IRIS_DIRTY_CONSTANTS_CS)
2018 iris_update_pull_constant_descriptors(ice, MESA_SHADER_COMPUTE);
2019 }
2020
2021 void
2022 iris_fill_cs_push_const_buffer(struct brw_cs_prog_data *cs_prog_data,
2023 uint32_t *dst)
2024 {
2025 assert(cs_prog_data->push.total.size > 0);
2026 assert(cs_prog_data->push.cross_thread.size == 0);
2027 assert(cs_prog_data->push.per_thread.dwords == 1);
2028 assert(cs_prog_data->base.param[0] == BRW_PARAM_BUILTIN_SUBGROUP_ID);
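/* Per-thread push constant data is laid out in 32-byte (8 dword)
 * registers.  The only per-thread value is the subgroup ID, which goes in
 * the first dword of each thread's register; the rest is padding.  For
 * example, with three threads:
 *
 *    dst[0]  = 0    <- thread 0's subgroup ID
 *    dst[8]  = 1    <- thread 1's subgroup ID
 *    dst[16] = 2    <- thread 2's subgroup ID
 */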
2029 for (unsigned t = 0; t < cs_prog_data->threads; t++)
2030 dst[8 * t] = t;
2031 }
2032
2033 /**
2034 * Allocate scratch BOs as needed for the given per-thread size and stage.
2035 */
2036 struct iris_bo *
2037 iris_get_scratch_space(struct iris_context *ice,
2038 unsigned per_thread_scratch,
2039 gl_shader_stage stage)
2040 {
2041 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
2042 struct iris_bufmgr *bufmgr = screen->bufmgr;
2043 const struct gen_device_info *devinfo = &screen->devinfo;
2044
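/* The compiler reports per-thread scratch as a power of two of at least
 * 1KB.  Encode it as a power-of-two exponent relative to 1KB (1KB -> 0,
 * 2KB -> 1, 4KB -> 2, ...); this matches the hardware's encoding and also
 * serves as the index into the scratch BO cache below.
 */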
2045 unsigned encoded_size = ffs(per_thread_scratch) - 11;
2046 assert(encoded_size < (1 << 16));
2047
2048 struct iris_bo **bop = &ice->shaders.scratch_bos[encoded_size][stage];
2049
2050 /* The documentation for 3DSTATE_PS "Scratch Space Base Pointer" says:
2051 *
2052 * "Scratch Space per slice is computed based on 4 sub-slices. SW
2053 * must allocate scratch space enough so that each slice has 4
2054 * slices allowed."
2055 *
2056 * According to the other driver team, this applies to compute shaders
2057 * as well. This is not currently documented at all.
2058 *
2059 * This hack is no longer necessary on Gen11+.
2060 *
2061 * For Gen11+, scratch space allocation is based on the number of threads
2062 * in the base configuration.
2063 */
2064 unsigned subslice_total = screen->subslice_total;
2065 if (devinfo->gen >= 12)
2066 subslice_total = devinfo->num_subslices[0];
2067 else if (devinfo->gen == 11)
2068 subslice_total = 8;
2069 else if (devinfo->gen < 11)
2070 subslice_total = 4 * devinfo->num_slices;
2071 assert(subslice_total >= screen->subslice_total);
2072
2073 if (!*bop) {
2074 unsigned scratch_ids_per_subslice = devinfo->max_cs_threads;
2075
2076 if (devinfo->gen >= 12) {
2077 /* Same as ICL below, but with 16 EUs. */
2078 scratch_ids_per_subslice = 16 * 8;
2079 } else if (devinfo->gen == 11) {
2080 /* The MEDIA_VFE_STATE docs say:
2081 *
2082 * "Starting with this configuration, the Maximum Number of
2083 * Threads must be set to (#EU * 8) for GPGPU dispatches.
2084 *
2085 * Although there are only 7 threads per EU in the configuration,
2086 * the FFTID is calculated as if there are 8 threads per EU,
2087 * which in turn requires a larger amount of Scratch Space to be
2088 * allocated by the driver."
2089 */
2090 scratch_ids_per_subslice = 8 * 8;
2091 }
2092
2093 uint32_t max_threads[] = {
2094 [MESA_SHADER_VERTEX] = devinfo->max_vs_threads,
2095 [MESA_SHADER_TESS_CTRL] = devinfo->max_tcs_threads,
2096 [MESA_SHADER_TESS_EVAL] = devinfo->max_tes_threads,
2097 [MESA_SHADER_GEOMETRY] = devinfo->max_gs_threads,
2098 [MESA_SHADER_FRAGMENT] = devinfo->max_wm_threads,
2099 [MESA_SHADER_COMPUTE] = scratch_ids_per_subslice * subslice_total,
2100 };
2101
2102 uint32_t size = per_thread_scratch * max_threads[stage];
2103
2104 *bop = iris_bo_alloc(bufmgr, "scratch", size, IRIS_MEMZONE_SHADER);
2105 }
2106
2107 return *bop;
2108 }
2109
2110 /* ------------------------------------------------------------------- */
2111
2112 /**
2113 * The pipe->create_[stage]_state() driver hooks.
2114 *
2115 * Performs basic NIR preprocessing, records any state dependencies, and
2116 * returns an iris_uncompiled_shader as the Gallium CSO.
2117 *
2118 * Actual shader compilation to assembly happens later, at first use.
2119 */
2120 static void *
2121 iris_create_uncompiled_shader(struct pipe_context *ctx,
2122 nir_shader *nir,
2123 const struct pipe_stream_output_info *so_info)
2124 {
2125 struct iris_context *ice = (void *)ctx;
2126 struct iris_screen *screen = (struct iris_screen *)ctx->screen;
2127 const struct gen_device_info *devinfo = &screen->devinfo;
2128
2129 struct iris_uncompiled_shader *ish =
2130 calloc(1, sizeof(struct iris_uncompiled_shader));
2131 if (!ish)
2132 return NULL;
2133
2134 NIR_PASS(ish->needs_edge_flag, nir, iris_fix_edge_flags);
2135
2136 brw_preprocess_nir(screen->compiler, nir, NULL);
2137
2138 NIR_PASS_V(nir, brw_nir_lower_image_load_store, devinfo,
2139 &ish->uses_atomic_load_store);
2140 NIR_PASS_V(nir, iris_lower_storage_image_derefs);
2141
2142 nir_sweep(nir);
2143
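/* If the shader has compile-time constant data (large constant arrays,
 * for example), upload it once now so every variant of this shader can
 * share the same buffer.
 */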
2144 if (nir->constant_data_size > 0) {
2145 unsigned data_offset;
2146 u_upload_data(ice->shaders.uploader, 0, nir->constant_data_size,
2147 32, nir->constant_data, &data_offset, &ish->const_data);
2148
2149 struct pipe_shader_buffer psb = {
2150 .buffer = ish->const_data,
2151 .buffer_offset = data_offset,
2152 .buffer_size = nir->constant_data_size,
2153 };
2154 iris_upload_ubo_ssbo_surf_state(ice, &psb, &ish->const_data_state, false);
2155 }
2156
2157 ish->program_id = get_new_program_id(screen);
2158 ish->nir = nir;
2159 if (so_info) {
2160 memcpy(&ish->stream_output, so_info, sizeof(*so_info));
2161 update_so_info(&ish->stream_output, nir->info.outputs_written);
2162 }
2163
2164 /* Save this now before potentially dropping nir->info.name */
2165 if (nir->info.name && strncmp(nir->info.name, "ARB", 3) == 0)
2166 ish->use_alt_mode = true;
2167
2168 if (screen->disk_cache) {
2169 /* Serialize the NIR to a binary blob that we can hash for the disk
2170 * cache. Drop unnecessary information (like variable names)
2171 * so the serialized NIR is smaller, and also to let us detect more
2172 * isomorphic shaders when hashing, increasing cache hits.
2173 */
2174 struct blob blob;
2175 blob_init(&blob);
2176 nir_serialize(&blob, nir, true);
2177 _mesa_sha1_compute(blob.data, blob.size, ish->nir_sha1);
2178 blob_finish(&blob);
2179 }
2180
2181 return ish;
2182 }
2183
2184 static struct iris_uncompiled_shader *
2185 iris_create_shader_state(struct pipe_context *ctx,
2186 const struct pipe_shader_state *state)
2187 {
2188 struct nir_shader *nir;
2189
2190 if (state->type == PIPE_SHADER_IR_TGSI)
2191 nir = tgsi_to_nir(state->tokens, ctx->screen);
2192 else
2193 nir = state->ir.nir;
2194
2195 return iris_create_uncompiled_shader(ctx, nir, &state->stream_output);
2196 }
2197
2198 static void *
2199 iris_create_vs_state(struct pipe_context *ctx,
2200 const struct pipe_shader_state *state)
2201 {
2202 struct iris_context *ice = (void *) ctx;
2203 struct iris_screen *screen = (void *) ctx->screen;
2204 struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);
2205
2206 /* User clip planes: lowering them depends on rasterizer state when the shader doesn't write clip distances itself. */
2207 if (ish->nir->info.clip_distance_array_size == 0)
2208 ish->nos |= (1ull << IRIS_NOS_RASTERIZER);
2209
2210 if (screen->precompile) {
2211 struct iris_vs_prog_key key = { KEY_ID(vue.base) };
2212
2213 if (!iris_disk_cache_retrieve(ice, ish, &key, sizeof(key)))
2214 iris_compile_vs(ice, ish, &key);
2215 }
2216
2217 return ish;
2218 }
2219
2220 static void *
2221 iris_create_tcs_state(struct pipe_context *ctx,
2222 const struct pipe_shader_state *state)
2223 {
2224 struct iris_context *ice = (void *) ctx;
2225 struct iris_screen *screen = (void *) ctx->screen;
2226 const struct brw_compiler *compiler = screen->compiler;
2227 struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);
2228 struct shader_info *info = &ish->nir->info;
2229
2230 if (screen->precompile) {
2231 const unsigned _GL_TRIANGLES = 0x0004;
2232 struct iris_tcs_prog_key key = {
2233 KEY_ID(vue.base),
2234 // XXX: make sure the linker fills this out from the TES...
2235 .tes_primitive_mode =
2236 info->tess.primitive_mode ? info->tess.primitive_mode
2237 : _GL_TRIANGLES,
2238 .outputs_written = info->outputs_written,
2239 .patch_outputs_written = info->patch_outputs_written,
2240 };
2241
2242 /* 8_PATCH mode needs the key to contain the input patch dimensionality.
2243 * We don't have that information, so we randomly guess that the input
2244 * and output patches are the same size. This is a bad guess, but we
2245 * can't do much better.
2246 */
2247 if (compiler->use_tcs_8_patch)
2248 key.input_vertices = info->tess.tcs_vertices_out;
2249
2250 if (!iris_disk_cache_retrieve(ice, ish, &key, sizeof(key)))
2251 iris_compile_tcs(ice, ish, &key);
2252 }
2253
2254 return ish;
2255 }
2256
2257 static void *
2258 iris_create_tes_state(struct pipe_context *ctx,
2259 const struct pipe_shader_state *state)
2260 {
2261 struct iris_context *ice = (void *) ctx;
2262 struct iris_screen *screen = (void *) ctx->screen;
2263 struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);
2264 struct shader_info *info = &ish->nir->info;
2265
2266 /* User clip planes */
2267 if (ish->nir->info.clip_distance_array_size == 0)
2268 ish->nos |= (1ull << IRIS_NOS_RASTERIZER);
2269
2270 if (screen->precompile) {
2271 struct iris_tes_prog_key key = {
2272 KEY_ID(vue.base),
2273 // XXX: not ideal, need TCS output/TES input unification
2274 .inputs_read = info->inputs_read,
2275 .patch_inputs_read = info->patch_inputs_read,
2276 };
2277
2278 if (!iris_disk_cache_retrieve(ice, ish, &key, sizeof(key)))
2279 iris_compile_tes(ice, ish, &key);
2280 }
2281
2282 return ish;
2283 }
2284
2285 static void *
2286 iris_create_gs_state(struct pipe_context *ctx,
2287 const struct pipe_shader_state *state)
2288 {
2289 struct iris_context *ice = (void *) ctx;
2290 struct iris_screen *screen = (void *) ctx->screen;
2291 struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);
2292
2293 /* User clip planes */
2294 if (ish->nir->info.clip_distance_array_size == 0)
2295 ish->nos |= (1ull << IRIS_NOS_RASTERIZER);
2296
2297 if (screen->precompile) {
2298 struct iris_gs_prog_key key = { KEY_ID(vue.base) };
2299
2300 if (!iris_disk_cache_retrieve(ice, ish, &key, sizeof(key)))
2301 iris_compile_gs(ice, ish, &key);
2302 }
2303
2304 return ish;
2305 }
2306
2307 static void *
2308 iris_create_fs_state(struct pipe_context *ctx,
2309 const struct pipe_shader_state *state)
2310 {
2311 struct iris_context *ice = (void *) ctx;
2312 struct iris_screen *screen = (void *) ctx->screen;
2313 struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);
2314 struct shader_info *info = &ish->nir->info;
2315
2316 ish->nos |= (1ull << IRIS_NOS_FRAMEBUFFER) |
2317 (1ull << IRIS_NOS_DEPTH_STENCIL_ALPHA) |
2318 (1ull << IRIS_NOS_RASTERIZER) |
2319 (1ull << IRIS_NOS_BLEND);
2320
2321 /* The program key needs the VUE map if there are > 16 inputs */
2322 if (util_bitcount64(ish->nir->info.inputs_read &
2323 BRW_FS_VARYING_INPUT_MASK) > 16) {
2324 ish->nos |= (1ull << IRIS_NOS_LAST_VUE_MAP);
2325 }
2326
2327 if (screen->precompile) {
2328 const uint64_t color_outputs = info->outputs_written &
2329 ~(BITFIELD64_BIT(FRAG_RESULT_DEPTH) |
2330 BITFIELD64_BIT(FRAG_RESULT_STENCIL) |
2331 BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK));
2332
2333 bool can_rearrange_varyings =
2334 util_bitcount64(info->inputs_read & BRW_FS_VARYING_INPUT_MASK) <= 16;
2335
2336 const struct gen_device_info *devinfo = &screen->devinfo;
2337 struct iris_fs_prog_key key = {
2338 KEY_ID(base),
2339 .nr_color_regions = util_bitcount(color_outputs),
2340 .coherent_fb_fetch = devinfo->gen >= 9,
2341 .input_slots_valid =
2342 can_rearrange_varyings ? 0 : info->inputs_read | VARYING_BIT_POS,
2343 };
2344
2345 if (!iris_disk_cache_retrieve(ice, ish, &key, sizeof(key)))
2346 iris_compile_fs(ice, ish, &key, NULL);
2347 }
2348
2349 return ish;
2350 }
2351
2352 static void *
2353 iris_create_compute_state(struct pipe_context *ctx,
2354 const struct pipe_compute_state *state)
2355 {
2356 assert(state->ir_type == PIPE_SHADER_IR_NIR);
2357
2358 struct iris_context *ice = (void *) ctx;
2359 struct iris_screen *screen = (void *) ctx->screen;
2360 struct iris_uncompiled_shader *ish =
2361 iris_create_uncompiled_shader(ctx, (void *) state->prog, NULL);
2362
2363 // XXX: disallow more than 64KB of shared variables
2364
2365 if (screen->precompile) {
2366 struct iris_cs_prog_key key = { KEY_ID(base) };
2367
2368 if (!iris_disk_cache_retrieve(ice, ish, &key, sizeof(key)))
2369 iris_compile_cs(ice, ish, &key);
2370 }
2371
2372 return ish;
2373 }
2374
2375 /**
2376 * The pipe->delete_[stage]_state() driver hooks.
2377 *
2378 * Frees the iris_uncompiled_shader.
2379 */
2380 static void
2381 iris_delete_shader_state(struct pipe_context *ctx, void *state, gl_shader_stage stage)
2382 {
2383 struct iris_uncompiled_shader *ish = state;
2384 struct iris_context *ice = (void *) ctx;
2385
2386 if (ice->shaders.uncompiled[stage] == ish) {
2387 ice->shaders.uncompiled[stage] = NULL;
2388 ice->state.dirty |= IRIS_DIRTY_UNCOMPILED_VS << stage;
2389 }
2390
2391 if (ish->const_data) {
2392 pipe_resource_reference(&ish->const_data, NULL);
2393 pipe_resource_reference(&ish->const_data_state.res, NULL);
2394 }
2395
2396 ralloc_free(ish->nir);
2397 free(ish);
2398 }
2399
2400 static void
2401 iris_delete_vs_state(struct pipe_context *ctx, void *state)
2402 {
2403 iris_delete_shader_state(ctx, state, MESA_SHADER_VERTEX);
2404 }
2405
2406 static void
2407 iris_delete_tcs_state(struct pipe_context *ctx, void *state)
2408 {
2409 iris_delete_shader_state(ctx, state, MESA_SHADER_TESS_CTRL);
2410 }
2411
2412 static void
2413 iris_delete_tes_state(struct pipe_context *ctx, void *state)
2414 {
2415 iris_delete_shader_state(ctx, state, MESA_SHADER_TESS_EVAL);
2416 }
2417
2418 static void
2419 iris_delete_gs_state(struct pipe_context *ctx, void *state)
2420 {
2421 iris_delete_shader_state(ctx, state, MESA_SHADER_GEOMETRY);
2422 }
2423
2424 static void
2425 iris_delete_fs_state(struct pipe_context *ctx, void *state)
2426 {
2427 iris_delete_shader_state(ctx, state, MESA_SHADER_FRAGMENT);
2428 }
2429
2430 static void
2431 iris_delete_cs_state(struct pipe_context *ctx, void *state)
2432 {
2433 iris_delete_shader_state(ctx, state, MESA_SHADER_COMPUTE);
2434 }
2435
2436 /**
2437 * The pipe->bind_[stage]_state() driver hook.
2438 *
2439 * Binds an uncompiled shader as the current one for a particular stage.
2440 * Updates dirty tracking to account for the shader's NOS.
2441 */
2442 static void
2443 bind_shader_state(struct iris_context *ice,
2444 struct iris_uncompiled_shader *ish,
2445 gl_shader_stage stage)
2446 {
2447 uint64_t dirty_bit = IRIS_DIRTY_UNCOMPILED_VS << stage;
2448 const uint64_t nos = ish ? ish->nos : 0;
2449
2450 const struct shader_info *old_info = iris_get_shader_info(ice, stage);
2451 const struct shader_info *new_info = ish ? &ish->nir->info : NULL;
2452
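/* The number of sampler states we upload depends on how many textures the
 * shader actually uses, so re-upload them when that count changes.
 */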
2453 if ((old_info ? util_last_bit(old_info->textures_used) : 0) !=
2454 (new_info ? util_last_bit(new_info->textures_used) : 0)) {
2455 ice->state.dirty |= IRIS_DIRTY_SAMPLER_STATES_VS << stage;
2456 }
2457
2458 ice->shaders.uncompiled[stage] = ish;
2459 ice->state.dirty |= dirty_bit;
2460
2461 /* Record that CSOs need to mark IRIS_DIRTY_UNCOMPILED_XS when they change
2462 * (or that they no longer need to do so).
2463 */
2464 for (int i = 0; i < IRIS_NOS_COUNT; i++) {
2465 if (nos & (1 << i))
2466 ice->state.dirty_for_nos[i] |= dirty_bit;
2467 else
2468 ice->state.dirty_for_nos[i] &= ~dirty_bit;
2469 }
2470 }
2471
2472 static void
2473 iris_bind_vs_state(struct pipe_context *ctx, void *state)
2474 {
2475 struct iris_context *ice = (struct iris_context *)ctx;
2476 struct iris_uncompiled_shader *new_ish = state;
2477
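/* Shaders that emit positions already in window space bypass the usual
 * viewport transform, so toggling this affects clip, raster, and viewport
 * state.
 */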
2478 if (new_ish &&
2479 ice->state.window_space_position !=
2480 new_ish->nir->info.vs.window_space_position) {
2481 ice->state.window_space_position =
2482 new_ish->nir->info.vs.window_space_position;
2483
2484 ice->state.dirty |= IRIS_DIRTY_CLIP |
2485 IRIS_DIRTY_RASTER |
2486 IRIS_DIRTY_CC_VIEWPORT;
2487 }
2488
2489 bind_shader_state((void *) ctx, state, MESA_SHADER_VERTEX);
2490 }
2491
2492 static void
2493 iris_bind_tcs_state(struct pipe_context *ctx, void *state)
2494 {
2495 bind_shader_state((void *) ctx, state, MESA_SHADER_TESS_CTRL);
2496 }
2497
2498 static void
2499 iris_bind_tes_state(struct pipe_context *ctx, void *state)
2500 {
2501 struct iris_context *ice = (struct iris_context *)ctx;
2502
2503 /* Enabling/disabling optional stages requires a URB reconfiguration. */
2504 if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL])
2505 ice->state.dirty |= IRIS_DIRTY_URB;
2506
2507 bind_shader_state((void *) ctx, state, MESA_SHADER_TESS_EVAL);
2508 }
2509
2510 static void
2511 iris_bind_gs_state(struct pipe_context *ctx, void *state)
2512 {
2513 struct iris_context *ice = (struct iris_context *)ctx;
2514
2515 /* Enabling/disabling optional stages requires a URB reconfiguration. */
2516 if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_GEOMETRY])
2517 ice->state.dirty |= IRIS_DIRTY_URB;
2518
2519 bind_shader_state((void *) ctx, state, MESA_SHADER_GEOMETRY);
2520 }
2521
2522 static void
2523 iris_bind_fs_state(struct pipe_context *ctx, void *state)
2524 {
2525 struct iris_context *ice = (struct iris_context *) ctx;
2526 struct iris_screen *screen = (struct iris_screen *) ctx->screen;
2527 const struct gen_device_info *devinfo = &screen->devinfo;
2528 struct iris_uncompiled_shader *old_ish =
2529 ice->shaders.uncompiled[MESA_SHADER_FRAGMENT];
2530 struct iris_uncompiled_shader *new_ish = state;
2531
2532 const unsigned color_bits =
2533 BITFIELD64_BIT(FRAG_RESULT_COLOR) |
2534 BITFIELD64_RANGE(FRAG_RESULT_DATA0, BRW_MAX_DRAW_BUFFERS);
2535
2536 /* Fragment shader outputs influence HasWriteableRT */
2537 if (!old_ish || !new_ish ||
2538 (old_ish->nir->info.outputs_written & color_bits) !=
2539 (new_ish->nir->info.outputs_written & color_bits))
2540 ice->state.dirty |= IRIS_DIRTY_PS_BLEND;
2541
2542 if (devinfo->gen == 8)
2543 ice->state.dirty |= IRIS_DIRTY_PMA_FIX;
2544
2545 bind_shader_state((void *) ctx, state, MESA_SHADER_FRAGMENT);
2546 }
2547
2548 static void
2549 iris_bind_cs_state(struct pipe_context *ctx, void *state)
2550 {
2551 bind_shader_state((void *) ctx, state, MESA_SHADER_COMPUTE);
2552 }
2553
2554 void
2555 iris_init_program_functions(struct pipe_context *ctx)
2556 {
2557 ctx->create_vs_state = iris_create_vs_state;
2558 ctx->create_tcs_state = iris_create_tcs_state;
2559 ctx->create_tes_state = iris_create_tes_state;
2560 ctx->create_gs_state = iris_create_gs_state;
2561 ctx->create_fs_state = iris_create_fs_state;
2562 ctx->create_compute_state = iris_create_compute_state;
2563
2564 ctx->delete_vs_state = iris_delete_vs_state;
2565 ctx->delete_tcs_state = iris_delete_tcs_state;
2566 ctx->delete_tes_state = iris_delete_tes_state;
2567 ctx->delete_gs_state = iris_delete_gs_state;
2568 ctx->delete_fs_state = iris_delete_fs_state;
2569 ctx->delete_compute_state = iris_delete_cs_state;
2570
2571 ctx->bind_vs_state = iris_bind_vs_state;
2572 ctx->bind_tcs_state = iris_bind_tcs_state;
2573 ctx->bind_tes_state = iris_bind_tes_state;
2574 ctx->bind_gs_state = iris_bind_gs_state;
2575 ctx->bind_fs_state = iris_bind_fs_state;
2576 ctx->bind_compute_state = iris_bind_cs_state;
2577 }