iris: Enable precompiles
src/gallium/drivers/iris/iris_program.c
/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * @file iris_program.c
 *
 * This file contains the driver interface for compiling shaders.
 *
 * See iris_program_cache.c for the in-memory program cache where the
 * compiled shaders are stored.
 */

#include <stdio.h>
#include <errno.h>
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "util/u_atomic.h"
#include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h"
#include "intel/compiler/brw_compiler.h"
#include "intel/compiler/brw_nir.h"
#include "iris_context.h"

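/* 0x688 is SWIZZLE_XYZW packed 3 bits per component:
 * SWIZZLE_X (0) | SWIZZLE_Y (1) << 3 | SWIZZLE_Z (2) << 6 | SWIZZLE_W (3) << 9,
 * i.e. every sampler in the key defaults to an identity swizzle.
 */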
#define ALL_SAMPLERS_XYZW .tex.swizzles[0 ... MAX_SAMPLERS - 1] = 0x688
#define KEY_INIT .program_string_id = ish->program_id, ALL_SAMPLERS_XYZW

static struct iris_compiled_shader *
iris_compile_vs(struct iris_context *, struct iris_uncompiled_shader *,
                const struct brw_vs_prog_key *);
static struct iris_compiled_shader *
iris_compile_tcs(struct iris_context *, struct iris_uncompiled_shader *,
                 const struct brw_tcs_prog_key *);
static struct iris_compiled_shader *
iris_compile_tes(struct iris_context *, struct iris_uncompiled_shader *,
                 const struct brw_tes_prog_key *);
static struct iris_compiled_shader *
iris_compile_gs(struct iris_context *, struct iris_uncompiled_shader *,
                const struct brw_gs_prog_key *);
static struct iris_compiled_shader *
iris_compile_fs(struct iris_context *, struct iris_uncompiled_shader *,
                const struct brw_wm_prog_key *, struct brw_vue_map *);
static struct iris_compiled_shader *
iris_compile_cs(struct iris_context *, struct iris_uncompiled_shader *,
                const struct brw_cs_prog_key *);


static unsigned
get_new_program_id(struct iris_screen *screen)
{
   return p_atomic_inc_return(&screen->program_id);
}

/**
 * An uncompiled, API-facing shader.  This is the Gallium CSO for shaders.
 * It primarily contains the NIR for the shader.
 *
 * Each API-facing shader can be compiled into multiple shader variants,
 * based on non-orthogonal state dependencies, recorded in the shader key.
 *
 * See iris_compiled_shader, which represents a compiled shader variant.
 */
struct iris_uncompiled_shader {
   nir_shader *nir;

   struct pipe_stream_output_info stream_output;

   unsigned program_id;

   /** Bitfield of (1 << IRIS_NOS_*) flags. */
   unsigned nos;

   /** Have any shader variants been compiled yet? */
   bool compiled_once;
};

static nir_ssa_def *
get_aoa_deref_offset(nir_builder *b,
                     nir_deref_instr *deref,
                     unsigned elem_size)
{
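   /* A sketch of what this computes: for `image2D img[3][2]` accessed as
    * img[i][j], walking the deref chain from the innermost array outward
    * accumulates offset = j * 1 + i * 2, which is then clamped below to
    * the last valid element (5).
    */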
   unsigned array_size = elem_size;
   nir_ssa_def *offset = nir_imm_int(b, 0);

   while (deref->deref_type != nir_deref_type_var) {
      assert(deref->deref_type == nir_deref_type_array);

      /* This level's element size is the previous level's array size */
      nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
      assert(deref->arr.index.ssa);
      offset = nir_iadd(b, offset,
                        nir_imul(b, index, nir_imm_int(b, array_size)));

      deref = nir_deref_instr_parent(deref);
      assert(glsl_type_is_array(deref->type));
      array_size *= glsl_get_length(deref->type);
   }

   /* Accessing an invalid surface index with the dataport can result in a
    * hang.  According to the spec "if the index used to select an individual
    * element is negative or greater than or equal to the size of the array,
    * the results of the operation are undefined but may not lead to
    * termination" -- which is one of the possible outcomes of the hang.
    * Clamp the index to prevent access outside of the array bounds.
    */
   return nir_umin(b, offset, nir_imm_int(b, array_size - elem_size));
}

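/**
 * Lower image_deref_* intrinsics to the index-based forms the Intel
 * backend expects, flattening each variable dereference chain into a
 * binding table index (driver_location plus the array-of-arrays offset).
 */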
static void
iris_lower_storage_image_derefs(nir_shader *nir)
{
   nir_function_impl *impl = nir_shader_get_entrypoint(nir);

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_image_deref_load:
         case nir_intrinsic_image_deref_store:
         case nir_intrinsic_image_deref_atomic_add:
         case nir_intrinsic_image_deref_atomic_min:
         case nir_intrinsic_image_deref_atomic_max:
         case nir_intrinsic_image_deref_atomic_and:
         case nir_intrinsic_image_deref_atomic_or:
         case nir_intrinsic_image_deref_atomic_xor:
         case nir_intrinsic_image_deref_atomic_exchange:
         case nir_intrinsic_image_deref_atomic_comp_swap:
         case nir_intrinsic_image_deref_size:
         case nir_intrinsic_image_deref_samples: {
            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
            nir_variable *var = nir_deref_instr_get_variable(deref);

            b.cursor = nir_before_instr(&intrin->instr);
            nir_ssa_def *index =
               nir_iadd(&b, nir_imm_int(&b, var->data.driver_location),
                        get_aoa_deref_offset(&b, deref, 1));
            brw_nir_rewrite_image_intrinsic(intrin, index);
            break;
         }

         default:
            break;
         }
      }
   }
}

// XXX: need unify_interfaces() at link time...

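/**
 * Fix up stream output entries for the scalar built-ins (gl_PointSize,
 * gl_Layer, gl_ViewportIndex), which are all packed into components of
 * the VARYING_SLOT_PSIZ VUE header slot.
 */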
static void
update_so_info(struct pipe_stream_output_info *so_info)
{
   for (unsigned i = 0; i < so_info->num_outputs; i++) {
      struct pipe_stream_output *output = &so_info->output[i];

      /* The VUE header contains three scalar fields packed together:
       * - gl_PointSize is stored in VARYING_SLOT_PSIZ.w
       * - gl_Layer is stored in VARYING_SLOT_PSIZ.y
       * - gl_ViewportIndex is stored in VARYING_SLOT_PSIZ.z
       */
      switch (output->register_index) {
      case VARYING_SLOT_LAYER:
         assert(output->num_components == 1);
         output->register_index = VARYING_SLOT_PSIZ;
         output->start_component = 1;
         break;
      case VARYING_SLOT_VIEWPORT:
         assert(output->num_components == 1);
         output->register_index = VARYING_SLOT_PSIZ;
         output->start_component = 2;
         break;
      case VARYING_SLOT_PSIZ:
         assert(output->num_components == 1);
         output->start_component = 3;
         break;
      }

      //info->outputs_written |= 1ull << output->register_index;
   }
}

/**
 * The pipe->create_[stage]_state() driver hooks.
 *
 * Performs basic NIR preprocessing, records any state dependencies, and
 * returns an iris_uncompiled_shader as the Gallium CSO.
 *
 * Actual shader compilation to assembly happens later, at first use.
 */
static void *
iris_create_uncompiled_shader(struct pipe_context *ctx,
                              nir_shader *nir,
                              const struct pipe_stream_output_info *so_info)
{
   struct iris_screen *screen = (struct iris_screen *)ctx->screen;
   const struct gen_device_info *devinfo = &screen->devinfo;

   struct iris_uncompiled_shader *ish =
      calloc(1, sizeof(struct iris_uncompiled_shader));
   if (!ish)
      return NULL;

   nir = brw_preprocess_nir(screen->compiler, nir);

   NIR_PASS_V(nir, brw_nir_lower_image_load_store, devinfo);
   NIR_PASS_V(nir, iris_lower_storage_image_derefs);

   ish->program_id = get_new_program_id(screen);
   ish->nir = nir;
   if (so_info) {
      memcpy(&ish->stream_output, so_info, sizeof(*so_info));
      update_so_info(&ish->stream_output);
   }

   return ish;
}

static struct iris_uncompiled_shader *
iris_create_shader_state(struct pipe_context *ctx,
                         const struct pipe_shader_state *state)
{
   assert(state->type == PIPE_SHADER_IR_NIR);

   return iris_create_uncompiled_shader(ctx, state->ir.nir,
                                        &state->stream_output);
}

static void *
iris_create_vs_state(struct pipe_context *ctx,
                     const struct pipe_shader_state *state)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_screen *screen = (void *) ctx->screen;
   struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);

   /* User clip planes */
   if (ish->nir->info.clip_distance_array_size == 0)
      ish->nos |= IRIS_NOS_RASTERIZER;

   if (screen->precompile) {
      struct brw_vs_prog_key key = { KEY_INIT };

      iris_compile_vs(ice, ish, &key);
   }

   return ish;
}

static void *
iris_create_tcs_state(struct pipe_context *ctx,
                      const struct pipe_shader_state *state)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_screen *screen = (void *) ctx->screen;
   struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);
   struct shader_info *info = &ish->nir->info;

   // XXX: NOS?

   if (screen->precompile) {
      const unsigned _GL_TRIANGLES = 0x0004;
      struct brw_tcs_prog_key key = {
         KEY_INIT,
         // XXX: make sure the linker fills this out from the TES...
         .tes_primitive_mode =
            info->tess.primitive_mode ? info->tess.primitive_mode
                                      : _GL_TRIANGLES,
         .outputs_written = info->outputs_written,
         .patch_outputs_written = info->patch_outputs_written,
      };

      iris_compile_tcs(ice, ish, &key);
   }

   return ish;
}

static void *
iris_create_tes_state(struct pipe_context *ctx,
                      const struct pipe_shader_state *state)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_screen *screen = (void *) ctx->screen;
   struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);
   struct shader_info *info = &ish->nir->info;

   // XXX: NOS?

   if (screen->precompile) {
      struct brw_tes_prog_key key = {
         KEY_INIT,
         // XXX: not ideal, need TCS output/TES input unification
         .inputs_read = info->inputs_read,
         .patch_inputs_read = info->patch_inputs_read,
      };

      iris_compile_tes(ice, ish, &key);
   }

   return ish;
}

static void *
iris_create_gs_state(struct pipe_context *ctx,
                     const struct pipe_shader_state *state)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_screen *screen = (void *) ctx->screen;
   struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);

   // XXX: NOS?

   if (screen->precompile) {
      struct brw_gs_prog_key key = { KEY_INIT };

      iris_compile_gs(ice, ish, &key);
   }

   return ish;
}

static void *
iris_create_fs_state(struct pipe_context *ctx,
                     const struct pipe_shader_state *state)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_screen *screen = (void *) ctx->screen;
   struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);
   struct shader_info *info = &ish->nir->info;

   ish->nos |= IRIS_NOS_FRAMEBUFFER |
               IRIS_NOS_DEPTH_STENCIL_ALPHA |
               IRIS_NOS_RASTERIZER |
               IRIS_NOS_BLEND;

   /* The program key needs the VUE map if there are > 16 inputs */
   if (util_bitcount64(ish->nir->info.inputs_read &
                       BRW_FS_VARYING_INPUT_MASK) > 16) {
      ish->nos |= IRIS_NOS_LAST_VUE_MAP;
   }

   if (screen->precompile) {
      const uint64_t color_outputs = info->outputs_written &
         ~(BITFIELD64_BIT(FRAG_RESULT_DEPTH) |
           BITFIELD64_BIT(FRAG_RESULT_STENCIL) |
           BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK));

      bool can_rearrange_varyings =
         util_bitcount64(info->inputs_read & BRW_FS_VARYING_INPUT_MASK) <= 16;

      struct brw_wm_prog_key key = {
         KEY_INIT,
         .nr_color_regions = util_bitcount(color_outputs),
         .coherent_fb_fetch = true,
         .input_slots_valid =
            can_rearrange_varyings ? 0 : info->inputs_read | VARYING_BIT_POS,
      };

      iris_compile_fs(ice, ish, &key, NULL);
   }

   return ish;
}

static void *
iris_create_compute_state(struct pipe_context *ctx,
                          const struct pipe_compute_state *state)
{
   assert(state->ir_type == PIPE_SHADER_IR_NIR);

   struct iris_context *ice = (void *) ctx;
   struct iris_screen *screen = (void *) ctx->screen;
   struct iris_uncompiled_shader *ish =
      iris_create_uncompiled_shader(ctx, (void *) state->prog, NULL);

   // XXX: disallow more than 64KB of shared variables

   if (screen->precompile) {
      struct brw_cs_prog_key key = { KEY_INIT };

      iris_compile_cs(ice, ish, &key);
   }

   return ish;
}

/**
 * The pipe->delete_[stage]_state() driver hooks.
 *
 * Frees the iris_uncompiled_shader.
 */
static void
iris_delete_shader_state(struct pipe_context *ctx, void *state)
{
   struct iris_uncompiled_shader *ish = state;

   ralloc_free(ish->nir);
   free(ish);
}

/**
 * The pipe->bind_[stage]_state() driver hook.
 *
 * Binds an uncompiled shader as the current one for a particular stage.
 * Updates dirty tracking to account for the shader's NOS.
 */
static void
bind_state(struct iris_context *ice,
           struct iris_uncompiled_shader *ish,
           gl_shader_stage stage)
{
   uint64_t dirty_bit = IRIS_DIRTY_UNCOMPILED_VS << stage;
   const uint64_t nos = ish ? ish->nos : 0;

   ice->shaders.uncompiled[stage] = ish;
   ice->state.dirty |= dirty_bit;

   /* Record that CSOs need to mark IRIS_DIRTY_UNCOMPILED_XS when they change
    * (or that they no longer need to do so).
    */
   for (int i = 0; i < IRIS_NOS_COUNT; i++) {
      if (nos & (1 << i))
         ice->state.dirty_for_nos[i] |= dirty_bit;
      else
         ice->state.dirty_for_nos[i] &= ~dirty_bit;
   }
}

static void
iris_bind_vs_state(struct pipe_context *ctx, void *state)
{
   bind_state((void *) ctx, state, MESA_SHADER_VERTEX);
}

static void
iris_bind_tcs_state(struct pipe_context *ctx, void *state)
{
   bind_state((void *) ctx, state, MESA_SHADER_TESS_CTRL);
}

static void
iris_bind_tes_state(struct pipe_context *ctx, void *state)
{
   struct iris_context *ice = (struct iris_context *)ctx;

   /* Enabling/disabling optional stages requires a URB reconfiguration. */
   if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL])
      ice->state.dirty |= IRIS_DIRTY_URB;

   bind_state((void *) ctx, state, MESA_SHADER_TESS_EVAL);
}

static void
iris_bind_gs_state(struct pipe_context *ctx, void *state)
{
   struct iris_context *ice = (struct iris_context *)ctx;

   /* Enabling/disabling optional stages requires a URB reconfiguration. */
   if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_GEOMETRY])
      ice->state.dirty |= IRIS_DIRTY_URB;

   bind_state((void *) ctx, state, MESA_SHADER_GEOMETRY);
}

static void
iris_bind_fs_state(struct pipe_context *ctx, void *state)
{
   bind_state((void *) ctx, state, MESA_SHADER_FRAGMENT);
}

static void
iris_bind_cs_state(struct pipe_context *ctx, void *state)
{
   bind_state((void *) ctx, state, MESA_SHADER_COMPUTE);
}

/**
 * Sets up the starting offsets for the groups of binding table entries
 * common to all pipeline stages.
 *
 * Unused groups are initialized to 0xd0d0d0d0 to make it obvious that they
 * are unused, and also so that adding even a small offset to them will
 * trigger our asserts that surface indices are < BRW_MAX_SURFACES.
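 *
 * As an illustrative example: a shader with two textures and one UBO (and
 * nothing else) gets texture_start = 0, ubo_start = 2,
 * pull_constants_start = 3, plane_start[1] = 4, plane_start[2] = 6, and a
 * binding table of 8 entries (size_bytes = 32).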
 */
static uint32_t
assign_common_binding_table_offsets(const struct gen_device_info *devinfo,
                                    const struct nir_shader *nir,
                                    struct brw_stage_prog_data *prog_data,
                                    uint32_t next_binding_table_offset,
                                    unsigned num_system_values)
{
   const struct shader_info *info = &nir->info;

   if (info->num_textures) {
      prog_data->binding_table.texture_start = next_binding_table_offset;
      prog_data->binding_table.gather_texture_start = next_binding_table_offset;
      next_binding_table_offset += info->num_textures;
   } else {
      prog_data->binding_table.texture_start = 0xd0d0d0d0;
      prog_data->binding_table.gather_texture_start = 0xd0d0d0d0;
   }

   if (info->num_images) {
      prog_data->binding_table.image_start = next_binding_table_offset;
      next_binding_table_offset += info->num_images;
   } else {
      prog_data->binding_table.image_start = 0xd0d0d0d0;
   }

   int num_ubos = info->num_ubos +
                  ((nir->num_uniforms || num_system_values) ? 1 : 0);

   if (num_ubos) {
      //assert(info->num_ubos <= BRW_MAX_UBO);
      prog_data->binding_table.ubo_start = next_binding_table_offset;
      next_binding_table_offset += num_ubos;
   } else {
      prog_data->binding_table.ubo_start = 0xd0d0d0d0;
   }

   if (info->num_ssbos || info->num_abos) {
      prog_data->binding_table.ssbo_start = next_binding_table_offset;
      // XXX: see iris_state "wasting 16 binding table slots for ABOs" comment
      next_binding_table_offset += IRIS_MAX_ABOS + info->num_ssbos;
   } else {
      prog_data->binding_table.ssbo_start = 0xd0d0d0d0;
   }

   prog_data->binding_table.shader_time_start = 0xd0d0d0d0;

   /* This may or may not be used depending on how the compile goes. */
   prog_data->binding_table.pull_constants_start = next_binding_table_offset;
   next_binding_table_offset++;

   /* Plane 0 is just the regular texture section */
   prog_data->binding_table.plane_start[0] = prog_data->binding_table.texture_start;

   prog_data->binding_table.plane_start[1] = next_binding_table_offset;
   next_binding_table_offset += info->num_textures;

   prog_data->binding_table.plane_start[2] = next_binding_table_offset;
   next_binding_table_offset += info->num_textures;

   /* Set the binding table size */
   prog_data->binding_table.size_bytes = next_binding_table_offset * 4;

   return next_binding_table_offset;
}

/**
 * Associate NIR uniform variables with the prog_data->param[] mechanism
 * used by the backend.  Also, decide which UBOs we'd like to push in an
 * ideal situation (though the backend can reduce this).
 */
static void
iris_setup_uniforms(const struct brw_compiler *compiler,
                    void *mem_ctx,
                    nir_shader *nir,
                    struct brw_stage_prog_data *prog_data,
                    enum brw_param_builtin **out_system_values,
                    unsigned *out_num_system_values)
{
   /* We don't use params[], but fs_visitor::nir_setup_uniforms() asserts
    * about it for compute shaders, so go ahead and make some fake ones
    * which the backend will dead code eliminate.
    */
   prog_data->nr_params = nir->num_uniforms;
   prog_data->param = rzalloc_array(mem_ctx, uint32_t, prog_data->nr_params);

   /* The intel compiler assumes that num_uniforms is in bytes.  For
    * scalar that means 4 bytes per uniform slot.
    *
    * Ref: brw_nir_lower_uniforms, type_size_scalar_bytes.
    */
   nir->num_uniforms *= 4;

   const unsigned IRIS_MAX_SYSTEM_VALUES = 32;
   enum brw_param_builtin *system_values =
      rzalloc_array(mem_ctx, enum brw_param_builtin, IRIS_MAX_SYSTEM_VALUES);
   unsigned num_system_values = 0;

   nir_function_impl *impl = nir_shader_get_entrypoint(nir);

   nir_builder b;
   nir_builder_init(&b, impl);

   b.cursor = nir_before_block(nir_start_block(impl));
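   /* The system values gathered below will live at the start of constant
    * buffer 0.  Use an SSA undef as a stand-in for that buffer's index until
    * we know how many system values there are; the remapping loop further
    * down rewrites it to the real index (0).
    */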
   nir_ssa_def *temp_ubo_name = nir_ssa_undef(&b, 1, 32);

   /* Turn system value intrinsics into uniforms */
   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

         unsigned idx = num_system_values;

         switch (intrin->intrinsic) {
         case nir_intrinsic_load_user_clip_plane: {
            unsigned ucp = nir_intrinsic_ucp_id(intrin);
            for (int i = 0; i < 4; i++) {
               system_values[num_system_values++] =
                  BRW_PARAM_BUILTIN_CLIP_PLANE(ucp, i);
            }
            break;
         }
         default:
            continue;
         }

         b.cursor = nir_before_instr(instr);

         unsigned comps = nir_intrinsic_dest_components(intrin);
         nir_ssa_def *offset = nir_imm_int(&b, idx * sizeof(uint32_t));

         nir_intrinsic_instr *load =
            nir_intrinsic_instr_create(nir, nir_intrinsic_load_ubo);
         load->num_components = comps;
         load->src[0] = nir_src_for_ssa(temp_ubo_name);
         load->src[1] = nir_src_for_ssa(offset);
         nir_ssa_dest_init(&load->instr, &load->dest, comps, 32, NULL);
         nir_builder_instr_insert(&b, &load->instr);
         nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                                  nir_src_for_ssa(&load->dest.ssa));
         nir_instr_remove(instr);
      }
   }

   nir_validate_shader(nir, "before remapping");

   /* Place the new params at the front of constant buffer 0. */
   if (num_system_values > 0) {
      nir->num_uniforms += num_system_values * sizeof(uint32_t);

      system_values = reralloc(mem_ctx, system_values, enum brw_param_builtin,
                               num_system_values);

      nir_foreach_block(block, impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);

            if (load->intrinsic != nir_intrinsic_load_ubo)
               continue;

            b.cursor = nir_before_instr(instr);

            assert(load->src[0].is_ssa);

            if (load->src[0].ssa == temp_ubo_name) {
               nir_instr_rewrite_src(instr, &load->src[0],
                                     nir_src_for_ssa(nir_imm_int(&b, 0)));
            } else if (nir_src_as_uint(load->src[0]) == 0) {
               nir_ssa_def *offset =
                  nir_iadd(&b, load->src[1].ssa,
                           nir_imm_int(&b, 4 * num_system_values));
               nir_instr_rewrite_src(instr, &load->src[1],
                                     nir_src_for_ssa(offset));
            }
         }
      }

      /* We need to fold the new iadds for brw_nir_analyze_ubo_ranges */
      nir_opt_constant_folding(nir);
   } else {
      ralloc_free(system_values);
      system_values = NULL;
   }

   nir_validate_shader(nir, "after remap");

   // XXX: vs clip planes?
   if (nir->info.stage != MESA_SHADER_COMPUTE)
      brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);

   *out_system_values = system_values;
   *out_num_system_values = num_system_values;
}

/**
 * Compile a vertex shader, and upload the assembly.
 */
static struct iris_compiled_shader *
iris_compile_vs(struct iris_context *ice,
                struct iris_uncompiled_shader *ish,
                const struct brw_vs_prog_key *key)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   const struct brw_compiler *compiler = screen->compiler;
   const struct gen_device_info *devinfo = &screen->devinfo;
   void *mem_ctx = ralloc_context(NULL);
   struct brw_vs_prog_data *vs_prog_data =
      rzalloc(mem_ctx, struct brw_vs_prog_data);
   struct brw_vue_prog_data *vue_prog_data = &vs_prog_data->base;
   struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
   enum brw_param_builtin *system_values;
   unsigned num_system_values;

   nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);

   if (key->nr_userclip_plane_consts) {
      nir_function_impl *impl = nir_shader_get_entrypoint(nir);
      nir_lower_clip_vs(nir, (1 << key->nr_userclip_plane_consts) - 1, true);
      nir_lower_io_to_temporaries(nir, impl, true, false);
      nir_lower_global_vars_to_local(nir);
      nir_lower_vars_to_ssa(nir);
      nir_shader_gather_info(nir, impl);
   }

   // XXX: alt mode

   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
                       &num_system_values);

   assign_common_binding_table_offsets(devinfo, nir, prog_data, 0,
                                       num_system_values);

   brw_compute_vue_map(devinfo,
                       &vue_prog_data->vue_map, nir->info.outputs_written,
                       nir->info.separate_shader);

   /* Don't tell the backend about our clip plane constants, we've already
    * lowered them in NIR and we don't want it doing it again.
    */
   struct brw_vs_prog_key key_no_ucp = *key;
   key_no_ucp.nr_userclip_plane_consts = 0;

   char *error_str = NULL;
   const unsigned *program =
      brw_compile_vs(compiler, &ice->dbg, mem_ctx, &key_no_ucp, vs_prog_data,
                     nir, -1, &error_str);
   if (program == NULL) {
      dbg_printf("Failed to compile vertex shader: %s\n", error_str);
      ralloc_free(mem_ctx);
      return NULL;
   }

   uint32_t *so_decls =
      ice->vtbl.create_so_decl_list(&ish->stream_output,
                                    &vue_prog_data->vue_map);

   struct iris_compiled_shader *shader =
      iris_upload_shader(ice, IRIS_CACHE_VS, sizeof(*key), key, program,
                         prog_data, so_decls, system_values, num_system_values);

   if (ish->compiled_once) {
      perf_debug(&ice->dbg, "Recompiling vertex shader\n");
   } else {
      ish->compiled_once = true;
   }

   ralloc_free(mem_ctx);
   return shader;
}

/**
 * Update the current vertex shader variant.
 *
 * Fill out the key, look in the cache, compile and bind if needed.
 */
static void
iris_update_compiled_vs(struct iris_context *ice)
{
   struct iris_uncompiled_shader *ish =
      ice->shaders.uncompiled[MESA_SHADER_VERTEX];

   struct brw_vs_prog_key key = { KEY_INIT };
   ice->vtbl.populate_vs_key(ice, &ish->nir->info, &key);

   struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_VS];
   struct iris_compiled_shader *shader =
      iris_find_cached_shader(ice, IRIS_CACHE_VS, sizeof(key), &key);

   if (!shader)
      shader = iris_compile_vs(ice, ish, &key);

   if (old != shader) {
      ice->shaders.prog[IRIS_CACHE_VS] = shader;
      ice->state.dirty |= IRIS_DIRTY_VS |
                          IRIS_DIRTY_BINDINGS_VS |
                          IRIS_DIRTY_CONSTANTS_VS |
                          IRIS_DIRTY_VF_SGVS;
   }
}

/**
 * Get the shader_info for a given stage, or NULL if the stage is disabled.
 */
const struct shader_info *
iris_get_shader_info(const struct iris_context *ice, gl_shader_stage stage)
{
   const struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[stage];

   if (!ish)
      return NULL;

   const nir_shader *nir = ish->nir;
   return &nir->info;
}

// XXX: this function is gross
unsigned
iris_get_shader_num_ubos(const struct iris_context *ice, gl_shader_stage stage)
{
   const struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[stage];
   const struct iris_compiled_shader *shader = ice->shaders.prog[stage];

   if (ish) {
      const nir_shader *nir = ish->nir;
      /* see assign_common_binding_table_offsets */
      return nir->info.num_ubos +
             ((nir->num_uniforms || shader->num_system_values) ? 1 : 0);
   }
   return 0;
}

/**
 * Get the union of TCS output and TES input slots.
 *
 * TCS and TES need to agree on a common URB entry layout.  In particular,
 * the data for all patch vertices is stored in a single URB entry (unlike
 * GS which has one entry per input vertex).  This means that per-vertex
 * array indexing needs a stride.
 *
 * SSO requires locations to match, but doesn't require the number of
 * outputs/inputs to match (in fact, the TCS often has extra outputs).
 * So, we need to take the extra step of unifying these on the fly.
 */
static void
get_unified_tess_slots(const struct iris_context *ice,
                       uint64_t *per_vertex_slots,
                       uint32_t *per_patch_slots)
{
   const struct shader_info *tcs =
      iris_get_shader_info(ice, MESA_SHADER_TESS_CTRL);
   const struct shader_info *tes =
      iris_get_shader_info(ice, MESA_SHADER_TESS_EVAL);

   *per_vertex_slots = tes->inputs_read;
   *per_patch_slots = tes->patch_inputs_read;

   if (tcs) {
      *per_vertex_slots |= tcs->outputs_written;
      *per_patch_slots |= tcs->patch_outputs_written;
   }
}

/**
 * Compile a tessellation control shader, and upload the assembly.
 */
static struct iris_compiled_shader *
iris_compile_tcs(struct iris_context *ice,
                 struct iris_uncompiled_shader *ish,
                 const struct brw_tcs_prog_key *key)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   const struct brw_compiler *compiler = screen->compiler;
   const struct nir_shader_compiler_options *options =
      compiler->glsl_compiler_options[MESA_SHADER_TESS_CTRL].NirOptions;
   const struct gen_device_info *devinfo = &screen->devinfo;
   void *mem_ctx = ralloc_context(NULL);
   struct brw_tcs_prog_data *tcs_prog_data =
      rzalloc(mem_ctx, struct brw_tcs_prog_data);
   struct brw_vue_prog_data *vue_prog_data = &tcs_prog_data->base;
   struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
   enum brw_param_builtin *system_values = NULL;
   unsigned num_system_values = 0;

   nir_shader *nir;

   if (ish) {
      nir = nir_shader_clone(mem_ctx, ish->nir);

      iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
                          &num_system_values);
      assign_common_binding_table_offsets(devinfo, nir, prog_data, 0,
                                          num_system_values);
   } else {
      nir = brw_nir_create_passthrough_tcs(mem_ctx, compiler, options, key);

      /* Reserve space for passing the default tess levels as constants. */
      prog_data->param = rzalloc_array(mem_ctx, uint32_t, 8);
      prog_data->nr_params = 8;
      prog_data->ubo_ranges[0].length = 1;
   }

   char *error_str = NULL;
   const unsigned *program =
      brw_compile_tcs(compiler, &ice->dbg, mem_ctx, key, tcs_prog_data, nir,
                      -1, &error_str);
   if (program == NULL) {
      dbg_printf("Failed to compile tessellation control shader: %s\n",
                 error_str);
      ralloc_free(mem_ctx);
      return NULL;
   }

   struct iris_compiled_shader *shader =
      iris_upload_shader(ice, IRIS_CACHE_TCS, sizeof(*key), key, program,
                         prog_data, NULL, system_values, num_system_values);

   if (ish) {
      if (ish->compiled_once) {
         perf_debug(&ice->dbg, "Recompiling tessellation control shader\n");
      } else {
         ish->compiled_once = true;
      }
   }

   ralloc_free(mem_ctx);
   return shader;
}

/**
 * Update the current tessellation control shader variant.
 *
 * Fill out the key, look in the cache, compile and bind if needed.
 */
static void
iris_update_compiled_tcs(struct iris_context *ice)
{
   struct iris_uncompiled_shader *tcs =
      ice->shaders.uncompiled[MESA_SHADER_TESS_CTRL];

   const struct shader_info *tes_info =
      iris_get_shader_info(ice, MESA_SHADER_TESS_EVAL);
   struct brw_tcs_prog_key key = {
      ALL_SAMPLERS_XYZW,
      .program_string_id = tcs ? tcs->program_id : 0,
      .tes_primitive_mode = tes_info->tess.primitive_mode,
      .input_vertices = ice->state.vertices_per_patch,
   };
   get_unified_tess_slots(ice, &key.outputs_written,
                          &key.patch_outputs_written);
   ice->vtbl.populate_tcs_key(ice, &key);

   struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_TCS];
   struct iris_compiled_shader *shader =
      iris_find_cached_shader(ice, IRIS_CACHE_TCS, sizeof(key), &key);

   if (!shader)
      shader = iris_compile_tcs(ice, tcs, &key);

   if (old != shader) {
      ice->shaders.prog[IRIS_CACHE_TCS] = shader;
      ice->state.dirty |= IRIS_DIRTY_TCS |
                          IRIS_DIRTY_BINDINGS_TCS |
                          IRIS_DIRTY_CONSTANTS_TCS;
   }
}

/**
 * Compile a tessellation evaluation shader, and upload the assembly.
 */
static struct iris_compiled_shader *
iris_compile_tes(struct iris_context *ice,
                 struct iris_uncompiled_shader *ish,
                 const struct brw_tes_prog_key *key)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   const struct brw_compiler *compiler = screen->compiler;
   const struct gen_device_info *devinfo = &screen->devinfo;
   void *mem_ctx = ralloc_context(NULL);
   struct brw_tes_prog_data *tes_prog_data =
      rzalloc(mem_ctx, struct brw_tes_prog_data);
   struct brw_vue_prog_data *vue_prog_data = &tes_prog_data->base;
   struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
   enum brw_param_builtin *system_values;
   unsigned num_system_values;

   nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);

   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
                       &num_system_values);

   assign_common_binding_table_offsets(devinfo, nir, prog_data, 0,
                                       num_system_values);

   struct brw_vue_map input_vue_map;
   brw_compute_tess_vue_map(&input_vue_map, key->inputs_read,
                            key->patch_inputs_read);

   char *error_str = NULL;
   const unsigned *program =
      brw_compile_tes(compiler, &ice->dbg, mem_ctx, key, &input_vue_map,
                      tes_prog_data, nir, NULL, -1, &error_str);
   if (program == NULL) {
      dbg_printf("Failed to compile tessellation evaluation shader: %s\n",
                 error_str);
      ralloc_free(mem_ctx);
      return NULL;
   }

   uint32_t *so_decls =
      ice->vtbl.create_so_decl_list(&ish->stream_output,
                                    &vue_prog_data->vue_map);


   struct iris_compiled_shader *shader =
      iris_upload_shader(ice, IRIS_CACHE_TES, sizeof(*key), key, program,
                         prog_data, so_decls, system_values, num_system_values);

   if (ish->compiled_once) {
      perf_debug(&ice->dbg, "Recompiling tessellation evaluation shader\n");
   } else {
      ish->compiled_once = true;
   }

   ralloc_free(mem_ctx);
   return shader;
}

/**
 * Update the current tessellation evaluation shader variant.
 *
 * Fill out the key, look in the cache, compile and bind if needed.
 */
static void
iris_update_compiled_tes(struct iris_context *ice)
{
   struct iris_uncompiled_shader *ish =
      ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL];

   struct brw_tes_prog_key key = { KEY_INIT };
   get_unified_tess_slots(ice, &key.inputs_read, &key.patch_inputs_read);
   ice->vtbl.populate_tes_key(ice, &key);

   struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_TES];
   struct iris_compiled_shader *shader =
      iris_find_cached_shader(ice, IRIS_CACHE_TES, sizeof(key), &key);

   if (!shader)
      shader = iris_compile_tes(ice, ish, &key);

   if (old != shader) {
      ice->shaders.prog[IRIS_CACHE_TES] = shader;
      ice->state.dirty |= IRIS_DIRTY_TES |
                          IRIS_DIRTY_BINDINGS_TES |
                          IRIS_DIRTY_CONSTANTS_TES;
   }
}

/**
 * Compile a geometry shader, and upload the assembly.
 */
static struct iris_compiled_shader *
iris_compile_gs(struct iris_context *ice,
                struct iris_uncompiled_shader *ish,
                const struct brw_gs_prog_key *key)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   const struct brw_compiler *compiler = screen->compiler;
   const struct gen_device_info *devinfo = &screen->devinfo;
   void *mem_ctx = ralloc_context(NULL);
   struct brw_gs_prog_data *gs_prog_data =
      rzalloc(mem_ctx, struct brw_gs_prog_data);
   struct brw_vue_prog_data *vue_prog_data = &gs_prog_data->base;
   struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
   enum brw_param_builtin *system_values;
   unsigned num_system_values;

   nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);

   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
                       &num_system_values);

   assign_common_binding_table_offsets(devinfo, nir, prog_data, 0,
                                       num_system_values);

   brw_compute_vue_map(devinfo,
                       &vue_prog_data->vue_map, nir->info.outputs_written,
                       nir->info.separate_shader);

   char *error_str = NULL;
   const unsigned *program =
      brw_compile_gs(compiler, &ice->dbg, mem_ctx, key, gs_prog_data, nir,
                     NULL, -1, &error_str);
   if (program == NULL) {
      dbg_printf("Failed to compile geometry shader: %s\n", error_str);
      ralloc_free(mem_ctx);
      return NULL;
   }

   uint32_t *so_decls =
      ice->vtbl.create_so_decl_list(&ish->stream_output,
                                    &vue_prog_data->vue_map);

   struct iris_compiled_shader *shader =
      iris_upload_shader(ice, IRIS_CACHE_GS, sizeof(*key), key, program,
                         prog_data, so_decls, system_values, num_system_values);

   if (ish->compiled_once) {
      perf_debug(&ice->dbg, "Recompiling geometry shader\n");
   } else {
      ish->compiled_once = true;
   }

   ralloc_free(mem_ctx);
   return shader;
}

/**
 * Update the current geometry shader variant.
 *
 * Fill out the key, look in the cache, compile and bind if needed.
 */
static void
iris_update_compiled_gs(struct iris_context *ice)
{
   struct iris_uncompiled_shader *ish =
      ice->shaders.uncompiled[MESA_SHADER_GEOMETRY];
   struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_GS];
   struct iris_compiled_shader *shader = NULL;

   if (ish) {
      struct brw_gs_prog_key key = { KEY_INIT };
      ice->vtbl.populate_gs_key(ice, &key);

      shader =
         iris_find_cached_shader(ice, IRIS_CACHE_GS, sizeof(key), &key);

      if (!shader)
         shader = iris_compile_gs(ice, ish, &key);
   }

   if (old != shader) {
      ice->shaders.prog[IRIS_CACHE_GS] = shader;
      ice->state.dirty |= IRIS_DIRTY_GS |
                          IRIS_DIRTY_BINDINGS_GS |
                          IRIS_DIRTY_CONSTANTS_GS;
   }
}

/**
 * Compile a fragment (pixel) shader, and upload the assembly.
 */
static struct iris_compiled_shader *
iris_compile_fs(struct iris_context *ice,
                struct iris_uncompiled_shader *ish,
                const struct brw_wm_prog_key *key,
                struct brw_vue_map *vue_map)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   const struct brw_compiler *compiler = screen->compiler;
   const struct gen_device_info *devinfo = &screen->devinfo;
   void *mem_ctx = ralloc_context(NULL);
   struct brw_wm_prog_data *fs_prog_data =
      rzalloc(mem_ctx, struct brw_wm_prog_data);
   struct brw_stage_prog_data *prog_data = &fs_prog_data->base;
   enum brw_param_builtin *system_values;
   unsigned num_system_values;

   nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);

   // XXX: alt mode

   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
                       &num_system_values);

   assign_common_binding_table_offsets(devinfo, nir, prog_data,
                                       MAX2(key->nr_color_regions, 1),
                                       num_system_values);
   char *error_str = NULL;
   const unsigned *program =
      brw_compile_fs(compiler, &ice->dbg, mem_ctx, key, fs_prog_data,
                     nir, NULL, -1, -1, -1, true, false, vue_map, &error_str);
   if (program == NULL) {
      dbg_printf("Failed to compile fragment shader: %s\n", error_str);
      ralloc_free(mem_ctx);
      return NULL;
   }

   struct iris_compiled_shader *shader =
      iris_upload_shader(ice, IRIS_CACHE_FS, sizeof(*key), key, program,
                         prog_data, NULL, system_values, num_system_values);

   if (ish->compiled_once) {
      perf_debug(&ice->dbg, "Recompiling fragment shader\n");
   } else {
      ish->compiled_once = true;
   }

   ralloc_free(mem_ctx);
   return shader;
}

/**
 * Update the current fragment shader variant.
 *
 * Fill out the key, look in the cache, compile and bind if needed.
 */
static void
iris_update_compiled_fs(struct iris_context *ice)
{
   struct iris_uncompiled_shader *ish =
      ice->shaders.uncompiled[MESA_SHADER_FRAGMENT];
   struct brw_wm_prog_key key = { KEY_INIT };
   ice->vtbl.populate_fs_key(ice, &key);

   if (ish->nos & IRIS_NOS_LAST_VUE_MAP)
      key.input_slots_valid = ice->shaders.last_vue_map->slots_valid;

   struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_FS];
   struct iris_compiled_shader *shader =
      iris_find_cached_shader(ice, IRIS_CACHE_FS, sizeof(key), &key);

   if (!shader)
      shader = iris_compile_fs(ice, ish, &key, ice->shaders.last_vue_map);

   if (old != shader) {
      // XXX: only need to flag CLIP if barycentric has NONPERSPECTIVE
      // toggles.  might be able to avoid flagging SBE too.
      ice->shaders.prog[IRIS_CACHE_FS] = shader;
      ice->state.dirty |= IRIS_DIRTY_FS |
                          IRIS_DIRTY_BINDINGS_FS |
                          IRIS_DIRTY_CONSTANTS_FS |
                          IRIS_DIRTY_WM |
                          IRIS_DIRTY_CLIP |
                          IRIS_DIRTY_SBE;
   }
}

/**
 * Get the compiled shader for the last enabled geometry stage.
 *
 * This stage is the one which will feed stream output and the rasterizer.
 */
static struct iris_compiled_shader *
last_vue_shader(struct iris_context *ice)
{
   if (ice->shaders.prog[MESA_SHADER_GEOMETRY])
      return ice->shaders.prog[MESA_SHADER_GEOMETRY];

   if (ice->shaders.prog[MESA_SHADER_TESS_EVAL])
      return ice->shaders.prog[MESA_SHADER_TESS_EVAL];

   return ice->shaders.prog[MESA_SHADER_VERTEX];
}

/**
 * Update the last enabled stage's VUE map.
 *
 * When the shader feeding the rasterizer's output interface changes, we
 * need to re-emit various packets.
 */
static void
update_last_vue_map(struct iris_context *ice,
                    struct brw_stage_prog_data *prog_data)
{
   struct brw_vue_prog_data *vue_prog_data = (void *) prog_data;
   struct brw_vue_map *vue_map = &vue_prog_data->vue_map;
   struct brw_vue_map *old_map = ice->shaders.last_vue_map;
   const uint64_t changed_slots =
      (old_map ? old_map->slots_valid : 0ull) ^ vue_map->slots_valid;

   if (changed_slots & VARYING_BIT_VIEWPORT) {
      // XXX: could use ctx->Const.MaxViewports for old API efficiency
      ice->state.num_viewports =
         (vue_map->slots_valid & VARYING_BIT_VIEWPORT) ? IRIS_MAX_VIEWPORTS : 1;
      ice->state.dirty |= IRIS_DIRTY_CLIP |
                          IRIS_DIRTY_SF_CL_VIEWPORT |
                          IRIS_DIRTY_CC_VIEWPORT |
                          IRIS_DIRTY_SCISSOR_RECT |
                          IRIS_DIRTY_UNCOMPILED_FS |
                          ice->state.dirty_for_nos[IRIS_NOS_LAST_VUE_MAP];
      // XXX: CC_VIEWPORT?
   }

   if (changed_slots || (old_map && old_map->separate != vue_map->separate)) {
      ice->state.dirty |= IRIS_DIRTY_SBE;
   }

   ice->shaders.last_vue_map = &vue_prog_data->vue_map;
}

/**
 * Get the prog_data for a given stage, or NULL if the stage is disabled.
 */
static struct brw_vue_prog_data *
get_vue_prog_data(struct iris_context *ice, gl_shader_stage stage)
{
   if (!ice->shaders.prog[stage])
      return NULL;

   return (void *) ice->shaders.prog[stage]->prog_data;
}

// XXX: iris_compiled_shaders are space-leaking :(
// XXX: do remember to unbind them if deleting them.

/**
 * Update the current shader variants for the given state.
 *
 * This should be called on every draw call to ensure that the correct
 * shaders are bound.  It will also flag any dirty state triggered by
 * swapping out those shaders.
 */
void
iris_update_compiled_shaders(struct iris_context *ice)
{
   const uint64_t dirty = ice->state.dirty;

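   /* Snapshot the old VUE prog data so we can tell at the end whether the
    * URB entry sizes changed and the URB needs to be reconfigured.
    */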
   struct brw_vue_prog_data *old_prog_datas[4];
   if (!(dirty & IRIS_DIRTY_URB)) {
      for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++)
         old_prog_datas[i] = get_vue_prog_data(ice, i);
   }

   if (dirty & (IRIS_DIRTY_UNCOMPILED_TCS | IRIS_DIRTY_UNCOMPILED_TES)) {
      struct iris_uncompiled_shader *tes =
         ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL];
      if (tes) {
         iris_update_compiled_tcs(ice);
         iris_update_compiled_tes(ice);
      } else {
         ice->shaders.prog[IRIS_CACHE_TCS] = NULL;
         ice->shaders.prog[IRIS_CACHE_TES] = NULL;
         ice->state.dirty |=
            IRIS_DIRTY_TCS | IRIS_DIRTY_TES |
            IRIS_DIRTY_BINDINGS_TCS | IRIS_DIRTY_BINDINGS_TES |
            IRIS_DIRTY_CONSTANTS_TCS | IRIS_DIRTY_CONSTANTS_TES;
      }
   }

   if (dirty & IRIS_DIRTY_UNCOMPILED_VS)
      iris_update_compiled_vs(ice);
   if (dirty & IRIS_DIRTY_UNCOMPILED_GS)
      iris_update_compiled_gs(ice);

   struct iris_compiled_shader *shader = last_vue_shader(ice);
   update_last_vue_map(ice, shader->prog_data);
   if (ice->state.streamout != shader->streamout) {
      ice->state.streamout = shader->streamout;
      ice->state.dirty |= IRIS_DIRTY_SO_DECL_LIST | IRIS_DIRTY_STREAMOUT;
   }

   if (dirty & IRIS_DIRTY_UNCOMPILED_FS)
      iris_update_compiled_fs(ice);
   // ...

   /* Changing shader interfaces may require a URB reconfiguration. */
   if (!(dirty & IRIS_DIRTY_URB)) {
      for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
         struct brw_vue_prog_data *old = old_prog_datas[i];
         struct brw_vue_prog_data *new = get_vue_prog_data(ice, i);
         if (!!old != !!new ||
             (new && new->urb_entry_size != old->urb_entry_size)) {
            ice->state.dirty |= IRIS_DIRTY_URB;
            break;
         }
      }
   }
}

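/**
 * Compile a compute shader, and upload the assembly.
 */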
static struct iris_compiled_shader *
iris_compile_cs(struct iris_context *ice,
                struct iris_uncompiled_shader *ish,
                const struct brw_cs_prog_key *key)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   const struct brw_compiler *compiler = screen->compiler;
   const struct gen_device_info *devinfo = &screen->devinfo;
   void *mem_ctx = ralloc_context(NULL);
   struct brw_cs_prog_data *cs_prog_data =
      rzalloc(mem_ctx, struct brw_cs_prog_data);
   struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
   enum brw_param_builtin *system_values;
   unsigned num_system_values;

   nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);

   cs_prog_data->binding_table.work_groups_start = 0;

   prog_data->total_shared = nir->info.cs.shared_size;

   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
                       &num_system_values);

   assign_common_binding_table_offsets(devinfo, nir, prog_data, 1,
                                       num_system_values);

   char *error_str = NULL;
   const unsigned *program =
      brw_compile_cs(compiler, &ice->dbg, mem_ctx, key, cs_prog_data,
                     nir, -1, &error_str);
   if (program == NULL) {
      dbg_printf("Failed to compile compute shader: %s\n", error_str);
      ralloc_free(mem_ctx);
      return NULL;
   }

   struct iris_compiled_shader *shader =
      iris_upload_shader(ice, IRIS_CACHE_CS, sizeof(*key), key, program,
                         prog_data, NULL, system_values, num_system_values);

   if (ish->compiled_once) {
      perf_debug(&ice->dbg, "Recompiling compute shader\n");
   } else {
      ish->compiled_once = true;
   }

   ralloc_free(mem_ctx);
   return shader;
}

void
iris_update_compiled_compute_shader(struct iris_context *ice)
{
   struct iris_uncompiled_shader *ish =
      ice->shaders.uncompiled[MESA_SHADER_COMPUTE];

   struct brw_cs_prog_key key = { KEY_INIT };
   ice->vtbl.populate_cs_key(ice, &key);

   struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_CS];
   struct iris_compiled_shader *shader =
      iris_find_cached_shader(ice, IRIS_CACHE_CS, sizeof(key), &key);

   if (!shader)
      shader = iris_compile_cs(ice, ish, &key);

   if (old != shader) {
      ice->shaders.prog[IRIS_CACHE_CS] = shader;
      ice->state.dirty |= IRIS_DIRTY_CS |
                          IRIS_DIRTY_BINDINGS_CS |
                          IRIS_DIRTY_CONSTANTS_CS;
   }
}

void
iris_fill_cs_push_const_buffer(struct brw_cs_prog_data *cs_prog_data,
                               uint32_t *dst)
{
   struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
   assert(cs_prog_data->push.total.size > 0);
   assert(cs_prog_data->push.cross_thread.size == 0);
   assert(cs_prog_data->push.per_thread.dwords == 1);
   assert(prog_data->param[0] == BRW_PARAM_BUILTIN_SUBGROUP_ID);
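   /* Each thread's per-thread push data is padded out to a full 32-byte
    * register (8 DWords); write the subgroup ID into the first DWord of
    * each thread's block.
    */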
   for (unsigned t = 0; t < cs_prog_data->threads; t++)
      dst[8 * t] = t;
}

/**
 * Allocate scratch BOs as needed for the given per-thread size and stage.
 *
 * Returns the 32-bit "Scratch Space Base Pointer" value.
 */
uint32_t
iris_get_scratch_space(struct iris_context *ice,
                       unsigned per_thread_scratch,
                       gl_shader_stage stage)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   struct iris_bufmgr *bufmgr = screen->bufmgr;
   const struct gen_device_info *devinfo = &screen->devinfo;

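   /* per_thread_scratch is a power of two of at least 1kB.  ffs() returns
    * the 1-based index of the lowest set bit, so this computes
    * log2(per_thread_scratch) - 10: 1kB encodes as 0, 2kB as 1, and so on.
    */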
   unsigned encoded_size = ffs(per_thread_scratch) - 11;
   assert(encoded_size < (1 << 16));

   struct iris_bo **bop = &ice->shaders.scratch_bos[encoded_size][stage];

   /* The documentation for 3DSTATE_PS "Scratch Space Base Pointer" says:
    *
    *    "Scratch Space per slice is computed based on 4 sub-slices.  SW must
    *     allocate scratch space enough so that each slice has 4 slices
    *     allowed."
    *
    * According to the other driver team, this applies to compute shaders
    * as well.  This is not currently documented at all.
    */
   unsigned subslice_total = 4 * devinfo->num_slices;
   assert(subslice_total >= screen->subslice_total);

   if (!*bop) {
      unsigned scratch_ids_per_subslice = devinfo->max_cs_threads;
      uint32_t max_threads[] = {
         [MESA_SHADER_VERTEX]    = devinfo->max_vs_threads,
         [MESA_SHADER_TESS_CTRL] = devinfo->max_tcs_threads,
         [MESA_SHADER_TESS_EVAL] = devinfo->max_tes_threads,
         [MESA_SHADER_GEOMETRY]  = devinfo->max_gs_threads,
         [MESA_SHADER_FRAGMENT]  = devinfo->max_wm_threads,
         [MESA_SHADER_COMPUTE]   = scratch_ids_per_subslice * subslice_total,
      };

      uint32_t size = per_thread_scratch * max_threads[stage];

      *bop = iris_bo_alloc(bufmgr, "scratch", size, IRIS_MEMZONE_SHADER);
   }

   return (*bop)->gtt_offset;
}

void
iris_init_program_functions(struct pipe_context *ctx)
{
   ctx->create_vs_state  = iris_create_vs_state;
   ctx->create_tcs_state = iris_create_tcs_state;
   ctx->create_tes_state = iris_create_tes_state;
   ctx->create_gs_state  = iris_create_gs_state;
   ctx->create_fs_state  = iris_create_fs_state;
   ctx->create_compute_state = iris_create_compute_state;

   ctx->delete_vs_state  = iris_delete_shader_state;
   ctx->delete_tcs_state = iris_delete_shader_state;
   ctx->delete_tes_state = iris_delete_shader_state;
   ctx->delete_gs_state  = iris_delete_shader_state;
   ctx->delete_fs_state  = iris_delete_shader_state;
   ctx->delete_compute_state = iris_delete_shader_state;

   ctx->bind_vs_state  = iris_bind_vs_state;
   ctx->bind_tcs_state = iris_bind_tcs_state;
   ctx->bind_tes_state = iris_bind_tes_state;
   ctx->bind_gs_state  = iris_bind_gs_state;
   ctx->bind_fs_state  = iris_bind_fs_state;
   ctx->bind_compute_state = iris_bind_cs_state;
}