iris: Use wrappers for create_xs_state rather than a switch statement
[mesa.git] / src / gallium / drivers / iris / iris_program.c
1 /*
2 * Copyright © 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23 /**
24 * @file iris_program.c
25 *
26 * This file contains the driver interface for compiling shaders.
27 *
28 * See iris_program_cache.c for the in-memory program cache where the
29 * compiled shaders are stored.
30 */
31
32 #include <stdio.h>
33 #include <errno.h>
34 #include "pipe/p_defines.h"
35 #include "pipe/p_state.h"
36 #include "pipe/p_context.h"
37 #include "pipe/p_screen.h"
38 #include "util/u_atomic.h"
39 #include "compiler/nir/nir.h"
40 #include "compiler/nir/nir_builder.h"
41 #include "intel/compiler/brw_compiler.h"
42 #include "intel/compiler/brw_nir.h"
43 #include "iris_context.h"
44
45 static unsigned
46 get_new_program_id(struct iris_screen *screen)
47 {
48 return p_atomic_inc_return(&screen->program_id);
49 }
50
51 /**
52 * An uncompiled, API-facing shader. This is the Gallium CSO for shaders.
53 * It primarily contains the NIR for the shader.
54 *
55 * Each API-facing shader can be compiled into multiple shader variants,
56 * based on non-orthogonal state dependencies, recorded in the shader key.
57 *
58 * See iris_compiled_shader, which represents a compiled shader variant.
59 */
60 struct iris_uncompiled_shader {
61 nir_shader *nir;
62
63 struct pipe_stream_output_info stream_output;
64
65 unsigned program_id;
66
67 /** Bitfield of (1 << IRIS_NOS_*) flags. */
68 unsigned nos;
69 };
70
71 static nir_ssa_def *
72 get_aoa_deref_offset(nir_builder *b,
73 nir_deref_instr *deref,
74 unsigned elem_size)
75 {
76 unsigned array_size = elem_size;
77 nir_ssa_def *offset = nir_imm_int(b, 0);
78
79 while (deref->deref_type != nir_deref_type_var) {
80 assert(deref->deref_type == nir_deref_type_array);
81
82 /* This level's element size is the previous level's array size */
83 nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
84 assert(deref->arr.index.ssa);
85 offset = nir_iadd(b, offset,
86 nir_imul(b, index, nir_imm_int(b, array_size)));
87
88 deref = nir_deref_instr_parent(deref);
89 assert(glsl_type_is_array(deref->type));
90 array_size *= glsl_get_length(deref->type);
91 }
92
93 /* Accessing an invalid surface index with the dataport can result in a
94 * hang. According to the spec "if the index used to select an individual
95 * element is negative or greater than or equal to the size of the array,
96 * the results of the operation are undefined but may not lead to
97 * termination" -- which is one of the possible outcomes of the hang.
98 * Clamp the index to prevent access outside of the array bounds.
99 */
100 return nir_umin(b, offset, nir_imm_int(b, array_size - elem_size));
101 }
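/* Example of the above: for "image2D imgs[2][3]" accessed as "imgs[i][j]",
 * get_aoa_deref_offset() with elem_size == 1 returns min(j + 3 * i, 5),
 * i.e. the flattened element index clamped to the last valid element.
 */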
102
103 static void
104 iris_lower_storage_image_derefs(nir_shader *nir)
105 {
106 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
107
108 nir_builder b;
109 nir_builder_init(&b, impl);
110
111 nir_foreach_block(block, impl) {
112 nir_foreach_instr_safe(instr, block) {
113 if (instr->type != nir_instr_type_intrinsic)
114 continue;
115
116 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
117 switch (intrin->intrinsic) {
118 case nir_intrinsic_image_deref_load:
119 case nir_intrinsic_image_deref_store:
120 case nir_intrinsic_image_deref_atomic_add:
121 case nir_intrinsic_image_deref_atomic_min:
122 case nir_intrinsic_image_deref_atomic_max:
123 case nir_intrinsic_image_deref_atomic_and:
124 case nir_intrinsic_image_deref_atomic_or:
125 case nir_intrinsic_image_deref_atomic_xor:
126 case nir_intrinsic_image_deref_atomic_exchange:
127 case nir_intrinsic_image_deref_atomic_comp_swap:
128 case nir_intrinsic_image_deref_size:
129 case nir_intrinsic_image_deref_samples: {
130 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
131 nir_variable *var = nir_deref_instr_get_variable(deref);
132
133 b.cursor = nir_before_instr(&intrin->instr);
134 nir_ssa_def *index =
135 nir_iadd(&b, nir_imm_int(&b, var->data.driver_location),
136 get_aoa_deref_offset(&b, deref, 1));
137 brw_nir_rewrite_image_intrinsic(intrin, index);
138 break;
139 }
140
141 default:
142 break;
143 }
144 }
145 }
146 }
147
148 // XXX: need unify_interfaces() at link time...
149
150 static void
151 update_so_info(struct pipe_stream_output_info *so_info)
152 {
153 for (unsigned i = 0; i < so_info->num_outputs; i++) {
154 struct pipe_stream_output *output = &so_info->output[i];
155
156 /* The VUE header contains three scalar fields packed together:
157 * - gl_PointSize is stored in VARYING_SLOT_PSIZ.w
158 * - gl_Layer is stored in VARYING_SLOT_PSIZ.y
159 * - gl_ViewportIndex is stored in VARYING_SLOT_PSIZ.z
160 */
161 switch (output->register_index) {
162 case VARYING_SLOT_LAYER:
163 assert(output->num_components == 1);
164 output->register_index = VARYING_SLOT_PSIZ;
165 output->start_component = 1;
166 break;
167 case VARYING_SLOT_VIEWPORT:
168 assert(output->num_components == 1);
169 output->register_index = VARYING_SLOT_PSIZ;
170 output->start_component = 2;
171 break;
172 case VARYING_SLOT_PSIZ:
173 assert(output->num_components == 1);
174 output->start_component = 3;
175 break;
176 }
177
178 //info->outputs_written |= 1ull << output->register_index;
179 }
180 }
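/* For example, a stream output recorded as VARYING_SLOT_LAYER with a single
 * component is rewritten above to read component 1 (.y) of the
 * VARYING_SLOT_PSIZ slot, matching the VUE header packing described in the
 * comment inside the loop.
 */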
181
182 /**
183 * The pipe->create_[stage]_state() driver hooks.
184 *
185 * Performs basic NIR preprocessing, records any state dependencies, and
186 * returns an iris_uncompiled_shader as the Gallium CSO.
187 *
188 * Actual shader compilation to assembly happens later, at first use.
189 */
190 static void *
191 iris_create_uncompiled_shader(struct pipe_context *ctx,
192 nir_shader *nir,
193 const struct pipe_stream_output_info *so_info)
194 {
195 struct iris_screen *screen = (struct iris_screen *)ctx->screen;
196 const struct gen_device_info *devinfo = &screen->devinfo;
197
198 struct iris_uncompiled_shader *ish =
199 calloc(1, sizeof(struct iris_uncompiled_shader));
200 if (!ish)
201 return NULL;
202
203 nir = brw_preprocess_nir(screen->compiler, nir);
204
205 NIR_PASS_V(nir, brw_nir_lower_image_load_store, devinfo);
206 NIR_PASS_V(nir, iris_lower_storage_image_derefs);
207
208 ish->program_id = get_new_program_id(screen);
209 ish->nir = nir;
210 if (so_info) {
211 memcpy(&ish->stream_output, so_info, sizeof(*so_info));
212 update_so_info(&ish->stream_output);
213 }
214
215 return ish;
216 }
217
218 static struct iris_uncompiled_shader *
219 iris_create_shader_state(struct pipe_context *ctx,
220 const struct pipe_shader_state *state)
221 {
222 assert(state->type == PIPE_SHADER_IR_NIR);
223
224 return iris_create_uncompiled_shader(ctx, state->ir.nir,
225 &state->stream_output);
226 }
227
228 static void *
229 iris_create_vs_state(struct pipe_context *ctx,
230 const struct pipe_shader_state *state)
231 {
232 struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);
233
234 /* User clip planes */
235 if (ish->nir->info.clip_distance_array_size == 0)
236 ish->nos |= IRIS_NOS_RASTERIZER;
237
238 return ish;
239 }
240
241 static void *
242 iris_create_tcs_state(struct pipe_context *ctx,
243 const struct pipe_shader_state *state)
244 {
245 struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);
246
247 // XXX: NOS?
248
249 return ish;
250 }
251
252 static void *
253 iris_create_tes_state(struct pipe_context *ctx,
254 const struct pipe_shader_state *state)
255 {
256 struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);
257
258 // XXX: NOS?
259
260 return ish;
261 }
262
263 static void *
264 iris_create_gs_state(struct pipe_context *ctx,
265 const struct pipe_shader_state *state)
266 {
267 struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);
268
269 // XXX: NOS?
270
271 return ish;
272 }
273
274 static void *
275 iris_create_fs_state(struct pipe_context *ctx,
276 const struct pipe_shader_state *state)
277 {
278 struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);
279
280 ish->nos |= IRIS_NOS_FRAMEBUFFER |
281 IRIS_NOS_DEPTH_STENCIL_ALPHA |
282 IRIS_NOS_RASTERIZER |
283 IRIS_NOS_BLEND;
284
285 /* The program key needs the VUE map if there are > 16 inputs */
286 if (util_bitcount64(ish->nir->info.inputs_read &
287 BRW_FS_VARYING_INPUT_MASK) > 16) {
288 ish->nos |= IRIS_NOS_LAST_VUE_MAP;
289 }
290
291 return ish;
292 }
293
294 static void *
295 iris_create_compute_state(struct pipe_context *ctx,
296 const struct pipe_compute_state *state)
297 {
298 assert(state->ir_type == PIPE_SHADER_IR_NIR);
299
300 // XXX: disallow more than 64KB of shared variables
301
302 struct iris_uncompiled_shader *ish =
303 iris_create_uncompiled_shader(ctx, (void *) state->prog, NULL);
304
305 return ish;
306 }
307
308 /**
309 * The pipe->delete_[stage]_state() driver hooks.
310 *
311 * Frees the iris_uncompiled_shader.
312 */
313 static void
314 iris_delete_shader_state(struct pipe_context *ctx, void *state)
315 {
316 struct iris_uncompiled_shader *ish = state;
317
318 ralloc_free(ish->nir);
319 free(ish);
320 }
321
322 /**
323 * The pipe->bind_[stage]_state() driver hook.
324 *
325 * Binds an uncompiled shader as the current one for a particular stage.
326 * Updates dirty tracking to account for the shader's NOS.
327 */
328 static void
329 bind_state(struct iris_context *ice,
330 struct iris_uncompiled_shader *ish,
331 gl_shader_stage stage)
332 {
333 uint64_t dirty_bit = IRIS_DIRTY_UNCOMPILED_VS << stage;
334 const uint64_t nos = ish ? ish->nos : 0;
335
336 ice->shaders.uncompiled[stage] = ish;
337 ice->state.dirty |= dirty_bit;
338
339 /* Record that CSOs need to mark IRIS_DIRTY_UNCOMPILED_XS when they change
340 * (or that they no longer need to do so).
341 */
342 for (int i = 0; i < IRIS_NOS_COUNT; i++) {
343 if (nos & (1 << i))
344 ice->state.dirty_for_nos[i] |= dirty_bit;
345 else
346 ice->state.dirty_for_nos[i] &= ~dirty_bit;
347 }
348 }
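/* The dirty_for_nos[] words filled in above are what let the other CSO bind
 * hooks flag IRIS_DIRTY_UNCOMPILED_<stage> when, say, the rasterizer or blend
 * state changes, without having to know which shader stages actually depend
 * on that state; see how dirty_for_nos[] is consumed in iris_state.c.
 */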
349
350 static void
351 iris_bind_vs_state(struct pipe_context *ctx, void *state)
352 {
353 bind_state((void *) ctx, state, MESA_SHADER_VERTEX);
354 }
355
356 static void
357 iris_bind_tcs_state(struct pipe_context *ctx, void *state)
358 {
359 bind_state((void *) ctx, state, MESA_SHADER_TESS_CTRL);
360 }
361
362 static void
363 iris_bind_tes_state(struct pipe_context *ctx, void *state)
364 {
365 struct iris_context *ice = (struct iris_context *)ctx;
366
367 /* Enabling/disabling optional stages requires a URB reconfiguration. */
368 if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL])
369 ice->state.dirty |= IRIS_DIRTY_URB;
370
371 bind_state((void *) ctx, state, MESA_SHADER_TESS_EVAL);
372 }
373
374 static void
375 iris_bind_gs_state(struct pipe_context *ctx, void *state)
376 {
377 struct iris_context *ice = (struct iris_context *)ctx;
378
379 /* Enabling/disabling optional stages requires a URB reconfiguration. */
380 if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_GEOMETRY])
381 ice->state.dirty |= IRIS_DIRTY_URB;
382
383 bind_state((void *) ctx, state, MESA_SHADER_GEOMETRY);
384 }
385
386 static void
387 iris_bind_fs_state(struct pipe_context *ctx, void *state)
388 {
389 bind_state((void *) ctx, state, MESA_SHADER_FRAGMENT);
390 }
391
392 static void
393 iris_bind_cs_state(struct pipe_context *ctx, void *state)
394 {
395 bind_state((void *) ctx, state, MESA_SHADER_COMPUTE);
396 }
397
398 /**
399 * Sets up the starting offsets for the groups of binding table entries
400 * common to all pipeline stages.
401 *
402 * Unused groups are initialized to 0xd0d0d0d0 to make it obvious that they're
403  * unused, and so that adding small offsets to them will still trip our
404  * asserts that surface indices are < BRW_MAX_SURFACES.
405 */
406 static uint32_t
407 assign_common_binding_table_offsets(const struct gen_device_info *devinfo,
408 const struct nir_shader *nir,
409 struct brw_stage_prog_data *prog_data,
410 uint32_t next_binding_table_offset,
411 unsigned num_system_values)
412 {
413 const struct shader_info *info = &nir->info;
414
415 if (info->num_textures) {
416 prog_data->binding_table.texture_start = next_binding_table_offset;
417 prog_data->binding_table.gather_texture_start = next_binding_table_offset;
418 next_binding_table_offset += info->num_textures;
419 } else {
420 prog_data->binding_table.texture_start = 0xd0d0d0d0;
421 prog_data->binding_table.gather_texture_start = 0xd0d0d0d0;
422 }
423
424 if (info->num_images) {
425 prog_data->binding_table.image_start = next_binding_table_offset;
426 next_binding_table_offset += info->num_images;
427 } else {
428 prog_data->binding_table.image_start = 0xd0d0d0d0;
429 }
430
431 int num_ubos = info->num_ubos +
432 ((nir->num_uniforms || num_system_values) ? 1 : 0);
433
434 if (num_ubos) {
435 //assert(info->num_ubos <= BRW_MAX_UBO);
436 prog_data->binding_table.ubo_start = next_binding_table_offset;
437 next_binding_table_offset += num_ubos;
438 } else {
439 prog_data->binding_table.ubo_start = 0xd0d0d0d0;
440 }
441
442 if (info->num_ssbos || info->num_abos) {
443 prog_data->binding_table.ssbo_start = next_binding_table_offset;
444 // XXX: see iris_state "wasting 16 binding table slots for ABOs" comment
445 next_binding_table_offset += IRIS_MAX_ABOS + info->num_ssbos;
446 } else {
447 prog_data->binding_table.ssbo_start = 0xd0d0d0d0;
448 }
449
450 prog_data->binding_table.shader_time_start = 0xd0d0d0d0;
451
452 /* This may or may not be used depending on how the compile goes. */
453 prog_data->binding_table.pull_constants_start = next_binding_table_offset;
454 next_binding_table_offset++;
455
456 /* Plane 0 is just the regular texture section */
457 prog_data->binding_table.plane_start[0] = prog_data->binding_table.texture_start;
458
459 prog_data->binding_table.plane_start[1] = next_binding_table_offset;
460 next_binding_table_offset += info->num_textures;
461
462 prog_data->binding_table.plane_start[2] = next_binding_table_offset;
463 next_binding_table_offset += info->num_textures;
464
465 /* Set the binding table size */
466 prog_data->binding_table.size_bytes = next_binding_table_offset * 4;
467
468 return next_binding_table_offset;
469 }
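/* Worked example: a fragment shader with one color attachment (so the table
 * starts at offset 1), two textures, one UBO, and no images, SSBOs, uniforms
 * or system values ends up with texture_start = gather_texture_start = 1,
 * ubo_start = 3, pull_constants_start = 4, plane_start[] = {1, 5, 7}, and
 * size_bytes = 9 * 4 = 36.
 */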
470
471 /**
472 * Associate NIR uniform variables with the prog_data->param[] mechanism
473 * used by the backend. Also, decide which UBOs we'd like to push in an
474 * ideal situation (though the backend can reduce this).
475 */
476 static void
477 iris_setup_uniforms(const struct brw_compiler *compiler,
478 void *mem_ctx,
479 nir_shader *nir,
480 struct brw_stage_prog_data *prog_data,
481 enum brw_param_builtin **out_system_values,
482 unsigned *out_num_system_values)
483 {
484 /* We don't use params[], but fs_visitor::nir_setup_uniforms() asserts
485 * about it for compute shaders, so go ahead and make some fake ones
486 * which the backend will dead code eliminate.
487 */
488 prog_data->nr_params = nir->num_uniforms;
489 prog_data->param = rzalloc_array(mem_ctx, uint32_t, prog_data->nr_params);
490
491 /* The intel compiler assumes that num_uniforms is in bytes. For
492 * scalar that means 4 bytes per uniform slot.
493 *
494 * Ref: brw_nir_lower_uniforms, type_size_scalar_bytes.
495 */
496 nir->num_uniforms *= 4;
497
498 const unsigned IRIS_MAX_SYSTEM_VALUES = 32;
499 enum brw_param_builtin *system_values =
500 rzalloc_array(mem_ctx, enum brw_param_builtin, IRIS_MAX_SYSTEM_VALUES);
501 unsigned num_system_values = 0;
502
503 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
504
505 nir_builder b;
506 nir_builder_init(&b, impl);
507
508 b.cursor = nir_before_block(nir_start_block(impl));
509 nir_ssa_def *temp_ubo_name = nir_ssa_undef(&b, 1, 32);
510
511 /* Turn system value intrinsics into uniforms */
512 nir_foreach_block(block, impl) {
513 nir_foreach_instr_safe(instr, block) {
514 if (instr->type != nir_instr_type_intrinsic)
515 continue;
516
517 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
518
519 unsigned idx = num_system_values;
520
521 switch (intrin->intrinsic) {
522 case nir_intrinsic_load_user_clip_plane: {
523 unsigned ucp = nir_intrinsic_ucp_id(intrin);
524 for (int i = 0; i < 4; i++) {
525 system_values[num_system_values++] =
526 BRW_PARAM_BUILTIN_CLIP_PLANE(ucp, i);
527 }
528 break;
529 }
530 default:
531 continue;
532 }
533
534 b.cursor = nir_before_instr(instr);
535
536 unsigned comps = nir_intrinsic_dest_components(intrin);
537 nir_ssa_def *offset = nir_imm_int(&b, idx * sizeof(uint32_t));
538
539 nir_intrinsic_instr *load =
540 nir_intrinsic_instr_create(nir, nir_intrinsic_load_ubo);
541 load->num_components = comps;
542 load->src[0] = nir_src_for_ssa(temp_ubo_name);
543 load->src[1] = nir_src_for_ssa(offset);
544 nir_ssa_dest_init(&load->instr, &load->dest, comps, 32, NULL);
545 nir_builder_instr_insert(&b, &load->instr);
546 nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
547 nir_src_for_ssa(&load->dest.ssa));
548 nir_instr_remove(instr);
549 }
550 }
551
552 nir_validate_shader(nir, "before remapping");
553
554 /* Place the new params at the front of constant buffer 0. */
555 if (num_system_values > 0) {
556 nir->num_uniforms += num_system_values * sizeof(uint32_t);
557
558 system_values = reralloc(mem_ctx, system_values, enum brw_param_builtin,
559 num_system_values);
560
561 nir_foreach_block(block, impl) {
562 nir_foreach_instr_safe(instr, block) {
563 if (instr->type != nir_instr_type_intrinsic)
564 continue;
565
566 nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);
567
568 if (load->intrinsic != nir_intrinsic_load_ubo)
569 continue;
570
571 b.cursor = nir_before_instr(instr);
572
573 assert(load->src[0].is_ssa);
574
575 if (load->src[0].ssa == temp_ubo_name) {
576 nir_instr_rewrite_src(instr, &load->src[0],
577 nir_src_for_ssa(nir_imm_int(&b, 0)));
578 } else if (nir_src_as_uint(load->src[0]) == 0) {
579 nir_ssa_def *offset =
580 nir_iadd(&b, load->src[1].ssa,
581 nir_imm_int(&b, 4 * num_system_values));
582 nir_instr_rewrite_src(instr, &load->src[1],
583 nir_src_for_ssa(offset));
584 }
585 }
586 }
587
588 /* We need to fold the new iadds for brw_nir_analyze_ubo_ranges */
589 nir_opt_constant_folding(nir);
590 } else {
591 ralloc_free(system_values);
592 system_values = NULL;
593 }
594
595    nir_validate_shader(nir, "after remapping");
596
597 // XXX: vs clip planes?
598 if (nir->info.stage != MESA_SHADER_COMPUTE)
599 brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
600
601 *out_system_values = system_values;
602 *out_num_system_values = num_system_values;
603 }
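/* Net effect of the above: system values become the first num_system_values
 * dwords of constant buffer 0, and any pre-existing cbuf0 loads are shifted
 * up by 4 * num_system_values bytes to make room. temp_ubo_name is only a
 * placeholder so the newly inserted loads can be told apart from real cbuf0
 * loads in the second pass; the actual values are supplied later from the
 * system_values[] list handed back to the caller.
 */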
604
605 /**
606 * Compile a vertex shader, and upload the assembly.
607 */
608 static bool
609 iris_compile_vs(struct iris_context *ice,
610 struct iris_uncompiled_shader *ish,
611 const struct brw_vs_prog_key *key)
612 {
613 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
614 const struct brw_compiler *compiler = screen->compiler;
615 const struct gen_device_info *devinfo = &screen->devinfo;
616 void *mem_ctx = ralloc_context(NULL);
617 struct brw_vs_prog_data *vs_prog_data =
618 rzalloc(mem_ctx, struct brw_vs_prog_data);
619 struct brw_vue_prog_data *vue_prog_data = &vs_prog_data->base;
620 struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
621 enum brw_param_builtin *system_values;
622 unsigned num_system_values;
623
624 nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
625
626 if (key->nr_userclip_plane_consts) {
627 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
628 nir_lower_clip_vs(nir, (1 << key->nr_userclip_plane_consts) - 1, true);
629 nir_lower_io_to_temporaries(nir, impl, true, false);
630 nir_lower_global_vars_to_local(nir);
631 nir_lower_vars_to_ssa(nir);
632 nir_shader_gather_info(nir, impl);
633 }
634
635 // XXX: alt mode
636
637 iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
638 &num_system_values);
639
640 assign_common_binding_table_offsets(devinfo, nir, prog_data, 0,
641 num_system_values);
642
643 brw_compute_vue_map(devinfo,
644 &vue_prog_data->vue_map, nir->info.outputs_written,
645 nir->info.separate_shader);
646
647    /* Don't tell the backend about our clip plane constants; we've already
648 * lowered them in NIR and we don't want it doing it again.
649 */
650 struct brw_vs_prog_key key_no_ucp = *key;
651 key_no_ucp.nr_userclip_plane_consts = 0;
652
653 char *error_str = NULL;
654 const unsigned *program =
655 brw_compile_vs(compiler, &ice->dbg, mem_ctx, &key_no_ucp, vs_prog_data,
656 nir, -1, &error_str);
657 if (program == NULL) {
658 dbg_printf("Failed to compile vertex shader: %s\n", error_str);
659 ralloc_free(mem_ctx);
660 return false;
661 }
662
663 uint32_t *so_decls =
664 ice->vtbl.create_so_decl_list(&ish->stream_output,
665 &vue_prog_data->vue_map);
666
667 iris_upload_and_bind_shader(ice, IRIS_CACHE_VS, key, program, prog_data,
668 so_decls, system_values, num_system_values);
669
670 ralloc_free(mem_ctx);
671 return true;
672 }
673
674 /**
675 * Update the current vertex shader variant.
676 *
677 * Fill out the key, look in the cache, compile and bind if needed.
678 */
679 static void
680 iris_update_compiled_vs(struct iris_context *ice)
681 {
682 struct iris_uncompiled_shader *ish =
683 ice->shaders.uncompiled[MESA_SHADER_VERTEX];
684
685 struct brw_vs_prog_key key = { .program_string_id = ish->program_id };
686 ice->vtbl.populate_vs_key(ice, &ish->nir->info, &key);
687
688 if (iris_bind_cached_shader(ice, IRIS_CACHE_VS, &key))
689 return;
690
691 UNUSED bool success = iris_compile_vs(ice, ish, &key);
692 }
693
694 /**
695 * Get the shader_info for a given stage, or NULL if the stage is disabled.
696 */
697 const struct shader_info *
698 iris_get_shader_info(const struct iris_context *ice, gl_shader_stage stage)
699 {
700 const struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[stage];
701
702 if (!ish)
703 return NULL;
704
705 const nir_shader *nir = ish->nir;
706 return &nir->info;
707 }
708
709 // XXX: this function is gross
710 unsigned
711 iris_get_shader_num_ubos(const struct iris_context *ice, gl_shader_stage stage)
712 {
713 const struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[stage];
714 const struct iris_compiled_shader *shader = ice->shaders.prog[stage];
715
716 if (ish) {
717 const nir_shader *nir = ish->nir;
718 /* see assign_common_binding_table_offsets */
719 return nir->info.num_ubos +
720 ((nir->num_uniforms || shader->num_system_values) ? 1 : 0);
721 }
722 return 0;
723 }
724
725 /**
726 * Get the union of TCS output and TES input slots.
727 *
728 * TCS and TES need to agree on a common URB entry layout. In particular,
729 * the data for all patch vertices is stored in a single URB entry (unlike
730 * GS which has one entry per input vertex). This means that per-vertex
731 * array indexing needs a stride.
732 *
733 * SSO requires locations to match, but doesn't require the number of
734 * outputs/inputs to match (in fact, the TCS often has extra outputs).
735 * So, we need to take the extra step of unifying these on the fly.
736 */
737 static void
738 get_unified_tess_slots(const struct iris_context *ice,
739 uint64_t *per_vertex_slots,
740 uint32_t *per_patch_slots)
741 {
742 const struct shader_info *tcs =
743 iris_get_shader_info(ice, MESA_SHADER_TESS_CTRL);
744 const struct shader_info *tes =
745 iris_get_shader_info(ice, MESA_SHADER_TESS_EVAL);
746
747 *per_vertex_slots = tes->inputs_read;
748 *per_patch_slots = tes->patch_inputs_read;
749
750 if (tcs) {
751 *per_vertex_slots |= tcs->outputs_written;
752 *per_patch_slots |= tcs->patch_outputs_written;
753 }
754 }
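/* For example, per-patch outputs that the TCS writes but the TES never reads
 * still end up in the unified mask, so both stages are keyed with the same
 * set of slots and therefore agree on the URB entry layout.
 */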
755
756 /**
757 * Compile a tessellation control shader, and upload the assembly.
758 */
759 static bool
760 iris_compile_tcs(struct iris_context *ice,
761 struct iris_uncompiled_shader *ish,
762 const struct brw_tcs_prog_key *key)
763 {
764 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
765 const struct brw_compiler *compiler = screen->compiler;
766 const struct nir_shader_compiler_options *options =
767 compiler->glsl_compiler_options[MESA_SHADER_TESS_CTRL].NirOptions;
768 const struct gen_device_info *devinfo = &screen->devinfo;
769 void *mem_ctx = ralloc_context(NULL);
770 struct brw_tcs_prog_data *tcs_prog_data =
771 rzalloc(mem_ctx, struct brw_tcs_prog_data);
772 struct brw_vue_prog_data *vue_prog_data = &tcs_prog_data->base;
773 struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
774 enum brw_param_builtin *system_values = NULL;
775 unsigned num_system_values = 0;
776
777 nir_shader *nir;
778
779 if (ish) {
780 nir = nir_shader_clone(mem_ctx, ish->nir);
781
782 iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
783 &num_system_values);
784 assign_common_binding_table_offsets(devinfo, nir, prog_data, 0,
785 num_system_values);
786 } else {
787 nir = brw_nir_create_passthrough_tcs(mem_ctx, compiler, options, key);
788
789 /* Reserve space for passing the default tess levels as constants. */
790 prog_data->param = rzalloc_array(mem_ctx, uint32_t, 8);
791 prog_data->nr_params = 8;
792 prog_data->ubo_ranges[0].length = 1;
793 }
794
795 char *error_str = NULL;
796 const unsigned *program =
797 brw_compile_tcs(compiler, &ice->dbg, mem_ctx, key, tcs_prog_data, nir,
798 -1, &error_str);
799 if (program == NULL) {
800       dbg_printf("Failed to compile control shader: %s\n", error_str);
801 ralloc_free(mem_ctx);
802 return false;
803 }
804
805 iris_upload_and_bind_shader(ice, IRIS_CACHE_TCS, key, program, prog_data,
806 NULL, system_values, num_system_values);
807
808 ralloc_free(mem_ctx);
809 return true;
810 }
811
812 /**
813 * Update the current tessellation control shader variant.
814 *
815 * Fill out the key, look in the cache, compile and bind if needed.
816 */
817 static void
818 iris_update_compiled_tcs(struct iris_context *ice)
819 {
820 struct iris_uncompiled_shader *tcs =
821 ice->shaders.uncompiled[MESA_SHADER_TESS_CTRL];
822
823 const struct shader_info *tes_info =
824 iris_get_shader_info(ice, MESA_SHADER_TESS_EVAL);
825 struct brw_tcs_prog_key key = {
826 .program_string_id = tcs ? tcs->program_id : 0,
827 .tes_primitive_mode = tes_info->tess.primitive_mode,
828 .input_vertices = ice->state.vertices_per_patch,
829 };
830 get_unified_tess_slots(ice, &key.outputs_written,
831 &key.patch_outputs_written);
832 ice->vtbl.populate_tcs_key(ice, &key);
833
834 if (iris_bind_cached_shader(ice, IRIS_CACHE_TCS, &key))
835 return;
836
837 UNUSED bool success = iris_compile_tcs(ice, tcs, &key);
838 }
839
840 /**
841 * Compile a tessellation evaluation shader, and upload the assembly.
842 */
843 static bool
844 iris_compile_tes(struct iris_context *ice,
845 struct iris_uncompiled_shader *ish,
846 const struct brw_tes_prog_key *key)
847 {
848 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
849 const struct brw_compiler *compiler = screen->compiler;
850 const struct gen_device_info *devinfo = &screen->devinfo;
851 void *mem_ctx = ralloc_context(NULL);
852 struct brw_tes_prog_data *tes_prog_data =
853 rzalloc(mem_ctx, struct brw_tes_prog_data);
854 struct brw_vue_prog_data *vue_prog_data = &tes_prog_data->base;
855 struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
856 enum brw_param_builtin *system_values;
857 unsigned num_system_values;
858
859 nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
860
861 iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
862 &num_system_values);
863
864 assign_common_binding_table_offsets(devinfo, nir, prog_data, 0,
865 num_system_values);
866
867 struct brw_vue_map input_vue_map;
868 brw_compute_tess_vue_map(&input_vue_map, key->inputs_read,
869 key->patch_inputs_read);
870
871 char *error_str = NULL;
872 const unsigned *program =
873 brw_compile_tes(compiler, &ice->dbg, mem_ctx, key, &input_vue_map,
874 tes_prog_data, nir, NULL, -1, &error_str);
875 if (program == NULL) {
876 dbg_printf("Failed to compile evaluation shader: %s\n", error_str);
877 ralloc_free(mem_ctx);
878 return false;
879 }
880
881 uint32_t *so_decls =
882 ice->vtbl.create_so_decl_list(&ish->stream_output,
883 &vue_prog_data->vue_map);
884
885 iris_upload_and_bind_shader(ice, IRIS_CACHE_TES, key, program, prog_data,
886 so_decls, system_values, num_system_values);
887
888 ralloc_free(mem_ctx);
889 return true;
890 }
891
892 /**
893 * Update the current tessellation evaluation shader variant.
894 *
895 * Fill out the key, look in the cache, compile and bind if needed.
896 */
897 static void
898 iris_update_compiled_tes(struct iris_context *ice)
899 {
900 struct iris_uncompiled_shader *ish =
901 ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL];
902
903 struct brw_tes_prog_key key = { .program_string_id = ish->program_id };
904 get_unified_tess_slots(ice, &key.inputs_read, &key.patch_inputs_read);
905 ice->vtbl.populate_tes_key(ice, &key);
906
907 if (iris_bind_cached_shader(ice, IRIS_CACHE_TES, &key))
908 return;
909
910 UNUSED bool success = iris_compile_tes(ice, ish, &key);
911 }
912
913 /**
914 * Compile a geometry shader, and upload the assembly.
915 */
916 static bool
917 iris_compile_gs(struct iris_context *ice,
918 struct iris_uncompiled_shader *ish,
919 const struct brw_gs_prog_key *key)
920 {
921 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
922 const struct brw_compiler *compiler = screen->compiler;
923 const struct gen_device_info *devinfo = &screen->devinfo;
924 void *mem_ctx = ralloc_context(NULL);
925 struct brw_gs_prog_data *gs_prog_data =
926 rzalloc(mem_ctx, struct brw_gs_prog_data);
927 struct brw_vue_prog_data *vue_prog_data = &gs_prog_data->base;
928 struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
929 enum brw_param_builtin *system_values;
930 unsigned num_system_values;
931
932 nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
933
934 iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
935 &num_system_values);
936
937 assign_common_binding_table_offsets(devinfo, nir, prog_data, 0,
938 num_system_values);
939
940 brw_compute_vue_map(devinfo,
941 &vue_prog_data->vue_map, nir->info.outputs_written,
942 nir->info.separate_shader);
943
944 char *error_str = NULL;
945 const unsigned *program =
946 brw_compile_gs(compiler, &ice->dbg, mem_ctx, key, gs_prog_data, nir,
947 NULL, -1, &error_str);
948 if (program == NULL) {
949 dbg_printf("Failed to compile geometry shader: %s\n", error_str);
950 ralloc_free(mem_ctx);
951 return false;
952 }
953
954 uint32_t *so_decls =
955 ice->vtbl.create_so_decl_list(&ish->stream_output,
956 &vue_prog_data->vue_map);
957
958 iris_upload_and_bind_shader(ice, IRIS_CACHE_GS, key, program, prog_data,
959 so_decls, system_values, num_system_values);
960
961 ralloc_free(mem_ctx);
962 return true;
963 }
964
965 /**
966 * Update the current geometry shader variant.
967 *
968 * Fill out the key, look in the cache, compile and bind if needed.
969 */
970 static void
971 iris_update_compiled_gs(struct iris_context *ice)
972 {
973 struct iris_uncompiled_shader *ish =
974 ice->shaders.uncompiled[MESA_SHADER_GEOMETRY];
975
976 if (!ish) {
977 iris_unbind_shader(ice, IRIS_CACHE_GS);
978 return;
979 }
980
981 struct brw_gs_prog_key key = { .program_string_id = ish->program_id };
982 ice->vtbl.populate_gs_key(ice, &key);
983
984 if (iris_bind_cached_shader(ice, IRIS_CACHE_GS, &key))
985 return;
986
987 UNUSED bool success = iris_compile_gs(ice, ish, &key);
988 }
989
990 /**
991 * Compile a fragment (pixel) shader, and upload the assembly.
992 */
993 static bool
994 iris_compile_fs(struct iris_context *ice,
995 struct iris_uncompiled_shader *ish,
996 const struct brw_wm_prog_key *key,
997 struct brw_vue_map *vue_map)
998 {
999 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1000 const struct brw_compiler *compiler = screen->compiler;
1001 const struct gen_device_info *devinfo = &screen->devinfo;
1002 void *mem_ctx = ralloc_context(NULL);
1003 struct brw_wm_prog_data *fs_prog_data =
1004 rzalloc(mem_ctx, struct brw_wm_prog_data);
1005 struct brw_stage_prog_data *prog_data = &fs_prog_data->base;
1006 enum brw_param_builtin *system_values;
1007 unsigned num_system_values;
1008
1009 nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
1010
1011 // XXX: alt mode
1012
1013 iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
1014 &num_system_values);
1015
1016 assign_common_binding_table_offsets(devinfo, nir, prog_data,
1017 MAX2(key->nr_color_regions, 1),
1018 num_system_values);
1019 char *error_str = NULL;
1020 const unsigned *program =
1021 brw_compile_fs(compiler, &ice->dbg, mem_ctx, key, fs_prog_data,
1022 nir, NULL, -1, -1, -1, true, false, vue_map, &error_str);
1023 if (program == NULL) {
1024 dbg_printf("Failed to compile fragment shader: %s\n", error_str);
1025 ralloc_free(mem_ctx);
1026 return false;
1027 }
1028
1029 //brw_alloc_stage_scratch(brw, &brw->wm.base, prog_data.base.total_scratch);
1030
1031 iris_upload_and_bind_shader(ice, IRIS_CACHE_FS, key, program, prog_data,
1032 NULL, system_values, num_system_values);
1033
1034 ralloc_free(mem_ctx);
1035 return true;
1036 }
1037
1038 /**
1039 * Update the current fragment shader variant.
1040 *
1041 * Fill out the key, look in the cache, compile and bind if needed.
1042 */
1043 static void
1044 iris_update_compiled_fs(struct iris_context *ice)
1045 {
1046 struct iris_uncompiled_shader *ish =
1047 ice->shaders.uncompiled[MESA_SHADER_FRAGMENT];
1048 struct brw_wm_prog_key key = { .program_string_id = ish->program_id };
1049 ice->vtbl.populate_fs_key(ice, &key);
1050
1051 if (ish->nos & IRIS_NOS_LAST_VUE_MAP)
1052 key.input_slots_valid = ice->shaders.last_vue_map->slots_valid;
1053
1054 if (iris_bind_cached_shader(ice, IRIS_CACHE_FS, &key))
1055 return;
1056
1057 UNUSED bool success =
1058 iris_compile_fs(ice, ish, &key, ice->shaders.last_vue_map);
1059 }
1060
1061 /**
1062 * Get the compiled shader for the last enabled geometry stage.
1063 *
1064 * This stage is the one which will feed stream output and the rasterizer.
1065 */
1066 static struct iris_compiled_shader *
1067 last_vue_shader(struct iris_context *ice)
1068 {
1069 if (ice->shaders.prog[MESA_SHADER_GEOMETRY])
1070 return ice->shaders.prog[MESA_SHADER_GEOMETRY];
1071
1072 if (ice->shaders.prog[MESA_SHADER_TESS_EVAL])
1073 return ice->shaders.prog[MESA_SHADER_TESS_EVAL];
1074
1075 return ice->shaders.prog[MESA_SHADER_VERTEX];
1076 }
1077
1078 /**
1079 * Update the last enabled stage's VUE map.
1080 *
1081 * When the shader feeding the rasterizer's output interface changes, we
1082 * need to re-emit various packets.
1083 */
1084 static void
1085 update_last_vue_map(struct iris_context *ice,
1086 struct brw_stage_prog_data *prog_data)
1087 {
1088 struct brw_vue_prog_data *vue_prog_data = (void *) prog_data;
1089 struct brw_vue_map *vue_map = &vue_prog_data->vue_map;
1090 struct brw_vue_map *old_map = ice->shaders.last_vue_map;
1091 const uint64_t changed_slots =
1092 (old_map ? old_map->slots_valid : 0ull) ^ vue_map->slots_valid;
1093
1094 if (changed_slots & VARYING_BIT_VIEWPORT) {
1095 // XXX: could use ctx->Const.MaxViewports for old API efficiency
1096 ice->state.num_viewports =
1097 (vue_map->slots_valid & VARYING_BIT_VIEWPORT) ? IRIS_MAX_VIEWPORTS : 1;
1098 ice->state.dirty |= IRIS_DIRTY_CLIP |
1099 IRIS_DIRTY_SF_CL_VIEWPORT |
1100 IRIS_DIRTY_CC_VIEWPORT |
1101 IRIS_DIRTY_SCISSOR_RECT |
1102 IRIS_DIRTY_UNCOMPILED_FS |
1103 ice->state.dirty_for_nos[IRIS_NOS_LAST_VUE_MAP];
1104 // XXX: CC_VIEWPORT?
1105 }
1106
1107 if (changed_slots || (old_map && old_map->separate != vue_map->separate)) {
1108 ice->state.dirty |= IRIS_DIRTY_SBE;
1109 }
1110
1111 ice->shaders.last_vue_map = &vue_prog_data->vue_map;
1112 }
1113
1114 /**
1115 * Get the prog_data for a given stage, or NULL if the stage is disabled.
1116 */
1117 static struct brw_vue_prog_data *
1118 get_vue_prog_data(struct iris_context *ice, gl_shader_stage stage)
1119 {
1120 if (!ice->shaders.prog[stage])
1121 return NULL;
1122
1123 return (void *) ice->shaders.prog[stage]->prog_data;
1124 }
1125
1126 /**
1127 * Update the current shader variants for the given state.
1128 *
1129 * This should be called on every draw call to ensure that the correct
1130 * shaders are bound. It will also flag any dirty state triggered by
1131 * swapping out those shaders.
1132 */
1133 void
1134 iris_update_compiled_shaders(struct iris_context *ice)
1135 {
1136 const uint64_t dirty = ice->state.dirty;
1137
1138 struct brw_vue_prog_data *old_prog_datas[4];
1139 if (!(dirty & IRIS_DIRTY_URB)) {
1140 for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++)
1141 old_prog_datas[i] = get_vue_prog_data(ice, i);
1142 }
1143
1144 if (dirty & (IRIS_DIRTY_UNCOMPILED_TCS | IRIS_DIRTY_UNCOMPILED_TES)) {
1145 struct iris_uncompiled_shader *tes =
1146 ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL];
1147 if (tes) {
1148 iris_update_compiled_tcs(ice);
1149 iris_update_compiled_tes(ice);
1150 } else {
1151 iris_unbind_shader(ice, IRIS_CACHE_TCS);
1152 iris_unbind_shader(ice, IRIS_CACHE_TES);
1153 }
1154 }
1155
1156 if (dirty & IRIS_DIRTY_UNCOMPILED_VS)
1157 iris_update_compiled_vs(ice);
1158 if (dirty & IRIS_DIRTY_UNCOMPILED_GS)
1159 iris_update_compiled_gs(ice);
1160
1161 struct iris_compiled_shader *shader = last_vue_shader(ice);
1162 update_last_vue_map(ice, shader->prog_data);
1163 if (ice->state.streamout != shader->streamout) {
1164 ice->state.streamout = shader->streamout;
1165 ice->state.dirty |= IRIS_DIRTY_SO_DECL_LIST | IRIS_DIRTY_STREAMOUT;
1166 }
1167
1168 if (dirty & IRIS_DIRTY_UNCOMPILED_FS)
1169 iris_update_compiled_fs(ice);
1170 // ...
1171
1172    /* Changing shader interfaces may require a URB reconfiguration. */
1173 if (!(dirty & IRIS_DIRTY_URB)) {
1174 for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
1175 struct brw_vue_prog_data *old = old_prog_datas[i];
1176 struct brw_vue_prog_data *new = get_vue_prog_data(ice, i);
1177 if (!!old != !!new ||
1178 (new && new->urb_entry_size != old->urb_entry_size)) {
1179 ice->state.dirty |= IRIS_DIRTY_URB;
1180 break;
1181 }
1182 }
1183 }
1184 }
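/* The check above compares the old and new VUE entry sizes for VS through GS:
 * if any of those stages appeared, disappeared, or changed its entry size,
 * IRIS_DIRTY_URB makes the next draw reconfigure the URB allocation.
 */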
1185
1186 static bool
1187 iris_compile_cs(struct iris_context *ice,
1188 struct iris_uncompiled_shader *ish,
1189 const struct brw_cs_prog_key *key)
1190 {
1191 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1192 const struct brw_compiler *compiler = screen->compiler;
1193 const struct gen_device_info *devinfo = &screen->devinfo;
1194 void *mem_ctx = ralloc_context(NULL);
1195 struct brw_cs_prog_data *cs_prog_data =
1196 rzalloc(mem_ctx, struct brw_cs_prog_data);
1197 struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
1198 enum brw_param_builtin *system_values;
1199 unsigned num_system_values;
1200
1201 nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
1202
1203 cs_prog_data->binding_table.work_groups_start = 0;
1204
1205 prog_data->total_shared = nir->info.cs.shared_size;
1206
1207 iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
1208 &num_system_values);
1209
1210 assign_common_binding_table_offsets(devinfo, nir, prog_data, 1,
1211 num_system_values);
1212
1213 char *error_str = NULL;
1214 const unsigned *program =
1215 brw_compile_cs(compiler, &ice->dbg, mem_ctx, key, cs_prog_data,
1216 nir, -1, &error_str);
1217 if (program == NULL) {
1218 dbg_printf("Failed to compile compute shader: %s\n", error_str);
1219 ralloc_free(mem_ctx);
1220 return false;
1221 }
1222
1223 iris_upload_and_bind_shader(ice, IRIS_CACHE_CS, key, program, prog_data,
1224 NULL, system_values, num_system_values);
1225
1226 ralloc_free(mem_ctx);
1227 return true;
1228 }
1229
1230 void
1231 iris_update_compiled_compute_shader(struct iris_context *ice)
1232 {
1233 struct iris_uncompiled_shader *ish =
1234 ice->shaders.uncompiled[MESA_SHADER_COMPUTE];
1235
1236 struct brw_cs_prog_key key = { .program_string_id = ish->program_id };
1237 ice->vtbl.populate_cs_key(ice, &key);
1238
1239 if (iris_bind_cached_shader(ice, IRIS_CACHE_CS, &key))
1240 return;
1241
1242 UNUSED bool success = iris_compile_cs(ice, ish, &key);
1243 }
1244
1245 void
1246 iris_fill_cs_push_const_buffer(struct brw_cs_prog_data *cs_prog_data,
1247 uint32_t *dst)
1248 {
1249 struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
1250 assert(cs_prog_data->push.total.size > 0);
1251 assert(cs_prog_data->push.cross_thread.size == 0);
1252 assert(cs_prog_data->push.per_thread.dwords == 1);
1253 assert(prog_data->param[0] == BRW_PARAM_BUILTIN_SUBGROUP_ID);
1254 for (unsigned t = 0; t < cs_prog_data->threads; t++)
1255 dst[8 * t] = t;
1256 }
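/* Each thread's per-thread push constant block is a single 32-byte register
 * (8 dwords) whose first dword holds that thread's subgroup ID, so dst must
 * have room for cs_prog_data->threads * 8 dwords.
 */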
1257
1258 /**
1259 * Allocate scratch BOs as needed for the given per-thread size and stage.
1260 *
1261 * Returns the 32-bit "Scratch Space Base Pointer" value.
1262 */
1263 uint32_t
1264 iris_get_scratch_space(struct iris_context *ice,
1265 unsigned per_thread_scratch,
1266 gl_shader_stage stage)
1267 {
1268 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1269 struct iris_bufmgr *bufmgr = screen->bufmgr;
1270 const struct gen_device_info *devinfo = &screen->devinfo;
1271
1272 unsigned encoded_size = ffs(per_thread_scratch) - 11;
1273 assert(encoded_size < (1 << 16));
1274
1275 struct iris_bo **bop = &ice->shaders.scratch_bos[encoded_size][stage];
1276
1277 /* The documentation for 3DSTATE_PS "Scratch Space Base Pointer" says:
1278 *
1279 * "Scratch Space per slice is computed based on 4 sub-slices. SW must
1280 * allocate scratch space enough so that each slice has 4 slices
1281 * allowed."
1282 *
1283 * According to the other driver team, this applies to compute shaders
1284 * as well. This is not currently documented at all.
1285 */
1286 unsigned subslice_total = 4 * devinfo->num_slices;
1287 assert(subslice_total >= screen->subslice_total);
1288
1289 if (!*bop) {
1290 unsigned scratch_ids_per_subslice = devinfo->max_cs_threads;
1291 uint32_t max_threads[] = {
1292 [MESA_SHADER_VERTEX] = devinfo->max_vs_threads,
1293 [MESA_SHADER_TESS_CTRL] = devinfo->max_tcs_threads,
1294 [MESA_SHADER_TESS_EVAL] = devinfo->max_tes_threads,
1295 [MESA_SHADER_GEOMETRY] = devinfo->max_gs_threads,
1296 [MESA_SHADER_FRAGMENT] = devinfo->max_wm_threads,
1297 [MESA_SHADER_COMPUTE] = scratch_ids_per_subslice * subslice_total,
1298 };
1299
1300 uint32_t size = per_thread_scratch * max_threads[stage];
1301
1302 *bop = iris_bo_alloc(bufmgr, "scratch", size, IRIS_MEMZONE_SHADER);
1303 }
1304
1305 return (*bop)->gtt_offset;
1306 }
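/* In iris_get_scratch_space() above, encoded_size is log2(per_thread_scratch
 * / 1024), assuming per_thread_scratch is a power of two of at least 1KB as
 * reported by the compiler (e.g. 2KB -> 1, 4KB -> 2); it is only used to
 * index the scratch_bos[] cache so each distinct size/stage pair gets its
 * own BO.
 */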
1307
1308 void
1309 iris_init_program_functions(struct pipe_context *ctx)
1310 {
1311 ctx->create_vs_state = iris_create_vs_state;
1312 ctx->create_tcs_state = iris_create_tcs_state;
1313 ctx->create_tes_state = iris_create_tes_state;
1314 ctx->create_gs_state = iris_create_gs_state;
1315 ctx->create_fs_state = iris_create_fs_state;
1316 ctx->create_compute_state = iris_create_compute_state;
1317
1318 ctx->delete_vs_state = iris_delete_shader_state;
1319 ctx->delete_tcs_state = iris_delete_shader_state;
1320 ctx->delete_tes_state = iris_delete_shader_state;
1321 ctx->delete_gs_state = iris_delete_shader_state;
1322 ctx->delete_fs_state = iris_delete_shader_state;
1323 ctx->delete_compute_state = iris_delete_shader_state;
1324
1325 ctx->bind_vs_state = iris_bind_vs_state;
1326 ctx->bind_tcs_state = iris_bind_tcs_state;
1327 ctx->bind_tes_state = iris_bind_tes_state;
1328 ctx->bind_gs_state = iris_bind_gs_state;
1329 ctx->bind_fs_state = iris_bind_fs_state;
1330 ctx->bind_compute_state = iris_bind_cs_state;
1331 }