iris: fix comment location
[mesa.git] src/gallium/drivers/iris/iris_program.c
/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * @file iris_program.c
 *
 * This file contains the driver interface for compiling shaders.
 *
 * See iris_program_cache.c for the in-memory program cache where the
 * compiled shaders are stored.
 */

#include <stdio.h>
#include <errno.h>
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "util/u_atomic.h"
#include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h"
#include "intel/compiler/brw_compiler.h"
#include "intel/compiler/brw_nir.h"
#include "iris_context.h"

static unsigned
get_new_program_id(struct iris_screen *screen)
{
   return p_atomic_inc_return(&screen->program_id);
}

/**
 * An uncompiled, API-facing shader. This is the Gallium CSO for shaders.
 * It primarily contains the NIR for the shader.
 *
 * Each API-facing shader can be compiled into multiple shader variants,
 * based on non-orthogonal state dependencies, recorded in the shader key.
 *
 * See iris_compiled_shader, which represents a compiled shader variant.
 */
struct iris_uncompiled_shader {
   nir_shader *nir;

   struct pipe_stream_output_info stream_output;

   unsigned program_id;

   /** Bitfield of (1 << IRIS_NOS_*) flags. */
   unsigned nos;
};

static nir_ssa_def *
get_aoa_deref_offset(nir_builder *b,
                     nir_deref_instr *deref,
                     unsigned elem_size)
{
   unsigned array_size = elem_size;
   nir_ssa_def *offset = nir_imm_int(b, 0);

   while (deref->deref_type != nir_deref_type_var) {
      assert(deref->deref_type == nir_deref_type_array);

      /* This level's element size is the previous level's array size */
      nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
      assert(deref->arr.index.ssa);
      offset = nir_iadd(b, offset,
                        nir_imul(b, index, nir_imm_int(b, array_size)));

      deref = nir_deref_instr_parent(deref);
      assert(glsl_type_is_array(deref->type));
      array_size *= glsl_get_length(deref->type);
   }

   /* Accessing an invalid surface index with the dataport can result in a
    * hang. According to the spec "if the index used to select an individual
    * element is negative or greater than or equal to the size of the array,
    * the results of the operation are undefined but may not lead to
    * termination" -- which is one of the possible outcomes of the hang.
    * Clamp the index to prevent access outside of the array bounds.
    */
   return nir_umin(b, offset, nir_imm_int(b, array_size - elem_size));
}
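
/* Worked example (illustrative comment only, not driver code): for a GLSL
 * declaration `uniform image2D imgs[3][4];`, an access `imgs[i][j]` with
 * elem_size == 1 walks the deref chain innermost-first:
 *
 *    offset  = j * 1;    // then array_size becomes 4
 *    offset += i * 4;    // then array_size becomes 12
 *
 * so the final clamp is umin(offset, 12 - 1), keeping the flattened index
 * within the twelve declared image surfaces.
 */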

static void
iris_lower_storage_image_derefs(nir_shader *nir)
{
   nir_function_impl *impl = nir_shader_get_entrypoint(nir);

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_image_deref_load:
         case nir_intrinsic_image_deref_store:
         case nir_intrinsic_image_deref_atomic_add:
         case nir_intrinsic_image_deref_atomic_min:
         case nir_intrinsic_image_deref_atomic_max:
         case nir_intrinsic_image_deref_atomic_and:
         case nir_intrinsic_image_deref_atomic_or:
         case nir_intrinsic_image_deref_atomic_xor:
         case nir_intrinsic_image_deref_atomic_exchange:
         case nir_intrinsic_image_deref_atomic_comp_swap:
         case nir_intrinsic_image_deref_size:
         case nir_intrinsic_image_deref_samples: {
            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
            nir_variable *var = nir_deref_instr_get_variable(deref);

            b.cursor = nir_before_instr(&intrin->instr);
            nir_ssa_def *index =
               nir_iadd(&b, nir_imm_int(&b, var->data.driver_location),
                        get_aoa_deref_offset(&b, deref, 1));
            brw_nir_rewrite_image_intrinsic(intrin, index);
            break;
         }

         default:
            break;
         }
      }
   }
}

// XXX: need unify_interfaces() at link time...

static void
update_so_info(struct pipe_stream_output_info *so_info)
{
   for (unsigned i = 0; i < so_info->num_outputs; i++) {
      struct pipe_stream_output *output = &so_info->output[i];

      /* The VUE header contains three scalar fields packed together:
       * - gl_PointSize is stored in VARYING_SLOT_PSIZ.w
       * - gl_Layer is stored in VARYING_SLOT_PSIZ.y
       * - gl_ViewportIndex is stored in VARYING_SLOT_PSIZ.z
       */
      switch (output->register_index) {
      case VARYING_SLOT_LAYER:
         assert(output->num_components == 1);
         output->register_index = VARYING_SLOT_PSIZ;
         output->start_component = 1;
         break;
      case VARYING_SLOT_VIEWPORT:
         assert(output->num_components == 1);
         output->register_index = VARYING_SLOT_PSIZ;
         output->start_component = 2;
         break;
      case VARYING_SLOT_PSIZ:
         assert(output->num_components == 1);
         output->start_component = 3;
         break;
      }

      //info->outputs_written |= 1ull << output->register_index;
   }
}
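
/* Illustrative example (values assumed, not driver code): a stream output
 * capturing gl_Layer arrives here as
 *
 *    { .register_index = VARYING_SLOT_LAYER, .start_component = 0,
 *      .num_components = 1 }
 *
 * and leaves remapped to the VUE header slot where the hardware actually
 * stores the value:
 *
 *    { .register_index = VARYING_SLOT_PSIZ, .start_component = 1,
 *      .num_components = 1 }
 */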

/**
 * The pipe->create_[stage]_state() driver hooks.
 *
 * Performs basic NIR preprocessing, records any state dependencies, and
 * returns an iris_uncompiled_shader as the Gallium CSO.
 *
 * Actual shader compilation to assembly happens later, at first use.
 */
static void *
iris_create_uncompiled_shader(struct pipe_context *ctx,
                              nir_shader *nir,
                              const struct pipe_stream_output_info *so_info)
{
   //struct iris_context *ice = (struct iris_context *)ctx;
   struct iris_screen *screen = (struct iris_screen *)ctx->screen;
   const struct gen_device_info *devinfo = &screen->devinfo;

   struct iris_uncompiled_shader *ish =
      calloc(1, sizeof(struct iris_uncompiled_shader));
   if (!ish)
      return NULL;

   nir = brw_preprocess_nir(screen->compiler, nir);

   NIR_PASS_V(nir, brw_nir_lower_image_load_store, devinfo);
   NIR_PASS_V(nir, iris_lower_storage_image_derefs);

   ish->program_id = get_new_program_id(screen);
   ish->nir = nir;
   if (so_info) {
      memcpy(&ish->stream_output, so_info, sizeof(*so_info));
      update_so_info(&ish->stream_output);
   }

   switch (nir->info.stage) {
   case MESA_SHADER_VERTEX:
      /* User clip planes */
      if (nir->info.clip_distance_array_size == 0)
         ish->nos |= IRIS_NOS_RASTERIZER;
      // XXX: NOS
      break;
   case MESA_SHADER_TESS_CTRL:
      // XXX: NOS
      break;
   case MESA_SHADER_TESS_EVAL:
      // XXX: NOS
      break;
   case MESA_SHADER_GEOMETRY:
      // XXX: NOS
      break;
   case MESA_SHADER_FRAGMENT:
      ish->nos |= IRIS_NOS_FRAMEBUFFER |
                  IRIS_NOS_DEPTH_STENCIL_ALPHA |
                  IRIS_NOS_RASTERIZER |
                  IRIS_NOS_BLEND;

      /* The program key needs the VUE map if there are > 16 inputs */
      if (util_bitcount64(ish->nir->info.inputs_read &
                          BRW_FS_VARYING_INPUT_MASK) > 16) {
         ish->nos |= IRIS_NOS_LAST_VUE_MAP;
      }
      break;
   case MESA_SHADER_COMPUTE:
      // XXX: NOS
      break;
   default:
      break;
   }

   // XXX: precompile!
   // XXX: disallow more than 64KB of shared variables

   return ish;
}

static void *
iris_create_shader_state(struct pipe_context *ctx,
                         const struct pipe_shader_state *state)
{
   assert(state->type == PIPE_SHADER_IR_NIR);

   return iris_create_uncompiled_shader(ctx, state->ir.nir,
                                        &state->stream_output);
}

static void *
iris_create_compute_state(struct pipe_context *ctx,
                          const struct pipe_compute_state *state)
{
   assert(state->ir_type == PIPE_SHADER_IR_NIR);

   return iris_create_uncompiled_shader(ctx, (void *) state->prog, NULL);
}

/**
 * The pipe->delete_[stage]_state() driver hooks.
 *
 * Frees the iris_uncompiled_shader.
 */
static void
iris_delete_shader_state(struct pipe_context *ctx, void *state)
{
   struct iris_uncompiled_shader *ish = state;

   ralloc_free(ish->nir);
   free(ish);
}

/**
 * The pipe->bind_[stage]_state() driver hook.
 *
 * Binds an uncompiled shader as the current one for a particular stage.
 * Updates dirty tracking to account for the shader's NOS.
 */
static void
bind_state(struct iris_context *ice,
           struct iris_uncompiled_shader *ish,
           gl_shader_stage stage)
{
   uint64_t dirty_bit = IRIS_DIRTY_UNCOMPILED_VS << stage;
   const uint64_t nos = ish ? ish->nos : 0;

   ice->shaders.uncompiled[stage] = ish;
   ice->state.dirty |= dirty_bit;

   /* Record that CSOs need to mark IRIS_DIRTY_UNCOMPILED_XS when they change
    * (or that they no longer need to do so).
    */
   for (int i = 0; i < IRIS_NOS_COUNT; i++) {
      if (nos & (1 << i))
         ice->state.dirty_for_nos[i] |= dirty_bit;
      else
         ice->state.dirty_for_nos[i] &= ~dirty_bit;
   }
}
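
/* Example of the NOS bookkeeping above (illustrative only): binding a
 * fragment shader whose nos includes (1 << IRIS_NOS_BLEND) sets
 *
 *    ice->state.dirty_for_nos[IRIS_NOS_BLEND] |= IRIS_DIRTY_UNCOMPILED_FS;
 *
 * so a later blend CSO change re-flags the FS stage, letting the next draw
 * pick (or compile) the variant matching the new non-orthogonal state.
 */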

static void
iris_bind_vs_state(struct pipe_context *ctx, void *state)
{
   bind_state((void *) ctx, state, MESA_SHADER_VERTEX);
}

static void
iris_bind_tcs_state(struct pipe_context *ctx, void *state)
{
   bind_state((void *) ctx, state, MESA_SHADER_TESS_CTRL);
}

static void
iris_bind_tes_state(struct pipe_context *ctx, void *state)
{
   struct iris_context *ice = (struct iris_context *)ctx;

   /* Enabling/disabling optional stages requires a URB reconfiguration. */
   if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL])
      ice->state.dirty |= IRIS_DIRTY_URB;

   bind_state((void *) ctx, state, MESA_SHADER_TESS_EVAL);
}

static void
iris_bind_gs_state(struct pipe_context *ctx, void *state)
{
   struct iris_context *ice = (struct iris_context *)ctx;

   /* Enabling/disabling optional stages requires a URB reconfiguration. */
   if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_GEOMETRY])
      ice->state.dirty |= IRIS_DIRTY_URB;

   bind_state((void *) ctx, state, MESA_SHADER_GEOMETRY);
}

static void
iris_bind_fs_state(struct pipe_context *ctx, void *state)
{
   bind_state((void *) ctx, state, MESA_SHADER_FRAGMENT);
}

static void
iris_bind_cs_state(struct pipe_context *ctx, void *state)
{
   bind_state((void *) ctx, state, MESA_SHADER_COMPUTE);
}

/**
 * Sets up the starting offsets for the groups of binding table entries
 * common to all pipeline stages.
 *
 * Unused groups are initialized to 0xd0d0d0d0 to make it obvious that
 * they're unused, and also so that adding small offsets to them will
 * trigger some of our asserts that surface indices are < BRW_MAX_SURFACES.
 */
static uint32_t
assign_common_binding_table_offsets(const struct gen_device_info *devinfo,
                                    const struct nir_shader *nir,
                                    struct brw_stage_prog_data *prog_data,
                                    uint32_t next_binding_table_offset,
                                    unsigned num_system_values)
{
   const struct shader_info *info = &nir->info;

   if (info->num_textures) {
      prog_data->binding_table.texture_start = next_binding_table_offset;
      prog_data->binding_table.gather_texture_start = next_binding_table_offset;
      next_binding_table_offset += info->num_textures;
   } else {
      prog_data->binding_table.texture_start = 0xd0d0d0d0;
      prog_data->binding_table.gather_texture_start = 0xd0d0d0d0;
   }

   if (info->num_images) {
      prog_data->binding_table.image_start = next_binding_table_offset;
      next_binding_table_offset += info->num_images;
   } else {
      prog_data->binding_table.image_start = 0xd0d0d0d0;
   }

   int num_ubos = info->num_ubos +
                  ((nir->num_uniforms || num_system_values) ? 1 : 0);

   if (num_ubos) {
      //assert(info->num_ubos <= BRW_MAX_UBO);
      prog_data->binding_table.ubo_start = next_binding_table_offset;
      next_binding_table_offset += num_ubos;
   } else {
      prog_data->binding_table.ubo_start = 0xd0d0d0d0;
   }

   if (info->num_ssbos || info->num_abos) {
      prog_data->binding_table.ssbo_start = next_binding_table_offset;
      // XXX: see iris_state "wasting 16 binding table slots for ABOs" comment
      next_binding_table_offset += IRIS_MAX_ABOS + info->num_ssbos;
   } else {
      prog_data->binding_table.ssbo_start = 0xd0d0d0d0;
   }

   prog_data->binding_table.shader_time_start = 0xd0d0d0d0;

   /* This may or may not be used depending on how the compile goes. */
   prog_data->binding_table.pull_constants_start = next_binding_table_offset;
   next_binding_table_offset++;

   /* Plane 0 is just the regular texture section */
   prog_data->binding_table.plane_start[0] = prog_data->binding_table.texture_start;

   prog_data->binding_table.plane_start[1] = next_binding_table_offset;
   next_binding_table_offset += info->num_textures;

   prog_data->binding_table.plane_start[2] = next_binding_table_offset;
   next_binding_table_offset += info->num_textures;

   /* Set the binding table size */
   prog_data->binding_table.size_bytes = next_binding_table_offset * 4;

   return next_binding_table_offset;
}
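
/* Sketch of a resulting layout (hypothetical shader with 2 textures,
 * 1 image, 1 UBO, no SSBOs/ABOs, starting at offset 0):
 *
 *    0-1:  textures (gather entries alias the same slots)
 *    2:    image
 *    3:    UBO
 *    4:    pull constants (reserved whether or not the compile uses them)
 *    5-6:  plane 1 texture entries
 *    7-8:  plane 2 texture entries
 *
 * giving size_bytes = 9 * 4; the unused SSBO and shader_time groups keep
 * the 0xd0d0d0d0 poison value.
 */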

/**
 * Associate NIR uniform variables with the prog_data->param[] mechanism
 * used by the backend. Also, decide which UBOs we'd like to push in an
 * ideal situation (though the backend can reduce this).
 */
static void
iris_setup_uniforms(const struct brw_compiler *compiler,
                    void *mem_ctx,
                    nir_shader *nir,
                    struct brw_stage_prog_data *prog_data,
                    enum brw_param_builtin **out_system_values,
                    unsigned *out_num_system_values)
{
   /* We don't use params[], but fs_visitor::nir_setup_uniforms() asserts
    * about it for compute shaders, so go ahead and make some fake ones
    * which the backend will dead code eliminate.
    */
   prog_data->nr_params = nir->num_uniforms;
   prog_data->param = rzalloc_array(mem_ctx, uint32_t, prog_data->nr_params);

   /* The intel compiler assumes that num_uniforms is in bytes. For
    * scalar that means 4 bytes per uniform slot.
    *
    * Ref: brw_nir_lower_uniforms, type_size_scalar_bytes.
    */
   nir->num_uniforms *= 4;

   const unsigned IRIS_MAX_SYSTEM_VALUES = 32;
   enum brw_param_builtin *system_values =
      rzalloc_array(mem_ctx, enum brw_param_builtin, IRIS_MAX_SYSTEM_VALUES);
   unsigned num_system_values = 0;

   nir_function_impl *impl = nir_shader_get_entrypoint(nir);

   nir_builder b;
   nir_builder_init(&b, impl);

   b.cursor = nir_before_block(nir_start_block(impl));
   nir_ssa_def *temp_ubo_name = nir_ssa_undef(&b, 1, 32);

   /* Turn system value intrinsics into uniforms */
   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

         unsigned idx = num_system_values;

         switch (intrin->intrinsic) {
         case nir_intrinsic_load_user_clip_plane: {
            unsigned ucp = nir_intrinsic_ucp_id(intrin);
            for (int i = 0; i < 4; i++) {
               system_values[num_system_values++] =
                  BRW_PARAM_BUILTIN_CLIP_PLANE(ucp, i);
            }
            break;
         }
         default:
            continue;
         }

         b.cursor = nir_before_instr(instr);

         unsigned comps = nir_intrinsic_dest_components(intrin);
         nir_ssa_def *offset = nir_imm_int(&b, idx * sizeof(uint32_t));

         nir_intrinsic_instr *load =
            nir_intrinsic_instr_create(nir, nir_intrinsic_load_ubo);
         load->num_components = comps;
         load->src[0] = nir_src_for_ssa(temp_ubo_name);
         load->src[1] = nir_src_for_ssa(offset);
         nir_ssa_dest_init(&load->instr, &load->dest, comps, 32, NULL);
         nir_builder_instr_insert(&b, &load->instr);
         nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                                  nir_src_for_ssa(&load->dest.ssa));
         nir_instr_remove(instr);
      }
   }

   nir_validate_shader(nir, "before remapping");

   /* Place the new params at the front of constant buffer 0. */
   if (num_system_values > 0) {
      nir->num_uniforms += num_system_values * sizeof(uint32_t);

      system_values = reralloc(mem_ctx, system_values, enum brw_param_builtin,
                               num_system_values);

      nir_foreach_block(block, impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);

            if (load->intrinsic != nir_intrinsic_load_ubo)
               continue;

            b.cursor = nir_before_instr(instr);

            assert(load->src[0].is_ssa);

            if (load->src[0].ssa == temp_ubo_name) {
               nir_instr_rewrite_src(instr, &load->src[0],
                                     nir_src_for_ssa(nir_imm_int(&b, 0)));
            } else if (nir_src_as_uint(load->src[0]) == 0) {
               nir_ssa_def *offset =
                  nir_iadd(&b, load->src[1].ssa,
                           nir_imm_int(&b, 4 * num_system_values));
               nir_instr_rewrite_src(instr, &load->src[1],
                                     nir_src_for_ssa(offset));
            }
         }
      }

      /* We need to fold the new iadds for brw_nir_analyze_ubo_ranges */
      nir_opt_constant_folding(nir);
   } else {
      ralloc_free(system_values);
      system_values = NULL;
   }

   nir_validate_shader(nir, "after remap");

   // XXX: vs clip planes?
   if (nir->info.stage != MESA_SHADER_COMPUTE)
      brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);

   *out_system_values = system_values;
   *out_num_system_values = num_system_values;
}
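
/* Worked example of the remapping above (illustrative): with one user clip
 * plane enabled, num_system_values == 4 (the four plane components), so
 *
 *  - the load_user_clip_plane intrinsic becomes a load_ubo of 4 components
 *    from constant buffer 0 at byte offset 0, and
 *  - every pre-existing load_ubo from buffer 0 has its offset source
 *    rewritten to offset + 4 * 4 bytes, since the system values now occupy
 *    the front of that buffer.
 */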

/**
 * Compile a vertex shader, and upload the assembly.
 */
static bool
iris_compile_vs(struct iris_context *ice,
                struct iris_uncompiled_shader *ish,
                const struct brw_vs_prog_key *key)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   const struct brw_compiler *compiler = screen->compiler;
   const struct gen_device_info *devinfo = &screen->devinfo;
   void *mem_ctx = ralloc_context(NULL);
   struct brw_vs_prog_data *vs_prog_data =
      rzalloc(mem_ctx, struct brw_vs_prog_data);
   struct brw_vue_prog_data *vue_prog_data = &vs_prog_data->base;
   struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
   enum brw_param_builtin *system_values;
   unsigned num_system_values;

   nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);

   if (key->nr_userclip_plane_consts) {
      nir_function_impl *impl = nir_shader_get_entrypoint(nir);
      nir_lower_clip_vs(nir, (1 << key->nr_userclip_plane_consts) - 1, true);
      nir_lower_io_to_temporaries(nir, impl, true, false);
      nir_lower_global_vars_to_local(nir);
      nir_lower_vars_to_ssa(nir);
      nir_shader_gather_info(nir, impl);
   }

   // XXX: alt mode

   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
                       &num_system_values);

   assign_common_binding_table_offsets(devinfo, nir, prog_data, 0,
                                       num_system_values);

   brw_compute_vue_map(devinfo,
                       &vue_prog_data->vue_map, nir->info.outputs_written,
                       nir->info.separate_shader);

   /* Don't tell the backend about our clip plane constants, we've already
    * lowered them in NIR and we don't want it doing it again.
    */
   struct brw_vs_prog_key key_no_ucp = *key;
   key_no_ucp.nr_userclip_plane_consts = 0;

   char *error_str = NULL;
   const unsigned *program =
      brw_compile_vs(compiler, &ice->dbg, mem_ctx, &key_no_ucp, vs_prog_data,
                     nir, -1, &error_str);
   if (program == NULL) {
      dbg_printf("Failed to compile vertex shader: %s\n", error_str);
      ralloc_free(mem_ctx);
      return false;
   }

   uint32_t *so_decls =
      ice->vtbl.create_so_decl_list(&ish->stream_output,
                                    &vue_prog_data->vue_map);

   iris_upload_and_bind_shader(ice, IRIS_CACHE_VS, key, program, prog_data,
                               so_decls, system_values, num_system_values);

   ralloc_free(mem_ctx);
   return true;
}

/**
 * Update the current vertex shader variant.
 *
 * Fill out the key, look in the cache, compile and bind if needed.
 */
static void
iris_update_compiled_vs(struct iris_context *ice)
{
   struct iris_uncompiled_shader *ish =
      ice->shaders.uncompiled[MESA_SHADER_VERTEX];

   struct brw_vs_prog_key key = { .program_string_id = ish->program_id };
   ice->vtbl.populate_vs_key(ice, &ish->nir->info, &key);

   if (iris_bind_cached_shader(ice, IRIS_CACHE_VS, &key))
      return;

   UNUSED bool success = iris_compile_vs(ice, ish, &key);
}

/**
 * Get the shader_info for a given stage, or NULL if the stage is disabled.
 */
const struct shader_info *
iris_get_shader_info(const struct iris_context *ice, gl_shader_stage stage)
{
   const struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[stage];

   if (!ish)
      return NULL;

   const nir_shader *nir = ish->nir;
   return &nir->info;
}

// XXX: this function is gross
unsigned
iris_get_shader_num_ubos(const struct iris_context *ice, gl_shader_stage stage)
{
   const struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[stage];
   const struct iris_compiled_shader *shader = ice->shaders.prog[stage];

   if (ish) {
      const nir_shader *nir = ish->nir;
      /* see assign_common_binding_table_offsets */
      return nir->info.num_ubos +
             ((nir->num_uniforms || shader->num_system_values) ? 1 : 0);
   }
   return 0;
}

/**
 * Get the union of TCS output and TES input slots.
 *
 * TCS and TES need to agree on a common URB entry layout. In particular,
 * the data for all patch vertices is stored in a single URB entry (unlike
 * GS which has one entry per input vertex). This means that per-vertex
 * array indexing needs a stride.
 *
 * SSO requires locations to match, but doesn't require the number of
 * outputs/inputs to match (in fact, the TCS often has extra outputs).
 * So, we need to take the extra step of unifying these on the fly.
 */
static void
get_unified_tess_slots(const struct iris_context *ice,
                       uint64_t *per_vertex_slots,
                       uint32_t *per_patch_slots)
{
   const struct shader_info *tcs =
      iris_get_shader_info(ice, MESA_SHADER_TESS_CTRL);
   const struct shader_info *tes =
      iris_get_shader_info(ice, MESA_SHADER_TESS_EVAL);

   *per_vertex_slots = tes->inputs_read;
   *per_patch_slots = tes->patch_inputs_read;

   if (tcs) {
      *per_vertex_slots |= tcs->outputs_written;
      *per_patch_slots |= tcs->patch_outputs_written;
   }
}
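
/* For instance (hypothetical SSO pair): if the TES reads per-vertex slot
 * VAR0 while the TCS writes VAR0 and VAR1, the unified per-vertex set is
 * VAR0 | VAR1; likewise the per-patch set is the union of the TES's
 * patch_inputs_read and the TCS's patch_outputs_written, so both stages
 * derive the same URB entry layout despite mismatched interfaces.
 */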

/**
 * Compile a tessellation control shader, and upload the assembly.
 */
static bool
iris_compile_tcs(struct iris_context *ice,
                 struct iris_uncompiled_shader *ish,
                 const struct brw_tcs_prog_key *key)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   const struct brw_compiler *compiler = screen->compiler;
   const struct nir_shader_compiler_options *options =
      compiler->glsl_compiler_options[MESA_SHADER_TESS_CTRL].NirOptions;
   const struct gen_device_info *devinfo = &screen->devinfo;
   void *mem_ctx = ralloc_context(NULL);
   struct brw_tcs_prog_data *tcs_prog_data =
      rzalloc(mem_ctx, struct brw_tcs_prog_data);
   struct brw_vue_prog_data *vue_prog_data = &tcs_prog_data->base;
   struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
   enum brw_param_builtin *system_values = NULL;
   unsigned num_system_values = 0;

   nir_shader *nir;

   if (ish) {
      nir = nir_shader_clone(mem_ctx, ish->nir);

      iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
                          &num_system_values);
      assign_common_binding_table_offsets(devinfo, nir, prog_data, 0,
                                          num_system_values);
   } else {
      nir = brw_nir_create_passthrough_tcs(mem_ctx, compiler, options, key);

      /* Reserve space for passing the default tess levels as constants. */
      prog_data->param = rzalloc_array(mem_ctx, uint32_t, 8);
      prog_data->nr_params = 8;
      prog_data->ubo_ranges[0].length = 1;
   }

   char *error_str = NULL;
   const unsigned *program =
      brw_compile_tcs(compiler, &ice->dbg, mem_ctx, key, tcs_prog_data, nir,
                      -1, &error_str);
   if (program == NULL) {
      dbg_printf("Failed to compile control shader: %s\n", error_str);
      ralloc_free(mem_ctx);
      return false;
   }

   iris_upload_and_bind_shader(ice, IRIS_CACHE_TCS, key, program, prog_data,
                               NULL, system_values, num_system_values);

   ralloc_free(mem_ctx);
   return true;
}

/**
 * Update the current tessellation control shader variant.
 *
 * Fill out the key, look in the cache, compile and bind if needed.
 */
static void
iris_update_compiled_tcs(struct iris_context *ice)
{
   struct iris_uncompiled_shader *tcs =
      ice->shaders.uncompiled[MESA_SHADER_TESS_CTRL];

   const struct shader_info *tes_info =
      iris_get_shader_info(ice, MESA_SHADER_TESS_EVAL);
   struct brw_tcs_prog_key key = {
      .program_string_id = tcs ? tcs->program_id : 0,
      .tes_primitive_mode = tes_info->tess.primitive_mode,
      .input_vertices = ice->state.vertices_per_patch,
   };
   get_unified_tess_slots(ice, &key.outputs_written,
                          &key.patch_outputs_written);
   ice->vtbl.populate_tcs_key(ice, &key);

   if (iris_bind_cached_shader(ice, IRIS_CACHE_TCS, &key))
      return;

   UNUSED bool success = iris_compile_tcs(ice, tcs, &key);
}

/**
 * Compile a tessellation evaluation shader, and upload the assembly.
 */
static bool
iris_compile_tes(struct iris_context *ice,
                 struct iris_uncompiled_shader *ish,
                 const struct brw_tes_prog_key *key)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   const struct brw_compiler *compiler = screen->compiler;
   const struct gen_device_info *devinfo = &screen->devinfo;
   void *mem_ctx = ralloc_context(NULL);
   struct brw_tes_prog_data *tes_prog_data =
      rzalloc(mem_ctx, struct brw_tes_prog_data);
   struct brw_vue_prog_data *vue_prog_data = &tes_prog_data->base;
   struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
   enum brw_param_builtin *system_values;
   unsigned num_system_values;

   nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);

   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
                       &num_system_values);

   assign_common_binding_table_offsets(devinfo, nir, prog_data, 0,
                                       num_system_values);

   struct brw_vue_map input_vue_map;
   brw_compute_tess_vue_map(&input_vue_map, key->inputs_read,
                            key->patch_inputs_read);

   char *error_str = NULL;
   const unsigned *program =
      brw_compile_tes(compiler, &ice->dbg, mem_ctx, key, &input_vue_map,
                      tes_prog_data, nir, NULL, -1, &error_str);
   if (program == NULL) {
      dbg_printf("Failed to compile evaluation shader: %s\n", error_str);
      ralloc_free(mem_ctx);
      return false;
   }

   uint32_t *so_decls =
      ice->vtbl.create_so_decl_list(&ish->stream_output,
                                    &vue_prog_data->vue_map);

   iris_upload_and_bind_shader(ice, IRIS_CACHE_TES, key, program, prog_data,
                               so_decls, system_values, num_system_values);

   ralloc_free(mem_ctx);
   return true;
}

/**
 * Update the current tessellation evaluation shader variant.
 *
 * Fill out the key, look in the cache, compile and bind if needed.
 */
static void
iris_update_compiled_tes(struct iris_context *ice)
{
   struct iris_uncompiled_shader *ish =
      ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL];

   struct brw_tes_prog_key key = { .program_string_id = ish->program_id };
   get_unified_tess_slots(ice, &key.inputs_read, &key.patch_inputs_read);
   ice->vtbl.populate_tes_key(ice, &key);

   if (iris_bind_cached_shader(ice, IRIS_CACHE_TES, &key))
      return;

   UNUSED bool success = iris_compile_tes(ice, ish, &key);
}

/**
 * Compile a geometry shader, and upload the assembly.
 */
static bool
iris_compile_gs(struct iris_context *ice,
                struct iris_uncompiled_shader *ish,
                const struct brw_gs_prog_key *key)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   const struct brw_compiler *compiler = screen->compiler;
   const struct gen_device_info *devinfo = &screen->devinfo;
   void *mem_ctx = ralloc_context(NULL);
   struct brw_gs_prog_data *gs_prog_data =
      rzalloc(mem_ctx, struct brw_gs_prog_data);
   struct brw_vue_prog_data *vue_prog_data = &gs_prog_data->base;
   struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
   enum brw_param_builtin *system_values;
   unsigned num_system_values;

   nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);

   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
                       &num_system_values);

   assign_common_binding_table_offsets(devinfo, nir, prog_data, 0,
                                       num_system_values);

   brw_compute_vue_map(devinfo,
                       &vue_prog_data->vue_map, nir->info.outputs_written,
                       nir->info.separate_shader);

   char *error_str = NULL;
   const unsigned *program =
      brw_compile_gs(compiler, &ice->dbg, mem_ctx, key, gs_prog_data, nir,
                     NULL, -1, &error_str);
   if (program == NULL) {
      dbg_printf("Failed to compile geometry shader: %s\n", error_str);
      ralloc_free(mem_ctx);
      return false;
   }

   uint32_t *so_decls =
      ice->vtbl.create_so_decl_list(&ish->stream_output,
                                    &vue_prog_data->vue_map);

   iris_upload_and_bind_shader(ice, IRIS_CACHE_GS, key, program, prog_data,
                               so_decls, system_values, num_system_values);

   ralloc_free(mem_ctx);
   return true;
}

/**
 * Update the current geometry shader variant.
 *
 * Fill out the key, look in the cache, compile and bind if needed.
 */
static void
iris_update_compiled_gs(struct iris_context *ice)
{
   struct iris_uncompiled_shader *ish =
      ice->shaders.uncompiled[MESA_SHADER_GEOMETRY];

   if (!ish) {
      iris_unbind_shader(ice, IRIS_CACHE_GS);
      return;
   }

   struct brw_gs_prog_key key = { .program_string_id = ish->program_id };
   ice->vtbl.populate_gs_key(ice, &key);

   if (iris_bind_cached_shader(ice, IRIS_CACHE_GS, &key))
      return;

   UNUSED bool success = iris_compile_gs(ice, ish, &key);
}

/**
 * Compile a fragment (pixel) shader, and upload the assembly.
 */
static bool
iris_compile_fs(struct iris_context *ice,
                struct iris_uncompiled_shader *ish,
                const struct brw_wm_prog_key *key,
                struct brw_vue_map *vue_map)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   const struct brw_compiler *compiler = screen->compiler;
   const struct gen_device_info *devinfo = &screen->devinfo;
   void *mem_ctx = ralloc_context(NULL);
   struct brw_wm_prog_data *fs_prog_data =
      rzalloc(mem_ctx, struct brw_wm_prog_data);
   struct brw_stage_prog_data *prog_data = &fs_prog_data->base;
   enum brw_param_builtin *system_values;
   unsigned num_system_values;

   nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);

   // XXX: alt mode

   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
                       &num_system_values);

   assign_common_binding_table_offsets(devinfo, nir, prog_data,
                                       MAX2(key->nr_color_regions, 1),
                                       num_system_values);
   char *error_str = NULL;
   const unsigned *program =
      brw_compile_fs(compiler, &ice->dbg, mem_ctx, key, fs_prog_data,
                     nir, NULL, -1, -1, -1, true, false, vue_map, &error_str);
   if (program == NULL) {
      dbg_printf("Failed to compile fragment shader: %s\n", error_str);
      ralloc_free(mem_ctx);
      return false;
   }

   //brw_alloc_stage_scratch(brw, &brw->wm.base, prog_data.base.total_scratch);

   iris_upload_and_bind_shader(ice, IRIS_CACHE_FS, key, program, prog_data,
                               NULL, system_values, num_system_values);

   ralloc_free(mem_ctx);
   return true;
}

/**
 * Update the current fragment shader variant.
 *
 * Fill out the key, look in the cache, compile and bind if needed.
 */
static void
iris_update_compiled_fs(struct iris_context *ice)
{
   struct iris_uncompiled_shader *ish =
      ice->shaders.uncompiled[MESA_SHADER_FRAGMENT];
   struct brw_wm_prog_key key = { .program_string_id = ish->program_id };
   ice->vtbl.populate_fs_key(ice, &key);

   if (ish->nos & IRIS_NOS_LAST_VUE_MAP)
      key.input_slots_valid = ice->shaders.last_vue_map->slots_valid;

   if (iris_bind_cached_shader(ice, IRIS_CACHE_FS, &key))
      return;

   UNUSED bool success =
      iris_compile_fs(ice, ish, &key, ice->shaders.last_vue_map);
}

/**
 * Get the compiled shader for the last enabled geometry stage.
 *
 * This stage is the one which will feed stream output and the rasterizer.
 */
static struct iris_compiled_shader *
last_vue_shader(struct iris_context *ice)
{
   if (ice->shaders.prog[MESA_SHADER_GEOMETRY])
      return ice->shaders.prog[MESA_SHADER_GEOMETRY];

   if (ice->shaders.prog[MESA_SHADER_TESS_EVAL])
      return ice->shaders.prog[MESA_SHADER_TESS_EVAL];

   return ice->shaders.prog[MESA_SHADER_VERTEX];
}

/**
 * Update the last enabled stage's VUE map.
 *
 * When the shader feeding the rasterizer's output interface changes, we
 * need to re-emit various packets.
 */
static void
update_last_vue_map(struct iris_context *ice,
                    struct brw_stage_prog_data *prog_data)
{
   struct brw_vue_prog_data *vue_prog_data = (void *) prog_data;
   struct brw_vue_map *vue_map = &vue_prog_data->vue_map;
   struct brw_vue_map *old_map = ice->shaders.last_vue_map;
   const uint64_t changed_slots =
      (old_map ? old_map->slots_valid : 0ull) ^ vue_map->slots_valid;

   if (changed_slots & VARYING_BIT_VIEWPORT) {
      // XXX: could use ctx->Const.MaxViewports for old API efficiency
      ice->state.num_viewports =
         (vue_map->slots_valid & VARYING_BIT_VIEWPORT) ? IRIS_MAX_VIEWPORTS : 1;
      ice->state.dirty |= IRIS_DIRTY_CLIP |
                          IRIS_DIRTY_SF_CL_VIEWPORT |
                          IRIS_DIRTY_CC_VIEWPORT |
                          IRIS_DIRTY_SCISSOR_RECT |
                          IRIS_DIRTY_UNCOMPILED_FS |
                          ice->state.dirty_for_nos[IRIS_NOS_LAST_VUE_MAP];
      // XXX: CC_VIEWPORT?
   }

   if (changed_slots || (old_map && old_map->separate != vue_map->separate)) {
      ice->state.dirty |= IRIS_DIRTY_SBE;
   }

   ice->shaders.last_vue_map = &vue_prog_data->vue_map;
}

/**
 * Get the prog_data for a given stage, or NULL if the stage is disabled.
 */
static struct brw_vue_prog_data *
get_vue_prog_data(struct iris_context *ice, gl_shader_stage stage)
{
   if (!ice->shaders.prog[stage])
      return NULL;

   return (void *) ice->shaders.prog[stage]->prog_data;
}

/**
 * Update the current shader variants for the given state.
 *
 * This should be called on every draw call to ensure that the correct
 * shaders are bound. It will also flag any dirty state triggered by
 * swapping out those shaders.
 */
void
iris_update_compiled_shaders(struct iris_context *ice)
{
   const uint64_t dirty = ice->state.dirty;

   struct brw_vue_prog_data *old_prog_datas[4];
   if (!(dirty & IRIS_DIRTY_URB)) {
      for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++)
         old_prog_datas[i] = get_vue_prog_data(ice, i);
   }

   if (dirty & (IRIS_DIRTY_UNCOMPILED_TCS | IRIS_DIRTY_UNCOMPILED_TES)) {
      struct iris_uncompiled_shader *tes =
         ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL];
      if (tes) {
         iris_update_compiled_tcs(ice);
         iris_update_compiled_tes(ice);
      } else {
         iris_unbind_shader(ice, IRIS_CACHE_TCS);
         iris_unbind_shader(ice, IRIS_CACHE_TES);
      }
   }

   if (dirty & IRIS_DIRTY_UNCOMPILED_VS)
      iris_update_compiled_vs(ice);
   if (dirty & IRIS_DIRTY_UNCOMPILED_GS)
      iris_update_compiled_gs(ice);

   struct iris_compiled_shader *shader = last_vue_shader(ice);
   update_last_vue_map(ice, shader->prog_data);
   if (ice->state.streamout != shader->streamout) {
      ice->state.streamout = shader->streamout;
      ice->state.dirty |= IRIS_DIRTY_SO_DECL_LIST | IRIS_DIRTY_STREAMOUT;
   }

   if (dirty & IRIS_DIRTY_UNCOMPILED_FS)
      iris_update_compiled_fs(ice);
   // ...

   /* Changing shader interfaces may require a URB reconfiguration. */
   if (!(dirty & IRIS_DIRTY_URB)) {
      for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
         struct brw_vue_prog_data *old = old_prog_datas[i];
         struct brw_vue_prog_data *new = get_vue_prog_data(ice, i);
         if (!!old != !!new ||
             (new && new->urb_entry_size != old->urb_entry_size)) {
            ice->state.dirty |= IRIS_DIRTY_URB;
            break;
         }
      }
   }
}

static bool
iris_compile_cs(struct iris_context *ice,
                struct iris_uncompiled_shader *ish,
                const struct brw_cs_prog_key *key)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   const struct brw_compiler *compiler = screen->compiler;
   const struct gen_device_info *devinfo = &screen->devinfo;
   void *mem_ctx = ralloc_context(NULL);
   struct brw_cs_prog_data *cs_prog_data =
      rzalloc(mem_ctx, struct brw_cs_prog_data);
   struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
   enum brw_param_builtin *system_values;
   unsigned num_system_values;

   nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);

   cs_prog_data->binding_table.work_groups_start = 0;

   prog_data->total_shared = nir->info.cs.shared_size;

   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
                       &num_system_values);

   assign_common_binding_table_offsets(devinfo, nir, prog_data, 1,
                                       num_system_values);

   char *error_str = NULL;
   const unsigned *program =
      brw_compile_cs(compiler, &ice->dbg, mem_ctx, key, cs_prog_data,
                     nir, -1, &error_str);
   if (program == NULL) {
      dbg_printf("Failed to compile compute shader: %s\n", error_str);
      ralloc_free(mem_ctx);
      return false;
   }

   iris_upload_and_bind_shader(ice, IRIS_CACHE_CS, key, program, prog_data,
                               NULL, system_values, num_system_values);

   ralloc_free(mem_ctx);
   return true;
}

void
iris_update_compiled_compute_shader(struct iris_context *ice)
{
   struct iris_uncompiled_shader *ish =
      ice->shaders.uncompiled[MESA_SHADER_COMPUTE];

   struct brw_cs_prog_key key = { .program_string_id = ish->program_id };
   ice->vtbl.populate_cs_key(ice, &key);

   if (iris_bind_cached_shader(ice, IRIS_CACHE_CS, &key))
      return;

   UNUSED bool success = iris_compile_cs(ice, ish, &key);
}

void
iris_fill_cs_push_const_buffer(struct brw_cs_prog_data *cs_prog_data,
                               uint32_t *dst)
{
   struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
   assert(cs_prog_data->push.total.size > 0);
   assert(cs_prog_data->push.cross_thread.size == 0);
   assert(cs_prog_data->push.per_thread.dwords == 1);
   assert(prog_data->param[0] == BRW_PARAM_BUILTIN_SUBGROUP_ID);
   for (unsigned t = 0; t < cs_prog_data->threads; t++)
      dst[8 * t] = t;
}
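
/* Illustrative result (assuming cs_prog_data->threads == 4): each thread's
 * per-thread push block is padded out to 8 dwords (one register), so the
 * buffer is filled as dst[0] = 0, dst[8] = 1, dst[16] = 2, dst[24] = 3,
 * placing each thread's subgroup ID in the first dword of its block.
 */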

/**
 * Allocate scratch BOs as needed for the given per-thread size and stage.
 *
 * Returns the 32-bit "Scratch Space Base Pointer" value.
 */
uint32_t
iris_get_scratch_space(struct iris_context *ice,
                       unsigned per_thread_scratch,
                       gl_shader_stage stage)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   struct iris_bufmgr *bufmgr = screen->bufmgr;
   const struct gen_device_info *devinfo = &screen->devinfo;

   unsigned encoded_size = ffs(per_thread_scratch) - 11;
   assert(encoded_size < (1 << 16));

   struct iris_bo **bop = &ice->shaders.scratch_bos[encoded_size][stage];

   /* The documentation for 3DSTATE_PS "Scratch Space Base Pointer" says:
    *
    *    "Scratch Space per slice is computed based on 4 sub-slices. SW must
    *     allocate scratch space enough so that each slice has 4 slices
    *     allowed."
    *
    * According to the other driver team, this applies to compute shaders
    * as well. This is not currently documented at all.
    */
   unsigned subslice_total = 4 * devinfo->num_slices;
   assert(subslice_total >= screen->subslice_total);

   if (!*bop) {
      unsigned scratch_ids_per_subslice = devinfo->max_cs_threads;
      uint32_t max_threads[] = {
         [MESA_SHADER_VERTEX]    = devinfo->max_vs_threads,
         [MESA_SHADER_TESS_CTRL] = devinfo->max_tcs_threads,
         [MESA_SHADER_TESS_EVAL] = devinfo->max_tes_threads,
         [MESA_SHADER_GEOMETRY]  = devinfo->max_gs_threads,
         [MESA_SHADER_FRAGMENT]  = devinfo->max_wm_threads,
         [MESA_SHADER_COMPUTE]   = scratch_ids_per_subslice * subslice_total,
      };

      uint32_t size = per_thread_scratch * max_threads[stage];

      *bop = iris_bo_alloc(bufmgr, "scratch", size, IRIS_MEMZONE_SHADER);
   }

   return (*bop)->gtt_offset;
}
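
/* Worked example for the encoding above (illustrative): per_thread_scratch
 * is a power of two of at least 1KB, so 2048 bytes gives
 * ffs(2048) - 11 == 12 - 11 == 1, i.e. encoding 0 stands for 1KB and each
 * step doubles the per-thread allocation.
 */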

void
iris_init_program_functions(struct pipe_context *ctx)
{
   ctx->create_vs_state = iris_create_shader_state;
   ctx->create_tcs_state = iris_create_shader_state;
   ctx->create_tes_state = iris_create_shader_state;
   ctx->create_gs_state = iris_create_shader_state;
   ctx->create_fs_state = iris_create_shader_state;
   ctx->create_compute_state = iris_create_compute_state;

   ctx->delete_vs_state = iris_delete_shader_state;
   ctx->delete_tcs_state = iris_delete_shader_state;
   ctx->delete_tes_state = iris_delete_shader_state;
   ctx->delete_gs_state = iris_delete_shader_state;
   ctx->delete_fs_state = iris_delete_shader_state;
   ctx->delete_compute_state = iris_delete_shader_state;

   ctx->bind_vs_state = iris_bind_vs_state;
   ctx->bind_tcs_state = iris_bind_tcs_state;
   ctx->bind_tes_state = iris_bind_tes_state;
   ctx->bind_gs_state = iris_bind_gs_state;
   ctx->bind_fs_state = iris_bind_fs_state;
   ctx->bind_compute_state = iris_bind_cs_state;
}