/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
/**
 * @file iris_program.c
 *
 * This file contains the driver interface for compiling shaders.
 *
 * See iris_program_cache.c for the in-memory program cache where the
 * compiled shaders are stored.
 */
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "util/u_atomic.h"
#include "util/u_upload_mgr.h"
#include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_serialize.h"
#include "intel/compiler/brw_compiler.h"
#include "intel/compiler/brw_nir.h"
#include "iris_context.h"
#include "nir/tgsi_to_nir.h"
#define KEY_INIT_NO_ID(gen)                              \
   .tex.swizzles[0 ... MAX_SAMPLERS - 1] = 0x688,        \
   .tex.compressed_multisample_layout_mask = ~0,         \
   .tex.msaa_16 = (gen >= 9 ? ~0 : 0)
#define KEY_INIT(gen) .program_string_id = ish->program_id, KEY_INIT_NO_ID(gen)
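/* For illustration: 0x688 is the packed identity swizzle (SWIZZLE_XYZW), so a
 * key initialized as
 *
 *    struct brw_vs_prog_key key = { KEY_INIT(devinfo->gen) };
 *
 * starts from identity texture swizzles for every sampler, assumes the
 * compressed MCS layout for multisampled textures, and enables the 16x MSAA
 * path only on Gen9+.
 */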
static unsigned
get_new_program_id(struct iris_screen *screen)
{
   return p_atomic_inc_return(&screen->program_id);
}
static void *
upload_state(struct u_upload_mgr *uploader,
             struct iris_state_ref *ref,
             unsigned size,
             unsigned alignment)
{
   void *p = NULL;
   u_upload_alloc(uploader, 0, size, alignment, &ref->offset, &ref->res, &p);
   return p;
}
void
iris_upload_ubo_ssbo_surf_state(struct iris_context *ice,
                                struct pipe_shader_buffer *buf,
                                struct iris_state_ref *surf_state,
                                bool ssbo)
{
   struct pipe_context *ctx = &ice->ctx;
   struct iris_screen *screen = (struct iris_screen *) ctx->screen;

   // XXX: these are not retained forever, use a separate uploader?
   void *map =
      upload_state(ice->state.surface_uploader, surf_state,
                   screen->isl_dev.ss.size, 64);
   if (!map) {
      surf_state->res = NULL;
      return;
   }

   struct iris_resource *res = (void *) buf->buffer;
   struct iris_bo *surf_bo = iris_resource_bo(surf_state->res);
   surf_state->offset += iris_bo_offset_from_base_address(surf_bo);

   isl_buffer_fill_state(&screen->isl_dev, map,
                         .address = res->bo->gtt_offset + res->offset +
                                    buf->buffer_offset,
                         .size_B = buf->buffer_size - res->offset,
                         .format = ssbo ? ISL_FORMAT_RAW
                                        : ISL_FORMAT_R32G32B32A32_FLOAT,
                         .swizzle = ISL_SWIZZLE_IDENTITY,
                         .mocs = ice->vtbl.mocs(res->bo));
}
static nir_ssa_def *
get_aoa_deref_offset(nir_builder *b,
                     nir_deref_instr *deref,
                     unsigned elem_size)
{
   unsigned array_size = elem_size;
   nir_ssa_def *offset = nir_imm_int(b, 0);

   while (deref->deref_type != nir_deref_type_var) {
      assert(deref->deref_type == nir_deref_type_array);

      /* This level's element size is the previous level's array size */
      nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
      assert(deref->arr.index.ssa);
      offset = nir_iadd(b, offset,
                        nir_imul(b, index, nir_imm_int(b, array_size)));

      deref = nir_deref_instr_parent(deref);
      assert(glsl_type_is_array(deref->type));
      array_size *= glsl_get_length(deref->type);
   }

   /* Accessing an invalid surface index with the dataport can result in a
    * hang.  According to the spec "if the index used to select an individual
    * element is negative or greater than or equal to the size of the array,
    * the results of the operation are undefined but may not lead to
    * termination" -- which is one of the possible outcomes of the hang.
    * Clamp the index to prevent access outside of the array bounds.
    */
   return nir_umin(b, offset, nir_imm_int(b, array_size - elem_size));
}
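/* Worked example (illustrative, not from the original file): for a GLSL
 * declaration "image2D imgs[3][2]" accessed as imgs[i][j] with elem_size == 1,
 * the walk above produces offset = j * 1 + i * 2, and the final clamp limits
 * it to at most 3 * 2 - 1 = 5, keeping the surface index inside the array.
 */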
static void
iris_lower_storage_image_derefs(nir_shader *nir)
{
   nir_function_impl *impl = nir_shader_get_entrypoint(nir);

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_image_deref_load:
         case nir_intrinsic_image_deref_store:
         case nir_intrinsic_image_deref_atomic_add:
         case nir_intrinsic_image_deref_atomic_min:
         case nir_intrinsic_image_deref_atomic_max:
         case nir_intrinsic_image_deref_atomic_and:
         case nir_intrinsic_image_deref_atomic_or:
         case nir_intrinsic_image_deref_atomic_xor:
         case nir_intrinsic_image_deref_atomic_exchange:
         case nir_intrinsic_image_deref_atomic_comp_swap:
         case nir_intrinsic_image_deref_size:
         case nir_intrinsic_image_deref_samples:
         case nir_intrinsic_image_deref_load_raw_intel:
         case nir_intrinsic_image_deref_store_raw_intel: {
            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
            nir_variable *var = nir_deref_instr_get_variable(deref);

            b.cursor = nir_before_instr(&intrin->instr);
            nir_ssa_def *index =
               nir_iadd(&b, nir_imm_int(&b, var->data.driver_location),
                        get_aoa_deref_offset(&b, deref, 1));
            nir_rewrite_image_intrinsic(intrin, index, false);
            break;
         }

         default:
            break;
         }
      }
   }
}
// XXX: need unify_interfaces() at link time...

/**
 * Fix an uncompiled shader's stream output info.
 *
 * Core Gallium stores output->register_index as a "slot" number, where
 * slots are assigned consecutively to all outputs in info->outputs_written.
 * This naive packing of outputs doesn't work for us - we too have slots,
 * but the layout is defined by the VUE map, which we won't have until we
 * compile a specific shader variant.  So, we remap these and simply store
 * VARYING_SLOT_* in our copy's output->register_index fields.
 *
 * We also fix up VARYING_SLOT_{LAYER,VIEWPORT,PSIZ} to select the Y/Z/W
 * components of our VUE header.  See brw_vue_map.c for the layout.
 */
static void
update_so_info(struct pipe_stream_output_info *so_info,
               uint64_t outputs_written)
{
   uint8_t reverse_map[64] = {};
   unsigned slot = 0;
   while (outputs_written)
      reverse_map[slot++] = u_bit_scan64(&outputs_written);

   for (unsigned i = 0; i < so_info->num_outputs; i++) {
      struct pipe_stream_output *output = &so_info->output[i];

      /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
      output->register_index = reverse_map[output->register_index];

      /* The VUE header contains three scalar fields packed together:
       * - gl_PointSize is stored in VARYING_SLOT_PSIZ.w
       * - gl_Layer is stored in VARYING_SLOT_PSIZ.y
       * - gl_ViewportIndex is stored in VARYING_SLOT_PSIZ.z
       */
      switch (output->register_index) {
      case VARYING_SLOT_LAYER:
         assert(output->num_components == 1);
         output->register_index = VARYING_SLOT_PSIZ;
         output->start_component = 1;
         break;
      case VARYING_SLOT_VIEWPORT:
         assert(output->num_components == 1);
         output->register_index = VARYING_SLOT_PSIZ;
         output->start_component = 2;
         break;
      case VARYING_SLOT_PSIZ:
         assert(output->num_components == 1);
         output->start_component = 3;
         break;
      }

      //info->outputs_written |= 1ull << output->register_index;
   }
}
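/* Worked example (illustrative only): if outputs_written has bits for
 * VARYING_SLOT_POS, VARYING_SLOT_PSIZ, and VARYING_SLOT_VAR0, Gallium hands
 * us register_index values 0, 1, 2.  reverse_map[] turns those back into
 * VARYING_SLOT_POS, VARYING_SLOT_PSIZ, and VARYING_SLOT_VAR0, and the PSIZ
 * entry is then pointed at component W (start_component = 3) of the VUE
 * header slot.
 */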
static void
setup_vec4_image_sysval(uint32_t *sysvals, uint32_t idx,
                        unsigned offset, unsigned n)
{
   assert(offset % sizeof(uint32_t) == 0);

   for (unsigned i = 0; i < n; ++i)
      sysvals[i] = BRW_PARAM_IMAGE(idx, offset / sizeof(uint32_t) + i);

   for (unsigned i = n; i < 4; ++i)
      sysvals[i] = BRW_PARAM_BUILTIN_ZERO;
}
/**
 * Associate NIR uniform variables with the prog_data->param[] mechanism
 * used by the backend.  Also, decide which UBOs we'd like to push in an
 * ideal situation (though the backend can reduce this).
 */
static void
iris_setup_uniforms(const struct brw_compiler *compiler,
                    void *mem_ctx,
                    nir_shader *nir,
                    struct brw_stage_prog_data *prog_data,
                    enum brw_param_builtin **out_system_values,
                    unsigned *out_num_system_values,
                    unsigned *out_num_cbufs)
{
   UNUSED const struct gen_device_info *devinfo = compiler->devinfo;

   /* The intel compiler assumes that num_uniforms is in bytes.  For
    * scalar that means 4 bytes per uniform slot.
    *
    * Ref: brw_nir_lower_uniforms, type_size_scalar_bytes.
    */
   nir->num_uniforms *= 4;

   const unsigned IRIS_MAX_SYSTEM_VALUES =
      PIPE_MAX_SHADER_IMAGES * BRW_IMAGE_PARAM_SIZE;
   enum brw_param_builtin *system_values =
      rzalloc_array(mem_ctx, enum brw_param_builtin, IRIS_MAX_SYSTEM_VALUES);
   unsigned num_system_values = 0;

   unsigned patch_vert_idx = -1;
   unsigned ucp_idx[IRIS_MAX_CLIP_PLANES];
   unsigned img_idx[PIPE_MAX_SHADER_IMAGES];
   memset(ucp_idx, -1, sizeof(ucp_idx));
   memset(img_idx, -1, sizeof(img_idx));

   nir_function_impl *impl = nir_shader_get_entrypoint(nir);

   nir_builder b;
   nir_builder_init(&b, impl);

   b.cursor = nir_before_block(nir_start_block(impl));
   nir_ssa_def *temp_ubo_name = nir_ssa_undef(&b, 1, 32);
   nir_ssa_def *temp_const_ubo_name = NULL;
   /* Turn system value intrinsics into uniforms */
   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         nir_ssa_def *offset;

         switch (intrin->intrinsic) {
         case nir_intrinsic_load_constant: {
            /* This one is special because it reads from the shader constant
             * data and not cbuf0 which gallium uploads for us.
             */
            b.cursor = nir_before_instr(instr);
            nir_ssa_def *offset =
               nir_iadd_imm(&b, nir_ssa_for_src(&b, intrin->src[0], 1),
                            nir_intrinsic_base(intrin));

            if (temp_const_ubo_name == NULL)
               temp_const_ubo_name = nir_imm_int(&b, 0);

            nir_intrinsic_instr *load_ubo =
               nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_ubo);
            load_ubo->num_components = intrin->num_components;
            load_ubo->src[0] = nir_src_for_ssa(temp_const_ubo_name);
            load_ubo->src[1] = nir_src_for_ssa(offset);
            nir_ssa_dest_init(&load_ubo->instr, &load_ubo->dest,
                              intrin->dest.ssa.num_components,
                              intrin->dest.ssa.bit_size,
                              intrin->dest.ssa.name);
            nir_builder_instr_insert(&b, &load_ubo->instr);

            nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                                     nir_src_for_ssa(&load_ubo->dest.ssa));
            nir_instr_remove(&intrin->instr);
            continue;
         }
         case nir_intrinsic_load_user_clip_plane: {
            unsigned ucp = nir_intrinsic_ucp_id(intrin);

            if (ucp_idx[ucp] == -1) {
               ucp_idx[ucp] = num_system_values;
               num_system_values += 4;
            }

            for (int i = 0; i < 4; i++) {
               system_values[ucp_idx[ucp] + i] =
                  BRW_PARAM_BUILTIN_CLIP_PLANE(ucp, i);
            }

            b.cursor = nir_before_instr(instr);
            offset = nir_imm_int(&b, ucp_idx[ucp] * sizeof(uint32_t));
            break;
         }
         case nir_intrinsic_load_patch_vertices_in:
            if (patch_vert_idx == -1)
               patch_vert_idx = num_system_values++;

            system_values[patch_vert_idx] =
               BRW_PARAM_BUILTIN_PATCH_VERTICES_IN;

            b.cursor = nir_before_instr(instr);
            offset = nir_imm_int(&b, patch_vert_idx * sizeof(uint32_t));
            break;
         case nir_intrinsic_image_deref_load_param_intel: {
            assert(devinfo->gen < 9);
            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
            nir_variable *var = nir_deref_instr_get_variable(deref);

            /* XXX: var->data.binding is not set properly.  We need to run
             * some form of gl_nir_lower_samplers_as_deref() to get it.
             * This breaks tests which use more than one image.
             */
            if (img_idx[var->data.binding] == -1) {
               /* GL only allows arrays of arrays of images. */
               assert(glsl_type_is_image(glsl_without_array(var->type)));
               unsigned num_images = MAX2(1, glsl_get_aoa_size(var->type));

               for (int i = 0; i < num_images; i++) {
                  const unsigned img = var->data.binding + i;

                  img_idx[img] = num_system_values;
                  num_system_values += BRW_IMAGE_PARAM_SIZE;

                  uint32_t *img_sv = &system_values[img_idx[img]];

                  setup_vec4_image_sysval(
                     img_sv + BRW_IMAGE_PARAM_OFFSET_OFFSET, img,
                     offsetof(struct brw_image_param, offset), 2);
                  setup_vec4_image_sysval(
                     img_sv + BRW_IMAGE_PARAM_SIZE_OFFSET, img,
                     offsetof(struct brw_image_param, size), 3);
                  setup_vec4_image_sysval(
                     img_sv + BRW_IMAGE_PARAM_STRIDE_OFFSET, img,
                     offsetof(struct brw_image_param, stride), 4);
                  setup_vec4_image_sysval(
                     img_sv + BRW_IMAGE_PARAM_TILING_OFFSET, img,
                     offsetof(struct brw_image_param, tiling), 3);
                  setup_vec4_image_sysval(
                     img_sv + BRW_IMAGE_PARAM_SWIZZLING_OFFSET, img,
                     offsetof(struct brw_image_param, swizzling), 2);
               }
            }

            b.cursor = nir_before_instr(instr);
            offset = nir_iadd(&b,
               get_aoa_deref_offset(&b, deref, BRW_IMAGE_PARAM_SIZE * 4),
               nir_imm_int(&b, img_idx[var->data.binding] * 4 +
                               nir_intrinsic_base(intrin) * 16));
            break;
         }
         default:
            continue;
         }

         unsigned comps = nir_intrinsic_dest_components(intrin);

         nir_intrinsic_instr *load =
            nir_intrinsic_instr_create(nir, nir_intrinsic_load_ubo);
         load->num_components = comps;
         load->src[0] = nir_src_for_ssa(temp_ubo_name);
         load->src[1] = nir_src_for_ssa(offset);
         nir_ssa_dest_init(&load->instr, &load->dest, comps, 32, NULL);
         nir_builder_instr_insert(&b, &load->instr);
         nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                                  nir_src_for_ssa(&load->dest.ssa));
         nir_instr_remove(instr);
      }
   }

   nir_validate_shader(nir, "before remapping");
   /* Place the new params at the front of constant buffer 0. */
   if (num_system_values > 0) {
      nir->num_uniforms += num_system_values * sizeof(uint32_t);

      system_values = reralloc(mem_ctx, system_values, enum brw_param_builtin,
                               num_system_values);

      nir_foreach_block(block, impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);

            if (load->intrinsic != nir_intrinsic_load_ubo)
               continue;

            b.cursor = nir_before_instr(instr);

            assert(load->src[0].is_ssa);

            if (load->src[0].ssa == temp_ubo_name) {
               nir_instr_rewrite_src(instr, &load->src[0],
                                     nir_src_for_ssa(nir_imm_int(&b, 0)));
            } else if (nir_src_is_const(load->src[0]) &&
                       nir_src_as_uint(load->src[0]) == 0) {
               nir_ssa_def *offset =
                  nir_iadd(&b, load->src[1].ssa,
                           nir_imm_int(&b, 4 * num_system_values));
               nir_instr_rewrite_src(instr, &load->src[1],
                                     nir_src_for_ssa(offset));
            }
         }
      }

      /* We need to fold the new iadds for brw_nir_analyze_ubo_ranges */
      nir_opt_constant_folding(nir);
   } else {
      ralloc_free(system_values);
      system_values = NULL;
   }

   nir_validate_shader(nir, "after remap");

   /* We don't use params[], but fs_visitor::nir_setup_uniforms() asserts
    * about it for compute shaders, so go ahead and make some fake ones
    * which the backend will dead code eliminate.
    */
   prog_data->nr_params = nir->num_uniforms / 4;
   prog_data->param = rzalloc_array(mem_ctx, uint32_t, prog_data->nr_params);

   /* System values and uniforms are stored in constant buffer 0, the
    * user-facing UBOs are indexed by one.  So if any constant buffer is
    * needed, the constant buffer 0 will be needed, so account for it.
    */
   unsigned num_cbufs = nir->info.num_ubos;
   if (num_cbufs || num_system_values || nir->num_uniforms)
      num_cbufs++;

   /* Constant loads (if any) need to go at the end of the constant buffers so
    * we need to know num_cbufs before we can lower to them.
    */
   if (temp_const_ubo_name != NULL) {
      nir_load_const_instr *const_ubo_index =
         nir_instr_as_load_const(temp_const_ubo_name->parent_instr);
      assert(const_ubo_index->def.bit_size == 32);
      const_ubo_index->value[0].u32 = num_cbufs;
   }

   *out_system_values = system_values;
   *out_num_system_values = num_system_values;
   *out_num_cbufs = num_cbufs;
}
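/* Layout sketch (illustrative): after this pass, constant buffer 0 holds any
 * system values first, followed by the ordinary NIR uniforms, e.g.
 *
 *    cbuf0:    [ patch_vertices_in | clip planes ... | user uniforms ... ]
 *    cbuf1..N: the application's UBOs, shifted up by one
 *    last cbuf: shader constant data, if nir->constant_data_size > 0
 *
 * which is why load_ubo instructions with a constant buffer index of 0 get
 * their offsets bumped by 4 * num_system_values above.
 */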
static void
rewrite_src_with_bti(nir_builder *b, struct iris_binding_table *bt,
                     nir_instr *instr, nir_src *src,
                     enum iris_surface_group group)
{
   assert(bt->offsets[group] != 0xd0d0d0d0);

   b->cursor = nir_before_instr(instr);
   nir_ssa_def *bti;
   if (nir_src_is_const(*src)) {
      bti = nir_imm_intN_t(b, nir_src_as_uint(*src) + bt->offsets[group],
                           src->ssa->bit_size);
   } else {
      bti = nir_iadd_imm(b, src->ssa, bt->offsets[group]);
   }
   nir_instr_rewrite_src(instr, src, nir_src_for_ssa(bti));
}
/**
 * Set up the binding table indices and apply to the shader.
 *
 * Unused groups are initialized to 0xd0d0d0d0 to make it obvious that they're
 * unused but also make sure that addition of small offsets to them will
 * trigger some of our asserts that surface indices are < BRW_MAX_SURFACES.
 */
static void
iris_setup_binding_table(struct nir_shader *nir,
                         struct iris_binding_table *bt,
                         unsigned num_render_targets,
                         unsigned num_system_values,
                         unsigned num_cbufs)
{
   const struct shader_info *info = &nir->info;

   memset(bt, 0, sizeof(*bt));
   for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++)
      bt->offsets[i] = 0xd0d0d0d0;

   /* Calculate the initial binding table index for each group. */
   uint32_t next_offset;
   if (info->stage == MESA_SHADER_FRAGMENT) {
      next_offset = num_render_targets;
      bt->offsets[IRIS_SURFACE_GROUP_RENDER_TARGET] = 0;
   } else if (info->stage == MESA_SHADER_COMPUTE) {
      next_offset = 1;
      bt->offsets[IRIS_SURFACE_GROUP_CS_WORK_GROUPS] = 0;
   } else {
      next_offset = 0;
   }

   unsigned num_textures = util_last_bit(info->textures_used);
   if (num_textures) {
      bt->offsets[IRIS_SURFACE_GROUP_TEXTURE] = next_offset;
      next_offset += num_textures;
   }

   if (info->num_images) {
      bt->offsets[IRIS_SURFACE_GROUP_IMAGE] = next_offset;
      next_offset += info->num_images;
   }

   /* Allocate a slot in the UBO section for NIR constants if present.
    * We don't include them in iris_compiled_shader::num_cbufs because
    * they are uploaded separately from shs->constbuf[], but from a shader
    * point of view, they're another UBO (at the end of the section).
    */
   if (nir->constant_data_size > 0)
      num_cbufs++;

   if (num_cbufs) {
      //assert(info->num_ubos <= BRW_MAX_UBO);
      bt->offsets[IRIS_SURFACE_GROUP_UBO] = next_offset;
      next_offset += num_cbufs;
   }

   if (info->num_ssbos || info->num_abos) {
      bt->offsets[IRIS_SURFACE_GROUP_SSBO] = next_offset;
      // XXX: see iris_state "wasting 16 binding table slots for ABOs" comment
      next_offset += IRIS_MAX_ABOS + info->num_ssbos;
   }

   bt->size_bytes = next_offset * 4;
   nir_function_impl *impl = nir_shader_get_entrypoint(nir);

   /* Apply the binding table indices.  The backend compiler is not expected
    * to change those, as we haven't set any of the *_start entries in brw
    * prog_data.
    */
   nir_builder b;
   nir_builder_init(&b, impl);

   nir_foreach_block (block, impl) {
      nir_foreach_instr (instr, block) {
         if (instr->type == nir_instr_type_tex) {
            assert(bt->offsets[IRIS_SURFACE_GROUP_TEXTURE] != 0xd0d0d0d0);
            nir_instr_as_tex(instr)->texture_index +=
               bt->offsets[IRIS_SURFACE_GROUP_TEXTURE];
            continue;
         }

         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_image_size:
         case nir_intrinsic_image_load:
         case nir_intrinsic_image_store:
         case nir_intrinsic_image_atomic_add:
         case nir_intrinsic_image_atomic_min:
         case nir_intrinsic_image_atomic_max:
         case nir_intrinsic_image_atomic_and:
         case nir_intrinsic_image_atomic_or:
         case nir_intrinsic_image_atomic_xor:
         case nir_intrinsic_image_atomic_exchange:
         case nir_intrinsic_image_atomic_comp_swap:
         case nir_intrinsic_image_load_raw_intel:
         case nir_intrinsic_image_store_raw_intel:
            rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
                                 IRIS_SURFACE_GROUP_IMAGE);
            break;

         case nir_intrinsic_load_ubo:
            rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
                                 IRIS_SURFACE_GROUP_UBO);
            break;

         case nir_intrinsic_store_ssbo:
            rewrite_src_with_bti(&b, bt, instr, &intrin->src[1],
                                 IRIS_SURFACE_GROUP_SSBO);
            break;

         case nir_intrinsic_get_buffer_size:
         case nir_intrinsic_ssbo_atomic_add:
         case nir_intrinsic_ssbo_atomic_imin:
         case nir_intrinsic_ssbo_atomic_umin:
         case nir_intrinsic_ssbo_atomic_imax:
         case nir_intrinsic_ssbo_atomic_umax:
         case nir_intrinsic_ssbo_atomic_and:
         case nir_intrinsic_ssbo_atomic_or:
         case nir_intrinsic_ssbo_atomic_xor:
         case nir_intrinsic_ssbo_atomic_exchange:
         case nir_intrinsic_ssbo_atomic_comp_swap:
         case nir_intrinsic_ssbo_atomic_fmin:
         case nir_intrinsic_ssbo_atomic_fmax:
         case nir_intrinsic_ssbo_atomic_fcomp_swap:
         case nir_intrinsic_load_ssbo:
            rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
                                 IRIS_SURFACE_GROUP_SSBO);
            break;

         default:
            break;
         }
      }
   }
}
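/* Example layout (illustrative): a fragment shader with 2 render targets,
 * 3 textures, no images, 1 user UBO plus cbuf0, and 1 SSBO would get
 *
 *    [0..1] render targets, [2..4] textures, [5..6] UBOs (cbuf0, UBO #1),
 *    [7...] the ABO/SSBO section
 *
 * and the load_ubo/load_ssbo/tex instructions above are rewritten to use
 * those absolute binding table indices.
 */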
static void
iris_debug_recompile(struct iris_context *ice,
                     struct shader_info *info,
                     unsigned program_string_id,
                     const void *key)
{
   struct iris_screen *screen = (struct iris_screen *) ice->ctx.screen;
   const struct brw_compiler *c = screen->compiler;

   c->shader_perf_log(&ice->dbg, "Recompiling %s shader for program %s: %s\n",
                      _mesa_shader_stage_to_string(info->stage),
                      info->name ? info->name : "(no identifier)",
                      info->label ? info->label : "");

   const void *old_key =
      iris_find_previous_compile(ice, info->stage, program_string_id);

   brw_debug_key_recompile(c, &ice->dbg, info->stage, old_key, key);
}
/**
 * Compile a vertex shader, and upload the assembly.
 */
static struct iris_compiled_shader *
iris_compile_vs(struct iris_context *ice,
                struct iris_uncompiled_shader *ish,
                const struct brw_vs_prog_key *key)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   const struct brw_compiler *compiler = screen->compiler;
   const struct gen_device_info *devinfo = &screen->devinfo;
   void *mem_ctx = ralloc_context(NULL);
   struct brw_vs_prog_data *vs_prog_data =
      rzalloc(mem_ctx, struct brw_vs_prog_data);
   struct brw_vue_prog_data *vue_prog_data = &vs_prog_data->base;
   struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
   enum brw_param_builtin *system_values;
   unsigned num_system_values;
   unsigned num_cbufs;

   nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);

   if (key->nr_userclip_plane_consts) {
      nir_function_impl *impl = nir_shader_get_entrypoint(nir);
      nir_lower_clip_vs(nir, (1 << key->nr_userclip_plane_consts) - 1, true);
      nir_lower_io_to_temporaries(nir, impl, true, false);
      nir_lower_global_vars_to_local(nir);
      nir_lower_vars_to_ssa(nir);
      nir_shader_gather_info(nir, impl);
   }

   prog_data->use_alt_mode = ish->use_alt_mode;

   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
                       &num_system_values, &num_cbufs);

   struct iris_binding_table bt;
   iris_setup_binding_table(nir, &bt, /* num_render_targets */ 0,
                            num_system_values, num_cbufs);

   brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);

   brw_compute_vue_map(devinfo,
                       &vue_prog_data->vue_map, nir->info.outputs_written,
                       nir->info.separate_shader);

   /* Don't tell the backend about our clip plane constants, we've already
    * lowered them in NIR and we don't want it doing it again.
    */
   struct brw_vs_prog_key key_no_ucp = *key;
   key_no_ucp.nr_userclip_plane_consts = 0;

   char *error_str = NULL;
   const unsigned *program =
      brw_compile_vs(compiler, &ice->dbg, mem_ctx, &key_no_ucp, vs_prog_data,
                     nir, -1, &error_str);
   if (program == NULL) {
      dbg_printf("Failed to compile vertex shader: %s\n", error_str);
      ralloc_free(mem_ctx);
      return NULL;
   }

   if (ish->compiled_once) {
      iris_debug_recompile(ice, &nir->info, key->program_string_id, key);
   } else {
      ish->compiled_once = true;
   }

   uint32_t *so_decls =
      ice->vtbl.create_so_decl_list(&ish->stream_output,
                                    &vue_prog_data->vue_map);

   struct iris_compiled_shader *shader =
      iris_upload_shader(ice, IRIS_CACHE_VS, sizeof(*key), key, program,
                         prog_data, so_decls, system_values, num_system_values,
                         num_cbufs, &bt);

   iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));

   ralloc_free(mem_ctx);

   return shader;
}
/**
 * Update the current vertex shader variant.
 *
 * Fill out the key, look in the cache, compile and bind if needed.
 */
static void
iris_update_compiled_vs(struct iris_context *ice)
{
   struct iris_uncompiled_shader *ish =
      ice->shaders.uncompiled[MESA_SHADER_VERTEX];
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   const struct gen_device_info *devinfo = &screen->devinfo;

   struct brw_vs_prog_key key = { KEY_INIT(devinfo->gen) };
   ice->vtbl.populate_vs_key(ice, &ish->nir->info, &key);

   struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_VS];
   struct iris_compiled_shader *shader =
      iris_find_cached_shader(ice, IRIS_CACHE_VS, sizeof(key), &key);

   if (!shader)
      shader = iris_disk_cache_retrieve(ice, ish, &key, sizeof(key));

   if (!shader)
      shader = iris_compile_vs(ice, ish, &key);

   if (old != shader) {
      ice->shaders.prog[IRIS_CACHE_VS] = shader;
      ice->state.dirty |= IRIS_DIRTY_VS |
                          IRIS_DIRTY_BINDINGS_VS |
                          IRIS_DIRTY_CONSTANTS_VS |
                          IRIS_DIRTY_VF_SGVS;

      const struct brw_vs_prog_data *vs_prog_data =
         (void *) shader->prog_data;
      const bool uses_draw_params = vs_prog_data->uses_firstvertex ||
                                    vs_prog_data->uses_baseinstance;
      const bool uses_derived_draw_params = vs_prog_data->uses_drawid ||
                                            vs_prog_data->uses_is_indexed_draw;
      const bool needs_sgvs_element = uses_draw_params ||
                                      vs_prog_data->uses_instanceid ||
                                      vs_prog_data->uses_vertexid;
      bool needs_edge_flag = false;
      nir_foreach_variable(var, &ish->nir->inputs) {
         if (var->data.location == VERT_ATTRIB_EDGEFLAG)
            needs_edge_flag = true;
      }

      if (ice->state.vs_uses_draw_params != uses_draw_params ||
          ice->state.vs_uses_derived_draw_params != uses_derived_draw_params ||
          ice->state.vs_needs_edge_flag != needs_edge_flag) {
         ice->state.dirty |= IRIS_DIRTY_VERTEX_BUFFERS |
                             IRIS_DIRTY_VERTEX_ELEMENTS;
      }

      ice->state.vs_uses_draw_params = uses_draw_params;
      ice->state.vs_uses_derived_draw_params = uses_derived_draw_params;
      ice->state.vs_needs_sgvs_element = needs_sgvs_element;
      ice->state.vs_needs_edge_flag = needs_edge_flag;
   }
}
/**
 * Get the shader_info for a given stage, or NULL if the stage is disabled.
 */
const struct shader_info *
iris_get_shader_info(const struct iris_context *ice, gl_shader_stage stage)
{
   const struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[stage];

   if (!ish)
      return NULL;

   const nir_shader *nir = ish->nir;
   return &nir->info;
}
/**
 * Get the union of TCS output and TES input slots.
 *
 * TCS and TES need to agree on a common URB entry layout.  In particular,
 * the data for all patch vertices is stored in a single URB entry (unlike
 * GS which has one entry per input vertex).  This means that per-vertex
 * array indexing needs a stride.
 *
 * SSO requires locations to match, but doesn't require the number of
 * outputs/inputs to match (in fact, the TCS often has extra outputs).
 * So, we need to take the extra step of unifying these on the fly.
 */
static void
get_unified_tess_slots(const struct iris_context *ice,
                       uint64_t *per_vertex_slots,
                       uint32_t *per_patch_slots)
{
   const struct shader_info *tcs =
      iris_get_shader_info(ice, MESA_SHADER_TESS_CTRL);
   const struct shader_info *tes =
      iris_get_shader_info(ice, MESA_SHADER_TESS_EVAL);

   *per_vertex_slots = tes->inputs_read;
   *per_patch_slots = tes->patch_inputs_read;

   if (tcs) {
      *per_vertex_slots |= tcs->outputs_written;
      *per_patch_slots |= tcs->patch_outputs_written;
   }
}
/**
 * Compile a tessellation control shader, and upload the assembly.
 */
static struct iris_compiled_shader *
iris_compile_tcs(struct iris_context *ice,
                 struct iris_uncompiled_shader *ish,
                 const struct brw_tcs_prog_key *key)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   const struct brw_compiler *compiler = screen->compiler;
   const struct nir_shader_compiler_options *options =
      compiler->glsl_compiler_options[MESA_SHADER_TESS_CTRL].NirOptions;
   void *mem_ctx = ralloc_context(NULL);
   struct brw_tcs_prog_data *tcs_prog_data =
      rzalloc(mem_ctx, struct brw_tcs_prog_data);
   struct brw_vue_prog_data *vue_prog_data = &tcs_prog_data->base;
   struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
   enum brw_param_builtin *system_values = NULL;
   unsigned num_system_values = 0;
   unsigned num_cbufs = 0;

   nir_shader *nir;

   struct iris_binding_table bt;

   if (ish) {
      nir = nir_shader_clone(mem_ctx, ish->nir);

      iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
                          &num_system_values, &num_cbufs);
      iris_setup_binding_table(nir, &bt, /* num_render_targets */ 0,
                               num_system_values, num_cbufs);
      brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
   } else {
      nir = brw_nir_create_passthrough_tcs(mem_ctx, compiler, options, key);

      /* Reserve space for passing the default tess levels as constants. */
      num_system_values = 8;
      system_values =
         rzalloc_array(mem_ctx, enum brw_param_builtin, num_system_values);
      prog_data->param = rzalloc_array(mem_ctx, uint32_t, num_system_values);
      prog_data->nr_params = num_system_values;

      if (key->tes_primitive_mode == GL_QUADS) {
         for (int i = 0; i < 4; i++)
            system_values[7 - i] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i;

         system_values[3] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X;
         system_values[2] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_Y;
      } else if (key->tes_primitive_mode == GL_TRIANGLES) {
         for (int i = 0; i < 3; i++)
            system_values[7 - i] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i;

         system_values[4] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X;
      } else {
         assert(key->tes_primitive_mode == GL_ISOLINES);
         system_values[7] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_Y;
         system_values[6] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X;
      }

      /* Manually setup the TCS binding table. */
      memset(&bt, 0, sizeof(bt));

      prog_data->ubo_ranges[0].length = 1;
   }

   char *error_str = NULL;
   const unsigned *program =
      brw_compile_tcs(compiler, &ice->dbg, mem_ctx, key, tcs_prog_data, nir,
                      -1, &error_str);
   if (program == NULL) {
      dbg_printf("Failed to compile control shader: %s\n", error_str);
      ralloc_free(mem_ctx);
      return NULL;
   }

   if (ish->compiled_once) {
      iris_debug_recompile(ice, &nir->info, key->program_string_id, key);
   } else {
      ish->compiled_once = true;
   }

   struct iris_compiled_shader *shader =
      iris_upload_shader(ice, IRIS_CACHE_TCS, sizeof(*key), key, program,
                         prog_data, NULL, system_values, num_system_values,
                         num_cbufs, &bt);

   iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));

   ralloc_free(mem_ctx);

   return shader;
}
/**
 * Update the current tessellation control shader variant.
 *
 * Fill out the key, look in the cache, compile and bind if needed.
 */
static void
iris_update_compiled_tcs(struct iris_context *ice)
{
   struct iris_uncompiled_shader *tcs =
      ice->shaders.uncompiled[MESA_SHADER_TESS_CTRL];
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   const struct gen_device_info *devinfo = &screen->devinfo;

   const struct shader_info *tes_info =
      iris_get_shader_info(ice, MESA_SHADER_TESS_EVAL);
   struct brw_tcs_prog_key key = {
      KEY_INIT_NO_ID(devinfo->gen),
      .program_string_id = tcs ? tcs->program_id : 0,
      .tes_primitive_mode = tes_info->tess.primitive_mode,
      .input_vertices = ice->state.vertices_per_patch,
   };
   get_unified_tess_slots(ice, &key.outputs_written,
                          &key.patch_outputs_written);
   ice->vtbl.populate_tcs_key(ice, &key);

   struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_TCS];
   struct iris_compiled_shader *shader =
      iris_find_cached_shader(ice, IRIS_CACHE_TCS, sizeof(key), &key);

   if (tcs && !shader)
      shader = iris_disk_cache_retrieve(ice, tcs, &key, sizeof(key));

   if (!shader)
      shader = iris_compile_tcs(ice, tcs, &key);

   if (old != shader) {
      ice->shaders.prog[IRIS_CACHE_TCS] = shader;
      ice->state.dirty |= IRIS_DIRTY_TCS |
                          IRIS_DIRTY_BINDINGS_TCS |
                          IRIS_DIRTY_CONSTANTS_TCS;
   }
}
/**
 * Compile a tessellation evaluation shader, and upload the assembly.
 */
static struct iris_compiled_shader *
iris_compile_tes(struct iris_context *ice,
                 struct iris_uncompiled_shader *ish,
                 const struct brw_tes_prog_key *key)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   const struct brw_compiler *compiler = screen->compiler;
   void *mem_ctx = ralloc_context(NULL);
   struct brw_tes_prog_data *tes_prog_data =
      rzalloc(mem_ctx, struct brw_tes_prog_data);
   struct brw_vue_prog_data *vue_prog_data = &tes_prog_data->base;
   struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
   enum brw_param_builtin *system_values;
   unsigned num_system_values;
   unsigned num_cbufs;

   nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);

   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
                       &num_system_values, &num_cbufs);

   struct iris_binding_table bt;
   iris_setup_binding_table(nir, &bt, /* num_render_targets */ 0,
                            num_system_values, num_cbufs);

   brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);

   struct brw_vue_map input_vue_map;
   brw_compute_tess_vue_map(&input_vue_map, key->inputs_read,
                            key->patch_inputs_read);

   char *error_str = NULL;
   const unsigned *program =
      brw_compile_tes(compiler, &ice->dbg, mem_ctx, key, &input_vue_map,
                      tes_prog_data, nir, NULL, -1, &error_str);
   if (program == NULL) {
      dbg_printf("Failed to compile evaluation shader: %s\n", error_str);
      ralloc_free(mem_ctx);
      return NULL;
   }

   if (ish->compiled_once) {
      iris_debug_recompile(ice, &nir->info, key->program_string_id, key);
   } else {
      ish->compiled_once = true;
   }

   uint32_t *so_decls =
      ice->vtbl.create_so_decl_list(&ish->stream_output,
                                    &vue_prog_data->vue_map);

   struct iris_compiled_shader *shader =
      iris_upload_shader(ice, IRIS_CACHE_TES, sizeof(*key), key, program,
                         prog_data, so_decls, system_values, num_system_values,
                         num_cbufs, &bt);

   iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));

   ralloc_free(mem_ctx);

   return shader;
}
/**
 * Update the current tessellation evaluation shader variant.
 *
 * Fill out the key, look in the cache, compile and bind if needed.
 */
static void
iris_update_compiled_tes(struct iris_context *ice)
{
   struct iris_uncompiled_shader *ish =
      ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL];
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   const struct gen_device_info *devinfo = &screen->devinfo;

   struct brw_tes_prog_key key = { KEY_INIT(devinfo->gen) };
   get_unified_tess_slots(ice, &key.inputs_read, &key.patch_inputs_read);
   ice->vtbl.populate_tes_key(ice, &key);

   struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_TES];
   struct iris_compiled_shader *shader =
      iris_find_cached_shader(ice, IRIS_CACHE_TES, sizeof(key), &key);

   if (!shader)
      shader = iris_disk_cache_retrieve(ice, ish, &key, sizeof(key));

   if (!shader)
      shader = iris_compile_tes(ice, ish, &key);

   if (old != shader) {
      ice->shaders.prog[IRIS_CACHE_TES] = shader;
      ice->state.dirty |= IRIS_DIRTY_TES |
                          IRIS_DIRTY_BINDINGS_TES |
                          IRIS_DIRTY_CONSTANTS_TES;
   }

   /* TODO: Could compare and avoid flagging this. */
   const struct shader_info *tes_info = &ish->nir->info;
   if (tes_info->system_values_read & (1ull << SYSTEM_VALUE_VERTICES_IN)) {
      ice->state.dirty |= IRIS_DIRTY_CONSTANTS_TES;
      ice->state.shaders[MESA_SHADER_TESS_EVAL].cbuf0_needs_upload = true;
   }
}
/**
 * Compile a geometry shader, and upload the assembly.
 */
static struct iris_compiled_shader *
iris_compile_gs(struct iris_context *ice,
                struct iris_uncompiled_shader *ish,
                const struct brw_gs_prog_key *key)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   const struct brw_compiler *compiler = screen->compiler;
   const struct gen_device_info *devinfo = &screen->devinfo;
   void *mem_ctx = ralloc_context(NULL);
   struct brw_gs_prog_data *gs_prog_data =
      rzalloc(mem_ctx, struct brw_gs_prog_data);
   struct brw_vue_prog_data *vue_prog_data = &gs_prog_data->base;
   struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
   enum brw_param_builtin *system_values;
   unsigned num_system_values;
   unsigned num_cbufs;

   nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);

   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
                       &num_system_values, &num_cbufs);

   struct iris_binding_table bt;
   iris_setup_binding_table(nir, &bt, /* num_render_targets */ 0,
                            num_system_values, num_cbufs);

   brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);

   brw_compute_vue_map(devinfo,
                       &vue_prog_data->vue_map, nir->info.outputs_written,
                       nir->info.separate_shader);

   char *error_str = NULL;
   const unsigned *program =
      brw_compile_gs(compiler, &ice->dbg, mem_ctx, key, gs_prog_data, nir,
                     NULL, -1, &error_str);
   if (program == NULL) {
      dbg_printf("Failed to compile geometry shader: %s\n", error_str);
      ralloc_free(mem_ctx);
      return NULL;
   }

   if (ish->compiled_once) {
      iris_debug_recompile(ice, &nir->info, key->program_string_id, key);
   } else {
      ish->compiled_once = true;
   }

   uint32_t *so_decls =
      ice->vtbl.create_so_decl_list(&ish->stream_output,
                                    &vue_prog_data->vue_map);

   struct iris_compiled_shader *shader =
      iris_upload_shader(ice, IRIS_CACHE_GS, sizeof(*key), key, program,
                         prog_data, so_decls, system_values, num_system_values,
                         num_cbufs, &bt);

   iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));

   ralloc_free(mem_ctx);

   return shader;
}
/**
 * Update the current geometry shader variant.
 *
 * Fill out the key, look in the cache, compile and bind if needed.
 */
static void
iris_update_compiled_gs(struct iris_context *ice)
{
   struct iris_uncompiled_shader *ish =
      ice->shaders.uncompiled[MESA_SHADER_GEOMETRY];
   struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_GS];
   struct iris_compiled_shader *shader = NULL;

   if (ish) {
      struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
      const struct gen_device_info *devinfo = &screen->devinfo;
      struct brw_gs_prog_key key = { KEY_INIT(devinfo->gen) };
      ice->vtbl.populate_gs_key(ice, &key);

      shader =
         iris_find_cached_shader(ice, IRIS_CACHE_GS, sizeof(key), &key);

      if (!shader)
         shader = iris_disk_cache_retrieve(ice, ish, &key, sizeof(key));

      if (!shader)
         shader = iris_compile_gs(ice, ish, &key);
   }

   if (old != shader) {
      ice->shaders.prog[IRIS_CACHE_GS] = shader;
      ice->state.dirty |= IRIS_DIRTY_GS |
                          IRIS_DIRTY_BINDINGS_GS |
                          IRIS_DIRTY_CONSTANTS_GS;
   }
}
/**
 * Compile a fragment (pixel) shader, and upload the assembly.
 */
static struct iris_compiled_shader *
iris_compile_fs(struct iris_context *ice,
                struct iris_uncompiled_shader *ish,
                const struct brw_wm_prog_key *key,
                struct brw_vue_map *vue_map)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   const struct brw_compiler *compiler = screen->compiler;
   void *mem_ctx = ralloc_context(NULL);
   struct brw_wm_prog_data *fs_prog_data =
      rzalloc(mem_ctx, struct brw_wm_prog_data);
   struct brw_stage_prog_data *prog_data = &fs_prog_data->base;
   enum brw_param_builtin *system_values;
   unsigned num_system_values;
   unsigned num_cbufs;

   nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);

   prog_data->use_alt_mode = ish->use_alt_mode;

   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
                       &num_system_values, &num_cbufs);

   struct iris_binding_table bt;
   iris_setup_binding_table(nir, &bt, MAX2(key->nr_color_regions, 1),
                            num_system_values, num_cbufs);

   brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);

   char *error_str = NULL;
   const unsigned *program =
      brw_compile_fs(compiler, &ice->dbg, mem_ctx, key, fs_prog_data,
                     nir, NULL, -1, -1, -1, true, false, vue_map, &error_str);
   if (program == NULL) {
      dbg_printf("Failed to compile fragment shader: %s\n", error_str);
      ralloc_free(mem_ctx);
      return NULL;
   }

   if (ish->compiled_once) {
      iris_debug_recompile(ice, &nir->info, key->program_string_id, key);
   } else {
      ish->compiled_once = true;
   }

   struct iris_compiled_shader *shader =
      iris_upload_shader(ice, IRIS_CACHE_FS, sizeof(*key), key, program,
                         prog_data, NULL, system_values, num_system_values,
                         num_cbufs, &bt);

   iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));

   ralloc_free(mem_ctx);

   return shader;
}
/**
 * Update the current fragment shader variant.
 *
 * Fill out the key, look in the cache, compile and bind if needed.
 */
static void
iris_update_compiled_fs(struct iris_context *ice)
{
   struct iris_uncompiled_shader *ish =
      ice->shaders.uncompiled[MESA_SHADER_FRAGMENT];
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   const struct gen_device_info *devinfo = &screen->devinfo;
   struct brw_wm_prog_key key = { KEY_INIT(devinfo->gen) };
   ice->vtbl.populate_fs_key(ice, &key);

   if (ish->nos & (1ull << IRIS_NOS_LAST_VUE_MAP))
      key.input_slots_valid = ice->shaders.last_vue_map->slots_valid;

   struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_FS];
   struct iris_compiled_shader *shader =
      iris_find_cached_shader(ice, IRIS_CACHE_FS, sizeof(key), &key);

   if (!shader)
      shader = iris_disk_cache_retrieve(ice, ish, &key, sizeof(key));

   if (!shader)
      shader = iris_compile_fs(ice, ish, &key, ice->shaders.last_vue_map);

   if (old != shader) {
      // XXX: only need to flag CLIP if barycentric has NONPERSPECTIVE
      // toggles.  might be able to avoid flagging SBE too.
      ice->shaders.prog[IRIS_CACHE_FS] = shader;
      ice->state.dirty |= IRIS_DIRTY_FS |
                          IRIS_DIRTY_BINDINGS_FS |
                          IRIS_DIRTY_CONSTANTS_FS |
                          IRIS_DIRTY_WM |
                          IRIS_DIRTY_CLIP |
                          IRIS_DIRTY_SBE;
   }
}
/**
 * Get the compiled shader for the last enabled geometry stage.
 *
 * This stage is the one which will feed stream output and the rasterizer.
 */
static gl_shader_stage
last_vue_stage(struct iris_context *ice)
{
   if (ice->shaders.prog[MESA_SHADER_GEOMETRY])
      return MESA_SHADER_GEOMETRY;

   if (ice->shaders.prog[MESA_SHADER_TESS_EVAL])
      return MESA_SHADER_TESS_EVAL;

   return MESA_SHADER_VERTEX;
}
/**
 * Update the last enabled stage's VUE map.
 *
 * When the shader feeding the rasterizer's output interface changes, we
 * need to re-emit various packets.
 */
static void
update_last_vue_map(struct iris_context *ice,
                    struct brw_stage_prog_data *prog_data)
{
   struct brw_vue_prog_data *vue_prog_data = (void *) prog_data;
   struct brw_vue_map *vue_map = &vue_prog_data->vue_map;
   struct brw_vue_map *old_map = ice->shaders.last_vue_map;
   const uint64_t changed_slots =
      (old_map ? old_map->slots_valid : 0ull) ^ vue_map->slots_valid;

   if (changed_slots & VARYING_BIT_VIEWPORT) {
      // XXX: could use ctx->Const.MaxViewports for old API efficiency
      ice->state.num_viewports =
         (vue_map->slots_valid & VARYING_BIT_VIEWPORT) ? IRIS_MAX_VIEWPORTS : 1;
      ice->state.dirty |= IRIS_DIRTY_CLIP |
                          IRIS_DIRTY_SF_CL_VIEWPORT |
                          IRIS_DIRTY_CC_VIEWPORT |
                          IRIS_DIRTY_SCISSOR_RECT |
                          IRIS_DIRTY_UNCOMPILED_FS |
                          ice->state.dirty_for_nos[IRIS_NOS_LAST_VUE_MAP];
      // XXX: CC_VIEWPORT?
   }

   if (changed_slots || (old_map && old_map->separate != vue_map->separate)) {
      ice->state.dirty |= IRIS_DIRTY_SBE;
   }

   ice->shaders.last_vue_map = &vue_prog_data->vue_map;
}
/**
 * Get the prog_data for a given stage, or NULL if the stage is disabled.
 */
static struct brw_vue_prog_data *
get_vue_prog_data(struct iris_context *ice, gl_shader_stage stage)
{
   if (!ice->shaders.prog[stage])
      return NULL;

   return (void *) ice->shaders.prog[stage]->prog_data;
}

// XXX: iris_compiled_shaders are space-leaking :(
// XXX: do remember to unbind them if deleting them.
/**
 * Update the current shader variants for the given state.
 *
 * This should be called on every draw call to ensure that the correct
 * shaders are bound.  It will also flag any dirty state triggered by
 * swapping out those shaders.
 */
void
iris_update_compiled_shaders(struct iris_context *ice)
{
   const uint64_t dirty = ice->state.dirty;

   struct brw_vue_prog_data *old_prog_datas[4];
   if (!(dirty & IRIS_DIRTY_URB)) {
      for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++)
         old_prog_datas[i] = get_vue_prog_data(ice, i);
   }

   if (dirty & (IRIS_DIRTY_UNCOMPILED_TCS | IRIS_DIRTY_UNCOMPILED_TES)) {
      struct iris_uncompiled_shader *tes =
         ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL];
      if (tes) {
         iris_update_compiled_tcs(ice);
         iris_update_compiled_tes(ice);
      } else {
         ice->shaders.prog[IRIS_CACHE_TCS] = NULL;
         ice->shaders.prog[IRIS_CACHE_TES] = NULL;
         ice->state.dirty |=
            IRIS_DIRTY_TCS | IRIS_DIRTY_TES |
            IRIS_DIRTY_BINDINGS_TCS | IRIS_DIRTY_BINDINGS_TES |
            IRIS_DIRTY_CONSTANTS_TCS | IRIS_DIRTY_CONSTANTS_TES;
      }
   }
   if (dirty & IRIS_DIRTY_UNCOMPILED_VS)
      iris_update_compiled_vs(ice);
   if (dirty & IRIS_DIRTY_UNCOMPILED_GS)
      iris_update_compiled_gs(ice);

   if (dirty & (IRIS_DIRTY_UNCOMPILED_GS | IRIS_DIRTY_UNCOMPILED_TES)) {
      const struct iris_compiled_shader *gs =
         ice->shaders.prog[MESA_SHADER_GEOMETRY];
      const struct iris_compiled_shader *tes =
         ice->shaders.prog[MESA_SHADER_TESS_EVAL];

      bool points_or_lines = false;

      if (gs) {
         const struct brw_gs_prog_data *gs_prog_data = (void *) gs->prog_data;
         points_or_lines =
            gs_prog_data->output_topology == _3DPRIM_POINTLIST ||
            gs_prog_data->output_topology == _3DPRIM_LINESTRIP;
      } else if (tes) {
         const struct brw_tes_prog_data *tes_data = (void *) tes->prog_data;
         points_or_lines =
            tes_data->output_topology == BRW_TESS_OUTPUT_TOPOLOGY_LINE ||
            tes_data->output_topology == BRW_TESS_OUTPUT_TOPOLOGY_POINT;
      }

      if (ice->shaders.output_topology_is_points_or_lines != points_or_lines) {
         /* Outbound to XY Clip enables */
         ice->shaders.output_topology_is_points_or_lines = points_or_lines;
         ice->state.dirty |= IRIS_DIRTY_CLIP;
      }
   }

   gl_shader_stage last_stage = last_vue_stage(ice);
   struct iris_compiled_shader *shader = ice->shaders.prog[last_stage];
   struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[last_stage];
   update_last_vue_map(ice, shader->prog_data);
   if (ice->state.streamout != shader->streamout) {
      ice->state.streamout = shader->streamout;
      ice->state.dirty |= IRIS_DIRTY_SO_DECL_LIST | IRIS_DIRTY_STREAMOUT;
   }

   if (ice->state.streamout_active) {
      for (int i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
         struct iris_stream_output_target *so =
            (void *) ice->state.so_target[i];
         if (so)
            so->stride = ish->stream_output.stride[i];
      }
   }

   if (dirty & IRIS_DIRTY_UNCOMPILED_FS)
      iris_update_compiled_fs(ice);

   /* Changing shader interfaces may require a URB configuration. */
   if (!(dirty & IRIS_DIRTY_URB)) {
      for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
         struct brw_vue_prog_data *old = old_prog_datas[i];
         struct brw_vue_prog_data *new = get_vue_prog_data(ice, i);
         if (!!old != !!new ||
             (new && new->urb_entry_size != old->urb_entry_size)) {
            ice->state.dirty |= IRIS_DIRTY_URB;
            break;
         }
      }
   }
}
static struct iris_compiled_shader *
iris_compile_cs(struct iris_context *ice,
                struct iris_uncompiled_shader *ish,
                const struct brw_cs_prog_key *key)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   const struct brw_compiler *compiler = screen->compiler;
   void *mem_ctx = ralloc_context(NULL);
   struct brw_cs_prog_data *cs_prog_data =
      rzalloc(mem_ctx, struct brw_cs_prog_data);
   struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
   enum brw_param_builtin *system_values;
   unsigned num_system_values;
   unsigned num_cbufs;

   nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);

   prog_data->total_shared = nir->info.cs.shared_size;

   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
                       &num_system_values, &num_cbufs);

   struct iris_binding_table bt;
   iris_setup_binding_table(nir, &bt, /* num_render_targets */ 0,
                            num_system_values, num_cbufs);

   char *error_str = NULL;
   const unsigned *program =
      brw_compile_cs(compiler, &ice->dbg, mem_ctx, key, cs_prog_data,
                     nir, -1, &error_str);
   if (program == NULL) {
      dbg_printf("Failed to compile compute shader: %s\n", error_str);
      ralloc_free(mem_ctx);
      return NULL;
   }

   if (ish->compiled_once) {
      iris_debug_recompile(ice, &nir->info, key->program_string_id, key);
   } else {
      ish->compiled_once = true;
   }

   struct iris_compiled_shader *shader =
      iris_upload_shader(ice, IRIS_CACHE_CS, sizeof(*key), key, program,
                         prog_data, NULL, system_values, num_system_values,
                         num_cbufs, &bt);

   iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));

   ralloc_free(mem_ctx);

   return shader;
}
static void
iris_update_compiled_compute_shader(struct iris_context *ice)
{
   struct iris_uncompiled_shader *ish =
      ice->shaders.uncompiled[MESA_SHADER_COMPUTE];

   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   const struct gen_device_info *devinfo = &screen->devinfo;
   struct brw_cs_prog_key key = { KEY_INIT(devinfo->gen) };
   ice->vtbl.populate_cs_key(ice, &key);

   struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_CS];
   struct iris_compiled_shader *shader =
      iris_find_cached_shader(ice, IRIS_CACHE_CS, sizeof(key), &key);

   if (!shader)
      shader = iris_disk_cache_retrieve(ice, ish, &key, sizeof(key));

   if (!shader)
      shader = iris_compile_cs(ice, ish, &key);

   if (old != shader) {
      ice->shaders.prog[IRIS_CACHE_CS] = shader;
      ice->state.dirty |= IRIS_DIRTY_CS |
                          IRIS_DIRTY_BINDINGS_CS |
                          IRIS_DIRTY_CONSTANTS_CS;
   }
}
void
iris_fill_cs_push_const_buffer(struct brw_cs_prog_data *cs_prog_data,
                               uint32_t *dst)
{
   assert(cs_prog_data->push.total.size > 0);
   assert(cs_prog_data->push.cross_thread.size == 0);
   assert(cs_prog_data->push.per_thread.dwords == 1);
   assert(cs_prog_data->base.param[0] == BRW_PARAM_BUILTIN_SUBGROUP_ID);
   for (unsigned t = 0; t < cs_prog_data->threads; t++)
      dst[8 * t] = t;
}
/**
 * Allocate scratch BOs as needed for the given per-thread size and stage.
 */
struct iris_bo *
iris_get_scratch_space(struct iris_context *ice,
                       unsigned per_thread_scratch,
                       gl_shader_stage stage)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   struct iris_bufmgr *bufmgr = screen->bufmgr;
   const struct gen_device_info *devinfo = &screen->devinfo;

   unsigned encoded_size = ffs(per_thread_scratch) - 11;
   assert(encoded_size < (1 << 16));

   struct iris_bo **bop = &ice->shaders.scratch_bos[encoded_size][stage];

   /* The documentation for 3DSTATE_PS "Scratch Space Base Pointer" says:
    *
    *    "Scratch Space per slice is computed based on 4 sub-slices.  SW
    *     must allocate scratch space enough so that each slice has 4
    *     slices allowed."
    *
    * According to the other driver team, this applies to compute shaders
    * as well.  This is not currently documented at all.
    *
    * This hack is no longer necessary on Gen11+.
    */
   unsigned subslice_total = screen->subslice_total;
   if (devinfo->gen < 11)
      subslice_total = 4 * devinfo->num_slices;
   assert(subslice_total >= screen->subslice_total);

   if (!*bop) {
      unsigned scratch_ids_per_subslice = devinfo->max_cs_threads;
      uint32_t max_threads[] = {
         [MESA_SHADER_VERTEX]    = devinfo->max_vs_threads,
         [MESA_SHADER_TESS_CTRL] = devinfo->max_tcs_threads,
         [MESA_SHADER_TESS_EVAL] = devinfo->max_tes_threads,
         [MESA_SHADER_GEOMETRY]  = devinfo->max_gs_threads,
         [MESA_SHADER_FRAGMENT]  = devinfo->max_wm_threads,
         [MESA_SHADER_COMPUTE]   = scratch_ids_per_subslice * subslice_total,
      };

      uint32_t size = per_thread_scratch * max_threads[stage];

      *bop = iris_bo_alloc(bufmgr, "scratch", size, IRIS_MEMZONE_SHADER);
   }

   return *bop;
}
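/* Worked example (illustrative, with made-up device limits): if
 * per_thread_scratch is 2048 bytes, encoded_size = ffs(2048) - 11 = 1.  For a
 * fragment shader on a device reporting max_wm_threads == 336, the scratch BO
 * would be 2048 * 336 bytes, and it is cached in scratch_bos[1][FRAGMENT] so
 * later variants needing 2KB per thread reuse the same BO.
 */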
/* ------------------------------------------------------------------- */

/**
 * The pipe->create_[stage]_state() driver hooks.
 *
 * Performs basic NIR preprocessing, records any state dependencies, and
 * returns an iris_uncompiled_shader as the Gallium CSO.
 *
 * Actual shader compilation to assembly happens later, at first use.
 */
static void *
iris_create_uncompiled_shader(struct pipe_context *ctx,
                              nir_shader *nir,
                              const struct pipe_stream_output_info *so_info)
{
   struct iris_context *ice = (void *)ctx;
   struct iris_screen *screen = (struct iris_screen *)ctx->screen;
   const struct gen_device_info *devinfo = &screen->devinfo;

   struct iris_uncompiled_shader *ish =
      calloc(1, sizeof(struct iris_uncompiled_shader));
   if (!ish)
      return NULL;

   nir = brw_preprocess_nir(screen->compiler, nir, NULL);

   NIR_PASS_V(nir, brw_nir_lower_image_load_store, devinfo);
   NIR_PASS_V(nir, iris_lower_storage_image_derefs);

   if (nir->constant_data_size > 0) {
      unsigned data_offset;
      u_upload_data(ice->shaders.uploader, 0, nir->constant_data_size,
                    32, nir->constant_data, &data_offset, &ish->const_data);

      struct pipe_shader_buffer psb = {
         .buffer = ish->const_data,
         .buffer_offset = data_offset,
         .buffer_size = nir->constant_data_size,
      };
      iris_upload_ubo_ssbo_surf_state(ice, &psb, &ish->const_data_state, false);
   }

   ish->program_id = get_new_program_id(screen);
   ish->nir = nir;
   if (so_info) {
      memcpy(&ish->stream_output, so_info, sizeof(*so_info));
      update_so_info(&ish->stream_output, nir->info.outputs_written);
   }

   /* Save this now before potentially dropping nir->info.name */
   if (nir->info.name && strncmp(nir->info.name, "ARB", 3) == 0)
      ish->use_alt_mode = true;

   if (screen->disk_cache) {
      /* Serialize the NIR to a binary blob that we can hash for the disk
       * cache.  First, drop unnecessary information (like variable names)
       * so the serialized NIR is smaller, and also to let us detect more
       * isomorphic shaders when hashing, increasing cache hits.  We clone
       * the NIR before stripping away this info because it can be useful
       * when inspecting and debugging shaders.
       */
      nir_shader *clone = nir_shader_clone(NULL, nir);
      nir_strip(clone);

      struct blob blob;
      blob_init(&blob);
      nir_serialize(&blob, clone);
      _mesa_sha1_compute(blob.data, blob.size, ish->nir_sha1);

      blob_finish(&blob);
      ralloc_free(clone);
   }

   return ish;
}
static struct iris_uncompiled_shader *
iris_create_shader_state(struct pipe_context *ctx,
                         const struct pipe_shader_state *state)
{
   struct nir_shader *nir;

   if (state->type == PIPE_SHADER_IR_TGSI)
      nir = tgsi_to_nir(state->tokens, ctx->screen);
   else
      nir = state->ir.nir;

   return iris_create_uncompiled_shader(ctx, nir, &state->stream_output);
}
static void *
iris_create_vs_state(struct pipe_context *ctx,
                     const struct pipe_shader_state *state)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_screen *screen = (void *) ctx->screen;
   struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);

   /* User clip planes */
   if (ish->nir->info.clip_distance_array_size == 0)
      ish->nos |= (1ull << IRIS_NOS_RASTERIZER);

   if (screen->precompile) {
      const struct gen_device_info *devinfo = &screen->devinfo;
      struct brw_vs_prog_key key = { KEY_INIT(devinfo->gen) };

      if (!iris_disk_cache_retrieve(ice, ish, &key, sizeof(key)))
         iris_compile_vs(ice, ish, &key);
   }

   return ish;
}
static void *
iris_create_tcs_state(struct pipe_context *ctx,
                      const struct pipe_shader_state *state)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_screen *screen = (void *) ctx->screen;
   const struct brw_compiler *compiler = screen->compiler;
   struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);
   struct shader_info *info = &ish->nir->info;

   if (screen->precompile) {
      const unsigned _GL_TRIANGLES = 0x0004;
      const struct gen_device_info *devinfo = &screen->devinfo;
      struct brw_tcs_prog_key key = {
         KEY_INIT(devinfo->gen),
         // XXX: make sure the linker fills this out from the TES...
         .tes_primitive_mode = info->tess.primitive_mode ?
                               info->tess.primitive_mode : _GL_TRIANGLES,
         .outputs_written = info->outputs_written,
         .patch_outputs_written = info->patch_outputs_written,
      };

      /* 8_PATCH mode needs the key to contain the input patch dimensionality.
       * We don't have that information, so we randomly guess that the input
       * and output patches are the same size.  This is a bad guess, but we
       * can't do much better.
       */
      if (compiler->use_tcs_8_patch)
         key.input_vertices = info->tess.tcs_vertices_out;

      if (!iris_disk_cache_retrieve(ice, ish, &key, sizeof(key)))
         iris_compile_tcs(ice, ish, &key);
   }

   return ish;
}

static void *
iris_create_tes_state(struct pipe_context *ctx,
                      const struct pipe_shader_state *state)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_screen *screen = (void *) ctx->screen;
   struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);
   struct shader_info *info = &ish->nir->info;

   if (screen->precompile) {
      const struct gen_device_info *devinfo = &screen->devinfo;
      struct brw_tes_prog_key key = {
         KEY_INIT(devinfo->gen),
         // XXX: not ideal, need TCS output/TES input unification
         .inputs_read = info->inputs_read,
         .patch_inputs_read = info->patch_inputs_read,
      };

      if (!iris_disk_cache_retrieve(ice, ish, &key, sizeof(key)))
         iris_compile_tes(ice, ish, &key);
   }

   return ish;
}

static void *
iris_create_gs_state(struct pipe_context *ctx,
                     const struct pipe_shader_state *state)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_screen *screen = (void *) ctx->screen;
   struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);

   if (screen->precompile) {
      const struct gen_device_info *devinfo = &screen->devinfo;
      struct brw_gs_prog_key key = { KEY_INIT(devinfo->gen) };

      if (!iris_disk_cache_retrieve(ice, ish, &key, sizeof(key)))
         iris_compile_gs(ice, ish, &key);
   }

   return ish;
}

static void *
iris_create_fs_state(struct pipe_context *ctx,
                     const struct pipe_shader_state *state)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_screen *screen = (void *) ctx->screen;
   struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);
   struct shader_info *info = &ish->nir->info;

   ish->nos |= (1ull << IRIS_NOS_FRAMEBUFFER) |
               (1ull << IRIS_NOS_DEPTH_STENCIL_ALPHA) |
               (1ull << IRIS_NOS_RASTERIZER) |
               (1ull << IRIS_NOS_BLEND);

   /* The program key needs the VUE map if there are > 16 inputs */
   if (util_bitcount64(ish->nir->info.inputs_read &
                       BRW_FS_VARYING_INPUT_MASK) > 16) {
      ish->nos |= (1ull << IRIS_NOS_LAST_VUE_MAP);
   }

   if (screen->precompile) {
      const uint64_t color_outputs = info->outputs_written &
         ~(BITFIELD64_BIT(FRAG_RESULT_DEPTH) |
           BITFIELD64_BIT(FRAG_RESULT_STENCIL) |
           BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK));

      bool can_rearrange_varyings =
         util_bitcount64(info->inputs_read & BRW_FS_VARYING_INPUT_MASK) <= 16;

      const struct gen_device_info *devinfo = &screen->devinfo;
      struct brw_wm_prog_key key = {
         KEY_INIT(devinfo->gen),
         .nr_color_regions = util_bitcount(color_outputs),
         .coherent_fb_fetch = true,
         .input_slots_valid =
            can_rearrange_varyings ? 0 : info->inputs_read | VARYING_BIT_POS,
      };

      if (!iris_disk_cache_retrieve(ice, ish, &key, sizeof(key)))
         iris_compile_fs(ice, ish, &key, NULL);
   }

   return ish;
}
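
/* pipe->create_compute_state() hook.  Compute shaders arrive as a
 * pipe_compute_state rather than a pipe_shader_state; iris requires the
 * IR to be NIR, and compute has no stream output, hence the NULL so_info.
 */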
static void *
iris_create_compute_state(struct pipe_context *ctx,
                          const struct pipe_compute_state *state)
{
   assert(state->ir_type == PIPE_SHADER_IR_NIR);

   struct iris_context *ice = (void *) ctx;
   struct iris_screen *screen = (void *) ctx->screen;
   struct iris_uncompiled_shader *ish =
      iris_create_uncompiled_shader(ctx, (void *) state->prog, NULL);

   // XXX: disallow more than 64KB of shared variables

   if (screen->precompile) {
      const struct gen_device_info *devinfo = &screen->devinfo;
      struct brw_cs_prog_key key = { KEY_INIT(devinfo->gen) };

      if (!iris_disk_cache_retrieve(ice, ish, &key, sizeof(key)))
         iris_compile_cs(ice, ish, &key);
   }

   return ish;
}

/**
 * The pipe->delete_[stage]_state() driver hooks.
 *
 * Frees the iris_uncompiled_shader.
 */
static void
iris_delete_shader_state(struct pipe_context *ctx, void *state, gl_shader_stage stage)
{
   struct iris_uncompiled_shader *ish = state;
   struct iris_context *ice = (void *) ctx;

   if (ice->shaders.uncompiled[stage] == ish) {
      ice->shaders.uncompiled[stage] = NULL;
      ice->state.dirty |= IRIS_DIRTY_UNCOMPILED_VS << stage;
   }

   if (ish->const_data) {
      pipe_resource_reference(&ish->const_data, NULL);
      pipe_resource_reference(&ish->const_data_state.res, NULL);
   }

   ralloc_free(ish->nir);
   free(ish);
}

static void
iris_delete_vs_state(struct pipe_context *ctx, void *state)
{
   iris_delete_shader_state(ctx, state, MESA_SHADER_VERTEX);
}

static void
iris_delete_tcs_state(struct pipe_context *ctx, void *state)
{
   iris_delete_shader_state(ctx, state, MESA_SHADER_TESS_CTRL);
}

static void
iris_delete_tes_state(struct pipe_context *ctx, void *state)
{
   iris_delete_shader_state(ctx, state, MESA_SHADER_TESS_EVAL);
}

static void
iris_delete_gs_state(struct pipe_context *ctx, void *state)
{
   iris_delete_shader_state(ctx, state, MESA_SHADER_GEOMETRY);
}

static void
iris_delete_fs_state(struct pipe_context *ctx, void *state)
{
   iris_delete_shader_state(ctx, state, MESA_SHADER_FRAGMENT);
}

static void
iris_delete_cs_state(struct pipe_context *ctx, void *state)
{
   iris_delete_shader_state(ctx, state, MESA_SHADER_COMPUTE);
}

/**
 * The pipe->bind_[stage]_state() driver hook.
 *
 * Binds an uncompiled shader as the current one for a particular stage.
 * Updates dirty tracking to account for the shader's NOS.
 */
static void
bind_state(struct iris_context *ice,
           struct iris_uncompiled_shader *ish,
           gl_shader_stage stage)
{
   uint64_t dirty_bit = IRIS_DIRTY_UNCOMPILED_VS << stage;
   const uint64_t nos = ish ? ish->nos : 0;

   const struct shader_info *old_info = iris_get_shader_info(ice, stage);
   const struct shader_info *new_info = ish ? &ish->nir->info : NULL;

   if ((old_info ? util_last_bit(old_info->textures_used) : 0) !=
       (new_info ? util_last_bit(new_info->textures_used) : 0)) {
      ice->state.dirty |= IRIS_DIRTY_SAMPLER_STATES_VS << stage;
   }

   ice->shaders.uncompiled[stage] = ish;
   ice->state.dirty |= dirty_bit;

   /* Record that CSOs need to mark IRIS_DIRTY_UNCOMPILED_XS when they change
    * (or that they no longer need to do so).
    */
   for (int i = 0; i < IRIS_NOS_COUNT; i++) {
      if (nos & (1 << i))
         ice->state.dirty_for_nos[i] |= dirty_bit;
      else
         ice->state.dirty_for_nos[i] &= ~dirty_bit;
   }
}

static void
iris_bind_vs_state(struct pipe_context *ctx, void *state)
{
   bind_state((void *) ctx, state, MESA_SHADER_VERTEX);
}

static void
iris_bind_tcs_state(struct pipe_context *ctx, void *state)
{
   bind_state((void *) ctx, state, MESA_SHADER_TESS_CTRL);
}

static void
iris_bind_tes_state(struct pipe_context *ctx, void *state)
{
   struct iris_context *ice = (struct iris_context *)ctx;

   /* Enabling/disabling optional stages requires a URB reconfiguration. */
   if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL])
      ice->state.dirty |= IRIS_DIRTY_URB;

   bind_state((void *) ctx, state, MESA_SHADER_TESS_EVAL);
}

static void
iris_bind_gs_state(struct pipe_context *ctx, void *state)
{
   struct iris_context *ice = (struct iris_context *)ctx;

   /* Enabling/disabling optional stages requires a URB reconfiguration. */
   if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_GEOMETRY])
      ice->state.dirty |= IRIS_DIRTY_URB;

   bind_state((void *) ctx, state, MESA_SHADER_GEOMETRY);
}

static void
iris_bind_fs_state(struct pipe_context *ctx, void *state)
{
   struct iris_context *ice = (struct iris_context *) ctx;
   struct iris_uncompiled_shader *old_ish =
      ice->shaders.uncompiled[MESA_SHADER_FRAGMENT];
   struct iris_uncompiled_shader *new_ish = state;

   const unsigned color_bits =
      BITFIELD64_BIT(FRAG_RESULT_COLOR) |
      BITFIELD64_RANGE(FRAG_RESULT_DATA0, BRW_MAX_DRAW_BUFFERS);

   /* Fragment shader outputs influence HasWriteableRT */
   if (!old_ish || !new_ish ||
       (old_ish->nir->info.outputs_written & color_bits) !=
       (new_ish->nir->info.outputs_written & color_bits))
      ice->state.dirty |= IRIS_DIRTY_PS_BLEND;

   bind_state((void *) ctx, state, MESA_SHADER_FRAGMENT);
}

static void
iris_bind_cs_state(struct pipe_context *ctx, void *state)
{
   bind_state((void *) ctx, state, MESA_SHADER_COMPUTE);
}
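
/* Installs the shader-related CSO hooks into the pipe_context function
 * table when the iris context is created.
 */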
void
iris_init_program_functions(struct pipe_context *ctx)
{
   ctx->create_vs_state  = iris_create_vs_state;
   ctx->create_tcs_state = iris_create_tcs_state;
   ctx->create_tes_state = iris_create_tes_state;
   ctx->create_gs_state  = iris_create_gs_state;
   ctx->create_fs_state  = iris_create_fs_state;
   ctx->create_compute_state = iris_create_compute_state;

   ctx->delete_vs_state  = iris_delete_vs_state;
   ctx->delete_tcs_state = iris_delete_tcs_state;
   ctx->delete_tes_state = iris_delete_tes_state;
   ctx->delete_gs_state  = iris_delete_gs_state;
   ctx->delete_fs_state  = iris_delete_fs_state;
   ctx->delete_compute_state = iris_delete_cs_state;

   ctx->bind_vs_state  = iris_bind_vs_state;
   ctx->bind_tcs_state = iris_bind_tcs_state;
   ctx->bind_tes_state = iris_bind_tes_state;
   ctx->bind_gs_state  = iris_bind_gs_state;
   ctx->bind_fs_state  = iris_bind_fs_state;
   ctx->bind_compute_state = iris_bind_cs_state;
}
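
/* Typical CSO lifecycle from a state tracker's point of view (illustrative
 * sketch only; the real callers live in the Gallium state trackers):
 *
 *    void *vs = ctx->create_vs_state(ctx, &shader_state);
 *    ctx->bind_vs_state(ctx, vs);
 *    ...draw...
 *    ctx->bind_vs_state(ctx, NULL);
 *    ctx->delete_vs_state(ctx, vs);
 */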