/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
/*
 * Implements a pass that lowers output and/or input variables to a
 * temporary plus an output variable with a single copy at each exit
 * point of the shader and/or an input variable with a single copy
 * at the entrance point of the shader.  This way the output variable
 * is only ever written once and/or input is only read once, and there
 * are no indirect output/input accesses.
 */
#include "nir_builder.h"
#include "nir_deref.h"
37 struct lower_io_state
{
39 nir_function_impl
*entrypoint
;
40 struct exec_list old_outputs
;
41 struct exec_list old_inputs
;
43 /* map from temporary to new input */
44 struct hash_table
*input_map
;
48 emit_copies(nir_builder
*b
, struct exec_list
*dest_vars
,
49 struct exec_list
*src_vars
)
51 assert(exec_list_length(dest_vars
) == exec_list_length(src_vars
));
53 foreach_two_lists(dest_node
, dest_vars
, src_node
, src_vars
) {
54 nir_variable
*dest
= exec_node_data(nir_variable
, dest_node
, node
);
55 nir_variable
*src
= exec_node_data(nir_variable
, src_node
, node
);
57 /* No need to copy the contents of a non-fb_fetch_output output variable
58 * to the temporary allocated for it, since its initial value is
61 if (src
->data
.mode
== nir_var_shader_out
&&
62 !src
->data
.fb_fetch_output
)
65 /* Can't copy the contents of the temporary back to a read-only
66 * interface variable. The value of the temporary won't have been
67 * modified by the shader anyway.
69 if (dest
->data
.read_only
)
72 nir_copy_var(b
, dest
, src
);
77 emit_output_copies_impl(struct lower_io_state
*state
, nir_function_impl
*impl
)
80 nir_builder_init(&b
, impl
);
82 if (state
->shader
->info
.stage
== MESA_SHADER_GEOMETRY
) {
83 /* For geometry shaders, we have to emit the output copies right
84 * before each EmitVertex call.
86 nir_foreach_block(block
, impl
) {
87 nir_foreach_instr(instr
, block
) {
88 if (instr
->type
!= nir_instr_type_intrinsic
)
91 nir_intrinsic_instr
*intrin
= nir_instr_as_intrinsic(instr
);
92 if (intrin
->intrinsic
== nir_intrinsic_emit_vertex
||
93 intrin
->intrinsic
== nir_intrinsic_emit_vertex_with_counter
) {
94 b
.cursor
= nir_before_instr(&intrin
->instr
);
95 emit_copies(&b
, &state
->shader
->outputs
, &state
->old_outputs
);
99 } else if (impl
== state
->entrypoint
) {
100 b
.cursor
= nir_before_block(nir_start_block(impl
));
101 emit_copies(&b
, &state
->old_outputs
, &state
->shader
->outputs
);
103 /* For all other shader types, we need to do the copies right before
104 * the jumps to the end block.
106 set_foreach(impl
->end_block
->predecessors
, block_entry
) {
107 struct nir_block
*block
= (void *)block_entry
->key
;
108 b
.cursor
= nir_after_block_before_jump(block
);
109 emit_copies(&b
, &state
->shader
->outputs
, &state
->old_outputs
);
/* For fragment shader inputs, when we lower to temporaries we'll invalidate
 * interpolateAt*() because now they'll be pointing to the temporary instead
 * of the actual variable. Since the caller presumably doesn't support
 * indirect indexing of inputs, we'll need to lower something like:
 *
 *    in vec4 foo[2];
 *
 *    ... = interpolateAtCentroid(foo[i]);
 *
 * to a sequence of interpolations that store to our temporary, then a
 * load from the temporary:
 *
 *    in vec4 foo[2];
 *    vec4 foo_tmp[2];
 *
 *    foo_tmp[0] = interpolateAtCentroid(foo[0]);
 *    foo_tmp[1] = interpolateAtCentroid(foo[1]);
 *    ... = foo_tmp[i];
 */
135 * Recursively emit the interpolation instructions. Here old_interp_deref
136 * refers to foo[i], temp_deref is foo_tmp[0/1], and new_interp_deref is
141 emit_interp(nir_builder
*b
, nir_deref_instr
**old_interp_deref
,
142 nir_deref_instr
*temp_deref
, nir_deref_instr
*new_interp_deref
,
143 nir_intrinsic_instr
*interp
)
145 while (*old_interp_deref
) {
146 switch ((*old_interp_deref
)->deref_type
) {
147 case nir_deref_type_struct
:
149 nir_build_deref_struct(b
, temp_deref
,
150 (*old_interp_deref
)->strct
.index
);
152 nir_build_deref_struct(b
, new_interp_deref
,
153 (*old_interp_deref
)->strct
.index
);
155 case nir_deref_type_array
:
156 if (nir_src_is_const((*old_interp_deref
)->arr
.index
)) {
158 nir_build_deref_array(b
, temp_deref
,
159 (*old_interp_deref
)->arr
.index
.ssa
);
161 nir_build_deref_array(b
, new_interp_deref
,
162 (*old_interp_deref
)->arr
.index
.ssa
);
165 /* We have an indirect deref, so we have to emit interpolations
166 * for every index. Recurse in case we have an array of arrays.
168 unsigned length
= glsl_get_length(temp_deref
->type
);
169 for (unsigned i
= 0; i
< length
; i
++) {
170 nir_deref_instr
*new_temp
=
171 nir_build_deref_array_imm(b
, temp_deref
, i
);
172 nir_deref_instr
*new_interp
=
173 nir_build_deref_array_imm(b
, new_interp_deref
, i
);
175 emit_interp(b
, old_interp_deref
+ 1, new_temp
, new_interp
,
182 case nir_deref_type_var
:
183 case nir_deref_type_array_wildcard
:
184 case nir_deref_type_ptr_as_array
:
185 case nir_deref_type_cast
:
186 unreachable("bad deref type");
192 /* Now that we've constructed a fully-qualified deref with all the indirect
193 * derefs replaced with direct ones, it's time to actually emit the new
194 * interpolation instruction.
197 nir_intrinsic_instr
*new_interp
=
198 nir_intrinsic_instr_create(b
->shader
, interp
->intrinsic
);
200 new_interp
->src
[0] = nir_src_for_ssa(&new_interp_deref
->dest
.ssa
);
201 if (interp
->intrinsic
== nir_intrinsic_interp_deref_at_sample
||
202 interp
->intrinsic
== nir_intrinsic_interp_deref_at_offset
||
203 interp
->intrinsic
== nir_intrinsic_interp_deref_at_vertex
) {
204 new_interp
->src
[1] = interp
->src
[1];
207 new_interp
->num_components
= interp
->num_components
;
208 nir_ssa_dest_init(&new_interp
->instr
, &new_interp
->dest
,
209 interp
->dest
.ssa
.num_components
,
210 interp
->dest
.ssa
.bit_size
, NULL
);
212 nir_builder_instr_insert(b
, &new_interp
->instr
);
213 nir_store_deref(b
, temp_deref
, &new_interp
->dest
.ssa
,
214 (1 << interp
->dest
.ssa
.num_components
) - 1);
218 fixup_interpolation_instr(struct lower_io_state
*state
,
219 nir_intrinsic_instr
*interp
, nir_builder
*b
)
221 nir_deref_path interp_path
;
222 nir_deref_path_init(&interp_path
, nir_src_as_deref(interp
->src
[0]), NULL
);
224 b
->cursor
= nir_before_instr(&interp
->instr
);
226 /* The original interpolation instruction should contain a deref path
227 * starting with the original variable, which is now the temporary.
229 nir_deref_instr
*temp_root
= interp_path
.path
[0];
231 /* Fish out the newly-created input variable. */
232 assert(temp_root
->deref_type
== nir_deref_type_var
);
233 struct hash_entry
*entry
= _mesa_hash_table_search(state
->input_map
,
236 nir_variable
*input
= entry
->data
;
237 nir_deref_instr
*input_root
= nir_build_deref_var(b
, input
);
239 /* Emit the interpolation instructions. */
240 emit_interp(b
, interp_path
.path
+ 1, temp_root
, input_root
, interp
);
242 /* Now the temporary contains the interpolation results, and we can just
243 * load from it. We can reuse the original deref, since it points to the
244 * correct part of the temporary.
246 nir_ssa_def
*load
= nir_load_deref(b
, nir_src_as_deref(interp
->src
[0]));
247 nir_ssa_def_rewrite_uses(&interp
->dest
.ssa
, nir_src_for_ssa(load
));
248 nir_instr_remove(&interp
->instr
);
250 nir_deref_path_finish(&interp_path
);
254 fixup_interpolation(struct lower_io_state
*state
, nir_function_impl
*impl
,
257 nir_foreach_block(block
, impl
) {
258 nir_foreach_instr_safe(instr
, block
) {
259 if (instr
->type
!= nir_instr_type_intrinsic
)
262 nir_intrinsic_instr
*interp
= nir_instr_as_intrinsic(instr
);
264 if (interp
->intrinsic
== nir_intrinsic_interp_deref_at_centroid
||
265 interp
->intrinsic
== nir_intrinsic_interp_deref_at_sample
||
266 interp
->intrinsic
== nir_intrinsic_interp_deref_at_offset
||
267 interp
->intrinsic
== nir_intrinsic_interp_deref_at_vertex
) {
268 fixup_interpolation_instr(state
, interp
, b
);
275 emit_input_copies_impl(struct lower_io_state
*state
, nir_function_impl
*impl
)
277 if (impl
== state
->entrypoint
) {
279 nir_builder_init(&b
, impl
);
280 b
.cursor
= nir_before_block(nir_start_block(impl
));
281 emit_copies(&b
, &state
->old_inputs
, &state
->shader
->inputs
);
282 if (state
->shader
->info
.stage
== MESA_SHADER_FRAGMENT
)
283 fixup_interpolation(state
, impl
, &b
);
287 static nir_variable
*
288 create_shadow_temp(struct lower_io_state
*state
, nir_variable
*var
)
290 nir_variable
*nvar
= ralloc(state
->shader
, nir_variable
);
291 memcpy(nvar
, var
, sizeof *nvar
);
292 nvar
->data
.cannot_coalesce
= true;
294 /* The original is now the temporary */
295 nir_variable
*temp
= var
;
297 /* Reparent the name to the new variable */
298 ralloc_steal(nvar
, nvar
->name
);
300 assert(nvar
->constant_initializer
== NULL
&& nvar
->pointer_initializer
== NULL
);
302 /* Give the original a new name with @<mode>-temp appended */
303 const char *mode
= (temp
->data
.mode
== nir_var_shader_in
) ? "in" : "out";
304 temp
->name
= ralloc_asprintf(var
, "%s@%s-temp", mode
, nvar
->name
);
305 temp
->data
.mode
= nir_var_shader_temp
;
306 temp
->data
.read_only
= false;
307 temp
->data
.fb_fetch_output
= false;
308 temp
->data
.compact
= false;
314 nir_lower_io_to_temporaries(nir_shader
*shader
, nir_function_impl
*entrypoint
,
315 bool outputs
, bool inputs
)
317 struct lower_io_state state
;
319 if (shader
->info
.stage
== MESA_SHADER_TESS_CTRL
)
322 state
.shader
= shader
;
323 state
.entrypoint
= entrypoint
;
324 state
.input_map
= _mesa_pointer_hash_table_create(NULL
);
327 exec_list_move_nodes_to(&shader
->inputs
, &state
.old_inputs
);
329 exec_list_make_empty(&state
.old_inputs
);
332 exec_list_move_nodes_to(&shader
->outputs
, &state
.old_outputs
);
334 exec_list_make_empty(&state
.old_outputs
);
336 /* Walk over all of the outputs turn each output into a temporary and
337 * make a new variable for the actual output.
339 nir_foreach_variable(var
, &state
.old_outputs
) {
340 nir_variable
*output
= create_shadow_temp(&state
, var
);
341 exec_list_push_tail(&shader
->outputs
, &output
->node
);
344 /* and same for inputs: */
345 nir_foreach_variable(var
, &state
.old_inputs
) {
346 nir_variable
*input
= create_shadow_temp(&state
, var
);
347 exec_list_push_tail(&shader
->inputs
, &input
->node
);
348 _mesa_hash_table_insert(state
.input_map
, var
, input
);
351 nir_foreach_function(function
, shader
) {
352 if (function
->impl
== NULL
)
356 emit_input_copies_impl(&state
, function
->impl
);
359 emit_output_copies_impl(&state
, function
->impl
);
361 nir_metadata_preserve(function
->impl
, nir_metadata_block_index
|
362 nir_metadata_dominance
);
365 exec_list_append(&shader
->globals
, &state
.old_inputs
);
366 exec_list_append(&shader
->globals
, &state
.old_outputs
);
368 nir_fixup_deref_modes(shader
);
370 _mesa_hash_table_destroy(state
.input_map
, NULL
);