/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
/*
 * Implements a pass that lowers output and/or input variables to a
 * temporary plus an output variable with a single copy at each exit
 * point of the shader and/or an input variable with a single copy
 * at the entrance point of the shader.  This way the output variable
 * is only ever written once and/or input is only read once, and there
 * are no indirect output/input accesses.
 */
34 #include "nir_builder.h"
35 #include "nir_deref.h"
37 struct lower_io_state
{
39 nir_function_impl
*entrypoint
;
40 struct exec_list old_outputs
;
41 struct exec_list old_inputs
;
42 struct exec_list new_outputs
;
43 struct exec_list new_inputs
;
45 /* map from temporary to new input */
46 struct hash_table
*input_map
;
50 emit_copies(nir_builder
*b
, struct exec_list
*dest_vars
,
51 struct exec_list
*src_vars
)
53 assert(exec_list_length(dest_vars
) == exec_list_length(src_vars
));
55 foreach_two_lists(dest_node
, dest_vars
, src_node
, src_vars
) {
56 nir_variable
*dest
= exec_node_data(nir_variable
, dest_node
, node
);
57 nir_variable
*src
= exec_node_data(nir_variable
, src_node
, node
);
59 /* No need to copy the contents of a non-fb_fetch_output output variable
60 * to the temporary allocated for it, since its initial value is
63 if (src
->data
.mode
== nir_var_shader_out
&&
64 !src
->data
.fb_fetch_output
)
67 /* Can't copy the contents of the temporary back to a read-only
68 * interface variable. The value of the temporary won't have been
69 * modified by the shader anyway.
71 if (dest
->data
.read_only
)
74 nir_copy_var(b
, dest
, src
);
79 emit_output_copies_impl(struct lower_io_state
*state
, nir_function_impl
*impl
)
82 nir_builder_init(&b
, impl
);
84 if (state
->shader
->info
.stage
== MESA_SHADER_GEOMETRY
) {
85 /* For geometry shaders, we have to emit the output copies right
86 * before each EmitVertex call.
88 nir_foreach_block(block
, impl
) {
89 nir_foreach_instr(instr
, block
) {
90 if (instr
->type
!= nir_instr_type_intrinsic
)
93 nir_intrinsic_instr
*intrin
= nir_instr_as_intrinsic(instr
);
94 if (intrin
->intrinsic
== nir_intrinsic_emit_vertex
||
95 intrin
->intrinsic
== nir_intrinsic_emit_vertex_with_counter
) {
96 b
.cursor
= nir_before_instr(&intrin
->instr
);
97 emit_copies(&b
, &state
->new_outputs
, &state
->old_outputs
);
101 } else if (impl
== state
->entrypoint
) {
102 b
.cursor
= nir_before_block(nir_start_block(impl
));
103 emit_copies(&b
, &state
->old_outputs
, &state
->new_outputs
);
105 /* For all other shader types, we need to do the copies right before
106 * the jumps to the end block.
108 set_foreach(impl
->end_block
->predecessors
, block_entry
) {
109 struct nir_block
*block
= (void *)block_entry
->key
;
110 b
.cursor
= nir_after_block_before_jump(block
);
111 emit_copies(&b
, &state
->new_outputs
, &state
->old_outputs
);
/* For fragment shader inputs, when we lower to temporaries we'll invalidate
 * interpolateAt*() because now they'll be pointing to the temporary instead
 * of the actual variable. Since the caller presumably doesn't support
 * indirect indexing of inputs, we'll need to lower something like:
 *
 * in vec4 foo[2];
 *
 * ... = interpolateAtCentroid(foo[i]);
 *
 * to a sequence of interpolations that store to our temporary, then a
 * load from the temporary:
 *
 * in vec4 foo[2];
 * vec4 foo_tmp[2];
 *
 * foo_tmp[0] = interpolateAtCentroid(foo[0]);
 * foo_tmp[1] = interpolateAtCentroid(foo[1]);
 * ... = foo_tmp[i];
 */
137 * Recursively emit the interpolation instructions. Here old_interp_deref
138 * refers to foo[i], temp_deref is foo_tmp[0/1], and new_interp_deref is
143 emit_interp(nir_builder
*b
, nir_deref_instr
**old_interp_deref
,
144 nir_deref_instr
*temp_deref
, nir_deref_instr
*new_interp_deref
,
145 nir_intrinsic_instr
*interp
)
147 while (*old_interp_deref
) {
148 switch ((*old_interp_deref
)->deref_type
) {
149 case nir_deref_type_struct
:
151 nir_build_deref_struct(b
, temp_deref
,
152 (*old_interp_deref
)->strct
.index
);
154 nir_build_deref_struct(b
, new_interp_deref
,
155 (*old_interp_deref
)->strct
.index
);
157 case nir_deref_type_array
:
158 if (nir_src_is_const((*old_interp_deref
)->arr
.index
)) {
160 nir_build_deref_array(b
, temp_deref
,
161 (*old_interp_deref
)->arr
.index
.ssa
);
163 nir_build_deref_array(b
, new_interp_deref
,
164 (*old_interp_deref
)->arr
.index
.ssa
);
167 /* We have an indirect deref, so we have to emit interpolations
168 * for every index. Recurse in case we have an array of arrays.
170 unsigned length
= glsl_get_length(temp_deref
->type
);
171 for (unsigned i
= 0; i
< length
; i
++) {
172 nir_deref_instr
*new_temp
=
173 nir_build_deref_array_imm(b
, temp_deref
, i
);
174 nir_deref_instr
*new_interp
=
175 nir_build_deref_array_imm(b
, new_interp_deref
, i
);
177 emit_interp(b
, old_interp_deref
+ 1, new_temp
, new_interp
,
184 case nir_deref_type_var
:
185 case nir_deref_type_array_wildcard
:
186 case nir_deref_type_ptr_as_array
:
187 case nir_deref_type_cast
:
188 unreachable("bad deref type");
194 /* Now that we've constructed a fully-qualified deref with all the indirect
195 * derefs replaced with direct ones, it's time to actually emit the new
196 * interpolation instruction.
199 nir_intrinsic_instr
*new_interp
=
200 nir_intrinsic_instr_create(b
->shader
, interp
->intrinsic
);
202 new_interp
->src
[0] = nir_src_for_ssa(&new_interp_deref
->dest
.ssa
);
203 if (interp
->intrinsic
== nir_intrinsic_interp_deref_at_sample
||
204 interp
->intrinsic
== nir_intrinsic_interp_deref_at_offset
||
205 interp
->intrinsic
== nir_intrinsic_interp_deref_at_vertex
) {
206 new_interp
->src
[1] = interp
->src
[1];
209 new_interp
->num_components
= interp
->num_components
;
210 nir_ssa_dest_init(&new_interp
->instr
, &new_interp
->dest
,
211 interp
->dest
.ssa
.num_components
,
212 interp
->dest
.ssa
.bit_size
, NULL
);
214 nir_builder_instr_insert(b
, &new_interp
->instr
);
215 nir_store_deref(b
, temp_deref
, &new_interp
->dest
.ssa
,
216 (1 << interp
->dest
.ssa
.num_components
) - 1);
220 fixup_interpolation_instr(struct lower_io_state
*state
,
221 nir_intrinsic_instr
*interp
, nir_builder
*b
)
223 nir_deref_path interp_path
;
224 nir_deref_path_init(&interp_path
, nir_src_as_deref(interp
->src
[0]), NULL
);
226 b
->cursor
= nir_before_instr(&interp
->instr
);
228 /* The original interpolation instruction should contain a deref path
229 * starting with the original variable, which is now the temporary.
231 nir_deref_instr
*temp_root
= interp_path
.path
[0];
233 /* Fish out the newly-created input variable. */
234 assert(temp_root
->deref_type
== nir_deref_type_var
);
235 struct hash_entry
*entry
= _mesa_hash_table_search(state
->input_map
,
238 nir_variable
*input
= entry
->data
;
239 nir_deref_instr
*input_root
= nir_build_deref_var(b
, input
);
241 /* Emit the interpolation instructions. */
242 emit_interp(b
, interp_path
.path
+ 1, temp_root
, input_root
, interp
);
244 /* Now the temporary contains the interpolation results, and we can just
245 * load from it. We can reuse the original deref, since it points to the
246 * correct part of the temporary.
248 nir_ssa_def
*load
= nir_load_deref(b
, nir_src_as_deref(interp
->src
[0]));
249 nir_ssa_def_rewrite_uses(&interp
->dest
.ssa
, nir_src_for_ssa(load
));
250 nir_instr_remove(&interp
->instr
);
252 nir_deref_path_finish(&interp_path
);
256 fixup_interpolation(struct lower_io_state
*state
, nir_function_impl
*impl
,
259 nir_foreach_block(block
, impl
) {
260 nir_foreach_instr_safe(instr
, block
) {
261 if (instr
->type
!= nir_instr_type_intrinsic
)
264 nir_intrinsic_instr
*interp
= nir_instr_as_intrinsic(instr
);
266 if (interp
->intrinsic
== nir_intrinsic_interp_deref_at_centroid
||
267 interp
->intrinsic
== nir_intrinsic_interp_deref_at_sample
||
268 interp
->intrinsic
== nir_intrinsic_interp_deref_at_offset
||
269 interp
->intrinsic
== nir_intrinsic_interp_deref_at_vertex
) {
270 fixup_interpolation_instr(state
, interp
, b
);
277 emit_input_copies_impl(struct lower_io_state
*state
, nir_function_impl
*impl
)
279 if (impl
== state
->entrypoint
) {
281 nir_builder_init(&b
, impl
);
282 b
.cursor
= nir_before_block(nir_start_block(impl
));
283 emit_copies(&b
, &state
->old_inputs
, &state
->new_inputs
);
284 if (state
->shader
->info
.stage
== MESA_SHADER_FRAGMENT
)
285 fixup_interpolation(state
, impl
, &b
);
289 static nir_variable
*
290 create_shadow_temp(struct lower_io_state
*state
, nir_variable
*var
)
292 nir_variable
*nvar
= ralloc(state
->shader
, nir_variable
);
293 memcpy(nvar
, var
, sizeof *nvar
);
294 nvar
->data
.cannot_coalesce
= true;
296 /* The original is now the temporary */
297 nir_variable
*temp
= var
;
299 /* Reparent the name to the new variable */
300 ralloc_steal(nvar
, nvar
->name
);
302 assert(nvar
->constant_initializer
== NULL
&& nvar
->pointer_initializer
== NULL
);
304 /* Give the original a new name with @<mode>-temp appended */
305 const char *mode
= (temp
->data
.mode
== nir_var_shader_in
) ? "in" : "out";
306 temp
->name
= ralloc_asprintf(var
, "%s@%s-temp", mode
, nvar
->name
);
307 temp
->data
.mode
= nir_var_shader_temp
;
308 temp
->data
.read_only
= false;
309 temp
->data
.fb_fetch_output
= false;
310 temp
->data
.compact
= false;
316 move_variables_to_list(nir_shader
*shader
, nir_variable_mode mode
,
317 struct exec_list
*dst_list
)
319 nir_foreach_variable_with_modes_safe(var
, shader
, mode
) {
320 exec_node_remove(&var
->node
);
321 exec_list_push_tail(dst_list
, &var
->node
);
326 nir_lower_io_to_temporaries(nir_shader
*shader
, nir_function_impl
*entrypoint
,
327 bool outputs
, bool inputs
)
329 struct lower_io_state state
;
331 if (shader
->info
.stage
== MESA_SHADER_TESS_CTRL
)
334 state
.shader
= shader
;
335 state
.entrypoint
= entrypoint
;
336 state
.input_map
= _mesa_pointer_hash_table_create(NULL
);
338 exec_list_make_empty(&state
.old_inputs
);
340 move_variables_to_list(shader
, nir_var_shader_in
, &state
.old_inputs
);
342 exec_list_make_empty(&state
.old_outputs
);
344 move_variables_to_list(shader
, nir_var_shader_out
, &state
.old_outputs
);
346 exec_list_make_empty(&state
.new_inputs
);
347 exec_list_make_empty(&state
.new_outputs
);
349 /* Walk over all of the outputs turn each output into a temporary and
350 * make a new variable for the actual output.
352 nir_foreach_variable_in_list(var
, &state
.old_outputs
) {
353 nir_variable
*output
= create_shadow_temp(&state
, var
);
354 exec_list_push_tail(&state
.new_outputs
, &output
->node
);
357 /* and same for inputs: */
358 nir_foreach_variable_in_list(var
, &state
.old_inputs
) {
359 nir_variable
*input
= create_shadow_temp(&state
, var
);
360 exec_list_push_tail(&state
.new_inputs
, &input
->node
);
361 _mesa_hash_table_insert(state
.input_map
, var
, input
);
364 nir_foreach_function(function
, shader
) {
365 if (function
->impl
== NULL
)
369 emit_input_copies_impl(&state
, function
->impl
);
372 emit_output_copies_impl(&state
, function
->impl
);
374 nir_metadata_preserve(function
->impl
, nir_metadata_block_index
|
375 nir_metadata_dominance
);
378 exec_list_append(&shader
->variables
, &state
.old_inputs
);
379 exec_list_append(&shader
->variables
, &state
.old_outputs
);
380 exec_list_append(&shader
->variables
, &state
.new_inputs
);
381 exec_list_append(&shader
->variables
, &state
.new_outputs
);
383 nir_fixup_deref_modes(shader
);
385 _mesa_hash_table_destroy(state
.input_map
, NULL
);