/*
 * Copyright © 2017 Timothy Arceri
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
25 #include "nir_builder.h"
26 #include "nir_deref.h"
/** @file nir_lower_io_arrays_to_elements.c
 *
 * Split arrays/matrices with direct indexing into individual elements. This
 * will allow optimisation passes to better clean up unused elements.
 *
 */
36 get_io_offset(nir_builder
*b
, nir_deref_instr
*deref
, nir_variable
*var
,
37 unsigned *element_index
, nir_ssa_def
**vertex_index
)
39 bool vs_in
= (b
->shader
->info
.stage
== MESA_SHADER_VERTEX
) &&
40 (var
->data
.mode
== nir_var_shader_in
);
43 nir_deref_path_init(&path
, deref
, NULL
);
45 assert(path
.path
[0]->deref_type
== nir_deref_type_var
);
46 nir_deref_instr
**p
= &path
.path
[1];
48 /* For per-vertex input arrays (i.e. geometry shader inputs), skip the
49 * outermost array index. Process the rest normally.
51 if (nir_is_per_vertex_io(var
, b
->shader
->info
.stage
)) {
52 *vertex_index
= nir_ssa_for_src(b
, (*p
)->arr
.index
, 1);
58 if ((*p
)->deref_type
== nir_deref_type_array
) {
59 nir_const_value
*c
= nir_src_as_const_value((*p
)->arr
.index
);
61 assert(c
); /* must not be indirect dereference */
63 unsigned size
= glsl_count_attribute_slots((*p
)->type
, vs_in
);
64 offset
+= size
* c
->u32
[0];
66 unsigned num_elements
= glsl_type_is_array((*p
)->type
) ?
67 glsl_get_aoa_size((*p
)->type
) : 1;
69 num_elements
*= glsl_type_is_matrix(glsl_without_array((*p
)->type
)) ?
70 glsl_get_matrix_columns(glsl_without_array((*p
)->type
)) : 1;
72 *element_index
+= num_elements
* c
->u32
[0];
73 } else if ((*p
)->deref_type
== nir_deref_type_struct
) {
74 /* TODO: we could also add struct splitting support to this pass */
79 nir_deref_path_finish(&path
);
84 static nir_variable
**
85 get_array_elements(struct hash_table
*ht
, nir_variable
*var
,
86 gl_shader_stage stage
)
88 nir_variable
**elements
;
89 struct hash_entry
*entry
= _mesa_hash_table_search(ht
, var
);
91 const struct glsl_type
*type
= var
->type
;
92 if (nir_is_per_vertex_io(var
, stage
)) {
93 assert(glsl_type_is_array(type
));
94 type
= glsl_get_array_element(type
);
97 unsigned num_elements
= glsl_type_is_array(type
) ?
98 glsl_get_aoa_size(type
) : 1;
100 num_elements
*= glsl_type_is_matrix(glsl_without_array(type
)) ?
101 glsl_get_matrix_columns(glsl_without_array(type
)) : 1;
103 elements
= (nir_variable
**) calloc(num_elements
, sizeof(nir_variable
*));
104 _mesa_hash_table_insert(ht
, var
, elements
);
106 elements
= (nir_variable
**) entry
->data
;
113 lower_array(nir_builder
*b
, nir_intrinsic_instr
*intr
, nir_variable
*var
,
114 struct hash_table
*varyings
)
116 b
->cursor
= nir_before_instr(&intr
->instr
);
118 nir_variable
**elements
=
119 get_array_elements(varyings
, var
, b
->shader
->info
.stage
);
121 nir_ssa_def
*vertex_index
= NULL
;
122 unsigned elements_index
= 0;
123 unsigned io_offset
= get_io_offset(b
, nir_src_as_deref(intr
->src
[0]),
124 var
, &elements_index
, &vertex_index
);
126 nir_variable
*element
= elements
[elements_index
];
128 element
= nir_variable_clone(var
, b
->shader
);
129 element
->data
.location
= var
->data
.location
+ io_offset
;
131 const struct glsl_type
*type
= glsl_without_array(element
->type
);
133 /* This pass also splits matrices so we need give them a new type. */
134 if (glsl_type_is_matrix(type
)) {
135 type
= glsl_vector_type(glsl_get_base_type(type
),
136 glsl_get_vector_elements(type
));
139 if (nir_is_per_vertex_io(var
, b
->shader
->info
.stage
)) {
140 type
= glsl_get_array_instance(type
,
141 glsl_get_length(element
->type
));
144 element
->type
= type
;
145 elements
[elements_index
] = element
;
147 nir_shader_add_variable(b
->shader
, element
);
150 nir_deref_instr
*element_deref
= nir_build_deref_var(b
, element
);
152 if (nir_is_per_vertex_io(var
, b
->shader
->info
.stage
)) {
153 assert(vertex_index
);
154 element_deref
= nir_build_deref_array(b
, element_deref
, vertex_index
);
157 nir_intrinsic_instr
*element_intr
=
158 nir_intrinsic_instr_create(b
->shader
, intr
->intrinsic
);
159 element_intr
->num_components
= intr
->num_components
;
160 element_intr
->src
[0] = nir_src_for_ssa(&element_deref
->dest
.ssa
);
162 if (intr
->intrinsic
!= nir_intrinsic_store_deref
) {
163 nir_ssa_dest_init(&element_intr
->instr
, &element_intr
->dest
,
164 intr
->num_components
, intr
->dest
.ssa
.bit_size
, NULL
);
166 if (intr
->intrinsic
== nir_intrinsic_interp_deref_at_offset
||
167 intr
->intrinsic
== nir_intrinsic_interp_deref_at_sample
) {
168 nir_src_copy(&element_intr
->src
[1], &intr
->src
[1],
169 &element_intr
->instr
);
172 nir_ssa_def_rewrite_uses(&intr
->dest
.ssa
,
173 nir_src_for_ssa(&element_intr
->dest
.ssa
));
175 nir_intrinsic_set_write_mask(element_intr
,
176 nir_intrinsic_write_mask(intr
));
177 nir_src_copy(&element_intr
->src
[1], &intr
->src
[1],
178 &element_intr
->instr
);
181 nir_builder_instr_insert(b
, &element_intr
->instr
);
183 /* Remove the old load intrinsic */
184 nir_instr_remove(&intr
->instr
);
188 deref_has_indirect(nir_builder
*b
, nir_variable
*var
, nir_deref_path
*path
)
190 assert(path
->path
[0]->deref_type
== nir_deref_type_var
);
191 nir_deref_instr
**p
= &path
->path
[1];
193 if (nir_is_per_vertex_io(var
, b
->shader
->info
.stage
)) {
198 if ((*p
)->deref_type
!= nir_deref_type_array
)
201 if (!nir_src_as_const_value((*p
)->arr
.index
))
208 /* Creates a mask of locations that contains arrays that are indexed via
212 create_indirects_mask(nir_shader
*shader
, uint64_t *indirects
,
213 uint64_t *patch_indirects
, nir_variable_mode mode
)
215 nir_foreach_function(function
, shader
) {
216 if (function
->impl
) {
218 nir_builder_init(&b
, function
->impl
);
220 nir_foreach_block(block
, function
->impl
) {
221 nir_foreach_instr_safe(instr
, block
) {
223 if (instr
->type
!= nir_instr_type_intrinsic
)
226 nir_intrinsic_instr
*intr
= nir_instr_as_intrinsic(instr
);
228 if (intr
->intrinsic
!= nir_intrinsic_load_deref
&&
229 intr
->intrinsic
!= nir_intrinsic_store_deref
&&
230 intr
->intrinsic
!= nir_intrinsic_interp_deref_at_centroid
&&
231 intr
->intrinsic
!= nir_intrinsic_interp_deref_at_sample
&&
232 intr
->intrinsic
!= nir_intrinsic_interp_deref_at_offset
)
235 nir_deref_instr
*deref
= nir_src_as_deref(intr
->src
[0]);
236 nir_variable
*var
= nir_deref_instr_get_variable(deref
);
238 if (var
->data
.mode
!= mode
)
242 nir_deref_path_init(&path
, deref
, NULL
);
244 uint64_t loc_mask
= ((uint64_t)1) << var
->data
.location
;
245 if (var
->data
.patch
) {
246 if (deref_has_indirect(&b
, var
, &path
))
247 patch_indirects
[var
->data
.location_frac
] |= loc_mask
;
249 if (deref_has_indirect(&b
, var
, &path
))
250 indirects
[var
->data
.location_frac
] |= loc_mask
;
253 nir_deref_path_finish(&path
);
261 lower_io_arrays_to_elements(nir_shader
*shader
, nir_variable_mode mask
,
262 uint64_t *indirects
, uint64_t *patch_indirects
,
263 struct hash_table
*varyings
,
264 bool after_cross_stage_opts
)
266 nir_foreach_function(function
, shader
) {
267 if (function
->impl
) {
269 nir_builder_init(&b
, function
->impl
);
271 nir_foreach_block(block
, function
->impl
) {
272 nir_foreach_instr_safe(instr
, block
) {
273 if (instr
->type
!= nir_instr_type_intrinsic
)
276 nir_intrinsic_instr
*intr
= nir_instr_as_intrinsic(instr
);
278 if (intr
->intrinsic
!= nir_intrinsic_load_deref
&&
279 intr
->intrinsic
!= nir_intrinsic_store_deref
&&
280 intr
->intrinsic
!= nir_intrinsic_interp_deref_at_centroid
&&
281 intr
->intrinsic
!= nir_intrinsic_interp_deref_at_sample
&&
282 intr
->intrinsic
!= nir_intrinsic_interp_deref_at_offset
)
286 nir_deref_instr_get_variable(nir_src_as_deref(intr
->src
[0]));
289 uint64_t loc_mask
= ((uint64_t)1) << var
->data
.location
;
290 if (var
->data
.patch
) {
291 if (patch_indirects
[var
->data
.location_frac
] & loc_mask
)
294 if (indirects
[var
->data
.location_frac
] & loc_mask
)
298 nir_variable_mode mode
= var
->data
.mode
;
300 const struct glsl_type
*type
= var
->type
;
301 if (nir_is_per_vertex_io(var
, b
.shader
->info
.stage
)) {
302 assert(glsl_type_is_array(type
));
303 type
= glsl_get_array_element(type
);
306 /* Skip types we cannot split.
308 * TODO: Add support for struct splitting.
310 if ((!glsl_type_is_array(type
) && !glsl_type_is_matrix(type
))||
311 glsl_type_is_struct(glsl_without_array(type
)))
315 if (!after_cross_stage_opts
&&
316 var
->data
.location
< VARYING_SLOT_VAR0
&&
317 var
->data
.location
>= 0)
320 /* Don't bother splitting if we can't opt away any unused
323 if (!after_cross_stage_opts
&& var
->data
.always_active_io
)
326 switch (intr
->intrinsic
) {
327 case nir_intrinsic_interp_deref_at_centroid
:
328 case nir_intrinsic_interp_deref_at_sample
:
329 case nir_intrinsic_interp_deref_at_offset
:
330 case nir_intrinsic_load_deref
:
331 case nir_intrinsic_store_deref
:
332 if ((mask
& nir_var_shader_in
&& mode
== nir_var_shader_in
) ||
333 (mask
& nir_var_shader_out
&& mode
== nir_var_shader_out
))
334 lower_array(&b
, intr
, var
, varyings
);
346 nir_lower_io_arrays_to_elements_no_indirects(nir_shader
*shader
,
349 struct hash_table
*split_inputs
=
350 _mesa_hash_table_create(NULL
, _mesa_hash_pointer
,
351 _mesa_key_pointer_equal
);
352 struct hash_table
*split_outputs
=
353 _mesa_hash_table_create(NULL
, _mesa_hash_pointer
,
354 _mesa_key_pointer_equal
);
356 nir_assert_unlowered_derefs(shader
, nir_lower_load_store_derefs
| nir_lower_interp_derefs
);
358 uint64_t indirects
[4] = {0}, patch_indirects
[4] = {0};
360 lower_io_arrays_to_elements(shader
, nir_var_shader_out
, indirects
,
361 patch_indirects
, split_outputs
, true);
364 lower_io_arrays_to_elements(shader
, nir_var_shader_in
, indirects
,
365 patch_indirects
, split_inputs
, true);
367 /* Remove old input from the shaders inputs list */
368 struct hash_entry
*entry
;
369 hash_table_foreach(split_inputs
, entry
) {
370 nir_variable
*var
= (nir_variable
*) entry
->key
;
371 exec_node_remove(&var
->node
);
377 /* Remove old output from the shaders outputs list */
378 struct hash_entry
*entry
;
379 hash_table_foreach(split_outputs
, entry
) {
380 nir_variable
*var
= (nir_variable
*) entry
->key
;
381 exec_node_remove(&var
->node
);
386 _mesa_hash_table_destroy(split_inputs
, NULL
);
387 _mesa_hash_table_destroy(split_outputs
, NULL
);
389 nir_remove_dead_derefs(shader
);
393 nir_lower_io_arrays_to_elements(nir_shader
*producer
, nir_shader
*consumer
)
395 struct hash_table
*split_inputs
=
396 _mesa_hash_table_create(NULL
, _mesa_hash_pointer
,
397 _mesa_key_pointer_equal
);
398 struct hash_table
*split_outputs
=
399 _mesa_hash_table_create(NULL
, _mesa_hash_pointer
,
400 _mesa_key_pointer_equal
);
402 nir_assert_unlowered_derefs(producer
, nir_lower_load_store_derefs
| nir_lower_interp_derefs
);
403 nir_assert_unlowered_derefs(consumer
, nir_lower_load_store_derefs
| nir_lower_interp_derefs
);
405 uint64_t indirects
[4] = {0}, patch_indirects
[4] = {0};
406 create_indirects_mask(producer
, indirects
, patch_indirects
,
408 create_indirects_mask(consumer
, indirects
, patch_indirects
,
411 lower_io_arrays_to_elements(producer
, nir_var_shader_out
, indirects
,
412 patch_indirects
, split_outputs
, false);
414 lower_io_arrays_to_elements(consumer
, nir_var_shader_in
, indirects
,
415 patch_indirects
, split_inputs
, false);
417 /* Remove old input from the shaders inputs list */
418 struct hash_entry
*entry
;
419 hash_table_foreach(split_inputs
, entry
) {
420 nir_variable
*var
= (nir_variable
*) entry
->key
;
421 exec_node_remove(&var
->node
);
426 /* Remove old output from the shaders outputs list */
427 hash_table_foreach(split_outputs
, entry
) {
428 nir_variable
*var
= (nir_variable
*) entry
->key
;
429 exec_node_remove(&var
->node
);
434 _mesa_hash_table_destroy(split_inputs
, NULL
);
435 _mesa_hash_table_destroy(split_outputs
, NULL
);
437 nir_remove_dead_derefs(producer
);
438 nir_remove_dead_derefs(consumer
);