/*
 * Copyright © 2017 Timothy Arceri
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include <stdlib.h>

#include "nir_builder.h"
/** @file nir_lower_io_arrays_to_elements.c
 *
 * Split arrays/matrices with direct indexing into individual elements. This
 * will allow optimisation passes to better clean up unused elements.
 */
35 get_io_offset(nir_builder
*b
, nir_deref_var
*deref
, nir_variable
*var
,
36 unsigned *element_index
)
38 bool vs_in
= (b
->shader
->info
.stage
== MESA_SHADER_VERTEX
) &&
39 (var
->data
.mode
== nir_var_shader_in
);
41 nir_deref
*tail
= &deref
->deref
;
43 /* For per-vertex input arrays (i.e. geometry shader inputs), skip the
44 * outermost array index. Process the rest normally.
46 if (nir_is_per_vertex_io(var
, b
->shader
->info
.stage
)) {
51 while (tail
->child
!= NULL
) {
54 if (tail
->deref_type
== nir_deref_type_array
) {
55 nir_deref_array
*deref_array
= nir_deref_as_array(tail
);
56 assert(deref_array
->deref_array_type
!= nir_deref_array_type_indirect
);
58 unsigned size
= glsl_count_attribute_slots(tail
->type
, vs_in
);
59 offset
+= size
* deref_array
->base_offset
;
61 unsigned num_elements
= glsl_type_is_array(tail
->type
) ?
62 glsl_get_aoa_size(tail
->type
) : 1;
64 num_elements
*= glsl_type_is_matrix(glsl_without_array(tail
->type
)) ?
65 glsl_get_matrix_columns(glsl_without_array(tail
->type
)) : 1;
67 *element_index
+= num_elements
* deref_array
->base_offset
;
68 } else if (tail
->deref_type
== nir_deref_type_struct
) {
69 /* TODO: we could also add struct splitting support to this pass */
77 static nir_variable
**
78 get_array_elements(struct hash_table
*ht
, nir_variable
*var
,
79 gl_shader_stage stage
)
81 nir_variable
**elements
;
82 struct hash_entry
*entry
= _mesa_hash_table_search(ht
, var
);
84 const struct glsl_type
*type
= var
->type
;
85 if (nir_is_per_vertex_io(var
, stage
)) {
86 assert(glsl_type_is_array(type
));
87 type
= glsl_get_array_element(type
);
90 unsigned num_elements
= glsl_type_is_array(type
) ?
91 glsl_get_aoa_size(type
) : 1;
93 num_elements
*= glsl_type_is_matrix(glsl_without_array(type
)) ?
94 glsl_get_matrix_columns(glsl_without_array(type
)) : 1;
96 elements
= (nir_variable
**) calloc(num_elements
, sizeof(nir_variable
*));
97 _mesa_hash_table_insert(ht
, var
, elements
);
99 elements
= (nir_variable
**) entry
->data
;
106 create_array_deref(nir_intrinsic_instr
*arr_intr
,
107 nir_intrinsic_instr
*element_intr
)
109 assert(arr_intr
->variables
[0]->deref
.child
);
111 nir_deref
*parent
= &element_intr
->variables
[0]->deref
;
112 nir_deref_array
*darr
=
113 nir_deref_as_array(arr_intr
->variables
[0]->deref
.child
);
114 nir_deref_array
*ndarr
= nir_deref_array_create(parent
);
116 ndarr
->deref
.type
= glsl_get_array_element(parent
->type
);
117 ndarr
->deref_array_type
= darr
->deref_array_type
;
118 ndarr
->base_offset
= darr
->base_offset
;
120 if (ndarr
->deref_array_type
== nir_deref_array_type_indirect
)
121 nir_src_copy(&ndarr
->indirect
, &darr
->indirect
, parent
);
123 element_intr
->variables
[0]->deref
.child
= &ndarr
->deref
;
127 lower_array(nir_builder
*b
, nir_intrinsic_instr
*intr
, nir_variable
*var
,
128 struct hash_table
*varyings
)
130 b
->cursor
= nir_before_instr(&intr
->instr
);
132 nir_variable
**elements
=
133 get_array_elements(varyings
, var
, b
->shader
->info
.stage
);
135 unsigned elements_index
= 0;
136 unsigned io_offset
= get_io_offset(b
, intr
->variables
[0], var
,
139 nir_variable
*element
= elements
[elements_index
];
141 element
= nir_variable_clone(var
, b
->shader
);
142 element
->data
.location
= var
->data
.location
+ io_offset
;
144 const struct glsl_type
*type
= glsl_without_array(element
->type
);
146 /* This pass also splits matrices so we need give them a new type. */
147 if (glsl_type_is_matrix(type
)) {
148 type
= glsl_vector_type(glsl_get_base_type(type
),
149 glsl_get_vector_elements(type
));
152 if (nir_is_per_vertex_io(var
, b
->shader
->info
.stage
)) {
153 type
= glsl_get_array_instance(type
,
154 glsl_get_length(element
->type
));
157 element
->type
= type
;
158 elements
[elements_index
] = element
;
160 nir_shader_add_variable(b
->shader
, element
);
163 nir_intrinsic_instr
*element_intr
=
164 nir_intrinsic_instr_create(b
->shader
, intr
->intrinsic
);
165 element_intr
->num_components
= intr
->num_components
;
166 element_intr
->variables
[0] = nir_deref_var_create(element_intr
, element
);
168 if (intr
->intrinsic
!= nir_intrinsic_store_var
) {
169 nir_ssa_dest_init(&element_intr
->instr
, &element_intr
->dest
,
170 intr
->num_components
, intr
->dest
.ssa
.bit_size
, NULL
);
172 if (intr
->intrinsic
== nir_intrinsic_interp_var_at_offset
||
173 intr
->intrinsic
== nir_intrinsic_interp_var_at_sample
) {
174 nir_src_copy(&element_intr
->src
[0], &intr
->src
[0],
175 &element_intr
->instr
);
178 nir_ssa_def_rewrite_uses(&intr
->dest
.ssa
,
179 nir_src_for_ssa(&element_intr
->dest
.ssa
));
181 nir_intrinsic_set_write_mask(element_intr
,
182 nir_intrinsic_write_mask(intr
));
183 nir_src_copy(&element_intr
->src
[0], &intr
->src
[0],
184 &element_intr
->instr
);
187 if (nir_is_per_vertex_io(var
, b
->shader
->info
.stage
)) {
188 create_array_deref(intr
, element_intr
);
191 nir_builder_instr_insert(b
, &element_intr
->instr
);
193 /* Remove the old load intrinsic */
194 nir_instr_remove(&intr
->instr
);
198 deref_has_indirect(nir_builder
*b
, nir_variable
*var
, nir_deref_var
*deref
)
200 nir_deref
*tail
= &deref
->deref
;
202 if (nir_is_per_vertex_io(var
, b
->shader
->info
.stage
)) {
206 for (tail
= tail
->child
; tail
; tail
= tail
->child
) {
207 if (tail
->deref_type
!= nir_deref_type_array
)
210 nir_deref_array
*arr
= nir_deref_as_array(tail
);
211 if (arr
->deref_array_type
== nir_deref_array_type_indirect
)
218 /* Creates a mask of locations that contains arrays that are indexed via
222 create_indirects_mask(nir_shader
*shader
, uint64_t *indirects
,
223 uint64_t *patch_indirects
, nir_variable_mode mode
)
225 nir_foreach_function(function
, shader
) {
226 if (function
->impl
) {
228 nir_builder_init(&b
, function
->impl
);
230 nir_foreach_block(block
, function
->impl
) {
231 nir_foreach_instr_safe(instr
, block
) {
233 if (instr
->type
!= nir_instr_type_intrinsic
)
236 nir_intrinsic_instr
*intr
= nir_instr_as_intrinsic(instr
);
238 if (intr
->intrinsic
!= nir_intrinsic_load_var
&&
239 intr
->intrinsic
!= nir_intrinsic_store_var
&&
240 intr
->intrinsic
!= nir_intrinsic_interp_var_at_centroid
&&
241 intr
->intrinsic
!= nir_intrinsic_interp_var_at_sample
&&
242 intr
->intrinsic
!= nir_intrinsic_interp_var_at_offset
)
245 nir_variable
*var
= intr
->variables
[0]->var
;
247 if (var
->data
.mode
!= mode
)
250 uint64_t loc_mask
= ((uint64_t)1) << var
->data
.location
;
251 if (var
->data
.patch
) {
252 if (deref_has_indirect(&b
, var
, intr
->variables
[0]))
253 patch_indirects
[var
->data
.location_frac
] |= loc_mask
;
255 if (deref_has_indirect(&b
, var
, intr
->variables
[0]))
256 indirects
[var
->data
.location_frac
] |= loc_mask
;
265 lower_io_arrays_to_elements(nir_shader
*shader
, nir_variable_mode mask
,
266 uint64_t *indirects
, uint64_t *patch_indirects
,
267 struct hash_table
*varyings
,
268 bool after_cross_stage_opts
)
270 nir_foreach_function(function
, shader
) {
271 if (function
->impl
) {
273 nir_builder_init(&b
, function
->impl
);
275 nir_foreach_block(block
, function
->impl
) {
276 nir_foreach_instr_safe(instr
, block
) {
277 if (instr
->type
!= nir_instr_type_intrinsic
)
280 nir_intrinsic_instr
*intr
= nir_instr_as_intrinsic(instr
);
282 if (intr
->intrinsic
!= nir_intrinsic_load_var
&&
283 intr
->intrinsic
!= nir_intrinsic_store_var
&&
284 intr
->intrinsic
!= nir_intrinsic_interp_var_at_centroid
&&
285 intr
->intrinsic
!= nir_intrinsic_interp_var_at_sample
&&
286 intr
->intrinsic
!= nir_intrinsic_interp_var_at_offset
)
289 nir_variable
*var
= intr
->variables
[0]->var
;
292 uint64_t loc_mask
= ((uint64_t)1) << var
->data
.location
;
293 if (var
->data
.patch
) {
294 if (patch_indirects
[var
->data
.location_frac
] & loc_mask
)
297 if (indirects
[var
->data
.location_frac
] & loc_mask
)
301 nir_variable_mode mode
= var
->data
.mode
;
303 const struct glsl_type
*type
= var
->type
;
304 if (nir_is_per_vertex_io(var
, b
.shader
->info
.stage
)) {
305 assert(glsl_type_is_array(type
));
306 type
= glsl_get_array_element(type
);
309 /* Skip types we cannot split.
311 * TODO: Add support for struct splitting.
313 if ((!glsl_type_is_array(type
) && !glsl_type_is_matrix(type
))||
314 glsl_type_is_struct(glsl_without_array(type
)))
318 if (!after_cross_stage_opts
&&
319 var
->data
.location
< VARYING_SLOT_VAR0
&&
320 var
->data
.location
>= 0)
323 /* Don't bother splitting if we can't opt away any unused
326 if (!after_cross_stage_opts
&& var
->data
.always_active_io
)
329 switch (intr
->intrinsic
) {
330 case nir_intrinsic_interp_var_at_centroid
:
331 case nir_intrinsic_interp_var_at_sample
:
332 case nir_intrinsic_interp_var_at_offset
:
333 case nir_intrinsic_load_var
:
334 case nir_intrinsic_store_var
:
335 if ((mask
& nir_var_shader_in
&& mode
== nir_var_shader_in
) ||
336 (mask
& nir_var_shader_out
&& mode
== nir_var_shader_out
))
337 lower_array(&b
, intr
, var
, varyings
);
349 nir_lower_io_arrays_to_elements_no_indirects(nir_shader
*shader
)
351 struct hash_table
*split_inputs
=
352 _mesa_hash_table_create(NULL
, _mesa_hash_pointer
,
353 _mesa_key_pointer_equal
);
354 struct hash_table
*split_outputs
=
355 _mesa_hash_table_create(NULL
, _mesa_hash_pointer
,
356 _mesa_key_pointer_equal
);
358 uint64_t indirects
[4] = {0}, patch_indirects
[4] = {0};
360 lower_io_arrays_to_elements(shader
, nir_var_shader_out
, indirects
,
361 patch_indirects
, split_outputs
, true);
363 lower_io_arrays_to_elements(shader
, nir_var_shader_in
, indirects
,
364 patch_indirects
, split_inputs
, true);
366 /* Remove old input from the shaders inputs list */
367 struct hash_entry
*entry
;
368 hash_table_foreach(split_inputs
, entry
) {
369 nir_variable
*var
= (nir_variable
*) entry
->key
;
370 exec_node_remove(&var
->node
);
375 /* Remove old output from the shaders outputs list */
376 hash_table_foreach(split_outputs
, entry
) {
377 nir_variable
*var
= (nir_variable
*) entry
->key
;
378 exec_node_remove(&var
->node
);
383 _mesa_hash_table_destroy(split_inputs
, NULL
);
384 _mesa_hash_table_destroy(split_outputs
, NULL
);
388 nir_lower_io_arrays_to_elements(nir_shader
*producer
, nir_shader
*consumer
)
390 struct hash_table
*split_inputs
=
391 _mesa_hash_table_create(NULL
, _mesa_hash_pointer
,
392 _mesa_key_pointer_equal
);
393 struct hash_table
*split_outputs
=
394 _mesa_hash_table_create(NULL
, _mesa_hash_pointer
,
395 _mesa_key_pointer_equal
);
397 uint64_t indirects
[4] = {0}, patch_indirects
[4] = {0};
398 create_indirects_mask(producer
, indirects
, patch_indirects
,
400 create_indirects_mask(consumer
, indirects
, patch_indirects
,
403 lower_io_arrays_to_elements(producer
, nir_var_shader_out
, indirects
,
404 patch_indirects
, split_outputs
, false);
406 lower_io_arrays_to_elements(consumer
, nir_var_shader_in
, indirects
,
407 patch_indirects
, split_inputs
, false);
409 /* Remove old input from the shaders inputs list */
410 struct hash_entry
*entry
;
411 hash_table_foreach(split_inputs
, entry
) {
412 nir_variable
*var
= (nir_variable
*) entry
->key
;
413 exec_node_remove(&var
->node
);
418 /* Remove old output from the shaders outputs list */
419 hash_table_foreach(split_outputs
, entry
) {
420 nir_variable
*var
= (nir_variable
*) entry
->key
;
421 exec_node_remove(&var
->node
);
426 _mesa_hash_table_destroy(split_inputs
, NULL
);
427 _mesa_hash_table_destroy(split_outputs
, NULL
);