/*
 * Copyright © 2016 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include <stdlib.h>

#include "nir_builder.h"
/** @file nir_lower_io_to_scalar.c
 *
 * Replaces nir_load_input/nir_store_output operations with num_components !=
 * 1 with individual per-channel operations.
 */
34 lower_load_input_to_scalar(nir_builder
*b
, nir_intrinsic_instr
*intr
)
36 b
->cursor
= nir_before_instr(&intr
->instr
);
38 assert(intr
->dest
.is_ssa
);
40 nir_ssa_def
*loads
[4];
42 for (unsigned i
= 0; i
< intr
->num_components
; i
++) {
43 nir_intrinsic_instr
*chan_intr
=
44 nir_intrinsic_instr_create(b
->shader
, intr
->intrinsic
);
45 nir_ssa_dest_init(&chan_intr
->instr
, &chan_intr
->dest
,
46 1, intr
->dest
.ssa
.bit_size
, NULL
);
47 chan_intr
->num_components
= 1;
49 nir_intrinsic_set_base(chan_intr
, nir_intrinsic_base(intr
));
50 nir_intrinsic_set_component(chan_intr
, nir_intrinsic_component(intr
) + i
);
52 nir_src_copy(&chan_intr
->src
[0], &intr
->src
[0], chan_intr
);
54 nir_builder_instr_insert(b
, &chan_intr
->instr
);
56 loads
[i
] = &chan_intr
->dest
.ssa
;
59 nir_ssa_def_rewrite_uses(&intr
->dest
.ssa
,
60 nir_src_for_ssa(nir_vec(b
, loads
,
61 intr
->num_components
)));
62 nir_instr_remove(&intr
->instr
);
66 lower_store_output_to_scalar(nir_builder
*b
, nir_intrinsic_instr
*intr
)
68 b
->cursor
= nir_before_instr(&intr
->instr
);
70 nir_ssa_def
*value
= nir_ssa_for_src(b
, intr
->src
[0], intr
->num_components
);
72 for (unsigned i
= 0; i
< intr
->num_components
; i
++) {
73 if (!(nir_intrinsic_write_mask(intr
) & (1 << i
)))
76 nir_intrinsic_instr
*chan_intr
=
77 nir_intrinsic_instr_create(b
->shader
, intr
->intrinsic
);
78 chan_intr
->num_components
= 1;
80 nir_intrinsic_set_base(chan_intr
, nir_intrinsic_base(intr
));
81 nir_intrinsic_set_write_mask(chan_intr
, 0x1);
82 nir_intrinsic_set_component(chan_intr
, nir_intrinsic_component(intr
) + i
);
85 chan_intr
->src
[0] = nir_src_for_ssa(nir_channel(b
, value
, i
));
87 nir_src_copy(&chan_intr
->src
[1], &intr
->src
[1], chan_intr
);
89 nir_builder_instr_insert(b
, &chan_intr
->instr
);
92 nir_instr_remove(&intr
->instr
);
96 nir_lower_io_to_scalar(nir_shader
*shader
, nir_variable_mode mask
)
98 nir_foreach_function(function
, shader
) {
101 nir_builder_init(&b
, function
->impl
);
103 nir_foreach_block(block
, function
->impl
) {
104 nir_foreach_instr_safe(instr
, block
) {
105 if (instr
->type
!= nir_instr_type_intrinsic
)
108 nir_intrinsic_instr
*intr
= nir_instr_as_intrinsic(instr
);
110 if (intr
->num_components
== 1)
113 switch (intr
->intrinsic
) {
114 case nir_intrinsic_load_input
:
115 if (mask
& nir_var_shader_in
)
116 lower_load_input_to_scalar(&b
, intr
);
118 case nir_intrinsic_store_output
:
119 if (mask
& nir_var_shader_out
)
120 lower_store_output_to_scalar(&b
, intr
);
131 static nir_variable
**
132 get_channel_variables(struct hash_table
*ht
, nir_variable
*var
)
134 nir_variable
**chan_vars
;
135 struct hash_entry
*entry
= _mesa_hash_table_search(ht
, var
);
137 chan_vars
= (nir_variable
**) calloc(4, sizeof(nir_variable
*));
138 _mesa_hash_table_insert(ht
, var
, chan_vars
);
140 chan_vars
= (nir_variable
**) entry
->data
;
147 * This function differs from nir_deref_clone() in that it gets its type from
148 * the parent deref rather than our source deref. This is useful when splitting
149 * vectors because we want to use the scalar type of the new parent rather than
150 * then the old vector type.
152 static nir_deref_array
*
153 clone_deref_array(const nir_deref_array
*darr
, nir_deref
*parent
)
155 nir_deref_array
*ndarr
= nir_deref_array_create(parent
);
157 ndarr
->deref
.type
= glsl_get_array_element(parent
->type
);
158 if (darr
->deref
.child
)
160 &clone_deref_array(nir_deref_as_array(darr
->deref
.child
),
161 &ndarr
->deref
)->deref
;
163 ndarr
->deref_array_type
= darr
->deref_array_type
;
164 ndarr
->base_offset
= darr
->base_offset
;
165 if (ndarr
->deref_array_type
== nir_deref_array_type_indirect
)
166 nir_src_copy(&ndarr
->indirect
, &darr
->indirect
, parent
);
172 lower_load_to_scalar_early(nir_builder
*b
, nir_intrinsic_instr
*intr
,
173 nir_variable
*var
, struct hash_table
*split_inputs
,
174 struct hash_table
*split_outputs
)
176 b
->cursor
= nir_before_instr(&intr
->instr
);
178 assert(intr
->dest
.is_ssa
);
180 nir_ssa_def
*loads
[4];
182 nir_variable
**chan_vars
;
183 if (var
->data
.mode
== nir_var_shader_in
) {
184 chan_vars
= get_channel_variables(split_inputs
, var
);
186 chan_vars
= get_channel_variables(split_outputs
, var
);
189 for (unsigned i
= 0; i
< intr
->num_components
; i
++) {
190 nir_variable
*chan_var
= chan_vars
[var
->data
.location_frac
+ i
];
191 if (!chan_vars
[var
->data
.location_frac
+ i
]) {
192 chan_var
= nir_variable_clone(var
, b
->shader
);
193 chan_var
->data
.location_frac
= var
->data
.location_frac
+ i
;
194 chan_var
->type
= glsl_channel_type(chan_var
->type
);
196 chan_vars
[var
->data
.location_frac
+ i
] = chan_var
;
198 nir_shader_add_variable(b
->shader
, chan_var
);
201 nir_intrinsic_instr
*chan_intr
=
202 nir_intrinsic_instr_create(b
->shader
, intr
->intrinsic
);
203 nir_ssa_dest_init(&chan_intr
->instr
, &chan_intr
->dest
,
204 1, intr
->dest
.ssa
.bit_size
, NULL
);
205 chan_intr
->num_components
= 1;
206 chan_intr
->variables
[0] = nir_deref_var_create(chan_intr
, chan_var
);
208 if (intr
->variables
[0]->deref
.child
) {
209 chan_intr
->variables
[0]->deref
.child
=
210 &clone_deref_array(nir_deref_as_array(intr
->variables
[0]->deref
.child
),
211 &chan_intr
->variables
[0]->deref
)->deref
;
214 if (intr
->intrinsic
== nir_intrinsic_interp_var_at_offset
||
215 intr
->intrinsic
== nir_intrinsic_interp_var_at_sample
)
216 nir_src_copy(chan_intr
->src
, intr
->src
, &chan_intr
->instr
);
218 nir_builder_instr_insert(b
, &chan_intr
->instr
);
220 loads
[i
] = &chan_intr
->dest
.ssa
;
223 nir_ssa_def_rewrite_uses(&intr
->dest
.ssa
,
224 nir_src_for_ssa(nir_vec(b
, loads
,
225 intr
->num_components
)));
227 /* Remove the old load intrinsic */
228 nir_instr_remove(&intr
->instr
);
232 lower_store_output_to_scalar_early(nir_builder
*b
, nir_intrinsic_instr
*intr
,
234 struct hash_table
*split_outputs
)
236 b
->cursor
= nir_before_instr(&intr
->instr
);
238 nir_ssa_def
*value
= nir_ssa_for_src(b
, intr
->src
[0], intr
->num_components
);
240 nir_variable
**chan_vars
= get_channel_variables(split_outputs
, var
);
241 for (unsigned i
= 0; i
< intr
->num_components
; i
++) {
242 if (!(nir_intrinsic_write_mask(intr
) & (1 << i
)))
245 nir_variable
*chan_var
= chan_vars
[var
->data
.location_frac
+ i
];
246 if (!chan_vars
[var
->data
.location_frac
+ i
]) {
247 chan_var
= nir_variable_clone(var
, b
->shader
);
248 chan_var
->data
.location_frac
= var
->data
.location_frac
+ i
;
249 chan_var
->type
= glsl_channel_type(chan_var
->type
);
251 chan_vars
[var
->data
.location_frac
+ i
] = chan_var
;
253 nir_shader_add_variable(b
->shader
, chan_var
);
256 nir_intrinsic_instr
*chan_intr
=
257 nir_intrinsic_instr_create(b
->shader
, intr
->intrinsic
);
258 chan_intr
->num_components
= 1;
260 nir_intrinsic_set_write_mask(chan_intr
, 0x1);
262 chan_intr
->variables
[0] = nir_deref_var_create(chan_intr
, chan_var
);
263 chan_intr
->src
[0] = nir_src_for_ssa(nir_channel(b
, value
, i
));
265 if (intr
->variables
[0]->deref
.child
) {
266 chan_intr
->variables
[0]->deref
.child
=
267 &clone_deref_array(nir_deref_as_array(intr
->variables
[0]->deref
.child
),
268 &chan_intr
->variables
[0]->deref
)->deref
;
271 nir_builder_instr_insert(b
, &chan_intr
->instr
);
274 /* Remove the old store intrinsic */
275 nir_instr_remove(&intr
->instr
);
279 * This function is intended to be called earlier than nir_lower_io_to_scalar()
280 * i.e. before nir_lower_io() is called.
283 nir_lower_io_to_scalar_early(nir_shader
*shader
, nir_variable_mode mask
)
285 struct hash_table
*split_inputs
=
286 _mesa_hash_table_create(NULL
, _mesa_hash_pointer
,
287 _mesa_key_pointer_equal
);
288 struct hash_table
*split_outputs
=
289 _mesa_hash_table_create(NULL
, _mesa_hash_pointer
,
290 _mesa_key_pointer_equal
);
292 nir_foreach_function(function
, shader
) {
293 if (function
->impl
) {
295 nir_builder_init(&b
, function
->impl
);
297 nir_foreach_block(block
, function
->impl
) {
298 nir_foreach_instr_safe(instr
, block
) {
299 if (instr
->type
!= nir_instr_type_intrinsic
)
302 nir_intrinsic_instr
*intr
= nir_instr_as_intrinsic(instr
);
304 if (intr
->num_components
== 1)
307 if (intr
->intrinsic
!= nir_intrinsic_load_var
&&
308 intr
->intrinsic
!= nir_intrinsic_store_var
&&
309 intr
->intrinsic
!= nir_intrinsic_interp_var_at_centroid
&&
310 intr
->intrinsic
!= nir_intrinsic_interp_var_at_sample
&&
311 intr
->intrinsic
!= nir_intrinsic_interp_var_at_offset
)
314 nir_variable
*var
= intr
->variables
[0]->var
;
315 nir_variable_mode mode
= var
->data
.mode
;
317 /* TODO: add patch support */
321 /* TODO: add doubles support */
322 if (glsl_type_is_64bit(glsl_without_array(var
->type
)))
325 if (var
->data
.location
< VARYING_SLOT_VAR0
&&
326 var
->data
.location
>= 0)
329 /* Don't bother splitting if we can't opt away any unused
332 if (var
->data
.always_active_io
)
335 /* Skip types we cannot split */
336 if (glsl_type_is_matrix(glsl_without_array(var
->type
)) ||
337 glsl_type_is_struct(glsl_without_array(var
->type
)))
340 switch (intr
->intrinsic
) {
341 case nir_intrinsic_interp_var_at_centroid
:
342 case nir_intrinsic_interp_var_at_sample
:
343 case nir_intrinsic_interp_var_at_offset
:
344 case nir_intrinsic_load_var
:
345 if ((mask
& nir_var_shader_in
&& mode
== nir_var_shader_in
) ||
346 (mask
& nir_var_shader_out
&& mode
== nir_var_shader_out
))
347 lower_load_to_scalar_early(&b
, intr
, var
, split_inputs
,
350 case nir_intrinsic_store_var
:
351 if (mask
& nir_var_shader_out
&&
352 mode
== nir_var_shader_out
)
353 lower_store_output_to_scalar_early(&b
, intr
, var
,
364 /* Remove old input from the shaders inputs list */
365 struct hash_entry
*entry
;
366 hash_table_foreach(split_inputs
, entry
) {
367 nir_variable
*var
= (nir_variable
*) entry
->key
;
368 exec_node_remove(&var
->node
);
373 /* Remove old output from the shaders outputs list */
374 hash_table_foreach(split_outputs
, entry
) {
375 nir_variable
*var
= (nir_variable
*) entry
->key
;
376 exec_node_remove(&var
->node
);
381 _mesa_hash_table_destroy(split_inputs
, NULL
);
382 _mesa_hash_table_destroy(split_outputs
, NULL
);