/*
 * Copyright © 2016 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include "nir.h"
#include "nir_builder.h"
#include "nir_deref.h"
/** @file nir_lower_io_to_scalar.c
 *
 * Replaces nir_load_input/nir_store_output operations with num_components !=
 * 1 with individual per-channel operations.
 */
35 lower_load_input_to_scalar(nir_builder
*b
, nir_intrinsic_instr
*intr
)
37 b
->cursor
= nir_before_instr(&intr
->instr
);
39 assert(intr
->dest
.is_ssa
);
41 nir_ssa_def
*loads
[NIR_MAX_VEC_COMPONENTS
];
43 for (unsigned i
= 0; i
< intr
->num_components
; i
++) {
44 nir_intrinsic_instr
*chan_intr
=
45 nir_intrinsic_instr_create(b
->shader
, intr
->intrinsic
);
46 nir_ssa_dest_init(&chan_intr
->instr
, &chan_intr
->dest
,
47 1, intr
->dest
.ssa
.bit_size
, NULL
);
48 chan_intr
->num_components
= 1;
50 nir_intrinsic_set_base(chan_intr
, nir_intrinsic_base(intr
));
51 nir_intrinsic_set_component(chan_intr
, nir_intrinsic_component(intr
) + i
);
52 nir_intrinsic_set_type(chan_intr
, nir_intrinsic_type(intr
));
54 nir_src_copy(&chan_intr
->src
[0], &intr
->src
[0], chan_intr
);
56 nir_builder_instr_insert(b
, &chan_intr
->instr
);
58 loads
[i
] = &chan_intr
->dest
.ssa
;
61 nir_ssa_def_rewrite_uses(&intr
->dest
.ssa
,
62 nir_src_for_ssa(nir_vec(b
, loads
,
63 intr
->num_components
)));
64 nir_instr_remove(&intr
->instr
);
68 lower_store_output_to_scalar(nir_builder
*b
, nir_intrinsic_instr
*intr
)
70 b
->cursor
= nir_before_instr(&intr
->instr
);
72 nir_ssa_def
*value
= nir_ssa_for_src(b
, intr
->src
[0], intr
->num_components
);
74 for (unsigned i
= 0; i
< intr
->num_components
; i
++) {
75 if (!(nir_intrinsic_write_mask(intr
) & (1 << i
)))
78 nir_intrinsic_instr
*chan_intr
=
79 nir_intrinsic_instr_create(b
->shader
, intr
->intrinsic
);
80 chan_intr
->num_components
= 1;
82 nir_intrinsic_set_base(chan_intr
, nir_intrinsic_base(intr
));
83 nir_intrinsic_set_write_mask(chan_intr
, 0x1);
84 nir_intrinsic_set_component(chan_intr
, nir_intrinsic_component(intr
) + i
);
85 nir_intrinsic_set_type(chan_intr
, nir_intrinsic_type(intr
));
88 chan_intr
->src
[0] = nir_src_for_ssa(nir_channel(b
, value
, i
));
90 nir_src_copy(&chan_intr
->src
[1], &intr
->src
[1], chan_intr
);
92 nir_builder_instr_insert(b
, &chan_intr
->instr
);
95 nir_instr_remove(&intr
->instr
);
99 nir_lower_io_to_scalar(nir_shader
*shader
, nir_variable_mode mask
)
101 nir_foreach_function(function
, shader
) {
102 if (function
->impl
) {
104 nir_builder_init(&b
, function
->impl
);
106 nir_foreach_block(block
, function
->impl
) {
107 nir_foreach_instr_safe(instr
, block
) {
108 if (instr
->type
!= nir_instr_type_intrinsic
)
111 nir_intrinsic_instr
*intr
= nir_instr_as_intrinsic(instr
);
113 if (intr
->num_components
== 1)
116 switch (intr
->intrinsic
) {
117 case nir_intrinsic_load_input
:
118 if (mask
& nir_var_shader_in
)
119 lower_load_input_to_scalar(&b
, intr
);
121 case nir_intrinsic_store_output
:
122 if (mask
& nir_var_shader_out
)
123 lower_store_output_to_scalar(&b
, intr
);
134 static nir_variable
**
135 get_channel_variables(struct hash_table
*ht
, nir_variable
*var
)
137 nir_variable
**chan_vars
;
138 struct hash_entry
*entry
= _mesa_hash_table_search(ht
, var
);
140 chan_vars
= (nir_variable
**) calloc(4, sizeof(nir_variable
*));
141 _mesa_hash_table_insert(ht
, var
, chan_vars
);
143 chan_vars
= (nir_variable
**) entry
->data
;
150 * Note that the src deref that we are cloning is the head of the
151 * chain of deref instructions from the original intrinsic, but
152 * the dst we are cloning to is the tail (because chains of deref
153 * instructions are created back to front)
156 static nir_deref_instr
*
157 clone_deref_array(nir_builder
*b
, nir_deref_instr
*dst_tail
,
158 const nir_deref_instr
*src_head
)
160 const nir_deref_instr
*parent
= nir_deref_instr_parent(src_head
);
165 assert(src_head
->deref_type
== nir_deref_type_array
);
167 dst_tail
= clone_deref_array(b
, dst_tail
, parent
);
169 return nir_build_deref_array(b
, dst_tail
,
170 nir_ssa_for_src(b
, src_head
->arr
.index
, 1));
174 lower_load_to_scalar_early(nir_builder
*b
, nir_intrinsic_instr
*intr
,
175 nir_variable
*var
, struct hash_table
*split_inputs
,
176 struct hash_table
*split_outputs
)
178 b
->cursor
= nir_before_instr(&intr
->instr
);
180 assert(intr
->dest
.is_ssa
);
182 nir_ssa_def
*loads
[NIR_MAX_VEC_COMPONENTS
];
184 nir_variable
**chan_vars
;
185 if (var
->data
.mode
== nir_var_shader_in
) {
186 chan_vars
= get_channel_variables(split_inputs
, var
);
188 chan_vars
= get_channel_variables(split_outputs
, var
);
191 for (unsigned i
= 0; i
< intr
->num_components
; i
++) {
192 nir_variable
*chan_var
= chan_vars
[var
->data
.location_frac
+ i
];
193 if (!chan_vars
[var
->data
.location_frac
+ i
]) {
194 chan_var
= nir_variable_clone(var
, b
->shader
);
195 chan_var
->data
.location_frac
= var
->data
.location_frac
+ i
;
196 chan_var
->type
= glsl_channel_type(chan_var
->type
);
197 if (var
->data
.explicit_offset
) {
198 unsigned comp_size
= glsl_get_bit_size(chan_var
->type
) / 8;
199 chan_var
->data
.offset
= var
->data
.offset
+ i
* comp_size
;
202 chan_vars
[var
->data
.location_frac
+ i
] = chan_var
;
204 nir_shader_add_variable(b
->shader
, chan_var
);
207 nir_intrinsic_instr
*chan_intr
=
208 nir_intrinsic_instr_create(b
->shader
, intr
->intrinsic
);
209 nir_ssa_dest_init(&chan_intr
->instr
, &chan_intr
->dest
,
210 1, intr
->dest
.ssa
.bit_size
, NULL
);
211 chan_intr
->num_components
= 1;
213 nir_deref_instr
*deref
= nir_build_deref_var(b
, chan_var
);
215 deref
= clone_deref_array(b
, deref
, nir_src_as_deref(intr
->src
[0]));
217 chan_intr
->src
[0] = nir_src_for_ssa(&deref
->dest
.ssa
);
219 if (intr
->intrinsic
== nir_intrinsic_interp_deref_at_offset
||
220 intr
->intrinsic
== nir_intrinsic_interp_deref_at_sample
)
221 nir_src_copy(&chan_intr
->src
[1], &intr
->src
[1], &chan_intr
->instr
);
223 nir_builder_instr_insert(b
, &chan_intr
->instr
);
225 loads
[i
] = &chan_intr
->dest
.ssa
;
228 nir_ssa_def_rewrite_uses(&intr
->dest
.ssa
,
229 nir_src_for_ssa(nir_vec(b
, loads
,
230 intr
->num_components
)));
232 /* Remove the old load intrinsic */
233 nir_instr_remove(&intr
->instr
);
237 lower_store_output_to_scalar_early(nir_builder
*b
, nir_intrinsic_instr
*intr
,
239 struct hash_table
*split_outputs
)
241 b
->cursor
= nir_before_instr(&intr
->instr
);
243 nir_ssa_def
*value
= nir_ssa_for_src(b
, intr
->src
[1], intr
->num_components
);
245 nir_variable
**chan_vars
= get_channel_variables(split_outputs
, var
);
246 for (unsigned i
= 0; i
< intr
->num_components
; i
++) {
247 if (!(nir_intrinsic_write_mask(intr
) & (1 << i
)))
250 nir_variable
*chan_var
= chan_vars
[var
->data
.location_frac
+ i
];
251 if (!chan_vars
[var
->data
.location_frac
+ i
]) {
252 chan_var
= nir_variable_clone(var
, b
->shader
);
253 chan_var
->data
.location_frac
= var
->data
.location_frac
+ i
;
254 chan_var
->type
= glsl_channel_type(chan_var
->type
);
255 if (var
->data
.explicit_offset
) {
256 unsigned comp_size
= glsl_get_bit_size(chan_var
->type
) / 8;
257 chan_var
->data
.offset
= var
->data
.offset
+ i
* comp_size
;
260 chan_vars
[var
->data
.location_frac
+ i
] = chan_var
;
262 nir_shader_add_variable(b
->shader
, chan_var
);
265 nir_intrinsic_instr
*chan_intr
=
266 nir_intrinsic_instr_create(b
->shader
, intr
->intrinsic
);
267 chan_intr
->num_components
= 1;
269 nir_intrinsic_set_write_mask(chan_intr
, 0x1);
271 nir_deref_instr
*deref
= nir_build_deref_var(b
, chan_var
);
273 deref
= clone_deref_array(b
, deref
, nir_src_as_deref(intr
->src
[0]));
275 chan_intr
->src
[0] = nir_src_for_ssa(&deref
->dest
.ssa
);
276 chan_intr
->src
[1] = nir_src_for_ssa(nir_channel(b
, value
, i
));
278 nir_builder_instr_insert(b
, &chan_intr
->instr
);
281 /* Remove the old store intrinsic */
282 nir_instr_remove(&intr
->instr
);
286 * This function is intended to be called earlier than nir_lower_io_to_scalar()
287 * i.e. before nir_lower_io() is called.
290 nir_lower_io_to_scalar_early(nir_shader
*shader
, nir_variable_mode mask
)
292 struct hash_table
*split_inputs
= _mesa_pointer_hash_table_create(NULL
);
293 struct hash_table
*split_outputs
= _mesa_pointer_hash_table_create(NULL
);
295 nir_foreach_function(function
, shader
) {
296 if (function
->impl
) {
298 nir_builder_init(&b
, function
->impl
);
300 nir_foreach_block(block
, function
->impl
) {
301 nir_foreach_instr_safe(instr
, block
) {
302 if (instr
->type
!= nir_instr_type_intrinsic
)
305 nir_intrinsic_instr
*intr
= nir_instr_as_intrinsic(instr
);
307 if (intr
->num_components
== 1)
310 if (intr
->intrinsic
!= nir_intrinsic_load_deref
&&
311 intr
->intrinsic
!= nir_intrinsic_store_deref
&&
312 intr
->intrinsic
!= nir_intrinsic_interp_deref_at_centroid
&&
313 intr
->intrinsic
!= nir_intrinsic_interp_deref_at_sample
&&
314 intr
->intrinsic
!= nir_intrinsic_interp_deref_at_offset
)
317 nir_deref_instr
*deref
= nir_src_as_deref(intr
->src
[0]);
318 nir_variable_mode mode
= deref
->mode
;
322 nir_variable
*var
= nir_deref_instr_get_variable(deref
);
324 /* TODO: add patch support */
328 /* TODO: add doubles support */
329 if (glsl_type_is_64bit(glsl_without_array(var
->type
)))
332 if (!(shader
->info
.stage
== MESA_SHADER_VERTEX
&&
333 mode
== nir_var_shader_in
) &&
334 var
->data
.location
< VARYING_SLOT_VAR0
&&
335 var
->data
.location
>= 0)
338 /* Don't bother splitting if we can't opt away any unused
341 if (var
->data
.always_active_io
)
344 /* Skip types we cannot split */
345 if (glsl_type_is_matrix(glsl_without_array(var
->type
)) ||
346 glsl_type_is_struct_or_ifc(glsl_without_array(var
->type
)))
349 switch (intr
->intrinsic
) {
350 case nir_intrinsic_interp_deref_at_centroid
:
351 case nir_intrinsic_interp_deref_at_sample
:
352 case nir_intrinsic_interp_deref_at_offset
:
353 case nir_intrinsic_load_deref
:
354 if ((mask
& nir_var_shader_in
&& mode
== nir_var_shader_in
) ||
355 (mask
& nir_var_shader_out
&& mode
== nir_var_shader_out
))
356 lower_load_to_scalar_early(&b
, intr
, var
, split_inputs
,
359 case nir_intrinsic_store_deref
:
360 if (mask
& nir_var_shader_out
&&
361 mode
== nir_var_shader_out
)
362 lower_store_output_to_scalar_early(&b
, intr
, var
,
373 /* Remove old input from the shaders inputs list */
374 hash_table_foreach(split_inputs
, entry
) {
375 nir_variable
*var
= (nir_variable
*) entry
->key
;
376 exec_node_remove(&var
->node
);
381 /* Remove old output from the shaders outputs list */
382 hash_table_foreach(split_outputs
, entry
) {
383 nir_variable
*var
= (nir_variable
*) entry
->key
;
384 exec_node_remove(&var
->node
);
389 _mesa_hash_table_destroy(split_inputs
, NULL
);
390 _mesa_hash_table_destroy(split_outputs
, NULL
);
392 nir_remove_dead_derefs(shader
);