/*
 * Copyright © 2016 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
25 #include "nir_builder.h"
26 #include "nir_deref.h"
/** @file nir_lower_io_to_scalar.c
 *
 * Replaces nir_load_input/nir_store_output operations with num_components !=
 * 1 with individual per-channel operations.
 */
35 lower_load_input_to_scalar(nir_builder
*b
, nir_intrinsic_instr
*intr
)
37 b
->cursor
= nir_before_instr(&intr
->instr
);
39 assert(intr
->dest
.is_ssa
);
41 nir_ssa_def
*loads
[NIR_MAX_VEC_COMPONENTS
];
43 for (unsigned i
= 0; i
< intr
->num_components
; i
++) {
44 nir_intrinsic_instr
*chan_intr
=
45 nir_intrinsic_instr_create(b
->shader
, intr
->intrinsic
);
46 nir_ssa_dest_init(&chan_intr
->instr
, &chan_intr
->dest
,
47 1, intr
->dest
.ssa
.bit_size
, NULL
);
48 chan_intr
->num_components
= 1;
50 nir_intrinsic_set_base(chan_intr
, nir_intrinsic_base(intr
));
51 nir_intrinsic_set_component(chan_intr
, nir_intrinsic_component(intr
) + i
);
53 nir_src_copy(&chan_intr
->src
[0], &intr
->src
[0], chan_intr
);
55 nir_builder_instr_insert(b
, &chan_intr
->instr
);
57 loads
[i
] = &chan_intr
->dest
.ssa
;
60 nir_ssa_def_rewrite_uses(&intr
->dest
.ssa
,
61 nir_src_for_ssa(nir_vec(b
, loads
,
62 intr
->num_components
)));
63 nir_instr_remove(&intr
->instr
);
67 lower_store_output_to_scalar(nir_builder
*b
, nir_intrinsic_instr
*intr
)
69 b
->cursor
= nir_before_instr(&intr
->instr
);
71 nir_ssa_def
*value
= nir_ssa_for_src(b
, intr
->src
[0], intr
->num_components
);
73 for (unsigned i
= 0; i
< intr
->num_components
; i
++) {
74 if (!(nir_intrinsic_write_mask(intr
) & (1 << i
)))
77 nir_intrinsic_instr
*chan_intr
=
78 nir_intrinsic_instr_create(b
->shader
, intr
->intrinsic
);
79 chan_intr
->num_components
= 1;
81 nir_intrinsic_set_base(chan_intr
, nir_intrinsic_base(intr
));
82 nir_intrinsic_set_write_mask(chan_intr
, 0x1);
83 nir_intrinsic_set_component(chan_intr
, nir_intrinsic_component(intr
) + i
);
86 chan_intr
->src
[0] = nir_src_for_ssa(nir_channel(b
, value
, i
));
88 nir_src_copy(&chan_intr
->src
[1], &intr
->src
[1], chan_intr
);
90 nir_builder_instr_insert(b
, &chan_intr
->instr
);
93 nir_instr_remove(&intr
->instr
);
97 nir_lower_io_to_scalar(nir_shader
*shader
, nir_variable_mode mask
)
99 nir_foreach_function(function
, shader
) {
100 if (function
->impl
) {
102 nir_builder_init(&b
, function
->impl
);
104 nir_foreach_block(block
, function
->impl
) {
105 nir_foreach_instr_safe(instr
, block
) {
106 if (instr
->type
!= nir_instr_type_intrinsic
)
109 nir_intrinsic_instr
*intr
= nir_instr_as_intrinsic(instr
);
111 if (intr
->num_components
== 1)
114 switch (intr
->intrinsic
) {
115 case nir_intrinsic_load_input
:
116 if (mask
& nir_var_shader_in
)
117 lower_load_input_to_scalar(&b
, intr
);
119 case nir_intrinsic_store_output
:
120 if (mask
& nir_var_shader_out
)
121 lower_store_output_to_scalar(&b
, intr
);
132 static nir_variable
**
133 get_channel_variables(struct hash_table
*ht
, nir_variable
*var
)
135 nir_variable
**chan_vars
;
136 struct hash_entry
*entry
= _mesa_hash_table_search(ht
, var
);
138 chan_vars
= (nir_variable
**) calloc(4, sizeof(nir_variable
*));
139 _mesa_hash_table_insert(ht
, var
, chan_vars
);
141 chan_vars
= (nir_variable
**) entry
->data
;
148 * Note that the src deref that we are cloning is the head of the
149 * chain of deref instructions from the original intrinsic, but
150 * the dst we are cloning to is the tail (because chains of deref
151 * instructions are created back to front)
154 static nir_deref_instr
*
155 clone_deref_array(nir_builder
*b
, nir_deref_instr
*dst_tail
,
156 const nir_deref_instr
*src_head
)
158 const nir_deref_instr
*parent
= nir_deref_instr_parent(src_head
);
163 assert(src_head
->deref_type
== nir_deref_type_array
);
165 dst_tail
= clone_deref_array(b
, dst_tail
, parent
);
167 return nir_build_deref_array(b
, dst_tail
,
168 nir_ssa_for_src(b
, src_head
->arr
.index
, 1));
172 lower_load_to_scalar_early(nir_builder
*b
, nir_intrinsic_instr
*intr
,
173 nir_variable
*var
, struct hash_table
*split_inputs
,
174 struct hash_table
*split_outputs
)
176 b
->cursor
= nir_before_instr(&intr
->instr
);
178 assert(intr
->dest
.is_ssa
);
180 nir_ssa_def
*loads
[NIR_MAX_VEC_COMPONENTS
];
182 nir_variable
**chan_vars
;
183 if (var
->data
.mode
== nir_var_shader_in
) {
184 chan_vars
= get_channel_variables(split_inputs
, var
);
186 chan_vars
= get_channel_variables(split_outputs
, var
);
189 for (unsigned i
= 0; i
< intr
->num_components
; i
++) {
190 nir_variable
*chan_var
= chan_vars
[var
->data
.location_frac
+ i
];
191 if (!chan_vars
[var
->data
.location_frac
+ i
]) {
192 chan_var
= nir_variable_clone(var
, b
->shader
);
193 chan_var
->data
.location_frac
= var
->data
.location_frac
+ i
;
194 chan_var
->type
= glsl_channel_type(chan_var
->type
);
196 chan_vars
[var
->data
.location_frac
+ i
] = chan_var
;
198 nir_shader_add_variable(b
->shader
, chan_var
);
201 nir_intrinsic_instr
*chan_intr
=
202 nir_intrinsic_instr_create(b
->shader
, intr
->intrinsic
);
203 nir_ssa_dest_init(&chan_intr
->instr
, &chan_intr
->dest
,
204 1, intr
->dest
.ssa
.bit_size
, NULL
);
205 chan_intr
->num_components
= 1;
207 nir_deref_instr
*deref
= nir_build_deref_var(b
, chan_var
);
209 deref
= clone_deref_array(b
, deref
, nir_src_as_deref(intr
->src
[0]));
211 chan_intr
->src
[0] = nir_src_for_ssa(&deref
->dest
.ssa
);
213 if (intr
->intrinsic
== nir_intrinsic_interp_deref_at_offset
||
214 intr
->intrinsic
== nir_intrinsic_interp_deref_at_sample
)
215 nir_src_copy(&chan_intr
->src
[1], &intr
->src
[1], &chan_intr
->instr
);
217 nir_builder_instr_insert(b
, &chan_intr
->instr
);
219 loads
[i
] = &chan_intr
->dest
.ssa
;
222 nir_ssa_def_rewrite_uses(&intr
->dest
.ssa
,
223 nir_src_for_ssa(nir_vec(b
, loads
,
224 intr
->num_components
)));
226 /* Remove the old load intrinsic */
227 nir_instr_remove(&intr
->instr
);
231 lower_store_output_to_scalar_early(nir_builder
*b
, nir_intrinsic_instr
*intr
,
233 struct hash_table
*split_outputs
)
235 b
->cursor
= nir_before_instr(&intr
->instr
);
237 nir_ssa_def
*value
= nir_ssa_for_src(b
, intr
->src
[1], intr
->num_components
);
239 nir_variable
**chan_vars
= get_channel_variables(split_outputs
, var
);
240 for (unsigned i
= 0; i
< intr
->num_components
; i
++) {
241 if (!(nir_intrinsic_write_mask(intr
) & (1 << i
)))
244 nir_variable
*chan_var
= chan_vars
[var
->data
.location_frac
+ i
];
245 if (!chan_vars
[var
->data
.location_frac
+ i
]) {
246 chan_var
= nir_variable_clone(var
, b
->shader
);
247 chan_var
->data
.location_frac
= var
->data
.location_frac
+ i
;
248 chan_var
->type
= glsl_channel_type(chan_var
->type
);
250 chan_vars
[var
->data
.location_frac
+ i
] = chan_var
;
252 nir_shader_add_variable(b
->shader
, chan_var
);
255 nir_intrinsic_instr
*chan_intr
=
256 nir_intrinsic_instr_create(b
->shader
, intr
->intrinsic
);
257 chan_intr
->num_components
= 1;
259 nir_intrinsic_set_write_mask(chan_intr
, 0x1);
261 nir_deref_instr
*deref
= nir_build_deref_var(b
, chan_var
);
263 deref
= clone_deref_array(b
, deref
, nir_src_as_deref(intr
->src
[0]));
265 chan_intr
->src
[0] = nir_src_for_ssa(&deref
->dest
.ssa
);
266 chan_intr
->src
[1] = nir_src_for_ssa(nir_channel(b
, value
, i
));
268 nir_builder_instr_insert(b
, &chan_intr
->instr
);
271 /* Remove the old store intrinsic */
272 nir_instr_remove(&intr
->instr
);
276 * This function is intended to be called earlier than nir_lower_io_to_scalar()
277 * i.e. before nir_lower_io() is called.
280 nir_lower_io_to_scalar_early(nir_shader
*shader
, nir_variable_mode mask
)
282 struct hash_table
*split_inputs
=
283 _mesa_hash_table_create(NULL
, _mesa_hash_pointer
,
284 _mesa_key_pointer_equal
);
285 struct hash_table
*split_outputs
=
286 _mesa_hash_table_create(NULL
, _mesa_hash_pointer
,
287 _mesa_key_pointer_equal
);
289 nir_foreach_function(function
, shader
) {
290 if (function
->impl
) {
292 nir_builder_init(&b
, function
->impl
);
294 nir_foreach_block(block
, function
->impl
) {
295 nir_foreach_instr_safe(instr
, block
) {
296 if (instr
->type
!= nir_instr_type_intrinsic
)
299 nir_intrinsic_instr
*intr
= nir_instr_as_intrinsic(instr
);
301 if (intr
->num_components
== 1)
304 if (intr
->intrinsic
!= nir_intrinsic_load_deref
&&
305 intr
->intrinsic
!= nir_intrinsic_store_deref
&&
306 intr
->intrinsic
!= nir_intrinsic_interp_deref_at_centroid
&&
307 intr
->intrinsic
!= nir_intrinsic_interp_deref_at_sample
&&
308 intr
->intrinsic
!= nir_intrinsic_interp_deref_at_offset
)
312 nir_deref_instr_get_variable(nir_src_as_deref(intr
->src
[0]));
313 nir_variable_mode mode
= var
->data
.mode
;
315 /* TODO: add patch support */
319 /* TODO: add doubles support */
320 if (glsl_type_is_64bit(glsl_without_array(var
->type
)))
323 if (var
->data
.location
< VARYING_SLOT_VAR0
&&
324 var
->data
.location
>= 0)
327 /* Don't bother splitting if we can't opt away any unused
330 if (var
->data
.always_active_io
)
333 /* Skip types we cannot split */
334 if (glsl_type_is_matrix(glsl_without_array(var
->type
)) ||
335 glsl_type_is_struct(glsl_without_array(var
->type
)))
338 switch (intr
->intrinsic
) {
339 case nir_intrinsic_interp_deref_at_centroid
:
340 case nir_intrinsic_interp_deref_at_sample
:
341 case nir_intrinsic_interp_deref_at_offset
:
342 case nir_intrinsic_load_deref
:
343 if ((mask
& nir_var_shader_in
&& mode
== nir_var_shader_in
) ||
344 (mask
& nir_var_shader_out
&& mode
== nir_var_shader_out
))
345 lower_load_to_scalar_early(&b
, intr
, var
, split_inputs
,
348 case nir_intrinsic_store_deref
:
349 if (mask
& nir_var_shader_out
&&
350 mode
== nir_var_shader_out
)
351 lower_store_output_to_scalar_early(&b
, intr
, var
,
362 /* Remove old input from the shaders inputs list */
363 struct hash_entry
*entry
;
364 hash_table_foreach(split_inputs
, entry
) {
365 nir_variable
*var
= (nir_variable
*) entry
->key
;
366 exec_node_remove(&var
->node
);
371 /* Remove old output from the shaders outputs list */
372 hash_table_foreach(split_outputs
, entry
) {
373 nir_variable
*var
= (nir_variable
*) entry
->key
;
374 exec_node_remove(&var
->node
);
379 _mesa_hash_table_destroy(split_inputs
, NULL
);
380 _mesa_hash_table_destroy(split_outputs
, NULL
);
382 nir_remove_dead_derefs(shader
);