/*
 * Copyright © 2016 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_builder.h"
#include "nir_deref.h"

/** @file nir_lower_io_to_scalar.c
 *
 * Replaces nir_load_input/nir_store_output operations with num_components !=
 * 1 with individual per-channel operations.
 */
35 lower_load_input_to_scalar(nir_builder
*b
, nir_intrinsic_instr
*intr
)
37 b
->cursor
= nir_before_instr(&intr
->instr
);
39 assert(intr
->dest
.is_ssa
);
41 nir_ssa_def
*loads
[NIR_MAX_VEC_COMPONENTS
];
43 for (unsigned i
= 0; i
< intr
->num_components
; i
++) {
44 nir_intrinsic_instr
*chan_intr
=
45 nir_intrinsic_instr_create(b
->shader
, intr
->intrinsic
);
46 nir_ssa_dest_init(&chan_intr
->instr
, &chan_intr
->dest
,
47 1, intr
->dest
.ssa
.bit_size
, NULL
);
48 chan_intr
->num_components
= 1;
50 nir_intrinsic_set_base(chan_intr
, nir_intrinsic_base(intr
));
51 nir_intrinsic_set_component(chan_intr
, nir_intrinsic_component(intr
) + i
);
53 nir_src_copy(&chan_intr
->src
[0], &intr
->src
[0], chan_intr
);
55 nir_builder_instr_insert(b
, &chan_intr
->instr
);
57 loads
[i
] = &chan_intr
->dest
.ssa
;
60 nir_ssa_def_rewrite_uses(&intr
->dest
.ssa
,
61 nir_src_for_ssa(nir_vec(b
, loads
,
62 intr
->num_components
)));
63 nir_instr_remove(&intr
->instr
);
67 lower_store_output_to_scalar(nir_builder
*b
, nir_intrinsic_instr
*intr
)
69 b
->cursor
= nir_before_instr(&intr
->instr
);
71 nir_ssa_def
*value
= nir_ssa_for_src(b
, intr
->src
[0], intr
->num_components
);
73 for (unsigned i
= 0; i
< intr
->num_components
; i
++) {
74 if (!(nir_intrinsic_write_mask(intr
) & (1 << i
)))
77 nir_intrinsic_instr
*chan_intr
=
78 nir_intrinsic_instr_create(b
->shader
, intr
->intrinsic
);
79 chan_intr
->num_components
= 1;
81 nir_intrinsic_set_base(chan_intr
, nir_intrinsic_base(intr
));
82 nir_intrinsic_set_write_mask(chan_intr
, 0x1);
83 nir_intrinsic_set_component(chan_intr
, nir_intrinsic_component(intr
) + i
);
86 chan_intr
->src
[0] = nir_src_for_ssa(nir_channel(b
, value
, i
));
88 nir_src_copy(&chan_intr
->src
[1], &intr
->src
[1], chan_intr
);
90 nir_builder_instr_insert(b
, &chan_intr
->instr
);
93 nir_instr_remove(&intr
->instr
);
97 nir_lower_io_to_scalar(nir_shader
*shader
, nir_variable_mode mask
)
99 nir_foreach_function(function
, shader
) {
100 if (function
->impl
) {
102 nir_builder_init(&b
, function
->impl
);
104 nir_foreach_block(block
, function
->impl
) {
105 nir_foreach_instr_safe(instr
, block
) {
106 if (instr
->type
!= nir_instr_type_intrinsic
)
109 nir_intrinsic_instr
*intr
= nir_instr_as_intrinsic(instr
);
111 if (intr
->num_components
== 1)
114 switch (intr
->intrinsic
) {
115 case nir_intrinsic_load_input
:
116 if (mask
& nir_var_shader_in
)
117 lower_load_input_to_scalar(&b
, intr
);
119 case nir_intrinsic_store_output
:
120 if (mask
& nir_var_shader_out
)
121 lower_store_output_to_scalar(&b
, intr
);
132 static nir_variable
**
133 get_channel_variables(struct hash_table
*ht
, nir_variable
*var
)
135 nir_variable
**chan_vars
;
136 struct hash_entry
*entry
= _mesa_hash_table_search(ht
, var
);
138 chan_vars
= (nir_variable
**) calloc(4, sizeof(nir_variable
*));
139 _mesa_hash_table_insert(ht
, var
, chan_vars
);
141 chan_vars
= (nir_variable
**) entry
->data
;
148 * Note that the src deref that we are cloning is the head of the
149 * chain of deref instructions from the original intrinsic, but
150 * the dst we are cloning to is the tail (because chains of deref
151 * instructions are created back to front)
154 static nir_deref_instr
*
155 clone_deref_array(nir_builder
*b
, nir_deref_instr
*dst_tail
,
156 const nir_deref_instr
*src_head
)
158 const nir_deref_instr
*parent
= nir_deref_instr_parent(src_head
);
163 assert(src_head
->deref_type
== nir_deref_type_array
);
165 dst_tail
= clone_deref_array(b
, dst_tail
, parent
);
167 return nir_build_deref_array(b
, dst_tail
,
168 nir_ssa_for_src(b
, src_head
->arr
.index
, 1));
172 lower_load_to_scalar_early(nir_builder
*b
, nir_intrinsic_instr
*intr
,
173 nir_variable
*var
, struct hash_table
*split_inputs
,
174 struct hash_table
*split_outputs
)
176 b
->cursor
= nir_before_instr(&intr
->instr
);
178 assert(intr
->dest
.is_ssa
);
180 nir_ssa_def
*loads
[NIR_MAX_VEC_COMPONENTS
];
182 nir_variable
**chan_vars
;
183 if (var
->data
.mode
== nir_var_shader_in
) {
184 chan_vars
= get_channel_variables(split_inputs
, var
);
186 chan_vars
= get_channel_variables(split_outputs
, var
);
189 for (unsigned i
= 0; i
< intr
->num_components
; i
++) {
190 nir_variable
*chan_var
= chan_vars
[var
->data
.location_frac
+ i
];
191 if (!chan_vars
[var
->data
.location_frac
+ i
]) {
192 chan_var
= nir_variable_clone(var
, b
->shader
);
193 chan_var
->data
.location_frac
= var
->data
.location_frac
+ i
;
194 chan_var
->type
= glsl_channel_type(chan_var
->type
);
195 if (var
->data
.explicit_offset
) {
196 unsigned comp_size
= glsl_get_bit_size(chan_var
->type
) / 8;
197 chan_var
->data
.offset
= var
->data
.offset
+ i
* comp_size
;
200 chan_vars
[var
->data
.location_frac
+ i
] = chan_var
;
202 nir_shader_add_variable(b
->shader
, chan_var
);
205 nir_intrinsic_instr
*chan_intr
=
206 nir_intrinsic_instr_create(b
->shader
, intr
->intrinsic
);
207 nir_ssa_dest_init(&chan_intr
->instr
, &chan_intr
->dest
,
208 1, intr
->dest
.ssa
.bit_size
, NULL
);
209 chan_intr
->num_components
= 1;
211 nir_deref_instr
*deref
= nir_build_deref_var(b
, chan_var
);
213 deref
= clone_deref_array(b
, deref
, nir_src_as_deref(intr
->src
[0]));
215 chan_intr
->src
[0] = nir_src_for_ssa(&deref
->dest
.ssa
);
217 if (intr
->intrinsic
== nir_intrinsic_interp_deref_at_offset
||
218 intr
->intrinsic
== nir_intrinsic_interp_deref_at_sample
)
219 nir_src_copy(&chan_intr
->src
[1], &intr
->src
[1], &chan_intr
->instr
);
221 nir_builder_instr_insert(b
, &chan_intr
->instr
);
223 loads
[i
] = &chan_intr
->dest
.ssa
;
226 nir_ssa_def_rewrite_uses(&intr
->dest
.ssa
,
227 nir_src_for_ssa(nir_vec(b
, loads
,
228 intr
->num_components
)));
230 /* Remove the old load intrinsic */
231 nir_instr_remove(&intr
->instr
);
235 lower_store_output_to_scalar_early(nir_builder
*b
, nir_intrinsic_instr
*intr
,
237 struct hash_table
*split_outputs
)
239 b
->cursor
= nir_before_instr(&intr
->instr
);
241 nir_ssa_def
*value
= nir_ssa_for_src(b
, intr
->src
[1], intr
->num_components
);
243 nir_variable
**chan_vars
= get_channel_variables(split_outputs
, var
);
244 for (unsigned i
= 0; i
< intr
->num_components
; i
++) {
245 if (!(nir_intrinsic_write_mask(intr
) & (1 << i
)))
248 nir_variable
*chan_var
= chan_vars
[var
->data
.location_frac
+ i
];
249 if (!chan_vars
[var
->data
.location_frac
+ i
]) {
250 chan_var
= nir_variable_clone(var
, b
->shader
);
251 chan_var
->data
.location_frac
= var
->data
.location_frac
+ i
;
252 chan_var
->type
= glsl_channel_type(chan_var
->type
);
253 if (var
->data
.explicit_offset
) {
254 unsigned comp_size
= glsl_get_bit_size(chan_var
->type
) / 8;
255 chan_var
->data
.offset
= var
->data
.offset
+ i
* comp_size
;
258 chan_vars
[var
->data
.location_frac
+ i
] = chan_var
;
260 nir_shader_add_variable(b
->shader
, chan_var
);
263 nir_intrinsic_instr
*chan_intr
=
264 nir_intrinsic_instr_create(b
->shader
, intr
->intrinsic
);
265 chan_intr
->num_components
= 1;
267 nir_intrinsic_set_write_mask(chan_intr
, 0x1);
269 nir_deref_instr
*deref
= nir_build_deref_var(b
, chan_var
);
271 deref
= clone_deref_array(b
, deref
, nir_src_as_deref(intr
->src
[0]));
273 chan_intr
->src
[0] = nir_src_for_ssa(&deref
->dest
.ssa
);
274 chan_intr
->src
[1] = nir_src_for_ssa(nir_channel(b
, value
, i
));
276 nir_builder_instr_insert(b
, &chan_intr
->instr
);
279 /* Remove the old store intrinsic */
280 nir_instr_remove(&intr
->instr
);
284 * This function is intended to be called earlier than nir_lower_io_to_scalar()
285 * i.e. before nir_lower_io() is called.
288 nir_lower_io_to_scalar_early(nir_shader
*shader
, nir_variable_mode mask
)
290 struct hash_table
*split_inputs
= _mesa_pointer_hash_table_create(NULL
);
291 struct hash_table
*split_outputs
= _mesa_pointer_hash_table_create(NULL
);
293 nir_foreach_function(function
, shader
) {
294 if (function
->impl
) {
296 nir_builder_init(&b
, function
->impl
);
298 nir_foreach_block(block
, function
->impl
) {
299 nir_foreach_instr_safe(instr
, block
) {
300 if (instr
->type
!= nir_instr_type_intrinsic
)
303 nir_intrinsic_instr
*intr
= nir_instr_as_intrinsic(instr
);
305 if (intr
->num_components
== 1)
308 if (intr
->intrinsic
!= nir_intrinsic_load_deref
&&
309 intr
->intrinsic
!= nir_intrinsic_store_deref
&&
310 intr
->intrinsic
!= nir_intrinsic_interp_deref_at_centroid
&&
311 intr
->intrinsic
!= nir_intrinsic_interp_deref_at_sample
&&
312 intr
->intrinsic
!= nir_intrinsic_interp_deref_at_offset
)
315 nir_deref_instr
*deref
= nir_src_as_deref(intr
->src
[0]);
316 nir_variable_mode mode
= deref
->mode
;
320 nir_variable
*var
= nir_deref_instr_get_variable(deref
);
322 /* TODO: add patch support */
326 /* TODO: add doubles support */
327 if (glsl_type_is_64bit(glsl_without_array(var
->type
)))
330 if (!(shader
->info
.stage
== MESA_SHADER_VERTEX
&&
331 mode
== nir_var_shader_in
) &&
332 var
->data
.location
< VARYING_SLOT_VAR0
&&
333 var
->data
.location
>= 0)
336 /* Don't bother splitting if we can't opt away any unused
339 if (var
->data
.always_active_io
)
342 /* Skip types we cannot split */
343 if (glsl_type_is_matrix(glsl_without_array(var
->type
)) ||
344 glsl_type_is_struct_or_ifc(glsl_without_array(var
->type
)))
347 switch (intr
->intrinsic
) {
348 case nir_intrinsic_interp_deref_at_centroid
:
349 case nir_intrinsic_interp_deref_at_sample
:
350 case nir_intrinsic_interp_deref_at_offset
:
351 case nir_intrinsic_load_deref
:
352 if ((mask
& nir_var_shader_in
&& mode
== nir_var_shader_in
) ||
353 (mask
& nir_var_shader_out
&& mode
== nir_var_shader_out
))
354 lower_load_to_scalar_early(&b
, intr
, var
, split_inputs
,
357 case nir_intrinsic_store_deref
:
358 if (mask
& nir_var_shader_out
&&
359 mode
== nir_var_shader_out
)
360 lower_store_output_to_scalar_early(&b
, intr
, var
,
371 /* Remove old input from the shaders inputs list */
372 hash_table_foreach(split_inputs
, entry
) {
373 nir_variable
*var
= (nir_variable
*) entry
->key
;
374 exec_node_remove(&var
->node
);
379 /* Remove old output from the shaders outputs list */
380 hash_table_foreach(split_outputs
, entry
) {
381 nir_variable
*var
= (nir_variable
*) entry
->key
;
382 exec_node_remove(&var
->node
);
387 _mesa_hash_table_destroy(split_inputs
, NULL
);
388 _mesa_hash_table_destroy(split_outputs
, NULL
);
390 nir_remove_dead_derefs(shader
);