/*
 * Copyright © 2016 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <stdlib.h>

#include "nir_builder.h"
#include "nir_deref.h"

/** @file nir_lower_io_to_scalar.c
 *
 * Replaces nir_load_input/nir_store_output operations with num_components !=
 * 1 with individual per-channel operations.
 */

35 lower_load_input_to_scalar(nir_builder
*b
, nir_intrinsic_instr
*intr
)
37 b
->cursor
= nir_before_instr(&intr
->instr
);
39 assert(intr
->dest
.is_ssa
);
41 nir_ssa_def
*loads
[NIR_MAX_VEC_COMPONENTS
];
43 for (unsigned i
= 0; i
< intr
->num_components
; i
++) {
44 nir_intrinsic_instr
*chan_intr
=
45 nir_intrinsic_instr_create(b
->shader
, intr
->intrinsic
);
46 nir_ssa_dest_init(&chan_intr
->instr
, &chan_intr
->dest
,
47 1, intr
->dest
.ssa
.bit_size
, NULL
);
48 chan_intr
->num_components
= 1;
50 nir_intrinsic_set_base(chan_intr
, nir_intrinsic_base(intr
));
51 nir_intrinsic_set_component(chan_intr
, nir_intrinsic_component(intr
) + i
);
52 nir_intrinsic_set_type(chan_intr
, nir_intrinsic_type(intr
));
54 nir_src_copy(&chan_intr
->src
[0], &intr
->src
[0], chan_intr
);
56 nir_builder_instr_insert(b
, &chan_intr
->instr
);
58 loads
[i
] = &chan_intr
->dest
.ssa
;
61 nir_ssa_def_rewrite_uses(&intr
->dest
.ssa
,
62 nir_src_for_ssa(nir_vec(b
, loads
,
63 intr
->num_components
)));
64 nir_instr_remove(&intr
->instr
);
68 lower_store_output_to_scalar(nir_builder
*b
, nir_intrinsic_instr
*intr
)
70 b
->cursor
= nir_before_instr(&intr
->instr
);
72 nir_ssa_def
*value
= nir_ssa_for_src(b
, intr
->src
[0], intr
->num_components
);
74 for (unsigned i
= 0; i
< intr
->num_components
; i
++) {
75 if (!(nir_intrinsic_write_mask(intr
) & (1 << i
)))
78 nir_intrinsic_instr
*chan_intr
=
79 nir_intrinsic_instr_create(b
->shader
, intr
->intrinsic
);
80 chan_intr
->num_components
= 1;
82 nir_intrinsic_set_base(chan_intr
, nir_intrinsic_base(intr
));
83 nir_intrinsic_set_write_mask(chan_intr
, 0x1);
84 nir_intrinsic_set_component(chan_intr
, nir_intrinsic_component(intr
) + i
);
85 nir_intrinsic_set_type(chan_intr
, nir_intrinsic_type(intr
));
88 chan_intr
->src
[0] = nir_src_for_ssa(nir_channel(b
, value
, i
));
90 nir_src_copy(&chan_intr
->src
[1], &intr
->src
[1], chan_intr
);
92 nir_builder_instr_insert(b
, &chan_intr
->instr
);
95 nir_instr_remove(&intr
->instr
);
99 nir_lower_io_to_scalar_instr(nir_builder
*b
, nir_instr
*instr
, void *data
)
101 nir_variable_mode mask
= *(nir_variable_mode
*)data
;
103 if (instr
->type
!= nir_instr_type_intrinsic
)
106 nir_intrinsic_instr
*intr
= nir_instr_as_intrinsic(instr
);
108 if (intr
->num_components
== 1)
111 if (intr
->intrinsic
== nir_intrinsic_load_input
&&
112 (mask
& nir_var_shader_in
)) {
113 lower_load_input_to_scalar(b
, intr
);
117 if (intr
->intrinsic
== nir_intrinsic_store_output
&&
118 mask
& nir_var_shader_out
) {
119 lower_store_output_to_scalar(b
, intr
);
127 nir_lower_io_to_scalar(nir_shader
*shader
, nir_variable_mode mask
)
129 nir_shader_instructions_pass(shader
,
130 nir_lower_io_to_scalar_instr
,
131 nir_metadata_block_index
|
132 nir_metadata_dominance
,
136 static nir_variable
**
137 get_channel_variables(struct hash_table
*ht
, nir_variable
*var
)
139 nir_variable
**chan_vars
;
140 struct hash_entry
*entry
= _mesa_hash_table_search(ht
, var
);
142 chan_vars
= (nir_variable
**) calloc(4, sizeof(nir_variable
*));
143 _mesa_hash_table_insert(ht
, var
, chan_vars
);
145 chan_vars
= (nir_variable
**) entry
->data
;
152 * Note that the src deref that we are cloning is the head of the
153 * chain of deref instructions from the original intrinsic, but
154 * the dst we are cloning to is the tail (because chains of deref
155 * instructions are created back to front)
158 static nir_deref_instr
*
159 clone_deref_array(nir_builder
*b
, nir_deref_instr
*dst_tail
,
160 const nir_deref_instr
*src_head
)
162 const nir_deref_instr
*parent
= nir_deref_instr_parent(src_head
);
167 assert(src_head
->deref_type
== nir_deref_type_array
);
169 dst_tail
= clone_deref_array(b
, dst_tail
, parent
);
171 return nir_build_deref_array(b
, dst_tail
,
172 nir_ssa_for_src(b
, src_head
->arr
.index
, 1));
176 lower_load_to_scalar_early(nir_builder
*b
, nir_intrinsic_instr
*intr
,
177 nir_variable
*var
, struct hash_table
*split_inputs
,
178 struct hash_table
*split_outputs
)
180 b
->cursor
= nir_before_instr(&intr
->instr
);
182 assert(intr
->dest
.is_ssa
);
184 nir_ssa_def
*loads
[NIR_MAX_VEC_COMPONENTS
];
186 nir_variable
**chan_vars
;
187 if (var
->data
.mode
== nir_var_shader_in
) {
188 chan_vars
= get_channel_variables(split_inputs
, var
);
190 chan_vars
= get_channel_variables(split_outputs
, var
);
193 for (unsigned i
= 0; i
< intr
->num_components
; i
++) {
194 nir_variable
*chan_var
= chan_vars
[var
->data
.location_frac
+ i
];
195 if (!chan_vars
[var
->data
.location_frac
+ i
]) {
196 chan_var
= nir_variable_clone(var
, b
->shader
);
197 chan_var
->data
.location_frac
= var
->data
.location_frac
+ i
;
198 chan_var
->type
= glsl_channel_type(chan_var
->type
);
199 if (var
->data
.explicit_offset
) {
200 unsigned comp_size
= glsl_get_bit_size(chan_var
->type
) / 8;
201 chan_var
->data
.offset
= var
->data
.offset
+ i
* comp_size
;
204 chan_vars
[var
->data
.location_frac
+ i
] = chan_var
;
206 nir_shader_add_variable(b
->shader
, chan_var
);
209 nir_intrinsic_instr
*chan_intr
=
210 nir_intrinsic_instr_create(b
->shader
, intr
->intrinsic
);
211 nir_ssa_dest_init(&chan_intr
->instr
, &chan_intr
->dest
,
212 1, intr
->dest
.ssa
.bit_size
, NULL
);
213 chan_intr
->num_components
= 1;
215 nir_deref_instr
*deref
= nir_build_deref_var(b
, chan_var
);
217 deref
= clone_deref_array(b
, deref
, nir_src_as_deref(intr
->src
[0]));
219 chan_intr
->src
[0] = nir_src_for_ssa(&deref
->dest
.ssa
);
221 if (intr
->intrinsic
== nir_intrinsic_interp_deref_at_offset
||
222 intr
->intrinsic
== nir_intrinsic_interp_deref_at_sample
||
223 intr
->intrinsic
== nir_intrinsic_interp_deref_at_vertex
)
224 nir_src_copy(&chan_intr
->src
[1], &intr
->src
[1], &chan_intr
->instr
);
226 nir_builder_instr_insert(b
, &chan_intr
->instr
);
228 loads
[i
] = &chan_intr
->dest
.ssa
;
231 nir_ssa_def_rewrite_uses(&intr
->dest
.ssa
,
232 nir_src_for_ssa(nir_vec(b
, loads
,
233 intr
->num_components
)));
235 /* Remove the old load intrinsic */
236 nir_instr_remove(&intr
->instr
);
240 lower_store_output_to_scalar_early(nir_builder
*b
, nir_intrinsic_instr
*intr
,
242 struct hash_table
*split_outputs
)
244 b
->cursor
= nir_before_instr(&intr
->instr
);
246 nir_ssa_def
*value
= nir_ssa_for_src(b
, intr
->src
[1], intr
->num_components
);
248 nir_variable
**chan_vars
= get_channel_variables(split_outputs
, var
);
249 for (unsigned i
= 0; i
< intr
->num_components
; i
++) {
250 if (!(nir_intrinsic_write_mask(intr
) & (1 << i
)))
253 nir_variable
*chan_var
= chan_vars
[var
->data
.location_frac
+ i
];
254 if (!chan_vars
[var
->data
.location_frac
+ i
]) {
255 chan_var
= nir_variable_clone(var
, b
->shader
);
256 chan_var
->data
.location_frac
= var
->data
.location_frac
+ i
;
257 chan_var
->type
= glsl_channel_type(chan_var
->type
);
258 if (var
->data
.explicit_offset
) {
259 unsigned comp_size
= glsl_get_bit_size(chan_var
->type
) / 8;
260 chan_var
->data
.offset
= var
->data
.offset
+ i
* comp_size
;
263 chan_vars
[var
->data
.location_frac
+ i
] = chan_var
;
265 nir_shader_add_variable(b
->shader
, chan_var
);
268 nir_intrinsic_instr
*chan_intr
=
269 nir_intrinsic_instr_create(b
->shader
, intr
->intrinsic
);
270 chan_intr
->num_components
= 1;
272 nir_intrinsic_set_write_mask(chan_intr
, 0x1);
274 nir_deref_instr
*deref
= nir_build_deref_var(b
, chan_var
);
276 deref
= clone_deref_array(b
, deref
, nir_src_as_deref(intr
->src
[0]));
278 chan_intr
->src
[0] = nir_src_for_ssa(&deref
->dest
.ssa
);
279 chan_intr
->src
[1] = nir_src_for_ssa(nir_channel(b
, value
, i
));
281 nir_builder_instr_insert(b
, &chan_intr
->instr
);
284 /* Remove the old store intrinsic */
285 nir_instr_remove(&intr
->instr
);
288 struct io_to_scalar_early_state
{
289 struct hash_table
*split_inputs
, *split_outputs
;
290 nir_variable_mode mask
;
294 nir_lower_io_to_scalar_early_instr(nir_builder
*b
, nir_instr
*instr
, void *data
)
296 struct io_to_scalar_early_state
*state
= data
;
298 if (instr
->type
!= nir_instr_type_intrinsic
)
301 nir_intrinsic_instr
*intr
= nir_instr_as_intrinsic(instr
);
303 if (intr
->num_components
== 1)
306 if (intr
->intrinsic
!= nir_intrinsic_load_deref
&&
307 intr
->intrinsic
!= nir_intrinsic_store_deref
&&
308 intr
->intrinsic
!= nir_intrinsic_interp_deref_at_centroid
&&
309 intr
->intrinsic
!= nir_intrinsic_interp_deref_at_sample
&&
310 intr
->intrinsic
!= nir_intrinsic_interp_deref_at_offset
&&
311 intr
->intrinsic
!= nir_intrinsic_interp_deref_at_vertex
)
314 nir_deref_instr
*deref
= nir_src_as_deref(intr
->src
[0]);
315 nir_variable_mode mode
= deref
->mode
;
316 if (!(mode
& state
->mask
))
319 nir_variable
*var
= nir_deref_instr_get_variable(deref
);
321 /* TODO: add patch support */
325 /* TODO: add doubles support */
326 if (glsl_type_is_64bit(glsl_without_array(var
->type
)))
329 if (!(b
->shader
->info
.stage
== MESA_SHADER_VERTEX
&&
330 mode
== nir_var_shader_in
) &&
331 var
->data
.location
< VARYING_SLOT_VAR0
&&
332 var
->data
.location
>= 0)
335 /* Don't bother splitting if we can't opt away any unused
338 if (var
->data
.always_active_io
)
341 /* Skip types we cannot split */
342 if (glsl_type_is_matrix(glsl_without_array(var
->type
)) ||
343 glsl_type_is_struct_or_ifc(glsl_without_array(var
->type
)))
346 switch (intr
->intrinsic
) {
347 case nir_intrinsic_interp_deref_at_centroid
:
348 case nir_intrinsic_interp_deref_at_sample
:
349 case nir_intrinsic_interp_deref_at_offset
:
350 case nir_intrinsic_interp_deref_at_vertex
:
351 case nir_intrinsic_load_deref
:
352 if ((state
->mask
& nir_var_shader_in
&& mode
== nir_var_shader_in
) ||
353 (state
->mask
& nir_var_shader_out
&& mode
== nir_var_shader_out
)) {
354 lower_load_to_scalar_early(b
, intr
, var
, state
->split_inputs
,
355 state
->split_outputs
);
359 case nir_intrinsic_store_deref
:
360 if (state
->mask
& nir_var_shader_out
&&
361 mode
== nir_var_shader_out
) {
362 lower_store_output_to_scalar_early(b
, intr
, var
, state
->split_outputs
);
374 * This function is intended to be called earlier than nir_lower_io_to_scalar()
375 * i.e. before nir_lower_io() is called.
378 nir_lower_io_to_scalar_early(nir_shader
*shader
, nir_variable_mode mask
)
380 struct io_to_scalar_early_state state
= {
381 .split_inputs
= _mesa_pointer_hash_table_create(NULL
),
382 .split_outputs
= _mesa_pointer_hash_table_create(NULL
),
386 nir_shader_instructions_pass(shader
,
387 nir_lower_io_to_scalar_early_instr
,
388 nir_metadata_block_index
|
389 nir_metadata_dominance
,
392 /* Remove old input from the shaders inputs list */
393 hash_table_foreach(state
.split_inputs
, entry
) {
394 nir_variable
*var
= (nir_variable
*) entry
->key
;
395 exec_node_remove(&var
->node
);
400 /* Remove old output from the shaders outputs list */
401 hash_table_foreach(state
.split_outputs
, entry
) {
402 nir_variable
*var
= (nir_variable
*) entry
->key
;
403 exec_node_remove(&var
->node
);
408 _mesa_hash_table_destroy(state
.split_inputs
, NULL
);
409 _mesa_hash_table_destroy(state
.split_outputs
, NULL
);
411 nir_remove_dead_derefs(shader
);