/*
 * Copyright © 2020 Google LLC
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
/**
 * @file
 *
 * Trims off the unused trailing components of SSA defs.
 *
 * Due to various optimization passes (or frontend implementations,
 * particularly prog_to_nir), we may have instructions generating vectors
 * whose components don't get read by any instruction.  While it can be tricky
 * to eliminate either unused low components of a writemask (you might need to
 * increment some offset from a load_uniform, for example) or channels in the
 * middle of a partially set writemask (you might need to reswizzle ALU ops
 * using the value), it is trivial to just drop the trailing components.
 *
 * This pass is probably only of use to vector backends -- scalar backends
 * typically get unused def channel trimming by scalarizing and dead code
 * elimination.
 */
43 #include "nir_builder.h"
46 shrink_dest_to_read_mask(nir_ssa_def
*def
)
48 /* early out if there's nothing to do. */
49 if (def
->num_components
== 1)
52 unsigned mask
= nir_ssa_def_components_read(def
);
53 int last_bit
= util_last_bit(mask
);
55 /* If nothing was read, leave it up to DCE. */
59 if (def
->num_components
> last_bit
) {
60 def
->num_components
= last_bit
;
68 opt_shrink_vectors_alu(nir_builder
*b
, nir_alu_instr
*instr
)
70 nir_ssa_def
*def
= &instr
->dest
.dest
.ssa
;
72 if (nir_op_infos
[instr
->op
].output_size
== 0) {
73 if (shrink_dest_to_read_mask(def
)) {
74 instr
->dest
.write_mask
&=
75 BITFIELD_MASK(def
->num_components
);
85 unsigned mask
= nir_ssa_def_components_read(def
);
87 /* If nothing was read, leave it up to DCE. */
91 int last_bit
= util_last_bit(mask
);
92 if (last_bit
< def
->num_components
) {
93 nir_ssa_def
*srcs
[NIR_MAX_VEC_COMPONENTS
] = { 0 };
94 for (int i
= 0; i
< last_bit
; i
++)
95 srcs
[i
] = nir_ssa_for_alu_src(b
, instr
, i
);
97 nir_ssa_def
*new_vec
= nir_vec(b
, srcs
, last_bit
);
98 nir_ssa_def_rewrite_uses(def
, nir_src_for_ssa(new_vec
));
113 opt_shrink_vectors_intrinsic(nir_builder
*b
, nir_intrinsic_instr
*instr
)
115 switch (instr
->intrinsic
) {
116 case nir_intrinsic_load_uniform
:
117 case nir_intrinsic_load_ubo
:
118 case nir_intrinsic_load_input
:
119 case nir_intrinsic_load_input_vertex
:
120 case nir_intrinsic_load_per_vertex_input
:
121 case nir_intrinsic_load_interpolated_input
:
122 case nir_intrinsic_load_ssbo
:
123 case nir_intrinsic_load_push_constant
:
124 case nir_intrinsic_load_constant
:
125 case nir_intrinsic_load_global
:
126 case nir_intrinsic_load_kernel_input
:
127 case nir_intrinsic_load_scratch
:
128 case nir_intrinsic_store_output
:
129 case nir_intrinsic_store_per_vertex_output
:
130 case nir_intrinsic_store_ssbo
:
131 case nir_intrinsic_store_shared
:
132 case nir_intrinsic_store_global
:
133 case nir_intrinsic_store_scratch
:
139 /* Must be a vectorized intrinsic that we can resize. */
140 assert(instr
->num_components
!= 0);
142 if (nir_intrinsic_infos
[instr
->intrinsic
].has_dest
) {
143 /* loads: Trim the dest to the used channels */
145 if (shrink_dest_to_read_mask(&instr
->dest
.ssa
)) {
146 instr
->num_components
= instr
->dest
.ssa
.num_components
;
150 /* Stores: trim the num_components stored according to the write
153 unsigned write_mask
= nir_intrinsic_write_mask(instr
);
154 unsigned last_bit
= util_last_bit(write_mask
);
155 if (last_bit
< instr
->num_components
&& instr
->src
[0].is_ssa
) {
156 nir_ssa_def
*def
= nir_channels(b
, instr
->src
[0].ssa
,
157 BITSET_MASK(last_bit
));
158 nir_instr_rewrite_src(&instr
->instr
,
160 nir_src_for_ssa(def
));
161 instr
->num_components
= last_bit
;
/* Trims unused trailing channels from a load_const def.  Returns true on
 * progress.
 */
static bool
opt_shrink_vectors_load_const(nir_load_const_instr *instr)
{
   return shrink_dest_to_read_mask(&instr->def);
}
/* Trims unused trailing channels from an ssa_undef def.  Returns true on
 * progress.
 */
static bool
opt_shrink_vectors_ssa_undef(nir_ssa_undef_instr *instr)
{
   return shrink_dest_to_read_mask(&instr->def);
}
183 opt_shrink_vectors_instr(nir_builder
*b
, nir_instr
*instr
)
185 b
->cursor
= nir_before_instr(instr
);
187 switch (instr
->type
) {
188 case nir_instr_type_alu
:
189 return opt_shrink_vectors_alu(b
, nir_instr_as_alu(instr
));
191 case nir_instr_type_intrinsic
:
192 return opt_shrink_vectors_intrinsic(b
, nir_instr_as_intrinsic(instr
));
194 case nir_instr_type_load_const
:
195 return opt_shrink_vectors_load_const(nir_instr_as_load_const(instr
));
197 case nir_instr_type_ssa_undef
:
198 return opt_shrink_vectors_ssa_undef(nir_instr_as_ssa_undef(instr
));
208 nir_opt_shrink_vectors(nir_shader
*shader
)
210 bool progress
= false;
212 nir_foreach_function(function
, shader
) {
217 nir_builder_init(&b
, function
->impl
);
219 nir_foreach_block(block
, function
->impl
) {
220 nir_foreach_instr(instr
, block
) {
221 progress
|= opt_shrink_vectors_instr(&b
, instr
);
226 nir_metadata_preserve(function
->impl
,
227 nir_metadata_block_index
|
228 nir_metadata_dominance
);
230 nir_metadata_preserve(function
->impl
, nir_metadata_all
);