/*
 * Copyright © 2020 Google LLC
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
/*
 * Trims off the unused trailing components of SSA defs.
 *
 * Due to various optimization passes (or frontend implementations,
 * particularly prog_to_nir), we may have instructions generating vectors
 * whose components don't get read by any instruction. While it can be tricky
 * to eliminate either unused low components of a writemask (you might need to
 * increment some offset from a load_uniform, for example) or channels in the
 * middle of a partially set writemask (you might need to reswizzle ALU ops
 * using the value), it is trivial to just drop the trailing components.
 *
 * This pass is probably only of use to vector backends -- scalar backends
 * typically get unused def channel trimming by scalarizing and dead code
 * elimination.
 */
43 #include "nir_builder.h"
46 shrink_dest_to_read_mask(nir_ssa_def
*def
)
48 /* early out if there's nothing to do. */
49 if (def
->num_components
== 1)
52 unsigned mask
= nir_ssa_def_components_read(def
);
53 int last_bit
= util_last_bit(mask
);
55 /* If nothing was read, leave it up to DCE. */
59 if (def
->num_components
> last_bit
) {
60 def
->num_components
= last_bit
;
68 opt_shrink_vectors_alu(nir_builder
*b
, nir_alu_instr
*instr
)
70 nir_ssa_def
*def
= &instr
->dest
.dest
.ssa
;
72 if (nir_op_infos
[instr
->op
].output_size
== 0) {
73 if (shrink_dest_to_read_mask(def
)) {
74 instr
->dest
.write_mask
&=
75 BITFIELD_MASK(def
->num_components
);
85 unsigned mask
= nir_ssa_def_components_read(def
);
87 /* If nothing was read, leave it up to DCE. */
91 int last_bit
= util_last_bit(mask
);
92 if (last_bit
< def
->num_components
) {
93 nir_ssa_def
*srcs
[NIR_MAX_VEC_COMPONENTS
] = { 0 };
94 for (int i
= 0; i
< last_bit
; i
++)
95 srcs
[i
] = nir_ssa_for_alu_src(b
, instr
, i
);
97 nir_ssa_def
*new_vec
= nir_vec(b
, srcs
, last_bit
);
98 nir_ssa_def_rewrite_uses(def
, nir_src_for_ssa(new_vec
));
113 opt_shrink_vectors_intrinsic(nir_intrinsic_instr
*instr
)
115 switch (instr
->intrinsic
) {
116 case nir_intrinsic_load_uniform
:
117 case nir_intrinsic_load_ubo
:
118 case nir_intrinsic_load_input
:
119 case nir_intrinsic_load_input_vertex
:
120 case nir_intrinsic_load_per_vertex_input
:
121 case nir_intrinsic_load_interpolated_input
:
122 case nir_intrinsic_load_ssbo
:
123 case nir_intrinsic_load_push_constant
:
124 case nir_intrinsic_load_constant
:
125 case nir_intrinsic_load_global
:
126 case nir_intrinsic_load_kernel_input
:
127 case nir_intrinsic_load_scratch
:
133 assert(nir_intrinsic_infos
[instr
->intrinsic
].has_dest
);
134 /* Must be a vectorized intrinsic that we can resize. */
135 assert(instr
->num_components
!= 0);
137 if (shrink_dest_to_read_mask(&instr
->dest
.ssa
)) {
138 instr
->num_components
= instr
->dest
.ssa
.num_components
;
146 opt_shrink_vectors_load_const(nir_load_const_instr
*instr
)
148 return shrink_dest_to_read_mask(&instr
->def
);
152 opt_shrink_vectors_ssa_undef(nir_ssa_undef_instr
*instr
)
154 return shrink_dest_to_read_mask(&instr
->def
);
158 opt_shrink_vectors_instr(nir_builder
*b
, nir_instr
*instr
)
160 b
->cursor
= nir_before_instr(instr
);
162 switch (instr
->type
) {
163 case nir_instr_type_alu
:
164 return opt_shrink_vectors_alu(b
, nir_instr_as_alu(instr
));
166 case nir_instr_type_intrinsic
:
167 return opt_shrink_vectors_intrinsic(nir_instr_as_intrinsic(instr
));
169 case nir_instr_type_load_const
:
170 return opt_shrink_vectors_load_const(nir_instr_as_load_const(instr
));
172 case nir_instr_type_ssa_undef
:
173 return opt_shrink_vectors_ssa_undef(nir_instr_as_ssa_undef(instr
));
183 nir_opt_shrink_vectors(nir_shader
*shader
)
185 bool progress
= false;
187 nir_foreach_function(function
, shader
) {
192 nir_builder_init(&b
, function
->impl
);
194 nir_foreach_block(block
, function
->impl
) {
195 nir_foreach_instr(instr
, block
) {
196 progress
|= opt_shrink_vectors_instr(&b
, instr
);
201 nir_metadata_preserve(function
->impl
,
202 nir_metadata_block_index
|
203 nir_metadata_dominance
);
205 nir_metadata_preserve(function
->impl
, nir_metadata_all
);