/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
25 #include "nir_builder.h"
28 assert_ssa_def_is_not_1bit(nir_ssa_def
*def
, UNUSED
void *unused
)
30 assert(def
->bit_size
> 1);
35 rewrite_1bit_ssa_def_to_32bit(nir_ssa_def
*def
, void *_progress
)
37 bool *progress
= _progress
;
38 if (def
->bit_size
== 1) {
46 get_bool_convert_opcode(uint32_t dst_bit_size
)
48 switch (dst_bit_size
) {
49 case 32: return nir_op_i2i32
;
50 case 16: return nir_op_i2i16
;
51 case 8: return nir_op_i2i8
;
53 unreachable("invalid boolean bit-size");
58 make_sources_canonical(nir_builder
*b
, nir_alu_instr
*alu
, uint32_t start_idx
)
60 /* TODO: for now we take the bit-size of the first source as the canonical
61 * form but we could try to be smarter.
63 const nir_op_info
*op_info
= &nir_op_infos
[alu
->op
];
64 uint32_t bit_size
= nir_src_bit_size(alu
->src
[start_idx
].src
);
65 for (uint32_t i
= start_idx
+ 1; i
< op_info
->num_inputs
; i
++) {
66 if (nir_src_bit_size(alu
->src
[i
].src
) != bit_size
) {
67 b
->cursor
= nir_before_instr(&alu
->instr
);
68 nir_op convert_op
= get_bool_convert_opcode(bit_size
);
69 nir_ssa_def
*new_src
=
70 nir_build_alu(b
, convert_op
, alu
->src
[i
].src
.ssa
, NULL
, NULL
, NULL
);
71 /* Retain the write mask and swizzle of the original instruction so
72 * that we don’t unnecessarily create a vectorized instruction.
74 nir_alu_instr
*conv_instr
=
75 nir_instr_as_alu(nir_builder_last_instr(b
));
76 conv_instr
->dest
.write_mask
= alu
->dest
.write_mask
;
77 conv_instr
->dest
.dest
.ssa
.num_components
=
78 alu
->dest
.dest
.ssa
.num_components
;
79 memcpy(conv_instr
->src
[0].swizzle
,
81 sizeof(conv_instr
->src
[0].swizzle
));
82 nir_instr_rewrite_src(&alu
->instr
,
83 &alu
->src
[i
].src
, nir_src_for_ssa(new_src
));
84 /* The swizzle will have been handled by the conversion instruction
85 * so we can reset it back to the default
87 for (unsigned j
= 0; j
< NIR_MAX_VEC_COMPONENTS
; j
++)
88 alu
->src
[i
].swizzle
[j
] = j
;
94 lower_alu_instr(nir_builder
*b
, nir_alu_instr
*alu
)
96 const nir_op_info
*op_info
= &nir_op_infos
[alu
->op
];
98 /* For operations that can take multiple boolean sources we need to ensure
99 * that all booleans have the same bit-size
110 if (nir_dest_bit_size(alu
->dest
.dest
) > 1)
111 break; /* Not a boolean instruction */
114 case nir_op_ball_fequal2
:
115 case nir_op_ball_fequal3
:
116 case nir_op_ball_fequal4
:
117 case nir_op_bany_fnequal2
:
118 case nir_op_bany_fnequal3
:
119 case nir_op_bany_fnequal4
:
120 case nir_op_ball_iequal2
:
121 case nir_op_ball_iequal3
:
122 case nir_op_ball_iequal4
:
123 case nir_op_bany_inequal2
:
124 case nir_op_bany_inequal3
:
125 case nir_op_bany_inequal4
:
128 make_sources_canonical(b
, alu
, 0);
132 /* bcsel may be choosing between boolean sources too */
133 if (nir_dest_bit_size(alu
->dest
.dest
) == 1)
134 make_sources_canonical(b
, alu
, 1);
141 /* Now that we have a canonical boolean bit-size, go on and rewrite the
142 * instruction to match the canonical bit-size.
144 uint32_t bit_size
= nir_src_bit_size(alu
->src
[0].src
);
145 assert(bit_size
> 1);
147 nir_op opcode
= alu
->op
;
157 /* Nothing to do here, we do not specialize these opcodes by bit-size */
161 opcode
= bit_size
== 8 ? nir_op_f2b8
:
162 bit_size
== 16 ? nir_op_f2b16
: nir_op_f2b32
;
166 opcode
= bit_size
== 8 ? nir_op_i2b8
:
167 bit_size
== 16 ? nir_op_i2b16
: nir_op_i2b32
;
171 /* Since the canonical bit size is the size of the src, it's a no-op */
176 /* For up-converting booleans, sign-extend */
177 opcode
= nir_op_i2i32
;
181 opcode
= bit_size
== 8 ? nir_op_flt8
:
182 bit_size
== 16 ? nir_op_flt16
: nir_op_flt32
;
186 opcode
= bit_size
== 8 ? nir_op_fge8
:
187 bit_size
== 16 ? nir_op_fge16
: nir_op_fge32
;
191 opcode
= bit_size
== 8 ? nir_op_feq8
:
192 bit_size
== 16 ? nir_op_feq16
: nir_op_feq32
;
196 opcode
= bit_size
== 8 ? nir_op_fne8
:
197 bit_size
== 16 ? nir_op_fne16
: nir_op_fne32
;
201 opcode
= bit_size
== 8 ? nir_op_ilt8
:
202 bit_size
== 16 ? nir_op_ilt16
: nir_op_ilt32
;
206 opcode
= bit_size
== 8 ? nir_op_ige8
:
207 bit_size
== 16 ? nir_op_ige16
: nir_op_ige32
;
211 opcode
= bit_size
== 8 ? nir_op_ieq8
:
212 bit_size
== 16 ? nir_op_ieq16
: nir_op_ieq32
;
216 opcode
= bit_size
== 8 ? nir_op_ine8
:
217 bit_size
== 16 ? nir_op_ine16
: nir_op_ine32
;
221 opcode
= bit_size
== 8 ? nir_op_ult8
:
222 bit_size
== 16 ? nir_op_ult16
: nir_op_ult32
;
226 opcode
= bit_size
== 8 ? nir_op_uge8
:
227 bit_size
== 16 ? nir_op_uge16
: nir_op_uge32
;
230 case nir_op_ball_fequal2
:
231 opcode
= bit_size
== 8 ? nir_op_b8all_fequal2
:
232 bit_size
== 16 ? nir_op_b16all_fequal2
:
233 nir_op_b32all_fequal2
;
236 case nir_op_ball_fequal3
:
237 opcode
= bit_size
== 8 ? nir_op_b8all_fequal3
:
238 bit_size
== 16 ? nir_op_b16all_fequal3
:
239 nir_op_b32all_fequal3
;
242 case nir_op_ball_fequal4
:
243 opcode
= bit_size
== 8 ? nir_op_b8all_fequal4
:
244 bit_size
== 16 ? nir_op_b16all_fequal4
:
245 nir_op_b32all_fequal4
;
248 case nir_op_bany_fnequal2
:
249 opcode
= bit_size
== 8 ? nir_op_b8any_fnequal2
:
250 bit_size
== 16 ? nir_op_b16any_fnequal2
:
251 nir_op_b32any_fnequal2
;
254 case nir_op_bany_fnequal3
:
255 opcode
= bit_size
== 8 ? nir_op_b8any_fnequal3
:
256 bit_size
== 16 ? nir_op_b16any_fnequal3
:
257 nir_op_b32any_fnequal3
;
260 case nir_op_bany_fnequal4
:
261 opcode
= bit_size
== 8 ? nir_op_b8any_fnequal4
:
262 bit_size
== 16 ? nir_op_b16any_fnequal4
:
263 nir_op_b32any_fnequal4
;
266 case nir_op_ball_iequal2
:
267 opcode
= bit_size
== 8 ? nir_op_b8all_iequal2
:
268 bit_size
== 16 ? nir_op_b16all_iequal2
:
269 nir_op_b32all_iequal2
;
272 case nir_op_ball_iequal3
:
273 opcode
= bit_size
== 8 ? nir_op_b8all_iequal3
:
274 bit_size
== 16 ? nir_op_b16all_iequal3
:
275 nir_op_b32all_iequal3
;
278 case nir_op_ball_iequal4
:
279 opcode
= bit_size
== 8 ? nir_op_b8all_iequal4
:
280 bit_size
== 16 ? nir_op_b16all_iequal4
:
281 nir_op_b32all_iequal4
;
284 case nir_op_bany_inequal2
:
285 opcode
= bit_size
== 8 ? nir_op_b8any_inequal2
:
286 bit_size
== 16 ? nir_op_b16any_inequal2
:
287 nir_op_b32any_inequal2
;
290 case nir_op_bany_inequal3
:
291 opcode
= bit_size
== 8 ? nir_op_b8any_inequal3
:
292 bit_size
== 16 ? nir_op_b16any_inequal3
:
293 nir_op_b32any_inequal3
;
296 case nir_op_bany_inequal4
:
297 opcode
= bit_size
== 8 ? nir_op_b8any_inequal4
:
298 bit_size
== 16 ? nir_op_b16any_inequal4
:
299 nir_op_b32any_inequal4
;
303 opcode
= bit_size
== 8 ? nir_op_b8csel
:
304 bit_size
== 16 ? nir_op_b16csel
: nir_op_b32csel
;
306 /* The destination of the selection may have a different bit-size from
307 * the bcsel condition.
309 bit_size
= nir_src_bit_size(alu
->src
[1].src
);
313 assert(alu
->dest
.dest
.ssa
.bit_size
> 1);
314 for (unsigned i
= 0; i
< op_info
->num_inputs
; i
++)
315 assert(alu
->src
[i
].src
.ssa
->bit_size
> 1);
321 if (alu
->dest
.dest
.ssa
.bit_size
== 1)
322 alu
->dest
.dest
.ssa
.bit_size
= bit_size
;
328 lower_load_const_instr(nir_load_const_instr
*load
)
330 bool progress
= false;
332 if (load
->def
.bit_size
> 1)
335 /* TODO: It is not clear if there is any case in which we can ever hit
336 * this path, so for now we just provide a 32-bit default.
338 * TODO2: after some changed on nir_const_value and other on upstream, we
339 * removed the initialization of a general value like this:
340 * nir_const_value value = load->value
342 * to initialize per value component. Need to confirm if that is correct,
343 * but look at the TOO before.
345 for (unsigned i
= 0; i
< load
->def
.num_components
; i
++) {
346 load
->value
[i
].u32
= load
->value
[i
].b
? NIR_TRUE
: NIR_FALSE
;
347 load
->def
.bit_size
= 32;
355 lower_phi_instr(nir_builder
*b
, nir_phi_instr
*phi
)
357 if (nir_dest_bit_size(phi
->dest
) != 1)
360 /* Ensure all phi sources have a canonical bit-size. We choose the
361 * bit-size of the first phi source as the canonical form.
363 * TODO: maybe we can be smarter about how we choose the canonical form.
365 uint32_t dst_bit_size
= 0;
366 nir_foreach_phi_src(phi_src
, phi
) {
367 uint32_t src_bit_size
= nir_src_bit_size(phi_src
->src
);
368 if (dst_bit_size
== 0) {
369 dst_bit_size
= src_bit_size
;
370 } else if (src_bit_size
!= dst_bit_size
) {
371 assert(phi_src
->src
.is_ssa
);
372 b
->cursor
= nir_before_src(&phi_src
->src
, false);
373 nir_op convert_op
= get_bool_convert_opcode(dst_bit_size
);
374 nir_ssa_def
*new_src
=
375 nir_build_alu(b
, convert_op
, phi_src
->src
.ssa
, NULL
, NULL
, NULL
);
376 nir_instr_rewrite_src(&phi
->instr
, &phi_src
->src
,
377 nir_src_for_ssa(new_src
));
381 phi
->dest
.ssa
.bit_size
= dst_bit_size
;
387 nir_lower_bool_to_bitsize_impl(nir_builder
*b
, nir_function_impl
*impl
)
389 bool progress
= false;
391 nir_foreach_block(block
, impl
) {
392 nir_foreach_instr_safe(instr
, block
) {
393 switch (instr
->type
) {
394 case nir_instr_type_alu
:
395 progress
|= lower_alu_instr(b
, nir_instr_as_alu(instr
));
398 case nir_instr_type_load_const
:
399 progress
|= lower_load_const_instr(nir_instr_as_load_const(instr
));
402 case nir_instr_type_phi
:
403 progress
|= lower_phi_instr(b
, nir_instr_as_phi(instr
));
406 case nir_instr_type_ssa_undef
:
407 case nir_instr_type_intrinsic
:
408 case nir_instr_type_tex
:
409 nir_foreach_ssa_def(instr
, rewrite_1bit_ssa_def_to_32bit
,
414 nir_foreach_ssa_def(instr
, assert_ssa_def_is_not_1bit
, NULL
);
420 nir_metadata_preserve(impl
, nir_metadata_block_index
|
421 nir_metadata_dominance
);
428 nir_lower_bool_to_bitsize(nir_shader
*shader
)
431 bool progress
= false;
433 nir_foreach_function(function
, shader
) {
434 if (function
->impl
) {
435 nir_builder_init(&b
, function
->impl
);
436 progress
= nir_lower_bool_to_bitsize_impl(&b
, function
->impl
) || progress
;