/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Jason Ekstrand (jason@jlekstrand.net)
 *
 */
29 #include "nir_search.h"
30 #include "nir_builder.h"
31 #include "util/half_float.h"
36 unsigned variables_seen
;
37 nir_alu_src variables
[NIR_SEARCH_MAX_VARIABLES
];
41 match_expression(const nir_search_expression
*expr
, nir_alu_instr
*instr
,
42 unsigned num_components
, const uint8_t *swizzle
,
43 struct match_state
*state
);
45 static const uint8_t identity_swizzle
[NIR_MAX_VEC_COMPONENTS
] = { 0, 1, 2, 3 };
48 * Check if a source produces a value of the given type.
50 * Used for satisfying 'a@type' constraints.
53 src_is_type(nir_src src
, nir_alu_type type
)
55 assert(type
!= nir_type_invalid
);
60 if (src
.ssa
->parent_instr
->type
== nir_instr_type_alu
) {
61 nir_alu_instr
*src_alu
= nir_instr_as_alu(src
.ssa
->parent_instr
);
62 nir_alu_type output_type
= nir_op_infos
[src_alu
->op
].output_type
;
64 if (type
== nir_type_bool
) {
65 switch (src_alu
->op
) {
69 return src_is_type(src_alu
->src
[0].src
, nir_type_bool
) &&
70 src_is_type(src_alu
->src
[1].src
, nir_type_bool
);
72 return src_is_type(src_alu
->src
[0].src
, nir_type_bool
);
78 return nir_alu_type_get_base_type(output_type
) == type
;
79 } else if (src
.ssa
->parent_instr
->type
== nir_instr_type_intrinsic
) {
80 nir_intrinsic_instr
*intr
= nir_instr_as_intrinsic(src
.ssa
->parent_instr
);
82 if (type
== nir_type_bool
) {
83 return intr
->intrinsic
== nir_intrinsic_load_front_face
||
84 intr
->intrinsic
== nir_intrinsic_load_helper_invocation
;
93 nir_op_matches_search_op(nir_op nop
, uint16_t sop
)
95 if (sop
<= nir_last_opcode
)
98 #define MATCH_FCONV_CASE(op) \
99 case nir_search_op_##op: \
100 return nop == nir_op_##op##16 || \
101 nop == nir_op_##op##32 || \
102 nop == nir_op_##op##64;
104 #define MATCH_ICONV_CASE(op) \
105 case nir_search_op_##op: \
106 return nop == nir_op_##op##8 || \
107 nop == nir_op_##op##16 || \
108 nop == nir_op_##op##32 || \
109 nop == nir_op_##op##64;
111 #define MATCH_BCONV_CASE(op) \
112 case nir_search_op_##op: \
113 return nop == nir_op_##op##1 || \
114 nop == nir_op_##op##32;
117 MATCH_FCONV_CASE(i2f
)
118 MATCH_FCONV_CASE(u2f
)
119 MATCH_FCONV_CASE(f2f
)
120 MATCH_ICONV_CASE(f2u
)
121 MATCH_ICONV_CASE(f2i
)
122 MATCH_ICONV_CASE(u2u
)
123 MATCH_ICONV_CASE(i2i
)
124 MATCH_FCONV_CASE(b2f
)
125 MATCH_ICONV_CASE(b2i
)
126 MATCH_BCONV_CASE(i2b
)
127 MATCH_BCONV_CASE(f2b
)
129 unreachable("Invalid nir_search_op");
132 #undef MATCH_FCONV_CASE
133 #undef MATCH_ICONV_CASE
137 nir_op_for_search_op(uint16_t sop
, unsigned bit_size
)
139 if (sop
<= nir_last_opcode
)
142 #define RET_FCONV_CASE(op) \
143 case nir_search_op_##op: \
144 switch (bit_size) { \
145 case 16: return nir_op_##op##16; \
146 case 32: return nir_op_##op##32; \
147 case 64: return nir_op_##op##64; \
148 default: unreachable("Invalid bit size"); \
151 #define RET_ICONV_CASE(op) \
152 case nir_search_op_##op: \
153 switch (bit_size) { \
154 case 8: return nir_op_##op##8; \
155 case 16: return nir_op_##op##16; \
156 case 32: return nir_op_##op##32; \
157 case 64: return nir_op_##op##64; \
158 default: unreachable("Invalid bit size"); \
161 #define RET_BCONV_CASE(op) \
162 case nir_search_op_##op: \
163 switch (bit_size) { \
164 case 1: return nir_op_##op##1; \
165 case 32: return nir_op_##op##32; \
166 default: unreachable("Invalid bit size"); \
182 unreachable("Invalid nir_search_op");
185 #undef RET_FCONV_CASE
186 #undef RET_ICONV_CASE
190 match_value(const nir_search_value
*value
, nir_alu_instr
*instr
, unsigned src
,
191 unsigned num_components
, const uint8_t *swizzle
,
192 struct match_state
*state
)
194 uint8_t new_swizzle
[NIR_MAX_VEC_COMPONENTS
];
196 /* Searching only works on SSA values because, if it's not SSA, we can't
197 * know if the value changed between one instance of that value in the
198 * expression and another. Also, the replace operation will place reads of
199 * that value right before the last instruction in the expression we're
200 * replacing so those reads will happen after the original reads and may
201 * not be valid if they're register reads.
203 if (!instr
->src
[src
].src
.is_ssa
)
206 /* If the source is an explicitly sized source, then we need to reset
207 * both the number of components and the swizzle.
209 if (nir_op_infos
[instr
->op
].input_sizes
[src
] != 0) {
210 num_components
= nir_op_infos
[instr
->op
].input_sizes
[src
];
211 swizzle
= identity_swizzle
;
214 for (unsigned i
= 0; i
< num_components
; ++i
)
215 new_swizzle
[i
] = instr
->src
[src
].swizzle
[swizzle
[i
]];
217 /* If the value has a specific bit size and it doesn't match, bail */
218 if (value
->bit_size
> 0 &&
219 nir_src_bit_size(instr
->src
[src
].src
) != value
->bit_size
)
222 switch (value
->type
) {
223 case nir_search_value_expression
:
224 if (instr
->src
[src
].src
.ssa
->parent_instr
->type
!= nir_instr_type_alu
)
227 return match_expression(nir_search_value_as_expression(value
),
228 nir_instr_as_alu(instr
->src
[src
].src
.ssa
->parent_instr
),
229 num_components
, new_swizzle
, state
);
231 case nir_search_value_variable
: {
232 nir_search_variable
*var
= nir_search_value_as_variable(value
);
233 assert(var
->variable
< NIR_SEARCH_MAX_VARIABLES
);
235 if (state
->variables_seen
& (1 << var
->variable
)) {
236 if (state
->variables
[var
->variable
].src
.ssa
!= instr
->src
[src
].src
.ssa
)
239 assert(!instr
->src
[src
].abs
&& !instr
->src
[src
].negate
);
241 for (unsigned i
= 0; i
< num_components
; ++i
) {
242 if (state
->variables
[var
->variable
].swizzle
[i
] != new_swizzle
[i
])
248 if (var
->is_constant
&&
249 instr
->src
[src
].src
.ssa
->parent_instr
->type
!= nir_instr_type_load_const
)
252 if (var
->cond
&& !var
->cond(instr
, src
, num_components
, new_swizzle
))
255 if (var
->type
!= nir_type_invalid
&&
256 !src_is_type(instr
->src
[src
].src
, var
->type
))
259 state
->variables_seen
|= (1 << var
->variable
);
260 state
->variables
[var
->variable
].src
= instr
->src
[src
].src
;
261 state
->variables
[var
->variable
].abs
= false;
262 state
->variables
[var
->variable
].negate
= false;
264 for (unsigned i
= 0; i
< NIR_MAX_VEC_COMPONENTS
; ++i
) {
265 if (i
< num_components
)
266 state
->variables
[var
->variable
].swizzle
[i
] = new_swizzle
[i
];
268 state
->variables
[var
->variable
].swizzle
[i
] = 0;
275 case nir_search_value_constant
: {
276 nir_search_constant
*const_val
= nir_search_value_as_constant(value
);
278 if (!nir_src_is_const(instr
->src
[src
].src
))
281 switch (const_val
->type
) {
283 for (unsigned i
= 0; i
< num_components
; ++i
) {
284 double val
= nir_src_comp_as_float(instr
->src
[src
].src
,
286 if (val
!= const_val
->data
.d
)
293 case nir_type_bool
: {
294 unsigned bit_size
= nir_src_bit_size(instr
->src
[src
].src
);
295 uint64_t mask
= bit_size
== 64 ? UINT64_MAX
: (1ull << bit_size
) - 1;
296 for (unsigned i
= 0; i
< num_components
; ++i
) {
297 uint64_t val
= nir_src_comp_as_uint(instr
->src
[src
].src
,
299 if ((val
& mask
) != (const_val
->data
.u
& mask
))
306 unreachable("Invalid alu source type");
311 unreachable("Invalid search value type");
316 match_expression(const nir_search_expression
*expr
, nir_alu_instr
*instr
,
317 unsigned num_components
, const uint8_t *swizzle
,
318 struct match_state
*state
)
320 if (expr
->cond
&& !expr
->cond(instr
))
323 if (!nir_op_matches_search_op(instr
->op
, expr
->opcode
))
326 assert(instr
->dest
.dest
.is_ssa
);
328 if (expr
->value
.bit_size
> 0 &&
329 instr
->dest
.dest
.ssa
.bit_size
!= expr
->value
.bit_size
)
332 state
->inexact_match
= expr
->inexact
|| state
->inexact_match
;
333 state
->has_exact_alu
= instr
->exact
|| state
->has_exact_alu
;
334 if (state
->inexact_match
&& state
->has_exact_alu
)
337 assert(!instr
->dest
.saturate
);
338 assert(nir_op_infos
[instr
->op
].num_inputs
> 0);
340 /* If we have an explicitly sized destination, we can only handle the
341 * identity swizzle. While dot(vec3(a, b, c).zxy) is a valid
342 * expression, we don't have the information right now to propagate that
343 * swizzle through. We can only properly propagate swizzles if the
344 * instruction is vectorized.
346 if (nir_op_infos
[instr
->op
].output_size
!= 0) {
347 for (unsigned i
= 0; i
< num_components
; i
++) {
353 /* Stash off the current variables_seen bitmask. This way we can
354 * restore it prior to matching in the commutative case below.
356 unsigned variables_seen_stash
= state
->variables_seen
;
359 for (unsigned i
= 0; i
< nir_op_infos
[instr
->op
].num_inputs
; i
++) {
360 if (!match_value(expr
->srcs
[i
], instr
, i
, num_components
,
370 if (nir_op_infos
[instr
->op
].algebraic_properties
& NIR_OP_IS_COMMUTATIVE
) {
371 assert(nir_op_infos
[instr
->op
].num_inputs
== 2);
373 /* Restore the variables_seen bitmask. If we don't do this, then we
374 * could end up with an erroneous failure due to variables found in the
375 * first match attempt above not matching those in the second.
377 state
->variables_seen
= variables_seen_stash
;
379 if (!match_value(expr
->srcs
[0], instr
, 1, num_components
,
383 return match_value(expr
->srcs
[1], instr
, 0, num_components
,
391 replace_bitsize(const nir_search_value
*value
, unsigned search_bitsize
,
392 struct match_state
*state
)
394 if (value
->bit_size
> 0)
395 return value
->bit_size
;
396 if (value
->bit_size
< 0)
397 return nir_src_bit_size(state
->variables
[-value
->bit_size
- 1].src
);
398 return search_bitsize
;
402 construct_value(nir_builder
*build
,
403 const nir_search_value
*value
,
404 unsigned num_components
, unsigned search_bitsize
,
405 struct match_state
*state
,
408 switch (value
->type
) {
409 case nir_search_value_expression
: {
410 const nir_search_expression
*expr
= nir_search_value_as_expression(value
);
411 unsigned dst_bit_size
= replace_bitsize(value
, search_bitsize
, state
);
412 nir_op op
= nir_op_for_search_op(expr
->opcode
, dst_bit_size
);
414 if (nir_op_infos
[op
].output_size
!= 0)
415 num_components
= nir_op_infos
[op
].output_size
;
417 nir_alu_instr
*alu
= nir_alu_instr_create(build
->shader
, op
);
418 nir_ssa_dest_init(&alu
->instr
, &alu
->dest
.dest
, num_components
,
420 alu
->dest
.write_mask
= (1 << num_components
) - 1;
421 alu
->dest
.saturate
= false;
423 /* We have no way of knowing what values in a given search expression
424 * map to a particular replacement value. Therefore, if the
425 * expression we are replacing has any exact values, the entire
426 * replacement should be exact.
428 alu
->exact
= state
->has_exact_alu
;
430 for (unsigned i
= 0; i
< nir_op_infos
[op
].num_inputs
; i
++) {
431 /* If the source is an explicitly sized source, then we need to reset
432 * the number of components to match.
434 if (nir_op_infos
[alu
->op
].input_sizes
[i
] != 0)
435 num_components
= nir_op_infos
[alu
->op
].input_sizes
[i
];
437 alu
->src
[i
] = construct_value(build
, expr
->srcs
[i
],
438 num_components
, search_bitsize
,
442 nir_builder_instr_insert(build
, &alu
->instr
);
445 val
.src
= nir_src_for_ssa(&alu
->dest
.dest
.ssa
);
448 memcpy(val
.swizzle
, identity_swizzle
, sizeof val
.swizzle
);
453 case nir_search_value_variable
: {
454 const nir_search_variable
*var
= nir_search_value_as_variable(value
);
455 assert(state
->variables_seen
& (1 << var
->variable
));
457 nir_alu_src val
= { NIR_SRC_INIT
};
458 nir_alu_src_copy(&val
, &state
->variables
[var
->variable
],
459 (void *)build
->shader
);
460 assert(!var
->is_constant
);
465 case nir_search_value_constant
: {
466 const nir_search_constant
*c
= nir_search_value_as_constant(value
);
467 unsigned bit_size
= replace_bitsize(value
, search_bitsize
, state
);
472 cval
= nir_imm_floatN_t(build
, c
->data
.d
, bit_size
);
477 cval
= nir_imm_intN_t(build
, c
->data
.i
, bit_size
);
481 cval
= nir_imm_boolN_t(build
, c
->data
.u
, bit_size
);
485 unreachable("Invalid alu source type");
489 val
.src
= nir_src_for_ssa(cval
);
492 memset(val
.swizzle
, 0, sizeof val
.swizzle
);
498 unreachable("Invalid search value type");
503 nir_replace_instr(nir_builder
*build
, nir_alu_instr
*instr
,
504 const nir_search_expression
*search
,
505 const nir_search_value
*replace
)
507 uint8_t swizzle
[NIR_MAX_VEC_COMPONENTS
] = { 0 };
509 for (unsigned i
= 0; i
< instr
->dest
.dest
.ssa
.num_components
; ++i
)
512 assert(instr
->dest
.dest
.is_ssa
);
514 struct match_state state
;
515 state
.inexact_match
= false;
516 state
.has_exact_alu
= false;
517 state
.variables_seen
= 0;
519 if (!match_expression(search
, instr
, instr
->dest
.dest
.ssa
.num_components
,
523 build
->cursor
= nir_before_instr(&instr
->instr
);
525 nir_alu_src val
= construct_value(build
, replace
,
526 instr
->dest
.dest
.ssa
.num_components
,
527 instr
->dest
.dest
.ssa
.bit_size
,
528 &state
, &instr
->instr
);
530 /* Inserting a mov may be unnecessary. However, it's much easier to
531 * simply let copy propagation clean this up than to try to go through
532 * and rewrite swizzles ourselves.
534 nir_ssa_def
*ssa_val
=
535 nir_imov_alu(build
, val
, instr
->dest
.dest
.ssa
.num_components
);
536 nir_ssa_def_rewrite_uses(&instr
->dest
.dest
.ssa
, nir_src_for_ssa(ssa_val
));
538 /* We know this one has no more uses because we just rewrote them all,
539 * so we can remove it. The rest of the matched expression, however, we
540 * don't know so much about. We'll just let dead code clean them up.
542 nir_instr_remove(&instr
->instr
);