/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include <math.h>
#include <float.h>
#include "nir.h"
#include "nir_range_analysis.h"
#include "util/hash_table.h"
/**
 * Analyzes a sequence of operations to determine some aspects of the range of
 * the result.
 */
static bool
is_not_negative(enum ssa_ranges r)
{
   return r == gt_zero || r == ge_zero || r == eq_zero;
}
static void *
pack_data(const struct ssa_result_range r)
{
   return (void *)(uintptr_t)(r.range | r.is_integral << 8);
}
static struct ssa_result_range
unpack_data(const void *p)
{
   const uintptr_t v = (uintptr_t) p;

   return (struct ssa_result_range){v & 0xff, (v & 0x0ff00) != 0};
}
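/* Worked example of the packing scheme (illustrative only, not used by the
 * pass): for r = { gt_zero, true }, pack_data() produces the pointer-sized
 * value (gt_zero | (1 << 8)); unpack_data() recovers the range from the low
 * byte and is_integral from bit 8:
 *
 *    void *p = pack_data((struct ssa_result_range){gt_zero, true});
 *    struct ssa_result_range q = unpack_data(p);
 *    assert(q.range == gt_zero && q.is_integral);
 */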
static void *
pack_key(const struct nir_alu_instr *instr, nir_alu_type type)
{
   uintptr_t type_encoding;
   uintptr_t ptr = (uintptr_t) instr;

   /* The low 2 bits have to be zero or this whole scheme falls apart. */
   assert((ptr & 0x3) == 0);

   /* NIR is typeless in the sense that sequences of bits have whatever
    * meaning is attached to them by the instruction that consumes them.
    * However, the number of bits must match between producer and consumer.
    * As a result, the number of bits does not need to be encoded here.
    */
   switch (nir_alu_type_get_base_type(type)) {
   case nir_type_int:   type_encoding = 0; break;
   case nir_type_uint:  type_encoding = 1; break;
   case nir_type_bool:  type_encoding = 2; break;
   case nir_type_float: type_encoding = 3; break;
   default: unreachable("Invalid base type.");
   }

   return (void *)(ptr | type_encoding);
}
static nir_alu_type
nir_alu_src_type(const nir_alu_instr *instr, unsigned src)
{
   return nir_alu_type_get_base_type(nir_op_infos[instr->op].input_types[src]) |
          nir_src_bit_size(instr->src[src].src);
}
static struct ssa_result_range
analyze_constant(const struct nir_alu_instr *instr, unsigned src,
                 nir_alu_type use_type)
{
   uint8_t swizzle[4] = { 0, 1, 2, 3 };

   /* If the source is an explicitly sized source, then we need to reset
    * both the number of components and the swizzle.
    */
   const unsigned num_components = nir_ssa_alu_instr_src_components(instr, src);

   for (unsigned i = 0; i < num_components; ++i)
      swizzle[i] = instr->src[src].swizzle[i];

   const nir_load_const_instr *const load =
      nir_instr_as_load_const(instr->src[src].src.ssa->parent_instr);
   struct ssa_result_range r = { unknown, false };

   switch (nir_alu_type_get_base_type(use_type)) {
   case nir_type_float: {
      double min_value = DBL_MAX;
      double max_value = -DBL_MAX;
      bool any_zero = false;
      bool all_zero = true;

      r.is_integral = true;

      for (unsigned i = 0; i < num_components; ++i) {
         const double v = nir_const_value_as_float(load->value[swizzle[i]],
                                                   nir_src_bit_size(instr->src[src].src));

         if (floor(v) != v)
            r.is_integral = false;

         any_zero = any_zero || (v == 0.0);
         all_zero = all_zero && (v == 0.0);
         min_value = MIN2(min_value, v);
         max_value = MAX2(max_value, v);
      }

      assert(any_zero >= all_zero);
      assert(isnan(max_value) || max_value >= min_value);

      if (all_zero)
         r.range = eq_zero;
      else if (min_value > 0.0)
         r.range = gt_zero;
      else if (min_value == 0.0)
         r.range = ge_zero;
      else if (max_value < 0.0)
         r.range = lt_zero;
      else if (max_value == 0.0)
         r.range = le_zero;

      return r;
   }
   case nir_type_int:
   case nir_type_bool: {
      int64_t min_value = INT_MAX;
      int64_t max_value = INT_MIN;
      bool any_zero = false;
      bool all_zero = true;

      for (unsigned i = 0; i < num_components; ++i) {
         const int64_t v = nir_const_value_as_int(load->value[swizzle[i]],
                                                  nir_src_bit_size(instr->src[src].src));

         any_zero = any_zero || (v == 0);
         all_zero = all_zero && (v == 0);
         min_value = MIN2(min_value, v);
         max_value = MAX2(max_value, v);
      }

      assert(any_zero >= all_zero);
      assert(max_value >= min_value);

      if (all_zero)
         r.range = eq_zero;
      else if (min_value > 0)
         r.range = gt_zero;
      else if (min_value == 0)
         r.range = ge_zero;
      else if (max_value < 0)
         r.range = lt_zero;
      else if (max_value == 0)
         r.range = le_zero;

      return r;
   }
   case nir_type_uint: {
      bool any_zero = false;
      bool all_zero = true;

      for (unsigned i = 0; i < num_components; ++i) {
         const uint64_t v = nir_const_value_as_uint(load->value[swizzle[i]],
                                                    nir_src_bit_size(instr->src[src].src));

         any_zero = any_zero || (v == 0);
         all_zero = all_zero && (v == 0);
      }

      assert(any_zero >= all_zero);

      if (all_zero)
         r.range = eq_zero;
      else if (any_zero)
         r.range = ge_zero;
      else
         r.range = gt_zero;

      return r;
   }

   default:
      unreachable("Invalid alu source type");
   }
}
/**
 * Short-hand name for use in the tables in analyze_expression.  If this name
 * becomes a problem on some compiler, we can change it to _.
 */
#define _______ unknown
#if defined(__clang__)
   /* clang wants _Pragma("unroll X") */
   #define pragma_unroll_5 _Pragma("unroll 5")
   #define pragma_unroll_7 _Pragma("unroll 7")
/* gcc wants _Pragma("GCC unroll X") */
#elif defined(__GNUC__)
   #if __GNUC__ >= 8
      #define pragma_unroll_5 _Pragma("GCC unroll 5")
      #define pragma_unroll_7 _Pragma("GCC unroll 7")
   #else
      #pragma GCC optimize ("unroll-loops")
      #define pragma_unroll_5
      #define pragma_unroll_7
   #endif
#else
   /* MSVC doesn't have C99's _Pragma() */
   #define pragma_unroll_5
   #define pragma_unroll_7
#endif
#ifndef NDEBUG
#define ASSERT_TABLE_IS_COMMUTATIVE(t)                        \
   do {                                                       \
      static bool first = true;                               \
      if (first) {                                            \
         first = false;                                       \
         pragma_unroll_7                                      \
         for (unsigned r = 0; r < ARRAY_SIZE(t); r++) {       \
            pragma_unroll_7                                   \
            for (unsigned c = 0; c < ARRAY_SIZE(t[0]); c++)   \
               assert(t[r][c] == t[c][r]);                    \
         }                                                    \
      }                                                       \
   } while (false)
#define ASSERT_TABLE_IS_DIAGONAL(t)                           \
   do {                                                       \
      static bool first = true;                               \
      if (first) {                                            \
         first = false;                                       \
         pragma_unroll_7                                      \
         for (unsigned r = 0; r < ARRAY_SIZE(t); r++)         \
            assert(t[r][r] == r);                             \
      }                                                       \
   } while (false)
static enum ssa_ranges
union_ranges(enum ssa_ranges a, enum ssa_ranges b)
{
   static const enum ssa_ranges union_table[last_range + 1][last_range + 1] = {
      /* left\right   unknown  lt_zero  le_zero  gt_zero  ge_zero  ne_zero  eq_zero */
      /* unknown */ { _______, _______, _______, _______, _______, _______, _______ },
      /* lt_zero */ { _______, lt_zero, le_zero, ne_zero, _______, ne_zero, le_zero },
      /* le_zero */ { _______, le_zero, le_zero, _______, _______, _______, le_zero },
      /* gt_zero */ { _______, ne_zero, _______, gt_zero, ge_zero, ne_zero, ge_zero },
      /* ge_zero */ { _______, _______, _______, ge_zero, ge_zero, _______, ge_zero },
      /* ne_zero */ { _______, ne_zero, _______, ne_zero, _______, ne_zero, _______ },
      /* eq_zero */ { _______, le_zero, le_zero, ge_zero, ge_zero, _______, eq_zero },
   };

   ASSERT_TABLE_IS_COMMUTATIVE(union_table);
   ASSERT_TABLE_IS_DIAGONAL(union_table);

   return union_table[a][b];
}
/* Verify that the 'unknown' entry in each row (or column) of the table is the
 * union of all the other values in the row (or column).
 */
#define ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_2_SOURCE(t)              \
   do {                                                                 \
      static bool first = true;                                         \
      if (first) {                                                      \
         first = false;                                                 \
         pragma_unroll_7                                                \
         for (unsigned i = 0; i < last_range; i++) {                    \
            enum ssa_ranges col_range = t[i][unknown + 1];              \
            enum ssa_ranges row_range = t[unknown + 1][i];              \
                                                                        \
            pragma_unroll_5                                             \
            for (unsigned j = unknown + 2; j < last_range; j++) {       \
               col_range = union_ranges(col_range, t[i][j]);            \
               row_range = union_ranges(row_range, t[j][i]);            \
            }                                                           \
                                                                        \
            assert(col_range == t[i][unknown]);                         \
            assert(row_range == t[unknown][i]);                         \
         }                                                              \
      }                                                                 \
   } while (false)
/* For most operations, the union of ranges for a strict inequality and
 * equality should be the range of the non-strict inequality (e.g.,
 * union_ranges(range(op(lt_zero)), range(op(eq_zero))) == range(op(le_zero)).
 *
 * Does not apply to selection-like opcodes (bcsel, fmin, fmax, etc.).
 */
#define ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_1_SOURCE(t) \
   do {                                                                  \
      assert(union_ranges(t[lt_zero], t[eq_zero]) == t[le_zero]);        \
      assert(union_ranges(t[gt_zero], t[eq_zero]) == t[ge_zero]);        \
   } while (false)
#define ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_2_SOURCE(t) \
   do {                                                                  \
      static bool first = true;                                          \
      if (first) {                                                       \
         first = false;                                                  \
         pragma_unroll_7                                                 \
         for (unsigned i = 0; i < last_range; i++) {                     \
            assert(union_ranges(t[i][lt_zero], t[i][eq_zero]) == t[i][le_zero]); \
            assert(union_ranges(t[i][gt_zero], t[i][eq_zero]) == t[i][ge_zero]); \
            assert(union_ranges(t[lt_zero][i], t[eq_zero][i]) == t[le_zero][i]); \
            assert(union_ranges(t[gt_zero][i], t[eq_zero][i]) == t[ge_zero][i]); \
         }                                                               \
      }                                                                  \
   } while (false)
/* Several other unordered tuples span the range of "everything."  Each should
 * have the same value as unknown: (lt_zero, ge_zero), (le_zero, gt_zero), and
 * (eq_zero, ne_zero).  union_ranges is already commutative, so only one
 * ordering needs to be checked.
 *
 * Does not apply to selection-like opcodes (bcsel, fmin, fmax, etc.).
 *
 * In cases where this can be used, it is unnecessary to also use
 * ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_*_SOURCE.  For any range X,
 * union_ranges(X, X) == X.  The disjoint ranges cover all of the non-unknown
 * possibilities, so the union of all the unions of disjoint ranges is
 * equivalent to the union of "others."
 */
#define ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_1_SOURCE(t)        \
   do {                                                             \
      assert(union_ranges(t[lt_zero], t[ge_zero]) == t[unknown]);   \
      assert(union_ranges(t[le_zero], t[gt_zero]) == t[unknown]);   \
      assert(union_ranges(t[eq_zero], t[ne_zero]) == t[unknown]);   \
   } while (false)
#define ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(t)        \
   do {                                                             \
      static bool first = true;                                     \
      if (first) {                                                  \
         first = false;                                             \
         pragma_unroll_7                                            \
         for (unsigned i = 0; i < last_range; i++) {                \
            assert(union_ranges(t[i][lt_zero], t[i][ge_zero]) ==    \
                   t[i][unknown]);                                  \
            assert(union_ranges(t[i][le_zero], t[i][gt_zero]) ==    \
                   t[i][unknown]);                                  \
            assert(union_ranges(t[i][eq_zero], t[i][ne_zero]) ==    \
                   t[i][unknown]);                                  \
                                                                    \
            assert(union_ranges(t[lt_zero][i], t[ge_zero][i]) ==    \
                   t[unknown][i]);                                  \
            assert(union_ranges(t[le_zero][i], t[gt_zero][i]) ==    \
                   t[unknown][i]);                                  \
            assert(union_ranges(t[eq_zero][i], t[ne_zero][i]) ==    \
                   t[unknown][i]);                                  \
         }                                                          \
      }                                                             \
   } while (false)
#else
#define ASSERT_TABLE_IS_COMMUTATIVE(t)
#define ASSERT_TABLE_IS_DIAGONAL(t)
#define ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_2_SOURCE(t)
#define ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_1_SOURCE(t)
#define ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_2_SOURCE(t)
#define ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_1_SOURCE(t)
#define ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(t)
#endif /* !defined(NDEBUG) */
/**
 * Analyze an expression to determine the range of its result
 *
 * The end result of this analysis is a token that communicates something
 * about the range of values.  There's an implicit grammar that produces
 * tokens from sequences of literal values, other tokens, and operations.
 * This function implements this grammar as a recursive-descent parser.  Some
 * (but not all) of the grammar is listed in-line in the function.
 */
static struct ssa_result_range
analyze_expression(const nir_alu_instr *instr, unsigned src,
                   struct hash_table *ht, nir_alu_type use_type)
{
   /* Ensure that the _Pragma("GCC unroll 7") above are correct. */
   STATIC_ASSERT(last_range + 1 == 7);

   if (!instr->src[src].src.is_ssa)
      return (struct ssa_result_range){unknown, false};

   if (nir_src_is_const(instr->src[src].src))
      return analyze_constant(instr, src, use_type);

   if (instr->src[src].src.ssa->parent_instr->type != nir_instr_type_alu)
      return (struct ssa_result_range){unknown, false};

   const struct nir_alu_instr *const alu =
      nir_instr_as_alu(instr->src[src].src.ssa->parent_instr);

   /* Bail if the type of the instruction generating the value does not match
    * the type the value will be interpreted as.  int/uint/bool can be
    * reinterpreted trivially.  The most important cases are between float and
    * non-float.
    */
   if (alu->op != nir_op_mov && alu->op != nir_op_bcsel) {
      const nir_alu_type use_base_type =
         nir_alu_type_get_base_type(use_type);
      const nir_alu_type src_base_type =
         nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type);

      if (use_base_type != src_base_type &&
          (use_base_type == nir_type_float ||
           src_base_type == nir_type_float)) {
         return (struct ssa_result_range){unknown, false};
      }
   }

   struct hash_entry *he = _mesa_hash_table_search(ht, pack_key(alu, use_type));
   if (he != NULL)
      return unpack_data(he->data);

   struct ssa_result_range r = {unknown, false};
   /* ge_zero: ge_zero + ge_zero
    *        ;
    *
    * gt_zero: gt_zero + eq_zero
    *        | gt_zero + ge_zero
    *        | eq_zero + gt_zero   # Addition is commutative
    *        | ge_zero + gt_zero   # Addition is commutative
    *        | gt_zero + gt_zero
    *        ;
    *
    * le_zero: le_zero + le_zero
    *        ;
    *
    * lt_zero: lt_zero + eq_zero
    *        | lt_zero + le_zero
    *        | eq_zero + lt_zero   # Addition is commutative
    *        | le_zero + lt_zero   # Addition is commutative
    *        | lt_zero + lt_zero
    *        ;
    *
    * ne_zero: eq_zero + ne_zero
    *        | ne_zero + eq_zero   # Addition is commutative
    *        ;
    *
    * eq_zero: eq_zero + eq_zero
    *        ;
    *
    * All other cases are 'unknown'.  The seemingly odd entry is (ne_zero,
    * ne_zero), but that could be (-5, +5) which is not ne_zero.
    */
   static const enum ssa_ranges fadd_table[last_range + 1][last_range + 1] = {
      /* left\right   unknown  lt_zero  le_zero  gt_zero  ge_zero  ne_zero  eq_zero */
      /* unknown */ { _______, _______, _______, _______, _______, _______, _______ },
      /* lt_zero */ { _______, lt_zero, lt_zero, _______, _______, _______, lt_zero },
      /* le_zero */ { _______, lt_zero, le_zero, _______, _______, _______, le_zero },
      /* gt_zero */ { _______, _______, _______, gt_zero, gt_zero, _______, gt_zero },
      /* ge_zero */ { _______, _______, _______, gt_zero, ge_zero, _______, ge_zero },
      /* ne_zero */ { _______, _______, _______, _______, _______, _______, ne_zero },
      /* eq_zero */ { _______, lt_zero, le_zero, gt_zero, ge_zero, ne_zero, eq_zero },
   };

   ASSERT_TABLE_IS_COMMUTATIVE(fadd_table);
   ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(fadd_table);
   ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_2_SOURCE(fadd_table);
   /* Due to flush-to-zero semantics of floating-point numbers with very
    * small magnitudes, we can never really be sure a result will be
    * non-zero.
    *
    * ge_zero: ge_zero * ge_zero
    *        | ge_zero * gt_zero
    *        | ge_zero * eq_zero
    *        | le_zero * lt_zero
    *        | lt_zero * le_zero   # Multiplication is commutative
    *        | le_zero * le_zero
    *        | gt_zero * ge_zero   # Multiplication is commutative
    *        | eq_zero * ge_zero   # Multiplication is commutative
    *        | a * a               # Left source == right source
    *        | gt_zero * gt_zero
    *        | lt_zero * lt_zero
    *        ;
    *
    * le_zero: ge_zero * le_zero
    *        | ge_zero * lt_zero
    *        | lt_zero * ge_zero   # Multiplication is commutative
    *        | le_zero * ge_zero   # Multiplication is commutative
    *        | le_zero * gt_zero
    *        | lt_zero * gt_zero
    *        | gt_zero * lt_zero   # Multiplication is commutative
    *        ;
    *
    * eq_zero: eq_zero * <any>
    *        | <any> * eq_zero     # Multiplication is commutative
    *        ;
    *
    * All other cases are 'unknown'.
    */
   static const enum ssa_ranges fmul_table[last_range + 1][last_range + 1] = {
      /* left\right   unknown  lt_zero  le_zero  gt_zero  ge_zero  ne_zero  eq_zero */
      /* unknown */ { _______, _______, _______, _______, _______, _______, eq_zero },
      /* lt_zero */ { _______, ge_zero, ge_zero, le_zero, le_zero, _______, eq_zero },
      /* le_zero */ { _______, ge_zero, ge_zero, le_zero, le_zero, _______, eq_zero },
      /* gt_zero */ { _______, le_zero, le_zero, ge_zero, ge_zero, _______, eq_zero },
      /* ge_zero */ { _______, le_zero, le_zero, ge_zero, ge_zero, _______, eq_zero },
      /* ne_zero */ { _______, _______, _______, _______, _______, _______, eq_zero },
      /* eq_zero */ { eq_zero, eq_zero, eq_zero, eq_zero, eq_zero, eq_zero, eq_zero }
   };

   ASSERT_TABLE_IS_COMMUTATIVE(fmul_table);
   ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(fmul_table);
   ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_2_SOURCE(fmul_table);
   static const enum ssa_ranges fneg_table[last_range + 1] = {
      /* unknown  lt_zero  le_zero  gt_zero  ge_zero  ne_zero  eq_zero */
         _______, gt_zero, ge_zero, lt_zero, le_zero, ne_zero, eq_zero
   };

   ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_1_SOURCE(fneg_table);
   ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_1_SOURCE(fneg_table);
   switch (alu->op) {
   case nir_op_b2f32:
   case nir_op_b2i32:
      /* b2f32 produces either 0.0 or 1.0, so the result is ge_zero and
       * integral.  The 0 or 1 produced by b2i32, reinterpreted as a float,
       * is 0.0 or a subnormal, so it is ge_zero but not integral.
       */
      r = (struct ssa_result_range){ge_zero, alu->op == nir_op_b2f32};
      break;
   case nir_op_bcsel: {
      const struct ssa_result_range left =
         analyze_expression(alu, 1, ht, use_type);
      const struct ssa_result_range right =
         analyze_expression(alu, 2, ht, use_type);

      r.is_integral = left.is_integral && right.is_integral;

      /* le_zero: bcsel(<any>, le_zero, lt_zero)
       *        | bcsel(<any>, eq_zero, lt_zero)
       *        | bcsel(<any>, le_zero, eq_zero)
       *        | bcsel(<any>, lt_zero, le_zero)
       *        | bcsel(<any>, lt_zero, eq_zero)
       *        | bcsel(<any>, eq_zero, le_zero)
       *        | bcsel(<any>, le_zero, le_zero)
       *        ;
       *
       * lt_zero: bcsel(<any>, lt_zero, lt_zero)
       *        ;
       *
       * ge_zero: bcsel(<any>, ge_zero, ge_zero)
       *        | bcsel(<any>, ge_zero, gt_zero)
       *        | bcsel(<any>, ge_zero, eq_zero)
       *        | bcsel(<any>, gt_zero, ge_zero)
       *        | bcsel(<any>, eq_zero, ge_zero)
       *        ;
       *
       * gt_zero: bcsel(<any>, gt_zero, gt_zero)
       *        ;
       *
       * ne_zero: bcsel(<any>, ne_zero, gt_zero)
       *        | bcsel(<any>, ne_zero, lt_zero)
       *        | bcsel(<any>, gt_zero, lt_zero)
       *        | bcsel(<any>, gt_zero, ne_zero)
       *        | bcsel(<any>, lt_zero, ne_zero)
       *        | bcsel(<any>, lt_zero, gt_zero)
       *        | bcsel(<any>, ne_zero, ne_zero)
       *        ;
       *
       * eq_zero: bcsel(<any>, eq_zero, eq_zero)
       *        ;
       *
       * All other cases are 'unknown'.
       *
       * The ranges could be tightened if the range of the first source is
       * known.  However, opt_algebraic will (eventually) eliminate the bcsel
       * if the condition is known.
       */
      static const enum ssa_ranges table[last_range + 1][last_range + 1] = {
         /* left\right   unknown  lt_zero  le_zero  gt_zero  ge_zero  ne_zero  eq_zero */
         /* unknown */ { _______, _______, _______, _______, _______, _______, _______ },
         /* lt_zero */ { _______, lt_zero, le_zero, ne_zero, _______, ne_zero, le_zero },
         /* le_zero */ { _______, le_zero, le_zero, _______, _______, _______, le_zero },
         /* gt_zero */ { _______, ne_zero, _______, gt_zero, ge_zero, ne_zero, ge_zero },
         /* ge_zero */ { _______, _______, _______, ge_zero, ge_zero, _______, ge_zero },
         /* ne_zero */ { _______, ne_zero, _______, ne_zero, _______, ne_zero, _______ },
         /* eq_zero */ { _______, le_zero, le_zero, ge_zero, ge_zero, _______, eq_zero },
      };

      ASSERT_TABLE_IS_COMMUTATIVE(table);
      ASSERT_TABLE_IS_DIAGONAL(table);
      ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_2_SOURCE(table);

      r.range = table[left.range][right.range];
      break;
   }
   case nir_op_i2f32:
   case nir_op_u2f32:
      r = analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));

      r.is_integral = true;

      if (r.range == unknown && alu->op == nir_op_u2f32)
         r.range = ge_zero;

      break;
   case nir_op_fabs:
      r = analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));

      switch (r.range) {
      case unknown:
      case le_zero:
      case ge_zero:
         r.range = ge_zero;
         break;

      case lt_zero:
      case gt_zero:
      case ne_zero:
         r.range = gt_zero;
         break;

      case eq_zero:
         break;
      }

      break;
   case nir_op_fadd: {
      const struct ssa_result_range left =
         analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
      const struct ssa_result_range right =
         analyze_expression(alu, 1, ht, nir_alu_src_type(alu, 1));

      r.is_integral = left.is_integral && right.is_integral;
      r.range = fadd_table[left.range][right.range];
      break;
   }
   case nir_op_fexp2: {
      /* If the parameter might be less than zero, the mathematical result
       * will be on (0, 1).  For sufficiently large magnitude negative
       * parameters, the result will flush to zero.
       */
      static const enum ssa_ranges table[last_range + 1] = {
         /* unknown  lt_zero  le_zero  gt_zero  ge_zero  ne_zero  eq_zero */
            ge_zero, ge_zero, ge_zero, gt_zero, gt_zero, ge_zero, gt_zero
      };

      r = analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));

      ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_1_SOURCE(table);
      ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_1_SOURCE(table);

      r.is_integral = r.is_integral && is_not_negative(r.range);
      r.range = table[r.range];
      break;
   }
   case nir_op_fmax: {
      const struct ssa_result_range left =
         analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
      const struct ssa_result_range right =
         analyze_expression(alu, 1, ht, nir_alu_src_type(alu, 1));

      r.is_integral = left.is_integral && right.is_integral;

      /* gt_zero: fmax(gt_zero, *)
       *        | fmax(*, gt_zero)        # Treat fmax as commutative
       *        ;
       *
       * ge_zero: fmax(ge_zero, ne_zero)
       *        | fmax(ge_zero, lt_zero)
       *        | fmax(ge_zero, le_zero)
       *        | fmax(ge_zero, eq_zero)
       *        | fmax(ne_zero, ge_zero)  # Treat fmax as commutative
       *        | fmax(lt_zero, ge_zero)  # Treat fmax as commutative
       *        | fmax(le_zero, ge_zero)  # Treat fmax as commutative
       *        | fmax(eq_zero, ge_zero)  # Treat fmax as commutative
       *        | fmax(ge_zero, ge_zero)
       *        ;
       *
       * le_zero: fmax(le_zero, lt_zero)
       *        | fmax(lt_zero, le_zero)  # Treat fmax as commutative
       *        | fmax(le_zero, le_zero)
       *        ;
       *
       * lt_zero: fmax(lt_zero, lt_zero)
       *        ;
       *
       * ne_zero: fmax(ne_zero, lt_zero)
       *        | fmax(lt_zero, ne_zero)  # Treat fmax as commutative
       *        | fmax(ne_zero, ne_zero)
       *        ;
       *
       * eq_zero: fmax(eq_zero, le_zero)
       *        | fmax(eq_zero, lt_zero)
       *        | fmax(le_zero, eq_zero)  # Treat fmax as commutative
       *        | fmax(lt_zero, eq_zero)  # Treat fmax as commutative
       *        | fmax(eq_zero, eq_zero)
       *        ;
       *
       * All other cases are 'unknown'.
       */
      static const enum ssa_ranges table[last_range + 1][last_range + 1] = {
         /* left\right   unknown  lt_zero  le_zero  gt_zero  ge_zero  ne_zero  eq_zero */
         /* unknown */ { _______, _______, _______, gt_zero, ge_zero, _______, _______ },
         /* lt_zero */ { _______, lt_zero, le_zero, gt_zero, ge_zero, ne_zero, eq_zero },
         /* le_zero */ { _______, le_zero, le_zero, gt_zero, ge_zero, _______, eq_zero },
         /* gt_zero */ { gt_zero, gt_zero, gt_zero, gt_zero, gt_zero, gt_zero, gt_zero },
         /* ge_zero */ { ge_zero, ge_zero, ge_zero, gt_zero, ge_zero, ge_zero, ge_zero },
         /* ne_zero */ { _______, ne_zero, _______, gt_zero, ge_zero, ne_zero, _______ },
         /* eq_zero */ { _______, eq_zero, eq_zero, gt_zero, ge_zero, _______, eq_zero }
      };

      /* Treat fmax as commutative. */
      ASSERT_TABLE_IS_COMMUTATIVE(table);
      ASSERT_TABLE_IS_DIAGONAL(table);
      ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_2_SOURCE(table);

      r.range = table[left.range][right.range];
      break;
   }
   case nir_op_fmin: {
      const struct ssa_result_range left =
         analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
      const struct ssa_result_range right =
         analyze_expression(alu, 1, ht, nir_alu_src_type(alu, 1));

      r.is_integral = left.is_integral && right.is_integral;

      /* lt_zero: fmin(lt_zero, *)
       *        | fmin(*, lt_zero)        # Treat fmin as commutative
       *        ;
       *
       * le_zero: fmin(le_zero, ne_zero)
       *        | fmin(le_zero, gt_zero)
       *        | fmin(le_zero, ge_zero)
       *        | fmin(le_zero, eq_zero)
       *        | fmin(ne_zero, le_zero)  # Treat fmin as commutative
       *        | fmin(gt_zero, le_zero)  # Treat fmin as commutative
       *        | fmin(ge_zero, le_zero)  # Treat fmin as commutative
       *        | fmin(eq_zero, le_zero)  # Treat fmin as commutative
       *        | fmin(le_zero, le_zero)
       *        ;
       *
       * ge_zero: fmin(ge_zero, gt_zero)
       *        | fmin(gt_zero, ge_zero)  # Treat fmin as commutative
       *        | fmin(ge_zero, ge_zero)
       *        ;
       *
       * gt_zero: fmin(gt_zero, gt_zero)
       *        ;
       *
       * ne_zero: fmin(ne_zero, gt_zero)
       *        | fmin(gt_zero, ne_zero)  # Treat fmin as commutative
       *        | fmin(ne_zero, ne_zero)
       *        ;
       *
       * eq_zero: fmin(eq_zero, ge_zero)
       *        | fmin(eq_zero, gt_zero)
       *        | fmin(ge_zero, eq_zero)  # Treat fmin as commutative
       *        | fmin(gt_zero, eq_zero)  # Treat fmin as commutative
       *        | fmin(eq_zero, eq_zero)
       *        ;
       *
       * All other cases are 'unknown'.
       */
      static const enum ssa_ranges table[last_range + 1][last_range + 1] = {
         /* left\right   unknown  lt_zero  le_zero  gt_zero  ge_zero  ne_zero  eq_zero */
         /* unknown */ { _______, lt_zero, le_zero, _______, _______, _______, _______ },
         /* lt_zero */ { lt_zero, lt_zero, lt_zero, lt_zero, lt_zero, lt_zero, lt_zero },
         /* le_zero */ { le_zero, lt_zero, le_zero, le_zero, le_zero, le_zero, le_zero },
         /* gt_zero */ { _______, lt_zero, le_zero, gt_zero, ge_zero, ne_zero, eq_zero },
         /* ge_zero */ { _______, lt_zero, le_zero, ge_zero, ge_zero, _______, eq_zero },
         /* ne_zero */ { _______, lt_zero, le_zero, ne_zero, _______, ne_zero, _______ },
         /* eq_zero */ { _______, lt_zero, le_zero, eq_zero, eq_zero, _______, eq_zero }
      };

      /* Treat fmin as commutative. */
      ASSERT_TABLE_IS_COMMUTATIVE(table);
      ASSERT_TABLE_IS_DIAGONAL(table);
      ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_2_SOURCE(table);

      r.range = table[left.range][right.range];
      break;
   }
   case nir_op_fmul: {
      const struct ssa_result_range left =
         analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
      const struct ssa_result_range right =
         analyze_expression(alu, 1, ht, nir_alu_src_type(alu, 1));

      r.is_integral = left.is_integral && right.is_integral;

      /* x * x => ge_zero */
      if (left.range != eq_zero && nir_alu_srcs_equal(alu, alu, 0, 1)) {
         /* Even if x > 0, the result of x*x can be zero when x is, for
          * example, a subnormal number.
          */
         r.range = ge_zero;
      } else if (left.range != eq_zero &&
                 nir_alu_srcs_negative_equal(alu, alu, 0, 1)) {
         /* -x * x => le_zero. */
         r.range = le_zero;
      } else
         r.range = fmul_table[left.range][right.range];

      break;
   }
   case nir_op_frcp:
      r = (struct ssa_result_range){
         analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0)).range,
         false
      };
      break;
   case nir_op_mov:
      r = analyze_expression(alu, 0, ht, use_type);
      break;
   case nir_op_fneg:
      r = analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));

      r.range = fneg_table[r.range];
      break;
   case nir_op_fsat:
      r = analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));

      switch (r.range) {
      case le_zero:
      case lt_zero:
         /* fsat clamps everything <= 0 to exactly 0. */
         r.range = eq_zero;
         r.is_integral = true;
         break;

      case eq_zero:
         assert(r.is_integral);
         /* fallthrough */
      case gt_zero:
      case ge_zero:
         /* The fsat doesn't add any information in these cases. */
         break;

      case ne_zero:
      case unknown:
         /* Since the result must be in [0, 1], the value must be >= 0. */
         r.range = ge_zero;
         break;
      }
      break;
   case nir_op_fsign:
      r = (struct ssa_result_range){
         analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0)).range,
         true
      };
      break;
   case nir_op_fsqrt:
   case nir_op_frsq:
      r = (struct ssa_result_range){ge_zero, false};
      break;
   case nir_op_ffloor: {
      const struct ssa_result_range left =
         analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));

      r.is_integral = true;

      if (left.is_integral || left.range == le_zero || left.range == lt_zero)
         r.range = left.range;
      else if (left.range == ge_zero || left.range == gt_zero)
         r.range = ge_zero;
      else if (left.range == ne_zero)
         r.range = unknown;

      break;
   }
   case nir_op_fceil: {
      const struct ssa_result_range left =
         analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));

      r.is_integral = true;

      if (left.is_integral || left.range == ge_zero || left.range == gt_zero)
         r.range = left.range;
      else if (left.range == le_zero || left.range == lt_zero)
         r.range = le_zero;
      else if (left.range == ne_zero)
         r.range = unknown;

      break;
   }
   case nir_op_ftrunc: {
      const struct ssa_result_range left =
         analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));

      r.is_integral = true;

      if (left.is_integral)
         r.range = left.range;
      else if (left.range == ge_zero || left.range == gt_zero)
         r.range = ge_zero;
      else if (left.range == le_zero || left.range == lt_zero)
         r.range = le_zero;
      else if (left.range == ne_zero)
         r.range = unknown;

      break;
   }
   case nir_op_flt:
   case nir_op_fge:
   case nir_op_feq:
   case nir_op_fne:
   case nir_op_ilt:
   case nir_op_ige:
   case nir_op_ieq:
   case nir_op_ine:
   case nir_op_ult:
   case nir_op_uge:
      /* Boolean results are 0 or -1. */
      r = (struct ssa_result_range){le_zero, false};
      break;
   case nir_op_fpow: {
      /* Due to flush-to-zero semantics of floating-point numbers with very
       * small magnitudes, we can never really be sure a result will be
       * non-zero.
       *
       * NIR uses pow() and powf() to constant evaluate nir_op_fpow.  The man
       * page for that function says:
       *
       *    If y is 0, the result is 1.0 (even if x is a NaN).
       *
       * gt_zero: pow(*, eq_zero)
       *        | pow(eq_zero, lt_zero)   # 0^-y = +inf
       *        | pow(eq_zero, le_zero)   # 0^-y = +inf or 0^0 = 1.0
       *        ;
       *
       * eq_zero: pow(eq_zero, gt_zero)
       *        ;
       *
       * ge_zero: pow(gt_zero, gt_zero)
       *        | pow(gt_zero, ge_zero)
       *        | pow(gt_zero, lt_zero)
       *        | pow(gt_zero, le_zero)
       *        | pow(gt_zero, ne_zero)
       *        | pow(gt_zero, unknown)
       *        | pow(ge_zero, gt_zero)
       *        | pow(ge_zero, ge_zero)
       *        | pow(ge_zero, lt_zero)
       *        | pow(ge_zero, le_zero)
       *        | pow(ge_zero, ne_zero)
       *        | pow(ge_zero, unknown)
       *        | pow(eq_zero, ge_zero)   # 0^0 = 1.0 or 0^+y = 0.0
       *        | pow(eq_zero, ne_zero)   # 0^-y = +inf or 0^+y = 0.0
       *        | pow(eq_zero, unknown)   # union of all other y cases
       *        ;
       *
       * All other cases are unknown.
       *
       * We could do better if the right operand is a constant, integral
       * value.
       */
      static const enum ssa_ranges table[last_range + 1][last_range + 1] = {
         /* left\right   unknown  lt_zero  le_zero  gt_zero  ge_zero  ne_zero  eq_zero */
         /* unknown */ { _______, _______, _______, _______, _______, _______, gt_zero },
         /* lt_zero */ { _______, _______, _______, _______, _______, _______, gt_zero },
         /* le_zero */ { _______, _______, _______, _______, _______, _______, gt_zero },
         /* gt_zero */ { ge_zero, ge_zero, ge_zero, ge_zero, ge_zero, ge_zero, gt_zero },
         /* ge_zero */ { ge_zero, ge_zero, ge_zero, ge_zero, ge_zero, ge_zero, gt_zero },
         /* ne_zero */ { _______, _______, _______, _______, _______, _______, gt_zero },
         /* eq_zero */ { ge_zero, gt_zero, gt_zero, eq_zero, ge_zero, ge_zero, gt_zero },
      };

      const struct ssa_result_range left =
         analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
      const struct ssa_result_range right =
         analyze_expression(alu, 1, ht, nir_alu_src_type(alu, 1));

      ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(table);
      ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_2_SOURCE(table);

      r.is_integral = left.is_integral && right.is_integral &&
                      is_not_negative(right.range);
      r.range = table[left.range][right.range];
      break;
   }
   case nir_op_ffma: {
      const struct ssa_result_range first =
         analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
      const struct ssa_result_range second =
         analyze_expression(alu, 1, ht, nir_alu_src_type(alu, 1));
      const struct ssa_result_range third =
         analyze_expression(alu, 2, ht, nir_alu_src_type(alu, 2));

      r.is_integral = first.is_integral && second.is_integral &&
                      third.is_integral;

      enum ssa_ranges fmul_range;

      if (first.range != eq_zero && nir_alu_srcs_equal(alu, alu, 0, 1)) {
         /* See handling of nir_op_fmul for explanation of why ge_zero is the
          * range.
          */
         fmul_range = ge_zero;
      } else if (first.range != eq_zero &&
                 nir_alu_srcs_negative_equal(alu, alu, 0, 1)) {
         /* -x * x => le_zero */
         fmul_range = le_zero;
      } else {
         fmul_range = fmul_table[first.range][second.range];
      }

      r.range = fadd_table[fmul_range][third.range];
      break;
   }
   case nir_op_flrp: {
      const struct ssa_result_range first =
         analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
      const struct ssa_result_range second =
         analyze_expression(alu, 1, ht, nir_alu_src_type(alu, 1));
      const struct ssa_result_range third =
         analyze_expression(alu, 2, ht, nir_alu_src_type(alu, 2));

      r.is_integral = first.is_integral && second.is_integral &&
                      third.is_integral;

      /* Decompose the flrp to first + third * (second + -first) */
      const enum ssa_ranges inner_fadd_range =
         fadd_table[second.range][fneg_table[first.range]];

      const enum ssa_ranges fmul_range =
         fmul_table[third.range][inner_fadd_range];

      r.range = fadd_table[first.range][fmul_range];
      break;
   }
   default:
      r = (struct ssa_result_range){unknown, false};
      break;
   }

   /* eq_zero means the value is literally zero, and zero is integral. */
   if (r.range == eq_zero)
      r.is_integral = true;

   _mesa_hash_table_insert(ht, pack_key(alu, use_type), pack_data(r));

   return r;
}
struct ssa_result_range
nir_analyze_range(struct hash_table *range_ht,
                  const nir_alu_instr *instr, unsigned src)
{
   return analyze_expression(instr, src, range_ht,
                             nir_alu_src_type(instr, src));
}
static uint32_t bitmask(uint32_t size) {
   return size >= 32 ? 0xffffffffu : ((uint32_t)1 << size) - 1u;
}
static uint64_t mul_clamp(uint32_t a, uint32_t b)
{
   if (a != 0 && (a * b) / a != b)
      return (uint64_t)UINT32_MAX + 1;
   else
      return a * b;
}
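/* E.g. mul_clamp(0x10000u, 0x10000u) detects that the 32-bit product wraps
 * and returns UINT32_MAX + 1 (one past any representable bound), while
 * mul_clamp(3, 5) simply returns 15.
 */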
static unsigned
search_phi_bcsel(nir_ssa_scalar scalar, nir_ssa_scalar *buf, unsigned buf_size, struct set *visited)
{
   if (_mesa_set_search(visited, scalar.def))
      return 0;
   _mesa_set_add(visited, scalar.def);

   if (scalar.def->parent_instr->type == nir_instr_type_phi) {
      nir_phi_instr *phi = nir_instr_as_phi(scalar.def->parent_instr);
      unsigned num_sources_left = exec_list_length(&phi->srcs);
      unsigned total_added = 0;
      nir_foreach_phi_src(src, phi) {
         unsigned added = search_phi_bcsel(
            (nir_ssa_scalar){src->src.ssa, 0}, buf + total_added, buf_size - num_sources_left, visited);
         buf_size -= added;
         total_added += added;
         num_sources_left--;
      }
      return total_added;
   }

   if (nir_ssa_scalar_is_alu(scalar)) {
      nir_op op = nir_ssa_scalar_alu_op(scalar);

      if ((op == nir_op_bcsel || op == nir_op_b32csel) && buf_size >= 2) {
         nir_ssa_scalar src0 = nir_ssa_scalar_chase_alu_src(scalar, 0);
         nir_ssa_scalar src1 = nir_ssa_scalar_chase_alu_src(scalar, 1);

         unsigned added = search_phi_bcsel(src0, buf, buf_size - 1, visited);
         buf_size -= added;
         added += search_phi_bcsel(src1, buf + added, buf_size, visited);
         return added;
      }
   }

   buf[0] = scalar;
   return 1;
}
static nir_variable *
lookup_input(nir_shader *shader, unsigned driver_location)
{
   return nir_find_variable_with_driver_location(shader, nir_var_shader_in,
                                                 driver_location);
}
uint32_t
nir_unsigned_upper_bound(nir_shader *shader, struct hash_table *range_ht,
                         nir_ssa_scalar scalar,
                         const nir_unsigned_upper_bound_config *config)
{
   assert(scalar.def->bit_size <= 32);

   if (nir_ssa_scalar_is_const(scalar))
      return nir_ssa_scalar_as_uint(scalar);

   /* keys can't be 0, so we have to add 1 to the index */
   void *key = (void*)(((uintptr_t)(scalar.def->index + 1) << 4) | scalar.comp);
   struct hash_entry *he = _mesa_hash_table_search(range_ht, key);
   if (he != NULL)
      return (uintptr_t)he->data;

   uint32_t max = bitmask(scalar.def->bit_size);

   if (scalar.def->parent_instr->type == nir_instr_type_intrinsic) {
      uint32_t res = max;
*intrin
= nir_instr_as_intrinsic(scalar
.def
->parent_instr
);
1169 switch (intrin
->intrinsic
) {
1170 case nir_intrinsic_load_local_invocation_index
:
1171 if (shader
->info
.cs
.local_size_variable
) {
1172 res
= config
->max_work_group_invocations
- 1;
1174 res
= (shader
->info
.cs
.local_size
[0] *
1175 shader
->info
.cs
.local_size
[1] *
1176 shader
->info
.cs
.local_size
[2]) - 1u;
1179 case nir_intrinsic_load_local_invocation_id
:
1180 if (shader
->info
.cs
.local_size_variable
)
1181 res
= config
->max_work_group_size
[scalar
.comp
] - 1u;
1183 res
= shader
->info
.cs
.local_size
[scalar
.comp
] - 1u;
1185 case nir_intrinsic_load_work_group_id
:
1186 res
= config
->max_work_group_count
[scalar
.comp
] - 1u;
1188 case nir_intrinsic_load_num_work_groups
:
1189 res
= config
->max_work_group_count
[scalar
.comp
];
1191 case nir_intrinsic_load_global_invocation_id
:
1192 if (shader
->info
.cs
.local_size_variable
) {
1193 res
= mul_clamp(config
->max_work_group_size
[scalar
.comp
],
1194 config
->max_work_group_count
[scalar
.comp
]) - 1u;
1196 res
= (shader
->info
.cs
.local_size
[scalar
.comp
] *
1197 config
->max_work_group_count
[scalar
.comp
]) - 1u;
1200 case nir_intrinsic_load_subgroup_invocation
:
1201 case nir_intrinsic_first_invocation
:
1202 case nir_intrinsic_mbcnt_amd
:
1203 res
= config
->max_subgroup_size
- 1;
1205 case nir_intrinsic_load_subgroup_size
:
1206 res
= config
->max_subgroup_size
;
1208 case nir_intrinsic_load_subgroup_id
:
1209 case nir_intrinsic_load_num_subgroups
: {
1210 uint32_t work_group_size
= config
->max_work_group_invocations
;
1211 if (!shader
->info
.cs
.local_size_variable
) {
1212 work_group_size
= shader
->info
.cs
.local_size
[0] *
1213 shader
->info
.cs
.local_size
[1] *
1214 shader
->info
.cs
.local_size
[2];
1216 res
= (work_group_size
+ config
->min_subgroup_size
- 1) / config
->min_subgroup_size
;
1217 if (intrin
->intrinsic
== nir_intrinsic_load_subgroup_id
)
1221 case nir_intrinsic_load_input
: {
1222 if (shader
->info
.stage
== MESA_SHADER_VERTEX
&& nir_src_is_const(intrin
->src
[0])) {
1223 nir_variable
*var
= lookup_input(shader
, nir_intrinsic_base(intrin
));
1225 int loc
= var
->data
.location
- VERT_ATTRIB_GENERIC0
;
1227 res
= config
->vertex_attrib_max
[loc
];
1232 case nir_intrinsic_reduce
:
1233 case nir_intrinsic_inclusive_scan
:
1234 case nir_intrinsic_exclusive_scan
: {
1235 nir_op op
= nir_intrinsic_reduction_op(intrin
);
1236 if (op
== nir_op_umin
|| op
== nir_op_umax
|| op
== nir_op_imin
|| op
== nir_op_imax
)
1237 res
= nir_unsigned_upper_bound(shader
, range_ht
, (nir_ssa_scalar
){intrin
->src
[0].ssa
, 0}, config
);
1240 case nir_intrinsic_read_first_invocation
:
1241 case nir_intrinsic_read_invocation
:
1242 case nir_intrinsic_shuffle
:
1243 case nir_intrinsic_shuffle_xor
:
1244 case nir_intrinsic_shuffle_up
:
1245 case nir_intrinsic_shuffle_down
:
1246 case nir_intrinsic_quad_broadcast
:
1247 case nir_intrinsic_quad_swap_horizontal
:
1248 case nir_intrinsic_quad_swap_vertical
:
1249 case nir_intrinsic_quad_swap_diagonal
:
1250 case nir_intrinsic_quad_swizzle_amd
:
1251 case nir_intrinsic_masked_swizzle_amd
:
1252 res
= nir_unsigned_upper_bound(shader
, range_ht
, (nir_ssa_scalar
){intrin
->src
[0].ssa
, 0}, config
);
1254 case nir_intrinsic_write_invocation_amd
: {
1255 uint32_t src0
= nir_unsigned_upper_bound(shader
, range_ht
, (nir_ssa_scalar
){intrin
->src
[0].ssa
, 0}, config
);
1256 uint32_t src1
= nir_unsigned_upper_bound(shader
, range_ht
, (nir_ssa_scalar
){intrin
->src
[1].ssa
, 0}, config
);
1257 res
= MAX2(src0
, src1
);
1264 _mesa_hash_table_insert(range_ht
, key
, (void*)(uintptr_t)res
);
   if (scalar.def->parent_instr->type == nir_instr_type_phi) {
      bool cyclic = false;
      nir_foreach_phi_src(src, nir_instr_as_phi(scalar.def->parent_instr)) {
         /* If the phi's block dominates a source's predecessor block, the
          * source flows around a loop back-edge, so the phi depends on
          * itself.
          */
         if (nir_block_dominates(scalar.def->parent_instr->block, src->pred)) {
            cyclic = true;
            break;
         }
      }

      uint32_t res = 0;
      if (cyclic) {
         /* Insert the maximum first so that a self-referential lookup while
          * recursing terminates instead of looping forever.
          */
         _mesa_hash_table_insert(range_ht, key, (void*)(uintptr_t)max);

         struct set *visited = _mesa_pointer_set_create(NULL);
         nir_ssa_scalar defs[64];
         unsigned def_count = search_phi_bcsel(scalar, defs, 64, visited);
         _mesa_set_destroy(visited, NULL);

         for (unsigned i = 0; i < def_count; i++)
            res = MAX2(res, nir_unsigned_upper_bound(shader, range_ht, defs[i], config));
      } else {
         nir_foreach_phi_src(src, nir_instr_as_phi(scalar.def->parent_instr)) {
            res = MAX2(res, nir_unsigned_upper_bound(
               shader, range_ht, (nir_ssa_scalar){src->src.ssa, 0}, config));
         }
      }

      _mesa_hash_table_insert(range_ht, key, (void*)(uintptr_t)res);
      return res;
   }
   if (nir_ssa_scalar_is_alu(scalar)) {
      nir_op op = nir_ssa_scalar_alu_op(scalar);

      /* Reject opcodes for which no bound tighter than the type's maximum
       * is computed below.
       */
      switch (op) {
      case nir_op_umin:
      case nir_op_imin:
      case nir_op_imax:
      case nir_op_umax:
      case nir_op_iand:
      case nir_op_ior:
      case nir_op_ixor:
      case nir_op_ishl:
      case nir_op_imul:
      case nir_op_ushr:
      case nir_op_ishr:
      case nir_op_iadd:
      case nir_op_umod:
      case nir_op_udiv:
      case nir_op_bcsel:
      case nir_op_b32csel:
      case nir_op_imax3:
      case nir_op_imin3:
      case nir_op_umax3:
      case nir_op_umin3:
      case nir_op_ubfe:
      case nir_op_bfm:
      case nir_op_fmul:
      case nir_op_f2u32:
         break;
      default:
         return max;
      }

      uint32_t res = max;
      uint32_t src0 = nir_unsigned_upper_bound(shader, range_ht, nir_ssa_scalar_chase_alu_src(scalar, 0), config);
      uint32_t src1 = max, src2 = max;
      if (nir_op_infos[op].num_inputs > 1)
         src1 = nir_unsigned_upper_bound(shader, range_ht, nir_ssa_scalar_chase_alu_src(scalar, 1), config);
      if (nir_op_infos[op].num_inputs > 2)
         src2 = nir_unsigned_upper_bound(shader, range_ht, nir_ssa_scalar_chase_alu_src(scalar, 2), config);

      switch (op) {
      case nir_op_umin:
      case nir_op_imin:
         res = src0 < src1 ? src0 : src1;
         break;
      case nir_op_umax:
      case nir_op_imax:
         res = src0 > src1 ? src0 : src1;
         break;
      case nir_op_iand:
         res = bitmask(util_last_bit64(src0)) & bitmask(util_last_bit64(src1));
         break;
      case nir_op_ior:
      case nir_op_ixor:
         res = bitmask(util_last_bit64(src0)) | bitmask(util_last_bit64(src1));
         break;
      case nir_op_ishl:
         if (util_last_bit64(src0) + src1 > scalar.def->bit_size)
            res = max; /* overflow */
         else
            res = src0 << MIN2(src1, scalar.def->bit_size - 1u);
         break;
      case nir_op_imul:
         if (src0 != 0 && (src0 * src1) / src0 != src1)
            res = max; /* overflow */
         else
            res = src0 * src1;
         break;
      case nir_op_ushr: {
         nir_ssa_scalar src1_scalar = nir_ssa_scalar_chase_alu_src(scalar, 1);
         if (nir_ssa_scalar_is_const(src1_scalar))
            res = src0 >> nir_ssa_scalar_as_uint(src1_scalar);
         else
            res = src0;
         break;
      }
      case nir_op_ishr: {
         nir_ssa_scalar src1_scalar = nir_ssa_scalar_chase_alu_src(scalar, 1);
         if (src0 <= 2147483647 && nir_ssa_scalar_is_const(src1_scalar))
            res = src0 >> nir_ssa_scalar_as_uint(src1_scalar);
         else
            res = src0;
         break;
      }
      case nir_op_iadd:
         if (src0 + src1 < src0)
            res = max; /* overflow */
         else
            res = src0 + src1;
         break;
      case nir_op_umod:
         res = src1 ? src1 - 1 : 0;
         break;
      case nir_op_udiv: {
         nir_ssa_scalar src1_scalar = nir_ssa_scalar_chase_alu_src(scalar, 1);
         if (nir_ssa_scalar_is_const(src1_scalar))
            res = nir_ssa_scalar_as_uint(src1_scalar) ? src0 / nir_ssa_scalar_as_uint(src1_scalar) : 0;
         else
            res = src0;
         break;
      }
      case nir_op_bcsel:
      case nir_op_b32csel:
         res = src1 > src2 ? src1 : src2;
         break;
      case nir_op_imax3:
      case nir_op_umax3:
         src0 = src0 > src1 ? src0 : src1;
         res = src0 > src2 ? src0 : src2;
         break;
      case nir_op_imin3:
      case nir_op_umin3:
         src0 = src0 < src1 ? src0 : src1;
         res = src0 < src2 ? src0 : src2;
         break;
      case nir_op_ubfe:
         res = bitmask(MIN2(src2, scalar.def->bit_size));
         break;
      case nir_op_bfm: {
         nir_ssa_scalar src1_scalar = nir_ssa_scalar_chase_alu_src(scalar, 1);
         if (nir_ssa_scalar_is_const(src1_scalar)) {
            src0 = MIN2(src0, 31);
            src1 = nir_ssa_scalar_as_uint(src1_scalar) & 0x1fu;
            res = bitmask(src0) << src1;
         } else {
            src0 = MIN2(src0, 31);
            src1 = MIN2(src1, 31);
            res = bitmask(MIN2(src0 + src1, 32));
         }
         break;
      }
      /* limited floating-point support for f2u32(fmul(load_input(), <constant>)) */
      case nir_op_f2u32:
         /* infinity/NaN starts at 0x7f800000u, negative numbers at 0x80000000 */
         if (src0 < 0x7f800000u) {
            float val;
            memcpy(&val, &src0, 4);
            res = (uint32_t)val;
         }
         break;
      case nir_op_fmul:
         /* infinity/NaN starts at 0x7f800000u, negative numbers at 0x80000000 */
         if (src0 < 0x7f800000u && src1 < 0x7f800000u) {
            float src0_f, src1_f;
            memcpy(&src0_f, &src0, 4);
            memcpy(&src1_f, &src1, 4);
            /* not a proper rounding-up multiplication, but should be good enough */
            float max_f = ceilf(src0_f) * ceilf(src1_f);
            memcpy(&res, &max_f, 4);
         }
         break;
      default:
         break;
      }

      _mesa_hash_table_insert(range_ht, key, (void*)(uintptr_t)res);
      return res;
   }

   return max;
}
bool
nir_addition_might_overflow(nir_shader *shader, struct hash_table *range_ht,
                            nir_ssa_scalar ssa, unsigned const_val,
                            const nir_unsigned_upper_bound_config *config)
{
   nir_op alu_op = nir_ssa_scalar_alu_op(ssa);

   /* iadd(imul(a, #b), #c) */
   if (alu_op == nir_op_imul || alu_op == nir_op_ishl) {
      nir_ssa_scalar mul_src0 = nir_ssa_scalar_chase_alu_src(ssa, 0);
      nir_ssa_scalar mul_src1 = nir_ssa_scalar_chase_alu_src(ssa, 1);
      uint32_t stride = 1;
      if (nir_ssa_scalar_is_const(mul_src0))
         stride = nir_ssa_scalar_as_uint(mul_src0);
      else if (nir_ssa_scalar_is_const(mul_src1))
         stride = nir_ssa_scalar_as_uint(mul_src1);

      if (alu_op == nir_op_ishl)
         stride = 1u << (stride % 32u);

      if (!stride || const_val <= UINT32_MAX - (UINT32_MAX / stride * stride))
         return false;
   }

   /* iadd(iand(a, #b), #c) */
   if (alu_op == nir_op_iand) {
      nir_ssa_scalar and_src0 = nir_ssa_scalar_chase_alu_src(ssa, 0);
      nir_ssa_scalar and_src1 = nir_ssa_scalar_chase_alu_src(ssa, 1);
      uint32_t mask = 0xffffffff;
      if (nir_ssa_scalar_is_const(and_src0))
         mask = nir_ssa_scalar_as_uint(and_src0);
      else if (nir_ssa_scalar_is_const(and_src1))
         mask = nir_ssa_scalar_as_uint(and_src1);
      if (mask == 0 || const_val < (1u << (ffs(mask) - 1)))
         return false;
   }

   uint32_t ub = nir_unsigned_upper_bound(shader, range_ht, ssa, config);
   return const_val + ub < const_val;
}