2 * Copyright © 2018 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
26 #include "nir_range_analysis.h"
27 #include "util/hash_table.h"
30 * Analyzes a sequence of operations to determine some aspects of the range of
35 is_not_negative(enum ssa_ranges r
)
37 return r
== gt_zero
|| r
== ge_zero
|| r
== eq_zero
;
41 pack_data(const struct ssa_result_range r
)
43 return (void *)(uintptr_t)(r
.range
| r
.is_integral
<< 8);
46 static struct ssa_result_range
47 unpack_data(const void *p
)
49 const uintptr_t v
= (uintptr_t) p
;
51 return (struct ssa_result_range
){v
& 0xff, (v
& 0x0ff00) != 0};
55 pack_key(const struct nir_alu_instr
*instr
, nir_alu_type type
)
57 uintptr_t type_encoding
;
58 uintptr_t ptr
= (uintptr_t) instr
;
60 /* The low 2 bits have to be zero or this whole scheme falls apart. */
61 assert((ptr
& 0x3) == 0);
63 /* NIR is typeless in the sense that sequences of bits have whatever
64 * meaning is attached to them by the instruction that consumes them.
65 * However, the number of bits must match between producer and consumer.
66 * As a result, the number of bits does not need to be encoded here.
68 switch (nir_alu_type_get_base_type(type
)) {
69 case nir_type_int
: type_encoding
= 0; break;
70 case nir_type_uint
: type_encoding
= 1; break;
71 case nir_type_bool
: type_encoding
= 2; break;
72 case nir_type_float
: type_encoding
= 3; break;
73 default: unreachable("Invalid base type.");
76 return (void *)(ptr
| type_encoding
);
80 nir_alu_src_type(const nir_alu_instr
*instr
, unsigned src
)
82 return nir_alu_type_get_base_type(nir_op_infos
[instr
->op
].input_types
[src
]) |
83 nir_src_bit_size(instr
->src
[src
].src
);
86 static struct ssa_result_range
87 analyze_constant(const struct nir_alu_instr
*instr
, unsigned src
,
88 nir_alu_type use_type
)
90 uint8_t swizzle
[4] = { 0, 1, 2, 3 };
92 /* If the source is an explicitly sized source, then we need to reset
93 * both the number of components and the swizzle.
95 const unsigned num_components
= nir_ssa_alu_instr_src_components(instr
, src
);
97 for (unsigned i
= 0; i
< num_components
; ++i
)
98 swizzle
[i
] = instr
->src
[src
].swizzle
[i
];
100 const nir_load_const_instr
*const load
=
101 nir_instr_as_load_const(instr
->src
[src
].src
.ssa
->parent_instr
);
103 struct ssa_result_range r
= { unknown
, false };
105 switch (nir_alu_type_get_base_type(use_type
)) {
106 case nir_type_float
: {
107 double min_value
= DBL_MAX
;
108 double max_value
= -DBL_MAX
;
109 bool any_zero
= false;
110 bool all_zero
= true;
112 r
.is_integral
= true;
114 for (unsigned i
= 0; i
< num_components
; ++i
) {
115 const double v
= nir_const_value_as_float(load
->value
[swizzle
[i
]],
119 r
.is_integral
= false;
121 any_zero
= any_zero
|| (v
== 0.0);
122 all_zero
= all_zero
&& (v
== 0.0);
123 min_value
= MIN2(min_value
, v
);
124 max_value
= MAX2(max_value
, v
);
127 assert(any_zero
>= all_zero
);
128 assert(isnan(max_value
) || max_value
>= min_value
);
132 else if (min_value
> 0.0)
134 else if (min_value
== 0.0)
136 else if (max_value
< 0.0)
138 else if (max_value
== 0.0)
149 case nir_type_bool
: {
150 int64_t min_value
= INT_MAX
;
151 int64_t max_value
= INT_MIN
;
152 bool any_zero
= false;
153 bool all_zero
= true;
155 for (unsigned i
= 0; i
< num_components
; ++i
) {
156 const int64_t v
= nir_const_value_as_int(load
->value
[swizzle
[i
]],
159 any_zero
= any_zero
|| (v
== 0);
160 all_zero
= all_zero
&& (v
== 0);
161 min_value
= MIN2(min_value
, v
);
162 max_value
= MAX2(max_value
, v
);
165 assert(any_zero
>= all_zero
);
166 assert(max_value
>= min_value
);
170 else if (min_value
> 0)
172 else if (min_value
== 0)
174 else if (max_value
< 0)
176 else if (max_value
== 0)
186 case nir_type_uint
: {
187 bool any_zero
= false;
188 bool all_zero
= true;
190 for (unsigned i
= 0; i
< num_components
; ++i
) {
191 const uint64_t v
= nir_const_value_as_uint(load
->value
[swizzle
[i
]],
194 any_zero
= any_zero
|| (v
== 0);
195 all_zero
= all_zero
&& (v
== 0);
198 assert(any_zero
>= all_zero
);
211 unreachable("Invalid alu source type");
216 * Short-hand name for use in the tables in analyze_expression. If this name
217 * becomes a problem on some compiler, we can change it to _.
219 #define _______ unknown
222 /* MSVC doesn't have C99's _Pragma() */
229 #define ASSERT_TABLE_IS_COMMUTATIVE(t) \
231 static bool first = true; \
234 _Pragma("GCC unroll 7") \
235 for (unsigned r = 0; r < ARRAY_SIZE(t); r++) { \
236 _Pragma("GCC unroll 7") \
237 for (unsigned c = 0; c < ARRAY_SIZE(t[0]); c++) \
238 assert(t[r][c] == t[c][r]); \
243 #define ASSERT_TABLE_IS_DIAGONAL(t) \
245 static bool first = true; \
248 _Pragma("GCC unroll 7") \
249 for (unsigned r = 0; r < ARRAY_SIZE(t); r++) \
250 assert(t[r][r] == r); \
254 static enum ssa_ranges
255 union_ranges(enum ssa_ranges a
, enum ssa_ranges b
)
257 static const enum ssa_ranges union_table
[last_range
+ 1][last_range
+ 1] = {
258 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
259 /* unknown */ { _______
, _______
, _______
, _______
, _______
, _______
, _______
},
260 /* lt_zero */ { _______
, lt_zero
, le_zero
, ne_zero
, _______
, ne_zero
, le_zero
},
261 /* le_zero */ { _______
, le_zero
, le_zero
, _______
, _______
, _______
, le_zero
},
262 /* gt_zero */ { _______
, ne_zero
, _______
, gt_zero
, ge_zero
, ne_zero
, ge_zero
},
263 /* ge_zero */ { _______
, _______
, _______
, ge_zero
, ge_zero
, _______
, ge_zero
},
264 /* ne_zero */ { _______
, ne_zero
, _______
, ne_zero
, _______
, ne_zero
, _______
},
265 /* eq_zero */ { _______
, le_zero
, le_zero
, ge_zero
, ge_zero
, _______
, eq_zero
},
268 ASSERT_TABLE_IS_COMMUTATIVE(union_table
);
269 ASSERT_TABLE_IS_DIAGONAL(union_table
);
271 return union_table
[a
][b
];
274 /* Verify that the 'unknown' entry in each row (or column) of the table is the
275 * union of all the other values in the row (or column).
277 #define ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_2_SOURCE(t) \
279 static bool first = true; \
282 _Pragma("GCC unroll 7") \
283 for (unsigned i = 0; i < last_range; i++) { \
284 enum ssa_ranges col_range = t[i][unknown + 1]; \
285 enum ssa_ranges row_range = t[unknown + 1][i]; \
287 _Pragma("GCC unroll 5") \
288 for (unsigned j = unknown + 2; j < last_range; j++) { \
289 col_range = union_ranges(col_range, t[i][j]); \
290 row_range = union_ranges(row_range, t[j][i]); \
293 assert(col_range == t[i][unknown]); \
294 assert(row_range == t[unknown][i]); \
299 /* For most operations, the union of ranges for a strict inequality and
300 * equality should be the range of the non-strict inequality (e.g.,
301 * union_ranges(range(op(lt_zero), range(op(eq_zero))) == range(op(le_zero)).
303 * Does not apply to selection-like opcodes (bcsel, fmin, fmax, etc.).
305 #define ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_1_SOURCE(t) \
307 assert(union_ranges(t[lt_zero], t[eq_zero]) == t[le_zero]); \
308 assert(union_ranges(t[gt_zero], t[eq_zero]) == t[ge_zero]); \
311 #define ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_2_SOURCE(t) \
313 static bool first = true; \
316 _Pragma("GCC unroll 7") \
317 for (unsigned i = 0; i < last_range; i++) { \
318 assert(union_ranges(t[i][lt_zero], t[i][eq_zero]) == t[i][le_zero]); \
319 assert(union_ranges(t[i][gt_zero], t[i][eq_zero]) == t[i][ge_zero]); \
320 assert(union_ranges(t[lt_zero][i], t[eq_zero][i]) == t[le_zero][i]); \
321 assert(union_ranges(t[gt_zero][i], t[eq_zero][i]) == t[ge_zero][i]); \
326 /* Several other unordered tuples span the range of "everything." Each should
327 * have the same value as unknown: (lt_zero, ge_zero), (le_zero, gt_zero), and
328 * (eq_zero, ne_zero). union_ranges is already commutative, so only one
329 * ordering needs to be checked.
331 * Does not apply to selection-like opcodes (bcsel, fmin, fmax, etc.).
333 * In cases where this can be used, it is unnecessary to also use
334 * ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_*_SOURCE. For any range X,
335 * union_ranges(X, X) == X. The disjoint ranges cover all of the non-unknown
336 * possibilities, so the union of all the unions of disjoint ranges is
337 * equivalent to the union of "others."
339 #define ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_1_SOURCE(t) \
341 assert(union_ranges(t[lt_zero], t[ge_zero]) == t[unknown]); \
342 assert(union_ranges(t[le_zero], t[gt_zero]) == t[unknown]); \
343 assert(union_ranges(t[eq_zero], t[ne_zero]) == t[unknown]); \
346 #define ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(t) \
348 static bool first = true; \
351 _Pragma("GCC unroll 7") \
352 for (unsigned i = 0; i < last_range; i++) { \
353 assert(union_ranges(t[i][lt_zero], t[i][ge_zero]) == \
355 assert(union_ranges(t[i][le_zero], t[i][gt_zero]) == \
357 assert(union_ranges(t[i][eq_zero], t[i][ne_zero]) == \
360 assert(union_ranges(t[lt_zero][i], t[ge_zero][i]) == \
362 assert(union_ranges(t[le_zero][i], t[gt_zero][i]) == \
364 assert(union_ranges(t[eq_zero][i], t[ne_zero][i]) == \
371 #define ASSERT_TABLE_IS_COMMUTATIVE(t)
372 #define ASSERT_TABLE_IS_DIAGONAL(t)
373 #define ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_2_SOURCE(t)
374 #define ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_1_SOURCE(t)
375 #define ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_2_SOURCE(t)
376 #define ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_1_SOURCE(t)
377 #define ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(t)
381 * Analyze an expression to determine the range of its result
383 * The end result of this analysis is a token that communicates something
384 * about the range of values. There's an implicit grammar that produces
385 * tokens from sequences of literal values, other tokens, and operations.
386 * This function implements this grammar as a recursive-descent parser. Some
387 * (but not all) of the grammar is listed in-line in the function.
389 static struct ssa_result_range
390 analyze_expression(const nir_alu_instr
*instr
, unsigned src
,
391 struct hash_table
*ht
, nir_alu_type use_type
)
393 /* Ensure that the _Pragma("GCC unroll 7") above are correct. */
394 STATIC_ASSERT(last_range
+ 1 == 7);
396 if (!instr
->src
[src
].src
.is_ssa
)
397 return (struct ssa_result_range
){unknown
, false};
399 if (nir_src_is_const(instr
->src
[src
].src
))
400 return analyze_constant(instr
, src
, use_type
);
402 if (instr
->src
[src
].src
.ssa
->parent_instr
->type
!= nir_instr_type_alu
)
403 return (struct ssa_result_range
){unknown
, false};
405 const struct nir_alu_instr
*const alu
=
406 nir_instr_as_alu(instr
->src
[src
].src
.ssa
->parent_instr
);
408 /* Bail if the type of the instruction generating the value does not match
409 * the type the value will be interpreted as. int/uint/bool can be
410 * reinterpreted trivially. The most important cases are between float and
413 if (alu
->op
!= nir_op_mov
&& alu
->op
!= nir_op_bcsel
) {
414 const nir_alu_type use_base_type
=
415 nir_alu_type_get_base_type(use_type
);
416 const nir_alu_type src_base_type
=
417 nir_alu_type_get_base_type(nir_op_infos
[alu
->op
].output_type
);
419 if (use_base_type
!= src_base_type
&&
420 (use_base_type
== nir_type_float
||
421 src_base_type
== nir_type_float
)) {
422 return (struct ssa_result_range
){unknown
, false};
426 struct hash_entry
*he
= _mesa_hash_table_search(ht
, pack_key(alu
, use_type
));
428 return unpack_data(he
->data
);
430 struct ssa_result_range r
= {unknown
, false};
432 /* ge_zero: ge_zero + ge_zero
434 * gt_zero: gt_zero + eq_zero
435 * | gt_zero + ge_zero
436 * | eq_zero + gt_zero # Addition is commutative
437 * | ge_zero + gt_zero # Addition is commutative
438 * | gt_zero + gt_zero
441 * le_zero: le_zero + le_zero
443 * lt_zero: lt_zero + eq_zero
444 * | lt_zero + le_zero
445 * | eq_zero + lt_zero # Addition is commutative
446 * | le_zero + lt_zero # Addition is commutative
447 * | lt_zero + lt_zero
450 * ne_zero: eq_zero + ne_zero
451 * | ne_zero + eq_zero # Addition is commutative
454 * eq_zero: eq_zero + eq_zero
457 * All other cases are 'unknown'. The seeming odd entry is (ne_zero,
458 * ne_zero), but that could be (-5, +5) which is not ne_zero.
460 static const enum ssa_ranges fadd_table
[last_range
+ 1][last_range
+ 1] = {
461 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
462 /* unknown */ { _______
, _______
, _______
, _______
, _______
, _______
, _______
},
463 /* lt_zero */ { _______
, lt_zero
, lt_zero
, _______
, _______
, _______
, lt_zero
},
464 /* le_zero */ { _______
, lt_zero
, le_zero
, _______
, _______
, _______
, le_zero
},
465 /* gt_zero */ { _______
, _______
, _______
, gt_zero
, gt_zero
, _______
, gt_zero
},
466 /* ge_zero */ { _______
, _______
, _______
, gt_zero
, ge_zero
, _______
, ge_zero
},
467 /* ne_zero */ { _______
, _______
, _______
, _______
, _______
, _______
, ne_zero
},
468 /* eq_zero */ { _______
, lt_zero
, le_zero
, gt_zero
, ge_zero
, ne_zero
, eq_zero
},
471 ASSERT_TABLE_IS_COMMUTATIVE(fadd_table
);
472 ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(fadd_table
);
473 ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_2_SOURCE(fadd_table
);
475 /* Due to flush-to-zero semanatics of floating-point numbers with very
476 * small mangnitudes, we can never really be sure a result will be
479 * ge_zero: ge_zero * ge_zero
480 * | ge_zero * gt_zero
481 * | ge_zero * eq_zero
482 * | le_zero * lt_zero
483 * | lt_zero * le_zero # Multiplication is commutative
484 * | le_zero * le_zero
485 * | gt_zero * ge_zero # Multiplication is commutative
486 * | eq_zero * ge_zero # Multiplication is commutative
487 * | a * a # Left source == right source
488 * | gt_zero * gt_zero
489 * | lt_zero * lt_zero
492 * le_zero: ge_zero * le_zero
493 * | ge_zero * lt_zero
494 * | lt_zero * ge_zero # Multiplication is commutative
495 * | le_zero * ge_zero # Multiplication is commutative
496 * | le_zero * gt_zero
497 * | lt_zero * gt_zero
498 * | gt_zero * lt_zero # Multiplication is commutative
501 * eq_zero: eq_zero * <any>
502 * <any> * eq_zero # Multiplication is commutative
504 * All other cases are 'unknown'.
506 static const enum ssa_ranges fmul_table
[last_range
+ 1][last_range
+ 1] = {
507 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
508 /* unknown */ { _______
, _______
, _______
, _______
, _______
, _______
, eq_zero
},
509 /* lt_zero */ { _______
, ge_zero
, ge_zero
, le_zero
, le_zero
, _______
, eq_zero
},
510 /* le_zero */ { _______
, ge_zero
, ge_zero
, le_zero
, le_zero
, _______
, eq_zero
},
511 /* gt_zero */ { _______
, le_zero
, le_zero
, ge_zero
, ge_zero
, _______
, eq_zero
},
512 /* ge_zero */ { _______
, le_zero
, le_zero
, ge_zero
, ge_zero
, _______
, eq_zero
},
513 /* ne_zero */ { _______
, _______
, _______
, _______
, _______
, _______
, eq_zero
},
514 /* eq_zero */ { eq_zero
, eq_zero
, eq_zero
, eq_zero
, eq_zero
, eq_zero
, eq_zero
}
517 ASSERT_TABLE_IS_COMMUTATIVE(fmul_table
);
518 ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(fmul_table
);
519 ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_2_SOURCE(fmul_table
);
521 static const enum ssa_ranges fneg_table
[last_range
+ 1] = {
522 /* unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
523 _______
, gt_zero
, ge_zero
, lt_zero
, le_zero
, ne_zero
, eq_zero
526 ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_1_SOURCE(fneg_table
);
527 ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_1_SOURCE(fneg_table
);
533 r
= (struct ssa_result_range
){ge_zero
, alu
->op
== nir_op_b2f32
};
537 const struct ssa_result_range left
=
538 analyze_expression(alu
, 1, ht
, use_type
);
539 const struct ssa_result_range right
=
540 analyze_expression(alu
, 2, ht
, use_type
);
542 r
.is_integral
= left
.is_integral
&& right
.is_integral
;
544 /* le_zero: bcsel(<any>, le_zero, lt_zero)
545 * | bcsel(<any>, eq_zero, lt_zero)
546 * | bcsel(<any>, le_zero, eq_zero)
547 * | bcsel(<any>, lt_zero, le_zero)
548 * | bcsel(<any>, lt_zero, eq_zero)
549 * | bcsel(<any>, eq_zero, le_zero)
550 * | bcsel(<any>, le_zero, le_zero)
553 * lt_zero: bcsel(<any>, lt_zero, lt_zero)
556 * ge_zero: bcsel(<any>, ge_zero, ge_zero)
557 * | bcsel(<any>, ge_zero, gt_zero)
558 * | bcsel(<any>, ge_zero, eq_zero)
559 * | bcsel(<any>, gt_zero, ge_zero)
560 * | bcsel(<any>, eq_zero, ge_zero)
563 * gt_zero: bcsel(<any>, gt_zero, gt_zero)
566 * ne_zero: bcsel(<any>, ne_zero, gt_zero)
567 * | bcsel(<any>, ne_zero, lt_zero)
568 * | bcsel(<any>, gt_zero, lt_zero)
569 * | bcsel(<any>, gt_zero, ne_zero)
570 * | bcsel(<any>, lt_zero, ne_zero)
571 * | bcsel(<any>, lt_zero, gt_zero)
572 * | bcsel(<any>, ne_zero, ne_zero)
575 * eq_zero: bcsel(<any>, eq_zero, eq_zero)
578 * All other cases are 'unknown'.
580 * The ranges could be tightened if the range of the first source is
581 * known. However, opt_algebraic will (eventually) elminiate the bcsel
582 * if the condition is known.
584 static const enum ssa_ranges table
[last_range
+ 1][last_range
+ 1] = {
585 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
586 /* unknown */ { _______
, _______
, _______
, _______
, _______
, _______
, _______
},
587 /* lt_zero */ { _______
, lt_zero
, le_zero
, ne_zero
, _______
, ne_zero
, le_zero
},
588 /* le_zero */ { _______
, le_zero
, le_zero
, _______
, _______
, _______
, le_zero
},
589 /* gt_zero */ { _______
, ne_zero
, _______
, gt_zero
, ge_zero
, ne_zero
, ge_zero
},
590 /* ge_zero */ { _______
, _______
, _______
, ge_zero
, ge_zero
, _______
, ge_zero
},
591 /* ne_zero */ { _______
, ne_zero
, _______
, ne_zero
, _______
, ne_zero
, _______
},
592 /* eq_zero */ { _______
, le_zero
, le_zero
, ge_zero
, ge_zero
, _______
, eq_zero
},
595 ASSERT_TABLE_IS_COMMUTATIVE(table
);
596 ASSERT_TABLE_IS_DIAGONAL(table
);
597 ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_2_SOURCE(table
);
599 r
.range
= table
[left
.range
][right
.range
];
605 r
= analyze_expression(alu
, 0, ht
, nir_alu_src_type(alu
, 0));
607 r
.is_integral
= true;
609 if (r
.range
== unknown
&& alu
->op
== nir_op_u2f32
)
615 r
= analyze_expression(alu
, 0, ht
, nir_alu_src_type(alu
, 0));
637 const struct ssa_result_range left
=
638 analyze_expression(alu
, 0, ht
, nir_alu_src_type(alu
, 0));
639 const struct ssa_result_range right
=
640 analyze_expression(alu
, 1, ht
, nir_alu_src_type(alu
, 1));
642 r
.is_integral
= left
.is_integral
&& right
.is_integral
;
643 r
.range
= fadd_table
[left
.range
][right
.range
];
648 /* If the parameter might be less than zero, the mathematically result
649 * will be on (0, 1). For sufficiently large magnitude negative
650 * parameters, the result will flush to zero.
652 static const enum ssa_ranges table
[last_range
+ 1] = {
653 /* unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
654 ge_zero
, ge_zero
, ge_zero
, gt_zero
, gt_zero
, ge_zero
, gt_zero
657 r
= analyze_expression(alu
, 0, ht
, nir_alu_src_type(alu
, 0));
659 ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_1_SOURCE(table
);
660 ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_1_SOURCE(table
);
662 r
.is_integral
= r
.is_integral
&& is_not_negative(r
.range
);
663 r
.range
= table
[r
.range
];
668 const struct ssa_result_range left
=
669 analyze_expression(alu
, 0, ht
, nir_alu_src_type(alu
, 0));
670 const struct ssa_result_range right
=
671 analyze_expression(alu
, 1, ht
, nir_alu_src_type(alu
, 1));
673 r
.is_integral
= left
.is_integral
&& right
.is_integral
;
675 /* gt_zero: fmax(gt_zero, *)
676 * | fmax(*, gt_zero) # Treat fmax as commutative
679 * ge_zero: fmax(ge_zero, ne_zero)
680 * | fmax(ge_zero, lt_zero)
681 * | fmax(ge_zero, le_zero)
682 * | fmax(ge_zero, eq_zero)
683 * | fmax(ne_zero, ge_zero) # Treat fmax as commutative
684 * | fmax(lt_zero, ge_zero) # Treat fmax as commutative
685 * | fmax(le_zero, ge_zero) # Treat fmax as commutative
686 * | fmax(eq_zero, ge_zero) # Treat fmax as commutative
687 * | fmax(ge_zero, ge_zero)
690 * le_zero: fmax(le_zero, lt_zero)
691 * | fmax(lt_zero, le_zero) # Treat fmax as commutative
692 * | fmax(le_zero, le_zero)
695 * lt_zero: fmax(lt_zero, lt_zero)
698 * ne_zero: fmax(ne_zero, lt_zero)
699 * | fmax(lt_zero, ne_zero) # Treat fmax as commutative
700 * | fmax(ne_zero, ne_zero)
703 * eq_zero: fmax(eq_zero, le_zero)
704 * | fmax(eq_zero, lt_zero)
705 * | fmax(le_zero, eq_zero) # Treat fmax as commutative
706 * | fmax(lt_zero, eq_zero) # Treat fmax as commutative
707 * | fmax(eq_zero, eq_zero)
710 * All other cases are 'unknown'.
712 static const enum ssa_ranges table
[last_range
+ 1][last_range
+ 1] = {
713 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
714 /* unknown */ { _______
, _______
, _______
, gt_zero
, ge_zero
, _______
, _______
},
715 /* lt_zero */ { _______
, lt_zero
, le_zero
, gt_zero
, ge_zero
, ne_zero
, eq_zero
},
716 /* le_zero */ { _______
, le_zero
, le_zero
, gt_zero
, ge_zero
, _______
, eq_zero
},
717 /* gt_zero */ { gt_zero
, gt_zero
, gt_zero
, gt_zero
, gt_zero
, gt_zero
, gt_zero
},
718 /* ge_zero */ { ge_zero
, ge_zero
, ge_zero
, gt_zero
, ge_zero
, ge_zero
, ge_zero
},
719 /* ne_zero */ { _______
, ne_zero
, _______
, gt_zero
, ge_zero
, ne_zero
, _______
},
720 /* eq_zero */ { _______
, eq_zero
, eq_zero
, gt_zero
, ge_zero
, _______
, eq_zero
}
723 /* Treat fmax as commutative. */
724 ASSERT_TABLE_IS_COMMUTATIVE(table
);
725 ASSERT_TABLE_IS_DIAGONAL(table
);
726 ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_2_SOURCE(table
);
728 r
.range
= table
[left
.range
][right
.range
];
733 const struct ssa_result_range left
=
734 analyze_expression(alu
, 0, ht
, nir_alu_src_type(alu
, 0));
735 const struct ssa_result_range right
=
736 analyze_expression(alu
, 1, ht
, nir_alu_src_type(alu
, 1));
738 r
.is_integral
= left
.is_integral
&& right
.is_integral
;
740 /* lt_zero: fmin(lt_zero, *)
741 * | fmin(*, lt_zero) # Treat fmin as commutative
744 * le_zero: fmin(le_zero, ne_zero)
745 * | fmin(le_zero, gt_zero)
746 * | fmin(le_zero, ge_zero)
747 * | fmin(le_zero, eq_zero)
748 * | fmin(ne_zero, le_zero) # Treat fmin as commutative
749 * | fmin(gt_zero, le_zero) # Treat fmin as commutative
750 * | fmin(ge_zero, le_zero) # Treat fmin as commutative
751 * | fmin(eq_zero, le_zero) # Treat fmin as commutative
752 * | fmin(le_zero, le_zero)
755 * ge_zero: fmin(ge_zero, gt_zero)
756 * | fmin(gt_zero, ge_zero) # Treat fmin as commutative
757 * | fmin(ge_zero, ge_zero)
760 * gt_zero: fmin(gt_zero, gt_zero)
763 * ne_zero: fmin(ne_zero, gt_zero)
764 * | fmin(gt_zero, ne_zero) # Treat fmin as commutative
765 * | fmin(ne_zero, ne_zero)
768 * eq_zero: fmin(eq_zero, ge_zero)
769 * | fmin(eq_zero, gt_zero)
770 * | fmin(ge_zero, eq_zero) # Treat fmin as commutative
771 * | fmin(gt_zero, eq_zero) # Treat fmin as commutative
772 * | fmin(eq_zero, eq_zero)
775 * All other cases are 'unknown'.
777 static const enum ssa_ranges table
[last_range
+ 1][last_range
+ 1] = {
778 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
779 /* unknown */ { _______
, lt_zero
, le_zero
, _______
, _______
, _______
, _______
},
780 /* lt_zero */ { lt_zero
, lt_zero
, lt_zero
, lt_zero
, lt_zero
, lt_zero
, lt_zero
},
781 /* le_zero */ { le_zero
, lt_zero
, le_zero
, le_zero
, le_zero
, le_zero
, le_zero
},
782 /* gt_zero */ { _______
, lt_zero
, le_zero
, gt_zero
, ge_zero
, ne_zero
, eq_zero
},
783 /* ge_zero */ { _______
, lt_zero
, le_zero
, ge_zero
, ge_zero
, _______
, eq_zero
},
784 /* ne_zero */ { _______
, lt_zero
, le_zero
, ne_zero
, _______
, ne_zero
, _______
},
785 /* eq_zero */ { _______
, lt_zero
, le_zero
, eq_zero
, eq_zero
, _______
, eq_zero
}
788 /* Treat fmin as commutative. */
789 ASSERT_TABLE_IS_COMMUTATIVE(table
);
790 ASSERT_TABLE_IS_DIAGONAL(table
);
791 ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_2_SOURCE(table
);
793 r
.range
= table
[left
.range
][right
.range
];
798 const struct ssa_result_range left
=
799 analyze_expression(alu
, 0, ht
, nir_alu_src_type(alu
, 0));
800 const struct ssa_result_range right
=
801 analyze_expression(alu
, 1, ht
, nir_alu_src_type(alu
, 1));
803 r
.is_integral
= left
.is_integral
&& right
.is_integral
;
805 /* x * x => ge_zero */
806 if (left
.range
!= eq_zero
&& nir_alu_srcs_equal(alu
, alu
, 0, 1)) {
807 /* Even if x > 0, the result of x*x can be zero when x is, for
808 * example, a subnormal number.
811 } else if (left
.range
!= eq_zero
&& nir_alu_srcs_negative_equal(alu
, alu
, 0, 1)) {
812 /* -x * x => le_zero. */
815 r
.range
= fmul_table
[left
.range
][right
.range
];
821 r
= (struct ssa_result_range
){
822 analyze_expression(alu
, 0, ht
, nir_alu_src_type(alu
, 0)).range
,
828 r
= analyze_expression(alu
, 0, ht
, use_type
);
832 r
= analyze_expression(alu
, 0, ht
, nir_alu_src_type(alu
, 0));
834 r
.range
= fneg_table
[r
.range
];
838 r
= analyze_expression(alu
, 0, ht
, nir_alu_src_type(alu
, 0));
844 r
.is_integral
= true;
848 assert(r
.is_integral
);
851 /* The fsat doesn't add any information in these cases. */
856 /* Since the result must be in [0, 1], the value must be >= 0. */
863 r
= (struct ssa_result_range
){
864 analyze_expression(alu
, 0, ht
, nir_alu_src_type(alu
, 0)).range
,
871 r
= (struct ssa_result_range
){ge_zero
, false};
874 case nir_op_ffloor
: {
875 const struct ssa_result_range left
=
876 analyze_expression(alu
, 0, ht
, nir_alu_src_type(alu
, 0));
878 r
.is_integral
= true;
880 if (left
.is_integral
|| left
.range
== le_zero
|| left
.range
== lt_zero
)
881 r
.range
= left
.range
;
882 else if (left
.range
== ge_zero
|| left
.range
== gt_zero
)
884 else if (left
.range
== ne_zero
)
891 const struct ssa_result_range left
=
892 analyze_expression(alu
, 0, ht
, nir_alu_src_type(alu
, 0));
894 r
.is_integral
= true;
896 if (left
.is_integral
|| left
.range
== ge_zero
|| left
.range
== gt_zero
)
897 r
.range
= left
.range
;
898 else if (left
.range
== le_zero
|| left
.range
== lt_zero
)
900 else if (left
.range
== ne_zero
)
906 case nir_op_ftrunc
: {
907 const struct ssa_result_range left
=
908 analyze_expression(alu
, 0, ht
, nir_alu_src_type(alu
, 0));
910 r
.is_integral
= true;
912 if (left
.is_integral
)
913 r
.range
= left
.range
;
914 else if (left
.range
== ge_zero
|| left
.range
== gt_zero
)
916 else if (left
.range
== le_zero
|| left
.range
== lt_zero
)
918 else if (left
.range
== ne_zero
)
934 /* Boolean results are 0 or -1. */
935 r
= (struct ssa_result_range
){le_zero
, false};
939 /* Due to flush-to-zero semanatics of floating-point numbers with very
940 * small mangnitudes, we can never really be sure a result will be
943 * NIR uses pow() and powf() to constant evaluate nir_op_fpow. The man
944 * page for that function says:
946 * If y is 0, the result is 1.0 (even if x is a NaN).
948 * gt_zero: pow(*, eq_zero)
949 * | pow(eq_zero, lt_zero) # 0^-y = +inf
950 * | pow(eq_zero, le_zero) # 0^-y = +inf or 0^0 = 1.0
953 * eq_zero: pow(eq_zero, gt_zero)
956 * ge_zero: pow(gt_zero, gt_zero)
957 * | pow(gt_zero, ge_zero)
958 * | pow(gt_zero, lt_zero)
959 * | pow(gt_zero, le_zero)
960 * | pow(gt_zero, ne_zero)
961 * | pow(gt_zero, unknown)
962 * | pow(ge_zero, gt_zero)
963 * | pow(ge_zero, ge_zero)
964 * | pow(ge_zero, lt_zero)
965 * | pow(ge_zero, le_zero)
966 * | pow(ge_zero, ne_zero)
967 * | pow(ge_zero, unknown)
968 * | pow(eq_zero, ge_zero) # 0^0 = 1.0 or 0^+y = 0.0
969 * | pow(eq_zero, ne_zero) # 0^-y = +inf or 0^+y = 0.0
970 * | pow(eq_zero, unknown) # union of all other y cases
973 * All other cases are unknown.
975 * We could do better if the right operand is a constant, integral
978 static const enum ssa_ranges table
[last_range
+ 1][last_range
+ 1] = {
979 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
980 /* unknown */ { _______
, _______
, _______
, _______
, _______
, _______
, gt_zero
},
981 /* lt_zero */ { _______
, _______
, _______
, _______
, _______
, _______
, gt_zero
},
982 /* le_zero */ { _______
, _______
, _______
, _______
, _______
, _______
, gt_zero
},
983 /* gt_zero */ { ge_zero
, ge_zero
, ge_zero
, ge_zero
, ge_zero
, ge_zero
, gt_zero
},
984 /* ge_zero */ { ge_zero
, ge_zero
, ge_zero
, ge_zero
, ge_zero
, ge_zero
, gt_zero
},
985 /* ne_zero */ { _______
, _______
, _______
, _______
, _______
, _______
, gt_zero
},
986 /* eq_zero */ { ge_zero
, gt_zero
, gt_zero
, eq_zero
, ge_zero
, ge_zero
, gt_zero
},
989 const struct ssa_result_range left
=
990 analyze_expression(alu
, 0, ht
, nir_alu_src_type(alu
, 0));
991 const struct ssa_result_range right
=
992 analyze_expression(alu
, 1, ht
, nir_alu_src_type(alu
, 1));
994 ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(table
);
995 ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_2_SOURCE(table
);
997 r
.is_integral
= left
.is_integral
&& right
.is_integral
&&
998 is_not_negative(right
.range
);
999 r
.range
= table
[left
.range
][right
.range
];
1004 const struct ssa_result_range first
=
1005 analyze_expression(alu
, 0, ht
, nir_alu_src_type(alu
, 0));
1006 const struct ssa_result_range second
=
1007 analyze_expression(alu
, 1, ht
, nir_alu_src_type(alu
, 1));
1008 const struct ssa_result_range third
=
1009 analyze_expression(alu
, 2, ht
, nir_alu_src_type(alu
, 2));
1011 r
.is_integral
= first
.is_integral
&& second
.is_integral
&&
1014 enum ssa_ranges fmul_range
;
1016 if (first
.range
!= eq_zero
&& nir_alu_srcs_equal(alu
, alu
, 0, 1)) {
1017 /* See handling of nir_op_fmul for explanation of why ge_zero is the
1020 fmul_range
= ge_zero
;
1021 } else if (first
.range
!= eq_zero
&& nir_alu_srcs_negative_equal(alu
, alu
, 0, 1)) {
1022 /* -x * x => le_zero */
1023 fmul_range
= le_zero
;
1025 fmul_range
= fmul_table
[first
.range
][second
.range
];
1027 r
.range
= fadd_table
[fmul_range
][third
.range
];
1032 const struct ssa_result_range first
=
1033 analyze_expression(alu
, 0, ht
, nir_alu_src_type(alu
, 0));
1034 const struct ssa_result_range second
=
1035 analyze_expression(alu
, 1, ht
, nir_alu_src_type(alu
, 1));
1036 const struct ssa_result_range third
=
1037 analyze_expression(alu
, 2, ht
, nir_alu_src_type(alu
, 2));
1039 r
.is_integral
= first
.is_integral
&& second
.is_integral
&&
1042 /* Decompose the flrp to first + third * (second + -first) */
1043 const enum ssa_ranges inner_fadd_range
=
1044 fadd_table
[second
.range
][fneg_table
[first
.range
]];
1046 const enum ssa_ranges fmul_range
=
1047 fmul_table
[third
.range
][inner_fadd_range
];
1049 r
.range
= fadd_table
[first
.range
][fmul_range
];
1054 r
= (struct ssa_result_range
){unknown
, false};
1058 if (r
.range
== eq_zero
)
1059 r
.is_integral
= true;
1061 _mesa_hash_table_insert(ht
, pack_key(alu
, use_type
), pack_data(r
));
1067 struct ssa_result_range
1068 nir_analyze_range(struct hash_table
*range_ht
,
1069 const nir_alu_instr
*instr
, unsigned src
)
1071 return analyze_expression(instr
, src
, range_ht
,
1072 nir_alu_src_type(instr
, src
));