2 * Copyright © 2018 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
26 #include "nir_range_analysis.h"
27 #include "util/hash_table.h"
30 * Analyzes a sequence of operations to determine some aspects of the range of
35 is_not_negative(enum ssa_ranges r
)
37 return r
== gt_zero
|| r
== ge_zero
|| r
== eq_zero
;
41 pack_data(const struct ssa_result_range r
)
43 return (void *)(uintptr_t)(r
.range
| r
.is_integral
<< 8);
46 static struct ssa_result_range
47 unpack_data(const void *p
)
49 const uintptr_t v
= (uintptr_t) p
;
51 return (struct ssa_result_range
){v
& 0xff, (v
& 0x0ff00) != 0};
55 pack_key(const struct nir_alu_instr
*instr
, nir_alu_type type
)
57 uintptr_t type_encoding
;
58 uintptr_t ptr
= (uintptr_t) instr
;
60 /* The low 2 bits have to be zero or this whole scheme falls apart. */
61 assert((ptr
& 0x3) == 0);
63 /* NIR is typeless in the sense that sequences of bits have whatever
64 * meaning is attached to them by the instruction that consumes them.
65 * However, the number of bits must match between producer and consumer.
66 * As a result, the number of bits does not need to be encoded here.
68 switch (nir_alu_type_get_base_type(type
)) {
69 case nir_type_int
: type_encoding
= 0; break;
70 case nir_type_uint
: type_encoding
= 1; break;
71 case nir_type_bool
: type_encoding
= 2; break;
72 case nir_type_float
: type_encoding
= 3; break;
73 default: unreachable("Invalid base type.");
76 return (void *)(ptr
| type_encoding
);
80 nir_alu_src_type(const nir_alu_instr
*instr
, unsigned src
)
82 return nir_alu_type_get_base_type(nir_op_infos
[instr
->op
].input_types
[src
]) |
83 nir_src_bit_size(instr
->src
[src
].src
);
86 static struct ssa_result_range
87 analyze_constant(const struct nir_alu_instr
*instr
, unsigned src
,
88 nir_alu_type use_type
)
90 uint8_t swizzle
[4] = { 0, 1, 2, 3 };
92 /* If the source is an explicitly sized source, then we need to reset
93 * both the number of components and the swizzle.
95 const unsigned num_components
= nir_ssa_alu_instr_src_components(instr
, src
);
97 for (unsigned i
= 0; i
< num_components
; ++i
)
98 swizzle
[i
] = instr
->src
[src
].swizzle
[i
];
100 const nir_load_const_instr
*const load
=
101 nir_instr_as_load_const(instr
->src
[src
].src
.ssa
->parent_instr
);
103 struct ssa_result_range r
= { unknown
, false };
105 switch (nir_alu_type_get_base_type(use_type
)) {
106 case nir_type_float
: {
107 double min_value
= DBL_MAX
;
108 double max_value
= -DBL_MAX
;
109 bool any_zero
= false;
110 bool all_zero
= true;
112 r
.is_integral
= true;
114 for (unsigned i
= 0; i
< num_components
; ++i
) {
115 const double v
= nir_const_value_as_float(load
->value
[swizzle
[i
]],
119 r
.is_integral
= false;
121 any_zero
= any_zero
|| (v
== 0.0);
122 all_zero
= all_zero
&& (v
== 0.0);
123 min_value
= MIN2(min_value
, v
);
124 max_value
= MAX2(max_value
, v
);
127 assert(any_zero
>= all_zero
);
128 assert(isnan(max_value
) || max_value
>= min_value
);
132 else if (min_value
> 0.0)
134 else if (min_value
== 0.0)
136 else if (max_value
< 0.0)
138 else if (max_value
== 0.0)
149 case nir_type_bool
: {
150 int64_t min_value
= INT_MAX
;
151 int64_t max_value
= INT_MIN
;
152 bool any_zero
= false;
153 bool all_zero
= true;
155 for (unsigned i
= 0; i
< num_components
; ++i
) {
156 const int64_t v
= nir_const_value_as_int(load
->value
[swizzle
[i
]],
159 any_zero
= any_zero
|| (v
== 0);
160 all_zero
= all_zero
&& (v
== 0);
161 min_value
= MIN2(min_value
, v
);
162 max_value
= MAX2(max_value
, v
);
165 assert(any_zero
>= all_zero
);
166 assert(max_value
>= min_value
);
170 else if (min_value
> 0)
172 else if (min_value
== 0)
174 else if (max_value
< 0)
176 else if (max_value
== 0)
186 case nir_type_uint
: {
187 bool any_zero
= false;
188 bool all_zero
= true;
190 for (unsigned i
= 0; i
< num_components
; ++i
) {
191 const uint64_t v
= nir_const_value_as_uint(load
->value
[swizzle
[i
]],
194 any_zero
= any_zero
|| (v
== 0);
195 all_zero
= all_zero
&& (v
== 0);
198 assert(any_zero
>= all_zero
);
211 unreachable("Invalid alu source type");
216 * Short-hand name for use in the tables in analyze_expression. If this name
217 * becomes a problem on some compiler, we can change it to _.
219 #define _______ unknown
222 #define ASSERT_TABLE_IS_COMMUTATIVE(t) \
224 for (unsigned r = 0; r < ARRAY_SIZE(t); r++) { \
225 for (unsigned c = 0; c < ARRAY_SIZE(t[0]); c++) \
226 assert(t[r][c] == t[c][r]); \
230 #define ASSERT_TABLE_IS_DIAGONAL(t) \
232 for (unsigned r = 0; r < ARRAY_SIZE(t); r++) \
233 assert(t[r][r] == r); \
236 static enum ssa_ranges
237 union_ranges(enum ssa_ranges a
, enum ssa_ranges b
)
239 static const enum ssa_ranges union_table
[last_range
+ 1][last_range
+ 1] = {
240 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
241 /* unknown */ { _______
, _______
, _______
, _______
, _______
, _______
, _______
},
242 /* lt_zero */ { _______
, lt_zero
, le_zero
, ne_zero
, _______
, ne_zero
, le_zero
},
243 /* le_zero */ { _______
, le_zero
, le_zero
, _______
, _______
, _______
, le_zero
},
244 /* gt_zero */ { _______
, ne_zero
, _______
, gt_zero
, ge_zero
, ne_zero
, ge_zero
},
245 /* ge_zero */ { _______
, _______
, _______
, ge_zero
, ge_zero
, _______
, ge_zero
},
246 /* ne_zero */ { _______
, ne_zero
, _______
, ne_zero
, _______
, ne_zero
, _______
},
247 /* eq_zero */ { _______
, le_zero
, le_zero
, ge_zero
, ge_zero
, _______
, eq_zero
},
250 ASSERT_TABLE_IS_COMMUTATIVE(union_table
);
251 ASSERT_TABLE_IS_DIAGONAL(union_table
);
253 return union_table
[a
][b
];
256 /* Verify that the 'unknown' entry in each row (or column) of the table is the
257 * union of all the other values in the row (or column).
259 #define ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_2_SOURCE(t) \
261 for (unsigned i = 0; i < last_range; i++) { \
262 enum ssa_ranges col_range = t[i][unknown + 1]; \
263 enum ssa_ranges row_range = t[unknown + 1][i]; \
265 for (unsigned j = unknown + 2; j < last_range; j++) { \
266 col_range = union_ranges(col_range, t[i][j]); \
267 row_range = union_ranges(row_range, t[j][i]); \
270 assert(col_range == t[i][unknown]); \
271 assert(row_range == t[unknown][i]); \
275 /* For most operations, the union of ranges for a strict inequality and
276 * equality should be the range of the non-strict inequality (e.g.,
277 * union_ranges(range(op(lt_zero), range(op(eq_zero))) == range(op(le_zero)).
279 * Does not apply to selection-like opcodes (bcsel, fmin, fmax, etc.).
281 #define ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_1_SOURCE(t) \
283 assert(union_ranges(t[lt_zero], t[eq_zero]) == t[le_zero]); \
284 assert(union_ranges(t[gt_zero], t[eq_zero]) == t[ge_zero]); \
287 #define ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_2_SOURCE(t) \
289 for (unsigned i = 0; i < last_range; i++) { \
290 assert(union_ranges(t[i][lt_zero], t[i][eq_zero]) == t[i][le_zero]); \
291 assert(union_ranges(t[i][gt_zero], t[i][eq_zero]) == t[i][ge_zero]); \
292 assert(union_ranges(t[lt_zero][i], t[eq_zero][i]) == t[le_zero][i]); \
293 assert(union_ranges(t[gt_zero][i], t[eq_zero][i]) == t[ge_zero][i]); \
297 /* Several other unordered tuples span the range of "everything." Each should
298 * have the same value as unknown: (lt_zero, ge_zero), (le_zero, gt_zero), and
299 * (eq_zero, ne_zero). union_ranges is already commutative, so only one
300 * ordering needs to be checked.
302 * Does not apply to selection-like opcodes (bcsel, fmin, fmax, etc.).
304 * In cases where this can be used, it is unnecessary to also use
305 * ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_*_SOURCE. For any range X,
306 * union_ranges(X, X) == X. The disjoint ranges cover all of the non-unknown
307 * possibilities, so the union of all the unions of disjoint ranges is
308 * equivalent to the union of "others."
310 #define ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_1_SOURCE(t) \
312 assert(union_ranges(t[lt_zero], t[ge_zero]) == t[unknown]); \
313 assert(union_ranges(t[le_zero], t[gt_zero]) == t[unknown]); \
314 assert(union_ranges(t[eq_zero], t[ne_zero]) == t[unknown]); \
317 #define ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(t) \
319 for (unsigned i = 0; i < last_range; i++) { \
320 assert(union_ranges(t[i][lt_zero], t[i][ge_zero]) == \
322 assert(union_ranges(t[i][le_zero], t[i][gt_zero]) == \
324 assert(union_ranges(t[i][eq_zero], t[i][ne_zero]) == \
327 assert(union_ranges(t[lt_zero][i], t[ge_zero][i]) == \
329 assert(union_ranges(t[le_zero][i], t[gt_zero][i]) == \
331 assert(union_ranges(t[eq_zero][i], t[ne_zero][i]) == \
337 #define ASSERT_TABLE_IS_COMMUTATIVE(t)
338 #define ASSERT_TABLE_IS_DIAGONAL(t)
339 #define ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_2_SOURCE(t)
340 #define ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_1_SOURCE(t)
341 #define ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_2_SOURCE(t)
342 #define ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_1_SOURCE(t)
343 #define ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(t)
347 * Analyze an expression to determine the range of its result
349 * The end result of this analysis is a token that communicates something
350 * about the range of values. There's an implicit grammar that produces
351 * tokens from sequences of literal values, other tokens, and operations.
352 * This function implements this grammar as a recursive-descent parser. Some
353 * (but not all) of the grammar is listed in-line in the function.
355 static struct ssa_result_range
356 analyze_expression(const nir_alu_instr
*instr
, unsigned src
,
357 struct hash_table
*ht
, nir_alu_type use_type
)
359 if (!instr
->src
[src
].src
.is_ssa
)
360 return (struct ssa_result_range
){unknown
, false};
362 if (nir_src_is_const(instr
->src
[src
].src
))
363 return analyze_constant(instr
, src
, use_type
);
365 if (instr
->src
[src
].src
.ssa
->parent_instr
->type
!= nir_instr_type_alu
)
366 return (struct ssa_result_range
){unknown
, false};
368 const struct nir_alu_instr
*const alu
=
369 nir_instr_as_alu(instr
->src
[src
].src
.ssa
->parent_instr
);
371 /* Bail if the type of the instruction generating the value does not match
372 * the type the value will be interpreted as. int/uint/bool can be
373 * reinterpreted trivially. The most important cases are between float and
376 if (alu
->op
!= nir_op_mov
&& alu
->op
!= nir_op_bcsel
) {
377 const nir_alu_type use_base_type
=
378 nir_alu_type_get_base_type(use_type
);
379 const nir_alu_type src_base_type
=
380 nir_alu_type_get_base_type(nir_op_infos
[alu
->op
].output_type
);
382 if (use_base_type
!= src_base_type
&&
383 (use_base_type
== nir_type_float
||
384 src_base_type
== nir_type_float
)) {
385 return (struct ssa_result_range
){unknown
, false};
389 struct hash_entry
*he
= _mesa_hash_table_search(ht
, pack_key(alu
, use_type
));
391 return unpack_data(he
->data
);
393 struct ssa_result_range r
= {unknown
, false};
395 /* ge_zero: ge_zero + ge_zero
397 * gt_zero: gt_zero + eq_zero
398 * | gt_zero + ge_zero
399 * | eq_zero + gt_zero # Addition is commutative
400 * | ge_zero + gt_zero # Addition is commutative
401 * | gt_zero + gt_zero
404 * le_zero: le_zero + le_zero
406 * lt_zero: lt_zero + eq_zero
407 * | lt_zero + le_zero
408 * | eq_zero + lt_zero # Addition is commutative
409 * | le_zero + lt_zero # Addition is commutative
410 * | lt_zero + lt_zero
413 * ne_zero: eq_zero + ne_zero
414 * | ne_zero + eq_zero # Addition is commutative
417 * eq_zero: eq_zero + eq_zero
420 * All other cases are 'unknown'. The seeming odd entry is (ne_zero,
421 * ne_zero), but that could be (-5, +5) which is not ne_zero.
423 static const enum ssa_ranges fadd_table
[last_range
+ 1][last_range
+ 1] = {
424 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
425 /* unknown */ { _______
, _______
, _______
, _______
, _______
, _______
, _______
},
426 /* lt_zero */ { _______
, lt_zero
, lt_zero
, _______
, _______
, _______
, lt_zero
},
427 /* le_zero */ { _______
, lt_zero
, le_zero
, _______
, _______
, _______
, le_zero
},
428 /* gt_zero */ { _______
, _______
, _______
, gt_zero
, gt_zero
, _______
, gt_zero
},
429 /* ge_zero */ { _______
, _______
, _______
, gt_zero
, ge_zero
, _______
, ge_zero
},
430 /* ne_zero */ { _______
, _______
, _______
, _______
, _______
, _______
, ne_zero
},
431 /* eq_zero */ { _______
, lt_zero
, le_zero
, gt_zero
, ge_zero
, ne_zero
, eq_zero
},
434 ASSERT_TABLE_IS_COMMUTATIVE(fadd_table
);
435 ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(fadd_table
);
436 ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_2_SOURCE(fadd_table
);
438 /* Due to flush-to-zero semanatics of floating-point numbers with very
439 * small mangnitudes, we can never really be sure a result will be
442 * ge_zero: ge_zero * ge_zero
443 * | ge_zero * gt_zero
444 * | ge_zero * eq_zero
445 * | le_zero * lt_zero
446 * | lt_zero * le_zero # Multiplication is commutative
447 * | le_zero * le_zero
448 * | gt_zero * ge_zero # Multiplication is commutative
449 * | eq_zero * ge_zero # Multiplication is commutative
450 * | a * a # Left source == right source
451 * | gt_zero * gt_zero
452 * | lt_zero * lt_zero
455 * le_zero: ge_zero * le_zero
456 * | ge_zero * lt_zero
457 * | lt_zero * ge_zero # Multiplication is commutative
458 * | le_zero * ge_zero # Multiplication is commutative
459 * | le_zero * gt_zero
460 * | lt_zero * gt_zero
461 * | gt_zero * lt_zero # Multiplication is commutative
464 * eq_zero: eq_zero * <any>
465 * <any> * eq_zero # Multiplication is commutative
467 * All other cases are 'unknown'.
469 static const enum ssa_ranges fmul_table
[last_range
+ 1][last_range
+ 1] = {
470 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
471 /* unknown */ { _______
, _______
, _______
, _______
, _______
, _______
, eq_zero
},
472 /* lt_zero */ { _______
, ge_zero
, ge_zero
, le_zero
, le_zero
, _______
, eq_zero
},
473 /* le_zero */ { _______
, ge_zero
, ge_zero
, le_zero
, le_zero
, _______
, eq_zero
},
474 /* gt_zero */ { _______
, le_zero
, le_zero
, ge_zero
, ge_zero
, _______
, eq_zero
},
475 /* ge_zero */ { _______
, le_zero
, le_zero
, ge_zero
, ge_zero
, _______
, eq_zero
},
476 /* ne_zero */ { _______
, _______
, _______
, _______
, _______
, _______
, eq_zero
},
477 /* eq_zero */ { eq_zero
, eq_zero
, eq_zero
, eq_zero
, eq_zero
, eq_zero
, eq_zero
}
480 ASSERT_TABLE_IS_COMMUTATIVE(fmul_table
);
481 ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(fmul_table
);
482 ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_2_SOURCE(fmul_table
);
484 static const enum ssa_ranges fneg_table
[last_range
+ 1] = {
485 /* unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
486 _______
, gt_zero
, ge_zero
, lt_zero
, le_zero
, ne_zero
, eq_zero
489 ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_1_SOURCE(fneg_table
);
490 ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_1_SOURCE(fneg_table
);
496 r
= (struct ssa_result_range
){ge_zero
, alu
->op
== nir_op_b2f32
};
500 const struct ssa_result_range left
=
501 analyze_expression(alu
, 1, ht
, nir_alu_src_type(alu
, 1));
502 const struct ssa_result_range right
=
503 analyze_expression(alu
, 2, ht
, nir_alu_src_type(alu
, 2));
505 /* If either source is a constant load that is not zero, punt. The type
506 * will always be uint regardless of the actual type. We can't even
507 * decide if the value is non-zero because -0.0 is 0x80000000, and that
508 * will (possibly incorrectly) be considered non-zero.
510 /* FINISHME: We could do better, but it would require having the expected
511 * FINISHME: type passed in.
513 if ((nir_src_is_const(alu
->src
[1].src
) && left
.range
!= eq_zero
) ||
514 (nir_src_is_const(alu
->src
[2].src
) && right
.range
!= eq_zero
)) {
515 return (struct ssa_result_range
){unknown
, false};
518 r
.is_integral
= left
.is_integral
&& right
.is_integral
;
520 /* le_zero: bcsel(<any>, le_zero, lt_zero)
521 * | bcsel(<any>, eq_zero, lt_zero)
522 * | bcsel(<any>, le_zero, eq_zero)
523 * | bcsel(<any>, lt_zero, le_zero)
524 * | bcsel(<any>, lt_zero, eq_zero)
525 * | bcsel(<any>, eq_zero, le_zero)
526 * | bcsel(<any>, le_zero, le_zero)
529 * lt_zero: bcsel(<any>, lt_zero, lt_zero)
532 * ge_zero: bcsel(<any>, ge_zero, ge_zero)
533 * | bcsel(<any>, ge_zero, gt_zero)
534 * | bcsel(<any>, ge_zero, eq_zero)
535 * | bcsel(<any>, gt_zero, ge_zero)
536 * | bcsel(<any>, eq_zero, ge_zero)
539 * gt_zero: bcsel(<any>, gt_zero, gt_zero)
542 * ne_zero: bcsel(<any>, ne_zero, gt_zero)
543 * | bcsel(<any>, ne_zero, lt_zero)
544 * | bcsel(<any>, gt_zero, lt_zero)
545 * | bcsel(<any>, gt_zero, ne_zero)
546 * | bcsel(<any>, lt_zero, ne_zero)
547 * | bcsel(<any>, lt_zero, gt_zero)
548 * | bcsel(<any>, ne_zero, ne_zero)
551 * eq_zero: bcsel(<any>, eq_zero, eq_zero)
554 * All other cases are 'unknown'.
556 * The ranges could be tightened if the range of the first source is
557 * known. However, opt_algebraic will (eventually) elminiate the bcsel
558 * if the condition is known.
560 static const enum ssa_ranges table
[last_range
+ 1][last_range
+ 1] = {
561 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
562 /* unknown */ { _______
, _______
, _______
, _______
, _______
, _______
, _______
},
563 /* lt_zero */ { _______
, lt_zero
, le_zero
, ne_zero
, _______
, ne_zero
, le_zero
},
564 /* le_zero */ { _______
, le_zero
, le_zero
, _______
, _______
, _______
, le_zero
},
565 /* gt_zero */ { _______
, ne_zero
, _______
, gt_zero
, ge_zero
, ne_zero
, ge_zero
},
566 /* ge_zero */ { _______
, _______
, _______
, ge_zero
, ge_zero
, _______
, ge_zero
},
567 /* ne_zero */ { _______
, ne_zero
, _______
, ne_zero
, _______
, ne_zero
, _______
},
568 /* eq_zero */ { _______
, le_zero
, le_zero
, ge_zero
, ge_zero
, _______
, eq_zero
},
571 ASSERT_TABLE_IS_COMMUTATIVE(table
);
572 ASSERT_TABLE_IS_DIAGONAL(table
);
573 ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_2_SOURCE(table
);
575 r
.range
= table
[left
.range
][right
.range
];
581 r
= analyze_expression(alu
, 0, ht
, nir_alu_src_type(alu
, 0));
583 r
.is_integral
= true;
585 if (r
.range
== unknown
&& alu
->op
== nir_op_u2f32
)
591 r
= analyze_expression(alu
, 0, ht
, nir_alu_src_type(alu
, 0));
613 const struct ssa_result_range left
=
614 analyze_expression(alu
, 0, ht
, nir_alu_src_type(alu
, 0));
615 const struct ssa_result_range right
=
616 analyze_expression(alu
, 1, ht
, nir_alu_src_type(alu
, 1));
618 r
.is_integral
= left
.is_integral
&& right
.is_integral
;
619 r
.range
= fadd_table
[left
.range
][right
.range
];
624 /* If the parameter might be less than zero, the mathematically result
625 * will be on (0, 1). For sufficiently large magnitude negative
626 * parameters, the result will flush to zero.
628 static const enum ssa_ranges table
[last_range
+ 1] = {
629 /* unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
630 ge_zero
, ge_zero
, ge_zero
, gt_zero
, gt_zero
, ge_zero
, gt_zero
633 r
= analyze_expression(alu
, 0, ht
, nir_alu_src_type(alu
, 0));
635 ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_1_SOURCE(table
);
636 ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_1_SOURCE(table
);
638 r
.is_integral
= r
.is_integral
&& is_not_negative(r
.range
);
639 r
.range
= table
[r
.range
];
644 const struct ssa_result_range left
=
645 analyze_expression(alu
, 0, ht
, nir_alu_src_type(alu
, 0));
646 const struct ssa_result_range right
=
647 analyze_expression(alu
, 1, ht
, nir_alu_src_type(alu
, 1));
649 r
.is_integral
= left
.is_integral
&& right
.is_integral
;
651 /* gt_zero: fmax(gt_zero, *)
652 * | fmax(*, gt_zero) # Treat fmax as commutative
655 * ge_zero: fmax(ge_zero, ne_zero)
656 * | fmax(ge_zero, lt_zero)
657 * | fmax(ge_zero, le_zero)
658 * | fmax(ge_zero, eq_zero)
659 * | fmax(ne_zero, ge_zero) # Treat fmax as commutative
660 * | fmax(lt_zero, ge_zero) # Treat fmax as commutative
661 * | fmax(le_zero, ge_zero) # Treat fmax as commutative
662 * | fmax(eq_zero, ge_zero) # Treat fmax as commutative
663 * | fmax(ge_zero, ge_zero)
666 * le_zero: fmax(le_zero, lt_zero)
667 * | fmax(lt_zero, le_zero) # Treat fmax as commutative
668 * | fmax(le_zero, le_zero)
671 * lt_zero: fmax(lt_zero, lt_zero)
674 * ne_zero: fmax(ne_zero, lt_zero)
675 * | fmax(lt_zero, ne_zero) # Treat fmax as commutative
676 * | fmax(ne_zero, ne_zero)
679 * eq_zero: fmax(eq_zero, le_zero)
680 * | fmax(eq_zero, lt_zero)
681 * | fmax(le_zero, eq_zero) # Treat fmax as commutative
682 * | fmax(lt_zero, eq_zero) # Treat fmax as commutative
683 * | fmax(eq_zero, eq_zero)
686 * All other cases are 'unknown'.
688 static const enum ssa_ranges table
[last_range
+ 1][last_range
+ 1] = {
689 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
690 /* unknown */ { _______
, _______
, _______
, gt_zero
, ge_zero
, _______
, _______
},
691 /* lt_zero */ { _______
, lt_zero
, le_zero
, gt_zero
, ge_zero
, ne_zero
, eq_zero
},
692 /* le_zero */ { _______
, le_zero
, le_zero
, gt_zero
, ge_zero
, _______
, eq_zero
},
693 /* gt_zero */ { gt_zero
, gt_zero
, gt_zero
, gt_zero
, gt_zero
, gt_zero
, gt_zero
},
694 /* ge_zero */ { ge_zero
, ge_zero
, ge_zero
, gt_zero
, ge_zero
, ge_zero
, ge_zero
},
695 /* ne_zero */ { _______
, ne_zero
, _______
, gt_zero
, ge_zero
, ne_zero
, _______
},
696 /* eq_zero */ { _______
, eq_zero
, eq_zero
, gt_zero
, ge_zero
, _______
, eq_zero
}
699 /* Treat fmax as commutative. */
700 ASSERT_TABLE_IS_COMMUTATIVE(table
);
701 ASSERT_TABLE_IS_DIAGONAL(table
);
702 ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_2_SOURCE(table
);
704 r
.range
= table
[left
.range
][right
.range
];
709 const struct ssa_result_range left
=
710 analyze_expression(alu
, 0, ht
, nir_alu_src_type(alu
, 0));
711 const struct ssa_result_range right
=
712 analyze_expression(alu
, 1, ht
, nir_alu_src_type(alu
, 1));
714 r
.is_integral
= left
.is_integral
&& right
.is_integral
;
716 /* lt_zero: fmin(lt_zero, *)
717 * | fmin(*, lt_zero) # Treat fmin as commutative
720 * le_zero: fmin(le_zero, ne_zero)
721 * | fmin(le_zero, gt_zero)
722 * | fmin(le_zero, ge_zero)
723 * | fmin(le_zero, eq_zero)
724 * | fmin(ne_zero, le_zero) # Treat fmin as commutative
725 * | fmin(gt_zero, le_zero) # Treat fmin as commutative
726 * | fmin(ge_zero, le_zero) # Treat fmin as commutative
727 * | fmin(eq_zero, le_zero) # Treat fmin as commutative
728 * | fmin(le_zero, le_zero)
731 * ge_zero: fmin(ge_zero, gt_zero)
732 * | fmin(gt_zero, ge_zero) # Treat fmin as commutative
733 * | fmin(ge_zero, ge_zero)
736 * gt_zero: fmin(gt_zero, gt_zero)
739 * ne_zero: fmin(ne_zero, gt_zero)
740 * | fmin(gt_zero, ne_zero) # Treat fmin as commutative
741 * | fmin(ne_zero, ne_zero)
744 * eq_zero: fmin(eq_zero, ge_zero)
745 * | fmin(eq_zero, gt_zero)
746 * | fmin(ge_zero, eq_zero) # Treat fmin as commutative
747 * | fmin(gt_zero, eq_zero) # Treat fmin as commutative
748 * | fmin(eq_zero, eq_zero)
751 * All other cases are 'unknown'.
753 static const enum ssa_ranges table
[last_range
+ 1][last_range
+ 1] = {
754 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
755 /* unknown */ { _______
, lt_zero
, le_zero
, _______
, _______
, _______
, _______
},
756 /* lt_zero */ { lt_zero
, lt_zero
, lt_zero
, lt_zero
, lt_zero
, lt_zero
, lt_zero
},
757 /* le_zero */ { le_zero
, lt_zero
, le_zero
, le_zero
, le_zero
, le_zero
, le_zero
},
758 /* gt_zero */ { _______
, lt_zero
, le_zero
, gt_zero
, ge_zero
, ne_zero
, eq_zero
},
759 /* ge_zero */ { _______
, lt_zero
, le_zero
, ge_zero
, ge_zero
, _______
, eq_zero
},
760 /* ne_zero */ { _______
, lt_zero
, le_zero
, ne_zero
, _______
, ne_zero
, _______
},
761 /* eq_zero */ { _______
, lt_zero
, le_zero
, eq_zero
, eq_zero
, _______
, eq_zero
}
764 /* Treat fmin as commutative. */
765 ASSERT_TABLE_IS_COMMUTATIVE(table
);
766 ASSERT_TABLE_IS_DIAGONAL(table
);
767 ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_2_SOURCE(table
);
769 r
.range
= table
[left
.range
][right
.range
];
774 const struct ssa_result_range left
=
775 analyze_expression(alu
, 0, ht
, nir_alu_src_type(alu
, 0));
776 const struct ssa_result_range right
=
777 analyze_expression(alu
, 1, ht
, nir_alu_src_type(alu
, 1));
779 r
.is_integral
= left
.is_integral
&& right
.is_integral
;
781 /* x * x => ge_zero */
782 if (left
.range
!= eq_zero
&& nir_alu_srcs_equal(alu
, alu
, 0, 1)) {
783 /* Even if x > 0, the result of x*x can be zero when x is, for
784 * example, a subnormal number.
787 } else if (left
.range
!= eq_zero
&& nir_alu_srcs_negative_equal(alu
, alu
, 0, 1)) {
788 /* -x * x => le_zero. */
791 r
.range
= fmul_table
[left
.range
][right
.range
];
797 r
= (struct ssa_result_range
){
798 analyze_expression(alu
, 0, ht
, nir_alu_src_type(alu
, 0)).range
,
804 const struct ssa_result_range left
=
805 analyze_expression(alu
, 0, ht
, nir_alu_src_type(alu
, 0));
807 /* See commentary in nir_op_bcsel for the reasons this is necessary. */
808 if (nir_src_is_const(alu
->src
[0].src
) && left
.range
!= eq_zero
)
809 return (struct ssa_result_range
){unknown
, false};
816 r
= analyze_expression(alu
, 0, ht
, nir_alu_src_type(alu
, 0));
818 r
.range
= fneg_table
[r
.range
];
822 r
= analyze_expression(alu
, 0, ht
, nir_alu_src_type(alu
, 0));
828 r
.is_integral
= true;
832 assert(r
.is_integral
);
835 /* The fsat doesn't add any information in these cases. */
840 /* Since the result must be in [0, 1], the value must be >= 0. */
847 r
= (struct ssa_result_range
){
848 analyze_expression(alu
, 0, ht
, nir_alu_src_type(alu
, 0)).range
,
855 r
= (struct ssa_result_range
){ge_zero
, false};
858 case nir_op_ffloor
: {
859 const struct ssa_result_range left
=
860 analyze_expression(alu
, 0, ht
, nir_alu_src_type(alu
, 0));
862 r
.is_integral
= true;
864 if (left
.is_integral
|| left
.range
== le_zero
|| left
.range
== lt_zero
)
865 r
.range
= left
.range
;
866 else if (left
.range
== ge_zero
|| left
.range
== gt_zero
)
868 else if (left
.range
== ne_zero
)
875 const struct ssa_result_range left
=
876 analyze_expression(alu
, 0, ht
, nir_alu_src_type(alu
, 0));
878 r
.is_integral
= true;
880 if (left
.is_integral
|| left
.range
== ge_zero
|| left
.range
== gt_zero
)
881 r
.range
= left
.range
;
882 else if (left
.range
== le_zero
|| left
.range
== lt_zero
)
884 else if (left
.range
== ne_zero
)
890 case nir_op_ftrunc
: {
891 const struct ssa_result_range left
=
892 analyze_expression(alu
, 0, ht
, nir_alu_src_type(alu
, 0));
894 r
.is_integral
= true;
896 if (left
.is_integral
)
897 r
.range
= left
.range
;
898 else if (left
.range
== ge_zero
|| left
.range
== gt_zero
)
900 else if (left
.range
== le_zero
|| left
.range
== lt_zero
)
902 else if (left
.range
== ne_zero
)
918 /* Boolean results are 0 or -1. */
919 r
= (struct ssa_result_range
){le_zero
, false};
923 /* Due to flush-to-zero semanatics of floating-point numbers with very
924 * small mangnitudes, we can never really be sure a result will be
927 * NIR uses pow() and powf() to constant evaluate nir_op_fpow. The man
928 * page for that function says:
930 * If y is 0, the result is 1.0 (even if x is a NaN).
932 * gt_zero: pow(*, eq_zero)
933 * | pow(eq_zero, lt_zero) # 0^-y = +inf
934 * | pow(eq_zero, le_zero) # 0^-y = +inf or 0^0 = 1.0
937 * eq_zero: pow(eq_zero, gt_zero)
940 * ge_zero: pow(gt_zero, gt_zero)
941 * | pow(gt_zero, ge_zero)
942 * | pow(gt_zero, lt_zero)
943 * | pow(gt_zero, le_zero)
944 * | pow(gt_zero, ne_zero)
945 * | pow(gt_zero, unknown)
946 * | pow(ge_zero, gt_zero)
947 * | pow(ge_zero, ge_zero)
948 * | pow(ge_zero, lt_zero)
949 * | pow(ge_zero, le_zero)
950 * | pow(ge_zero, ne_zero)
951 * | pow(ge_zero, unknown)
952 * | pow(eq_zero, ge_zero) # 0^0 = 1.0 or 0^+y = 0.0
953 * | pow(eq_zero, ne_zero) # 0^-y = +inf or 0^+y = 0.0
954 * | pow(eq_zero, unknown) # union of all other y cases
957 * All other cases are unknown.
959 * We could do better if the right operand is a constant, integral
962 static const enum ssa_ranges table
[last_range
+ 1][last_range
+ 1] = {
963 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
964 /* unknown */ { _______
, _______
, _______
, _______
, _______
, _______
, gt_zero
},
965 /* lt_zero */ { _______
, _______
, _______
, _______
, _______
, _______
, gt_zero
},
966 /* le_zero */ { _______
, _______
, _______
, _______
, _______
, _______
, gt_zero
},
967 /* gt_zero */ { ge_zero
, ge_zero
, ge_zero
, ge_zero
, ge_zero
, ge_zero
, gt_zero
},
968 /* ge_zero */ { ge_zero
, ge_zero
, ge_zero
, ge_zero
, ge_zero
, ge_zero
, gt_zero
},
969 /* ne_zero */ { _______
, _______
, _______
, _______
, _______
, _______
, gt_zero
},
970 /* eq_zero */ { ge_zero
, gt_zero
, gt_zero
, eq_zero
, ge_zero
, ge_zero
, gt_zero
},
973 const struct ssa_result_range left
=
974 analyze_expression(alu
, 0, ht
, nir_alu_src_type(alu
, 0));
975 const struct ssa_result_range right
=
976 analyze_expression(alu
, 1, ht
, nir_alu_src_type(alu
, 1));
978 ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(table
);
979 ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_2_SOURCE(table
);
981 r
.is_integral
= left
.is_integral
&& right
.is_integral
&&
982 is_not_negative(right
.range
);
983 r
.range
= table
[left
.range
][right
.range
];
988 const struct ssa_result_range first
=
989 analyze_expression(alu
, 0, ht
, nir_alu_src_type(alu
, 0));
990 const struct ssa_result_range second
=
991 analyze_expression(alu
, 1, ht
, nir_alu_src_type(alu
, 1));
992 const struct ssa_result_range third
=
993 analyze_expression(alu
, 2, ht
, nir_alu_src_type(alu
, 2));
995 r
.is_integral
= first
.is_integral
&& second
.is_integral
&&
998 enum ssa_ranges fmul_range
;
1000 if (first
.range
!= eq_zero
&& nir_alu_srcs_equal(alu
, alu
, 0, 1)) {
1001 /* See handling of nir_op_fmul for explanation of why ge_zero is the
1004 fmul_range
= ge_zero
;
1005 } else if (first
.range
!= eq_zero
&& nir_alu_srcs_negative_equal(alu
, alu
, 0, 1)) {
1006 /* -x * x => le_zero */
1007 fmul_range
= le_zero
;
1009 fmul_range
= fmul_table
[first
.range
][second
.range
];
1011 r
.range
= fadd_table
[fmul_range
][third
.range
];
1016 const struct ssa_result_range first
=
1017 analyze_expression(alu
, 0, ht
, nir_alu_src_type(alu
, 0));
1018 const struct ssa_result_range second
=
1019 analyze_expression(alu
, 1, ht
, nir_alu_src_type(alu
, 1));
1020 const struct ssa_result_range third
=
1021 analyze_expression(alu
, 2, ht
, nir_alu_src_type(alu
, 2));
1023 r
.is_integral
= first
.is_integral
&& second
.is_integral
&&
1026 /* Decompose the flrp to first + third * (second + -first) */
1027 const enum ssa_ranges inner_fadd_range
=
1028 fadd_table
[second
.range
][fneg_table
[first
.range
]];
1030 const enum ssa_ranges fmul_range
=
1031 fmul_table
[third
.range
][inner_fadd_range
];
1033 r
.range
= fadd_table
[first
.range
][fmul_range
];
1038 r
= (struct ssa_result_range
){unknown
, false};
1042 if (r
.range
== eq_zero
)
1043 r
.is_integral
= true;
1045 _mesa_hash_table_insert(ht
, pack_key(alu
, use_type
), pack_data(r
));
1051 struct ssa_result_range
1052 nir_analyze_range(const nir_alu_instr
*instr
, unsigned src
)
1054 struct hash_table
*ht
= _mesa_pointer_hash_table_create(NULL
);
1056 const struct ssa_result_range r
=
1057 analyze_expression(instr
, src
, ht
, nir_alu_src_type(instr
, src
));
1059 _mesa_hash_table_destroy(ht
, NULL
);