2 * Copyright © 2018 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 #include "nir_range_analysis.h"
27 #include "util/hash_table.h"
30 * Analyzes a sequence of operations to determine some aspects of the range of
35 is_not_negative(enum ssa_ranges r
)
37 return r
== gt_zero
|| r
== ge_zero
|| r
== eq_zero
;
41 pack_data(const struct ssa_result_range r
)
43 return (void *)(uintptr_t)(r
.range
| r
.is_integral
<< 8);
46 static struct ssa_result_range
47 unpack_data(const void *p
)
49 const uintptr_t v
= (uintptr_t) p
;
51 return (struct ssa_result_range
){v
& 0xff, (v
& 0x0ff00) != 0};
55 pack_key(const struct nir_alu_instr
*instr
, nir_alu_type type
)
57 uintptr_t type_encoding
;
58 uintptr_t ptr
= (uintptr_t) instr
;
60 /* The low 2 bits have to be zero or this whole scheme falls apart. */
61 assert((ptr
& 0x3) == 0);
63 /* NIR is typeless in the sense that sequences of bits have whatever
64 * meaning is attached to them by the instruction that consumes them.
65 * However, the number of bits must match between producer and consumer.
66 * As a result, the number of bits does not need to be encoded here.
68 switch (nir_alu_type_get_base_type(type
)) {
69 case nir_type_int
: type_encoding
= 0; break;
70 case nir_type_uint
: type_encoding
= 1; break;
71 case nir_type_bool
: type_encoding
= 2; break;
72 case nir_type_float
: type_encoding
= 3; break;
73 default: unreachable("Invalid base type.");
76 return (void *)(ptr
| type_encoding
);
80 nir_alu_src_type(const nir_alu_instr
*instr
, unsigned src
)
82 return nir_alu_type_get_base_type(nir_op_infos
[instr
->op
].input_types
[src
]) |
83 nir_src_bit_size(instr
->src
[src
].src
);
86 static struct ssa_result_range
87 analyze_constant(const struct nir_alu_instr
*instr
, unsigned src
,
88 nir_alu_type use_type
)
90 uint8_t swizzle
[4] = { 0, 1, 2, 3 };
92 /* If the source is an explicitly sized source, then we need to reset
93 * both the number of components and the swizzle.
95 const unsigned num_components
= nir_ssa_alu_instr_src_components(instr
, src
);
97 for (unsigned i
= 0; i
< num_components
; ++i
)
98 swizzle
[i
] = instr
->src
[src
].swizzle
[i
];
100 const nir_load_const_instr
*const load
=
101 nir_instr_as_load_const(instr
->src
[src
].src
.ssa
->parent_instr
);
103 struct ssa_result_range r
= { unknown
, false };
105 switch (nir_alu_type_get_base_type(use_type
)) {
106 case nir_type_float
: {
107 double min_value
= DBL_MAX
;
108 double max_value
= -DBL_MAX
;
109 bool any_zero
= false;
110 bool all_zero
= true;
112 r
.is_integral
= true;
114 for (unsigned i
= 0; i
< num_components
; ++i
) {
115 const double v
= nir_const_value_as_float(load
->value
[swizzle
[i
]],
119 r
.is_integral
= false;
121 any_zero
= any_zero
|| (v
== 0.0);
122 all_zero
= all_zero
&& (v
== 0.0);
123 min_value
= MIN2(min_value
, v
);
124 max_value
= MAX2(max_value
, v
);
127 assert(any_zero
>= all_zero
);
128 assert(isnan(max_value
) || max_value
>= min_value
);
132 else if (min_value
> 0.0)
134 else if (min_value
== 0.0)
136 else if (max_value
< 0.0)
138 else if (max_value
== 0.0)
149 case nir_type_bool
: {
150 int64_t min_value
= INT_MAX
;
151 int64_t max_value
= INT_MIN
;
152 bool any_zero
= false;
153 bool all_zero
= true;
155 for (unsigned i
= 0; i
< num_components
; ++i
) {
156 const int64_t v
= nir_const_value_as_int(load
->value
[swizzle
[i
]],
159 any_zero
= any_zero
|| (v
== 0);
160 all_zero
= all_zero
&& (v
== 0);
161 min_value
= MIN2(min_value
, v
);
162 max_value
= MAX2(max_value
, v
);
165 assert(any_zero
>= all_zero
);
166 assert(max_value
>= min_value
);
170 else if (min_value
> 0)
172 else if (min_value
== 0)
174 else if (max_value
< 0)
176 else if (max_value
== 0)
186 case nir_type_uint
: {
187 bool any_zero
= false;
188 bool all_zero
= true;
190 for (unsigned i
= 0; i
< num_components
; ++i
) {
191 const uint64_t v
= nir_const_value_as_uint(load
->value
[swizzle
[i
]],
194 any_zero
= any_zero
|| (v
== 0);
195 all_zero
= all_zero
&& (v
== 0);
198 assert(any_zero
>= all_zero
);
211 unreachable("Invalid alu source type");
/* NOTE(review): this region is garbled by extraction — the "#ifndef NDEBUG"
 * guard and several macro continuation lines (braces, do/while wrappers) are
 * missing; confirm against upstream before relying on these definitions.
 */
216 * Short-hand name for use in the tables in analyze_expression. If this name
217 * becomes a problem on some compiler, we can change it to _.
219 #define _______ unknown
/* Debug-only check: a lookup table for a commutative operation must equal
 * its own transpose (t[r][c] == t[c][r] for all r, c).
 */
222 #define ASSERT_TABLE_IS_COMMUTATIVE(t) \
224 for (unsigned r = 0; r < ARRAY_SIZE(t); r++) { \
225 for (unsigned c = 0; c < ARRAY_SIZE(t[0]); c++) \
226 assert(t[r][c] == t[c][r]); \
230 #define ASSERT_TABLE_IS_DIAGONAL(t) \
232 for (unsigned r = 0; r < ARRAY_SIZE(t); r++) \
233 assert(t[r][r] == r); \
236 static enum ssa_ranges
237 union_ranges(enum ssa_ranges a
, enum ssa_ranges b
)
239 static const enum ssa_ranges union_table
[last_range
+ 1][last_range
+ 1] = {
240 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
241 /* unknown */ { _______
, _______
, _______
, _______
, _______
, _______
, _______
},
242 /* lt_zero */ { _______
, lt_zero
, le_zero
, ne_zero
, _______
, ne_zero
, le_zero
},
243 /* le_zero */ { _______
, le_zero
, le_zero
, _______
, _______
, _______
, le_zero
},
244 /* gt_zero */ { _______
, ne_zero
, _______
, gt_zero
, ge_zero
, ne_zero
, ge_zero
},
245 /* ge_zero */ { _______
, _______
, _______
, ge_zero
, ge_zero
, _______
, ge_zero
},
246 /* ne_zero */ { _______
, ne_zero
, _______
, ne_zero
, _______
, ne_zero
, _______
},
247 /* eq_zero */ { _______
, le_zero
, le_zero
, ge_zero
, ge_zero
, _______
, eq_zero
},
250 ASSERT_TABLE_IS_COMMUTATIVE(union_table
);
251 ASSERT_TABLE_IS_DIAGONAL(union_table
);
253 return union_table
[a
][b
];
/* NOTE(review): this region is garbled by extraction — macro continuation
 * lines and the "#else"/"#endif" of the NDEBUG guard are missing, so the
 * trailing empty #define variants appear as bare redefinitions here; confirm
 * against upstream.
 */
256 /* Verify that the 'unknown' entry in each row (or column) of the table is the
257 * union of all the other values in the row (or column).
259 #define ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_2_SOURCE(t) \
261 for (unsigned i = 0; i < last_range; i++) { \
262 enum ssa_ranges col_range = t[i][unknown + 1]; \
263 enum ssa_ranges row_range = t[unknown + 1][i]; \
265 for (unsigned j = unknown + 2; j < last_range; j++) { \
266 col_range = union_ranges(col_range, t[i][j]); \
267 row_range = union_ranges(row_range, t[j][i]); \
270 assert(col_range == t[i][unknown]); \
271 assert(row_range == t[unknown][i]); \
275 /* For most operations, the union of ranges for a strict inequality and
276 * equality should be the range of the non-strict inequality (e.g.,
277 * union_ranges(range(op(lt_zero), range(op(eq_zero))) == range(op(le_zero)).
279 * Does not apply to selection-like opcodes (bcsel, fmin, fmax, etc.).
281 #define ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_1_SOURCE(t) \
283 assert(union_ranges(t[lt_zero], t[eq_zero]) == t[le_zero]); \
284 assert(union_ranges(t[gt_zero], t[eq_zero]) == t[ge_zero]); \
287 #define ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_2_SOURCE(t) \
289 for (unsigned i = 0; i < last_range; i++) { \
290 assert(union_ranges(t[i][lt_zero], t[i][eq_zero]) == t[i][le_zero]); \
291 assert(union_ranges(t[i][gt_zero], t[i][eq_zero]) == t[i][ge_zero]); \
292 assert(union_ranges(t[lt_zero][i], t[eq_zero][i]) == t[le_zero][i]); \
293 assert(union_ranges(t[gt_zero][i], t[eq_zero][i]) == t[ge_zero][i]); \
297 /* Several other unordered tuples span the range of "everything." Each should
298 * have the same value as unknown: (lt_zero, ge_zero), (le_zero, gt_zero), and
299 * (eq_zero, ne_zero). union_ranges is already commutative, so only one
300 * ordering needs to be checked.
302 * Does not apply to selection-like opcodes (bcsel, fmin, fmax, etc.).
304 * In cases where this can be used, it is unnecessary to also use
305 * ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_*_SOURCE. For any range X,
306 * union_ranges(X, X) == X. The disjoint ranges cover all of the non-unknown
307 * possibilities, so the union of all the unions of disjoint ranges is
308 * equivalent to the union of "others."
310 #define ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_1_SOURCE(t) \
312 assert(union_ranges(t[lt_zero], t[ge_zero]) == t[unknown]); \
313 assert(union_ranges(t[le_zero], t[gt_zero]) == t[unknown]); \
314 assert(union_ranges(t[eq_zero], t[ne_zero]) == t[unknown]); \
317 #define ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(t) \
319 for (unsigned i = 0; i < last_range; i++) { \
320 assert(union_ranges(t[i][lt_zero], t[i][ge_zero]) == \
322 assert(union_ranges(t[i][le_zero], t[i][gt_zero]) == \
324 assert(union_ranges(t[i][eq_zero], t[i][ne_zero]) == \
327 assert(union_ranges(t[lt_zero][i], t[ge_zero][i]) == \
329 assert(union_ranges(t[le_zero][i], t[gt_zero][i]) == \
331 assert(union_ranges(t[eq_zero][i], t[ne_zero][i]) == \
337 #define ASSERT_TABLE_IS_COMMUTATIVE(t)
338 #define ASSERT_TABLE_IS_DIAGONAL(t)
339 #define ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_2_SOURCE(t)
340 #define ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_1_SOURCE(t)
341 #define ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_2_SOURCE(t)
342 #define ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_1_SOURCE(t)
343 #define ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(t)
347 * Analyze an expression to determine the range of its result
349 * The end result of this analysis is a token that communicates something
350 * about the range of values. There's an implicit grammar that produces
351 * tokens from sequences of literal values, other tokens, and operations.
352 * This function implements this grammar as a recursive-descent parser. Some
353 * (but not all) of the grammar is listed in-line in the function.
355 static struct ssa_result_range
356 analyze_expression(const nir_alu_instr
*instr
, unsigned src
,
357 struct hash_table
*ht
, nir_alu_type use_type
)
359 if (!instr
->src
[src
].src
.is_ssa
)
360 return (struct ssa_result_range
){unknown
, false};
362 if (nir_src_is_const(instr
->src
[src
].src
))
363 return analyze_constant(instr
, src
, use_type
);
365 if (instr
->src
[src
].src
.ssa
->parent_instr
->type
!= nir_instr_type_alu
)
366 return (struct ssa_result_range
){unknown
, false};
368 const struct nir_alu_instr
*const alu
=
369 nir_instr_as_alu(instr
->src
[src
].src
.ssa
->parent_instr
);
371 /* Bail if the type of the instruction generating the value does not match
372 * the type the value will be interpreted as. int/uint/bool can be
373 * reinterpreted trivially. The most important cases are between float and
376 if (alu
->op
!= nir_op_mov
&& alu
->op
!= nir_op_bcsel
) {
377 const nir_alu_type use_base_type
=
378 nir_alu_type_get_base_type(use_type
);
379 const nir_alu_type src_base_type
=
380 nir_alu_type_get_base_type(nir_op_infos
[alu
->op
].output_type
);
382 if (use_base_type
!= src_base_type
&&
383 (use_base_type
== nir_type_float
||
384 src_base_type
== nir_type_float
)) {
385 return (struct ssa_result_range
){unknown
, false};
389 struct hash_entry
*he
= _mesa_hash_table_search(ht
, pack_key(alu
, use_type
));
391 return unpack_data(he
->data
);
393 struct ssa_result_range r
= {unknown
, false};
395 /* ge_zero: ge_zero + ge_zero
397 * gt_zero: gt_zero + eq_zero
398 * | gt_zero + ge_zero
399 * | eq_zero + gt_zero # Addition is commutative
400 * | ge_zero + gt_zero # Addition is commutative
401 * | gt_zero + gt_zero
404 * le_zero: le_zero + le_zero
406 * lt_zero: lt_zero + eq_zero
407 * | lt_zero + le_zero
408 * | eq_zero + lt_zero # Addition is commutative
409 * | le_zero + lt_zero # Addition is commutative
410 * | lt_zero + lt_zero
413 * ne_zero: eq_zero + ne_zero
414 * | ne_zero + eq_zero # Addition is commutative
417 * eq_zero: eq_zero + eq_zero
420 * All other cases are 'unknown'. The seeming odd entry is (ne_zero,
421 * ne_zero), but that could be (-5, +5) which is not ne_zero.
423 static const enum ssa_ranges fadd_table
[last_range
+ 1][last_range
+ 1] = {
424 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
425 /* unknown */ { _______
, _______
, _______
, _______
, _______
, _______
, _______
},
426 /* lt_zero */ { _______
, lt_zero
, lt_zero
, _______
, _______
, _______
, lt_zero
},
427 /* le_zero */ { _______
, lt_zero
, le_zero
, _______
, _______
, _______
, le_zero
},
428 /* gt_zero */ { _______
, _______
, _______
, gt_zero
, gt_zero
, _______
, gt_zero
},
429 /* ge_zero */ { _______
, _______
, _______
, gt_zero
, ge_zero
, _______
, ge_zero
},
430 /* ne_zero */ { _______
, _______
, _______
, _______
, _______
, _______
, ne_zero
},
431 /* eq_zero */ { _______
, lt_zero
, le_zero
, gt_zero
, ge_zero
, ne_zero
, eq_zero
},
434 ASSERT_TABLE_IS_COMMUTATIVE(fadd_table
);
435 ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(fadd_table
);
436 ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_2_SOURCE(fadd_table
);
438 /* Due to flush-to-zero semanatics of floating-point numbers with very
439 * small mangnitudes, we can never really be sure a result will be
442 * ge_zero: ge_zero * ge_zero
443 * | ge_zero * gt_zero
444 * | ge_zero * eq_zero
445 * | le_zero * lt_zero
446 * | lt_zero * le_zero # Multiplication is commutative
447 * | le_zero * le_zero
448 * | gt_zero * ge_zero # Multiplication is commutative
449 * | eq_zero * ge_zero # Multiplication is commutative
450 * | a * a # Left source == right source
451 * | gt_zero * gt_zero
452 * | lt_zero * lt_zero
455 * le_zero: ge_zero * le_zero
456 * | ge_zero * lt_zero
457 * | lt_zero * ge_zero # Multiplication is commutative
458 * | le_zero * ge_zero # Multiplication is commutative
459 * | le_zero * gt_zero
460 * | lt_zero * gt_zero
461 * | gt_zero * lt_zero # Multiplication is commutative
464 * eq_zero: eq_zero * <any>
465 * <any> * eq_zero # Multiplication is commutative
467 * All other cases are 'unknown'.
469 static const enum ssa_ranges fmul_table
[last_range
+ 1][last_range
+ 1] = {
470 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
471 /* unknown */ { _______
, _______
, _______
, _______
, _______
, _______
, eq_zero
},
472 /* lt_zero */ { _______
, ge_zero
, ge_zero
, le_zero
, le_zero
, _______
, eq_zero
},
473 /* le_zero */ { _______
, ge_zero
, ge_zero
, le_zero
, le_zero
, _______
, eq_zero
},
474 /* gt_zero */ { _______
, le_zero
, le_zero
, ge_zero
, ge_zero
, _______
, eq_zero
},
475 /* ge_zero */ { _______
, le_zero
, le_zero
, ge_zero
, ge_zero
, _______
, eq_zero
},
476 /* ne_zero */ { _______
, _______
, _______
, _______
, _______
, _______
, eq_zero
},
477 /* eq_zero */ { eq_zero
, eq_zero
, eq_zero
, eq_zero
, eq_zero
, eq_zero
, eq_zero
}
480 ASSERT_TABLE_IS_COMMUTATIVE(fmul_table
);
481 ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(fmul_table
);
482 ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_2_SOURCE(fmul_table
);
484 static const enum ssa_ranges fneg_table
[last_range
+ 1] = {
485 /* unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
486 _______
, gt_zero
, ge_zero
, lt_zero
, le_zero
, ne_zero
, eq_zero
489 ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_1_SOURCE(fneg_table
);
490 ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_1_SOURCE(fneg_table
);
496 r
= (struct ssa_result_range
){ge_zero
, alu
->op
== nir_op_b2f32
};
500 const struct ssa_result_range left
=
501 analyze_expression(alu
, 1, ht
, use_type
);
502 const struct ssa_result_range right
=
503 analyze_expression(alu
, 2, ht
, use_type
);
505 r
.is_integral
= left
.is_integral
&& right
.is_integral
;
507 /* le_zero: bcsel(<any>, le_zero, lt_zero)
508 * | bcsel(<any>, eq_zero, lt_zero)
509 * | bcsel(<any>, le_zero, eq_zero)
510 * | bcsel(<any>, lt_zero, le_zero)
511 * | bcsel(<any>, lt_zero, eq_zero)
512 * | bcsel(<any>, eq_zero, le_zero)
513 * | bcsel(<any>, le_zero, le_zero)
516 * lt_zero: bcsel(<any>, lt_zero, lt_zero)
519 * ge_zero: bcsel(<any>, ge_zero, ge_zero)
520 * | bcsel(<any>, ge_zero, gt_zero)
521 * | bcsel(<any>, ge_zero, eq_zero)
522 * | bcsel(<any>, gt_zero, ge_zero)
523 * | bcsel(<any>, eq_zero, ge_zero)
526 * gt_zero: bcsel(<any>, gt_zero, gt_zero)
529 * ne_zero: bcsel(<any>, ne_zero, gt_zero)
530 * | bcsel(<any>, ne_zero, lt_zero)
531 * | bcsel(<any>, gt_zero, lt_zero)
532 * | bcsel(<any>, gt_zero, ne_zero)
533 * | bcsel(<any>, lt_zero, ne_zero)
534 * | bcsel(<any>, lt_zero, gt_zero)
535 * | bcsel(<any>, ne_zero, ne_zero)
538 * eq_zero: bcsel(<any>, eq_zero, eq_zero)
541 * All other cases are 'unknown'.
543 * The ranges could be tightened if the range of the first source is
544 * known. However, opt_algebraic will (eventually) elminiate the bcsel
545 * if the condition is known.
547 static const enum ssa_ranges table
[last_range
+ 1][last_range
+ 1] = {
548 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
549 /* unknown */ { _______
, _______
, _______
, _______
, _______
, _______
, _______
},
550 /* lt_zero */ { _______
, lt_zero
, le_zero
, ne_zero
, _______
, ne_zero
, le_zero
},
551 /* le_zero */ { _______
, le_zero
, le_zero
, _______
, _______
, _______
, le_zero
},
552 /* gt_zero */ { _______
, ne_zero
, _______
, gt_zero
, ge_zero
, ne_zero
, ge_zero
},
553 /* ge_zero */ { _______
, _______
, _______
, ge_zero
, ge_zero
, _______
, ge_zero
},
554 /* ne_zero */ { _______
, ne_zero
, _______
, ne_zero
, _______
, ne_zero
, _______
},
555 /* eq_zero */ { _______
, le_zero
, le_zero
, ge_zero
, ge_zero
, _______
, eq_zero
},
558 ASSERT_TABLE_IS_COMMUTATIVE(table
);
559 ASSERT_TABLE_IS_DIAGONAL(table
);
560 ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_2_SOURCE(table
);
562 r
.range
= table
[left
.range
][right
.range
];
568 r
= analyze_expression(alu
, 0, ht
, nir_alu_src_type(alu
, 0));
570 r
.is_integral
= true;
572 if (r
.range
== unknown
&& alu
->op
== nir_op_u2f32
)
578 r
= analyze_expression(alu
, 0, ht
, nir_alu_src_type(alu
, 0));
600 const struct ssa_result_range left
=
601 analyze_expression(alu
, 0, ht
, nir_alu_src_type(alu
, 0));
602 const struct ssa_result_range right
=
603 analyze_expression(alu
, 1, ht
, nir_alu_src_type(alu
, 1));
605 r
.is_integral
= left
.is_integral
&& right
.is_integral
;
606 r
.range
= fadd_table
[left
.range
][right
.range
];
611 /* If the parameter might be less than zero, the mathematically result
612 * will be on (0, 1). For sufficiently large magnitude negative
613 * parameters, the result will flush to zero.
615 static const enum ssa_ranges table
[last_range
+ 1] = {
616 /* unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
617 ge_zero
, ge_zero
, ge_zero
, gt_zero
, gt_zero
, ge_zero
, gt_zero
620 r
= analyze_expression(alu
, 0, ht
, nir_alu_src_type(alu
, 0));
622 ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_1_SOURCE(table
);
623 ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_1_SOURCE(table
);
625 r
.is_integral
= r
.is_integral
&& is_not_negative(r
.range
);
626 r
.range
= table
[r
.range
];
631 const struct ssa_result_range left
=
632 analyze_expression(alu
, 0, ht
, nir_alu_src_type(alu
, 0));
633 const struct ssa_result_range right
=
634 analyze_expression(alu
, 1, ht
, nir_alu_src_type(alu
, 1));
636 r
.is_integral
= left
.is_integral
&& right
.is_integral
;
638 /* gt_zero: fmax(gt_zero, *)
639 * | fmax(*, gt_zero) # Treat fmax as commutative
642 * ge_zero: fmax(ge_zero, ne_zero)
643 * | fmax(ge_zero, lt_zero)
644 * | fmax(ge_zero, le_zero)
645 * | fmax(ge_zero, eq_zero)
646 * | fmax(ne_zero, ge_zero) # Treat fmax as commutative
647 * | fmax(lt_zero, ge_zero) # Treat fmax as commutative
648 * | fmax(le_zero, ge_zero) # Treat fmax as commutative
649 * | fmax(eq_zero, ge_zero) # Treat fmax as commutative
650 * | fmax(ge_zero, ge_zero)
653 * le_zero: fmax(le_zero, lt_zero)
654 * | fmax(lt_zero, le_zero) # Treat fmax as commutative
655 * | fmax(le_zero, le_zero)
658 * lt_zero: fmax(lt_zero, lt_zero)
661 * ne_zero: fmax(ne_zero, lt_zero)
662 * | fmax(lt_zero, ne_zero) # Treat fmax as commutative
663 * | fmax(ne_zero, ne_zero)
666 * eq_zero: fmax(eq_zero, le_zero)
667 * | fmax(eq_zero, lt_zero)
668 * | fmax(le_zero, eq_zero) # Treat fmax as commutative
669 * | fmax(lt_zero, eq_zero) # Treat fmax as commutative
670 * | fmax(eq_zero, eq_zero)
673 * All other cases are 'unknown'.
675 static const enum ssa_ranges table
[last_range
+ 1][last_range
+ 1] = {
676 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
677 /* unknown */ { _______
, _______
, _______
, gt_zero
, ge_zero
, _______
, _______
},
678 /* lt_zero */ { _______
, lt_zero
, le_zero
, gt_zero
, ge_zero
, ne_zero
, eq_zero
},
679 /* le_zero */ { _______
, le_zero
, le_zero
, gt_zero
, ge_zero
, _______
, eq_zero
},
680 /* gt_zero */ { gt_zero
, gt_zero
, gt_zero
, gt_zero
, gt_zero
, gt_zero
, gt_zero
},
681 /* ge_zero */ { ge_zero
, ge_zero
, ge_zero
, gt_zero
, ge_zero
, ge_zero
, ge_zero
},
682 /* ne_zero */ { _______
, ne_zero
, _______
, gt_zero
, ge_zero
, ne_zero
, _______
},
683 /* eq_zero */ { _______
, eq_zero
, eq_zero
, gt_zero
, ge_zero
, _______
, eq_zero
}
686 /* Treat fmax as commutative. */
687 ASSERT_TABLE_IS_COMMUTATIVE(table
);
688 ASSERT_TABLE_IS_DIAGONAL(table
);
689 ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_2_SOURCE(table
);
691 r
.range
= table
[left
.range
][right
.range
];
696 const struct ssa_result_range left
=
697 analyze_expression(alu
, 0, ht
, nir_alu_src_type(alu
, 0));
698 const struct ssa_result_range right
=
699 analyze_expression(alu
, 1, ht
, nir_alu_src_type(alu
, 1));
701 r
.is_integral
= left
.is_integral
&& right
.is_integral
;
703 /* lt_zero: fmin(lt_zero, *)
704 * | fmin(*, lt_zero) # Treat fmin as commutative
707 * le_zero: fmin(le_zero, ne_zero)
708 * | fmin(le_zero, gt_zero)
709 * | fmin(le_zero, ge_zero)
710 * | fmin(le_zero, eq_zero)
711 * | fmin(ne_zero, le_zero) # Treat fmin as commutative
712 * | fmin(gt_zero, le_zero) # Treat fmin as commutative
713 * | fmin(ge_zero, le_zero) # Treat fmin as commutative
714 * | fmin(eq_zero, le_zero) # Treat fmin as commutative
715 * | fmin(le_zero, le_zero)
718 * ge_zero: fmin(ge_zero, gt_zero)
719 * | fmin(gt_zero, ge_zero) # Treat fmin as commutative
720 * | fmin(ge_zero, ge_zero)
723 * gt_zero: fmin(gt_zero, gt_zero)
726 * ne_zero: fmin(ne_zero, gt_zero)
727 * | fmin(gt_zero, ne_zero) # Treat fmin as commutative
728 * | fmin(ne_zero, ne_zero)
731 * eq_zero: fmin(eq_zero, ge_zero)
732 * | fmin(eq_zero, gt_zero)
733 * | fmin(ge_zero, eq_zero) # Treat fmin as commutative
734 * | fmin(gt_zero, eq_zero) # Treat fmin as commutative
735 * | fmin(eq_zero, eq_zero)
738 * All other cases are 'unknown'.
740 static const enum ssa_ranges table
[last_range
+ 1][last_range
+ 1] = {
741 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
742 /* unknown */ { _______
, lt_zero
, le_zero
, _______
, _______
, _______
, _______
},
743 /* lt_zero */ { lt_zero
, lt_zero
, lt_zero
, lt_zero
, lt_zero
, lt_zero
, lt_zero
},
744 /* le_zero */ { le_zero
, lt_zero
, le_zero
, le_zero
, le_zero
, le_zero
, le_zero
},
745 /* gt_zero */ { _______
, lt_zero
, le_zero
, gt_zero
, ge_zero
, ne_zero
, eq_zero
},
746 /* ge_zero */ { _______
, lt_zero
, le_zero
, ge_zero
, ge_zero
, _______
, eq_zero
},
747 /* ne_zero */ { _______
, lt_zero
, le_zero
, ne_zero
, _______
, ne_zero
, _______
},
748 /* eq_zero */ { _______
, lt_zero
, le_zero
, eq_zero
, eq_zero
, _______
, eq_zero
}
751 /* Treat fmin as commutative. */
752 ASSERT_TABLE_IS_COMMUTATIVE(table
);
753 ASSERT_TABLE_IS_DIAGONAL(table
);
754 ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_2_SOURCE(table
);
756 r
.range
= table
[left
.range
][right
.range
];
761 const struct ssa_result_range left
=
762 analyze_expression(alu
, 0, ht
, nir_alu_src_type(alu
, 0));
763 const struct ssa_result_range right
=
764 analyze_expression(alu
, 1, ht
, nir_alu_src_type(alu
, 1));
766 r
.is_integral
= left
.is_integral
&& right
.is_integral
;
768 /* x * x => ge_zero */
769 if (left
.range
!= eq_zero
&& nir_alu_srcs_equal(alu
, alu
, 0, 1)) {
770 /* Even if x > 0, the result of x*x can be zero when x is, for
771 * example, a subnormal number.
774 } else if (left
.range
!= eq_zero
&& nir_alu_srcs_negative_equal(alu
, alu
, 0, 1)) {
775 /* -x * x => le_zero. */
778 r
.range
= fmul_table
[left
.range
][right
.range
];
784 r
= (struct ssa_result_range
){
785 analyze_expression(alu
, 0, ht
, nir_alu_src_type(alu
, 0)).range
,
791 r
= analyze_expression(alu
, 0, ht
, use_type
);
795 r
= analyze_expression(alu
, 0, ht
, nir_alu_src_type(alu
, 0));
797 r
.range
= fneg_table
[r
.range
];
801 r
= analyze_expression(alu
, 0, ht
, nir_alu_src_type(alu
, 0));
807 r
.is_integral
= true;
811 assert(r
.is_integral
);
814 /* The fsat doesn't add any information in these cases. */
819 /* Since the result must be in [0, 1], the value must be >= 0. */
826 r
= (struct ssa_result_range
){
827 analyze_expression(alu
, 0, ht
, nir_alu_src_type(alu
, 0)).range
,
834 r
= (struct ssa_result_range
){ge_zero
, false};
837 case nir_op_ffloor
: {
838 const struct ssa_result_range left
=
839 analyze_expression(alu
, 0, ht
, nir_alu_src_type(alu
, 0));
841 r
.is_integral
= true;
843 if (left
.is_integral
|| left
.range
== le_zero
|| left
.range
== lt_zero
)
844 r
.range
= left
.range
;
845 else if (left
.range
== ge_zero
|| left
.range
== gt_zero
)
847 else if (left
.range
== ne_zero
)
854 const struct ssa_result_range left
=
855 analyze_expression(alu
, 0, ht
, nir_alu_src_type(alu
, 0));
857 r
.is_integral
= true;
859 if (left
.is_integral
|| left
.range
== ge_zero
|| left
.range
== gt_zero
)
860 r
.range
= left
.range
;
861 else if (left
.range
== le_zero
|| left
.range
== lt_zero
)
863 else if (left
.range
== ne_zero
)
869 case nir_op_ftrunc
: {
870 const struct ssa_result_range left
=
871 analyze_expression(alu
, 0, ht
, nir_alu_src_type(alu
, 0));
873 r
.is_integral
= true;
875 if (left
.is_integral
)
876 r
.range
= left
.range
;
877 else if (left
.range
== ge_zero
|| left
.range
== gt_zero
)
879 else if (left
.range
== le_zero
|| left
.range
== lt_zero
)
881 else if (left
.range
== ne_zero
)
897 /* Boolean results are 0 or -1. */
898 r
= (struct ssa_result_range
){le_zero
, false};
902 /* Due to flush-to-zero semanatics of floating-point numbers with very
903 * small mangnitudes, we can never really be sure a result will be
906 * NIR uses pow() and powf() to constant evaluate nir_op_fpow. The man
907 * page for that function says:
909 * If y is 0, the result is 1.0 (even if x is a NaN).
911 * gt_zero: pow(*, eq_zero)
912 * | pow(eq_zero, lt_zero) # 0^-y = +inf
913 * | pow(eq_zero, le_zero) # 0^-y = +inf or 0^0 = 1.0
916 * eq_zero: pow(eq_zero, gt_zero)
919 * ge_zero: pow(gt_zero, gt_zero)
920 * | pow(gt_zero, ge_zero)
921 * | pow(gt_zero, lt_zero)
922 * | pow(gt_zero, le_zero)
923 * | pow(gt_zero, ne_zero)
924 * | pow(gt_zero, unknown)
925 * | pow(ge_zero, gt_zero)
926 * | pow(ge_zero, ge_zero)
927 * | pow(ge_zero, lt_zero)
928 * | pow(ge_zero, le_zero)
929 * | pow(ge_zero, ne_zero)
930 * | pow(ge_zero, unknown)
931 * | pow(eq_zero, ge_zero) # 0^0 = 1.0 or 0^+y = 0.0
932 * | pow(eq_zero, ne_zero) # 0^-y = +inf or 0^+y = 0.0
933 * | pow(eq_zero, unknown) # union of all other y cases
936 * All other cases are unknown.
938 * We could do better if the right operand is a constant, integral
941 static const enum ssa_ranges table
[last_range
+ 1][last_range
+ 1] = {
942 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
943 /* unknown */ { _______
, _______
, _______
, _______
, _______
, _______
, gt_zero
},
944 /* lt_zero */ { _______
, _______
, _______
, _______
, _______
, _______
, gt_zero
},
945 /* le_zero */ { _______
, _______
, _______
, _______
, _______
, _______
, gt_zero
},
946 /* gt_zero */ { ge_zero
, ge_zero
, ge_zero
, ge_zero
, ge_zero
, ge_zero
, gt_zero
},
947 /* ge_zero */ { ge_zero
, ge_zero
, ge_zero
, ge_zero
, ge_zero
, ge_zero
, gt_zero
},
948 /* ne_zero */ { _______
, _______
, _______
, _______
, _______
, _______
, gt_zero
},
949 /* eq_zero */ { ge_zero
, gt_zero
, gt_zero
, eq_zero
, ge_zero
, ge_zero
, gt_zero
},
952 const struct ssa_result_range left
=
953 analyze_expression(alu
, 0, ht
, nir_alu_src_type(alu
, 0));
954 const struct ssa_result_range right
=
955 analyze_expression(alu
, 1, ht
, nir_alu_src_type(alu
, 1));
957 ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(table
);
958 ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_2_SOURCE(table
);
960 r
.is_integral
= left
.is_integral
&& right
.is_integral
&&
961 is_not_negative(right
.range
);
962 r
.range
= table
[left
.range
][right
.range
];
967 const struct ssa_result_range first
=
968 analyze_expression(alu
, 0, ht
, nir_alu_src_type(alu
, 0));
969 const struct ssa_result_range second
=
970 analyze_expression(alu
, 1, ht
, nir_alu_src_type(alu
, 1));
971 const struct ssa_result_range third
=
972 analyze_expression(alu
, 2, ht
, nir_alu_src_type(alu
, 2));
974 r
.is_integral
= first
.is_integral
&& second
.is_integral
&&
977 enum ssa_ranges fmul_range
;
979 if (first
.range
!= eq_zero
&& nir_alu_srcs_equal(alu
, alu
, 0, 1)) {
980 /* See handling of nir_op_fmul for explanation of why ge_zero is the
983 fmul_range
= ge_zero
;
984 } else if (first
.range
!= eq_zero
&& nir_alu_srcs_negative_equal(alu
, alu
, 0, 1)) {
985 /* -x * x => le_zero */
986 fmul_range
= le_zero
;
988 fmul_range
= fmul_table
[first
.range
][second
.range
];
990 r
.range
= fadd_table
[fmul_range
][third
.range
];
995 const struct ssa_result_range first
=
996 analyze_expression(alu
, 0, ht
, nir_alu_src_type(alu
, 0));
997 const struct ssa_result_range second
=
998 analyze_expression(alu
, 1, ht
, nir_alu_src_type(alu
, 1));
999 const struct ssa_result_range third
=
1000 analyze_expression(alu
, 2, ht
, nir_alu_src_type(alu
, 2));
1002 r
.is_integral
= first
.is_integral
&& second
.is_integral
&&
1005 /* Decompose the flrp to first + third * (second + -first) */
1006 const enum ssa_ranges inner_fadd_range
=
1007 fadd_table
[second
.range
][fneg_table
[first
.range
]];
1009 const enum ssa_ranges fmul_range
=
1010 fmul_table
[third
.range
][inner_fadd_range
];
1012 r
.range
= fadd_table
[first
.range
][fmul_range
];
1017 r
= (struct ssa_result_range
){unknown
, false};
1021 if (r
.range
== eq_zero
)
1022 r
.is_integral
= true;
1024 _mesa_hash_table_insert(ht
, pack_key(alu
, use_type
), pack_data(r
));
1030 struct ssa_result_range
1031 nir_analyze_range(struct hash_table
*range_ht
,
1032 const nir_alu_instr
*instr
, unsigned src
)
1034 return analyze_expression(instr
, src
, range_ht
,
1035 nir_alu_src_type(instr
, src
));