2 * Copyright © 2018 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
26 #include "nir_range_analysis.h"
27 #include "util/hash_table.h"
30 * Analyzes a sequence of operations to determine some aspects of the range of
35 is_not_negative(enum ssa_ranges r
)
37 return r
== gt_zero
|| r
== ge_zero
|| r
== eq_zero
;
41 pack_data(const struct ssa_result_range r
)
43 return (void *)(uintptr_t)(r
.range
| r
.is_integral
<< 8);
46 static struct ssa_result_range
47 unpack_data(const void *p
)
49 const uintptr_t v
= (uintptr_t) p
;
51 return (struct ssa_result_range
){v
& 0xff, (v
& 0x0ff00) != 0};
54 static struct ssa_result_range
55 analyze_constant(const struct nir_alu_instr
*instr
, unsigned src
)
57 uint8_t swizzle
[4] = { 0, 1, 2, 3 };
59 /* If the source is an explicitly sized source, then we need to reset
60 * both the number of components and the swizzle.
62 const unsigned num_components
= nir_ssa_alu_instr_src_components(instr
, src
);
64 for (unsigned i
= 0; i
< num_components
; ++i
)
65 swizzle
[i
] = instr
->src
[src
].swizzle
[i
];
67 const nir_load_const_instr
*const load
=
68 nir_instr_as_load_const(instr
->src
[src
].src
.ssa
->parent_instr
);
70 struct ssa_result_range r
= { unknown
, false };
72 switch (nir_op_infos
[instr
->op
].input_types
[src
]) {
73 case nir_type_float
: {
74 double min_value
= DBL_MAX
;
75 double max_value
= -DBL_MAX
;
76 bool any_zero
= false;
81 for (unsigned i
= 0; i
< num_components
; ++i
) {
82 const double v
= nir_const_value_as_float(load
->value
[swizzle
[i
]],
86 r
.is_integral
= false;
88 any_zero
= any_zero
|| (v
== 0.0);
89 all_zero
= all_zero
&& (v
== 0.0);
90 min_value
= MIN2(min_value
, v
);
91 max_value
= MAX2(max_value
, v
);
94 assert(any_zero
>= all_zero
);
95 assert(isnan(max_value
) || max_value
>= min_value
);
99 else if (min_value
> 0.0)
101 else if (min_value
== 0.0)
103 else if (max_value
< 0.0)
105 else if (max_value
== 0.0)
116 case nir_type_bool
: {
117 int64_t min_value
= INT_MAX
;
118 int64_t max_value
= INT_MIN
;
119 bool any_zero
= false;
120 bool all_zero
= true;
122 for (unsigned i
= 0; i
< num_components
; ++i
) {
123 const int64_t v
= nir_const_value_as_int(load
->value
[swizzle
[i
]],
126 any_zero
= any_zero
|| (v
== 0);
127 all_zero
= all_zero
&& (v
== 0);
128 min_value
= MIN2(min_value
, v
);
129 max_value
= MAX2(max_value
, v
);
132 assert(any_zero
>= all_zero
);
133 assert(max_value
>= min_value
);
137 else if (min_value
> 0)
139 else if (min_value
== 0)
141 else if (max_value
< 0)
143 else if (max_value
== 0)
153 case nir_type_uint
: {
154 bool any_zero
= false;
155 bool all_zero
= true;
157 for (unsigned i
= 0; i
< num_components
; ++i
) {
158 const uint64_t v
= nir_const_value_as_uint(load
->value
[swizzle
[i
]],
161 any_zero
= any_zero
|| (v
== 0);
162 all_zero
= all_zero
&& (v
== 0);
165 assert(any_zero
>= all_zero
);
178 unreachable("Invalid alu source type");
183 * Short-hand name for use in the tables in analyze_expression. If this name
184 * becomes a problem on some compiler, we can change it to _.
186 #define _______ unknown
189 #define ASSERT_TABLE_IS_COMMUTATIVE(t) \
191 for (unsigned r = 0; r < ARRAY_SIZE(t); r++) { \
192 for (unsigned c = 0; c < ARRAY_SIZE(t[0]); c++) \
193 assert(t[r][c] == t[c][r]); \
197 #define ASSERT_TABLE_IS_DIAGONAL(t) \
199 for (unsigned r = 0; r < ARRAY_SIZE(t); r++) \
200 assert(t[r][r] == r); \
203 static enum ssa_ranges
204 union_ranges(enum ssa_ranges a
, enum ssa_ranges b
)
206 static const enum ssa_ranges union_table
[last_range
+ 1][last_range
+ 1] = {
207 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
208 /* unknown */ { _______
, _______
, _______
, _______
, _______
, _______
, _______
},
209 /* lt_zero */ { _______
, lt_zero
, le_zero
, ne_zero
, _______
, ne_zero
, le_zero
},
210 /* le_zero */ { _______
, le_zero
, le_zero
, _______
, _______
, _______
, le_zero
},
211 /* gt_zero */ { _______
, ne_zero
, _______
, gt_zero
, ge_zero
, ne_zero
, ge_zero
},
212 /* ge_zero */ { _______
, _______
, _______
, ge_zero
, ge_zero
, _______
, ge_zero
},
213 /* ne_zero */ { _______
, ne_zero
, _______
, ne_zero
, _______
, ne_zero
, _______
},
214 /* eq_zero */ { _______
, le_zero
, le_zero
, ge_zero
, ge_zero
, _______
, eq_zero
},
217 ASSERT_TABLE_IS_COMMUTATIVE(union_table
);
218 ASSERT_TABLE_IS_DIAGONAL(union_table
);
220 return union_table
[a
][b
];
223 /* Verify that the 'unknown' entry in each row (or column) of the table is the
224 * union of all the other values in the row (or column).
226 #define ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_2_SOURCE(t) \
228 for (unsigned i = 0; i < last_range; i++) { \
229 enum ssa_ranges col_range = t[i][unknown + 1]; \
230 enum ssa_ranges row_range = t[unknown + 1][i]; \
232 for (unsigned j = unknown + 2; j < last_range; j++) { \
233 col_range = union_ranges(col_range, t[i][j]); \
234 row_range = union_ranges(row_range, t[j][i]); \
237 assert(col_range == t[i][unknown]); \
238 assert(row_range == t[unknown][i]); \
242 /* For most operations, the union of ranges for a strict inequality and
243 * equality should be the range of the non-strict inequality (e.g.,
244 * union_ranges(range(op(lt_zero), range(op(eq_zero))) == range(op(le_zero)).
246 * Does not apply to selection-like opcodes (bcsel, fmin, fmax, etc.).
248 #define ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_1_SOURCE(t) \
250 assert(union_ranges(t[lt_zero], t[eq_zero]) == t[le_zero]); \
251 assert(union_ranges(t[gt_zero], t[eq_zero]) == t[ge_zero]); \
254 #define ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_2_SOURCE(t) \
256 for (unsigned i = 0; i < last_range; i++) { \
257 assert(union_ranges(t[i][lt_zero], t[i][eq_zero]) == t[i][le_zero]); \
258 assert(union_ranges(t[i][gt_zero], t[i][eq_zero]) == t[i][ge_zero]); \
259 assert(union_ranges(t[lt_zero][i], t[eq_zero][i]) == t[le_zero][i]); \
260 assert(union_ranges(t[gt_zero][i], t[eq_zero][i]) == t[ge_zero][i]); \
264 /* Several other unordered tuples span the range of "everything." Each should
265 * have the same value as unknown: (lt_zero, ge_zero), (le_zero, gt_zero), and
266 * (eq_zero, ne_zero). union_ranges is already commutative, so only one
267 * ordering needs to be checked.
269 * Does not apply to selection-like opcodes (bcsel, fmin, fmax, etc.).
271 * In cases where this can be used, it is unnecessary to also use
272 * ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_*_SOURCE. For any range X,
273 * union_ranges(X, X) == X. The disjoint ranges cover all of the non-unknown
274 * possibilities, so the union of all the unions of disjoint ranges is
275 * equivalent to the union of "others."
277 #define ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_1_SOURCE(t) \
279 assert(union_ranges(t[lt_zero], t[ge_zero]) == t[unknown]); \
280 assert(union_ranges(t[le_zero], t[gt_zero]) == t[unknown]); \
281 assert(union_ranges(t[eq_zero], t[ne_zero]) == t[unknown]); \
284 #define ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(t) \
286 for (unsigned i = 0; i < last_range; i++) { \
287 assert(union_ranges(t[i][lt_zero], t[i][ge_zero]) == \
289 assert(union_ranges(t[i][le_zero], t[i][gt_zero]) == \
291 assert(union_ranges(t[i][eq_zero], t[i][ne_zero]) == \
294 assert(union_ranges(t[lt_zero][i], t[ge_zero][i]) == \
296 assert(union_ranges(t[le_zero][i], t[gt_zero][i]) == \
298 assert(union_ranges(t[eq_zero][i], t[ne_zero][i]) == \
304 #define ASSERT_TABLE_IS_COMMUTATIVE(t)
305 #define ASSERT_TABLE_IS_DIAGONAL(t)
306 #define ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_2_SOURCE(t)
307 #define ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_1_SOURCE(t)
308 #define ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_2_SOURCE(t)
309 #define ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_1_SOURCE(t)
310 #define ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(t)
314 * Analyze an expression to determine the range of its result
316 * The end result of this analysis is a token that communicates something
317 * about the range of values. There's an implicit grammar that produces
318 * tokens from sequences of literal values, other tokens, and operations.
319 * This function implements this grammar as a recursive-descent parser. Some
320 * (but not all) of the grammar is listed in-line in the function.
322 static struct ssa_result_range
323 analyze_expression(const nir_alu_instr
*instr
, unsigned src
,
324 struct hash_table
*ht
)
326 if (!instr
->src
[src
].src
.is_ssa
)
327 return (struct ssa_result_range
){unknown
, false};
329 if (nir_src_is_const(instr
->src
[src
].src
))
330 return analyze_constant(instr
, src
);
332 if (instr
->src
[src
].src
.ssa
->parent_instr
->type
!= nir_instr_type_alu
)
333 return (struct ssa_result_range
){unknown
, false};
335 const struct nir_alu_instr
*const alu
=
336 nir_instr_as_alu(instr
->src
[src
].src
.ssa
->parent_instr
);
338 struct hash_entry
*he
= _mesa_hash_table_search(ht
, alu
);
340 return unpack_data(he
->data
);
342 struct ssa_result_range r
= {unknown
, false};
344 /* ge_zero: ge_zero + ge_zero
346 * gt_zero: gt_zero + eq_zero
347 * | gt_zero + ge_zero
348 * | eq_zero + gt_zero # Addition is commutative
349 * | ge_zero + gt_zero # Addition is commutative
350 * | gt_zero + gt_zero
353 * le_zero: le_zero + le_zero
355 * lt_zero: lt_zero + eq_zero
356 * | lt_zero + le_zero
357 * | eq_zero + lt_zero # Addition is commutative
358 * | le_zero + lt_zero # Addition is commutative
359 * | lt_zero + lt_zero
362 * ne_zero: eq_zero + ne_zero
363 * | ne_zero + eq_zero # Addition is commutative
366 * eq_zero: eq_zero + eq_zero
369 * All other cases are 'unknown'. The seeming odd entry is (ne_zero,
370 * ne_zero), but that could be (-5, +5) which is not ne_zero.
372 static const enum ssa_ranges fadd_table
[last_range
+ 1][last_range
+ 1] = {
373 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
374 /* unknown */ { _______
, _______
, _______
, _______
, _______
, _______
, _______
},
375 /* lt_zero */ { _______
, lt_zero
, lt_zero
, _______
, _______
, _______
, lt_zero
},
376 /* le_zero */ { _______
, lt_zero
, le_zero
, _______
, _______
, _______
, le_zero
},
377 /* gt_zero */ { _______
, _______
, _______
, gt_zero
, gt_zero
, _______
, gt_zero
},
378 /* ge_zero */ { _______
, _______
, _______
, gt_zero
, ge_zero
, _______
, ge_zero
},
379 /* ne_zero */ { _______
, _______
, _______
, _______
, _______
, _______
, ne_zero
},
380 /* eq_zero */ { _______
, lt_zero
, le_zero
, gt_zero
, ge_zero
, ne_zero
, eq_zero
},
383 ASSERT_TABLE_IS_COMMUTATIVE(fadd_table
);
384 ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(fadd_table
);
385 ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_2_SOURCE(fadd_table
);
387 /* Due to flush-to-zero semanatics of floating-point numbers with very
388 * small mangnitudes, we can never really be sure a result will be
391 * ge_zero: ge_zero * ge_zero
392 * | ge_zero * gt_zero
393 * | ge_zero * eq_zero
394 * | le_zero * lt_zero
395 * | lt_zero * le_zero # Multiplication is commutative
396 * | le_zero * le_zero
397 * | gt_zero * ge_zero # Multiplication is commutative
398 * | eq_zero * ge_zero # Multiplication is commutative
399 * | a * a # Left source == right source
400 * | gt_zero * gt_zero
401 * | lt_zero * lt_zero
404 * le_zero: ge_zero * le_zero
405 * | ge_zero * lt_zero
406 * | lt_zero * ge_zero # Multiplication is commutative
407 * | le_zero * ge_zero # Multiplication is commutative
408 * | le_zero * gt_zero
409 * | lt_zero * gt_zero
410 * | gt_zero * lt_zero # Multiplication is commutative
413 * eq_zero: eq_zero * <any>
414 * <any> * eq_zero # Multiplication is commutative
416 * All other cases are 'unknown'.
418 static const enum ssa_ranges fmul_table
[last_range
+ 1][last_range
+ 1] = {
419 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
420 /* unknown */ { _______
, _______
, _______
, _______
, _______
, _______
, eq_zero
},
421 /* lt_zero */ { _______
, ge_zero
, ge_zero
, le_zero
, le_zero
, _______
, eq_zero
},
422 /* le_zero */ { _______
, ge_zero
, ge_zero
, le_zero
, le_zero
, _______
, eq_zero
},
423 /* gt_zero */ { _______
, le_zero
, le_zero
, ge_zero
, ge_zero
, _______
, eq_zero
},
424 /* ge_zero */ { _______
, le_zero
, le_zero
, ge_zero
, ge_zero
, _______
, eq_zero
},
425 /* ne_zero */ { _______
, _______
, _______
, _______
, _______
, _______
, eq_zero
},
426 /* eq_zero */ { eq_zero
, eq_zero
, eq_zero
, eq_zero
, eq_zero
, eq_zero
, eq_zero
}
429 ASSERT_TABLE_IS_COMMUTATIVE(fmul_table
);
430 ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(fmul_table
);
431 ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_2_SOURCE(fmul_table
);
433 static const enum ssa_ranges fneg_table
[last_range
+ 1] = {
434 /* unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
435 _______
, gt_zero
, ge_zero
, lt_zero
, le_zero
, ne_zero
, eq_zero
438 ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_1_SOURCE(fneg_table
);
439 ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_1_SOURCE(fneg_table
);
445 r
= (struct ssa_result_range
){ge_zero
, alu
->op
== nir_op_b2f32
};
449 const struct ssa_result_range left
= analyze_expression(alu
, 1, ht
);
450 const struct ssa_result_range right
= analyze_expression(alu
, 2, ht
);
452 /* If either source is a constant load that is not zero, punt. The type
453 * will always be uint regardless of the actual type. We can't even
454 * decide if the value is non-zero because -0.0 is 0x80000000, and that
455 * will (possibly incorrectly) be considered non-zero.
457 /* FINISHME: We could do better, but it would require having the expected
458 * FINISHME: type passed in.
460 if ((nir_src_is_const(alu
->src
[1].src
) && left
.range
!= eq_zero
) ||
461 (nir_src_is_const(alu
->src
[2].src
) && right
.range
!= eq_zero
)) {
462 return (struct ssa_result_range
){unknown
, false};
465 r
.is_integral
= left
.is_integral
&& right
.is_integral
;
467 /* le_zero: bcsel(<any>, le_zero, lt_zero)
468 * | bcsel(<any>, eq_zero, lt_zero)
469 * | bcsel(<any>, le_zero, eq_zero)
470 * | bcsel(<any>, lt_zero, le_zero)
471 * | bcsel(<any>, lt_zero, eq_zero)
472 * | bcsel(<any>, eq_zero, le_zero)
473 * | bcsel(<any>, le_zero, le_zero)
476 * lt_zero: bcsel(<any>, lt_zero, lt_zero)
479 * ge_zero: bcsel(<any>, ge_zero, ge_zero)
480 * | bcsel(<any>, ge_zero, gt_zero)
481 * | bcsel(<any>, ge_zero, eq_zero)
482 * | bcsel(<any>, gt_zero, ge_zero)
483 * | bcsel(<any>, eq_zero, ge_zero)
486 * gt_zero: bcsel(<any>, gt_zero, gt_zero)
489 * ne_zero: bcsel(<any>, ne_zero, gt_zero)
490 * | bcsel(<any>, ne_zero, lt_zero)
491 * | bcsel(<any>, gt_zero, lt_zero)
492 * | bcsel(<any>, gt_zero, ne_zero)
493 * | bcsel(<any>, lt_zero, ne_zero)
494 * | bcsel(<any>, lt_zero, gt_zero)
495 * | bcsel(<any>, ne_zero, ne_zero)
498 * eq_zero: bcsel(<any>, eq_zero, eq_zero)
501 * All other cases are 'unknown'.
503 * The ranges could be tightened if the range of the first source is
504 * known. However, opt_algebraic will (eventually) elminiate the bcsel
505 * if the condition is known.
507 static const enum ssa_ranges table
[last_range
+ 1][last_range
+ 1] = {
508 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
509 /* unknown */ { _______
, _______
, _______
, _______
, _______
, _______
, _______
},
510 /* lt_zero */ { _______
, lt_zero
, le_zero
, ne_zero
, _______
, ne_zero
, le_zero
},
511 /* le_zero */ { _______
, le_zero
, le_zero
, _______
, _______
, _______
, le_zero
},
512 /* gt_zero */ { _______
, ne_zero
, _______
, gt_zero
, ge_zero
, ne_zero
, ge_zero
},
513 /* ge_zero */ { _______
, _______
, _______
, ge_zero
, ge_zero
, _______
, ge_zero
},
514 /* ne_zero */ { _______
, ne_zero
, _______
, ne_zero
, _______
, ne_zero
, _______
},
515 /* eq_zero */ { _______
, le_zero
, le_zero
, ge_zero
, ge_zero
, _______
, eq_zero
},
518 ASSERT_TABLE_IS_COMMUTATIVE(table
);
519 ASSERT_TABLE_IS_DIAGONAL(table
);
520 ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_2_SOURCE(table
);
522 r
.range
= table
[left
.range
][right
.range
];
528 r
= analyze_expression(alu
, 0, ht
);
530 r
.is_integral
= true;
532 if (r
.range
== unknown
&& alu
->op
== nir_op_u2f32
)
538 r
= analyze_expression(alu
, 0, ht
);
560 const struct ssa_result_range left
= analyze_expression(alu
, 0, ht
);
561 const struct ssa_result_range right
= analyze_expression(alu
, 1, ht
);
563 r
.is_integral
= left
.is_integral
&& right
.is_integral
;
564 r
.range
= fadd_table
[left
.range
][right
.range
];
569 /* If the parameter might be less than zero, the mathematically result
570 * will be on (0, 1). For sufficiently large magnitude negative
571 * parameters, the result will flush to zero.
573 static const enum ssa_ranges table
[last_range
+ 1] = {
574 /* unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
575 ge_zero
, ge_zero
, ge_zero
, gt_zero
, gt_zero
, ge_zero
, gt_zero
578 r
= analyze_expression(alu
, 0, ht
);
580 ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_1_SOURCE(table
);
581 ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_1_SOURCE(table
);
583 r
.is_integral
= r
.is_integral
&& is_not_negative(r
.range
);
584 r
.range
= table
[r
.range
];
589 const struct ssa_result_range left
= analyze_expression(alu
, 0, ht
);
590 const struct ssa_result_range right
= analyze_expression(alu
, 1, ht
);
592 r
.is_integral
= left
.is_integral
&& right
.is_integral
;
594 /* gt_zero: fmax(gt_zero, *)
595 * | fmax(*, gt_zero) # Treat fmax as commutative
598 * ge_zero: fmax(ge_zero, ne_zero)
599 * | fmax(ge_zero, lt_zero)
600 * | fmax(ge_zero, le_zero)
601 * | fmax(ge_zero, eq_zero)
602 * | fmax(ne_zero, ge_zero) # Treat fmax as commutative
603 * | fmax(lt_zero, ge_zero) # Treat fmax as commutative
604 * | fmax(le_zero, ge_zero) # Treat fmax as commutative
605 * | fmax(eq_zero, ge_zero) # Treat fmax as commutative
606 * | fmax(ge_zero, ge_zero)
609 * le_zero: fmax(le_zero, lt_zero)
610 * | fmax(lt_zero, le_zero) # Treat fmax as commutative
611 * | fmax(le_zero, le_zero)
614 * lt_zero: fmax(lt_zero, lt_zero)
617 * ne_zero: fmax(ne_zero, lt_zero)
618 * | fmax(lt_zero, ne_zero) # Treat fmax as commutative
619 * | fmax(ne_zero, ne_zero)
622 * eq_zero: fmax(eq_zero, le_zero)
623 * | fmax(eq_zero, lt_zero)
624 * | fmax(le_zero, eq_zero) # Treat fmax as commutative
625 * | fmax(lt_zero, eq_zero) # Treat fmax as commutative
626 * | fmax(eq_zero, eq_zero)
629 * All other cases are 'unknown'.
631 static const enum ssa_ranges table
[last_range
+ 1][last_range
+ 1] = {
632 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
633 /* unknown */ { _______
, _______
, _______
, gt_zero
, ge_zero
, _______
, _______
},
634 /* lt_zero */ { _______
, lt_zero
, le_zero
, gt_zero
, ge_zero
, ne_zero
, eq_zero
},
635 /* le_zero */ { _______
, le_zero
, le_zero
, gt_zero
, ge_zero
, _______
, eq_zero
},
636 /* gt_zero */ { gt_zero
, gt_zero
, gt_zero
, gt_zero
, gt_zero
, gt_zero
, gt_zero
},
637 /* ge_zero */ { ge_zero
, ge_zero
, ge_zero
, gt_zero
, ge_zero
, ge_zero
, ge_zero
},
638 /* ne_zero */ { _______
, ne_zero
, _______
, gt_zero
, ge_zero
, ne_zero
, _______
},
639 /* eq_zero */ { _______
, eq_zero
, eq_zero
, gt_zero
, ge_zero
, _______
, eq_zero
}
642 /* Treat fmax as commutative. */
643 ASSERT_TABLE_IS_COMMUTATIVE(table
);
644 ASSERT_TABLE_IS_DIAGONAL(table
);
645 ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_2_SOURCE(table
);
647 r
.range
= table
[left
.range
][right
.range
];
652 const struct ssa_result_range left
= analyze_expression(alu
, 0, ht
);
653 const struct ssa_result_range right
= analyze_expression(alu
, 1, ht
);
655 r
.is_integral
= left
.is_integral
&& right
.is_integral
;
657 /* lt_zero: fmin(lt_zero, *)
658 * | fmin(*, lt_zero) # Treat fmin as commutative
661 * le_zero: fmin(le_zero, ne_zero)
662 * | fmin(le_zero, gt_zero)
663 * | fmin(le_zero, ge_zero)
664 * | fmin(le_zero, eq_zero)
665 * | fmin(ne_zero, le_zero) # Treat fmin as commutative
666 * | fmin(gt_zero, le_zero) # Treat fmin as commutative
667 * | fmin(ge_zero, le_zero) # Treat fmin as commutative
668 * | fmin(eq_zero, le_zero) # Treat fmin as commutative
669 * | fmin(le_zero, le_zero)
672 * ge_zero: fmin(ge_zero, gt_zero)
673 * | fmin(gt_zero, ge_zero) # Treat fmin as commutative
674 * | fmin(ge_zero, ge_zero)
677 * gt_zero: fmin(gt_zero, gt_zero)
680 * ne_zero: fmin(ne_zero, gt_zero)
681 * | fmin(gt_zero, ne_zero) # Treat fmin as commutative
682 * | fmin(ne_zero, ne_zero)
685 * eq_zero: fmin(eq_zero, ge_zero)
686 * | fmin(eq_zero, gt_zero)
687 * | fmin(ge_zero, eq_zero) # Treat fmin as commutative
688 * | fmin(gt_zero, eq_zero) # Treat fmin as commutative
689 * | fmin(eq_zero, eq_zero)
692 * All other cases are 'unknown'.
694 static const enum ssa_ranges table
[last_range
+ 1][last_range
+ 1] = {
695 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
696 /* unknown */ { _______
, lt_zero
, le_zero
, _______
, _______
, _______
, _______
},
697 /* lt_zero */ { lt_zero
, lt_zero
, lt_zero
, lt_zero
, lt_zero
, lt_zero
, lt_zero
},
698 /* le_zero */ { le_zero
, lt_zero
, le_zero
, le_zero
, le_zero
, le_zero
, le_zero
},
699 /* gt_zero */ { _______
, lt_zero
, le_zero
, gt_zero
, ge_zero
, ne_zero
, eq_zero
},
700 /* ge_zero */ { _______
, lt_zero
, le_zero
, ge_zero
, ge_zero
, _______
, eq_zero
},
701 /* ne_zero */ { _______
, lt_zero
, le_zero
, ne_zero
, _______
, ne_zero
, _______
},
702 /* eq_zero */ { _______
, lt_zero
, le_zero
, eq_zero
, eq_zero
, _______
, eq_zero
}
705 /* Treat fmin as commutative. */
706 ASSERT_TABLE_IS_COMMUTATIVE(table
);
707 ASSERT_TABLE_IS_DIAGONAL(table
);
708 ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_2_SOURCE(table
);
710 r
.range
= table
[left
.range
][right
.range
];
715 const struct ssa_result_range left
= analyze_expression(alu
, 0, ht
);
716 const struct ssa_result_range right
= analyze_expression(alu
, 1, ht
);
718 r
.is_integral
= left
.is_integral
&& right
.is_integral
;
720 /* x * x => ge_zero */
721 if (left
.range
!= eq_zero
&& nir_alu_srcs_equal(alu
, alu
, 0, 1)) {
722 /* Even if x > 0, the result of x*x can be zero when x is, for
723 * example, a subnormal number.
726 } else if (left
.range
!= eq_zero
&& nir_alu_srcs_negative_equal(alu
, alu
, 0, 1)) {
727 /* -x * x => le_zero. */
730 r
.range
= fmul_table
[left
.range
][right
.range
];
736 r
= (struct ssa_result_range
){analyze_expression(alu
, 0, ht
).range
, false};
740 const struct ssa_result_range left
= analyze_expression(alu
, 0, ht
);
742 /* See commentary in nir_op_bcsel for the reasons this is necessary. */
743 if (nir_src_is_const(alu
->src
[0].src
) && left
.range
!= eq_zero
)
744 return (struct ssa_result_range
){unknown
, false};
751 r
= analyze_expression(alu
, 0, ht
);
753 r
.range
= fneg_table
[r
.range
];
757 r
= analyze_expression(alu
, 0, ht
);
763 r
.is_integral
= true;
767 assert(r
.is_integral
);
770 /* The fsat doesn't add any information in these cases. */
775 /* Since the result must be in [0, 1], the value must be >= 0. */
782 r
= (struct ssa_result_range
){analyze_expression(alu
, 0, ht
).range
, true};
787 r
= (struct ssa_result_range
){ge_zero
, false};
790 case nir_op_ffloor
: {
791 const struct ssa_result_range left
= analyze_expression(alu
, 0, ht
);
793 r
.is_integral
= true;
795 if (left
.is_integral
|| left
.range
== le_zero
|| left
.range
== lt_zero
)
796 r
.range
= left
.range
;
797 else if (left
.range
== ge_zero
|| left
.range
== gt_zero
)
799 else if (left
.range
== ne_zero
)
806 const struct ssa_result_range left
= analyze_expression(alu
, 0, ht
);
808 r
.is_integral
= true;
810 if (left
.is_integral
|| left
.range
== ge_zero
|| left
.range
== gt_zero
)
811 r
.range
= left
.range
;
812 else if (left
.range
== le_zero
|| left
.range
== lt_zero
)
814 else if (left
.range
== ne_zero
)
820 case nir_op_ftrunc
: {
821 const struct ssa_result_range left
= analyze_expression(alu
, 0, ht
);
823 r
.is_integral
= true;
825 if (left
.is_integral
)
826 r
.range
= left
.range
;
827 else if (left
.range
== ge_zero
|| left
.range
== gt_zero
)
829 else if (left
.range
== le_zero
|| left
.range
== lt_zero
)
831 else if (left
.range
== ne_zero
)
847 /* Boolean results are 0 or -1. */
848 r
= (struct ssa_result_range
){le_zero
, false};
852 /* Due to flush-to-zero semanatics of floating-point numbers with very
853 * small mangnitudes, we can never really be sure a result will be
856 * NIR uses pow() and powf() to constant evaluate nir_op_fpow. The man
857 * page for that function says:
859 * If y is 0, the result is 1.0 (even if x is a NaN).
861 * gt_zero: pow(*, eq_zero)
862 * | pow(eq_zero, lt_zero) # 0^-y = +inf
863 * | pow(eq_zero, le_zero) # 0^-y = +inf or 0^0 = 1.0
866 * eq_zero: pow(eq_zero, gt_zero)
869 * ge_zero: pow(gt_zero, gt_zero)
870 * | pow(gt_zero, ge_zero)
871 * | pow(gt_zero, lt_zero)
872 * | pow(gt_zero, le_zero)
873 * | pow(gt_zero, ne_zero)
874 * | pow(gt_zero, unknown)
875 * | pow(ge_zero, gt_zero)
876 * | pow(ge_zero, ge_zero)
877 * | pow(ge_zero, lt_zero)
878 * | pow(ge_zero, le_zero)
879 * | pow(ge_zero, ne_zero)
880 * | pow(ge_zero, unknown)
881 * | pow(eq_zero, ge_zero) # 0^0 = 1.0 or 0^+y = 0.0
882 * | pow(eq_zero, ne_zero) # 0^-y = +inf or 0^+y = 0.0
883 * | pow(eq_zero, unknown) # union of all other y cases
886 * All other cases are unknown.
888 * We could do better if the right operand is a constant, integral
891 static const enum ssa_ranges table
[last_range
+ 1][last_range
+ 1] = {
892 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
893 /* unknown */ { _______
, _______
, _______
, _______
, _______
, _______
, gt_zero
},
894 /* lt_zero */ { _______
, _______
, _______
, _______
, _______
, _______
, gt_zero
},
895 /* le_zero */ { _______
, _______
, _______
, _______
, _______
, _______
, gt_zero
},
896 /* gt_zero */ { ge_zero
, ge_zero
, ge_zero
, ge_zero
, ge_zero
, ge_zero
, gt_zero
},
897 /* ge_zero */ { ge_zero
, ge_zero
, ge_zero
, ge_zero
, ge_zero
, ge_zero
, gt_zero
},
898 /* ne_zero */ { _______
, _______
, _______
, _______
, _______
, _______
, gt_zero
},
899 /* eq_zero */ { ge_zero
, gt_zero
, gt_zero
, eq_zero
, ge_zero
, ge_zero
, gt_zero
},
902 const struct ssa_result_range left
= analyze_expression(alu
, 0, ht
);
903 const struct ssa_result_range right
= analyze_expression(alu
, 1, ht
);
905 ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(table
);
906 ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_2_SOURCE(table
);
908 r
.is_integral
= left
.is_integral
&& right
.is_integral
&&
909 is_not_negative(right
.range
);
910 r
.range
= table
[left
.range
][right
.range
];
915 const struct ssa_result_range first
= analyze_expression(alu
, 0, ht
);
916 const struct ssa_result_range second
= analyze_expression(alu
, 1, ht
);
917 const struct ssa_result_range third
= analyze_expression(alu
, 2, ht
);
919 r
.is_integral
= first
.is_integral
&& second
.is_integral
&&
922 enum ssa_ranges fmul_range
;
924 if (first
.range
!= eq_zero
&& nir_alu_srcs_equal(alu
, alu
, 0, 1)) {
925 /* See handling of nir_op_fmul for explanation of why ge_zero is the
928 fmul_range
= ge_zero
;
929 } else if (first
.range
!= eq_zero
&& nir_alu_srcs_negative_equal(alu
, alu
, 0, 1)) {
930 /* -x * x => le_zero */
931 fmul_range
= le_zero
;
933 fmul_range
= fmul_table
[first
.range
][second
.range
];
935 r
.range
= fadd_table
[fmul_range
][third
.range
];
940 const struct ssa_result_range first
= analyze_expression(alu
, 0, ht
);
941 const struct ssa_result_range second
= analyze_expression(alu
, 1, ht
);
942 const struct ssa_result_range third
= analyze_expression(alu
, 2, ht
);
944 r
.is_integral
= first
.is_integral
&& second
.is_integral
&&
947 /* Decompose the flrp to first + third * (second + -first) */
948 const enum ssa_ranges inner_fadd_range
=
949 fadd_table
[second
.range
][fneg_table
[first
.range
]];
951 const enum ssa_ranges fmul_range
=
952 fmul_table
[third
.range
][inner_fadd_range
];
954 r
.range
= fadd_table
[first
.range
][fmul_range
];
959 r
= (struct ssa_result_range
){unknown
, false};
963 if (r
.range
== eq_zero
)
964 r
.is_integral
= true;
966 _mesa_hash_table_insert(ht
, alu
, pack_data(r
));
972 struct ssa_result_range
973 nir_analyze_range(const nir_alu_instr
*instr
, unsigned src
)
975 struct hash_table
*ht
= _mesa_pointer_hash_table_create(NULL
);
977 const struct ssa_result_range r
= analyze_expression(instr
, src
, ht
);
979 _mesa_hash_table_destroy(ht
, NULL
);