2 * Copyright © 2018 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
26 #include "nir_range_analysis.h"
27 #include "util/hash_table.h"
30 * Analyzes a sequence of operations to determine some aspects of the range of
35 is_not_negative(enum ssa_ranges r
)
37 return r
== gt_zero
|| r
== ge_zero
|| r
== eq_zero
;
41 pack_data(const struct ssa_result_range r
)
43 return (void *)(uintptr_t)(r
.range
| r
.is_integral
<< 8);
46 static struct ssa_result_range
47 unpack_data(const void *p
)
49 const uintptr_t v
= (uintptr_t) p
;
51 return (struct ssa_result_range
){v
& 0xff, (v
& 0x0ff00) != 0};
54 static struct ssa_result_range
55 analyze_constant(const struct nir_alu_instr
*instr
, unsigned src
)
57 uint8_t swizzle
[4] = { 0, 1, 2, 3 };
59 /* If the source is an explicitly sized source, then we need to reset
60 * both the number of components and the swizzle.
62 const unsigned num_components
= nir_ssa_alu_instr_src_components(instr
, src
);
64 for (unsigned i
= 0; i
< num_components
; ++i
)
65 swizzle
[i
] = instr
->src
[src
].swizzle
[i
];
67 const nir_load_const_instr
*const load
=
68 nir_instr_as_load_const(instr
->src
[src
].src
.ssa
->parent_instr
);
70 struct ssa_result_range r
= { unknown
, false };
72 switch (nir_op_infos
[instr
->op
].input_types
[src
]) {
73 case nir_type_float
: {
74 double min_value
= DBL_MAX
;
75 double max_value
= -DBL_MAX
;
76 bool any_zero
= false;
81 for (unsigned i
= 0; i
< num_components
; ++i
) {
82 const double v
= nir_const_value_as_float(load
->value
[swizzle
[i
]],
86 r
.is_integral
= false;
88 any_zero
= any_zero
|| (v
== 0.0);
89 all_zero
= all_zero
&& (v
== 0.0);
90 min_value
= MIN2(min_value
, v
);
91 max_value
= MAX2(max_value
, v
);
94 assert(any_zero
>= all_zero
);
95 assert(isnan(max_value
) || max_value
>= min_value
);
99 else if (min_value
> 0.0)
101 else if (min_value
== 0.0)
103 else if (max_value
< 0.0)
105 else if (max_value
== 0.0)
116 case nir_type_bool
: {
117 int64_t min_value
= INT_MAX
;
118 int64_t max_value
= INT_MIN
;
119 bool any_zero
= false;
120 bool all_zero
= true;
122 for (unsigned i
= 0; i
< num_components
; ++i
) {
123 const int64_t v
= nir_const_value_as_int(load
->value
[swizzle
[i
]],
126 any_zero
= any_zero
|| (v
== 0);
127 all_zero
= all_zero
&& (v
== 0);
128 min_value
= MIN2(min_value
, v
);
129 max_value
= MAX2(max_value
, v
);
132 assert(any_zero
>= all_zero
);
133 assert(max_value
>= min_value
);
137 else if (min_value
> 0)
139 else if (min_value
== 0)
141 else if (max_value
< 0)
143 else if (max_value
== 0)
153 case nir_type_uint
: {
154 bool any_zero
= false;
155 bool all_zero
= true;
157 for (unsigned i
= 0; i
< num_components
; ++i
) {
158 const uint64_t v
= nir_const_value_as_uint(load
->value
[swizzle
[i
]],
161 any_zero
= any_zero
|| (v
== 0);
162 all_zero
= all_zero
&& (v
== 0);
165 assert(any_zero
>= all_zero
);
178 unreachable("Invalid alu source type");
183 #define ASSERT_TABLE_IS_COMMUTATIVE(t) \
185 for (unsigned r = 0; r < ARRAY_SIZE(t); r++) { \
186 for (unsigned c = 0; c < ARRAY_SIZE(t[0]); c++) \
187 assert(t[r][c] == t[c][r]); \
191 #define ASSERT_TABLE_IS_DIAGONAL(t) \
193 for (unsigned r = 0; r < ARRAY_SIZE(t); r++) \
194 assert(t[r][r] == r); \
197 #define ASSERT_TABLE_IS_COMMUTATIVE(t)
198 #define ASSERT_TABLE_IS_DIAGONAL(t)
202 * Short-hand name for use in the tables in analyze_expression. If this name
203 * becomes a problem on some compiler, we can change it to _.
205 #define _______ unknown
208 * Analyze an expression to determine the range of its result
210 * The end result of this analysis is a token that communicates something
211 * about the range of values. There's an implicit grammar that produces
212 * tokens from sequences of literal values, other tokens, and operations.
213 * This function implements this grammar as a recursive-descent parser. Some
214 * (but not all) of the grammar is listed in-line in the function.
216 static struct ssa_result_range
217 analyze_expression(const nir_alu_instr
*instr
, unsigned src
,
218 struct hash_table
*ht
)
220 if (!instr
->src
[src
].src
.is_ssa
)
221 return (struct ssa_result_range
){unknown
, false};
223 if (nir_src_is_const(instr
->src
[src
].src
))
224 return analyze_constant(instr
, src
);
226 if (instr
->src
[src
].src
.ssa
->parent_instr
->type
!= nir_instr_type_alu
)
227 return (struct ssa_result_range
){unknown
, false};
229 const struct nir_alu_instr
*const alu
=
230 nir_instr_as_alu(instr
->src
[src
].src
.ssa
->parent_instr
);
232 struct hash_entry
*he
= _mesa_hash_table_search(ht
, alu
);
234 return unpack_data(he
->data
);
236 struct ssa_result_range r
= {unknown
, false};
238 /* ge_zero: ge_zero + ge_zero
240 * gt_zero: gt_zero + eq_zero
241 * | gt_zero + ge_zero
242 * | eq_zero + gt_zero # Addition is commutative
243 * | ge_zero + gt_zero # Addition is commutative
244 * | gt_zero + gt_zero
247 * le_zero: le_zero + le_zero
249 * lt_zero: lt_zero + eq_zero
250 * | lt_zero + le_zero
251 * | eq_zero + lt_zero # Addition is commutative
252 * | le_zero + lt_zero # Addition is commutative
253 * | lt_zero + lt_zero
256 * ne_zero: eq_zero + ne_zero
257 * | ne_zero + eq_zero # Addition is commutative
260 * eq_zero: eq_zero + eq_zero
263 * All other cases are 'unknown'. The seeming odd entry is (ne_zero,
264 * ne_zero), but that could be (-5, +5) which is not ne_zero.
266 static const enum ssa_ranges fadd_table
[last_range
+ 1][last_range
+ 1] = {
267 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
268 /* unknown */ { _______
, _______
, _______
, _______
, _______
, _______
, _______
},
269 /* lt_zero */ { _______
, lt_zero
, lt_zero
, _______
, _______
, _______
, lt_zero
},
270 /* le_zero */ { _______
, lt_zero
, le_zero
, _______
, _______
, _______
, le_zero
},
271 /* gt_zero */ { _______
, _______
, _______
, gt_zero
, gt_zero
, _______
, gt_zero
},
272 /* ge_zero */ { _______
, _______
, _______
, gt_zero
, ge_zero
, _______
, ge_zero
},
273 /* ne_zero */ { _______
, _______
, _______
, _______
, _______
, _______
, ne_zero
},
274 /* eq_zero */ { _______
, lt_zero
, le_zero
, gt_zero
, ge_zero
, ne_zero
, eq_zero
},
277 ASSERT_TABLE_IS_COMMUTATIVE(fadd_table
);
279 /* Due to flush-to-zero semanatics of floating-point numbers with very
280 * small mangnitudes, we can never really be sure a result will be
283 * ge_zero: ge_zero * ge_zero
284 * | ge_zero * gt_zero
285 * | ge_zero * eq_zero
286 * | le_zero * lt_zero
287 * | lt_zero * le_zero # Multiplication is commutative
288 * | le_zero * le_zero
289 * | gt_zero * ge_zero # Multiplication is commutative
290 * | eq_zero * ge_zero # Multiplication is commutative
291 * | a * a # Left source == right source
292 * | gt_zero * gt_zero
293 * | lt_zero * lt_zero
296 * le_zero: ge_zero * le_zero
297 * | ge_zero * lt_zero
298 * | lt_zero * ge_zero # Multiplication is commutative
299 * | le_zero * ge_zero # Multiplication is commutative
300 * | le_zero * gt_zero
301 * | lt_zero * gt_zero
302 * | gt_zero * lt_zero # Multiplication is commutative
305 * eq_zero: eq_zero * <any>
306 * <any> * eq_zero # Multiplication is commutative
308 * All other cases are 'unknown'.
310 static const enum ssa_ranges fmul_table
[last_range
+ 1][last_range
+ 1] = {
311 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
312 /* unknown */ { _______
, _______
, _______
, _______
, _______
, _______
, eq_zero
},
313 /* lt_zero */ { _______
, ge_zero
, ge_zero
, le_zero
, le_zero
, _______
, eq_zero
},
314 /* le_zero */ { _______
, ge_zero
, ge_zero
, le_zero
, le_zero
, _______
, eq_zero
},
315 /* gt_zero */ { _______
, le_zero
, le_zero
, ge_zero
, ge_zero
, _______
, eq_zero
},
316 /* ge_zero */ { _______
, le_zero
, le_zero
, ge_zero
, ge_zero
, _______
, eq_zero
},
317 /* ne_zero */ { _______
, _______
, _______
, _______
, _______
, _______
, eq_zero
},
318 /* eq_zero */ { eq_zero
, eq_zero
, eq_zero
, eq_zero
, eq_zero
, eq_zero
, eq_zero
}
321 ASSERT_TABLE_IS_COMMUTATIVE(fmul_table
);
323 static const enum ssa_ranges fneg_table
[last_range
+ 1] = {
324 /* unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
325 _______
, gt_zero
, ge_zero
, lt_zero
, le_zero
, ne_zero
, eq_zero
332 r
= (struct ssa_result_range
){ge_zero
, alu
->op
== nir_op_b2f32
};
336 const struct ssa_result_range left
= analyze_expression(alu
, 1, ht
);
337 const struct ssa_result_range right
= analyze_expression(alu
, 2, ht
);
339 /* If either source is a constant load that is not zero, punt. The type
340 * will always be uint regardless of the actual type. We can't even
341 * decide if the value is non-zero because -0.0 is 0x80000000, and that
342 * will (possibly incorrectly) be considered non-zero.
344 /* FINISHME: We could do better, but it would require having the expected
345 * FINISHME: type passed in.
347 if ((nir_src_is_const(alu
->src
[1].src
) && left
.range
!= eq_zero
) ||
348 (nir_src_is_const(alu
->src
[2].src
) && right
.range
!= eq_zero
)) {
349 return (struct ssa_result_range
){unknown
, false};
352 r
.is_integral
= left
.is_integral
&& right
.is_integral
;
354 /* le_zero: bcsel(<any>, le_zero, lt_zero)
355 * | bcsel(<any>, eq_zero, lt_zero)
356 * | bcsel(<any>, le_zero, eq_zero)
357 * | bcsel(<any>, lt_zero, le_zero)
358 * | bcsel(<any>, lt_zero, eq_zero)
359 * | bcsel(<any>, eq_zero, le_zero)
360 * | bcsel(<any>, le_zero, le_zero)
363 * lt_zero: bcsel(<any>, lt_zero, lt_zero)
366 * ge_zero: bcsel(<any>, ge_zero, ge_zero)
367 * | bcsel(<any>, ge_zero, gt_zero)
368 * | bcsel(<any>, ge_zero, eq_zero)
369 * | bcsel(<any>, gt_zero, ge_zero)
370 * | bcsel(<any>, eq_zero, ge_zero)
373 * gt_zero: bcsel(<any>, gt_zero, gt_zero)
376 * ne_zero: bcsel(<any>, ne_zero, gt_zero)
377 * | bcsel(<any>, ne_zero, lt_zero)
378 * | bcsel(<any>, gt_zero, lt_zero)
379 * | bcsel(<any>, gt_zero, ne_zero)
380 * | bcsel(<any>, lt_zero, ne_zero)
381 * | bcsel(<any>, lt_zero, gt_zero)
382 * | bcsel(<any>, ne_zero, ne_zero)
385 * eq_zero: bcsel(<any>, eq_zero, eq_zero)
388 * All other cases are 'unknown'.
390 * The ranges could be tightened if the range of the first source is
391 * known. However, opt_algebraic will (eventually) elminiate the bcsel
392 * if the condition is known.
394 static const enum ssa_ranges table
[last_range
+ 1][last_range
+ 1] = {
395 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
396 /* unknown */ { _______
, _______
, _______
, _______
, _______
, _______
, _______
},
397 /* lt_zero */ { _______
, lt_zero
, le_zero
, ne_zero
, _______
, ne_zero
, le_zero
},
398 /* le_zero */ { _______
, le_zero
, le_zero
, _______
, _______
, _______
, le_zero
},
399 /* gt_zero */ { _______
, ne_zero
, _______
, gt_zero
, ge_zero
, ne_zero
, ge_zero
},
400 /* ge_zero */ { _______
, _______
, _______
, ge_zero
, ge_zero
, _______
, ge_zero
},
401 /* ne_zero */ { _______
, ne_zero
, _______
, ne_zero
, _______
, ne_zero
, _______
},
402 /* eq_zero */ { _______
, le_zero
, le_zero
, ge_zero
, ge_zero
, _______
, eq_zero
},
405 ASSERT_TABLE_IS_COMMUTATIVE(table
);
406 ASSERT_TABLE_IS_DIAGONAL(table
);
408 r
.range
= table
[left
.range
][right
.range
];
414 r
= analyze_expression(alu
, 0, ht
);
416 r
.is_integral
= true;
418 if (r
.range
== unknown
&& alu
->op
== nir_op_u2f32
)
424 r
= analyze_expression(alu
, 0, ht
);
446 const struct ssa_result_range left
= analyze_expression(alu
, 0, ht
);
447 const struct ssa_result_range right
= analyze_expression(alu
, 1, ht
);
449 r
.is_integral
= left
.is_integral
&& right
.is_integral
;
450 r
.range
= fadd_table
[left
.range
][right
.range
];
455 /* If the parameter might be less than zero, the mathematically result
456 * will be on (0, 1). For sufficiently large magnitude negative
457 * parameters, the result will flush to zero.
459 static const enum ssa_ranges table
[last_range
+ 1] = {
460 /* unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
461 ge_zero
, ge_zero
, ge_zero
, gt_zero
, gt_zero
, ge_zero
, gt_zero
464 r
= analyze_expression(alu
, 0, ht
);
466 r
.range
= table
[r
.range
];
471 const struct ssa_result_range left
= analyze_expression(alu
, 0, ht
);
472 const struct ssa_result_range right
= analyze_expression(alu
, 1, ht
);
474 r
.is_integral
= left
.is_integral
&& right
.is_integral
;
476 /* gt_zero: fmax(gt_zero, *)
477 * | fmax(*, gt_zero) # Treat fmax as commutative
480 * ge_zero: fmax(ge_zero, ne_zero)
481 * | fmax(ge_zero, lt_zero)
482 * | fmax(ge_zero, le_zero)
483 * | fmax(ge_zero, eq_zero)
484 * | fmax(ne_zero, ge_zero) # Treat fmax as commutative
485 * | fmax(lt_zero, ge_zero) # Treat fmax as commutative
486 * | fmax(le_zero, ge_zero) # Treat fmax as commutative
487 * | fmax(eq_zero, ge_zero) # Treat fmax as commutative
488 * | fmax(ge_zero, ge_zero)
491 * le_zero: fmax(le_zero, lt_zero)
492 * | fmax(lt_zero, le_zero) # Treat fmax as commutative
493 * | fmax(le_zero, le_zero)
496 * lt_zero: fmax(lt_zero, lt_zero)
499 * ne_zero: fmax(ne_zero, lt_zero)
500 * | fmax(lt_zero, ne_zero) # Treat fmax as commutative
501 * | fmax(ne_zero, ne_zero)
504 * eq_zero: fmax(eq_zero, le_zero)
505 * | fmax(eq_zero, lt_zero)
506 * | fmax(le_zero, eq_zero) # Treat fmax as commutative
507 * | fmax(lt_zero, eq_zero) # Treat fmax as commutative
508 * | fmax(eq_zero, eq_zero)
511 * All other cases are 'unknown'.
513 static const enum ssa_ranges table
[last_range
+ 1][last_range
+ 1] = {
514 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
515 /* unknown */ { _______
, _______
, _______
, gt_zero
, ge_zero
, _______
, _______
},
516 /* lt_zero */ { _______
, lt_zero
, le_zero
, gt_zero
, ge_zero
, ne_zero
, eq_zero
},
517 /* le_zero */ { _______
, le_zero
, le_zero
, gt_zero
, ge_zero
, _______
, eq_zero
},
518 /* gt_zero */ { gt_zero
, gt_zero
, gt_zero
, gt_zero
, gt_zero
, gt_zero
, gt_zero
},
519 /* ge_zero */ { ge_zero
, ge_zero
, ge_zero
, gt_zero
, ge_zero
, ge_zero
, ge_zero
},
520 /* ne_zero */ { _______
, ne_zero
, _______
, gt_zero
, ge_zero
, ne_zero
, _______
},
521 /* eq_zero */ { _______
, eq_zero
, eq_zero
, gt_zero
, ge_zero
, _______
, eq_zero
}
524 /* Treat fmax as commutative. */
525 ASSERT_TABLE_IS_COMMUTATIVE(table
);
526 ASSERT_TABLE_IS_DIAGONAL(table
);
528 r
.range
= table
[left
.range
][right
.range
];
533 const struct ssa_result_range left
= analyze_expression(alu
, 0, ht
);
534 const struct ssa_result_range right
= analyze_expression(alu
, 1, ht
);
536 r
.is_integral
= left
.is_integral
&& right
.is_integral
;
538 /* lt_zero: fmin(lt_zero, *)
539 * | fmin(*, lt_zero) # Treat fmin as commutative
542 * le_zero: fmin(le_zero, ne_zero)
543 * | fmin(le_zero, gt_zero)
544 * | fmin(le_zero, ge_zero)
545 * | fmin(le_zero, eq_zero)
546 * | fmin(ne_zero, le_zero) # Treat fmin as commutative
547 * | fmin(gt_zero, le_zero) # Treat fmin as commutative
548 * | fmin(ge_zero, le_zero) # Treat fmin as commutative
549 * | fmin(eq_zero, le_zero) # Treat fmin as commutative
550 * | fmin(le_zero, le_zero)
553 * ge_zero: fmin(ge_zero, gt_zero)
554 * | fmin(gt_zero, ge_zero) # Treat fmin as commutative
555 * | fmin(ge_zero, ge_zero)
558 * gt_zero: fmin(gt_zero, gt_zero)
561 * ne_zero: fmin(ne_zero, gt_zero)
562 * | fmin(gt_zero, ne_zero) # Treat fmin as commutative
563 * | fmin(ne_zero, ne_zero)
566 * eq_zero: fmin(eq_zero, ge_zero)
567 * | fmin(eq_zero, gt_zero)
568 * | fmin(ge_zero, eq_zero) # Treat fmin as commutative
569 * | fmin(gt_zero, eq_zero) # Treat fmin as commutative
570 * | fmin(eq_zero, eq_zero)
573 * All other cases are 'unknown'.
575 static const enum ssa_ranges table
[last_range
+ 1][last_range
+ 1] = {
576 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
577 /* unknown */ { _______
, lt_zero
, le_zero
, _______
, _______
, _______
, _______
},
578 /* lt_zero */ { lt_zero
, lt_zero
, lt_zero
, lt_zero
, lt_zero
, lt_zero
, lt_zero
},
579 /* le_zero */ { le_zero
, lt_zero
, le_zero
, le_zero
, le_zero
, le_zero
, le_zero
},
580 /* gt_zero */ { _______
, lt_zero
, le_zero
, gt_zero
, ge_zero
, ne_zero
, eq_zero
},
581 /* ge_zero */ { _______
, lt_zero
, le_zero
, ge_zero
, ge_zero
, _______
, eq_zero
},
582 /* ne_zero */ { _______
, lt_zero
, le_zero
, ne_zero
, _______
, ne_zero
, _______
},
583 /* eq_zero */ { _______
, lt_zero
, le_zero
, eq_zero
, eq_zero
, _______
, eq_zero
}
586 /* Treat fmin as commutative. */
587 ASSERT_TABLE_IS_COMMUTATIVE(table
);
588 ASSERT_TABLE_IS_DIAGONAL(table
);
590 r
.range
= table
[left
.range
][right
.range
];
595 const struct ssa_result_range left
= analyze_expression(alu
, 0, ht
);
596 const struct ssa_result_range right
= analyze_expression(alu
, 1, ht
);
598 r
.is_integral
= left
.is_integral
&& right
.is_integral
;
600 /* x * x => ge_zero */
601 if (left
.range
!= eq_zero
&& nir_alu_srcs_equal(alu
, alu
, 0, 1)) {
602 /* Even if x > 0, the result of x*x can be zero when x is, for
603 * example, a subnormal number.
606 } else if (left
.range
!= eq_zero
&& nir_alu_srcs_negative_equal(alu
, alu
, 0, 1)) {
607 /* -x * x => le_zero. */
610 r
.range
= fmul_table
[left
.range
][right
.range
];
616 r
= (struct ssa_result_range
){analyze_expression(alu
, 0, ht
).range
, false};
620 const struct ssa_result_range left
= analyze_expression(alu
, 0, ht
);
622 /* See commentary in nir_op_bcsel for the reasons this is necessary. */
623 if (nir_src_is_const(alu
->src
[0].src
) && left
.range
!= eq_zero
)
624 return (struct ssa_result_range
){unknown
, false};
631 r
= analyze_expression(alu
, 0, ht
);
633 r
.range
= fneg_table
[r
.range
];
637 r
= analyze_expression(alu
, 0, ht
);
643 r
.is_integral
= true;
647 assert(r
.is_integral
);
650 /* The fsat doesn't add any information in these cases. */
655 /* Since the result must be in [0, 1], the value must be >= 0. */
662 r
= (struct ssa_result_range
){analyze_expression(alu
, 0, ht
).range
, true};
667 r
= (struct ssa_result_range
){ge_zero
, false};
670 case nir_op_ffloor
: {
671 const struct ssa_result_range left
= analyze_expression(alu
, 0, ht
);
673 r
.is_integral
= true;
675 if (left
.is_integral
|| left
.range
== le_zero
|| left
.range
== lt_zero
)
676 r
.range
= left
.range
;
677 else if (left
.range
== ge_zero
|| left
.range
== gt_zero
)
679 else if (left
.range
== ne_zero
)
686 const struct ssa_result_range left
= analyze_expression(alu
, 0, ht
);
688 r
.is_integral
= true;
690 if (left
.is_integral
|| left
.range
== ge_zero
|| left
.range
== gt_zero
)
691 r
.range
= left
.range
;
692 else if (left
.range
== le_zero
|| left
.range
== lt_zero
)
694 else if (left
.range
== ne_zero
)
700 case nir_op_ftrunc
: {
701 const struct ssa_result_range left
= analyze_expression(alu
, 0, ht
);
703 r
.is_integral
= true;
705 if (left
.is_integral
)
706 r
.range
= left
.range
;
707 else if (left
.range
== ge_zero
|| left
.range
== gt_zero
)
709 else if (left
.range
== le_zero
|| left
.range
== lt_zero
)
711 else if (left
.range
== ne_zero
)
727 /* Boolean results are 0 or -1. */
728 r
= (struct ssa_result_range
){le_zero
, false};
732 /* Due to flush-to-zero semanatics of floating-point numbers with very
733 * small mangnitudes, we can never really be sure a result will be
736 * NIR uses pow() and powf() to constant evaluate nir_op_fpow. The man
737 * page for that function says:
739 * If y is 0, the result is 1.0 (even if x is a NaN).
741 * gt_zero: pow(*, eq_zero)
742 * | pow(eq_zero, lt_zero) # 0^-y = +inf
743 * | pow(eq_zero, le_zero) # 0^-y = +inf or 0^0 = 1.0
746 * eq_zero: pow(eq_zero, gt_zero)
749 * ge_zero: pow(gt_zero, gt_zero)
750 * | pow(gt_zero, ge_zero)
751 * | pow(gt_zero, lt_zero)
752 * | pow(gt_zero, le_zero)
753 * | pow(gt_zero, ne_zero)
754 * | pow(gt_zero, unknown)
755 * | pow(ge_zero, gt_zero)
756 * | pow(ge_zero, ge_zero)
757 * | pow(ge_zero, lt_zero)
758 * | pow(ge_zero, le_zero)
759 * | pow(ge_zero, ne_zero)
760 * | pow(ge_zero, unknown)
761 * | pow(eq_zero, ge_zero) # 0^0 = 1.0 or 0^+y = 0.0
762 * | pow(eq_zero, ne_zero) # 0^-y = +inf or 0^+y = 0.0
763 * | pow(eq_zero, unknown) # union of all other y cases
766 * All other cases are unknown.
768 * We could do better if the right operand is a constant, integral
771 static const enum ssa_ranges table
[last_range
+ 1][last_range
+ 1] = {
772 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
773 /* unknown */ { _______
, _______
, _______
, _______
, _______
, _______
, gt_zero
},
774 /* lt_zero */ { _______
, _______
, _______
, _______
, _______
, _______
, gt_zero
},
775 /* le_zero */ { _______
, _______
, _______
, _______
, _______
, _______
, gt_zero
},
776 /* gt_zero */ { ge_zero
, ge_zero
, ge_zero
, ge_zero
, ge_zero
, ge_zero
, gt_zero
},
777 /* ge_zero */ { ge_zero
, ge_zero
, ge_zero
, ge_zero
, ge_zero
, ge_zero
, gt_zero
},
778 /* ne_zero */ { _______
, _______
, _______
, _______
, _______
, _______
, gt_zero
},
779 /* eq_zero */ { ge_zero
, gt_zero
, gt_zero
, eq_zero
, ge_zero
, ge_zero
, gt_zero
},
782 const struct ssa_result_range left
= analyze_expression(alu
, 0, ht
);
783 const struct ssa_result_range right
= analyze_expression(alu
, 1, ht
);
785 r
.is_integral
= left
.is_integral
&& right
.is_integral
&&
786 is_not_negative(right
.range
);
787 r
.range
= table
[left
.range
][right
.range
];
792 const struct ssa_result_range first
= analyze_expression(alu
, 0, ht
);
793 const struct ssa_result_range second
= analyze_expression(alu
, 1, ht
);
794 const struct ssa_result_range third
= analyze_expression(alu
, 2, ht
);
796 r
.is_integral
= first
.is_integral
&& second
.is_integral
&&
799 enum ssa_ranges fmul_range
;
801 if (first
.range
!= eq_zero
&& nir_alu_srcs_equal(alu
, alu
, 0, 1)) {
802 /* See handling of nir_op_fmul for explanation of why ge_zero is the
805 fmul_range
= ge_zero
;
806 } else if (first
.range
!= eq_zero
&& nir_alu_srcs_negative_equal(alu
, alu
, 0, 1)) {
807 /* -x * x => le_zero */
808 fmul_range
= le_zero
;
810 fmul_range
= fmul_table
[first
.range
][second
.range
];
812 r
.range
= fadd_table
[fmul_range
][third
.range
];
817 const struct ssa_result_range first
= analyze_expression(alu
, 0, ht
);
818 const struct ssa_result_range second
= analyze_expression(alu
, 1, ht
);
819 const struct ssa_result_range third
= analyze_expression(alu
, 2, ht
);
821 r
.is_integral
= first
.is_integral
&& second
.is_integral
&&
824 /* Decompose the flrp to first + third * (second + -first) */
825 const enum ssa_ranges inner_fadd_range
=
826 fadd_table
[second
.range
][fneg_table
[first
.range
]];
828 const enum ssa_ranges fmul_range
=
829 fmul_table
[third
.range
][inner_fadd_range
];
831 r
.range
= fadd_table
[first
.range
][fmul_range
];
836 r
= (struct ssa_result_range
){unknown
, false};
840 if (r
.range
== eq_zero
)
841 r
.is_integral
= true;
843 _mesa_hash_table_insert(ht
, alu
, pack_data(r
));
849 struct ssa_result_range
850 nir_analyze_range(const nir_alu_instr
*instr
, unsigned src
)
852 struct hash_table
*ht
= _mesa_pointer_hash_table_create(NULL
);
854 const struct ssa_result_range r
= analyze_expression(instr
, src
, ht
);
856 _mesa_hash_table_destroy(ht
, NULL
);