nir/range-analysis: Fix incorrect fadd range result for (ne_zero, ne_zero)
[mesa.git] / src / compiler / nir / nir_range_analysis.c
1 /*
2 * Copyright © 2018 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23 #include <math.h>
24 #include <float.h>
25 #include "nir.h"
26 #include "nir_range_analysis.h"
27 #include "util/hash_table.h"
28
29 /**
30 * Analyzes a sequence of operations to determine some aspects of the range of
31 * the result.
32 */
33
34 static void *
35 pack_data(const struct ssa_result_range r)
36 {
37 return (void *)(uintptr_t)(r.range | r.is_integral << 8);
38 }
39
40 static struct ssa_result_range
41 unpack_data(const void *p)
42 {
43 const uintptr_t v = (uintptr_t) p;
44
45 return (struct ssa_result_range){v & 0xff, (v & 0x0ff00) != 0};
46 }
47
48 static struct ssa_result_range
49 analyze_constant(const struct nir_alu_instr *instr, unsigned src)
50 {
51 uint8_t swizzle[4] = { 0, 1, 2, 3 };
52
53 /* If the source is an explicitly sized source, then we need to reset
54 * both the number of components and the swizzle.
55 */
56 const unsigned num_components = nir_ssa_alu_instr_src_components(instr, src);
57
58 for (unsigned i = 0; i < num_components; ++i)
59 swizzle[i] = instr->src[src].swizzle[i];
60
61 const nir_load_const_instr *const load =
62 nir_instr_as_load_const(instr->src[src].src.ssa->parent_instr);
63
64 struct ssa_result_range r = { unknown, false };
65
66 switch (nir_op_infos[instr->op].input_types[src]) {
67 case nir_type_float: {
68 double min_value = DBL_MAX;
69 double max_value = -DBL_MAX;
70 bool any_zero = false;
71 bool all_zero = true;
72
73 r.is_integral = true;
74
75 for (unsigned i = 0; i < num_components; ++i) {
76 const double v = nir_const_value_as_float(load->value[swizzle[i]],
77 load->def.bit_size);
78
79 if (floor(v) != v)
80 r.is_integral = false;
81
82 any_zero = any_zero || (v == 0.0);
83 all_zero = all_zero && (v == 0.0);
84 min_value = MIN2(min_value, v);
85 max_value = MAX2(max_value, v);
86 }
87
88 assert(any_zero >= all_zero);
89 assert(isnan(max_value) || max_value >= min_value);
90
91 if (all_zero)
92 r.range = eq_zero;
93 else if (min_value > 0.0)
94 r.range = gt_zero;
95 else if (min_value == 0.0)
96 r.range = ge_zero;
97 else if (max_value < 0.0)
98 r.range = lt_zero;
99 else if (max_value == 0.0)
100 r.range = le_zero;
101 else if (!any_zero)
102 r.range = ne_zero;
103 else
104 r.range = unknown;
105
106 return r;
107 }
108
109 case nir_type_int:
110 case nir_type_bool: {
111 int64_t min_value = INT_MAX;
112 int64_t max_value = INT_MIN;
113 bool any_zero = false;
114 bool all_zero = true;
115
116 for (unsigned i = 0; i < num_components; ++i) {
117 const int64_t v = nir_const_value_as_int(load->value[swizzle[i]],
118 load->def.bit_size);
119
120 any_zero = any_zero || (v == 0);
121 all_zero = all_zero && (v == 0);
122 min_value = MIN2(min_value, v);
123 max_value = MAX2(max_value, v);
124 }
125
126 assert(any_zero >= all_zero);
127 assert(max_value >= min_value);
128
129 if (all_zero)
130 r.range = eq_zero;
131 else if (min_value > 0)
132 r.range = gt_zero;
133 else if (min_value == 0)
134 r.range = ge_zero;
135 else if (max_value < 0)
136 r.range = lt_zero;
137 else if (max_value == 0)
138 r.range = le_zero;
139 else if (!any_zero)
140 r.range = ne_zero;
141 else
142 r.range = unknown;
143
144 return r;
145 }
146
147 case nir_type_uint: {
148 bool any_zero = false;
149 bool all_zero = true;
150
151 for (unsigned i = 0; i < num_components; ++i) {
152 const uint64_t v = nir_const_value_as_uint(load->value[swizzle[i]],
153 load->def.bit_size);
154
155 any_zero = any_zero || (v == 0);
156 all_zero = all_zero && (v == 0);
157 }
158
159 assert(any_zero >= all_zero);
160
161 if (all_zero)
162 r.range = eq_zero;
163 else if (any_zero)
164 r.range = ge_zero;
165 else
166 r.range = gt_zero;
167
168 return r;
169 }
170
171 default:
172 unreachable("Invalid alu source type");
173 }
174 }
175
#ifndef NDEBUG
/**
 * Debug-build check that a square two-dimensional table is symmetric, i.e.
 * t[a][b] == t[b][a] for every pair of indices.
 *
 * The loop variables use names that are unlikely to collide with (and
 * therefore shadow) locals at the point of expansion, and the argument is
 * parenthesized so any array-valued expression may be passed.
 */
#define ASSERT_TABLE_IS_COMMUTATIVE(t)                                   \
   do {                                                                  \
      for (unsigned row_ = 0; row_ < ARRAY_SIZE(t); row_++) {            \
         for (unsigned col_ = 0; col_ < ARRAY_SIZE((t)[0]); col_++)      \
            assert((t)[row_][col_] == (t)[col_][row_]);                  \
      }                                                                  \
   } while (false)

/**
 * Debug-build check that every entry on the main diagonal of a table equals
 * its own index, i.e. t[a][a] == a.
 */
#define ASSERT_TABLE_IS_DIAGONAL(t)                                      \
   do {                                                                  \
      for (unsigned idx_ = 0; idx_ < ARRAY_SIZE(t); idx_++)              \
         assert((t)[idx_][idx_] == idx_);                                \
   } while (false)
#else
/* Release builds: the table checks expand to nothing. */
#define ASSERT_TABLE_IS_COMMUTATIVE(t)
#define ASSERT_TABLE_IS_DIAGONAL(t)
#endif

/**
 * Short-hand name for use in the tables in analyze_expression.  If this name
 * becomes a problem on some compiler, we can change it to _.
 */
#define _______ unknown
200
201 /**
202 * Analyze an expression to determine the range of its result
203 *
204 * The end result of this analysis is a token that communicates something
205 * about the range of values. There's an implicit grammar that produces
206 * tokens from sequences of literal values, other tokens, and operations.
207 * This function implements this grammar as a recursive-descent parser. Some
208 * (but not all) of the grammar is listed in-line in the function.
209 */
210 static struct ssa_result_range
211 analyze_expression(const nir_alu_instr *instr, unsigned src,
212 struct hash_table *ht)
213 {
214 if (!instr->src[src].src.is_ssa)
215 return (struct ssa_result_range){unknown, false};
216
217 if (nir_src_is_const(instr->src[src].src))
218 return analyze_constant(instr, src);
219
220 if (instr->src[src].src.ssa->parent_instr->type != nir_instr_type_alu)
221 return (struct ssa_result_range){unknown, false};
222
223 const struct nir_alu_instr *const alu =
224 nir_instr_as_alu(instr->src[src].src.ssa->parent_instr);
225
226 struct hash_entry *he = _mesa_hash_table_search(ht, alu);
227 if (he != NULL)
228 return unpack_data(he->data);
229
230 struct ssa_result_range r = {unknown, false};
231
232 /* ge_zero: ge_zero + ge_zero
233 *
234 * gt_zero: gt_zero + eq_zero
235 * | gt_zero + ge_zero
236 * | eq_zero + gt_zero # Addition is commutative
237 * | ge_zero + gt_zero # Addition is commutative
238 * | gt_zero + gt_zero
239 * ;
240 *
241 * le_zero: le_zero + le_zero
242 *
243 * lt_zero: lt_zero + eq_zero
244 * | lt_zero + le_zero
245 * | eq_zero + lt_zero # Addition is commutative
246 * | le_zero + lt_zero # Addition is commutative
247 * | lt_zero + lt_zero
248 * ;
249 *
250 * ne_zero: eq_zero + ne_zero
251 * | ne_zero + eq_zero # Addition is commutative
252 * ;
253 *
254 * eq_zero: eq_zero + eq_zero
255 * ;
256 *
257 * All other cases are 'unknown'. The seeming odd entry is (ne_zero,
258 * ne_zero), but that could be (-5, +5) which is not ne_zero.
259 */
260 static const enum ssa_ranges fadd_table[last_range + 1][last_range + 1] = {
261 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
262 /* unknown */ { _______, _______, _______, _______, _______, _______, _______ },
263 /* lt_zero */ { _______, lt_zero, lt_zero, _______, _______, _______, lt_zero },
264 /* le_zero */ { _______, lt_zero, le_zero, _______, _______, _______, le_zero },
265 /* gt_zero */ { _______, _______, _______, gt_zero, gt_zero, _______, gt_zero },
266 /* ge_zero */ { _______, _______, _______, gt_zero, ge_zero, _______, ge_zero },
267 /* ne_zero */ { _______, _______, _______, _______, _______, _______, ne_zero },
268 /* eq_zero */ { _______, lt_zero, le_zero, gt_zero, ge_zero, ne_zero, eq_zero },
269 };
270
271 ASSERT_TABLE_IS_COMMUTATIVE(fadd_table);
272
273 /* Due to flush-to-zero semanatics of floating-point numbers with very
274 * small mangnitudes, we can never really be sure a result will be
275 * non-zero.
276 *
277 * ge_zero: ge_zero * ge_zero
278 * | ge_zero * gt_zero
279 * | ge_zero * eq_zero
280 * | le_zero * lt_zero
281 * | lt_zero * le_zero # Multiplication is commutative
282 * | le_zero * le_zero
283 * | gt_zero * ge_zero # Multiplication is commutative
284 * | eq_zero * ge_zero # Multiplication is commutative
285 * | a * a # Left source == right source
286 * | gt_zero * gt_zero
287 * | lt_zero * lt_zero
288 * ;
289 *
290 * le_zero: ge_zero * le_zero
291 * | ge_zero * lt_zero
292 * | lt_zero * ge_zero # Multiplication is commutative
293 * | le_zero * ge_zero # Multiplication is commutative
294 * | le_zero * gt_zero
295 * | lt_zero * gt_zero
296 * | gt_zero * lt_zero # Multiplication is commutative
297 * ;
298 *
299 * eq_zero: eq_zero * <any>
300 * <any> * eq_zero # Multiplication is commutative
301 *
302 * All other cases are 'unknown'.
303 */
304 static const enum ssa_ranges fmul_table[last_range + 1][last_range + 1] = {
305 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
306 /* unknown */ { _______, _______, _______, _______, _______, _______, eq_zero },
307 /* lt_zero */ { _______, ge_zero, ge_zero, le_zero, le_zero, _______, eq_zero },
308 /* le_zero */ { _______, ge_zero, ge_zero, le_zero, le_zero, _______, eq_zero },
309 /* gt_zero */ { _______, le_zero, le_zero, ge_zero, ge_zero, _______, eq_zero },
310 /* ge_zero */ { _______, le_zero, le_zero, ge_zero, ge_zero, _______, eq_zero },
311 /* ne_zero */ { _______, _______, _______, _______, _______, _______, eq_zero },
312 /* eq_zero */ { eq_zero, eq_zero, eq_zero, eq_zero, eq_zero, eq_zero, eq_zero }
313 };
314
315 ASSERT_TABLE_IS_COMMUTATIVE(fmul_table);
316
317 static const enum ssa_ranges fneg_table[last_range + 1] = {
318 /* unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
319 _______, gt_zero, ge_zero, lt_zero, le_zero, ne_zero, eq_zero
320 };
321
322
323 switch (alu->op) {
324 case nir_op_b2f32:
325 case nir_op_b2i32:
326 r = (struct ssa_result_range){ge_zero, alu->op == nir_op_b2f32};
327 break;
328
329 case nir_op_bcsel: {
330 const struct ssa_result_range left = analyze_expression(alu, 1, ht);
331 const struct ssa_result_range right = analyze_expression(alu, 2, ht);
332
333 /* If either source is a constant load that is not zero, punt. The type
334 * will always be uint regardless of the actual type. We can't even
335 * decide if the value is non-zero because -0.0 is 0x80000000, and that
336 * will (possibly incorrectly) be considered non-zero.
337 */
338 /* FINISHME: We could do better, but it would require having the expected
339 * FINISHME: type passed in.
340 */
341 if ((nir_src_is_const(alu->src[1].src) && left.range != eq_zero) ||
342 (nir_src_is_const(alu->src[2].src) && right.range != eq_zero)) {
343 return (struct ssa_result_range){unknown, false};
344 }
345
346 r.is_integral = left.is_integral && right.is_integral;
347
348 /* le_zero: bcsel(<any>, le_zero, lt_zero)
349 * | bcsel(<any>, eq_zero, lt_zero)
350 * | bcsel(<any>, le_zero, eq_zero)
351 * | bcsel(<any>, lt_zero, le_zero)
352 * | bcsel(<any>, lt_zero, eq_zero)
353 * | bcsel(<any>, eq_zero, le_zero)
354 * | bcsel(<any>, le_zero, le_zero)
355 * ;
356 *
357 * lt_zero: bcsel(<any>, lt_zero, lt_zero)
358 * ;
359 *
360 * ge_zero: bcsel(<any>, ge_zero, ge_zero)
361 * | bcsel(<any>, ge_zero, gt_zero)
362 * | bcsel(<any>, ge_zero, eq_zero)
363 * | bcsel(<any>, gt_zero, ge_zero)
364 * | bcsel(<any>, eq_zero, ge_zero)
365 * ;
366 *
367 * gt_zero: bcsel(<any>, gt_zero, gt_zero)
368 * ;
369 *
370 * ne_zero: bcsel(<any>, ne_zero, gt_zero)
371 * | bcsel(<any>, ne_zero, lt_zero)
372 * | bcsel(<any>, gt_zero, lt_zero)
373 * | bcsel(<any>, gt_zero, ne_zero)
374 * | bcsel(<any>, lt_zero, ne_zero)
375 * | bcsel(<any>, lt_zero, gt_zero)
376 * | bcsel(<any>, ne_zero, ne_zero)
377 * ;
378 *
379 * eq_zero: bcsel(<any>, eq_zero, eq_zero)
380 * ;
381 *
382 * All other cases are 'unknown'.
383 *
384 * The ranges could be tightened if the range of the first source is
385 * known. However, opt_algebraic will (eventually) elminiate the bcsel
386 * if the condition is known.
387 */
388 static const enum ssa_ranges table[last_range + 1][last_range + 1] = {
389 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
390 /* unknown */ { _______, _______, _______, _______, _______, _______, _______ },
391 /* lt_zero */ { _______, lt_zero, le_zero, ne_zero, _______, ne_zero, le_zero },
392 /* le_zero */ { _______, le_zero, le_zero, _______, _______, _______, le_zero },
393 /* gt_zero */ { _______, ne_zero, _______, gt_zero, ge_zero, ne_zero, ge_zero },
394 /* ge_zero */ { _______, _______, _______, ge_zero, ge_zero, _______, ge_zero },
395 /* ne_zero */ { _______, ne_zero, _______, ne_zero, _______, ne_zero, _______ },
396 /* eq_zero */ { _______, le_zero, le_zero, ge_zero, ge_zero, _______, eq_zero },
397 };
398
399 ASSERT_TABLE_IS_COMMUTATIVE(table);
400 ASSERT_TABLE_IS_DIAGONAL(table);
401
402 r.range = table[left.range][right.range];
403 break;
404 }
405
406 case nir_op_i2f32:
407 case nir_op_u2f32:
408 r = analyze_expression(alu, 0, ht);
409
410 r.is_integral = true;
411
412 if (r.range == unknown && alu->op == nir_op_u2f32)
413 r.range = ge_zero;
414
415 break;
416
417 case nir_op_fabs:
418 r = analyze_expression(alu, 0, ht);
419
420 switch (r.range) {
421 case unknown:
422 case le_zero:
423 case ge_zero:
424 r.range = ge_zero;
425 break;
426
427 case lt_zero:
428 case gt_zero:
429 case ne_zero:
430 r.range = gt_zero;
431 break;
432
433 case eq_zero:
434 break;
435 }
436
437 break;
438
439 case nir_op_fadd: {
440 const struct ssa_result_range left = analyze_expression(alu, 0, ht);
441 const struct ssa_result_range right = analyze_expression(alu, 1, ht);
442
443 r.is_integral = left.is_integral && right.is_integral;
444 r.range = fadd_table[left.range][right.range];
445 break;
446 }
447
448 case nir_op_fexp2: {
449 /* If the parameter might be less than zero, the mathematically result
450 * will be on (0, 1). For sufficiently large magnitude negative
451 * parameters, the result will flush to zero.
452 */
453 static const enum ssa_ranges table[last_range + 1] = {
454 /* unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
455 ge_zero, ge_zero, ge_zero, gt_zero, gt_zero, ge_zero, gt_zero
456 };
457
458 r = analyze_expression(alu, 0, ht);
459
460 r.range = table[r.range];
461 break;
462 }
463
464 case nir_op_fmax: {
465 const struct ssa_result_range left = analyze_expression(alu, 0, ht);
466 const struct ssa_result_range right = analyze_expression(alu, 1, ht);
467
468 r.is_integral = left.is_integral && right.is_integral;
469
470 /* gt_zero: fmax(gt_zero, *)
471 * | fmax(*, gt_zero) # Treat fmax as commutative
472 * ;
473 *
474 * ge_zero: fmax(ge_zero, ne_zero)
475 * | fmax(ge_zero, lt_zero)
476 * | fmax(ge_zero, le_zero)
477 * | fmax(ge_zero, eq_zero)
478 * | fmax(ne_zero, ge_zero) # Treat fmax as commutative
479 * | fmax(lt_zero, ge_zero) # Treat fmax as commutative
480 * | fmax(le_zero, ge_zero) # Treat fmax as commutative
481 * | fmax(eq_zero, ge_zero) # Treat fmax as commutative
482 * | fmax(ge_zero, ge_zero)
483 * ;
484 *
485 * le_zero: fmax(le_zero, lt_zero)
486 * | fmax(lt_zero, le_zero) # Treat fmax as commutative
487 * | fmax(le_zero, le_zero)
488 * ;
489 *
490 * lt_zero: fmax(lt_zero, lt_zero)
491 * ;
492 *
493 * ne_zero: fmax(ne_zero, lt_zero)
494 * | fmax(lt_zero, ne_zero) # Treat fmax as commutative
495 * | fmax(ne_zero, ne_zero)
496 * ;
497 *
498 * eq_zero: fmax(eq_zero, le_zero)
499 * | fmax(eq_zero, lt_zero)
500 * | fmax(le_zero, eq_zero) # Treat fmax as commutative
501 * | fmax(lt_zero, eq_zero) # Treat fmax as commutative
502 * | fmax(eq_zero, eq_zero)
503 * ;
504 *
505 * All other cases are 'unknown'.
506 */
507 static const enum ssa_ranges table[last_range + 1][last_range + 1] = {
508 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
509 /* unknown */ { _______, _______, _______, gt_zero, ge_zero, _______, _______ },
510 /* lt_zero */ { _______, lt_zero, le_zero, gt_zero, ge_zero, ne_zero, eq_zero },
511 /* le_zero */ { _______, le_zero, le_zero, gt_zero, ge_zero, _______, eq_zero },
512 /* gt_zero */ { gt_zero, gt_zero, gt_zero, gt_zero, gt_zero, gt_zero, gt_zero },
513 /* ge_zero */ { ge_zero, ge_zero, ge_zero, gt_zero, ge_zero, ge_zero, ge_zero },
514 /* ne_zero */ { _______, ne_zero, _______, gt_zero, ge_zero, ne_zero, _______ },
515 /* eq_zero */ { _______, eq_zero, eq_zero, gt_zero, ge_zero, _______, eq_zero }
516 };
517
518 /* Treat fmax as commutative. */
519 ASSERT_TABLE_IS_COMMUTATIVE(table);
520 ASSERT_TABLE_IS_DIAGONAL(table);
521
522 r.range = table[left.range][right.range];
523 break;
524 }
525
526 case nir_op_fmin: {
527 const struct ssa_result_range left = analyze_expression(alu, 0, ht);
528 const struct ssa_result_range right = analyze_expression(alu, 1, ht);
529
530 r.is_integral = left.is_integral && right.is_integral;
531
532 /* lt_zero: fmin(lt_zero, *)
533 * | fmin(*, lt_zero) # Treat fmin as commutative
534 * ;
535 *
536 * le_zero: fmin(le_zero, ne_zero)
537 * | fmin(le_zero, gt_zero)
538 * | fmin(le_zero, ge_zero)
539 * | fmin(le_zero, eq_zero)
540 * | fmin(ne_zero, le_zero) # Treat fmin as commutative
541 * | fmin(gt_zero, le_zero) # Treat fmin as commutative
542 * | fmin(ge_zero, le_zero) # Treat fmin as commutative
543 * | fmin(eq_zero, le_zero) # Treat fmin as commutative
544 * | fmin(le_zero, le_zero)
545 * ;
546 *
547 * ge_zero: fmin(ge_zero, gt_zero)
548 * | fmin(gt_zero, ge_zero) # Treat fmin as commutative
549 * | fmin(ge_zero, ge_zero)
550 * ;
551 *
552 * gt_zero: fmin(gt_zero, gt_zero)
553 * ;
554 *
555 * ne_zero: fmin(ne_zero, gt_zero)
556 * | fmin(gt_zero, ne_zero) # Treat fmin as commutative
557 * | fmin(ne_zero, ne_zero)
558 * ;
559 *
560 * eq_zero: fmin(eq_zero, ge_zero)
561 * | fmin(eq_zero, gt_zero)
562 * | fmin(ge_zero, eq_zero) # Treat fmin as commutative
563 * | fmin(gt_zero, eq_zero) # Treat fmin as commutative
564 * | fmin(eq_zero, eq_zero)
565 * ;
566 *
567 * All other cases are 'unknown'.
568 */
569 static const enum ssa_ranges table[last_range + 1][last_range + 1] = {
570 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
571 /* unknown */ { _______, lt_zero, le_zero, _______, _______, _______, _______ },
572 /* lt_zero */ { lt_zero, lt_zero, lt_zero, lt_zero, lt_zero, lt_zero, lt_zero },
573 /* le_zero */ { le_zero, lt_zero, le_zero, le_zero, le_zero, le_zero, le_zero },
574 /* gt_zero */ { _______, lt_zero, le_zero, gt_zero, ge_zero, ne_zero, eq_zero },
575 /* ge_zero */ { _______, lt_zero, le_zero, ge_zero, ge_zero, _______, eq_zero },
576 /* ne_zero */ { _______, lt_zero, le_zero, ne_zero, _______, ne_zero, _______ },
577 /* eq_zero */ { _______, lt_zero, le_zero, eq_zero, eq_zero, _______, eq_zero }
578 };
579
580 /* Treat fmin as commutative. */
581 ASSERT_TABLE_IS_COMMUTATIVE(table);
582 ASSERT_TABLE_IS_DIAGONAL(table);
583
584 r.range = table[left.range][right.range];
585 break;
586 }
587
588 case nir_op_fmul: {
589 const struct ssa_result_range left = analyze_expression(alu, 0, ht);
590 const struct ssa_result_range right = analyze_expression(alu, 1, ht);
591
592 r.is_integral = left.is_integral && right.is_integral;
593
594 /* x * x => ge_zero */
595 if (left.range != eq_zero && nir_alu_srcs_equal(alu, alu, 0, 1)) {
596 /* Even if x > 0, the result of x*x can be zero when x is, for
597 * example, a subnormal number.
598 */
599 r.range = ge_zero;
600 } else if (left.range != eq_zero && nir_alu_srcs_negative_equal(alu, alu, 0, 1)) {
601 /* -x * x => le_zero. */
602 r.range = le_zero;
603 } else
604 r.range = fmul_table[left.range][right.range];
605
606 break;
607 }
608
609 case nir_op_frcp:
610 r = (struct ssa_result_range){analyze_expression(alu, 0, ht).range, false};
611 break;
612
613 case nir_op_mov:
614 r = analyze_expression(alu, 0, ht);
615 break;
616
617 case nir_op_fneg:
618 r = analyze_expression(alu, 0, ht);
619
620 r.range = fneg_table[r.range];
621 break;
622
623 case nir_op_fsat:
624 r = analyze_expression(alu, 0, ht);
625
626 switch (r.range) {
627 case le_zero:
628 case lt_zero:
629 r.range = eq_zero;
630 r.is_integral = true;
631 break;
632
633 case eq_zero:
634 assert(r.is_integral);
635 case gt_zero:
636 case ge_zero:
637 /* The fsat doesn't add any information in these cases. */
638 break;
639
640 case ne_zero:
641 case unknown:
642 /* Since the result must be in [0, 1], the value must be >= 0. */
643 r.range = ge_zero;
644 break;
645 }
646 break;
647
648 case nir_op_fsign:
649 r = (struct ssa_result_range){analyze_expression(alu, 0, ht).range, true};
650 break;
651
652 case nir_op_fsqrt:
653 case nir_op_frsq:
654 r = (struct ssa_result_range){ge_zero, false};
655 break;
656
657 case nir_op_ffloor: {
658 const struct ssa_result_range left = analyze_expression(alu, 0, ht);
659
660 r.is_integral = true;
661
662 if (left.is_integral || left.range == le_zero || left.range == lt_zero)
663 r.range = left.range;
664 else if (left.range == ge_zero || left.range == gt_zero)
665 r.range = ge_zero;
666 else if (left.range == ne_zero)
667 r.range = unknown;
668
669 break;
670 }
671
672 case nir_op_fceil: {
673 const struct ssa_result_range left = analyze_expression(alu, 0, ht);
674
675 r.is_integral = true;
676
677 if (left.is_integral || left.range == ge_zero || left.range == gt_zero)
678 r.range = left.range;
679 else if (left.range == le_zero || left.range == lt_zero)
680 r.range = le_zero;
681 else if (left.range == ne_zero)
682 r.range = unknown;
683
684 break;
685 }
686
687 case nir_op_ftrunc: {
688 const struct ssa_result_range left = analyze_expression(alu, 0, ht);
689
690 r.is_integral = true;
691
692 if (left.is_integral)
693 r.range = left.range;
694 else if (left.range == ge_zero || left.range == gt_zero)
695 r.range = ge_zero;
696 else if (left.range == le_zero || left.range == lt_zero)
697 r.range = le_zero;
698 else if (left.range == ne_zero)
699 r.range = unknown;
700
701 break;
702 }
703
704 case nir_op_flt:
705 case nir_op_fge:
706 case nir_op_feq:
707 case nir_op_fne:
708 case nir_op_ilt:
709 case nir_op_ige:
710 case nir_op_ieq:
711 case nir_op_ine:
712 case nir_op_ult:
713 case nir_op_uge:
714 /* Boolean results are 0 or -1. */
715 r = (struct ssa_result_range){le_zero, false};
716 break;
717
718 case nir_op_ffma: {
719 const struct ssa_result_range first = analyze_expression(alu, 0, ht);
720 const struct ssa_result_range second = analyze_expression(alu, 1, ht);
721 const struct ssa_result_range third = analyze_expression(alu, 2, ht);
722
723 r.is_integral = first.is_integral && second.is_integral &&
724 third.is_integral;
725
726 enum ssa_ranges fmul_range;
727
728 if (first.range != eq_zero && nir_alu_srcs_equal(alu, alu, 0, 1)) {
729 /* See handling of nir_op_fmul for explanation of why ge_zero is the
730 * range.
731 */
732 fmul_range = ge_zero;
733 } else if (first.range != eq_zero && nir_alu_srcs_negative_equal(alu, alu, 0, 1)) {
734 /* -x * x => le_zero */
735 fmul_range = le_zero;
736 } else
737 fmul_range = fmul_table[first.range][second.range];
738
739 r.range = fadd_table[fmul_range][third.range];
740 break;
741 }
742
743 case nir_op_flrp: {
744 const struct ssa_result_range first = analyze_expression(alu, 0, ht);
745 const struct ssa_result_range second = analyze_expression(alu, 1, ht);
746 const struct ssa_result_range third = analyze_expression(alu, 2, ht);
747
748 r.is_integral = first.is_integral && second.is_integral &&
749 third.is_integral;
750
751 /* Decompose the flrp to first + third * (second + -first) */
752 const enum ssa_ranges inner_fadd_range =
753 fadd_table[second.range][fneg_table[first.range]];
754
755 const enum ssa_ranges fmul_range =
756 fmul_table[third.range][inner_fadd_range];
757
758 r.range = fadd_table[first.range][fmul_range];
759 break;
760 }
761
762 default:
763 r = (struct ssa_result_range){unknown, false};
764 break;
765 }
766
767 if (r.range == eq_zero)
768 r.is_integral = true;
769
770 _mesa_hash_table_insert(ht, alu, pack_data(r));
771 return r;
772 }
773
774 #undef _______
775
776 struct ssa_result_range
777 nir_analyze_range(const nir_alu_instr *instr, unsigned src)
778 {
779 struct hash_table *ht = _mesa_pointer_hash_table_create(NULL);
780
781 const struct ssa_result_range r = analyze_expression(instr, src, ht);
782
783 _mesa_hash_table_destroy(ht, NULL);
784
785 return r;
786 }