nir/range-analysis: Range tracking for fpow
[mesa.git] / src / compiler / nir / nir_range_analysis.c
1 /*
2 * Copyright © 2018 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23 #include <math.h>
24 #include <float.h>
25 #include "nir.h"
26 #include "nir_range_analysis.h"
27 #include "util/hash_table.h"
28
29 /**
30 * Analyzes a sequence of operations to determine some aspects of the range of
31 * the result.
32 */
33
34 static bool
35 is_not_negative(enum ssa_ranges r)
36 {
37 return r == gt_zero || r == ge_zero || r == eq_zero;
38 }
39
40 static void *
41 pack_data(const struct ssa_result_range r)
42 {
43 return (void *)(uintptr_t)(r.range | r.is_integral << 8);
44 }
45
46 static struct ssa_result_range
47 unpack_data(const void *p)
48 {
49 const uintptr_t v = (uintptr_t) p;
50
51 return (struct ssa_result_range){v & 0xff, (v & 0x0ff00) != 0};
52 }
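
/* A quick round-trip sketch of the packing scheme above: the range token
 * occupies the low byte of the pointer-sized value and is_integral occupies
 * bit 8. Assuming the enum ordering used in the row/column comments of the
 * tables below (unknown, lt_zero, le_zero, gt_zero, ...), so gt_zero has the
 * value 3, packing { gt_zero, true } behaves like this:
 *
 *    const struct ssa_result_range r = { gt_zero, true };
 *    void *const packed = pack_data(r);             // 0x103 given gt_zero == 3
 *    const struct ssa_result_range u = unpack_data(packed);
 *    // u.range == gt_zero and u.is_integral == true
 */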
53
54 static struct ssa_result_range
55 analyze_constant(const struct nir_alu_instr *instr, unsigned src)
56 {
57 uint8_t swizzle[4] = { 0, 1, 2, 3 };
58
59 /* If the source is an explicitly sized source, then we need to reset
60 * both the number of components and the swizzle.
61 */
62 const unsigned num_components = nir_ssa_alu_instr_src_components(instr, src);
63
64 for (unsigned i = 0; i < num_components; ++i)
65 swizzle[i] = instr->src[src].swizzle[i];
66
67 const nir_load_const_instr *const load =
68 nir_instr_as_load_const(instr->src[src].src.ssa->parent_instr);
69
70 struct ssa_result_range r = { unknown, false };
71
72 switch (nir_op_infos[instr->op].input_types[src]) {
73 case nir_type_float: {
74 double min_value = DBL_MAX;
75 double max_value = -DBL_MAX;
76 bool any_zero = false;
77 bool all_zero = true;
78
79 r.is_integral = true;
80
81 for (unsigned i = 0; i < num_components; ++i) {
82 const double v = nir_const_value_as_float(load->value[swizzle[i]],
83 load->def.bit_size);
84
85 if (floor(v) != v)
86 r.is_integral = false;
87
88 any_zero = any_zero || (v == 0.0);
89 all_zero = all_zero && (v == 0.0);
90 min_value = MIN2(min_value, v);
91 max_value = MAX2(max_value, v);
92 }
93
94 assert(any_zero >= all_zero);
95 assert(isnan(max_value) || max_value >= min_value);
96
97 if (all_zero)
98 r.range = eq_zero;
99 else if (min_value > 0.0)
100 r.range = gt_zero;
101 else if (min_value == 0.0)
102 r.range = ge_zero;
103 else if (max_value < 0.0)
104 r.range = lt_zero;
105 else if (max_value == 0.0)
106 r.range = le_zero;
107 else if (!any_zero)
108 r.range = ne_zero;
109 else
110 r.range = unknown;
111
112 return r;
113 }
114
115 case nir_type_int:
116 case nir_type_bool: {
117 int64_t min_value = INT64_MAX;
118 int64_t max_value = INT64_MIN;
119 bool any_zero = false;
120 bool all_zero = true;
121
122 for (unsigned i = 0; i < num_components; ++i) {
123 const int64_t v = nir_const_value_as_int(load->value[swizzle[i]],
124 load->def.bit_size);
125
126 any_zero = any_zero || (v == 0);
127 all_zero = all_zero && (v == 0);
128 min_value = MIN2(min_value, v);
129 max_value = MAX2(max_value, v);
130 }
131
132 assert(any_zero >= all_zero);
133 assert(max_value >= min_value);
134
135 if (all_zero)
136 r.range = eq_zero;
137 else if (min_value > 0)
138 r.range = gt_zero;
139 else if (min_value == 0)
140 r.range = ge_zero;
141 else if (max_value < 0)
142 r.range = lt_zero;
143 else if (max_value == 0)
144 r.range = le_zero;
145 else if (!any_zero)
146 r.range = ne_zero;
147 else
148 r.range = unknown;
149
150 return r;
151 }
152
153 case nir_type_uint: {
154 bool any_zero = false;
155 bool all_zero = true;
156
157 for (unsigned i = 0; i < num_components; ++i) {
158 const uint64_t v = nir_const_value_as_uint(load->value[swizzle[i]],
159 load->def.bit_size);
160
161 any_zero = any_zero || (v == 0);
162 all_zero = all_zero && (v == 0);
163 }
164
165 assert(any_zero >= all_zero);
166
167 if (all_zero)
168 r.range = eq_zero;
169 else if (any_zero)
170 r.range = ge_zero;
171 else
172 r.range = gt_zero;
173
174 return r;
175 }
176
177 default:
178 unreachable("Invalid alu source type");
179 }
180 }
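
/* Worked examples of the constant classification above, assuming a
 * float-typed source (a sketch; the vectors are hypothetical constants):
 *
 *    (0.5, 3.0)  -> { gt_zero, false }   min > 0, 0.5 is not integral
 *    (0.0, 2.0)  -> { ge_zero, true }    min == 0, all components integral
 *    (-1.0, 1.0) -> { ne_zero, true }    no zero component, signs differ
 */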
181
182 #ifndef NDEBUG
183 #define ASSERT_TABLE_IS_COMMUTATIVE(t) \
184 do { \
185 for (unsigned r = 0; r < ARRAY_SIZE(t); r++) { \
186 for (unsigned c = 0; c < ARRAY_SIZE(t[0]); c++) \
187 assert(t[r][c] == t[c][r]); \
188 } \
189 } while (false)
190
191 #define ASSERT_TABLE_IS_DIAGONAL(t) \
192 do { \
193 for (unsigned r = 0; r < ARRAY_SIZE(t); r++) \
194 assert(t[r][r] == r); \
195 } while (false)
196 #else
197 #define ASSERT_TABLE_IS_COMMUTATIVE(t)
198 #define ASSERT_TABLE_IS_DIAGONAL(t)
199 #endif
200
201 /**
202 * Short-hand name for use in the tables in analyze_expression. If this name
203 * becomes a problem on some compiler, we can change it to _.
204 */
205 #define _______ unknown
206
207 /**
208 * Analyze an expression to determine the range of its result
209 *
210 * The end result of this analysis is a token that communicates something
211 * about the range of values. There's an implicit grammar that produces
212 * tokens from sequences of literal values, other tokens, and operations.
213 * This function implements this grammar as a recursive-descent parser. Some
214 * (but not all) of the grammar is listed in-line in the function.
215 */
216 static struct ssa_result_range
217 analyze_expression(const nir_alu_instr *instr, unsigned src,
218 struct hash_table *ht)
219 {
220 if (!instr->src[src].src.is_ssa)
221 return (struct ssa_result_range){unknown, false};
222
223 if (nir_src_is_const(instr->src[src].src))
224 return analyze_constant(instr, src);
225
226 if (instr->src[src].src.ssa->parent_instr->type != nir_instr_type_alu)
227 return (struct ssa_result_range){unknown, false};
228
229 const struct nir_alu_instr *const alu =
230 nir_instr_as_alu(instr->src[src].src.ssa->parent_instr);
231
232 struct hash_entry *he = _mesa_hash_table_search(ht, alu);
233 if (he != NULL)
234 return unpack_data(he->data);
235
236 struct ssa_result_range r = {unknown, false};
237
238 /* ge_zero: ge_zero + ge_zero
239 *
240 * gt_zero: gt_zero + eq_zero
241 * | gt_zero + ge_zero
242 * | eq_zero + gt_zero # Addition is commutative
243 * | ge_zero + gt_zero # Addition is commutative
244 * | gt_zero + gt_zero
245 * ;
246 *
247 * le_zero: le_zero + le_zero
248 *
249 * lt_zero: lt_zero + eq_zero
250 * | lt_zero + le_zero
251 * | eq_zero + lt_zero # Addition is commutative
252 * | le_zero + lt_zero # Addition is commutative
253 * | lt_zero + lt_zero
254 * ;
255 *
256 * ne_zero: eq_zero + ne_zero
257 * | ne_zero + eq_zero # Addition is commutative
258 * ;
259 *
260 * eq_zero: eq_zero + eq_zero
261 * ;
262 *
263 * All other cases are 'unknown'. The seemingly odd entry is (ne_zero,
264 * ne_zero), but that could be (-5) + (+5) = 0, which is not ne_zero.
265 */
266 static const enum ssa_ranges fadd_table[last_range + 1][last_range + 1] = {
267 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
268 /* unknown */ { _______, _______, _______, _______, _______, _______, _______ },
269 /* lt_zero */ { _______, lt_zero, lt_zero, _______, _______, _______, lt_zero },
270 /* le_zero */ { _______, lt_zero, le_zero, _______, _______, _______, le_zero },
271 /* gt_zero */ { _______, _______, _______, gt_zero, gt_zero, _______, gt_zero },
272 /* ge_zero */ { _______, _______, _______, gt_zero, ge_zero, _______, ge_zero },
273 /* ne_zero */ { _______, _______, _______, _______, _______, _______, ne_zero },
274 /* eq_zero */ { _______, lt_zero, le_zero, gt_zero, ge_zero, ne_zero, eq_zero },
275 };
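
/* Reading the table: the left operand selects the row and the right operand
 * selects the column, so, matching the grammar above,
 *
 *    fadd_table[lt_zero][eq_zero] == lt_zero   // x < 0, y == 0  =>  x + y < 0
 *    fadd_table[ne_zero][ne_zero] == unknown   // e.g., (-5) + (+5) == 0
 */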
276
277 ASSERT_TABLE_IS_COMMUTATIVE(fadd_table);
278
279 /* Due to the flush-to-zero semantics of floating-point numbers with very
280 * small magnitudes, we can never really be sure a result will be
281 * non-zero.
282 *
283 * ge_zero: ge_zero * ge_zero
284 * | ge_zero * gt_zero
285 * | ge_zero * eq_zero
286 * | le_zero * lt_zero
287 * | lt_zero * le_zero # Multiplication is commutative
288 * | le_zero * le_zero
289 * | gt_zero * ge_zero # Multiplication is commutative
290 * | eq_zero * ge_zero # Multiplication is commutative
291 * | a * a # Left source == right source
292 * | gt_zero * gt_zero
293 * | lt_zero * lt_zero
294 * ;
295 *
296 * le_zero: ge_zero * le_zero
297 * | ge_zero * lt_zero
298 * | lt_zero * ge_zero # Multiplication is commutative
299 * | le_zero * ge_zero # Multiplication is commutative
300 * | le_zero * gt_zero
301 * | lt_zero * gt_zero
302 * | gt_zero * lt_zero # Multiplication is commutative
303 * ;
304 *
305 * eq_zero: eq_zero * <any>
306 * | <any> * eq_zero # Multiplication is commutative
307 *
308 * All other cases are 'unknown'.
309 */
310 static const enum ssa_ranges fmul_table[last_range + 1][last_range + 1] = {
311 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
312 /* unknown */ { _______, _______, _______, _______, _______, _______, eq_zero },
313 /* lt_zero */ { _______, ge_zero, ge_zero, le_zero, le_zero, _______, eq_zero },
314 /* le_zero */ { _______, ge_zero, ge_zero, le_zero, le_zero, _______, eq_zero },
315 /* gt_zero */ { _______, le_zero, le_zero, ge_zero, ge_zero, _______, eq_zero },
316 /* ge_zero */ { _______, le_zero, le_zero, ge_zero, ge_zero, _______, eq_zero },
317 /* ne_zero */ { _______, _______, _______, _______, _______, _______, eq_zero },
318 /* eq_zero */ { eq_zero, eq_zero, eq_zero, eq_zero, eq_zero, eq_zero, eq_zero }
319 };
320
321 ASSERT_TABLE_IS_COMMUTATIVE(fmul_table);
322
323 static const enum ssa_ranges fneg_table[last_range + 1] = {
324 /* unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
325 _______, gt_zero, ge_zero, lt_zero, le_zero, ne_zero, eq_zero
326 };
327
328
329 switch (alu->op) {
330 case nir_op_b2f32:
331 case nir_op_b2i32:
332 r = (struct ssa_result_range){ge_zero, alu->op == nir_op_b2f32};
333 break;
334
335 case nir_op_bcsel: {
336 const struct ssa_result_range left = analyze_expression(alu, 1, ht);
337 const struct ssa_result_range right = analyze_expression(alu, 2, ht);
338
339 /* If either source is a constant load that is not zero, punt. The constant
340 * will always be analyzed as uint regardless of its actual type. We can't
341 * even decide if the value is non-zero because -0.0 is 0x80000000, and
342 * that would (possibly incorrectly) be considered non-zero.
343 */
344 /* FINISHME: We could do better, but it would require having the expected
345 * FINISHME: type passed in.
346 */
347 if ((nir_src_is_const(alu->src[1].src) && left.range != eq_zero) ||
348 (nir_src_is_const(alu->src[2].src) && right.range != eq_zero)) {
349 return (struct ssa_result_range){unknown, false};
350 }
351
352 r.is_integral = left.is_integral && right.is_integral;
353
354 /* le_zero: bcsel(<any>, le_zero, lt_zero)
355 * | bcsel(<any>, eq_zero, lt_zero)
356 * | bcsel(<any>, le_zero, eq_zero)
357 * | bcsel(<any>, lt_zero, le_zero)
358 * | bcsel(<any>, lt_zero, eq_zero)
359 * | bcsel(<any>, eq_zero, le_zero)
360 * | bcsel(<any>, le_zero, le_zero)
361 * ;
362 *
363 * lt_zero: bcsel(<any>, lt_zero, lt_zero)
364 * ;
365 *
366 * ge_zero: bcsel(<any>, ge_zero, ge_zero)
367 * | bcsel(<any>, ge_zero, gt_zero)
368 * | bcsel(<any>, ge_zero, eq_zero)
369 * | bcsel(<any>, gt_zero, ge_zero)
370 * | bcsel(<any>, eq_zero, ge_zero)
371 * ;
372 *
373 * gt_zero: bcsel(<any>, gt_zero, gt_zero)
374 * ;
375 *
376 * ne_zero: bcsel(<any>, ne_zero, gt_zero)
377 * | bcsel(<any>, ne_zero, lt_zero)
378 * | bcsel(<any>, gt_zero, lt_zero)
379 * | bcsel(<any>, gt_zero, ne_zero)
380 * | bcsel(<any>, lt_zero, ne_zero)
381 * | bcsel(<any>, lt_zero, gt_zero)
382 * | bcsel(<any>, ne_zero, ne_zero)
383 * ;
384 *
385 * eq_zero: bcsel(<any>, eq_zero, eq_zero)
386 * ;
387 *
388 * All other cases are 'unknown'.
389 *
390 * The ranges could be tightened if the range of the first source is
391 * known. However, opt_algebraic will (eventually) eliminate the bcsel
392 * if the condition is known.
393 */
394 static const enum ssa_ranges table[last_range + 1][last_range + 1] = {
395 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
396 /* unknown */ { _______, _______, _______, _______, _______, _______, _______ },
397 /* lt_zero */ { _______, lt_zero, le_zero, ne_zero, _______, ne_zero, le_zero },
398 /* le_zero */ { _______, le_zero, le_zero, _______, _______, _______, le_zero },
399 /* gt_zero */ { _______, ne_zero, _______, gt_zero, ge_zero, ne_zero, ge_zero },
400 /* ge_zero */ { _______, _______, _______, ge_zero, ge_zero, _______, ge_zero },
401 /* ne_zero */ { _______, ne_zero, _______, ne_zero, _______, ne_zero, _______ },
402 /* eq_zero */ { _______, le_zero, le_zero, ge_zero, ge_zero, _______, eq_zero },
403 };
404
405 ASSERT_TABLE_IS_COMMUTATIVE(table);
406 ASSERT_TABLE_IS_DIAGONAL(table);
407
408 r.range = table[left.range][right.range];
409 break;
410 }
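
/* Illustration of the constant punt above (hypothetical source): for
 * bcsel(c, x, -0.0) the constant -0.0 is loaded as the raw bits 0x80000000
 * and analyzed as an unsigned integer, so it looks non-zero and the whole
 * expression is reported as unknown rather than risking a wrong ne_zero.
 */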
411
412 case nir_op_i2f32:
413 case nir_op_u2f32:
414 r = analyze_expression(alu, 0, ht);
415
416 r.is_integral = true;
417
418 if (r.range == unknown && alu->op == nir_op_u2f32)
419 r.range = ge_zero;
420
421 break;
422
423 case nir_op_fabs:
424 r = analyze_expression(alu, 0, ht);
425
426 switch (r.range) {
427 case unknown:
428 case le_zero:
429 case ge_zero:
430 r.range = ge_zero;
431 break;
432
433 case lt_zero:
434 case gt_zero:
435 case ne_zero:
436 r.range = gt_zero;
437 break;
438
439 case eq_zero:
440 break;
441 }
442
443 break;
444
445 case nir_op_fadd: {
446 const struct ssa_result_range left = analyze_expression(alu, 0, ht);
447 const struct ssa_result_range right = analyze_expression(alu, 1, ht);
448
449 r.is_integral = left.is_integral && right.is_integral;
450 r.range = fadd_table[left.range][right.range];
451 break;
452 }
453
454 case nir_op_fexp2: {
455 /* If the parameter might be less than zero, the mathematical result
456 * will be in (0, 1). For negative parameters of sufficiently large
457 * magnitude, the result will flush to zero.
458 */
459 static const enum ssa_ranges table[last_range + 1] = {
460 /* unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
461 ge_zero, ge_zero, ge_zero, gt_zero, gt_zero, ge_zero, gt_zero
462 };
463
464 r = analyze_expression(alu, 0, ht);
465
466 r.range = table[r.range];
467 break;
468 }
469
470 case nir_op_fmax: {
471 const struct ssa_result_range left = analyze_expression(alu, 0, ht);
472 const struct ssa_result_range right = analyze_expression(alu, 1, ht);
473
474 r.is_integral = left.is_integral && right.is_integral;
475
476 /* gt_zero: fmax(gt_zero, *)
477 * | fmax(*, gt_zero) # Treat fmax as commutative
478 * ;
479 *
480 * ge_zero: fmax(ge_zero, ne_zero)
481 * | fmax(ge_zero, lt_zero)
482 * | fmax(ge_zero, le_zero)
483 * | fmax(ge_zero, eq_zero)
484 * | fmax(ne_zero, ge_zero) # Treat fmax as commutative
485 * | fmax(lt_zero, ge_zero) # Treat fmax as commutative
486 * | fmax(le_zero, ge_zero) # Treat fmax as commutative
487 * | fmax(eq_zero, ge_zero) # Treat fmax as commutative
488 * | fmax(ge_zero, ge_zero)
489 * ;
490 *
491 * le_zero: fmax(le_zero, lt_zero)
492 * | fmax(lt_zero, le_zero) # Treat fmax as commutative
493 * | fmax(le_zero, le_zero)
494 * ;
495 *
496 * lt_zero: fmax(lt_zero, lt_zero)
497 * ;
498 *
499 * ne_zero: fmax(ne_zero, lt_zero)
500 * | fmax(lt_zero, ne_zero) # Treat fmax as commutative
501 * | fmax(ne_zero, ne_zero)
502 * ;
503 *
504 * eq_zero: fmax(eq_zero, le_zero)
505 * | fmax(eq_zero, lt_zero)
506 * | fmax(le_zero, eq_zero) # Treat fmax as commutative
507 * | fmax(lt_zero, eq_zero) # Treat fmax as commutative
508 * | fmax(eq_zero, eq_zero)
509 * ;
510 *
511 * All other cases are 'unknown'.
512 */
513 static const enum ssa_ranges table[last_range + 1][last_range + 1] = {
514 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
515 /* unknown */ { _______, _______, _______, gt_zero, ge_zero, _______, _______ },
516 /* lt_zero */ { _______, lt_zero, le_zero, gt_zero, ge_zero, ne_zero, eq_zero },
517 /* le_zero */ { _______, le_zero, le_zero, gt_zero, ge_zero, _______, eq_zero },
518 /* gt_zero */ { gt_zero, gt_zero, gt_zero, gt_zero, gt_zero, gt_zero, gt_zero },
519 /* ge_zero */ { ge_zero, ge_zero, ge_zero, gt_zero, ge_zero, ge_zero, ge_zero },
520 /* ne_zero */ { _______, ne_zero, _______, gt_zero, ge_zero, ne_zero, _______ },
521 /* eq_zero */ { _______, eq_zero, eq_zero, gt_zero, ge_zero, _______, eq_zero }
522 };
523
524 /* Treat fmax as commutative. */
525 ASSERT_TABLE_IS_COMMUTATIVE(table);
526 ASSERT_TABLE_IS_DIAGONAL(table);
527
528 r.range = table[left.range][right.range];
529 break;
530 }
531
532 case nir_op_fmin: {
533 const struct ssa_result_range left = analyze_expression(alu, 0, ht);
534 const struct ssa_result_range right = analyze_expression(alu, 1, ht);
535
536 r.is_integral = left.is_integral && right.is_integral;
537
538 /* lt_zero: fmin(lt_zero, *)
539 * | fmin(*, lt_zero) # Treat fmin as commutative
540 * ;
541 *
542 * le_zero: fmin(le_zero, ne_zero)
543 * | fmin(le_zero, gt_zero)
544 * | fmin(le_zero, ge_zero)
545 * | fmin(le_zero, eq_zero)
546 * | fmin(ne_zero, le_zero) # Treat fmin as commutative
547 * | fmin(gt_zero, le_zero) # Treat fmin as commutative
548 * | fmin(ge_zero, le_zero) # Treat fmin as commutative
549 * | fmin(eq_zero, le_zero) # Treat fmin as commutative
550 * | fmin(le_zero, le_zero)
551 * ;
552 *
553 * ge_zero: fmin(ge_zero, gt_zero)
554 * | fmin(gt_zero, ge_zero) # Treat fmin as commutative
555 * | fmin(ge_zero, ge_zero)
556 * ;
557 *
558 * gt_zero: fmin(gt_zero, gt_zero)
559 * ;
560 *
561 * ne_zero: fmin(ne_zero, gt_zero)
562 * | fmin(gt_zero, ne_zero) # Treat fmin as commutative
563 * | fmin(ne_zero, ne_zero)
564 * ;
565 *
566 * eq_zero: fmin(eq_zero, ge_zero)
567 * | fmin(eq_zero, gt_zero)
568 * | fmin(ge_zero, eq_zero) # Treat fmin as commutative
569 * | fmin(gt_zero, eq_zero) # Treat fmin as commutative
570 * | fmin(eq_zero, eq_zero)
571 * ;
572 *
573 * All other cases are 'unknown'.
574 */
575 static const enum ssa_ranges table[last_range + 1][last_range + 1] = {
576 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
577 /* unknown */ { _______, lt_zero, le_zero, _______, _______, _______, _______ },
578 /* lt_zero */ { lt_zero, lt_zero, lt_zero, lt_zero, lt_zero, lt_zero, lt_zero },
579 /* le_zero */ { le_zero, lt_zero, le_zero, le_zero, le_zero, le_zero, le_zero },
580 /* gt_zero */ { _______, lt_zero, le_zero, gt_zero, ge_zero, ne_zero, eq_zero },
581 /* ge_zero */ { _______, lt_zero, le_zero, ge_zero, ge_zero, _______, eq_zero },
582 /* ne_zero */ { _______, lt_zero, le_zero, ne_zero, _______, ne_zero, _______ },
583 /* eq_zero */ { _______, lt_zero, le_zero, eq_zero, eq_zero, _______, eq_zero }
584 };
585
586 /* Treat fmin as commutative. */
587 ASSERT_TABLE_IS_COMMUTATIVE(table);
588 ASSERT_TABLE_IS_DIAGONAL(table);
589
590 r.range = table[left.range][right.range];
591 break;
592 }
593
594 case nir_op_fmul: {
595 const struct ssa_result_range left = analyze_expression(alu, 0, ht);
596 const struct ssa_result_range right = analyze_expression(alu, 1, ht);
597
598 r.is_integral = left.is_integral && right.is_integral;
599
600 /* x * x => ge_zero */
601 if (left.range != eq_zero && nir_alu_srcs_equal(alu, alu, 0, 1)) {
602 /* Even if x > 0, the result of x*x can be zero when x is, for
603 * example, a subnormal number.
604 */
605 r.range = ge_zero;
606 } else if (left.range != eq_zero && nir_alu_srcs_negative_equal(alu, alu, 0, 1)) {
607 /* -x * x => le_zero. */
608 r.range = le_zero;
609 } else
610 r.range = fmul_table[left.range][right.range];
611
612 break;
613 }
614
615 case nir_op_frcp:
616 r = (struct ssa_result_range){analyze_expression(alu, 0, ht).range, false};
617 break;
618
619 case nir_op_mov: {
620 const struct ssa_result_range left = analyze_expression(alu, 0, ht);
621
622 /* See commentary in nir_op_bcsel for the reasons this is necessary. */
623 if (nir_src_is_const(alu->src[0].src) && left.range != eq_zero)
624 return (struct ssa_result_range){unknown, false};
625
626 r = left;
627 break;
628 }
629
630 case nir_op_fneg:
631 r = analyze_expression(alu, 0, ht);
632
633 r.range = fneg_table[r.range];
634 break;
635
636 case nir_op_fsat:
637 r = analyze_expression(alu, 0, ht);
638
639 switch (r.range) {
640 case le_zero:
641 case lt_zero:
642 r.range = eq_zero;
643 r.is_integral = true;
644 break;
645
646 case eq_zero:
647 assert(r.is_integral); /* fall through */
648 case gt_zero:
649 case ge_zero:
650 /* The fsat doesn't add any information in these cases. */
651 break;
652
653 case ne_zero:
654 case unknown:
655 /* Since the result must be in [0, 1], the value must be >= 0. */
656 r.range = ge_zero;
657 break;
658 }
659 break;
660
661 case nir_op_fsign:
662 r = (struct ssa_result_range){analyze_expression(alu, 0, ht).range, true};
663 break;
664
665 case nir_op_fsqrt:
666 case nir_op_frsq:
667 r = (struct ssa_result_range){ge_zero, false};
668 break;
669
670 case nir_op_ffloor: {
671 const struct ssa_result_range left = analyze_expression(alu, 0, ht);
672
673 r.is_integral = true;
674
675 if (left.is_integral || left.range == le_zero || left.range == lt_zero)
676 r.range = left.range;
677 else if (left.range == ge_zero || left.range == gt_zero)
678 r.range = ge_zero;
679 else if (left.range == ne_zero)
680 r.range = unknown;
681
682 break;
683 }
684
685 case nir_op_fceil: {
686 const struct ssa_result_range left = analyze_expression(alu, 0, ht);
687
688 r.is_integral = true;
689
690 if (left.is_integral || left.range == ge_zero || left.range == gt_zero)
691 r.range = left.range;
692 else if (left.range == le_zero || left.range == lt_zero)
693 r.range = le_zero;
694 else if (left.range == ne_zero)
695 r.range = unknown;
696
697 break;
698 }
699
700 case nir_op_ftrunc: {
701 const struct ssa_result_range left = analyze_expression(alu, 0, ht);
702
703 r.is_integral = true;
704
705 if (left.is_integral)
706 r.range = left.range;
707 else if (left.range == ge_zero || left.range == gt_zero)
708 r.range = ge_zero;
709 else if (left.range == le_zero || left.range == lt_zero)
710 r.range = le_zero;
711 else if (left.range == ne_zero)
712 r.range = unknown;
713
714 break;
715 }
716
717 case nir_op_flt:
718 case nir_op_fge:
719 case nir_op_feq:
720 case nir_op_fne:
721 case nir_op_ilt:
722 case nir_op_ige:
723 case nir_op_ieq:
724 case nir_op_ine:
725 case nir_op_ult:
726 case nir_op_uge:
727 /* Boolean results are 0 or -1. */
728 r = (struct ssa_result_range){le_zero, false};
729 break;
730
731 case nir_op_fpow: {
732 /* Due to the flush-to-zero semantics of floating-point numbers with very
733 * small magnitudes, we can never really be sure a result will be
734 * non-zero.
735 *
736 * NIR uses pow() and powf() to constant-fold nir_op_fpow. The man
737 * page for those functions says:
738 *
739 * If y is 0, the result is 1.0 (even if x is a NaN).
740 *
741 * gt_zero: pow(*, eq_zero)
742 * | pow(eq_zero, lt_zero) # 0^-y = +inf
743 * | pow(eq_zero, le_zero) # 0^-y = +inf or 0^0 = 1.0
744 * ;
745 *
746 * eq_zero: pow(eq_zero, gt_zero)
747 * ;
748 *
749 * ge_zero: pow(gt_zero, gt_zero)
750 * | pow(gt_zero, ge_zero)
751 * | pow(gt_zero, lt_zero)
752 * | pow(gt_zero, le_zero)
753 * | pow(gt_zero, ne_zero)
754 * | pow(gt_zero, unknown)
755 * | pow(ge_zero, gt_zero)
756 * | pow(ge_zero, ge_zero)
757 * | pow(ge_zero, lt_zero)
758 * | pow(ge_zero, le_zero)
759 * | pow(ge_zero, ne_zero)
760 * | pow(ge_zero, unknown)
761 * | pow(eq_zero, ge_zero) # 0^0 = 1.0 or 0^+y = 0.0
762 * | pow(eq_zero, ne_zero) # 0^-y = +inf or 0^+y = 0.0
763 * | pow(eq_zero, unknown) # union of all other y cases
764 * ;
765 *
766 * All other cases are unknown.
767 *
768 * We could do better if the right operand is a constant, integral
769 * value.
770 */
771 static const enum ssa_ranges table[last_range + 1][last_range + 1] = {
772 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
773 /* unknown */ { _______, _______, _______, _______, _______, _______, gt_zero },
774 /* lt_zero */ { _______, _______, _______, _______, _______, _______, gt_zero },
775 /* le_zero */ { _______, _______, _______, _______, _______, _______, gt_zero },
776 /* gt_zero */ { ge_zero, ge_zero, ge_zero, ge_zero, ge_zero, ge_zero, gt_zero },
777 /* ge_zero */ { ge_zero, ge_zero, ge_zero, ge_zero, ge_zero, ge_zero, gt_zero },
778 /* ne_zero */ { _______, _______, _______, _______, _______, _______, gt_zero },
779 /* eq_zero */ { ge_zero, gt_zero, gt_zero, eq_zero, ge_zero, ge_zero, gt_zero },
780 };
781
782 const struct ssa_result_range left = analyze_expression(alu, 0, ht);
783 const struct ssa_result_range right = analyze_expression(alu, 1, ht);
784
785 r.is_integral = left.is_integral && right.is_integral &&
786 is_not_negative(right.range);
787 r.range = table[left.range][right.range];
788 break;
789 }
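
/* Example of the fpow tracking above (hypothetical sources): for
 * fpow(fabs(x), y) the base analyzes as ge_zero (or gt_zero), so the result
 * is at least ge_zero for any exponent, and it is gt_zero when the exponent
 * is the constant 0.0 (pow(x, 0) == 1.0). Because of flush-to-zero,
 * gt_zero ^ gt_zero is only reported as ge_zero, not gt_zero.
 */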
790
791 case nir_op_ffma: {
792 const struct ssa_result_range first = analyze_expression(alu, 0, ht);
793 const struct ssa_result_range second = analyze_expression(alu, 1, ht);
794 const struct ssa_result_range third = analyze_expression(alu, 2, ht);
795
796 r.is_integral = first.is_integral && second.is_integral &&
797 third.is_integral;
798
799 enum ssa_ranges fmul_range;
800
801 if (first.range != eq_zero && nir_alu_srcs_equal(alu, alu, 0, 1)) {
802 /* See handling of nir_op_fmul for explanation of why ge_zero is the
803 * range.
804 */
805 fmul_range = ge_zero;
806 } else if (first.range != eq_zero && nir_alu_srcs_negative_equal(alu, alu, 0, 1)) {
807 /* -x * x => le_zero */
808 fmul_range = le_zero;
809 } else
810 fmul_range = fmul_table[first.range][second.range];
811
812 r.range = fadd_table[fmul_range][third.range];
813 break;
814 }
815
816 case nir_op_flrp: {
817 const struct ssa_result_range first = analyze_expression(alu, 0, ht);
818 const struct ssa_result_range second = analyze_expression(alu, 1, ht);
819 const struct ssa_result_range third = analyze_expression(alu, 2, ht);
820
821 r.is_integral = first.is_integral && second.is_integral &&
822 third.is_integral;
823
824 /* Decompose the flrp to first + third * (second + -first) */
825 const enum ssa_ranges inner_fadd_range =
826 fadd_table[second.range][fneg_table[first.range]];
827
828 const enum ssa_ranges fmul_range =
829 fmul_table[third.range][inner_fadd_range];
830
831 r.range = fadd_table[first.range][fmul_range];
832 break;
833 }
834
835 default:
836 r = (struct ssa_result_range){unknown, false};
837 break;
838 }
839
840 if (r.range == eq_zero)
841 r.is_integral = true;
842
843 _mesa_hash_table_insert(ht, alu, pack_data(r));
844 return r;
845 }
846
847 #undef _______
848
849 struct ssa_result_range
850 nir_analyze_range(const nir_alu_instr *instr, unsigned src)
851 {
852 struct hash_table *ht = _mesa_pointer_hash_table_create(NULL);
853
854 const struct ssa_result_range r = analyze_expression(instr, src, ht);
855
856 _mesa_hash_table_destroy(ht, NULL);
857
858 return r;
859 }
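
/* A minimal usage sketch (hypothetical caller, not part of this file): a
 * backend or optimization pass that wants to know whether a particular
 * source of an ALU instruction is guaranteed to be non-negative could do
 * something like the following.
 *
 *    static bool
 *    src_is_non_negative(const nir_alu_instr *alu, unsigned src)
 *    {
 *       const struct ssa_result_range r = nir_analyze_range(alu, src);
 *
 *       return r.range == ge_zero || r.range == gt_zero || r.range == eq_zero;
 *    }
 */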