nir: Keep the range analysis HT around intra-pass until we make a change.
[mesa.git] / src / compiler / nir / nir_range_analysis.c
1 /*
2 * Copyright © 2018 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23 #include <math.h>
24 #include <float.h>
25 #include "nir.h"
26 #include "nir_range_analysis.h"
27 #include "util/hash_table.h"
28
29 /**
30 * Analyzes a sequence of operations to determine some aspects of the range of
31 * the result.
32 */
33
34 static bool
35 is_not_negative(enum ssa_ranges r)
36 {
37 return r == gt_zero || r == ge_zero || r == eq_zero;
38 }
39
40 static void *
41 pack_data(const struct ssa_result_range r)
42 {
43 return (void *)(uintptr_t)(r.range | r.is_integral << 8);
44 }
45
46 static struct ssa_result_range
47 unpack_data(const void *p)
48 {
49 const uintptr_t v = (uintptr_t) p;
50
51 return (struct ssa_result_range){v & 0xff, (v & 0x0ff00) != 0};
52 }
53
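/* Editorial note (added in review, not part of the original file):
 * pack_data() keeps the range enum in the low 8 bits of a pointer-sized
 * value and is_integral in bit 8, so a round trip through the hash table
 * preserves both fields.  A hypothetical self-check:
 *
 *    const struct ssa_result_range in = { gt_zero, true };
 *    const struct ssa_result_range out = unpack_data(pack_data(in));
 *    assert(in.range == out.range && in.is_integral == out.is_integral);
 */
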
54 static void *
55 pack_key(const struct nir_alu_instr *instr, nir_alu_type type)
56 {
57 uintptr_t type_encoding;
58 uintptr_t ptr = (uintptr_t) instr;
59
60 /* The low 2 bits have to be zero or this whole scheme falls apart. */
61 assert((ptr & 0x3) == 0);
62
63 /* NIR is typeless in the sense that sequences of bits have whatever
64 * meaning is attached to them by the instruction that consumes them.
65 * However, the number of bits must match between producer and consumer.
66 * As a result, the number of bits does not need to be encoded here.
67 */
68 switch (nir_alu_type_get_base_type(type)) {
69 case nir_type_int: type_encoding = 0; break;
70 case nir_type_uint: type_encoding = 1; break;
71 case nir_type_bool: type_encoding = 2; break;
72 case nir_type_float: type_encoding = 3; break;
73 default: unreachable("Invalid base type.");
74 }
75
76 return (void *)(ptr | type_encoding);
77 }
78
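/* Editorial note (added in review, not part of the original file): the
 * assert in pack_key() above guarantees the two low pointer bits are clear,
 * so they can carry the base type, and the same instruction queried with
 * different base types lands in different hash table entries.  Illustrative
 * values for a hypothetical "alu" instruction:
 *
 *    pack_key(alu, nir_type_int)   == (void *)((uintptr_t) alu | 0)
 *    pack_key(alu, nir_type_float) == (void *)((uintptr_t) alu | 3)
 */
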
79 static nir_alu_type
80 nir_alu_src_type(const nir_alu_instr *instr, unsigned src)
81 {
82 return nir_alu_type_get_base_type(nir_op_infos[instr->op].input_types[src]) |
83 nir_src_bit_size(instr->src[src].src);
84 }
85
86 static struct ssa_result_range
87 analyze_constant(const struct nir_alu_instr *instr, unsigned src,
88 nir_alu_type use_type)
89 {
90 uint8_t swizzle[4] = { 0, 1, 2, 3 };
91
92 /* If the source is an explicitly sized source, then we need to reset
93 * both the number of components and the swizzle.
94 */
95 const unsigned num_components = nir_ssa_alu_instr_src_components(instr, src);
96
97 for (unsigned i = 0; i < num_components; ++i)
98 swizzle[i] = instr->src[src].swizzle[i];
99
100 const nir_load_const_instr *const load =
101 nir_instr_as_load_const(instr->src[src].src.ssa->parent_instr);
102
103 struct ssa_result_range r = { unknown, false };
104
105 switch (nir_alu_type_get_base_type(use_type)) {
106 case nir_type_float: {
107 double min_value = DBL_MAX;
108 double max_value = -DBL_MAX;
109 bool any_zero = false;
110 bool all_zero = true;
111
112 r.is_integral = true;
113
114 for (unsigned i = 0; i < num_components; ++i) {
115 const double v = nir_const_value_as_float(load->value[swizzle[i]],
116 load->def.bit_size);
117
118 if (floor(v) != v)
119 r.is_integral = false;
120
121 any_zero = any_zero || (v == 0.0);
122 all_zero = all_zero && (v == 0.0);
123 min_value = MIN2(min_value, v);
124 max_value = MAX2(max_value, v);
125 }
126
127 assert(any_zero >= all_zero);
128 assert(isnan(max_value) || max_value >= min_value);
129
130 if (all_zero)
131 r.range = eq_zero;
132 else if (min_value > 0.0)
133 r.range = gt_zero;
134 else if (min_value == 0.0)
135 r.range = ge_zero;
136 else if (max_value < 0.0)
137 r.range = lt_zero;
138 else if (max_value == 0.0)
139 r.range = le_zero;
140 else if (!any_zero)
141 r.range = ne_zero;
142 else
143 r.range = unknown;
144
145 return r;
146 }
147
148 case nir_type_int:
149 case nir_type_bool: {
150 int64_t min_value = INT64_MAX;
151 int64_t max_value = INT64_MIN;
152 bool any_zero = false;
153 bool all_zero = true;
154
155 for (unsigned i = 0; i < num_components; ++i) {
156 const int64_t v = nir_const_value_as_int(load->value[swizzle[i]],
157 load->def.bit_size);
158
159 any_zero = any_zero || (v == 0);
160 all_zero = all_zero && (v == 0);
161 min_value = MIN2(min_value, v);
162 max_value = MAX2(max_value, v);
163 }
164
165 assert(any_zero >= all_zero);
166 assert(max_value >= min_value);
167
168 if (all_zero)
169 r.range = eq_zero;
170 else if (min_value > 0)
171 r.range = gt_zero;
172 else if (min_value == 0)
173 r.range = ge_zero;
174 else if (max_value < 0)
175 r.range = lt_zero;
176 else if (max_value == 0)
177 r.range = le_zero;
178 else if (!any_zero)
179 r.range = ne_zero;
180 else
181 r.range = unknown;
182
183 return r;
184 }
185
186 case nir_type_uint: {
187 bool any_zero = false;
188 bool all_zero = true;
189
190 for (unsigned i = 0; i < num_components; ++i) {
191 const uint64_t v = nir_const_value_as_uint(load->value[swizzle[i]],
192 load->def.bit_size);
193
194 any_zero = any_zero || (v == 0);
195 all_zero = all_zero && (v == 0);
196 }
197
198 assert(any_zero >= all_zero);
199
200 if (all_zero)
201 r.range = eq_zero;
202 else if (any_zero)
203 r.range = ge_zero;
204 else
205 r.range = gt_zero;
206
207 return r;
208 }
209
210 default:
211 unreachable("Invalid alu source type");
212 }
213 }
214
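/* Editorial worked example (added in review, not part of the original
 * file): for a float-typed constant vector (1.0, 2.0, 0.0), every component
 * is an integer and the minimum is exactly zero, so analyze_constant()
 * returns { ge_zero, true }.  For (-3.5, 2.0) the components straddle zero
 * without containing it, giving { ne_zero, false }.
 */
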
215 /**
216 * Short-hand name for use in the tables in analyze_expression. If this name
217 * becomes a problem on some compiler, we can change it to _.
218 */
219 #define _______ unknown
220
221 #ifndef NDEBUG
222 #define ASSERT_TABLE_IS_COMMUTATIVE(t) \
223 do { \
224 for (unsigned r = 0; r < ARRAY_SIZE(t); r++) { \
225 for (unsigned c = 0; c < ARRAY_SIZE(t[0]); c++) \
226 assert(t[r][c] == t[c][r]); \
227 } \
228 } while (false)
229
230 #define ASSERT_TABLE_IS_DIAGONAL(t) \
231 do { \
232 for (unsigned r = 0; r < ARRAY_SIZE(t); r++) \
233 assert(t[r][r] == r); \
234 } while (false)
235
236 static enum ssa_ranges
237 union_ranges(enum ssa_ranges a, enum ssa_ranges b)
238 {
239 static const enum ssa_ranges union_table[last_range + 1][last_range + 1] = {
240 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
241 /* unknown */ { _______, _______, _______, _______, _______, _______, _______ },
242 /* lt_zero */ { _______, lt_zero, le_zero, ne_zero, _______, ne_zero, le_zero },
243 /* le_zero */ { _______, le_zero, le_zero, _______, _______, _______, le_zero },
244 /* gt_zero */ { _______, ne_zero, _______, gt_zero, ge_zero, ne_zero, ge_zero },
245 /* ge_zero */ { _______, _______, _______, ge_zero, ge_zero, _______, ge_zero },
246 /* ne_zero */ { _______, ne_zero, _______, ne_zero, _______, ne_zero, _______ },
247 /* eq_zero */ { _______, le_zero, le_zero, ge_zero, ge_zero, _______, eq_zero },
248 };
249
250 ASSERT_TABLE_IS_COMMUTATIVE(union_table);
251 ASSERT_TABLE_IS_DIAGONAL(union_table);
252
253 return union_table[a][b];
254 }
255
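/* Editorial note (added in review, not part of the original file): reading
 * one entry of the table above, union_ranges(gt_zero, eq_zero) == ge_zero --
 * a value that is either strictly positive or exactly zero is known to be
 * non-negative.
 */
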
256 /* Verify that the 'unknown' entry in each row (or column) of the table is the
257 * union of all the other values in the row (or column).
258 */
259 #define ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_2_SOURCE(t) \
260 do { \
261 for (unsigned i = 0; i < last_range; i++) { \
262 enum ssa_ranges col_range = t[i][unknown + 1]; \
263 enum ssa_ranges row_range = t[unknown + 1][i]; \
264 \
265 for (unsigned j = unknown + 2; j < last_range; j++) { \
266 col_range = union_ranges(col_range, t[i][j]); \
267 row_range = union_ranges(row_range, t[j][i]); \
268 } \
269 \
270 assert(col_range == t[i][unknown]); \
271 assert(row_range == t[unknown][i]); \
272 } \
273 } while (false)
274
275 /* For most operations, the union of ranges for a strict inequality and
276 * equality should be the range of the non-strict inequality (e.g.,
277 * union_ranges(range(op(lt_zero)), range(op(eq_zero))) == range(op(le_zero)).
278 *
279 * Does not apply to selection-like opcodes (bcsel, fmin, fmax, etc.).
280 */
281 #define ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_1_SOURCE(t) \
282 do { \
283 assert(union_ranges(t[lt_zero], t[eq_zero]) == t[le_zero]); \
284 assert(union_ranges(t[gt_zero], t[eq_zero]) == t[ge_zero]); \
285 } while (false)
286
287 #define ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_2_SOURCE(t) \
288 do { \
289 for (unsigned i = 0; i < last_range; i++) { \
290 assert(union_ranges(t[i][lt_zero], t[i][eq_zero]) == t[i][le_zero]); \
291 assert(union_ranges(t[i][gt_zero], t[i][eq_zero]) == t[i][ge_zero]); \
292 assert(union_ranges(t[lt_zero][i], t[eq_zero][i]) == t[le_zero][i]); \
293 assert(union_ranges(t[gt_zero][i], t[eq_zero][i]) == t[ge_zero][i]); \
294 } \
295 } while (false)
296
297 /* Several other unordered tuples span the range of "everything." Each should
298 * have the same value as unknown: (lt_zero, ge_zero), (le_zero, gt_zero), and
299 * (eq_zero, ne_zero). union_ranges is already commutative, so only one
300 * ordering needs to be checked.
301 *
302 * Does not apply to selection-like opcodes (bcsel, fmin, fmax, etc.).
303 *
304 * In cases where this can be used, it is unnecessary to also use
305 * ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_*_SOURCE. For any range X,
306 * union_ranges(X, X) == X. The disjoint ranges cover all of the non-unknown
307 * possibilities, so the union of all the unions of disjoint ranges is
308 * equivalent to the union of "others."
309 */
310 #define ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_1_SOURCE(t) \
311 do { \
312 assert(union_ranges(t[lt_zero], t[ge_zero]) == t[unknown]); \
313 assert(union_ranges(t[le_zero], t[gt_zero]) == t[unknown]); \
314 assert(union_ranges(t[eq_zero], t[ne_zero]) == t[unknown]); \
315 } while (false)
316
317 #define ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(t) \
318 do { \
319 for (unsigned i = 0; i < last_range; i++) { \
320 assert(union_ranges(t[i][lt_zero], t[i][ge_zero]) == \
321 t[i][unknown]); \
322 assert(union_ranges(t[i][le_zero], t[i][gt_zero]) == \
323 t[i][unknown]); \
324 assert(union_ranges(t[i][eq_zero], t[i][ne_zero]) == \
325 t[i][unknown]); \
326 \
327 assert(union_ranges(t[lt_zero][i], t[ge_zero][i]) == \
328 t[unknown][i]); \
329 assert(union_ranges(t[le_zero][i], t[gt_zero][i]) == \
330 t[unknown][i]); \
331 assert(union_ranges(t[eq_zero][i], t[ne_zero][i]) == \
332 t[unknown][i]); \
333 } \
334 } while (false)
335
336 #else
337 #define ASSERT_TABLE_IS_COMMUTATIVE(t)
338 #define ASSERT_TABLE_IS_DIAGONAL(t)
339 #define ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_2_SOURCE(t)
340 #define ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_1_SOURCE(t)
341 #define ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_2_SOURCE(t)
342 #define ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_1_SOURCE(t)
343 #define ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(t)
344 #endif
345
346 /**
347 * Analyze an expression to determine the range of its result
348 *
349 * The end result of this analysis is a token that communicates something
350 * about the range of values. There's an implicit grammar that produces
351 * tokens from sequences of literal values, other tokens, and operations.
352 * This function implements this grammar as a recursive-descent parser. Some
353 * (but not all) of the grammar is listed in-line in the function.
354 */
355 static struct ssa_result_range
356 analyze_expression(const nir_alu_instr *instr, unsigned src,
357 struct hash_table *ht, nir_alu_type use_type)
358 {
359 if (!instr->src[src].src.is_ssa)
360 return (struct ssa_result_range){unknown, false};
361
362 if (nir_src_is_const(instr->src[src].src))
363 return analyze_constant(instr, src, use_type);
364
365 if (instr->src[src].src.ssa->parent_instr->type != nir_instr_type_alu)
366 return (struct ssa_result_range){unknown, false};
367
368 const struct nir_alu_instr *const alu =
369 nir_instr_as_alu(instr->src[src].src.ssa->parent_instr);
370
371 /* Bail if the type of the instruction generating the value does not match
372 * the type the value will be interpreted as. int/uint/bool can be
373 * reinterpreted trivially. The most important cases are between float and
374 * non-float.
375 */
376 if (alu->op != nir_op_mov && alu->op != nir_op_bcsel) {
377 const nir_alu_type use_base_type =
378 nir_alu_type_get_base_type(use_type);
379 const nir_alu_type src_base_type =
380 nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type);
381
382 if (use_base_type != src_base_type &&
383 (use_base_type == nir_type_float ||
384 src_base_type == nir_type_float)) {
385 return (struct ssa_result_range){unknown, false};
386 }
387 }
388
389 struct hash_entry *he = _mesa_hash_table_search(ht, pack_key(alu, use_type));
390 if (he != NULL)
391 return unpack_data(he->data);
392
393 struct ssa_result_range r = {unknown, false};
394
395 /* ge_zero: ge_zero + ge_zero
396 *
397 * gt_zero: gt_zero + eq_zero
398 * | gt_zero + ge_zero
399 * | eq_zero + gt_zero # Addition is commutative
400 * | ge_zero + gt_zero # Addition is commutative
401 * | gt_zero + gt_zero
402 * ;
403 *
404 * le_zero: le_zero + le_zero
405 *
406 * lt_zero: lt_zero + eq_zero
407 * | lt_zero + le_zero
408 * | eq_zero + lt_zero # Addition is commutative
409 * | le_zero + lt_zero # Addition is commutative
410 * | lt_zero + lt_zero
411 * ;
412 *
413 * ne_zero: eq_zero + ne_zero
414 * | ne_zero + eq_zero # Addition is commutative
415 * ;
416 *
417 * eq_zero: eq_zero + eq_zero
418 * ;
419 *
420 * All other cases are 'unknown'. The seemingly odd entry is (ne_zero,
421 * ne_zero), but that pair could be (-5, +5), and -5 + 5 = 0 is not ne_zero.
422 */
423 static const enum ssa_ranges fadd_table[last_range + 1][last_range + 1] = {
424 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
425 /* unknown */ { _______, _______, _______, _______, _______, _______, _______ },
426 /* lt_zero */ { _______, lt_zero, lt_zero, _______, _______, _______, lt_zero },
427 /* le_zero */ { _______, lt_zero, le_zero, _______, _______, _______, le_zero },
428 /* gt_zero */ { _______, _______, _______, gt_zero, gt_zero, _______, gt_zero },
429 /* ge_zero */ { _______, _______, _______, gt_zero, ge_zero, _______, ge_zero },
430 /* ne_zero */ { _______, _______, _______, _______, _______, _______, ne_zero },
431 /* eq_zero */ { _______, lt_zero, le_zero, gt_zero, ge_zero, ne_zero, eq_zero },
432 };
433
434 ASSERT_TABLE_IS_COMMUTATIVE(fadd_table);
435 ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(fadd_table);
436 ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_2_SOURCE(fadd_table);
437
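/* Editorial spot-check (added in review, not part of the original file):
 * two entries of the table above, matching the grammar comment -- a
 * positive plus a non-negative value stays positive, while ne_zero plus
 * ne_zero can cancel (e.g. -5 + 5) and is therefore unknown.
 */
assert(fadd_table[gt_zero][ge_zero] == gt_zero);
assert(fadd_table[ne_zero][ne_zero] == unknown);
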
438 /* Due to flush-to-zero semantics of floating-point numbers with very
439 * small magnitudes, we can never really be sure a result will be
440 * non-zero.
441 *
442 * ge_zero: ge_zero * ge_zero
443 * | ge_zero * gt_zero
444 * | ge_zero * eq_zero
445 * | le_zero * lt_zero
446 * | lt_zero * le_zero # Multiplication is commutative
447 * | le_zero * le_zero
448 * | gt_zero * ge_zero # Multiplication is commutative
449 * | eq_zero * ge_zero # Multiplication is commutative
450 * | a * a # Left source == right source
451 * | gt_zero * gt_zero
452 * | lt_zero * lt_zero
453 * ;
454 *
455 * le_zero: ge_zero * le_zero
456 * | ge_zero * lt_zero
457 * | lt_zero * ge_zero # Multiplication is commutative
458 * | le_zero * ge_zero # Multiplication is commutative
459 * | le_zero * gt_zero
460 * | lt_zero * gt_zero
461 * | gt_zero * lt_zero # Multiplication is commutative
462 * ;
463 *
464 * eq_zero: eq_zero * <any>
465 * <any> * eq_zero # Multiplication is commutative
466 *
467 * All other cases are 'unknown'.
468 */
469 static const enum ssa_ranges fmul_table[last_range + 1][last_range + 1] = {
470 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
471 /* unknown */ { _______, _______, _______, _______, _______, _______, eq_zero },
472 /* lt_zero */ { _______, ge_zero, ge_zero, le_zero, le_zero, _______, eq_zero },
473 /* le_zero */ { _______, ge_zero, ge_zero, le_zero, le_zero, _______, eq_zero },
474 /* gt_zero */ { _______, le_zero, le_zero, ge_zero, ge_zero, _______, eq_zero },
475 /* ge_zero */ { _______, le_zero, le_zero, ge_zero, ge_zero, _______, eq_zero },
476 /* ne_zero */ { _______, _______, _______, _______, _______, _______, eq_zero },
477 /* eq_zero */ { eq_zero, eq_zero, eq_zero, eq_zero, eq_zero, eq_zero, eq_zero }
478 };
479
480 ASSERT_TABLE_IS_COMMUTATIVE(fmul_table);
481 ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(fmul_table);
482 ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_2_SOURCE(fmul_table);
483
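/* Editorial spot-check (added in review, not part of the original file):
 * even gt_zero * gt_zero only yields ge_zero here, because the product of
 * two tiny positive values can flush to zero, while an exact zero operand
 * always gives an exact zero product.
 */
assert(fmul_table[gt_zero][gt_zero] == ge_zero);
assert(fmul_table[eq_zero][unknown] == eq_zero);
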
484 static const enum ssa_ranges fneg_table[last_range + 1] = {
485 /* unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
486 _______, gt_zero, ge_zero, lt_zero, le_zero, ne_zero, eq_zero
487 };
488
489 ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_1_SOURCE(fneg_table);
490 ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_1_SOURCE(fneg_table);
491
492
493 switch (alu->op) {
494 case nir_op_b2f32:
495 case nir_op_b2i32:
496 r = (struct ssa_result_range){ge_zero, alu->op == nir_op_b2f32};
497 break;
498
499 case nir_op_bcsel: {
500 const struct ssa_result_range left =
501 analyze_expression(alu, 1, ht, use_type);
502 const struct ssa_result_range right =
503 analyze_expression(alu, 2, ht, use_type);
504
505 r.is_integral = left.is_integral && right.is_integral;
506
507 /* le_zero: bcsel(<any>, le_zero, lt_zero)
508 * | bcsel(<any>, eq_zero, lt_zero)
509 * | bcsel(<any>, le_zero, eq_zero)
510 * | bcsel(<any>, lt_zero, le_zero)
511 * | bcsel(<any>, lt_zero, eq_zero)
512 * | bcsel(<any>, eq_zero, le_zero)
513 * | bcsel(<any>, le_zero, le_zero)
514 * ;
515 *
516 * lt_zero: bcsel(<any>, lt_zero, lt_zero)
517 * ;
518 *
519 * ge_zero: bcsel(<any>, ge_zero, ge_zero)
520 * | bcsel(<any>, ge_zero, gt_zero)
521 * | bcsel(<any>, ge_zero, eq_zero)
522 * | bcsel(<any>, gt_zero, ge_zero)
523 * | bcsel(<any>, eq_zero, ge_zero)
524 * ;
525 *
526 * gt_zero: bcsel(<any>, gt_zero, gt_zero)
527 * ;
528 *
529 * ne_zero: bcsel(<any>, ne_zero, gt_zero)
530 * | bcsel(<any>, ne_zero, lt_zero)
531 * | bcsel(<any>, gt_zero, lt_zero)
532 * | bcsel(<any>, gt_zero, ne_zero)
533 * | bcsel(<any>, lt_zero, ne_zero)
534 * | bcsel(<any>, lt_zero, gt_zero)
535 * | bcsel(<any>, ne_zero, ne_zero)
536 * ;
537 *
538 * eq_zero: bcsel(<any>, eq_zero, eq_zero)
539 * ;
540 *
541 * All other cases are 'unknown'.
542 *
543 * The ranges could be tightened if the range of the first source is
544 * known. However, opt_algebraic will (eventually) eliminate the bcsel
545 * if the condition is known.
546 */
547 static const enum ssa_ranges table[last_range + 1][last_range + 1] = {
548 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
549 /* unknown */ { _______, _______, _______, _______, _______, _______, _______ },
550 /* lt_zero */ { _______, lt_zero, le_zero, ne_zero, _______, ne_zero, le_zero },
551 /* le_zero */ { _______, le_zero, le_zero, _______, _______, _______, le_zero },
552 /* gt_zero */ { _______, ne_zero, _______, gt_zero, ge_zero, ne_zero, ge_zero },
553 /* ge_zero */ { _______, _______, _______, ge_zero, ge_zero, _______, ge_zero },
554 /* ne_zero */ { _______, ne_zero, _______, ne_zero, _______, ne_zero, _______ },
555 /* eq_zero */ { _______, le_zero, le_zero, ge_zero, ge_zero, _______, eq_zero },
556 };
557
558 ASSERT_TABLE_IS_COMMUTATIVE(table);
559 ASSERT_TABLE_IS_DIAGONAL(table);
560 ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_2_SOURCE(table);
561
562 r.range = table[left.range][right.range];
563 break;
564 }
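/* Editorial note (added in review, not part of the original file): the
 * bcsel table above is the union of the two selected sources, e.g.
 * table[gt_zero][eq_zero] == ge_zero, exactly what union_ranges(gt_zero,
 * eq_zero) reports.
 */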
565
566 case nir_op_i2f32:
567 case nir_op_u2f32:
568 r = analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
569
570 r.is_integral = true;
571
572 if (r.range == unknown && alu->op == nir_op_u2f32)
573 r.range = ge_zero;
574
575 break;
576
577 case nir_op_fabs:
578 r = analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
579
580 switch (r.range) {
581 case unknown:
582 case le_zero:
583 case ge_zero:
584 r.range = ge_zero;
585 break;
586
587 case lt_zero:
588 case gt_zero:
589 case ne_zero:
590 r.range = gt_zero;
591 break;
592
593 case eq_zero:
594 break;
595 }
596
597 break;
598
599 case nir_op_fadd: {
600 const struct ssa_result_range left =
601 analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
602 const struct ssa_result_range right =
603 analyze_expression(alu, 1, ht, nir_alu_src_type(alu, 1));
604
605 r.is_integral = left.is_integral && right.is_integral;
606 r.range = fadd_table[left.range][right.range];
607 break;
608 }
609
610 case nir_op_fexp2: {
611 /* If the parameter might be less than zero, the mathematical result
612 * will be in (0, 1). For sufficiently large magnitude negative
613 * parameters, the result will flush to zero.
614 */
615 static const enum ssa_ranges table[last_range + 1] = {
616 /* unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
617 ge_zero, ge_zero, ge_zero, gt_zero, gt_zero, ge_zero, gt_zero
618 };
619
620 r = analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
621
622 ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_1_SOURCE(table);
623 ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_1_SOURCE(table);
624
625 r.is_integral = r.is_integral && is_not_negative(r.range);
626 r.range = table[r.range];
627 break;
628 }
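/* Editorial note (added in review, not part of the original file): the
 * is_integral update in the fexp2 case above encodes that 2**x is only
 * guaranteed integral for a non-negative integer x; e.g. x = 3 gives 8.0,
 * but x = -1 gives 0.5.
 */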
629
630 case nir_op_fmax: {
631 const struct ssa_result_range left =
632 analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
633 const struct ssa_result_range right =
634 analyze_expression(alu, 1, ht, nir_alu_src_type(alu, 1));
635
636 r.is_integral = left.is_integral && right.is_integral;
637
638 /* gt_zero: fmax(gt_zero, *)
639 * | fmax(*, gt_zero) # Treat fmax as commutative
640 * ;
641 *
642 * ge_zero: fmax(ge_zero, ne_zero)
643 * | fmax(ge_zero, lt_zero)
644 * | fmax(ge_zero, le_zero)
645 * | fmax(ge_zero, eq_zero)
646 * | fmax(ne_zero, ge_zero) # Treat fmax as commutative
647 * | fmax(lt_zero, ge_zero) # Treat fmax as commutative
648 * | fmax(le_zero, ge_zero) # Treat fmax as commutative
649 * | fmax(eq_zero, ge_zero) # Treat fmax as commutative
650 * | fmax(ge_zero, ge_zero)
651 * ;
652 *
653 * le_zero: fmax(le_zero, lt_zero)
654 * | fmax(lt_zero, le_zero) # Treat fmax as commutative
655 * | fmax(le_zero, le_zero)
656 * ;
657 *
658 * lt_zero: fmax(lt_zero, lt_zero)
659 * ;
660 *
661 * ne_zero: fmax(ne_zero, lt_zero)
662 * | fmax(lt_zero, ne_zero) # Treat fmax as commutative
663 * | fmax(ne_zero, ne_zero)
664 * ;
665 *
666 * eq_zero: fmax(eq_zero, le_zero)
667 * | fmax(eq_zero, lt_zero)
668 * | fmax(le_zero, eq_zero) # Treat fmax as commutative
669 * | fmax(lt_zero, eq_zero) # Treat fmax as commutative
670 * | fmax(eq_zero, eq_zero)
671 * ;
672 *
673 * All other cases are 'unknown'.
674 */
675 static const enum ssa_ranges table[last_range + 1][last_range + 1] = {
676 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
677 /* unknown */ { _______, _______, _______, gt_zero, ge_zero, _______, _______ },
678 /* lt_zero */ { _______, lt_zero, le_zero, gt_zero, ge_zero, ne_zero, eq_zero },
679 /* le_zero */ { _______, le_zero, le_zero, gt_zero, ge_zero, _______, eq_zero },
680 /* gt_zero */ { gt_zero, gt_zero, gt_zero, gt_zero, gt_zero, gt_zero, gt_zero },
681 /* ge_zero */ { ge_zero, ge_zero, ge_zero, gt_zero, ge_zero, ge_zero, ge_zero },
682 /* ne_zero */ { _______, ne_zero, _______, gt_zero, ge_zero, ne_zero, _______ },
683 /* eq_zero */ { _______, eq_zero, eq_zero, gt_zero, ge_zero, _______, eq_zero }
684 };
685
686 /* Treat fmax as commutative. */
687 ASSERT_TABLE_IS_COMMUTATIVE(table);
688 ASSERT_TABLE_IS_DIAGONAL(table);
689 ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_2_SOURCE(table);
690
691 r.range = table[left.range][right.range];
692 break;
693 }
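/* Editorial note (added in review, not part of the original file): in the
 * fmax table above the gt_zero row and column are entirely gt_zero -- if
 * either operand is known strictly positive, so is the maximum.  The fmin
 * case below is the mirror image with lt_zero.
 */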
694
695 case nir_op_fmin: {
696 const struct ssa_result_range left =
697 analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
698 const struct ssa_result_range right =
699 analyze_expression(alu, 1, ht, nir_alu_src_type(alu, 1));
700
701 r.is_integral = left.is_integral && right.is_integral;
702
703 /* lt_zero: fmin(lt_zero, *)
704 * | fmin(*, lt_zero) # Treat fmin as commutative
705 * ;
706 *
707 * le_zero: fmin(le_zero, ne_zero)
708 * | fmin(le_zero, gt_zero)
709 * | fmin(le_zero, ge_zero)
710 * | fmin(le_zero, eq_zero)
711 * | fmin(ne_zero, le_zero) # Treat fmin as commutative
712 * | fmin(gt_zero, le_zero) # Treat fmin as commutative
713 * | fmin(ge_zero, le_zero) # Treat fmin as commutative
714 * | fmin(eq_zero, le_zero) # Treat fmin as commutative
715 * | fmin(le_zero, le_zero)
716 * ;
717 *
718 * ge_zero: fmin(ge_zero, gt_zero)
719 * | fmin(gt_zero, ge_zero) # Treat fmin as commutative
720 * | fmin(ge_zero, ge_zero)
721 * ;
722 *
723 * gt_zero: fmin(gt_zero, gt_zero)
724 * ;
725 *
726 * ne_zero: fmin(ne_zero, gt_zero)
727 * | fmin(gt_zero, ne_zero) # Treat fmin as commutative
728 * | fmin(ne_zero, ne_zero)
729 * ;
730 *
731 * eq_zero: fmin(eq_zero, ge_zero)
732 * | fmin(eq_zero, gt_zero)
733 * | fmin(ge_zero, eq_zero) # Treat fmin as commutative
734 * | fmin(gt_zero, eq_zero) # Treat fmin as commutative
735 * | fmin(eq_zero, eq_zero)
736 * ;
737 *
738 * All other cases are 'unknown'.
739 */
740 static const enum ssa_ranges table[last_range + 1][last_range + 1] = {
741 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
742 /* unknown */ { _______, lt_zero, le_zero, _______, _______, _______, _______ },
743 /* lt_zero */ { lt_zero, lt_zero, lt_zero, lt_zero, lt_zero, lt_zero, lt_zero },
744 /* le_zero */ { le_zero, lt_zero, le_zero, le_zero, le_zero, le_zero, le_zero },
745 /* gt_zero */ { _______, lt_zero, le_zero, gt_zero, ge_zero, ne_zero, eq_zero },
746 /* ge_zero */ { _______, lt_zero, le_zero, ge_zero, ge_zero, _______, eq_zero },
747 /* ne_zero */ { _______, lt_zero, le_zero, ne_zero, _______, ne_zero, _______ },
748 /* eq_zero */ { _______, lt_zero, le_zero, eq_zero, eq_zero, _______, eq_zero }
749 };
750
751 /* Treat fmin as commutative. */
752 ASSERT_TABLE_IS_COMMUTATIVE(table);
753 ASSERT_TABLE_IS_DIAGONAL(table);
754 ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_2_SOURCE(table);
755
756 r.range = table[left.range][right.range];
757 break;
758 }
759
760 case nir_op_fmul: {
761 const struct ssa_result_range left =
762 analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
763 const struct ssa_result_range right =
764 analyze_expression(alu, 1, ht, nir_alu_src_type(alu, 1));
765
766 r.is_integral = left.is_integral && right.is_integral;
767
768 /* x * x => ge_zero */
769 if (left.range != eq_zero && nir_alu_srcs_equal(alu, alu, 0, 1)) {
770 /* Even if x > 0, the result of x*x can be zero when x is, for
771 * example, a subnormal number.
772 */
773 r.range = ge_zero;
774 } else if (left.range != eq_zero && nir_alu_srcs_negative_equal(alu, alu, 0, 1)) {
775 /* -x * x => le_zero. */
776 r.range = le_zero;
777 } else
778 r.range = fmul_table[left.range][right.range];
779
780 break;
781 }
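/* Editorial note (added in review, not part of the original file): thanks
 * to the source-equality checks above, fmul(a, a) is reported ge_zero and
 * fmul(-a, a) le_zero even when a's own range is unknown; an exact-zero
 * first source skips the special cases because the table already returns
 * eq_zero there.
 */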
782
783 case nir_op_frcp:
784 r = (struct ssa_result_range){
785 analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0)).range,
786 false
787 };
788 break;
789
790 case nir_op_mov:
791 r = analyze_expression(alu, 0, ht, use_type);
792 break;
793
794 case nir_op_fneg:
795 r = analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
796
797 r.range = fneg_table[r.range];
798 break;
799
800 case nir_op_fsat:
801 r = analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
802
803 switch (r.range) {
804 case le_zero:
805 case lt_zero:
806 r.range = eq_zero;
807 r.is_integral = true;
808 break;
809
810 case eq_zero:
811 assert(r.is_integral); /* fall through */
812 case gt_zero:
813 case ge_zero:
814 /* The fsat doesn't add any information in these cases. */
815 break;
816
817 case ne_zero:
818 case unknown:
819 /* Since the result must be in [0, 1], the value must be >= 0. */
820 r.range = ge_zero;
821 break;
822 }
823 break;
824
825 case nir_op_fsign:
826 r = (struct ssa_result_range){
827 analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0)).range,
828 true
829 };
830 break;
831
832 case nir_op_fsqrt:
833 case nir_op_frsq:
834 r = (struct ssa_result_range){ge_zero, false};
835 break;
836
837 case nir_op_ffloor: {
838 const struct ssa_result_range left =
839 analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
840
841 r.is_integral = true;
842
843 if (left.is_integral || left.range == le_zero || left.range == lt_zero)
844 r.range = left.range;
845 else if (left.range == ge_zero || left.range == gt_zero)
846 r.range = ge_zero;
847 else if (left.range == ne_zero)
848 r.range = unknown;
849
850 break;
851 }
852
853 case nir_op_fceil: {
854 const struct ssa_result_range left =
855 analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
856
857 r.is_integral = true;
858
859 if (left.is_integral || left.range == ge_zero || left.range == gt_zero)
860 r.range = left.range;
861 else if (left.range == le_zero || left.range == lt_zero)
862 r.range = le_zero;
863 else if (left.range == ne_zero)
864 r.range = unknown;
865
866 break;
867 }
868
869 case nir_op_ftrunc: {
870 const struct ssa_result_range left =
871 analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
872
873 r.is_integral = true;
874
875 if (left.is_integral)
876 r.range = left.range;
877 else if (left.range == ge_zero || left.range == gt_zero)
878 r.range = ge_zero;
879 else if (left.range == le_zero || left.range == lt_zero)
880 r.range = le_zero;
881 else if (left.range == ne_zero)
882 r.range = unknown;
883
884 break;
885 }
886
887 case nir_op_flt:
888 case nir_op_fge:
889 case nir_op_feq:
890 case nir_op_fne:
891 case nir_op_ilt:
892 case nir_op_ige:
893 case nir_op_ieq:
894 case nir_op_ine:
895 case nir_op_ult:
896 case nir_op_uge:
897 /* Boolean results are 0 or -1. */
898 r = (struct ssa_result_range){le_zero, false};
899 break;
900
901 case nir_op_fpow: {
902 /* Due to flush-to-zero semantics of floating-point numbers with very
903 * small magnitudes, we can never really be sure a result will be
904 * non-zero.
905 *
906 * NIR uses pow() and powf() to constant evaluate nir_op_fpow. The man
907 * page for that function says:
908 *
909 * If y is 0, the result is 1.0 (even if x is a NaN).
910 *
911 * gt_zero: pow(*, eq_zero)
912 * | pow(eq_zero, lt_zero) # 0^-y = +inf
913 * | pow(eq_zero, le_zero) # 0^-y = +inf or 0^0 = 1.0
914 * ;
915 *
916 * eq_zero: pow(eq_zero, gt_zero)
917 * ;
918 *
919 * ge_zero: pow(gt_zero, gt_zero)
920 * | pow(gt_zero, ge_zero)
921 * | pow(gt_zero, lt_zero)
922 * | pow(gt_zero, le_zero)
923 * | pow(gt_zero, ne_zero)
924 * | pow(gt_zero, unknown)
925 * | pow(ge_zero, gt_zero)
926 * | pow(ge_zero, ge_zero)
927 * | pow(ge_zero, lt_zero)
928 * | pow(ge_zero, le_zero)
929 * | pow(ge_zero, ne_zero)
930 * | pow(ge_zero, unknown)
931 * | pow(eq_zero, ge_zero) # 0^0 = 1.0 or 0^+y = 0.0
932 * | pow(eq_zero, ne_zero) # 0^-y = +inf or 0^+y = 0.0
933 * | pow(eq_zero, unknown) # union of all other y cases
934 * ;
935 *
936 * All other cases are unknown.
937 *
938 * We could do better if the right operand is a constant, integral
939 * value.
940 */
941 static const enum ssa_ranges table[last_range + 1][last_range + 1] = {
942 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
943 /* unknown */ { _______, _______, _______, _______, _______, _______, gt_zero },
944 /* lt_zero */ { _______, _______, _______, _______, _______, _______, gt_zero },
945 /* le_zero */ { _______, _______, _______, _______, _______, _______, gt_zero },
946 /* gt_zero */ { ge_zero, ge_zero, ge_zero, ge_zero, ge_zero, ge_zero, gt_zero },
947 /* ge_zero */ { ge_zero, ge_zero, ge_zero, ge_zero, ge_zero, ge_zero, gt_zero },
948 /* ne_zero */ { _______, _______, _______, _______, _______, _______, gt_zero },
949 /* eq_zero */ { ge_zero, gt_zero, gt_zero, eq_zero, ge_zero, ge_zero, gt_zero },
950 };
951
952 const struct ssa_result_range left =
953 analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
954 const struct ssa_result_range right =
955 analyze_expression(alu, 1, ht, nir_alu_src_type(alu, 1));
956
957 ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(table);
958 ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_2_SOURCE(table);
959
960 r.is_integral = left.is_integral && right.is_integral &&
961 is_not_negative(right.range);
962 r.range = table[left.range][right.range];
963 break;
964 }
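/* Editorial note (added in review, not part of the original file): the
 * is_integral result for fpow above also requires a non-negative exponent,
 * since an integral base raised to a negative integer is generally
 * fractional, e.g. pow(2.0, -1.0) == 0.5.
 */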
965
966 case nir_op_ffma: {
967 const struct ssa_result_range first =
968 analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
969 const struct ssa_result_range second =
970 analyze_expression(alu, 1, ht, nir_alu_src_type(alu, 1));
971 const struct ssa_result_range third =
972 analyze_expression(alu, 2, ht, nir_alu_src_type(alu, 2));
973
974 r.is_integral = first.is_integral && second.is_integral &&
975 third.is_integral;
976
977 enum ssa_ranges fmul_range;
978
979 if (first.range != eq_zero && nir_alu_srcs_equal(alu, alu, 0, 1)) {
980 /* See handling of nir_op_fmul for explanation of why ge_zero is the
981 * range.
982 */
983 fmul_range = ge_zero;
984 } else if (first.range != eq_zero && nir_alu_srcs_negative_equal(alu, alu, 0, 1)) {
985 /* -x * x => le_zero */
986 fmul_range = le_zero;
987 } else
988 fmul_range = fmul_table[first.range][second.range];
989
990 r.range = fadd_table[fmul_range][third.range];
991 break;
992 }
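/* Editorial worked example (added in review, not part of the original
 * file): for ffma(a, a, c) with c known ge_zero, the squared term
 * contributes ge_zero (or eq_zero when a is known zero), and
 * fadd_table[ge_zero][ge_zero] == fadd_table[eq_zero][ge_zero] == ge_zero,
 * so the whole ffma is reported non-negative.
 */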
993
994 case nir_op_flrp: {
995 const struct ssa_result_range first =
996 analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
997 const struct ssa_result_range second =
998 analyze_expression(alu, 1, ht, nir_alu_src_type(alu, 1));
999 const struct ssa_result_range third =
1000 analyze_expression(alu, 2, ht, nir_alu_src_type(alu, 2));
1001
1002 r.is_integral = first.is_integral && second.is_integral &&
1003 third.is_integral;
1004
1005 /* Decompose the flrp to first + third * (second + -first) */
1006 const enum ssa_ranges inner_fadd_range =
1007 fadd_table[second.range][fneg_table[first.range]];
1008
1009 const enum ssa_ranges fmul_range =
1010 fmul_table[third.range][inner_fadd_range];
1011
1012 r.range = fadd_table[first.range][fmul_range];
1013 break;
1014 }
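/* Editorial worked example (added in review, not part of the original
 * file): for flrp(a, b, t) with a known eq_zero and b and t known ge_zero,
 * the decomposition above gives inner_fadd = fadd_table[ge_zero][eq_zero]
 * == ge_zero, fmul_range = fmul_table[ge_zero][ge_zero] == ge_zero, and
 * finally fadd_table[eq_zero][ge_zero] == ge_zero.
 */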
1015
1016 default:
1017 r = (struct ssa_result_range){unknown, false};
1018 break;
1019 }
1020
1021 if (r.range == eq_zero)
1022 r.is_integral = true;
1023
1024 _mesa_hash_table_insert(ht, pack_key(alu, use_type), pack_data(r));
1025 return r;
1026 }
1027
1028 #undef _______
1029
1030 struct ssa_result_range
1031 nir_analyze_range(struct hash_table *range_ht,
1032 const nir_alu_instr *instr, unsigned src)
1033 {
1034 return analyze_expression(instr, src, range_ht,
1035 nir_alu_src_type(instr, src));
1036 }
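
/* Editorial usage sketch (added in review, not part of the original file):
 * one way a caller might consult the analysis.  The helper below is
 * hypothetical; only nir_analyze_range(), is_not_negative(),
 * _mesa_pointer_hash_table_create(), and _mesa_hash_table_destroy() come
 * from this file or util/hash_table.h.  Per the commit message at the top,
 * a real pass would keep the table alive across queries within the pass and
 * discard it as soon as it changes the IR, rather than rebuilding it for
 * every query as shown here.
 */
static bool
example_src_is_non_negative(const nir_alu_instr *alu, unsigned src)
{
   /* Results are memoized in the table, keyed by (instruction, use type). */
   struct hash_table *range_ht = _mesa_pointer_hash_table_create(NULL);

   const struct ssa_result_range r = nir_analyze_range(range_ht, alu, src);

   _mesa_hash_table_destroy(range_ht, NULL);

   return is_not_negative(r.range);
}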