/* nir/range-analysis: Use types in the hash key
 * [mesa.git] / src / compiler / nir / nir_range_analysis.c
 */
1 /*
2 * Copyright © 2018 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
#include <float.h>
#include <math.h>
#include <stdint.h>
#include "nir.h"
#include "nir_range_analysis.h"
#include "util/hash_table.h"
28
29 /**
30 * Analyzes a sequence of operations to determine some aspects of the range of
31 * the result.
32 */
33
34 static bool
35 is_not_negative(enum ssa_ranges r)
36 {
37 return r == gt_zero || r == ge_zero || r == eq_zero;
38 }
39
40 static void *
41 pack_data(const struct ssa_result_range r)
42 {
43 return (void *)(uintptr_t)(r.range | r.is_integral << 8);
44 }
45
46 static struct ssa_result_range
47 unpack_data(const void *p)
48 {
49 const uintptr_t v = (uintptr_t) p;
50
51 return (struct ssa_result_range){v & 0xff, (v & 0x0ff00) != 0};
52 }
53
54 static void *
55 pack_key(const struct nir_alu_instr *instr, nir_alu_type type)
56 {
57 uintptr_t type_encoding;
58 uintptr_t ptr = (uintptr_t) instr;
59
60 /* The low 2 bits have to be zero or this whole scheme falls apart. */
61 assert((ptr & 0x3) == 0);
62
63 /* NIR is typeless in the sense that sequences of bits have whatever
64 * meaning is attached to them by the instruction that consumes them.
65 * However, the number of bits must match between producer and consumer.
66 * As a result, the number of bits does not need to be encoded here.
67 */
68 switch (nir_alu_type_get_base_type(type)) {
69 case nir_type_int: type_encoding = 0; break;
70 case nir_type_uint: type_encoding = 1; break;
71 case nir_type_bool: type_encoding = 2; break;
72 case nir_type_float: type_encoding = 3; break;
73 default: unreachable("Invalid base type.");
74 }
75
76 return (void *)(ptr | type_encoding);
77 }
78
79 static nir_alu_type
80 nir_alu_src_type(const nir_alu_instr *instr, unsigned src)
81 {
82 return nir_alu_type_get_base_type(nir_op_infos[instr->op].input_types[src]) |
83 nir_src_bit_size(instr->src[src].src);
84 }
85
86 static struct ssa_result_range
87 analyze_constant(const struct nir_alu_instr *instr, unsigned src,
88 nir_alu_type use_type)
89 {
90 uint8_t swizzle[4] = { 0, 1, 2, 3 };
91
92 /* If the source is an explicitly sized source, then we need to reset
93 * both the number of components and the swizzle.
94 */
95 const unsigned num_components = nir_ssa_alu_instr_src_components(instr, src);
96
97 for (unsigned i = 0; i < num_components; ++i)
98 swizzle[i] = instr->src[src].swizzle[i];
99
100 const nir_load_const_instr *const load =
101 nir_instr_as_load_const(instr->src[src].src.ssa->parent_instr);
102
103 struct ssa_result_range r = { unknown, false };
104
105 switch (nir_alu_type_get_base_type(use_type)) {
106 case nir_type_float: {
107 double min_value = DBL_MAX;
108 double max_value = -DBL_MAX;
109 bool any_zero = false;
110 bool all_zero = true;
111
112 r.is_integral = true;
113
114 for (unsigned i = 0; i < num_components; ++i) {
115 const double v = nir_const_value_as_float(load->value[swizzle[i]],
116 load->def.bit_size);
117
118 if (floor(v) != v)
119 r.is_integral = false;
120
121 any_zero = any_zero || (v == 0.0);
122 all_zero = all_zero && (v == 0.0);
123 min_value = MIN2(min_value, v);
124 max_value = MAX2(max_value, v);
125 }
126
127 assert(any_zero >= all_zero);
128 assert(isnan(max_value) || max_value >= min_value);
129
130 if (all_zero)
131 r.range = eq_zero;
132 else if (min_value > 0.0)
133 r.range = gt_zero;
134 else if (min_value == 0.0)
135 r.range = ge_zero;
136 else if (max_value < 0.0)
137 r.range = lt_zero;
138 else if (max_value == 0.0)
139 r.range = le_zero;
140 else if (!any_zero)
141 r.range = ne_zero;
142 else
143 r.range = unknown;
144
145 return r;
146 }
147
148 case nir_type_int:
149 case nir_type_bool: {
150 int64_t min_value = INT_MAX;
151 int64_t max_value = INT_MIN;
152 bool any_zero = false;
153 bool all_zero = true;
154
155 for (unsigned i = 0; i < num_components; ++i) {
156 const int64_t v = nir_const_value_as_int(load->value[swizzle[i]],
157 load->def.bit_size);
158
159 any_zero = any_zero || (v == 0);
160 all_zero = all_zero && (v == 0);
161 min_value = MIN2(min_value, v);
162 max_value = MAX2(max_value, v);
163 }
164
165 assert(any_zero >= all_zero);
166 assert(max_value >= min_value);
167
168 if (all_zero)
169 r.range = eq_zero;
170 else if (min_value > 0)
171 r.range = gt_zero;
172 else if (min_value == 0)
173 r.range = ge_zero;
174 else if (max_value < 0)
175 r.range = lt_zero;
176 else if (max_value == 0)
177 r.range = le_zero;
178 else if (!any_zero)
179 r.range = ne_zero;
180 else
181 r.range = unknown;
182
183 return r;
184 }
185
186 case nir_type_uint: {
187 bool any_zero = false;
188 bool all_zero = true;
189
190 for (unsigned i = 0; i < num_components; ++i) {
191 const uint64_t v = nir_const_value_as_uint(load->value[swizzle[i]],
192 load->def.bit_size);
193
194 any_zero = any_zero || (v == 0);
195 all_zero = all_zero && (v == 0);
196 }
197
198 assert(any_zero >= all_zero);
199
200 if (all_zero)
201 r.range = eq_zero;
202 else if (any_zero)
203 r.range = ge_zero;
204 else
205 r.range = gt_zero;
206
207 return r;
208 }
209
210 default:
211 unreachable("Invalid alu source type");
212 }
213 }
214
/**
 * Short-hand name for use in the tables in analyze_expression.  If this name
 * becomes a problem on some compiler, we can change it to _.
 */
#define _______ unknown

#ifndef NDEBUG
/* Sanity check for a commutative operation's range table: it must equal its
 * own transpose.
 */
#define ASSERT_TABLE_IS_COMMUTATIVE(t)                        \
   do {                                                       \
      for (unsigned r = 0; r < ARRAY_SIZE(t); r++) {          \
         for (unsigned c = 0; c < ARRAY_SIZE(t[0]); c++)      \
            assert(t[r][c] == t[c][r]);                       \
      }                                                       \
   } while (false)

/* Sanity check for selection-like operations: combining a range with itself
 * must yield that same range (the table's diagonal is the identity).
 */
#define ASSERT_TABLE_IS_DIAGONAL(t)                           \
   do {                                                       \
      for (unsigned r = 0; r < ARRAY_SIZE(t); r++)            \
         assert(t[r][r] == r);                                \
   } while (false)
235
/**
 * Union of two range tokens: the tightest token that admits every value
 * allowed by either \p a or \p b.  Implemented as a symmetric table lookup;
 * only used (by the assertion macros) in debug builds.
 */
static enum ssa_ranges
union_ranges(enum ssa_ranges a, enum ssa_ranges b)
{
   static const enum ssa_ranges union_table[last_range + 1][last_range + 1] = {
      /* left\right   unknown  lt_zero  le_zero  gt_zero  ge_zero  ne_zero  eq_zero */
      /* unknown */ { _______, _______, _______, _______, _______, _______, _______ },
      /* lt_zero */ { _______, lt_zero, le_zero, ne_zero, _______, ne_zero, le_zero },
      /* le_zero */ { _______, le_zero, le_zero, _______, _______, _______, le_zero },
      /* gt_zero */ { _______, ne_zero, _______, gt_zero, ge_zero, ne_zero, ge_zero },
      /* ge_zero */ { _______, _______, _______, ge_zero, ge_zero, _______, ge_zero },
      /* ne_zero */ { _______, ne_zero, _______, ne_zero, _______, ne_zero, _______ },
      /* eq_zero */ { _______, le_zero, le_zero, ge_zero, ge_zero, _______, eq_zero },
   };

   ASSERT_TABLE_IS_COMMUTATIVE(union_table);
   ASSERT_TABLE_IS_DIAGONAL(union_table);

   return union_table[a][b];
}
255
/* Verify that the 'unknown' entry in each row (or column) of the table is the
 * union of all the other values in the row (or column).
 *
 * The loops run through last_range *inclusive*: last_range is the index of
 * the final valid token (eq_zero), so a strict '<' bound would leave the
 * eq_zero row and column unchecked and omit the eq_zero entry from the
 * accumulated unions.
 */
#define ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_2_SOURCE(t)              \
   do {                                                                 \
      for (unsigned i = 0; i <= last_range; i++) {                      \
         enum ssa_ranges col_range = t[i][unknown + 1];                 \
         enum ssa_ranges row_range = t[unknown + 1][i];                 \
                                                                        \
         for (unsigned j = unknown + 2; j <= last_range; j++) {         \
            col_range = union_ranges(col_range, t[i][j]);               \
            row_range = union_ranges(row_range, t[j][i]);               \
         }                                                              \
                                                                        \
         assert(col_range == t[i][unknown]);                            \
         assert(row_range == t[unknown][i]);                            \
      }                                                                 \
   } while (false)
274
/* For most operations, the union of ranges for a strict inequality and
 * equality should be the range of the non-strict inequality (e.g.,
 * union_ranges(range(op(lt_zero), range(op(eq_zero))) == range(op(le_zero)).
 *
 * Does not apply to selection-like opcodes (bcsel, fmin, fmax, etc.).
 */
#define ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_1_SOURCE(t)     \
   do {                                                                      \
      assert(union_ranges(t[lt_zero], t[eq_zero]) == t[le_zero]);            \
      assert(union_ranges(t[gt_zero], t[eq_zero]) == t[ge_zero]);            \
   } while (false)

/* The loop bound is inclusive: last_range (eq_zero) is a valid index, and
 * '< last_range' would skip the checks for the final row and column.
 */
#define ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_2_SOURCE(t)     \
   do {                                                                      \
      for (unsigned i = 0; i <= last_range; i++) {                           \
         assert(union_ranges(t[i][lt_zero], t[i][eq_zero]) == t[i][le_zero]); \
         assert(union_ranges(t[i][gt_zero], t[i][eq_zero]) == t[i][ge_zero]); \
         assert(union_ranges(t[lt_zero][i], t[eq_zero][i]) == t[le_zero][i]); \
         assert(union_ranges(t[gt_zero][i], t[eq_zero][i]) == t[ge_zero][i]); \
      }                                                                      \
   } while (false)
296
/* Several other unordered tuples span the range of "everything."  Each should
 * have the same value as unknown: (lt_zero, ge_zero), (le_zero, gt_zero), and
 * (eq_zero, ne_zero).  union_ranges is already commutative, so only one
 * ordering needs to be checked.
 *
 * Does not apply to selection-like opcodes (bcsel, fmin, fmax, etc.).
 *
 * In cases where this can be used, it is unnecessary to also use
 * ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_*_SOURCE.  For any range X,
 * union_ranges(X, X) == X.  The disjoint ranges cover all of the non-unknown
 * possibilities, so the union of all the unions of disjoint ranges is
 * equivalent to the union of "others."
 */
#define ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_1_SOURCE(t)       \
   do {                                                            \
      assert(union_ranges(t[lt_zero], t[ge_zero]) == t[unknown]);  \
      assert(union_ranges(t[le_zero], t[gt_zero]) == t[unknown]);  \
      assert(union_ranges(t[eq_zero], t[ne_zero]) == t[unknown]);  \
   } while (false)

/* The loop bound is inclusive: last_range (eq_zero) is a valid index, and
 * '< last_range' would skip the checks for the final row and column.
 */
#define ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(t)       \
   do {                                                            \
      for (unsigned i = 0; i <= last_range; i++) {                 \
         assert(union_ranges(t[i][lt_zero], t[i][ge_zero]) ==      \
                t[i][unknown]);                                    \
         assert(union_ranges(t[i][le_zero], t[i][gt_zero]) ==      \
                t[i][unknown]);                                    \
         assert(union_ranges(t[i][eq_zero], t[i][ne_zero]) ==      \
                t[i][unknown]);                                    \
                                                                   \
         assert(union_ranges(t[lt_zero][i], t[ge_zero][i]) ==      \
                t[unknown][i]);                                    \
         assert(union_ranges(t[le_zero][i], t[gt_zero][i]) ==      \
                t[unknown][i]);                                    \
         assert(union_ranges(t[eq_zero][i], t[ne_zero][i]) ==      \
                t[unknown][i]);                                    \
      }                                                            \
   } while (false)
335
#else
/* Release builds: the table sanity checks compile away to nothing. */
#define ASSERT_TABLE_IS_COMMUTATIVE(t)
#define ASSERT_TABLE_IS_DIAGONAL(t)
#define ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_2_SOURCE(t)
#define ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_1_SOURCE(t)
#define ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_2_SOURCE(t)
#define ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_1_SOURCE(t)
#define ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(t)
#endif
345
346 /**
347 * Analyze an expression to determine the range of its result
348 *
349 * The end result of this analysis is a token that communicates something
350 * about the range of values. There's an implicit grammar that produces
351 * tokens from sequences of literal values, other tokens, and operations.
352 * This function implements this grammar as a recursive-descent parser. Some
353 * (but not all) of the grammar is listed in-line in the function.
354 */
355 static struct ssa_result_range
356 analyze_expression(const nir_alu_instr *instr, unsigned src,
357 struct hash_table *ht, nir_alu_type use_type)
358 {
359 if (!instr->src[src].src.is_ssa)
360 return (struct ssa_result_range){unknown, false};
361
362 if (nir_src_is_const(instr->src[src].src))
363 return analyze_constant(instr, src, use_type);
364
365 if (instr->src[src].src.ssa->parent_instr->type != nir_instr_type_alu)
366 return (struct ssa_result_range){unknown, false};
367
368 const struct nir_alu_instr *const alu =
369 nir_instr_as_alu(instr->src[src].src.ssa->parent_instr);
370
371 /* Bail if the type of the instruction generating the value does not match
372 * the type the value will be interpreted as. int/uint/bool can be
373 * reinterpreted trivially. The most important cases are between float and
374 * non-float.
375 */
376 if (alu->op != nir_op_mov && alu->op != nir_op_bcsel) {
377 const nir_alu_type use_base_type =
378 nir_alu_type_get_base_type(use_type);
379 const nir_alu_type src_base_type =
380 nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type);
381
382 if (use_base_type != src_base_type &&
383 (use_base_type == nir_type_float ||
384 src_base_type == nir_type_float)) {
385 return (struct ssa_result_range){unknown, false};
386 }
387 }
388
389 struct hash_entry *he = _mesa_hash_table_search(ht, pack_key(alu, use_type));
390 if (he != NULL)
391 return unpack_data(he->data);
392
393 struct ssa_result_range r = {unknown, false};
394
395 /* ge_zero: ge_zero + ge_zero
396 *
397 * gt_zero: gt_zero + eq_zero
398 * | gt_zero + ge_zero
399 * | eq_zero + gt_zero # Addition is commutative
400 * | ge_zero + gt_zero # Addition is commutative
401 * | gt_zero + gt_zero
402 * ;
403 *
404 * le_zero: le_zero + le_zero
405 *
406 * lt_zero: lt_zero + eq_zero
407 * | lt_zero + le_zero
408 * | eq_zero + lt_zero # Addition is commutative
409 * | le_zero + lt_zero # Addition is commutative
410 * | lt_zero + lt_zero
411 * ;
412 *
413 * ne_zero: eq_zero + ne_zero
414 * | ne_zero + eq_zero # Addition is commutative
415 * ;
416 *
417 * eq_zero: eq_zero + eq_zero
418 * ;
419 *
420 * All other cases are 'unknown'. The seeming odd entry is (ne_zero,
421 * ne_zero), but that could be (-5, +5) which is not ne_zero.
422 */
423 static const enum ssa_ranges fadd_table[last_range + 1][last_range + 1] = {
424 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
425 /* unknown */ { _______, _______, _______, _______, _______, _______, _______ },
426 /* lt_zero */ { _______, lt_zero, lt_zero, _______, _______, _______, lt_zero },
427 /* le_zero */ { _______, lt_zero, le_zero, _______, _______, _______, le_zero },
428 /* gt_zero */ { _______, _______, _______, gt_zero, gt_zero, _______, gt_zero },
429 /* ge_zero */ { _______, _______, _______, gt_zero, ge_zero, _______, ge_zero },
430 /* ne_zero */ { _______, _______, _______, _______, _______, _______, ne_zero },
431 /* eq_zero */ { _______, lt_zero, le_zero, gt_zero, ge_zero, ne_zero, eq_zero },
432 };
433
434 ASSERT_TABLE_IS_COMMUTATIVE(fadd_table);
435 ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(fadd_table);
436 ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_2_SOURCE(fadd_table);
437
438 /* Due to flush-to-zero semanatics of floating-point numbers with very
439 * small mangnitudes, we can never really be sure a result will be
440 * non-zero.
441 *
442 * ge_zero: ge_zero * ge_zero
443 * | ge_zero * gt_zero
444 * | ge_zero * eq_zero
445 * | le_zero * lt_zero
446 * | lt_zero * le_zero # Multiplication is commutative
447 * | le_zero * le_zero
448 * | gt_zero * ge_zero # Multiplication is commutative
449 * | eq_zero * ge_zero # Multiplication is commutative
450 * | a * a # Left source == right source
451 * | gt_zero * gt_zero
452 * | lt_zero * lt_zero
453 * ;
454 *
455 * le_zero: ge_zero * le_zero
456 * | ge_zero * lt_zero
457 * | lt_zero * ge_zero # Multiplication is commutative
458 * | le_zero * ge_zero # Multiplication is commutative
459 * | le_zero * gt_zero
460 * | lt_zero * gt_zero
461 * | gt_zero * lt_zero # Multiplication is commutative
462 * ;
463 *
464 * eq_zero: eq_zero * <any>
465 * <any> * eq_zero # Multiplication is commutative
466 *
467 * All other cases are 'unknown'.
468 */
469 static const enum ssa_ranges fmul_table[last_range + 1][last_range + 1] = {
470 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
471 /* unknown */ { _______, _______, _______, _______, _______, _______, eq_zero },
472 /* lt_zero */ { _______, ge_zero, ge_zero, le_zero, le_zero, _______, eq_zero },
473 /* le_zero */ { _______, ge_zero, ge_zero, le_zero, le_zero, _______, eq_zero },
474 /* gt_zero */ { _______, le_zero, le_zero, ge_zero, ge_zero, _______, eq_zero },
475 /* ge_zero */ { _______, le_zero, le_zero, ge_zero, ge_zero, _______, eq_zero },
476 /* ne_zero */ { _______, _______, _______, _______, _______, _______, eq_zero },
477 /* eq_zero */ { eq_zero, eq_zero, eq_zero, eq_zero, eq_zero, eq_zero, eq_zero }
478 };
479
480 ASSERT_TABLE_IS_COMMUTATIVE(fmul_table);
481 ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(fmul_table);
482 ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_2_SOURCE(fmul_table);
483
484 static const enum ssa_ranges fneg_table[last_range + 1] = {
485 /* unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
486 _______, gt_zero, ge_zero, lt_zero, le_zero, ne_zero, eq_zero
487 };
488
489 ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_1_SOURCE(fneg_table);
490 ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_1_SOURCE(fneg_table);
491
492
493 switch (alu->op) {
494 case nir_op_b2f32:
495 case nir_op_b2i32:
496 r = (struct ssa_result_range){ge_zero, alu->op == nir_op_b2f32};
497 break;
498
499 case nir_op_bcsel: {
500 const struct ssa_result_range left =
501 analyze_expression(alu, 1, ht, nir_alu_src_type(alu, 1));
502 const struct ssa_result_range right =
503 analyze_expression(alu, 2, ht, nir_alu_src_type(alu, 2));
504
505 /* If either source is a constant load that is not zero, punt. The type
506 * will always be uint regardless of the actual type. We can't even
507 * decide if the value is non-zero because -0.0 is 0x80000000, and that
508 * will (possibly incorrectly) be considered non-zero.
509 */
510 /* FINISHME: We could do better, but it would require having the expected
511 * FINISHME: type passed in.
512 */
513 if ((nir_src_is_const(alu->src[1].src) && left.range != eq_zero) ||
514 (nir_src_is_const(alu->src[2].src) && right.range != eq_zero)) {
515 return (struct ssa_result_range){unknown, false};
516 }
517
518 r.is_integral = left.is_integral && right.is_integral;
519
520 /* le_zero: bcsel(<any>, le_zero, lt_zero)
521 * | bcsel(<any>, eq_zero, lt_zero)
522 * | bcsel(<any>, le_zero, eq_zero)
523 * | bcsel(<any>, lt_zero, le_zero)
524 * | bcsel(<any>, lt_zero, eq_zero)
525 * | bcsel(<any>, eq_zero, le_zero)
526 * | bcsel(<any>, le_zero, le_zero)
527 * ;
528 *
529 * lt_zero: bcsel(<any>, lt_zero, lt_zero)
530 * ;
531 *
532 * ge_zero: bcsel(<any>, ge_zero, ge_zero)
533 * | bcsel(<any>, ge_zero, gt_zero)
534 * | bcsel(<any>, ge_zero, eq_zero)
535 * | bcsel(<any>, gt_zero, ge_zero)
536 * | bcsel(<any>, eq_zero, ge_zero)
537 * ;
538 *
539 * gt_zero: bcsel(<any>, gt_zero, gt_zero)
540 * ;
541 *
542 * ne_zero: bcsel(<any>, ne_zero, gt_zero)
543 * | bcsel(<any>, ne_zero, lt_zero)
544 * | bcsel(<any>, gt_zero, lt_zero)
545 * | bcsel(<any>, gt_zero, ne_zero)
546 * | bcsel(<any>, lt_zero, ne_zero)
547 * | bcsel(<any>, lt_zero, gt_zero)
548 * | bcsel(<any>, ne_zero, ne_zero)
549 * ;
550 *
551 * eq_zero: bcsel(<any>, eq_zero, eq_zero)
552 * ;
553 *
554 * All other cases are 'unknown'.
555 *
556 * The ranges could be tightened if the range of the first source is
557 * known. However, opt_algebraic will (eventually) elminiate the bcsel
558 * if the condition is known.
559 */
560 static const enum ssa_ranges table[last_range + 1][last_range + 1] = {
561 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
562 /* unknown */ { _______, _______, _______, _______, _______, _______, _______ },
563 /* lt_zero */ { _______, lt_zero, le_zero, ne_zero, _______, ne_zero, le_zero },
564 /* le_zero */ { _______, le_zero, le_zero, _______, _______, _______, le_zero },
565 /* gt_zero */ { _______, ne_zero, _______, gt_zero, ge_zero, ne_zero, ge_zero },
566 /* ge_zero */ { _______, _______, _______, ge_zero, ge_zero, _______, ge_zero },
567 /* ne_zero */ { _______, ne_zero, _______, ne_zero, _______, ne_zero, _______ },
568 /* eq_zero */ { _______, le_zero, le_zero, ge_zero, ge_zero, _______, eq_zero },
569 };
570
571 ASSERT_TABLE_IS_COMMUTATIVE(table);
572 ASSERT_TABLE_IS_DIAGONAL(table);
573 ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_2_SOURCE(table);
574
575 r.range = table[left.range][right.range];
576 break;
577 }
578
579 case nir_op_i2f32:
580 case nir_op_u2f32:
581 r = analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
582
583 r.is_integral = true;
584
585 if (r.range == unknown && alu->op == nir_op_u2f32)
586 r.range = ge_zero;
587
588 break;
589
590 case nir_op_fabs:
591 r = analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
592
593 switch (r.range) {
594 case unknown:
595 case le_zero:
596 case ge_zero:
597 r.range = ge_zero;
598 break;
599
600 case lt_zero:
601 case gt_zero:
602 case ne_zero:
603 r.range = gt_zero;
604 break;
605
606 case eq_zero:
607 break;
608 }
609
610 break;
611
612 case nir_op_fadd: {
613 const struct ssa_result_range left =
614 analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
615 const struct ssa_result_range right =
616 analyze_expression(alu, 1, ht, nir_alu_src_type(alu, 1));
617
618 r.is_integral = left.is_integral && right.is_integral;
619 r.range = fadd_table[left.range][right.range];
620 break;
621 }
622
623 case nir_op_fexp2: {
624 /* If the parameter might be less than zero, the mathematically result
625 * will be on (0, 1). For sufficiently large magnitude negative
626 * parameters, the result will flush to zero.
627 */
628 static const enum ssa_ranges table[last_range + 1] = {
629 /* unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
630 ge_zero, ge_zero, ge_zero, gt_zero, gt_zero, ge_zero, gt_zero
631 };
632
633 r = analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
634
635 ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_1_SOURCE(table);
636 ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_1_SOURCE(table);
637
638 r.is_integral = r.is_integral && is_not_negative(r.range);
639 r.range = table[r.range];
640 break;
641 }
642
643 case nir_op_fmax: {
644 const struct ssa_result_range left =
645 analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
646 const struct ssa_result_range right =
647 analyze_expression(alu, 1, ht, nir_alu_src_type(alu, 1));
648
649 r.is_integral = left.is_integral && right.is_integral;
650
651 /* gt_zero: fmax(gt_zero, *)
652 * | fmax(*, gt_zero) # Treat fmax as commutative
653 * ;
654 *
655 * ge_zero: fmax(ge_zero, ne_zero)
656 * | fmax(ge_zero, lt_zero)
657 * | fmax(ge_zero, le_zero)
658 * | fmax(ge_zero, eq_zero)
659 * | fmax(ne_zero, ge_zero) # Treat fmax as commutative
660 * | fmax(lt_zero, ge_zero) # Treat fmax as commutative
661 * | fmax(le_zero, ge_zero) # Treat fmax as commutative
662 * | fmax(eq_zero, ge_zero) # Treat fmax as commutative
663 * | fmax(ge_zero, ge_zero)
664 * ;
665 *
666 * le_zero: fmax(le_zero, lt_zero)
667 * | fmax(lt_zero, le_zero) # Treat fmax as commutative
668 * | fmax(le_zero, le_zero)
669 * ;
670 *
671 * lt_zero: fmax(lt_zero, lt_zero)
672 * ;
673 *
674 * ne_zero: fmax(ne_zero, lt_zero)
675 * | fmax(lt_zero, ne_zero) # Treat fmax as commutative
676 * | fmax(ne_zero, ne_zero)
677 * ;
678 *
679 * eq_zero: fmax(eq_zero, le_zero)
680 * | fmax(eq_zero, lt_zero)
681 * | fmax(le_zero, eq_zero) # Treat fmax as commutative
682 * | fmax(lt_zero, eq_zero) # Treat fmax as commutative
683 * | fmax(eq_zero, eq_zero)
684 * ;
685 *
686 * All other cases are 'unknown'.
687 */
688 static const enum ssa_ranges table[last_range + 1][last_range + 1] = {
689 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
690 /* unknown */ { _______, _______, _______, gt_zero, ge_zero, _______, _______ },
691 /* lt_zero */ { _______, lt_zero, le_zero, gt_zero, ge_zero, ne_zero, eq_zero },
692 /* le_zero */ { _______, le_zero, le_zero, gt_zero, ge_zero, _______, eq_zero },
693 /* gt_zero */ { gt_zero, gt_zero, gt_zero, gt_zero, gt_zero, gt_zero, gt_zero },
694 /* ge_zero */ { ge_zero, ge_zero, ge_zero, gt_zero, ge_zero, ge_zero, ge_zero },
695 /* ne_zero */ { _______, ne_zero, _______, gt_zero, ge_zero, ne_zero, _______ },
696 /* eq_zero */ { _______, eq_zero, eq_zero, gt_zero, ge_zero, _______, eq_zero }
697 };
698
699 /* Treat fmax as commutative. */
700 ASSERT_TABLE_IS_COMMUTATIVE(table);
701 ASSERT_TABLE_IS_DIAGONAL(table);
702 ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_2_SOURCE(table);
703
704 r.range = table[left.range][right.range];
705 break;
706 }
707
708 case nir_op_fmin: {
709 const struct ssa_result_range left =
710 analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
711 const struct ssa_result_range right =
712 analyze_expression(alu, 1, ht, nir_alu_src_type(alu, 1));
713
714 r.is_integral = left.is_integral && right.is_integral;
715
716 /* lt_zero: fmin(lt_zero, *)
717 * | fmin(*, lt_zero) # Treat fmin as commutative
718 * ;
719 *
720 * le_zero: fmin(le_zero, ne_zero)
721 * | fmin(le_zero, gt_zero)
722 * | fmin(le_zero, ge_zero)
723 * | fmin(le_zero, eq_zero)
724 * | fmin(ne_zero, le_zero) # Treat fmin as commutative
725 * | fmin(gt_zero, le_zero) # Treat fmin as commutative
726 * | fmin(ge_zero, le_zero) # Treat fmin as commutative
727 * | fmin(eq_zero, le_zero) # Treat fmin as commutative
728 * | fmin(le_zero, le_zero)
729 * ;
730 *
731 * ge_zero: fmin(ge_zero, gt_zero)
732 * | fmin(gt_zero, ge_zero) # Treat fmin as commutative
733 * | fmin(ge_zero, ge_zero)
734 * ;
735 *
736 * gt_zero: fmin(gt_zero, gt_zero)
737 * ;
738 *
739 * ne_zero: fmin(ne_zero, gt_zero)
740 * | fmin(gt_zero, ne_zero) # Treat fmin as commutative
741 * | fmin(ne_zero, ne_zero)
742 * ;
743 *
744 * eq_zero: fmin(eq_zero, ge_zero)
745 * | fmin(eq_zero, gt_zero)
746 * | fmin(ge_zero, eq_zero) # Treat fmin as commutative
747 * | fmin(gt_zero, eq_zero) # Treat fmin as commutative
748 * | fmin(eq_zero, eq_zero)
749 * ;
750 *
751 * All other cases are 'unknown'.
752 */
753 static const enum ssa_ranges table[last_range + 1][last_range + 1] = {
754 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
755 /* unknown */ { _______, lt_zero, le_zero, _______, _______, _______, _______ },
756 /* lt_zero */ { lt_zero, lt_zero, lt_zero, lt_zero, lt_zero, lt_zero, lt_zero },
757 /* le_zero */ { le_zero, lt_zero, le_zero, le_zero, le_zero, le_zero, le_zero },
758 /* gt_zero */ { _______, lt_zero, le_zero, gt_zero, ge_zero, ne_zero, eq_zero },
759 /* ge_zero */ { _______, lt_zero, le_zero, ge_zero, ge_zero, _______, eq_zero },
760 /* ne_zero */ { _______, lt_zero, le_zero, ne_zero, _______, ne_zero, _______ },
761 /* eq_zero */ { _______, lt_zero, le_zero, eq_zero, eq_zero, _______, eq_zero }
762 };
763
764 /* Treat fmin as commutative. */
765 ASSERT_TABLE_IS_COMMUTATIVE(table);
766 ASSERT_TABLE_IS_DIAGONAL(table);
767 ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_2_SOURCE(table);
768
769 r.range = table[left.range][right.range];
770 break;
771 }
772
773 case nir_op_fmul: {
774 const struct ssa_result_range left =
775 analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
776 const struct ssa_result_range right =
777 analyze_expression(alu, 1, ht, nir_alu_src_type(alu, 1));
778
779 r.is_integral = left.is_integral && right.is_integral;
780
781 /* x * x => ge_zero */
782 if (left.range != eq_zero && nir_alu_srcs_equal(alu, alu, 0, 1)) {
783 /* Even if x > 0, the result of x*x can be zero when x is, for
784 * example, a subnormal number.
785 */
786 r.range = ge_zero;
787 } else if (left.range != eq_zero && nir_alu_srcs_negative_equal(alu, alu, 0, 1)) {
788 /* -x * x => le_zero. */
789 r.range = le_zero;
790 } else
791 r.range = fmul_table[left.range][right.range];
792
793 break;
794 }
795
796 case nir_op_frcp:
797 r = (struct ssa_result_range){
798 analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0)).range,
799 false
800 };
801 break;
802
803 case nir_op_mov: {
804 const struct ssa_result_range left =
805 analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
806
807 /* See commentary in nir_op_bcsel for the reasons this is necessary. */
808 if (nir_src_is_const(alu->src[0].src) && left.range != eq_zero)
809 return (struct ssa_result_range){unknown, false};
810
811 r = left;
812 break;
813 }
814
815 case nir_op_fneg:
816 r = analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
817
818 r.range = fneg_table[r.range];
819 break;
820
821 case nir_op_fsat:
822 r = analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
823
824 switch (r.range) {
825 case le_zero:
826 case lt_zero:
827 r.range = eq_zero;
828 r.is_integral = true;
829 break;
830
831 case eq_zero:
832 assert(r.is_integral);
833 case gt_zero:
834 case ge_zero:
835 /* The fsat doesn't add any information in these cases. */
836 break;
837
838 case ne_zero:
839 case unknown:
840 /* Since the result must be in [0, 1], the value must be >= 0. */
841 r.range = ge_zero;
842 break;
843 }
844 break;
845
846 case nir_op_fsign:
847 r = (struct ssa_result_range){
848 analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0)).range,
849 true
850 };
851 break;
852
853 case nir_op_fsqrt:
854 case nir_op_frsq:
855 r = (struct ssa_result_range){ge_zero, false};
856 break;
857
858 case nir_op_ffloor: {
859 const struct ssa_result_range left =
860 analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
861
862 r.is_integral = true;
863
864 if (left.is_integral || left.range == le_zero || left.range == lt_zero)
865 r.range = left.range;
866 else if (left.range == ge_zero || left.range == gt_zero)
867 r.range = ge_zero;
868 else if (left.range == ne_zero)
869 r.range = unknown;
870
871 break;
872 }
873
874 case nir_op_fceil: {
875 const struct ssa_result_range left =
876 analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
877
878 r.is_integral = true;
879
880 if (left.is_integral || left.range == ge_zero || left.range == gt_zero)
881 r.range = left.range;
882 else if (left.range == le_zero || left.range == lt_zero)
883 r.range = le_zero;
884 else if (left.range == ne_zero)
885 r.range = unknown;
886
887 break;
888 }
889
890 case nir_op_ftrunc: {
891 const struct ssa_result_range left =
892 analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
893
894 r.is_integral = true;
895
896 if (left.is_integral)
897 r.range = left.range;
898 else if (left.range == ge_zero || left.range == gt_zero)
899 r.range = ge_zero;
900 else if (left.range == le_zero || left.range == lt_zero)
901 r.range = le_zero;
902 else if (left.range == ne_zero)
903 r.range = unknown;
904
905 break;
906 }
907
908 case nir_op_flt:
909 case nir_op_fge:
910 case nir_op_feq:
911 case nir_op_fne:
912 case nir_op_ilt:
913 case nir_op_ige:
914 case nir_op_ieq:
915 case nir_op_ine:
916 case nir_op_ult:
917 case nir_op_uge:
918 /* Boolean results are 0 or -1. */
919 r = (struct ssa_result_range){le_zero, false};
920 break;
921
922 case nir_op_fpow: {
923 /* Due to flush-to-zero semanatics of floating-point numbers with very
924 * small mangnitudes, we can never really be sure a result will be
925 * non-zero.
926 *
927 * NIR uses pow() and powf() to constant evaluate nir_op_fpow. The man
928 * page for that function says:
929 *
930 * If y is 0, the result is 1.0 (even if x is a NaN).
931 *
932 * gt_zero: pow(*, eq_zero)
933 * | pow(eq_zero, lt_zero) # 0^-y = +inf
934 * | pow(eq_zero, le_zero) # 0^-y = +inf or 0^0 = 1.0
935 * ;
936 *
937 * eq_zero: pow(eq_zero, gt_zero)
938 * ;
939 *
940 * ge_zero: pow(gt_zero, gt_zero)
941 * | pow(gt_zero, ge_zero)
942 * | pow(gt_zero, lt_zero)
943 * | pow(gt_zero, le_zero)
944 * | pow(gt_zero, ne_zero)
945 * | pow(gt_zero, unknown)
946 * | pow(ge_zero, gt_zero)
947 * | pow(ge_zero, ge_zero)
948 * | pow(ge_zero, lt_zero)
949 * | pow(ge_zero, le_zero)
950 * | pow(ge_zero, ne_zero)
951 * | pow(ge_zero, unknown)
952 * | pow(eq_zero, ge_zero) # 0^0 = 1.0 or 0^+y = 0.0
953 * | pow(eq_zero, ne_zero) # 0^-y = +inf or 0^+y = 0.0
954 * | pow(eq_zero, unknown) # union of all other y cases
955 * ;
956 *
957 * All other cases are unknown.
958 *
959 * We could do better if the right operand is a constant, integral
960 * value.
961 */
962 static const enum ssa_ranges table[last_range + 1][last_range + 1] = {
963 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
964 /* unknown */ { _______, _______, _______, _______, _______, _______, gt_zero },
965 /* lt_zero */ { _______, _______, _______, _______, _______, _______, gt_zero },
966 /* le_zero */ { _______, _______, _______, _______, _______, _______, gt_zero },
967 /* gt_zero */ { ge_zero, ge_zero, ge_zero, ge_zero, ge_zero, ge_zero, gt_zero },
968 /* ge_zero */ { ge_zero, ge_zero, ge_zero, ge_zero, ge_zero, ge_zero, gt_zero },
969 /* ne_zero */ { _______, _______, _______, _______, _______, _______, gt_zero },
970 /* eq_zero */ { ge_zero, gt_zero, gt_zero, eq_zero, ge_zero, ge_zero, gt_zero },
971 };
972
973 const struct ssa_result_range left =
974 analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
975 const struct ssa_result_range right =
976 analyze_expression(alu, 1, ht, nir_alu_src_type(alu, 1));
977
978 ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(table);
979 ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_2_SOURCE(table);
980
981 r.is_integral = left.is_integral && right.is_integral &&
982 is_not_negative(right.range);
983 r.range = table[left.range][right.range];
984 break;
985 }
986
987 case nir_op_ffma: {
988 const struct ssa_result_range first =
989 analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
990 const struct ssa_result_range second =
991 analyze_expression(alu, 1, ht, nir_alu_src_type(alu, 1));
992 const struct ssa_result_range third =
993 analyze_expression(alu, 2, ht, nir_alu_src_type(alu, 2));
994
995 r.is_integral = first.is_integral && second.is_integral &&
996 third.is_integral;
997
998 enum ssa_ranges fmul_range;
999
1000 if (first.range != eq_zero && nir_alu_srcs_equal(alu, alu, 0, 1)) {
1001 /* See handling of nir_op_fmul for explanation of why ge_zero is the
1002 * range.
1003 */
1004 fmul_range = ge_zero;
1005 } else if (first.range != eq_zero && nir_alu_srcs_negative_equal(alu, alu, 0, 1)) {
1006 /* -x * x => le_zero */
1007 fmul_range = le_zero;
1008 } else
1009 fmul_range = fmul_table[first.range][second.range];
1010
1011 r.range = fadd_table[fmul_range][third.range];
1012 break;
1013 }
1014
1015 case nir_op_flrp: {
1016 const struct ssa_result_range first =
1017 analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
1018 const struct ssa_result_range second =
1019 analyze_expression(alu, 1, ht, nir_alu_src_type(alu, 1));
1020 const struct ssa_result_range third =
1021 analyze_expression(alu, 2, ht, nir_alu_src_type(alu, 2));
1022
1023 r.is_integral = first.is_integral && second.is_integral &&
1024 third.is_integral;
1025
1026 /* Decompose the flrp to first + third * (second + -first) */
1027 const enum ssa_ranges inner_fadd_range =
1028 fadd_table[second.range][fneg_table[first.range]];
1029
1030 const enum ssa_ranges fmul_range =
1031 fmul_table[third.range][inner_fadd_range];
1032
1033 r.range = fadd_table[first.range][fmul_range];
1034 break;
1035 }
1036
1037 default:
1038 r = (struct ssa_result_range){unknown, false};
1039 break;
1040 }
1041
1042 if (r.range == eq_zero)
1043 r.is_integral = true;
1044
1045 _mesa_hash_table_insert(ht, pack_key(alu, use_type), pack_data(r));
1046 return r;
1047 }
1048
1049 #undef _______
1050
1051 struct ssa_result_range
1052 nir_analyze_range(const nir_alu_instr *instr, unsigned src)
1053 {
1054 struct hash_table *ht = _mesa_pointer_hash_table_create(NULL);
1055
1056 const struct ssa_result_range r =
1057 analyze_expression(instr, src, ht, nir_alu_src_type(instr, src));
1058
1059 _mesa_hash_table_destroy(ht, NULL);
1060
1061 return r;
1062 }