/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include <math.h>
#include <float.h>
#include "nir.h"
#include "nir_range_analysis.h"
#include "util/hash_table.h"

/**
 * Analyzes a sequence of operations to determine some aspects of the range of
 * the result.
 */

static bool
is_not_negative(enum ssa_ranges r)
{
   return r == gt_zero || r == ge_zero || r == eq_zero;
}

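/* The results of the analysis are memoized in a hash table (see
 * analyze_expression).  A struct ssa_result_range is packed into the
 * pointer-sized hash table value without any extra allocation: bits [7:0]
 * hold the enum ssa_ranges value and bit 8 holds the is_integral flag.  For
 * example, { gt_zero, true } packs to (gt_zero | (1 << 8)).
 */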
static void *
pack_data(const struct ssa_result_range r)
{
   return (void *)(uintptr_t)(r.range | r.is_integral << 8);
}

static struct ssa_result_range
unpack_data(const void *p)
{
   const uintptr_t v = (uintptr_t) p;

   return (struct ssa_result_range){v & 0xff, (v & 0x0ff00) != 0};
}

static struct ssa_result_range
analyze_constant(const struct nir_alu_instr *instr, unsigned src)
{
   uint8_t swizzle[4] = { 0, 1, 2, 3 };

   /* If the source is an explicitly sized source, then we need to reset
    * both the number of components and the swizzle.
    */
   const unsigned num_components = nir_ssa_alu_instr_src_components(instr, src);

   for (unsigned i = 0; i < num_components; ++i)
      swizzle[i] = instr->src[src].swizzle[i];

   const nir_load_const_instr *const load =
      nir_instr_as_load_const(instr->src[src].src.ssa->parent_instr);

   struct ssa_result_range r = { unknown, false };

   switch (nir_op_infos[instr->op].input_types[src]) {
   case nir_type_float: {
      double min_value = DBL_MAX;
      double max_value = -DBL_MAX;
      bool any_zero = false;
      bool all_zero = true;

      r.is_integral = true;

      for (unsigned i = 0; i < num_components; ++i) {
         const double v = nir_const_value_as_float(load->value[swizzle[i]],
                                                   load->def.bit_size);

         if (floor(v) != v)
            r.is_integral = false;

         any_zero = any_zero || (v == 0.0);
         all_zero = all_zero && (v == 0.0);
         min_value = MIN2(min_value, v);
         max_value = MAX2(max_value, v);
      }

      assert(any_zero >= all_zero);
      assert(isnan(max_value) || max_value >= min_value);

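      /* For example, a vec3 constant (0.0, 1.5, 2.0) has min_value == 0.0 and
       * max_value == 2.0 with any_zero set, so it classifies as ge_zero, and
       * is_integral is false because 1.5 is not an integer.
       */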
      if (all_zero)
         r.range = eq_zero;
      else if (min_value > 0.0)
         r.range = gt_zero;
      else if (min_value == 0.0)
         r.range = ge_zero;
      else if (max_value < 0.0)
         r.range = lt_zero;
      else if (max_value == 0.0)
         r.range = le_zero;
      else if (!any_zero)
         r.range = ne_zero;
      else
         r.range = unknown;

      return r;
   }

   case nir_type_int:
   case nir_type_bool: {
      int64_t min_value = INT64_MAX;
      int64_t max_value = INT64_MIN;
      bool any_zero = false;
      bool all_zero = true;

      for (unsigned i = 0; i < num_components; ++i) {
         const int64_t v = nir_const_value_as_int(load->value[swizzle[i]],
                                                  load->def.bit_size);

         any_zero = any_zero || (v == 0);
         all_zero = all_zero && (v == 0);
         min_value = MIN2(min_value, v);
         max_value = MAX2(max_value, v);
      }

      assert(any_zero >= all_zero);
      assert(max_value >= min_value);

      if (all_zero)
         r.range = eq_zero;
      else if (min_value > 0)
         r.range = gt_zero;
      else if (min_value == 0)
         r.range = ge_zero;
      else if (max_value < 0)
         r.range = lt_zero;
      else if (max_value == 0)
         r.range = le_zero;
      else if (!any_zero)
         r.range = ne_zero;
      else
         r.range = unknown;

      return r;
   }

   case nir_type_uint: {
      bool any_zero = false;
      bool all_zero = true;

      for (unsigned i = 0; i < num_components; ++i) {
         const uint64_t v = nir_const_value_as_uint(load->value[swizzle[i]],
                                                    load->def.bit_size);

         any_zero = any_zero || (v == 0);
         all_zero = all_zero && (v == 0);
      }

      assert(any_zero >= all_zero);

      if (all_zero)
         r.range = eq_zero;
      else if (any_zero)
         r.range = ge_zero;
      else
         r.range = gt_zero;

      return r;
   }

   default:
      unreachable("Invalid alu source type");
   }
}

/**
 * Short-hand name for use in the tables in analyze_expression.  If this name
 * becomes a problem on some compiler, we can change it to _.
 */
#define _______ unknown

#ifndef NDEBUG
#define ASSERT_TABLE_IS_COMMUTATIVE(t)                        \
   do {                                                       \
      for (unsigned r = 0; r < ARRAY_SIZE(t); r++) {          \
         for (unsigned c = 0; c < ARRAY_SIZE(t[0]); c++)      \
            assert(t[r][c] == t[c][r]);                       \
      }                                                       \
   } while (false)

#define ASSERT_TABLE_IS_DIAGONAL(t)                           \
   do {                                                       \
      for (unsigned r = 0; r < ARRAY_SIZE(t); r++)            \
         assert(t[r][r] == r);                                \
   } while (false)

static enum ssa_ranges
union_ranges(enum ssa_ranges a, enum ssa_ranges b)
{
   static const enum ssa_ranges union_table[last_range + 1][last_range + 1] = {
      /* left\right   unknown  lt_zero  le_zero  gt_zero  ge_zero  ne_zero  eq_zero */
      /* unknown */ { _______, _______, _______, _______, _______, _______, _______ },
      /* lt_zero */ { _______, lt_zero, le_zero, ne_zero, _______, ne_zero, le_zero },
      /* le_zero */ { _______, le_zero, le_zero, _______, _______, _______, le_zero },
      /* gt_zero */ { _______, ne_zero, _______, gt_zero, ge_zero, ne_zero, ge_zero },
      /* ge_zero */ { _______, _______, _______, ge_zero, ge_zero, _______, ge_zero },
      /* ne_zero */ { _______, ne_zero, _______, ne_zero, _______, ne_zero, _______ },
      /* eq_zero */ { _______, le_zero, le_zero, ge_zero, ge_zero, _______, eq_zero },
   };
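
   /* Reading the table: the result is the weakest fact that covers both
    * inputs.  For example, union_ranges(gt_zero, eq_zero) is ge_zero: a value
    * that is either strictly positive or exactly zero is only known to be
    * non-negative.
    */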

   ASSERT_TABLE_IS_COMMUTATIVE(union_table);
   ASSERT_TABLE_IS_DIAGONAL(union_table);

   return union_table[a][b];
}

/* Verify that the 'unknown' entry in each row (or column) of the table is the
 * union of all the other values in the row (or column).
 */
#define ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_2_SOURCE(t)              \
   do {                                                                 \
      for (unsigned i = 0; i < last_range; i++) {                       \
         enum ssa_ranges col_range = t[i][unknown + 1];                 \
         enum ssa_ranges row_range = t[unknown + 1][i];                 \
                                                                        \
         for (unsigned j = unknown + 2; j < last_range; j++) {          \
            col_range = union_ranges(col_range, t[i][j]);               \
            row_range = union_ranges(row_range, t[j][i]);               \
         }                                                              \
                                                                        \
         assert(col_range == t[i][unknown]);                            \
         assert(row_range == t[unknown][i]);                            \
      }                                                                 \
   } while (false)

/* For most operations, the union of ranges for a strict inequality and
 * equality should be the range of the non-strict inequality (e.g.,
 * union_ranges(range(op(lt_zero)), range(op(eq_zero))) == range(op(le_zero))).
 *
 * Does not apply to selection-like opcodes (bcsel, fmin, fmax, etc.).
 */
#define ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_1_SOURCE(t) \
   do {                                                                  \
      assert(union_ranges(t[lt_zero], t[eq_zero]) == t[le_zero]);        \
      assert(union_ranges(t[gt_zero], t[eq_zero]) == t[ge_zero]);        \
   } while (false)

#define ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_2_SOURCE(t)      \
   do {                                                                       \
      for (unsigned i = 0; i < last_range; i++) {                             \
         assert(union_ranges(t[i][lt_zero], t[i][eq_zero]) == t[i][le_zero]); \
         assert(union_ranges(t[i][gt_zero], t[i][eq_zero]) == t[i][ge_zero]); \
         assert(union_ranges(t[lt_zero][i], t[eq_zero][i]) == t[le_zero][i]); \
         assert(union_ranges(t[gt_zero][i], t[eq_zero][i]) == t[ge_zero][i]); \
      }                                                                       \
   } while (false)

/* Several other unordered tuples span the range of "everything."  Each should
 * have the same value as unknown: (lt_zero, ge_zero), (le_zero, gt_zero), and
 * (eq_zero, ne_zero).  union_ranges is already commutative, so only one
 * ordering needs to be checked.
 *
 * Does not apply to selection-like opcodes (bcsel, fmin, fmax, etc.).
 *
 * In cases where this can be used, it is unnecessary to also use
 * ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_*_SOURCE.  For any range X,
 * union_ranges(X, X) == X.  The disjoint ranges cover all of the non-unknown
 * possibilities, so the union of all the unions of disjoint ranges is
 * equivalent to the union of "others."
 */
#define ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_1_SOURCE(t)         \
   do {                                                              \
      assert(union_ranges(t[lt_zero], t[ge_zero]) == t[unknown]);    \
      assert(union_ranges(t[le_zero], t[gt_zero]) == t[unknown]);    \
      assert(union_ranges(t[eq_zero], t[ne_zero]) == t[unknown]);    \
   } while (false)

#define ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(t)         \
   do {                                                              \
      for (unsigned i = 0; i < last_range; i++) {                    \
         assert(union_ranges(t[i][lt_zero], t[i][ge_zero]) ==        \
                t[i][unknown]);                                      \
         assert(union_ranges(t[i][le_zero], t[i][gt_zero]) ==        \
                t[i][unknown]);                                      \
         assert(union_ranges(t[i][eq_zero], t[i][ne_zero]) ==        \
                t[i][unknown]);                                      \
                                                                     \
         assert(union_ranges(t[lt_zero][i], t[ge_zero][i]) ==        \
                t[unknown][i]);                                      \
         assert(union_ranges(t[le_zero][i], t[gt_zero][i]) ==        \
                t[unknown][i]);                                      \
         assert(union_ranges(t[eq_zero][i], t[ne_zero][i]) ==        \
                t[unknown][i]);                                      \
      }                                                              \
   } while (false)

#else
#define ASSERT_TABLE_IS_COMMUTATIVE(t)
#define ASSERT_TABLE_IS_DIAGONAL(t)
#define ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_2_SOURCE(t)
#define ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_1_SOURCE(t)
#define ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_2_SOURCE(t)
#define ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_1_SOURCE(t)
#define ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(t)
#endif

/**
 * Analyze an expression to determine the range of its result
 *
 * The end result of this analysis is a token that communicates something
 * about the range of values.  There's an implicit grammar that produces
 * tokens from sequences of literal values, other tokens, and operations.
 * This function implements this grammar as a recursive-descent parser.  Some
 * (but not all) of the grammar is listed in-line in the function.
 */
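/* A worked example of the grammar: for fadd(fabs(x), 1.0), where nothing is
 * known about x, the fabs source yields ge_zero, the constant 1.0 yields
 * gt_zero (and is integral), and the fadd production for (ge_zero, gt_zero)
 * yields gt_zero.
 */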
static struct ssa_result_range
analyze_expression(const nir_alu_instr *instr, unsigned src,
                   struct hash_table *ht)
{
   if (!instr->src[src].src.is_ssa)
      return (struct ssa_result_range){unknown, false};

   if (nir_src_is_const(instr->src[src].src))
      return analyze_constant(instr, src);

   if (instr->src[src].src.ssa->parent_instr->type != nir_instr_type_alu)
      return (struct ssa_result_range){unknown, false};

   const struct nir_alu_instr *const alu =
      nir_instr_as_alu(instr->src[src].src.ssa->parent_instr);

   struct hash_entry *he = _mesa_hash_table_search(ht, alu);
   if (he != NULL)
      return unpack_data(he->data);

   struct ssa_result_range r = {unknown, false};

   /* ge_zero: ge_zero + ge_zero
    *
    * gt_zero: gt_zero + eq_zero
    *        | gt_zero + ge_zero
    *        | eq_zero + gt_zero   # Addition is commutative
    *        | ge_zero + gt_zero   # Addition is commutative
    *        | gt_zero + gt_zero
    *        ;
    *
    * le_zero: le_zero + le_zero
    *
    * lt_zero: lt_zero + eq_zero
    *        | lt_zero + le_zero
    *        | eq_zero + lt_zero   # Addition is commutative
    *        | le_zero + lt_zero   # Addition is commutative
    *        | lt_zero + lt_zero
    *        ;
    *
    * ne_zero: eq_zero + ne_zero
    *        | ne_zero + eq_zero   # Addition is commutative
    *        ;
    *
    * eq_zero: eq_zero + eq_zero
    *        ;
    *
    * All other cases are 'unknown'.  The seemingly odd case is (ne_zero,
    * ne_zero): the operands could be, say, -5 and +5, whose sum is zero,
    * which is not ne_zero.
    */
   static const enum ssa_ranges fadd_table[last_range + 1][last_range + 1] = {
      /* left\right   unknown  lt_zero  le_zero  gt_zero  ge_zero  ne_zero  eq_zero */
      /* unknown */ { _______, _______, _______, _______, _______, _______, _______ },
      /* lt_zero */ { _______, lt_zero, lt_zero, _______, _______, _______, lt_zero },
      /* le_zero */ { _______, lt_zero, le_zero, _______, _______, _______, le_zero },
      /* gt_zero */ { _______, _______, _______, gt_zero, gt_zero, _______, gt_zero },
      /* ge_zero */ { _______, _______, _______, gt_zero, ge_zero, _______, ge_zero },
      /* ne_zero */ { _______, _______, _______, _______, _______, _______, ne_zero },
      /* eq_zero */ { _______, lt_zero, le_zero, gt_zero, ge_zero, ne_zero, eq_zero },
   };
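
   /* Reading the table: fadd(eq_zero, gt_zero) is gt_zero because adding a
    * strictly positive value to zero stays strictly positive, while
    * fadd(lt_zero, gt_zero) is unknown because the sum of a negative and a
    * positive value can land anywhere.
    */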

   ASSERT_TABLE_IS_COMMUTATIVE(fadd_table);
   ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(fadd_table);
   ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_2_SOURCE(fadd_table);

   /* Due to the flush-to-zero semantics of floating-point numbers with very
    * small magnitudes, we can never really be sure a result will be non-zero.
    *
    * ge_zero: ge_zero * ge_zero
    *        | ge_zero * gt_zero
    *        | ge_zero * eq_zero
    *        | le_zero * lt_zero
    *        | lt_zero * le_zero   # Multiplication is commutative
    *        | le_zero * le_zero
    *        | gt_zero * ge_zero   # Multiplication is commutative
    *        | eq_zero * ge_zero   # Multiplication is commutative
    *        | a * a               # Left source == right source
    *        | gt_zero * gt_zero
    *        | lt_zero * lt_zero
    *        ;
    *
    * le_zero: ge_zero * le_zero
    *        | ge_zero * lt_zero
    *        | lt_zero * ge_zero   # Multiplication is commutative
    *        | le_zero * ge_zero   # Multiplication is commutative
    *        | le_zero * gt_zero
    *        | lt_zero * gt_zero
    *        | gt_zero * lt_zero   # Multiplication is commutative
    *        ;
    *
    * eq_zero: eq_zero * <any>
    *        | <any> * eq_zero     # Multiplication is commutative
    *        ;
    *
    * All other cases are 'unknown'.
    */
   static const enum ssa_ranges fmul_table[last_range + 1][last_range + 1] = {
      /* left\right   unknown  lt_zero  le_zero  gt_zero  ge_zero  ne_zero  eq_zero */
      /* unknown */ { _______, _______, _______, _______, _______, _______, eq_zero },
      /* lt_zero */ { _______, ge_zero, ge_zero, le_zero, le_zero, _______, eq_zero },
      /* le_zero */ { _______, ge_zero, ge_zero, le_zero, le_zero, _______, eq_zero },
      /* gt_zero */ { _______, le_zero, le_zero, ge_zero, ge_zero, _______, eq_zero },
      /* ge_zero */ { _______, le_zero, le_zero, ge_zero, ge_zero, _______, eq_zero },
      /* ne_zero */ { _______, _______, _______, _______, _______, _______, eq_zero },
      /* eq_zero */ { eq_zero, eq_zero, eq_zero, eq_zero, eq_zero, eq_zero, eq_zero }
   };
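
   /* Note that even fmul(gt_zero, gt_zero) only maps to ge_zero: the product
    * of two very small positive values can be flushed to 0.0, so strict
    * positivity is not preserved.
    */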

   ASSERT_TABLE_IS_COMMUTATIVE(fmul_table);
   ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(fmul_table);
   ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_2_SOURCE(fmul_table);

   static const enum ssa_ranges fneg_table[last_range + 1] = {
      /* unknown  lt_zero  le_zero  gt_zero  ge_zero  ne_zero  eq_zero */
         _______, gt_zero, ge_zero, lt_zero, le_zero, ne_zero, eq_zero
   };

   ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_1_SOURCE(fneg_table);
   ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_1_SOURCE(fneg_table);

   switch (alu->op) {
   case nir_op_b2f32:
   case nir_op_b2i32:
      r = (struct ssa_result_range){ge_zero, alu->op == nir_op_b2f32};
      break;

   case nir_op_bcsel: {
      const struct ssa_result_range left = analyze_expression(alu, 1, ht);
      const struct ssa_result_range right = analyze_expression(alu, 2, ht);

      /* If either source is a constant load that is not zero, punt.  The type
       * will always be uint regardless of the actual type.  We can't even
       * decide if the value is non-zero because -0.0 is 0x80000000, and that
       * will (possibly incorrectly) be considered non-zero.
       */
      /* FINISHME: We could do better, but it would require having the expected
       * FINISHME: type passed in.
       */
      if ((nir_src_is_const(alu->src[1].src) && left.range != eq_zero) ||
          (nir_src_is_const(alu->src[2].src) && right.range != eq_zero)) {
         return (struct ssa_result_range){unknown, false};
      }

      r.is_integral = left.is_integral && right.is_integral;

      /* le_zero: bcsel(<any>, le_zero, lt_zero)
       *        | bcsel(<any>, eq_zero, lt_zero)
       *        | bcsel(<any>, le_zero, eq_zero)
       *        | bcsel(<any>, lt_zero, le_zero)
       *        | bcsel(<any>, lt_zero, eq_zero)
       *        | bcsel(<any>, eq_zero, le_zero)
       *        | bcsel(<any>, le_zero, le_zero)
       *        ;
       *
       * lt_zero: bcsel(<any>, lt_zero, lt_zero)
       *        ;
       *
       * ge_zero: bcsel(<any>, ge_zero, ge_zero)
       *        | bcsel(<any>, ge_zero, gt_zero)
       *        | bcsel(<any>, ge_zero, eq_zero)
       *        | bcsel(<any>, gt_zero, ge_zero)
       *        | bcsel(<any>, eq_zero, ge_zero)
       *        ;
       *
       * gt_zero: bcsel(<any>, gt_zero, gt_zero)
       *        ;
       *
       * ne_zero: bcsel(<any>, ne_zero, gt_zero)
       *        | bcsel(<any>, ne_zero, lt_zero)
       *        | bcsel(<any>, gt_zero, lt_zero)
       *        | bcsel(<any>, gt_zero, ne_zero)
       *        | bcsel(<any>, lt_zero, ne_zero)
       *        | bcsel(<any>, lt_zero, gt_zero)
       *        | bcsel(<any>, ne_zero, ne_zero)
       *        ;
       *
       * eq_zero: bcsel(<any>, eq_zero, eq_zero)
       *        ;
       *
       * All other cases are 'unknown'.
       *
       * The ranges could be tightened if the range of the first source is
       * known.  However, opt_algebraic will (eventually) eliminate the bcsel
       * if the condition is known.
       */
      static const enum ssa_ranges table[last_range + 1][last_range + 1] = {
         /* left\right   unknown  lt_zero  le_zero  gt_zero  ge_zero  ne_zero  eq_zero */
         /* unknown */ { _______, _______, _______, _______, _______, _______, _______ },
         /* lt_zero */ { _______, lt_zero, le_zero, ne_zero, _______, ne_zero, le_zero },
         /* le_zero */ { _______, le_zero, le_zero, _______, _______, _______, le_zero },
         /* gt_zero */ { _______, ne_zero, _______, gt_zero, ge_zero, ne_zero, ge_zero },
         /* ge_zero */ { _______, _______, _______, ge_zero, ge_zero, _______, ge_zero },
         /* ne_zero */ { _______, ne_zero, _______, ne_zero, _______, ne_zero, _______ },
         /* eq_zero */ { _______, le_zero, le_zero, ge_zero, ge_zero, _______, eq_zero },
      };

      ASSERT_TABLE_IS_COMMUTATIVE(table);
      ASSERT_TABLE_IS_DIAGONAL(table);
      ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_2_SOURCE(table);

      r.range = table[left.range][right.range];
      break;
   }

   case nir_op_i2f32:
   case nir_op_u2f32:
      r = analyze_expression(alu, 0, ht);

      r.is_integral = true;

      if (r.range == unknown && alu->op == nir_op_u2f32)
         r.range = ge_zero;

      break;

   case nir_op_fabs:
      r = analyze_expression(alu, 0, ht);

      switch (r.range) {
      case unknown:
      case le_zero:
      case ge_zero:
         r.range = ge_zero;
         break;

      case lt_zero:
      case gt_zero:
      case ne_zero:
         r.range = gt_zero;
         break;

      case eq_zero:
         break;
      }

      break;

   case nir_op_fadd: {
      const struct ssa_result_range left = analyze_expression(alu, 0, ht);
      const struct ssa_result_range right = analyze_expression(alu, 1, ht);

      r.is_integral = left.is_integral && right.is_integral;
      r.range = fadd_table[left.range][right.range];
      break;
   }

   case nir_op_fexp2: {
      /* If the parameter might be less than zero, the mathematical result
       * will be in (0, 1).  For negative parameters of sufficiently large
       * magnitude, the result will flush to zero.
       */
      static const enum ssa_ranges table[last_range + 1] = {
         /* unknown  lt_zero  le_zero  gt_zero  ge_zero  ne_zero  eq_zero */
            ge_zero, ge_zero, ge_zero, gt_zero, gt_zero, ge_zero, gt_zero
      };

      r = analyze_expression(alu, 0, ht);

      ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_1_SOURCE(table);
      ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_1_SOURCE(table);

      r.is_integral = r.is_integral && is_not_negative(r.range);
      r.range = table[r.range];
      break;
   }

   case nir_op_fmax: {
      const struct ssa_result_range left = analyze_expression(alu, 0, ht);
      const struct ssa_result_range right = analyze_expression(alu, 1, ht);

      r.is_integral = left.is_integral && right.is_integral;

      /* gt_zero: fmax(gt_zero, *)
       *        | fmax(*, gt_zero)        # Treat fmax as commutative
       *        ;
       *
       * ge_zero: fmax(ge_zero, ne_zero)
       *        | fmax(ge_zero, lt_zero)
       *        | fmax(ge_zero, le_zero)
       *        | fmax(ge_zero, eq_zero)
       *        | fmax(ne_zero, ge_zero)  # Treat fmax as commutative
       *        | fmax(lt_zero, ge_zero)  # Treat fmax as commutative
       *        | fmax(le_zero, ge_zero)  # Treat fmax as commutative
       *        | fmax(eq_zero, ge_zero)  # Treat fmax as commutative
       *        | fmax(ge_zero, ge_zero)
       *        ;
       *
       * le_zero: fmax(le_zero, lt_zero)
       *        | fmax(lt_zero, le_zero)  # Treat fmax as commutative
       *        | fmax(le_zero, le_zero)
       *        ;
       *
       * lt_zero: fmax(lt_zero, lt_zero)
       *        ;
       *
       * ne_zero: fmax(ne_zero, lt_zero)
       *        | fmax(lt_zero, ne_zero)  # Treat fmax as commutative
       *        | fmax(ne_zero, ne_zero)
       *        ;
       *
       * eq_zero: fmax(eq_zero, le_zero)
       *        | fmax(eq_zero, lt_zero)
       *        | fmax(le_zero, eq_zero)  # Treat fmax as commutative
       *        | fmax(lt_zero, eq_zero)  # Treat fmax as commutative
       *        | fmax(eq_zero, eq_zero)
       *        ;
       *
       * All other cases are 'unknown'.
       */
      static const enum ssa_ranges table[last_range + 1][last_range + 1] = {
         /* left\right   unknown  lt_zero  le_zero  gt_zero  ge_zero  ne_zero  eq_zero */
         /* unknown */ { _______, _______, _______, gt_zero, ge_zero, _______, _______ },
         /* lt_zero */ { _______, lt_zero, le_zero, gt_zero, ge_zero, ne_zero, eq_zero },
         /* le_zero */ { _______, le_zero, le_zero, gt_zero, ge_zero, _______, eq_zero },
         /* gt_zero */ { gt_zero, gt_zero, gt_zero, gt_zero, gt_zero, gt_zero, gt_zero },
         /* ge_zero */ { ge_zero, ge_zero, ge_zero, gt_zero, ge_zero, ge_zero, ge_zero },
         /* ne_zero */ { _______, ne_zero, _______, gt_zero, ge_zero, ne_zero, _______ },
         /* eq_zero */ { _______, eq_zero, eq_zero, gt_zero, ge_zero, _______, eq_zero }
      };

      /* Treat fmax as commutative. */
      ASSERT_TABLE_IS_COMMUTATIVE(table);
      ASSERT_TABLE_IS_DIAGONAL(table);
      ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_2_SOURCE(table);

      r.range = table[left.range][right.range];
      break;
   }

   case nir_op_fmin: {
      const struct ssa_result_range left = analyze_expression(alu, 0, ht);
      const struct ssa_result_range right = analyze_expression(alu, 1, ht);

      r.is_integral = left.is_integral && right.is_integral;

      /* lt_zero: fmin(lt_zero, *)
       *        | fmin(*, lt_zero)        # Treat fmin as commutative
       *        ;
       *
       * le_zero: fmin(le_zero, ne_zero)
       *        | fmin(le_zero, gt_zero)
       *        | fmin(le_zero, ge_zero)
       *        | fmin(le_zero, eq_zero)
       *        | fmin(ne_zero, le_zero)  # Treat fmin as commutative
       *        | fmin(gt_zero, le_zero)  # Treat fmin as commutative
       *        | fmin(ge_zero, le_zero)  # Treat fmin as commutative
       *        | fmin(eq_zero, le_zero)  # Treat fmin as commutative
       *        | fmin(le_zero, le_zero)
       *        ;
       *
       * ge_zero: fmin(ge_zero, gt_zero)
       *        | fmin(gt_zero, ge_zero)  # Treat fmin as commutative
       *        | fmin(ge_zero, ge_zero)
       *        ;
       *
       * gt_zero: fmin(gt_zero, gt_zero)
       *        ;
       *
       * ne_zero: fmin(ne_zero, gt_zero)
       *        | fmin(gt_zero, ne_zero)  # Treat fmin as commutative
       *        | fmin(ne_zero, ne_zero)
       *        ;
       *
       * eq_zero: fmin(eq_zero, ge_zero)
       *        | fmin(eq_zero, gt_zero)
       *        | fmin(ge_zero, eq_zero)  # Treat fmin as commutative
       *        | fmin(gt_zero, eq_zero)  # Treat fmin as commutative
       *        | fmin(eq_zero, eq_zero)
       *        ;
       *
       * All other cases are 'unknown'.
       */
      static const enum ssa_ranges table[last_range + 1][last_range + 1] = {
         /* left\right   unknown  lt_zero  le_zero  gt_zero  ge_zero  ne_zero  eq_zero */
         /* unknown */ { _______, lt_zero, le_zero, _______, _______, _______, _______ },
         /* lt_zero */ { lt_zero, lt_zero, lt_zero, lt_zero, lt_zero, lt_zero, lt_zero },
         /* le_zero */ { le_zero, lt_zero, le_zero, le_zero, le_zero, le_zero, le_zero },
         /* gt_zero */ { _______, lt_zero, le_zero, gt_zero, ge_zero, ne_zero, eq_zero },
         /* ge_zero */ { _______, lt_zero, le_zero, ge_zero, ge_zero, _______, eq_zero },
         /* ne_zero */ { _______, lt_zero, le_zero, ne_zero, _______, ne_zero, _______ },
         /* eq_zero */ { _______, lt_zero, le_zero, eq_zero, eq_zero, _______, eq_zero }
      };

      /* Treat fmin as commutative. */
      ASSERT_TABLE_IS_COMMUTATIVE(table);
      ASSERT_TABLE_IS_DIAGONAL(table);
      ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_2_SOURCE(table);

      r.range = table[left.range][right.range];
      break;
   }

   case nir_op_fmul: {
      const struct ssa_result_range left = analyze_expression(alu, 0, ht);
      const struct ssa_result_range right = analyze_expression(alu, 1, ht);

      r.is_integral = left.is_integral && right.is_integral;

      /* x * x => ge_zero */
      if (left.range != eq_zero && nir_alu_srcs_equal(alu, alu, 0, 1)) {
         /* Even if x > 0, the result of x*x can be zero when x is, for
          * example, a subnormal number.
          */
         r.range = ge_zero;
      } else if (left.range != eq_zero && nir_alu_srcs_negative_equal(alu, alu, 0, 1)) {
         /* -x * x => le_zero. */
         r.range = le_zero;
      } else
         r.range = fmul_table[left.range][right.range];

      break;
   }

   case nir_op_frcp:
      r = (struct ssa_result_range){analyze_expression(alu, 0, ht).range, false};
      break;

   case nir_op_mov: {
      const struct ssa_result_range left = analyze_expression(alu, 0, ht);

      /* See commentary in nir_op_bcsel for the reasons this is necessary. */
      if (nir_src_is_const(alu->src[0].src) && left.range != eq_zero)
         return (struct ssa_result_range){unknown, false};

      r = left;
      break;
   }

   case nir_op_fneg:
      r = analyze_expression(alu, 0, ht);

      r.range = fneg_table[r.range];
      break;

   case nir_op_fsat:
      r = analyze_expression(alu, 0, ht);

      switch (r.range) {
      case le_zero:
      case lt_zero:
         r.range = eq_zero;
         r.is_integral = true;
         break;

      case eq_zero:
         assert(r.is_integral);
         /* fall through */
      case gt_zero:
      case ge_zero:
         /* The fsat doesn't add any information in these cases. */
         break;

      case ne_zero:
      case unknown:
         /* Since the result must be in [0, 1], the value must be >= 0. */
         r.range = ge_zero;
         break;
      }
      break;

   case nir_op_fsign:
      r = (struct ssa_result_range){analyze_expression(alu, 0, ht).range, true};
      break;

   case nir_op_fsqrt:
   case nir_op_frsq:
      r = (struct ssa_result_range){ge_zero, false};
      break;

   case nir_op_ffloor: {
      const struct ssa_result_range left = analyze_expression(alu, 0, ht);

      r.is_integral = true;

      if (left.is_integral || left.range == le_zero || left.range == lt_zero)
         r.range = left.range;
      else if (left.range == ge_zero || left.range == gt_zero)
         r.range = ge_zero;
      else if (left.range == ne_zero)
         r.range = unknown;

      break;
   }

   case nir_op_fceil: {
      const struct ssa_result_range left = analyze_expression(alu, 0, ht);

      r.is_integral = true;

      if (left.is_integral || left.range == ge_zero || left.range == gt_zero)
         r.range = left.range;
      else if (left.range == le_zero || left.range == lt_zero)
         r.range = le_zero;
      else if (left.range == ne_zero)
         r.range = unknown;

      break;
   }

   case nir_op_ftrunc: {
      const struct ssa_result_range left = analyze_expression(alu, 0, ht);

      r.is_integral = true;

      if (left.is_integral)
         r.range = left.range;
      else if (left.range == ge_zero || left.range == gt_zero)
         r.range = ge_zero;
      else if (left.range == le_zero || left.range == lt_zero)
         r.range = le_zero;
      else if (left.range == ne_zero)
         r.range = unknown;

      break;
   }

   case nir_op_flt:
   case nir_op_fge:
   case nir_op_feq:
   case nir_op_fne:
   case nir_op_ilt:
   case nir_op_ige:
   case nir_op_ieq:
   case nir_op_ine:
   case nir_op_ult:
   case nir_op_uge:
      /* Boolean results are 0 or -1. */
      r = (struct ssa_result_range){le_zero, false};
      break;

   case nir_op_fpow: {
      /* Due to the flush-to-zero semantics of floating-point numbers with
       * very small magnitudes, we can never really be sure a result will be
       * non-zero.
       *
       * NIR uses pow() and powf() to constant evaluate nir_op_fpow.  The man
       * page for that function says:
       *
       *    If y is 0, the result is 1.0 (even if x is a NaN).
       *
       * gt_zero: pow(*, eq_zero)
       *        | pow(eq_zero, lt_zero)   # 0^-y = +inf
       *        | pow(eq_zero, le_zero)   # 0^-y = +inf or 0^0 = 1.0
       *        ;
       *
       * eq_zero: pow(eq_zero, gt_zero)
       *        ;
       *
       * ge_zero: pow(gt_zero, gt_zero)
       *        | pow(gt_zero, ge_zero)
       *        | pow(gt_zero, lt_zero)
       *        | pow(gt_zero, le_zero)
       *        | pow(gt_zero, ne_zero)
       *        | pow(gt_zero, unknown)
       *        | pow(ge_zero, gt_zero)
       *        | pow(ge_zero, ge_zero)
       *        | pow(ge_zero, lt_zero)
       *        | pow(ge_zero, le_zero)
       *        | pow(ge_zero, ne_zero)
       *        | pow(ge_zero, unknown)
       *        | pow(eq_zero, ge_zero)   # 0^0 = 1.0 or 0^+y = 0.0
       *        | pow(eq_zero, ne_zero)   # 0^-y = +inf or 0^+y = 0.0
       *        | pow(eq_zero, unknown)   # union of all other y cases
       *        ;
       *
       * All other cases are unknown.
       *
       * We could do better if the right operand is a constant, integral
       * value.
       */
      static const enum ssa_ranges table[last_range + 1][last_range + 1] = {
         /* left\right   unknown  lt_zero  le_zero  gt_zero  ge_zero  ne_zero  eq_zero */
         /* unknown */ { _______, _______, _______, _______, _______, _______, gt_zero },
         /* lt_zero */ { _______, _______, _______, _______, _______, _______, gt_zero },
         /* le_zero */ { _______, _______, _______, _______, _______, _______, gt_zero },
         /* gt_zero */ { ge_zero, ge_zero, ge_zero, ge_zero, ge_zero, ge_zero, gt_zero },
         /* ge_zero */ { ge_zero, ge_zero, ge_zero, ge_zero, ge_zero, ge_zero, gt_zero },
         /* ne_zero */ { _______, _______, _______, _______, _______, _______, gt_zero },
         /* eq_zero */ { ge_zero, gt_zero, gt_zero, eq_zero, ge_zero, ge_zero, gt_zero },
      };

      const struct ssa_result_range left = analyze_expression(alu, 0, ht);
      const struct ssa_result_range right = analyze_expression(alu, 1, ht);

      ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(table);
      ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_2_SOURCE(table);

      r.is_integral = left.is_integral && right.is_integral &&
                      is_not_negative(right.range);
      r.range = table[left.range][right.range];
      break;
   }

   case nir_op_ffma: {
      const struct ssa_result_range first = analyze_expression(alu, 0, ht);
      const struct ssa_result_range second = analyze_expression(alu, 1, ht);
      const struct ssa_result_range third = analyze_expression(alu, 2, ht);

      r.is_integral = first.is_integral && second.is_integral &&
                      third.is_integral;

      enum ssa_ranges fmul_range;

      if (first.range != eq_zero && nir_alu_srcs_equal(alu, alu, 0, 1)) {
         /* See handling of nir_op_fmul for explanation of why ge_zero is the
          * range.
          */
         fmul_range = ge_zero;
      } else if (first.range != eq_zero && nir_alu_srcs_negative_equal(alu, alu, 0, 1)) {
         /* -x * x => le_zero */
         fmul_range = le_zero;
      } else
         fmul_range = fmul_table[first.range][second.range];

      r.range = fadd_table[fmul_range][third.range];
      break;
   }

   case nir_op_flrp: {
      const struct ssa_result_range first = analyze_expression(alu, 0, ht);
      const struct ssa_result_range second = analyze_expression(alu, 1, ht);
      const struct ssa_result_range third = analyze_expression(alu, 2, ht);

      r.is_integral = first.is_integral && second.is_integral &&
                      third.is_integral;

      /* Decompose the flrp to first + third * (second + -first) */
      const enum ssa_ranges inner_fadd_range =
         fadd_table[second.range][fneg_table[first.range]];

      const enum ssa_ranges fmul_range =
         fmul_table[third.range][inner_fadd_range];

      r.range = fadd_table[first.range][fmul_range];
      break;
   }

   default:
      r = (struct ssa_result_range){unknown, false};
      break;
   }

   if (r.range == eq_zero)
      r.is_integral = true;

   _mesa_hash_table_insert(ht, alu, pack_data(r));
   return r;
}

#undef _______

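/**
 * Entry point: analyze the range of source 'src' of 'instr'.
 *
 * A hypothetical caller that wants to know whether a source is provably
 * non-negative could do:
 *
 *    const struct ssa_result_range r = nir_analyze_range(alu, 0);
 *    if (r.range == ge_zero || r.range == gt_zero || r.range == eq_zero)
 *       ... the source cannot be negative ...
 */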
struct ssa_result_range
nir_analyze_range(const nir_alu_instr *instr, unsigned src)
{
   struct hash_table *ht = _mesa_pointer_hash_table_create(NULL);

   const struct ssa_result_range r = analyze_expression(instr, src, ht);

   _mesa_hash_table_destroy(ht, NULL);

   return r;
}