nir: no-op C99 _Pragma() with MSVC
[mesa.git] / src / compiler / nir / nir_range_analysis.c
1 /*
2 * Copyright © 2018 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
#include <float.h>
#include <math.h>
#include <stdint.h>

#include "nir.h"
#include "nir_range_analysis.h"
#include "util/hash_table.h"
28
29 /**
30 * Analyzes a sequence of operations to determine some aspects of the range of
31 * the result.
32 */
33
34 static bool
35 is_not_negative(enum ssa_ranges r)
36 {
37 return r == gt_zero || r == ge_zero || r == eq_zero;
38 }
39
40 static void *
41 pack_data(const struct ssa_result_range r)
42 {
43 return (void *)(uintptr_t)(r.range | r.is_integral << 8);
44 }
45
46 static struct ssa_result_range
47 unpack_data(const void *p)
48 {
49 const uintptr_t v = (uintptr_t) p;
50
51 return (struct ssa_result_range){v & 0xff, (v & 0x0ff00) != 0};
52 }
53
54 static void *
55 pack_key(const struct nir_alu_instr *instr, nir_alu_type type)
56 {
57 uintptr_t type_encoding;
58 uintptr_t ptr = (uintptr_t) instr;
59
60 /* The low 2 bits have to be zero or this whole scheme falls apart. */
61 assert((ptr & 0x3) == 0);
62
63 /* NIR is typeless in the sense that sequences of bits have whatever
64 * meaning is attached to them by the instruction that consumes them.
65 * However, the number of bits must match between producer and consumer.
66 * As a result, the number of bits does not need to be encoded here.
67 */
68 switch (nir_alu_type_get_base_type(type)) {
69 case nir_type_int: type_encoding = 0; break;
70 case nir_type_uint: type_encoding = 1; break;
71 case nir_type_bool: type_encoding = 2; break;
72 case nir_type_float: type_encoding = 3; break;
73 default: unreachable("Invalid base type.");
74 }
75
76 return (void *)(ptr | type_encoding);
77 }
78
79 static nir_alu_type
80 nir_alu_src_type(const nir_alu_instr *instr, unsigned src)
81 {
82 return nir_alu_type_get_base_type(nir_op_infos[instr->op].input_types[src]) |
83 nir_src_bit_size(instr->src[src].src);
84 }
85
86 static struct ssa_result_range
87 analyze_constant(const struct nir_alu_instr *instr, unsigned src,
88 nir_alu_type use_type)
89 {
90 uint8_t swizzle[4] = { 0, 1, 2, 3 };
91
92 /* If the source is an explicitly sized source, then we need to reset
93 * both the number of components and the swizzle.
94 */
95 const unsigned num_components = nir_ssa_alu_instr_src_components(instr, src);
96
97 for (unsigned i = 0; i < num_components; ++i)
98 swizzle[i] = instr->src[src].swizzle[i];
99
100 const nir_load_const_instr *const load =
101 nir_instr_as_load_const(instr->src[src].src.ssa->parent_instr);
102
103 struct ssa_result_range r = { unknown, false };
104
105 switch (nir_alu_type_get_base_type(use_type)) {
106 case nir_type_float: {
107 double min_value = DBL_MAX;
108 double max_value = -DBL_MAX;
109 bool any_zero = false;
110 bool all_zero = true;
111
112 r.is_integral = true;
113
114 for (unsigned i = 0; i < num_components; ++i) {
115 const double v = nir_const_value_as_float(load->value[swizzle[i]],
116 load->def.bit_size);
117
118 if (floor(v) != v)
119 r.is_integral = false;
120
121 any_zero = any_zero || (v == 0.0);
122 all_zero = all_zero && (v == 0.0);
123 min_value = MIN2(min_value, v);
124 max_value = MAX2(max_value, v);
125 }
126
127 assert(any_zero >= all_zero);
128 assert(isnan(max_value) || max_value >= min_value);
129
130 if (all_zero)
131 r.range = eq_zero;
132 else if (min_value > 0.0)
133 r.range = gt_zero;
134 else if (min_value == 0.0)
135 r.range = ge_zero;
136 else if (max_value < 0.0)
137 r.range = lt_zero;
138 else if (max_value == 0.0)
139 r.range = le_zero;
140 else if (!any_zero)
141 r.range = ne_zero;
142 else
143 r.range = unknown;
144
145 return r;
146 }
147
148 case nir_type_int:
149 case nir_type_bool: {
150 int64_t min_value = INT_MAX;
151 int64_t max_value = INT_MIN;
152 bool any_zero = false;
153 bool all_zero = true;
154
155 for (unsigned i = 0; i < num_components; ++i) {
156 const int64_t v = nir_const_value_as_int(load->value[swizzle[i]],
157 load->def.bit_size);
158
159 any_zero = any_zero || (v == 0);
160 all_zero = all_zero && (v == 0);
161 min_value = MIN2(min_value, v);
162 max_value = MAX2(max_value, v);
163 }
164
165 assert(any_zero >= all_zero);
166 assert(max_value >= min_value);
167
168 if (all_zero)
169 r.range = eq_zero;
170 else if (min_value > 0)
171 r.range = gt_zero;
172 else if (min_value == 0)
173 r.range = ge_zero;
174 else if (max_value < 0)
175 r.range = lt_zero;
176 else if (max_value == 0)
177 r.range = le_zero;
178 else if (!any_zero)
179 r.range = ne_zero;
180 else
181 r.range = unknown;
182
183 return r;
184 }
185
186 case nir_type_uint: {
187 bool any_zero = false;
188 bool all_zero = true;
189
190 for (unsigned i = 0; i < num_components; ++i) {
191 const uint64_t v = nir_const_value_as_uint(load->value[swizzle[i]],
192 load->def.bit_size);
193
194 any_zero = any_zero || (v == 0);
195 all_zero = all_zero && (v == 0);
196 }
197
198 assert(any_zero >= all_zero);
199
200 if (all_zero)
201 r.range = eq_zero;
202 else if (any_zero)
203 r.range = ge_zero;
204 else
205 r.range = gt_zero;
206
207 return r;
208 }
209
210 default:
211 unreachable("Invalid alu source type");
212 }
213 }
214
215 /**
216 * Short-hand name for use in the tables in analyze_expression. If this name
217 * becomes a problem on some compiler, we can change it to _.
218 */
#define _______ unknown


/* MSVC doesn't have C99's _Pragma(); define it away so the "GCC unroll"
 * hints in the assertion macros below compile as no-ops there.
 * NOTE(review): _Pragma is a reserved preprocessing operator, so #define'ing
 * it is technically non-conforming — MSVC accepts it in practice.
 */
#ifdef _MSC_VER
#define _Pragma(x)
#endif
226
227
#ifndef NDEBUG
/* Debug-only, run-once sanity checks for the range tables defined below.
 * Each macro performs its checks the first time it executes (guarded by a
 * function-local static) and compiles to nothing when NDEBUG is defined
 * (see the #else branch further down).
 */

/* Check that table t models a commutative operation: t[r][c] == t[c][r]
 * for every entry.
 */
#define ASSERT_TABLE_IS_COMMUTATIVE(t)                        \
   do {                                                       \
      static bool first = true;                               \
      if (first) {                                            \
         first = false;                                       \
         _Pragma("GCC unroll 7")                              \
         for (unsigned r = 0; r < ARRAY_SIZE(t); r++) {       \
            _Pragma("GCC unroll 7")                           \
            for (unsigned c = 0; c < ARRAY_SIZE(t[0]); c++)   \
               assert(t[r][c] == t[c][r]);                    \
         }                                                    \
      }                                                       \
   } while (false)

/* Check that combining a range with itself yields the same range:
 * t[r][r] == r for every r.
 */
#define ASSERT_TABLE_IS_DIAGONAL(t)                           \
   do {                                                       \
      static bool first = true;                               \
      if (first) {                                            \
         first = false;                                       \
         _Pragma("GCC unroll 7")                              \
         for (unsigned r = 0; r < ARRAY_SIZE(t); r++)         \
            assert(t[r][r] == r);                             \
      }                                                       \
   } while (false)
253
/**
 * Compute the tightest range token that contains both input ranges.
 *
 * Used to merge range information when a value can come from either of two
 * ranges.  The table is symmetric (union is commutative) and idempotent on
 * the diagonal (union_ranges(x, x) == x); both properties are verified by
 * the debug assertions below.
 */
static enum ssa_ranges
union_ranges(enum ssa_ranges a, enum ssa_ranges b)
{
   static const enum ssa_ranges union_table[last_range + 1][last_range + 1] = {
      /* left\right unknown  lt_zero  le_zero  gt_zero  ge_zero  ne_zero  eq_zero */
      /* unknown */ { _______, _______, _______, _______, _______, _______, _______ },
      /* lt_zero */ { _______, lt_zero, le_zero, ne_zero, _______, ne_zero, le_zero },
      /* le_zero */ { _______, le_zero, le_zero, _______, _______, _______, le_zero },
      /* gt_zero */ { _______, ne_zero, _______, gt_zero, ge_zero, ne_zero, ge_zero },
      /* ge_zero */ { _______, _______, _______, ge_zero, ge_zero, _______, ge_zero },
      /* ne_zero */ { _______, ne_zero, _______, ne_zero, _______, ne_zero, _______ },
      /* eq_zero */ { _______, le_zero, le_zero, ge_zero, ge_zero, _______, eq_zero },
   };

   ASSERT_TABLE_IS_COMMUTATIVE(union_table);
   ASSERT_TABLE_IS_DIAGONAL(union_table);

   return union_table[a][b];
}
273
/* Verify that the 'unknown' entry in each row (or column) of the table is the
 * union of all the other values in the row (or column).
 *
 * NOTE(review): the loop bounds are inclusive.  last_range names the final
 * enum member (eq_zero), so the previous '< last_range' bounds skipped the
 * eq_zero row/column entirely — an off-by-one that weakened the check.  The
 * inclusive bounds also match the "GCC unroll 7" / "GCC unroll 5" hints
 * (7 and 5 iterations respectively).
 */
#define ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_2_SOURCE(t)             \
   do {                                                                \
      static bool first = true;                                        \
      if (first) {                                                     \
         first = false;                                                \
         _Pragma("GCC unroll 7")                                       \
         for (unsigned i = 0; i <= last_range; i++) {                  \
            enum ssa_ranges col_range = t[i][unknown + 1];             \
            enum ssa_ranges row_range = t[unknown + 1][i];             \
                                                                       \
            _Pragma("GCC unroll 5")                                    \
            for (unsigned j = unknown + 2; j <= last_range; j++) {     \
               col_range = union_ranges(col_range, t[i][j]);           \
               row_range = union_ranges(row_range, t[j][i]);           \
            }                                                          \
                                                                       \
            assert(col_range == t[i][unknown]);                        \
            assert(row_range == t[unknown][i]);                        \
         }                                                             \
      }                                                                \
   } while (false)
298
/* For most operations, the union of ranges for a strict inequality and
 * equality should be the range of the non-strict inequality (e.g.,
 * union_ranges(range(op(lt_zero), range(op(eq_zero))) == range(op(le_zero)).
 *
 * Does not apply to selection-like opcodes (bcsel, fmin, fmax, etc.).
 */
#define ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_1_SOURCE(t) \
   do {                                                                  \
      assert(union_ranges(t[lt_zero], t[eq_zero]) == t[le_zero]);        \
      assert(union_ranges(t[gt_zero], t[eq_zero]) == t[ge_zero]);        \
   } while (false)
310
/* Two-source version of the check above: applies the strict-vs-non-strict
 * property to every row and every column of a two-source table.
 *
 * NOTE(review): the loop bound is inclusive — last_range names the final
 * enum member (eq_zero), so the previous '< last_range' bound skipped the
 * eq_zero row/column (off-by-one).  The inclusive bound also matches the
 * "GCC unroll 7" hint (7 iterations).
 */
#define ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_2_SOURCE(t)      \
   do {                                                                       \
      static bool first = true;                                               \
      if (first) {                                                            \
         first = false;                                                       \
         _Pragma("GCC unroll 7")                                              \
         for (unsigned i = 0; i <= last_range; i++) {                         \
            assert(union_ranges(t[i][lt_zero], t[i][eq_zero]) == t[i][le_zero]); \
            assert(union_ranges(t[i][gt_zero], t[i][eq_zero]) == t[i][ge_zero]); \
            assert(union_ranges(t[lt_zero][i], t[eq_zero][i]) == t[le_zero][i]); \
            assert(union_ranges(t[gt_zero][i], t[eq_zero][i]) == t[ge_zero][i]); \
         }                                                                    \
      }                                                                       \
   } while (false)
325
/* Several other unordered tuples span the range of "everything."  Each should
 * have the same value as unknown: (lt_zero, ge_zero), (le_zero, gt_zero), and
 * (eq_zero, ne_zero).  union_ranges is already commutative, so only one
 * ordering needs to be checked.
 *
 * Does not apply to selection-like opcodes (bcsel, fmin, fmax, etc.).
 *
 * In cases where this can be used, it is unnecessary to also use
 * ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_*_SOURCE.  For any range X,
 * union_ranges(X, X) == X.  The disjoint ranges cover all of the non-unknown
 * possibilities, so the union of all the unions of disjoint ranges is
 * equivalent to the union of "others."
 */
#define ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_1_SOURCE(t)        \
   do {                                                             \
      assert(union_ranges(t[lt_zero], t[ge_zero]) == t[unknown]);   \
      assert(union_ranges(t[le_zero], t[gt_zero]) == t[unknown]);   \
      assert(union_ranges(t[eq_zero], t[ne_zero]) == t[unknown]);   \
   } while (false)
345
/* Two-source version of the disjoint-tuples check above: applies it to
 * every row and every column of a two-source table.
 *
 * NOTE(review): the loop bound is inclusive — last_range names the final
 * enum member (eq_zero), so the previous '< last_range' bound skipped the
 * eq_zero row/column (off-by-one).  The inclusive bound also matches the
 * "GCC unroll 7" hint (7 iterations).
 */
#define ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(t)      \
   do {                                                           \
      static bool first = true;                                   \
      if (first) {                                                \
         first = false;                                           \
         _Pragma("GCC unroll 7")                                  \
         for (unsigned i = 0; i <= last_range; i++) {             \
            assert(union_ranges(t[i][lt_zero], t[i][ge_zero]) ==  \
                   t[i][unknown]);                                \
            assert(union_ranges(t[i][le_zero], t[i][gt_zero]) ==  \
                   t[i][unknown]);                                \
            assert(union_ranges(t[i][eq_zero], t[i][ne_zero]) ==  \
                   t[i][unknown]);                                \
                                                                  \
            assert(union_ranges(t[lt_zero][i], t[ge_zero][i]) ==  \
                   t[unknown][i]);                                \
            assert(union_ranges(t[le_zero][i], t[gt_zero][i]) ==  \
                   t[unknown][i]);                                \
            assert(union_ranges(t[eq_zero][i], t[ne_zero][i]) ==  \
                   t[unknown][i]);                                \
         }                                                        \
      }                                                           \
   } while (false)
369
#else
/* Release builds (NDEBUG): all table verification macros compile to
 * nothing.
 */
#define ASSERT_TABLE_IS_COMMUTATIVE(t)
#define ASSERT_TABLE_IS_DIAGONAL(t)
#define ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_2_SOURCE(t)
#define ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_1_SOURCE(t)
#define ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_2_SOURCE(t)
#define ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_1_SOURCE(t)
#define ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(t)
#endif
379
380 /**
381 * Analyze an expression to determine the range of its result
382 *
383 * The end result of this analysis is a token that communicates something
384 * about the range of values. There's an implicit grammar that produces
385 * tokens from sequences of literal values, other tokens, and operations.
386 * This function implements this grammar as a recursive-descent parser. Some
387 * (but not all) of the grammar is listed in-line in the function.
388 */
389 static struct ssa_result_range
390 analyze_expression(const nir_alu_instr *instr, unsigned src,
391 struct hash_table *ht, nir_alu_type use_type)
392 {
393 /* Ensure that the _Pragma("GCC unroll 7") above are correct. */
394 STATIC_ASSERT(last_range + 1 == 7);
395
396 if (!instr->src[src].src.is_ssa)
397 return (struct ssa_result_range){unknown, false};
398
399 if (nir_src_is_const(instr->src[src].src))
400 return analyze_constant(instr, src, use_type);
401
402 if (instr->src[src].src.ssa->parent_instr->type != nir_instr_type_alu)
403 return (struct ssa_result_range){unknown, false};
404
405 const struct nir_alu_instr *const alu =
406 nir_instr_as_alu(instr->src[src].src.ssa->parent_instr);
407
408 /* Bail if the type of the instruction generating the value does not match
409 * the type the value will be interpreted as. int/uint/bool can be
410 * reinterpreted trivially. The most important cases are between float and
411 * non-float.
412 */
413 if (alu->op != nir_op_mov && alu->op != nir_op_bcsel) {
414 const nir_alu_type use_base_type =
415 nir_alu_type_get_base_type(use_type);
416 const nir_alu_type src_base_type =
417 nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type);
418
419 if (use_base_type != src_base_type &&
420 (use_base_type == nir_type_float ||
421 src_base_type == nir_type_float)) {
422 return (struct ssa_result_range){unknown, false};
423 }
424 }
425
426 struct hash_entry *he = _mesa_hash_table_search(ht, pack_key(alu, use_type));
427 if (he != NULL)
428 return unpack_data(he->data);
429
430 struct ssa_result_range r = {unknown, false};
431
432 /* ge_zero: ge_zero + ge_zero
433 *
434 * gt_zero: gt_zero + eq_zero
435 * | gt_zero + ge_zero
436 * | eq_zero + gt_zero # Addition is commutative
437 * | ge_zero + gt_zero # Addition is commutative
438 * | gt_zero + gt_zero
439 * ;
440 *
441 * le_zero: le_zero + le_zero
442 *
443 * lt_zero: lt_zero + eq_zero
444 * | lt_zero + le_zero
445 * | eq_zero + lt_zero # Addition is commutative
446 * | le_zero + lt_zero # Addition is commutative
447 * | lt_zero + lt_zero
448 * ;
449 *
450 * ne_zero: eq_zero + ne_zero
451 * | ne_zero + eq_zero # Addition is commutative
452 * ;
453 *
454 * eq_zero: eq_zero + eq_zero
455 * ;
456 *
457 * All other cases are 'unknown'. The seeming odd entry is (ne_zero,
458 * ne_zero), but that could be (-5, +5) which is not ne_zero.
459 */
460 static const enum ssa_ranges fadd_table[last_range + 1][last_range + 1] = {
461 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
462 /* unknown */ { _______, _______, _______, _______, _______, _______, _______ },
463 /* lt_zero */ { _______, lt_zero, lt_zero, _______, _______, _______, lt_zero },
464 /* le_zero */ { _______, lt_zero, le_zero, _______, _______, _______, le_zero },
465 /* gt_zero */ { _______, _______, _______, gt_zero, gt_zero, _______, gt_zero },
466 /* ge_zero */ { _______, _______, _______, gt_zero, ge_zero, _______, ge_zero },
467 /* ne_zero */ { _______, _______, _______, _______, _______, _______, ne_zero },
468 /* eq_zero */ { _______, lt_zero, le_zero, gt_zero, ge_zero, ne_zero, eq_zero },
469 };
470
471 ASSERT_TABLE_IS_COMMUTATIVE(fadd_table);
472 ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(fadd_table);
473 ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_2_SOURCE(fadd_table);
474
475 /* Due to flush-to-zero semanatics of floating-point numbers with very
476 * small mangnitudes, we can never really be sure a result will be
477 * non-zero.
478 *
479 * ge_zero: ge_zero * ge_zero
480 * | ge_zero * gt_zero
481 * | ge_zero * eq_zero
482 * | le_zero * lt_zero
483 * | lt_zero * le_zero # Multiplication is commutative
484 * | le_zero * le_zero
485 * | gt_zero * ge_zero # Multiplication is commutative
486 * | eq_zero * ge_zero # Multiplication is commutative
487 * | a * a # Left source == right source
488 * | gt_zero * gt_zero
489 * | lt_zero * lt_zero
490 * ;
491 *
492 * le_zero: ge_zero * le_zero
493 * | ge_zero * lt_zero
494 * | lt_zero * ge_zero # Multiplication is commutative
495 * | le_zero * ge_zero # Multiplication is commutative
496 * | le_zero * gt_zero
497 * | lt_zero * gt_zero
498 * | gt_zero * lt_zero # Multiplication is commutative
499 * ;
500 *
501 * eq_zero: eq_zero * <any>
502 * <any> * eq_zero # Multiplication is commutative
503 *
504 * All other cases are 'unknown'.
505 */
506 static const enum ssa_ranges fmul_table[last_range + 1][last_range + 1] = {
507 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
508 /* unknown */ { _______, _______, _______, _______, _______, _______, eq_zero },
509 /* lt_zero */ { _______, ge_zero, ge_zero, le_zero, le_zero, _______, eq_zero },
510 /* le_zero */ { _______, ge_zero, ge_zero, le_zero, le_zero, _______, eq_zero },
511 /* gt_zero */ { _______, le_zero, le_zero, ge_zero, ge_zero, _______, eq_zero },
512 /* ge_zero */ { _______, le_zero, le_zero, ge_zero, ge_zero, _______, eq_zero },
513 /* ne_zero */ { _______, _______, _______, _______, _______, _______, eq_zero },
514 /* eq_zero */ { eq_zero, eq_zero, eq_zero, eq_zero, eq_zero, eq_zero, eq_zero }
515 };
516
517 ASSERT_TABLE_IS_COMMUTATIVE(fmul_table);
518 ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(fmul_table);
519 ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_2_SOURCE(fmul_table);
520
521 static const enum ssa_ranges fneg_table[last_range + 1] = {
522 /* unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
523 _______, gt_zero, ge_zero, lt_zero, le_zero, ne_zero, eq_zero
524 };
525
526 ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_1_SOURCE(fneg_table);
527 ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_1_SOURCE(fneg_table);
528
529
530 switch (alu->op) {
531 case nir_op_b2f32:
532 case nir_op_b2i32:
533 r = (struct ssa_result_range){ge_zero, alu->op == nir_op_b2f32};
534 break;
535
536 case nir_op_bcsel: {
537 const struct ssa_result_range left =
538 analyze_expression(alu, 1, ht, use_type);
539 const struct ssa_result_range right =
540 analyze_expression(alu, 2, ht, use_type);
541
542 r.is_integral = left.is_integral && right.is_integral;
543
544 /* le_zero: bcsel(<any>, le_zero, lt_zero)
545 * | bcsel(<any>, eq_zero, lt_zero)
546 * | bcsel(<any>, le_zero, eq_zero)
547 * | bcsel(<any>, lt_zero, le_zero)
548 * | bcsel(<any>, lt_zero, eq_zero)
549 * | bcsel(<any>, eq_zero, le_zero)
550 * | bcsel(<any>, le_zero, le_zero)
551 * ;
552 *
553 * lt_zero: bcsel(<any>, lt_zero, lt_zero)
554 * ;
555 *
556 * ge_zero: bcsel(<any>, ge_zero, ge_zero)
557 * | bcsel(<any>, ge_zero, gt_zero)
558 * | bcsel(<any>, ge_zero, eq_zero)
559 * | bcsel(<any>, gt_zero, ge_zero)
560 * | bcsel(<any>, eq_zero, ge_zero)
561 * ;
562 *
563 * gt_zero: bcsel(<any>, gt_zero, gt_zero)
564 * ;
565 *
566 * ne_zero: bcsel(<any>, ne_zero, gt_zero)
567 * | bcsel(<any>, ne_zero, lt_zero)
568 * | bcsel(<any>, gt_zero, lt_zero)
569 * | bcsel(<any>, gt_zero, ne_zero)
570 * | bcsel(<any>, lt_zero, ne_zero)
571 * | bcsel(<any>, lt_zero, gt_zero)
572 * | bcsel(<any>, ne_zero, ne_zero)
573 * ;
574 *
575 * eq_zero: bcsel(<any>, eq_zero, eq_zero)
576 * ;
577 *
578 * All other cases are 'unknown'.
579 *
580 * The ranges could be tightened if the range of the first source is
581 * known. However, opt_algebraic will (eventually) elminiate the bcsel
582 * if the condition is known.
583 */
584 static const enum ssa_ranges table[last_range + 1][last_range + 1] = {
585 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
586 /* unknown */ { _______, _______, _______, _______, _______, _______, _______ },
587 /* lt_zero */ { _______, lt_zero, le_zero, ne_zero, _______, ne_zero, le_zero },
588 /* le_zero */ { _______, le_zero, le_zero, _______, _______, _______, le_zero },
589 /* gt_zero */ { _______, ne_zero, _______, gt_zero, ge_zero, ne_zero, ge_zero },
590 /* ge_zero */ { _______, _______, _______, ge_zero, ge_zero, _______, ge_zero },
591 /* ne_zero */ { _______, ne_zero, _______, ne_zero, _______, ne_zero, _______ },
592 /* eq_zero */ { _______, le_zero, le_zero, ge_zero, ge_zero, _______, eq_zero },
593 };
594
595 ASSERT_TABLE_IS_COMMUTATIVE(table);
596 ASSERT_TABLE_IS_DIAGONAL(table);
597 ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_2_SOURCE(table);
598
599 r.range = table[left.range][right.range];
600 break;
601 }
602
603 case nir_op_i2f32:
604 case nir_op_u2f32:
605 r = analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
606
607 r.is_integral = true;
608
609 if (r.range == unknown && alu->op == nir_op_u2f32)
610 r.range = ge_zero;
611
612 break;
613
614 case nir_op_fabs:
615 r = analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
616
617 switch (r.range) {
618 case unknown:
619 case le_zero:
620 case ge_zero:
621 r.range = ge_zero;
622 break;
623
624 case lt_zero:
625 case gt_zero:
626 case ne_zero:
627 r.range = gt_zero;
628 break;
629
630 case eq_zero:
631 break;
632 }
633
634 break;
635
636 case nir_op_fadd: {
637 const struct ssa_result_range left =
638 analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
639 const struct ssa_result_range right =
640 analyze_expression(alu, 1, ht, nir_alu_src_type(alu, 1));
641
642 r.is_integral = left.is_integral && right.is_integral;
643 r.range = fadd_table[left.range][right.range];
644 break;
645 }
646
647 case nir_op_fexp2: {
648 /* If the parameter might be less than zero, the mathematically result
649 * will be on (0, 1). For sufficiently large magnitude negative
650 * parameters, the result will flush to zero.
651 */
652 static const enum ssa_ranges table[last_range + 1] = {
653 /* unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
654 ge_zero, ge_zero, ge_zero, gt_zero, gt_zero, ge_zero, gt_zero
655 };
656
657 r = analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
658
659 ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_1_SOURCE(table);
660 ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_1_SOURCE(table);
661
662 r.is_integral = r.is_integral && is_not_negative(r.range);
663 r.range = table[r.range];
664 break;
665 }
666
667 case nir_op_fmax: {
668 const struct ssa_result_range left =
669 analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
670 const struct ssa_result_range right =
671 analyze_expression(alu, 1, ht, nir_alu_src_type(alu, 1));
672
673 r.is_integral = left.is_integral && right.is_integral;
674
675 /* gt_zero: fmax(gt_zero, *)
676 * | fmax(*, gt_zero) # Treat fmax as commutative
677 * ;
678 *
679 * ge_zero: fmax(ge_zero, ne_zero)
680 * | fmax(ge_zero, lt_zero)
681 * | fmax(ge_zero, le_zero)
682 * | fmax(ge_zero, eq_zero)
683 * | fmax(ne_zero, ge_zero) # Treat fmax as commutative
684 * | fmax(lt_zero, ge_zero) # Treat fmax as commutative
685 * | fmax(le_zero, ge_zero) # Treat fmax as commutative
686 * | fmax(eq_zero, ge_zero) # Treat fmax as commutative
687 * | fmax(ge_zero, ge_zero)
688 * ;
689 *
690 * le_zero: fmax(le_zero, lt_zero)
691 * | fmax(lt_zero, le_zero) # Treat fmax as commutative
692 * | fmax(le_zero, le_zero)
693 * ;
694 *
695 * lt_zero: fmax(lt_zero, lt_zero)
696 * ;
697 *
698 * ne_zero: fmax(ne_zero, lt_zero)
699 * | fmax(lt_zero, ne_zero) # Treat fmax as commutative
700 * | fmax(ne_zero, ne_zero)
701 * ;
702 *
703 * eq_zero: fmax(eq_zero, le_zero)
704 * | fmax(eq_zero, lt_zero)
705 * | fmax(le_zero, eq_zero) # Treat fmax as commutative
706 * | fmax(lt_zero, eq_zero) # Treat fmax as commutative
707 * | fmax(eq_zero, eq_zero)
708 * ;
709 *
710 * All other cases are 'unknown'.
711 */
712 static const enum ssa_ranges table[last_range + 1][last_range + 1] = {
713 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
714 /* unknown */ { _______, _______, _______, gt_zero, ge_zero, _______, _______ },
715 /* lt_zero */ { _______, lt_zero, le_zero, gt_zero, ge_zero, ne_zero, eq_zero },
716 /* le_zero */ { _______, le_zero, le_zero, gt_zero, ge_zero, _______, eq_zero },
717 /* gt_zero */ { gt_zero, gt_zero, gt_zero, gt_zero, gt_zero, gt_zero, gt_zero },
718 /* ge_zero */ { ge_zero, ge_zero, ge_zero, gt_zero, ge_zero, ge_zero, ge_zero },
719 /* ne_zero */ { _______, ne_zero, _______, gt_zero, ge_zero, ne_zero, _______ },
720 /* eq_zero */ { _______, eq_zero, eq_zero, gt_zero, ge_zero, _______, eq_zero }
721 };
722
723 /* Treat fmax as commutative. */
724 ASSERT_TABLE_IS_COMMUTATIVE(table);
725 ASSERT_TABLE_IS_DIAGONAL(table);
726 ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_2_SOURCE(table);
727
728 r.range = table[left.range][right.range];
729 break;
730 }
731
732 case nir_op_fmin: {
733 const struct ssa_result_range left =
734 analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
735 const struct ssa_result_range right =
736 analyze_expression(alu, 1, ht, nir_alu_src_type(alu, 1));
737
738 r.is_integral = left.is_integral && right.is_integral;
739
740 /* lt_zero: fmin(lt_zero, *)
741 * | fmin(*, lt_zero) # Treat fmin as commutative
742 * ;
743 *
744 * le_zero: fmin(le_zero, ne_zero)
745 * | fmin(le_zero, gt_zero)
746 * | fmin(le_zero, ge_zero)
747 * | fmin(le_zero, eq_zero)
748 * | fmin(ne_zero, le_zero) # Treat fmin as commutative
749 * | fmin(gt_zero, le_zero) # Treat fmin as commutative
750 * | fmin(ge_zero, le_zero) # Treat fmin as commutative
751 * | fmin(eq_zero, le_zero) # Treat fmin as commutative
752 * | fmin(le_zero, le_zero)
753 * ;
754 *
755 * ge_zero: fmin(ge_zero, gt_zero)
756 * | fmin(gt_zero, ge_zero) # Treat fmin as commutative
757 * | fmin(ge_zero, ge_zero)
758 * ;
759 *
760 * gt_zero: fmin(gt_zero, gt_zero)
761 * ;
762 *
763 * ne_zero: fmin(ne_zero, gt_zero)
764 * | fmin(gt_zero, ne_zero) # Treat fmin as commutative
765 * | fmin(ne_zero, ne_zero)
766 * ;
767 *
768 * eq_zero: fmin(eq_zero, ge_zero)
769 * | fmin(eq_zero, gt_zero)
770 * | fmin(ge_zero, eq_zero) # Treat fmin as commutative
771 * | fmin(gt_zero, eq_zero) # Treat fmin as commutative
772 * | fmin(eq_zero, eq_zero)
773 * ;
774 *
775 * All other cases are 'unknown'.
776 */
777 static const enum ssa_ranges table[last_range + 1][last_range + 1] = {
778 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
779 /* unknown */ { _______, lt_zero, le_zero, _______, _______, _______, _______ },
780 /* lt_zero */ { lt_zero, lt_zero, lt_zero, lt_zero, lt_zero, lt_zero, lt_zero },
781 /* le_zero */ { le_zero, lt_zero, le_zero, le_zero, le_zero, le_zero, le_zero },
782 /* gt_zero */ { _______, lt_zero, le_zero, gt_zero, ge_zero, ne_zero, eq_zero },
783 /* ge_zero */ { _______, lt_zero, le_zero, ge_zero, ge_zero, _______, eq_zero },
784 /* ne_zero */ { _______, lt_zero, le_zero, ne_zero, _______, ne_zero, _______ },
785 /* eq_zero */ { _______, lt_zero, le_zero, eq_zero, eq_zero, _______, eq_zero }
786 };
787
788 /* Treat fmin as commutative. */
789 ASSERT_TABLE_IS_COMMUTATIVE(table);
790 ASSERT_TABLE_IS_DIAGONAL(table);
791 ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_2_SOURCE(table);
792
793 r.range = table[left.range][right.range];
794 break;
795 }
796
797 case nir_op_fmul: {
798 const struct ssa_result_range left =
799 analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
800 const struct ssa_result_range right =
801 analyze_expression(alu, 1, ht, nir_alu_src_type(alu, 1));
802
803 r.is_integral = left.is_integral && right.is_integral;
804
805 /* x * x => ge_zero */
806 if (left.range != eq_zero && nir_alu_srcs_equal(alu, alu, 0, 1)) {
807 /* Even if x > 0, the result of x*x can be zero when x is, for
808 * example, a subnormal number.
809 */
810 r.range = ge_zero;
811 } else if (left.range != eq_zero && nir_alu_srcs_negative_equal(alu, alu, 0, 1)) {
812 /* -x * x => le_zero. */
813 r.range = le_zero;
814 } else
815 r.range = fmul_table[left.range][right.range];
816
817 break;
818 }
819
820 case nir_op_frcp:
821 r = (struct ssa_result_range){
822 analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0)).range,
823 false
824 };
825 break;
826
827 case nir_op_mov:
828 r = analyze_expression(alu, 0, ht, use_type);
829 break;
830
831 case nir_op_fneg:
832 r = analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
833
834 r.range = fneg_table[r.range];
835 break;
836
837 case nir_op_fsat:
838 r = analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
839
840 switch (r.range) {
841 case le_zero:
842 case lt_zero:
843 r.range = eq_zero;
844 r.is_integral = true;
845 break;
846
847 case eq_zero:
848 assert(r.is_integral);
849 case gt_zero:
850 case ge_zero:
851 /* The fsat doesn't add any information in these cases. */
852 break;
853
854 case ne_zero:
855 case unknown:
856 /* Since the result must be in [0, 1], the value must be >= 0. */
857 r.range = ge_zero;
858 break;
859 }
860 break;
861
862 case nir_op_fsign:
863 r = (struct ssa_result_range){
864 analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0)).range,
865 true
866 };
867 break;
868
869 case nir_op_fsqrt:
870 case nir_op_frsq:
871 r = (struct ssa_result_range){ge_zero, false};
872 break;
873
874 case nir_op_ffloor: {
875 const struct ssa_result_range left =
876 analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
877
878 r.is_integral = true;
879
880 if (left.is_integral || left.range == le_zero || left.range == lt_zero)
881 r.range = left.range;
882 else if (left.range == ge_zero || left.range == gt_zero)
883 r.range = ge_zero;
884 else if (left.range == ne_zero)
885 r.range = unknown;
886
887 break;
888 }
889
890 case nir_op_fceil: {
891 const struct ssa_result_range left =
892 analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
893
894 r.is_integral = true;
895
896 if (left.is_integral || left.range == ge_zero || left.range == gt_zero)
897 r.range = left.range;
898 else if (left.range == le_zero || left.range == lt_zero)
899 r.range = le_zero;
900 else if (left.range == ne_zero)
901 r.range = unknown;
902
903 break;
904 }
905
906 case nir_op_ftrunc: {
907 const struct ssa_result_range left =
908 analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
909
910 r.is_integral = true;
911
912 if (left.is_integral)
913 r.range = left.range;
914 else if (left.range == ge_zero || left.range == gt_zero)
915 r.range = ge_zero;
916 else if (left.range == le_zero || left.range == lt_zero)
917 r.range = le_zero;
918 else if (left.range == ne_zero)
919 r.range = unknown;
920
921 break;
922 }
923
924 case nir_op_flt:
925 case nir_op_fge:
926 case nir_op_feq:
927 case nir_op_fne:
928 case nir_op_ilt:
929 case nir_op_ige:
930 case nir_op_ieq:
931 case nir_op_ine:
932 case nir_op_ult:
933 case nir_op_uge:
934 /* Boolean results are 0 or -1. */
935 r = (struct ssa_result_range){le_zero, false};
936 break;
937
938 case nir_op_fpow: {
939 /* Due to flush-to-zero semanatics of floating-point numbers with very
940 * small mangnitudes, we can never really be sure a result will be
941 * non-zero.
942 *
943 * NIR uses pow() and powf() to constant evaluate nir_op_fpow. The man
944 * page for that function says:
945 *
946 * If y is 0, the result is 1.0 (even if x is a NaN).
947 *
948 * gt_zero: pow(*, eq_zero)
949 * | pow(eq_zero, lt_zero) # 0^-y = +inf
950 * | pow(eq_zero, le_zero) # 0^-y = +inf or 0^0 = 1.0
951 * ;
952 *
953 * eq_zero: pow(eq_zero, gt_zero)
954 * ;
955 *
956 * ge_zero: pow(gt_zero, gt_zero)
957 * | pow(gt_zero, ge_zero)
958 * | pow(gt_zero, lt_zero)
959 * | pow(gt_zero, le_zero)
960 * | pow(gt_zero, ne_zero)
961 * | pow(gt_zero, unknown)
962 * | pow(ge_zero, gt_zero)
963 * | pow(ge_zero, ge_zero)
964 * | pow(ge_zero, lt_zero)
965 * | pow(ge_zero, le_zero)
966 * | pow(ge_zero, ne_zero)
967 * | pow(ge_zero, unknown)
968 * | pow(eq_zero, ge_zero) # 0^0 = 1.0 or 0^+y = 0.0
969 * | pow(eq_zero, ne_zero) # 0^-y = +inf or 0^+y = 0.0
970 * | pow(eq_zero, unknown) # union of all other y cases
971 * ;
972 *
973 * All other cases are unknown.
974 *
975 * We could do better if the right operand is a constant, integral
976 * value.
977 */
978 static const enum ssa_ranges table[last_range + 1][last_range + 1] = {
979 /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
980 /* unknown */ { _______, _______, _______, _______, _______, _______, gt_zero },
981 /* lt_zero */ { _______, _______, _______, _______, _______, _______, gt_zero },
982 /* le_zero */ { _______, _______, _______, _______, _______, _______, gt_zero },
983 /* gt_zero */ { ge_zero, ge_zero, ge_zero, ge_zero, ge_zero, ge_zero, gt_zero },
984 /* ge_zero */ { ge_zero, ge_zero, ge_zero, ge_zero, ge_zero, ge_zero, gt_zero },
985 /* ne_zero */ { _______, _______, _______, _______, _______, _______, gt_zero },
986 /* eq_zero */ { ge_zero, gt_zero, gt_zero, eq_zero, ge_zero, ge_zero, gt_zero },
987 };
988
989 const struct ssa_result_range left =
990 analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
991 const struct ssa_result_range right =
992 analyze_expression(alu, 1, ht, nir_alu_src_type(alu, 1));
993
994 ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(table);
995 ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_2_SOURCE(table);
996
997 r.is_integral = left.is_integral && right.is_integral &&
998 is_not_negative(right.range);
999 r.range = table[left.range][right.range];
1000 break;
1001 }
1002
1003 case nir_op_ffma: {
1004 const struct ssa_result_range first =
1005 analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
1006 const struct ssa_result_range second =
1007 analyze_expression(alu, 1, ht, nir_alu_src_type(alu, 1));
1008 const struct ssa_result_range third =
1009 analyze_expression(alu, 2, ht, nir_alu_src_type(alu, 2));
1010
1011 r.is_integral = first.is_integral && second.is_integral &&
1012 third.is_integral;
1013
1014 enum ssa_ranges fmul_range;
1015
1016 if (first.range != eq_zero && nir_alu_srcs_equal(alu, alu, 0, 1)) {
1017 /* See handling of nir_op_fmul for explanation of why ge_zero is the
1018 * range.
1019 */
1020 fmul_range = ge_zero;
1021 } else if (first.range != eq_zero && nir_alu_srcs_negative_equal(alu, alu, 0, 1)) {
1022 /* -x * x => le_zero */
1023 fmul_range = le_zero;
1024 } else
1025 fmul_range = fmul_table[first.range][second.range];
1026
1027 r.range = fadd_table[fmul_range][third.range];
1028 break;
1029 }
1030
1031 case nir_op_flrp: {
1032 const struct ssa_result_range first =
1033 analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
1034 const struct ssa_result_range second =
1035 analyze_expression(alu, 1, ht, nir_alu_src_type(alu, 1));
1036 const struct ssa_result_range third =
1037 analyze_expression(alu, 2, ht, nir_alu_src_type(alu, 2));
1038
1039 r.is_integral = first.is_integral && second.is_integral &&
1040 third.is_integral;
1041
1042 /* Decompose the flrp to first + third * (second + -first) */
1043 const enum ssa_ranges inner_fadd_range =
1044 fadd_table[second.range][fneg_table[first.range]];
1045
1046 const enum ssa_ranges fmul_range =
1047 fmul_table[third.range][inner_fadd_range];
1048
1049 r.range = fadd_table[first.range][fmul_range];
1050 break;
1051 }
1052
1053 default:
1054 r = (struct ssa_result_range){unknown, false};
1055 break;
1056 }
1057
1058 if (r.range == eq_zero)
1059 r.is_integral = true;
1060
1061 _mesa_hash_table_insert(ht, pack_key(alu, use_type), pack_data(r));
1062 return r;
1063 }
1064
1065 #undef _______
1066
1067 struct ssa_result_range
1068 nir_analyze_range(struct hash_table *range_ht,
1069 const nir_alu_instr *instr, unsigned src)
1070 {
1071 return analyze_expression(instr, src, range_ht,
1072 nir_alu_src_type(instr, src));
1073 }