/**************************************************************************
 *
 * Copyright 2008 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/


/**
 * Math utilities and approximations for common math functions.
 * Reduced precision is usually acceptable in shaders...
 *
 * "fast" is used in the names of functions which are low-precision,
 * or at least lower-precision than the normal C lib functions.
 */


#ifndef U_MATH_H
#define U_MATH_H

#include "c99_math.h"
#include <assert.h>
#include <float.h>
#include <stdarg.h>
#include <string.h> /* for memcpy() in util_memcpy_cpu_to_le32() */

#include "bitscan.h"
#include "u_endian.h" /* for PIPE_ARCH_BIG_ENDIAN */

#ifdef __cplusplus
extern "C" {
#endif


#ifndef M_SQRT2
#define M_SQRT2 1.41421356237309504880
#endif

#define POW2_TABLE_SIZE_LOG2 9
#define POW2_TABLE_SIZE (1 << POW2_TABLE_SIZE_LOG2)
#define POW2_TABLE_OFFSET (POW2_TABLE_SIZE/2)
#define POW2_TABLE_SCALE ((float)(POW2_TABLE_SIZE/2))
extern float pow2_table[POW2_TABLE_SIZE];


/**
 * Initialize math module. This should be called before using any
 * other functions in this module.
 */
extern void
util_init_math(void);


union fi {
   float f;
   int32_t i;
   uint32_t ui;
};


union di {
   double d;
   int64_t i;
   uint64_t ui;
};


/**
 * Extract the IEEE float32 exponent.
 */
static inline signed
util_get_float32_exponent(float x)
{
   union fi f;

   f.f = x;

   return ((f.ui >> 23) & 0xff) - 127;
}
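
/*
 * Usage sketch (illustrative, not part of the original header):
 *
 *    util_get_float32_exponent(8.0f);   // 3
 *    util_get_float32_exponent(0.5f);   // -1
 *
 * This reads the raw exponent field, so zero and denormals report -127
 * and Inf/NaN report 128.
 */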


/**
 * Fast version of 2^x
 * Identity: exp2(a + b) = exp2(a) * exp2(b)
 * Let ipart = int(x)
 * Let fpart = x - ipart
 * So, exp2(x) = exp2(ipart) * exp2(fpart)
 * Compute exp2(ipart) by biasing ipart directly into the float exponent field.
 * Compute exp2(fpart) with a lookup table.
 */
static inline float
util_fast_exp2(float x)
{
   int32_t ipart;
   float fpart, mpart;
   union fi epart;

   if (x > 129.00000f)
      return 3.402823466e+38f;

   if (x < -126.99999f)
      return 0.0f;

   ipart = (int32_t) x;
   fpart = x - (float) ipart;

   /* same as
    * epart.f = (float) (1 << ipart)
    * but faster and without integer overflow for ipart > 31
    */
   epart.i = (ipart + 127) << 23;

   mpart = pow2_table[POW2_TABLE_OFFSET + (int)(fpart * POW2_TABLE_SCALE)];

   return epart.f * mpart;
}
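
/*
 * Usage sketch (illustrative, not part of the original header). The
 * approximation reads pow2_table, so util_init_math() must have been
 * called first:
 *
 *    util_init_math();
 *    float y = util_fast_exp2(3.5f);   // ~11.3137 (exact: 2^3.5)
 *
 * Accuracy is bounded by the resolution of the fraction lookup table.
 */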


/**
 * Fast approximation to exp(x).
 */
static inline float
util_fast_exp(float x)
{
   const float k = 1.44269f; /* = log2(e) */
   return util_fast_exp2(k * x);
}


#define LOG2_TABLE_SIZE_LOG2 16
#define LOG2_TABLE_SCALE (1 << LOG2_TABLE_SIZE_LOG2)
#define LOG2_TABLE_SIZE (LOG2_TABLE_SCALE + 1)
extern float log2_table[LOG2_TABLE_SIZE];


/**
 * Fast approximation to log2(x).
 */
static inline float
util_fast_log2(float x)
{
   union fi num;
   float epart, mpart;
   num.f = x;
   epart = (float)(((num.i & 0x7f800000) >> 23) - 127);
   /* mpart = log2_table[mantissa*LOG2_TABLE_SCALE + 0.5] */
   mpart = log2_table[((num.i & 0x007fffff) + (1 << (22 - LOG2_TABLE_SIZE_LOG2))) >> (23 - LOG2_TABLE_SIZE_LOG2)];
   return epart + mpart;
}
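
/*
 * Usage sketch (illustrative, not part of the original header). Like
 * util_fast_exp2(), this depends on util_init_math() having filled the
 * lookup table, and it assumes x > 0:
 *
 *    float l = util_fast_log2(8.0f);   // ~3.0f
 */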


/**
 * Fast approximation to x^y.
 */
static inline float
util_fast_pow(float x, float y)
{
   return util_fast_exp2(util_fast_log2(x) * y);
}


/**
 * Floor(x), returned as int.
 */
static inline int
util_ifloor(float f)
{
   int ai, bi;
   double af, bf;
   union fi u;
   af = (3 << 22) + 0.5 + (double) f;
   bf = (3 << 22) + 0.5 - (double) f;
   u.f = (float) af; ai = u.i;
   u.f = (float) bf; bi = u.i;
   return (ai - bi) >> 1;
}
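
/*
 * Illustration (not part of the original header): util_ifloor() rounds
 * toward negative infinity, unlike a plain int cast, which truncates
 * toward zero:
 *
 *    util_ifloor(-1.5f);   // -2
 *    (int) -1.5f;          // -1
 *
 * The bit trick adds f to the constant 1.5*2^23 + 0.5 so the
 * double-to-float conversion quantizes the sum to an integer;
 * differencing the two resulting bit patterns recovers the floor
 * without touching the FPU rounding mode.
 */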


/**
 * Round float to nearest int.
 */
static inline int
util_iround(float f)
{
#if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86)
   int r;
   __asm__ ("fistpl %0" : "=m" (r) : "t" (f) : "st");
   return r;
#elif defined(PIPE_CC_MSVC) && defined(PIPE_ARCH_X86)
   int r;
   _asm {
      fld f
      fistp r
   }
   return r;
#else
   if (f >= 0.0f)
      return (int) (f + 0.5f);
   else
      return (int) (f - 0.5f);
#endif
}
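
/*
 * Behavioral note with examples (not part of the original header):
 *
 *    util_iround(2.4f);   // 2
 *    util_iround(2.6f);   // 3
 *
 * Halfway cases depend on the code path taken: the x87 fistp paths use
 * the current FPU rounding mode (round-to-nearest-even by default),
 * while the portable fallback rounds halves away from zero.
 */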


/**
 * Approximate floating point comparison
 */
static inline bool
util_is_approx(float a, float b, float tol)
{
   return fabsf(b - a) <= tol;
}


/**
 * util_is_X_inf_or_nan = test if x is NaN or +/- Inf
 * util_is_X_nan = test if x is NaN
 * util_X_inf_sign = return +1 for +Inf, -1 for -Inf, or 0 for not Inf
 *
 * NaN can be checked with x != x, however this fails when compiled
 * with fast-math optimizations.
 **/


/**
 * Single-float
 */
static inline bool
util_is_inf_or_nan(float x)
{
   union fi tmp;
   tmp.f = x;
   return (tmp.ui & 0x7f800000) == 0x7f800000;
}


static inline bool
util_is_nan(float x)
{
   union fi tmp;
   tmp.f = x;
   return (tmp.ui & 0x7fffffff) > 0x7f800000;
}


static inline int
util_inf_sign(float x)
{
   union fi tmp;
   tmp.f = x;
   if ((tmp.ui & 0x7fffffff) != 0x7f800000) {
      return 0;
   }

   return (x < 0) ? -1 : 1;
}


/**
 * Double-float
 */
static inline bool
util_is_double_inf_or_nan(double x)
{
   union di tmp;
   tmp.d = x;
   return (tmp.ui & 0x7ff0000000000000ULL) == 0x7ff0000000000000ULL;
}


static inline bool
util_is_double_nan(double x)
{
   union di tmp;
   tmp.d = x;
   return (tmp.ui & 0x7fffffffffffffffULL) > 0x7ff0000000000000ULL;
}


static inline int
util_double_inf_sign(double x)
{
   union di tmp;
   tmp.d = x;
   if ((tmp.ui & 0x7fffffffffffffffULL) != 0x7ff0000000000000ULL) {
      return 0;
   }

   return (x < 0) ? -1 : 1;
}


/**
 * Half-float
 */
static inline bool
util_is_half_inf_or_nan(int16_t x)
{
   return (x & 0x7c00) == 0x7c00;
}


static inline bool
util_is_half_nan(int16_t x)
{
   return (x & 0x7fff) > 0x7c00;
}


static inline int
util_half_inf_sign(int16_t x)
{
   if ((x & 0x7fff) != 0x7c00) {
      return 0;
   }

   return (x < 0) ? -1 : 1;
}


/**
 * Return float bits.
 */
static inline unsigned
fui( float f )
{
   union fi fi;
   fi.f = f;
   return fi.ui;
}

static inline float
uif(uint32_t ui)
{
   union fi fi;
   fi.ui = ui;
   return fi.f;
}


/**
 * Convert uint8_t to float in [0, 1].
 */
static inline float
ubyte_to_float(uint8_t ub)
{
   return (float) ub * (1.0f / 255.0f);
}


/**
 * Convert float in [0,1] to uint8_t in [0,255] with clamping.
 */
static inline uint8_t
float_to_ubyte(float f)
{
   /* return 0 for NaN too */
   if (!(f > 0.0f)) {
      return (uint8_t) 0;
   }
   else if (f >= 1.0f) {
      return (uint8_t) 255;
   }
   else {
      union fi tmp;
      tmp.f = f;
      tmp.f = tmp.f * (255.0f/256.0f) + 32768.0f;
      return (uint8_t) tmp.i;
   }
}
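
/*
 * How the bias trick works (explanatory note, not part of the original
 * header): scaling by 255/256 maps [0,1) onto [0,255) in units of
 * 1/256, and adding 32768.0f (2^15) pins the float exponent so that
 * rounding quantizes the sum to multiples of 1/256. The rounded 8-bit
 * result then sits in the low mantissa bits, which the final cast
 * extracts. For example:
 *
 *    float_to_ubyte(0.0f);   // 0 (NaN also maps to 0)
 *    float_to_ubyte(1.0f);   // 255
 */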

/**
 * Convert uint16_t to float in [0, 1].
 */
static inline float
ushort_to_float(uint16_t us)
{
   return (float) us * (1.0f / 65535.0f);
}


/**
 * Convert float in [0,1] to uint16_t in [0,65535] with clamping.
 */
static inline uint16_t
float_to_ushort(float f)
{
   /* return 0 for NaN too */
   if (!(f > 0.0f)) {
      return (uint16_t) 0;
   }
   else if (f >= 1.0f) {
      return (uint16_t) 65535;
   }
   else {
      union fi tmp;
      tmp.f = f;
      tmp.f = tmp.f * (65535.0f/65536.0f) + 128.0f;
      return (uint16_t) tmp.i;
   }
}

static inline float
byte_to_float_tex(int8_t b)
{
   return (b == -128) ? -1.0F : b * 1.0F / 127.0F;
}

static inline int8_t
float_to_byte_tex(float f)
{
   return (int8_t) (127.0F * f);
}

/**
 * Calc floor of log base 2
 */
static inline unsigned
util_logbase2(unsigned n)
{
#if defined(HAVE___BUILTIN_CLZ)
   return ((sizeof(unsigned) * 8 - 1) - __builtin_clz(n | 1));
#else
   unsigned pos = 0;
   if (n >= 1<<16) { n >>= 16; pos += 16; }
   if (n >= 1<< 8) { n >>= 8; pos += 8; }
   if (n >= 1<< 4) { n >>= 4; pos += 4; }
   if (n >= 1<< 2) { n >>= 2; pos += 2; }
   if (n >= 1<< 1) { pos += 1; }
   return pos;
#endif
}
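
/*
 * Example values (illustrative, not part of the original header):
 *
 *    util_logbase2(7);   // 2
 *    util_logbase2(8);   // 3
 *    util_logbase2(0);   // 0, thanks to the (n | 1) guard
 */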

static inline uint64_t
util_logbase2_64(uint64_t n)
{
#if defined(HAVE___BUILTIN_CLZLL)
   return ((sizeof(uint64_t) * 8 - 1) - __builtin_clzll(n | 1));
#else
   uint64_t pos = 0ull;
   if (n >= 1ull<<32) { n >>= 32; pos += 32; }
   if (n >= 1ull<<16) { n >>= 16; pos += 16; }
   if (n >= 1ull<< 8) { n >>= 8; pos += 8; }
   if (n >= 1ull<< 4) { n >>= 4; pos += 4; }
   if (n >= 1ull<< 2) { n >>= 2; pos += 2; }
   if (n >= 1ull<< 1) { pos += 1; }
   return pos;
#endif
}

/**
 * Returns the ceiling of log n base 2, and 0 when n == 0. Equivalently,
 * returns the smallest x such that n <= 2**x.
 */
static inline unsigned
util_logbase2_ceil(unsigned n)
{
   if (n <= 1)
      return 0;

   return 1 + util_logbase2(n - 1);
}
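
/*
 * Example values (illustrative, not part of the original header):
 *
 *    util_logbase2_ceil(7);   // 3
 *    util_logbase2_ceil(8);   // 3
 *    util_logbase2_ceil(9);   // 4
 */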

static inline uint64_t
util_logbase2_ceil64(uint64_t n)
{
   if (n <= 1)
      return 0;

   return 1ull + util_logbase2_64(n - 1);
}

/**
 * Returns the smallest power of two >= x
 */
static inline unsigned
util_next_power_of_two(unsigned x)
{
#if defined(HAVE___BUILTIN_CLZ)
   if (x <= 1)
      return 1;

   return (1 << ((sizeof(unsigned) * 8) - __builtin_clz(x - 1)));
#else
   unsigned val = x;

   if (x <= 1)
      return 1;

   if (util_is_power_of_two_or_zero(x))
      return x;

   val--;
   val = (val >> 1) | val;
   val = (val >> 2) | val;
   val = (val >> 4) | val;
   val = (val >> 8) | val;
   val = (val >> 16) | val;
   val++;
   return val;
#endif
}
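
/*
 * Example values (illustrative, not part of the original header):
 *
 *    util_next_power_of_two(5);   // 8
 *    util_next_power_of_two(8);   // 8
 *    util_next_power_of_two(0);   // 1
 */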

static inline uint64_t
util_next_power_of_two64(uint64_t x)
{
#if defined(HAVE___BUILTIN_CLZLL)
   if (x <= 1)
      return 1;

   return (1ull << ((sizeof(uint64_t) * 8) - __builtin_clzll(x - 1)));
#else
   uint64_t val = x;

   if (x <= 1)
      return 1;

   if (util_is_power_of_two_or_zero64(x))
      return x;

   val--;
   val = (val >> 1) | val;
   val = (val >> 2) | val;
   val = (val >> 4) | val;
   val = (val >> 8) | val;
   val = (val >> 16) | val;
   val = (val >> 32) | val;
   val++;
   return val;
#endif
}

/**
 * Reverse bits in n
 * Algorithm taken from:
 * http://stackoverflow.com/questions/9144800/c-reverse-bits-in-unsigned-integer
 */
static inline unsigned
util_bitreverse(unsigned n)
{
   n = ((n >> 1) & 0x55555555u) | ((n & 0x55555555u) << 1);
   n = ((n >> 2) & 0x33333333u) | ((n & 0x33333333u) << 2);
   n = ((n >> 4) & 0x0f0f0f0fu) | ((n & 0x0f0f0f0fu) << 4);
   n = ((n >> 8) & 0x00ff00ffu) | ((n & 0x00ff00ffu) << 8);
   n = ((n >> 16) & 0xffffu) | ((n & 0xffffu) << 16);
   return n;
}
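
/*
 * Example (illustrative, not part of the original header). Each pass
 * swaps progressively larger bit groups, so all 32 bits reverse in
 * five steps:
 *
 *    util_bitreverse(0x00000001u);   // 0x80000000
 *    util_bitreverse(0x0000ffffu);   // 0xffff0000
 */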

/**
 * Convert from little endian to CPU byte order.
 */

#ifdef PIPE_ARCH_BIG_ENDIAN
#define util_le64_to_cpu(x) util_bswap64(x)
#define util_le32_to_cpu(x) util_bswap32(x)
#define util_le16_to_cpu(x) util_bswap16(x)
#else
#define util_le64_to_cpu(x) (x)
#define util_le32_to_cpu(x) (x)
#define util_le16_to_cpu(x) (x)
#endif

#define util_cpu_to_le64(x) util_le64_to_cpu(x)
#define util_cpu_to_le32(x) util_le32_to_cpu(x)
#define util_cpu_to_le16(x) util_le16_to_cpu(x)

/**
 * Reverse byte order of a 32 bit word.
 */
static inline uint32_t
util_bswap32(uint32_t n)
{
#if defined(HAVE___BUILTIN_BSWAP32)
   return __builtin_bswap32(n);
#else
   return (n >> 24) |
          ((n >> 8) & 0x0000ff00) |
          ((n << 8) & 0x00ff0000) |
          (n << 24);
#endif
}

/**
 * Reverse byte order of a 64 bit word.
 */
static inline uint64_t
util_bswap64(uint64_t n)
{
#if defined(HAVE___BUILTIN_BSWAP64)
   return __builtin_bswap64(n);
#else
   return ((uint64_t)util_bswap32((uint32_t)n) << 32) |
          util_bswap32((uint32_t)(n >> 32));
#endif
}


/**
 * Reverse byte order of a 16 bit word.
 */
static inline uint16_t
util_bswap16(uint16_t n)
{
   return (n >> 8) |
          (n << 8);
}

static inline void*
util_memcpy_cpu_to_le32(void * restrict dest, const void * restrict src, size_t n)
{
#ifdef PIPE_ARCH_BIG_ENDIAN
   size_t i, e;
   assert(n % 4 == 0);

   for (i = 0, e = n / 4; i < e; i++) {
      uint32_t * restrict d = (uint32_t* restrict)dest;
      const uint32_t * restrict s = (const uint32_t* restrict)src;
      d[i] = util_bswap32(s[i]);
   }
   return dest;
#else
   return memcpy(dest, src, n);
#endif
}
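
/*
 * Usage sketch (illustrative, not part of the original header): writing
 * CPU-order words into a little-endian buffer, e.g. one consumed by
 * little-endian hardware. On little-endian CPUs these reduce to a plain
 * assignment and a memcpy():
 *
 *    uint32_t words[2] = { 0x11223344, 0x55667788 };
 *    uint32_t le_buf[2];
 *    util_memcpy_cpu_to_le32(le_buf, words, sizeof(words));
 *    uint32_t first = util_le32_to_cpu(le_buf[0]);   // 0x11223344 again
 */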

/**
 * Clamp X to [MIN, MAX].
 * This is a macro to allow float, int, uint, etc. types.
 * We arbitrarily turn NaN into MIN.
 */
#define CLAMP( X, MIN, MAX ) ( (X)>(MIN) ? ((X)>(MAX) ? (MAX) : (X)) : (MIN) )

#define MIN2( A, B ) ( (A)<(B) ? (A) : (B) )
#define MAX2( A, B ) ( (A)>(B) ? (A) : (B) )

#define MIN3( A, B, C ) ((A) < (B) ? MIN2(A, C) : MIN2(B, C))
#define MAX3( A, B, C ) ((A) > (B) ? MAX2(A, C) : MAX2(B, C))

#define MIN4( A, B, C, D ) ((A) < (B) ? MIN3(A, C, D) : MIN3(B, C, D))
#define MAX4( A, B, C, D ) ((A) > (B) ? MAX3(A, C, D) : MAX3(B, C, D))
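
/*
 * Cautionary example (not part of the original header): because these
 * are plain macros, every argument may be evaluated more than once, so
 * avoid side effects in the operands:
 *
 *    float f = CLAMP(x, 0.0f, 1.0f);   // fine
 *    int n = MIN2(i++, j);             // unsafe: i++ can run twice
 *
 * NaN inputs fail every comparison, which is why CLAMP maps NaN to MIN.
 */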


/**
 * Align a value; only works for power-of-two alignments.
 */
static inline int
align(int value, int alignment)
{
   return (value + alignment - 1) & ~(alignment - 1);
}

static inline uint64_t
align64(uint64_t value, unsigned alignment)
{
   return (value + alignment - 1) & ~((uint64_t)alignment - 1);
}

/**
 * Works like align(), but supports non-power-of-two alignments.
 */
static inline size_t
util_align_npot(size_t value, size_t alignment)
{
   if (value % alignment)
      return value + (alignment - (value % alignment));
   return value;
}
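
/*
 * Example values (illustrative, not part of the original header):
 *
 *    align(13, 8);             // 16
 *    align(16, 8);             // 16
 *    util_align_npot(13, 6);   // 18 (6 is not a power of two)
 */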

static inline unsigned
u_minify(unsigned value, unsigned levels)
{
   return MAX2(1, value >> levels);
}
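
/*
 * Example (illustrative, not part of the original header): computing
 * mipmap level sizes for a 13-texel dimension:
 *
 *    u_minify(13, 0);   // 13
 *    u_minify(13, 1);   // 6
 *    u_minify(13, 2);   // 3
 *    u_minify(13, 4);   // 1 (clamped, never reaches 0)
 */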

#ifndef COPY_4V
#define COPY_4V( DST, SRC )    \
do {                           \
   (DST)[0] = (SRC)[0];        \
   (DST)[1] = (SRC)[1];        \
   (DST)[2] = (SRC)[2];        \
   (DST)[3] = (SRC)[3];        \
} while (0)
#endif


#ifndef COPY_4FV
#define COPY_4FV( DST, SRC ) COPY_4V(DST, SRC)
#endif


#ifndef ASSIGN_4V
#define ASSIGN_4V( DST, V0, V1, V2, V3 ) \
do {                                     \
   (DST)[0] = (V0);                      \
   (DST)[1] = (V1);                      \
   (DST)[2] = (V2);                      \
   (DST)[3] = (V3);                      \
} while (0)
#endif


static inline uint32_t
util_unsigned_fixed(float value, unsigned frac_bits)
{
   return value < 0 ? 0 : (uint32_t)(value * (1<<frac_bits));
}

static inline int32_t
util_signed_fixed(float value, unsigned frac_bits)
{
   return (int32_t)(value * (1<<frac_bits));
}
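
/*
 * Example (illustrative, not part of the original header): converting
 * to a fixed-point format with 8 fractional bits:
 *
 *    util_unsigned_fixed(1.5f, 8);    // 384 (1.5 * 256)
 *    util_unsigned_fixed(-2.0f, 8);   // 0   (clamped at zero)
 *    util_signed_fixed(-1.5f, 8);     // -384
 *
 * The fraction is truncated toward zero by the cast, not rounded.
 */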

unsigned
util_fpstate_get(void);
unsigned
util_fpstate_set_denorms_to_zero(unsigned current_fpstate);
void
util_fpstate_set(unsigned fpstate);



#ifdef __cplusplus
}
#endif

#endif /* U_MATH_H */