Remove _mesa_sqrt* in favor of plain sqrt
[mesa.git] / src / mesa / main / imports.c
1 /**
2 * \file imports.c
3 * Standard C library function wrappers.
4 *
5 * Imports are services which the device driver or window system or
6 * operating system provides to the core renderer. The core renderer (Mesa)
7 * will call these functions in order to do memory allocation, simple I/O,
8 * etc.
9 *
10 * Some drivers will want to override/replace this file with something
11 * specialized, but that'll be rare.
12 *
13 * Eventually, I want to roll the glheader.h file into this.
14 *
15 * \todo Functions still needed:
16 * - scanf
17 * - qsort
18 * - rand and RAND_MAX
19 */
20
21 /*
22 * Mesa 3-D graphics library
23 * Version: 7.1
24 *
25 * Copyright (C) 1999-2007 Brian Paul All Rights Reserved.
26 *
27 * Permission is hereby granted, free of charge, to any person obtaining a
28 * copy of this software and associated documentation files (the "Software"),
29 * to deal in the Software without restriction, including without limitation
30 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
31 * and/or sell copies of the Software, and to permit persons to whom the
32 * Software is furnished to do so, subject to the following conditions:
33 *
34 * The above copyright notice and this permission notice shall be included
35 * in all copies or substantial portions of the Software.
36 *
37 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
38 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
39 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
40 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
41 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
42 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
43 */
44
45
46
47 #include "imports.h"
48 #include "context.h"
49 #include "mtypes.h"
50 #include "version.h"
51
52 #ifdef _GNU_SOURCE
53 #include <locale.h>
54 #ifdef __APPLE__
55 #include <xlocale.h>
56 #endif
57 #endif
58
59
60 #ifdef WIN32
61 #define vsnprintf _vsnprintf
62 #elif defined(__IBMC__) || defined(__IBMCPP__) || ( defined(__VMS) && __CRTL_VER < 70312000 )
63 extern int vsnprintf(char *str, size_t count, const char *fmt, va_list arg);
64 #ifdef __VMS
65 #include "vsnprintf.c"
66 #endif
67 #endif
68
69 /**********************************************************************/
70 /** \name Memory */
71 /*@{*/
72
/**
 * Allocate aligned memory.
 *
 * \param bytes      number of bytes to allocate.
 * \param alignment  requested alignment in bytes.  Must be a power of two
 *                   (the fallback path masks the address with
 *                   alignment - 1).  Values smaller than sizeof(void *) are
 *                   rounded up to sizeof(void *) on the posix_memalign and
 *                   fallback paths; a pointer-aligned address also satisfies
 *                   every smaller power-of-two alignment.
 *
 * \return pointer to the aligned block, or NULL on failure.
 *
 * On the fallback path extra memory is allocated to accommodate rounding up
 * the address and to record the real malloc address in the word immediately
 * before the returned pointer.
 *
 * \sa _mesa_align_free().
 */
void *
_mesa_align_malloc(size_t bytes, unsigned long alignment)
{
#if defined(HAVE_POSIX_MEMALIGN)
   void *mem;
   int err;

   /* posix_memalign() rejects alignments that are not a multiple of
    * sizeof(void *), so raise small requests to that minimum.
    */
   if (alignment < sizeof(void *))
      alignment = sizeof(void *);

   err = posix_memalign(& mem, alignment, bytes);
   if (err)
      return NULL;
   return mem;
#elif defined(_WIN32) && defined(_MSC_VER)
   return _aligned_malloc(bytes, alignment);
#else
   uintptr_t ptr, buf;
   size_t align;

   ASSERT( alignment > 0 );

   /* Keep the stashed pointer slot itself pointer-aligned and make sure
    * the padding in front of it never overlaps it.
    */
   align = (size_t) alignment;
   if (align < sizeof(void *))
      align = sizeof(void *);

   /* Refuse requests whose padded size would wrap around size_t. */
   if (align > (size_t) -1 - sizeof(void *) ||
       bytes > (size_t) -1 - sizeof(void *) - align)
      return NULL;

   ptr = (uintptr_t) malloc(bytes + align + sizeof(void *));
   if (!ptr)
      return NULL;

   /* Round up past the pointer slot, then down to the alignment boundary. */
   buf = (ptr + align + sizeof(void *)) & ~(uintptr_t)(align - 1);

   /* Remember the address malloc() returned, for _mesa_align_free(). */
   *(uintptr_t *)(buf - sizeof(void *)) = ptr;

#ifdef DEBUG
   /* Mark the unused padding in front of the pointer slot.  A byte-wise
    * fill cannot run into the slot, unlike a word-wise store.
    */
   memset((void *) ptr, 0xcd, (size_t) (buf - sizeof(void *) - ptr));
#endif

   return (void *) buf;
#endif /* defined(HAVE_POSIX_MEMALIGN) */
}
118
/**
 * Allocate aligned, zero-filled memory.
 *
 * Same contract as _mesa_align_malloc(), except that the returned block is
 * cleared to zero.
 *
 * \param bytes      number of bytes to allocate.
 * \param alignment  requested alignment in bytes; must be a power of two.
 *                   On the fallback path values smaller than sizeof(void *)
 *                   are rounded up to sizeof(void *).
 *
 * \return pointer to the aligned, zeroed block, or NULL on failure.
 *
 * \sa _mesa_align_free().
 */
void *
_mesa_align_calloc(size_t bytes, unsigned long alignment)
{
#if defined(HAVE_POSIX_MEMALIGN)
   void *mem;

   mem = _mesa_align_malloc(bytes, alignment);
   if (mem != NULL) {
      (void) memset(mem, 0, bytes);
   }

   return mem;
#elif defined(_WIN32) && defined(_MSC_VER)
   void *mem;

   mem = _aligned_malloc(bytes, alignment);
   if (mem != NULL) {
      (void) memset(mem, 0, bytes);
   }

   return mem;
#else
   uintptr_t ptr, buf;
   size_t align;

   ASSERT( alignment > 0 );

   /* Keep the stashed pointer slot itself pointer-aligned and make sure
    * the padding in front of it never overlaps it.
    */
   align = (size_t) alignment;
   if (align < sizeof(void *))
      align = sizeof(void *);

   /* Refuse requests whose padded size would wrap around size_t. */
   if (align > (size_t) -1 - sizeof(void *) ||
       bytes > (size_t) -1 - sizeof(void *) - align)
      return NULL;

   ptr = (uintptr_t) calloc(1, bytes + align + sizeof(void *));
   if (!ptr)
      return NULL;

   /* Round up past the pointer slot, then down to the alignment boundary. */
   buf = (ptr + align + sizeof(void *)) & ~(uintptr_t)(align - 1);

   /* Remember the address calloc() returned, for _mesa_align_free(). */
   *(uintptr_t *)(buf - sizeof(void *)) = ptr;

#ifdef DEBUG
   /* Mark the unused padding in front of the pointer slot.  A byte-wise
    * fill cannot run into the slot, unlike a word-wise store.
    */
   memset((void *) ptr, 0xcd, (size_t) (buf - sizeof(void *) - ptr));
#endif

   return (void *)buf;
#endif /* defined(HAVE_POSIX_MEMALIGN) */
}
167
/**
 * Free memory which was allocated with either _mesa_align_malloc()
 * or _mesa_align_calloc().
 *
 * \param ptr  pointer to the memory to be freed.  NULL is accepted and
 *             ignored on every code path.
 *
 * On the fallback path the actual address to free is stored in the word
 * immediately before the address the client sees.
 */
void
_mesa_align_free(void *ptr)
{
#if defined(HAVE_POSIX_MEMALIGN)
   free(ptr);
#elif defined(_WIN32) && defined(_MSC_VER)
   _aligned_free(ptr);
#else
   void **cubbyHole;

   /* free() and _aligned_free() treat NULL as a no-op; do the same here
    * instead of reading the word in front of address zero.
    */
   if (ptr == NULL)
      return;

   cubbyHole = (void **) ((char *) ptr - sizeof(void *));
   free(*cubbyHole);
#endif /* defined(HAVE_POSIX_MEMALIGN) */
}
188
/**
 * Resize an aligned allocation.
 *
 * \param oldBuffer  block previously obtained from the aligned allocator,
 *                   or NULL.
 * \param oldSize    size in bytes of \p oldBuffer (unused on the MSVC path).
 * \param newSize    requested size in bytes.
 * \param alignment  alignment for the new block.
 *
 * \return the new block, or NULL if the allocation failed.
 *
 * On the non-MSVC path a new block is allocated, the first
 * min(oldSize, newSize) bytes are copied over, and the old block is always
 * released, including when the new allocation fails.
 */
void *
_mesa_align_realloc(void *oldBuffer, size_t oldSize, size_t newSize,
                    unsigned long alignment)
{
#if defined(_WIN32) && defined(_MSC_VER)
   (void) oldSize;
   return _aligned_realloc(oldBuffer, newSize, alignment);
#else
   void *replacement = _mesa_align_malloc(newSize, alignment);

   if (oldBuffer != NULL) {
      if (replacement != NULL) {
         size_t keep = newSize;

         if (oldSize < newSize)
            keep = oldSize;
         if (keep > 0)
            memcpy(replacement, oldBuffer, keep);
      }
      _mesa_align_free(oldBuffer);
   }

   return replacement;
#endif
}
210
211
212
/**
 * Reallocate memory by allocating a new block and copying.
 *
 * \param oldBuffer  existing block, or NULL.
 * \param oldSize    size in bytes of \p oldBuffer.
 * \param newSize    requested size in bytes.
 *
 * \return the new block, or NULL if malloc() failed.
 *
 * The first min(oldSize, newSize) bytes are carried over.  The old block
 * is always released, including when the new allocation fails.
 */
void *
_mesa_realloc(void *oldBuffer, size_t oldSize, size_t newSize)
{
   void *fresh = malloc(newSize);

   if (fresh != NULL && oldBuffer != NULL) {
      size_t keep = newSize;

      if (oldSize < newSize)
         keep = oldSize;
      if (keep > 0)
         memcpy(fresh, oldBuffer, keep);
   }

   /* free(NULL) is a no-op, so no guard is needed. */
   free(oldBuffer);
   return fresh;
}
225
/**
 * Fill memory with a constant 16-bit word.
 *
 * \param dst  destination pointer.
 * \param val  value stored into every element.
 * \param n    number of words (not bytes) to write.
 */
void
_mesa_memset16( unsigned short *dst, unsigned short val, size_t n )
{
   size_t i;

   for (i = 0; i < n; i++) {
      dst[i] = val;
   }
}
238
239 /*@}*/
240
241
242 /**********************************************************************/
243 /** \name Math */
244 /*@{*/
245
246
247 /**
248 inv_sqrt - A single precision 1/sqrt routine for IEEE format floats.
249 written by Josh Vanderhoof, based on newsgroup posts by James Van Buskirk
250 and Vesa Karvonen.
251 */
252 float
253 _mesa_inv_sqrtf(float n)
254 {
255 #if defined(USE_IEEE) && !defined(DEBUG)
256 float r0, x0, y0;
257 float r1, x1, y1;
258 float r2, x2, y2;
259 #if 0 /* not used, see below -BP */
260 float r3, x3, y3;
261 #endif
262 fi_type u;
263 unsigned int magic;
264
265 /*
266 Exponent part of the magic number -
267
268 We want to:
269 1. subtract the bias from the exponent,
270 2. negate it
271 3. divide by two (rounding towards -inf)
272 4. add the bias back
273
274 Which is the same as subtracting the exponent from 381 and dividing
275 by 2.
276
277 floor(-(x - 127) / 2) + 127 = floor((381 - x) / 2)
278 */
279
280 magic = 381 << 23;
281
282 /*
283 Significand part of magic number -
284
285 With the current magic number, "(magic - u.i) >> 1" will give you:
286
287 for 1 <= u.f <= 2: 1.25 - u.f / 4
288 for 2 <= u.f <= 4: 1.00 - u.f / 8
289
290 This isn't a bad approximation of 1/sqrt. The maximum difference from
291 1/sqrt will be around .06. After three Newton-Raphson iterations, the
292 maximum difference is less than 4.5e-8. (Which is actually close
293 enough to make the following bias academic...)
294
295 To get a better approximation you can add a bias to the magic
296 number. For example, if you subtract 1/2 of the maximum difference in
297 the first approximation (.03), you will get the following function:
298
299 for 1 <= u.f <= 2: 1.22 - u.f / 4
300 for 2 <= u.f <= 3.76: 0.97 - u.f / 8
301 for 3.76 <= u.f <= 4: 0.72 - u.f / 16
302 (The 3.76 to 4 range is where the result is < .5.)
303
304 This is the closest possible initial approximation, but with a maximum
305 error of 8e-11 after three NR iterations, it is still not perfect. If
306 you subtract 0.0332281 instead of .03, the maximum error will be
307 2.5e-11 after three NR iterations, which should be about as close as
308 is possible.
309
310 for 1 <= u.f <= 2: 1.2167719 - u.f / 4
311 for 2 <= u.f <= 3.73: 0.9667719 - u.f / 8
312 for 3.73 <= u.f <= 4: 0.7167719 - u.f / 16
313
314 */
315
316 magic -= (int)(0.0332281 * (1 << 25));
317
318 u.f = n;
319 u.i = (magic - u.i) >> 1;
320
321 /*
322 Instead of Newton-Raphson, we use Goldschmidt's algorithm, which
323 allows more parallelism. From what I understand, the parallelism
324 comes at the cost of less precision, because it lets error
325 accumulate across iterations.
326 */
327 x0 = 1.0f;
328 y0 = 0.5f * n;
329 r0 = u.f;
330
331 x1 = x0 * r0;
332 y1 = y0 * r0 * r0;
333 r1 = 1.5f - y1;
334
335 x2 = x1 * r1;
336 y2 = y1 * r1 * r1;
337 r2 = 1.5f - y2;
338
339 #if 1
340 return x2 * r2; /* we can stop here, and be conformant -BP */
341 #else
342 x3 = x2 * r2;
343 y3 = y2 * r2 * r2;
344 r3 = 1.5f - y3;
345
346 return x3 * r3;
347 #endif
348 #else
349 return (float) (1.0 / sqrt(n));
350 #endif
351 }
352
353 #ifndef __GNUC__
/**
 * Find the first (least-significant) bit set in a word.
 *
 * \param i  value to examine.
 * \return 1-based position of the lowest set bit, or 0 if \p i is zero.
 */
int
ffs(int i)
{
   unsigned int word = (unsigned int) i;
   int pos = 1;

   if (word == 0)
      return 0;

   /* Skip zero low halves, bytes and nibbles before the bitwise scan. */
   if ((word & 0xffff) == 0) {
      pos += 16;
      word >>= 16;
   }
   if ((word & 0xff) == 0) {
      pos += 8;
      word >>= 8;
   }
   if ((word & 0xf) == 0) {
      pos += 4;
      word >>= 4;
   }
   while ((word & 1) == 0) {
      pos++;
      word >>= 1;
   }

   return pos;
}
382
383
/**
 * Find position of first bit set in given 64-bit value.
 *
 * The implementation asserts that long long is 8 bytes wide and examines
 * the low 32 bits first, then the high 32 bits.
 *
 * \param val  value to examine.
 * \return position of least-significant bit set, starting at 1; zero if
 *         no bits are set.
 */
int
ffsll(long long int val)
{
   int pos;

   assert(sizeof(val) == 8);

   pos = ffs((int) val);
   if (pos == 0) {
      /* Nothing in the low word; look at the high word. */
      pos = ffs((int) (val >> 32));
      if (pos != 0)
         pos += 32;
   }

   return pos;
}
407 #endif /* __GNUC__ */
408
409
410 #if !defined(__GNUC__) ||\
411 ((__GNUC__ * 100 + __GNUC_MINOR__) < 304) /* Not gcc 3.4 or later */
/**
 * Return number of bits set in the given unsigned int.
 *
 * \param n  value whose set bits are counted.
 * \return population count of \p n.
 */
unsigned int
_mesa_bitcount(unsigned int n)
{
   unsigned int count = 0;

   /* Each pass clears the lowest set bit. */
   while (n != 0) {
      n &= n - 1;
      count++;
   }

   return count;
}
424
/**
 * Return number of bits set in the given 64-bit unsigned integer.
 *
 * \param n  value whose set bits are counted.
 * \return population count of \p n.
 */
unsigned int
_mesa_bitcount_64(uint64_t n)
{
   unsigned int count = 0;

   /* Each pass clears the lowest set bit. */
   while (n != 0) {
      n &= n - 1;
      count++;
   }

   return count;
}
437 #endif
438
439
440 /**
441 * Convert a 4-byte float to a 2-byte half float.
442 * Based on code from:
443 * http://www.opengl.org/discussion_boards/ubb/Forum3/HTML/008786.html
444 */
445 GLhalfARB
446 _mesa_float_to_half(float val)
447 {
448 const fi_type fi = {val};
449 const int flt_m = fi.i & 0x7fffff;
450 const int flt_e = (fi.i >> 23) & 0xff;
451 const int flt_s = (fi.i >> 31) & 0x1;
452 int s, e, m = 0;
453 GLhalfARB result;
454
455 /* sign bit */
456 s = flt_s;
457
458 /* handle special cases */
459 if ((flt_e == 0) && (flt_m == 0)) {
460 /* zero */
461 /* m = 0; - already set */
462 e = 0;
463 }
464 else if ((flt_e == 0) && (flt_m != 0)) {
465 /* denorm -- denorm float maps to 0 half */
466 /* m = 0; - already set */
467 e = 0;
468 }
469 else if ((flt_e == 0xff) && (flt_m == 0)) {
470 /* infinity */
471 /* m = 0; - already set */
472 e = 31;
473 }
474 else if ((flt_e == 0xff) && (flt_m != 0)) {
475 /* NaN */
476 m = 1;
477 e = 31;
478 }
479 else {
480 /* regular number */
481 const int new_exp = flt_e - 127;
482 if (new_exp < -24) {
483 /* this maps to 0 */
484 /* m = 0; - already set */
485 e = 0;
486 }
487 else if (new_exp < -14) {
488 /* this maps to a denorm */
489 unsigned int exp_val = (unsigned int) (-14 - new_exp); /* 2^-exp_val*/
490 e = 0;
491 switch (exp_val) {
492 case 0:
493 _mesa_warning(NULL,
494 "float_to_half: logical error in denorm creation!\n");
495 /* m = 0; - already set */
496 break;
497 case 1: m = 512 + (flt_m >> 14); break;
498 case 2: m = 256 + (flt_m >> 15); break;
499 case 3: m = 128 + (flt_m >> 16); break;
500 case 4: m = 64 + (flt_m >> 17); break;
501 case 5: m = 32 + (flt_m >> 18); break;
502 case 6: m = 16 + (flt_m >> 19); break;
503 case 7: m = 8 + (flt_m >> 20); break;
504 case 8: m = 4 + (flt_m >> 21); break;
505 case 9: m = 2 + (flt_m >> 22); break;
506 case 10: m = 1; break;
507 }
508 }
509 else if (new_exp > 15) {
510 /* map this value to infinity */
511 /* m = 0; - already set */
512 e = 31;
513 }
514 else {
515 /* regular */
516 e = new_exp + 15;
517 m = flt_m >> 13;
518 }
519 }
520
521 result = (s << 15) | (e << 10) | m;
522 return result;
523 }
524
525
526 /**
527 * Convert a 2-byte half float to a 4-byte float.
528 * Based on code from:
529 * http://www.opengl.org/discussion_boards/ubb/Forum3/HTML/008786.html
530 */
531 float
532 _mesa_half_to_float(GLhalfARB val)
533 {
534 /* XXX could also use a 64K-entry lookup table */
535 const int m = val & 0x3ff;
536 const int e = (val >> 10) & 0x1f;
537 const int s = (val >> 15) & 0x1;
538 int flt_m, flt_e, flt_s;
539 fi_type fi;
540 float result;
541
542 /* sign bit */
543 flt_s = s;
544
545 /* handle special cases */
546 if ((e == 0) && (m == 0)) {
547 /* zero */
548 flt_m = 0;
549 flt_e = 0;
550 }
551 else if ((e == 0) && (m != 0)) {
552 /* denorm -- denorm half will fit in non-denorm single */
553 const float half_denorm = 1.0f / 16384.0f; /* 2^-14 */
554 float mantissa = ((float) (m)) / 1024.0f;
555 float sign = s ? -1.0f : 1.0f;
556 return sign * mantissa * half_denorm;
557 }
558 else if ((e == 31) && (m == 0)) {
559 /* infinity */
560 flt_e = 0xff;
561 flt_m = 0;
562 }
563 else if ((e == 31) && (m != 0)) {
564 /* NaN */
565 flt_e = 0xff;
566 flt_m = 1;
567 }
568 else {
569 /* regular */
570 flt_e = e + 112;
571 flt_m = m << 13;
572 }
573
574 fi.i = (flt_s << 31) | (flt_e << 23) | flt_m;
575 result = fi.f;
576 return result;
577 }
578
579 /*@}*/
580
581
582 /**********************************************************************/
583 /** \name Sort & Search */
584 /*@{*/
585
/**
 * Wrapper for bsearch().
 *
 * \param key     pointer to the key to look for.
 * \param base    start of the array to search.
 * \param nmemb   number of elements in the array.
 * \param size    size in bytes of one element.
 * \param compar  comparison callback, called as compar(key, element);
 *                returns <0, 0 or >0.
 *
 * \return pointer to a matching element, or NULL if there is none.
 *
 * When _WIN32_WCE is defined a local binary search is used; otherwise the
 * call is forwarded to the C library's bsearch().
 */
void *
_mesa_bsearch( const void *key, const void *base, size_t nmemb, size_t size,
               int (*compar)(const void *, const void *) )
{
#if defined(_WIN32_WCE)
   const char *lo = (const char *) base;

   /* Invariant: a match, if any, lies in the nmemb elements starting at lo. */
   while (nmemb > 0) {
      const size_t half = nmemb >> 1;
      const char *mid = lo + half * size;
      const int cmp = (*compar)(key, mid);

      if (cmp == 0)
         return (void *) mid;

      if (cmp > 0) {
         /* Continue in the elements after mid: nmemb - half - 1 remain.
          * half + 1 <= nmemb always holds, so this cannot underflow.
          */
         lo = mid + size;
         nmemb -= half + 1;
      }
      else {
         /* Continue in the half elements before mid. */
         nmemb = half;
      }
   }
   return NULL;
#else
   return bsearch(key, base, nmemb, size, compar);
#endif
}
612
613 /*@}*/
614
615
616 /**********************************************************************/
617 /** \name Environment vars */
618 /*@{*/
619
/**
 * Wrapper for getenv().
 *
 * \param var  name of the environment variable.
 * \return the value reported by getenv(), or always NULL when _XBOX or
 *         _WIN32_WCE is defined.
 */
char *
_mesa_getenv( const char *var )
{
#if !defined(_XBOX) && !defined(_WIN32_WCE)
   return getenv(var);
#else
   return NULL;
#endif
}
632
633 /*@}*/
634
635
636 /**********************************************************************/
637 /** \name String */
638 /*@{*/
639
/**
 * Duplicate a string into freshly malloc()'ed storage.
 *
 * \param s  string to copy; may be NULL.
 * \return a new copy of \p s that the caller releases with free(), or NULL
 *         if \p s is NULL or the allocation fails.
 */
char *
_mesa_strdup( const char *s )
{
   size_t size;
   char *copy;

   if (s == NULL)
      return NULL;

   size = strlen(s) + 1;   /* include the terminating NUL */
   copy = (char *) malloc(size);
   if (copy != NULL)
      memcpy(copy, s, size);

   return copy;
}
658
/**
 * Wrapper around strtof().
 *
 * \param s    string to parse.
 * \param end  if non-NULL, receives a pointer to the first character that
 *             was not consumed.
 * \return the parsed value as a float.
 *
 * With _GNU_SOURCE (on the platforms not excluded below) parsing uses a
 * "C" locale object that is created once and cached.  If that locale cannot
 * be created the function falls back to plain strtof().  Other builds use
 * strtof() or strtod(), depending on the feature macros.
 */
float
_mesa_strtof( const char *s, char **end )
{
#if defined(_GNU_SOURCE) && !defined(__CYGWIN__) && !defined(__FreeBSD__) && \
    !defined(ANDROID) && !defined(__HAIKU__)
   static locale_t loc = NULL;
   if (!loc) {
      loc = newlocale(LC_CTYPE_MASK, "C", NULL);
   }
   if (!loc) {
      /* newlocale() failed; never hand a null locale to strtof_l(). */
      return strtof(s, end);
   }
   return strtof_l(s, end, loc);
#elif defined(_ISOC99_SOURCE) || (defined(_XOPEN_SOURCE) && _XOPEN_SOURCE >= 600)
   return strtof(s, end);
#else
   return (float)strtod(s, end);
#endif
}
676
/**
 * Compute simple checksum/hash for a string.
 *
 * Each character is weighted by (its 1-based position modulo 100) and the
 * weighted values are summed, starting from 1.  The final position counter
 * is added to the sum.
 *
 * \param str  NUL-terminated string.
 * \return the checksum.
 */
unsigned int
_mesa_str_checksum(const char *str)
{
   /* This could probably be much better */
   unsigned int hash = 1;
   unsigned int pos = 1;

   while (*str != '\0') {
      hash += *str * (pos % 100);
      str++;
      pos++;
   }

   return hash + pos;
}
689
690
691 /*@}*/
692
693
/**
 * Wrapper around vsnprintf().
 *
 * Exists so callers get whichever vsnprintf the platform #ifdef's earlier
 * in this file select.
 *
 * \param str   destination buffer.
 * \param size  size of \p str in bytes.
 * \param fmt   printf-style format string.
 * \param args  arguments for \p fmt.
 * \return whatever the underlying vsnprintf returns.
 */
int
_mesa_vsnprintf(char *str, size_t size, const char *fmt, va_list args)
{
   const int written = vsnprintf( str, size, fmt, args );

   return written;
}
700
/**
 * Variadic wrapper around vsnprintf().
 *
 * \param str   destination buffer.
 * \param size  size of \p str in bytes.
 * \param fmt   printf-style format string, followed by its arguments.
 * \return whatever the underlying vsnprintf returns.
 */
int
_mesa_snprintf( char *str, size_t size, const char *fmt, ... )
{
   va_list ap;
   int written;

   va_start( ap, fmt );
   written = vsnprintf( str, size, fmt, ap );
   va_end( ap );

   return written;
}
712
713