llvmpipe: Implement more arithmetic functions.
[mesa.git] / src / gallium / drivers / llvmpipe / lp_bld_arit.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29 /**
30 * @file
31 * Helper arithmetic functions.
32 *
33 * LLVM IR doesn't support all basic arithmetic operations we care about (most
34 * notably min/max and saturated operations), and it is often necessary to
35 * resort to machine-specific intrinsics directly. The functions here hide all
36 * these implementation details from the other modules.
37 *
38 * We also do simple expression simplification here. Reasons are:
39 * - it is very easy given we have all necessary information readily available
40 * - LLVM optimization passes fail to simplify several vector expressions
41 * - We often know value constraints which the optimization passes have no way
42 * of knowing, such as when source arguments are known to be in [0, 1] range.
43 *
44 * @author Jose Fonseca <jfonseca@vmware.com>
45 */
46
47
48 #include "util/u_debug.h"
49 #include "util/u_string.h"
50
51 #include "lp_bld_type.h"
52 #include "lp_bld_const.h"
53 #include "lp_bld_intr.h"
54 #include "lp_bld_arit.h"
55
56
57 static LLVMValueRef
58 lp_build_min_simple(struct lp_build_context *bld,
59 LLVMValueRef a,
60 LLVMValueRef b)
61 {
62 const union lp_type type = bld->type;
63 const char *intrinsic = NULL;
64 LLVMValueRef cond;
65
66 /* TODO: optimize the constant case */
67
68 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
69 if(type.width * type.length == 128) {
70 if(type.floating) {
71 if(type.width == 32)
72 intrinsic = "llvm.x86.sse.min.ps";
73 if(type.width == 64)
74 intrinsic = "llvm.x86.sse2.min.pd";
75 }
76 else {
77 if(type.width == 8 && !type.sign)
78 intrinsic = "llvm.x86.sse2.pminu.b";
79 if(type.width == 8 && type.sign)
80 intrinsic = "llvm.x86.sse41.pminsb";
81 if(type.width == 16 && !type.sign)
82 intrinsic = "llvm.x86.sse41.pminuw";
83 if(type.width == 16 && type.sign)
84 intrinsic = "llvm.x86.sse2.pmins.w";
85 if(type.width == 32 && !type.sign)
86 intrinsic = "llvm.x86.sse41.pminud";
87 if(type.width == 32 && type.sign)
88 intrinsic = "llvm.x86.sse41.pminsd";
89 }
90 }
91 #endif
92
93 if(intrinsic)
94 return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b);
95
96 if(type.floating)
97 cond = LLVMBuildFCmp(bld->builder, LLVMRealULT, a, b, "");
98 else
99 cond = LLVMBuildICmp(bld->builder, type.sign ? LLVMIntSLT : LLVMIntULT, a, b, "");
100 return LLVMBuildSelect(bld->builder, cond, a, b, "");
101 }
102
103
104 static LLVMValueRef
105 lp_build_max_simple(struct lp_build_context *bld,
106 LLVMValueRef a,
107 LLVMValueRef b)
108 {
109 const union lp_type type = bld->type;
110 const char *intrinsic = NULL;
111 LLVMValueRef cond;
112
113 /* TODO: optimize the constant case */
114
115 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
116 if(type.width * type.length == 128) {
117 if(type.floating) {
118 if(type.width == 32)
119 intrinsic = "llvm.x86.sse.max.ps";
120 if(type.width == 64)
121 intrinsic = "llvm.x86.sse2.max.pd";
122 }
123 else {
124 if(type.width == 8 && !type.sign)
125 intrinsic = "llvm.x86.sse2.pmaxu.b";
126 if(type.width == 8 && type.sign)
127 intrinsic = "llvm.x86.sse41.pmaxsb";
128 if(type.width == 16 && !type.sign)
129 intrinsic = "llvm.x86.sse41.pmaxuw";
130 if(type.width == 16 && type.sign)
131 intrinsic = "llvm.x86.sse2.pmaxs.w";
132 if(type.width == 32 && !type.sign)
133 intrinsic = "llvm.x86.sse41.pmaxud";
134 if(type.width == 32 && type.sign)
135 intrinsic = "llvm.x86.sse41.pmaxsd";
136 }
137 }
138 #endif
139
140 if(intrinsic)
141 return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b);
142
143 if(type.floating)
144 cond = LLVMBuildFCmp(bld->builder, LLVMRealULT, a, b, "");
145 else
146 cond = LLVMBuildICmp(bld->builder, type.sign ? LLVMIntSLT : LLVMIntULT, a, b, "");
147 return LLVMBuildSelect(bld->builder, cond, b, a, "");
148 }
149
150
151 LLVMValueRef
152 lp_build_comp(struct lp_build_context *bld,
153 LLVMValueRef a)
154 {
155 const union lp_type type = bld->type;
156
157 if(a == bld->one)
158 return bld->zero;
159 if(a == bld->zero)
160 return bld->one;
161
162 if(type.norm && !type.floating && !type.fixed && !type.sign) {
163 if(LLVMIsConstant(a))
164 return LLVMConstNot(a);
165 else
166 return LLVMBuildNot(bld->builder, a, "");
167 }
168
169 if(LLVMIsConstant(a))
170 return LLVMConstSub(bld->one, a);
171 else
172 return LLVMBuildSub(bld->builder, bld->one, a, "");
173 }
174
175
176 LLVMValueRef
177 lp_build_add(struct lp_build_context *bld,
178 LLVMValueRef a,
179 LLVMValueRef b)
180 {
181 const union lp_type type = bld->type;
182 LLVMValueRef res;
183
184 if(a == bld->zero)
185 return b;
186 if(b == bld->zero)
187 return a;
188 if(a == bld->undef || b == bld->undef)
189 return bld->undef;
190
191 if(bld->type.norm) {
192 const char *intrinsic = NULL;
193
194 if(a == bld->one || b == bld->one)
195 return bld->one;
196
197 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
198 if(type.width * type.length == 128 &&
199 !type.floating && !type.fixed) {
200 if(type.width == 8)
201 intrinsic = type.sign ? "llvm.x86.sse2.padds.b" : "llvm.x86.sse2.paddus.b";
202 if(type.width == 16)
203 intrinsic = type.sign ? "llvm.x86.sse2.padds.w" : "llvm.x86.sse2.paddus.w";
204 }
205 #endif
206
207 if(intrinsic)
208 return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b);
209 }
210
211 if(LLVMIsConstant(a) && LLVMIsConstant(b))
212 res = LLVMConstAdd(a, b);
213 else
214 res = LLVMBuildAdd(bld->builder, a, b, "");
215
216 if(bld->type.norm && (bld->type.floating || bld->type.fixed))
217 res = lp_build_min_simple(bld, res, bld->one);
218
219 return res;
220 }
221
222
223 LLVMValueRef
224 lp_build_sub(struct lp_build_context *bld,
225 LLVMValueRef a,
226 LLVMValueRef b)
227 {
228 const union lp_type type = bld->type;
229 LLVMValueRef res;
230
231 if(b == bld->zero)
232 return a;
233 if(a == bld->undef || b == bld->undef)
234 return bld->undef;
235 if(a == b)
236 return bld->zero;
237
238 if(bld->type.norm) {
239 const char *intrinsic = NULL;
240
241 if(b == bld->one)
242 return bld->zero;
243
244 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
245 if(type.width * type.length == 128 &&
246 !type.floating && !type.fixed) {
247 if(type.width == 8)
248 intrinsic = type.sign ? "llvm.x86.sse2.psubs.b" : "llvm.x86.sse2.psubus.b";
249 if(type.width == 16)
250 intrinsic = type.sign ? "llvm.x86.sse2.psubs.w" : "llvm.x86.sse2.psubus.w";
251 }
252 #endif
253
254 if(intrinsic)
255 return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b);
256 }
257
258 if(LLVMIsConstant(a) && LLVMIsConstant(b))
259 res = LLVMConstSub(a, b);
260 else
261 res = LLVMBuildSub(bld->builder, a, b, "");
262
263 if(bld->type.norm && (bld->type.floating || bld->type.fixed))
264 res = lp_build_max_simple(bld, res, bld->zero);
265
266 return res;
267 }
268
269
270 /**
271 * Build shuffle vectors that match PUNPCKLxx and PUNPCKHxx instructions.
272 */
273 static LLVMValueRef
274 lp_build_unpack_shuffle(unsigned n, unsigned lo_hi)
275 {
276 LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
277 unsigned i, j;
278
279 assert(n <= LP_MAX_VECTOR_LENGTH);
280 assert(lo_hi < 2);
281
282 for(i = 0, j = lo_hi*n/2; i < n; i += 2, ++j) {
283 elems[i + 0] = LLVMConstInt(LLVMInt32Type(), 0 + j, 0);
284 elems[i + 1] = LLVMConstInt(LLVMInt32Type(), n + j, 0);
285 }
286
287 return LLVMConstVector(elems, n);
288 }
289
290
291 static LLVMValueRef
292 lp_build_const_vec(LLVMTypeRef type, unsigned n, long long c)
293 {
294 LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
295 unsigned i;
296
297 assert(n <= LP_MAX_VECTOR_LENGTH);
298
299 for(i = 0; i < n; ++i)
300 elems[i] = LLVMConstInt(type, c, 0);
301
302 return LLVMConstVector(elems, n);
303 }
304
305
306 /**
307 * Normalized 8bit multiplication.
308 *
309 * - alpha plus one
310 *
311 * makes the following approximation to the division (Sree)
312 *
313 * a*b/255 ~= (a*(b + 1)) >> 256
314 *
315 * which is the fastest method that satisfies the following OpenGL criteria
316 *
317 * 0*0 = 0 and 255*255 = 255
318 *
319 * - geometric series
320 *
321 * takes the geometric series approximation to the division
322 *
323 * t/255 = (t >> 8) + (t >> 16) + (t >> 24) ..
324 *
325 * in this case just the first two terms to fit in 16bit arithmetic
326 *
327 * t/255 ~= (t + (t >> 8)) >> 8
328 *
329 * note that just by itself it doesn't satisfies the OpenGL criteria, as
330 * 255*255 = 254, so the special case b = 255 must be accounted or roundoff
331 * must be used
332 *
333 * - geometric series plus rounding
334 *
335 * when using a geometric series division instead of truncating the result
336 * use roundoff in the approximation (Jim Blinn)
337 *
338 * t/255 ~= (t + (t >> 8) + 0x80) >> 8
339 *
340 * achieving the exact results
341 *
342 * @sa Alvy Ray Smith, Image Compositing Fundamentals, Tech Memo 4, Aug 15, 1995,
343 * ftp://ftp.alvyray.com/Acrobat/4_Comp.pdf
344 * @sa Michael Herf, The "double blend trick", May 2000,
345 * http://www.stereopsis.com/doubleblend.html
346 */
347 static LLVMValueRef
348 lp_build_mul_u8n(LLVMBuilderRef builder,
349 LLVMValueRef a, LLVMValueRef b)
350 {
351 static LLVMValueRef c01 = NULL;
352 static LLVMValueRef c08 = NULL;
353 static LLVMValueRef c80 = NULL;
354 LLVMValueRef ab;
355
356 if(!c01) c01 = lp_build_const_vec(LLVMInt16Type(), 8, 0x01);
357 if(!c08) c08 = lp_build_const_vec(LLVMInt16Type(), 8, 0x08);
358 if(!c80) c80 = lp_build_const_vec(LLVMInt16Type(), 8, 0x80);
359
360 #if 0
361
362 /* a*b/255 ~= (a*(b + 1)) >> 256 */
363 b = LLVMBuildAdd(builder, b, c01, "");
364 ab = LLVMBuildMul(builder, a, b, "");
365
366 #else
367
368 /* t/255 ~= (t + (t >> 8) + 0x80) >> 8 */
369 ab = LLVMBuildMul(builder, a, b, "");
370 ab = LLVMBuildAdd(builder, ab, LLVMBuildLShr(builder, ab, c08, ""), "");
371 ab = LLVMBuildAdd(builder, ab, c80, "");
372
373 #endif
374
375 ab = LLVMBuildLShr(builder, ab, c08, "");
376
377 return ab;
378 }
379
380
381 LLVMValueRef
382 lp_build_mul(struct lp_build_context *bld,
383 LLVMValueRef a,
384 LLVMValueRef b)
385 {
386 const union lp_type type = bld->type;
387
388 if(a == bld->zero)
389 return bld->zero;
390 if(a == bld->one)
391 return b;
392 if(b == bld->zero)
393 return bld->zero;
394 if(b == bld->one)
395 return a;
396 if(a == bld->undef || b == bld->undef)
397 return bld->undef;
398
399 if(!type.floating && !type.fixed && type.norm) {
400 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
401 if(type.width == 8 && type.length == 16) {
402 LLVMTypeRef i16x8 = LLVMVectorType(LLVMInt16Type(), 8);
403 LLVMTypeRef i8x16 = LLVMVectorType(LLVMInt8Type(), 16);
404 static LLVMValueRef ml = NULL;
405 static LLVMValueRef mh = NULL;
406 LLVMValueRef al, ah, bl, bh;
407 LLVMValueRef abl, abh;
408 LLVMValueRef ab;
409
410 if(!ml) ml = lp_build_unpack_shuffle(16, 0);
411 if(!mh) mh = lp_build_unpack_shuffle(16, 1);
412
413 /* PUNPCKLBW, PUNPCKHBW */
414 al = LLVMBuildShuffleVector(bld->builder, a, bld->zero, ml, "");
415 bl = LLVMBuildShuffleVector(bld->builder, b, bld->zero, ml, "");
416 ah = LLVMBuildShuffleVector(bld->builder, a, bld->zero, mh, "");
417 bh = LLVMBuildShuffleVector(bld->builder, b, bld->zero, mh, "");
418
419 /* NOP */
420 al = LLVMBuildBitCast(bld->builder, al, i16x8, "");
421 bl = LLVMBuildBitCast(bld->builder, bl, i16x8, "");
422 ah = LLVMBuildBitCast(bld->builder, ah, i16x8, "");
423 bh = LLVMBuildBitCast(bld->builder, bh, i16x8, "");
424
425 /* PMULLW, PSRLW, PADDW */
426 abl = lp_build_mul_u8n(bld->builder, al, bl);
427 abh = lp_build_mul_u8n(bld->builder, ah, bh);
428
429 /* PACKUSWB */
430 ab = lp_build_intrinsic_binary(bld->builder, "llvm.x86.sse2.packuswb.128" , i16x8, abl, abh);
431
432 /* NOP */
433 ab = LLVMBuildBitCast(bld->builder, ab, i8x16, "");
434
435 return ab;
436 }
437 #endif
438
439 /* FIXME */
440 assert(0);
441 }
442
443 if(LLVMIsConstant(a) && LLVMIsConstant(b))
444 return LLVMConstMul(a, b);
445
446 return LLVMBuildMul(bld->builder, a, b, "");
447 }
448
449
450 LLVMValueRef
451 lp_build_div(struct lp_build_context *bld,
452 LLVMValueRef a,
453 LLVMValueRef b)
454 {
455 const union lp_type type = bld->type;
456
457 if(a == bld->zero)
458 return bld->zero;
459 if(a == bld->one)
460 return lp_build_rcp(bld, b);
461 if(b == bld->zero)
462 return bld->undef;
463 if(b == bld->one)
464 return a;
465 if(a == bld->undef || b == bld->undef)
466 return bld->undef;
467
468 if(LLVMIsConstant(a) && LLVMIsConstant(b))
469 return LLVMConstFDiv(a, b);
470
471 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
472 if(type.width == 32 && type.length == 4)
473 return lp_build_mul(bld, a, lp_build_rcp(bld, b));
474 #endif
475
476 return LLVMBuildFDiv(bld->builder, a, b, "");
477 }
478
479
480 LLVMValueRef
481 lp_build_min(struct lp_build_context *bld,
482 LLVMValueRef a,
483 LLVMValueRef b)
484 {
485 if(a == bld->undef || b == bld->undef)
486 return bld->undef;
487
488 if(a == b)
489 return a;
490
491 if(bld->type.norm) {
492 if(a == bld->zero || b == bld->zero)
493 return bld->zero;
494 if(a == bld->one)
495 return b;
496 if(b == bld->one)
497 return a;
498 }
499
500 return lp_build_min_simple(bld, a, b);
501 }
502
503
504 LLVMValueRef
505 lp_build_max(struct lp_build_context *bld,
506 LLVMValueRef a,
507 LLVMValueRef b)
508 {
509 if(a == bld->undef || b == bld->undef)
510 return bld->undef;
511
512 if(a == b)
513 return a;
514
515 if(bld->type.norm) {
516 if(a == bld->one || b == bld->one)
517 return bld->one;
518 if(a == bld->zero)
519 return b;
520 if(b == bld->zero)
521 return a;
522 }
523
524 return lp_build_max_simple(bld, a, b);
525 }
526
527
528 LLVMValueRef
529 lp_build_abs(struct lp_build_context *bld,
530 LLVMValueRef a)
531 {
532 const union lp_type type = bld->type;
533
534 if(!type.sign)
535 return a;
536
537 /* XXX: is this really necessary? */
538 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
539 if(!type.floating && type.width*type.length == 128) {
540 LLVMTypeRef vec_type = lp_build_vec_type(type);
541 if(type.width == 8)
542 return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.b.128", vec_type, a);
543 if(type.width == 16)
544 return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.w.128", vec_type, a);
545 if(type.width == 32)
546 return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.d.128", vec_type, a);
547 }
548 #endif
549
550 return lp_build_max(bld, a, LLVMBuildNeg(bld->builder, a, ""));
551 }
552
553
554 LLVMValueRef
555 lp_build_sqrt(struct lp_build_context *bld,
556 LLVMValueRef a)
557 {
558 const union lp_type type = bld->type;
559 LLVMTypeRef vec_type = lp_build_vec_type(type);
560 char intrinsic[32];
561
562 /* TODO: optimize the constant case */
563 /* TODO: optimize the constant case */
564
565 assert(type.floating);
566 util_snprintf(intrinsic, sizeof intrinsic, "llvm.sqrt.v%uf%u", type.length, type.width);
567
568 return lp_build_intrinsic_unary(bld->builder, intrinsic, vec_type, a);
569 }
570
571
572 LLVMValueRef
573 lp_build_rcp(struct lp_build_context *bld,
574 LLVMValueRef a)
575 {
576 const union lp_type type = bld->type;
577
578 if(a == bld->zero)
579 return bld->undef;
580 if(a == bld->one)
581 return bld->one;
582 if(a == bld->undef)
583 return bld->undef;
584
585 assert(type.floating);
586
587 if(LLVMIsConstant(a))
588 return LLVMConstFDiv(bld->one, a);
589
590 /* XXX: is this really necessary? */
591 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
592 if(type.width == 32 && type.length == 4)
593 return lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", lp_build_vec_type(type), a);
594 #endif
595
596 return LLVMBuildFDiv(bld->builder, bld->one, a, "");
597 }
598
599
600 LLVMValueRef
601 lp_build_rsqrt(struct lp_build_context *bld,
602 LLVMValueRef a)
603 {
604 const union lp_type type = bld->type;
605
606 assert(type.floating);
607
608 /* XXX: is this really necessary? */
609 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
610 if(type.width == 32 && type.length == 4)
611 return lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rsqrt.ps", lp_build_vec_type(type), a);
612 #endif
613
614 return lp_build_rcp(bld, lp_build_sqrt(bld, a));
615 }
616
617
618 LLVMValueRef
619 lp_build_cos(struct lp_build_context *bld,
620 LLVMValueRef a)
621 {
622 const union lp_type type = bld->type;
623 LLVMTypeRef vec_type = lp_build_vec_type(type);
624 char intrinsic[32];
625
626 /* TODO: optimize the constant case */
627
628 assert(type.floating);
629 util_snprintf(intrinsic, sizeof intrinsic, "llvm.cos.v%uf%u", type.length, type.width);
630
631 return lp_build_intrinsic_unary(bld->builder, intrinsic, vec_type, a);
632 }
633
634
635 LLVMValueRef
636 lp_build_sin(struct lp_build_context *bld,
637 LLVMValueRef a)
638 {
639 const union lp_type type = bld->type;
640 LLVMTypeRef vec_type = lp_build_vec_type(type);
641 char intrinsic[32];
642
643 /* TODO: optimize the constant case */
644
645 assert(type.floating);
646 util_snprintf(intrinsic, sizeof intrinsic, "llvm.sin.v%uf%u", type.length, type.width);
647
648 return lp_build_intrinsic_unary(bld->builder, intrinsic, vec_type, a);
649 }
650
651
652 LLVMValueRef
653 lp_build_pow(struct lp_build_context *bld,
654 LLVMValueRef a,
655 LLVMValueRef b)
656 {
657 const union lp_type type = bld->type;
658 LLVMTypeRef vec_type = lp_build_vec_type(type);
659 char intrinsic[32];
660
661 /* TODO: optimize the constant case */
662
663 assert(type.floating);
664 util_snprintf(intrinsic, sizeof intrinsic, "llvm.pow.v%uf%u", type.length, type.width);
665
666 return lp_build_intrinsic_binary(bld->builder, intrinsic, vec_type, a, b);
667 }
668
669
670 LLVMValueRef
671 lp_build_exp(struct lp_build_context *bld,
672 LLVMValueRef a)
673 {
674 /* FIXME: optimize */
675 return lp_build_pow(bld, lp_build_const_uni(bld->type, 2.7182818284590452354), a);
676 }
677
678
679 LLVMValueRef
680 lp_build_log(struct lp_build_context *bld,
681 LLVMValueRef a)
682 {
683 /* FIXME: implement */
684 return bld->undef;
685 }
686
687
688 #define EXP_POLY_DEGREE 3
689 #define LOG_POLY_DEGREE 5
690
691
692 static LLVMValueRef
693 lp_build_polynomial(struct lp_build_context *bld,
694 LLVMValueRef x,
695 const double *coeffs,
696 unsigned num_coeffs)
697 {
698 const union lp_type type = bld->type;
699 LLVMValueRef res = NULL;
700 unsigned i;
701
702 for (i = num_coeffs; i--; ) {
703 LLVMValueRef coeff = lp_build_const_uni(type, coeffs[i]);
704 if(res)
705 res = lp_build_add(bld, coeff, lp_build_mul(bld, x, res));
706 else
707 res = coeff;
708 }
709
710 if(res)
711 return res;
712 else
713 return bld->undef;
714 }
715
716
717 LLVMValueRef
718 lp_build_exp2(struct lp_build_context *bld,
719 LLVMValueRef a)
720 {
721 /* FIXME: optimize */
722 return lp_build_pow(bld, lp_build_const_uni(bld->type, 2.0), a);
723 }
724
725
726 /**
727 * See http://www.devmaster.net/forums/showthread.php?p=43580
728 */
729 LLVMValueRef
730 lp_build_log2(struct lp_build_context *bld,
731 LLVMValueRef x)
732 {
733 const union lp_type type = bld->type;
734 LLVMTypeRef vec_type = lp_build_vec_type(type);
735 LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
736
737 LLVMValueRef expmask = lp_build_int_const_uni(type, 0x7f800000);
738 LLVMValueRef mantmask = lp_build_int_const_uni(type, 0x007fffff);
739 LLVMValueRef one = LLVMConstBitCast(bld->one, int_vec_type);
740
741 LLVMValueRef i = LLVMBuildBitCast(bld->builder, x, int_vec_type, "");
742
743 LLVMValueRef exp;
744 LLVMValueRef mant;
745 LLVMValueRef logmant;
746
747 /* exp = (float) exponent(x) */
748 exp = LLVMBuildAnd(bld->builder, i, expmask, "");
749 exp = LLVMBuildLShr(bld->builder, exp, lp_build_int_const_uni(type, 23), "");
750 exp = LLVMBuildSub(bld->builder, exp, lp_build_int_const_uni(type, 127), "");
751 exp = LLVMBuildSIToFP(bld->builder, exp, vec_type, "");
752
753 /* mant = (float) mantissa(x) */
754 mant = LLVMBuildAnd(bld->builder, i, mantmask, "");
755 mant = LLVMBuildOr(bld->builder, mant, one, "");
756 mant = LLVMBuildSIToFP(bld->builder, mant, vec_type, "");
757
758 /* Minimax polynomial fit of log2(x)/(x - 1), for x in range [1, 2[
759 * These coefficients can be generate with
760 * http://www.boost.org/doc/libs/1_36_0/libs/math/doc/sf_and_dist/html/math_toolkit/toolkit/internals2/minimax.html
761 */
762 const double polynomial[] = {
763 #if LOG_POLY_DEGREE == 6
764 3.11578814719469302614, -3.32419399085241980044, 2.59883907202499966007, -1.23152682416275988241, 0.318212422185251071475, -0.0344359067839062357313
765 #elif LOG_POLY_DEGREE == 5
766 2.8882704548164776201, -2.52074962577807006663, 1.48116647521213171641, -0.465725644288844778798, 0.0596515482674574969533
767 #elif LOG_POLY_DEGREE == 4
768 2.61761038894603480148, -1.75647175389045657003, 0.688243882994381274313, -0.107254423828329604454
769 #elif LOG_POLY_DEGREE == 3
770 2.28330284476918490682, -1.04913055217340124191, 0.204446009836232697516
771 #else
772 #error
773 #endif
774 };
775
776 logmant = lp_build_polynomial(bld, mant, polynomial, sizeof(polynomial)/sizeof(polynomial[0]));
777
778 /* This effectively increases the polynomial degree by one, but ensures that log2(1) == 0*/
779 logmant = LLVMBuildMul(bld->builder, logmant, LLVMBuildMul(bld->builder, mant, bld->one, ""), "");
780
781 return LLVMBuildAdd(bld->builder, logmant, exp, "");
782 }