1 /**************************************************************************
3 * Copyright 2009 VMware, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
33 * LLVM IR doesn't support all basic arithmetic operations we care about (most
34 * notably min/max and saturated operations), and it is often necessary to
35 * resort to machine-specific intrinsics directly. The functions here hide all
36 * these implementation details from the other modules.
38 * We also do simple expression simplification here. Reasons are:
39 * - it is very easy given we have all necessary information readily available
40 * - LLVM optimization passes fail to simplify several vector expressions
41 * - We often know value constraints which the optimization passes have no way
42 * of knowing, such as when source arguments are known to be in [0, 1] range.
44 * @author Jose Fonseca <jfonseca@vmware.com>
48 #include "pipe/p_state.h"
50 #include "lp_bld_arit.h"
54 lp_build_elem_type(union lp_type type
)
60 return LLVMFloatType();
63 return LLVMDoubleType();
67 return LLVMFloatType();
71 return LLVMIntType(type
.width
);
77 lp_build_vec_type(union lp_type type
)
79 LLVMTypeRef elem_type
= lp_build_elem_type(type
);
80 return LLVMVectorType(elem_type
, type
.length
);
85 * This function is a mirror of lp_build_elem_type() above.
87 * XXX: I'm not sure if it wouldn't be easier/efficient to just recreate the
88 * type and check for identity.
91 lp_check_elem_type(union lp_type type
, LLVMTypeRef elem_type
)
93 LLVMTypeKind elem_kind
;
99 elem_kind
= LLVMGetTypeKind(elem_type
);
104 if(elem_kind
!= LLVMFloatTypeKind
)
108 if(elem_kind
!= LLVMDoubleTypeKind
)
117 if(elem_kind
!= LLVMIntegerTypeKind
)
120 if(LLVMGetIntTypeWidth(elem_type
) != type
.width
)
129 lp_check_vec_type(union lp_type type
, LLVMTypeRef vec_type
)
131 LLVMTypeRef elem_type
;
137 if(LLVMGetTypeKind(vec_type
) != LLVMVectorTypeKind
)
140 if(LLVMGetVectorSize(vec_type
) != type
.length
)
143 elem_type
= LLVMGetElementType(vec_type
);
145 return lp_check_elem_type(type
, elem_type
);
150 lp_check_value(union lp_type type
, LLVMValueRef val
)
152 LLVMTypeRef vec_type
;
158 vec_type
= LLVMTypeOf(val
);
160 return lp_check_vec_type(type
, vec_type
);
165 lp_build_undef(union lp_type type
)
167 LLVMTypeRef vec_type
= lp_build_vec_type(type
);
168 return LLVMGetUndef(vec_type
);
173 lp_build_zero(union lp_type type
)
175 LLVMTypeRef vec_type
= lp_build_vec_type(type
);
176 return LLVMConstNull(vec_type
);
181 lp_build_one(union lp_type type
)
183 LLVMTypeRef elem_type
;
184 LLVMValueRef elems
[LP_MAX_VECTOR_LENGTH
];
187 assert(type
.length
<= LP_MAX_VECTOR_LENGTH
);
189 elem_type
= lp_build_elem_type(type
);
192 elems
[0] = LLVMConstReal(elem_type
, 1.0);
194 elems
[0] = LLVMConstInt(elem_type
, 1LL << (type
.width
/2), 0);
196 elems
[0] = LLVMConstInt(elem_type
, 1, 0);
198 /* special case -- 1.0 for normalized types is more easily attained if
199 * we start with a vector consisting of all bits set */
200 LLVMTypeRef vec_type
= LLVMVectorType(elem_type
, type
.length
);
201 LLVMValueRef vec
= LLVMConstAllOnes(vec_type
);
204 vec
= LLVMConstLShr(vec
, LLVMConstInt(LLVMInt32Type(), 1, 0));
209 for(i
= 1; i
< type
.length
; ++i
)
212 return LLVMConstVector(elems
, type
.length
);
217 lp_build_const_aos(union lp_type type
,
218 double r
, double g
, double b
, double a
,
219 const unsigned char *swizzle
)
221 const unsigned char default_swizzle
[4] = {0, 1, 2, 3};
222 LLVMTypeRef elem_type
;
223 LLVMValueRef elems
[LP_MAX_VECTOR_LENGTH
];
226 assert(type
.length
% 4 == 0);
227 assert(type
.length
<= LP_MAX_VECTOR_LENGTH
);
229 elem_type
= lp_build_elem_type(type
);
232 swizzle
= default_swizzle
;
235 elems
[swizzle
[0]] = LLVMConstReal(elem_type
, r
);
236 elems
[swizzle
[1]] = LLVMConstReal(elem_type
, g
);
237 elems
[swizzle
[2]] = LLVMConstReal(elem_type
, b
);
238 elems
[swizzle
[3]] = LLVMConstReal(elem_type
, a
);
246 shift
= type
.width
/2;
248 shift
= type
.sign
? type
.width
- 1 : type
.width
;
252 llscale
= (long long)1 << shift
;
253 dscale
= (double)llscale
;
254 assert((long long)dscale
== llscale
);
256 elems
[swizzle
[0]] = LLVMConstInt(elem_type
, r
*dscale
+ 0.5, 0);
257 elems
[swizzle
[1]] = LLVMConstInt(elem_type
, g
*dscale
+ 0.5, 0);
258 elems
[swizzle
[2]] = LLVMConstInt(elem_type
, b
*dscale
+ 0.5, 0);
259 elems
[swizzle
[3]] = LLVMConstInt(elem_type
, a
*dscale
+ 0.5, 0);
262 for(i
= 4; i
< type
.length
; ++i
)
263 elems
[i
] = elems
[i
% 4];
265 return LLVMConstVector(elems
, type
.length
);
/*
 * Emit a call to the two-operand intrinsic called `name` with operands
 * `a` and `b`, and return the value produced by LLVMBuildCall().
 *
 * The function is looked up by name in the module that owns the builder's
 * current insertion block.  The listing also shows it being declared with
 * LLVMAddFunction() using a two-argument function type whose return type
 * is LLVMTypeOf(a), the C calling convention and external linkage
 * (presumably only when the lookup fails -- the guarding condition is not
 * visible in this listing; confirm against the full source).  The function
 * is asserted to be a declaration before the call is built.
 *
 * When both operands are constants a warning is printed through
 * debug_printf().  The intrinsic name is passed as the argument for the
 * "%s" conversion; the format string previously had no matching argument,
 * which is undefined behaviour.
 */
270 lp_build_intrinsic_binary(LLVMBuilderRef builder
,
275 LLVMModuleRef module
= LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder
)));
276 LLVMValueRef function
;
277 LLVMValueRef args
[2];
279 function
= LLVMGetNamedFunction(module
, name
);
281 LLVMTypeRef type
= LLVMTypeOf(a
);
282 LLVMTypeRef arg_types
[2];
285 function
= LLVMAddFunction(module
, name
, LLVMFunctionType(type
, arg_types
, 2, 0));
286 LLVMSetFunctionCallConv(function
, LLVMCCallConv
);
287 LLVMSetLinkage(function
, LLVMExternalLinkage
);
289 assert(LLVMIsDeclaration(function
));
292 /* We shouldn't use only constants with intrinsics, as they won't be
293 * propagated by LLVM optimization passes.
295 if(LLVMIsConstant(a
) && LLVMIsConstant(b
))
296 debug_printf("warning: invoking intrinsic \"%s\" with constants\n", name);
302 return LLVMBuildCall(builder
, function
, args
, 2, "");
307 lp_build_min_simple(struct lp_build_context
*bld
,
311 const union lp_type type
= bld
->type
;
312 const char *intrinsic
= NULL
;
315 /* TODO: optimize the constant case */
317 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
318 if(type
.width
* type
.length
== 128) {
321 intrinsic
= "llvm.x86.sse.min.ps";
323 intrinsic
= "llvm.x86.sse2.min.pd";
325 if(type
.width
== 8 && !type
.sign
)
326 intrinsic
= "llvm.x86.sse2.pminu.b";
327 if(type
.width
== 16 && type
.sign
)
328 intrinsic
= "llvm.x86.sse2.pmins.w";
334 return lp_build_intrinsic_binary(bld
->builder
, intrinsic
, a
, b
);
337 cond
= LLVMBuildFCmp(bld
->builder
, LLVMRealULT
, a
, b
, "");
339 cond
= LLVMBuildICmp(bld
->builder
, type
.sign
? LLVMIntSLT
: LLVMIntULT
, a
, b
, "");
340 return LLVMBuildSelect(bld
->builder
, cond
, a
, b
, "");
345 lp_build_max_simple(struct lp_build_context
*bld
,
349 const union lp_type type
= bld
->type
;
350 const char *intrinsic
= NULL
;
353 /* TODO: optimize the constant case */
355 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
356 if(type
.width
* type
.length
== 128) {
359 intrinsic
= "llvm.x86.sse.max.ps";
361 intrinsic
= "llvm.x86.sse2.max.pd";
363 if(type
.width
== 8 && !type
.sign
)
364 intrinsic
= "llvm.x86.sse2.pmaxu.b";
365 if(type
.width
== 16 && type
.sign
)
366 intrinsic
= "llvm.x86.sse2.pmaxs.w";
372 return lp_build_intrinsic_binary(bld
->builder
, intrinsic
, a
, b
);
375 cond
= LLVMBuildFCmp(bld
->builder
, LLVMRealULT
, a
, b
, "");
377 cond
= LLVMBuildICmp(bld
->builder
, type
.sign
? LLVMIntSLT
: LLVMIntULT
, a
, b
, "");
378 return LLVMBuildSelect(bld
->builder
, cond
, b
, a
, "");
383 lp_build_comp(struct lp_build_context
*bld
,
386 const union lp_type type
= bld
->type
;
393 if(type
.norm
&& !type
.floating
&& !type
.fixed
&& !type
.sign
) {
394 if(LLVMIsConstant(a
))
395 return LLVMConstNot(a
);
397 return LLVMBuildNot(bld
->builder
, a
, "");
400 if(LLVMIsConstant(a
))
401 return LLVMConstSub(bld
->one
, a
);
403 return LLVMBuildSub(bld
->builder
, bld
->one
, a
, "");
408 lp_build_add(struct lp_build_context
*bld
,
412 const union lp_type type
= bld
->type
;
419 if(a
== bld
->undef
|| b
== bld
->undef
)
423 const char *intrinsic
= NULL
;
425 if(a
== bld
->one
|| b
== bld
->one
)
428 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
429 if(type
.width
* type
.length
== 128 &&
430 !type
.floating
&& !type
.fixed
) {
432 intrinsic
= type
.sign
? "llvm.x86.sse2.padds.b" : "llvm.x86.sse2.paddus.b";
434 intrinsic
= type
.sign
? "llvm.x86.sse2.padds.w" : "llvm.x86.sse2.paddus.w";
439 return lp_build_intrinsic_binary(bld
->builder
, intrinsic
, a
, b
);
442 if(LLVMIsConstant(a
) && LLVMIsConstant(b
))
443 res
= LLVMConstAdd(a
, b
);
445 res
= LLVMBuildAdd(bld
->builder
, a
, b
, "");
447 if(bld
->type
.norm
&& (bld
->type
.floating
|| bld
->type
.fixed
))
448 res
= lp_build_min_simple(bld
, res
, bld
->one
);
455 lp_build_sub(struct lp_build_context
*bld
,
459 const union lp_type type
= bld
->type
;
464 if(a
== bld
->undef
|| b
== bld
->undef
)
470 const char *intrinsic
= NULL
;
475 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
476 if(type
.width
* type
.length
== 128 &&
477 !type
.floating
&& !type
.fixed
) {
479 intrinsic
= type
.sign
? "llvm.x86.sse2.psubs.b" : "llvm.x86.sse2.psubus.b";
481 intrinsic
= type
.sign
? "llvm.x86.sse2.psubs.w" : "llvm.x86.sse2.psubus.w";
486 return lp_build_intrinsic_binary(bld
->builder
, intrinsic
, a
, b
);
489 if(LLVMIsConstant(a
) && LLVMIsConstant(b
))
490 res
= LLVMConstSub(a
, b
);
492 res
= LLVMBuildSub(bld
->builder
, a
, b
, "");
494 if(bld
->type
.norm
&& (bld
->type
.floating
|| bld
->type
.fixed
))
495 res
= lp_build_max_simple(bld
, res
, bld
->zero
);
502 * Build shuffle vectors that match PUNPCKLxx and PUNPCKHxx instructions.
505 lp_build_unpack_shuffle(unsigned n
, unsigned lo_hi
)
507 LLVMValueRef elems
[LP_MAX_VECTOR_LENGTH
];
510 assert(n
<= LP_MAX_VECTOR_LENGTH
);
513 for(i
= 0, j
= lo_hi
*n
/2; i
< n
; i
+= 2, ++j
) {
514 elems
[i
+ 0] = LLVMConstInt(LLVMInt32Type(), 0 + j
, 0);
515 elems
[i
+ 1] = LLVMConstInt(LLVMInt32Type(), n
+ j
, 0);
518 return LLVMConstVector(elems
, n
);
523 lp_build_const_vec(LLVMTypeRef type
, unsigned n
, long long c
)
525 LLVMValueRef elems
[LP_MAX_VECTOR_LENGTH
];
528 assert(n
<= LP_MAX_VECTOR_LENGTH
);
530 for(i
= 0; i
< n
; ++i
)
531 elems
[i
] = LLVMConstInt(type
, c
, 0);
533 return LLVMConstVector(elems
, n
);
538 * Normalized 8bit multiplication.
542 * makes the following approximation to the division (Sree)
544 * a*b/255 ~= (a*(b + 1)) >> 8
546 * which is the fastest method that satisfies the following OpenGL criteria
548 * 0*0 = 0 and 255*255 = 255
552 * takes the geometric series approximation to the division
554 * t/255 = (t >> 8) + (t >> 16) + (t >> 24) ..
556 * in this case just the first two terms to fit in 16bit arithmetic
558 * t/255 ~= (t + (t >> 8)) >> 8
560 * note that just by itself it doesn't satisfy the OpenGL criteria, as
561 * 255*255 = 254, so the special case b = 255 must be accounted or roundoff
564 * - geometric series plus rounding
566 * when using a geometric series division instead of truncating the result
567 * use roundoff in the approximation (Jim Blinn)
569 * t/255 ~= (t + (t >> 8) + 0x80) >> 8
571 * achieving the exact results
573 * @sa Alvy Ray Smith, Image Compositing Fundamentals, Tech Memo 4, Aug 15, 1995,
574 * ftp://ftp.alvyray.com/Acrobat/4_Comp.pdf
575 * @sa Michael Herf, The "double blend trick", May 2000,
576 * http://www.stereopsis.com/doubleblend.html
579 lp_build_mul_u8n(LLVMBuilderRef builder
,
580 LLVMValueRef a
, LLVMValueRef b
)
582 static LLVMValueRef c01
= NULL
;
583 static LLVMValueRef c08
= NULL
;
584 static LLVMValueRef c80
= NULL
;
587 if(!c01
) c01
= lp_build_const_vec(LLVMInt16Type(), 8, 0x01);
588 if(!c08
) c08
= lp_build_const_vec(LLVMInt16Type(), 8, 0x08);
589 if(!c80
) c80
= lp_build_const_vec(LLVMInt16Type(), 8, 0x80);
593 /* a*b/255 ~= (a*(b + 1)) >> 8 */
594 b
= LLVMBuildAdd(builder
, b
, c01
, "");
595 ab
= LLVMBuildMul(builder
, a
, b
, "");
599 /* t/255 ~= (t + (t >> 8) + 0x80) >> 8 */
600 ab
= LLVMBuildMul(builder
, a
, b
, "");
601 ab
= LLVMBuildAdd(builder
, ab
, LLVMBuildLShr(builder
, ab
, c08
, ""), "");
602 ab
= LLVMBuildAdd(builder
, ab
, c80
, "");
606 ab
= LLVMBuildLShr(builder
, ab
, c08
, "");
613 lp_build_mul(struct lp_build_context
*bld
,
617 const union lp_type type
= bld
->type
;
627 if(a
== bld
->undef
|| b
== bld
->undef
)
630 if(!type
.floating
&& !type
.fixed
&& type
.norm
) {
631 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
632 if(type
.width
== 8 && type
.length
== 16) {
633 LLVMTypeRef i16x8
= LLVMVectorType(LLVMInt16Type(), 8);
634 LLVMTypeRef i8x16
= LLVMVectorType(LLVMInt8Type(), 16);
635 static LLVMValueRef ml
= NULL
;
636 static LLVMValueRef mh
= NULL
;
637 LLVMValueRef al
, ah
, bl
, bh
;
638 LLVMValueRef abl
, abh
;
641 if(!ml
) ml
= lp_build_unpack_shuffle(16, 0);
642 if(!mh
) mh
= lp_build_unpack_shuffle(16, 1);
644 /* PUNPCKLBW, PUNPCKHBW */
645 al
= LLVMBuildShuffleVector(bld
->builder
, a
, bld
->zero
, ml
, "");
646 bl
= LLVMBuildShuffleVector(bld
->builder
, b
, bld
->zero
, ml
, "");
647 ah
= LLVMBuildShuffleVector(bld
->builder
, a
, bld
->zero
, mh
, "");
648 bh
= LLVMBuildShuffleVector(bld
->builder
, b
, bld
->zero
, mh
, "");
651 al
= LLVMBuildBitCast(bld
->builder
, al
, i16x8
, "");
652 bl
= LLVMBuildBitCast(bld
->builder
, bl
, i16x8
, "");
653 ah
= LLVMBuildBitCast(bld
->builder
, ah
, i16x8
, "");
654 bh
= LLVMBuildBitCast(bld
->builder
, bh
, i16x8
, "");
656 /* PMULLW, PSRLW, PADDW */
657 abl
= lp_build_mul_u8n(bld
->builder
, al
, bl
);
658 abh
= lp_build_mul_u8n(bld
->builder
, ah
, bh
);
661 ab
= lp_build_intrinsic_binary(bld
->builder
, "llvm.x86.sse2.packuswb.128" , abl
, abh
);
664 ab
= LLVMBuildBitCast(bld
->builder
, ab
, i8x16
, "");
674 if(LLVMIsConstant(a
) && LLVMIsConstant(b
))
675 return LLVMConstMul(a
, b
);
677 return LLVMBuildMul(bld
->builder
, a
, b
, "");
682 lp_build_min(struct lp_build_context
*bld
,
686 if(a
== bld
->undef
|| b
== bld
->undef
)
693 if(a
== bld
->zero
|| b
== bld
->zero
)
701 return lp_build_min_simple(bld
, a
, b
);
706 lp_build_max(struct lp_build_context
*bld
,
710 if(a
== bld
->undef
|| b
== bld
->undef
)
717 if(a
== bld
->one
|| b
== bld
->one
)
725 return lp_build_max_simple(bld
, a
, b
);