1 /**************************************************************************
3 * Copyright 2009 VMware, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
33 * LLVM IR doesn't support all basic arithmetic operations we care about (most
34 * notably min/max and saturated operations), and it is often necessary to
35 * resort to machine-specific intrinsics directly. The functions here hide all
36 * these implementation details from the other modules.
38 * We also do simple expression simplification here. The reasons are:
39 * - it is very easy given we have all necessary information readily available
40 * - LLVM optimization passes fail to simplify several vector expressions
41 * - We often know value constraints which the optimization passes have no way
42 * of knowing, such as when source arguments are known to be in [0, 1] range.
44 * @author Jose Fonseca <jfonseca@vmware.com>
48 #include "pipe/p_state.h"
50 #include "lp_bld_arit.h"
54 lp_build_elem_type(union lp_type type
)
60 return LLVMFloatType();
63 return LLVMDoubleType();
67 return LLVMFloatType();
71 return LLVMIntType(type
.width
);
/**
 * Build the LLVM vector type that corresponds to the given type description.
 *
 * The element type is obtained from lp_build_elem_type() and the number of
 * elements is taken from type.length; the result is whatever
 * LLVMVectorType() returns for that pair.
 */
77 lp_build_vec_type(union lp_type type
)
79 LLVMTypeRef elem_type
= lp_build_elem_type(type
);
80 return LLVMVectorType(elem_type
, type
.length
);
85 * This function is a mirror of lp_build_elem_type() above.
87 * XXX: I'm not sure if it wouldn't be easier/more efficient to just recreate the
88 * type and check for identity.
91 lp_check_elem_type(union lp_type type
, LLVMTypeRef elem_type
)
93 LLVMTypeKind elem_kind
;
99 elem_kind
= LLVMGetTypeKind(elem_type
);
104 if(elem_kind
!= LLVMFloatTypeKind
)
108 if(elem_kind
!= LLVMDoubleTypeKind
)
117 if(elem_kind
!= LLVMIntegerTypeKind
)
120 if(LLVMGetIntTypeWidth(elem_type
) != type
.width
)
129 lp_check_vec_type(union lp_type type
, LLVMTypeRef vec_type
)
131 LLVMTypeRef elem_type
;
137 if(LLVMGetTypeKind(vec_type
) != LLVMVectorTypeKind
)
140 if(LLVMGetVectorSize(vec_type
) != type
.length
)
143 elem_type
= LLVMGetElementType(vec_type
);
145 return lp_check_elem_type(type
, elem_type
);
/**
 * Check an LLVM value against the given type description.
 *
 * The value's LLVM type is fetched with LLVMTypeOf() and handed to
 * lp_check_vec_type(), whose result is returned.
 */
150 lp_check_value(union lp_type type
, LLVMValueRef val
)
152 LLVMTypeRef vec_type
;
158 vec_type
= LLVMTypeOf(val
);
160 return lp_check_vec_type(type
, vec_type
);
/**
 * Build an undefined value of the vector type described by `type`.
 *
 * The vector type comes from lp_build_vec_type(); the returned value is
 * LLVMGetUndef() of that type.
 */
165 lp_build_undef(union lp_type type
)
167 LLVMTypeRef vec_type
= lp_build_vec_type(type
);
168 return LLVMGetUndef(vec_type
);
/**
 * Build the zero constant of the vector type described by `type`.
 *
 * The vector type comes from lp_build_vec_type(); the returned value is
 * LLVMConstNull() of that type.
 */
173 lp_build_zero(union lp_type type
)
175 LLVMTypeRef vec_type
= lp_build_vec_type(type
);
176 return LLVMConstNull(vec_type
);
181 lp_build_one(union lp_type type
)
183 LLVMTypeRef elem_type
;
184 LLVMValueRef elems
[LP_MAX_VECTOR_LENGTH
];
187 assert(type
.length
< LP_MAX_VECTOR_LENGTH
);
189 elem_type
= lp_build_elem_type(type
);
192 elems
[0] = LLVMConstReal(elem_type
, 1.0);
194 elems
[0] = LLVMConstInt(elem_type
, 1LL << (type
.width
/2), 0);
196 elems
[0] = LLVMConstInt(elem_type
, 1, 0);
198 /* special case -- 1.0 for normalized types is more easily attained if
199 * we start with a vector consisting of all bits set */
200 LLVMTypeRef vec_type
= LLVMVectorType(elem_type
, type
.length
);
201 LLVMValueRef vec
= LLVMConstAllOnes(vec_type
);
204 vec
= LLVMConstLShr(vec
, LLVMConstInt(LLVMInt32Type(), 1, 0));
209 for(i
= 1; i
< type
.length
; ++i
)
212 return LLVMConstVector(elems
, type
.length
);
217 lp_build_const_aos(union lp_type type
,
218 double r
, double g
, double b
, double a
,
219 const unsigned char *swizzle
)
221 const unsigned char default_swizzle
[4] = {0, 1, 2, 3};
222 LLVMTypeRef elem_type
;
223 LLVMValueRef elems
[LP_MAX_VECTOR_LENGTH
];
226 assert(type
.length
% 4 == 0);
227 assert(type
.length
< LP_MAX_VECTOR_LENGTH
);
229 elem_type
= lp_build_elem_type(type
);
232 swizzle
= default_swizzle
;
235 elems
[swizzle
[0]] = LLVMConstReal(elem_type
, r
);
236 elems
[swizzle
[1]] = LLVMConstReal(elem_type
, g
);
237 elems
[swizzle
[2]] = LLVMConstReal(elem_type
, b
);
238 elems
[swizzle
[3]] = LLVMConstReal(elem_type
, a
);
246 shift
= type
.width
/2;
248 shift
= type
.sign
? type
.width
- 1 : type
.width
;
252 llscale
= (long long)1 << shift
;
253 dscale
= (double)llscale
;
254 assert((long long)dscale
== llscale
);
256 elems
[swizzle
[0]] = LLVMConstInt(elem_type
, r
*dscale
+ 0.5, 0);
257 elems
[swizzle
[1]] = LLVMConstInt(elem_type
, g
*dscale
+ 0.5, 0);
258 elems
[swizzle
[2]] = LLVMConstInt(elem_type
, b
*dscale
+ 0.5, 0);
259 elems
[swizzle
[3]] = LLVMConstInt(elem_type
, a
*dscale
+ 0.5, 0);
262 for(i
= 4; i
< type
.length
; ++i
)
263 elems
[i
] = elems
[i
% 4];
265 return LLVMConstVector(elems
, type
.length
);
270 lp_build_intrinsic_binary(LLVMBuilderRef builder
,
275 LLVMModuleRef module
= LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder
)));
276 LLVMValueRef function
;
277 LLVMValueRef args
[2];
279 function
= LLVMGetNamedFunction(module
, name
);
281 LLVMTypeRef type
= LLVMTypeOf(a
);
282 LLVMTypeRef arg_types
[2];
285 function
= LLVMAddFunction(module
, name
, LLVMFunctionType(type
, arg_types
, 2, 0));
286 LLVMSetFunctionCallConv(function
, LLVMCCallConv
);
287 LLVMSetLinkage(function
, LLVMExternalLinkage
);
289 assert(LLVMIsDeclaration(function
));
294 return LLVMBuildCall(builder
, function
, args
, 2, "");
299 lp_build_min_simple(struct lp_build_context
*bld
,
303 const union lp_type type
= bld
->type
;
304 const char *intrinsic
= NULL
;
307 /* TODO: optimize the constant case */
309 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
310 if(type
.width
* type
.length
== 128) {
313 intrinsic
= "llvm.x86.sse.min.ps";
315 intrinsic
= "llvm.x86.sse2.min.pd";
317 if(type
.width
== 8 && !type
.sign
)
318 intrinsic
= "llvm.x86.sse2.pminu.b";
319 if(type
.width
== 16 && type
.sign
)
320 intrinsic
= "llvm.x86.sse2.pmins.w";
326 return lp_build_intrinsic_binary(bld
->builder
, intrinsic
, a
, b
);
329 cond
= LLVMBuildFCmp(bld
->builder
, LLVMRealULT
, a
, b
, "");
331 cond
= LLVMBuildICmp(bld
->builder
, type
.sign
? LLVMIntSLT
: LLVMIntULT
, a
, b
, "");
332 return LLVMBuildSelect(bld
->builder
, cond
, a
, b
, "");
337 lp_build_max_simple(struct lp_build_context
*bld
,
341 const union lp_type type
= bld
->type
;
342 const char *intrinsic
= NULL
;
345 /* TODO: optimize the constant case */
347 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
348 if(type
.width
* type
.length
== 128) {
351 intrinsic
= "llvm.x86.sse.max.ps";
353 intrinsic
= "llvm.x86.sse2.max.pd";
355 if(type
.width
== 8 && !type
.sign
)
356 intrinsic
= "llvm.x86.sse2.pmaxu.b";
357 if(type
.width
== 16 && type
.sign
)
358 intrinsic
= "llvm.x86.sse2.pmaxs.w";
364 return lp_build_intrinsic_binary(bld
->builder
, intrinsic
, a
, b
);
367 cond
= LLVMBuildFCmp(bld
->builder
, LLVMRealULT
, a
, b
, "");
369 cond
= LLVMBuildICmp(bld
->builder
, type
.sign
? LLVMIntSLT
: LLVMIntULT
, a
, b
, "");
370 return LLVMBuildSelect(bld
->builder
, cond
, b
, a
, "");
375 lp_build_comp(struct lp_build_context
*bld
,
378 const union lp_type type
= bld
->type
;
385 if(type
.norm
&& !type
.floating
&& !type
.fixed
&& !type
.sign
) {
386 if(LLVMIsConstant(a
))
387 return LLVMConstNot(a
);
389 return LLVMBuildNot(bld
->builder
, a
, "");
392 if(LLVMIsConstant(a
))
393 return LLVMConstSub(bld
->one
, a
);
395 return LLVMBuildSub(bld
->builder
, bld
->one
, a
, "");
400 lp_build_add(struct lp_build_context
*bld
,
404 const union lp_type type
= bld
->type
;
411 if(a
== bld
->undef
|| b
== bld
->undef
)
415 const char *intrinsic
= NULL
;
417 if(a
== bld
->one
|| b
== bld
->one
)
420 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
421 if(type
.width
* type
.length
== 128 &&
422 !type
.floating
&& !type
.fixed
) {
424 intrinsic
= type
.sign
? "llvm.x86.sse2.adds.b" : "llvm.x86.sse2.addus.b";
426 intrinsic
= type
.sign
? "llvm.x86.sse2.adds.w" : "llvm.x86.sse2.addus.w";
431 return lp_build_intrinsic_binary(bld
->builder
, intrinsic
, a
, b
);
434 if(LLVMIsConstant(a
) && LLVMIsConstant(b
))
435 res
= LLVMConstAdd(a
, b
);
437 res
= LLVMBuildAdd(bld
->builder
, a
, b
, "");
439 if(bld
->type
.norm
&& (bld
->type
.floating
|| bld
->type
.fixed
))
440 res
= lp_build_min_simple(bld
, res
, bld
->one
);
447 lp_build_sub(struct lp_build_context
*bld
,
451 const union lp_type type
= bld
->type
;
456 if(a
== bld
->undef
|| b
== bld
->undef
)
462 const char *intrinsic
= NULL
;
467 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
468 if(type
.width
* type
.length
== 128 &&
469 !type
.floating
&& !type
.fixed
) {
471 intrinsic
= type
.sign
? "llvm.x86.sse2.subs.b" : "llvm.x86.sse2.subus.b";
473 intrinsic
= type
.sign
? "llvm.x86.sse2.subs.w" : "llvm.x86.sse2.subus.w";
478 return lp_build_intrinsic_binary(bld
->builder
, intrinsic
, a
, b
);
481 if(LLVMIsConstant(a
) && LLVMIsConstant(b
))
482 res
= LLVMConstSub(a
, b
);
484 res
= LLVMBuildSub(bld
->builder
, a
, b
, "");
486 if(bld
->type
.norm
&& (bld
->type
.floating
|| bld
->type
.fixed
))
487 res
= lp_build_max_simple(bld
, res
, bld
->zero
);
494 lp_build_mul(struct lp_build_context
*bld
,
506 if(a
== bld
->undef
|| b
== bld
->undef
)
509 if(LLVMIsConstant(a
) && LLVMIsConstant(b
))
510 return LLVMConstMul(a
, b
);
512 return LLVMBuildMul(bld
->builder
, a
, b
, "");
517 lp_build_min(struct lp_build_context
*bld
,
521 if(a
== bld
->undef
|| b
== bld
->undef
)
525 if(a
== bld
->zero
|| b
== bld
->zero
)
533 return lp_build_min_simple(bld
, a
, b
);
538 lp_build_max(struct lp_build_context
*bld
,
542 if(a
== bld
->undef
|| b
== bld
->undef
)
546 if(a
== bld
->one
|| b
== bld
->one
)
554 return lp_build_max_simple(bld
, a
, b
);