#include "lp_bld_intr.h"
#include "lp_bld_logic.h"
#include "lp_bld_pack.h"
+#include "lp_bld_debug.h"
#include "lp_bld_arit.h"
/**
- * Linear interpolation.
- *
- * This also works for integer values with a few caveats.
+ * Linear interpolation -- without any checks.
*
* @sa http://www.stereopsis.com/doubleblend.html
*/
-LLVMValueRef
-lp_build_lerp(struct lp_build_context *bld,
- LLVMValueRef x,
- LLVMValueRef v0,
- LLVMValueRef v1)
+static INLINE LLVMValueRef
+lp_build_lerp_simple(struct lp_build_context *bld,
+ LLVMValueRef x,
+ LLVMValueRef v0,
+ LLVMValueRef v1)
{
LLVMValueRef delta;
LLVMValueRef res;
res = lp_build_add(bld, v0, res);
- if(bld->type.fixed)
+ if (bld->type.fixed) {
/* XXX: This step is necessary for lerping 8bit colors stored on 16bits,
* but it will be wrong for other uses. Basically we need a more
* powerful lp_type, capable of further distinguishing the values
* interpretation from the value storage. */
res = LLVMBuildAnd(bld->builder, res, lp_build_const_int_vec(bld->type, (1 << bld->type.width/2) - 1), "");
+ }
+
+ return res;
+}
+
+
+/**
+ * Linear interpolation.
+ */
+LLVMValueRef
+lp_build_lerp(struct lp_build_context *bld,
+ LLVMValueRef x,
+ LLVMValueRef v0,
+ LLVMValueRef v1)
+{
+ const struct lp_type type = bld->type;
+ LLVMValueRef res;
+
+ assert(lp_check_value(type, x));
+ assert(lp_check_value(type, v0));
+ assert(lp_check_value(type, v1));
+
+ if (type.norm) {
+ struct lp_type wide_type;
+ struct lp_build_context wide_bld;
+ LLVMValueRef xl, xh, v0l, v0h, v1l, v1h, resl, resh;
+ LLVMValueRef shift;
+
+ assert(type.length >= 2);
+ assert(!type.sign);
+
+ /*
+ * Create a wider type, enough to hold the intermediate result of the
+ * multiplication.
+ */
+ memset(&wide_type, 0, sizeof wide_type);
+ wide_type.fixed = TRUE;
+ wide_type.width = type.width*2;
+ wide_type.length = type.length/2;
+
+ lp_build_context_init(&wide_bld, bld->builder, wide_type);
+
+ lp_build_unpack2(bld->builder, type, wide_type, x, &xl, &xh);
+ lp_build_unpack2(bld->builder, type, wide_type, v0, &v0l, &v0h);
+ lp_build_unpack2(bld->builder, type, wide_type, v1, &v1l, &v1h);
+
+ /*
+ * Scale x from [0, 255] to [0, 256]
+ */
+
+ shift = lp_build_const_int_vec(wide_type, type.width - 1);
+
+ xl = lp_build_add(&wide_bld, xl,
+ LLVMBuildAShr(bld->builder, xl, shift, ""));
+ xh = lp_build_add(&wide_bld, xh,
+ LLVMBuildAShr(bld->builder, xh, shift, ""));
+
+ /*
+ * Lerp both halves.
+ */
+
+ resl = lp_build_lerp_simple(&wide_bld, xl, v0l, v1l);
+ resh = lp_build_lerp_simple(&wide_bld, xh, v0h, v1h);
+
+ res = lp_build_pack2(bld->builder, wide_type, type, resl, resh);
+ } else {
+ res = lp_build_lerp_simple(bld, x, v0, v1);
+ }
return res;
}
enum lp_build_round_sse41_mode mode)
{
const struct lp_type type = bld->type;
- LLVMTypeRef vec_type = lp_build_vec_type(type);
+ LLVMTypeRef i32t = LLVMInt32Type();
const char *intrinsic;
+ LLVMValueRef res;
assert(type.floating);
- assert(type.width*type.length == 128);
+
assert(lp_check_value(type, a));
assert(util_cpu_caps.has_sse4_1);
- switch(type.width) {
- case 32:
- intrinsic = "llvm.x86.sse41.round.ps";
- break;
- case 64:
- intrinsic = "llvm.x86.sse41.round.pd";
- break;
- default:
- assert(0);
- return bld->undef;
+ if (type.length == 1) {
+ LLVMTypeRef vec_type;
+ LLVMValueRef undef;
+ LLVMValueRef args[3];
+ LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);
+
+ switch(type.width) {
+ case 32:
+ intrinsic = "llvm.x86.sse41.round.ss";
+ break;
+ case 64:
+ intrinsic = "llvm.x86.sse41.round.sd";
+ break;
+ default:
+ assert(0);
+ return bld->undef;
+ }
+
+ vec_type = LLVMVectorType(bld->elem_type, 4);
+
+ undef = LLVMGetUndef(vec_type);
+
+ args[0] = undef;
+ args[1] = LLVMBuildInsertElement(bld->builder, undef, a, index0, "");
+ args[2] = LLVMConstInt(i32t, mode, 0);
+
+ res = lp_build_intrinsic(bld->builder, intrinsic,
+ vec_type, args, Elements(args));
+
+ res = LLVMBuildExtractElement(bld->builder, res, index0, "");
}
+ else {
+ assert(type.width*type.length == 128);
- return lp_build_intrinsic_binary(bld->builder, intrinsic, vec_type, a,
- LLVMConstInt(LLVMInt32Type(), mode, 0));
+ switch(type.width) {
+ case 32:
+ intrinsic = "llvm.x86.sse41.round.ps";
+ break;
+ case 64:
+ intrinsic = "llvm.x86.sse41.round.pd";
+ break;
+ default:
+ assert(0);
+ return bld->undef;
+ }
+
+ res = lp_build_intrinsic_binary(bld->builder, intrinsic,
+ bld->vec_type, a,
+ LLVMConstInt(i32t, mode, 0));
+ }
+
+ return res;
}
assert(type.floating);
assert(lp_check_value(type, a));
- if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128)
+ if (util_cpu_caps.has_sse4_1 &&
+ (type.length == 1 || type.width*type.length == 128)) {
return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_TRUNCATE);
+ }
else {
LLVMTypeRef vec_type = lp_build_vec_type(type);
LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
assert(type.floating);
assert(lp_check_value(type, a));
- if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128)
+ if (util_cpu_caps.has_sse4_1 &&
+ (type.length == 1 || type.width*type.length == 128)) {
return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_NEAREST);
+ }
else {
LLVMTypeRef vec_type = lp_build_vec_type(type);
LLVMValueRef res;
assert(type.floating);
assert(lp_check_value(type, a));
- if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128)
+ if (util_cpu_caps.has_sse4_1 &&
+ (type.length == 1 || type.width*type.length == 128)) {
return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR);
+ }
else {
LLVMTypeRef vec_type = lp_build_vec_type(type);
LLVMValueRef res;
assert(type.floating);
assert(lp_check_value(type, a));
- if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128)
+ if (util_cpu_caps.has_sse4_1 &&
+ (type.length == 1 || type.width*type.length == 128)) {
return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL);
+ }
else {
LLVMTypeRef vec_type = lp_build_vec_type(type);
LLVMValueRef res;
LLVMValueRef a)
{
const struct lp_type type = bld->type;
- LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+ LLVMTypeRef int_vec_type = bld->int_vec_type;
LLVMValueRef res;
assert(type.floating);
assert(lp_check_value(type, a));
- if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128) {
+ if (util_cpu_caps.has_sse4_1 &&
+ (type.length == 1 || type.width*type.length == 128)) {
res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_NEAREST);
}
else {
- LLVMTypeRef vec_type = lp_build_vec_type(type);
- LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1));
- LLVMValueRef sign;
LLVMValueRef half;
- /* get sign bit */
- sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
- sign = LLVMBuildAnd(bld->builder, sign, mask, "");
-
- /* sign * 0.5 */
half = lp_build_const_vec(type, 0.5);
- half = LLVMBuildBitCast(bld->builder, half, int_vec_type, "");
- half = LLVMBuildOr(bld->builder, sign, half, "");
- half = LLVMBuildBitCast(bld->builder, half, vec_type, "");
+
+ if (type.sign) {
+ LLVMTypeRef vec_type = bld->vec_type;
+ LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1));
+ LLVMValueRef sign;
+
+ /* get sign bit */
+ sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
+ sign = LLVMBuildAnd(bld->builder, sign, mask, "");
+
+ /* sign * 0.5 */
+ half = LLVMBuildBitCast(bld->builder, half, int_vec_type, "");
+ half = LLVMBuildOr(bld->builder, sign, half, "");
+ half = LLVMBuildBitCast(bld->builder, half, vec_type, "");
+ }
res = LLVMBuildFAdd(bld->builder, a, half, "");
}
LLVMValueRef a)
{
const struct lp_type type = bld->type;
- LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+ LLVMTypeRef int_vec_type = bld->int_vec_type;
LLVMValueRef res;
assert(type.floating);
assert(lp_check_value(type, a));
- if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128) {
+ if (util_cpu_caps.has_sse4_1 &&
+ (type.length == 1 || type.width*type.length == 128)) {
res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR);
}
else {
- /* Take the sign bit and add it to 1 constant */
- LLVMTypeRef vec_type = lp_build_vec_type(type);
- unsigned mantissa = lp_mantissa(type);
- LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1));
- LLVMValueRef sign;
- LLVMValueRef offset;
-
- /* sign = a < 0 ? ~0 : 0 */
- sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
- sign = LLVMBuildAnd(bld->builder, sign, mask, "");
- sign = LLVMBuildAShr(bld->builder, sign, lp_build_const_int_vec(type, type.width - 1), "ifloor.sign");
-
- /* offset = -0.99999(9)f */
- offset = lp_build_const_vec(type, -(double)(((unsigned long long)1 << mantissa) - 10)/((unsigned long long)1 << mantissa));
- offset = LLVMConstBitCast(offset, int_vec_type);
-
- /* offset = a < 0 ? offset : 0.0f */
- offset = LLVMBuildAnd(bld->builder, offset, sign, "");
- offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "ifloor.offset");
-
- res = LLVMBuildFAdd(bld->builder, a, offset, "ifloor.res");
+ res = a;
+
+ if (type.sign) {
+ /* Take the sign bit and add it to 1 constant */
+ LLVMTypeRef vec_type = bld->vec_type;
+ unsigned mantissa = lp_mantissa(type);
+ LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1));
+ LLVMValueRef sign;
+ LLVMValueRef offset;
+
+ /* sign = a < 0 ? ~0 : 0 */
+ sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
+ sign = LLVMBuildAnd(bld->builder, sign, mask, "");
+ sign = LLVMBuildAShr(bld->builder, sign, lp_build_const_int_vec(type, type.width - 1), "ifloor.sign");
+
+ /* offset = -0.99999(9)f */
+ offset = lp_build_const_vec(type, -(double)(((unsigned long long)1 << mantissa) - 10)/((unsigned long long)1 << mantissa));
+ offset = LLVMConstBitCast(offset, int_vec_type);
+
+ /* offset = a < 0 ? offset : 0.0f */
+ offset = LLVMBuildAnd(bld->builder, offset, sign, "");
+ offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "ifloor.offset");
+
+ res = LLVMBuildFAdd(bld->builder, res, offset, "ifloor.res");
+ }
}
/* round to nearest (toward zero) */
LLVMValueRef a)
{
const struct lp_type type = bld->type;
- LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+ LLVMTypeRef int_vec_type = bld->int_vec_type;
LLVMValueRef res;
assert(type.floating);
assert(lp_check_value(type, a));
- if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128) {
+ if (util_cpu_caps.has_sse4_1 &&
+ (type.length == 1 || type.width*type.length == 128)) {
res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL);
}
else {
- LLVMTypeRef vec_type = lp_build_vec_type(type);
+ LLVMTypeRef vec_type = bld->vec_type;
unsigned mantissa = lp_mantissa(type);
- LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1));
- LLVMValueRef sign;
LLVMValueRef offset;
- /* sign = a < 0 ? 0 : ~0 */
- sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
- sign = LLVMBuildAnd(bld->builder, sign, mask, "");
- sign = LLVMBuildAShr(bld->builder, sign, lp_build_const_int_vec(type, type.width - 1), "iceil.sign");
- sign = LLVMBuildNot(bld->builder, sign, "iceil.not");
-
/* offset = 0.99999(9)f */
offset = lp_build_const_vec(type, (double)(((unsigned long long)1 << mantissa) - 10)/((unsigned long long)1 << mantissa));
- offset = LLVMConstBitCast(offset, int_vec_type);
- /* offset = a < 0 ? 0.0 : offset */
- offset = LLVMBuildAnd(bld->builder, offset, sign, "");
- offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "iceil.offset");
+ if (type.sign) {
+ LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1));
+ LLVMValueRef sign;
+
+ /* sign = a < 0 ? 0 : ~0 */
+ sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
+ sign = LLVMBuildAnd(bld->builder, sign, mask, "");
+ sign = LLVMBuildAShr(bld->builder, sign, lp_build_const_int_vec(type, type.width - 1), "iceil.sign");
+ sign = LLVMBuildNot(bld->builder, sign, "iceil.not");
+
+ /* offset = a < 0 ? 0.0 : offset */
+ offset = LLVMConstBitCast(offset, int_vec_type);
+ offset = LLVMBuildAnd(bld->builder, offset, sign, "");
+ offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "iceil.offset");
+ }
res = LLVMBuildFAdd(bld->builder, a, offset, "iceil.res");
}
LLVMValueRef y)
{
/* TODO: optimize the constant case */
- if(LLVMIsConstant(x) && LLVMIsConstant(y))
+ if (gallivm_debug & GALLIVM_DEBUG_PERF &&
+ LLVMIsConstant(x) && LLVMIsConstant(y)) {
debug_printf("%s: inefficient/imprecise constant arithmetic\n",
__FUNCTION__);
+ }
return lp_build_exp2(bld, lp_build_mul(bld, lp_build_log2(bld, x), y));
}
assert(lp_check_value(bld->type, x));
/* TODO: optimize the constant case */
- if(LLVMIsConstant(x))
+ if (gallivm_debug & GALLIVM_DEBUG_PERF &&
+ LLVMIsConstant(x)) {
debug_printf("%s: inefficient/imprecise constant arithmetic\n",
__FUNCTION__);
+ }
for (i = num_coeffs; i--; ) {
LLVMValueRef coeff;
if(p_exp2_int_part || p_frac_part || p_exp2) {
/* TODO: optimize the constant case */
- if(LLVMIsConstant(x))
+ if (gallivm_debug & GALLIVM_DEBUG_PERF &&
+ LLVMIsConstant(x)) {
debug_printf("%s: inefficient/imprecise constant arithmetic\n",
__FUNCTION__);
+ }
assert(type.floating && type.width == 32);
if(p_exp || p_floor_log2 || p_log2) {
/* TODO: optimize the constant case */
- if(LLVMIsConstant(x))
+ if (gallivm_debug & GALLIVM_DEBUG_PERF &&
+ LLVMIsConstant(x)) {
debug_printf("%s: inefficient/imprecise constant arithmetic\n",
__FUNCTION__);
+ }
assert(type.floating && type.width == 32);
lp_build_log2_approx(bld, x, NULL, NULL, &res);
return res;
}
+
+
+/**
+ * Faster (and less accurate) log2.
+ *
+ * log2(x) = floor(log2(x)) + frac(x)
+ *
+ * See http://www.flipcode.com/archives/Fast_log_Function.shtml
+ */
+LLVMValueRef
+lp_build_fast_log2(struct lp_build_context *bld,
+ LLVMValueRef x)
+{
+ const struct lp_type type = bld->type;
+ LLVMTypeRef vec_type = bld->vec_type;
+ LLVMTypeRef int_vec_type = bld->int_vec_type;
+
+ unsigned mantissa = lp_mantissa(type);
+ LLVMValueRef mantmask = lp_build_const_int_vec(type, (1ULL << mantissa) - 1);
+ LLVMValueRef one = LLVMConstBitCast(bld->one, int_vec_type);
+
+ LLVMValueRef ipart;
+ LLVMValueRef fpart;
+
+ assert(lp_check_value(bld->type, x));
+
+ assert(type.floating);
+
+ x = LLVMBuildBitCast(bld->builder, x, int_vec_type, "");
+
+ /* ipart = floor(log2(x)) - 1 */
+ ipart = LLVMBuildLShr(bld->builder, x, lp_build_const_int_vec(type, mantissa), "");
+ ipart = LLVMBuildAnd(bld->builder, ipart, lp_build_const_int_vec(type, 255), "");
+ ipart = LLVMBuildSub(bld->builder, ipart, lp_build_const_int_vec(type, 128), "");
+ ipart = LLVMBuildSIToFP(bld->builder, ipart, vec_type, "");
+
+ /* fpart = 1.0 + frac(x) */
+ fpart = LLVMBuildAnd(bld->builder, x, mantmask, "");
+ fpart = LLVMBuildOr(bld->builder, fpart, one, "");
+ fpart = LLVMBuildBitCast(bld->builder, fpart, vec_type, "");
+
+ /* floor(log2(x)) + frac(x) */
+ return LLVMBuildFAdd(bld->builder, ipart, fpart, "");
+}