From ff7542ab90154769930c80d58597cfec40844bd9 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Jos=C3=A9=20Fonseca?= Date: Sat, 8 May 2010 23:07:09 +0100 Subject: [PATCH] gallivm: Actually do floor/ceil/trunc for scalars. Also start axing the code duplication for scalar case. The olution is to treat the scalar case specially in a few innermost functions, and leave outer functions untouched. --- src/gallium/auxiliary/gallivm/lp_bld_arit.c | 192 +++--------------- src/gallium/auxiliary/gallivm/lp_bld_const.c | 6 + .../auxiliary/gallivm/lp_bld_swizzle.c | 2 + 3 files changed, 34 insertions(+), 166 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index 2405be4a2fc..9aea04e05be 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -676,26 +676,13 @@ lp_build_abs(struct lp_build_context *bld, if(type.floating) { /* Mask out the sign bit */ - if (type.length == 1) { - LLVMTypeRef int_type = LLVMIntType(type.width); - LLVMTypeRef float_type = LLVMFloatType(); - unsigned long long absMask = ~(1ULL << (type.width - 1)); - LLVMValueRef mask = LLVMConstInt(int_type, absMask, 0); - a = LLVMBuildBitCast(bld->builder, a, int_type, ""); - a = LLVMBuildAnd(bld->builder, a, mask, ""); - a = LLVMBuildBitCast(bld->builder, a, float_type, ""); - return a; - } - else { - /* vector of floats */ - LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); - unsigned long long absMask = ~(1ULL << (type.width - 1)); - LLVMValueRef mask = lp_build_const_int_vec(type, ((unsigned long long) absMask)); - a = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); - a = LLVMBuildAnd(bld->builder, a, mask, ""); - a = LLVMBuildBitCast(bld->builder, a, vec_type, ""); - return a; - } + LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + unsigned long long absMask = ~(1ULL << (type.width - 1)); + LLVMValueRef mask = lp_build_const_int_vec(type, ((unsigned long long) absMask)); + a = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); + a = LLVMBuildAnd(bld->builder, a, mask, ""); + a = LLVMBuildBitCast(bld->builder, a, vec_type, ""); + return a; } if(type.width*type.length == 128 && util_cpu_caps.has_ssse3) { @@ -743,17 +730,9 @@ lp_build_sgn(struct lp_build_context *bld, LLVMValueRef one; unsigned long long maskBit = (unsigned long long)1 << (type.width - 1); - if (type.length == 1) { - int_type = lp_build_int_elem_type(type); - vec_type = lp_build_elem_type(type); - mask = LLVMConstInt(int_type, maskBit, 0); - } - else { - /* vector */ - int_type = lp_build_int_vec_type(type); - vec_type = lp_build_vec_type(type); - mask = lp_build_const_int_vec(type, maskBit); - } + int_type = lp_build_int_vec_type(type); + vec_type = lp_build_vec_type(type); + mask = lp_build_const_int_vec(type, maskBit); /* Take the sign bit and add it to 1 constant */ sign = LLVMBuildBitCast(bld->builder, a, int_type, ""); @@ -820,21 +799,11 @@ lp_build_int_to_float(struct lp_build_context *bld, LLVMValueRef a) { const struct lp_type type = bld->type; + LLVMTypeRef vec_type = lp_build_vec_type(type); assert(type.floating); - /*assert(lp_check_value(type, a));*/ - if (type.length == 1) { - LLVMTypeRef float_type = LLVMFloatType(); - return LLVMBuildSIToFP(bld->builder, a, float_type, ""); - } - else { - LLVMTypeRef vec_type = lp_build_vec_type(type); - /*LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);*/ - LLVMValueRef res; - res = LLVMBuildSIToFP(bld->builder, a, vec_type, ""); - return res; - } + return LLVMBuildSIToFP(bld->builder, a, vec_type, ""); } @@ -888,7 +857,7 @@ lp_build_trunc(struct lp_build_context *bld, assert(type.floating); assert(lp_check_value(type, a)); - if(util_cpu_caps.has_sse4_1) + if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128) return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_TRUNCATE); else { LLVMTypeRef vec_type = lp_build_vec_type(type); @@ -910,7 +879,7 @@ lp_build_round(struct lp_build_context *bld, assert(type.floating); assert(lp_check_value(type, a)); - if(util_cpu_caps.has_sse4_1) + if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128) return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_NEAREST); else { LLVMTypeRef vec_type = lp_build_vec_type(type); @@ -929,15 +898,9 @@ lp_build_floor(struct lp_build_context *bld, const struct lp_type type = bld->type; assert(type.floating); + assert(lp_check_value(type, a)); - if (type.length == 1) { - LLVMValueRef res; - res = lp_build_ifloor(bld, a); - res = LLVMBuildSIToFP(bld->builder, res, LLVMFloatType(), ""); - return res; - } - - if(util_cpu_caps.has_sse4_1) + if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128) return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR); else { LLVMTypeRef vec_type = lp_build_vec_type(type); @@ -958,7 +921,7 @@ lp_build_ceil(struct lp_build_context *bld, assert(type.floating); assert(lp_check_value(type, a)); - if(util_cpu_caps.has_sse4_1) + if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128) return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL); else { LLVMTypeRef vec_type = lp_build_vec_type(type); @@ -992,18 +955,12 @@ lp_build_itrunc(struct lp_build_context *bld, LLVMValueRef a) { const struct lp_type type = bld->type; + LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); assert(type.floating); + assert(lp_check_value(type, a)); - if (type.length == 1) { - LLVMTypeRef int_type = LLVMIntType(type.width); - return LLVMBuildFPToSI(bld->builder, a, int_type, ""); - } - else { - LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); - assert(lp_check_value(type, a)); - return LLVMBuildFPToSI(bld->builder, a, int_vec_type, ""); - } + return LLVMBuildFPToSI(bld->builder, a, int_vec_type, ""); } @@ -1020,17 +977,9 @@ lp_build_iround(struct lp_build_context *bld, assert(type.floating); - if (type.length == 1) { - /* scalar float to int */ - LLVMTypeRef int_type = LLVMIntType(type.width); - /* XXX we want rounding here! */ - res = LLVMBuildFPToSI(bld->builder, a, int_type, ""); - return res; - } - assert(lp_check_value(type, a)); - if(util_cpu_caps.has_sse4_1) { + if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128) { res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_NEAREST); } else { @@ -1070,17 +1019,9 @@ lp_build_ifloor(struct lp_build_context *bld, LLVMValueRef res; assert(type.floating); - - if (type.length == 1) { - /* scalar float to int */ - LLVMTypeRef int_type = LLVMIntType(type.width); - res = LLVMBuildFPToSI(bld->builder, a, int_type, ""); - return res; - } - assert(lp_check_value(type, a)); - if(util_cpu_caps.has_sse4_1) { + if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128) { res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR); } else { @@ -1128,10 +1069,11 @@ lp_build_iceil(struct lp_build_context *bld, assert(type.floating); assert(lp_check_value(type, a)); - if(util_cpu_caps.has_sse4_1) { + if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128) { res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL); } else { + /* TODO: mimic lp_build_ifloor() here */ assert(0); res = bld->undef; } @@ -1407,7 +1349,6 @@ lp_build_polynomial(struct lp_build_context *bld, unsigned num_coeffs) { const struct lp_type type = bld->type; - LLVMTypeRef float_type = LLVMFloatType(); LLVMValueRef res = NULL; unsigned i; @@ -1419,10 +1360,7 @@ lp_build_polynomial(struct lp_build_context *bld, for (i = num_coeffs; i--; ) { LLVMValueRef coeff; - if (type.length == 1) - coeff = LLVMConstReal(float_type, coeffs[i]); - else - coeff = lp_build_const_vec(type, coeffs[i]); + coeff = lp_build_const_vec(type, coeffs[i]); if(res) res = lp_build_add(bld, coeff, lp_build_mul(bld, x, res)); @@ -1646,89 +1584,11 @@ lp_build_log2_approx(struct lp_build_context *bld, } -/** scalar version of above function */ -static void -lp_build_float_log2_approx(struct lp_build_context *bld, - LLVMValueRef x, - LLVMValueRef *p_exp, - LLVMValueRef *p_floor_log2, - LLVMValueRef *p_log2) -{ - const struct lp_type type = bld->type; - LLVMTypeRef float_type = LLVMFloatType(); - LLVMTypeRef int_type = LLVMIntType(type.width); - - LLVMValueRef expmask = LLVMConstInt(int_type, 0x7f800000, 0); - LLVMValueRef mantmask = LLVMConstInt(int_type, 0x007fffff, 0); - LLVMValueRef one = LLVMConstBitCast(bld->one, int_type); - - LLVMValueRef i = NULL; - LLVMValueRef exp = NULL; - LLVMValueRef mant = NULL; - LLVMValueRef logexp = NULL; - LLVMValueRef logmant = NULL; - LLVMValueRef res = NULL; - - if(p_exp || p_floor_log2 || p_log2) { - /* TODO: optimize the constant case */ - if(LLVMIsConstant(x)) - debug_printf("%s: inefficient/imprecise constant arithmetic\n", - __FUNCTION__); - - assert(type.floating && type.width == 32); - - i = LLVMBuildBitCast(bld->builder, x, int_type, ""); - - /* exp = (float) exponent(x) */ - exp = LLVMBuildAnd(bld->builder, i, expmask, ""); - } - - if(p_floor_log2 || p_log2) { - LLVMValueRef c23 = LLVMConstInt(int_type, 23, 0); - LLVMValueRef c127 = LLVMConstInt(int_type, 127, 0); - logexp = LLVMBuildLShr(bld->builder, exp, c23, ""); - logexp = LLVMBuildSub(bld->builder, logexp, c127, ""); - logexp = LLVMBuildSIToFP(bld->builder, logexp, float_type, ""); - } - - if(p_log2) { - /* mant = (float) mantissa(x) */ - mant = LLVMBuildAnd(bld->builder, i, mantmask, ""); - mant = LLVMBuildOr(bld->builder, mant, one, ""); - mant = LLVMBuildBitCast(bld->builder, mant, float_type, ""); - - logmant = lp_build_polynomial(bld, mant, lp_build_log2_polynomial, - Elements(lp_build_log2_polynomial)); - - /* This effectively increases the polynomial degree by one, but ensures that log2(1) == 0*/ - logmant = LLVMBuildMul(bld->builder, logmant, LLVMBuildSub(bld->builder, mant, bld->one, ""), ""); - - res = LLVMBuildAdd(bld->builder, logmant, logexp, ""); - } - - if(p_exp) { - exp = LLVMBuildBitCast(bld->builder, exp, float_type, ""); - *p_exp = exp; - } - - if(p_floor_log2) - *p_floor_log2 = logexp; - - if(p_log2) - *p_log2 = res; -} - - LLVMValueRef lp_build_log2(struct lp_build_context *bld, LLVMValueRef x) { LLVMValueRef res; - if (bld->type.length == 1) { - lp_build_float_log2_approx(bld, x, NULL, NULL, &res); - } - else { - lp_build_log2_approx(bld, x, NULL, NULL, &res); - } + lp_build_log2_approx(bld, x, NULL, NULL, &res); return res; } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_const.c b/src/gallium/auxiliary/gallivm/lp_bld_const.c index 57843e9a60c..031ce9d1a37 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_const.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_const.c @@ -301,6 +301,9 @@ lp_build_const_vec(struct lp_type type, elems[0] = LLVMConstInt(elem_type, val*dscale + 0.5, 0); } + if (type.length == 1) + return elems[0]; + for(i = 1; i < type.length; ++i) elems[i] = elems[0]; @@ -321,6 +324,9 @@ lp_build_const_int_vec(struct lp_type type, for(i = 0; i < type.length; ++i) elems[i] = LLVMConstInt(elem_type, val, type.sign ? 1 : 0); + if (type.length == 1) + return elems[0]; + return LLVMConstVector(elems, type.length); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c index e101d2866b4..6a3c8f3f3a4 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c @@ -71,6 +71,8 @@ lp_build_broadcast_scalar(struct lp_build_context *bld, LLVMValueRef res; unsigned i; + assert(lp_check_elem_type(type, LLVMTypeOf(scalar))); + res = bld->undef; for(i = 0; i < type.length; ++i) { LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); -- 2.30.2