From: Jose Fonseca Date: Sun, 3 Apr 2016 23:05:33 +0000 (+0100) Subject: gallivm: Use llvm.fmuladd.*. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=320d1191c61a0a82444605c12e5c4b2ee0b241eb;p=mesa.git gallivm: Use llvm.fmuladd.*. Reviewed-by: Roland Scheidegger --- diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index b098132033a..87951fa165d 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -1123,10 +1123,8 @@ generate_viewport(struct draw_llvm_variant *variant, /* divide by w */ out = LLVMBuildFMul(builder, out, out3, ""); - /* mult by scale */ - out = LLVMBuildFMul(builder, out, scale, ""); - /* add translation */ - out = LLVMBuildFAdd(builder, out, trans, ""); + /* mult by scale, add translation */ + out = lp_build_fmuladd(builder, out, scale, trans); /* store transformed outputs */ LLVMBuildStore(builder, out, outputs[pos][i]); @@ -1303,22 +1301,19 @@ generate_clipmask(struct draw_llvm *llvm, plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, ""); plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_y"); planes = lp_build_broadcast(gallivm, vs_type_llvm, plane1); - test = LLVMBuildFMul(builder, planes, cv_y, ""); - sum = LLVMBuildFAdd(builder, sum, test, ""); + sum = lp_build_fmuladd(builder, planes, cv_y, sum); indices[2] = lp_build_const_int32(gallivm, 2); plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, ""); plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_z"); planes = lp_build_broadcast(gallivm, vs_type_llvm, plane1); - test = LLVMBuildFMul(builder, planes, cv_z, ""); - sum = LLVMBuildFAdd(builder, sum, test, ""); + sum = lp_build_fmuladd(builder, planes, cv_z, sum); indices[2] = lp_build_const_int32(gallivm, 3); plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, ""); plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_w"); planes = lp_build_broadcast(gallivm, vs_type_llvm, plane1); - test = LLVMBuildFMul(builder, planes, cv_w, ""); - sum = LLVMBuildFAdd(builder, sum, test, ""); + sum = lp_build_fmuladd(builder, planes, cv_w, sum); test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, sum); temp = lp_build_const_int_vec(gallivm, i32_type, 1LL << plane_idx); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index 11a1e7d002a..5d6a0335654 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -50,7 +50,6 @@ #include "util/u_memory.h" #include "util/u_debug.h" #include "util/u_math.h" -#include "util/u_string.h" #include "util/u_cpu_detect.h" #include "lp_bld_type.h" @@ -262,6 +261,22 @@ lp_build_min_simple(struct lp_build_context *bld, } +LLVMValueRef +lp_build_fmuladd(LLVMBuilderRef builder, + LLVMValueRef a, + LLVMValueRef b, + LLVMValueRef c) +{ + LLVMTypeRef type = LLVMTypeOf(a); + assert(type == LLVMTypeOf(b)); + assert(type == LLVMTypeOf(c)); + char intrinsic[32]; + lp_format_intrinsic(intrinsic, sizeof intrinsic, "llvm.fmuladd", type); + LLVMValueRef args[] = { a, b, c }; + return lp_build_intrinsic(builder, intrinsic, type, args, 3, 0); +} + + /** * Generate max(a, b) * No checks for special case values of a or b = 1 or 0 are done. @@ -1023,6 +1038,22 @@ lp_build_mul(struct lp_build_context *bld, } +/* a * b + c */ +LLVMValueRef +lp_build_mad(struct lp_build_context *bld, + LLVMValueRef a, + LLVMValueRef b, + LLVMValueRef c) +{ + const struct lp_type type = bld->type; + if (type.floating) { + return lp_build_fmuladd(bld->gallivm->builder, a, b, c); + } else { + return lp_build_add(bld, lp_build_mul(bld, a, b), c); + } +} + + /** * Small vector x scale multiplication optimization. */ @@ -1153,6 +1184,11 @@ lp_build_lerp_simple(struct lp_build_context *bld, delta = lp_build_sub(bld, v1, v0); + if (bld->type.floating) { + assert(flags == 0); + return lp_build_mad(bld, x, delta, v0); + } + if (flags & LP_BLD_LERP_WIDE_NORMALIZED) { if (!bld->type.sign) { if (!(flags & LP_BLD_LERP_PRESCALED_WEIGHTS)) { @@ -2717,23 +2753,10 @@ lp_build_sin_or_cos(struct lp_build_context *bld, /* * The magic pass: "Extended precision modular arithmetic" * x = ((x - y * DP1) - y * DP2) - y * DP3; - * xmm1 = _mm_mul_ps(y, xmm1); - * xmm2 = _mm_mul_ps(y, xmm2); - * xmm3 = _mm_mul_ps(y, xmm3); - */ - LLVMValueRef xmm1 = LLVMBuildFMul(b, y_2, DP1, "xmm1"); - LLVMValueRef xmm2 = LLVMBuildFMul(b, y_2, DP2, "xmm2"); - LLVMValueRef xmm3 = LLVMBuildFMul(b, y_2, DP3, "xmm3"); - - /* - * x = _mm_add_ps(x, xmm1); - * x = _mm_add_ps(x, xmm2); - * x = _mm_add_ps(x, xmm3); */ - - LLVMValueRef x_1 = LLVMBuildFAdd(b, x_abs, xmm1, "x_1"); - LLVMValueRef x_2 = LLVMBuildFAdd(b, x_1, xmm2, "x_2"); - LLVMValueRef x_3 = LLVMBuildFAdd(b, x_2, xmm3, "x_3"); + LLVMValueRef x_1 = lp_build_fmuladd(b, y_2, DP1, x_abs); + LLVMValueRef x_2 = lp_build_fmuladd(b, y_2, DP2, x_1); + LLVMValueRef x_3 = lp_build_fmuladd(b, y_2, DP3, x_2); /* * Evaluate the first polynom (0 <= x <= Pi/4) @@ -2755,10 +2778,8 @@ lp_build_sin_or_cos(struct lp_build_context *bld, * y = *(v4sf*)_ps_coscof_p0; * y = _mm_mul_ps(y, z); */ - LLVMValueRef y_3 = LLVMBuildFMul(b, z, coscof_p0, "y_3"); - LLVMValueRef y_4 = LLVMBuildFAdd(b, y_3, coscof_p1, "y_4"); - LLVMValueRef y_5 = LLVMBuildFMul(b, y_4, z, "y_5"); - LLVMValueRef y_6 = LLVMBuildFAdd(b, y_5, coscof_p2, "y_6"); + LLVMValueRef y_4 = lp_build_fmuladd(b, z, coscof_p0, coscof_p1); + LLVMValueRef y_6 = lp_build_fmuladd(b, y_4, z, coscof_p2); LLVMValueRef y_7 = LLVMBuildFMul(b, y_6, z, "y_7"); LLVMValueRef y_8 = LLVMBuildFMul(b, y_7, z, "y_8"); @@ -2796,13 +2817,10 @@ lp_build_sin_or_cos(struct lp_build_context *bld, * y2 = _mm_add_ps(y2, x); */ - LLVMValueRef y2_3 = LLVMBuildFMul(b, z, sincof_p0, "y2_3"); - LLVMValueRef y2_4 = LLVMBuildFAdd(b, y2_3, sincof_p1, "y2_4"); - LLVMValueRef y2_5 = LLVMBuildFMul(b, y2_4, z, "y2_5"); - LLVMValueRef y2_6 = LLVMBuildFAdd(b, y2_5, sincof_p2, "y2_6"); + LLVMValueRef y2_4 = lp_build_fmuladd(b, z, sincof_p0, sincof_p1); + LLVMValueRef y2_6 = lp_build_fmuladd(b, y2_4, z, sincof_p2); LLVMValueRef y2_7 = LLVMBuildFMul(b, y2_6, z, "y2_7"); - LLVMValueRef y2_8 = LLVMBuildFMul(b, y2_7, x_3, "y2_8"); - LLVMValueRef y2_9 = LLVMBuildFAdd(b, y2_8, x_3, "y2_9"); + LLVMValueRef y2_9 = lp_build_fmuladd(b, y2_7, x_3, x_3); /* * select the correct result from the two polynoms @@ -2969,19 +2987,19 @@ lp_build_polynomial(struct lp_build_context *bld, if (i % 2 == 0) { if (even) - even = lp_build_add(bld, coeff, lp_build_mul(bld, x2, even)); + even = lp_build_mad(bld, x2, even, coeff); else even = coeff; } else { if (odd) - odd = lp_build_add(bld, coeff, lp_build_mul(bld, x2, odd)); + odd = lp_build_mad(bld, x2, odd, coeff); else odd = coeff; } } if (odd) - return lp_build_add(bld, lp_build_mul(bld, odd, x), even); + return lp_build_mad(bld, odd, x, even); else if (even) return even; else @@ -3212,7 +3230,7 @@ lp_build_log2_approx(struct lp_build_context *bld, LLVMValueRef exp = NULL; LLVMValueRef mant = NULL; LLVMValueRef logexp = NULL; - LLVMValueRef logmant = NULL; + LLVMValueRef p_z = NULL; LLVMValueRef res = NULL; assert(lp_check_value(bld->type, x)); @@ -3261,13 +3279,11 @@ lp_build_log2_approx(struct lp_build_context *bld, z = lp_build_mul(bld, y, y); /* compute P(z) */ - logmant = lp_build_polynomial(bld, z, lp_build_log2_polynomial, - ARRAY_SIZE(lp_build_log2_polynomial)); - - /* logmant = y * P(z) */ - logmant = lp_build_mul(bld, y, logmant); + p_z = lp_build_polynomial(bld, z, lp_build_log2_polynomial, + ARRAY_SIZE(lp_build_log2_polynomial)); - res = lp_build_add(bld, logmant, logexp); + /* y * P(z) + logexp */ + res = lp_build_mad(bld, y, p_z, logexp); if (type.floating && handle_edge_cases) { LLVMValueRef negmask, infmask, zmask; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.h b/src/gallium/auxiliary/gallivm/lp_bld_arit.h index e76977cfb0b..622b930a937 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.h @@ -87,6 +87,21 @@ lp_build_div(struct lp_build_context *bld, LLVMValueRef b); +/* llvm.fmuladd.* intrinsic */ +LLVMValueRef +lp_build_fmuladd(LLVMBuilderRef builder, + LLVMValueRef a, + LLVMValueRef b, + LLVMValueRef c); + +/* a * b + c */ +LLVMValueRef +lp_build_mad(struct lp_build_context *bld, + LLVMValueRef a, + LLVMValueRef b, + LLVMValueRef c); + + /** * Set when the weights for normalized are prescaled, that is, in range * 0..2**n, as opposed to range 0..2**(n-1). diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c b/src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c index a6311a1ce84..d5cf698b060 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c @@ -289,8 +289,7 @@ lp_build_linear_to_srgb(struct gallivm_state *gallivm, c_const = lp_build_const_vec(gallivm, src_type, -0.0620f * 255.0f); tmp = lp_build_mul(&f32_bld, a_const, x0375); - tmp2 = lp_build_mul(&f32_bld, b_const, x05); - tmp2 = lp_build_add(&f32_bld, tmp2, c_const); + tmp2 = lp_build_mad(&f32_bld, b_const, x05, c_const); pow_final = lp_build_add(&f32_bld, tmp, tmp2); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c index 4befb3a1c80..a4b3a7b8348 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c @@ -580,10 +580,8 @@ lp_build_brilinear_lod(struct lp_build_context *bld, lp_build_ifloor_fract(bld, lod, out_lod_ipart, &lod_fpart); - lod_fpart = lp_build_mul(bld, lod_fpart, - lp_build_const_vec(bld->gallivm, bld->type, factor)); - - lod_fpart = lp_build_add(bld, lod_fpart, + lod_fpart = lp_build_mad(bld, lod_fpart, + lp_build_const_vec(bld->gallivm, bld->type, factor), lp_build_const_vec(bld->gallivm, bld->type, post_offset)); /* @@ -639,10 +637,8 @@ lp_build_brilinear_rho(struct lp_build_context *bld, /* fpart = rho / 2**ipart */ lod_fpart = lp_build_extract_mantissa(bld, rho); - lod_fpart = lp_build_mul(bld, lod_fpart, - lp_build_const_vec(bld->gallivm, bld->type, factor)); - - lod_fpart = lp_build_add(bld, lod_fpart, + lod_fpart = lp_build_mad(bld, lod_fpart, + lp_build_const_vec(bld->gallivm, bld->type, factor), lp_build_const_vec(bld->gallivm, bld->type, post_offset)); /* diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c index 43af6b4ea0d..1ee97049235 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c @@ -1577,6 +1577,19 @@ log_emit_cpu( } +/* TGSI_OPCODE_MAD (CPU Only) */ + +static void +mad_emit_cpu( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = + lp_build_mad(&bld_base->base, + emit_data->args[0], emit_data->args[1], emit_data->args[2]); +} + /* TGSI_OPCODE_MAX (CPU Only) */ static void @@ -2162,6 +2175,7 @@ lp_set_default_actions_cpu( bld_base->op_actions[TGSI_OPCODE_LG2].emit = lg2_emit_cpu; bld_base->op_actions[TGSI_OPCODE_LOG].emit = log_emit_cpu; + bld_base->op_actions[TGSI_OPCODE_MAD].emit = mad_emit_cpu; bld_base->op_actions[TGSI_OPCODE_MAX].emit = max_emit_cpu; bld_base->op_actions[TGSI_OPCODE_MIN].emit = min_emit_cpu; bld_base->op_actions[TGSI_OPCODE_MOD].emit = mod_emit_cpu; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c index ceac86abe1d..8e4f029fc81 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c @@ -307,10 +307,8 @@ attribs_update_simple(struct lp_build_interp_soa_context *bld, /* * a = a0 + (x * dadx + y * dady) */ - dadx = LLVMBuildFMul(builder, dadx, pixoffx, ""); - dady = LLVMBuildFMul(builder, dady, pixoffy, ""); - a = LLVMBuildFAdd(builder, a, dadx, ""); - a = LLVMBuildFAdd(builder, a, dady, ""); + a = lp_build_fmuladd(builder, dadx, pixoffx, a); + a = lp_build_fmuladd(builder, dady, pixoffy, a); if (interp == LP_INTERP_PERSPECTIVE) { if (oow == NULL) { @@ -437,13 +435,10 @@ coeffs_init(struct lp_build_interp_soa_context *bld, */ if (interp != LP_INTERP_CONSTANT && interp != LP_INTERP_FACING) { - LLVMValueRef axaos, ayaos; - axaos = LLVMBuildFMul(builder, lp_build_broadcast_scalar(setup_bld, bld->x), - dadxaos, ""); - ayaos = LLVMBuildFMul(builder, lp_build_broadcast_scalar(setup_bld, bld->y), - dadyaos, ""); - a0aos = LLVMBuildFAdd(builder, a0aos, ayaos, ""); - a0aos = LLVMBuildFAdd(builder, a0aos, axaos, ""); + LLVMValueRef x = lp_build_broadcast_scalar(setup_bld, bld->x); + LLVMValueRef y = lp_build_broadcast_scalar(setup_bld, bld->y); + a0aos = lp_build_fmuladd(builder, x, dadxaos, a0aos); + a0aos = lp_build_fmuladd(builder, y, dadyaos, a0aos); } /*