X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fllvmpipe%2Flp_state_setup.c;h=77c7ac1bbde585e2a4717f2998c5088dbb814b0e;hb=709905cbb683d5b0b9fdbb82c29165aba0149706;hp=ee4991bf8d2d633cafd561291dfecec4123bbafb;hpb=543fb77ddece7e1806e8eaa0d65bb2a945ef9a75;p=mesa.git diff --git a/src/gallium/drivers/llvmpipe/lp_state_setup.c b/src/gallium/drivers/llvmpipe/lp_state_setup.c index ee4991bf8d2..77c7ac1bbde 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_state_setup.c @@ -28,25 +28,31 @@ #include "util/u_math.h" #include "util/u_memory.h" -#include "util/u_simple_list.h" -#include "os/os_time.h" +#include "util/simple_list.h" +#include "util/os_time.h" +#include "gallivm/lp_bld_arit.h" +#include "gallivm/lp_bld_bitarit.h" +#include "gallivm/lp_bld_const.h" #include "gallivm/lp_bld_debug.h" #include "gallivm/lp_bld_init.h" +#include "gallivm/lp_bld_logic.h" #include "gallivm/lp_bld_intr.h" -#include /* for LLVMVerifyFunction */ +#include "gallivm/lp_bld_flow.h" +#include "gallivm/lp_bld_type.h" #include "lp_perf.h" #include "lp_debug.h" #include "lp_flush.h" #include "lp_screen.h" #include "lp_context.h" -#include "lp_setup_context.h" -#include "lp_rast.h" #include "lp_state.h" #include "lp_state_fs.h" #include "lp_state_setup.h" +/** Setup shader number (for debugging) */ +static unsigned setup_no = 0; + /* currently organized to interpolate full float[4] attributes even * when some elements are unused. Later, can pack vertex data more @@ -74,91 +80,43 @@ struct lp_setup_args LLVMValueRef dy01_ooa; LLVMValueRef dx20_ooa; LLVMValueRef dx01_ooa; + struct lp_build_context bld; }; -static LLVMTypeRef type4f(void) -{ - return LLVMVectorType(LLVMFloatType(), 4); -} - - -/* Equivalent of _mm_setr_ps(a,b,c,d) - */ -static LLVMValueRef vec4f(LLVMBuilderRef bld, - LLVMValueRef a, LLVMValueRef b, LLVMValueRef c, LLVMValueRef d, - const char *name) -{ - LLVMValueRef i0 = LLVMConstInt(LLVMInt32Type(), 0, 0); - LLVMValueRef i1 = LLVMConstInt(LLVMInt32Type(), 1, 0); - LLVMValueRef i2 = LLVMConstInt(LLVMInt32Type(), 2, 0); - LLVMValueRef i3 = LLVMConstInt(LLVMInt32Type(), 3, 0); - - LLVMValueRef res = LLVMGetUndef(type4f()); - - res = LLVMBuildInsertElement(bld, res, a, i0, ""); - res = LLVMBuildInsertElement(bld, res, b, i1, ""); - res = LLVMBuildInsertElement(bld, res, c, i2, ""); - res = LLVMBuildInsertElement(bld, res, d, i3, name); - - return res; -} - -/* Equivalent of _mm_set1_ps(a) - */ -static LLVMValueRef vec4f_from_scalar(LLVMBuilderRef bld, - LLVMValueRef a, - const char *name) -{ - LLVMValueRef res = LLVMGetUndef(type4f()); - int i; - - for(i = 0; i < 4; ++i) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); - res = LLVMBuildInsertElement(bld, res, a, index, i == 3 ? name : ""); - } - - return res; -} static void -store_coef(LLVMBuilderRef builder, - struct lp_setup_args *args, - unsigned slot, - LLVMValueRef a0, - LLVMValueRef dadx, - LLVMValueRef dady) +store_coef(struct gallivm_state *gallivm, + struct lp_setup_args *args, + unsigned slot, + LLVMValueRef a0, + LLVMValueRef dadx, + LLVMValueRef dady) { - LLVMValueRef idx = LLVMConstInt(LLVMInt32Type(), slot, 0); - + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef idx = lp_build_const_int32(gallivm, slot); + LLVMBuildStore(builder, - a0, - LLVMBuildGEP(builder, args->a0, &idx, 1, "")); + a0, + LLVMBuildGEP(builder, args->a0, &idx, 1, "")); LLVMBuildStore(builder, - dadx, - LLVMBuildGEP(builder, args->dadx, &idx, 1, "")); + dadx, + LLVMBuildGEP(builder, args->dadx, &idx, 1, "")); LLVMBuildStore(builder, - dady, - LLVMBuildGEP(builder, args->dady, &idx, 1, "")); + dady, + LLVMBuildGEP(builder, args->dady, &idx, 1, "")); } static void -emit_constant_coef4( LLVMBuilderRef builder, - struct lp_setup_args *args, - unsigned slot, - LLVMValueRef vert, - unsigned attr) +emit_constant_coef4(struct gallivm_state *gallivm, + struct lp_setup_args *args, + unsigned slot, + LLVMValueRef vert) { - LLVMValueRef zero = LLVMConstReal(LLVMFloatType(), 0.0); - LLVMValueRef zerovec = vec4f_from_scalar(builder, zero, "zero"); - LLVMValueRef idx = LLVMConstInt(LLVMInt32Type(), attr, 0); - LLVMValueRef attr_ptr = LLVMBuildGEP(builder, vert, &idx, 1, "attr_ptr"); - LLVMValueRef vert_attr = LLVMBuildLoad(builder, attr_ptr, "vert_attr"); - - store_coef(builder, args, slot, vert_attr, zerovec, zerovec); + store_coef(gallivm, args, slot, vert, args->bld.zero, args->bld.zero); } @@ -168,52 +126,288 @@ emit_constant_coef4( LLVMBuilderRef builder, * \param frontface is the triangle front facing? */ static void -emit_facing_coef( LLVMBuilderRef builder, - struct lp_setup_args *args, - unsigned slot ) +emit_facing_coef(struct gallivm_state *gallivm, + struct lp_setup_args *args, + unsigned slot ) { + LLVMBuilderRef builder = gallivm->builder; + LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context); LLVMValueRef a0_0 = args->facing; - LLVMValueRef a0_0f = LLVMBuildSIToFP(builder, a0_0, LLVMFloatType(), ""); - LLVMValueRef zero = LLVMConstReal(LLVMFloatType(), 0.0); - LLVMValueRef a0 = vec4f(builder, a0_0f, zero, zero, zero, "facing"); - LLVMValueRef zerovec = vec4f_from_scalar(builder, zero, "zero"); - - store_coef(builder, args, slot, a0, zerovec, zerovec); + LLVMValueRef a0_0f = LLVMBuildSIToFP(builder, a0_0, float_type, ""); + LLVMValueRef a0, face_val; + const unsigned char swizzles[4] = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_0, + PIPE_SWIZZLE_0, PIPE_SWIZZLE_0 }; + /* Our face val is either 1 or 0 so we do + * face = (val * 2) - 1 + * to make it 1 or -1 + */ + face_val = + LLVMBuildFAdd(builder, + LLVMBuildFMul(builder, a0_0f, + lp_build_const_float(gallivm, 2.0), + ""), + lp_build_const_float(gallivm, -1.0), + "facing"); + face_val = lp_build_broadcast_scalar(&args->bld, face_val); + a0 = lp_build_swizzle_aos(&args->bld, face_val, swizzles); + + store_coef(gallivm, args, slot, a0, args->bld.zero, args->bld.zero); } static LLVMValueRef -vert_attrib(LLVMBuilderRef b, - LLVMValueRef vert, - int attr, - int elem, - const char *name) +vert_attrib(struct gallivm_state *gallivm, + LLVMValueRef vert, + int attr, + int elem, + const char *name) { + LLVMBuilderRef b = gallivm->builder; LLVMValueRef idx[2]; - idx[0] = LLVMConstInt(LLVMInt32Type(), attr, 0); - idx[1] = LLVMConstInt(LLVMInt32Type(), elem, 0); + idx[0] = lp_build_const_int32(gallivm, attr); + idx[1] = lp_build_const_int32(gallivm, elem); return LLVMBuildLoad(b, LLVMBuildGEP(b, vert, idx, 2, ""), name); } +static void +lp_twoside(struct gallivm_state *gallivm, + struct lp_setup_args *args, + const struct lp_setup_variant_key *key, + int bcolor_slot, + LLVMValueRef attribv[3]) +{ + LLVMBuilderRef b = gallivm->builder; + LLVMValueRef a0_back, a1_back, a2_back; + LLVMValueRef idx2 = lp_build_const_int32(gallivm, bcolor_slot); + + LLVMValueRef facing = args->facing; + LLVMValueRef front_facing = LLVMBuildICmp(b, LLVMIntEQ, facing, + lp_build_const_int32(gallivm, 0), ""); /** need i1 for if condition */ + + a0_back = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v0, &idx2, 1, ""), "v0a_back"); + a1_back = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v1, &idx2, 1, ""), "v1a_back"); + a2_back = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v2, &idx2, 1, ""), "v2a_back"); + + /* Possibly swap the front and back attrib values, + * + * Prefer select to if so we don't have to worry about phis or + * allocas. + */ + attribv[0] = LLVMBuildSelect(b, front_facing, a0_back, attribv[0], ""); + attribv[1] = LLVMBuildSelect(b, front_facing, a1_back, attribv[1], ""); + attribv[2] = LLVMBuildSelect(b, front_facing, a2_back, attribv[2], ""); + +} + +static void +lp_do_offset_tri(struct gallivm_state *gallivm, + struct lp_setup_args *args, + const struct lp_setup_variant_key *key, + LLVMValueRef inv_det, + LLVMValueRef dxyz01, + LLVMValueRef dxyz20, + LLVMValueRef attribv[3]) +{ + LLVMBuilderRef b = gallivm->builder; + struct lp_build_context flt_scalar_bld; + struct lp_build_context int_scalar_bld; + struct lp_build_context *bld = &args->bld; + LLVMValueRef zoffset, mult; + LLVMValueRef z0_new, z1_new, z2_new; + LLVMValueRef dzdxdzdy, dzdx, dzdy, dzxyz20, dyzzx01, dyzzx01_dzxyz20, dzx01_dyz20; + LLVMValueRef z0z1, z0z1z2; + LLVMValueRef max, max_value, res12; + LLVMValueRef shuffles[4]; + LLVMTypeRef shuf_type = LLVMInt32TypeInContext(gallivm->context); + LLVMValueRef onei = lp_build_const_int32(gallivm, 1); + LLVMValueRef zeroi = lp_build_const_int32(gallivm, 0); + LLVMValueRef twoi = lp_build_const_int32(gallivm, 2); + LLVMValueRef threei = lp_build_const_int32(gallivm, 3); + + /* (res12) = cross(e,f).xy */ + shuffles[0] = twoi; + shuffles[1] = zeroi; + shuffles[2] = onei; + shuffles[3] = twoi; + dzxyz20 = LLVMBuildShuffleVector(b, dxyz20, dxyz20, LLVMConstVector(shuffles, 4), ""); + + shuffles[0] = onei; + shuffles[1] = twoi; + shuffles[2] = twoi; + shuffles[3] = zeroi; + dyzzx01 = LLVMBuildShuffleVector(b, dxyz01, dxyz01, LLVMConstVector(shuffles, 4), ""); + + dyzzx01_dzxyz20 = LLVMBuildFMul(b, dzxyz20, dyzzx01, "dyzzx01_dzxyz20"); + + shuffles[0] = twoi; + shuffles[1] = threei; + shuffles[2] = LLVMGetUndef(shuf_type); + shuffles[3] = LLVMGetUndef(shuf_type); + dzx01_dyz20 = LLVMBuildShuffleVector(b, dyzzx01_dzxyz20, dyzzx01_dzxyz20, + LLVMConstVector(shuffles, 4), ""); + + res12 = LLVMBuildFSub(b, dyzzx01_dzxyz20, dzx01_dyz20, "res12"); + + /* dzdx = fabsf(res1 * inv_det), dydx = fabsf(res2 * inv_det)*/ + dzdxdzdy = LLVMBuildFMul(b, res12, inv_det, "dzdxdzdy"); + dzdxdzdy = lp_build_abs(bld, dzdxdzdy); + + dzdx = LLVMBuildExtractElement(b, dzdxdzdy, zeroi, ""); + dzdy = LLVMBuildExtractElement(b, dzdxdzdy, onei, ""); + + /* mult = MAX2(dzdx, dzdy) * pgon_offset_scale */ + max = LLVMBuildFCmp(b, LLVMRealUGT, dzdx, dzdy, ""); + max_value = LLVMBuildSelect(b, max, dzdx, dzdy, "max"); + + mult = LLVMBuildFMul(b, max_value, + lp_build_const_float(gallivm, key->pgon_offset_scale), ""); + + lp_build_context_init(&flt_scalar_bld, gallivm, lp_type_float_vec(32, 32)); + + if (key->floating_point_depth) { + /* + * bias = pgon_offset_units * 2^(exponent(max(z0, z1, z2)) - mantissa_bits) + + * MAX2(dzdx, dzdy) * pgon_offset_scale + * + * NOTE: Assumes IEEE float32. + */ + LLVMValueRef c23_shifted, exp_mask, bias, exp; + LLVMValueRef maxz_value, maxz0z1_value; + + lp_build_context_init(&int_scalar_bld, gallivm, lp_type_int_vec(32, 32)); + + c23_shifted = lp_build_const_int32(gallivm, 23 << 23); + exp_mask = lp_build_const_int32(gallivm, 0xff << 23); + + maxz0z1_value = lp_build_max(&flt_scalar_bld, + LLVMBuildExtractElement(b, attribv[0], twoi, ""), + LLVMBuildExtractElement(b, attribv[1], twoi, "")); + + maxz_value = lp_build_max(&flt_scalar_bld, + LLVMBuildExtractElement(b, attribv[2], twoi, ""), + maxz0z1_value); + + exp = LLVMBuildBitCast(b, maxz_value, int_scalar_bld.vec_type, ""); + exp = lp_build_and(&int_scalar_bld, exp, exp_mask); + exp = lp_build_sub(&int_scalar_bld, exp, c23_shifted); + /* Clamping to zero means mrd will be zero for very small numbers, + * but specs do not indicate this should be prevented by clamping + * mrd to smallest normal number instead. */ + exp = lp_build_max(&int_scalar_bld, exp, int_scalar_bld.zero); + exp = LLVMBuildBitCast(b, exp, flt_scalar_bld.vec_type, ""); + + bias = LLVMBuildFMul(b, exp, + lp_build_const_float(gallivm, key->pgon_offset_units), + "bias"); + + zoffset = LLVMBuildFAdd(b, bias, mult, "zoffset"); + } else { + /* + * bias = pgon_offset_units + MAX2(dzdx, dzdy) * pgon_offset_scale + */ + zoffset = LLVMBuildFAdd(b, + lp_build_const_float(gallivm, key->pgon_offset_units), + mult, "zoffset"); + } + if (key->pgon_offset_clamp > 0) { + zoffset = lp_build_min(&flt_scalar_bld, + lp_build_const_float(gallivm, key->pgon_offset_clamp), + zoffset); + } + else if (key->pgon_offset_clamp < 0) { + zoffset = lp_build_max(&flt_scalar_bld, + lp_build_const_float(gallivm, key->pgon_offset_clamp), + zoffset); + } + + /* yuck */ + shuffles[0] = twoi; + shuffles[1] = lp_build_const_int32(gallivm, 6); + shuffles[2] = LLVMGetUndef(shuf_type); + shuffles[3] = LLVMGetUndef(shuf_type); + z0z1 = LLVMBuildShuffleVector(b, attribv[0], attribv[1], LLVMConstVector(shuffles, 4), ""); + shuffles[0] = zeroi; + shuffles[1] = onei; + shuffles[2] = lp_build_const_int32(gallivm, 6); + shuffles[3] = LLVMGetUndef(shuf_type); + z0z1z2 = LLVMBuildShuffleVector(b, z0z1, attribv[2], LLVMConstVector(shuffles, 4), ""); + zoffset = lp_build_broadcast_scalar(bld, zoffset); + + /* clamp and do offset */ + /* + * FIXME I suspect the clamp (is that even right to always clamp to fixed + * 0.0/1.0?) should really be per fragment? + */ + z0z1z2 = lp_build_clamp(bld, LLVMBuildFAdd(b, z0z1z2, zoffset, ""), bld->zero, bld->one); + + /* insert into args->a0.z, a1.z, a2.z: + */ + z0_new = LLVMBuildExtractElement(b, z0z1z2, zeroi, ""); + z1_new = LLVMBuildExtractElement(b, z0z1z2, onei, ""); + z2_new = LLVMBuildExtractElement(b, z0z1z2, twoi, ""); + attribv[0] = LLVMBuildInsertElement(b, attribv[0], z0_new, twoi, ""); + attribv[1] = LLVMBuildInsertElement(b, attribv[1], z1_new, twoi, ""); + attribv[2] = LLVMBuildInsertElement(b, attribv[2], z2_new, twoi, ""); +} + +static void +load_attribute(struct gallivm_state *gallivm, + struct lp_setup_args *args, + const struct lp_setup_variant_key *key, + unsigned vert_attr, + LLVMValueRef attribv[3]) +{ + LLVMBuilderRef b = gallivm->builder; + LLVMValueRef idx = lp_build_const_int32(gallivm, vert_attr); + + /* Load the vertex data + */ + attribv[0] = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v0, &idx, 1, ""), "v0a"); + attribv[1] = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v1, &idx, 1, ""), "v1a"); + attribv[2] = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v2, &idx, 1, ""), "v2a"); + + + /* Potentially modify it according to twoside, etc: + */ + if (key->twoside) { + if (vert_attr == key->color_slot && key->bcolor_slot >= 0) + lp_twoside(gallivm, args, key, key->bcolor_slot, attribv); + else if (vert_attr == key->spec_slot && key->bspec_slot >= 0) + lp_twoside(gallivm, args, key, key->bspec_slot, attribv); + } +} + +/* + * FIXME: interpolation is always done wrt fb origin (0/0). + * However, if some (small) tri is far away from the origin and gradients + * are large, this can lead to HUGE errors, since the a0 value calculated + * here can get very large (with the actual values inside the triangle way + * smaller), leading to complete loss of accuracy. This could be prevented + * by using some point inside (or at corner) of the tri as interpolation + * origin, or just use barycentric interpolation (which GL suggests and is + * what real hw does - you can get the barycentric coordinates from the + * edge functions in rasterization in principle (though we skip these + * sometimes completely in case of tris covering a block fully, + * which obviously wouldn't work)). + */ static void -emit_coef4( LLVMBuilderRef b, - struct lp_setup_args *args, - unsigned slot, - LLVMValueRef a0, - LLVMValueRef a1, - LLVMValueRef a2) +emit_coef4( struct gallivm_state *gallivm, + struct lp_setup_args *args, + unsigned slot, + LLVMValueRef a0, + LLVMValueRef a1, + LLVMValueRef a2) { + LLVMBuilderRef b = gallivm->builder; + LLVMValueRef attr_0; LLVMValueRef dy20_ooa = args->dy20_ooa; LLVMValueRef dy01_ooa = args->dy01_ooa; LLVMValueRef dx20_ooa = args->dx20_ooa; LLVMValueRef dx01_ooa = args->dx01_ooa; LLVMValueRef x0_center = args->x0_center; LLVMValueRef y0_center = args->y0_center; - - /* XXX: using fsub, fmul on vector types -- does this work?? - */ LLVMValueRef da01 = LLVMBuildFSub(b, a0, a1, "da01"); LLVMValueRef da20 = LLVMBuildFSub(b, a2, a0, "da20"); @@ -231,32 +425,30 @@ emit_coef4( LLVMBuilderRef b, /* Calculate a0 - the attribute value at the origin */ - LLVMValueRef dadx_x0 = LLVMBuildFMul(b, dadx, x0_center, "dadx_x0"); - LLVMValueRef dady_y0 = LLVMBuildFMul(b, dady, y0_center, "dady_y0"); - LLVMValueRef attr_v0 = LLVMBuildFAdd(b, dadx_x0, dady_y0, "attr_v0"); - LLVMValueRef attr_0 = LLVMBuildFSub(b, a0, attr_v0, "attr_0"); + LLVMValueRef dadx_x0 = LLVMBuildFMul(b, dadx, x0_center, "dadx_x0"); + LLVMValueRef dady_y0 = LLVMBuildFMul(b, dady, y0_center, "dady_y0"); + LLVMValueRef attr_v0 = LLVMBuildFAdd(b, dadx_x0, dady_y0, "attr_v0"); + attr_0 = LLVMBuildFSub(b, a0, attr_v0, "attr_0"); - store_coef(b, args, slot, attr_0, dadx, dady); + store_coef(gallivm, args, slot, attr_0, dadx, dady); } static void -emit_linear_coef( LLVMBuilderRef b, - struct lp_setup_args *args, - unsigned slot, - unsigned vert_attr) +emit_linear_coef( struct gallivm_state *gallivm, + struct lp_setup_args *args, + unsigned slot, + LLVMValueRef attribv[3]) { - LLVMValueRef idx = LLVMConstInt(LLVMInt32Type(), vert_attr, 0); - - LLVMValueRef a0 = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v0, &idx, 1, ""), "v0a"); - LLVMValueRef a1 = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v1, &idx, 1, ""), "v1a"); - LLVMValueRef a2 = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v2, &idx, 1, ""), "v2a"); - - emit_coef4(b, args, slot, a0, a1, a2); + /* nothing to do anymore */ + emit_coef4(gallivm, + args, slot, + attribv[0], + attribv[1], + attribv[2]); } - /** * Compute a0, dadx and dady for a perspective-corrected interpolant, * for a triangle. @@ -266,77 +458,141 @@ emit_linear_coef( LLVMBuilderRef b, * divide the interpolated value by the interpolated W at that fragment. */ static void -emit_perspective_coef( LLVMBuilderRef b, - struct lp_setup_args *args, - unsigned slot, - unsigned vert_attr) +apply_perspective_corr( struct gallivm_state *gallivm, + struct lp_setup_args *args, + unsigned slot, + LLVMValueRef attribv[3]) { + LLVMBuilderRef b = gallivm->builder; + /* premultiply by 1/w (v[0][3] is always 1/w): */ - LLVMValueRef idx = LLVMConstInt(LLVMInt32Type(), vert_attr, 0); - - LLVMValueRef v0a = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v0, &idx, 1, ""), "v0a"); - LLVMValueRef v1a = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v1, &idx, 1, ""), "v1a"); - LLVMValueRef v2a = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v2, &idx, 1, ""), "v2a"); - - LLVMValueRef v0_oow = vec4f_from_scalar(b, vert_attrib(b, args->v0, 0, 3, ""), "v0_oow"); - LLVMValueRef v1_oow = vec4f_from_scalar(b, vert_attrib(b, args->v1, 0, 3, ""), "v1_oow"); - LLVMValueRef v2_oow = vec4f_from_scalar(b, vert_attrib(b, args->v2, 0, 3, ""), "v2_oow"); - - LLVMValueRef v0_oow_v0a = LLVMBuildFMul(b, v0a, v0_oow, "v0_oow_v0a"); - LLVMValueRef v1_oow_v1a = LLVMBuildFMul(b, v1a, v1_oow, "v1_oow_v1a"); - LLVMValueRef v2_oow_v2a = LLVMBuildFMul(b, v2a, v2_oow, "v2_oow_v2a"); - - emit_coef4(b, args, slot, v0_oow_v0a, v1_oow_v1a, v2_oow_v2a); + LLVMValueRef v0_oow = lp_build_broadcast_scalar(&args->bld, + vert_attrib(gallivm, args->v0, 0, 3, "v0_oow")); + LLVMValueRef v1_oow = lp_build_broadcast_scalar(&args->bld, + vert_attrib(gallivm, args->v1, 0, 3, "v1_oow")); + LLVMValueRef v2_oow = lp_build_broadcast_scalar(&args->bld, + vert_attrib(gallivm, args->v2, 0, 3, "v2_oow")); + + attribv[0] = LLVMBuildFMul(b, attribv[0], v0_oow, "v0_oow_v0a"); + attribv[1] = LLVMBuildFMul(b, attribv[1], v1_oow, "v1_oow_v1a"); + attribv[2] = LLVMBuildFMul(b, attribv[2], v2_oow, "v2_oow_v2a"); } +/** + * Applys cylindrical wrapping to vertex attributes if enabled. + * Input coordinates must be in [0, 1] range, otherwise results are undefined. + * + * @param cyl_wrap TGSI_CYLINDRICAL_WRAP_x flags + */ static void -emit_position_coef( LLVMBuilderRef builder, - struct lp_setup_args *args, - int slot, int attrib ) +emit_apply_cyl_wrap(struct gallivm_state *gallivm, + struct lp_setup_args *args, + uint cyl_wrap, + LLVMValueRef attribv[3]) + { - emit_linear_coef(builder, args, slot, attrib); + LLVMBuilderRef builder = gallivm->builder; + struct lp_type type = args->bld.type; + LLVMTypeRef float_vec_type = args->bld.vec_type; + LLVMValueRef pos_half; + LLVMValueRef neg_half; + LLVMValueRef cyl_mask; + LLVMValueRef offset; + LLVMValueRef delta; + LLVMValueRef one; + + if (!cyl_wrap) + return; + + /* Constants */ + pos_half = lp_build_const_vec(gallivm, type, +0.5f); + neg_half = lp_build_const_vec(gallivm, type, -0.5f); + cyl_mask = lp_build_const_mask_aos(gallivm, type, cyl_wrap, 4); + + one = lp_build_const_vec(gallivm, type, 1.0f); + one = LLVMBuildBitCast(builder, one, lp_build_int_vec_type(gallivm, type), ""); + one = LLVMBuildAnd(builder, one, cyl_mask, ""); + + /* Edge v0 -> v1 */ + delta = LLVMBuildFSub(builder, attribv[1], attribv[0], ""); + + offset = lp_build_compare(gallivm, type, PIPE_FUNC_GREATER, delta, pos_half); + offset = LLVMBuildAnd(builder, offset, one, ""); + offset = LLVMBuildBitCast(builder, offset, float_vec_type, ""); + attribv[0] = LLVMBuildFAdd(builder, attribv[0], offset, ""); + + offset = lp_build_compare(gallivm, type, PIPE_FUNC_LESS, delta, neg_half); + offset = LLVMBuildAnd(builder, offset, one, ""); + offset = LLVMBuildBitCast(builder, offset, float_vec_type, ""); + attribv[1] = LLVMBuildFAdd(builder, attribv[1], offset, ""); + + /* Edge v1 -> v2 */ + delta = LLVMBuildFSub(builder, attribv[2], attribv[1], ""); + + offset = lp_build_compare(gallivm, type, PIPE_FUNC_GREATER, delta, pos_half); + offset = LLVMBuildAnd(builder, offset, one, ""); + offset = LLVMBuildBitCast(builder, offset, float_vec_type, ""); + attribv[1] = LLVMBuildFAdd(builder, attribv[1], offset, ""); + + offset = lp_build_compare(gallivm, type, PIPE_FUNC_LESS, delta, neg_half); + offset = LLVMBuildAnd(builder, offset, one, ""); + offset = LLVMBuildBitCast(builder, offset, float_vec_type, ""); + attribv[2] = LLVMBuildFAdd(builder, attribv[2], offset, ""); + + /* Edge v2 -> v0 */ + delta = LLVMBuildFSub(builder, attribv[0], attribv[2], ""); + + offset = lp_build_compare(gallivm, type, PIPE_FUNC_GREATER, delta, pos_half); + offset = LLVMBuildAnd(builder, offset, one, ""); + offset = LLVMBuildBitCast(builder, offset, float_vec_type, ""); + attribv[2] = LLVMBuildFAdd(builder, attribv[2], offset, ""); + + offset = lp_build_compare(gallivm, type, PIPE_FUNC_LESS, delta, neg_half); + offset = LLVMBuildAnd(builder, offset, one, ""); + offset = LLVMBuildBitCast(builder, offset, float_vec_type, ""); + attribv[0] = LLVMBuildFAdd(builder, attribv[0], offset, ""); } - - /** * Compute the inputs-> dadx, dady, a0 values. */ static void -emit_tri_coef( LLVMBuilderRef builder, - const struct lp_setup_variant_key *key, - struct lp_setup_args *args ) +emit_tri_coef( struct gallivm_state *gallivm, + const struct lp_setup_variant_key *key, + struct lp_setup_args *args) { unsigned slot; - /* The internal position input is in slot zero: - */ - emit_position_coef(builder, args, 0, 0); + LLVMValueRef attribs[3]; - /* setup interpolation for all the remaining attributes: + /* setup interpolation for all the remaining attributes: */ for (slot = 0; slot < key->num_inputs; slot++) { - unsigned vert_attr = key->inputs[slot].src_index; - switch (key->inputs[slot].interp) { case LP_INTERP_CONSTANT: - if (key->flatshade_first) { - emit_constant_coef4(builder, args, slot+1, args->v0, vert_attr); - } - else { - emit_constant_coef4(builder, args, slot+1, args->v2, vert_attr); - } - break; + load_attribute(gallivm, args, key, key->inputs[slot].src_index, attribs); + if (key->flatshade_first) { + emit_constant_coef4(gallivm, args, slot+1, attribs[0]); + } + else { + emit_constant_coef4(gallivm, args, slot+1, attribs[2]); + } + break; case LP_INTERP_LINEAR: - emit_linear_coef(builder, args, slot+1, vert_attr); + load_attribute(gallivm, args, key, key->inputs[slot].src_index, attribs); + emit_apply_cyl_wrap(gallivm, args, key->inputs[slot].cyl_wrap, attribs); + emit_linear_coef(gallivm, args, slot+1, attribs); break; case LP_INTERP_PERSPECTIVE: - emit_perspective_coef(builder, args, slot+1, vert_attr); + load_attribute(gallivm, args, key, key->inputs[slot].src_index, attribs); + emit_apply_cyl_wrap(gallivm, args, key->inputs[slot].cyl_wrap, attribs); + apply_perspective_corr(gallivm, args, slot+1, attribs); + emit_linear_coef(gallivm, args, slot+1, attribs); break; case LP_INTERP_POSITION: @@ -347,7 +603,7 @@ emit_tri_coef( LLVMBuilderRef builder, break; case LP_INTERP_FACING: - emit_facing_coef(builder, args, slot+1); + emit_facing_coef(gallivm, args, slot+1); break; default: @@ -357,119 +613,90 @@ emit_tri_coef( LLVMBuilderRef builder, } -/* XXX: This is generic code, share with fs/vs codegen: - */ -static lp_jit_setup_triangle -finalize_function(struct llvmpipe_screen *screen, - LLVMBuilderRef builder, - LLVMValueRef function) -{ - void *f; - - /* Verify the LLVM IR. If invalid, dump and abort */ -#ifdef DEBUG - if (LLVMVerifyFunction(function, LLVMPrintMessageAction)) { - if (1) - lp_debug_dump_value(function); - abort(); - } -#endif - - /* Apply optimizations to LLVM IR */ - LLVMRunFunctionPassManager(screen->pass, function); - - if (gallivm_debug & GALLIVM_DEBUG_IR) - { - /* Print the LLVM IR to stderr */ - lp_debug_dump_value(function); - debug_printf("\n"); - } - - /* - * Translate the LLVM IR into machine code. - */ - f = LLVMGetPointerToGlobal(screen->engine, function); - - if (gallivm_debug & GALLIVM_DEBUG_ASM) - { - lp_disassemble(f); - } - - lp_func_delete_body(function); - - return f; -} - -/* XXX: Generic code: - */ -static void -lp_emit_emms(LLVMBuilderRef builder) -{ -#ifdef PIPE_ARCH_X86 - /* Avoid corrupting the FPU stack on 32bit OSes. */ - lp_build_intrinsic(builder, "llvm.x86.mmx.emms", LLVMVoidType(), NULL, 0); -#endif -} - - /* XXX: generic code: */ static void set_noalias(LLVMBuilderRef builder, - LLVMValueRef function, - const LLVMTypeRef *arg_types, - int nr_args) + LLVMValueRef function, + const LLVMTypeRef *arg_types, + int nr_args) { int i; - for(i = 0; i < Elements(arg_types); ++i) + for(i = 0; i < nr_args; ++i) if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) - LLVMAddAttribute(LLVMGetParam(function, i), - LLVMNoAliasAttribute); + lp_add_function_attr(function, i + 1, LP_FUNC_ATTR_NOALIAS); } static void -init_args(LLVMBuilderRef b, - struct lp_setup_args *args, - const struct lp_setup_variant *variant) +init_args(struct gallivm_state *gallivm, + const struct lp_setup_variant_key *key, + struct lp_setup_args *args) { - LLVMValueRef v0_x = vert_attrib(b, args->v0, 0, 0, "v0_x"); - LLVMValueRef v0_y = vert_attrib(b, args->v0, 0, 1, "v0_y"); + LLVMBuilderRef b = gallivm->builder; + LLVMTypeRef shuf_type = LLVMInt32TypeInContext(gallivm->context); + LLVMValueRef onef = lp_build_const_float(gallivm, 1.0); + LLVMValueRef onei = lp_build_const_int32(gallivm, 1); + LLVMValueRef zeroi = lp_build_const_int32(gallivm, 0); + LLVMValueRef pixel_center, xy0_center, dxy01, dxy20, dyx20; + LLVMValueRef e, f, ef, ooa; + LLVMValueRef shuffles[4], shuf10; + LLVMValueRef attr_pos[3]; + struct lp_type typef4 = lp_type_float_vec(32, 128); + struct lp_build_context bld; + + lp_build_context_init(&bld, gallivm, typef4); + args->bld = bld; + + /* The internal position input is in slot zero: + */ + load_attribute(gallivm, args, key, 0, attr_pos); - LLVMValueRef v1_x = vert_attrib(b, args->v1, 0, 0, "v1_x"); - LLVMValueRef v1_y = vert_attrib(b, args->v1, 0, 1, "v1_y"); + pixel_center = lp_build_const_vec(gallivm, typef4, + key->pixel_center_half ? 0.5 : 0.0); - LLVMValueRef v2_x = vert_attrib(b, args->v2, 0, 0, "v2_x"); - LLVMValueRef v2_y = vert_attrib(b, args->v2, 0, 1, "v2_y"); + /* + * xy are first two elems in v0a/v1a/v2a but just use vec4 arit + * also offset_tri uses actually xyz in them + */ + xy0_center = LLVMBuildFSub(b, attr_pos[0], pixel_center, "xy0_center" ); - LLVMValueRef pixel_center = LLVMConstReal(LLVMFloatType(), - variant->key.pixel_center_half ? 0.5 : 0); + dxy01 = LLVMBuildFSub(b, attr_pos[0], attr_pos[1], "dxy01"); + dxy20 = LLVMBuildFSub(b, attr_pos[2], attr_pos[0], "dxy20"); - LLVMValueRef x0_center = LLVMBuildFSub(b, v0_x, pixel_center, "x0_center" ); - LLVMValueRef y0_center = LLVMBuildFSub(b, v0_y, pixel_center, "y0_center" ); - - LLVMValueRef dx01 = LLVMBuildFSub(b, v0_x, v1_x, "dx01"); - LLVMValueRef dy01 = LLVMBuildFSub(b, v0_y, v1_y, "dy01"); - LLVMValueRef dx20 = LLVMBuildFSub(b, v2_x, v0_x, "dx20"); - LLVMValueRef dy20 = LLVMBuildFSub(b, v2_y, v0_y, "dy20"); + shuffles[0] = onei; + shuffles[1] = zeroi; + shuffles[2] = LLVMGetUndef(shuf_type); + shuffles[3] = LLVMGetUndef(shuf_type); + shuf10 = LLVMConstVector(shuffles, 4); - LLVMValueRef one = LLVMConstReal(LLVMFloatType(), 1.0); - LLVMValueRef e = LLVMBuildFMul(b, dx01, dy20, "e"); - LLVMValueRef f = LLVMBuildFMul(b, dx20, dy01, "f"); - LLVMValueRef ooa = LLVMBuildFDiv(b, one, LLVMBuildFSub(b, e, f, ""), "ooa"); + dyx20 = LLVMBuildShuffleVector(b, dxy20, dxy20, shuf10, ""); - LLVMValueRef dy20_ooa = LLVMBuildFMul(b, dy20, ooa, "dy20_ooa"); - LLVMValueRef dy01_ooa = LLVMBuildFMul(b, dy01, ooa, "dy01_ooa"); - LLVMValueRef dx20_ooa = LLVMBuildFMul(b, dx20, ooa, "dx20_ooa"); - LLVMValueRef dx01_ooa = LLVMBuildFMul(b, dx01, ooa, "dx01_ooa"); + ef = LLVMBuildFMul(b, dxy01, dyx20, "ef"); + e = LLVMBuildExtractElement(b, ef, zeroi, ""); + f = LLVMBuildExtractElement(b, ef, onei, ""); - args->dy20_ooa = vec4f_from_scalar(b, dy20_ooa, "dy20_ooa_4f"); - args->dy01_ooa = vec4f_from_scalar(b, dy01_ooa, "dy01_ooa_4f"); + ooa = LLVMBuildFDiv(b, onef, LLVMBuildFSub(b, e, f, ""), "ooa"); - args->dx20_ooa = vec4f_from_scalar(b, dx20_ooa, "dx20_ooa_4f"); - args->dx01_ooa = vec4f_from_scalar(b, dx01_ooa, "dx01_ooa_4f"); + ooa = lp_build_broadcast_scalar(&bld, ooa); - args->x0_center = vec4f_from_scalar(b, x0_center, "x0_center_4f"); - args->y0_center = vec4f_from_scalar(b, y0_center, "y0_center_4f"); + /* tri offset calc shares a lot of arithmetic, do it here */ + if (key->pgon_offset_scale != 0.0f || key->pgon_offset_units != 0.0f) { + lp_do_offset_tri(gallivm, args, key, ooa, dxy01, dxy20, attr_pos); + } + + dxy20 = LLVMBuildFMul(b, dxy20, ooa, ""); + dxy01 = LLVMBuildFMul(b, dxy01, ooa, ""); + + args->dy20_ooa = lp_build_extract_broadcast(gallivm, typef4, typef4, dxy20, onei); + args->dy01_ooa = lp_build_extract_broadcast(gallivm, typef4, typef4, dxy01, onei); + + args->dx20_ooa = lp_build_extract_broadcast(gallivm, typef4, typef4, dxy20, zeroi); + args->dx01_ooa = lp_build_extract_broadcast(gallivm, typef4, typef4, dxy01, zeroi); + + args->x0_center = lp_build_extract_broadcast(gallivm, typef4, typef4, xy0_center, zeroi); + args->y0_center = lp_build_extract_broadcast(gallivm, typef4, typef4, xy0_center, onei); + + emit_linear_coef(gallivm, args, 0, attr_pos); } /** @@ -477,25 +704,38 @@ init_args(LLVMBuilderRef b, * */ static struct lp_setup_variant * -generate_setup_variant(struct llvmpipe_screen *screen, - struct lp_setup_variant_key *key) +generate_setup_variant(struct lp_setup_variant_key *key, + struct llvmpipe_context *lp) { struct lp_setup_variant *variant = NULL; + struct gallivm_state *gallivm; struct lp_setup_args args; - char func_name[256]; + char func_name[64]; LLVMTypeRef vec4f_type; LLVMTypeRef func_type; - LLVMTypeRef arg_types[8]; + LLVMTypeRef arg_types[7]; LLVMBasicBlockRef block; LLVMBuilderRef builder; - int64_t t0, t1; + int64_t t0 = 0, t1; if (0) goto fail; variant = CALLOC_STRUCT(lp_setup_variant); - if (variant == NULL) + if (!variant) + goto fail; + + variant->no = setup_no++; + + util_snprintf(func_name, sizeof(func_name), "setup_variant_%u", + variant->no); + + variant->gallivm = gallivm = gallivm_create(func_name, lp->context); + if (!variant->gallivm) { goto fail; + } + + builder = gallivm->builder; if (LP_DEBUG & DEBUG_COUNTERS) { t0 = os_time_get(); @@ -504,28 +744,24 @@ generate_setup_variant(struct llvmpipe_screen *screen, memcpy(&variant->key, key, key->size); variant->list_item_global.base = variant; - util_snprintf(func_name, sizeof(func_name), "fs%u_setup%u", - 0, - variant->no); - /* Currently always deal with full 4-wide vertex attributes from * the vertices. */ - vec4f_type = LLVMVectorType(LLVMFloatType(), 4); + vec4f_type = LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4); arg_types[0] = LLVMPointerType(vec4f_type, 0); /* v0 */ arg_types[1] = LLVMPointerType(vec4f_type, 0); /* v1 */ arg_types[2] = LLVMPointerType(vec4f_type, 0); /* v2 */ - arg_types[3] = LLVMInt32Type(); /* facing */ + arg_types[3] = LLVMInt32TypeInContext(gallivm->context); /* facing */ arg_types[4] = LLVMPointerType(vec4f_type, 0); /* a0, aligned */ arg_types[5] = LLVMPointerType(vec4f_type, 0); /* dadx, aligned */ arg_types[6] = LLVMPointerType(vec4f_type, 0); /* dady, aligned */ - arg_types[7] = LLVMPointerType(vec4f_type, 0); /* key, unused */ - func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0); + func_type = LLVMFunctionType(LLVMVoidTypeInContext(gallivm->context), + arg_types, ARRAY_SIZE(arg_types), 0); - variant->function = LLVMAddFunction(screen->module, func_name, func_type); + variant->function = LLVMAddFunction(gallivm->module, func_name, func_type); if (!variant->function) goto fail; @@ -550,23 +786,27 @@ generate_setup_variant(struct llvmpipe_screen *screen, /* * Function body */ - block = LLVMAppendBasicBlock(variant->function, "entry"); - builder = LLVMCreateBuilder(); + block = LLVMAppendBasicBlockInContext(gallivm->context, + variant->function, "entry"); LLVMPositionBuilderAtEnd(builder, block); - set_noalias(builder, variant->function, arg_types, Elements(arg_types)); - init_args(builder, &args, variant); - emit_tri_coef(builder, &variant->key, &args); + set_noalias(builder, variant->function, arg_types, ARRAY_SIZE(arg_types)); + init_args(gallivm, &variant->key, &args); + emit_tri_coef(gallivm, &variant->key, &args); - lp_emit_emms(builder); LLVMBuildRetVoid(builder); - LLVMDisposeBuilder(builder); - variant->jit_function = finalize_function(screen, builder, - variant->function); + gallivm_verify_function(gallivm, variant->function); + + gallivm_compile_module(gallivm); + + variant->jit_function = (lp_jit_setup_triangle) + gallivm_jit_function(gallivm, variant->function); if (!variant->jit_function) goto fail; + gallivm_free_ir(variant->gallivm); + /* * Update timing information: */ @@ -575,20 +815,17 @@ generate_setup_variant(struct llvmpipe_screen *screen, LP_COUNT_ADD(llvm_compile_time, t1 - t0); LP_COUNT_ADD(nr_llvm_compiles, 1); } - + return variant; fail: if (variant) { - if (variant->function) { - if (variant->jit_function) - LLVMFreeMachineCodeForFunction(screen->engine, - variant->function); - LLVMDeleteFunction(variant->function); + if (variant->gallivm) { + gallivm_destroy(variant->gallivm); } FREE(variant); } - + return NULL; } @@ -596,27 +833,49 @@ fail: static void lp_make_setup_variant_key(struct llvmpipe_context *lp, - struct lp_setup_variant_key *key) + struct lp_setup_variant_key *key) { struct lp_fragment_shader *fs = lp->fs; unsigned i; - assert(sizeof key->inputs[0] == sizeof(ushort)); - - key->num_inputs = fs->info.num_inputs; + assert(sizeof key->inputs[0] == sizeof(uint)); + + key->num_inputs = fs->info.base.num_inputs; key->flatshade_first = lp->rasterizer->flatshade_first; - key->pixel_center_half = lp->rasterizer->gl_rasterization_rules; + key->pixel_center_half = lp->rasterizer->half_pixel_center; + key->twoside = lp->rasterizer->light_twoside; key->size = Offset(struct lp_setup_variant_key, - inputs[key->num_inputs]); - key->pad = 0; + inputs[key->num_inputs]); + + key->color_slot = lp->color_slot[0]; + key->bcolor_slot = lp->bcolor_slot[0]; + key->spec_slot = lp->color_slot[1]; + key->bspec_slot = lp->bcolor_slot[1]; + + /* + * If depth is floating point, depth bias is calculated with respect + * to the primitive's maximum Z value. Retain the original depth bias + * value until that stage. + */ + key->floating_point_depth = lp->floating_point_depth; + if (key->floating_point_depth) { + key->pgon_offset_units = (float) lp->rasterizer->offset_units; + } else { + key->pgon_offset_units = + (float) (lp->rasterizer->offset_units * lp->mrd); + } + + key->pgon_offset_scale = lp->rasterizer->offset_scale; + key->pgon_offset_clamp = lp->rasterizer->offset_clamp; + key->pad = 0; memcpy(key->inputs, fs->inputs, key->num_inputs * sizeof key->inputs[0]); for (i = 0; i < key->num_inputs; i++) { if (key->inputs[i].interp == LP_INTERP_COLOR) { - if (lp->rasterizer->flatshade) - key->inputs[i].interp = LP_INTERP_CONSTANT; - else - key->inputs[i].interp = LP_INTERP_LINEAR; + if (lp->rasterizer->flatshade) + key->inputs[i].interp = LP_INTERP_CONSTANT; + else + key->inputs[i].interp = LP_INTERP_PERSPECTIVE; } } @@ -625,20 +884,15 @@ lp_make_setup_variant_key(struct llvmpipe_context *lp, static void remove_setup_variant(struct llvmpipe_context *lp, - struct lp_setup_variant *variant) + struct lp_setup_variant *variant) { - struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen); - if (gallivm_debug & GALLIVM_DEBUG_IR) { debug_printf("llvmpipe: del setup_variant #%u total %u\n", - variant->no, lp->nr_setup_variants); + variant->no, lp->nr_setup_variants); } - if (variant->function) { - if (variant->jit_function) - LLVMFreeMachineCodeForFunction(screen->engine, - variant->function); - LLVMDeleteFunction(variant->function); + if (variant->gallivm) { + gallivm_destroy(variant->gallivm); } remove_from_list(&variant->list_item_global); @@ -665,7 +919,13 @@ cull_setup_variants(struct llvmpipe_context *lp) llvmpipe_finish(pipe, __FUNCTION__); for (i = 0; i < LP_MAX_SETUP_VARIANTS / 4; i++) { - struct lp_setup_variant_list_item *item = last_elem(&lp->setup_variants_list); + struct lp_setup_variant_list_item *item; + if (is_empty_list(&lp->setup_variants_list)) { + break; + } + item = last_elem(&lp->setup_variants_list); + assert(item); + assert(item->base); remove_setup_variant(lp, item->base); } } @@ -679,8 +939,6 @@ cull_setup_variants(struct llvmpipe_context *lp) void llvmpipe_update_setup(struct llvmpipe_context *lp) { - struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen); - struct lp_setup_variant_key *key = &lp->setup_variant.key; struct lp_setup_variant *variant = NULL; struct lp_setup_variant_list_item *li; @@ -689,7 +947,7 @@ llvmpipe_update_setup(struct llvmpipe_context *lp) foreach(li, &lp->setup_variants_list) { if(li->base->key.size == key->size && - memcmp(&li->base->key, key, key->size) == 0) { + memcmp(&li->base->key, key, key->size) == 0) { variant = li->base; break; } @@ -700,16 +958,17 @@ llvmpipe_update_setup(struct llvmpipe_context *lp) } else { if (lp->nr_setup_variants >= LP_MAX_SETUP_VARIANTS) { - cull_setup_variants(lp); + cull_setup_variants(lp); } - variant = generate_setup_variant(screen, key); - insert_at_head(&lp->setup_variants_list, &variant->list_item_global); - lp->nr_setup_variants++; + variant = generate_setup_variant(key, lp); + if (variant) { + insert_at_head(&lp->setup_variants_list, &variant->list_item_global); + lp->nr_setup_variants++; + } } - lp_setup_set_setup_variant(lp->setup, - variant); + lp_setup_set_setup_variant(lp->setup, variant); } void @@ -725,36 +984,33 @@ lp_delete_setup_variants(struct llvmpipe_context *lp) } void -lp_dump_setup_coef( const struct lp_setup_variant_key *key, - const float (*sa0)[4], - const float (*sdadx)[4], - const float (*sdady)[4]) +lp_dump_setup_coef(const struct lp_setup_variant_key *key, + const float (*sa0)[4], + const float (*sdadx)[4], + const float (*sdady)[4]) { int i, slot; - for (i = 0; i < NUM_CHANNELS; i++) { + for (i = 0; i < TGSI_NUM_CHANNELS; i++) { float a0 = sa0 [0][i]; float dadx = sdadx[0][i]; float dady = sdady[0][i]; debug_printf("POS.%c: a0 = %f, dadx = %f, dady = %f\n", - "xyzw"[i], - a0, dadx, dady); + "xyzw"[i], a0, dadx, dady); } for (slot = 0; slot < key->num_inputs; slot++) { unsigned usage_mask = key->inputs[slot].usage_mask; - for (i = 0; i < NUM_CHANNELS; i++) { - if (usage_mask & (1 << i)) { - float a0 = sa0 [1 + slot][i]; - float dadx = sdadx[1 + slot][i]; - float dady = sdady[1 + slot][i]; - - debug_printf("IN[%u].%c: a0 = %f, dadx = %f, dady = %f\n", - slot, - "xyzw"[i], - a0, dadx, dady); - } + for (i = 0; i < TGSI_NUM_CHANNELS; i++) { + if (usage_mask & (1 << i)) { + float a0 = sa0 [1 + slot][i]; + float dadx = sdadx[1 + slot][i]; + float dady = sdady[1 + slot][i]; + + debug_printf("IN[%u].%c: a0 = %f, dadx = %f, dady = %f\n", + slot, "xyzw"[i], a0, dadx, dady); + } } } }