From: Roland Scheidegger Date: Sat, 9 Mar 2013 00:46:33 +0000 (+0100) Subject: gallivm: clean up passing derivatives around X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=5c41d1c22282fe2fd72a77339246de8e861b4b22;p=mesa.git gallivm: clean up passing derivatives around Previously, the derivatives were calculated and passed in a packed form to the sample code (for implicit derivatives; explicit derivatives were packed to the same format). There are several reasons why this wasn't such a good idea: 1) the derivatives may not even be needed (not as bad as it sounds since LLVM will just throw the calculations needed for them away, but still) 2) the special packing format really shouldn't be part of the sampler interface 3) depending on what the sample code actually does, the derivatives will be processed differently, hence there is no "ideal" packing. For cube maps with explicit derivatives (which we don't do yet) for instance the packing looked downright useless, and for non-isotropic filtering we'd need different calculations too. So, instead just pass the derivatives as is (for explicit derivatives), or let the rho-calculating sample code calculate them itself. This still does exactly the same packing stuff for implicit derivatives for now, though explicit ones are handled in a more straightforward manner (quick estimates show performance should be quite similar, though it is much easier to follow and also does the rho calculation per-pixel until the end, which we eventually need for spec compliance anyway). No piglit changes. 
Reviewed-by: Jose Fonseca --- diff --git a/src/gallium/auxiliary/gallivm/lp_bld_quad.c b/src/gallium/auxiliary/gallivm/lp_bld_quad.c index 8a0efed655f..1955add8883 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_quad.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_quad.c @@ -79,14 +79,9 @@ lp_build_ddy(struct lp_build_context *bld, } /* - * To be able to handle multiple quads at once in texture sampling and - * do lod calculations per quad, it is necessary to get the per-quad - * derivatives into the lp_build_rho function. - * For 8-wide vectors the packed derivative values for 3 coords would - * look like this, this scales to a arbitrary (multiple of 4) vector size: - * ds1dx ds1dy dt1dx dt1dy ds2dx ds2dy dt2dx dt2dy + * Helper for building packed ddx/ddy vector for one coord (scalar per quad + * values). The vector will look like this (8-wide): * dr1dx dr1dy _____ _____ dr2dx dr2dy _____ _____ - * The second vector will be unused for 1d and 2d textures. */ LLVMValueRef lp_build_packed_ddx_ddy_onecoord(struct lp_build_context *bld, @@ -121,6 +116,11 @@ lp_build_packed_ddx_ddy_onecoord(struct lp_build_context *bld, } +/* + * Helper for building packed ddx/ddy vector for two coords (scalar per quad + * values). 
The vector will look like this (8-wide): + * ds1dx ds1dy dt1dx dt1dy ds2dx ds2dy dt2dx dt2dy + */ LLVMValueRef lp_build_packed_ddx_ddy_twocoord(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c index ef0631c684a..fc8bae70152 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c @@ -46,6 +46,7 @@ #include "lp_bld_type.h" #include "lp_bld_logic.h" #include "lp_bld_pack.h" +#include "lp_bld_quad.h" /* @@ -203,6 +204,9 @@ lp_sampler_static_sampler_state(struct lp_static_sampler_state *state, static LLVMValueRef lp_build_rho(struct lp_build_sample_context *bld, unsigned texture_unit, + LLVMValueRef s, + LLVMValueRef t, + LLVMValueRef r, const struct lp_derivatives *derivs) { struct gallivm_state *gallivm = bld->gallivm; @@ -211,8 +215,8 @@ lp_build_rho(struct lp_build_sample_context *bld, struct lp_build_context *float_bld = &bld->float_bld; struct lp_build_context *coord_bld = &bld->coord_bld; struct lp_build_context *perquadf_bld = &bld->perquadf_bld; - const LLVMValueRef *ddx_ddy = derivs->ddx_ddy; const unsigned dims = bld->dims; + LLVMValueRef ddx_ddy[2]; LLVMBuilderRef builder = bld->gallivm->builder; LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context); LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0); @@ -229,59 +233,7 @@ lp_build_rho(struct lp_build_sample_context *bld, LLVMValueRef i32undef = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context)); LLVMValueRef rho_xvec, rho_yvec; - abs_ddx_ddy[0] = lp_build_abs(coord_bld, ddx_ddy[0]); - if (dims > 2) { - abs_ddx_ddy[1] = lp_build_abs(coord_bld, ddx_ddy[1]); - } - else { - abs_ddx_ddy[1] = NULL; - } - - if (dims == 1) { - static const unsigned char swizzle1[] = { - 0, LP_BLD_SWIZZLE_DONTCARE, - LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE - }; - static const unsigned char swizzle2[] = { - 1, LP_BLD_SWIZZLE_DONTCARE, - 
LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE - }; - rho_xvec = lp_build_swizzle_aos(coord_bld, abs_ddx_ddy[0], swizzle1); - rho_yvec = lp_build_swizzle_aos(coord_bld, abs_ddx_ddy[0], swizzle2); - } - else if (dims == 2) { - static const unsigned char swizzle1[] = { - 0, 2, - LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE - }; - static const unsigned char swizzle2[] = { - 1, 3, - LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE - }; - rho_xvec = lp_build_swizzle_aos(coord_bld, abs_ddx_ddy[0], swizzle1); - rho_yvec = lp_build_swizzle_aos(coord_bld, abs_ddx_ddy[0], swizzle2); - } - else { - LLVMValueRef shuffles1[LP_MAX_VECTOR_LENGTH]; - LLVMValueRef shuffles2[LP_MAX_VECTOR_LENGTH]; - assert(dims == 3); - for (i = 0; i < num_quads; i++) { - shuffles1[4*i + 0] = lp_build_const_int32(gallivm, 4*i); - shuffles1[4*i + 1] = lp_build_const_int32(gallivm, 4*i + 2); - shuffles1[4*i + 2] = lp_build_const_int32(gallivm, length + 4*i); - shuffles1[4*i + 3] = i32undef; - shuffles2[4*i + 0] = lp_build_const_int32(gallivm, 4*i + 1); - shuffles2[4*i + 1] = lp_build_const_int32(gallivm, 4*i + 3); - shuffles2[4*i + 2] = lp_build_const_int32(gallivm, length + 4*i + 1); - shuffles2[4*i + 3] = i32undef; - } - rho_xvec = LLVMBuildShuffleVector(builder, abs_ddx_ddy[0], abs_ddx_ddy[1], - LLVMConstVector(shuffles1, length), ""); - rho_yvec = LLVMBuildShuffleVector(builder, abs_ddx_ddy[0], abs_ddx_ddy[1], - LLVMConstVector(shuffles2, length), ""); - } - - rho_vec = lp_build_max(coord_bld, rho_xvec, rho_yvec); + /* Note that all simplified calculations will only work for isotropic filtering */ first_level = bld->dynamic_state->first_level(bld->dynamic_state, bld->gallivm, texture_unit); @@ -289,76 +241,176 @@ lp_build_rho(struct lp_build_sample_context *bld, int_size = lp_build_minify(int_size_bld, bld->int_size, first_level_vec); float_size = lp_build_int_to_float(float_size_bld, int_size); - if (bld->coord_type.length > 4) { - /* expand size to each quad */ + /* XXX ignoring explicit 
derivs for cube maps for now */ + if (derivs && !(bld->static_texture_state->target == PIPE_TEXTURE_CUBE)) { + LLVMValueRef ddmax[3]; + for (i = 0; i < dims; i++) { + LLVMValueRef ddx, ddy; + LLVMValueRef floatdim; + LLVMValueRef indexi = lp_build_const_int32(gallivm, i); + ddx = lp_build_abs(coord_bld, derivs->ddx[i]); + ddy = lp_build_abs(coord_bld, derivs->ddy[i]); + ddmax[i] = lp_build_max(coord_bld, ddx, ddy); + floatdim = lp_build_extract_broadcast(gallivm, bld->float_size_in_type, + coord_bld->type, float_size, indexi); + ddmax[i] = lp_build_mul(coord_bld, floatdim, ddmax[i]); + } + rho_vec = ddmax[0]; if (dims > 1) { - /* could use some broadcast_vector helper for this? */ - int num_quads = bld->coord_type.length / 4; - LLVMValueRef src[LP_MAX_VECTOR_LENGTH/4]; - for (i = 0; i < num_quads; i++) { - src[i] = float_size; + rho_vec = lp_build_max(coord_bld, rho_vec, ddmax[1]); + if (dims > 2) { + rho_vec = lp_build_max(coord_bld, rho_vec, ddmax[2]); + } + } + /* + * rho_vec now still contains per-pixel rho, convert to scalar per quad + * since we can't handle per-pixel rho/lod from now on (TODO). + */ + rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type, + perquadf_bld->type, rho_vec, 0); + } + else { + /* + * This looks all a bit complex, but it's not that bad + * (the shuffle code makes it look worse than it is). + * Still, might not be ideal for all cases. 
+ */ + if (dims < 2) { + ddx_ddy[0] = lp_build_packed_ddx_ddy_onecoord(coord_bld, s); + } + else if (dims >= 2) { + ddx_ddy[0] = lp_build_packed_ddx_ddy_twocoord(coord_bld, + s, t); + if (dims > 2) { + ddx_ddy[1] = lp_build_packed_ddx_ddy_onecoord(coord_bld, r); } - float_size = lp_build_concat(bld->gallivm, src, float_size_bld->type, num_quads); + } + + abs_ddx_ddy[0] = lp_build_abs(coord_bld, ddx_ddy[0]); + if (dims > 2) { + abs_ddx_ddy[1] = lp_build_abs(coord_bld, ddx_ddy[1]); } else { - float_size = lp_build_broadcast_scalar(coord_bld, float_size); + abs_ddx_ddy[1] = NULL; } - rho_vec = lp_build_mul(coord_bld, rho_vec, float_size); - if (dims <= 1) { - rho = rho_vec; + if (dims == 1) { + static const unsigned char swizzle1[] = { + 0, LP_BLD_SWIZZLE_DONTCARE, + LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE + }; + static const unsigned char swizzle2[] = { + 1, LP_BLD_SWIZZLE_DONTCARE, + LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE + }; + rho_xvec = lp_build_swizzle_aos(coord_bld, abs_ddx_ddy[0], swizzle1); + rho_yvec = lp_build_swizzle_aos(coord_bld, abs_ddx_ddy[0], swizzle2); + } + else if (dims == 2) { + static const unsigned char swizzle1[] = { + 0, 2, + LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE + }; + static const unsigned char swizzle2[] = { + 1, 3, + LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE + }; + rho_xvec = lp_build_swizzle_aos(coord_bld, abs_ddx_ddy[0], swizzle1); + rho_yvec = lp_build_swizzle_aos(coord_bld, abs_ddx_ddy[0], swizzle2); } else { - if (dims >= 2) { - static const unsigned char swizzle1[] = { - 0, LP_BLD_SWIZZLE_DONTCARE, - LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE - }; - static const unsigned char swizzle2[] = { - 1, LP_BLD_SWIZZLE_DONTCARE, - LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE - }; - LLVMValueRef rho_s, rho_t, rho_r; - - rho_s = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle1); - rho_t = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle2); - - rho = lp_build_max(coord_bld, rho_s, rho_t); - - if 
(dims >= 3) { - static const unsigned char swizzle3[] = { - 2, LP_BLD_SWIZZLE_DONTCARE, + LLVMValueRef shuffles1[LP_MAX_VECTOR_LENGTH]; + LLVMValueRef shuffles2[LP_MAX_VECTOR_LENGTH]; + assert(dims == 3); + for (i = 0; i < num_quads; i++) { + shuffles1[4*i + 0] = lp_build_const_int32(gallivm, 4*i); + shuffles1[4*i + 1] = lp_build_const_int32(gallivm, 4*i + 2); + shuffles1[4*i + 2] = lp_build_const_int32(gallivm, length + 4*i); + shuffles1[4*i + 3] = i32undef; + shuffles2[4*i + 0] = lp_build_const_int32(gallivm, 4*i + 1); + shuffles2[4*i + 1] = lp_build_const_int32(gallivm, 4*i + 3); + shuffles2[4*i + 2] = lp_build_const_int32(gallivm, length + 4*i + 1); + shuffles2[4*i + 3] = i32undef; + } + rho_xvec = LLVMBuildShuffleVector(builder, abs_ddx_ddy[0], abs_ddx_ddy[1], + LLVMConstVector(shuffles1, length), ""); + rho_yvec = LLVMBuildShuffleVector(builder, abs_ddx_ddy[0], abs_ddx_ddy[1], + LLVMConstVector(shuffles2, length), ""); + } + + rho_vec = lp_build_max(coord_bld, rho_xvec, rho_yvec); + + if (bld->coord_type.length > 4) { + /* expand size to each quad */ + if (dims > 1) { + /* could use some broadcast_vector helper for this? 
*/ + int num_quads = bld->coord_type.length / 4; + LLVMValueRef src[LP_MAX_VECTOR_LENGTH/4]; + for (i = 0; i < num_quads; i++) { + src[i] = float_size; + } + float_size = lp_build_concat(bld->gallivm, src, float_size_bld->type, num_quads); + } + else { + float_size = lp_build_broadcast_scalar(coord_bld, float_size); + } + rho_vec = lp_build_mul(coord_bld, rho_vec, float_size); + + if (dims <= 1) { + rho = rho_vec; + } + else { + if (dims >= 2) { + static const unsigned char swizzle1[] = { + 0, LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE }; - rho_r = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle3); - rho = lp_build_max(coord_bld, rho, rho_r); + static const unsigned char swizzle2[] = { + 1, LP_BLD_SWIZZLE_DONTCARE, + LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE + }; + LLVMValueRef rho_s, rho_t, rho_r; + + rho_s = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle1); + rho_t = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle2); + + rho = lp_build_max(coord_bld, rho_s, rho_t); + + if (dims >= 3) { + static const unsigned char swizzle3[] = { + 2, LP_BLD_SWIZZLE_DONTCARE, + LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE + }; + rho_r = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle3); + rho = lp_build_max(coord_bld, rho, rho_r); + } } } - } - rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type, - perquadf_bld->type, rho, 0); - } - else { - if (dims <= 1) { - rho_vec = LLVMBuildExtractElement(builder, rho_vec, index0, ""); - } - rho_vec = lp_build_mul(float_size_bld, rho_vec, float_size); - - if (dims <= 1) { - rho = rho_vec; + rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type, + perquadf_bld->type, rho, 0); } else { - if (dims >= 2) { - LLVMValueRef rho_s, rho_t, rho_r; + if (dims <= 1) { + rho_vec = LLVMBuildExtractElement(builder, rho_vec, index0, ""); + } + rho_vec = lp_build_mul(float_size_bld, rho_vec, float_size); + + if (dims <= 1) { + rho = rho_vec; + } + else { + if (dims >= 2) { + LLVMValueRef rho_s, 
rho_t, rho_r; - rho_s = LLVMBuildExtractElement(builder, rho_vec, index0, ""); - rho_t = LLVMBuildExtractElement(builder, rho_vec, index1, ""); + rho_s = LLVMBuildExtractElement(builder, rho_vec, index0, ""); + rho_t = LLVMBuildExtractElement(builder, rho_vec, index1, ""); - rho = lp_build_max(float_bld, rho_s, rho_t); + rho = lp_build_max(float_bld, rho_s, rho_t); - if (dims >= 3) { - rho_r = LLVMBuildExtractElement(builder, rho_vec, index2, ""); - rho = lp_build_max(float_bld, rho, rho_r); + if (dims >= 3) { + rho_r = LLVMBuildExtractElement(builder, rho_vec, index2, ""); + rho = lp_build_max(float_bld, rho, rho_r); + } } } } @@ -511,6 +563,9 @@ void lp_build_lod_selector(struct lp_build_sample_context *bld, unsigned texture_unit, unsigned sampler_unit, + LLVMValueRef s, + LLVMValueRef t, + LLVMValueRef r, const struct lp_derivatives *derivs, LLVMValueRef lod_bias, /* optional */ LLVMValueRef explicit_lod, /* optional */ @@ -544,7 +599,7 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, else { LLVMValueRef rho; - rho = lp_build_rho(bld, texture_unit, derivs); + rho = lp_build_rho(bld, texture_unit, s, t, r, derivs); /* * Compute lod = log2(rho) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h index 63064550ee6..1abe0ca414e 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h @@ -56,7 +56,8 @@ struct lp_build_context; */ struct lp_derivatives { - LLVMValueRef ddx_ddy[2]; + LLVMValueRef ddx[3]; + LLVMValueRef ddy[3]; }; @@ -366,6 +367,9 @@ void lp_build_lod_selector(struct lp_build_sample_context *bld, unsigned texture_index, unsigned sampler_index, + LLVMValueRef s, + LLVMValueRef t, + LLVMValueRef r, const struct lp_derivatives *derivs, LLVMValueRef lod_bias, /* optional */ LLVMValueRef explicit_lod, /* optional */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index 
8aa41662d67..cdd910fabcf 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -1077,7 +1077,7 @@ lp_build_sample_common(struct lp_build_sample_context *bld, LLVMValueRef *s, LLVMValueRef *t, LLVMValueRef *r, - const struct lp_derivatives *derivs, + const struct lp_derivatives *derivs, /* optional */ LLVMValueRef lod_bias, /* optional */ LLVMValueRef explicit_lod, /* optional */ LLVMValueRef *lod_ipart, @@ -1090,7 +1090,6 @@ lp_build_sample_common(struct lp_build_sample_context *bld, const unsigned mag_filter = bld->static_sampler_state->mag_img_filter; const unsigned target = bld->static_texture_state->target; LLVMValueRef first_level; - struct lp_derivatives face_derivs; /* printf("%s mip %d min %d mag %d\n", __FUNCTION__, @@ -1107,11 +1106,6 @@ lp_build_sample_common(struct lp_build_sample_context *bld, *t = face_t; /* vec */ /* use 'r' to indicate cube face */ *r = face; /* vec */ - - /* recompute ddx, ddy using the new (s,t) face texcoords */ - face_derivs.ddx_ddy[0] = lp_build_packed_ddx_ddy_twocoord(&bld->coord_bld, *s, *t); - face_derivs.ddx_ddy[1] = NULL; - derivs = &face_derivs; } else if (target == PIPE_TEXTURE_1D_ARRAY) { *r = lp_build_iround(&bld->coord_bld, *t); @@ -1131,6 +1125,7 @@ lp_build_sample_common(struct lp_build_sample_context *bld, * distinguish between minification/magnification with one mipmap level. 
*/ lp_build_lod_selector(bld, texture_index, sampler_index, + *s, *t, *r, derivs, lod_bias, explicit_lod, mip_filter, lod_ipart, lod_fpart); @@ -1479,7 +1474,7 @@ lp_build_sample_soa(struct gallivm_state *gallivm, unsigned sampler_index, const LLVMValueRef *coords, const LLVMValueRef *offsets, - const struct lp_derivatives *derivs, + const struct lp_derivatives *derivs, /* optional */ LLVMValueRef lod_bias, /* optional */ LLVMValueRef explicit_lod, /* optional */ LLVMValueRef texel_out[4]) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c index 9a30cc80296..98bce0eb269 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c @@ -363,7 +363,7 @@ emit_tex(struct lp_build_tgsi_aos_context *bld, unsigned target; unsigned unit; LLVMValueRef coords; - struct lp_derivatives derivs; + struct lp_derivatives derivs = { {NULL}, {NULL} }; if (!bld->sampler) { _debug_printf("warning: found texture instruction but no sampler generator supplied\n"); @@ -374,22 +374,15 @@ emit_tex(struct lp_build_tgsi_aos_context *bld, coords = lp_build_emit_fetch( &bld->bld_base, inst, 0 , LP_CHAN_ALL); - if (0 && modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) { - lp_build_emit_fetch( &bld->bld_base, inst, 1 , LP_CHAN_ALL); - lp_build_emit_fetch( &bld->bld_base, inst, 2 , LP_CHAN_ALL); + if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) { + /* probably not going to work */ + derivs.ddx[0] = lp_build_emit_fetch( &bld->bld_base, inst, 1 , LP_CHAN_ALL); + derivs.ddy[0] = lp_build_emit_fetch( &bld->bld_base, inst, 2 , LP_CHAN_ALL); unit = inst->Src[3].Register.Index; - } else { -#if 0 - ddx = lp_build_ddx( &bld->bld_base.base, coords ); - ddy = lp_build_ddy( &bld->bld_base.base, coords ); -#else - /* TODO */ - derivs.ddx_ddy[0] = bld->bld_base.base.one; - derivs.ddx_ddy[1] = bld->bld_base.base.one; -#endif + } + else { unit = inst->Src[1].Register.Index; } - return 
bld->sampler->emit_fetch_texel(bld->sampler, &bld->bld_base.base, target, unit, diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index 69957fe7bb9..9fe87c40b63 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -1164,14 +1164,13 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, enum lp_build_tex_modifier modifier, LLVMValueRef *texel) { - LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; - struct gallivm_state *gallivm = bld->bld_base.base.gallivm; unsigned unit; LLVMValueRef lod_bias, explicit_lod; LLVMValueRef oow = NULL; LLVMValueRef coords[4]; LLVMValueRef offsets[3] = { NULL }; struct lp_derivatives derivs; + struct lp_derivatives *deriv_ptr = NULL; unsigned num_coords; unsigned dims; unsigned i; @@ -1184,9 +1183,6 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, return; } - derivs.ddx_ddy[0] = bld->bld_base.base.undef; - derivs.ddx_ddy[1] = bld->bld_base.base.undef; - switch (inst->Texture.Texture) { case TGSI_TEXTURE_1D: num_coords = 1; @@ -1259,58 +1255,14 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, } if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) { - LLVMValueRef i32undef = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context)); - LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; - LLVMValueRef ddxdyonec[3]; - unsigned length = bld->bld_base.base.type.length; - unsigned num_quads = length / 4; unsigned dim; - unsigned quad; - for (dim = 0; dim < dims; ++dim) { - LLVMValueRef srcx = lp_build_emit_fetch( &bld->bld_base, inst, 1, dim ); - LLVMValueRef srcy = lp_build_emit_fetch( &bld->bld_base, inst, 2, dim ); - for (quad = 0; quad < num_quads; ++quad) { - unsigned s1 = 4*quad; - unsigned s2 = 4*quad + length; - shuffles[4*quad + 0] = lp_build_const_int32(gallivm, s1); - shuffles[4*quad + 1] = lp_build_const_int32(gallivm, s2); - shuffles[4*quad + 2] = i32undef; - shuffles[4*quad + 3] = i32undef; - } - 
ddxdyonec[dim] = LLVMBuildShuffleVector(builder, srcx, srcy, - LLVMConstVector(shuffles, length), ""); - } - if (dims == 1) { - derivs.ddx_ddy[0] = ddxdyonec[0]; - } - else if (dims >= 2) { - for (quad = 0; quad < num_quads; ++quad) { - unsigned s1 = 4*quad; - unsigned s2 = 4*quad + length; - shuffles[4*quad + 0] = lp_build_const_int32(gallivm, s1); - shuffles[4*quad + 1] = lp_build_const_int32(gallivm, s1 + 1); - shuffles[4*quad + 2] = lp_build_const_int32(gallivm, s2); - shuffles[4*quad + 3] = lp_build_const_int32(gallivm, s2 + 1); - } - derivs.ddx_ddy[0] = LLVMBuildShuffleVector(builder, ddxdyonec[0], ddxdyonec[1], - LLVMConstVector(shuffles, length), ""); - if (dims == 3) { - derivs.ddx_ddy[1] = ddxdyonec[2]; - } + derivs.ddx[dim] = lp_build_emit_fetch( &bld->bld_base, inst, 1, dim ); + derivs.ddy[dim] = lp_build_emit_fetch( &bld->bld_base, inst, 2, dim ); } + deriv_ptr = &derivs; unit = inst->Src[3].Register.Index; } else { - if (dims == 1) { - derivs.ddx_ddy[0] = lp_build_packed_ddx_ddy_onecoord(&bld->bld_base.base, coords[0]); - } - else if (dims >= 2) { - derivs.ddx_ddy[0] = lp_build_packed_ddx_ddy_twocoord(&bld->bld_base.base, - coords[0], coords[1]); - if (dims == 3) { - derivs.ddx_ddy[1] = lp_build_packed_ddx_ddy_onecoord(&bld->bld_base.base, coords[2]); - } - } unit = inst->Src[1].Register.Index; } @@ -1329,7 +1281,7 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, unit, unit, coords, offsets, - &derivs, + deriv_ptr, lod_bias, explicit_lod, texel); } @@ -1341,13 +1293,13 @@ emit_sample(struct lp_build_tgsi_soa_context *bld, boolean compare, LLVMValueRef *texel) { - LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; struct gallivm_state *gallivm = bld->bld_base.base.gallivm; unsigned texture_unit, sampler_unit; LLVMValueRef lod_bias, explicit_lod; LLVMValueRef coords[4]; LLVMValueRef offsets[3] = { NULL }; struct lp_derivatives derivs; + struct lp_derivatives *deriv_ptr = NULL; unsigned num_coords, dims; unsigned i; @@ -1366,9 +1318,6 @@ 
emit_sample(struct lp_build_tgsi_soa_context *bld, texture_unit = inst->Src[1].Register.Index; sampler_unit = inst->Src[2].Register.Index; - derivs.ddx_ddy[0] = bld->bld_base.base.undef; - derivs.ddx_ddy[1] = bld->bld_base.base.undef; - /* * Note inst->Texture.Texture will contain the number of offsets, * however the target information is NOT there and comes from the @@ -1449,57 +1398,12 @@ emit_sample(struct lp_build_tgsi_soa_context *bld, } if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) { - LLVMValueRef i32undef = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context)); - LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; - LLVMValueRef ddxdyonec[3]; - unsigned length = bld->bld_base.base.type.length; - unsigned num_quads = length / 4; unsigned dim; - unsigned quad; - for (dim = 0; dim < dims; ++dim) { - LLVMValueRef srcx = lp_build_emit_fetch( &bld->bld_base, inst, 3, dim ); - LLVMValueRef srcy = lp_build_emit_fetch( &bld->bld_base, inst, 4, dim ); - for (quad = 0; quad < num_quads; ++quad) { - unsigned s1 = 4*quad; - unsigned s2 = 4*quad + length; - shuffles[4*quad + 0] = lp_build_const_int32(gallivm, s1); - shuffles[4*quad + 1] = lp_build_const_int32(gallivm, s2); - shuffles[4*quad + 2] = i32undef; - shuffles[4*quad + 3] = i32undef; - } - ddxdyonec[dim] = LLVMBuildShuffleVector(builder, srcx, srcy, - LLVMConstVector(shuffles, length), ""); - } - if (dims == 1) { - derivs.ddx_ddy[0] = ddxdyonec[0]; - } - else if (dims >= 2) { - for (quad = 0; quad < num_quads; ++quad) { - unsigned s1 = 4*quad; - unsigned s2 = 4*quad + length; - shuffles[4*quad + 0] = lp_build_const_int32(gallivm, s1); - shuffles[4*quad + 1] = lp_build_const_int32(gallivm, s1 + 1); - shuffles[4*quad + 2] = lp_build_const_int32(gallivm, s2); - shuffles[4*quad + 3] = lp_build_const_int32(gallivm, s2 + 1); - } - derivs.ddx_ddy[0] = LLVMBuildShuffleVector(builder, ddxdyonec[0], ddxdyonec[1], - LLVMConstVector(shuffles, length), ""); - if (dims == 3) { - derivs.ddx_ddy[1] = ddxdyonec[2]; - } - } - } 
else { - if (dims == 1) { - derivs.ddx_ddy[0] = lp_build_packed_ddx_ddy_onecoord(&bld->bld_base.base, coords[0]); - } - else if (dims >= 2) { - derivs.ddx_ddy[0] = lp_build_packed_ddx_ddy_twocoord(&bld->bld_base.base, - coords[0], coords[1]); - if (dims == 3) { - derivs.ddx_ddy[1] = lp_build_packed_ddx_ddy_onecoord(&bld->bld_base.base, coords[2]); - } + derivs.ddx[dim] = lp_build_emit_fetch( &bld->bld_base, inst, 3, dim ); + derivs.ddy[dim] = lp_build_emit_fetch( &bld->bld_base, inst, 4, dim ); } + deriv_ptr = &derivs; } /* some advanced gather instructions (txgo) would require 4 offsets */ @@ -1517,7 +1421,7 @@ emit_sample(struct lp_build_tgsi_soa_context *bld, texture_unit, sampler_unit, coords, offsets, - &derivs, + deriv_ptr, lod_bias, explicit_lod, texel); } @@ -1533,7 +1437,6 @@ emit_fetch_texels( struct lp_build_tgsi_soa_context *bld, LLVMValueRef explicit_lod = NULL; LLVMValueRef coords[3]; LLVMValueRef offsets[3] = { NULL }; - struct lp_derivatives derivs; unsigned num_coords; unsigned dims; unsigned i; @@ -1548,9 +1451,6 @@ emit_fetch_texels( struct lp_build_tgsi_soa_context *bld, unit = inst->Src[1].Register.Index; - derivs.ddx_ddy[0] = coord_undef; - derivs.ddx_ddy[1] = coord_undef; - if (is_samplei) { target = bld->sv[unit].Resource; } @@ -1612,7 +1512,7 @@ emit_fetch_texels( struct lp_build_tgsi_soa_context *bld, unit, unit, coords, offsets, - &derivs, + NULL, NULL, explicit_lod, texel); }