X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fllvmpipe%2Flp_bld_interp.c;h=79325683c61efe858212f2fb9def50320ec0766d;hb=26c5ae80f0b5c5f1c8779e4540a9aba88720c2cd;hp=a6acaead88766019f984346a061bd07dd35e3841;hpb=bee9964b29b2428ee75e2d1efc0e1d2c2518a417;p=mesa.git diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c index a6acaead887..79325683c61 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c @@ -1,7 +1,7 @@ /************************************************************************** * * Copyright 2009 VMware, Inc. - * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright 2007-2008 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -19,7 +19,7 @@ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. @@ -37,11 +37,12 @@ #include "util/u_debug.h" #include "util/u_memory.h" #include "util/u_math.h" -#include "tgsi/tgsi_parse.h" -#include "lp_bld_debug.h" -#include "lp_bld_const.h" -#include "lp_bld_arit.h" -#include "lp_bld_swizzle.h" +#include "tgsi/tgsi_scan.h" +#include "gallivm/lp_bld_debug.h" +#include "gallivm/lp_bld_const.h" +#include "gallivm/lp_bld_arit.h" +#include "gallivm/lp_bld_swizzle.h" +#include "gallivm/lp_bld_flow.h" #include "lp_bld_interp.h" @@ -61,6 +62,9 @@ * # | # | # * ################# * + * If we iterate over multiple quads at once, quads 01 and 23 are processed + * together. + * * Within each quad, we have four pixels which are represented in SOA * order: * @@ -72,9 +76,44 @@ * * So the green channel (for example) of the four pixels is stored in * a single vector register: {g0, g1, g2, g3}. + * The order stays the same even with multiple quads: + * 0 1 4 5 + * 2 3 6 7 + * is stored as g0..g7 */ +/** + * Do one perspective divide per quad. + * + * For perspective interpolation, the final attribute value is given + * + * a' = a/w = a * oow + * + * where + * + * a = a0 + dadx*x + dady*y + * w = w0 + dwdx*x + dwdy*y + * oow = 1/w = 1/(w0 + dwdx*x + dwdy*y) + * + * Instead of computing the division per pixel, with this macro we compute the + * division on the upper left pixel of each quad, and use a linear + * approximation in the remaining pixels, given by: + * + * da'dx = (dadx - dwdx*a)*oow + * da'dy = (dady - dwdy*a)*oow + * + * Ironically, this actually makes things slower -- probably because the + * divide hardware unit is rarely used, whereas the multiply unit is typically + * already saturated. + */ +#define PERSPECTIVE_DIVIDE_PER_QUAD 0 + + +static const unsigned char quad_offset_x[16] = {0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3}; +static const unsigned char quad_offset_y[16] = {0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3}; + + static void attrib_name(LLVMValueRef val, unsigned attrib, unsigned chan, const char *suffix) { @@ -84,48 +123,214 @@ attrib_name(LLVMValueRef val, unsigned attrib, unsigned chan, const char *suffix lp_build_name(val, "input%u.%c%s", attrib - 1, "xyzw"[chan], suffix); } +static void +calc_offsets(struct lp_build_context *coeff_bld, + unsigned quad_start_index, + LLVMValueRef *pixoffx, + LLVMValueRef *pixoffy) +{ + unsigned i; + unsigned num_pix = coeff_bld->type.length; + struct gallivm_state *gallivm = coeff_bld->gallivm; + LLVMBuilderRef builder = coeff_bld->gallivm->builder; + LLVMValueRef nr, pixxf, pixyf; + + *pixoffx = coeff_bld->undef; + *pixoffy = coeff_bld->undef; + + for (i = 0; i < num_pix; i++) { + nr = lp_build_const_int32(gallivm, i); + pixxf = lp_build_const_float(gallivm, quad_offset_x[i % num_pix] + + (quad_start_index & 1) * 2); + pixyf = lp_build_const_float(gallivm, quad_offset_y[i % num_pix] + + (quad_start_index & 2)); + *pixoffx = LLVMBuildInsertElement(builder, *pixoffx, pixxf, nr, ""); + *pixoffy = LLVMBuildInsertElement(builder, *pixoffy, pixyf, nr, ""); + } +} + -/** - * Initialize the bld->a0, dadx, dady fields. This involves fetching - * those values from the arrays which are passed into the JIT function. +/* Much easier, and significantly less instructions in the per-stamp + * part (less than half) but overall more instructions so a loss if + * most quads are active. Might be a win though with larger vectors. + * No ability to do per-quad divide (doable but not implemented) + * Could be made to work with passed in pixel offsets (i.e. active quad merging). */ static void -coeffs_init(struct lp_build_interp_soa_context *bld, - LLVMValueRef a0_ptr, - LLVMValueRef dadx_ptr, - LLVMValueRef dady_ptr) +coeffs_init_simple(struct lp_build_interp_soa_context *bld, + LLVMValueRef a0_ptr, + LLVMValueRef dadx_ptr, + LLVMValueRef dady_ptr) { - LLVMBuilderRef builder = bld->base.builder; + struct lp_build_context *coeff_bld = &bld->coeff_bld; + struct lp_build_context *setup_bld = &bld->setup_bld; + struct gallivm_state *gallivm = coeff_bld->gallivm; + LLVMBuilderRef builder = gallivm->builder; unsigned attrib; - unsigned chan; - for(attrib = 0; attrib < bld->num_attribs; ++attrib) { - unsigned mask = bld->mask[attrib]; - unsigned mode = bld->mode[attrib]; - for(chan = 0; chan < NUM_CHANNELS; ++chan) { - if(mask & (1 << chan)) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), attrib*NUM_CHANNELS + chan, 0); - LLVMValueRef a0 = NULL; - LLVMValueRef dadx = NULL; - LLVMValueRef dady = NULL; + for (attrib = 0; attrib < bld->num_attribs; ++attrib) { + /* + * always fetch all 4 values for performance/simplicity + * Note: we do that here because it seems to generate better + * code. It generates a lot of moves initially but less + * moves later. As far as I can tell this looks like a + * llvm issue, instead of simply reloading the values from + * the passed in pointers it if it runs out of registers + * it spills/reloads them. Maybe some optimization passes + * would help. + * Might want to investigate this again later. + */ + const unsigned interp = bld->interp[attrib]; + LLVMValueRef index = lp_build_const_int32(gallivm, + attrib * TGSI_NUM_CHANNELS); + LLVMValueRef ptr; + LLVMValueRef dadxaos = setup_bld->zero; + LLVMValueRef dadyaos = setup_bld->zero; + LLVMValueRef a0aos = setup_bld->zero; + + switch (interp) { + case LP_INTERP_PERSPECTIVE: + /* fall-through */ + + case LP_INTERP_LINEAR: + ptr = LLVMBuildGEP(builder, dadx_ptr, &index, 1, ""); + ptr = LLVMBuildBitCast(builder, ptr, + LLVMPointerType(setup_bld->vec_type, 0), ""); + dadxaos = LLVMBuildLoad(builder, ptr, ""); + + ptr = LLVMBuildGEP(builder, dady_ptr, &index, 1, ""); + ptr = LLVMBuildBitCast(builder, ptr, + LLVMPointerType(setup_bld->vec_type, 0), ""); + dadyaos = LLVMBuildLoad(builder, ptr, ""); + + attrib_name(dadxaos, attrib, 0, ".dadxaos"); + attrib_name(dadyaos, attrib, 0, ".dadyaos"); + /* fall-through */ + + case LP_INTERP_CONSTANT: + case LP_INTERP_FACING: + ptr = LLVMBuildGEP(builder, a0_ptr, &index, 1, ""); + ptr = LLVMBuildBitCast(builder, ptr, + LLVMPointerType(setup_bld->vec_type, 0), ""); + a0aos = LLVMBuildLoad(builder, ptr, ""); + attrib_name(a0aos, attrib, 0, ".a0aos"); + break; - switch( mode ) { - case TGSI_INTERPOLATE_PERSPECTIVE: - /* fall-through */ + case LP_INTERP_POSITION: + /* Nothing to do as the position coeffs are already setup in slot 0 */ + continue; + + default: + assert(0); + break; + } + bld->a0aos[attrib] = a0aos; + bld->dadxaos[attrib] = dadxaos; + bld->dadyaos[attrib] = dadyaos; + } +} - case TGSI_INTERPOLATE_LINEAR: - dadx = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dadx_ptr, &index, 1, ""), ""); - dady = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dady_ptr, &index, 1, ""), ""); - dadx = lp_build_broadcast_scalar(&bld->base, dadx); - dady = lp_build_broadcast_scalar(&bld->base, dady); - attrib_name(dadx, attrib, chan, ".dadx"); - attrib_name(dady, attrib, chan, ".dady"); +/** + * Interpolate the shader input attribute values. + * This is called for each (group of) quad(s). + */ +static void +attribs_update_simple(struct lp_build_interp_soa_context *bld, + struct gallivm_state *gallivm, + LLVMValueRef loop_iter, + int start, + int end) +{ + LLVMBuilderRef builder = gallivm->builder; + struct lp_build_context *coeff_bld = &bld->coeff_bld; + struct lp_build_context *setup_bld = &bld->setup_bld; + LLVMValueRef oow = NULL; + unsigned attrib; + LLVMValueRef pixoffx; + LLVMValueRef pixoffy; + LLVMValueRef ptr; + + /* could do this with code-generated passed in pixel offsets too */ + + assert(loop_iter); + ptr = LLVMBuildGEP(builder, bld->xoffset_store, &loop_iter, 1, ""); + pixoffx = LLVMBuildLoad(builder, ptr, ""); + ptr = LLVMBuildGEP(builder, bld->yoffset_store, &loop_iter, 1, ""); + pixoffy = LLVMBuildLoad(builder, ptr, ""); + + pixoffx = LLVMBuildFAdd(builder, pixoffx, + lp_build_broadcast_scalar(coeff_bld, bld->x), ""); + pixoffy = LLVMBuildFAdd(builder, pixoffy, + lp_build_broadcast_scalar(coeff_bld, bld->y), ""); + + for (attrib = start; attrib < end; attrib++) { + const unsigned mask = bld->mask[attrib]; + const unsigned interp = bld->interp[attrib]; + unsigned chan; + + for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { + if (mask & (1 << chan)) { + LLVMValueRef index; + LLVMValueRef dadx = coeff_bld->zero; + LLVMValueRef dady = coeff_bld->zero; + LLVMValueRef a = coeff_bld->zero; + + index = lp_build_const_int32(gallivm, chan); + switch (interp) { + case LP_INTERP_PERSPECTIVE: /* fall-through */ - case TGSI_INTERPOLATE_CONSTANT: - a0 = LLVMBuildLoad(builder, LLVMBuildGEP(builder, a0_ptr, &index, 1, ""), ""); - a0 = lp_build_broadcast_scalar(&bld->base, a0); - attrib_name(a0, attrib, chan, ".a0"); + case LP_INTERP_LINEAR: + if (attrib == 0 && chan == 0) { + dadx = coeff_bld->one; + if (bld->pos_offset) { + a = lp_build_const_vec(gallivm, coeff_bld->type, bld->pos_offset); + } + } + else if (attrib == 0 && chan == 1) { + dady = coeff_bld->one; + if (bld->pos_offset) { + a = lp_build_const_vec(gallivm, coeff_bld->type, bld->pos_offset); + } + } + else { + dadx = lp_build_extract_broadcast(gallivm, setup_bld->type, + coeff_bld->type, bld->dadxaos[attrib], + index); + dady = lp_build_extract_broadcast(gallivm, setup_bld->type, + coeff_bld->type, bld->dadyaos[attrib], + index); + a = lp_build_extract_broadcast(gallivm, setup_bld->type, + coeff_bld->type, bld->a0aos[attrib], + index); + } + /* + * a = a0 + (x * dadx + y * dady) + */ + a = lp_build_fmuladd(builder, dadx, pixoffx, a); + a = lp_build_fmuladd(builder, dady, pixoffy, a); + + if (interp == LP_INTERP_PERSPECTIVE) { + if (oow == NULL) { + LLVMValueRef w = bld->attribs[0][3]; + assert(attrib != 0); + assert(bld->mask[0] & TGSI_WRITEMASK_W); + oow = lp_build_rcp(coeff_bld, w); + } + a = lp_build_mul(coeff_bld, a, oow); + } + break; + + case LP_INTERP_CONSTANT: + case LP_INTERP_FACING: + a = lp_build_extract_broadcast(gallivm, setup_bld->type, + coeff_bld->type, bld->a0aos[attrib], + index); + break; + + case LP_INTERP_POSITION: + assert(attrib > 0); + a = bld->attribs[0][chan]; break; default: @@ -133,64 +338,210 @@ coeffs_init(struct lp_build_interp_soa_context *bld, break; } - bld->a0 [attrib][chan] = a0; - bld->dadx[attrib][chan] = dadx; - bld->dady[attrib][chan] = dady; + if ((attrib == 0) && (chan == 2) && !bld->depth_clamp){ + /* FIXME: Depth values can exceed 1.0, due to the fact that + * setup interpolation coefficients refer to (0,0) which causes + * precision loss. So we must clamp to 1.0 here to avoid artifacts. + * Note though values outside [0,1] are perfectly valid with + * depth clip disabled. + * XXX: If depth clip is disabled but we force depth clamp + * we may get values larger than 1.0 in the fs (but not in + * depth test). Not sure if that's an issue... + * Also, on a similar note, it is not obvious if the depth values + * appearing in fs (with depth clip disabled) should be clamped + * to [0,1], clamped to near/far or not be clamped at all... + */ + a = lp_build_min(coeff_bld, a, coeff_bld->one); + } + bld->attribs[attrib][chan] = a; } } } } - /** - * Emit LLVM code to compute the fragment shader input attribute values. - * For example, for a color input, we'll compute red, green, blue and alpha - * values for the four pixels in a quad. - * Recall that we're operating on 4-element vectors so each arithmetic - * operation is operating on the four pixels in a quad. + * Initialize the bld->a, dadq fields. This involves fetching + * those values from the arrays which are passed into the JIT function. */ static void -attribs_init(struct lp_build_interp_soa_context *bld) +coeffs_init(struct lp_build_interp_soa_context *bld, + LLVMValueRef a0_ptr, + LLVMValueRef dadx_ptr, + LLVMValueRef dady_ptr) { - LLVMValueRef x = bld->pos[0]; - LLVMValueRef y = bld->pos[1]; - LLVMValueRef oow = NULL; + struct lp_build_context *coeff_bld = &bld->coeff_bld; + struct lp_build_context *setup_bld = &bld->setup_bld; + struct gallivm_state *gallivm = coeff_bld->gallivm; + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef pixoffx, pixoffy; unsigned attrib; unsigned chan; + unsigned i; + + pixoffx = coeff_bld->undef; + pixoffy = coeff_bld->undef; + for (i = 0; i < coeff_bld->type.length; i++) { + LLVMValueRef nr = lp_build_const_int32(gallivm, i); + LLVMValueRef pixxf = lp_build_const_float(gallivm, quad_offset_x[i]); + LLVMValueRef pixyf = lp_build_const_float(gallivm, quad_offset_y[i]); + pixoffx = LLVMBuildInsertElement(builder, pixoffx, pixxf, nr, ""); + pixoffy = LLVMBuildInsertElement(builder, pixoffy, pixyf, nr, ""); + } - for(attrib = 0; attrib < bld->num_attribs; ++attrib) { - unsigned mask = bld->mask[attrib]; - unsigned mode = bld->mode[attrib]; - for(chan = 0; chan < NUM_CHANNELS; ++chan) { - if(mask & (1 << chan)) { - LLVMValueRef a0 = bld->a0 [attrib][chan]; - LLVMValueRef dadx = bld->dadx[attrib][chan]; - LLVMValueRef dady = bld->dady[attrib][chan]; - LLVMValueRef res; - - res = a0; - - if (mode != TGSI_INTERPOLATE_CONSTANT) { - /* res = res + x * dadx */ - res = lp_build_add(&bld->base, res, lp_build_mul(&bld->base, x, dadx)); - /* res = res + y * dady */ - res = lp_build_add(&bld->base, res, lp_build_mul(&bld->base, y, dady)); - } - /* Keep the value of the attribue before perspective divide for faster updates */ - bld->attribs_pre[attrib][chan] = res; + for (attrib = 0; attrib < bld->num_attribs; ++attrib) { + const unsigned mask = bld->mask[attrib]; + const unsigned interp = bld->interp[attrib]; + LLVMValueRef index = lp_build_const_int32(gallivm, + attrib * TGSI_NUM_CHANNELS); + LLVMValueRef ptr; + LLVMValueRef dadxaos = setup_bld->zero; + LLVMValueRef dadyaos = setup_bld->zero; + LLVMValueRef a0aos = setup_bld->zero; + + /* always fetch all 4 values for performance/simplicity */ + switch (interp) { + case LP_INTERP_PERSPECTIVE: + /* fall-through */ + + case LP_INTERP_LINEAR: + ptr = LLVMBuildGEP(builder, dadx_ptr, &index, 1, ""); + ptr = LLVMBuildBitCast(builder, ptr, + LLVMPointerType(setup_bld->vec_type, 0), ""); + dadxaos = LLVMBuildLoad(builder, ptr, ""); + + ptr = LLVMBuildGEP(builder, dady_ptr, &index, 1, ""); + ptr = LLVMBuildBitCast(builder, ptr, + LLVMPointerType(setup_bld->vec_type, 0), ""); + dadyaos = LLVMBuildLoad(builder, ptr, ""); + + attrib_name(dadxaos, attrib, 0, ".dadxaos"); + attrib_name(dadyaos, attrib, 0, ".dadyaos"); + /* fall-through */ + + case LP_INTERP_CONSTANT: + case LP_INTERP_FACING: + ptr = LLVMBuildGEP(builder, a0_ptr, &index, 1, ""); + ptr = LLVMBuildBitCast(builder, ptr, + LLVMPointerType(setup_bld->vec_type, 0), ""); + a0aos = LLVMBuildLoad(builder, ptr, ""); + attrib_name(a0aos, attrib, 0, ".a0aos"); + break; + + case LP_INTERP_POSITION: + /* Nothing to do as the position coeffs are already setup in slot 0 */ + continue; + + default: + assert(0); + break; + } + + /* + * a = a0 + (x * dadx + y * dady) + * a0aos is the attrib value at top left corner of stamp + */ + if (interp != LP_INTERP_CONSTANT && + interp != LP_INTERP_FACING) { + LLVMValueRef x = lp_build_broadcast_scalar(setup_bld, bld->x); + LLVMValueRef y = lp_build_broadcast_scalar(setup_bld, bld->y); + a0aos = lp_build_fmuladd(builder, x, dadxaos, a0aos); + a0aos = lp_build_fmuladd(builder, y, dadyaos, a0aos); + } + + /* + * dadq = {0, dadx, dady, dadx + dady} + * for two quads (side by side) this is: + * {0, dadx, dady, dadx+dady, 2*dadx, 2*dadx+dady, 3*dadx+dady} + */ + for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) { + /* this generates a CRAPLOAD of shuffles... */ + if (mask & (1 << chan)) { + LLVMValueRef dadx, dady; + LLVMValueRef dadq, dadq2; + LLVMValueRef a; + LLVMValueRef chan_index = lp_build_const_int32(gallivm, chan); + + if (attrib == 0 && chan == 0) { + a = bld->x; + if (bld->pos_offset) { + a = LLVMBuildFAdd(builder, a, lp_build_const_float(gallivm, bld->pos_offset), ""); + } + a = lp_build_broadcast_scalar(coeff_bld, a); + dadx = coeff_bld->one; + dady = coeff_bld->zero; + } + else if (attrib == 0 && chan == 1) { + a = bld->y; + if (bld->pos_offset) { + a = LLVMBuildFAdd(builder, a, lp_build_const_float(gallivm, bld->pos_offset), ""); + } + a = lp_build_broadcast_scalar(coeff_bld, a); + dady = coeff_bld->one; + dadx = coeff_bld->zero; + } + else { + dadx = lp_build_extract_broadcast(gallivm, setup_bld->type, + coeff_bld->type, dadxaos, chan_index); + dady = lp_build_extract_broadcast(gallivm, setup_bld->type, + coeff_bld->type, dadyaos, chan_index); + + /* + * a = {a, a, a, a} + */ + a = lp_build_extract_broadcast(gallivm, setup_bld->type, + coeff_bld->type, a0aos, chan_index); + } - if (mode == TGSI_INTERPOLATE_PERSPECTIVE) { - LLVMValueRef w = bld->pos[3]; + dadx = LLVMBuildFMul(builder, dadx, pixoffx, ""); + dady = LLVMBuildFMul(builder, dady, pixoffy, ""); + dadq = LLVMBuildFAdd(builder, dadx, dady, ""); + + /* + * Compute the attrib values on the upper-left corner of each + * group of quads. + * Note that if we process 2 quads at once this doesn't + * really exactly to what we want. + * We need to access elem 0 and 2 respectively later if we process + * 2 quads at once. + */ + + if (interp != LP_INTERP_CONSTANT && + interp != LP_INTERP_FACING) { + dadq2 = LLVMBuildFAdd(builder, dadq, dadq, ""); + a = LLVMBuildFAdd(builder, a, dadq2, ""); + } + +#if PERSPECTIVE_DIVIDE_PER_QUAD + /* + * a *= 1 / w + */ + + /* + * XXX since we're only going to access elements 0,2 out of 8 + * if we have 8-wide vectors we should do the division only 4-wide. + * a is really a 2-elements in a 4-wide vector disguised as 8-wide + * in this case. + */ + if (interp == LP_INTERP_PERSPECTIVE) { + LLVMValueRef w = bld->a[0][3]; assert(attrib != 0); - if(!oow) - oow = lp_build_rcp(&bld->base, w); - res = lp_build_mul(&bld->base, res, oow); + assert(bld->mask[0] & TGSI_WRITEMASK_W); + if (!bld->oow) { + bld->oow = lp_build_rcp(coeff_bld, w); + lp_build_name(bld->oow, "oow"); + } + a = lp_build_mul(coeff_bld, a, bld->oow); } +#endif - attrib_name(res, attrib, chan, ""); + attrib_name(a, attrib, chan, ".a"); + attrib_name(dadq, attrib, chan, ".dadq"); - bld->attribs[attrib][chan] = res; + bld->a[attrib][chan] = lp_build_alloca(gallivm, + LLVMTypeOf(a), ""); + LLVMBuildStore(builder, a, bld->a[attrib][chan]); + bld->dadq[attrib][chan] = dadq; } } } @@ -202,55 +553,113 @@ attribs_init(struct lp_build_interp_soa_context *bld) * This is called when we move from one quad to the next. */ static void -attribs_update(struct lp_build_interp_soa_context *bld, int quad_index) +attribs_update(struct lp_build_interp_soa_context *bld, + struct gallivm_state *gallivm, + LLVMValueRef loop_iter, + int start, + int end) { + LLVMBuilderRef builder = gallivm->builder; + struct lp_build_context *coeff_bld = &bld->coeff_bld; LLVMValueRef oow = NULL; unsigned attrib; unsigned chan; - assert(quad_index < 4); - - for(attrib = 0; attrib < bld->num_attribs; ++attrib) { - unsigned mask = bld->mask[attrib]; - unsigned mode = bld->mode[attrib]; + for(attrib = start; attrib < end; ++attrib) { + const unsigned mask = bld->mask[attrib]; + const unsigned interp = bld->interp[attrib]; + for(chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) { + if(mask & (1 << chan)) { + LLVMValueRef a; + if (interp == LP_INTERP_CONSTANT || + interp == LP_INTERP_FACING) { + a = LLVMBuildLoad(builder, bld->a[attrib][chan], ""); + } + else if (interp == LP_INTERP_POSITION) { + assert(attrib > 0); + a = bld->attribs[0][chan]; + } + else { + LLVMValueRef dadq; - if (mode != TGSI_INTERPOLATE_CONSTANT) { - for(chan = 0; chan < NUM_CHANNELS; ++chan) { - if(mask & (1 << chan)) { - LLVMValueRef dadx = bld->dadx[attrib][chan]; - LLVMValueRef dady = bld->dady[attrib][chan]; - LLVMValueRef res; + a = bld->a[attrib][chan]; - res = bld->attribs_pre[attrib][chan]; + /* + * Broadcast the attribute value for this quad into all elements + */ - if (quad_index == 1 || quad_index == 3) { - /* top-right or bottom-right quad */ - /* build res = res + dadx + dadx */ - res = lp_build_add(&bld->base, res, dadx); - res = lp_build_add(&bld->base, res, dadx); + { + /* stored as vector load as float */ + LLVMTypeRef ptr_type = LLVMPointerType(LLVMFloatTypeInContext( + gallivm->context), 0); + LLVMValueRef ptr; + a = LLVMBuildBitCast(builder, a, ptr_type, ""); + ptr = LLVMBuildGEP(builder, a, &loop_iter, 1, ""); + a = LLVMBuildLoad(builder, ptr, ""); + a = lp_build_broadcast_scalar(&bld->coeff_bld, a); } - if (quad_index == 2 || quad_index == 3) { - /* bottom-left or bottom-right quad */ - /* build res = res + dady + dady */ - res = lp_build_add(&bld->base, res, dady); - res = lp_build_add(&bld->base, res, dady); - } + /* + * Get the derivatives. + */ - //XXX bld->attribs_pre[attrib][chan] = res; + dadq = bld->dadq[attrib][chan]; - if (mode == TGSI_INTERPOLATE_PERSPECTIVE) { - LLVMValueRef w = bld->pos[3]; - assert(attrib != 0); - if(!oow) - oow = lp_build_rcp(&bld->base, w); - res = lp_build_mul(&bld->base, res, oow); +#if PERSPECTIVE_DIVIDE_PER_QUAD + if (interp == LP_INTERP_PERSPECTIVE) { + LLVMValueRef dwdq = bld->dadq[0][3]; + + if (oow == NULL) { + assert(bld->oow); + oow = LLVMBuildShuffleVector(coeff_bld->builder, + bld->oow, coeff_bld->undef, + shuffle, ""); + } + + dadq = lp_build_sub(coeff_bld, + dadq, + lp_build_mul(coeff_bld, a, dwdq)); + dadq = lp_build_mul(coeff_bld, dadq, oow); } +#endif - attrib_name(res, attrib, chan, ""); + /* + * Add the derivatives + */ - bld->attribs[attrib][chan] = res; + a = lp_build_add(coeff_bld, a, dadq); + +#if !PERSPECTIVE_DIVIDE_PER_QUAD + if (interp == LP_INTERP_PERSPECTIVE) { + if (oow == NULL) { + LLVMValueRef w = bld->attribs[0][3]; + assert(attrib != 0); + assert(bld->mask[0] & TGSI_WRITEMASK_W); + oow = lp_build_rcp(coeff_bld, w); + } + a = lp_build_mul(coeff_bld, a, oow); + } +#endif + + if (attrib == 0 && chan == 2 && !bld->depth_clamp) { + /* FIXME: Depth values can exceed 1.0, due to the fact that + * setup interpolation coefficients refer to (0,0) which causes + * precision loss. So we must clamp to 1.0 here to avoid artifacts. + * Note though values outside [0,1] are perfectly valid with + * depth clip disabled.. + * XXX: If depth clip is disabled but we force depth clamp + * we may get values larger than 1.0 in the fs (but not in + * depth test). Not sure if that's an issue... + * Also, on a similar note, it is not obvious if the depth values + * appearing in fs (with depth clip disabled) should be clamped + * to [0,1], clamped to near/far or not be clamped at all... + */ + a = lp_build_min(coeff_bld, a, coeff_bld->one); + } + + attrib_name(a, attrib, chan, ""); } + bld->attribs[attrib][chan] = a; } } } @@ -260,53 +669,18 @@ attribs_update(struct lp_build_interp_soa_context *bld, int quad_index) /** * Generate the position vectors. * - * Parameter x0, y0 are the integer values with the quad upper left coordinates. + * Parameter x0, y0 are the integer values with upper left coordinates. */ static void pos_init(struct lp_build_interp_soa_context *bld, LLVMValueRef x0, LLVMValueRef y0) { - lp_build_name(x0, "pos.x"); - lp_build_name(y0, "pos.y"); - - bld->attribs[0][0] = x0; - bld->attribs[0][1] = y0; -} - - -/** - * Update quad position values when moving to the next quad. - */ -static void -pos_update(struct lp_build_interp_soa_context *bld, int quad_index) -{ - LLVMValueRef x = bld->attribs[0][0]; - LLVMValueRef y = bld->attribs[0][1]; - const int xstep = 2, ystep = 2; - - if (quad_index == 1 || quad_index == 3) { - /* top-right or bottom-right quad in block */ - /* build x += xstep */ - x = lp_build_add(&bld->base, x, - lp_build_const_scalar(bld->base.type, xstep)); - } - - if (quad_index == 2) { - /* bottom-left quad in block */ - /* build y += ystep */ - y = lp_build_add(&bld->base, y, - lp_build_const_scalar(bld->base.type, ystep)); - /* build x -= xstep */ - x = lp_build_sub(&bld->base, x, - lp_build_const_scalar(bld->base.type, xstep)); - } + LLVMBuilderRef builder = bld->coeff_bld.gallivm->builder; + struct lp_build_context *coeff_bld = &bld->coeff_bld; - lp_build_name(x, "pos.x"); - lp_build_name(y, "pos.y"); - - bld->attribs[0][0] = x; - bld->attribs[0][1] = y; + bld->x = LLVMBuildSIToFP(builder, x0, coeff_bld->elem_type, ""); + bld->y = LLVMBuildSIToFP(builder, y0, coeff_bld->elem_type, ""); } @@ -315,8 +689,11 @@ pos_update(struct lp_build_interp_soa_context *bld, int quad_index) */ void lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, - const struct tgsi_token *tokens, - boolean flatshade, + struct gallivm_state *gallivm, + unsigned num_inputs, + const struct lp_shader_input *inputs, + boolean pixel_center_integer, + boolean depth_clamp, LLVMBuilderRef builder, struct lp_type type, LLVMValueRef a0_ptr, @@ -325,83 +702,131 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, LLVMValueRef x0, LLVMValueRef y0) { - struct tgsi_parse_context parse; - struct tgsi_full_declaration *decl; + struct lp_type coeff_type; + struct lp_type setup_type; + unsigned attrib; + unsigned chan; memset(bld, 0, sizeof *bld); - lp_build_context_init(&bld->base, builder, type); + memset(&coeff_type, 0, sizeof coeff_type); + coeff_type.floating = TRUE; + coeff_type.sign = TRUE; + coeff_type.width = 32; + coeff_type.length = type.length; + + memset(&setup_type, 0, sizeof setup_type); + setup_type.floating = TRUE; + setup_type.sign = TRUE; + setup_type.width = 32; + setup_type.length = TGSI_NUM_CHANNELS; + + + /* XXX: we don't support interpolating into any other types */ + assert(memcmp(&coeff_type, &type, sizeof coeff_type) == 0); + + lp_build_context_init(&bld->coeff_bld, gallivm, coeff_type); + lp_build_context_init(&bld->setup_bld, gallivm, setup_type); /* For convenience */ bld->pos = bld->attribs[0]; - bld->inputs = (const LLVMValueRef (*)[NUM_CHANNELS]) bld->attribs[1]; + bld->inputs = (const LLVMValueRef (*)[TGSI_NUM_CHANNELS]) bld->attribs[1]; /* Position */ - bld->num_attribs = 1; - bld->mask[0] = TGSI_WRITEMASK_ZW; - bld->mode[0] = TGSI_INTERPOLATE_LINEAR; + bld->mask[0] = TGSI_WRITEMASK_XYZW; + bld->interp[0] = LP_INTERP_LINEAR; /* Inputs */ - tgsi_parse_init( &parse, tokens ); - while( !tgsi_parse_end_of_tokens( &parse ) ) { - tgsi_parse_token( &parse ); - - switch( parse.FullToken.Token.Type ) { - case TGSI_TOKEN_TYPE_DECLARATION: - decl = &parse.FullToken.FullDeclaration; - if( decl->Declaration.File == TGSI_FILE_INPUT ) { - unsigned first, last, mask; - unsigned attrib; - - first = decl->Range.First; - last = decl->Range.Last; - mask = decl->Declaration.UsageMask; - - for( attrib = first; attrib <= last; ++attrib ) { - bld->mask[1 + attrib] = mask; - - /* XXX: have mesa set INTERP_CONSTANT in the fragment - * shader. - */ - if (decl->Semantic.Name == TGSI_SEMANTIC_COLOR && - flatshade) - bld->mode[1 + attrib] = TGSI_INTERPOLATE_CONSTANT; - else - bld->mode[1 + attrib] = decl->Declaration.Interpolate; - } - - bld->num_attribs = MAX2(bld->num_attribs, 1 + last + 1); - } - break; - - case TGSI_TOKEN_TYPE_INSTRUCTION: - case TGSI_TOKEN_TYPE_IMMEDIATE: - break; + for (attrib = 0; attrib < num_inputs; ++attrib) { + bld->mask[1 + attrib] = inputs[attrib].usage_mask; + bld->interp[1 + attrib] = inputs[attrib].interp; + } + bld->num_attribs = 1 + num_inputs; - default: - assert( 0 ); + /* Ensure all masked out input channels have a valid value */ + for (attrib = 0; attrib < bld->num_attribs; ++attrib) { + for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) { + bld->attribs[attrib][chan] = bld->coeff_bld.undef; } } - tgsi_parse_free( &parse ); - coeffs_init(bld, a0_ptr, dadx_ptr, dady_ptr); + if (pixel_center_integer) { + bld->pos_offset = 0.0; + } else { + bld->pos_offset = 0.5; + } + bld->depth_clamp = depth_clamp; pos_init(bld, x0, y0); - attribs_init(bld); + /* + * Simple method (single step interpolation) may be slower if vector length + * is just 4, but the results are different (generally less accurate) with + * the other method, so always use more accurate version. + */ + if (1) { + bld->simple_interp = TRUE; + { + /* XXX this should use a global static table */ + unsigned i; + unsigned num_loops = 16 / type.length; + LLVMValueRef pixoffx, pixoffy, index; + LLVMValueRef ptr; + + bld->xoffset_store = lp_build_array_alloca(gallivm, + lp_build_vec_type(gallivm, type), + lp_build_const_int32(gallivm, num_loops), + ""); + bld->yoffset_store = lp_build_array_alloca(gallivm, + lp_build_vec_type(gallivm, type), + lp_build_const_int32(gallivm, num_loops), + ""); + for (i = 0; i < num_loops; i++) { + index = lp_build_const_int32(gallivm, i); + calc_offsets(&bld->coeff_bld, i*type.length/4, &pixoffx, &pixoffy); + ptr = LLVMBuildGEP(builder, bld->xoffset_store, &index, 1, ""); + LLVMBuildStore(builder, pixoffx, ptr); + ptr = LLVMBuildGEP(builder, bld->yoffset_store, &index, 1, ""); + LLVMBuildStore(builder, pixoffy, ptr); + } + } + coeffs_init_simple(bld, a0_ptr, dadx_ptr, dady_ptr); + } + else { + bld->simple_interp = FALSE; + coeffs_init(bld, a0_ptr, dadx_ptr, dady_ptr); + } + } -/** +/* * Advance the position and inputs to the given quad within the block. */ + void -lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld, - int quad_index) +lp_build_interp_soa_update_inputs_dyn(struct lp_build_interp_soa_context *bld, + struct gallivm_state *gallivm, + LLVMValueRef quad_start_index) { - assert(quad_index < 4); - - pos_update(bld, quad_index); + if (bld->simple_interp) { + attribs_update_simple(bld, gallivm, quad_start_index, 1, bld->num_attribs); + } + else { + attribs_update(bld, gallivm, quad_start_index, 1, bld->num_attribs); + } +} - attribs_update(bld, quad_index); +void +lp_build_interp_soa_update_pos_dyn(struct lp_build_interp_soa_context *bld, + struct gallivm_state *gallivm, + LLVMValueRef quad_start_index) +{ + if (bld->simple_interp) { + attribs_update_simple(bld, gallivm, quad_start_index, 0, 1); + } + else { + attribs_update(bld, gallivm, quad_start_index, 0, 1); + } } +