X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fllvmpipe%2Flp_bld_depth.c;h=1bf741194c5ec6d4e4737f380d52874f3085ec13;hb=48e191f90cbb7735cadf30c444e1fb599311c55a;hp=98ec1cb1b9dca428118830ae8e8f70173a4d9648;hpb=650e02003fbb5511ec758d993b7ec0a302ee2235;p=mesa.git diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c index 98ec1cb1b9d..1bf741194c5 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2009 VMware, Inc. + * Copyright 2009-2010 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -52,22 +52,235 @@ * Z31 Z32 Z41 Z42 Z33 Z34 Z43 Z44 ... * ... ... ... ... ... ... ... ... ... * - * FIXME: Code generate stencil test * * @author Jose Fonseca + * @author Brian Paul */ #include "pipe/p_state.h" #include "util/u_format.h" -#include "lp_bld_type.h" -#include "lp_bld_const.h" -#include "lp_bld_logic.h" -#include "lp_bld_flow.h" -#include "lp_bld_debug.h" +#include "gallivm/lp_bld_type.h" +#include "gallivm/lp_bld_arit.h" +#include "gallivm/lp_bld_bitarit.h" +#include "gallivm/lp_bld_const.h" +#include "gallivm/lp_bld_conv.h" +#include "gallivm/lp_bld_logic.h" +#include "gallivm/lp_bld_flow.h" +#include "gallivm/lp_bld_intr.h" +#include "gallivm/lp_bld_debug.h" +#include "gallivm/lp_bld_swizzle.h" + #include "lp_bld_depth.h" +/** Used to select fields from pipe_stencil_state */ +enum stencil_op { + S_FAIL_OP, + Z_FAIL_OP, + Z_PASS_OP +}; + + + +/** + * Do the stencil test comparison (compare FB stencil values against ref value). + * This will be used twice when generating two-sided stencil code. + * \param stencil the front/back stencil state + * \param stencilRef the stencil reference value, replicated as a vector + * \param stencilVals vector of stencil values from framebuffer + * \return vector mask of pass/fail values (~0 or 0) + */ +static LLVMValueRef +lp_build_stencil_test_single(struct lp_build_context *bld, + const struct pipe_stencil_state *stencil, + LLVMValueRef stencilRef, + LLVMValueRef stencilVals) +{ + LLVMBuilderRef builder = bld->gallivm->builder; + const unsigned stencilMax = 255; /* XXX fix */ + struct lp_type type = bld->type; + LLVMValueRef res; + + assert(type.sign); + + assert(stencil->enabled); + + if (stencil->valuemask != stencilMax) { + /* compute stencilRef = stencilRef & valuemask */ + LLVMValueRef valuemask = lp_build_const_int_vec(bld->gallivm, type, stencil->valuemask); + stencilRef = LLVMBuildAnd(builder, stencilRef, valuemask, ""); + /* compute stencilVals = stencilVals & valuemask */ + stencilVals = LLVMBuildAnd(builder, stencilVals, valuemask, ""); + } + + res = lp_build_cmp(bld, stencil->func, stencilRef, stencilVals); + + return res; +} + + +/** + * Do the one or two-sided stencil test comparison. + * \sa lp_build_stencil_test_single + * \param front_facing an integer vector mask, indicating front (~0) or back + * (0) facing polygon. If NULL, assume front-facing. + */ +static LLVMValueRef +lp_build_stencil_test(struct lp_build_context *bld, + const struct pipe_stencil_state stencil[2], + LLVMValueRef stencilRefs[2], + LLVMValueRef stencilVals, + LLVMValueRef front_facing) +{ + LLVMValueRef res; + + assert(stencil[0].enabled); + + /* do front face test */ + res = lp_build_stencil_test_single(bld, &stencil[0], + stencilRefs[0], stencilVals); + + if (stencil[1].enabled && front_facing) { + /* do back face test */ + LLVMValueRef back_res; + + back_res = lp_build_stencil_test_single(bld, &stencil[1], + stencilRefs[1], stencilVals); + + res = lp_build_select(bld, front_facing, res, back_res); + } + + return res; +} + + +/** + * Apply the stencil operator (add/sub/keep/etc) to the given vector + * of stencil values. + * \return new stencil values vector + */ +static LLVMValueRef +lp_build_stencil_op_single(struct lp_build_context *bld, + const struct pipe_stencil_state *stencil, + enum stencil_op op, + LLVMValueRef stencilRef, + LLVMValueRef stencilVals) + +{ + LLVMBuilderRef builder = bld->gallivm->builder; + struct lp_type type = bld->type; + LLVMValueRef res; + LLVMValueRef max = lp_build_const_int_vec(bld->gallivm, type, 0xff); + unsigned stencil_op; + + assert(type.sign); + + switch (op) { + case S_FAIL_OP: + stencil_op = stencil->fail_op; + break; + case Z_FAIL_OP: + stencil_op = stencil->zfail_op; + break; + case Z_PASS_OP: + stencil_op = stencil->zpass_op; + break; + default: + assert(0 && "Invalid stencil_op mode"); + stencil_op = PIPE_STENCIL_OP_KEEP; + } + + switch (stencil_op) { + case PIPE_STENCIL_OP_KEEP: + res = stencilVals; + /* we can return early for this case */ + return res; + case PIPE_STENCIL_OP_ZERO: + res = bld->zero; + break; + case PIPE_STENCIL_OP_REPLACE: + res = stencilRef; + break; + case PIPE_STENCIL_OP_INCR: + res = lp_build_add(bld, stencilVals, bld->one); + res = lp_build_min(bld, res, max); + break; + case PIPE_STENCIL_OP_DECR: + res = lp_build_sub(bld, stencilVals, bld->one); + res = lp_build_max(bld, res, bld->zero); + break; + case PIPE_STENCIL_OP_INCR_WRAP: + res = lp_build_add(bld, stencilVals, bld->one); + res = LLVMBuildAnd(builder, res, max, ""); + break; + case PIPE_STENCIL_OP_DECR_WRAP: + res = lp_build_sub(bld, stencilVals, bld->one); + res = LLVMBuildAnd(builder, res, max, ""); + break; + case PIPE_STENCIL_OP_INVERT: + res = LLVMBuildNot(builder, stencilVals, ""); + res = LLVMBuildAnd(builder, res, max, ""); + break; + default: + assert(0 && "bad stencil op mode"); + res = bld->undef; + } + + return res; +} + + +/** + * Do the one or two-sided stencil test op/update. + */ +static LLVMValueRef +lp_build_stencil_op(struct lp_build_context *bld, + const struct pipe_stencil_state stencil[2], + enum stencil_op op, + LLVMValueRef stencilRefs[2], + LLVMValueRef stencilVals, + LLVMValueRef mask, + LLVMValueRef front_facing) + +{ + LLVMBuilderRef builder = bld->gallivm->builder; + LLVMValueRef res; + + assert(stencil[0].enabled); + + /* do front face op */ + res = lp_build_stencil_op_single(bld, &stencil[0], op, + stencilRefs[0], stencilVals); + + if (stencil[1].enabled && front_facing) { + /* do back face op */ + LLVMValueRef back_res; + + back_res = lp_build_stencil_op_single(bld, &stencil[1], op, + stencilRefs[1], stencilVals); + + res = lp_build_select(bld, front_facing, res, back_res); + } + + if (stencil->writemask != 0xff) { + /* mask &= stencil->writemask */ + LLVMValueRef writemask = lp_build_const_int_vec(bld->gallivm, bld->type, + stencil->writemask); + mask = LLVMBuildAnd(builder, mask, writemask, ""); + /* res = (res & mask) | (stencilVals & ~mask) */ + res = lp_build_select_bitwise(bld, mask, res, stencilVals); + } + else { + /* res = mask ? res : stencilVals */ + res = lp_build_select(bld, mask, res, stencilVals); + } + + return res; +} + + + /** * Return a type appropriate for depth/stencil testing. */ @@ -90,13 +303,18 @@ lp_depth_type(const struct util_format_description *format_desc, if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_FLOAT) { type.floating = TRUE; - assert(swizzle = 0); + assert(swizzle == 0); assert(format_desc->channel[swizzle].size == format_desc->block.bits); } else if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED) { assert(format_desc->block.bits <= 32); - if(format_desc->channel[swizzle].normalized) - type.norm = TRUE; + assert(format_desc->channel[swizzle].normalized); + if (format_desc->channel[swizzle].size < format_desc->block.bits) { + /* Prefer signed integers when possible, as SSE has less support + * for unsigned comparison; + */ + type.sign = TRUE; + } } else assert(0); @@ -109,105 +327,484 @@ lp_depth_type(const struct util_format_description *format_desc, /** - * Depth test. + * Compute bitmask and bit shift to apply to the incoming fragment Z values + * and the Z buffer values needed before doing the Z comparison. + * + * Note that we leave the Z bits in the position that we find them + * in the Z buffer (typically 0xffffff00 or 0x00ffffff). That lets us + * get by with fewer bit twiddling steps. */ -void -lp_build_depth_test(LLVMBuilderRef builder, - const struct pipe_depth_state *state, - struct lp_type type, - const struct util_format_description *format_desc, - struct lp_build_mask_context *mask, - LLVMValueRef src, - LLVMValueRef dst_ptr) +static boolean +get_z_shift_and_mask(const struct util_format_description *format_desc, + unsigned *shift, unsigned *width, unsigned *mask) { - struct lp_build_context bld; + const unsigned total_bits = format_desc->block.bits; unsigned z_swizzle; - LLVMValueRef dst; - LLVMValueRef z_bitmask = NULL; - LLVMValueRef test; - - if(!state->enabled) - return; - + unsigned chan; + unsigned padding_left, padding_right; + assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS); assert(format_desc->block.width == 1); assert(format_desc->block.height == 1); z_swizzle = format_desc->swizzle[0]; - if(z_swizzle == UTIL_FORMAT_SWIZZLE_NONE) - return; - /* Sanity checking */ - assert(z_swizzle < 4); - assert(format_desc->block.bits == type.width); - if(type.floating) { - assert(z_swizzle == 0); - assert(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_FLOAT); - assert(format_desc->channel[z_swizzle].size == format_desc->block.bits); + if (z_swizzle == UTIL_FORMAT_SWIZZLE_NONE) + return FALSE; + + *width = format_desc->channel[z_swizzle].size; + + padding_right = 0; + for (chan = 0; chan < z_swizzle; ++chan) + padding_right += format_desc->channel[chan].size; + + padding_left = + total_bits - (padding_right + *width); + + if (padding_left || padding_right) { + unsigned long long mask_left = (1ULL << (total_bits - padding_left)) - 1; + unsigned long long mask_right = (1ULL << (padding_right)) - 1; + *mask = mask_left ^ mask_right; } else { - assert(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED); - assert(format_desc->channel[z_swizzle].normalized); - assert(!type.fixed); - assert(!type.sign); - assert(type.norm); + *mask = 0xffffffff; } - /* Setup build context */ - lp_build_context_init(&bld, builder, type); + *shift = padding_right; + + return TRUE; +} + + +/** + * Compute bitmask and bit shift to apply to the framebuffer pixel values + * to put the stencil bits in the least significant position. + * (i.e. 0x000000ff) + */ +static boolean +get_s_shift_and_mask(const struct util_format_description *format_desc, + unsigned *shift, unsigned *mask) +{ + unsigned s_swizzle; + unsigned chan, sz; + + s_swizzle = format_desc->swizzle[1]; + + if (s_swizzle == UTIL_FORMAT_SWIZZLE_NONE) + return FALSE; + + *shift = 0; + for (chan = 0; chan < s_swizzle; chan++) + *shift += format_desc->channel[chan].size; + + sz = format_desc->channel[s_swizzle].size; + *mask = (1U << sz) - 1U; + + return TRUE; +} + + +/** + * Perform the occlusion test and increase the counter. + * Test the depth mask. Add the number of channel which has none zero mask + * into the occlusion counter. e.g. maskvalue is {-1, -1, -1, -1}. + * The counter will add 4. + * + * \param type holds element type of the mask vector. + * \param maskvalue is the depth test mask. + * \param counter is a pointer of the uint32 counter. + */ +void +lp_build_occlusion_count(struct gallivm_state *gallivm, + struct lp_type type, + LLVMValueRef maskvalue, + LLVMValueRef counter) +{ + LLVMBuilderRef builder = gallivm->builder; + LLVMContextRef context = gallivm->context; + LLVMValueRef countmask = lp_build_const_int_vec(gallivm, type, 1); + LLVMValueRef countv = LLVMBuildAnd(builder, maskvalue, countmask, "countv"); + LLVMTypeRef i8v16 = LLVMVectorType(LLVMInt8TypeInContext(context), 16); + LLVMValueRef counti = LLVMBuildBitCast(builder, countv, i8v16, "counti"); + LLVMValueRef maskarray[4] = { + lp_build_const_int32(gallivm, 0), + lp_build_const_int32(gallivm, 4), + lp_build_const_int32(gallivm, 8), + lp_build_const_int32(gallivm, 12) + }; + LLVMValueRef shufflemask = LLVMConstVector(maskarray, 4); + LLVMValueRef shufflev = LLVMBuildShuffleVector(builder, counti, LLVMGetUndef(i8v16), shufflemask, "shufflev"); + LLVMValueRef shuffle = LLVMBuildBitCast(builder, shufflev, LLVMInt32TypeInContext(context), "shuffle"); + LLVMValueRef count = lp_build_intrinsic_unary(builder, "llvm.ctpop.i32", LLVMInt32TypeInContext(context), shuffle); + LLVMValueRef orig = LLVMBuildLoad(builder, counter, "orig"); + LLVMValueRef incr = LLVMBuildAdd(builder, orig, count, "incr"); + LLVMBuildStore(builder, incr, counter); +} + + - dst = LLVMBuildLoad(builder, dst_ptr, ""); +/** + * Generate code for performing depth and/or stencil tests. + * We operate on a vector of values (typically a 2x2 quad). + * + * \param depth the depth test state + * \param stencil the front/back stencil state + * \param type the data type of the fragment depth/stencil values + * \param format_desc description of the depth/stencil surface + * \param mask the alive/dead pixel mask for the quad (vector) + * \param stencil_refs the front/back stencil ref values (scalar) + * \param z_src the incoming depth/stencil values (a 2x2 quad, float32) + * \param zs_dst_ptr pointer to depth/stencil values in framebuffer + * \param facing contains boolean value indicating front/back facing polygon + */ +void +lp_build_depth_stencil_test(struct gallivm_state *gallivm, + const struct pipe_depth_state *depth, + const struct pipe_stencil_state stencil[2], + struct lp_type z_src_type, + const struct util_format_description *format_desc, + struct lp_build_mask_context *mask, + LLVMValueRef stencil_refs[2], + LLVMValueRef z_src, + LLVMValueRef zs_dst_ptr, + LLVMValueRef face, + LLVMValueRef *zs_value, + boolean do_branch) +{ + LLVMBuilderRef builder = gallivm->builder; + struct lp_type z_type; + struct lp_build_context z_bld; + struct lp_build_context s_bld; + struct lp_type s_type; + unsigned z_shift = 0, z_width = 0, z_mask = 0; + LLVMValueRef zs_dst, z_dst = NULL; + LLVMValueRef stencil_vals = NULL; + LLVMValueRef z_bitmask = NULL, stencil_shift = NULL; + LLVMValueRef z_pass = NULL, s_pass_mask = NULL; + LLVMValueRef orig_mask = lp_build_mask_value(mask); + LLVMValueRef front_facing = NULL; - lp_build_name(dst, "zsbuf"); - /* Align the source depth bits with the destination's, and mask out any - * stencil or padding bits from both */ - if(format_desc->channel[z_swizzle].size == format_desc->block.bits) { - assert(z_swizzle == 0); - /* nothing to do */ + /* + * Depths are expected to be between 0 and 1, even if they are stored in + * floats. Setting these bits here will ensure that the lp_build_conv() call + * below won't try to unnecessarily clamp the incoming values. + */ + if(z_src_type.floating) { + z_src_type.sign = FALSE; + z_src_type.norm = TRUE; } else { - unsigned padding_left; - unsigned padding_right; - unsigned chan; - - assert(format_desc->layout == UTIL_FORMAT_LAYOUT_ARITH); - assert(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED); - assert(format_desc->channel[z_swizzle].size <= format_desc->block.bits); - assert(format_desc->channel[z_swizzle].normalized); - - padding_right = 0; - for(chan = 0; chan < z_swizzle; ++chan) - padding_right += format_desc->channel[chan].size; - padding_left = format_desc->block.bits - - (padding_right + format_desc->channel[z_swizzle].size); - - if(padding_left || padding_right) { - const unsigned long long mask_left = ((unsigned long long)1 << (format_desc->block.bits - padding_left)) - 1; - const unsigned long long mask_right = ((unsigned long long)1 << (padding_right)) - 1; - z_bitmask = lp_build_int_const_scalar(type, mask_left ^ mask_right); + assert(!z_src_type.sign); + assert(z_src_type.norm); + } + + /* Pick the depth type. */ + z_type = lp_depth_type(format_desc, z_src_type.width*z_src_type.length); + + /* FIXME: Cope with a depth test type with a different bit width. */ + assert(z_type.width == z_src_type.width); + assert(z_type.length == z_src_type.length); + + /* Sanity checking */ + { + const unsigned z_swizzle = format_desc->swizzle[0]; + const unsigned s_swizzle = format_desc->swizzle[1]; + + assert(z_swizzle != UTIL_FORMAT_SWIZZLE_NONE || + s_swizzle != UTIL_FORMAT_SWIZZLE_NONE); + + assert(depth->enabled || stencil[0].enabled); + + assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS); + assert(format_desc->block.width == 1); + assert(format_desc->block.height == 1); + + if (stencil[0].enabled) { + assert(format_desc->format == PIPE_FORMAT_Z24_UNORM_S8_USCALED || + format_desc->format == PIPE_FORMAT_S8_USCALED_Z24_UNORM); + } + + assert(z_swizzle < 4); + assert(format_desc->block.bits == z_type.width); + if (z_type.floating) { + assert(z_swizzle == 0); + assert(format_desc->channel[z_swizzle].type == + UTIL_FORMAT_TYPE_FLOAT); + assert(format_desc->channel[z_swizzle].size == + format_desc->block.bits); + } + else { + assert(format_desc->channel[z_swizzle].type == + UTIL_FORMAT_TYPE_UNSIGNED); + assert(format_desc->channel[z_swizzle].normalized); + assert(!z_type.fixed); + } + } + + + /* Setup build context for Z vals */ + lp_build_context_init(&z_bld, gallivm, z_type); + + /* Setup build context for stencil vals */ + s_type = lp_type_int_vec(z_type.width); + lp_build_context_init(&s_bld, gallivm, s_type); + + /* Load current z/stencil value from z/stencil buffer */ + zs_dst_ptr = LLVMBuildBitCast(builder, + zs_dst_ptr, + LLVMPointerType(z_bld.vec_type, 0), ""); + zs_dst = LLVMBuildLoad(builder, zs_dst_ptr, ""); + + lp_build_name(zs_dst, "zs_dst"); + + + /* Compute and apply the Z/stencil bitmasks and shifts. + */ + { + unsigned s_shift, s_mask; + + if (get_z_shift_and_mask(format_desc, &z_shift, &z_width, &z_mask)) { + if (z_mask != 0xffffffff) { + z_bitmask = lp_build_const_int_vec(gallivm, z_type, z_mask); + } + + /* + * Align the framebuffer Z 's LSB to the right. + */ + if (z_shift) { + LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_type, z_shift); + z_dst = LLVMBuildLShr(builder, zs_dst, shift, "z_dst"); + } else if (z_bitmask) { + /* TODO: Instead of loading a mask from memory and ANDing, it's + * probably faster to just shake the bits with two shifts. */ + z_dst = LLVMBuildAnd(builder, zs_dst, z_bitmask, "z_dst"); + } else { + z_dst = zs_dst; + lp_build_name(z_dst, "z_dst"); + } + } + + if (get_s_shift_and_mask(format_desc, &s_shift, &s_mask)) { + if (s_shift) { + LLVMValueRef shift = lp_build_const_int_vec(gallivm, s_type, s_shift); + stencil_vals = LLVMBuildLShr(builder, zs_dst, shift, ""); + stencil_shift = shift; /* used below */ + } + else { + stencil_vals = zs_dst; + } + + if (s_mask != 0xffffffff) { + LLVMValueRef mask = lp_build_const_int_vec(gallivm, s_type, s_mask); + stencil_vals = LLVMBuildAnd(builder, stencil_vals, mask, ""); + } + + lp_build_name(stencil_vals, "s_dst"); + } + } + + if (stencil[0].enabled) { + + if (face) { + LLVMValueRef zero = lp_build_const_int32(gallivm, 0); + + /* front_facing = face != 0 ? ~0 : 0 */ + front_facing = LLVMBuildICmp(builder, LLVMIntNE, face, zero, ""); + front_facing = LLVMBuildSExt(builder, front_facing, + LLVMIntTypeInContext(gallivm->context, + s_bld.type.length*s_bld.type.width), + ""); + front_facing = LLVMBuildBitCast(builder, front_facing, + s_bld.int_vec_type, ""); + } + + /* convert scalar stencil refs into vectors */ + stencil_refs[0] = lp_build_broadcast_scalar(&s_bld, stencil_refs[0]); + stencil_refs[1] = lp_build_broadcast_scalar(&s_bld, stencil_refs[1]); + + s_pass_mask = lp_build_stencil_test(&s_bld, stencil, + stencil_refs, stencil_vals, + front_facing); + + /* apply stencil-fail operator */ + { + LLVMValueRef s_fail_mask = lp_build_andnot(&s_bld, orig_mask, s_pass_mask); + stencil_vals = lp_build_stencil_op(&s_bld, stencil, S_FAIL_OP, + stencil_refs, stencil_vals, + s_fail_mask, front_facing); + } + } + + if (depth->enabled) { + /* + * Convert fragment Z to the desired type, aligning the LSB to the right. + */ + + assert(z_type.width == z_src_type.width); + assert(z_type.length == z_src_type.length); + assert(lp_check_value(z_src_type, z_src)); + if (z_src_type.floating) { + /* + * Convert from floating point values + */ + + if (!z_type.floating) { + z_src = lp_build_clamped_float_to_unsigned_norm(gallivm, + z_src_type, + z_width, + z_src); + } + } else { + /* + * Convert from unsigned normalized values. + */ + + assert(!z_src_type.sign); + assert(!z_src_type.fixed); + assert(z_src_type.norm); + assert(!z_type.floating); + if (z_src_type.width > z_width) { + LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_src_type, + z_src_type.width - z_width); + z_src = LLVMBuildLShr(builder, z_src, shift, ""); + } + } + assert(lp_check_value(z_type, z_src)); + + lp_build_name(z_src, "z_src"); + + /* compare src Z to dst Z, returning 'pass' mask */ + z_pass = lp_build_cmp(&z_bld, depth->func, z_src, z_dst); + + if (!stencil[0].enabled) { + /* We can potentially skip all remaining operations here, but only + * if stencil is disabled because we still need to update the stencil + * buffer values. Don't need to update Z buffer values. + */ + lp_build_mask_update(mask, z_pass); + + if (do_branch) { + lp_build_mask_check(mask); + do_branch = FALSE; + } } - if(padding_left) - src = LLVMBuildLShr(builder, src, lp_build_int_const_scalar(type, padding_left), ""); - if(padding_right) - src = LLVMBuildAnd(builder, src, z_bitmask, ""); - if(padding_left || padding_right) - dst = LLVMBuildAnd(builder, dst, z_bitmask, ""); + if (depth->writemask) { + LLVMValueRef zselectmask; + + /* mask off bits that failed Z test */ + zselectmask = LLVMBuildAnd(builder, orig_mask, z_pass, ""); + + /* mask off bits that failed stencil test */ + if (s_pass_mask) { + zselectmask = LLVMBuildAnd(builder, zselectmask, s_pass_mask, ""); + } + + /* Mix the old and new Z buffer values. + * z_dst[i] = zselectmask[i] ? z_src[i] : z_dst[i] + */ + z_dst = lp_build_select(&z_bld, zselectmask, z_src, z_dst); + } + + if (stencil[0].enabled) { + /* update stencil buffer values according to z pass/fail result */ + LLVMValueRef z_fail_mask, z_pass_mask; + + /* apply Z-fail operator */ + z_fail_mask = lp_build_andnot(&z_bld, orig_mask, z_pass); + stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_FAIL_OP, + stencil_refs, stencil_vals, + z_fail_mask, front_facing); + + /* apply Z-pass operator */ + z_pass_mask = LLVMBuildAnd(builder, orig_mask, z_pass, ""); + stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP, + stencil_refs, stencil_vals, + z_pass_mask, front_facing); + } + } + else { + /* No depth test: apply Z-pass operator to stencil buffer values which + * passed the stencil test. + */ + s_pass_mask = LLVMBuildAnd(builder, orig_mask, s_pass_mask, ""); + stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP, + stencil_refs, stencil_vals, + s_pass_mask, front_facing); } - lp_build_name(dst, "zsbuf.z"); + /* Put Z and ztencil bits in the right place */ + if (z_dst && z_shift) { + LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_type, z_shift); + z_dst = LLVMBuildShl(builder, z_dst, shift, ""); + } + if (stencil_vals && stencil_shift) + stencil_vals = LLVMBuildShl(builder, stencil_vals, + stencil_shift, ""); - test = lp_build_cmp(&bld, state->func, src, dst); - lp_build_mask_update(mask, test); + /* Finally, merge/store the z/stencil values */ + if ((depth->enabled && depth->writemask) || + (stencil[0].enabled && stencil[0].writemask)) { - if(state->writemask) { - if(z_bitmask) - z_bitmask = LLVMBuildAnd(builder, mask->value, z_bitmask, ""); + if (z_dst && stencil_vals) + zs_dst = LLVMBuildOr(builder, z_dst, stencil_vals, ""); + else if (z_dst) + zs_dst = z_dst; else - z_bitmask = mask->value; + zs_dst = stencil_vals; - dst = lp_build_select(&bld, z_bitmask, src, dst); - LLVMBuildStore(builder, dst, dst_ptr); + *zs_value = zs_dst; } + + if (s_pass_mask) + lp_build_mask_update(mask, s_pass_mask); + + if (depth->enabled && stencil[0].enabled) + lp_build_mask_update(mask, z_pass); + + if (do_branch) + lp_build_mask_check(mask); + +} + + +void +lp_build_depth_write(LLVMBuilderRef builder, + const struct util_format_description *format_desc, + LLVMValueRef zs_dst_ptr, + LLVMValueRef zs_value) +{ + zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, + LLVMPointerType(LLVMTypeOf(zs_value), 0), ""); + + LLVMBuildStore(builder, zs_value, zs_dst_ptr); +} + + +void +lp_build_deferred_depth_write(struct gallivm_state *gallivm, + struct lp_type z_src_type, + const struct util_format_description *format_desc, + struct lp_build_mask_context *mask, + LLVMValueRef zs_dst_ptr, + LLVMValueRef zs_value) +{ + struct lp_type z_type; + struct lp_build_context z_bld; + LLVMValueRef z_dst; + LLVMBuilderRef builder = gallivm->builder; + + /* XXX: pointlessly redo type logic: + */ + z_type = lp_depth_type(format_desc, z_src_type.width*z_src_type.length); + lp_build_context_init(&z_bld, gallivm, z_type); + + zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, + LLVMPointerType(z_bld.vec_type, 0), ""); + + z_dst = LLVMBuildLoad(builder, zs_dst_ptr, "zsbufval"); + z_dst = lp_build_select(&z_bld, lp_build_mask_value(mask), zs_value, z_dst); + + LLVMBuildStore(builder, z_dst, zs_dst_ptr); }