Merge commit 'origin/7.8'
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_depth.c
index f08f8eb6d8b9c457b3179a74285e5f4f7dfc3bfc..564ea2e3189e68dad9fa2b9e5a1497617fb15b81 100644 (file)
  *  Z31 Z32 Z41 Z42 Z33 Z34 Z43 Z44 ...
  *  ... ... ... ... ... ... ... ... ...
  *
- * FIXME: Code generate stencil test
+ *
+ * Stencil test:
+ * Two-sided stencil test is supported but probably not as efficient as
+ * it could be.  Currently, we use if/then/else constructs to do the
+ * operations for front vs. back-facing polygons.  We could probably do
+ * both the front and back arithmetic then use a Select() instruction to
+ * choose the result depending on polyon orientation.  We'd have to
+ * measure performance both ways and see which is better.
  *
  * @author Jose Fonseca <jfonseca@vmware.com>
  */
 #include "util/u_format.h"
 
 #include "lp_bld_type.h"
+#include "lp_bld_arit.h"
 #include "lp_bld_const.h"
 #include "lp_bld_logic.h"
 #include "lp_bld_flow.h"
 #include "lp_bld_debug.h"
 #include "lp_bld_depth.h"
+#include "lp_bld_swizzle.h"
+
+
+/** Used to select fields from pipe_stencil_state */
+enum stencil_op {
+   S_FAIL_OP,
+   Z_FAIL_OP,
+   Z_PASS_OP
+};
+
+
+
+/**
+ * Do the stencil test comparison (compare FB stencil values against ref value).
+ * This will be used twice when generating two-sided stencil code.
+ * \param stencil  the front/back stencil state
+ * \param stencilRef  the stencil reference value, replicated as a vector
+ * \param stencilVals  vector of stencil values from framebuffer
+ * \return vector mask of pass/fail values (~0 or 0)
+ */
+static LLVMValueRef
+lp_build_stencil_test_single(struct lp_build_context *bld,
+                             const struct pipe_stencil_state *stencil,
+                             LLVMValueRef stencilRef,
+                             LLVMValueRef stencilVals)
+{
+   const unsigned stencilMax = 255; /* XXX fix */
+   struct lp_type type = bld->type;
+   LLVMValueRef res;
+
+   assert(type.sign);
+
+   assert(stencil->enabled);
+
+   if (stencil->valuemask != stencilMax) {
+      /* compute stencilRef = stencilRef & valuemask */
+      LLVMValueRef valuemask = lp_build_const_int_vec(type, stencil->valuemask);
+      stencilRef = LLVMBuildAnd(bld->builder, stencilRef, valuemask, "");
+      /* compute stencilVals = stencilVals & valuemask */
+      stencilVals = LLVMBuildAnd(bld->builder, stencilVals, valuemask, "");
+   }
+
+   res = lp_build_cmp(bld, stencil->func, stencilRef, stencilVals);
+
+   return res;
+}
+
+
+/**
+ * Do the one or two-sided stencil test comparison.
+ * \sa lp_build_stencil_test_single
+ * \param face  an integer indicating front (+) or back (-) facing polygon.
+ *              If NULL, assume front-facing.
+ */
+static LLVMValueRef
+lp_build_stencil_test(struct lp_build_context *bld,
+                      const struct pipe_stencil_state stencil[2],
+                      LLVMValueRef stencilRefs[2],
+                      LLVMValueRef stencilVals,
+                      LLVMValueRef face)
+{
+   LLVMValueRef res;
+
+   assert(stencil[0].enabled);
+
+   if (stencil[1].enabled && face) {
+      /* do two-sided test */
+      struct lp_build_flow_context *flow_ctx;
+      struct lp_build_if_state if_ctx;
+      LLVMValueRef front_facing;
+      LLVMValueRef zero = LLVMConstReal(LLVMFloatType(), 0.0);
+      LLVMValueRef result = bld->undef;
+
+      flow_ctx = lp_build_flow_create(bld->builder);
+      lp_build_flow_scope_begin(flow_ctx);
+
+      lp_build_flow_scope_declare(flow_ctx, &result);
+
+      /* front_facing = face > 0.0 */
+      front_facing = LLVMBuildFCmp(bld->builder, LLVMRealUGT, face, zero, "");
+
+      lp_build_if(&if_ctx, flow_ctx, bld->builder, front_facing);
+      {
+         result = lp_build_stencil_test_single(bld, &stencil[0],
+                                               stencilRefs[0], stencilVals);
+      }
+      lp_build_else(&if_ctx);
+      {
+         result = lp_build_stencil_test_single(bld, &stencil[1],
+                                               stencilRefs[1], stencilVals);
+      }
+      lp_build_endif(&if_ctx);
+
+      lp_build_flow_scope_end(flow_ctx);
+      lp_build_flow_destroy(flow_ctx);
+
+      res = result;
+   }
+   else {
+      /* do single-side test */
+      res = lp_build_stencil_test_single(bld, &stencil[0],
+                                         stencilRefs[0], stencilVals);
+   }
+
+   return res;
+}
+
+
+/**
+ * Apply the stencil operator (add/sub/keep/etc) to the given vector
+ * of stencil values.
+ * \return  new stencil values vector
+ */
+static LLVMValueRef
+lp_build_stencil_op_single(struct lp_build_context *bld,
+                           const struct pipe_stencil_state *stencil,
+                           enum stencil_op op,
+                           LLVMValueRef stencilRef,
+                           LLVMValueRef stencilVals,
+                           LLVMValueRef mask)
+
+{
+   const unsigned stencilMax = 255; /* XXX fix */
+   struct lp_type type = bld->type;
+   LLVMValueRef res;
+   LLVMValueRef max = lp_build_const_int_vec(type, stencilMax);
+   unsigned stencil_op;
+
+   assert(type.sign);
+
+   switch (op) {
+   case S_FAIL_OP:
+      stencil_op = stencil->fail_op;
+      break;
+   case Z_FAIL_OP:
+      stencil_op = stencil->zfail_op;
+      break;
+   case Z_PASS_OP:
+      stencil_op = stencil->zpass_op;
+      break;
+   default:
+      assert(0 && "Invalid stencil_op mode");
+      stencil_op = PIPE_STENCIL_OP_KEEP;
+   }
+
+   switch (stencil_op) {
+   case PIPE_STENCIL_OP_KEEP:
+      res = stencilVals;
+      /* we can return early for this case */
+      return res;
+   case PIPE_STENCIL_OP_ZERO:
+      res = bld->zero;
+      break;
+   case PIPE_STENCIL_OP_REPLACE:
+      res = stencilRef;
+      break;
+   case PIPE_STENCIL_OP_INCR:
+      res = lp_build_add(bld, stencilVals, bld->one);
+      res = lp_build_min(bld, res, max);
+      break;
+   case PIPE_STENCIL_OP_DECR:
+      res = lp_build_sub(bld, stencilVals, bld->one);
+      res = lp_build_max(bld, res, bld->zero);
+      break;
+   case PIPE_STENCIL_OP_INCR_WRAP:
+      res = lp_build_add(bld, stencilVals, bld->one);
+      res = LLVMBuildAnd(bld->builder, res, max, "");
+      break;
+   case PIPE_STENCIL_OP_DECR_WRAP:
+      res = lp_build_sub(bld, stencilVals, bld->one);
+      res = LLVMBuildAnd(bld->builder, res, max, "");
+      break;
+   case PIPE_STENCIL_OP_INVERT:
+      res = LLVMBuildNot(bld->builder, stencilVals, "");
+      res = LLVMBuildAnd(bld->builder, res, max, "");
+      break;
+   default:
+      assert(0 && "bad stencil op mode");
+      res = NULL;
+   }
+
+   if (stencil->writemask != stencilMax) {
+      /* compute res = (res & mask) | (stencilVals & ~mask) */
+      LLVMValueRef mask = lp_build_const_int_vec(type, stencil->writemask);
+      LLVMValueRef cmask = LLVMBuildNot(bld->builder, mask, "notWritemask");
+      LLVMValueRef t1 = LLVMBuildAnd(bld->builder, res, mask, "t1");
+      LLVMValueRef t2 = LLVMBuildAnd(bld->builder, stencilVals, cmask, "t2");
+      res = LLVMBuildOr(bld->builder, t1, t2, "t1_or_t2");
+   }
+
+   /* only the update the vector elements enabled by 'mask' */
+   res = lp_build_select(bld, mask, res, stencilVals);
+
+   return res;
+}
+
+
+/**
+ * Do the one or two-sided stencil test op/update.
+ */
+static LLVMValueRef
+lp_build_stencil_op(struct lp_build_context *bld,
+                    const struct pipe_stencil_state stencil[2],
+                    enum stencil_op op,
+                    LLVMValueRef stencilRefs[2],
+                    LLVMValueRef stencilVals,
+                    LLVMValueRef mask,
+                    LLVMValueRef face)
+
+{
+   assert(stencil[0].enabled);
+
+   if (stencil[1].enabled && face) {
+      /* do two-sided op */
+      struct lp_build_flow_context *flow_ctx;
+      struct lp_build_if_state if_ctx;
+      LLVMValueRef front_facing;
+      LLVMValueRef zero = LLVMConstReal(LLVMFloatType(), 0.0);
+      LLVMValueRef result = bld->undef;
+
+      flow_ctx = lp_build_flow_create(bld->builder);
+      lp_build_flow_scope_begin(flow_ctx);
+
+      lp_build_flow_scope_declare(flow_ctx, &result);
+
+      /* front_facing = face > 0.0 */
+      front_facing = LLVMBuildFCmp(bld->builder, LLVMRealUGT, face, zero, "");
+
+      lp_build_if(&if_ctx, flow_ctx, bld->builder, front_facing);
+      {
+         result = lp_build_stencil_op_single(bld, &stencil[0], op,
+                                             stencilRefs[0], stencilVals, mask);
+      }
+      lp_build_else(&if_ctx);
+      {
+         result = lp_build_stencil_op_single(bld, &stencil[1], op,
+                                             stencilRefs[1], stencilVals, mask);
+      }
+      lp_build_endif(&if_ctx);
+
+      lp_build_flow_scope_end(flow_ctx);
+      lp_build_flow_destroy(flow_ctx);
+
+      return result;
+   }
+   else {
+      /* do single-sided op */
+      return lp_build_stencil_op_single(bld, &stencil[0], op,
+                                        stencilRefs[0], stencilVals, mask);
+   }
+}
+
 
 
 /**
@@ -109,105 +369,303 @@ lp_depth_type(const struct util_format_description *format_desc,
 
 
 /**
- * Depth test.
+ * Compute bitmask and bit shift to apply to the incoming fragment Z values
+ * and the Z buffer values needed before doing the Z comparison.
+ *
+ * Note that we leave the Z bits in the position that we find them
+ * in the Z buffer (typically 0xffffff00 or 0x00ffffff).  That lets us
+ * get by with fewer bit twiddling steps.
  */
-void
-lp_build_depth_test(LLVMBuilderRef builder,
-                    const struct pipe_depth_state *state,
-                    struct lp_type type,
-                    const struct util_format_description *format_desc,
-                    struct lp_build_mask_context *mask,
-                    LLVMValueRef src,
-                    LLVMValueRef dst_ptr)
+static boolean
+get_z_shift_and_mask(const struct util_format_description *format_desc,
+                     unsigned *shift, unsigned *mask)
 {
-   struct lp_build_context bld;
+   const unsigned total_bits = format_desc->block.bits;
    unsigned z_swizzle;
-   LLVMValueRef dst;
-   LLVMValueRef z_bitmask = NULL;
-   LLVMValueRef test;
-
-   if(!state->enabled)
-      return;
-
+   int chan;
+   unsigned padding_left, padding_right;
+   
    assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
    assert(format_desc->block.width == 1);
    assert(format_desc->block.height == 1);
 
    z_swizzle = format_desc->swizzle[0];
-   if(z_swizzle == UTIL_FORMAT_SWIZZLE_NONE)
-      return;
 
-   /* Sanity checking */
-   assert(z_swizzle < 4);
-   assert(format_desc->block.bits == type.width);
-   if(type.floating) {
-      assert(z_swizzle == 0);
-      assert(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_FLOAT);
-      assert(format_desc->channel[z_swizzle].size == format_desc->block.bits);
+   if (z_swizzle == UTIL_FORMAT_SWIZZLE_NONE)
+      return FALSE;
+
+   padding_right = 0;
+   for (chan = 0; chan < z_swizzle; ++chan)
+      padding_right += format_desc->channel[chan].size;
+
+   padding_left =
+      total_bits - (padding_right + format_desc->channel[z_swizzle].size);
+
+   if (padding_left || padding_right) {
+      unsigned long long mask_left = (1ULL << (total_bits - padding_left)) - 1;
+      unsigned long long mask_right = (1ULL << (padding_right)) - 1;
+      *mask = mask_left ^ mask_right;
    }
    else {
-      assert(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED);
-      assert(format_desc->channel[z_swizzle].normalized);
-      assert(!type.fixed);
-      assert(!type.sign);
-      assert(type.norm);
+      *mask = 0xffffffff;
+   }
+
+   *shift = padding_left;
+
+   return TRUE;
+}
+
+
+/**
+ * Compute bitmask and bit shift to apply to the framebuffer pixel values
+ * to put the stencil bits in the least significant position.
+ * (i.e. 0x000000ff)
+ */
+static boolean
+get_s_shift_and_mask(const struct util_format_description *format_desc,
+                     unsigned *shift, unsigned *mask)
+{
+   unsigned s_swizzle;
+   int chan, sz;
+
+   s_swizzle = format_desc->swizzle[1];
+
+   if (s_swizzle == UTIL_FORMAT_SWIZZLE_NONE)
+      return FALSE;
+
+   *shift = 0;
+   for (chan = 0; chan < s_swizzle; chan++)
+      *shift += format_desc->channel[chan].size;
+
+   sz = format_desc->channel[s_swizzle].size;
+   *mask = (1U << sz) - 1U;
+
+   return TRUE;
+}
+
+
+
+/**
+ * Generate code for performing depth and/or stencil tests.
+ * We operate on a vector of values (typically a 2x2 quad).
+ *
+ * \param depth  the depth test state
+ * \param stencil  the front/back stencil state
+ * \param type  the data type of the fragment depth/stencil values
+ * \param format_desc  description of the depth/stencil surface
+ * \param mask  the alive/dead pixel mask for the quad (vector)
+ * \param stencil_refs  the front/back stencil ref values (scalar)
+ * \param z_src  the incoming depth/stencil values (a 2x2 quad)
+ * \param zs_dst_ptr  pointer to depth/stencil values in framebuffer
+ * \param facing  contains float value indicating front/back facing polygon
+ */
+void
+lp_build_depth_stencil_test(LLVMBuilderRef builder,
+                            const struct pipe_depth_state *depth,
+                            const struct pipe_stencil_state stencil[2],
+                            struct lp_type type,
+                            const struct util_format_description *format_desc,
+                            struct lp_build_mask_context *mask,
+                            LLVMValueRef stencil_refs[2],
+                            LLVMValueRef z_src,
+                            LLVMValueRef zs_dst_ptr,
+                            LLVMValueRef face)
+{
+   struct lp_build_context bld;
+   struct lp_build_context sbld;
+   struct lp_type s_type;
+   LLVMValueRef zs_dst, z_dst = NULL;
+   LLVMValueRef stencil_vals = NULL;
+   LLVMValueRef z_bitmask = NULL, stencil_shift = NULL;
+   LLVMValueRef z_pass = NULL, s_pass_mask = NULL;
+   LLVMValueRef orig_mask = mask->value;
+
+   /* Sanity checking */
+   {
+      const unsigned z_swizzle = format_desc->swizzle[0];
+      const unsigned s_swizzle = format_desc->swizzle[1];
+
+      assert(z_swizzle != UTIL_FORMAT_SWIZZLE_NONE ||
+             s_swizzle != UTIL_FORMAT_SWIZZLE_NONE);
+
+      assert(depth->enabled || stencil[0].enabled);
+
+      assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
+      assert(format_desc->block.width == 1);
+      assert(format_desc->block.height == 1);
+
+      if (stencil[0].enabled) {
+         assert(format_desc->format == PIPE_FORMAT_Z24_UNORM_S8_USCALED ||
+                format_desc->format == PIPE_FORMAT_S8_USCALED_Z24_UNORM);
+      }
+
+      assert(z_swizzle < 4);
+      assert(format_desc->block.bits == type.width);
+      if (type.floating) {
+         assert(z_swizzle == 0);
+         assert(format_desc->channel[z_swizzle].type ==
+                UTIL_FORMAT_TYPE_FLOAT);
+         assert(format_desc->channel[z_swizzle].size ==
+                format_desc->block.bits);
+      }
+      else {
+         assert(format_desc->channel[z_swizzle].type ==
+                UTIL_FORMAT_TYPE_UNSIGNED);
+         assert(format_desc->channel[z_swizzle].normalized);
+         assert(!type.fixed);
+         assert(!type.sign);
+         assert(type.norm);
+      }
    }
 
-   /* Setup build context */
+
+   /* Setup build context for Z vals */
    lp_build_context_init(&bld, builder, type);
 
-   dst = LLVMBuildLoad(builder, dst_ptr, "");
+   /* Setup build context for stencil vals */
+   s_type = lp_type_int_vec(type.width);
+   lp_build_context_init(&sbld, builder, s_type);
+
+   /* Load current z/stencil value from z/stencil buffer */
+   zs_dst = LLVMBuildLoad(builder, zs_dst_ptr, "");
+
+   lp_build_name(zs_dst, "zsbufval");
 
-   lp_build_name(dst, "zsbuf");
 
-   /* Align the source depth bits with the destination's, and mask out any
-    * stencil or padding bits from both */
-   if(format_desc->channel[z_swizzle].size == format_desc->block.bits) {
-      assert(z_swizzle == 0);
-      /* nothing to do */
+   /* Compute and apply the Z/stencil bitmasks and shifts.
+    */
+   {
+      unsigned z_shift, z_mask;
+      unsigned s_shift, s_mask;
+
+      if (get_z_shift_and_mask(format_desc, &z_shift, &z_mask)) {
+         if (z_shift) {
+            LLVMValueRef shift = lp_build_const_int_vec(type, z_shift);
+            z_src = LLVMBuildLShr(builder, z_src, shift, "");
+         }
+
+         if (z_mask != 0xffffffff) {
+            LLVMValueRef mask = lp_build_const_int_vec(type, z_mask);
+            z_src = LLVMBuildAnd(builder, z_src, mask, "");
+            z_dst = LLVMBuildAnd(builder, zs_dst, mask, "");
+            z_bitmask = mask;  /* used below */
+         }
+         else {
+            z_dst = zs_dst;
+         }
+
+         lp_build_name(z_dst, "zsbuf.z");
+      }
+
+      if (get_s_shift_and_mask(format_desc, &s_shift, &s_mask)) {
+         if (s_shift) {
+            LLVMValueRef shift = lp_build_const_int_vec(type, s_shift);
+            stencil_vals = LLVMBuildLShr(builder, zs_dst, shift, "");
+            stencil_shift = shift;  /* used below */
+         }
+         else {
+            stencil_vals = zs_dst;
+         }
+
+         if (s_mask != 0xffffffff) {
+            LLVMValueRef mask = lp_build_const_int_vec(type, s_mask);
+            stencil_vals = LLVMBuildAnd(builder, stencil_vals, mask, "");
+         }
+
+         lp_build_name(stencil_vals, "stencil");
+      }
    }
-   else {
-      unsigned padding_left;
-      unsigned padding_right;
-      unsigned chan;
-
-      assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
-      assert(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED);
-      assert(format_desc->channel[z_swizzle].size <= format_desc->block.bits);
-      assert(format_desc->channel[z_swizzle].normalized);
-
-      padding_right = 0;
-      for(chan = 0; chan < z_swizzle; ++chan)
-         padding_right += format_desc->channel[chan].size;
-      padding_left = format_desc->block.bits -
-                     (padding_right + format_desc->channel[z_swizzle].size);
-
-      if(padding_left || padding_right) {
-         const unsigned long long mask_left = ((unsigned long long)1 << (format_desc->block.bits - padding_left)) - 1;
-         const unsigned long long mask_right = ((unsigned long long)1 << (padding_right)) - 1;
-         z_bitmask = lp_build_int_const_scalar(type, mask_left ^ mask_right);
+
+
+   if (stencil[0].enabled) {
+      /* convert scalar stencil refs into vectors */
+      stencil_refs[0] = lp_build_broadcast_scalar(&bld, stencil_refs[0]);
+      stencil_refs[1] = lp_build_broadcast_scalar(&bld, stencil_refs[1]);
+
+      s_pass_mask = lp_build_stencil_test(&sbld, stencil,
+                                          stencil_refs, stencil_vals, face);
+
+      /* apply stencil-fail operator */
+      {
+         LLVMValueRef s_fail_mask = lp_build_andc(&bld, orig_mask, s_pass_mask);
+         stencil_vals = lp_build_stencil_op(&sbld, stencil, S_FAIL_OP,
+                                            stencil_refs, stencil_vals,
+                                            s_fail_mask, face);
+      }
+   }
+
+   if (depth->enabled) {
+      /* compare src Z to dst Z, returning 'pass' mask */
+      z_pass = lp_build_cmp(&bld, depth->func, z_src, z_dst);
+
+      if (!stencil[0].enabled) {
+         /* We can potentially skip all remaining operations here, but only
+          * if stencil is disabled because we still need to update the stencil
+          * buffer values.  Don't need to update Z buffer values.
+          */
+         lp_build_mask_update(mask, z_pass);
+      }
+
+      if (depth->writemask) {
+         if(z_bitmask)
+            z_bitmask = LLVMBuildAnd(builder, mask->value, z_bitmask, "");
+         else
+            z_bitmask = mask->value;
+
+         z_dst = lp_build_select(&bld, z_bitmask, z_src, z_dst);
       }
 
-      if(padding_left)
-         src = LLVMBuildLShr(builder, src, lp_build_int_const_scalar(type, padding_left), "");
-      if(padding_right)
-         src = LLVMBuildAnd(builder, src, z_bitmask, "");
-      if(padding_left || padding_right)
-         dst = LLVMBuildAnd(builder, dst, z_bitmask, "");
+      if (stencil[0].enabled) {
+         /* update stencil buffer values according to z pass/fail result */
+         LLVMValueRef z_fail_mask, z_pass_mask;
+
+         /* apply Z-fail operator */
+         z_fail_mask = lp_build_andc(&bld, orig_mask, z_pass);
+         stencil_vals = lp_build_stencil_op(&sbld, stencil, Z_FAIL_OP,
+                                            stencil_refs, stencil_vals,
+                                            z_fail_mask, face);
+
+         /* apply Z-pass operator */
+         z_pass_mask = LLVMBuildAnd(bld.builder, orig_mask, z_pass, "");
+         stencil_vals = lp_build_stencil_op(&sbld, stencil, Z_PASS_OP,
+                                            stencil_refs, stencil_vals,
+                                            z_pass_mask, face);
+      }
+   }
+   else {
+      /* No depth test: apply Z-pass operator to stencil buffer values which
+       * passed the stencil test.
+       */
+      s_pass_mask = LLVMBuildAnd(bld.builder, orig_mask, s_pass_mask, "");
+      stencil_vals = lp_build_stencil_op(&sbld, stencil, Z_PASS_OP,
+                                         stencil_refs, stencil_vals,
+                                         s_pass_mask, face);
    }
 
-   lp_build_name(dst, "zsbuf.z");
+   /* The Z bits are already in the right place but we may need to shift the
+    * stencil bits before ORing Z with Stencil to make the final pixel value.
+    */
+   if (stencil_vals && stencil_shift)
+      stencil_vals = LLVMBuildShl(bld.builder, stencil_vals,
+                                  stencil_shift, "");
 
-   test = lp_build_cmp(&bld, state->func, src, dst);
-   lp_build_mask_update(mask, test);
+   /* Finally, merge/store the z/stencil values */
+   if ((depth->enabled && depth->writemask) ||
+       (stencil[0].enabled && stencil[0].writemask)) {
 
-   if(state->writemask) {
-      if(z_bitmask)
-         z_bitmask = LLVMBuildAnd(builder, mask->value, z_bitmask, "");
+      if (z_dst && stencil_vals)
+         zs_dst = LLVMBuildOr(bld.builder, z_dst, stencil_vals, "");
+      else if (z_dst)
+         zs_dst = z_dst;
       else
-         z_bitmask = mask->value;
+         zs_dst = stencil_vals;
 
-      dst = lp_build_select(&bld, z_bitmask, src, dst);
-      LLVMBuildStore(builder, dst, dst_ptr);
+      LLVMBuildStore(builder, zs_dst, zs_dst_ptr);
    }
+
+   if (s_pass_mask)
+      lp_build_mask_update(mask, s_pass_mask);
+
+   if (depth->enabled && stencil[0].enabled)
+      lp_build_mask_update(mask, z_pass);
 }