llvmpipe: handle z32s8x24 depth/stencil format
author Roland Scheidegger <sroland@vmware.com>
Fri, 17 May 2013 22:16:03 +0000 (00:16 +0200)
committer Roland Scheidegger <sroland@vmware.com>
Fri, 17 May 2013 22:32:33 +0000 (00:32 +0200)
We need to split up the depth and stencil values in this case, and there's
some new logic required to handle float depth and stencil simultaneously.
Also make sure we get the 64bit zs clear values and masks propagated
correctly.

src/gallium/auxiliary/gallivm/lp_bld_pack.c
src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
src/gallium/drivers/llvmpipe/lp_bld_depth.c
src/gallium/drivers/llvmpipe/lp_bld_depth.h
src/gallium/drivers/llvmpipe/lp_rast.c
src/gallium/drivers/llvmpipe/lp_rast.h
src/gallium/drivers/llvmpipe/lp_setup.c
src/gallium/drivers/llvmpipe/lp_setup_context.h
src/gallium/drivers/llvmpipe/lp_state_fs.c

index 9eb9ab0261fca8961651deaaa0a32b50a7921aa1..0a57e39611c585999821021d99c5b2789315eaf8 100644 (file)
@@ -255,7 +255,8 @@ lp_build_concat_n(struct gallivm_state *gallivm,
 /**
  * Interleave vector elements.
  *
- * Matches the PUNPCKLxx and PUNPCKHxx SSE instructions.
+ * Matches the PUNPCKLxx and PUNPCKHxx SSE instructions
+ * (but not for 256bit AVX vectors).
  */
 LLVMValueRef
 lp_build_interleave2(struct gallivm_state *gallivm,
index cc29c5c885facd97bc70b8244892c7892d90f05d..7ac0029051db2c5585402753c164149047ace47d 100644 (file)
@@ -1545,6 +1545,11 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
          bld.texel_type = lp_type_uint_vec(type.width, type.width * type.length);
       }
    }
+   else if (util_format_has_stencil(bld.format_desc) &&
+       !util_format_has_depth(bld.format_desc)) {
+      /* for stencil only formats, sample stencil (uint) */
+      bld.texel_type = lp_type_int_vec(type.width, type.width * type.length);
+   }
 
    if (!static_texture_state->level_zero_only) {
       derived_sampler_state.min_mip_filter = static_sampler_state->min_mip_filter;
index 1cd36b87909665907b6646e3a0e3cf066a919c3a..2376ca73c51e148a8f1d4007bd5303e8223b8d60 100644 (file)
@@ -313,7 +313,7 @@ lp_depth_type(const struct util_format_description *format_desc,
       if (format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_FLOAT) {
          type.floating = TRUE;
          assert(z_swizzle == 0);
-         assert(format_desc->channel[z_swizzle].size == format_desc->block.bits);
+         assert(format_desc->channel[z_swizzle].size == 32);
       }
       else if(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED) {
          assert(format_desc->block.bits <= 32);
@@ -347,15 +347,18 @@ static boolean
 get_z_shift_and_mask(const struct util_format_description *format_desc,
                      unsigned *shift, unsigned *width, unsigned *mask)
 {
-   const unsigned total_bits = format_desc->block.bits;
+   unsigned total_bits;
    unsigned z_swizzle;
    unsigned chan;
    unsigned padding_left, padding_right;
-   
+
    assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
    assert(format_desc->block.width == 1);
    assert(format_desc->block.height == 1);
 
+   /* 64bit d/s format is special: the 32 z bits were already extracted */
+   total_bits = format_desc->block.bits > 32 ? 32 : format_desc->block.bits;
+
    z_swizzle = format_desc->swizzle[0];
 
    if (z_swizzle == UTIL_FORMAT_SWIZZLE_NONE)
@@ -402,6 +405,14 @@ get_s_shift_and_mask(const struct util_format_description *format_desc,
    if (s_swizzle == UTIL_FORMAT_SWIZZLE_NONE)
       return FALSE;
 
+   /* just special case 64bit d/s format */
+   if (format_desc->block.bits > 32) {
+      assert(format_desc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
+      *shift = 0;
+      *mask = 0xff;
+      return TRUE;
+   }
+
    *shift = 0;
    for (chan = 0; chan < s_swizzle; chan++)
       *shift += format_desc->channel[chan].size;
@@ -517,24 +528,31 @@ lp_build_occlusion_count(struct gallivm_state *gallivm,
  * \param loop_counter  the current loop iteration
  * \param depth_ptr  pointer to the depth/stencil values of this 4x4 block
  * \param depth_stride  stride of the depth/stencil buffer
+ * \param z_fb  contains z values loaded from fb (may include padding)
+ * \param s_fb  contains s values loaded from fb (may include padding)
  */
-LLVMValueRef
+void
 lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm,
                                      struct lp_type z_src_type,
                                      const struct util_format_description *format_desc,
                                      LLVMValueRef depth_ptr,
                                      LLVMValueRef depth_stride,
+                                     LLVMValueRef *z_fb,
+                                     LLVMValueRef *s_fb,
                                      LLVMValueRef loop_counter)
 {
    LLVMBuilderRef builder = gallivm->builder;
    LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 4];
-   LLVMValueRef zs_dst, zs_dst1, zs_dst2;
+   LLVMValueRef zs_dst1, zs_dst2;
    LLVMValueRef zs_dst_ptr;
    LLVMValueRef depth_offset1, depth_offset2;
-   unsigned depth_bits = format_desc->block.bits/8;
+   LLVMTypeRef load_ptr_type;
+   unsigned depth_bytes = format_desc->block.bits / 8;
    struct lp_type zs_type = lp_depth_type(format_desc, z_src_type.length);
    struct lp_type zs_load_type = zs_type;
+
    zs_load_type.length = zs_load_type.length / 2;
+   load_ptr_type = LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0);
 
    if (z_src_type.length == 4) {
       unsigned i;
@@ -545,7 +563,7 @@ lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm,
       LLVMValueRef offset2 = LLVMBuildMul(builder, loopmsb,
                                           depth_stride, "");
       depth_offset1 = LLVMBuildMul(builder, looplsb,
-                                   lp_build_const_int32(gallivm, depth_bits * 2), "");
+                                   lp_build_const_int32(gallivm, depth_bytes * 2), "");
       depth_offset1 = LLVMBuildAdd(builder, depth_offset1, offset2, "");
 
       /* just concatenate the loaded 2x2 values into 4-wide vector */
@@ -564,7 +582,6 @@ lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm,
        * 0,1,4,5,2,3,6,7) - not so hot with avx unfortunately.
        */
       for (i = 0; i < 8; i++) {
-
          shuffles[i] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2);
       }
    }
@@ -573,63 +590,103 @@ lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm,
 
    /* Load current z/stencil values from z/stencil buffer */
    zs_dst_ptr = LLVMBuildGEP(builder, depth_ptr, &depth_offset1, 1, "");
-   zs_dst_ptr = LLVMBuildBitCast(builder,
-                                 zs_dst_ptr,
-                                 LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0), "");
+   zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, load_ptr_type, "");
    zs_dst1 = LLVMBuildLoad(builder, zs_dst_ptr, "");
    zs_dst_ptr = LLVMBuildGEP(builder, depth_ptr, &depth_offset2, 1, "");
-   zs_dst_ptr = LLVMBuildBitCast(builder,
-                                 zs_dst_ptr,
-                                 LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0), "");
+   zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, load_ptr_type, "");
    zs_dst2 = LLVMBuildLoad(builder, zs_dst_ptr, "");
 
-   zs_dst = LLVMBuildShuffleVector(builder, zs_dst1, zs_dst2,
-                                   LLVMConstVector(shuffles, zs_type.length), "");
+   *z_fb = LLVMBuildShuffleVector(builder, zs_dst1, zs_dst2,
+                                  LLVMConstVector(shuffles, zs_type.length), "");
+   *s_fb = *z_fb;
 
    if (format_desc->block.bits < z_src_type.width) {
       /* Extend destination ZS values (e.g., when reading from Z16_UNORM) */
-      zs_dst = LLVMBuildZExt(builder, zs_dst, lp_build_int_vec_type(gallivm, z_src_type), "");
+      *z_fb = LLVMBuildZExt(builder, *z_fb,
+                            lp_build_int_vec_type(gallivm, z_src_type), "");
    }
 
-   lp_build_name(zs_dst, "zs_dst");
+   else if (format_desc->block.bits > 32) {
+      /* rely on llvm to handle too wide vector we have here nicely */
+      unsigned i;
+      struct lp_type typex2 = zs_type;
+      struct lp_type s_type = zs_type;
+      LLVMValueRef shuffles1[LP_MAX_VECTOR_LENGTH / 4];
+      LLVMValueRef shuffles2[LP_MAX_VECTOR_LENGTH / 4];
+      LLVMValueRef tmp;
+
+      typex2.width = typex2.width / 2;
+      typex2.length = typex2.length * 2;
+      s_type.width = s_type.width / 2;
+      s_type.floating = 0;
+
+      tmp = LLVMBuildBitCast(builder, *z_fb,
+                             lp_build_vec_type(gallivm, typex2), "");
+
+      for (i = 0; i < zs_type.length; i++) {
+         shuffles1[i] = lp_build_const_int32(gallivm, i * 2);
+         shuffles2[i] = lp_build_const_int32(gallivm, i * 2 + 1);
+      }
+      *z_fb = LLVMBuildShuffleVector(builder, tmp, tmp,
+                                     LLVMConstVector(shuffles1, zs_type.length), "");
+      *s_fb = LLVMBuildShuffleVector(builder, tmp, tmp,
+                                     LLVMConstVector(shuffles2, zs_type.length), "");
+      *s_fb = LLVMBuildBitCast(builder, *s_fb,
+                               lp_build_vec_type(gallivm, s_type), "");
+      lp_build_name(*s_fb, "s_dst");
+   }
 
-   return zs_dst;
+   lp_build_name(*z_fb, "z_dst");
+   lp_build_name(*s_fb, "s_dst");
+   lp_build_name(*z_fb, "z_dst");
 }
 
 /**
  * Store depth/stencil values.
  * Incoming values are swizzled (typically n 2x2 quads), stored linear.
- * If there's a mask it will do reload/select/store otherwise just store.
+ * If there's a mask it will do select/store otherwise just store.
  *
  * \param type  the data type of the fragment depth/stencil values
  * \param format_desc  description of the depth/stencil surface
  * \param mask  the alive/dead pixel mask for the quad (vector)
+ * \param z_fb  z values read from fb (with padding)
+ * \param s_fb  s values read from fb (with padding)
  * \param loop_counter  the current loop iteration
  * \param depth_ptr  pointer to the depth/stencil values of this 4x4 block
  * \param depth_stride  stride of the depth/stencil buffer
- * \param zs_value  the depth/stencil values to store
+ * \param z_value the depth values to store (with padding)
+ * \param s_value the stencil values to store (with padding)
  */
 void
 lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm,
                                       struct lp_type z_src_type,
                                       const struct util_format_description *format_desc,
                                       struct lp_build_mask_context *mask,
+                                      LLVMValueRef z_fb,
+                                      LLVMValueRef s_fb,
                                       LLVMValueRef loop_counter,
                                       LLVMValueRef depth_ptr,
                                       LLVMValueRef depth_stride,
-                                      LLVMValueRef zs_value)
+                                      LLVMValueRef z_value,
+                                      LLVMValueRef s_value)
 {
    struct lp_build_context z_bld;
    LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 4];
    LLVMBuilderRef builder = gallivm->builder;
    LLVMValueRef mask_value = NULL;
-   LLVMValueRef zs_dst = NULL, zs_dst1, zs_dst2;
+   LLVMValueRef zs_dst1, zs_dst2;
    LLVMValueRef zs_dst_ptr1, zs_dst_ptr2;
    LLVMValueRef depth_offset1, depth_offset2;
-   unsigned depth_bits = format_desc->block.bits/8;
+   LLVMTypeRef load_ptr_type;
+   unsigned depth_bytes = format_desc->block.bits / 8;
    struct lp_type zs_type = lp_depth_type(format_desc, z_src_type.length);
    struct lp_type zs_load_type = zs_type;
+
    zs_load_type.length = zs_load_type.length / 2;
+   load_ptr_type = LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0);
+
+   if (zs_type.width > 32)
+      zs_type.width = 32;
 
    lp_build_context_init(&z_bld, gallivm, zs_type);
 
@@ -638,7 +695,6 @@ lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm,
     * outside the fs loop to avoid all the swizzle stuff.
     */
    if (z_src_type.length == 4) {
-      unsigned i;
       LLVMValueRef looplsb = LLVMBuildAnd(builder, loop_counter,
                                           lp_build_const_int32(gallivm, 1), "");
       LLVMValueRef loopmsb = LLVMBuildAnd(builder, loop_counter,
@@ -646,13 +702,8 @@ lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm,
       LLVMValueRef offset2 = LLVMBuildMul(builder, loopmsb,
                                           depth_stride, "");
       depth_offset1 = LLVMBuildMul(builder, looplsb,
-                                   lp_build_const_int32(gallivm, depth_bits * 2), "");
+                                   lp_build_const_int32(gallivm, depth_bytes * 2), "");
       depth_offset1 = LLVMBuildAdd(builder, depth_offset1, offset2, "");
-
-      /* just concatenate the loaded 2x2 values into 4-wide vector */
-      for (i = 0; i < 4; i++) {
-         shuffles[i] = lp_build_const_int32(gallivm, i);
-      }
    }
    else {
       unsigned i;
@@ -669,55 +720,75 @@ lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm,
       }
    }
 
-
    depth_offset2 = LLVMBuildAdd(builder, depth_offset1, depth_stride, "");
 
    zs_dst_ptr1 = LLVMBuildGEP(builder, depth_ptr, &depth_offset1, 1, "");
-   zs_dst_ptr1 = LLVMBuildBitCast(builder,
-                                 zs_dst_ptr1,
-                                 LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0), "");
+   zs_dst_ptr1 = LLVMBuildBitCast(builder, zs_dst_ptr1, load_ptr_type, "");
    zs_dst_ptr2 = LLVMBuildGEP(builder, depth_ptr, &depth_offset2, 1, "");
-   zs_dst_ptr2 = LLVMBuildBitCast(builder,
-                                  zs_dst_ptr2,
-                                  LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0), "");
+   zs_dst_ptr2 = LLVMBuildBitCast(builder, zs_dst_ptr2, load_ptr_type, "");
 
-   if (mask) {
-      zs_dst1 = LLVMBuildLoad(builder, zs_dst_ptr1, "");
-      zs_dst2 = LLVMBuildLoad(builder, zs_dst_ptr2, "");
-      zs_dst = LLVMBuildShuffleVector(builder, zs_dst1, zs_dst2,
-                                      LLVMConstVector(shuffles, zs_type.length),
-                                      "zsbufval");
+   if (format_desc->block.bits > 32) {
+      s_value = LLVMBuildBitCast(builder, s_value, z_bld.vec_type, "");
+   }
 
+   if (mask) {
       mask_value = lp_build_mask_value(mask);
+      z_value = lp_build_select(&z_bld, mask_value, z_value, z_fb);
+      if (format_desc->block.bits > 32) {
+         s_fb = LLVMBuildBitCast(builder, s_fb, z_bld.vec_type, "");
+         s_value = lp_build_select(&z_bld, mask_value, s_value, s_fb);
+      }
    }
 
    if (zs_type.width < z_src_type.width) {
-      /* Truncate incoming ZS and mask values (e.g., when writing to Z16_UNORM) */
-      zs_value = LLVMBuildTrunc(builder, zs_value, z_bld.vec_type, "");
-      if (mask)
-         mask_value = LLVMBuildTrunc(builder, mask_value, z_bld.vec_type, "");
+      /* Truncate ZS values (e.g., when writing to Z16_UNORM) */
+      z_value = LLVMBuildTrunc(builder, z_value, z_bld.vec_type, "");
    }
 
-   if (mask) {
-      zs_value = lp_build_select(&z_bld, mask_value, zs_value, zs_dst);
-   }
-
-   if (z_src_type.length == 4) {
-      zs_dst1 = lp_build_extract_range(gallivm, zs_value, 0, 2);
-      zs_dst2 = lp_build_extract_range(gallivm, zs_value, 2, 2);
+   if (format_desc->block.bits <= 32) {
+      if (z_src_type.length == 4) {
+         zs_dst1 = lp_build_extract_range(gallivm, z_value, 0, 2);
+         zs_dst2 = lp_build_extract_range(gallivm, z_value, 2, 2);
+      }
+      else {
+         assert(z_src_type.length == 8);
+         zs_dst1 = LLVMBuildShuffleVector(builder, z_value, z_value,
+                                          LLVMConstVector(&shuffles[0],
+                                                          zs_load_type.length), "");
+         zs_dst2 = LLVMBuildShuffleVector(builder, z_value, z_value,
+                                          LLVMConstVector(&shuffles[4],
+                                                          zs_load_type.length), "");
+      }
    }
    else {
-      assert(z_src_type.length == 8);
-      zs_dst1 = LLVMBuildShuffleVector(builder, zs_value, zs_value,
-                                       LLVMConstVector(&shuffles[0],
-                                                       zs_load_type.length),
-                                       "");
-      zs_dst2 = LLVMBuildShuffleVector(builder, zs_value, zs_value,
-                                       LLVMConstVector(&shuffles[4],
-                                                       zs_load_type.length),
-                                       "");
-
+      if (z_src_type.length == 4) {
+         zs_dst1 = lp_build_interleave2(gallivm, zs_type,
+                                        z_value, s_value, 0);
+         zs_dst2 = lp_build_interleave2(gallivm, zs_type,
+                                        z_value, s_value, 1);
+      }
+      else {
+         unsigned i;
+         LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 2];
+         assert(z_src_type.length == 8);
+         for (i = 0; i < 8; i++) {
+            shuffles[i*2] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2);
+            shuffles[i*2+1] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2 +
+                                                   z_src_type.length);
+         }
+         zs_dst1 = LLVMBuildShuffleVector(builder, z_value, s_value,
+                                          LLVMConstVector(&shuffles[0],
+                                                          z_src_type.length), "");
+         zs_dst2 = LLVMBuildShuffleVector(builder, z_value, s_value,
+                                          LLVMConstVector(&shuffles[8],
+                                                          z_src_type.length), "");
+      }
+      zs_dst1 = LLVMBuildBitCast(builder, zs_dst1,
+                                 lp_build_vec_type(gallivm, zs_load_type), "");
+      zs_dst2 = LLVMBuildBitCast(builder, zs_dst2,
+                                 lp_build_vec_type(gallivm, zs_load_type), "");
    }
+
    LLVMBuildStore(builder, zs_dst1, zs_dst_ptr1);
    LLVMBuildStore(builder, zs_dst2, zs_dst_ptr2);
 }
@@ -745,13 +816,14 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
                             struct lp_build_mask_context *mask,
                             LLVMValueRef stencil_refs[2],
                             LLVMValueRef z_src,
-                            LLVMValueRef zs_dst,
+                            LLVMValueRef z_fb,
+                            LLVMValueRef s_fb,
                             LLVMValueRef face,
-                            LLVMValueRef *zs_value,
+                            LLVMValueRef *z_value,
+                            LLVMValueRef *s_value,
                             boolean do_branch)
 {
    LLVMBuilderRef builder = gallivm->builder;
-   struct lp_type zs_type;
    struct lp_type z_type;
    struct lp_build_context z_bld;
    struct lp_build_context s_bld;
@@ -763,7 +835,7 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
    LLVMValueRef z_pass = NULL, s_pass_mask = NULL;
    LLVMValueRef orig_mask = lp_build_mask_value(mask);
    LLVMValueRef front_facing = NULL;
-
+   boolean have_z, have_s;
 
    /*
     * Depths are expected to be between 0 and 1, even if they are stored in
@@ -780,12 +852,9 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
    }
 
    /* Pick the type matching the depth-stencil format. */
-   zs_type = lp_depth_type(format_desc, z_src_type.length);
+   z_type = lp_depth_type(format_desc, z_src_type.length);
 
    /* Pick the intermediate type for depth operations. */
-   z_type = zs_type;
-   /* FIXME: Cope with a depth test type with higher bit width. */
-   assert(zs_type.width <= z_src_type.width);
    z_type.width = z_src_type.width;
    assert(z_type.length == z_src_type.length);
 
@@ -819,13 +888,11 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
 
       if (depth->enabled) {
          assert(z_swizzle < 4);
-         assert(format_desc->block.bits <= z_type.width);
          if (z_type.floating) {
             assert(z_swizzle == 0);
             assert(format_desc->channel[z_swizzle].type ==
                    UTIL_FORMAT_TYPE_FLOAT);
-            assert(format_desc->channel[z_swizzle].size ==
-                   format_desc->block.bits);
+            assert(format_desc->channel[z_swizzle].size == 32);
          }
          else {
             assert(format_desc->channel[z_swizzle].type ==
@@ -849,7 +916,13 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
    {
       unsigned s_shift, s_mask;
 
-      if (get_z_shift_and_mask(format_desc, &z_shift, &z_width, &z_mask)) {
+      z_dst = z_fb;
+      stencil_vals = s_fb;
+
+      have_z = get_z_shift_and_mask(format_desc, &z_shift, &z_width, &z_mask);
+      have_s = get_s_shift_and_mask(format_desc, &s_shift, &s_mask);
+
+      if (have_z) {
          if (z_mask != 0xffffffff) {
             z_bitmask = lp_build_const_int_vec(gallivm, z_type, z_mask);
          }
@@ -859,26 +932,20 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
           */
          if (z_shift) {
             LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_type, z_shift);
-            z_dst = LLVMBuildLShr(builder, zs_dst, shift, "z_dst");
+            z_dst = LLVMBuildLShr(builder, z_dst, shift, "z_dst");
          } else if (z_bitmask) {
-           /* TODO: Instead of loading a mask from memory and ANDing, it's
-            * probably faster to just shake the bits with two shifts. */
-            z_dst = LLVMBuildAnd(builder, zs_dst, z_bitmask, "z_dst");
+            z_dst = LLVMBuildAnd(builder, z_dst, z_bitmask, "z_dst");
          } else {
-            z_dst = zs_dst;
             lp_build_name(z_dst, "z_dst");
          }
       }
 
-      if (get_s_shift_and_mask(format_desc, &s_shift, &s_mask)) {
+      if (have_s) {
          if (s_shift) {
             LLVMValueRef shift = lp_build_const_int_vec(gallivm, s_type, s_shift);
-            stencil_vals = LLVMBuildLShr(builder, zs_dst, shift, "");
+            stencil_vals = LLVMBuildLShr(builder, stencil_vals, shift, "");
             stencil_shift = shift;  /* used below */
          }
-         else {
-            stencil_vals = zs_dst;
-         }
 
          if (s_mask != 0xffffffff) {
             LLVMValueRef mask = lp_build_const_int_vec(gallivm, s_type, s_mask);
@@ -997,7 +1064,7 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
          LLVMValueRef z_fail_mask, z_pass_mask;
 
          /* apply Z-fail operator */
-         z_fail_mask = lp_build_andnot(&z_bld, orig_mask, z_pass);
+         z_fail_mask = lp_build_andnot(&s_bld, orig_mask, z_pass);
          stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_FAIL_OP,
                                             stencil_refs, stencil_vals,
                                             z_fail_mask, front_facing);
@@ -1019,8 +1086,8 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
                                          s_pass_mask, front_facing);
    }
 
-   /* Put Z and ztencil bits in the right place */
-   if (z_dst && z_shift) {
+   /* Put Z and stencil bits in the right place */
+   if (have_z && z_shift) {
       LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_type, z_shift);
       z_dst = LLVMBuildShl(builder, z_dst, shift, "");
    }
@@ -1028,18 +1095,24 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
       stencil_vals = LLVMBuildShl(builder, stencil_vals,
                                   stencil_shift, "");
 
-   /* Finally, merge/store the z/stencil values */
+   /* Finally, merge the z/stencil values */
    if ((depth->enabled && depth->writemask) ||
-       (stencil[0].enabled && stencil[0].writemask)) {
-
-      if (z_dst && stencil_vals)
-         zs_dst = LLVMBuildOr(builder, z_dst, stencil_vals, "");
-      else if (z_dst)
-         zs_dst = z_dst;
-      else
-         zs_dst = stencil_vals;
-
-      *zs_value = zs_dst;
+       (stencil[0].enabled && (stencil[0].writemask ||
+                               (stencil[1].enabled && stencil[1].writemask)))) {
+
+      if (format_desc->block.bits <= 32) {
+         if (have_z && have_s)
+            *z_value = LLVMBuildOr(builder, z_dst, stencil_vals, "");
+         else if (have_z)
+            *z_value = z_dst;
+         else
+            *z_value = stencil_vals;
+         *s_value = *z_value;
+      }
+      else {
+         *z_value = z_dst;
+         *s_value = stencil_vals;
+      }
    }
 
    if (s_pass_mask)
index c000494667dc2ee6fa972083f3d306f2c78a4a25..2534dc309cea4ba738e471e236ddd230f95cc7c5 100644 (file)
@@ -63,17 +63,21 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
                             struct lp_build_mask_context *mask,
                             LLVMValueRef stencil_refs[2],
                             LLVMValueRef z_src,
-                            LLVMValueRef zs_dst,
+                            LLVMValueRef z_fb,
+                            LLVMValueRef s_fb,
                             LLVMValueRef face,
-                            LLVMValueRef *zs_value,
+                            LLVMValueRef *z_value,
+                            LLVMValueRef *s_value,
                             boolean do_branch);
 
-LLVMValueRef
+void
 lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm,
                                      struct lp_type z_src_type,
                                      const struct util_format_description *format_desc,
                                      LLVMValueRef depth_ptr,
                                      LLVMValueRef depth_stride,
+                                     LLVMValueRef *z_fb,
+                                     LLVMValueRef *s_fb,
                                      LLVMValueRef loop_counter);
 
 void
@@ -81,10 +85,13 @@ lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm,
                                       struct lp_type z_src_type,
                                       const struct util_format_description *format_desc,
                                       struct lp_build_mask_context *mask,
+                                      LLVMValueRef z_fb,
+                                      LLVMValueRef s_fb,
                                       LLVMValueRef loop_counter,
                                       LLVMValueRef depth_ptr,
                                       LLVMValueRef depth_stride,
-                                      LLVMValueRef zs_value);
+                                      LLVMValueRef z_value,
+                                      LLVMValueRef s_value);
 
 
 void
index a557db4b4dc2c77b3350125ac81c92c1d05f5022..8a4b00f785d6e3734b0c019f3e38e04f2beedf48 100644 (file)
@@ -193,8 +193,10 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task,
                        const union lp_rast_cmd_arg arg)
 {
    const struct lp_scene *scene = task->scene;
-   uint32_t clear_value = arg.clear_zstencil.value;
-   uint32_t clear_mask = arg.clear_zstencil.mask;
+   uint64_t clear_value64 = arg.clear_zstencil.value;
+   uint64_t clear_mask64 = arg.clear_zstencil.mask;
+   uint32_t clear_value = (uint32_t) clear_value64;
+   uint32_t clear_mask = (uint32_t) clear_mask64;
    const unsigned height = TILE_SIZE;
    const unsigned width = TILE_SIZE;
    const unsigned block_size = scene->zsbuf.blocksize;
@@ -260,6 +262,28 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task,
             }
          }
          break;
+      case 8:
+         clear_value64 &= clear_mask64;
+         if (clear_mask64 == 0xffffffffffULL) {
+            for (i = 0; i < height; i++) {
+               uint64_t *row = (uint64_t *)dst;
+               for (j = 0; j < width; j++)
+                  *row++ = clear_value64;
+               dst += dst_stride;
+            }
+         }
+         else {
+            for (i = 0; i < height; i++) {
+               uint64_t *row = (uint64_t *)dst;
+               for (j = 0; j < width; j++) {
+                  uint64_t tmp = ~clear_mask64 & *row;
+                  *row++ = clear_value64 | tmp;
+               }
+               dst += dst_stride;
+            }
+         }
+         break;
+
       default:
          assert(0);
          break;
index 2f5fa2276191067690ba680740971f1766649629..8dd3615e78adf88ff74b381f2356f575db9f2eb3 100644 (file)
@@ -154,8 +154,8 @@ union lp_rast_cmd_arg {
    const struct lp_rast_state *set_state;
    union pipe_color_union clear_color;
    struct {
-      uint32_t value;
-      uint32_t mask;
+      uint64_t value;
+      uint64_t mask;
    } clear_zstencil;
    const struct lp_rast_state *state;
    struct lp_fence *fence;
@@ -217,7 +217,7 @@ lp_rast_arg_fence( struct lp_fence *fence )
 
 
 static INLINE union lp_rast_cmd_arg
-lp_rast_arg_clearzs( unsigned value, unsigned mask )
+lp_rast_arg_clearzs( uint64_t value, uint64_t mask )
 {
    union lp_rast_cmd_arg arg;
    arg.clear_zstencil.value = value;
index f9cbdaba62c25736604d8f62ac83910331b3817c..9fef34e77e051954303e1d3fab67d70b48a18dcb 100644 (file)
@@ -390,8 +390,8 @@ lp_setup_try_clear( struct lp_setup_context *setup,
                     unsigned stencil,
                     unsigned flags )
 {
-   uint32_t zsmask = 0;
-   uint32_t zsvalue = 0;
+   uint64_t zsmask = 0;
+   uint64_t zsvalue = 0;
    union lp_rast_cmd_arg color_arg;
    unsigned i;
 
@@ -404,16 +404,16 @@ lp_setup_try_clear( struct lp_setup_context *setup,
 
    if (flags & PIPE_CLEAR_DEPTHSTENCIL) {
       uint32_t zmask = (flags & PIPE_CLEAR_DEPTH) ? ~0 : 0;
-      uint32_t smask = (flags & PIPE_CLEAR_STENCIL) ? ~0 : 0;
+      uint8_t smask = (flags & PIPE_CLEAR_STENCIL) ? ~0 : 0;
 
-      zsvalue = util_pack_z_stencil(setup->fb.zsbuf->format,
-                                    depth,
-                                    stencil);
+      zsvalue = util_pack64_z_stencil(setup->fb.zsbuf->format,
+                                      depth,
+                                      stencil);
 
 
-      zsmask = util_pack_mask_z_stencil(setup->fb.zsbuf->format,
-                                        zmask,
-                                        smask);
+      zsmask = util_pack64_mask_z_stencil(setup->fb.zsbuf->format,
+                                          zmask,
+                                          smask);
 
       zsvalue &= zsmask;
    }
index efc2ab768136732dfc8bcb5e26205a2635aeac9e..6b35a0209ca78c9aeddf9b6c3503b880d5349e9d 100644 (file)
@@ -112,8 +112,8 @@ struct lp_setup_context
    struct {
       unsigned flags;
       union lp_rast_cmd_arg color;    /**< lp_rast_clear_color() cmd */
-      unsigned zsmask;
-      unsigned zsvalue;               /**< lp_rast_clear_zstencil() cmd */
+      uint64_t zsmask;
+      uint64_t zsvalue;               /**< lp_rast_clear_zstencil() cmd */
    } clear;
 
    enum setup_state {
index 69212109a87d92f18884f85ac1c7b3d216162c47..1dfc75a42dce942eccd25d6737b4b2b24e15395b 100644 (file)
@@ -239,7 +239,8 @@ generate_fs_loop(struct gallivm_state *gallivm,
    LLVMValueRef mask_ptr, mask_val;
    LLVMValueRef consts_ptr;
    LLVMValueRef z;
-   LLVMValueRef zs_value = NULL;
+   LLVMValueRef z_value, s_value;
+   LLVMValueRef z_fb, s_fb;
    LLVMValueRef stencil_refs[2];
    LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
    struct lp_build_for_loop_state loop_state;
@@ -259,8 +260,7 @@ generate_fs_loop(struct gallivm_state *gallivm,
    memset(&system_values, 0, sizeof(system_values));
 
    if (key->depth.enabled ||
-       key->stencil[0].enabled ||
-       key->stencil[1].enabled) {
+       key->stencil[0].enabled) {
 
       zs_format_desc = util_format_description(key->zsbuf_format);
       assert(zs_format_desc);
@@ -281,7 +281,9 @@ generate_fs_loop(struct gallivm_state *gallivm,
       }
 
       if (!(key->depth.enabled && key->depth.writemask) &&
-          !(key->stencil[0].enabled && key->stencil[0].writemask))
+          !((key->stencil[0].enabled && (key->stencil[0].writemask ||
+                                        (key->stencil[1].enabled &&
+                                         key->stencil[1].writemask)))))
          depth_mode &= ~(LATE_DEPTH_WRITE | EARLY_DEPTH_WRITE);
    }
    else {
@@ -337,11 +339,10 @@ generate_fs_loop(struct gallivm_state *gallivm,
    z = interp->pos[2];
 
    if (depth_mode & EARLY_DEPTH_TEST) {
-      LLVMValueRef zs_dst_val;
-      zs_dst_val = lp_build_depth_stencil_load_swizzled(gallivm, type,
-                                                        zs_format_desc,
-                                                        depth_ptr, depth_stride,
-                                                        loop_state.counter);
+      lp_build_depth_stencil_load_swizzled(gallivm, type,
+                                           zs_format_desc,
+                                           depth_ptr, depth_stride,
+                                           &z_fb, &s_fb, loop_state.counter);
       lp_build_depth_stencil_test(gallivm,
                                   &key->depth,
                                   key->stencil,
@@ -349,16 +350,16 @@ generate_fs_loop(struct gallivm_state *gallivm,
                                   zs_format_desc,
                                   &mask,
                                   stencil_refs,
-                                  z,
-                                  zs_dst_val,
+                                  z, z_fb, s_fb,
                                   facing,
-                                  &zs_value,
+                                  &z_value, &s_value,
                                   !simple_shader);
 
       if (depth_mode & EARLY_DEPTH_WRITE) {
          lp_build_depth_stencil_write_swizzled(gallivm, type, zs_format_desc,
-                                               NULL, loop_state.counter,
-                                               depth_ptr, depth_stride, zs_value);
+                                               NULL, NULL, NULL, loop_state.counter,
+                                               depth_ptr, depth_stride,
+                                               z_value, s_value);
       }
    }
 
@@ -394,7 +395,6 @@ generate_fs_loop(struct gallivm_state *gallivm,
 
    /* Late Z test */
    if (depth_mode & LATE_DEPTH_TEST) {
-      LLVMValueRef zs_dst_val;
       int pos0 = find_output_by_semantic(&shader->info.base,
                                          TGSI_SEMANTIC_POSITION,
                                          0);
@@ -403,10 +403,10 @@ generate_fs_loop(struct gallivm_state *gallivm,
          z = LLVMBuildLoad(builder, outputs[pos0][2], "output.z");
       }
 
-      zs_dst_val = lp_build_depth_stencil_load_swizzled(gallivm, type,
-                                                        zs_format_desc,
-                                                        depth_ptr, depth_stride,
-                                                        loop_state.counter);
+      lp_build_depth_stencil_load_swizzled(gallivm, type,
+                                           zs_format_desc,
+                                           depth_ptr, depth_stride,
+                                           &z_fb, &s_fb, loop_state.counter);
 
       lp_build_depth_stencil_test(gallivm,
                                   &key->depth,
@@ -415,16 +415,16 @@ generate_fs_loop(struct gallivm_state *gallivm,
                                   zs_format_desc,
                                   &mask,
                                   stencil_refs,
-                                  z,
-                                  zs_dst_val,
+                                  z, z_fb, s_fb,
                                   facing,
-                                  &zs_value,
+                                  &z_value, &s_value,
                                   !simple_shader);
       /* Late Z write */
       if (depth_mode & LATE_DEPTH_WRITE) {
          lp_build_depth_stencil_write_swizzled(gallivm, type, zs_format_desc,
-                                               NULL, loop_state.counter,
-                                               depth_ptr, depth_stride, zs_value);
+                                               NULL, NULL, NULL, loop_state.counter,
+                                               depth_ptr, depth_stride,
+                                               z_value, s_value);
       }
    }
    else if ((depth_mode & EARLY_DEPTH_TEST) &&
@@ -435,8 +435,9 @@ generate_fs_loop(struct gallivm_state *gallivm,
        * write that out.
        */
       lp_build_depth_stencil_write_swizzled(gallivm, type, zs_format_desc,
-                                            &mask, loop_state.counter,
-                                            depth_ptr, depth_stride, zs_value);
+                                            &mask, z_fb, s_fb, loop_state.counter,
+                                            depth_ptr, depth_stride,
+                                            z_value, s_value);
    }