llvmpipe: get rid of depth swizzling.
authorRoland Scheidegger <sroland@vmware.com>
Wed, 1 May 2013 15:54:08 +0000 (17:54 +0200)
committerRoland Scheidegger <sroland@vmware.com>
Fri, 3 May 2013 19:36:20 +0000 (21:36 +0200)
By eliminating this, we no longer need to copy between linear and swizzled layout.
This is probably not quite ideal since it's a bit more work for now; we could do
some optimizations by moving depth testing outside the fragment shader loop
(but that is tricky for early depth test, as we have neither the mask nor the
interpolated z in the right order handy).
The large amount of tile/untile code is no longer needed and will be deleted
in the next commit.
No piglit regressions.
v2: change a forgotten LAYOUT_NONE to LAYOUT_LINEAR.
v3: fix (bogus) uninitialized variable warnings, add comments, fix a bad type

Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
src/gallium/drivers/llvmpipe/lp_bld_depth.c
src/gallium/drivers/llvmpipe/lp_bld_depth.h
src/gallium/drivers/llvmpipe/lp_jit.h
src/gallium/drivers/llvmpipe/lp_rast.c
src/gallium/drivers/llvmpipe/lp_rast_priv.h
src/gallium/drivers/llvmpipe/lp_scene.c
src/gallium/drivers/llvmpipe/lp_state_fs.c

index b9dbdc5a8ac8b2acf756a823df01df4b06dd8f72..1cd36b87909665907b6646e3a0e3cf066a919c3a 100644 (file)
  * flushing would avoid this, but it would most likely result in depth fighting
  * artifacts.
  *
- * We are free to use a different pixel layout though. Since our basic
- * processing unit is a quad (2x2 pixel block) we store the depth/stencil
- * values tiled, a quad at time. That is, a depth buffer containing 
- *
- *  Z11 Z12 Z13 Z14 ...
- *  Z21 Z22 Z23 Z24 ...
- *  Z31 Z32 Z33 Z34 ...
- *  Z41 Z42 Z43 Z44 ...
- *  ... ... ... ... ...
- *
- * will actually be stored in memory as
- *
- *  Z11 Z12 Z21 Z22 Z13 Z14 Z23 Z24 ...
- *  Z31 Z32 Z41 Z42 Z33 Z34 Z43 Z44 ...
- *  ... ... ... ... ... ... ... ... ...
+ * Since we're using linear layout for everything, but we need to deal with
+ * 2x2 quads, we need to load/store multiple values and swizzle them into
+ * place (we could avoid this by doing depth/stencil testing in linear format,
+ * which would be easy for late depth/stencil test as we could do that after
+ * the fragment shader loop just as we do for color buffers, but more tricky
+ * for early depth test as we'd need both masks and interpolated depth in
+ * linear format).
  *
  *
  * @author Jose Fonseca <jfonseca@vmware.com>
@@ -71,6 +63,7 @@
 #include "gallivm/lp_bld_intr.h"
 #include "gallivm/lp_bld_debug.h"
 #include "gallivm/lp_bld_swizzle.h"
+#include "gallivm/lp_bld_pack.h"
 
 #include "lp_bld_depth.h"
 
@@ -515,6 +508,219 @@ lp_build_occlusion_count(struct gallivm_state *gallivm,
 }
 
 
+/**
+ * Load depth/stencil values.
+ * The stored values are linear, swizzle them.
+ *
+ * \param z_src_type  the data type of the fragment depth/stencil values
+ * \param format_desc  description of the depth/stencil surface
+ * \param depth_ptr  pointer to the depth/stencil values of this 4x4 block
+ * \param depth_stride  stride of the depth/stencil buffer
+ * \param loop_counter  the current loop iteration
+ */
+LLVMValueRef
+lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm,
+                                     struct lp_type z_src_type,
+                                     const struct util_format_description *format_desc,
+                                     LLVMValueRef depth_ptr,
+                                     LLVMValueRef depth_stride,
+                                     LLVMValueRef loop_counter)
+{
+   LLVMBuilderRef builder = gallivm->builder;
+   LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 4];
+   LLVMValueRef zs_dst, zs_dst1, zs_dst2;
+   LLVMValueRef zs_dst_ptr;
+   LLVMValueRef depth_offset1, depth_offset2;
+   unsigned depth_bits = format_desc->block.bits/8;
+   struct lp_type zs_type = lp_depth_type(format_desc, z_src_type.length);
+   struct lp_type zs_load_type = zs_type;
+   zs_load_type.length = zs_load_type.length / 2;
+
+   if (z_src_type.length == 4) {
+      unsigned i;
+      LLVMValueRef looplsb = LLVMBuildAnd(builder, loop_counter,
+                                          lp_build_const_int32(gallivm, 1), "");
+      LLVMValueRef loopmsb = LLVMBuildAnd(builder, loop_counter,
+                                          lp_build_const_int32(gallivm, 2), "");
+      LLVMValueRef offset2 = LLVMBuildMul(builder, loopmsb,
+                                          depth_stride, "");
+      depth_offset1 = LLVMBuildMul(builder, looplsb,
+                                   lp_build_const_int32(gallivm, depth_bits * 2), "");
+      depth_offset1 = LLVMBuildAdd(builder, depth_offset1, offset2, "");
+
+      /* just concatenate the loaded 2x2 values into 4-wide vector */
+      for (i = 0; i < 4; i++) {
+         shuffles[i] = lp_build_const_int32(gallivm, i);
+      }
+   }
+   else {
+      unsigned i;
+      LLVMValueRef loopx2 = LLVMBuildShl(builder, loop_counter,
+                                         lp_build_const_int32(gallivm, 1), "");
+      assert(z_src_type.length == 8);
+      depth_offset1 = LLVMBuildMul(builder, loopx2, depth_stride, "");
+      /*
+       * We load 2x4 values, and need to swizzle them (order
+       * 0,1,4,5,2,3,6,7) - not so hot with avx unfortunately.
+       */
+      for (i = 0; i < 8; i++) {
+
+         shuffles[i] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2);
+      }
+   }
+
+   depth_offset2 = LLVMBuildAdd(builder, depth_offset1, depth_stride, "");
+
+   /* Load current z/stencil values from z/stencil buffer */
+   zs_dst_ptr = LLVMBuildGEP(builder, depth_ptr, &depth_offset1, 1, "");
+   zs_dst_ptr = LLVMBuildBitCast(builder,
+                                 zs_dst_ptr,
+                                 LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0), "");
+   zs_dst1 = LLVMBuildLoad(builder, zs_dst_ptr, "");
+   zs_dst_ptr = LLVMBuildGEP(builder, depth_ptr, &depth_offset2, 1, "");
+   zs_dst_ptr = LLVMBuildBitCast(builder,
+                                 zs_dst_ptr,
+                                 LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0), "");
+   zs_dst2 = LLVMBuildLoad(builder, zs_dst_ptr, "");
+
+   zs_dst = LLVMBuildShuffleVector(builder, zs_dst1, zs_dst2,
+                                   LLVMConstVector(shuffles, zs_type.length), "");
+
+   if (format_desc->block.bits < z_src_type.width) {
+      /* Extend destination ZS values (e.g., when reading from Z16_UNORM) */
+      zs_dst = LLVMBuildZExt(builder, zs_dst, lp_build_int_vec_type(gallivm, z_src_type), "");
+   }
+
+   lp_build_name(zs_dst, "zs_dst");
+
+   return zs_dst;
+}
+
+/**
+ * Store depth/stencil values.
+ * Incoming values are swizzled (typically n 2x2 quads), stored linear.
+ * If there's a mask it will do reload/select/store otherwise just store.
+ *
+ * \param z_src_type  the data type of the fragment depth/stencil values
+ * \param format_desc  description of the depth/stencil surface
+ * \param mask  the alive/dead pixel mask for the quad (vector)
+ * \param loop_counter  the current loop iteration
+ * \param depth_ptr  pointer to the depth/stencil values of this 4x4 block
+ * \param depth_stride  stride of the depth/stencil buffer
+ * \param zs_value  the depth/stencil values to store
+ */
+void
+lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm,
+                                      struct lp_type z_src_type,
+                                      const struct util_format_description *format_desc,
+                                      struct lp_build_mask_context *mask,
+                                      LLVMValueRef loop_counter,
+                                      LLVMValueRef depth_ptr,
+                                      LLVMValueRef depth_stride,
+                                      LLVMValueRef zs_value)
+{
+   struct lp_build_context z_bld;
+   LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 4];
+   LLVMBuilderRef builder = gallivm->builder;
+   LLVMValueRef mask_value = NULL;
+   LLVMValueRef zs_dst = NULL, zs_dst1, zs_dst2;
+   LLVMValueRef zs_dst_ptr1, zs_dst_ptr2;
+   LLVMValueRef depth_offset1, depth_offset2;
+   unsigned depth_bits = format_desc->block.bits/8;
+   struct lp_type zs_type = lp_depth_type(format_desc, z_src_type.length);
+   struct lp_type zs_load_type = zs_type;
+   zs_load_type.length = zs_load_type.length / 2;
+
+   lp_build_context_init(&z_bld, gallivm, zs_type);
+
+   /*
+    * This is far from ideal, at least for late depth write we should do this
+    * outside the fs loop to avoid all the swizzle stuff.
+    */
+   if (z_src_type.length == 4) {
+      unsigned i;
+      LLVMValueRef looplsb = LLVMBuildAnd(builder, loop_counter,
+                                          lp_build_const_int32(gallivm, 1), "");
+      LLVMValueRef loopmsb = LLVMBuildAnd(builder, loop_counter,
+                                          lp_build_const_int32(gallivm, 2), "");
+      LLVMValueRef offset2 = LLVMBuildMul(builder, loopmsb,
+                                          depth_stride, "");
+      depth_offset1 = LLVMBuildMul(builder, looplsb,
+                                   lp_build_const_int32(gallivm, depth_bits * 2), "");
+      depth_offset1 = LLVMBuildAdd(builder, depth_offset1, offset2, "");
+
+      /* just concatenate the loaded 2x2 values into 4-wide vector */
+      for (i = 0; i < 4; i++) {
+         shuffles[i] = lp_build_const_int32(gallivm, i);
+      }
+   }
+   else {
+      unsigned i;
+      LLVMValueRef loopx2 = LLVMBuildShl(builder, loop_counter,
+                                         lp_build_const_int32(gallivm, 1), "");
+      assert(z_src_type.length == 8);
+      depth_offset1 = LLVMBuildMul(builder, loopx2, depth_stride, "");
+      /*
+       * We load 2x4 values, and need to swizzle them (order
+       * 0,1,4,5,2,3,6,7) - not so hot with avx unfortunately.
+       */
+      for (i = 0; i < 8; i++) {
+         shuffles[i] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2);
+      }
+   }
+
+
+   depth_offset2 = LLVMBuildAdd(builder, depth_offset1, depth_stride, "");
+
+   zs_dst_ptr1 = LLVMBuildGEP(builder, depth_ptr, &depth_offset1, 1, "");
+   zs_dst_ptr1 = LLVMBuildBitCast(builder,
+                                 zs_dst_ptr1,
+                                 LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0), "");
+   zs_dst_ptr2 = LLVMBuildGEP(builder, depth_ptr, &depth_offset2, 1, "");
+   zs_dst_ptr2 = LLVMBuildBitCast(builder,
+                                  zs_dst_ptr2,
+                                  LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0), "");
+
+   if (mask) {
+      zs_dst1 = LLVMBuildLoad(builder, zs_dst_ptr1, "");
+      zs_dst2 = LLVMBuildLoad(builder, zs_dst_ptr2, "");
+      zs_dst = LLVMBuildShuffleVector(builder, zs_dst1, zs_dst2,
+                                      LLVMConstVector(shuffles, zs_type.length),
+                                      "zsbufval");
+
+      mask_value = lp_build_mask_value(mask);
+   }
+
+   if (zs_type.width < z_src_type.width) {
+      /* Truncate incoming ZS and mask values (e.g., when writing to Z16_UNORM) */
+      zs_value = LLVMBuildTrunc(builder, zs_value, z_bld.vec_type, "");
+      if (mask)
+         mask_value = LLVMBuildTrunc(builder, mask_value, z_bld.vec_type, "");
+   }
+
+   if (mask) {
+      zs_value = lp_build_select(&z_bld, mask_value, zs_value, zs_dst);
+   }
+
+   if (z_src_type.length == 4) {
+      zs_dst1 = lp_build_extract_range(gallivm, zs_value, 0, 2);
+      zs_dst2 = lp_build_extract_range(gallivm, zs_value, 2, 2);
+   }
+   else {
+      assert(z_src_type.length == 8);
+      zs_dst1 = LLVMBuildShuffleVector(builder, zs_value, zs_value,
+                                       LLVMConstVector(&shuffles[0],
+                                                       zs_load_type.length),
+                                       "");
+      zs_dst2 = LLVMBuildShuffleVector(builder, zs_value, zs_value,
+                                       LLVMConstVector(&shuffles[4],
+                                                       zs_load_type.length),
+                                       "");
+
+   }
+   LLVMBuildStore(builder, zs_dst1, zs_dst_ptr1);
+   LLVMBuildStore(builder, zs_dst2, zs_dst_ptr2);
+}
 
 /**
  * Generate code for performing depth and/or stencil tests.
@@ -527,7 +733,7 @@ lp_build_occlusion_count(struct gallivm_state *gallivm,
  * \param mask  the alive/dead pixel mask for the quad (vector)
  * \param stencil_refs  the front/back stencil ref values (scalar)
  * \param z_src  the incoming depth/stencil values (n 2x2 quad values, float32)
- * \param zs_dst_ptr  pointer to depth/stencil values in framebuffer
+ * \param zs_dst  the depth/stencil values in framebuffer
  * \param face  contains boolean value indicating front/back facing polygon
  */
 void
@@ -539,7 +745,7 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
                             struct lp_build_mask_context *mask,
                             LLVMValueRef stencil_refs[2],
                             LLVMValueRef z_src,
-                            LLVMValueRef zs_dst_ptr,
+                            LLVMValueRef zs_dst,
                             LLVMValueRef face,
                             LLVMValueRef *zs_value,
                             boolean do_branch)
@@ -551,7 +757,7 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
    struct lp_build_context s_bld;
    struct lp_type s_type;
    unsigned z_shift = 0, z_width = 0, z_mask = 0;
-   LLVMValueRef zs_dst, z_dst = NULL;
+   LLVMValueRef z_dst = NULL;
    LLVMValueRef stencil_vals = NULL;
    LLVMValueRef z_bitmask = NULL, stencil_shift = NULL;
    LLVMValueRef z_pass = NULL, s_pass_mask = NULL;
@@ -638,19 +844,6 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
    s_type = lp_int_type(z_type);
    lp_build_context_init(&s_bld, gallivm, s_type);
 
-   /* Load current z/stencil value from z/stencil buffer */
-   zs_dst_ptr = LLVMBuildBitCast(builder,
-                                 zs_dst_ptr,
-                                 LLVMPointerType(lp_build_vec_type(gallivm, zs_type), 0), "");
-   zs_dst = LLVMBuildLoad(builder, zs_dst_ptr, "");
-   if (format_desc->block.bits < z_type.width) {
-      /* Extend destination ZS values (e.g., when reading from Z16_UNORM) */
-      zs_dst = LLVMBuildZExt(builder, zs_dst, z_bld.vec_type, "");
-   }
-
-   lp_build_name(zs_dst, "zs_dst");
-
-
    /* Compute and apply the Z/stencil bitmasks and shifts.
     */
    {
@@ -860,65 +1053,3 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
 
 }
 
-
-void
-lp_build_depth_write(struct gallivm_state *gallivm,
-                     struct lp_type z_src_type,
-                     const struct util_format_description *format_desc,
-                     LLVMValueRef zs_dst_ptr,
-                     LLVMValueRef zs_value)
-{
-   LLVMBuilderRef builder = gallivm->builder;
-
-   if (format_desc->block.bits < z_src_type.width) {
-      /* Truncate income ZS values (e.g., when writing to Z16_UNORM) */
-      LLVMTypeRef zs_type = LLVMIntTypeInContext(gallivm->context, format_desc->block.bits);
-      if (z_src_type.length > 1) {
-         zs_type = LLVMVectorType(zs_type, z_src_type.length);
-      }
-      zs_value = LLVMBuildTrunc(builder, zs_value, zs_type, "");
-   }
-
-   zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr,
-                                 LLVMPointerType(LLVMTypeOf(zs_value), 0), "");
-
-   LLVMBuildStore(builder, zs_value, zs_dst_ptr);
-}
-
-
-void
-lp_build_deferred_depth_write(struct gallivm_state *gallivm,
-                              struct lp_type z_src_type,
-                              const struct util_format_description *format_desc,
-                              struct lp_build_mask_context *mask,
-                              LLVMValueRef zs_dst_ptr,
-                              LLVMValueRef zs_value)
-{
-   struct lp_type z_type;
-   struct lp_build_context z_bld;
-   LLVMValueRef z_dst;
-   LLVMBuilderRef builder = gallivm->builder;
-   LLVMValueRef mask_value;
-
-   /* XXX: pointlessly redo type logic:
-    */
-   z_type = lp_depth_type(format_desc, z_src_type.length);
-   lp_build_context_init(&z_bld, gallivm, z_type);
-
-   zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr,
-                                 LLVMPointerType(z_bld.vec_type, 0), "");
-
-   z_dst = LLVMBuildLoad(builder, zs_dst_ptr, "zsbufval");
-
-   mask_value = lp_build_mask_value(mask);
-
-   if (z_type.width < z_src_type.width) {
-      /* Truncate incoming ZS and mask values (e.g., when writing to Z16_UNORM) */
-      zs_value = LLVMBuildTrunc(builder, zs_value, z_bld.vec_type, "");
-      mask_value = LLVMBuildTrunc(builder, mask_value, z_bld.int_vec_type, "");
-   }
-
-   z_dst = lp_build_select(&z_bld, mask_value, zs_value, z_dst);
-
-   LLVMBuildStore(builder, z_dst, zs_dst_ptr);
-}
index 33cb0dd4a9e3e20cd55213ae01c34ebe4aa182b0..c000494667dc2ee6fa972083f3d306f2c78a4a25 100644 (file)
@@ -58,30 +58,34 @@ void
 lp_build_depth_stencil_test(struct gallivm_state *gallivm,
                             const struct pipe_depth_state *depth,
                             const struct pipe_stencil_state stencil[2],
-                            struct lp_type type,
+                            struct lp_type z_src_type,
                             const struct util_format_description *format_desc,
                             struct lp_build_mask_context *mask,
                             LLVMValueRef stencil_refs[2],
-                            LLVMValueRef zs_src,
-                            LLVMValueRef zs_dst_ptr,
-                            LLVMValueRef facing,
+                            LLVMValueRef z_src,
+                            LLVMValueRef zs_dst,
+                            LLVMValueRef face,
                             LLVMValueRef *zs_value,
                             boolean do_branch);
 
-void
-lp_build_depth_write(struct gallivm_state *gallivm,
-                     struct lp_type z_src_type,
-                     const struct util_format_description *format_desc,
-                     LLVMValueRef zs_dst_ptr,
-                     LLVMValueRef zs_value);
+LLVMValueRef
+lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm,
+                                     struct lp_type z_src_type,
+                                     const struct util_format_description *format_desc,
+                                     LLVMValueRef depth_ptr,
+                                     LLVMValueRef depth_stride,
+                                     LLVMValueRef loop_counter);
 
 void
-lp_build_deferred_depth_write(struct gallivm_state *gallivm,
-                              struct lp_type z_src_type,
-                              const struct util_format_description *format_desc,
-                              struct lp_build_mask_context *mask,
-                              LLVMValueRef zs_dst_ptr,
-                              LLVMValueRef zs_value);
+lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm,
+                                      struct lp_type z_src_type,
+                                      const struct util_format_description *format_desc,
+                                      struct lp_build_mask_context *mask,
+                                      LLVMValueRef loop_counter,
+                                      LLVMValueRef depth_ptr,
+                                      LLVMValueRef depth_stride,
+                                      LLVMValueRef zs_value);
+
 
 void
 lp_build_occlusion_count(struct gallivm_state *gallivm,
index 4eddb2a2f3c7c42915dcda1d0f459b28e5cfca02..4e9ca764fe72a496ac1f64ef1d5ec43fe8236841 100644 (file)
@@ -193,6 +193,7 @@ enum {
  * @param mask          mask of visible pixels in block
  * @param thread_data   task thread data
  * @param stride        color buffer row stride in bytes
+ * @param depth_stride  depth buffer row stride in bytes
  */
 typedef void
 (*lp_jit_frag_func)(const struct lp_jit_context *context,
@@ -206,7 +207,8 @@ typedef void
                     void *depth,
                     uint32_t mask,
                     struct lp_jit_thread_data *thread_data,
-                    unsigned *stride);
+                    unsigned *stride,
+                    unsigned depth_stride);
 
 
 void
index ef49ba9ab6f50f0457febe12e83ab047ff39f80f..a557db4b4dc2c77b3350125ac81c92c1d05f5022 100644 (file)
@@ -89,51 +89,15 @@ static void
 lp_rast_tile_begin(struct lp_rasterizer_task *task,
                    const struct cmd_bin *bin)
 {
-   const struct lp_scene *scene = task->scene;
-   enum lp_texture_usage usage;
-
    LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, bin->x, bin->y);
 
    task->bin = bin;
    task->x = bin->x * TILE_SIZE;
    task->y = bin->y * TILE_SIZE;
 
-   /* reset pointers to color tile(s) */
+   /* reset pointers to color and depth tile(s) */
    memset(task->color_tiles, 0, sizeof(task->color_tiles));
-
-   /* get pointer to depth/stencil tile */
-   {
-      struct pipe_surface *zsbuf = task->scene->fb.zsbuf;
-      if (zsbuf) {
-         struct llvmpipe_resource *lpt = llvmpipe_resource(zsbuf->texture);
-
-         if (scene->has_depthstencil_clear)
-            usage = LP_TEX_USAGE_WRITE_ALL;
-         else
-            usage = LP_TEX_USAGE_READ_WRITE;
-
-         /* "prime" the tile: convert data from linear to tiled if necessary
-          * and update the tile's layout info.
-          */
-         (void) llvmpipe_get_texture_tile(lpt,
-                                          zsbuf->u.tex.first_layer,
-                                          zsbuf->u.tex.level,
-                                          usage,
-                                          task->x,
-                                          task->y);
-         /* Get actual pointer to the tile data.  Note that depth/stencil
-          * data is tiled differently than color data.
-          */
-         task->depth_tile = lp_rast_get_depth_block_pointer(task,
-                                                            task->x,
-                                                            task->y);
-
-         assert(task->depth_tile);
-      }
-      else {
-         task->depth_tile = NULL;
-      }
-   }
+   task->depth_tile = NULL;
 }
 
 
@@ -220,8 +184,6 @@ lp_rast_clear_color(struct lp_rasterizer_task *task,
 
 
 
-
-
 /**
  * Clear the rasterizer's current z/stencil tile.
  * This is a bin command called during bin processing.
@@ -233,10 +195,10 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task,
    const struct lp_scene *scene = task->scene;
    uint32_t clear_value = arg.clear_zstencil.value;
    uint32_t clear_mask = arg.clear_zstencil.mask;
-   const unsigned height = TILE_SIZE / TILE_VECTOR_HEIGHT;
-   const unsigned width = TILE_SIZE * TILE_VECTOR_HEIGHT;
+   const unsigned height = TILE_SIZE;
+   const unsigned width = TILE_SIZE;
    const unsigned block_size = scene->zsbuf.blocksize;
-   const unsigned dst_stride = scene->zsbuf.stride * TILE_VECTOR_HEIGHT;
+   const unsigned dst_stride = scene->zsbuf.stride;
    uint8_t *dst;
    unsigned i, j;
 
@@ -244,65 +206,64 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task,
            __FUNCTION__, clear_value, clear_mask);
 
    /*
-    * Clear the area of the swizzled depth/depth buffer matching this tile, in
-    * stripes of TILE_VECTOR_HEIGHT x TILE_SIZE at a time.
-    *
-    * The swizzled depth format is such that the depths for
-    * TILE_VECTOR_HEIGHT x TILE_VECTOR_WIDTH pixels have consecutive offsets.
+    * Clear the area of the depth/stencil buffer matching this tile.
     */
 
-   dst = task->depth_tile;
+   if (scene->fb.zsbuf) {
 
-   clear_value &= clear_mask;
+      dst = lp_rast_get_unswizzled_depth_tile_pointer(task, LP_TEX_USAGE_READ_WRITE);
 
-   switch (block_size) {
-   case 1:
-      assert(clear_mask == 0xff);
-      memset(dst, (uint8_t) clear_value, height * width);
-      break;
-   case 2:
-      if (clear_mask == 0xffff) {
-         for (i = 0; i < height; i++) {
-            uint16_t *row = (uint16_t *)dst;
-            for (j = 0; j < width; j++)
-               *row++ = (uint16_t) clear_value;
-            dst += dst_stride;
+      clear_value &= clear_mask;
+
+      switch (block_size) {
+      case 1:
+         assert(clear_mask == 0xff);
+         memset(dst, (uint8_t) clear_value, height * width);
+         break;
+      case 2:
+         if (clear_mask == 0xffff) {
+            for (i = 0; i < height; i++) {
+               uint16_t *row = (uint16_t *)dst;
+               for (j = 0; j < width; j++)
+                  *row++ = (uint16_t) clear_value;
+               dst += dst_stride;
+            }
          }
-      }
-      else {
-         for (i = 0; i < height; i++) {
-            uint16_t *row = (uint16_t *)dst;
-            for (j = 0; j < width; j++) {
-               uint16_t tmp = ~clear_mask & *row;
-               *row++ = clear_value | tmp;
+         else {
+            for (i = 0; i < height; i++) {
+               uint16_t *row = (uint16_t *)dst;
+               for (j = 0; j < width; j++) {
+                  uint16_t tmp = ~clear_mask & *row;
+                  *row++ = clear_value | tmp;
+               }
+               dst += dst_stride;
             }
-            dst += dst_stride;
          }
-      }
-      break;
-   case 4:
-      if (clear_mask == 0xffffffff) {
-         for (i = 0; i < height; i++) {
-            uint32_t *row = (uint32_t *)dst;
-            for (j = 0; j < width; j++)
-               *row++ = clear_value;
-            dst += dst_stride;
+         break;
+      case 4:
+         if (clear_mask == 0xffffffff) {
+            for (i = 0; i < height; i++) {
+               uint32_t *row = (uint32_t *)dst;
+               for (j = 0; j < width; j++)
+                  *row++ = clear_value;
+               dst += dst_stride;
+            }
          }
-      }
-      else {
-         for (i = 0; i < height; i++) {
-            uint32_t *row = (uint32_t *)dst;
-            for (j = 0; j < width; j++) {
-               uint32_t tmp = ~clear_mask & *row;
-               *row++ = clear_value | tmp;
+         else {
+            for (i = 0; i < height; i++) {
+               uint32_t *row = (uint32_t *)dst;
+               for (j = 0; j < width; j++) {
+                  uint32_t tmp = ~clear_mask & *row;
+                  *row++ = clear_value | tmp;
+               }
+               dst += dst_stride;
             }
-            dst += dst_stride;
          }
+         break;
+      default:
+         assert(0);
+         break;
       }
-      break;
-   default:
-      assert(0);
-      break;
    }
 }
 
@@ -343,7 +304,8 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task,
       for (x = 0; x < TILE_SIZE; x += 4) {
          uint8_t *color[PIPE_MAX_COLOR_BUFS];
          unsigned stride[PIPE_MAX_COLOR_BUFS];
-         uint32_t *depth;
+         uint8_t *depth = NULL;
+         unsigned depth_stride = 0;
          unsigned i;
 
          /* color buffer */
@@ -354,7 +316,11 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task,
          }
 
          /* depth buffer */
-         depth = lp_rast_get_depth_block_pointer(task, tile_x + x, tile_y + y);
+         if (scene->zsbuf.map) {
+            depth = lp_rast_get_unswizzled_depth_block_pointer(task, tile_x + x, tile_y + y);
+            depth_stride = scene->zsbuf.stride;
+         }
+
 
          /* run shader on 4x4 block */
          BEGIN_JIT_CALL(state, task);
@@ -368,7 +334,8 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task,
                                             depth,
                                             0xffff,
                                             &task->thread_data,
-                                            stride);
+                                            stride,
+                                            depth_stride);
          END_JIT_CALL();
       }
    }
@@ -412,7 +379,8 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
    const struct lp_scene *scene = task->scene;
    uint8_t *color[PIPE_MAX_COLOR_BUFS];
    unsigned stride[PIPE_MAX_COLOR_BUFS];
-   void *depth;
+   void *depth = NULL;
+   unsigned depth_stride = 0;
    unsigned i;
 
    assert(state);
@@ -434,8 +402,10 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
    }
 
    /* depth buffer */
-   depth = lp_rast_get_depth_block_pointer(task, x, y);
-
+   if (scene->zsbuf.map) {
+      depth_stride = scene->zsbuf.stride;
+      depth = lp_rast_get_unswizzled_depth_block_pointer(task, x, y);
+   }
 
    assert(lp_check_alignment(state->jit_context.u8_blend_color, 16));
 
@@ -451,7 +421,8 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
                                          depth,
                                          mask,
                                          &task->thread_data,
-                                         stride);
+                                         stride,
+                                         depth_stride);
    END_JIT_CALL();
 }
 
index c0f41f69fe29a5497f15bb64d487ad236005c230..7d01da15113a521a80c69823e3cc9f7a236f86df 100644 (file)
@@ -140,48 +140,39 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
 
 
 /**
- * Get the pointer to a 4x4 depth/stencil block.
- * We'll map the z/stencil buffer on demand here.
- * Note that this may be called even when there's no z/stencil buffer - return
- * NULL in that case.
- * \param x, y location of 4x4 block in window coords
+ * Get pointer to the unswizzled color tile
  */
-static INLINE void *
-lp_rast_get_depth_block_pointer(struct lp_rasterizer_task *task,
-                                unsigned x, unsigned y)
+static INLINE uint8_t *
+lp_rast_get_unswizzled_color_tile_pointer(struct lp_rasterizer_task *task,
+                                          unsigned buf, enum lp_texture_usage usage)
 {
    const struct lp_scene *scene = task->scene;
-   void *depth;
+   unsigned format_bytes;
 
-   assert(x < scene->tiles_x * TILE_SIZE);
-   assert(y < scene->tiles_y * TILE_SIZE);
-   assert((x % TILE_VECTOR_WIDTH) == 0);
-   assert((y % TILE_VECTOR_HEIGHT) == 0);
+   assert(task->x < scene->tiles_x * TILE_SIZE);
+   assert(task->y < scene->tiles_y * TILE_SIZE);
+   assert(task->x % TILE_SIZE == 0);
+   assert(task->y % TILE_SIZE == 0);
+   assert(buf < scene->fb.nr_cbufs);
 
-   if (!scene->zsbuf.map) {
-      /* Either out of memory or no zsbuf.  Can't tell without access
-       * to the state.  Just use dummy tile memory, but don't print
-       * the oom warning as this most likely because there is no
-       * zsbuf.
-       */
-      return lp_dummy_tile;
-   }
+   if (!task->color_tiles[buf]) {
+      struct pipe_surface *cbuf = scene->fb.cbufs[buf];
+      assert(cbuf);
 
-   depth = (scene->zsbuf.map +
-            scene->zsbuf.stride * y +
-            scene->zsbuf.blocksize * x * TILE_VECTOR_HEIGHT);
+      format_bytes = util_format_get_blocksize(cbuf->format);
+      task->color_tiles[buf] = scene->cbufs[buf].map + scene->cbufs[buf].stride * task->y + format_bytes * task->x;
+   }
 
-   assert(lp_check_alignment(depth, 16));
-   return depth;
+   return task->color_tiles[buf];
 }
 
 
 /**
- * Get pointer to the unswizzled color tile
+ * Get pointer to the unswizzled depth tile (computed on first use, then kept in task->depth_tile)
  */
 static INLINE uint8_t *
-lp_rast_get_unswizzled_color_tile_pointer(struct lp_rasterizer_task *task,
-                                          unsigned buf, enum lp_texture_usage usage)
+lp_rast_get_unswizzled_depth_tile_pointer(struct lp_rasterizer_task *task,
+                                          enum lp_texture_usage usage) /* usage is not referenced in the visible lines of this function */
 {
    const struct lp_scene *scene = task->scene;
    unsigned format_bytes;
@@ -190,17 +181,16 @@ lp_rast_get_unswizzled_color_tile_pointer(struct lp_rasterizer_task *task,
    assert(task->y < scene->tiles_y * TILE_SIZE);
    assert(task->x % TILE_SIZE == 0);
    assert(task->y % TILE_SIZE == 0);
-   assert(buf < scene->fb.nr_cbufs);
 
-   if (!task->color_tiles[buf]) {
-      struct pipe_surface *cbuf = scene->fb.cbufs[buf];
-      assert(cbuf);
+   if (!task->depth_tile) { /* no pointer stored yet for this task: compute it and store it */
+      struct pipe_surface *dbuf = scene->fb.zsbuf;
+      assert(dbuf); /* a depth/stencil surface must be bound; callers are expected to check first */
 
-      format_bytes = util_format_get_blocksize(cbuf->format);
-      task->color_tiles[buf] = scene->cbufs[buf].map + scene->cbufs[buf].stride * task->y + format_bytes * task->x;
+      format_bytes = util_format_get_blocksize(dbuf->format); /* byte step per unit of task->x in the address below */
+      task->depth_tile = scene->zsbuf.map + scene->zsbuf.stride * task->y + format_bytes * task->x;
    }
 
-   return task->color_tiles[buf];
+   return task->depth_tile; /* stored pointer; later calls with a non-NULL value skip the computation */
 }
 
 
@@ -237,6 +227,38 @@ lp_rast_get_unswizzled_color_block_pointer(struct lp_rasterizer_task *task,
 }
 
 
+/**
+ * Get the pointer to an unswizzled 4x4 depth block (within an unswizzled 64x64 tile).
+ * \param x, y location of 4x4 block in window coords (x a multiple of TILE_VECTOR_WIDTH, y of TILE_VECTOR_HEIGHT)
+ */
+static INLINE uint8_t *
+lp_rast_get_unswizzled_depth_block_pointer(struct lp_rasterizer_task *task,
+                                           unsigned x, unsigned y)
+{
+   unsigned px, py, pixel_offset, format_bytes;
+   uint8_t *depth;
+
+   assert(x < task->scene->tiles_x * TILE_SIZE); /* block must lie inside the scene's tile grid */
+   assert(y < task->scene->tiles_y * TILE_SIZE);
+   assert((x % TILE_VECTOR_WIDTH) == 0); /* block origin must be aligned to the vector block size */
+   assert((y % TILE_VECTOR_HEIGHT) == 0);
+
+   format_bytes = util_format_get_blocksize(task->scene->fb.zsbuf->format); /* dereferences fb.zsbuf: requires a bound zs surface */
+
+   depth = lp_rast_get_unswizzled_depth_tile_pointer(task, LP_TEX_USAGE_READ_WRITE);
+   assert(depth); /* base address derived from task->x / task->y by the tile-pointer helper */
+
+   px = x % TILE_SIZE; /* block position relative to the start of its tile */
+   py = y % TILE_SIZE;
+   pixel_offset = px * format_bytes + py * task->scene->zsbuf.stride; /* byte offset: px steps across, py rows of zsbuf.stride down */
+
+   depth = depth + pixel_offset;
+
+   assert(lp_check_alignment(depth, llvmpipe_get_format_alignment(task->scene->fb.zsbuf->format)));
+   return depth; /* address of the block's first depth value in the linear zs buffer */
+}
+
+
 
 /**
  * Shade all pixels in a 4x4 block.  The fragment code omits the
@@ -253,7 +275,8 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task,
    struct lp_fragment_shader_variant *variant = state->variant;
    uint8_t *color[PIPE_MAX_COLOR_BUFS];
    unsigned stride[PIPE_MAX_COLOR_BUFS];
-   void *depth;
+   void *depth = NULL;
+   unsigned depth_stride = 0;
    unsigned i;
 
    /* color buffer */
@@ -263,7 +286,10 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task,
       color[i] = lp_rast_get_unswizzled_color_block_pointer(task, i, x, y);
    }
 
-   depth = lp_rast_get_depth_block_pointer(task, x, y);
+   if (scene->zsbuf.map) {
+      depth = lp_rast_get_unswizzled_depth_block_pointer(task, x, y);
+      depth_stride = scene->zsbuf.stride;
+   }
 
    /* run shader on 4x4 block */
    BEGIN_JIT_CALL(state, task);
@@ -277,7 +303,8 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task,
                                       depth,
                                       0xffff,
                                       &task->thread_data,
-                                      stride );
+                                      stride,
+                                      depth_stride);
    END_JIT_CALL();
 }
 
index a8885863ef0f7f283abbf80d88c207f1ecff58c7..e05ea753b4b36436c2277b49c6994520479b2c7f 100644 (file)
@@ -185,7 +185,7 @@ lp_scene_begin_rasterization(struct lp_scene *scene)
                                                zsbuf->u.tex.level,
                                                zsbuf->u.tex.first_layer,
                                                LP_TEX_USAGE_READ_WRITE,
-                                               LP_TEX_LAYOUT_NONE);
+                                               LP_TEX_LAYOUT_LINEAR);
    }
 }
 
index 1a9a194c8bee067367235d83e2759a4330cd9c97..69212109a87d92f18884f85ac1c7b3d216162c47 100644 (file)
@@ -229,7 +229,7 @@ generate_fs_loop(struct gallivm_state *gallivm,
                  LLVMValueRef mask_store,
                  LLVMValueRef (*out_color)[4],
                  LLVMValueRef depth_ptr,
-                 unsigned depth_bits,
+                 LLVMValueRef depth_stride,
                  LLVMValueRef facing,
                  LLVMValueRef thread_data_ptr)
 {
@@ -241,8 +241,6 @@ generate_fs_loop(struct gallivm_state *gallivm,
    LLVMValueRef z;
    LLVMValueRef zs_value = NULL;
    LLVMValueRef stencil_refs[2];
-   LLVMValueRef depth_ptr_i;
-   LLVMValueRef depth_offset;
    LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
    struct lp_build_for_loop_state loop_state;
    struct lp_build_mask_context mask;
@@ -308,12 +306,6 @@ generate_fs_loop(struct gallivm_state *gallivm,
                            &loop_state.counter, 1, "mask_ptr");
    mask_val = LLVMBuildLoad(builder, mask_ptr, "");
 
-   depth_offset = LLVMBuildMul(builder, loop_state.counter,
-                               lp_build_const_int32(gallivm, depth_bits * type.length),
-                               "");
-
-   depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &depth_offset, 1, "");
-
    memset(outputs, 0, sizeof outputs);
 
    for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) {
@@ -345,6 +337,11 @@ generate_fs_loop(struct gallivm_state *gallivm,
    z = interp->pos[2];
 
    if (depth_mode & EARLY_DEPTH_TEST) {
+      LLVMValueRef zs_dst_val;
+      zs_dst_val = lp_build_depth_stencil_load_swizzled(gallivm, type,
+                                                        zs_format_desc,
+                                                        depth_ptr, depth_stride,
+                                                        loop_state.counter);
       lp_build_depth_stencil_test(gallivm,
                                   &key->depth,
                                   key->stencil,
@@ -353,12 +350,15 @@ generate_fs_loop(struct gallivm_state *gallivm,
                                   &mask,
                                   stencil_refs,
                                   z,
-                                  depth_ptr_i, facing,
+                                  zs_dst_val,
+                                  facing,
                                   &zs_value,
                                   !simple_shader);
 
       if (depth_mode & EARLY_DEPTH_WRITE) {
-         lp_build_depth_write(gallivm, type, zs_format_desc, depth_ptr_i, zs_value);
+         lp_build_depth_stencil_write_swizzled(gallivm, type, zs_format_desc,
+                                               NULL, loop_state.counter,
+                                               depth_ptr, depth_stride, zs_value);
       }
    }
 
@@ -394,6 +394,7 @@ generate_fs_loop(struct gallivm_state *gallivm,
 
    /* Late Z test */
    if (depth_mode & LATE_DEPTH_TEST) {
+      LLVMValueRef zs_dst_val;
       int pos0 = find_output_by_semantic(&shader->info.base,
                                          TGSI_SEMANTIC_POSITION,
                                          0);
@@ -402,6 +403,11 @@ generate_fs_loop(struct gallivm_state *gallivm,
          z = LLVMBuildLoad(builder, outputs[pos0][2], "output.z");
       }
 
+      zs_dst_val = lp_build_depth_stencil_load_swizzled(gallivm, type,
+                                                        zs_format_desc,
+                                                        depth_ptr, depth_stride,
+                                                        loop_state.counter);
+
       lp_build_depth_stencil_test(gallivm,
                                   &key->depth,
                                   key->stencil,
@@ -410,12 +416,15 @@ generate_fs_loop(struct gallivm_state *gallivm,
                                   &mask,
                                   stencil_refs,
                                   z,
-                                  depth_ptr_i, facing,
+                                  zs_dst_val,
+                                  facing,
                                   &zs_value,
                                   !simple_shader);
       /* Late Z write */
       if (depth_mode & LATE_DEPTH_WRITE) {
-         lp_build_depth_write(gallivm, type, zs_format_desc, depth_ptr_i, zs_value);
+         lp_build_depth_stencil_write_swizzled(gallivm, type, zs_format_desc,
+                                               NULL, loop_state.counter,
+                                               depth_ptr, depth_stride, zs_value);
       }
    }
    else if ((depth_mode & EARLY_DEPTH_TEST) &&
@@ -425,12 +434,9 @@ generate_fs_loop(struct gallivm_state *gallivm,
        * depth value, update from zs_value with the new mask value and
        * write that out.
        */
-      lp_build_deferred_depth_write(gallivm,
-                                    type,
-                                    zs_format_desc,
-                                    &mask,
-                                    depth_ptr_i,
-                                    zs_value);
+      lp_build_depth_stencil_write_swizzled(gallivm, type, zs_format_desc,
+                                            &mask, loop_state.counter,
+                                            depth_ptr, depth_stride, zs_value);
    }
 
 
@@ -1749,7 +1755,7 @@ generate_fragment(struct llvmpipe_context *lp,
    struct lp_type blend_type;
    LLVMTypeRef fs_elem_type;
    LLVMTypeRef blend_vec_type;
-   LLVMTypeRef arg_types[12];
+   LLVMTypeRef arg_types[13];
    LLVMTypeRef func_type;
    LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context);
    LLVMTypeRef int8_type = LLVMInt8TypeInContext(gallivm->context);
@@ -1762,6 +1768,7 @@ generate_fragment(struct llvmpipe_context *lp,
    LLVMValueRef color_ptr_ptr;
    LLVMValueRef stride_ptr;
    LLVMValueRef depth_ptr;
+   LLVMValueRef depth_stride;
    LLVMValueRef mask_input;
    LLVMValueRef thread_data_ptr;
    LLVMBasicBlockRef block;
@@ -1772,7 +1779,6 @@ generate_fragment(struct llvmpipe_context *lp,
    LLVMValueRef fs_out_color[PIPE_MAX_COLOR_BUFS][TGSI_NUM_CHANNELS][16 / 4];
    LLVMValueRef function;
    LLVMValueRef facing;
-   const struct util_format_description *zs_format_desc;
    unsigned num_fs;
    unsigned i;
    unsigned chan;
@@ -1847,6 +1853,7 @@ generate_fragment(struct llvmpipe_context *lp,
    arg_types[9] = int32_type;                          /* mask_input */
    arg_types[10] = variant->jit_thread_data_ptr_type;  /* per thread data */
    arg_types[11] = LLVMPointerType(int32_type, 0);     /* stride */
+   arg_types[12] = int32_type;                         /* depth_stride */
 
    func_type = LLVMFunctionType(LLVMVoidTypeInContext(gallivm->context),
                                 arg_types, Elements(arg_types), 0);
@@ -1875,6 +1882,7 @@ generate_fragment(struct llvmpipe_context *lp,
    mask_input   = LLVMGetParam(function, 9);
    thread_data_ptr  = LLVMGetParam(function, 10);
    stride_ptr   = LLVMGetParam(function, 11);
+   depth_stride = LLVMGetParam(function, 12);
 
    lp_build_name(context_ptr, "context");
    lp_build_name(x, "x");
@@ -1887,6 +1895,7 @@ generate_fragment(struct llvmpipe_context *lp,
    lp_build_name(thread_data_ptr, "thread_data");
    lp_build_name(mask_input, "mask_input");
    lp_build_name(stride_ptr, "stride_ptr");
+   lp_build_name(depth_stride, "depth_stride");
 
    /*
     * Function body
@@ -1900,10 +1909,7 @@ generate_fragment(struct llvmpipe_context *lp,
    /* code generated texture sampling */
    sampler = lp_llvm_sampler_soa_create(key->state, context_ptr);
 
-   zs_format_desc = util_format_description(key->zsbuf_format);
-
    {
-      unsigned depth_bits = zs_format_desc->block.bits/8;
       LLVMValueRef num_loop = lp_build_const_int32(gallivm, num_fs);
       LLVMTypeRef mask_type = lp_build_int_vec_type(gallivm, fs_type);
       LLVMValueRef mask_store = lp_build_array_alloca(gallivm, mask_type,
@@ -1951,7 +1957,7 @@ generate_fragment(struct llvmpipe_context *lp,
                        mask_store, /* output */
                        color_store,
                        depth_ptr,
-                       depth_bits,
+                       depth_stride,
                        facing,
                        thread_data_ptr);