llvmpipe: clean up shader pre/postamble, try to catch more early-z
author Keith Whitwell <keithw@vmware.com>
Sat, 9 Oct 2010 10:28:00 +0000 (11:28 +0100)
committer Keith Whitwell <keithw@vmware.com>
Sat, 9 Oct 2010 10:44:45 +0000 (11:44 +0100)
Specifically, can do early-depth-test even when alpha test or
kill-pixel are active, providing we defer the actual z write until the
final mask is available.

Improves demos/fire.c especially in the case where you get close to
the trees.

src/gallium/drivers/llvmpipe/lp_bld_depth.c
src/gallium/drivers/llvmpipe/lp_bld_depth.h
src/gallium/drivers/llvmpipe/lp_state_fs.c

index 6b8ffb6ca265708ca117def02ede8cd90f1cca43..8d9be2ebbbf379426b57beb90d8f9a4f78fc5278 100644 (file)
@@ -410,7 +410,7 @@ get_s_shift_and_mask(const struct util_format_description *format_desc,
  * \param maskvalue is the depth test mask.
  * \param counter is a pointer of the uint32 counter.
  */
-static void
+void
 lp_build_occlusion_count(LLVMBuilderRef builder,
                          struct lp_type type,
                          LLVMValueRef maskvalue,
@@ -462,7 +462,7 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,
                             LLVMValueRef z_src,
                             LLVMValueRef zs_dst_ptr,
                             LLVMValueRef face,
-                            LLVMValueRef counter,
+                            LLVMValueRef *zs_value,
                             boolean do_branch)
 {
    struct lp_type type;
@@ -524,17 +524,14 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,
        * storage.
        */
       if (depth->writemask) {
-         type.sign = 0;
+         type.sign = 1;
          lp_build_context_init(&bld, builder, type);
 
          z_dst = lp_build_select(&bld, mask->value, z_src, z_dst);
          z_dst = LLVMBuildShl(builder, z_dst, const_8_int, "z_dst");
-         LLVMBuildStore(builder, z_dst, zs_dst_ptr);
+         *zs_value = z_dst;
       }
 
-      if (counter)
-         lp_build_occlusion_count(builder, type, mask->value, counter);
-
       return;
    }
 
@@ -779,7 +776,7 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,
       else
          zs_dst = stencil_vals;
 
-      LLVMBuildStore(builder, zs_dst, zs_dst_ptr);
+      *zs_value = zs_dst;
    }
 
    if (s_pass_mask)
@@ -791,6 +788,29 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,
    if (do_branch)
       lp_build_mask_check(mask);
 
-   if (counter)
-      lp_build_occlusion_count(builder, type, mask->value, counter);
+}
+
+
+/**
+ * Emit the deferred depth/stencil write: load the current value from
+ * zs_dst_ptr, merge zs_value into it under mask->value via
+ * lp_build_select(), and store the result back to zs_dst_ptr.
+ *
+ * \param builder      LLVM builder the load/select/store are emitted with
+ * \param z_src_type   its total bit size (width * length) is passed to
+ *                     lp_depth_type() together with format_desc
+ * \param format_desc  format description handed to lp_depth_type()
+ * \param mask         mask context; only mask->value is read here
+ * \param zs_dst_ptr   pointer that is both loaded from and stored to
+ * \param zs_value     new value to merge in
+ *
+ * NOTE(review): no shift or repacking is applied to zs_value here, so it
+ * presumably has to arrive already in the buffer's storage layout --
+ * confirm against lp_build_depth_stencil_test().
+ */
+void
+lp_build_deferred_depth_write(LLVMBuilderRef builder,
+                              struct lp_type z_src_type,
+                              const struct util_format_description *format_desc,
+                              struct lp_build_mask_context *mask,
+                              LLVMValueRef zs_dst_ptr,
+                              LLVMValueRef zs_value)
+{
+   struct lp_type type;
+   struct lp_build_context bld;
+   LLVMValueRef z_dst;
+
+   /* XXX: pointlessly redo type logic:
+    * the type is derived again from format_desc and the total bit size
+    * of z_src_type so that a build context exists for the select below.
+    */
+   type = lp_depth_type(format_desc, z_src_type.width*z_src_type.length);
+   lp_build_context_init(&bld, builder, type);
+   /* Re-read the buffer; this is the value used wherever the select falls back. */
+   z_dst = LLVMBuildLoad(builder, zs_dst_ptr, "zsbufval");
+   z_dst = lp_build_select(&bld, mask->value, zs_value, z_dst);
+   /* Write the merged vector back over the location it was loaded from. */
+   LLVMBuildStore(builder, z_dst, zs_dst_ptr);
 }
index 2a63bb9378b59b448568b23d6fb83955e67d9db8..0f89668123af2b2368b2c6c488dd1e58c5834046 100644 (file)
@@ -61,8 +61,21 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,
                             LLVMValueRef zs_src,
                             LLVMValueRef zs_dst_ptr,
                             LLVMValueRef facing,
-                            LLVMValueRef counter,
+                            LLVMValueRef *zs_value,
                             boolean do_branch);
 
+void  /* emits a masked load/select/store on zs_dst_ptr */
+lp_build_deferred_depth_write(LLVMBuilderRef builder,
+                              struct lp_type z_src_type,
+                              const struct util_format_description *format_desc,
+                              struct lp_build_mask_context *mask,  /* mask->value drives the select */
+                              LLVMValueRef zs_dst_ptr,  /* loaded from and stored to */
+                              LLVMValueRef zs_value);  /* value to merge in, as returned via lp_build_depth_stencil_test's zs_value */
+/**
+ * Occlusion counting helper, declared here so callers outside
+ * lp_bld_depth.c can invoke it (it is no longer static).
+ *
+ * \param maskvalue is the depth test mask.
+ * \param counter is a pointer of the uint32 counter.
+ */
+void
+lp_build_occlusion_count(LLVMBuilderRef builder,
+                         struct lp_type type,
+                         LLVMValueRef maskvalue,
+                         LLVMValueRef counter);
 
 #endif /* !LP_BLD_DEPTH_H */
index df5dd83c875357d121a457704107fb01e14c783d..f45f36f633206957a7d3ebc15c05490f080d23af 100644 (file)
 static unsigned fs_no = 0;
 
 
-/**
- * Generate the depth /stencil test code.
- */
-static void
-generate_depth_stencil(LLVMBuilderRef builder,
-                       const struct lp_fragment_shader_variant_key *key,
-                       struct lp_type src_type,
-                       struct lp_build_mask_context *mask,
-                       LLVMValueRef stencil_refs[2],
-                       LLVMValueRef src,
-                       LLVMValueRef dst_ptr,
-                       LLVMValueRef facing,
-                       LLVMValueRef counter,
-                       boolean do_branch)
-{
-   const struct util_format_description *format_desc;
-
-   if (!key->depth.enabled && !key->stencil[0].enabled && !key->stencil[1].enabled)
-      return;
-
-   format_desc = util_format_description(key->zsbuf_format);
-   assert(format_desc);
-
-   lp_build_depth_stencil_test(builder,
-                               &key->depth,
-                               key->stencil,
-                               src_type,
-                               format_desc,
-                               mask,
-                               stencil_refs,
-                               src,
-                               dst_ptr,
-                               facing,
-                               counter,
-                               do_branch);
-}
-
 
 /**
  * Expand the relevent bits of mask_input to a 4-dword mask for the 
@@ -222,6 +185,26 @@ generate_quad_mask(LLVMBuilderRef builder,
 }
 
 
+#define EARLY_DEPTH_TEST  0x1  /* depth_mode bit: depth/stencil test is emitted before the shader body */
+#define LATE_DEPTH_TEST   0x2  /* depth_mode bit: depth/stencil test is emitted after the shader body */
+#define EARLY_DEPTH_WRITE 0x4  /* depth_mode bit: z/s value is stored right after the early test */
+#define LATE_DEPTH_WRITE  0x8  /* depth_mode bit: z/s value is stored after the shader (late test or deferred write) */
+/**
+ * Scan the shader's outputs in order and return the slot of the first
+ * one whose semantic name and semantic index both match.
+ *
+ * \param info      shader info providing num_outputs and the
+ *                  output_semantic_name / output_semantic_index arrays
+ * \param semantic  semantic name to look for
+ * \param index     semantic index to look for
+ * \return the matching output slot, or -1 when no output matches
+ */
+static int
+find_output_by_semantic( const struct tgsi_shader_info *info,
+                        unsigned semantic,
+                        unsigned index )
+{
+   int i;
+
+   for (i = 0; i < info->num_outputs; i++)
+      if (info->output_semantic_name[i] == semantic &&
+         info->output_semantic_index[i] == index)
+        return i;
+   /* Fell through the whole list: nothing carries this semantic. */
+   return -1;
+}
+
 
 /**
  * Generate the fragment shader, depth/stencil test, and alpha tests.
@@ -246,21 +229,53 @@ generate_fs(struct llvmpipe_context *lp,
             LLVMValueRef mask_input,
             LLVMValueRef counter)
 {
+   const struct util_format_description *zs_format_desc = NULL;
    const struct tgsi_token *tokens = shader->base.tokens;
    LLVMTypeRef vec_type;
    LLVMValueRef consts_ptr;
    LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
    LLVMValueRef z;
+   LLVMValueRef zs_value = NULL;
    LLVMValueRef stencil_refs[2];
    struct lp_build_flow_context *flow;
    struct lp_build_mask_context mask;
-   boolean early_depth_stencil_test;
    boolean simple_shader = (shader->info.file_count[TGSI_FILE_SAMPLER] == 0 &&
                             shader->info.num_inputs < 3 &&
                             shader->info.num_instructions < 8);
    unsigned attrib;
    unsigned chan;
    unsigned cbuf;
+   unsigned depth_mode;
+
+   if (key->depth.enabled ||
+       key->stencil[0].enabled ||
+       key->stencil[1].enabled) {
+
+      zs_format_desc = util_format_description(key->zsbuf_format);
+      assert(zs_format_desc);
+
+      if (!shader->info.writes_z) {
+         if (key->alpha.enabled || shader->info.uses_kill)
+            /* With alpha test and kill, can do the depth test early
+             * and hopefully eliminate some quads.  But need to do a
+             * special deferred depth write once the final mask value
+             * is known.
+             */
+            depth_mode = EARLY_DEPTH_TEST | LATE_DEPTH_WRITE;
+         else
+            depth_mode = EARLY_DEPTH_TEST | EARLY_DEPTH_WRITE;
+      }
+      else {
+         depth_mode = LATE_DEPTH_TEST | LATE_DEPTH_WRITE;
+      }
+
+      if (!(key->depth.enabled && key->depth.writemask) &&
+          !(key->stencil[0].enabled && key->stencil[0].writemask))
+         depth_mode &= ~(LATE_DEPTH_WRITE | EARLY_DEPTH_WRITE);
+   }
+   else {
+      depth_mode = 0;
+   }
 
    assert(i < 4);
 
@@ -293,79 +308,121 @@ generate_fs(struct llvmpipe_context *lp,
       *pmask = lp_build_const_int_vec(type, ~0);
    }
 
-
-   early_depth_stencil_test =
-      (key->depth.enabled || key->stencil[0].enabled) &&
-      !key->alpha.enabled &&
-      !shader->info.uses_kill &&
-      !shader->info.writes_z;
-
    /* 'mask' will control execution based on quad's pixel alive/killed state */
    lp_build_mask_begin(&mask, flow, type, *pmask);
 
-   if (!early_depth_stencil_test && !simple_shader)
+   if (!(depth_mode & EARLY_DEPTH_TEST) && !simple_shader)
       lp_build_mask_check(&mask);
 
    lp_build_interp_soa_update_pos(interp, i);
    z = interp->pos[2];
 
-   if (early_depth_stencil_test)
-      generate_depth_stencil(builder, key,
-                             type, &mask,
-                             stencil_refs, 
-                             z, depth_ptr,
-                             facing, counter,
-                             !simple_shader);
+   if (depth_mode & EARLY_DEPTH_TEST) {
+      lp_build_depth_stencil_test(builder,
+                                  &key->depth,
+                                  key->stencil,
+                                  type,
+                                  zs_format_desc,
+                                  &mask,
+                                  stencil_refs,
+                                  z,
+                                  depth_ptr, facing,
+                                  &zs_value,
+                                  !simple_shader);
+
+      if (depth_mode & EARLY_DEPTH_WRITE)
+         LLVMBuildStore(builder, zs_value, depth_ptr);
+   }
 
    lp_build_interp_soa_update_inputs(interp, i);
-
+   
+   /* Build the actual shader */
    lp_build_tgsi_soa(builder, tokens, type, &mask,
                      consts_ptr, interp->pos, interp->inputs,
                      outputs, sampler, &shader->info);
 
-   /* loop over fragment shader outputs/results */
-   for (attrib = 0; attrib < shader->info.num_outputs; ++attrib) {
-      for(chan = 0; chan < NUM_CHANNELS; ++chan) {
-         if(outputs[attrib][chan]) {
+
+   /* Alpha test */
+   if (key->alpha.enabled) {
+      int color0 = find_output_by_semantic(&shader->info,
+                                           TGSI_SEMANTIC_COLOR,
+                                           0);
+
+      if (color0 != -1) {
+         LLVMValueRef alpha = LLVMBuildLoad(builder, outputs[color0][3], "alpha");
+         LLVMValueRef alpha_ref_value;
+
+         alpha_ref_value = lp_jit_context_alpha_ref_value(builder, context_ptr);
+         alpha_ref_value = lp_build_broadcast(builder, vec_type, alpha_ref_value);
+
+         lp_build_alpha_test(builder, key->alpha.func, type,
+                             &mask, alpha, alpha_ref_value,
+                             (depth_mode & LATE_DEPTH_TEST) != 0);
+      }
+   }
+
+   /* Late Z test */
+   if (depth_mode & LATE_DEPTH_TEST) { 
+      int pos0 = find_output_by_semantic(&shader->info,
+                                         TGSI_SEMANTIC_POSITION,
+                                         0);
+         
+      if (pos0 != -1) {
+         z = LLVMBuildLoad(builder, outputs[pos0][2], "z");
+         lp_build_name(z, "output%u.%u.%c", i, pos0, "xyzw"[chan]);
+      }
+
+      lp_build_depth_stencil_test(builder,
+                                  &key->depth,
+                                  key->stencil,
+                                  type,
+                                  zs_format_desc,
+                                  &mask,
+                                  stencil_refs,
+                                  z,
+                                  depth_ptr, facing,
+                                  &zs_value,
+                                  !simple_shader);
+      /* Late Z write */
+      if (depth_mode & LATE_DEPTH_WRITE)
+         LLVMBuildStore(builder, zs_value, depth_ptr);
+   }
+   else if ((depth_mode & EARLY_DEPTH_TEST) &&
+            (depth_mode & LATE_DEPTH_WRITE))
+   {
+      /* Need to apply a reduced mask to the depth write.  Reload the
+       * depth value, update from zs_value with the new mask value and
+       * write that out.
+       */
+      lp_build_deferred_depth_write(builder,
+                                    type,
+                                    zs_format_desc,
+                                    &mask,
+                                    depth_ptr,
+                                    zs_value);
+   }
+
+
+   /* Color write  */
+   for (attrib = 0; attrib < shader->info.num_outputs; ++attrib)
+   {
+      if (shader->info.output_semantic_name[attrib] == TGSI_SEMANTIC_COLOR)
+      {
+         unsigned cbuf = shader->info.output_semantic_index[attrib];
+         for(chan = 0; chan < NUM_CHANNELS; ++chan)
+         {
+            /* XXX: just initialize outputs to point at colors[] and
+             * skip this.
+             */
             LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], "");
-            lp_build_name(out, "output%u.%u.%c", i, attrib, "xyzw"[chan]);
-
-            switch (shader->info.output_semantic_name[attrib]) {
-            case TGSI_SEMANTIC_COLOR:
-               {
-                  unsigned cbuf = shader->info.output_semantic_index[attrib];
-
-                  lp_build_name(out, "color%u.%u.%c", i, attrib, "rgba"[chan]);
-
-                  /* Alpha test */
-                 /* XXX: should only test the final assignment to alpha */
-                  if (cbuf == 0 && chan == 3 && key->alpha.enabled) {
-                     LLVMValueRef alpha = out;
-                     LLVMValueRef alpha_ref_value;
-                     alpha_ref_value = lp_jit_context_alpha_ref_value(builder, context_ptr);
-                     alpha_ref_value = lp_build_broadcast(builder, vec_type, alpha_ref_value);
-                     lp_build_alpha_test(builder, key->alpha.func, type,
-                                         &mask, alpha, alpha_ref_value, FALSE);
-                  }
-
-                  LLVMBuildStore(builder, out, color[cbuf][chan]);
-                  break;
-               }
-
-            case TGSI_SEMANTIC_POSITION:
-               if(chan == 2)
-                  z = out;
-               break;
-            }
+            lp_build_name(out, "color%u.%u.%c", i, attrib, "rgba"[chan]);
+            LLVMBuildStore(builder, out, color[cbuf][chan]);
          }
       }
    }
 
-   if (!early_depth_stencil_test)
-      generate_depth_stencil(builder, key,
-                             type, &mask,
-                             stencil_refs, z, depth_ptr,
-                             facing, counter, FALSE);
+   if (counter)
+      lp_build_occlusion_count(builder, type, mask.value, counter);
 
    lp_build_mask_end(&mask);