llvmpipe: try to be sensible about whether to branch after mask updates
author Keith Whitwell <keithw@vmware.com>
Thu, 7 Oct 2010 14:01:07 +0000 (15:01 +0100)
committer Keith Whitwell <keithw@vmware.com>
Sat, 9 Oct 2010 10:44:45 +0000 (11:44 +0100)
Don't branch more than once in quick succession.  Don't branch at the
end of the shader.

src/gallium/auxiliary/gallivm/lp_bld_flow.c
src/gallium/auxiliary/gallivm/lp_bld_flow.h
src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
src/gallium/drivers/llvmpipe/lp_bld_alpha.c
src/gallium/drivers/llvmpipe/lp_bld_alpha.h
src/gallium/drivers/llvmpipe/lp_bld_depth.c
src/gallium/drivers/llvmpipe/lp_bld_depth.h
src/gallium/drivers/llvmpipe/lp_state_fs.c

index cd5fbc246382fb1440e90adcfcc65d3127744fe3..1ec33c742e235dba1b7697e1c99bbfb56723bec1 100644 (file)
@@ -450,7 +450,7 @@ lp_build_flow_skip_end(struct lp_build_flow_context *flow)
 /**
  * Check if the mask predicate is zero.  If so, jump to the end of the block.
  */
-static void
+void
 lp_build_mask_check(struct lp_build_mask_context *mask)
 {
    LLVMBuilderRef builder = mask->flow->builder;
@@ -490,8 +490,6 @@ lp_build_mask_begin(struct lp_build_mask_context *mask,
    lp_build_flow_scope_begin(flow);
    lp_build_flow_scope_declare(flow, &mask->value);
    lp_build_flow_skip_begin(flow);
-
-   lp_build_mask_check(mask);
 }
 
 
@@ -505,8 +503,6 @@ lp_build_mask_update(struct lp_build_mask_context *mask,
                      LLVMValueRef value)
 {
    mask->value = LLVMBuildAnd( mask->flow->builder, mask->value, value, "");
-
-   lp_build_mask_check(mask);
 }
 
 
index fffb493a93b2470a9a542e14aea7946bae54026c..095c781ec546df69f3affab1d6bba6d62bacc6be 100644 (file)
@@ -94,6 +94,9 @@ void
 lp_build_mask_update(struct lp_build_mask_context *mask,
                      LLVMValueRef value);
 
+void
+lp_build_mask_check(struct lp_build_mask_context *mask);
+
 LLVMValueRef
 lp_build_mask_end(struct lp_build_mask_context *mask);
 
index 441aebae298e86bb6d86a2ed1f3a9db87dcde205..03020a62f8554eedffc12f6fbfd8edeec991e719 100644 (file)
@@ -959,8 +959,13 @@ emit_kil(
       }
    }
 
-   if(mask)
+   if(mask) {
       lp_build_mask_update(bld->mask, mask);
+
+      /* XXX: figure out if we are at the end of the shader and skip this:
+       */
+      lp_build_mask_check(bld->mask);
+   }
 }
 
 
@@ -987,6 +992,10 @@ emit_kilp(struct lp_build_tgsi_soa_context *bld,
    }
 
    lp_build_mask_update(bld->mask, mask);
+
+   /* XXX: figure out if we are at the end of the shader and skip this:
+    */
+   lp_build_mask_check(bld->mask);
 }
 
 static void
index e28efe778f9a42666e1f9e0ea9c43c6d83920301..e50643790c8aa4c3f6eef0a48e31ada6c118deab 100644 (file)
@@ -48,7 +48,8 @@ lp_build_alpha_test(LLVMBuilderRef builder,
                     struct lp_type type,
                     struct lp_build_mask_context *mask,
                     LLVMValueRef alpha,
-                    LLVMValueRef ref)
+                    LLVMValueRef ref,
+                    boolean do_branch)
 {
    struct lp_build_context bld;
    LLVMValueRef test;
@@ -60,4 +61,7 @@ lp_build_alpha_test(LLVMBuilderRef builder,
    lp_build_name(test, "alpha_mask");
 
    lp_build_mask_update(mask, test);
+
+   if (do_branch)
+      lp_build_mask_check(mask);
 }
index 44603b418c04171ef5e0f42912ce505bfe2f922d..27ca8aad4d4406283fbf44c4ed70d31b932a516d 100644 (file)
@@ -48,7 +48,8 @@ lp_build_alpha_test(LLVMBuilderRef builder,
                     struct lp_type type,
                     struct lp_build_mask_context *mask,
                     LLVMValueRef alpha,
-                    LLVMValueRef ref);
+                    LLVMValueRef ref,
+                    boolean do_branch);
 
 
 #endif /* !LP_BLD_ALPHA_H */
index 09b82fbe9ba8eb803ce36367532e6a3c5df1e7a5..6b8ffb6ca265708ca117def02ede8cd90f1cca43 100644 (file)
@@ -462,7 +462,8 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,
                             LLVMValueRef z_src,
                             LLVMValueRef zs_dst_ptr,
                             LLVMValueRef face,
-                            LLVMValueRef counter)
+                            LLVMValueRef counter,
+                            boolean do_branch)
 {
    struct lp_type type;
    struct lp_build_context bld;
@@ -515,6 +516,9 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,
 
       lp_build_mask_update(mask, z_pass);
 
+      if (do_branch)
+         lp_build_mask_check(mask);
+
       /* No need to worry about old stencil contents, just blend the
        * old and new values and shift into the correct position for
        * storage.
@@ -701,6 +705,11 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,
           * buffer values.  Don't need to update Z buffer values.
           */
          lp_build_mask_update(mask, z_pass);
+
+         if (do_branch) {
+            lp_build_mask_check(mask);
+            do_branch = FALSE;
+         }
       }
 
       if (depth->writemask) {
@@ -779,6 +788,9 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,
    if (depth->enabled && stencil[0].enabled)
       lp_build_mask_update(mask, z_pass);
 
+   if (do_branch)
+      lp_build_mask_check(mask);
+
    if (counter)
       lp_build_occlusion_count(builder, type, mask->value, counter);
 }
index e257a5bd7d09c678361945e9d56a34b86fd3a60d..2a63bb9378b59b448568b23d6fb83955e67d9db8 100644 (file)
@@ -61,7 +61,8 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,
                             LLVMValueRef zs_src,
                             LLVMValueRef zs_dst_ptr,
                             LLVMValueRef facing,
-                            LLVMValueRef counter);
+                            LLVMValueRef counter,
+                            boolean do_branch);
 
 
 #endif /* !LP_BLD_DEPTH_H */
index b7a51cd66790c1244fc3be5b7fbf6decc285d146..df5dd83c875357d121a457704107fb01e14c783d 100644 (file)
@@ -116,7 +116,8 @@ generate_depth_stencil(LLVMBuilderRef builder,
                        LLVMValueRef src,
                        LLVMValueRef dst_ptr,
                        LLVMValueRef facing,
-                       LLVMValueRef counter)
+                       LLVMValueRef counter,
+                       boolean do_branch)
 {
    const struct util_format_description *format_desc;
 
@@ -136,7 +137,8 @@ generate_depth_stencil(LLVMBuilderRef builder,
                                src,
                                dst_ptr,
                                facing,
-                               counter);
+                               counter,
+                               do_branch);
 }
 
 
@@ -253,6 +255,9 @@ generate_fs(struct llvmpipe_context *lp,
    struct lp_build_flow_context *flow;
    struct lp_build_mask_context mask;
    boolean early_depth_stencil_test;
+   boolean simple_shader = (shader->info.file_count[TGSI_FILE_SAMPLER] == 0 &&
+                            shader->info.num_inputs < 3 &&
+                            shader->info.num_instructions < 8);
    unsigned attrib;
    unsigned chan;
    unsigned cbuf;
@@ -288,15 +293,6 @@ generate_fs(struct llvmpipe_context *lp,
       *pmask = lp_build_const_int_vec(type, ~0);
    }
 
-   /* 'mask' will control execution based on quad's pixel alive/killed state */
-   lp_build_mask_begin(&mask, flow, type, *pmask);
-
-   lp_build_interp_soa_update_pos(interp, i);
-
-   /* Try to avoid the 1/w for quads where mask is zero.  TODO: avoid
-    * this for depth-fail quads also.
-    */
-   z = interp->pos[2];
 
    early_depth_stencil_test =
       (key->depth.enabled || key->stencil[0].enabled) &&
@@ -304,10 +300,22 @@ generate_fs(struct llvmpipe_context *lp,
       !shader->info.uses_kill &&
       !shader->info.writes_z;
 
+   /* 'mask' will control execution based on quad's pixel alive/killed state */
+   lp_build_mask_begin(&mask, flow, type, *pmask);
+
+   if (!early_depth_stencil_test && !simple_shader)
+      lp_build_mask_check(&mask);
+
+   lp_build_interp_soa_update_pos(interp, i);
+   z = interp->pos[2];
+
    if (early_depth_stencil_test)
       generate_depth_stencil(builder, key,
                              type, &mask,
-                             stencil_refs, z, depth_ptr, facing, counter);
+                             stencil_refs, 
+                             z, depth_ptr,
+                             facing, counter,
+                             !simple_shader);
 
    lp_build_interp_soa_update_inputs(interp, i);
 
@@ -337,7 +345,7 @@ generate_fs(struct llvmpipe_context *lp,
                      alpha_ref_value = lp_jit_context_alpha_ref_value(builder, context_ptr);
                      alpha_ref_value = lp_build_broadcast(builder, vec_type, alpha_ref_value);
                      lp_build_alpha_test(builder, key->alpha.func, type,
-                                         &mask, alpha, alpha_ref_value);
+                                         &mask, alpha, alpha_ref_value, FALSE);
                   }
 
                   LLVMBuildStore(builder, out, color[cbuf][chan]);
@@ -356,7 +364,8 @@ generate_fs(struct llvmpipe_context *lp,
    if (!early_depth_stencil_test)
       generate_depth_stencil(builder, key,
                              type, &mask,
-                             stencil_refs, z, depth_ptr, facing, counter);
+                             stencil_refs, z, depth_ptr,
+                             facing, counter, FALSE);
 
    lp_build_mask_end(&mask);
 
@@ -386,7 +395,8 @@ generate_blend(const struct pipe_blend_state *blend,
                LLVMValueRef context_ptr,
                LLVMValueRef mask,
                LLVMValueRef *src,
-               LLVMValueRef dst_ptr)
+               LLVMValueRef dst_ptr,
+               boolean do_branch)
 {
    struct lp_build_context bld;
    struct lp_build_flow_context *flow;
@@ -401,9 +411,9 @@ generate_blend(const struct pipe_blend_state *blend,
    lp_build_context_init(&bld, builder, type);
 
    flow = lp_build_flow_create(builder);
-
-   /* we'll use this mask context to skip blending if all pixels are dead */
    lp_build_mask_begin(&mask_ctx, flow, type, mask);
+   if (do_branch)
+      lp_build_mask_check(&mask_ctx);
 
    vec_type = lp_build_vec_type(type);
 
@@ -670,14 +680,23 @@ generate_fragment(struct llvmpipe_context *lp,
       /*
        * Blending.
        */
-      generate_blend(&key->blend,
-                     rt,
-                    builder,
-                    blend_type,
-                    context_ptr,
-                    blend_mask,
-                    blend_in_color,
-                    color_ptr);
+      {
+         /* Could the 4x4 have been killed?
+          */
+         boolean do_branch = ((key->depth.enabled || key->stencil[0].enabled) &&
+                              !key->alpha.enabled &&
+                              !shader->info.uses_kill);
+
+         generate_blend(&key->blend,
+                        rt,
+                        builder,
+                        blend_type,
+                        context_ptr,
+                        blend_mask,
+                        blend_in_color,
+                        color_ptr,
+                        do_branch);
+      }
    }
 
 #ifdef PIPE_ARCH_X86