llvmpipe: use runtime loop instead of static loop for looping over quads

author Roland Scheidegger <sroland@vmware.com>

Fri, 6 Jul 2012 00:53:44 +0000 (02:53 +0200)

committer José Fonseca <jfonseca@vmware.com>

Fri, 20 Jul 2012 19:17:15 +0000 (20:17 +0100)
author Roland Scheidegger <sroland@vmware.com>
Fri, 6 Jul 2012 00:53:44 +0000 (02:53 +0200)
committer José Fonseca <jfonseca@vmware.com>
Fri, 20 Jul 2012 19:17:15 +0000 (20:17 +0100)
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c

index d108f35f7196442c9a484374951a3554d5709e7c..4947f304a11e7b8a4d2c3624197028d315972872 100644 (file)
--- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
@@ -42,6 +42,7 @@
  #include "gallivm/lp_bld_const.h"
  #include "gallivm/lp_bld_arit.h"
  #include "gallivm/lp_bld_swizzle.h"
+#include "gallivm/lp_bld_flow.h"
  #include "lp_bld_interp.h"
  
  
@@ -122,6 +123,33 @@ attrib_name(LLVMValueRef val, unsigned attrib, unsigned chan, const char *suffix
        lp_build_name(val, "input%u.%c%s", attrib - 1, "xyzw"[chan], suffix);
  }
  
+static void
+calc_offsets(struct lp_build_context *coeff_bld,
+             unsigned quad_start_index,
+             LLVMValueRef *pixoffx,
+             LLVMValueRef *pixoffy)
+{
+   unsigned i;
+   unsigned num_pix = coeff_bld->type.length;
+   struct gallivm_state *gallivm = coeff_bld->gallivm;
+   LLVMBuilderRef builder = coeff_bld->gallivm->builder;
+   LLVMValueRef nr, pixxf, pixyf;
+
+   *pixoffx = coeff_bld->undef;
+   *pixoffy = coeff_bld->undef;
+
+   for (i = 0; i < num_pix; i++) {
+      nr = lp_build_const_int32(gallivm, i);
+      pixxf = lp_build_const_float(gallivm, quad_offset_x[i % num_pix] +
+                                   (quad_start_index & 1) * 2);
+      pixyf = lp_build_const_float(gallivm, quad_offset_y[i % num_pix] +
+                                   (quad_start_index & 2));
+      *pixoffx = LLVMBuildInsertElement(builder, *pixoffx, pixxf, nr, "");
+      *pixoffy = LLVMBuildInsertElement(builder, *pixoffy, pixyf, nr, "");
+   }
+}
+
+
  /* Much easier, and significantly less instructions in the per-stamp
   * part (less than half) but overall more instructions so a loss if
   * most quads are active. Might be a win though with larger vectors.
@@ -210,6 +238,7 @@ static void
  attribs_update_simple(struct lp_build_interp_soa_context *bld,
                        struct gallivm_state *gallivm,
                        int quad_start_index,
+                      LLVMValueRef loop_iter,
                        int start,
                        int end)
  {
@@ -217,22 +246,22 @@ attribs_update_simple(struct lp_build_interp_soa_context *bld,
     struct lp_build_context *coeff_bld = &bld->coeff_bld;
     struct lp_build_context *setup_bld = &bld->setup_bld;
     LLVMValueRef oow = NULL;
-   unsigned attrib, i;
+   unsigned attrib;
     LLVMValueRef pixoffx;
     LLVMValueRef pixoffy;
-   unsigned num_pix = coeff_bld->type.length;
  
-   /* could do this with code-generated passed in pixel offsets */
-   pixoffx = coeff_bld->undef;
-   pixoffy = coeff_bld->undef;
-   for (i = 0; i < coeff_bld->type.length; i++) {
-      LLVMValueRef nr = lp_build_const_int32(gallivm, i);
-      LLVMValueRef pixxf = lp_build_const_float(gallivm, quad_offset_x[i % num_pix] +
-                                                (quad_start_index & 1) * 2);
-      LLVMValueRef pixyf = lp_build_const_float(gallivm, quad_offset_y[i % num_pix] +
-                                                (quad_start_index & 2));
-      pixoffx = LLVMBuildInsertElement(builder, pixoffx, pixxf, nr, "");
-      pixoffy = LLVMBuildInsertElement(builder, pixoffy, pixyf, nr, "");
+   /* could do this with code-generated passed in pixel offsets too */
+   if (bld->dynamic_offsets) {
+      LLVMValueRef ptr;
+
+      assert(loop_iter);
+      ptr = LLVMBuildGEP(builder, bld->xoffset_store, &loop_iter, 1, "");
+      pixoffx = LLVMBuildLoad(builder, ptr, "");
+      ptr = LLVMBuildGEP(builder, bld->yoffset_store, &loop_iter, 1, "");
+      pixoffy = LLVMBuildLoad(builder, ptr, "");
+   }
+   else {
+      calc_offsets(coeff_bld, quad_start_index, &pixoffx, &pixoffy);
     }
  
     pixoffx = LLVMBuildFAdd(builder, pixoffx,
@@ -498,7 +527,14 @@ coeffs_init(struct lp_build_interp_soa_context *bld,
              attrib_name(a, attrib, chan, ".a");
              attrib_name(dadq, attrib, chan, ".dadq");
  
-            bld->a   [attrib][chan] = a;
+            if (bld->dynamic_offsets) {
+               bld->a[attrib][chan] = lp_build_alloca(gallivm,
+                                                      LLVMTypeOf(a), "");
+               LLVMBuildStore(builder, a, bld->a[attrib][chan]);
+            }
+            else {
+               bld->a[attrib][chan] = a;
+            }
              bld->dadq[attrib][chan] = dadq;
           }
        }
@@ -514,6 +550,7 @@ static void
  attribs_update(struct lp_build_interp_soa_context *bld,
                 struct gallivm_state *gallivm,
                 int quad_start_index,
+               LLVMValueRef loop_iter,
                 int start,
                 int end)
  {
@@ -535,6 +572,9 @@ attribs_update(struct lp_build_interp_soa_context *bld,
              if (interp == LP_INTERP_CONSTANT ||
                  interp == LP_INTERP_FACING) {
                 a = bld->a[attrib][chan];
+               if (bld->dynamic_offsets) {
+                  a = LLVMBuildLoad(builder, a, "");
+               }
              }
              else if (interp == LP_INTERP_POSITION) {
                 assert(attrib > 0);
@@ -549,8 +589,20 @@ attribs_update(struct lp_build_interp_soa_context *bld,
                  * Broadcast the attribute value for this quad into all elements
                  */
  
-               a = LLVMBuildShuffleVector(builder,
-                                          a, coeff_bld->undef, shuffle, "");
+               if (bld->dynamic_offsets) {
+                  /* a is stored as a vector; load one element as float */
+                  LLVMTypeRef ptr_type = LLVMPointerType(LLVMFloatTypeInContext(
+                                                            gallivm->context), 0);
+                  LLVMValueRef ptr;
+                  a = LLVMBuildBitCast(builder, a, ptr_type, "");
+                  ptr = LLVMBuildGEP(builder, a, &loop_iter, 1, "");
+                  a = LLVMBuildLoad(builder, ptr, "");
+                  a = lp_build_broadcast_scalar(&bld->coeff_bld, a);
+               }
+               else {
+                  a = LLVMBuildShuffleVector(builder,
+                                             a, coeff_bld->undef, shuffle, "");
+               }
  
                 /*
                  * Get the derivatives.
@@ -639,6 +691,7 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
                           const struct lp_shader_input *inputs,
                           LLVMBuilderRef builder,
                           struct lp_type type,
+                         boolean dynamic_offsets,
                           LLVMValueRef a0_ptr,
                           LLVMValueRef dadx_ptr,
                           LLVMValueRef dady_ptr,
@@ -696,11 +749,42 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
     pos_init(bld, x0, y0);
  
     if (coeff_type.length > 4) {
+      bld->simple_interp = TRUE;
+      if (dynamic_offsets) {
+         /* XXX this should use a global static table */
+         unsigned i;
+         unsigned num_loops = 16 / type.length;
+         LLVMValueRef pixoffx, pixoffy, index;
+         LLVMValueRef ptr;
+
+         bld->dynamic_offsets = TRUE;
+         bld->xoffset_store = lp_build_array_alloca(gallivm,
+                                                    lp_build_vec_type(gallivm, type),
+                                                    lp_build_const_int32(gallivm, num_loops),
+                                                    "");
+         bld->yoffset_store = lp_build_array_alloca(gallivm,
+                                                    lp_build_vec_type(gallivm, type),
+                                                    lp_build_const_int32(gallivm, num_loops),
+                                                    "");
+         for (i = 0; i < num_loops; i++) {
+            index = lp_build_const_int32(gallivm, i);
+            calc_offsets(&bld->coeff_bld, i*type.length/4, &pixoffx, &pixoffy);
+            ptr = LLVMBuildGEP(builder, bld->xoffset_store, &index, 1, "");
+            LLVMBuildStore(builder, pixoffx, ptr);
+            ptr = LLVMBuildGEP(builder, bld->yoffset_store, &index, 1, "");
+            LLVMBuildStore(builder, pixoffy, ptr);
+         }
+      }
        coeffs_init_simple(bld, a0_ptr, dadx_ptr, dady_ptr);
     }
     else {
+      bld->simple_interp = FALSE;
+      if (dynamic_offsets) {
+         bld->dynamic_offsets = TRUE;
+      }
        coeffs_init(bld, a0_ptr, dadx_ptr, dady_ptr);
     }
+
  }
  
  
@@ -714,26 +798,52 @@ lp_build_interp_soa_update_inputs(struct lp_build_interp_soa_context *bld,
  {
     assert(quad_start_index < 4);
  
-   if (bld->coeff_bld.type.length > 4) {
-      attribs_update_simple(bld, gallivm, quad_start_index, 1, bld->num_attribs);
+   if (bld->simple_interp) {
+      attribs_update_simple(bld, gallivm, quad_start_index, NULL, 1, bld->num_attribs);
     }
     else {
-      attribs_update(bld, gallivm, quad_start_index, 1, bld->num_attribs);
+      attribs_update(bld, gallivm, quad_start_index, NULL, 1, bld->num_attribs);
     }
  }
  
  void
  lp_build_interp_soa_update_pos(struct lp_build_interp_soa_context *bld,
-                                  struct gallivm_state *gallivm,
-                                  int quad_start_index)
+                               struct gallivm_state *gallivm,
+                               int quad_start_index)
  {
     assert(quad_start_index < 4);
  
-   if (bld->coeff_bld.type.length > 4) {
-      attribs_update_simple(bld, gallivm, quad_start_index, 0, 1);
+   if (bld->simple_interp) {
+      attribs_update_simple(bld, gallivm, quad_start_index, NULL, 0, 1);
+   }
+   else {
+      attribs_update(bld, gallivm, quad_start_index, NULL, 0, 1);
+   }
+}
+
+void
+lp_build_interp_soa_update_inputs_dyn(struct lp_build_interp_soa_context *bld,
+                                      struct gallivm_state *gallivm,
+                                      LLVMValueRef quad_start_index)
+{
+   if (bld->simple_interp) {
+      attribs_update_simple(bld, gallivm, 0, quad_start_index, 1, bld->num_attribs);
+   }
+   else {
+      attribs_update(bld, gallivm, 0, quad_start_index, 1, bld->num_attribs);
+   }
+}
+
+void
+lp_build_interp_soa_update_pos_dyn(struct lp_build_interp_soa_context *bld,
+                                   struct gallivm_state *gallivm,
+                                   LLVMValueRef quad_start_index)
+{
+   if (bld->simple_interp) {
+      attribs_update_simple(bld, gallivm, 0, quad_start_index, 0, 1);
     }
     else {
-      attribs_update(bld, gallivm, quad_start_index, 0, 1);
+      attribs_update(bld, gallivm, 0, quad_start_index, 0, 1);
     }
  }
  
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.h b/src/gallium/drivers/llvmpipe/lp_bld_interp.h

index f293b582318a1016bde86ca1699a562a487fec39..d273e3f9b9927f4f0c740e04c589f8823342fde5 100644 (file)
--- a/src/gallium/drivers/llvmpipe/lp_bld_interp.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.h
@@ -84,6 +84,8 @@ struct lp_build_interp_soa_context
     unsigned num_attribs;
     unsigned mask[1 + PIPE_MAX_SHADER_INPUTS]; /**< TGSI_WRITE_MASK_x */
     enum lp_interp interp[1 + PIPE_MAX_SHADER_INPUTS];
+   boolean simple_interp;
+   boolean dynamic_offsets;
  
     LLVMValueRef x;
     LLVMValueRef y;
@@ -98,6 +100,9 @@ struct lp_build_interp_soa_context
  
     LLVMValueRef attribs[1 + PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
  
+   LLVMValueRef xoffset_store;
+   LLVMValueRef yoffset_store;
+
     /*
      * Convenience pointers. Callers may access this one.
      */
@@ -113,6 +118,7 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
                           const struct lp_shader_input *inputs,
                           LLVMBuilderRef builder,
                           struct lp_type type,
+                         boolean dynamic_offsets,
                           LLVMValueRef a0_ptr,
                           LLVMValueRef dadx_ptr,
                           LLVMValueRef dady_ptr,
@@ -129,5 +135,14 @@ lp_build_interp_soa_update_pos(struct lp_build_interp_soa_context *bld,
                                 struct gallivm_state *gallivm,
                                 int quad__start_index);
  
+void
+lp_build_interp_soa_update_inputs_dyn(struct lp_build_interp_soa_context *bld,
+                                      struct gallivm_state *gallivm,
+                                      LLVMValueRef quad_start_index);
+
+void
+lp_build_interp_soa_update_pos_dyn(struct lp_build_interp_soa_context *bld,
+                                   struct gallivm_state *gallivm,
+                                   LLVMValueRef quad_start_index);
  
  #endif /* LP_BLD_INTERP_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c

index 54f45357fdc5107405182675868eaff6545d479d..374544fcf70d1bab985998a618983916db06a3ac 100644 (file)
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -295,7 +295,7 @@ generate_fs(struct gallivm_state *gallivm,
     /* Declare the color and z variables */
     for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) {
        for(chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
-        color[cbuf][chan] = lp_build_alloca(gallivm, vec_type, "color");
+         color[cbuf][chan] = lp_build_alloca(gallivm, vec_type, "color");
        }
     }
  
@@ -336,7 +336,7 @@ generate_fs(struct gallivm_state *gallivm,
     }
  
     lp_build_interp_soa_update_inputs(interp, gallivm, i*type.length/4);
-   
+
     /* Build the actual shader */
     lp_build_tgsi_soa(gallivm, tokens, type, &mask,
                       consts_ptr, &system_values,
@@ -435,6 +435,252 @@ generate_fs(struct gallivm_state *gallivm,
  }
  
  
+/**
+ * Generate fragment shader, depth/stencil and alpha tests in a runtime loop.
+ */
+static void
+generate_fs_loop(struct gallivm_state *gallivm,
+                 struct lp_fragment_shader *shader,
+                 const struct lp_fragment_shader_variant_key *key,
+                 LLVMBuilderRef builder,
+                 struct lp_type type,
+                 LLVMValueRef context_ptr,
+                 LLVMValueRef num_loop,
+                 struct lp_build_interp_soa_context *interp,
+                 struct lp_build_sampler_soa *sampler,
+                 LLVMValueRef mask_store,
+                 LLVMValueRef (*out_color)[4],
+                 LLVMValueRef depth_ptr,
+                 unsigned depth_bits,
+                 LLVMValueRef facing,
+                 LLVMValueRef counter)
+{
+   const struct util_format_description *zs_format_desc = NULL;
+   const struct tgsi_token *tokens = shader->base.tokens;
+   LLVMTypeRef vec_type;
+   LLVMValueRef mask_ptr, mask_val;
+   LLVMValueRef consts_ptr;
+   LLVMValueRef z;
+   LLVMValueRef zs_value = NULL;
+   LLVMValueRef stencil_refs[2];
+   LLVMValueRef depth_ptr_i;
+   LLVMValueRef depth_offset;
+   LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
+   struct lp_build_for_loop_state loop_state;
+   struct lp_build_mask_context mask;
+   boolean simple_shader = (shader->info.base.file_count[TGSI_FILE_SAMPLER] == 0 &&
+                            shader->info.base.num_inputs < 3 &&
+                            shader->info.base.num_instructions < 8);
+   unsigned attrib;
+   unsigned chan;
+   unsigned cbuf;
+   unsigned depth_mode;
+
+   struct lp_bld_tgsi_system_values system_values;
+
+   memset(&system_values, 0, sizeof(system_values));
+
+   if (key->depth.enabled ||
+       key->stencil[0].enabled ||
+       key->stencil[1].enabled) {
+
+      zs_format_desc = util_format_description(key->zsbuf_format);
+      assert(zs_format_desc);
+
+      if (!shader->info.base.writes_z) {
+         if (key->alpha.enabled || shader->info.base.uses_kill)
+            /* With alpha test and kill, can do the depth test early
+             * and hopefully eliminate some quads.  But need to do a
+             * special deferred depth write once the final mask value
+             * is known.
+             */
+            depth_mode = EARLY_DEPTH_TEST | LATE_DEPTH_WRITE;
+         else
+            depth_mode = EARLY_DEPTH_TEST | EARLY_DEPTH_WRITE;
+      }
+      else {
+         depth_mode = LATE_DEPTH_TEST | LATE_DEPTH_WRITE;
+      }
+
+      if (!(key->depth.enabled && key->depth.writemask) &&
+          !(key->stencil[0].enabled && key->stencil[0].writemask))
+         depth_mode &= ~(LATE_DEPTH_WRITE | EARLY_DEPTH_WRITE);
+   }
+   else {
+      depth_mode = 0;
+   }
+
+
+   stencil_refs[0] = lp_jit_context_stencil_ref_front_value(gallivm, context_ptr);
+   stencil_refs[1] = lp_jit_context_stencil_ref_back_value(gallivm, context_ptr);
+
+   vec_type = lp_build_vec_type(gallivm, type);
+
+   consts_ptr = lp_jit_context_constants(gallivm, context_ptr);
+
+   lp_build_for_loop_begin(&loop_state, gallivm,
+                           lp_build_const_int32(gallivm, 0),
+                           LLVMIntULT,
+                           num_loop,
+                           lp_build_const_int32(gallivm, 1));
+
+   mask_ptr = LLVMBuildGEP(builder, mask_store,
+                           &loop_state.counter, 1, "mask_ptr");
+   mask_val = LLVMBuildLoad(builder, mask_ptr, "");
+
+   depth_offset = LLVMBuildMul(builder, loop_state.counter,
+                               lp_build_const_int32(gallivm, depth_bits * type.length),
+                               "");
+
+   depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &depth_offset, 1, "");
+
+   memset(outputs, 0, sizeof outputs);
+
+   for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) {
+      for(chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
+         out_color[cbuf][chan] = lp_build_array_alloca(gallivm,
+                                                       lp_build_vec_type(gallivm,
+                                                                         type),
+                                                       num_loop, "color");
+      }
+   }
+
+
+
+   /* 'mask' will control execution based on quad's pixel alive/killed state */
+   lp_build_mask_begin(&mask, gallivm, type, mask_val);
+
+   if (!(depth_mode & EARLY_DEPTH_TEST) && !simple_shader)
+      lp_build_mask_check(&mask);
+
+   lp_build_interp_soa_update_pos_dyn(interp, gallivm, loop_state.counter);
+   z = interp->pos[2];
+
+   if (depth_mode & EARLY_DEPTH_TEST) {
+      lp_build_depth_stencil_test(gallivm,
+                                  &key->depth,
+                                  key->stencil,
+                                  type,
+                                  zs_format_desc,
+                                  &mask,
+                                  stencil_refs,
+                                  z,
+                                  depth_ptr_i, facing,
+                                  &zs_value,
+                                  !simple_shader);
+
+      if (depth_mode & EARLY_DEPTH_WRITE) {
+         lp_build_depth_write(builder, zs_format_desc, depth_ptr_i, zs_value);
+      }
+   }
+
+   lp_build_interp_soa_update_inputs_dyn(interp, gallivm, loop_state.counter);
+
+   /* Build the actual shader */
+   lp_build_tgsi_soa(gallivm, tokens, type, &mask,
+                     consts_ptr, &system_values,
+                     interp->pos, interp->inputs,
+                     outputs, sampler, &shader->info.base);
+
+   /* Alpha test */
+   if (key->alpha.enabled) {
+      int color0 = find_output_by_semantic(&shader->info.base,
+                                           TGSI_SEMANTIC_COLOR,
+                                           0);
+
+      if (color0 != -1 && outputs[color0][3]) {
+         const struct util_format_description *cbuf_format_desc;
+         LLVMValueRef alpha = LLVMBuildLoad(builder, outputs[color0][3], "alpha");
+         LLVMValueRef alpha_ref_value;
+
+         alpha_ref_value = lp_jit_context_alpha_ref_value(gallivm, context_ptr);
+         alpha_ref_value = lp_build_broadcast(gallivm, vec_type, alpha_ref_value);
+
+         cbuf_format_desc = util_format_description(key->cbuf_format[0]);
+
+         lp_build_alpha_test(gallivm, key->alpha.func, type, cbuf_format_desc,
+                             &mask, alpha, alpha_ref_value,
+                             (depth_mode & LATE_DEPTH_TEST) != 0);
+      }
+   }
+
+   /* Late Z test */
+   if (depth_mode & LATE_DEPTH_TEST) {
+      int pos0 = find_output_by_semantic(&shader->info.base,
+                                         TGSI_SEMANTIC_POSITION,
+                                         0);
+
+      if (pos0 != -1 && outputs[pos0][2]) {
+         z = LLVMBuildLoad(builder, outputs[pos0][2], "output.z");
+      }
+
+      lp_build_depth_stencil_test(gallivm,
+                                  &key->depth,
+                                  key->stencil,
+                                  type,
+                                  zs_format_desc,
+                                  &mask,
+                                  stencil_refs,
+                                  z,
+                                  depth_ptr_i, facing,
+                                  &zs_value,
+                                  !simple_shader);
+      /* Late Z write */
+      if (depth_mode & LATE_DEPTH_WRITE) {
+         lp_build_depth_write(builder, zs_format_desc, depth_ptr_i, zs_value);
+      }
+   }
+   else if ((depth_mode & EARLY_DEPTH_TEST) &&
+            (depth_mode & LATE_DEPTH_WRITE))
+   {
+      /* Need to apply a reduced mask to the depth write.  Reload the
+       * depth value, update from zs_value with the new mask value and
+       * write that out.
+       */
+      lp_build_deferred_depth_write(gallivm,
+                                    type,
+                                    zs_format_desc,
+                                    &mask,
+                                    depth_ptr_i,
+                                    zs_value);
+   }
+
+
+   /* Color write  */
+   for (attrib = 0; attrib < shader->info.base.num_outputs; ++attrib)
+   {
+      if (shader->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_COLOR &&
+          shader->info.base.output_semantic_index[attrib] < key->nr_cbufs)
+      {
+         unsigned cbuf = shader->info.base.output_semantic_index[attrib];
+         for(chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
+            if(outputs[attrib][chan]) {
+               /* XXX: just initialize outputs to point at colors[] and
+                * skip this.
+                */
+               LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], "");
+               LLVMValueRef color_ptr;
+               color_ptr = LLVMBuildGEP(builder, out_color[cbuf][chan],
+                                        &loop_state.counter, 1, "");
+               lp_build_name(out, "color%u.%c", attrib, "rgba"[chan]);
+               LLVMBuildStore(builder, out, color_ptr);
+            }
+         }
+      }
+   }
+
+   if (key->occlusion_count) {
+      lp_build_name(counter, "counter");
+      lp_build_occlusion_count(gallivm, type,
+                               lp_build_mask_value(&mask), counter);
+   }
+
+   mask_val = lp_build_mask_end(&mask);
+   LLVMBuildStore(builder, mask_val, mask_ptr);
+   lp_build_for_loop_end(&loop_state);
+}
+
+
  /**
   * Generate color blending and color output.
   * \param rt  the render target index (to index blend, colormask state)
@@ -554,6 +800,7 @@ generate_fragment(struct llvmpipe_context *lp,
     unsigned chan;
     unsigned cbuf;
     boolean cbuf0_write_all;
+   boolean try_loop = TRUE;
  
     assert(lp_native_vector_width / 32 >= 4);
  
@@ -671,54 +918,126 @@ generate_fragment(struct llvmpipe_context *lp,
     assert(builder);
     LLVMPositionBuilderAtEnd(builder, block);
  
-   /*
-    * The shader input interpolation info is not explicitely baked in the
-    * shader key, but everything it derives from (TGSI, and flatshade) is
-    * already included in the shader key.
-    */
-   lp_build_interp_soa_init(&interp, 
-                            gallivm,
-                            shader->info.base.num_inputs,
-                            inputs,
-                            builder, fs_type,
-                            a0_ptr, dadx_ptr, dady_ptr,
-                            x, y);
-
     /* code generated texture sampling */
     sampler = lp_llvm_sampler_soa_create(key->sampler, context_ptr);
  
-   /* loop over quads in the block */
     zs_format_desc = util_format_description(key->zsbuf_format);
  
-   for(i = 0; i < num_fs; ++i) {
-      LLVMValueRef depth_offset = LLVMConstInt(int32_type,
-                                               i*fs_type.length*zs_format_desc->block.bits/8,
-                                               0);
-      LLVMValueRef out_color[PIPE_MAX_COLOR_BUFS][TGSI_NUM_CHANNELS];
-      LLVMValueRef depth_ptr_i;
-
-      depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &depth_offset, 1, "");
-
-      generate_fs(gallivm,
-                  shader, key,
-                  builder,
-                  fs_type,
-                  context_ptr,
-                  i,
-                  &interp,
-                  sampler,
-                  &fs_mask[i], /* output */
-                  out_color,
-                  depth_ptr_i,
-                  facing,
-                  partial_mask,
-                  mask_input,
-                  counter);
-
-      for (cbuf = 0; cbuf < key->nr_cbufs; cbuf++)
-         for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan)
-            fs_out_color[cbuf][chan][i] =
-               out_color[cbuf * !cbuf0_write_all][chan];
+   if (!try_loop) {
+      /*
+       * The shader input interpolation info is not explicitly baked in the
+       * shader key, but everything it derives from (TGSI, and flatshade) is
+       * already included in the shader key.
+       */
+      lp_build_interp_soa_init(&interp,
+                               gallivm,
+                               shader->info.base.num_inputs,
+                               inputs,
+                               builder, fs_type,
+                               FALSE,
+                               a0_ptr, dadx_ptr, dady_ptr,
+                               x, y);
+
+      /* loop over quads in the block */
+      for(i = 0; i < num_fs; ++i) {
+         LLVMValueRef depth_offset = LLVMConstInt(int32_type,
+                                                  i*fs_type.length*zs_format_desc->block.bits/8,
+                                                  0);
+         LLVMValueRef out_color[PIPE_MAX_COLOR_BUFS][TGSI_NUM_CHANNELS];
+         LLVMValueRef depth_ptr_i;
+
+         depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &depth_offset, 1, "");
+
+         generate_fs(gallivm,
+                     shader, key,
+                     builder,
+                     fs_type,
+                     context_ptr,
+                     i,
+                     &interp,
+                     sampler,
+                     &fs_mask[i], /* output */
+                     out_color,
+                     depth_ptr_i,
+                     facing,
+                     partial_mask,
+                     mask_input,
+                     counter);
+
+         for (cbuf = 0; cbuf < key->nr_cbufs; cbuf++)
+            for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan)
+               fs_out_color[cbuf][chan][i] =
+                  out_color[cbuf * !cbuf0_write_all][chan];
+      }
+   }
+   else {
+      unsigned depth_bits = zs_format_desc->block.bits/8;
+      LLVMValueRef num_loop = lp_build_const_int32(gallivm, num_fs);
+      LLVMTypeRef mask_type = lp_build_int_vec_type(gallivm, fs_type);
+      LLVMValueRef mask_store = lp_build_array_alloca(gallivm, mask_type,
+                                                      num_loop, "mask_store");
+      LLVMValueRef color_store[PIPE_MAX_COLOR_BUFS][TGSI_NUM_CHANNELS];
+
+      /*
+       * The shader input interpolation info is not explicitly baked in the
+       * shader key, but everything it derives from (TGSI, and flatshade) is
+       * already included in the shader key.
+       */
+      lp_build_interp_soa_init(&interp,
+                               gallivm,
+                               shader->info.base.num_inputs,
+                               inputs,
+                               builder, fs_type,
+                               TRUE,
+                               a0_ptr, dadx_ptr, dady_ptr,
+                               x, y);
+
+      for (i = 0; i < num_fs; i++) {
+         LLVMValueRef mask;
+         LLVMValueRef indexi = lp_build_const_int32(gallivm, i);
+         LLVMValueRef mask_ptr = LLVMBuildGEP(builder, mask_store,
+                                              &indexi, 1, "mask_ptr");
+
+         if (partial_mask) {
+            mask = generate_quad_mask(gallivm, fs_type,
+                                      i*fs_type.length/4, mask_input);
+         }
+         else {
+            mask = lp_build_const_int_vec(gallivm, fs_type, ~0);
+         }
+         LLVMBuildStore(builder, mask, mask_ptr);
+      }
+
+      generate_fs_loop(gallivm,
+                       shader, key,
+                       builder,
+                       fs_type,
+                       context_ptr,
+                       num_loop,
+                       &interp,
+                       sampler,
+                       mask_store, /* output */
+                       color_store,
+                       depth_ptr,
+                       depth_bits,
+                       facing,
+                       counter);
+
+      for (i = 0; i < num_fs; i++) {
+         LLVMValueRef indexi = lp_build_const_int32(gallivm, i);
+         LLVMValueRef ptr = LLVMBuildGEP(builder, mask_store,
+                                         &indexi, 1, "");
+         fs_mask[i] = LLVMBuildLoad(builder, ptr, "mask");
+         /* XXX: this is messy; things need to be reorganized */
+         for (cbuf = 0; cbuf < key->nr_cbufs; cbuf++) {
+            for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
+               ptr = LLVMBuildGEP(builder,
+                                  color_store[cbuf * !cbuf0_write_all][chan],
+                                  &indexi, 1, "");
+               fs_out_color[cbuf][chan][i] = ptr;
+            }
+         }
+      }
     }
  
     sampler->destroy(sampler);
@@ -732,7 +1051,7 @@ generate_fragment(struct llvmpipe_context *lp,
        unsigned rt;
  
        /* 
-       * Convert the fs's output color and mask to fit to the blending type. 
+       * Convert the fs's output color and mask to fit to the blending type.
         */
        for(chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
           LLVMValueRef fs_color_vals[LP_MAX_VECTOR_LENGTH];
@@ -759,8 +1078,8 @@ generate_fragment(struct llvmpipe_context *lp,
        }
  
        color_ptr = LLVMBuildLoad(builder, 
-                               LLVMBuildGEP(builder, color_ptr_ptr, &index, 1, ""),
-                               "");
+                                LLVMBuildGEP(builder, color_ptr_ptr, &index, 1, ""),
+                                "");
        lp_build_name(color_ptr, "color_ptr%d", cbuf);
  
        /* which blend/colormask state to use */
author	Roland Scheidegger <sroland@vmware.com>
	Fri, 6 Jul 2012 00:53:44 +0000 (02:53 +0200)
committer	José Fonseca <jfonseca@vmware.com>
	Fri, 20 Jul 2012 19:17:15 +0000 (20:17 +0100)
src/gallium/drivers/llvmpipe/lp_bld_interp.c		patch \| blob \| history
src/gallium/drivers/llvmpipe/lp_bld_interp.h		patch \| blob \| history
src/gallium/drivers/llvmpipe/lp_state_fs.c		patch \| blob \| history