llvmpipe: Color slot interpolation can be flat or perspective, not linear.
[mesa.git] / src / gallium / drivers / llvmpipe / lp_state_fs.c
index 90f9271223c7f52892ba720f4cd73ca9f52d578e..11a3871c40817c20f65a809054bedee511fa4a4c 100644 (file)
@@ -233,7 +233,7 @@ generate_fs(struct gallivm_state *gallivm,
    const struct tgsi_token *tokens = shader->base.tokens;
    LLVMTypeRef vec_type;
    LLVMValueRef consts_ptr;
-   LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
+   LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
    LLVMValueRef z;
    LLVMValueRef zs_value = NULL;
    LLVMValueRef stencil_refs[2];
@@ -289,7 +289,7 @@ generate_fs(struct gallivm_state *gallivm,
 
    /* Declare the color and z variables */
    for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) {
-      for(chan = 0; chan < NUM_CHANNELS; ++chan) {
+      for(chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
         color[cbuf][chan] = lp_build_alloca(gallivm, vec_type, "color");
       }
    }
@@ -406,7 +406,7 @@ generate_fs(struct gallivm_state *gallivm,
           shader->info.base.output_semantic_index[attrib] < key->nr_cbufs)
       {
          unsigned cbuf = shader->info.base.output_semantic_index[attrib];
-         for(chan = 0; chan < NUM_CHANNELS; ++chan) {
+         for(chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
             if(outputs[attrib][chan]) {
                /* XXX: just initialize outputs to point at colors[] and
                 * skip this.
@@ -536,7 +536,7 @@ generate_fragment(struct llvmpipe_context *lp,
    struct lp_build_sampler_soa *sampler;
    struct lp_build_interp_soa_context interp;
    LLVMValueRef fs_mask[LP_MAX_VECTOR_LENGTH];
-   LLVMValueRef fs_out_color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][LP_MAX_VECTOR_LENGTH];
+   LLVMValueRef fs_out_color[PIPE_MAX_COLOR_BUFS][TGSI_NUM_CHANNELS][LP_MAX_VECTOR_LENGTH];
    LLVMValueRef blend_mask;
    LLVMValueRef function;
    LLVMValueRef facing;
@@ -555,7 +555,7 @@ generate_fragment(struct llvmpipe_context *lp,
         if (key->flatshade)
            inputs[i].interp = LP_INTERP_CONSTANT;
         else
-           inputs[i].interp = LP_INTERP_LINEAR;
+           inputs[i].interp = LP_INTERP_PERSPECTIVE;
       }
    }
 
@@ -684,7 +684,7 @@ generate_fragment(struct llvmpipe_context *lp,
       LLVMValueRef depth_offset = LLVMConstInt(int32_type,
                                                i*fs_type.length*zs_format_desc->block.bits/8,
                                                0);
-      LLVMValueRef out_color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS];
+      LLVMValueRef out_color[PIPE_MAX_COLOR_BUFS][TGSI_NUM_CHANNELS];
       LLVMValueRef depth_ptr_i;
 
       depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &depth_offset, 1, "");
@@ -706,7 +706,7 @@ generate_fragment(struct llvmpipe_context *lp,
                   counter);
 
       for (cbuf = 0; cbuf < key->nr_cbufs; cbuf++)
-         for (chan = 0; chan < NUM_CHANNELS; ++chan)
+         for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan)
             fs_out_color[cbuf][chan][i] =
                out_color[cbuf * !cbuf0_write_all][chan];
    }
@@ -718,13 +718,13 @@ generate_fragment(struct llvmpipe_context *lp,
    for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) {
       LLVMValueRef color_ptr;
       LLVMValueRef index = lp_build_const_int32(gallivm, cbuf);
-      LLVMValueRef blend_in_color[NUM_CHANNELS];
+      LLVMValueRef blend_in_color[TGSI_NUM_CHANNELS];
       unsigned rt;
 
       /* 
        * Convert the fs's output color and mask to fit to the blending type. 
        */
-      for(chan = 0; chan < NUM_CHANNELS; ++chan) {
+      for(chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
          LLVMValueRef fs_color_vals[LP_MAX_VECTOR_LENGTH];
          
          for (i = 0; i < num_fs; i++) {
@@ -804,6 +804,7 @@ generate_fragment(struct llvmpipe_context *lp,
       LLVMWriteBitcodeToFile(gallivm->module, "llvmpipe.bc");
    }
 
+   variant->nr_instrs += lp_build_count_instructions(function);
    /*
     * Translate the LLVM IR into machine code.
     */
@@ -923,6 +924,7 @@ generate_variant(struct llvmpipe_context *lp,
                  const struct lp_fragment_shader_variant_key *key)
 {
    struct lp_fragment_shader_variant *variant;
+   const struct util_format_description *cbuf0_format_desc;
    boolean fullcolormask;
 
    variant = CALLOC_STRUCT(lp_fragment_shader_variant);
@@ -941,12 +943,8 @@ generate_variant(struct llvmpipe_context *lp,
     */
    fullcolormask = FALSE;
    if (key->nr_cbufs == 1) {
-      const struct util_format_description *format_desc;
-      format_desc = util_format_description(key->cbuf_format[0]);
-      if ((~key->blend.rt[0].colormask &
-           util_format_colormask(format_desc)) == 0) {
-         fullcolormask = TRUE;
-      }
+      cbuf0_format_desc = util_format_description(key->cbuf_format[0]);
+      fullcolormask = util_format_colormask_full(cbuf0_format_desc, key->blend.rt[0].colormask);
    }
 
    variant->opaque =
@@ -1024,19 +1022,15 @@ llvmpipe_create_fs_state(struct pipe_context *pipe,
       case TGSI_INTERPOLATE_PERSPECTIVE:
         shader->inputs[i].interp = LP_INTERP_PERSPECTIVE;
         break;
+      case TGSI_INTERPOLATE_COLOR:
+        shader->inputs[i].interp = LP_INTERP_COLOR;
+        break;
       default:
         assert(0);
         break;
       }
 
       switch (shader->info.base.input_semantic_name[i]) {
-      case TGSI_SEMANTIC_COLOR:
-         /* Colors may be either linearly or constant interpolated in
-         * the fragment shader, but that information isn't available
-         * here.  Mark color inputs and fix them up later.
-          */
-        shader->inputs[i].interp = LP_INTERP_COLOR;
-         break;
       case TGSI_SEMANTIC_FACE:
         shader->inputs[i].interp = LP_INTERP_FACING;
         break;
@@ -1129,6 +1123,7 @@ llvmpipe_remove_shader_variant(struct llvmpipe_context *lp,
    /* remove from context's list */
    remove_from_list(&variant->list_item_global);
    lp->nr_fs_variants--;
+   lp->nr_fs_instrs -= variant->nr_instrs;
 
    FREE(variant);
 }
@@ -1171,11 +1166,22 @@ llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs)
 static void
 llvmpipe_set_constant_buffer(struct pipe_context *pipe,
                              uint shader, uint index,
-                             struct pipe_resource *constants)
+                             struct pipe_constant_buffer *cb)
 {
    struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
-   unsigned size = constants ? constants->width0 : 0;
-   const void *data = constants ? llvmpipe_resource_data(constants) : NULL;
+   struct pipe_resource *constants = cb ? cb->buffer : NULL;
+   unsigned size;
+   const void *data;
+
+   if (cb && cb->user_buffer) {
+      constants = llvmpipe_user_buffer_create(pipe->screen,
+                                              (void *) cb->user_buffer,
+                                              cb->buffer_size,
+                                              PIPE_BIND_CONSTANT_BUFFER);
+   }
+
+   size = constants ? constants->width0 : 0;
+   data = constants ? llvmpipe_resource_data(constants) : NULL;
 
    assert(shader < PIPE_SHADER_TYPES);
    assert(index < PIPE_MAX_CONSTANT_BUFFERS);
@@ -1195,6 +1201,10 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe,
    }
 
    llvmpipe->dirty |= LP_NEW_CONSTANTS;
+
+   if (cb && cb->user_buffer) {
+      pipe_resource_reference(&constants, NULL);
+   }
 }
 
 
@@ -1292,7 +1302,8 @@ make_variant_key(struct llvmpipe_context *lp,
        *
        * Also, force rgb/alpha func/factors match, to make AoS blending easier.
        */
-      if (format_desc->swizzle[3] > UTIL_FORMAT_SWIZZLE_W) {
+      if (format_desc->swizzle[3] > UTIL_FORMAT_SWIZZLE_W ||
+         format_desc->swizzle[3] == format_desc->swizzle[0]) {
          blend_rt->rgb_src_factor   = force_dst_alpha_one(blend_rt->rgb_src_factor);
          blend_rt->rgb_dst_factor   = force_dst_alpha_one(blend_rt->rgb_dst_factor);
          blend_rt->alpha_func       = blend_rt->rgb_func;
@@ -1350,11 +1361,22 @@ llvmpipe_update_fs(struct llvmpipe_context *lp)
       /* variant not found, create it now */
       int64_t t0, t1, dt;
       unsigned i;
+      unsigned variants_to_cull;
+
+      if (0) {
+         debug_printf("%u variants,\t%u instrs,\t%u instrs/variant\n",
+                      lp->nr_fs_variants,
+                      lp->nr_fs_instrs,
+                      lp->nr_fs_variants ? lp->nr_fs_instrs / lp->nr_fs_variants : 0);
+      }
 
       /* First, check if we've exceeded the max number of shader variants.
        * If so, free 25% of them (the least recently used ones).
        */
-      if (lp->nr_fs_variants >= LP_MAX_SHADER_VARIANTS) {
+      variants_to_cull = lp->nr_fs_variants >= LP_MAX_SHADER_VARIANTS ? LP_MAX_SHADER_VARIANTS / 4 : 0;
+
+      if (variants_to_cull ||
+          lp->nr_fs_instrs >= LP_MAX_SHADER_INSTRUCTIONS) {
          struct pipe_context *pipe = &lp->pipe;
 
          /*
@@ -1370,15 +1392,15 @@ llvmpipe_update_fs(struct llvmpipe_context *lp)
           * pending for destruction on flush.
           */
 
-         if (lp->nr_fs_variants >= LP_MAX_SHADER_VARIANTS) {
-            for (i = 0; i < LP_MAX_SHADER_VARIANTS / 4; i++) {
-               struct lp_fs_variant_list_item *item;
-               item = last_elem(&lp->fs_variants_list);
-               if (!item) {
-                  break;
-               }
-               llvmpipe_remove_shader_variant(lp, item->base);
+         for (i = 0; i < variants_to_cull || lp->nr_fs_instrs >= LP_MAX_SHADER_INSTRUCTIONS; i++) {
+            struct lp_fs_variant_list_item *item;
+            if (is_empty_list(&lp->fs_variants_list)) {
+               break;
             }
+            item = last_elem(&lp->fs_variants_list);
+            assert(item);
+            assert(item->base);
+            llvmpipe_remove_shader_variant(lp, item->base);
          }
       }
 
@@ -1399,6 +1421,7 @@ llvmpipe_update_fs(struct llvmpipe_context *lp)
          insert_at_head(&shader->variants, &variant->list_item_local);
          insert_at_head(&lp->fs_variants_list, &variant->list_item_global);
          lp->nr_fs_variants++;
+         lp->nr_fs_instrs += variant->nr_instrs;
          shader->variants_cached++;
       }
    }