llvmpipe: raise dirty flag on transfers to bound constbuf
[mesa.git] / src / gallium / drivers / llvmpipe / lp_state_fs.c
index f45f36f633206957a7d3ebc15c05490f080d23af..48971510f213d9822b7807c81d062abea18c4c40 100644 (file)
@@ -99,6 +99,7 @@
 
 
 #include <llvm-c/Analysis.h>
+#include <llvm-c/BitWriter.h>
 
 
 static unsigned fs_no = 0;
@@ -212,8 +213,7 @@ find_output_by_semantic( const struct tgsi_shader_info *info,
  * \param partial_mask  if 1, do mask_input testing
  */
 static void
-generate_fs(struct llvmpipe_context *lp,
-            struct lp_fragment_shader *shader,
+generate_fs(struct lp_fragment_shader *shader,
             const struct lp_fragment_shader_variant_key *key,
             LLVMBuilderRef builder,
             struct lp_type type,
@@ -237,11 +237,10 @@ generate_fs(struct llvmpipe_context *lp,
    LLVMValueRef z;
    LLVMValueRef zs_value = NULL;
    LLVMValueRef stencil_refs[2];
-   struct lp_build_flow_context *flow;
    struct lp_build_mask_context mask;
-   boolean simple_shader = (shader->info.file_count[TGSI_FILE_SAMPLER] == 0 &&
-                            shader->info.num_inputs < 3 &&
-                            shader->info.num_instructions < 8);
+   boolean simple_shader = (shader->info.base.file_count[TGSI_FILE_SAMPLER] == 0 &&
+                            shader->info.base.num_inputs < 3 &&
+                            shader->info.base.num_instructions < 8);
    unsigned attrib;
    unsigned chan;
    unsigned cbuf;
@@ -254,8 +253,8 @@ generate_fs(struct llvmpipe_context *lp,
       zs_format_desc = util_format_description(key->zsbuf_format);
       assert(zs_format_desc);
 
-      if (!shader->info.writes_z) {
-         if (key->alpha.enabled || shader->info.uses_kill)
+      if (!shader->info.base.writes_z) {
+         if (key->alpha.enabled || shader->info.base.uses_kill)
             /* With alpha test and kill, can do the depth test early
              * and hopefully eliminate some quads.  But need to do a
              * special deferred depth write once the final mask value
@@ -286,12 +285,8 @@ generate_fs(struct llvmpipe_context *lp,
 
    consts_ptr = lp_jit_context_constants(builder, context_ptr);
 
-   flow = lp_build_flow_create(builder);
-
    memset(outputs, 0, sizeof outputs);
 
-   lp_build_flow_scope_begin(flow);
-
    /* Declare the color and z variables */
    for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) {
       for(chan = 0; chan < NUM_CHANNELS; ++chan) {
@@ -309,7 +304,7 @@ generate_fs(struct llvmpipe_context *lp,
    }
 
    /* 'mask' will control execution based on quad's pixel alive/killed state */
-   lp_build_mask_begin(&mask, flow, type, *pmask);
+   lp_build_mask_begin(&mask, builder, type, *pmask);
 
    if (!(depth_mode & EARLY_DEPTH_TEST) && !simple_shader)
       lp_build_mask_check(&mask);
@@ -330,8 +325,9 @@ generate_fs(struct llvmpipe_context *lp,
                                   &zs_value,
                                   !simple_shader);
 
-      if (depth_mode & EARLY_DEPTH_WRITE)
-         LLVMBuildStore(builder, zs_value, depth_ptr);
+      if (depth_mode & EARLY_DEPTH_WRITE) {
+         lp_build_depth_write(builder, zs_format_desc, depth_ptr, zs_value);
+      }
    }
 
    lp_build_interp_soa_update_inputs(interp, i);
@@ -339,16 +335,16 @@ generate_fs(struct llvmpipe_context *lp,
    /* Build the actual shader */
    lp_build_tgsi_soa(builder, tokens, type, &mask,
                      consts_ptr, interp->pos, interp->inputs,
-                     outputs, sampler, &shader->info);
+                     outputs, sampler, &shader->info.base);
 
 
    /* Alpha test */
    if (key->alpha.enabled) {
-      int color0 = find_output_by_semantic(&shader->info,
+      int color0 = find_output_by_semantic(&shader->info.base,
                                            TGSI_SEMANTIC_COLOR,
                                            0);
 
-      if (color0 != -1) {
+      if (color0 != -1 && outputs[color0][3]) {
          LLVMValueRef alpha = LLVMBuildLoad(builder, outputs[color0][3], "alpha");
          LLVMValueRef alpha_ref_value;
 
@@ -363,13 +359,12 @@ generate_fs(struct llvmpipe_context *lp,
 
    /* Late Z test */
    if (depth_mode & LATE_DEPTH_TEST) { 
-      int pos0 = find_output_by_semantic(&shader->info,
+      int pos0 = find_output_by_semantic(&shader->info.base,
                                          TGSI_SEMANTIC_POSITION,
                                          0);
          
-      if (pos0 != -1) {
-         z = LLVMBuildLoad(builder, outputs[pos0][2], "z");
-         lp_build_name(z, "output%u.%u.%c", i, pos0, "xyzw"[chan]);
+      if (pos0 != -1 && outputs[pos0][2]) {
+         z = LLVMBuildLoad(builder, outputs[pos0][2], "output.z");
       }
 
       lp_build_depth_stencil_test(builder,
@@ -384,8 +379,9 @@ generate_fs(struct llvmpipe_context *lp,
                                   &zs_value,
                                   !simple_shader);
       /* Late Z write */
-      if (depth_mode & LATE_DEPTH_WRITE)
-         LLVMBuildStore(builder, zs_value, depth_ptr);
+      if (depth_mode & LATE_DEPTH_WRITE) {
+         lp_build_depth_write(builder, zs_format_desc, depth_ptr, zs_value);
+      }
    }
    else if ((depth_mode & EARLY_DEPTH_TEST) &&
             (depth_mode & LATE_DEPTH_WRITE))
@@ -404,34 +400,30 @@ generate_fs(struct llvmpipe_context *lp,
 
 
    /* Color write  */
-   for (attrib = 0; attrib < shader->info.num_outputs; ++attrib)
+   for (attrib = 0; attrib < shader->info.base.num_outputs; ++attrib)
    {
-      if (shader->info.output_semantic_name[attrib] == TGSI_SEMANTIC_COLOR)
+      if (shader->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_COLOR &&
+          shader->info.base.output_semantic_index[attrib] < key->nr_cbufs)
       {
-         unsigned cbuf = shader->info.output_semantic_index[attrib];
-         for(chan = 0; chan < NUM_CHANNELS; ++chan)
-         {
-            /* XXX: just initialize outputs to point at colors[] and
-             * skip this.
-             */
-            LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], "");
-            lp_build_name(out, "color%u.%u.%c", i, attrib, "rgba"[chan]);
-            LLVMBuildStore(builder, out, color[cbuf][chan]);
+         unsigned cbuf = shader->info.base.output_semantic_index[attrib];
+         for(chan = 0; chan < NUM_CHANNELS; ++chan) {
+            if(outputs[attrib][chan]) {
+               /* XXX: just initialize outputs to point at colors[] and
+                * skip this.
+                */
+               LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], "");
+               lp_build_name(out, "color%u.%u.%c", i, attrib, "rgba"[chan]);
+               LLVMBuildStore(builder, out, color[cbuf][chan]);
+            }
          }
       }
    }
 
    if (counter)
-      lp_build_occlusion_count(builder, type, mask.value, counter);
-
-   lp_build_mask_end(&mask);
-
-   lp_build_flow_scope_end(flow);
-
-   lp_build_flow_destroy(flow);
-
-   *pmask = mask.value;
+      lp_build_occlusion_count(builder, type,
+                               lp_build_mask_value(&mask), counter);
 
+   *pmask = lp_build_mask_end(&mask);
 }
 
 
@@ -456,7 +448,6 @@ generate_blend(const struct pipe_blend_state *blend,
                boolean do_branch)
 {
    struct lp_build_context bld;
-   struct lp_build_flow_context *flow;
    struct lp_build_mask_context mask_ctx;
    LLVMTypeRef vec_type;
    LLVMValueRef const_ptr;
@@ -467,8 +458,7 @@ generate_blend(const struct pipe_blend_state *blend,
 
    lp_build_context_init(&bld, builder, type);
 
-   flow = lp_build_flow_create(builder);
-   lp_build_mask_begin(&mask_ctx, flow, type, mask);
+   lp_build_mask_begin(&mask_ctx, builder, type, mask);
    if (do_branch)
       lp_build_mask_check(&mask_ctx);
 
@@ -503,7 +493,6 @@ generate_blend(const struct pipe_blend_state *blend,
    }
 
    lp_build_mask_end(&mask_ctx);
-   lp_build_flow_destroy(flow);
 }
 
 
@@ -514,13 +503,13 @@ generate_blend(const struct pipe_blend_state *blend,
  * 2x2 pixels.
  */
 static void
-generate_fragment(struct llvmpipe_context *lp,
+generate_fragment(struct llvmpipe_screen *screen,
                   struct lp_fragment_shader *shader,
                   struct lp_fragment_shader_variant *variant,
                   unsigned partial_mask)
 {
-   struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen);
    const struct lp_fragment_shader_variant_key *key = &variant->key;
+   struct lp_shader_input inputs[PIPE_MAX_SHADER_INPUTS];
    char func_name[256];
    struct lp_type fs_type;
    struct lp_type blend_type;
@@ -548,11 +537,24 @@ generate_fragment(struct llvmpipe_context *lp,
    LLVMValueRef blend_mask;
    LLVMValueRef function;
    LLVMValueRef facing;
+   const struct util_format_description *zs_format_desc;
    unsigned num_fs;
    unsigned i;
    unsigned chan;
    unsigned cbuf;
 
+   /* Adjust color input interpolation according to flatshade state:
+    */
+   memcpy(inputs, shader->inputs, shader->info.base.num_inputs * sizeof inputs[0]);
+   for (i = 0; i < shader->info.base.num_inputs; i++) {
+      if (inputs[i].interp == LP_INTERP_COLOR) {
+        if (key->flatshade)
+           inputs[i].interp = LP_INTERP_CONSTANT;
+        else
+           inputs[i].interp = LP_INTERP_LINEAR;
+      }
+   }
+
 
    /* TODO: actually pick these based on the fs and color buffer
     * characteristics. */
@@ -588,12 +590,12 @@ generate_fragment(struct llvmpipe_context *lp,
    arg_types[0] = screen->context_ptr_type;            /* context */
    arg_types[1] = LLVMInt32Type();                     /* x */
    arg_types[2] = LLVMInt32Type();                     /* y */
-   arg_types[3] = LLVMFloatType();                     /* facing */
+   arg_types[3] = LLVMInt32Type();                     /* facing */
    arg_types[4] = LLVMPointerType(fs_elem_type, 0);    /* a0 */
    arg_types[5] = LLVMPointerType(fs_elem_type, 0);    /* dadx */
    arg_types[6] = LLVMPointerType(fs_elem_type, 0);    /* dady */
    arg_types[7] = LLVMPointerType(LLVMPointerType(blend_vec_type, 0), 0);  /* color */
-   arg_types[8] = LLVMPointerType(fs_int_vec_type, 0); /* depth */
+   arg_types[8] = LLVMPointerType(LLVMInt8Type(), 0);  /* depth */
    arg_types[9] = LLVMInt32Type();                     /* mask_input */
    arg_types[10] = LLVMPointerType(LLVMInt32Type(), 0);/* counter */
 
@@ -604,7 +606,6 @@ generate_fragment(struct llvmpipe_context *lp,
 
    variant->function[partial_mask] = function;
 
-
    /* XXX: need to propagate noalias down into color param now we are
     * passing a pointer-to-pointer?
     */
@@ -652,8 +653,8 @@ generate_fragment(struct llvmpipe_context *lp,
     * already included in the shader key.
     */
    lp_build_interp_soa_init(&interp, 
-                            lp->num_inputs,
-                            lp->inputs,
+                            shader->info.base.num_inputs,
+                            inputs,
                             builder, fs_type,
                             a0_ptr, dadx_ptr, dady_ptr,
                             x, y);
@@ -662,14 +663,18 @@ generate_fragment(struct llvmpipe_context *lp,
    sampler = lp_llvm_sampler_soa_create(key->sampler, context_ptr);
 
    /* loop over quads in the block */
+   zs_format_desc = util_format_description(key->zsbuf_format);
+
    for(i = 0; i < num_fs; ++i) {
-      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+      LLVMValueRef depth_offset = LLVMConstInt(LLVMInt32Type(),
+                                               i*fs_type.length*zs_format_desc->block.bits/8,
+                                               0);
       LLVMValueRef out_color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS];
       LLVMValueRef depth_ptr_i;
 
-      depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &index, 1, "");
+      depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &depth_offset, 1, "");
 
-      generate_fs(lp, shader, key,
+      generate_fs(shader, key,
                   builder,
                   fs_type,
                   context_ptr,
@@ -742,7 +747,7 @@ generate_fragment(struct llvmpipe_context *lp,
           */
          boolean do_branch = ((key->depth.enabled || key->stencil[0].enabled) &&
                               !key->alpha.enabled &&
-                              !shader->info.uses_kill);
+                              !shader->info.base.uses_kill);
 
          generate_blend(&key->blend,
                         rt,
@@ -756,11 +761,6 @@ generate_fragment(struct llvmpipe_context *lp,
       }
    }
 
-#ifdef PIPE_ARCH_X86
-   /* Avoid corrupting the FPU stack on 32bit OSes. */
-   lp_build_intrinsic(builder, "llvm.x86.mmx.emms", LLVMVoidType(), NULL, 0);
-#endif
-
    LLVMBuildRetVoid(builder);
 
    LLVMDisposeBuilder(builder);
@@ -784,6 +784,11 @@ generate_fragment(struct llvmpipe_context *lp,
       debug_printf("\n");
    }
 
+   /* Dump the module's LLVM bitcode to a file (debug aid, disabled by default) */
+   if (0) {
+      LLVMWriteBitcodeToFile(lp_build_module, "llvmpipe.bc");
+   }
+
    /*
     * Translate the LLVM IR into machine code.
     */
@@ -893,7 +898,7 @@ lp_debug_fs_variant(const struct lp_fragment_shader_variant *variant)
 }
 
 static struct lp_fragment_shader_variant *
-generate_variant(struct llvmpipe_context *lp,
+generate_variant(struct llvmpipe_screen *screen,
                  struct lp_fragment_shader *shader,
                  const struct lp_fragment_shader_variant_key *key)
 {
@@ -931,7 +936,7 @@ generate_variant(struct llvmpipe_context *lp,
          !key->stencil[0].enabled &&
          !key->alpha.enabled &&
          !key->depth.enabled &&
-         !shader->info.uses_kill
+         !shader->info.base.uses_kill
          ? TRUE : FALSE;
 
 
@@ -939,11 +944,11 @@ generate_variant(struct llvmpipe_context *lp,
       lp_debug_fs_variant(variant);
    }
 
-   generate_fragment(lp, shader, variant, RAST_EDGE_TEST);
+   generate_fragment(screen, shader, variant, RAST_EDGE_TEST);
 
    if (variant->opaque) {
       /* Specialized shader, which doesn't need to read the color buffer. */
-      generate_fragment(lp, shader, variant, RAST_WHOLE);
+      generate_fragment(screen, shader, variant, RAST_WHOLE);
    } else {
       variant->jit_function[RAST_WHOLE] = variant->jit_function[RAST_EDGE_TEST];
    }
@@ -959,6 +964,7 @@ llvmpipe_create_fs_state(struct pipe_context *pipe,
    struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
    struct lp_fragment_shader *shader;
    int nr_samplers;
+   int i;
 
    shader = CALLOC_STRUCT(lp_fragment_shader);
    if (!shader)
@@ -968,7 +974,7 @@ llvmpipe_create_fs_state(struct pipe_context *pipe,
    make_empty_list(&shader->variants);
 
    /* get/save the summary info for this shader */
-   tgsi_scan_shader(templ->tokens, &shader->info);
+   lp_build_tgsi_info(templ->tokens, &shader->info);
 
    /* we need to keep a local copy of the tokens */
    shader->base.tokens = tgsi_dup_tokens(templ->tokens);
@@ -980,18 +986,58 @@ llvmpipe_create_fs_state(struct pipe_context *pipe,
       return NULL;
    }
 
-   nr_samplers = shader->info.file_max[TGSI_FILE_SAMPLER] + 1;
+   nr_samplers = shader->info.base.file_max[TGSI_FILE_SAMPLER] + 1;
 
    shader->variant_key_size = Offset(struct lp_fragment_shader_variant_key,
                                     sampler[nr_samplers]);
 
+   for (i = 0; i < shader->info.base.num_inputs; i++) {
+      shader->inputs[i].usage_mask = shader->info.base.input_usage_mask[i];
+
+      switch (shader->info.base.input_interpolate[i]) {
+      case TGSI_INTERPOLATE_CONSTANT:
+        shader->inputs[i].interp = LP_INTERP_CONSTANT;
+        break;
+      case TGSI_INTERPOLATE_LINEAR:
+        shader->inputs[i].interp = LP_INTERP_LINEAR;
+        break;
+      case TGSI_INTERPOLATE_PERSPECTIVE:
+        shader->inputs[i].interp = LP_INTERP_PERSPECTIVE;
+        break;
+      default:
+        assert(0);
+        break;
+      }
+
+      switch (shader->info.base.input_semantic_name[i]) {
+      case TGSI_SEMANTIC_COLOR:
+         /* Colors may be interpolated either linearly or as constants
+          * in the fragment shader, but that information isn't available
+          * here.  Mark color inputs and fix them up later.
+          */
+        shader->inputs[i].interp = LP_INTERP_COLOR;
+         break;
+      case TGSI_SEMANTIC_FACE:
+        shader->inputs[i].interp = LP_INTERP_FACING;
+        break;
+      case TGSI_SEMANTIC_POSITION:
+        /* Position was already emitted above
+         */
+        shader->inputs[i].interp = LP_INTERP_POSITION;
+        shader->inputs[i].src_index = 0;
+        continue;
+      }
+
+      shader->inputs[i].src_index = i+1;
+   }
+
    if (LP_DEBUG & DEBUG_TGSI) {
       unsigned attrib;
       debug_printf("llvmpipe: Create fragment shader #%u %p:\n", shader->no, (void *) shader);
       tgsi_dump(templ->tokens, 0);
       debug_printf("usage masks:\n");
-      for (attrib = 0; attrib < shader->info.num_inputs; ++attrib) {
-         unsigned usage_mask = shader->info.input_usage_mask[attrib];
+      for (attrib = 0; attrib < shader->info.base.num_inputs; ++attrib) {
+         unsigned usage_mask = shader->info.base.input_usage_mask[attrib];
          debug_printf("  IN[%u].%s%s%s%s\n",
                       attrib,
                       usage_mask & TGSI_WRITEMASK_X ? "x" : "",
@@ -1016,11 +1062,11 @@ llvmpipe_bind_fs_state(struct pipe_context *pipe, void *fs)
 
    draw_flush(llvmpipe->draw);
 
+   llvmpipe->fs = (struct lp_fragment_shader *) fs;
+
    draw_bind_fragment_shader(llvmpipe->draw,
                              (llvmpipe->fs ? llvmpipe->fs->draw_data : NULL));
 
-   llvmpipe->fs = fs;
-
    llvmpipe->dirty |= LP_NEW_FS;
 }
 
@@ -1220,10 +1266,10 @@ make_variant_key(struct llvmpipe_context *lp,
 
    /* This value will be the same for all the variants of a given shader:
     */
-   key->nr_samplers = shader->info.file_max[TGSI_FILE_SAMPLER] + 1;
+   key->nr_samplers = shader->info.base.file_max[TGSI_FILE_SAMPLER] + 1;
 
    for(i = 0; i < key->nr_samplers; ++i) {
-      if(shader->info.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) {
+      if(shader->info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) {
          lp_sampler_static_state(&key->sampler[i],
                                 lp->fragment_sampler_views[i],
                                 lp->sampler[i]);
@@ -1238,6 +1284,7 @@ make_variant_key(struct llvmpipe_context *lp,
 void 
 llvmpipe_update_fs(struct llvmpipe_context *lp)
 {
+   struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen);
    struct lp_fragment_shader *shader = lp->fs;
    struct lp_fragment_shader_variant_key key;
    struct lp_fragment_shader_variant *variant = NULL;
@@ -1278,7 +1325,7 @@ llvmpipe_update_fs(struct llvmpipe_context *lp)
       }
       t0 = os_time_get();
 
-      variant = generate_variant(lp, shader, &key);
+      variant = generate_variant(screen, shader, &key);
 
       t1 = os_time_get();
       dt = t1 - t0;
@@ -1298,6 +1345,10 @@ llvmpipe_update_fs(struct llvmpipe_context *lp)
 
 
 
+
+
+
+
 void
 llvmpipe_init_fs_funcs(struct llvmpipe_context *llvmpipe)
 {