llvmpipe: remove unneeded draw_flush() call
[mesa.git] / src / gallium / drivers / llvmpipe / lp_state_fs.c
index bf0217d68473673035ce46bf9fd084b33417ee61..5a8351bff678ed54dd42e9739a0c7fd82a8179c9 100644 (file)
@@ -685,76 +685,6 @@ generate_fs_loop(struct gallivm_state *gallivm,
 }
 
 
-/**
- * Generate color blending and color output.
- * \param rt  the render target index (to index blend, colormask state)
- * \param type  the pixel color type
- * \param context_ptr  pointer to the runtime JIT context
- * \param mask  execution mask (active fragment/pixel mask)
- * \param src  colors from the fragment shader
- * \param dst_ptr  the destination color buffer pointer
- */
-static void
-generate_blend(struct gallivm_state *gallivm,
-               const struct pipe_blend_state *blend,
-               unsigned rt,
-               LLVMBuilderRef builder,
-               struct lp_type type,
-               LLVMValueRef context_ptr,
-               LLVMValueRef mask,
-               LLVMValueRef *src,
-               LLVMValueRef dst_ptr,
-               boolean do_branch)
-{
-   struct lp_build_context bld;
-   struct lp_build_mask_context mask_ctx;
-   LLVMTypeRef vec_type;
-   LLVMValueRef const_ptr;
-   LLVMValueRef con[4];
-   LLVMValueRef dst[4];
-   LLVMValueRef res[4];
-   unsigned chan;
-
-   lp_build_context_init(&bld, gallivm, type);
-
-   lp_build_mask_begin(&mask_ctx, gallivm, type, mask);
-   if (do_branch)
-      lp_build_mask_check(&mask_ctx);
-
-   vec_type = lp_build_vec_type(gallivm, type);
-
-   const_ptr = lp_jit_context_u8_blend_color(gallivm, context_ptr);
-   const_ptr = LLVMBuildBitCast(builder, const_ptr,
-                                LLVMPointerType(vec_type, 0), "");
-
-   /* load constant blend color and colors from the dest color buffer */
-   for(chan = 0; chan < 4; ++chan) {
-      LLVMValueRef index = lp_build_const_int32(gallivm, chan);
-      con[chan] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, const_ptr, &index, 1, ""), "");
-
-      dst[chan] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dst_ptr, &index, 1, ""), "");
-
-      lp_build_name(con[chan], "con.%c", "rgba"[chan]);
-      lp_build_name(dst[chan], "dst.%c", "rgba"[chan]);
-   }
-
-   /* do blend */
-   lp_build_blend_soa(gallivm, blend, type, rt, src, dst, con, res);
-
-   /* store results to color buffer */
-   for(chan = 0; chan < 4; ++chan) {
-      if(blend->rt[rt].colormask & (1 << chan)) {
-         LLVMValueRef index = lp_build_const_int32(gallivm, chan);
-         lp_build_name(res[chan], "res.%c", "rgba"[chan]);
-         res[chan] = lp_build_select(&bld, mask, res[chan], dst[chan]);
-         LLVMBuildStore(builder, res[chan], LLVMBuildGEP(builder, dst_ptr, &index, 1, ""));
-      }
-   }
-
-   lp_build_mask_end(&mask_ctx);
-}
-
-
 /**
  * This function will reorder pixels from the fragment shader SoA to memory layout AoS
  *
@@ -909,7 +839,8 @@ load_unswizzled_block(struct gallivm_state *gallivm,
                       unsigned block_height,
                       LLVMValueRef* dst,
                       struct lp_type dst_type,
-                      unsigned dst_count)
+                      unsigned dst_count,
+                      unsigned dst_alignment)
 {
    LLVMBuilderRef builder = gallivm->builder;
    unsigned row_size = dst_count / block_height;
@@ -936,9 +867,7 @@ load_unswizzled_block(struct gallivm_state *gallivm,
 
       dst[i] = LLVMBuildLoad(builder, dst_ptr, "");
 
-      if ((dst_type.length % 3) == 0) {
-         lp_set_load_alignment(dst[i], dst_type.width / 8);
-      }
+      lp_set_load_alignment(dst[i], dst_alignment);
    }
 }
 
@@ -954,7 +883,8 @@ store_unswizzled_block(struct gallivm_state *gallivm,
                        unsigned block_height,
                        LLVMValueRef* src,
                        struct lp_type src_type,
-                       unsigned src_count)
+                       unsigned src_count,
+                       unsigned src_alignment)
 {
    LLVMBuilderRef builder = gallivm->builder;
    unsigned row_size = src_count / block_height;
@@ -981,9 +911,7 @@ store_unswizzled_block(struct gallivm_state *gallivm,
 
       src_ptr = LLVMBuildStore(builder, src[i], src_ptr);
 
-      if ((src_type.length % 3) == 0) {
-         lp_set_store_alignment(src_ptr, src_type.width / 8);
-      }
+      lp_set_store_alignment(src_ptr, src_alignment);
    }
 }
 
@@ -1147,10 +1075,10 @@ convert_to_blend_type(struct gallivm_state *gallivm,
                       const struct util_format_description *src_fmt,
                       struct lp_type src_type,
                       struct lp_type dst_type,
-                      LLVMValueRef* src,
-                      unsigned num_srcs,
-                      LLVMValueRef* dst)
+                      LLVMValueRef* src, // and dst
+                      unsigned num_srcs)
 {
+   LLVMValueRef *dst = src;
    LLVMBuilderRef builder = gallivm->builder;
    struct lp_type blend_type;
    struct lp_type mem_type;
@@ -1158,8 +1086,6 @@ convert_to_blend_type(struct gallivm_state *gallivm,
    unsigned pixels = 16 / num_srcs;
    bool is_arith;
 
-   memcpy(dst, src, sizeof(LLVMValueRef*) * num_srcs);
-
    lp_mem_type_from_format_desc(src_fmt, &mem_type);
    lp_blend_type_from_format_desc(src_fmt, &blend_type);
 
@@ -1249,10 +1175,10 @@ convert_from_blend_type(struct gallivm_state *gallivm,
                         const struct util_format_description *src_fmt,
                         struct lp_type src_type,
                         struct lp_type dst_type,
-                        LLVMValueRef* src,
-                        unsigned num_srcs,
-                        LLVMValueRef* dst)
+                        LLVMValueRef* src, // and dst
+                        unsigned num_srcs)
 {
+   LLVMValueRef* dst = src;
    unsigned i, j, k;
    struct lp_type mem_type;
    struct lp_type blend_type;
@@ -1260,8 +1186,6 @@ convert_from_blend_type(struct gallivm_state *gallivm,
    unsigned pixels = 16 / num_srcs;
    bool is_arith;
 
-   memcpy(dst, src, sizeof(LLVMValueRef*) * num_srcs);
-
    lp_mem_type_from_format_desc(src_fmt, &mem_type);
    lp_blend_type_from_format_desc(src_fmt, &blend_type);
 
@@ -1377,10 +1301,10 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
 
    LLVMBuilderRef builder = gallivm->builder;
    LLVMValueRef fs_src[4][TGSI_NUM_CHANNELS];
-   LLVMValueRef src_alpha[block_size];
-   LLVMValueRef src_mask[block_size];
-   LLVMValueRef src[block_size];
-   LLVMValueRef dst[block_size];
+   LLVMValueRef src_alpha[4 * 4];
+   LLVMValueRef src_mask[4 * 4];
+   LLVMValueRef src[4 * 4];
+   LLVMValueRef dst[4 * 4];
    LLVMValueRef blend_color;
    LLVMValueRef blend_alpha;
    LLVMValueRef i32_zero;
@@ -1403,6 +1327,8 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
 
    const struct util_format_description* out_format_desc = util_format_description(out_format);
 
+   unsigned dst_alignment;
+
    bool pad_inline = is_arithmetic_format(out_format_desc);
    bool has_alpha = false;
 
@@ -1410,6 +1336,18 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
    mask_type = lp_int32_vec4_type();
    mask_type.length = fs_type.length;
 
+   /* Compute the alignment of the destination pointer in bytes */
+#if 0
+   dst_alignment = (block_width * out_format_desc->block.bits + 7)/(out_format_desc->block.width * 8);
+#else
+   /* FIXME -- currently we're fetching pixels one by one, instead of row by row */
+   dst_alignment = (1 * out_format_desc->block.bits + 7)/(out_format_desc->block.width * 8);
+#endif
+   /* Force power-of-two alignment by extracting only the least-significant-bit */
+   dst_alignment = 1 << (ffs(dst_alignment) - 1);
+   /* Resource base and stride pointers are aligned to 16 bytes, so that's the maximum alignment we can guarantee */
+   dst_alignment = MIN2(dst_alignment, 16);
+
    /* Do not bother executing code when mask is empty.. */
    if (do_branch) {
       check_mask = LLVMConstNull(lp_build_int_vec_type(gallivm, mask_type));
@@ -1686,7 +1624,8 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
 
    dst_type.length *= 16 / dst_count;
 
-   load_unswizzled_block(gallivm, color_ptr, stride, block_width, block_height, dst, dst_type, dst_count);
+   load_unswizzled_block(gallivm, color_ptr, stride, block_width, block_height,
+                         dst, dst_type, dst_count, dst_alignment);
 
 
    /*
@@ -1706,7 +1645,7 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
    /*
     * Blending
     */
-   convert_to_blend_type(gallivm, out_format_desc, dst_type, row_type, dst, src_count, dst);
+   convert_to_blend_type(gallivm, out_format_desc, dst_type, row_type, dst, src_count);
 
    for (i = 0; i < src_count; ++i) {
       dst[i] = lp_build_blend_aos(gallivm,
@@ -1724,7 +1663,7 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
                                   pad_inline ? 4 : dst_channels);
    }
 
-   convert_from_blend_type(gallivm, out_format_desc, row_type, dst_type, dst, src_count, dst);
+   convert_from_blend_type(gallivm, out_format_desc, row_type, dst_type, dst, src_count);
 
    /* Split the blend rows back to memory rows */
    if (dst_count > src_count) {
@@ -1751,7 +1690,8 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
    /*
     * Store blend result to memory
     */
-   store_unswizzled_block(gallivm, color_ptr, stride, block_width, block_height, dst, dst_type, dst_count);
+   store_unswizzled_block(gallivm, color_ptr, stride, block_width, block_height,
+                          dst, dst_type, dst_count, dst_alignment);
 
    if (do_branch) {
       lp_build_mask_end(&mask_ctx);
@@ -1800,7 +1740,6 @@ generate_fragment(struct llvmpipe_context *lp,
    struct lp_build_interp_soa_context interp;
    LLVMValueRef fs_mask[16 / 4];
    LLVMValueRef fs_out_color[PIPE_MAX_COLOR_BUFS][TGSI_NUM_CHANNELS][16 / 4];
-   LLVMValueRef blend_mask;
    LLVMValueRef function;
    LLVMValueRef facing;
    const struct util_format_description *zs_format_desc;
@@ -2058,8 +1997,8 @@ generate_fragment(struct llvmpipe_context *lp,
     */
    for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) {
       LLVMValueRef color_ptr;
+      LLVMValueRef stride;
       LLVMValueRef index = lp_build_const_int32(gallivm, cbuf);
-      LLVMValueRef blend_in_color[TGSI_NUM_CHANNELS];
       unsigned rt = key->blend.independent_blend_enable ? cbuf : 0;
 
       boolean do_branch = ((key->depth.enabled
@@ -2073,53 +2012,13 @@ generate_fragment(struct llvmpipe_context *lp,
 
       lp_build_name(color_ptr, "color_ptr%d", cbuf);
 
-      if (variant->unswizzled_cbufs & (1 << cbuf)) {
-         LLVMValueRef stride = LLVMBuildLoad(builder,
-                                             LLVMBuildGEP(builder, stride_ptr, &index, 1, ""),
-                                             "");
-
-         generate_unswizzled_blend(gallivm, rt, variant, key->cbuf_format[cbuf],
-                                   num_fs, fs_type, fs_mask, fs_out_color[cbuf],
-                                   context_ptr, color_ptr, stride, partial_mask, do_branch);
-      } else {
-         /*
-          * Convert the fs's output color and mask to fit to the blending type.
-          */
-         for(chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
-            LLVMValueRef fs_color_vals[LP_MAX_VECTOR_LENGTH];
-
-            for (i = 0; i < num_fs; i++) {
-               fs_color_vals[i] =
-                     LLVMBuildLoad(builder, fs_out_color[cbuf][chan][i], "fs_color_vals");
-            }
-
-            lp_build_conv(gallivm, fs_type, blend_type,
-                          fs_color_vals,
-                          num_fs,
-                          &blend_in_color[chan], 1);
-
-            lp_build_name(blend_in_color[chan], "color%d.%c", cbuf, "rgba"[chan]);
-         }
-
-         if (partial_mask || !variant->opaque) {
-            lp_build_conv_mask(gallivm, fs_type, blend_type,
-                               fs_mask, num_fs,
-                               &blend_mask, 1);
-         } else {
-            blend_mask = lp_build_const_int_vec(gallivm, blend_type, ~0);
-         }
+      stride = LLVMBuildLoad(builder,
+                             LLVMBuildGEP(builder, stride_ptr, &index, 1, ""),
+                             "");
 
-         generate_blend(gallivm,
-                        &key->blend,
-                        rt,
-                        builder,
-                        blend_type,
-                        context_ptr,
-                        blend_mask,
-                        blend_in_color,
-                        color_ptr,
-                        do_branch);
-      }
+      generate_unswizzled_blend(gallivm, rt, variant, key->cbuf_format[cbuf],
+                                num_fs, fs_type, fs_mask, fs_out_color[cbuf],
+                                context_ptr, color_ptr, stride, partial_mask, do_branch);
    }
 
    LLVMBuildRetVoid(builder);
@@ -2235,7 +2134,6 @@ generate_variant(struct llvmpipe_context *lp,
    struct lp_fragment_shader_variant *variant;
    const struct util_format_description *cbuf0_format_desc;
    boolean fullcolormask;
-   unsigned i;
 
    variant = CALLOC_STRUCT(lp_fragment_shader_variant);
    if(!variant)
@@ -2273,10 +2171,6 @@ generate_variant(struct llvmpipe_context *lp,
          !shader->info.base.uses_kill
          ? TRUE : FALSE;
 
-   for (i = 0; i < key->nr_cbufs; ++i) {
-      variant->unswizzled_cbufs |= llvmpipe_is_format_unswizzled(key->cbuf_format[i]) << i;
-   }
-
    if ((LP_DEBUG & DEBUG_FS) || (gallivm_debug & GALLIVM_DEBUG_IR)) {
       lp_debug_fs_variant(variant);
    }
@@ -2418,8 +2312,6 @@ llvmpipe_bind_fs_state(struct pipe_context *pipe, void *fs)
    if (llvmpipe->fs == fs)
       return;
 
-   draw_flush(llvmpipe->draw);
-
    llvmpipe->fs = (struct lp_fragment_shader *) fs;
 
    draw_bind_fragment_shader(llvmpipe->draw,
@@ -2514,32 +2406,32 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe,
 {
    struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
    struct pipe_resource *constants = cb ? cb->buffer : NULL;
-   unsigned size;
-   const void *data;
-
-   if (cb && cb->user_buffer) {
-      constants = llvmpipe_user_buffer_create(pipe->screen,
-                                              (void *) cb->user_buffer,
-                                              cb->buffer_size,
-                                              PIPE_BIND_CONSTANT_BUFFER);
-   }
-
-   size = constants ? constants->width0 : 0;
-   data = constants ? llvmpipe_resource_data(constants) : NULL;
 
    assert(shader < PIPE_SHADER_TYPES);
-   assert(index < PIPE_MAX_CONSTANT_BUFFERS);
+   assert(index < Elements(llvmpipe->constants[shader]));
 
-   if(llvmpipe->constants[shader][index] == constants)
-      return;
+   /* note: reference counting */
+   util_copy_constant_buffer(&llvmpipe->constants[shader][index], cb);
 
-   draw_flush(llvmpipe->draw);
+   if (shader == PIPE_SHADER_VERTEX ||
+       shader == PIPE_SHADER_GEOMETRY) {
+      /* Pass the constants to the 'draw' module */
+      const unsigned size = cb ? cb->buffer_size : 0;
+      const ubyte *data;
 
-   /* note: reference counting */
-   pipe_resource_reference(&llvmpipe->constants[shader][index], constants);
+      if (constants) {
+         data = (ubyte *) llvmpipe_resource_data(constants);
+      }
+      else if (cb && cb->user_buffer) {
+         data = (ubyte *) cb->user_buffer;
+      }
+      else {
+         data = NULL;
+      }
+
+      if (data)
+         data += cb->buffer_offset;
 
-   if(shader == PIPE_SHADER_VERTEX ||
-      shader == PIPE_SHADER_GEOMETRY) {
       draw_set_mapped_constant_buffer(llvmpipe->draw, shader,
                                       index, data, size);
    }
@@ -2604,7 +2496,7 @@ make_variant_key(struct llvmpipe_context *lp,
    /* alpha.ref_value is passed in jit_context */
 
    key->flatshade = lp->rasterizer->flatshade;
-   if (lp->active_query_count) {
+   if (lp->active_occlusion_query) {
       key->occlusion_count = TRUE;
    }