llvmpipe: use alpha from already converted color if possible

author Roland Scheidegger <sroland@vmware.com>

Thu, 22 Dec 2016 02:49:22 +0000 (03:49 +0100)

committer Roland Scheidegger <sroland@vmware.com>

Fri, 6 Jan 2017 22:13:34 +0000 (23:13 +0100)
author Roland Scheidegger <sroland@vmware.com>
Thu, 22 Dec 2016 02:49:22 +0000 (03:49 +0100)
committer Roland Scheidegger <sroland@vmware.com>
Fri, 6 Jan 2017 22:13:34 +0000 (23:13 +0100)
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c

index a57670d49567ebdfb63b7124b40103f0780a63a0..45c5c2bb65e681b56528451733150d93fbc27a53 100644 (file)
--- a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
@@ -74,6 +74,7 @@ struct lp_build_blend_aos_context
     LLVMValueRef dst;
     LLVMValueRef const_;
     LLVMValueRef const_alpha;
+   boolean has_dst_alpha;
  
     LLVMValueRef inv_src;
     LLVMValueRef inv_src_alpha;
@@ -115,10 +116,10 @@ lp_build_blend_factor_unswizzled(struct lp_build_blend_aos_context *bld,
           return bld->base.one;
        else {
           /*
-          * if there's separate src_alpha there's no dst alpha hence the complement
-          * is zero but for unclamped float inputs min can be non-zero (negative).
+          * If there's no dst alpha the complement is zero but for unclamped
+          * float inputs min can be non-zero (negative).
            */
-         if (bld->src_alpha) {
+         if (!bld->has_dst_alpha) {
              if (!bld->saturate)
                 bld->saturate = lp_build_min(&bld->base, src_alpha, bld->base.zero);
           }
@@ -264,7 +265,8 @@ lp_build_blend_factor(struct lp_build_blend_aos_context *bld,
     if (alpha_swizzle != PIPE_SWIZZLE_NONE) {
        rgb_swizzle   = lp_build_blend_factor_swizzle(rgb_factor);
        alpha_factor_ = lp_build_blend_factor_unswizzled(bld, alpha_factor, TRUE);
-      return lp_build_blend_swizzle(bld, rgb_factor_, alpha_factor_, rgb_swizzle, alpha_swizzle, num_channels);
+      return lp_build_blend_swizzle(bld, rgb_factor_, alpha_factor_, rgb_swizzle,
+                                    alpha_swizzle, num_channels);
     } else {
        return rgb_factor_;
     }
@@ -327,6 +329,7 @@ lp_build_blend_aos(struct gallivm_state *gallivm,
     bld.src_alpha = src_alpha;
     bld.src1_alpha = src1_alpha;
     bld.const_alpha = const_alpha;
+   bld.has_dst_alpha = FALSE;
  
     /* Find the alpha channel if not provided seperately */
     if (!src_alpha) {
@@ -335,6 +338,14 @@ lp_build_blend_aos(struct gallivm_state *gallivm,
              alpha_swizzle = i;
           }
        }
+      /*
+       * Note that we may get src_alpha included from source (and 4 channels)
+       * even if the destination doesn't have an alpha channel (for rgbx
+       * formats). Generally this shouldn't make much of a difference (we're
+       * relying on blend factors being sanitized already if there's no
+       * dst alpha).
+       */
+      bld.has_dst_alpha = desc->swizzle[3] <= PIPE_SWIZZLE_W;
     }
  
     if (blend->logicop_enable) {
@@ -347,7 +358,9 @@ lp_build_blend_aos(struct gallivm_state *gallivm,
     } else if (!state->blend_enable) {
        result = src;
     } else {
-      boolean rgb_alpha_same = (state->rgb_src_factor == state->rgb_dst_factor && state->alpha_src_factor == state->alpha_dst_factor) || nr_channels == 1;
+      boolean rgb_alpha_same = (state->rgb_src_factor == state->rgb_dst_factor &&
+                                state->alpha_src_factor == state->alpha_dst_factor) ||
+                               nr_channels == 1;
  
        src_factor = lp_build_blend_factor(&bld, state->rgb_src_factor,
                                           state->alpha_src_factor,
@@ -370,7 +383,8 @@ lp_build_blend_aos(struct gallivm_state *gallivm,
                                rgb_alpha_same,
                                false);
  
-      if(state->rgb_func != state->alpha_func && nr_channels > 1 && alpha_swizzle != PIPE_SWIZZLE_NONE) {
+      if(state->rgb_func != state->alpha_func && nr_channels > 1 &&
+                            alpha_swizzle != PIPE_SWIZZLE_NONE) {
           LLVMValueRef alpha;
  
           alpha = lp_build_blend(&bld.base,
@@ -397,7 +411,8 @@ lp_build_blend_aos(struct gallivm_state *gallivm,
     if (!util_format_colormask_full(desc, state->colormask)) {
        LLVMValueRef color_mask;
  
-      color_mask = lp_build_const_mask_aos_swizzled(gallivm, bld.base.type, state->colormask, nr_channels, swizzle);
+      color_mask = lp_build_const_mask_aos_swizzled(gallivm, bld.base.type,
+                                                    state->colormask, nr_channels, swizzle);
        lp_build_name(color_mask, "color_mask");
  
        /* Combine with input mask if necessary */
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c

index b6f4c2a36c91b09de5b52f2e6e1247275d3ad9cc..2c0339cad60a8ef1a0162da2037299994724857b 100644 (file)
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -1460,7 +1460,8 @@ convert_from_blend_type(struct gallivm_state *gallivm,
           /* Extract bits */
           chans[j] = LLVMBuildLShr(builder,
                                    dst[i],
-                                  lp_build_const_int_vec(gallivm, src_type, from_lsb * blend_type.width),
+                                  lp_build_const_int_vec(gallivm, src_type,
+                                                         from_lsb * blend_type.width),
                                    "");
  
           chans[j] = LLVMBuildAnd(builder,
@@ -1548,7 +1549,8 @@ convert_alpha(struct gallivm_state *gallivm,
        /* If there is a src for each pixel broadcast the alpha across whole row */
        if (src_count == block_size) {
           for (i = 0; i < src_count; ++i) {
-            src_alpha[i] = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, row_type), src_alpha[i]);
+            src_alpha[i] = lp_build_broadcast(gallivm,
+                              lp_build_vec_type(gallivm, row_type), src_alpha[i]);
           }
        } else {
           unsigned pixels = block_size / src_count;
@@ -1749,13 +1751,23 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
     }
  
     /* If 3 channels then pad to include alpha for 4 element transpose */
-   if (dst_channels == 3 && !has_alpha) {
+   if (dst_channels == 3) {
+      assert (!has_alpha);
        for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
           if (swizzle[i] > TGSI_NUM_CHANNELS)
              swizzle[i] = 3;
        }
        if (out_format_desc->nr_channels == 4) {
           dst_channels = 4;
+         /*
+          * We use alpha from the color conversion, not separate one.
+          * We had to include it for transpose, hence it will get converted
+          * too (albeit when doing transpose after conversion, that would
+          * no longer be the case necessarily).
+          * (It works only with 4 channel dsts, e.g. rgbx formats, because
+          * otherwise we really have padding, not alpha, included.)
+          */
+         has_alpha = true;
        }
     }
  
@@ -1787,6 +1799,7 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
        /*
         * XXX If we include that here maybe could actually use it instead of
         * separate alpha for blending?
+       * (Difficult though, since we actually convert pad channels, not alpha.)
         */
        if (dst_channels == 3 && !has_alpha) {
           fs_src[i][3] = alpha;
@@ -1794,11 +1807,14 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
  
        /* We split the row_mask and row_alpha as we want 128bit interleave */
        if (fs_type.length == 8) {
-         src_mask[i*2 + 0]  = lp_build_extract_range(gallivm, fs_mask[i], 0, src_channels);
-         src_mask[i*2 + 1]  = lp_build_extract_range(gallivm, fs_mask[i], src_channels, src_channels);
+         src_mask[i*2 + 0]  = lp_build_extract_range(gallivm, fs_mask[i],
+                                                     0, src_channels);
+         src_mask[i*2 + 1]  = lp_build_extract_range(gallivm, fs_mask[i],
+                                                     src_channels, src_channels);
  
           src_alpha[i*2 + 0] = lp_build_extract_range(gallivm, alpha, 0, src_channels);
-         src_alpha[i*2 + 1] = lp_build_extract_range(gallivm, alpha, src_channels, src_channels);
+         src_alpha[i*2 + 1] = lp_build_extract_range(gallivm, alpha,
+                                                     src_channels, src_channels);
        } else {
           src_mask[i] = fs_mask[i];
           src_alpha[i] = alpha;
@@ -1829,7 +1845,8 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
           }
           if (fs_type.length == 8) {
              src1_alpha[i*2 + 0] = lp_build_extract_range(gallivm, alpha, 0, src_channels);
-            src1_alpha[i*2 + 1] = lp_build_extract_range(gallivm, alpha, src_channels, src_channels);
+            src1_alpha[i*2 + 1] = lp_build_extract_range(gallivm, alpha,
+                                                         src_channels, src_channels);
           } else {
              src1_alpha[i] = alpha;
           }
@@ -1911,8 +1928,10 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
      * Blend Colour conversion
      */
     blend_color = lp_jit_context_f_blend_color(gallivm, context_ptr);
-   blend_color = LLVMBuildPointerCast(builder, blend_color, LLVMPointerType(lp_build_vec_type(gallivm, fs_type), 0), "");
-   blend_color = LLVMBuildLoad(builder, LLVMBuildGEP(builder, blend_color, &i32_zero, 1, ""), "");
+   blend_color = LLVMBuildPointerCast(builder, blend_color,
+                    LLVMPointerType(lp_build_vec_type(gallivm, fs_type), 0), "");
+   blend_color = LLVMBuildLoad(builder, LLVMBuildGEP(builder, blend_color,
+                               &i32_zero, 1, ""), "");
  
     /* Convert */
     lp_build_conv(gallivm, fs_type, blend_type, &blend_color, 1, &blend_color, 1);
@@ -2141,7 +2160,8 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
      * It seems some cleanup could be done here (like skipping conversion/blend
      * when not needed).
      */
-   convert_to_blend_type(gallivm, block_size, out_format_desc, dst_type, row_type, dst, src_count);
+   convert_to_blend_type(gallivm, block_size, out_format_desc, dst_type,
+                         row_type, dst, src_count);
  
     /*
      * FIXME: Really should get logic ops / masks out of generic blend / row
@@ -2167,7 +2187,8 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
                                    pad_inline ? 4 : dst_channels);
     }
  
-   convert_from_blend_type(gallivm, block_size, out_format_desc, row_type, dst_type, dst, src_count);
+   convert_from_blend_type(gallivm, block_size, out_format_desc,
+                           row_type, dst_type, dst, src_count);
  
     /* Split the blend rows back to memory rows */
     if (dst_count > src_count) {
author	Roland Scheidegger <sroland@vmware.com>
	Thu, 22 Dec 2016 02:49:22 +0000 (03:49 +0100)
committer	Roland Scheidegger <sroland@vmware.com>
	Fri, 6 Jan 2017 22:13:34 +0000 (23:13 +0100)
src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c		patch | blob | history
src/gallium/drivers/llvmpipe/lp_state_fs.c		patch | blob | history