llvmpipe: use alpha from already converted color if possible
authorRoland Scheidegger <sroland@vmware.com>
Thu, 22 Dec 2016 02:49:22 +0000 (03:49 +0100)
committerRoland Scheidegger <sroland@vmware.com>
Fri, 6 Jan 2017 22:13:34 +0000 (23:13 +0100)
For rgbx formats, there is no point in doing alpha conversion again (and
with different tranpose even, so llvm can't eliminate it).
Albeit it looks like there's some minimal changes needed in the blend code
(found by code inspection, no test seemed to complain) if we do this -
the blend factors are already sanitized if we have no destination alpha,
however for src_alpha_saturate it looks like it still might make a
difference (note that we forced has_alpha to true before for some formats
and nothing complained, but this seems safer).

Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
src/gallium/drivers/llvmpipe/lp_state_fs.c

index a57670d49567ebdfb63b7124b40103f0780a63a0..45c5c2bb65e681b56528451733150d93fbc27a53 100644 (file)
@@ -74,6 +74,7 @@ struct lp_build_blend_aos_context
    LLVMValueRef dst;
    LLVMValueRef const_;
    LLVMValueRef const_alpha;
+   boolean has_dst_alpha;
 
    LLVMValueRef inv_src;
    LLVMValueRef inv_src_alpha;
@@ -115,10 +116,10 @@ lp_build_blend_factor_unswizzled(struct lp_build_blend_aos_context *bld,
          return bld->base.one;
       else {
          /*
-          * if there's separate src_alpha there's no dst alpha hence the complement
-          * is zero but for unclamped float inputs min can be non-zero (negative).
+          * If there's no dst alpha the complement is zero but for unclamped
+          * float inputs min can be non-zero (negative).
           */
-         if (bld->src_alpha) {
+         if (!bld->has_dst_alpha) {
             if (!bld->saturate)
                bld->saturate = lp_build_min(&bld->base, src_alpha, bld->base.zero);
          }
@@ -264,7 +265,8 @@ lp_build_blend_factor(struct lp_build_blend_aos_context *bld,
    if (alpha_swizzle != PIPE_SWIZZLE_NONE) {
       rgb_swizzle   = lp_build_blend_factor_swizzle(rgb_factor);
       alpha_factor_ = lp_build_blend_factor_unswizzled(bld, alpha_factor, TRUE);
-      return lp_build_blend_swizzle(bld, rgb_factor_, alpha_factor_, rgb_swizzle, alpha_swizzle, num_channels);
+      return lp_build_blend_swizzle(bld, rgb_factor_, alpha_factor_, rgb_swizzle,
+                                    alpha_swizzle, num_channels);
    } else {
       return rgb_factor_;
    }
@@ -327,6 +329,7 @@ lp_build_blend_aos(struct gallivm_state *gallivm,
    bld.src_alpha = src_alpha;
    bld.src1_alpha = src1_alpha;
    bld.const_alpha = const_alpha;
+   bld.has_dst_alpha = FALSE;
 
    /* Find the alpha channel if not provided seperately */
    if (!src_alpha) {
@@ -335,6 +338,14 @@ lp_build_blend_aos(struct gallivm_state *gallivm,
             alpha_swizzle = i;
          }
       }
+      /*
+       * Note that we may get src_alpha included from source (and 4 channels)
+       * even if the destination doesn't have an alpha channel (for rgbx
+       * formats). Generally this shouldn't make much of a difference (we're
+       * relying on blend factors being sanitized already if there's no
+       * dst alpha).
+       */
+      bld.has_dst_alpha = desc->swizzle[3] <= PIPE_SWIZZLE_W;
    }
 
    if (blend->logicop_enable) {
@@ -347,7 +358,9 @@ lp_build_blend_aos(struct gallivm_state *gallivm,
    } else if (!state->blend_enable) {
       result = src;
    } else {
-      boolean rgb_alpha_same = (state->rgb_src_factor == state->rgb_dst_factor && state->alpha_src_factor == state->alpha_dst_factor) || nr_channels == 1;
+      boolean rgb_alpha_same = (state->rgb_src_factor == state->rgb_dst_factor &&
+                                state->alpha_src_factor == state->alpha_dst_factor) ||
+                               nr_channels == 1;
 
       src_factor = lp_build_blend_factor(&bld, state->rgb_src_factor,
                                          state->alpha_src_factor,
@@ -370,7 +383,8 @@ lp_build_blend_aos(struct gallivm_state *gallivm,
                               rgb_alpha_same,
                               false);
 
-      if(state->rgb_func != state->alpha_func && nr_channels > 1 && alpha_swizzle != PIPE_SWIZZLE_NONE) {
+      if(state->rgb_func != state->alpha_func && nr_channels > 1 &&
+                            alpha_swizzle != PIPE_SWIZZLE_NONE) {
          LLVMValueRef alpha;
 
          alpha = lp_build_blend(&bld.base,
@@ -397,7 +411,8 @@ lp_build_blend_aos(struct gallivm_state *gallivm,
    if (!util_format_colormask_full(desc, state->colormask)) {
       LLVMValueRef color_mask;
 
-      color_mask = lp_build_const_mask_aos_swizzled(gallivm, bld.base.type, state->colormask, nr_channels, swizzle);
+      color_mask = lp_build_const_mask_aos_swizzled(gallivm, bld.base.type,
+                                                    state->colormask, nr_channels, swizzle);
       lp_build_name(color_mask, "color_mask");
 
       /* Combine with input mask if necessary */
index b6f4c2a36c91b09de5b52f2e6e1247275d3ad9cc..2c0339cad60a8ef1a0162da2037299994724857b 100644 (file)
@@ -1460,7 +1460,8 @@ convert_from_blend_type(struct gallivm_state *gallivm,
          /* Extract bits */
          chans[j] = LLVMBuildLShr(builder,
                                   dst[i],
-                                  lp_build_const_int_vec(gallivm, src_type, from_lsb * blend_type.width),
+                                  lp_build_const_int_vec(gallivm, src_type,
+                                                         from_lsb * blend_type.width),
                                   "");
 
          chans[j] = LLVMBuildAnd(builder,
@@ -1548,7 +1549,8 @@ convert_alpha(struct gallivm_state *gallivm,
       /* If there is a src for each pixel broadcast the alpha across whole row */
       if (src_count == block_size) {
          for (i = 0; i < src_count; ++i) {
-            src_alpha[i] = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, row_type), src_alpha[i]);
+            src_alpha[i] = lp_build_broadcast(gallivm,
+                              lp_build_vec_type(gallivm, row_type), src_alpha[i]);
          }
       } else {
          unsigned pixels = block_size / src_count;
@@ -1749,13 +1751,23 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
    }
 
    /* If 3 channels then pad to include alpha for 4 element transpose */
-   if (dst_channels == 3 && !has_alpha) {
+   if (dst_channels == 3) {
+      assert (!has_alpha);
       for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
          if (swizzle[i] > TGSI_NUM_CHANNELS)
             swizzle[i] = 3;
       }
       if (out_format_desc->nr_channels == 4) {
          dst_channels = 4;
+         /*
+          * We use alpha from the color conversion, not separate one.
+          * We had to include it for transpose, hence it will get converted
+          * too (albeit when doing transpose after conversion, that would
+          * no longer be the case necessarily).
+          * (It works only with 4 channel dsts, e.g. rgbx formats, because
+          * otherwise we really have padding, not alpha, included.)
+          */
+         has_alpha = true;
       }
    }
 
@@ -1787,6 +1799,7 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
       /*
        * XXX If we include that here maybe could actually use it instead of
        * separate alpha for blending?
+       * (Difficult though we actually convert pad channels, not alpha.)
        */
       if (dst_channels == 3 && !has_alpha) {
          fs_src[i][3] = alpha;
@@ -1794,11 +1807,14 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
 
       /* We split the row_mask and row_alpha as we want 128bit interleave */
       if (fs_type.length == 8) {
-         src_mask[i*2 + 0]  = lp_build_extract_range(gallivm, fs_mask[i], 0, src_channels);
-         src_mask[i*2 + 1]  = lp_build_extract_range(gallivm, fs_mask[i], src_channels, src_channels);
+         src_mask[i*2 + 0]  = lp_build_extract_range(gallivm, fs_mask[i],
+                                                     0, src_channels);
+         src_mask[i*2 + 1]  = lp_build_extract_range(gallivm, fs_mask[i],
+                                                     src_channels, src_channels);
 
          src_alpha[i*2 + 0] = lp_build_extract_range(gallivm, alpha, 0, src_channels);
-         src_alpha[i*2 + 1] = lp_build_extract_range(gallivm, alpha, src_channels, src_channels);
+         src_alpha[i*2 + 1] = lp_build_extract_range(gallivm, alpha,
+                                                     src_channels, src_channels);
       } else {
          src_mask[i] = fs_mask[i];
          src_alpha[i] = alpha;
@@ -1829,7 +1845,8 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
          }
          if (fs_type.length == 8) {
             src1_alpha[i*2 + 0] = lp_build_extract_range(gallivm, alpha, 0, src_channels);
-            src1_alpha[i*2 + 1] = lp_build_extract_range(gallivm, alpha, src_channels, src_channels);
+            src1_alpha[i*2 + 1] = lp_build_extract_range(gallivm, alpha,
+                                                         src_channels, src_channels);
          } else {
             src1_alpha[i] = alpha;
          }
@@ -1911,8 +1928,10 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
     * Blend Colour conversion
     */
    blend_color = lp_jit_context_f_blend_color(gallivm, context_ptr);
-   blend_color = LLVMBuildPointerCast(builder, blend_color, LLVMPointerType(lp_build_vec_type(gallivm, fs_type), 0), "");
-   blend_color = LLVMBuildLoad(builder, LLVMBuildGEP(builder, blend_color, &i32_zero, 1, ""), "");
+   blend_color = LLVMBuildPointerCast(builder, blend_color,
+                    LLVMPointerType(lp_build_vec_type(gallivm, fs_type), 0), "");
+   blend_color = LLVMBuildLoad(builder, LLVMBuildGEP(builder, blend_color,
+                               &i32_zero, 1, ""), "");
 
    /* Convert */
    lp_build_conv(gallivm, fs_type, blend_type, &blend_color, 1, &blend_color, 1);
@@ -2141,7 +2160,8 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
     * It seems some cleanup could be done here (like skipping conversion/blend
     * when not needed).
     */
-   convert_to_blend_type(gallivm, block_size, out_format_desc, dst_type, row_type, dst, src_count);
+   convert_to_blend_type(gallivm, block_size, out_format_desc, dst_type,
+                         row_type, dst, src_count);
 
    /*
     * FIXME: Really should get logic ops / masks out of generic blend / row
@@ -2167,7 +2187,8 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
                                   pad_inline ? 4 : dst_channels);
    }
 
-   convert_from_blend_type(gallivm, block_size, out_format_desc, row_type, dst_type, dst, src_count);
+   convert_from_blend_type(gallivm, block_size, out_format_desc,
+                           row_type, dst_type, dst, src_count);
 
    /* Split the blend rows back to memory rows */
    if (dst_count > src_count) {