swr/rast: Initial work for debugging support.
[mesa.git] / src / gallium / drivers / llvmpipe / lp_bld_blend.c
index e070aac378df334049fe4ec2565746ff95e32235..02ec55eddbd39854b2e29c03bd1d4fa6cbb0a7a1 100644 (file)
@@ -1,6 +1,6 @@
 /**************************************************************************
  *
- * Copyright 2009 VMware, Inc.
+ * Copyright 2012 VMware, Inc.
  * All Rights Reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  *
  **************************************************************************/
 
-
-/**
- * @file
- * Blend LLVM IR generation.
- *
- * This code is generic -- it should be able to cope both with floating point
- * and integer inputs in AOS form.
- *
- * @author Jose Fonseca <jfonseca@vmware.com>
- */
-
-
 #include "pipe/p_state.h"
+#include "util/u_debug.h"
 
-#include "lp_bld.h"
-#include "lp_bld_type.h"
-#include "lp_bld_const.h"
-#include "lp_bld_arit.h"
-#include "lp_bld_swizzle.h"
+#include "gallivm/lp_bld_type.h"
+#include "gallivm/lp_bld_arit.h"
+#include "gallivm/lp_bld_const.h"
+#include "gallivm/lp_bld_logic.h"
+#include "gallivm/lp_bld_swizzle.h"
+#include "gallivm/lp_bld_flow.h"
+#include "gallivm/lp_bld_debug.h"
+#include "gallivm/lp_bld_pack.h"
 
+#include "lp_bld_blend.h"
 
 /**
- * We may the same values several times, so we keep them here to avoid
- * recomputing them. Also reusing the values allows us to do simplifications
- * that LLVM optimization passes wouldn't normally be able to do.
+ * Is (a OP b) == (b OP a)?
  */
-struct lp_build_blend_context
+boolean
+lp_build_blend_func_commutative(unsigned func)
 {
-   struct lp_build_context base;
-   
-   LLVMValueRef src;
-   LLVMValueRef dst;
-   LLVMValueRef const_;
-
-   LLVMValueRef inv_src;
-   LLVMValueRef inv_dst;
-   LLVMValueRef inv_const;
-   LLVMValueRef saturate;
-
-   LLVMValueRef rgb_src_factor;
-   LLVMValueRef alpha_src_factor;
-   LLVMValueRef rgb_dst_factor;
-   LLVMValueRef alpha_dst_factor;
-};
-
-
-static LLVMValueRef
-lp_build_blend_factor_unswizzled(struct lp_build_blend_context *bld,
-                                 unsigned factor,
-                                 boolean alpha)
-{
-   switch (factor) {
-   case PIPE_BLENDFACTOR_ZERO:
-      return bld->base.zero;
-   case PIPE_BLENDFACTOR_ONE:
-      return bld->base.one;
-   case PIPE_BLENDFACTOR_SRC_COLOR:
-   case PIPE_BLENDFACTOR_SRC_ALPHA:
-      return bld->src;
-   case PIPE_BLENDFACTOR_DST_COLOR:
-   case PIPE_BLENDFACTOR_DST_ALPHA:
-      return bld->dst;
-   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
-      if(alpha)
-         return bld->base.one;
-      else {
-         if(!bld->inv_dst)
-            bld->inv_dst = lp_build_comp(&bld->base, bld->dst);
-         if(!bld->saturate)
-            bld->saturate = lp_build_min(&bld->base, bld->src, bld->inv_dst);
-         return bld->saturate;
-      }
-   case PIPE_BLENDFACTOR_CONST_COLOR:
-   case PIPE_BLENDFACTOR_CONST_ALPHA:
-      return bld->const_;
-   case PIPE_BLENDFACTOR_SRC1_COLOR:
-   case PIPE_BLENDFACTOR_SRC1_ALPHA:
-      /* TODO */
-      assert(0);
-      return bld->base.zero;
-   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
-   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
-      if(!bld->inv_src)
-         bld->inv_src = lp_build_comp(&bld->base, bld->src);
-      return bld->inv_src;
-   case PIPE_BLENDFACTOR_INV_DST_COLOR:
-   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
-      if(!bld->inv_dst)
-         bld->inv_dst = lp_build_comp(&bld->base, bld->dst);
-      return bld->inv_dst;
-   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
-   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
-      if(!bld->inv_const)
-         bld->inv_const = lp_build_comp(&bld->base, bld->const_);
-      return bld->inv_const;
-   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
-   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
-      /* TODO */
-      assert(0);
-      return bld->base.zero;
+   switch (func) {
+   case PIPE_BLEND_ADD:
+   case PIPE_BLEND_MIN:
+   case PIPE_BLEND_MAX:
+      return TRUE;
+   case PIPE_BLEND_SUBTRACT:
+   case PIPE_BLEND_REVERSE_SUBTRACT:
+      return FALSE;
    default:
       assert(0);
-      return bld->base.zero;
+      return TRUE;
    }
 }
 
 
-enum lp_build_blend_swizzle {
-   LP_BUILD_BLEND_SWIZZLE_RGBA = 0,
-   LP_BUILD_BLEND_SWIZZLE_AAAA = 1,
-};
+/**
+ * Whether the blending functions are the reverse of each other.
+ */
+boolean
+lp_build_blend_func_reverse(unsigned rgb_func, unsigned alpha_func)
+{
+   if (rgb_func == alpha_func)
+      return FALSE;
+   if (rgb_func == PIPE_BLEND_SUBTRACT && alpha_func == PIPE_BLEND_REVERSE_SUBTRACT)
+      return TRUE;
+   if (rgb_func == PIPE_BLEND_REVERSE_SUBTRACT && alpha_func == PIPE_BLEND_SUBTRACT)
+      return TRUE;
+   return FALSE;
+}
 
 
 /**
- * How should we shuffle the base factor.
+ * Whether the blending factors are complementary of each other.
  */
-static enum lp_build_blend_swizzle
-lp_build_blend_factor_swizzle(unsigned factor)
+static inline boolean
+lp_build_blend_factor_complementary(unsigned src_factor, unsigned dst_factor)
 {
-   switch (factor) {
-   case PIPE_BLENDFACTOR_ONE:
-   case PIPE_BLENDFACTOR_ZERO:
-   case PIPE_BLENDFACTOR_SRC_COLOR:
-   case PIPE_BLENDFACTOR_DST_COLOR:
-   case PIPE_BLENDFACTOR_CONST_COLOR:
-   case PIPE_BLENDFACTOR_SRC1_COLOR:
-   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
-   case PIPE_BLENDFACTOR_INV_DST_COLOR:
-   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
-   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
-      return LP_BUILD_BLEND_SWIZZLE_RGBA;
-   case PIPE_BLENDFACTOR_SRC_ALPHA:
-   case PIPE_BLENDFACTOR_DST_ALPHA:
-   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
-   case PIPE_BLENDFACTOR_SRC1_ALPHA:
-   case PIPE_BLENDFACTOR_CONST_ALPHA:
-   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
-   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
-   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
-   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
-      return LP_BUILD_BLEND_SWIZZLE_AAAA;
-   default:
-      assert(0);
-      return LP_BUILD_BLEND_SWIZZLE_RGBA;
-   }
+   STATIC_ASSERT((PIPE_BLENDFACTOR_ZERO ^ 0x10) == PIPE_BLENDFACTOR_ONE);
+   STATIC_ASSERT((PIPE_BLENDFACTOR_CONST_COLOR ^ 0x10) ==
+                 PIPE_BLENDFACTOR_INV_CONST_COLOR);
+   return dst_factor == (src_factor ^ 0x10);
 }
 
 
-static LLVMValueRef
-lp_build_blend_swizzle(struct lp_build_blend_context *bld,
-                       LLVMValueRef rgb, 
-                       LLVMValueRef alpha, 
-                       enum lp_build_blend_swizzle rgb_swizzle,
-                       unsigned alpha_swizzle)
+/**
+ * Whether this is an inverse blend factor
+ */
+static inline boolean
+is_inverse_factor(unsigned factor)
 {
-   if(rgb == alpha) {
-      if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_RGBA)
-         return rgb;
-      if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_AAAA)
-         return lp_build_broadcast_aos(&bld->base, rgb, alpha_swizzle);
-   }
-   else {
-      if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_RGBA) {
-         boolean cond[4] = {0, 0, 0, 0};
-         cond[alpha_swizzle] = 1;
-         return lp_build_select_aos(&bld->base, alpha, rgb, cond);
-      }
-      if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_AAAA) {
-         unsigned char swizzle[4];
-         swizzle[0] = alpha_swizzle;
-         swizzle[1] = alpha_swizzle;
-         swizzle[2] = alpha_swizzle;
-         swizzle[3] = alpha_swizzle;
-         swizzle[alpha_swizzle] += 4;
-         return lp_build_swizzle2_aos(&bld->base, rgb, alpha, swizzle);
-      }
-   }
-   assert(0);
-   return bld->base.undef;
+   STATIC_ASSERT(PIPE_BLENDFACTOR_ZERO == 0x11);
+   return factor > 0x11;
 }
 
 
 /**
- * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendFuncSeparate.xml
+ * Calculates the (expanded to wider type) multiplication
+ * of 2 normalized numbers.
  */
-static LLVMValueRef
-lp_build_blend_factor(struct lp_build_blend_context *bld,
-                      LLVMValueRef factor1,
-                      unsigned rgb_factor,
-                      unsigned alpha_factor,
-                      unsigned alpha_swizzle)
+static void
+lp_build_mul_norm_expand(struct lp_build_context *bld,
+                         LLVMValueRef a, LLVMValueRef b,
+                         LLVMValueRef *resl, LLVMValueRef *resh,
+                         boolean signedness_differs)
 {
-   LLVMValueRef rgb_factor_;
-   LLVMValueRef alpha_factor_;
-   LLVMValueRef factor2;
-   enum lp_build_blend_swizzle rgb_swizzle;
-
-   rgb_factor_   = lp_build_blend_factor_unswizzled(bld, rgb_factor,   FALSE);
-   alpha_factor_ = lp_build_blend_factor_unswizzled(bld, alpha_factor, TRUE);
+   const struct lp_type type = bld->type;
+   struct lp_type wide_type = lp_wider_type(type);
+   struct lp_type wide_type2 = wide_type;
+   struct lp_type type2 = type;
+   LLVMValueRef al, ah, bl, bh;
+
+   assert(lp_check_value(type, a));
+   assert(lp_check_value(type, b));
+   assert(!type.floating && !type.fixed && type.norm);
+
+   if (a == bld->zero || b == bld->zero) {
+      LLVMValueRef zero = LLVMConstNull(lp_build_vec_type(bld->gallivm, wide_type));
+      *resl = zero;
+      *resh = zero;
+      return;
+   }
 
-   rgb_swizzle = lp_build_blend_factor_swizzle(rgb_factor);
+   if (signedness_differs) {
+      type2.sign = !type.sign;
+      wide_type2.sign = !wide_type2.sign;
+   }
 
-   factor2 = lp_build_blend_swizzle(bld, rgb_factor_, alpha_factor_, rgb_swizzle, alpha_swizzle);
+   lp_build_unpack2_native(bld->gallivm, type, wide_type, a, &al, &ah);
+   lp_build_unpack2_native(bld->gallivm, type2, wide_type2, b, &bl, &bh);
 
-   return lp_build_mul(&bld->base, factor1, factor2);
+   *resl = lp_build_mul_norm(bld->gallivm, wide_type, al, bl);
+   *resh = lp_build_mul_norm(bld->gallivm, wide_type, ah, bh);
 }
 
 
 /**
  * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendEquationSeparate.xml
  */
-static LLVMValueRef
-lp_build_blend_func(struct lp_build_blend_context *bld,
+LLVMValueRef
+lp_build_blend_func(struct lp_build_context *bld,
                     unsigned func,
-                    LLVMValueRef term1, 
+                    LLVMValueRef term1,
                     LLVMValueRef term2)
 {
    switch (func) {
    case PIPE_BLEND_ADD:
-      return lp_build_add(&bld->base, term1, term2);
-      break;
+      return lp_build_add(bld, term1, term2);
    case PIPE_BLEND_SUBTRACT:
-      return lp_build_sub(&bld->base, term1, term2);
+      return lp_build_sub(bld, term1, term2);
    case PIPE_BLEND_REVERSE_SUBTRACT:
-      return lp_build_sub(&bld->base, term2, term1);
+      return lp_build_sub(bld, term2, term1);
    case PIPE_BLEND_MIN:
-      return lp_build_min(&bld->base, term1, term2);
+      return lp_build_min(bld, term1, term2);
    case PIPE_BLEND_MAX:
-      return lp_build_max(&bld->base, term1, term2);
+      return lp_build_max(bld, term1, term2);
    default:
       assert(0);
-      return bld->base.zero;
+      return bld->zero;
    }
 }
 
 
+/**
+ * Performs optimisations and blending independent of SoA/AoS
+ *
+ * @param func                   the blend function
+ * @param factor_src             PIPE_BLENDFACTOR_xxx
+ * @param factor_dst             PIPE_BLENDFACTOR_xxx
+ * @param src                    source rgba
+ * @param dst                    dest rgba
+ * @param src_factor             src factor computed value
+ * @param dst_factor             dst factor computed value
+ * @param not_alpha_dependent    same factors across all channels of src/dst
+ * @param optimise_only          if true, return NULL when no optimised path applies
+ *
+ * not_alpha_dependent should be:
+ *  SoA: always true as it is only one channel at a time
+ *  AoS: rgb_src_factor == alpha_src_factor && rgb_dst_factor == alpha_dst_factor
+ *
+ * Note that pretty much every possible optimisation can only be done on non-unorm targets
+ * due to unorm values not going above 1.0 meaning factorisation can change results.
+ * e.g. (0.9 * 0.9) + (0.9 * 0.9) != 0.9 * (0.9 + 0.9) as result of + is always <= 1.
+ */
 LLVMValueRef
-lp_build_blend(LLVMBuilderRef builder,
-               const struct pipe_blend_state *blend,
-               union lp_type type,
+lp_build_blend(struct lp_build_context *bld,
+               unsigned func,
+               unsigned factor_src,
+               unsigned factor_dst,
                LLVMValueRef src,
                LLVMValueRef dst,
-               LLVMValueRef const_,
-               unsigned alpha_swizzle)
+               LLVMValueRef src_factor,
+               LLVMValueRef dst_factor,
+               boolean not_alpha_dependent,
+               boolean optimise_only)
 {
-   struct lp_build_blend_context bld;
-   LLVMValueRef src_term;
-   LLVMValueRef dst_term;
-
-   /* It makes no sense to blend unless values are normalized */
-   assert(type.norm);
-
-   /* Setup build context */
-   memset(&bld, 0, sizeof bld);
-   bld.base.builder = builder;
-   bld.base.type = type;
-   bld.base.undef = lp_build_undef(type);
-   bld.base.zero = lp_build_zero(type);
-   bld.base.one = lp_build_one(type);
-   bld.src = src;
-   bld.dst = dst;
-   bld.const_ = const_;
-
-   /* TODO: There are still a few optimization oportunities here. For certain
-    * combinations it is possible to reorder the operations and therefor saving
-    * some instructions. */
-
-   src_term = lp_build_blend_factor(&bld, src, blend->rgb_src_factor, blend->alpha_src_factor, alpha_swizzle);
-   dst_term = lp_build_blend_factor(&bld, dst, blend->rgb_dst_factor, blend->alpha_dst_factor, alpha_swizzle);
-
-#ifdef DEBUG
-   LLVMSetValueName(src_term, "src_term");
-   LLVMSetValueName(dst_term, "dst_term");
-#endif
-
-   if(blend->rgb_func == blend->alpha_func) {
-      return lp_build_blend_func(&bld, blend->rgb_func, src_term, dst_term);
-   }
-   else {
-      /* Seperate RGB / A functions */
+   LLVMValueRef result, src_term, dst_term;
+
+   /* If we are not alpha dependent we can mess with the src/dst factors */
+   if (not_alpha_dependent) {
+      if (lp_build_blend_factor_complementary(factor_src, factor_dst)) {
+         if (func == PIPE_BLEND_ADD) {
+            if (factor_src < factor_dst) {
+               return lp_build_lerp(bld, src_factor, dst, src, 0);
+            } else {
+               return lp_build_lerp(bld, dst_factor, src, dst, 0);
+            }
+         } else if (bld->type.floating && func == PIPE_BLEND_SUBTRACT) {
+            result = lp_build_add(bld, src, dst);
+
+            if (factor_src < factor_dst) {
+               result = lp_build_mul(bld, result, src_factor);
+               return lp_build_sub(bld, result, dst);
+            } else {
+               result = lp_build_mul(bld, result, dst_factor);
+               return lp_build_sub(bld, src, result);
+            }
+         } else if (bld->type.floating && func == PIPE_BLEND_REVERSE_SUBTRACT) {
+            result = lp_build_add(bld, src, dst);
+
+            if (factor_src < factor_dst) {
+               result = lp_build_mul(bld, result, src_factor);
+               return lp_build_sub(bld, dst, result);
+            } else {
+               result = lp_build_mul(bld, result, dst_factor);
+               return lp_build_sub(bld, result, src);
+            }
+         }
+      }
 
-      LLVMValueRef rgb;
-      LLVMValueRef alpha;
+      if (bld->type.floating && factor_src == factor_dst) {
+         if (func == PIPE_BLEND_ADD ||
+             func == PIPE_BLEND_SUBTRACT ||
+             func == PIPE_BLEND_REVERSE_SUBTRACT) {
+            LLVMValueRef result;
+            result = lp_build_blend_func(bld, func, src, dst);
+            return lp_build_mul(bld, result, src_factor);
+         }
+      }
+   }
 
-      rgb   = lp_build_blend_func(&bld, blend->rgb_func,   src_term, dst_term);
-      alpha = lp_build_blend_func(&bld, blend->alpha_func, src_term, dst_term);
+   if (optimise_only)
+      return NULL;
+
+   if ((bld->type.norm && bld->type.sign) &&
+       (is_inverse_factor(factor_src) || is_inverse_factor(factor_dst))) {
+      /*
+       * With snorm blending, the inverse blend factors range from [0,2]
+       * instead of [-1,1], so the ordinary signed normalized arithmetic
+       * doesn't quite work. Unpack must be unsigned, and the add/sub
+       * must be done with wider type.
+       * (Note that it's not quite obvious what the blend equation wrt
+       * clamping should actually be based on GL spec in this case, but
+       * really the incoming src values are clamped to [-1,1] (the dst is
+       * always clamped already), and then NO further clamping occurs until
+       * the end.)
+       */
+      struct lp_build_context bldw;
+      struct lp_type wide_type = lp_wider_type(bld->type);
+      LLVMValueRef src_terml, src_termh, dst_terml, dst_termh;
+      LLVMValueRef resl, resh;
+
+      /*
+       * We don't need saturate math for the sub/add, since we have
+       * x+1 bit numbers in x*2 wide type (result is x+2 bits).
+       * (Doesn't really matter on x86 sse2 though as we use saturated
+       * intrinsics.)
+       */
+      wide_type.norm = 0;
+      lp_build_context_init(&bldw, bld->gallivm, wide_type);
+
+      /*
+       * XXX This is a bit hackish. Note that -128 really should
+       * be -1.0, the same as -127. However, we did not actually clamp
+       * things anywhere (relying on pack intrinsics instead) therefore
+       * we will get -128, and the inverted factor then 255. But the mul
+       * can overflow in this case (rather the rounding fixups for the mul,
+       * -128*255 will be positive).
+       * So we clamp the src and dst up here but only when necessary (we
+       * should do this before calculating blend factors but it's enough
+       * for avoiding overflow).
+       */
+      if (is_inverse_factor(factor_src)) {
+         src = lp_build_max(bld, src,
+                            lp_build_const_vec(bld->gallivm, bld->type, -1.0));
+      }
+      if (is_inverse_factor(factor_dst)) {
+         dst = lp_build_max(bld, dst,
+                            lp_build_const_vec(bld->gallivm, bld->type, -1.0));
+      }
 
-      return lp_build_blend_swizzle(&bld, rgb, alpha, LP_BUILD_BLEND_SWIZZLE_RGBA, alpha_swizzle);
+      lp_build_mul_norm_expand(bld, src, src_factor, &src_terml, &src_termh,
+                               is_inverse_factor(factor_src) ? TRUE : FALSE);
+      lp_build_mul_norm_expand(bld, dst, dst_factor, &dst_terml, &dst_termh,
+                               is_inverse_factor(factor_dst) ? TRUE : FALSE);
+      resl = lp_build_blend_func(&bldw, func, src_terml, dst_terml);
+      resh = lp_build_blend_func(&bldw, func, src_termh, dst_termh);
+
+      /*
+       * XXX pack2_native is not ok because the values have to be in dst
+       * range. We need native pack though for the correct order on avx2.
+       * Will break on everything not implementing clamping pack intrinsics
+       * (i.e. everything but sse2 and altivec).
+       */
+      return lp_build_pack2_native(bld->gallivm, wide_type, bld->type, resl, resh);
+   } else {
+      src_term = lp_build_mul(bld, src, src_factor);
+      dst_term = lp_build_mul(bld, dst, dst_factor);
+      return lp_build_blend_func(bld, func, src_term, dst_term);
    }
 }
+
+void
+lp_build_alpha_to_coverage(struct gallivm_state *gallivm,
+                           struct lp_type type,
+                           struct lp_build_mask_context *mask,
+                           LLVMValueRef alpha,
+                           boolean do_branch)
+{
+   struct lp_build_context bld;
+   LLVMValueRef test;
+   LLVMValueRef alpha_ref_value;
+
+   lp_build_context_init(&bld, gallivm, type);
+
+   alpha_ref_value = lp_build_const_vec(gallivm, type, 0.5);
+
+   test = lp_build_cmp(&bld, PIPE_FUNC_GREATER, alpha, alpha_ref_value);
+
+   lp_build_name(test, "alpha_to_coverage");
+
+   lp_build_mask_update(mask, test);
+
+   if (do_branch)
+      lp_build_mask_check(mask);
+}