draw: Properly limit vertex buffer fetches on draw arrays.
[mesa.git] / src / gallium / drivers / softpipe / sp_quad_blend.c
index c1a9b6f3c5d1f908f84cd43732dc63607b18eb3d..b85431d5be9ff8250e0fd3c8b4f78e2738ba4355 100644 (file)
@@ -34,6 +34,7 @@
 #include "util/u_math.h"
 #include "util/u_memory.h"
 #include "util/u_format.h"
+#include "util/u_dual_blend.h"
 #include "sp_context.h"
 #include "sp_state.h"
 #include "sp_quad.h"
@@ -57,6 +58,7 @@ struct blend_quad_stage
    struct quad_stage base;
    boolean clamp[PIPE_MAX_COLOR_BUFS];  /**< clamp colors to [0,1]? */
    enum format base_format[PIPE_MAX_COLOR_BUFS];
+   enum util_format_type format_type[PIPE_MAX_COLOR_BUFS];
 };
 
 
@@ -259,6 +261,7 @@ logicop_quad(struct quad_stage *qs,
 static void
 blend_quad(struct quad_stage *qs, 
            float (*quadColor)[4],
+           float (*quadColor2)[4],
            float (*dest)[4],
            const float const_blend_color[4],
            unsigned blend_index)
@@ -266,8 +269,8 @@ blend_quad(struct quad_stage *qs,
    static const float zero[4] = { 0, 0, 0, 0 };
    static const float one[4] = { 1, 1, 1, 1 };
    struct softpipe_context *softpipe = qs->softpipe;
-   float source[4][QUAD_SIZE] = { { 0 } };
-   float blend_dest[4][QUAD_SIZE];
+   float source[4][TGSI_QUAD_SIZE] = { { 0 } };
+   float blend_dest[4][TGSI_QUAD_SIZE];
 
    /*
     * Compute src/first term RGB
@@ -284,13 +287,13 @@ blend_quad(struct quad_stage *qs,
       VEC4_MUL(source[2], quadColor[2], quadColor[2]); /* B */
       break;
    case PIPE_BLENDFACTOR_SRC_ALPHA:
-   {
-      const float *alpha = quadColor[3];
-      VEC4_MUL(source[0], quadColor[0], alpha); /* R */
-      VEC4_MUL(source[1], quadColor[1], alpha); /* G */
-      VEC4_MUL(source[2], quadColor[2], alpha); /* B */
-   }
-   break;
+      {
+         const float *alpha = quadColor[3];
+         VEC4_MUL(source[0], quadColor[0], alpha); /* R */
+         VEC4_MUL(source[1], quadColor[1], alpha); /* G */
+         VEC4_MUL(source[2], quadColor[2], alpha); /* B */
+      }
+      break;
    case PIPE_BLENDFACTOR_DST_COLOR:
       VEC4_MUL(source[0], quadColor[0], dest[0]); /* R */
       VEC4_MUL(source[1], quadColor[1], dest[1]); /* G */
@@ -316,30 +319,37 @@ blend_quad(struct quad_stage *qs,
       }
       break;
    case PIPE_BLENDFACTOR_CONST_COLOR:
-   {
-      float comp[4];
-      VEC4_SCALAR(comp, const_blend_color[0]); /* R */
-      VEC4_MUL(source[0], quadColor[0], comp); /* R */
-      VEC4_SCALAR(comp, const_blend_color[1]); /* G */
-      VEC4_MUL(source[1], quadColor[1], comp); /* G */
-      VEC4_SCALAR(comp, const_blend_color[2]); /* B */
-      VEC4_MUL(source[2], quadColor[2], comp); /* B */
-   }
-   break;
+      {
+         float comp[4];
+         VEC4_SCALAR(comp, const_blend_color[0]); /* R */
+         VEC4_MUL(source[0], quadColor[0], comp); /* R */
+         VEC4_SCALAR(comp, const_blend_color[1]); /* G */
+         VEC4_MUL(source[1], quadColor[1], comp); /* G */
+         VEC4_SCALAR(comp, const_blend_color[2]); /* B */
+         VEC4_MUL(source[2], quadColor[2], comp); /* B */
+      }
+      break;
    case PIPE_BLENDFACTOR_CONST_ALPHA:
-   {
-      float alpha[4];
-      VEC4_SCALAR(alpha, const_blend_color[3]);
-      VEC4_MUL(source[0], quadColor[0], alpha); /* R */
-      VEC4_MUL(source[1], quadColor[1], alpha); /* G */
-      VEC4_MUL(source[2], quadColor[2], alpha); /* B */
-   }
-   break;
+      {
+         float alpha[4];
+         VEC4_SCALAR(alpha, const_blend_color[3]);
+         VEC4_MUL(source[0], quadColor[0], alpha); /* R */
+         VEC4_MUL(source[1], quadColor[1], alpha); /* G */
+         VEC4_MUL(source[2], quadColor[2], alpha); /* B */
+      }
+      break;
    case PIPE_BLENDFACTOR_SRC1_COLOR:
-      assert(0); /* to do */
+      VEC4_MUL(source[0], quadColor[0], quadColor2[0]); /* R */
+      VEC4_MUL(source[1], quadColor[1], quadColor2[1]); /* G */
+      VEC4_MUL(source[2], quadColor[2], quadColor2[2]); /* B */         
       break;
    case PIPE_BLENDFACTOR_SRC1_ALPHA:
-      assert(0); /* to do */
+      {
+         const float *alpha = quadColor2[3];
+         VEC4_MUL(source[0], quadColor[0], alpha); /* R */
+         VEC4_MUL(source[1], quadColor[1], alpha); /* G */
+         VEC4_MUL(source[2], quadColor[2], alpha); /* B */
+      }
       break;
    case PIPE_BLENDFACTOR_ZERO:
       VEC4_COPY(source[0], zero); /* R */
@@ -347,25 +357,25 @@ blend_quad(struct quad_stage *qs,
       VEC4_COPY(source[2], zero); /* B */
       break;
    case PIPE_BLENDFACTOR_INV_SRC_COLOR:
-   {
-      float inv_comp[4];
-      VEC4_SUB(inv_comp, one, quadColor[0]); /* R */
-      VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */
-      VEC4_SUB(inv_comp, one, quadColor[1]); /* G */
-      VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */
-      VEC4_SUB(inv_comp, one, quadColor[2]); /* B */
-      VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */
-   }
-   break;
+      {
+         float inv_comp[4];
+         VEC4_SUB(inv_comp, one, quadColor[0]); /* R */
+         VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */
+         VEC4_SUB(inv_comp, one, quadColor[1]); /* G */
+         VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */
+         VEC4_SUB(inv_comp, one, quadColor[2]); /* B */
+         VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */
+      }
+      break;
    case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
-   {
-      float inv_alpha[4];
-      VEC4_SUB(inv_alpha, one, quadColor[3]);
-      VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
-      VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
-      VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
-   }
-   break;
+      {
+         float inv_alpha[4];
+         VEC4_SUB(inv_alpha, one, quadColor[3]);
+         VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
+         VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
+         VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
+      }
+      break;
    case PIPE_BLENDFACTOR_INV_DST_ALPHA:
       {
          float inv_alpha[4];
@@ -376,44 +386,58 @@ blend_quad(struct quad_stage *qs,
       }
       break;
    case PIPE_BLENDFACTOR_INV_DST_COLOR:
-   {
-      float inv_comp[4];
-      VEC4_SUB(inv_comp, one, dest[0]); /* R */
-      VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */
-      VEC4_SUB(inv_comp, one, dest[1]); /* G */
-      VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */
-      VEC4_SUB(inv_comp, one, dest[2]); /* B */
-      VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */
-   }
-   break;
+      {
+         float inv_comp[4];
+         VEC4_SUB(inv_comp, one, dest[0]); /* R */
+         VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */
+         VEC4_SUB(inv_comp, one, dest[1]); /* G */
+         VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */
+         VEC4_SUB(inv_comp, one, dest[2]); /* B */
+         VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */
+      }
+      break;
    case PIPE_BLENDFACTOR_INV_CONST_COLOR:
-   {
-      float inv_comp[4];
-      /* R */
-      VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[0]);
-      VEC4_MUL(source[0], quadColor[0], inv_comp);
-      /* G */
-      VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[1]);
-      VEC4_MUL(source[1], quadColor[1], inv_comp);
-      /* B */
-      VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[2]);
-      VEC4_MUL(source[2], quadColor[2], inv_comp);
-   }
-   break;
+      {
+         float inv_comp[4];
+         /* R */
+         VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[0]);
+         VEC4_MUL(source[0], quadColor[0], inv_comp);
+         /* G */
+         VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[1]);
+         VEC4_MUL(source[1], quadColor[1], inv_comp);
+         /* B */
+         VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[2]);
+         VEC4_MUL(source[2], quadColor[2], inv_comp);
+      }
+      break;
    case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
-   {
-      float inv_alpha[4];
-      VEC4_SCALAR(inv_alpha, 1.0f - const_blend_color[3]);
-      VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
-      VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
-      VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
-   }
-   break;
+      {
+         float inv_alpha[4];
+         VEC4_SCALAR(inv_alpha, 1.0f - const_blend_color[3]);
+         VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
+         VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
+         VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
+      }
+      break;
    case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
-      assert(0); /* to do */
+      {
+         float inv_comp[4];
+         VEC4_SUB(inv_comp, one, quadColor2[0]); /* R */
+         VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */
+         VEC4_SUB(inv_comp, one, quadColor2[1]); /* G */
+         VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */
+         VEC4_SUB(inv_comp, one, quadColor2[2]); /* B */
+         VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */
+      }
       break;
    case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
-      assert(0); /* to do */
+      {
+         float inv_alpha[4];
+         VEC4_SUB(inv_alpha, one, quadColor2[3]);
+         VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
+         VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
+         VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
+      }
       break;
    default:
       assert(0 && "invalid rgb src factor");
@@ -429,11 +453,11 @@ blend_quad(struct quad_stage *qs,
    case PIPE_BLENDFACTOR_SRC_COLOR:
       /* fall-through */
    case PIPE_BLENDFACTOR_SRC_ALPHA:
-   {
-      const float *alpha = quadColor[3];
-      VEC4_MUL(source[3], quadColor[3], alpha); /* A */
-   }
-   break;
+      {
+         const float *alpha = quadColor[3];
+         VEC4_MUL(source[3], quadColor[3], alpha); /* A */
+      }
+      break;
    case PIPE_BLENDFACTOR_DST_COLOR:
       /* fall-through */
    case PIPE_BLENDFACTOR_DST_ALPHA:
@@ -446,24 +470,24 @@ blend_quad(struct quad_stage *qs,
    case PIPE_BLENDFACTOR_CONST_COLOR:
       /* fall-through */
    case PIPE_BLENDFACTOR_CONST_ALPHA:
-   {
-      float comp[4];
-      VEC4_SCALAR(comp, const_blend_color[3]); /* A */
-      VEC4_MUL(source[3], quadColor[3], comp); /* A */
-   }
-   break;
+      {
+         float comp[4];
+         VEC4_SCALAR(comp, const_blend_color[3]); /* A */
+         VEC4_MUL(source[3], quadColor[3], comp); /* A */
+      }
+      break;
    case PIPE_BLENDFACTOR_ZERO:
       VEC4_COPY(source[3], zero); /* A */
       break;
    case PIPE_BLENDFACTOR_INV_SRC_COLOR:
       /* fall-through */
    case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
-   {
-      float inv_alpha[4];
-      VEC4_SUB(inv_alpha, one, quadColor[3]);
-      VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */
-   }
-   break;
+      {
+         float inv_alpha[4];
+         VEC4_SUB(inv_alpha, one, quadColor[3]);
+         VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */
+      }
+      break;
    case PIPE_BLENDFACTOR_INV_DST_COLOR:
       /* fall-through */
    case PIPE_BLENDFACTOR_INV_DST_ALPHA:
@@ -476,13 +500,30 @@ blend_quad(struct quad_stage *qs,
    case PIPE_BLENDFACTOR_INV_CONST_COLOR:
       /* fall-through */
    case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
-   {
-      float inv_comp[4];
-      /* A */
-      VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[3]);
-      VEC4_MUL(source[3], quadColor[3], inv_comp);
-   }
-   break;
+      {
+         float inv_comp[4];
+         /* A */
+         VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[3]);
+         VEC4_MUL(source[3], quadColor[3], inv_comp);
+      }
+      break;
+   case PIPE_BLENDFACTOR_SRC1_COLOR:
+      /* fall-through */
+   case PIPE_BLENDFACTOR_SRC1_ALPHA:
+      {
+         const float *alpha = quadColor2[3];
+         VEC4_MUL(source[3], quadColor[3], alpha); /* A */
+      }
+      break;
+   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+      /* fall-through */
+   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+      {
+         float inv_alpha[4];
+         VEC4_SUB(inv_alpha, one, quadColor2[3]);
+         VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */
+      }
+      break;
    default:
       assert(0 && "invalid alpha src factor");
    }
@@ -527,61 +568,66 @@ blend_quad(struct quad_stage *qs,
          float diff[4], temp[4];
          VEC4_SUB(diff, one, blend_dest[3]);
          VEC4_MIN(temp, alpha, diff);
-         VEC4_MUL(blend_dest[0], quadColor[0], temp); /* R */
-         VEC4_MUL(blend_dest[1], quadColor[1], temp); /* G */
-         VEC4_MUL(blend_dest[2], quadColor[2], temp); /* B */
+         VEC4_MUL(blend_dest[0], blend_dest[0], temp); /* R */
+         VEC4_MUL(blend_dest[1], blend_dest[1], temp); /* G */
+         VEC4_MUL(blend_dest[2], blend_dest[2], temp); /* B */
       }
       break;
    case PIPE_BLENDFACTOR_CONST_COLOR:
-   {
-      float comp[4];
-      VEC4_SCALAR(comp, const_blend_color[0]); /* R */
-      VEC4_MUL(blend_dest[0], blend_dest[0], comp); /* R */
-      VEC4_SCALAR(comp, const_blend_color[1]); /* G */
-      VEC4_MUL(blend_dest[1], blend_dest[1], comp); /* G */
-      VEC4_SCALAR(comp, const_blend_color[2]); /* B */
-      VEC4_MUL(blend_dest[2], blend_dest[2], comp); /* B */
-   }
-   break;
+      {
+         float comp[4];
+         VEC4_SCALAR(comp, const_blend_color[0]); /* R */
+         VEC4_MUL(blend_dest[0], blend_dest[0], comp); /* R */
+         VEC4_SCALAR(comp, const_blend_color[1]); /* G */
+         VEC4_MUL(blend_dest[1], blend_dest[1], comp); /* G */
+         VEC4_SCALAR(comp, const_blend_color[2]); /* B */
+         VEC4_MUL(blend_dest[2], blend_dest[2], comp); /* B */
+      }
+      break;
    case PIPE_BLENDFACTOR_CONST_ALPHA:
-   {
-      float comp[4];
-      VEC4_SCALAR(comp, const_blend_color[3]); /* A */
-      VEC4_MUL(blend_dest[0], blend_dest[0], comp); /* R */
-      VEC4_MUL(blend_dest[1], blend_dest[1], comp); /* G */
-      VEC4_MUL(blend_dest[2], blend_dest[2], comp); /* B */
-   }
-   break;
+      {
+         float comp[4];
+         VEC4_SCALAR(comp, const_blend_color[3]); /* A */
+         VEC4_MUL(blend_dest[0], blend_dest[0], comp); /* R */
+         VEC4_MUL(blend_dest[1], blend_dest[1], comp); /* G */
+         VEC4_MUL(blend_dest[2], blend_dest[2], comp); /* B */
+      }
+      break;
    case PIPE_BLENDFACTOR_ZERO:
       VEC4_COPY(blend_dest[0], zero); /* R */
       VEC4_COPY(blend_dest[1], zero); /* G */
       VEC4_COPY(blend_dest[2], zero); /* B */
       break;
    case PIPE_BLENDFACTOR_SRC1_COLOR:
+      VEC4_MUL(blend_dest[0], blend_dest[0], quadColor2[0]); /* R */
+      VEC4_MUL(blend_dest[1], blend_dest[1], quadColor2[1]); /* G */
+      VEC4_MUL(blend_dest[2], blend_dest[2], quadColor2[2]); /* B */
+      break;
    case PIPE_BLENDFACTOR_SRC1_ALPHA:
-      /* XXX what are these? */
-      assert(0);
+      VEC4_MUL(blend_dest[0], blend_dest[0], quadColor2[3]); /* R * A */
+      VEC4_MUL(blend_dest[1], blend_dest[1], quadColor2[3]); /* G * A */
+      VEC4_MUL(blend_dest[2], blend_dest[2], quadColor2[3]); /* B * A */
       break;
    case PIPE_BLENDFACTOR_INV_SRC_COLOR:
-   {
-      float inv_comp[4];
-      VEC4_SUB(inv_comp, one, quadColor[0]); /* R */
-      VEC4_MUL(blend_dest[0], inv_comp, blend_dest[0]); /* R */
-      VEC4_SUB(inv_comp, one, quadColor[1]); /* G */
-      VEC4_MUL(blend_dest[1], inv_comp, blend_dest[1]); /* G */
-      VEC4_SUB(inv_comp, one, quadColor[2]); /* B */
-      VEC4_MUL(blend_dest[2], inv_comp, blend_dest[2]); /* B */
-   }
-   break;
+      {
+         float inv_comp[4];
+         VEC4_SUB(inv_comp, one, quadColor[0]); /* R */
+         VEC4_MUL(blend_dest[0], inv_comp, blend_dest[0]); /* R */
+         VEC4_SUB(inv_comp, one, quadColor[1]); /* G */
+         VEC4_MUL(blend_dest[1], inv_comp, blend_dest[1]); /* G */
+         VEC4_SUB(inv_comp, one, quadColor[2]); /* B */
+         VEC4_MUL(blend_dest[2], inv_comp, blend_dest[2]); /* B */
+      }
+      break;
    case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
-   {
-      float one_minus_alpha[QUAD_SIZE];
-      VEC4_SUB(one_minus_alpha, one, quadColor[3]);
-      VEC4_MUL(blend_dest[0], blend_dest[0], one_minus_alpha); /* R */
-      VEC4_MUL(blend_dest[1], blend_dest[1], one_minus_alpha); /* G */
-      VEC4_MUL(blend_dest[2], blend_dest[2], one_minus_alpha); /* B */
-   }
-   break;
+      {
+         float one_minus_alpha[TGSI_QUAD_SIZE];
+         VEC4_SUB(one_minus_alpha, one, quadColor[3]);
+         VEC4_MUL(blend_dest[0], blend_dest[0], one_minus_alpha); /* R */
+         VEC4_MUL(blend_dest[1], blend_dest[1], one_minus_alpha); /* G */
+         VEC4_MUL(blend_dest[2], blend_dest[2], one_minus_alpha); /* B */
+      }
+      break;
    case PIPE_BLENDFACTOR_INV_DST_ALPHA:
       {
          float inv_comp[4];
@@ -592,43 +638,58 @@ blend_quad(struct quad_stage *qs,
       }
       break;
    case PIPE_BLENDFACTOR_INV_DST_COLOR:
-   {
-      float inv_comp[4];
-      VEC4_SUB(inv_comp, one, blend_dest[0]); /* R */
-      VEC4_MUL(blend_dest[0], blend_dest[0], inv_comp); /* R */
-      VEC4_SUB(inv_comp, one, blend_dest[1]); /* G */
-      VEC4_MUL(blend_dest[1], blend_dest[1], inv_comp); /* G */
-      VEC4_SUB(inv_comp, one, blend_dest[2]); /* B */
-      VEC4_MUL(blend_dest[2], blend_dest[2], inv_comp); /* B */
-   }
-   break;
+      {
+         float inv_comp[4];
+         VEC4_SUB(inv_comp, one, blend_dest[0]); /* R */
+         VEC4_MUL(blend_dest[0], blend_dest[0], inv_comp); /* R */
+         VEC4_SUB(inv_comp, one, blend_dest[1]); /* G */
+         VEC4_MUL(blend_dest[1], blend_dest[1], inv_comp); /* G */
+         VEC4_SUB(inv_comp, one, blend_dest[2]); /* B */
+         VEC4_MUL(blend_dest[2], blend_dest[2], inv_comp); /* B */
+      }
+      break;
    case PIPE_BLENDFACTOR_INV_CONST_COLOR:
-   {
-      float inv_comp[4];
-      /* R */
-      VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[0]);
-      VEC4_MUL(blend_dest[0], blend_dest[0], inv_comp);
-      /* G */
-      VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[1]);
-      VEC4_MUL(blend_dest[1], blend_dest[1], inv_comp);
-      /* B */
-      VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[2]);
-      VEC4_MUL(blend_dest[2], blend_dest[2], inv_comp);
-   }
-   break;
+      {
+         float inv_comp[4];
+         /* R */
+         VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[0]);
+         VEC4_MUL(blend_dest[0], blend_dest[0], inv_comp);
+         /* G */
+         VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[1]);
+         VEC4_MUL(blend_dest[1], blend_dest[1], inv_comp);
+         /* B */
+         VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[2]);
+         VEC4_MUL(blend_dest[2], blend_dest[2], inv_comp);
+      }
+      break;
    case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
-   {
-      float inv_comp[4];
-      VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[3]);
-      VEC4_MUL(blend_dest[0], blend_dest[0], inv_comp);
-      VEC4_MUL(blend_dest[1], blend_dest[1], inv_comp);
-      VEC4_MUL(blend_dest[2], blend_dest[2], inv_comp);
-   }
-   break;
+      {
+         float inv_comp[4];
+         VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[3]);
+         VEC4_MUL(blend_dest[0], blend_dest[0], inv_comp);
+         VEC4_MUL(blend_dest[1], blend_dest[1], inv_comp);
+         VEC4_MUL(blend_dest[2], blend_dest[2], inv_comp);
+      }
+      break;
    case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+      {
+         float inv_comp[4];
+         VEC4_SUB(inv_comp, one, quadColor2[0]); /* R */
+         VEC4_MUL(blend_dest[0], inv_comp, blend_dest[0]); /* R */
+         VEC4_SUB(inv_comp, one, quadColor2[1]); /* G */
+         VEC4_MUL(blend_dest[1], inv_comp, blend_dest[1]); /* G */
+         VEC4_SUB(inv_comp, one, quadColor2[2]); /* B */
+         VEC4_MUL(blend_dest[2], inv_comp, blend_dest[2]); /* B */
+      }
+      break;
    case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
-      /* XXX what are these? */
-      assert(0);
+      {
+         float one_minus_alpha[TGSI_QUAD_SIZE];
+         VEC4_SUB(one_minus_alpha, one, quadColor2[3]);
+         VEC4_MUL(blend_dest[0], blend_dest[0], one_minus_alpha); /* R */
+         VEC4_MUL(blend_dest[1], blend_dest[1], one_minus_alpha); /* G */
+         VEC4_MUL(blend_dest[2], blend_dest[2], one_minus_alpha); /* B */
+      }
       break;
    default:
       assert(0 && "invalid rgb dst factor");
@@ -657,24 +718,24 @@ blend_quad(struct quad_stage *qs,
    case PIPE_BLENDFACTOR_CONST_COLOR:
       /* fall-through */
    case PIPE_BLENDFACTOR_CONST_ALPHA:
-   {
-      float comp[4];
-      VEC4_SCALAR(comp, const_blend_color[3]); /* A */
-      VEC4_MUL(blend_dest[3], blend_dest[3], comp); /* A */
-   }
-   break;
+      {
+         float comp[4];
+         VEC4_SCALAR(comp, const_blend_color[3]); /* A */
+         VEC4_MUL(blend_dest[3], blend_dest[3], comp); /* A */
+      }
+      break;
    case PIPE_BLENDFACTOR_ZERO:
       VEC4_COPY(blend_dest[3], zero); /* A */
       break;
    case PIPE_BLENDFACTOR_INV_SRC_COLOR:
       /* fall-through */
    case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
-   {
-      float one_minus_alpha[QUAD_SIZE];
-      VEC4_SUB(one_minus_alpha, one, quadColor[3]);
-      VEC4_MUL(blend_dest[3], blend_dest[3], one_minus_alpha); /* A */
-   }
-   break;
+      {
+         float one_minus_alpha[TGSI_QUAD_SIZE];
+         VEC4_SUB(one_minus_alpha, one, quadColor[3]);
+         VEC4_MUL(blend_dest[3], blend_dest[3], one_minus_alpha); /* A */
+      }
+      break;
    case PIPE_BLENDFACTOR_INV_DST_COLOR:
       /* fall-through */
    case PIPE_BLENDFACTOR_INV_DST_ALPHA:
@@ -687,12 +748,26 @@ blend_quad(struct quad_stage *qs,
    case PIPE_BLENDFACTOR_INV_CONST_COLOR:
       /* fall-through */
    case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
-   {
-      float inv_comp[4];
-      VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[3]);
-      VEC4_MUL(blend_dest[3], blend_dest[3], inv_comp);
-   }
-   break;
+      {
+         float inv_comp[4];
+         VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[3]);
+         VEC4_MUL(blend_dest[3], blend_dest[3], inv_comp);
+      }
+      break;
+   case PIPE_BLENDFACTOR_SRC1_COLOR:
+      /* fall-through */
+   case PIPE_BLENDFACTOR_SRC1_ALPHA:
+      VEC4_MUL(blend_dest[3], blend_dest[3], quadColor2[3]); /* A * A */
+      break;
+   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+      /* fall-through */
+   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+      {
+         float one_minus_alpha[TGSI_QUAD_SIZE];
+         VEC4_SUB(one_minus_alpha, one, quadColor2[3]);
+         VEC4_MUL(blend_dest[3], blend_dest[3], one_minus_alpha); /* A */
+      }
+      break;
    default:
       assert(0 && "invalid alpha dst factor");
    }
@@ -702,19 +777,19 @@ blend_quad(struct quad_stage *qs,
     */
    switch (softpipe->blend->rt[blend_index].rgb_func) {
    case PIPE_BLEND_ADD:
-      VEC4_ADD_SAT(quadColor[0], source[0], blend_dest[0]); /* R */
-      VEC4_ADD_SAT(quadColor[1], source[1], blend_dest[1]); /* G */
-      VEC4_ADD_SAT(quadColor[2], source[2], blend_dest[2]); /* B */
+      VEC4_ADD(quadColor[0], source[0], blend_dest[0]); /* R */
+      VEC4_ADD(quadColor[1], source[1], blend_dest[1]); /* G */
+      VEC4_ADD(quadColor[2], source[2], blend_dest[2]); /* B */
       break;
    case PIPE_BLEND_SUBTRACT:
-      VEC4_SUB_SAT(quadColor[0], source[0], blend_dest[0]); /* R */
-      VEC4_SUB_SAT(quadColor[1], source[1], blend_dest[1]); /* G */
-      VEC4_SUB_SAT(quadColor[2], source[2], blend_dest[2]); /* B */
+      VEC4_SUB(quadColor[0], source[0], blend_dest[0]); /* R */
+      VEC4_SUB(quadColor[1], source[1], blend_dest[1]); /* G */
+      VEC4_SUB(quadColor[2], source[2], blend_dest[2]); /* B */
       break;
    case PIPE_BLEND_REVERSE_SUBTRACT:
-      VEC4_SUB_SAT(quadColor[0], blend_dest[0], source[0]); /* R */
-      VEC4_SUB_SAT(quadColor[1], blend_dest[1], source[1]); /* G */
-      VEC4_SUB_SAT(quadColor[2], blend_dest[2], source[2]); /* B */
+      VEC4_SUB(quadColor[0], blend_dest[0], source[0]); /* R */
+      VEC4_SUB(quadColor[1], blend_dest[1], source[1]); /* G */
+      VEC4_SUB(quadColor[2], blend_dest[2], source[2]); /* B */
       break;
    case PIPE_BLEND_MIN:
       VEC4_MIN(quadColor[0], source[0], blend_dest[0]); /* R */
@@ -735,13 +810,13 @@ blend_quad(struct quad_stage *qs,
     */
    switch (softpipe->blend->rt[blend_index].alpha_func) {
    case PIPE_BLEND_ADD:
-      VEC4_ADD_SAT(quadColor[3], source[3], blend_dest[3]); /* A */
+      VEC4_ADD(quadColor[3], source[3], blend_dest[3]); /* A */
       break;
    case PIPE_BLEND_SUBTRACT:
-      VEC4_SUB_SAT(quadColor[3], source[3], blend_dest[3]); /* A */
+      VEC4_SUB(quadColor[3], source[3], blend_dest[3]); /* A */
       break;
    case PIPE_BLEND_REVERSE_SUBTRACT:
-      VEC4_SUB_SAT(quadColor[3], blend_dest[3], source[3]); /* A */
+      VEC4_SUB(quadColor[3], blend_dest[3], source[3]); /* A */
       break;
    case PIPE_BLEND_MIN:
       VEC4_MIN(quadColor[3], source[3], blend_dest[3]); /* A */
@@ -785,7 +860,7 @@ clamp_colors(float (*quadColor)[4])
 {
    unsigned i, j;
 
-   for (j = 0; j < QUAD_SIZE; j++) {
+   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
       for (i = 0; i < 4; i++) {
          quadColor[i][j] = CLAMP(quadColor[i][j], 0.0F, 1.0F);
       }
@@ -839,8 +914,6 @@ rebase_colors(enum format base_format, float (*quadColor)[4])
    }
 }
 
-
-
 static void
 blend_fallback(struct quad_stage *qs, 
                struct quad_header *quads[],
@@ -858,13 +931,14 @@ blend_fallback(struct quad_stage *qs,
    {
       /* which blend/mask state index to use: */
       const uint blend_buf = blend->independent_blend_enable ? cbuf : 0;
-      float dest[4][QUAD_SIZE];
+      float dest[4][TGSI_QUAD_SIZE];
       struct softpipe_cached_tile *tile
          = sp_get_cached_tile(softpipe->cbuf_cache[cbuf],
                               quads[0]->input.x0, 
                               quads[0]->input.y0);
       const boolean clamp = bqs->clamp[cbuf];
       const float *blend_color;
+      const boolean dual_source_blend = util_blend_state_is_dual(blend, cbuf);
       uint q, i, j;
 
       if (clamp)
@@ -875,12 +949,13 @@ blend_fallback(struct quad_stage *qs,
       for (q = 0; q < nr; q++) {
          struct quad_header *quad = quads[q];
          float (*quadColor)[4];
-         float temp_quad_color[QUAD_SIZE][4];
+         float (*quadColor2)[4] = NULL;
+         float temp_quad_color[TGSI_QUAD_SIZE][4];
          const int itx = (quad->input.x0 & (TILE_SIZE-1));
          const int ity = (quad->input.y0 & (TILE_SIZE-1));
 
          if (write_all) {
-            for (j = 0; j < QUAD_SIZE; j++) {
+            for (j = 0; j < TGSI_QUAD_SIZE; j++) {
                for (i = 0; i < 4; i++) {
                   temp_quad_color[i][j] = quad->output.color[0][i][j];
                }
@@ -888,18 +963,20 @@ blend_fallback(struct quad_stage *qs,
             quadColor = temp_quad_color;
          } else {
             quadColor = quad->output.color[cbuf];
+           if (dual_source_blend)
+              quadColor2 = quad->output.color[cbuf + 1];
          }
 
          /* If fixed-point dest color buffer, need to clamp the incoming
           * fragment colors now.
           */
-         if (clamp) {
+         if (clamp || softpipe->rasterizer->clamp_fragment_color) {
             clamp_colors(quadColor);
          }
 
          /* get/swizzle dest colors
           */
-         for (j = 0; j < QUAD_SIZE; j++) {
+         for (j = 0; j < TGSI_QUAD_SIZE; j++) {
             int x = itx + (j & 1);
             int y = ity + (j >> 1);
             for (i = 0; i < 4; i++) {
@@ -909,10 +986,19 @@ blend_fallback(struct quad_stage *qs,
 
 
          if (blend->logicop_enable) {
-            logicop_quad( qs, quadColor, dest );
+            if (bqs->format_type[cbuf] != UTIL_FORMAT_TYPE_FLOAT) {
+               logicop_quad( qs, quadColor, dest );
+            }
          }
          else if (blend->rt[blend_buf].blend_enable) {
-            blend_quad(qs, quadColor, dest, blend_color, blend_buf);
+            blend_quad(qs, quadColor, quadColor2, dest, blend_color, blend_buf);
+
+            /* If fixed-point dest color buffer, need to clamp the outgoing
+             * fragment colors now.
+             */
+            if (clamp) {
+               clamp_colors(quadColor);
+            }
          }
 
          rebase_colors(bqs->base_format[cbuf], quadColor);
@@ -922,7 +1008,7 @@ blend_fallback(struct quad_stage *qs,
    
          /* Output color values
           */
-         for (j = 0; j < QUAD_SIZE; j++) {
+         for (j = 0; j < TGSI_QUAD_SIZE; j++) {
             if (quad->inout.mask & (1 << j)) {
                int x = itx + (j & 1);
                int y = ity + (j >> 1);
@@ -943,9 +1029,9 @@ blend_single_add_src_alpha_inv_src_alpha(struct quad_stage *qs,
 {
    const struct blend_quad_stage *bqs = blend_quad_stage(qs);
    static const float one[4] = { 1, 1, 1, 1 };
-   float one_minus_alpha[QUAD_SIZE];
-   float dest[4][QUAD_SIZE];
-   float source[4][QUAD_SIZE];
+   float one_minus_alpha[TGSI_QUAD_SIZE];
+   float dest[4][TGSI_QUAD_SIZE];
+   float source[4][TGSI_QUAD_SIZE];
    uint i, j, q;
 
    struct softpipe_cached_tile *tile
@@ -961,7 +1047,7 @@ blend_single_add_src_alpha_inv_src_alpha(struct quad_stage *qs,
       const int ity = (quad->input.y0 & (TILE_SIZE-1));
       
       /* get/swizzle dest colors */
-      for (j = 0; j < QUAD_SIZE; j++) {
+      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
          int x = itx + (j & 1);
          int y = ity + (j >> 1);
          for (i = 0; i < 4; i++) {
@@ -969,6 +1055,13 @@ blend_single_add_src_alpha_inv_src_alpha(struct quad_stage *qs,
          }
       }
 
+      /* If the dest color buffer is fixed-point, or the rasterizer state
+       * requests fragment color clamping, clamp the incoming colors now.
+       */
+      if (bqs->clamp[0] || qs->softpipe->rasterizer->clamp_fragment_color) {
+         clamp_colors(quadColor);
+      }
+
       VEC4_MUL(source[0], quadColor[0], alpha); /* R */
       VEC4_MUL(source[1], quadColor[1], alpha); /* G */
       VEC4_MUL(source[2], quadColor[2], alpha); /* B */
@@ -978,16 +1071,23 @@ blend_single_add_src_alpha_inv_src_alpha(struct quad_stage *qs,
       VEC4_MUL(dest[0], dest[0], one_minus_alpha); /* R */
       VEC4_MUL(dest[1], dest[1], one_minus_alpha); /* G */
       VEC4_MUL(dest[2], dest[2], one_minus_alpha); /* B */
-      VEC4_MUL(dest[3], dest[3], one_minus_alpha); /* B */
+      VEC4_MUL(dest[3], dest[3], one_minus_alpha); /* A */
 
-      VEC4_ADD_SAT(quadColor[0], source[0], dest[0]); /* R */
-      VEC4_ADD_SAT(quadColor[1], source[1], dest[1]); /* G */
-      VEC4_ADD_SAT(quadColor[2], source[2], dest[2]); /* B */
-      VEC4_ADD_SAT(quadColor[3], source[3], dest[3]); /* A */
+      VEC4_ADD(quadColor[0], source[0], dest[0]); /* R */
+      VEC4_ADD(quadColor[1], source[1], dest[1]); /* G */
+      VEC4_ADD(quadColor[2], source[2], dest[2]); /* B */
+      VEC4_ADD(quadColor[3], source[3], dest[3]); /* A */
+
+      /* If fixed-point dest color buffer, need to clamp the outgoing
+       * fragment colors now.
+       */
+      if (bqs->clamp[0]) {
+         clamp_colors(quadColor);
+      }
 
       rebase_colors(bqs->base_format[0], quadColor);
 
-      for (j = 0; j < QUAD_SIZE; j++) {
+      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
          if (quad->inout.mask & (1 << j)) {
             int x = itx + (j & 1);
             int y = ity + (j >> 1);
@@ -1005,7 +1105,7 @@ blend_single_add_one_one(struct quad_stage *qs,
                          unsigned nr)
 {
    const struct blend_quad_stage *bqs = blend_quad_stage(qs);
-   float dest[4][QUAD_SIZE];
+   float dest[4][TGSI_QUAD_SIZE];
    uint i, j, q;
 
    struct softpipe_cached_tile *tile
@@ -1020,7 +1120,7 @@ blend_single_add_one_one(struct quad_stage *qs,
       const int ity = (quad->input.y0 & (TILE_SIZE-1));
       
       /* get/swizzle dest colors */
-      for (j = 0; j < QUAD_SIZE; j++) {
+      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
          int x = itx + (j & 1);
          int y = ity + (j >> 1);
          for (i = 0; i < 4; i++) {
@@ -1031,18 +1131,25 @@ blend_single_add_one_one(struct quad_stage *qs,
       /* If fixed-point dest color buffer, need to clamp the incoming
        * fragment colors now.
        */
-      if (bqs->clamp[0]) {
+      if (bqs->clamp[0] || qs->softpipe->rasterizer->clamp_fragment_color) {
          clamp_colors(quadColor);
       }
 
-      VEC4_ADD_SAT(quadColor[0], quadColor[0], dest[0]); /* R */
-      VEC4_ADD_SAT(quadColor[1], quadColor[1], dest[1]); /* G */
-      VEC4_ADD_SAT(quadColor[2], quadColor[2], dest[2]); /* B */
-      VEC4_ADD_SAT(quadColor[3], quadColor[3], dest[3]); /* A */
+      VEC4_ADD(quadColor[0], quadColor[0], dest[0]); /* R */
+      VEC4_ADD(quadColor[1], quadColor[1], dest[1]); /* G */
+      VEC4_ADD(quadColor[2], quadColor[2], dest[2]); /* B */
+      VEC4_ADD(quadColor[3], quadColor[3], dest[3]); /* A */
+
+      /* If fixed-point dest color buffer, need to clamp the outgoing
+       * fragment colors now.
+       */
+      if (bqs->clamp[0]) {
+         clamp_colors(quadColor);
+      }
 
       rebase_colors(bqs->base_format[0], quadColor);
 
-      for (j = 0; j < QUAD_SIZE; j++) {
+      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
          if (quad->inout.mask & (1 << j)) {
             int x = itx + (j & 1);
             int y = ity + (j >> 1);
@@ -1080,9 +1187,12 @@ single_output_color(struct quad_stage *qs,
       const int itx = (quad->input.x0 & (TILE_SIZE-1));
       const int ity = (quad->input.y0 & (TILE_SIZE-1));
 
+      if (qs->softpipe->rasterizer->clamp_fragment_color)
+         clamp_colors(quadColor);
+
       rebase_colors(bqs->base_format[0], quadColor);
 
-      for (j = 0; j < QUAD_SIZE; j++) {
+      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
          if (quad->inout.mask & (1 << j)) {
             int x = itx + (j & 1);
             int y = ity + (j >> 1);
@@ -1150,6 +1260,7 @@ choose_blend_quad(struct quad_stage *qs,
          util_format_description(format);
       /* assuming all or no color channels are normalized: */
       bqs->clamp[i] = desc->channel[0].normalized;
+      bqs->format_type[i] = desc->channel[0].type;
 
       if (util_format_is_intensity(format))
          bqs->base_format[i] = INTENSITY;