gallium: begin reworking quad stages for multiple color outputs
authorBrian <brian.paul@tungstengraphics.com>
Tue, 8 Apr 2008 03:59:12 +0000 (21:59 -0600)
committerBrian <brian.paul@tungstengraphics.com>
Tue, 8 Apr 2008 03:59:12 +0000 (21:59 -0600)
src/gallium/drivers/softpipe/sp_context.h
src/gallium/drivers/softpipe/sp_fs_llvm.c
src/gallium/drivers/softpipe/sp_headers.h
src/gallium/drivers/softpipe/sp_quad.c
src/gallium/drivers/softpipe/sp_quad_alpha_test.c
src/gallium/drivers/softpipe/sp_quad_blend.c
src/gallium/drivers/softpipe/sp_quad_bufloop.c
src/gallium/drivers/softpipe/sp_quad_colormask.c
src/gallium/drivers/softpipe/sp_quad_coverage.c
src/gallium/drivers/softpipe/sp_quad_fs.c
src/gallium/drivers/softpipe/sp_quad_output.c

index dc9d0e6d5d660303cc529d6b7c66ab6518a03032..9724d185d1585d9912eec357584433ae16003030 100644 (file)
@@ -138,8 +138,6 @@ struct softpipe_context {
    struct draw_stage *vbuf;
    struct softpipe_vbuf_render *vbuf_render;
 
-   uint current_cbuf;      /**< current color buffer being written to */
-
    struct softpipe_tile_cache *cbuf_cache[PIPE_MAX_COLOR_BUFS];
    struct softpipe_tile_cache *zsbuf_cache;
 
index 07d058155f22939363a02a532ec65eb5a5002d3f..6e1d9280bb43fe130ea8d4a3fb651eabc094ca4f 100644 (file)
@@ -99,19 +99,19 @@ shade_quad_llvm(struct quad_stage *qs,
       allvmrt(qss->stage.softpipe->fs->info.output_semantic_name[qss->colorOutSlot]
              == TGSI_SEMANTIC_COLOR);
       for (i = 0; i < QUAD_SIZE; ++i) {
-         quad->outputs.color[0][i] = dests[i][qss->colorOutSlot][0];
-         quad->outputs.color[1][i] = dests[i][qss->colorOutSlot][1];
-         quad->outputs.color[2][i] = dests[i][qss->colorOutSlot][2];
-         quad->outputs.color[3][i] = dests[i][qss->colorOutSlot][3];
+         quad->outputs.color[0][0][i] = dests[i][qss->colorOutSlot][0];
+         quad->outputs.color[0][1][i] = dests[i][qss->colorOutSlot][1];
+         quad->outputs.color[0][2][i] = dests[i][qss->colorOutSlot][2];
+         quad->outputs.color[0][3][i] = dests[i][qss->colorOutSlot][3];
       }
    }
 #if DLLVM
    for (int i = 0; i < QUAD_SIZE; ++i) {
       debug_printf("QLLVM%d(%d) [%f, %f, %f, %f]\n", i, qss->colorOutSlot,
-             quad->outputs.color[0][i],
-             quad->outputs.color[1][i],
-             quad->outputs.color[2][i],
-             quad->outputs.color[3][i]);
+             quad->outputs.color[0][0][i],
+             quad->outputs.color[0][1][i],
+             quad->outputs.color[0][2][i],
+             quad->outputs.color[0][3][i]);
    }
 #endif
 
index 9cf822213336070481078adfcb7438e7956511fd..3d9ede69bbddf545c3147d85269125bdd8e6f21f 100644 (file)
@@ -31,6 +31,7 @@
 #ifndef SP_HEADERS_H
 #define SP_HEADERS_H
 
+#include "pipe/p_state.h"
 #include "tgsi/exec/tgsi_exec.h"
 
 #define PRIM_POINT 1
@@ -66,7 +67,8 @@ struct quad_header {
    unsigned prim:2;     /**< PRIM_POINT, LINE, TRI */
 
    struct {
-      float color[NUM_CHANNELS][QUAD_SIZE];    /* rrrr, gggg, bbbb, aaaa */
+      /** colors in SOA format (rrrr, gggg, bbbb, aaaa) */
+      float color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][QUAD_SIZE];
       float depth[QUAD_SIZE];
    } outputs;
 
index 8603c1a3677d984a89d4a290623f609b2f9eb966..bc83d78ea166dd3bbe1773340e2ef45fdfe5d7b1 100644 (file)
@@ -76,15 +76,6 @@ sp_build_quad_pipeline(struct softpipe_context *sp)
       sp_push_quad_first( sp, sp->quad.blend );
    }
 
-   if (sp->framebuffer.num_cbufs == 1) {
-      /* the usual case: write to exactly one colorbuf */
-      sp->current_cbuf = 0;
-   }
-   else {
-      /* insert bufloop stage */
-      sp_push_quad_first( sp, sp->quad.bufloop );
-   }
-
    if (sp->depth_stencil->depth.occlusion_count) {
       sp_push_quad_first( sp, sp->quad.occlusion );
    }
index 318916fe3018a1aebf058d15e1375118bb63f17e..7a42b08ef53f0895b8ae756dc3109698ba0bede1 100644 (file)
@@ -16,7 +16,8 @@ alpha_test_quad(struct quad_stage *qs, struct quad_header *quad)
    struct softpipe_context *softpipe = qs->softpipe;
    const float ref = softpipe->depth_stencil->alpha.ref;
    unsigned passMask = 0x0, j;
-   const float *aaaa = quad->outputs.color[3];
+   const uint cbuf = 0; /* only output[0].alpha is tested */
+   const float *aaaa = quad->outputs.color[cbuf][3];
 
    switch (softpipe->depth_stencil->alpha.func) {
    case PIPE_FUNC_NEVER:
@@ -25,7 +26,7 @@ alpha_test_quad(struct quad_stage *qs, struct quad_header *quad)
    case PIPE_FUNC_LESS:
       /*
        * If mask were an array [4] we could do this SIMD-style:
-       * passMask = (quad->outputs.color[3] <= vec4(ref));
+       * passMask = (quad->outputs.color[0][3] <= vec4(ref));
        */
       for (j = 0; j < QUAD_SIZE; j++) {
          if (aaaa[j] < ref) {
index 0b79cfa1ed2b58930767cc93863199e49818d7f8..8be8025f40cbc58234351fb9aa2c0b88540dfad9 100644 (file)
@@ -101,114 +101,119 @@ static void
 logicop_quad(struct quad_stage *qs, struct quad_header *quad)
 {
    struct softpipe_context *softpipe = qs->softpipe;
-   float dest[4][QUAD_SIZE];
-   ubyte src[4][4], dst[4][4], res[4][4];
-   uint *src4 = (uint *) src;
-   uint *dst4 = (uint *) dst;
-   uint *res4 = (uint *) res;
-   struct softpipe_cached_tile *
-      tile = sp_get_cached_tile(softpipe,
-                                softpipe->cbuf_cache[softpipe->current_cbuf],
-                                quad->x0, quad->y0);
-   float (*quadColor)[4] = quad->outputs.color;
-   uint i, j;
-
-   /* get/swizzle dest colors */
-   for (j = 0; j < QUAD_SIZE; j++) {
-      int x = (quad->x0 & (TILE_SIZE-1)) + (j & 1);
-      int y = (quad->y0 & (TILE_SIZE-1)) + (j >> 1);
-      for (i = 0; i < 4; i++) {
-         dest[i][j] = tile->data.color[y][x][i];
+   uint cbuf;
+
+   /* loop over colorbuffer outputs */
+   for (cbuf = 0; cbuf < softpipe->framebuffer.num_cbufs; cbuf++) {
+      float dest[4][QUAD_SIZE];
+      ubyte src[4][4], dst[4][4], res[4][4];
+      uint *src4 = (uint *) src;
+      uint *dst4 = (uint *) dst;
+      uint *res4 = (uint *) res;
+      struct softpipe_cached_tile *
+         tile = sp_get_cached_tile(softpipe,
+                                   softpipe->cbuf_cache[cbuf],
+                                   quad->x0, quad->y0);
+      float (*quadColor)[4] = quad->outputs.color[cbuf];
+      uint i, j;
+
+      /* get/swizzle dest colors */
+      for (j = 0; j < QUAD_SIZE; j++) {
+         int x = (quad->x0 & (TILE_SIZE-1)) + (j & 1);
+         int y = (quad->y0 & (TILE_SIZE-1)) + (j >> 1);
+         for (i = 0; i < 4; i++) {
+            dest[i][j] = tile->data.color[y][x][i];
+         }
       }
-   }
 
-   /* convert to ubyte */
-   for (j = 0; j < 4; j++) { /* loop over R,G,B,A channels */
-      UNCLAMPED_FLOAT_TO_UBYTE(dst[j][0], dest[j][0]); /* P0 */
-      UNCLAMPED_FLOAT_TO_UBYTE(dst[j][1], dest[j][1]); /* P1 */
-      UNCLAMPED_FLOAT_TO_UBYTE(dst[j][2], dest[j][2]); /* P2 */
-      UNCLAMPED_FLOAT_TO_UBYTE(dst[j][3], dest[j][3]); /* P3 */
-
-      UNCLAMPED_FLOAT_TO_UBYTE(src[j][0], quadColor[j][0]); /* P0 */
-      UNCLAMPED_FLOAT_TO_UBYTE(src[j][1], quadColor[j][1]); /* P1 */
-      UNCLAMPED_FLOAT_TO_UBYTE(src[j][2], quadColor[j][2]); /* P2 */
-      UNCLAMPED_FLOAT_TO_UBYTE(src[j][3], quadColor[j][3]); /* P3 */
-   }
+      /* convert to ubyte */
+      for (j = 0; j < 4; j++) { /* loop over R,G,B,A channels */
+         UNCLAMPED_FLOAT_TO_UBYTE(dst[j][0], dest[j][0]); /* P0 */
+         UNCLAMPED_FLOAT_TO_UBYTE(dst[j][1], dest[j][1]); /* P1 */
+         UNCLAMPED_FLOAT_TO_UBYTE(dst[j][2], dest[j][2]); /* P2 */
+         UNCLAMPED_FLOAT_TO_UBYTE(dst[j][3], dest[j][3]); /* P3 */
+
+         UNCLAMPED_FLOAT_TO_UBYTE(src[j][0], quadColor[j][0]); /* P0 */
+         UNCLAMPED_FLOAT_TO_UBYTE(src[j][1], quadColor[j][1]); /* P1 */
+         UNCLAMPED_FLOAT_TO_UBYTE(src[j][2], quadColor[j][2]); /* P2 */
+         UNCLAMPED_FLOAT_TO_UBYTE(src[j][3], quadColor[j][3]); /* P3 */
+      }
 
-   switch (softpipe->blend->logicop_func) {
-   case PIPE_LOGICOP_CLEAR:
-      for (j = 0; j < 4; j++)
-         res4[j] = 0;
-      break;
-   case PIPE_LOGICOP_NOR:
-      for (j = 0; j < 4; j++)
-         res4[j] = ~(src4[j] | dst4[j]);
-      break;
-   case PIPE_LOGICOP_AND_INVERTED:
-      for (j = 0; j < 4; j++)
-         res4[j] = ~src4[j] & dst4[j];
-      break;
-   case PIPE_LOGICOP_COPY_INVERTED:
-      for (j = 0; j < 4; j++)
-         res4[j] = ~src4[j];
-      break;
-   case PIPE_LOGICOP_AND_REVERSE:
-      for (j = 0; j < 4; j++)
-         res4[j] = src4[j] & ~dst4[j];
-      break;
-   case PIPE_LOGICOP_INVERT:
-      for (j = 0; j < 4; j++)
-         res4[j] = ~dst4[j];
-      break;
-   case PIPE_LOGICOP_XOR:
-      for (j = 0; j < 4; j++)
-         res4[j] = dst4[j] ^ src4[j];
-      break;
-   case PIPE_LOGICOP_NAND:
-      for (j = 0; j < 4; j++)
-         res4[j] = ~(src4[j] & dst4[j]);
-      break;
-   case PIPE_LOGICOP_AND:
-      for (j = 0; j < 4; j++)
-         res4[j] = src4[j] & dst4[j];
-      break;
-   case PIPE_LOGICOP_EQUIV:
-      for (j = 0; j < 4; j++)
-         res4[j] = ~(src4[j] ^ dst4[j]);
-      break;
-   case PIPE_LOGICOP_NOOP:
-      for (j = 0; j < 4; j++)
-         res4[j] = dst4[j];
-      break;
-   case PIPE_LOGICOP_OR_INVERTED:
-      for (j = 0; j < 4; j++)
-         res4[j] = ~src4[j] | dst4[j];
-      break;
-   case PIPE_LOGICOP_COPY:
-      for (j = 0; j < 4; j++)
-         res4[j] = src4[j];
-      break;
-   case PIPE_LOGICOP_OR_REVERSE:
-      for (j = 0; j < 4; j++)
-         res4[j] = src4[j] | ~dst4[j];
-      break;
-   case PIPE_LOGICOP_OR:
-      for (j = 0; j < 4; j++)
-         res4[j] = src4[j] | dst4[j];
-      break;
-   case PIPE_LOGICOP_SET:
-      for (j = 0; j < 4; j++)
-         res4[j] = ~0;
-      break;
-   default:
-      assert(0);
-   }
+      switch (softpipe->blend->logicop_func) {
+      case PIPE_LOGICOP_CLEAR:
+         for (j = 0; j < 4; j++)
+            res4[j] = 0;
+         break;
+      case PIPE_LOGICOP_NOR:
+         for (j = 0; j < 4; j++)
+            res4[j] = ~(src4[j] | dst4[j]);
+         break;
+      case PIPE_LOGICOP_AND_INVERTED:
+         for (j = 0; j < 4; j++)
+            res4[j] = ~src4[j] & dst4[j];
+         break;
+      case PIPE_LOGICOP_COPY_INVERTED:
+         for (j = 0; j < 4; j++)
+            res4[j] = ~src4[j];
+         break;
+      case PIPE_LOGICOP_AND_REVERSE:
+         for (j = 0; j < 4; j++)
+            res4[j] = src4[j] & ~dst4[j];
+         break;
+      case PIPE_LOGICOP_INVERT:
+         for (j = 0; j < 4; j++)
+            res4[j] = ~dst4[j];
+         break;
+      case PIPE_LOGICOP_XOR:
+         for (j = 0; j < 4; j++)
+            res4[j] = dst4[j] ^ src4[j];
+         break;
+      case PIPE_LOGICOP_NAND:
+         for (j = 0; j < 4; j++)
+            res4[j] = ~(src4[j] & dst4[j]);
+         break;
+      case PIPE_LOGICOP_AND:
+         for (j = 0; j < 4; j++)
+            res4[j] = src4[j] & dst4[j];
+         break;
+      case PIPE_LOGICOP_EQUIV:
+         for (j = 0; j < 4; j++)
+            res4[j] = ~(src4[j] ^ dst4[j]);
+         break;
+      case PIPE_LOGICOP_NOOP:
+         for (j = 0; j < 4; j++)
+            res4[j] = dst4[j];
+         break;
+      case PIPE_LOGICOP_OR_INVERTED:
+         for (j = 0; j < 4; j++)
+            res4[j] = ~src4[j] | dst4[j];
+         break;
+      case PIPE_LOGICOP_COPY:
+         for (j = 0; j < 4; j++)
+            res4[j] = src4[j];
+         break;
+      case PIPE_LOGICOP_OR_REVERSE:
+         for (j = 0; j < 4; j++)
+            res4[j] = src4[j] | ~dst4[j];
+         break;
+      case PIPE_LOGICOP_OR:
+         for (j = 0; j < 4; j++)
+            res4[j] = src4[j] | dst4[j];
+         break;
+      case PIPE_LOGICOP_SET:
+         for (j = 0; j < 4; j++)
+            res4[j] = ~0;
+         break;
+      default:
+         assert(0);
+      }
 
-   for (j = 0; j < 4; j++) {
-      quadColor[j][0] = UBYTE_TO_FLOAT(res[j][0]);
-      quadColor[j][1] = UBYTE_TO_FLOAT(res[j][1]);
-      quadColor[j][2] = UBYTE_TO_FLOAT(res[j][2]);
-      quadColor[j][3] = UBYTE_TO_FLOAT(res[j][3]);
+      for (j = 0; j < 4; j++) {
+         quadColor[j][0] = UBYTE_TO_FLOAT(res[j][0]);
+         quadColor[j][1] = UBYTE_TO_FLOAT(res[j][1]);
+         quadColor[j][2] = UBYTE_TO_FLOAT(res[j][2]);
+         quadColor[j][3] = UBYTE_TO_FLOAT(res[j][3]);
+      }
    }
 
    /* pass quad to next stage */
@@ -221,504 +226,511 @@ logicop_quad(struct quad_stage *qs, struct quad_header *quad)
 static void
 blend_quad(struct quad_stage *qs, struct quad_header *quad)
 {
-   struct softpipe_context *softpipe = qs->softpipe;
    static const float zero[4] = { 0, 0, 0, 0 };
    static const float one[4] = { 1, 1, 1, 1 };
-   float source[4][QUAD_SIZE], dest[4][QUAD_SIZE];
-   struct softpipe_cached_tile *tile
-      = sp_get_cached_tile(softpipe,
-                           softpipe->cbuf_cache[softpipe->current_cbuf],
-                           quad->x0, quad->y0);
-   float (*quadColor)[4] = quad->outputs.color;
-   uint i, j;
-
-   if (softpipe->blend->logicop_enable) {
-      logicop_quad(qs, quad);
-      return;
-   }
 
-   /* get/swizzle dest colors */
-   for (j = 0; j < QUAD_SIZE; j++) {
-      int x = (quad->x0 & (TILE_SIZE-1)) + (j & 1);
-      int y = (quad->y0 & (TILE_SIZE-1)) + (j >> 1);
-      for (i = 0; i < 4; i++) {
-         dest[i][j] = tile->data.color[y][x][i];
+   struct softpipe_context *softpipe = qs->softpipe;
+   uint cbuf;
+
+   /* loop over colorbuffer outputs */
+   for (cbuf = 0; cbuf < softpipe->framebuffer.num_cbufs; cbuf++) {
+      float source[4][QUAD_SIZE], dest[4][QUAD_SIZE];
+      struct softpipe_cached_tile *tile
+         = sp_get_cached_tile(softpipe,
+                              softpipe->cbuf_cache[cbuf],
+                              quad->x0, quad->y0);
+      float (*quadColor)[4] = quad->outputs.color[cbuf];
+      uint i, j;
+
+      if (softpipe->blend->logicop_enable) {
+         logicop_quad(qs, quad);
+         return;
       }
-   }
 
-   /*
-    * Compute src/first term RGB
-    */
-   switch (softpipe->blend->rgb_src_factor) {
-   case PIPE_BLENDFACTOR_ONE:
-      VEC4_COPY(source[0], quadColor[0]); /* R */
-      VEC4_COPY(source[1], quadColor[1]); /* G */
-      VEC4_COPY(source[2], quadColor[2]); /* B */
-      break;
-   case PIPE_BLENDFACTOR_SRC_COLOR:
-      VEC4_MUL(source[0], quadColor[0], quadColor[0]); /* R */
-      VEC4_MUL(source[1], quadColor[1], quadColor[1]); /* G */
-      VEC4_MUL(source[2], quadColor[2], quadColor[2]); /* B */
-      break;
-   case PIPE_BLENDFACTOR_SRC_ALPHA:
-      {
-         const float *alpha = quadColor[3];
-         VEC4_MUL(source[0], quadColor[0], alpha); /* R */
-         VEC4_MUL(source[1], quadColor[1], alpha); /* G */
-         VEC4_MUL(source[2], quadColor[2], alpha); /* B */
-      }
-      break;
-   case PIPE_BLENDFACTOR_DST_COLOR:
-      VEC4_MUL(source[0], quadColor[0], dest[0]); /* R */
-      VEC4_MUL(source[1], quadColor[1], dest[1]); /* G */
-      VEC4_MUL(source[2], quadColor[2], dest[2]); /* B */
-      break;
-   case PIPE_BLENDFACTOR_DST_ALPHA:
-      {
-         const float *alpha = dest[3];
-         VEC4_MUL(source[0], quadColor[0], alpha); /* R */
-         VEC4_MUL(source[1], quadColor[1], alpha); /* G */
-         VEC4_MUL(source[2], quadColor[2], alpha); /* B */
-      }
-      break;
-   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
-      {
-         const float *alpha = quadColor[3];
-         float diff[4];
-         VEC4_SUB(diff, one, dest[3]);
-         VEC4_MIN(source[0], alpha, diff); /* R */
-         VEC4_MIN(source[1], alpha, diff); /* G */
-         VEC4_MIN(source[2], alpha, diff); /* B */
-      }
-      break;
-   case PIPE_BLENDFACTOR_CONST_COLOR:
-      {
-         float comp[4];
-         VEC4_SCALAR(comp, softpipe->blend_color.color[0]); /* R */
-         VEC4_MUL(source[0], quadColor[0], comp); /* R */
-         VEC4_SCALAR(comp, softpipe->blend_color.color[1]); /* G */
-         VEC4_MUL(source[1], quadColor[1], comp); /* G */
-         VEC4_SCALAR(comp, softpipe->blend_color.color[2]); /* B */
-         VEC4_MUL(source[2], quadColor[2], comp); /* B */
-      }
-      break;
-   case PIPE_BLENDFACTOR_CONST_ALPHA:
-      {
-         float alpha[4];
-         VEC4_SCALAR(alpha, softpipe->blend_color.color[3]);
-         VEC4_MUL(source[0], quadColor[0], alpha); /* R */
-         VEC4_MUL(source[1], quadColor[1], alpha); /* G */
-         VEC4_MUL(source[2], quadColor[2], alpha); /* B */
-      }
-      break;
-   case PIPE_BLENDFACTOR_SRC1_COLOR:
-      assert(0); /* to do */
-      break;
-   case PIPE_BLENDFACTOR_SRC1_ALPHA:
-      assert(0); /* to do */
-      break;
-   case PIPE_BLENDFACTOR_ZERO:
-      VEC4_COPY(source[0], zero); /* R */
-      VEC4_COPY(source[1], zero); /* G */
-      VEC4_COPY(source[2], zero); /* B */
-      break;
-   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
-      {
-         float inv_comp[4];
-         VEC4_SUB(inv_comp, one, quadColor[0]); /* R */
-         VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */
-         VEC4_SUB(inv_comp, one, quadColor[1]); /* G */
-         VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */
-         VEC4_SUB(inv_comp, one, quadColor[2]); /* B */
-         VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */
-      }
-      break;
-   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
-      {
-         float inv_alpha[4];
-         VEC4_SUB(inv_alpha, one, quadColor[3]);
-         VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
-         VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
-         VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
-      }
-      break;
-   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
-      {
-         float inv_alpha[4];
-         VEC4_SUB(inv_alpha, one, dest[3]);
-         VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
-         VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
-         VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
-      }
-      break;
-   case PIPE_BLENDFACTOR_INV_DST_COLOR:
-      {
-         float inv_comp[4];
-         VEC4_SUB(inv_comp, one, dest[0]); /* R */
-         VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */
-         VEC4_SUB(inv_comp, one, dest[1]); /* G */
-         VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */
-         VEC4_SUB(inv_comp, one, dest[2]); /* B */
-         VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */
-      }
-      break;
-   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
-      {
-         float inv_comp[4];
-         /* R */
-         VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[0]);
-         VEC4_MUL(source[0], quadColor[0], inv_comp);
-         /* G */
-         VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[1]);
-         VEC4_MUL(source[1], quadColor[1], inv_comp);
-         /* B */
-         VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[2]);
-         VEC4_MUL(source[2], quadColor[2], inv_comp);
-      }
-      break;
-   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
-      {
-         float inv_alpha[4];
-         VEC4_SCALAR(inv_alpha, 1.0f - softpipe->blend_color.color[3]);
-         VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
-         VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
-         VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
-      }
-      break;
-   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
-      assert(0); /* to do */
-      break;
-   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
-      assert(0); /* to do */
-      break;
-   default:
-      assert(0);
-   }
-   
-   /*
-    * Compute src/first term A
-    */
-   switch (softpipe->blend->alpha_src_factor) {
-   case PIPE_BLENDFACTOR_ONE:
-      VEC4_COPY(source[3], quadColor[3]); /* A */
-      break;
-   case PIPE_BLENDFACTOR_SRC_COLOR:
-      /* fall-through */
-   case PIPE_BLENDFACTOR_SRC_ALPHA:
-      {
-         const float *alpha = quadColor[3];
-         VEC4_MUL(source[3], quadColor[3], alpha); /* A */
-      }
-      break;
-   case PIPE_BLENDFACTOR_DST_COLOR:
-      /* fall-through */
-   case PIPE_BLENDFACTOR_DST_ALPHA:
-      VEC4_MUL(source[3], quadColor[3], dest[3]); /* A */
-      break;
-   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
-      {
-         const float *alpha = quadColor[3];
-         float diff[4];
-         VEC4_SUB(diff, one, dest[3]);
-         VEC4_MIN(source[3], alpha, diff); /* A */
-      }
-      break;
-   case PIPE_BLENDFACTOR_CONST_COLOR:
-      /* fall-through */
-   case PIPE_BLENDFACTOR_CONST_ALPHA:
-      {
-         float comp[4];
-         VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */
-         VEC4_MUL(source[3], quadColor[3], comp); /* A */
+      /* get/swizzle dest colors */
+      for (j = 0; j < QUAD_SIZE; j++) {
+         int x = (quad->x0 & (TILE_SIZE-1)) + (j & 1);
+         int y = (quad->y0 & (TILE_SIZE-1)) + (j >> 1);
+         for (i = 0; i < 4; i++) {
+            dest[i][j] = tile->data.color[y][x][i];
+         }
       }
-      break;
-   case PIPE_BLENDFACTOR_ZERO:
-      VEC4_COPY(source[3], zero); /* A */
-      break;
-   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
-      /* fall-through */
-   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
-      {
-         float inv_alpha[4];
-         VEC4_SUB(inv_alpha, one, quadColor[3]);
-         VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */
-      }
-      break;
-   case PIPE_BLENDFACTOR_INV_DST_COLOR:
-      /* fall-through */
-   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
-      {
-         float inv_alpha[4];
-         VEC4_SUB(inv_alpha, one, dest[3]);
-         VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */
-      }
-      break;
-   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
-      /* fall-through */
-   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
-      {
-         float inv_comp[4];
-         /* A */
-         VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]);
-         VEC4_MUL(source[3], quadColor[3], inv_comp);
-      }
-      break;
-   default:
-      assert(0);
-   }
-   
-   
-   /*
-    * Compute dest/second term RGB
-    */
-   switch (softpipe->blend->rgb_dst_factor) {
-   case PIPE_BLENDFACTOR_ONE:
-      /* dest = dest * 1   NO-OP, leave dest as-is */
-      break;
-   case PIPE_BLENDFACTOR_SRC_COLOR:
-      VEC4_MUL(dest[0], dest[0], quadColor[0]); /* R */
-      VEC4_MUL(dest[1], dest[1], quadColor[1]); /* G */
-      VEC4_MUL(dest[2], dest[2], quadColor[2]); /* B */
-      break;
-   case PIPE_BLENDFACTOR_SRC_ALPHA:
-      VEC4_MUL(dest[0], dest[0], quadColor[3]); /* R * A */
-      VEC4_MUL(dest[1], dest[1], quadColor[3]); /* G * A */
-      VEC4_MUL(dest[2], dest[2], quadColor[3]); /* B * A */
-      break;
-   case PIPE_BLENDFACTOR_DST_ALPHA:
-      VEC4_MUL(dest[0], dest[0], dest[3]); /* R * A */
-      VEC4_MUL(dest[1], dest[1], dest[3]); /* G * A */
-      VEC4_MUL(dest[2], dest[2], dest[3]); /* B * A */
-      break;
-   case PIPE_BLENDFACTOR_DST_COLOR:
-      VEC4_MUL(dest[0], dest[0], dest[0]); /* R */
-      VEC4_MUL(dest[1], dest[1], dest[1]); /* G */
-      VEC4_MUL(dest[2], dest[2], dest[2]); /* B */
-      break;
-   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
-      assert(0); /* illegal */
-      break;
-   case PIPE_BLENDFACTOR_CONST_COLOR:
-      {
-         float comp[4];
-         VEC4_SCALAR(comp, softpipe->blend_color.color[0]); /* R */
-         VEC4_MUL(dest[0], dest[0], comp); /* R */
-         VEC4_SCALAR(comp, softpipe->blend_color.color[1]); /* G */
-         VEC4_MUL(dest[1], dest[1], comp); /* G */
-         VEC4_SCALAR(comp, softpipe->blend_color.color[2]); /* B */
-         VEC4_MUL(dest[2], dest[2], comp); /* B */
-      }
-      break;
-   case PIPE_BLENDFACTOR_CONST_ALPHA:
-      {
-         float comp[4];
-         VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */
-         VEC4_MUL(dest[0], dest[0], comp); /* R */
-         VEC4_MUL(dest[1], dest[1], comp); /* G */
-         VEC4_MUL(dest[2], dest[2], comp); /* B */
-      }
-      break;
-   case PIPE_BLENDFACTOR_ZERO:
-      VEC4_COPY(dest[0], zero); /* R */
-      VEC4_COPY(dest[1], zero); /* G */
-      VEC4_COPY(dest[2], zero); /* B */
-      break;
-   case PIPE_BLENDFACTOR_SRC1_COLOR:
-   case PIPE_BLENDFACTOR_SRC1_ALPHA:
-      /* XXX what are these? */
-      assert(0);
-      break;
-   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
-      {
-         float inv_comp[4];
-         VEC4_SUB(inv_comp, one, quadColor[0]); /* R */
-         VEC4_MUL(dest[0], inv_comp, dest[0]); /* R */
-         VEC4_SUB(inv_comp, one, quadColor[1]); /* G */
-         VEC4_MUL(dest[1], inv_comp, dest[1]); /* G */
-         VEC4_SUB(inv_comp, one, quadColor[2]); /* B */
-         VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */
-      }
-      break;
-   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
-      {
-         float one_minus_alpha[QUAD_SIZE];
-         VEC4_SUB(one_minus_alpha, one, quadColor[3]);
-         VEC4_MUL(dest[0], dest[0], one_minus_alpha); /* R */
-         VEC4_MUL(dest[1], dest[1], one_minus_alpha); /* G */
-         VEC4_MUL(dest[2], dest[2], one_minus_alpha); /* B */
-      }
-      break;
-   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
-      {
-         float inv_comp[4];
-         VEC4_SUB(inv_comp, one, quadColor[3]); /* A */
-         VEC4_MUL(dest[0], inv_comp, dest[0]); /* R */
-         VEC4_MUL(dest[1], inv_comp, dest[1]); /* G */
-         VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */
-      }
-      break;
-   case PIPE_BLENDFACTOR_INV_DST_COLOR:
-      {
-         float inv_comp[4];
-         VEC4_SUB(inv_comp, one, dest[0]); /* R */
-         VEC4_MUL(dest[0], dest[0], inv_comp); /* R */
-         VEC4_SUB(inv_comp, one, dest[1]); /* G */
-         VEC4_MUL(dest[1], dest[1], inv_comp); /* G */
-         VEC4_SUB(inv_comp, one, dest[2]); /* B */
-         VEC4_MUL(dest[2], dest[2], inv_comp); /* B */
-      }
-      break;
-   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
-      {
-         float inv_comp[4];
-         /* R */
-         VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[0]);
-         VEC4_MUL(dest[0], dest[0], inv_comp);
-         /* G */
-         VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[1]);
-         VEC4_MUL(dest[1], dest[1], inv_comp);
-         /* B */
-         VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[2]);
-         VEC4_MUL(dest[2], dest[2], inv_comp);
+
+      /*
+       * Compute src/first term RGB
+       */
+      switch (softpipe->blend->rgb_src_factor) {
+      case PIPE_BLENDFACTOR_ONE:
+         VEC4_COPY(source[0], quadColor[0]); /* R */
+         VEC4_COPY(source[1], quadColor[1]); /* G */
+         VEC4_COPY(source[2], quadColor[2]); /* B */
+         break;
+      case PIPE_BLENDFACTOR_SRC_COLOR:
+         VEC4_MUL(source[0], quadColor[0], quadColor[0]); /* R */
+         VEC4_MUL(source[1], quadColor[1], quadColor[1]); /* G */
+         VEC4_MUL(source[2], quadColor[2], quadColor[2]); /* B */
+         break;
+      case PIPE_BLENDFACTOR_SRC_ALPHA:
+         {
+            const float *alpha = quadColor[3];
+            VEC4_MUL(source[0], quadColor[0], alpha); /* R */
+            VEC4_MUL(source[1], quadColor[1], alpha); /* G */
+            VEC4_MUL(source[2], quadColor[2], alpha); /* B */
+         }
+         break;
+      case PIPE_BLENDFACTOR_DST_COLOR:
+         VEC4_MUL(source[0], quadColor[0], dest[0]); /* R */
+         VEC4_MUL(source[1], quadColor[1], dest[1]); /* G */
+         VEC4_MUL(source[2], quadColor[2], dest[2]); /* B */
+         break;
+      case PIPE_BLENDFACTOR_DST_ALPHA:
+         {
+            const float *alpha = dest[3];
+            VEC4_MUL(source[0], quadColor[0], alpha); /* R */
+            VEC4_MUL(source[1], quadColor[1], alpha); /* G */
+            VEC4_MUL(source[2], quadColor[2], alpha); /* B */
+         }
+         break;
+      case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+         {
+            const float *alpha = quadColor[3];
+            float diff[4];
+            VEC4_SUB(diff, one, dest[3]);
+            VEC4_MIN(source[0], alpha, diff); /* R */
+            VEC4_MIN(source[1], alpha, diff); /* G */
+            VEC4_MIN(source[2], alpha, diff); /* B */
+         }
+         break;
+      case PIPE_BLENDFACTOR_CONST_COLOR:
+         {
+            float comp[4];
+            VEC4_SCALAR(comp, softpipe->blend_color.color[0]); /* R */
+            VEC4_MUL(source[0], quadColor[0], comp); /* R */
+            VEC4_SCALAR(comp, softpipe->blend_color.color[1]); /* G */
+            VEC4_MUL(source[1], quadColor[1], comp); /* G */
+            VEC4_SCALAR(comp, softpipe->blend_color.color[2]); /* B */
+            VEC4_MUL(source[2], quadColor[2], comp); /* B */
+         }
+         break;
+      case PIPE_BLENDFACTOR_CONST_ALPHA:
+         {
+            float alpha[4];
+            VEC4_SCALAR(alpha, softpipe->blend_color.color[3]);
+            VEC4_MUL(source[0], quadColor[0], alpha); /* R */
+            VEC4_MUL(source[1], quadColor[1], alpha); /* G */
+            VEC4_MUL(source[2], quadColor[2], alpha); /* B */
+         }
+         break;
+      case PIPE_BLENDFACTOR_SRC1_COLOR:
+         assert(0); /* to do */
+         break;
+      case PIPE_BLENDFACTOR_SRC1_ALPHA:
+         assert(0); /* to do */
+         break;
+      case PIPE_BLENDFACTOR_ZERO:
+         VEC4_COPY(source[0], zero); /* R */
+         VEC4_COPY(source[1], zero); /* G */
+         VEC4_COPY(source[2], zero); /* B */
+         break;
+      case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+         {
+            float inv_comp[4];
+            VEC4_SUB(inv_comp, one, quadColor[0]); /* R */
+            VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */
+            VEC4_SUB(inv_comp, one, quadColor[1]); /* G */
+            VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */
+            VEC4_SUB(inv_comp, one, quadColor[2]); /* B */
+            VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */
+         }
+         break;
+      case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+         {
+            float inv_alpha[4];
+            VEC4_SUB(inv_alpha, one, quadColor[3]);
+            VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
+            VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
+            VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
+         }
+         break;
+      case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+         {
+            float inv_alpha[4];
+            VEC4_SUB(inv_alpha, one, dest[3]);
+            VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
+            VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
+            VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
+         }
+         break;
+      case PIPE_BLENDFACTOR_INV_DST_COLOR:
+         {
+            float inv_comp[4];
+            VEC4_SUB(inv_comp, one, dest[0]); /* R */
+            VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */
+            VEC4_SUB(inv_comp, one, dest[1]); /* G */
+            VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */
+            VEC4_SUB(inv_comp, one, dest[2]); /* B */
+            VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */
+         }
+         break;
+      case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+         {
+            float inv_comp[4];
+            /* R */
+            VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[0]);
+            VEC4_MUL(source[0], quadColor[0], inv_comp);
+            /* G */
+            VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[1]);
+            VEC4_MUL(source[1], quadColor[1], inv_comp);
+            /* B */
+            VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[2]);
+            VEC4_MUL(source[2], quadColor[2], inv_comp);
+         }
+         break;
+      case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+         {
+            float inv_alpha[4];
+            VEC4_SCALAR(inv_alpha, 1.0f - softpipe->blend_color.color[3]);
+            VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
+            VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
+            VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
+         }
+         break;
+      case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+         assert(0); /* to do */
+         break;
+      case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+         assert(0); /* to do */
+         break;
+      default:
+         assert(0);
       }
-      break;
-   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
-      {
-         float inv_comp[4];
-         VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]);
-         VEC4_MUL(dest[0], dest[0], inv_comp);
-         VEC4_MUL(dest[1], dest[1], inv_comp);
-         VEC4_MUL(dest[2], dest[2], inv_comp);
+
+      /*
+       * Compute src/first term A
+       */
+      switch (softpipe->blend->alpha_src_factor) {
+      case PIPE_BLENDFACTOR_ONE:
+         VEC4_COPY(source[3], quadColor[3]); /* A */
+         break;
+      case PIPE_BLENDFACTOR_SRC_COLOR:
+         /* fall-through */
+      case PIPE_BLENDFACTOR_SRC_ALPHA:
+         {
+            const float *alpha = quadColor[3];
+            VEC4_MUL(source[3], quadColor[3], alpha); /* A */
+         }
+         break;
+      case PIPE_BLENDFACTOR_DST_COLOR:
+         /* fall-through */
+      case PIPE_BLENDFACTOR_DST_ALPHA:
+         VEC4_MUL(source[3], quadColor[3], dest[3]); /* A */
+         break;
+      case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+         {
+            const float *alpha = quadColor[3];
+            float diff[4];
+            VEC4_SUB(diff, one, dest[3]);
+            VEC4_MIN(source[3], alpha, diff); /* A */
+         }
+         break;
+      case PIPE_BLENDFACTOR_CONST_COLOR:
+         /* fall-through */
+      case PIPE_BLENDFACTOR_CONST_ALPHA:
+         {
+            float comp[4];
+            VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */
+            VEC4_MUL(source[3], quadColor[3], comp); /* A */
+         }
+         break;
+      case PIPE_BLENDFACTOR_ZERO:
+         VEC4_COPY(source[3], zero); /* A */
+         break;
+      case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+         /* fall-through */
+      case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+         {
+            float inv_alpha[4];
+            VEC4_SUB(inv_alpha, one, quadColor[3]);
+            VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */
+         }
+         break;
+      case PIPE_BLENDFACTOR_INV_DST_COLOR:
+         /* fall-through */
+      case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+         {
+            float inv_alpha[4];
+            VEC4_SUB(inv_alpha, one, dest[3]);
+            VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */
+         }
+         break;
+      case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+         /* fall-through */
+      case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+         {
+            float inv_comp[4];
+            /* A */
+            VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]);
+            VEC4_MUL(source[3], quadColor[3], inv_comp);
+         }
+         break;
+      default:
+         assert(0);
       }
-      break;
-   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
-   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
-      /* XXX what are these? */
-      assert(0);
-      break;
-   default:
-      assert(0);
-   }
-   
-   /*
-    * Compute dest/second term A
-    */
-   switch (softpipe->blend->alpha_dst_factor) {
-   case PIPE_BLENDFACTOR_ONE:
-      /* dest = dest * 1   NO-OP, leave dest as-is */
-      break;
-   case PIPE_BLENDFACTOR_SRC_COLOR:
-      /* fall-through */
-   case PIPE_BLENDFACTOR_SRC_ALPHA:
-      VEC4_MUL(dest[3], dest[3], quadColor[3]); /* A * A */
-      break;
-   case PIPE_BLENDFACTOR_DST_COLOR:
-      /* fall-through */
-   case PIPE_BLENDFACTOR_DST_ALPHA:
-      VEC4_MUL(dest[3], dest[3], dest[3]); /* A */
-      break;
-   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
-      assert(0); /* illegal */
-      break;
-   case PIPE_BLENDFACTOR_CONST_COLOR:
-      /* fall-through */
-   case PIPE_BLENDFACTOR_CONST_ALPHA:
-      {
-         float comp[4];
-         VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */
-         VEC4_MUL(dest[3], dest[3], comp); /* A */
+
+
+      /*
+       * Compute dest/second term RGB
+       */
+      switch (softpipe->blend->rgb_dst_factor) {
+      case PIPE_BLENDFACTOR_ONE:
+         /* dest = dest * 1   NO-OP, leave dest as-is */
+         break;
+      case PIPE_BLENDFACTOR_SRC_COLOR:
+         VEC4_MUL(dest[0], dest[0], quadColor[0]); /* R */
+         VEC4_MUL(dest[1], dest[1], quadColor[1]); /* G */
+         VEC4_MUL(dest[2], dest[2], quadColor[2]); /* B */
+         break;
+      case PIPE_BLENDFACTOR_SRC_ALPHA:
+         VEC4_MUL(dest[0], dest[0], quadColor[3]); /* R * A */
+         VEC4_MUL(dest[1], dest[1], quadColor[3]); /* G * A */
+         VEC4_MUL(dest[2], dest[2], quadColor[3]); /* B * A */
+         break;
+      case PIPE_BLENDFACTOR_DST_ALPHA:
+         VEC4_MUL(dest[0], dest[0], dest[3]); /* R * A */
+         VEC4_MUL(dest[1], dest[1], dest[3]); /* G * A */
+         VEC4_MUL(dest[2], dest[2], dest[3]); /* B * A */
+         break;
+      case PIPE_BLENDFACTOR_DST_COLOR:
+         VEC4_MUL(dest[0], dest[0], dest[0]); /* R */
+         VEC4_MUL(dest[1], dest[1], dest[1]); /* G */
+         VEC4_MUL(dest[2], dest[2], dest[2]); /* B */
+         break;
+      case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+         assert(0); /* illegal */
+         break;
+      case PIPE_BLENDFACTOR_CONST_COLOR:
+         {
+            float comp[4];
+            VEC4_SCALAR(comp, softpipe->blend_color.color[0]); /* R */
+            VEC4_MUL(dest[0], dest[0], comp); /* R */
+            VEC4_SCALAR(comp, softpipe->blend_color.color[1]); /* G */
+            VEC4_MUL(dest[1], dest[1], comp); /* G */
+            VEC4_SCALAR(comp, softpipe->blend_color.color[2]); /* B */
+            VEC4_MUL(dest[2], dest[2], comp); /* B */
+         }
+         break;
+      case PIPE_BLENDFACTOR_CONST_ALPHA:
+         {
+            float comp[4];
+            VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */
+            VEC4_MUL(dest[0], dest[0], comp); /* R */
+            VEC4_MUL(dest[1], dest[1], comp); /* G */
+            VEC4_MUL(dest[2], dest[2], comp); /* B */
+         }
+         break;
+      case PIPE_BLENDFACTOR_ZERO:
+         VEC4_COPY(dest[0], zero); /* R */
+         VEC4_COPY(dest[1], zero); /* G */
+         VEC4_COPY(dest[2], zero); /* B */
+         break;
+      case PIPE_BLENDFACTOR_SRC1_COLOR:
+      case PIPE_BLENDFACTOR_SRC1_ALPHA:
+         /* XXX what are these? */
+         assert(0);
+         break;
+      case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+         {
+            float inv_comp[4];
+            VEC4_SUB(inv_comp, one, quadColor[0]); /* R */
+            VEC4_MUL(dest[0], inv_comp, dest[0]); /* R */
+            VEC4_SUB(inv_comp, one, quadColor[1]); /* G */
+            VEC4_MUL(dest[1], inv_comp, dest[1]); /* G */
+            VEC4_SUB(inv_comp, one, quadColor[2]); /* B */
+            VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */
+         }
+         break;
+      case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+         {
+            float one_minus_alpha[QUAD_SIZE];
+            VEC4_SUB(one_minus_alpha, one, quadColor[3]);
+            VEC4_MUL(dest[0], dest[0], one_minus_alpha); /* R */
+            VEC4_MUL(dest[1], dest[1], one_minus_alpha); /* G */
+            VEC4_MUL(dest[2], dest[2], one_minus_alpha); /* B */
+         }
+         break;
+      case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+         {
+            float inv_comp[4];
+            VEC4_SUB(inv_comp, one, quadColor[3]); /* A */
+            VEC4_MUL(dest[0], inv_comp, dest[0]); /* R */
+            VEC4_MUL(dest[1], inv_comp, dest[1]); /* G */
+            VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */
+         }
+         break;
+      case PIPE_BLENDFACTOR_INV_DST_COLOR:
+         {
+            float inv_comp[4];
+            VEC4_SUB(inv_comp, one, dest[0]); /* R */
+            VEC4_MUL(dest[0], dest[0], inv_comp); /* R */
+            VEC4_SUB(inv_comp, one, dest[1]); /* G */
+            VEC4_MUL(dest[1], dest[1], inv_comp); /* G */
+            VEC4_SUB(inv_comp, one, dest[2]); /* B */
+            VEC4_MUL(dest[2], dest[2], inv_comp); /* B */
+         }
+         break;
+      case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+         {
+            float inv_comp[4];
+            /* R */
+            VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[0]);
+            VEC4_MUL(dest[0], dest[0], inv_comp);
+            /* G */
+            VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[1]);
+            VEC4_MUL(dest[1], dest[1], inv_comp);
+            /* B */
+            VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[2]);
+            VEC4_MUL(dest[2], dest[2], inv_comp);
+         }
+         break;
+      case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+         {
+            float inv_comp[4];
+            VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]);
+            VEC4_MUL(dest[0], dest[0], inv_comp);
+            VEC4_MUL(dest[1], dest[1], inv_comp);
+            VEC4_MUL(dest[2], dest[2], inv_comp);
+         }
+         break;
+      case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+      case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+         /* XXX what are these? */
+         assert(0);
+         break;
+      default:
+         assert(0);
       }
-      break;
-   case PIPE_BLENDFACTOR_ZERO:
-      VEC4_COPY(dest[3], zero); /* A */
-      break;
-   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
-      /* fall-through */
-   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
-      {
-         float one_minus_alpha[QUAD_SIZE];
-         VEC4_SUB(one_minus_alpha, one, quadColor[3]);
-         VEC4_MUL(dest[3], dest[3], one_minus_alpha); /* A */
+
+      /*
+       * Compute dest/second term A
+       */
+      switch (softpipe->blend->alpha_dst_factor) {
+      case PIPE_BLENDFACTOR_ONE:
+         /* dest = dest * 1   NO-OP, leave dest as-is */
+         break;
+      case PIPE_BLENDFACTOR_SRC_COLOR:
+         /* fall-through */
+      case PIPE_BLENDFACTOR_SRC_ALPHA:
+         VEC4_MUL(dest[3], dest[3], quadColor[3]); /* A * A */
+         break;
+      case PIPE_BLENDFACTOR_DST_COLOR:
+         /* fall-through */
+      case PIPE_BLENDFACTOR_DST_ALPHA:
+         VEC4_MUL(dest[3], dest[3], dest[3]); /* A */
+         break;
+      case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+         assert(0); /* illegal */
+         break;
+      case PIPE_BLENDFACTOR_CONST_COLOR:
+         /* fall-through */
+      case PIPE_BLENDFACTOR_CONST_ALPHA:
+         {
+            float comp[4];
+            VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */
+            VEC4_MUL(dest[3], dest[3], comp); /* A */
+         }
+         break;
+      case PIPE_BLENDFACTOR_ZERO:
+         VEC4_COPY(dest[3], zero); /* A */
+         break;
+      case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+         /* fall-through */
+      case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+         {
+            float one_minus_alpha[QUAD_SIZE];
+            VEC4_SUB(one_minus_alpha, one, quadColor[3]);
+            VEC4_MUL(dest[3], dest[3], one_minus_alpha); /* A */
+         }
+         break;
+      case PIPE_BLENDFACTOR_INV_DST_COLOR:
+         /* fall-through */
+      case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+         {
+            float inv_comp[4];
+            VEC4_SUB(inv_comp, one, dest[3]); /* A */
+            VEC4_MUL(dest[3], inv_comp, dest[3]); /* A */
+         }
+         break;
+      case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+         /* fall-through */
+      case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+         {
+            float inv_comp[4];
+            VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]);
+            VEC4_MUL(dest[3], dest[3], inv_comp);
+         }
+         break;
+      default:
+         assert(0);
       }
-      break;
-   case PIPE_BLENDFACTOR_INV_DST_COLOR:
-      /* fall-through */
-   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
-      {
-         float inv_comp[4];
-         VEC4_SUB(inv_comp, one, dest[3]); /* A */
-         VEC4_MUL(dest[3], inv_comp, dest[3]); /* A */
+
+      /*
+       * Combine RGB terms
+       */
+      switch (softpipe->blend->rgb_func) {
+      case PIPE_BLEND_ADD:
+         VEC4_ADD(quadColor[0], source[0], dest[0]); /* R */
+         VEC4_ADD(quadColor[1], source[1], dest[1]); /* G */
+         VEC4_ADD(quadColor[2], source[2], dest[2]); /* B */
+         break;
+      case PIPE_BLEND_SUBTRACT:
+         VEC4_SUB(quadColor[0], source[0], dest[0]); /* R */
+         VEC4_SUB(quadColor[1], source[1], dest[1]); /* G */
+         VEC4_SUB(quadColor[2], source[2], dest[2]); /* B */
+         break;
+      case PIPE_BLEND_REVERSE_SUBTRACT:
+         VEC4_SUB(quadColor[0], dest[0], source[0]); /* R */
+         VEC4_SUB(quadColor[1], dest[1], source[1]); /* G */
+         VEC4_SUB(quadColor[2], dest[2], source[2]); /* B */
+         break;
+      case PIPE_BLEND_MIN:
+         VEC4_MIN(quadColor[0], source[0], dest[0]); /* R */
+         VEC4_MIN(quadColor[1], source[1], dest[1]); /* G */
+         VEC4_MIN(quadColor[2], source[2], dest[2]); /* B */
+         break;
+      case PIPE_BLEND_MAX:
+         VEC4_MAX(quadColor[0], source[0], dest[0]); /* R */
+         VEC4_MAX(quadColor[1], source[1], dest[1]); /* G */
+         VEC4_MAX(quadColor[2], source[2], dest[2]); /* B */
+         break;
+      default:
+         assert(0);
       }
-      break;
-   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
-      /* fall-through */
-   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
-      {
-         float inv_comp[4];
-         VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]);
-         VEC4_MUL(dest[3], dest[3], inv_comp);
+
+      /*
+       * Combine A terms
+       */
+      switch (softpipe->blend->alpha_func) {
+      case PIPE_BLEND_ADD:
+         VEC4_ADD(quadColor[3], source[3], dest[3]); /* A */
+         break;
+      case PIPE_BLEND_SUBTRACT:
+         VEC4_SUB(quadColor[3], source[3], dest[3]); /* A */
+         break;
+      case PIPE_BLEND_REVERSE_SUBTRACT:
+         VEC4_SUB(quadColor[3], dest[3], source[3]); /* A */
+         break;
+      case PIPE_BLEND_MIN:
+         VEC4_MIN(quadColor[3], source[3], dest[3]); /* A */
+         break;
+      case PIPE_BLEND_MAX:
+         VEC4_MAX(quadColor[3], source[3], dest[3]); /* A */
+         break;
+      default:
+         assert(0);
       }
-      break;
-   default:
-      assert(0);
-   }
-   
-   /*
-    * Combine RGB terms
-    */
-   switch (softpipe->blend->rgb_func) {
-   case PIPE_BLEND_ADD:
-      VEC4_ADD(quadColor[0], source[0], dest[0]); /* R */
-      VEC4_ADD(quadColor[1], source[1], dest[1]); /* G */
-      VEC4_ADD(quadColor[2], source[2], dest[2]); /* B */
-      break;
-   case PIPE_BLEND_SUBTRACT:
-      VEC4_SUB(quadColor[0], source[0], dest[0]); /* R */
-      VEC4_SUB(quadColor[1], source[1], dest[1]); /* G */
-      VEC4_SUB(quadColor[2], source[2], dest[2]); /* B */
-      break;
-   case PIPE_BLEND_REVERSE_SUBTRACT:
-      VEC4_SUB(quadColor[0], dest[0], source[0]); /* R */
-      VEC4_SUB(quadColor[1], dest[1], source[1]); /* G */
-      VEC4_SUB(quadColor[2], dest[2], source[2]); /* B */
-      break;
-   case PIPE_BLEND_MIN:
-      VEC4_MIN(quadColor[0], source[0], dest[0]); /* R */
-      VEC4_MIN(quadColor[1], source[1], dest[1]); /* G */
-      VEC4_MIN(quadColor[2], source[2], dest[2]); /* B */
-      break;
-   case PIPE_BLEND_MAX:
-      VEC4_MAX(quadColor[0], source[0], dest[0]); /* R */
-      VEC4_MAX(quadColor[1], source[1], dest[1]); /* G */
-      VEC4_MAX(quadColor[2], source[2], dest[2]); /* B */
-      break;
-   default:
-      assert(0);
-   }
-   
-   /*
-    * Combine A terms
-    */
-   switch (softpipe->blend->alpha_func) {
-   case PIPE_BLEND_ADD:
-      VEC4_ADD(quadColor[3], source[3], dest[3]); /* A */
-      break;
-   case PIPE_BLEND_SUBTRACT:
-      VEC4_SUB(quadColor[3], source[3], dest[3]); /* A */
-      break;
-   case PIPE_BLEND_REVERSE_SUBTRACT:
-      VEC4_SUB(quadColor[3], dest[3], source[3]); /* A */
-      break;
-   case PIPE_BLEND_MIN:
-      VEC4_MIN(quadColor[3], source[3], dest[3]); /* A */
-      break;
-   case PIPE_BLEND_MAX:
-      VEC4_MAX(quadColor[3], source[3], dest[3]); /* A */
-      break;
-   default:
-      assert(0);
-   }
-   
+
+   } /* cbuf loop */
+
    /* pass blended quad to next stage */
    qs->next->run(qs->next, quad);
 }
index 2ae4e22a7de8776d981cd7ce663f28f016b2523d..b3db428ef180ac6efa3df2107bedaedbbd3560af 100644 (file)
@@ -13,7 +13,7 @@ static void
 cbuf_loop_quad(struct quad_stage *qs, struct quad_header *quad)
 {
    struct softpipe_context *softpipe = qs->softpipe;
-   float tmp[4][QUAD_SIZE];
+   float tmp[PIPE_MAX_COLOR_BUFS][4][QUAD_SIZE];
    unsigned i;
 
    assert(sizeof(quad->outputs.color) == sizeof(tmp));
@@ -30,7 +30,9 @@ cbuf_loop_quad(struct quad_stage *qs, struct quad_header *quad)
 
    for (i = 0; i < softpipe->framebuffer.num_cbufs; i++) {
       /* set current cbuffer */
+#if 0 /* obsolete & going away */
       softpipe->current_cbuf = i;
+#endif
 
       /* pass blended quad to next stage */
       qs->next->run(qs->next, quad);
index 1f09d900ca88b3de72ac53979defe9a5c13555ba..7fe080990bd05f215589fc5eebad6136f7eecd36 100644 (file)
@@ -47,38 +47,43 @@ static void
 colormask_quad(struct quad_stage *qs, struct quad_header *quad)
 {
    struct softpipe_context *softpipe = qs->softpipe;
-   float dest[4][QUAD_SIZE];
-   struct softpipe_cached_tile *tile
-      = sp_get_cached_tile(softpipe,
-                           softpipe->cbuf_cache[softpipe->current_cbuf],
-                           quad->x0, quad->y0);
-   float (*quadColor)[4] = quad->outputs.color;
-   uint i, j;
-
-   /* get/swizzle dest colors */
-   for (j = 0; j < QUAD_SIZE; j++) {
-      int x = (quad->x0 & (TILE_SIZE-1)) + (j & 1);
-      int y = (quad->y0 & (TILE_SIZE-1)) + (j >> 1);
-      for (i = 0; i < 4; i++) {
-         dest[i][j] = tile->data.color[y][x][i];
+   uint cbuf;
+
+   /* loop over colorbuffer outputs */
+   for (cbuf = 0; cbuf < softpipe->framebuffer.num_cbufs; cbuf++) {
+      float dest[4][QUAD_SIZE];
+      struct softpipe_cached_tile *tile
+         = sp_get_cached_tile(softpipe,
+                              softpipe->cbuf_cache[cbuf],
+                              quad->x0, quad->y0);
+      float (*quadColor)[4] = quad->outputs.color[cbuf];
+      uint i, j;
+
+      /* get/swizzle dest colors */
+      for (j = 0; j < QUAD_SIZE; j++) {
+         int x = (quad->x0 & (TILE_SIZE-1)) + (j & 1);
+         int y = (quad->y0 & (TILE_SIZE-1)) + (j >> 1);
+         for (i = 0; i < 4; i++) {
+            dest[i][j] = tile->data.color[y][x][i];
+         }
       }
-   }
 
-   /* R */
-   if (!(softpipe->blend->colormask & PIPE_MASK_R))
-       COPY_4V(quadColor[0], dest[0]);
+      /* R */
+      if (!(softpipe->blend->colormask & PIPE_MASK_R))
+          COPY_4V(quadColor[0], dest[0]);
 
-   /* G */
-   if (!(softpipe->blend->colormask & PIPE_MASK_G))
-       COPY_4V(quadColor[1], dest[1]);
+      /* G */
+      if (!(softpipe->blend->colormask & PIPE_MASK_G))
+          COPY_4V(quadColor[1], dest[1]);
 
-   /* B */
-   if (!(softpipe->blend->colormask & PIPE_MASK_B))
-       COPY_4V(quadColor[2], dest[2]);
+      /* B */
+      if (!(softpipe->blend->colormask & PIPE_MASK_B))
+          COPY_4V(quadColor[2], dest[2]);
 
-   /* A */
-   if (!(softpipe->blend->colormask & PIPE_MASK_A))
-       COPY_4V(quadColor[3], dest[3]);
+      /* A */
+      if (!(softpipe->blend->colormask & PIPE_MASK_A))
+          COPY_4V(quadColor[3], dest[3]);
+   }
 
    /* pass quad to next stage */
    qs->next->run(qs->next, quad);
index b3d3fae22fdad16d043fd3d55d4dd4c62e8f43f1..dd5ebb2296137c8e3b6ee3d99e79ac1708c8f1aa 100644 (file)
@@ -50,12 +50,17 @@ coverage_quad(struct quad_stage *qs, struct quad_header *quad)
    if ((softpipe->rasterizer->poly_smooth && quad->prim == PRIM_TRI) ||
        (softpipe->rasterizer->line_smooth && quad->prim == PRIM_LINE) ||
        (softpipe->rasterizer->point_smooth && quad->prim == PRIM_POINT)) {
-      float (*quadColor)[4] = quad->outputs.color;
-      unsigned j;
-      for (j = 0; j < QUAD_SIZE; j++) {
-         assert(quad->coverage[j] >= 0.0);
-         assert(quad->coverage[j] <= 1.0);
+      uint cbuf;
+
+      /* loop over colorbuffer outputs */
+      for (cbuf = 0; cbuf < softpipe->framebuffer.num_cbufs; cbuf++) {
+         float (*quadColor)[4] = quad->outputs.color[cbuf];
+         unsigned j;
+         for (j = 0; j < QUAD_SIZE; j++) {
+            assert(quad->coverage[j] >= 0.0);
+            assert(quad->coverage[j] <= 1.0);
          quadColor[3][j] *= quad->coverage[j];
+         }
       }
    }
 
index a73df31383f44c33f336ffe19d28cbb03b53bff0..9a20c056a22407c7a5b6fe89e8a3c835fc5da51b 100644 (file)
@@ -88,22 +88,64 @@ shade_quad(
                                    &qss->machine,
                                    quad );
 
-   /* store result color */
+#if 0 /* XXX multi color outputs - untested */
+   /* store outputs */
+   boolean z_written = FALSE;
+   {
+      const ubyte *sem_name = softpipe->fs->info.output_semantic_name;
+      const ubyte *sem_index = softpipe->fs->info.output_semantic_index;
+      const uint n = qss->stage.softpipe->fs->info.num_outputs;
+      uint i;
+      for (i = 0; i < n; i++) {
+         switch (sem_name[i]) {
+         case TGSI_SEMANTIC_COLOR:
+            {
+               uint cbuf = sem_index[i];
+               memcpy(quad->outputs.color[cbuf],
+                      &machine->Outputs[i].xyzw[0].f[0],
+                      sizeof(quad->outputs.color[0]) );
+            }
+            break;
+         case TGSI_SEMANTIC_POSITION:
+            {
+               uint j;
+               for (j = 0; j < 4; j++) {
+                  quad->outputs.depth[j] = machine->Outputs[0].xyzw[2].f[j];
+               }
+               z_written = TRUE;
+            }
+            break;
+         }
+      }
+   }
+
+   if (!z_written) {
+      /* compute Z values now, as in the quad earlyz stage */
+      /* XXX we should really only do this if the earlyz stage is not used */
+      const float fx = (float) quad->x0;
+      const float fy = (float) quad->y0;
+      const float dzdx = quad->posCoef->dadx[2];
+      const float dzdy = quad->posCoef->dady[2];
+      const float z0 = quad->posCoef->a0[2] + dzdx * fx + dzdy * fy;
+
+      quad->outputs.depth[0] = z0;
+      quad->outputs.depth[1] = z0 + dzdx;
+      quad->outputs.depth[2] = z0 + dzdy;
+      quad->outputs.depth[3] = z0 + dzdx + dzdy;
+   }
+#endif
+
+   /* store result color(s) */
    if (qss->colorOutSlot >= 0) {
       /* XXX need to handle multiple color outputs someday */
-      assert(qss->stage.softpipe->fs->info.output_semantic_name[qss->colorOutSlot]
+      assert(softpipe->fs->info.output_semantic_name[qss->colorOutSlot]
              == TGSI_SEMANTIC_COLOR);
       memcpy(
-             quad->outputs.color,
+             quad->outputs.color[0],
              &machine->Outputs[qss->colorOutSlot].xyzw[0].f[0],
-             sizeof( quad->outputs.color ) );
+             sizeof( quad->outputs.color[0] ) );
    }
 
-   /*
-    * XXX the following code for updating quad->outputs.depth
-    * isn't really needed if we did early z testing.
-    */
-
    /* store result Z */
    if (qss->depthOutSlot >= 0) {
       /* output[slot] is new Z */
index cfe8f11808135f46d35c4f728716a9425460f2ad..40083138a435f23b53c245aa183a30ddecb51c8c 100644 (file)
 
 
 /**
- * Write quad to framebuffer, taking mask into account.
- *
- * Note that surfaces support only full quad reads and writes.
+ * Last step of quad processing: write quad colors to the framebuffer,
+ * taking mask into account.
  */
 static void
 output_quad(struct quad_stage *qs, struct quad_header *quad)
 {
-   struct softpipe_context *softpipe = qs->softpipe;
-   struct softpipe_cached_tile *tile
-      = sp_get_cached_tile(softpipe,
-                           softpipe->cbuf_cache[softpipe->current_cbuf],
-                           quad->x0, quad->y0);
    /* in-tile pos: */
    const int itx = quad->x0 % TILE_SIZE;
    const int ity = quad->y0 % TILE_SIZE;
-   float (*quadColor)[4] = quad->outputs.color;
-   int i, j;
 
-   /* get/swizzle dest colors */
-   for (j = 0; j < QUAD_SIZE; j++) {
-      if (quad->mask & (1 << j)) {
-         int x = itx + (j & 1);
-         int y = ity + (j >> 1);
-         for (i = 0; i < 4; i++) { /* loop over color chans */
-            tile->data.color[y][x][i] = quadColor[i][j];
+   struct softpipe_context *softpipe = qs->softpipe;
+   uint cbuf;
+
+   /* loop over colorbuffer outputs */
+   for (cbuf = 0; cbuf < softpipe->framebuffer.num_cbufs; cbuf++) {
+      struct softpipe_cached_tile *tile
+         = sp_get_cached_tile(softpipe,
+                              softpipe->cbuf_cache[cbuf],
+                              quad->x0, quad->y0);
+      float (*quadColor)[4] = quad->outputs.color[cbuf];
+      int i, j;
+
+      /* get/swizzle dest colors */
+      for (j = 0; j < QUAD_SIZE; j++) {
+         if (quad->mask & (1 << j)) {
+            int x = itx + (j & 1);
+            int y = ity + (j >> 1);
+            for (i = 0; i < 4; i++) { /* loop over color chans */
+               tile->data.color[y][x][i] = quadColor[i][j];
+            }
          }
       }
    }