llvmpipe: Generate the fragment pipeline into a single function.
author José Fonseca <jfonseca@vmware.com>
Sat, 22 Aug 2009 11:39:44 +0000 (12:39 +0100)
committer José Fonseca <jfonseca@vmware.com>
Sat, 29 Aug 2009 08:21:40 +0000 (09:21 +0100)
Still hackish. Will document and optimize later.

13 files changed:
src/gallium/drivers/llvmpipe/Makefile
src/gallium/drivers/llvmpipe/SConscript
src/gallium/drivers/llvmpipe/lp_bld_flow.c
src/gallium/drivers/llvmpipe/lp_context.c
src/gallium/drivers/llvmpipe/lp_context.h
src/gallium/drivers/llvmpipe/lp_quad.h
src/gallium/drivers/llvmpipe/lp_quad_blend.c [deleted file]
src/gallium/drivers/llvmpipe/lp_quad_fs.c
src/gallium/drivers/llvmpipe/lp_quad_pipe.c
src/gallium/drivers/llvmpipe/lp_quad_pipe.h
src/gallium/drivers/llvmpipe/lp_state.h
src/gallium/drivers/llvmpipe/lp_state_derived.c
src/gallium/drivers/llvmpipe/lp_state_fs.c

index fbd1300c856b7ec5becb6eeb20ee9b5aed6c0a63..fb77f2a4c939397f5f55c15e2afcdc5ca4f7909c 100644 (file)
@@ -30,7 +30,6 @@ C_SOURCES = \
        lp_prim_setup.c \
        lp_prim_vbuf.c \
        lp_setup.c \
-       lp_quad_blend.c \
        lp_quad_pipe.c \
        lp_quad_fs.c \
        lp_query.c \
index 614d92b0f58778e9f7205fc3dcca7e55ead415c8..f9c09f70746fe2fe9768d69f9a4b03c6c049a185 100644 (file)
@@ -34,7 +34,6 @@ llvmpipe = env.ConvenienceLibrary(
                'lp_prim_setup.c',
                'lp_prim_vbuf.c',
                'lp_setup.c',
-               'lp_quad_blend.c',
                'lp_quad_pipe.c',
                'lp_quad_fs.c',
                'lp_query.c',
index d94af0dea4006c1e42ade3afc7fbc84c1fa42ba2..9d99e1a9d9f97283f0b974d86e2d4451e5f5b338 100644 (file)
@@ -66,6 +66,8 @@ lp_build_mask_update(struct lp_build_mask_context *mask,
    else
       mask->value = value;
 
+   /* FIXME: disabled until we have proper control flow helpers */
+#if 0
    cond = LLVMBuildICmp(mask->builder,
                         LLVMIntEQ,
                         LLVMBuildBitCast(mask->builder, mask->value, mask->reg_type, ""),
@@ -95,6 +97,7 @@ lp_build_mask_update(struct lp_build_mask_context *mask,
    LLVMBuildCondBr(mask->builder, cond, mask->skip_block, new_block);
 
    LLVMPositionBuilderAtEnd(mask->builder, new_block);
+#endif
 }
 
 
index 39019ab3f85c639ad655a990118d92210c85af7d..b9fd681e73145931cc8c18c188611937b001cb33 100644 (file)
@@ -86,7 +86,6 @@ static void llvmpipe_destroy( struct pipe_context *pipe )
       draw_destroy( llvmpipe->draw );
 
       llvmpipe->quad.shade->destroy( llvmpipe->quad.shade );
-      llvmpipe->quad.blend->destroy( llvmpipe->quad.blend );
 
    for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++)
       lp_destroy_tile_cache(llvmpipe->cbuf_cache[i]);
@@ -217,7 +216,6 @@ llvmpipe_create( struct pipe_screen *screen )
 
    /* setup quad rendering stages */
       llvmpipe->quad.shade = lp_quad_shade_stage(llvmpipe);
-      llvmpipe->quad.blend = lp_quad_blend_stage(llvmpipe);
 
    /* vertex shader samplers */
    for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
index c31df0bf3919c2cf52419ee3eb681949447d5433..9de21d0cd046603ec737d83122d3929824d220b3 100644 (file)
@@ -117,7 +117,6 @@ struct llvmpipe_context {
    /** Software quad rendering pipeline */
    struct {
       struct quad_stage *shade;
-      struct quad_stage *blend;
 
       struct quad_stage *first; /**< points to one of the above stages */
    } quad;
index 09027167391eea7430b7c2f6b48d45bb289818f2..7eb05de77a10da4df48ca7456d70421d3a3134ae 100644 (file)
@@ -105,7 +105,6 @@ struct quad_interp_coef
 struct quad_header {
    struct quad_header_input input;
    struct quad_header_inout inout;
-   struct quad_header_output output;
 
    /* Redundant/duplicated:
     */
diff --git a/src/gallium/drivers/llvmpipe/lp_quad_blend.c b/src/gallium/drivers/llvmpipe/lp_quad_blend.c
deleted file mode 100644 (file)
index ba12322..0000000
+++ /dev/null
@@ -1,132 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-/**
- * Quad blending.
- *
- * @author Jose Fonseca <jfonseca@vmware.com>
- * @author Brian Paul
- */
-
-#include "pipe/p_defines.h"
-#include "util/u_math.h"
-#include "util/u_memory.h"
-#include "util/u_debug_dump.h"
-#include "lp_context.h"
-#include "lp_state.h"
-#include "lp_quad.h"
-#include "lp_surface.h"
-#include "lp_tile_cache.h"
-#include "lp_tile_soa.h"
-#include "lp_quad_pipe.h"
-
-
-static void blend_begin(struct quad_stage *qs)
-{
-}
-
-
-static void
-blend_run(struct quad_stage *qs,
-          struct quad_header *quads[],
-          unsigned nr)
-{
-   struct llvmpipe_context *llvmpipe = qs->llvmpipe;
-   struct lp_blend_state *blend = llvmpipe->blend;
-   unsigned cbuf;
-   uint q, i, j;
-
-   for (cbuf = 0; cbuf < llvmpipe->framebuffer.nr_cbufs; cbuf++) 
-   {
-      unsigned x0 = quads[0]->input.x0;
-      unsigned y0 = quads[0]->input.y0;
-      uint8_t ALIGN16_ATTRIB src[NUM_CHANNELS][TILE_VECTOR_HEIGHT*TILE_VECTOR_WIDTH];
-      uint8_t ALIGN16_ATTRIB mask[16];
-      uint8_t *tile = lp_get_cached_tile(llvmpipe->cbuf_cache[cbuf], x0, y0);
-      uint8_t *dst;
-
-      assert(nr * QUAD_SIZE == TILE_VECTOR_HEIGHT * TILE_VECTOR_WIDTH);
-
-      assert(x0 % TILE_VECTOR_WIDTH == 0);
-      assert(y0 % TILE_VECTOR_HEIGHT == 0);
-
-      dst = &TILE_PIXEL(tile, x0 & (TILE_SIZE-1), y0 & (TILE_SIZE-1), 0);
-
-      for (q = 0; q < nr; ++q) {
-         struct quad_header *quad = quads[q];
-         const int itx = (quad->input.x0 & (TILE_SIZE-1));
-         const int ity = (quad->input.y0 & (TILE_SIZE-1));
-
-         assert(quad->input.x0 == x0 + q*2);
-         assert(quad->input.y0 == y0);
-
-         /* get/swizzle src/dest colors
-          */
-         for (j = 0; j < QUAD_SIZE; j++) {
-            int x = itx + (j & 1);
-            int y = ity + (j >> 1);
-
-            assert(x < TILE_SIZE);
-            assert(y < TILE_SIZE);
-
-            for (i = 0; i < 4; i++) {
-               src[i][4*q + j] = float_to_ubyte(quad->output.color[cbuf][i][j]);
-            }
-            mask[4*q + j] = quad->inout.mask & (1 << j) ? ~0 : 0;
-         }
-      }
-
-      assert(blend->jit_function);
-      assert((((uintptr_t)src) & 0xf) == 0);
-      assert((((uintptr_t)dst) & 0xf) == 0);
-      assert((((uintptr_t)llvmpipe->blend_color) & 0xf) == 0);
-      if(blend->jit_function)
-         blend->jit_function( mask,
-                              &src[0][0],
-                              &llvmpipe->blend_color[0][0],
-                              dst );
-   }
-}
-
-
-static void blend_destroy(struct quad_stage *qs)
-{
-   FREE( qs );
-}
-
-
-struct quad_stage *lp_quad_blend_stage( struct llvmpipe_context *llvmpipe )
-{
-   struct quad_stage *stage = CALLOC_STRUCT(quad_stage);
-
-   stage->llvmpipe = llvmpipe;
-   stage->begin = blend_begin;
-   stage->run = blend_run;
-   stage->destroy = blend_destroy;
-
-   return stage;
-}
index 4f7a061fd6a8a529ac34db0da35bfb2cf019bc32..9ead0864a6691f4415eb1e9b8ee5112cca2b32a6 100644 (file)
@@ -46,6 +46,8 @@
 #include "lp_quad.h"
 #include "lp_quad_pipe.h"
 #include "lp_texture.h"
+#include "lp_tile_cache.h"
+#include "lp_tile_soa.h"
 
 
 struct quad_shade_stage
@@ -69,30 +71,48 @@ quad_shade_stage(struct quad_stage *qs)
 /**
  * Execute fragment shader for the four fragments in the quad.
  */
-static boolean
-shade_quad(struct quad_stage *qs, struct quad_header *quad)
+static void
+shade_quads(struct quad_stage *qs,
+                 struct quad_header *quads[],
+                 unsigned nr)
 {
    struct quad_shade_stage *qss = quad_shade_stage( qs );
    struct llvmpipe_context *llvmpipe = qs->llvmpipe;
    struct lp_fragment_shader *fs = llvmpipe->fs;
    void *constants;
    struct tgsi_sampler **samplers;
+   struct quad_header *quad = quads[0];
    const unsigned x = quad->input.x0;
    const unsigned y = quad->input.y0;
+   uint8_t *tile = lp_get_cached_tile(llvmpipe->cbuf_cache[0], x, y);
+   uint8_t *color;
    void *depth;
-   uint32_t ALIGN16_ATTRIB mask[NUM_CHANNELS];
+   uint32_t ALIGN16_ATTRIB mask[4][NUM_CHANNELS];
    unsigned chan_index;
+   unsigned q;
 
    assert(fs->current);
    if(!fs->current)
-      return FALSE;
+      return;
+
+   /* Sanity checks */
+   assert(nr * QUAD_SIZE == TILE_VECTOR_HEIGHT * TILE_VECTOR_WIDTH);
+   assert(x % TILE_VECTOR_WIDTH == 0);
+   assert(y % TILE_VECTOR_HEIGHT == 0);
+   for (q = 0; q < nr; ++q) {
+      assert(quads[q]->input.x0 == x + q*2);
+      assert(quads[q]->input.y0 == y);
+   }
 
-   constants = llvmpipe->mapped_constants[PIPE_SHADER_FRAGMENT];
-   samplers = (struct tgsi_sampler **)llvmpipe->tgsi.frag_samplers_list;
+   /* mask */
+   for (q = 0; q < 4; ++q)
+      for (chan_index = 0; chan_index < NUM_CHANNELS; ++chan_index)
+         mask[q][chan_index] = quads[q]->inout.mask & (1 << chan_index) ? ~0 : 0;
 
-   for (chan_index = 0; chan_index < NUM_CHANNELS; ++chan_index)
-      mask[chan_index] = quad->inout.mask & (1 << chan_index) ? ~0 : 0;
+   /* color buffer */
+   color = &TILE_PIXEL(tile, x & (TILE_SIZE-1), y & (TILE_SIZE-1), 0);
 
+   /* depth buffer */
    if(qss->map) {
       assert((x % 2) == 0);
       assert((y % 2) == 0);
@@ -103,9 +123,14 @@ shade_quad(struct quad_stage *qs, struct quad_header *quad)
    else
       depth = NULL;
 
+   constants = llvmpipe->mapped_constants[PIPE_SHADER_FRAGMENT];
+   samplers = (struct tgsi_sampler **)llvmpipe->tgsi.frag_samplers_list;
+   /* TODO: blend color */
+
    assert((((uintptr_t)mask) & 0xf) == 0);
-   assert((((uintptr_t)quad->output.color) & 0xf) == 0);
    assert((((uintptr_t)depth) & 0xf) == 0);
+   assert((((uintptr_t)color) & 0xf) == 0);
+   assert((((uintptr_t)llvmpipe->blend_color) & 0xf) == 0);
 
    /* run shader */
    fs->current->jit_function( x,
@@ -114,71 +139,14 @@ shade_quad(struct quad_stage *qs, struct quad_header *quad)
                               quad->coef->dadx,
                               quad->coef->dady,
                               constants,
-                              mask,
-                              quad->output.color,
+                              &mask[0][0],
+                              color,
                               depth,
                               samplers);
-
-   for (chan_index = 0; chan_index < NUM_CHANNELS; ++chan_index)
-      if(!mask[chan_index])
-         quad->inout.mask &= ~(1 << chan_index);
-
-   if (quad->inout.mask == 0)
-      return FALSE;
-
-   return TRUE;
-}
-
-
-
-static void
-coverage_quad(struct quad_stage *qs, struct quad_header *quad)
-{
-   struct llvmpipe_context *llvmpipe = qs->llvmpipe;
-   uint cbuf;
-
-   /* loop over colorbuffer outputs */
-   for (cbuf = 0; cbuf < llvmpipe->framebuffer.nr_cbufs; cbuf++) {
-      float (*quadColor)[4] = quad->output.color[cbuf];
-      unsigned j;
-      for (j = 0; j < QUAD_SIZE; j++) {
-         assert(quad->input.coverage[j] >= 0.0);
-         assert(quad->input.coverage[j] <= 1.0);
-         quadColor[3][j] *= quad->input.coverage[j];
-      }
-   }
 }
 
 
 
-static void
-shade_quads(struct quad_stage *qs, 
-                 struct quad_header *quads[],
-                 unsigned nr)
-{
-   unsigned i, pass = 0;
-   
-   for (i = 0; i < nr; i++) {
-      if(!quads[i]->inout.mask)
-         continue;
-
-      if (!shade_quad(qs, quads[i]))
-         continue;
-
-      if (/*do_coverage*/ 0)
-         coverage_quad( qs, quads[i] );
-
-      ++pass;
-   }
-   
-   if (pass)
-      qs->next->run(qs->next, quads, nr);
-}
-   
-
-
-
-
 /**
  * Per-primitive (or per-begin?) setup
  */
@@ -210,7 +178,6 @@ shade_begin(struct quad_stage *qs)
 
    }
 
-   qs->next->begin(qs->next);
 }
 
 
index 70d3ad39a627e7633a70575fbb7a4acc6344a613..e672dc9e0315ffb6f26bceb46707829bf6dec76c 100644 (file)
 #include "lp_state.h"
 #include "pipe/p_shader_tokens.h"
 
-static void
-lp_push_quad_first( struct llvmpipe_context *lp,
-                    struct quad_stage *quad )
-{
-   quad->next = lp->quad.first;
-   lp->quad.first = quad;
-}
-
-
 void
 lp_build_quad_pipeline(struct llvmpipe_context *lp)
 {
-   lp->quad.first = lp->quad.blend;
-
-   lp_push_quad_first( lp, lp->quad.shade );
+   lp->quad.first = lp->quad.shade;
 }
 
index 52d4d68661211d5ac2c1487bfba368b0a9151481..ff4747f33fc47851f64768f3c222dd9b4e1d6683 100644 (file)
@@ -55,14 +55,7 @@ struct quad_stage {
 };
 
 
-struct quad_stage *lp_quad_polygon_stipple_stage( struct llvmpipe_context *llvmpipe );
-struct quad_stage *lp_quad_earlyz_stage( struct llvmpipe_context *llvmpipe );
 struct quad_stage *lp_quad_shade_stage( struct llvmpipe_context *llvmpipe );
-struct quad_stage *lp_quad_stencil_test_stage( struct llvmpipe_context *llvmpipe );
-struct quad_stage *lp_quad_occlusion_stage( struct llvmpipe_context *llvmpipe );
-struct quad_stage *lp_quad_coverage_stage( struct llvmpipe_context *llvmpipe );
-struct quad_stage *lp_quad_blend_stage( struct llvmpipe_context *llvmpipe );
-struct quad_stage *lp_quad_output_stage( struct llvmpipe_context *llvmpipe );
 
 void lp_build_quad_pipeline(struct llvmpipe_context *lp);
 
index 2d6add8f3ac790e641bdd1b2f8db84fefe49ee86..83dace30ce4992638cebbb6a9115f97e2f9c55ae 100644 (file)
@@ -78,6 +78,7 @@ struct lp_fragment_shader_variant_key
 {
    struct pipe_depth_state depth;
    struct pipe_alpha_state alpha;
+   struct pipe_blend_state blend;
 };
 
 
index 35b24a12ffb53d5de1b4c0bd1602840bb14e6c50..9f5d2ffb1186c841d1f4838390b56037d21fdaa4 100644 (file)
@@ -247,13 +247,12 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe )
       compute_cliprect(llvmpipe);
 
    if (llvmpipe->dirty & (LP_NEW_FS |
+                          LP_NEW_BLEND |
                           LP_NEW_DEPTH_STENCIL_ALPHA))
       llvmpipe_update_fs( llvmpipe );
 
 
-   if (llvmpipe->dirty & (LP_NEW_BLEND |
-                          LP_NEW_DEPTH_STENCIL_ALPHA |
-                          LP_NEW_FRAMEBUFFER |
+   if (llvmpipe->dirty & (LP_NEW_FRAMEBUFFER |
                           LP_NEW_FS))
       lp_build_quad_pipeline(llvmpipe);
 
index 521700acd52180f0d6f35fa0cc4eb37f97d4c179..9b0e7cdd37c77eb45d60b812030267936fd48035 100644 (file)
 #include "tgsi/tgsi_parse.h"
 #include "lp_bld_type.h"
 #include "lp_bld_conv.h"
+#include "lp_bld_logic.h"
 #include "lp_bld_depth.h"
 #include "lp_bld_tgsi.h"
 #include "lp_bld_alpha.h"
+#include "lp_bld_blend.h"
 #include "lp_bld_swizzle.h"
 #include "lp_bld_flow.h"
 #include "lp_bld_debug.h"
@@ -55,13 +57,13 @@ static const unsigned char quad_offset_y[4] = {0, 0, 1, 1};
 
 
 static void
-setup_pos_vector(LLVMBuilderRef builder,
-                 LLVMValueRef x,
-                 LLVMValueRef y,
-                 LLVMValueRef a0_ptr,
-                 LLVMValueRef dadx_ptr,
-                 LLVMValueRef dady_ptr,
-                 LLVMValueRef *pos)
+generate_pos(LLVMBuilderRef builder,
+             LLVMValueRef x,
+             LLVMValueRef y,
+             LLVMValueRef a0_ptr,
+             LLVMValueRef dadx_ptr,
+             LLVMValueRef dady_ptr,
+             LLVMValueRef *pos)
 {
    LLVMTypeRef int_elem_type = LLVMInt32Type();
    LLVMTypeRef int_vec_type = LLVMVectorType(int_elem_type, QUAD_SIZE);
@@ -110,13 +112,13 @@ setup_pos_vector(LLVMBuilderRef builder,
 
 
 static void
-depth_test_generate(struct llvmpipe_context *lp,
-                    LLVMBuilderRef builder,
-                    const struct pipe_depth_state *state,
-                    union lp_type src_type,
-                    struct lp_build_mask_context *mask,
-                    LLVMValueRef src,
-                    LLVMValueRef dst_ptr)
+generate_depth(struct llvmpipe_context *lp,
+               LLVMBuilderRef builder,
+               const struct pipe_depth_state *state,
+               union lp_type src_type,
+               struct lp_build_mask_context *mask,
+               LLVMValueRef src,
+               LLVMValueRef dst_ptr)
 {
    const struct util_format_description *format_desc;
    union lp_type dst_type;
@@ -151,18 +153,177 @@ depth_test_generate(struct llvmpipe_context *lp,
 }
 
 
-static struct lp_fragment_shader_variant *
-shader_generate(struct llvmpipe_context *lp,
-                struct lp_fragment_shader *shader,
-                const struct lp_fragment_shader_variant_key *key)
+/**
+ * Generate the fragment shader, depth/stencil and alpha tests.
+ */
+static void
+generate_fs(struct llvmpipe_context *lp,
+            struct lp_fragment_shader *shader,
+            const struct lp_fragment_shader_variant_key *key,
+            LLVMBuilderRef builder,
+            union lp_type type,
+            unsigned i,
+            LLVMValueRef x,
+            LLVMValueRef y,
+            LLVMValueRef a0_ptr,
+            LLVMValueRef dadx_ptr,
+            LLVMValueRef dady_ptr,
+            LLVMValueRef consts_ptr,
+            LLVMValueRef *pmask,
+            LLVMValueRef *color,
+            LLVMValueRef depth_ptr,
+            LLVMValueRef samplers_ptr)
 {
-   struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen);
-   struct lp_fragment_shader_variant *variant;
    const struct tgsi_token *tokens = shader->base.tokens;
-   union lp_type type;
    LLVMTypeRef elem_type;
    LLVMTypeRef vec_type;
    LLVMTypeRef int_vec_type;
+   LLVMValueRef pos[NUM_CHANNELS];
+   LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
+   struct lp_build_mask_context mask;
+   boolean early_depth_test;
+   unsigned attrib;
+   unsigned chan;
+
+   elem_type = lp_build_elem_type(type);
+   vec_type = lp_build_vec_type(type);
+   int_vec_type = lp_build_int_vec_type(type);
+
+   generate_pos(builder, x, y, a0_ptr, dadx_ptr, dady_ptr, pos);
+
+   lp_build_mask_begin(&mask, builder, type, *pmask);
+
+   early_depth_test =
+      lp->depth_stencil->depth.enabled &&
+      lp->framebuffer.zsbuf &&
+      !lp->depth_stencil->alpha.enabled &&
+      !lp->fs->info.uses_kill &&
+      !lp->fs->info.writes_z;
+
+   if(early_depth_test)
+      generate_depth(lp, builder, &key->depth,
+                          type, &mask,
+                          pos[2], depth_ptr);
+
+   memset(outputs, 0, sizeof outputs);
+
+   lp_build_tgsi_soa(builder, tokens, type, &mask,
+                     pos, a0_ptr, dadx_ptr, dady_ptr,
+                     consts_ptr, outputs, samplers_ptr);
+
+   for (attrib = 0; attrib < shader->info.num_outputs; ++attrib) {
+      for(chan = 0; chan < NUM_CHANNELS; ++chan) {
+         if(outputs[attrib][chan]) {
+            lp_build_name(outputs[attrib][chan], "output%u.%u.%c", i, attrib, "xyzw"[chan]);
+
+            switch (shader->info.output_semantic_name[attrib]) {
+            case TGSI_SEMANTIC_COLOR:
+               {
+                  unsigned cbuf = shader->info.output_semantic_index[attrib];
+
+                  lp_build_name(outputs[attrib][chan], "color%u.%u.%c", i, attrib, "rgba"[chan]);
+
+                  /* Alpha test */
+                  /* XXX: should the alpha reference value be passed separately? */
+                  if(cbuf == 0 && chan == 3)
+                     lp_build_alpha_test(builder, &key->alpha, type,
+                                         &mask,
+                                         outputs[attrib][chan]);
+
+                  if(cbuf == 0)
+                     color[chan] = outputs[attrib][chan];
+
+                  break;
+               }
+
+            case TGSI_SEMANTIC_POSITION:
+               if(chan == 2)
+                  pos[2] = outputs[attrib][chan];
+               break;
+            }
+         }
+      }
+   }
+
+   if(!early_depth_test)
+      generate_depth(lp, builder, &key->depth,
+                          type, &mask,
+                          pos[2], depth_ptr);
+
+   lp_build_mask_end(&mask);
+
+   *pmask = mask.value;
+
+}
+
+
+/**
+ * Emit code that blends src into the color values stored at dst_ptr.
+ * Per channel: load dst (and con from const_ptr when it is non-NULL), combine
+ * them through lp_build_blend_soa() according to the given blend state, then
+ * store lp_build_select(mask, res, dst) back to dst_ptr.
+ */
+static void
+generate_blend(const struct pipe_blend_state *blend,
+               LLVMBuilderRef builder,
+               union lp_type type,
+               LLVMValueRef mask,
+               LLVMValueRef *src,
+               LLVMValueRef const_ptr,
+               LLVMValueRef dst_ptr)
+{
+   struct lp_build_context bld;
+   LLVMTypeRef vec_type;
+   LLVMTypeRef int_vec_type;
+   LLVMValueRef con[4];
+   LLVMValueRef dst[4];
+   LLVMValueRef res[4];
+   unsigned chan;
+
+   vec_type = lp_build_vec_type(type);
+   int_vec_type = lp_build_int_vec_type(type); /* NOTE(review): not read below */
+
+   lp_build_context_init(&bld, builder, type);
+
+   for(chan = 0; chan < 4; ++chan) {
+      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan, 0);
+
+      if(const_ptr)
+         con[chan] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, const_ptr, &index, 1, ""), "");
+      else
+         con[chan] = LLVMGetUndef(vec_type); /* FIXME: no constant color pointer; undef placeholder */
+
+      dst[chan] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dst_ptr, &index, 1, ""), "");
+
+      lp_build_name(con[chan], "con.%c", "rgba"[chan]);
+      lp_build_name(dst[chan], "dst.%c", "rgba"[chan]);
+   }
+
+   lp_build_blend_soa(builder, blend, type, src, dst, con, res);
+
+   for(chan = 0; chan < 4; ++chan) {
+      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan, 0);
+      lp_build_name(res[chan], "res.%c", "rgba"[chan]);
+      res[chan] = lp_build_select(&bld, mask, res[chan], dst[chan]); /* presumably keeps dst where mask is clear - confirm */
+      LLVMBuildStore(builder, res[chan], LLVMBuildGEP(builder, dst_ptr, &index, 1, ""));
+   }
+}
+
+
+static struct lp_fragment_shader_variant *
+generate_fragment(struct llvmpipe_context *lp,
+                  struct lp_fragment_shader *shader,
+                  const struct lp_fragment_shader_variant_key *key)
+{
+   struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen);
+   struct lp_fragment_shader_variant *variant;
+   union lp_type fs_type;
+   union lp_type blend_type;
+   LLVMTypeRef fs_elem_type;
+   LLVMTypeRef fs_vec_type;
+   LLVMTypeRef fs_int_vec_type;
+   LLVMTypeRef blend_vec_type;
+   LLVMTypeRef blend_int_vec_type;
    LLVMTypeRef arg_types[10];
    LLVMTypeRef func_type;
    LLVMValueRef x;
@@ -177,24 +338,38 @@ shader_generate(struct llvmpipe_context *lp,
    LLVMValueRef samplers_ptr;
    LLVMBasicBlockRef block;
    LLVMBuilderRef builder;
-   LLVMValueRef pos[NUM_CHANNELS];
-   LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
-   struct lp_build_mask_context mask;
-   boolean early_depth_test;
+   LLVMValueRef fs_mask[LP_MAX_VECTOR_LENGTH];
+   LLVMValueRef fs_out_color[NUM_CHANNELS][LP_MAX_VECTOR_LENGTH];
+   LLVMValueRef blend_mask;
+   LLVMValueRef blend_in_color[NUM_CHANNELS];
    LLVMValueRef fetch_texel;
+   unsigned num_fs;
    unsigned i;
-   unsigned attrib;
    unsigned chan;
 
 #ifdef DEBUG
    tgsi_dump(shader->base.tokens, 0);
-   debug_printf("depth.enabled = %u\n", key->depth.enabled);
-   debug_printf("depth.func = %s\n", debug_dump_func(key->depth.func, TRUE));
-   debug_printf("depth.writemask = %u\n", key->depth.writemask);
-   debug_printf("depth.occlusion_count = %u\n", key->depth.occlusion_count);
-   debug_printf("alpha.enabled = %u\n", key->alpha.enabled);
-   debug_printf("alpha.func = %s\n", debug_dump_func(key->alpha.func, TRUE));
-   debug_printf("alpha.ref_value = %f\n", key->alpha.ref_value);
+   if(key->depth.enabled) {
+      debug_printf("depth.func = %s\n", debug_dump_func(key->depth.func, TRUE));
+      debug_printf("depth.writemask = %u\n", key->depth.writemask);
+      debug_printf("depth.occlusion_count = %u\n", key->depth.occlusion_count);
+   }
+   if(key->alpha.enabled) {
+      debug_printf("alpha.func = %s\n", debug_dump_func(key->alpha.func, TRUE));
+      debug_printf("alpha.ref_value = %f\n", key->alpha.ref_value);
+   }
+   if(key->blend.logicop_enable) {
+      debug_printf("blend.logicop_func = %u\n", key->blend.logicop_func);
+   }
+   else if(key->blend.blend_enable) {
+      debug_printf("blend.rgb_func = %s\n",   debug_dump_blend_func  (key->blend.rgb_func, TRUE));
+      debug_printf("rgb_src_factor = %s\n",   debug_dump_blend_factor(key->blend.rgb_src_factor, TRUE));
+      debug_printf("rgb_dst_factor = %s\n",   debug_dump_blend_factor(key->blend.rgb_dst_factor, TRUE));
+      debug_printf("alpha_func = %s\n",       debug_dump_blend_func  (key->blend.alpha_func, TRUE));
+      debug_printf("alpha_src_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_src_factor, TRUE));
+      debug_printf("alpha_dst_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_dst_factor, TRUE));
+   }
+   debug_printf("blend.colormask = 0x%x\n", key->blend.colormask);
 #endif
 
    variant = CALLOC_STRUCT(lp_fragment_shader_variant);
@@ -204,26 +379,37 @@ shader_generate(struct llvmpipe_context *lp,
    variant->shader = shader;
    memcpy(&variant->key, key, sizeof *key);
 
-   type.value = 0;
-   type.floating = TRUE; /* floating point values */
-   type.sign = TRUE;     /* values are signed */
-   type.norm = FALSE;    /* values are not limited to [0,1] or [-1,1] */
-   type.width = 32;      /* 32-bit float */
-   type.length = 4;      /* 4 element per vector */
+   fs_type.value = 0;
+   fs_type.floating = TRUE; /* floating point values */
+   fs_type.sign = TRUE;     /* values are signed */
+   fs_type.norm = FALSE;    /* values are not limited to [0,1] or [-1,1] */
+   fs_type.width = 32;      /* 32-bit float */
+   fs_type.length = 4;      /* 4 element per vector */
+   num_fs = 4;
 
-   elem_type = lp_build_elem_type(type);
-   vec_type = lp_build_vec_type(type);
-   int_vec_type = lp_build_int_vec_type(type);
+   blend_type.value = 0;
+   blend_type.floating = FALSE; /* values are integers */
+   blend_type.sign = FALSE;     /* values are unsigned */
+   blend_type.norm = TRUE;      /* values are in [0,1] or [-1,1] */
+   blend_type.width = 8;        /* 8-bit ubyte values */
+   blend_type.length = 16;      /* 16 elements per vector */
+
+   fs_elem_type = lp_build_elem_type(fs_type);
+   fs_vec_type = lp_build_vec_type(fs_type);
+   fs_int_vec_type = lp_build_int_vec_type(fs_type);
+
+   blend_vec_type = lp_build_vec_type(blend_type);
+   blend_int_vec_type = lp_build_int_vec_type(blend_type);
 
    arg_types[0] = LLVMInt32Type();                     /* x */
    arg_types[1] = LLVMInt32Type();                     /* y */
-   arg_types[2] = LLVMPointerType(elem_type, 0);       /* a0 */
-   arg_types[3] = LLVMPointerType(elem_type, 0);       /* dadx */
-   arg_types[4] = LLVMPointerType(elem_type, 0);       /* dady */
-   arg_types[5] = LLVMPointerType(elem_type, 0);       /* consts */
-   arg_types[6] = LLVMPointerType(int_vec_type, 0);    /* mask */
-   arg_types[7] = LLVMPointerType(vec_type, 0);        /* color */
-   arg_types[8] = LLVMPointerType(int_vec_type, 0);    /* depth */
+   arg_types[2] = LLVMPointerType(fs_elem_type, 0);    /* a0 */
+   arg_types[3] = LLVMPointerType(fs_elem_type, 0);    /* dadx */
+   arg_types[4] = LLVMPointerType(fs_elem_type, 0);    /* dady */
+   arg_types[5] = LLVMPointerType(fs_elem_type, 0);    /* consts */
+   arg_types[6] = LLVMPointerType(fs_int_vec_type, 0); /* mask */
+   arg_types[7] = LLVMPointerType(blend_vec_type, 0);  /* color */
+   arg_types[8] = LLVMPointerType(fs_int_vec_type, 0); /* depth */
    arg_types[9] = LLVMPointerType(LLVMInt8Type(), 0);  /* samplers */
 
    func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0);
@@ -260,70 +446,57 @@ shader_generate(struct llvmpipe_context *lp,
    builder = LLVMCreateBuilder();
    LLVMPositionBuilderAtEnd(builder, block);
 
-   setup_pos_vector(builder, x, y, a0_ptr, dadx_ptr, dady_ptr, pos);
-
-   lp_build_mask_begin(&mask, builder, type,
-                       LLVMBuildLoad(builder, mask_ptr, ""));
-
-   early_depth_test =
-      lp->depth_stencil->depth.enabled &&
-      lp->framebuffer.zsbuf &&
-      !lp->depth_stencil->alpha.enabled &&
-      !lp->fs->info.uses_kill &&
-      !lp->fs->info.writes_z;
-
-   if(early_depth_test)
-      depth_test_generate(lp, builder, &key->depth,
-                          type, &mask,
-                          pos[2], depth_ptr);
-
-   memset(outputs, 0, sizeof outputs);
-
-   lp_build_tgsi_soa(builder, tokens, type, &mask,
-                     pos, a0_ptr, dadx_ptr, dady_ptr,
-                     consts_ptr, outputs, samplers_ptr);
-
-   for (attrib = 0; attrib < shader->info.num_outputs; ++attrib) {
-      for(chan = 0; chan < NUM_CHANNELS; ++chan) {
-         if(outputs[attrib][chan]) {
-            lp_build_name(outputs[attrib][chan], "output%u.%c", attrib, "xyzw"[chan]);
-
-            switch (shader->info.output_semantic_name[attrib]) {
-            case TGSI_SEMANTIC_COLOR:
-               {
-                  unsigned cbuf = shader->info.output_semantic_index[attrib];
-                  LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), cbuf*NUM_CHANNELS + chan, 0);
-                  LLVMValueRef output_ptr = LLVMBuildGEP(builder, color_ptr, &index, 1, "");
-                  lp_build_name(outputs[attrib][chan], "color%u.%c", attrib, "rgba"[chan]);
-                  LLVMBuildStore(builder, outputs[attrib][chan], output_ptr);
-
-                  /* Alpha test */
-                  /* XXX: should the alpha reference value be passed separately? */
-                  if(cbuf == 0 && chan == 3)
-                     lp_build_alpha_test(builder, &key->alpha, type,
-                                         &mask,
-                                         outputs[attrib][chan]);
-
-                  break;
-               }
+   for(i = 0; i < num_fs; ++i) {
+      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+      LLVMValueRef out_color[NUM_CHANNELS];
+      LLVMValueRef x_i;
+      LLVMValueRef depth_ptr_i;
+
+      /* TODO: Reuse position interpolation */
+      x_i = LLVMBuildAdd(builder, x, LLVMConstInt(LLVMInt32Type(), 2*i, 0), "");
+
+      fs_mask[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, mask_ptr, &index, 1, ""), "");
+      depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &index, 1, "");
+
+      generate_fs(lp,
+                  shader,
+                  key,
+                  builder,
+                  fs_type,
+                  i,
+                  x_i,
+                  y,
+                  a0_ptr,
+                  dadx_ptr,
+                  dady_ptr,
+                  consts_ptr,
+                  &fs_mask[i],
+                  out_color,
+                  depth_ptr_i,
+                  samplers_ptr);
+
+      for(chan = 0; chan < NUM_CHANNELS; ++chan)
+         fs_out_color[chan][i] = out_color[chan];
+   }
 
-            case TGSI_SEMANTIC_POSITION:
-               if(chan == 2)
-                  pos[2] = outputs[attrib][chan];
-               break;
-            }
-         }
-      }
+   for(chan = 0; chan < NUM_CHANNELS; ++chan) {
+      lp_build_conv(builder, fs_type, blend_type,
+                    fs_out_color[chan], num_fs,
+                    &blend_in_color[chan], 1);
+      lp_build_name(blend_in_color[chan], "color.%c", "rgba"[chan]);
    }
 
-   if(!early_depth_test)
-      depth_test_generate(lp, builder, &key->depth,
-                          type, &mask,
-                          pos[2], depth_ptr);
+   lp_build_conv_mask(builder, fs_type, blend_type,
+                               fs_mask, num_fs,
+                               &blend_mask, 1);
 
-   lp_build_mask_end(&mask);
-   if(mask.value)
-      LLVMBuildStore(builder, mask.value, mask_ptr);
+   generate_blend(&key->blend,
+                  builder,
+                  blend_type,
+                  blend_mask,
+                  blend_in_color,
+                  NULL /* FIXME: blend_const_color */,
+                  color_ptr);
 
    LLVMBuildRetVoid(builder);;
 
@@ -512,6 +685,7 @@ void llvmpipe_update_fs(struct llvmpipe_context *lp)
    memset(&key, 0, sizeof key);
    memcpy(&key.depth, &lp->depth_stencil->depth, sizeof &key.depth);
    memcpy(&key.alpha, &lp->depth_stencil->alpha, sizeof &key.alpha);
+   memcpy(&key.blend, &lp->blend->base, sizeof &key.blend);
 
    variant = shader->variants;
    while(variant) {
@@ -522,7 +696,7 @@ void llvmpipe_update_fs(struct llvmpipe_context *lp)
    }
 
    if(!variant)
-      variant = shader_generate(lp, shader, &key);
+      variant = generate_fragment(lp, shader, &key);
 
    shader->current = variant;
 }