llvmpipe: initial mrt support
author Keith Whitwell <keithw@vmware.com>
Sun, 10 Jan 2010 17:22:09 +0000 (17:22 +0000)
committer Keith Whitwell <keithw@vmware.com>
Sun, 10 Jan 2010 17:22:09 +0000 (17:22 +0000)
Non-MRT apps still work, and the code looks correct, but the MRT (multiple
render target) path is largely untested: not many MRT test apps are handy
at the moment...

src/gallium/drivers/llvmpipe/lp_flush.c
src/gallium/drivers/llvmpipe/lp_jit.h
src/gallium/drivers/llvmpipe/lp_rast.c
src/gallium/drivers/llvmpipe/lp_rast_priv.h
src/gallium/drivers/llvmpipe/lp_setup.c
src/gallium/drivers/llvmpipe/lp_state.h
src/gallium/drivers/llvmpipe/lp_state_fs.c

index 9405150c4f7ff0db0d26c9e8564ed04a1d13f854..16fb00092e662e9e30ce7cbea42796d6240287aa 100644 (file)
@@ -77,8 +77,11 @@ llvmpipe_flush( struct pipe_context *pipe,
    if(flags & PIPE_FLUSH_FRAME) {
       static unsigned frame_no = 1;
       static char filename[256];
-      util_snprintf(filename, sizeof(filename), "cbuf_%u.bmp", frame_no);
-      debug_dump_surface_bmp(filename, llvmpipe->framebuffer.cbufs[0]);
+      unsigned i;
+      for (i = 0; i < llvmpipe->framebuffer.nr_cbufs) {
+        util_snprintf(filename, sizeof(filename), "cbuf%u_%u.bmp", i, frame_no);
+        debug_dump_surface_bmp(filename, llvmpipe->framebuffer.cbufs[i]);
+      }
       util_snprintf(filename, sizeof(filename), "zsbuf_%u.bmp", frame_no);
       debug_dump_surface_bmp(filename, llvmpipe->framebuffer.zsbuf);
       ++frame_no;
index 1a6e939aa247752ee340ebe3f7df4fb92d70af23..3b316914b02a6d9d70574d223d45a809bb8d6ee6 100644 (file)
@@ -108,7 +108,7 @@ typedef void
                     const void *a0,
                     const void *dadx,
                     const void *dady,
-                    void *color,
+                    uint8_t **color,
                     void *depth,
                     const int32_t c1,
                     const int32_t c2,
index 6535e69308908f5bc2618345d2e4c9c3fa53e4f6..38c27b90e357cb343c781faaec4b385068195f67 100644 (file)
@@ -53,6 +53,7 @@ lp_rast_begin( struct lp_rasterizer *rast,
 {
    struct pipe_screen *screen = rast->screen;
    struct pipe_surface *cbuf, *zsbuf;
+   int i;
 
    LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
 
@@ -64,24 +65,27 @@ lp_rast_begin( struct lp_rasterizer *rast,
    rast->check_for_clipped_tiles = (fb->width % TILE_SIZE != 0 ||
                                     fb->height % TILE_SIZE != 0);
 
-   /* XXX support multiple color buffers here */
-   cbuf = rast->state.fb.cbufs[0];
-   if (cbuf) {
-      rast->cbuf_transfer = screen->get_tex_transfer(rast->screen,
-                                                     cbuf->texture,
-                                                     cbuf->face,
-                                                     cbuf->level,
-                                                     cbuf->zslice,
-                                                     PIPE_TRANSFER_READ_WRITE,
-                                                     0, 0,
-                                                     fb->width, fb->height);
-      if (!rast->cbuf_transfer)
-         return FALSE;
-
-      rast->cbuf_map = screen->transfer_map(rast->screen, 
-                                            rast->cbuf_transfer);
-      if (!rast->cbuf_map)
-         return FALSE;
+   
+   for (i = 0; i < rast->state.fb.nr_cbufs; i++) {
+      cbuf = rast->state.fb.cbufs[i];
+      if (cbuf) {
+        rast->cbuf_transfer[i] = screen->get_tex_transfer(rast->screen,
+                                                          cbuf->texture,
+                                                          cbuf->face,
+                                                          cbuf->level,
+                                                          cbuf->zslice,
+                                                          PIPE_TRANSFER_READ_WRITE,
+                                                          0, 0,
+                                                          cbuf->width, 
+                                                          cbuf->height);
+        if (!rast->cbuf_transfer[i])
+           goto fail;
+
+        rast->cbuf_map[i] = screen->transfer_map(rast->screen, 
+                                                 rast->cbuf_transfer[i]);
+        if (!rast->cbuf_map[i])
+           goto fail;
+      }
    }
 
    zsbuf = rast->state.fb.zsbuf;
@@ -93,17 +97,23 @@ lp_rast_begin( struct lp_rasterizer *rast,
                                                       zsbuf->zslice,
                                                       PIPE_TRANSFER_READ_WRITE,
                                                       0, 0,
-                                                      fb->width, fb->height);
+                                                      zsbuf->width,
+                                                     zsbuf->height);
       if (!rast->zsbuf_transfer)
-         return FALSE;
+         goto fail;
 
       rast->zsbuf_map = screen->transfer_map(rast->screen, 
                                             rast->zsbuf_transfer);
       if (!rast->zsbuf_map)
-         return FALSE;
+        goto fail;
    }
 
    return TRUE;
+
+fail:
+   /* Unmap and release transfers?
+    */
+   return FALSE;
 }
 
 
@@ -115,22 +125,26 @@ static void
 lp_rast_end( struct lp_rasterizer *rast )
 {
    struct pipe_screen *screen = rast->screen;
+   unsigned i;
 
-   if (rast->cbuf_map) 
-      screen->transfer_unmap(screen, rast->cbuf_transfer);
+   for (i = 0; i < rast->state.fb.nr_cbufs; i++) {
+      if (rast->cbuf_map[i]) 
+        screen->transfer_unmap(screen, rast->cbuf_transfer[i]);
+
+      if (rast->cbuf_transfer[i])
+        screen->tex_transfer_destroy(rast->cbuf_transfer[i]);
+
+      rast->cbuf_transfer[i] = NULL;
+      rast->cbuf_map[i] = NULL;
+   }
 
    if (rast->zsbuf_map) 
       screen->transfer_unmap(screen, rast->zsbuf_transfer);
 
-   if (rast->cbuf_transfer)
-      screen->tex_transfer_destroy(rast->cbuf_transfer);
-
    if (rast->zsbuf_transfer)
       screen->tex_transfer_destroy(rast->zsbuf_transfer);
 
-   rast->cbuf_transfer = NULL;
    rast->zsbuf_transfer = NULL;
-   rast->cbuf_map = NULL;
    rast->zsbuf_map = NULL;
 }
 
@@ -161,8 +175,9 @@ void lp_rast_clear_color( struct lp_rasterizer *rast,
                           const union lp_rast_cmd_arg arg )
 {
    const uint8_t *clear_color = arg.clear_color;
-   uint8_t *color_tile = rast->tasks[thread_index].tile.color;
-   
+   uint8_t **color_tile = rast->tasks[thread_index].tile.color;
+   unsigned i;
+
    LP_DBG(DEBUG_RAST, "%s 0x%x,0x%x,0x%x,0x%x\n", __FUNCTION__, 
               clear_color[0],
               clear_color[1],
@@ -172,14 +187,17 @@ void lp_rast_clear_color( struct lp_rasterizer *rast,
    if (clear_color[0] == clear_color[1] &&
        clear_color[1] == clear_color[2] &&
        clear_color[2] == clear_color[3]) {
-      memset(color_tile, clear_color[0], TILE_SIZE * TILE_SIZE * 4);
+      for (i = 0; i < rast->state.fb.nr_cbufs; i++) {
+        memset(color_tile[i], clear_color[0], TILE_SIZE * TILE_SIZE * 4);
+      }
    }
    else {
       unsigned x, y, chan;
-      for (y = 0; y < TILE_SIZE; y++)
-         for (x = 0; x < TILE_SIZE; x++)
-            for (chan = 0; chan < 4; ++chan)
-               TILE_PIXEL(color_tile, x, y, chan) = clear_color[chan];
+      for (i = 0; i < rast->state.fb.nr_cbufs; i++)
+        for (y = 0; y < TILE_SIZE; y++)
+           for (x = 0; x < TILE_SIZE; x++)
+              for (chan = 0; chan < 4; ++chan)
+                 TILE_PIXEL(color_tile[i], x, y, chan) = clear_color[chan];
    }
 }
 
@@ -214,28 +232,40 @@ void lp_rast_load_color( struct lp_rasterizer *rast,
    struct lp_rasterizer_task *task = &rast->tasks[thread_index];
    const unsigned x = task->x;
    const unsigned y = task->y;
-   int w = TILE_SIZE;
-   int h = TILE_SIZE;
+   unsigned i;
 
    LP_DBG(DEBUG_RAST, "%s at %u, %u\n", __FUNCTION__, x, y);
 
-   if (x + w > rast->state.fb.width)
-      w -= x + w - rast->state.fb.width;
+   for (i = 0; i < rast->state.fb.nr_cbufs; i++) {
+      struct pipe_transfer *transfer = rast->cbuf_transfer[i];
+      int w = TILE_SIZE;
+      int h = TILE_SIZE;
 
-   if (y + h > rast->state.fb.height)
-      h -= y + h - rast->state.fb.height;
+      if (x >= transfer->width)
+        continue;
 
-   assert(w >= 0);
-   assert(h >= 0);
-   assert(w <= TILE_SIZE);
-   assert(h <= TILE_SIZE);
-
-   lp_tile_read_4ub(rast->cbuf_transfer->texture->format,
-                     rast->tasks[thread_index].tile.color,
-                     rast->cbuf_map, 
-                     rast->cbuf_transfer->stride,
-                     x, y,
-                     w, h);
+      if (y >= transfer->height)
+        continue;
+      /* XXX: require tile-size aligned render target dimensions:
+       */
+      if (x + w > transfer->width)
+        w -= x + w - transfer->width;
+
+      if (y + h > transfer->height)
+        h -= y + h - transfer->height;
+
+      assert(w >= 0);
+      assert(h >= 0);
+      assert(w <= TILE_SIZE);
+      assert(h <= TILE_SIZE);
+
+      lp_tile_read_4ub(transfer->texture->format,
+                      rast->tasks[thread_index].tile.color[i],
+                      rast->cbuf_map[i], 
+                      transfer->stride,
+                      x, y,
+                      w, h);
+   }
 }
 
 
@@ -313,8 +343,9 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast,
 {
    const struct lp_rast_state *state = rast->tasks[thread_index].current_state;
    struct lp_rast_tile *tile = &rast->tasks[thread_index].tile;
-   void *color;
+   uint8_t *color[PIPE_MAX_COLOR_BUFS];
    void *depth;
+   unsigned i;
    unsigned ix, iy;
    int block_offset;
 
@@ -336,14 +367,17 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast,
    block_offset = ((iy/4)*(16*16) + (ix/4)*16);
 
    /* color buffer */
-   color = tile->color + 4 * block_offset;
+   for (i = 0; i < rast->state.fb.nr_cbufs; i++)
+      color[i] = tile->color[i] + 4 * block_offset;
 
    /* depth buffer */
    depth = tile->depth + block_offset;
 
+
+
 #ifdef DEBUG
-   assert(lp_check_alignment(depth, 16));
-   assert(lp_check_alignment(color, 16));
+   assert(lp_check_alignment(tile->depth, 16));
+   assert(lp_check_alignment(tile->color[0], 16));
    assert(lp_check_alignment(state->jit_context.blend_color, 16));
 
    assert(lp_check_alignment(inputs->step[0], 16));
@@ -360,8 +394,7 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast,
                         color,
                         depth,
                         c1, c2, c3,
-                        inputs->step[0], inputs->step[1], inputs->step[2]
-                        );
+                        inputs->step[0], inputs->step[1], inputs->step[2]);
 }
 
 
@@ -377,29 +410,42 @@ static void lp_rast_store_color( struct lp_rasterizer *rast,
 {
    const unsigned x = rast->tasks[thread_index].x;
    const unsigned y = rast->tasks[thread_index].y;
-   int w = TILE_SIZE;
-   int h = TILE_SIZE;
-
-   if (x + w > rast->state.fb.width)
-      w -= x + w - rast->state.fb.width;
+   unsigned i;
 
-   if (y + h > rast->state.fb.height)
-      h -= y + h - rast->state.fb.height;
+   for (i = 0; i < rast->state.fb.nr_cbufs; i++) {
+      struct pipe_transfer *transfer = rast->cbuf_transfer[i];
+      int w = TILE_SIZE;
+      int h = TILE_SIZE;
 
-   assert(w >= 0);
-   assert(h >= 0);
-   assert(w <= TILE_SIZE);
-   assert(h <= TILE_SIZE);
+      if (x >= transfer->width)
+        continue;
 
-   LP_DBG(DEBUG_RAST, "%s [%u] %d,%d %dx%d\n", __FUNCTION__,
-          thread_index, x, y, w, h);
+      if (y >= transfer->height)
+        continue;
 
-   lp_tile_write_4ub(rast->cbuf_transfer->texture->format,
-                     rast->tasks[thread_index].tile.color,
-                     rast->cbuf_map, 
-                     rast->cbuf_transfer->stride,
-                     x, y,
-                     w, h);
+      /* XXX: require tile-size aligned render target dimensions:
+       */
+      if (x + w > transfer->width)
+        w -= x + w - transfer->width;
+
+      if (y + h > transfer->height)
+        h -= y + h - transfer->height;
+
+      assert(w >= 0);
+      assert(h >= 0);
+      assert(w <= TILE_SIZE);
+      assert(h <= TILE_SIZE);
+
+      LP_DBG(DEBUG_RAST, "%s [%u] %d,%d %dx%d\n", __FUNCTION__,
+            thread_index, x, y, w, h);
+
+      lp_tile_write_4ub(transfer->texture->format,
+                       rast->tasks[thread_index].tile.color[i],
+                       rast->cbuf_map[i], 
+                       transfer->stride,
+                       x, y,
+                       w, h);
+   }
 }
 
 
@@ -600,7 +646,7 @@ lp_rasterize_scene( struct lp_rasterizer *rast,
       /* no threading */
 
       lp_rast_begin( rast, fb,
-                     fb->cbufs[0]!= NULL,
+                     fb->nr_cbufs != 0, /* always write color if cbufs present */
                      fb->zsbuf != NULL && write_depth );
 
       lp_scene_bin_iter_begin( scene );
@@ -667,7 +713,7 @@ thread_func( void *init_data )
          write_depth = rast->curr_scene->write_depth;
 
          lp_rast_begin( rast, fb,
-                        fb->cbufs[0] != NULL,
+                        fb->nr_cbufs != 0,
                         fb->zsbuf != NULL && write_depth );
       }
 
@@ -738,7 +784,7 @@ struct lp_rasterizer *
 lp_rast_create( struct pipe_screen *screen, struct lp_scene_queue *empty )
 {
    struct lp_rasterizer *rast;
-   unsigned i;
+   unsigned i, cbuf;
 
    rast = CALLOC_STRUCT(lp_rasterizer);
    if(!rast)
@@ -750,7 +796,9 @@ lp_rast_create( struct pipe_screen *screen, struct lp_scene_queue *empty )
    rast->full_scenes = lp_scene_queue_create();
 
    for (i = 0; i < Elements(rast->tasks); i++) {
-      rast->tasks[i].tile.color = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 );
+      for (cbuf = 0; cbuf < PIPE_MAX_COLOR_BUFS; cbuf++ )
+        rast->tasks[i].tile.color[cbuf] = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 );
+
       rast->tasks[i].tile.depth = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 );
       rast->tasks[i].rast = rast;
       rast->tasks[i].thread_index = i;
@@ -769,13 +817,14 @@ lp_rast_create( struct pipe_screen *screen, struct lp_scene_queue *empty )
  */
 void lp_rast_destroy( struct lp_rasterizer *rast )
 {
-   unsigned i;
+   unsigned i, cbuf;
 
    util_unreference_framebuffer_state(&rast->state.fb);
 
    for (i = 0; i < Elements(rast->tasks); i++) {
       align_free(rast->tasks[i].tile.depth);
-      align_free(rast->tasks[i].tile.color);
+      for (cbuf = 0; cbuf < PIPE_MAX_COLOR_BUFS; cbuf++ )
+        align_free(rast->tasks[i].tile.color[cbuf]);
    }
 
    /* for synchronizing rasterization threads */
index cd72d7e69d8ecbc1e2f826257b0dbf0cc8b7e605..5afdeab049c6e00b9b610bf419f8f13f2b3c58a6 100644 (file)
@@ -46,7 +46,7 @@ struct lp_rasterizer;
  */
 struct lp_rast_tile
 {
-   uint8_t *color;
+   uint8_t *color[PIPE_MAX_COLOR_BUFS];
 
    uint32_t *depth;
 };
@@ -87,9 +87,9 @@ struct lp_rasterizer
    /* Framebuffer stuff
     */
    struct pipe_screen *screen;
-   struct pipe_transfer *cbuf_transfer;
+   struct pipe_transfer *cbuf_transfer[PIPE_MAX_COLOR_BUFS];
    struct pipe_transfer *zsbuf_transfer;
-   void *cbuf_map;
+   void *cbuf_map[PIPE_MAX_COLOR_BUFS];
    void *zsbuf_map;
 
    struct {
index 5cdcf4ecc985f0521ddcc9836aa8bc436ee5b5be..74f3054864c99359e57d3f1fff1b179d840115fe 100644 (file)
@@ -155,26 +155,26 @@ begin_binning( struct setup_context *setup )
 
    LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
 
-   if (setup->fb.cbufs[0]) {
+   if (setup->fb.nr_cbufs) {
       if (setup->clear.flags & PIPE_CLEAR_COLOR)
          lp_scene_bin_everywhere( scene, 
-                            lp_rast_clear_color, 
-                            setup->clear.color );
+                                 lp_rast_clear_color, 
+                                 setup->clear.color );
       else
          lp_scene_bin_everywhere( scene,
-                            lp_rast_load_color,
-                            lp_rast_arg_null() );
+                                 lp_rast_load_color,
+                                 lp_rast_arg_null() );
    }
 
    if (setup->fb.zsbuf) {
       if (setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL)
          lp_scene_bin_everywhere( scene, 
-                            lp_rast_clear_zstencil, 
-                            setup->clear.zstencil );
+                                 lp_rast_clear_zstencil, 
+                                 setup->clear.zstencil );
       else
          lp_scene_bin_everywhere( scene,
-                            lp_rast_load_zstencil,
-                            lp_rast_arg_null() );
+                                 lp_rast_load_zstencil,
+                                 lp_rast_arg_null() );
    }
 
    LP_DBG(DEBUG_SETUP, "%s done\n", __FUNCTION__);
index 25d135367414b6b4960b5ceb2e67cfa8fbae7c5f..cb240cb6e5585cb86775dd302731f00e47af17ab 100644 (file)
@@ -67,10 +67,16 @@ struct lp_fragment_shader;
 struct lp_fragment_shader_variant_key
 {
    enum pipe_format zsbuf_format;
+   unsigned nr_cbufs;
+
    struct pipe_depth_state depth;
    struct pipe_alpha_state alpha;
    struct pipe_blend_state blend;
 
+   struct {
+      ubyte colormask;
+   } cbuf_blend[PIPE_MAX_COLOR_BUFS];
+   
    struct lp_sampler_static_state sampler[PIPE_MAX_SAMPLERS];
 };
 
index 293535387ab08c444084cda896c95003e4ead3b9..01912d6ea2dc4025c5d39bad9c65553995891d21 100644 (file)
@@ -327,7 +327,7 @@ generate_fs(struct llvmpipe_context *lp,
             const struct lp_build_interp_soa_context *interp,
             struct lp_build_sampler_soa *sampler,
             LLVMValueRef *pmask,
-            LLVMValueRef *color,
+            LLVMValueRef (*color)[4],
             LLVMValueRef depth_ptr,
             LLVMValueRef c0,
             LLVMValueRef c1,
@@ -348,6 +348,7 @@ generate_fs(struct llvmpipe_context *lp,
    boolean early_depth_test;
    unsigned attrib;
    unsigned chan;
+   unsigned cbuf;
 
    assert(i < 4);
 
@@ -364,9 +365,11 @@ generate_fs(struct llvmpipe_context *lp,
    lp_build_flow_scope_begin(flow);
 
    /* Declare the color and z variables */
-   for(chan = 0; chan < NUM_CHANNELS; ++chan) {
-      color[chan] = LLVMGetUndef(vec_type);
-      lp_build_flow_scope_declare(flow, &color[chan]);
+   for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) {
+      for(chan = 0; chan < NUM_CHANNELS; ++chan) {
+        color[cbuf][chan] = LLVMGetUndef(vec_type);
+        lp_build_flow_scope_declare(flow, &color[cbuf][chan]);
+      }
    }
    lp_build_flow_scope_declare(flow, &z);
 
@@ -407,6 +410,7 @@ generate_fs(struct llvmpipe_context *lp,
 
                   /* Alpha test */
                   /* XXX: should the alpha reference value be passed separately? */
+                 /* XXX: should only test the final assignment to alpha */
                   if(cbuf == 0 && chan == 3) {
                      LLVMValueRef alpha = outputs[attrib][chan];
                      LLVMValueRef alpha_ref_value;
@@ -416,9 +420,7 @@ generate_fs(struct llvmpipe_context *lp,
                                          &mask, alpha, alpha_ref_value);
                   }
 
-                  if(cbuf == 0)
-                     color[chan] = outputs[attrib][chan];
-
+                 color[cbuf][chan] = outputs[attrib][chan];
                   break;
                }
 
@@ -539,7 +541,7 @@ generate_fragment(struct llvmpipe_context *lp,
    LLVMValueRef a0_ptr;
    LLVMValueRef dadx_ptr;
    LLVMValueRef dady_ptr;
-   LLVMValueRef color_ptr;
+   LLVMValueRef color_ptr_ptr;
    LLVMValueRef depth_ptr;
    LLVMValueRef c0, c1, c2, step0_ptr, step1_ptr, step2_ptr;
    LLVMBasicBlockRef block;
@@ -549,12 +551,13 @@ generate_fragment(struct llvmpipe_context *lp,
    struct lp_build_sampler_soa *sampler;
    struct lp_build_interp_soa_context interp;
    LLVMValueRef fs_mask[LP_MAX_VECTOR_LENGTH];
-   LLVMValueRef fs_out_color[NUM_CHANNELS][LP_MAX_VECTOR_LENGTH];
+   LLVMValueRef fs_out_color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][LP_MAX_VECTOR_LENGTH];
    LLVMValueRef blend_mask;
    LLVMValueRef blend_in_color[NUM_CHANNELS];
    unsigned num_fs;
    unsigned i;
    unsigned chan;
+   unsigned cbuf;
 
    if (LP_DEBUG & DEBUG_JIT) {
       tgsi_dump(shader->base.tokens, 0);
@@ -651,7 +654,7 @@ generate_fragment(struct llvmpipe_context *lp,
    arg_types[3] = LLVMPointerType(fs_elem_type, 0);    /* a0 */
    arg_types[4] = LLVMPointerType(fs_elem_type, 0);    /* dadx */
    arg_types[5] = LLVMPointerType(fs_elem_type, 0);    /* dady */
-   arg_types[6] = LLVMPointerType(blend_vec_type, 0);  /* color */
+   arg_types[6] = LLVMPointerType(LLVMPointerType(blend_vec_type, 0), 0);  /* color */
    arg_types[7] = LLVMPointerType(fs_int_vec_type, 0); /* depth */
    arg_types[8] = LLVMInt32Type();                     /* c0 */
    arg_types[9] = LLVMInt32Type();                    /* c1 */
@@ -667,6 +670,10 @@ generate_fragment(struct llvmpipe_context *lp,
 
    variant->function = LLVMAddFunction(screen->module, "shader", func_type);
    LLVMSetFunctionCallConv(variant->function, LLVMCCallConv);
+
+   /* XXX: need to propagate noalias down into color param now we are
+    * passing a pointer-to-pointer?
+    */
    for(i = 0; i < Elements(arg_types); ++i)
       if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
          LLVMAddAttribute(LLVMGetParam(variant->function, i), LLVMNoAliasAttribute);
@@ -677,7 +684,7 @@ generate_fragment(struct llvmpipe_context *lp,
    a0_ptr       = LLVMGetParam(variant->function, 3);
    dadx_ptr     = LLVMGetParam(variant->function, 4);
    dady_ptr     = LLVMGetParam(variant->function, 5);
-   color_ptr    = LLVMGetParam(variant->function, 6);
+   color_ptr_ptr = LLVMGetParam(variant->function, 6);
    depth_ptr    = LLVMGetParam(variant->function, 7);
    c0           = LLVMGetParam(variant->function, 8);
    c1           = LLVMGetParam(variant->function, 9);
@@ -692,7 +699,7 @@ generate_fragment(struct llvmpipe_context *lp,
    lp_build_name(a0_ptr, "a0");
    lp_build_name(dadx_ptr, "dadx");
    lp_build_name(dady_ptr, "dady");
-   lp_build_name(color_ptr, "color");
+   lp_build_name(color_ptr_ptr, "color_ptr");
    lp_build_name(depth_ptr, "depth");
    lp_build_name(c0, "c0");
    lp_build_name(c1, "c1");
@@ -721,8 +728,9 @@ generate_fragment(struct llvmpipe_context *lp,
    /* loop over quads in the block */
    for(i = 0; i < num_fs; ++i) {
       LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
-      LLVMValueRef out_color[NUM_CHANNELS];
+      LLVMValueRef out_color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS];
       LLVMValueRef depth_ptr_i;
+      int cbuf;
 
       if(i != 0)
          lp_build_interp_soa_update(&interp, i);
@@ -742,40 +750,50 @@ generate_fragment(struct llvmpipe_context *lp,
                   c0, c1, c2,
                   step0_ptr, step1_ptr, step2_ptr);
 
-      for(chan = 0; chan < NUM_CHANNELS; ++chan)
-         fs_out_color[chan][i] = out_color[chan];
+      for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++)
+        for(chan = 0; chan < NUM_CHANNELS; ++chan)
+           fs_out_color[cbuf][chan][i] = out_color[cbuf][chan];
    }
 
    sampler->destroy(sampler);
 
-   /* 
-    * Convert the fs's output color and mask to fit to the blending type. 
+   /* Loop over color outputs / color buffers to do blending.
     */
+   for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) {
+      LLVMValueRef color_ptr;
+      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), cbuf, 0);
 
-   for(chan = 0; chan < NUM_CHANNELS; ++chan) {
-      lp_build_conv(builder, fs_type, blend_type,
-                    fs_out_color[chan], num_fs,
-                    &blend_in_color[chan], 1);
-      lp_build_name(blend_in_color[chan], "color.%c", "rgba"[chan]);
+      /* 
+       * Convert the fs's output color and mask to fit to the blending type. 
+       */
+      for(chan = 0; chan < NUM_CHANNELS; ++chan) {
+        lp_build_conv(builder, fs_type, blend_type,
+                      fs_out_color[cbuf][chan], num_fs,
+                      &blend_in_color[chan], 1);
+        lp_build_name(blend_in_color[chan], "color%d.%c", cbuf, "rgba"[chan]);
+      }
 
+      lp_build_conv_mask(builder, fs_type, blend_type,
+                        fs_mask, num_fs,
+                        &blend_mask, 1);
+
+      color_ptr = LLVMBuildLoad(builder, 
+                               LLVMBuildGEP(builder, color_ptr_ptr, &index, 1, ""),
+                               "");
+      lp_build_name(color_ptr, "color_ptr%d", cbuf);
+
+      /*
+       * Blending.
+       */
+      generate_blend(&key->blend,
+                    builder,
+                    blend_type,
+                    context_ptr,
+                    blend_mask,
+                    blend_in_color,
+                    color_ptr);
    }
 
-   lp_build_conv_mask(builder, fs_type, blend_type,
-                      fs_mask, num_fs,
-                      &blend_mask, 1);
-
-   /*
-    * Blending.
-    */
-
-   generate_blend(&key->blend,
-                  builder,
-                  blend_type,
-                  context_ptr,
-                  blend_mask,
-                  blend_in_color,
-                  color_ptr);
-
    LLVMBuildRetVoid(builder);
 
    LLVMDisposeBuilder(builder);
@@ -940,21 +958,27 @@ make_variant_key(struct llvmpipe_context *lp,
       key->alpha.func = lp->depth_stencil->alpha.func;
    /* alpha.ref_value is passed in jit_context */
 
-   if(lp->framebuffer.cbufs[0]) {
+   if (lp->framebuffer.nr_cbufs) {
+      memcpy(&key->blend, lp->blend, sizeof key->blend);
+   }
+
+   key->nr_cbufs = lp->framebuffer.nr_cbufs;
+   for (i = 0; i < lp->framebuffer.nr_cbufs; i++) {
       const struct util_format_description *format_desc;
       unsigned chan;
 
-      memcpy(&key->blend, lp->blend, sizeof key->blend);
-
-      format_desc = util_format_description(lp->framebuffer.cbufs[0]->format);
+      format_desc = util_format_description(lp->framebuffer.cbufs[i]->format);
       assert(format_desc->layout == UTIL_FORMAT_COLORSPACE_RGB ||
              format_desc->layout == UTIL_FORMAT_COLORSPACE_SRGB);
 
-      /* mask out color channels not present in the color buffer */
+      /* mask out color channels not present in the color buffer.
+       * Should be simple to incorporate per-cbuf writemasks:
+       */
       for(chan = 0; chan < 4; ++chan) {
          enum util_format_swizzle swizzle = format_desc->swizzle[chan];
-         if(swizzle > 4)
-            key->blend.colormask &= ~(1 << chan);
+
+         if(swizzle <= UTIL_FORMAT_SWIZZLE_W)
+            key->cbuf_blend[i].colormask |= (1 << chan);
       }
    }