From 3c71a190160e30ddf59f64fd3abd8a637281f435 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 24 Jul 2009 18:46:17 +0100 Subject: [PATCH] llvmpipe: example fastpaths in blending --- src/gallium/drivers/llvmpipe/lp_quad_blend.c | 132 +++++++++++++++++-- 1 file changed, 121 insertions(+), 11 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_quad_blend.c b/src/gallium/drivers/llvmpipe/lp_quad_blend.c index 38b699c94f2..b48d41b9473 100644 --- a/src/gallium/drivers/llvmpipe/lp_quad_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_quad_blend.c @@ -117,9 +117,9 @@ do { \ static void -logicop_quad(struct quad_stage *qs, - struct quad_header *quads[], - unsigned nr) +logicop_quads(struct quad_stage *qs, + struct quad_header *quads[], + unsigned nr) { struct llvmpipe_context *llvmpipe = qs->llvmpipe; uint cbuf; @@ -241,13 +241,102 @@ logicop_quad(struct quad_stage *qs, } } } + + /* pass blended quad to next stage */ + qs->next->run(qs->next, quads, nr); +} +/* Fast path for color buffer 0: src * src_alpha + dest * (1 - src_alpha), combined with VEC4_ADD_SAT. */ +static void +blend_single_add_src_alpha_inv_src_alpha(struct quad_stage *qs, + struct quad_header *quads[], + unsigned nr) +{ + static const float one[4] = { 1, 1, 1, 1 }; + float one_minus_alpha[QUAD_SIZE]; + float dest[4][QUAD_SIZE]; + float source[4][QUAD_SIZE]; + uint i, j, q; + /* Tile is fetched once, at the position of quads[0], and reused for every quad below. NOTE(review): presumably all quads in a batch share one tile -- confirm with the caller. */ + struct llvmpipe_cached_tile *tile + = lp_get_cached_tile(qs->llvmpipe->cbuf_cache[0], + quads[0]->input.x0, + quads[0]->input.y0); + + for (q = 0; q < nr; q++) { + struct quad_header *quad = quads[q]; + float (*quadColor)[4] = quad->output.color[0]; + const float *alpha = quadColor[3]; + + /* get dest colors from the tile, transposed to dest[channel][pixel] */ + for (j = 0; j < QUAD_SIZE; j++) { + int x = (quad->input.x0 & (TILE_SIZE-1)) + (j & 1); + int y = (quad->input.y0 & (TILE_SIZE-1)) + (j >> 1); + for (i = 0; i < 4; i++) { + dest[i][j] = tile->data.color[y][x][i]; + } + } + + VEC4_MUL(source[0], quadColor[0], alpha); /* R */ + VEC4_MUL(source[1], quadColor[1], alpha); /* G */ + VEC4_MUL(source[2], quadColor[2], alpha); /* B */ + VEC4_MUL(source[3],
quadColor[3], alpha); /* A */ + + VEC4_SUB(one_minus_alpha, one, alpha); /* 1 - src alpha */ + VEC4_MUL(dest[0], dest[0], one_minus_alpha); /* R */ + VEC4_MUL(dest[1], dest[1], one_minus_alpha); /* G */ + VEC4_MUL(dest[2], dest[2], one_minus_alpha); /* B */ + VEC4_MUL(dest[3], dest[3], one_minus_alpha); /* A */ + + VEC4_ADD_SAT(quadColor[0], source[0], dest[0]); /* R */ + VEC4_ADD_SAT(quadColor[1], source[1], dest[1]); /* G */ + VEC4_ADD_SAT(quadColor[2], source[2], dest[2]); /* B */ + VEC4_ADD_SAT(quadColor[3], source[3], dest[3]); /* A */ + } + + /* pass blended quad to next stage */ + qs->next->run(qs->next, quads, nr); } +static void +blend_single_add_one_one(struct quad_stage *qs, + struct quad_header *quads[], + unsigned nr) +{ + float dest[4][QUAD_SIZE]; + uint i, j, q; + /* Fast path for color buffer 0: src + dest per channel via VEC4_ADD_SAT; dest is read from the tile found at the position of quads[0]. */ + struct llvmpipe_cached_tile *tile + = lp_get_cached_tile(qs->llvmpipe->cbuf_cache[0], + quads[0]->input.x0, + quads[0]->input.y0); + + for (q = 0; q < nr; q++) { + struct quad_header *quad = quads[q]; + float (*quadColor)[4] = quad->output.color[0]; + + /* get dest colors from the tile, transposed to dest[channel][pixel] */ + for (j = 0; j < QUAD_SIZE; j++) { + int x = (quad->input.x0 & (TILE_SIZE-1)) + (j & 1); + int y = (quad->input.y0 & (TILE_SIZE-1)) + (j >> 1); + for (i = 0; i < 4; i++) { + dest[i][j] = tile->data.color[y][x][i]; + } + } + + VEC4_ADD_SAT(quadColor[0], quadColor[0], dest[0]); /* R */ + VEC4_ADD_SAT(quadColor[1], quadColor[1], dest[1]); /* G */ + VEC4_ADD_SAT(quadColor[2], quadColor[2], dest[2]); /* B */ + VEC4_ADD_SAT(quadColor[3], quadColor[3], dest[3]); /* A */ + } + + /* pass blended quad to next stage */ + qs->next->run(qs->next, quads, nr); +} static void -blend_quads(struct quad_stage *qs, - struct quad_header *quads[], - unsigned nr) +blend_quads_fallback(struct quad_stage *qs, + struct quad_header *quads[], + unsigned nr) { static const float zero[4] = { 0, 0, 0, 0 }; static const float one[4] = { 1, 1, 1, 1 }; @@ -747,6 +836,9 @@ blend_quads(struct quad_stage *qs, } } } /* cbuf loop */ + + /* pass blended quad to next
stage */ + qs->next->run(qs->next, quads, nr); } @@ -756,21 +848,39 @@ blend_quad(struct quad_stage *qs, unsigned nr) { struct llvmpipe_context *llvmpipe = qs->llvmpipe; + const struct pipe_blend_state *blend = llvmpipe->blend; if (llvmpipe->blend->logicop_enable) { - logicop_quad(qs, quads, nr); + qs->run = logicop_quads; } - else if (llvmpipe->blend->blend_enable) { - blend_quads(qs, quads, nr ); + else { + qs->run = blend_quads_fallback; + /* Use a fast path when rgb and alpha share func and factors and there is exactly one color buffer. */ + if (blend->rgb_src_factor == blend->alpha_src_factor && + blend->rgb_dst_factor == blend->alpha_dst_factor && + blend->rgb_func == blend->alpha_func && + llvmpipe->framebuffer.nr_cbufs == 1) + { + if (blend->alpha_func == PIPE_BLEND_ADD) { + if (blend->rgb_src_factor == PIPE_BLENDFACTOR_ONE && + blend->rgb_dst_factor == PIPE_BLENDFACTOR_ONE) { + qs->run = blend_single_add_one_one; + } + else if (blend->rgb_src_factor == PIPE_BLENDFACTOR_SRC_ALPHA && + blend->rgb_dst_factor == PIPE_BLENDFACTOR_INV_SRC_ALPHA) + qs->run = blend_single_add_src_alpha_inv_src_alpha; + + } + } } - /* pass blended quad to next stage */ - qs->next->run(qs->next, quads, nr); + qs->run(qs, quads, nr); } static void blend_begin(struct quad_stage *qs) { + qs->run = blend_quad; /* point run back at the selector so the blend function is chosen again */ qs->next->begin(qs->next); } -- 2.30.2