From: Brian Paul Date: Thu, 11 Sep 2008 16:08:06 +0000 (-0600) Subject: cell: begin new blending code (both codegen and fallback paths) X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=add86031db757b0e3abe48bd8fdea40d4e380e05;p=mesa.git cell: begin new blending code (both codegen and fallback paths) --- diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c index df29476be66..7966c0916c7 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c @@ -231,6 +231,370 @@ gen_alpha_test(const struct pipe_depth_stencil_alpha_state *dsa, +/** + * Generate SPE code to implement the given blend mode for a quad of pixels. + * \param f SPE function to append instruction onto. + * \param fragR_reg register with fragment red values (float) (in/out) + * \param fragG_reg register with fragment green values (float) (in/out) + * \param fragB_reg register with fragment blue values (float) (in/out) + * \param fragA_reg register with fragment alpha values (float) (in/out) + * \param fbRGBA_reg register with packed framebuffer colors (integer) (in) + */ +static void +gen_blend(const struct pipe_blend_state *blend, + struct spe_function *f, + enum pipe_format color_format, + int fragR_reg, int fragG_reg, int fragB_reg, int fragA_reg, + int fbRGBA_reg) +{ + int term1R_reg = spe_allocate_available_register(f); + int term1G_reg = spe_allocate_available_register(f); + int term1B_reg = spe_allocate_available_register(f); + int term1A_reg = spe_allocate_available_register(f); + + int term2R_reg = spe_allocate_available_register(f); + int term2G_reg = spe_allocate_available_register(f); + int term2B_reg = spe_allocate_available_register(f); + int term2A_reg = spe_allocate_available_register(f); + + int fbR_reg = spe_allocate_available_register(f); + int fbG_reg = spe_allocate_available_register(f); + int fbB_reg = spe_allocate_available_register(f); + int fbA_reg = spe_allocate_available_register(f); + + int one_reg = spe_allocate_available_register(f); + int tmp_reg = spe_allocate_available_register(f); + + ASSERT(blend->blend_enable); + + /* Unpack/convert framebuffer colors from four 32-bit packed colors + * (fbRGBA) to four float RGBA vectors (fbR, fbG, fbB, fbA). + * Each 8-bit color component is expanded into a float in [0.0, 1.0]. + */ + { + int mask_reg = spe_allocate_available_register(f); + + /* mask = {0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff} */ + spe_fsmbi(f, mask_reg, 0x1111); + + /* XXX there may be more clever ways to implement the following code */ + switch (color_format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + /* fbB = fbB & mask */ + spe_and(f, fbB_reg, fbRGBA_reg, mask_reg); + /* mask = mask << 8 */ + spe_roti(f, mask_reg, mask_reg, 8); + + /* fbG = fbRGBA & mask */ + spe_and(f, fbB_reg, fbRGBA_reg, mask_reg); + /* fbG = fbG >> 8 */ + spe_roti(f, fbB_reg, fbB_reg, -8); + /* mask = mask << 8 */ + spe_roti(f, mask_reg, mask_reg, 8); + + /* fbR = fbRGBA & mask */ + spe_and(f, fbR_reg, fbRGBA_reg, mask_reg); + /* fbR = fbR >> 16 */ + spe_roti(f, fbB_reg, fbB_reg, -16); + /* mask = mask << 8 */ + spe_roti(f, mask_reg, mask_reg, 8); + + /* fbA = fbRGBA & mask */ + spe_and(f, fbA_reg, fbRGBA_reg, mask_reg); + /* fbA = fbA >> 24 */ + spe_roti(f, fbA_reg, fbA_reg, -24); + break; + + case PIPE_FORMAT_B8G8R8A8_UNORM: + /* fbA = fbA & mask */ + spe_and(f, fbA_reg, fbRGBA_reg, mask_reg); + /* mask = mask << 8 */ + spe_roti(f, mask_reg, mask_reg, 8); + + /* fbR = fbRGBA & mask */ + spe_and(f, fbR_reg, fbRGBA_reg, mask_reg); + /* fbR = fbR >> 8 */ + spe_roti(f, fbR_reg, fbR_reg, -8); + /* mask = mask << 8 */ + spe_roti(f, mask_reg, mask_reg, 8); + + /* fbG = fbRGBA & mask */ + spe_and(f, fbG_reg, fbRGBA_reg, mask_reg); + /* fbG = fbG >> 16 */ + spe_roti(f, fbG_reg, fbG_reg, -16); + /* mask = mask << 8 */ + spe_roti(f, mask_reg, mask_reg, 8); + + /* fbB = fbRGBA & mask */ + spe_and(f, fbB_reg, fbRGBA_reg, mask_reg); + /* fbB = fbB >> 24 */ + spe_roti(f, fbB_reg, fbB_reg, -24); + break; + + default: + ASSERT(0); + } + + /* convert int[4] in [0,255] to float[4] in [0.0, 1.0] */ + spe_cuflt(f, fbR_reg, fbR_reg, 8); + spe_cuflt(f, fbG_reg, fbG_reg, 8); + spe_cuflt(f, fbB_reg, fbB_reg, 8); + spe_cuflt(f, fbA_reg, fbA_reg, 8); + + spe_release_register(f, mask_reg); + } + + + /* + * Compute Src RGB terms + */ + switch (blend->rgb_src_factor) { + case PIPE_BLENDFACTOR_ONE: + spe_move(f, term1R_reg, fragR_reg); + spe_move(f, term1G_reg, fragG_reg); + spe_move(f, term1B_reg, fragB_reg); + break; + case PIPE_BLENDFACTOR_ZERO: + spe_zero(f, term1R_reg); + spe_zero(f, term1G_reg); + spe_zero(f, term1B_reg); + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + spe_fm(f, term1R_reg, fragR_reg, fragR_reg); + spe_fm(f, term1G_reg, fragG_reg, fragG_reg); + spe_fm(f, term1B_reg, fragB_reg, fragB_reg); + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + spe_fm(f, term1R_reg, fragR_reg, fragA_reg); + spe_fm(f, term1G_reg, fragG_reg, fragA_reg); + spe_fm(f, term1B_reg, fragB_reg, fragA_reg); + break; + /* XXX more cases */ + default: + ASSERT(0); + } + + /* + * Compute Src Alpha term + */ + switch (blend->alpha_src_factor) { + case PIPE_BLENDFACTOR_ONE: + spe_move(f, term1A_reg, fragA_reg); + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + spe_fm(f, term1A_reg, fragA_reg, fragA_reg); + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + spe_fm(f, term1A_reg, fragA_reg, fragA_reg); + break; + /* XXX more cases */ + default: + ASSERT(0); + } + + /* + * Compute Dest RGB terms + */ + switch (blend->rgb_dst_factor) { + case PIPE_BLENDFACTOR_ONE: + spe_move(f, term2R_reg, fbR_reg); + spe_move(f, term2G_reg, fbG_reg); + spe_move(f, term2B_reg, fbB_reg); + break; + case PIPE_BLENDFACTOR_ZERO: + spe_zero(f, term2R_reg); + spe_zero(f, term2G_reg); + spe_zero(f, term2B_reg); + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + spe_fm(f, term2R_reg, fbR_reg, fragR_reg); + spe_fm(f, term2G_reg, fbG_reg, fragG_reg); + spe_fm(f, term2B_reg, fbB_reg, fragB_reg); + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + spe_fm(f, term2R_reg, fbR_reg, fragA_reg); + spe_fm(f, term2G_reg, fbG_reg, fragA_reg); + spe_fm(f, term2B_reg, fbB_reg, fragA_reg); + break; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + /* one = {1.0, 1.0, 1.0, 1.0} */ + spe_load_float(f, one_reg, 1.0f); + /* tmp = one - fragA */ + spe_fs(f, tmp_reg, one_reg, fragA_reg); + /* term = fb * tmp */ + spe_fm(f, term2R_reg, fbR_reg, tmp_reg); + spe_fm(f, term2G_reg, fbG_reg, tmp_reg); + spe_fm(f, term2B_reg, fbB_reg, tmp_reg); + break; + /* XXX more cases */ + default: + ASSERT(0); + } + + /* + * Compute Dest Alpha term + */ + switch (blend->alpha_dst_factor) { + case PIPE_BLENDFACTOR_ONE: + spe_move(f, term2A_reg, fbA_reg); + break; + case PIPE_BLENDFACTOR_ZERO: + spe_zero(f, term2A_reg); + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + spe_fm(f, term2A_reg, fbA_reg, fragA_reg); + break; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + /* one = {1.0, 1.0, 1.0, 1.0} */ + spe_load_float(f, one_reg, 1.0f); + /* tmp = one - fragA */ + spe_fs(f, tmp_reg, one_reg, fragA_reg); + /* termA = fbA * tmp */ + spe_fm(f, term2A_reg, fbA_reg, tmp_reg); + break; + /* XXX more cases */ + default: + ASSERT(0); + } + + /* + * Combine Src/Dest RGB terms + */ + switch (blend->rgb_func) { + case PIPE_BLEND_ADD: + spe_fa(f, fragR_reg, term1R_reg, term2R_reg); + spe_fa(f, fragG_reg, term1G_reg, term2G_reg); + spe_fa(f, fragB_reg, term1B_reg, term2B_reg); + break; + case PIPE_BLEND_SUBTRACT: + spe_fs(f, fragR_reg, term1R_reg, term2R_reg); + spe_fs(f, fragG_reg, term1G_reg, term2G_reg); + spe_fs(f, fragB_reg, term1B_reg, term2B_reg); + break; + /* XXX more cases */ + default: + ASSERT(0); + } + + /* + * Combine Src/Dest A term + */ + switch (blend->alpha_func) { + case PIPE_BLEND_ADD: + spe_fa(f, fragA_reg, term1A_reg, term2A_reg); + break; + case PIPE_BLEND_SUBTRACT: + spe_fs(f, fragA_reg, term1A_reg, term2A_reg); + break; + /* XXX more cases */ + default: + ASSERT(0); + } + + spe_release_register(f, term1R_reg); + spe_release_register(f, term1G_reg); + spe_release_register(f, term1B_reg); + spe_release_register(f, term1A_reg); + + spe_release_register(f, term2R_reg); + spe_release_register(f, term2G_reg); + spe_release_register(f, term2B_reg); + spe_release_register(f, term2A_reg); + + spe_release_register(f, fbR_reg); + spe_release_register(f, fbG_reg); + spe_release_register(f, fbB_reg); + spe_release_register(f, fbA_reg); + + spe_release_register(f, one_reg); + spe_release_register(f, tmp_reg); +} + + +static void +gen_logicop(const struct pipe_blend_state *blend, + struct spe_function *f, + int fragRGBA_reg, int fbRGBA_reg) +{ + /* XXX to-do */ + /* operate on 32-bit packed pixels, not float colors */ +} + + +static void +gen_colormask(uint colormask, + struct spe_function *f, + int fragRGBA_reg, int fbRGBA_reg) +{ + /* XXX to-do */ + /* operate on 32-bit packed pixels, not float colors */ +} + + + +/** + * Generate code to pack a quad of float colors into a four 32-bit integers. + * + * \param f SPE function to append instruction onto. + * \param color_format the dest color packing format + * \param r_reg register containing four red values (in/clobbered) + * \param g_reg register containing four green values (in/clobbered) + * \param b_reg register containing four blue values (in/clobbered) + * \param a_reg register containing four alpha values (in/clobbered) + * \param rgba_reg register to store the packed RGBA colors (out) + */ +static void +gen_pack_colors(struct spe_function *f, + enum pipe_format color_format, + int r_reg, int g_reg, int b_reg, int a_reg, + int rgba_reg) +{ + /* Convert float[4] in [0.0,1.0] to int[4] in [0,~0], with clamping */ + spe_cfltu(f, r_reg, r_reg, 32); + spe_cfltu(f, g_reg, g_reg, 32); + spe_cfltu(f, b_reg, b_reg, 32); + spe_cfltu(f, a_reg, a_reg, 32); + + /* Shift the most significant bytes to least the significant positions. + * I.e.: reg = reg >> 24 + */ + spe_rotmi(f, r_reg, r_reg, -24); + spe_rotmi(f, g_reg, g_reg, -24); + spe_rotmi(f, b_reg, b_reg, -24); + spe_rotmi(f, a_reg, a_reg, -24); + + /* Shift the color bytes according to the surface format */ + if (color_format == PIPE_FORMAT_A8R8G8B8_UNORM) { + spe_roti(f, g_reg, g_reg, 8); /* green <<= 8 */ + spe_roti(f, r_reg, r_reg, 16); /* red <<= 16 */ + spe_roti(f, a_reg, a_reg, 24); /* alpha <<= 24 */ + } + else if (color_format == PIPE_FORMAT_B8G8R8A8_UNORM) { + spe_roti(f, r_reg, r_reg, 8); /* red <<= 8 */ + spe_roti(f, g_reg, g_reg, 16); /* green <<= 16 */ + spe_roti(f, b_reg, b_reg, 24); /* blue <<= 24 */ + } + else { + ASSERT(0); + } + + /* Merge red, green, blue, alpha registers to make packed RGBA colors. + * Eg: after shifting according to color_format we might have: + * R = {0x00ff0000, 0x00110000, 0x00220000, 0x00330000} + * G = {0x0000ff00, 0x00004400, 0x00005500, 0x00006600} + * B = {0x000000ff, 0x00000077, 0x00000088, 0x00000099} + * A = {0xff000000, 0xaa000000, 0xbb000000, 0xcc000000} + * OR-ing all those together gives us four packed colors: + * RGBA = {0xffffffff, 0xaa114477, 0xbb225588, 0xcc336699} + */ + spe_or(f, rgba_reg, r_reg, g_reg); + spe_or(f, rgba_reg, rgba_reg, b_reg); + spe_or(f, rgba_reg, rgba_reg, a_reg); +} + + + + /** * Generate SPE code to implement the fragment operations (alpha test, * depth test, stencil test, blending, colormask, and final @@ -257,6 +621,7 @@ gen_fragment_function(struct cell_context *cell, struct spe_function *f) const struct pipe_depth_stencil_alpha_state *dsa = &cell->depth_stencil->base; const struct pipe_blend_state *blend = &cell->blend->base; + const enum pipe_format color_format = cell->framebuffer.cbufs[0]->format; /* For SPE function calls: reg $3 = first param, $4 = second param, etc. */ const int x_reg = 3; /* uint */ @@ -443,64 +808,31 @@ gen_fragment_function(struct cell_context *cell, struct spe_function *f) if (blend->blend_enable) { - /* convert packed tile colors in fbRGBA_reg to float[4] vectors */ - - // gen_blend_code(blend, f, mask_reg, ... ); - + gen_blend(blend, f, color_format, + fragR_reg, fragG_reg, fragB_reg, fragA_reg, fbRGBA_reg); } - - /* * Write fragment colors to framebuffer/tile. * This involves converting the fragment colors from float[4] to the * tile's specific format and obeying the quad/pixel mask. */ { - const enum pipe_format color_format = cell->framebuffer.cbufs[0]->format; int rgba_reg = spe_allocate_available_register(f); - /* Convert float[4] in [0.0,1.0] to int[4] in [0,~0], with clamping */ - spe_cfltu(f, fragR_reg, fragR_reg, 32); - spe_cfltu(f, fragG_reg, fragG_reg, 32); - spe_cfltu(f, fragB_reg, fragB_reg, 32); - spe_cfltu(f, fragA_reg, fragA_reg, 32); + /* Pack four float colors as four 32-bit int colors */ + gen_pack_colors(f, color_format, + fragR_reg, fragG_reg, fragB_reg, fragA_reg, + rgba_reg); - /* Shift most the significant bytes to least the significant positions. - * I.e.: reg = reg >> 24 - */ - spe_rotmi(f, fragR_reg, fragR_reg, -24); - spe_rotmi(f, fragG_reg, fragG_reg, -24); - spe_rotmi(f, fragB_reg, fragB_reg, -24); - spe_rotmi(f, fragA_reg, fragA_reg, -24); - - /* Shift the color bytes according to the surface format */ - if (color_format == PIPE_FORMAT_A8R8G8B8_UNORM) { - spe_roti(f, fragG_reg, fragG_reg, 8); /* green <<= 8 */ - spe_roti(f, fragR_reg, fragR_reg, 16); /* red <<= 16 */ - spe_roti(f, fragA_reg, fragA_reg, 24); /* alpha <<= 24 */ - } - else if (color_format == PIPE_FORMAT_B8G8R8A8_UNORM) { - spe_roti(f, fragR_reg, fragR_reg, 8); /* red <<= 8 */ - spe_roti(f, fragG_reg, fragG_reg, 16); /* green <<= 16 */ - spe_roti(f, fragB_reg, fragB_reg, 24); /* blue <<= 24 */ + if (blend->logicop_enable) { + gen_logicop(blend, f, rgba_reg, fbRGBA_reg); } - else { - ASSERT(0); + + if (blend->colormask != 0xf) { + gen_colormask(blend->colormask, f, rgba_reg, fbRGBA_reg); } - /* Merge red, green, blue, alpha registers to make packed RGBA colors. - * Eg: after shifting according to color_format we might have: - * R = {0x00ff0000, 0x00110000, 0x00220000, 0x00330000} - * G = {0x0000ff00, 0x00004400, 0x00005500, 0x00006600} - * B = {0x000000ff, 0x00000077, 0x00000088, 0x00000099} - * A = {0xff000000, 0xaa000000, 0xbb000000, 0xcc000000} - * OR-ing all those together gives us four packed colors: - * RGBA = {0xffffffff, 0xaa114477, 0xbb225588, 0xcc336699} - */ - spe_or(f, rgba_reg, fragR_reg, fragG_reg); - spe_or(f, rgba_reg, rgba_reg, fragB_reg); - spe_or(f, rgba_reg, rgba_reg, fragA_reg); /* Mix fragment colors with framebuffer colors using the quad/pixel mask: * if (mask[i]) diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c index 9ed5fc50cd3..3f0eabaa050 100644 --- a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c +++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c @@ -39,9 +39,11 @@ /** - * Called by rasterizer for each quad after the shader has run. This - * is a fallback/debug function. In reality we'll use a generated - * function produced by the PPU. But this function is useful for + * Called by rasterizer for each quad after the shader has run. Do + * all the per-fragment operations including alpha test, z test, + * stencil test, blend, colormask and logicops. This is a + * fallback/debug function. In reality we'll use a generated function + * produced by the PPU. But this function is useful for * debug/validation. */ void @@ -49,13 +51,13 @@ spu_fallback_fragment_ops(uint x, uint y, tile_t *colorTile, tile_t *depthStencilTile, vector float fragZ, - vector float fragRed, - vector float fragGreen, - vector float fragBlue, - vector float fragAlpha, + vector float fragR, + vector float fragG, + vector float fragB, + vector float fragA, vector unsigned int mask) { - vector float frag_soa[4], frag_aos[4]; + vector float frag_aos[4]; unsigned int c0, c1, c2, c3; /* do alpha test */ @@ -65,24 +67,24 @@ spu_fallback_fragment_ops(uint x, uint y, switch (spu.depth_stencil_alpha.alpha.func) { case PIPE_FUNC_LESS: - amask = spu_cmpgt(ref, fragAlpha); /* mask = (fragAlpha < ref) */ + amask = spu_cmpgt(ref, fragA); /* mask = (fragA < ref) */ break; case PIPE_FUNC_GREATER: - amask = spu_cmpgt(fragAlpha, ref); /* mask = (fragAlpha > ref) */ + amask = spu_cmpgt(fragA, ref); /* mask = (fragA > ref) */ break; case PIPE_FUNC_GEQUAL: - amask = spu_cmpgt(ref, fragAlpha); + amask = spu_cmpgt(ref, fragA); amask = spu_nor(amask, amask); break; case PIPE_FUNC_LEQUAL: - amask = spu_cmpgt(fragAlpha, ref); + amask = spu_cmpgt(fragA, ref); amask = spu_nor(amask, amask); break; case PIPE_FUNC_EQUAL: - amask = spu_cmpeq(ref, fragAlpha); + amask = spu_cmpeq(ref, fragA); break; case PIPE_FUNC_NOTEQUAL: - amask = spu_cmpeq(ref, fragAlpha); + amask = spu_cmpeq(ref, fragA); amask = spu_nor(amask, amask); break; case PIPE_FUNC_ALWAYS: @@ -174,7 +176,189 @@ spu_fallback_fragment_ops(uint x, uint y, } } - /* XXX do blending here */ + if (spu.blend.blend_enable) { + vector float term1r, term1g, term1b, term1a; + vector float term2r, term2g, term2b, term2a; + + vector float fbRGBA[4]; + + vector float one, tmp; + + /* get colors from framebuffer */ + { + vector float fc[4]; + uint c0, c1, c2, c3; +#if 0 + c0 = colorTile->ui[y+0][x+0]; + c1 = colorTile->ui[y+0][x+1]; + c2 = colorTile->ui[y+1][x+0]; + c3 = colorTile->ui[y+1][x+1]; +#else + c0 = colorTile->ui[y][x*2+0]; + c1 = colorTile->ui[y][x*2+1]; + c2 = colorTile->ui[y][x*2+2]; + c3 = colorTile->ui[y][x*2+3]; +#endif + switch (spu.fb.color_format) { + case PIPE_FORMAT_B8G8R8A8_UNORM: + fc[0] = spu_unpack_B8G8R8A8(c0); + fc[1] = spu_unpack_B8G8R8A8(c1); + fc[2] = spu_unpack_B8G8R8A8(c2); + fc[3] = spu_unpack_B8G8R8A8(c3); + break; + case PIPE_FORMAT_A8R8G8B8_UNORM: + fc[0] = spu_unpack_A8R8G8B8(c0); + fc[1] = spu_unpack_A8R8G8B8(c1); + fc[2] = spu_unpack_A8R8G8B8(c2); + fc[3] = spu_unpack_A8R8G8B8(c3); + break; + default: + ASSERT(0); + } + _transpose_matrix4x4(fbRGBA, fc); + } + + /* + * Compute Src RGB terms + */ + switch (spu.blend.rgb_src_factor) { + case PIPE_BLENDFACTOR_ONE: + term1r = fragR; + term1g = fragG; + term1b = fragB; + break; + case PIPE_BLENDFACTOR_ZERO: + term1r = + term1g = + term1b = spu_splats(0.0f); + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + term1r = spu_mul(fragR, fragR); + term1g = spu_mul(fragG, fragG); + term1b = spu_mul(fragB, fragB); + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + term1r = spu_mul(fragR, fragA); + term1g = spu_mul(fragG, fragA); + term1b = spu_mul(fragB, fragA); + break; + /* XXX more cases */ + default: + ASSERT(0); + } + + /* + * Compute Src Alpha term + */ + switch (spu.blend.alpha_src_factor) { + case PIPE_BLENDFACTOR_ONE: + term1a = fragA; + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + term1a = spu_splats(0.0f); + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + term1a = spu_mul(fragA, fragA); + break; + /* XXX more cases */ + default: + ASSERT(0); + } + + /* + * Compute Dest RGB terms + */ + switch (spu.blend.rgb_dst_factor) { + case PIPE_BLENDFACTOR_ONE: + term2r = fragR; + term2g = fragG; + term2b = fragB; + break; + case PIPE_BLENDFACTOR_ZERO: + term2r = + term2g = + term2b = spu_splats(0.0f); + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + term2r = spu_mul(fbRGBA[0], fragR); + term2g = spu_mul(fbRGBA[1], fragG); + term2b = spu_mul(fbRGBA[2], fragB); + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + term2r = spu_mul(fbRGBA[0], fragA); + term2g = spu_mul(fbRGBA[1], fragA); + term2b = spu_mul(fbRGBA[2], fragA); + break; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + one = spu_splats(1.0f); + tmp = spu_sub(one, fragA); + term2r = spu_mul(fbRGBA[0], tmp); + term2g = spu_mul(fbRGBA[1], tmp); + term2b = spu_mul(fbRGBA[2], tmp); + break; + /* XXX more cases */ + default: + ASSERT(0); + } + + /* + * Compute Dest Alpha term + */ + switch (spu.blend.alpha_dst_factor) { + case PIPE_BLENDFACTOR_ONE: + term2a = fragA; + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + term2a = spu_splats(0.0f); + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + term2a = spu_mul(fbRGBA[3], fragA); + break; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + one = spu_splats(1.0f); + tmp = spu_sub(one, fragA); + term2a = spu_mul(fbRGBA[3], tmp); + break; + /* XXX more cases */ + default: + ASSERT(0); + } + + /* + * Combine Src/Dest RGB terms + */ + switch (spu.blend.rgb_func) { + case PIPE_BLEND_ADD: + fragR = spu_add(term1r, term2r); + fragG = spu_add(term1g, term2g); + fragB = spu_add(term1b, term2b); + break; + case PIPE_BLEND_SUBTRACT: + fragR = spu_sub(term1r, term2r); + fragG = spu_sub(term1g, term2g); + fragB = spu_sub(term1b, term2b); + break; + /* XXX more cases */ + default: + ASSERT(0); + } + + /* + * Combine Src/Dest A term + */ + switch (spu.blend.alpha_func) { + case PIPE_BLEND_ADD: + fragA = spu_add(term1a, term2a); + break; + case PIPE_BLEND_SUBTRACT: + fragA = spu_sub(term1a, term2a); + break; + /* XXX more cases */ + default: + ASSERT(0); + } + } + /* XXX do colormask test here */ @@ -190,17 +374,17 @@ spu_fallback_fragment_ops(uint x, uint y, #if 0 { vector float frag_soa[4]; - frag_soa[0] = fragRed; - frag_soa[1] = fragGreen; - frag_soa[2] = fragBlue; - frag_soa[3] = fragAlpha; + frag_soa[0] = fragR; + frag_soa[1] = fragG; + frag_soa[2] = fragB; + frag_soa[3] = fragA; _transpose_matrix4x4(frag_aos, frag_soa); } #else /* short-cut relying on function parameter layout: */ - _transpose_matrix4x4(frag_aos, &fragRed); - (void) fragGreen; - (void) fragBlue; + _transpose_matrix4x4(frag_aos, &fragR); + (void) fragG; + (void) fragB; #endif switch (spu.fb.color_format) { @@ -238,7 +422,7 @@ spu_fallback_fragment_ops(uint x, uint y, if (spu_extract(mask, 2)) colorTile->ui[y+1][x+0] = c2; if (spu_extract(mask, 3)) - colorTile->ui[y+1][x+1] = c3; + colorTile->ui[y+1][x+1] = c3; #else /* * Quad layout: @@ -253,6 +437,6 @@ spu_fallback_fragment_ops(uint x, uint y, if (spu_extract(mask, 2)) colorTile->ui[y][x*2+2] = c2; if (spu_extract(mask, 3)) - colorTile->ui[y][x*2+3] = c3; + colorTile->ui[y][x*2+3] = c3; #endif }