From bb867b0ae43deb73eca8cd04eda2a7a12620cdcf Mon Sep 17 00:00:00 2001 From: =?utf8?q?Jos=C3=A9=20Fonseca?= Date: Tue, 18 Aug 2009 14:18:54 +0100 Subject: [PATCH] llvmpipe: Blend in place. --- src/gallium/drivers/llvmpipe/lp_quad_blend.c | 75 ++++++++----------- src/gallium/drivers/llvmpipe/lp_state.h | 8 +- src/gallium/drivers/llvmpipe/lp_state_blend.c | 40 +++++++--- src/gallium/drivers/llvmpipe/lp_tile_cache.h | 2 +- 4 files changed, 66 insertions(+), 59 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_quad_blend.c b/src/gallium/drivers/llvmpipe/lp_quad_blend.c index 966ac628a95..47b3cd9787a 100644 --- a/src/gallium/drivers/llvmpipe/lp_quad_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_quad_blend.c @@ -58,14 +58,15 @@ blend_run(struct quad_stage *qs, struct llvmpipe_context *llvmpipe = qs->llvmpipe; struct lp_blend_state *blend = llvmpipe->blend; unsigned cbuf; - uint q, i, j, k; + uint q, i, j; for (cbuf = 0; cbuf < llvmpipe->framebuffer.nr_cbufs; cbuf++) { unsigned x0 = quads[0]->input.x0; unsigned y0 = quads[0]->input.y0; - uint8_t ALIGN16_ATTRIB src[4][16]; - uint8_t ALIGN16_ATTRIB dst[4][16]; + uint8_t ALIGN16_ATTRIB src[NUM_CHANNELS][TILE_VECTOR_HEIGHT*TILE_VECTOR_WIDTH]; + uint8_t ALIGN16_ATTRIB mask[16]; + uint8_t *dst; struct llvmpipe_cached_tile *tile = lp_get_cached_tile(llvmpipe->cbuf_cache[cbuf], x0, y0); @@ -74,51 +75,41 @@ blend_run(struct quad_stage *qs, assert(x0 % TILE_VECTOR_WIDTH == 0); assert(y0 % TILE_VECTOR_HEIGHT == 0); - for (q = 0; q < nr; q += 4) { - for (k = 0; k < 4 && q + k < nr; ++k) { - struct quad_header *quad = quads[q + k]; - const int itx = (quad->input.x0 & (TILE_SIZE-1)); - const int ity = (quad->input.y0 & (TILE_SIZE-1)); - - /* get/swizzle src/dest colors - */ - for (j = 0; j < QUAD_SIZE; j++) { - int x = itx + (j & 1); - int y = ity + (j >> 1); - for (i = 0; i < 4; i++) { - src[i][4*k + j] = float_to_ubyte(quad->output.color[cbuf][i][j]); - dst[i][4*k + j] = TILE_PIXEL(tile->data.color, x, y, i); - } - } - } + dst = &TILE_PIXEL(tile->data.color, x0 & (TILE_SIZE-1), y0 & (TILE_SIZE-1), 0); + + for (q = 0; q < nr; ++q) { + struct quad_header *quad = quads[q]; + const int itx = (quad->input.x0 & (TILE_SIZE-1)); + const int ity = (quad->input.y0 & (TILE_SIZE-1)); - assert(blend->jit_function); - assert((((uintptr_t)src) & 0xf) == 0); - assert((((uintptr_t)dst) & 0xf) == 0); - assert((((uintptr_t)llvmpipe->blend_color) & 0xf) == 0); - if(blend->jit_function) - blend->jit_function( src, dst, llvmpipe->blend_color, src ); + assert(quad->input.x0 == x0 + q*2); + assert(quad->input.y0 == y0); - /* Output color values + /* get/swizzle src/dest colors */ - for (k = 0; k < 4 && q + k < nr; ++k) { - struct quad_header *quad = quads[q + k]; - const int itx = (quad->input.x0 & (TILE_SIZE-1)); - const int ity = (quad->input.y0 & (TILE_SIZE-1)); - - for (j = 0; j < QUAD_SIZE; j++) { - if (quad->inout.mask & (1 << j)) { - int x = itx + (j & 1); - int y = ity + (j >> 1); - assert(x < TILE_SIZE); - assert(y < TILE_SIZE); - for (i = 0; i < 4; i++) { /* loop over color chans */ - TILE_PIXEL(tile->data.color, x, y, i) = src[i][4*k + j]; - } - } + for (j = 0; j < QUAD_SIZE; j++) { + int x = itx + (j & 1); + int y = ity + (j >> 1); + + assert(x < TILE_SIZE); + assert(y < TILE_SIZE); + + for (i = 0; i < 4; i++) { + src[i][4*q + j] = float_to_ubyte(quad->output.color[cbuf][i][j]); } + mask[4*q + j] = quad->inout.mask & (1 << j) ? ~0 : 0; } } + + assert(blend->jit_function); + assert((((uintptr_t)src) & 0xf) == 0); + assert((((uintptr_t)dst) & 0xf) == 0); + assert((((uintptr_t)llvmpipe->blend_color) & 0xf) == 0); + if(blend->jit_function) + blend->jit_function( mask, + &src[0][0], + &llvmpipe->blend_color[0][0], + dst ); } } diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h index b4bc89ea320..f823762c465 100644 --- a/src/gallium/drivers/llvmpipe/lp_state.h +++ b/src/gallium/drivers/llvmpipe/lp_state.h @@ -95,10 +95,10 @@ struct lp_vertex_shader { typedef void -(*lp_blend_func)(uint8_t src[][16], - uint8_t dst[][16], - uint8_t con[][16], - uint8_t res[][16]); +(*lp_blend_func)(const uint8_t *mask, + const uint8_t *src, + const uint8_t *con, + uint8_t *dst); struct lp_blend_state diff --git a/src/gallium/drivers/llvmpipe/lp_state_blend.c b/src/gallium/drivers/llvmpipe/lp_state_blend.c index d31fc6c5fa6..487146bcd4d 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_state_blend.c @@ -40,6 +40,7 @@ #include "lp_bld_type.h" #include "lp_bld_arit.h" +#include "lp_bld_logic.h" #include "lp_bld_blend.h" #include "lp_bld_debug.h" @@ -49,21 +50,25 @@ blend_generate(struct llvmpipe_screen *screen, struct lp_blend_state *blend) { union lp_type type; + struct lp_build_context bld; LLVMTypeRef vec_type; - LLVMTypeRef args[4]; + LLVMTypeRef int_vec_type; + LLVMTypeRef arg_types[4]; + LLVMTypeRef func_type; + LLVMValueRef mask_ptr; LLVMValueRef src_ptr; LLVMValueRef dst_ptr; LLVMValueRef const_ptr; - LLVMValueRef res_ptr; LLVMBasicBlockRef block; LLVMBuilderRef builder; + LLVMValueRef mask; LLVMValueRef src[4]; - LLVMValueRef dst[4]; LLVMValueRef con[4]; + LLVMValueRef dst[4]; LLVMValueRef res[4]; char src_name[5] = "src?"; - char dst_name[5] = "dst?"; char con_name[5] = "con?"; + char dst_name[5] = "dst?"; char res_name[5] = "res?"; unsigned i; @@ -75,25 +80,35 @@ blend_generate(struct llvmpipe_screen *screen, type.length = 16; vec_type = lp_build_vec_type(type); - - args[3] = args[2] = args[1] = args[0] = LLVMPointerType(vec_type, 0); - blend->function = LLVMAddFunction(screen->module, "blend", LLVMFunctionType(LLVMVoidType(), args, 4, 0)); + int_vec_type = lp_build_int_vec_type(type); + + arg_types[0] = LLVMPointerType(int_vec_type, 0); /* mask */ + arg_types[1] = LLVMPointerType(vec_type, 0); /* src */ + arg_types[2] = LLVMPointerType(vec_type, 0); /* con */ + arg_types[3] = LLVMPointerType(vec_type, 0); /* dst */ + func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0); + blend->function = LLVMAddFunction(screen->module, "blend", func_type); LLVMSetFunctionCallConv(blend->function, LLVMCCallConv); - src_ptr = LLVMGetParam(blend->function, 0); - dst_ptr = LLVMGetParam(blend->function, 1); + + mask_ptr = LLVMGetParam(blend->function, 0); + src_ptr = LLVMGetParam(blend->function, 1); const_ptr = LLVMGetParam(blend->function, 2); - res_ptr = LLVMGetParam(blend->function, 3); + dst_ptr = LLVMGetParam(blend->function, 3); block = LLVMAppendBasicBlock(blend->function, "entry"); builder = LLVMCreateBuilder(); LLVMPositionBuilderAtEnd(builder, block); + lp_build_context_init(&bld, builder, type); + + mask = LLVMBuildLoad(builder, mask_ptr, "mask"); + for(i = 0; i < 4; ++i) { LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); con_name[3] = dst_name[3] = src_name[3] = "rgba"[i]; src[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, src_ptr, &index, 1, ""), src_name); - dst[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dst_ptr, &index, 1, ""), dst_name); con[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, const_ptr, &index, 1, ""), con_name); + dst[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dst_ptr, &index, 1, ""), dst_name); } lp_build_blend_soa(builder, &blend->base, type, src, dst, con, res); @@ -102,7 +117,8 @@ blend_generate(struct llvmpipe_screen *screen, LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); res_name[3] = "rgba"[i]; LLVMSetValueName(res[i], res_name); - LLVMBuildStore(builder, res[i], LLVMBuildGEP(builder, res_ptr, &index, 1, "")); + res[i] = lp_build_select(&bld, mask, res[i], dst[i]); + LLVMBuildStore(builder, res[i], LLVMBuildGEP(builder, dst_ptr, &index, 1, "")); } LLVMBuildRetVoid(builder);; diff --git a/src/gallium/drivers/llvmpipe/lp_tile_cache.h b/src/gallium/drivers/llvmpipe/lp_tile_cache.h index 20d4137696a..d4774bad3b6 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_cache.h +++ b/src/gallium/drivers/llvmpipe/lp_tile_cache.h @@ -61,7 +61,7 @@ struct llvmpipe_cached_tile union { /** color in SOA format */ - uint8_t color[TILE_SIZE*TILE_SIZE*NUM_CHANNELS]; + uint8_t ALIGN16_ATTRIB color[TILE_SIZE*TILE_SIZE*NUM_CHANNELS]; uint color32[TILE_SIZE][TILE_SIZE]; uint depth32[TILE_SIZE][TILE_SIZE]; -- 2.30.2