From: José Fonseca Date: Wed, 28 Nov 2012 20:14:17 +0000 (+0000) Subject: llvmpipe: Eliminate color buffer swizzling. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=75da95c50;p=mesa.git llvmpipe: Eliminate color buffer swizzling. Now dead code. Also had to remove the show_tiles/show_subtiles because now the color buffers are always stored in their native format, so there is no longer an easy way to paint the tile sizes. Depth-stencil buffers are still swizzled. Reviewed-by: Roland Scheidegger --- diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index cea44a78679..d8952b90229 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -9,25 +9,6 @@ if not env['llvm']: env = env.Clone() -env.Append(CPPPATH = ['.']) - -env.CodeGenerate( - target = 'lp_tile_soa.c', - script = 'lp_tile_soa.py', - source = ['#src/gallium/auxiliary/util/u_format.csv'], - command = python_cmd + ' $SCRIPT $SOURCE > $TARGET' -) - -# XXX: Our dependency scanner only finds depended modules in relative dirs. -env.Depends('lp_tile_soa.c', [ - '#src/gallium/auxiliary/util/u_format_parse.py', - '#src/gallium/auxiliary/util/u_format_pack.py', -]) - - -lp_tile_soa_os = env.SharedObject('lp_tile_soa.c') - - llvmpipe = env.ConvenienceLibrary( target = 'llvmpipe', source = [ @@ -74,7 +55,6 @@ llvmpipe = env.ConvenienceLibrary( 'lp_tex_sample.c', 'lp_texture.c', 'lp_tile_image.c', - lp_tile_soa_os, ]) env.Alias('llvmpipe', llvmpipe) diff --git a/src/gallium/drivers/llvmpipe/lp_debug.h b/src/gallium/drivers/llvmpipe/lp_debug.h index 15dca5218fd..d9633d79deb 100644 --- a/src/gallium/drivers/llvmpipe/lp_debug.h +++ b/src/gallium/drivers/llvmpipe/lp_debug.h @@ -43,8 +43,6 @@ st_print_current(void); #define DEBUG_RAST 0x20 #define DEBUG_QUERY 0x40 #define DEBUG_SCREEN 0x80 -#define DEBUG_SHOW_TILES 0x200 -#define DEBUG_SHOW_SUBTILES 0x400 #define DEBUG_COUNTERS 0x800 #define DEBUG_SCENE 0x1000 #define DEBUG_FENCE 0x2000 diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 3d83077fcbd..5c233c06a15 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -39,7 +39,6 @@ #include "lp_query.h" #include "lp_rast.h" #include "lp_rast_priv.h" -#include "lp_tile_soa.h" #include "gallivm/lp_bld_debug.h" #include "lp_scene.h" #include "lp_tex_sample.h" @@ -76,12 +75,6 @@ lp_rast_end( struct lp_rasterizer *rast ) lp_scene_end_rasterization( rast->curr_scene ); rast->curr_scene = NULL; - -#ifdef DEBUG - if (0) - debug_printf("Post render scene: tile unswizzle: %u tile swizzle: %u\n", - lp_tile_unswizzle_count, lp_tile_swizzle_count); -#endif } @@ -154,7 +147,6 @@ lp_rast_clear_color(struct lp_rasterizer_task *task, uint8_t clear_color[4]; unsigned i; - boolean gray; for (i = 0; i < 4; ++i) { clear_color[i] = float_to_ubyte(arg.clear_color[i]); @@ -166,57 +158,21 @@ lp_rast_clear_color(struct lp_rasterizer_task *task, clear_color[2], clear_color[3]); - gray = - clear_color[0] == clear_color[1] && - clear_color[1] == clear_color[2] && - clear_color[2] == clear_color[3]; - for (i = 0; i < scene->fb.nr_cbufs; i++) { - if (scene->cbufs[i].unswizzled) { - const struct lp_scene *scene = task->scene; - union util_color uc; - - util_pack_color(arg.clear_color, - scene->fb.cbufs[i]->format, &uc); - - util_fill_rect(scene->cbufs[i].map, - scene->fb.cbufs[i]->format, - scene->cbufs[i].stride, - task->x, - task->y, - TILE_SIZE, - TILE_SIZE, - &uc); - } else { - const unsigned chunk = TILE_SIZE / 4; - uint8_t *ptr; - unsigned j; - - ptr = lp_rast_get_color_tile_pointer(task, i, LP_TEX_USAGE_WRITE_ALL); - - if (gray) { - /* clear to grayscale value {x, x, x, x} */ - - memset(ptr, clear_color[0], TILE_SIZE * TILE_SIZE * 4); - } else { - /* Non-gray color. - * Note: if the swizzled tile layout changes (see TILE_PIXEL) this code - * will need to change. It'll be pretty obvious when clearing no longer - * works. - */ - - for (j = 0; j < 4 * TILE_SIZE; j++) { - memset(ptr, clear_color[0], chunk); - ptr += chunk; - memset(ptr, clear_color[1], chunk); - ptr += chunk; - memset(ptr, clear_color[2], chunk); - ptr += chunk; - memset(ptr, clear_color[3], chunk); - ptr += chunk; - } - } - } + const struct lp_scene *scene = task->scene; + union util_color uc; + + util_pack_color(arg.clear_color, + scene->fb.cbufs[i]->format, &uc); + + util_fill_rect(scene->cbufs[i].map, + scene->fb.cbufs[i]->format, + scene->cbufs[i].stride, + task->x, + task->y, + TILE_SIZE, + TILE_SIZE, + &uc); } LP_COUNT(nr_color_tile_clear); @@ -313,40 +269,6 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task, -/** - * Convert the color tile from tiled to linear layout. - * This is generally only done when we're flushing the scene just prior to - * SwapBuffers. If we didn't do this here, we'd have to convert the entire - * tiled color buffer to linear layout in the llvmpipe_texture_unmap() - * function. It's better to do it here to take advantage of - * threading/parallelism. - * This is a bin command which is stored in all bins. - */ -static void -lp_rast_store_linear_color( struct lp_rasterizer_task *task ) -{ - const struct lp_scene *scene = task->scene; - unsigned buf; - - for (buf = 0; buf < scene->fb.nr_cbufs; buf++) { - struct pipe_surface *cbuf = scene->fb.cbufs[buf]; - const unsigned layer = cbuf->u.tex.first_layer; - const unsigned level = cbuf->u.tex.level; - struct llvmpipe_resource *lpt = llvmpipe_resource(cbuf->texture); - - if (scene->cbufs[buf].unswizzled || !task->color_tiles[buf]) - continue; - - llvmpipe_unswizzle_cbuf_tile(lpt, - layer, - level, - task->x, task->y, - task->color_tiles[buf]); - } -} - - - /** * Run the shader on all blocks in a tile. This is used when a tile is * completely contained inside a triangle. @@ -389,11 +311,7 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task, for (i = 0; i < scene->fb.nr_cbufs; i++){ stride[i] = scene->cbufs[i].stride; - if (scene->cbufs[i].unswizzled) { - color[i] = lp_rast_get_unswizzled_color_block_pointer(task, i, tile_x + x, tile_y + y); - } else { - color[i] = lp_rast_get_color_block_pointer(task, i, tile_x + x, tile_y + y); - } + color[i] = lp_rast_get_unswizzled_color_block_pointer(task, i, tile_x + x, tile_y + y); } /* depth buffer */ @@ -427,9 +345,6 @@ static void lp_rast_shade_tile_opaque(struct lp_rasterizer_task *task, const union lp_rast_cmd_arg arg) { - const struct lp_scene *scene = task->scene; - unsigned i; - LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); assert(task->state); @@ -437,13 +352,6 @@ lp_rast_shade_tile_opaque(struct lp_rasterizer_task *task, return; } - /* this will prevent converting the layout from tiled to linear */ - for (i = 0; i < scene->fb.nr_cbufs; i++) { - if (!scene->cbufs[i].unswizzled) { - (void)lp_rast_get_color_tile_pointer(task, i, LP_TEX_USAGE_WRITE_ALL); - } - } - lp_rast_shade_tile(task, arg); } @@ -483,11 +391,7 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task, for (i = 0; i < scene->fb.nr_cbufs; i++) { stride[i] = scene->cbufs[i].stride; - if (scene->cbufs[i].unswizzled) { - color[i] = lp_rast_get_unswizzled_color_block_pointer(task, i, x, y); - } else { - color[i] = lp_rast_get_color_block_pointer(task, i, x, y); - } + color[i] = lp_rast_get_unswizzled_color_block_pointer(task, i, x, y); } /* depth buffer */ @@ -557,86 +461,12 @@ lp_rast_set_state(struct lp_rasterizer_task *task, -/** - * Set top row and left column of the tile's pixels to white. For debugging. - */ -static void -outline_tile(uint8_t *tile) -{ - const uint8_t val = 0xff; - unsigned i; - - for (i = 0; i < TILE_SIZE; i++) { - TILE_PIXEL(tile, i, 0, 0) = val; - TILE_PIXEL(tile, i, 0, 1) = val; - TILE_PIXEL(tile, i, 0, 2) = val; - TILE_PIXEL(tile, i, 0, 3) = val; - - TILE_PIXEL(tile, 0, i, 0) = val; - TILE_PIXEL(tile, 0, i, 1) = val; - TILE_PIXEL(tile, 0, i, 2) = val; - TILE_PIXEL(tile, 0, i, 3) = val; - } -} - - -/** - * Draw grid of gray lines at 16-pixel intervals across the tile to - * show the sub-tile boundaries. For debugging. - */ -static void -outline_subtiles(uint8_t *tile) -{ - const uint8_t val = 0x80; - const unsigned step = 16; - unsigned i, j; - - for (i = 0; i < TILE_SIZE; i += step) { - for (j = 0; j < TILE_SIZE; j++) { - TILE_PIXEL(tile, i, j, 0) = val; - TILE_PIXEL(tile, i, j, 1) = val; - TILE_PIXEL(tile, i, j, 2) = val; - TILE_PIXEL(tile, i, j, 3) = val; - - TILE_PIXEL(tile, j, i, 0) = val; - TILE_PIXEL(tile, j, i, 1) = val; - TILE_PIXEL(tile, j, i, 2) = val; - TILE_PIXEL(tile, j, i, 3) = val; - } - } - - outline_tile(tile); -} - - - /** * Called when we're done writing to a color tile. */ static void lp_rast_tile_end(struct lp_rasterizer_task *task) { -#ifdef DEBUG - if (LP_DEBUG & (DEBUG_SHOW_SUBTILES | DEBUG_SHOW_TILES)) { - const struct lp_scene *scene = task->scene; - unsigned buf; - - for (buf = 0; buf < scene->fb.nr_cbufs; buf++) { - uint8_t *color = lp_rast_get_color_block_pointer(task, buf, - task->x, task->y); - - if (LP_DEBUG & DEBUG_SHOW_SUBTILES) - outline_subtiles(color); - else if (LP_DEBUG & DEBUG_SHOW_TILES) - outline_tile(color); - } - } -#else - (void) outline_subtiles; -#endif - - lp_rast_store_linear_color(task); - if (task->query) { union lp_rast_cmd_arg dummy = {0}; lp_rast_end_query(task, dummy); diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index eeb1a94138e..dc9739e9ad0 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -36,7 +36,7 @@ #include "lp_scene.h" #include "lp_state.h" #include "lp_texture.h" -#include "lp_tile_soa.h" +#include "lp_tile_image.h" #include "lp_limits.h" @@ -175,42 +175,6 @@ lp_rast_get_depth_block_pointer(struct lp_rasterizer_task *task, } -/** - * Get pointer to the swizzled color tile - */ -static INLINE uint8_t * -lp_rast_get_color_tile_pointer(struct lp_rasterizer_task *task, - unsigned buf, enum lp_texture_usage usage) -{ - const struct lp_scene *scene = task->scene; - - assert(task->x < scene->tiles_x * TILE_SIZE); - assert(task->y < scene->tiles_y * TILE_SIZE); - assert(task->x % TILE_SIZE == 0); - assert(task->y % TILE_SIZE == 0); - assert(buf < scene->fb.nr_cbufs); - assert(scene->cbufs[buf].unswizzled == 0); - - if (!task->color_tiles[buf]) { - struct pipe_surface *cbuf = scene->fb.cbufs[buf]; - struct llvmpipe_resource *lpt; - assert(cbuf); - lpt = llvmpipe_resource(cbuf->texture); - task->color_tiles[buf] = lp_swizzled_cbuf[task->thread_index][buf]; - - if (usage != LP_TEX_USAGE_WRITE_ALL) { - llvmpipe_swizzle_cbuf_tile(lpt, - cbuf->u.tex.first_layer, - cbuf->u.tex.level, - task->x, task->y, - task->color_tiles[buf]); - } - } - - return task->color_tiles[buf]; -} - - /** * Get pointer to the unswizzled color tile */ @@ -226,7 +190,6 @@ lp_rast_get_unswizzled_color_tile_pointer(struct lp_rasterizer_task *task, assert(task->x % TILE_SIZE == 0); assert(task->y % TILE_SIZE == 0); assert(buf < scene->fb.nr_cbufs); - assert(scene->cbufs[buf].unswizzled); if (!task->color_tiles[buf]) { struct pipe_surface *cbuf = scene->fb.cbufs[buf]; @@ -240,41 +203,6 @@ lp_rast_get_unswizzled_color_tile_pointer(struct lp_rasterizer_task *task, } -/** - * Get the pointer to a 4x4 color block (within a 64x64 tile). - * We'll map the color buffer on demand here. - * Note that this may be called even when there's no color buffers - return - * NULL in that case. - * \param x, y location of 4x4 block in window coords - */ -static INLINE uint8_t * -lp_rast_get_color_block_pointer(struct lp_rasterizer_task *task, - unsigned buf, unsigned x, unsigned y) -{ - unsigned px, py, pixel_offset; - uint8_t *color; - - assert(x < task->scene->tiles_x * TILE_SIZE); - assert(y < task->scene->tiles_y * TILE_SIZE); - assert((x % TILE_VECTOR_WIDTH) == 0); - assert((y % TILE_VECTOR_HEIGHT) == 0); - assert(buf < task->scene->fb.nr_cbufs); - assert(task->scene->cbufs[buf].unswizzled == 0); - - color = lp_rast_get_color_tile_pointer(task, buf, LP_TEX_USAGE_READ_WRITE); - assert(color); - - px = x % TILE_SIZE; - py = y % TILE_SIZE; - pixel_offset = tile_pixel_offset(px, py, 0); - - color = color + pixel_offset; - - assert(lp_check_alignment(color, 16)); - return color; -} - - /** * Get the pointer to an unswizzled 4x4 color block (within an unswizzled 64x64 tile). * \param x, y location of 4x4 block in window coords @@ -291,7 +219,6 @@ lp_rast_get_unswizzled_color_block_pointer(struct lp_rasterizer_task *task, assert((x % TILE_VECTOR_WIDTH) == 0); assert((y % TILE_VECTOR_HEIGHT) == 0); assert(buf < task->scene->fb.nr_cbufs); - assert(task->scene->cbufs[buf].unswizzled); format_bytes = util_format_description(task->scene->fb.cbufs[buf]->format)->block.bits / 8; @@ -332,11 +259,7 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task, for (i = 0; i < scene->fb.nr_cbufs; i++) { stride[i] = scene->cbufs[i].stride; - if (scene->cbufs[i].unswizzled) { - color[i] = lp_rast_get_unswizzled_color_block_pointer(task, i, x, y); - } else { - color[i] = lp_rast_get_color_block_pointer(task, i, x, y); - } + color[i] = lp_rast_get_unswizzled_color_block_pointer(task, i, x, y); } depth = lp_rast_get_depth_block_pointer(task, x, y); diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 230b80a945f..5ef070af8df 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -34,7 +34,6 @@ #include "lp_debug.h" #include "lp_perf.h" #include "lp_rast_priv.h" -#include "lp_tile_soa.h" diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c index 515717bf021..ed998246fb9 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.c +++ b/src/gallium/drivers/llvmpipe/lp_scene.c @@ -150,8 +150,6 @@ lp_scene_begin_rasterization(struct lp_scene *scene) cbuf->u.tex.first_layer, LP_TEX_USAGE_READ_WRITE, LP_TEX_LAYOUT_LINEAR); - - scene->cbufs[i].unswizzled = llvmpipe_is_format_unswizzled(cbuf->format); } if (fb->zsbuf) { diff --git a/src/gallium/drivers/llvmpipe/lp_scene.h b/src/gallium/drivers/llvmpipe/lp_scene.h index 012fa672cf8..31a5ae72b37 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.h +++ b/src/gallium/drivers/llvmpipe/lp_scene.h @@ -36,7 +36,6 @@ #define LP_SCENE_H #include "os/os_thread.h" -#include "lp_tile_soa.h" #include "lp_rast.h" #include "lp_debug.h" @@ -137,7 +136,6 @@ struct lp_scene { uint8_t *map; unsigned stride; unsigned blocksize; - unsigned unswizzled; } zsbuf, cbufs[PIPE_MAX_COLOR_BUFS]; /** the framebuffer to render the scene into */ diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index f1da6f87cbf..375692ec6d9 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -60,8 +60,6 @@ static const struct debug_named_value lp_debug_flags[] = { { "rast", DEBUG_RAST, NULL }, { "query", DEBUG_QUERY, NULL }, { "screen", DEBUG_SCREEN, NULL }, - { "show_tiles", DEBUG_SHOW_TILES, NULL }, - { "show_subtiles", DEBUG_SHOW_SUBTILES, NULL }, { "counters", DEBUG_COUNTERS, NULL }, { "scene", DEBUG_SCENE, NULL }, { "fence", DEBUG_FENCE, NULL }, diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index dc2533bedc5..7fdd1e735c4 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -37,7 +37,6 @@ #include "lp_setup.h" #include "lp_rast.h" -#include "lp_tile_soa.h" /* for TILE_SIZE */ #include "lp_scene.h" #include "lp_bld_interp.h" /* for struct lp_shader_input */ diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index d702fc07280..83933bb426e 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -685,76 +685,6 @@ generate_fs_loop(struct gallivm_state *gallivm, } -/** - * Generate color blending and color output. - * \param rt the render target index (to index blend, colormask state) - * \param type the pixel color type - * \param context_ptr pointer to the runtime JIT context - * \param mask execution mask (active fragment/pixel mask) - * \param src colors from the fragment shader - * \param dst_ptr the destination color buffer pointer - */ -static void -generate_blend(struct gallivm_state *gallivm, - const struct pipe_blend_state *blend, - unsigned rt, - LLVMBuilderRef builder, - struct lp_type type, - LLVMValueRef context_ptr, - LLVMValueRef mask, - LLVMValueRef *src, - LLVMValueRef dst_ptr, - boolean do_branch) -{ - struct lp_build_context bld; - struct lp_build_mask_context mask_ctx; - LLVMTypeRef vec_type; - LLVMValueRef const_ptr; - LLVMValueRef con[4]; - LLVMValueRef dst[4]; - LLVMValueRef res[4]; - unsigned chan; - - lp_build_context_init(&bld, gallivm, type); - - lp_build_mask_begin(&mask_ctx, gallivm, type, mask); - if (do_branch) - lp_build_mask_check(&mask_ctx); - - vec_type = lp_build_vec_type(gallivm, type); - - const_ptr = lp_jit_context_u8_blend_color(gallivm, context_ptr); - const_ptr = LLVMBuildBitCast(builder, const_ptr, - LLVMPointerType(vec_type, 0), ""); - - /* load constant blend color and colors from the dest color buffer */ - for(chan = 0; chan < 4; ++chan) { - LLVMValueRef index = lp_build_const_int32(gallivm, chan); - con[chan] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, const_ptr, &index, 1, ""), ""); - - dst[chan] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dst_ptr, &index, 1, ""), ""); - - lp_build_name(con[chan], "con.%c", "rgba"[chan]); - lp_build_name(dst[chan], "dst.%c", "rgba"[chan]); - } - - /* do blend */ - lp_build_blend_soa(gallivm, blend, type, rt, src, dst, con, res); - - /* store results to color buffer */ - for(chan = 0; chan < 4; ++chan) { - if(blend->rt[rt].colormask & (1 << chan)) { - LLVMValueRef index = lp_build_const_int32(gallivm, chan); - lp_build_name(res[chan], "res.%c", "rgba"[chan]); - res[chan] = lp_build_select(&bld, mask, res[chan], dst[chan]); - LLVMBuildStore(builder, res[chan], LLVMBuildGEP(builder, dst_ptr, &index, 1, "")); - } - } - - lp_build_mask_end(&mask_ctx); -} - - /** * This function will reorder pixels from the fragment shader SoA to memory layout AoS * @@ -1800,7 +1730,6 @@ generate_fragment(struct llvmpipe_context *lp, struct lp_build_interp_soa_context interp; LLVMValueRef fs_mask[16 / 4]; LLVMValueRef fs_out_color[PIPE_MAX_COLOR_BUFS][TGSI_NUM_CHANNELS][16 / 4]; - LLVMValueRef blend_mask; LLVMValueRef function; LLVMValueRef facing; const struct util_format_description *zs_format_desc; @@ -2058,8 +1987,8 @@ generate_fragment(struct llvmpipe_context *lp, */ for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) { LLVMValueRef color_ptr; + LLVMValueRef stride; LLVMValueRef index = lp_build_const_int32(gallivm, cbuf); - LLVMValueRef blend_in_color[TGSI_NUM_CHANNELS]; unsigned rt = key->blend.independent_blend_enable ? cbuf : 0; boolean do_branch = ((key->depth.enabled @@ -2073,53 +2002,13 @@ generate_fragment(struct llvmpipe_context *lp, lp_build_name(color_ptr, "color_ptr%d", cbuf); - if (variant->unswizzled_cbufs & (1 << cbuf)) { - LLVMValueRef stride = LLVMBuildLoad(builder, - LLVMBuildGEP(builder, stride_ptr, &index, 1, ""), - ""); - - generate_unswizzled_blend(gallivm, rt, variant, key->cbuf_format[cbuf], - num_fs, fs_type, fs_mask, fs_out_color[cbuf], - context_ptr, color_ptr, stride, partial_mask, do_branch); - } else { - /* - * Convert the fs's output color and mask to fit to the blending type. - */ - for(chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) { - LLVMValueRef fs_color_vals[LP_MAX_VECTOR_LENGTH]; - - for (i = 0; i < num_fs; i++) { - fs_color_vals[i] = - LLVMBuildLoad(builder, fs_out_color[cbuf][chan][i], "fs_color_vals"); - } - - lp_build_conv(gallivm, fs_type, blend_type, - fs_color_vals, - num_fs, - &blend_in_color[chan], 1); - - lp_build_name(blend_in_color[chan], "color%d.%c", cbuf, "rgba"[chan]); - } - - if (partial_mask || !variant->opaque) { - lp_build_conv_mask(gallivm, fs_type, blend_type, - fs_mask, num_fs, - &blend_mask, 1); - } else { - blend_mask = lp_build_const_int_vec(gallivm, blend_type, ~0); - } + stride = LLVMBuildLoad(builder, + LLVMBuildGEP(builder, stride_ptr, &index, 1, ""), + ""); - generate_blend(gallivm, - &key->blend, - rt, - builder, - blend_type, - context_ptr, - blend_mask, - blend_in_color, - color_ptr, - do_branch); - } + generate_unswizzled_blend(gallivm, rt, variant, key->cbuf_format[cbuf], + num_fs, fs_type, fs_mask, fs_out_color[cbuf], + context_ptr, color_ptr, stride, partial_mask, do_branch); } LLVMBuildRetVoid(builder); @@ -2235,7 +2124,6 @@ generate_variant(struct llvmpipe_context *lp, struct lp_fragment_shader_variant *variant; const struct util_format_description *cbuf0_format_desc; boolean fullcolormask; - unsigned i; variant = CALLOC_STRUCT(lp_fragment_shader_variant); if(!variant) @@ -2273,10 +2161,6 @@ generate_variant(struct llvmpipe_context *lp, !shader->info.base.uses_kill ? TRUE : FALSE; - for (i = 0; i < key->nr_cbufs; ++i) { - variant->unswizzled_cbufs |= llvmpipe_is_format_unswizzled(key->cbuf_format[i]) << i; - } - if ((LP_DEBUG & DEBUG_FS) || (gallivm_debug & GALLIVM_DEBUG_IR)) { lp_debug_fs_variant(variant); } diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.h b/src/gallium/drivers/llvmpipe/lp_state_fs.h index 173d2f452ca..306f5f9669a 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.h +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.h @@ -94,9 +94,6 @@ struct lp_fragment_shader_variant lp_jit_frag_func jit_function[2]; - /* Bitmask to say what cbufs are unswizzled */ - unsigned unswizzled_cbufs; - /* Total number of LLVM instructions generated */ unsigned nr_instrs; diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c index dcf2665dbc2..39d646964e4 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.c +++ b/src/gallium/drivers/llvmpipe/lp_texture.c @@ -756,25 +756,6 @@ llvmpipe_is_resource_referenced( struct pipe_context *pipe, return lp_setup_is_resource_referenced(llvmpipe->setup, presource); } -boolean -llvmpipe_is_format_unswizzled( enum pipe_format format ) -{ - const struct util_format_description *desc = util_format_description(format); - - if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN || - (desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB && - desc->colorspace != UTIL_FORMAT_COLORSPACE_SRGB) || - desc->block.width != 1 || - desc->block.height != 1 || - desc->is_mixed || - (!desc->is_array && !desc->is_bitmask)) { - assert(0); - return FALSE; - } - - return TRUE; -} - /** * Returns the largest possible alignment for a format in llvmpipe @@ -1364,94 +1345,6 @@ llvmpipe_get_texture_tile(struct llvmpipe_resource *lpr, } -/** - * Get pointer to tiled data for rendering. - * \return pointer to the tiled data at the given tile position - */ -void -llvmpipe_unswizzle_cbuf_tile(struct llvmpipe_resource *lpr, - unsigned face_slice, unsigned level, - unsigned x, unsigned y, - uint8_t *tile) -{ - struct llvmpipe_texture_image *linear_img = &lpr->linear_img; - const unsigned tx = x / TILE_SIZE, ty = y / TILE_SIZE; - uint8_t *linear_image; - - assert(x % TILE_SIZE == 0); - assert(y % TILE_SIZE == 0); - - if (!linear_img->data) { - /* allocate memory for the linear image now */ - alloc_image_data(lpr, LP_TEX_LAYOUT_LINEAR); - } - - /* compute address of the slice/face of the image that contains the tile */ - linear_image = llvmpipe_get_texture_image_address(lpr, face_slice, level, - LP_TEX_LAYOUT_LINEAR); - - { - uint ii = x, jj = y; - uint tile_offset = jj / TILE_SIZE + ii / TILE_SIZE; - uint byte_offset = tile_offset * TILE_SIZE * TILE_SIZE * 4; - - /* Note that lp_tiled_to_linear expects the tile parameter to - * point at the first tile in a whole-image sized array. In - * this code, we have only a single tile and have to do some - * pointer arithmetic to figure out where the "image" would have - * started. - */ - lp_tiled_to_linear(tile - byte_offset, linear_image, - x, y, TILE_SIZE, TILE_SIZE, - lpr->base.format, - lpr->row_stride[level], - 1); /* tiles per row */ - } - - llvmpipe_set_texture_tile_layout(lpr, face_slice, level, tx, ty, - LP_TEX_LAYOUT_LINEAR); -} - - -/** - * Get pointer to tiled data for rendering. - * \return pointer to the tiled data at the given tile position - */ -void -llvmpipe_swizzle_cbuf_tile(struct llvmpipe_resource *lpr, - unsigned face_slice, unsigned level, - unsigned x, unsigned y, - uint8_t *tile) -{ - uint8_t *linear_image; - - assert(x % TILE_SIZE == 0); - assert(y % TILE_SIZE == 0); - - /* compute address of the slice/face of the image that contains the tile */ - linear_image = llvmpipe_get_texture_image_address(lpr, face_slice, level, - LP_TEX_LAYOUT_LINEAR); - - if (linear_image) { - uint ii = x, jj = y; - uint tile_offset = jj / TILE_SIZE + ii / TILE_SIZE; - uint byte_offset = tile_offset * TILE_SIZE * TILE_SIZE * 4; - - /* Note that lp_linear_to_tiled expects the tile parameter to - * point at the first tile in a whole-image sized array. In - * this code, we have only a single tile and have to do some - * pointer arithmetic to figure out where the "image" would have - * started. - */ - lp_linear_to_tiled(linear_image, tile - byte_offset, - x, y, TILE_SIZE, TILE_SIZE, - lpr->base.format, - lpr->row_stride[level], - 1); /* tiles per row */ - } -} - - /** * Return size of resource in bytes */ diff --git a/src/gallium/drivers/llvmpipe/lp_texture.h b/src/gallium/drivers/llvmpipe/lp_texture.h index 67b254021d6..c2f2ee8d22b 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.h +++ b/src/gallium/drivers/llvmpipe/lp_texture.h @@ -224,18 +224,6 @@ llvmpipe_get_texture_tile(struct llvmpipe_resource *lpr, unsigned x, unsigned y); -void -llvmpipe_unswizzle_cbuf_tile(struct llvmpipe_resource *lpr, - unsigned face_slice, unsigned level, - unsigned x, unsigned y, - uint8_t *tile); - -void -llvmpipe_swizzle_cbuf_tile(struct llvmpipe_resource *lpr, - unsigned face_slice, unsigned level, - unsigned x, unsigned y, - uint8_t *tile); - extern void llvmpipe_print_resources(void); @@ -256,9 +244,6 @@ llvmpipe_is_resource_referenced( struct pipe_context *pipe, struct pipe_resource *presource, unsigned level, int layer); -boolean -llvmpipe_is_format_unswizzled(enum pipe_format format); - unsigned llvmpipe_get_format_alignment(enum pipe_format format); diff --git a/src/gallium/drivers/llvmpipe/lp_tile_image.c b/src/gallium/drivers/llvmpipe/lp_tile_image.c index d4ce78597e6..3faf0181183 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_image.c +++ b/src/gallium/drivers/llvmpipe/lp_tile_image.c @@ -34,7 +34,7 @@ #include "util/u_format.h" #include "util/u_memory.h" -#include "lp_tile_soa.h" +#include "lp_limits.h" #include "lp_tile_image.h" @@ -189,27 +189,7 @@ lp_tiled_to_linear(const void *src, void *dst, } } else { - /* color image */ - const uint bpp = 4; - const uint tile_w = TILE_SIZE, tile_h = TILE_SIZE; - const uint bytes_per_tile = tile_w * tile_h * bpp; - uint i, j; - assert(0); - - for (j = 0; j < height; j += tile_h) { - for (i = 0; i < width; i += tile_w) { - uint ii = i + x, jj = j + y; - uint tile_offset = ((jj / tile_h) * tiles_per_row + ii / tile_w); - uint byte_offset = tile_offset * bytes_per_tile; - const uint8_t *src_tile = (uint8_t *) src + byte_offset; - - lp_tile_unswizzle_4ub(format, - src_tile, - dst, dst_stride, - ii, jj); - } - } } } @@ -281,26 +261,7 @@ lp_linear_to_tiled(const void *src, void *dst, } } else { - const uint bpp = 4; - const uint tile_w = TILE_SIZE, tile_h = TILE_SIZE; - const uint bytes_per_tile = tile_w * tile_h * bpp; - uint i, j; - assert(0); - - for (j = 0; j < height; j += TILE_SIZE) { - for (i = 0; i < width; i += TILE_SIZE) { - uint ii = i + x, jj = j + y; - uint tile_offset = ((jj / tile_h) * tiles_per_row + ii / tile_w); - uint byte_offset = tile_offset * bytes_per_tile; - uint8_t *dst_tile = (uint8_t *) dst + byte_offset; - - lp_tile_swizzle_4ub(format, - dst_tile, - src, src_stride, - ii, jj); - } - } } } diff --git a/src/gallium/drivers/llvmpipe/lp_tile_image.h b/src/gallium/drivers/llvmpipe/lp_tile_image.h index 8de8efc6c16..07d367c2542 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_image.h +++ b/src/gallium/drivers/llvmpipe/lp_tile_image.h @@ -29,6 +29,10 @@ #define LP_TILE_IMAGE_H +#define TILE_VECTOR_HEIGHT 4 +#define TILE_VECTOR_WIDTH 4 + + void lp_tiled_to_linear(const void *src, void *dst, unsigned x, unsigned y, diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.h b/src/gallium/drivers/llvmpipe/lp_tile_soa.h deleted file mode 100644 index 6a5fc8dbfc9..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_tile_soa.h +++ /dev/null @@ -1,97 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef LP_TILE_SOA_H -#define LP_TILE_SOA_H - -#include "pipe/p_compiler.h" -#include "tgsi/tgsi_exec.h" /* for TGSI_NUM_CHANNELS */ -#include "lp_limits.h" - -#ifdef __cplusplus -extern "C" { -#endif - - -struct pipe_transfer; - - -#define TILE_VECTOR_HEIGHT 4 -#define TILE_VECTOR_WIDTH 4 - -extern const unsigned char -tile_offset[TILE_VECTOR_HEIGHT][TILE_VECTOR_WIDTH]; - -#define TILE_C_STRIDE (TILE_VECTOR_HEIGHT * TILE_VECTOR_WIDTH) //16 -#define TILE_X_STRIDE (TGSI_NUM_CHANNELS * TILE_C_STRIDE) //64 -#define TILE_Y_STRIDE (TILE_VECTOR_HEIGHT * TILE_SIZE * TGSI_NUM_CHANNELS) //1024 - - -#ifdef DEBUG -extern unsigned lp_tile_unswizzle_count; -extern unsigned lp_tile_swizzle_count; -#endif - - -/** - * Return offset of the given pixel (and color channel) from the start - * of a tile, in bytes. - */ -static INLINE unsigned -tile_pixel_offset(unsigned x, unsigned y, unsigned c) -{ - unsigned ix = (x / TILE_VECTOR_WIDTH) * TILE_X_STRIDE; - unsigned iy = (y / TILE_VECTOR_HEIGHT) * TILE_Y_STRIDE; - unsigned offset = iy + ix + c * TILE_C_STRIDE + - tile_offset[y % TILE_VECTOR_HEIGHT][x % TILE_VECTOR_WIDTH]; - return offset; -} - - -#define TILE_PIXEL(_p, _x, _y, _c) ((_p)[tile_pixel_offset(_x, _y, _c)]) - - -void -lp_tile_swizzle_4ub(enum pipe_format format, - uint8_t *dst, - const void *src, unsigned src_stride, - unsigned x, unsigned y); - - -void -lp_tile_unswizzle_4ub(enum pipe_format format, - const uint8_t *src, - void *dst, unsigned dst_stride, - unsigned x, unsigned y); - - - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.py b/src/gallium/drivers/llvmpipe/lp_tile_soa.py deleted file mode 100644 index d548ad845c7..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_tile_soa.py +++ /dev/null @@ -1,654 +0,0 @@ -#!/usr/bin/env python - -CopyRight = ''' -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * @file - * Pixel format accessor functions. - * - * @author Jose Fonseca - */ -''' - - -import sys -import os.path - -sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), '../../auxiliary/util')) - -from u_format_pack import * - - -def is_format_supported(format): - '''Determines whether we actually have the plumbing necessary to generate the - to read/write to/from this format.''' - - # FIXME: Ideally we would support any format combination here. - - if format.name == 'PIPE_FORMAT_R11G11B10_FLOAT': - return True; - - if format.name == 'PIPE_FORMAT_R9G9B9E5_FLOAT': - return True; - - if format.layout != PLAIN: - return False - - for i in range(4): - channel = format.channels[i] - if channel.type not in (VOID, UNSIGNED, SIGNED, FLOAT): - return False - if channel.type == FLOAT and channel.size not in (16, 32 ,64): - return False - - if format.colorspace not in ('rgb', 'srgb'): - return False - - return True - - -def generate_format_read(format, dst_channel, dst_native_type, dst_suffix): - '''Generate the function to read pixels from a particular format''' - - name = format.short_name() - - src_native_type = native_type(format) - - print 'static void' - print 'lp_tile_%s_swizzle_%s(%s * restrict dst, const uint8_t * restrict src, unsigned src_stride, unsigned x0, unsigned y0)' % (name, dst_suffix, dst_native_type) - print '{' - print ' unsigned x, y;' - print ' const uint8_t *src_row = src + y0*src_stride;' - print ' for (y = 0; y < TILE_SIZE; ++y) {' - print ' const %s *src_pixel = (const %s *)(src_row + x0*%u);' % (src_native_type, src_native_type, format.stride()) - print ' for (x = 0; x < TILE_SIZE; ++x) {' - - names = ['']*4 - if format.colorspace in ('rgb', 'srgb'): - for i in range(4): - swizzle = format.swizzles[i] - if swizzle < 4: - names[swizzle] += 'rgba'[i] - elif format.colorspace == 'zs': - swizzle = format.swizzles[0] - if swizzle < 4: - names[swizzle] = 'z' - else: - assert False - else: - assert False - - if format.name == 'PIPE_FORMAT_R11G11B10_FLOAT': - print ' float tmp[3];' - print ' uint8_t r, g, b;' - print ' r11g11b10f_to_float3(*src_pixel++, tmp);' - for i in range(3): - print ' %s = tmp[%d] * 0xff;' % (names[i], i) - elif format.name == 'PIPE_FORMAT_R9G9B9E5_FLOAT': - print ' float tmp[3];' - print ' uint8_t r, g, b;' - print ' rgb9e5_to_float3(*src_pixel++, tmp);' - for i in range(3): - print ' %s = tmp[%d] * 0xff;' % (names[i], i) - elif format.layout == PLAIN: - if not format.is_array(): - print ' %s pixel = *src_pixel++;' % src_native_type - shift = 0; - for i in range(4): - src_channel = format.channels[i] - width = src_channel.size - if names[i]: - value = 'pixel' - mask = (1 << width) - 1 - if shift: - value = '(%s >> %u)' % (value, shift) - if shift + width < format.block_size(): - value = '(%s & 0x%x)' % (value, mask) - value = conversion_expr(src_channel, dst_channel, dst_native_type, value, clamp=False) - print ' %s %s = %s;' % (dst_native_type, names[i], value) - shift += width - else: - for i in range(4): - if names[i]: - print ' %s %s;' % (dst_native_type, names[i]) - for i in range(4): - src_channel = format.channels[i] - if names[i]: - value = '(*src_pixel++)' - value = conversion_expr(src_channel, dst_channel, dst_native_type, value, clamp=False) - print ' %s = %s;' % (names[i], value) - elif src_channel.size: - print ' ++src_pixel;' - else: - assert False - - for i in range(4): - if format.colorspace in ('rgb', 'srgb'): - swizzle = format.swizzles[i] - if swizzle < 4: - value = names[swizzle] - elif swizzle == SWIZZLE_0: - value = '0' - elif swizzle == SWIZZLE_1: - value = get_one(dst_channel) - else: - assert False - elif format.colorspace == 'zs': - if i < 3: - value = 'z' - else: - value = get_one(dst_channel) - else: - assert False - print ' TILE_PIXEL(dst, x, y, %u) = %s; /* %s */' % (i, value, 'rgba'[i]) - - print ' }' - print ' src_row += src_stride;' - print ' }' - print '}' - print - - -def pack_rgba(format, src_channel, r, g, b, a): - """Return an expression for packing r, g, b, a into a pixel of the - given format. Ex: '(b << 24) | (g << 16) | (r << 8) | (a << 0)' - """ - assert format.colorspace in ('rgb', 'srgb') - inv_swizzle = format.inv_swizzles() - shift = 0 - expr = None - for i in range(4): - # choose r, g, b, or a depending on the inverse swizzle term - if inv_swizzle[i] == 0: - value = r - elif inv_swizzle[i] == 1: - value = g - elif inv_swizzle[i] == 2: - value = b - elif inv_swizzle[i] == 3: - value = a - else: - value = None - - if value: - dst_channel = format.channels[i] - dst_native_type = native_type(format) - value = conversion_expr(src_channel, dst_channel, dst_native_type, value, clamp=False) - term = "((%s) << %d)" % (value, shift) - if expr: - expr = expr + " | " + term - else: - expr = term - - width = format.channels[i].size - shift = shift + width - return expr - - -def emit_unrolled_unswizzle_code(format, src_channel): - '''Emit code for writing a block based on unrolled loops. - This is considerably faster than the TILE_PIXEL-based code below. - ''' - dst_native_type = 'uint%u_t' % format.block_size() - print ' const unsigned dstpix_stride = dst_stride / %d;' % format.stride() - print ' %s *dstpix = (%s *) dst;' % (dst_native_type, dst_native_type) - print ' unsigned int qx, qy, i;' - print - print ' for (qy = 0; qy < TILE_SIZE; qy += TILE_VECTOR_HEIGHT) {' - print ' const unsigned py = y0 + qy;' - print ' for (qx = 0; qx < TILE_SIZE; qx += TILE_VECTOR_WIDTH) {' - print ' const unsigned px = x0 + qx;' - print ' const uint8_t *r = src + 0 * TILE_C_STRIDE;' - print ' const uint8_t *g = src + 1 * TILE_C_STRIDE;' - print ' const uint8_t *b = src + 2 * TILE_C_STRIDE;' - print ' const uint8_t *a = src + 3 * TILE_C_STRIDE;' - print ' (void) r; (void) g; (void) b; (void) a; /* silence warnings */' - print ' for (i = 0; i < TILE_C_STRIDE; i += 2) {' - print ' const uint32_t pixel0 = %s;' % pack_rgba(format, src_channel, "r[i+0]", "g[i+0]", "b[i+0]", "a[i+0]") - print ' const uint32_t pixel1 = %s;' % pack_rgba(format, src_channel, "r[i+1]", "g[i+1]", "b[i+1]", "a[i+1]") - print ' const unsigned offset = (py + tile_y_offset[i]) * dstpix_stride + (px + tile_x_offset[i]);' - print ' dstpix[offset + 0] = pixel0;' - print ' dstpix[offset + 1] = pixel1;' - print ' }' - print ' src += TILE_X_STRIDE;' - print ' }' - print ' }' - - -def emit_tile_pixel_unswizzle_code(format, src_channel): - '''Emit code for writing a block based on the TILE_PIXEL macro.''' - dst_native_type = native_type(format) - - inv_swizzle = format.inv_swizzles() - - print ' unsigned x, y;' - print ' uint8_t *dst_row = dst + y0*dst_stride;' - print ' for (y = 0; y < TILE_SIZE; ++y) {' - print ' %s *dst_pixel = (%s *)(dst_row + x0*%u);' % (dst_native_type, dst_native_type, format.stride()) - print ' for (x = 0; x < TILE_SIZE; ++x) {' - - if format.name == 'PIPE_FORMAT_R11G11B10_FLOAT': - print ' float tmp[3];' - for i in range(3): - print ' tmp[%d] = ubyte_to_float(TILE_PIXEL(src, x, y, %u));' % (i, inv_swizzle[i]) - print ' *dst_pixel++ = float3_to_r11g11b10f(tmp);' - elif format.name == 'PIPE_FORMAT_R9G9B9E5_FLOAT': - print ' float tmp[3];' - for i in range(3): - print ' tmp[%d] = ubyte_to_float(TILE_PIXEL(src, x, y, %u));' % (i, inv_swizzle[i]) - print ' *dst_pixel++ = float3_to_rgb9e5(tmp);' - elif format.layout == PLAIN: - if not format.is_array(): - print ' %s pixel = 0;' % dst_native_type - shift = 0; - for i in range(4): - dst_channel = format.channels[i] - width = dst_channel.size - if inv_swizzle[i] is not None: - value = 'TILE_PIXEL(src, x, y, %u)' % inv_swizzle[i] - value = conversion_expr(src_channel, dst_channel, dst_native_type, value, clamp=False) - if shift: - value = '(%s << %u)' % (value, shift) - print ' pixel |= %s;' % value - shift += width - print ' *dst_pixel++ = pixel;' - else: - for i in range(4): - dst_channel = format.channels[i] - if inv_swizzle[i] is not None: - value = 'TILE_PIXEL(src, x, y, %u)' % inv_swizzle[i] - value = conversion_expr(src_channel, dst_channel, dst_native_type, value, clamp=False) - print ' *dst_pixel++ = %s;' % value - elif dst_channel.size: - print ' ++dst_pixel;' - else: - assert False - - print ' }' - print ' dst_row += dst_stride;' - print ' }' - - -def generate_format_write(format, src_channel, src_native_type, src_suffix): - '''Generate the function to write pixels to a particular format''' - - name = format.short_name() - - print 'static void' - print 'lp_tile_%s_unswizzle_%s(const %s * restrict src, uint8_t * restrict dst, unsigned dst_stride, unsigned x0, unsigned y0)' % (name, src_suffix, src_native_type) - print '{' - if format.layout == PLAIN \ - and format.colorspace == 'rgb' \ - and format.block_size() <= 32 \ - and format.is_pot() \ - and not format.is_mixed() \ - and (format.channels[0].type == UNSIGNED \ - or format.channels[1].type == UNSIGNED): - emit_unrolled_unswizzle_code(format, src_channel) - else: - emit_tile_pixel_unswizzle_code(format, src_channel) - print '}' - print - - -def generate_sse2(): - print ''' -#if defined(PIPE_ARCH_SSE) - -#include "util/u_sse.h" - -static ALWAYS_INLINE void -swz4( const __m128i * restrict x, - const __m128i * restrict y, - const __m128i * restrict z, - const __m128i * restrict w, - __m128i * restrict a, - __m128i * restrict b, - __m128i * restrict c, - __m128i * restrict d) -{ - __m128i i, j, k, l; - __m128i m, n, o, p; - __m128i e, f, g, h; - - m = _mm_unpacklo_epi8(*x,*y); - n = _mm_unpackhi_epi8(*x,*y); - o = _mm_unpacklo_epi8(*z,*w); - p = _mm_unpackhi_epi8(*z,*w); - - i = _mm_unpacklo_epi16(m,n); - j = _mm_unpackhi_epi16(m,n); - k = _mm_unpacklo_epi16(o,p); - l = _mm_unpackhi_epi16(o,p); - - e = _mm_unpacklo_epi8(i,j); - f = _mm_unpackhi_epi8(i,j); - g = _mm_unpacklo_epi8(k,l); - h = _mm_unpackhi_epi8(k,l); - - *a = _mm_unpacklo_epi64(e,g); - *b = _mm_unpackhi_epi64(e,g); - *c = _mm_unpacklo_epi64(f,h); - *d = _mm_unpackhi_epi64(f,h); -} - -static ALWAYS_INLINE void -unswz4( const __m128i * restrict a, - const __m128i * restrict b, - const __m128i * restrict c, - const __m128i * restrict d, - __m128i * restrict x, - __m128i * restrict y, - __m128i * restrict z, - __m128i * restrict w) -{ - __m128i i, j, k, l; - __m128i m, n, o, p; - - i = _mm_unpacklo_epi8(*a,*b); - j = _mm_unpackhi_epi8(*a,*b); - k = _mm_unpacklo_epi8(*c,*d); - l = _mm_unpackhi_epi8(*c,*d); - - m = _mm_unpacklo_epi16(i,k); - n = _mm_unpackhi_epi16(i,k); - o = _mm_unpacklo_epi16(j,l); - p = _mm_unpackhi_epi16(j,l); - - *x = _mm_unpacklo_epi64(m,n); - *y = _mm_unpackhi_epi64(m,n); - *z = _mm_unpacklo_epi64(o,p); - *w = _mm_unpackhi_epi64(o,p); -} - -static void -lp_tile_b8g8r8a8_unorm_swizzle_4ub_sse2(uint8_t * restrict dst, - const uint8_t * restrict src, unsigned src_stride, - unsigned x0, unsigned y0) -{ - __m128i *dst128 = (__m128i *) dst; - unsigned x, y; - - src += y0 * src_stride; - src += x0 * sizeof(uint32_t); - - for (y = 0; y < TILE_SIZE; y += 4) { - const uint8_t *src_row = src; - - for (x = 0; x < TILE_SIZE; x += 4) { - swz4((const __m128i *) (src_row + 0 * src_stride), - (const __m128i *) (src_row + 1 * src_stride), - (const __m128i *) (src_row + 2 * src_stride), - (const __m128i *) (src_row + 3 * src_stride), - dst128 + 2, /* b */ - dst128 + 1, /* g */ - dst128 + 0, /* r */ - dst128 + 3); /* a */ - - dst128 += 4; - src_row += sizeof(__m128i); - } - - src += 4 * src_stride; - } -} - -static void -lp_tile_b8g8r8a8_unorm_unswizzle_4ub_sse2(const uint8_t * restrict src, - uint8_t * restrict dst, unsigned dst_stride, - unsigned x0, unsigned y0) -{ - unsigned int x, y; - const __m128i *src128 = (const __m128i *) src; - - dst += y0 * dst_stride; - dst += x0 * sizeof(uint32_t); - - for (y = 0; y < TILE_SIZE; y += 4) { - const uint8_t *dst_row = dst; - - for (x = 0; x < TILE_SIZE; x += 4) { - unswz4( &src128[2], /* b */ - &src128[1], /* g */ - &src128[0], /* r */ - &src128[3], /* a */ - (__m128i *) (dst_row + 0 * dst_stride), - (__m128i *) (dst_row + 1 * dst_stride), - (__m128i *) (dst_row + 2 * dst_stride), - (__m128i *) (dst_row + 3 * dst_stride)); - - src128 += 4; - dst_row += sizeof(__m128i);; - } - - dst += 4 * dst_stride; - } -} - -static void -lp_tile_b8g8r8x8_unorm_swizzle_4ub_sse2(uint8_t * restrict dst, - const uint8_t * restrict src, unsigned src_stride, - unsigned x0, unsigned y0) -{ - __m128i *dst128 = (__m128i *) dst; - unsigned x, y; - - src += y0 * src_stride; - src += x0 * sizeof(uint32_t); - - for (y = 0; y < TILE_SIZE; y += 4) { - const uint8_t *src_row = src; - - for (x = 0; x < TILE_SIZE; x += 4) { - swz4((const __m128i *) (src_row + 0 * src_stride), - (const __m128i *) (src_row + 1 * src_stride), - (const __m128i *) (src_row + 2 * src_stride), - (const __m128i *) (src_row + 3 * src_stride), - dst128 + 2, /* b */ - dst128 + 1, /* g */ - dst128 + 0, /* r */ - dst128 + 3); /* a */ - - dst128 += 4; - src_row += sizeof(__m128i); - } - - src += 4 * src_stride; - } -} - -static void -lp_tile_b8g8r8x8_unorm_unswizzle_4ub_sse2(const uint8_t * restrict src, - uint8_t * restrict dst, unsigned dst_stride, - unsigned x0, unsigned y0) -{ - unsigned int x, y; - const __m128i *src128 = (const __m128i *) src; - - dst += y0 * dst_stride; - dst += x0 * sizeof(uint32_t); - - for (y = 0; y < TILE_SIZE; y += 4) { - const uint8_t *dst_row = dst; - - for (x = 0; x < TILE_SIZE; x += 4) { - unswz4( &src128[2], /* b */ - &src128[1], /* g */ - &src128[0], /* r */ - &src128[3], /* a */ - (__m128i *) (dst_row + 0 * dst_stride), - (__m128i *) (dst_row + 1 * dst_stride), - (__m128i *) (dst_row + 2 * dst_stride), - (__m128i *) (dst_row + 3 * dst_stride)); - - src128 += 4; - dst_row += sizeof(__m128i);; - } - - dst += 4 * dst_stride; - } -} - -#endif /* PIPE_ARCH_SSE */ -''' - - -def generate_swizzle(formats, dst_channel, dst_native_type, dst_suffix): - '''Generate the dispatch function to read pixels from any format''' - - for format in formats: - if is_format_supported(format): - generate_format_read(format, dst_channel, dst_native_type, dst_suffix) - - print 'void' - print 'lp_tile_swizzle_%s(enum pipe_format format, %s *dst, const void *src, unsigned src_stride, unsigned x, unsigned y)' % (dst_suffix, dst_native_type) - print '{' - print ' void (*func)(%s * restrict dst, const uint8_t * restrict src, unsigned src_stride, unsigned x0, unsigned y0);' % dst_native_type - print '#ifdef DEBUG' - print ' lp_tile_swizzle_count += 1;' - print '#endif' - print ' switch(format) {' - for format in formats: - if is_format_supported(format): - print ' case %s:' % format.name - func_name = 'lp_tile_%s_swizzle_%s' % (format.short_name(), dst_suffix) - if format.name == 'PIPE_FORMAT_B8G8R8A8_UNORM' or format.name == 'PIPE_FORMAT_B8G8R8X8_UNORM': - print '#ifdef PIPE_ARCH_SSE' - print ' func = util_cpu_caps.has_sse2 ? %s_sse2 : %s;' % (func_name, func_name) - print '#else' - print ' func = %s;' % (func_name,) - print '#endif' - else: - print ' func = %s;' % (func_name,) - print ' break;' - print ' default:' - print ' debug_printf("%s: unsupported format %s\\n", __FUNCTION__, util_format_name(format));' - print ' return;' - print ' }' - print ' func(dst, (const uint8_t *)src, src_stride, x, y);' - print '}' - print - - -def generate_unswizzle(formats, src_channel, src_native_type, src_suffix): - '''Generate the dispatch function to write pixels to any format''' - - for format in formats: - if is_format_supported(format): - generate_format_write(format, src_channel, src_native_type, src_suffix) - - print 'void' - print 'lp_tile_unswizzle_%s(enum pipe_format format, const %s *src, void *dst, unsigned dst_stride, unsigned x, unsigned y)' % (src_suffix, src_native_type) - - print '{' - print ' void (*func)(const %s * restrict src, uint8_t * restrict dst, unsigned dst_stride, unsigned x0, unsigned y0);' % src_native_type - print '#ifdef DEBUG' - print ' lp_tile_unswizzle_count += 1;' - print '#endif' - print ' switch(format) {' - for format in formats: - if is_format_supported(format): - print ' case %s:' % format.name - func_name = 'lp_tile_%s_unswizzle_%s' % (format.short_name(), src_suffix) - if format.name == 'PIPE_FORMAT_B8G8R8A8_UNORM' or format.name == 'PIPE_FORMAT_B8G8R8X8_UNORM': - print '#ifdef PIPE_ARCH_SSE' - print ' func = util_cpu_caps.has_sse2 ? %s_sse2 : %s;' % (func_name, func_name) - print '#else' - print ' func = %s;' % (func_name,) - print '#endif' - else: - print ' func = %s;' % (func_name,) - print ' break;' - print ' default:' - print ' debug_printf("%s: unsupported format %s\\n", __FUNCTION__, util_format_name(format));' - print ' return;' - print ' }' - print ' func(src, (uint8_t *)dst, dst_stride, x, y);' - print '}' - print - - -def main(): - formats = [] - for arg in sys.argv[1:]: - formats.extend(parse(arg)) - - print '/* This file is autogenerated by lp_tile_soa.py from u_format.csv. Do not edit directly. */' - print - # This will print the copyright message on the top of this file - print CopyRight.strip() - print - print '#include "pipe/p_compiler.h"' - print '#include "util/u_math.h"' - print '#include "util/u_format.h"' - print '#include "util/u_format_r11g11b10f.h"' - print '#include "util/u_format_rgb9e5.h"' - print '#include "util/u_half.h"' - print '#include "util/u_cpu_detect.h"' - print '#include "lp_tile_soa.h"' - print - print '#ifdef DEBUG' - print 'unsigned lp_tile_unswizzle_count = 0;' - print 'unsigned lp_tile_swizzle_count = 0;' - print '#endif' - print - print 'const unsigned char' - print 'tile_offset[TILE_VECTOR_HEIGHT][TILE_VECTOR_WIDTH] = {' - print ' { 0, 1, 4, 5},' - print ' { 2, 3, 6, 7},' - print ' { 8, 9, 12, 13},' - print ' { 10, 11, 14, 15}' - print '};' - print - print '/* Note: these lookup tables could be replaced with some' - print ' * bit-twiddling code, but this is a little faster.' - print ' */' - print 'static unsigned tile_x_offset[TILE_VECTOR_WIDTH * TILE_VECTOR_HEIGHT] = {' - print ' 0, 1, 0, 1, 2, 3, 2, 3,' - print ' 0, 1, 0, 1, 2, 3, 2, 3' - print '};' - print - print 'static unsigned tile_y_offset[TILE_VECTOR_WIDTH * TILE_VECTOR_HEIGHT] = {' - print ' 0, 0, 1, 1, 0, 0, 1, 1,' - print ' 2, 2, 3, 3, 2, 2, 3, 3' - print '};' - print - - generate_sse2() - - channel = Channel(UNSIGNED, True, False, 8) - native_type = 'uint8_t' - suffix = '4ub' - - generate_swizzle(formats, channel, native_type, suffix) - generate_unswizzle(formats, channel, native_type, suffix) - - -if __name__ == '__main__': - main()