From b2424fb0304cf4afd363b35c1dab49fb7edddb08 Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Fri, 27 Mar 2015 16:49:54 +0100 Subject: [PATCH] llvmpipe: simplify address calculation for 4x4 blocks These functions looked quite complicated, even though what they actually did was trivial (ever since we dropped swizzled rendering). Also drop lookup of format bytes per block done for each block, and do it once per scene instead. This improves everybody's favorite "benchmark" by 3% or so, though lp_rast_shade_quads_all() which calls this still shows up quite high for a function which does little more than call the jit function. (This would most likely be much better handled by the jit function itself, the strides are passed through anyway already, though for being able to handle layers it would definitely add some complexity.) Reviewed-by: Jose Fonseca --- src/gallium/drivers/llvmpipe/lp_rast.c | 20 ++++- src/gallium/drivers/llvmpipe/lp_rast_priv.h | 87 ++++----------------- src/gallium/drivers/llvmpipe/lp_scene.c | 3 + src/gallium/drivers/llvmpipe/lp_scene.h | 1 + 4 files changed, 35 insertions(+), 76 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 903e7c51002..7019acbda9d 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -91,6 +91,9 @@ lp_rast_tile_begin(struct lp_rasterizer_task *task, const struct cmd_bin *bin, int x, int y) { + unsigned i; + struct lp_scene *scene = task->scene; + LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, x, y); task->bin = bin; @@ -104,9 +107,18 @@ lp_rast_tile_begin(struct lp_rasterizer_task *task, task->thread_data.vis_counter = 0; task->ps_invocations = 0; - /* reset pointers to color and depth tile(s) */ - memset(task->color_tiles, 0, sizeof(task->color_tiles)); - task->depth_tile = NULL; + for (i = 0; i < task->scene->fb.nr_cbufs; i++) { + if (task->scene->fb.cbufs[i]) { + task->color_tiles[i] = scene->cbufs[i].map + 
scene->cbufs[i].stride * task->y + + scene->cbufs[i].format_bytes * task->x; + } + } + if (task->scene->fb.zsbuf) { + task->depth_tile = scene->zsbuf.map + + scene->zsbuf.stride * task->y + + scene->zsbuf.format_bytes * task->x; + } } @@ -186,7 +198,7 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task, if (scene->fb.zsbuf) { unsigned layer; - uint8_t *dst_layer = lp_rast_get_depth_tile_pointer(task, LP_TEX_USAGE_READ_WRITE); + uint8_t *dst_layer = task->depth_tile; block_size = util_format_get_blocksize(scene->fb.zsbuf->format); clear_value &= clear_mask; diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index d92230db68d..e6ebbcd526d 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -141,64 +141,6 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task, unsigned mask); - -/** - * Get pointer to the color tile - */ -static INLINE uint8_t * -lp_rast_get_color_tile_pointer(struct lp_rasterizer_task *task, - unsigned buf, enum lp_texture_usage usage) -{ - const struct lp_scene *scene = task->scene; - unsigned format_bytes; - - assert(task->x < scene->tiles_x * TILE_SIZE); - assert(task->y < scene->tiles_y * TILE_SIZE); - assert(task->x % TILE_SIZE == 0); - assert(task->y % TILE_SIZE == 0); - assert(buf < scene->fb.nr_cbufs); - - if (!task->color_tiles[buf]) { - struct pipe_surface *cbuf = scene->fb.cbufs[buf]; - assert(cbuf); - - format_bytes = util_format_get_blocksize(cbuf->format); - task->color_tiles[buf] = scene->cbufs[buf].map + scene->cbufs[buf].stride * task->y + - format_bytes * task->x; - } - - return task->color_tiles[buf]; -} - - -/** - * Get pointer to the depth tile - */ -static INLINE uint8_t * -lp_rast_get_depth_tile_pointer(struct lp_rasterizer_task *task, - enum lp_texture_usage usage) -{ - const struct lp_scene *scene = task->scene; - unsigned format_bytes; - - assert(task->x < scene->tiles_x * TILE_SIZE); - assert(task->y < 
scene->tiles_y * TILE_SIZE); - assert(task->x % TILE_SIZE == 0); - assert(task->y % TILE_SIZE == 0); - - if (!task->depth_tile) { - struct pipe_surface *dbuf = scene->fb.zsbuf; - assert(dbuf); - - format_bytes = util_format_get_blocksize(dbuf->format); - task->depth_tile = scene->zsbuf.map + scene->zsbuf.stride * task->y + - format_bytes * task->x; - } - - return task->depth_tile; -} - - /** * Get the pointer to a 4x4 color block (within a 64x64 tile). * \param x, y location of 4x4 block in window coords @@ -208,7 +150,7 @@ lp_rast_get_color_block_pointer(struct lp_rasterizer_task *task, unsigned buf, unsigned x, unsigned y, unsigned layer) { - unsigned px, py, pixel_offset, format_bytes; + unsigned px, py, pixel_offset; uint8_t *color; assert(x < task->scene->tiles_x * TILE_SIZE); @@ -217,16 +159,19 @@ lp_rast_get_color_block_pointer(struct lp_rasterizer_task *task, assert((y % TILE_VECTOR_HEIGHT) == 0); assert(buf < task->scene->fb.nr_cbufs); - format_bytes = util_format_get_blocksize(task->scene->fb.cbufs[buf]->format); - - color = lp_rast_get_color_tile_pointer(task, buf, LP_TEX_USAGE_READ_WRITE); - assert(color); + assert(task->color_tiles[buf]); + /* + * We don't actually benefit from having per tile cbuf/zsbuf pointers, + * it's just extra work - the mul/add would be exactly the same anyway. + * Fortunately the extra work (modulo) here is very cheap at least... 
+ */ px = x % TILE_SIZE; py = y % TILE_SIZE; - pixel_offset = px * format_bytes + py * task->scene->cbufs[buf].stride; - color = color + pixel_offset; + pixel_offset = px * task->scene->cbufs[buf].format_bytes + + py * task->scene->cbufs[buf].stride; + color = task->color_tiles[buf] + pixel_offset; if (layer) { color += layer * task->scene->cbufs[buf].layer_stride; @@ -245,7 +190,7 @@ static INLINE uint8_t * lp_rast_get_depth_block_pointer(struct lp_rasterizer_task *task, unsigned x, unsigned y, unsigned layer) { - unsigned px, py, pixel_offset, format_bytes; + unsigned px, py, pixel_offset; uint8_t *depth; assert(x < task->scene->tiles_x * TILE_SIZE); @@ -253,16 +198,14 @@ lp_rast_get_depth_block_pointer(struct lp_rasterizer_task *task, assert((x % TILE_VECTOR_WIDTH) == 0); assert((y % TILE_VECTOR_HEIGHT) == 0); - format_bytes = util_format_get_blocksize(task->scene->fb.zsbuf->format); - - depth = lp_rast_get_depth_tile_pointer(task, LP_TEX_USAGE_READ_WRITE); - assert(depth); + assert(task->depth_tile); px = x % TILE_SIZE; py = y % TILE_SIZE; - pixel_offset = px * format_bytes + py * task->scene->zsbuf.stride; - depth = depth + pixel_offset; + pixel_offset = px * task->scene->zsbuf.format_bytes + + py * task->scene->zsbuf.stride; + depth = task->depth_tile + pixel_offset; if (layer) { depth += layer * task->scene->zsbuf.layer_stride; diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c index e95d76a3289..2441b3c0d88 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.c +++ b/src/gallium/drivers/llvmpipe/lp_scene.c @@ -174,6 +174,7 @@ lp_scene_begin_rasterization(struct lp_scene *scene) cbuf->u.tex.level, cbuf->u.tex.first_layer, LP_TEX_USAGE_READ_WRITE); + scene->cbufs[i].format_bytes = util_format_get_blocksize(cbuf->format); } else { struct llvmpipe_resource *lpr = llvmpipe_resource(cbuf->texture); @@ -182,6 +183,7 @@ lp_scene_begin_rasterization(struct lp_scene *scene) scene->cbufs[i].layer_stride = 0; 
scene->cbufs[i].map = lpr->data; scene->cbufs[i].map += cbuf->u.buf.first_element * pixstride; + scene->cbufs[i].format_bytes = util_format_get_blocksize(cbuf->format); } } @@ -194,6 +196,7 @@ lp_scene_begin_rasterization(struct lp_scene *scene) zsbuf->u.tex.level, zsbuf->u.tex.first_layer, LP_TEX_USAGE_READ_WRITE); + scene->zsbuf.format_bytes = util_format_get_blocksize(zsbuf->format); } } diff --git a/src/gallium/drivers/llvmpipe/lp_scene.h b/src/gallium/drivers/llvmpipe/lp_scene.h index 19a38115afe..ad23c2009ac 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.h +++ b/src/gallium/drivers/llvmpipe/lp_scene.h @@ -142,6 +142,7 @@ struct lp_scene { uint8_t *map; unsigned stride; unsigned layer_stride; + unsigned format_bytes; } zsbuf, cbufs[PIPE_MAX_COLOR_BUFS]; /* The amount of layers in the fb (minimum of all attachments) */ -- 2.30.2