llvmpipe: simplify address calculation for 4x4 blocks
author Roland Scheidegger <sroland@vmware.com>
Fri, 27 Mar 2015 15:49:54 +0000 (16:49 +0100)
committer Roland Scheidegger <sroland@vmware.com>
Sat, 28 Mar 2015 01:59:42 +0000 (02:59 +0100)
These functions looked quite complicated, even though what they actually did
was trivial (ever since we dropped swizzled rendering). Also drop the lookup of
the format's bytes per block done for each block, and do it once per scene instead.
This improves everybody's favorite "benchmark" by 3% or so, though
lp_rast_shade_quads_all(), which calls this, still shows up quite high for a
function which does little more than call the jit function.
(This would most likely be much better handled by the jit function itself,
since the strides are already passed through anyway, though being able to
handle layers there would definitely add some complexity.)

Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
src/gallium/drivers/llvmpipe/lp_rast.c
src/gallium/drivers/llvmpipe/lp_rast_priv.h
src/gallium/drivers/llvmpipe/lp_scene.c
src/gallium/drivers/llvmpipe/lp_scene.h

index 903e7c510022f339157498d5983fd4d217674b86..7019acbda9d8c9cf4ac46f026e1b3eaf6e0c8d52 100644 (file)
@@ -91,6 +91,9 @@ lp_rast_tile_begin(struct lp_rasterizer_task *task,
                    const struct cmd_bin *bin,
                    int x, int y)
 {
+   unsigned i;
+   struct lp_scene *scene = task->scene;
+
    LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, x, y);
 
    task->bin = bin;
@@ -104,9 +107,18 @@ lp_rast_tile_begin(struct lp_rasterizer_task *task,
    task->thread_data.vis_counter = 0;
    task->ps_invocations = 0;
 
-   /* reset pointers to color and depth tile(s) */
-   memset(task->color_tiles, 0, sizeof(task->color_tiles));
-   task->depth_tile = NULL;
+   for (i = 0; i < task->scene->fb.nr_cbufs; i++) {
+      if (task->scene->fb.cbufs[i]) {
+         task->color_tiles[i] = scene->cbufs[i].map +
+                                scene->cbufs[i].stride * task->y +
+                                scene->cbufs[i].format_bytes * task->x;
+      }
+   }
+   if (task->scene->fb.zsbuf) {
+      task->depth_tile = scene->zsbuf.map +
+                         scene->zsbuf.stride * task->y +
+                         scene->zsbuf.format_bytes * task->x;
+   }
 }
 
 
@@ -186,7 +198,7 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task,
 
    if (scene->fb.zsbuf) {
       unsigned layer;
-      uint8_t *dst_layer = lp_rast_get_depth_tile_pointer(task, LP_TEX_USAGE_READ_WRITE);
+      uint8_t *dst_layer = task->depth_tile;
       block_size = util_format_get_blocksize(scene->fb.zsbuf->format);
 
       clear_value &= clear_mask;
index d92230db68d0eea1ec06f9717c84aff0487a676a..e6ebbcd526db19e61843217b7b08a0ebd5b58fdf 100644 (file)
@@ -141,64 +141,6 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
                          unsigned mask);
 
 
-
-/**
- * Get pointer to the color tile
- */
-static INLINE uint8_t *
-lp_rast_get_color_tile_pointer(struct lp_rasterizer_task *task,
-                               unsigned buf, enum lp_texture_usage usage)
-{
-   const struct lp_scene *scene = task->scene;
-   unsigned format_bytes;
-
-   assert(task->x < scene->tiles_x * TILE_SIZE);
-   assert(task->y < scene->tiles_y * TILE_SIZE);
-   assert(task->x % TILE_SIZE == 0);
-   assert(task->y % TILE_SIZE == 0);
-   assert(buf < scene->fb.nr_cbufs);
-
-   if (!task->color_tiles[buf]) {
-      struct pipe_surface *cbuf = scene->fb.cbufs[buf];
-      assert(cbuf);
-
-      format_bytes = util_format_get_blocksize(cbuf->format);
-      task->color_tiles[buf] = scene->cbufs[buf].map + scene->cbufs[buf].stride * task->y +
-                               format_bytes * task->x;
-   }
-
-   return task->color_tiles[buf];
-}
-
-
-/**
- * Get pointer to the depth tile
- */
-static INLINE uint8_t *
-lp_rast_get_depth_tile_pointer(struct lp_rasterizer_task *task,
-                               enum lp_texture_usage usage)
-{
-   const struct lp_scene *scene = task->scene;
-   unsigned format_bytes;
-
-   assert(task->x < scene->tiles_x * TILE_SIZE);
-   assert(task->y < scene->tiles_y * TILE_SIZE);
-   assert(task->x % TILE_SIZE == 0);
-   assert(task->y % TILE_SIZE == 0);
-
-   if (!task->depth_tile) {
-      struct pipe_surface *dbuf = scene->fb.zsbuf;
-      assert(dbuf);
-
-      format_bytes = util_format_get_blocksize(dbuf->format);
-      task->depth_tile = scene->zsbuf.map + scene->zsbuf.stride * task->y +
-                         format_bytes * task->x;
-   }
-
-   return task->depth_tile;
-}
-
-
 /**
  * Get the pointer to a 4x4 color block (within a 64x64 tile).
  * \param x, y location of 4x4 block in window coords
@@ -208,7 +150,7 @@ lp_rast_get_color_block_pointer(struct lp_rasterizer_task *task,
                                 unsigned buf, unsigned x, unsigned y,
                                 unsigned layer)
 {
-   unsigned px, py, pixel_offset, format_bytes;
+   unsigned px, py, pixel_offset;
    uint8_t *color;
 
    assert(x < task->scene->tiles_x * TILE_SIZE);
@@ -217,16 +159,19 @@ lp_rast_get_color_block_pointer(struct lp_rasterizer_task *task,
    assert((y % TILE_VECTOR_HEIGHT) == 0);
    assert(buf < task->scene->fb.nr_cbufs);
 
-   format_bytes = util_format_get_blocksize(task->scene->fb.cbufs[buf]->format);
-
-   color = lp_rast_get_color_tile_pointer(task, buf, LP_TEX_USAGE_READ_WRITE);
-   assert(color);
+   assert(task->color_tiles[buf]);
 
+   /*
+    * We don't actually benefit from having per tile cbuf/zsbuf pointers,
+    * it's just extra work - the mul/add would be exactly the same anyway.
+    * Fortunately the extra work (modulo) here is very cheap at least...
+    */
    px = x % TILE_SIZE;
    py = y % TILE_SIZE;
-   pixel_offset = px * format_bytes + py * task->scene->cbufs[buf].stride;
 
-   color = color + pixel_offset;
+   pixel_offset = px * task->scene->cbufs[buf].format_bytes +
+                  py * task->scene->cbufs[buf].stride;
+   color = task->color_tiles[buf] + pixel_offset;
 
    if (layer) {
       color += layer * task->scene->cbufs[buf].layer_stride;
@@ -245,7 +190,7 @@ static INLINE uint8_t *
 lp_rast_get_depth_block_pointer(struct lp_rasterizer_task *task,
                                 unsigned x, unsigned y, unsigned layer)
 {
-   unsigned px, py, pixel_offset, format_bytes;
+   unsigned px, py, pixel_offset;
    uint8_t *depth;
 
    assert(x < task->scene->tiles_x * TILE_SIZE);
@@ -253,16 +198,14 @@ lp_rast_get_depth_block_pointer(struct lp_rasterizer_task *task,
    assert((x % TILE_VECTOR_WIDTH) == 0);
    assert((y % TILE_VECTOR_HEIGHT) == 0);
 
-   format_bytes = util_format_get_blocksize(task->scene->fb.zsbuf->format);
-
-   depth = lp_rast_get_depth_tile_pointer(task, LP_TEX_USAGE_READ_WRITE);
-   assert(depth);
+   assert(task->depth_tile);
 
    px = x % TILE_SIZE;
    py = y % TILE_SIZE;
-   pixel_offset = px * format_bytes + py * task->scene->zsbuf.stride;
 
-   depth = depth + pixel_offset;
+   pixel_offset = px * task->scene->zsbuf.format_bytes +
+                  py * task->scene->zsbuf.stride;
+   depth = task->depth_tile + pixel_offset;
 
    if (layer) {
       depth += layer * task->scene->zsbuf.layer_stride;
index e95d76a32893e6ebaa7087aebc24be86844fb4e4..2441b3c0d886b402ce191ce28b9c4ec2fb6771ec 100644 (file)
@@ -174,6 +174,7 @@ lp_scene_begin_rasterization(struct lp_scene *scene)
                                                      cbuf->u.tex.level,
                                                      cbuf->u.tex.first_layer,
                                                      LP_TEX_USAGE_READ_WRITE);
+         scene->cbufs[i].format_bytes = util_format_get_blocksize(cbuf->format);
       }
       else {
          struct llvmpipe_resource *lpr = llvmpipe_resource(cbuf->texture);
@@ -182,6 +183,7 @@ lp_scene_begin_rasterization(struct lp_scene *scene)
          scene->cbufs[i].layer_stride = 0;
          scene->cbufs[i].map = lpr->data;
          scene->cbufs[i].map += cbuf->u.buf.first_element * pixstride;
+         scene->cbufs[i].format_bytes = util_format_get_blocksize(cbuf->format);
       }
    }
 
@@ -194,6 +196,7 @@ lp_scene_begin_rasterization(struct lp_scene *scene)
                                                zsbuf->u.tex.level,
                                                zsbuf->u.tex.first_layer,
                                                LP_TEX_USAGE_READ_WRITE);
+      scene->zsbuf.format_bytes = util_format_get_blocksize(zsbuf->format);
    }
 }
 
index 19a38115afeb55eea491831ccc52594b70b999e2..ad23c2009acf37ff99b3f164184f104b1b29a634 100644 (file)
@@ -142,6 +142,7 @@ struct lp_scene {
       uint8_t *map;
       unsigned stride;
       unsigned layer_stride;
+      unsigned format_bytes;
    } zsbuf, cbufs[PIPE_MAX_COLOR_BUFS];
 
    /* The amount of layers in the fb (minimum of all attachments) */