llvmpipe: use single swizzled tile
authorKeith Whitwell <keithw@vmware.com>
Fri, 16 Jul 2010 13:40:30 +0000 (14:40 +0100)
committerJosé Fonseca <jfonseca@vmware.com>
Fri, 16 Jul 2010 16:24:21 +0000 (17:24 +0100)
Use a single swizzled tile per colorbuf (and per thread) to avoid
accumulating large amounts of cached swizzled data.

Now that the SSE3 code has been merged to master, the performance delta
of this change is minimal, the main benefit is reduced memory usage
due to no longer keeping swizzled copies of render targets.

It's clear from the performance of the in-place version of this code
that there is still quite a bit of time being spent swizzling &
unswizzling, but it's not clear exactly how to reduce that.

src/gallium/drivers/llvmpipe/lp_memory.c
src/gallium/drivers/llvmpipe/lp_memory.h
src/gallium/drivers/llvmpipe/lp_rast.c
src/gallium/drivers/llvmpipe/lp_rast_priv.h
src/gallium/drivers/llvmpipe/lp_setup.c
src/gallium/drivers/llvmpipe/lp_texture.c
src/gallium/drivers/llvmpipe/lp_texture.h

index 61d16668eb95b84dadcd65266bacabc353cc1616..0f55d4a80ae74fa57833d06cf68842a5cab7c1ce 100644 (file)
 #include "lp_limits.h"
 #include "lp_memory.h"
 
-
-/** 32bpp RGBA dummy tile to use in out of memory conditions */
-static PIPE_ALIGN_VAR(16) uint8_t lp_dummy_tile[TILE_SIZE * TILE_SIZE * 4];
-
-static unsigned lp_out_of_memory = 0;
-
-
-uint8_t *
-lp_get_dummy_tile(void)
-{
-   if (lp_out_of_memory++ < 10) {
-      debug_printf("llvmpipe: out of memory.  Using dummy tile memory.\n");
-   }
-   return lp_dummy_tile;
-}
-
-uint8_t *
-lp_get_dummy_tile_silent(void)
-{
-   return lp_dummy_tile;
-}
-
-
-boolean
-lp_is_dummy_tile(void *tile)
-{
-   return tile == lp_dummy_tile;
-}
+/**
+ * 32bpp RGBA swizzled tiles.  One for for each thread and each
+ * possible colorbuf.  Adds up to quite a bit 8*8*64*64*4 == 1MB.
+ * Several schemes exist to reduce this, such as scaling back the
+ * number of threads or using a smaller tilesize when multiple
+ * colorbuffers are bound.
+ */
+PIPE_ALIGN_VAR(16) uint8_t lp_swizzled_cbuf[LP_MAX_THREADS][PIPE_MAX_COLOR_BUFS][TILE_SIZE * TILE_SIZE * 4];
+
+
+/* A single dummy tile used in a couple of out-of-memory situations. 
+ */
+PIPE_ALIGN_VAR(16) uint8_t lp_dummy_tile[TILE_SIZE * TILE_SIZE * 4];
 
index 1d0e5ebdb695a4b16df4f19ba010cb7cbc49873a..f7418f5e0871ff72b8e16f3a9325f7ae4877a370 100644 (file)
 
 
 #include "pipe/p_compiler.h"
+#include "pipe/p_state.h"
+#include "lp_limits.h"
 
+extern PIPE_ALIGN_VAR(16) uint8_t lp_swizzled_cbuf[LP_MAX_THREADS][PIPE_MAX_COLOR_BUFS][TILE_SIZE * TILE_SIZE * 4];
 
-extern uint8_t *
-lp_get_dummy_tile(void);
-
-uint8_t *
-lp_get_dummy_tile_silent(void);
-
-extern boolean
-lp_is_dummy_tile(void *tile);
-
+extern PIPE_ALIGN_VAR(16) uint8_t lp_dummy_tile[TILE_SIZE * TILE_SIZE * 4];
 
 #endif /* LP_MEMORY_H */
index a023d2b668ee2e5278db428050cb9bc738937899..654f4ea48ebeb965e448d2d25f68379d6f89995c 100644 (file)
@@ -67,7 +67,7 @@ lp_rast_begin( struct lp_rasterizer *rast,
                             cbuf->level,
                             cbuf->zslice,
                             LP_TEX_USAGE_READ_WRITE,
-                            LP_TEX_LAYOUT_NONE);
+                            LP_TEX_LAYOUT_LINEAR);
    }
 
    if (fb->zsbuf) {
@@ -271,11 +271,6 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task,
 
    dst = task->depth_tile;
 
-   if (lp_is_dummy_tile(dst))
-      return;
-
-   assert(dst == lp_rast_get_depth_block_pointer(task, task->x, task->y));
-
    switch (block_size) {
    case 1:
       memset(dst, (uint8_t) clear_value, height * width);
@@ -375,10 +370,15 @@ lp_rast_store_linear_color( struct lp_rasterizer_task *task,
       struct pipe_surface *cbuf = scene->fb.cbufs[buf];
       const unsigned face = cbuf->face, level = cbuf->level;
       struct llvmpipe_resource *lpt = llvmpipe_resource(cbuf->texture);
-      /* this will convert the tiled data to linear if needed */
-      (void) llvmpipe_get_texture_tile_linear(lpt, face, level,
-                                              LP_TEX_USAGE_READ,
-                                              task->x, task->y);
+
+      if (!task->color_tiles[buf])
+         continue;
+
+      llvmpipe_unswizzle_cbuf_tile(lpt,
+                                   face,
+                                   level,
+                                   task->x, task->y,
+                                   task->color_tiles[buf]);
    }
 }
 
@@ -589,6 +589,11 @@ lp_rast_tile_end(struct lp_rasterizer_task *task)
    (void) outline_subtiles;
 #endif
 
+   {
+      union lp_rast_cmd_arg dummy = {0};
+      lp_rast_store_linear_color(task, dummy);
+   }
+
    /* debug */
    memset(task->color_tiles, 0, sizeof(task->color_tiles));
    task->depth_tile = NULL;
@@ -751,30 +756,8 @@ debug_bin( const struct cmd_bin *bin )
 static boolean
 is_empty_bin( const struct cmd_bin *bin )
 {
-   const struct cmd_block *head = bin->commands.head;
-   int i;
-   
-   if (0)
-      debug_bin(bin);
-   
-   /* We emit at most two load-tile commands at the start of the first
-    * command block.  In addition we seem to emit a couple of
-    * set-state commands even in empty bins.
-    *
-    * As a heuristic, if a bin has more than 4 commands, consider it
-    * non-empty.
-    */
-   if (head->next != NULL ||
-       head->count > 4) {
-      return FALSE;
-   }
-
-   for (i = 0; i < head->count; i++)
-      if (head->cmd[i] != lp_rast_store_linear_color) {
-         return FALSE;
-      }
-
-   return TRUE;
+   if (0) debug_bin(bin);
+   return bin->commands.head->count == 0;
 }
 
 
@@ -984,6 +967,10 @@ lp_rast_create( unsigned num_threads )
    /* for synchronizing rasterization threads */
    pipe_barrier_init( &rast->barrier, rast->num_threads );
 
+   memset(lp_swizzled_cbuf, 0, sizeof lp_swizzled_cbuf);
+
+   memset(lp_dummy_tile, 0, sizeof lp_dummy_tile);
+
    return rast;
 }
 
index 8044927c8be25691cb93bdb2785f3179d5fbed29..b4a48cfd024470c4a7aaee89ba98a8f6ea30b9c5 100644 (file)
@@ -148,7 +148,7 @@ lp_rast_get_depth_block_pointer(struct lp_rasterizer_task *task,
        * the oom warning as this most likely because there is no
        * zsbuf.
        */
-      return lp_get_dummy_tile_silent();
+      return lp_dummy_tile;
    }
 
    depth = (rast->zsbuf.map +
@@ -178,15 +178,14 @@ lp_rast_get_color_tile_pointer(struct lp_rasterizer_task *task,
       struct llvmpipe_resource *lpt;
       assert(cbuf);
       lpt = llvmpipe_resource(cbuf->texture);
-      task->color_tiles[buf] = llvmpipe_get_texture_tile(lpt,
-                                                         cbuf->face + cbuf->zslice,
-                                                         cbuf->level,
-                                                         usage,
-                                                         task->x,
-                                                         task->y);
-      if (!task->color_tiles[buf]) {
-         /* out of memory - use dummy tile memory */
-         return lp_get_dummy_tile();
+      task->color_tiles[buf] = lp_swizzled_cbuf[task->thread_index][buf];
+
+      if (usage != LP_TEX_USAGE_WRITE_ALL) {
+         llvmpipe_swizzle_cbuf_tile(lpt,
+                                    cbuf->face + cbuf->zslice,
+                                    cbuf->level,
+                                    task->x, task->y,
+                                    task->color_tiles[buf]);
       }
    }
 
@@ -212,10 +211,7 @@ lp_rast_get_color_block_pointer(struct lp_rasterizer_task *task,
    assert((y % TILE_VECTOR_HEIGHT) == 0);
 
    color = lp_rast_get_color_tile_pointer(task, buf, LP_TEX_USAGE_READ_WRITE);
-   if (!color) {
-      /* out of memory - use dummy tile memory */
-      return lp_get_dummy_tile();
-   }
+   assert(color);
 
    px = x % TILE_SIZE;
    py = y % TILE_SIZE;
index 7d48ad8e741793a7cc2597b2b61d4cb3274c9ab6..556e571585dc948268415dccf01fea6a5d95c6e4 100644 (file)
@@ -280,20 +280,6 @@ lp_setup_flush( struct lp_setup_context *setup,
    LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
 
    if (setup->scene) {
-      struct lp_scene *scene = lp_setup_get_current_scene(setup);
-      union lp_rast_cmd_arg dummy = {0};
-
-      if (flags & (PIPE_FLUSH_SWAPBUFFERS |
-                   PIPE_FLUSH_FRAME)) {
-         /* Store colors in the linear color buffer(s).
-          * If we don't do this here, we'll end up converting the tiled
-          * data to linear in the texture_unmap() function, which will
-          * not be a parallel/threaded operation as here.
-          */
-         lp_scene_bin_everywhere(scene, lp_rast_store_linear_color, dummy);
-      }
-
-
       if (fence) {
          /* if we're going to flush the setup/rasterization modules, emit
           * a fence.
@@ -642,7 +628,7 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup,
 
                if (!jit_tex->data[j]) {
                   /* out of memory - use dummy tile memory */
-                  jit_tex->data[j] = lp_get_dummy_tile();
+                  jit_tex->data[j] = lp_dummy_tile;
                   jit_tex->width = TILE_SIZE;
                   jit_tex->height = TILE_SIZE;
                   jit_tex->depth = 1;
index d236bad69d187edbd15f20c981c5375c9bc6bc57..bbd834519a3c9bc941193356fa5c93cedc0df10a 100644 (file)
@@ -1208,6 +1208,94 @@ llvmpipe_get_texture_tile(struct llvmpipe_resource *lpr,
 }
 
 
+/**
+ * Get pointer to tiled data for rendering.
+ * \return pointer to the tiled data at the given tile position
+ */
+void
+llvmpipe_unswizzle_cbuf_tile(struct llvmpipe_resource *lpr,
+                             unsigned face_slice, unsigned level,
+                             unsigned x, unsigned y,
+                             uint8_t *tile)
+{
+   struct llvmpipe_texture_image *linear_img = &lpr->linear[level];
+   const unsigned tx = x / TILE_SIZE, ty = y / TILE_SIZE;
+   uint8_t *linear_image;
+
+   assert(x % TILE_SIZE == 0);
+   assert(y % TILE_SIZE == 0);
+
+   if (!linear_img->data) {
+      /* allocate memory for the linear image now */
+      alloc_image_data(lpr, level, LP_TEX_LAYOUT_LINEAR);
+   }
+
+   /* compute address of the slice/face of the image that contains the tile */
+   linear_image = llvmpipe_get_texture_image_address(lpr, face_slice, level,
+                                                     LP_TEX_LAYOUT_LINEAR);
+
+   {
+      uint ii = x, jj = y;
+      uint tile_offset = jj / TILE_SIZE + ii / TILE_SIZE;
+      uint byte_offset = tile_offset * TILE_SIZE * TILE_SIZE * 4;
+      
+      /* Note that lp_tiled_to_linear expects the tile parameter to
+       * point at the first tile in a whole-image sized array.  In
+       * this code, we have only a single tile and have to do some
+       * pointer arithmetic to figure out where the "image" would have
+       * started.
+       */
+      lp_tiled_to_linear(tile - byte_offset, linear_image,
+                         x, y, TILE_SIZE, TILE_SIZE,
+                         lpr->base.format,
+                         lpr->row_stride[level],
+                         1);       /* tiles per row */
+   }
+
+   llvmpipe_set_texture_tile_layout(lpr, face_slice, level, tx, ty,
+                                    LP_TEX_LAYOUT_LINEAR);
+}
+
+
+/**
+ * Get pointer to tiled data for rendering.
+ * \return pointer to the tiled data at the given tile position
+ */
+void
+llvmpipe_swizzle_cbuf_tile(struct llvmpipe_resource *lpr,
+                           unsigned face_slice, unsigned level,
+                           unsigned x, unsigned y,
+                           uint8_t *tile)
+{
+   uint8_t *linear_image;
+
+   assert(x % TILE_SIZE == 0);
+   assert(y % TILE_SIZE == 0);
+
+   /* compute address of the slice/face of the image that contains the tile */
+   linear_image = llvmpipe_get_texture_image_address(lpr, face_slice, level,
+                                                     LP_TEX_LAYOUT_LINEAR);
+
+   if (linear_image) {
+      uint ii = x, jj = y;
+      uint tile_offset = jj / TILE_SIZE + ii / TILE_SIZE;
+      uint byte_offset = tile_offset * TILE_SIZE * TILE_SIZE * 4;
+
+      /* Note that lp_linear_to_tiled expects the tile parameter to
+       * point at the first tile in a whole-image sized array.  In
+       * this code, we have only a single tile and have to do some
+       * pointer arithmetic to figure out where the "image" would have
+       * started.
+       */
+      lp_linear_to_tiled(linear_image, tile - byte_offset,
+                         x, y, TILE_SIZE, TILE_SIZE,
+                         lpr->base.format,
+                         lpr->row_stride[level],
+                         1);       /* tiles per row */
+   }
+}
+
+
 /**
  * Return size of resource in bytes
  */
index 503b6a19a8d57aeb86577f49ce4bd76ffe4c3a69..4e4a65dcb401d1e260b79655ab301a45e6e8c2f1 100644 (file)
@@ -223,6 +223,17 @@ llvmpipe_get_texture_tile(struct llvmpipe_resource *lpr,
                            unsigned x, unsigned y);
 
 
+void
+llvmpipe_unswizzle_cbuf_tile(struct llvmpipe_resource *lpr,
+                             unsigned face_slice, unsigned level,
+                             unsigned x, unsigned y,
+                             uint8_t *tile);
+
+void
+llvmpipe_swizzle_cbuf_tile(struct llvmpipe_resource *lpr,
+                           unsigned face_slice, unsigned level,
+                           unsigned x, unsigned y,
+                           uint8_t *tile);
 
 extern void
 llvmpipe_print_resources(void);