llvmpipe: reduce alignment requirement for resources from 64x64 to 4x4

author Roland Scheidegger <sroland@vmware.com>

Thu, 30 May 2013 00:05:01 +0000 (02:05 +0200)

committer Roland Scheidegger <sroland@vmware.com>

Fri, 31 May 2013 18:21:05 +0000 (20:21 +0200)
author Roland Scheidegger <sroland@vmware.com>
Thu, 30 May 2013 00:05:01 +0000 (02:05 +0200)
committer Roland Scheidegger <sroland@vmware.com>
Fri, 31 May 2013 18:21:05 +0000 (20:21 +0200)
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c

index 5c837a043e0d134e6fcc92e918379e7ab378ed0b..be5a286e3daa94267f40bc68b3f21455c3f57720 100644 (file)
--- a/src/gallium/drivers/llvmpipe/lp_rast.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast.c
@@ -95,6 +95,10 @@ lp_rast_tile_begin(struct lp_rasterizer_task *task,
     task->bin = bin;
     task->x = x * TILE_SIZE;
     task->y = y * TILE_SIZE;
+   task->width = TILE_SIZE + x * TILE_SIZE > task->scene->width_aligned ?
+                    task->scene->width_aligned - x * TILE_SIZE : TILE_SIZE;
+   task->height = TILE_SIZE + y * TILE_SIZE > task->scene->height_aligned ?
+                    task->scene->height_aligned - y * TILE_SIZE : TILE_SIZE;
  
     /* reset pointers to color and depth tile(s) */
     memset(task->color_tiles, 0, sizeof(task->color_tiles));
@@ -144,8 +148,8 @@ lp_rast_clear_color(struct lp_rasterizer_task *task,
                             scene->cbufs[i].stride,
                             task->x,
                             task->y,
-                           TILE_SIZE,
-                           TILE_SIZE,
+                           task->width,
+                           task->height,
                             &uc);
           }
        }
@@ -172,8 +176,8 @@ lp_rast_clear_color(struct lp_rasterizer_task *task,
                             scene->cbufs[i].stride,
                             task->x,
                             task->y,
-                           TILE_SIZE,
-                           TILE_SIZE,
+                           task->width,
+                           task->height,
                             &uc);
           }
        }
@@ -198,8 +202,8 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task,
     uint64_t clear_mask64 = arg.clear_zstencil.mask;
     uint32_t clear_value = (uint32_t) clear_value64;
     uint32_t clear_mask = (uint32_t) clear_mask64;
-   const unsigned height = TILE_SIZE;
-   const unsigned width = TILE_SIZE;
+   const unsigned height = task->height;
+   const unsigned width = task->width;
     const unsigned block_size = scene->zsbuf.blocksize;
     const unsigned dst_stride = scene->zsbuf.stride;
     uint8_t *dst;
@@ -325,8 +329,8 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task,
     variant = state->variant;
  
     /* render the whole 64x64 tile in 4x4 chunks */
-   for (y = 0; y < TILE_SIZE; y += 4){
-      for (x = 0; x < TILE_SIZE; x += 4) {
+   for (y = 0; y < task->height; y += 4){
+      for (x = 0; x < task->width; x += 4) {
           uint8_t *color[PIPE_MAX_COLOR_BUFS];
           unsigned stride[PIPE_MAX_COLOR_BUFS];
           uint8_t *depth = NULL;
@@ -434,21 +438,27 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
  
     assert(lp_check_alignment(state->jit_context.u8_blend_color, 16));
  
-   /* run shader on 4x4 block */
-   BEGIN_JIT_CALL(state, task);
-   variant->jit_function[RAST_EDGE_TEST](&state->jit_context,
-                                         x, y,
-                                         inputs->frontfacing,
-                                         GET_A0(inputs),
-                                         GET_DADX(inputs),
-                                         GET_DADY(inputs),
-                                         color,
-                                         depth,
-                                         mask,
-                                         &task->thread_data,
-                                         stride,
-                                         depth_stride);
-   END_JIT_CALL();
+   /*
+    * The rasterizer may produce fragments outside our allocated
+    * 4x4 blocks, hence we need to filter them out here.
+    */
+   if ((x % TILE_SIZE) < task->width && (y % TILE_SIZE) < task->height) {
+      /* run shader on 4x4 block */
+      BEGIN_JIT_CALL(state, task);
+      variant->jit_function[RAST_EDGE_TEST](&state->jit_context,
+                                            x, y,
+                                            inputs->frontfacing,
+                                            GET_A0(inputs),
+                                            GET_DADX(inputs),
+                                            GET_DADY(inputs),
+                                            color,
+                                            depth,
+                                            mask,
+                                            &task->thread_data,
+                                            stride,
+                                            depth_stride);
+      END_JIT_CALL();
+   }
  }
  
  
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h

index 8dd3615e78adf88ff74b381f2356f575db9f2eb3..9fe89e5b6f1f7e16130ca94b694cdb366572a154 100644 (file)
--- a/src/gallium/drivers/llvmpipe/lp_rast.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast.h
@@ -50,6 +50,9 @@ struct cmd_bin;
  #define FIXED_ORDER 4
  #define FIXED_ONE (1<<FIXED_ORDER)
  
+/* Width/height of the rasterizer output block passed to the jit fs */
+#define LP_RASTER_BLOCK_SIZE 4
+
  
  struct lp_rasterizer_task;
  
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h

index e4b6e5b301fd9c259c5c35e93421b5577bbeb3b1..4876d7472fb04c4d3ce690917b81627439317461 100644 (file)
--- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
@@ -86,6 +86,7 @@ struct lp_rasterizer_task
  
     struct lp_scene *scene;
     unsigned x, y;          /**< Pos of this tile in framebuffer, in pixels */
+   unsigned width, height; /**< width, height of current tile, in pixels */
  
     uint8_t *color_tiles[PIPE_MAX_COLOR_BUFS];
     uint8_t *depth_tile;
@@ -293,21 +294,27 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task,
        depth_stride = scene->zsbuf.stride;
     }
  
-   /* run shader on 4x4 block */
-   BEGIN_JIT_CALL(state, task);
-   variant->jit_function[RAST_WHOLE]( &state->jit_context,
-                                      x, y,
-                                      inputs->frontfacing,
-                                      GET_A0(inputs),
-                                      GET_DADX(inputs),
-                                      GET_DADY(inputs),
-                                      color,
-                                      depth,
-                                      0xffff,
-                                      &task->thread_data,
-                                      stride,
-                                      depth_stride);
-   END_JIT_CALL();
+   /*
+    * The rasterizer may produce fragments outside our allocated
+    * 4x4 blocks, hence we need to filter them out here.
+    */
+   if ((x % TILE_SIZE) < task->width && (y % TILE_SIZE) < task->height) {
+      /* run shader on 4x4 block */
+      BEGIN_JIT_CALL(state, task);
+      variant->jit_function[RAST_WHOLE]( &state->jit_context,
+                                         x, y,
+                                         inputs->frontfacing,
+                                         GET_A0(inputs),
+                                         GET_DADX(inputs),
+                                         GET_DADY(inputs),
+                                         color,
+                                         depth,
+                                         0xffff,
+                                         &task->thread_data,
+                                         stride,
+                                         depth_stride);
+      END_JIT_CALL();
+   }
  }
  
  void lp_rast_triangle_1( struct lp_rasterizer_task *, 
diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c

index 771ad085a120138a03b639dbd483cdb01ba3c2d8..2dfc7ff9ce7e078337c853d1aa8eb2b9115f6f59 100644 (file)
--- a/src/gallium/drivers/llvmpipe/lp_scene.c
+++ b/src/gallium/drivers/llvmpipe/lp_scene.c
@@ -505,6 +505,8 @@ void lp_scene_begin_binning( struct lp_scene *scene,
  
     scene->tiles_x = align(fb->width, TILE_SIZE) / TILE_SIZE;
     scene->tiles_y = align(fb->height, TILE_SIZE) / TILE_SIZE;
+   scene->width_aligned = align(fb->width, LP_RASTER_BLOCK_SIZE);
+   scene->height_aligned = align(fb->height, LP_RASTER_BLOCK_SIZE);
  
     assert(scene->tiles_x <= TILES_X);
     assert(scene->tiles_y <= TILES_Y);
diff --git a/src/gallium/drivers/llvmpipe/lp_scene.h b/src/gallium/drivers/llvmpipe/lp_scene.h

index fa5bbcaf013a518b7c4a686524aa3bc9b3d8805d..bc6c448bc7f00223baa3ec2457d719f9636bbe9e 100644 (file)
--- a/src/gallium/drivers/llvmpipe/lp_scene.h
+++ b/src/gallium/drivers/llvmpipe/lp_scene.h
@@ -144,6 +144,10 @@ struct lp_scene {
     /** list of resources referenced by the scene commands */
     struct resource_ref *resources;
  
+   /** scene width, height aligned to LP_RASTER_BLOCK_SIZE */
+   unsigned width_aligned;
+   unsigned height_aligned;
+
     /** Total memory used by the scene (in bytes).  This sums all the
      * data blocks and counts all bins, state, resource references and
      * other random allocations within the scene.
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c

index a141fa337ab176d7d0067766252d4dd85e9e5d3e..bafcf56b8033265ef51397863174eeec061d91e7 100644 (file)
--- a/src/gallium/drivers/llvmpipe/lp_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup.c
@@ -694,8 +694,7 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup,
                 assert(last_level <= res->last_level);
  
                 /*
-                * The complexity here is only necessary for depth textures which
-                * still are tiled.
+                * The complexity here should no longer be necessary.
                  */
                 mip_ptr = llvmpipe_get_texture_image_all(lp_tex, first_level,
                                                          LP_TEX_USAGE_READ);
diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c

index 0ac3528f7accd7ca5add50498785290a05dcd30d..56eb4999a1cc293c44c5759373d7b3205bf6137b 100644 (file)
--- a/src/gallium/drivers/llvmpipe/lp_texture.c
+++ b/src/gallium/drivers/llvmpipe/lp_texture.c
@@ -49,6 +49,7 @@
  #include "lp_texture.h"
  #include "lp_setup.h"
  #include "lp_state.h"
+#include "lp_rast.h"
  
  #include "state_tracker/sw_winsys.h"
  
@@ -84,15 +85,15 @@ llvmpipe_texture_layout(struct llvmpipe_screen *screen,
        {
           unsigned alignment, nblocksx, nblocksy, block_size;
  
-         /* For non-compressed formats we need to align the texture size
-          * to the tile size to facilitate render-to-texture.
-          * XXX this blows up 1d/1d array textures by unreasonable
-          * amount (factor 64), probably should do something about it.
+         /* For non-compressed formats we need 4x4 pixel alignment (for now).
+          * We also want rows aligned to the cache line size in x direction,
+          * otherwise the same cache line could be shared by multiple threads.
+          * XXX this blows up 1d/1d array textures by a factor of 4.
            */
           if (util_format_is_compressed(pt->format))
              alignment = 1;
           else
-            alignment = TILE_SIZE;
+            alignment = LP_RASTER_BLOCK_SIZE;
  
           nblocksx = util_format_get_nblocksx(pt->format,
                                               align(width, alignment));
@@ -100,7 +101,10 @@ llvmpipe_texture_layout(struct llvmpipe_screen *screen,
                                               align(height, alignment));
           block_size = util_format_get_blocksize(pt->format);
  
-         lpr->row_stride[level] = align(nblocksx * block_size, 16);
+         if (util_format_is_compressed(pt->format))
+            lpr->row_stride[level] = nblocksx * block_size;
+         else
+            lpr->row_stride[level] = align(nblocksx * block_size, util_cpu_caps.cacheline);
  
           /* if row_stride * height > LP_MAX_TEXTURE_SIZE */
           if (lpr->row_stride[level] > LP_MAX_TEXTURE_SIZE / nblocksy) {
@@ -244,7 +248,12 @@ llvmpipe_resource_create(struct pipe_screen *_screen,
        assert(templat->height0 == 1);
        assert(templat->depth0 == 1);
        assert(templat->last_level == 0);
-      lpr->data = align_malloc(bytes, 16);
+      /*
+       * Reserve some extra storage: when rendering to a buffer we
+       * always read/write LP_RASTER_BLOCK_SIZE pixels, but the element
+       * offset doesn't need to be aligned to LP_RASTER_BLOCK_SIZE.
+       */
+      lpr->data = align_malloc(bytes + (LP_RASTER_BLOCK_SIZE - 1) * 4 * sizeof(float), 16);
        /*
         * buffers don't really have stride but it's probably safer
         * (for code doing same calculations for buffers and textures)
@@ -327,7 +336,6 @@ llvmpipe_resource_map(struct pipe_resource *resource,
        struct llvmpipe_screen *screen = llvmpipe_screen(resource->screen);
        struct sw_winsys *winsys = screen->winsys;
        unsigned dt_usage;
-      uint8_t *map2;
  
        if (tex_usage == LP_TEX_USAGE_READ) {
           dt_usage = PIPE_TRANSFER_READ;
@@ -345,14 +353,11 @@ llvmpipe_resource_map(struct pipe_resource *resource,
        /* install this linear image in texture data structure */
        lpr->linear_img.data = map;
  
-      /* make sure tiled data gets converted to linear data */
-      map2 = llvmpipe_get_texture_image(lpr, 0, 0, tex_usage);
-      return map2;
+      return map;
     }
     else if (llvmpipe_resource_is_texture(resource)) {
  
-      map = llvmpipe_get_texture_image(lpr, layer, level,
-                                       tex_usage);
+      map = llvmpipe_get_texture_image(lpr, layer, level, tex_usage);
        return map;
     }
     else {
author	Roland Scheidegger <sroland@vmware.com>
	Thu, 30 May 2013 00:05:01 +0000 (02:05 +0200)
committer	Roland Scheidegger <sroland@vmware.com>
	Fri, 31 May 2013 18:21:05 +0000 (20:21 +0200)
src/gallium/drivers/llvmpipe/lp_rast.c		patch \| blob \| history
src/gallium/drivers/llvmpipe/lp_rast.h		patch \| blob \| history
src/gallium/drivers/llvmpipe/lp_rast_priv.h		patch \| blob \| history
src/gallium/drivers/llvmpipe/lp_scene.c		patch \| blob \| history
src/gallium/drivers/llvmpipe/lp_scene.h		patch \| blob \| history
src/gallium/drivers/llvmpipe/lp_setup.c		patch \| blob \| history
src/gallium/drivers/llvmpipe/lp_texture.c		patch \| blob \| history