panfrost: Size tiled temp buffers correctly
author Alyssa Rosenzweig <alyssa@rosenzweig.io>
Wed, 3 Apr 2019 03:52:36 +0000 (03:52 +0000)
committer Alyssa Rosenzweig <alyssa@rosenzweig.io>
Thu, 4 Apr 2019 03:51:43 +0000 (03:51 +0000)
This should lower transient memory usage and improve performance
slightly (due to less memory to malloc/free, better cache locality,
etc.).

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
src/gallium/drivers/panfrost/pan_resource.c
src/gallium/drivers/panfrost/pan_swizzle.c
src/gallium/drivers/panfrost/pan_swizzle.h

index 217b27c5778b99c74b7c8ea9a9f1a1025f0935b5..15d522f19639d94ac7dc262680d31c8f38a79ead 100644 (file)
@@ -390,8 +390,6 @@ panfrost_transfer_map(struct pipe_context *pctx,
         transfer->base.level = level;
         transfer->base.usage = usage;
         transfer->base.box = *box;
-        transfer->base.stride = bo->slices[level].stride;
-        transfer->base.layer_stride = bo->cubemap_stride;
 
         pipe_resource_reference(&transfer->base.resource, resource);
 
@@ -413,12 +411,17 @@ panfrost_transfer_map(struct pipe_context *pctx,
                 if (usage & PIPE_TRANSFER_MAP_DIRECTLY)
                         return NULL;
 
+                transfer->base.stride = box->width * bytes_per_pixel;
+                transfer->base.layer_stride = transfer->base.stride * box->height;
+
                 /* TODO: Reads */
-                /* TODO: Only allocate "just" enough, shortening the stride */
-                transfer->map = malloc(transfer->base.stride * box->height);
+                transfer->map = malloc(transfer->base.layer_stride * box->depth);
 
                 return transfer->map;
         } else {
+                transfer->base.stride = bo->slices[level].stride;
+                transfer->base.layer_stride = bo->cubemap_stride;
+
                 return bo->cpu
                         + bo->slices[level].offset
                         + transfer->base.box.z * bo->cubemap_stride
@@ -440,7 +443,6 @@ panfrost_tile_texture(struct panfrost_screen *screen, struct panfrost_resource *
                         trans->base.box.width,
                         trans->base.box.height,
                         util_format_get_blocksize(rsrc->base.format),
-                        bo->slices[level].stride,
                         u_minify(rsrc->base.width0, level),
                         trans->map,
                         bo->cpu
index afc89506b336800cdb82e2cc9a81cbb660a97309..291bd1f88aea1b1f8fa26df6808671dec48b7807 100644 (file)
@@ -149,15 +149,18 @@ swizzle_bpp4_align16(int width, int height, int source_stride, int block_pitch,
 void
 panfrost_texture_swizzle(unsigned off_x,
                          unsigned off_y,
-                         int width, int height, int bytes_per_pixel, int source_stride, int dest_width,
+                         int width, int height, int bytes_per_pixel, int dest_width,
                          const uint8_t *pixels,
                          uint8_t *ldest)
 {
         /* Calculate maximum size, overestimating a bit */
         int block_pitch = ALIGN(dest_width, 16) >> 4;
 
+        /* Strides must be tight, since we're only ever called indirectly */
+        int source_stride = width * bytes_per_pixel;
+
         /* Use fast path if available */
-        if (!(off_x || off_y)) {
+        if (!(off_x || off_y) && (width == dest_width)) {
                 if (bytes_per_pixel == 4 /* && (ALIGN(width, 16) == width) */) {
                         swizzle_bpp4_align16(width, height, source_stride >> 2, (block_pitch * 256 >> 4), (const uint32_t *) pixels, (uint32_t *) ldest);
                         return;
index 6f4dadef494cd617b8b61bc67488f267b39c51e4..f4188f139306c3e1b795ad5e10e5607e13bc1cc3 100644 (file)
@@ -32,7 +32,7 @@ panfrost_generate_space_filler_indices(void);
 
 void
 panfrost_texture_swizzle(unsigned off_x, unsigned off_y,
-                        int width, int height, int bytes_per_pixel, int source_stride, int dest_width,
+                        int width, int height, int bytes_per_pixel, int dest_width,
                          const uint8_t *pixels,
                          uint8_t *ldest);