ilo: try unblocking a transfer with a staging bo
author	Chia-I Wu <olvaffe@gmail.com>
Mon, 28 Jul 2014 01:28:05 +0000 (09:28 +0800)
committer	Chia-I Wu <olvaffe@gmail.com>
Mon, 28 Jul 2014 14:57:22 +0000 (22:57 +0800)
When mapping a busy resource with PIPE_TRANSFER_DISCARD_RANGE or
PIPE_TRANSFER_FLUSH_EXPLICIT, we can avoid blocking by allocating and mapping
a staging bo and emitting pipelined copies at the proper places.  Since the
staging bo is never bound to the GPU, we give it a packed layout to save space.

src/gallium/drivers/ilo/ilo_resource.c
src/gallium/drivers/ilo/ilo_screen.c
src/gallium/drivers/ilo/ilo_transfer.c
src/gallium/drivers/ilo/ilo_transfer.h
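
For reference, the unblocking decision added to xfer_unblock() below can be
condensed into the following sketch.  It assumes the helpers
usage_allows_staging_bo() and xfer_alloc_staging_res() behave as shown in the
ilo_transfer.c hunks; the function name xfer_try_staging_unblock() is
illustrative and does not exist in the driver.

   /*
    * Illustrative sketch of the new unblocking path: when the mapping usage
    * permits it, allocate a linear staging resource, map that instead of the
    * busy resource, and let unmap or flush_region emit the pipelined copy
    * back to the real resource.
    */
   static bool
   xfer_try_staging_unblock(struct ilo_transfer *xfer)
   {
      /* only DISCARD_RANGE / FLUSH_EXPLICIT writes may use a staging bo */
      if (!usage_allows_staging_bo(xfer->base.usage))
         return false;

      /* linear, packed staging resource sized to the transfer box */
      if (!xfer_alloc_staging_res(xfer))
         return false;

      xfer->method = ILO_TRANSFER_MAP_STAGING;
      return true;
   }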

index c812c19279d49c1b4e2299d59df054dc0575c904..812ccafa93b3d041cb0ea8646b6732b46886efb6 100644 (file)
@@ -1279,15 +1279,14 @@ tex_apply_layout(struct ilo_texture *tex,
                  const struct winsys_handle *handle)
 {
    tex->bo_format = layout->format;
+   tex->block_width = layout->block_width;
+   tex->block_height = layout->block_height;
+   tex->block_size = layout->block_size;
 
    tex->tiling = layout->tiling;
    tex->bo_stride = layout->bo_stride;
    tex->bo_height = layout->bo_height;
 
-   tex->block_width = layout->block_width;
-   tex->block_height = layout->block_height;
-   tex->block_size = layout->block_size;
-
    tex->halign_8 = (layout->align_i == 8);
    tex->valign_4 = (layout->align_j == 4);
    tex->array_spacing_full = layout->array_spacing_full;
@@ -1315,6 +1314,44 @@ tex_apply_layout(struct ilo_texture *tex,
    return true;
 }
 
+/**
+ * The texture is for transfer only.  We can define our own layout to save
+ * space.
+ */
+static bool
+tex_apply_transfer_layout(struct ilo_texture *tex)
+{
+   const struct pipe_resource *templ = &tex->base;
+   const unsigned num_slices = (templ->target == PIPE_TEXTURE_3D) ?
+      templ->depth0 : templ->array_size;
+   unsigned slice_width, slice_height, i;
+
+   assert(templ->last_level == 0);
+
+   tex->bo_format = templ->format;
+   tex->block_width = util_format_get_blockwidth(templ->format);
+   tex->block_height = util_format_get_blockheight(templ->format);
+   tex->block_size = util_format_get_blocksize(templ->format);
+
+   assert(util_is_power_of_two(tex->block_width) &&
+          util_is_power_of_two(tex->block_height));
+
+   /* use packed layout */
+   slice_width = align(templ->width0, tex->block_width);
+   slice_height = align(templ->height0, tex->block_height);
+   for (i = 0; i < num_slices; i++) {
+      tex->slices[0][i].x = 0;
+      tex->slices[0][i].y = slice_height * i;
+   }
+
+   tex->tiling = INTEL_TILING_NONE;
+   tex->bo_stride = (slice_width / tex->block_width) * tex->block_size;
+   tex->bo_stride = align(tex->bo_stride, 64);
+   tex->bo_height = (slice_height / tex->block_height) * num_slices;
+
+   return tex_create_bo(tex);
+}
+
 static void
 tex_destroy(struct ilo_texture *tex)
 {
@@ -1338,6 +1375,7 @@ tex_create(struct pipe_screen *screen,
 {
    struct tex_layout layout;
    struct ilo_texture *tex;
+   bool transfer_only;
 
    tex = CALLOC_STRUCT(ilo_texture);
    if (!tex)
@@ -1354,6 +1392,18 @@ tex_create(struct pipe_screen *screen,
 
    tex->imported = (handle != NULL);
 
+   /* use transfer layout when the texture is never bound to GPU */
+   transfer_only = !(templ->bind & ~(PIPE_BIND_TRANSFER_WRITE |
+                                     PIPE_BIND_TRANSFER_READ));
+   if (transfer_only && templ->last_level == 0) {
+      if (!tex_apply_transfer_layout(tex)) {
+         tex_destroy(tex);
+         return NULL;
+      }
+
+      return &tex->base;
+   }
+
    if (!tex_layout_init(&layout, screen, templ, tex->slices)) {
       tex_destroy(tex);
       return NULL;
index 09980dd20f2e56b1ddad9d18ee3c18623da4a9b0..d22691101d94d865e8333fbfedbc828a07ef09a2 100644 (file)
@@ -34,6 +34,7 @@
 #include "ilo_context.h"
 #include "ilo_format.h"
 #include "ilo_resource.h"
+#include "ilo_transfer.h" /* for ILO_TRANSFER_MAP_BUFFER_ALIGNMENT */
 #include "ilo_public.h"
 #include "ilo_screen.h"
 
@@ -397,7 +398,7 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param)
    case PIPE_CAP_TEXTURE_MULTISAMPLE:
       return false; /* TODO */
    case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
-      return 64;
+      return ILO_TRANSFER_MAP_BUFFER_ALIGNMENT;
    case PIPE_CAP_CUBE_MAP_ARRAY:
    case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
       return true;
index 0f1347b70a333bd49e32e173e4492bb026c843da..7acb4ac5c913230f70aa58d09eaa4599f098321b 100644 (file)
@@ -30,6 +30,7 @@
 #include "util/u_format_etc.h"
 
 #include "ilo_blit.h"
+#include "ilo_blitter.h"
 #include "ilo_cp.h"
 #include "ilo_context.h"
 #include "ilo_resource.h"
@@ -163,6 +164,52 @@ usage_allows_staging_bo(unsigned usage)
    return (usage & can_writeback) && !(usage & reasons_against);
 }
 
+/**
+ * Allocate the staging resource.  It is always linear and its size matches
+ * the transfer box, with proper paddings.
+ */
+static bool
+xfer_alloc_staging_res(struct ilo_transfer *xfer)
+{
+   const struct pipe_resource *res = xfer->base.resource;
+   const struct pipe_box *box = &xfer->base.box;
+   struct pipe_resource templ;
+
+   memset(&templ, 0, sizeof(templ));
+
+   templ.format = res->format;
+
+   if (res->target == PIPE_BUFFER) {
+      templ.target = PIPE_BUFFER;
+      templ.width0 =
+         (box->x % ILO_TRANSFER_MAP_BUFFER_ALIGNMENT) + box->width;
+   }
+   else {
+      /* use 2D array for any texture target */
+      templ.target = PIPE_TEXTURE_2D_ARRAY;
+      templ.width0 = box->width;
+   }
+
+   templ.height0 = box->height;
+   templ.depth0 = 1;
+   templ.array_size = box->depth;
+   templ.nr_samples = 1;
+   templ.usage = PIPE_USAGE_STAGING;
+   templ.bind = PIPE_BIND_TRANSFER_WRITE;
+
+   if (xfer->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT) {
+      templ.flags = PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
+                    PIPE_RESOURCE_FLAG_MAP_COHERENT;
+   }
+
+   xfer->staging.res = res->screen->resource_create(res->screen, &templ);
+
+   if (xfer->staging.res && xfer->staging.res->target != PIPE_BUFFER)
+      assert(ilo_texture(xfer->staging.res)->tiling == INTEL_TILING_NONE);
+
+   return (xfer->staging.res != NULL);
+}
+
 /**
  * Use an alternative transfer method or rename the resource to unblock an
  * otherwise blocking transfer.
@@ -185,11 +232,14 @@ xfer_unblock(struct ilo_transfer *xfer, bool *resource_renamed)
          renamed = true;
          unblocked = true;
       }
-      else if (usage_allows_staging_bo(xfer->base.usage)) {
-         /* TODO */
+      else if (usage_allows_staging_bo(xfer->base.usage) &&
+               xfer_alloc_staging_res(xfer)) {
+         xfer->method = ILO_TRANSFER_MAP_STAGING;
+         unblocked = true;
       }
       break;
    case ILO_TRANSFER_MAP_GTT_UNSYNC:
+   case ILO_TRANSFER_MAP_STAGING:
       unblocked = true;
       break;
    default:
@@ -218,10 +268,10 @@ xfer_alloc_staging_sys(struct ilo_transfer *xfer)
    xfer->base.layer_stride =
       util_format_get_2d_size(format, xfer->base.stride, box->height);
 
-   xfer->staging_sys =
+   xfer->staging.sys =
       align_malloc(xfer->base.layer_stride * box->depth, alignment);
 
-   return (xfer->staging_sys != NULL);
+   return (xfer->staging.sys != NULL);
 }
 
 /**
@@ -244,9 +294,29 @@ xfer_map(struct ilo_transfer *xfer)
    case ILO_TRANSFER_MAP_GTT_UNSYNC:
       ptr = intel_bo_map_unsynchronized(resource_get_bo(xfer->base.resource));
       break;
+   case ILO_TRANSFER_MAP_STAGING:
+      {
+         const struct ilo_screen *is = ilo_screen(xfer->staging.res->screen);
+         struct intel_bo *bo = resource_get_bo(xfer->staging.res);
+
+         /*
+          * We want a writable, optionally persistent and coherent, mapping
+          * for a linear bo.  We can call resource_get_transfer_method(), but
+          * this turns out to be fairly simple.
+          */
+         if (is->dev.has_llc)
+            ptr = intel_bo_map(bo, true);
+         else
+            ptr = intel_bo_map_gtt(bo);
+
+         if (ptr && xfer->staging.res->target == PIPE_BUFFER)
+            ptr += (xfer->base.box.x % ILO_TRANSFER_MAP_BUFFER_ALIGNMENT);
+
+      }
+      break;
    case ILO_TRANSFER_MAP_SW_CONVERT:
    case ILO_TRANSFER_MAP_SW_ZS:
-      ptr = xfer->staging_sys;
+      ptr = xfer->staging.sys;
       break;
    default:
       assert(!"unknown mapping method");
@@ -269,6 +339,9 @@ xfer_unmap(struct ilo_transfer *xfer)
    case ILO_TRANSFER_MAP_GTT_UNSYNC:
       intel_bo_unmap(resource_get_bo(xfer->base.resource));
       break;
+   case ILO_TRANSFER_MAP_STAGING:
+      intel_bo_unmap(resource_get_bo(xfer->staging.res));
+      break;
    default:
       break;
    }
@@ -583,7 +656,7 @@ tex_staging_sys_zs_read(struct ilo_texture *tex,
          tex_get_box_origin(s8_tex, xfer->base.level, slice,
                             box, &s8_mem_x, &s8_mem_y);
 
-         dst = xfer->staging_sys + xfer->base.layer_stride * slice;
+         dst = xfer->staging.sys + xfer->base.layer_stride * slice;
 
          for (i = 0; i < box->height; i++) {
             unsigned x = mem_x, s8_x = s8_mem_x;
@@ -622,7 +695,7 @@ tex_staging_sys_zs_read(struct ilo_texture *tex,
          tex_get_box_origin(tex, xfer->base.level, slice,
                             box, &mem_x, &mem_y);
 
-         dst = xfer->staging_sys + xfer->base.layer_stride * slice;
+         dst = xfer->staging.sys + xfer->base.layer_stride * slice;
 
          for (i = 0; i < box->height; i++) {
             unsigned x = mem_x;
@@ -710,7 +783,7 @@ tex_staging_sys_zs_write(struct ilo_texture *tex,
          tex_get_box_origin(s8_tex, xfer->base.level, slice,
                             box, &s8_mem_x, &s8_mem_y);
 
-         src = xfer->staging_sys + xfer->base.layer_stride * slice;
+         src = xfer->staging.sys + xfer->base.layer_stride * slice;
 
          for (i = 0; i < box->height; i++) {
             unsigned x = mem_x, s8_x = s8_mem_x;
@@ -749,7 +822,7 @@ tex_staging_sys_zs_write(struct ilo_texture *tex,
          tex_get_box_origin(tex, xfer->base.level, slice,
                             box, &mem_x, &mem_y);
 
-         src = xfer->staging_sys + xfer->base.layer_stride * slice;
+         src = xfer->staging.sys + xfer->base.layer_stride * slice;
 
          for (i = 0; i < box->height; i++) {
             unsigned x = mem_x;
@@ -800,7 +873,7 @@ tex_staging_sys_convert_write(struct ilo_texture *tex,
    if (unlikely(tex->bo_format == tex->base.format)) {
       util_copy_box(dst, tex->bo_format, tex->bo_stride, dst_slice_stride,
             0, 0, 0, box->width, box->height, box->depth,
-            xfer->staging_sys, xfer->base.stride, xfer->base.layer_stride,
+            xfer->staging.sys, xfer->base.stride, xfer->base.layer_stride,
             0, 0, 0);
 
       tex_staging_sys_unmap_bo(tex);
@@ -814,7 +887,7 @@ tex_staging_sys_convert_write(struct ilo_texture *tex,
 
       for (slice = 0; slice < box->depth; slice++) {
          const void *src =
-            xfer->staging_sys + xfer->base.layer_stride * slice;
+            xfer->staging.sys + xfer->base.layer_stride * slice;
 
          util_format_etc1_rgb8_unpack_rgba_8unorm(dst,
                tex->bo_stride, src, xfer->base.stride,
@@ -919,6 +992,14 @@ tex_map(struct ilo_transfer *xfer)
             tex_get_slice_stride(tex, xfer->base.level) : 0;
       }
       break;
+   case ILO_TRANSFER_MAP_STAGING:
+      ptr = xfer_map(xfer);
+      if (ptr) {
+         const struct ilo_texture *staging = ilo_texture(xfer->staging.res);
+         xfer->base.stride = staging->bo_stride;
+         xfer->base.layer_stride = tex_get_slice_stride(staging, 0);
+      }
+      break;
    case ILO_TRANSFER_MAP_SW_CONVERT:
    case ILO_TRANSFER_MAP_SW_ZS:
       if (xfer_alloc_staging_sys(xfer) && tex_staging_sys_readback(xfer))
@@ -944,7 +1025,9 @@ buf_map(struct ilo_transfer *xfer)
    if (!ptr)
       return NULL;
 
-   ptr += xfer->base.box.x;
+   if (xfer->method != ILO_TRANSFER_MAP_STAGING)
+      ptr += xfer->base.box.x;
+
    xfer->base.stride = 0;
    xfer->base.layer_stride = 0;
 
@@ -957,6 +1040,34 @@ buf_map(struct ilo_transfer *xfer)
    return ptr;
 }
 
+static void
+copy_staging_resource(struct ilo_context *ilo,
+                      struct ilo_transfer *xfer,
+                      const struct pipe_box *box)
+{
+   const unsigned pad_x = (xfer->staging.res->target == PIPE_BUFFER) ?
+      xfer->base.box.x % ILO_TRANSFER_MAP_BUFFER_ALIGNMENT : 0;
+   struct pipe_box modified_box;
+
+   assert(xfer->method == ILO_TRANSFER_MAP_STAGING && xfer->staging.res);
+
+   if (!box) {
+      u_box_3d(pad_x, 0, 0, xfer->base.box.width, xfer->base.box.height,
+            xfer->base.box.depth, &modified_box);
+      box = &modified_box;
+   }
+   else if (pad_x) {
+      modified_box = *box;
+      modified_box.x += pad_x;
+      box = &modified_box;
+   }
+
+   ilo_blitter_blt_copy_resource(ilo->blitter,
+         xfer->base.resource, xfer->base.level,
+         xfer->base.box.x, xfer->base.box.y, xfer->base.box.z,
+         xfer->staging.res, 0, box);
+}
+
 static bool
 is_bo_busy(struct ilo_context *ilo, struct intel_bo *bo, bool *need_flush)
 {
@@ -1042,6 +1153,16 @@ ilo_transfer_flush_region(struct pipe_context *pipe,
                           struct pipe_transfer *transfer,
                           const struct pipe_box *box)
 {
+   struct ilo_context *ilo = ilo_context(pipe);
+   struct ilo_transfer *xfer = ilo_transfer(transfer);
+
+   /*
+    * The staging resource is mapped persistently and coherently.  We can copy
+    * without unmapping.
+    */
+   if (xfer->method == ILO_TRANSFER_MAP_STAGING &&
+       (xfer->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT))
+      copy_staging_resource(ilo, xfer, box);
 }
 
 static void
@@ -1054,10 +1175,15 @@ ilo_transfer_unmap(struct pipe_context *pipe,
    xfer_unmap(xfer);
 
    switch (xfer->method) {
+   case ILO_TRANSFER_MAP_STAGING:
+      if (!(xfer->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT))
+         copy_staging_resource(ilo, xfer, NULL);
+      pipe_resource_reference(&xfer->staging.res, NULL);
+      break;
    case ILO_TRANSFER_MAP_SW_CONVERT:
    case ILO_TRANSFER_MAP_SW_ZS:
       tex_staging_sys_writeback(xfer);
-      align_free(xfer->staging_sys);
+      align_free(xfer->staging.sys);
       break;
    default:
       break;
index b346f457159c402c1aa7280b972134c7ac49f33d..d7f4838d3845cf526d2ba25a2bb08db2dc0de036 100644 (file)
 
 #include "ilo_common.h"
 
+/*
+ * Direct mappings are always page aligned, but ILO_TRANSFER_MAP_STAGING is
+ * not.
+ */
+#define ILO_TRANSFER_MAP_BUFFER_ALIGNMENT 64
+
 enum ilo_transfer_map_method {
    /* map() / map_gtt() / map_unsynchronized() */
    ILO_TRANSFER_MAP_CPU,
    ILO_TRANSFER_MAP_GTT,
    ILO_TRANSFER_MAP_GTT_UNSYNC,
 
+   /* use staging resource */
+   ILO_TRANSFER_MAP_STAGING,
+
    /* use staging system buffer */
    ILO_TRANSFER_MAP_SW_CONVERT,
    ILO_TRANSFER_MAP_SW_ZS,
@@ -47,7 +56,11 @@ struct ilo_transfer {
    struct pipe_transfer base;
 
    enum ilo_transfer_map_method method;
-   void *staging_sys;
+   /* pipe_resource, system memory, or garbage depending on the method */
+   union {
+      struct pipe_resource *res;
+      void *sys;
+   } staging;
 };
 
 struct ilo_context;