r600g,radeonsi: use a fallback in dma_copy instead of failing

author Marek Olšák <marek.olsak@amd.com>

Sat, 8 Mar 2014 14:15:41 +0000 (15:15 +0100)

committer Marek Olšák <marek.olsak@amd.com>

Tue, 11 Mar 2014 18:18:02 +0000 (19:18 +0100)
author Marek Olšák <marek.olsak@amd.com>
Sat, 8 Mar 2014 14:15:41 +0000 (15:15 +0100)
committer Marek Olšák <marek.olsak@amd.com>
Tue, 11 Mar 2014 18:18:02 +0000 (19:18 +0100)
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c

index 18c2ee59b87496d524cc578da8fffb0f600b788b..05cc3ef4cc22afa118d4936c91ccf356157a0ed3 100644 (file)
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -3325,13 +3325,13 @@ static void evergreen_dma_copy_tile(struct r600_context *rctx,
         }
  }
  
-static boolean evergreen_dma_blit(struct pipe_context *ctx,
-                                 struct pipe_resource *dst,
-                                 unsigned dst_level,
-                                 unsigned dst_x, unsigned dst_y, unsigned dst_z,
-                                 struct pipe_resource *src,
-                                 unsigned src_level,
-                                 const struct pipe_box *src_box)
+static void evergreen_dma_blit(struct pipe_context *ctx,
+                              struct pipe_resource *dst,
+                              unsigned dst_level,
+                              unsigned dstx, unsigned dsty, unsigned dstz,
+                              struct pipe_resource *src,
+                              unsigned src_level,
+                              const struct pipe_box *src_box)
  {
         struct r600_context *rctx = (struct r600_context *)ctx;
         struct r600_texture *rsrc = (struct r600_texture*)src;
@@ -3339,22 +3339,22 @@ static boolean evergreen_dma_blit(struct pipe_context *ctx,
         unsigned dst_pitch, src_pitch, bpp, dst_mode, src_mode, copy_height;
         unsigned src_w, dst_w;
         unsigned src_x, src_y;
+       unsigned dst_x = dstx, dst_y = dsty, dst_z = dstz;
  
         if (rctx->b.rings.dma.cs == NULL) {
-               return FALSE;
+               goto fallback;
         }
  
         if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
                 evergreen_dma_copy(rctx, dst, src, dst_x, src_box->x, src_box->width);
-               return TRUE;
+               return;
         }
  
-       if (src->format != dst->format) {
-               return FALSE;
-       }
-       if (rdst->dirty_level_mask != 0) {
-               return FALSE;
+       if (src->format != dst->format || src_box->depth > 1 ||
+           rdst->dirty_level_mask != 0) {
+               goto fallback;
         }
+
         if (rsrc->dirty_level_mask) {
                 ctx->flush_resource(ctx, src);
         }
@@ -3379,13 +3379,13 @@ static boolean evergreen_dma_blit(struct pipe_context *ctx,
  
         if (src_pitch != dst_pitch || src_box->x || dst_x || src_w != dst_w) {
                 /* FIXME evergreen can do partial blit */
-               return FALSE;
+               goto fallback;
         }
         /* the x test here are currently useless (because we don't support partial blit)
          * but keep them around so we don't forget about those
          */
         if ((src_pitch & 0x7) || (src_box->x & 0x7) || (dst_x & 0x7) || (src_box->y & 0x7) || (dst_y & 0x7)) {
-               return FALSE;
+               goto fallback;
         }
  
         /* 128 bpp surfaces require non_disp_tiling for both
@@ -3396,7 +3396,7 @@ static boolean evergreen_dma_blit(struct pipe_context *ctx,
         if ((rctx->b.chip_class == CAYMAN) &&
             (src_mode != dst_mode) &&
             (util_format_get_blocksize(src->format) >= 16)) {
-               return FALSE;
+               goto fallback;
         }
  
         if (src_mode == dst_mode) {
@@ -3419,7 +3419,11 @@ static boolean evergreen_dma_blit(struct pipe_context *ctx,
                                         src, src_level, src_x, src_y, src_box->z,
                                         copy_height, dst_pitch, bpp);
         }
-       return TRUE;
+       return;
+
+fallback:
+       ctx->resource_copy_region(ctx, dst, dst_level, dstx, dsty, dstz,
+                                 src, src_level, src_box);
  }
  
  void evergreen_init_state_functions(struct r600_context *rctx)
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c

index e6254c69a2ca6d1c063ee21438b56dd3d13a78e4..39e38f4dd0d4e1e1ec445cf11685450e4cc93fb0 100644 (file)
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -2882,13 +2882,13 @@ static boolean r600_dma_copy_tile(struct r600_context *rctx,
         return TRUE;
  }
  
-static boolean r600_dma_blit(struct pipe_context *ctx,
-                            struct pipe_resource *dst,
-                            unsigned dst_level,
-                            unsigned dst_x, unsigned dst_y, unsigned dst_z,
-                            struct pipe_resource *src,
-                            unsigned src_level,
-                            const struct pipe_box *src_box)
+static void r600_dma_blit(struct pipe_context *ctx,
+                         struct pipe_resource *dst,
+                         unsigned dst_level,
+                         unsigned dstx, unsigned dsty, unsigned dstz,
+                         struct pipe_resource *src,
+                         unsigned src_level,
+                         const struct pipe_box *src_box)
  {
         struct r600_context *rctx = (struct r600_context *)ctx;
         struct r600_texture *rsrc = (struct r600_texture*)src;
@@ -2896,18 +2896,22 @@ static boolean r600_dma_blit(struct pipe_context *ctx,
         unsigned dst_pitch, src_pitch, bpp, dst_mode, src_mode, copy_height;
         unsigned src_w, dst_w;
         unsigned src_x, src_y;
+       unsigned dst_x = dstx, dst_y = dsty, dst_z = dstz;
  
         if (rctx->b.rings.dma.cs == NULL) {
-               return FALSE;
+               goto fallback;
         }
  
         if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
+               if (dst_x % 4 || src_box->x % 4 || src_box->width % 4)
+                       goto fallback;
+
                 r600_dma_copy(rctx, dst, src, dst_x, src_box->x, src_box->width);
-               return TRUE;
+               return;
         }
  
-       if (src->format != dst->format) {
-               return FALSE;
+       if (src->format != dst->format || src_box->depth > 1) {
+               goto fallback;
         }
  
         src_x = util_format_get_nblocksx(src->format, src_box->x);
@@ -2930,11 +2934,11 @@ static boolean r600_dma_blit(struct pipe_context *ctx,
  
         if (src_pitch != dst_pitch || src_box->x || dst_x || src_w != dst_w) {
                 /* strick requirement on r6xx/r7xx */
-               return FALSE;
+               goto fallback;
         }
         /* lot of constraint on alignment this should capture them all */
         if ((src_pitch & 0x7) || (src_box->y & 0x7) || (dst_y & 0x7)) {
-               return FALSE;
+               goto fallback;
         }
  
         if (src_mode == dst_mode) {
@@ -2954,15 +2958,21 @@ static boolean r600_dma_blit(struct pipe_context *ctx,
                 size = src_box->height * src_pitch;
                 /* must be dw aligned */
                 if ((dst_offset & 0x3) || (src_offset & 0x3) || (size & 0x3)) {
-                       return FALSE;
+                       goto fallback;
                 }
                 r600_dma_copy(rctx, dst, src, dst_offset, src_offset, size);
         } else {
-               return r600_dma_copy_tile(rctx, dst, dst_level, dst_x, dst_y, dst_z,
+               if (!r600_dma_copy_tile(rctx, dst, dst_level, dst_x, dst_y, dst_z,
                                         src, src_level, src_x, src_y, src_box->z,
-                                       copy_height, dst_pitch, bpp);
+                                       copy_height, dst_pitch, bpp)) {
+                       goto fallback;
+               }
         }
-       return TRUE;
+       return;
+
+fallback:
+       ctx->resource_copy_region(ctx, dst, dst_level, dstx, dsty, dstz,
+                                 src, src_level, src_box);
  }
  
  void r600_init_state_functions(struct r600_context *rctx)
diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c b/src/gallium/drivers/radeon/r600_buffer_common.c

index 90ca8cb35e6498093fa51281524a24db93970c1f..a7ecfb3e440090f67c441cac05abaad796e7fdf6 100644 (file)
--- a/src/gallium/drivers/radeon/r600_buffer_common.c
+++ b/src/gallium/drivers/radeon/r600_buffer_common.c
@@ -190,6 +190,17 @@ static void *r600_buffer_get_transfer(struct pipe_context *ctx,
         return data;
  }
  
+static bool r600_can_dma_copy_buffer(struct r600_common_context *rctx,
+                                    unsigned dstx, unsigned srcx, unsigned size)
+{
+       bool dword_aligned = !(dstx % 4) && !(srcx % 4) && !(size % 4);
+
+       return rctx->screen->has_cp_dma ||
+              (dword_aligned && (rctx->rings.dma.cs ||
+                                 rctx->screen->has_streamout));
+
+}
+
  static void *r600_buffer_transfer_map(struct pipe_context *ctx,
                                        struct pipe_resource *resource,
                                        unsigned level,
@@ -233,10 +244,7 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
         else if ((usage & PIPE_TRANSFER_DISCARD_RANGE) &&
                  !(usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
                  !(rscreen->debug_flags & DBG_NO_DISCARD_RANGE) &&
-                (rscreen->has_cp_dma ||
-                 (rscreen->has_streamout &&
-                  /* The buffer range must be aligned to 4 with streamout. */
-                  box->x % 4 == 0 && box->width % 4 == 0))) {
+                r600_can_dma_copy_buffer(rctx, box->x, 0, box->width)) {
                 assert(usage & PIPE_TRANSFER_WRITE);
  
                 /* Check if mapping this buffer would cause waiting for the GPU. */
@@ -260,10 +268,11 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
                 /* At this point, the buffer is always idle (we checked it above). */
                 usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
         }
-       /* Using DMA for larger reads is much faster */
+       /* Using a staging buffer in GTT for larger reads is much faster. */
         else if ((usage & PIPE_TRANSFER_READ) &&
                  !(usage & PIPE_TRANSFER_WRITE) &&
-                (rbuffer->domains == RADEON_DOMAIN_VRAM)) {
+                rbuffer->domains == RADEON_DOMAIN_VRAM &&
+                r600_can_dma_copy_buffer(rctx, 0, box->x, box->width)) {
                 unsigned offset;
                 struct r600_resource *staging = NULL;
  
@@ -274,26 +283,16 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
                 if (staging) {
                         data += box->x % R600_MAP_BUFFER_ALIGNMENT;
  
-                       /* Copy the staging buffer into the original one. */
-                       if (rctx->dma_copy(ctx, (struct pipe_resource*)staging, 0,
-                                                box->x % R600_MAP_BUFFER_ALIGNMENT,
-                                                0, 0, resource, level, box)) {
-                               rctx->rings.gfx.flush(rctx, 0);
-                               if (rctx->rings.dma.cs)
-                                       rctx->rings.dma.flush(rctx, 0);
-
-                               /* Wait for any offloaded CS flush to complete
-                                * to avoid busy-waiting in the winsys. */
-                               rctx->ws->cs_sync_flush(rctx->rings.gfx.cs);
-                               if (rctx->rings.dma.cs)
-                                       rctx->ws->cs_sync_flush(rctx->rings.dma.cs);
-
-                               rctx->ws->buffer_wait(staging->buf, RADEON_USAGE_WRITE);
-                               return r600_buffer_get_transfer(ctx, resource, level, usage, box,
-                                                               ptransfer, data, staging, offset);
-                       } else {
-                               pipe_resource_reference((struct pipe_resource**)&staging, NULL);
-                       }
+                       /* Copy the VRAM buffer to the staging buffer. */
+                       rctx->dma_copy(ctx, &staging->b.b, 0,
+                                      box->x % R600_MAP_BUFFER_ALIGNMENT,
+                                      0, 0, resource, level, box);
+
+                       /* Just do the synchronization. The buffer is mapped already. */
+                       r600_buffer_map_sync_with_rings(rctx, staging, PIPE_TRANSFER_READ);
+
+                       return r600_buffer_get_transfer(ctx, resource, level, usage, box,
+                                                       ptransfer, data, staging, offset);
                 }
         }
  
@@ -329,12 +328,7 @@ static void r600_buffer_transfer_unmap(struct pipe_context *ctx,
                         u_box_1d(soffset, size, &box);
  
                         /* Copy the staging buffer into the original one. */
-                       if (!(size % 4) && !(doffset % 4) && !(soffset % 4) &&
-                           rctx->dma_copy(ctx, dst, 0, doffset, 0, 0, src, 0, &box)) {
-                               /* DONE. */
-                       } else {
-                               ctx->resource_copy_region(ctx, dst, 0, doffset, 0, 0, src, 0, &box);
-                       }
+                       rctx->dma_copy(ctx, dst, 0, doffset, 0, 0, src, 0, &box);
                 }
                 pipe_resource_reference((struct pipe_resource**)&rtransfer->staging, NULL);
         }
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h

index da953a2ef1ee23a0e7eb723b1ceaf5f6a3a24963..86de6f879aa4e917c34341e75d13b93fe290c7fb 100644 (file)
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -346,15 +346,14 @@ struct r600_common_context {
         boolean                         current_render_cond_cond;
         boolean                         predicate_drawing;
  
-       /* Copy one resource to another using async DMA.
-        * False is returned if the copy couldn't be done. */
-       boolean (*dma_copy)(struct pipe_context *ctx,
-                           struct pipe_resource *dst,
-                           unsigned dst_level,
-                           unsigned dst_x, unsigned dst_y, unsigned dst_z,
-                           struct pipe_resource *src,
-                           unsigned src_level,
-                           const struct pipe_box *src_box);
+       /* Copy one resource to another using async DMA. */
+       void (*dma_copy)(struct pipe_context *ctx,
+                        struct pipe_resource *dst,
+                        unsigned dst_level,
+                        unsigned dst_x, unsigned dst_y, unsigned dst_z,
+                        struct pipe_resource *src,
+                        unsigned src_level,
+                        const struct pipe_box *src_box);
  
         void (*clear_buffer)(struct pipe_context *ctx, struct pipe_resource *dst,
                              unsigned offset, unsigned size, unsigned value);
diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c

index 8eb48678f07f5696cde64821a9f8bc0856675f92..be10b46c0f19c8781bb076f398fee2d32b9cec4b 100644 (file)
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -80,12 +80,8 @@ static void r600_copy_to_staging_texture(struct pipe_context *ctx, struct r600_t
                 return;
         }
  
-       if (!rctx->dma_copy(ctx, dst, 0, 0, 0, 0,
-                             src, transfer->level,
-                             &transfer->box)) {
-               ctx->resource_copy_region(ctx, dst, 0, 0, 0, 0,
-                                         src, transfer->level, &transfer->box);
-       }
+       rctx->dma_copy(ctx, dst, 0, 0, 0, 0, src, transfer->level,
+                      &transfer->box);
  }
  
  /* Copy from a transfer's staging texture to a full GPU one. */
@@ -106,13 +102,9 @@ static void r600_copy_from_staging_texture(struct pipe_context *ctx, struct r600
                 return;
         }
  
-       if (!rctx->dma_copy(ctx, dst, transfer->level,
-                             transfer->box.x, transfer->box.y, transfer->box.z,
-                             src, 0, &sbox)) {
-               ctx->resource_copy_region(ctx, dst, transfer->level,
-                                         transfer->box.x, transfer->box.y, transfer->box.z,
-                                         src, 0, &sbox);
-       }
+       rctx->dma_copy(ctx, dst, transfer->level,
+                      transfer->box.x, transfer->box.y, transfer->box.z,
+                      src, 0, &sbox);
  }
  
  static unsigned r600_texture_get_offset(struct r600_texture *rtex, unsigned level,
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c

index 8dd2885f70b234f3d5c0d346117980f26f38002d..384333093bd6b4a84d57cbf5438e11c5dc683ee8 100644 (file)
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2926,16 +2926,19 @@ static void *si_create_blend_custom(struct si_context *sctx, unsigned mode)
         return si_create_blend_state_mode(&sctx->b.b, &blend, mode);
  }
  
-static boolean si_dma_copy(struct pipe_context *ctx,
-                          struct pipe_resource *dst,
-                          unsigned dst_level,
-                          unsigned dst_x, unsigned dst_y, unsigned dst_z,
-                          struct pipe_resource *src,
-                          unsigned src_level,
-                          const struct pipe_box *src_box)
+static void si_dma_copy(struct pipe_context *ctx,
+                       struct pipe_resource *dst,
+                       unsigned dst_level,
+                       unsigned dst_x, unsigned dst_y, unsigned dst_z,
+                       struct pipe_resource *src,
+                       unsigned src_level,
+                       const struct pipe_box *src_box)
  {
         /* XXX implement this or share evergreen_dma_blit with r600g */
-       return FALSE;
+
+       /* Fallback: */
+       ctx->resource_copy_region(ctx, dst, dst_level, dst_x, dst_y, dst_z,
+                                 src, src_level, src_box);
  }
  
  static void si_set_occlusion_query_state(struct pipe_context *ctx, bool enable)
author	Marek Olšák <marek.olsak@amd.com>
	Sat, 8 Mar 2014 14:15:41 +0000 (15:15 +0100)
committer	Marek Olšák <marek.olsak@amd.com>
	Tue, 11 Mar 2014 18:18:02 +0000 (19:18 +0100)
src/gallium/drivers/r600/evergreen_state.c		patch | blob | history
src/gallium/drivers/r600/r600_state.c		patch | blob | history
src/gallium/drivers/radeon/r600_buffer_common.c		patch | blob | history
src/gallium/drivers/radeon/r600_pipe_common.h		patch | blob | history
src/gallium/drivers/radeon/r600_texture.c		patch | blob | history
src/gallium/drivers/radeonsi/si_state.c		patch | blob | history