From 4ca3486b195653f875003d335921fd4e7d7c2c4a Mon Sep 17 00:00:00 2001
From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Sat, 8 Mar 2014 15:15:41 +0100
Subject: [PATCH] r600g,radeonsi: use a fallback in dma_copy instead of failing

v2: - allow byte-aligned DMA buffer copies on Evergreen
    - fix piglit/texsubimage regression
    - use the fallback for 3D copies (depth > 1) as well
---
 src/gallium/drivers/r600/evergreen_state.c    | 40 +++++++------
 src/gallium/drivers/r600/r600_state.c         | 44 ++++++++------
 .../drivers/radeon/r600_buffer_common.c       | 58 +++++++++----------
 src/gallium/drivers/radeon/r600_pipe_common.h | 17 +++---
 src/gallium/drivers/radeon/r600_texture.c     | 18 ++----
 src/gallium/drivers/radeonsi/si_state.c       | 19 +++---
 6 files changed, 99 insertions(+), 97 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 18c2ee59b87..05cc3ef4cc2 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -3325,13 +3325,13 @@ static void evergreen_dma_copy_tile(struct r600_context *rctx,
 	}
 }
 
-static boolean evergreen_dma_blit(struct pipe_context *ctx,
-				  struct pipe_resource *dst,
-				  unsigned dst_level,
-				  unsigned dst_x, unsigned dst_y, unsigned dst_z,
-				  struct pipe_resource *src,
-				  unsigned src_level,
-				  const struct pipe_box *src_box)
+static void evergreen_dma_blit(struct pipe_context *ctx,
+			       struct pipe_resource *dst,
+			       unsigned dst_level,
+			       unsigned dstx, unsigned dsty, unsigned dstz,
+			       struct pipe_resource *src,
+			       unsigned src_level,
+			       const struct pipe_box *src_box)
 {
 	struct r600_context *rctx = (struct r600_context *)ctx;
 	struct r600_texture *rsrc = (struct r600_texture*)src;
@@ -3339,22 +3339,22 @@ static boolean evergreen_dma_blit(struct pipe_context *ctx,
 	unsigned dst_pitch, src_pitch, bpp, dst_mode, src_mode, copy_height;
 	unsigned src_w, dst_w;
 	unsigned src_x, src_y;
+	unsigned dst_x = dstx, dst_y = dsty, dst_z = dstz;
 
 	if (rctx->b.rings.dma.cs == NULL) {
-		return FALSE;
+		goto fallback;
 	}
 
 	if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
 		evergreen_dma_copy(rctx, dst, src, dst_x, src_box->x, src_box->width);
-		return TRUE;
+		return;
 	}
 
-	if (src->format != dst->format) {
-		return FALSE;
-	}
-	if (rdst->dirty_level_mask != 0) {
-		return FALSE;
+	if (src->format != dst->format || src_box->depth > 1 ||
+	    rdst->dirty_level_mask != 0) {
+		goto fallback;
 	}
+
 	if (rsrc->dirty_level_mask) {
 		ctx->flush_resource(ctx, src);
 	}
@@ -3379,13 +3379,13 @@ static boolean evergreen_dma_blit(struct pipe_context *ctx,
 
 	if (src_pitch != dst_pitch || src_box->x || dst_x || src_w != dst_w) {
 		/* FIXME evergreen can do partial blit */
-		return FALSE;
+		goto fallback;
 	}
 	/* the x test here are currently useless (because we don't support partial blit)
 	 * but keep them around so we don't forget about those */
 	if ((src_pitch & 0x7) || (src_box->x & 0x7) || (dst_x & 0x7) ||
 	    (src_box->y & 0x7) || (dst_y & 0x7)) {
-		return FALSE;
+		goto fallback;
 	}
 
 	/* 128 bpp surfaces require non_disp_tiling for both
@@ -3396,7 +3396,7 @@ static boolean evergreen_dma_blit(struct pipe_context *ctx,
 	 * tiled and linear buffers on cayman. However, async
 	 * DMA only supports it on the tiled side. As such
 	 * the tile order is backwards after a L2T/T2L packet. */
 	if ((rctx->b.chip_class == CAYMAN) &&
 	    (src_mode != dst_mode) &&
 	    (util_format_get_blocksize(src->format) >= 16)) {
-		return FALSE;
+		goto fallback;
 	}
 
 	if (src_mode == dst_mode) {
@@ -3419,7 +3419,11 @@ static boolean evergreen_dma_blit(struct pipe_context *ctx,
 				   src, src_level, src_x, src_y, src_box->z,
 				   copy_height, dst_pitch, bpp);
 	}
-	return TRUE;
+	return;
+
+fallback:
+	ctx->resource_copy_region(ctx, dst, dst_level, dstx, dsty, dstz,
+				  src, src_level, src_box);
 }
 
 void evergreen_init_state_functions(struct r600_context *rctx)
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index e6254c69a2c..39e38f4dd0d 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -2882,13 +2882,13 @@ static boolean r600_dma_copy_tile(struct r600_context *rctx,
 	return TRUE;
 }
 
-static boolean r600_dma_blit(struct pipe_context *ctx,
-			     struct pipe_resource *dst,
-			     unsigned dst_level,
-			     unsigned dst_x, unsigned dst_y, unsigned dst_z,
-			     struct pipe_resource *src,
-			     unsigned src_level,
-			     const struct pipe_box *src_box)
+static void r600_dma_blit(struct pipe_context *ctx,
+			  struct pipe_resource *dst,
+			  unsigned dst_level,
+			  unsigned dstx, unsigned dsty, unsigned dstz,
+			  struct pipe_resource *src,
+			  unsigned src_level,
+			  const struct pipe_box *src_box)
 {
 	struct r600_context *rctx = (struct r600_context *)ctx;
 	struct r600_texture *rsrc = (struct r600_texture*)src;
@@ -2896,18 +2896,22 @@ static boolean r600_dma_blit(struct pipe_context *ctx,
 	unsigned dst_pitch, src_pitch, bpp, dst_mode, src_mode, copy_height;
 	unsigned src_w, dst_w;
 	unsigned src_x, src_y;
+	unsigned dst_x = dstx, dst_y = dsty, dst_z = dstz;
 
 	if (rctx->b.rings.dma.cs == NULL) {
-		return FALSE;
+		goto fallback;
 	}
 
 	if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
+		if (dst_x % 4 || src_box->x % 4 || src_box->width % 4)
+			goto fallback;
+
 		r600_dma_copy(rctx, dst, src, dst_x, src_box->x, src_box->width);
-		return TRUE;
+		return;
 	}
 
-	if (src->format != dst->format) {
-		return FALSE;
+	if (src->format != dst->format || src_box->depth > 1) {
+		goto fallback;
 	}
 
 	src_x = util_format_get_nblocksx(src->format, src_box->x);
@@ -2930,11 +2934,11 @@ static boolean r600_dma_blit(struct pipe_context *ctx,
 
 	if (src_pitch != dst_pitch || src_box->x || dst_x || src_w != dst_w) {
 		/* strick requirement on r6xx/r7xx */
-		return FALSE;
+		goto fallback;
 	}
 	/* lot of constraint on alignment this should capture them all */
 	if ((src_pitch & 0x7) || (src_box->y & 0x7) || (dst_y & 0x7)) {
-		return FALSE;
+		goto fallback;
 	}
 
 	if (src_mode == dst_mode) {
@@ -2954,15 +2958,21 @@ static boolean r600_dma_blit(struct pipe_context *ctx,
 		size = src_box->height * src_pitch;
 		/* must be dw aligned */
 		if ((dst_offset & 0x3) || (src_offset & 0x3) || (size & 0x3)) {
-			return FALSE;
+			goto fallback;
 		}
 		r600_dma_copy(rctx, dst, src, dst_offset, src_offset, size);
 	} else {
-		return r600_dma_copy_tile(rctx, dst, dst_level, dst_x, dst_y, dst_z,
+		if (!r600_dma_copy_tile(rctx, dst, dst_level, dst_x, dst_y, dst_z,
 					src, src_level, src_x, src_y, src_box->z,
-					copy_height, dst_pitch, bpp);
+					copy_height, dst_pitch, bpp)) {
+			goto fallback;
+		}
 	}
-	return TRUE;
+	return;
+
+fallback:
+	ctx->resource_copy_region(ctx, dst, dst_level, dstx, dsty, dstz,
+				  src, src_level, src_box);
 }
 
 void r600_init_state_functions(struct r600_context *rctx)
diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c b/src/gallium/drivers/radeon/r600_buffer_common.c
index 90ca8cb35e6..a7ecfb3e440 100644
--- a/src/gallium/drivers/radeon/r600_buffer_common.c
+++ b/src/gallium/drivers/radeon/r600_buffer_common.c
@@ -190,6 +190,17 @@ static void *r600_buffer_get_transfer(struct pipe_context *ctx,
 	return data;
 }
 
+static bool r600_can_dma_copy_buffer(struct r600_common_context *rctx,
+				     unsigned dstx, unsigned srcx, unsigned size)
+{
+	bool dword_aligned = !(dstx % 4) && !(srcx % 4) && !(size % 4);
+
+	return rctx->screen->has_cp_dma ||
+	       (dword_aligned && (rctx->rings.dma.cs ||
+				  rctx->screen->has_streamout));
+
+}
+
 static void *r600_buffer_transfer_map(struct pipe_context *ctx,
                                       struct pipe_resource *resource,
                                       unsigned level,
@@ -233,10 +244,7 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
 	else if ((usage & PIPE_TRANSFER_DISCARD_RANGE) &&
 		 !(usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
 		 !(rscreen->debug_flags & DBG_NO_DISCARD_RANGE) &&
-		 (rscreen->has_cp_dma ||
-		  (rscreen->has_streamout &&
-		   /* The buffer range must be aligned to 4 with streamout. */
-		   box->x % 4 == 0 && box->width % 4 == 0))) {
+		 r600_can_dma_copy_buffer(rctx, box->x, 0, box->width)) {
 		assert(usage & PIPE_TRANSFER_WRITE);
 
 		/* Check if mapping this buffer would cause waiting for the GPU. */
@@ -260,10 +268,11 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
 		/* At this point, the buffer is always idle (we checked it above). */
 		usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
 	}
-	/* Using DMA for larger reads is much faster */
+	/* Using a staging buffer in GTT for larger reads is much faster. */
 	else if ((usage & PIPE_TRANSFER_READ) && !(usage & PIPE_TRANSFER_WRITE) &&
-		 (rbuffer->domains == RADEON_DOMAIN_VRAM)) {
+		 rbuffer->domains == RADEON_DOMAIN_VRAM &&
+		 r600_can_dma_copy_buffer(rctx, 0, box->x, box->width)) {
 		unsigned offset;
 		struct r600_resource *staging = NULL;
 
@@ -274,26 +283,16 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
 
 		if (staging) {
 			data += box->x % R600_MAP_BUFFER_ALIGNMENT;
-			/* Copy the staging buffer into the original one. */
-			if (rctx->dma_copy(ctx, (struct pipe_resource*)staging, 0,
-					   box->x % R600_MAP_BUFFER_ALIGNMENT,
-					   0, 0, resource, level, box)) {
-				rctx->rings.gfx.flush(rctx, 0);
-				if (rctx->rings.dma.cs)
-					rctx->rings.dma.flush(rctx, 0);
-
-				/* Wait for any offloaded CS flush to complete
-				 * to avoid busy-waiting in the winsys. */
-				rctx->ws->cs_sync_flush(rctx->rings.gfx.cs);
-				if (rctx->rings.dma.cs)
-					rctx->ws->cs_sync_flush(rctx->rings.dma.cs);
-
-				rctx->ws->buffer_wait(staging->buf, RADEON_USAGE_WRITE);
-				return r600_buffer_get_transfer(ctx, resource, level, usage, box,
-								ptransfer, data, staging, offset);
-			} else {
-				pipe_resource_reference((struct pipe_resource**)&staging, NULL);
-			}
+			/* Copy the VRAM buffer to the staging buffer. */
+			rctx->dma_copy(ctx, &staging->b.b, 0,
+				       box->x % R600_MAP_BUFFER_ALIGNMENT,
+				       0, 0, resource, level, box);
+
+			/* Just do the synchronization. The buffer is mapped already. */
+			r600_buffer_map_sync_with_rings(rctx, staging, PIPE_TRANSFER_READ);
+
+			return r600_buffer_get_transfer(ctx, resource, level, usage, box,
+							ptransfer, data, staging, offset);
 		}
 	}
 
@@ -329,12 +328,7 @@ static void r600_buffer_transfer_unmap(struct pipe_context *ctx,
 
 		u_box_1d(soffset, size, &box);
 		/* Copy the staging buffer into the original one. */
-		if (!(size % 4) && !(doffset % 4) && !(soffset % 4) &&
-		    rctx->dma_copy(ctx, dst, 0, doffset, 0, 0, src, 0, &box)) {
-			/* DONE. */
-		} else {
-			ctx->resource_copy_region(ctx, dst, 0, doffset, 0, 0, src, 0, &box);
-		}
+		rctx->dma_copy(ctx, dst, 0, doffset, 0, 0, src, 0, &box);
 	}
 	pipe_resource_reference((struct pipe_resource**)&rtransfer->staging, NULL);
 }
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index da953a2ef1e..86de6f879aa 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -346,15 +346,14 @@ struct r600_common_context {
 	boolean			current_render_cond_cond;
 	boolean			predicate_drawing;
 
-	/* Copy one resource to another using async DMA.
-	 * False is returned if the copy couldn't be done. */
-	boolean (*dma_copy)(struct pipe_context *ctx,
-			    struct pipe_resource *dst,
-			    unsigned dst_level,
-			    unsigned dst_x, unsigned dst_y, unsigned dst_z,
-			    struct pipe_resource *src,
-			    unsigned src_level,
-			    const struct pipe_box *src_box);
+	/* Copy one resource to another using async DMA. */
+	void (*dma_copy)(struct pipe_context *ctx,
+			 struct pipe_resource *dst,
+			 unsigned dst_level,
+			 unsigned dst_x, unsigned dst_y, unsigned dst_z,
+			 struct pipe_resource *src,
+			 unsigned src_level,
+			 const struct pipe_box *src_box);
 
 	void (*clear_buffer)(struct pipe_context *ctx, struct pipe_resource *dst,
 			     unsigned offset, unsigned size, unsigned value);
diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
index 8eb48678f07..be10b46c0f1 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -80,12 +80,8 @@ static void r600_copy_to_staging_texture(struct pipe_context *ctx, struct r600_t
 		return;
 	}
 
-	if (!rctx->dma_copy(ctx, dst, 0, 0, 0, 0,
-			    src, transfer->level,
-			    &transfer->box)) {
-		ctx->resource_copy_region(ctx, dst, 0, 0, 0, 0,
-					  src, transfer->level, &transfer->box);
-	}
+	rctx->dma_copy(ctx, dst, 0, 0, 0, 0, src, transfer->level,
+		       &transfer->box);
 }
 
 /* Copy from a transfer's staging texture to a full GPU one. */
@@ -106,13 +102,9 @@ static void r600_copy_from_staging_texture(struct pipe_context *ctx, struct r600
 		return;
 	}
 
-	if (!rctx->dma_copy(ctx, dst, transfer->level,
-			    transfer->box.x, transfer->box.y, transfer->box.z,
-			    src, 0, &sbox)) {
-		ctx->resource_copy_region(ctx, dst, transfer->level,
-					  transfer->box.x, transfer->box.y, transfer->box.z,
-					  src, 0, &sbox);
-	}
+	rctx->dma_copy(ctx, dst, transfer->level,
+		       transfer->box.x, transfer->box.y, transfer->box.z,
+		       src, 0, &sbox);
 }
 
 static unsigned r600_texture_get_offset(struct r600_texture *rtex, unsigned level,
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 8dd2885f70b..384333093bd 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2926,16 +2926,19 @@ static void *si_create_blend_custom(struct si_context *sctx, unsigned mode)
 	return si_create_blend_state_mode(&sctx->b.b, &blend, mode);
 }
 
-static boolean si_dma_copy(struct pipe_context *ctx,
-			   struct pipe_resource *dst,
-			   unsigned dst_level,
-			   unsigned dst_x, unsigned dst_y, unsigned dst_z,
-			   struct pipe_resource *src,
-			   unsigned src_level,
-			   const struct pipe_box *src_box)
+static void si_dma_copy(struct pipe_context *ctx,
+			struct pipe_resource *dst,
+			unsigned dst_level,
+			unsigned dst_x, unsigned dst_y, unsigned dst_z,
+			struct pipe_resource *src,
+			unsigned src_level,
+			const struct pipe_box *src_box)
 {
 	/* XXX implement this or share evergreen_dma_blit with r600g */
-	return FALSE;
+
+	/* Fallback: */
+	ctx->resource_copy_region(ctx, dst, dst_level, dst_x, dst_y, dst_z,
+				  src, src_level, src_box);
 }
 
 static void si_set_occlusion_query_state(struct pipe_context *ctx, bool enable)
-- 
2.30.2
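
Note (not part of the patch): below is a minimal, self-contained C sketch of the calling convention this change establishes. After the patch, the dma_copy hook returns void; when the async DMA engine cannot handle a request (no DMA ring, format mismatch, misalignment, depth > 1, ...), the hook itself falls back to resource_copy_region(), so callers such as the buffer and texture transfer code no longer need a failure path of their own. All names here (example_context, example_box, example_dma_copy, example_dma_engine_can_copy) are hypothetical stand-ins, not the actual Gallium/Mesa interfaces.

/* Hypothetical sketch -- not Mesa code.  Illustrates the "always succeed,
 * fall back internally" contract that this patch gives dma_copy.
 */

struct example_box {
	int x, y, z;
	int width, height, depth;
};

struct example_context {
	/* After the patch: returns void; from the caller's point of view the
	 * copy always succeeds. */
	void (*dma_copy)(struct example_context *ctx,
			 void *dst, unsigned dst_level,
			 unsigned dstx, unsigned dsty, unsigned dstz,
			 void *src, unsigned src_level,
			 const struct example_box *src_box);

	/* The regular blit path used as the universal fallback. */
	void (*resource_copy_region)(struct example_context *ctx,
				     void *dst, unsigned dst_level,
				     unsigned dstx, unsigned dsty, unsigned dstz,
				     void *src, unsigned src_level,
				     const struct example_box *src_box);
};

/* Stand-in for the per-chip checks (ring availability, matching formats,
 * dword alignment, depth == 1, tiling constraints, ...). */
static int example_dma_engine_can_copy(const struct example_box *b, unsigned dstx)
{
	return b->depth <= 1 && !(dstx % 4) && !(b->x % 4) && !(b->width % 4);
}

static void example_dma_copy(struct example_context *ctx,
			     void *dst, unsigned dst_level,
			     unsigned dstx, unsigned dsty, unsigned dstz,
			     void *src, unsigned src_level,
			     const struct example_box *src_box)
{
	if (example_dma_engine_can_copy(src_box, dstx)) {
		/* ... emit the async DMA packet here ... */
		return;
	}

	/* Fallback: the regular blit path can always handle the request. */
	ctx->resource_copy_region(ctx, dst, dst_level, dstx, dsty, dstz,
				  src, src_level, src_box);
}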