From bb74152597de44ee877b8928587b1cece8b49656 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Thu, 28 Apr 2016 16:32:39 +0200 Subject: [PATCH] gallium/radeon: flush if DMA IB memory usage is too high MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This prevents IB rejections due to insane memory usage from many consecutive texture uploads. Reviewed-by: Alex Deucher Reviewed-by: Nicolai Hähnle --- .../drivers/r600/evergreen_hw_context.c | 2 +- src/gallium/drivers/r600/evergreen_state.c | 2 +- src/gallium/drivers/r600/r600_hw_context.c | 2 +- src/gallium/drivers/r600/r600_state.c | 2 +- src/gallium/drivers/radeon/r600_pipe_common.c | 27 ++++++++++++++++--- src/gallium/drivers/radeon/r600_pipe_common.h | 3 ++- src/gallium/drivers/radeonsi/cik_sdma.c | 8 +++--- src/gallium/drivers/radeonsi/si_dma.c | 4 +-- 8 files changed, 35 insertions(+), 15 deletions(-) diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c b/src/gallium/drivers/r600/evergreen_hw_context.c index c2dba8c1996..cd073191cdf 100644 --- a/src/gallium/drivers/r600/evergreen_hw_context.c +++ b/src/gallium/drivers/r600/evergreen_hw_context.c @@ -60,7 +60,7 @@ void evergreen_dma_copy_buffer(struct r600_context *rctx, } ncopy = (size / EG_DMA_COPY_MAX_SIZE) + !!(size % EG_DMA_COPY_MAX_SIZE); - r600_need_dma_space(&rctx->b, ncopy * 5); + r600_need_dma_space(&rctx->b, ncopy * 5, rdst, rsrc); for (i = 0; i < ncopy; i++) { csize = size < EG_DMA_COPY_MAX_SIZE ? 
size : EG_DMA_COPY_MAX_SIZE; /* emit reloc before writing cs so that cs is always in consistent state */ diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index acf60c6d8d1..62152c03f7e 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -3442,7 +3442,7 @@ static void evergreen_dma_copy_tile(struct r600_context *rctx, size = (copy_height * pitch) / 4; ncopy = (size / EG_DMA_COPY_MAX_SIZE) + !!(size % EG_DMA_COPY_MAX_SIZE); - r600_need_dma_space(&rctx->b, ncopy * 9); + r600_need_dma_space(&rctx->b, ncopy * 9, &rdst->resource, &rsrc->resource); for (i = 0; i < ncopy; i++) { cheight = copy_height; diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c index fa1028be7af..857da7f3704 100644 --- a/src/gallium/drivers/r600/r600_hw_context.c +++ b/src/gallium/drivers/r600/r600_hw_context.c @@ -467,7 +467,7 @@ void r600_dma_copy_buffer(struct r600_context *rctx, size >>= 2; /* convert to dwords */ ncopy = (size / R600_DMA_COPY_MAX_SIZE_DW) + !!(size % R600_DMA_COPY_MAX_SIZE_DW); - r600_need_dma_space(&rctx->b, ncopy * 5); + r600_need_dma_space(&rctx->b, ncopy * 5, rdst, rsrc); for (i = 0; i < ncopy; i++) { csize = size < R600_DMA_COPY_MAX_SIZE_DW ? 
size : R600_DMA_COPY_MAX_SIZE_DW; /* emit reloc before writing cs so that cs is always in consistent state */ diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 715c6f11145..ab0cf5cb69c 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -2918,7 +2918,7 @@ static boolean r600_dma_copy_tile(struct r600_context *rctx, */ cheight = ((R600_DMA_COPY_MAX_SIZE_DW * 4) / pitch) & 0xfffffff8; ncopy = (copy_height / cheight) + !!(copy_height % cheight); - r600_need_dma_space(&rctx->b, ncopy * 7); + r600_need_dma_space(&rctx->b, ncopy * 7, &rdst->resource, &rsrc->resource); for (i = 0; i < ncopy; i++) { cheight = cheight > copy_height ? copy_height : cheight; diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c index 3e15b143700..ce8d76963a9 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.c +++ b/src/gallium/drivers/radeon/r600_pipe_common.c @@ -136,14 +136,33 @@ void r600_draw_rectangle(struct blitter_context *blitter, pipe_resource_reference(&buf, NULL); } -void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw) +void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw, + struct r600_resource *dst, struct r600_resource *src) { + uint64_t vram = 0, gtt = 0; + + if (dst) { + if (dst->domains & RADEON_DOMAIN_VRAM) + vram += dst->buf->size; + else if (dst->domains & RADEON_DOMAIN_GTT) + gtt += dst->buf->size; + } + if (src) { + if (src->domains & RADEON_DOMAIN_VRAM) + vram += src->buf->size; + else if (src->domains & RADEON_DOMAIN_GTT) + gtt += src->buf->size; + } + /* Flush the GFX IB if it's not empty. */ if (ctx->gfx.cs->cdw > ctx->initial_gfx_cs_size) ctx->gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL); - /* Flush if there's not enough space. */ - if ((num_dw + ctx->dma.cs->cdw) > ctx->dma.cs->max_dw) { + /* Flush if there's not enough space, or if the memory usage per IB + * is too large. 
+ */ + if ((num_dw + ctx->dma.cs->cdw) > ctx->dma.cs->max_dw || + !ctx->ws->cs_memory_below_limit(ctx->dma.cs, vram, gtt)) { ctx->dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL); assert((num_dw + ctx->dma.cs->cdw) <= ctx->dma.cs->max_dw); } @@ -157,7 +176,7 @@ void r600_dma_emit_wait_idle(struct r600_common_context *rctx) /* done at the end of DMA calls, so increment this. */ rctx->num_dma_calls++; - r600_need_dma_space(rctx, 1); + r600_need_dma_space(rctx, 1, NULL, NULL); if (cs->cdw == 0) /* empty queue */ return; diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index 03449536b5b..3e54534e79e 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -597,7 +597,8 @@ void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_re struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen, const struct pipe_resource *templ); const char *r600_get_llvm_processor_name(enum radeon_family family); -void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw); +void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw, + struct r600_resource *dst, struct r600_resource *src); void r600_dma_emit_wait_idle(struct r600_common_context *rctx); /* r600_gpu_load.c */ diff --git a/src/gallium/drivers/radeonsi/cik_sdma.c b/src/gallium/drivers/radeonsi/cik_sdma.c index 396cecc4b44..c68b858b14c 100644 --- a/src/gallium/drivers/radeonsi/cik_sdma.c +++ b/src/gallium/drivers/radeonsi/cik_sdma.c @@ -47,7 +47,7 @@ static void cik_sdma_do_copy_buffer(struct si_context *ctx, src_offset += r600_resource(src)->gpu_address; ncopy = DIV_ROUND_UP(size, CIK_SDMA_COPY_MAX_SIZE); - r600_need_dma_space(&ctx->b, ncopy * 7); + r600_need_dma_space(&ctx->b, ncopy * 7, rdst, rsrc); radeon_add_to_buffer_list(&ctx->b, &ctx->b.dma, rsrc, RADEON_USAGE_READ, RADEON_PRIO_SDMA_BUFFER); @@ -212,7 +212,7 @@ static bool 
cik_sdma_copy_texture(struct si_context *sctx, srcy + copy_height != (1 << 14)))) { struct radeon_winsys_cs *cs = sctx->b.dma.cs; - r600_need_dma_space(&sctx->b, 13); + r600_need_dma_space(&sctx->b, 13, &rdst->resource, &rsrc->resource); radeon_add_to_buffer_list(&sctx->b, &sctx->b.dma, &rsrc->resource, RADEON_USAGE_READ, RADEON_PRIO_SDMA_TEXTURE); @@ -382,7 +382,7 @@ static bool cik_sdma_copy_texture(struct si_context *sctx, copy_depth <= (1 << 11)) { struct radeon_winsys_cs *cs = sctx->b.dma.cs; - r600_need_dma_space(&sctx->b, 14); + r600_need_dma_space(&sctx->b, 14, &rdst->resource, &rsrc->resource); radeon_add_to_buffer_list(&sctx->b, &sctx->b.dma, &rsrc->resource, RADEON_USAGE_READ, RADEON_PRIO_SDMA_TEXTURE); @@ -484,7 +484,7 @@ static bool cik_sdma_copy_texture(struct si_context *sctx, dstx + copy_width != (1 << 14)))) { struct radeon_winsys_cs *cs = sctx->b.dma.cs; - r600_need_dma_space(&sctx->b, 15); + r600_need_dma_space(&sctx->b, 15, &rdst->resource, &rsrc->resource); radeon_add_to_buffer_list(&sctx->b, &sctx->b.dma, &rsrc->resource, RADEON_USAGE_READ, RADEON_PRIO_SDMA_TEXTURE); diff --git a/src/gallium/drivers/radeonsi/si_dma.c b/src/gallium/drivers/radeonsi/si_dma.c index 03321f583a5..25fb4b76812 100644 --- a/src/gallium/drivers/radeonsi/si_dma.c +++ b/src/gallium/drivers/radeonsi/si_dma.c @@ -64,7 +64,7 @@ static void si_dma_copy_buffer(struct si_context *ctx, } ncopy = (size / max_csize) + !!(size % max_csize); - r600_need_dma_space(&ctx->b, ncopy * 5); + r600_need_dma_space(&ctx->b, ncopy * 5, rdst, rsrc); radeon_add_to_buffer_list(&ctx->b, &ctx->b.dma, rsrc, RADEON_USAGE_READ, RADEON_PRIO_SDMA_BUFFER); @@ -161,7 +161,7 @@ static void si_dma_copy_tile(struct si_context *ctx, mt = G_009910_MICRO_TILE_MODE(tile_mode); size = (copy_height * pitch) / 4; ncopy = (size / SI_DMA_COPY_MAX_SIZE_DW) + !!(size % SI_DMA_COPY_MAX_SIZE_DW); - r600_need_dma_space(&ctx->b, ncopy * 9); + r600_need_dma_space(&ctx->b, ncopy * 9, &rdst->resource, &rsrc->resource); 
radeon_add_to_buffer_list(&ctx->b, &ctx->b.dma, &rsrc->resource, RADEON_USAGE_READ, RADEON_PRIO_SDMA_TEXTURE); -- 2.30.2