From e703f71ebdf91938c83f47c898f1da058ce0ac32 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Nicolai=20H=C3=A4hnle?= Date: Wed, 7 Sep 2016 10:57:56 +0200 Subject: [PATCH] gallium/radeon: add RADEON_USAGE_SYNCHRONIZED MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This is really the behavior we want most of the time, but having a SYNCHRONIZED flag instead of an UNSYNCHRONIZED one has the advantage that OR'ing different flags together always results in stronger guarantees. The parent BOs of sub-allocated buffers will be added unsynchronized. Reviewed-by: Marek Olšák --- src/gallium/drivers/r300/r300_emit.c | 19 +++++++++++-------- src/gallium/drivers/radeon/r600_cs.h | 6 ++++-- src/gallium/drivers/radeon/radeon_uvd.c | 3 ++- src/gallium/drivers/radeon/radeon_vce.c | 3 ++- src/gallium/drivers/radeon/radeon_winsys.h | 7 ++++++- 5 files changed, 25 insertions(+), 13 deletions(-) diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 95971de0e6c..671aa623bd3 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -1321,7 +1321,7 @@ validate: tex = r300_resource(fb->cbufs[i]->texture); assert(tex && tex->buf && "cbuf is marked, but NULL!"); r300->rws->cs_add_buffer(r300->cs, tex->buf, - RADEON_USAGE_READWRITE, + RADEON_USAGE_READWRITE | RADEON_USAGE_SYNCHRONIZED, r300_surface(fb->cbufs[i])->domain, tex->b.b.nr_samples > 1 ? RADEON_PRIO_COLOR_BUFFER_MSAA : @@ -1332,7 +1332,7 @@ validate: tex = r300_resource(fb->zsbuf->texture); assert(tex && tex->buf && "zsbuf is marked, but NULL!"); r300->rws->cs_add_buffer(r300->cs, tex->buf, - RADEON_USAGE_READWRITE, + RADEON_USAGE_READWRITE | RADEON_USAGE_SYNCHRONIZED, r300_surface(fb->zsbuf)->domain, tex->b.b.nr_samples > 1 ? RADEON_PRIO_DEPTH_BUFFER_MSAA : @@ -1343,7 +1343,7 @@ validate: if (r300->aa_state.dirty) { if (aa->dest) { r300->rws->cs_add_buffer(r300->cs, aa->dest->buf, - RADEON_USAGE_WRITE, + RADEON_USAGE_WRITE | RADEON_USAGE_SYNCHRONIZED, aa->dest->domain, RADEON_PRIO_COLOR_BUFFER); } @@ -1356,19 +1356,22 @@ validate: } tex = r300_resource(texstate->sampler_views[i]->base.texture); - r300->rws->cs_add_buffer(r300->cs, tex->buf, RADEON_USAGE_READ, + r300->rws->cs_add_buffer(r300->cs, tex->buf, + RADEON_USAGE_READ | RADEON_USAGE_SYNCHRONIZED, tex->domain, RADEON_PRIO_SAMPLER_TEXTURE); } } /* ...occlusion query buffer... */ if (r300->query_current) r300->rws->cs_add_buffer(r300->cs, r300->query_current->buf, - RADEON_USAGE_WRITE, RADEON_DOMAIN_GTT, + RADEON_USAGE_WRITE | RADEON_USAGE_SYNCHRONIZED, + RADEON_DOMAIN_GTT, RADEON_PRIO_QUERY); /* ...vertex buffer for SWTCL path... */ if (r300->vbo) r300->rws->cs_add_buffer(r300->cs, r300->vbo, - RADEON_USAGE_READ, RADEON_DOMAIN_GTT, + RADEON_USAGE_READ | RADEON_USAGE_SYNCHRONIZED, + RADEON_DOMAIN_GTT, RADEON_PRIO_VERTEX_BUFFER); /* ...vertex buffers for HWTCL path... */ if (do_validate_vertex_buffers && r300->vertex_arrays_dirty) { @@ -1383,7 +1386,7 @@ validate: continue; r300->rws->cs_add_buffer(r300->cs, r300_resource(buf)->buf, - RADEON_USAGE_READ, + RADEON_USAGE_READ | RADEON_USAGE_SYNCHRONIZED, r300_resource(buf)->domain, RADEON_PRIO_SAMPLER_BUFFER); } @@ -1391,7 +1394,7 @@ validate: /* ...and index buffer for HWTCL path. */ if (index_buffer) r300->rws->cs_add_buffer(r300->cs, r300_resource(index_buffer)->buf, - RADEON_USAGE_READ, + RADEON_USAGE_READ | RADEON_USAGE_SYNCHRONIZED, r300_resource(index_buffer)->domain, RADEON_PRIO_INDEX_BUFFER); diff --git a/src/gallium/drivers/radeon/r600_cs.h b/src/gallium/drivers/radeon/r600_cs.h index 6c15df88807..28bdf15b8ad 100644 --- a/src/gallium/drivers/radeon/r600_cs.h +++ b/src/gallium/drivers/radeon/r600_cs.h @@ -73,8 +73,10 @@ static inline unsigned radeon_add_to_buffer_list(struct r600_common_context *rct enum radeon_bo_priority priority) { assert(usage); - return rctx->ws->cs_add_buffer(ring->cs, rbo->buf, usage, - rbo->domains, priority) * 4; + return rctx->ws->cs_add_buffer( + ring->cs, rbo->buf, + (enum radeon_bo_usage)(usage | RADEON_USAGE_SYNCHRONIZED), + rbo->domains, priority) * 4; } /** diff --git a/src/gallium/drivers/radeon/radeon_uvd.c b/src/gallium/drivers/radeon/radeon_uvd.c index d5d654ac4a3..3ae0eaa6e44 100644 --- a/src/gallium/drivers/radeon/radeon_uvd.c +++ b/src/gallium/drivers/radeon/radeon_uvd.c @@ -113,7 +113,8 @@ static void send_cmd(struct ruvd_decoder *dec, unsigned cmd, { int reloc_idx; - reloc_idx = dec->ws->cs_add_buffer(dec->cs, buf, usage, domain, + reloc_idx = dec->ws->cs_add_buffer(dec->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, + domain, RADEON_PRIO_UVD); if (!dec->use_legacy) { uint64_t addr; diff --git a/src/gallium/drivers/radeon/radeon_vce.c b/src/gallium/drivers/radeon/radeon_vce.c index 8504d93d9b8..10c5a78dd33 100644 --- a/src/gallium/drivers/radeon/radeon_vce.c +++ b/src/gallium/drivers/radeon/radeon_vce.c @@ -540,7 +540,8 @@ void rvce_add_buffer(struct rvce_encoder *enc, struct pb_buffer *buf, { int reloc_idx; - reloc_idx = enc->ws->cs_add_buffer(enc->cs, buf, usage, domain, RADEON_PRIO_VCE); + reloc_idx = enc->ws->cs_add_buffer(enc->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, + domain, RADEON_PRIO_VCE); if (enc->use_vm) { uint64_t addr; addr = enc->ws->buffer_get_virtual_address(buf); diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h index 91f6e898e07..809a2032f7d 100644 --- a/src/gallium/drivers/radeon/radeon_winsys.h +++ b/src/gallium/drivers/radeon/radeon_winsys.h @@ -57,7 +57,12 @@ enum radeon_bo_flag { /* bitfield */ enum radeon_bo_usage { /* bitfield */ RADEON_USAGE_READ = 2, RADEON_USAGE_WRITE = 4, - RADEON_USAGE_READWRITE = RADEON_USAGE_READ | RADEON_USAGE_WRITE + RADEON_USAGE_READWRITE = RADEON_USAGE_READ | RADEON_USAGE_WRITE, + + /* The winsys ensures that the CS submission will be scheduled after + * previously flushed CSs referencing this BO in a conflicting way. + */ + RADEON_USAGE_SYNCHRONIZED = 8 }; enum ring_type { -- 2.30.2