gallium/radeon: add RADEON_USAGE_SYNCHRONIZED
author Nicolai Hähnle <nicolai.haehnle@amd.com>
Wed, 7 Sep 2016 08:57:56 +0000 (10:57 +0200)
committer Nicolai Hähnle <nicolai.haehnle@amd.com>
Tue, 27 Sep 2016 14:45:02 +0000 (16:45 +0200)
This is really the behavior we want most of the time, but having a
SYNCHRONIZED flag instead of an UNSYNCHRONIZED one has the advantage that
OR'ing different flags together always results in stronger guarantees.

The parent BOs of sub-allocated buffers will be added unsynchronized.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
src/gallium/drivers/r300/r300_emit.c
src/gallium/drivers/radeon/r600_cs.h
src/gallium/drivers/radeon/radeon_uvd.c
src/gallium/drivers/radeon/radeon_vce.c
src/gallium/drivers/radeon/radeon_winsys.h

index 95971de0e6c2bd48be7403f3a31ceefa6df32d99..671aa623bd3f8e805ab8102ecb574f207361b0e0 100644 (file)
@@ -1321,7 +1321,7 @@ validate:
             tex = r300_resource(fb->cbufs[i]->texture);
             assert(tex && tex->buf && "cbuf is marked, but NULL!");
             r300->rws->cs_add_buffer(r300->cs, tex->buf,
-                                    RADEON_USAGE_READWRITE,
+                                    RADEON_USAGE_READWRITE | RADEON_USAGE_SYNCHRONIZED,
                                     r300_surface(fb->cbufs[i])->domain,
                                     tex->b.b.nr_samples > 1 ?
                                     RADEON_PRIO_COLOR_BUFFER_MSAA :
@@ -1332,7 +1332,7 @@ validate:
             tex = r300_resource(fb->zsbuf->texture);
             assert(tex && tex->buf && "zsbuf is marked, but NULL!");
             r300->rws->cs_add_buffer(r300->cs, tex->buf,
-                                    RADEON_USAGE_READWRITE,
+                                    RADEON_USAGE_READWRITE | RADEON_USAGE_SYNCHRONIZED,
                                     r300_surface(fb->zsbuf)->domain,
                                     tex->b.b.nr_samples > 1 ?
                                     RADEON_PRIO_DEPTH_BUFFER_MSAA :
@@ -1343,7 +1343,7 @@ validate:
     if (r300->aa_state.dirty) {
         if (aa->dest) {
             r300->rws->cs_add_buffer(r300->cs, aa->dest->buf,
-                                    RADEON_USAGE_WRITE,
+                                    RADEON_USAGE_WRITE | RADEON_USAGE_SYNCHRONIZED,
                                     aa->dest->domain,
                                     RADEON_PRIO_COLOR_BUFFER);
         }
@@ -1356,19 +1356,22 @@ validate:
             }
 
             tex = r300_resource(texstate->sampler_views[i]->base.texture);
-            r300->rws->cs_add_buffer(r300->cs, tex->buf, RADEON_USAGE_READ,
+            r300->rws->cs_add_buffer(r300->cs, tex->buf,
+                                     RADEON_USAGE_READ | RADEON_USAGE_SYNCHRONIZED,
                                     tex->domain, RADEON_PRIO_SAMPLER_TEXTURE);
         }
     }
     /* ...occlusion query buffer... */
     if (r300->query_current)
         r300->rws->cs_add_buffer(r300->cs, r300->query_current->buf,
-                                RADEON_USAGE_WRITE, RADEON_DOMAIN_GTT,
+                                 RADEON_USAGE_WRITE | RADEON_USAGE_SYNCHRONIZED,
+                                 RADEON_DOMAIN_GTT,
                                 RADEON_PRIO_QUERY);
     /* ...vertex buffer for SWTCL path... */
     if (r300->vbo)
         r300->rws->cs_add_buffer(r300->cs, r300->vbo,
-                                RADEON_USAGE_READ, RADEON_DOMAIN_GTT,
+                                 RADEON_USAGE_READ | RADEON_USAGE_SYNCHRONIZED,
+                                 RADEON_DOMAIN_GTT,
                                 RADEON_PRIO_VERTEX_BUFFER);
     /* ...vertex buffers for HWTCL path... */
     if (do_validate_vertex_buffers && r300->vertex_arrays_dirty) {
@@ -1383,7 +1386,7 @@ validate:
                 continue;
 
             r300->rws->cs_add_buffer(r300->cs, r300_resource(buf)->buf,
-                                    RADEON_USAGE_READ,
+                                    RADEON_USAGE_READ | RADEON_USAGE_SYNCHRONIZED,
                                     r300_resource(buf)->domain,
                                     RADEON_PRIO_SAMPLER_BUFFER);
         }
@@ -1391,7 +1394,7 @@ validate:
     /* ...and index buffer for HWTCL path. */
     if (index_buffer)
         r300->rws->cs_add_buffer(r300->cs, r300_resource(index_buffer)->buf,
-                                RADEON_USAGE_READ,
+                                RADEON_USAGE_READ | RADEON_USAGE_SYNCHRONIZED,
                                 r300_resource(index_buffer)->domain,
                                 RADEON_PRIO_INDEX_BUFFER);
 
index 6c15df88807fe6db0d969ff0aa46535d395909e5..28bdf15b8ad046b26c29955795552fb332a1e036 100644 (file)
@@ -73,8 +73,10 @@ static inline unsigned radeon_add_to_buffer_list(struct r600_common_context *rct
                                                 enum radeon_bo_priority priority)
 {
        assert(usage);
-       return rctx->ws->cs_add_buffer(ring->cs, rbo->buf, usage,
-                                     rbo->domains, priority) * 4;
+       return rctx->ws->cs_add_buffer(
+               ring->cs, rbo->buf,
+               (enum radeon_bo_usage)(usage | RADEON_USAGE_SYNCHRONIZED),
+               rbo->domains, priority) * 4;
 }
 
 /**
index d5d654ac4a3ee35a04b804740a92674d3d9935da..3ae0eaa6e44cb3314da57ecc60e5ad86aeae95ed 100644 (file)
@@ -113,7 +113,8 @@ static void send_cmd(struct ruvd_decoder *dec, unsigned cmd,
 {
        int reloc_idx;
 
-       reloc_idx = dec->ws->cs_add_buffer(dec->cs, buf, usage, domain,
+       reloc_idx = dec->ws->cs_add_buffer(dec->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED,
+                                          domain,
                                          RADEON_PRIO_UVD);
        if (!dec->use_legacy) {
                uint64_t addr;
index 8504d93d9b84cb5282073ac8a3dee029223bb251..10c5a78dd3375e5c90d136bd62a2a0e137ac431c 100644 (file)
@@ -540,7 +540,8 @@ void rvce_add_buffer(struct rvce_encoder *enc, struct pb_buffer *buf,
 {
        int reloc_idx;
 
-       reloc_idx = enc->ws->cs_add_buffer(enc->cs, buf, usage, domain, RADEON_PRIO_VCE);
+       reloc_idx = enc->ws->cs_add_buffer(enc->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED,
+                                          domain, RADEON_PRIO_VCE);
        if (enc->use_vm) {
                uint64_t addr;
                addr = enc->ws->buffer_get_virtual_address(buf);
index 91f6e898e07cb3df0a1d431348052ce9b3ef630f..809a2032f7d660f7c883dafa0fe8ac40024ce779 100644 (file)
@@ -57,7 +57,12 @@ enum radeon_bo_flag { /* bitfield */
 enum radeon_bo_usage { /* bitfield */
     RADEON_USAGE_READ = 2,
     RADEON_USAGE_WRITE = 4,
-    RADEON_USAGE_READWRITE = RADEON_USAGE_READ | RADEON_USAGE_WRITE
+    RADEON_USAGE_READWRITE = RADEON_USAGE_READ | RADEON_USAGE_WRITE,
+
+    /* The winsys ensures that the CS submission will be scheduled after
+     * previously flushed CSs referencing this BO in a conflicting way.
+     */
+    RADEON_USAGE_SYNCHRONIZED = 8
 };
 
 enum ring_type {