radeonsi: set exact shader buffer read/write usage in CS
author Marek Olšák <marek.olsak@amd.com>
Thu, 28 Feb 2019 18:02:13 +0000 (13:02 -0500)
committer Marek Olšák <marek.olsak@amd.com>
Thu, 4 Apr 2019 23:28:52 +0000 (19:28 -0400)
Reviewed-by: Timothy Arceri <tarceri@itsqueeze.com>
src/gallium/drivers/radeonsi/si_compute_blit.c
src/gallium/drivers/radeonsi/si_descriptors.c
src/gallium/drivers/radeonsi/si_query.c
src/gallium/drivers/radeonsi/si_query.h
src/gallium/drivers/radeonsi/si_state.c
src/gallium/drivers/radeonsi/si_state.h

index d24c2f3493f90c486b3a72ab43165ade9f6576c3..1abeac6adb0cef230b5c4678f8d193241b69a649 100644 (file)
@@ -102,6 +102,13 @@ static void si_compute_do_clear_or_copy(struct si_context *sctx,
        struct pipe_shader_buffer saved_sb[2] = {};
        si_get_shader_buffers(sctx, PIPE_SHADER_COMPUTE, 0, src ? 2 : 1, saved_sb);
 
+       unsigned saved_writable_mask = 0;
+       for (unsigned i = 0; i < (src ? 2 : 1); i++) {
+               if (sctx->const_and_shader_buffers[PIPE_SHADER_COMPUTE].writable_mask &
+                   (1u << si_get_shaderbuf_slot(i)))
+                       saved_writable_mask |= 1 << i;
+       }
+
        /* The memory accesses are coalesced, meaning that the 1st instruction writes
         * the 1st contiguous block of data for the whole wave, the 2nd instruction
         * writes the 2nd contiguous block of data, etc.
@@ -172,7 +179,8 @@ static void si_compute_do_clear_or_copy(struct si_context *sctx,
 
        /* Restore states. */
        ctx->bind_compute_state(ctx, saved_cs);
-       ctx->set_shader_buffers(ctx, PIPE_SHADER_COMPUTE, 0, src ? 2 : 1, saved_sb, ~0);
+       ctx->set_shader_buffers(ctx, PIPE_SHADER_COMPUTE, 0, src ? 2 : 1, saved_sb,
+                               saved_writable_mask);
        si_compute_internal_end(sctx);
 }
 
index c9e7a3dc61d8315d0c8d5f5b63e8c7cd94b68355..244ba5a7becb767b441973c222242a572c89c359 100644 (file)
@@ -993,13 +993,9 @@ static void si_init_buffer_resources(struct si_buffer_resources *buffers,
                                     struct si_descriptors *descs,
                                     unsigned num_buffers,
                                     short shader_userdata_rel_index,
-                                    enum radeon_bo_usage shader_usage,
-                                    enum radeon_bo_usage shader_usage_constbuf,
                                     enum radeon_bo_priority priority,
                                     enum radeon_bo_priority priority_constbuf)
 {
-       buffers->shader_usage = shader_usage;
-       buffers->shader_usage_constbuf = shader_usage_constbuf;
        buffers->priority = priority;
        buffers->priority_constbuf = priority_constbuf;
        buffers->buffers = CALLOC(num_buffers, sizeof(struct pipe_resource*));
@@ -1030,8 +1026,8 @@ static void si_buffer_resources_begin_new_cs(struct si_context *sctx,
 
                radeon_add_to_buffer_list(sctx, sctx->gfx_cs,
                        si_resource(buffers->buffers[i]),
-                       i < SI_NUM_SHADER_BUFFERS ? buffers->shader_usage :
-                                                   buffers->shader_usage_constbuf,
+                       buffers->writable_mask & (1u << i) ? RADEON_USAGE_READWRITE :
+                                                            RADEON_USAGE_READ,
                        i < SI_NUM_SHADER_BUFFERS ? buffers->priority :
                                                    buffers->priority_constbuf);
        }
@@ -1258,7 +1254,7 @@ static void si_set_constant_buffer(struct si_context *sctx,
                buffers->buffers[slot] = buffer;
                radeon_add_to_gfx_buffer_list_check_mem(sctx,
                                                        si_resource(buffer),
-                                                       buffers->shader_usage_constbuf,
+                                                       RADEON_USAGE_READ,
                                                        buffers->priority_constbuf, true);
                buffers->enabled_mask |= 1u << slot;
        } else {
@@ -1311,7 +1307,7 @@ static void si_set_shader_buffer(struct si_context *sctx,
                                 struct si_buffer_resources *buffers,
                                 unsigned descriptors_idx,
                                 uint slot, const struct pipe_shader_buffer *sbuffer,
-                                enum radeon_bo_priority priority)
+                                bool writable, enum radeon_bo_priority priority)
 {
        struct si_descriptors *descs = &sctx->descriptors[descriptors_idx];
        uint32_t *desc = descs->list + slot * 4;
@@ -1320,6 +1316,7 @@ static void si_set_shader_buffer(struct si_context *sctx,
                pipe_resource_reference(&buffers->buffers[slot], NULL);
                memset(desc, 0, sizeof(uint32_t) * 4);
                buffers->enabled_mask &= ~(1u << slot);
+               buffers->writable_mask &= ~(1u << slot);
                sctx->descriptors_dirty |= 1u << descriptors_idx;
                return;
        }
@@ -1340,8 +1337,13 @@ static void si_set_shader_buffer(struct si_context *sctx,
 
        pipe_resource_reference(&buffers->buffers[slot], &buf->b.b);
        radeon_add_to_gfx_buffer_list_check_mem(sctx, buf,
-                                               buffers->shader_usage,
+                                               writable ? RADEON_USAGE_READWRITE :
+                                                          RADEON_USAGE_READ,
                                                priority, true);
+       if (writable)
+               buffers->writable_mask |= 1u << slot;
+       else
+               buffers->writable_mask &= ~(1u << slot);
 
        buffers->enabled_mask |= 1u << slot;
        sctx->descriptors_dirty |= 1u << descriptors_idx;
@@ -1371,6 +1373,7 @@ static void si_set_shader_buffers(struct pipe_context *ctx,
                        si_resource(sbuffer->buffer)->bind_history |= PIPE_BIND_SHADER_BUFFER;
 
                si_set_shader_buffer(sctx, buffers, descriptors_idx, slot, sbuffer,
+                                    !!(writable_bitmask & (1u << i)),
                                     buffers->priority);
        }
 }
@@ -1405,7 +1408,7 @@ void si_set_rw_shader_buffer(struct si_context *sctx, uint slot,
                             const struct pipe_shader_buffer *sbuffer)
 {
        si_set_shader_buffer(sctx, &sctx->rw_buffers, SI_DESCS_RW_BUFFERS,
-                            slot, sbuffer, RADEON_PRIO_SHADER_RW_BUFFER);
+                            slot, sbuffer, true, RADEON_PRIO_SHADER_RW_BUFFER);
 }
 
 void si_set_ring_buffer(struct si_context *sctx, uint slot,
@@ -1491,7 +1494,7 @@ void si_set_ring_buffer(struct si_context *sctx, uint slot,
                pipe_resource_reference(&buffers->buffers[slot], buffer);
                radeon_add_to_buffer_list(sctx, sctx->gfx_cs,
                                      si_resource(buffer),
-                                     buffers->shader_usage, buffers->priority);
+                                     RADEON_USAGE_READWRITE, buffers->priority);
                buffers->enabled_mask |= 1u << slot;
        } else {
                /* Clear the descriptor. */
@@ -1601,7 +1604,6 @@ static void si_reset_buffer_resources(struct si_context *sctx,
                                      unsigned slot_mask,
                                      struct pipe_resource *buf,
                                      uint64_t old_va,
-                                     enum radeon_bo_usage usage,
                                      enum radeon_bo_priority priority)
 {
        struct si_descriptors *descs = &sctx->descriptors[descriptors_idx];
@@ -1616,7 +1618,10 @@ static void si_reset_buffer_resources(struct si_context *sctx,
 
                        radeon_add_to_gfx_buffer_list_check_mem(sctx,
                                                                si_resource(buf),
-                                                               usage, priority, true);
+                                                               buffers->writable_mask & (1u << i) ?
+                                                                       RADEON_USAGE_READWRITE :
+                                                                       RADEON_USAGE_READ,
+                                                               priority, true);
                }
        }
 }
@@ -1670,7 +1675,7 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf,
                        sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS;
 
                        radeon_add_to_gfx_buffer_list_check_mem(sctx,
-                                                               buffer, buffers->shader_usage,
+                                                               buffer, RADEON_USAGE_WRITE,
                                                                RADEON_PRIO_SHADER_RW_BUFFER,
                                                                true);
 
@@ -1690,7 +1695,6 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf,
                                                  si_const_and_shader_buffer_descriptors_idx(shader),
                                                  u_bit_consecutive(SI_NUM_SHADER_BUFFERS, SI_NUM_CONST_BUFFERS),
                                                  buf, old_va,
-                                                 sctx->const_and_shader_buffers[shader].shader_usage_constbuf,
                                                  sctx->const_and_shader_buffers[shader].priority_constbuf);
        }
 
@@ -1700,7 +1704,6 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf,
                                                  si_const_and_shader_buffer_descriptors_idx(shader),
                                                  u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS),
                                                  buf, old_va,
-                                                 sctx->const_and_shader_buffers[shader].shader_usage,
                                                  sctx->const_and_shader_buffers[shader].priority);
        }
 
@@ -2677,8 +2680,6 @@ void si_init_all_descriptors(struct si_context *sctx)
                desc = si_const_and_shader_buffer_descriptors(sctx, i);
                si_init_buffer_resources(&sctx->const_and_shader_buffers[i], desc,
                                         num_buffer_slots, rel_dw_offset,
-                                        RADEON_USAGE_READWRITE,
-                                        RADEON_USAGE_READ,
                                         RADEON_PRIO_SHADER_RW_BUFFER,
                                         RADEON_PRIO_CONST_BUFFER);
                desc->slot_index_to_bind_directly = si_get_constbuf_slot(0);
@@ -2708,9 +2709,8 @@ void si_init_all_descriptors(struct si_context *sctx)
        si_init_buffer_resources(&sctx->rw_buffers,
                                 &sctx->descriptors[SI_DESCS_RW_BUFFERS],
                                 SI_NUM_RW_BUFFERS, SI_SGPR_RW_BUFFERS,
-                                /* The second set of usage/priority is used by
+                                /* The second priority is used by
                                  * const buffers in RW buffer slots. */
-                                RADEON_USAGE_READWRITE, RADEON_USAGE_READ,
                                 RADEON_PRIO_SHADER_RINGS, RADEON_PRIO_CONST_BUFFER);
        sctx->descriptors[SI_DESCS_RW_BUFFERS].num_active_slots = SI_NUM_RW_BUFFERS;
 
index 5b2c4ae6e18ed3c761f9e320878fad2af0927866..d98bea2eeb3a009fe60e407d4386ac9acf1cf3e6 100644 (file)
@@ -1439,7 +1439,8 @@ static void si_restore_qbo_state(struct si_context *sctx,
        sctx->b.set_constant_buffer(&sctx->b, PIPE_SHADER_COMPUTE, 0, &st->saved_const0);
        pipe_resource_reference(&st->saved_const0.buffer, NULL);
 
-       sctx->b.set_shader_buffers(&sctx->b, PIPE_SHADER_COMPUTE, 0, 3, st->saved_ssbo, ~0);
+       sctx->b.set_shader_buffers(&sctx->b, PIPE_SHADER_COMPUTE, 0, 3, st->saved_ssbo,
+                                  st->saved_ssbo_writable_mask);
        for (unsigned i = 0; i < 3; ++i)
                pipe_resource_reference(&st->saved_ssbo[i].buffer, NULL);
 }
index c61af51d57c0d026fac2cd9d59d94ca3cd972c89..6ff0a239cb3c35297da77c88e1f6c0db6ff94add 100644 (file)
@@ -253,6 +253,7 @@ struct si_qbo_state {
        void *saved_compute;
        struct pipe_constant_buffer saved_const0;
        struct pipe_shader_buffer saved_ssbo[3];
+       unsigned saved_ssbo_writable_mask;
 };
 
 #endif /* SI_QUERY_H */
index e39387a6080aaf94fe31024eb318483bf60246a6..757c17f7df8ccf1e961627052a973e7155d244dc 100644 (file)
@@ -1356,6 +1356,14 @@ void si_save_qbo_state(struct si_context *sctx, struct si_qbo_state *st)
 
        si_get_pipe_constant_buffer(sctx, PIPE_SHADER_COMPUTE, 0, &st->saved_const0);
        si_get_shader_buffers(sctx, PIPE_SHADER_COMPUTE, 0, 3, st->saved_ssbo);
+
+       st->saved_ssbo_writable_mask = 0;
+
+       for (unsigned i = 0; i < 3; i++) {
+               if (sctx->const_and_shader_buffers[PIPE_SHADER_COMPUTE].writable_mask &
+                   (1u << si_get_shaderbuf_slot(i)))
+                       st->saved_ssbo_writable_mask |= 1 << i;
+       }
 }
 
 static void si_emit_db_render_state(struct si_context *sctx)
index 6faa4c511b15c2cd47508f84bb026bce6ff497ca..311e1a428ae3eb33c2d3fdbaf4a3e919a12cec8b 100644 (file)
@@ -409,13 +409,12 @@ struct si_descriptors {
 struct si_buffer_resources {
        struct pipe_resource            **buffers; /* this has num_buffers elements */
 
-       enum radeon_bo_usage            shader_usage:4; /* READ, WRITE, or READWRITE */
-       enum radeon_bo_usage            shader_usage_constbuf:4;
        enum radeon_bo_priority         priority:6;
        enum radeon_bo_priority         priority_constbuf:6;
 
        /* The i-th bit is set if that element is enabled (non-NULL resource). */
        unsigned                        enabled_mask;
+       unsigned                        writable_mask;
 };
 
 #define si_pm4_state_changed(sctx, member) \