radeonsi: use a global dirty mask for shader pointers
author: Marek Olšák <marek.olsak@amd.com>
Tue, 17 Jan 2017 20:30:23 +0000 (21:30 +0100)
committer: Marek Olšák <marek.olsak@amd.com>
Wed, 18 Jan 2017 18:51:31 +0000 (19:51 +0100)
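Replace the per-descriptor pointer_dirty flag in struct si_descriptors with a
single bitmask in si_context (shader_pointers_dirty), one bit per descriptor
array. Only vertex buffers use a separate bool flag
(vertex_buffer_pointer_dirty).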

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
src/gallium/drivers/radeonsi/si_descriptors.c
src/gallium/drivers/radeonsi/si_pipe.h
src/gallium/drivers/radeonsi/si_state.h
src/gallium/drivers/radeonsi/si_state_draw.c

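diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index a535fa0e965de39cde72d502c2fc386e281f6423..0a49c877b12297d12936ec28618c701c0ab6ab3c 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c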
@@ -246,7 +246,6 @@ static bool si_upload_descriptors(struct si_context *sctx,
                radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, desc->buffer,
                                    RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS);
        }
-       desc->pointer_dirty = true;
        desc->dirty_mask = 0;
 
        if (atom)
@@ -1035,9 +1034,9 @@ bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
         * on performance (confirmed by testing). New descriptors are always
         * uploaded to a fresh new buffer, so I don't think flushing the const
         * cache is needed. */
-       desc->pointer_dirty = true;
        si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom);
        sctx->vertex_buffers_dirty = false;
+       sctx->vertex_buffer_pointer_dirty = true;
        return true;
 }
 
@@ -1735,26 +1734,21 @@ void si_update_all_texture_descriptors(struct si_context *sctx)
 static void si_mark_shader_pointers_dirty(struct si_context *sctx,
                                          unsigned shader)
 {
-       struct si_descriptors *descs =
-               &sctx->descriptors[SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS];
-
-       for (unsigned i = 0; i < SI_NUM_SHADER_DESCS; ++i, ++descs)
-               descs->pointer_dirty = true;
+       sctx->shader_pointers_dirty |=
+               u_bit_consecutive(SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS,
+                                 SI_NUM_SHADER_DESCS);
 
        if (shader == PIPE_SHADER_VERTEX)
-               sctx->vertex_buffers.pointer_dirty = true;
+               sctx->vertex_buffer_pointer_dirty = sctx->vertex_buffers.buffer != NULL;
 
        si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom);
 }
 
 static void si_shader_userdata_begin_new_cs(struct si_context *sctx)
 {
-       int i;
-
-       for (i = 0; i < SI_NUM_SHADERS; i++) {
-               si_mark_shader_pointers_dirty(sctx, i);
-       }
-       sctx->descriptors[SI_DESCS_RW_BUFFERS].pointer_dirty = true;
+       sctx->shader_pointers_dirty = u_bit_consecutive(0, SI_NUM_DESCS);
+       sctx->vertex_buffer_pointer_dirty = sctx->vertex_buffers.buffer != NULL;
+       si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom);
 }
 
 /* Set a base register address for user data constants in the given shader.
@@ -1807,13 +1801,12 @@ void si_shader_change_notify(struct si_context *sctx)
 
 static void si_emit_shader_pointer(struct si_context *sctx,
                                   struct si_descriptors *desc,
-                                  unsigned sh_base, bool keep_dirty)
+                                  unsigned sh_base)
 {
        struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
        uint64_t va;
 
-       if (!desc->pointer_dirty || !desc->buffer)
-               return;
+       assert(desc->buffer);
 
        va = desc->buffer->gpu_address +
             desc->buffer_offset;
@@ -1822,55 +1815,66 @@ static void si_emit_shader_pointer(struct si_context *sctx,
        radeon_emit(cs, (sh_base + desc->shader_userdata_offset - SI_SH_REG_OFFSET) >> 2);
        radeon_emit(cs, va);
        radeon_emit(cs, va >> 32);
-
-       desc->pointer_dirty = keep_dirty;
 }
 
 void si_emit_graphics_shader_userdata(struct si_context *sctx,
                                       struct r600_atom *atom)
 {
-       unsigned shader;
+       unsigned mask;
        uint32_t *sh_base = sctx->shader_userdata.sh_base;
        struct si_descriptors *descs;
 
        descs = &sctx->descriptors[SI_DESCS_RW_BUFFERS];
 
-       if (descs->pointer_dirty) {
+       if (sctx->shader_pointers_dirty & (1 << SI_DESCS_RW_BUFFERS)) {
                si_emit_shader_pointer(sctx, descs,
-                                      R_00B030_SPI_SHADER_USER_DATA_PS_0, true);
+                                      R_00B030_SPI_SHADER_USER_DATA_PS_0);
                si_emit_shader_pointer(sctx, descs,
-                                      R_00B130_SPI_SHADER_USER_DATA_VS_0, true);
+                                      R_00B130_SPI_SHADER_USER_DATA_VS_0);
                si_emit_shader_pointer(sctx, descs,
-                                      R_00B230_SPI_SHADER_USER_DATA_GS_0, true);
+                                      R_00B230_SPI_SHADER_USER_DATA_GS_0);
                si_emit_shader_pointer(sctx, descs,
-                                      R_00B330_SPI_SHADER_USER_DATA_ES_0, true);
+                                      R_00B330_SPI_SHADER_USER_DATA_ES_0);
                si_emit_shader_pointer(sctx, descs,
-                                      R_00B430_SPI_SHADER_USER_DATA_HS_0, true);
-               descs->pointer_dirty = false;
+                                      R_00B430_SPI_SHADER_USER_DATA_HS_0);
        }
 
-       descs = &sctx->descriptors[SI_DESCS_FIRST_SHADER];
+       mask = sctx->shader_pointers_dirty &
+              u_bit_consecutive(SI_DESCS_FIRST_SHADER,
+                                SI_DESCS_FIRST_COMPUTE - SI_DESCS_FIRST_SHADER);
 
-       for (shader = 0; shader < SI_NUM_GRAPHICS_SHADERS; shader++) {
+       while (mask) {
+               unsigned i = u_bit_scan(&mask);
+               unsigned shader = (i - SI_DESCS_FIRST_SHADER) / SI_NUM_SHADER_DESCS;
                unsigned base = sh_base[shader];
-               unsigned i;
 
-               if (!base)
-                       continue;
+               if (base)
+                       si_emit_shader_pointer(sctx, descs + i, base);
+       }
+       sctx->shader_pointers_dirty &=
+               ~u_bit_consecutive(SI_DESCS_RW_BUFFERS, SI_DESCS_FIRST_COMPUTE);
 
-               for (i = 0; i < SI_NUM_SHADER_DESCS; i++, descs++)
-                       si_emit_shader_pointer(sctx, descs, base, false);
+       if (sctx->vertex_buffer_pointer_dirty) {
+               si_emit_shader_pointer(sctx, &sctx->vertex_buffers,
+                                      sh_base[PIPE_SHADER_VERTEX]);
+               sctx->vertex_buffer_pointer_dirty = false;
        }
-       si_emit_shader_pointer(sctx, &sctx->vertex_buffers, sh_base[PIPE_SHADER_VERTEX], false);
 }
 
 void si_emit_compute_shader_userdata(struct si_context *sctx)
 {
        unsigned base = R_00B900_COMPUTE_USER_DATA_0;
-       struct si_descriptors *descs = &sctx->descriptors[SI_DESCS_FIRST_COMPUTE];
+       struct si_descriptors *descs = sctx->descriptors;
+       unsigned compute_mask =
+               u_bit_consecutive(SI_DESCS_FIRST_COMPUTE, SI_NUM_SHADER_DESCS);
+       unsigned mask = sctx->shader_pointers_dirty & compute_mask;
 
-       for (unsigned i = 0; i < SI_NUM_SHADER_DESCS; ++i, ++descs)
-               si_emit_shader_pointer(sctx, descs, base, false);
+       while (mask) {
+               unsigned i = u_bit_scan(&mask);
+
+               si_emit_shader_pointer(sctx, descs + i, base);
+       }
+       sctx->shader_pointers_dirty &= ~compute_mask;
 }
 
 /* INIT/DEINIT/UPLOAD */
@@ -1939,6 +1943,9 @@ bool si_upload_graphics_shader_descriptors(struct si_context *sctx)
        const unsigned mask = u_bit_consecutive(0, SI_DESCS_FIRST_COMPUTE);
        unsigned dirty = sctx->descriptors_dirty & mask;
 
+       /* Assume nothing will go wrong: */
+       sctx->shader_pointers_dirty |= dirty;
+
        while (dirty) {
                unsigned i = u_bit_scan(&dirty);
 
@@ -1960,6 +1967,9 @@ bool si_upload_compute_shader_descriptors(struct si_context *sctx)
                                                SI_NUM_DESCS - SI_DESCS_FIRST_COMPUTE);
        unsigned dirty = sctx->descriptors_dirty & mask;
 
+       /* Assume nothing will go wrong: */
+       sctx->shader_pointers_dirty |= dirty;
+
        while (dirty) {
                unsigned i = u_bit_scan(&dirty);
 
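diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index e7d071d3dd3fc095e0d3f7129ae0bfc3f1c951f7..421e2a4cf6ae9b90c9a4ca5343843e07a89f7bc0 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h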
@@ -268,6 +268,7 @@ struct si_context {
        struct si_descriptors           vertex_buffers;
        struct si_descriptors           descriptors[SI_NUM_DESCS];
        unsigned                        descriptors_dirty;
+       unsigned                        shader_pointers_dirty;
        unsigned                        compressed_tex_shader_mask;
        struct si_buffer_resources      rw_buffers;
        struct si_buffer_resources      const_buffers[SI_NUM_SHADERS];
@@ -288,6 +289,7 @@ struct si_context {
 
        /* Vertex and index buffers. */
        bool                            vertex_buffers_dirty;
+       bool                            vertex_buffer_pointer_dirty;
        struct pipe_index_buffer        index_buffer;
        struct pipe_vertex_buffer       vertex_buffer[SI_NUM_VERTEX_BUFFERS];
 
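diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index edc5b9337819ce9c596e32813dc4e22a175996dd..34a0f578492de9f830adbddf84ea90b279bd9f9a 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h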
@@ -237,8 +237,6 @@ struct si_descriptors {
        /* The shader userdata offset within a shader where the 64-bit pointer to the descriptor
         * array will be stored. */
        unsigned shader_userdata_offset;
-       /* Whether the pointer should be re-emitted. */
-       bool pointer_dirty;
 };
 
 struct si_sampler_views {
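diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 96a0e846e4a14cde39f833b97a865dd008037562..837c0250eda00dedefc1f7cdf7f5e3160c87a232 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c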
@@ -1146,7 +1146,7 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 
                /* Vertex buffer descriptors are uploaded uncached, so prefetch
                 * them right after the VS binary. */
-               if (sctx->vertex_buffers.pointer_dirty) {
+               if (sctx->vertex_buffer_pointer_dirty) {
                        cik_prefetch_TC_L2_async(sctx, &sctx->vertex_buffers.buffer->b.b,
                                                sctx->vertex_buffers.buffer_offset,
                                                sctx->vertex_elements->count * 16);