radeonsi: bind streamout buffers to VGT and the vertex shader
author: Marek Olšák <marek.olsak@amd.com>
Sun, 18 Aug 2013 00:34:23 +0000 (02:34 +0200)
committer: Marek Olšák <marek.olsak@amd.com>
Thu, 12 Sep 2013 23:04:44 +0000 (01:04 +0200)
Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
src/gallium/drivers/radeonsi/radeonsi_pipe.h
src/gallium/drivers/radeonsi/radeonsi_shader.c
src/gallium/drivers/radeonsi/radeonsi_shader.h
src/gallium/drivers/radeonsi/si_descriptors.c

index c5059e82dbd5c1e2732389fa099d779674c1e5a9..9306790bdd3b96d5b8ae6c51d19ab8b3e5fe8f4d 100644 (file)
@@ -134,6 +134,7 @@ struct r600_context {
                        /* The order matters. */
                        struct r600_atom *const_buffers[SI_NUM_SHADERS];
                        struct r600_atom *sampler_views[SI_NUM_SHADERS];
+                       struct r600_atom *streamout_buffers;
                        /* Caches must be flushed after resource descriptors are
                         * updated in memory. */
                        struct r600_atom *cache_flush;
@@ -164,6 +165,7 @@ struct r600_context {
        unsigned                        sprite_coord_enable;
        unsigned                        export_16bpc;
        struct si_buffer_resources      const_buffers[SI_NUM_SHADERS];
+       struct si_buffer_resources      streamout_buffers;
        struct r600_textures_info       samplers[SI_NUM_SHADERS];
        struct r600_resource            *border_color_table;
        unsigned                        border_color_offset;
index 77915aefee7c60caffda8e05e4be6211b269aa53..335cd79b1a83fe024bfb08e7086188efdd3ed06a 100644 (file)
@@ -1364,6 +1364,7 @@ static void create_function(struct si_shader_context *si_shader_ctx)
        switch (si_shader_ctx->type) {
        case TGSI_PROCESSOR_VERTEX:
                params[SI_PARAM_VERTEX_BUFFER] = params[SI_PARAM_CONST];
+               params[SI_PARAM_SO_BUFFER] = params[SI_PARAM_CONST];
                params[SI_PARAM_START_INSTANCE] = i32;
                last_sgpr = SI_PARAM_START_INSTANCE;
                params[SI_PARAM_VERTEX_ID] = i32;
index ede8bde3dee7946d741a2e1d02bf57e0dfa25d3b..64766c97327f5a3cfcc0e6dedec1d0309289ccb0 100644 (file)
 #define SI_SGPR_CONST          0
 #define SI_SGPR_SAMPLER                2
 #define SI_SGPR_RESOURCE       4
-#define SI_SGPR_VERTEX_BUFFER  6
-#define SI_SGPR_START_INSTANCE 8
+#define SI_SGPR_VERTEX_BUFFER  6  /* VS only */
+#define SI_SGPR_SO_BUFFER      8  /* VS only, stream-out */
+#define SI_SGPR_START_INSTANCE 10 /* VS only */
 
-#define SI_VS_NUM_USER_SGPR    9
+#define SI_VS_NUM_USER_SGPR    11 /* highest VS user SGPR index (SI_SGPR_START_INSTANCE, 10) plus one */
 #define SI_PS_NUM_USER_SGPR    6
 
 /* LLVM function parameter indices */
 
 /* VS only parameters */
 #define SI_PARAM_VERTEX_BUFFER 3
-#define SI_PARAM_START_INSTANCE        4
-#define SI_PARAM_VERTEX_ID     5
-#define SI_PARAM_DUMMY_0       6
-#define SI_PARAM_DUMMY_1       7
-#define SI_PARAM_INSTANCE_ID   8
+#define SI_PARAM_SO_BUFFER     4 /* stream-out buffers; follows SI_PARAM_VERTEX_BUFFER, so the later VS params move up by one */
+#define SI_PARAM_START_INSTANCE        5
+#define SI_PARAM_VERTEX_ID     6
+#define SI_PARAM_DUMMY_0       7
+#define SI_PARAM_DUMMY_1       8
+#define SI_PARAM_INSTANCE_ID   9
 
 /* PS only parameters */
 #define SI_PARAM_PRIM_MASK             3
index 5d8544848bcad6ac46ff0308ed9b248ac0463e60..a8f87815a5b4355faa741bd8dcbe15852abf6b4b 100644 (file)
@@ -456,6 +456,67 @@ static void si_set_constant_buffer(struct pipe_context *ctx, uint shader, uint s
        si_update_descriptors(rctx, &buffers->desc);
 }
 
+/* STREAMOUT BUFFERS */
+/* Bind the stream-out targets both in VGT and as buffer descriptors (shader resources). */
+static void si_set_streamout_targets(struct pipe_context *ctx,
+                                    unsigned num_targets,
+                                    struct pipe_stream_output_target **targets,
+                                    unsigned append_bitmask)
+{
+       struct r600_context *rctx = (struct r600_context *)ctx;
+       struct si_buffer_resources *buffers = &rctx->streamout_buffers;
+       unsigned old_num_targets = rctx->b.streamout.num_targets; /* read before the r600 call below; presumably that call overwrites it - confirm */
+       unsigned i;
+
+       /* Streamout buffers must be bound in 2 places:
+        * 1) in VGT by setting the VGT_STRMOUT registers
+        * 2) as shader resources
+        * NOTE(review): num_targets is not range-checked against the slot count here - confirm callers. */
+
+       /* Set the VGT regs. */
+       r600_set_streamout_targets(ctx, num_targets, targets, append_bitmask);
+
+       /* Set the shader resources: one 4-dword descriptor per target slot. */
+       for (i = 0; i < num_targets; i++) {
+               if (targets[i]) {
+                       struct pipe_resource *buffer = targets[i]->buffer;
+                       uint64_t va = r600_resource_va(ctx->screen, buffer);
+
+                       /* Set the descriptor. */
+                       uint32_t *desc = buffers->desc_data[i];
+                       desc[0] = va; /* implicit narrowing: keeps the low 32 bits of the address */
+                       desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32); /* high 32 bits of the address */
+                       desc[2] = 0xffffffff; /* NOTE(review): looks like "no size limit" for the size/record field - confirm */
+                       desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
+                                 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+                                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
+                                 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
+
+                       /* Set the resource: reference it, add it to the gfx ring relocs, mark the slot enabled. */
+                       pipe_resource_reference(&buffers->buffers[i], buffer);
+                       r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx,
+                                             (struct r600_resource*)buffer,
+                                             buffers->shader_usage);
+                       buffers->desc.enabled_mask |= 1 << i;
+               } else {
+                       /* NULL target: clear the descriptor and unset the resource. */
+                       memset(buffers->desc_data[i], 0, sizeof(uint32_t) * 4);
+                       pipe_resource_reference(&buffers->buffers[i], NULL);
+                       buffers->desc.enabled_mask &= ~(1 << i);
+               }
+               buffers->desc.dirty_mask |= 1 << i; /* slot is marked dirty whether it was set or cleared */
+       }
+       for (; i < old_num_targets; i++) { /* i continues from num_targets */
+               /* Slot used by the previous binding but not this one: clear the descriptor and unset the resource. */
+               memset(buffers->desc_data[i], 0, sizeof(uint32_t) * 4);
+               pipe_resource_reference(&buffers->buffers[i], NULL);
+               buffers->desc.enabled_mask &= ~(1 << i);
+               buffers->desc.dirty_mask |= 1 << i;
+       }
+
+       si_update_descriptors(rctx, &buffers->desc);
+}
+
 /* INIT/DEINIT */
 
 void si_init_all_descriptors(struct r600_context *rctx)
@@ -473,8 +534,13 @@ void si_init_all_descriptors(struct r600_context *rctx)
                rctx->atoms.sampler_views[i] = &rctx->samplers[i].views.desc.atom;
        }
 
+       si_init_buffer_resources(rctx, &rctx->streamout_buffers, 4, PIPE_SHADER_VERTEX,
+                                SI_SGPR_SO_BUFFER, RADEON_USAGE_WRITE);
+       rctx->atoms.streamout_buffers = &rctx->streamout_buffers.desc.atom;
+
        /* Set pipe_context functions. */
        rctx->b.b.set_constant_buffer = si_set_constant_buffer;
+       rctx->b.b.set_stream_output_targets = si_set_streamout_targets;
 }
 
 void si_release_all_descriptors(struct r600_context *rctx)
@@ -485,6 +551,7 @@ void si_release_all_descriptors(struct r600_context *rctx)
                si_release_buffer_resources(&rctx->const_buffers[i]);
                si_release_sampler_views(&rctx->samplers[i].views);
        }
+       si_release_buffer_resources(&rctx->streamout_buffers);
 }
 
 void si_all_descriptors_begin_new_cs(struct r600_context *rctx)
@@ -495,4 +562,5 @@ void si_all_descriptors_begin_new_cs(struct r600_context *rctx)
                si_buffer_resources_begin_new_cs(rctx, &rctx->const_buffers[i]);
                si_sampler_views_begin_new_cs(rctx, &rctx->samplers[i].views);
        }
+       si_buffer_resources_begin_new_cs(rctx, &rctx->streamout_buffers);
 }