r600g: Unify 3D and compute vertex buffer emission
author Tom Stellard <tstellar@gmail.com>
Thu, 12 Jul 2012 19:50:27 +0000 (19:50 +0000)
committer Marek Olšák <maraeo@gmail.com>
Sun, 15 Jul 2012 00:00:21 +0000 (02:00 +0200)
Signed-off-by: Marek Olšák <maraeo@gmail.com>
src/gallium/drivers/r600/evergreen_compute.c
src/gallium/drivers/r600/evergreen_compute_internal.c
src/gallium/drivers/r600/evergreen_compute_internal.h
src/gallium/drivers/r600/evergreen_state.c
src/gallium/drivers/r600/r600_pipe.h

index a88cad1246eea818cb297cb1d7077402f09d7a9c..b61ea8ff1f1691427a06b25f6c168a6c753a3e76 100644 (file)
@@ -83,6 +83,22 @@ writable images will consume TEX slots, VTX slots too because of linear indexing
 
 */
 
+/**
+ * Bind a buffer to one of the compute vertex buffer slots.
+ *
+ * Records the binding in rctx->cs_vertex_buffer[vb_index] with a stride of 1
+ * and no user buffer, then invalidates the vertex cache and marks
+ * cs_vertex_buffer_state dirty.
+ *
+ * rctx:     context whose compute vertex buffer table is updated
+ * vb_index: slot to bind; must be below PIPE_MAX_ATTRIBS
+ * offset:   stored as the slot's buffer_offset
+ * buffer:   resource to bind to the slot
+ */
+static void evergreen_cs_set_vertex_buffer(
+       struct r600_context * rctx,
+       unsigned vb_index,
+       unsigned offset,
+       struct pipe_resource * buffer)
+{
+       struct pipe_vertex_buffer *vb;
+
+       /* cs_vertex_buffer only has PIPE_MAX_ATTRIBS slots; catch
+        * out-of-range indices before writing past the array. */
+       assert(vb_index < PIPE_MAX_ATTRIBS);
+
+       vb = &rctx->cs_vertex_buffer[vb_index];
+       vb->stride = 1;
+       vb->buffer_offset = offset;
+       vb->buffer = buffer;
+       vb->user_buffer = NULL;
+
+       r600_inval_vertex_cache(rctx);
+       r600_atom_dirty(rctx, &rctx->cs_vertex_buffer_state);
+}
+
 const struct u_resource_vtbl r600_global_buffer_vtbl =
 {
        u_default_resource_get_handle, /* get_handle */
@@ -263,8 +279,8 @@ void evergreen_compute_upload_input(
        ctx->ws->buffer_unmap(ctx->cs_shader->kernel_param->cs_buf);
 
        ///ID=0 is reserved for the parameters
-       evergreen_set_vtx_resource(ctx->cs_shader,
-               ctx->cs_shader->kernel_param, 0, 0, 0);
+       evergreen_cs_set_vertex_buffer(ctx, 0, 0,
+                       (struct pipe_resource*)ctx->cs_shader->kernel_param);
        ///ID=0 is reserved for parameters
        evergreen_set_const_cache(ctx->cs_shader, 0,
                ctx->cs_shader->kernel_param, ctx->cs_shader->input_size, 0);
@@ -350,6 +366,10 @@ static void compute_emit_cs(struct r600_context *ctx)
         cb_state = ctx->states[R600_PIPE_STATE_FRAMEBUFFER];
        r600_context_pipe_state_emit(ctx, cb_state, RADEON_CP_PACKET3_COMPUTE_MODE);
 
+       /* Emit vertex buffer state */
+       ctx->cs_vertex_buffer_state.num_dw = 12 * ctx->nr_cs_vertex_buffers;
+       r600_emit_atom(ctx, &ctx->cs_vertex_buffer_state);
+
        for (i = 0; i < get_compute_resource_num(); i++) {
                if (ctx->cs_shader->resources[i].enabled) {
                        int j;
@@ -452,14 +472,15 @@ static void evergreen_set_compute_resources(struct pipe_context * ctx_,
                        start, count);
 
        for (int i = 0; i < count; i++) {
+               /* The first two vertex buffers are reserved for parameters and
+                * global buffers. */
+               unsigned vtx_id = 2 + i;
                if (resources[i]) {
                        struct r600_resource_global *buffer =
-                               (struct r600_resource_global*)resources[i]->base.texture;
+                               (struct r600_resource_global*)
+                               resources[i]->base.texture;
                        if (resources[i]->base.writable) {
                                assert(i+1 < 12);
-                               struct r600_resource_global *buffer =
-                                       (struct r600_resource_global*)
-                                       resources[i]->base.texture;
 
                                evergreen_set_rat(ctx->cs_shader, i+1,
                                (struct r600_resource *)resources[i]->base.texture,
@@ -467,9 +488,10 @@ static void evergreen_set_compute_resources(struct pipe_context * ctx_,
                                resources[i]->base.texture->width0);
                        }
 
-                       evergreen_set_vtx_resource(ctx->cs_shader,
-                               (struct r600_resource *)resources[i]->base.texture, i+2,
-                                buffer->chunk->start_in_dw*4, resources[i]->base.writable);
+                       evergreen_cs_set_vertex_buffer(ctx, vtx_id,
+                                       buffer->chunk->start_in_dw * 4,
+                                       resources[i]->base.texture);
+                       ctx->nr_cs_vertex_buffers = vtx_id + 1;
                }
        }
 
@@ -539,7 +561,8 @@ static void evergreen_set_global_binding(
        }
 
        evergreen_set_rat(ctx->cs_shader, 0, pool->bo, 0, pool->size_in_dw * 4);
-       evergreen_set_vtx_resource(ctx->cs_shader, pool->bo, 1, 0, 1);
+       evergreen_cs_set_vertex_buffer(ctx, 1, 0,
+                               (struct pipe_resource*)pool->bo);
 }
 
 /**
@@ -712,6 +735,10 @@ void evergreen_init_compute_state_functions(struct r600_context *ctx)
        ctx->context.bind_compute_sampler_states = evergreen_bind_compute_sampler_states;
        ctx->context.set_global_binding = evergreen_set_global_binding;
        ctx->context.launch_grid = evergreen_launch_grid;
+
+       /* We always use at least two vertex buffers for compute, one for
+         * parameters and one for global memory */
+       ctx->nr_cs_vertex_buffers = 2;
 }
 
 
index c5aad932c9470636464e2dbfe750c8863fc617f2..1d11bab82743117c761c7a59054b50c8c136cf54 100644 (file)
@@ -489,87 +489,6 @@ static unsigned r600_tex_dim(unsigned dim)
        }
 }
 
-void evergreen_set_vtx_resource(
-       struct r600_pipe_compute *pipe,
-       struct r600_resource* bo,
-       int id, uint64_t offset, int writable)
-{
-       assert(id < 16);
-       uint32_t sq_vtx_constant_word2, sq_vtx_constant_word3, sq_vtx_constant_word4;
-       struct number_type_and_format fmt;
-       uint64_t va;
-
-       fmt.format = 0;
-
-       assert(bo->b.b.height0 <= 1);
-       assert(bo->b.b.depth0 <= 1);
-
-       int e = evergreen_compute_get_gpu_format(&fmt, bo);
-
-       assert(e && "unknown format");
-
-       struct evergreen_compute_resource* res =
-               get_empty_res(pipe, COMPUTE_RESOURCE_VERT, id);
-
-       unsigned size = bo->b.b.width0;
-       unsigned stride = 1;
-
-//     size = (size * util_format_get_blockwidth(bo->b.b.b.format) *
-//             util_format_get_blocksize(bo->b.b.b.format));
-
-       va = r600_resource_va(&pipe->ctx->screen->screen, &bo->b.b) + offset;
-
-       COMPUTE_DBG("id: %i vtx size: %i byte,  width0: %i elem\n",
-               id, size, bo->b.b.width0);
-
-       sq_vtx_constant_word2 =
-               S_030008_BASE_ADDRESS_HI(va >> 32) |
-               S_030008_STRIDE(stride) |
-               S_030008_DATA_FORMAT(fmt.format) |
-               S_030008_NUM_FORMAT_ALL(fmt.num_format_all) |
-               S_030008_ENDIAN_SWAP(0);
-
-       COMPUTE_DBG("%08X %i %i %i %i\n", sq_vtx_constant_word2, offset,
-                       stride, fmt.format, fmt.num_format_all);
-
-       sq_vtx_constant_word3 =
-               S_03000C_DST_SEL_X(0) |
-               S_03000C_DST_SEL_Y(1) |
-               S_03000C_DST_SEL_Z(2) |
-               S_03000C_DST_SEL_W(3);
-
-       sq_vtx_constant_word4 = 0;
-
-       evergreen_emit_raw_value(res, PKT3C(PKT3_SET_RESOURCE, 8, 0));
-       evergreen_emit_raw_value(res, (id+816)*32 >> 2);
-       evergreen_emit_raw_value(res, (unsigned)((va) & 0xffffffff));
-       evergreen_emit_raw_value(res, size - 1);
-       evergreen_emit_raw_value(res, sq_vtx_constant_word2);
-       evergreen_emit_raw_value(res, sq_vtx_constant_word3);
-       evergreen_emit_raw_value(res, sq_vtx_constant_word4);
-       evergreen_emit_raw_value(res, 0);
-       evergreen_emit_raw_value(res, 0);
-       evergreen_emit_raw_value(res, S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_BUFFER));
-
-       res->bo = bo;
-
-       if (writable) {
-               res->usage = RADEON_USAGE_READWRITE;
-       }
-       else {
-               res->usage = RADEON_USAGE_READ;
-       }
-
-       res->coher_bo_size = size;
-
-       r600_inval_vertex_cache(pipe->ctx);
-       /* XXX: Do we really need to invalidate the texture cache here?
-        * r600_inval_vertex_cache() will invalidate the texture cache
-        * if the chip does not have a vertex cache.
-        */
-       r600_inval_texture_cache(pipe->ctx);
-}
-
 void evergreen_set_tex_resource(
        struct r600_pipe_compute *pipe,
        struct r600_pipe_sampler_view* view,
index 75ada51320b38fb5c6d2f1634e43f4bffa6de889..5fa9c48fb5ec4a6b2c17fd07f92ad51251f2dec6 100644 (file)
@@ -107,7 +107,6 @@ void evergreen_set_gds(struct r600_pipe_compute *pipe, uint32_t addr, uint32_t s
 void evergreen_set_export(struct r600_pipe_compute *pipe, struct r600_resource* bo, int offset, int size);
 void evergreen_set_loop_const(struct r600_pipe_compute *pipe, int id, int count, int init, int inc);
 void evergreen_set_tmp_ring(struct r600_pipe_compute *pipe, struct r600_resource* bo, int offset, int size, int se);
-void evergreen_set_vtx_resource(struct r600_pipe_compute *pipe, struct r600_resource* bo, int id, uint64_t offset, int writable);
 void evergreen_set_tex_resource(struct r600_pipe_compute *pipe, struct r600_pipe_sampler_view* view, int id);
 void evergreen_set_sampler_resource(struct r600_pipe_compute *pipe, struct compute_sampler_state *sampler, int id);
 void evergreen_set_const_cache(struct r600_pipe_compute *pipe, int cache_id, struct r600_resource* cbo, int size, int offset);
index 8630eafac2eb7231e03af1111f5cd8d8109f2e38..923dea75da106cd670704e2a14c8d9f71d3332a8 100644 (file)
@@ -1765,15 +1765,15 @@ static void evergreen_emit_db_misc_state(struct r600_context *rctx, struct r600_
        r600_write_context_reg(cs, R_02800C_DB_RENDER_OVERRIDE, db_render_override);
 }
 
-static void evergreen_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom *atom)
+static void evergreen_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom *atom,
+       struct pipe_vertex_buffer *vb, unsigned vb_count, unsigned resource_offset,
+       unsigned pkt_flags)
 {
        struct radeon_winsys_cs *cs = rctx->cs;
-       struct pipe_vertex_buffer *vb = rctx->vertex_buffer;
-       unsigned count = rctx->nr_vertex_buffers;
        unsigned i;
        uint64_t va;
 
-       for (i = 0; i < count; i++) {
+       for (i = 0; i < vb_count; i++) {
                struct r600_resource *rbuffer = (struct r600_resource*)vb[i].buffer;
 
                if (!rbuffer) {
@@ -1784,8 +1784,8 @@ static void evergreen_emit_vertex_buffers(struct r600_context *rctx, struct r600
                va += vb[i].buffer_offset;
 
                /* fetch resources start at index 992 */
-               r600_write_value(cs, PKT3(PKT3_SET_RESOURCE, 8, 0));
-               r600_write_value(cs, (992 + i) * 8);
+               r600_write_value(cs, PKT3(PKT3_SET_RESOURCE, 8, 0) | pkt_flags);
+               r600_write_value(cs, (resource_offset + i) * 8);
                r600_write_value(cs, va); /* RESOURCEi_WORD0 */
                r600_write_value(cs, rbuffer->buf->size - vb[i].buffer_offset - 1); /* RESOURCEi_WORD1 */
                r600_write_value(cs, /* RESOURCEi_WORD2 */
@@ -1802,11 +1802,24 @@ static void evergreen_emit_vertex_buffers(struct r600_context *rctx, struct r600
                r600_write_value(cs, 0); /* RESOURCEi_WORD6 */
                r600_write_value(cs, 0xc0000000); /* RESOURCEi_WORD7 */
 
-               r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
+               r600_write_value(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
                r600_write_value(cs, r600_context_bo_reloc(rctx, rbuffer, RADEON_USAGE_READ));
        }
 }
 
+/**
+ * Atom emit callback for the fetch shader vertex buffers.
+ *
+ * Hands rctx->vertex_buffer and rctx->nr_vertex_buffers to the shared
+ * emitter with a resource offset of 992 and no extra packet flags.
+ */
+static void evergreen_fs_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom * atom)
+{
+       struct pipe_vertex_buffer *buffers = rctx->vertex_buffer;
+       unsigned num_buffers = rctx->nr_vertex_buffers;
+
+       evergreen_emit_vertex_buffers(rctx, atom, buffers, num_buffers, 992, 0);
+}
+
+/**
+ * Atom emit callback for the compute shader vertex buffers.
+ *
+ * Hands rctx->cs_vertex_buffer and rctx->nr_cs_vertex_buffers to the shared
+ * emitter with a resource offset of 816, tagging the packets with
+ * RADEON_CP_PACKET3_COMPUTE_MODE.
+ */
+static void evergreen_cs_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom * atom)
+{
+       struct pipe_vertex_buffer *buffers = rctx->cs_vertex_buffer;
+       unsigned num_buffers = rctx->nr_cs_vertex_buffers;
+
+       evergreen_emit_vertex_buffers(rctx, atom, buffers, num_buffers, 816,
+                                       RADEON_CP_PACKET3_COMPUTE_MODE);
+}
+
 static void evergreen_emit_constant_buffers(struct r600_context *rctx,
                                            struct r600_constbuf_state *state,
                                            unsigned buffer_id_base,
@@ -1882,7 +1895,8 @@ void evergreen_init_state_functions(struct r600_context *rctx)
        r600_atom_dirty(rctx, &rctx->cb_misc_state.atom);
        r600_init_atom(&rctx->db_misc_state.atom, evergreen_emit_db_misc_state, 6, 0);
        r600_atom_dirty(rctx, &rctx->db_misc_state.atom);
-       r600_init_atom(&rctx->vertex_buffer_state, evergreen_emit_vertex_buffers, 0, 0);
+       r600_init_atom(&rctx->vertex_buffer_state, evergreen_fs_emit_vertex_buffers, 0, 0);
+       r600_init_atom(&rctx->cs_vertex_buffer_state, evergreen_cs_emit_vertex_buffers, 0, 0);
        r600_init_atom(&rctx->vs_constbuf_state.atom, evergreen_emit_vs_constant_buffers, 0, 0);
        r600_init_atom(&rctx->ps_constbuf_state.atom, evergreen_emit_ps_constant_buffers, 0, 0);
 
index 7843579207a3b71d659c0dcca679a2eb6a33b956..ba63dcc84715337c20e5444ebe4e7ed4b1c8d4b6 100644 (file)
@@ -336,7 +336,10 @@ struct r600_context {
        struct r600_atom                r6xx_flush_and_inv_cmd;
        struct r600_cb_misc_state       cb_misc_state;
        struct r600_db_misc_state       db_misc_state;
+       /** Vertex buffers for fetch shaders */
        struct r600_atom                vertex_buffer_state;
+       /** Vertex buffers for compute shaders */
+       struct r600_atom                cs_vertex_buffer_state;
        struct r600_constbuf_state      vs_constbuf_state;
        struct r600_constbuf_state      ps_constbuf_state;
 
@@ -396,6 +399,8 @@ struct r600_context {
        struct pipe_index_buffer index_buffer;
        struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
        unsigned                nr_vertex_buffers;
+       struct pipe_vertex_buffer cs_vertex_buffer[PIPE_MAX_ATTRIBS];
+       unsigned                nr_cs_vertex_buffers;
 };
 
 static INLINE void r600_emit_atom(struct r600_context *rctx, struct r600_atom *atom)