r600g: suballocate memory for fetch shaders from a large buffer
author Marek Olšák <maraeo@gmail.com>
Sun, 9 Dec 2012 17:51:31 +0000 (18:51 +0100)
committer Marek Olšák <maraeo@gmail.com>
Wed, 12 Dec 2012 12:12:31 +0000 (13:12 +0100)
The state tracker usually destroys fetch shaders only at context destruction,
so we can put them all in a large buffer without wasting memory.

This reduces the number of relocations sent to the kernel a little bit.

Tested-by: Aaron Watry <awatry@gmail.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
src/gallium/drivers/r600/evergreen_state.c
src/gallium/drivers/r600/r600_asm.c
src/gallium/drivers/r600/r600_pipe.c
src/gallium/drivers/r600/r600_pipe.h
src/gallium/drivers/r600/r600_state.c
src/gallium/drivers/r600/r600_state_common.c

index 9b898cb10e6ba90dda745e3c71fc527e62183e6f..996c1b48b9a0dc4783753721ccf559bd48552bd4 100644 (file)
@@ -2367,12 +2367,12 @@ static void evergreen_emit_vertex_fetch_shader(struct r600_context *rctx, struct
 {
        struct radeon_winsys_cs *cs = rctx->cs;
        struct r600_cso_state *state = (struct r600_cso_state*)a;
-       struct r600_resource *shader = (struct r600_resource*)state->cso;
+       struct r600_fetch_shader *shader = (struct r600_fetch_shader*)state->cso;
 
        r600_write_context_reg(cs, R_0288A4_SQ_PGM_START_FS,
-                              r600_resource_va(rctx->context.screen, &shader->b.b) >> 8);
+                              (r600_resource_va(rctx->context.screen, &shader->buffer->b.b) + shader->offset) >> 8);
        r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
-       r600_write_value(cs, r600_context_bo_reloc(rctx, shader, RADEON_USAGE_READ));
+       r600_write_value(cs, r600_context_bo_reloc(rctx, shader->buffer, RADEON_USAGE_READ));
 }
 
 void evergreen_init_state_functions(struct r600_context *rctx)
index f06af4444730eefb7a4d31a7079ff84e098f3c36..268137ff2e7747a0f0246ce2f456b5f47d76c039 100644 (file)
@@ -2766,7 +2766,7 @@ void *r600_create_vertex_fetch_shader(struct pipe_context *ctx,
        unsigned format, num_format, format_comp, endian;
        uint32_t *bytecode;
        int i, j, r, fs_size;
-       struct r600_resource *fetch_shader;
+       struct r600_fetch_shader *shader;
 
        assert(count < 32);
 
@@ -2873,22 +2873,25 @@ void *r600_create_vertex_fetch_shader(struct pipe_context *ctx,
 
        fs_size = bc.ndw*4;
 
-       fetch_shader = (struct r600_resource*)
-                       pipe_buffer_create(rctx->context.screen,
-                                          PIPE_BIND_CUSTOM,
-                                          PIPE_USAGE_IMMUTABLE, fs_size);
-       if (fetch_shader == NULL) {
+       /* Allocate the CSO. */
+       shader = CALLOC_STRUCT(r600_fetch_shader);
+       if (!shader) {
                r600_bytecode_clear(&bc);
                return NULL;
        }
 
-       bytecode = rctx->ws->buffer_map(fetch_shader->cs_buf, rctx->cs, PIPE_TRANSFER_WRITE);
-       if (bytecode == NULL) {
+       u_suballocator_alloc(rctx->allocator_fetch_shader, fs_size, &shader->offset,
+                            (struct pipe_resource**)&shader->buffer);
+       if (!shader->buffer) {
                r600_bytecode_clear(&bc);
-               pipe_resource_reference((struct pipe_resource**)&fetch_shader, NULL);
+               FREE(shader);
                return NULL;
        }
 
+       bytecode = rctx->ws->buffer_map(shader->buffer->cs_buf, rctx->cs,
+                                       PIPE_TRANSFER_WRITE | PIPE_TRANSFER_UNSYNCHRONIZED);
+       bytecode += shader->offset / 4;
+
        if (R600_BIG_ENDIAN) {
                for (i = 0; i < fs_size / 4; ++i) {
                        bytecode[i] = bswap_32(bc.bytecode[i]);
@@ -2896,11 +2899,10 @@ void *r600_create_vertex_fetch_shader(struct pipe_context *ctx,
        } else {
                memcpy(bytecode, bc.bytecode, fs_size);
        }
+       rctx->ws->buffer_unmap(shader->buffer->cs_buf);
 
-       rctx->ws->buffer_unmap(fetch_shader->cs_buf);
        r600_bytecode_clear(&bc);
-
-       return fetch_shader;
+       return shader;
 }
 
 void r600_bytecode_alu_read(struct r600_bytecode_alu *alu, uint32_t word0, uint32_t word1)
index 90289e582abb8524bc1572e63805bd655dd807ce..19147d975469b6ba85071ba5e97fc1d00deb41cc 100644 (file)
@@ -188,6 +188,9 @@ static void r600_destroy_context(struct pipe_context *context)
        if (rctx->allocator_so_filled_size) {
                u_suballocator_destroy(rctx->allocator_so_filled_size);
        }
+       if (rctx->allocator_fetch_shader) {
+               u_suballocator_destroy(rctx->allocator_fetch_shader);
+       }
        util_slab_destroy(&rctx->pool_transfers);
 
        r600_release_command_buffer(&rctx->start_cs_cmd);
@@ -294,6 +297,11 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
         if (!rctx->uploader)
                 goto fail;
 
+       rctx->allocator_fetch_shader = u_suballocator_create(&rctx->context, 64 * 1024, 256,
+                                                            0, PIPE_USAGE_STATIC, FALSE);
+        if (!rctx->allocator_fetch_shader)
+                goto fail;
+
        rctx->allocator_so_filled_size = u_suballocator_create(&rctx->context, 4096, 4,
                                                                0, PIPE_USAGE_STATIC, TRUE);
         if (!rctx->allocator_so_filled_size)
index e707a4adda6015ba8c26e851c964220ee1211cb4..c348c76975b3297e7bb5fde7051913232a07fc65 100644 (file)
@@ -383,6 +383,11 @@ struct r600_scissor_state
        bool                            enable; /* r6xx only */
 };
 
+struct r600_fetch_shader {
+       struct r600_resource            *buffer;
+       unsigned                        offset;
+};
+
 struct r600_context {
        struct pipe_context             context;
        struct r600_screen              *screen;
@@ -391,6 +396,7 @@ struct r600_context {
        struct blitter_context          *blitter;
        struct u_upload_mgr             *uploader;
        struct u_suballocator           *allocator_so_filled_size;
+       struct u_suballocator           *allocator_fetch_shader;
        struct util_slab_mempool        pool_transfers;
 
        /* Hardware info. */
index ab658da812ab4d7635b9f4cdd5b673dce8bb72f0..9bfae4f22993ef2fbfa9d7d3bfd48b155d62fc9d 100644 (file)
@@ -2117,11 +2117,11 @@ static void r600_emit_vertex_fetch_shader(struct r600_context *rctx, struct r600
 {
        struct radeon_winsys_cs *cs = rctx->cs;
        struct r600_cso_state *state = (struct r600_cso_state*)a;
-       struct r600_resource *shader = (struct r600_resource*)state->cso;
+       struct r600_fetch_shader *shader = (struct r600_fetch_shader*)state->cso;
 
-       r600_write_context_reg(cs, R_028894_SQ_PGM_START_FS, 0);
+       r600_write_context_reg(cs, R_028894_SQ_PGM_START_FS, shader->offset >> 8);
        r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
-       r600_write_value(cs, r600_context_bo_reloc(rctx, shader, RADEON_USAGE_READ));
+       r600_write_value(cs, r600_context_bo_reloc(rctx, shader->buffer, RADEON_USAGE_READ));
 }
 
 void r600_init_state_functions(struct r600_context *rctx)
index 66120cad27dd8f2707e1aa4fab7a70ea046c6399..b20f6550c1c170fada8aa94db608be3f2bcb5dc9 100644 (file)
@@ -470,7 +470,9 @@ static void r600_bind_vertex_elements(struct pipe_context *ctx, void *state)
 
 static void r600_delete_vertex_elements(struct pipe_context *ctx, void *state)
 {
-       pipe_resource_reference((struct pipe_resource**)&state, NULL);
+       struct r600_fetch_shader *shader = (struct r600_fetch_shader*)state;
+       pipe_resource_reference((struct pipe_resource**)&shader->buffer, NULL);
+       FREE(shader);
 }
 
 static void r600_set_index_buffer(struct pipe_context *ctx,