radeonsi/compute: Pass kernel arguments in a buffer v2
author Tom Stellard <thomas.stellard@amd.com>
Tue, 7 May 2013 02:11:39 +0000 (22:11 -0400)
committer Tom Stellard <thomas.stellard@amd.com>
Mon, 3 Jun 2013 18:03:08 +0000 (14:03 -0400)
v2:
  - Fix memory leak in si_set_constant_buffer()

src/gallium/drivers/radeonsi/r600_buffer.c
src/gallium/drivers/radeonsi/radeonsi_compute.c
src/gallium/drivers/radeonsi/si_state.c

index cdf9988c6e78d698026ecb85f394c5878bc92cf7..3d295e8a967a3119aef58b93e2a8b85d16f8157c 100644 (file)
@@ -25,6 +25,8 @@
  *      Corbin Simpson <MostAwesomeDude@gmail.com>
  */
 
+#include <byteswap.h>
+
 #include "pipe/p_screen.h"
 #include "util/u_format.h"
 #include "util/u_math.h"
@@ -168,3 +170,50 @@ void r600_upload_index_buffer(struct r600_context *rctx,
        u_upload_data(rctx->uploader, 0, count * ib->index_size,
                      ib->user_buffer, &ib->offset, &ib->buffer);
 }
+
+/*
+ * Upload size bytes starting at ptr through rctx->uploader; the destination
+ * resource and offset are returned by u_upload_data() in *rbuffer and
+ * *const_offset.
+ *
+ * On big-endian hosts the data is byte-swapped one 32-bit word at a time
+ * before the upload.  Bytes past the last whole word are copied unswapped
+ * so the uploaded range never contains uninitialised heap memory.
+ *
+ * If the swap buffer cannot be allocated an error is logged and *rbuffer and
+ * *const_offset are left untouched; the function returns void, so callers
+ * must initialise *rbuffer and check it after the call.
+ */
+void r600_upload_const_buffer(struct r600_context *rctx, struct si_resource **rbuffer,
+                       const uint8_t *ptr, unsigned size,
+                       uint32_t *const_offset)
+{
+       if (R600_BIG_ENDIAN) {
+               const uint32_t *words = (const uint32_t *)ptr;
+               uint32_t *tmpPtr;
+               unsigned i;
+
+               if (!(tmpPtr = malloc(size))) {
+                       R600_ERR("Failed to allocate BE swap buffer.\n");
+                       return;
+               }
+
+               for (i = 0; i < size / 4; ++i) {
+                       tmpPtr[i] = bswap_32(words[i]);
+               }
+
+               /* size is not guaranteed to be a multiple of 4; copy the
+                * trailing 1-3 bytes as they are instead of leaving them unset. */
+               for (i = size & ~3u; i < size; ++i) {
+                       ((uint8_t *)tmpPtr)[i] = ptr[i];
+               }
+
+               u_upload_data(rctx->uploader, 0, size, tmpPtr, const_offset,
+                               (struct pipe_resource**)rbuffer);
+
+               free(tmpPtr);
+       } else {
+               u_upload_data(rctx->uploader, 0, size, ptr, const_offset,
+                                       (struct pipe_resource**)rbuffer);
+       }
+}
index 3fb6eb16fc902484dba2e9cc7624145a2b6c8f43..4341ecc0924e204684d0aa67a70f80d6f801d8db 100644 (file)
@@ -91,8 +91,11 @@ static void radeonsi_launch_grid(
        struct r600_context *rctx = (struct r600_context*)ctx;
        struct si_pipe_compute *program = rctx->cs_shader_state.program;
        struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
+       struct si_resource *input_buffer = NULL;
+       uint32_t input_offset = 0;
+       uint64_t input_va;
        uint64_t shader_va;
-       unsigned arg_user_sgpr_count;
+       unsigned arg_user_sgpr_count = 2;
        unsigned i;
        struct si_pipe_shader *shader = &program->kernels[pc];
 
@@ -109,21 +112,16 @@ static void radeonsi_launch_grid(
        si_pm4_inval_shader_cache(pm4);
        si_cmd_surface_sync(pm4, pm4->cp_coher_cntl);
 
-       arg_user_sgpr_count = program->input_size / 4;
-       if (program->input_size % 4 != 0) {
-               arg_user_sgpr_count++;
-       }
+       /* Upload the input data */
+       r600_upload_const_buffer(rctx, &input_buffer, input,
+                                       program->input_size, &input_offset);
+       input_va = r600_resource_va(ctx->screen, (struct pipe_resource*)input_buffer);
+       input_va += input_offset;
 
-       /* XXX: We should store arguments in memory if we run out of user sgprs.
-        */
-       assert(arg_user_sgpr_count < 16);
+       si_pm4_add_bo(pm4, input_buffer, RADEON_USAGE_READ);
 
-       for (i = 0; i < arg_user_sgpr_count; i++) {
-               uint32_t *args = (uint32_t*)input;
-               si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0 +
-                                       (i * 4),
-                                       args[i]);
-       }
+       si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0, input_va);
+       si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0 + 4, S_008F04_BASE_ADDRESS_HI (input_va >> 32) | S_008F04_STRIDE(0));
 
        si_pm4_set_reg(pm4, R_00B810_COMPUTE_START_X, 0);
        si_pm4_set_reg(pm4, R_00B814_COMPUTE_START_Y, 0);
index 82c16411acd9509eca6beee1541863df454b2b9a..0dc74bc2c82877f61f38acae37c83b4a45685ca9 100644 (file)
@@ -24,8 +24,6 @@
  *      Christian König <christian.koenig@amd.com>
  */
 
-#include <byteswap.h>
-
 #include "util/u_memory.h"
 #include "util/u_framebuffer.h"
 #include "util/u_blitter.h"
@@ -2541,25 +2539,9 @@ static void si_set_constant_buffer(struct pipe_context *ctx, uint shader, uint i
        ptr = input->user_buffer;
 
        if (ptr) {
-               /* Upload the user buffer. */
-               if (R600_BIG_ENDIAN) {
-                       uint32_t *tmpPtr;
-                       unsigned i, size = input->buffer_size;
-
-                       if (!(tmpPtr = malloc(size))) {
-                               R600_ERR("Failed to allocate BE swap buffer.\n");
-                               return;
-                       }
-
-                       for (i = 0; i < size / 4; ++i) {
-                               tmpPtr[i] = bswap_32(((uint32_t *)ptr)[i]);
-                       }
-
-                       u_upload_data(rctx->uploader, 0, size, tmpPtr, &cb->buffer_offset, &cb->buffer);
-                       free(tmpPtr);
-               } else {
-                       u_upload_data(rctx->uploader, 0, input->buffer_size, ptr, &cb->buffer_offset, &cb->buffer);
-               }
+               r600_upload_const_buffer(rctx,
+                               (struct si_resource**)&cb->buffer, ptr,
+                               cb->buffer_size, &cb->buffer_offset);
        } else {
                /* Setup the hw buffer. */
                cb->buffer_offset = input->buffer_offset;