From: Tom Stellard Date: Tue, 7 May 2013 02:11:39 +0000 (-0400) Subject: radeonsi/compute: Pass kernel arguments in a buffer v2 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=3d831206a4baf2cd36e91ea81ed45ce3c24d0f88;p=mesa.git radeonsi/compute: Pass kernel arguments in a buffer v2 v2: - Fix memory leak in si_set_constant_buffer() --- diff --git a/src/gallium/drivers/radeonsi/r600_buffer.c b/src/gallium/drivers/radeonsi/r600_buffer.c index cdf9988c6e7..3d295e8a967 100644 --- a/src/gallium/drivers/radeonsi/r600_buffer.c +++ b/src/gallium/drivers/radeonsi/r600_buffer.c @@ -25,6 +25,8 @@ * Corbin Simpson */ +#include + #include "pipe/p_screen.h" #include "util/u_format.h" #include "util/u_math.h" @@ -168,3 +170,30 @@ void r600_upload_index_buffer(struct r600_context *rctx, u_upload_data(rctx->uploader, 0, count * ib->index_size, ib->user_buffer, &ib->offset, &ib->buffer); } + +void r600_upload_const_buffer(struct r600_context *rctx, struct si_resource **rbuffer, + const uint8_t *ptr, unsigned size, + uint32_t *const_offset) +{ + if (R600_BIG_ENDIAN) { + uint32_t *tmpPtr; + unsigned i; + + if (!(tmpPtr = malloc(size))) { + R600_ERR("Failed to allocate BE swap buffer.\n"); + return; + } + + for (i = 0; i < size / 4; ++i) { + tmpPtr[i] = bswap_32(((uint32_t *)ptr)[i]); + } + + u_upload_data(rctx->uploader, 0, size, tmpPtr, const_offset, + (struct pipe_resource**)rbuffer); + + free(tmpPtr); + } else { + u_upload_data(rctx->uploader, 0, size, ptr, const_offset, + (struct pipe_resource**)rbuffer); + } +} diff --git a/src/gallium/drivers/radeonsi/radeonsi_compute.c b/src/gallium/drivers/radeonsi/radeonsi_compute.c index 3fb6eb16fc9..4341ecc0924 100644 --- a/src/gallium/drivers/radeonsi/radeonsi_compute.c +++ b/src/gallium/drivers/radeonsi/radeonsi_compute.c @@ -91,8 +91,11 @@ static void radeonsi_launch_grid( struct r600_context *rctx = (struct r600_context*)ctx; struct si_pipe_compute *program = rctx->cs_shader_state.program; struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state); + struct si_resource *input_buffer = NULL; + uint32_t input_offset = 0; + uint64_t input_va; uint64_t shader_va; - unsigned arg_user_sgpr_count; + unsigned arg_user_sgpr_count = 2; unsigned i; struct si_pipe_shader *shader = &program->kernels[pc]; @@ -109,21 +112,16 @@ static void radeonsi_launch_grid( si_pm4_inval_shader_cache(pm4); si_cmd_surface_sync(pm4, pm4->cp_coher_cntl); - arg_user_sgpr_count = program->input_size / 4; - if (program->input_size % 4 != 0) { - arg_user_sgpr_count++; - } + /* Upload the input data */ + r600_upload_const_buffer(rctx, &input_buffer, input, + program->input_size, &input_offset); + input_va = r600_resource_va(ctx->screen, (struct pipe_resource*)input_buffer); + input_va += input_offset; - /* XXX: We should store arguments in memory if we run out of user sgprs. - */ - assert(arg_user_sgpr_count < 16); + si_pm4_add_bo(pm4, input_buffer, RADEON_USAGE_READ); - for (i = 0; i < arg_user_sgpr_count; i++) { - uint32_t *args = (uint32_t*)input; - si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0 + - (i * 4), - args[i]); - } + si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0, input_va); + si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0 + 4, S_008F04_BASE_ADDRESS_HI (input_va >> 32) | S_008F04_STRIDE(0)); si_pm4_set_reg(pm4, R_00B810_COMPUTE_START_X, 0); si_pm4_set_reg(pm4, R_00B814_COMPUTE_START_Y, 0); diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 82c16411acd..0dc74bc2c82 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -24,8 +24,6 @@ * Christian König */ -#include - #include "util/u_memory.h" #include "util/u_framebuffer.h" #include "util/u_blitter.h" @@ -2541,25 +2539,9 @@ static void si_set_constant_buffer(struct pipe_context *ctx, uint shader, uint i ptr = input->user_buffer; if (ptr) { - /* Upload the user buffer. */ - if (R600_BIG_ENDIAN) { - uint32_t *tmpPtr; - unsigned i, size = input->buffer_size; - - if (!(tmpPtr = malloc(size))) { - R600_ERR("Failed to allocate BE swap buffer.\n"); - return; - } - - for (i = 0; i < size / 4; ++i) { - tmpPtr[i] = bswap_32(((uint32_t *)ptr)[i]); - } - - u_upload_data(rctx->uploader, 0, size, tmpPtr, &cb->buffer_offset, &cb->buffer); - free(tmpPtr); - } else { - u_upload_data(rctx->uploader, 0, input->buffer_size, ptr, &cb->buffer_offset, &cb->buffer); - } + r600_upload_const_buffer(rctx, + (struct si_resource**)&cb->buffer, ptr, + cb->buffer_size, &cb->buffer_offset); } else { /* Setup the hw buffer. */ cb->buffer_offset = input->buffer_offset;