From 1f4e48d5b53e73605832971f3fb06cb3402f97a5 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Thu, 25 Sep 2014 18:11:24 -0700 Subject: [PATCH] radeonsi/compute: Enable PIPE_SHADER_IR_NATIVE for compute shaders v2 v2: - Drop dependency on LLVM >= 3.5.1 - Rename si_create_shader() to si_shader_binary_read() --- src/gallium/drivers/radeonsi/si_compute.c | 71 +++++++++++---- src/gallium/drivers/radeonsi/si_pipe.c | 4 + src/gallium/drivers/radeonsi/si_shader.c | 104 +++++++++++++--------- src/gallium/drivers/radeonsi/si_shader.h | 7 ++ 4 files changed, 127 insertions(+), 59 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index be644181796..6ddb47896c5 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -23,14 +23,15 @@ */ #include "util/u_memory.h" +#include "radeon/r600_pipe_common.h" +#include "radeon/radeon_elf_util.h" +#include "radeon/radeon_llvm_util.h" #include "radeon/r600_cs.h" #include "si_pipe.h" #include "si_shader.h" #include "sid.h" -#include "radeon/radeon_llvm_util.h" - #define MAX_GLOBAL_BUFFERS 20 #if HAVE_LLVM < 0x0305 #define NUM_USER_SGPRS 2 @@ -44,14 +45,18 @@ struct si_compute { unsigned local_size; unsigned private_size; unsigned input_size; - unsigned num_kernels; - struct si_shader *kernels; + struct radeon_shader_binary binary; + struct si_shader program; unsigned num_user_sgprs; struct r600_resource *input_buffer; struct pipe_resource *global_buffers[MAX_GLOBAL_BUFFERS]; +#if HAVE_LLVM < 0x0306 + unsigned num_kernels; + struct si_shader *kernels; LLVMContextRef llvm_ctx; +#endif }; static void *si_create_compute_state( @@ -61,10 +66,7 @@ static void *si_create_compute_state( struct si_context *sctx = (struct si_context *)ctx; struct si_compute *program = CALLOC_STRUCT(si_compute); const struct pipe_llvm_program_header *header; - const unsigned char *code; - unsigned i; - - program->llvm_ctx = LLVMContextCreate(); + const char *code; header = cso->prog; code = cso->prog + sizeof(struct pipe_llvm_program_header); @@ -74,17 +76,27 @@ static void *si_create_compute_state( program->private_size = cso->req_private_mem; program->input_size = cso->req_input_mem; - program->num_kernels = radeon_llvm_get_num_kernels(program->llvm_ctx, code, - header->num_bytes); - program->kernels = CALLOC(sizeof(struct si_shader), - program->num_kernels); - for (i = 0; i < program->num_kernels; i++) { - LLVMModuleRef mod = radeon_llvm_get_kernel_module(program->llvm_ctx, i, - code, header->num_bytes); - si_compile_llvm(sctx->screen, &program->kernels[i], mod); - LLVMDisposeModule(mod); +#if HAVE_LLVM < 0x0306 + { + unsigned i; + program->llvm_ctx = LLVMContextCreate(); + program->num_kernels = radeon_llvm_get_num_kernels(program->llvm_ctx, + code, header->num_bytes); + program->kernels = CALLOC(sizeof(struct si_shader), + program->num_kernels); + for (i = 0; i < program->num_kernels; i++) { + LLVMModuleRef mod = radeon_llvm_get_kernel_module(program->llvm_ctx, i, + code, header->num_bytes); + si_compile_llvm(sctx->screen, &program->kernels[i], mod); + LLVMDisposeModule(mod); + } } +#else + radeon_elf_read(code, header->num_bytes, &program->binary, true); + si_shader_binary_read(sctx->screen, &program->program, &program->binary); + +#endif program->input_buffer = si_resource_create_custom(sctx->b.b.screen, PIPE_USAGE_IMMUTABLE, program->input_size); @@ -181,10 +193,15 @@ static void si_launch_grid( uint64_t shader_va; unsigned arg_user_sgpr_count = NUM_USER_SGPRS; unsigned i; - struct si_shader *shader = &program->kernels[pc]; + struct si_shader *shader = &program->program; unsigned lds_blocks; unsigned num_waves_for_scratch; +#if HAVE_LLVM < 0x0306 + shader = &program->kernels[pc]; +#endif + + radeon_emit(cs, PKT3(PKT3_CONTEXT_CONTROL, 1, 0) | PKT3_SHADER_TYPE_S(1)); radeon_emit(cs, 0x80000000); radeon_emit(cs, 0x80000000); @@ -198,6 +215,11 @@ static void si_launch_grid( pm4->compute_pkt = true; +#if HAVE_LLVM >= 0x0306 + /* Read the config information */ + si_shader_binary_read_config(&program->binary, &program->program, pc); +#endif + /* Upload the kernel arguments */ /* The extra num_work_size_bytes are for work group / work item size information */ @@ -290,6 +312,10 @@ static void si_launch_grid( } shader_va = shader->bo->gpu_address; + +#if HAVE_LLVM >= 0x0306 + shader_va += pc; +#endif si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA); si_pm4_set_reg(pm4, R_00B830_COMPUTE_PGM_LO, (shader_va >> 8) & 0xffffffff); si_pm4_set_reg(pm4, R_00B834_COMPUTE_PGM_HI, shader_va >> 40); @@ -388,6 +414,7 @@ static void si_delete_compute_state(struct pipe_context *ctx, void* state){ return; } +#if HAVE_LLVM < 0x0306 if (program->kernels) { for (int i = 0; i < program->num_kernels; i++){ if (program->kernels[i].bo){ @@ -400,10 +427,16 @@ static void si_delete_compute_state(struct pipe_context *ctx, void* state){ if (program->llvm_ctx){ LLVMContextDispose(program->llvm_ctx); } +#else + si_shader_destroy(ctx, &program->program); +#endif + pipe_resource_reference( (struct pipe_resource **)&program->input_buffer, NULL); - //And then free the program itself. + FREE(program->binary.code); + FREE(program->binary.config); + FREE(program->binary.rodata); FREE(program); } diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 0577cd2a924..53c83ba3889 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -336,7 +336,11 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu case PIPE_SHADER_COMPUTE: switch (param) { case PIPE_SHADER_CAP_PREFERRED_IR: +#if HAVE_LLVM < 0x0306 return PIPE_SHADER_IR_LLVM; +#else + return PIPE_SHADER_IR_NATIVE; +#endif case PIPE_SHADER_CAP_DOUBLES: return 0; /* XXX: Enable doubles once the compiler can handle them. */ diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index f8e9fbe7bde..40a2f906414 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -33,6 +33,7 @@ #include "gallivm/lp_bld_arit.h" #include "gallivm/lp_bld_flow.h" #include "radeon/radeon_llvm.h" +#include "radeon/radeon_elf_util.h" #include "radeon/radeon_llvm_emit.h" #include "util/u_memory.h" #include "tgsi/tgsi_parse.h" @@ -2500,52 +2501,34 @@ static void preload_ring_buffers(struct si_shader_context *si_shader_ctx) } } -int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader, - LLVMModuleRef mod) +void si_shader_binary_read_config(const struct radeon_shader_binary *binary, + struct si_shader *shader, + unsigned symbol_offset) { - unsigned r; /* llvm_compile result */ unsigned i; - unsigned char *ptr; - struct radeon_shader_binary binary; - bool dump = r600_can_dump_shader(&sscreen->b, - shader->selector ? shader->selector->tokens : NULL); - const char * gpu_family = r600_get_llvm_processor_name(sscreen->b.family); - unsigned code_size; - - /* Use LLVM to compile shader */ - memset(&binary, 0, sizeof(binary)); - r = radeon_llvm_compile(mod, &binary, gpu_family, dump); - - /* Output binary dump if rscreen->debug_flags are set */ - if (dump && ! binary.disassembled) { - fprintf(stderr, "SI CODE:\n"); - for (i = 0; i < binary.code_size; i+=4 ) { - fprintf(stderr, "%02x%02x%02x%02x\n", binary.code[i + 3], - binary.code[i + 2], binary.code[i + 1], - binary.code[i]); - } - } + const unsigned char *config = + radeon_shader_binary_config_start(binary, symbol_offset); /* XXX: We may be able to emit some of these values directly rather than * extracting fields to be emitted later. */ - /* Parse config data in compiled binary */ - for (i = 0; i < binary.config_size; i+= 8) { - unsigned reg = util_le32_to_cpu(*(uint32_t*)(binary.config + i)); - unsigned value = util_le32_to_cpu(*(uint32_t*)(binary.config + i + 4)); + + for (i = 0; i < binary->config_size_per_symbol; i+= 8) { + unsigned reg = util_le32_to_cpu(*(uint32_t*)(config + i)); + unsigned value = util_le32_to_cpu(*(uint32_t*)(config + i + 4)); switch (reg) { case R_00B028_SPI_SHADER_PGM_RSRC1_PS: case R_00B128_SPI_SHADER_PGM_RSRC1_VS: case R_00B228_SPI_SHADER_PGM_RSRC1_GS: case R_00B848_COMPUTE_PGM_RSRC1: - shader->num_sgprs = (G_00B028_SGPRS(value) + 1) * 8; - shader->num_vgprs = (G_00B028_VGPRS(value) + 1) * 4; + shader->num_sgprs = MAX2(shader->num_sgprs, (G_00B028_SGPRS(value) + 1) * 8); + shader->num_vgprs = MAX2(shader->num_vgprs, (G_00B028_VGPRS(value) + 1) * 4); break; case R_00B02C_SPI_SHADER_PGM_RSRC2_PS: - shader->lds_size = G_00B02C_EXTRA_LDS_SIZE(value); + shader->lds_size = MAX2(shader->lds_size, G_00B02C_EXTRA_LDS_SIZE(value)); break; case R_00B84C_COMPUTE_PGM_RSRC2: - shader->lds_size = G_00B84C_LDS_SIZE(value); + shader->lds_size = MAX2(shader->lds_size, G_00B84C_LDS_SIZE(value)); break; case R_0286CC_SPI_PS_INPUT_ENA: shader->spi_ps_input_ena = value; @@ -2561,9 +2544,32 @@ int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader, break; } } +} + +int si_shader_binary_read(struct si_screen *sscreen, + struct si_shader *shader, + const struct radeon_shader_binary *binary) +{ + + unsigned i; + unsigned code_size; + unsigned char *ptr; + bool dump = r600_can_dump_shader(&sscreen->b, + shader->selector ? shader->selector->tokens : NULL); + + if (dump && !binary->disassembled) { + fprintf(stderr, "SI CODE:\n"); + for (i = 0; i < binary->code_size; i+=4 ) { + fprintf(stderr, "@0x%x: %02x%02x%02x%02x\n", i, binary->code[i + 3], + binary->code[i + 2], binary->code[i + 1], + binary->code[i]); + } + } + + si_shader_binary_read_config(binary, shader, 0); /* copy new shader */ - code_size = binary.code_size + binary.rodata_size; + code_size = binary->code_size + binary->rodata_size; r600_resource_reference(&shader->bo, NULL); shader->bo = si_resource_create_custom(&sscreen->b.b, PIPE_USAGE_IMMUTABLE, code_size); @@ -2571,19 +2577,37 @@ int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader, return -ENOMEM; } - ptr = sscreen->b.ws->buffer_map(shader->bo->cs_buf, NULL, PIPE_TRANSFER_WRITE); - util_memcpy_cpu_to_le32(ptr, binary.code, binary.code_size); - if (binary.rodata_size > 0) { - ptr += binary.code_size; - util_memcpy_cpu_to_le32(ptr, binary.rodata, binary.rodata_size); + + ptr = sscreen->b.ws->buffer_map(shader->bo->cs_buf, NULL, PIPE_TRANSFER_READ_WRITE); + util_memcpy_cpu_to_le32(ptr, binary->code, binary->code_size); + if (binary->rodata_size > 0) { + ptr += binary->code_size; + util_memcpy_cpu_to_le32(ptr, binary->rodata, binary->rodata_size); } sscreen->b.ws->buffer_unmap(shader->bo->cs_buf); - free(binary.code); - free(binary.config); - free(binary.rodata); + return 0; +} + +int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader, + LLVMModuleRef mod) +{ + int r = 0; + struct radeon_shader_binary binary; + bool dump = r600_can_dump_shader(&sscreen->b, + shader->selector ? shader->selector->tokens : NULL); + memset(&binary, 0, sizeof(binary)); + r = radeon_llvm_compile(mod, &binary, + r600_get_llvm_processor_name(sscreen->b.family), dump); + if (r) { + return r; + } + r = si_shader_binary_read(sscreen, shader, &binary); + FREE(binary.code); + FREE(binary.config); + FREE(binary.rodata); return r; } diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 30e6854031e..5e8c9e6365e 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -33,6 +33,8 @@ #include "tgsi/tgsi_scan.h" #include "si_state.h" +struct radeon_shader_binary; + #define SI_SGPR_RW_BUFFERS 0 /* rings (& stream-out, VS only) */ #define SI_SGPR_CONST 2 #define SI_SGPR_SAMPLER 4 @@ -180,5 +182,10 @@ int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader, LLVMModuleRef mod); void si_shader_destroy(struct pipe_context *ctx, struct si_shader *shader); unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index); +int si_shader_binary_read(struct si_screen *sscreen, struct si_shader *shader, + const struct radeon_shader_binary *binary); +void si_shader_binary_read_config(const struct radeon_shader_binary *binary, + struct si_shader *shader, + unsigned symbol_offset); #endif -- 2.30.2