From b57eba365496307c35373f5c303996731b994f25 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Thu, 13 Sep 2012 14:59:50 +0000 Subject: [PATCH] r600g: Handle multiple kernels in the same program v2 v2: - Use pc parameter of launch_grid --- src/gallium/drivers/r600/evergreen_compute.c | 44 ++++++++++++------- .../drivers/r600/evergreen_compute_internal.h | 18 +++++--- src/gallium/drivers/r600/llvm_wrapper.cpp | 38 ++++++++++++++++ src/gallium/drivers/r600/llvm_wrapper.h | 4 ++ src/gallium/drivers/r600/r600_pipe.h | 1 + 5 files changed, 84 insertions(+), 21 deletions(-) diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c index c7b9d3314ac..b7c734512e2 100644 --- a/src/gallium/drivers/r600/evergreen_compute.c +++ b/src/gallium/drivers/r600/evergreen_compute.c @@ -123,11 +123,11 @@ void *evergreen_create_compute_state( { struct r600_context *ctx = (struct r600_context *)ctx_; struct r600_pipe_compute *shader = CALLOC_STRUCT(r600_pipe_compute); - void *p; #ifdef HAVE_OPENCL const struct pipe_llvm_program_header * header; const unsigned char * code; + unsigned i; COMPUTE_DBG("*** evergreen_create_compute_state\n"); @@ -144,18 +144,15 @@ void *evergreen_create_compute_state( shader->input_size = cso->req_input_mem; #ifdef HAVE_OPENCL - shader->mod = llvm_parse_bitcode(code, header->num_bytes); + shader->num_kernels = llvm_get_num_kernels(code, header->num_bytes); + shader->kernels = CALLOC(sizeof(struct r600_kernel), shader->num_kernels); - r600_compute_shader_create(ctx_, shader->mod, &shader->bc); + for (i = 0; i < shader->num_kernels; i++) { + struct r600_kernel *kernel = &shader->kernels[i]; + kernel->llvm_module = llvm_get_kernel_module(i, code, + header->num_bytes); + } #endif - shader->shader_code_bo = r600_compute_buffer_alloc_vram(ctx->screen, - shader->bc.ndw * 4); - - p = ctx->ws->buffer_map(shader->shader_code_bo->cs_buf, ctx->cs, - PIPE_TRANSFER_WRITE); - - memcpy(p, shader->bc.bytecode, shader->bc.ndw * 4); - ctx->ws->buffer_unmap(shader->shader_code_bo->cs_buf); return shader; } @@ -456,20 +453,21 @@ void evergreen_emit_cs_shader( struct r600_cs_shader_state *state = (struct r600_cs_shader_state*)atom; struct r600_pipe_compute *shader = state->shader; + struct r600_kernel *kernel = &shader->kernels[state->kernel_index]; struct radeon_winsys_cs *cs = rctx->cs; uint64_t va; - va = r600_resource_va(&rctx->screen->screen, &shader->shader_code_bo->b.b); + va = r600_resource_va(&rctx->screen->screen, &kernel->code_bo->b.b); r600_write_compute_context_reg_seq(cs, R_0288D0_SQ_PGM_START_LS, 3); r600_write_value(cs, va >> 8); /* R_0288D0_SQ_PGM_START_LS */ r600_write_value(cs, /* R_0288D4_SQ_PGM_RESOURCES_LS */ - S_0288D4_NUM_GPRS(shader->bc.ngpr) - | S_0288D4_STACK_SIZE(shader->bc.nstack)); + S_0288D4_NUM_GPRS(kernel->bc.ngpr) + | S_0288D4_STACK_SIZE(kernel->bc.nstack)); r600_write_value(cs, 0); /* R_0288D8_SQ_PGM_RESOURCES_LS_2 */ r600_write_value(cs, PKT3C(PKT3_NOP, 0, 0)); - r600_write_value(cs, r600_context_bo_reloc(rctx, shader->shader_code_bo, + r600_write_value(cs, r600_context_bo_reloc(rctx, kernel->code_bo, RADEON_USAGE_READ)); rctx->flags |= R600_CONTEXT_SHADERCONST_FLUSH; @@ -481,9 +479,25 @@ static void evergreen_launch_grid( uint32_t pc, const void *input) { struct r600_context *ctx = (struct r600_context *)ctx_; + struct r600_pipe_compute *shader = ctx->cs_shader_state.shader; + void *p; COMPUTE_DBG("*** evergreen_launch_grid: pc = %u\n", pc); +#ifdef HAVE_OPENCL + if (!shader->kernels[pc].code_bo) { + struct r600_kernel *kernel = &shader->kernels[pc]; + r600_compute_shader_create(ctx_, kernel->llvm_module, &kernel->bc); + kernel->code_bo = r600_compute_buffer_alloc_vram(ctx->screen, + kernel->bc.ndw * 4); + p = ctx->ws->buffer_map(kernel->code_bo->cs_buf, ctx->cs, + PIPE_TRANSFER_WRITE); + memcpy(p, kernel->bc.bytecode, kernel->bc.ndw * 4); + ctx->ws->buffer_unmap(kernel->code_bo->cs_buf); + } +#endif + + ctx->cs_shader_state.kernel_index = pc; evergreen_compute_upload_input(ctx_, block_layout, grid_layout, input); compute_emit_cs(ctx, block_layout, grid_layout); } diff --git a/src/gallium/drivers/r600/evergreen_compute_internal.h b/src/gallium/drivers/r600/evergreen_compute_internal.h index 2bef261ebb5..328ce262a7a 100644 --- a/src/gallium/drivers/r600/evergreen_compute_internal.h +++ b/src/gallium/drivers/r600/evergreen_compute_internal.h @@ -26,6 +26,7 @@ #define EVERGREEN_COMPUTE_INTERNAL_H #include "compute_memory_pool.h" +#include "r600_asm.h" enum evergreen_compute_resources { @@ -67,21 +68,26 @@ struct number_type_and_format { unsigned num_format_all; }; +struct r600_kernel { + unsigned count; +#ifdef HAVE_OPENCL + LLVMModuleRef llvm_module; +#endif + struct r600_resource *code_bo; + struct r600_bytecode bc; +}; + struct r600_pipe_compute { struct r600_context *ctx; - struct r600_bytecode bc; - struct tgsi_token *tokens; + unsigned num_kernels; + struct r600_kernel *kernels; struct evergreen_compute_resource *resources; unsigned local_size; unsigned private_size; unsigned input_size; -#ifdef HAVE_OPENCL - LLVMModuleRef mod; -#endif struct r600_resource *kernel_param; - struct r600_resource *shader_code_bo; }; int evergreen_compute_get_gpu_format(struct number_type_and_format* fmt, struct r600_resource *bo); ///get hw format from resource, return 0 on faliure, nonzero on success diff --git a/src/gallium/drivers/r600/llvm_wrapper.cpp b/src/gallium/drivers/r600/llvm_wrapper.cpp index 174fb013c83..81f53974d11 100644 --- a/src/gallium/drivers/r600/llvm_wrapper.cpp +++ b/src/gallium/drivers/r600/llvm_wrapper.cpp @@ -1,9 +1,11 @@ #include #include #include +#include #include #include #include +#include #include "llvm_wrapper.h" @@ -17,3 +19,39 @@ extern "C" LLVMModuleRef llvm_parse_bitcode(const unsigned char * bitcode, unsig M.reset(llvm::ParseIR(buffer, Err, llvm::getGlobalContext())); return wrap(M.take()); } + +extern "C" void llvm_strip_unused_kernels(LLVMModuleRef mod, const char *kernel_name) +{ + llvm::Module *M = llvm::unwrap(mod); + std::vector export_list; + export_list.push_back(kernel_name); + llvm::PassManager PM; + PM.add(llvm::createInternalizePass(export_list)); + PM.add(llvm::createGlobalDCEPass()); + PM.run(*M); +} + +extern "C" unsigned llvm_get_num_kernels(const unsigned char *bitcode, + unsigned bitcode_len) +{ + LLVMModuleRef mod = llvm_parse_bitcode(bitcode, bitcode_len); + llvm::Module *M = llvm::unwrap(mod); + const llvm::NamedMDNode *kernel_node + = M->getNamedMetadata("opencl.kernels"); + unsigned kernel_count = kernel_node->getNumOperands(); + delete M; + return kernel_count; +} + +extern "C" LLVMModuleRef llvm_get_kernel_module(unsigned index, + const unsigned char *bitcode, unsigned bitcode_len) +{ + LLVMModuleRef mod = llvm_parse_bitcode(bitcode, bitcode_len); + llvm::Module *M = llvm::unwrap(mod); + const llvm::NamedMDNode *kernel_node = + M->getNamedMetadata("opencl.kernels"); + const char* kernel_name = kernel_node->getOperand(index)-> + getOperand(0)->getName().data(); + llvm_strip_unused_kernels(mod, kernel_name); + return mod; +} diff --git a/src/gallium/drivers/r600/llvm_wrapper.h b/src/gallium/drivers/r600/llvm_wrapper.h index 3a696455cdf..4b9b93f23c6 100644 --- a/src/gallium/drivers/r600/llvm_wrapper.h +++ b/src/gallium/drivers/r600/llvm_wrapper.h @@ -8,6 +8,10 @@ extern "C" { #endif LLVMModuleRef llvm_parse_bitcode(const unsigned char * bitcode, unsigned bitcode_len); +void llvm_strip_unused_kernels(LLVMModuleRef mod, const char *kernel_name); +unsigned llvm_get_num_kernels(const unsigned char *bitcode, unsigned bitcode_len); +LLVMModuleRef llvm_get_kernel_module(unsigned index, + const unsigned char *bitcode, unsigned bitcode_len); #ifdef __cplusplus } diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 979cb438938..68800609979 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -123,6 +123,7 @@ struct r600_clip_state { struct r600_cs_shader_state { struct r600_atom atom; + unsigned kernel_index; struct r600_pipe_compute *shader; }; -- 2.30.2