From fa07f4b68a20e54e1766876203b11a8b90ff120f Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Thu, 25 Sep 2014 18:10:44 -0700 Subject: [PATCH] r600g/compute: Enable PIPE_SHADER_IR_NATIVE for compute shaders v2 v2: - Drop dependency on LLVM >= 3.5.1 --- src/gallium/drivers/r600/evergreen_compute.c | 167 +++++++++++------- .../drivers/r600/evergreen_compute_internal.h | 12 +- src/gallium/drivers/r600/r600_llvm.c | 71 +++++--- src/gallium/drivers/r600/r600_llvm.h | 10 ++ src/gallium/drivers/r600/r600_pipe.c | 4 + src/gallium/drivers/r600/r600_pipe.h | 1 + src/gallium/drivers/radeon/radeon_llvm_util.c | 6 +- src/gallium/drivers/radeon/radeon_llvm_util.h | 6 +- 8 files changed, 180 insertions(+), 97 deletions(-) diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c index 38b78c7dfcb..7a17d1ee089 100644 --- a/src/gallium/drivers/r600/evergreen_compute.c +++ b/src/gallium/drivers/r600/evergreen_compute.c @@ -49,6 +49,7 @@ #ifdef HAVE_OPENCL #include "radeon_llvm_util.h" #endif +#include "radeon_elf_util.h" #include /** @@ -198,18 +199,42 @@ void *evergreen_create_compute_state( { struct r600_context *ctx = (struct r600_context *)ctx_; struct r600_pipe_compute *shader = CALLOC_STRUCT(r600_pipe_compute); - -#ifdef HAVE_OPENCL const struct pipe_llvm_program_header * header; - const unsigned char * code; - unsigned i; - - shader->llvm_ctx = LLVMContextCreate(); + const char *code; + void *p; + boolean use_kill; COMPUTE_DBG(ctx->screen, "*** evergreen_create_compute_state\n"); - header = cso->prog; code = cso->prog + sizeof(struct pipe_llvm_program_header); +#if HAVE_LLVM < 0x0306 +#ifdef HAVE_OPENCL + (void)use_kill; + (void)p; + shader->llvm_ctx = LLVMContextCreate(); + shader->num_kernels = radeon_llvm_get_num_kernels(shader->llvm_ctx, + code, header->num_bytes); + shader->kernels = CALLOC(sizeof(struct r600_kernel), + shader->num_kernels); + { + unsigned i; + for (i = 0; i < shader->num_kernels; i++) { + struct r600_kernel *kernel = &shader->kernels[i]; + kernel->llvm_module = radeon_llvm_get_kernel_module( + shader->llvm_ctx, i, code, header->num_bytes); + } + } +#endif +#else + memset(&shader->binary, 0, sizeof(shader->binary)); + radeon_elf_read(code, header->num_bytes, &shader->binary, true); + r600_create_shader(&shader->bc, &shader->binary, &use_kill); + + shader->code_bo = r600_compute_buffer_alloc_vram(ctx->screen, + shader->bc.ndw * 4); + p = r600_buffer_map_sync_with_rings(&ctx->b, shader->code_bo, PIPE_TRANSFER_WRITE); + memcpy(p, shader->bc.bytecode, shader->bc.ndw * 4); + ctx->b.ws->buffer_unmap(shader->code_bo->cs_buf); #endif shader->ctx = (struct r600_context*)ctx; @@ -217,17 +242,6 @@ void *evergreen_create_compute_state( shader->private_size = cso->req_private_mem; shader->input_size = cso->req_input_mem; -#ifdef HAVE_OPENCL - shader->num_kernels = radeon_llvm_get_num_kernels(shader->llvm_ctx, code, - header->num_bytes); - shader->kernels = CALLOC(sizeof(struct r600_kernel), shader->num_kernels); - - for (i = 0; i < shader->num_kernels; i++) { - struct r600_kernel *kernel = &shader->kernels[i]; - kernel->llvm_module = radeon_llvm_get_kernel_module(shader->llvm_ctx, i, - code, header->num_bytes); - } -#endif return shader; } @@ -238,14 +252,6 @@ void evergreen_delete_compute_state(struct pipe_context *ctx, void* state) if (!shader) return; - FREE(shader->kernels); - -#ifdef HAVE_OPENCL - if (shader->llvm_ctx){ - LLVMContextDispose(shader->llvm_ctx); - } -#endif - FREE(shader); } @@ -347,7 +353,13 @@ static void evergreen_emit_direct_dispatch( unsigned wave_divisor = (16 * num_pipes); int group_size = 1; int grid_size = 1; - unsigned lds_size = shader->local_size / 4 + shader->active_kernel->bc.nlds_dw; + unsigned lds_size = shader->local_size / 4 + +#if HAVE_LLVM < 0x0306 + shader->active_kernel->bc.nlds_dw; +#else + shader->bc.nlds_dw; +#endif + /* Calculate group_size/grid_size */ for (i = 0; i < 3; i++) { @@ -520,19 +532,34 @@ void evergreen_emit_cs_shader( struct r600_cs_shader_state *state = (struct r600_cs_shader_state*)atom; struct r600_pipe_compute *shader = state->shader; - struct r600_kernel *kernel = &shader->kernels[state->kernel_index]; struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; + uint64_t va; + struct r600_resource *code_bo; + unsigned ngpr, nstack; + +#if HAVE_LLVM < 0x0306 + struct r600_kernel *kernel = &shader->kernels[state->kernel_index]; + code_bo = kernel->code_bo; + va = kernel->code_bo->gpu_address; + ngpr = kernel->bc.ngpr; + nstack = kernel->bc.nstack; +#else + code_bo = shader->code_bo; + va = shader->code_bo->gpu_address + state->pc; + ngpr = shader->bc.ngpr; + nstack = shader->bc.nstack; +#endif r600_write_compute_context_reg_seq(cs, R_0288D0_SQ_PGM_START_LS, 3); - radeon_emit(cs, kernel->code_bo->gpu_address >> 8); /* R_0288D0_SQ_PGM_START_LS */ + radeon_emit(cs, va >> 8); /* R_0288D0_SQ_PGM_START_LS */ radeon_emit(cs, /* R_0288D4_SQ_PGM_RESOURCES_LS */ - S_0288D4_NUM_GPRS(kernel->bc.ngpr) - | S_0288D4_STACK_SIZE(kernel->bc.nstack)); + S_0288D4_NUM_GPRS(ngpr) + | S_0288D4_STACK_SIZE(nstack)); radeon_emit(cs, 0); /* R_0288D8_SQ_PGM_RESOURCES_LS_2 */ radeon_emit(cs, PKT3C(PKT3_NOP, 0, 0)); radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, - kernel->code_bo, RADEON_USAGE_READ, + code_bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA)); } @@ -542,46 +569,54 @@ static void evergreen_launch_grid( uint32_t pc, const void *input) { struct r600_context *ctx = (struct r600_context *)ctx_; - struct r600_pipe_compute *shader = ctx->cs_shader_state.shader; - struct r600_kernel *kernel = &shader->kernels[pc]; - - COMPUTE_DBG(ctx->screen, "*** evergreen_launch_grid: pc = %u\n", pc); + boolean use_kill; +#if HAVE_LLVM < 0x0306 #ifdef HAVE_OPENCL - - if (!kernel->code_bo) { - void *p; - struct r600_bytecode *bc = &kernel->bc; - LLVMModuleRef mod = kernel->llvm_module; - boolean use_kill = false; - bool dump = (ctx->screen->b.debug_flags & DBG_CS) != 0; - unsigned use_sb = ctx->screen->b.debug_flags & DBG_SB_CS; - unsigned sb_disasm = use_sb || - (ctx->screen->b.debug_flags & DBG_SB_DISASM); - - r600_bytecode_init(bc, ctx->b.chip_class, ctx->b.family, - ctx->screen->has_compressed_msaa_texturing); - bc->type = TGSI_PROCESSOR_COMPUTE; - bc->isa = ctx->isa; - r600_llvm_compile(mod, ctx->b.family, bc, &use_kill, dump); - - if (dump && !sb_disasm) { - r600_bytecode_disasm(bc); - } else if ((dump && sb_disasm) || use_sb) { - if (r600_sb_bytecode_process(ctx, bc, NULL, dump, use_sb)) - R600_ERR("r600_sb_bytecode_process failed!\n"); - } - - kernel->code_bo = r600_compute_buffer_alloc_vram(ctx->screen, - kernel->bc.ndw * 4); - p = r600_buffer_map_sync_with_rings(&ctx->b, kernel->code_bo, PIPE_TRANSFER_WRITE); - memcpy(p, kernel->bc.bytecode, kernel->bc.ndw * 4); - ctx->b.ws->buffer_unmap(kernel->code_bo->cs_buf); - } + struct r600_kernel *kernel = &shader->kernels[pc]; + (void)use_kill; + if (!kernel->code_bo) { + void *p; + struct r600_bytecode *bc = &kernel->bc; + LLVMModuleRef mod = kernel->llvm_module; + boolean use_kill = false; + bool dump = (ctx->screen->b.debug_flags & DBG_CS) != 0; + unsigned use_sb = ctx->screen->b.debug_flags & DBG_SB_CS; + unsigned sb_disasm = use_sb || + (ctx->screen->b.debug_flags & DBG_SB_DISASM); + + r600_bytecode_init(bc, ctx->b.chip_class, ctx->b.family, + ctx->screen->has_compressed_msaa_texturing); + bc->type = TGSI_PROCESSOR_COMPUTE; + bc->isa = ctx->isa; + r600_llvm_compile(mod, ctx->b.family, bc, &use_kill, dump); + + if (dump && !sb_disasm) { + r600_bytecode_disasm(bc); + } else if ((dump && sb_disasm) || use_sb) { + if (r600_sb_bytecode_process(ctx, bc, NULL, dump, use_sb)) + R600_ERR("r600_sb_bytecode_process failed!\n"); + } + + kernel->code_bo = r600_compute_buffer_alloc_vram(ctx->screen, + kernel->bc.ndw * 4); + p = r600_buffer_map_sync_with_rings(&ctx->b, kernel->code_bo, PIPE_TRANSFER_WRITE); + memcpy(p, kernel->bc.bytecode, kernel->bc.ndw * 4); + ctx->b.ws->buffer_unmap(kernel->code_bo->cs_buf); + } #endif shader->active_kernel = kernel; ctx->cs_shader_state.kernel_index = pc; +#else + ctx->cs_shader_state.pc = pc; + /* Get the config information for this kernel. */ + r600_shader_binary_read_config(&shader->binary, &shader->bc, pc, &use_kill); +#endif + + COMPUTE_DBG(ctx->screen, "*** evergreen_launch_grid: pc = %u\n", pc); + + evergreen_compute_upload_input(ctx_, block_layout, grid_layout, input); compute_emit_cs(ctx, block_layout, grid_layout); } diff --git a/src/gallium/drivers/r600/evergreen_compute_internal.h b/src/gallium/drivers/r600/evergreen_compute_internal.h index 0929d8dcf27..95593dd8e13 100644 --- a/src/gallium/drivers/r600/evergreen_compute_internal.h +++ b/src/gallium/drivers/r600/evergreen_compute_internal.h @@ -27,6 +27,8 @@ #include "r600_asm.h" +#if HAVE_LLVM < 0x0306 + struct r600_kernel { unsigned count; #ifdef HAVE_OPENCL @@ -36,13 +38,21 @@ struct r600_kernel { struct r600_bytecode bc; }; +#endif + struct r600_pipe_compute { struct r600_context *ctx; +#if HAVE_LLVM < 0x0306 unsigned num_kernels; struct r600_kernel *kernels; - struct r600_kernel *active_kernel; +#endif + + struct radeon_shader_binary binary; + struct r600_resource *code_bo; + struct r600_bytecode bc; + unsigned local_size; unsigned private_size; unsigned input_size; diff --git a/src/gallium/drivers/r600/r600_llvm.c b/src/gallium/drivers/r600/r600_llvm.c index 766141968a0..c19693a03e6 100644 --- a/src/gallium/drivers/r600/r600_llvm.c +++ b/src/gallium/drivers/r600/r600_llvm.c @@ -13,8 +13,9 @@ #include "r600_opcodes.h" #include "r600_shader.h" #include "r600_pipe.h" -#include "radeon/radeon_llvm.h" -#include "radeon/radeon_llvm_emit.h" +#include "radeon_llvm.h" +#include "radeon_llvm_emit.h" +#include "radeon_elf_util.h" #include @@ -818,31 +819,20 @@ LLVMModuleRef r600_tgsi_llvm( #define R_028868_SQ_PGM_RESOURCES_VS 0x028868 #define R_028850_SQ_PGM_RESOURCES_PS 0x028850 -unsigned r600_llvm_compile( - LLVMModuleRef mod, - enum radeon_family family, - struct r600_bytecode *bc, - boolean *use_kill, - unsigned dump) +void r600_shader_binary_read_config(const struct radeon_shader_binary *binary, + struct r600_bytecode *bc, + uint64_t symbol_offset, + boolean *use_kill) { - unsigned r; - struct radeon_shader_binary binary; - const char * gpu_family = r600_get_llvm_processor_name(family); unsigned i; + const unsigned char *config = + radeon_shader_binary_config_start(binary, symbol_offset); - memset(&binary, 0, sizeof(struct radeon_shader_binary)); - r = radeon_llvm_compile(mod, &binary, gpu_family, dump); - - assert(binary.code_size % 4 == 0); - bc->bytecode = CALLOC(1, binary.code_size); - memcpy(bc->bytecode, binary.code, binary.code_size); - bc->ndw = binary.code_size / 4; - - for (i = 0; i < binary.config_size; i+= 8) { + for (i = 0; i < binary->config_size_per_symbol; i+= 8) { unsigned reg = - util_le32_to_cpu(*(uint32_t*)(binary.config + i)); + util_le32_to_cpu(*(uint32_t*)(config + i)); unsigned value = - util_le32_to_cpu(*(uint32_t*)(binary.config + i + 4)); + util_le32_to_cpu(*(uint32_t*)(config + i + 4)); switch (reg) { /* R600 / R700 */ case R_028850_SQ_PGM_RESOURCES_PS: @@ -851,8 +841,8 @@ unsigned r600_llvm_compile( case R_028844_SQ_PGM_RESOURCES_PS: case R_028860_SQ_PGM_RESOURCES_VS: case R_0288D4_SQ_PGM_RESOURCES_LS: - bc->ngpr = G_028844_NUM_GPRS(value); - bc->nstack = G_028844_STACK_SIZE(value); + bc->ngpr = MAX2(bc->ngpr, G_028844_NUM_GPRS(value)); + bc->nstack = MAX2(bc->nstack, G_028844_STACK_SIZE(value)); break; case R_02880C_DB_SHADER_CONTROL: *use_kill = G_02880C_KILL_ENABLE(value); @@ -863,6 +853,39 @@ unsigned r600_llvm_compile( } } +} + +unsigned r600_create_shader(struct r600_bytecode *bc, + const struct radeon_shader_binary *binary, + boolean *use_kill) + +{ + assert(binary->code_size % 4 == 0); + bc->bytecode = CALLOC(1, binary->code_size); + memcpy(bc->bytecode, binary->code, binary->code_size); + bc->ndw = binary->code_size / 4; + + r600_shader_binary_read_config(binary, bc, 0, use_kill); + + return 0; +} + +unsigned r600_llvm_compile( + LLVMModuleRef mod, + enum radeon_family family, + struct r600_bytecode *bc, + boolean *use_kill, + unsigned dump) +{ + unsigned r; + struct radeon_shader_binary binary; + const char * gpu_family = r600_get_llvm_processor_name(family); + + memset(&binary, 0, sizeof(struct radeon_shader_binary)); + r = radeon_llvm_compile(mod, &binary, gpu_family, dump); + + r = r600_create_shader(bc, &binary, use_kill); + FREE(binary.code); FREE(binary.config); diff --git a/src/gallium/drivers/r600/r600_llvm.h b/src/gallium/drivers/r600/r600_llvm.h index 3840a5a2933..9b5304d9fcb 100644 --- a/src/gallium/drivers/r600/r600_llvm.h +++ b/src/gallium/drivers/r600/r600_llvm.h @@ -10,6 +10,7 @@ struct r600_bytecode; struct r600_shader_ctx; struct radeon_llvm_context; +struct radeon_shader_binary; enum radeon_family; LLVMModuleRef r600_tgsi_llvm( @@ -23,6 +24,15 @@ unsigned r600_llvm_compile( boolean *use_kill, unsigned dump); +unsigned r600_create_shader(struct r600_bytecode *bc, + const struct radeon_shader_binary *binary, + boolean *use_kill); + +void r600_shader_binary_read_config(const struct radeon_shader_binary *binary, + struct r600_bytecode *bc, + uint64_t symbol_offset, + boolean *use_kill); + #endif /* defined R600_USE_LLVM || defined HAVE_OPENCL */ #endif /* R600_LLVM_H */ diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index c86daa6c249..0b571e45e9b 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -472,7 +472,11 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e return 16; case PIPE_SHADER_CAP_PREFERRED_IR: if (shader == PIPE_SHADER_COMPUTE) { +#if HAVE_LLVM < 0x0306 return PIPE_SHADER_IR_LLVM; +#else + return PIPE_SHADER_IR_NATIVE; +#endif } else { return PIPE_SHADER_IR_TGSI; } diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index fa9d34b0d71..40b0328ea20 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -146,6 +146,7 @@ struct r600_clip_state { struct r600_cs_shader_state { struct r600_atom atom; unsigned kernel_index; + unsigned pc; struct r600_pipe_compute *shader; }; diff --git a/src/gallium/drivers/radeon/radeon_llvm_util.c b/src/gallium/drivers/radeon/radeon_llvm_util.c index ec1155923fe..0dfd9ad4867 100644 --- a/src/gallium/drivers/radeon/radeon_llvm_util.c +++ b/src/gallium/drivers/radeon/radeon_llvm_util.c @@ -34,7 +34,7 @@ #include LLVMModuleRef radeon_llvm_parse_bitcode(LLVMContextRef ctx, - const unsigned char * bitcode, unsigned bitcode_len) + const char * bitcode, unsigned bitcode_len) { LLVMMemoryBufferRef buf; LLVMModuleRef module; @@ -47,7 +47,7 @@ LLVMModuleRef radeon_llvm_parse_bitcode(LLVMContextRef ctx, } unsigned radeon_llvm_get_num_kernels(LLVMContextRef ctx, - const unsigned char *bitcode, unsigned bitcode_len) + const char *bitcode, unsigned bitcode_len) { LLVMModuleRef mod = radeon_llvm_parse_bitcode(ctx, bitcode, bitcode_len); return LLVMGetNamedMetadataNumOperands(mod, "opencl.kernels"); @@ -88,7 +88,7 @@ static void radeon_llvm_optimize(LLVMModuleRef mod) } LLVMModuleRef radeon_llvm_get_kernel_module(LLVMContextRef ctx, unsigned index, - const unsigned char *bitcode, unsigned bitcode_len) + const char *bitcode, unsigned bitcode_len) { LLVMModuleRef mod; unsigned num_kernels; diff --git a/src/gallium/drivers/radeon/radeon_llvm_util.h b/src/gallium/drivers/radeon/radeon_llvm_util.h index 733c329e99e..cc1932aef47 100644 --- a/src/gallium/drivers/radeon/radeon_llvm_util.h +++ b/src/gallium/drivers/radeon/radeon_llvm_util.h @@ -30,10 +30,10 @@ #include LLVMModuleRef radeon_llvm_parse_bitcode(LLVMContextRef ctx, - const unsigned char * bitcode, unsigned bitcode_len); + const char * bitcode, unsigned bitcode_len); unsigned radeon_llvm_get_num_kernels(LLVMContextRef ctx, - const unsigned char *bitcode, unsigned bitcode_len); + const char *bitcode, unsigned bitcode_len); LLVMModuleRef radeon_llvm_get_kernel_module(LLVMContextRef ctx, unsigned index, - const unsigned char *bitcode, unsigned bitcode_len); + const char *bitcode, unsigned bitcode_len); #endif -- 2.30.2