r600g/compute: Enable PIPE_SHADER_IR_NATIVE for compute shaders v2
author Tom Stellard <thomas.stellard@amd.com>
Fri, 26 Sep 2014 01:10:44 +0000 (18:10 -0700)
committer Tom Stellard <thomas.stellard@amd.com>
Fri, 31 Oct 2014 19:24:00 +0000 (15:24 -0400)
v2:
  - Drop dependency on LLVM >= 3.5.1

src/gallium/drivers/r600/evergreen_compute.c
src/gallium/drivers/r600/evergreen_compute_internal.h
src/gallium/drivers/r600/r600_llvm.c
src/gallium/drivers/r600/r600_llvm.h
src/gallium/drivers/r600/r600_pipe.c
src/gallium/drivers/r600/r600_pipe.h
src/gallium/drivers/radeon/radeon_llvm_util.c
src/gallium/drivers/radeon/radeon_llvm_util.h

index 38b78c7dfcb188c3521fa24b7e9a23539b6cb9c7..7a17d1ee08956648e27431b9d04e157295e51dbe 100644 (file)
@@ -49,6 +49,7 @@
 #ifdef HAVE_OPENCL
 #include "radeon_llvm_util.h"
 #endif
+#include "radeon_elf_util.h"
 #include <inttypes.h>
 
 /**
@@ -198,18 +199,42 @@ void *evergreen_create_compute_state(
 {
        struct r600_context *ctx = (struct r600_context *)ctx_;
        struct r600_pipe_compute *shader = CALLOC_STRUCT(r600_pipe_compute);
-
-#ifdef HAVE_OPENCL
        const struct pipe_llvm_program_header * header;
-       const unsigned char * code;
-       unsigned i;
-
-       shader->llvm_ctx = LLVMContextCreate();
+       const char *code;
+       void *p;
+       boolean use_kill;
 
        COMPUTE_DBG(ctx->screen, "*** evergreen_create_compute_state\n");
-
        header = cso->prog;
        code = cso->prog + sizeof(struct pipe_llvm_program_header);
+#if HAVE_LLVM < 0x0306
+#ifdef HAVE_OPENCL
+        (void)use_kill;
+       (void)p;
+       shader->llvm_ctx = LLVMContextCreate();
+       shader->num_kernels = radeon_llvm_get_num_kernels(shader->llvm_ctx,
+                               code, header->num_bytes);
+       shader->kernels = CALLOC(sizeof(struct r600_kernel),
+                               shader->num_kernels);
+       {
+               unsigned i;
+               for (i = 0; i < shader->num_kernels; i++) {
+                       struct r600_kernel *kernel = &shader->kernels[i];
+                       kernel->llvm_module = radeon_llvm_get_kernel_module(
+                               shader->llvm_ctx, i, code, header->num_bytes);
+               }
+       }
+#endif
+#else
+       memset(&shader->binary, 0, sizeof(shader->binary));
+       radeon_elf_read(code, header->num_bytes, &shader->binary, true);
+       r600_create_shader(&shader->bc, &shader->binary, &use_kill);
+
+       shader->code_bo = r600_compute_buffer_alloc_vram(ctx->screen,
+                                                       shader->bc.ndw * 4);
+       p = r600_buffer_map_sync_with_rings(&ctx->b, shader->code_bo, PIPE_TRANSFER_WRITE);
+       memcpy(p, shader->bc.bytecode, shader->bc.ndw * 4);
+       ctx->b.ws->buffer_unmap(shader->code_bo->cs_buf);
 #endif
 
        shader->ctx = (struct r600_context*)ctx;
@@ -217,17 +242,6 @@ void *evergreen_create_compute_state(
        shader->private_size = cso->req_private_mem;
        shader->input_size = cso->req_input_mem;
 
-#ifdef HAVE_OPENCL 
-       shader->num_kernels = radeon_llvm_get_num_kernels(shader->llvm_ctx, code,
-                                                       header->num_bytes);
-       shader->kernels = CALLOC(sizeof(struct r600_kernel), shader->num_kernels);
-
-       for (i = 0; i < shader->num_kernels; i++) {
-               struct r600_kernel *kernel = &shader->kernels[i];
-               kernel->llvm_module = radeon_llvm_get_kernel_module(shader->llvm_ctx, i,
-                                                       code, header->num_bytes);
-       }
-#endif
        return shader;
 }
 
@@ -238,14 +252,6 @@ void evergreen_delete_compute_state(struct pipe_context *ctx, void* state)
        if (!shader)
                return;
 
-       FREE(shader->kernels);
-
-#ifdef HAVE_OPENCL
-       if (shader->llvm_ctx){
-               LLVMContextDispose(shader->llvm_ctx);
-       }
-#endif
-
        FREE(shader);
 }
 
@@ -347,7 +353,13 @@ static void evergreen_emit_direct_dispatch(
        unsigned wave_divisor = (16 * num_pipes);
        int group_size = 1;
        int grid_size = 1;
-       unsigned lds_size = shader->local_size / 4 + shader->active_kernel->bc.nlds_dw;
+       unsigned lds_size = shader->local_size / 4 +
+#if HAVE_LLVM < 0x0306
+               shader->active_kernel->bc.nlds_dw;
+#else
+               shader->bc.nlds_dw;
+#endif
+
 
        /* Calculate group_size/grid_size */
        for (i = 0; i < 3; i++) {
@@ -520,19 +532,34 @@ void evergreen_emit_cs_shader(
        struct r600_cs_shader_state *state =
                                        (struct r600_cs_shader_state*)atom;
        struct r600_pipe_compute *shader = state->shader;
-       struct r600_kernel *kernel = &shader->kernels[state->kernel_index];
        struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+       uint64_t va;
+       struct r600_resource *code_bo;
+       unsigned ngpr, nstack;
+
+#if HAVE_LLVM < 0x0306
+       struct r600_kernel *kernel = &shader->kernels[state->kernel_index];
+       code_bo = kernel->code_bo;
+       va = kernel->code_bo->gpu_address;
+       ngpr = kernel->bc.ngpr;
+       nstack = kernel->bc.nstack;
+#else
+       code_bo = shader->code_bo;
+       va = shader->code_bo->gpu_address + state->pc;
+       ngpr = shader->bc.ngpr;
+       nstack = shader->bc.nstack;
+#endif
 
        r600_write_compute_context_reg_seq(cs, R_0288D0_SQ_PGM_START_LS, 3);
-       radeon_emit(cs, kernel->code_bo->gpu_address >> 8); /* R_0288D0_SQ_PGM_START_LS */
+       radeon_emit(cs, va >> 8); /* R_0288D0_SQ_PGM_START_LS */
        radeon_emit(cs,           /* R_0288D4_SQ_PGM_RESOURCES_LS */
-                       S_0288D4_NUM_GPRS(kernel->bc.ngpr)
-                       | S_0288D4_STACK_SIZE(kernel->bc.nstack));
+                       S_0288D4_NUM_GPRS(ngpr)
+                       | S_0288D4_STACK_SIZE(nstack));
        radeon_emit(cs, 0);     /* R_0288D8_SQ_PGM_RESOURCES_LS_2 */
 
        radeon_emit(cs, PKT3C(PKT3_NOP, 0, 0));
        radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx,
-                                             kernel->code_bo, RADEON_USAGE_READ,
+                                             code_bo, RADEON_USAGE_READ,
                                              RADEON_PRIO_SHADER_DATA));
 }
 
@@ -542,46 +569,54 @@ static void evergreen_launch_grid(
                uint32_t pc, const void *input)
 {
        struct r600_context *ctx = (struct r600_context *)ctx_;
-
        struct r600_pipe_compute *shader = ctx->cs_shader_state.shader;
-       struct r600_kernel *kernel = &shader->kernels[pc];
-
-       COMPUTE_DBG(ctx->screen, "*** evergreen_launch_grid: pc = %u\n", pc);
+       boolean use_kill;
 
+#if HAVE_LLVM < 0x0306
 #ifdef HAVE_OPENCL
-
-       if (!kernel->code_bo) {
-               void *p;
-               struct r600_bytecode *bc = &kernel->bc;
-               LLVMModuleRef mod = kernel->llvm_module;
-               boolean use_kill = false;
-               bool dump = (ctx->screen->b.debug_flags & DBG_CS) != 0;
-               unsigned use_sb = ctx->screen->b.debug_flags & DBG_SB_CS;
-               unsigned sb_disasm = use_sb ||
-                       (ctx->screen->b.debug_flags & DBG_SB_DISASM);
-
-               r600_bytecode_init(bc, ctx->b.chip_class, ctx->b.family,
-                          ctx->screen->has_compressed_msaa_texturing);
-               bc->type = TGSI_PROCESSOR_COMPUTE;
-               bc->isa = ctx->isa;
-               r600_llvm_compile(mod, ctx->b.family, bc, &use_kill, dump);
-
-               if (dump && !sb_disasm) {
-                       r600_bytecode_disasm(bc);
-               } else if ((dump && sb_disasm) || use_sb) {
-                       if (r600_sb_bytecode_process(ctx, bc, NULL, dump, use_sb))
-                               R600_ERR("r600_sb_bytecode_process failed!\n");
-               }
-
-               kernel->code_bo = r600_compute_buffer_alloc_vram(ctx->screen,
-                                                       kernel->bc.ndw * 4);
-               p = r600_buffer_map_sync_with_rings(&ctx->b, kernel->code_bo, PIPE_TRANSFER_WRITE);
-               memcpy(p, kernel->bc.bytecode, kernel->bc.ndw * 4);
-               ctx->b.ws->buffer_unmap(kernel->code_bo->cs_buf);
-       }
+       struct r600_kernel *kernel = &shader->kernels[pc];
+       (void)use_kill;
+        if (!kernel->code_bo) {
+                void *p;
+                struct r600_bytecode *bc = &kernel->bc;
+                LLVMModuleRef mod = kernel->llvm_module;
+                boolean use_kill = false;
+                bool dump = (ctx->screen->b.debug_flags & DBG_CS) != 0;
+                unsigned use_sb = ctx->screen->b.debug_flags & DBG_SB_CS;
+                unsigned sb_disasm = use_sb ||
+                        (ctx->screen->b.debug_flags & DBG_SB_DISASM);
+
+                r600_bytecode_init(bc, ctx->b.chip_class, ctx->b.family,
+                           ctx->screen->has_compressed_msaa_texturing);
+                bc->type = TGSI_PROCESSOR_COMPUTE;
+                bc->isa = ctx->isa;
+                r600_llvm_compile(mod, ctx->b.family, bc, &use_kill, dump);
+
+                if (dump && !sb_disasm) {
+                        r600_bytecode_disasm(bc);
+                } else if ((dump && sb_disasm) || use_sb) {
+                        if (r600_sb_bytecode_process(ctx, bc, NULL, dump, use_sb))
+                                R600_ERR("r600_sb_bytecode_process failed!\n");
+                }
+
+                kernel->code_bo = r600_compute_buffer_alloc_vram(ctx->screen,
+                                                        kernel->bc.ndw * 4);
+                p = r600_buffer_map_sync_with_rings(&ctx->b, kernel->code_bo, PIPE_TRANSFER_WRITE);
+                memcpy(p, kernel->bc.bytecode, kernel->bc.ndw * 4);
+                ctx->b.ws->buffer_unmap(kernel->code_bo->cs_buf);
+        }
 #endif
        shader->active_kernel = kernel;
        ctx->cs_shader_state.kernel_index = pc;
+#else
+       ctx->cs_shader_state.pc = pc;
+       /* Get the config information for this kernel. */
+       r600_shader_binary_read_config(&shader->binary, &shader->bc, pc, &use_kill);
+#endif
+
+       COMPUTE_DBG(ctx->screen, "*** evergreen_launch_grid: pc = %u\n", pc);
+
+
        evergreen_compute_upload_input(ctx_, block_layout, grid_layout, input);
        compute_emit_cs(ctx, block_layout, grid_layout);
 }
index 0929d8dcf270eb1620f31f1a3bcbd4aebfbdc0b5..95593dd8e1346f6424d71d565d7ab2dac5eeace6 100644 (file)
@@ -27,6 +27,8 @@
 
 #include "r600_asm.h"
 
+#if HAVE_LLVM < 0x0306
+
 struct r600_kernel {
        unsigned count;
 #ifdef HAVE_OPENCL
@@ -36,13 +38,21 @@ struct r600_kernel {
        struct r600_bytecode bc;
 };
 
+#endif
+
 struct r600_pipe_compute {
        struct r600_context *ctx;
 
+#if HAVE_LLVM < 0x0306
        unsigned num_kernels;
        struct r600_kernel *kernels;
-
        struct r600_kernel *active_kernel;
+#endif
+
+       struct radeon_shader_binary binary;
+       struct r600_resource *code_bo;
+       struct r600_bytecode bc;
+
        unsigned local_size;
        unsigned private_size;
        unsigned input_size;
index 766141968a0422fcd642754b6106cd41abdd7fa5..c19693a03e6a9d64ed918230ae9c8f839eb49e04 100644 (file)
@@ -13,8 +13,9 @@
 #include "r600_opcodes.h"
 #include "r600_shader.h"
 #include "r600_pipe.h"
-#include "radeon/radeon_llvm.h"
-#include "radeon/radeon_llvm_emit.h"
+#include "radeon_llvm.h"
+#include "radeon_llvm_emit.h"
+#include "radeon_elf_util.h"
 
 #include <stdio.h>
 
@@ -818,31 +819,20 @@ LLVMModuleRef r600_tgsi_llvm(
 #define R_028868_SQ_PGM_RESOURCES_VS                 0x028868
 #define R_028850_SQ_PGM_RESOURCES_PS                 0x028850
 
-unsigned r600_llvm_compile(
-       LLVMModuleRef mod,
-       enum radeon_family family,
-       struct r600_bytecode *bc,
-       boolean *use_kill,
-       unsigned dump)
+void r600_shader_binary_read_config(const struct radeon_shader_binary *binary,
+                                       struct r600_bytecode *bc,
+                                       uint64_t symbol_offset,
+                                       boolean *use_kill)
 {
-       unsigned r;
-       struct radeon_shader_binary binary;
-       const char * gpu_family = r600_get_llvm_processor_name(family);
        unsigned i;
+       const unsigned char *config =
+               radeon_shader_binary_config_start(binary, symbol_offset);
 
-       memset(&binary, 0, sizeof(struct radeon_shader_binary));
-       r = radeon_llvm_compile(mod, &binary, gpu_family, dump);
-
-       assert(binary.code_size % 4 == 0);
-       bc->bytecode = CALLOC(1, binary.code_size);
-       memcpy(bc->bytecode, binary.code, binary.code_size);
-       bc->ndw = binary.code_size / 4;
-
-       for (i = 0; i < binary.config_size; i+= 8) {
+       for (i = 0; i < binary->config_size_per_symbol; i+= 8) {
                unsigned reg =
-                       util_le32_to_cpu(*(uint32_t*)(binary.config + i));
+                       util_le32_to_cpu(*(uint32_t*)(config + i));
                unsigned value =
-                       util_le32_to_cpu(*(uint32_t*)(binary.config + i + 4));
+                       util_le32_to_cpu(*(uint32_t*)(config + i + 4));
                switch (reg) {
                /* R600 / R700 */
                case R_028850_SQ_PGM_RESOURCES_PS:
@@ -851,8 +841,8 @@ unsigned r600_llvm_compile(
                case R_028844_SQ_PGM_RESOURCES_PS:
                case R_028860_SQ_PGM_RESOURCES_VS:
                case R_0288D4_SQ_PGM_RESOURCES_LS:
-                       bc->ngpr = G_028844_NUM_GPRS(value);
-                       bc->nstack = G_028844_STACK_SIZE(value);
+                       bc->ngpr = MAX2(bc->ngpr, G_028844_NUM_GPRS(value));
+                       bc->nstack = MAX2(bc->nstack, G_028844_STACK_SIZE(value));
                        break;
                case R_02880C_DB_SHADER_CONTROL:
                        *use_kill = G_02880C_KILL_ENABLE(value);
@@ -863,6 +853,39 @@ unsigned r600_llvm_compile(
                }
        }
 
+}
+
+unsigned r600_create_shader(struct r600_bytecode *bc,
+               const struct radeon_shader_binary *binary,
+               boolean *use_kill)
+
+{
+       assert(binary->code_size % 4 == 0);
+       bc->bytecode = CALLOC(1, binary->code_size);
+       memcpy(bc->bytecode, binary->code, binary->code_size);
+       bc->ndw = binary->code_size / 4;
+
+       r600_shader_binary_read_config(binary, bc, 0, use_kill);
+
+       return 0;
+}
+
+unsigned r600_llvm_compile(
+       LLVMModuleRef mod,
+       enum radeon_family family,
+       struct r600_bytecode *bc,
+       boolean *use_kill,
+       unsigned dump)
+{
+       unsigned r;
+       struct radeon_shader_binary binary;
+       const char * gpu_family = r600_get_llvm_processor_name(family);
+
+       memset(&binary, 0, sizeof(struct radeon_shader_binary));
+       r = radeon_llvm_compile(mod, &binary, gpu_family, dump);
+
+       r = r600_create_shader(bc, &binary, use_kill);
+
        FREE(binary.code);
        FREE(binary.config);
 
index 3840a5a2933193b38f32f153cbe9bb7988fa1742..9b5304d9fcb6bde7d3a3886e9af77d29c61c7610 100644 (file)
@@ -10,6 +10,7 @@
 struct r600_bytecode;
 struct r600_shader_ctx;
 struct radeon_llvm_context;
+struct radeon_shader_binary;
 enum radeon_family;
 
 LLVMModuleRef r600_tgsi_llvm(
@@ -23,6 +24,15 @@ unsigned r600_llvm_compile(
        boolean *use_kill,
        unsigned dump);
 
+unsigned r600_create_shader(struct r600_bytecode *bc,
+               const struct radeon_shader_binary *binary,
+               boolean *use_kill);
+
+void r600_shader_binary_read_config(const struct radeon_shader_binary *binary,
+               struct r600_bytecode *bc,
+               uint64_t symbol_offset,
+               boolean *use_kill);
+
 #endif /* defined R600_USE_LLVM || defined HAVE_OPENCL */
 
 #endif /* R600_LLVM_H */
index c86daa6c2493c2c8ea871301d24be2e2aa9de02c..0b571e45e9b711baef5dd73558d58db8612f3740 100644 (file)
@@ -472,7 +472,11 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e
                return 16;
         case PIPE_SHADER_CAP_PREFERRED_IR:
                if (shader == PIPE_SHADER_COMPUTE) {
+#if HAVE_LLVM < 0x0306
                        return PIPE_SHADER_IR_LLVM;
+#else
+                       return PIPE_SHADER_IR_NATIVE;
+#endif
                } else {
                        return PIPE_SHADER_IR_TGSI;
                }
index fa9d34b0d711f502a4b6e3914dedb6b8263dd813..40b0328ea20f74c968c17993340d79dcc920dceb 100644 (file)
@@ -146,6 +146,7 @@ struct r600_clip_state {
 struct r600_cs_shader_state {
        struct r600_atom atom;
        unsigned kernel_index;
+       unsigned pc;
        struct r600_pipe_compute *shader;
 };
 
index ec1155923fede0ff038a96518adfff15e7da11a2..0dfd9ad4867988e840c1ee0dc142375361c7dd90 100644 (file)
@@ -34,7 +34,7 @@
 #include <llvm-c/Transforms/PassManagerBuilder.h>
 
 LLVMModuleRef radeon_llvm_parse_bitcode(LLVMContextRef ctx,
-                                                       const unsigned char * bitcode, unsigned bitcode_len)
+                                                       const char * bitcode, unsigned bitcode_len)
 {
        LLVMMemoryBufferRef buf;
        LLVMModuleRef module;
@@ -47,7 +47,7 @@ LLVMModuleRef radeon_llvm_parse_bitcode(LLVMContextRef ctx,
 }
 
 unsigned radeon_llvm_get_num_kernels(LLVMContextRef ctx,
-                               const unsigned char *bitcode, unsigned bitcode_len)
+                               const char *bitcode, unsigned bitcode_len)
 {
        LLVMModuleRef mod = radeon_llvm_parse_bitcode(ctx, bitcode, bitcode_len);
        return LLVMGetNamedMetadataNumOperands(mod, "opencl.kernels");
@@ -88,7 +88,7 @@ static void radeon_llvm_optimize(LLVMModuleRef mod)
 }
 
 LLVMModuleRef radeon_llvm_get_kernel_module(LLVMContextRef ctx, unsigned index,
-               const unsigned char *bitcode, unsigned bitcode_len)
+               const char *bitcode, unsigned bitcode_len)
 {
        LLVMModuleRef mod;
        unsigned num_kernels;
index 733c329e99e903d330402e06d75c8ec067d7675a..cc1932aef47e0be9c6641f2d0d843a89ded5cca7 100644 (file)
 #include <llvm-c/Core.h>
 
 LLVMModuleRef radeon_llvm_parse_bitcode(LLVMContextRef ctx,
-                       const unsigned char * bitcode, unsigned bitcode_len);
+                       const char * bitcode, unsigned bitcode_len);
 unsigned radeon_llvm_get_num_kernels(LLVMContextRef ctx,
-                       const unsigned char *bitcode, unsigned bitcode_len);
+                       const char *bitcode, unsigned bitcode_len);
 LLVMModuleRef radeon_llvm_get_kernel_module(LLVMContextRef ctx, unsigned index,
-                       const unsigned char *bitcode, unsigned bitcode_len);
+                       const char *bitcode, unsigned bitcode_len);
 
 #endif