*/
#include "util/u_memory.h"
+#include "radeon/r600_pipe_common.h"
+#include "radeon/radeon_elf_util.h"
+#include "radeon/radeon_llvm_util.h"
#include "radeon/r600_cs.h"
#include "si_pipe.h"
#include "si_shader.h"
#include "sid.h"
-#include "radeon/radeon_llvm_util.h"
-
#define MAX_GLOBAL_BUFFERS 20
#if HAVE_LLVM < 0x0305
#define NUM_USER_SGPRS 2
unsigned local_size;
unsigned private_size;
unsigned input_size;
- unsigned num_kernels;
- struct si_shader *kernels;
+ struct radeon_shader_binary binary;
+ struct si_shader program;
unsigned num_user_sgprs;
struct r600_resource *input_buffer;
struct pipe_resource *global_buffers[MAX_GLOBAL_BUFFERS];
+#if HAVE_LLVM < 0x0306
+ unsigned num_kernels;
+ struct si_shader *kernels;
LLVMContextRef llvm_ctx;
+#endif
};
static void *si_create_compute_state(
struct si_context *sctx = (struct si_context *)ctx;
struct si_compute *program = CALLOC_STRUCT(si_compute);
const struct pipe_llvm_program_header *header;
- const unsigned char *code;
- unsigned i;
-
- program->llvm_ctx = LLVMContextCreate();
+ const char *code;
header = cso->prog;
code = cso->prog + sizeof(struct pipe_llvm_program_header);
program->private_size = cso->req_private_mem;
program->input_size = cso->req_input_mem;
- program->num_kernels = radeon_llvm_get_num_kernels(program->llvm_ctx, code,
- header->num_bytes);
- program->kernels = CALLOC(sizeof(struct si_shader),
- program->num_kernels);
- for (i = 0; i < program->num_kernels; i++) {
- LLVMModuleRef mod = radeon_llvm_get_kernel_module(program->llvm_ctx, i,
- code, header->num_bytes);
- si_compile_llvm(sctx->screen, &program->kernels[i], mod);
- LLVMDisposeModule(mod);
+#if HAVE_LLVM < 0x0306
+ {
+ unsigned i;
+ program->llvm_ctx = LLVMContextCreate();
+ program->num_kernels = radeon_llvm_get_num_kernels(program->llvm_ctx,
+ code, header->num_bytes);
+ program->kernels = CALLOC(sizeof(struct si_shader),
+ program->num_kernels);
+ for (i = 0; i < program->num_kernels; i++) {
+ LLVMModuleRef mod = radeon_llvm_get_kernel_module(program->llvm_ctx, i,
+ code, header->num_bytes);
+ si_compile_llvm(sctx->screen, &program->kernels[i], mod);
+ LLVMDisposeModule(mod);
+ }
}
+#else
+ radeon_elf_read(code, header->num_bytes, &program->binary, true);
+ si_shader_binary_read(sctx->screen, &program->program, &program->binary);
+
+#endif
program->input_buffer = si_resource_create_custom(sctx->b.b.screen,
PIPE_USAGE_IMMUTABLE, program->input_size);
uint64_t shader_va;
unsigned arg_user_sgpr_count = NUM_USER_SGPRS;
unsigned i;
- struct si_shader *shader = &program->kernels[pc];
+ struct si_shader *shader = &program->program;
unsigned lds_blocks;
unsigned num_waves_for_scratch;
+#if HAVE_LLVM < 0x0306
+ shader = &program->kernels[pc];
+#endif
+
+
radeon_emit(cs, PKT3(PKT3_CONTEXT_CONTROL, 1, 0) | PKT3_SHADER_TYPE_S(1));
radeon_emit(cs, 0x80000000);
radeon_emit(cs, 0x80000000);
pm4->compute_pkt = true;
+#if HAVE_LLVM >= 0x0306
+ /* Read the config information */
+ si_shader_binary_read_config(&program->binary, &program->program, pc);
+#endif
+
/* Upload the kernel arguments */
/* The extra num_work_size_bytes are for work group / work item size information */
}
shader_va = shader->bo->gpu_address;
+
+#if HAVE_LLVM >= 0x0306
+ shader_va += pc;
+#endif
si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
si_pm4_set_reg(pm4, R_00B830_COMPUTE_PGM_LO, (shader_va >> 8) & 0xffffffff);
si_pm4_set_reg(pm4, R_00B834_COMPUTE_PGM_HI, shader_va >> 40);
return;
}
+#if HAVE_LLVM < 0x0306
if (program->kernels) {
for (int i = 0; i < program->num_kernels; i++){
if (program->kernels[i].bo){
if (program->llvm_ctx){
LLVMContextDispose(program->llvm_ctx);
}
+#else
+ si_shader_destroy(ctx, &program->program);
+#endif
+
pipe_resource_reference(
(struct pipe_resource **)&program->input_buffer, NULL);
- //And then free the program itself.
+ FREE(program->binary.code);
+ FREE(program->binary.config);
+ FREE(program->binary.rodata);
FREE(program);
}
#include "gallivm/lp_bld_arit.h"
#include "gallivm/lp_bld_flow.h"
#include "radeon/radeon_llvm.h"
+#include "radeon/radeon_elf_util.h"
#include "radeon/radeon_llvm_emit.h"
#include "util/u_memory.h"
#include "tgsi/tgsi_parse.h"
}
}
-int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader,
- LLVMModuleRef mod)
+void si_shader_binary_read_config(const struct radeon_shader_binary *binary,
+ struct si_shader *shader,
+ unsigned symbol_offset) /*
+ * Copy selected register fields from the config data of `binary` into
+ * `shader`.
+ *
+ * binary:        source of the config data; only read here.
+ * shader:        receives num_sgprs, num_vgprs, lds_size and
+ *                spi_ps_input_ena.
+ * symbol_offset: passed unchanged to radeon_shader_binary_config_start()
+ *                to select which config data is parsed.
+ *
+ * num_sgprs, num_vgprs and lds_size are merged with MAX2(), so a larger
+ * value already stored in `shader` is kept; spi_ps_input_ena is simply
+ * overwritten.
+ */
{
- unsigned r; /* llvm_compile result */
unsigned i;
- unsigned char *ptr;
- struct radeon_shader_binary binary;
- bool dump = r600_can_dump_shader(&sscreen->b,
- shader->selector ? shader->selector->tokens : NULL);
- const char * gpu_family = r600_get_llvm_processor_name(sscreen->b.family);
- unsigned code_size;
-
- /* Use LLVM to compile shader */
- memset(&binary, 0, sizeof(binary));
- r = radeon_llvm_compile(mod, &binary, gpu_family, dump);
-
- /* Output binary dump if rscreen->debug_flags are set */
- if (dump && ! binary.disassembled) {
- fprintf(stderr, "SI CODE:\n");
- for (i = 0; i < binary.code_size; i+=4 ) {
- fprintf(stderr, "%02x%02x%02x%02x\n", binary.code[i + 3],
- binary.code[i + 2], binary.code[i + 1],
- binary.code[i]);
- }
- }
+ const unsigned char *config = /* start of the config data to parse */
+ radeon_shader_binary_config_start(binary, symbol_offset);
/* XXX: We may be able to emit some of these values directly rather than
* extracting fields to be emitted later.
*/
- /* Parse config data in compiled binary */
- for (i = 0; i < binary.config_size; i+= 8) {
- unsigned reg = util_le32_to_cpu(*(uint32_t*)(binary.config + i));
- unsigned value = util_le32_to_cpu(*(uint32_t*)(binary.config + i + 4));
+ /* Walk config_size_per_symbol bytes in 8-byte steps: a 32-bit register
+ * id at offset i and its 32-bit value at offset i + 4, both converted
+ * with util_le32_to_cpu().
+ * NOTE(review): assumes config_size_per_symbol is a multiple of 8,
+ * otherwise the last value read runs past the counted bytes - confirm.
+ * NOTE(review): the uint32_t casts drop the const qualifier of `config`
+ * and assume 4-byte alignment - confirm.
+ */
+ for (i = 0; i < binary->config_size_per_symbol; i+= 8) {
+ unsigned reg = util_le32_to_cpu(*(uint32_t*)(config + i));
+ unsigned value = util_le32_to_cpu(*(uint32_t*)(config + i + 4));
switch (reg) {
case R_00B028_SPI_SHADER_PGM_RSRC1_PS:
case R_00B128_SPI_SHADER_PGM_RSRC1_VS:
case R_00B228_SPI_SHADER_PGM_RSRC1_GS:
case R_00B848_COMPUTE_PGM_RSRC1:
- shader->num_sgprs = (G_00B028_SGPRS(value) + 1) * 8;
- shader->num_vgprs = (G_00B028_VGPRS(value) + 1) * 4;
+ shader->num_sgprs = MAX2(shader->num_sgprs, (G_00B028_SGPRS(value) + 1) * 8);
+ shader->num_vgprs = MAX2(shader->num_vgprs, (G_00B028_VGPRS(value) + 1) * 4);
break;
case R_00B02C_SPI_SHADER_PGM_RSRC2_PS:
- shader->lds_size = G_00B02C_EXTRA_LDS_SIZE(value);
+ shader->lds_size = MAX2(shader->lds_size, G_00B02C_EXTRA_LDS_SIZE(value));
break;
case R_00B84C_COMPUTE_PGM_RSRC2:
- shader->lds_size = G_00B84C_LDS_SIZE(value);
+ shader->lds_size = MAX2(shader->lds_size, G_00B84C_LDS_SIZE(value));
break;
case R_0286CC_SPI_PS_INPUT_ENA:
shader->spi_ps_input_ena = value;
break;
}
}
+}
+
+int si_shader_binary_read(struct si_screen *sscreen,
+ struct si_shader *shader,
+ const struct radeon_shader_binary *binary)
+{
+ /* Load an already-built `binary` into `shader`: optionally dump the code
+ * to stderr, read the config with symbol offset 0, then replace
+ * shader->bo with a new buffer holding the code followed by the rodata.
+ *
+ * Returns 0 after the copy, or -ENOMEM from the buffer-creation path.
+ * Nothing in `binary` is freed here.
+ */
+ unsigned i;
+ unsigned code_size;
+ unsigned char *ptr;
+ bool dump = r600_can_dump_shader(&sscreen->b,
+ shader->selector ? shader->selector->tokens : NULL);
+
+ if (dump && !binary->disassembled) { /* raw dump only when no disassembly is flagged */
+ fprintf(stderr, "SI CODE:\n");
+ for (i = 0; i < binary->code_size; i+=4 ) { /* NOTE(review): reads code[i + 3]; assumes code_size is a multiple of 4 - confirm */
+ fprintf(stderr, "@0x%x: %02x%02x%02x%02x\n", i, binary->code[i + 3],
+ binary->code[i + 2], binary->code[i + 1],
+ binary->code[i]);
+ }
+ }
+ /* Fill the register-derived fields of `shader` from the config at offset 0. */
+ si_shader_binary_read_config(binary, shader, 0);
/* copy new shader */
- code_size = binary.code_size + binary.rodata_size;
+ code_size = binary->code_size + binary->rodata_size; /* rodata is stored right after the code */
r600_resource_reference(&shader->bo, NULL);
shader->bo = si_resource_create_custom(&sscreen->b.b, PIPE_USAGE_IMMUTABLE,
code_size);
return -ENOMEM;
}
- ptr = sscreen->b.ws->buffer_map(shader->bo->cs_buf, NULL, PIPE_TRANSFER_WRITE);
- util_memcpy_cpu_to_le32(ptr, binary.code, binary.code_size);
- if (binary.rodata_size > 0) {
- ptr += binary.code_size;
- util_memcpy_cpu_to_le32(ptr, binary.rodata, binary.rodata_size);
+ /* NOTE(review): the condition guarding the -ENOMEM return above is not
+ * visible in this excerpt.
+ * NOTE(review): the buffer_map() result is used without a NULL check,
+ * and the mapping is READ_WRITE although only writes are visible here -
+ * confirm both are intended.
+ */
+ ptr = sscreen->b.ws->buffer_map(shader->bo->cs_buf, NULL, PIPE_TRANSFER_READ_WRITE);
+ util_memcpy_cpu_to_le32(ptr, binary->code, binary->code_size);
+ if (binary->rodata_size > 0) {
+ ptr += binary->code_size;
+ util_memcpy_cpu_to_le32(ptr, binary->rodata, binary->rodata_size);
}
sscreen->b.ws->buffer_unmap(shader->bo->cs_buf);
- free(binary.code);
- free(binary.config);
- free(binary.rodata);
+ return 0;
+}
+
+int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader,
+ LLVMModuleRef mod) /*
+ * Compile `mod` with radeon_llvm_compile() into a temporary, zeroed
+ * radeon_shader_binary, load the result into `shader` through
+ * si_shader_binary_read(), and free the temporary binary's code, config
+ * and rodata.
+ *
+ * Returns the non-zero radeon_llvm_compile() result if compilation
+ * fails, otherwise the result of si_shader_binary_read().
+ */
+{
+ int r = 0;
+ struct radeon_shader_binary binary;
+ bool dump = r600_can_dump_shader(&sscreen->b,
+ shader->selector ? shader->selector->tokens : NULL);
+ memset(&binary, 0, sizeof(binary)); /* start from all-zero fields */
+ r = radeon_llvm_compile(mod, &binary,
+ r600_get_llvm_processor_name(sscreen->b.family), dump);
+ if (r) { /* NOTE(review): this path skips the FREE() calls below; a leak if radeon_llvm_compile() can fail after allocating - confirm */
+ return r;
+ }
+ r = si_shader_binary_read(sscreen, shader, &binary);
+ FREE(binary.code); /* the binary is released whether or not the read succeeded */
+ FREE(binary.config);
+ FREE(binary.rodata);
return r;
}