radeonsi: use the new run-time linker for shaders
authorNicolai Hähnle <nicolai.haehnle@amd.com>
Tue, 22 May 2018 14:14:16 +0000 (16:14 +0200)
committerMarek Olšák <marek.olsak@amd.com>
Thu, 13 Jun 2019 00:28:23 +0000 (20:28 -0400)
v2:
- fix a memory leak

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
src/gallium/drivers/radeonsi/si_compute.c
src/gallium/drivers/radeonsi/si_debug.c
src/gallium/drivers/radeonsi/si_pipe.c
src/gallium/drivers/radeonsi/si_pipe.h
src/gallium/drivers/radeonsi/si_shader.c
src/gallium/drivers/radeonsi/si_shader.h
src/gallium/drivers/radeonsi/si_shader_internal.h
src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
src/gallium/drivers/radeonsi/si_state_shaders.c

index 4a7ebac9ab7844e83a569d744c96ab9db3ec815a..7eadbcdd96013d9293cef1845152af3778627529 100644 (file)
@@ -28,6 +28,7 @@
 #include "util/u_memory.h"
 #include "util/u_upload_mgr.h"
 
+#include "ac_rtld.h"
 #include "amd_kernel_code_t.h"
 #include "si_build_pm4.h"
 #include "si_compute.h"
@@ -61,8 +62,26 @@ static const amd_kernel_code_t *si_compute_get_code_object(
        if (!program->use_code_object_v2) {
                return NULL;
        }
-       return (const amd_kernel_code_t*)
-               (program->shader.binary.code + symbol_offset);
+
+       struct ac_rtld_binary rtld;
+       if (!ac_rtld_open(&rtld, 1, &program->shader.binary.elf_buffer,
+                         &program->shader.binary.elf_size))
+               return NULL;
+
+       const amd_kernel_code_t *result = NULL;
+       const char *text;
+       size_t size;
+       if (!ac_rtld_get_section_by_name(&rtld, ".text", &text, &size))
+               goto out;
+
+       if (symbol_offset + sizeof(amd_kernel_code_t) > size)
+               goto out;
+
+       result = (const amd_kernel_code_t*)(text + symbol_offset);
+
+out:
+       ac_rtld_close(&rtld);
+       return result;
 }
 
 static void code_object_to_config(const amd_kernel_code_t *code_object,
@@ -145,7 +164,7 @@ static void si_create_compute_state_async(void *job, int thread_index)
                si_shader_dump(sscreen, shader, debug, PIPE_SHADER_COMPUTE,
                               stderr, true);
 
-               if (!si_shader_binary_upload(sscreen, shader))
+               if (!si_shader_binary_upload(sscreen, shader, 0))
                        program->shader.compilation_failed = true;
        } else {
                mtx_unlock(&sscreen->shader_cache_mutex);
@@ -237,25 +256,23 @@ static void *si_create_compute_state(
                header = cso->prog;
                code = cso->prog + sizeof(struct pipe_llvm_program_header);
 
-               ac_elf_read(code, header->num_bytes, &program->shader.binary);
-               if (program->use_code_object_v2) {
-                       const amd_kernel_code_t *code_object =
-                               si_compute_get_code_object(program, 0);
-                       code_object_to_config(code_object, &program->shader.config);
-                       if (program->shader.binary.reloc_count != 0) {
-                               fprintf(stderr, "Error: %d unsupported relocations\n",
-                                       program->shader.binary.reloc_count);
-                               FREE(program);
-                               return NULL;
-                       }
-               } else {
-                       ac_shader_binary_read_config(&program->shader.binary,
-                                    &program->shader.config, 0, false);
+               program->shader.binary.elf_size = header->num_bytes;
+               program->shader.binary.elf_buffer = malloc(header->num_bytes);
+               if (!program->shader.binary.elf_buffer) {
+                       FREE(program);
+                       return NULL;
                }
+               memcpy((void *)program->shader.binary.elf_buffer, code, header->num_bytes);
+
+               const amd_kernel_code_t *code_object =
+                       si_compute_get_code_object(program, 0);
+               code_object_to_config(code_object, &program->shader.config);
+
                si_shader_dump(sctx->screen, &program->shader, &sctx->debug,
                               PIPE_SHADER_COMPUTE, stderr, true);
-               if (!si_shader_binary_upload(sctx->screen, &program->shader)) {
+               if (!si_shader_binary_upload(sctx->screen, &program->shader, 0)) {
                        fprintf(stderr, "LLVM failed to upload shader\n");
+                       free((void *)program->shader.binary.elf_buffer);
                        FREE(program);
                        return NULL;
                }
@@ -390,9 +407,7 @@ static bool si_setup_compute_scratch_buffer(struct si_context *sctx,
        if (sctx->compute_scratch_buffer != shader->scratch_bo && scratch_needed) {
                uint64_t scratch_va = sctx->compute_scratch_buffer->gpu_address;
 
-               si_shader_apply_scratch_relocs(shader, scratch_va);
-
-               if (!si_shader_binary_upload(sctx->screen, shader))
+               if (!si_shader_binary_upload(sctx->screen, shader, scratch_va))
                        return false;
 
                si_resource_reference(&shader->scratch_bo,
@@ -423,11 +438,7 @@ static bool si_switch_compute_shader(struct si_context *sctx,
                unsigned lds_blocks;
 
                config = &inline_config;
-               if (code_object) {
-                       code_object_to_config(code_object, config);
-               } else {
-                       ac_shader_binary_read_config(&shader->binary, config, offset, false);
-               }
+               code_object_to_config(code_object, config);
 
                lds_blocks = config->lds_size;
                /* XXX: We are over allocating LDS.  For GFX6, the shader reports
index 9df3175aa3c96756d11c5455c95ef31fd03e8167..b11f7cc6e4f6ac4bf67626c1c135f5570fca4404 100644 (file)
@@ -32,6 +32,7 @@
 #include "util/u_memory.h"
 #include "util/u_string.h"
 #include "ac_debug.h"
+#include "ac_rtld.h"
 
 static void si_dump_bo_list(struct si_context *sctx,
                            const struct radeon_saved_cs *saved, FILE *f);
@@ -201,15 +202,16 @@ static void si_dump_compute_shader(struct si_context *ctx,
 /**
  * Shader compiles can be overridden with arbitrary ELF objects by setting
  * the environment variable RADEON_REPLACE_SHADERS=num1:filename1[;num2:filename2]
+ *
+ * TODO: key this off some hash
  */
-bool si_replace_shader(unsigned num, struct ac_shader_binary *binary)
+bool si_replace_shader(unsigned num, struct si_shader_binary *binary)
 {
        const char *p = debug_get_option_replace_shaders();
        const char *semicolon;
        char *copy = NULL;
        FILE *f;
        long filesize, nread;
-       char *buf = NULL;
        bool replaced = false;
 
        if (!p)
@@ -265,23 +267,25 @@ bool si_replace_shader(unsigned num, struct ac_shader_binary *binary)
        if (fseek(f, 0, SEEK_SET) != 0)
                goto file_error;
 
-       buf = MALLOC(filesize);
-       if (!buf) {
+       binary->elf_buffer = MALLOC(filesize);
+       if (!binary->elf_buffer) {
                fprintf(stderr, "out of memory\n");
                goto out_close;
        }
 
-       nread = fread(buf, 1, filesize, f);
-       if (nread != filesize)
+       nread = fread((void*)binary->elf_buffer, 1, filesize, f);
+       if (nread != filesize) {
+               FREE((void*)binary->elf_buffer);
+               binary->elf_buffer = NULL;
                goto file_error;
+       }
 
-       ac_elf_read(buf, filesize, binary);
+       binary->elf_size = nread;
        replaced = true;
 
 out_close:
        fclose(f);
 out_free:
-       FREE(buf);
        free(copy);
        return replaced;
 
@@ -922,33 +926,52 @@ struct si_shader_inst {
 };
 
 /**
- * Split a disassembly string into instructions and add them to the array
- * pointed to by \p instructions.
+ * Open the given \p binary as \p rtld_binary and split the contained
+ * disassembly string into instructions and add them to the array
+ * pointed to by \p instructions, which must be sufficiently large.
  *
  * Labels are considered to be part of the following instruction.
+ *
+ * The caller must keep \p rtld_binary alive as long as \p instructions are
+ * used and then close it afterwards.
  */
-static void si_add_split_disasm(const char *disasm,
+static void si_add_split_disasm(struct ac_rtld_binary *rtld_binary,
+                               struct si_shader_binary *binary,
                                uint64_t *addr,
                                unsigned *num,
                                struct si_shader_inst *instructions)
 {
-       const char *semicolon;
+       if (!ac_rtld_open(rtld_binary, 1, &binary->elf_buffer, &binary->elf_size))
+               return;
+
+       const char *disasm;
+       size_t nbytes;
+       if (!ac_rtld_get_section_by_name(rtld_binary, ".AMDGPU.disasm",
+                                        &disasm, &nbytes))
+               return;
+
+       const char *end = disasm + nbytes;
+       while (disasm < end) {
+               const char *semicolon = memchr(disasm, ';', end - disasm);
+               if (!semicolon)
+                       break;
 
-       while ((semicolon = strchr(disasm, ';'))) {
                struct si_shader_inst *inst = &instructions[(*num)++];
-               const char *end = util_strchrnul(semicolon, '\n');
+               const char *inst_end = memchr(semicolon + 1, '\n', end - semicolon - 1);
+               if (!inst_end)
+                       inst_end = end;
 
                inst->text = disasm;
-               inst->textlen = end - disasm;
+               inst->textlen = inst_end - disasm;
 
                inst->addr = *addr;
                /* More than 16 chars after ";" means the instruction is 8 bytes long. */
-               inst->size = end - semicolon > 16 ? 8 : 4;
+               inst->size = inst_end - semicolon > 16 ? 8 : 4;
                *addr += inst->size;
 
-               if (!(*end))
+               if (inst_end == end)
                        break;
-               disasm = end + 1;
+               disasm = inst_end + 1;
        }
 }
 
@@ -961,7 +984,7 @@ static void si_print_annotated_shader(struct si_shader *shader,
                                      unsigned num_waves,
                                      FILE *f)
 {
-       if (!shader || !shader->binary.disasm_string)
+       if (!shader)
                return;
 
        uint64_t start_addr = shader->bo->gpu_address;
@@ -985,25 +1008,26 @@ static void si_print_annotated_shader(struct si_shader *shader,
         */
        unsigned num_inst = 0;
        uint64_t inst_addr = start_addr;
+       struct ac_rtld_binary rtld_binaries[5] = {};
        struct si_shader_inst *instructions =
                calloc(shader->bo->b.b.width0 / 4, sizeof(struct si_shader_inst));
 
        if (shader->prolog) {
-               si_add_split_disasm(shader->prolog->binary.disasm_string,
+               si_add_split_disasm(&rtld_binaries[0], &shader->prolog->binary,
                                    &inst_addr, &num_inst, instructions);
        }
        if (shader->previous_stage) {
-               si_add_split_disasm(shader->previous_stage->binary.disasm_string,
+               si_add_split_disasm(&rtld_binaries[1], &shader->previous_stage->binary,
                                    &inst_addr, &num_inst, instructions);
        }
        if (shader->prolog2) {
-               si_add_split_disasm(shader->prolog2->binary.disasm_string,
+               si_add_split_disasm(&rtld_binaries[2], &shader->prolog2->binary,
                                    &inst_addr, &num_inst, instructions);
        }
-       si_add_split_disasm(shader->binary.disasm_string,
+       si_add_split_disasm(&rtld_binaries[3], &shader->binary,
                            &inst_addr, &num_inst, instructions);
        if (shader->epilog) {
-               si_add_split_disasm(shader->epilog->binary.disasm_string,
+               si_add_split_disasm(&rtld_binaries[4], &shader->epilog->binary,
                                    &inst_addr, &num_inst, instructions);
        }
 
@@ -1041,6 +1065,8 @@ static void si_print_annotated_shader(struct si_shader *shader,
 
        fprintf(f, "\n\n");
        free(instructions);
+       for (unsigned i = 0; i < ARRAY_SIZE(rtld_binaries); ++i)
+               ac_rtld_close(&rtld_binaries[i]);
 }
 
 static void si_dump_annotated_shaders(struct si_context *sctx, FILE *f)
index 1faaa22ab0d4571a19bea6c257e4167656629ba7..8527999645baaec1a57bb69cb6dcb82a99da3ad9 100644 (file)
@@ -721,7 +721,7 @@ static void si_destroy_screen(struct pipe_screen* pscreen)
                        struct si_shader_part *part = parts[i];
 
                        parts[i] = part->next;
-                       ac_shader_binary_clean(&part->binary);
+                       si_shader_binary_clean(&part->binary);
                        FREE(part);
                }
        }
index 20f769d09fda40dfee65a626b922b9ab93642f71..d32feab52c2614bb99dbc22895855c48f6b7db41 100644 (file)
@@ -1296,7 +1296,7 @@ void si_log_compute_state(struct si_context *sctx, struct u_log_context *log);
 void si_init_debug_functions(struct si_context *sctx);
 void si_check_vm_faults(struct si_context *sctx,
                        struct radeon_saved_cs *saved, enum ring_type ring);
-bool si_replace_shader(unsigned num, struct ac_shader_binary *binary);
+bool si_replace_shader(unsigned num, struct si_shader_binary *binary);
 
 /* si_dma.c */
 void si_init_dma_functions(struct si_context *sctx);
index ad965a11750442cbbd829e0bf9489e6f6ea99f71..04944e8bdab9cd8b15490a082d51d7b98a285b00 100644 (file)
 #include "tgsi/tgsi_util.h"
 #include "tgsi/tgsi_dump.h"
 
+#include "ac_binary.h"
 #include "ac_exp_param.h"
 #include "ac_shader_util.h"
+#include "ac_rtld.h"
 #include "ac_llvm_util.h"
 #include "si_shader_internal.h"
 #include "si_pipe.h"
@@ -5045,168 +5047,157 @@ static void si_llvm_emit_polygon_stipple(struct si_shader_context *ctx,
        ac_build_kill_if_false(&ctx->ac, bit);
 }
 
-void si_shader_apply_scratch_relocs(struct si_shader *shader,
-                                   uint64_t scratch_va)
-{
-       unsigned i;
-       uint32_t scratch_rsrc_dword0 = scratch_va;
-       uint32_t scratch_rsrc_dword1 =
-               S_008F04_BASE_ADDRESS_HI(scratch_va >> 32);
-
-       /* Enable scratch coalescing. */
-       scratch_rsrc_dword1 |= S_008F04_SWIZZLE_ENABLE(1);
-
-       for (i = 0 ; i < shader->binary.reloc_count; i++) {
-               const struct ac_shader_reloc *reloc =
-                                       &shader->binary.relocs[i];
-               if (!strcmp(scratch_rsrc_dword0_symbol, reloc->name)) {
-                       util_memcpy_cpu_to_le32(shader->binary.code + reloc->offset,
-                       &scratch_rsrc_dword0, 4);
-               } else if (!strcmp(scratch_rsrc_dword1_symbol, reloc->name)) {
-                       util_memcpy_cpu_to_le32(shader->binary.code + reloc->offset,
-                       &scratch_rsrc_dword1, 4);
-               }
-       }
-}
-
 /* For the UMR disassembler. */
 #define DEBUGGER_END_OF_CODE_MARKER    0xbf9f0000 /* invalid instruction */
 #define DEBUGGER_NUM_MARKERS           5
 
+static bool si_shader_binary_open(const struct si_shader *shader,
+                                 struct ac_rtld_binary *rtld)
+{
+       const char *part_elfs[5];
+       size_t part_sizes[5];
+       unsigned num_parts = 0;
+
+#define add_part(shader_or_part) \
+       if (shader_or_part) { \
+               part_elfs[num_parts] = (shader_or_part)->binary.elf_buffer; \
+               part_sizes[num_parts] = (shader_or_part)->binary.elf_size; \
+               num_parts++; \
+       }
+
+       add_part(shader->prolog);
+       add_part(shader->previous_stage);
+       add_part(shader->prolog2);
+       add_part(shader);
+       add_part(shader->epilog);
+
+#undef add_part
+
+       return ac_rtld_open(rtld, num_parts, part_elfs, part_sizes);
+}
+
 static unsigned si_get_shader_binary_size(const struct si_shader *shader)
 {
-       unsigned size = shader->binary.code_size;
-
-       if (shader->prolog)
-               size += shader->prolog->binary.code_size;
-       if (shader->previous_stage)
-               size += shader->previous_stage->binary.code_size;
-       if (shader->prolog2)
-               size += shader->prolog2->binary.code_size;
-       if (shader->epilog)
-               size += shader->epilog->binary.code_size;
-       return size + DEBUGGER_NUM_MARKERS * 4;
-}
-
-bool si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader)
-{
-       const struct ac_shader_binary *prolog =
-               shader->prolog ? &shader->prolog->binary : NULL;
-       const struct ac_shader_binary *previous_stage =
-               shader->previous_stage ? &shader->previous_stage->binary : NULL;
-       const struct ac_shader_binary *prolog2 =
-               shader->prolog2 ? &shader->prolog2->binary : NULL;
-       const struct ac_shader_binary *epilog =
-               shader->epilog ? &shader->epilog->binary : NULL;
-       const struct ac_shader_binary *mainb = &shader->binary;
-       unsigned bo_size = si_get_shader_binary_size(shader) +
-                          (!epilog ? mainb->rodata_size : 0);
-       unsigned char *ptr;
-
-       assert(!prolog || !prolog->rodata_size);
-       assert(!previous_stage || !previous_stage->rodata_size);
-       assert(!prolog2 || !prolog2->rodata_size);
-       assert((!prolog && !previous_stage && !prolog2 && !epilog) ||
-              !mainb->rodata_size);
-       assert(!epilog || !epilog->rodata_size);
+       struct ac_rtld_binary rtld;
+       si_shader_binary_open(shader, &rtld);
+       return rtld.rx_size;
+}
+
+
+static bool si_get_external_symbol(void *data, const char *name, uint64_t *value)
+{
+       uint64_t *scratch_va = data;
+
+       if (!strcmp(scratch_rsrc_dword0_symbol, name)) {
+               *value = (uint32_t)*scratch_va;
+               return true;
+       }
+       if (!strcmp(scratch_rsrc_dword1_symbol, name)) {
+               /* Enable scratch coalescing. */
+               *value = S_008F04_BASE_ADDRESS_HI(*scratch_va >> 32) |
+                        S_008F04_SWIZZLE_ENABLE(1);
+               if (HAVE_LLVM < 0x0800) {
+                       /* Old LLVM created an R_ABS32_HI relocation for
+                        * this symbol. */
+                       *value <<= 32;
+               }
+               return true;
+       }
+
+       return false;
+}
+
+bool si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader,
+                            uint64_t scratch_va)
+{
+       struct ac_rtld_binary binary;
+       if (!si_shader_binary_open(shader, &binary))
+               return false;
 
        si_resource_reference(&shader->bo, NULL);
        shader->bo = si_aligned_buffer_create(&sscreen->b,
                                              sscreen->cpdma_prefetch_writes_memory ?
                                                0 : SI_RESOURCE_FLAG_READ_ONLY,
                                               PIPE_USAGE_IMMUTABLE,
-                                              align(bo_size, SI_CPDMA_ALIGNMENT),
+                                              align(binary.rx_size, SI_CPDMA_ALIGNMENT),
                                               256);
        if (!shader->bo)
                return false;
 
        /* Upload. */
-       ptr = sscreen->ws->buffer_map(shader->bo->buf, NULL,
+       struct ac_rtld_upload_info u = {};
+       u.binary = &binary;
+       u.get_external_symbol = si_get_external_symbol;
+       u.cb_data = &scratch_va;
+       u.rx_va = shader->bo->gpu_address;
+       u.rx_ptr = sscreen->ws->buffer_map(shader->bo->buf, NULL,
                                        PIPE_TRANSFER_READ_WRITE |
                                        PIPE_TRANSFER_UNSYNCHRONIZED |
                                        RADEON_TRANSFER_TEMPORARY);
+       if (!u.rx_ptr)
+               return false;
 
-       /* Don't use util_memcpy_cpu_to_le32. LLVM binaries are
-        * endian-independent. */
-       if (prolog) {
-               memcpy(ptr, prolog->code, prolog->code_size);
-               ptr += prolog->code_size;
-       }
-       if (previous_stage) {
-               memcpy(ptr, previous_stage->code, previous_stage->code_size);
-               ptr += previous_stage->code_size;
-       }
-       if (prolog2) {
-               memcpy(ptr, prolog2->code, prolog2->code_size);
-               ptr += prolog2->code_size;
-       }
-
-       memcpy(ptr, mainb->code, mainb->code_size);
-       ptr += mainb->code_size;
-
-       if (epilog) {
-               memcpy(ptr, epilog->code, epilog->code_size);
-               ptr += epilog->code_size;
-       } else if (mainb->rodata_size > 0) {
-               memcpy(ptr, mainb->rodata, mainb->rodata_size);
-               ptr += mainb->rodata_size;
-       }
-
-       /* Add end-of-code markers for the UMR disassembler. */
-       uint32_t *ptr32 = (uint32_t*)ptr;
-       for (unsigned i = 0; i < DEBUGGER_NUM_MARKERS; i++)
-               ptr32[i] = DEBUGGER_END_OF_CODE_MARKER;
+       bool ok = ac_rtld_upload(&u);
 
        sscreen->ws->buffer_unmap(shader->bo->buf);
-       return true;
+       ac_rtld_close(&binary);
+
+       return ok;
 }
 
-static void si_shader_dump_disassembly(const struct ac_shader_binary *binary,
+static void si_shader_dump_disassembly(const struct si_shader_binary *binary,
                                       struct pipe_debug_callback *debug,
                                       const char *name, FILE *file)
 {
-       char *line, *p;
-       unsigned i, count;
+       struct ac_rtld_binary rtld_binary;
 
-       if (binary->disasm_string) {
-               fprintf(file, "Shader %s disassembly:\n", name);
-               fprintf(file, "%s", binary->disasm_string);
+       if (!ac_rtld_open(&rtld_binary, 1, &binary->elf_buffer, &binary->elf_size))
+               return;
 
-               if (debug && debug->debug_message) {
-                       /* Very long debug messages are cut off, so send the
-                        * disassembly one line at a time. This causes more
-                        * overhead, but on the plus side it simplifies
-                        * parsing of resulting logs.
-                        */
-                       pipe_debug_message(debug, SHADER_INFO,
-                                          "Shader Disassembly Begin");
+       const char *disasm;
+       size_t nbytes;
 
-                       line = binary->disasm_string;
-                       while (*line) {
-                               p = util_strchrnul(line, '\n');
-                               count = p - line;
+       if (!ac_rtld_get_section_by_name(&rtld_binary, ".AMDGPU.disasm", &disasm, &nbytes))
+               goto out;
 
-                               if (count) {
-                                       pipe_debug_message(debug, SHADER_INFO,
-                                                          "%.*s", count, line);
-                               }
+       fprintf(file, "Shader %s disassembly:\n", name);
+       if (nbytes > INT_MAX) {
+               fprintf(file, "too long\n");
+               goto out;
+       }
 
-                               if (!*p)
-                                       break;
-                               line = p + 1;
+       fprintf(file, "%*s", (int)nbytes, disasm);
+
+       if (debug && debug->debug_message) {
+               /* Very long debug messages are cut off, so send the
+                * disassembly one line at a time. This causes more
+                * overhead, but on the plus side it simplifies
+                * parsing of resulting logs.
+                */
+               pipe_debug_message(debug, SHADER_INFO,
+                                  "Shader Disassembly Begin");
+
+               uint64_t line = 0;
+               while (line < nbytes) {
+                       int count = nbytes - line;
+                       const char *nl = memchr(disasm + line, '\n', nbytes - line);
+                       if (nl)
+                               count = nl - disasm;
+
+                       if (count) {
+                               pipe_debug_message(debug, SHADER_INFO,
+                                                  "%.*s", count, disasm + line);
                        }
 
-                       pipe_debug_message(debug, SHADER_INFO,
-                                          "Shader Disassembly End");
-               }
-       } else {
-               fprintf(file, "Shader %s binary:\n", name);
-               for (i = 0; i < binary->code_size; i += 4) {
-                       fprintf(file, "@0x%x: %02x%02x%02x%02x\n", i,
-                               binary->code[i + 3], binary->code[i + 2],
-                               binary->code[i + 1], binary->code[i]);
+                       line += count + 1;
                }
+
+               pipe_debug_message(debug, SHADER_INFO,
+                                  "Shader Disassembly End");
        }
+
+out:
+       ac_rtld_close(&rtld_binary);
 }
 
 static void si_calculate_max_simd_waves(struct si_shader *shader)
@@ -5398,8 +5389,21 @@ void si_shader_dump(struct si_screen *sscreen, const struct si_shader *shader,
                             check_debug_option);
 }
 
+bool si_shader_binary_read_config(struct si_shader_binary *binary,
+                                 struct ac_shader_config *conf)
+{
+       struct ac_rtld_binary rtld;
+       if (!ac_rtld_open(&rtld, 1, &binary->elf_buffer, &binary->elf_size))
+               return false;
+
+       bool ok = ac_rtld_read_config(&rtld, conf);
+
+       ac_rtld_close(&rtld);
+       return ok;
+}
+
 static int si_compile_llvm(struct si_screen *sscreen,
-                          struct ac_shader_binary *binary,
+                          struct si_shader_binary *binary,
                           struct ac_shader_config *conf,
                           struct ac_llvm_compiler *compiler,
                           LLVMModuleRef mod,
@@ -5408,7 +5412,6 @@ static int si_compile_llvm(struct si_screen *sscreen,
                           const char *name,
                           bool less_optimized)
 {
-       int r = 0;
        unsigned count = p_atomic_inc_return(&sscreen->num_compilations);
 
        if (si_can_dump_shader(sscreen, processor)) {
@@ -5428,13 +5431,14 @@ static int si_compile_llvm(struct si_screen *sscreen,
        }
 
        if (!si_replace_shader(count, binary)) {
-               r = si_llvm_compile(mod, binary, compiler, debug,
-                                   less_optimized);
+               unsigned r = si_llvm_compile(mod, binary, compiler, debug,
+                                            less_optimized);
                if (r)
                        return r;
        }
 
-       ac_shader_binary_read_config(binary, conf, 0, false);
+       if (!si_shader_binary_read_config(binary, conf))
+               return -1;
 
        /* Enable 64-bit and 16-bit denormals, because there is no performance
         * cost.
@@ -5450,24 +5454,7 @@ static int si_compile_llvm(struct si_screen *sscreen,
         */
        conf->float_mode |= V_00B028_FP_64_DENORMS;
 
-       FREE(binary->config);
-       FREE(binary->global_symbol_offsets);
-       binary->config = NULL;
-       binary->global_symbol_offsets = NULL;
-
-       /* Some shaders can't have rodata because their binaries can be
-        * concatenated.
-        */
-       if (binary->rodata_size &&
-           (processor == PIPE_SHADER_VERTEX ||
-            processor == PIPE_SHADER_TESS_CTRL ||
-            processor == PIPE_SHADER_TESS_EVAL ||
-            processor == PIPE_SHADER_FRAGMENT)) {
-               fprintf(stderr, "radeonsi: The shader can't have rodata.");
-               return -EINVAL;
-       }
-
-       return r;
+       return 0;
 }
 
 static void si_llvm_build_ret(struct si_shader_context *ctx, LLVMValueRef ret)
@@ -5609,7 +5596,11 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
                        fprintf(stderr, "GS Copy Shader:\n");
                si_shader_dump(sscreen, ctx.shader, debug,
                               PIPE_SHADER_GEOMETRY, stderr, true);
-               ok = si_shader_binary_upload(sscreen, ctx.shader);
+
+               if (!ctx.shader->config.scratch_bytes_per_wave)
+                       ok = si_shader_binary_upload(sscreen, ctx.shader, 0);
+               else
+                       ok = true;
        }
 
        si_llvm_dispose(&ctx);
@@ -8011,7 +8002,7 @@ bool si_shader_create(struct si_screen *sscreen, struct ac_llvm_compiler *compil
                       stderr, true);
 
        /* Upload. */
-       if (!si_shader_binary_upload(sscreen, shader)) {
+       if (!si_shader_binary_upload(sscreen, shader, 0)) {
                fprintf(stderr, "LLVM failed to upload shader\n");
                return false;
        }
@@ -8027,7 +8018,7 @@ void si_shader_destroy(struct si_shader *shader)
        si_resource_reference(&shader->bo, NULL);
 
        if (!shader->is_binary_shared)
-               ac_shader_binary_clean(&shader->binary);
+               si_shader_binary_clean(&shader->binary);
 
        free(shader->shader_log);
 }
index 145a03bd1ae23b10b65534531732045a5c7720de..586460e2b4fabc97f595750d02fd9802c1c30ef5 100644 (file)
@@ -588,6 +588,13 @@ struct si_shader_info {
        unsigned                max_simd_waves;
 };
 
+struct si_shader_binary {
+       const char *elf_buffer;
+       size_t elf_size;
+
+       char *llvm_ir_string;
+};
+
 struct si_shader {
        struct si_compiler_ctx_state    compiler_ctx_state;
 
@@ -612,7 +619,7 @@ struct si_shader {
        bool                            is_gs_copy_shader;
 
        /* The following data is all that's needed for binary shaders. */
-       struct ac_shader_binary binary;
+       struct si_shader_binary         binary;
        struct ac_shader_config         config;
        struct si_shader_info           info;
 
@@ -669,7 +676,7 @@ struct si_shader {
 struct si_shader_part {
        struct si_shader_part *next;
        union si_shader_part_key key;
-       struct ac_shader_binary binary;
+       struct si_shader_binary binary;
        struct ac_shader_config config;
 };
 
@@ -690,7 +697,8 @@ void si_shader_destroy(struct si_shader *shader);
 unsigned si_shader_io_get_unique_index_patch(unsigned semantic_name, unsigned index);
 unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index,
                                       unsigned is_varying);
-bool si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader);
+bool si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader,
+                            uint64_t scratch_va);
 void si_shader_dump(struct si_screen *sscreen, const struct si_shader *shader,
                    struct pipe_debug_callback *debug, unsigned processor,
                    FILE *f, bool check_debug_option);
@@ -698,9 +706,10 @@ void si_shader_dump_stats_for_shader_db(const struct si_shader *shader,
                                        struct pipe_debug_callback *debug);
 void si_multiwave_lds_size_workaround(struct si_screen *sscreen,
                                      unsigned *lds_size);
-void si_shader_apply_scratch_relocs(struct si_shader *shader,
-                                   uint64_t scratch_va);
 const char *si_get_shader_name(const struct si_shader *shader, unsigned processor);
+bool si_shader_binary_read_config(struct si_shader_binary *binary,
+                                 struct ac_shader_config *conf);
+void si_shader_binary_clean(struct si_shader_binary *binary);
 
 /* si_shader_nir.c */
 void si_nir_scan_shader(const struct nir_shader *nir,
index 6e21bc7c26b3c47b2ce8a678ea9045711ce72dca..4a7b059de9a5b9ab1a0a2669037702c9fb16c25f 100644 (file)
@@ -36,7 +36,6 @@
 #include <llvm-c/TargetMachine.h>
 
 struct pipe_debug_callback;
-struct ac_shader_binary;
 
 #define RADEON_LLVM_MAX_INPUT_SLOTS 32
 #define RADEON_LLVM_MAX_INPUTS 32 * 4
@@ -243,7 +242,7 @@ void si_create_function(struct si_shader_context *ctx,
                        LLVMTypeRef *returns, unsigned num_returns,
                        struct si_function_info *fninfo,
                        unsigned max_workgroup_size);
-unsigned si_llvm_compile(LLVMModuleRef M, struct ac_shader_binary *binary,
+unsigned si_llvm_compile(LLVMModuleRef M, struct si_shader_binary *binary,
                         struct ac_llvm_compiler *compiler,
                         struct pipe_debug_callback *debug,
                         bool less_optimized);
index f70c41ca8c4215eb8d2dc54ca747d6a18a287c40..33b40685f04a8c96ad6cd36051c53029867b88f0 100644 (file)
@@ -80,7 +80,7 @@ static void si_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
  *
  * @returns 0 for success, 1 for failure
  */
-unsigned si_llvm_compile(LLVMModuleRef M, struct ac_shader_binary *binary,
+unsigned si_llvm_compile(LLVMModuleRef M, struct si_shader_binary *binary,
                         struct ac_llvm_compiler *compiler,
                         struct pipe_debug_callback *debug,
                         bool less_optimized)
@@ -100,7 +100,8 @@ unsigned si_llvm_compile(LLVMModuleRef M, struct ac_shader_binary *binary,
        LLVMContextSetDiagnosticHandler(llvm_ctx, si_diagnostic_handler, &diag);
 
        /* Compile IR. */
-       if (!ac_compile_module_to_binary(passes, M, binary))
+       if (!ac_compile_module_to_elf(passes, M, (char **)&binary->elf_buffer,
+                                     &binary->elf_size))
                diag.retval = 1;
 
        if (diag.retval != 0)
@@ -108,6 +109,15 @@ unsigned si_llvm_compile(LLVMModuleRef M, struct ac_shader_binary *binary,
        return diag.retval;
 }
 
+void si_shader_binary_clean(struct si_shader_binary *binary)
+{
+       free((void *)binary->elf_buffer);
+       binary->elf_buffer = NULL;
+
+       free(binary->llvm_ir_string);
+       binary->llvm_ir_string = NULL;
+}
+
 LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base,
                          enum tgsi_opcode_type type)
 {
index 77d1c014305ff20dc871a7bff2c81c140e9e9819..6e47f7f55ba42410cb48daa2c00bf911a7932744 100644 (file)
@@ -127,21 +127,21 @@ static uint32_t *read_chunk(uint32_t *ptr, void **data, unsigned *size)
 static void *si_get_shader_binary(struct si_shader *shader)
 {
        /* There is always a size of data followed by the data itself. */
-       unsigned relocs_size = shader->binary.reloc_count *
-                              sizeof(shader->binary.relocs[0]);
-       unsigned disasm_size = shader->binary.disasm_string ?
-                              strlen(shader->binary.disasm_string) + 1 : 0;
        unsigned llvm_ir_size = shader->binary.llvm_ir_string ?
                                strlen(shader->binary.llvm_ir_string) + 1 : 0;
+
+       /* Refuse to allocate overly large buffers and guard against integer
+        * overflow. */
+       if (shader->binary.elf_size > UINT_MAX / 4 ||
+           llvm_ir_size > UINT_MAX / 4)
+               return NULL;
+
        unsigned size =
                4 + /* total size */
                4 + /* CRC32 of the data below */
                align(sizeof(shader->config), 4) +
                align(sizeof(shader->info), 4) +
-               4 + align(shader->binary.code_size, 4) +
-               4 + align(shader->binary.rodata_size, 4) +
-               4 + align(relocs_size, 4) +
-               4 + align(disasm_size, 4) +
+               4 + align(shader->binary.elf_size, 4) +
                4 + align(llvm_ir_size, 4);
        void *buffer = CALLOC(1, size);
        uint32_t *ptr = (uint32_t*)buffer;
@@ -154,10 +154,7 @@ static void *si_get_shader_binary(struct si_shader *shader)
 
        ptr = write_data(ptr, &shader->config, sizeof(shader->config));
        ptr = write_data(ptr, &shader->info, sizeof(shader->info));
-       ptr = write_chunk(ptr, shader->binary.code, shader->binary.code_size);
-       ptr = write_chunk(ptr, shader->binary.rodata, shader->binary.rodata_size);
-       ptr = write_chunk(ptr, shader->binary.relocs, relocs_size);
-       ptr = write_chunk(ptr, shader->binary.disasm_string, disasm_size);
+       ptr = write_chunk(ptr, shader->binary.elf_buffer, shader->binary.elf_size);
        ptr = write_chunk(ptr, shader->binary.llvm_ir_string, llvm_ir_size);
        assert((char *)ptr - (char *)buffer == size);
 
@@ -175,6 +172,7 @@ static bool si_load_shader_binary(struct si_shader *shader, void *binary)
        uint32_t size = *ptr++;
        uint32_t crc32 = *ptr++;
        unsigned chunk_size;
+       unsigned elf_size;
 
        if (util_hash_crc32(ptr, size - 8) != crc32) {
                fprintf(stderr, "radeonsi: binary shader has invalid CRC32\n");
@@ -183,13 +181,9 @@ static bool si_load_shader_binary(struct si_shader *shader, void *binary)
 
        ptr = read_data(ptr, &shader->config, sizeof(shader->config));
        ptr = read_data(ptr, &shader->info, sizeof(shader->info));
-       ptr = read_chunk(ptr, (void**)&shader->binary.code,
-                        &shader->binary.code_size);
-       ptr = read_chunk(ptr, (void**)&shader->binary.rodata,
-                        &shader->binary.rodata_size);
-       ptr = read_chunk(ptr, (void**)&shader->binary.relocs, &chunk_size);
-       shader->binary.reloc_count = chunk_size / sizeof(shader->binary.relocs[0]);
-       ptr = read_chunk(ptr, (void**)&shader->binary.disasm_string, &chunk_size);
+       ptr = read_chunk(ptr, (void**)&shader->binary.elf_buffer,
+                        &elf_size);
+       shader->binary.elf_size = elf_size;
        ptr = read_chunk(ptr, (void**)&shader->binary.llvm_ir_string, &chunk_size);
 
        return true;
@@ -3132,13 +3126,8 @@ static int si_update_scratch_buffer(struct si_context *sctx,
 
        assert(sctx->scratch_buffer);
 
-       if (shader->previous_stage)
-               si_shader_apply_scratch_relocs(shader->previous_stage, scratch_va);
-
-       si_shader_apply_scratch_relocs(shader, scratch_va);
-
        /* Replace the shader bo with a new bo that has the relocs applied. */
-       if (!si_shader_binary_upload(sctx->screen, shader)) {
+       if (!si_shader_binary_upload(sctx->screen, shader, scratch_va)) {
                si_shader_unlock(shader);
                return -1;
        }