#include "ac_rtld.h"
+#include "ac_binary.h"
+#include "ac_gpu_info.h"
+#include "util/u_dynarray.h"
+#include "util/u_math.h"
+
#include <gelf.h>
#include <libelf.h>
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>
-#include "ac_binary.h"
-#include "ac_gpu_info.h"
-#include "util/u_dynarray.h"
-#include "util/u_math.h"
-
// Old distributions may not have this enum constant
#define MY_EM_AMDGPU 224
#endif
#ifndef R_AMDGPU_NONE
-#define R_AMDGPU_NONE 0
-#define R_AMDGPU_ABS32_LO 1
-#define R_AMDGPU_ABS32_HI 2
-#define R_AMDGPU_ABS64 3
-#define R_AMDGPU_REL32 4
-#define R_AMDGPU_REL64 5
-#define R_AMDGPU_ABS32 6
-#define R_AMDGPU_GOTPCREL 7
+#define R_AMDGPU_NONE 0
+#define R_AMDGPU_ABS32_LO 1
+#define R_AMDGPU_ABS32_HI 2
+#define R_AMDGPU_ABS64 3
+#define R_AMDGPU_REL32 4
+#define R_AMDGPU_REL64 5
+#define R_AMDGPU_ABS32 6
+#define R_AMDGPU_GOTPCREL 7
#define R_AMDGPU_GOTPCREL32_LO 8
#define R_AMDGPU_GOTPCREL32_HI 9
-#define R_AMDGPU_REL32_LO 10
-#define R_AMDGPU_REL32_HI 11
-#define R_AMDGPU_RELATIVE64 13
+#define R_AMDGPU_REL32_LO 10
+#define R_AMDGPU_REL32_HI 11
+#define R_AMDGPU_RELATIVE64 13
#endif
/* For the UMR disassembler. */
-#define DEBUGGER_END_OF_CODE_MARKER 0xbf9f0000 /* invalid instruction */
-#define DEBUGGER_NUM_MARKERS 5
+#define DEBUGGER_END_OF_CODE_MARKER 0xbf9f0000 /* invalid instruction */
+#define DEBUGGER_NUM_MARKERS 5
/**
 * Per-section bookkeeping for one ELF section of a shader part.
 *
 * is_rx is set by ac_rtld_open() for SHF_ALLOC sections that are not
 * SHT_NOTE; is_pasted_text is additionally set for executable sections
 * named ".text". offset is the placement computed by ac_rtld_open(), and
 * name is the section name string returned by elf_strptr().
 */
struct ac_rtld_section {
- bool is_rx : 1;
- bool is_pasted_text : 1;
- uint64_t offset;
- const char *name;
+ bool is_rx : 1;
+ bool is_pasted_text : 1;
+ uint64_t offset;
+ const char *name;
};
/**
 * One shader part: the libelf handle opened by ac_rtld_open() via
 * elf_memory(), plus an array of num_sections per-section records indexed
 * by ELF section index.
 */
struct ac_rtld_part {
- Elf *elf;
- struct ac_rtld_section *sections;
- unsigned num_sections;
+ Elf *elf;
+ struct ac_rtld_section *sections;
+ unsigned num_sections;
};
/**
 * Format an error message and print it to stderr with an "ac_rtld error: "
 * prefix.
 *
 * \param fmt printf-style format string
 * \param va  arguments for \p fmt
 *
 * If vasprintf() fails, a fixed fallback string is printed instead. That
 * fallback is a string literal, so the buffer is only freed when
 * vasprintf() succeeded.
 */
static void report_erroraf(const char *fmt, va_list va)
{
- char *msg;
- int ret = vasprintf(&msg, fmt, va);
- if (ret < 0)
- msg = "(vasprintf failed)";
+ char *msg;
+ int ret = vasprintf(&msg, fmt, va);
+ if (ret < 0)
+ msg = "(vasprintf failed)";
- fprintf(stderr, "ac_rtld error: %s\n", msg);
+ fprintf(stderr, "ac_rtld error: %s\n", msg);
- if (ret >= 0)
- free(msg);
+ if (ret >= 0)
+ free(msg);
}
/**
 * Variadic front end for report_erroraf(): prints a printf-style formatted
 * error message to stderr. The forward declaration carries PRINTFLIKE(1, 2)
 * so the format string is argument 1 and the variadic arguments start at 2.
 */
static void report_errorf(const char *fmt, ...) PRINTFLIKE(1, 2);
static void report_errorf(const char *fmt, ...)
{
- va_list va;
- va_start(va, fmt);
- report_erroraf(fmt, va);
- va_end(va);
+ va_list va;
+ va_start(va, fmt);
+ report_erroraf(fmt, va);
+ va_end(va);
}
/**
 * Like report_errorf(), but additionally prints a second line,
 * "ELF error: ...", containing the libelf message for the current
 * elf_errno() value.
 */
static void report_elf_errorf(const char *fmt, ...) PRINTFLIKE(1, 2);
static void report_elf_errorf(const char *fmt, ...)
{
- va_list va;
- va_start(va, fmt);
- report_erroraf(fmt, va);
- va_end(va);
+ va_list va;
+ va_start(va, fmt);
+ report_erroraf(fmt, va);
+ va_end(va);
- fprintf(stderr, "ELF error: %s\n", elf_errmsg(elf_errno()));
+ fprintf(stderr, "ELF error: %s\n", elf_errmsg(elf_errno()));
}
/**
 * Find a symbol in a dynarray of struct ac_rtld_symbol by \p name and shader
 * \p part_idx.
 *
 * An entry matches when its name compares equal to \p name and its part_idx
 * is either ~0u or equal to \p part_idx. The first matching entry is
 * returned; 0 is returned when nothing matches.
 */
static const struct ac_rtld_symbol *find_symbol(const struct util_dynarray *symbols,
- const char *name, unsigned part_idx)
+ const char *name, unsigned part_idx)
{
- util_dynarray_foreach(symbols, struct ac_rtld_symbol, symbol) {
- if ((symbol->part_idx == ~0u || symbol->part_idx == part_idx) &&
- !strcmp(name, symbol->name))
- return symbol;
- }
- return 0;
+ util_dynarray_foreach (symbols, struct ac_rtld_symbol, symbol) {
+ if ((symbol->part_idx == ~0u || symbol->part_idx == part_idx) && !strcmp(name, symbol->name))
+ return symbol;
+ }
+ return 0;
}
/**
 * qsort() comparator for struct ac_rtld_symbol that orders by decreasing
 * alignment: returns 1 when \p rhsp has the larger align, -1 when it has
 * the smaller one, and 0 when both are equal.
 */
static int compare_symbol_by_align(const void *lhsp, const void *rhsp)
{
- const struct ac_rtld_symbol *lhs = lhsp;
- const struct ac_rtld_symbol *rhs = rhsp;
- if (rhs->align > lhs->align)
- return 1;
- if (rhs->align < lhs->align)
- return -1;
- return 0;
+ const struct ac_rtld_symbol *lhs = lhsp;
+ const struct ac_rtld_symbol *rhs = rhsp;
+ if (rhs->align > lhs->align)
+ return 1;
+ if (rhs->align < lhs->align)
+ return -1;
+ return 0;
}
/**
 * Sort the given symbol list by decreasing alignment and assign offsets.
 *
 * \param symbols     array of symbols; reordered in place and each entry's
 *                    offset field is written
 * \param num_symbols number of entries in \p symbols
 * \param ptotal_size in: the size already occupied, used as the starting
 *                    offset; out: the size after all symbols were placed
 *
 * Each symbol is placed at the running size rounded up to its alignment
 * (asserted to be a non-zero power of two). Returns false, after reporting
 * an error, when adding a symbol's size wraps around 64 bits; in that case
 * \p ptotal_size is left unmodified. Returns true otherwise.
 */
static bool layout_symbols(struct ac_rtld_symbol *symbols, unsigned num_symbols,
- uint64_t *ptotal_size)
+ uint64_t *ptotal_size)
{
- qsort(symbols, num_symbols, sizeof(*symbols), compare_symbol_by_align);
+ qsort(symbols, num_symbols, sizeof(*symbols), compare_symbol_by_align);
- uint64_t total_size = *ptotal_size;
+ uint64_t total_size = *ptotal_size;
- for (unsigned i = 0; i < num_symbols; ++i) {
- struct ac_rtld_symbol *s = &symbols[i];
- assert(util_is_power_of_two_nonzero(s->align));
+ for (unsigned i = 0; i < num_symbols; ++i) {
+ struct ac_rtld_symbol *s = &symbols[i];
+ assert(util_is_power_of_two_nonzero(s->align));
- total_size = align64(total_size, s->align);
- s->offset = total_size;
+ total_size = align64(total_size, s->align);
+ s->offset = total_size;
- if (total_size + s->size < total_size) {
- report_errorf("%s: size overflow", __FUNCTION__);
- return false;
- }
+ if (total_size + s->size < total_size) {
+ report_errorf("%s: size overflow", __FUNCTION__);
+ return false;
+ }
- total_size += s->size;
- }
+ total_size += s->size;
+ }
- *ptotal_size = total_size;
- return true;
+ *ptotal_size = total_size;
+ return true;
}
/**
 * Read LDS symbols from the given \p section of the ELF of shader part
 * \p part_idx and append them to the LDS symbols list of \p binary.
 *
 * Shared LDS symbols are filtered out: a symbol that find_symbol() already
 * locates in the list is only checked against the existing entry's
 * alignment and size and is not appended again.
 *
 * The special symbol "__lds_end" is never appended either; it must have
 * size 0, and its alignment is merged into \p lds_end_align with MAX2.
 *
 * Returns false, after reporting an error, when the section data cannot be
 * read or a symbol fails one of the checks; returns true otherwise.
 */
-static bool read_private_lds_symbols(struct ac_rtld_binary *binary,
- unsigned part_idx,
- Elf_Scn *section,
- uint32_t *lds_end_align)
+static bool read_private_lds_symbols(struct ac_rtld_binary *binary, unsigned part_idx,
+ Elf_Scn *section, uint32_t *lds_end_align)
{
-#define report_if(cond) \
- do { \
- if ((cond)) { \
- report_errorf(#cond); \
- return false; \
- } \
- } while (false)
-#define report_elf_if(cond) \
- do { \
- if ((cond)) { \
- report_elf_errorf(#cond); \
- return false; \
- } \
- } while (false)
-
- struct ac_rtld_part *part = &binary->parts[part_idx];
- Elf64_Shdr *shdr = elf64_getshdr(section);
- uint32_t strtabidx = shdr->sh_link;
- Elf_Data *symbols_data = elf_getdata(section, NULL);
- report_elf_if(!symbols_data);
-
- const Elf64_Sym *symbol = symbols_data->d_buf;
- size_t num_symbols = symbols_data->d_size / sizeof(Elf64_Sym);
-
- for (size_t j = 0; j < num_symbols; ++j, ++symbol) {
- struct ac_rtld_symbol s = {};
-
- if (ELF64_ST_TYPE(symbol->st_info) == STT_AMDGPU_LDS) {
- /* old-style LDS symbols from initial prototype -- remove eventually */
- s.align = MIN2(1u << (symbol->st_other >> 3), 1u << 16);
- } else if (symbol->st_shndx == SHN_AMDGPU_LDS) {
- s.align = MIN2(symbol->st_value, 1u << 16);
- report_if(!util_is_power_of_two_nonzero(s.align));
- } else
- continue;
-
- report_if(symbol->st_size > 1u << 29);
-
- s.name = elf_strptr(part->elf, strtabidx, symbol->st_name);
- s.size = symbol->st_size;
- s.part_idx = part_idx;
-
- if (!strcmp(s.name, "__lds_end")) {
- report_elf_if(s.size != 0);
- *lds_end_align = MAX2(*lds_end_align, s.align);
- continue;
- }
-
- const struct ac_rtld_symbol *shared =
- find_symbol(&binary->lds_symbols, s.name, part_idx);
- if (shared) {
- report_elf_if(s.align > shared->align);
- report_elf_if(s.size > shared->size);
- continue;
- }
-
- util_dynarray_append(&binary->lds_symbols, struct ac_rtld_symbol, s);
- }
-
- return true;
+#define report_if(cond) \
+ do { \
+ if ((cond)) { \
+ report_errorf(#cond); \
+ return false; \
+ } \
+ } while (false)
+#define report_elf_if(cond) \
+ do { \
+ if ((cond)) { \
+ report_elf_errorf(#cond); \
+ return false; \
+ } \
+ } while (false)
+
+ struct ac_rtld_part *part = &binary->parts[part_idx];
+ Elf64_Shdr *shdr = elf64_getshdr(section);
+ uint32_t strtabidx = shdr->sh_link;
+ Elf_Data *symbols_data = elf_getdata(section, NULL);
+ report_elf_if(!symbols_data);
+
+ const Elf64_Sym *symbol = symbols_data->d_buf;
+ size_t num_symbols = symbols_data->d_size / sizeof(Elf64_Sym);
+
+ for (size_t j = 0; j < num_symbols; ++j, ++symbol) {
+ struct ac_rtld_symbol s = {};
+
+ if (ELF64_ST_TYPE(symbol->st_info) == STT_AMDGPU_LDS) {
+ /* old-style LDS symbols from initial prototype -- remove eventually */
+ s.align = MIN2(1u << (symbol->st_other >> 3), 1u << 16);
+ } else if (symbol->st_shndx == SHN_AMDGPU_LDS) {
+ s.align = MIN2(symbol->st_value, 1u << 16);
+ report_if(!util_is_power_of_two_nonzero(s.align));
+ } else
+ continue;
+
+ report_if(symbol->st_size > 1u << 29);
+
+ s.name = elf_strptr(part->elf, strtabidx, symbol->st_name);
+ s.size = symbol->st_size;
+ s.part_idx = part_idx;
+
+ if (!strcmp(s.name, "__lds_end")) {
+ report_elf_if(s.size != 0);
+ *lds_end_align = MAX2(*lds_end_align, s.align);
+ continue;
+ }
+
+ const struct ac_rtld_symbol *shared = find_symbol(&binary->lds_symbols, s.name, part_idx);
+ if (shared) {
+ report_elf_if(s.align > shared->align);
+ report_elf_if(s.size > shared->size);
+ continue;
+ }
+
+ util_dynarray_append(&binary->lds_symbols, struct ac_rtld_symbol, s);
+ }
+
+ return true;
#undef report_if
#undef report_elf_if
}
* \param binary the uninitialized struct
* \param i binary opening parameters
*/
-bool ac_rtld_open(struct ac_rtld_binary *binary,
- struct ac_rtld_open_info i)
+bool ac_rtld_open(struct ac_rtld_binary *binary, struct ac_rtld_open_info i)
{
- /* One of the libelf implementations
- * (http://www.mr511.de/software/english.htm) requires calling
- * elf_version() before elf_memory().
- */
- elf_version(EV_CURRENT);
-
- memset(binary, 0, sizeof(*binary));
- memcpy(&binary->options, &i.options, sizeof(binary->options));
- binary->wave_size = i.wave_size;
- binary->num_parts = i.num_parts;
- binary->parts = calloc(sizeof(*binary->parts), i.num_parts);
- if (!binary->parts)
- return false;
-
- uint64_t pasted_text_size = 0;
- uint64_t rx_align = 1;
- uint64_t rx_size = 0;
- uint64_t exec_size = 0;
-
-#define report_if(cond) \
- do { \
- if ((cond)) { \
- report_errorf(#cond); \
- goto fail; \
- } \
- } while (false)
-#define report_elf_if(cond) \
- do { \
- if ((cond)) { \
- report_elf_errorf(#cond); \
- goto fail; \
- } \
- } while (false)
-
- /* Copy and layout shared LDS symbols. */
- if (i.num_shared_lds_symbols) {
- if (!util_dynarray_resize(&binary->lds_symbols, struct ac_rtld_symbol,
- i.num_shared_lds_symbols))
- goto fail;
-
- memcpy(binary->lds_symbols.data, i.shared_lds_symbols, binary->lds_symbols.size);
- }
-
- util_dynarray_foreach(&binary->lds_symbols, struct ac_rtld_symbol, symbol)
- symbol->part_idx = ~0u;
-
- unsigned max_lds_size = 64 * 1024;
-
- if (i.info->chip_class == GFX6 ||
- (i.shader_type != MESA_SHADER_COMPUTE &&
- i.shader_type != MESA_SHADER_FRAGMENT))
- max_lds_size = 32 * 1024;
-
- uint64_t shared_lds_size = 0;
- if (!layout_symbols(binary->lds_symbols.data, i.num_shared_lds_symbols, &shared_lds_size))
- goto fail;
-
- if (shared_lds_size > max_lds_size) {
- fprintf(stderr, "ac_rtld error(1): too much LDS (used = %u, max = %u)\n",
- (unsigned)shared_lds_size, max_lds_size);
- goto fail;
- }
- binary->lds_size = shared_lds_size;
-
- /* First pass over all parts: open ELFs, pre-determine the placement of
- * sections in the memory image, and collect and layout private LDS symbols. */
- uint32_t lds_end_align = 0;
-
- if (binary->options.halt_at_entry)
- pasted_text_size += 4;
-
- for (unsigned part_idx = 0; part_idx < i.num_parts; ++part_idx) {
- struct ac_rtld_part *part = &binary->parts[part_idx];
- unsigned part_lds_symbols_begin =
- util_dynarray_num_elements(&binary->lds_symbols, struct ac_rtld_symbol);
-
- part->elf = elf_memory((char *)i.elf_ptrs[part_idx], i.elf_sizes[part_idx]);
- report_elf_if(!part->elf);
-
- const Elf64_Ehdr *ehdr = elf64_getehdr(part->elf);
- report_elf_if(!ehdr);
- report_if(ehdr->e_machine != MY_EM_AMDGPU);
-
- size_t section_str_index;
- size_t num_shdrs;
- report_elf_if(elf_getshdrstrndx(part->elf, §ion_str_index) < 0);
- report_elf_if(elf_getshdrnum(part->elf, &num_shdrs) < 0);
-
- part->num_sections = num_shdrs;
- part->sections = calloc(sizeof(*part->sections), num_shdrs);
- report_if(!part->sections);
-
- Elf_Scn *section = NULL;
- while ((section = elf_nextscn(part->elf, section))) {
- Elf64_Shdr *shdr = elf64_getshdr(section);
- struct ac_rtld_section *s = &part->sections[elf_ndxscn(section)];
- s->name = elf_strptr(part->elf, section_str_index, shdr->sh_name);
- report_elf_if(!s->name);
-
- /* Cannot actually handle linked objects yet */
- report_elf_if(shdr->sh_addr != 0);
-
- /* Alignment must be 0 or a power of two */
- report_elf_if(shdr->sh_addralign & (shdr->sh_addralign - 1));
- uint64_t sh_align = MAX2(shdr->sh_addralign, 1);
-
- if (shdr->sh_flags & SHF_ALLOC &&
- shdr->sh_type != SHT_NOTE) {
- report_if(shdr->sh_flags & SHF_WRITE);
-
- s->is_rx = true;
-
- if (shdr->sh_flags & SHF_EXECINSTR) {
- report_elf_if(shdr->sh_size & 3);
-
- if (!strcmp(s->name, ".text"))
- s->is_pasted_text = true;
-
- exec_size += shdr->sh_size;
- }
-
- if (s->is_pasted_text) {
- s->offset = pasted_text_size;
- pasted_text_size += shdr->sh_size;
- } else {
- rx_align = align(rx_align, sh_align);
- rx_size = align(rx_size, sh_align);
- s->offset = rx_size;
- rx_size += shdr->sh_size;
- }
- } else if (shdr->sh_type == SHT_SYMTAB) {
- if (!read_private_lds_symbols(binary, part_idx, section, &lds_end_align))
- goto fail;
- }
- }
-
- uint64_t part_lds_size = shared_lds_size;
- if (!layout_symbols(
- util_dynarray_element(&binary->lds_symbols, struct ac_rtld_symbol, part_lds_symbols_begin),
- util_dynarray_num_elements(&binary->lds_symbols, struct ac_rtld_symbol) - part_lds_symbols_begin,
- &part_lds_size))
- goto fail;
- binary->lds_size = MAX2(binary->lds_size, part_lds_size);
- }
-
- binary->rx_end_markers = pasted_text_size;
- pasted_text_size += 4 * DEBUGGER_NUM_MARKERS;
-
- /* __lds_end is a special symbol that points at the end of the memory
- * occupied by other LDS symbols. Its alignment is taken as the
- * maximum of its alignment over all shader parts where it occurs.
- */
- if (lds_end_align) {
- binary->lds_size = align(binary->lds_size, lds_end_align);
-
- struct ac_rtld_symbol *lds_end =
- util_dynarray_grow(&binary->lds_symbols, struct ac_rtld_symbol, 1);
- lds_end->name = "__lds_end";
- lds_end->size = 0;
- lds_end->align = lds_end_align;
- lds_end->offset = binary->lds_size;
- lds_end->part_idx = ~0u;
- }
-
- if (binary->lds_size > max_lds_size) {
- fprintf(stderr, "ac_rtld error(2): too much LDS (used = %u, max = %u)\n",
- (unsigned)binary->lds_size, max_lds_size);
- goto fail;
- }
-
- /* Second pass: Adjust offsets of non-pasted text sections. */
- binary->rx_size = pasted_text_size;
- binary->rx_size = align(binary->rx_size, rx_align);
-
- for (unsigned part_idx = 0; part_idx < i.num_parts; ++part_idx) {
- struct ac_rtld_part *part = &binary->parts[part_idx];
- size_t num_shdrs;
- elf_getshdrnum(part->elf, &num_shdrs);
-
- for (unsigned j = 0; j < num_shdrs; ++j) {
- struct ac_rtld_section *s = &part->sections[j];
- if (s->is_rx && !s->is_pasted_text)
- s->offset += binary->rx_size;
- }
- }
-
- binary->rx_size += rx_size;
- binary->exec_size = exec_size;
-
- if (i.info->chip_class >= GFX10) {
- /* In gfx10, the SQ fetches up to 3 cache lines of 16 dwords
- * ahead of the PC, configurable by SH_MEM_CONFIG and
- * S_INST_PREFETCH. This can cause two issues:
- *
- * (1) Crossing a page boundary to an unmapped page. The logic
- * does not distinguish between a required fetch and a "mere"
- * prefetch and will fault.
- *
- * (2) Prefetching instructions that will be changed for a
- * different shader.
- *
- * (2) is not currently an issue because we flush the I$ at IB
- * boundaries, but (1) needs to be addressed. Due to buffer
- * suballocation, we just play it safe.
- */
- binary->rx_size = align(binary->rx_size + 3 * 64, 64);
- }
-
- return true;
+ /* One of the libelf implementations
+ * (http://www.mr511.de/software/english.htm) requires calling
+ * elf_version() before elf_memory().
+ */
+ elf_version(EV_CURRENT);
+
+ memset(binary, 0, sizeof(*binary));
+ memcpy(&binary->options, &i.options, sizeof(binary->options));
+ binary->wave_size = i.wave_size;
+ binary->num_parts = i.num_parts;
+ binary->parts = calloc(sizeof(*binary->parts), i.num_parts);
+ if (!binary->parts)
+ return false;
+
+ uint64_t pasted_text_size = 0;
+ uint64_t rx_align = 1;
+ uint64_t rx_size = 0;
+ uint64_t exec_size = 0;
+
+#define report_if(cond) \
+ do { \
+ if ((cond)) { \
+ report_errorf(#cond); \
+ goto fail; \
+ } \
+ } while (false)
+#define report_elf_if(cond) \
+ do { \
+ if ((cond)) { \
+ report_elf_errorf(#cond); \
+ goto fail; \
+ } \
+ } while (false)
+
+ /* Copy and layout shared LDS symbols. */
+ if (i.num_shared_lds_symbols) {
+ if (!util_dynarray_resize(&binary->lds_symbols, struct ac_rtld_symbol,
+ i.num_shared_lds_symbols))
+ goto fail;
+
+ memcpy(binary->lds_symbols.data, i.shared_lds_symbols, binary->lds_symbols.size);
+ }
+
+ util_dynarray_foreach (&binary->lds_symbols, struct ac_rtld_symbol, symbol)
+ symbol->part_idx = ~0u;
+
+ unsigned max_lds_size = 64 * 1024;
+
+ if (i.info->chip_class == GFX6 ||
+ (i.shader_type != MESA_SHADER_COMPUTE && i.shader_type != MESA_SHADER_FRAGMENT))
+ max_lds_size = 32 * 1024;
+
+ uint64_t shared_lds_size = 0;
+ if (!layout_symbols(binary->lds_symbols.data, i.num_shared_lds_symbols, &shared_lds_size))
+ goto fail;
+
+ if (shared_lds_size > max_lds_size) {
+ fprintf(stderr, "ac_rtld error(1): too much LDS (used = %u, max = %u)\n",
+ (unsigned)shared_lds_size, max_lds_size);
+ goto fail;
+ }
+ binary->lds_size = shared_lds_size;
+
+ /* First pass over all parts: open ELFs, pre-determine the placement of
+ * sections in the memory image, and collect and layout private LDS symbols. */
+ uint32_t lds_end_align = 0;
+
+ if (binary->options.halt_at_entry)
+ pasted_text_size += 4;
+
+ for (unsigned part_idx = 0; part_idx < i.num_parts; ++part_idx) {
+ struct ac_rtld_part *part = &binary->parts[part_idx];
+ unsigned part_lds_symbols_begin =
+ util_dynarray_num_elements(&binary->lds_symbols, struct ac_rtld_symbol);
+
+ part->elf = elf_memory((char *)i.elf_ptrs[part_idx], i.elf_sizes[part_idx]);
+ report_elf_if(!part->elf);
+
+ const Elf64_Ehdr *ehdr = elf64_getehdr(part->elf);
+ report_elf_if(!ehdr);
+ report_if(ehdr->e_machine != MY_EM_AMDGPU);
+
+ size_t section_str_index;
+ size_t num_shdrs;
+ report_elf_if(elf_getshdrstrndx(part->elf, §ion_str_index) < 0);
+ report_elf_if(elf_getshdrnum(part->elf, &num_shdrs) < 0);
+
+ part->num_sections = num_shdrs;
+ part->sections = calloc(sizeof(*part->sections), num_shdrs);
+ report_if(!part->sections);
+
+ Elf_Scn *section = NULL;
+ while ((section = elf_nextscn(part->elf, section))) {
+ Elf64_Shdr *shdr = elf64_getshdr(section);
+ struct ac_rtld_section *s = &part->sections[elf_ndxscn(section)];
+ s->name = elf_strptr(part->elf, section_str_index, shdr->sh_name);
+ report_elf_if(!s->name);
+
+ /* Cannot actually handle linked objects yet */
+ report_elf_if(shdr->sh_addr != 0);
+
+ /* Alignment must be 0 or a power of two */
+ report_elf_if(shdr->sh_addralign & (shdr->sh_addralign - 1));
+ uint64_t sh_align = MAX2(shdr->sh_addralign, 1);
+
+ if (shdr->sh_flags & SHF_ALLOC && shdr->sh_type != SHT_NOTE) {
+ report_if(shdr->sh_flags & SHF_WRITE);
+
+ s->is_rx = true;
+
+ if (shdr->sh_flags & SHF_EXECINSTR) {
+ report_elf_if(shdr->sh_size & 3);
+
+ if (!strcmp(s->name, ".text"))
+ s->is_pasted_text = true;
+
+ exec_size += shdr->sh_size;
+ }
+
+ if (s->is_pasted_text) {
+ s->offset = pasted_text_size;
+ pasted_text_size += shdr->sh_size;
+ } else {
+ rx_align = align(rx_align, sh_align);
+ rx_size = align(rx_size, sh_align);
+ s->offset = rx_size;
+ rx_size += shdr->sh_size;
+ }
+ } else if (shdr->sh_type == SHT_SYMTAB) {
+ if (!read_private_lds_symbols(binary, part_idx, section, &lds_end_align))
+ goto fail;
+ }
+ }
+
+ uint64_t part_lds_size = shared_lds_size;
+ if (!layout_symbols(util_dynarray_element(&binary->lds_symbols, struct ac_rtld_symbol,
+ part_lds_symbols_begin),
+ util_dynarray_num_elements(&binary->lds_symbols, struct ac_rtld_symbol) -
+ part_lds_symbols_begin,
+ &part_lds_size))
+ goto fail;
+ binary->lds_size = MAX2(binary->lds_size, part_lds_size);
+ }
+
+ binary->rx_end_markers = pasted_text_size;
+ pasted_text_size += 4 * DEBUGGER_NUM_MARKERS;
+
+ /* __lds_end is a special symbol that points at the end of the memory
+ * occupied by other LDS symbols. Its alignment is taken as the
+ * maximum of its alignment over all shader parts where it occurs.
+ */
+ if (lds_end_align) {
+ binary->lds_size = align(binary->lds_size, lds_end_align);
+
+ struct ac_rtld_symbol *lds_end =
+ util_dynarray_grow(&binary->lds_symbols, struct ac_rtld_symbol, 1);
+ lds_end->name = "__lds_end";
+ lds_end->size = 0;
+ lds_end->align = lds_end_align;
+ lds_end->offset = binary->lds_size;
+ lds_end->part_idx = ~0u;
+ }
+
+ if (binary->lds_size > max_lds_size) {
+ fprintf(stderr, "ac_rtld error(2): too much LDS (used = %u, max = %u)\n",
+ (unsigned)binary->lds_size, max_lds_size);
+ goto fail;
+ }
+
+ /* Second pass: Adjust offsets of non-pasted text sections. */
+ binary->rx_size = pasted_text_size;
+ binary->rx_size = align(binary->rx_size, rx_align);
+
+ for (unsigned part_idx = 0; part_idx < i.num_parts; ++part_idx) {
+ struct ac_rtld_part *part = &binary->parts[part_idx];
+ size_t num_shdrs;
+ elf_getshdrnum(part->elf, &num_shdrs);
+
+ for (unsigned j = 0; j < num_shdrs; ++j) {
+ struct ac_rtld_section *s = &part->sections[j];
+ if (s->is_rx && !s->is_pasted_text)
+ s->offset += binary->rx_size;
+ }
+ }
+
+ binary->rx_size += rx_size;
+ binary->exec_size = exec_size;
+
+ if (i.info->chip_class >= GFX10) {
+ /* In gfx10, the SQ fetches up to 3 cache lines of 16 dwords
+ * ahead of the PC, configurable by SH_MEM_CONFIG and
+ * S_INST_PREFETCH. This can cause two issues:
+ *
+ * (1) Crossing a page boundary to an unmapped page. The logic
+ * does not distinguish between a required fetch and a "mere"
+ * prefetch and will fault.
+ *
+ * (2) Prefetching instructions that will be changed for a
+ * different shader.
+ *
+ * (2) is not currently an issue because we flush the I$ at IB
+ * boundaries, but (1) needs to be addressed. Due to buffer
+ * suballocation, we just play it safe.
+ */
+ binary->rx_size = align(binary->rx_size + 3 * 64, 64);
+ }
+
+ return true;
#undef report_if
#undef report_elf_if
fail:
- ac_rtld_close(binary);
- return false;
+ ac_rtld_close(binary);
+ return false;
}
/**
 * Release everything owned by \p binary: each part's section array and
 * libelf handle, the LDS symbol dynarray, and the parts array itself.
 * parts and num_parts are reset afterwards.
 */
void ac_rtld_close(struct ac_rtld_binary *binary)
{
- for (unsigned i = 0; i < binary->num_parts; ++i) {
- struct ac_rtld_part *part = &binary->parts[i];
- free(part->sections);
- elf_end(part->elf);
- }
-
- util_dynarray_fini(&binary->lds_symbols);
- free(binary->parts);
- binary->parts = NULL;
- binary->num_parts = 0;
+ for (unsigned i = 0; i < binary->num_parts; ++i) {
+ struct ac_rtld_part *part = &binary->parts[i];
+ free(part->sections);
+ elf_end(part->elf);
+ }
+
+ util_dynarray_fini(&binary->lds_symbols);
+ free(binary->parts);
+ binary->parts = NULL;
+ binary->num_parts = 0;
}
/**
 * Look up the first section of \p part whose recorded name equals \p name
 * and return its contents.
 *
 * \param part   shader part to search
 * \param name   section name to match
 * \param data   out: pointer to the section's data buffer
 * \param nbytes out: size of that buffer in bytes
 *
 * Returns true when the section was found and its data could be read.
 * Returns false when no section matches, or (after reporting an ELF error)
 * when elf_getdata() fails; the outputs are not written in either case.
 */
-static bool get_section_by_name(struct ac_rtld_part *part, const char *name,
- const char **data, size_t *nbytes)
+static bool get_section_by_name(struct ac_rtld_part *part, const char *name, const char **data,
+ size_t *nbytes)
{
- for (unsigned i = 0; i < part->num_sections; ++i) {
- struct ac_rtld_section *s = &part->sections[i];
- if (s->name && !strcmp(name, s->name)) {
- Elf_Scn *target_scn = elf_getscn(part->elf, i);
- Elf_Data *target_data = elf_getdata(target_scn, NULL);
- if (!target_data) {
- report_elf_errorf("ac_rtld: get_section_by_name: elf_getdata");
- return false;
- }
-
- *data = target_data->d_buf;
- *nbytes = target_data->d_size;
- return true;
- }
- }
- return false;
+ for (unsigned i = 0; i < part->num_sections; ++i) {
+ struct ac_rtld_section *s = &part->sections[i];
+ if (s->name && !strcmp(name, s->name)) {
+ Elf_Scn *target_scn = elf_getscn(part->elf, i);
+ Elf_Data *target_data = elf_getdata(target_scn, NULL);
+ if (!target_data) {
+ report_elf_errorf("ac_rtld: get_section_by_name: elf_getdata");
+ return false;
+ }
+
+ *data = target_data->d_buf;
+ *nbytes = target_data->d_size;
+ return true;
+ }
+ }
+ return false;
}
/**
 * Public wrapper around get_section_by_name() for a binary that consists of
 * exactly one part (asserted); the lookup is done on parts[0].
 */
-bool ac_rtld_get_section_by_name(struct ac_rtld_binary *binary, const char *name,
- const char **data, size_t *nbytes)
+bool ac_rtld_get_section_by_name(struct ac_rtld_binary *binary, const char *name, const char **data,
+ size_t *nbytes)
{
- assert(binary->num_parts == 1);
- return get_section_by_name(&binary->parts[0], name, data, nbytes);
+ assert(binary->num_parts == 1);
+ return get_section_by_name(&binary->parts[0], name, data, nbytes);
}
/**
 * Parse the ".AMDGPU.config" section of every part of \p binary and merge
 * the results into \p config.
 *
 * Register counts, spill counts, scratch size and lds_size are combined
 * with MAX2 against the values already in \p config. float_mode,
 * spi_ps_input_ena/addr and rsrc1/rsrc2 are overwritten from each part,
 * guarded by the assertions below.
 *
 * Returns false as soon as a part has no readable ".AMDGPU.config" section,
 * true otherwise.
 */
-bool ac_rtld_read_config(const struct radeon_info *info,
- struct ac_rtld_binary *binary,
- struct ac_shader_config *config)
+bool ac_rtld_read_config(const struct radeon_info *info, struct ac_rtld_binary *binary,
+ struct ac_shader_config *config)
{
- for (unsigned i = 0; i < binary->num_parts; ++i) {
- struct ac_rtld_part *part = &binary->parts[i];
- const char *config_data;
- size_t config_nbytes;
-
- if (!get_section_by_name(part, ".AMDGPU.config",
- &config_data, &config_nbytes))
- return false;
-
- /* TODO: be precise about scratch use? */
- struct ac_shader_config c = {};
- ac_parse_shader_binary_config(config_data, config_nbytes,
- binary->wave_size, true, info, &c);
-
- config->num_sgprs = MAX2(config->num_sgprs, c.num_sgprs);
- config->num_vgprs = MAX2(config->num_vgprs, c.num_vgprs);
- config->spilled_sgprs = MAX2(config->spilled_sgprs, c.spilled_sgprs);
- config->spilled_vgprs = MAX2(config->spilled_vgprs, c.spilled_vgprs);
- config->scratch_bytes_per_wave = MAX2(config->scratch_bytes_per_wave,
- c.scratch_bytes_per_wave);
-
- assert(i == 0 || config->float_mode == c.float_mode);
- config->float_mode = c.float_mode;
-
- /* SPI_PS_INPUT_ENA/ADDR can't be combined. Only the value from
- * the main shader part is used. */
- assert(config->spi_ps_input_ena == 0 &&
- config->spi_ps_input_addr == 0);
- config->spi_ps_input_ena = c.spi_ps_input_ena;
- config->spi_ps_input_addr = c.spi_ps_input_addr;
-
- /* TODO: consistently use LDS symbols for this */
- config->lds_size = MAX2(config->lds_size, c.lds_size);
-
- /* TODO: Should we combine these somehow? It's currently only
- * used for radeonsi's compute, where multiple parts aren't used. */
- assert(config->rsrc1 == 0 && config->rsrc2 == 0);
- config->rsrc1 = c.rsrc1;
- config->rsrc2 = c.rsrc2;
- }
-
- return true;
+ for (unsigned i = 0; i < binary->num_parts; ++i) {
+ struct ac_rtld_part *part = &binary->parts[i];
+ const char *config_data;
+ size_t config_nbytes;
+
+ if (!get_section_by_name(part, ".AMDGPU.config", &config_data, &config_nbytes))
+ return false;
+
+ /* TODO: be precise about scratch use? */
+ struct ac_shader_config c = {};
+ ac_parse_shader_binary_config(config_data, config_nbytes, binary->wave_size, true, info, &c);
+
+ config->num_sgprs = MAX2(config->num_sgprs, c.num_sgprs);
+ config->num_vgprs = MAX2(config->num_vgprs, c.num_vgprs);
+ config->spilled_sgprs = MAX2(config->spilled_sgprs, c.spilled_sgprs);
+ config->spilled_vgprs = MAX2(config->spilled_vgprs, c.spilled_vgprs);
+ config->scratch_bytes_per_wave =
+ MAX2(config->scratch_bytes_per_wave, c.scratch_bytes_per_wave);
+
+ assert(i == 0 || config->float_mode == c.float_mode);
+ config->float_mode = c.float_mode;
+
+ /* SPI_PS_INPUT_ENA/ADDR can't be combined. Only the value from
+ * the main shader part is used. */
+ assert(config->spi_ps_input_ena == 0 && config->spi_ps_input_addr == 0);
+ config->spi_ps_input_ena = c.spi_ps_input_ena;
+ config->spi_ps_input_addr = c.spi_ps_input_addr;
+
+ /* TODO: consistently use LDS symbols for this */
+ config->lds_size = MAX2(config->lds_size, c.lds_size);
+
+ /* TODO: Should we combine these somehow? It's currently only
+ * used for radeonsi's compute, where multiple parts aren't used. */
+ assert(config->rsrc1 == 0 && config->rsrc2 == 0);
+ config->rsrc1 = c.rsrc1;
+ config->rsrc2 = c.rsrc2;
+ }
+
+ return true;
}
/**
 * Compute the value of ELF symbol \p sym (named \p name) of shader part
 * \p part_idx and store it in \p value.
 *
 * Symbols whose section index is SHN_UNDEF or SHN_AMDGPU_LDS are looked up
 * first in the binary's LDS symbol list (yielding the LDS offset) and then
 * through the get_external_symbol callback of \p u. All other symbols must
 * refer to an in-bounds section flagged is_rx; their value is
 * rx_va + section offset + st_value.
 *
 * Returns true on success, or false after reporting an error.
 */
-static bool resolve_symbol(const struct ac_rtld_upload_info *u,
- unsigned part_idx, const Elf64_Sym *sym,
- const char *name, uint64_t *value)
+static bool resolve_symbol(const struct ac_rtld_upload_info *u, unsigned part_idx,
+ const Elf64_Sym *sym, const char *name, uint64_t *value)
{
- /* TODO: properly disentangle the undef and the LDS cases once
- * STT_AMDGPU_LDS is retired. */
- if (sym->st_shndx == SHN_UNDEF || sym->st_shndx == SHN_AMDGPU_LDS) {
- const struct ac_rtld_symbol *lds_sym =
- find_symbol(&u->binary->lds_symbols, name, part_idx);
-
- if (lds_sym) {
- *value = lds_sym->offset;
- return true;
- }
-
- /* TODO: resolve from other parts */
-
- if (u->get_external_symbol(u->cb_data, name, value))
- return true;
-
- report_errorf("symbol %s: unknown", name);
- return false;
- }
-
- struct ac_rtld_part *part = &u->binary->parts[part_idx];
- if (sym->st_shndx >= part->num_sections) {
- report_errorf("symbol %s: section out of bounds", name);
- return false;
- }
-
- struct ac_rtld_section *s = &part->sections[sym->st_shndx];
- if (!s->is_rx) {
- report_errorf("symbol %s: bad section", name);
- return false;
- }
-
- uint64_t section_base = u->rx_va + s->offset;
-
- *value = section_base + sym->st_value;
- return true;
+ /* TODO: properly disentangle the undef and the LDS cases once
+ * STT_AMDGPU_LDS is retired. */
+ if (sym->st_shndx == SHN_UNDEF || sym->st_shndx == SHN_AMDGPU_LDS) {
+ const struct ac_rtld_symbol *lds_sym = find_symbol(&u->binary->lds_symbols, name, part_idx);
+
+ if (lds_sym) {
+ *value = lds_sym->offset;
+ return true;
+ }
+
+ /* TODO: resolve from other parts */
+
+ if (u->get_external_symbol(u->cb_data, name, value))
+ return true;
+
+ report_errorf("symbol %s: unknown", name);
+ return false;
+ }
+
+ struct ac_rtld_part *part = &u->binary->parts[part_idx];
+ if (sym->st_shndx >= part->num_sections) {
+ report_errorf("symbol %s: section out of bounds", name);
+ return false;
+ }
+
+ struct ac_rtld_section *s = &part->sections[sym->st_shndx];
+ if (!s->is_rx) {
+ report_errorf("symbol %s: bad section", name);
+ return false;
+ }
+
+ uint64_t section_base = u->rx_va + s->offset;
+
+ *value = section_base + sym->st_value;
+ return true;
}
-static bool apply_relocs(const struct ac_rtld_upload_info *u,
- unsigned part_idx, const Elf64_Shdr *reloc_shdr,
- const Elf_Data *reloc_data)
+static bool apply_relocs(const struct ac_rtld_upload_info *u, unsigned part_idx,
+ const Elf64_Shdr *reloc_shdr, const Elf_Data *reloc_data)
{
-#define report_if(cond) \
- do { \
- if ((cond)) { \
- report_errorf(#cond); \
- return false; \
- } \
- } while (false)
-#define report_elf_if(cond) \
- do { \
- if ((cond)) { \
- report_elf_errorf(#cond); \
- return false; \
- } \
- } while (false)
-
- struct ac_rtld_part *part = &u->binary->parts[part_idx];
- Elf_Scn *target_scn = elf_getscn(part->elf, reloc_shdr->sh_info);
- report_elf_if(!target_scn);
-
- Elf_Data *target_data = elf_getdata(target_scn, NULL);
- report_elf_if(!target_data);
-
- Elf_Scn *symbols_scn = elf_getscn(part->elf, reloc_shdr->sh_link);
- report_elf_if(!symbols_scn);
-
- Elf64_Shdr *symbols_shdr = elf64_getshdr(symbols_scn);
- report_elf_if(!symbols_shdr);
- uint32_t strtabidx = symbols_shdr->sh_link;
-
- Elf_Data *symbols_data = elf_getdata(symbols_scn, NULL);
- report_elf_if(!symbols_data);
-
- const Elf64_Sym *symbols = symbols_data->d_buf;
- size_t num_symbols = symbols_data->d_size / sizeof(Elf64_Sym);
-
- struct ac_rtld_section *s = &part->sections[reloc_shdr->sh_info];
- report_if(!s->is_rx);
-
- const char *orig_base = target_data->d_buf;
- char *dst_base = u->rx_ptr + s->offset;
- uint64_t va_base = u->rx_va + s->offset;
-
- Elf64_Rel *rel = reloc_data->d_buf;
- size_t num_relocs = reloc_data->d_size / sizeof(*rel);
- for (size_t i = 0; i < num_relocs; ++i, ++rel) {
- size_t r_sym = ELF64_R_SYM(rel->r_info);
- unsigned r_type = ELF64_R_TYPE(rel->r_info);
-
- const char *orig_ptr = orig_base + rel->r_offset;
- char *dst_ptr = dst_base + rel->r_offset;
- uint64_t va = va_base + rel->r_offset;
-
- uint64_t symbol;
- uint64_t addend;
-
- if (r_sym == STN_UNDEF) {
- symbol = 0;
- } else {
- report_elf_if(r_sym >= num_symbols);
-
- const Elf64_Sym *sym = &symbols[r_sym];
- const char *symbol_name =
- elf_strptr(part->elf, strtabidx, sym->st_name);
- report_elf_if(!symbol_name);
-
- if (!resolve_symbol(u, part_idx, sym, symbol_name, &symbol))
- return false;
- }
-
- /* TODO: Should we also support .rela sections, where the
- * addend is part of the relocation record? */
-
- /* Load the addend from the ELF instead of the destination,
- * because the destination may be in VRAM. */
- switch (r_type) {
- case R_AMDGPU_ABS32:
- case R_AMDGPU_ABS32_LO:
- case R_AMDGPU_ABS32_HI:
- case R_AMDGPU_REL32:
- case R_AMDGPU_REL32_LO:
- case R_AMDGPU_REL32_HI:
- addend = *(const uint32_t *)orig_ptr;
- break;
- case R_AMDGPU_ABS64:
- case R_AMDGPU_REL64:
- addend = *(const uint64_t *)orig_ptr;
- break;
- default:
- report_errorf("unsupported r_type == %u", r_type);
- return false;
- }
-
- uint64_t abs = symbol + addend;
-
- switch (r_type) {
- case R_AMDGPU_ABS32:
- assert((uint32_t)abs == abs);
- case R_AMDGPU_ABS32_LO:
- *(uint32_t *)dst_ptr = util_cpu_to_le32(abs);
- break;
- case R_AMDGPU_ABS32_HI:
- *(uint32_t *)dst_ptr = util_cpu_to_le32(abs >> 32);
- break;
- case R_AMDGPU_ABS64:
- *(uint64_t *)dst_ptr = util_cpu_to_le64(abs);
- break;
- case R_AMDGPU_REL32:
- assert((int64_t)(int32_t)(abs - va) == (int64_t)(abs - va));
- case R_AMDGPU_REL32_LO:
- *(uint32_t *)dst_ptr = util_cpu_to_le32(abs - va);
- break;
- case R_AMDGPU_REL32_HI:
- *(uint32_t *)dst_ptr = util_cpu_to_le32((abs - va) >> 32);
- break;
- case R_AMDGPU_REL64:
- *(uint64_t *)dst_ptr = util_cpu_to_le64(abs - va);
- break;
- default:
- unreachable("bad r_type");
- }
- }
-
- return true;
+#define report_if(cond) \
+ do { \
+ if ((cond)) { \
+ report_errorf(#cond); \
+ return false; \
+ } \
+ } while (false)
+#define report_elf_if(cond) \
+ do { \
+ if ((cond)) { \
+ report_elf_errorf(#cond); \
+ return false; \
+ } \
+ } while (false)
+
+ struct ac_rtld_part *part = &u->binary->parts[part_idx];
+ Elf_Scn *target_scn = elf_getscn(part->elf, reloc_shdr->sh_info);
+ report_elf_if(!target_scn);
+
+ Elf_Data *target_data = elf_getdata(target_scn, NULL);
+ report_elf_if(!target_data);
+
+ Elf_Scn *symbols_scn = elf_getscn(part->elf, reloc_shdr->sh_link);
+ report_elf_if(!symbols_scn);
+
+ Elf64_Shdr *symbols_shdr = elf64_getshdr(symbols_scn);
+ report_elf_if(!symbols_shdr);
+ uint32_t strtabidx = symbols_shdr->sh_link;
+
+ Elf_Data *symbols_data = elf_getdata(symbols_scn, NULL);
+ report_elf_if(!symbols_data);
+
+ const Elf64_Sym *symbols = symbols_data->d_buf;
+ size_t num_symbols = symbols_data->d_size / sizeof(Elf64_Sym);
+
+ struct ac_rtld_section *s = &part->sections[reloc_shdr->sh_info];
+ report_if(!s->is_rx);
+
+ const char *orig_base = target_data->d_buf;
+ char *dst_base = u->rx_ptr + s->offset;
+ uint64_t va_base = u->rx_va + s->offset;
+
+ Elf64_Rel *rel = reloc_data->d_buf;
+ size_t num_relocs = reloc_data->d_size / sizeof(*rel);
+ for (size_t i = 0; i < num_relocs; ++i, ++rel) {
+ size_t r_sym = ELF64_R_SYM(rel->r_info);
+ unsigned r_type = ELF64_R_TYPE(rel->r_info);
+
+ const char *orig_ptr = orig_base + rel->r_offset;
+ char *dst_ptr = dst_base + rel->r_offset;
+ uint64_t va = va_base + rel->r_offset;
+
+ uint64_t symbol;
+ uint64_t addend;
+
+ if (r_sym == STN_UNDEF) {
+ symbol = 0;
+ } else {
+ report_elf_if(r_sym >= num_symbols);
+
+ const Elf64_Sym *sym = &symbols[r_sym];
+ const char *symbol_name = elf_strptr(part->elf, strtabidx, sym->st_name);
+ report_elf_if(!symbol_name);
+
+ if (!resolve_symbol(u, part_idx, sym, symbol_name, &symbol))
+ return false;
+ }
+
+ /* TODO: Should we also support .rela sections, where the
+ * addend is part of the relocation record? */
+
+ /* Load the addend from the ELF instead of the destination,
+ * because the destination may be in VRAM. */
+ switch (r_type) {
+ case R_AMDGPU_ABS32:
+ case R_AMDGPU_ABS32_LO:
+ case R_AMDGPU_ABS32_HI:
+ case R_AMDGPU_REL32:
+ case R_AMDGPU_REL32_LO:
+ case R_AMDGPU_REL32_HI:
+ addend = *(const uint32_t *)orig_ptr;
+ break;
+ case R_AMDGPU_ABS64:
+ case R_AMDGPU_REL64:
+ addend = *(const uint64_t *)orig_ptr;
+ break;
+ default:
+ report_errorf("unsupported r_type == %u", r_type);
+ return false;
+ }
+
+ uint64_t abs = symbol + addend;
+
+ switch (r_type) {
+ case R_AMDGPU_ABS32:
+ assert((uint32_t)abs == abs);
+ case R_AMDGPU_ABS32_LO:
+ *(uint32_t *)dst_ptr = util_cpu_to_le32(abs);
+ break;
+ case R_AMDGPU_ABS32_HI:
+ *(uint32_t *)dst_ptr = util_cpu_to_le32(abs >> 32);
+ break;
+ case R_AMDGPU_ABS64:
+ *(uint64_t *)dst_ptr = util_cpu_to_le64(abs);
+ break;
+ case R_AMDGPU_REL32:
+ assert((int64_t)(int32_t)(abs - va) == (int64_t)(abs - va));
+ case R_AMDGPU_REL32_LO:
+ *(uint32_t *)dst_ptr = util_cpu_to_le32(abs - va);
+ break;
+ case R_AMDGPU_REL32_HI:
+ *(uint32_t *)dst_ptr = util_cpu_to_le32((abs - va) >> 32);
+ break;
+ case R_AMDGPU_REL64:
+ *(uint64_t *)dst_ptr = util_cpu_to_le64(abs - va);
+ break;
+ default:
+ unreachable("bad r_type");
+ }
+ }
+
+ return true;
#undef report_if
#undef report_elf_if
*/
bool ac_rtld_upload(struct ac_rtld_upload_info *u)
{
-#define report_if(cond) \
- do { \
- if ((cond)) { \
- report_errorf(#cond); \
- return false; \
- } \
- } while (false)
-#define report_elf_if(cond) \
- do { \
- if ((cond)) { \
- report_errorf(#cond); \
- return false; \
- } \
- } while (false)
-
- if (u->binary->options.halt_at_entry) {
- /* s_sethalt 1 */
- *(uint32_t *)u->rx_ptr = util_cpu_to_le32(0xbf8d0001);
- }
-
- /* First pass: upload raw section data and lay out private LDS symbols. */
- for (unsigned i = 0; i < u->binary->num_parts; ++i) {
- struct ac_rtld_part *part = &u->binary->parts[i];
-
- Elf_Scn *section = NULL;
- while ((section = elf_nextscn(part->elf, section))) {
- Elf64_Shdr *shdr = elf64_getshdr(section);
- struct ac_rtld_section *s = &part->sections[elf_ndxscn(section)];
-
- if (!s->is_rx)
- continue;
-
- report_if(shdr->sh_type != SHT_PROGBITS);
-
- Elf_Data *data = elf_getdata(section, NULL);
- report_elf_if(!data || data->d_size != shdr->sh_size);
- memcpy(u->rx_ptr + s->offset, data->d_buf, shdr->sh_size);
- }
- }
-
- if (u->binary->rx_end_markers) {
- uint32_t *dst = (uint32_t *)(u->rx_ptr + u->binary->rx_end_markers);
- for (unsigned i = 0; i < DEBUGGER_NUM_MARKERS; ++i)
- *dst++ = util_cpu_to_le32(DEBUGGER_END_OF_CODE_MARKER);
- }
-
- /* Second pass: handle relocations, overwriting uploaded data where
- * appropriate. */
- for (unsigned i = 0; i < u->binary->num_parts; ++i) {
- struct ac_rtld_part *part = &u->binary->parts[i];
- Elf_Scn *section = NULL;
- while ((section = elf_nextscn(part->elf, section))) {
- Elf64_Shdr *shdr = elf64_getshdr(section);
- if (shdr->sh_type == SHT_REL) {
- Elf_Data *relocs = elf_getdata(section, NULL);
- report_elf_if(!relocs || relocs->d_size != shdr->sh_size);
- if (!apply_relocs(u, i, shdr, relocs))
- return false;
- } else if (shdr->sh_type == SHT_RELA) {
- report_errorf("SHT_RELA not supported");
- return false;
- }
- }
- }
-
- return true;
+#define report_if(cond) \
+ do { \
+ if ((cond)) { \
+ report_errorf(#cond); \
+ return false; \
+ } \
+ } while (false)
+#define report_elf_if(cond) \
+ do { \
+ if ((cond)) { \
+ report_errorf(#cond); \
+ return false; \
+ } \
+ } while (false)
+
+ if (u->binary->options.halt_at_entry) {
+ /* s_sethalt 1 */
+ *(uint32_t *)u->rx_ptr = util_cpu_to_le32(0xbf8d0001);
+ }
+
+ /* First pass: upload raw section data and lay out private LDS symbols. */
+ for (unsigned i = 0; i < u->binary->num_parts; ++i) {
+ struct ac_rtld_part *part = &u->binary->parts[i];
+
+ Elf_Scn *section = NULL;
+ while ((section = elf_nextscn(part->elf, section))) {
+ Elf64_Shdr *shdr = elf64_getshdr(section);
+ struct ac_rtld_section *s = &part->sections[elf_ndxscn(section)];
+
+ if (!s->is_rx)
+ continue;
+
+ report_if(shdr->sh_type != SHT_PROGBITS);
+
+ Elf_Data *data = elf_getdata(section, NULL);
+ report_elf_if(!data || data->d_size != shdr->sh_size);
+ memcpy(u->rx_ptr + s->offset, data->d_buf, shdr->sh_size);
+ }
+ }
+
+ if (u->binary->rx_end_markers) {
+ uint32_t *dst = (uint32_t *)(u->rx_ptr + u->binary->rx_end_markers);
+ for (unsigned i = 0; i < DEBUGGER_NUM_MARKERS; ++i)
+ *dst++ = util_cpu_to_le32(DEBUGGER_END_OF_CODE_MARKER);
+ }
+
+ /* Second pass: handle relocations, overwriting uploaded data where
+ * appropriate. */
+ for (unsigned i = 0; i < u->binary->num_parts; ++i) {
+ struct ac_rtld_part *part = &u->binary->parts[i];
+ Elf_Scn *section = NULL;
+ while ((section = elf_nextscn(part->elf, section))) {
+ Elf64_Shdr *shdr = elf64_getshdr(section);
+ if (shdr->sh_type == SHT_REL) {
+ Elf_Data *relocs = elf_getdata(section, NULL);
+ report_elf_if(!relocs || relocs->d_size != shdr->sh_size);
+ if (!apply_relocs(u, i, shdr, relocs))
+ return false;
+ } else if (shdr->sh_type == SHT_RELA) {
+ report_errorf("SHT_RELA not supported");
+ return false;
+ }
+ }
+ }
+
+ return true;
#undef report_if
#undef report_elf_if