#define MY_EM_AMDGPU 224
#ifndef STT_AMDGPU_LDS
-#define STT_AMDGPU_LDS 13
+#define STT_AMDGPU_LDS 13 // deprecated: old-style LDS symbol type, superseded by SHN_AMDGPU_LDS -- remove once retired
+#endif
+
+#ifndef SHN_AMDGPU_LDS
+#define SHN_AMDGPU_LDS 0xff00
#endif
#ifndef R_AMDGPU_NONE
Elf_Scn *section,
uint32_t *lds_end_align)
{
-#define report_elf_if(cond) \
+#define report_if(cond) \
do { \
if ((cond)) { \
report_errorf(#cond); \
return false; \
} \
} while (false)
+#define report_elf_if(cond) \
+ do { \
+ if ((cond)) { \
+ report_elf_errorf(#cond); \
+ return false; \
+ } \
+ } while (false)
struct ac_rtld_part *part = &binary->parts[part_idx];
Elf64_Shdr *shdr = elf64_getshdr(section);
size_t num_symbols = symbols_data->d_size / sizeof(Elf64_Sym);
for (size_t j = 0; j < num_symbols; ++j, ++symbol) {
- if (ELF64_ST_TYPE(symbol->st_info) != STT_AMDGPU_LDS)
+ struct ac_rtld_symbol s = {};
+
+ if (ELF64_ST_TYPE(symbol->st_info) == STT_AMDGPU_LDS) {
+ /* old-style LDS symbols from initial prototype -- remove eventually */
+ s.align = MIN2(1u << (symbol->st_other >> 3), 1u << 16);
+ } else if (symbol->st_shndx == SHN_AMDGPU_LDS) {
+ s.align = MIN2(symbol->st_value, 1u << 16);
+ report_if(!util_is_power_of_two_nonzero(s.align));
+ } else
continue;
- report_elf_if(symbol->st_size > 1u << 29);
+ report_if(symbol->st_size > 1u << 29);
- struct ac_rtld_symbol s = {};
s.name = elf_strptr(part->elf, strtabidx, symbol->st_name);
s.size = symbol->st_size;
- s.align = MIN2(1u << (symbol->st_other >> 3), 1u << 16);
s.part_idx = part_idx;
if (!strcmp(s.name, "__lds_end")) {
return true;
+#undef report_if
#undef report_elf_if
}
elf_version(EV_CURRENT);
memset(binary, 0, sizeof(*binary));
+ memcpy(&binary->options, &i.options, sizeof(binary->options));
+ binary->wave_size = i.wave_size;
binary->num_parts = i.num_parts;
binary->parts = calloc(sizeof(*binary->parts), i.num_parts);
if (!binary->parts)
util_dynarray_foreach(&binary->lds_symbols, struct ac_rtld_symbol, symbol)
symbol->part_idx = ~0u;
- unsigned max_lds_size = i.info->chip_class >= GFX7 ? 64 * 1024 : 32 * 1024;
+ unsigned max_lds_size = 64 * 1024;
+
+ if (i.info->chip_class == GFX6 ||
+ (i.shader_type != MESA_SHADER_COMPUTE &&
+ i.shader_type != MESA_SHADER_FRAGMENT))
+ max_lds_size = 32 * 1024;
+
uint64_t shared_lds_size = 0;
if (!layout_symbols(binary->lds_symbols.data, i.num_shared_lds_symbols, &shared_lds_size))
goto fail;
- report_if(shared_lds_size > max_lds_size);
+
+ if (shared_lds_size > max_lds_size) {
+ fprintf(stderr, "ac_rtld error(1): too much LDS (used = %u, max = %u)\n",
+ (unsigned)shared_lds_size, max_lds_size);
+ goto fail;
+ }
binary->lds_size = shared_lds_size;
/* First pass over all parts: open ELFs, pre-determine the placement of
* sections in the memory image, and collect and layout private LDS symbols. */
uint32_t lds_end_align = 0;
+ if (binary->options.halt_at_entry)
+ pasted_text_size += 4;
+
for (unsigned part_idx = 0; part_idx < i.num_parts; ++part_idx) {
struct ac_rtld_part *part = &binary->parts[part_idx];
unsigned part_lds_symbols_begin =
lds_end->part_idx = ~0u;
}
- report_elf_if(binary->lds_size > max_lds_size);
+ if (binary->lds_size > max_lds_size) {
+ fprintf(stderr, "ac_rtld error(2): too much LDS (used = %u, max = %u)\n",
+ (unsigned)binary->lds_size, max_lds_size);
+ goto fail;
+ }
/* Second pass: Adjust offsets of non-pasted text sections. */
binary->rx_size = pasted_text_size;
binary->rx_size += rx_size;
+ if (i.info->chip_class >= GFX10) {
+ /* In gfx10, the SQ fetches up to 3 cache lines of 16 dwords
+ * ahead of the PC, configurable by SH_MEM_CONFIG and
+ * S_INST_PREFETCH. This can cause two issues:
+ *
+ * (1) Crossing a page boundary to an unmapped page. The logic
+ * does not distinguish between a required fetch and a "mere"
+ * prefetch and will fault.
+ *
+ * (2) Prefetching instructions that will be changed for a
+ * different shader.
+ *
+ * (2) is not currently an issue because we flush the I$ at IB
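+ * boundaries, but (1) needs to be addressed. Due to buffer
+ * suballocation, we just play it safe and always pad the code
+ * size by 3 cache lines (3 * 64 bytes), rounded up to a multiple
+ * of 64 bytes.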
+ */
+ binary->rx_size = align(binary->rx_size + 3 * 64, 64);
+ }
+
return true;
#undef report_if
/* TODO: be precise about scratch use? */
struct ac_shader_config c = {};
- ac_parse_shader_binary_config(config_data, config_nbytes, true, &c);
+ ac_parse_shader_binary_config(config_data, config_nbytes,
+ binary->wave_size, true, &c);
config->num_sgprs = MAX2(config->num_sgprs, c.num_sgprs);
config->num_vgprs = MAX2(config->num_vgprs, c.num_vgprs);
unsigned part_idx, const Elf64_Sym *sym,
const char *name, uint64_t *value)
{
- if (sym->st_shndx == SHN_UNDEF) {
+ /* TODO: properly disentangle the undef and the LDS cases once
+ * STT_AMDGPU_LDS is retired. */
+ if (sym->st_shndx == SHN_UNDEF || sym->st_shndx == SHN_AMDGPU_LDS) {
const struct ac_rtld_symbol *lds_sym =
find_symbol(&u->binary->lds_symbols, name, part_idx);
} \
} while (false)
+ if (u->binary->options.halt_at_entry) {
+ /* s_sethalt 1 */
+ *(uint32_t *)u->rx_ptr = util_cpu_to_le32(0xbf8d0001);
+ }
+
/* First pass: upload raw section data and lay out private LDS symbols. */
for (unsigned i = 0; i < u->binary->num_parts; ++i) {
struct ac_rtld_part *part = &u->binary->parts[i];