X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Famd%2Fcommon%2Fac_rtld.c;h=a1bb51a8a88861cc4e7e8202e6b1777fb46ee155;hb=d9f33951df9c107e7e6b88ec7cc884f38f92c52b;hp=92020c5f0dd50baf9dcd02b6a3b624d5fff4bf43;hpb=87182200c7dc3528ccb10b5da97cf49545403503;p=mesa.git diff --git a/src/amd/common/ac_rtld.c b/src/amd/common/ac_rtld.c index 92020c5f0dd..a1bb51a8a88 100644 --- a/src/amd/common/ac_rtld.c +++ b/src/amd/common/ac_rtld.c @@ -39,7 +39,11 @@ #define MY_EM_AMDGPU 224 #ifndef STT_AMDGPU_LDS -#define STT_AMDGPU_LDS 13 +#define STT_AMDGPU_LDS 13 // this is deprecated -- remove +#endif + +#ifndef SHN_AMDGPU_LDS +#define SHN_AMDGPU_LDS 0xff00 #endif #ifndef R_AMDGPU_NONE @@ -176,13 +180,20 @@ static bool read_private_lds_symbols(struct ac_rtld_binary *binary, Elf_Scn *section, uint32_t *lds_end_align) { -#define report_elf_if(cond) \ +#define report_if(cond) \ do { \ if ((cond)) { \ report_errorf(#cond); \ return false; \ } \ } while (false) +#define report_elf_if(cond) \ + do { \ + if ((cond)) { \ + report_elf_errorf(#cond); \ + return false; \ + } \ + } while (false) struct ac_rtld_part *part = &binary->parts[part_idx]; Elf64_Shdr *shdr = elf64_getshdr(section); @@ -194,15 +205,21 @@ static bool read_private_lds_symbols(struct ac_rtld_binary *binary, size_t num_symbols = symbols_data->d_size / sizeof(Elf64_Sym); for (size_t j = 0; j < num_symbols; ++j, ++symbol) { - if (ELF64_ST_TYPE(symbol->st_info) != STT_AMDGPU_LDS) + struct ac_rtld_symbol s = {}; + + if (ELF64_ST_TYPE(symbol->st_info) == STT_AMDGPU_LDS) { + /* old-style LDS symbols from initial prototype -- remove eventually */ + s.align = MIN2(1u << (symbol->st_other >> 3), 1u << 16); + } else if (symbol->st_shndx == SHN_AMDGPU_LDS) { + s.align = MIN2(symbol->st_value, 1u << 16); + report_if(!util_is_power_of_two_nonzero(s.align)); + } else continue; - report_elf_if(symbol->st_size > 1u << 29); + report_if(symbol->st_size > 1u << 29); - struct ac_rtld_symbol s = {}; s.name = elf_strptr(part->elf, strtabidx, symbol->st_name); s.size = symbol->st_size; - s.align = MIN2(1u << (symbol->st_other >> 3), 1u << 16); s.part_idx = part_idx; if (!strcmp(s.name, "__lds_end")) { @@ -224,6 +241,7 @@ static bool read_private_lds_symbols(struct ac_rtld_binary *binary, return true; +#undef report_if #undef report_elf_if } @@ -243,6 +261,8 @@ bool ac_rtld_open(struct ac_rtld_binary *binary, elf_version(EV_CURRENT); memset(binary, 0, sizeof(*binary)); + memcpy(&binary->options, &i.options, sizeof(binary->options)); + binary->wave_size = i.wave_size; binary->num_parts = i.num_parts; binary->parts = calloc(sizeof(*binary->parts), i.num_parts); if (!binary->parts) @@ -279,17 +299,31 @@ bool ac_rtld_open(struct ac_rtld_binary *binary, util_dynarray_foreach(&binary->lds_symbols, struct ac_rtld_symbol, symbol) symbol->part_idx = ~0u; - unsigned max_lds_size = i.info->chip_class >= GFX7 ? 64 * 1024 : 32 * 1024; + unsigned max_lds_size = 64 * 1024; + + if (i.info->chip_class == GFX6 || + (i.shader_type != MESA_SHADER_COMPUTE && + i.shader_type != MESA_SHADER_FRAGMENT)) + max_lds_size = 32 * 1024; + uint64_t shared_lds_size = 0; if (!layout_symbols(binary->lds_symbols.data, i.num_shared_lds_symbols, &shared_lds_size)) goto fail; - report_if(shared_lds_size > max_lds_size); + + if (shared_lds_size > max_lds_size) { + fprintf(stderr, "ac_rtld error(1): too much LDS (used = %u, max = %u)\n", + (unsigned)shared_lds_size, max_lds_size); + goto fail; + } binary->lds_size = shared_lds_size; /* First pass over all parts: open ELFs, pre-determine the placement of * sections in the memory image, and collect and layout private LDS symbols. */ uint32_t lds_end_align = 0; + if (binary->options.halt_at_entry) + pasted_text_size += 4; + for (unsigned part_idx = 0; part_idx < i.num_parts; ++part_idx) { struct ac_rtld_part *part = &binary->parts[part_idx]; unsigned part_lds_symbols_begin = @@ -381,7 +415,11 @@ bool ac_rtld_open(struct ac_rtld_binary *binary, lds_end->part_idx = ~0u; } - report_elf_if(binary->lds_size > max_lds_size); + if (binary->lds_size > max_lds_size) { + fprintf(stderr, "ac_rtld error(2): too much LDS (used = %u, max = %u)\n", + (unsigned)binary->lds_size, max_lds_size); + goto fail; + } /* Second pass: Adjust offsets of non-pasted text sections. */ binary->rx_size = pasted_text_size; @@ -401,6 +439,25 @@ bool ac_rtld_open(struct ac_rtld_binary *binary, binary->rx_size += rx_size; + if (i.info->chip_class >= GFX10) { + /* In gfx10, the SQ fetches up to 3 cache lines of 16 dwords + * ahead of the PC, configurable by SH_MEM_CONFIG and + * S_INST_PREFETCH. This can cause two issues: + * + * (1) Crossing a page boundary to an unmapped page. The logic + * does not distinguish between a required fetch and a "mere" + * prefetch and will fault. + * + * (2) Prefetching instructions that will be changed for a + * different shader. + * + * (2) is not currently an issue because we flush the I$ at IB + * boundaries, but (1) needs to be addressed. Due to buffer + * suballocation, we just play it safe. + */ + binary->rx_size = align(binary->rx_size + 3 * 64, 64); + } + return true; #undef report_if @@ -467,7 +524,8 @@ bool ac_rtld_read_config(struct ac_rtld_binary *binary, /* TODO: be precise about scratch use? */ struct ac_shader_config c = {}; - ac_parse_shader_binary_config(config_data, config_nbytes, true, &c); + ac_parse_shader_binary_config(config_data, config_nbytes, + binary->wave_size, true, &c); config->num_sgprs = MAX2(config->num_sgprs, c.num_sgprs); config->num_vgprs = MAX2(config->num_vgprs, c.num_vgprs); @@ -503,7 +561,9 @@ static bool resolve_symbol(const struct ac_rtld_upload_info *u, unsigned part_idx, const Elf64_Sym *sym, const char *name, uint64_t *value) { - if (sym->st_shndx == SHN_UNDEF) { + /* TODO: properly disentangle the undef and the LDS cases once + * STT_AMDGPU_LDS is retired. */ + if (sym->st_shndx == SHN_UNDEF || sym->st_shndx == SHN_AMDGPU_LDS) { const struct ac_rtld_symbol *lds_sym = find_symbol(&u->binary->lds_symbols, name, part_idx); @@ -692,6 +752,11 @@ bool ac_rtld_upload(struct ac_rtld_upload_info *u) } \ } while (false) + if (u->binary->options.halt_at_entry) { + /* s_sethalt 1 */ + *(uint32_t *)u->rx_ptr = util_cpu_to_le32(0xbf8d0001); + } + /* First pass: upload raw section data and lay out private LDS symbols. */ for (unsigned i = 0; i < u->binary->num_parts; ++i) { struct ac_rtld_part *part = &u->binary->parts[i];