meson: drop `intel_` prefix on imgui_core
[mesa.git] / src / amd / common / ac_rtld.c
1 /*
2 * Copyright 2014-2019 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #include "ac_rtld.h"
25
26 #include <gelf.h>
27 #include <libelf.h>
28 #include <stdarg.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32
33 #include "ac_binary.h"
34 #include "ac_gpu_info.h"
35 #include "util/u_dynarray.h"
36 #include "util/u_math.h"
37
/* Old distributions may not have the EM_AMDGPU enum constant, so a private
 * copy of the machine id is used instead. */
#define MY_EM_AMDGPU 224

/* Symbol type of old-style LDS symbols. This is deprecated -- remove. */
#if !defined(STT_AMDGPU_LDS)
#define STT_AMDGPU_LDS 13
#endif

/* Section index that marks a symbol as an LDS symbol. */
#if !defined(SHN_AMDGPU_LDS)
#define SHN_AMDGPU_LDS 0xff00
#endif

/* AMDGPU relocation types, for ELF headers that do not provide them. */
#if !defined(R_AMDGPU_NONE)
#define R_AMDGPU_NONE           0
#define R_AMDGPU_ABS32_LO       1
#define R_AMDGPU_ABS32_HI       2
#define R_AMDGPU_ABS64          3
#define R_AMDGPU_REL32          4
#define R_AMDGPU_REL64          5
#define R_AMDGPU_ABS32          6
#define R_AMDGPU_GOTPCREL       7
#define R_AMDGPU_GOTPCREL32_LO  8
#define R_AMDGPU_GOTPCREL32_HI  9
#define R_AMDGPU_REL32_LO       10
#define R_AMDGPU_REL32_HI       11
#define R_AMDGPU_RELATIVE64     13
#endif

/* For the UMR disassembler. */
#define DEBUGGER_END_OF_CODE_MARKER 0xbf9f0000 /* invalid instruction */
#define DEBUGGER_NUM_MARKERS        5
68
/* Per-section bookkeeping for one ELF section of a shader part. */
struct ac_rtld_section {
   /* Set for allocated, non-note sections; only these are uploaded. */
   bool is_rx : 1;
   /* Set for executable sections named ".text". */
   bool is_pasted_text : 1;
   /* Byte offset assigned to the section within the memory image. */
   uint64_t offset;
   /* Section name as returned by elf_strptr(). */
   const char *name;
};
75
76 struct ac_rtld_part {
77 Elf *elf;
78 struct ac_rtld_section *sections;
79 unsigned num_sections;
80 };
81
/**
 * Format an error message from \p fmt and the argument list \p va and print
 * it to stderr with an "ac_rtld error: " prefix.
 *
 * The message is formatted with vsnprintf() because the arguments arrive as
 * a va_list; passing a va_list to a variadic formatter such as asprintf()
 * is incorrect. \p va is consumed by this call.
 */
static void report_erroraf(const char *fmt, va_list va)
{
   char *msg = NULL;

   /* Measure the message on a copy so that va stays usable for the
    * formatting call below. */
   va_list va_measure;
   va_copy(va_measure, va);
   int len = vsnprintf(NULL, 0, fmt, va_measure);
   va_end(va_measure);

   if (len >= 0) {
      msg = malloc((size_t)len + 1);
      if (msg)
         vsnprintf(msg, (size_t)len + 1, fmt, va);
   }

   fprintf(stderr, "ac_rtld error: %s\n", msg ? msg : "(formatting failed)");

   free(msg);
}
94
95 static void report_errorf(const char *fmt, ...) PRINTFLIKE(1, 2);
96
97 static void report_errorf(const char *fmt, ...)
98 {
99 va_list va;
100 va_start(va, fmt);
101 report_erroraf(fmt, va);
102 va_end(va);
103 }
104
105 static void report_elf_errorf(const char *fmt, ...) PRINTFLIKE(1, 2);
106
107 static void report_elf_errorf(const char *fmt, ...)
108 {
109 va_list va;
110 va_start(va, fmt);
111 report_erroraf(fmt, va);
112 va_end(va);
113
114 fprintf(stderr, "ELF error: %s\n", elf_errmsg(elf_errno()));
115 }
116
117 /**
118 * Find a symbol in a dynarray of struct ac_rtld_symbol by \p name and shader
119 * \p part_idx.
120 */
121 static const struct ac_rtld_symbol *find_symbol(const struct util_dynarray *symbols,
122 const char *name, unsigned part_idx)
123 {
124 util_dynarray_foreach(symbols, struct ac_rtld_symbol, symbol) {
125 if ((symbol->part_idx == ~0u || symbol->part_idx == part_idx) &&
126 !strcmp(name, symbol->name))
127 return symbol;
128 }
129 return 0;
130 }
131
132 static int compare_symbol_by_align(const void *lhsp, const void *rhsp)
133 {
134 const struct ac_rtld_symbol *lhs = lhsp;
135 const struct ac_rtld_symbol *rhs = rhsp;
136 if (rhs->align > lhs->align)
137 return 1;
138 if (rhs->align < lhs->align)
139 return -1;
140 return 0;
141 }
142
143 /**
144 * Sort the given symbol list by decreasing alignment and assign offsets.
145 */
146 static bool layout_symbols(struct ac_rtld_symbol *symbols, unsigned num_symbols,
147 uint64_t *ptotal_size)
148 {
149 qsort(symbols, num_symbols, sizeof(*symbols), compare_symbol_by_align);
150
151 uint64_t total_size = *ptotal_size;
152
153 for (unsigned i = 0; i < num_symbols; ++i) {
154 struct ac_rtld_symbol *s = &symbols[i];
155 assert(util_is_power_of_two_nonzero(s->align));
156
157 total_size = align64(total_size, s->align);
158 s->offset = total_size;
159
160 if (total_size + s->size < total_size) {
161 report_errorf("%s: size overflow", __FUNCTION__);
162 return false;
163 }
164
165 total_size += s->size;
166 }
167
168 *ptotal_size = total_size;
169 return true;
170 }
171
172 /**
173 * Read LDS symbols from the given \p section of the ELF of \p part and append
174 * them to the LDS symbols list.
175 *
176 * Shared LDS symbols are filtered out.
177 */
178 static bool read_private_lds_symbols(struct ac_rtld_binary *binary,
179 unsigned part_idx,
180 Elf_Scn *section,
181 uint32_t *lds_end_align)
182 {
183 #define report_if(cond) \
184 do { \
185 if ((cond)) { \
186 report_errorf(#cond); \
187 return false; \
188 } \
189 } while (false)
190 #define report_elf_if(cond) \
191 do { \
192 if ((cond)) { \
193 report_elf_errorf(#cond); \
194 return false; \
195 } \
196 } while (false)
197
198 struct ac_rtld_part *part = &binary->parts[part_idx];
199 Elf64_Shdr *shdr = elf64_getshdr(section);
200 uint32_t strtabidx = shdr->sh_link;
201 Elf_Data *symbols_data = elf_getdata(section, NULL);
202 report_elf_if(!symbols_data);
203
204 const Elf64_Sym *symbol = symbols_data->d_buf;
205 size_t num_symbols = symbols_data->d_size / sizeof(Elf64_Sym);
206
207 for (size_t j = 0; j < num_symbols; ++j, ++symbol) {
208 struct ac_rtld_symbol s = {};
209
210 if (ELF64_ST_TYPE(symbol->st_info) == STT_AMDGPU_LDS) {
211 /* old-style LDS symbols from initial prototype -- remove eventually */
212 s.align = MIN2(1u << (symbol->st_other >> 3), 1u << 16);
213 } else if (symbol->st_shndx == SHN_AMDGPU_LDS) {
214 s.align = MIN2(symbol->st_value, 1u << 16);
215 report_if(!util_is_power_of_two_nonzero(s.align));
216 } else
217 continue;
218
219 report_if(symbol->st_size > 1u << 29);
220
221 s.name = elf_strptr(part->elf, strtabidx, symbol->st_name);
222 s.size = symbol->st_size;
223 s.part_idx = part_idx;
224
225 if (!strcmp(s.name, "__lds_end")) {
226 report_elf_if(s.size != 0);
227 *lds_end_align = MAX2(*lds_end_align, s.align);
228 continue;
229 }
230
231 const struct ac_rtld_symbol *shared =
232 find_symbol(&binary->lds_symbols, s.name, part_idx);
233 if (shared) {
234 report_elf_if(s.align > shared->align);
235 report_elf_if(s.size > shared->size);
236 continue;
237 }
238
239 util_dynarray_append(&binary->lds_symbols, struct ac_rtld_symbol, s);
240 }
241
242 return true;
243
244 #undef report_if
245 #undef report_elf_if
246 }
247
248 /**
249 * Open a binary consisting of one or more shader parts.
250 *
251 * \param binary the uninitialized struct
252 * \param i binary opening parameters
253 */
254 bool ac_rtld_open(struct ac_rtld_binary *binary,
255 struct ac_rtld_open_info i)
256 {
257 /* One of the libelf implementations
258 * (http://www.mr511.de/software/english.htm) requires calling
259 * elf_version() before elf_memory().
260 */
261 elf_version(EV_CURRENT);
262
263 memset(binary, 0, sizeof(*binary));
264 memcpy(&binary->options, &i.options, sizeof(binary->options));
265 binary->wave_size = i.wave_size;
266 binary->num_parts = i.num_parts;
267 binary->parts = calloc(sizeof(*binary->parts), i.num_parts);
268 if (!binary->parts)
269 return false;
270
271 uint64_t pasted_text_size = 0;
272 uint64_t rx_align = 1;
273 uint64_t rx_size = 0;
274 uint64_t exec_size = 0;
275
276 #define report_if(cond) \
277 do { \
278 if ((cond)) { \
279 report_errorf(#cond); \
280 goto fail; \
281 } \
282 } while (false)
283 #define report_elf_if(cond) \
284 do { \
285 if ((cond)) { \
286 report_elf_errorf(#cond); \
287 goto fail; \
288 } \
289 } while (false)
290
291 /* Copy and layout shared LDS symbols. */
292 if (i.num_shared_lds_symbols) {
293 if (!util_dynarray_resize(&binary->lds_symbols, struct ac_rtld_symbol,
294 i.num_shared_lds_symbols))
295 goto fail;
296
297 memcpy(binary->lds_symbols.data, i.shared_lds_symbols, binary->lds_symbols.size);
298 }
299
300 util_dynarray_foreach(&binary->lds_symbols, struct ac_rtld_symbol, symbol)
301 symbol->part_idx = ~0u;
302
303 unsigned max_lds_size = 64 * 1024;
304
305 if (i.info->chip_class == GFX6 ||
306 (i.shader_type != MESA_SHADER_COMPUTE &&
307 i.shader_type != MESA_SHADER_FRAGMENT))
308 max_lds_size = 32 * 1024;
309
310 uint64_t shared_lds_size = 0;
311 if (!layout_symbols(binary->lds_symbols.data, i.num_shared_lds_symbols, &shared_lds_size))
312 goto fail;
313
314 if (shared_lds_size > max_lds_size) {
315 fprintf(stderr, "ac_rtld error(1): too much LDS (used = %u, max = %u)\n",
316 (unsigned)shared_lds_size, max_lds_size);
317 goto fail;
318 }
319 binary->lds_size = shared_lds_size;
320
321 /* First pass over all parts: open ELFs, pre-determine the placement of
322 * sections in the memory image, and collect and layout private LDS symbols. */
323 uint32_t lds_end_align = 0;
324
325 if (binary->options.halt_at_entry)
326 pasted_text_size += 4;
327
328 for (unsigned part_idx = 0; part_idx < i.num_parts; ++part_idx) {
329 struct ac_rtld_part *part = &binary->parts[part_idx];
330 unsigned part_lds_symbols_begin =
331 util_dynarray_num_elements(&binary->lds_symbols, struct ac_rtld_symbol);
332
333 part->elf = elf_memory((char *)i.elf_ptrs[part_idx], i.elf_sizes[part_idx]);
334 report_elf_if(!part->elf);
335
336 const Elf64_Ehdr *ehdr = elf64_getehdr(part->elf);
337 report_elf_if(!ehdr);
338 report_if(ehdr->e_machine != MY_EM_AMDGPU);
339
340 size_t section_str_index;
341 size_t num_shdrs;
342 report_elf_if(elf_getshdrstrndx(part->elf, &section_str_index) < 0);
343 report_elf_if(elf_getshdrnum(part->elf, &num_shdrs) < 0);
344
345 part->num_sections = num_shdrs;
346 part->sections = calloc(sizeof(*part->sections), num_shdrs);
347 report_if(!part->sections);
348
349 Elf_Scn *section = NULL;
350 while ((section = elf_nextscn(part->elf, section))) {
351 Elf64_Shdr *shdr = elf64_getshdr(section);
352 struct ac_rtld_section *s = &part->sections[elf_ndxscn(section)];
353 s->name = elf_strptr(part->elf, section_str_index, shdr->sh_name);
354 report_elf_if(!s->name);
355
356 /* Cannot actually handle linked objects yet */
357 report_elf_if(shdr->sh_addr != 0);
358
359 /* Alignment must be 0 or a power of two */
360 report_elf_if(shdr->sh_addralign & (shdr->sh_addralign - 1));
361 uint64_t sh_align = MAX2(shdr->sh_addralign, 1);
362
363 if (shdr->sh_flags & SHF_ALLOC &&
364 shdr->sh_type != SHT_NOTE) {
365 report_if(shdr->sh_flags & SHF_WRITE);
366
367 s->is_rx = true;
368
369 if (shdr->sh_flags & SHF_EXECINSTR) {
370 report_elf_if(shdr->sh_size & 3);
371
372 if (!strcmp(s->name, ".text"))
373 s->is_pasted_text = true;
374
375 exec_size += shdr->sh_size;
376 }
377
378 if (s->is_pasted_text) {
379 s->offset = pasted_text_size;
380 pasted_text_size += shdr->sh_size;
381 } else {
382 rx_align = align(rx_align, sh_align);
383 rx_size = align(rx_size, sh_align);
384 s->offset = rx_size;
385 rx_size += shdr->sh_size;
386 }
387 } else if (shdr->sh_type == SHT_SYMTAB) {
388 if (!read_private_lds_symbols(binary, part_idx, section, &lds_end_align))
389 goto fail;
390 }
391 }
392
393 uint64_t part_lds_size = shared_lds_size;
394 if (!layout_symbols(
395 util_dynarray_element(&binary->lds_symbols, struct ac_rtld_symbol, part_lds_symbols_begin),
396 util_dynarray_num_elements(&binary->lds_symbols, struct ac_rtld_symbol) - part_lds_symbols_begin,
397 &part_lds_size))
398 goto fail;
399 binary->lds_size = MAX2(binary->lds_size, part_lds_size);
400 }
401
402 binary->rx_end_markers = pasted_text_size;
403 pasted_text_size += 4 * DEBUGGER_NUM_MARKERS;
404
405 /* __lds_end is a special symbol that points at the end of the memory
406 * occupied by other LDS symbols. Its alignment is taken as the
407 * maximum of its alignment over all shader parts where it occurs.
408 */
409 if (lds_end_align) {
410 binary->lds_size = align(binary->lds_size, lds_end_align);
411
412 struct ac_rtld_symbol *lds_end =
413 util_dynarray_grow(&binary->lds_symbols, struct ac_rtld_symbol, 1);
414 lds_end->name = "__lds_end";
415 lds_end->size = 0;
416 lds_end->align = lds_end_align;
417 lds_end->offset = binary->lds_size;
418 lds_end->part_idx = ~0u;
419 }
420
421 if (binary->lds_size > max_lds_size) {
422 fprintf(stderr, "ac_rtld error(2): too much LDS (used = %u, max = %u)\n",
423 (unsigned)binary->lds_size, max_lds_size);
424 goto fail;
425 }
426
427 /* Second pass: Adjust offsets of non-pasted text sections. */
428 binary->rx_size = pasted_text_size;
429 binary->rx_size = align(binary->rx_size, rx_align);
430
431 for (unsigned part_idx = 0; part_idx < i.num_parts; ++part_idx) {
432 struct ac_rtld_part *part = &binary->parts[part_idx];
433 size_t num_shdrs;
434 elf_getshdrnum(part->elf, &num_shdrs);
435
436 for (unsigned j = 0; j < num_shdrs; ++j) {
437 struct ac_rtld_section *s = &part->sections[j];
438 if (s->is_rx && !s->is_pasted_text)
439 s->offset += binary->rx_size;
440 }
441 }
442
443 binary->rx_size += rx_size;
444 binary->exec_size = exec_size;
445
446 if (i.info->chip_class >= GFX10) {
447 /* In gfx10, the SQ fetches up to 3 cache lines of 16 dwords
448 * ahead of the PC, configurable by SH_MEM_CONFIG and
449 * S_INST_PREFETCH. This can cause two issues:
450 *
451 * (1) Crossing a page boundary to an unmapped page. The logic
452 * does not distinguish between a required fetch and a "mere"
453 * prefetch and will fault.
454 *
455 * (2) Prefetching instructions that will be changed for a
456 * different shader.
457 *
458 * (2) is not currently an issue because we flush the I$ at IB
459 * boundaries, but (1) needs to be addressed. Due to buffer
460 * suballocation, we just play it safe.
461 */
462 binary->rx_size = align(binary->rx_size + 3 * 64, 64);
463 }
464
465 return true;
466
467 #undef report_if
468 #undef report_elf_if
469
470 fail:
471 ac_rtld_close(binary);
472 return false;
473 }
474
475 void ac_rtld_close(struct ac_rtld_binary *binary)
476 {
477 for (unsigned i = 0; i < binary->num_parts; ++i) {
478 struct ac_rtld_part *part = &binary->parts[i];
479 free(part->sections);
480 elf_end(part->elf);
481 }
482
483 util_dynarray_fini(&binary->lds_symbols);
484 free(binary->parts);
485 binary->parts = NULL;
486 binary->num_parts = 0;
487 }
488
489 static bool get_section_by_name(struct ac_rtld_part *part, const char *name,
490 const char **data, size_t *nbytes)
491 {
492 for (unsigned i = 0; i < part->num_sections; ++i) {
493 struct ac_rtld_section *s = &part->sections[i];
494 if (s->name && !strcmp(name, s->name)) {
495 Elf_Scn *target_scn = elf_getscn(part->elf, i);
496 Elf_Data *target_data = elf_getdata(target_scn, NULL);
497 if (!target_data) {
498 report_elf_errorf("ac_rtld: get_section_by_name: elf_getdata");
499 return false;
500 }
501
502 *data = target_data->d_buf;
503 *nbytes = target_data->d_size;
504 return true;
505 }
506 }
507 return false;
508 }
509
510 bool ac_rtld_get_section_by_name(struct ac_rtld_binary *binary, const char *name,
511 const char **data, size_t *nbytes)
512 {
513 assert(binary->num_parts == 1);
514 return get_section_by_name(&binary->parts[0], name, data, nbytes);
515 }
516
517 bool ac_rtld_read_config(struct ac_rtld_binary *binary,
518 struct ac_shader_config *config)
519 {
520 for (unsigned i = 0; i < binary->num_parts; ++i) {
521 struct ac_rtld_part *part = &binary->parts[i];
522 const char *config_data;
523 size_t config_nbytes;
524
525 if (!get_section_by_name(part, ".AMDGPU.config",
526 &config_data, &config_nbytes))
527 return false;
528
529 /* TODO: be precise about scratch use? */
530 struct ac_shader_config c = {};
531 ac_parse_shader_binary_config(config_data, config_nbytes,
532 binary->wave_size, true, &c);
533
534 config->num_sgprs = MAX2(config->num_sgprs, c.num_sgprs);
535 config->num_vgprs = MAX2(config->num_vgprs, c.num_vgprs);
536 config->spilled_sgprs = MAX2(config->spilled_sgprs, c.spilled_sgprs);
537 config->spilled_vgprs = MAX2(config->spilled_vgprs, c.spilled_vgprs);
538 config->scratch_bytes_per_wave = MAX2(config->scratch_bytes_per_wave,
539 c.scratch_bytes_per_wave);
540
541 assert(i == 0 || config->float_mode == c.float_mode);
542 config->float_mode = c.float_mode;
543
544 /* SPI_PS_INPUT_ENA/ADDR can't be combined. Only the value from
545 * the main shader part is used. */
546 assert(config->spi_ps_input_ena == 0 &&
547 config->spi_ps_input_addr == 0);
548 config->spi_ps_input_ena = c.spi_ps_input_ena;
549 config->spi_ps_input_addr = c.spi_ps_input_addr;
550
551 /* TODO: consistently use LDS symbols for this */
552 config->lds_size = MAX2(config->lds_size, c.lds_size);
553
554 /* TODO: Should we combine these somehow? It's currently only
555 * used for radeonsi's compute, where multiple parts aren't used. */
556 assert(config->rsrc1 == 0 && config->rsrc2 == 0);
557 config->rsrc1 = c.rsrc1;
558 config->rsrc2 = c.rsrc2;
559 }
560
561 return true;
562 }
563
564 static bool resolve_symbol(const struct ac_rtld_upload_info *u,
565 unsigned part_idx, const Elf64_Sym *sym,
566 const char *name, uint64_t *value)
567 {
568 /* TODO: properly disentangle the undef and the LDS cases once
569 * STT_AMDGPU_LDS is retired. */
570 if (sym->st_shndx == SHN_UNDEF || sym->st_shndx == SHN_AMDGPU_LDS) {
571 const struct ac_rtld_symbol *lds_sym =
572 find_symbol(&u->binary->lds_symbols, name, part_idx);
573
574 if (lds_sym) {
575 *value = lds_sym->offset;
576 return true;
577 }
578
579 /* TODO: resolve from other parts */
580
581 if (u->get_external_symbol(u->cb_data, name, value))
582 return true;
583
584 report_errorf("symbol %s: unknown", name);
585 return false;
586 }
587
588 struct ac_rtld_part *part = &u->binary->parts[part_idx];
589 if (sym->st_shndx >= part->num_sections) {
590 report_errorf("symbol %s: section out of bounds", name);
591 return false;
592 }
593
594 struct ac_rtld_section *s = &part->sections[sym->st_shndx];
595 if (!s->is_rx) {
596 report_errorf("symbol %s: bad section", name);
597 return false;
598 }
599
600 uint64_t section_base = u->rx_va + s->offset;
601
602 *value = section_base + sym->st_value;
603 return true;
604 }
605
606 static bool apply_relocs(const struct ac_rtld_upload_info *u,
607 unsigned part_idx, const Elf64_Shdr *reloc_shdr,
608 const Elf_Data *reloc_data)
609 {
610 #define report_if(cond) \
611 do { \
612 if ((cond)) { \
613 report_errorf(#cond); \
614 return false; \
615 } \
616 } while (false)
617 #define report_elf_if(cond) \
618 do { \
619 if ((cond)) { \
620 report_elf_errorf(#cond); \
621 return false; \
622 } \
623 } while (false)
624
625 struct ac_rtld_part *part = &u->binary->parts[part_idx];
626 Elf_Scn *target_scn = elf_getscn(part->elf, reloc_shdr->sh_info);
627 report_elf_if(!target_scn);
628
629 Elf_Data *target_data = elf_getdata(target_scn, NULL);
630 report_elf_if(!target_data);
631
632 Elf_Scn *symbols_scn = elf_getscn(part->elf, reloc_shdr->sh_link);
633 report_elf_if(!symbols_scn);
634
635 Elf64_Shdr *symbols_shdr = elf64_getshdr(symbols_scn);
636 report_elf_if(!symbols_shdr);
637 uint32_t strtabidx = symbols_shdr->sh_link;
638
639 Elf_Data *symbols_data = elf_getdata(symbols_scn, NULL);
640 report_elf_if(!symbols_data);
641
642 const Elf64_Sym *symbols = symbols_data->d_buf;
643 size_t num_symbols = symbols_data->d_size / sizeof(Elf64_Sym);
644
645 struct ac_rtld_section *s = &part->sections[reloc_shdr->sh_info];
646 report_if(!s->is_rx);
647
648 const char *orig_base = target_data->d_buf;
649 char *dst_base = u->rx_ptr + s->offset;
650 uint64_t va_base = u->rx_va + s->offset;
651
652 Elf64_Rel *rel = reloc_data->d_buf;
653 size_t num_relocs = reloc_data->d_size / sizeof(*rel);
654 for (size_t i = 0; i < num_relocs; ++i, ++rel) {
655 size_t r_sym = ELF64_R_SYM(rel->r_info);
656 unsigned r_type = ELF64_R_TYPE(rel->r_info);
657
658 const char *orig_ptr = orig_base + rel->r_offset;
659 char *dst_ptr = dst_base + rel->r_offset;
660 uint64_t va = va_base + rel->r_offset;
661
662 uint64_t symbol;
663 uint64_t addend;
664
665 if (r_sym == STN_UNDEF) {
666 symbol = 0;
667 } else {
668 report_elf_if(r_sym >= num_symbols);
669
670 const Elf64_Sym *sym = &symbols[r_sym];
671 const char *symbol_name =
672 elf_strptr(part->elf, strtabidx, sym->st_name);
673 report_elf_if(!symbol_name);
674
675 if (!resolve_symbol(u, part_idx, sym, symbol_name, &symbol))
676 return false;
677 }
678
679 /* TODO: Should we also support .rela sections, where the
680 * addend is part of the relocation record? */
681
682 /* Load the addend from the ELF instead of the destination,
683 * because the destination may be in VRAM. */
684 switch (r_type) {
685 case R_AMDGPU_ABS32:
686 case R_AMDGPU_ABS32_LO:
687 case R_AMDGPU_ABS32_HI:
688 case R_AMDGPU_REL32:
689 case R_AMDGPU_REL32_LO:
690 case R_AMDGPU_REL32_HI:
691 addend = *(const uint32_t *)orig_ptr;
692 break;
693 case R_AMDGPU_ABS64:
694 case R_AMDGPU_REL64:
695 addend = *(const uint64_t *)orig_ptr;
696 break;
697 default:
698 report_errorf("unsupported r_type == %u", r_type);
699 return false;
700 }
701
702 uint64_t abs = symbol + addend;
703
704 switch (r_type) {
705 case R_AMDGPU_ABS32:
706 assert((uint32_t)abs == abs);
707 case R_AMDGPU_ABS32_LO:
708 *(uint32_t *)dst_ptr = util_cpu_to_le32(abs);
709 break;
710 case R_AMDGPU_ABS32_HI:
711 *(uint32_t *)dst_ptr = util_cpu_to_le32(abs >> 32);
712 break;
713 case R_AMDGPU_ABS64:
714 *(uint64_t *)dst_ptr = util_cpu_to_le64(abs);
715 break;
716 case R_AMDGPU_REL32:
717 assert((int64_t)(int32_t)(abs - va) == (int64_t)(abs - va));
718 case R_AMDGPU_REL32_LO:
719 *(uint32_t *)dst_ptr = util_cpu_to_le32(abs - va);
720 break;
721 case R_AMDGPU_REL32_HI:
722 *(uint32_t *)dst_ptr = util_cpu_to_le32((abs - va) >> 32);
723 break;
724 case R_AMDGPU_REL64:
725 *(uint64_t *)dst_ptr = util_cpu_to_le64(abs - va);
726 break;
727 default:
728 unreachable("bad r_type");
729 }
730 }
731
732 return true;
733
734 #undef report_if
735 #undef report_elf_if
736 }
737
738 /**
739 * Upload the binary or binaries to the provided GPU buffers, including
740 * relocations.
741 */
742 bool ac_rtld_upload(struct ac_rtld_upload_info *u)
743 {
744 #define report_if(cond) \
745 do { \
746 if ((cond)) { \
747 report_errorf(#cond); \
748 return false; \
749 } \
750 } while (false)
751 #define report_elf_if(cond) \
752 do { \
753 if ((cond)) { \
754 report_errorf(#cond); \
755 return false; \
756 } \
757 } while (false)
758
759 if (u->binary->options.halt_at_entry) {
760 /* s_sethalt 1 */
761 *(uint32_t *)u->rx_ptr = util_cpu_to_le32(0xbf8d0001);
762 }
763
764 /* First pass: upload raw section data and lay out private LDS symbols. */
765 for (unsigned i = 0; i < u->binary->num_parts; ++i) {
766 struct ac_rtld_part *part = &u->binary->parts[i];
767
768 Elf_Scn *section = NULL;
769 while ((section = elf_nextscn(part->elf, section))) {
770 Elf64_Shdr *shdr = elf64_getshdr(section);
771 struct ac_rtld_section *s = &part->sections[elf_ndxscn(section)];
772
773 if (!s->is_rx)
774 continue;
775
776 report_if(shdr->sh_type != SHT_PROGBITS);
777
778 Elf_Data *data = elf_getdata(section, NULL);
779 report_elf_if(!data || data->d_size != shdr->sh_size);
780 memcpy(u->rx_ptr + s->offset, data->d_buf, shdr->sh_size);
781 }
782 }
783
784 if (u->binary->rx_end_markers) {
785 uint32_t *dst = (uint32_t *)(u->rx_ptr + u->binary->rx_end_markers);
786 for (unsigned i = 0; i < DEBUGGER_NUM_MARKERS; ++i)
787 *dst++ = util_cpu_to_le32(DEBUGGER_END_OF_CODE_MARKER);
788 }
789
790 /* Second pass: handle relocations, overwriting uploaded data where
791 * appropriate. */
792 for (unsigned i = 0; i < u->binary->num_parts; ++i) {
793 struct ac_rtld_part *part = &u->binary->parts[i];
794 Elf_Scn *section = NULL;
795 while ((section = elf_nextscn(part->elf, section))) {
796 Elf64_Shdr *shdr = elf64_getshdr(section);
797 if (shdr->sh_type == SHT_REL) {
798 Elf_Data *relocs = elf_getdata(section, NULL);
799 report_elf_if(!relocs || relocs->d_size != shdr->sh_size);
800 if (!apply_relocs(u, i, shdr, relocs))
801 return false;
802 } else if (shdr->sh_type == SHT_RELA) {
803 report_errorf("SHT_RELA not supported");
804 return false;
805 }
806 }
807 }
808
809 return true;
810
811 #undef report_if
812 #undef report_elf_if
813 }