ac/rtld: check correct LDS max size
[mesa.git] / src / amd / common / ac_rtld.c
1 /*
2 * Copyright 2014-2019 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #include "ac_rtld.h"
25
26 #include <gelf.h>
27 #include <libelf.h>
28 #include <stdarg.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32
33 #include "ac_binary.h"
34 #include "ac_gpu_info.h"
35 #include "util/u_dynarray.h"
36 #include "util/u_math.h"
37
/* ELF machine id that every shader part's e_machine must match.
 * Old distributions may not have this enum constant, hence the local name. */
#define MY_EM_AMDGPU 224

/* Symbol type that marks a symbol as living in LDS. Only defined here when
 * the ELF headers in use do not already provide it. */
#ifndef STT_AMDGPU_LDS
#define STT_AMDGPU_LDS 13
#endif

/* AMDGPU relocation types. Only defined here when the ELF headers in use do
 * not already provide them. */
#ifndef R_AMDGPU_NONE
#define R_AMDGPU_NONE 0
#define R_AMDGPU_ABS32_LO 1
#define R_AMDGPU_ABS32_HI 2
#define R_AMDGPU_ABS64 3
#define R_AMDGPU_REL32 4
#define R_AMDGPU_REL64 5
#define R_AMDGPU_ABS32 6
#define R_AMDGPU_GOTPCREL 7
#define R_AMDGPU_GOTPCREL32_LO 8
#define R_AMDGPU_GOTPCREL32_HI 9
#define R_AMDGPU_REL32_LO 10
#define R_AMDGPU_REL32_HI 11
#define R_AMDGPU_RELATIVE64 13
#endif

/* For the UMR disassembler: DEBUGGER_NUM_MARKERS copies of the marker dword
 * are written right after the pasted text when a binary is uploaded. */
#define DEBUGGER_END_OF_CODE_MARKER 0xbf9f0000 /* invalid instruction */
#define DEBUGGER_NUM_MARKERS 5
64
/* Placement information for one ELF section of a shader part. */
struct ac_rtld_section {
   /* Set for SHF_ALLOC sections other than SHT_NOTE; only these sections are
    * copied into the uploaded image. */
   bool is_rx : 1;
   /* Set for an executable section named ".text"; such sections are laid out
    * back to back at the start of the image. */
   bool is_pasted_text : 1;
   /* Byte offset of the section relative to the start of the uploaded image. */
   uint64_t offset;
   /* Section name as returned by elf_strptr(). */
   const char *name;
};
71
/* One shader part: an opened ELF object plus per-section bookkeeping. */
struct ac_rtld_part {
   /* libelf handle created with elf_memory(); released with elf_end(). */
   Elf *elf;
   /* Heap array with num_sections entries, indexed by ELF section index. */
   struct ac_rtld_section *sections;
   unsigned num_sections;
};
77
/**
 * Print an error message built from the printf-style format \p fmt and the
 * already started argument list \p va to stderr, prefixed with
 * "ac_rtld error: " and terminated by a newline.
 *
 * The arguments are consumed with vfprintf(). Handing a va_list to a
 * function that expects the variadic arguments themselves (such as
 * asprintf) is undefined behaviour and prints garbage.
 */
static void report_erroraf(const char *fmt, va_list va)
{
   fprintf(stderr, "ac_rtld error: ");
   vfprintf(stderr, fmt, va);
   fprintf(stderr, "\n");
}
90
91 static void report_errorf(const char *fmt, ...) PRINTFLIKE(1, 2);
92
93 static void report_errorf(const char *fmt, ...)
94 {
95 va_list va;
96 va_start(va, fmt);
97 report_erroraf(fmt, va);
98 va_end(va);
99 }
100
101 static void report_elf_errorf(const char *fmt, ...) PRINTFLIKE(1, 2);
102
103 static void report_elf_errorf(const char *fmt, ...)
104 {
105 va_list va;
106 va_start(va, fmt);
107 report_erroraf(fmt, va);
108 va_end(va);
109
110 fprintf(stderr, "ELF error: %s\n", elf_errmsg(elf_errno()));
111 }
112
113 /**
114 * Find a symbol in a dynarray of struct ac_rtld_symbol by \p name and shader
115 * \p part_idx.
116 */
117 static const struct ac_rtld_symbol *find_symbol(const struct util_dynarray *symbols,
118 const char *name, unsigned part_idx)
119 {
120 util_dynarray_foreach(symbols, struct ac_rtld_symbol, symbol) {
121 if ((symbol->part_idx == ~0u || symbol->part_idx == part_idx) &&
122 !strcmp(name, symbol->name))
123 return symbol;
124 }
125 return 0;
126 }
127
128 static int compare_symbol_by_align(const void *lhsp, const void *rhsp)
129 {
130 const struct ac_rtld_symbol *lhs = lhsp;
131 const struct ac_rtld_symbol *rhs = rhsp;
132 if (rhs->align > lhs->align)
133 return 1;
134 if (rhs->align < lhs->align)
135 return -1;
136 return 0;
137 }
138
139 /**
140 * Sort the given symbol list by decreasing alignment and assign offsets.
141 */
142 static bool layout_symbols(struct ac_rtld_symbol *symbols, unsigned num_symbols,
143 uint64_t *ptotal_size)
144 {
145 qsort(symbols, num_symbols, sizeof(*symbols), compare_symbol_by_align);
146
147 uint64_t total_size = *ptotal_size;
148
149 for (unsigned i = 0; i < num_symbols; ++i) {
150 struct ac_rtld_symbol *s = &symbols[i];
151 assert(util_is_power_of_two_nonzero(s->align));
152
153 total_size = align64(total_size, s->align);
154 s->offset = total_size;
155
156 if (total_size + s->size < total_size) {
157 report_errorf("%s: size overflow", __FUNCTION__);
158 return false;
159 }
160
161 total_size += s->size;
162 }
163
164 *ptotal_size = total_size;
165 return true;
166 }
167
168 /**
169 * Read LDS symbols from the given \p section of the ELF of \p part and append
170 * them to the LDS symbols list.
171 *
172 * Shared LDS symbols are filtered out.
173 */
174 static bool read_private_lds_symbols(struct ac_rtld_binary *binary,
175 unsigned part_idx,
176 Elf_Scn *section,
177 uint32_t *lds_end_align)
178 {
179 #define report_elf_if(cond) \
180 do { \
181 if ((cond)) { \
182 report_errorf(#cond); \
183 return false; \
184 } \
185 } while (false)
186
187 struct ac_rtld_part *part = &binary->parts[part_idx];
188 Elf64_Shdr *shdr = elf64_getshdr(section);
189 uint32_t strtabidx = shdr->sh_link;
190 Elf_Data *symbols_data = elf_getdata(section, NULL);
191 report_elf_if(!symbols_data);
192
193 const Elf64_Sym *symbol = symbols_data->d_buf;
194 size_t num_symbols = symbols_data->d_size / sizeof(Elf64_Sym);
195
196 for (size_t j = 0; j < num_symbols; ++j, ++symbol) {
197 if (ELF64_ST_TYPE(symbol->st_info) != STT_AMDGPU_LDS)
198 continue;
199
200 report_elf_if(symbol->st_size > 1u << 29);
201
202 struct ac_rtld_symbol s = {};
203 s.name = elf_strptr(part->elf, strtabidx, symbol->st_name);
204 s.size = symbol->st_size;
205 s.align = MIN2(1u << (symbol->st_other >> 3), 1u << 16);
206 s.part_idx = part_idx;
207
208 if (!strcmp(s.name, "__lds_end")) {
209 report_elf_if(s.size != 0);
210 *lds_end_align = MAX2(*lds_end_align, s.align);
211 continue;
212 }
213
214 const struct ac_rtld_symbol *shared =
215 find_symbol(&binary->lds_symbols, s.name, part_idx);
216 if (shared) {
217 report_elf_if(s.align > shared->align);
218 report_elf_if(s.size > shared->size);
219 continue;
220 }
221
222 util_dynarray_append(&binary->lds_symbols, struct ac_rtld_symbol, s);
223 }
224
225 return true;
226
227 #undef report_elf_if
228 }
229
230 /**
231 * Open a binary consisting of one or more shader parts.
232 *
233 * \param binary the uninitialized struct
234 * \param i binary opening parameters
235 */
236 bool ac_rtld_open(struct ac_rtld_binary *binary,
237 struct ac_rtld_open_info i)
238 {
239 /* One of the libelf implementations
240 * (http://www.mr511.de/software/english.htm) requires calling
241 * elf_version() before elf_memory().
242 */
243 elf_version(EV_CURRENT);
244
245 memset(binary, 0, sizeof(*binary));
246 memcpy(&binary->options, &i.options, sizeof(binary->options));
247 binary->num_parts = i.num_parts;
248 binary->parts = calloc(sizeof(*binary->parts), i.num_parts);
249 if (!binary->parts)
250 return false;
251
252 uint64_t pasted_text_size = 0;
253 uint64_t rx_align = 1;
254 uint64_t rx_size = 0;
255
256 #define report_if(cond) \
257 do { \
258 if ((cond)) { \
259 report_errorf(#cond); \
260 goto fail; \
261 } \
262 } while (false)
263 #define report_elf_if(cond) \
264 do { \
265 if ((cond)) { \
266 report_elf_errorf(#cond); \
267 goto fail; \
268 } \
269 } while (false)
270
271 /* Copy and layout shared LDS symbols. */
272 if (i.num_shared_lds_symbols) {
273 if (!util_dynarray_resize(&binary->lds_symbols, struct ac_rtld_symbol,
274 i.num_shared_lds_symbols))
275 goto fail;
276
277 memcpy(binary->lds_symbols.data, i.shared_lds_symbols, binary->lds_symbols.size);
278 }
279
280 util_dynarray_foreach(&binary->lds_symbols, struct ac_rtld_symbol, symbol)
281 symbol->part_idx = ~0u;
282
283 unsigned max_lds_size = 64 * 1024;
284
285 if (i.info->chip_class == GFX6 ||
286 (i.shader_type != MESA_SHADER_COMPUTE &&
287 i.shader_type != MESA_SHADER_FRAGMENT))
288 max_lds_size = 32 * 1024;
289
290 uint64_t shared_lds_size = 0;
291 if (!layout_symbols(binary->lds_symbols.data, i.num_shared_lds_symbols, &shared_lds_size))
292 goto fail;
293 report_if(shared_lds_size > max_lds_size);
294 binary->lds_size = shared_lds_size;
295
296 /* First pass over all parts: open ELFs, pre-determine the placement of
297 * sections in the memory image, and collect and layout private LDS symbols. */
298 uint32_t lds_end_align = 0;
299
300 if (binary->options.halt_at_entry)
301 pasted_text_size += 4;
302
303 for (unsigned part_idx = 0; part_idx < i.num_parts; ++part_idx) {
304 struct ac_rtld_part *part = &binary->parts[part_idx];
305 unsigned part_lds_symbols_begin =
306 util_dynarray_num_elements(&binary->lds_symbols, struct ac_rtld_symbol);
307
308 part->elf = elf_memory((char *)i.elf_ptrs[part_idx], i.elf_sizes[part_idx]);
309 report_elf_if(!part->elf);
310
311 const Elf64_Ehdr *ehdr = elf64_getehdr(part->elf);
312 report_elf_if(!ehdr);
313 report_if(ehdr->e_machine != MY_EM_AMDGPU);
314
315 size_t section_str_index;
316 size_t num_shdrs;
317 report_elf_if(elf_getshdrstrndx(part->elf, &section_str_index) < 0);
318 report_elf_if(elf_getshdrnum(part->elf, &num_shdrs) < 0);
319
320 part->num_sections = num_shdrs;
321 part->sections = calloc(sizeof(*part->sections), num_shdrs);
322 report_if(!part->sections);
323
324 Elf_Scn *section = NULL;
325 while ((section = elf_nextscn(part->elf, section))) {
326 Elf64_Shdr *shdr = elf64_getshdr(section);
327 struct ac_rtld_section *s = &part->sections[elf_ndxscn(section)];
328 s->name = elf_strptr(part->elf, section_str_index, shdr->sh_name);
329 report_elf_if(!s->name);
330
331 /* Cannot actually handle linked objects yet */
332 report_elf_if(shdr->sh_addr != 0);
333
334 /* Alignment must be 0 or a power of two */
335 report_elf_if(shdr->sh_addralign & (shdr->sh_addralign - 1));
336 uint64_t sh_align = MAX2(shdr->sh_addralign, 1);
337
338 if (shdr->sh_flags & SHF_ALLOC &&
339 shdr->sh_type != SHT_NOTE) {
340 report_if(shdr->sh_flags & SHF_WRITE);
341
342 s->is_rx = true;
343
344 if (shdr->sh_flags & SHF_EXECINSTR) {
345 report_elf_if(shdr->sh_size & 3);
346
347 if (!strcmp(s->name, ".text"))
348 s->is_pasted_text = true;
349 }
350
351 if (s->is_pasted_text) {
352 s->offset = pasted_text_size;
353 pasted_text_size += shdr->sh_size;
354 } else {
355 rx_align = align(rx_align, sh_align);
356 rx_size = align(rx_size, sh_align);
357 s->offset = rx_size;
358 rx_size += shdr->sh_size;
359 }
360 } else if (shdr->sh_type == SHT_SYMTAB) {
361 if (!read_private_lds_symbols(binary, part_idx, section, &lds_end_align))
362 goto fail;
363 }
364 }
365
366 uint64_t part_lds_size = shared_lds_size;
367 if (!layout_symbols(
368 util_dynarray_element(&binary->lds_symbols, struct ac_rtld_symbol, part_lds_symbols_begin),
369 util_dynarray_num_elements(&binary->lds_symbols, struct ac_rtld_symbol) - part_lds_symbols_begin,
370 &part_lds_size))
371 goto fail;
372 binary->lds_size = MAX2(binary->lds_size, part_lds_size);
373 }
374
375 binary->rx_end_markers = pasted_text_size;
376 pasted_text_size += 4 * DEBUGGER_NUM_MARKERS;
377
378 /* __lds_end is a special symbol that points at the end of the memory
379 * occupied by other LDS symbols. Its alignment is taken as the
380 * maximum of its alignment over all shader parts where it occurs.
381 */
382 if (lds_end_align) {
383 binary->lds_size = align(binary->lds_size, lds_end_align);
384
385 struct ac_rtld_symbol *lds_end =
386 util_dynarray_grow(&binary->lds_symbols, struct ac_rtld_symbol, 1);
387 lds_end->name = "__lds_end";
388 lds_end->size = 0;
389 lds_end->align = lds_end_align;
390 lds_end->offset = binary->lds_size;
391 lds_end->part_idx = ~0u;
392 }
393
394 report_elf_if(binary->lds_size > max_lds_size);
395
396 /* Second pass: Adjust offsets of non-pasted text sections. */
397 binary->rx_size = pasted_text_size;
398 binary->rx_size = align(binary->rx_size, rx_align);
399
400 for (unsigned part_idx = 0; part_idx < i.num_parts; ++part_idx) {
401 struct ac_rtld_part *part = &binary->parts[part_idx];
402 size_t num_shdrs;
403 elf_getshdrnum(part->elf, &num_shdrs);
404
405 for (unsigned j = 0; j < num_shdrs; ++j) {
406 struct ac_rtld_section *s = &part->sections[j];
407 if (s->is_rx && !s->is_pasted_text)
408 s->offset += binary->rx_size;
409 }
410 }
411
412 binary->rx_size += rx_size;
413
414 return true;
415
416 #undef report_if
417 #undef report_elf_if
418
419 fail:
420 ac_rtld_close(binary);
421 return false;
422 }
423
424 void ac_rtld_close(struct ac_rtld_binary *binary)
425 {
426 for (unsigned i = 0; i < binary->num_parts; ++i) {
427 struct ac_rtld_part *part = &binary->parts[i];
428 free(part->sections);
429 elf_end(part->elf);
430 }
431
432 util_dynarray_fini(&binary->lds_symbols);
433 free(binary->parts);
434 binary->parts = NULL;
435 binary->num_parts = 0;
436 }
437
438 static bool get_section_by_name(struct ac_rtld_part *part, const char *name,
439 const char **data, size_t *nbytes)
440 {
441 for (unsigned i = 0; i < part->num_sections; ++i) {
442 struct ac_rtld_section *s = &part->sections[i];
443 if (s->name && !strcmp(name, s->name)) {
444 Elf_Scn *target_scn = elf_getscn(part->elf, i);
445 Elf_Data *target_data = elf_getdata(target_scn, NULL);
446 if (!target_data) {
447 report_elf_errorf("ac_rtld: get_section_by_name: elf_getdata");
448 return false;
449 }
450
451 *data = target_data->d_buf;
452 *nbytes = target_data->d_size;
453 return true;
454 }
455 }
456 return false;
457 }
458
459 bool ac_rtld_get_section_by_name(struct ac_rtld_binary *binary, const char *name,
460 const char **data, size_t *nbytes)
461 {
462 assert(binary->num_parts == 1);
463 return get_section_by_name(&binary->parts[0], name, data, nbytes);
464 }
465
466 bool ac_rtld_read_config(struct ac_rtld_binary *binary,
467 struct ac_shader_config *config)
468 {
469 for (unsigned i = 0; i < binary->num_parts; ++i) {
470 struct ac_rtld_part *part = &binary->parts[i];
471 const char *config_data;
472 size_t config_nbytes;
473
474 if (!get_section_by_name(part, ".AMDGPU.config",
475 &config_data, &config_nbytes))
476 return false;
477
478 /* TODO: be precise about scratch use? */
479 struct ac_shader_config c = {};
480 ac_parse_shader_binary_config(config_data, config_nbytes, true, &c);
481
482 config->num_sgprs = MAX2(config->num_sgprs, c.num_sgprs);
483 config->num_vgprs = MAX2(config->num_vgprs, c.num_vgprs);
484 config->spilled_sgprs = MAX2(config->spilled_sgprs, c.spilled_sgprs);
485 config->spilled_vgprs = MAX2(config->spilled_vgprs, c.spilled_vgprs);
486 config->scratch_bytes_per_wave = MAX2(config->scratch_bytes_per_wave,
487 c.scratch_bytes_per_wave);
488
489 assert(i == 0 || config->float_mode == c.float_mode);
490 config->float_mode = c.float_mode;
491
492 /* SPI_PS_INPUT_ENA/ADDR can't be combined. Only the value from
493 * the main shader part is used. */
494 assert(config->spi_ps_input_ena == 0 &&
495 config->spi_ps_input_addr == 0);
496 config->spi_ps_input_ena = c.spi_ps_input_ena;
497 config->spi_ps_input_addr = c.spi_ps_input_addr;
498
499 /* TODO: consistently use LDS symbols for this */
500 config->lds_size = MAX2(config->lds_size, c.lds_size);
501
502 /* TODO: Should we combine these somehow? It's currently only
503 * used for radeonsi's compute, where multiple parts aren't used. */
504 assert(config->rsrc1 == 0 && config->rsrc2 == 0);
505 config->rsrc1 = c.rsrc1;
506 config->rsrc2 = c.rsrc2;
507 }
508
509 return true;
510 }
511
512 static bool resolve_symbol(const struct ac_rtld_upload_info *u,
513 unsigned part_idx, const Elf64_Sym *sym,
514 const char *name, uint64_t *value)
515 {
516 if (sym->st_shndx == SHN_UNDEF) {
517 const struct ac_rtld_symbol *lds_sym =
518 find_symbol(&u->binary->lds_symbols, name, part_idx);
519
520 if (lds_sym) {
521 *value = lds_sym->offset;
522 return true;
523 }
524
525 /* TODO: resolve from other parts */
526
527 if (u->get_external_symbol(u->cb_data, name, value))
528 return true;
529
530 report_errorf("symbol %s: unknown", name);
531 return false;
532 }
533
534 struct ac_rtld_part *part = &u->binary->parts[part_idx];
535 if (sym->st_shndx >= part->num_sections) {
536 report_errorf("symbol %s: section out of bounds", name);
537 return false;
538 }
539
540 struct ac_rtld_section *s = &part->sections[sym->st_shndx];
541 if (!s->is_rx) {
542 report_errorf("symbol %s: bad section", name);
543 return false;
544 }
545
546 uint64_t section_base = u->rx_va + s->offset;
547
548 *value = section_base + sym->st_value;
549 return true;
550 }
551
552 static bool apply_relocs(const struct ac_rtld_upload_info *u,
553 unsigned part_idx, const Elf64_Shdr *reloc_shdr,
554 const Elf_Data *reloc_data)
555 {
556 #define report_if(cond) \
557 do { \
558 if ((cond)) { \
559 report_errorf(#cond); \
560 return false; \
561 } \
562 } while (false)
563 #define report_elf_if(cond) \
564 do { \
565 if ((cond)) { \
566 report_elf_errorf(#cond); \
567 return false; \
568 } \
569 } while (false)
570
571 struct ac_rtld_part *part = &u->binary->parts[part_idx];
572 Elf_Scn *target_scn = elf_getscn(part->elf, reloc_shdr->sh_info);
573 report_elf_if(!target_scn);
574
575 Elf_Data *target_data = elf_getdata(target_scn, NULL);
576 report_elf_if(!target_data);
577
578 Elf_Scn *symbols_scn = elf_getscn(part->elf, reloc_shdr->sh_link);
579 report_elf_if(!symbols_scn);
580
581 Elf64_Shdr *symbols_shdr = elf64_getshdr(symbols_scn);
582 report_elf_if(!symbols_shdr);
583 uint32_t strtabidx = symbols_shdr->sh_link;
584
585 Elf_Data *symbols_data = elf_getdata(symbols_scn, NULL);
586 report_elf_if(!symbols_data);
587
588 const Elf64_Sym *symbols = symbols_data->d_buf;
589 size_t num_symbols = symbols_data->d_size / sizeof(Elf64_Sym);
590
591 struct ac_rtld_section *s = &part->sections[reloc_shdr->sh_info];
592 report_if(!s->is_rx);
593
594 const char *orig_base = target_data->d_buf;
595 char *dst_base = u->rx_ptr + s->offset;
596 uint64_t va_base = u->rx_va + s->offset;
597
598 Elf64_Rel *rel = reloc_data->d_buf;
599 size_t num_relocs = reloc_data->d_size / sizeof(*rel);
600 for (size_t i = 0; i < num_relocs; ++i, ++rel) {
601 size_t r_sym = ELF64_R_SYM(rel->r_info);
602 unsigned r_type = ELF64_R_TYPE(rel->r_info);
603
604 const char *orig_ptr = orig_base + rel->r_offset;
605 char *dst_ptr = dst_base + rel->r_offset;
606 uint64_t va = va_base + rel->r_offset;
607
608 uint64_t symbol;
609 uint64_t addend;
610
611 if (r_sym == STN_UNDEF) {
612 symbol = 0;
613 } else {
614 report_elf_if(r_sym >= num_symbols);
615
616 const Elf64_Sym *sym = &symbols[r_sym];
617 const char *symbol_name =
618 elf_strptr(part->elf, strtabidx, sym->st_name);
619 report_elf_if(!symbol_name);
620
621 if (!resolve_symbol(u, part_idx, sym, symbol_name, &symbol))
622 return false;
623 }
624
625 /* TODO: Should we also support .rela sections, where the
626 * addend is part of the relocation record? */
627
628 /* Load the addend from the ELF instead of the destination,
629 * because the destination may be in VRAM. */
630 switch (r_type) {
631 case R_AMDGPU_ABS32:
632 case R_AMDGPU_ABS32_LO:
633 case R_AMDGPU_ABS32_HI:
634 case R_AMDGPU_REL32:
635 case R_AMDGPU_REL32_LO:
636 case R_AMDGPU_REL32_HI:
637 addend = *(const uint32_t *)orig_ptr;
638 break;
639 case R_AMDGPU_ABS64:
640 case R_AMDGPU_REL64:
641 addend = *(const uint64_t *)orig_ptr;
642 break;
643 default:
644 report_errorf("unsupported r_type == %u", r_type);
645 return false;
646 }
647
648 uint64_t abs = symbol + addend;
649
650 switch (r_type) {
651 case R_AMDGPU_ABS32:
652 assert((uint32_t)abs == abs);
653 case R_AMDGPU_ABS32_LO:
654 *(uint32_t *)dst_ptr = util_cpu_to_le32(abs);
655 break;
656 case R_AMDGPU_ABS32_HI:
657 *(uint32_t *)dst_ptr = util_cpu_to_le32(abs >> 32);
658 break;
659 case R_AMDGPU_ABS64:
660 *(uint64_t *)dst_ptr = util_cpu_to_le64(abs);
661 break;
662 case R_AMDGPU_REL32:
663 assert((int64_t)(int32_t)(abs - va) == (int64_t)(abs - va));
664 case R_AMDGPU_REL32_LO:
665 *(uint32_t *)dst_ptr = util_cpu_to_le32(abs - va);
666 break;
667 case R_AMDGPU_REL32_HI:
668 *(uint32_t *)dst_ptr = util_cpu_to_le32((abs - va) >> 32);
669 break;
670 case R_AMDGPU_REL64:
671 *(uint64_t *)dst_ptr = util_cpu_to_le64(abs - va);
672 break;
673 default:
674 unreachable("bad r_type");
675 }
676 }
677
678 return true;
679
680 #undef report_if
681 #undef report_elf_if
682 }
683
684 /**
685 * Upload the binary or binaries to the provided GPU buffers, including
686 * relocations.
687 */
688 bool ac_rtld_upload(struct ac_rtld_upload_info *u)
689 {
690 #define report_if(cond) \
691 do { \
692 if ((cond)) { \
693 report_errorf(#cond); \
694 return false; \
695 } \
696 } while (false)
697 #define report_elf_if(cond) \
698 do { \
699 if ((cond)) { \
700 report_errorf(#cond); \
701 return false; \
702 } \
703 } while (false)
704
705 if (u->binary->options.halt_at_entry) {
706 /* s_sethalt 1 */
707 *(uint32_t *)u->rx_ptr = util_cpu_to_le32(0xbf8d0001);
708 }
709
710 /* First pass: upload raw section data and lay out private LDS symbols. */
711 for (unsigned i = 0; i < u->binary->num_parts; ++i) {
712 struct ac_rtld_part *part = &u->binary->parts[i];
713
714 Elf_Scn *section = NULL;
715 while ((section = elf_nextscn(part->elf, section))) {
716 Elf64_Shdr *shdr = elf64_getshdr(section);
717 struct ac_rtld_section *s = &part->sections[elf_ndxscn(section)];
718
719 if (!s->is_rx)
720 continue;
721
722 report_if(shdr->sh_type != SHT_PROGBITS);
723
724 Elf_Data *data = elf_getdata(section, NULL);
725 report_elf_if(!data || data->d_size != shdr->sh_size);
726 memcpy(u->rx_ptr + s->offset, data->d_buf, shdr->sh_size);
727 }
728 }
729
730 if (u->binary->rx_end_markers) {
731 uint32_t *dst = (uint32_t *)(u->rx_ptr + u->binary->rx_end_markers);
732 for (unsigned i = 0; i < DEBUGGER_NUM_MARKERS; ++i)
733 *dst++ = util_cpu_to_le32(DEBUGGER_END_OF_CODE_MARKER);
734 }
735
736 /* Second pass: handle relocations, overwriting uploaded data where
737 * appropriate. */
738 for (unsigned i = 0; i < u->binary->num_parts; ++i) {
739 struct ac_rtld_part *part = &u->binary->parts[i];
740 Elf_Scn *section = NULL;
741 while ((section = elf_nextscn(part->elf, section))) {
742 Elf64_Shdr *shdr = elf64_getshdr(section);
743 if (shdr->sh_type == SHT_REL) {
744 Elf_Data *relocs = elf_getdata(section, NULL);
745 report_elf_if(!relocs || relocs->d_size != shdr->sh_size);
746 if (!apply_relocs(u, i, shdr, relocs))
747 return false;
748 } else if (shdr->sh_type == SHT_RELA) {
749 report_errorf("SHT_RELA not supported");
750 return false;
751 }
752 }
753 }
754
755 return true;
756
757 #undef report_if
758 #undef report_elf_if
759 }