/*
 * Copyright 2014-2019 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "ac_rtld.h"

#include <gelf.h>
#include <libelf.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "ac_binary.h"
#include "util/u_math.h"

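/* ac_rtld is a small run-time "linker" for AMDGPU shader ELF objects: it lays
 * out the read-execute (rx) sections of one or more shader parts in a single
 * memory image, uploads the section contents to a GPU buffer, and applies
 * relocations against the chosen virtual address.
 */
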
// Old distributions may not have this enum constant
#define MY_EM_AMDGPU 224

#ifndef R_AMDGPU_NONE
#define R_AMDGPU_NONE 0
#define R_AMDGPU_ABS32_LO 1
#define R_AMDGPU_ABS32_HI 2
#define R_AMDGPU_ABS64 3
#define R_AMDGPU_REL32 4
#define R_AMDGPU_REL64 5
#define R_AMDGPU_ABS32 6
#define R_AMDGPU_GOTPCREL 7
#define R_AMDGPU_GOTPCREL32_LO 8
#define R_AMDGPU_GOTPCREL32_HI 9
#define R_AMDGPU_REL32_LO 10
#define R_AMDGPU_REL32_HI 11
#define R_AMDGPU_RELATIVE64 13
#endif

/* For the UMR disassembler. */
#define DEBUGGER_END_OF_CODE_MARKER 0xbf9f0000 /* invalid instruction */
#define DEBUGGER_NUM_MARKERS 5
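/* ac_rtld_open reserves 4 * DEBUGGER_NUM_MARKERS bytes right after the pasted
 * code and ac_rtld_upload fills them with the marker above. */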

struct ac_rtld_section {
   bool is_rx : 1;
   bool is_pasted_text : 1;
   uint64_t offset;
   const char *name;
};

struct ac_rtld_part {
   Elf *elf;
   struct ac_rtld_section *sections;
   unsigned num_sections;
};

static void report_erroraf(const char *fmt, va_list va)
{
   char *msg;
   int ret = vasprintf(&msg, fmt, va);
   if (ret < 0)
      msg = "(vasprintf failed)";

   fprintf(stderr, "ac_rtld error: %s\n", msg);

   if (ret >= 0)
      free(msg);
}

static void report_errorf(const char *fmt, ...) PRINTFLIKE(1, 2);

static void report_errorf(const char *fmt, ...)
{
   va_list va;
   va_start(va, fmt);
   report_erroraf(fmt, va);
   va_end(va);
}

static void report_elf_errorf(const char *fmt, ...) PRINTFLIKE(1, 2);

static void report_elf_errorf(const char *fmt, ...)
{
   va_list va;
   va_start(va, fmt);
   report_erroraf(fmt, va);
   va_end(va);

   fprintf(stderr, "ELF error: %s\n", elf_errmsg(elf_errno()));
}

/**
 * Open a binary consisting of one or more shader parts.
 *
 * \param binary the uninitialized struct
 * \param num_parts number of shader parts
 * \param elf_ptrs pointers to the in-memory ELF objects for each shader part
 * \param elf_sizes sizes (in bytes) of the in-memory ELF objects
 */
bool ac_rtld_open(struct ac_rtld_binary *binary, unsigned num_parts,
                  const char * const *elf_ptrs,
                  const size_t *elf_sizes)
{
   /* One of the libelf implementations
    * (http://www.mr511.de/software/english.htm) requires calling
    * elf_version() before elf_memory().
    */
   elf_version(EV_CURRENT);

   memset(binary, 0, sizeof(*binary));
   binary->num_parts = num_parts;
   binary->parts = calloc(sizeof(*binary->parts), num_parts);
   if (!binary->parts)
      return false;

   uint64_t pasted_text_size = 0;
   uint64_t rx_align = 1;
   uint64_t rx_size = 0;

#define report_if(cond) \
   do { \
      if ((cond)) { \
         report_errorf(#cond); \
         goto fail; \
      } \
   } while (false)
#define report_elf_if(cond) \
   do { \
      if ((cond)) { \
         report_elf_errorf(#cond); \
         goto fail; \
      } \
   } while (false)

   /* First pass over all parts: open ELFs and determine the placement of
    * sections in the memory image. */
   for (unsigned i = 0; i < num_parts; ++i) {
      struct ac_rtld_part *part = &binary->parts[i];
      part->elf = elf_memory((char *)elf_ptrs[i], elf_sizes[i]);
      report_elf_if(!part->elf);

      const Elf64_Ehdr *ehdr = elf64_getehdr(part->elf);
      report_elf_if(!ehdr);
      report_if(ehdr->e_machine != MY_EM_AMDGPU);

      size_t section_str_index;
      size_t num_shdrs;
      report_elf_if(elf_getshdrstrndx(part->elf, &section_str_index) < 0);
      report_elf_if(elf_getshdrnum(part->elf, &num_shdrs) < 0);

      part->num_sections = num_shdrs;
      part->sections = calloc(sizeof(*part->sections), num_shdrs);
      report_if(!part->sections);

      Elf_Scn *section = NULL;
      while ((section = elf_nextscn(part->elf, section))) {
         Elf64_Shdr *shdr = elf64_getshdr(section);
         struct ac_rtld_section *s = &part->sections[elf_ndxscn(section)];
         s->name = elf_strptr(part->elf, section_str_index, shdr->sh_name);
         report_elf_if(!s->name);

         /* Cannot actually handle linked objects yet */
         report_elf_if(shdr->sh_addr != 0);

         /* Alignment must be 0 or a power of two */
         report_elf_if(shdr->sh_addralign & (shdr->sh_addralign - 1));
         uint64_t sh_align = MAX2(shdr->sh_addralign, 1);

         if (shdr->sh_flags & SHF_ALLOC &&
             shdr->sh_type != SHT_NOTE) {
            report_if(shdr->sh_flags & SHF_WRITE);

            s->is_rx = true;

            if (shdr->sh_flags & SHF_EXECINSTR) {
               report_elf_if(shdr->sh_size & 3);

               if (!strcmp(s->name, ".text"))
                  s->is_pasted_text = true;
            }

            if (s->is_pasted_text) {
               s->offset = pasted_text_size;
               pasted_text_size += shdr->sh_size;
            } else {
               rx_align = align(rx_align, sh_align);
               rx_size = align(rx_size, sh_align);
               s->offset = rx_size;
               rx_size += shdr->sh_size;
            }
         }
      }
   }

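   /* Resulting layout of the rx memory image:
    *   [pasted .text of all parts][end-of-code markers][other rx sections,
    *   aligned to rx_align]
    * Offsets of the non-pasted sections are made final in the second pass
    * below. */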
   binary->rx_end_markers = pasted_text_size;
   pasted_text_size += 4 * DEBUGGER_NUM_MARKERS;

   /* Second pass: Adjust offsets of non-pasted text sections. */
   binary->rx_size = pasted_text_size;
   binary->rx_size = align(binary->rx_size, rx_align);

   for (unsigned i = 0; i < num_parts; ++i) {
      struct ac_rtld_part *part = &binary->parts[i];
      size_t num_shdrs;
      elf_getshdrnum(part->elf, &num_shdrs);

      for (unsigned j = 0; j < num_shdrs; ++j) {
         struct ac_rtld_section *s = &part->sections[j];
         if (s->is_rx && !s->is_pasted_text)
            s->offset += binary->rx_size;
      }
   }

   binary->rx_size += rx_size;

   return true;

#undef report_if
#undef report_elf_if

fail:
   ac_rtld_close(binary);
   return false;
}

void ac_rtld_close(struct ac_rtld_binary *binary)
{
   for (unsigned i = 0; i < binary->num_parts; ++i) {
      struct ac_rtld_part *part = &binary->parts[i];
      free(part->sections);
      elf_end(part->elf);
   }

   free(binary->parts);
   binary->parts = NULL;
   binary->num_parts = 0;
}

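/* A minimal usage sketch of the open/upload/close API. This is not part of
 * the file; the buffer allocation and the symbol callback are hypothetical,
 * driver-specific helpers, shown only to illustrate the calling sequence:
 *
 *    struct ac_rtld_binary bin;
 *    const char *const elf_ptrs[] = { elf_binary_data };
 *    const size_t elf_sizes[] = { elf_binary_size };
 *    if (!ac_rtld_open(&bin, 1, elf_ptrs, elf_sizes))
 *       return false;
 *
 *    struct ac_shader_config conf = {0};
 *    ac_rtld_read_config(&bin, &conf);
 *
 *    char *rx_ptr;                                // CPU-visible mapping
 *    uint64_t rx_va = my_alloc_rx_buffer(bin.rx_size, &rx_ptr);
 *
 *    struct ac_rtld_upload_info u = {
 *       .binary = &bin,
 *       .get_external_symbol = my_lookup_symbol,  // see sketch further down
 *       .cb_data = NULL,
 *       .rx_va = rx_va,
 *       .rx_ptr = rx_ptr,
 *    };
 *    bool ok = ac_rtld_upload(&u);
 *
 *    ac_rtld_close(&bin);
 */
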
static bool get_section_by_name(struct ac_rtld_part *part, const char *name,
                                const char **data, size_t *nbytes)
{
   for (unsigned i = 0; i < part->num_sections; ++i) {
      struct ac_rtld_section *s = &part->sections[i];
      if (s->name && !strcmp(name, s->name)) {
         Elf_Scn *target_scn = elf_getscn(part->elf, i);
         Elf_Data *target_data = elf_getdata(target_scn, NULL);
         if (!target_data) {
            report_elf_errorf("ac_rtld: get_section_by_name: elf_getdata");
            return false;
         }

         *data = target_data->d_buf;
         *nbytes = target_data->d_size;
         return true;
      }
   }
   return false;
}

bool ac_rtld_get_section_by_name(struct ac_rtld_binary *binary, const char *name,
                                 const char **data, size_t *nbytes)
{
   assert(binary->num_parts == 1);
   return get_section_by_name(&binary->parts[0], name, data, nbytes);
}

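/**
 * Merge the .AMDGPU.config sections of all parts into a single
 * ac_shader_config, taking the maximum of the per-part register, spill and
 * scratch requirements. \p config is expected to be zero-initialized.
 */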
bool ac_rtld_read_config(struct ac_rtld_binary *binary,
                         struct ac_shader_config *config)
{
   for (unsigned i = 0; i < binary->num_parts; ++i) {
      struct ac_rtld_part *part = &binary->parts[i];
      const char *config_data;
      size_t config_nbytes;

      if (!get_section_by_name(part, ".AMDGPU.config",
                               &config_data, &config_nbytes))
         return false;

      /* TODO: be precise about scratch use? */
      struct ac_shader_config c = {};
      ac_parse_shader_binary_config(config_data, config_nbytes, true, &c);

      config->num_sgprs = MAX2(config->num_sgprs, c.num_sgprs);
      config->num_vgprs = MAX2(config->num_vgprs, c.num_vgprs);
      config->spilled_sgprs = MAX2(config->spilled_sgprs, c.spilled_sgprs);
      config->spilled_vgprs = MAX2(config->spilled_vgprs, c.spilled_vgprs);
      config->scratch_bytes_per_wave = MAX2(config->scratch_bytes_per_wave,
                                            c.scratch_bytes_per_wave);

      assert(i == 0 || config->float_mode == c.float_mode);
      config->float_mode = c.float_mode;

      /* SPI_PS_INPUT_ENA/ADDR can't be combined. Only the value from
       * the main shader part is used. */
      assert(config->spi_ps_input_ena == 0 &&
             config->spi_ps_input_addr == 0);
      config->spi_ps_input_ena = c.spi_ps_input_ena;
      config->spi_ps_input_addr = c.spi_ps_input_addr;

      /* TODO: consistently use LDS symbols for this */
      config->lds_size = MAX2(config->lds_size, c.lds_size);

      /* TODO: Should we combine these somehow? It's currently only
       * used for radeonsi's compute, where multiple parts aren't used. */
      assert(config->rsrc1 == 0 && config->rsrc2 == 0);
      config->rsrc1 = c.rsrc1;
      config->rsrc2 = c.rsrc2;
   }

   return true;
}

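/* Symbols that are undefined in every part are resolved through
 * u->get_external_symbol, which receives (cb_data, name, &value) and returns
 * true on success. A hypothetical callback (the symbol name and the address
 * source below are illustrative only, not part of ac_rtld) could look like:
 *
 *    static bool my_lookup_symbol(void *cb_data, const char *symbol,
 *                                 uint64_t *value)
 *    {
 *       struct my_driver_ctx *ctx = cb_data;
 *       if (!strcmp(symbol, "ring_buffer")) {
 *          *value = ctx->ring_buffer_va;
 *          return true;
 *       }
 *       return false; // unknown symbol: ac_rtld reports an error
 *    }
 */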
static bool resolve_symbol(const struct ac_rtld_upload_info *u,
                           unsigned part_idx, const Elf64_Sym *sym,
                           const char *name, uint64_t *value)
{
   if (sym->st_shndx == SHN_UNDEF) {
      /* TODO: resolve from other parts */

      if (u->get_external_symbol(u->cb_data, name, value))
         return true;

      report_errorf("symbol %s: unknown", name);
      return false;
   }

   struct ac_rtld_part *part = &u->binary->parts[part_idx];
   if (sym->st_shndx >= part->num_sections) {
      report_errorf("symbol %s: section out of bounds", name);
      return false;
   }

   struct ac_rtld_section *s = &part->sections[sym->st_shndx];
   if (!s->is_rx) {
      report_errorf("symbol %s: bad section", name);
      return false;
   }

   uint64_t section_base = u->rx_va + s->offset;

   *value = section_base + sym->st_value;
   return true;
}

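/**
 * Apply the relocations of one SHT_REL section of part \p part_idx. The
 * addend is read from the original ELF image and the resolved value
 * (symbol + addend, made PC-relative for the REL* types) is written to the
 * uploaded copy at u->rx_ptr.
 */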
static bool apply_relocs(const struct ac_rtld_upload_info *u,
                         unsigned part_idx, const Elf64_Shdr *reloc_shdr,
                         const Elf_Data *reloc_data)
{
#define report_if(cond) \
   do { \
      if ((cond)) { \
         report_errorf(#cond); \
         return false; \
      } \
   } while (false)
#define report_elf_if(cond) \
   do { \
      if ((cond)) { \
         report_elf_errorf(#cond); \
         return false; \
      } \
   } while (false)

   struct ac_rtld_part *part = &u->binary->parts[part_idx];
   Elf_Scn *target_scn = elf_getscn(part->elf, reloc_shdr->sh_info);
   report_elf_if(!target_scn);

   Elf_Data *target_data = elf_getdata(target_scn, NULL);
   report_elf_if(!target_data);

   Elf_Scn *symbols_scn = elf_getscn(part->elf, reloc_shdr->sh_link);
   report_elf_if(!symbols_scn);

   Elf64_Shdr *symbols_shdr = elf64_getshdr(symbols_scn);
   report_elf_if(!symbols_shdr);
   uint32_t strtabidx = symbols_shdr->sh_link;

   Elf_Data *symbols_data = elf_getdata(symbols_scn, NULL);
   report_elf_if(!symbols_data);

   const Elf64_Sym *symbols = symbols_data->d_buf;
   size_t num_symbols = symbols_data->d_size / sizeof(Elf64_Sym);

   struct ac_rtld_section *s = &part->sections[reloc_shdr->sh_info];
   report_if(!s->is_rx);

   const char *orig_base = target_data->d_buf;
   char *dst_base = u->rx_ptr + s->offset;
   uint64_t va_base = u->rx_va + s->offset;

   Elf64_Rel *rel = reloc_data->d_buf;
   size_t num_relocs = reloc_data->d_size / sizeof(*rel);
   for (size_t i = 0; i < num_relocs; ++i, ++rel) {
      size_t r_sym = ELF64_R_SYM(rel->r_info);
      unsigned r_type = ELF64_R_TYPE(rel->r_info);

      const char *orig_ptr = orig_base + rel->r_offset;
      char *dst_ptr = dst_base + rel->r_offset;
      uint64_t va = va_base + rel->r_offset;

      uint64_t symbol;
      uint64_t addend;

      if (r_sym == STN_UNDEF) {
         symbol = 0;
      } else {
         report_elf_if(r_sym >= num_symbols);

         const Elf64_Sym *sym = &symbols[r_sym];
         const char *symbol_name =
            elf_strptr(part->elf, strtabidx, sym->st_name);
         report_elf_if(!symbol_name);

         if (!resolve_symbol(u, part_idx, sym, symbol_name, &symbol))
            return false;
      }

      /* TODO: Should we also support .rela sections, where the
       * addend is part of the relocation record? */

      /* Load the addend from the ELF instead of the destination,
       * because the destination may be in VRAM. */
      switch (r_type) {
      case R_AMDGPU_ABS32:
      case R_AMDGPU_ABS32_LO:
      case R_AMDGPU_ABS32_HI:
      case R_AMDGPU_REL32:
      case R_AMDGPU_REL32_LO:
      case R_AMDGPU_REL32_HI:
         addend = *(const uint32_t *)orig_ptr;
         break;
      case R_AMDGPU_ABS64:
      case R_AMDGPU_REL64:
         addend = *(const uint64_t *)orig_ptr;
         break;
      default:
         report_errorf("unsupported r_type == %u", r_type);
         return false;
      }

      uint64_t abs = symbol + addend;

      switch (r_type) {
      case R_AMDGPU_ABS32:
         assert((uint32_t)abs == abs);
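         /* fall through: ABS32 is written like ABS32_LO once the range check passed */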
      case R_AMDGPU_ABS32_LO:
         *(uint32_t *)dst_ptr = util_cpu_to_le32(abs);
         break;
      case R_AMDGPU_ABS32_HI:
         *(uint32_t *)dst_ptr = util_cpu_to_le32(abs >> 32);
         break;
      case R_AMDGPU_ABS64:
         *(uint64_t *)dst_ptr = util_cpu_to_le64(abs);
         break;
      case R_AMDGPU_REL32:
         assert((int64_t)(int32_t)(abs - va) == (int64_t)(abs - va));
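         /* fall through: REL32 is written like REL32_LO after the range check */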
      case R_AMDGPU_REL32_LO:
         *(uint32_t *)dst_ptr = util_cpu_to_le32(abs - va);
         break;
      case R_AMDGPU_REL32_HI:
         *(uint32_t *)dst_ptr = util_cpu_to_le32((abs - va) >> 32);
         break;
      case R_AMDGPU_REL64:
         *(uint64_t *)dst_ptr = util_cpu_to_le64(abs - va);
         break;
      default:
         unreachable("bad r_type");
      }
   }

   return true;

#undef report_if
#undef report_elf_if
}

/**
 * Upload the binary or binaries to the provided GPU buffers, including
 * relocations.
 */
bool ac_rtld_upload(struct ac_rtld_upload_info *u)
{
#define report_if(cond) \
   do { \
      if ((cond)) { \
         report_errorf(#cond); \
         return false; \
      } \
   } while (false)
#define report_elf_if(cond) \
   do { \
      if ((cond)) { \
         report_elf_errorf(#cond); \
         return false; \
      } \
   } while (false)

512
513 /* First pass: upload raw section data. */
514 for (unsigned i = 0; i < u->binary->num_parts; ++i) {
515 struct ac_rtld_part *part = &u->binary->parts[i];
516 Elf_Scn *section = NULL;
517 while ((section = elf_nextscn(part->elf, section))) {
518 Elf64_Shdr *shdr = elf64_getshdr(section);
519 struct ac_rtld_section *s = &part->sections[elf_ndxscn(section)];
520
521 if (!s->is_rx)
522 continue;
523
524 report_if(shdr->sh_type != SHT_PROGBITS);
525
526 Elf_Data *data = elf_getdata(section, NULL);
527 report_elf_if(!data || data->d_size != shdr->sh_size);
528 memcpy(u->rx_ptr + s->offset, data->d_buf, shdr->sh_size);
529 }
530 }
531
532 if (u->binary->rx_end_markers) {
533 uint32_t *dst = (uint32_t *)(u->rx_ptr + u->binary->rx_end_markers);
534 for (unsigned i = 0; i < DEBUGGER_NUM_MARKERS; ++i)
535 *dst++ = util_cpu_to_le32(DEBUGGER_END_OF_CODE_MARKER);
536 }
537
538 /* Second pass: handle relocations, overwriting uploaded data where
539 * appropriate. */
540 for (unsigned i = 0; i < u->binary->num_parts; ++i) {
541 struct ac_rtld_part *part = &u->binary->parts[i];
542 Elf_Scn *section = NULL;
543 while ((section = elf_nextscn(part->elf, section))) {
544 Elf64_Shdr *shdr = elf64_getshdr(section);
545 if (shdr->sh_type == SHT_REL) {
546 Elf_Data *relocs = elf_getdata(section, NULL);
547 report_elf_if(!relocs || relocs->d_size != shdr->sh_size);
548 if (!apply_relocs(u, i, shdr, relocs))
549 return false;
550 } else if (shdr->sh_type == SHT_RELA) {
551 report_errorf("SHT_RELA not supported");
552 return false;
553 }
554 }
555 }
556
557 return true;
558
559 #undef report_if
560 #undef report_elf_if
561 }