From a4f49061b6d921f72b2faf4843144f3c75f828f7 Mon Sep 17 00:00:00 2001 From: Andrew Stubbs Date: Thu, 9 Jul 2020 22:48:39 +0100 Subject: [PATCH] amdgcn: Handle early debug info in mkoffload Forward the early debug information from the input LTO file to the output HSACO file, in the same way lto-wrapper does. This is a little more complicated, however, because the ELF file containing the debug needs to be converted from x86_64 to amdgcn, and because the offloaded code will have less content than the host program the debug info describes. gcc/ChangeLog: * config/gcn/mkoffload.c: Include simple-object.h and elf.h. (EM_AMDGPU): New macro. (ELFOSABI_AMDGPU_HSA): New macro. (ELFABIVERSION_AMDGPU_HSA): New macro. (EF_AMDGPU_MACH_AMDGCN_GFX803): New macro. (EF_AMDGPU_MACH_AMDGCN_GFX900): New macro. (EF_AMDGPU_MACH_AMDGCN_GFX906): New macro. (R_AMDGPU_NONE): New macro. (R_AMDGPU_ABS32_LO): New macro. (R_AMDGPU_ABS32_HI): New macro. (R_AMDGPU_ABS64): New macro. (R_AMDGPU_REL32): New macro. (R_AMDGPU_REL64): New macro. (R_AMDGPU_ABS32): New macro. (R_AMDGPU_GOTPCREL): New macro. (R_AMDGPU_GOTPCREL32_LO): New macro. (R_AMDGPU_GOTPCREL32_HI): New macro. (R_AMDGPU_REL32_LO): New macro. (R_AMDGPU_REL32_HI): New macro. (reserved): New macro. (R_AMDGPU_RELATIVE64): New macro. (gcn_s1_name): Delete global variable. (gcn_s2_name): Delete global variable. (gcn_o_name): Delete global variable. (gcn_cfile_name): Delete global variable. (files_to_cleanup): New global variable. (offload_abi): New global variable. (tool_cleanup): Use files_to_cleanup, not explicit list. (copy_early_debug_info): New function. (main): New local variables gcn_s1_name, gcn_s2_name, gcn_o_name, gcn_cfile_name. Create files_to_cleanup obstack. Recognize -march options. Copy early debug info from input .o files. --- gcc/config/gcn/mkoffload.c | 272 +++++++++++++++++++++++++++++++++++-- 1 file changed, 259 insertions(+), 13 deletions(-) diff --git a/gcc/config/gcn/mkoffload.c b/gcc/config/gcn/mkoffload.c index 0415d945e72..553f25e70df 100644 --- a/gcc/config/gcn/mkoffload.c +++ b/gcc/config/gcn/mkoffload.c @@ -33,31 +33,53 @@ #include #include "collect-utils.h" #include "gomp-constants.h" +#include "simple-object.h" +#include "elf.h" + +/* These probably won't be in elf.h for a while. */ +#ifndef EM_AMDGPU +#define EM_AMDGPU 0xe0; + +#define ELFOSABI_AMDGPU_HSA 64 +#define ELFABIVERSION_AMDGPU_HSA 1 + +#define EF_AMDGPU_MACH_AMDGCN_GFX803 0x2a +#define EF_AMDGPU_MACH_AMDGCN_GFX900 0x2c +#define EF_AMDGPU_MACH_AMDGCN_GFX906 0x2f + +#define R_AMDGPU_NONE 0 +#define R_AMDGPU_ABS32_LO 1 /* (S + A) & 0xFFFFFFFF */ +#define R_AMDGPU_ABS32_HI 2 /* (S + A) >> 32 */ +#define R_AMDGPU_ABS64 3 /* S + A */ +#define R_AMDGPU_REL32 4 /* S + A - P */ +#define R_AMDGPU_REL64 5 /* S + A - P */ +#define R_AMDGPU_ABS32 6 /* S + A */ +#define R_AMDGPU_GOTPCREL 7 /* G + GOT + A - P */ +#define R_AMDGPU_GOTPCREL32_LO 8 /* (G + GOT + A - P) & 0xFFFFFFFF */ +#define R_AMDGPU_GOTPCREL32_HI 9 /* (G + GOT + A - P) >> 32 */ +#define R_AMDGPU_REL32_LO 10 /* (S + A - P) & 0xFFFFFFFF */ +#define R_AMDGPU_REL32_HI 11 /* (S + A - P) >> 32 */ +#define reserved 12 +#define R_AMDGPU_RELATIVE64 13 /* B + A */ +#endif const char tool_name[] = "gcn mkoffload"; -/* Files to unlink. */ -static const char *gcn_s1_name; -static const char *gcn_s2_name; -static const char *gcn_o_name; -static const char *gcn_cfile_name; static const char *gcn_dumpbase; +static struct obstack files_to_cleanup; enum offload_abi offload_abi = OFFLOAD_ABI_UNSET; +uint32_t elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX803; // Default GPU architecture. /* Delete tempfiles. */ void tool_cleanup (bool from_signal ATTRIBUTE_UNUSED) { - if (gcn_cfile_name) - maybe_unlink (gcn_cfile_name); - if (gcn_s1_name) - maybe_unlink (gcn_s1_name); - if (gcn_s2_name) - maybe_unlink (gcn_s2_name); - if (gcn_o_name) - maybe_unlink (gcn_o_name); + obstack_ptr_grow (&files_to_cleanup, NULL); + const char **files = XOBFINISH (&files_to_cleanup, const char **); + for (int i = 0; files[i]; i++) + maybe_unlink (files[i]); } static void @@ -204,6 +226,180 @@ access_check (const char *name, int mode) return access (name, mode); } +/* Copy the early-debug-info from the incoming LTO object to a new object + that will be linked into the output HSACO file. The host relocations + must be translated into GCN relocations, and any global undefined symbols + must be weakened (so as not to have the debug info try to pull in host + junk). + + Returns true if the file was created, false otherwise. */ + +static bool +copy_early_debug_info (const char *infile, const char *outfile) +{ + const char *errmsg; + int err; + + /* The simple_object code can handle extracting the debug sections. + This code is based on that in lto-wrapper.c. */ + int infd = open (infile, O_RDONLY | O_BINARY); + if (infd == -1) + return false; + simple_object_read *inobj = simple_object_start_read (infd, 0, + "__GNU_LTO", + &errmsg, &err); + if (!inobj) + return false; + + off_t off, len; + if (simple_object_find_section (inobj, ".gnu.debuglto_.debug_info", + &off, &len, &errmsg, &err) != 1) + { + simple_object_release_read (inobj); + close (infd); + return false; + } + + errmsg = simple_object_copy_lto_debug_sections (inobj, outfile, &err, true); + if (errmsg) + { + unlink_if_ordinary (outfile); + return false; + } + + simple_object_release_read (inobj); + close (infd); + + /* Open the file we just created for some adjustments. + The simple_object code can't do this, so we do it manually. */ + FILE *outfd = fopen (outfile, "r+b"); + if (!outfd) + return false; + + Elf64_Ehdr ehdr; + if (fread (&ehdr, sizeof (ehdr), 1, outfd) != 1) + { + fclose (outfd); + return true; + } + + /* We only support host relocations of x86_64, for now. */ + gcc_assert (ehdr.e_machine == EM_X86_64); + + /* Patch the correct elf architecture flag into the file. */ + ehdr.e_ident[7] = ELFOSABI_AMDGPU_HSA; + ehdr.e_ident[8] = ELFABIVERSION_AMDGPU_HSA; + ehdr.e_type = ET_REL; + ehdr.e_machine = EM_AMDGPU; + ehdr.e_flags = elf_arch; + + /* Load the section headers so we can walk them later. */ + Elf64_Shdr *sections = (Elf64_Shdr *)xmalloc (sizeof (Elf64_Shdr) + * ehdr.e_shnum); + if (fseek (outfd, ehdr.e_shoff, SEEK_SET) == -1 + || fread (sections, sizeof (Elf64_Shdr), ehdr.e_shnum, + outfd) != ehdr.e_shnum) + { + free (sections); + fclose (outfd); + return true; + } + + /* Convert the host relocations to target relocations. */ + for (int i = 0; i < ehdr.e_shnum; i++) + { + if (sections[i].sh_type != SHT_RELA) + continue; + + char *data = (char *)xmalloc (sections[i].sh_size); + if (fseek (outfd, sections[i].sh_offset, SEEK_SET) == -1 + || fread (data, sections[i].sh_size, 1, outfd) != 1) + { + free (data); + continue; + } + + for (size_t offset = 0; + offset < sections[i].sh_size; + offset += sections[i].sh_entsize) + { + Elf64_Rela *reloc = (Elf64_Rela *) (data + offset); + + /* Map the host relocations to GCN relocations. + Only relocations that can appear in DWARF need be handled. */ + switch (ELF64_R_TYPE (reloc->r_info)) + { + case R_X86_64_32: + case R_X86_64_32S: + reloc->r_info = R_AMDGPU_ABS32; + break; + case R_X86_64_PC32: + reloc->r_info = R_AMDGPU_REL32; + break; + case R_X86_64_PC64: + reloc->r_info = R_AMDGPU_REL64; + break; + case R_X86_64_64: + reloc->r_info = R_AMDGPU_ABS64; + break; + case R_X86_64_RELATIVE: + reloc->r_info = R_AMDGPU_RELATIVE64; + break; + default: + gcc_unreachable (); + } + } + + /* Write back our relocation changes. */ + if (fseek (outfd, sections[i].sh_offset, SEEK_SET) != -1) + fwrite (data, sections[i].sh_size, 1, outfd); + + free (data); + } + + /* Weaken any global undefined symbols that would pull in unwanted + objects. */ + for (int i = 0; i < ehdr.e_shnum; i++) + { + if (sections[i].sh_type != SHT_SYMTAB) + continue; + + char *data = (char *)xmalloc (sections[i].sh_size); + if (fseek (outfd, sections[i].sh_offset, SEEK_SET) == -1 + || fread (data, sections[i].sh_size, 1, outfd) != 1) + { + free (data); + continue; + } + + for (size_t offset = 0; + offset < sections[i].sh_size; + offset += sections[i].sh_entsize) + { + Elf64_Sym *sym = (Elf64_Sym *) (data + offset); + int type = ELF64_ST_TYPE (sym->st_info); + int bind = ELF64_ST_BIND (sym->st_info); + + if (bind == STB_GLOBAL && sym->st_shndx == 0) + sym->st_info = ELF64_ST_INFO (STB_WEAK, type); + } + + /* Write back our symbol changes. */ + if (fseek (outfd, sections[i].sh_offset, SEEK_SET) != -1) + fwrite (data, sections[i].sh_size, 1, outfd); + + free (data); + } + free (sections); + + /* Write back our header changes. */ + rewind (outfd); + fwrite (&ehdr, sizeof (ehdr), 1, outfd); + + fclose (outfd); + return true; +} + /* Parse an input assembler file, extract the offload tables etc., and output (1) the assembler code, minus the tables (which can contain problematic relocations), and (2) a C file with the offload tables @@ -538,9 +734,15 @@ main (int argc, char **argv) FILE *cfile = stdout; const char *outname = 0; + const char *gcn_s1_name; + const char *gcn_s2_name; + const char *gcn_o_name; + const char *gcn_cfile_name; + progname = "mkoffload"; diagnostic_initialize (global_dc, 0); + obstack_init (&files_to_cleanup); if (atexit (mkoffload_cleanup) != 0) fatal_error (input_location, "atexit failed"); @@ -632,7 +834,14 @@ main (int argc, char **argv) else if (strcmp (argv[i], "-dumpbase") == 0 && i + 1 < argc) dumppfx = argv[++i]; + else if (strcmp (argv[i], "-march=fiji") == 0) + elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX803; + else if (strcmp (argv[i], "-march=gfx900") == 0) + elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX900; + else if (strcmp (argv[i], "-march=gfx906") == 0) + elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX906; } + if (!(fopenacc ^ fopenmp)) fatal_error (input_location, "either -fopenacc or -fopenmp must be set"); @@ -693,6 +902,10 @@ main (int argc, char **argv) gcn_o_name = make_temp_file (".mkoffload.hsaco"); gcn_cfile_name = make_temp_file (".c"); } + obstack_ptr_grow (&files_to_cleanup, gcn_s1_name); + obstack_ptr_grow (&files_to_cleanup, gcn_s2_name); + obstack_ptr_grow (&files_to_cleanup, gcn_o_name); + obstack_ptr_grow (&files_to_cleanup, gcn_cfile_name); obstack_ptr_grow (&cc_argv_obstack, "-dumpdir"); obstack_ptr_grow (&cc_argv_obstack, ""); @@ -710,6 +923,39 @@ main (int argc, char **argv) struct obstack ld_argv_obstack; obstack_init (&ld_argv_obstack); obstack_ptr_grow (&ld_argv_obstack, driver); + + /* Extract early-debug information from the input objects. + This loop finds all the inputs that end ".o" and aren't the output. */ + int dbgcount = 0; + for (int ix = 1; ix != argc; ix++) + { + if (!strcmp (argv[ix], "-o") && ix + 1 != argc) + ++ix; + else + { + if (strcmp (argv[ix] + strlen(argv[ix]) - 2, ".o") == 0) + { + char *dbgobj; + if (save_temps) + { + char buf[10]; + sprintf (buf, "%d", dbgcount++); + dbgobj = concat (dumppfx, ".mkoffload.dbg", buf, ".o", NULL); + } + else + dbgobj = make_temp_file (".mkoffload.dbg.o"); + + /* If the copy fails then just ignore it. */ + if (copy_early_debug_info (argv[ix], dbgobj)) + { + obstack_ptr_grow (&ld_argv_obstack, dbgobj); + obstack_ptr_grow (&files_to_cleanup, dbgobj); + } + else + free (dbgobj); + } + } + } obstack_ptr_grow (&ld_argv_obstack, gcn_s2_name); obstack_ptr_grow (&ld_argv_obstack, "-lgomp"); -- 2.30.2