From 49ea9b450bb1ca97f6e40c420c8cde5878e11048 Mon Sep 17 00:00:00 2001 From: Nick Alcock Date: Sat, 20 Jul 2019 14:44:44 +0100 Subject: [PATCH] libctf: add CU-mapping machinery Once the deduplicator is capable of actually detecting conflicting types with the same name (i.e., not yet) we will place such conflicting types, and types that depend on them, into CTF dictionaries that are children of the main dictionary we usually emit: currently, this will lead to the .ctf section becoming a CTF archive rather than a single dictionary, with the default-named archive member (_CTF_SECTION, or NULL) being the main shared dictionary with most of the types in it. By default, the sections are named after the compilation unit they come from (complete path and all), with the cuname field in the CTF header providing further evidence of the name without requiring the caller to engage in tiresome parsing. But some callers may not wish the mapping from input CU to output sub-dictionary to be purely CU-based. The machinery here allows this to be freely changed, in two ways: - callers can call ctf_link_add_cu_mapping to specify that a single input compilation unit should have its types placed in some other CU if they conflict: the CU will always be created, even if empty, so the consuming program can depend on its existence. You can map multiple input CUs to one output CU to force all their types to be merged together: if some of *those* types conflict, the behaviour is currently unspecified (the new deduplicator will specify it). - callers can call ctf_link_set_memb_name_changer to provide a function which is passed every CTF sub-dictionary name in turn (including _CTF_SECTION) and can return a new name, or NULL if no change is desired.
The mapping from input to output names should not map two input names to the same output name: if this happens, the two are not merged but will result in an archive with two members with the same name (technically valid, but it's hard to access the second same-named member: you have to do an iteration over archive members). This is used by the kernel's ctfarchive machinery (not yet upstream) to encode CTF under member names like {module name}.ctf rather than .ctf.CU, but it is anticipated that other large projects may wish to have their own storage for CTF outside of .ctf sections and may wish to have new naming schemes that suit their special-purpose consumers. New in v3. v4: check for strdup failure. v5: fix tabdamage. include/ * ctf-api.h (ctf_link_add_cu_mapping): New. (ctf_link_memb_name_changer_f): New. (ctf_link_set_memb_name_changer): New. libctf/ * ctf-impl.h (ctf_file_t) <ctf_link_cu_mapping>: New. <ctf_link_memb_name_changer>: Likewise. <ctf_link_memb_name_changer_arg>: Likewise. * ctf-create.c (ctf_update): Update accordingly. * ctf-open.c (ctf_file_close): Likewise. * ctf-link.c (ctf_create_per_cu): Apply the cu mapping. (ctf_link_add_cu_mapping): New. (ctf_link_set_memb_name_changer): Likewise. (ctf_change_parent_name): New. (ctf_name_list_accum_cb_arg_t) <dynames>: New, storage for names allocated by the caller's ctf_link_memb_name_changer. <ndynames>: Likewise. (ctf_accumulate_archive_names): Call the ctf_link_memb_name_changer. (ctf_link_write): Likewise (for _CTF_SECTION only): also call ctf_change_parent_name. Free any resulting names. --- include/ChangeLog | 6 ++ include/ctf-api.h | 10 +++ libctf/ChangeLog | 18 +++++ libctf/ctf-create.c | 4 ++ libctf/ctf-impl.h | 4 ++ libctf/ctf-link.c | 171 ++++++++++++++++++++++++++++++++++++++++++-- libctf/ctf-open.c | 1 + 7 files changed, 210 insertions(+), 4 deletions(-) diff --git a/include/ChangeLog b/include/ChangeLog index 0122c1d6280..ce7c17377a0 100644 --- a/include/ChangeLog +++ b/include/ChangeLog @@ -1,3 +1,9 @@ +2019-07-30 Nick Alcock + + * ctf-api.h (ctf_link_add_cu_mapping): New.
+ (ctf_link_memb_name_changer_f): New. + (ctf_link_set_memb_name_changer): New. + 2019-07-13 Nick Alcock * ctf-api.h (ECTF_INTERNAL): New. diff --git a/include/ctf-api.h b/include/ctf-api.h index 4130a2ecd19..4ac5fea8bc6 100644 --- a/include/ctf-api.h +++ b/include/ctf-api.h @@ -421,6 +421,16 @@ extern int ctf_link_shuffle_syms (ctf_file_t *, ctf_link_iter_symbol_f *, extern unsigned char *ctf_link_write (ctf_file_t *, size_t *size, size_t threshold); +/* Specialist linker functions. These functions are not used by ld, but can be + used by other programs making use of the linker machinery for other purposes + to customize its output. */ +extern int ctf_link_add_cu_mapping (ctf_file_t *, const char *from, + const char *to); +typedef char *ctf_link_memb_name_changer_f (ctf_file_t *, + const char *, void *); +extern void ctf_link_set_memb_name_changer + (ctf_file_t *, ctf_link_memb_name_changer_f *, void *); + extern void ctf_setdebug (int debug); extern int ctf_getdebug (void); diff --git a/libctf/ChangeLog b/libctf/ChangeLog index a5995eb25d8..b2726488cb6 100644 --- a/libctf/ChangeLog +++ b/libctf/ChangeLog @@ -1,3 +1,21 @@ +2019-07-30 Nick Alcock + + * ctf-impl.h (ctf_file_t) <ctf_link_cu_mapping>: New. + <ctf_link_memb_name_changer>: Likewise. + <ctf_link_memb_name_changer_arg>: Likewise. + * ctf-create.c (ctf_update): Update accordingly. + * ctf-open.c (ctf_file_close): Likewise. + * ctf-link.c (ctf_create_per_cu): Apply the cu mapping. + (ctf_link_add_cu_mapping): New. + (ctf_link_set_memb_name_changer): Likewise. + (ctf_change_parent_name): New. + (ctf_name_list_accum_cb_arg_t) <dynames>: New, storage for names + allocated by the caller's ctf_link_memb_name_changer. + <ndynames>: Likewise. + (ctf_accumulate_archive_names): Call the ctf_link_memb_name_changer. + (ctf_link_write): Likewise (for _CTF_SECTION only): also call + ctf_change_parent_name. Free any resulting names. + 2019-07-13 Nick Alcock * ctf-link.c (ctf_create_per_cu): New, refactored out of...
diff --git a/libctf/ctf-create.c b/libctf/ctf-create.c index 90e45f340b1..19da29c5db9 100644 --- a/libctf/ctf-create.c +++ b/libctf/ctf-create.c @@ -473,7 +473,10 @@ ctf_update (ctf_file_t *fp) nfp->ctf_link_inputs = fp->ctf_link_inputs; nfp->ctf_link_outputs = fp->ctf_link_outputs; nfp->ctf_syn_ext_strtab = fp->ctf_syn_ext_strtab; + nfp->ctf_link_cu_mapping = fp->ctf_link_cu_mapping; nfp->ctf_link_type_mapping = fp->ctf_link_type_mapping; + nfp->ctf_link_memb_name_changer = fp->ctf_link_memb_name_changer; + nfp->ctf_link_memb_name_changer_arg = fp->ctf_link_memb_name_changer_arg; nfp->ctf_snapshot_lu = fp->ctf_snapshots; @@ -486,6 +489,7 @@ ctf_update (ctf_file_t *fp) fp->ctf_link_inputs = NULL; fp->ctf_link_outputs = NULL; fp->ctf_syn_ext_strtab = NULL; + fp->ctf_link_cu_mapping = NULL; fp->ctf_link_type_mapping = NULL; fp->ctf_dvhash = NULL; diff --git a/libctf/ctf-impl.h b/libctf/ctf-impl.h index c5225549298..554b1872cbf 100644 --- a/libctf/ctf-impl.h +++ b/libctf/ctf-impl.h @@ -281,6 +281,10 @@ struct ctf_file ctf_dynhash_t *ctf_link_inputs; /* Inputs to this link. */ ctf_dynhash_t *ctf_link_outputs; /* Additional outputs from this link. */ ctf_dynhash_t *ctf_link_type_mapping; /* Map input types to output types. */ + ctf_dynhash_t *ctf_link_cu_mapping; /* Map CU names to CTF dict names. */ + /* Allow the caller to Change the name of link archive members. */ + ctf_link_memb_name_changer_f *ctf_link_memb_name_changer; + void *ctf_link_memb_name_changer_arg; /* Argument for it. */ char *ctf_tmp_typeslice; /* Storage for slicing up type names. */ size_t ctf_tmp_typeslicelen; /* Size of the typeslice. */ void *ctf_specific; /* Data for ctf_get/setspecific(). 
*/ diff --git a/libctf/ctf-link.c b/libctf/ctf-link.c index 8dd81d1f124..28c21132607 100644 --- a/libctf/ctf-link.c +++ b/libctf/ctf-link.c @@ -182,9 +182,26 @@ static ctf_file_t * ctf_create_per_cu (ctf_file_t *fp, const char *filename, const char *cuname) { ctf_file_t *cu_fp; + const char *ctf_name = NULL; char *dynname = NULL; - if ((cu_fp = ctf_dynhash_lookup (fp->ctf_link_outputs, filename)) == NULL) + /* First, check the mapping table and translate the per-CU name we use + accordingly. We check both the input filename and the CU name. Only if + neither are set do we fall back to the input filename as the per-CU + dictionary name. We prefer the filename because this is easier for likely + callers to determine. */ + + if (fp->ctf_link_cu_mapping) + { + if (((ctf_name = ctf_dynhash_lookup (fp->ctf_link_cu_mapping, filename)) == NULL) && + ((ctf_name = ctf_dynhash_lookup (fp->ctf_link_cu_mapping, cuname)) == NULL)) + ctf_name = filename; + } + + if (ctf_name == NULL) + ctf_name = filename; + + if ((cu_fp = ctf_dynhash_lookup (fp->ctf_link_outputs, ctf_name)) == NULL) { int err; @@ -197,7 +214,7 @@ ctf_create_per_cu (ctf_file_t *fp, const char *filename, const char *cuname) return NULL; } - if ((dynname = strdup (filename)) == NULL) + if ((dynname = strdup (ctf_name)) == NULL) goto oom; if (ctf_dynhash_insert (fp->ctf_link_outputs, dynname, cu_fp) < 0) goto oom; @@ -215,6 +232,79 @@ ctf_create_per_cu (ctf_file_t *fp, const char *filename, const char *cuname) return NULL; } +/* Add a mapping directing that the CU named FROM should have its + conflicting/non-duplicate types (depending on link mode) go into a container + named TO. Many FROMs can share a TO: in this case, the effect on conflicting + types is not yet defined (but in time an auto-renaming algorithm will be + added: ugly, but there is really no right thing one can do in this + situation). 
+ + We forcibly add a container named TO in every case, even though it may well + wind up empty, because clients that use this facility usually expect to find + every TO container present, even if empty, and malfunction otherwise. */ + +int +ctf_link_add_cu_mapping (ctf_file_t *fp, const char *from, const char *to) +{ + int err; + char *f, *t; + + if (fp->ctf_link_cu_mapping == NULL) + fp->ctf_link_cu_mapping = ctf_dynhash_create (ctf_hash_string, + ctf_hash_eq_string, free, + free); + if (fp->ctf_link_cu_mapping == NULL) + return ctf_set_errno (fp, ENOMEM); + + if (fp->ctf_link_outputs == NULL) + fp->ctf_link_outputs = ctf_dynhash_create (ctf_hash_string, + ctf_hash_eq_string, free, + ctf_file_close_thunk); + + if (fp->ctf_link_outputs == NULL) + return ctf_set_errno (fp, ENOMEM); + + f = strdup (from); + t = strdup (to); + if (!f || !t) + goto oom; + + if (ctf_create_per_cu (fp, t, t) == NULL) + goto oom_noerrno; /* Errno is set for us. */ + + err = ctf_dynhash_insert (fp->ctf_link_cu_mapping, f, t); + if (err) + { + ctf_set_errno (fp, err); + goto oom_noerrno; + } + + return 0; + + oom: + ctf_set_errno (fp, errno); + oom_noerrno: + free (f); + free (t); + return -1; +} + +/* Set a function which is called to transform the names of archive members. + This is useful for applying regular transformations to many names, where + ctf_link_add_cu_mapping applies arbitrarily irregular changes to single + names. The member name changer is applied at ctf_link_write time, so it + cannot conflate multiple CUs into one the way ctf_link_add_cu_mapping can. + The changer function accepts a name and should return a new + dynamically-allocated name, or NULL if the name should be left unchanged. 
*/ +void +ctf_link_set_memb_name_changer (ctf_file_t *fp, + ctf_link_memb_name_changer_f *changer, + void *arg) +{ + fp->ctf_link_memb_name_changer = changer; + fp->ctf_link_memb_name_changer_arg = arg; +} + typedef struct ctf_link_in_member_cb_arg { ctf_file_t *out_fp; @@ -266,7 +356,7 @@ ctf_link_one_type (ctf_id_t type, int isroot _libctf_unused_, void *arg_) ctf_set_errno (arg->out_fp, 0); } - if ((per_cu_out_fp = ctf_create_per_cu (arg->out_fp, arg->arcname, + if ((per_cu_out_fp = ctf_create_per_cu (arg->out_fp, arg->file_name, arg->cu_name)) == NULL) return -1; /* Errno is set for us. */ @@ -347,7 +437,7 @@ ctf_link_one_variable (const char *name, ctf_id_t type, void *arg_) type only present in the child. Try adding to the child, creating if need be. */ - if ((per_cu_out_fp = ctf_create_per_cu (arg->out_fp, arg->arcname, + if ((per_cu_out_fp = ctf_create_per_cu (arg->out_fp, arg->file_name, arg->cu_name)) == NULL) return -1; /* Errno is set for us. */ @@ -589,6 +679,8 @@ typedef struct ctf_name_list_accum_cb_arg ctf_file_t *fp; ctf_file_t **files; size_t i; + char **dynames; + size_t ndynames; } ctf_name_list_accum_cb_arg_t; /* Accumulate the names and a count of the names in the link output hash, @@ -622,12 +714,51 @@ ctf_accumulate_archive_names (void *key, void *value, void *arg_) ctf_set_errno (arg->fp, ENOMEM); return; } + + /* Allow the caller to get in and modify the name at the last minute. If the + caller *does* modify the name, we have to stash away the new name the + caller returned so we can free it later on. (The original name is the key + of the ctf_link_outputs hash and is freed by the dynhash machinery.) 
*/ + + if (fp->ctf_link_memb_name_changer) + { + char **dynames; + char *dyname; + void *nc_arg = fp->ctf_link_memb_name_changer_arg; + + dyname = fp->ctf_link_memb_name_changer (fp, name, nc_arg); + + if (dyname != NULL) + { + if ((dynames = realloc (arg->dynames, + sizeof (char *) * ++(arg->ndynames))) == NULL) + { + (arg->ndynames)--; + ctf_set_errno (arg->fp, ENOMEM); + return; + } + arg->dynames = dynames; + name = (const char *) dyname; + } + } + arg->names = names; arg->names[(arg->i) - 1] = (char *) name; arg->files = files; arg->files[(arg->i) - 1] = fp; } +/* Change the name of the parent CTF section, if the name transformer has got to + it. */ +static void +ctf_change_parent_name (void *key _libctf_unused_, void *value, void *arg) +{ + ctf_file_t *fp = (ctf_file_t *) value; + const char *name = (const char *) arg; + + ctf_parent_name_set (fp, name); +} + /* Write out a CTF archive (if there are per-CU CTF files) or a CTF file (otherwise) into a new dynamically-allocated string, and return it. Members with sizes above THRESHOLD are compressed. 
*/ @@ -636,6 +767,7 @@ ctf_link_write (ctf_file_t *fp, size_t *size, size_t threshold) { ctf_name_list_accum_cb_arg_t arg; char **names; + char *transformed_name = NULL; ctf_file_t **files; FILE *f = NULL; int err; @@ -675,7 +807,22 @@ ctf_link_write (ctf_file_t *fp, size_t *size, size_t threshold) } arg.names = names; memmove (&(arg.names[1]), arg.names, sizeof (char *) * (arg.i)); + arg.names[0] = (char *) _CTF_SECTION; + if (fp->ctf_link_memb_name_changer) + { + void *nc_arg = fp->ctf_link_memb_name_changer_arg; + + transformed_name = fp->ctf_link_memb_name_changer (fp, _CTF_SECTION, + nc_arg); + + if (transformed_name != NULL) + { + arg.names[0] = transformed_name; + ctf_dynhash_iter (fp->ctf_link_outputs, ctf_change_parent_name, + transformed_name); + } + } if ((files = realloc (arg.files, sizeof (struct ctf_file *) * (arg.i + 1))) == NULL) @@ -736,6 +883,14 @@ ctf_link_write (ctf_file_t *fp, size_t *size, size_t threshold) *size = fsize; free (arg.names); free (arg.files); + free (transformed_name); + if (arg.ndynames) + { + size_t i; + for (i = 0; i < arg.ndynames; i++) + free (arg.dynames[i]); + free (arg.dynames); + } return buf; err_no: @@ -746,6 +901,14 @@ ctf_link_write (ctf_file_t *fp, size_t *size, size_t threshold) fclose (f); free (arg.names); free (arg.files); + free (transformed_name); + if (arg.ndynames) + { + size_t i; + for (i = 0; i < arg.ndynames; i++) + free (arg.dynames[i]); + free (arg.dynames); + } ctf_dprintf ("Cannot write archive in link: %s failure: %s\n", errloc, ctf_errmsg (ctf_errno (fp))); return NULL; diff --git a/libctf/ctf-open.c b/libctf/ctf-open.c index 600fe8fcae3..2e1913bb373 100644 --- a/libctf/ctf-open.c +++ b/libctf/ctf-open.c @@ -1628,6 +1628,7 @@ ctf_file_close (ctf_file_t *fp) ctf_dynhash_destroy (fp->ctf_link_inputs); ctf_dynhash_destroy (fp->ctf_link_outputs); ctf_dynhash_destroy (fp->ctf_link_type_mapping); + ctf_dynhash_destroy (fp->ctf_link_cu_mapping); ctf_free (fp->ctf_sxlate); ctf_free (fp->ctf_txlate); -- 
2.30.2