From 99dc3ebdfff927b30db58117d7bd80586e273669 Mon Sep 17 00:00:00 2001 From: Nick Alcock Date: Wed, 7 Aug 2019 18:01:08 +0100 Subject: [PATCH] libctf: properly handle ctf_add_type of forwards and self-reffing structs The code to handle structures (and unions) that refer to themselves in ctf_add_type is extremely dodgy. It works by looking through the list of not-yet-committed types for a structure with the same name as the structure in question and assuming, if it finds it, that this must be a reference to the same type. This is a linear search that gets ever slower as the dictionary grows, requiring you to call ctf_update at intervals to keep performance tolerable: but if you do that, you run into the problem that if a forward declared before the ctf_update is changed to a structure afterwards, ctf_update explodes. The last commit fixed most of this: this commit can use it, adding a new ctf_add_processing hash that tracks source type IDs that are currently being processed and uses it to avoid infinite recursion rather than the dynamic type list: we split ctf_add_type into a ctf_add_type_internal, so that ctf_add_type itself can become a wrapper that empties out this being-processed hash once the entire recursive type addition is over. Structure additions themselves avoid adding their dependent types quite so much by checking the type mapping and avoiding re-adding types we already know we have added. We also add support for adding forwards to dictionaries that already contain the thing they are a forward to: we just silently return the original type. v4: return existing struct/union/enum types properly, rather than using an uninitialized variable: shrinks sizes of CTF sections back down to roughly where they were in v1/v2 of this patch series. v5: fix tabdamage. libctf/ * ctf-impl.h (ctf_file_t) : New. * ctf-open.c (ctf_file_close): Free it. * ctf-create.c (ctf_serialize): Adjust. (membcmp): When reporting a conflict due to an error, report the error. (ctf_add_type): Turn into a ctf_add_processing wrapper. Rename to... (ctf_add_type_internal): ... this. Hand back types we are already in the middle of adding immediately. Hand back structs/unions with the same number of members immediately. Do not walk the dynamic list. Call ctf_add_type_internal, not ctf_add_type. Handle forwards promoted to other types and the inverse case identically. Add structs to the mapping as soon as we intern them, before they gain any members. --- libctf/ChangeLog | 16 ++++ libctf/ctf-create.c | 201 +++++++++++++++++++++++++------------------- libctf/ctf-impl.h | 1 + libctf/ctf-open.c | 1 + 4 files changed, 134 insertions(+), 85 deletions(-) diff --git a/libctf/ChangeLog b/libctf/ChangeLog index 723db81f182..106955385db 100644 --- a/libctf/ChangeLog +++ b/libctf/ChangeLog @@ -1,3 +1,19 @@ +2019-08-07 Nick Alcock + + * ctf-impl.h (ctf_file_t) : New. + * ctf-open.c (ctf_file_close): Free it. + * ctf-create.c (ctf_serialize): Adjust. + (membcmp): When reporting a conflict due to an error, report the + error. + (ctf_add_type): Turn into a ctf_add_processing wrapper. Rename to... + (ctf_add_type_internal): ... this. Hand back types we are already + in the middle of adding immediately. Hand back structs/unions with + the same number of members immediately. Do not walk the dynamic + list. Call ctf_add_type_internal, not ctf_add_type. Handle + forwards promoted to other types and the inverse case identically. + Add structs to the mapping as soon as we intern them, before they + gain any members. + 2019-08-09 Nick Alcock * ctf-impl.h (ctf_names_t): New. diff --git a/libctf/ctf-create.c b/libctf/ctf-create.c index 16e7de85c15..22635af61f1 100644 --- a/libctf/ctf-create.c +++ b/libctf/ctf-create.c @@ -525,6 +525,7 @@ ctf_serialize (ctf_file_t *fp) nfp->ctf_dvhash = fp->ctf_dvhash; nfp->ctf_dvdefs = fp->ctf_dvdefs; nfp->ctf_dtoldid = fp->ctf_dtoldid; + nfp->ctf_add_processing = fp->ctf_add_processing; nfp->ctf_snapshots = fp->ctf_snapshots + 1; nfp->ctf_specific = fp->ctf_specific; nfp->ctf_ptrtab = fp->ctf_ptrtab; @@ -553,6 +554,7 @@ ctf_serialize (ctf_file_t *fp) fp->ctf_str_atoms = NULL; fp->ctf_prov_strtab = NULL; memset (&fp->ctf_dtdefs, 0, sizeof (ctf_list_t)); + fp->ctf_add_processing = NULL; fp->ctf_ptrtab = NULL; fp->ctf_link_inputs = NULL; fp->ctf_link_outputs = NULL; @@ -1527,7 +1529,8 @@ enumcmp (const char *name, int value, void *arg) if (ctf_enum_value (ctb->ctb_file, ctb->ctb_type, name, &bvalue) < 0) { - ctf_dprintf ("Conflict due to member %s iteration error.\n", name); + ctf_dprintf ("Conflict due to member %s iteration error: %s.\n", name, + ctf_errmsg (ctf_errno (ctb->ctb_file))); return 1; } if (value != bvalue) @@ -1557,7 +1560,8 @@ membcmp (const char *name, ctf_id_t type _libctf_unused_, unsigned long offset, if (ctf_member_info (ctb->ctb_file, ctb->ctb_type, name, &ctm) < 0) { - ctf_dprintf ("Conflict due to member %s iteration error.\n", name); + ctf_dprintf ("Conflict due to member %s iteration error: %s.\n", name, + ctf_errmsg (ctf_errno (ctb->ctb_file))); return 1; } if (ctm.ctm_offset != offset) @@ -1603,11 +1607,13 @@ membadd (const char *name, ctf_id_t type, unsigned long offset, void *arg) following the source type's links and embedded member types. If the destination container already contains a named type which has the same attributes, then we succeed and return this type but no changes occur. */ -ctf_id_t -ctf_add_type (ctf_file_t *dst_fp, ctf_file_t *src_fp, ctf_id_t src_type) +static ctf_id_t +ctf_add_type_internal (ctf_file_t *dst_fp, ctf_file_t *src_fp, ctf_id_t src_type, + ctf_file_t *proc_tracking_fp) { ctf_id_t dst_type = CTF_ERR; uint32_t dst_kind = CTF_K_UNKNOWN; + ctf_file_t *tmp_fp = dst_fp; ctf_id_t tmp; const char *name; @@ -1618,7 +1624,6 @@ ctf_add_type (ctf_file_t *dst_fp, ctf_file_t *src_fp, ctf_id_t src_type) ctf_encoding_t src_en, dst_en; ctf_arinfo_t src_ar, dst_ar; - ctf_dtdef_t *dtd; ctf_funcinfo_t ctc; ctf_id_t orig_src_type = src_type; @@ -1638,6 +1643,33 @@ ctf_add_type (ctf_file_t *dst_fp, ctf_file_t *src_fp, ctf_id_t src_type) flag = LCTF_INFO_ISROOT (src_fp, src_tp->ctt_info); vlen = LCTF_INFO_VLEN (src_fp, src_tp->ctt_info); + /* If this is a type we are currently in the middle of adding, hand it + straight back. (This lets us handle self-referential structures without + considering forwards and empty structures the same as their completed + forms.) */ + + tmp = ctf_type_mapping (src_fp, src_type, &tmp_fp); + + if (tmp != 0) + { + if (ctf_dynhash_lookup (proc_tracking_fp->ctf_add_processing, + (void *) (uintptr_t) src_type)) + return tmp; + + /* If this type has already been added from this container, and is the same + kind and (if a struct or union) has the same number of members, hand it + straight back. */ + + if ((ctf_type_kind_unsliced (tmp_fp, tmp) == (int) kind) + && (kind == CTF_K_STRUCT || kind == CTF_K_UNION + || kind == CTF_K_ENUM)) + { + if ((dst_tp = ctf_lookup_by_id (&tmp_fp, dst_type)) != NULL) + if (vlen == LCTF_INFO_VLEN (tmp_fp, dst_tp->ctt_info)) + return tmp; + } + } + forward_kind = kind; if (kind == CTF_K_FORWARD) forward_kind = src_tp->ctt_type; @@ -1697,6 +1729,9 @@ ctf_add_type (ctf_file_t *dst_fp, ctf_file_t *src_fp, ctf_id_t src_type) if ((dst_tp = ctf_lookup_by_id (&fp, dst_type)) == NULL) return CTF_ERR; + if (ctf_type_encoding (dst_fp, dst_type, &dst_en) != 0) + return CTF_ERR; /* errno set for us. */ + if (LCTF_INFO_ISROOT (fp, dst_tp->ctt_info) & CTF_ADD_ROOT) { /* The type that we found in the hash is also root-visible. If @@ -1705,9 +1740,6 @@ ctf_add_type (ctf_file_t *dst_fp, ctf_file_t *src_fp, ctf_id_t src_type) even if there is no conflict: we must check the contained type too. */ - if (ctf_type_encoding (dst_fp, dst_type, &dst_en) != 0) - return CTF_ERR; /* errno set for us. */ - if (memcmp (&src_en, &dst_en, sizeof (ctf_encoding_t)) == 0) { if (kind != CTF_K_SLICE) @@ -1723,71 +1755,17 @@ ctf_add_type (ctf_file_t *dst_fp, ctf_file_t *src_fp, ctf_id_t src_type) } else { - /* We found a non-root-visible type in the hash. We reset - dst_type to ensure that we continue to look for a possible - conflict in the pending list. */ - - dst_type = CTF_ERR; - } - } - } - - /* If the non-empty name was not found in the appropriate hash, search - the list of pending dynamic definitions that are not yet committed. - If a matching name and kind are found, assume this is the type that - we are looking for. This is necessary to permit ctf_add_type() to - operate recursively on entities such as a struct that contains a - pointer member that refers to the same struct type. */ - - if (dst_type == CTF_ERR && name[0] != '\0') - { - for (dtd = ctf_list_prev (&dst_fp->ctf_dtdefs); dtd != NULL - && LCTF_TYPE_TO_INDEX (src_fp, dtd->dtd_type) > dst_fp->ctf_dtoldid; - dtd = ctf_list_prev (dtd)) - { - const char *ctt_name; - - if (LCTF_INFO_KIND (src_fp, dtd->dtd_data.ctt_info) == kind - && dtd->dtd_data.ctt_name - && ((ctt_name = ctf_strraw (src_fp, dtd->dtd_data.ctt_name)) != NULL) - && strcmp (ctt_name, name) == 0) - { - int sroot; /* Is the src root-visible? */ - int droot; /* Is the dst root-visible? */ - int match; /* Do the encodings match? */ - - if (kind != CTF_K_INTEGER && kind != CTF_K_FLOAT && kind != CTF_K_SLICE) - { - ctf_add_type_mapping (src_fp, src_type, dst_fp, dtd->dtd_type); - return dtd->dtd_type; - } - - sroot = (flag & CTF_ADD_ROOT); - droot = (LCTF_INFO_ISROOT (dst_fp, - dtd->dtd_data. - ctt_info) & CTF_ADD_ROOT); - - match = (memcmp (&src_en, &dtd->dtd_u.dtu_enc, - sizeof (ctf_encoding_t)) == 0); - - /* If the types share the same encoding then return the id of the - first unless one type is root-visible and the other is not; in - that case the new type must get a new id if a match is never - found. Note: slices are not certain to match even if there is - no conflict: we must check the contained type too. */ + /* We found a non-root-visible type in the hash. If its encoding + is the same, we can reuse it, unless it is a slice. */ - if (match && sroot == droot) + if (memcmp (&src_en, &dst_en, sizeof (ctf_encoding_t)) == 0) { if (kind != CTF_K_SLICE) { - ctf_add_type_mapping (src_fp, src_type, dst_fp, dtd->dtd_type); - return dtd->dtd_type; + ctf_add_type_mapping (src_fp, src_type, dst_fp, dst_type); + return dst_type; } } - else if (!match && sroot && droot) - { - return (ctf_set_errno (dst_fp, ECTF_CONFLICT)); - } } } } @@ -1800,10 +1778,18 @@ ctf_add_type (ctf_file_t *dst_fp, ctf_file_t *src_fp, ctf_id_t src_type) dst.ctb_type = dst_type; dst.ctb_dtd = NULL; - /* Now perform kind-specific processing. If dst_type is CTF_ERR, then - we add a new type with the same properties as src_type to dst_fp. - If dst_type is not CTF_ERR, then we verify that dst_type has the - same attributes as src_type. We recurse for embedded references. */ + /* Now perform kind-specific processing. If dst_type is CTF_ERR, then we add + a new type with the same properties as src_type to dst_fp. If dst_type is + not CTF_ERR, then we verify that dst_type has the same attributes as + src_type. We recurse for embedded references. Before we start, we note + that we are processing this type, to prevent infinite recursion: we do not + re-process any type that appears in this list. The list is emptied + wholesale at the end of processing everything in this recursive stack. */ + + if (ctf_dynhash_insert (proc_tracking_fp->ctf_add_processing, + (void *) (uintptr_t) src_type, (void *) 1) < 0) + return ctf_set_errno (dst_fp, ENOMEM); + switch (kind) { case CTF_K_INTEGER: @@ -1822,7 +1808,8 @@ ctf_add_type (ctf_file_t *dst_fp, ctf_file_t *src_fp, ctf_id_t src_type) /* We have checked for conflicting encodings: now try to add the contained type. */ src_type = ctf_type_reference (src_fp, src_type); - dst_type = ctf_add_type (dst_fp, src_fp, src_type); + src_type = ctf_add_type_internal (dst_fp, src_fp, src_type, + proc_tracking_fp); if (src_type == CTF_ERR) return CTF_ERR; /* errno is set for us. */ @@ -1835,7 +1822,8 @@ ctf_add_type (ctf_file_t *dst_fp, ctf_file_t *src_fp, ctf_id_t src_type) case CTF_K_CONST: case CTF_K_RESTRICT: src_type = ctf_type_reference (src_fp, src_type); - src_type = ctf_add_type (dst_fp, src_fp, src_type); + src_type = ctf_add_type_internal (dst_fp, src_fp, src_type, + proc_tracking_fp); if (src_type == CTF_ERR) return CTF_ERR; /* errno is set for us. */ @@ -1848,8 +1836,11 @@ ctf_add_type (ctf_file_t *dst_fp, ctf_file_t *src_fp, ctf_id_t src_type) return (ctf_set_errno (dst_fp, ctf_errno (src_fp))); src_ar.ctr_contents = - ctf_add_type (dst_fp, src_fp, src_ar.ctr_contents); - src_ar.ctr_index = ctf_add_type (dst_fp, src_fp, src_ar.ctr_index); + ctf_add_type_internal (dst_fp, src_fp, src_ar.ctr_contents, + proc_tracking_fp); + src_ar.ctr_index = ctf_add_type_internal (dst_fp, src_fp, + src_ar.ctr_index, + proc_tracking_fp); src_ar.ctr_nelems = src_ar.ctr_nelems; if (src_ar.ctr_contents == CTF_ERR || src_ar.ctr_index == CTF_ERR) @@ -1876,7 +1867,9 @@ ctf_add_type (ctf_file_t *dst_fp, ctf_file_t *src_fp, ctf_id_t src_type) break; case CTF_K_FUNCTION: - ctc.ctc_return = ctf_add_type (dst_fp, src_fp, src_tp->ctt_type); + ctc.ctc_return = ctf_add_type_internal (dst_fp, src_fp, + src_tp->ctt_type, + proc_tracking_fp); ctc.ctc_argc = 0; ctc.ctc_flags = 0; @@ -1893,6 +1886,7 @@ ctf_add_type (ctf_file_t *dst_fp, ctf_file_t *src_fp, ctf_id_t src_type) int errs = 0; size_t size; ssize_t ssize; + ctf_dtdef_t *dtd; /* Technically to match a struct or union we need to check both ways (src members vs. dst, dst members vs. src) but we make @@ -1902,7 +1896,8 @@ ctf_add_type (ctf_file_t *dst_fp, ctf_file_t *src_fp, ctf_id_t src_type) This optimization can be defeated for unions, but is so pathological as to render it irrelevant for our purposes. */ - if (dst_type != CTF_ERR && dst_kind != CTF_K_FORWARD) + if (dst_type != CTF_ERR && kind != CTF_K_FORWARD + && dst_kind != CTF_K_FORWARD) { if (ctf_type_size (src_fp, src_type) != ctf_type_size (dst_fp, dst_type)) @@ -1936,6 +1931,10 @@ ctf_add_type (ctf_file_t *dst_fp, ctf_file_t *src_fp, ctf_id_t src_type) dst.ctb_type = dst_type; dst.ctb_dtd = dtd; + /* Pre-emptively add this struct to the type mapping so that + structures that refer to themselves work. */ + ctf_add_type_mapping (src_fp, src_type, dst_fp, dst_type); + if (ctf_member_iter (src_fp, src_type, membadd, &dst) != 0) errs++; /* Increment errs and fail at bottom of case. */ @@ -1963,12 +1962,22 @@ ctf_add_type (ctf_file_t *dst_fp, ctf_file_t *src_fp, ctf_id_t src_type) for (dmd = ctf_list_next (&dtd->dtd_u.dtu_members); dmd != NULL; dmd = ctf_list_next (dmd)) { - if ((dmd->dmd_type = ctf_add_type (dst_fp, src_fp, - dmd->dmd_type)) == CTF_ERR) + ctf_file_t *dst = dst_fp; + ctf_id_t memb_type; + + memb_type = ctf_type_mapping (src_fp, dmd->dmd_type, &dst); + if (memb_type == 0) { - if (ctf_errno (dst_fp) != ECTF_NONREPRESENTABLE) - errs++; + if ((dmd->dmd_type = + ctf_add_type_internal (dst_fp, src_fp, dmd->dmd_type, + proc_tracking_fp)) == CTF_ERR) + { + if (ctf_errno (dst_fp) != ECTF_NONREPRESENTABLE) + errs++; + } } + else + dmd->dmd_type = memb_type; } if (errs) @@ -1977,7 +1986,8 @@ ctf_add_type (ctf_file_t *dst_fp, ctf_file_t *src_fp, ctf_id_t src_type) } case CTF_K_ENUM: - if (dst_type != CTF_ERR && dst_kind != CTF_K_FORWARD) + if (dst_type != CTF_ERR && kind != CTF_K_FORWARD + && dst_kind != CTF_K_FORWARD) { if (ctf_enum_iter (src_fp, src_type, enumcmp, &dst) || ctf_enum_iter (dst_fp, dst_type, enumcmp, &src)) @@ -2003,7 +2013,8 @@ ctf_add_type (ctf_file_t *dst_fp, ctf_file_t *src_fp, ctf_id_t src_type) case CTF_K_TYPEDEF: src_type = ctf_type_reference (src_fp, src_type); - src_type = ctf_add_type (dst_fp, src_fp, src_type); + src_type = ctf_add_type_internal (dst_fp, src_fp, src_type, + proc_tracking_fp); if (src_type == CTF_ERR) return CTF_ERR; /* errno is set for us. */ @@ -2017,9 +2028,8 @@ ctf_add_type (ctf_file_t *dst_fp, ctf_file_t *src_fp, ctf_id_t src_type) equivalent. */ if (dst_type == CTF_ERR) - { dst_type = ctf_add_typedef (dst_fp, flag, name, src_type); - } + break; default: @@ -2031,6 +2041,27 @@ ctf_add_type (ctf_file_t *dst_fp, ctf_file_t *src_fp, ctf_id_t src_type) return dst_type; } +ctf_id_t +ctf_add_type (ctf_file_t *dst_fp, ctf_file_t *src_fp, ctf_id_t src_type) +{ + ctf_id_t id; + + if (!src_fp->ctf_add_processing) + src_fp->ctf_add_processing = ctf_dynhash_create (ctf_hash_integer, + ctf_hash_eq_integer, + NULL, NULL); + + /* We store the hash on the source, because it contains only source type IDs: + but callers will invariably expect errors to appear on the dest. */ + if (!src_fp->ctf_add_processing) + return (ctf_set_errno (dst_fp, ENOMEM)); + + id = ctf_add_type_internal (dst_fp, src_fp, src_type, src_fp); + ctf_dynhash_empty (src_fp->ctf_add_processing); + + return id; +} + /* Write the compressed CTF data stream to the specified gzFile descriptor. */ int ctf_gzwrite (ctf_file_t *fp, gzFile fd) diff --git a/libctf/ctf-impl.h b/libctf/ctf-impl.h index d284717b3a8..f8e9a7788ad 100644 --- a/libctf/ctf-impl.h +++ b/libctf/ctf-impl.h @@ -294,6 +294,7 @@ struct ctf_file /* Allow the caller to Change the name of link archive members. */ ctf_link_memb_name_changer_f *ctf_link_memb_name_changer; void *ctf_link_memb_name_changer_arg; /* Argument for it. */ + ctf_dynhash_t *ctf_add_processing; /* Types ctf_add_type is working on now. */ char *ctf_tmp_typeslice; /* Storage for slicing up type names. */ size_t ctf_tmp_typeslicelen; /* Size of the typeslice. */ void *ctf_specific; /* Data for ctf_get/setspecific(). */ diff --git a/libctf/ctf-open.c b/libctf/ctf-open.c index c4fca243391..b6989579465 100644 --- a/libctf/ctf-open.c +++ b/libctf/ctf-open.c @@ -1665,6 +1665,7 @@ ctf_file_close (ctf_file_t *fp) ctf_dynhash_destroy (fp->ctf_link_outputs); ctf_dynhash_destroy (fp->ctf_link_type_mapping); ctf_dynhash_destroy (fp->ctf_link_cu_mapping); + ctf_dynhash_destroy (fp->ctf_add_processing); ctf_free (fp->ctf_sxlate); ctf_free (fp->ctf_txlate); -- 2.30.2