From 0f0c11f7fc9f0ab6bd63fc5f8a4cee7367a81849 Mon Sep 17 00:00:00 2001 From: Nick Alcock Date: Fri, 5 Jun 2020 18:35:46 +0100 Subject: [PATCH] libctf, dedup: add deduplicator This adds the core deduplicator that the ctf_link machinery calls (possibly repeatedly) to link the CTF sections: it takes an array of input ctf_file_t's and another array that indicates which entries in the input array are parents of which other entries, and returns an array of outputs. The first output is always the ctf_file_t on which ctf_link/ctf_dedup/etc was called: the other outputs are child dicts that have the first output as their parent. include/ * ctf-api.h (CTF_LINK_SHARE_DUPLICATED): No longer unimplemented. libctf/ * ctf-impl.h (ctf_type_id_key): New, the key in the cd_id_to_file_t. (ctf_dedup): New, core deduplicator state. (ctf_file_t) : New. : New. : New. (ctf_hash_type_id_key): New prototype. (ctf_hash_eq_type_id_key): Likewise. (ctf_dedup_atoms_init): Likewise. * ctf-hash.c (ctf_hash_eq_type_id_key): New. (ctf_dedup_atoms_init): Likewise. * ctf-create.c (ctf_serialize): Adjusted. (ctf_add_encoded): No longer static. (ctf_add_reftype): Likewise. * ctf-open.c (ctf_file_close): Destroy the ctf_dedup_atoms_alloc. * ctf-dedup.c: New file. * ctf-decls.h [!HAVE_DECL_STPCPY]: Add prototype. * configure.ac: Check for stpcpy. * Makefile.am: Add it. * Makefile.in: Regenerate. * config.h.in: Regenerate. * configure: Regenerate. --- include/ChangeLog | 4 + include/ctf-api.h | 2 +- libctf/ChangeLog | 26 + libctf/Makefile.am | 5 +- libctf/Makefile.in | 52 +- libctf/config.h.in | 4 + libctf/configure | 10 + libctf/configure.ac | 2 +- libctf/ctf-create.c | 10 +- libctf/ctf-decls.h | 4 + libctf/ctf-dedup.c | 3155 +++++++++++++++++++++++++++++++++++++++++++ libctf/ctf-hash.c | 22 + libctf/ctf-impl.h | 127 ++ libctf/ctf-open.c | 2 + 14 files changed, 3401 insertions(+), 24 deletions(-) create mode 100644 libctf/ctf-dedup.c diff --git a/include/ChangeLog b/include/ChangeLog index 3ddc2645405..daf0fe2a564 100644 --- a/include/ChangeLog +++ b/include/ChangeLog @@ -1,3 +1,7 @@ +2020-07-22 Nick Alcock + + * ctf-api.h (CTF_LINK_SHARE_DUPLICATED): No longer unimplemented. + 2020-07-22 Nick Alcock * ctf-api.h (ctf_link_variable_filter_t): New. diff --git a/include/ctf-api.h b/include/ctf-api.h index 70bf7e0d8d2..c3683db0996 100644 --- a/include/ctf-api.h +++ b/include/ctf-api.h @@ -83,7 +83,7 @@ typedef struct ctf_link_sym /* Share all types that are not in conflict. The default. */ #define CTF_LINK_SHARE_UNCONFLICTED 0x0 -/* Share only types that are used by multiple inputs. Not implemented yet. */ +/* Share only types that are used by multiple inputs. */ #define CTF_LINK_SHARE_DUPLICATED 0x1 /* Create empty outputs for all registered CU mappings even if no types are diff --git a/libctf/ChangeLog b/libctf/ChangeLog index be00e920160..58eb2f61de3 100644 --- a/libctf/ChangeLog +++ b/libctf/ChangeLog @@ -1,3 +1,29 @@ +2020-07-22 Nick Alcock + + * ctf-impl.h (ctf_type_id_key): New, the key in the + cd_id_to_file_t. + (ctf_dedup): New, core deduplicator state. + (ctf_file_t) : New. + : New. + : New. + (ctf_hash_type_id_key): New prototype. + (ctf_hash_eq_type_id_key): Likewise. + (ctf_dedup_atoms_init): Likewise. + * ctf-hash.c (ctf_hash_eq_type_id_key): New. + (ctf_dedup_atoms_init): Likewise. + * ctf-create.c (ctf_serialize): Adjusted. + (ctf_add_encoded): No longer static. + (ctf_add_reftype): Likewise. + * ctf-open.c (ctf_file_close): Destroy the + ctf_dedup_atoms_alloc. + * ctf-dedup.c: New file. + * ctf-decls.h [!HAVE_DECL_STPCPY]: Add prototype. + * configure.ac: Check for stpcpy. + * Makefile.am: Add it. + * Makefile.in: Regenerate. + * config.h.in: Regenerate. + * configure: Regenerate. + 2020-07-22 Nick Alcock * configure.ac: Add --enable-libctf-hash-debugging. diff --git a/libctf/Makefile.am b/libctf/Makefile.am index 8f5ea6b0247..8b8f0a8cf83 100644 --- a/libctf/Makefile.am +++ b/libctf/Makefile.am @@ -43,8 +43,9 @@ libctf_nobfd_la_LIBADD = @SHARED_LIBADD@ $(ZLIB) libctf_nobfd_la_LDFLAGS = -version-info 0:0:0 @SHARED_LDFLAGS@ @VERSION_FLAGS@ libctf_nobfd_la_CPPFLAGS = $(AM_CPPFLAGS) -DNOBFD=1 libctf_nobfd_la_SOURCES = ctf-archive.c ctf-dump.c ctf-create.c ctf-decl.c ctf-error.c \ - ctf-hash.c ctf-labels.c ctf-link.c ctf-lookup.c ctf-open.c \ - ctf-sha1.c ctf-string.c ctf-subr.c ctf-types.c ctf-util.c + ctf-hash.c ctf-labels.c ctf-dedup.c ctf-link.c ctf-lookup.c \ + ctf-open.c ctf-sha1.c ctf-string.c ctf-subr.c ctf-types.c \ + ctf-util.c if NEED_CTF_QSORT_R libctf_nobfd_la_SOURCES += ctf-qsort_r.c endif diff --git a/libctf/Makefile.in b/libctf/Makefile.in index bc385278e09..78de09ba10a 100644 --- a/libctf/Makefile.in +++ b/libctf/Makefile.in @@ -166,18 +166,18 @@ am__DEPENDENCIES_1 = libctf_nobfd_la_DEPENDENCIES = $(am__DEPENDENCIES_1) am__libctf_nobfd_la_SOURCES_DIST = ctf-archive.c ctf-dump.c \ ctf-create.c ctf-decl.c ctf-error.c ctf-hash.c ctf-labels.c \ - ctf-link.c ctf-lookup.c ctf-open.c ctf-sha1.c ctf-string.c \ - ctf-subr.c ctf-types.c ctf-util.c ctf-qsort_r.c + ctf-dedup.c ctf-link.c ctf-lookup.c ctf-open.c ctf-sha1.c \ + ctf-string.c ctf-subr.c ctf-types.c ctf-util.c ctf-qsort_r.c @NEED_CTF_QSORT_R_TRUE@am__objects_1 = libctf_nobfd_la-ctf-qsort_r.lo am_libctf_nobfd_la_OBJECTS = libctf_nobfd_la-ctf-archive.lo \ libctf_nobfd_la-ctf-dump.lo libctf_nobfd_la-ctf-create.lo \ libctf_nobfd_la-ctf-decl.lo libctf_nobfd_la-ctf-error.lo \ libctf_nobfd_la-ctf-hash.lo libctf_nobfd_la-ctf-labels.lo \ - libctf_nobfd_la-ctf-link.lo libctf_nobfd_la-ctf-lookup.lo \ - libctf_nobfd_la-ctf-open.lo libctf_nobfd_la-ctf-sha1.lo \ - libctf_nobfd_la-ctf-string.lo libctf_nobfd_la-ctf-subr.lo \ - libctf_nobfd_la-ctf-types.lo libctf_nobfd_la-ctf-util.lo \ - $(am__objects_1) + libctf_nobfd_la-ctf-dedup.lo libctf_nobfd_la-ctf-link.lo \ + libctf_nobfd_la-ctf-lookup.lo libctf_nobfd_la-ctf-open.lo \ + libctf_nobfd_la-ctf-sha1.lo libctf_nobfd_la-ctf-string.lo \ + libctf_nobfd_la-ctf-subr.lo libctf_nobfd_la-ctf-types.lo \ + libctf_nobfd_la-ctf-util.lo $(am__objects_1) libctf_nobfd_la_OBJECTS = $(am_libctf_nobfd_la_OBJECTS) AM_V_lt = $(am__v_lt_@AM_V@) am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) @@ -191,18 +191,18 @@ libctf_nobfd_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ @INSTALL_LIBBFD_TRUE@am_libctf_nobfd_la_rpath = -rpath $(libdir) am__DEPENDENCIES_2 = $(am__DEPENDENCIES_1) am__libctf_la_SOURCES_DIST = ctf-archive.c ctf-dump.c ctf-create.c \ - ctf-decl.c ctf-error.c ctf-hash.c ctf-labels.c ctf-link.c \ - ctf-lookup.c ctf-open.c ctf-sha1.c ctf-string.c ctf-subr.c \ - ctf-types.c ctf-util.c ctf-qsort_r.c ctf-open-bfd.c + ctf-decl.c ctf-error.c ctf-hash.c ctf-labels.c ctf-dedup.c \ + ctf-link.c ctf-lookup.c ctf-open.c ctf-sha1.c ctf-string.c \ + ctf-subr.c ctf-types.c ctf-util.c ctf-qsort_r.c ctf-open-bfd.c @NEED_CTF_QSORT_R_TRUE@am__objects_2 = libctf_la-ctf-qsort_r.lo am__objects_3 = libctf_la-ctf-archive.lo libctf_la-ctf-dump.lo \ libctf_la-ctf-create.lo libctf_la-ctf-decl.lo \ libctf_la-ctf-error.lo libctf_la-ctf-hash.lo \ - libctf_la-ctf-labels.lo libctf_la-ctf-link.lo \ - libctf_la-ctf-lookup.lo libctf_la-ctf-open.lo \ - libctf_la-ctf-sha1.lo libctf_la-ctf-string.lo \ - libctf_la-ctf-subr.lo libctf_la-ctf-types.lo \ - libctf_la-ctf-util.lo $(am__objects_2) + libctf_la-ctf-labels.lo libctf_la-ctf-dedup.lo \ + libctf_la-ctf-link.lo libctf_la-ctf-lookup.lo \ + libctf_la-ctf-open.lo libctf_la-ctf-sha1.lo \ + libctf_la-ctf-string.lo libctf_la-ctf-subr.lo \ + libctf_la-ctf-types.lo libctf_la-ctf-util.lo $(am__objects_2) am_libctf_la_OBJECTS = $(am__objects_3) libctf_la-ctf-open-bfd.lo libctf_la_OBJECTS = $(am_libctf_la_OBJECTS) libctf_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ @@ -457,9 +457,9 @@ libctf_nobfd_la_LIBADD = @SHARED_LIBADD@ $(ZLIB) libctf_nobfd_la_LDFLAGS = -version-info 0:0:0 @SHARED_LDFLAGS@ @VERSION_FLAGS@ libctf_nobfd_la_CPPFLAGS = $(AM_CPPFLAGS) -DNOBFD=1 libctf_nobfd_la_SOURCES = ctf-archive.c ctf-dump.c ctf-create.c \ - ctf-decl.c ctf-error.c ctf-hash.c ctf-labels.c ctf-link.c \ - ctf-lookup.c ctf-open.c ctf-sha1.c ctf-string.c ctf-subr.c \ - ctf-types.c ctf-util.c $(am__append_1) + ctf-decl.c ctf-error.c ctf-hash.c ctf-labels.c ctf-dedup.c \ + ctf-link.c ctf-lookup.c ctf-open.c ctf-sha1.c ctf-string.c \ + ctf-subr.c ctf-types.c ctf-util.c $(am__append_1) libctf_la_LIBADD = @BFD_LIBADD@ $(libctf_nobfd_la_LIBADD) libctf_la_CPPFLAGS = $(AM_CPPFLAGS) -DNOBFD=0 libctf_la_DEPENDENCIES = @BFD_DEPENDENCIES@ @@ -581,6 +581,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libctf_la-ctf-archive.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libctf_la-ctf-create.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libctf_la-ctf-decl.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libctf_la-ctf-dedup.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libctf_la-ctf-dump.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libctf_la-ctf-error.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libctf_la-ctf-hash.Plo@am__quote@ @@ -598,6 +599,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libctf_nobfd_la-ctf-archive.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libctf_nobfd_la-ctf-create.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libctf_nobfd_la-ctf-decl.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libctf_nobfd_la-ctf-dedup.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libctf_nobfd_la-ctf-dump.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libctf_nobfd_la-ctf-error.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libctf_nobfd_la-ctf-hash.Plo@am__quote@ @@ -682,6 +684,13 @@ libctf_nobfd_la-ctf-labels.lo: ctf-labels.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libctf_nobfd_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o libctf_nobfd_la-ctf-labels.lo `test -f 'ctf-labels.c' || echo '$(srcdir)/'`ctf-labels.c +libctf_nobfd_la-ctf-dedup.lo: ctf-dedup.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libctf_nobfd_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT libctf_nobfd_la-ctf-dedup.lo -MD -MP -MF $(DEPDIR)/libctf_nobfd_la-ctf-dedup.Tpo -c -o libctf_nobfd_la-ctf-dedup.lo `test -f 'ctf-dedup.c' || echo '$(srcdir)/'`ctf-dedup.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libctf_nobfd_la-ctf-dedup.Tpo $(DEPDIR)/libctf_nobfd_la-ctf-dedup.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='ctf-dedup.c' object='libctf_nobfd_la-ctf-dedup.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libctf_nobfd_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o libctf_nobfd_la-ctf-dedup.lo `test -f 'ctf-dedup.c' || echo '$(srcdir)/'`ctf-dedup.c + libctf_nobfd_la-ctf-link.lo: ctf-link.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libctf_nobfd_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT libctf_nobfd_la-ctf-link.lo -MD -MP -MF $(DEPDIR)/libctf_nobfd_la-ctf-link.Tpo -c -o libctf_nobfd_la-ctf-link.lo `test -f 'ctf-link.c' || echo '$(srcdir)/'`ctf-link.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libctf_nobfd_la-ctf-link.Tpo $(DEPDIR)/libctf_nobfd_la-ctf-link.Plo @@ -794,6 +803,13 @@ libctf_la-ctf-labels.lo: ctf-labels.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libctf_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o libctf_la-ctf-labels.lo `test -f 'ctf-labels.c' || echo '$(srcdir)/'`ctf-labels.c +libctf_la-ctf-dedup.lo: ctf-dedup.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libctf_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT libctf_la-ctf-dedup.lo -MD -MP -MF $(DEPDIR)/libctf_la-ctf-dedup.Tpo -c -o libctf_la-ctf-dedup.lo `test -f 'ctf-dedup.c' || echo '$(srcdir)/'`ctf-dedup.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libctf_la-ctf-dedup.Tpo $(DEPDIR)/libctf_la-ctf-dedup.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='ctf-dedup.c' object='libctf_la-ctf-dedup.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libctf_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o libctf_la-ctf-dedup.lo `test -f 'ctf-dedup.c' || echo '$(srcdir)/'`ctf-dedup.c + libctf_la-ctf-link.lo: ctf-link.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libctf_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT libctf_la-ctf-link.lo -MD -MP -MF $(DEPDIR)/libctf_la-ctf-link.Tpo -c -o libctf_la-ctf-link.lo `test -f 'ctf-link.c' || echo '$(srcdir)/'`ctf-link.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libctf_la-ctf-link.Tpo $(DEPDIR)/libctf_la-ctf-link.Plo diff --git a/libctf/config.h.in b/libctf/config.h.in index 2ddd5126088..5db637c7903 100644 --- a/libctf/config.h.in +++ b/libctf/config.h.in @@ -32,6 +32,10 @@ don't. */ #undef HAVE_DECL_BSWAP_64 +/* Define to 1 if you have the declaration of `stpcpy', and to 0 if you don't. + */ +#undef HAVE_DECL_STPCPY + /* Define to 1 if you have the declaration of `vasprintf', and to 0 if you don't. */ #undef HAVE_DECL_VASPRINTF diff --git a/libctf/configure b/libctf/configure index 9f16ae92b8d..a1f5b7ff09f 100755 --- a/libctf/configure +++ b/libctf/configure @@ -13186,6 +13186,16 @@ fi cat >>confdefs.h <<_ACEOF #define HAVE_DECL_VASPRINTF $ac_have_decl _ACEOF +ac_fn_c_check_decl "$LINENO" "stpcpy" "ac_cv_have_decl_stpcpy" "$ac_includes_default" +if test "x$ac_cv_have_decl_stpcpy" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_STPCPY $ac_have_decl +_ACEOF diff --git a/libctf/configure.ac b/libctf/configure.ac index 3799a0cb906..f5ed973b10d 100644 --- a/libctf/configure.ac +++ b/libctf/configure.ac @@ -108,7 +108,7 @@ AC_CHECK_FUNCS(pread) dnl Check for bswap_{16,32,64} AC_CHECK_DECLS([bswap_16, bswap_32, bswap_64], [], [], [[#include ]]) -AC_CHECK_DECLS([asprintf, vasprintf]) +AC_CHECK_DECLS([asprintf, vasprintf, stpcpy]) dnl Check for qsort_r. (Taken from gnulib.) AC_CHECK_FUNCS_ONCE([qsort_r]) diff --git a/libctf/ctf-create.c b/libctf/ctf-create.c index 001e625aa87..35c286c3b40 100644 --- a/libctf/ctf-create.c +++ b/libctf/ctf-create.c @@ -546,6 +546,9 @@ ctf_serialize (ctf_file_t *fp) nfp->ctf_link_variable_filter = fp->ctf_link_variable_filter; nfp->ctf_link_variable_filter_arg = fp->ctf_link_variable_filter_arg; nfp->ctf_link_flags = fp->ctf_link_flags; + nfp->ctf_dedup_atoms = fp->ctf_dedup_atoms; + nfp->ctf_dedup_atoms_alloc = fp->ctf_dedup_atoms_alloc; + memcpy (&nfp->ctf_dedup, &fp->ctf_dedup, sizeof (fp->ctf_dedup)); nfp->ctf_snapshot_lu = fp->ctf_snapshots; @@ -571,11 +574,14 @@ ctf_serialize (ctf_file_t *fp) fp->ctf_link_in_cu_mapping = NULL; fp->ctf_link_out_cu_mapping = NULL; fp->ctf_link_type_mapping = NULL; + fp->ctf_dedup_atoms = NULL; + fp->ctf_dedup_atoms_alloc = NULL; fp->ctf_parent_unreffed = 1; fp->ctf_dvhash = NULL; memset (&fp->ctf_dvdefs, 0, sizeof (ctf_list_t)); memset (fp->ctf_lookups, 0, sizeof (fp->ctf_lookups)); + memset (&fp->ctf_dedup, 0, sizeof (fp->ctf_dedup)); fp->ctf_structs.ctn_writable = NULL; fp->ctf_unions.ctn_writable = NULL; fp->ctf_enums.ctn_writable = NULL; @@ -878,7 +884,7 @@ clp2 (size_t x) return (x + 1); } -static ctf_id_t +ctf_id_t ctf_add_encoded (ctf_file_t *fp, uint32_t flag, const char *name, const ctf_encoding_t *ep, uint32_t kind) { @@ -899,7 +905,7 @@ ctf_add_encoded (ctf_file_t *fp, uint32_t flag, return type; } -static ctf_id_t +ctf_id_t ctf_add_reftype (ctf_file_t *fp, uint32_t flag, ctf_id_t ref, uint32_t kind) { ctf_dtdef_t *dtd; diff --git a/libctf/ctf-decls.h b/libctf/ctf-decls.h index c47a72e722f..e1ada3b9592 100644 --- a/libctf/ctf-decls.h +++ b/libctf/ctf-decls.h @@ -72,4 +72,8 @@ void ctf_qsort_r (void *base, size_t nmemb, size_t size, #define MAX(a, b) ((a) > (b) ? (a) : (b)) #define MIN(a, b) ((a) < (b) ? (a) : (b)) +#if !HAVE_DECL_STPCPY +extern char *stpcpy (char *, const char *); +#endif + #endif /* _CTF_DECLS_H */ diff --git a/libctf/ctf-dedup.c b/libctf/ctf-dedup.c new file mode 100644 index 00000000000..77b34f4539d --- /dev/null +++ b/libctf/ctf-dedup.c @@ -0,0 +1,3155 @@ +/* CTF type deduplication. + Copyright (C) 2019 Free Software Foundation, Inc. + + This file is part of libctf. + + libctf is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3, or (at your option) any later + version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; see the file COPYING. If not see + . */ + +#include +#include +#include +#include +#include "hashtab.h" + +/* (In the below, relevant functions are named in square brackets.) */ + +/* Type deduplication is a three-phase process: + + [ctf_dedup, ctf_dedup_hash_type, ctf_dedup_rhash_type] + 1) come up with unambiguous hash values for all types: no two types may have + the same hash value, and any given type should have only one hash value + (for optimal deduplication). + + [ctf_dedup, ctf_dedup_detect_name_ambiguity, + ctf_dedup_conflictify_unshared, ctf_dedup_mark_conflicting_hash] + 2) mark those distinct types with names that collide (and thus cannot be + declared simultaneously in the same translation unit) as conflicting, and + recursively mark all types that cite one of those types as conflicting as + well. Possibly mark all types cited in only one TU as conflicting, if + the CTF_LINK_SHARE_DUPLICATED link mode is active. + + [ctf_dedup_emit, ctf_dedup_emit_struct_members, ctf_dedup_id_to_target] + 3) emit all the types, one hash value at a time. Types not marked + conflicting are emitted once, into the shared dictionary: types marked + conflicting are emitted once per TU into a dictionary corresponding to + each TU in which they appear. Structs marked conflicting get at the very + least a forward emitted into the shared dict so that other dicts can cite + it if needed. + + [id_to_packed_id] + This all works over an array of inputs (usually in the same order as the + inputs on the link line). We don't use the ctf_link_inputs hash directly + because it is convenient to be able to address specific input types as a + *global type ID* or 'GID', a pair of an array offset and a ctf_id_t. Since + both are already 32 bits or less or can easily be constrained to that range, + we can pack them both into a single 64-bit hash word for easy lookups, which + would be much more annoying to do with a ctf_file_t * and a ctf_id_t. (On + 32-bit platforms, we must do that anyway, since pointers, and thus hash keys + and values, are only 32 bits wide). We track which inputs are parents of + which other inputs so that we can correctly recognize that types we have + traversed in children may cite types in parents, and so that we can process + the parents first.) + + Note that thanks to ld -r, the deduplicator can be fed its own output, so the + inputs may themselves have child dicts. Since we need to support this usage + anyway, we can use it in one other place. If the caller finds translation + units to be too small a unit ambiguous types, links can be 'cu-mapped', where + the caller provides a mapping of input TU names to output child dict names. + This mapping can fuse many child TUs into one potential child dict, so that + ambiguous types in any of those input TUs go into the same child dict. + When a many:1 cu-mapping is detected, the ctf_dedup machinery is called + repeatedly, once for every output name that has more than one input, to fuse + all the input TUs associated with a given output dict into one, and once again + as normal to deduplicate all those intermediate outputs (and any 1:1 inputs) + together. This has much higher memory usage than otherwise, because in the + intermediate state, all the output TUs are in memory at once and cannot be + lazily opened. It also has implications for the emission code: if types + appear ambiguously in multiple input TUs that are all mapped to the same + child dict, we cannot put them in children in the cu-mapping link phase + because this output is meant to *become* a child in the next link stage and + parent/child relationships are only one level deep: so instead, we just hide + all but one of the ambiguous types. + + There are a few other subtleties here that make this more complex than it + seems. Let's go over the steps above in more detail. + + 1) HASHING. + + [ctf_dedup_hash_type, ctf_dedup_rhash_type] + Hashing proceeds recursively, mixing in the properties of each input type + (including its name, if any), and then adding the hash values of every type + cited by that type. The result is stashed in the cd_type_hashes so other + phases can find the hash values of input types given their IDs, and so that + if we encounter this type again while hashing we can just return its hash + value: it is also stashed in the *output mapping*, a mapping from hash value + to the set of GIDs corresponding to that type in all inputs. We also keep + track of the GID of the first appearance of the type in any input (in + cd_output_first_gid), and the GID of structs, unions, and forwards that only + appear in one TU (in cd_struct_origin). See below for where these things are + used. + + Everything in this phase is time-critical, because it is operating over + non-deduplicated types and so may have hundreds or thousands of times the + data volume to deal with than later phases. Trace output is hidden behind + ENABLE_LIBCTF_HASH_DEBUGGING to prevent the sheer number of calls to + ctf_dprintf from slowing things down (tenfold slowdowns are observed purely + from the calls to ctf_dprintf(), even with debugging switched off), and keep + down the volume of output (hundreds of gigabytes of debug output are not + uncommon on larger links). + + We have to do *something* about potential cycles in the type graph. We'd + like to avoid emitting forwards in the final output if possible, because + forwards aren't much use: they have no members. We are mostly saved from + needing to worry about this at emission time by ctf_add_struct*() + automatically replacing newly-created forwards when the real struct/union + comes along. So we only have to avoid getting stuck in cycles during the + hashing phase, while also not confusing types that cite members that are + structs with each other. It is easiest to solve this problem by noting two + things: + + - all cycles in C depend on the presence of tagged structs/unions + - all tagged structs/unions have a unique name they can be disambiguated by + + [ctf_dedup_is_stub] + This means that we can break all cycles by ceasing to hash in cited types at + every tagged struct/union and instead hashing in a stub consisting of the + struct/union's *decorated name*, which is the name preceded by "s " or "u " + depending on the namespace (cached in cd_decorated_names). Forwards are + decorated identically (so a forward to "struct foo" would be represented as + "s foo"): this means that a citation of a forward to a type and a citation of + a concrete definition of a type with the same name ends up getting the same + hash value. + + Of course, it is quite possible to have two TUs with structs with the same + name and different definitions, but that's OK because when we scan for types + with ambiguous names we will identify these and mark them conflicting. + + We populate one thing to help conflictedness marking. No unconflicted type + may cite a conflicted one, but this means that conflictedness marking must + walk from types to the types that cite them, which is the opposite of the + usual order. We can make this easier to do by constructing a *citers* graph + in cd_citers, which points from types to the types that cite them: because we + emit forwards corresponding to every conflicted struct/union, we don't need + to do this for citations of structs/unions by other types. This is very + convenient for us, because that's the only type we don't traverse + recursively: so we can construct the citers graph at the same time as we + hash, rather than needing to add an extra pass. (This graph is a dynhash of + *type hash values*, so it's small: in effect it is automatically + deduplicated.) + + 2) COLLISIONAL MARKING. + + [ctf_dedup_detect_name_ambiguity, ctf_dedup_mark_conflicting_hash] + We identify types whose names collide during the hashing process, and count + the rough number of uses of each name (caching may throw it off a bit: this + doesn't need to be accurate). We then mark the less-frequently-cited types + with each names conflicting: the most-frequently-cited one goes into the + shared type dictionary, while all others are duplicated into per-TU + dictionaries, named after the input TU, that have the shared dictionary as a + parent. For structures and unions this is not quite good enough: we'd like + to have citations of forwards to ambiguously named structures and unions + *stay* as citations of forwards, so that the user can tell that the caller + didn't actually know which structure definition was meant: but if we put one + of those structures into the shared dictionary, it would supplant and replace + the forward, leaving no sign. So structures and unions do not take part in + this popularity contest: if their names are ambiguous, they are just + duplicated, and only a forward appears in the shared dict. + + [ctf_dedup_propagate_conflictedness] + The process of marking types conflicted is itself recursive: we recursively + traverse the cd_citers graph populated in the hashing pass above and mark + everything that we encounter conflicted (without wasting time re-marking + anything that is already marked). This naturally terminates just where we + want it to (at types that are cited by no other types, and at structures and + unions) and suffices to ensure that types that cite conflicted types are + always marked conflicted. + + [ctf_dedup_conflictify_unshared, ctf_dedup_multiple_input_dicts] + When linking in CTF_LINK_SHARE_DUPLICATED mode, we would like all types that + are used in only one TU to end up in a per-CU dict. The easiest way to do + that is to mark them conflicted. ctf_dedup_conflictify_unshared does this, + traversing the output mapping and using ctf_dedup_multiple_input_dicts to + check the number of input dicts each distinct type hash value came from: + types that only came from one get marked conflicted. One caveat here is that + we need to consider both structs and forwards to them: a struct that appears + in one TU and has a dozen citations to an opaque forward in other TUs should + *not* be considered to be used in only one TU, because users would find it + useful to be able to traverse into opaque structures of that sort: so we use + cd_struct_origin to check both structs/unions and the forwards corresponding + to them. + + 3) EMISSION. + + [ctf_dedup_walk_output_mapping, ctf_dedup_rwalk_output_mapping, + ctf_dedup_rwalk_one_output_mapping] + Emission involves another walk of the entire output mapping, this time + traversing everything other than struct members, recursively. Types are + emitted from leaves to trunk, emitting all types a type cites before emitting + the type itself. We sort the output mapping before traversing it, for + reproducibility and also correctness: the input dicts may have parent/child + relationships, so we simply sort all types that first appear in parents + before all children, then sort types that first appear in dicts appearing + earlier on the linker command line before those that appear later, then sort + by input ctf_id_t. (This is where we use cd_output_first_gid, collected + above.) + + The walking is done using a recursive traverser which arranges to not revisit + any type already visited and to call its callback once per input GID for + input GIDs corresponding to conflicted output types. The traverser only + finds input types and calls a callback for them as many times as the output + needs to appear: it doesn't try to figure out anything about where the output + might go. That's done by the callback based on whether the type is + marked conflicted or not. + + [ctf_dedup_emit_type, ctf_dedup_id_to_target, ctf_dedup_synthesize_forward] + ctf_dedup_emit_type is the (sole) callback for ctf_dedup_walk_output_mapping. + Conflicted types have all necessary dictionaries created, and then we emit + the type into each dictionary in turn, working over each input CTF type + corresponding to each hash value and using ctf_dedup_id_to_target to map each + input ctf_id_t into the corresponding type in the output (dealing with input + ctf_id_t's with parents in the process by simply chasing to the parent dict + if the type we're looking up is in there). Emitting structures involves + simply noting that the members of this structure need emission later on: + because you cannot cite a single structure member from another type, we avoid + emitting the members at this stage to keep recursion depths down a bit. + + At this point, if we have by some mischance decided that two different types + with child types that hash to different values have in fact got the same hash + value themselves and *not* marked it conflicting, the type walk will walk + only *one* of them and in all likelihood we'll find that we are trying to + emit a type into some child dictionary that references a type that was never + emitted into that dictionary and assertion-fail. This always indicates a bug + in the conflictedness marking machinery or the hashing code, or both. + + ctf_dedup_id_to_target calls ctf_dedup_synthesize_forward to do one extra + thing, alluded to above: if this is a conflicted tagged structure or union, + and the target is the shared dict (i.e., the type we're being asked to emit + is not itself conflicted so can't just point straight at the conflicted + type), we instead synthesise a forward with the same name, emit it into the + shared dict, record it in cd_output_emission_conflicted_forwards so that we + don't re-emit it, and return it. This means that cycles that contain + conflicts do not cause the entire cycle to be replicated in every child: only + that piece of the cycle which takes you back as far as the closest tagged + struct/union needs to be replicated. This trick means that no part of the + deduplicator needs a cycle detector: every recursive walk can stop at tagged + structures. + + [ctf_dedup_emit_struct_members] + The final stage of emission is to walk over all structures with members + that need emission and emit all of them. Every type has been emitted at + this stage, so emission cannot fail. + + [ctf_dedup_populate_type_mappings, ctf_dedup_populate_type_mapping] + Finally, we update the input -> output type ID mappings used by the ctf-link + machinery to update all the other sections. This is surprisingly expensive + and may be replaced with a scheme which lets the ctf-link machinery extract + the needed info directly from the deduplicator. */ + +/* Possible future optimizations are flagged with 'optimization opportunity' + below. */ + +/* Global optimization opportunity: a GC pass, eliminating types with no direct + or indirect citations from the other sections in the dictionary. */ + +/* Internal flag values for ctf_dedup_hash_type. */ + +/* Child call: consider forwardable types equivalent to forwards or stubs below + this point. */ +#define CTF_DEDUP_HASH_INTERNAL_CHILD 0x01 + +/* Transform references to single ctf_id_ts in passed-in inputs into a number + that will fit in a uint64_t. Needs rethinking if CTF_MAX_TYPE is boosted. + + On 32-bit platforms, we pack things together differently: see the note + above. */ + +#if UINTPTR_MAX < UINT64_MAX +# define IDS_NEED_ALLOCATION 1 +# define CTF_DEDUP_GID(fp, input, type) id_to_packed_id (fp, input, type) +# define CTF_DEDUP_GID_TO_INPUT(id) packed_id_to_input (id) +# define CTF_DEDUP_GID_TO_TYPE(id) packed_id_to_type (id) +#else +# define CTF_DEDUP_GID(fp, input, type) \ + (void *) (((uint64_t) input) << 32 | (type)) +# define CTF_DEDUP_GID_TO_INPUT(id) ((int) (((uint64_t) id) >> 32)) +# define CTF_DEDUP_GID_TO_TYPE(id) (ctf_id_t) (((uint64_t) id) & ~(0xffffffff00000000ULL)) +#endif + +#ifdef IDS_NEED_ALLOCATION + + /* This is the 32-bit path, which stores GIDs in a pool and returns a pointer + into the pool. It is notably less efficient than the 64-bit direct storage + approach, but with a smaller key, this is all we can do. */ + +static void * +id_to_packed_id (ctf_file_t *fp, int input_num, ctf_id_t type) +{ + const void *lookup; + ctf_type_id_key_t *dynkey = NULL; + ctf_type_id_key_t key = { input_num, type }; + + if (!ctf_dynhash_lookup_kv (fp->ctf_dedup.cd_id_to_file_t, + &key, &lookup, NULL)) + { + if ((dynkey = malloc (sizeof (ctf_type_id_key_t))) == NULL) + goto oom; + memcpy (dynkey, &key, sizeof (ctf_type_id_key_t)); + + if (ctf_dynhash_insert (fp->ctf_dedup.cd_id_to_file_t, dynkey, NULL) < 0) + goto oom; + + ctf_dynhash_lookup_kv (fp->ctf_dedup.cd_id_to_file_t, + dynkey, &lookup, NULL); + } + /* We use a raw assert() here because there isn't really a way to get any sort + of error back from this routine without vastly complicating things for the + much more common case of !IDS_NEED_ALLOCATION. */ + assert (lookup); + return (void *) lookup; + + oom: + free (dynkey); + ctf_set_errno (fp, ENOMEM); + return NULL; +} + +static int +packed_id_to_input (const void *id) +{ + const ctf_type_id_key_t *key = (ctf_type_id_key_t *) id; + + return key->ctii_input_num; +} + +static ctf_id_t +packed_id_to_type (const void *id) +{ + const ctf_type_id_key_t *key = (ctf_type_id_key_t *) id; + + return key->ctii_type; +} +#endif + +/* Make an element in a dynhash-of-dynsets, or return it if already present. */ + +static ctf_dynset_t * +make_set_element (ctf_dynhash_t *set, const void *key) +{ + ctf_dynset_t *element; + + if ((element = ctf_dynhash_lookup (set, key)) == NULL) + { + if ((element = ctf_dynset_create (htab_hash_string, + ctf_dynset_eq_string, + NULL)) == NULL) + return NULL; + + if (ctf_dynhash_insert (set, (void *) key, element) < 0) + { + ctf_dynset_destroy (element); + return NULL; + } + } + + return element; +} + +/* Initialize the dedup atoms table. */ +int +ctf_dedup_atoms_init (ctf_file_t *fp) +{ + if (fp->ctf_dedup_atoms) + return 0; + + if (!fp->ctf_dedup_atoms_alloc) + { + if ((fp->ctf_dedup_atoms_alloc + = ctf_dynset_create (htab_hash_string, ctf_dynset_eq_string, + free)) == NULL) + return ctf_set_errno (fp, ENOMEM); + } + fp->ctf_dedup_atoms = fp->ctf_dedup_atoms_alloc; + return 0; +} + +/* Intern things in the dedup atoms table. */ + +static const char * +intern (ctf_file_t *fp, char *atom) +{ + const void *foo; + + if (atom == NULL) + return NULL; + + if (!ctf_dynset_exists (fp->ctf_dedup_atoms, atom, &foo)) + { + if (ctf_dynset_insert (fp->ctf_dedup_atoms, atom) < 0) + { + ctf_set_errno (fp, ENOMEM); + return NULL; + } + foo = atom; + } + else + free (atom); + + return (const char *) foo; +} + +/* Add an indication of the namespace to a type name in a way that is not valid + for C identifiers. Used to maintain hashes of type names to other things + while allowing for the four C namespaces (normal, struct, union, enum). + Return a new dynamically-allocated string. */ +static const char * +ctf_decorate_type_name (ctf_file_t *fp, const char *name, int kind) +{ + ctf_dedup_t *d = &fp->ctf_dedup; + const char *ret; + const char *k; + char *p; + size_t i; + + switch (kind) + { + case CTF_K_STRUCT: + k = "s "; + i = 0; + break; + case CTF_K_UNION: + k = "u "; + i = 1; + break; + case CTF_K_ENUM: + k = "e "; + i = 2; + break; + default: + k = ""; + i = 3; + } + + if ((ret = ctf_dynhash_lookup (d->cd_decorated_names[i], name)) == NULL) + { + char *str; + + if ((str = malloc (strlen (name) + strlen (k) + 1)) == NULL) + goto oom; + + p = stpcpy (str, k); + strcpy (p, name); + ret = intern (fp, str); + if (!ret) + goto oom; + + if (ctf_dynhash_cinsert (d->cd_decorated_names[i], name, ret) < 0) + goto oom; + } + + return ret; + + oom: + ctf_set_errno (fp, ENOMEM); + return NULL; +} + +/* Hash a type, possibly debugging-dumping something about it as well. */ +static inline void +ctf_dedup_sha1_add (ctf_sha1_t *sha1, const void *buf, size_t len, + const char *description _libctf_unused_, + unsigned long depth _libctf_unused_) +{ + ctf_sha1_add (sha1, buf, len); + +#ifdef ENABLE_LIBCTF_HASH_DEBUGGING + ctf_sha1_t tmp; + char tmp_hval[CTF_SHA1_SIZE]; + tmp = *sha1; + ctf_sha1_fini (&tmp, tmp_hval); + ctf_dprintf ("%lu: after hash addition of %s: %s\n", depth, description, + tmp_hval); +#endif +} + +static const char * +ctf_dedup_hash_type (ctf_file_t *fp, ctf_file_t *input, + ctf_file_t **inputs, uint32_t *parents, + int input_num, ctf_id_t type, int flags, + unsigned long depth, + int (*populate_fun) (ctf_file_t *fp, + ctf_file_t *input, + ctf_file_t **inputs, + int input_num, + ctf_id_t type, + void *id, + const char *decorated_name, + const char *hash)); + +/* Determine whether this type is being hashed as a stub (in which case it is + unsafe to cache it). */ +static int +ctf_dedup_is_stub (const char *name, int kind, int fwdkind, int flags) +{ + /* We can cache all types unless we are recursing to children and are hashing + in a tagged struct, union or forward, all of which are replaced with their + decorated name as a stub and will have different hash values when hashed at + the top level. */ + + return ((flags & CTF_DEDUP_HASH_INTERNAL_CHILD) && name + && (kind == CTF_K_STRUCT || kind == CTF_K_UNION + || (kind == CTF_K_FORWARD && (fwdkind == CTF_K_STRUCT + || fwdkind == CTF_K_UNION)))); +} + +/* Populate struct_origin if need be (not already populated, or populated with + a different origin), in which case it must go to -1, "shared".) + + Only called for forwards or forwardable types with names, when the link mode + is CTF_LINK_SHARE_DUPLICATED. */ +static int +ctf_dedup_record_origin (ctf_file_t *fp, int input_num, const char *decorated, + void *id) +{ + ctf_dedup_t *d = &fp->ctf_dedup; + void *origin; + int populate_origin = 0; + + if (ctf_dynhash_lookup_kv (d->cd_struct_origin, decorated, NULL, &origin)) + { + if (CTF_DEDUP_GID_TO_INPUT (origin) != input_num + && CTF_DEDUP_GID_TO_INPUT (origin) != -1) + { + populate_origin = 1; + origin = CTF_DEDUP_GID (fp, -1, -1); + } + } + else + { + populate_origin = 1; + origin = id; + } + + if (populate_origin) + if (ctf_dynhash_cinsert (d->cd_struct_origin, decorated, origin) < 0) + return ctf_set_errno (fp, errno); + return 0; +} + +/* Do the underlying hashing and recursion for ctf_dedup_hash_type (which it + calls, recursively). */ + +static const char * +ctf_dedup_rhash_type (ctf_file_t *fp, ctf_file_t *input, ctf_file_t **inputs, + uint32_t *parents, int input_num, ctf_id_t type, + void *type_id, const ctf_type_t *tp, const char *name, + const char *decorated, int kind, int flags, + unsigned long depth, + int (*populate_fun) (ctf_file_t *fp, + ctf_file_t *input, + ctf_file_t **inputs, + int input_num, + ctf_id_t type, + void *id, + const char *decorated_name, + const char *hash)) +{ + ctf_dedup_t *d = &fp->ctf_dedup; + ctf_next_t *i = NULL; + ctf_sha1_t hash; + ctf_id_t child_type; + char hashbuf[CTF_SHA1_SIZE]; + const char *hval = NULL; + const char *whaterr; + int err; + + const char *citer = NULL; + ctf_dynset_t *citers = NULL; + + /* Add a citer to the citers set. */ +#define ADD_CITER(citers, hval) \ + do \ + { \ + whaterr = "updating citers"; \ + if (!citers) \ + if ((citers = ctf_dynset_create (htab_hash_string, \ + ctf_dynset_eq_string, \ + NULL)) == NULL) \ + goto oom; \ + if (ctf_dynset_cinsert (citers, hval) < 0) \ + goto oom; \ + } while (0) + + /* If this is a named struct or union or a forward to one, and this is a child + traversal, treat this type as if it were a forward -- do not recurse to + children, ignore all content not already hashed in, and hash in the + decorated name of the type instead. */ + + if (ctf_dedup_is_stub (name, kind, tp->ctt_type, flags)) + { +#ifdef ENABLE_LIBCTF_HASH_DEBUGGING + ctf_dprintf ("Struct/union/forward citation: substituting forwarding " + "stub with decorated name %s\n", decorated); + +#endif + ctf_sha1_init (&hash); + ctf_dedup_sha1_add (&hash, decorated, strlen (decorated) + 1, + "decorated struct/union/forward name", depth); + ctf_sha1_fini (&hash, hashbuf); + + if ((hval = intern (fp, strdup (hashbuf))) == NULL) + { + ctf_err_warn (fp, 0, "%s (%i): out of memory during forwarding-stub " + "hashing for type with GID %p; errno: %s", + ctf_link_input_name (input), input_num, type_id, + strerror (errno)); + return NULL; /* errno is set for us. */ + } + + /* In share--duplicated link mode, make sure the origin of this type is + recorded, even if this is a type in a parent dict which will not be + directly traversed. */ + if (d->cd_link_flags & CTF_LINK_SHARE_DUPLICATED + && ctf_dedup_record_origin (fp, input_num, decorated, type_id) < 0) + return NULL; /* errno is set for us. */ + + return hval; + } + + /* Now ensure that subsequent recursive calls (but *not* the top-level call) + get this treatment. */ + flags |= CTF_DEDUP_HASH_INTERNAL_CHILD; + + /* If this is a struct, union, or forward with a name, record the unique + originating input TU, if there is one. */ + + if (decorated && (ctf_forwardable_kind (kind) || kind != CTF_K_FORWARD)) + if (d->cd_link_flags & CTF_LINK_SHARE_DUPLICATED + && ctf_dedup_record_origin (fp, input_num, decorated, type_id) < 0) + return NULL; /* errno is set for us. */ + + /* Mix in invariant stuff, transforming the type kind if needed. Note that + the vlen is *not* hashed in: the actual variable-length info is hashed in + instead, piecewise. The vlen is not part of the type, only the + variable-length data is: identical types with distinct vlens are quite + possible. Equally, we do not want to hash in the isroot flag: both the + compiler and the deduplicator set the nonroot flag to indicate clashes with + *other types in the same TU* with the same name: so two types can easily + have distinct nonroot flags, yet be exactly the same type.*/ + +#ifdef ENABLE_LIBCTF_HASH_DEBUGGING + ctf_dprintf ("%lu: hashing thing with ID %i/%lx (kind %i): %s.\n", + depth, input_num, type, kind, name ? name : ""); +#endif + + ctf_sha1_init (&hash); + if (name) + ctf_dedup_sha1_add (&hash, name, strlen (name) + 1, "name", depth); + ctf_dedup_sha1_add (&hash, &kind, sizeof (uint32_t), "kind", depth); + + /* Hash content of this type. */ + switch (kind) + { + case CTF_K_UNKNOWN: + /* No extra state. */ + break; + case CTF_K_FORWARD: + + /* Add the forwarded kind, stored in the ctt_type. */ + ctf_dedup_sha1_add (&hash, &tp->ctt_type, sizeof (tp->ctt_type), + "forwarded kind", depth); + break; + case CTF_K_INTEGER: + case CTF_K_FLOAT: + { + ctf_encoding_t ep; + memset (&ep, 0, sizeof (ctf_encoding_t)); + + ctf_dedup_sha1_add (&hash, &tp->ctt_size, sizeof (uint32_t), "size", + depth); + if (ctf_type_encoding (input, type, &ep) < 0) + { + whaterr = "encoding"; + goto err; + } + ctf_dedup_sha1_add (&hash, &ep, sizeof (ctf_encoding_t), "encoding", + depth); + break; + } + /* Types that reference other types. */ + case CTF_K_TYPEDEF: + case CTF_K_VOLATILE: + case CTF_K_CONST: + case CTF_K_RESTRICT: + case CTF_K_POINTER: + /* Hash the referenced type, if not already hashed, and mix it in. */ + child_type = ctf_type_reference (input, type); + if ((hval = ctf_dedup_hash_type (fp, input, inputs, parents, input_num, + child_type, flags, depth, + populate_fun)) == NULL) + { + whaterr = "referenced type hashing"; + goto err; + } + ctf_dedup_sha1_add (&hash, hval, strlen (hval) + 1, "referenced type", + depth); + citer = hval; + + break; + + /* The slices of two types hash identically only if the type they overlay + also has the same encoding. This is not ideal, but in practice will work + well enough. We work directly rather than using the CTF API because + we do not want the slice's normal automatically-shine-through + semantics to kick in here. */ + case CTF_K_SLICE: + { + const ctf_slice_t *slice; + const ctf_dtdef_t *dtd; + ssize_t size; + ssize_t increment; + + child_type = ctf_type_reference (input, type); + ctf_get_ctt_size (input, tp, &size, &increment); + ctf_dedup_sha1_add (&hash, &size, sizeof (ssize_t), "size", depth); + + if ((hval = ctf_dedup_hash_type (fp, input, inputs, parents, input_num, + child_type, flags, depth, + populate_fun)) == NULL) + { + whaterr = "slice-referenced type hashing"; + goto err; + } + ctf_dedup_sha1_add (&hash, hval, strlen (hval) + 1, "sliced type", + depth); + citer = hval; + + if ((dtd = ctf_dynamic_type (input, type)) != NULL) + slice = &dtd->dtd_u.dtu_slice; + else + slice = (ctf_slice_t *) ((uintptr_t) tp + increment); + + ctf_dedup_sha1_add (&hash, &slice->cts_offset, + sizeof (slice->cts_offset), "slice offset", depth); + ctf_dedup_sha1_add (&hash, &slice->cts_bits, + sizeof (slice->cts_bits), "slice bits", depth); + break; + } + + case CTF_K_ARRAY: + { + ctf_arinfo_t ar; + + if (ctf_array_info (input, type, &ar) < 0) + { + whaterr = "array info"; + goto err; + } + + if ((hval = ctf_dedup_hash_type (fp, input, inputs, parents, input_num, + ar.ctr_contents, flags, depth, + populate_fun)) == NULL) + { + whaterr = "array contents type hashing"; + goto err; + } + ctf_dedup_sha1_add (&hash, hval, strlen (hval) + 1, "array contents", + depth); + ADD_CITER (citers, hval); + + if ((hval = ctf_dedup_hash_type (fp, input, inputs, parents, input_num, + ar.ctr_index, flags, depth, + populate_fun)) == NULL) + { + whaterr = "array index type hashing"; + goto err; + } + ctf_dedup_sha1_add (&hash, hval, strlen (hval) + 1, "array index", + depth); + ctf_dedup_sha1_add (&hash, &ar.ctr_nelems, sizeof (ar.ctr_nelems), + "element count", depth); + ADD_CITER (citers, hval); + + break; + } + case CTF_K_FUNCTION: + { + ctf_funcinfo_t fi; + ctf_id_t *args; + uint32_t j; + + if (ctf_func_type_info (input, type, &fi) < 0) + { + whaterr = "func type info"; + goto err; + } + + if ((hval = ctf_dedup_hash_type (fp, input, inputs, parents, input_num, + fi.ctc_return, flags, depth, + populate_fun)) == NULL) + { + whaterr = "func return type"; + goto err; + } + ctf_dedup_sha1_add (&hash, hval, strlen (hval) + 1, "func return", + depth); + ctf_dedup_sha1_add (&hash, &fi.ctc_argc, sizeof (fi.ctc_argc), + "func argc", depth); + ctf_dedup_sha1_add (&hash, &fi.ctc_flags, sizeof (fi.ctc_flags), + "func flags", depth); + ADD_CITER (citers, hval); + + if ((args = calloc (fi.ctc_argc, sizeof (ctf_id_t))) == NULL) + { + whaterr = "memory allocation"; + goto err; + } + + if (ctf_func_type_args (input, type, fi.ctc_argc, args) < 0) + { + free (args); + whaterr = "func arg type"; + goto err; + } + for (j = 0; j < fi.ctc_argc; j++) + { + if ((hval = ctf_dedup_hash_type (fp, input, inputs, parents, + input_num, args[j], flags, depth, + populate_fun)) == NULL) + { + free (args); + whaterr = "func arg type hashing"; + goto err; + } + ctf_dedup_sha1_add (&hash, hval, strlen (hval) + 1, "func arg type", + depth); + ADD_CITER (citers, hval); + } + free (args); + break; + } + case CTF_K_ENUM: + { + int val; + const char *ename; + + ctf_dedup_sha1_add (&hash, &tp->ctt_size, sizeof (uint32_t), + "enum size", depth); + while ((ename = ctf_enum_next (input, type, &i, &val)) != NULL) + { + ctf_dedup_sha1_add (&hash, ename, strlen (ename) + 1, "enumerator", + depth); + ctf_dedup_sha1_add (&hash, &val, sizeof (val), "enumerand", depth); + } + if (ctf_errno (input) != ECTF_NEXT_END) + { + whaterr = "enum member iteration"; + goto err; + } + break; + } + /* Top-level only. */ + case CTF_K_STRUCT: + case CTF_K_UNION: + { + ssize_t offset; + const char *mname; + ctf_id_t membtype; + ssize_t size; + + ctf_get_ctt_size (input, tp, &size, NULL); + ctf_dedup_sha1_add (&hash, &size, sizeof (ssize_t), "struct size", + depth); + + while ((offset = ctf_member_next (input, type, &i, &mname, + &membtype)) >= 0) + { + if (mname == NULL) + mname = ""; + ctf_dedup_sha1_add (&hash, mname, strlen (mname) + 1, + "member name", depth); + +#ifdef ENABLE_LIBCTF_HASH_DEBUGGING + ctf_dprintf ("%lu: Traversing to member %s\n", depth, mname); +#endif + if ((hval = ctf_dedup_hash_type (fp, input, inputs, parents, + input_num, membtype, flags, depth, + populate_fun)) == NULL) + { + whaterr = "struct/union member type hashing"; + goto iterr; + } + + ctf_dedup_sha1_add (&hash, hval, strlen (hval) + 1, "member hash", + depth); + ctf_dedup_sha1_add (&hash, &offset, sizeof (offset), "member offset", + depth); + ADD_CITER (citers, hval); + } + if (ctf_errno (input) != ECTF_NEXT_END) + { + whaterr = "struct/union member iteration"; + goto err; + } + break; + } + default: + whaterr = "unknown type kind"; + goto err; + } + ctf_sha1_fini (&hash, hashbuf); + + if ((hval = intern (fp, strdup (hashbuf))) == NULL) + { + whaterr = "hash internment"; + goto oom; + } + + /* Populate the citers for this type's subtypes, now the hash for the type + itself is known. */ + whaterr = "citer tracking"; + + if (citer) + { + ctf_dynset_t *citer_hashes; + + if ((citer_hashes = make_set_element (d->cd_citers, citer)) == NULL) + goto oom; + if (ctf_dynset_cinsert (citer_hashes, hval) < 0) + goto oom; + } + else if (citers) + { + const void *k; + + while ((err = ctf_dynset_cnext (citers, &i, &k)) == 0) + { + ctf_dynset_t *citer_hashes; + citer = (const char *) k; + + if ((citer_hashes = make_set_element (d->cd_citers, citer)) == NULL) + goto oom; + + if (ctf_dynset_exists (citer_hashes, hval, NULL)) + continue; + if (ctf_dynset_cinsert (citer_hashes, hval) < 0) + goto oom; + } + if (err != ECTF_NEXT_END) + goto err; + ctf_dynset_destroy (citers); + } + + return hval; + + iterr: + ctf_next_destroy (i); + err: + ctf_sha1_fini (&hash, NULL); + ctf_err_warn (fp, 0, "%s (%i): %s error during type hashing for " + "type %lx, kind %i: CTF error: %s; errno: %s", + ctf_link_input_name (input), input_num, whaterr, type, + kind, ctf_errmsg (ctf_errno (fp)), strerror (errno)); + return NULL; + oom: + ctf_set_errno (fp, errno); + ctf_err_warn (fp, 0, "%s (%i): %s error during type hashing for " + "type %lx, kind %i: CTF error: %s; errno: %s", + ctf_link_input_name (input), input_num, whaterr, type, + kind, ctf_errmsg (ctf_errno (fp)), strerror (errno)); + return NULL; +} + +/* Hash a TYPE in the INPUT: FP is the eventual output, where the ctf_dedup + state is stored. INPUT_NUM is the number of this input in the set of inputs. + Record its hash in FP's cd_type_hashes once it is known. PARENTS is + described in the comment above ctf_dedup. + + (The flags argument currently accepts only the flag + CTF_DEDUP_HASH_INTERNAL_CHILD, an implementation detail used to prevent + struct/union hashing in recursive traversals below the TYPE.) + + We use the CTF API rather than direct access wherever possible, because types + that appear identical through the API should be considered identical, with + one exception: slices should only be considered identical to other slices, + not to the corresponding unsliced type. + + The POPULATE_FUN is a mandatory hook that populates other mappings with each + type we see (excepting types that are recursively hashed as stubs). The + caller should not rely on the order of calls to this hook, though it will be + called at least once for every non-stub reference to every type. + + Returns a hash value (an atom), or NULL on error. */ + +static const char * +ctf_dedup_hash_type (ctf_file_t *fp, ctf_file_t *input, + ctf_file_t **inputs, uint32_t *parents, + int input_num, ctf_id_t type, int flags, + unsigned long depth, + int (*populate_fun) (ctf_file_t *fp, + ctf_file_t *input, + ctf_file_t **inputs, + int input_num, + ctf_id_t type, + void *id, + const char *decorated_name, + const char *hash)) +{ + ctf_dedup_t *d = &fp->ctf_dedup; + const ctf_type_t *tp; + void *type_id; + const char *hval = NULL; + const char *name; + const char *whaterr; + const char *decorated = NULL; + uint32_t kind, fwdkind; + + depth++; + +#ifdef ENABLE_LIBCTF_HASH_DEBUGGING + ctf_dprintf ("%lu: ctf_dedup_hash_type (%i, %lx, flags %x)\n", depth, input_num, type, flags); +#endif + + /* The unimplemented type doesn't really exist, but must be noted in parent + hashes: so it gets a fixed, arbitrary hash. */ + if (type == 0) + return "00000000000000000000"; + + /* Possible optimization: if the input type is in the parent type space, just + copy recursively-cited hashes from the parent's types into the output + mapping rather than rehashing them. */ + + type_id = CTF_DEDUP_GID (fp, input_num, type); + + if ((tp = ctf_lookup_by_id (&input, type)) == NULL) + { + ctf_err_warn (fp, 0, "%s (%i): lookup failure for type %lx: flags %x", + ctf_link_input_name (input), input_num, type, flags); + return NULL; /* errno is set for us. */ + } + + kind = LCTF_INFO_KIND (input, tp->ctt_info); + name = ctf_strraw (input, tp->ctt_name); + + if (tp->ctt_name == 0 || !name || name[0] == '\0') + name = NULL; + + /* Treat the unknown kind just like the unimplemented type. */ + if (kind == CTF_K_UNKNOWN) + return "00000000000000000000"; + + /* Decorate the name appropriately for the namespace it appears in: forwards + appear in the namespace of their referent. */ + + fwdkind = kind; + if (name) + { + if (kind == CTF_K_FORWARD) + fwdkind = tp->ctt_type; + + if ((decorated = ctf_decorate_type_name (fp, name, fwdkind)) == NULL) + return NULL; /* errno is set for us. */ + } + + /* If not hashing a stub, we can rely on various sorts of caches. + + Optimization opportunity: we may be able to avoid calling the populate_fun + sometimes here. */ + + if (!ctf_dedup_is_stub (name, kind, fwdkind, flags)) + { + if ((hval = ctf_dynhash_lookup (d->cd_type_hashes, type_id)) != NULL) + { +#ifdef ENABLE_LIBCTF_HASH_DEBUGGING + ctf_dprintf ("%lu: Known hash for ID %i/%lx: %s\n", depth, input_num, + type, hval); +#endif + populate_fun (fp, input, inputs, input_num, type, type_id, + decorated, hval); + + return hval; + } + } + + /* We have never seen this type before, and must figure out its hash and the + hashes of the types it cites. + + Hash this type, and call ourselves recursively. (The hashing part is + optional, and is disabled if overidden_hval is set.) */ + + if ((hval = ctf_dedup_rhash_type (fp, input, inputs, parents, input_num, + type, type_id, tp, name, decorated, + kind, flags, depth, populate_fun)) == NULL) + return NULL; /* errno is set for us. */ + + /* The hash of this type is now known: record it unless caching is unsafe + because the hash value will change later. This will be the final storage + of this type's hash, so we call the population function on it. */ + + if (!ctf_dedup_is_stub (name, kind, fwdkind, flags)) + { +#ifdef ENABLE_LIBCTF_HASH_DEBUGGING + ctf_dprintf ("Caching %lx, ID %p (%s), %s in final location\n", type, + type_id, name ? name : "", hval); +#endif + + if (ctf_dynhash_cinsert (d->cd_type_hashes, type_id, hval) < 0) + { + whaterr = "hash caching"; + goto oom; + } + + if (populate_fun (fp, input, inputs, input_num, type, type_id, + decorated, hval) < 0) + { + whaterr = "population function"; + goto err; /* errno is set for us. */ + } + } + +#ifdef ENABLE_LIBCTF_HASH_DEBUGGING + ctf_dprintf ("%lu: Returning final hash for ID %i/%lx: %s\n", depth, + input_num, type, hval); +#endif + return hval; + + oom: + ctf_set_errno (fp, errno); + err: + ctf_err_warn (fp, 0, "%s (%i): %s error during type hashing, type %lx, " + "kind %i: CTF errno: %s; errno: %s", + ctf_link_input_name (input), input_num, whaterr, type, + kind, ctf_errmsg (ctf_errno (fp)), + strerror (errno)); + return NULL; +} + +/* Populate a number of useful mappings not directly used by the hashing + machinery: the output mapping, the cd_name_counts mapping from name -> hash + -> count of hashval deduplication state for a given hashed type, and the + cd_output_first_tu mapping. */ + +static int +ctf_dedup_populate_mappings (ctf_file_t *fp, ctf_file_t *input _libctf_unused_, + ctf_file_t **inputs _libctf_unused_, + int input_num _libctf_unused_, + ctf_id_t type _libctf_unused_, void *id, + const char *decorated_name, + const char *hval) +{ + ctf_dedup_t *d = &fp->ctf_dedup; + ctf_dynset_t *type_ids; + ctf_dynhash_t *name_counts; + long int count; + +#ifdef ENABLE_LIBCTF_HASH_DEBUGGING + ctf_dprintf ("Hash %s, %s, into output mapping for %i/%lx @ %s\n", + hval, decorated_name ? decorated_name : "(unnamed)", + input_num, type, ctf_link_input_name (input)); + + const char *orig_hval; + + /* Make sure we never map a single GID to multiple hash values. */ + + if ((orig_hval = ctf_dynhash_lookup (d->cd_output_mapping_guard, id)) != NULL) + { + /* We can rely on pointer identity here, since all hashes are + interned. */ + if (!ctf_assert (fp, orig_hval == hval)) + return -1; + } + else + if (ctf_dynhash_cinsert (d->cd_output_mapping_guard, id, hval) < 0) + return ctf_set_errno (fp, errno); +#endif + + /* Record the type in the output mapping: if this is the first time this type + has been seen, also record it in the cd_output_first_gid. Because we + traverse types in TU order and we do not merge types after the hashing + phase, this will be the lowest TU this type ever appears in. */ + + if ((type_ids = ctf_dynhash_lookup (d->cd_output_mapping, + hval)) == NULL) + { + if (ctf_dynhash_cinsert (d->cd_output_first_gid, hval, id) < 0) + return ctf_set_errno (fp, errno); + + if ((type_ids = ctf_dynset_create (htab_hash_pointer, + htab_eq_pointer, + NULL)) == NULL) + return ctf_set_errno (fp, errno); + if (ctf_dynhash_insert (d->cd_output_mapping, (void *) hval, + type_ids) < 0) + { + ctf_dynset_destroy (type_ids); + return ctf_set_errno (fp, errno); + } + } +#ifdef ENABLE_LIBCTF_HASH_DEBUGGING + { + /* Verify that all types with this hash are of the same kind, and that the + first TU a type was seen in never falls. */ + + int err; + const void *one_id; + ctf_next_t *i = NULL; + int orig_kind = ctf_type_kind_unsliced (input, type); + int orig_first_tu; + + orig_first_tu = CTF_DEDUP_GID_TO_INPUT + (ctf_dynhash_lookup (d->cd_output_first_gid, hval)); + if (!ctf_assert (fp, orig_first_tu <= CTF_DEDUP_GID_TO_INPUT (id))) + return -1; + + while ((err = ctf_dynset_cnext (type_ids, &i, &one_id)) == 0) + { + ctf_file_t *foo = inputs[CTF_DEDUP_GID_TO_INPUT (one_id)]; + ctf_id_t bar = CTF_DEDUP_GID_TO_TYPE (one_id); + if (ctf_type_kind_unsliced (foo, bar) != orig_kind) + { + ctf_err_warn (fp, 1, "added wrong kind to output mapping " + "for hash %s named %s: %p/%lx from %s is " + "kind %i, but newly-added %p/%lx from %s is " + "kind %i", hval, + decorated_name ? decorated_name : "(unnamed)", + (void *) foo, bar, + ctf_link_input_name (foo), + ctf_type_kind_unsliced (foo, bar), + (void *) input, type, + ctf_link_input_name (input), orig_kind); + if (!ctf_assert (fp, ctf_type_kind_unsliced (foo, bar) + == orig_kind)) + return -1; + } + } + if (err != ECTF_NEXT_END) + return ctf_set_errno (fp, err); + } +#endif + + /* This function will be repeatedly called for the same types many times: + don't waste time reinserting the same keys in that case. */ + if (!ctf_dynset_exists (type_ids, id, NULL) + && ctf_dynset_insert (type_ids, id) < 0) + return ctf_set_errno (fp, errno); + + /* The rest only needs to happen for types with names. */ + if (!decorated_name) + return 0; + + /* Count the number of occurrences of the hash value for this GID. */ + + hval = ctf_dynhash_lookup (d->cd_type_hashes, id); + + /* Mapping from name -> hash(hashval, count) not already present? */ + if ((name_counts = ctf_dynhash_lookup (d->cd_name_counts, + decorated_name)) == NULL) + { + if ((name_counts = ctf_dynhash_create (ctf_hash_string, + ctf_hash_eq_string, + NULL, NULL)) == NULL) + return ctf_set_errno (fp, errno); + if (ctf_dynhash_cinsert (d->cd_name_counts, decorated_name, + name_counts) < 0) + { + ctf_dynhash_destroy (name_counts); + return ctf_set_errno (fp, errno); + } + } + + /* This will, conveniently, return NULL (i.e. 0) for a new entry. */ + count = (long int) (uintptr_t) ctf_dynhash_lookup (name_counts, hval); + + if (ctf_dynhash_cinsert (name_counts, hval, + (const void *) (uintptr_t) (count + 1)) < 0) + return ctf_set_errno (fp, errno); + + return 0; +} + +/* Mark a single hash as corresponding to a conflicting type. Mark all types + that cite it as conflicting as well, terminating the recursive walk only when + types that are already conflicted or types do not cite other types are seen. + (Tagged structures and unions do not appear in the cd_citers graph, so the + walk also terminates there, since any reference to a conflicting structure is + just going to reference an unconflicting forward instead: see + ctf_dedup_maybe_synthesize_forward.) */ + +static int +ctf_dedup_mark_conflicting_hash (ctf_file_t *fp, const char *hval) +{ + ctf_dedup_t *d = &fp->ctf_dedup; + ctf_next_t *i = NULL; + int err; + const void *k; + ctf_dynset_t *citers; + + /* Mark conflicted if not already so marked. */ + if (ctf_dynset_exists (d->cd_conflicting_types, hval, NULL)) + return 0; + + ctf_dprintf ("Marking %s as conflicted\n", hval); + + if (ctf_dynset_cinsert (d->cd_conflicting_types, hval) < 0) + { + ctf_dprintf ("Out of memory marking %s as conflicted\n", hval); + ctf_set_errno (fp, errno); + return -1; + } + + /* If any types cite this type, mark them conflicted too. */ + if ((citers = ctf_dynhash_lookup (d->cd_citers, hval)) == NULL) + return 0; + + while ((err = ctf_dynset_cnext (citers, &i, &k)) == 0) + { + const char *hv = (const char *) k; + + if (ctf_dynset_exists (d->cd_conflicting_types, hv, NULL)) + continue; + + if (ctf_dedup_mark_conflicting_hash (fp, hv) < 0) + { + ctf_next_destroy (i); + return -1; /* errno is set for us. */ + } + } + if (err != ECTF_NEXT_END) + return ctf_set_errno (fp, err); + + return 0; +} + +/* Look up a type kind from the output mapping, given a type hash value. */ +static int +ctf_dedup_hash_kind (ctf_file_t *fp, ctf_file_t **inputs, const char *hash) +{ + ctf_dedup_t *d = &fp->ctf_dedup; + void *id; + ctf_dynset_t *type_ids; + + /* Precondition: the output mapping is populated. */ + if (!ctf_assert (fp, ctf_dynhash_elements (d->cd_output_mapping) > 0)) + return -1; + + /* Look up some GID from the output hash for this type. (They are all + identical, so we can pick any). Don't assert if someone calls this + function wrongly, but do assert if the output mapping knows about the hash, + but has nothing associated with it. */ + + type_ids = ctf_dynhash_lookup (d->cd_output_mapping, hash); + if (!type_ids) + { + ctf_dprintf ("Looked up type kind by nonexistent hash %s.\n", hash); + return ctf_set_errno (fp, ECTF_INTERNAL); + } + id = ctf_dynset_lookup_any (type_ids); + if (!ctf_assert (fp, id)) + return -1; + + return ctf_type_kind_unsliced (inputs[CTF_DEDUP_GID_TO_INPUT (id)], + CTF_DEDUP_GID_TO_TYPE (id)); +} + +/* Used to keep a count of types: i.e. distinct type hash values. */ +typedef struct ctf_dedup_type_counter +{ + ctf_file_t *fp; + ctf_file_t **inputs; + int num_non_forwards; +} ctf_dedup_type_counter_t; + +/* Add to the type counter for one name entry from the cd_name_counts. */ +static int +ctf_dedup_count_types (void *key_, void *value _libctf_unused_, void *arg_) +{ + const char *hval = (const char *) key_; + int kind; + ctf_dedup_type_counter_t *arg = (ctf_dedup_type_counter_t *) arg_; + + kind = ctf_dedup_hash_kind (arg->fp, arg->inputs, hval); + + /* We rely on ctf_dedup_hash_kind setting the fp to -ECTF_INTERNAL on error to + smuggle errors out of here. */ + + if (kind != CTF_K_FORWARD) + { + arg->num_non_forwards++; + ctf_dprintf ("Counting hash %s: kind %i: num_non_forwards is %i\n", + hval, kind, arg->num_non_forwards); + } + + /* We only need to know if there is more than one non-forward (an ambiguous + type): don't waste time iterating any more than needed to figure that + out. */ + + if (arg->num_non_forwards > 1) + return 1; + + return 0; +} + +/* Detect name ambiguity and mark ambiguous names as conflicting, other than the + most common. */ +static int +ctf_dedup_detect_name_ambiguity (ctf_file_t *fp, ctf_file_t **inputs) +{ + ctf_dedup_t *d = &fp->ctf_dedup; + ctf_next_t *i = NULL; + void *k; + void *v; + int err; + const char *erm; + + /* Go through cd_name_counts for all CTF namespaces in turn. */ + + while ((err = ctf_dynhash_next (d->cd_name_counts, &i, &k, &v)) == 0) + { + const char *decorated = (const char *) k; + ctf_dynhash_t *name_counts = (ctf_dynhash_t *) v; + ctf_next_t *j = NULL; + + /* If this is a forwardable kind or a forward (which we can tell without + consulting the type because its decorated name has a space as its + second character: see ctf_decorate_type_name), we are only interested + in whether this name has many hashes associated with it: any such name + is necessarily ambiguous, and types with that name are conflicting. + Once we know whether this is true, we can skip to the next name: so use + ctf_dynhash_iter_find for efficiency. */ + + if (decorated[0] != '\0' && decorated[1] == ' ') + { + ctf_dedup_type_counter_t counters = { fp, inputs, 0 }; + ctf_dynhash_t *counts = (ctf_dynhash_t *) v; + + ctf_dynhash_iter_find (counts, ctf_dedup_count_types, &counters); + + /* Check for assertion failure and pass it up. */ + if (ctf_errno (fp) == ECTF_INTERNAL) + goto assert_err; + + if (counters.num_non_forwards > 1) + { + const void *hval_; + + while ((err = ctf_dynhash_cnext (counts, &j, &hval_, NULL)) == 0) + { + const char *hval = (const char *) hval_; + ctf_dynset_t *type_ids; + void *id; + int kind; + + /* Dig through the types in this hash to find the non-forwards + and mark them ambiguous. */ + + type_ids = ctf_dynhash_lookup (d->cd_output_mapping, hval); + + /* Nonexistent? Must be a forward with no referent. */ + if (!type_ids) + continue; + + id = ctf_dynset_lookup_any (type_ids); + + kind = ctf_type_kind (inputs[CTF_DEDUP_GID_TO_INPUT (id)], + CTF_DEDUP_GID_TO_TYPE (id)); + + if (kind != CTF_K_FORWARD) + { + ctf_dprintf ("Marking %p, with hash %s, conflicting: one " + "of many non-forward GIDs for %s\n", id, + hval, (char *) k); + ctf_dedup_mark_conflicting_hash (fp, hval); + } + } + if (err != ECTF_NEXT_END) + { + erm = "marking conflicting structs/unions"; + goto iterr; + } + } + } + else + { + /* This is an ordinary type. Find the most common type with this + name, and mark it unconflicting: all others are conflicting. (We + cannot do this sort of popularity contest with forwardable types + because any forwards to that type would be immediately unified with + the most-popular type on insertion, and we want conflicting structs + et al to have all forwards left intact, so the user is notified + that this type is conflicting. TODO: improve this in future by + setting such forwards non-root-visible.) */ + + const void *key; + const void *count; + const char *hval; + long max_hcount = -1; + const char *max_hval = NULL; + + if (ctf_dynhash_elements (name_counts) <= 1) + continue; + + /* First find the most common. */ + while ((err = ctf_dynhash_cnext (name_counts, &j, &key, &count)) == 0) + { + hval = (const char *) key; + if ((long int) (uintptr_t) count > max_hcount) + { + max_hcount = (long int) (uintptr_t) count; + max_hval = hval; + } + } + if (err != ECTF_NEXT_END) + { + erm = "finding commonest conflicting type"; + goto iterr; + } + + /* Mark all the others as conflicting. */ + while ((err = ctf_dynhash_cnext (name_counts, &j, &key, NULL)) == 0) + { + hval = (const char *) key; + if (strcmp (max_hval, hval) == 0) + continue; + + ctf_dprintf ("Marking %s, an uncommon hash for %s, conflicting\n", + hval, (const char *) k); + if (ctf_dedup_mark_conflicting_hash (fp, hval) < 0) + { + erm = "marking hashes as conflicting"; + goto err; + } + } + if (err != ECTF_NEXT_END) + { + erm = "marking uncommon conflicting types"; + goto iterr; + } + } + } + if (err != ECTF_NEXT_END) + { + erm = "scanning for ambiguous names"; + goto iterr; + } + + return 0; + + err: + ctf_next_destroy (i); + ctf_err_warn (fp, 0, "%s: %s", erm, ctf_errmsg (ctf_errno (fp))); + return -1; + + iterr: + ctf_err_warn (fp, 0, "iteration failed %s: %s", erm, ctf_errmsg (err)); + return ctf_set_errno (fp, err); + + assert_err: + ctf_next_destroy (i); + return -1; /* errno is set for us. */ +} + +/* Initialize the deduplication machinery. */ + +static int +ctf_dedup_init (ctf_file_t *fp) +{ + ctf_dedup_t *d = &fp->ctf_dedup; + size_t i; + + if (ctf_dedup_atoms_init (fp) < 0) + goto oom; + +#if IDS_NEED_ALLOCATION + if ((d->cd_id_to_file_t = ctf_dynhash_create (ctf_hash_type_id_key, + ctf_hash_eq_type_id_key, + free, NULL)) == NULL) + goto oom; +#endif + + for (i = 0; i < 4; i++) + { + if ((d->cd_decorated_names[i] = ctf_dynhash_create (ctf_hash_string, + ctf_hash_eq_string, + NULL, NULL)) == NULL) + goto oom; + } + + if ((d->cd_name_counts + = ctf_dynhash_create (ctf_hash_string, + ctf_hash_eq_string, NULL, + (ctf_hash_free_fun) ctf_dynhash_destroy)) == NULL) + goto oom; + + if ((d->cd_type_hashes + = ctf_dynhash_create (ctf_hash_integer, + ctf_hash_eq_integer, + NULL, NULL)) == NULL) + goto oom; + + if ((d->cd_struct_origin + = ctf_dynhash_create (ctf_hash_string, + ctf_hash_eq_string, + NULL, NULL)) == NULL) + goto oom; + + if ((d->cd_citers + = ctf_dynhash_create (ctf_hash_string, + ctf_hash_eq_string, NULL, + (ctf_hash_free_fun) ctf_dynset_destroy)) == NULL) + goto oom; + + if ((d->cd_output_mapping + = ctf_dynhash_create (ctf_hash_string, + ctf_hash_eq_string, NULL, + (ctf_hash_free_fun) ctf_dynset_destroy)) == NULL) + goto oom; + + if ((d->cd_output_first_gid + = ctf_dynhash_create (ctf_hash_string, + ctf_hash_eq_string, + NULL, NULL)) == NULL) + goto oom; + +#ifdef ENABLE_LIBCTF_HASH_DEBUGGING + if ((d->cd_output_mapping_guard + = ctf_dynhash_create (ctf_hash_integer, + ctf_hash_eq_integer, NULL, NULL)) == NULL) + goto oom; +#endif + + if ((d->cd_emission_struct_members + = ctf_dynhash_create (ctf_hash_integer, + ctf_hash_eq_integer, + NULL, NULL)) == NULL) + goto oom; + + if ((d->cd_conflicting_types + = ctf_dynset_create (htab_hash_string, + ctf_dynset_eq_string, NULL)) == NULL) + goto oom; + + return 0; + + oom: + ctf_err_warn (fp, 0, "ctf_dedup_init: cannot initialize: " + "out of memory."); + return ctf_set_errno (fp, ENOMEM); +} + +void +ctf_dedup_fini (ctf_file_t *fp, ctf_file_t **outputs, uint32_t noutputs) +{ + ctf_dedup_t *d = &fp->ctf_dedup; + size_t i; + + /* ctf_dedup_atoms is kept across links. */ +#if IDS_NEED_ALLOCATION + ctf_dynhash_destroy (d->cd_id_to_file_t); +#endif + for (i = 0; i < 4; i++) + ctf_dynhash_destroy (d->cd_decorated_names[i]); + ctf_dynhash_destroy (d->cd_name_counts); + ctf_dynhash_destroy (d->cd_type_hashes); + ctf_dynhash_destroy (d->cd_struct_origin); + ctf_dynhash_destroy (d->cd_citers); + ctf_dynhash_destroy (d->cd_output_mapping); + ctf_dynhash_destroy (d->cd_output_first_gid); +#ifdef ENABLE_LIBCTF_HASH_DEBUGGING + ctf_dynhash_destroy (d->cd_output_mapping_guard); +#endif + ctf_dynhash_destroy (d->cd_emission_struct_members); + ctf_dynset_destroy (d->cd_conflicting_types); + + /* Free the per-output state. */ + if (outputs) + { + for (i = 0; i < noutputs; i++) + { + ctf_dedup_t *od = &outputs[i]->ctf_dedup; + ctf_dynhash_destroy (od->cd_output_emission_hashes); + ctf_dynhash_destroy (od->cd_output_emission_conflicted_forwards); + ctf_file_close (od->cd_output); + } + } + memset (d, 0, sizeof (ctf_dedup_t)); +} + +/* Return 1 if this type is cited by multiple input dictionaries. */ + +static int +ctf_dedup_multiple_input_dicts (ctf_file_t *output, ctf_file_t **inputs, + const char *hval) +{ + ctf_dedup_t *d = &output->ctf_dedup; + ctf_dynset_t *type_ids; + ctf_next_t *i = NULL; + void *id; + ctf_file_t *found = NULL, *relative_found = NULL; + const char *type_id; + ctf_file_t *input_fp; + ctf_id_t input_id; + const char *name; + const char *decorated; + int fwdkind; + int multiple = 0; + int err; + + type_ids = ctf_dynhash_lookup (d->cd_output_mapping, hval); + if (!ctf_assert (output, type_ids)) + return -1; + + /* Scan across the IDs until we find proof that two disjoint dictionaries + are referenced. Exit as soon as possible. Optimization opportunity, but + possibly not worth it, given that this is only executed in + CTF_LINK_SHARE_DUPLICATED mode. */ + + while ((err = ctf_dynset_next (type_ids, &i, &id)) == 0) + { + ctf_file_t *fp = inputs[CTF_DEDUP_GID_TO_INPUT (id)]; + + if (fp == found || fp == relative_found) + continue; + + if (!found) + { + found = fp; + continue; + } + + if (!relative_found + && (fp->ctf_parent == found || found->ctf_parent == fp)) + { + relative_found = fp; + continue; + } + + multiple = 1; + ctf_next_destroy (i); + break; + } + if ((err != ECTF_NEXT_END) && (err != 0)) + { + ctf_err_warn (output, 0, "propagating conflictedness: %s", + ctf_errmsg (err)); + ctf_set_errno (output, err); + return -1; + } + + if (multiple) + return multiple; + + /* This type itself does not appear in multiple input dicts: how about another + related type with the same name (e.g. a forward if this is a struct, + etc). */ + + type_id = ctf_dynset_lookup_any (type_ids); + if (!ctf_assert (output, type_id)) + return -1; + + input_fp = inputs[CTF_DEDUP_GID_TO_INPUT (type_id)]; + input_id = CTF_DEDUP_GID_TO_TYPE (type_id); + fwdkind = ctf_type_kind_forwarded (input_fp, input_id); + name = ctf_type_name_raw (input_fp, input_id); + + if ((fwdkind == CTF_K_STRUCT || fwdkind == CTF_K_UNION) + && name && name[0] != '\0') + { + const void *origin; + + if ((decorated = ctf_decorate_type_name (output, name, + fwdkind)) == NULL) + return -1; /* errno is set for us. */ + + origin = ctf_dynhash_lookup (d->cd_struct_origin, decorated); + if ((origin != NULL) && (CTF_DEDUP_GID_TO_INPUT (origin) < 0)) + multiple = 1; + } + + return multiple; +} + +/* Demote unconflicting types which reference only one input, or which reference + two inputs where one input is the parent of the other, into conflicting + types. Only used if the link mode is CTF_LINK_SHARE_DUPLICATED. */ + +static int +ctf_dedup_conflictify_unshared (ctf_file_t *output, ctf_file_t **inputs) +{ + ctf_dedup_t *d = &output->ctf_dedup; + ctf_next_t *i = NULL; + int err; + const void *k; + ctf_dynset_t *to_mark = NULL; + + if ((to_mark = ctf_dynset_create (htab_hash_string, ctf_dynset_eq_string, + NULL)) == NULL) + goto err_no; + + while ((err = ctf_dynhash_cnext (d->cd_output_mapping, &i, &k, NULL)) == 0) + { + const char *hval = (const char *) k; + int conflicting; + + /* Types referenced by only one dict, with no type appearing under that + name elsewhere, are marked conflicting. */ + + conflicting = !ctf_dedup_multiple_input_dicts (output, inputs, hval); + + if (conflicting < 0) + goto err; /* errno is set for us. */ + + if (conflicting) + if (ctf_dynset_cinsert (to_mark, hval) < 0) + goto err; + } + if (err != ECTF_NEXT_END) + goto iterr; + + while ((err = ctf_dynset_cnext (to_mark, &i, &k)) == 0) + { + const char *hval = (const char *) k; + + if (ctf_dedup_mark_conflicting_hash (output, hval) < 0) + goto err; + } + if (err != ECTF_NEXT_END) + goto iterr; + + ctf_dynset_destroy (to_mark); + + return 0; + + err_no: + ctf_set_errno (output, errno); + err: + err = ctf_errno (output); + ctf_next_destroy (i); + iterr: + ctf_set_errno (output, err); + ctf_dynset_destroy (to_mark); + ctf_err_warn (output, 0, "conflictifying unshared types: %s", + ctf_errmsg (ctf_errno (output))); + return -1; +} + +/* The core deduplicator. Populate cd_output_mapping in the output ctf_dedup + with a mapping of all types that belong in this dictionary and where they + come from, and cd_conflicting_types with an indication of whether each type + is conflicted or not. OUTPUT is the top-level output: INPUTS is the array of + input dicts; NINPUTS is the size of that array; PARENTS is an NINPUTS-element + array with each element corresponding to a input which is a child dict set to + the number in the INPUTS array of that input's parent. + + If CU_MAPPED is set, this is a first pass for a link with a non-empty CU + mapping: only one output will result. + + Only deduplicates: does not emit the types into the output. Call + ctf_dedup_emit afterwards to do that. */ + +int +ctf_dedup (ctf_file_t *output, ctf_file_t **inputs, uint32_t ninputs, + uint32_t *parents, int cu_mapped) +{ + ctf_dedup_t *d = &output->ctf_dedup; + size_t i; + ctf_next_t *it = NULL; + + for (i = 0; i < ninputs; i++) + ctf_dprintf ("Input %i: %s\n", (int) i, ctf_link_input_name (inputs[i])); + + if (ctf_dedup_init (output) < 0) + return -1; /* errno is set for us. */ + + /* Some flags do not apply when CU-mapping: this is not a duplicated link, + because there is only one output and we really don't want to end up marking + all nonconflicting but appears-only-once types as conflicting (which in the + CU-mapped link means we'd mark them all as non-root-visible!). */ + d->cd_link_flags = output->ctf_link_flags; + if (cu_mapped) + d->cd_link_flags &= ~(CTF_LINK_SHARE_DUPLICATED); + + /* Compute hash values for all types, recursively, treating child structures + and unions equivalent to forwards, and hashing in the name of the referent + of each such type into structures, unions, and non-opaque forwards. + Populate a mapping from decorated name (including an indication of + struct/union/enum namespace) to count of type hash values in + cd_name_counts, a mapping from and a mapping from hash values to input type + IDs in cd_output_mapping. */ + + ctf_dprintf ("Computing type hashes\n"); + for (i = 0; i < ninputs; i++) + { + ctf_id_t id; + + while ((id = ctf_type_next (inputs[i], &it, NULL, 1)) != CTF_ERR) + { + ctf_dedup_hash_type (output, inputs[i], inputs, parents, + i, id, 0, 0, ctf_dedup_populate_mappings); + } + if (ctf_errno (inputs[i]) != ECTF_NEXT_END) + { + ctf_err_warn (output, 0, "iteration failure computing type " + "hashes: %s", ctf_errmsg (ctf_errno (inputs[i]))); + return ctf_set_errno (output, ctf_errno (inputs[i])); + } + } + + /* Go through the cd_name_counts name->hash->count mapping for all CTF + namespaces: any name with many hashes associated with it at this stage is + necessarily ambiguous. Mark all the hashes except the most common as + conflicting in the output. */ + + ctf_dprintf ("Detecting type name ambiguity\n"); + if (ctf_dedup_detect_name_ambiguity (output, inputs) < 0) + return -1; /* errno is set for us. */ + + /* If the link mode is CTF_LINK_SHARE_DUPLICATED, we change any unconflicting + types whose output mapping references only one input dict into a + conflicting type, so that they end up in the per-CU dictionaries. */ + + if (d->cd_link_flags & CTF_LINK_SHARE_DUPLICATED) + { + ctf_dprintf ("Conflictifying unshared types\n"); + if (ctf_dedup_conflictify_unshared (output, inputs) < 0) + return -1; /* errno is set for us. */ + } + return 0; +} + +static int +ctf_dedup_rwalk_output_mapping (ctf_file_t *output, ctf_file_t **inputs, + uint32_t ninputs, uint32_t *parents, + ctf_dynset_t *already_visited, + const char *hval, + int (*visit_fun) (const char *hval, + ctf_file_t *output, + ctf_file_t **inputs, + uint32_t ninputs, + uint32_t *parents, + int already_visited, + ctf_file_t *input, + ctf_id_t type, + void *id, + int depth, + void *arg), + void *arg, unsigned long depth); + +/* Like ctf_dedup_rwalk_output_mapping (which see), only takes a single target + type and visits it. */ +static int +ctf_dedup_rwalk_one_output_mapping (ctf_file_t *output, + ctf_file_t **inputs, uint32_t ninputs, + uint32_t *parents, + ctf_dynset_t *already_visited, + int visited, void *type_id, + const char *hval, + int (*visit_fun) (const char *hval, + ctf_file_t *output, + ctf_file_t **inputs, + uint32_t ninputs, + uint32_t *parents, + int already_visited, + ctf_file_t *input, + ctf_id_t type, + void *id, + int depth, + void *arg), + void *arg, unsigned long depth) +{ + ctf_dedup_t *d = &output->ctf_dedup; + ctf_file_t *fp; + int input_num; + ctf_id_t type; + int ret; + const char *whaterr; + + input_num = CTF_DEDUP_GID_TO_INPUT (type_id); + fp = inputs[input_num]; + type = CTF_DEDUP_GID_TO_TYPE (type_id); + + ctf_dprintf ("%lu: Starting walk over type %s, %i/%lx (%p), from %s, " + "kind %i\n", depth, hval, input_num, type, (void *) fp, + ctf_link_input_name (fp), ctf_type_kind_unsliced (fp, type)); + + /* Get the single call we do if this type has already been visited out of the + way. */ + if (visited) + return visit_fun (hval, output, inputs, ninputs, parents, visited, fp, + type, type_id, depth, arg); + + /* This macro is really ugly, but the alternative is repeating this code many + times, which is worse. */ + +#define CTF_TYPE_WALK(type, errlabel, errmsg) \ + do { \ + void *type_id; \ + const char *hashval; \ + int cited_type_input_num = input_num; \ + \ + if ((fp->ctf_flags & LCTF_CHILD) && (LCTF_TYPE_ISPARENT (fp, type))) \ + cited_type_input_num = parents[input_num]; \ + \ + type_id = CTF_DEDUP_GID (output, cited_type_input_num, type); \ + \ + if (type == 0) \ + { \ + ctf_dprintf ("Walking: unimplemented type\n"); \ + break; \ + } \ + \ + ctf_dprintf ("Looking up ID %i/%lx in type hashes\n", \ + cited_type_input_num, type); \ + hashval = ctf_dynhash_lookup (d->cd_type_hashes, type_id); \ + if (!ctf_assert (output, hashval)) \ + { \ + whaterr = "looking up ID in type hashes"; \ + goto errlabel; \ + } \ + ctf_dprintf ("ID %i/%lx has hash %s\n", cited_type_input_num, type, \ + hashval); \ + \ + ret = ctf_dedup_rwalk_output_mapping (output, inputs, ninputs, parents, \ + already_visited, hashval, \ + visit_fun, arg, depth); \ + if (ret < 0) \ + { \ + whaterr = errmsg; \ + goto errlabel; \ + } \ + } while (0) + + switch (ctf_type_kind_unsliced (fp, type)) + { + case CTF_K_UNKNOWN: + /* Just skip things of unknown kind. */ + return 0; + case CTF_K_FORWARD: + case CTF_K_INTEGER: + case CTF_K_FLOAT: + case CTF_K_ENUM: + /* No types referenced. */ + break; + + case CTF_K_TYPEDEF: + case CTF_K_VOLATILE: + case CTF_K_CONST: + case CTF_K_RESTRICT: + case CTF_K_POINTER: + case CTF_K_SLICE: + CTF_TYPE_WALK (ctf_type_reference (fp, type), err, + "Referenced type walk"); + break; + + case CTF_K_ARRAY: + { + ctf_arinfo_t ar; + + if (ctf_array_info (fp, type, &ar) < 0) + { + whaterr = "array info lookup"; + goto err_msg; + } + + CTF_TYPE_WALK (ar.ctr_contents, err, "Array contents type walk"); + CTF_TYPE_WALK (ar.ctr_index, err, "Array index type walk"); + break; + } + + case CTF_K_FUNCTION: + { + ctf_funcinfo_t fi; + ctf_id_t *args; + uint32_t j; + + if (ctf_func_type_info (fp, type, &fi) < 0) + { + whaterr = "func type info lookup"; + goto err_msg; + } + + CTF_TYPE_WALK (fi.ctc_return, err, "Func return type walk"); + + if ((args = calloc (fi.ctc_argc, sizeof (ctf_id_t))) == NULL) + { + whaterr = "memory allocation"; + goto err_msg; + } + + if (ctf_func_type_args (fp, type, fi.ctc_argc, args) < 0) + { + whaterr = "func arg type lookup"; + free (args); + goto err_msg; + } + + for (j = 0; j < fi.ctc_argc; j++) + CTF_TYPE_WALK (args[j], err_free_args, "Func arg type walk"); + free (args); + break; + + err_free_args: + free (args); + goto err; + } + case CTF_K_STRUCT: + case CTF_K_UNION: + /* We do not recursively traverse the members of structures: they are + emitted later, in a separate pass. */ + break; + default: + whaterr = "unknown type kind"; + goto err; + } + + return visit_fun (hval, output, inputs, ninputs, parents, visited, fp, type, + type_id, depth, arg); + + err_msg: + ctf_set_errno (output, ctf_errno (fp)); + ctf_err_warn (fp, 0, "%s during type walking in %s at ID %lx: %s", whaterr, + ctf_link_input_name (fp), type, ctf_errmsg (ctf_errno (fp))); + err: + return -1; +} +/* Recursively traverse the output mapping, and do something with each type + visited, from leaves to root. VISIT_FUN, called as recursion unwinds, + returns a negative error code or zero. Type hashes may be visited more than + once, but are not recursed through repeatedly: ALREADY_VISITED tracks whether + types have already been visited. */ +static int +ctf_dedup_rwalk_output_mapping (ctf_file_t *output, ctf_file_t **inputs, + uint32_t ninputs, uint32_t *parents, + ctf_dynset_t *already_visited, + const char *hval, + int (*visit_fun) (const char *hval, + ctf_file_t *output, + ctf_file_t **inputs, + uint32_t ninputs, + uint32_t *parents, + int already_visited, + ctf_file_t *input, + ctf_id_t type, + void *id, + int depth, + void *arg), + void *arg, unsigned long depth) +{ + ctf_dedup_t *d = &output->ctf_dedup; + ctf_next_t *i = NULL; + int err; + int visited = 1; + ctf_dynset_t *type_ids; + void *id; + + depth++; + + type_ids = ctf_dynhash_lookup (d->cd_output_mapping, hval); + if (!type_ids) + { + ctf_err_warn (output, 0, "looked up type kind by nonexistent " + "hash %s.", hval); + return ctf_set_errno (output, ECTF_INTERNAL); + } + + /* Have we seen this type before? */ + + if (!ctf_dynset_exists (already_visited, hval, NULL)) + { + /* Mark as already-visited immediately, to eliminate the possibility of + cycles: but remember we have not actually visited it yet for the + upcoming call to the visit_fun. (All our callers handle cycles + properly themselves, so we can just abort them aggressively as soon as + we find ourselves in one.) */ + + visited = 0; + if (ctf_dynset_cinsert (already_visited, hval) < 0) + { + ctf_err_warn (output, 0, "out of memory tracking already-visited " + "types."); + return ctf_set_errno (output, ENOMEM); + } + } + + /* If this type is marked conflicted, traverse members and call + ctf_dedup_rwalk_output_mapping_once on all the unique ones: otherwise, just + pick a random one and use it. */ + + if (!ctf_dynset_exists (d->cd_conflicting_types, hval, NULL)) + { + id = ctf_dynset_lookup_any (type_ids); + if (!ctf_assert (output, id)) + return -1; + + return ctf_dedup_rwalk_one_output_mapping (output, inputs, ninputs, + parents, already_visited, + visited, id, hval, visit_fun, + arg, depth); + } + + while ((err = ctf_dynset_next (type_ids, &i, &id)) == 0) + { + int ret; + + ret = ctf_dedup_rwalk_one_output_mapping (output, inputs, ninputs, + parents, already_visited, + visited, id, hval, + visit_fun, arg, depth); + if (ret < 0) + { + ctf_next_destroy (i); + return ret; /* errno is set for us. */ + } + } + if (err != ECTF_NEXT_END) + { + ctf_err_warn (output, 0, "walking many types with one hash: %s", + ctf_errmsg (err)); + return ctf_set_errno (output, err); + } + + return 0; +} + +typedef struct ctf_sort_om_cb_arg +{ + ctf_file_t **inputs; + uint32_t ninputs; + ctf_dedup_t *d; +} ctf_sort_om_cb_arg_t; + +/* Sort the output mapping into order: types first appearing in earlier inputs + first, parents preceding children: if types first appear in the same input, + sort those with earlier ctf_id_t's first. */ +static int +sort_output_mapping (const ctf_next_hkv_t *one, const ctf_next_hkv_t *two, + void *arg_) +{ + ctf_sort_om_cb_arg_t *arg = (ctf_sort_om_cb_arg_t *) arg_; + ctf_dedup_t *d = arg->d; + const char *one_hval = (const char *) one->hkv_key; + const char *two_hval = (const char *) two->hkv_key; + void *one_gid, *two_gid; + uint32_t one_ninput; + uint32_t two_ninput; + ctf_file_t *one_fp; + ctf_file_t *two_fp; + ctf_id_t one_type; + ctf_id_t two_type; + + one_gid = ctf_dynhash_lookup (d->cd_output_first_gid, one_hval); + two_gid = ctf_dynhash_lookup (d->cd_output_first_gid, two_hval); + + one_ninput = CTF_DEDUP_GID_TO_INPUT (one_gid); + two_ninput = CTF_DEDUP_GID_TO_INPUT (two_gid); + + one_type = CTF_DEDUP_GID_TO_TYPE (one_gid); + two_type = CTF_DEDUP_GID_TO_TYPE (two_gid); + + /* It's kind of hard to smuggle an assertion failure out of here. */ + assert (one_ninput < arg->ninputs && two_ninput < arg->ninputs); + + one_fp = arg->inputs[one_ninput]; + two_fp = arg->inputs[two_ninput]; + + /* Parents before children. */ + + if (!(one_fp->ctf_flags & LCTF_CHILD) + && (two_fp->ctf_flags & LCTF_CHILD)) + return -1; + else if ((one_fp->ctf_flags & LCTF_CHILD) + && !(two_fp->ctf_flags & LCTF_CHILD)) + return 1; + + /* ninput order, types appearing in earlier TUs first. */ + + if (one_ninput < two_ninput) + return -1; + else if (two_ninput < one_ninput) + return 1; + + /* Same TU. Earliest ctf_id_t first. They cannot be the same. */ + + assert (one_type != two_type); + if (one_type < two_type) + return -1; + else + return 1; +} + +/* The public entry point to ctf_dedup_rwalk_output_mapping, above. */ +static int +ctf_dedup_walk_output_mapping (ctf_file_t *output, ctf_file_t **inputs, + uint32_t ninputs, uint32_t *parents, + int (*visit_fun) (const char *hval, + ctf_file_t *output, + ctf_file_t **inputs, + uint32_t ninputs, + uint32_t *parents, + int already_visited, + ctf_file_t *input, + ctf_id_t type, + void *id, + int depth, + void *arg), + void *arg) +{ + ctf_dynset_t *already_visited; + ctf_next_t *i = NULL; + ctf_sort_om_cb_arg_t sort_arg; + int err; + void *k; + + if ((already_visited = ctf_dynset_create (htab_hash_string, + ctf_dynset_eq_string, + NULL)) == NULL) + return ctf_set_errno (output, ENOMEM); + + sort_arg.inputs = inputs; + sort_arg.ninputs = ninputs; + sort_arg.d = &output->ctf_dedup; + + while ((err = ctf_dynhash_next_sorted (output->ctf_dedup.cd_output_mapping, + &i, &k, NULL, sort_output_mapping, + &sort_arg)) == 0) + { + const char *hval = (const char *) k; + + err = ctf_dedup_rwalk_output_mapping (output, inputs, ninputs, parents, + already_visited, hval, visit_fun, + arg, 0); + if (err < 0) + { + ctf_next_destroy (i); + goto err; /* errno is set for us. */ + } + } + if (err != ECTF_NEXT_END) + { + ctf_err_warn (output, 0, "recursing over output mapping: %s", + ctf_errmsg (err)); + ctf_set_errno (output, err); + goto err; + } + ctf_dynset_destroy (already_visited); + + return 0; + err: + ctf_dynset_destroy (already_visited); + return -1; +} + +/* Possibly synthesise a synthetic forward in TARGET to subsitute for a + conflicted per-TU type ID in INPUT with hash HVAL. Return its CTF ID, or 0 + if none was needed. */ +static ctf_id_t +ctf_dedup_maybe_synthesize_forward (ctf_file_t *output, ctf_file_t *target, + ctf_file_t *input, ctf_id_t id, + const char *hval) +{ + ctf_dedup_t *od = &output->ctf_dedup; + ctf_dedup_t *td = &target->ctf_dedup; + int kind; + int fwdkind; + const char *name; + const char *decorated; + void *v; + ctf_id_t emitted_forward; + + if (!ctf_dynset_exists (od->cd_conflicting_types, hval, NULL) + || target->ctf_flags & LCTF_CHILD + || !ctf_type_name_raw (input, id) + || (((kind = ctf_type_kind_unsliced (input, id)) != CTF_K_STRUCT + && kind != CTF_K_UNION && kind != CTF_K_FORWARD))) + return 0; + + fwdkind = ctf_type_kind_forwarded (input, id); + name = ctf_type_name_raw (input, id); + + ctf_dprintf ("Using synthetic forward for conflicted struct/union with " + "hval %s\n", hval); + + if (!ctf_assert (output, name)) + return CTF_ERR; + + if ((decorated = ctf_decorate_type_name (output, name, fwdkind)) == NULL) + return CTF_ERR; + + if (!ctf_dynhash_lookup_kv (td->cd_output_emission_conflicted_forwards, + decorated, NULL, &v)) + { + if ((emitted_forward = ctf_add_forward (target, CTF_ADD_ROOT, name, + fwdkind)) == CTF_ERR) + { + ctf_set_errno (output, ctf_errno (target)); + return CTF_ERR; + } + + if (ctf_dynhash_cinsert (td->cd_output_emission_conflicted_forwards, + decorated, (void *) (uintptr_t) + emitted_forward) < 0) + { + ctf_set_errno (output, ENOMEM); + return CTF_ERR; + } + } + else + emitted_forward = (ctf_id_t) (uintptr_t) v; + + ctf_dprintf ("Cross-TU conflicted struct: passing back forward, %lx\n", + emitted_forward); + + return emitted_forward; +} + +/* Map a GID in some INPUT dict, in the form of an input number and a ctf_id_t, + into a GID in a target output dict. If it returns 0, this is the + unimplemented type, and the input type must have been 0. The OUTPUT dict is + assumed to be the parent of the TARGET, if it is not the TARGET itself. + + Returns CTF_ERR on failure. Responds to an incoming CTF_ERR as an 'id' by + returning CTF_ERR, to simplify callers. Errors are always propagated to the + input, even if they relate to the target, for the same reason. (Target + errors are expected to be very rare.) + + If the type in question is a citation of a conflicted type in a different TU, + emit a forward of the right type in its place (if not already emitted), and + record that forward in cd_output_emission_conflicted_forwards. This avoids + the need to replicate the entire type graph below this point in the current + TU (an appalling waste of space). + + TODO: maybe replace forwards in the same TU with their referents? Might + make usability a bit better. */ + +static ctf_id_t +ctf_dedup_id_to_target (ctf_file_t *output, ctf_file_t *target, + ctf_file_t **inputs, uint32_t ninputs, + uint32_t *parents, ctf_file_t *input, int input_num, + ctf_id_t id) +{ + ctf_dedup_t *od = &output->ctf_dedup; + ctf_dedup_t *td = &target->ctf_dedup; + ctf_file_t *err_fp = input; + const char *hval; + void *target_id; + ctf_id_t emitted_forward; + + /* The target type of an error is an error. */ + if (id == CTF_ERR) + return CTF_ERR; + + /* The unimplemented type's ID never changes. */ + if (!id) + { + ctf_dprintf ("%i/%lx: unimplemented type\n", input_num, id); + return 0; + } + + ctf_dprintf ("Mapping %i/%lx to target %p (%s)\n", input_num, + id, (void *) target, ctf_link_input_name (target)); + + /* If the input type is in the parent type space, and this is a child, reset + the input to the parent (which must already have been emitted, since + emission of parent dicts happens before children). */ + if ((input->ctf_flags & LCTF_CHILD) && (LCTF_TYPE_ISPARENT (input, id))) + { + if (!ctf_assert (output, parents[input_num] <= ninputs)) + return -1; + input = inputs[parents[input_num]]; + input_num = parents[input_num]; + } + + hval = ctf_dynhash_lookup (od->cd_type_hashes, + CTF_DEDUP_GID (output, input_num, id)); + + if (!ctf_assert (output, hval && td->cd_output_emission_hashes)) + return -1; + + /* If this type is a conflicted tagged structure, union, or forward, + substitute a synthetic forward instead, emitting it if need be. Only do + this if the target is in the parent dict: if it's in the child dict, we can + just point straight at the thing itself. Of course, we might be looking in + the child dict right now and not find it and have to look in the parent, so + we have to do this check twice. */ + + emitted_forward = ctf_dedup_maybe_synthesize_forward (output, target, + input, id, hval); + switch (emitted_forward) + { + case 0: /* No forward needed. */ + break; + case -1: + ctf_set_errno (err_fp, ctf_errno (output)); + ctf_err_warn (err_fp, 0, "adding synthetic forward for type %i/%lx: " + "%s", input_num, id, ctf_errmsg (ctf_errno (err_fp))); + return -1; + default: + return emitted_forward; + } + + ctf_dprintf ("Looking up %i/%lx, hash %s, in target\n", input_num, id, hval); + + target_id = ctf_dynhash_lookup (td->cd_output_emission_hashes, hval); + if (!target_id) + { + /* Must be in the parent, so this must be a child, and they must not be + the same dict. */ + ctf_dprintf ("Checking shared parent for target\n"); + if (!ctf_assert (output, (target != output) + && (target->ctf_flags & LCTF_CHILD))) + return -1; + + target_id = ctf_dynhash_lookup (od->cd_output_emission_hashes, hval); + + emitted_forward = ctf_dedup_maybe_synthesize_forward (output, output, + input, id, hval); + switch (emitted_forward) + { + case 0: /* No forward needed. */ + break; + case -1: + ctf_set_errno (err_fp, ctf_errno (output)); + ctf_err_warn (err_fp, 0, "adding synthetic forward for type %i/%lx: " + "%s", input_num, id, ctf_errmsg (ctf_errno (err_fp))); + return -1; + default: + return emitted_forward; + } + } + if (!ctf_assert (output, target_id)) + return -1; + return (ctf_id_t) (uintptr_t) target_id; +} + +/* Emit a single deduplicated TYPE with the given HVAL, located in a given + INPUT, with the given (G)ID, into the shared OUTPUT or a + possibly-newly-created per-CU dict. All the types this type depends upon + have already been emitted. (This type itself may also have been emitted.) + + If the ARG is 1, this is a CU-mapped deduplication round mapping many + ctf_file_t's into precisely one: conflicting types should be marked + non-root-visible. If the ARG is 0, conflicting types go into per-CU + dictionaries stored in the input's ctf_dedup.cd_output: otherwise, everything + is emitted directly into the output. No struct/union members are emitted. + + Optimization opportunity: trace the ancestry of non-root-visible types and + elide all that neither have a root-visible type somewhere towards their root, + nor have the type visible via any other route (the function info section, + data object section, backtrace section etc). */ + +static int +ctf_dedup_emit_type (const char *hval, ctf_file_t *output, ctf_file_t **inputs, + uint32_t ninputs, uint32_t *parents, int already_visited, + ctf_file_t *input, ctf_id_t type, void *id, int depth, + void *arg) +{ + ctf_dedup_t *d = &output->ctf_dedup; + int kind = ctf_type_kind_unsliced (input, type); + const char *name; + ctf_file_t *target = output; + ctf_file_t *real_input; + const ctf_type_t *tp; + int input_num = CTF_DEDUP_GID_TO_INPUT (id); + int output_num = (uint32_t) -1; /* 'shared' */ + int cu_mapped = *(int *)arg; + int isroot = 1; + int is_conflicting; + + ctf_next_t *i = NULL; + ctf_id_t new_type; + ctf_id_t ref; + ctf_id_t maybe_dup = 0; + ctf_encoding_t ep; + const char *erm; + int emission_hashed = 0; + + /* We don't want to re-emit something we've already emitted. */ + + if (already_visited) + return 0; + + ctf_dprintf ("%i: Emitting type with hash %s from %s: determining target\n", + depth, hval, ctf_link_input_name (input)); + + /* Conflicting types go into a per-CU output dictionary, unless this is a + CU-mapped run. The import is not refcounted, since it goes into the + ctf_link_outputs dict of the output that is its parent. */ + is_conflicting = ctf_dynset_exists (d->cd_conflicting_types, hval, NULL); + + if (is_conflicting && !cu_mapped) + { + ctf_dprintf ("%i: Type %s in %i/%lx is conflicted: " + "inserting into per-CU target.\n", + depth, hval, input_num, type); + + if (input->ctf_dedup.cd_output) + target = input->ctf_dedup.cd_output; + else + { + int err; + + if ((target = ctf_create (&err)) == NULL) + { + ctf_err_warn (output, 0, "cannot create per-CU CTF archive " + "for CU %s: %s", ctf_link_input_name (input), + ctf_errmsg (err)); ctf_set_errno (output, err); + return -1; + } + + ctf_import_unref (target, output); + if (ctf_cuname (input) != NULL) + ctf_cuname_set (target, ctf_cuname (input)); + else + ctf_cuname_set (target, "unnamed-CU"); + ctf_parent_name_set (target, _CTF_SECTION); + + input->ctf_dedup.cd_output = target; + } + output_num = input_num; + } + + real_input = input; + if ((tp = ctf_lookup_by_id (&real_input, type)) == NULL) + { + ctf_err_warn (output, 0, "%s: lookup failure for type %lx: %s", + ctf_link_input_name (real_input), type, + ctf_errmsg (ctf_errno (input))); + ctf_set_errno (output, ctf_errno (input)); + return -1; /* errno is set for us. */ + } + + name = ctf_strraw (real_input, tp->ctt_name); + + /* Hide conflicting types, if we were asked to: also hide if a type with this + name already exists and is not a forward. */ + if (cu_mapped && is_conflicting) + isroot = 0; + else if (name + && (maybe_dup = ctf_lookup_by_rawname (target, kind, name)) != 0) + { + if (ctf_type_kind (target, maybe_dup) != CTF_K_FORWARD) + isroot = 0; + } + + ctf_dprintf ("%i: Emitting type with hash %s (%s), into target %i/%p\n", + depth, hval, name ? name : "", input_num, (void *) target); + + if (!target->ctf_dedup.cd_output_emission_hashes) + if ((target->ctf_dedup.cd_output_emission_hashes + = ctf_dynhash_create (ctf_hash_string, ctf_hash_eq_string, + NULL, NULL)) == NULL) + goto oom_hash; + + if (!target->ctf_dedup.cd_output_emission_conflicted_forwards) + if ((target->ctf_dedup.cd_output_emission_conflicted_forwards + = ctf_dynhash_create (ctf_hash_string, ctf_hash_eq_string, + NULL, NULL)) == NULL) + goto oom_hash; + + switch (kind) + { + case CTF_K_UNKNOWN: + /* These are types that CTF cannot encode, marked as such by the compile. + We intentionally do not re-emit these. */ + new_type = 0; + break; + case CTF_K_FORWARD: + /* This will do nothing if the type to which this forwards already exists, + and will be replaced with such a type if it appears later. */ + + erm = "forward"; + if ((new_type = ctf_add_forward (target, isroot, name, + ctf_type_kind_forwarded (input, type))) + == CTF_ERR) + goto err_target; + break; + + case CTF_K_FLOAT: + case CTF_K_INTEGER: + erm = "float/int"; + if (ctf_type_encoding (input, type, &ep) < 0) + goto err_input; /* errno is set for us. */ + if ((new_type = ctf_add_encoded (target, isroot, name, &ep, kind)) + == CTF_ERR) + goto err_target; + break; + + case CTF_K_ENUM: + { + int val; + erm = "enum"; + if ((new_type = ctf_add_enum (target, isroot, name)) == CTF_ERR) + goto err_input; /* errno is set for us. */ + + while ((name = ctf_enum_next (input, type, &i, &val)) != NULL) + { + if (ctf_add_enumerator (target, new_type, name, val) < 0) + { + ctf_err_warn (target, 0, "%s (%i): cannot add enumeration " + "value %s from input type %lx: %s", + ctf_link_input_name (input), input_num, name, + type, ctf_errmsg (ctf_errno (target))); + ctf_next_destroy (i); + return ctf_set_errno (output, ctf_errno (target)); + } + } + if (ctf_errno (input) != ECTF_NEXT_END) + goto err_input; + break; + } + + case CTF_K_TYPEDEF: + erm = "typedef"; + + ref = ctf_type_reference (input, type); + if ((ref = ctf_dedup_id_to_target (output, target, inputs, ninputs, + parents, input, input_num, + ref)) == CTF_ERR) + goto err_input; /* errno is set for us. */ + + if ((new_type = ctf_add_typedef (target, isroot, name, ref)) == CTF_ERR) + goto err_target; /* errno is set for us. */ + break; + + case CTF_K_VOLATILE: + case CTF_K_CONST: + case CTF_K_RESTRICT: + case CTF_K_POINTER: + erm = "pointer or cvr-qual"; + + ref = ctf_type_reference (input, type); + if ((ref = ctf_dedup_id_to_target (output, target, inputs, ninputs, + parents, input, input_num, + ref)) == CTF_ERR) + goto err_input; /* errno is set for us. */ + + if ((new_type = ctf_add_reftype (target, isroot, ref, kind)) == CTF_ERR) + goto err_target; /* errno is set for us. */ + break; + + case CTF_K_SLICE: + erm = "slice"; + + if (ctf_type_encoding (input, type, &ep) < 0) + goto err_input; /* errno is set for us. */ + + ref = ctf_type_reference (input, type); + if ((ref = ctf_dedup_id_to_target (output, target, inputs, ninputs, + parents, input, input_num, + ref)) == CTF_ERR) + goto err_input; + + if ((new_type = ctf_add_slice (target, isroot, ref, &ep)) == CTF_ERR) + goto err_target; + break; + + case CTF_K_ARRAY: + { + ctf_arinfo_t ar; + + erm = "array info"; + if (ctf_array_info (input, type, &ar) < 0) + goto err_input; + + ar.ctr_contents = ctf_dedup_id_to_target (output, target, inputs, + ninputs, parents, input, + input_num, ar.ctr_contents); + ar.ctr_index = ctf_dedup_id_to_target (output, target, inputs, ninputs, + parents, input, input_num, + ar.ctr_index); + + if (ar.ctr_contents == CTF_ERR || ar.ctr_index == CTF_ERR) + goto err_input; + + if ((new_type = ctf_add_array (target, isroot, &ar)) == CTF_ERR) + goto err_target; + + break; + } + + case CTF_K_FUNCTION: + { + ctf_funcinfo_t fi; + ctf_id_t *args; + uint32_t j; + + erm = "function"; + if (ctf_func_type_info (input, type, &fi) < 0) + goto err_input; + + fi.ctc_return = ctf_dedup_id_to_target (output, target, inputs, ninputs, + parents, input, input_num, + fi.ctc_return); + if (fi.ctc_return == CTF_ERR) + goto err_input; + + if ((args = calloc (fi.ctc_argc, sizeof (ctf_id_t))) == NULL) + { + ctf_set_errno (input, ENOMEM); + goto err_input; + } + + erm = "function args"; + if (ctf_func_type_args (input, type, fi.ctc_argc, args) < 0) + { + free (args); + goto err_input; + } + + for (j = 0; j < fi.ctc_argc; j++) + { + args[j] = ctf_dedup_id_to_target (output, target, inputs, ninputs, + parents, input, input_num, + args[j]); + if (args[j] == CTF_ERR) + goto err_input; + } + + if ((new_type = ctf_add_function (target, isroot, + &fi, args)) == CTF_ERR) + { + free (args); + goto err_target; + } + free (args); + break; + } + + case CTF_K_STRUCT: + case CTF_K_UNION: + { + size_t size = ctf_type_size (input, type); + void *out_id; + /* Insert the structure itself, so other types can refer to it. */ + + erm = "structure/union"; + if (kind == CTF_K_STRUCT) + new_type = ctf_add_struct_sized (target, isroot, name, size); + else + new_type = ctf_add_union_sized (target, isroot, name, size); + + if (new_type == CTF_ERR) + goto err_target; + + out_id = CTF_DEDUP_GID (output, output_num, new_type); + ctf_dprintf ("%i: Noting need to emit members of %p -> %p\n", depth, + id, out_id); + /* Record the need to emit the members of this structure later. */ + if (ctf_dynhash_insert (d->cd_emission_struct_members, id, out_id) < 0) + goto err_target; + break; + } + default: + ctf_err_warn (output, 0, "%s: unknown type kind for input type %lx", + ctf_link_input_name (input), type); + return -1; + } + + if (!emission_hashed + && new_type != 0 + && ctf_dynhash_cinsert (target->ctf_dedup.cd_output_emission_hashes, + hval, (void *) (uintptr_t) new_type) < 0) + { + ctf_err_warn (output, 0, "out of memory tracking deduplicated " + "global type IDs"); + return ctf_set_errno (output, ENOMEM); + } + + if (!emission_hashed && new_type != 0) + ctf_dprintf ("%i: Inserted %s, %i/%lx -> %lx into emission hash for " + "target %p (%s)\n", depth, hval, input_num, type, new_type, + (void *) target, ctf_link_input_name (target)); + + return 0; + + oom_hash: + ctf_err_warn (output, 0, "out of memory creating emission-tracking hashes"); + return ctf_set_errno (output, ENOMEM); + + err_input: + ctf_err_warn (output, 0, "%s (%i): while emitting deduplicated %s, error " + "getting input type %lx: %s", ctf_link_input_name (input), + input_num,erm, type, ctf_errmsg (ctf_errno (input))); + ctf_set_errno (output, ctf_errno (input)); + return -1; + err_target: + ctf_err_warn (output, 0, "%s (%i): while emitting deduplicated %s, error " + "emitting target type from input type %lx: %s", + ctf_link_input_name (input), input_num, erm, type, + ctf_errmsg (ctf_errno (target))); + ctf_set_errno (output, ctf_errno (target)); + return -1; +} + +/* Traverse the cd_emission_struct_members and emit the members of all + structures and unions. All other types are emitted and complete by this + point. */ + +static int +ctf_dedup_emit_struct_members (ctf_file_t *output, ctf_file_t **inputs, + uint32_t ninputs, uint32_t *parents) +{ + ctf_dedup_t *d = &output->ctf_dedup; + ctf_next_t *i = NULL; + void *input_id, *target_id; + int err; + ctf_file_t *err_fp, *input_fp; + int input_num; + ctf_id_t err_type; + + while ((err = ctf_dynhash_next (d->cd_emission_struct_members, &i, + &input_id, &target_id)) == 0) + { + ctf_next_t *j = NULL; + ctf_file_t *target; + uint32_t target_num; + ctf_id_t input_type, target_type; + ssize_t offset; + ctf_id_t membtype; + const char *name; + + input_num = CTF_DEDUP_GID_TO_INPUT (input_id); + input_fp = inputs[input_num]; + input_type = CTF_DEDUP_GID_TO_TYPE (input_id); + + /* The output is either -1 (for the shared, parent output dict) or the + number of the corresponding input. */ + target_num = CTF_DEDUP_GID_TO_INPUT (target_id); + if (target_num == (uint32_t) -1) + target = output; + else + { + target = inputs[target_num]->ctf_dedup.cd_output; + if (!ctf_assert (output, target)) + { + err_fp = output; + err_type = input_type; + goto err_target; + } + } + target_type = CTF_DEDUP_GID_TO_TYPE (target_id); + + while ((offset = ctf_member_next (input_fp, input_type, &j, &name, + &membtype)) >= 0) + { + err_fp = target; + err_type = target_type; + if ((membtype = ctf_dedup_id_to_target (output, target, inputs, + ninputs, parents, input_fp, + input_num, + membtype)) == CTF_ERR) + { + ctf_next_destroy (j); + goto err_target; + } + + if (name == NULL) + name = ""; +#ifdef ENABLE_LIBCTF_HASH_DEBUGGING + ctf_dprintf ("Emitting %s, offset %zi\n", name, offset); +#endif + if (ctf_add_member_offset (target, target_type, name, + membtype, offset) < 0) + { + ctf_next_destroy (j); + goto err_target; + } + } + if (ctf_errno (input_fp) != ECTF_NEXT_END) + { + err = ctf_errno (input_fp); + ctf_next_destroy (i); + goto iterr; + } + } + if (err != ECTF_NEXT_END) + goto iterr; + + return 0; + err_target: + ctf_next_destroy (i); + ctf_err_warn (output, 0, "%s (%i): error emitting members for structure " + "type %lx: %s", ctf_link_input_name (input_fp), input_num, + err_type, ctf_errmsg (ctf_errno (err_fp))); + ctf_set_errno (output, ctf_errno (err_fp)); + return -1; + iterr: + ctf_err_warn (output, 0, "iteration failure emitting structure members: %s", + ctf_errmsg (err)); + ctf_set_errno (output, err); + return -1; +} + +/* Populate the type mapping used by the types in one FP (which must be an input + dict containing a non-null cd_output resulting from a ctf_dedup_emit_type + walk). */ +static int +ctf_dedup_populate_type_mapping (ctf_file_t *shared, ctf_file_t *fp, + ctf_file_t **inputs) +{ + ctf_dedup_t *d = &shared->ctf_dedup; + ctf_file_t *output = fp->ctf_dedup.cd_output; + const void *k, *v; + ctf_next_t *i = NULL; + int err; + + /* The shared dict (the output) stores its types in the fp itself, not in a + separate cd_output dict. */ + if (shared == fp) + output = fp; + + /* There may be no types to emit at all, or all the types in this TU may be + shared. */ + if (!output || !output->ctf_dedup.cd_output_emission_hashes) + return 0; + + while ((err = ctf_dynhash_cnext (output->ctf_dedup.cd_output_emission_hashes, + &i, &k, &v)) == 0) + { + const char *hval = (const char *) k; + ctf_id_t id_out = (ctf_id_t) (uintptr_t) v; + ctf_next_t *j = NULL; + ctf_dynset_t *type_ids; + const void *id; + + type_ids = ctf_dynhash_lookup (d->cd_output_mapping, hval); + if (!ctf_assert (shared, type_ids)) + return -1; +#ifdef ENABLE_LIBCTF_HASH_DEBUGGING + ctf_dprintf ("Traversing emission hash: hval %s\n", hval); +#endif + + while ((err = ctf_dynset_cnext (type_ids, &j, &id)) == 0) + { + ctf_file_t *input = inputs[CTF_DEDUP_GID_TO_INPUT (id)]; + ctf_id_t id_in = CTF_DEDUP_GID_TO_TYPE (id); + +#ifdef ENABLE_LIBCTF_HASH_DEBUGGING + ctf_dprintf ("Adding mapping from %i/%lx to %lx\n", + CTF_DEDUP_GID_TO_INPUT (id), id_in, id_out); +#endif + ctf_add_type_mapping (input, id_in, output, id_out); + } + if (err != ECTF_NEXT_END) + { + ctf_next_destroy (i); + goto err; + } + } + if (err != ECTF_NEXT_END) + goto err; + + return 0; + + err: + ctf_err_warn (shared, 0, "iteration error populating the type mapping: %s", + ctf_errmsg (err)); + return ctf_set_errno (shared, err); +} + +/* Populate the type mapping machinery used by the rest of the linker, + by ctf_add_type, etc. */ +static int +ctf_dedup_populate_type_mappings (ctf_file_t *output, ctf_file_t **inputs, + uint32_t ninputs) +{ + size_t i; + + if (ctf_dedup_populate_type_mapping (output, output, inputs) < 0) + { + ctf_err_warn (output, 0, "cannot populate type mappings for shared " + "CTF dict: %s", ctf_errmsg (ctf_errno (output))); + return -1; /* errno is set for us. */ + } + + for (i = 0; i < ninputs; i++) + { + if (ctf_dedup_populate_type_mapping (output, inputs[i], inputs) < 0) + { + ctf_err_warn (output, 0, "cannot populate type mappings for per-CU " + "CTF dict: %s", ctf_errmsg (ctf_errno (inputs[i]))); + return ctf_set_errno (output, ctf_errno (inputs[i])); + } + } + + return 0; +} + +/* Emit deduplicated types into the outputs. The shared type repository is + OUTPUT, on which the ctf_dedup function must have already been called. The + PARENTS array contains the INPUTS index of the parent dict for every child + dict at the corresponding index in the INPUTS (for non-child dicts, the value + is undefined). + + Return an array of fps with content emitted into them (starting with OUTPUT, + which is the parent of all others, then all the newly-generated outputs). + + If CU_MAPPED is set, this is a first pass for a link with a non-empty CU + mapping: only one output will result. */ + +ctf_file_t ** +ctf_dedup_emit (ctf_file_t *output, ctf_file_t **inputs, uint32_t ninputs, + uint32_t *parents, uint32_t *noutputs, int cu_mapped) +{ + size_t num_outputs = 1; /* Always at least one output: us. */ + ctf_file_t **outputs; + ctf_file_t **walk; + size_t i; + + ctf_dprintf ("Triggering emission.\n"); + if (ctf_dedup_walk_output_mapping (output, inputs, ninputs, parents, + ctf_dedup_emit_type, &cu_mapped) < 0) + return NULL; /* errno is set for us. */ + + ctf_dprintf ("Populating struct members.\n"); + if (ctf_dedup_emit_struct_members (output, inputs, ninputs, parents) < 0) + return NULL; /* errno is set for us. */ + + if (ctf_dedup_populate_type_mappings (output, inputs, ninputs) < 0) + return NULL; /* errno is set for us. */ + + for (i = 0; i < ninputs; i++) + { + if (inputs[i]->ctf_dedup.cd_output) + num_outputs++; + } + + if (!ctf_assert (output, !cu_mapped || (cu_mapped && num_outputs == 1))) + return NULL; + + if ((outputs = calloc (num_outputs, sizeof (ctf_file_t *))) == NULL) + { + ctf_err_warn (output, 0, "out of memory allocating link outputs array"); + ctf_set_errno (output, ENOMEM); + return NULL; + } + *noutputs = num_outputs; + + walk = outputs; + *walk = output; + output->ctf_refcnt++; + walk++; + + for (i = 0; i < ninputs; i++) + { + if (inputs[i]->ctf_dedup.cd_output) + { + *walk = inputs[i]->ctf_dedup.cd_output; + inputs[i]->ctf_dedup.cd_output = NULL; + walk++; + } + } + + ctf_dedup_fini (output, outputs, num_outputs); + return outputs; +} diff --git a/libctf/ctf-hash.c b/libctf/ctf-hash.c index ed06f1f86a7..297526094cc 100644 --- a/libctf/ctf-hash.c +++ b/libctf/ctf-hash.c @@ -117,6 +117,28 @@ ctf_hash_eq_type_key (const void *a, const void *b) && (key_a->cltk_idx == key_b->cltk_idx); } +/* Hash a type_id_key. */ +unsigned int +ctf_hash_type_id_key (const void *ptr) +{ + ctf_helem_t *hep = (ctf_helem_t *) ptr; + ctf_type_id_key_t *k = (ctf_type_id_key_t *) hep->key; + + return htab_hash_pointer ((void *) (uintptr_t) k->ctii_input_num) + + 59 * htab_hash_pointer ((void *) (uintptr_t) k->ctii_type); +} + +int +ctf_hash_eq_type_id_key (const void *a, const void *b) +{ + ctf_helem_t *hep_a = (ctf_helem_t *) a; + ctf_helem_t *hep_b = (ctf_helem_t *) b; + ctf_type_id_key_t *key_a = (ctf_type_id_key_t *) hep_a->key; + ctf_type_id_key_t *key_b = (ctf_type_id_key_t *) hep_b->key; + + return (key_a->ctii_input_num == key_b->ctii_input_num) + && (key_a->ctii_type == key_b->ctii_type); +} /* Hash and eq functions for the dynset. Most of these can just use the underlying hashtab functions directly. */ diff --git a/libctf/ctf-impl.h b/libctf/ctf-impl.h index c2fcc92d672..a8a16f77a1e 100644 --- a/libctf/ctf-impl.h +++ b/libctf/ctf-impl.h @@ -247,6 +247,106 @@ typedef struct ctf_link_type_key ctf_id_t cltk_idx; } ctf_link_type_key_t; +/* The structure used as the key in a cd_id_to_file_t on 32-bit platforms. */ +typedef struct ctf_type_id_key +{ + int ctii_input_num; + ctf_id_t ctii_type; +} ctf_type_id_key_t; + +/* Deduplicator state. + + The dedup state below uses three terms consistently. A "hash" is a + ctf_dynhash_t; a "hash value" is the hash value of a type as returned by + ctf_dedup_hash_type; a "global type ID" or "global ID" is a packed-together + reference to a single ctf_file_t (by array index in an array of inputs) and + ctf_id_t, i.e. a single instance of some hash value in some input. + + The deduplication algorithm takes a bunch of inputs and yields a single + shared "output" and possibly many outputs corresponding to individual inputs + that still contain types after sharing of unconflicted types. Almost all + deduplicator state is stored in the struct ctf_dedup in the output, though a + (very) few things are stored in inputs for simplicity's sake, usually if they + are linking together things within the scope of a single TU. + + Flushed at the end of every ctf_dedup run. */ + +typedef struct ctf_dedup +{ + /* The CTF linker flags in force for this dedup run. */ + int cd_link_flags; + + /* On 32-bit platforms only, a hash of global type IDs, in the form of + a ctf_link_type_id_key_t. */ + ctf_dynhash_t *cd_id_to_file_t; + + /* Atoms tables of decorated names: maps undecorated name to decorated name. + (The actual allocations are in the CTF file for the former and the real + atoms table for the latter). Uses the same namespaces as ctf_lookups, + below, but has no need for null-termination. */ + ctf_dynhash_t *cd_decorated_names[4]; + + /* Map type names to a hash from type hash value -> number of times each value + has appeared. */ + ctf_dynhash_t *cd_name_counts; + + /* Map global type IDs to type hash values. Used to determine if types are + already hashed without having to recompute their hash values again, and to + link types together at later stages. Forwards that are peeked through to + structs and unions are not represented in here, so lookups that might be + such a type (in practice, all lookups) must go via cd_replaced_types first + to take this into account. Discarded before each rehashing. */ + ctf_dynhash_t *cd_type_hashes; + + /* Maps from the names of structs/unions/enums to a a single GID which is the + only appearance of that type in any input: if it appears in more than one + input, a value which is a GID with an input_num of -1 appears. Used in + share-duplicated link mode link modes to determine whether structs/unions + can be cited from multiple TUs. Only populated in that link mode. */ + ctf_dynhash_t *cd_struct_origin; + + /* Maps type hash values to a set of hash values of the types that cite them: + i.e., pointing backwards up the type graph. Used for recursive conflict + marking. Citations from tagged structures, unions, and forwards do not + appear in this graph. */ + ctf_dynhash_t *cd_citers; + + /* Maps type hash values to input global type IDs. The value is a set (a + hash) of global type IDs. Discarded before each rehashing. The result of + the ctf_dedup function. */ + ctf_dynhash_t *cd_output_mapping; + + /* A map giving the GID of the first appearance of each type for each type + hash value. */ + ctf_dynhash_t *cd_output_first_gid; + + /* Used to ensure that we never try to map a single type ID to more than one + hash. */ + ctf_dynhash_t *cd_output_mapping_guard; + + /* Maps the global type IDs of structures in input TUs whose members still + need emission to the global type ID of the already-emitted target type + (which has no members yet) in the appropriate target. Uniquely, the latter + ID represents a *target* ID (i.e. the cd_output_mapping of some specified + input): we encode the shared (parent) dict with an ID of -1. */ + ctf_dynhash_t *cd_emission_struct_members; + + /* A set (a hash) of hash values of conflicting types. */ + ctf_dynset_t *cd_conflicting_types; + + /* Maps type hashes to ctf_id_t's in this dictionary. Populated only at + emission time, in the dictionary where emission is taking place. */ + ctf_dynhash_t *cd_output_emission_hashes; + + /* Maps the decorated names of conflicted cross-TU forwards that were forcibly + emitted in this TU to their emitted ctf_id_ts. Populated only at emission + time, in the dictionary where emission is taking place. */ + ctf_dynhash_t *cd_output_emission_conflicted_forwards; + + /* Points to the output counterpart of this input dictionary, at emission + time. */ + ctf_file_t *cd_output; +} ctf_dedup_t; /* The ctf_file is the structure used to represent a CTF container to library clients, who see it only as an opaque pointer. Modifications can therefore @@ -346,6 +446,18 @@ struct ctf_file void *ctf_link_variable_filter_arg; /* Argument for it. */ ctf_dynhash_t *ctf_add_processing; /* Types ctf_add_type is working on now. */ + + /* Atoms table for dedup string storage. All strings in the ctf_dedup_t are + stored here. Only the _alloc copy is allocated or freed: the + ctf_dedup_atoms may be pointed to some other CTF dict, to share its atoms. + We keep the atoms table outside the ctf_dedup so that atoms can be + preserved across multiple similar links, such as when doing cu-mapped + links. */ + ctf_dynset_t *ctf_dedup_atoms; + ctf_dynset_t *ctf_dedup_atoms_alloc; + + ctf_dedup_t ctf_dedup; /* Deduplicator state. */ + char *ctf_tmp_typeslice; /* Storage for slicing up type names. */ size_t ctf_tmp_typeslicelen; /* Size of the typeslice. */ void *ctf_specific; /* Data for ctf_get/setspecific(). */ @@ -451,11 +563,13 @@ typedef unsigned int (*ctf_hash_fun) (const void *ptr); extern unsigned int ctf_hash_integer (const void *ptr); extern unsigned int ctf_hash_string (const void *ptr); extern unsigned int ctf_hash_type_key (const void *ptr); +extern unsigned int ctf_hash_type_id_key (const void *ptr); typedef int (*ctf_hash_eq_fun) (const void *, const void *); extern int ctf_hash_eq_integer (const void *, const void *); extern int ctf_hash_eq_string (const void *, const void *); extern int ctf_hash_eq_type_key (const void *, const void *); +extern int ctf_hash_eq_type_id_key (const void *, const void *); extern int ctf_dynset_eq_string (const void *, const void *); @@ -526,11 +640,24 @@ extern int ctf_dvd_insert (ctf_file_t *, ctf_dvdef_t *); extern void ctf_dvd_delete (ctf_file_t *, ctf_dvdef_t *); extern ctf_dvdef_t *ctf_dvd_lookup (const ctf_file_t *, const char *); +extern ctf_id_t ctf_add_encoded (ctf_file_t *, uint32_t, const char *, + const ctf_encoding_t *, uint32_t kind); +extern ctf_id_t ctf_add_reftype (ctf_file_t *, uint32_t, ctf_id_t, + uint32_t kind); + extern void ctf_add_type_mapping (ctf_file_t *src_fp, ctf_id_t src_type, ctf_file_t *dst_fp, ctf_id_t dst_type); extern ctf_id_t ctf_type_mapping (ctf_file_t *src_fp, ctf_id_t src_type, ctf_file_t **dst_fp); +extern int ctf_dedup_atoms_init (ctf_file_t *); +extern int ctf_dedup (ctf_file_t *, ctf_file_t **, uint32_t ninputs, + uint32_t *parents, int cu_mapped); +extern void ctf_dedup_fini (ctf_file_t *, ctf_file_t **, uint32_t); +extern ctf_file_t **ctf_dedup_emit (ctf_file_t *, ctf_file_t **, + uint32_t ninputs, uint32_t *parents, + uint32_t *noutputs, int cu_mapped); + extern void ctf_decl_init (ctf_decl_t *); extern void ctf_decl_fini (ctf_decl_t *); extern void ctf_decl_push (ctf_decl_t *, ctf_file_t *, ctf_id_t); diff --git a/libctf/ctf-open.c b/libctf/ctf-open.c index 285e0e0ff71..fee7789a308 100644 --- a/libctf/ctf-open.c +++ b/libctf/ctf-open.c @@ -1719,6 +1719,8 @@ ctf_file_close (ctf_file_t *fp) ctf_dynhash_destroy (fp->ctf_link_in_cu_mapping); ctf_dynhash_destroy (fp->ctf_link_out_cu_mapping); ctf_dynhash_destroy (fp->ctf_add_processing); + ctf_dedup_fini (fp, NULL, 0); + ctf_dynset_destroy (fp->ctf_dedup_atoms_alloc); for (err = ctf_list_next (&fp->ctf_errs_warnings); err != NULL; err = nerr) { -- 2.30.2