From: Sriraman Tallam Date: Wed, 5 Aug 2009 20:51:56 +0000 (+0000) Subject: *** empty log message *** X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=ef15dade898122c47e0f7dc0f48c1399c444afdd;p=binutils-gdb.git *** empty log message *** --- diff --git a/gold/ChangeLog b/gold/ChangeLog index d56b934a15b..09ea43ea4cf 100644 --- a/gold/ChangeLog +++ b/gold/ChangeLog @@ -1,3 +1,49 @@ +2009-08-05 Sriraman Tallam + + * icf.cc: New file. + * icf.h: New file. + * Makefile.am (CCFILES): Add icf.cc. + (HFILES): Add icf.h + * Makefile.in: Regenerate. + * dynobj.h (Sized_dynobj::do_section_entsize): New function. + * gc.h (gc_process_relocs): Populate lists used by icf to contain + section, symbol and addend information for the relocs. + * gold.cc (queue_middle_tasks): Call identical code folding. + * gold.h: Add defines for multimap. + * layout.cc (Layout::create_symtab_sections): Add symtab as parameter + to the call of finalize_local_symbols. + * main.cc (main): Create object of class Icf. + * object.cc (Sized_relobj::do_layout): Allow this function to be + called twice during icf. + (Sized_relobj::do_finalize_local_symbols): Fold symbols corresponding + to sections marked as identical by icf. + (Sized_relobj::do_section_flags): Get section_flags from Symbols_data + when available. + (Sized_relobj::do_section_entsize): New function. + * object.h (Object::section_entsize): New function. + (Object::do_section_entsize): New pure virtual function. + (Relobj::finalize_local_symbols): Add new parameter. + (Relobj::do_section_entsize): New function. + * options.h (General_options::icf): New option. + (General_options::icf_iterations): New option. + (General_options::print_icf_sections): New option. + * plugin.cc (Sized_pluginobj::do_section_entsize): New function. + * plugin.h (Sized_pluginobj::do_section_entsize): New function. + * reloc.cc (Read_relocs::run): Delay scanning relocs when doing + icf. + * symtab.cc (Symbol_table::is_section_folded): New function. 
+ (Symbol_table::sized_finalize_symbol): Fold symbols corresponding + to sections marked as identical by icf. + * symtab.h (Symbol_table::set_icf): New function. + (Symbol_table::icf): New function. + (Symbol_table::is_section_folded): New function. + (Symbol_table::icf_): New data member. + * target-reloc.h (relocate_section): Ignore sections folded by icf. + * testsuite/Makefile.am: Add commands to build icf_test. + * testsuite/Makefile.in: Regenerate. + * testsuite/icf_test.sh: New file. + * testsuite/icf_test.cc: New file. + 2009-07-24 Chris Demetriou * layout.cc (is_compressible_debug_section): Fix incorrect diff --git a/gold/Makefile.am b/gold/Makefile.am index 14173c19bf6..513699fb206 100644 --- a/gold/Makefile.am +++ b/gold/Makefile.am @@ -51,6 +51,7 @@ CCFILES = \ gc.cc \ gold.cc \ gold-threads.cc \ + icf.cc \ incremental.cc \ layout.cc \ mapfile.cc \ @@ -93,6 +94,7 @@ HFILES = \ gc.h \ gold.h \ gold-threads.h \ + icf.h \ layout.h \ mapfile.h \ merge.h \ diff --git a/gold/Makefile.in b/gold/Makefile.in index 24500745019..c7474b10db0 100644 --- a/gold/Makefile.in +++ b/gold/Makefile.in @@ -82,10 +82,10 @@ am__objects_1 = archive.$(OBJEXT) binary.$(OBJEXT) common.$(OBJEXT) \ dirsearch.$(OBJEXT) dynobj.$(OBJEXT) dwarf_reader.$(OBJEXT) \ ehframe.$(OBJEXT) errors.$(OBJEXT) expression.$(OBJEXT) \ fileread.$(OBJEXT) gc.$(OBJEXT) gold.$(OBJEXT) \ - gold-threads.$(OBJEXT) incremental.$(OBJEXT) layout.$(OBJEXT) \ - mapfile.$(OBJEXT) merge.$(OBJEXT) object.$(OBJEXT) \ - options.$(OBJEXT) output.$(OBJEXT) parameters.$(OBJEXT) \ - plugin.$(OBJEXT) readsyms.$(OBJEXT) \ + gold-threads.$(OBJEXT) icf.$(OBJEXT) incremental.$(OBJEXT) \ + layout.$(OBJEXT) mapfile.$(OBJEXT) merge.$(OBJEXT) \ + object.$(OBJEXT) options.$(OBJEXT) output.$(OBJEXT) \ + parameters.$(OBJEXT) plugin.$(OBJEXT) readsyms.$(OBJEXT) \ reduced_debug_output.$(OBJEXT) reloc.$(OBJEXT) \ resolve.$(OBJEXT) script-sections.$(OBJEXT) script.$(OBJEXT) \ stringpool.$(OBJEXT) symtab.$(OBJEXT) target.$(OBJEXT) \ @@ 
-337,6 +337,7 @@ CCFILES = \ gc.cc \ gold.cc \ gold-threads.cc \ + icf.cc \ incremental.cc \ layout.cc \ mapfile.cc \ @@ -379,6 +380,7 @@ HFILES = \ gc.h \ gold.h \ gold-threads.h \ + icf.h \ layout.h \ mapfile.h \ merge.h \ @@ -565,6 +567,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gold-threads.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gold.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/i386.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/icf.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/incremental.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/layout.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/main.Po@am__quote@ diff --git a/gold/dynobj.h b/gold/dynobj.h index 815563e728b..a15e2a43238 100644 --- a/gold/dynobj.h +++ b/gold/dynobj.h @@ -198,6 +198,11 @@ class Sized_dynobj : public Dynobj do_section_flags(unsigned int shndx) { return this->elf_file_.section_flags(shndx); } + // Not used for dynobj. + uint64_t + do_section_entsize(unsigned int ) + { gold_unreachable(); } + // Return section address. uint64_t do_section_address(unsigned int shndx) diff --git a/gold/gc.h b/gold/gc.h index b7d520f2589..ffb721c7fb8 100644 --- a/gold/gc.h +++ b/gold/gc.h @@ -24,6 +24,7 @@ #define GOLD_GC_H #include +#include #include "elfcpp.h" #include "symtab.h" @@ -53,45 +54,51 @@ class Garbage_collection { return reinterpret_cast(loc.first) ^ loc.second; } }; + public: + typedef Unordered_set Sections_reachable; typedef std::map Section_ref; typedef std::queue Worklist_type; - public : - Garbage_collection() - :is_worklist_ready_(false) - { } - - // Accessor methods for the private members. 
- - Sections_reachable& - referenced_list() - { return referenced_list_; } - - Section_ref& - section_reloc_map() - { return section_reloc_map_; } - - Worklist_type& - worklist() - { return work_list_; } - - bool - is_worklist_ready() - { return is_worklist_ready_; } - - void - worklist_ready() - { is_worklist_ready_ = true; } - - void - do_transitive_closure(); - - private : - Worklist_type work_list_; - bool is_worklist_ready_; - Section_ref section_reloc_map_; - Sections_reachable referenced_list_; + Garbage_collection() + : is_worklist_ready_(false) + { } + + // Accessor methods for the private members. + + Sections_reachable& + referenced_list() + { return referenced_list_; } + + Section_ref& + section_reloc_map() + { return this->section_reloc_map_; } + + Worklist_type& + worklist() + { return this->work_list_; } + + bool + is_worklist_ready() + { return this->is_worklist_ready_; } + + void + worklist_ready() + { this->is_worklist_ready_ = true; } + + void + do_transitive_closure(); + + bool + is_section_garbage(Object* obj, unsigned int shndx) + { return (this->referenced_list().find(Section_id(obj, shndx)) + == this->referenced_list().end()); } + private: + + Worklist_type work_list_; + bool is_worklist_ready_; + Section_ref section_reloc_map_; + Sections_reachable referenced_list_; }; // Data to pass between successive invocations of do_layout @@ -120,10 +127,11 @@ struct Symbols_data section_size_type symbol_names_size; }; -// This function implements the the generic part of reloc -// processing to map a section to all the sections it -// references through relocs. It is used only during garbage -// collection. +// This function implements the generic part of reloc +// processing to map a section to all the sections it +// references through relocs. It is called only during +// garbage collection (--gc-sections) and identical code +// folding (--icf). 
template @@ -133,8 +141,8 @@ gc_process_relocs( Symbol_table* symtab, Layout*, Target_type* , - Sized_relobj* object, - unsigned int data_shndx, + Sized_relobj* src_obj, + unsigned int src_indx, const unsigned char* prelocs, size_t reloc_count, Output_section*, @@ -142,22 +150,36 @@ gc_process_relocs( size_t local_count, const unsigned char* plocal_syms) { - Object *src_obj, *dst_obj; - unsigned int src_indx, dst_indx; + Object *dst_obj; + unsigned int dst_indx; - src_obj = object; - src_indx = data_shndx; - typedef typename Reloc_types::Reloc Reltype; const int reloc_size = Reloc_types::reloc_size; const int sym_size = elfcpp::Elf_sizes::sym_size; + std::vector* secvec = NULL; + std::vector* symvec = NULL; + std::vector >* addendvec = NULL; + bool is_icf_tracked = false; + + if (parameters->options().icf() + && is_prefix_of(".text.", (src_obj)->section_name(src_indx).c_str())) + { + is_icf_tracked = true; + Section_id src_id(src_obj, src_indx); + secvec = &symtab->icf()->section_reloc_list()[src_id]; + symvec = &symtab->icf()->symbol_reloc_list()[src_id]; + addendvec = &symtab->icf()->addend_reloc_list()[src_id]; + } + for (size_t i = 0; i < reloc_count; ++i, prelocs += reloc_size) { Reltype reloc(prelocs); typename elfcpp::Elf_types::Elf_WXword r_info = reloc.get_r_info(); unsigned int r_sym = elfcpp::elf_r_sym(r_info); - + typename elfcpp::Elf_types::Elf_Swxword addend = + Reloc_types::get_reloc_addend_noerror(&reloc); + if (r_sym < local_count) { gold_assert(plocal_syms != NULL); @@ -165,17 +187,26 @@ gc_process_relocs( + r_sym * sym_size); unsigned int shndx = lsym.get_st_shndx(); bool is_ordinary; - shndx = object->adjust_sym_shndx(r_sym, shndx, &is_ordinary); - if (!is_ordinary) + shndx = src_obj->adjust_sym_shndx(r_sym, shndx, &is_ordinary); + if (!is_ordinary) continue; dst_obj = src_obj; - if (shndx == src_indx) - continue; dst_indx = shndx; + Section_id dst_id(dst_obj, dst_indx); + if (is_icf_tracked) + { + (*secvec).push_back(dst_id); + 
(*symvec).push_back(NULL); + long long symvalue = static_cast(lsym.get_st_value()); + (*addendvec).push_back(std::make_pair(symvalue, + static_cast(addend))); + } + if (shndx == src_indx) + continue; } else { - Symbol* gsym = object->global_symbol(r_sym); + Symbol* gsym = src_obj->global_symbol(r_sym); gold_assert(gsym != NULL); if (gsym->is_forwarder()) gsym = symtab->resolve_forwards(gsym); @@ -186,19 +217,34 @@ gc_process_relocs( dst_indx = gsym->shndx(&is_ordinary); if (!is_ordinary) continue; + Section_id dst_id(dst_obj, dst_indx); + if (is_icf_tracked) + { + (*secvec).push_back(dst_id); + (*symvec).push_back(gsym); + Sized_symbol* sized_gsym = + static_cast* >(gsym); + long long symvalue = + static_cast(sized_gsym->value()); + (*addendvec).push_back(std::make_pair(symvalue, + static_cast(addend))); + } } - Section_id p1(src_obj, src_indx); - Section_id p2(dst_obj, dst_indx); - Garbage_collection::Section_ref::iterator map_it; - map_it = symtab->gc()->section_reloc_map().find(p1); - if (map_it == symtab->gc()->section_reloc_map().end()) - { - symtab->gc()->section_reloc_map()[p1].insert(p2); - } - else + if (parameters->options().gc_sections()) { - Garbage_collection::Sections_reachable& v(map_it->second); - v.insert(p2); + Section_id src_id(src_obj, src_indx); + Section_id dst_id(dst_obj, dst_indx); + Garbage_collection::Section_ref::iterator map_it; + map_it = symtab->gc()->section_reloc_map().find(src_id); + if (map_it == symtab->gc()->section_reloc_map().end()) + { + symtab->gc()->section_reloc_map()[src_id].insert(dst_id); + } + else + { + Garbage_collection::Sections_reachable& v(map_it->second); + v.insert(dst_id); + } } } return; diff --git a/gold/gold.cc b/gold/gold.cc index c65ba29d913..9f9f251ce53 100644 --- a/gold/gold.cc +++ b/gold/gold.cc @@ -41,6 +41,7 @@ #include "reloc.h" #include "defstd.h" #include "plugin.h" +#include "icf.h" namespace gold { @@ -203,10 +204,10 @@ queue_initial_tasks(const General_options& options, } if 
(parameters->options().relocatable() - && parameters->options().gc_sections()) - gold_error(_("cannot mix -r with garbage collection")); + && (parameters->options().gc_sections() || parameters->options().icf())) + gold_error(_("cannot mix -r with --gc-sections or --icf")); - if (parameters->options().gc_sections()) + if (parameters->options().gc_sections() || parameters->options().icf()) { workqueue->queue(new Task_function(new Gc_runner(options, input_objects, @@ -309,8 +310,23 @@ queue_middle_tasks(const General_options& options, gold_assert(symtab->gc() != NULL); // Do a transitive closure on all references to determine the worklist. symtab->gc()->do_transitive_closure(); - // Call do_layout again to determine the output_sections for all - // referenced input sections. + } + + // If identical code folding (--icf) is chosen it makes sense to do it + // only after garbage collection (--gc-sections) as we do not want to + // be folding sections that will be garbage. + if (parameters->options().icf()) + { + symtab->icf()->find_identical_sections(input_objects, symtab); + } + + // Call Object::layout for the second time to determine the + // output_sections for all referenced input sections. When + // --gc-sections or --icf is turned on, Object::layout is + // called twice. It is called the first time when the + // symbols are added. + if (parameters->options().gc_sections() || parameters->options().icf()) + { for (Input_objects::Relobj_iterator p = input_objects->relobj_begin(); p != input_objects->relobj_end(); ++p) @@ -318,6 +334,7 @@ queue_middle_tasks(const General_options& options, (*p)->layout(symtab, layout, NULL); } } + // Layout deferred objects due to plugins. 
if (parameters->options().has_plugins()) { @@ -325,7 +342,8 @@ queue_middle_tasks(const General_options& options, gold_assert(plugins != NULL); plugins->layout_deferred_objects(); } - if (parameters->options().gc_sections()) + + if (parameters->options().gc_sections() || parameters->options().icf()) { for (Input_objects::Relobj_iterator p = input_objects->relobj_begin(); p != input_objects->relobj_end(); @@ -420,7 +438,7 @@ queue_middle_tasks(const General_options& options, // If doing garbage collection, the relocations have already been read. // Otherwise, read and scan the relocations. - if (parameters->options().gc_sections()) + if (parameters->options().gc_sections() || parameters->options().icf()) { for (Input_objects::Relobj_iterator p = input_objects->relobj_begin(); p != input_objects->relobj_end(); diff --git a/gold/gold.h b/gold/gold.h index 9dca4a81d93..1319699ae76 100644 --- a/gold/gold.h +++ b/gold/gold.h @@ -75,6 +75,7 @@ #define Unordered_set std::tr1::unordered_set #define Unordered_map std::tr1::unordered_map +#define Unordered_multimap std::tr1::unordered_multimap #define reserve_unordered_map(map, n) ((map)->rehash(n)) @@ -86,6 +87,7 @@ #define Unordered_set __gnu_cxx::hash_set #define Unordered_map __gnu_cxx::hash_map +#define Unordered_multimap __gnu_cxx::hash_multimap namespace __gnu_cxx { @@ -119,6 +121,7 @@ struct hash #define Unordered_set std::set #define Unordered_map std::map +#define Unordered_map std::multimap #define reserve_unordered_map(map, n) diff --git a/gold/icf.cc b/gold/icf.cc new file mode 100644 index 00000000000..55d8ea4f2a4 --- /dev/null +++ b/gold/icf.cc @@ -0,0 +1,634 @@ +// icf.cc -- Identical Code Folding. +// +// Copyright 2009 Free Software Foundation, Inc. +// Written by Sriraman Tallam . + +// This file is part of gold. 
+ +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 3 of the License, or +// (at your option) any later version. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, +// MA 02110-1301, USA. + +// Identical Code Folding Algorithm +// ---------------------------------- +// Detecting identical functions is done here and the basic algorithm +// is as follows. A checksum is computed on each .text section using +// its contents and relocations. If the symbol name corresponding to +// a relocation is known it is used to compute the checksum. If the +// symbol name is not known the stringified name of the object and the +// section number pointed to by the relocation is used. The checksums +// are stored as keys in a hash map and a section is identical to some +// other section if its checksum is already present in the hash map. +// Checksum collisions are handled by using a multimap and explicitly +// checking the contents when two sections have the same checksum. +// +// However, two functions A and B with identical text but with +// relocations pointing to different .text sections can be identical if +// the corresponding .text sections to which their relocations point to +// turn out to be identical. Hence, this checksumming process must be +// done repeatedly until convergence is obtained. 
Here is an example for +// the following case : +// +// int funcA () int funcB () +// { { +// return foo(); return goo(); +// } } +// +// The functions funcA and funcB are identical if functions foo() and +// goo() are identical. +// +// Hence, as described above, we repeatedly do the checksumming, +// assigning identical functions to the same group, until convergence is +// obtained. Now, we have two different ways to do this depending on how +// we initialize. +// +// Algorithm I : +// ----------- +// We can start with marking all functions as different and repeatedly do +// the checksumming. This has the advantage that we do not need to wait +// for convergence. We can stop at any point and correctness will be +// guaranteed although not all cases would have been found. However, this +// has a problem that some cases can never be found even if it is run until +// convergence. Here is an example with mutually recursive functions : +// +// int funcA (int a) int funcB (int a) +// { { +// if (a == 1) if (a == 1) +// return 1; return 1; +// return 1 + funcB(a - 1); return 1 + funcA(a - 1); +// } } +// +// In this example funcA and funcB are identical and one of them could be +// folded into the other. However, if we start with assuming that funcA +// and funcB are not identical, the algorithm, even after it is run to +// convergence, cannot detect that they are identical. It should be noted +// that even if the functions were self-recursive, Algorithm I cannot catch +// that they are identical, at least as is. +// +// Algorithm II : +// ------------ +// Here we start with marking all functions as identical and then repeat +// the checksumming until convergence. This can detect the case +// mentioned above. It can detect all cases that Algorithm I can and more. +// However, the caveat is that it has to be run to convergence. It cannot +// be stopped arbitrarily like Algorithm I as correctness cannot be +// guaranteed. Algorithm II is not implemented. 
+// +// Algorithm I is used because experiments show that about three +// iterations are more than enough to achieve convergence. Algorithm I can +// handle recursive calls if it is changed to use a special common symbol +// for recursive relocs. This seems to be the most common case that +// Algorithm I could not catch as is. Mutually recursive calls are not +// frequent and Algorithm I wins because of its ability to be stopped +// arbitrarily. +// +// Caveat with using function pointers : +// ------------------------------------ +// +// Programs using function pointer comparisons/checks should use function +// folding with caution as the result of such comparisons could be different +// when folding takes place. This could lead to unexpected run-time +// behaviour. +// +// +// How to run : --icf +// Optional parameters : --icf-iterations --print-icf-sections +// +// Performance : Less than 20 % link-time overhead on industry strength +// applications. Up to 6 % text size reductions. + +#include "gold.h" +#include "object.h" +#include "gc.h" +#include "icf.h" +#include "symtab.h" +#include "libiberty.h" + +namespace gold +{ + +// This function determines if a section or a group of identical +// sections has unique contents. Such unique sections or groups can be +// declared final and need not be processed any further. +// Parameters : +// ID_SECTION : Vector mapping a section index to a Section_id pair. +// IS_SECN_OR_GROUP_UNIQUE : To check if a section or a group of identical +// sections is already known to be unique. +// SECTION_CONTENTS : Contains the section's text and relocs to sections +// that cannot be folded. SECTION_CONTENTS are NULL +// implies that this function is being called for the +// first time before the first iteration of icf. 
+ +static void +preprocess_for_unique_sections(const std::vector& id_section, + std::vector* is_secn_or_group_unique, + std::vector* section_contents) +{ + Unordered_map uniq_map; + std::pair::iterator, bool> + uniq_map_insert; + + for (unsigned int i = 0; i < id_section.size(); i++) + { + if ((*is_secn_or_group_unique)[i]) + continue; + + uint32_t cksum; + Section_id secn = id_section[i]; + section_size_type plen; + if (section_contents == NULL) + { + const unsigned char* contents; + contents = secn.first->section_contents(secn.second, + &plen, + false); + cksum = xcrc32(contents, plen, 0xffffffff); + } + else + { + const unsigned char* contents_array = reinterpret_cast + ((*section_contents)[i].c_str()); + cksum = xcrc32(contents_array, (*section_contents)[i].length(), + 0xffffffff); + } + uniq_map_insert = uniq_map.insert(std::make_pair(cksum, i)); + if (uniq_map_insert.second) + { + (*is_secn_or_group_unique)[i] = true; + } + else + { + (*is_secn_or_group_unique)[i] = false; + (*is_secn_or_group_unique)[uniq_map_insert.first->second] = false; + } + } +} + +// This returns the buffer containing the section's contents, both +// text and relocs. Relocs are differentiated as those pointing to +// sections that could be folded and those that cannot. Only relocs +// pointing to sections that could be folded are recomputed on +// subsequent invocations of this function. +// Parameters : +// FIRST_ITERATION : true if it is the first invocation. +// SECN : Section for which contents are desired. +// SECTION_NUM : Unique section number of this section. +// NUM_TRACKED_RELOCS : Vector reference to store the number of relocs +// to ICF sections. +// KEPT_SECTION_ID : Vector which maps folded sections to kept sections. +// SECTION_CONTENTS : Store the section's text and relocs to non-ICF +// sections. 
+ +static std::string +get_section_contents(bool first_iteration, + const Section_id& secn, + unsigned int section_num, + unsigned int* num_tracked_relocs, + Symbol_table* symtab, + const std::vector& kept_section_id, + std::vector* section_contents) +{ + section_size_type plen; + const unsigned char* contents = NULL; + + if (first_iteration) + { + contents = secn.first->section_contents(secn.second, + &plen, + false); + } + + // The buffer to hold all the contents including relocs. A checksum + // is then computed on this buffer. + std::string buffer; + std::string icf_reloc_buffer; + + if (num_tracked_relocs) + *num_tracked_relocs = 0; + + Icf::Section_list& seclist = symtab->icf()->section_reloc_list(); + Icf::Symbol_list& symlist = symtab->icf()->symbol_reloc_list(); + Icf::Addend_list& addendlist = symtab->icf()->addend_reloc_list(); + + Icf::Section_list::iterator it_seclist = seclist.find(secn); + Icf::Symbol_list::iterator it_symlist = symlist.find(secn); + Icf::Addend_list::iterator it_addendlist = addendlist.find(secn); + + buffer.clear(); + icf_reloc_buffer.clear(); + + // Process relocs and put them into the buffer. + + if (it_seclist != seclist.end()) + { + gold_assert(it_symlist != symlist.end()); + gold_assert(it_addendlist != addendlist.end()); + Icf::Sections_reachable_list v = it_seclist->second; + Icf::Symbol_info s = it_symlist->second; + Icf::Addend_info a = it_addendlist->second; + Icf::Sections_reachable_list::iterator it_v = v.begin(); + Icf::Symbol_info::iterator it_s = s.begin(); + Icf::Addend_info::iterator it_a = a.begin(); + + for (; it_v != v.end(); ++it_v, ++it_s, ++it_a) + { + // ADDEND_STR stores the symbol value and addend, each + // at most 16 hex digits long. it_a points to a pair + // where first is the symbol value and second is the + // addend. 
+ char addend_str[34]; + snprintf(addend_str, sizeof(addend_str), "%llx %llx", + (*it_a).first, (*it_a).second); + Section_id reloc_secn(it_v->first, it_v->second); + + // If this reloc turns back and points to the same section, + // like a recursive call, use a special symbol to mark this. + if (reloc_secn.first == secn.first + && reloc_secn.second == secn.second) + { + if (first_iteration) + { + buffer.append("R"); + buffer.append(addend_str); + buffer.append("@"); + } + continue; + } + Icf::Uniq_secn_id_map& section_id_map = + symtab->icf()->section_to_int_map(); + Icf::Uniq_secn_id_map::iterator section_id_map_it = + section_id_map.find(reloc_secn); + if (section_id_map_it != section_id_map.end()) + { + // This is a reloc to a section that might be folded. + if (num_tracked_relocs) + (*num_tracked_relocs)++; + + char kept_section_str[10]; + unsigned int secn_id = section_id_map_it->second; + snprintf(kept_section_str, sizeof(kept_section_str), "%u", + kept_section_id[secn_id]); + if (first_iteration) + { + buffer.append("ICF_R"); + buffer.append(addend_str); + } + icf_reloc_buffer.append(kept_section_str); + // Append the addend. + icf_reloc_buffer.append(addend_str); + icf_reloc_buffer.append("@"); + } + else + { + // This is a reloc to a section that cannot be folded. + // Process it only in the first iteration. + if (!first_iteration) + continue; + + uint64_t secn_flags = (it_v->first)->section_flags(it_v->second); + // This reloc points to a merge section. Hash the + // contents of this section. + if ((secn_flags & elfcpp::SHF_MERGE) != 0) + { + uint64_t entsize = + (it_v->first)->section_entsize(it_v->second); + long long offset = it_a->first + it_a->second; + section_size_type secn_len; + const unsigned char* str_contents = + (it_v->first)->section_contents(it_v->second, + &secn_len, + false) + offset; + if ((secn_flags & elfcpp::SHF_STRINGS) != 0) + { + // String merge section. 
+ const char* str_char = + reinterpret_cast(str_contents); + switch(entsize) + { + case 1: + { + buffer.append(str_char); + break; + } + case 2: + { + const uint16_t* ptr_16 = + reinterpret_cast(str_char); + unsigned int strlen_16 = 0; + // Find the NULL character. + while(*(ptr_16 + strlen_16) != 0) + strlen_16++; + buffer.append(str_char, strlen_16 * 2); + } + break; + case 4: + { + const uint32_t* ptr_32 = + reinterpret_cast(str_char); + unsigned int strlen_32 = 0; + // Find the NULL character. + while(*(ptr_32 + strlen_32) != 0) + strlen_32++; + buffer.append(str_char, strlen_32 * 4); + } + break; + default: + gold_unreachable(); + } + } + else + { + // Use the entsize to determine the length. + buffer.append(reinterpret_cast(str_contents), + entsize); + } + } + else if ((*it_s) != NULL) + { + // If symbol name is available use that. + const char *sym_name = (*it_s)->name(); + buffer.append(sym_name); + // Append the addend. + buffer.append(addend_str); + buffer.append("@"); + } + else + { + // Symbol name is not available, like for a local symbol, + // use object and section id. + buffer.append(it_v->first->name()); + char secn_id[10]; + snprintf(secn_id, sizeof(secn_id), "%u",it_v->second); + buffer.append(secn_id); + // Append the addend. + buffer.append(addend_str); + buffer.append("@"); + } + } + } + } + + if (first_iteration) + { + buffer.append("Contents = "); + buffer.append(reinterpret_cast(contents), plen); + // Store the section contents that dont change to avoid recomputing + // during the next call to this function. + (*section_contents)[section_num] = buffer; + } + else + { + gold_assert(buffer.empty()); + // Reuse the contents computed in the previous iteration. + buffer.append((*section_contents)[section_num]); + } + + buffer.append(icf_reloc_buffer); + return buffer; +} + +// This function computes a checksum on each section to detect and form +// groups of identical sections. The first iteration does this for all +// sections. 
+// Further iterations do this only for the kept sections from each group to +// determine if larger groups of identical sections could be formed. The +// first section in each group is the kept section for that group. +// +// CRC32 is the checksumming algorithm and can have collisions. That is, +// two sections with different contents can have the same checksum. Hence, +// a multimap is used to maintain more than one group of checksum +// identical sections. A section is added to a group only after its +// contents are explicitly compared with the kept section of the group. +// +// Parameters : +// ITERATION_NUM : Invocation instance of this function. +// NUM_TRACKED_RELOCS : Vector reference to store the number of relocs +// to ICF sections. +// KEPT_SECTION_ID : Vector which maps folded sections to kept sections. +// ID_SECTION : Vector mapping a unique integer to a section. +// IS_SECN_OR_GROUP_UNIQUE : To check if a section or a group of identical +// sections is already known to be unique. +// SECTION_CONTENTS : Store the section's text and relocs to non-ICF +// sections. 
+ +static bool +match_sections(unsigned int iteration_num, + Symbol_table* symtab, + std::vector* num_tracked_relocs, + std::vector* kept_section_id, + const std::vector& id_section, + std::vector* is_secn_or_group_unique, + std::vector* section_contents) +{ + Unordered_multimap section_cksum; + std::pair::iterator, + Unordered_multimap::iterator> key_range; + bool converged = true; + + if (iteration_num == 1) + preprocess_for_unique_sections(id_section, + is_secn_or_group_unique, + NULL); + else + preprocess_for_unique_sections(id_section, + is_secn_or_group_unique, + section_contents); + + std::vector full_section_contents; + + for (unsigned int i = 0; i < id_section.size(); i++) + { + full_section_contents.push_back(""); + if ((*is_secn_or_group_unique)[i]) + continue; + + Section_id secn = id_section[i]; + std::string this_secn_contents; + uint32_t cksum; + if (iteration_num == 1) + { + unsigned int num_relocs = 0; + this_secn_contents = get_section_contents(true, secn, i, &num_relocs, + symtab, (*kept_section_id), + section_contents); + (*num_tracked_relocs)[i] = num_relocs; + } + else + { + if ((*kept_section_id)[i] != i) + { + // This section is already folded into something. See + // if it should point to a different kept section. + unsigned int kept_section = (*kept_section_id)[i]; + if (kept_section != (*kept_section_id)[kept_section]) + { + (*kept_section_id)[i] = (*kept_section_id)[kept_section]; + } + continue; + } + this_secn_contents = get_section_contents(false, secn, i, NULL, + symtab, (*kept_section_id), + section_contents); + } + + const unsigned char* this_secn_contents_array = + reinterpret_cast(this_secn_contents.c_str()); + cksum = xcrc32(this_secn_contents_array, this_secn_contents.length(), + 0xffffffff); + size_t count = section_cksum.count(cksum); + + if (count == 0) + { + // Start a group with this cksum. 
+ section_cksum.insert(std::make_pair(cksum, i)); + full_section_contents[i] = this_secn_contents; + } + else + { + key_range = section_cksum.equal_range(cksum); + Unordered_multimap::iterator it; + // Search all the groups with this cksum for a match. + for (it = key_range.first; it != key_range.second; ++it) + { + unsigned int kept_section = it->second; + if (full_section_contents[kept_section].length() + != this_secn_contents.length()) + continue; + if (memcmp(full_section_contents[kept_section].c_str(), + this_secn_contents.c_str(), + this_secn_contents.length()) != 0) + continue; + (*kept_section_id)[i] = kept_section; + converged = false; + break; + } + if (it == key_range.second) + { + // Create a new group for this cksum. + section_cksum.insert(std::make_pair(cksum, i)); + full_section_contents[i] = this_secn_contents; + } + } + // If there are no relocs to foldable sections do not process + // this section any further. + if (iteration_num == 1 && (*num_tracked_relocs)[i] == 0) + (*is_secn_or_group_unique)[i] = true; + } + + return converged; +} + + +// This is the main ICF function called in gold.cc. This does the +// initialization and calls match_sections repeatedly (twice by default) +// which computes the crc checksums and detects identical functions. + +void +Icf::find_identical_sections(const Input_objects* input_objects, + Symbol_table* symtab) +{ + unsigned int section_num = 0; + std::vector num_tracked_relocs; + std::vector is_secn_or_group_unique; + std::vector section_contents; + + // Decide which sections are possible candidates first. + + for (Input_objects::Relobj_iterator p = input_objects->relobj_begin(); + p != input_objects->relobj_end(); + ++p) + { + for (unsigned int i = 0;i < (*p)->shnum(); ++i) + { + // Only looking to fold functions, so just look at .text sections. 
+ if (!is_prefix_of(".text.", (*p)->section_name(i).c_str())) + continue; + if (!(*p)->is_section_included(i)) + continue; + if (parameters->options().gc_sections() + && symtab->gc()->is_section_garbage(*p, i)) + continue; + this->id_section_.push_back(Section_id(*p, i)); + this->section_id_[Section_id(*p, i)] = section_num; + this->kept_section_id_.push_back(section_num); + num_tracked_relocs.push_back(0); + is_secn_or_group_unique.push_back(false); + section_contents.push_back(""); + section_num++; + } + } + + unsigned int num_iterations = 0; + + // Default number of iterations to run ICF is 2. + unsigned int max_iterations = (parameters->options().icf_iterations() > 0) + ? parameters->options().icf_iterations() + : 2; + + bool converged = false; + + while (!converged && (num_iterations < max_iterations)) + { + num_iterations++; + converged = match_sections(num_iterations, symtab, + &num_tracked_relocs, &this->kept_section_id_, + this->id_section_, &is_secn_or_group_unique, + §ion_contents); + } + + if (parameters->options().print_icf_sections()) + { + if (converged) + gold_info(_("%s: ICF Converged after %u iteration(s)"), + program_name, num_iterations); + else + gold_info(_("%s: ICF stopped after %u iteration(s)"), + program_name, num_iterations); + } + + this->icf_ready(); +} + +// This function determines if the section corresponding to the +// given object and index is folded based on if the kept section +// is different from this section. + +bool +Icf::is_section_folded(Object* obj, unsigned int shndx) +{ + Section_id secn(obj, shndx); + Uniq_secn_id_map::iterator it = this->section_id_.find(secn); + if (it == this->section_id_.end()) + return false; + unsigned int section_num = it->second; + unsigned int kept_section_id = this->kept_section_id_[section_num]; + return kept_section_id != section_num; +} + +// This function returns the folded section for the given section. 
+ +Section_id +Icf::get_folded_section(Object* dup_obj, unsigned int dup_shndx) +{ + Section_id dup_secn(dup_obj, dup_shndx); + Uniq_secn_id_map::iterator it = this->section_id_.find(dup_secn); + gold_assert(it != this->section_id_.end()); + unsigned int section_num = it->second; + unsigned int kept_section_id = this->kept_section_id_[section_num]; + Section_id folded_section = this->id_section_[kept_section_id]; + return folded_section; +} + +} // End of namespace gold. diff --git a/gold/icf.h b/gold/icf.h new file mode 100644 index 00000000000..965964d2ecb --- /dev/null +++ b/gold/icf.h @@ -0,0 +1,140 @@ +// icf.h -- Identical Code Folding + +// Copyright 2009 Free Software Foundation, Inc. +// Written by Sriraman Tallam . + +// This file is part of gold. + +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 3 of the License, or +// (at your option) any later version. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, +// MA 02110-1301, USA. 
+ +#ifndef GOLD_ICF_H +#define GOLD_ICF_H + +#include + +#include "elfcpp.h" +#include "symtab.h" + +namespace gold +{ + +class Object; +class Input_objects; +class Symbol_table; + +typedef std::pair Section_id; + +class Icf +{ + public: + struct Section_id_hash + { + size_t operator()(const Section_id& loc) const + { return reinterpret_cast(loc.first) ^ loc.second; } + }; + + typedef std::vector Sections_reachable_list; + typedef std::vector Symbol_info; + typedef std::vector > Addend_info; + typedef Unordered_map Section_list; + typedef Unordered_map Symbol_list; + typedef Unordered_map Addend_list; + typedef Unordered_map Uniq_secn_id_map; + + Icf() + : id_section_(), section_id_(), kept_section_id_(), + num_tracked_relocs(NULL), icf_ready_(false), + section_reloc_list_(), symbol_reloc_list_(), + addend_reloc_list_() + { } + + // Returns the kept folded identical section corresponding to + // dup_obj and dup_shndx. + Section_id + get_folded_section(Object* dup_obj, unsigned int dup_shndx); + + // Forms groups of identical sections where the first member + // of each group is the kept section during folding. + void + find_identical_sections(const Input_objects* input_objects, + Symbol_table* symtab); + + // This is set when ICF has been run and the groups of + // identical sections have been formed. + void + icf_ready() + { this->icf_ready_ = true; } + + // Returns true if ICF has been run. + bool + is_icf_ready() + { return this->icf_ready_; } + + // Returns the kept section corresponding to the + // given section. + bool + is_section_folded(Object* obj, unsigned int shndx); + + // Returns a map of a section to a list of all sections referenced + // by its relocations. + Section_list& + section_reloc_list() + { return this->section_reloc_list_; } + + // Returns a map of a section to a list of all symbols referenced + // by its relocations. 
+ Symbol_list& + symbol_reloc_list() + { return this->symbol_reloc_list_; } + + // Returns a maps of a section to a list of symbol values and addends + // of its relocations. + Addend_list& + addend_reloc_list() + { return this->addend_reloc_list_; } + + // Returns a mapping of each section to a unique integer. + Uniq_secn_id_map& + section_to_int_map() + { return this->section_id_; } + + private: + + // Maps integers to sections. + std::vector id_section_; + // Does the reverse. + Uniq_secn_id_map section_id_; + // Given a section id, this maps it to the id of the kept + // section. If the id's are the same then this section is + // not folded. + std::vector kept_section_id_; + unsigned int* num_tracked_relocs; + // Flag to indicate if ICF has been run. + bool icf_ready_; + + // These lists are populated by gc_process_relocs in gc.h. + Section_list section_reloc_list_; + Symbol_list symbol_reloc_list_; + Addend_list addend_reloc_list_; +}; + +} // End of namespace gold. + +#endif diff --git a/gold/layout.cc b/gold/layout.cc index eb8aac75a5d..6907295dc0a 100644 --- a/gold/layout.cc +++ b/gold/layout.cc @@ -2253,7 +2253,7 @@ Layout::create_symtab_sections(const Input_objects* input_objects, ++p) { unsigned int index = (*p)->finalize_local_symbols(local_symbol_index, - off); + off, symtab); off += (index - local_symbol_index) * symsize; local_symbol_index = index; } diff --git a/gold/main.cc b/gold/main.cc index 84b4ae91eef..d8ef582ec08 100644 --- a/gold/main.cc +++ b/gold/main.cc @@ -44,6 +44,7 @@ #include "layout.h" #include "plugin.h" #include "gc.h" +#include "icf.h" #include "incremental.h" using namespace gold; @@ -203,9 +204,12 @@ main(int argc, char** argv) // The list of input objects. Input_objects input_objects; - // The Garbage Collection Object. + // The Garbage Collection (GC, --gc-sections) Object. Garbage_collection gc; + // The Identical Code Folding (ICF, --icf) Object. + Icf icf; + // The symbol table. 
We're going to guess here how many symbols // we're going to see based on the number of input files. Even when // this is off, it means at worst we don't quite optimize hashtable @@ -216,6 +220,9 @@ main(int argc, char** argv) if (parameters->options().gc_sections()) symtab.set_gc(&gc); + if (parameters->options().icf()) + symtab.set_icf(&icf); + // The layout object. Layout layout(command_line.number_of_input_files(), &command_line.script_options()); diff --git a/gold/object.cc b/gold/object.cc index 1b964d70cab..b72fb580aae 100644 --- a/gold/object.cc +++ b/gold/object.cc @@ -914,14 +914,15 @@ Sized_relobj::layout_section(Layout* layout, // whether they should be included in the link. If they should, we // pass them to the Layout object, which will return an output section // and an offset. -// During garbage collection (gc-sections), this function is called -// twice. When it is called the first time, it is for setting up some -// sections as roots to a work-list and to do comdat processing. Actual -// layout happens the second time around after all the relevant sections -// have been determined. The first time, is_worklist_ready is false. -// It is then set to true after the worklist is processed and the relevant -// sections are determined. Then, this function is called again to -// layout the sections. +// During garbage collection (--gc-sections) and identical code folding +// (--icf), this function is called twice. When it is called the first +// time, it is for setting up some sections as roots to a work-list for +// --gc-sections and to do comdat processing. Actual layout happens the +// second time around after all the relevant sections have been determined. +// The first time, is_worklist_ready or is_icf_ready is false. It is then +// set to true after the garbage collection worklist or identical code +// folding is processed and the relevant sections to be kept are +// determined. Then, this function is called again to layout the sections. 
template void @@ -930,10 +931,22 @@ Sized_relobj::do_layout(Symbol_table* symtab, Read_symbols_data* sd) { const unsigned int shnum = this->shnum(); - bool is_gc_pass_one = (parameters->options().gc_sections() - && !symtab->gc()->is_worklist_ready()); - bool is_gc_pass_two = (parameters->options().gc_sections() - && symtab->gc()->is_worklist_ready()); + bool is_gc_pass_one = ((parameters->options().gc_sections() + && !symtab->gc()->is_worklist_ready()) + || (parameters->options().icf() + && !symtab->icf()->is_icf_ready())); + + bool is_gc_pass_two = ((parameters->options().gc_sections() + && symtab->gc()->is_worklist_ready()) + || (parameters->options().icf() + && symtab->icf()->is_icf_ready())); + + bool is_gc_or_icf = (parameters->options().gc_sections() + || parameters->options().icf()); + + // Both is_gc_pass_one and is_gc_pass_two should not be true. + gold_assert(!(is_gc_pass_one && is_gc_pass_two)); + if (shnum == 0) return; Symbols_data* gc_sd = NULL; @@ -958,7 +971,7 @@ Sized_relobj::do_layout(Symbol_table* symtab, const unsigned char* symbol_names_data = NULL; section_size_type symbol_names_size; - if (parameters->options().gc_sections()) + if (is_gc_or_icf) { section_headers_data = gc_sd->section_headers_data; section_names_size = gc_sd->section_names_size; @@ -986,9 +999,10 @@ Sized_relobj::do_layout(Symbol_table* symtab, const unsigned char* pshdrs; // Get the section names. - const unsigned char* pnamesu = parameters->options().gc_sections() ? - gc_sd->section_names_data : - sd->section_names->data(); + const unsigned char* pnamesu = (is_gc_or_icf) + ? 
gc_sd->section_names_data + : sd->section_names->data(); + const char* pnames = reinterpret_cast(pnamesu); // If any input files have been claimed by plugins, we need to defer @@ -1141,7 +1155,7 @@ Sized_relobj::do_layout(Symbol_table* symtab, } } - if (is_gc_pass_one) + if (is_gc_pass_one && parameters->options().gc_sections()) { if (is_section_name_included(name) || shdr.get_sh_type() == elfcpp::SHT_INIT_ARRAY @@ -1188,7 +1202,7 @@ Sized_relobj::do_layout(Symbol_table* symtab, continue; } - if (is_gc_pass_two) + if (is_gc_pass_two && parameters->options().gc_sections()) { // This is executed during the second pass of garbage // collection. do_layout has been called before and some @@ -1199,13 +1213,12 @@ Sized_relobj::do_layout(Symbol_table* symtab, gold_assert(out_section_offsets[i] == invalid_address); continue; } - if ((shdr.get_sh_flags() & elfcpp::SHF_ALLOC) != 0) - if (symtab->gc()->referenced_list().find(Section_id(this,i)) - == symtab->gc()->referenced_list().end()) + if (((shdr.get_sh_flags() & elfcpp::SHF_ALLOC) != 0) + && symtab->gc()->is_section_garbage(this, i)) { if (parameters->options().print_gc_sections()) gold_info(_("%s: removing unused section from '%s'" - " in file '%s"), + " in file '%s'"), program_name, this->section_name(i).c_str(), this->name().c_str()); out_sections[i] = NULL; @@ -1213,6 +1226,36 @@ Sized_relobj::do_layout(Symbol_table* symtab, continue; } } + + if (is_gc_pass_two && parameters->options().icf()) + { + if (out_sections[i] == NULL) + { + gold_assert(out_section_offsets[i] == invalid_address); + continue; + } + if (((shdr.get_sh_flags() & elfcpp::SHF_ALLOC) != 0) + && symtab->icf()->is_section_folded(this, i)) + { + if (parameters->options().print_icf_sections()) + { + Section_id folded = + symtab->icf()->get_folded_section(this, i); + Relobj* folded_obj = + reinterpret_cast(folded.first); + gold_info(_("%s: ICF folding section '%s' in file '%s'" + "into '%s' in file '%s'"), + program_name, this->section_name(i).c_str(), + 
this->name().c_str(), + folded_obj->section_name(folded.second).c_str(), + folded_obj->name().c_str()); + } + out_sections[i] = NULL; + out_section_offsets[i] = invalid_address; + continue; + } + } + // Defer layout here if input files are claimed by plugins. When gc // is turned on this function is called twice. For the second call // should_defer_layout should be false. @@ -1228,7 +1271,8 @@ Sized_relobj::do_layout(Symbol_table* symtab, out_sections[i] = reinterpret_cast(2); out_section_offsets[i] = invalid_address; continue; - } + } + // During gc_pass_two if a section that was previously deferred is // found, do not layout the section as layout_deferred_sections will // do it later from gold.cc. @@ -1256,10 +1300,13 @@ Sized_relobj::do_layout(Symbol_table* symtab, layout->layout_gnu_stack(seen_gnu_stack, gnu_stack_flags); // When doing a relocatable link handle the reloc sections at the - // end. Garbage collection is not turned on for relocatable code. + // end. Garbage collection and Identical Code Folding is not + // turned on for relocatable code. 
if (emit_relocs) this->size_relocatable_relocs(); - gold_assert(!parameters->options().gc_sections() || reloc_sections.empty()); + + gold_assert(!(is_gc_or_icf) || reloc_sections.empty()); + for (std::vector::const_iterator p = reloc_sections.begin(); p != reloc_sections.end(); ++p) @@ -1342,6 +1389,7 @@ Sized_relobj::do_layout(Symbol_table* symtab, delete[] gc_sd->section_names_data; delete[] gc_sd->symbols_data; delete[] gc_sd->symbol_names_data; + this->set_symbols_data(NULL); } else { @@ -1554,7 +1602,8 @@ Sized_relobj::do_count_local_symbols(Stringpool* pool, template unsigned int Sized_relobj::do_finalize_local_symbols(unsigned int index, - off_t off) + off_t off, + Symbol_table* symtab) { gold_assert(off == static_cast(align_address(off, size >> 3))); @@ -1596,6 +1645,21 @@ Sized_relobj::do_finalize_local_symbols(unsigned int index, } Output_section* os = out_sections[shndx]; + Address secoffset = out_offsets[shndx]; + if (symtab->is_section_folded(this, shndx)) + { + gold_assert (os == NULL && secoffset == invalid_address); + // Get the os of the section it is folded onto. + Section_id folded = symtab->icf()->get_folded_section(this, + shndx); + gold_assert(folded.first != NULL); + Sized_relobj* folded_obj = reinterpret_cast + *>(folded.first); + os = folded_obj->output_section(folded.second); + gold_assert(os != NULL); + secoffset = folded_obj->get_output_section_offset(folded.second); + gold_assert(secoffset != invalid_address); + } if (os == NULL) { @@ -1605,7 +1669,7 @@ Sized_relobj::do_finalize_local_symbols(unsigned int index, // so we leave the input value unchanged here. continue; } - else if (out_offsets[shndx] == invalid_address) + else if (secoffset == invalid_address) { uint64_t start; @@ -1647,11 +1711,11 @@ Sized_relobj::do_finalize_local_symbols(unsigned int index, } else if (lv.is_tls_symbol()) lv.set_output_value(os->tls_offset() - + out_offsets[shndx] + + secoffset + lv.input_value()); else lv.set_output_value((relocatable ? 
0 : os->address()) - + out_offsets[shndx] + + secoffset + lv.input_value()); } @@ -1696,6 +1760,42 @@ Sized_relobj::do_set_local_dynsym_offset(off_t off) return this->output_local_dynsym_count_; } +// If Symbols_data is not NULL get the section flags from here otherwise +// get it from the file. + +template +uint64_t +Sized_relobj::do_section_flags(unsigned int shndx) +{ + Symbols_data* sd = this->get_symbols_data(); + if (sd != NULL) + { + const unsigned char* pshdrs = sd->section_headers_data + + This::shdr_size * shndx; + typename This::Shdr shdr(pshdrs); + return shdr.get_sh_flags(); + } + // If sd is NULL, read the section header from the file. + return this->elf_file_.section_flags(shndx); +} + +// Get the section's ent size from Symbols_data. Called by get_section_contents +// in icf.cc + +template +uint64_t +Sized_relobj::do_section_entsize(unsigned int shndx) +{ + Symbols_data* sd = this->get_symbols_data(); + gold_assert (sd != NULL); + + const unsigned char* pshdrs = sd->section_headers_data + + This::shdr_size * shndx; + typename This::Shdr shdr(pshdrs); + return shdr.get_sh_entsize(); +} + + // Write out the local symbols. template diff --git a/gold/object.h b/gold/object.h index dc6a156b049..df090d00b80 100644 --- a/gold/object.h +++ b/gold/object.h @@ -321,6 +321,11 @@ class Object section_flags(unsigned int shndx) { return this->do_section_flags(shndx); } + // Return the section entsize given a section index. + uint64_t + section_entsize(unsigned int shndx) + { return this->do_section_entsize(shndx); } + // Return the section address given a section index. uint64_t section_address(unsigned int shndx) @@ -508,6 +513,10 @@ class Object virtual uint64_t do_section_flags(unsigned int shndx) = 0; + // Get section entsize--implemented by child class. + virtual uint64_t + do_section_entsize(unsigned int shndx) = 0; + // Get section address--implemented by child class. 
virtual uint64_t do_section_address(unsigned int shndx) = 0; @@ -617,7 +626,8 @@ class Relobj : public Object output_sections_(), map_to_relocatable_relocs_(NULL), object_merge_map_(NULL), - relocs_must_follow_section_writes_(false) + relocs_must_follow_section_writes_(false), + sd_(NULL) { } // During garbage collection, the Read_symbols_data pass for @@ -689,8 +699,8 @@ class Relobj : public Object // indexes for the local variables, and set the offset where local // symbol information will be stored. Returns the new local symbol index. unsigned int - finalize_local_symbols(unsigned int index, off_t off) - { return this->do_finalize_local_symbols(index, off); } + finalize_local_symbols(unsigned int index, off_t off, Symbol_table* symtab) + { return this->do_finalize_local_symbols(index, off, symtab); } // Set the output dynamic symbol table indexes for the local variables. unsigned int @@ -814,7 +824,7 @@ class Relobj : public Object // for the local variables, and set the offset where local symbol // information will be stored. virtual unsigned int - do_finalize_local_symbols(unsigned int, off_t) = 0; + do_finalize_local_symbols(unsigned int, off_t, Symbol_table*) = 0; // Set the output dynamic symbol table indexes for the local variables. virtual unsigned int @@ -1491,7 +1501,7 @@ class Sized_relobj : public Relobj // Finalize the local symbols. unsigned int - do_finalize_local_symbols(unsigned int, off_t); + do_finalize_local_symbols(unsigned int, off_t, Symbol_table*); // Set the offset where local dynamic symbol information will be stored. unsigned int @@ -1523,8 +1533,11 @@ class Sized_relobj : public Relobj // Return section flags. uint64_t - do_section_flags(unsigned int shndx) - { return this->elf_file_.section_flags(shndx); } + do_section_flags(unsigned int shndx); + + // Return section entsize. + uint64_t + do_section_entsize(unsigned int shndx); // Return section address. 
uint64_t diff --git a/gold/options.h b/gold/options.h index deec2ba871e..eeade6fe28b 100644 --- a/gold/options.h +++ b/gold/options.h @@ -813,6 +813,17 @@ class General_options DEFINE_special(static, options::ONE_DASH, '\0', N_("Do not link against shared libraries"), NULL); + DEFINE_bool(icf, options::TWO_DASHES, '\0', false, + N_("Fold identical functions"), + N_("Don't fold identical functions (default)")); + + DEFINE_uint(icf_iterations, options::TWO_DASHES , '\0', 0, + N_("Number of iterations of ICF (default 2)"), N_("COUNT")); + + DEFINE_bool(print_icf_sections, options::TWO_DASHES, '\0', false, + N_("List folded identical sections on stderr"), + N_("Do not list folded identical sections")); + DEFINE_bool(gc_sections, options::TWO_DASHES, '\0', false, N_("Remove unused sections"), N_("Don't remove unused sections (default)")); diff --git a/gold/plugin.cc b/gold/plugin.cc index 2a21b8f5578..7aee46f7126 100644 --- a/gold/plugin.cc +++ b/gold/plugin.cc @@ -684,6 +684,16 @@ Sized_pluginobj::do_section_flags(unsigned int) return 0; } +// Return section entsize. Not used for plugin objects. + +template +uint64_t +Sized_pluginobj::do_section_entsize(unsigned int) +{ + gold_unreachable(); + return 0; +} + // Return section address. Not used for plugin objects. template diff --git a/gold/plugin.h b/gold/plugin.h index 672863d514c..6a98ac8d8bc 100644 --- a/gold/plugin.h +++ b/gold/plugin.h @@ -383,6 +383,10 @@ class Sized_pluginobj : public Pluginobj uint64_t do_section_flags(unsigned int shndx); + // Return section entsize. + uint64_t + do_section_entsize(unsigned int shndx); + // Return section address. 
uint64_t do_section_address(unsigned int shndx); diff --git a/gold/reloc.cc b/gold/reloc.cc index 1985867fda8..0842a73ca83 100644 --- a/gold/reloc.cc +++ b/gold/reloc.cc @@ -65,12 +65,12 @@ Read_relocs::run(Workqueue* workqueue) this->object_->set_relocs_data(rd); this->object_->release(); - // If garbage collection is desired, we must process the relocs - // instead of scanning the relocs as reloc processing is necessary - // to determine unused sections. - if (parameters->options().gc_sections()) - { - workqueue->queue_next(new Gc_process_relocs(this->options_, + // If garbage collection or identical comdat folding is desired, we + // process the relocs first before scanning them. Scanning of relocs is + // done only after garbage or identical sections is identified. + if (parameters->options().gc_sections() || parameters->options().icf()) + { + workqueue->queue_next(new Gc_process_relocs(this->options_, this->symtab_, this->layout_, this->object_, rd, @@ -418,7 +418,7 @@ Sized_relobj::do_scan_relocs(const General_options& options, // When garbage collection is on, unreferenced sections are not included // in the link that would have been included normally. This is known only // after Read_relocs hence this check has to be done again. 
- if (parameters->options().gc_sections()) + if (parameters->options().gc_sections() || parameters->options().icf()) { if (p->output_section == NULL) continue; diff --git a/gold/symtab.cc b/gold/symtab.cc index daf9daf453a..292a26275de 100644 --- a/gold/symtab.cc +++ b/gold/symtab.cc @@ -489,7 +489,7 @@ Symbol_table::Symbol_table(unsigned int count, : saw_undefined_(0), offset_(0), table_(count), namepool_(), forwarders_(), commons_(), tls_commons_(), small_commons_(), large_commons_(), forced_locals_(), warnings_(), - version_script_(version_script), gc_(NULL) + version_script_(version_script), gc_(NULL), icf_(NULL) { namepool_.reserve(count); } @@ -516,6 +516,13 @@ Symbol_table::Symbol_table_eq::operator()(const Symbol_table_key& k1, return k1.first == k2.first && k1.second == k2.second; } +bool +Symbol_table::is_section_folded(Object* obj, unsigned int shndx) const +{ + return (parameters->options().icf() + && this->icf_->is_section_folded(obj, shndx)); +} + // For symbols that have been listed with -u option, add them to the // work list to avoid gc'ing them. @@ -2417,8 +2424,22 @@ Symbol_table::sized_finalize_symbol(Symbol* unsized_sym) { Relobj* relobj = static_cast(symobj); Output_section* os = relobj->output_section(shndx); + uint64_t secoff64 = relobj->output_section_offset(shndx); - if (os == NULL) + if (this->is_section_folded(relobj, shndx)) + { + gold_assert(os == NULL); + // Get the os of the section it is folded onto. 
+ Section_id folded = this->icf_->get_folded_section(relobj, + shndx); + gold_assert(folded.first != NULL); + Relobj* folded_obj = reinterpret_cast(folded.first); + os = folded_obj->output_section(folded.second); + gold_assert(os != NULL); + secoff64 = folded_obj->output_section_offset(folded.second); + } + + if (os == NULL) { sym->set_symtab_index(-1U); bool static_or_reloc = (parameters->doing_static_link() || @@ -2428,10 +2449,10 @@ Symbol_table::sized_finalize_symbol(Symbol* unsized_sym) return false; } - uint64_t secoff64 = relobj->output_section_offset(shndx); if (secoff64 == -1ULL) { // The section needs special handling (e.g., a merge section). + value = os->output_address(relobj, shndx, sym->value()); } else @@ -2642,6 +2663,19 @@ Symbol_table::sized_write_globals(const Stringpool* sympool, { Relobj* relobj = static_cast(symobj); Output_section* os = relobj->output_section(in_shndx); + if (this->is_section_folded(relobj, in_shndx)) + { + // This global symbol must be written out even though + // it is folded. + // Get the os of the section it is folded onto. + Section_id folded = + this->icf_->get_folded_section(relobj, in_shndx); + gold_assert(folded.first !=NULL); + Relobj* folded_obj = + reinterpret_cast(folded.first); + os = folded_obj->output_section(folded.second); + gold_assert(os != NULL); + } gold_assert(os != NULL); shndx = os->out_shndx(); diff --git a/gold/symtab.h b/gold/symtab.h index be6bc12242b..b89fd664131 100644 --- a/gold/symtab.h +++ b/gold/symtab.h @@ -28,6 +28,7 @@ #include #include "gc.h" +#include "icf.h" #include "elfcpp.h" #include "parameters.h" #include "stringpool.h" @@ -58,6 +59,7 @@ class Output_segment; class Output_file; class Output_symtab_xindex; class Garbage_collection; +class Icf; // The base class of an entry in the symbol table. The symbol table // can have a lot of entries, so we don't want this class to big. 
@@ -1162,12 +1164,24 @@ class Symbol_table ~Symbol_table(); + void + set_icf(Icf* icf) + { this->icf_ = icf;} + + Icf* + icf() const + { return this->icf_; } + + // Returns true if ICF determined that this is a duplicate section. + bool + is_section_folded(Object* obj, unsigned int shndx) const; + void set_gc(Garbage_collection* gc) { this->gc_ = gc; } Garbage_collection* - gc() + gc() const { return this->gc_; } // During garbage collection, this keeps undefined symbols. @@ -1670,6 +1684,7 @@ class Symbol_table // Information parsed from the version script, if any. const Version_script_info& version_script_; Garbage_collection* gc_; + Icf* icf_; }; // We inline get_sized_symbol for efficiency. diff --git a/gold/target-reloc.h b/gold/target-reloc.h index 4fcbfa33e9e..d93e7552888 100644 --- a/gold/target-reloc.h +++ b/gold/target-reloc.h @@ -218,12 +218,14 @@ relocate_section( // If the local symbol belongs to a section we are discarding, // and that section is a debug section, try to find the // corresponding kept section and map this symbol to its - // counterpart in the kept section. + // counterpart in the kept section. The symbol must not + // correspond to a section we are folding. 
bool is_ordinary; unsigned int shndx = psymval->input_shndx(&is_ordinary); if (is_ordinary && shndx != elfcpp::SHN_UNDEF - && !object->is_section_included(shndx)) + && !object->is_section_included(shndx) + && !(relinfo->symtab->is_section_folded(object, shndx))) { if (comdat_behavior == CB_UNDETERMINED) { diff --git a/gold/testsuite/Makefile.am b/gold/testsuite/Makefile.am index 5afd4b0a582..ca177bd41af 100644 --- a/gold/testsuite/Makefile.am +++ b/gold/testsuite/Makefile.am @@ -115,6 +115,15 @@ gc_comdat_test: gc_comdat_test_1.o gc_comdat_test_2.o gcctestdir/ld gc_comdat_test.stdout: gc_comdat_test $(TEST_NM) -C gc_comdat_test > gc_comdat_test.stdout +check_SCRIPTS += icf_test.sh +check_DATA += icf_test.stdout +icf_test.o: icf_test.cc + $(CXXCOMPILE) -O0 -c -ffunction-sections -g -o $@ $< +icf_test: icf_test.o gcctestdir/ld + $(CXXLINK) -Bgcctestdir/ -Wl,--icf icf_test.o +icf_test.stdout: icf_test + $(TEST_NM) -C icf_test > icf_test.stdout + check_PROGRAMS += basic_test check_PROGRAMS += basic_static_test diff --git a/gold/testsuite/Makefile.in b/gold/testsuite/Makefile.in index c1082748625..d6b43bdf6a6 100644 --- a/gold/testsuite/Makefile.in +++ b/gold/testsuite/Makefile.in @@ -57,11 +57,12 @@ check_PROGRAMS = object_unittest$(EXEEXT) binary_unittest$(EXEEXT) \ # Test --dynamic-list, --dynamic-list-data, --dynamic-list-cpp-new, # and --dynamic-list-cpp-typeinfo @GCC_TRUE@@NATIVE_LINKER_TRUE@am__append_1 = gc_comdat_test.sh \ -@GCC_TRUE@@NATIVE_LINKER_TRUE@ two_file_shared.sh weak_plt.sh \ -@GCC_TRUE@@NATIVE_LINKER_TRUE@ debug_msg.sh undef_symbol.sh \ -@GCC_TRUE@@NATIVE_LINKER_TRUE@ ver_test_1.sh ver_test_2.sh \ -@GCC_TRUE@@NATIVE_LINKER_TRUE@ ver_test_4.sh ver_test_5.sh \ -@GCC_TRUE@@NATIVE_LINKER_TRUE@ ver_test_7.sh ver_test_10.sh \ +@GCC_TRUE@@NATIVE_LINKER_TRUE@ icf_test.sh two_file_shared.sh \ +@GCC_TRUE@@NATIVE_LINKER_TRUE@ weak_plt.sh debug_msg.sh \ +@GCC_TRUE@@NATIVE_LINKER_TRUE@ undef_symbol.sh ver_test_1.sh \ +@GCC_TRUE@@NATIVE_LINKER_TRUE@ ver_test_2.sh 
ver_test_4.sh \ +@GCC_TRUE@@NATIVE_LINKER_TRUE@ ver_test_5.sh ver_test_7.sh \ +@GCC_TRUE@@NATIVE_LINKER_TRUE@ ver_test_10.sh \ @GCC_TRUE@@NATIVE_LINKER_TRUE@ ver_matching_test.sh \ @GCC_TRUE@@NATIVE_LINKER_TRUE@ script_test_3.sh \ @GCC_TRUE@@NATIVE_LINKER_TRUE@ script_test_4.sh \ @@ -74,6 +75,7 @@ check_PROGRAMS = object_unittest$(EXEEXT) binary_unittest$(EXEEXT) \ # We also want to make sure we do something reasonable when there's no # debug info available. For the best test, we use .so's. @GCC_TRUE@@NATIVE_LINKER_TRUE@am__append_2 = gc_comdat_test.stdout \ +@GCC_TRUE@@NATIVE_LINKER_TRUE@ icf_test.stdout \ @GCC_TRUE@@NATIVE_LINKER_TRUE@ two_file_shared.dbg \ @GCC_TRUE@@NATIVE_LINKER_TRUE@ weak_plt_shared.so debug_msg.err \ @GCC_TRUE@@NATIVE_LINKER_TRUE@ debug_msg_so.err \ @@ -345,7 +347,7 @@ check_PROGRAMS = object_unittest$(EXEEXT) binary_unittest$(EXEEXT) \ @NATIVE_LINKER_FALSE@ $(am__DEPENDENCIES_1) \ @NATIVE_LINKER_FALSE@ $(am__DEPENDENCIES_1) subdir = testsuite -DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in +DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ChangeLog ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/../config/depstand.m4 \ $(top_srcdir)/../config/gettext-sister.m4 \ @@ -2316,6 +2318,12 @@ uninstall-am: uninstall-info-am @GCC_TRUE@@NATIVE_LINKER_TRUE@ $(CXXLINK) -Bgcctestdir/ -Wl,--gc-sections gc_comdat_test_1.o gc_comdat_test_2.o @GCC_TRUE@@NATIVE_LINKER_TRUE@gc_comdat_test.stdout: gc_comdat_test @GCC_TRUE@@NATIVE_LINKER_TRUE@ $(TEST_NM) -C gc_comdat_test > gc_comdat_test.stdout +@GCC_TRUE@@NATIVE_LINKER_TRUE@icf_test.o: icf_test.cc +@GCC_TRUE@@NATIVE_LINKER_TRUE@ $(CXXCOMPILE) -O0 -c -ffunction-sections -g -o $@ $< +@GCC_TRUE@@NATIVE_LINKER_TRUE@icf_test: icf_test.o gcctestdir/ld +@GCC_TRUE@@NATIVE_LINKER_TRUE@ $(CXXLINK) -Bgcctestdir/ -Wl,--icf icf_test.o +@GCC_TRUE@@NATIVE_LINKER_TRUE@icf_test.stdout: icf_test +@GCC_TRUE@@NATIVE_LINKER_TRUE@ $(TEST_NM) -C icf_test > icf_test.stdout 
@GCC_TRUE@@NATIVE_LINKER_TRUE@basic_test.o: basic_test.cc
 @GCC_TRUE@@NATIVE_LINKER_TRUE@	$(CXXCOMPILE) -O0 -c -o $@ $<
 @GCC_TRUE@@NATIVE_LINKER_TRUE@basic_test: basic_test.o gcctestdir/ld
diff --git a/gold/testsuite/icf_test.cc b/gold/testsuite/icf_test.cc
new file mode 100644
index 00000000000..c7a5ea9d938
--- /dev/null
+++ b/gold/testsuite/icf_test.cc
@@ -0,0 +1,51 @@
+// icf_test.cc -- a test case for gold
+
+// Copyright 2009 Free Software Foundation, Inc.
+// Written by Sriraman Tallam .
+
+// This file is part of gold.
+
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 3 of the License, or
+// (at your option) any later version.
+
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston,
+// MA 02110-1301, USA.
+
+// The goal of this program is to verify if identical code folding
+// correctly identifies and folds functions. folded_func must be
+// folded into kept_func.
+
+int common()  // Callee shared by kept_func and folded_func.
+{
+  return 1;
+}
+
+int kept_func()  // Same statements as folded_func, apart from the self-call.
+{
+  common();
+  // Recursive call.
+  kept_func();
+  return 1;
+}
+
+int folded_func()  // Same statements as kept_func, apart from the self-call.
+{
+  common();
+  // Recursive call.
+  folded_func();
+  return 1;
+}
+
+int main()  // Returns 0 without calling any of the functions above.
+{
+  return 0;
+}
diff --git a/gold/testsuite/icf_test.sh b/gold/testsuite/icf_test.sh
new file mode 100755
index 00000000000..5f1481cdb9f
--- /dev/null
+++ b/gold/testsuite/icf_test.sh
@@ -0,0 +1,40 @@
+#!/bin/sh
+
+# icf_test.sh -- test --icf
+
+# Copyright 2009 Free Software Foundation, Inc.
+# Written by Sriraman Tallam .
+ +# This file is part of gold. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, +# MA 02110-1301, USA. + +# The goal of this program is to verify if icf works as expected. +# File icf_test.cc is in this test. This program checks if the +# identical sections are correctly folded. + +check() +{ + func_addr_1=`grep $2 $1 | awk '{print $1}'` + func_addr_2=`grep $3 $1 | awk '{print $1}'` + if [ $func_addr_1 != $func_addr_2 ] + then + echo "Identical Code Folding failed to fold" $2 "and" $3 + exit 1 + fi +} + +check icf_test.stdout "folded_func" "kept_func"