From Craig Silverstein: First cut at detecting ODR violations.
authorIan Lance Taylor <iant@google.com>
Tue, 13 Nov 2007 20:02:32 +0000 (20:02 +0000)
committerIan Lance Taylor <iant@google.com>
Tue, 13 Nov 2007 20:02:32 +0000 (20:02 +0000)
gold/gold.cc
gold/resolve.cc
gold/symtab.cc
gold/symtab.h

index 83bb3f6065b3c09b03ed0d200eceb29d693bbc53..a215d0f31dc85b42e35aff1386152004e71fa178 100644 (file)
@@ -180,6 +180,10 @@ queue_middle_tasks(const General_options& options,
                 (*input_objects->dynobj_begin())->name().c_str());
     }
 
+  // See if any of the input definitions violate the One Definition Rule.
+  // TODO: if this is too slow, do this as a task, rather than inline.
+  symtab->detect_odr_violations();
+
   // Define some sections and symbols needed for a dynamic link.  This
   // handles some cases we want to see before we read the relocs.
   layout->create_initial_dynamic_sections(input_objects, symtab);
index 7d141c02534c2e0dee0b04d46f8b96e99bc93739..980831b7950eee7b479e1be0396eff83a89de5f7 100644 (file)
@@ -119,14 +119,76 @@ static const unsigned int def_flag = 0 << def_undef_or_common_shift;
 static const unsigned int undef_flag = 1 << def_undef_or_common_shift;
 static const unsigned int common_flag = 2 << def_undef_or_common_shift;
 
+// This convenience function combines all the flags based on facts
+// about the symbol.
+
+static unsigned int
+symbol_to_bits(elfcpp::STB binding, bool is_dynamic,
+              unsigned int shndx, elfcpp::STT type)
+{
+  unsigned int bits;
+
+  switch (binding)
+    {
+    case elfcpp::STB_GLOBAL:
+      bits = global_flag;
+      break;
+
+    case elfcpp::STB_WEAK:
+      bits = weak_flag;
+      break;
+
+    case elfcpp::STB_LOCAL:
+      // We should only see externally visible symbols in the symbol
+      // table.
+      gold_error(_("invalid STB_LOCAL symbol in external symbols"));
+      bits = global_flag;
+
+    default:
+      // Any target which wants to handle STB_LOOS, etc., needs to
+      // define a resolve method.
+      gold_error(_("unsupported symbol binding"));
+      bits = global_flag;
+    }
+
+  if (is_dynamic)
+    bits |= dynamic_flag;
+  else
+    bits |= regular_flag;
+
+  switch (shndx)
+    {
+    case elfcpp::SHN_UNDEF:
+      bits |= undef_flag;
+      break;
+
+    case elfcpp::SHN_COMMON:
+      bits |= common_flag;
+      break;
+
+    default:
+      if (type == elfcpp::STT_COMMON)
+       bits |= common_flag;
+      else
+        bits |= def_flag;
+      break;
+    }
+
+  return bits;
+}
+
 // Resolve a symbol.  This is called the second and subsequent times
-// we see a symbol.  TO is the pre-existing symbol.  SYM is the new
-// symbol, seen in OBJECT.  VERSION of the version of SYM.
+// we see a symbol.  TO is the pre-existing symbol.  ORIG_SYM is the
+// new symbol, seen in OBJECT.  SYM is almost always identical to
+// ORIG_SYM, but may be munged (for instance, if we determine the
+// symbol is in a to-be-discarded section, we'll set sym's shndx to
+// UNDEFINED).  VERSION of the version of SYM.
 
 template<int size, bool big_endian>
 void
 Symbol_table::resolve(Sized_symbol<size>* to,
                      const elfcpp::Sym<size, big_endian>& sym,
+                     const elfcpp::Sym<size, big_endian>& orig_sym,
                      Object* object, const char* version)
 {
   if (object->target()->has_resolve())
@@ -150,53 +212,10 @@ Symbol_table::resolve(Sized_symbol<size>* to,
       to->set_in_dyn();
     }
 
-  unsigned int frombits;
-  switch (sym.get_st_bind())
-    {
-    case elfcpp::STB_GLOBAL:
-      frombits = global_flag;
-      break;
-
-    case elfcpp::STB_WEAK:
-      frombits = weak_flag;
-      break;
-
-    case elfcpp::STB_LOCAL:
-      gold_error(_("%s: invalid STB_LOCAL symbol %s in external symbols"),
-                object->name().c_str(), to->name());
-      frombits = global_flag;
-      break;
-
-    default:
-      gold_error(_("%s: unsupported symbol binding %d for symbol %s"),
-                object->name().c_str(),
-                static_cast<int>(sym.get_st_bind()), to->name());
-      frombits = global_flag;
-      break;
-    }
-
-  if (!object->is_dynamic())
-    frombits |= regular_flag;
-  else
-    frombits |= dynamic_flag;
-
-  switch (sym.get_st_shndx())
-    {
-    case elfcpp::SHN_UNDEF:
-      frombits |= undef_flag;
-      break;
-
-    case elfcpp::SHN_COMMON:
-      frombits |= common_flag;
-      break;
-
-    default:
-      if (sym.get_st_type() == elfcpp::STT_COMMON)
-       frombits |= common_flag;
-      else
-        frombits |= def_flag;
-      break;
-    }
+  unsigned int frombits = symbol_to_bits(sym.get_st_bind(),
+                                         object->is_dynamic(),
+                                         sym.get_st_shndx(),
+                                         sym.get_st_type());
 
   bool adjust_common_sizes;
   if (Symbol_table::should_override(to, frombits, object,
@@ -214,6 +233,34 @@ Symbol_table::resolve(Sized_symbol<size>* to,
       if (adjust_common_sizes && sym.get_st_size() > to->symsize())
         to->set_symsize(sym.get_st_size());
     }
+
+  // A new weak undefined reference, merging with an old weak
+  // reference, could be a One Definition Rule (ODR) violation --
+  // especially if the types or sizes of the references differ.  We'll
+  // store such pairs and look them up later to make sure they
+  // actually refer to the same lines of code.  (Note: not all ODR
+  // violations can be found this way, and not everything this finds
+  // is an ODR violation.  But it's helpful to warn about.)
+  // We use orig_sym here because we want the symbol exactly as it
+  // appears in the object file, not munged via our future processing.
+  if (orig_sym.get_st_bind() == elfcpp::STB_WEAK
+      && to->binding() == elfcpp::STB_WEAK
+      && orig_sym.get_st_shndx() != elfcpp::SHN_UNDEF
+      && to->shndx() != elfcpp::SHN_UNDEF
+      && orig_sym.get_st_size() != 0    // Ignore weird 0-sized symbols.
+      && to->symsize() != 0
+      && (orig_sym.get_st_type() != to->type()
+          || orig_sym.get_st_size() != to->symsize())
+      // C does not have a concept of ODR, so we only need to do this
+      // on C++ symbols.  These have (mangled) names starting with _Z.
+      && to->name()[0] == '_' && to->name()[1] == 'Z')
+    {
+      Symbol_location from_location
+          = { object, orig_sym.get_st_shndx(), orig_sym.get_st_value() };
+      Symbol_location to_location = { to->object(), to->shndx(), to->value() };
+      this->candidate_odr_violations_[to->name()].insert(from_location);
+      this->candidate_odr_violations_[to->name()].insert(to_location);
+    }
 }
 
 // Handle the core of symbol resolution.  This is called with the
@@ -229,51 +276,11 @@ Symbol_table::should_override(const Symbol* to, unsigned int frombits,
 {
   *adjust_common_sizes = false;
 
-  unsigned int tobits;
-  switch (to->binding())
-    {
-    case elfcpp::STB_GLOBAL:
-      tobits = global_flag;
-      break;
-
-    case elfcpp::STB_WEAK:
-      tobits = weak_flag;
-      break;
-
-    case elfcpp::STB_LOCAL:
-      // We should only see externally visible symbols in the symbol
-      // table.
-      gold_unreachable();
-
-    default:
-      // Any target which wants to handle STB_LOOS, etc., needs to
-      // define a resolve method.
-      gold_unreachable();
-    }
-
-  if (to->source() == Symbol::FROM_OBJECT
-      && to->object()->is_dynamic())
-    tobits |= dynamic_flag;
-  else
-    tobits |= regular_flag;
-
-  switch (to->shndx())
-    {
-    case elfcpp::SHN_UNDEF:
-      tobits |= undef_flag;
-      break;
-
-    case elfcpp::SHN_COMMON:
-      tobits |= common_flag;
-      break;
-
-    default:
-      if (to->type() == elfcpp::STT_COMMON)
-       tobits |= common_flag;
-      else
-        tobits |= def_flag;
-      break;
-    }
+  unsigned int tobits = symbol_to_bits(to->binding(),
+                                       (to->source() == Symbol::FROM_OBJECT
+                                        && to->object()->is_dynamic()),
+                                       to->shndx(),
+                                       to->type());
 
   // FIXME: Warn if either but not both of TO and SYM are STT_TLS.
 
@@ -719,6 +726,7 @@ void
 Symbol_table::resolve<32, false>(
     Sized_symbol<32>* to,
     const elfcpp::Sym<32, false>& sym,
+    const elfcpp::Sym<32, false>& orig_sym,
     Object* object,
     const char* version);
 #endif
@@ -729,6 +737,7 @@ void
 Symbol_table::resolve<32, true>(
     Sized_symbol<32>* to,
     const elfcpp::Sym<32, true>& sym,
+    const elfcpp::Sym<32, true>& orig_sym,
     Object* object,
     const char* version);
 #endif
@@ -739,6 +748,7 @@ void
 Symbol_table::resolve<64, false>(
     Sized_symbol<64>* to,
     const elfcpp::Sym<64, false>& sym,
+    const elfcpp::Sym<64, false>& orig_sym,
     Object* object,
     const char* version);
 #endif
@@ -749,6 +759,7 @@ void
 Symbol_table::resolve<64, true>(
     Sized_symbol<64>* to,
     const elfcpp::Sym<64, true>& sym,
+    const elfcpp::Sym<64, true>& orig_sym,
     Object* object,
     const char* version);
 #endif
index e3face79df711ee3f6d01894bc8b541bb2efe7a7..be88534843a9f953f07f3a56c7e1dec10d35307e 100644 (file)
 #include "gold.h"
 
 #include <stdint.h>
+#include <set>
 #include <string>
 #include <utility>
 
 #include "object.h"
+#include "dwarf_reader.h"
 #include "dynobj.h"
 #include "output.h"
 #include "target.h"
@@ -343,7 +345,7 @@ Symbol_table::resolve(Sized_symbol<size>* to, const Sized_symbol<size>* from,
   esym.put_st_info(from->binding(), from->type());
   esym.put_st_other(from->visibility(), from->nonvis());
   esym.put_st_shndx(from->shndx());
-  this->resolve(to, esym.sym(), from->object(), version);
+  this->resolve(to, esym.sym(), esym.sym(), from->object(), version);
   if (from->in_reg())
     to->set_in_reg();
   if (from->in_dyn())
@@ -372,6 +374,11 @@ Symbol_table::resolve(Sized_symbol<size>* to, const Sized_symbol<size>* from,
 // object file as a forwarder, and record it in the forwarders_ map.
 // Note that entries in the hash table will never be marked as
 // forwarders.
+//
+// SYM and ORIG_SYM are almost always the same.  ORIG_SYM is the
+// symbol exactly as it existed in the input file.  SYM is usually
+// that as well, but can be modified, for instance if we determine
+// it's in a to-be-discarded section.
 
 template<int size, bool big_endian>
 Sized_symbol<size>*
@@ -381,7 +388,8 @@ Symbol_table::add_from_object(Object* object,
                              const char *version,
                              Stringpool::Key version_key,
                              bool def,
-                             const elfcpp::Sym<size, big_endian>& sym)
+                             const elfcpp::Sym<size, big_endian>& sym,
+                             const elfcpp::Sym<size, big_endian>& orig_sym)
 {
   Symbol* const snull = NULL;
   std::pair<typename Symbol_table_type::iterator, bool> ins =
@@ -416,7 +424,7 @@ Symbol_table::add_from_object(Object* object,
       was_undefined = ret->is_undefined();
       was_common = ret->is_common();
 
-      this->resolve(ret, sym, object, version);
+      this->resolve(ret, sym, orig_sym, object, version);
 
       if (def)
        {
@@ -456,7 +464,7 @@ Symbol_table::add_from_object(Object* object,
          ret = this->get_sized_symbol SELECT_SIZE_NAME(size) (
               insdef.first->second
               SELECT_SIZE(size));
-         this->resolve(ret, sym, object, version);
+         this->resolve(ret, sym, orig_sym, object, version);
          ins.first->second = ret;
        }
       else
@@ -571,7 +579,7 @@ Symbol_table::add_from_relobj(
          Stringpool::Key name_key;
          name = this->namepool_.add(name, true, &name_key);
          res = this->add_from_object(relobj, name, name_key, NULL, 0,
-                                     false, *psym);
+                                     false, *psym, sym);
        }
       else
        {
@@ -590,7 +598,7 @@ Symbol_table::add_from_relobj(
          ver = this->namepool_.add(ver, true, &ver_key);
 
          res = this->add_from_object(relobj, name, name_key, ver, ver_key,
-                                     def, *psym);
+                                     def, *psym, sym);
        }
 
       (*sympointers)[i] = res;
@@ -659,7 +667,7 @@ Symbol_table::add_from_dynobj(
          Stringpool::Key name_key;
          name = this->namepool_.add(name, true, &name_key);
          res = this->add_from_object(dynobj, name, name_key, NULL, 0,
-                                     false, sym);
+                                     false, sym, sym);
        }
       else
        {
@@ -693,7 +701,7 @@ Symbol_table::add_from_dynobj(
            {
              // This symbol does not have a version.
              res = this->add_from_object(dynobj, name, name_key, NULL, 0,
-                                         false, sym);
+                                         false, sym, sym);
            }
          else
            {
@@ -723,14 +731,14 @@ Symbol_table::add_from_dynobj(
              if (sym.get_st_shndx() == elfcpp::SHN_ABS
                  && name_key == version_key)
                res = this->add_from_object(dynobj, name, name_key, NULL, 0,
-                                           false, sym);
+                                           false, sym, sym);
              else
                {
                  const bool def = (!hidden
                                    && (sym.get_st_shndx()
                                        != elfcpp::SHN_UNDEF));
                  res = this->add_from_object(dynobj, name, name_key, version,
-                                             version_key, def, sym);
+                                             version_key, def, sym, sym);
                }
            }
        }
@@ -1794,6 +1802,97 @@ Symbol_table::sized_write_section_symbol(const Output_section* os,
   of->write_output_view(offset, sym_size, pov);
 }
 
+// Check candidate_odr_violations_ to find symbols with the same name
+// but apparently different definitions (different source-file/line-no).
+
+void
+Symbol_table::detect_odr_violations() const
+{
+  if (parameters->get_size() == 32)
+    {
+      if (!parameters->is_big_endian())
+       {
+#ifdef HAVE_TARGET_32_LITTLE
+         this->sized_detect_odr_violations<32, false>();
+#else
+         gold_unreachable();
+#endif
+       }
+      else
+       {
+#ifdef HAVE_TARGET_32_BIG
+         this->sized_detect_odr_violations<32, true>();
+#else
+         gold_unreachable();
+#endif
+       }
+    }
+  else if (parameters->get_size() == 64)
+    {
+      if (!parameters->is_big_endian())
+       {
+#ifdef HAVE_TARGET_64_LITTLE
+         this->sized_detect_odr_violations<64, false>();
+#else
+         gold_unreachable();
+#endif
+       }
+      else
+       {
+#ifdef HAVE_TARGET_64_BIG
+         this->sized_detect_odr_violations<64, true>();
+#else
+         gold_unreachable();
+#endif
+       }
+    }
+  else
+    gold_unreachable();
+}
+
+// Implement detect_odr_violations.
+
+template<int size, bool big_endian>
+void
+Symbol_table::sized_detect_odr_violations() const
+{
+  for (Odr_map::const_iterator it = candidate_odr_violations_.begin();
+       it != candidate_odr_violations_.end();
+       ++it)
+    {
+      const char* symbol_name = it->first;
+      // We use a sorted set so the output is deterministic.
+      std::set<std::string> line_nums;
+
+      Unordered_set<Symbol_location, Symbol_location_hash>::const_iterator
+       locs;
+      for (locs = it->second.begin(); locs != it->second.end(); ++locs)
+        {
+         // We need to lock the object in order to read it.  This
+         // means that we can not run inside a Task.  If we want to
+         // run this in a Task for better performance, we will need
+         // one Task for object, plus appropriate locking to ensure
+         // that we don't conflict with other uses of the object.
+          locs->object->lock();
+          Dwarf_line_info<size, big_endian> line_info(locs->object);
+          locs->object->unlock();
+          std::string lineno = line_info.addr2line(locs->shndx, locs->offset);
+          if (!lineno.empty())
+            line_nums.insert(lineno);
+        }
+
+      if (line_nums.size() > 1)
+        {
+          gold_warning(_("symbol %s defined in multiple places "
+                        "(possible ODR violation):"), symbol_name);
+          for (std::set<std::string>::const_iterator it2 = line_nums.begin();
+               it2 != line_nums.end();
+               ++it2)
+            fprintf(stderr, "  %s\n", it2->c_str());
+        }
+    }
+}
+
 // Warnings functions.
 
 // Add a new warning.
index 81ad2d91f2bc2fbe860bd71f1be7d5b7e4d3c842..367813fc617170f4d0e938099814a80406536eb8 100644 (file)
@@ -895,7 +895,7 @@ class Warnings
   };
 
   // A mapping from warning symbol names (canonicalized in
-  // Symbol_table's namepool_ field) to 
+  // Symbol_table's namepool_ field) to warning information.
   typedef Unordered_map<const char*, Warning_location> Warning_table;
 
   Warning_table warnings_;
@@ -968,7 +968,7 @@ class Symbol_table
   // Define a set of symbols in output segments.
   void
   define_symbols(const Layout*, const Target*, int count,
-                const Define_symbol_in_segment*);  
+                const Define_symbol_in_segment*);
 
   // Define SYM using a COPY reloc.  POSD is the Output_data where the
   // symbol should be defined--typically a .dyn.bss section.  VALUE is
@@ -1023,6 +1023,11 @@ class Symbol_table
                size_t relnum, off_t reloffset) const
   { this->warnings_.issue_warning(sym, relinfo, relnum, reloffset); }
 
+  // Check candidate_odr_violations_ to find symbols with the same name
+  // but apparently different definitions (different source-file/line-no).
+  void
+  detect_odr_violations() const;
+
   // SYM is defined using a COPY reloc.  Return the dynamic object
   // where the original definition was found.
   Dynobj*
@@ -1070,13 +1075,15 @@ class Symbol_table
   Sized_symbol<size>*
   add_from_object(Object*, const char *name, Stringpool::Key name_key,
                  const char *version, Stringpool::Key version_key,
-                 bool def, const elfcpp::Sym<size, big_endian>& sym);
+                 bool def, const elfcpp::Sym<size, big_endian>& sym,
+                  const elfcpp::Sym<size, big_endian>& orig_sym);
 
   // Resolve symbols.
   template<int size, bool big_endian>
   void
   resolve(Sized_symbol<size>* to,
          const elfcpp::Sym<size, big_endian>& sym,
+         const elfcpp::Sym<size, big_endian>& orig_sym,
          Object*, const char* version);
 
   template<int size, bool big_endian>
@@ -1157,6 +1164,11 @@ class Symbol_table
   void
   do_allocate_commons(const General_options&, Layout*);
 
+  // Implement detect_odr_violations.
+  template<int size, bool big_endian>
+  void
+  sized_detect_odr_violations() const;
+
   // Finalize symbols specialized for size.
   template<int size>
   off_t
@@ -1208,6 +1220,33 @@ class Symbol_table
   // they are defined.
   typedef Unordered_map<const Symbol*, Dynobj*> Copied_symbol_dynobjs;
 
+  // A map from symbol name (as a pointer into the namepool) to all
+  // the locations the symbols is (weakly) defined (and certain other
+  // conditions are met).  This map will be used later to detect
+  // possible One Definition Rule (ODR) violations.
+  struct Symbol_location
+  {
+    Object* object;         // Object where the symbol is defined.
+    unsigned int shndx;     // Section-in-object where the symbol is defined.
+    off_t offset;           // Offset-in-section where the symbol is defined.
+    bool operator==(const Symbol_location& that) const
+    {
+      return (this->object == that.object
+              && this->shndx == that.shndx
+              && this->offset == that.offset);
+    }
+  };
+
+  struct Symbol_location_hash
+  {
+    size_t operator()(const Symbol_location& loc) const
+    { return reinterpret_cast<uintptr_t>(loc.object) ^ loc.offset ^ loc.shndx; }
+  };
+
+  typedef Unordered_map<const char*,
+                        Unordered_set<Symbol_location, Symbol_location_hash> >
+  Odr_map;
+
   // We increment this every time we see a new undefined symbol, for
   // use in archive groups.
   int saw_undefined_;
@@ -1242,6 +1281,9 @@ class Symbol_table
   Commons_type commons_;
   // Manage symbol warnings.
   Warnings warnings_;
+  // Manage potential One Definition Rule (ODR) violations.
+  Odr_map candidate_odr_violations_;
+
   // When we emit a COPY reloc for a symbol, we define it in an
   // Output_data.  When it's time to emit version information for it,
   // we need to know the dynamic object in which we found the original