util/style/file_types.py

   1 # Copyright (c) 2010 The Hewlett-Packard Development Company
   2 # All rights reserved.
   3 #
   4 # Redistribution and use in source and binary forms, with or without
   5 # modification, are permitted provided that the following conditions are
   6 # met: redistributions of source code must retain the above copyright
   7 # notice, this list of conditions and the following disclaimer;
   8 # redistributions in binary form must reproduce the above copyright
   9 # notice, this list of conditions and the following disclaimer in the
  10 # documentation and/or other materials provided with the distribution;
  11 # neither the name of the copyright holders nor the names of its
  12 # contributors may be used to endorse or promote products derived from
  13 # this software without specific prior written permission.
  14 #
  15 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  16 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  17 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  18 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  19 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  20 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  21 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  22 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  23 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  24 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  25 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  26
  27 import os
  28
  29 # lanuage type for each file extension
  30 lang_types = {
  31     '.c'     : "C",
  32     '.cl'    : "C",
  33     '.h'     : "C",
  34     '.cc'    : "C++",
  35     '.hh'    : "C++",
  36     '.cxx'   : "C++",
  37     '.hxx'   : "C++",
  38     '.cpp'   : "C++",
  39     '.hpp'   : "C++",
  40     '.C'     : "C++",
  41     '.H'     : "C++",
  42     '.i'     : "swig",
  43     '.py'    : "python",
  44     '.pl'    : "perl",
  45     '.pm'    : "perl",
  46     '.s'     : "asm",
  47     '.S'     : "asm",
  48     '.l'     : "lex",
  49     '.ll'    : "lex",
  50     '.y'     : "yacc",
  51     '.yy'    : "yacc",
  52     '.isa'   : "isa",
  53     '.sh'    : "shell",
  54     '.slicc' : "slicc",
  55     '.sm'    : "slicc",
  56     '.awk'   : "awk",
  57     '.el'    : "lisp",
  58     '.txt'   : "text",
  59     '.tex'   : "tex",
  60     '.mk'    : "make",
  61     '.dts'    : "dts",
  62     }
  63
  64 # languages based on file prefix
  65 lang_prefixes = (
  66     ('SCons',    'scons'),
  67     ('Make',     'make'),
  68     ('make',     'make'),
  69     ('Doxyfile', 'doxygen'),
  70     )
  71
  72 # languages based on #! line of first file
  73 hash_bang = (
  74     ('python', 'python'),
  75     ('perl',   'perl'),
  76     ('sh',     'shell'),
  77     )
  78
  79 # the list of all languages that we detect
  80 all_languages = frozenset(lang_types.values())
  81 all_languages |= frozenset(lang for start,lang in lang_prefixes)
  82 all_languages |= frozenset(lang for start,lang in hash_bang)
  83
  84 def lang_type(filename, firstline=None, openok=True):
  85     '''identify the language of a given filename and potentially the
  86     firstline of the file.  If the firstline of the file is not
  87     provided and openok is True, open the file and read the first line
  88     if necessary'''
  89
  90     basename = os.path.basename(filename)
  91     name,extension = os.path.splitext(basename)
  92
  93     # first try to detect language based on file extension
  94     try:
  95         return lang_types[extension]
  96     except KeyError:
  97         pass
  98
  99     # now try to detect language based on file prefix
 100     for start,lang in lang_prefixes:
 101         if basename.startswith(start):
 102             return lang
 103
 104     # if a first line was not provided but the file is ok to open,
 105     # grab the first line of the file.
 106     if firstline is None and openok:
 107         handle = open(filename, 'r')
 108         firstline = handle.readline()
 109         handle.close()
 110
 111     # try to detect language based on #! in first line
 112     if firstline and firstline.startswith('#!'):
 113         for string,lang in hash_bang:
 114             if firstline.find(string) > 0:
 115                 return lang
 116
 117     # sorry, we couldn't detect the language
 118     return None
 119
 120 # directories and files to ignore by default
 121 default_dir_ignore = frozenset(('.hg', '.svn', 'build', 'ext'))
 122 default_file_ignore = frozenset(('parsetab.py', ))
 123
 124 def find_files(base, languages=all_languages,
 125                dir_ignore=default_dir_ignore,
 126                file_ignore=default_file_ignore):
 127     '''find all files in a directory and its subdirectories based on a
 128     set of languages, ignore directories specified in dir_ignore and
 129     files specified in file_ignore'''
 130     if base[-1] != '/':
 131         base += '/'
 132
 133     def update_dirs(dirs):
 134         '''strip the ignored directories out of the provided list'''
 135         index = len(dirs) - 1
 136         for i,d in enumerate(reversed(dirs)):
 137             if d in dir_ignore:
 138                 del dirs[index - i]
 139
 140     # walk over base
 141     for root,dirs,files in os.walk(base):
 142         root = root.replace(base, '', 1)
 143
 144         # strip ignored directories from the list
 145         update_dirs(dirs)
 146
 147         for filename in files:
 148             if filename in file_ignore:
 149                 # skip ignored files
 150                 continue
 151
 152             # try to figure out the language of the specified file
 153             fullpath = os.path.join(base, root, filename)
 154             language = lang_type(fullpath)
 155
 156             # if the file is one of the langauges that we want return
 157             # its name and the language
 158             if language in languages:
 159                 yield fullpath, language
 160
 161 def update_file(dst, src, language, mutator):
 162     '''update a file of the specified language with the provided
 163     mutator generator.  If inplace is provided, update the file in
 164     place and return the handle to the updated file.  If inplace is
 165     false, write the updated file to cStringIO'''
 166
 167     # if the source and destination are the same, we're updating in place
 168     inplace = dst == src
 169
 170     if isinstance(src, str):
 171         # if a filename was provided, open the file
 172         if inplace:
 173             mode = 'r+'
 174         else:
 175             mode = 'r'
 176         src = open(src, mode)
 177
 178     orig_lines = []
 179
 180     # grab all of the lines of the file and strip them of their line ending
 181     old_lines = list(line.rstrip('\r\n') for line in src.xreadlines())
 182     new_lines = list(mutator(old_lines, src.name, language))
 183
 184     for line in src.xreadlines():
 185         line = line
 186
 187     if inplace:
 188         # if we're updating in place and the file hasn't changed, do nothing
 189         if old_lines == new_lines:
 190             return
 191
 192         # otherwise, truncate the file and seek to the beginning.
 193         dst = src
 194         dst.truncate(0)
 195         dst.seek(0)
 196     elif isinstance(dst, str):
 197         # if we're not updating in place and a destination file name
 198         # was provided, create a file object
 199         dst = open(dst, 'w')
 200
 201     for line in new_lines:
 202         dst.write(line)
 203         dst.write('\n')