util: Optionally search entire history when listing changes
[gem5.git] / util / maint / git-patch-to-hg-patch
1 #!/usr/bin/env python2
2 #
3 # This file originated from the moz-git-tools repo on GitHub
4 # (https://github.com/mozilla/moz-git-tools), which contains the
5 # following LICENSE notice:
6 #
7 # <quote>
8 # Except for git-new-workdir, which is covered under GPLv2, the code
9 # in this repository is placed into the public domain via CC0.
10 #
11 # http://creativecommons.org/publicdomain/zero/1.0/legalcode
12 # </quote>
13
14 r"""Git format-patch to hg importable patch.
15
16 (Who knew this was so complicated?)
17
18 >>> process(StringIO('From 3ce1ccc06 Mon Sep 17 00:00:00 2001\nFrom: fromuser\nSubject: subject\n\nRest of patch.\nMore patch.\n'))
19 '# HG changeset patch\n# User fromuser\n\nsubject\n\nRest of patch.\nMore patch.\n'
20
21 >>> process(StringIO('From: fromuser\nSubject: A very long subject line. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Morbi faucibus, arcu sit amet\n\nRest of patch.\nMore patch.\n'))
22 '# HG changeset patch\n# User fromuser\n\nA very long subject line. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Morbi faucibus, arcu sit amet\n\nRest of patch.\nMore patch.\n'
23
24 >>> process(StringIO('From: f\nSubject: =?UTF-8?q?Bug=20655877=20-=20Dont=20treat=20SVG=20text=20frames=20?= =?UTF-8?q?as=20being=20positioned.=20r=3D=3F?=\n\nPatch.'))
25 '# HG changeset patch\n# User f\n\nBug 655877 - Dont treat SVG text frames as being positioned. r=?\n\nPatch.'
26 """
27
28 # Original author: bholley
29
30 import sys
31 import re
32 import fileinput
33 import email, email.parser, email.header, email.utils
34 import math
35 from cStringIO import StringIO
36 from itertools import takewhile
37
def decode_header(hdr_string):
    r"""Decode the MIME encoded-words of a header into plain UTF-8 text.

    The value is wrapped in an ``email.header.Header`` and split into
    ``(text, charset)`` parts by ``email.header.decode_header``.  Parts that
    carry a charset are decoded from it and re-encoded as UTF-8; parts
    without one are kept untouched.  The parts are then joined with single
    spaces.

    >>> decode_header('[PATCH] =?UTF-8?q?Bug=20655877=20r=3D=3F?=')
    '[PATCH] Bug 655877 r=?'
    """
    # An infinite maximum line length is passed to Header; presumably this
    # keeps long values from being re-folded when the header is serialised.
    header = email.header.Header(hdr_string, maxlinelen=float('inf'))
    pieces = [
        text if charset is None else text.decode(charset).encode('utf-8')
        for (text, charset) in email.header.decode_header(header)
    ]
    return ' '.join(pieces)
52
def clean_header(hdr_string):
    r"""Decode a header and undo the folding of continuation lines.

    The header is first passed through decode_header().  Every line of the
    result that begins with a space is treated as the continuation of the
    previous line and glued onto it (the space is kept); every other line
    starts a new output line.  hg looks at the first line of the commit
    message, so a subject folded over several lines has to be joined again.

    Text that precedes the first line not starting with a space is
    discarded.

    >>> clean_header('Foo\n bar\n baz')
    'Foo bar baz'
    >>> clean_header('Foo\n bar\nSpam\nEggs')
    'Foo bar\nSpam\nEggs'
    """
    # Slot 0 is a placeholder that only collects leading continuation lines;
    # it is dropped from the result.
    unfolded = ['']
    for piece in decode_header(hdr_string).split('\n'):
        if piece.startswith(' '):
            unfolded[-1] += piece
        else:
            unfolded.append(piece)
    return '\n'.join(unfolded[1:])
75
def process(git_patch_file):
    r"""Convert one ``git format-patch`` message into an hg-importable patch.

    Args:
        git_patch_file: Readable file-like object containing the output of
            ``git format-patch``.  It does not need a ``name`` attribute.

    Returns:
        The converted patch as one string: the ``# HG changeset patch``
        marker, a ``# User`` line, the commit title and the patch body.
        Returns None, after writing a warning to stderr, when the cleaned
        From or Subject header is empty.
    """
    msg = email.parser.Parser().parse(git_patch_file)
    from_hdr = clean_header(msg['From'])
    commit_title = clean_header(msg['subject'])
    if not commit_title or not from_hdr:
        # In-memory file objects (e.g. StringIO) have no .name; fall back to
        # a placeholder so the warning is printed instead of raising
        # AttributeError.
        source = getattr(git_patch_file, 'name', '<unknown>')
        sys.stderr.write("%s does not look like a valid git patch file, skipping\n"
                         % source)
        return None

    # Strip the "[PATCH] " / "[PATCH n/m] " prefix from the subject.
    match = re.match(r"\[PATCH( \d+/\d+)?\] ", commit_title)
    if match:
        commit_title = commit_title[match.end():]

    # Emit "Name <address>" only when both parts were parsed.  A bare
    # "From: fromuser" becomes "# User fromuser" (not "# User  <fromuser>"),
    # and a header parseaddr cannot parse is passed through unchanged rather
    # than being turned into "# User  <>".
    name, address = email.utils.parseaddr(from_hdr)
    if name and address:
        user = '%s <%s>' % (name, address)
    else:
        user = address or from_hdr

    patch_body = msg.get_payload()

    # git format-patch wraps the diff (including trailing whitespace):
    # ---
    # <diff>
    # --
    # 2.0.3
    # This doesn't hurt parsing the diff at all, but the version number is
    # nonsense once the git specific items have been stripped
    patch_body = re.sub(r'--\s?\n[0-9\.]+\n$', '', patch_body)

    return '\n'.join(['# HG changeset patch',
                      '# User %s' % user,
                      '',
                      commit_title,
                      '',
                      patch_body])
109
if __name__ == "__main__":
    # "--test" as the first argument runs the module's doctests and exits.
    if len(sys.argv) > 1 and sys.argv[1] == '--test':
        import doctest
        doctest.testmod()
        sys.exit(0)

    # If there were no arguments, do stdin->stdout.
    filelist = sys.argv[1:]
    if not filelist:
        converted = process(sys.stdin)
        if converted is None:
            # process() already wrote a warning to stderr; exit non-zero
            # instead of crashing on writelines(None).
            sys.exit(1)
        sys.stdout.write(converted)
        sys.exit(0)

    # Otherwise, we take a list of files and convert each one in place.
    for filename in filelist:
        # Read and convert; the handle is closed even if process() raises.
        with open(filename, 'r') as patch_file:
            converted = process(patch_file)

        # Files that were skipped (None) or produced nothing are left
        # untouched; everything else is written back to the same file.
        if converted:
            with open(filename, 'w') as patch_file:
                patch_file.write(converted)