3 # This file originated from the moz-git-tools repo on GitHub
4 # (https://github.com/mozilla/moz-git-tools), which contains the
5 # following LICENSE notice:
8 # Except for git-new-workdir, which is covered under GPLv2, the code
9 # in this repository is placed into the public domain via CC0.
11 # http://creativecommons.org/publicdomain/zero/1.0/legalcode
r"""Git format-patch to hg importable patch.

(Who knew this was so complicated?)

>>> process(StringIO('From 3ce1ccc06 Mon Sep 17 00:00:00 2001\nFrom: fromuser\nSubject: subject\n\nRest of patch.\nMore patch.\n'))
'# HG changeset patch\n# User fromuser\n\nsubject\n\nRest of patch.\nMore patch.\n'

>>> process(StringIO('From: fromuser\nSubject: A very long subject line. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Morbi faucibus, arcu sit amet\n\nRest of patch.\nMore patch.\n'))
'# HG changeset patch\n# User fromuser\n\nA very long subject line. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Morbi faucibus, arcu sit amet\n\nRest of patch.\nMore patch.\n'

>>> process(StringIO('From: f\nSubject: =?UTF-8?q?Bug=20655877=20-=20Dont=20treat=20SVG=20text=20frames=20?= =?UTF-8?q?as=20being=20positioned.=20r=3D=3F?=\n\nPatch.'))
'# HG changeset patch\n# User f\n\nBug 655877 - Dont treat SVG text frames as being positioned. r=?\n\nPatch.'
"""
28 # Original author: bholley
import email
import email.header
import email.parser
import email.utils
import re
import sys
from itertools import takewhile

try:
    from cStringIO import StringIO  # Python 2
except ImportError:
    from io import StringIO  # Python 3
def decode_header(hdr_string):
    r"""Decode the RFC 2047 encoded words of a header value.

    The value is wrapped in an ``email.header.Header`` with an infinite
    ``maxlinelen`` so it is never re-folded; a missing header (``None``)
    becomes the empty string.  The decoded fragments are joined with single
    spaces.

    Args:
        hdr_string: raw header value, or ``None`` if the header is absent.

    Returns:
        A native ``str``: UTF-8 encoded bytes on Python 2, text on Python 3.

    >>> decode_header('[PATCH] =?UTF-8?q?Bug=20655877=20r=3D=3F?=')
    '[PATCH] Bug 655877 r=?'
    >>> decode_header('plain subject')
    'plain subject'
    """
    hdr = email.header.Header(hdr_string, maxlinelen=float('inf'))
    rv = []
    for (part, encoding) in email.header.decode_header(str(hdr)):
        if not isinstance(part, str):
            # Python 3 yields bytes once the header contains an encoded word;
            # unencoded runs are tagged with a charset of None.
            text = part.decode(encoding or 'raw-unicode-escape')
            if encoding is None:
                # Python 2 strips unencoded runs and drops empty ones; do the
                # same so the ' '.join below never doubles a space.
                text = text.strip()
                if not text:
                    continue
            part = text
        elif encoding is not None:
            # Python 2 byte string in a declared charset: normalise to UTF-8.
            part = part.decode(encoding).encode('utf-8')
        rv.append(part)
    return ' '.join(rv)
def clean_header(hdr_string):
    r"""Transform a header split over many lines into a header split only where
    linebreaks are intended.  This is important because hg cares about the
    first line of the commit message.

    A line that starts with a space is a continuation and is appended to the
    line before it; any other line starts a new logical line.  Encoded words
    are decoded first via ``decode_header``.

    Args:
        hdr_string: raw header value, or ``None`` if the header is absent.

    Returns:
        The unfolded header text, logical lines separated by ``'\n'``.

    >>> clean_header('Foo\n bar\n baz')
    'Foo bar baz'
    >>> clean_header('Foo\n bar\nSpam\nEggs')
    'Foo bar\nSpam\nEggs'
    """
    lines = []
    for line in decode_header(hdr_string).split('\n'):
        if lines and line.startswith(' '):
            # Continuation: glue onto the logical line being built.
            lines[-1] += line
        else:
            # Also taken for a first line that happens to start with a space,
            # so such a value is kept instead of being discarded.
            lines.append(line)
    return '\n'.join(lines)
def process(git_patch_file):
    """Convert one ``git format-patch`` mail into an hg-importable patch.

    Args:
        git_patch_file: open text file object holding the git patch.

    Returns:
        The patch text with an ``# HG changeset patch`` header, or ``None``
        (after a message on stderr) when the input has no usable ``From`` or
        ``Subject`` header.
    """
    msg = email.parser.Parser().parse(git_patch_file)
    from_hdr = clean_header(msg['From'])
    commit_title = clean_header(msg['subject'])
    if not commit_title or not from_hdr:
        # Not every file object carries a name (e.g. StringIO), so fall back
        # to a placeholder instead of raising AttributeError.
        sys.stderr.write("%s does not look like a valid git patch file, skipping\n"
                         % getattr(git_patch_file, 'name', '<input>'))
        return None

    name, address = email.utils.parseaddr(from_hdr)
    if name and address:
        user = '%s <%s>' % (name, address)
    else:
        # No display name: emit the bare address rather than " <address>".
        user = address or name

    # Drop the "[PATCH] " / "[PATCH n/m] " prefix git puts on the subject.
    nuke_prefix = r"\[PATCH( \d+/\d+)?\] "
    match = re.match(nuke_prefix, commit_title)
    if match:
        commit_title = commit_title[match.end():]

    patch_body = msg.get_payload()

    # git format-patch ends the mail with a "-- " line followed by the git
    # version number.  This doesn't hurt parsing the diff at all, but the
    # version number is nonsense once the git specific items have been
    # stripped.
    patch_body = re.sub(r'--\s?\n[0-9\.]+\n$', '', patch_body)

    return '\n'.join(['# HG changeset patch',
                      '# User %s' % user,
                      '',
                      commit_title,
                      '',
                      patch_body])
if __name__ == "__main__":
    # "--test" runs the doctests embedded in this module and exits.
    if len(sys.argv) > 1 and sys.argv[1] == '--test':
        import doctest
        doctest.testmod()
        sys.exit(0)

    # If there were no arguments, do stdin->stdout.
    filelist = sys.argv[1:]
    if not filelist:
        converted = process(sys.stdin)
        if converted:
            sys.stdout.write(converted)
        # process() returns None for input it rejects; report that through
        # the exit status instead of crashing while writing None.
        sys.exit(0 if converted else 1)

    # Otherwise, we take a list of files.
    for filename in filelist:
        with open(filename, 'r') as f:
            converted = process(f)

        # Rejected files are left untouched.
        if converted:
            # Write the result back to the same file.
            with open(filename, 'w') as f:
                f.write(converted)