From: Gabe Black Date: Thu, 27 Aug 2020 08:52:04 +0000 (-0700) Subject: util: Explicitly decode/encode in utf-8. X-Git-Tag: v20.1.0.0~217 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=52f392b947cd46064fac464f89decaa4a313ea73;p=gem5.git util: Explicitly decode/encode in utf-8. The default encoding for Python 2 is ASCII, which can't handle some characters in, for instance, people's names that have accented letters. This change explicitly selects the UTF-8 encoding, which pacifies Python and is mostly equivalent except in these rare cases. In Python 3, the default encoding is UTF-8 to begin with, and it's no longer possible to change it. In this case, explicitly selecting the encoding is redundant but harmless. Once we support only Python 3, this change can be reverted. Thanks to Lakin Smith for proposing a related solution and pointing out some information that led to this one. Change-Id: I99bd59063c77edd712954ffe90d7de320ade49ea Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/33575 Maintainer: Gabe Black Tested-by: kokoro Reviewed-by: Lakin Smith Reviewed-by: Jason Lowe-Power --- diff --git a/util/git-pre-commit.py b/util/git-pre-commit.py index b6d124abb..7681b878b 100755 --- a/util/git-pre-commit.py +++ b/util/git-pre-commit.py @@ -79,7 +79,7 @@ for status, fname in git.status(filter="MA", cached=True): # Show they appropriate object and dump it to a file status = git.file_from_index(fname) f = TemporaryFile() - f.write(status.encode()) + f.write(status.encode('utf-8')) verifiers = [ v(ui, opts, base=repo_base) for v in all_verifiers ] for v in verifiers: diff --git a/util/style/repo.py b/util/style/repo.py index f66c16b68..b5b425624 100644 --- a/util/style/repo.py +++ b/util/style/repo.py @@ -186,7 +186,7 @@ class GitRepo(AbstractRepo): if filter: cmd += [ "--diff-filter=%s" % filter ] cmd += [ self.head_revision(), "--" ] + files - status = subprocess.check_output(cmd).decode().rstrip("\n") + status = 
subprocess.check_output(cmd).decode('utf-8').rstrip("\n") if status: return [ f.split("\t") for f in status.split("\n") ] @@ -195,12 +195,12 @@ class GitRepo(AbstractRepo): def file_from_index(self, name): return subprocess.check_output( - [ self.git, "show", ":%s" % (name, ) ]).decode() + [ self.git, "show", ":%s" % (name, ) ]).decode('utf-8') def file_from_head(self, name): return subprocess.check_output( [ self.git, "show", "%s:%s" % (self.head_revision(), name) ]) \ - .decode() + .decode('utf-8') def detect_repo(path="."): """Auto-detect the revision control system used for a source code diff --git a/util/style/verifiers.py b/util/style/verifiers.py index 85f31cee1..681efac51 100644 --- a/util/style/verifiers.py +++ b/util/style/verifiers.py @@ -239,7 +239,7 @@ class LineVerifier(Verifier): for num,line in enumerate(fobj): if num not in regions: continue - s_line = line.decode().rstrip('\n') + s_line = line.decode('utf-8').rstrip('\n') if not self.check_line(s_line, language=lang): if not silent: self.ui.write("invalid %s in %s:%d\n" % \ @@ -351,7 +351,7 @@ class SortedIncludes(Verifier): close = True norm_fname = self.normalize_filename(filename) - old = [ l.decode().rstrip('\n') for l in fobj ] + old = [ l.decode('utf-8').rstrip('\n') for l in fobj ] if close: fobj.close()