Mercurial > hg > hg-git
diff git_handler.py @ 186:f4caf22b87cd
Handle git repositories with legacy encodings.
author | Abderrahim Kitouni <a.kitouni@gmail.com> |
---|---|
date | Thu, 18 Jun 2009 16:49:13 +0100 (2009-06-18) |
parents | 7bf98d3085f4 |
children | 5f196f80ffb3 |
line wrap: on
line diff
--- a/git_handler.py +++ b/git_handler.py @@ -197,24 +197,32 @@ commit['tree'] = tree_sha (time, timezone) = ctx.date() - # hg authors might not have emails - author = ctx.user() + if 'git-author' in extra: + author = extra['git-author'] + else: + # hg authors might not have emails + author = ctx.user() - # check for git author pattern compliance - regex = re.compile('^(.*?) \<(.*?)\>(.*)$') - a = regex.match(author) + # check for git author pattern compliance + regex = re.compile('^(.*?) \<(.*?)\>(.*)$') + a = regex.match(author) - if a: - name = a.group(1) - email = a.group(2) - if len(a.group(3)) > 0: - name += ' ext:(' + urllib.quote(a.group(3)) + ')' - author = name + ' <' + email + '>' + if a: + name = a.group(1) + email = a.group(2) + if len(a.group(3)) > 0: + name += ' ext:(' + urllib.quote(a.group(3)) + ')' + author = name + ' <' + email + '>' + else: + author = author + ' <none@none>' + + commit['author'] = author + ' ' + str(int(time)) + ' ' + format_timezone(-timezone) + + if 'git-commit-message' in extra: + commit['message'] = extra['git-commit-message'] else: - author = author + ' <none@none>' - commit['author'] = author + ' ' + str(int(time)) + ' ' + format_timezone(-timezone) - message = ctx.description() - commit['message'] = ctx.description() + "\n" + message = ctx.description() + commit['message'] = ctx.description() + "\n" if 'committer' in extra: # fixup timezone @@ -443,6 +451,35 @@ date = (commit.author_time, -commit.author_timezone) text = strip_message + try: + text.decode('utf-8') + except UnicodeDecodeError: + extra['git-commit-message'] = text + text = self.decode_guess(text, commit._encoding) + + author = commit.author + + # convert extra data back to the end + if ' ext:' in commit.author: + regex = re.compile('^(.*?)\ ext:\((.*)\) <(.*)\>$') + m = regex.match(commit.author) + if m: + name = m.group(1) + ex = urllib.unquote(m.group(2)) + email = m.group(3) + author = name + ' <' + email + '>' + ex + + if ' <none@none>' in commit.author: + author = commit.author[:-12] + + try: + author.decode('utf-8') + except UnicodeDecodeError: + extra['git-author'] = author + author = self.decode_guess(author, commit._encoding) + + oldenc = self.swap_out_encoding() + def getfilectx(repo, memctx, f): try: (mode, sha, data) = self.git.get_file(commit, f) @@ -463,7 +500,7 @@ # merge, possibly octopus def commit_octopus(p1, p2): ctx = context.memctx(self.repo, (p1, p2), text, files, getfilectx, - commit.author, date, {'hg-git': 'octopus'}) + author, date, {'hg-git': 'octopus'}) return hex(self.repo.commitctx(ctx)) octopus = len(gparents) > 2 @@ -484,21 +521,6 @@ node2 = self.repo.changectx(p2) pa = node1.ancestor(node2) - author = commit.author - - # convert extra data back to the end - if ' ext:' in commit.author: - regex = re.compile('^(.*?)\ ext:\((.*)\) <(.*)\>$') - m = regex.match(commit.author) - if m: - name = m.group(1) - ex = urllib.unquote(m.group(2)) - email = m.group(3) - author = name + ' <' + email + '>' + ex - - if ' <none@none>' in commit.author: - author = commit.author[:-12] - # if named branch, add to extra if hg_branch: extra['branch'] = hg_branch @@ -521,6 +543,8 @@ node = self.repo.commit_import_ctx(ctx, pa, force_files) + self.swap_out_encoding(oldenc) + # save changeset to mapping file cs = hex(node) self.map_set(commit.id, cs) @@ -790,6 +814,30 @@ if names: return names[0] + # Stolen from hgsubversion + def swap_out_encoding(self, new_encoding='UTF-8'): + try: + from mercurial import encoding + old = encoding.encoding + encoding.encoding = new_encoding + except ImportError: + old = hgutil._encoding + hgutil._encoding = new_encoding + return old + + def decode_guess(self, string, encoding): + # text is not valid utf-8, try to make sense of it + if encoding: + try: + return string.decode(encoding).encode('utf-8') + except UnicodeDecodeError: + pass + + try: + return string.decode('latin-1').encode('utf-8') + except UnicodeDecodeError: + return string.decode('ascii', 'replace').encode('utf-8') + def check_bookmarks(self): if self.ui.config('extensions', 'hgext.bookmarks') is not None: self.ui.warn("YOU NEED TO SETUP BOOKMARKS\n")