# HG changeset patch # User Abderrahim Kitouni # Date 1248467015 -3600 # Node ID cde57730faa7b6fdfe85d96d46030bfa1d9423ee # Parent 80d67ae190df650bdd06fb19ab80fcbbaaa24880 store non utf-8 encoded author/commit message as deltas diff --git a/git_handler.py b/git_handler.py --- a/git_handler.py +++ b/git_handler.py @@ -3,6 +3,7 @@ from dulwich.index import commit_tree from dulwich.objects import Blob, Commit, Tag, Tree +from dulwich.pack import create_delta, apply_delta from dulwich.repo import Repo from hgext import bookmarks @@ -212,33 +213,32 @@ commit.tree = tree_sha (time, timezone) = ctx.date() - if 'author' in extra: - author = extra['author'] - else: - # hg authors might not have emails - author = ctx.user() + # hg authors might not have emails + author = ctx.user() + + # check for git author pattern compliance + regex = re.compile('^(.*?) \<(.*?)\>(.*)$') + a = regex.match(author) - # check for git author pattern compliance - regex = re.compile('^(.*?) \<(.*?)\>(.*)$') - a = regex.match(author) + if a: + name = a.group(1) + email = a.group(2) + if len(a.group(3)) > 0: + name += ' ext:(' + urllib.quote(a.group(3)) + ')' + author = name + ' <' + email + '>' + else: + author = author + ' ' - if a: - name = a.group(1) - email = a.group(2) - if len(a.group(3)) > 0: - name += ' ext:(' + urllib.quote(a.group(3)) + ')' - author = name + ' <' + email + '>' - else: - author = author + ' ' + if 'author' in extra: + author = apply_delta(author, extra['author']) commit.author = author commit.author_time = int(time) commit.author_timezone = -timezone + commit.message = ctx.description() + "\n" if 'message' in extra: - commit.message = extra['message'] - else: - commit.message = ctx.description() + "\n" + commit.message = apply_delta(commit.message, extra['message']) if 'committer' in extra: # fixup timezone @@ -373,8 +373,9 @@ try: text.decode('utf-8') except UnicodeDecodeError: - extra['message'] = text + origtext = text text = self.decode_guess(text, commit.encoding) + extra['message'] = create_delta(text, origtext) author = commit.author @@ -394,8 +395,9 @@ try: author.decode('utf-8') except UnicodeDecodeError: - extra['author'] = author + origauthor = author author = self.decode_guess(author, commit.encoding) + extra['author'] = create_delta(author, origauthor) oldenc = self.swap_out_encoding() diff --git a/tests/test-encoding.out b/tests/test-encoding.out --- a/tests/test-encoding.out +++ b/tests/test-encoding.out @@ -16,48 +16,48 @@ at: 0/4 updating working directory 4 files updated, 0 files merged, 0 files removed, 0 files unresolved -@ changeset: 3:983a70eb26960008c2d503306d2c2b047e1b2c0c +@ changeset: 3:8549ee7fe0801b2dafc06047ca6f66d36da709f5 | tag: master | tag: default/master | tag: tip -| parent: 2:c7896464e6b20cb173d7f3dbe69d99498aa4264a +| parent: 2:0422fbb4ec39fb69e87b94a3874ac890333de11a | parent: -1:0000000000000000000000000000000000000000 | manifest: 3:ea49f93388380ead5601c8fcbfa187516e7c2ed8 | user: tést èncödîng | date: Mon Jan 01 00:00:13 2007 +0000 | files+: delta -| extra: author=t\xe9st \xe8nc\xf6d\xeeng +| extra: author=$ \x90\x01\x01\xe9\x91\x03\x03\x01\xe8\x91\x08\x02\x01\xf6\x91\x0c\x01\x01\xee\x91\x0f\x15 | extra: branch=default | extra: committer=test 1167609613 0 | extra: encoding=latin-1 -| extra: message=add d\xe9lt\xe0\n +| extra: message=\x0c\n\x90\x05\x01\xe9\x91\x07\x02\x01\xe0\x91\x0b\x01 | description: | add déltà | | -o changeset: 2:c7896464e6b20cb173d7f3dbe69d99498aa4264a -| parent: 1:ee2901710a4e5d3a1c79f937c0e2ee62074489de +o changeset: 2:0422fbb4ec39fb69e87b94a3874ac890333de11a +| parent: 1:9f6268bfc9eb3956c5ab8752d7b983b0ffe57115 | parent: -1:0000000000000000000000000000000000000000 | manifest: 2:f580e7da3673c137370da2b931a1dee83590d7b4 | user: tést èncödîng | date: Mon Jan 01 00:00:12 2007 +0000 | files+: gamma -| extra: author=t\xe9st \xe8nc\xf6d\xeeng +| extra: author=$ \x90\x01\x01\xe9\x91\x03\x03\x01\xe8\x91\x08\x02\x01\xf6\x91\x0c\x01\x01\xee\x91\x0f\x15 | extra: branch=default | extra: committer=test 1167609612 0 -| extra: message=add g\xe4mm\xe2\n +| extra: message=\x0c\n\x90\x05\x01\xe4\x91\x07\x02\x01\xe2\x91\x0b\x01 | description: | add gämmâ | | -o changeset: 1:ee2901710a4e5d3a1c79f937c0e2ee62074489de +o changeset: 1:9f6268bfc9eb3956c5ab8752d7b983b0ffe57115 | parent: 0:bb7d36568d6188ce0de2392246c43f6f213df954 | parent: -1:0000000000000000000000000000000000000000 | manifest: 1:f0bd6fbafbaebe4bb59c35108428f6fce152431d | user: tést èncödîng | date: Mon Jan 01 00:00:11 2007 +0000 | files+: beta -| extra: author=t\xe9st \xe8nc\xf6d\xeeng +| extra: author=$ \x90\x01\x01\xe9\x91\x03\x03\x01\xe8\x91\x08\x02\x01\xf6\x91\x0c\x01\x01\xee\x91\x0f\x15 | extra: branch=default | extra: committer=test 1167609611 0 | description: