changeset 225:cde57730faa7

store non utf-8 encoded author/commit message as deltas
author Abderrahim Kitouni <a.kitouni@gmail.com>
date Fri, 24 Jul 2009 21:23:35 +0100
parents 80d67ae190df
children 3b8804c59b63
files git_handler.py tests/test-encoding.out
diffstat 2 files changed, 33 insertions(+), 31 deletions(-) [+]
line wrap: on
line diff
--- a/git_handler.py
+++ b/git_handler.py
@@ -3,6 +3,7 @@
 
 from dulwich.index import commit_tree
 from dulwich.objects import Blob, Commit, Tag, Tree
+from dulwich.pack import create_delta, apply_delta
 from dulwich.repo import Repo
 
 from hgext import bookmarks
@@ -212,33 +213,32 @@
         commit.tree = tree_sha
         (time, timezone) = ctx.date()
 
-        if 'author' in extra:
-            author = extra['author']
-        else:
-            # hg authors might not have emails
-            author = ctx.user()
+        # hg authors might not have emails
+        author = ctx.user()
+
+        # check for git author pattern compliance
+        regex = re.compile('^(.*?) \<(.*?)\>(.*)$')
+        a = regex.match(author)
 
-            # check for git author pattern compliance
-            regex = re.compile('^(.*?) \<(.*?)\>(.*)$')
-            a = regex.match(author)
+        if a:
+            name = a.group(1)
+            email = a.group(2)
+            if len(a.group(3)) > 0:
+                name += ' ext:(' + urllib.quote(a.group(3)) + ')'
+            author = name + ' <' + email + '>'
+        else:
+            author = author + ' <none@none>'
 
-            if a:
-                name = a.group(1)
-                email = a.group(2)
-                if len(a.group(3)) > 0:
-                    name += ' ext:(' + urllib.quote(a.group(3)) + ')'
-                author = name + ' <' + email + '>'
-            else:
-                author = author + ' <none@none>'
+        if 'author' in extra:
+            author = apply_delta(author, extra['author'])
 
         commit.author = author
         commit.author_time = int(time)
         commit.author_timezone = -timezone
 
+        commit.message = ctx.description() + "\n"
         if 'message' in extra:
-            commit.message = extra['message']
-        else:
-            commit.message = ctx.description() + "\n"
+            commit.message = apply_delta(commit.message, extra['message'])
 
         if 'committer' in extra:
             # fixup timezone
@@ -373,8 +373,9 @@
         try:
             text.decode('utf-8')
         except UnicodeDecodeError:
-            extra['message'] = text
+            origtext = text
             text = self.decode_guess(text, commit.encoding)
+            extra['message'] = create_delta(text, origtext)
 
         author = commit.author
 
@@ -394,8 +395,9 @@
         try:
             author.decode('utf-8')
         except UnicodeDecodeError:
-            extra['author'] = author
+            origauthor = author
             author = self.decode_guess(author, commit.encoding)
+            extra['author'] = create_delta(author, origauthor)
 
         oldenc = self.swap_out_encoding()
 
--- a/tests/test-encoding.out
+++ b/tests/test-encoding.out
@@ -16,48 +16,48 @@
 at: 0/4
 updating working directory
 4 files updated, 0 files merged, 0 files removed, 0 files unresolved
-@  changeset:   3:983a70eb26960008c2d503306d2c2b047e1b2c0c
+@  changeset:   3:8549ee7fe0801b2dafc06047ca6f66d36da709f5
 |  tag:         master
 |  tag:         default/master
 |  tag:         tip
-|  parent:      2:c7896464e6b20cb173d7f3dbe69d99498aa4264a
+|  parent:      2:0422fbb4ec39fb69e87b94a3874ac890333de11a
 |  parent:      -1:0000000000000000000000000000000000000000
 |  manifest:    3:ea49f93388380ead5601c8fcbfa187516e7c2ed8
 |  user:        tést èncödîng <test@example.org>
 |  date:        Mon Jan 01 00:00:13 2007 +0000
 |  files+:      delta
-|  extra:       author=t\xe9st \xe8nc\xf6d\xeeng <test@example.org>
+|  extra:       author=$ \x90\x01\x01\xe9\x91\x03\x03\x01\xe8\x91\x08\x02\x01\xf6\x91\x0c\x01\x01\xee\x91\x0f\x15
 |  extra:       branch=default
 |  extra:       committer=test <test@example.org> 1167609613 0
 |  extra:       encoding=latin-1
-|  extra:       message=add d\xe9lt\xe0\n
+|  extra:       message=\x0c\n\x90\x05\x01\xe9\x91\x07\x02\x01\xe0\x91\x0b\x01
 |  description:
 |  add déltà
 |
 |
-o  changeset:   2:c7896464e6b20cb173d7f3dbe69d99498aa4264a
-|  parent:      1:ee2901710a4e5d3a1c79f937c0e2ee62074489de
+o  changeset:   2:0422fbb4ec39fb69e87b94a3874ac890333de11a
+|  parent:      1:9f6268bfc9eb3956c5ab8752d7b983b0ffe57115
 |  parent:      -1:0000000000000000000000000000000000000000
 |  manifest:    2:f580e7da3673c137370da2b931a1dee83590d7b4
 |  user:        tést èncödîng <test@example.org>
 |  date:        Mon Jan 01 00:00:12 2007 +0000
 |  files+:      gamma
-|  extra:       author=t\xe9st \xe8nc\xf6d\xeeng <test@example.org>
+|  extra:       author=$ \x90\x01\x01\xe9\x91\x03\x03\x01\xe8\x91\x08\x02\x01\xf6\x91\x0c\x01\x01\xee\x91\x0f\x15
 |  extra:       branch=default
 |  extra:       committer=test <test@example.org> 1167609612 0
-|  extra:       message=add g\xe4mm\xe2\n
+|  extra:       message=\x0c\n\x90\x05\x01\xe4\x91\x07\x02\x01\xe2\x91\x0b\x01
 |  description:
 |  add gämmâ
 |
 |
-o  changeset:   1:ee2901710a4e5d3a1c79f937c0e2ee62074489de
+o  changeset:   1:9f6268bfc9eb3956c5ab8752d7b983b0ffe57115
 |  parent:      0:bb7d36568d6188ce0de2392246c43f6f213df954
 |  parent:      -1:0000000000000000000000000000000000000000
 |  manifest:    1:f0bd6fbafbaebe4bb59c35108428f6fce152431d
 |  user:        tést èncödîng <test@example.org>
 |  date:        Mon Jan 01 00:00:11 2007 +0000
 |  files+:      beta
-|  extra:       author=t\xe9st \xe8nc\xf6d\xeeng <test@example.org>
+|  extra:       author=$ \x90\x01\x01\xe9\x91\x03\x03\x01\xe8\x91\x08\x02\x01\xf6\x91\x0c\x01\x01\xee\x91\x0f\x15
 |  extra:       branch=default
 |  extra:       committer=test <test@example.org> 1167609611 0
 |  description: