Mercurial > hg > hg-git
changeset 186:f4caf22b87cd
Handle git repositories with legacy encodings.
author | Abderrahim Kitouni <a.kitouni@gmail.com> |
---|---|
date | Thu, 18 Jun 2009 16:49:13 +0100 |
parents | 1224d118ac92 |
children | 5f196f80ffb3 |
files | git_handler.py tests/latin-1-encoding tests/test-encoding |
diffstat | 3 files changed, 154 insertions(+), 31 deletions(-) [+] |
line wrap: on
line diff
--- a/git_handler.py +++ b/git_handler.py @@ -197,24 +197,32 @@ commit['tree'] = tree_sha (time, timezone) = ctx.date() - # hg authors might not have emails - author = ctx.user() + if 'git-author' in extra: + author = extra['git-author'] + else: + # hg authors might not have emails + author = ctx.user() - # check for git author pattern compliance - regex = re.compile('^(.*?) \<(.*?)\>(.*)$') - a = regex.match(author) + # check for git author pattern compliance + regex = re.compile('^(.*?) \<(.*?)\>(.*)$') + a = regex.match(author) - if a: - name = a.group(1) - email = a.group(2) - if len(a.group(3)) > 0: - name += ' ext:(' + urllib.quote(a.group(3)) + ')' - author = name + ' <' + email + '>' + if a: + name = a.group(1) + email = a.group(2) + if len(a.group(3)) > 0: + name += ' ext:(' + urllib.quote(a.group(3)) + ')' + author = name + ' <' + email + '>' + else: + author = author + ' <none@none>' + + commit['author'] = author + ' ' + str(int(time)) + ' ' + format_timezone(-timezone) + + if 'git-commit-message' in extra: + commit['message'] = extra['git-commit-message'] else: - author = author + ' <none@none>' - commit['author'] = author + ' ' + str(int(time)) + ' ' + format_timezone(-timezone) - message = ctx.description() - commit['message'] = ctx.description() + "\n" + message = ctx.description() + commit['message'] = ctx.description() + "\n" if 'committer' in extra: # fixup timezone @@ -443,6 +451,35 @@ date = (commit.author_time, -commit.author_timezone) text = strip_message + try: + text.decode('utf-8') + except UnicodeDecodeError: + extra['git-commit-message'] = text + text = self.decode_guess(text, commit._encoding) + + author = commit.author + + # convert extra data back to the end + if ' ext:' in commit.author: + regex = re.compile('^(.*?)\ ext:\((.*)\) <(.*)\>$') + m = regex.match(commit.author) + if m: + name = m.group(1) + ex = urllib.unquote(m.group(2)) + email = m.group(3) + author = name + ' <' + email + '>' + ex + + if ' <none@none>' in commit.author: + author = commit.author[:-12] + + try: + author.decode('utf-8') + except UnicodeDecodeError: + extra['git-author'] = author + author = self.decode_guess(author, commit._encoding) + + oldenc = self.swap_out_encoding() + def getfilectx(repo, memctx, f): try: (mode, sha, data) = self.git.get_file(commit, f) @@ -463,7 +500,7 @@ # merge, possibly octopus def commit_octopus(p1, p2): ctx = context.memctx(self.repo, (p1, p2), text, files, getfilectx, - commit.author, date, {'hg-git': 'octopus'}) + author, date, {'hg-git': 'octopus'}) return hex(self.repo.commitctx(ctx)) octopus = len(gparents) > 2 @@ -484,21 +521,6 @@ node2 = self.repo.changectx(p2) pa = node1.ancestor(node2) - author = commit.author - - # convert extra data back to the end - if ' ext:' in commit.author: - regex = re.compile('^(.*?)\ ext:\((.*)\) <(.*)\>$') - m = regex.match(commit.author) - if m: - name = m.group(1) - ex = urllib.unquote(m.group(2)) - email = m.group(3) - author = name + ' <' + email + '>' + ex - - if ' <none@none>' in commit.author: - author = commit.author[:-12] - # if named branch, add to extra if hg_branch: extra['branch'] = hg_branch @@ -521,6 +543,8 @@ node = self.repo.commit_import_ctx(ctx, pa, force_files) + self.swap_out_encoding(oldenc) + # save changeset to mapping file cs = hex(node) self.map_set(commit.id, cs) @@ -790,6 +814,30 @@ if names: return names[0] + # Stolen from hgsubversion + def swap_out_encoding(self, new_encoding='UTF-8'): + try: + from mercurial import encoding + old = encoding.encoding + encoding.encoding = new_encoding + except ImportError: + old = hgutil._encoding + hgutil._encoding = new_encoding + return old + + def decode_guess(self, string, encoding): + # text is not valid utf-8, try to make sense of it + if encoding: + try: + return string.decode(encoding).encode('utf-8') + except UnicodeDecodeError: + pass + + try: + return string.decode('latin-1').encode('utf-8') + except UnicodeDecodeError: + return string.decode('ascii', 'replace').encode('utf-8') + def check_bookmarks(self): if self.ui.config('extensions', 'hgext.bookmarks') is not None: self.ui.warn("YOU NEED TO SETUP BOOKMARKS\n")
new file mode 100644 --- /dev/null +++ b/tests/latin-1-encoding @@ -0,0 +1,18 @@ +# -*- coding: latin-1 -*- + +# this file contains some latin-1 messages for test-encoding + +GIT_AUTHOR_NAME='tést èncödîng'; export GIT_AUTHOR_NAME +echo beta > beta +git add beta +commit -m 'add beta' + +echo gamma > gamma +git add gamma +commit -m 'add gämmâ' + +# test the commit encoding field +git config i18n.commitencoding latin-1 +echo delta > delta +git add delta +commit -m 'add déltà'
new file mode 100755 --- /dev/null +++ b/tests/test-encoding @@ -0,0 +1,57 @@ +#!/bin/sh + +# -*- coding: utf-8 -*- + +# Fails for some reason, need to investigate +# "$TESTDIR/hghave" git || exit 80 + +# bail early if the user is already running git-daemon +echo hi | nc localhost 9418 2>/dev/null && exit 80 + +echo "[extensions]" >> $HGRCPATH +echo "hggit=$(echo $(dirname $(dirname $0)))" >> $HGRCPATH +echo 'hgext.graphlog =' >> $HGRCPATH +echo 'hgext.bookmarks =' >> $HGRCPATH + +GIT_AUTHOR_NAME='test'; export GIT_AUTHOR_NAME +GIT_AUTHOR_EMAIL='test@example.org'; export GIT_AUTHOR_EMAIL +GIT_AUTHOR_DATE="2007-01-01 00:00:00 +0000"; export GIT_AUTHOR_DATE +GIT_COMMITTER_NAME="$GIT_AUTHOR_NAME"; export GIT_COMMITTER_NAME +GIT_COMMITTER_EMAIL="$GIT_AUTHOR_EMAIL"; export GIT_COMMITTER_EMAIL +GIT_COMMITTER_DATE="$GIT_AUTHOR_DATE"; export GIT_COMMITTER_DATE + +count=10 +commit() +{ + GIT_AUTHOR_DATE="2007-01-01 00:00:$count +0000" + GIT_COMMITTER_DATE="$GIT_AUTHOR_DATE" + git commit "$@" >/dev/null || echo "git commit error" + count=`expr $count + 1` +} + +mkdir gitrepo +cd gitrepo +git init | python -c "import sys; print sys.stdin.read().replace('$(dirname $(pwd))/', '')" + +# utf-8 encoded commit message +echo alpha > alpha +git add alpha +commit -m 'add älphà ' + +. $TESTDIR/latin-1-encoding + +# dulwich does not presently support local git repos, workaround +cd .. +git daemon --base-path="$(pwd)"\ + --listen=localhost\ + --export-all\ + --pid-file=gitdaemon.pid \ + --detach --reuseaddr + +hg clone git://localhost/gitrepo hgrepo +cd hgrepo + +HGENCODING=utf-8 hg log --graph --debug + +cd .. +kill `cat gitdaemon.pid`