changeset 612:60a4d55fdaa6

Merge a work-around for a bug in dulwich. I've been waiting for dulwich upstream to fix this *and* for a test from domruf that's acceptable. Having gotten neither over a period of /months/, and having hit the bug myself, I'm moving on and accepting a patch without tests. This will likely break again, but hopefully before we'd break it dulwich will be fixed.
author Augie Fackler <raf@durin42.com>
date Tue, 17 Sep 2013 09:59:36 -0400
parents 30d502905da9 (current diff) 681298a09daf (diff)
children bf8518b09d57
files hggit/git_handler.py
diffstat 25 files changed, 716 insertions(+), 75 deletions(-) [+]
line wrap: on
line diff
--- a/.hgignore
+++ b/.hgignore
@@ -4,3 +4,5 @@
 build
 dist
 *.egg-info
+*.orig
+
--- a/.hgtags
+++ b/.hgtags
@@ -11,3 +11,4 @@
 a9c0b93488d4d082f813c6d91c8e473505a026c4 0.3.2
 9d44dafbb31c14126be151b78c7a41b3c110fd97 0.3.3
 586b7aa9646641b3b1083ab349bb186c79aa646b 0.3.4
+a3c3b8077cbeec7c381a4b312d722d575a738610 0.4.0
--- a/hggit/_ssh.py
+++ b/hggit/_ssh.py
@@ -11,7 +11,7 @@
     """
 
     class _Vendor(SSHVendor):
-        def connect_ssh(self, host, command, username=None, port=None):
+        def run_command(self, host, command, username=None, port=None):
             from dulwich.client import SubprocessWrapper
             from mercurial import util
             import subprocess
--- a/hggit/git_handler.py
+++ b/hggit/git_handler.py
@@ -2,10 +2,9 @@
 import stat, posixpath, StringIO
 
 from dulwich.errors import HangupException, GitProtocolError, UpdateRefsError
-from dulwich.index import commit_tree
 from dulwich.objects import Blob, Commit, Tag, Tree, parse_timezone, S_IFGITLINK
 from dulwich.pack import create_delta, apply_delta
-from dulwich.repo import Repo
+from dulwich.repo import Repo, check_ref_format
 from dulwich import client
 from dulwich import config as dul_config
 
@@ -26,9 +25,11 @@
 from mercurial import error
 
 import _ssh
+import hg2git
 import util
 from overlay import overlayrepo
 
+
 RE_GIT_AUTHOR = re.compile('^(.*?) ?\<(.*?)(?:\>(.*))?$')
 
 RE_GIT_SANITIZE_AUTHOR = re.compile('[<>\n]')
@@ -278,11 +279,21 @@
 
         if remote_name and new_refs:
             for ref, new_sha in new_refs.iteritems():
-                if new_sha != old_refs.get(ref):
-                    self.ui.note("    %s::%s => GIT:%s\n" %
+                old_sha = old_refs.get(ref)
+                if old_sha is None:
+                    if self.ui.verbose:
+                        self.ui.note("adding reference %s::%s => GIT:%s\n" %
                                    (remote_name, ref, new_sha[0:8]))
+                    else:
+                        self.ui.status("adding reference %s\n" % ref)
+                elif new_sha != old_sha:
+                    if self.ui.verbose:
+                        self.ui.note("updating reference %s::%s => GIT:%s\n" %
+                                   (remote_name, ref, new_sha[0:8]))
+                    else:
+                        self.ui.status("updating reference %s\n" % ref)
                 else:
-                    self.ui.debug("    %s::%s => GIT:%s\n" %
+                    self.ui.debug("unchanged reference %s::%s => GIT:%s\n" %
                                    (remote_name, ref, new_sha[0:8]))
 
             self.update_remote_branches(remote_name, new_refs)
@@ -340,6 +351,12 @@
         total = len(export)
         if total:
             self.ui.note(_("exporting hg objects to git\n"))
+
+        # By only exporting deltas, the assertion is that all previous objects
+        # for all other changesets are already present in the Git repository.
+        # This assertion is necessary to prevent redundant work.
+        exporter = hg2git.IncrementalChangesetExporter(self.repo)
+
         for i, rev in enumerate(export):
             util.progress(self.ui, 'exporting', i, total=total)
             ctx = self.repo.changectx(rev)
@@ -348,14 +365,14 @@
                 self.ui.debug("revision %d is a part "
                               "of octopus explosion\n" % ctx.rev())
                 continue
-            self.export_hg_commit(rev)
+            self.export_hg_commit(rev, exporter)
         util.progress(self.ui, 'importing', None, total=total)
 
 
     # convert this commit into git objects
     # go through the manifest, convert all blobs/trees we don't have
     # write the commit object (with metadata info)
-    def export_hg_commit(self, rev):
+    def export_hg_commit(self, rev, exporter):
         self.ui.note(_("converting revision %s\n") % hex(rev))
 
         oldenc = self.swap_out_encoding()
@@ -366,6 +383,12 @@
         commit = Commit()
 
         (time, timezone) = ctx.date()
+        # work around for bad timezone offsets - dulwich does not handle
+        # sub minute based timezones. In the one known case, it was a
+        # manual edit that led to the unusual value. Based on that,
+        # there is no reason to round one way or the other, so do the
+        # simplest and round down.
+        timezone -= (timezone % 60)
         commit.author = self.get_git_author(ctx)
         commit.author_time = int(time)
         commit.author_timezone = -timezone
@@ -407,7 +430,11 @@
         if 'encoding' in extra:
             commit.encoding = extra['encoding']
 
-        tree_sha = commit_tree(self.git.object_store, self.iterblobs(ctx))
+        for obj, nodeid in exporter.update_changeset(ctx):
+            self.git.object_store.add_object(obj)
+
+        tree_sha = exporter.root_tree_sha
+
         if tree_sha not in self.git.object_store:
             raise hgutil.Abort(_('Tree SHA-1 not present in Git repo: %s' %
                 tree_sha))
@@ -550,43 +577,6 @@
 
         return message
 
-    def iterblobs(self, ctx):
-        if '.hgsubstate' in ctx:
-            hgsub = util.OrderedDict()
-            if '.hgsub' in ctx:
-                hgsub = util.parse_hgsub(ctx['.hgsub'].data().splitlines())
-            hgsubstate = util.parse_hgsubstate(ctx['.hgsubstate'].data().splitlines())
-            for path, sha in hgsubstate.iteritems():
-                try:
-                    if path in hgsub and not hgsub[path].startswith('[git]'):
-                        # some other kind of a repository (e.g. [hg])
-                        # that keeps its state in .hgsubstate, shall ignore
-                        continue
-                    yield path, sha, S_IFGITLINK
-                except ValueError:
-                    pass
-
-        for f in ctx:
-            if f == '.hgsubstate' or f == '.hgsub':
-                continue
-            fctx = ctx[f]
-            blobid = self.map_git_get(hex(fctx.filenode()))
-
-            if not blobid:
-                blob = Blob.from_string(fctx.data())
-                self.git.object_store.add_object(blob)
-                self.map_set(blob.id, hex(fctx.filenode()))
-                blobid = blob.id
-
-            if 'l' in ctx.flags(f):
-                mode = 0120000
-            elif 'x' in ctx.flags(f):
-                mode = 0100755
-            else:
-                mode = 0100644
-
-            yield f, blobid, mode
-
     def getnewgitcommits(self, refs=None):
         self.init_if_missing()
 
@@ -871,16 +861,40 @@
     def upload_pack(self, remote, revs, force):
         client, path = self.get_transport_and_path(remote)
         old_refs = {}
+        change_totals = {}
+
         def changed(refs):
+            self.ui.status(_("searching for changes\n"))
             old_refs.update(refs)
             to_push = revs or set(self.local_heads().values() + self.tags.values())
             return self.get_changed_refs(refs, to_push, force)
 
-        genpack = self.git.object_store.generate_pack_contents
+        def genpack(have, want):
+            commits = []
+            for mo in self.git.object_store.find_missing_objects(have, want):
+                (sha, name) = mo
+                o = self.git.object_store[sha]
+                t = type(o)
+                change_totals[t] = change_totals.get(t, 0) + 1
+                if isinstance(o, Commit):
+                    commits.append(sha)
+            commit_count = len(commits)
+            self.ui.note(_("%d commits found\n") % commit_count)
+            if commit_count > 0:
+                self.ui.debug(_("list of commits:\n"))
+                for commit in commits:
+                    self.ui.debug("%s\n" % commit)
+                self.ui.status(_("adding objects\n"))
+            return self.git.object_store.generate_pack_contents(have, want)
+
         try:
-            self.ui.status(_("searching for changes\n"))
-            self.ui.note(_("creating and sending data\n"))
             new_refs = client.send_pack(path, changed, genpack)
+            if len(change_totals) > 0:
+                self.ui.status(_("added %d commits with %d trees"
+                                 " and %d blobs\n") %
+                               (change_totals.get(Commit, 0),
+                                change_totals.get(Tree, 0),
+                                change_totals.get(Blob, 0)))
             return old_refs, new_refs
         except (HangupException, GitProtocolError), e:
             raise hgutil.Abort(_("git remote error: ") + str(e))
@@ -1025,12 +1039,18 @@
                 tag = tag.replace(' ', '_')
                 target = self.map_git_get(hex(sha))
                 if target is not None:
-                    self.git.refs['refs/tags/' + tag] = target
-                    self.tags[tag] = hex(sha)
+                    tag_refname = 'refs/tags/' + tag
+                    if(check_ref_format(tag_refname)):
+                      self.git.refs[tag_refname] = target
+                      self.tags[tag] = hex(sha)
+                    else:
+                      self.repo.ui.warn(
+                        'Skipping export of tag %s because it '
+                        'has invalid name as a git refname.\n' % tag)
                 else:
                     self.repo.ui.warn(
                         'Skipping export of tag %s because it '
-                        'has no matching git revision.' % tag)
+                        'has no matching git revision.\n' % tag)
 
     def _filter_for_bookmarks(self, bms):
         if not self.branch_bookmark_suffix:
@@ -1095,6 +1115,7 @@
             heads = dict([(ref[11:],refs[ref]) for ref in refs
                           if ref.startswith('refs/heads/')])
 
+            suffix = self.branch_bookmark_suffix or ''
             for head, sha in heads.iteritems():
                 # refs contains all the refs in the server, not just
                 # the ones we are pulling
@@ -1103,28 +1124,13 @@
                 hgsha = bin(self.map_hg_get(sha))
                 if not head in bms:
                     # new branch
-                    bms[head] = hgsha
+                    bms[head + suffix] = hgsha
                 else:
                     bm = self.repo[bms[head]]
                     if bm.ancestor(self.repo[hgsha]) == bm:
                         # fast forward
-                        bms[head] = hgsha
+                        bms[head + suffix] = hgsha
 
-            # if there's a branch bookmark suffix,
-            # then add it on to all bookmark names
-            # that would otherwise conflict with a branch
-            # name
-            if self.branch_bookmark_suffix:
-                real_branch_names = self.repo.branchmap()
-                bms = dict(
-                    (
-                        bm_name + self.branch_bookmark_suffix
-                            if bm_name in real_branch_names
-                        else bm_name,
-                        bms[bm_name]
-                    )
-                    for bm_name in bms
-                )
             if heads:
                 if oldbm:
                     bookmarks.write(self.repo, bms)
@@ -1345,7 +1351,7 @@
             res = git_match.groupdict()
             transport = client.SSHGitClient if 'ssh' in res['scheme'] else client.TCPGitClient
             host, port, sepr, path = res['host'], res['port'], res['sepr'], res['path']
-            if sepr == '/':
+            if sepr == '/' and not path.startswith('~'):
                 path = '/' + path
             # strip trailing slash for heroku-style URLs
             # ssh+git://git@heroku.com:project.git/
new file mode 100644
--- /dev/null
+++ b/hggit/hg2git.py
@@ -0,0 +1,303 @@
+# This file contains code dealing specifically with converting Mercurial
+# repositories to Git repositories. Code in this file is meant to be a generic
+# library and should be usable outside the context of hg-git or an hg command.
+
+import os
+import stat
+
+import dulwich.objects as dulobjs
+import mercurial.node
+
+import util
+
+
+class IncrementalChangesetExporter(object):
+    """Incrementally export Mercurial changesets to Git trees.
+
+    The purpose of this class is to facilitate Git tree export that is more
+    optimal than brute force.
+
+    A "dumb" implementation of Mercurial to Git export would iterate over
+    every file present in a Mercurial changeset and would convert each to
+    a Git blob and then conditionally add it to a Git repository if it didn't
+    yet exist. This is suboptimal because the overhead associated with
+    obtaining every file's raw content and converting it to a Git blob is
+    not trivial!
+
+    This class works around the suboptimality of brute force export by
+    leveraging the information stored in Mercurial - the knowledge of what
+    changed between changesets - to only export Git objects corresponding to
+    changes in Mercurial. In the context of converting Mercurial repositories
+    to Git repositories, we only export objects Git (possibly) hasn't seen yet.
+    This prevents a lot of redundant work and is thus faster.
+
+    Callers instantiate an instance of this class against a mercurial.localrepo
+    instance. They then associate it with a specific changeset by calling
+    update_changeset(). On each call to update_changeset(), the instance
+    computes the difference between the current and new changesets and emits
+    Git objects that haven't yet been encountered during the lifetime of the
+    class instance. In other words, it expresses Mercurial changeset deltas in
+    terms of Git objects. Callers then (usually) take this set of Git objects
+    and add them to the Git repository.
+
+    This class only emits Git blobs and trees, not commits.
+
+    The tree calculation part of this class is essentially a reimplementation
+    of dulwich.index.commit_tree. However, since our implementation reuses
+    Tree instances and only recalculates SHA-1 when things change, we are
+    more efficient.
+    """
+
+    def __init__(self, hg_repo):
+        """Create an instance against a mercurial.localrepo."""
+        self._hg = hg_repo
+
+        # Our current revision.
+        self._rev = mercurial.node.nullrev
+
+        # Path to dulwich.objects.Tree.
+        self._dirs = {}
+
+        # Mercurial file nodeid to Git blob SHA-1. Used to prevent redundant
+        # blob calculation.
+        self._blob_cache = {}
+
+    @property
+    def root_tree_sha(self):
+        """The SHA-1 of the root Git tree.
+
+        This is needed to construct a Git commit object.
+        """
+        return self._dirs[''].id
+
+    def update_changeset(self, ctx):
+        """Set the tree to track a new Mercurial changeset.
+
+        This is a generator of 2-tuples. The first item in each tuple is a
+        dulwich object, either a Blob or a Tree. The second item is the
+        corresponding Mercurial nodeid for the item, if any. Only blobs will
+        have nodeids. Trees do not correspond to a specific nodeid, so it does
+        not make sense to emit a nodeid for them.
+
+        When exporting trees from Mercurial, callers typically write the
+        returned dulwich object to the Git repo via the store's add_object().
+
+        Some emitted objects may already exist in the Git repository. This
+        class does not know about the Git repository, so it's up to the caller
+        to conditionally add the object, etc.
+
+        Emitted objects are those that have changed since the last call to
+        update_changeset. If this is the first call to update_changeset, all
+        objects in the tree are emitted.
+        """
+        # Our general strategy is to accumulate dulwich.objects.Blob and
+        # dulwich.objects.Tree instances for the current Mercurial changeset.
+        # We do this incrementally by iterating over the Mercurial-reported
+        # changeset delta. We rely on the behavior of dulwich to lazily
+        # calculate a Tree's SHA-1 when we modify it. This is critical to
+        # performance.
+
+        # In theory we should be able to look at changectx.files(). This is
+        # *much* faster. However, it may not be accurate, especially with older
+        # repositories, which may not record things like deleted files
+        # explicitly in the manifest (which is where files() gets its data).
+        # The only reliable way to get the full set of changes is by looking at
+        # the full manifest. And, the easy way to compare two manifests is
+        # localrepo.status().
+        modified, added, removed = self._hg.status(self._rev, ctx.rev())[0:3]
+
+        # We track which directories/trees were modified in this update and we
+        # only export those.
+        dirty_trees = set()
+
+        # We first process file removals so we can prune dead trees.
+        for path in removed:
+            d = os.path.dirname(path)
+            tree = self._dirs.get(d, dulobjs.Tree())
+
+            del tree[os.path.basename(path)]
+            dirty_trees.add(d)
+
+            # If removing this file made the tree empty, we should delete this
+            # tree. This could result in parent trees losing their only child
+            # and so on.
+            if not len(tree):
+                self._remove_tree(d)
+                continue
+
+            self._dirs[d] = tree
+
+        # For every file that changed or was added, we need to calculate the
+        # corresponding Git blob and its tree entry. We emit the blob
+        # immediately and update trees to be aware of its presence.
+        for path in set(modified) | set(added):
+            # Handle special Mercurial paths.
+            if path == '.hgsubstate':
+                self._handle_subrepos(ctx, dirty_trees)
+                continue
+
+            if path == '.hgsub':
+                continue
+
+            d = os.path.dirname(path)
+            tree = self._dirs.setdefault(d, dulobjs.Tree())
+            dirty_trees.add(d)
+
+            fctx = ctx[path]
+
+            entry, blob = IncrementalChangesetExporter.tree_entry(fctx,
+                self._blob_cache)
+            if blob is not None:
+                yield (blob, fctx.filenode())
+
+            tree.add(*entry)
+
+        # Now that all the trees represent the current changeset, recalculate
+        # the tree IDs and emit them. Note that we wait until now to calculate
+        # tree SHA-1s. This is an important difference between us and
+        # dulwich.index.commit_tree(), which builds new Tree instances for each
+        # series of blobs.
+        for obj in self._populate_tree_entries(dirty_trees):
+            yield (obj, None)
+
+        self._rev = ctx.rev()
+
+    def _remove_tree(self, path):
+        """Remove a (presumably empty) tree from the current changeset.
+
+        A now-empty tree may be the only child of its parent. So, we traverse
+        up the chain to the root tree, deleting any empty trees along the way.
+        """
+        try:
+            del self._dirs[path]
+        except KeyError:
+            return
+
+        # Now we traverse up to the parent and delete any references.
+        if path == '':
+            return
+
+        basename = os.path.basename(path)
+        parent = os.path.dirname(path)
+        while True:
+            tree = self._dirs.get(parent, None)
+
+            # No parent entry. Nothing to remove or update.
+            if tree is None:
+                return
+
+            try:
+                del tree[basename]
+            except KeyError:
+                return
+
+            if len(tree):
+                return
+
+            # The parent tree is empty. So, we can delete it.
+            del self._dirs[parent]
+
+            if parent == '':
+                return
+
+            basename = os.path.basename(parent)
+            parent = os.path.dirname(parent)
+
+    def _populate_tree_entries(self, dirty_trees):
+        self._dirs.setdefault('', dulobjs.Tree())
+
+        # Fill in missing directories.
+        for path in self._dirs.keys():
+            parent = os.path.dirname(path)
+
+            while parent != '':
+                parent_tree = self._dirs.get(parent, None)
+
+                if parent_tree is not None:
+                    break
+
+                self._dirs[parent] = dulobjs.Tree()
+                parent = os.path.dirname(parent)
+
+        for dirty in list(dirty_trees):
+            parent = os.path.dirname(dirty)
+
+            while parent != '':
+                if parent in dirty_trees:
+                    break
+
+                dirty_trees.add(parent)
+                parent = os.path.dirname(parent)
+
+        # The root tree is always dirty but doesn't always get updated.
+        dirty_trees.add('')
+
+        # We only need to recalculate and export dirty trees.
+        for d in sorted(dirty_trees, key=len, reverse=True):
+            # Only happens for deleted directories.
+            try:
+                tree = self._dirs[d]
+            except KeyError:
+                continue
+
+            yield tree
+
+            if d == '':
+                continue
+
+            parent_tree = self._dirs[os.path.dirname(d)]
+
+            # Accessing the tree's ID is what triggers SHA-1 calculation and is
+            # the expensive part (at least if the tree has been modified since
+            # the last time we retrieved its ID). Also, assigning an entry to a
+            # tree (even if it already exists) invalidates the existing tree
+            # and incurs SHA-1 recalculation. So, it's in our interest to avoid
+            # invalidating trees. Since we only update the entries of dirty
+            # trees, this should hold true.
+            parent_tree[os.path.basename(d)] = (stat.S_IFDIR, tree.id)
+
+    def _handle_subrepos(self, ctx, dirty_trees):
+        substate = util.parse_hgsubstate(ctx['.hgsubstate'].data().splitlines())
+        sub = util.OrderedDict()
+
+        if '.hgsub' in ctx:
+            sub = util.parse_hgsub(ctx['.hgsub'].data().splitlines())
+
+        for path, sha in substate.iteritems():
+            # Ignore non-Git repositories keeping state in .hgsubstate.
+            if path in sub and not sub[path].startswith('[git]'):
+                continue
+
+            d = os.path.dirname(path)
+            dirty_trees.add(d)
+            tree = self._dirs.setdefault(d, dulobjs.Tree())
+            tree.add(os.path.basename(path), dulobjs.S_IFGITLINK, sha)
+
+    @staticmethod
+    def tree_entry(fctx, blob_cache):
+        """Compute a dulwich TreeEntry from a filectx.
+
+        A side effect is the blob's SHA-1 is stored in the passed cache.
+
+        Returns a 2-tuple of (dulwich.objects.TreeEntry, dulwich.objects.Blob).
+        """
+        blob_id = blob_cache.get(fctx.filenode(), None)
+        blob = None
+
+        if blob_id is None:
+            blob = dulobjs.Blob.from_string(fctx.data())
+            blob_id = blob.id
+            blob_cache[fctx.filenode()] = blob_id
+
+        flags = fctx.flags()
+
+        if 'l' in flags:
+            mode = 0120000
+        elif 'x' in flags:
+            mode = 0100755
+        else:
+            mode = 0100644
+
+        return (dulobjs.TreeEntry(os.path.basename(fctx.path()), mode, blob_id),
+                blob)
+
--- a/hggit/overlay.py
+++ b/hggit/overlay.py
@@ -59,6 +59,10 @@
 
         addtree(self.tree, '')
 
+    def iteritems(self):
+        self.load()
+        return self._map.iteritems()
+
     def __iter__(self):
         self.load()
         return self._map.__iter__()
--- a/setup.py
+++ b/setup.py
@@ -16,7 +16,7 @@
     maintainer='Augie Fackler',
     maintainer_email='durin42@gmail.com',
     url='http://hg-git.github.com/',
-    description='push and pull from a Git server using Mercurial',
+    description='push to and pull from a Git repository using Mercurial',
     long_description="""
 This extension lets you communicate (push and pull) with a Git server.
 This way you can use Git hosting for your project or collaborate with a
@@ -26,5 +26,5 @@
     license='GPLv2',
     packages=['hggit'],
     package_data={ 'hggit': ['help/git.rst'] },
-    install_requires=['dulwich>=0.8.6'] + extra_req,
+    install_requires=['dulwich>=0.9.1'] + extra_req,
 )
new file mode 100644
--- /dev/null
+++ b/tests/test-branch-bookmark-suffix.t
@@ -0,0 +1,139 @@
+bail if the user does not have dulwich
+  $ python -c 'import dulwich, dulwich.repo' || exit 80
+
+  $ echo "[extensions]" >> $HGRCPATH
+  $ echo "hggit=$(echo $(dirname $TESTDIR))/hggit" >> $HGRCPATH
+  $ echo 'hgext.graphlog =' >> $HGRCPATH
+  $ echo "[git]" >> $HGRCPATH
+  $ echo "branch_bookmark_suffix=_bookmark" >> $HGRCPATH
+
+  $ GIT_AUTHOR_NAME='test'; export GIT_AUTHOR_NAME
+  $ GIT_AUTHOR_EMAIL='test@example.org'; export GIT_AUTHOR_EMAIL
+  $ GIT_AUTHOR_DATE="2007-01-01 00:00:00 +0000"; export GIT_AUTHOR_DATE
+  $ GIT_COMMITTER_NAME="$GIT_AUTHOR_NAME"; export GIT_COMMITTER_NAME
+  $ GIT_COMMITTER_EMAIL="$GIT_AUTHOR_EMAIL"; export GIT_COMMITTER_EMAIL
+  $ GIT_COMMITTER_DATE="$GIT_AUTHOR_DATE"; export GIT_COMMITTER_DATE
+
+  $ count=10
+  $ commit()
+  > {
+  >     GIT_AUTHOR_DATE="2007-01-01 00:00:$count +0000"
+  >     GIT_COMMITTER_DATE="$GIT_AUTHOR_DATE"
+  >     git commit "$@" >/dev/null 2>/dev/null || echo "git commit error"
+  >     count=`expr $count + 1`
+  > }
+  $ hgcommit()
+  > {
+  >     HGDATE="2007-01-01 00:00:$count +0000"
+  >     hg commit -d "$HGDATE" "$@" >/dev/null 2>/dev/null || echo "hg commit error"
+  >     count=`expr $count + 1`
+  > }
+
+  $ git config --global push.default matching
+  $ git init --bare gitrepo1
+  Initialized empty Git repository in $TESTTMP/gitrepo1/
+
+  $ hg init hgrepo
+  $ cd hgrepo
+  $ hg branch -q branch1
+  $ hg bookmark branch1_bookmark
+  $ echo f1 > f1
+  $ hg add f1
+  $ hgcommit -m "add f1"
+  $ hg branch -q branch2
+  $ hg bookmark branch2_bookmark
+  $ echo f2 > f2
+  $ hg add f2
+  $ hgcommit -m "add f2"
+  $ hg log --graph
+  @  changeset:   1:600de9b6d498
+  |  branch:      branch2
+  |  bookmark:    branch2_bookmark
+  |  tag:         tip
+  |  user:        test
+  |  date:        Mon Jan 01 00:00:11 2007 +0000
+  |  summary:     add f2
+  |
+  o  changeset:   0:40a840c1f8ae
+     branch:      branch1
+     bookmark:    branch1_bookmark
+     user:        test
+     date:        Mon Jan 01 00:00:10 2007 +0000
+     summary:     add f1
+  
+
+
+  $ hg push ../gitrepo1
+  pushing to ../gitrepo1
+  searching for changes
+  adding objects
+  added 2 commits with 2 trees and 2 blobs
+
+  $ cd ..
+
+  $ cd gitrepo1
+  $ git symbolic-ref HEAD refs/heads/branch1
+  $ git branch
+  * branch1
+    branch2
+  $ cd ..
+
+  $ git clone gitrepo1 gitrepo2
+  Cloning into 'gitrepo2'...
+  done.
+  $ cd gitrepo2
+  $ git checkout branch1
+  Already on 'branch1'
+  $ echo g1 >> f1
+  $ git add f1
+  $ commit -m "append f1"
+  $ git checkout branch2
+  Switched to a new branch 'branch2'
+  Branch branch2 set up to track remote branch branch2 from origin.
+  $ echo g2 >> f2
+  $ git add f2
+  $ commit -m "append f2"
+  $ git push origin
+  To $TESTTMP/gitrepo1
+     bbfe79a..d8aef79  branch1 -> branch1
+     288e92b..f8f8de5  branch2 -> branch2
+  $ cd ..
+
+  $ cd hgrepo
+  $ hg pull ../gitrepo1
+  pulling from ../gitrepo1
+  importing git objects into hg
+  (run 'hg heads' to see heads)
+  $ hg log --graph
+  o  changeset:   3:0a696ec0f478
+  |  bookmark:    branch2_bookmark
+  |  tag:         default/branch2
+  |  tag:         tip
+  |  parent:      1:600de9b6d498
+  |  user:        test <test@example.org>
+  |  date:        Mon Jan 01 00:00:13 2007 +0000
+  |  summary:     append f2
+  |
+  | o  changeset:   2:49db35e15e81
+  | |  bookmark:    branch1_bookmark
+  | |  tag:         default/branch1
+  | |  parent:      0:40a840c1f8ae
+  | |  user:        test <test@example.org>
+  | |  date:        Mon Jan 01 00:00:12 2007 +0000
+  | |  summary:     append f1
+  | |
+  @ |  changeset:   1:600de9b6d498
+  |/   branch:      branch2
+  |    user:        test
+  |    date:        Mon Jan 01 00:00:11 2007 +0000
+  |    summary:     add f2
+  |
+  o  changeset:   0:40a840c1f8ae
+     branch:      branch1
+     user:        test
+     date:        Mon Jan 01 00:00:10 2007 +0000
+     summary:     add f1
+  
+
+
+  $ cd ..
--- a/tests/test-conflict-1.t
+++ b/tests/test-conflict-1.t
@@ -50,6 +50,8 @@
   $ hg push -r master ../gitrepo
   pushing to ../gitrepo
   searching for changes
+  adding objects
+  added 4 commits with 3 trees and 3 blobs
   $ cd ..
 
   $ hg clone gitrepo hgrepo2 | grep -v '^updating'
--- a/tests/test-conflict-2.t
+++ b/tests/test-conflict-2.t
@@ -50,6 +50,8 @@
   $ hg push -r master ../gitrepo
   pushing to ../gitrepo
   searching for changes
+  adding objects
+  added 4 commits with 3 trees and 3 blobs
   $ cd ..
 
   $ hg clone gitrepo hgrepo2 | grep -v '^updating'
--- a/tests/test-convergedmerge.t
+++ b/tests/test-convergedmerge.t
@@ -51,6 +51,8 @@
   $ hg push -r master ../gitrepo
   pushing to ../gitrepo
   searching for changes
+  adding objects
+  added 5 commits with 3 trees and 3 blobs
   $ cd ..
 
   $ hg clone gitrepo hgrepo2 | grep -v '^updating'
--- a/tests/test-empty-working-tree.t
+++ b/tests/test-empty-working-tree.t
@@ -22,6 +22,8 @@
   $ hg push ../gitrepo2
   pushing to ../gitrepo2
   searching for changes
+  adding objects
+  added 1 commits with 1 trees and 0 blobs
   $ cd ..
   $ git --git-dir=gitrepo2 log --pretty=medium
   commit 678256865a8c85ae925bf834369264193c88f8de
--- a/tests/test-encoding.t
+++ b/tests/test-encoding.t
@@ -94,6 +94,8 @@
   $ hg push ../gitrepo2
   pushing to ../gitrepo2
   searching for changes
+  adding objects
+  added 4 commits with 4 trees and 4 blobs
 
   $ cd ..
 Latin1 commit messages started being automatically converted to UTF-8 in
--- a/tests/test-file-removal.t
+++ b/tests/test-file-removal.t
@@ -75,6 +75,8 @@
   $ hg push ../gitrepo2
   pushing to ../gitrepo2
   searching for changes
+  adding objects
+  added 5 commits with 6 trees and 3 blobs
 
   $ cd ..
   $ git --git-dir=gitrepo2 log --pretty=medium
--- a/tests/test-git-tags.t
+++ b/tests/test-git-tags.t
@@ -41,5 +41,8 @@
   $ hg push
   pushing to $TESTTMP/gitrepo
   searching for changes
+  adding objects
+  added 1 commits with 1 trees and 2 blobs
+  updating reference refs/heads/master
 
   $ cd ..
--- a/tests/test-hg-author.t
+++ b/tests/test-hg-author.t
@@ -24,12 +24,18 @@
   $ hg push
   pushing to $TESTTMP/gitrepo
   searching for changes
+  adding objects
+  added 1 commits with 1 trees and 2 blobs
+  updating reference refs/heads/master
 
   $ echo gamma >> beta
   $ fn_hg_commit -u "test <test@example.com> (comment)" -m 'modify beta'
   $ hg push
   pushing to $TESTTMP/gitrepo
   searching for changes
+  adding objects
+  added 1 commits with 1 trees and 2 blobs
+  updating reference refs/heads/master
 
   $ echo gamma > gamma
   $ hg add gamma
@@ -37,6 +43,9 @@
   $ hg push
   pushing to $TESTTMP/gitrepo
   searching for changes
+  adding objects
+  added 1 commits with 1 trees and 3 blobs
+  updating reference refs/heads/master
 
   $ echo delta > delta
   $ hg add delta
@@ -44,6 +53,9 @@
   $ hg push
   pushing to $TESTTMP/gitrepo
   searching for changes
+  adding objects
+  added 1 commits with 1 trees and 4 blobs
+  updating reference refs/heads/master
 
   $ echo epsilon > epsilon
   $ hg add epsilon
@@ -51,6 +63,9 @@
   $ hg push
   pushing to $TESTTMP/gitrepo
   searching for changes
+  adding objects
+  added 1 commits with 1 trees and 5 blobs
+  updating reference refs/heads/master
 
   $ echo zeta > zeta
   $ hg add zeta
@@ -58,6 +73,9 @@
   $ hg push
   pushing to $TESTTMP/gitrepo
   searching for changes
+  adding objects
+  added 1 commits with 1 trees and 6 blobs
+  updating reference refs/heads/master
 
   $ echo eta > eta
   $ hg add eta
@@ -65,6 +83,9 @@
   $ hg push
   pushing to $TESTTMP/gitrepo
   searching for changes
+  adding objects
+  added 1 commits with 1 trees and 7 blobs
+  updating reference refs/heads/master
 
   $ echo theta > theta
   $ hg add theta
@@ -72,6 +93,9 @@
   $ hg push
   pushing to $TESTTMP/gitrepo
   searching for changes
+  adding objects
+  added 1 commits with 1 trees and 8 blobs
+  updating reference refs/heads/master
 
   $ hg log --graph | egrep -v ': *(not-master|master)'
   @  changeset:   8:d3c51ce68cfd
--- a/tests/test-hg-branch.t
+++ b/tests/test-hg-branch.t
@@ -23,6 +23,9 @@
   $ hg push
   pushing to $TESTTMP/gitrepo
   searching for changes
+  adding objects
+  added 1 commits with 1 trees and 1 blobs
+  updating reference refs/heads/master
 
   $ hg branch gamma | grep -v 'permanent and global'
   marked working directory as branch gamma
@@ -30,6 +33,9 @@
   $ hg push
   pushing to $TESTTMP/gitrepo
   searching for changes
+  adding objects
+  added 1 commits with 1 trees and 1 blobs
+  updating reference refs/heads/master
 
   $ hg log --graph | egrep -v ': *(not-master|master)'
   @  changeset:   2:05aed681ccb3
new file mode 100644
--- /dev/null
+++ b/tests/test-hg-tags-invalid.t
@@ -0,0 +1,89 @@
+Load commonly used test logic
+  $ . "$TESTDIR/testutil"
+
+  $ git init gitrepo
+  Initialized empty Git repository in $TESTTMP/gitrepo/.git/
+  $ cd gitrepo
+  $ echo alpha > alpha
+  $ git add alpha
+  $ fn_git_commit -m "add alpha"
+  $ git checkout -b not-master
+  Switched to a new branch 'not-master'
+
+  $ cd ..
+  $ hg clone gitrepo hgrepo | grep -v '^updating'
+  importing git objects into hg
+  1 files updated, 0 files merged, 0 files removed, 0 files unresolved
+
+  $ cd hgrepo
+  $ hg co master
+  0 files updated, 0 files merged, 0 files removed, 0 files unresolved
+  $ fn_hg_tag alph#a
+  $ fn_hg_tag bet*a
+  $ hg push
+  pushing to $TESTTMP/gitrepo
+  Skipping export of tag bet*a because it has invalid name as a git refname.
+  searching for changes
+  adding objects
+  added 2 commits with 2 trees and 3 blobs
+  adding reference refs/tags/alph#a
+  updating reference refs/heads/master
+
+  $ hg log --graph | egrep -v ': *(not-master|master)'
+  @  changeset:   2:e72bdd9ef5c0
+  |  tag:         default/master
+  |  tag:         tip
+  |  user:        test
+  |  date:        Mon Jan 01 00:00:12 2007 +0000
+  |  summary:     Added tag bet*a for changeset 432ce25d86bc
+  |
+  o  changeset:   1:432ce25d86bc
+  |  tag:         bet*a
+  |  user:        test
+  |  date:        Mon Jan 01 00:00:11 2007 +0000
+  |  summary:     Added tag alph#a for changeset 3442585be8a6
+  |
+  o  changeset:   0:3442585be8a6
+     tag:         alph#a
+     tag:         default/not-master
+     user:        test <test@example.org>
+     date:        Mon Jan 01 00:00:10 2007 +0000
+     summary:     add alpha
+  
+
+  $ cd ..
+  $ cd gitrepo
+git should have only the valid tag alph#a, but it should still have the full commit log, including the commit that added the invalid (and therefore missing) bet*a tag
+  $ git tag -l
+  alph#a
+
+  $ cd ..
+  $ hg clone gitrepo hgrepo2 | grep -v '^updating'
+  importing git objects into hg
+  2 files updated, 0 files merged, 0 files removed, 0 files unresolved
+  $ hg -R hgrepo2 log --graph | egrep -v ': *(not-master|master)'
+  @  changeset:   2:e72bdd9ef5c0
+  |  tag:         default/master
+  |  tag:         tip
+  |  user:        test
+  |  date:        Mon Jan 01 00:00:12 2007 +0000
+  |  summary:     Added tag bet*a for changeset 432ce25d86bc
+  |
+  o  changeset:   1:432ce25d86bc
+  |  tag:         bet*a
+  |  user:        test
+  |  date:        Mon Jan 01 00:00:11 2007 +0000
+  |  summary:     Added tag alph#a for changeset 3442585be8a6
+  |
+  o  changeset:   0:3442585be8a6
+     tag:         alph#a
+     tag:         default/not-master
+     user:        test <test@example.org>
+     date:        Mon Jan 01 00:00:10 2007 +0000
+     summary:     add alpha
+  
+
+both tags should be in .hgtags
+  $ cat hgrepo2/.hgtags
+  3442585be8a60c6cd476bbc4e45755339f2a23ef alph#a
+  432ce25d86bc4281747aa42e27b473b992e2b0b9 bet*a
--- a/tests/test-hg-tags.t
+++ b/tests/test-hg-tags.t
@@ -22,6 +22,10 @@
   $ hg push
   pushing to $TESTTMP/gitrepo
   searching for changes
+  adding objects
+  added 1 commits with 1 trees and 2 blobs
+  adding reference refs/tags/alpha
+  updating reference refs/heads/master
 
   $ hg log --graph | egrep -v ': *(not-master|master)'
   @  changeset:   1:d529e9229f6d
--- a/tests/test-merge.t
+++ b/tests/test-merge.t
@@ -42,6 +42,8 @@
   $ hg push ../gitrepo2
   pushing to ../gitrepo2
   searching for changes
+  adding objects
+  added 4 commits with 4 trees and 3 blobs
 
   $ cd ..
 git log in repo pushed from hg
--- a/tests/test-octopus.t
+++ b/tests/test-octopus.t
@@ -69,6 +69,8 @@
   $ hg push ../gitrepo2
   pushing to ../gitrepo2
   searching for changes
+  adding objects
+  added 5 commits with 5 trees and 4 blobs
   $ cd ..
 
   $ git --git-dir=gitrepo2 log --pretty=medium | sed s/\\.\\.\\.//g
--- a/tests/test-push.t
+++ b/tests/test-push.t
@@ -29,6 +29,9 @@
   $ hg push -r beta
   pushing to $TESTTMP/gitrepo
   searching for changes
+  adding objects
+  added 1 commits with 1 trees and 2 blobs
+  adding reference refs/heads/beta
 
   $ cd ..
 
@@ -100,6 +103,9 @@
   $ hg push -fr master
   pushing to $TESTTMP/gitrepo
   searching for changes
+  adding objects
+  added 1 commits with 1 trees and 3 blobs
+  updating reference refs/heads/master
 
 this should fail, no changes to push
 The exit code for this was broken in Mercurial (incorrectly returning 0) until
--- a/tests/test-subrepos.t
+++ b/tests/test-subrepos.t
@@ -38,7 +38,7 @@
   $ hg bookmarks -f -r default master
 1. Ensure gitlinks are transformed to .hgsubstate on hg <- git pull
 .hgsub shall list two [git] subrepos
-  $ cat .hgsub
+  $ cat .hgsub | sort
   subrepo1 = [git]../gitsubrepo
   xyz/subrepo2 = [git]../gitsubrepo
 .hgsubstate shall list two idenitcal revisions
@@ -72,6 +72,9 @@
   $ hg push
   pushing to $TESTTMP/gitrepo1
   searching for changes
+  adding objects
+  added 1 commits with 2 trees and 2 blobs
+  updating reference refs/heads/master
   $ cd ..
   $ cd gitrepo1
 there shall be two gitlink entries, with values matching that in .hgsubstate
@@ -100,11 +103,12 @@
   importing git objects into hg
   (run 'hg update' to get a working copy)
   $ hg checkout -C
+  updating to active bookmark master
   cloning subrepo hgsub from $TESTTMP/hgsub
   2 files updated, 0 files merged, 0 files removed, 0 files unresolved
   $ cd ..
 pull shall bring .hgsub entry which was added to the git repo
-  $ cat hgrepo/.hgsub
+  $ cat hgrepo/.hgsub | sort
   hgsub = $TESTTMP/hgsub
   subrepo1 = [git]../gitsubrepo
   xyz/subrepo2 = [git]../gitsubrepo
new file mode 100755
--- /dev/null
+++ b/tests/test-timezone.t
@@ -0,0 +1,32 @@
+This test shows how dulwich fails to convert a commit accepted by hg.
+
+In the real world case, it was a hand edit by the user to change the
+timezone field in an export. However, if it is good enough for hg, we
+have to make it good enough for git.
+
+Load commonly used test logic
+  $ . "$TESTDIR/testutil"
+
+  $ hg init hgrepo
+  $ cd hgrepo
+  $ touch beta
+  $ hg add beta
+  $ fn_hg_commit -m "test commit"
+  $ cat >patch2 <<EOF
+  > # HG changeset patch
+  > # User J. User <juser@example.com>
+  > # Date 1337962044 25201
+  > # Node ID 1111111111111111111111111111111111111111
+  > # Parent  0000000000000000000000000000000000000000
+  > Patch with sub-minute time zone
+  >
+  > diff --git a/alpha b/alpha
+  > new file mode 100644
+  > --- /dev/null
+  > +++ b/alpha
+  > @@ -0,0 +1,1 @@
+  > +alpha
+  > EOF
+  $ hg import patch2
+  applying patch2
+  $ hg gexport
--- a/tests/test-tree-decomposition.t
+++ b/tests/test-tree-decomposition.t
@@ -16,7 +16,7 @@
 
   $ rm -r d1
   $ echo c > d1
-  $ git add d1
+  $ git add --all d1
   $ fn_git_commit -m 'replace a dir with a file'
 
 
@@ -41,6 +41,8 @@
   $ hg push ../gitrepo2
   pushing to ../gitrepo2
   searching for changes
+  adding objects
+  added 3 commits with 6 trees and 3 blobs
   $ cd ..
 
   $ git --git-dir=gitrepo2 log --pretty=medium