changeset 673:fbfa6353d96c

hg2git: fix subrepo handling to be deterministic Previously, the correctness of _handle_subrepos was based on the order the files were processed in. For example, consider the case where a subrepo at location 'loc' is replaced with a file at 'loc', while another subrepo exists. This would cause .hgsubstate and .hgsub to be modified and the file added. If .hgsubstate was seen _before_ 'loc' in the modified/added loop, then _handle_subrepos would run and remove 'loc' correctly, before 'loc' was added back later. If, however, .hgsubstate was seen _after_ 'loc', then _handle_subrepos would run after 'loc' was added and would remove 'loc'. With this patch, _handle_subrepos merely computes the changes that need to be applied. The changes are then applied, making sure removed files and subrepos are processed before added ones. This was detected by setting a random PYTHONHASHSEED (in this case, 3910358828) and running the test suite against it. An upcoming patch will randomize the PYTHONHASHSEED in run-tests.py, just like is done in Mercurial.
author Siddharth Agarwal <sid0@fb.com>
date Wed, 19 Feb 2014 20:52:59 -0800
parents 71fb5dd678bc
children 0b33ab75e3cb
files hggit/hg2git.py
diffstat 1 files changed, 27 insertions(+), 18 deletions(-) [+]
line wrap: on
line diff
--- a/hggit/hg2git.py
+++ b/hggit/hg2git.py
@@ -120,27 +120,34 @@
         # only export those.
         dirty_trees = set()
 
-        # We first process file removals so we can prune dead trees.
+        subadded, subremoved = [], []
+
+        for s in modified, added, removed:
+            if '.hgsub' in s or '.hgsubstate' in s:
+                subadded, subremoved = self._handle_subrepos(newctx)
+                break
+
+        # We first process subrepo and file removals so we can prune dead trees.
+        for path in subremoved:
+            self._remove_path(path, dirty_trees)
+
         for path in removed:
-            if path == '.hgsubstate':
-                self._handle_subrepos(newctx, dirty_trees)
-                continue
-
-            if path == '.hgsub':
+            if path == '.hgsubstate' or path == '.hgsub':
                 continue
 
             self._remove_path(path, dirty_trees)
 
+        for path, sha in subadded:
+            d = os.path.dirname(path)
+            tree = self._dirs.setdefault(d, dulobjs.Tree())
+            dirty_trees.add(d)
+            tree.add(os.path.basename(path), dulobjs.S_IFGITLINK, sha)
+
         # For every file that changed or was added, we need to calculate the
         # corresponding Git blob and its tree entry. We emit the blob
         # immediately and update trees to be aware of its presence.
         for path in set(modified) | set(added):
-            # Handle special Mercurial paths.
-            if path == '.hgsubstate':
-                self._handle_subrepos(newctx, dirty_trees)
-                continue
-
-            if path == '.hgsub':
+            if path == '.hgsubstate' or path == '.hgsub':
                 continue
 
             d = os.path.dirname(path)
@@ -278,7 +285,7 @@
             # trees, this should hold true.
             parent_tree[os.path.basename(d)] = (stat.S_IFDIR, tree.id)
 
-    def _handle_subrepos(self, newctx, dirty_trees):
+    def _handle_subrepos(self, newctx):
         sub, substate = parse_subrepos(self._ctx)
         newsub, newsubstate = parse_subrepos(newctx)
 
@@ -296,6 +303,9 @@
         # git links without corresponding submodule paths are stored as subrepos
         # with a substate but without an entry in .hgsub.
 
+        # 'added' is both modified and added
+        added, removed = [], []
+
         def isgit(sub, path):
             return path not in sub or sub[path].startswith('[git]')
 
@@ -306,7 +316,7 @@
             # old = git
             if path not in newsubstate or not isgit(newsub, path):
                 # new = hg or no, case (2) or (3)
-                self._remove_path(path, dirty_trees)
+                removed.append(path)
 
         for path, sha in newsubstate.iteritems():
             if not isgit(newsub, path):
@@ -314,10 +324,9 @@
                 continue
 
             # case (1)
-            d = os.path.dirname(path)
-            dirty_trees.add(d)
-            tree = self._dirs.setdefault(d, dulobjs.Tree())
-            tree.add(os.path.basename(path), dulobjs.S_IFGITLINK, sha)
+            added.append((path, sha))
+
+        return added, removed
 
     @staticmethod
     def tree_entry(fctx, blob_cache):