Mercurial > hg > hg-git
changeset 673:fbfa6353d96c
hg2git: fix subrepo handling to be deterministic
Previously, the correctness of _handle_subrepos was based on the order the
files were processed in. For example, consider the case where a subrepo at
location 'loc' is replaced with a file at 'loc', while another subrepo exists.
This would cause .hgsubstate and .hgsub to be modified and the file added.
If .hgsubstate was seen _before_ 'loc' in the modified/added loop, then
_handle_subrepos would run and remove 'loc' correctly, before 'loc' was added
back later. If, however, .hgsubstate was seen _after_ 'loc', then
_handle_subrepos would run after 'loc' was added and would remove 'loc'.
With this patch, _handle_subrepos merely computes the changes that need to be
applied. The changes are then applied, making sure removed files and subrepos
are processed before added ones.
This was detected by setting a random PYTHONHASHSEED (in this case, 3910358828)
and running the test suite against it. An upcoming patch will randomize the
PYTHONHASHSEED in run-tests.py, just like is done in Mercurial.
author | Siddharth Agarwal <sid0@fb.com> |
---|---|
date | Wed, 19 Feb 2014 20:52:59 -0800 |
parents | 71fb5dd678bc |
children | 0b33ab75e3cb |
files | hggit/hg2git.py |
diffstat | 1 files changed, 27 insertions(+), 18 deletions(-) [+] |
line wrap: on
line diff
--- a/hggit/hg2git.py +++ b/hggit/hg2git.py @@ -120,27 +120,34 @@ # only export those. dirty_trees = set() - # We first process file removals so we can prune dead trees. + subadded, subremoved = [], [] + + for s in modified, added, removed: + if '.hgsub' in s or '.hgsubstate' in s: + subadded, subremoved = self._handle_subrepos(newctx) + break + + # We first process subrepo and file removals so we can prune dead trees. + for path in subremoved: + self._remove_path(path, dirty_trees) + for path in removed: - if path == '.hgsubstate': - self._handle_subrepos(newctx, dirty_trees) - continue - - if path == '.hgsub': + if path == '.hgsubstate' or path == '.hgsub': continue self._remove_path(path, dirty_trees) + for path, sha in subadded: + d = os.path.dirname(path) + tree = self._dirs.setdefault(d, dulobjs.Tree()) + dirty_trees.add(d) + tree.add(os.path.basename(path), dulobjs.S_IFGITLINK, sha) + # For every file that changed or was added, we need to calculate the # corresponding Git blob and its tree entry. We emit the blob # immediately and update trees to be aware of its presence. for path in set(modified) | set(added): - # Handle special Mercurial paths. - if path == '.hgsubstate': - self._handle_subrepos(newctx, dirty_trees) - continue - - if path == '.hgsub': + if path == '.hgsubstate' or path == '.hgsub': continue d = os.path.dirname(path) @@ -278,7 +285,7 @@ # trees, this should hold true. parent_tree[os.path.basename(d)] = (stat.S_IFDIR, tree.id) - def _handle_subrepos(self, newctx, dirty_trees): + def _handle_subrepos(self, newctx): sub, substate = parse_subrepos(self._ctx) newsub, newsubstate = parse_subrepos(newctx) @@ -296,6 +303,9 @@ # git links without corresponding submodule paths are stored as subrepos # with a substate but without an entry in .hgsub. + # 'added' is both modified and added + added, removed = [], [] + def isgit(sub, path): return path not in sub or sub[path].startswith('[git]') @@ -306,7 +316,7 @@ # old = git if path not in newsubstate or not isgit(newsub, path): # new = hg or no, case (2) or (3) - self._remove_path(path, dirty_trees) + removed.append(path) for path, sha in newsubstate.iteritems(): if not isgit(newsub, path): @@ -314,10 +324,9 @@ continue # case (1) - d = os.path.dirname(path) - dirty_trees.add(d) - tree = self._dirs.setdefault(d, dulobjs.Tree()) - tree.add(os.path.basename(path), dulobjs.S_IFGITLINK, sha) + added.append((path, sha)) + + return added, removed @staticmethod def tree_entry(fctx, blob_cache):