changeset 9:7e776864b301

sorts the commits topologically before converting
author Scott Chacon <schacon@gmail.com>
date Sat, 25 Apr 2009 20:56:03 -0700
parents 2548735d24ef
children 66860f141788
files TODO.txt dulwich/objects.py git_handler.py
diffstat 3 files changed, 139 insertions(+), 7 deletions(-) [+]
line wrap: on
line diff
new file mode 100644
--- /dev/null
+++ b/TODO.txt
@@ -0,0 +1,27 @@
+CLONE
+===========
+
+* update/add bookmarks
+* checkout the tip
+* limit to HEAD branch? (gh-pages makes weird import)
+
+* tag conversion
+
+FETCH
+===========
+
+* gfetch command
+
+PUSH
+==========
+
+* get a list of all the hg changesets not yet mapped
+* create git objects from each changeset (incl trees/blobs)
+* update mapfile with new changeset/commit mapping
+* connect to server pushing to
+  - figure out needs (use heads/bookmarks for haves)
+* create packfile with needed objects
+  - some delta compression if possible (?)
+* upload packfile, remove temp packfile
+
+* convert tags to git
--- a/dulwich/objects.py
+++ b/dulwich/objects.py
@@ -363,7 +363,10 @@
         return [(mode, name, hexsha) for (name, (mode, hexsha)) in self._entries.iteritems()]
 
     def entry(self, name):
-        return self._entries[name]
+        try:
+            return self._entries[name]
+        except:
+            return (None, None)
         
     def iteritems(self):
         for name in sorted(self._entries.keys()):
--- a/git_handler.py
+++ b/git_handler.py
@@ -58,7 +58,7 @@
         # import heads as remote references
         todo = []
         done = set()
-        convert_list = []
+        convert_list = {}
         
         # get a list of all the head shas
         for head, sha in self.git.heads().iteritems():
@@ -73,14 +73,15 @@
                 continue
             done.add(sha)
             commit = self.git.commit(sha)
-            convert_list.append(commit)
+            convert_list[sha] = commit
             todo.extend([p for p in commit.parents if p not in done])
         
         # sort the commits by commit date (TODO: change to topo sort)
-        convert_list.sort(cmp=lambda x,y: x.commit_time-y.commit_time)
+        commits = TopoSort(convert_list).items()
         
         # import each of the commits, oldest first
-        for commit in convert_list:
+        for csha in commits:
+            commit = convert_list[csha]
             self.import_git_commit(commit)
     
         # TODO : update Hg bookmarks (possibly named heads?)
@@ -109,8 +110,7 @@
             pass
 
         files = self.git.get_files_changed(commit)
-
-        print files
+        #print files
 
         # get a list of the changed, added, removed files
         extra = {}
@@ -147,3 +147,105 @@
                 return transport(host), "/"+path
         # if its not git or git+ssh, try a local url..
         return SubprocessGitClient(), uri
+
+
+"""
+   Tarjan's algorithm and topological sorting implementation in Python
+   by Paul Harrison
+   Public domain, do with it as you will
+"""
+class TopoSort(object):
+    
+    def __init__(self, commitdict):
+        self._sorted = self.robust_topological_sort(commitdict)
+        self._shas = []
+        for level in self._sorted:
+            self._shas.append(level[0])
+            
+    def items(self):
+        self._shas.reverse()
+        return self._shas
+        
+    def strongly_connected_components(self, graph):
+        """ Find the strongly connected components in a graph using
+            Tarjan's algorithm.
+
+            graph should be a dictionary mapping node names to
+            lists of successor nodes.
+            """
+
+        result = [ ]
+        stack = [ ]
+        low = { }
+
+        def visit(node):
+            if node in low: return
+
+            num = len(low)
+            low[node] = num
+            stack_pos = len(stack)
+            stack.append(node)
+
+            for successor in graph[node].parents:
+                visit(successor)
+                low[node] = min(low[node], low[successor])
+
+            if num == low[node]:
+                component = tuple(stack[stack_pos:])
+                del stack[stack_pos:]
+                result.append(component)
+                for item in component:
+                    low[item] = len(graph)
+
+        for node in graph:
+            visit(node)
+
+        return result
+
+
+    def topological_sort(self, graph):
+        count = { }
+        for node in graph:
+            count[node] = 0
+        for node in graph:
+            for successor in graph[node]:
+                count[successor] += 1
+
+        ready = [ node for node in graph if count[node] == 0 ]
+
+        result = [ ]
+        while ready:
+            node = ready.pop(-1)
+            result.append(node)
+
+            for successor in graph[node]:
+                count[successor] -= 1
+                if count[successor] == 0:
+                    ready.append(successor)
+
+        return result
+
+
+    def robust_topological_sort(self, graph):
+        """ First identify strongly connected components,
+            then perform a topological sort on these components. """
+
+        components = self.strongly_connected_components(graph)
+
+        node_component = { }
+        for component in components:
+            for node in component:
+                node_component[node] = component
+
+        component_graph = { }
+        for component in components:
+            component_graph[component] = [ ]
+
+        for node in graph:
+            node_c = node_component[node]
+            for successor in graph[node].parents:
+                successor_c = node_component[successor]
+                if node_c != successor_c:
+                    component_graph[node_c].append(successor_c) 
+
+        return self.topological_sort(component_graph)