changeset 283:90458271e374

use a simple toposort algorithm for DAG (post order from a DFS from the heads)
author Benoit Boissinot <benoit.boissinot@ens-lyon.org>
date Wed, 24 Feb 2010 17:21:23 +0100
parents 8655c071a15d
children 12cfa77a2ab0
files hggit/git_handler.py hggit/toposort.py tests/test-octopus.out tests/test-topo-sort.py tests/test-topo-sort.py.out
diffstat 5 files changed, 28 insertions(+), 229 deletions(-) [+]
line wrap: on
line diff
--- a/hggit/git_handler.py
+++ b/hggit/git_handler.py
@@ -1,5 +1,4 @@
 import os, math, urllib, re
-import toposort
 
 from dulwich.errors import HangupException
 from dulwich.index import commit_tree
@@ -371,19 +370,25 @@
                         todo.append(sha)
 
         # traverse the heads getting a list of all the unique commits
+        commits = []
+        seen = set(todo)
         while todo:
-            sha = todo.pop()
+            sha = todo[-1]
+            if sha in done:
+                todo.pop()
+                continue
             assert isinstance(sha, str)
-            if sha in done:
-                continue
-            done.add(sha)
             obj = self.git.get_object(sha)
             assert isinstance(obj, Commit)
-            convert_list[sha] = obj
-            todo.extend([p for p in obj.parents if p not in done])
-
-        # sort the commits
-        commits = toposort.TopoSort(convert_list).items()
+            for p in obj.parents:
+                if p not in done:
+                    todo.append(p)
+                    break
+            else:
+                commits.append(sha)
+                convert_list[sha] = obj
+                done.add(sha)
+                todo.pop()
 
         commits = [commit for commit in commits if not commit in self._map_git]
         # import each of the commits, oldest first
deleted file mode 100644
--- a/hggit/toposort.py
+++ /dev/null
@@ -1,159 +0,0 @@
-''
-"""
-   Tarjan's algorithm and topological sorting implementation in Python
-   by Paul Harrison
-   Public domain, do with it as you will
-"""
-class TopoSort(object):
-
-    def __init__(self, commitdict):
-        self._sorted = self.robust_topological_sort(commitdict)
-        self._shas = []
-        for level in self._sorted:
-            for sha in level:
-                self._shas.append(sha)
-
-    def items(self):
-        self._shas.reverse()
-        return self._shas
-
-    def strongly_connected_components(self, graph):
-        """ Find the strongly connected components in a graph using
-            Tarjan's algorithm.
-
-            graph should be a dictionary mapping node names to
-            lists of successor nodes.
-            """
-
-        result = [ ]
-        stack = [ ]
-        low = { }
-
-        def visit(node):
-            if node in low: return
-
-            num = len(low)
-            low[node] = num
-            stack_pos = len(stack)
-            stack.append(node)
-
-            for successor in graph[node].parents:
-                visit(successor)
-                low[node] = min(low[node], low[successor])
-
-            if num == low[node]:
-                component = tuple(stack[stack_pos:])
-                del stack[stack_pos:]
-                result.append(component)
-                for item in component:
-                    low[item] = len(graph)
-
-        for node in graph:
-            visit(node)
-
-        return result
-
-    def strongly_connected_components_non(self, G):
-        """Returns a list of strongly connected components in G.
-
-         Uses Tarjan's algorithm with Nuutila's modifications.
-         Nonrecursive version of algorithm.
-
-         References:
-
-          R. Tarjan (1972). Depth-first search and linear graph algorithms.
-          SIAM Journal of Computing 1(2):146-160.
-
-          E. Nuutila and E. Soisalon-Soinen (1994).
-          On finding the strongly connected components in a directed graph.
-          Information Processing Letters 49(1): 9-14.
-
-         """
-        preorder={}
-        lowlink={}
-        scc_found={}
-        scc_queue = []
-        scc_list=[]
-        i=0     # Preorder counter
-        for source in G:
-            if source not in scc_found:
-                queue=[source]
-                while queue:
-                    v=queue[-1]
-                    if v not in preorder:
-                        i=i+1
-                        preorder[v]=i
-                    done=1
-                    v_nbrs=G[v]
-                    for w in v_nbrs.parents:
-                        if w not in preorder:
-                            queue.append(w)
-                            done=0
-                            break
-                    if done==1:
-                        lowlink[v]=preorder[v]
-                        for w in v_nbrs.parents:
-                            if w not in scc_found:
-                                if preorder[w]>preorder[v]:
-                                    lowlink[v]=min([lowlink[v],lowlink[w]])
-                                else:
-                                    lowlink[v]=min([lowlink[v],preorder[w]])
-                        queue.pop()
-                        if lowlink[v]==preorder[v]:
-                            scc_found[v]=True
-                            scc=(v,)
-                            while scc_queue and preorder[scc_queue[-1]]>preorder[v]:
-                                k=scc_queue.pop()
-                                scc_found[k]=True
-                                scc.append(k)
-                            scc_list.append(scc)
-                        else:
-                            scc_queue.append(v)
-        scc_list.sort(lambda x, y: cmp(len(y),len(x)))
-        return scc_list
-
-    def topological_sort(self, graph):
-        count = { }
-        for node in graph:
-            count[node] = 0
-        for node in graph:
-            for successor in graph[node]:
-                count[successor] += 1
-
-        ready = [ node for node in graph if count[node] == 0 ]
-
-        result = [ ]
-        while ready:
-            node = ready.pop(-1)
-            result.append(node)
-
-            for successor in graph[node]:
-                count[successor] -= 1
-                if count[successor] == 0:
-                    ready.append(successor)
-
-        return result
-
-    def robust_topological_sort(self, graph):
-        """ First identify strongly connected components,
-            then perform a topological sort on these components. """
-
-        components = self.strongly_connected_components_non(graph)
-        
-        node_component = { }
-        for component in components:
-            for node in component:
-                node_component[node] = component
-
-        component_graph = { }
-        for component in components:
-            component_graph[component] = [ ]
-
-        for node in graph:
-            node_c = node_component[node]
-            for successor in graph[node].parents:
-                successor_c = node_component[successor]
-                if node_c != successor_c:
-                    component_graph[node_c].append(successor_c)
-
-        return self.topological_sort(component_graph)
--- a/tests/test-octopus.out
+++ b/tests/test-octopus.out
@@ -24,7 +24,7 @@
 |\   tag:         master
 | |  tag:         default/master
 | |  tag:         tip
-| |  parent:      1:1436150b86c2
+| |  parent:      3:1436150b86c2
 | |  parent:      4:7f6c791a169f
 | |  user:        test <test@example.org>
 | |  date:        Mon Jan 01 00:00:13 2007 +0000
@@ -32,31 +32,31 @@
 | |
 | o    changeset:   4:7f6c791a169f
 | |\   parent:      2:7bcd915dc873
-| | |  parent:      3:37c124f2d0a0
+| | |  parent:      1:37c124f2d0a0
 | | |  user:        test <test@example.org>
 | | |  date:        Mon Jan 01 00:00:13 2007 +0000
 | | |  summary:     Merge branches 'branch1' and 'branch2'
 | | |
-| | o  changeset:   3:37c124f2d0a0
-| | |  tag:         branch2
-| | |  tag:         default/branch2
+o | |  changeset:   3:1436150b86c2
 | | |  parent:      0:3442585be8a6
 | | |  user:        test <test@example.org>
-| | |  date:        Mon Jan 01 00:00:12 2007 +0000
-| | |  summary:     add gamma
+| | |  date:        Mon Jan 01 00:00:13 2007 +0000
+| | |  summary:     add delta
 | | |
-| o |  changeset:   2:7bcd915dc873
-| |/   tag:         branch1
++---o  changeset:   2:7bcd915dc873
+| |    tag:         branch1
 | |    tag:         default/branch1
 | |    parent:      0:3442585be8a6
 | |    user:        test <test@example.org>
 | |    date:        Mon Jan 01 00:00:11 2007 +0000
 | |    summary:     add beta
 | |
-o |  changeset:   1:1436150b86c2
-|/   user:        test <test@example.org>
-|    date:        Mon Jan 01 00:00:13 2007 +0000
-|    summary:     add delta
+| o  changeset:   1:37c124f2d0a0
+|/   tag:         branch2
+|    tag:         default/branch2
+|    user:        test <test@example.org>
+|    date:        Mon Jan 01 00:00:12 2007 +0000
+|    summary:     add gamma
 |
 o  changeset:   0:3442585be8a6
    user:        test <test@example.org>
deleted file mode 100755
--- a/tests/test-topo-sort.py
+++ /dev/null
@@ -1,45 +0,0 @@
-import random, os, sys
-
-sys.path.append(os.path.join(os.path.dirname(__file__), os.path.pardir))
-
-from hggit import toposort
-
-class Ob:
-    def __init__(self, eyedee, parents):
-        self._id = eyedee
-        self.parents = parents
-
-    def id(self):
-        return self._id
-
-#   f
-#  /\
-# e  \
-# |\  \
-# | g |
-# |/  |
-# c   d
-# |\ /
-# h b
-# |/
-# a
-
-def testsort():
-    data = {
-     'f' : Ob('f', ['d', 'e']),
-     'd' : Ob('d', ['b']),
-     'e' : Ob('e', ['c', 'g']),
-     'g' : Ob('g', ['c']),
-     'c' : Ob('c', ['b', 'h']),
-     'b' : Ob('b', ['a']),
-     'h' : Ob('h', ['a']),
-     'a' : Ob('a', []),
-    }
-    d = toposort.TopoSort(data).items()
-    sort = ['a', 'b', 'd', 'h', 'c', 'g', 'e', 'f']
-    print '%% should sort to %r' % (sort, )
-    print d
-
-
-if __name__ == '__main__':
-    testsort()
deleted file mode 100644
--- a/tests/test-topo-sort.py.out
+++ /dev/null
@@ -1,2 +0,0 @@
-% should sort to ['a', 'b', 'd', 'h', 'c', 'g', 'e', 'f']
-['a', 'b', 'd', 'h', 'c', 'g', 'e', 'f']