Mercurial > hg > hg-git
changeset 76:aa2dadf04144
fixed the topo sorting and added a unit test
author | Scott Chacon <schacon@gmail.com> |
---|---|
date | Fri, 01 May 2009 20:05:30 -0700 |
parents | 466ebe5a157d |
children | d7b8768d005a |
files | git_handler.py toposort.py unit-tests/topo-test.py |
diffstat | 3 files changed, 148 insertions(+), 152 deletions(-) [+] |
line wrap: on
line diff
--- a/git_handler.py +++ b/git_handler.py @@ -1,4 +1,5 @@ import os, errno, sys, time, datetime, pickle, copy +import toposort import dulwich from dulwich.repo import Repo from dulwich.client import SimpleFetchGraphWalker @@ -359,16 +360,14 @@ def generate_pack_contents(self, want, have): graph_walker = SimpleFetchGraphWalker(want, self.git.get_parents) next = graph_walker.next() - shas = set() + shas = [] while next: if next in have: graph_walker.ack(next) else: - shas.add(next) + shas.append(next) next = graph_walker.next() - seen = [] - # so now i have the shas, need to turn them into a list of # tuples (sha, path) for ALL the objects i'm sending # TODO : don't send blobs or trees they already have @@ -378,15 +377,11 @@ for (mode, name, sha) in tree.entries(): if mode == 57344: # TODO : properly handle submodules continue - if sha in seen: - continue - obj = self.git.get_object(sha) - seen.append(sha) - if isinstance(obj, Blob): + if isinstance (obj, Blob): changes.append((obj, path + name)) elif isinstance(obj, Tree): - changes.extend(get_objects (obj, path + name + '/')) + changes.extend (get_objects (obj, path + name + '/')) return changes objects = [] @@ -529,24 +524,11 @@ # TODO : map extra parents to the extras file pass - # if committer is different than author, add it to extra - extra = {} - if not ((commit.author == commit.committer) and (commit.author_time == commit.commit_time)): - cdate = datetime.datetime.fromtimestamp(commit.commit_time).strftime("%Y-%m-%d %H:%M:%S") - extra['committer'] = commit.committer - extra['commit_time'] = cdate + files = self.git.get_files_changed(commit) # get a list of the changed, added, removed files - files = self.git.get_files_changed(commit) - - # if this is a merge commit, don't list renamed files - # i'm really confused here - this is the only way my use case will - # work, but it seems really hacky - do I need to just remove renames - # from one of the parents? AARRGH! - if not (p2 == "0"*40): - for removefile in hg_renames.values(): - files.remove(removefile) - + extra = {} + # *TODO : if committer is different than author, add it to extra text = strip_message date = datetime.datetime.fromtimestamp(commit.author_time).strftime("%Y-%m-%d %H:%M:%S") ctx = context.memctx(self.repo, (p1, p2), text, files, getfilectx, @@ -588,129 +570,3 @@ os.rmdir(git_dir) if os.path.exists(mapfile): os.remove(mapfile) - - -'' -""" - Tarjan's algorithm and topological sorting implementation in Python - by Paul Harrison - Public domain, do with it as you will -""" -class TopoSort(object): - - def __init__(self, commitdict): - self._sorted = self.robust_topological_sort(commitdict) - self._shas = [] - for level in self._sorted: - for sha in level: - self._shas.append(sha) - - def items(self): - self._shas.reverse() - return self._shas - - def strongly_connected_components(self, G): - """Returns a list of strongly connected components in G. - - Uses Tarjan's algorithm with Nuutila's modifications. - Nonrecursive version of algorithm. - - References: - - R. Tarjan (1972). Depth-first search and linear graph algorithms. - SIAM Journal of Computing 1(2):146-160. - - E. Nuutila and E. Soisalon-Soinen (1994). - On finding the strongly connected components in a directed graph. - Information Processing Letters 49(1): 9-14. - - """ - preorder={} - lowlink={} - scc_found={} - scc_queue = [] - scc_list=[] - i=0 # Preorder counter - for source in G: - if source not in scc_found: - queue=[source] - while queue: - v=queue[-1] - if v not in preorder: - i=i+1 - preorder[v]=i - done=1 - v_nbrs=G[v] - for w in v_nbrs: - if w not in preorder: - queue.append(w) - done=0 - break - if done==1: - lowlink[v]=preorder[v] - for w in v_nbrs: - if w not in scc_found: - if preorder[w]>preorder[v]: - lowlink[v]=min([lowlink[v],lowlink[w]]) - else: - lowlink[v]=min([lowlink[v],preorder[w]]) - queue.pop() - if lowlink[v]==preorder[v]: - scc_found[v]=True - scc=[v] - while scc_queue and preorder[scc_queue[-1]]>preorder[v]: - k=scc_queue.pop() - scc_found[k]=True - scc.append(k) - scc_list.append(scc) - else: - scc_queue.append(v) - scc_list.sort(lambda x, y: cmp(len(y),len(x))) - return scc_list - - def topological_sort(self, graph): - count = { } - for node in graph: - count[node] = 0 - for node in graph: - for successor in graph[node]: - count[successor] += 1 - - ready = [ node for node in graph if count[node] == 0 ] - - result = [ ] - while ready: - node = ready.pop(-1) - result.append(node) - - for successor in graph[node]: - count[successor] -= 1 - if count[successor] == 0: - ready.append(successor) - - return result - - - def robust_topological_sort(self, graph): - """ First identify strongly connected components, - then perform a topological sort on these components. """ - - components = self.strongly_connected_components(graph) - - node_component = { } - for component in components: - for node in component: - node_component[node] = component - - component_graph = { } - for component in components: - component_graph[component] = [ ] - - for node in graph: - node_c = node_component[node] - for successor in graph[node].parents: - successor_c = node_component[successor] - if node_c != successor_c: - component_graph[node_c].append(successor_c) - - return self.topological_sort(component_graph)
new file mode 100644 --- /dev/null +++ b/toposort.py @@ -0,0 +1,102 @@ +'' +""" + Tarjan's algorithm and topological sorting implementation in Python + by Paul Harrison + Public domain, do with it as you will +""" +class TopoSort(object): + + def __init__(self, commitdict): + self._sorted = self.robust_topological_sort(commitdict) + self._shas = [] + for level in self._sorted: + for sha in level: + self._shas.append(sha) + + def items(self): + self._shas.reverse() + return self._shas + + def strongly_connected_components(self, graph): + """ Find the strongly connected components in a graph using + Tarjan's algorithm. + + graph should be a dictionary mapping node names to + lists of successor nodes. + """ + + result = [ ] + stack = [ ] + low = { } + + def visit(node): + if node in low: return + + num = len(low) + low[node] = num + stack_pos = len(stack) + stack.append(node) + + for successor in graph[node].parents(): + visit(successor) + low[node] = min(low[node], low[successor]) + + if num == low[node]: + component = tuple(stack[stack_pos:]) + del stack[stack_pos:] + result.append(component) + for item in component: + low[item] = len(graph) + + for node in graph: + visit(node) + + return result + + + def topological_sort(self, graph): + count = { } + for node in graph: + count[node] = 0 + for node in graph: + for successor in graph[node]: + count[successor] += 1 + + ready = [ node for node in graph if count[node] == 0 ] + + result = [ ] + while ready: + node = ready.pop(-1) + result.append(node) + + for successor in graph[node]: + count[successor] -= 1 + if count[successor] == 0: + ready.append(successor) + + return result + + + def robust_topological_sort(self, graph): + """ First identify strongly connected components, + then perform a topological sort on these components. """ + + components = self.strongly_connected_components(graph) + + node_component = { } + for component in components: + for node in component: + node_component[node] = component + + component_graph = { } + for component in components: + component_graph[component] = [ ] + + for node in graph: + node_c = node_component[node] + for successor in graph[node].parents(): + successor_c = node_component[successor] + if node_c != successor_c: + component_graph[node_c].append(successor_c) + + return self.topological_sort(component_graph)
new file mode 100644 --- /dev/null +++ b/unit-tests/topo-test.py @@ -0,0 +1,38 @@ +import random, sys +import unittest + +sys.path.append('../') + +import toposort + +class Ob: + def __init__(self, eyedee, parents): + self._id = eyedee + self._parents = parents + + def id(self): + return self._id + + def parents(self): + return self._parents + + +class TestTopoSorting(unittest.TestCase): + + def testsort(self): + data = { + 'f' : Ob('f', ['d', 'e']), + 'd' : Ob('d', ['b']), + 'e' : Ob('e', ['c', 'g']), + 'g' : Ob('g', ['c']), + 'c' : Ob('c', ['b', 'h']), + 'b' : Ob('b', ['a']), + 'h' : Ob('h', ['a']), + 'a' : Ob('a', []), + } + d = toposort.TopoSort(data).items() + sort = ['a', 'b', 'd', 'h', 'c', 'g', 'e', 'f'] + self.assertEquals(d, sort) + +if __name__ == '__main__': + unittest.main() \ No newline at end of file