changeset 11:f2826f7b1ae5

sped up large imports significantly by caching parsed trees and sha_to_hexes
author Scott Chacon <schacon@gmail.com>
date Sun, 26 Apr 2009 11:44:28 -0700
parents 66860f141788
children 227b11d75844
files TODO.txt __init__.py dulwich/objects.py dulwich/repo.py git_handler.py
diffstat 5 files changed, 18 insertions(+), 13 deletions(-) [+]
line wrap: on
line diff
--- a/TODO.txt
+++ b/TODO.txt
@@ -5,6 +5,7 @@
 * update/add bookmarks
 * checkout the tip
 * limit to HEAD branch? (gh-pages makes weird import)
+* some sort of remote management
 
 * tag conversion
 
--- a/__init__.py
+++ b/__init__.py
@@ -14,16 +14,6 @@
 
 '''
 
-#
-# Stage One - use Git commands to do the import / pushes, all in one big uggo file
-#
-# Stage Two - implement the Git packfile generation and server communication
-#             in native Python, so we don't need Git locally and don't need
-#             to keep all the git repo data around.  We should just need a SHA
-#             mapping - since everything is append only in both systems it should
-#             be pretty simple to do. 
-#
-
 # just importing every damn thing because i don't know python that well
 # and I have no idea what I actually need
 from mercurial import util, repair, merge, cmdutil, commands, hg, url
@@ -55,7 +45,7 @@
     git.fetch(git_url)
     
     # checkout the tip
-    # hg.update(ui, dest_repo)
+    hg.update(dest_repo, None)
 
 def gpush(ui, repo):
     dest_repo.ui.status(_("pushing to git url\n"))
--- a/dulwich/objects.py
+++ b/dulwich/objects.py
@@ -50,10 +50,17 @@
     return dcomped
 
 
+# SC hacked this to keep a global dict of already hexed shas because the
+# import script calls this a bajillion times.  Will try to cache other areas
+# so this isn't called as much in the first place.
+already_hexed_shas = {}
 def sha_to_hex(sha):
     """Takes a string and returns the hex of the sha within"""
+    if sha in already_hexed_shas:
+        return already_hexed_shas[sha]
     hexsha = "".join(["%02x" % ord(c) for c in sha])
     assert len(hexsha) == 40, "Incorrect length of sha1 string: %d" % hexsha
+    already_hexed_shas[sha] = hexsha
     return hexsha
 
 
--- a/dulwich/repo.py
+++ b/dulwich/repo.py
@@ -296,8 +296,14 @@
     def commit(self, sha):
         return self._get_object(sha, Commit)
 
+    # we call this a lot on import, so we're caching it a bit
+    already_parsed_trees = {}
     def tree(self, sha):
-        return self._get_object(sha, Tree)
+        if sha in self.already_parsed_trees:
+            return self.already_parsed_trees[sha]
+        tree = self._get_object(sha, Tree)
+        self.already_parsed_trees[sha] = tree
+        return tree
 
     def tag(self, sha):
         return self._get_object(sha, Tag)
--- a/git_handler.py
+++ b/git_handler.py
@@ -159,7 +159,8 @@
         self._sorted = self.robust_topological_sort(commitdict)
         self._shas = []
         for level in self._sorted:
-            self._shas.append(level[0])
+            for sha in level:
+                self._shas.append(sha)
             
     def items(self):
         self._shas.reverse()