changeset 50:d274092e3b24

Hacky implementation of file removals.
author Augie Fackler <durin42@gmail.com>
date Tue, 28 Apr 2009 19:33:03 -0700
parents 9f3f79a209d6
children 1421d04f1ad2
files dulwich/repo.py git_handler.py tests/test-file-removal tests/test-file-removal.out
diffstat 4 files changed, 110 insertions(+), 61 deletions(-) [+]
line wrap: on
line diff
--- a/dulwich/repo.py
+++ b/dulwich/repo.py
@@ -412,25 +412,41 @@
     # takes a commit and returns an array of the files that were changed
     # between that commit and it's parents
     def get_files_changed(self, commit):
-
         def filenames(basetree, comptree, prefix):
+            basefiles = set()
             changes = list()
             csha = None
             ctree = None
-            for (bmode, bname, bsha) in basetree.entries():
-                if bmode == 57344: # TODO : properly handle submodules
-                    continue
-                bobj = self.get_object(bsha)
-                if comptree:
-                    (cmode, csha) = comptree.entry(bname)
-                if csha != bsha:
-                    if isinstance (bobj, Blob):
-                        changes.append (prefix + bname)
-                    elif isinstance(bobj, Tree):
-                        if csha:
-                            ctree = self.get_object(csha)
-                        changes.extend (filenames (bobj, ctree, prefix + bname + '/'))
-            # TODO: handle removals?
+            if basetree:
+                for (bmode, bname, bsha) in basetree.entries():
+                    if bmode == 57344: # TODO : properly handle submodules
+                        continue
+                    basefiles.add(bname)
+                    bobj = self.get_object(bsha)
+                    if comptree:
+                        (cmode, csha) = comptree.entry(bname)
+                    if csha != bsha:
+                        if isinstance (bobj, Blob):
+                            changes.append (prefix + bname)
+                        elif isinstance(bobj, Tree):
+                            if csha:
+                                ctree = self.get_object(csha)
+                            changes.extend(filenames(bobj,
+                                                     ctree,
+                                                     prefix + bname + '/'))
+
+            # handle removals
+            if comptree:
+                for (bmode, bname, bsha, ) in comptree.entries():
+                    if bmode == 57344: # TODO: handle submodules
+                        continue
+                    if bname not in basefiles:
+                        bobj = self.get_object(bsha)
+                        if isinstance(bobj, Blob):
+                            changes.append(prefix + bname)
+                        elif isinstance(bobj, Tree):
+                            changes.extend(filenames(None, bobj,
+                                                     prefix + bname + '/'))
             return changes
 
         all_changes = list()
--- a/git_handler.py
+++ b/git_handler.py
@@ -14,7 +14,7 @@
     Tree,
     hex_to_sha
     )
-    
+
 import math
 
 def seconds_to_offset(time):
@@ -78,7 +78,7 @@
             return self._map_hg[hgsha]
         else:
             return None
-        
+
     def load_map(self):
         self._map_git = {}
         self._map_hg = {}
@@ -141,13 +141,13 @@
             print "URL for " + remote_name + " : " + name
         else:
             print "No remote named : " + remote_name
-        return 
+        return
 
     def remote_list(self):
         for key, value in self._config.iteritems():
             if key[0:6] == 'remote':
                 print key + "\t" + value
-            
+
     def remote_name_to_url(self, remote_name):
         return self._config['remote.' + remote_name + '.url']
 
@@ -155,25 +155,25 @@
         # TODO : if bookmarks exist, add them as git branches
         c = self.map_git_get(hex(self.repo.changelog.tip()))
         self.git.set_ref('refs/heads/master', c)
-        
+
     def export_git_objects(self):
         print "exporting git objects"
         for rev in self.repo.changelog:
             self.export_hg_commit(rev)
-                
+
     # convert this commit into git objects
     # go through the manifest, convert all blobs/trees we don't have
     # write the commit object (with metadata info)
     def export_hg_commit(self, rev):
         # return if we've already processed this
-        node = self.repo.changelog.lookup(rev)        
+        node = self.repo.changelog.lookup(rev)
         phgsha = hex(node)
         pgit_sha = self.map_git_get(phgsha)
         if pgit_sha:
             return pgit_sha
-        
+
         print "converting revision " + str(rev)
-        
+
         # make sure parents are converted first
         parents = self.repo.parents(rev)
         for parent in parents:
@@ -183,37 +183,37 @@
             if not p_rev == -1:
                 if not git_sha:
                     self.export_hg_commit(p_rev)
-        
+
         ctx = self.repo.changectx(rev)
         tree_sha = self.write_git_tree(ctx)
-        
+
         # TODO : something with tags?
         # TODO : explicit file renaming, copying?
-        
+
         commit = {}
         commit['tree'] = tree_sha
         (time, timezone) = ctx.date()
-        commit['author'] = ctx.user() + ' ' + str(int(time)) + ' ' + seconds_to_offset(timezone) 
+        commit['author'] = ctx.user() + ' ' + str(int(time)) + ' ' + seconds_to_offset(timezone)
         message = ctx.description()
         commit['message'] = ctx.description()
         commit['message'] += "\n\n--HG--\n"
         commit['message'] += "branch : " + ctx.branch() + "\n"
-        
+
         commit['parents'] = []
         for parent in parents:
             hgsha = hex(parent.node())
             git_sha = self.map_git_get(hgsha)
             if git_sha:
                 commit['parents'].append(git_sha)
-            
+
         commit_sha = self.git.write_commit_hash(commit) # writing new blobs to git
         self.map_set(commit_sha, phgsha)
         return commit_sha
-        
+
     def write_git_tree(self, ctx):
         trees = {}
         man = ctx.manifest()
-        for filenm in man.keys():            
+        for filenm in man.keys():
             # write blob if not in our git database
             fctx = ctx.filectx(filenm)
             is_exec = 'x' in fctx.flags()
@@ -248,7 +248,7 @@
                 if treeentry not in trees[parentpath]:
                     trees[parentpath].append( treeentry )
             else:
-                fileentry = ['blob', parts[0], blob_sha, is_exec, is_link]                
+                fileentry = ['blob', parts[0], blob_sha, is_exec, is_link]
                 if '/' not in trees:
                     trees['/'] = []
                 trees['/'].append(fileentry)
@@ -258,7 +258,7 @@
         dirs.sort(lambda a, b: len(b.split('/'))-len(a.split('/')))
         dirs.remove('/')
         dirs.append('/')
-        
+
         # write all the trees
         tree_sha = None
         tree_shas = {}
@@ -273,7 +273,7 @@
             tree_sha = self.git.write_tree_array(tree_data) # writing new trees to git
             tree_shas[dirnm] = tree_sha
         return tree_sha # should be the last root tree sha
-                
+
     def remote_head(self, remote_name):
         for head, sha in self.git.remote_refs(remote_name).iteritems():
             if head == 'HEAD':
@@ -293,30 +293,30 @@
 
     # TODO : for now, we'll just push all heads that match remote heads
     #        * we should have specified push, tracking branches and --all
-    # takes a dict of refs:shas from the server and returns what should be 
+    # takes a dict of refs:shas from the server and returns what should be
     # pushed up
     def get_changed_refs(self, refs):
         keys = refs.keys()
-        
+
         changed = []
-        if not keys: 
+        if not keys:
             return None
-            
+
         # TODO : this is a huge hack
         if keys[0] == 'capabilities^{}': # nothing on the server yet - first push
             changed.append(("0"*40, self.git.ref('master'), 'refs/heads/master'))
-            
+
         for ref_name in keys:
             parts = ref_name.split('/')
             if parts[0] == 'refs': # strip off 'refs/heads'
                 if parts[1] == 'heads':
                     head = "/".join([v for v in parts[2:]])
                     local_ref = self.git.ref(ref_name)
-                    if local_ref: 
+                    if local_ref:
                         if not local_ref == refs[ref_name]:
                             changed.append((refs[ref_name], local_ref, ref_name))
         return changed
-        
+
     # takes a list of shas the server wants and shas the server has
     # and generates a list of commit shas we need to push up
     def generate_pack_contents(self, want, have):
@@ -329,8 +329,8 @@
             else:
                 shas.append(next)
             next = graph_walker.next()
-            
-        # so now i have the shas, need to turn them into a list of 
+
+        # so now i have the shas, need to turn them into a list of
         # tuples (sha, path) for ALL the objects i'm sending
         # TODO : don't send blobs or trees they already have
         def get_objects(tree, path):
@@ -345,16 +345,16 @@
                 elif isinstance(obj, Tree):
                     changes.extend (get_objects (obj, path + name + '/'))
             return changes
-            
+
         objects = []
         for commit_sha in shas:
             commit = self.git.commit(commit_sha)
             objects.append((commit, 'commit'))
             tree = self.git.get_object(commit.tree)
             objects.extend( get_objects(tree, '/') )
-            
+
         return objects
-        
+
     def fetch_pack(self, remote_name):
         git_url = self.remote_name_to_url(remote_name)
         client, path = self.get_transport_and_path(git_url)
@@ -420,11 +420,15 @@
         print "importing: " + commit.id
         # TODO : look for HG metadata in the message and use it
         # TODO : add extra Git data (committer info) as extras to changeset
-        
+
         # TODO : (?) have to handle merge contexts at some point (two parent files, etc)
-        # TODO : throw IOError for removed files
+        # TODO : Do something less coarse-grained than try/except on the
+        #        get_file call for removed files
         def getfilectx(repo, memctx, f):
-            (e, sha, data) = self.git.get_file(commit, f)
+            try:
+                (e, sha, data) = self.git.get_file(commit, f)
+            except TypeError:
+                raise IOError()
             e = '' # TODO : make this a real mode
             return context.memfilectx(f, data, 'l' in e, 'x' in e, None)
 
@@ -483,7 +487,7 @@
     def clear(self):
         git_dir = self.repo.join('git')
         mapfile = self.repo.join('git-mapfile')
-        if os.path.exists(git_dir):        
+        if os.path.exists(git_dir):
             for root, dirs, files in os.walk(git_dir, topdown=False):
                 for name in files:
                     os.remove(os.path.join(root, name))
@@ -492,7 +496,7 @@
             os.rmdir(git_dir)
         if os.path.exists(mapfile):
             os.remove(mapfile)
-        
+
 
 ''
 """
--- a/tests/test-file-removal
+++ b/tests/test-file-removal
@@ -36,8 +36,14 @@
 echo beta > beta
 git add beta
 commit -m 'add beta'
+mkdir foo
+echo blah > foo/bar
+git add foo
+commit -m 'add foo'
 git rm alpha
 commit -m 'remove alpha'
+git rm foo/bar
+commit -m 'remove foo/bar'
 echo % final manifest in git is just beta
 git ls-files
 
@@ -53,7 +59,12 @@
 cd hgrepo
 hg log --graph
 
-echo % make sure that alpha is not in the manifest
+echo
+echo % make sure alpha is not in this manifest
+hg manifest -r 3
+
+echo
+echo % make sure that only beta is in the manifest
 hg manifest
 
 cd ..
--- a/tests/test-file-removal.out
+++ b/tests/test-file-removal.out
@@ -1,24 +1,37 @@
 Initialized empty Git repository in gitrepo/.git/
 
 rm 'alpha'
+rm 'foo/bar'
 % final manifest in git is just beta
 beta
 fetching from : origin
 exporting git objects
-Counting objects: 8, done.
-Compressing objects:  25% (1/4)   
Compressing objects:  50% (2/4)   
Compressing objects:  75% (3/4)   
Compressing objects: 100% (4/4)   
Compressing objects: 100% (4/4), done.
-Total 8 (delta 0), reused 0 (delta 0)
+Counting objects: 14, done.
+Compressing objects:  12% (1/8)   
Compressing objects:  25% (2/8)   
Compressing objects:  37% (3/8)   
Compressing objects:  50% (4/8)   
Compressing objects:  62% (5/8)   
Compressing objects:  75% (6/8)   
Compressing objects:  87% (7/8)   
Compressing objects: 100% (8/8)   
Compressing objects: 100% (8/8), done.
+Total 14 (delta 1), reused 0 (delta 0)
 importing Git objects into Hg
 importing: 7eeab2ea75ec1ac0ff3d500b5b6f8a3447dd7c03
 importing: 9497a4ee62e16ee641860d7677cdb2589ea15554
-importing: 8467011f7e1308f296cd58266804be0b7df43474
-2 files updated, 0 files merged, 0 files removed, 0 files unresolved
-@  changeset:   2:74a7df57f08e
+importing: f2d0d5bfa905e12dee728b509b96cf265bb6ee43
+importing: b0edaf0adac19392cf2867498b983bc5192b41dd
+importing: b991de8952c482a7cd51162674ffff8474862218
+1 files updated, 0 files merged, 0 files removed, 0 files unresolved
+@  changeset:   4:ea41a3f0ed10
 |  tag:         origin/master
 |  tag:         tip
 |  user:        test <test@example.org>
+|  date:        Mon Jan 01 00:00:14 2007 +0000
+|  summary:     remove foo/bar
+|
+o  changeset:   3:c84537f94bcc
+|  user:        test <test@example.org>
+|  date:        Mon Jan 01 00:00:13 2007 +0000
+|  summary:     remove alpha
+|
+o  changeset:   2:e25450e1354f
+|  user:        test <test@example.org>
 |  date:        Mon Jan 01 00:00:12 2007 +0000
-|  summary:     remove alpha
+|  summary:     add foo
 |
 o  changeset:   1:7bcd915dc873
 |  user:        test <test@example.org>
@@ -30,5 +43,10 @@
    date:        Mon Jan 01 00:00:10 2007 +0000
    summary:     add alpha
 
-% make sure that alpha is not in the manifest
+
+% make sure alpha is not in this manifest
 beta
+foo/bar
+
+% make sure that only beta is in the manifest
+beta