# HG changeset patch # User Scott Chacon # Date 1241041932 25200 # Node ID bd8fa1a9288866fe471ae1c9b28e9de558f0018c # Parent eb05594fc3c8f243337eb714eaf9982ee82b3a16# Parent 52d45a331c92dd54d0f2376be68b08c9ec8c0e95 Merged in defunkts changes diff --git a/Makefile b/Makefile --- a/Makefile +++ b/Makefile @@ -10,7 +10,7 @@ cd tests && $(PYTHON) run-tests.py --with-hg=`which hg` $(TESTFLAGS) test-%: - cd tests && $(PYTHON) run-tests.py $(TESTFLAGS) $@ + cd tests && $(PYTHON) run-tests.py --with-hg=`which hg` $(TESTFLAGS) $@ .PHONY: help all local build doc clean install install-bin install-doc \ install-home install-home-bin install-home-doc dist dist-notests tests \ update-pot diff --git a/TODO.txt b/TODO.txt --- a/TODO.txt +++ b/TODO.txt @@ -1,7 +1,6 @@ GENERAL ========== -* respect file modes on conversions -* explicit file renames +* integrate as native protocol handler (hg push git://...) * more tests * submodules? * .gitignore, etc - try to convert? @@ -10,31 +9,26 @@ PUSH ========== * push with branch names (w/ proper error messages) -* update 'remote' references after push confirmation -* push confirmation? is there extra data after the packfile upload? -* output something after process is complete (master -> master etc) - - explain what branch mapping policy determined +* explain what branch mapping policy determined when updating refs * convert tags to git -* octopus merge explode re-implode FETCH =========== -* add removed files to filechanged and raise IOError -* gfetch command -* only try to import non-mapped commits * tag conversion -* save git metadata (committers, unless it's the same) as extra info -* octopus merge explode + +MAPPING ISSUES +============== +Created in Hg: +* explicit file renames +* named branches + +Created in Git: +* octopus merge explode/implode +* different committer in Git objects + SPEED/EFFICIENCY ================ * switch object mapping to hg->git since the many to one is that direction * don't send blobs/trees already on server - -MILESTONES -============= -* Hg<->Git<->Hg - - create a repo in Hg - - push to a Git server - - clone into another Hg client - - verify that the node ids are identical +* packfile creation benchmarking (seems to take a while sometimes) \ No newline at end of file diff --git a/__init__.py b/__init__.py --- a/__init__.py +++ b/__init__.py @@ -78,8 +78,10 @@ git = GitHandler(repo, ui) git.clear() -def gfetch(ui, repo): +def gfetch(ui, repo, remote_name='origin'): repo.ui.status(_("pulling from git url\n")) + git = GitHandler(repo, ui) + git.fetch(remote_name) commands.norepo += " gclone" cmdtable = { diff --git a/dulwich/client.py b/dulwich/client.py --- a/dulwich/client.py +++ b/dulwich/client.py @@ -36,7 +36,7 @@ from pack import ( write_pack_data, ) - +from objects import sha_to_hex def _fileno_can_read(fileno): return len(select.select([fileno], [], [], 0)[0]) > 0 @@ -65,10 +65,8 @@ return ret return None - CAPABILITIES = ["multi_ack", "side-band-64k", "ofs-delta"] - class GitClient(object): """Git smart server client. @@ -89,8 +87,8 @@ self.proto = Protocol(read, write, report_activity) self._can_read = can_read self._capabilities = list(CAPABILITIES) - if thin_packs: - self._capabilities.append("thin-pack") + #if thin_packs: + # self._capabilities.append("thin-pack") def capabilities(self): return " ".join(self._capabilities) @@ -118,26 +116,41 @@ if not changed_refs: print 'nothing changed' self.proto.write_pkt_line(None) - return - self.proto.write_pkt_line("%s %s %s\0%s" % (changed_refs[0][0], changed_refs[0][1], changed_refs[0][2], self.capabilities())) + return None + return_refs = copy.copy(changed_refs) + want = [] have = [] - for changed_ref in changed_refs[:]: - self.proto.write_pkt_line("%s %s %s" % changed_ref) + sent_capabilities = False + for changed_ref in changed_refs: + if sent_capabilities: + self.proto.write_pkt_line("%s %s %s" % changed_ref) + else: + self.proto.write_pkt_line("%s %s %s\0%s" % (changed_ref[0], changed_ref[1], changed_ref[2], self.capabilities())) + sent_capabilities = True want.append(changed_ref[1]) if changed_ref[0] != "0"*40: have.append(changed_ref[0]) self.proto.write_pkt_line(None) shas = generate_pack_contents(want, have) + # write packfile contents to a temp file (fd, tmppath) = tempfile.mkstemp(suffix=".pack") f = os.fdopen(fd, 'w') (entries, sha) = write_pack_data(f, shas, len(shas)) + # write that temp file to our filehandle f = open(tmppath, "r") self.proto.write_file(f) self.proto.write(sha) f.close() + + # read the final confirmation sha + sha = self.proto.read(20) + if sha: + print "CONFIRM: " + sha_to_hex(sha) + + return return_refs def fetch_pack(self, path, determine_wants, graph_walker, pack_data, progress): """Retrieve a pack from a git smart server. @@ -207,7 +220,7 @@ :param path: Path of the repository on the remote host """ self.proto.send_cmd("git-receive-pack", path, "host=%s" % self.host) - super(TCPGitClient, self).send_pack(path, changed_refs, generate_pack_contents) + return super(TCPGitClient, self).send_pack(path, changed_refs, generate_pack_contents) def fetch_pack(self, path, determine_wants, graph_walker, pack_data, progress): """Fetch a pack from the remote host. @@ -244,7 +257,7 @@ def send_pack(self, path, changed_refs, generate_pack_contents): client = self._connect("git-receive-pack", path) - client.send_pack(path, changed_refs, generate_pack_contents) + return client.send_pack(path, changed_refs, generate_pack_contents) def fetch_pack(self, path, determine_wants, graph_walker, pack_data, progress): @@ -300,7 +313,7 @@ def send_pack(self, path, changed_refs, generate_pack_contents): remote = get_ssh_vendor().connect_ssh(self.host, ["git-receive-pack '%s'" % path], port=self.port) client = GitClient(lambda: _fileno_can_read(remote.proc.stdout.fileno()), remote.recv, remote.send, *self._args, **self._kwargs) - client.send_pack(path, changed_refs, generate_pack_contents) + return client.send_pack(path, changed_refs, generate_pack_contents) def fetch_pack(self, path, determine_wants, graph_walker, pack_data, progress): remote = get_ssh_vendor().connect_ssh(self.host, ["git-upload-pack '%s'" % path], port=self.port) diff --git a/dulwich/object_store.py b/dulwich/object_store.py --- a/dulwich/object_store.py +++ b/dulwich/object_store.py @@ -176,7 +176,7 @@ :param path: Path to the pack file. """ p = PackData(path) - entries = p.sorted_entries() + entries = p.sorted_entries(self.get_raw) basename = os.path.join(self.pack_dir, "pack-%s" % iter_sha1(entry[0] for entry in entries)) write_pack_index_v2(basename+".idx", entries, p.get_stored_checksum()) diff --git a/dulwich/pack.py b/dulwich/pack.py --- a/dulwich/pack.py +++ b/dulwich/pack.py @@ -508,7 +508,6 @@ postponed = defaultdict(list) class Postpone(Exception): """Raised to postpone delta resolving.""" - def get_ref_text(sha): assert len(sha) == 20 if sha in found: diff --git a/dulwich/repo.py b/dulwich/repo.py --- a/dulwich/repo.py +++ b/dulwich/repo.py @@ -346,7 +346,7 @@ # takes a hash of the commit data # {'author': 'Scott Chacon 1240868341 -0700' # 'committer': 'Scott Chacon 1240868341 -0700', - # 'message': 'test commit two\n\n--HG EXTRAS--\nbranch : default\n', + # 'message': 'test commit two\n\n--HG--\nbranch : default\n', # 'tree': '36a63c12d097b487e4ed634c34d2f80870e64f68', # 'parents': ['ca82a6dff817ec66f44342007202690a93763949'], # } @@ -412,25 +412,42 @@ # takes a commit and returns an array of the files that were changed # between that commit and it's parents def get_files_changed(self, commit): - def filenames(basetree, comptree, prefix): + basefiles = set() changes = list() csha = None ctree = None - for (bmode, bname, bsha) in basetree.entries(): - if bmode == 57344: # TODO : properly handle submodules - continue - bobj = self.get_object(bsha) - if comptree: - (cmode, csha) = comptree.entry(bname) - if csha != bsha: - if isinstance (bobj, Blob): - changes.append (prefix + bname) - elif isinstance(bobj, Tree): - if csha: - ctree = self.get_object(csha) - changes.extend (filenames (bobj, ctree, prefix + bname + '/')) - # TODO: handle removals? + cmode = None + if basetree: + for (bmode, bname, bsha) in basetree.entries(): + if bmode == 57344: # TODO : properly handle submodules + continue + basefiles.add(bname) + bobj = self.get_object(bsha) + if comptree: + (cmode, csha) = comptree.entry(bname) + if not ((csha == bsha) and (cmode == bmode)): + if isinstance (bobj, Blob): + changes.append (prefix + bname) + elif isinstance(bobj, Tree): + if csha: + ctree = self.get_object(csha) + changes.extend(filenames(bobj, + ctree, + prefix + bname + '/')) + + # handle removals + if comptree: + for (bmode, bname, bsha, ) in comptree.entries(): + if bmode == 57344: # TODO: hande submodles + continue + if bname not in basefiles: + bobj = self.get_object(bsha) + if isinstance(bobj, Blob): + changes.append(prefix + bname) + elif isinstance(bobj, Tree): + changes.extend(filenames(None, bobj, + prefix + bname + '/')) return changes all_changes = list() diff --git a/git_handler.py b/git_handler.py --- a/git_handler.py +++ b/git_handler.py @@ -14,7 +14,7 @@ Tree, hex_to_sha ) - + import math def seconds_to_offset(time): @@ -78,7 +78,7 @@ return self._map_hg[hgsha] else: return None - + def load_map(self): self._map_git = {} self._map_hg = {} @@ -113,8 +113,9 @@ def fetch(self, remote_name): self.ui.status(_("fetching from : " + remote_name + "\n")) self.export_git_objects() - self.fetch_pack(remote_name) - self.import_git_objects(remote_name) + refs = self.fetch_pack(remote_name) + if refs: + self.import_git_objects(remote_name) self.save_map() def push(self, remote_name): @@ -141,13 +142,13 @@ print "URL for " + remote_name + " : " + name else: print "No remote named : " + remote_name - return + return def remote_list(self): for key, value in self._config.iteritems(): if key[0:6] == 'remote': print key + "\t" + value - + def remote_name_to_url(self, remote_name): return self._config['remote.' + remote_name + '.url'] @@ -155,25 +156,25 @@ # TODO : if bookmarks exist, add them as git branches c = self.map_git_get(hex(self.repo.changelog.tip())) self.git.set_ref('refs/heads/master', c) - + def export_git_objects(self): print "exporting git objects" for rev in self.repo.changelog: self.export_hg_commit(rev) - + # convert this commit into git objects # go through the manifest, convert all blobs/trees we don't have # write the commit object (with metadata info) def export_hg_commit(self, rev): # return if we've already processed this - node = self.repo.changelog.lookup(rev) + node = self.repo.changelog.lookup(rev) phgsha = hex(node) pgit_sha = self.map_git_get(phgsha) if pgit_sha: return pgit_sha - - print "converting revision " + str(rev) - + + self.ui.status("converting revision " + str(rev) + "\n") + # make sure parents are converted first parents = self.repo.parents(rev) for parent in parents: @@ -183,37 +184,44 @@ if not p_rev == -1: if not git_sha: self.export_hg_commit(p_rev) - + ctx = self.repo.changectx(rev) tree_sha = self.write_git_tree(ctx) - + # TODO : something with tags? # TODO : explicit file renaming, copying? - + commit = {} commit['tree'] = tree_sha (time, timezone) = ctx.date() - commit['author'] = ctx.user() + ' ' + str(int(time)) + ' ' + seconds_to_offset(timezone) + commit['author'] = ctx.user() + ' ' + str(int(time)) + ' ' + seconds_to_offset(timezone) message = ctx.description() commit['message'] = ctx.description() - commit['message'] += "\n\n--HG--\n" - commit['message'] += "branch : " + ctx.branch() + "\n" + # HG EXTRA INFORMATION + add_extras = False + if not ctx.branch() == 'default': + add_extras = True + + if add_extras: + commit['message'] += "\n\n--HG--\n" + commit['message'] += "branch : " + ctx.branch() + "\n" + commit['parents'] = [] for parent in parents: hgsha = hex(parent.node()) git_sha = self.map_git_get(hgsha) if git_sha: commit['parents'].append(git_sha) - + commit_sha = self.git.write_commit_hash(commit) # writing new blobs to git self.map_set(commit_sha, phgsha) return commit_sha - + def write_git_tree(self, ctx): trees = {} man = ctx.manifest() - for filenm in man.keys(): + for filenm in man.keys(): # write blob if not in our git database fctx = ctx.filectx(filenm) is_exec = 'x' in fctx.flags() @@ -226,14 +234,27 @@ parts = filenm.split('/') if len(parts) > 1: - # get filename and path for leading subdir filepath = parts[-1:][0] dirpath = "/".join([v for v in parts[0:-1]]) + '/' # get subdir name and path for parent dir - parentsub = parts[-2:][0] - parentpath = "/".join([v for v in parts[0:-2]]) + '/' + parpath = '/' + nparpath = '/' + for part in parts[0:-1]: + if nparpath == '/': + nparpath = part + '/' + else: + nparpath += part + '/' + + treeentry = ['tree', part + '/', nparpath] + + if parpath not in trees: + trees[parpath] = [] + if treeentry not in trees[parpath]: + trees[parpath].append( treeentry ) + + parpath = nparpath # set file entry fileentry = ['blob', filepath, blob_sha, is_exec, is_link] @@ -241,24 +262,18 @@ trees[dirpath] = [] trees[dirpath].append(fileentry) - # set directory entry - treeentry = ['tree', parentsub + '/', dirpath] - if parentpath not in trees: - trees[parentpath] = [] - if treeentry not in trees[parentpath]: - trees[parentpath].append( treeentry ) else: - fileentry = ['blob', parts[0], blob_sha, is_exec, is_link] + fileentry = ['blob', parts[0], blob_sha, is_exec, is_link] if '/' not in trees: trees['/'] = [] trees['/'].append(fileentry) - + # sort by tree depth, so we write the deepest trees first dirs = trees.keys() dirs.sort(lambda a, b: len(b.split('/'))-len(a.split('/'))) dirs.remove('/') dirs.append('/') - + # write all the trees tree_sha = None tree_shas = {} @@ -273,7 +288,7 @@ tree_sha = self.git.write_tree_array(tree_data) # writing new trees to git tree_shas[dirnm] = tree_sha return tree_sha # should be the last root tree sha - + def remote_head(self, remote_name): for head, sha in self.git.remote_refs(remote_name).iteritems(): if head == 'HEAD': @@ -286,37 +301,44 @@ changed = self.get_changed_refs genpack = self.generate_pack_contents try: - client.send_pack(path, changed, genpack) - # TODO : self.git.set_remote_refs(refs, remote_name) + self.ui.status("creating and sending data\n") + changed_refs = client.send_pack(path, changed, genpack) + if changed_refs: + new_refs = {} + for old, new, ref in changed_refs: + self.ui.status(" "+ remote_name + "::" + ref + " : GIT:" + old[0:8] + " => GIT:" + new[0:8] + "\n") + new_refs[ref] = new + self.git.set_remote_refs(new_refs, remote_name) + self.update_hg_bookmarks(remote_name) except: raise # TODO : for now, we'll just push all heads that match remote heads # * we should have specified push, tracking branches and --all - # takes a dict of refs:shas from the server and returns what should be + # takes a dict of refs:shas from the server and returns what should be # pushed up def get_changed_refs(self, refs): keys = refs.keys() - + changed = [] - if not keys: + if not keys: return None - + # TODO : this is a huge hack if keys[0] == 'capabilities^{}': # nothing on the server yet - first push changed.append(("0"*40, self.git.ref('master'), 'refs/heads/master')) - + for ref_name in keys: parts = ref_name.split('/') if parts[0] == 'refs': # strip off 'refs/heads' if parts[1] == 'heads': head = "/".join([v for v in parts[2:]]) local_ref = self.git.ref(ref_name) - if local_ref: + if local_ref: if not local_ref == refs[ref_name]: changed.append((refs[ref_name], local_ref, ref_name)) return changed - + # takes a list of shas the server wants and shas the server has # and generates a list of commit shas we need to push up def generate_pack_contents(self, want, have): @@ -329,8 +351,8 @@ else: shas.append(next) next = graph_walker.next() - - # so now i have the shas, need to turn them into a list of + + # so now i have the shas, need to turn them into a list of # tuples (sha, path) for ALL the objects i'm sending # TODO : don't send blobs or trees they already have def get_objects(tree, path): @@ -345,16 +367,16 @@ elif isinstance(obj, Tree): changes.extend (get_objects (obj, path + name + '/')) return changes - + objects = [] for commit_sha in shas: commit = self.git.commit(commit_sha) objects.append((commit, 'commit')) tree = self.git.get_object(commit.tree) objects.extend( get_objects(tree, '/') ) - + return objects - + def fetch_pack(self, remote_name): git_url = self.remote_name_to_url(remote_name) client, path = self.get_transport_and_path(git_url) @@ -365,7 +387,11 @@ refs = client.fetch_pack(path, determine_wants, graphwalker, f.write, sys.stdout.write) f.close() commit() - self.git.set_remote_refs(refs, remote_name) + if refs: + self.git.set_remote_refs(refs, remote_name) + else: + self.ui.status(_("nothing new on the server\n")) + return refs except: f.close() raise @@ -401,31 +427,47 @@ # import each of the commits, oldest first for csha in commits: - commit = convert_list[csha] - self.import_git_commit(commit) + if not self.map_hg_get(csha): + commit = convert_list[csha] + self.import_git_commit(commit) + + self.update_hg_bookmarks(remote_name) - # update Hg bookmarks - bms = {} - for head, sha in self.git.remote_refs(remote_name).iteritems(): - hgsha = hex_to_sha(self.map_hg_get(sha)) - if not head == 'HEAD': - bms[remote_name + '/' + head] = hgsha + def update_hg_bookmarks(self, remote_name): try: + bms = bookmarks.parse(self.repo) + for head, sha in self.git.remote_refs(remote_name).iteritems(): + hgsha = hex_to_sha(self.map_hg_get(sha)) + if not head == 'HEAD': + bms[remote_name + '/' + head] = hgsha bookmarks.write(self.repo, bms) except AttributeError: self.repo.ui.warn('creating bookmarks failed, do you have' ' bookmarks enabled?\n') - + + def convert_git_int_mode(self, mode): + convert = { + 33188: '', + 40960: 'l', + 33261: 'e'} + if mode in convert: + return convert[mode] + return '' + def import_git_commit(self, commit): print "importing: " + commit.id # TODO : look for HG metadata in the message and use it # TODO : add extra Git data (committer info) as extras to changeset - + # TODO : (?) have to handle merge contexts at some point (two parent files, etc) - # TODO : throw IOError for removed files + # TODO : Do something less coarse-grained than try/except on the + # get_file call for removed files def getfilectx(repo, memctx, f): - (e, sha, data) = self.git.get_file(commit, f) - e = '' # TODO : make this a real mode + try: + (mode, sha, data) = self.git.get_file(commit, f) + e = self.convert_git_int_mode(mode) + except TypeError: + raise IOError() return context.memfilectx(f, data, 'l' in e, 'x' in e, None) p1 = "0" * 40 @@ -457,12 +499,6 @@ gitsha = commit.id self.map_set(gitsha, p2) - def getfilectx(self, source, repo, memctx, f): - v = files[f] - data = source.getfile(f, v) - e = source.getmode(f, v) - return context.memfilectx(f, data, 'l' in e, 'x' in e, copies.get(f)) - def check_bookmarks(self): if self.ui.config('extensions', 'hgext.bookmarks') is not None: print "YOU NEED TO SETUP BOOKMARKS" @@ -483,7 +519,7 @@ def clear(self): git_dir = self.repo.join('git') mapfile = self.repo.join('git-mapfile') - if os.path.exists(git_dir): + if os.path.exists(git_dir): for root, dirs, files in os.walk(git_dir, topdown=False): for name in files: os.remove(os.path.join(root, name)) @@ -492,7 +528,7 @@ os.rmdir(git_dir) if os.path.exists(mapfile): os.remove(mapfile) - + '' """ diff --git a/tests/test-file-removal b/tests/test-file-removal --- a/tests/test-file-removal +++ b/tests/test-file-removal @@ -36,8 +36,14 @@ echo beta > beta git add beta commit -m 'add beta' +mkdir foo +echo blah > foo/bar +git add foo +commit -m 'add foo' git rm alpha commit -m 'remove alpha' +git rm foo/bar +commit -m 'remove foo/bar' echo % final manifest in git is just beta git ls-files @@ -53,7 +59,12 @@ cd hgrepo hg log --graph -echo % make sure that alpha is not in the manifest +echo +echo % make sure alpha is not in this manifest +hg manifest -r 3 + +echo +echo % make sure that only beta is in the manifest hg manifest cd .. diff --git a/tests/test-file-removal.out b/tests/test-file-removal.out --- a/tests/test-file-removal.out +++ b/tests/test-file-removal.out @@ -1,24 +1,37 @@ Initialized empty Git repository in gitrepo/.git/ rm 'alpha' +rm 'foo/bar' % final manifest in git is just beta beta fetching from : origin exporting git objects -Counting objects: 8, done. -Compressing objects: 25% (1/4) Compressing objects: 50% (2/4) Compressing objects: 75% (3/4) Compressing objects: 100% (4/4) Compressing objects: 100% (4/4), done. -Total 8 (delta 0), reused 0 (delta 0) +Counting objects: 14, done. +Compressing objects: 12% (1/8) Compressing objects: 25% (2/8) Compressing objects: 37% (3/8) Compressing objects: 50% (4/8) Compressing objects: 62% (5/8) Compressing objects: 75% (6/8) Compressing objects: 87% (7/8) Compressing objects: 100% (8/8) Compressing objects: 100% (8/8), done. +Total 14 (delta 1), reused 0 (delta 0) importing Git objects into Hg importing: 7eeab2ea75ec1ac0ff3d500b5b6f8a3447dd7c03 importing: 9497a4ee62e16ee641860d7677cdb2589ea15554 -importing: 8467011f7e1308f296cd58266804be0b7df43474 -2 files updated, 0 files merged, 0 files removed, 0 files unresolved -@ changeset: 2:74a7df57f08e +importing: f2d0d5bfa905e12dee728b509b96cf265bb6ee43 +importing: b0edaf0adac19392cf2867498b983bc5192b41dd +importing: b991de8952c482a7cd51162674ffff8474862218 +1 files updated, 0 files merged, 0 files removed, 0 files unresolved +@ changeset: 4:ea41a3f0ed10 | tag: origin/master | tag: tip | user: test +| date: Mon Jan 01 00:00:14 2007 +0000 +| summary: remove foo/bar +| +o changeset: 3:c84537f94bcc +| user: test +| date: Mon Jan 01 00:00:13 2007 +0000 +| summary: remove alpha +| +o changeset: 2:e25450e1354f +| user: test | date: Mon Jan 01 00:00:12 2007 +0000 -| summary: remove alpha +| summary: add foo | o changeset: 1:7bcd915dc873 | user: test @@ -30,5 +43,10 @@ date: Mon Jan 01 00:00:10 2007 +0000 summary: add alpha -% make sure that alpha is not in the manifest + +% make sure alpha is not in this manifest beta +foo/bar + +% make sure that only beta is in the manifest +beta