changeset 2:c43c02cc803a

added dulwich library and got the script to call it for clone
author Scott Chacon <schacon@gmail.com>
date Wed, 22 Apr 2009 16:59:04 -0700
parents 0e55a2ecac87
children 9bd66568bc41
files DESIGN.txt __init__.py dulwich/__init__.py dulwich/_objects.c dulwich/_pack.c dulwich/client.py dulwich/errors.py dulwich/index.py dulwich/lru_cache.py dulwich/misc.py dulwich/object_store.py dulwich/objects.py dulwich/pack.py dulwich/protocol.py dulwich/repo.py dulwich/server.py dulwich/tests/__init__.py dulwich/tests/data/blobs/6f670c0fb53f9463760b7295fbb814e965fb20c8 dulwich/tests/data/blobs/954a536f7819d40e6f637f849ee187dd10066349 dulwich/tests/data/blobs/e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 dulwich/tests/data/commits/0d89f20333fbb1d2f3a94da77f4981373d8f4310 dulwich/tests/data/commits/5dac377bdded4c9aeb8dff595f0faeebcc8498cc dulwich/tests/data/commits/60dacdc733de308bb77bb76ce0fb0f9b44c9769e dulwich/tests/data/indexes/index dulwich/tests/data/packs/pack-bc63ddad95e7321ee734ea11a7a62d314e0d7481.idx dulwich/tests/data/packs/pack-bc63ddad95e7321ee734ea11a7a62d314e0d7481.pack dulwich/tests/data/repos/a/.git/HEAD dulwich/tests/data/repos/a/.git/index dulwich/tests/data/repos/a/.git/objects/2a/72d929692c41d8554c07f6301757ba18a65d91 dulwich/tests/data/repos/a/.git/objects/4e/f30bbfe26431a69c3820d3a683df54d688f2ec dulwich/tests/data/repos/a/.git/objects/4f/2e6529203aa6d44b5af6e3292c837ceda003f9 dulwich/tests/data/repos/a/.git/objects/7d/9a07d797595ef11344549b8d08198e48c15364 dulwich/tests/data/repos/a/.git/objects/a2/96d0bb611188cabb256919f36bc30117cca005 dulwich/tests/data/repos/a/.git/objects/a9/0fa2d900a17e99b433217e988c4eb4a2e9a097 dulwich/tests/data/repos/a/.git/objects/ff/d47d45845a8f6576491e1edb97e3fe6a850e7f dulwich/tests/data/repos/a/.git/refs/heads/master dulwich/tests/data/repos/a/a dulwich/tests/data/repos/a/b dulwich/tests/data/repos/a/c dulwich/tests/data/repos/ooo_merge/.git/HEAD dulwich/tests/data/repos/ooo_merge/.git/index dulwich/tests/data/repos/ooo_merge/.git/objects/29/69be3e8ee1c0222396a5611407e4769f14e54b dulwich/tests/data/repos/ooo_merge/.git/objects/38/74e9c60a6d149c44c928140f250d81e6381520 dulwich/tests/data/repos/ooo_merge/.git/objects/6f/670c0fb53f9463760b7295fbb814e965fb20c8 dulwich/tests/data/repos/ooo_merge/.git/objects/70/c190eb48fa8bbb50ddc692a17b44cb781af7f6 dulwich/tests/data/repos/ooo_merge/.git/objects/76/01d7f6231db6a57f7bbb79ee52e4d462fd44d1 dulwich/tests/data/repos/ooo_merge/.git/objects/90/182552c4a85a45ec2a835cadc3451bebdfe870 dulwich/tests/data/repos/ooo_merge/.git/objects/95/4a536f7819d40e6f637f849ee187dd10066349 dulwich/tests/data/repos/ooo_merge/.git/objects/b2/a2766a2879c209ab1176e7e778b81ae422eeaa dulwich/tests/data/repos/ooo_merge/.git/objects/f5/07291b64138b875c28e03469025b1ea20bc614 dulwich/tests/data/repos/ooo_merge/.git/objects/f9/e39b120c68182a4ba35349f832d0e4e61f485c dulwich/tests/data/repos/ooo_merge/.git/objects/fb/5b0425c7ce46959bec94d54b9a157645e114f5 dulwich/tests/data/repos/ooo_merge/.git/refs/heads/master dulwich/tests/data/repos/ooo_merge/a dulwich/tests/data/repos/ooo_merge/b dulwich/tests/data/repos/ooo_merge/c dulwich/tests/data/repos/simple_merge/.git/HEAD dulwich/tests/data/repos/simple_merge/.git/index dulwich/tests/data/repos/simple_merge/.git/objects/0d/89f20333fbb1d2f3a94da77f4981373d8f4310 dulwich/tests/data/repos/simple_merge/.git/objects/1b/6318f651a534b38f9c7aedeebbd56c1e896853 dulwich/tests/data/repos/simple_merge/.git/objects/29/69be3e8ee1c0222396a5611407e4769f14e54b dulwich/tests/data/repos/simple_merge/.git/objects/4c/ffe90e0a41ad3f5190079d7c8f036bde29cbe6 dulwich/tests/data/repos/simple_merge/.git/objects/5d/ac377bdded4c9aeb8dff595f0faeebcc8498cc 
dulwich/tests/data/repos/simple_merge/.git/objects/60/dacdc733de308bb77bb76ce0fb0f9b44c9769e dulwich/tests/data/repos/simple_merge/.git/objects/6f/670c0fb53f9463760b7295fbb814e965fb20c8 dulwich/tests/data/repos/simple_merge/.git/objects/70/c190eb48fa8bbb50ddc692a17b44cb781af7f6 dulwich/tests/data/repos/simple_merge/.git/objects/90/182552c4a85a45ec2a835cadc3451bebdfe870 dulwich/tests/data/repos/simple_merge/.git/objects/95/4a536f7819d40e6f637f849ee187dd10066349 dulwich/tests/data/repos/simple_merge/.git/objects/ab/64bbdcc51b170d21588e5c5d391ee5c0c96dfd dulwich/tests/data/repos/simple_merge/.git/objects/d4/bdad6549dfedf25d3b89d21f506aff575b28a7 dulwich/tests/data/repos/simple_merge/.git/objects/d8/0c186a03f423a81b39df39dc87fd269736ca86 dulwich/tests/data/repos/simple_merge/.git/objects/e6/9de29bb2d1d6434b8b29ae775ad8c2e48c5391 dulwich/tests/data/repos/simple_merge/.git/refs/heads/master dulwich/tests/data/repos/simple_merge/a dulwich/tests/data/repos/simple_merge/b dulwich/tests/data/repos/simple_merge/d dulwich/tests/data/repos/simple_merge/e dulwich/tests/data/tags/71033db03a03c6a36721efcf1968dd8f8e0cf023 dulwich/tests/data/trees/70c190eb48fa8bbb50ddc692a17b44cb781af7f6 dulwich/tests/test_client.py dulwich/tests/test_index.py dulwich/tests/test_object_store.py dulwich/tests/test_objects.py dulwich/tests/test_pack.py dulwich/tests/test_protocol.py dulwich/tests/test_repository.py
diffstat 86 files changed, 4752 insertions(+), 75 deletions(-)
new file mode 100644
--- /dev/null
+++ b/DESIGN.txt
@@ -0,0 +1,41 @@
+GitHug, the Hg-Git Plugin
+=========================
+
+This plugin is designed to allow you to push to a Git server over the Git
+protocol and to pull from a Git-based project.  All data is stored in Hg's
+native format with a mapping table.  People collaborating in Git should not
+even be able to tell that you're using Hg to collaborate on their project.
+Nothing should be kept in Git format, except perhaps for caching.
+
+(Dulwich library)
+
+May need to use the bookmarks extension to do everything better.
+
+* Cloning from a Git Repository *
+
+hg init, hg add remote, hg gfetch, hg checkout
+
+* Fetching from a Git Repository *
+
+hg gremote add (git-url)
+
+hg gfetch origin (like a hg pull)
+ - connects to server (upload-pack)
+ - gets a list of server shas
+ - sees what it does not have (maps all unmapped shas, checks list)
+ - requests needed shas
+ - fetches packfile
+    - explodes it into loose objects
+    - converts parent lineages to hg changesets
+    - updates local parents
+
+* Pushing to a Git Repository *
+
+hg gpush origin
+  - maps all unmapped shas
+  - connects to server
+  - needs/haves
+  - creates packfile of git versions of everything the server needs
+  - transfers it, updates local references (bookmarks?)
+
+
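A note on the "mapping table" the design above relies on: the plugin needs a
persistent SHA<->SHA map between Git commits and Hg changesets. A minimal
sketch of what such a table could look like (the class name, file format, and
location are illustrative only, not part of this changeset):

    # A toy SHA<->SHA map; GitHgMap and its storage format are hypothetical.
    import os

    class GitHgMap(object):
        """Persists "git-sha hg-sha" pairs, one pair per line."""

        def __init__(self, path):
            self.path = path
            self.git_to_hg = {}
            if os.path.exists(path):
                for line in open(path):
                    git_sha, hg_sha = line.split()
                    self.git_to_hg[git_sha] = hg_sha

        def add(self, git_sha, hg_sha):
            self.git_to_hg[git_sha] = hg_sha

        def save(self):
            f = open(self.path, 'w')
            try:
                for git_sha, hg_sha in self.git_to_hg.iteritems():
                    f.write("%s %s\n" % (git_sha, hg_sha))
            finally:
                f.close()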
--- a/__init__.py
+++ b/__init__.py
@@ -26,13 +26,14 @@
 
 # just importing every damn thing because i don't know python that well
 # and I have no idea what I actually need
-from mercurial import util, repair, merge, cmdutil, commands, error, hg, url
+from mercurial import util, repair, merge, cmdutil, commands, hg, url
 from mercurial import extensions, ancestor
 from mercurial.commands import templateopts
 from mercurial.node import nullrev, nullid, short
 from mercurial.i18n import _
-import os, errno
+import os, errno, sys
 import subprocess
+import dulwich
 
 def gclone(ui, git_url, hg_repo_path=None):
     ## TODO : add git_url as the default remote path
@@ -41,84 +42,54 @@
         if hg_repo_path.endswith('.git'):
             hg_repo_path = hg_repo_path[:-4]
         hg_repo_path += '-hg'
-    subprocess.call(['hg', 'init', hg_repo_path])    
-    clone_git(git_url, hg_repo_path)
-    import_git_heads(hg_repo_path)
-    
-    # check it out
-    oldwd = os.getcwd()
-    os.chdir(hg_repo_path)
-    subprocess.call(['hg', 'checkout'])    
-    os.chdir(oldwd)
+    dest_repo = hg.repository(ui, hg_repo_path, create=True)
 
-def gpull(ui, repo, source='default', **opts):
-    """fetch from a git repo
-    """
-    lock = wlock = None
-    try:
-        lock = repo.lock()
-        wlock = repo.wlock()
-        ui.write("fetching from the remote\n")
-        git_fetch(git_path())
-        import_git_heads()
-        # do the pull
-    finally:
-        del lock, wlock
+    # make the git data directory
+    git_hg_path = os.path.join(hg_repo_path, '.hg', 'git')
+    os.mkdir(git_hg_path)
+    dulwich.repo.Repo.init_bare(git_hg_path)
+    
+    # fetch the initial git data
+    git_fetch(dest_repo, git_url)
+    
+    # checkout the tip
+    # hg.update(ui, dest_repo)
 
-def gpush(ui, repo, dest='default', **opts):
-    """push to a git repo
-    """
-    lock = wlock = None
-    try:
-        lock = repo.lock()
-        wlock = repo.wlock()
-        ui.write("pushing to the remote\n")
-        # do the push
-    finally:
-        del lock, wlock
+def gpush(ui, repo):
+    dest_repo.ui.status(_("pushing to git url\n"))
+    
+def gpull(ui, repo):
+    dest_repo.ui.status(_("pulling from git url\n"))
+    
 
-def git_path(hg_path=None):
-    if hg_path:
-      return os.path.join(hg_path, '.hg', 'git-remote')
-    else:
-      return os.path.join('.hg', 'git-remote')
-
-def clone_git(git_url, hg_path=None):
-    git_initialize(git_path(hg_path), git_url)
-    git_fetch(git_path(hg_path))
-    
-def git_initialize(git_repo_path, git_url):
-    # TODO: implement this in pure python - should be strait-forward
-    oldwd = os.getcwd()
-    os.makedirs(git_repo_path)
-    os.chdir(git_repo_path)
-    subprocess.call(['git', '--bare', 'init'])
-    subprocess.call(['git', 'remote', 'add', 'origin', git_url])
-    os.chdir(oldwd)
+def git_fetch(dest_repo, git_url):
+    dest_repo.ui.status(_("fetching from git url\n"))
+    git_fetch_pack(dest_repo, git_url)
     
-def git_fetch(git_repo_path, remote='origin'):
-    # TODO: implement this in pure python
-    #       - we'll have to handle ssh and git
-    oldwd = os.getcwd()
-    os.chdir(git_repo_path)
-    subprocess.call(['git', 'fetch', remote])
-    os.chdir(oldwd)
-  
-def git_push():
-    # find all the local changesets that aren't mapped
-    # create git commit object shas and map them
-    # stick those objects in a packfile and push them up (over ssh)
-    return 0
+def git_fetch_pack(dest_repo, git_url):
+    from dulwich.repo import Repo
+    from dulwich.client import SimpleFetchGraphWalker
+    client, path = get_transport_and_path(git_url)
+    git_dir = os.path.join(dest_repo.path, 'git')
+    r = Repo(git_dir)
+    graphwalker = SimpleFetchGraphWalker(r.heads().values(), r.get_parents)
+    f, commit = r.object_store.add_pack()
+    try:
+        client.fetch_pack(path, r.object_store.determine_wants_all, graphwalker, f.write, sys.stdout.write)
+        f.close()
+        commit()
+    except:
+        f.close()
+        raise
 
-def import_git_heads(hg_path=None):
-    # go through each branch
-      # add all commits we don't have locally
-      # write a SHA<->SHA mapping table
-      # update the local branches to match
-    if not hg_path:
-      hg_path = '.'
-    return subprocess.call(['hg', 'convert', git_path(hg_path), hg_path])
-  
+def get_transport_and_path(uri):
+    from dulwich.client import TCPGitClient, SSHGitClient, SubprocessGitClient
+    for handler, transport in (("git://", TCPGitClient), ("git+ssh://", SSHGitClient)):
+        if uri.startswith(handler):
+            host, path = uri[len(handler):].split("/", 1)
+            return transport(host), "/"+path
+    # if it's not git or git+ssh, try a local url..
+    return SubprocessGitClient(), uri
         
 commands.norepo += " gclone"
 cmdtable = {
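For context, get_transport_and_path above picks a client class from the URL
scheme. A quick illustration of the three cases (the host and paths are made
up for the example):

    client, path = get_transport_and_path("git://example.com/project.git")
    # -> TCPGitClient("example.com"), "/project.git"
    client, path = get_transport_and_path("git+ssh://example.com/project.git")
    # -> SSHGitClient("example.com"), "/project.git"
    client, path = get_transport_and_path("/home/me/project")
    # -> SubprocessGitClient(), "/home/me/project"  (local fallback)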
new file mode 100644
--- /dev/null
+++ b/dulwich/__init__.py
@@ -0,0 +1,26 @@
+# __init__.py -- The git module of dulwich
+# Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
+# Copyright (C) 2008 Jelmer Vernooij <jelmer@samba.org>
+# 
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; version 2
+# of the License or (at your option) any later version of 
+# the License.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+# MA  02110-1301, USA.
+
+import client
+import protocol
+import repo
+import server
+
+__version__ = (0, 1, 1)
new file mode 100644
--- /dev/null
+++ b/dulwich/_objects.c
@@ -0,0 +1,87 @@
+/* 
+ * Copyright (C) 2009 Jelmer Vernooij <jelmer@samba.org>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License or (at your option) a later version of the License.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA  02110-1301, USA.
+ */
+
+#include <Python.h>
+
+#define hexbyte(x) (isdigit(x)?(x)-'0':(x)-'a'+0xa)
+#define bytehex(x) (((x)<0xa)?('0'+(x)):('a'-0xa+(x)))
+
+static PyObject *py_hex_to_sha(PyObject *self, PyObject *py_hexsha)
+{
+	char *hexsha;
+	char sha[20];
+	int i;
+
+	if (!PyString_Check(py_hexsha)) {
+		PyErr_SetString(PyExc_TypeError, "hex sha is not a string");
+		return NULL;
+	}
+
+	if (PyString_Size(py_hexsha) != 40) {
+		PyErr_SetString(PyExc_ValueError, "hex sha is not 40 bytes long");
+		return NULL;
+	}
+
+	hexsha = PyString_AsString(py_hexsha);
+
+	for (i = 0; i < 20; i++) {
+		sha[i] = (hexbyte(hexsha[i*2]) << 4) + hexbyte(hexsha[i*2+1]);
+	}
+
+	return PyString_FromStringAndSize(sha, 20);
+}
+
+static PyObject *py_sha_to_hex(PyObject *self, PyObject *py_sha)
+{
+	char hexsha[41];
+	unsigned char *sha;
+	int i;
+
+	if (!PyString_Check(py_sha)) {
+		PyErr_SetString(PyExc_TypeError, "sha is not a string");
+		return NULL;
+	}
+
+	if (PyString_Size(py_sha) != 20) {
+		PyErr_SetString(PyExc_ValueError, "sha is not 20 bytes long");
+		return NULL;
+	}
+
+	sha = (unsigned char *)PyString_AsString(py_sha);
+	for (i = 0; i < 20; i++) {
+		hexsha[i*2] = bytehex((sha[i] & 0xF0) >> 4);
+		hexsha[i*2+1] = bytehex(sha[i] & 0x0F);
+	}
+	
+	return PyString_FromStringAndSize(hexsha, 40);
+}
+
+static PyMethodDef py_objects_methods[] = {
+	{ "hex_to_sha", (PyCFunction)py_hex_to_sha, METH_O, NULL },
+	{ "sha_to_hex", (PyCFunction)py_sha_to_hex, METH_O, NULL },
+	{ NULL, NULL, 0, NULL },	/* sentinel required by Py_InitModule3 */
+};
+
+void init_objects(void)
+{
+	PyObject *m;
+
+	m = Py_InitModule3("_objects", py_objects_methods, NULL);
+	if (m == NULL)
+		return;
+}
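The two functions above are C accelerators for hex/binary SHA conversion. For
reference, the pure-Python equivalents are one-liners (a sketch; the actual
fallbacks live with the object code in dulwich/objects.py):

    import binascii

    def hex_to_sha(hexsha):
        # 40-char hex string -> 20 raw bytes
        return binascii.unhexlify(hexsha)

    def sha_to_hex(sha):
        # 20 raw bytes -> 40-char hex string
        return binascii.hexlify(sha)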
new file mode 100644
--- /dev/null
+++ b/dulwich/_pack.c
@@ -0,0 +1,188 @@
+/* 
+ * Copyright (C) 2009 Jelmer Vernooij <jelmer@samba.org>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License or (at your option) a later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA  02110-1301, USA.
+ */
+
+#include <Python.h>
+#include <stdint.h>
+
+static int py_is_sha(PyObject *sha)
+{
+    if (!PyString_Check(sha))
+        return 0;
+
+    if (PyString_Size(sha) != 20)
+        return 0;
+
+    return 1;
+}
+
+
+static size_t get_delta_header_size(uint8_t *delta, int *index, int length)
+{
+	size_t size = 0;
+	int i = 0;
+	while ((*index) < length) {
+		uint8_t cmd = delta[*index];
+		(*index)++;
+		size |= (cmd & ~0x80) << i;
+		i += 7;
+		if (!(cmd & 0x80))
+			break;
+	}
+	return size;
+}
+
+
+static PyObject *py_apply_delta(PyObject *self, PyObject *args)
+{
+	uint8_t *src_buf, *delta;
+	int src_buf_len, delta_len;
+	size_t src_size, dest_size;
+	size_t outindex = 0;
+	int index;
+	uint8_t *out;
+	PyObject *ret;
+
+	if (!PyArg_ParseTuple(args, "s#s#", (uint8_t *)&src_buf, &src_buf_len, 
+						  (uint8_t *)&delta, &delta_len))
+		return NULL;
+
+    index = 0;
+    src_size = get_delta_header_size(delta, &index, delta_len);
+    if (src_size != src_buf_len) {
+		PyErr_Format(PyExc_ValueError, 
+			"Unexpected source buffer size: %lu vs %d", src_size, src_buf_len);
+		return NULL;
+	}
+    dest_size = get_delta_header_size(delta, &index, delta_len);
+	ret = PyString_FromStringAndSize(NULL, dest_size);
+	if (ret == NULL) {
+		PyErr_NoMemory();
+		return NULL;
+	}
+	out = (uint8_t *)PyString_AsString(ret);
+    while (index < delta_len) {
+        char cmd = delta[index];
+        index++;
+        if (cmd & 0x80) {
+            size_t cp_off = 0, cp_size = 0;
+			int i;
+            for (i = 0; i < 4; i++) {
+                if (cmd & (1 << i)) {
+                    uint8_t x = delta[index];
+                    index++;
+                    cp_off |= x << (i * 8);
+				}
+			}
+            for (i = 0; i < 3; i++) {
+                if (cmd & (1 << (4+i))) {
+                    uint8_t x = delta[index];
+                    index++;
+                    cp_size |= x << (i * 8);
+				}
+			}
+            if (cp_size == 0)
+                cp_size = 0x10000;
+            if (cp_off + cp_size < cp_size ||
+                cp_off + cp_size > src_size ||
+                cp_size > dest_size)
+                break;
+			memcpy(out+outindex, src_buf+cp_off, cp_size);
+			outindex += cp_size;
+		} else if (cmd != 0) {
+			memcpy(out+outindex, delta+index, cmd);
+			outindex += cmd;
+            index += cmd;
+		} else {
+			PyErr_SetString(PyExc_ValueError, "Invalid opcode 0");
+			return NULL;
+		}
+	}
+    
+    if (index != delta_len) {
+		PyErr_SetString(PyExc_ValueError, "delta not empty");
+		return NULL;
+	}
+
+	if (dest_size != outindex) {
+        PyErr_SetString(PyExc_ValueError, "dest size incorrect");
+		return NULL;
+	}
+
+    return ret;
+}
+
+static PyObject *py_bisect_find_sha(PyObject *self, PyObject *args)
+{
+    PyObject *unpack_name;
+    char *sha;
+    int sha_len;
+	int start, end;
+    if (!PyArg_ParseTuple(args, "iis#O", &start, &end, 
+						  &sha, &sha_len, &unpack_name))
+        return NULL;
+
+    if (sha_len != 20) {
+        PyErr_SetString(PyExc_ValueError, "Sha is not 20 bytes long");
+        return NULL;
+    }
+    if (start > end) {
+        PyErr_SetString(PyExc_AssertionError, "start > end");
+        return NULL;
+    }
+
+    while (start <= end) {
+        PyObject *file_sha;
+        int i = (start + end)/2;
+        int cmp;
+        file_sha = PyObject_CallFunction(unpack_name, "i", i);
+        if (file_sha == NULL) {
+            return NULL;
+        }
+        if (!py_is_sha(file_sha)) {
+            PyErr_SetString(PyExc_TypeError, "unpack_name returned non-sha object");
+			Py_DECREF(file_sha);
+            return NULL;
+        }
+        cmp = memcmp(PyString_AsString(file_sha), sha, 20);
+		Py_DECREF(file_sha);
+        if (cmp < 0)
+            start = i + 1;
+        else if (cmp > 0)
+            end = i - 1;
+        else {
+			return PyInt_FromLong(i);
+        }
+    }
+    Py_RETURN_NONE;
+}
+
+
+static PyMethodDef py_pack_methods[] = {
+	{ "apply_delta", (PyCFunction)py_apply_delta, METH_VARARGS, NULL },
+	{ "bisect_find_sha", (PyCFunction)py_bisect_find_sha, METH_VARARGS, NULL },
+	{ NULL, NULL, 0, NULL },	/* sentinel required by Py_InitModule3 */
+};
+
+void init_pack(void)
+{
+	PyObject *m;
+
+	m = Py_InitModule3("_pack", py_pack_methods, NULL);
+	if (m == NULL)
+		return;
+}
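get_delta_header_size above decodes git's variable-length size encoding: each
byte contributes its low 7 bits (least significant group first) and the high
bit marks continuation. The same decoder in Python, as a sketch:

    def get_delta_header_size(delta, index):
        """Return (size, new_index) decoded from delta[index:]."""
        size = 0
        i = 0
        while index < len(delta):
            cmd = ord(delta[index])
            index += 1
            size |= (cmd & 0x7f) << i
            i += 7
            if not cmd & 0x80:
                break
        return size, index

    # Example: "\x91\x02" decodes to 0x11 | (0x02 << 7) == 273 bytes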
new file mode 100644
--- /dev/null
+++ b/dulwich/client.py
@@ -0,0 +1,296 @@
+# client.py -- Implementation of the client side git protocols
+# Copyright (C) 2008 Jelmer Vernooij <jelmer@samba.org>
+# Copyright (C) 2008 John Carr
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# or (at your option) a later version of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+# MA  02110-1301, USA.
+
+"""Client side support for the Git protocol."""
+
+__docformat__ = 'restructuredText'
+
+import os
+import select
+import socket
+import subprocess
+
+from protocol import (
+    Protocol,
+    TCP_GIT_PORT,
+    extract_capabilities,
+    )
+from pack import (
+    write_pack_data,
+    )
+
+
+def _fileno_can_read(fileno):
+    return len(select.select([fileno], [], [], 0)[0]) > 0
+
+
+class SimpleFetchGraphWalker(object):
+
+    def __init__(self, local_heads, get_parents):
+        self.heads = set(local_heads)
+        self.get_parents = get_parents
+        self.parents = {}
+
+    def ack(self, ref):
+        if ref in self.heads:
+            self.heads.remove(ref)
+        if ref in self.parents:
+            for p in self.parents[ref]:
+                self.ack(p)
+
+    def next(self):
+        if self.heads:
+            ret = self.heads.pop()
+            ps = self.get_parents(ret)
+            self.parents[ret] = ps
+            self.heads.update(ps)
+            return ret
+        return None
+
+
+CAPABILITIES = ["multi_ack", "side-band-64k", "ofs-delta"]
+
+
+class GitClient(object):
+    """Git smart server client.
+
+    """
+
+    def __init__(self, can_read, read, write, thin_packs=True, 
+        report_activity=None):
+        """Create a new GitClient instance.
+
+        :param can_read: Function that returns True if there is data available
+            to be read.
+        :param read: Callback for reading data, takes number of bytes to read
+        :param write: Callback for writing data
+        :param thin_packs: Whether or not thin packs should be retrieved
+        :param report_activity: Optional callback for reporting transport
+            activity.
+        """
+        self.proto = Protocol(read, write, report_activity)
+        self._can_read = can_read
+        self._capabilities = list(CAPABILITIES)
+        if thin_packs:
+            self._capabilities.append("thin-pack")
+
+    def capabilities(self):
+        return " ".join(self._capabilities)
+
+    def read_refs(self):
+        server_capabilities = None
+        refs = {}
+        # Receive refs from server
+        for pkt in self.proto.read_pkt_seq():
+            (sha, ref) = pkt.rstrip("\n").split(" ", 1)
+            if server_capabilities is None:
+                (ref, server_capabilities) = extract_capabilities(ref)
+            refs[ref] = sha
+        return refs, server_capabilities
+
+    def send_pack(self, path, generate_pack_contents):
+        """Upload a pack to a remote repository.
+
+        :param path: Repository path
+        :param generate_pack_contents: Function that can return the shas of the 
+            objects to upload.
+        """
+        refs, server_capabilities = self.read_refs()
+        changed_refs = [] # FIXME
+        if not changed_refs:
+            self.proto.write_pkt_line(None)
+            return
+        self.proto.write_pkt_line("%s %s %s\0%s" % (changed_refs[0][0], changed_refs[0][1], changed_refs[0][2], self.capabilities()))
+        want = []
+        have = []
+        for changed_ref in changed_refs[:]:
+            self.proto.write_pkt_line("%s %s %s" % changed_ref)
+            want.append(changed_ref[1])
+            if changed_ref[0] != "0"*40:
+                have.append(changed_ref[0])
+        self.proto.write_pkt_line(None)
+        shas = generate_pack_contents(want, have, None)
+        write_pack_data(self.proto.write, shas, len(shas))
+
+    def fetch_pack(self, path, determine_wants, graph_walker, pack_data, progress):
+        """Retrieve a pack from a git smart server.
+
+        :param determine_wants: Callback that returns list of commits to fetch
+        :param graph_walker: Object with next() and ack().
+        :param pack_data: Callback called for each bit of data in the pack
+        :param progress: Callback for progress reports (strings)
+        """
+        (refs, server_capabilities) = self.read_refs()
+        wants = determine_wants(refs)
+        if not wants:
+            self.proto.write_pkt_line(None)
+            return
+        self.proto.write_pkt_line("want %s %s\n" % (wants[0], self.capabilities()))
+        for want in wants[1:]:
+            self.proto.write_pkt_line("want %s\n" % want)
+        self.proto.write_pkt_line(None)
+        have = graph_walker.next()
+        while have:
+            self.proto.write_pkt_line("have %s\n" % have)
+            if self._can_read():
+                pkt = self.proto.read_pkt_line()
+                parts = pkt.rstrip("\n").split(" ")
+                if parts[0] == "ACK":
+                    graph_walker.ack(parts[1])
+                    assert parts[2] == "continue"
+            have = graph_walker.next()
+        self.proto.write_pkt_line("done\n")
+        pkt = self.proto.read_pkt_line()
+        while pkt:
+            parts = pkt.rstrip("\n").split(" ")
+            if parts[0] == "ACK":
+                graph_walker.ack(parts[1])
+            if len(parts) < 3 or parts[2] != "continue":
+                break
+            pkt = self.proto.read_pkt_line()
+        for pkt in self.proto.read_pkt_seq():
+            channel = ord(pkt[0])
+            pkt = pkt[1:]
+            if channel == 1:
+                pack_data(pkt)
+            elif channel == 2:
+                progress(pkt)
+            else:
+                raise AssertionError("Invalid sideband channel %d" % channel)
+
+
+class TCPGitClient(GitClient):
+    """A Git Client that works over TCP directly (i.e. git://)."""
+
+    def __init__(self, host, port=None, *args, **kwargs):
+        self._socket = socket.socket(type=socket.SOCK_STREAM)
+        if port is None:
+            port = TCP_GIT_PORT
+        self._socket.connect((host, port))
+        self.rfile = self._socket.makefile('rb', -1)
+        self.wfile = self._socket.makefile('wb', 0)
+        self.host = host
+        super(TCPGitClient, self).__init__(lambda: _fileno_can_read(self._socket.fileno()), self.rfile.read, self.wfile.write, *args, **kwargs)
+
+    def send_pack(self, path, generate_pack_contents):
+        """Send a pack to a remote host.
+
+        :param path: Path of the repository on the remote host
+        :param generate_pack_contents: Function returning the shas to upload
+        """
+        self.proto.send_cmd("git-receive-pack", path, "host=%s" % self.host)
+        super(TCPGitClient, self).send_pack(path, generate_pack_contents)
+
+    def fetch_pack(self, path, determine_wants, graph_walker, pack_data, progress):
+        """Fetch a pack from the remote host.
+        
+        :param path: Path of the repository on the remote host
+        :param determine_wants: Callback that receives available refs dict and 
+            should return list of sha's to fetch.
+        :param graph_walker: GraphWalker instance used to find missing shas
+        :param pack_data: Callback for writing pack data
+        :param progress: Callback for writing progress
+        """
+        self.proto.send_cmd("git-upload-pack", path, "host=%s" % self.host)
+        super(TCPGitClient, self).fetch_pack(path, determine_wants, graph_walker, pack_data, progress)
+
+
+class SubprocessGitClient(GitClient):
+
+    def __init__(self, *args, **kwargs):
+        self.proc = None
+        self._args = args
+        self._kwargs = kwargs
+
+    def _connect(self, service, *args):
+        argv = [service] + list(args)
+        self.proc = subprocess.Popen(argv, bufsize=0,
+                                stdin=subprocess.PIPE,
+                                stdout=subprocess.PIPE)
+        def read_fn(size):
+            return self.proc.stdout.read(size)
+        def write_fn(data):
+            self.proc.stdin.write(data)
+            self.proc.stdin.flush()
+        return GitClient(lambda: _fileno_can_read(self.proc.stdout.fileno()), read_fn, write_fn, *self._args, **self._kwargs)
+
+    def send_pack(self, path, generate_pack_contents):
+        client = self._connect("git-receive-pack", path)
+        client.send_pack(path, generate_pack_contents)
+
+    def fetch_pack(self, path, determine_wants, graph_walker, pack_data, 
+        progress):
+        client = self._connect("git-upload-pack", path)
+        client.fetch_pack(path, determine_wants, graph_walker, pack_data, progress)
+
+
+class SSHSubprocess(object):
+    """A socket-like object that talks to an ssh subprocess via pipes."""
+
+    def __init__(self, proc):
+        self.proc = proc
+
+    def send(self, data):
+        return os.write(self.proc.stdin.fileno(), data)
+
+    def recv(self, count):
+        return self.proc.stdout.read(count)
+
+    def close(self):
+        self.proc.stdin.close()
+        self.proc.stdout.close()
+        self.proc.wait()
+
+
+class SSHVendor(object):
+
+    def connect_ssh(self, host, command, username=None, port=None):
+        #FIXME: This has no way to deal with passwords..
+        args = ['ssh', '-x']
+        if port is not None:
+            args.extend(['-p', str(port)])
+        if username is not None:
+            host = "%s@%s" % (username, host)
+        args.append(host)
+        proc = subprocess.Popen(args + command,
+                                stdin=subprocess.PIPE,
+                                stdout=subprocess.PIPE)
+        return SSHSubprocess(proc)
+
+# Can be overridden by users
+get_ssh_vendor = SSHVendor
+
+
+class SSHGitClient(GitClient):
+
+    def __init__(self, host, port=None, *args, **kwargs):
+        self.host = host
+        self.port = port
+        self._args = args
+        self._kwargs = kwargs
+
+    def send_pack(self, path, generate_pack_contents):
+        remote = get_ssh_vendor().connect_ssh(self.host, ["git-receive-pack %s" % path], port=self.port)
+        client = GitClient(lambda: _fileno_can_read(remote.proc.stdout.fileno()), remote.recv, remote.send, *self._args, **self._kwargs)
+        client.send_pack(path, generate_pack_contents)
+
+    def fetch_pack(self, path, determine_wants, graph_walker, pack_data, progress):
+        remote = get_ssh_vendor().connect_ssh(self.host, ["git-upload-pack %s" % path], port=self.port)
+        client = GitClient(lambda: _fileno_can_read(remote.proc.stdout.fileno()), remote.recv, remote.send, *self._args, **self._kwargs)
+        client.fetch_pack(path, determine_wants, graph_walker, pack_data, progress)
+
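Putting the client pieces together, a fetch over git:// looks like the
git_fetch_pack helper in __init__.py above (a sketch; it assumes a bare
repository already exists at /tmp/target and a server at example.com):

    import sys
    from dulwich.repo import Repo
    from dulwich.client import TCPGitClient, SimpleFetchGraphWalker

    r = Repo("/tmp/target")
    client = TCPGitClient("example.com")
    walker = SimpleFetchGraphWalker(r.heads().values(), r.get_parents)
    f, commit = r.object_store.add_pack()
    try:
        client.fetch_pack("/project.git", r.object_store.determine_wants_all,
                          walker, f.write, sys.stdout.write)
        commit()   # commit() closes f and moves the pack into place
    except:
        f.close()
        raise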
new file mode 100644
--- /dev/null
+++ b/dulwich/errors.py
@@ -0,0 +1,105 @@
+# errors.py -- errors for dulwich
+# Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
+# 
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; version 2
+# or (at your option) any later version of the License.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+# MA  02110-1301, USA.
+
+"""Dulwich-related exception classes and utility functions."""
+
+class ChecksumMismatch(Exception):
+    """A checksum didn't match the expected contents."""
+
+    def __init__(self, expected, got, extra=None):
+        self.expected = expected
+        self.got = got
+        self.extra = extra
+        if self.extra is None:
+            Exception.__init__(self, 
+                "Checksum mismatch: Expected %s, got %s" % (expected, got))
+        else:
+            Exception.__init__(self,
+                "Checksum mismatch: Expected %s, got %s; %s" % 
+                (expected, got, extra))
+
+
+class WrongObjectException(Exception):
+    """Baseclass for all the _ is not a _ exceptions on objects.
+  
+    Do not instantiate directly.
+  
+    Subclasses should define a _type attribute that indicates what
+    was expected if they were raised.
+    """
+  
+    def __init__(self, sha, *args, **kwargs):
+        string = "%s is not a %s" % (sha, self._type)
+        Exception.__init__(self, string)
+
+
+class NotCommitError(WrongObjectException):
+    """Indicates that the sha requested does not point to a commit."""
+  
+    _type = 'commit'
+
+
+class NotTreeError(WrongObjectException):
+    """Indicates that the sha requested does not point to a tree."""
+  
+    _type = 'tree'
+
+
+class NotBlobError(WrongObjectException):
+    """Indicates that the sha requested does not point to a blob."""
+  
+    _type = 'blob'
+
+
+class MissingCommitError(Exception):
+    """Indicates that a commit was not found in the repository"""
+  
+    def __init__(self, sha, *args, **kwargs):
+        Exception.__init__(self, "%s is not in the revision store" % sha)
+
+
+class ObjectMissing(Exception):
+    """Indicates that a requested object is missing."""
+  
+    def __init__(self, sha, *args, **kwargs):
+        Exception.__init__(self, "%s is not in the pack" % sha)
+
+
+class ApplyDeltaError(Exception):
+    """Indicates that applying a delta failed."""
+    
+    def __init__(self, *args, **kwargs):
+        Exception.__init__(self, *args, **kwargs)
+
+
+class NotGitRepository(Exception):
+    """Indicates that no Git repository was found."""
+
+    def __init__(self, *args, **kwargs):
+        Exception.__init__(self, *args, **kwargs)
+
+
+class GitProtocolError(Exception):
+    """Git protocol exception."""
+    
+    def __init__(self, *args, **kwargs):
+        Exception.__init__(self, *args, **kwargs)
+
+
+class HangupException(GitProtocolError):
+    """Hangup exception."""
new file mode 100644
--- /dev/null
+++ b/dulwich/index.py
@@ -0,0 +1,163 @@
+# index.py -- Parser/writer for the git index file
+# Copyright (C) 2008 Jelmer Vernooij <jelmer@samba.org>
+ 
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; version 2
+# of the License or (at your option) any later version of the License.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+# MA  02110-1301, USA.
+
+"""Parser for the git index file format."""
+
+import struct
+
+def read_cache_time(f):
+    return struct.unpack(">LL", f.read(8))
+
+
+def write_cache_time(f, t):
+    if isinstance(t, int):
+        t = (t, 0)
+    f.write(struct.pack(">LL", *t))
+
+
+def read_cache_entry(f):
+    """Read an entry from a cache file.
+
+    :param f: File-like object to read from
+    :return: tuple with: name, ctime, mtime, ino, dev, mode, uid, gid, size, sha, flags
+    """
+    beginoffset = f.tell()
+    ctime = read_cache_time(f)
+    mtime = read_cache_time(f)
+    (ino, dev, mode, uid, gid, size, sha, flags, ) = \
+        struct.unpack(">LLLLLL20sH", f.read(20 + 4 * 6 + 2))
+    name = ""
+    char = f.read(1)
+    while char != "\0":
+        name += char
+        char = f.read(1)
+    # Padding:
+    real_size = ((f.tell() - beginoffset + 7) & ~7)
+    f.seek(beginoffset + real_size)
+    return (name, ctime, mtime, ino, dev, mode, uid, gid, size, sha, flags)
+
+
+def write_cache_entry(f, entry):
+    """Write an index entry to a file.
+
+    :param f: File object
+    :param entry: Entry to write, tuple with: 
+        (name, ctime, mtime, ino, dev, mode, uid, gid, size, sha, flags)
+    """
+    beginoffset = f.tell()
+    (name, ctime, mtime, ino, dev, mode, uid, gid, size, sha, flags) = entry
+    write_cache_time(f, ctime)
+    write_cache_time(f, mtime)
+    f.write(struct.pack(">LLLLLL20sH", ino, dev, mode, uid, gid, size, sha, flags))
+    f.write(name)
+    f.write(chr(0))
+    real_size = ((f.tell() - beginoffset + 7) & ~7)
+    f.write("\0" * ((beginoffset + real_size) - f.tell()))
+
+
+def read_index(f):
+    """Read an index file, yielding the individual entries."""
+    header = f.read(4)
+    if header != "DIRC":
+        raise AssertionError("Invalid index file header: %r" % header)
+    (version, num_entries) = struct.unpack(">LL", f.read(4 * 2))
+    assert version in (1, 2)
+    for i in range(num_entries):
+        yield read_cache_entry(f)
+
+
+def read_index_dict(f):
+    """Read an index file and return it as a dictionary.
+    
+    :param f: File object to read from
+    """
+    ret = {}
+    for x in read_index(f):
+        ret[x[0]] = tuple(x[1:])
+    return ret
+
+
+def write_index(f, entries):
+    """Write an index file.
+    
+    :param f: File-like object to write to
+    :param entries: Iterable over the entries to write
+    """
+    f.write("DIRC")
+    f.write(struct.pack(">LL", 2, len(entries)))
+    for x in entries:
+        write_cache_entry(f, x)
+
+
+def write_index_dict(f, entries):
+    """Write an index file based on the contents of a dictionary.
+
+    """
+    entries_list = []
+    for name in sorted(entries):
+        entries_list.append((name,) + tuple(entries[name]))
+    write_index(f, entries_list)
+
+
+class Index(object):
+
+    def __init__(self, filename):
+        self._filename = filename
+        self.clear()
+        self.read()
+
+    def write(self):
+        f = open(self._filename, 'wb')
+        try:
+            write_index_dict(f, self._byname)
+        finally:
+            f.close()
+
+    def read(self):
+        f = open(self._filename, 'rb')
+        try:
+            for x in read_index(f):
+                self[x[0]] = tuple(x[1:])
+        finally:
+            f.close()
+
+    def __len__(self):
+        return len(self._byname)
+
+    def __getitem__(self, name):
+        return self._byname[name]
+
+    def get_sha1(self, path):
+        return self[path][-2]
+
+    def clear(self):
+        self._byname = {}
+
+    def __setitem__(self, name, x):
+        assert isinstance(name, str)
+        assert len(x) == 10
+        # Remove the old entry if any
+        self._byname[name] = x
+
+    def iteritems(self):
+        return self._byname.iteritems()
+
+    def update(self, entries):
+        for name, value in entries.iteritems():
+            self[name] = value
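One subtlety in the index format above: each on-disk entry (62 bytes of fixed
fields plus the NUL-terminated name) is padded with NULs to an 8-byte
boundary, which is what the ((offset + 7) & ~7) arithmetic computes. A quick
worked example:

    >>> 8 + 8 + 6*4 + 20 + 2           # ctime, mtime, six ints, sha, flags
    62
    >>> ((62 + len("hello") + 1) + 7) & ~7
    72                                  # so 4 NULs pad the entry to 72 bytes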
new file mode 100644
--- /dev/null
+++ b/dulwich/lru_cache.py
@@ -0,0 +1,252 @@
+# Copyright (C) 2006, 2008 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+"""A simple least-recently-used (LRU) cache."""
+
+from collections import deque
+
+
+class LRUCache(object):
+    """A class which manages a cache of entries, removing unused ones."""
+
+    def __init__(self, max_cache=100, after_cleanup_count=None):
+        self._cache = {}
+        self._cleanup = {}
+        self._queue = deque() # Track when things are accessed
+        self._refcount = {} # number of entries in self._queue for each key
+        self._update_max_cache(max_cache, after_cleanup_count)
+
+    def __contains__(self, key):
+        return key in self._cache
+
+    def __getitem__(self, key):
+        val = self._cache[key]
+        self._record_access(key)
+        return val
+
+    def __len__(self):
+        return len(self._cache)
+
+    def add(self, key, value, cleanup=None):
+        """Add a new value to the cache.
+
+        Also, if the entry is ever removed from the queue, call cleanup.
+        Passing it the key and value being removed.
+
+        :param key: The key to store it under
+        :param value: The object to store
+        :param cleanup: None or a function taking (key, value) to indicate
+                        'value' sohuld be cleaned up.
+        """
+        if key in self._cache:
+            self._remove(key)
+        self._cache[key] = value
+        if cleanup is not None:
+            self._cleanup[key] = cleanup
+        self._record_access(key)
+
+        if len(self._cache) > self._max_cache:
+            # Trigger the cleanup
+            self.cleanup()
+
+    def get(self, key, default=None):
+        if key in self._cache:
+            return self[key]
+        return default
+
+    def keys(self):
+        """Get the list of keys currently cached.
+
+        Note that values returned here may not be available by the time you
+        request them later. This is simply meant as a peek into the current
+        state.
+
+        :return: An unordered list of keys that are currently cached.
+        """
+        return self._cache.keys()
+
+    def cleanup(self):
+        """Clear the cache until it shrinks to the requested size.
+
+        This does not completely wipe the cache, just makes sure it is under
+        the after_cleanup_count.
+        """
+        # Make sure the cache is shrunk to the correct size
+        while len(self._cache) > self._after_cleanup_count:
+            self._remove_lru()
+        # No need to compact the queue at this point, because the code that
+        # calls this would have already triggered it based on queue length
+
+    def __setitem__(self, key, value):
+        """Add a value to the cache, there will be no cleanup function."""
+        self.add(key, value, cleanup=None)
+
+    def _record_access(self, key):
+        """Record that key was accessed."""
+        self._queue.append(key)
+        # Can't use setdefault because you can't += 1 the result
+        self._refcount[key] = self._refcount.get(key, 0) + 1
+
+        # If our access queue is too large, clean it up too
+        if len(self._queue) > self._compact_queue_length:
+            self._compact_queue()
+
+    def _compact_queue(self):
+        """Compact the queue, leaving things in sorted last appended order."""
+        new_queue = deque()
+        for item in self._queue:
+            if self._refcount[item] == 1:
+                new_queue.append(item)
+            else:
+                self._refcount[item] -= 1
+        self._queue = new_queue
+        # All entries should be of the same size. There should be one entry in
+        # queue for each entry in cache, and all refcounts should == 1
+        if not (len(self._queue) == len(self._cache) ==
+                len(self._refcount) == sum(self._refcount.itervalues())):
+            raise AssertionError()
+
+    def _remove(self, key):
+        """Remove an entry, making sure to maintain the invariants."""
+        cleanup = self._cleanup.pop(key, None)
+        val = self._cache.pop(key)
+        if cleanup is not None:
+            cleanup(key, val)
+        return val
+
+    def _remove_lru(self):
+        """Remove one entry from the lru, and handle consequences.
+
+        If there are no more references to the lru, then this entry should be
+        removed from the cache.
+        """
+        key = self._queue.popleft()
+        self._refcount[key] -= 1
+        if not self._refcount[key]:
+            del self._refcount[key]
+            self._remove(key)
+
+    def clear(self):
+        """Clear out all of the cache."""
+        # Clean up in LRU order
+        while self._cache:
+            self._remove_lru()
+
+    def resize(self, max_cache, after_cleanup_count=None):
+        """Change the number of entries that will be cached."""
+        self._update_max_cache(max_cache,
+                               after_cleanup_count=after_cleanup_count)
+
+    def _update_max_cache(self, max_cache, after_cleanup_count=None):
+        self._max_cache = max_cache
+        if after_cleanup_count is None:
+            self._after_cleanup_count = self._max_cache * 8 / 10
+        else:
+            self._after_cleanup_count = min(after_cleanup_count, self._max_cache)
+
+        self._compact_queue_length = 4*self._max_cache
+        if len(self._queue) > self._compact_queue_length:
+            self._compact_queue()
+        self.cleanup()
+
+
+class LRUSizeCache(LRUCache):
+    """An LRUCache that removes things based on the size of the values.
+
+    This differs in that it doesn't care how many actual items there are,
+    it just restricts the cache to be cleaned up after so much data is stored.
+
+    The values that are added must support len(value).
+    """
+
+    def __init__(self, max_size=1024*1024, after_cleanup_size=None,
+                 compute_size=None):
+        """Create a new LRUSizeCache.
+
+        :param max_size: The max number of bytes to store before we start
+            clearing out entries.
+        :param after_cleanup_size: After cleaning up, shrink everything to this
+            size.
+        :param compute_size: A function to compute the size of the values. We
+            use a function here, so that you can pass 'len' if you are just
+            using simple strings, or a more complex function if you are using
+            something like a list of strings, or even a custom object.
+            The function should take the form "compute_size(value) => integer".
+            If not supplied, it defaults to 'len()'
+        """
+        self._value_size = 0
+        self._compute_size = compute_size
+        if compute_size is None:
+            self._compute_size = len
+        # This approximates that texts are > 0.5k in size. It only really
+        # matters when we clean up the queue, so we don't want it to be too
+        # large.
+        self._update_max_size(max_size, after_cleanup_size=after_cleanup_size)
+        LRUCache.__init__(self, max_cache=max(int(max_size/512), 1))
+
+    def add(self, key, value, cleanup=None):
+        """Add a new value to the cache.
+
+        Also, if the entry is ever removed from the queue, call cleanup.
+        Passing it the key and value being removed.
+
+        :param key: The key to store it under
+        :param value: The object to store
+        :param cleanup: None or a function taking (key, value) to indicate
+                        'value' sohuld be cleaned up.
+        """
+        if key in self._cache:
+            self._remove(key)
+        value_len = self._compute_size(value)
+        if value_len >= self._after_cleanup_size:
+            return
+        self._value_size += value_len
+        self._cache[key] = value
+        if cleanup is not None:
+            self._cleanup[key] = cleanup
+        self._record_access(key)
+
+        if self._value_size > self._max_size:
+            # Time to cleanup
+            self.cleanup()
+
+    def cleanup(self):
+        """Clear the cache until it shrinks to the requested size.
+
+        This does not completely wipe the cache, just makes sure it is under
+        the after_cleanup_size.
+        """
+        # Make sure the cache is shrunk to the correct size
+        while self._value_size > self._after_cleanup_size:
+            self._remove_lru()
+
+    def _remove(self, key):
+        """Remove an entry, making sure to maintain the invariants."""
+        val = LRUCache._remove(self, key)
+        self._value_size -= self._compute_size(val)
+
+    def resize(self, max_size, after_cleanup_size=None):
+        """Change the number of bytes that will be cached."""
+        self._update_max_size(max_size, after_cleanup_size=after_cleanup_size)
+        max_cache = max(int(max_size/512), 1)
+        self._update_max_cache(max_cache)
+
+    def _update_max_size(self, max_size, after_cleanup_size=None):
+        self._max_size = max_size
+        if after_cleanup_size is None:
+            self._after_cleanup_size = self._max_size * 8 / 10
+        else:
+            self._after_cleanup_size = min(after_cleanup_size, self._max_size)
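A short usage sketch of the cache above; entries are evicted in
least-recently-used order once max_cache is exceeded:

    from dulwich.lru_cache import LRUCache

    c = LRUCache(max_cache=2, after_cleanup_count=2)
    c["a"] = 1
    c["b"] = 2
    c["a"]          # touch "a"; "b" is now least recently used
    c["c"] = 3      # exceeds max_cache, triggers cleanup()
    assert "b" not in c and "a" in c and "c" in c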
new file mode 100644
--- /dev/null
+++ b/dulwich/misc.py
@@ -0,0 +1,90 @@
+# misc.py -- For dealing with python2.4 oddness
+# Copyright (C) 2008 Canonical Ltd.
+# 
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; version 2
+# of the License or (at your option) a later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+# MA  02110-1301, USA.
+"""Misc utilities to work with python2.4.
+
+These utilities can all be deleted when dulwich decides it wants to stop
+support for python 2.4.
+"""
+try:
+    import hashlib
+except ImportError:
+    import sha
+import struct
+
+
+class defaultdict(dict):
+    """A python 2.4 equivalent of collections.defaultdict."""
+
+    def __init__(self, default_factory=None, *a, **kw):
+        if (default_factory is not None and
+            not hasattr(default_factory, '__call__')):
+            raise TypeError('first argument must be callable')
+        dict.__init__(self, *a, **kw)
+        self.default_factory = default_factory
+
+    def __getitem__(self, key):
+        try:
+            return dict.__getitem__(self, key)
+        except KeyError:
+            return self.__missing__(key)
+
+    def __missing__(self, key):
+        if self.default_factory is None:
+            raise KeyError(key)
+        self[key] = value = self.default_factory()
+        return value
+
+    def __reduce__(self):
+        if self.default_factory is None:
+            args = tuple()
+        else:
+            args = self.default_factory,
+        return type(self), args, None, None, self.items()
+
+    def copy(self):
+        return self.__copy__()
+
+    def __copy__(self):
+        return type(self)(self.default_factory, self)
+
+    def __deepcopy__(self, memo):
+        import copy
+        return type(self)(self.default_factory,
+                          copy.deepcopy(self.items()))
+
+    def __repr__(self):
+        return 'defaultdict(%s, %s)' % (self.default_factory,
+                                        dict.__repr__(self))
+
+
+def make_sha(source=''):
+    """A python2.4 workaround for the sha/hashlib module fiasco."""
+    try:
+        return hashlib.sha1(source)
+    except NameError:
+        sha1 = sha.sha(source)
+        return sha1
+
+
+def unpack_from(fmt, buf, offset=0):
+    """A python2.4 workaround for struct missing unpack_from."""
+    try:
+        return struct.unpack_from(fmt, buf, offset)
+    except AttributeError:
+        b = buf[offset:offset+struct.calcsize(fmt)]
+        return struct.unpack(fmt, b)
+
new file mode 100644
--- /dev/null
+++ b/dulwich/object_store.py
@@ -0,0 +1,306 @@
+# object_store.py -- Object store for git objects 
+# Copyright (C) 2008 Jelmer Vernooij <jelmer@samba.org>
+# 
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# or (at your option) a later version of the License.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+# MA  02110-1301, USA.
+
+import os
+import tempfile
+import urllib2
+
+from errors import (
+    NotTreeError,
+    )
+from objects import (
+    ShaFile,
+    Tree,
+    hex_to_sha,
+    sha_to_hex,
+    )
+from pack import (
+    Pack,
+    PackData, 
+    iter_sha1, 
+    load_packs, 
+    load_pack_index,
+    write_pack,
+    write_pack_data,
+    write_pack_index_v2,
+    )
+
+PACKDIR = 'pack'
+
+class ObjectStore(object):
+    """Object store."""
+
+    def __init__(self, path):
+        """Open an object store.
+
+        :param path: Path of the object store.
+        """
+        self.path = path
+        self._pack_cache = None
+        self.pack_dir = os.path.join(self.path, PACKDIR)
+
+    def determine_wants_all(self, refs):
+        return [sha for (ref, sha) in refs.iteritems()
+                if sha not in self and not ref.endswith("^{}")]
+
+    def iter_shas(self, shas):
+        """Iterate over the objects for the specified shas.
+
+        :param shas: Iterable object with SHAs
+        """
+        return ObjectStoreIterator(self, shas)
+
+    def __contains__(self, sha):
+        for pack in self.packs:
+            if sha in pack:
+                return True
+        ret = self._get_shafile(sha)
+        if ret is not None:
+            return True
+        return False
+
+    @property
+    def packs(self):
+        """List with pack objects."""
+        if self._pack_cache is None:
+            self._pack_cache = list(load_packs(self.pack_dir))
+        return self._pack_cache
+
+    def _add_known_pack(self, path):
+        """Add a newly appeared pack to the cache by path.
+
+        """
+        if self._pack_cache is not None:
+            self._pack_cache.append(Pack(path))
+
+    def _get_shafile_path(self, sha):
+        dir = sha[:2]
+        file = sha[2:]
+        # Check from object dir
+        return os.path.join(self.path, dir, file)
+
+    def _get_shafile(self, sha):
+        path = self._get_shafile_path(sha)
+        if os.path.exists(path):
+            return ShaFile.from_file(path)
+        return None
+
+    def _add_shafile(self, sha, o):
+        dir = os.path.join(self.path, sha[:2])
+        if not os.path.isdir(dir):
+            os.mkdir(dir)
+        path = os.path.join(dir, sha[2:])
+        f = open(path, 'w+')
+        try:
+            f.write(o.as_legacy_object())
+        finally:
+            f.close()
+
+    def get_raw(self, name):
+        """Obtain the raw text for an object.
+        
+        :param name: sha for the object.
+        :return: tuple with object type and object contents.
+        """
+        if len(name) == 40:
+            sha = hex_to_sha(name)
+            hexsha = name
+        elif len(name) == 20:
+            sha = name
+            hexsha = None
+        else:
+            raise AssertionError
+        for pack in self.packs:
+            try:
+                return pack.get_raw(sha)
+            except KeyError:
+                pass
+        if hexsha is None: 
+            hexsha = sha_to_hex(name)
+        ret = self._get_shafile(hexsha)
+        if ret is not None:
+            return ret.as_raw_string()
+        raise KeyError(hexsha)
+
+    def __getitem__(self, sha):
+        type, uncomp = self.get_raw(sha)
+        return ShaFile.from_raw_string(type, uncomp)
+
+    def move_in_thin_pack(self, path):
+        """Move a specific file containing a pack into the pack directory.
+
+        :note: The file should be on the same file system as the 
+            packs directory.
+
+        :param path: Path to the pack file.
+        """
+        data = PackData(path)
+
+        # Write index for the thin pack (do we really need this?)
+        temppath = os.path.join(self.pack_dir, 
+            sha_to_hex(urllib2.randombytes(20))+".tempidx")
+        data.create_index_v2(temppath, self.get_raw)
+        p = Pack.from_objects(data, load_pack_index(temppath))
+
+        # Write a full pack version
+        temppath = os.path.join(self.pack_dir, 
+            sha_to_hex(urllib2.randombytes(20))+".temppack")
+        write_pack(temppath, ((o, None) for o in p.iterobjects(self.get_raw)), 
+                len(p))
+        pack_sha = load_pack_index(temppath+".idx").objects_sha1()
+        newbasename = os.path.join(self.pack_dir, "pack-%s" % pack_sha)
+        os.rename(temppath+".pack", newbasename+".pack")
+        os.rename(temppath+".idx", newbasename+".idx")
+        self._add_known_pack(newbasename)
+
+    def move_in_pack(self, path):
+        """Move a specific file containing a pack into the pack directory.
+
+        :note: The file should be on the same file system as the 
+            packs directory.
+
+        :param path: Path to the pack file.
+        """
+        p = PackData(path)
+        entries = p.sorted_entries()
+        basename = os.path.join(self.pack_dir, 
+            "pack-%s" % iter_sha1(entry[0] for entry in entries))
+        write_pack_index_v2(basename+".idx", entries, p.get_stored_checksum())
+        os.rename(path, basename + ".pack")
+        self._add_known_pack(basename)
+
+    def add_thin_pack(self):
+        """Add a new thin pack to this object store.
+
+        Thin packs are packs that contain deltas with parents that exist
+        in a different pack.
+
+        :return: Fileobject to write to and a commit function to
+            call when the pack is finished.
+        """
+        fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
+        f = os.fdopen(fd, 'wb')
+        def commit():
+            os.fsync(fd)
+            f.close()
+            if os.path.getsize(path) > 0:
+                self.move_in_thin_pack(path)
+        return f, commit
+
+    def add_pack(self):
+        """Add a new pack to this object store. 
+
+        :return: Fileobject to write to and a commit function to 
+            call when the pack is finished.
+        """
+        fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
+        f = os.fdopen(fd, 'wb')
+        def commit():
+            os.fsync(fd)
+            f.close()
+            if os.path.getsize(path) > 0:
+                self.move_in_pack(path)
+        return f, commit
+
+    def add_object(self, obj):
+        self._add_shafile(obj.id, obj)
+
+    def add_objects(self, objects):
+        """Add a set of objects to this object store.
+
+        :param objects: Sequence of (object, path) tuples; must support len().
+        """
+        if len(objects) == 0:
+            return
+        f, commit = self.add_pack()
+        write_pack_data(f, objects, len(objects))
+        commit()
+
+
+class ObjectImporter(object):
+    """Interface for importing objects."""
+
+    def __init__(self, count):
+        """Create a new ObjectImporter.
+
+        :param count: Number of objects that are going to be imported.
+        """
+        self.count = count
+
+    def add_object(self, object):
+        """Add an object."""
+        raise NotImplementedError(self.add_object)
+
+    def finish(self, object):
+        """Finish the imoprt and write objects to disk."""
+        raise NotImplementedError(self.finish)
+
+
+class ObjectIterator(object):
+    """Interface for iterating over objects."""
+
+    def iterobjects(self):
+        raise NotImplementedError(self.iterobjects)
+
+
+class ObjectStoreIterator(ObjectIterator):
+    """ObjectIterator that works on top of an ObjectStore."""
+
+    def __init__(self, store, sha_iter):
+        self.store = store
+        self.sha_iter = sha_iter
+        self._shas = []
+
+    def __iter__(self):
+        for sha, path in self.itershas():
+            yield self.store[sha], path
+
+    def iterobjects(self):
+        for o, path in self:
+            yield o
+
+    def itershas(self):
+        for sha in self._shas:
+            yield sha
+        for sha in self.sha_iter:
+            self._shas.append(sha)
+            yield sha
+
+    def __contains__(self, needle):
+        """Check if an object is present.
+
+        :param needle: SHA1 of the object to check for
+        """
+        return needle in self.store
+
+    def __getitem__(self, key):
+        """Find an object by SHA1."""
+        return self.store[key]
+
+    def __len__(self):
+        """Return the number of objects."""
+        return len(list(self.itershas()))
+
+
+def tree_lookup_path(lookup_obj, root_sha, path):
+    parts = path.split("/")
+    sha = root_sha
+    for p in parts:
+        obj = lookup_obj(sha)
+        if type(obj) is not Tree:
+            raise NotTreeError(sha)
+        if p == '':
+            continue
+        mode, sha = obj[p]
+    return lookup_obj(sha)
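+
+
+# Usage sketch (hypothetical shas): resolve a path through nested trees:
+#   blob = tree_lookup_path(store.__getitem__, commit.tree, "dulwich/pack.py")
+# NotTreeError is raised if a non-tree object is encountered along the way.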
new file mode 100644
--- /dev/null
+++ b/dulwich/objects.py
@@ -0,0 +1,579 @@
+# objects.py -- Access to base git objects
+# Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
+# Copyright (C) 2008 Jelmer Vernooij <jelmer@samba.org>
+# 
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; version 2
+# of the License or (at your option) a later version of the License.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+# MA  02110-1301, USA.
+
+
+"""Access to base git objects."""
+
+
+import mmap
+import os
+import sha
+import zlib
+
+from errors import (
+    NotBlobError,
+    NotCommitError,
+    NotTreeError,
+    )
+
+BLOB_ID = "blob"
+TAG_ID = "tag"
+TREE_ID = "tree"
+COMMIT_ID = "commit"
+PARENT_ID = "parent"
+AUTHOR_ID = "author"
+COMMITTER_ID = "committer"
+OBJECT_ID = "object"
+TYPE_ID = "type"
+TAGGER_ID = "tagger"
+
+def _decompress(string):
+    dcomp = zlib.decompressobj()
+    dcomped = dcomp.decompress(string)
+    dcomped += dcomp.flush()
+    return dcomped
+
+
+def sha_to_hex(sha):
+    """Takes a string and returns the hex of the sha within"""
+    hexsha = "".join(["%02x" % ord(c) for c in sha])
+    assert len(hexsha) == 40, "Incorrect length of sha1 string: %d" % len(hexsha)
+    return hexsha
+
+
+def hex_to_sha(hex):
+    """Takes a hex sha and returns a binary sha"""
+    assert len(hex) == 40, "Incorrect length of hexsha: %s" % hex
+    return ''.join([chr(int(hex[i:i+2], 16)) for i in xrange(0, len(hex), 2)])
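+
+# Round-trip sketch: the two helpers are inverses of each other, e.g.
+#   sha_to_hex(hex_to_sha("ab" * 20)) == "ab" * 20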
+
+
+class ShaFile(object):
+    """A git SHA file."""
+  
+    @classmethod
+    def _parse_legacy_object(cls, map):
+        """Parse a legacy object, creating it and setting object._text"""
+        text = _decompress(map)
+        object = None
+        for posstype in type_map.keys():
+            if text.startswith(posstype):
+                object = type_map[posstype]()
+                text = text[len(posstype):]
+                break
+        assert object is not None, "%s is not a known object type" % text[:9]
+        assert text[0] == ' ', "%s is not a space" % text[0]
+        text = text[1:]
+        size = 0
+        i = 0
+        while text[0] >= '0' and text[0] <= '9':
+            if i > 0 and size == 0:
+                assert False, "Size is not in canonical format"
+            size = (size * 10) + int(text[0])
+            text = text[1:]
+            i += 1
+        object._size = size
+        assert text[0] == "\0", "Size not followed by null"
+        text = text[1:]
+        object._text = text
+        return object
+
+    def as_legacy_object(self):
+        return zlib.compress("%s %d\0%s" % (self._type, len(self._text), self._text))
+  
+    def as_raw_string(self):
+        return self._num_type, self._text
+  
+    @classmethod
+    def _parse_object(cls, map):
+        """Parse a new style object , creating it and setting object._text"""
+        used = 0
+        byte = ord(map[used])
+        used += 1
+        num_type = (byte >> 4) & 7
+        try:
+            object = num_type_map[num_type]()
+        except KeyError:
+            raise AssertionError("Not a known type: %d" % num_type)
+        while (byte & 0x80) != 0:
+            byte = ord(map[used])
+            used += 1
+        raw = map[used:]
+        object._text = _decompress(raw)
+        return object
+  
+    @classmethod
+    def _parse_file(cls, map):
+        word = (ord(map[0]) << 8) + ord(map[1])
+        if ord(map[0]) == 0x78 and (word % 31) == 0:
+            return cls._parse_legacy_object(map)
+        else:
+            return cls._parse_object(map)
+  
+    def __init__(self):
+        """Don't call this directly"""
+  
+    def _parse_text(self):
+        """For subclasses to do initialisation time parsing"""
+  
+    @classmethod
+    def from_file(cls, filename):
+        """Get the contents of a SHA file on disk"""
+        size = os.path.getsize(filename)
+        f = open(filename, 'rb')
+        try:
+            map = mmap.mmap(f.fileno(), size, access=mmap.ACCESS_READ)
+            shafile = cls._parse_file(map)
+            shafile._parse_text()
+            return shafile
+        finally:
+            f.close()
+  
+    @classmethod
+    def from_raw_string(cls, type, string):
+        """Creates an object of the indicated type from the raw string given.
+    
+        Type is the numeric type of an object. String is the raw uncompressed
+        contents.
+        """
+        real_class = num_type_map[type]
+        obj = real_class()
+        obj._num_type = type
+        obj._text = string
+        obj._parse_text()
+        return obj
+  
+    def _header(self):
+        return "%s %lu\0" % (self._type, len(self._text))
+  
+    def sha(self):
+        """The SHA1 object that is the name of this object."""
+        ressha = sha.new()
+        ressha.update(self._header())
+        ressha.update(self._text)
+        return ressha
+  
+    @property
+    def id(self):
+        return self.sha().hexdigest()
+  
+    @property
+    def type(self):
+        return self._num_type
+  
+    def __repr__(self):
+        return "<%s %s>" % (self.__class__.__name__, self.id)
+  
+    def __eq__(self, other):
+        """Return true id the sha of the two objects match.
+  
+        The __le__ etc methods aren't overriden as they make no sense,
+        certainly at this level.
+        """
+        return self.sha().digest() == other.sha().digest()
+
+
+class Blob(ShaFile):
+    """A Git Blob object."""
+
+    _type = BLOB_ID
+    _num_type = 3
+
+    @property
+    def data(self):
+        """The text contained within the blob object."""
+        return self._text
+
+    @classmethod
+    def from_file(cls, filename):
+        blob = ShaFile.from_file(filename)
+        if blob._type != cls._type:
+            raise NotBlobError(filename)
+        return blob
+
+    @classmethod
+    def from_string(cls, string):
+        """Create a blob from a string."""
+        shafile = cls()
+        shafile._text = string
+        return shafile
+
+
+class Tag(ShaFile):
+    """A Git Tag object."""
+
+    _type = TAG_ID
+    _num_type = 4
+
+    @classmethod
+    def from_file(cls, filename):
+        blob = ShaFile.from_file(filename)
+        if blob._type != cls._type:
+            raise NotBlobError(filename)
+        return blob
+
+    @classmethod
+    def from_string(cls, string):
+        """Create a blob from a string."""
+        shafile = cls()
+        shafile._text = string
+        return shafile
+
+    def _parse_text(self):
+        """Grab the metadata attached to the tag"""
+        text = self._text
+        count = 0
+        assert text.startswith(OBJECT_ID), "Invalid tag object, " \
+            "must start with %s" % OBJECT_ID
+        count += len(OBJECT_ID)
+        assert text[count] == ' ', "Invalid tag object, " \
+            "%s must be followed by space not %s" % (OBJECT_ID, text[count])
+        count += 1
+        self._object_sha = text[count:count+40]
+        count += 40
+        assert text[count] == '\n', "Invalid tag object, " \
+            "%s sha must be followed by newline" % OBJECT_ID
+        count += 1
+        assert text[count:].startswith(TYPE_ID), "Invalid tag object, " \
+            "%s sha must be followed by %s" % (OBJECT_ID, TYPE_ID)
+        count += len(TYPE_ID)
+        assert text[count] == ' ', "Invalid tag object, " \
+            "%s must be followed by space not %s" % (TAG_ID, text[count])
+        count += 1
+        self._object_type = ""
+        while text[count] != '\n':
+            self._object_type += text[count]
+            count += 1
+        count += 1
+        assert self._object_type in (COMMIT_ID, BLOB_ID, TREE_ID, TAG_ID), "Invalid tag object, " \
+            "unexpected object type %s" % self._object_type
+        self._object_type = type_map[self._object_type]
+
+        assert text[count:].startswith(TAG_ID), "Invalid tag object, " \
+            "object type must be followed by %s" % (TAG_ID)
+        count += len(TAG_ID)
+        assert text[count] == ' ', "Invalid tag object, " \
+            "%s must be followed by space not %s" % (TAG_ID, text[count])
+        count += 1
+        self._name = ""
+        while text[count] != '\n':
+            self._name += text[count]
+            count += 1
+        count += 1
+
+        assert text[count:].startswith(TAGGER_ID), "Invalid tag object, " \
+            "%s must be followed by %s" % (TAG_ID, TAGGER_ID)
+        count += len(TAGGER_ID)
+        assert text[count] == ' ', "Invalid tag object, " \
+            "%s must be followed by space not %s" % (TAGGER_ID, text[count])
+        count += 1
+        self._tagger = ""
+        while text[count] != '>':
+            assert text[count] != '\n', "Malformed tagger information"
+            self._tagger += text[count]
+            count += 1
+        self._tagger += text[count]
+        count += 1
+        assert text[count] == ' ', "Invalid tag object, " \
+            "tagger information must be followed by space not %s" % text[count]
+        count += 1
+        self._tag_time = int(text[count:count+10])
+        while text[count] != '\n':
+            count += 1
+        count += 1
+        assert text[count] == '\n', "There must be a new line after the headers"
+        count += 1
+        self._message = text[count:]
+
+    @property
+    def object(self):
+        """Returns the object pointed by this tag, represented as a tuple(type, sha)"""
+        return (self._object_type, self._object_sha)
+
+    @property
+    def name(self):
+        """Returns the name of this tag"""
+        return self._name
+
+    @property
+    def tagger(self):
+        """Returns the name of the person who created this tag"""
+        return self._tagger
+
+    @property
+    def tag_time(self):
+        """Returns the creation timestamp of the tag.
+
+        Returns it as the number of seconds since the epoch"""
+        return self._tag_time
+
+    @property
+    def message(self):
+        """Returns the message attached to this tag"""
+        return self._message
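+
+    # Raw tag layout consumed by _parse_text above (illustrative values):
+    #   object 0123456789abcdef0123456789abcdef01234567\n
+    #   type commit\n
+    #   tag v0.1\n
+    #   tagger A U Thor <author@example.com> 1234567890 +0000\n
+    #   \n
+    #   tag message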
+
+
+class Tree(ShaFile):
+    """A Git tree object"""
+
+    _type = TREE_ID
+    _num_type = 2
+
+    def __init__(self):
+        self._entries = {}
+
+    @classmethod
+    def from_file(cls, filename):
+        tree = ShaFile.from_file(filename)
+        if tree._type != cls._type:
+            raise NotTreeError(filename)
+        return tree
+
+    def __getitem__(self, name):
+        return self._entries[name]
+
+    def __setitem__(self, name, value):
+        assert isinstance(value, tuple)
+        assert len(value) == 2
+        self._entries[name] = value
+
+    def __delitem__(self, name):
+        del self._entries[name]
+
+    def add(self, mode, name, hexsha):
+        self._entries[name] = mode, hexsha
+
+    def entries(self):
+        """Return a list of tuples describing the tree entries"""
+        return [(mode, name, hexsha) for (name, (mode, hexsha)) in self._entries.iteritems()]
+
+    def iteritems(self):
+        for name in sorted(self._entries.keys()):
+            yield name, self._entries[name][0], self._entries[name][1]
+
+    def _parse_text(self):
+        """Grab the entries in the tree"""
+        count = 0
+        while count < len(self._text):
+            mode = 0
+            chr = self._text[count]
+            while chr != ' ':
+                assert chr >= '0' and chr <= '7', "%s is not a valid mode char" % chr
+                mode = (mode << 3) + (ord(chr) - ord('0'))
+                count += 1
+                chr = self._text[count]
+            count += 1
+            chr = self._text[count]
+            name = ''
+            while chr != '\0':
+                name += chr
+                count += 1
+                chr = self._text[count]
+            count += 1
+            chr = self._text[count]
+            sha = self._text[count:count+20]
+            hexsha = sha_to_hex(sha)
+            self.add(mode, name, hexsha)
+            count = count + 20
+
+    def serialize(self):
+        self._text = ""
+        for name, mode, hexsha in self.iteritems():
+            self._text += "%04o %s\0%s" % (mode, name, hex_to_sha(hexsha))
+
+
+class Commit(ShaFile):
+    """A git commit object"""
+
+    _type = COMMIT_ID
+    _num_type = 1
+
+    def __init__(self):
+        self._parents = []
+
+    @classmethod
+    def from_file(cls, filename):
+        commit = ShaFile.from_file(filename)
+        if commit._type != cls._type:
+            raise NotCommitError(filename)
+        return commit
+
+    def _parse_text(self):
+        text = self._text
+        count = 0
+        assert text.startswith(TREE_ID), "Invalid commit object, " \
+             "must start with %s" % TREE_ID
+        count += len(TREE_ID)
+        assert text[count] == ' ', "Invalid commit object, " \
+             "%s must be followed by space not %s" % (TREE_ID, text[count])
+        count += 1
+        self._tree = text[count:count+40]
+        count = count + 40
+        assert text[count] == "\n", "Invalid commit object, " \
+             "tree sha must be followed by newline"
+        count += 1
+        self._parents = []
+        while text[count:].startswith(PARENT_ID):
+            count += len(PARENT_ID)
+            assert text[count] == ' ', "Invalid commit object, " \
+                 "%s must be followed by space not %s" % (PARENT_ID, text[count])
+            count += 1
+            self._parents.append(text[count:count+40])
+            count += 40
+            assert text[count] == "\n", "Invalid commit object, " \
+                 "parent sha must be followed by newline"
+            count += 1
+        self._author = None
+        if text[count:].startswith(AUTHOR_ID):
+            count += len(AUTHOR_ID)
+            assert text[count] == ' ', "Invalid commit object, " \
+                 "%s must be followed by space not %s" % (AUTHOR_ID, text[count])
+            count += 1
+            self._author = ''
+            while text[count] != '>':
+                assert text[count] != '\n', "Malformed author information"
+                self._author += text[count]
+                count += 1
+            self._author += text[count]
+            count += 1
+            assert text[count] == ' ', "Invalid commit object, " \
+                 "author information must be followed by space not %s" % text[count]
+            count += 1
+            self._author_time = int(text[count:count+10])
+            while text[count] != ' ':
+                count += 1
+            self._author_timezone = int(text[count:count+6])
+            count += 1
+            while text[count] != '\n':
+                count += 1
+            count += 1
+        self._committer = None
+        if text[count:].startswith(COMMITTER_ID):
+            count += len(COMMITTER_ID)
+            assert text[count] == ' ', "Invalid commit object, " \
+                 "%s must be followed by space not %s" % (COMMITTER_ID, text[count])
+            count += 1
+            self._committer = ''
+            while text[count] != '>':
+                assert text[count] != '\n', "Malformed committer information"
+                self._committer += text[count]
+                count += 1
+            self._committer += text[count]
+            count += 1
+            assert text[count] == ' ', "Invalid commit object, " \
+                 "commiter information must be followed by space not %s" % text[count]
+            count += 1
+            self._commit_time = int(text[count:count+10])
+            while text[count] != ' ':
+                count += 1
+            self._commit_timezone = int(text[count:count+6])
+            count += 1
+            while text[count] != '\n':
+                count += 1
+            count += 1
+        assert text[count] == '\n', "There must be a new line after the headers"
+        count += 1
+        # XXX: There can be an encoding field.
+        self._message = text[count:]
+
+    def serialize(self):
+        self._text = ""
+        self._text += "%s %s\n" % (TREE_ID, self._tree)
+        for p in self._parents:
+            self._text += "%s %s\n" % (PARENT_ID, p)
+        self._text += "%s %s %s %+05d\n" % (AUTHOR_ID, self._author, str(self._author_time), self._author_timezone)
+        self._text += "%s %s %s %+05d\n" % (COMMITTER_ID, self._committer, str(self._commit_time), self._commit_timezone)
+        self._text += "\n" # There must be a new line after the headers
+        self._text += self._message
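+
+    # Raw commit layout produced by serialize (illustrative values):
+    #   tree <40-hex sha>\n
+    #   parent <40-hex sha>\n        (one line per parent)
+    #   author A U Thor <a@example.com> 1234567890 +0000\n
+    #   committer A U Thor <a@example.com> 1234567890 +0000\n
+    #   \n
+    #   commit message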
+
+    @property
+    def tree(self):
+        """Returns the tree that is the state of this commit"""
+        return self._tree
+
+    @property
+    def parents(self):
+        """Return a list of parents of this commit."""
+        return self._parents
+
+    @property
+    def author(self):
+        """Returns the name of the author of the commit"""
+        return self._author
+
+    @property
+    def committer(self):
+        """Returns the name of the committer of the commit"""
+        return self._committer
+
+    @property
+    def message(self):
+        """Returns the commit message"""
+        return self._message
+
+    @property
+    def commit_time(self):
+        """Returns the timestamp of the commit.
+        
+        Returns it as the number of seconds since the epoch.
+        """
+        return self._commit_time
+
+    @property
+    def commit_timezone(self):
+        """Returns the zone the commit time is in
+        """
+        return self._commit_timezone
+
+    @property
+    def author_time(self):
+        """Returns the timestamp the commit was written.
+        
+        Returns it as the number of seconds since the epoch.
+        """
+        return self._author_time
+
+    @property
+    def author_timezone(self):
+        """Returns the zone the author time is in
+        """
+        return self._author_timezone
+
+
+type_map = {
+    BLOB_ID : Blob,
+    TREE_ID : Tree,
+    COMMIT_ID : Commit,
+    TAG_ID: Tag,
+}
+
+num_type_map = {
+    0: None,
+    1: Commit,
+    2: Tree,
+    3: Blob,
+    4: Tag,
+    # 5 Is reserved for further expansion
+}
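+
+# Pack types 6 (offset delta) and 7 (ref delta) appear only inside pack
+# files; they are resolved to one of the base types above (see
+# dulwich/pack.py) before objects reach num_type_map.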
+
+try:
+    # Try to import C versions
+    from _objects import hex_to_sha, sha_to_hex
+except ImportError:
+    pass
+
new file mode 100644
--- /dev/null
+++ b/dulwich/pack.py
@@ -0,0 +1,983 @@
+# pack.py -- For dealing with packed git objects.
+# Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
+# Copyright (C) 2008 Jelmer Vernooij <jelmer@samba.org>
+# 
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; version 2
+# of the License or (at your option) a later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+# MA  02110-1301, USA.
+
+"""Classes for dealing with packed git objects.
+
+A pack is a compact representation of a bunch of objects, stored
+using deltas where possible.
+
+They have two parts, the pack file, which stores the data, and an index
+that tells you where the data is.
+
+To find an object you look in all of the index files until you find a
+match for the object name. You then use the offset obtained from the
+index as a pointer into the corresponding packfile.
+"""
+
+try:
+    from collections import defaultdict
+except ImportError:
+    from misc import defaultdict
+
+from itertools import chain, imap, izip
+import mmap
+import os
+import struct
+try:
+    from struct import unpack_from
+except ImportError:
+    from misc import unpack_from
+import sys
+import zlib
+import difflib
+
+from errors import (
+    ApplyDeltaError,
+    ChecksumMismatch,
+    )
+from lru_cache import (
+    LRUSizeCache,
+    )
+from objects import (
+    ShaFile,
+    hex_to_sha,
+    sha_to_hex,
+    )
+from misc import make_sha
+
+supports_mmap_offset = (sys.version_info[0] >= 3 or
+        (sys.version_info[0] == 2 and sys.version_info[1] >= 6))
+
+
+def take_msb_bytes(map, offset):
+    ret = []
+    while len(ret) == 0 or ret[-1] & 0x80:
+        ret.append(ord(map[offset]))
+        offset += 1
+    return ret
+
+
+def read_zlib(data, offset, dec_size):
+    obj = zlib.decompressobj()
+    ret = []
+    fed = 0
+    while obj.unused_data == "":
+        base = offset+fed
+        add = data[base:base+1024]
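+        # A short read means EOF is near: append a sentinel byte so the
+        # decompressor reports unused_data once the deflate stream ends,
+        # terminating the loop.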
+        if len(add) < 1024:
+            add += "Z"
+        fed += len(add)
+        ret.append(obj.decompress(add))
+    x = "".join(ret)
+    assert len(x) == dec_size
+    comp_len = fed-len(obj.unused_data)
+    return x, comp_len
+
+
+def iter_sha1(iter):
+    """Return the hexdigest of the SHA1 over a set of names."""
+    sha1 = make_sha()
+    for name in iter:
+        sha1.update(name)
+    return sha1.hexdigest()
+
+
+def simple_mmap(f, offset, size, access=mmap.ACCESS_READ):
+    """Simple wrapper for mmap() which always supports the offset parameter.
+
+    :param f: File object.
+    :param offset: Offset in the file, from the beginning of the file.
+    :param size: Size of the mmap'ed area
+    :param access: Access mechanism.
+    :return: MMAP'd area.
+    """
+    mem = mmap.mmap(f.fileno(), size+offset, access=access)
+    return mem, offset
+
+
+def load_pack_index(filename):
+    f = open(filename, 'rb')
+    if f.read(4) == '\377tOc':
+        version = struct.unpack(">L", f.read(4))[0]
+        if version == 2:
+            f.seek(0)
+            return PackIndex2(filename, file=f)
+        else:
+            raise KeyError("Unknown pack index format %d" % version)
+    else:
+        f.seek(0)
+        return PackIndex1(filename, file=f)
+
+
+def bisect_find_sha(start, end, sha, unpack_name):
+    assert start <= end
+    while start <= end:
+        i = (start + end)/2
+        file_sha = unpack_name(i)
+        x = cmp(file_sha, sha)
+        if x < 0:
+            start = i + 1
+        elif x > 0:
+            end = i - 1
+        else:
+            return i
+    return None
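+
+# Illustrative check (sketch): with names = ["\x01"*20, "\x02"*20, "\x03"*20]
+# sorted ascending, bisect_find_sha(0, len(names) - 1, "\x02"*20,
+# names.__getitem__) returns 1; a sha that is absent yields None.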
+
+
+class PackIndex(object):
+    """An index in to a packfile.
+  
+    Given a sha id of an object a pack index can tell you the location in the
+    packfile of that object if it has it.
+  
+    To do the lookup it opens the file and indexes the first 256 four-byte
+    groups (the fan-out table) by the first byte of the sha id. The value in
+    the indexed four-byte group is the end of the group of entries that
+    share the same starting byte. Indexing again with the starting byte
+    minus one gives the start of the group. The entries are sorted by sha id
+    within the group, so bisect between the start and end offsets to find
+    whether the value is present.
+    """
+  
+    def __init__(self, filename, file=None):
+        """Create a pack index object.
+    
+        Provide it with the name of the index file to consider, and it will map
+        it whenever required.
+        """
+        self._filename = filename
+        # Take the size now, so it can be checked each time we map the file to
+        # ensure that it hasn't changed.
+        self._size = os.path.getsize(filename)
+        if file is None:
+            self._file = open(filename, 'rb')
+        else:
+            self._file = file
+        self._contents, map_offset = simple_mmap(self._file, 0, self._size)
+        assert map_offset == 0
+  
+    def __eq__(self, other):
+        if not isinstance(other, PackIndex):
+            return False
+    
+        if self._fan_out_table != other._fan_out_table:
+            return False
+    
+        for (name1, _, _), (name2, _, _) in izip(self.iterentries(), other.iterentries()):
+            if name1 != name2:
+                return False
+        return True
+  
+    def close(self):
+        self._file.close()
+  
+    def __len__(self):
+        """Return the number of entries in this pack index."""
+        return self._fan_out_table[-1]
+  
+    def _unpack_entry(self, i):
+        """Unpack the i-th entry in the index file.
+    
+        :return: Tuple with object name (SHA), offset in pack file and 
+              CRC32 checksum (if known)."""
+        raise NotImplementedError(self._unpack_entry)
+  
+    def _unpack_name(self, i):
+        """Unpack the i-th name from the index file."""
+        raise NotImplementedError(self._unpack_name)
+  
+    def _unpack_offset(self, i):
+        """Unpack the i-th object offset from the index file."""
+        raise NotImplementedError(self._unpack_offset)
+
+    def _unpack_crc32_checksum(self, i):
+        """Unpack the crc32 checksum for the i-th object from the index file."""
+        raise NotImplementedError(self._unpack_crc32_checksum)
+  
+    def __iter__(self):
+        return imap(sha_to_hex, self._itersha())
+  
+    def _itersha(self):
+        for i in range(len(self)):
+            yield self._unpack_name(i)
+  
+    def objects_sha1(self):
+        """Return the hex SHA1 over all the shas of all objects in this pack.
+        
+        :note: This is used for the filename of the pack.
+        """
+        return iter_sha1(self._itersha())
+  
+    def iterentries(self):
+        """Iterate over the entries in this pack index.
+       
+        Will yield tuples with object name, offset in packfile and crc32 checksum.
+        """
+        for i in range(len(self)):
+            yield self._unpack_entry(i)
+  
+    def _read_fan_out_table(self, start_offset):
+        ret = []
+        for i in range(0x100):
+            ret.append(struct.unpack(">L", self._contents[start_offset+i*4:start_offset+(i+1)*4])[0])
+        return ret
+  
+    def check(self):
+        """Check that the stored checksum matches the actual checksum."""
+        return self.calculate_checksum() == self.get_stored_checksum()
+  
+    def calculate_checksum(self):
+        return make_sha(self._contents[:-20]).digest()
+
+    def get_pack_checksum(self):
+        """Return the SHA1 checksum stored for the corresponding packfile."""
+        return str(self._contents[-40:-20])
+  
+    def get_stored_checksum(self):
+        """Return the SHA1 checksum stored for this index."""
+        return str(self._contents[-20:])
+  
+    def object_index(self, sha):
+        """Return the index in to the corresponding packfile for the object.
+    
+        Given the name of an object it will return the offset that object lives
+        at within the corresponding pack file. If the pack file doesn't have the
+        object then None will be returned.
+        """
+        if len(sha) == 40:
+            sha = hex_to_sha(sha)
+        return self._object_index(sha)
+  
+    def _object_index(self, sha):
+        """See object_index.
+        
+        :param sha: A *binary* SHA string (20 bytes long).
+        """
+        assert len(sha) == 20
+        idx = ord(sha[0])
+        if idx == 0:
+            start = 0
+        else:
+            start = self._fan_out_table[idx-1]
+        end = self._fan_out_table[idx]
+        i = bisect_find_sha(start, end, sha, self._unpack_name)
+        if i is None:
+            raise KeyError(sha)
+        return self._unpack_offset(i)
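+
+    # Worked example (sketch): if 10 objects start with byte 0x00 and 5 with
+    # 0x01, then fan_out_table[0] == 10 and fan_out_table[1] == 15, so shas
+    # starting with '\x01' occupy entries [10, 15) of the sorted name table.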
+
+
+class PackIndex1(PackIndex):
+    """Version 1 Pack Index."""
+
+    def __init__(self, filename, file=None):
+        PackIndex.__init__(self, filename, file)
+        self.version = 1
+        self._fan_out_table = self._read_fan_out_table(0)
+
+    def _unpack_entry(self, i):
+        (offset, name) = unpack_from(">L20s", self._contents, 
+            (0x100 * 4) + (i * 24))
+        return (name, offset, None)
+ 
+    def _unpack_name(self, i):
+        offset = (0x100 * 4) + (i * 24) + 4
+        return self._contents[offset:offset+20]
+  
+    def _unpack_offset(self, i):
+        offset = (0x100 * 4) + (i * 24)
+        return unpack_from(">L", self._contents, offset)[0]
+  
+    def _unpack_crc32_checksum(self, i):
+        # Not stored in v1 index files
+        return None 
+  
+
+class PackIndex2(PackIndex):
+    """Version 2 Pack Index."""
+
+    def __init__(self, filename, file=None):
+        PackIndex.__init__(self, filename, file)
+        assert self._contents[:4] == '\377tOc', "Not a v2 pack index file"
+        (self.version, ) = unpack_from(">L", self._contents, 4)
+        assert self.version == 2, "Version was %d" % self.version
+        self._fan_out_table = self._read_fan_out_table(8)
+        self._name_table_offset = 8 + 0x100 * 4
+        self._crc32_table_offset = self._name_table_offset + 20 * len(self)
+        self._pack_offset_table_offset = self._crc32_table_offset + 4 * len(self)
+
+    def _unpack_entry(self, i):
+        return (self._unpack_name(i), self._unpack_offset(i), 
+                self._unpack_crc32_checksum(i))
+ 
+    def _unpack_name(self, i):
+        offset = self._name_table_offset + i * 20
+        return self._contents[offset:offset+20]
+  
+    def _unpack_offset(self, i):
+        offset = self._pack_offset_table_offset + i * 4
+        return unpack_from(">L", self._contents, offset)[0]
+  
+    def _unpack_crc32_checksum(self, i):
+        return unpack_from(">L", self._contents, 
+                          self._crc32_table_offset + i * 4)[0]
+  
+
+
+def read_pack_header(f):
+    header = f.read(12)
+    assert header[:4] == "PACK"
+    (version,) = unpack_from(">L", header, 4)
+    assert version in (2, 3), "Version was %d" % version
+    (num_objects,) = unpack_from(">L", header, 8)
+    return (version, num_objects)
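+
+# A pack header is 12 bytes: the signature "PACK", a 4-byte big-endian
+# version (2 or 3) and a 4-byte big-endian object count, e.g.
+# struct.pack(">4sLL", "PACK", 2, 3) for a three-object version 2 pack.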
+
+
+def read_pack_tail(f):
+    return (f.read(20),)
+
+
+def unpack_object(map, offset=0):
+    bytes = take_msb_bytes(map, offset)
+    type = (bytes[0] >> 4) & 0x07
+    size = bytes[0] & 0x0f
+    for i, byte in enumerate(bytes[1:]):
+        size += (byte & 0x7f) << ((i * 7) + 4)
+    raw_base = len(bytes)
+    if type == 6: # offset delta
+        bytes = take_msb_bytes(map, raw_base + offset)
+        assert not (bytes[-1] & 0x80)
+        delta_base_offset = bytes[0] & 0x7f
+        for byte in bytes[1:]:
+            delta_base_offset += 1
+            delta_base_offset <<= 7
+            delta_base_offset += (byte & 0x7f)
+        raw_base+=len(bytes)
+        uncomp, comp_len = read_zlib(map, offset + raw_base, size)
+        assert size == len(uncomp)
+        return type, (delta_base_offset, uncomp), comp_len+raw_base
+    elif type == 7: # ref delta
+        basename = map[offset+raw_base:offset+raw_base+20]
+        uncomp, comp_len = read_zlib(map, offset+raw_base+20, size)
+        assert size == len(uncomp)
+        return type, (basename, uncomp), comp_len+raw_base+20
+    else:
+        uncomp, comp_len = read_zlib(map, offset+raw_base, size)
+        assert len(uncomp) == size
+        return type, uncomp, comp_len+raw_base
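+
+# Worked example (sketch): header bytes [0x95, 0x0b] decode to type
+# (0x95 >> 4) & 7 == 1 (a commit) and size
+# (0x95 & 0x0f) + ((0x0b & 0x7f) << 4) == 5 + 176 == 181.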
+
+
+def compute_object_size((num, obj)):
+    if num in (6, 7):
+        return len(obj[1])
+    assert isinstance(obj, str)
+    return len(obj)
+
+
+class PackData(object):
+    """The data contained in a packfile.
+  
+    Pack files can be accessed both sequentially for exploding a pack, and
+    directly with the help of an index to retrieve a specific object.
+  
+    The objects within are either complete or a delta against another.
+  
+    The header is variable length. If the MSB of each byte is set then it
+    indicates that the subsequent byte is still part of the header.
+    For the first byte the next three MS bits are the type, which tells you
+    the type of object, and whether it is a delta. The four LS bits are the
+    lowest bits of the size. For each subsequent byte the LS 7 bits are the
+    next MS bits of the size, i.e. the last byte of the header contains the
+    MS bits of the size.
+  
+    For the complete objects the data is stored as zlib deflated data.
+    The size in the header is the uncompressed object size, so to uncompress
+    you need to just keep feeding data to zlib until you get an object back,
+    or it errors on bad data. This is done here by just giving the complete
+    buffer from the start of the deflated object on. This is bad, but until I
+    get mmap sorted out it will have to do.
+  
+    Currently there are no integrity checks done. Also no attempt is made to try
+    and detect the delta case, or a request for an object at the wrong position.
+    It will all just throw a zlib or KeyError.
+    """
+  
+    def __init__(self, filename):
+        """Create a PackData object that represents the pack in the given filename.
+    
+        The file must exist and stay readable until the object is disposed of. It
+        must also stay the same size. It will be mapped whenever needed.
+    
+        Currently there is a restriction on the size of the pack as the python
+        mmap implementation is flawed.
+        """
+        self._filename = filename
+        assert os.path.exists(filename), "%s is not a packfile" % filename
+        self._size = os.path.getsize(filename)
+        self._header_size = 12
+        assert self._size >= self._header_size, "%s is too small for a packfile (%d < %d)" % (filename, self._size, self._header_size)
+        self._file = open(self._filename, 'rb')
+        self._read_header()
+        self._offset_cache = LRUSizeCache(1024*1024*20, 
+            compute_size=compute_object_size)
+
+    def close(self):
+        self._file.close()
+  
+    def _read_header(self):
+        (version, self._num_objects) = read_pack_header(self._file)
+        self._file.seek(self._size-20)
+        (self._stored_checksum,) = read_pack_tail(self._file)
+  
+    def __len__(self):
+        """Returns the number of objects in this pack."""
+        return self._num_objects
+  
+    def calculate_checksum(self):
+        """Calculate the checksum for this pack."""
+        map, map_offset = simple_mmap(self._file, 0, self._size - 20)
+        try:
+            return make_sha(map[map_offset:self._size-20]).digest()
+        finally:
+            map.close()
+
+    def resolve_object(self, offset, type, obj, get_ref, get_offset=None):
+        """Resolve an object, possibly resolving deltas when necessary.
+        
+        :return: Tuple with object type and contents.
+        """
+        if type not in (6, 7): # Not a delta
+            return type, obj
+
+        if get_offset is None:
+            get_offset = self.get_object_at
+      
+        if type == 6: # offset delta
+            (delta_offset, delta) = obj
+            assert isinstance(delta_offset, int)
+            assert isinstance(delta, str)
+            base_offset = offset-delta_offset
+            type, base_obj = get_offset(base_offset)
+            assert isinstance(type, int)
+        elif type == 7: # ref delta
+            (basename, delta) = obj
+            assert isinstance(basename, str) and len(basename) == 20
+            assert isinstance(delta, str)
+            type, base_obj = get_ref(basename)
+            assert isinstance(type, int)
+            # Can't be a ofs delta, as we wouldn't know the base offset
+            assert type != 6
+            base_offset = None
+        type, base_text = self.resolve_object(base_offset, type, base_obj, get_ref)
+        if base_offset is not None:
+            self._offset_cache[base_offset] = type, base_text
+        ret = (type, apply_delta(base_text, delta))
+        return ret
+  
+    def iterobjects(self, progress=None):
+        offset = self._header_size
+        num = len(self)
+        map, _ = simple_mmap(self._file, 0, self._size)
+        try:
+            for i in range(num):
+                (type, obj, total_size) = unpack_object(map, offset)
+                crc32 = zlib.crc32(map[offset:offset+total_size]) & 0xffffffff
+                yield offset, type, obj, crc32
+                offset += total_size
+                if progress:
+                    progress(i, num)
+        finally:
+            map.close()
+  
+    def iterentries(self, ext_resolve_ref=None, progress=None):
+        found = {}
+        postponed = defaultdict(list)
+        class Postpone(Exception):
+            """Raised to postpone delta resolving."""
+          
+        def get_ref_text(sha):
+            assert len(sha) == 20
+            if sha in found:
+                return found[sha]
+            if ext_resolve_ref:
+                try:
+                    return ext_resolve_ref(sha)
+                except KeyError:
+                    pass
+            raise Postpone, (sha, )
+        extra = []
+        todo = chain(self.iterobjects(progress=progress), extra)
+        for (offset, type, obj, crc32) in todo:
+            assert isinstance(offset, int)
+            assert isinstance(type, int)
+            assert isinstance(obj, tuple) or isinstance(obj, str)
+            try:
+                type, obj = self.resolve_object(offset, type, obj, get_ref_text)
+            except Postpone, (sha, ):
+                postponed[sha].append((offset, type, obj))
+            else:
+                shafile = ShaFile.from_raw_string(type, obj)
+                sha = shafile.sha().digest()
+                found[sha] = (type, obj)
+                yield sha, offset, crc32
+                extra.extend(postponed.get(sha, []))
+        if postponed:
+            raise KeyError([sha_to_hex(h) for h in postponed.keys()])
+  
+    def sorted_entries(self, resolve_ext_ref=None, progress=None):
+        ret = list(self.iterentries(resolve_ext_ref, progress=progress))
+        ret.sort()
+        return ret
+  
+    def create_index_v1(self, filename, resolve_ext_ref=None, progress=None):
+        entries = self.sorted_entries(resolve_ext_ref, progress=progress)
+        write_pack_index_v1(filename, entries, self.calculate_checksum())
+  
+    def create_index_v2(self, filename, resolve_ext_ref=None, progress=None):
+        entries = self.sorted_entries(resolve_ext_ref, progress=progress)
+        write_pack_index_v2(filename, entries, self.calculate_checksum())
+  
+    def get_stored_checksum(self):
+        return self._stored_checksum
+  
+    def check(self):
+        return (self.calculate_checksum() == self.get_stored_checksum())
+  
+    def get_object_at(self, offset):
+        """Given an offset in to the packfile return the object that is there.
+    
+        Using the associated index the location of an object can be looked up, and
+        then the packfile can be asked directly for that object using this
+        function.
+        """
+        if offset in self._offset_cache:
+            return self._offset_cache[offset]
+        assert isinstance(offset, long) or isinstance(offset, int),\
+                "offset was %r" % offset
+        assert offset >= self._header_size
+        map, map_offset = simple_mmap(self._file, offset, self._size-offset)
+        try:
+            ret = unpack_object(map, map_offset)[:2]
+            return ret
+        finally:
+            map.close()
+
+
+class SHA1Writer(object):
+    
+    def __init__(self, f):
+        self.f = f
+        self.sha1 = make_sha("")
+
+    def write(self, data):
+        self.sha1.update(data)
+        self.f.write(data)
+
+    def write_sha(self):
+        sha = self.sha1.digest()
+        assert len(sha) == 20
+        self.f.write(sha)
+        return sha
+
+    def close(self):
+        sha = self.write_sha()
+        self.f.close()
+        return sha
+
+    def tell(self):
+        return self.f.tell()
+
+
+def write_pack_object(f, type, object):
+    """Write pack object to a file.
+
+    :param f: File to write to
+    :param type: Numeric type of the object
+    :param object: Object to write
+    :return: Tuple with offset in the file after writing, and crc32
+    """
+    packed_data_hdr = ""
+    if type == 6: # offset delta
+        (delta_base_offset, object) = object
+    elif type == 7: # ref delta
+        (basename, object) = object
+    size = len(object)
+    c = (type << 4) | (size & 15)
+    size >>= 4
+    while size:
+        packed_data_hdr += (chr(c | 0x80))
+        c = size & 0x7f
+        size >>= 7
+    packed_data_hdr += chr(c)
+    if type == 6: # offset delta
+        ret = [delta_base_offset & 0x7f]
+        delta_base_offset >>= 7
+        while delta_base_offset:
+            delta_base_offset -= 1
+            ret.insert(0, 0x80 | (delta_base_offset & 0x7f))
+            delta_base_offset >>= 7
+        packed_data_hdr += "".join([chr(x) for x in ret])
+    elif type == 7: # ref delta
+        assert len(basename) == 20
+        packed_data_hdr += basename
+    packed_data = packed_data_hdr + zlib.compress(object)
+    f.write(packed_data)
+    return (f.tell(), (zlib.crc32(packed_data) & 0xffffffff))
+
+
+def write_pack(filename, objects, num_objects):
+    f = open(filename + ".pack", 'w')
+    try:
+        entries, data_sum = write_pack_data(f, objects, num_objects)
+    finally:
+        f.close()
+    entries.sort()
+    write_pack_index_v2(filename + ".idx", entries, data_sum)
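+
+# Usage sketch (hypothetical objs, a list of ShaFile objects):
+#   write_pack("/tmp/pack-new", [(o, None) for o in objs], len(objs))
+# produces /tmp/pack-new.pack and a matching /tmp/pack-new.idx.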
+
+
+def write_pack_data(f, objects, num_objects, window=10):
+    """Write pack data to a file.
+
+    :param f: File to write to
+    :param objects: List of objects to write (tuples with object and path)
+    :param num_objects: Number of objects to write
+    :param window: Delta window size (currently unused)
+    :return: List with (name, offset, crc32 checksum) entries, pack checksum
+    """
+    recency = list(objects)
+    # FIXME: Somehow limit delta depth
+    # FIXME: Make thin-pack optional (it's not used when cloning a pack)
+    # Build a list of objects ordered by the magic Linus heuristic
+    # This helps us find good objects to diff against us
+    magic = []
+    for obj, path in recency:
+        magic.append( (obj.type, path, 1, -len(obj.as_raw_string()[1]), obj) )
+    magic.sort()
+    # Build a map of objects and their index in magic - so we can find preceding objects
+    # to diff against
+    offs = {}
+    for i in range(len(magic)):
+        offs[magic[i][4]] = i
+    # Write the pack
+    entries = []
+    f = SHA1Writer(f)
+    f.write("PACK")               # Pack header
+    f.write(struct.pack(">L", 2)) # Pack version
+    f.write(struct.pack(">L", num_objects)) # Number of objects in pack
+    for o, path in recency:
+        sha1 = o.sha().digest()
+        orig_t, raw = o.as_raw_string()
+        winner = raw
+        t = orig_t
+        #for i in range(offs[o]-window, window):
+        #    if i < 0 or i >= len(offs): continue
+        #    b = magic[i][4]
+        #    if b.type != orig_t: continue
+        #    _, base = b.as_raw_string()
+        #    delta = create_delta(base, raw)
+        #    if len(delta) < len(winner):
+        #        winner = delta
+        #        t = 6 if magic[i][2] == 1 else 7
+        offset, crc32 = write_pack_object(f, t, winner)
+        entries.append((sha1, offset, crc32))
+    return entries, f.write_sha()
+
+
+def write_pack_index_v1(filename, entries, pack_checksum):
+    """Write a new pack index file.
+
+    :param filename: The filename of the new pack index file.
+    :param entries: List of tuples with object name (sha), offset_in_pack,  and
+            crc32_checksum.
+    :param pack_checksum: Checksum of the pack file.
+    """
+    f = open(filename, 'wb')
+    f = SHA1Writer(f)
+    fan_out_table = defaultdict(lambda: 0)
+    for (name, offset, entry_checksum) in entries:
+        fan_out_table[ord(name[0])] += 1
+    # Fan-out table
+    for i in range(0x100):
+        f.write(struct.pack(">L", fan_out_table[i]))
+        fan_out_table[i+1] += fan_out_table[i]
+    for (name, offset, entry_checksum) in entries:
+        f.write(struct.pack(">L20s", offset, name))
+    assert len(pack_checksum) == 20
+    f.write(pack_checksum)
+    f.close()
+
+
+def create_delta(base_buf, target_buf):
+    """Use python difflib to work out how to transform base_buf to target_buf"""
+    assert isinstance(base_buf, str)
+    assert isinstance(target_buf, str)
+    out_buf = ""
+    # write delta header
+    def encode_size(size):
+        ret = ""
+        c = size & 0x7f
+        size >>= 7
+        while size:
+            ret += chr(c | 0x80)
+            c = size & 0x7f
+            size >>= 7
+        ret += chr(c)
+        return ret
+    out_buf += encode_size(len(base_buf))
+    out_buf += encode_size(len(target_buf))
+    # write out delta opcodes
+    seq = difflib.SequenceMatcher(a=base_buf, b=target_buf)
+    for opcode, i1, i2, j1, j2 in seq.get_opcodes():
+        # Git patch opcodes don't care about deletes!
+        #if opcode == "replace" or opcode == "delete":
+        #    pass
+        if opcode == "equal":
+            # If they are equal, unpacker will use data from base_buf
+            # Write out an opcode that says what range to use
+            scratch = ""
+            op = 0x80
+            o = i1
+            for i in range(4):
+                if o & 0xff << i*8:
+                    scratch += chr((o >> i*8) & 0xff)
+                    op |= 1 << i
+            s = i2 - i1
+            for i in range(2):
+                if s & 0xff << i*8:
+                    scratch += chr((s >> i*8) & 0xff)
+                    op |= 1 << (4+i)
+            out_buf += chr(op)
+            out_buf += scratch
+        if opcode == "replace" or opcode == "insert":
+            # If we are replacing a range or adding one, then we just
+            # output it to the stream (prefixed by its size)
+            s = j2 - j1
+            o = j1
+            while s > 127:
+                out_buf += chr(127)
+                out_buf += target_buf[o:o+127]
+                s -= 127
+                o += 127
+            out_buf += chr(s)
+            out_buf += target_buf[o:o+s]
+    return out_buf
+
+
+def apply_delta(src_buf, delta):
+    """Based on the similar function in git's patch-delta.c.
+    
+    :param src_buf: Source buffer
+    :param delta: Delta instructions
+    """
+    assert isinstance(src_buf, str), "was %r" % (src_buf,)
+    assert isinstance(delta, str)
+    out = []
+    index = 0
+    delta_length = len(delta)
+    def get_delta_header_size(delta, index):
+        size = 0
+        i = 0
+        while delta:
+            cmd = ord(delta[index])
+            index += 1
+            size |= (cmd & ~0x80) << i
+            i += 7
+            if not cmd & 0x80:
+                break
+        return size, index
+    src_size, index = get_delta_header_size(delta, index)
+    dest_size, index = get_delta_header_size(delta, index)
+    assert src_size == len(src_buf), "%d vs %d" % (src_size, len(src_buf))
+    while index < delta_length:
+        cmd = ord(delta[index])
+        index += 1
+        if cmd & 0x80:
+            cp_off = 0
+            for i in range(4):
+                if cmd & (1 << i): 
+                    x = ord(delta[index])
+                    index += 1
+                    cp_off |= x << (i * 8)
+            cp_size = 0
+            for i in range(3):
+                if cmd & (1 << (4+i)): 
+                    x = ord(delta[index])
+                    index += 1
+                    cp_size |= x << (i * 8)
+            if cp_size == 0: 
+                cp_size = 0x10000
+            if (cp_off + cp_size < cp_size or
+                cp_off + cp_size > src_size or
+                cp_size > dest_size):
+                break
+            out.append(src_buf[cp_off:cp_off+cp_size])
+        elif cmd != 0:
+            out.append(delta[index:index+cmd])
+            index += cmd
+        else:
+            raise ApplyDeltaError("Invalid opcode 0")
+    
+    if index != delta_length:
+        raise ApplyDeltaError("delta not empty: %r" % delta[index:])
+
+    out = ''.join(out)
+    if dest_size != len(out):
+        raise ApplyDeltaError("dest size incorrect")
+
+    return out
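+
+# Round-trip sketch: for str buffers apply_delta inverts create_delta, e.g.
+#   base = "the quick brown fox"
+#   target = "the slow brown fox"
+#   assert apply_delta(base, create_delta(base, target)) == target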
+
+
+def write_pack_index_v2(filename, entries, pack_checksum):
+    """Write a new pack index file.
+
+    :param filename: The filename of the new pack index file.
+    :param entries: List of tuples with object name (sha), offset_in_pack,  and
+            crc32_checksum.
+    :param pack_checksum: Checksum of the pack file.
+    """
+    f = open(filename, 'wb')
+    f = SHA1Writer(f)
+    f.write('\377tOc') # Magic!
+    f.write(struct.pack(">L", 2))
+    fan_out_table = defaultdict(lambda: 0)
+    for (name, offset, entry_checksum) in entries:
+        fan_out_table[ord(name[0])] += 1
+    # Fan-out table
+    for i in range(0x100):
+        f.write(struct.pack(">L", fan_out_table[i]))
+        fan_out_table[i+1] += fan_out_table[i]
+    for (name, offset, entry_checksum) in entries:
+        f.write(name)
+    for (name, offset, entry_checksum) in entries:
+        f.write(struct.pack(">L", entry_checksum))
+    for (name, offset, entry_checksum) in entries:
+        # FIXME: handle if MSBit is set in offset
+        f.write(struct.pack(">L", offset))
+    # FIXME: handle table for pack files > 8 Gb
+    assert len(pack_checksum) == 20
+    f.write(pack_checksum)
+    f.close()
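+
+# Resulting v2 index layout: '\377tOc' magic, version, 256-entry fan-out
+# table, N 20-byte names, N 4-byte crc32s, N 4-byte offsets, the pack
+# checksum, and finally the index checksum appended by SHA1Writer.close().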
+
+
+class Pack(object):
+
+    def __init__(self, basename):
+        self._basename = basename
+        self._data_path = self._basename + ".pack"
+        self._idx_path = self._basename + ".idx"
+        self._data = None
+        self._idx = None
+
+    @classmethod
+    def from_objects(cls, data, idx):
+        ret = cls("")
+        ret._data = data
+        ret._idx = idx
+        return ret
+
+    def name(self):
+        """The SHA over the SHAs of the objects in this pack."""
+        return self.idx.objects_sha1()
+
+    @property
+    def data(self):
+        if self._data is None:
+            self._data = PackData(self._data_path)
+            assert len(self.idx) == len(self._data)
+            idx_stored_checksum = self.idx.get_pack_checksum()
+            data_stored_checksum = self._data.get_stored_checksum()
+            if idx_stored_checksum != data_stored_checksum:
+                raise ChecksumMismatch(sha_to_hex(idx_stored_checksum), 
+                                       sha_to_hex(data_stored_checksum))
+        return self._data
+
+    @property
+    def idx(self):
+        if self._idx is None:
+            self._idx = load_pack_index(self._idx_path)
+        return self._idx
+
+    def close(self):
+        if self._data is not None:
+            self._data.close()
+        self.idx.close()
+
+    def __eq__(self, other):
+        return type(self) == type(other) and self.idx == other.idx
+
+    def __len__(self):
+        """Number of entries in this pack."""
+        return len(self.idx)
+
+    def __repr__(self):
+        return "%s(%r)" % (self.__class__.__name__, self._basename)
+
+    def __iter__(self):
+        """Iterate over all the sha1s of the objects in this pack."""
+        return iter(self.idx)
+
+    def check(self):
+        if not self.idx.check():
+            return False
+        if not self.data.check():
+            return False
+        return True
+
+    def get_stored_checksum(self):
+        return self.data.get_stored_checksum()
+
+    def __contains__(self, sha1):
+        """Check whether this pack contains a particular SHA1."""
+        try:
+            self.idx.object_index(sha1)
+            return True
+        except KeyError:
+            return False
+
+    def get_raw(self, sha1, resolve_ref=None):
+        offset = self.idx.object_index(sha1)
+        obj_type, obj = self.data.get_object_at(offset)
+        if type(offset) is long:
+            offset = int(offset)
+        if resolve_ref is None:
+            resolve_ref = self.get_raw
+        return self.data.resolve_object(offset, obj_type, obj, resolve_ref)
+
+    def __getitem__(self, sha1):
+        """Retrieve the specified SHA1."""
+        obj_type, uncomp = self.get_raw(sha1)
+        return ShaFile.from_raw_string(obj_type, uncomp)
+
+    def iterobjects(self, get_raw=None):
+        if get_raw is None:
+            get_raw = self.get_raw
+        for offset, obj_type, obj, crc32 in self.data.iterobjects():
+            assert isinstance(offset, int)
+            yield ShaFile.from_raw_string(
+                    *self.data.resolve_object(offset, obj_type, obj, get_raw))
+
+
+def load_packs(path):
+    if not os.path.exists(path):
+        return
+    for name in os.listdir(path):
+        if name.startswith("pack-") and name.endswith(".pack"):
+            yield Pack(os.path.join(path, name[:-len(".pack")]))
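+
+# Usage sketch (illustrative): given pack-<name>.pack/.idx pairs under a
+# directory, load_packs yields lazily-opened Pack objects that support
+# membership tests and item access by 40-character hex sha, e.g.:
+#
+#   for pack in load_packs("objects/pack"):
+#       if sha1 in pack:
+#           obj = pack[sha1]  # a ShaFile instance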
+
+
+try:
+    from _pack import apply_delta, bisect_find_sha
+except ImportError:
+    pass
new file mode 100644
--- /dev/null
+++ b/dulwich/protocol.py
@@ -0,0 +1,151 @@
+# protocol.py -- Shared parts of the git protocols
+# Copyright (C) 2008 John Carr <john.carr@unrouted.co.uk>
+# Copyright (C) 2008 Jelmer Vernooij <jelmer@samba.org>
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; version 2
+# or (at your option) any later version of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+# MA  02110-1301, USA.
+
+"""Generic functions for talking the git smart server protocol."""
+
+import socket
+
+from errors import (
+    HangupException,
+    GitProtocolError,
+    )
+
+TCP_GIT_PORT = 9418
+
+class ProtocolFile(object):
+    """
+    Some network ops are like file ops. The file ops expect to operate on
+    file objects, so provide them with a dummy file.
+    """
+
+    def __init__(self, read, write):
+        self.read = read
+        self.write = write
+
+    def tell(self):
+        pass
+
+    def close(self):
+        pass
+
+
+class Protocol(object):
+
+    def __init__(self, read, write, report_activity=None):
+        self.read = read
+        self.write = write
+        self.report_activity = report_activity
+
+    def read_pkt_line(self):
+        """
+        Reads a 'pkt line' from the remote git process
+
+        :return: The next string from the stream
+        """
+        try:
+            sizestr = self.read(4)
+            if not sizestr:
+                raise HangupException()
+            size = int(sizestr, 16)
+            if size == 0:
+                if self.report_activity:
+                    self.report_activity(4, 'read')
+                return None
+            if self.report_activity:
+                self.report_activity(size, 'read')
+            return self.read(size-4)
+        except socket.error, e:
+            raise GitProtocolError(e)
+
+    def read_pkt_seq(self):
+        pkt = self.read_pkt_line()
+        while pkt:
+            yield pkt
+            pkt = self.read_pkt_line()
+
+    def write_pkt_line(self, line):
+        """
+        Sends a 'pkt line' to the remote git process
+
+        :param line: A string containing the data to send
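+
+        Framing (per the git pack protocol) is four ASCII hex digits giving
+        the total length, including the 4-byte length header itself,
+        followed by the payload. For example (illustrative):
+
+            write_pkt_line("hello\n")  sends "000ahello\n"
+            write_pkt_line(None)       sends "0000", a flush-pkt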
+        """
+        try:
+            if line is None:
+                self.write("0000")
+                if self.report_activity:
+                    self.report_activity(4, 'write')
+            else:
+                self.write("%04x%s" % (len(line)+4, line))
+                if self.report_activity:
+                    self.report_activity(4+len(line), 'write')
+        except socket.error, e:
+            raise GitProtocolError(e)
+
+    def write_sideband(self, channel, blob):
+        """
+        Write data to the sideband (a git multiplexing method)
+
+        :param channel: int specifying which channel to write to
+        :param blob: a blob of data (as a string) to send on this channel
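+
+        For example (illustrative), channel 1 conventionally carries pack
+        data and channel 2 progress messages (as used by UploadPackHandler
+        in server.py); each frame is a pkt-line whose first payload byte
+        is the channel number.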
+        """
+        # a pkt-line can be a maximum of 65520 bytes; with 4 bytes of length
+        # and 1 byte of channel that leaves 65515 bytes of payload per line
+        # (oddly, the length is sent in ASCII hex but the channel in binary)
+        while blob:
+            self.write_pkt_line("%s%s" % (chr(channel), blob[:65515]))
+            blob = blob[65515:]
+
+    def send_cmd(self, cmd, *args):
+        """
+        Send a command and some arguments to a git server
+
+        Only used for git://
+
+        :param cmd: The remote service to access
+        :param args: List of arguments to send to the remote service
+        """
+        self.write_pkt_line("%s %s" % (cmd, "".join(["%s\0" % a for a in args])))
+
+    def read_cmd(self):
+        """
+        Read a command and some arguments from the git client
+
+        Only used for git://
+
+        :return: A tuple of (command, [list of arguments])
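+
+        For example (illustrative), the request
+        "git-upload-pack /project.git\0host=example.com\0" (as built by
+        send_cmd above) parses to
+        ("git-upload-pack", ["/project.git", "host=example.com"]).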
+        """
+        line = self.read_pkt_line()
+        splice_at = line.find(" ")
+        cmd, args = line[:splice_at], line[splice_at+1:]
+        assert args[-1] == "\x00"
+        return cmd, args[:-1].split(chr(0))
+
+
+def extract_capabilities(text):
+    """Extract a capabilities list from a string, if present.
+
+    :param text: String to extract from
+    :return: Tuple of the text with capabilities removed and the list of
+        capabilities, or None if no capabilities were present.
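+
+    For example (illustrative):
+        extract_capabilities("want <sha>\x00multi_ack thin-pack")
+        returns ("want <sha>", ["multi_ack thin-pack"])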
+    """
+    if "\0" not in text:
+        return text, None
+    capabilities = text.split("\0")
+    return (capabilities[0], capabilities[1:])
+
new file mode 100644
--- /dev/null
+++ b/dulwich/repo.py
@@ -0,0 +1,359 @@
+# repo.py -- For dealing with git repositories.
+# Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
+# Copyright (C) 2008 Jelmer Vernooij <jelmer@samba.org>
+# 
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; version 2
+# of the License or (at your option) any later version of 
+# the License.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+# MA  02110-1301, USA.
+
+import os
+import stat
+
+from errors import (
+    MissingCommitError, 
+    NotBlobError, 
+    NotCommitError, 
+    NotGitRepository,
+    NotTreeError, 
+    )
+from object_store import ObjectStore
+from objects import (
+    Blob,
+    Commit,
+    ShaFile,
+    Tag,
+    Tree,
+    )
+
+OBJECTDIR = 'objects'
+PACKDIR = 'pack'  # used by Repo.pack_dir(); pack files live in objects/pack
+SYMREF = 'ref: '
+
+
+class Tags(object):
+
+    def __init__(self, tagdir, tags):
+        self.tagdir = tagdir
+        self.tags = tags
+
+    def __getitem__(self, name):
+        return self.tags[name]
+    
+    def __setitem__(self, name, ref):
+        self.tags[name] = ref
+        f = open(os.path.join(self.tagdir, name), 'wb')
+        try:
+            f.write("%s\n" % ref)
+        finally:
+            f.close()
+
+    def __len__(self):
+        return len(self.tags)
+
+    def iteritems(self):
+        for k in self.tags:
+            yield k, self[k]
+
+
+def read_packed_refs(f):
+    l = f.readline()
+    assert l == "# pack-refs with: peeled \n"
+    for l in f.readlines():
+        if l[0] == "^":
+            # FIXME: Return somehow
+            continue
+        yield tuple(l.rstrip("\n").split(" ", 2))
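+
+# A packed-refs file looks like this (illustrative; note the trailing space
+# in the header line, which the assert above insists on):
+#
+#   # pack-refs with: peeled
+#   7601d7f6231db6a57f7bbb79ee52e4d462fd44d1 refs/heads/master
+#   a90fa2d900a17e99b433217e988c4eb4a2e9a097 refs/tags/some-tag
+#   ^0d89f20333fbb1d2f3a94da77f4981373d8f4310
+#
+# read_packed_refs yields (sha, refname) tuples and currently skips the
+# "^" peeled-tag lines (see the FIXME above).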
+
+
+class MissingObjectFinder(object):
+
+    def __init__(self, object_store, wants, graph_walker, progress=None):
+        self.sha_done = set()
+        self.objects_to_send = set([(w, None) for w in wants])
+        self.object_store = object_store
+        if progress is None:
+            self.progress = lambda x: None
+        else:
+            self.progress = progress
+        ref = graph_walker.next()
+        while ref:
+            if ref in self.object_store:
+                graph_walker.ack(ref)
+            ref = graph_walker.next()
+
+    def add_todo(self, entries):
+        self.objects_to_send.update([e for e in entries if e not in self.sha_done])
+
+    def parse_tree(self, tree):
+        self.add_todo([(sha, name) for (mode, name, sha) in tree.entries()])
+
+    def parse_commit(self, commit):
+        self.add_todo([(commit.tree, "")])
+        self.add_todo([(p, None) for p in commit.parents])
+
+    def parse_tag(self, tag):
+        self.add_todo([(tag.object[1], None)])
+
+    def next(self):
+        if not self.objects_to_send:
+            return None
+        (sha, name) = self.objects_to_send.pop()
+        o = self.object_store[sha]
+        if isinstance(o, Commit):
+            self.parse_commit(o)
+        elif isinstance(o, Tree):
+            self.parse_tree(o)
+        elif isinstance(o, Tag):
+            self.parse_tag(o)
+        self.sha_done.add((sha, name))
+        self.progress("counting objects: %d\r" % len(self.sha_done))
+        return (sha, name)
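+
+# MissingObjectFinder behaves like an iterator: calling next() until it
+# returns None (as Repo.find_missing_objects below does via
+# iter(finder.next, None)) enumerates a (sha, name) pair for every object
+# the other side is missing.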
+
+
+class Repo(object):
+
+    ref_locs = ['', 'refs', 'refs/tags', 'refs/heads', 'refs/remotes']
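+    # Lookup order used by ref() below: a bare name like "master" is tried,
+    # relative to the control dir, as master, refs/master, refs/tags/master,
+    # refs/heads/master and refs/remotes/master in turn, before falling back
+    # to the packed-refs file.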
+
+    def __init__(self, root):
+        if os.path.isdir(os.path.join(root, ".git", "objects")):
+            self.bare = False
+            self._controldir = os.path.join(root, ".git")
+        elif os.path.isdir(os.path.join(root, "objects")):
+            self.bare = True
+            self._controldir = root
+        else:
+            raise NotGitRepository(root)
+        self.path = root
+        self.tags = Tags(self.tagdir(), self.get_tags())
+        self._object_store = None
+
+    def controldir(self):
+        return self._controldir
+
+    def find_missing_objects(self, determine_wants, graph_walker, progress):
+        """Find the missing objects required for a set of revisions.
+
+        :param determine_wants: Function that takes a dictionary with heads 
+            and returns the list of heads to fetch.
+        :param graph_walker: Object that can iterate over the list of revisions 
+            to fetch and has an "ack" method that will be called to acknowledge 
+            that a revision is present.
+        :param progress: Simple progress function that will be called with 
+            updated progress strings.
+        """
+        wants = determine_wants(self.get_refs())
+        return iter(MissingObjectFinder(self.object_store, wants, graph_walker, 
+                progress).next, None)
+
+    def fetch_objects(self, determine_wants, graph_walker, progress):
+        """Fetch the missing objects required for a set of revisions.
+
+        :param determine_wants: Function that takes a dictionary with heads 
+            and returns the list of heads to fetch.
+        :param graph_walker: Object that can iterate over the list of revisions 
+            to fetch and has an "ack" method that will be called to acknowledge 
+            that a revision is present.
+        :param progress: Simple progress function that will be called with 
+            updated progress strings.
+        :return: tuple with number of objects, iterator over objects
+        """
+        return self.object_store.iter_shas(
+            self.find_missing_objects(determine_wants, graph_walker, progress))
+
+    def object_dir(self):
+        return os.path.join(self.controldir(), OBJECTDIR)
+
+    @property
+    def object_store(self):
+        if self._object_store is None:
+            self._object_store = ObjectStore(self.object_dir())
+        return self._object_store
+
+    def pack_dir(self):
+        return os.path.join(self.object_dir(), PACKDIR)
+
+    def _get_ref(self, file):
+        f = open(file, 'rb')
+        try:
+            contents = f.read()
+            if contents.startswith(SYMREF):
+                ref = contents[len(SYMREF):]
+                if ref[-1] == '\n':
+                    ref = ref[:-1]
+                return self.ref(ref)
+            assert len(contents) == 41, 'Invalid ref in %s' % file
+            return contents[:-1]
+        finally:
+            f.close()
+
+    def ref(self, name):
+        for dir in self.ref_locs:
+            file = os.path.join(self.controldir(), dir, name)
+            if os.path.exists(file):
+                return self._get_ref(file)
+        packed_refs = self.get_packed_refs()
+        if name in packed_refs:
+            return packed_refs[name]
+
+    def get_refs(self):
+        ret = {}
+        if self.head():
+            ret['HEAD'] = self.head()
+        for dir in ["refs/heads", "refs/tags"]:
+            for name in os.listdir(os.path.join(self.controldir(), dir)):
+                path = os.path.join(self.controldir(), dir, name)
+                if os.path.isfile(path):
+                    ret["/".join([dir, name])] = self._get_ref(path)
+        ret.update(self.get_packed_refs())
+        return ret
+
+    def get_packed_refs(self):
+        path = os.path.join(self.controldir(), 'packed-refs')
+        if not os.path.exists(path):
+            return {}
+        ret = {}
+        f = open(path, 'r')
+        try:
+            for entry in read_packed_refs(f):
+                ret[entry[1]] = entry[0]
+            return ret
+        finally:
+            f.close()
+
+    def set_ref(self, name, value):
+        file = os.path.join(self.controldir(), name)
+        f = open(file, 'w')
+        try:
+            f.write(value+"\n")
+        finally:
+            f.close()
+
+    def remove_ref(self, name):
+        file = os.path.join(self.controldir(), name)
+        if os.path.exists(file):
+            os.remove(file)
+
+    def tagdir(self):
+        return os.path.join(self.controldir(), 'refs', 'tags')
+
+    def get_tags(self):
+        ret = {}
+        for root, dirs, files in os.walk(self.tagdir()):
+            for name in files:
+                ret[name] = self._get_ref(os.path.join(root, name))
+        return ret
+
+    def heads(self):
+        ret = {}
+        for root, dirs, files in os.walk(os.path.join(self.controldir(), 'refs', 'heads')):
+            for name in files:
+                ret[name] = self._get_ref(os.path.join(root, name))
+        return ret
+
+    def head(self):
+        return self.ref('HEAD')
+
+    def _get_object(self, sha, cls):
+        assert len(sha) in (20, 40)
+        ret = self.get_object(sha)
+        if ret._type != cls._type:
+            if cls is Commit:
+                raise NotCommitError(ret)
+            elif cls is Blob:
+                raise NotBlobError(ret)
+            elif cls is Tree:
+                raise NotTreeError(ret)
+            else:
+                raise Exception("Type invalid: %r != %r" % (ret._type, cls._type))
+        return ret
+
+    def get_object(self, sha):
+        return self.object_store[sha]
+
+    def get_parents(self, sha):
+        return self.commit(sha).parents
+
+    def commit(self, sha):
+        return self._get_object(sha, Commit)
+
+    def tree(self, sha):
+        return self._get_object(sha, Tree)
+
+    def tag(self, sha):
+        return self._get_object(sha, Tag)
+
+    def get_blob(self, sha):
+        return self._get_object(sha, Blob)
+
+    def revision_history(self, head):
+        """Returns a list of the commits reachable from head.
+
+        Returns a list of commit objects, the first of which will be the
+        commit of head, followed by its parents.
+
+        Raises NotCommitError if any of the objects referenced is not a
+        commit, including if the head parameter isn't the sha of a commit,
+        and MissingCommitError if a referenced commit is not present.
+
+        XXX: work out how to handle merges.
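+
+        For example (illustrative), for a linear history root -> A -> B,
+        revision_history(B_sha) returns [commit B, commit A, commit root],
+        newest first.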
+        """
+        # We build the list backwards, as parents are more likely to be older
+        # than children
+        pending_commits = [head]
+        history = []
+        while pending_commits:
+            head = pending_commits.pop(0)
+            try:
+                commit = self.commit(head)
+            except KeyError:
+                raise MissingCommitError(head)
+            if commit in history:
+                continue
+            i = 0
+            for known_commit in history:
+                if known_commit.commit_time > commit.commit_time:
+                    break
+                i += 1
+            history.insert(i, commit)
+            parents = commit.parents
+            pending_commits += parents
+        history.reverse()
+        return history
+
+    def __repr__(self):
+        return "<Repo at %r>" % self.path
+
+    @classmethod
+    def init(cls, path, mkdir=True):
+        controldir = os.path.join(path, ".git")
+        os.mkdir(controldir)
+        cls.init_bare(controldir)
+
+    @classmethod
+    def init_bare(cls, path, mkdir=True):
+        for d in [["objects"], 
+                  ["objects", "info"], 
+                  ["objects", "pack"],
+                  ["branches"],
+                  ["refs"],
+                  ["refs", "tags"],
+                  ["refs", "heads"],
+                  ["hooks"],
+                  ["info"]]:
+            os.mkdir(os.path.join(path, *d))
+        open(os.path.join(path, 'HEAD'), 'w').write("ref: refs/heads/master\n")
+        open(os.path.join(path, 'description'), 'w').write("Unnamed repository")
+        open(os.path.join(path, 'info', 'exclude'), 'w').write("")
+
+    create = init_bare
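+
+    # Layout sketch (illustrative): Repo.init_bare("/tmp/foo.git") creates
+    # objects/{info,pack}, branches, refs/{tags,heads}, hooks and info
+    # directories, plus a HEAD pointing at refs/heads/master, a stub
+    # description and an empty info/exclude.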
+
new file mode 100644
--- /dev/null
+++ b/dulwich/server.py
@@ -0,0 +1,238 @@
+# server.py -- Implementation of the server side git protocols
+# Copyright (C) 2008 John Carr <john.carr@unrouted.co.uk>
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; version 2
+# or (at your option) any later version of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+# MA  02110-1301, USA.
+
+import SocketServer
+import tempfile
+
+from protocol import (
+    Protocol,
+    ProtocolFile,
+    TCP_GIT_PORT,
+    extract_capabilities,
+    )
+from repo import (
+    Repo,
+    )
+from pack import (
+    write_pack_data,
+    )
+
+class Backend(object):
+
+    def get_refs(self):
+        """
+        Get all the refs in the repository
+
+        :return: dict of name -> sha
+        """
+        raise NotImplementedError
+
+    def apply_pack(self, refs, read):
+        """ Import a set of changes into a repository and update the refs
+
+        :param refs: list of (oldsha, newsha, refname) tuples
+        :param read: callback to read from the incoming pack
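+
+        For example (illustrative), a push that updates master and deletes
+        a tag would pass refs like:
+            [("1" * 40, "2" * 40, "refs/heads/master"),
+             ("3" * 40, "0" * 40, "refs/tags/old-tag")]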
+        """
+        raise NotImplementedError
+
+    def fetch_objects(self, determine_wants, graph_walker, progress):
+        """
+        Yield the objects required for a list of commits.
+
+        :param progress: is a callback to send progress messages to the client
+        """
+        raise NotImplementedError
+
+
+class GitBackend(Backend):
+
+    def __init__(self, gitdir=None):
+        self.gitdir = gitdir
+
+        if not self.gitdir:
+            self.gitdir = tempfile.mkdtemp()
+            Repo.create(self.gitdir)
+
+        self.repo = Repo(self.gitdir)
+        self.fetch_objects = self.repo.fetch_objects
+        self.get_refs = self.repo.get_refs
+
+    def apply_pack(self, refs, read):
+        fd, commit = self.repo.object_store.add_thin_pack()
+        fd.write(read())
+        fd.close()
+        commit()
+
+        for oldsha, sha, ref in refs:
+            # a new sha of forty zeros means the client wants the ref deleted
+            if sha == "0" * 40:
+                self.repo.remove_ref(ref)
+            else:
+                self.repo.set_ref(ref, sha)
+
+        print "pack applied"
+
+
+class Handler(object):
+
+    def __init__(self, backend, read, write):
+        self.backend = backend
+        self.proto = Protocol(read, write)
+
+    def capabilities(self):
+        return " ".join(self.default_capabilities())
+
+
+class UploadPackHandler(Handler):
+
+    def default_capabilities(self):
+        return ("multi_ack", "side-band-64k", "thin-pack", "ofs-delta")
+
+    def handle(self):
+        def determine_wants(heads):
+            keys = heads.keys()
+            if keys:
+                self.proto.write_pkt_line("%s %s\x00%s\n" % (
+                    heads[keys[0]], keys[0], self.capabilities()))
+                for k in keys[1:]:
+                    self.proto.write_pkt_line("%s %s\n" % (heads[k], k))
+
+            # end of the advertisement
+            self.proto.write("0000")
+
+            # Now the client will either send "0000", meaning that it doesn't
+            # want to pull, or it will start sending "want" lines.
+            want = self.proto.read_pkt_line()
+            if want is None:
+                return []
+
+            want, self.client_capabilities = extract_capabilities(want)
+
+            want_revs = []
+            while want and want[:4] == 'want':
+                want_revs.append(want[5:45])
+                want = self.proto.read_pkt_line()
+            return want_revs
+
+        progress = lambda x: self.proto.write_sideband(2, x)
+        write = lambda x: self.proto.write_sideband(1, x)
+
+        class ProtocolGraphWalker(object):
+
+            def __init__(self, proto):
+                self.proto = proto
+                self._last_sha = None
+
+            def ack(self, have_ref):
+                self._last_sha = have_ref
+                self.proto.write_pkt_line("ACK %s continue\n" % have_ref)
+
+            def next(self):
+                have = self.proto.read_pkt_line()
+                if have[:4] == 'have':
+                    return have[5:45]
+
+                #if have[:4] == 'done':
+                #    return None
+
+                if self._last_sha:
+                    # Oddness: git seems to resend the ACK for the last common
+                    # commit, this time without the "continue" statement
+                    self.proto.write_pkt_line("ACK %s\n" % self._last_sha)
+
+                # The exchange finishes with a NAK
+                self.proto.write_pkt_line("NAK\n")
+
+        graph_walker = ProtocolGraphWalker(self.proto)
+        num_objects, objects_iter = self.backend.fetch_objects(determine_wants, graph_walker, progress)
+
+        # Do they want any objects?
+        if num_objects == 0:
+            return
+
+        progress("dul-daemon says what\n")
+        progress("counting objects: %d, done.\n" % num_objects)
+        write_pack_data(ProtocolFile(None, write), objects_iter, num_objects)
+        progress("how was that, then?\n")
+        # we are done
+        self.proto.write("0000")
+
+
+class ReceivePackHandler(Handler):
+
+    def default_capabilities(self):
+        return ("report-status", "delete-refs")
+
+    def handle(self):
+        refs = self.backend.get_refs().items()
+
+        if refs:
+            self.proto.write_pkt_line("%s %s\x00%s\n" % (
+                refs[0][1], refs[0][0], self.capabilities()))
+            for i in range(1, len(refs)):
+                ref = refs[i]
+                self.proto.write_pkt_line("%s %s\n" % (ref[1], ref[0]))
+        else:
+            self.proto.write_pkt_line(
+                "0000000000000000000000000000000000000000 capabilities^{} %s"
+                % self.capabilities())
+
+        self.proto.write("0000")
+
+        client_refs = []
+        ref = self.proto.read_pkt_line()
+
+        # if ref is None then the client doesn't want to send us anything
+        if ref is None:
+            return
+
+        ref, client_capabilities = extract_capabilities(ref)
+
+        # client will now send us a list of (oldsha, newsha, ref)
+        while ref:
+            client_refs.append(ref.split())
+            ref = self.proto.read_pkt_line()
+
+        # the backend can now deal with these refs and read a pack using self.read
+        self.backend.apply_pack(client_refs, self.proto.read)
+
+        # once we have read the whole pack from the client, it assumes
+        # everything worked; there is NO ack from the server before it
+        # reports victory.
+
+
+class TCPGitRequestHandler(SocketServer.StreamRequestHandler):
+
+    def handle(self):
+        proto = Protocol(self.rfile.read, self.wfile.write)
+        command, args = proto.read_cmd()
+
+        # dispatch on the specific git command
+        if command == 'git-upload-pack':
+            cls = UploadPackHandler
+        elif command == 'git-receive-pack':
+            cls = ReceivePackHandler
+        else:
+            return
+
+        h = cls(self.server.backend, self.rfile.read, self.wfile.write)
+        h.handle()
+
+
+class TCPGitServer(SocketServer.TCPServer):
+
+    allow_reuse_address = True
+    serve = SocketServer.TCPServer.serve_forever
+
+    def __init__(self, backend, listen_addr, port=TCP_GIT_PORT):
+        self.backend = backend
+        SocketServer.TCPServer.__init__(self, (listen_addr, port), TCPGitRequestHandler)
+
+
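+# Usage sketch (illustrative; the path is made up): serve an existing
+# repository on the standard git port, 9418:
+#
+#   backend = GitBackend("/path/to/repo")
+#   TCPGitServer(backend, "localhost").serve()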
new file mode 100644
--- /dev/null
+++ b/dulwich/tests/__init__.py
@@ -0,0 +1,19 @@
+# __init__.py -- The tests for dulwich
+# Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
+# 
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; version 2
+# of the License or (at your option) any later version of 
+# the License.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+# MA  02110-1301, USA.
+
new file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..8f8ed37f1e6b8f0af781c26daa8f31ae9bd2167d
GIT binary patch
literal 16
Xc$_n@n4|5fs~2eSgo%MGhe-wiD3b&h
new file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..7bef12912d59b8fab01801f66978456947e6ce59
GIT binary patch
literal 22
dc$}-4<9+I+e~>}ChNrGxpusaH2E`XFA^>DH2Y~<p
new file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..8c901c5b89f920a740af8b23b771ef4019cdb665
GIT binary patch
literal 15
Wc$}-4<9+I+e~>`}0|Vy<#t#58#|6#+
new file mode 100644
--- /dev/null
+++ b/dulwich/tests/data/commits/0d89f20333fbb1d2f3a94da77f4981373d8f4310
@@ -0,0 +1,2 @@
+xϴK
+Â@]Ï)zù53×®×==-&’–àíñ¾e=¨âi:×úƒÎ"ÐZ—=¢çH)¢°§©rˆèŠ”úœ¬¡—>§®4ÈwY´¼áÔ¯M•ÒÑxé¿|Ýñq=ƒs)&Ì6Dhì6Ã{Y凹m/üLæXg?«
\ No newline at end of file
new file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..69c6dff1ab6b88df371b5f539df78e705d1f05a5
GIT binary patch
literal 194
zc$@*k06qVBoTZP;N(3<!L|x}AvL9&gP11S5Ag%@1uJ0>41KlG%k-@*W9q<b*C@2m^
zLCIxVPQ!;<9>!+owSr*H1hJ<OsUpXweYe!oHRM)e7OL5O(q`-M#5pmm$c&5yG~isT
z%`$6jXnO!@S-bxAlk{#43@9OK>@zli(zHnJ5HqWVCT!W!*Z8<>{)HC1`Zv1{{`SZ7
w?V!ddTA$Cq{C3CFYU9ZxmQ+eC*!%%@Ik`U@_MZ*+YTLW{X?kS$1Hb25!(j?!{r~^~
new file mode 100644
--- /dev/null
+++ b/dulwich/tests/data/commits/60dacdc733de308bb77bb76ce0fb0f9b44c9769e
@@ -0,0 +1,2 @@
+xœ¥ŽË
+Â0E]ç+f/ÈäÑN"®]®g’	¶Ð*5"þ½¯Oð.Ï…ÃÉ—i¸€«¶¨a¶	UB¬E¤ÃRrŸ[’²P´\©öæÊ‹Î
°ÄTz靖-®zN¡0Q
)ZO¾Ä¼EÃ÷v¾,pàIopÒ[“'lÇǺ¨<ïÇ|ñfÖ¶k)P—œGXã{&K›þã0Ç÷?“y´MQ
\ No newline at end of file
new file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..96f9998c0a1883d2b96b5088650eec063a5d3e97
GIT binary patch
literal 104
zc${<b402{*U|<4bM$f1XbAU7hjAmfqU}CULu3=zkT*AP>`~rd*7@p02G<(y<YtG)?
sn(NA=ZX9~j6FiZDIVmR*qKQHG@9Tf>Hrv@AnQ(54P{UopnsT;$0Cd_RNB{r;
new file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..ca0454de928844663ddb20a0369f1d9af0391c72
GIT binary patch
literal 1136
zc${Nk7zINZFpgq4W*WhmnF72iKb?nvtNoPZGVY?OzjuhdO#Q8J0w~_KX;E30M&%*S
z)q-WupI7XVdZP4h6;Pb#WB)3#V5y@8i~pWDeo8Du)#KJ*p(4qK7isHf#ZO+F?&$K+
gb$0jS=i5KE^l4qpH=6bEq@MNEml7SfmhNQ)04R$#EC2ui
new file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..e2754b2777a048fd7e83b22d77a89462c2394d2f
GIT binary patch
literal 214
zc$@*&04e`aK|@Ob00062000A!40xQS%s~pmFc3!JeNHj^f+R!DBp}kYYr*xT{$LAf
zp&cpS-l|7%`yPC@09066VpL9&d-hpN2ofO>NhJkE%M7SGecEGQ=%N)X-LP7D(P`Y8
zJIbI_H-7r|&4&qgrk!bXd-PGL@xgo7&$SL__{VV9{nAfzZ@9laNUZ^QoHH~qFf%bx
zNMy)Q=i%RKKP9=0yJ+g~9U?DNe=D2-0Due&4L5k4D@iRbQ845J019jZ9C8_e=w`2)
QUz4q8K}6w2n~S68w@*i1ng9R*
new file mode 100644
--- /dev/null
+++ b/dulwich/tests/data/repos/a/.git/HEAD
@@ -0,0 +1,1 @@
+ref: refs/heads/master
new file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..1db6ac04cf69d022d6a730b8d88601f7c5183157
GIT binary patch
literal 257
zc${<b402{*U|<4bW;fO)_kc75j0Ot1FetY#WMF7q0%X5{U<P)-&)oYTr5G-oW1(<)
zS@ZpnYaO57Ffb;9^u2)Ue}SZb5xRbVy;MyFt7TWbqrN@X)M>7HyMXy817i|M-#@7S
ze@OaQpzB{W?ZWOv!H!eARWl_&XCG!1KeK?9fiamOB*@hjC@;xitYE<P|4OZEOH_Yq
lnWvoG?dgyIWwr9vm#lNDnR2Z=GLBbD=!=}p)oC2;PXTX)P2>Oo
new file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..dfc9847b6f39818fb3a16abd03a52cca8b0488df
GIT binary patch
literal 133
zc$@)+0DAv;oTZIH3c@fD06pgwdoM_~P24OL(W@WuoMcyQp((VH;_uad!8y)hCePC}
zR6uSIbx{Qpj+TU&JbQg0cFuXnDTocoL0!n`yOT5VRiE<$w>aqvcU^0GLqBefXc}U<
nj6c8a`dGA{0g)V$w=F|sznHwAtNzn4R}qL1i$F3TrTaah1~NWM
new file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..00d4a694a367c8bef7c4fae99f1f18e6edfa1e53
GIT binary patch
literal 22
ec$}-4<9+I+e~>}C$LTYkp&n0|7!+r-C;<R-%?P*v
new file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..522a3def4bbdd7178cd5f5758fd7f51831e394ea
GIT binary patch
literal 22
ec$}-4<9+I+e~>}C$LTYkp&pNz7!>ESC<6d<#t69p
new file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..8a8432a9e223891b4250ec4ac8631b55b346017d
GIT binary patch
literal 71
zc$@)80J#5noGU3xO;s?pU@$Z=Ff%bxNM!K)%)S3nis7<377CY_HQx`p*74~LR5FRd
dUoTZt!D`tR@2GE&HFcV6-Y#JN2>>PC8en8vAQAuo
new file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..a044c5926c95969c522e5fabc37161ca65232d57
GIT binary patch
literal 22
ec$}-4<9+I+e~>}C$LTYkp&rke7!>ETr~m+S=?K68
new file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..7d172f39d4e00239ca900fc7f1b94ee2f145db36
GIT binary patch
literal 161
zc$@*D0ABxioTZLU3c@fDMqTF=*$a}SGnoWLbn69NPcsu#Xe(``czb`Y;9EX;A77b<
z;nr;EVAEz*TdM#GU;^Y+BT|Iy)vNTGw5u99p;M~n!LyE^kSRzGIp$EnOG1R=sK(BD
zLh9Kkjwsn1ezxm0+cOWk*o!W$f7$(g-HP@%9*_60Uw?WxYCBl(35d`E*{%Cv%I|5@
Pe>TiX#1``gU=~T<`ae&d
new file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..81b997b636b0ddc243de5d5c11eb9e4a93450bf2
GIT binary patch
literal 96
zc$@)X0H6POoGU3xO;xZkXD~D{Ff%bxNM!K)%)S3nis7<377CY_HQx`p*74~LR5FRd
zUoTZt!D`tR@2GE&HFcV6-Y#JN36)G{STyaz?nJ?kQ@d3&B|m2$W)wfOfE56|TP3G3
CMJoRQ
new file mode 100644
--- /dev/null
+++ b/dulwich/tests/data/repos/a/.git/refs/heads/master
@@ -0,0 +1,1 @@
+a90fa2d900a17e99b433217e988c4eb4a2e9a097
new file mode 100644
--- /dev/null
+++ b/dulwich/tests/data/repos/a/a
@@ -0,0 +1,1 @@
+file a
new file mode 100644
--- /dev/null
+++ b/dulwich/tests/data/repos/a/b
@@ -0,0 +1,1 @@
+file b
new file mode 100644
--- /dev/null
+++ b/dulwich/tests/data/repos/a/c
@@ -0,0 +1,1 @@
+file c
new file mode 100644
--- /dev/null
+++ b/dulwich/tests/data/repos/ooo_merge/.git/HEAD
@@ -0,0 +1,1 @@
+ref: refs/heads/master
new file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..639d0d16deb75e0483431920a0feb507b0c1a2f5
GIT binary patch
literal 257
zc${<b402{*U|<4bW;eEdSAjGGj0Ot1FerC8FfcSO0kU5}FavvjIuHL=`zgs~+(lD=
z?+|&J`di@y17jjk-+pU|eh3ZKpNg(uGjpF^-@^k+%F~u6im*Q^n=kUzn}IP2q^}gJ
zzZ6M-FS`DzUcvbll2`chlj~dNJ#4=#z?SUEz?jSs66ER%l$T^MRxse2AfXy`WJQ$g
m8?EM;wTE4$U*CUGFmtujikfpQPF0_E*Y4Qn9`ZstelGypaY>~B
new file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..cbe43c2ebd526cac5d66a4d0f614abf4d77c3e59
GIT binary patch
literal 22
dc$}-4<9+I+e~>}ChNrGxpur<12E}JA!T@9v2YLVi
new file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..3f3699b36e39f0046a19227248c45215b0f5e452
GIT binary patch
literal 70
zc$@)70J;BooGU3xO;s?pU@$Z=Ff%bxNMy)Q=i%RKKP9=0yJ+g~9U?DNe=D4TN+vT*
c^$O0fki5c|pIqNE?_v900k&jM0Mu?5iV>n8Bme*a
new file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..95c3c19bbeaeda10c619530ac6ea902ae1c0cdd4
GIT binary patch
literal 22
dc$}-4<9+I+e~>}ChNrGxpurO+2E`{VLI7kL2YCPh
new file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..2569779c10cc06f9e3639d73183c80b6fbd8f243
GIT binary patch
literal 71
zc$@)80J#5noGU3xO;s?pU@$Z=Ff%bxNMy)Q=i%RKKP9=0yJ+g~9U?DNe=D4TN+vOA
dX701=dw4)edD_xM5%wo#^F^L|0|3{t7x5-=A87yp
new file mode 100644
--- /dev/null
+++ b/dulwich/tests/data/repos/ooo_merge/.git/objects/76/01d7f6231db6a57f7bbb79ee52e4d462fd44d1
@@ -0,0 +1,2 @@
+xœ¥Aj1E»ö)¼L%[ÛPJÖ…®»–4š6™„‰Cèí›fÑtûxÿÁ·Ó²zL
žúæ`MÌÉH*±[’šÙd²LŒê:Í^„³l¾ö8++Pb+æ46nêÖhbÒ&Èe¼iæ?Ÿ¡¤†:檵°¥êïSH¬è’@mD
+rí_§-¾Éâ—øá—®ßñåxÛM®Y÷Ç_~{àaõþU*™ZÜ{<ëþŸFx÷íÓã0<w_ÎáLYñ
\ No newline at end of file
new file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..610b7dfa3a19490625df89ba689ce6efe928d9eb
GIT binary patch
literal 95
zc$@)W0HFVPoGU3xO;xZkXD~D{Ff%bxNMy)Q=i%RKKP9=0yJ+g~9U?DNe=D4TN+vOA
zX701=dw4)edD_xM5%wo#^F^L|LnV_Lrg{bES4dvr%TKOvnfI{$t^ixICjb;YA~UHs
BDbWA`
new file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..7bef12912d59b8fab01801f66978456947e6ce59
GIT binary patch
literal 22
dc$}-4<9+I+e~>}ChNrGxpusaH2E`XFA^>DH2Y~<p
new file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..165943d626144b8c1aa9859bfac35e4cb7afda09
GIT binary patch
literal 46
zc$}-4qpj)X8)~pA#lXbCz}PU*Bjtqu^N07d>{WO6?9O=gURFszxL$h>1H;(_UJn4H
C9ugJ+
new file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..4438cdd7cad0280b0d0e50bc44d975d20f441b0e
GIT binary patch
literal 166
zc$@*I09pTdoTZOJ3c@fD06pgwdoRdtvQ0N2B6#xyo|AM-E3}n1QvAJGpWt;4!%UvX
zaVTJ!4`opWCk<=J9;0uGLuSsw3Nd!J##TtZ6RItmXIykD&;@lNk;UwV#70ih)s90a
z&bFwmo;sE~oA@gIyudAvy24%8lHPE7Y*njih|}fq<=c*rla@0OH7wjX=b=JEG<h#A
U`j5k0f4gakQ|`f<4~YOwxvp7HvH$=8
new file mode 100644
--- /dev/null
+++ b/dulwich/tests/data/repos/ooo_merge/.git/objects/f9/e39b120c68182a4ba35349f832d0e4e61f485c
@@ -0,0 +1,2 @@
+xœ¥ÍÁ
+Â0„aÏyŠ½$Yb6)ž=ž7qÀÚB)¾½µ>‚×à›2
CWÉ%>Ô ÌÊ‚r”TØ&ÍÎI€@$æèžP5úªÏi¦«X莥æ7ûµy w:^úo_÷|Q[Ú½ðI¨±ÎZSöçŠsÛVúIæEy?‘
\ No newline at end of file
new file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..5bda024ac879ef55da75751f82e4c7e9e417ad69
GIT binary patch
literal 165
zc$@*H09yZeoTZOJ3c@fD06pgwdlzIk*)|D?2;TgF=iPLJ3T>rE#NUhh1g~=#X39K{
zmj<4B)fO{|SSX_@rz%NRI~<16XD<|$xoAuzHP+tUWU;A1%@(qf$I>TCURWh`fpbm4
z4`^&XRZd-T@@VIIfuoFehm+l#zF>QOt_RbLO#ADHKfic0S=#|o<is&xgf$YPE8o&)
T|2W*?x2uax<qYKB?zT=}Oo&f|
new file mode 100644
--- /dev/null
+++ b/dulwich/tests/data/repos/ooo_merge/.git/refs/heads/master
@@ -0,0 +1,1 @@
+7601d7f6231db6a57f7bbb79ee52e4d462fd44d1
new file mode 100644
--- /dev/null
+++ b/dulwich/tests/data/repos/ooo_merge/a
@@ -0,0 +1,1 @@
+test 1
new file mode 100644
--- /dev/null
+++ b/dulwich/tests/data/repos/ooo_merge/b
@@ -0,0 +1,1 @@
+test 2
new file mode 100644
--- /dev/null
+++ b/dulwich/tests/data/repos/ooo_merge/c
@@ -0,0 +1,1 @@
+test 3
new file mode 100644
--- /dev/null
+++ b/dulwich/tests/data/repos/simple_merge/.git/HEAD
@@ -0,0 +1,1 @@
+ref: refs/heads/master
new file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0120a9148b77f1444466cc02b6a0171e491facb8
GIT binary patch
literal 321
zc${<b402{*U|<4b7B^PyS|H5;qZt^O9U0i>Br`BHE&;M%KrjP)emW2TR{JT*W!yzm
zfA0`^nfhDd1OsCtP@i!hL_dUv>Q6z}ubH{euJ7RiCFN;L6Ghmcl+71;>dnBI1k^Ww
z2}D1HhU%Y!rk~;2+()xFUA*S(-L1K<JnF`wCq2Ov85mPQ`mRIuUq{lvn>hWc3?V_T
zu0Ux?1``DXt{XfOS<GLQS4dmlx4hH-S8cl4skYDe|HaH~i8;*{@kjFX>4U7X?FSA5
E0ClTi5dZ)H
new file mode 100644
--- /dev/null
+++ b/dulwich/tests/data/repos/simple_merge/.git/objects/0d/89f20333fbb1d2f3a94da77f4981373d8f4310
@@ -0,0 +1,2 @@
+xϴK
+Â@]Ï)zù53×®×==-&’–àíñ¾e=¨âi:×úƒÎ"ÐZ—=¢çH)¢°§©rˆèŠ”úœ¬¡—>§®4ÈwY´¼áÔ¯M•ÒÑxé¿|Ýñq=ƒs)&Ì6Dhì6Ã{Y凹m/üLæXg?«
\ No newline at end of file
new file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..a2247b203f986dcb9c22184b1aae17353960c5b4
GIT binary patch
literal 96
zc$@)X0H6POoGU3xO;xZkXD~D{Ff%bxNMy)Q=i%RKKP9=0yJ+g~9U?DNe=D4TN+vOA
zX701=dw4)edD_xM5%wo#^F^L|LnTufp3QwUd(*{h&feXc>&l~U9D33dJP`msq$RHC
C7Ava&
new file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..3dec0bf77a94a1664e95bd056f41ddb563072c89
GIT binary patch
literal 16
Xc$_n@n4|5fs~2eSh>3wKk4Y8)D2xOb
new file mode 100644
--- /dev/null
+++ b/dulwich/tests/data/repos/simple_merge/.git/objects/4c/ffe90e0a41ad3f5190079d7c8f036bde29cbe6
@@ -0,0 +1,2 @@
+xϴK
+1D]ç½L~=ÁpÝî ÂŒ2FoïïÔæ-Uå:Žç.„U›UAIŠ!KU©.Šç!‹ëk´‰jÙ
„æF³N
’*RÐ{QofÄwSQ[ÙÖÌ!”Œ)«¡G;]g8Шw8ê½ñ6—¥å3M»Ë‡/_¼ž´m¡ï1 ú”töS¾K›þã0ûß]ži*'pæúCOÒ
\ No newline at end of file
new file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..69c6dff1ab6b88df371b5f539df78e705d1f05a5
GIT binary patch
literal 194
zc$@*k06qVBoTZP;N(3<!L|x}AvL9&gP11S5Ag%@1uJ0>41KlG%k-@*W9q<b*C@2m^
zLCIxVPQ!;<9>!+owSr*H1hJ<OsUpXweYe!oHRM)e7OL5O(q`-M#5pmm$c&5yG~isT
z%`$6jXnO!@S-bxAlk{#43@9OK>@zli(zHnJ5HqWVCT!W!*Z8<>{)HC1`Zv1{{`SZ7
w?V!ddTA$Cq{C3CFYU9ZxmQ+eC*!%%@Ik`U@_MZ*+YTLW{X?kS$1Hb25!(j?!{r~^~
new file mode 100644
--- /dev/null
+++ b/dulwich/tests/data/repos/simple_merge/.git/objects/60/dacdc733de308bb77bb76ce0fb0f9b44c9769e
@@ -0,0 +1,2 @@
+xœ¥ŽË
+Â0E]ç+f/ÈäÑN"®]®g’	¶Ð*5"þ½¯Oð.Ï…ÃÉ—i¸€«¶¨a¶	UB¬E¤ÃRrŸ[’²P´\©öæÊ‹Î
°ÄTz靖-®zN¡0Q
)ZO¾Ä¼EÃ÷v¾,pàIopÒ[“'lÇǺ¨<ïÇ|ñfÖ¶k)P—œGXã{&K›þã0Ç÷?“y´MQ
\ No newline at end of file
new file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..8f8ed37f1e6b8f0af781c26daa8f31ae9bd2167d
GIT binary patch
literal 16
Xc$_n@n4|5fs~2eSgo%MGhe-wiD3b&h
new file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..2569779c10cc06f9e3639d73183c80b6fbd8f243
GIT binary patch
literal 71
zc$@)80J#5noGU3xO;s?pU@$Z=Ff%bxNMy)Q=i%RKKP9=0yJ+g~9U?DNe=D4TN+vOA
dX701=dw4)edD_xM5%wo#^F^L|0|3{t7x5-=A87yp
new file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..610b7dfa3a19490625df89ba689ce6efe928d9eb
GIT binary patch
literal 95
zc$@)W0HFVPoGU3xO;xZkXD~D{Ff%bxNMy)Q=i%RKKP9=0yJ+g~9U?DNe=D4TN+vOA
zX701=dw4)edD_xM5%wo#^F^L|LnV_Lrg{bES4dvr%TKOvnfI{$t^ixICjb;YA~UHs
BDbWA`
new file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..7bef12912d59b8fab01801f66978456947e6ce59
GIT binary patch
literal 22
dc$}-4<9+I+e~>}ChNrGxpusaH2E`XFA^>DH2Y~<p
new file mode 100644
--- /dev/null
+++ b/dulwich/tests/data/repos/simple_merge/.git/objects/ab/64bbdcc51b170d21588e5c5d391ee5c0c96dfd
@@ -0,0 +1,2 @@
+xϴM
+Â0…]ç³$cÒL"‚;/à:3™bm¥oïß|ð6ßâã=™†¡«°÷~SgU@cÌóìb›„²UæÒA)ÄÆ™{žu¬lÉR„œ+êld&z7ˆÚ–m›Ø{I’šü¨·i†Kt«.•Ÿpè×mQîòxê?|ýâݨõˆä‰!ÂÖ¾cä»´ê?sþÝå9r4/mËO+
\ No newline at end of file
new file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..d45835e8201d096288a79b38053dbe259d5475b5
GIT binary patch
literal 96
zc$@)X0H6POoGU3xO;xZkXD~D{Ff%bxNMy)Q=i%RKKP9=0yJ+g~9U?DNe=D4TN+vOA
zX701=dw4)edD_xM5%wo#^F^L|LnTugp3QwUd(*{h&feXc>&l~U9D33dJP`msx+SmZ
Cbt}UF
new file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..dce887e80f980d37bb634a0e7c99c5e062292fbd
GIT binary patch
literal 102
zc$@)d0Ga=IoGU3xO;s>7G-EI{FfcPQQAlLSPv_y^YCk2pjJs&+?;Rp9Q-3R*fJ!DY
zXlCxS>w9=WNqO4RL=pBUW%EUzdP5~s7@p02G<(y<YtG)?n(NA=ZX9~j6Fd<rnTk~s
I07!x}ldfwl?f?J)
new file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..8c901c5b89f920a740af8b23b771ef4019cdb665
GIT binary patch
literal 15
Wc$}-4<9+I+e~>`}0|Vy<#t#58#|6#+
new file mode 100644
--- /dev/null
+++ b/dulwich/tests/data/repos/simple_merge/.git/refs/heads/master
@@ -0,0 +1,1 @@
+5dac377bdded4c9aeb8dff595f0faeebcc8498cc
new file mode 100644
--- /dev/null
+++ b/dulwich/tests/data/repos/simple_merge/a
@@ -0,0 +1,1 @@
+test 1
new file mode 100644
--- /dev/null
+++ b/dulwich/tests/data/repos/simple_merge/b
@@ -0,0 +1,1 @@
+test 2
new file mode 100644
new file mode 100644
new file mode 100644
--- /dev/null
+++ b/dulwich/tests/data/tags/71033db03a03c6a36721efcf1968dd8f8e0cf023
@@ -0,0 +1,4 @@
+xmŽMOƒ@„=ï¯x#Ý…²€Qã¶nIME”¢ÁÛòÙíòQ
+mÊ¿—ªG's˜L2O¦SzÑÄÛ,éÁ$1¥vžšqnaJb+¥¹0u3·mMr‘
+aÔ»’¦ªdú‘ÑÉ¢ÎÒs,²=°RB bYÂ(¥ÖãcQ	YjãæˆnØ!p	Æ­7²ƒÑâ#	Ýœ5ãîÒßõ!Xº[‡ïü§GÙ¾“M}n}ð]8mª9pézád%ëÃé
+!É#Îf|ÎX´`ª”íÉfóB½µÁKµœÃD%'“Ís¡¾¯öK¬÷¯CäÓš5\…™<a5E£Dp‹DÕdúéÈç-=nŒ
ºoKkø=ʽ§ÿn~þ6iM
\ No newline at end of file
new file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..2569779c10cc06f9e3639d73183c80b6fbd8f243
GIT binary patch
literal 71
zc$@)80J#5noGU3xO;s?pU@$Z=Ff%bxNMy)Q=i%RKKP9=0yJ+g~9U?DNe=D4TN+vOA
dX701=dw4)edD_xM5%wo#^F^L|0|3{t7x5-=A87yp
new file mode 100644
--- /dev/null
+++ b/dulwich/tests/test_client.py
@@ -0,0 +1,43 @@
+# test_client.py -- Tests for the git protocol, client side
+# Copyright (C) 2009 Jelmer Vernooij <jelmer@samba.org>
+# 
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; version 2
+# or (at your option) any later version of the License.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+# MA  02110-1301, USA.
+
+from cStringIO import StringIO
+from unittest import TestCase
+
+from client import (
+    GitClient,
+    )
+
+class GitClientTests(TestCase):
+
+    def setUp(self):
+        self.rout = StringIO()
+        self.rin = StringIO()
+        self.client = GitClient(lambda x: True, self.rin.read, 
+            self.rout.write)
+
+    def test_caps(self):
+        self.assertEquals(['multi_ack', 'side-band-64k', 'ofs-delta', 'thin-pack'], self.client._capabilities)
+
+    def test_fetch_pack_none(self):
+        self.rin.write(
+            "008855dcc6bf963f922e1ed5c4bbaaefcfacef57b1d7 HEAD.multi_ack thin-pack side-band side-band-64k ofs-delta shallow no-progress include-tag\n"
+            "0000")
+        self.rin.seek(0)
+        self.client.fetch_pack("bla", lambda heads: [], None, None, None)
+        self.assertEquals(self.rout.getvalue(), "0000")
new file mode 100644
--- /dev/null
+++ b/dulwich/tests/test_index.py
@@ -0,0 +1,67 @@
+# test_index.py -- Tests for the git index cache
+# Copyright (C) 2008 Jelmer Vernooij <jelmer@samba.org>
+# 
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; version 2
+# or (at your option) any later version of the License.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+# MA  02110-1301, USA.
+
+import os
+from unittest import TestCase
+
+from dulwich.index import (
+    Index,
+    read_index,
+    write_index,
+    )
+
+class IndexTestCase(TestCase):
+
+    datadir = os.path.join(os.path.dirname(__file__), 'data/indexes')
+
+    def get_simple_index(self, name):
+        return Index(os.path.join(self.datadir, name))
+
+
+class SimpleIndexTestcase(IndexTestCase):
+
+    def test_len(self):
+        self.assertEquals(1, len(self.get_simple_index("index")))
+
+    def test_iter(self):
+        self.assertEquals([
+            ('bla', (1230680220, 0), (1230680220, 0), 2050, 3761020, 33188, 1000, 1000, 0, '\xe6\x9d\xe2\x9b\xb2\xd1\xd6CK\x8b)\xaewZ\xd8\xc2\xe4\x8cS\x91', 3)
+            ], 
+                list(self.get_simple_index("index")))
+
+    def test_getitem(self):
+        self.assertEquals(
+            ('bla', (1230680220, 0), (1230680220, 0), 2050, 3761020, 33188, 1000, 1000, 0, '\xe6\x9d\xe2\x9b\xb2\xd1\xd6CK\x8b)\xaewZ\xd8\xc2\xe4\x8cS\x91', 3),
+            self.get_simple_index("index")["bla"])
+
+
+class SimpleIndexWriterTestCase(IndexTestCase):
+
+    def test_simple_write(self):
+        entries = [('barbla', (1230680220, 0), (1230680220, 0), 2050, 3761020, 33188, 1000, 1000, 0, '\xe6\x9d\xe2\x9b\xb2\xd1\xd6CK\x8b)\xaewZ\xd8\xc2\xe4\x8cS\x91', 3)]
+        x = open('test-simple-write-index', 'w+')
+        try:
+            write_index(x, entries)
+        finally:
+            x.close()
+        x = open('test-simple-write-index', 'r')
+        try:
+            self.assertEquals(entries, list(read_index(x)))
+        finally:
+            x.close()
+
new file mode 100644
--- /dev/null
+++ b/dulwich/tests/test_object_store.py
@@ -0,0 +1,41 @@
+# test_object_store.py -- tests for object_store.py
+# Copyright (C) 2008 Jelmer Vernooij <jelmer@samba.org>
+# 
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; version 2
+# or (at your option) any later version of the License.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+# MA  02110-1301, USA.
+
+from unittest import TestCase
+
+from dulwich.object_store import ObjectStore
+
+class ObjectStoreTests(TestCase):
+
+    def test_pack_dir(self):
+        o = ObjectStore("foo")
+        self.assertEquals("foo/pack", o.pack_dir)
+
+    def test_empty_packs(self):
+        o = ObjectStore("foo")
+        self.assertEquals([], o.packs)
+
+    def test_add_objects_empty(self):
+        o = ObjectStore("foo")
+        o.add_objects([])
+
+    def test_add_commit(self):
+        o = ObjectStore("foo")
+        # TODO: Argh, no way to construct Git commit objects without 
+        # access to a serialized form.
+        o.add_objects([])
new file mode 100644
--- /dev/null
+++ b/dulwich/tests/test_objects.py
@@ -0,0 +1,140 @@
+# test_objects.py -- tests for objects.py
+# Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
+# 
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; version 2
+# of the License or (at your option) any later version of 
+# the License.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+# MA  02110-1301, USA.
+
+import os
+import unittest
+
+from dulwich.objects import (
+    Blob,
+    Tree,
+    Commit,
+    Tag,
+    )
+
+a_sha = '6f670c0fb53f9463760b7295fbb814e965fb20c8'
+b_sha = '2969be3e8ee1c0222396a5611407e4769f14e54b'
+c_sha = '954a536f7819d40e6f637f849ee187dd10066349'
+tree_sha = '70c190eb48fa8bbb50ddc692a17b44cb781af7f6'
+tag_sha = '71033db03a03c6a36721efcf1968dd8f8e0cf023'
+
+class BlobReadTests(unittest.TestCase):
+    """Test decompression of blobs"""
+  
+    def get_sha_file(self, obj, base, sha):
+        return obj.from_file(os.path.join(os.path.dirname(__file__),
+                                          'data', base, sha))
+  
+    def get_blob(self, sha):
+        """Return the blob named sha from the test data dir"""
+        return self.get_sha_file(Blob, 'blobs', sha)
+  
+    def get_tree(self, sha):
+        return self.get_sha_file(Tree, 'trees', sha)
+  
+    def get_tag(self, sha):
+        return self.get_sha_file(Tag, 'tags', sha)
+  
+    def commit(self, sha):
+        return self.get_sha_file(Commit, 'commits', sha)
+  
+    def test_decompress_simple_blob(self):
+        b = self.get_blob(a_sha)
+        self.assertEqual(b.data, 'test 1\n')
+        self.assertEqual(b.sha().hexdigest(), a_sha)
+  
+    def test_parse_empty_blob_object(self):
+        sha = 'e69de29bb2d1d6434b8b29ae775ad8c2e48c5391'
+        b = self.get_blob(sha)
+        self.assertEqual(b.data, '')
+        self.assertEqual(b.sha().hexdigest(), sha)
+  
+    def test_create_blob_from_string(self):
+        string = 'test 2\n'
+        b = Blob.from_string(string)
+        self.assertEqual(b.data, string)
+        self.assertEqual(b.sha().hexdigest(), b_sha)
+  
+    def test_parse_legacy_blob(self):
+        string = 'test 3\n'
+        b = self.get_blob(c_sha)
+        self.assertEqual(b.data, string)
+        self.assertEqual(b.sha().hexdigest(), c_sha)
+  
+    def test_eq(self):
+        blob1 = self.get_blob(a_sha)
+        blob2 = self.get_blob(a_sha)
+        self.assertEqual(blob1, blob2)
+  
+    def test_read_tree_from_file(self):
+        t = self.get_tree(tree_sha)
+        self.assertEqual(t.entries()[0], (33188, 'a', a_sha))
+        self.assertEqual(t.entries()[1], (33188, 'b', b_sha))
+  
+    def test_read_tag_from_file(self):
+        t = self.get_tag(tag_sha)
+        self.assertEqual(t.object, (Commit, '51b668fd5bf7061b7d6fa525f88803e6cfadaa51'))
+        self.assertEqual(t.name,'signed')
+        self.assertEqual(t.tagger,'Ali Sabil <ali.sabil@gmail.com>')
+        self.assertEqual(t.tag_time, 1231203091)
+        self.assertEqual(t.message, 'This is a signed tag\n-----BEGIN PGP SIGNATURE-----\nVersion: GnuPG v1.4.9 (GNU/Linux)\n\niEYEABECAAYFAkliqx8ACgkQqSMmLy9u/kcx5ACfakZ9NnPl02tOyYP6pkBoEkU1\n5EcAn0UFgokaSvS371Ym/4W9iJj6vh3h\n=ql7y\n-----END PGP SIGNATURE-----\n')
+  
+  
+    def test_read_commit_from_file(self):
+        sha = '60dacdc733de308bb77bb76ce0fb0f9b44c9769e'
+        c = self.commit(sha)
+        self.assertEqual(c.tree, tree_sha)
+        self.assertEqual(c.parents, ['0d89f20333fbb1d2f3a94da77f4981373d8f4310'])
+        self.assertEqual(c.author,
+            'James Westby <jw+debian@jameswestby.net>')
+        self.assertEqual(c.committer,
+            'James Westby <jw+debian@jameswestby.net>')
+        self.assertEqual(c.commit_time, 1174759230)
+        self.assertEqual(c.commit_timezone, 0)
+        self.assertEqual(c.author_timezone, 0)
+        self.assertEqual(c.message, 'Test commit\n')
+  
+    def test_read_commit_no_parents(self):
+        sha = '0d89f20333fbb1d2f3a94da77f4981373d8f4310'
+        c = self.commit(sha)
+        self.assertEqual(c.tree, '90182552c4a85a45ec2a835cadc3451bebdfe870')
+        self.assertEqual(c.parents, [])
+        self.assertEqual(c.author,
+            'James Westby <jw+debian@jameswestby.net>')
+        self.assertEqual(c.committer,
+            'James Westby <jw+debian@jameswestby.net>')
+        self.assertEqual(c.commit_time, 1174758034)
+        self.assertEqual(c.commit_timezone, 0)
+        self.assertEqual(c.author_timezone, 0)
+        self.assertEqual(c.message, 'Test commit\n')
+  
+    def test_read_commit_two_parents(self):
+        sha = '5dac377bdded4c9aeb8dff595f0faeebcc8498cc'
+        c = self.commit(sha)
+        self.assertEqual(c.tree, 'd80c186a03f423a81b39df39dc87fd269736ca86')
+        self.assertEqual(c.parents, ['ab64bbdcc51b170d21588e5c5d391ee5c0c96dfd',
+                                       '4cffe90e0a41ad3f5190079d7c8f036bde29cbe6'])
+        self.assertEqual(c.author,
+            'James Westby <jw+debian@jameswestby.net>')
+        self.assertEqual(c.committer,
+            'James Westby <jw+debian@jameswestby.net>')
+        self.assertEqual(c.commit_time, 1174773719)
+        self.assertEqual(c.commit_timezone, 0)
+        self.assertEqual(c.author_timezone, 0)
+        self.assertEqual(c.message, 'Merge ../b\n')
+  
new file mode 100644
--- /dev/null
+++ b/dulwich/tests/test_pack.py
@@ -0,0 +1,283 @@
+# test_pack.py -- Tests for the handling of git packs.
+# Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
+# Copyright (C) 2008 Jelmer Vernooij <jelmer@samba.org>
+# 
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; version 2
+# of the License, or (at your option) any later version of the license.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+# MA  02110-1301, USA.
+
+import os
+import unittest
+
+from dulwich.objects import (
+    Tree,
+    )
+from dulwich.pack import (
+    Pack,
+    PackData,
+    apply_delta,
+    create_delta,
+    load_pack_index,
+    hex_to_sha,
+    read_zlib,
+    sha_to_hex,
+    write_pack_index_v1,
+    write_pack_index_v2,
+    write_pack,
+    )
+
+pack1_sha = 'bc63ddad95e7321ee734ea11a7a62d314e0d7481'
+
+a_sha = '6f670c0fb53f9463760b7295fbb814e965fb20c8'
+tree_sha = 'b2a2766a2879c209ab1176e7e778b81ae422eeaa'
+commit_sha = 'f18faa16531ac570a3fdc8c7ca16682548dafd12'
+
+class PackTests(unittest.TestCase):
+    """Base class for testing packs"""
+  
+    datadir = os.path.join(os.path.dirname(__file__), 'data/packs')
+  
+    def get_pack_index(self, sha):
+        """Returns a PackIndex from the datadir with the given sha"""
+        return load_pack_index(os.path.join(self.datadir, 'pack-%s.idx' % sha))
+  
+    def get_pack_data(self, sha):
+        """Returns a PackData object from the datadir with the given sha"""
+        return PackData(os.path.join(self.datadir, 'pack-%s.pack' % sha))
+  
+    def get_pack(self, sha):
+        return Pack(os.path.join(self.datadir, 'pack-%s' % sha))
+
+
+class PackIndexTests(PackTests):
+    """Class that tests the index of packfiles"""
+  
+    def test_object_index(self):
+        """Tests that the correct object offset is returned from the index."""
+        p = self.get_pack_index(pack1_sha)
+        self.assertRaises(KeyError, p.object_index, pack1_sha)
+        self.assertEqual(p.object_index(a_sha), 178)
+        self.assertEqual(p.object_index(tree_sha), 138)
+        self.assertEqual(p.object_index(commit_sha), 12)
+  
+    def test_index_len(self):
+        p = self.get_pack_index(pack1_sha)
+        self.assertEquals(3, len(p))
+  
+    def test_get_stored_checksum(self):
+        p = self.get_pack_index(pack1_sha)
+        self.assertEquals("\xf2\x84\x8e*\xd1o2\x9a\xe1\xc9.;\x95\xe9\x18\x88\xda\xa5\xbd\x01", str(p.get_stored_checksum()))
+        self.assertEquals('r\x19\x80\xe8f\xaf\x9a_\x93\xadgAD\xe1E\x9b\x8b\xa3\xe7\xb7', str(p.get_pack_checksum()))
+  
+    def test_index_check(self):
+        p = self.get_pack_index(pack1_sha)
+        self.assertEquals(True, p.check())
+  
+    def test_iterentries(self):
+        p = self.get_pack_index(pack1_sha)
+        self.assertEquals([('og\x0c\x0f\xb5?\x94cv\x0br\x95\xfb\xb8\x14\xe9e\xfb \xc8', 178, None), ('\xb2\xa2vj(y\xc2\t\xab\x11v\xe7\xe7x\xb8\x1a\xe4"\xee\xaa', 138, None), ('\xf1\x8f\xaa\x16S\x1a\xc5p\xa3\xfd\xc8\xc7\xca\x16h%H\xda\xfd\x12', 12, None)], list(p.iterentries()))
+  
+    def test_iter(self):
+        p = self.get_pack_index(pack1_sha)
+        self.assertEquals(set([tree_sha, commit_sha, a_sha]), set(p))
+  
+
+class TestPackDeltas(unittest.TestCase):
+  
+    test_string1 = "The answer was flailing in the wind"
+    test_string2 = "The answer was falling down the pipe"
+    test_string3 = "zzzzz"
+  
+    test_string_empty = ""
+    test_string_big = "Z" * 8192
+  
+    def _test_roundtrip(self, base, target):
+        self.assertEquals(target,
+            apply_delta(base, create_delta(base, target)))
+  
+    def test_nochange(self):
+        self._test_roundtrip(self.test_string1, self.test_string1)
+  
+    def test_change(self):
+        self._test_roundtrip(self.test_string1, self.test_string2)
+  
+    def test_rewrite(self):
+        self._test_roundtrip(self.test_string1, self.test_string3)
+  
+    def test_overflow(self):
+        self._test_roundtrip(self.test_string_empty, self.test_string_big)
+
+
+class TestPackData(PackTests):
+    """Tests getting the data from the packfile."""
+  
+    def test_create_pack(self):
+        # Simply constructing the PackData object should not raise.
+        self.get_pack_data(pack1_sha)
+  
+    def test_pack_len(self):
+        p = self.get_pack_data(pack1_sha)
+        self.assertEquals(3, len(p))
+  
+    def test_check(self):
+        p = self.get_pack_data(pack1_sha)
+        self.assertEquals(True, p.check())
+  
+    def test_iterobjects(self):
+        p = self.get_pack_data(pack1_sha)
+        self.assertEquals([
+            (12, 1, 'tree b2a2766a2879c209ab1176e7e778b81ae422eeaa\nauthor James Westby <jw+debian@jameswestby.net> 1174945067 +0100\ncommitter James Westby <jw+debian@jameswestby.net> 1174945067 +0100\n\nTest commit\n', 3775879613L),
+            (138, 2, '100644 a\x00og\x0c\x0f\xb5?\x94cv\x0br\x95\xfb\xb8\x14\xe9e\xfb \xc8', 912998690L),
+            (178, 3, 'test 1\n', 1373561701L),
+            ], list(p.iterobjects()))
+  
+    def test_iterentries(self):
+        p = self.get_pack_data(pack1_sha)
+        self.assertEquals(set([
+            ('og\x0c\x0f\xb5?\x94cv\x0br\x95\xfb\xb8\x14\xe9e\xfb \xc8', 178, 1373561701L),
+            ('\xb2\xa2vj(y\xc2\t\xab\x11v\xe7\xe7x\xb8\x1a\xe4"\xee\xaa', 138, 912998690L),
+            ('\xf1\x8f\xaa\x16S\x1a\xc5p\xa3\xfd\xc8\xc7\xca\x16h%H\xda\xfd\x12', 12, 3775879613L),
+            ]), set(p.iterentries()))
+  
+    def test_create_index_v1(self):
+        p = self.get_pack_data(pack1_sha)
+        p.create_index_v1("v1test.idx")
+        idx1 = load_pack_index("v1test.idx")
+        idx2 = self.get_pack_index(pack1_sha)
+        self.assertEquals(idx1, idx2)
+  
+    def test_create_index_v2(self):
+        p = self.get_pack_data(pack1_sha)
+        p.create_index_v2("v2test.idx")
+        idx1 = load_pack_index("v2test.idx")
+        idx2 = self.get_pack_index(pack1_sha)
+        self.assertEquals(idx1, idx2)
+
+
+class TestPack(PackTests):
+
+    def test_len(self):
+        p = self.get_pack(pack1_sha)
+        self.assertEquals(3, len(p))
+
+    def test_contains(self):
+        p = self.get_pack(pack1_sha)
+        self.assertTrue(tree_sha in p)
+
+    def test_get(self):
+        p = self.get_pack(pack1_sha)
+        self.assertEquals(type(p[tree_sha]), Tree)
+
+    def test_iter(self):
+        p = self.get_pack(pack1_sha)
+        self.assertEquals(set([tree_sha, commit_sha, a_sha]), set(p))
+
+    def test_get_object_at(self):
+        """Tests random access for non-delta objects"""
+        p = self.get_pack(pack1_sha)
+        obj = p[a_sha]
+        self.assertEqual(obj._type, 'blob')
+        self.assertEqual(obj.sha().hexdigest(), a_sha)
+        obj = p[tree_sha]
+        self.assertEqual(obj._type, 'tree')
+        self.assertEqual(obj.sha().hexdigest(), tree_sha)
+        obj = p[commit_sha]
+        self.assertEqual(obj._type, 'commit')
+        self.assertEqual(obj.sha().hexdigest(), commit_sha)
+
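+    # Write all objects back out as a new pack and verify the copy is
+    # equivalent; stored index checksums are only compared when both
+    # indices have the same version.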
+    def test_copy(self):
+        origpack = self.get_pack(pack1_sha)
+        self.assertEquals(True, origpack.idx.check())
+        write_pack("Elch", [(x, "") for x in origpack.iterobjects()], 
+            len(origpack))
+        newpack = Pack("Elch")
+        self.assertEquals(origpack, newpack)
+        self.assertEquals(True, newpack.idx.check())
+        self.assertEquals(origpack.name(), newpack.name())
+        self.assertEquals(origpack.idx.get_pack_checksum(), 
+                          newpack.idx.get_pack_checksum())
+        
+        self.assertTrue(
+                (origpack.idx.version != newpack.idx.version) or
+                (origpack.idx.get_stored_checksum() == newpack.idx.get_stored_checksum()))
+
+    def test_commit_obj(self):
+        p = self.get_pack(pack1_sha)
+        commit = p[commit_sha]
+        self.assertEquals("James Westby <jw+debian@jameswestby.net>",
+            commit.author)
+        self.assertEquals([], commit.parents)
+
+    def test_name(self):
+        p = self.get_pack(pack1_sha)
+        self.assertEquals(pack1_sha, p.name())
+
+
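+# hex_to_sha and sha_to_hex convert between the 40-character hex form of
+# a SHA-1 and its 20-byte binary form.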
+class TestHexToSha(unittest.TestCase):
+
+    def test_simple(self):
+        self.assertEquals('\xab\xcd' * 10, hex_to_sha("abcd" * 10))
+
+    def test_reverse(self):
+        self.assertEquals("abcd" * 10, sha_to_hex('\xab\xcd' * 10))
+
+
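+# Shared index-writing tests; the v1 and v2 subclasses below supply the
+# writer function, the expected index version, and whether per-entry
+# CRC32 checksums are stored (version 2 only).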
+class BaseTestPackIndexWriting(object):
+
+    def test_empty(self):
+        pack_checksum = 'r\x19\x80\xe8f\xaf\x9a_\x93\xadgAD\xe1E\x9b\x8b\xa3\xe7\xb7'
+        self._write_fn("empty.idx", [], pack_checksum)
+        idx = load_pack_index("empty.idx")
+        self.assertTrue(idx.check())
+        self.assertEquals(idx.get_pack_checksum(), pack_checksum)
+        self.assertEquals(0, len(idx))
+
+    def test_single(self):
+        pack_checksum = 'r\x19\x80\xe8f\xaf\x9a_\x93\xadgAD\xe1E\x9b\x8b\xa3\xe7\xb7'
+        my_entries = [('og\x0c\x0f\xb5?\x94cv\x0br\x95\xfb\xb8\x14\xe9e\xfb \xc8', 178, 42)]
+        my_entries.sort()
+        self._write_fn("single.idx", my_entries, pack_checksum)
+        idx = load_pack_index("single.idx")
+        self.assertEquals(idx.version, self._expected_version)
+        self.assertTrue(idx.check())
+        self.assertEquals(idx.get_pack_checksum(), pack_checksum)
+        self.assertEquals(1, len(idx))
+        actual_entries = list(idx.iterentries())
+        self.assertEquals(len(my_entries), len(actual_entries))
+        for a, b in zip(my_entries, actual_entries):
+            self.assertEquals(a[0], b[0])
+            self.assertEquals(a[1], b[1])
+            if self._has_crc32_checksum:
+                self.assertEquals(a[2], b[2])
+            else:
+                self.assertTrue(b[2] is None)
+
+
+class TestPackIndexWritingv1(unittest.TestCase, BaseTestPackIndexWriting):
+
+    def setUp(self):
+        unittest.TestCase.setUp(self)
+        self._has_crc32_checksum = False
+        self._expected_version = 1
+        self._write_fn = write_pack_index_v1
+
+
+class TestPackIndexWritingv2(unittest.TestCase, BaseTestPackIndexWriting):
+
+    def setUp(self):
+        unittest.TestCase.setUp(self)
+        self._has_crc32_checksum = True
+        self._expected_version = 2
+        self._write_fn = write_pack_index_v2
+
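+# A zlib-compressed git commit object, used to exercise read_zlib below.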
+TEST_COMP1 = """\x78\x9c\x9d\x8e\xc1\x0a\xc2\x30\x10\x44\xef\xf9\x8a\xbd\xa9\x08\x92\x86\xb4\x26\x20\xe2\xd9\x83\x78\xf2\xbe\x49\x37\xb5\xa5\x69\xca\x36\xf5\xfb\x4d\xfd\x04\x67\x6e\x33\xcc\xf0\x32\x13\x81\xc6\x16\x8d\xa9\xbd\xad\x6c\xe3\x8a\x03\x4a\x73\xd6\xda\xd5\xa6\x51\x2e\x58\x65\x6c\x13\xbc\x94\x4a\xcc\xc8\x34\x65\x78\xa4\x89\x04\xae\xf9\x9d\x18\xee\x34\x46\x62\x78\x11\x4f\x29\xf5\x03\x5c\x86\x5f\x70\x5b\x30\x3a\x3c\x25\xee\xae\x50\xa9\xf2\x60\xa4\xaa\x34\x1c\x65\x91\xf0\x29\xc6\x3e\x67\xfa\x6f\x2d\x9e\x9c\x3e\x7d\x4b\xc0\x34\x8f\xe8\x29\x6e\x48\xa1\xa0\xc4\x88\xf3\xfe\xb0\x5b\x20\x85\xb0\x50\x06\xe4\x6e\xdd\xca\xd3\x17\x26\xfa\x49\x23"""
+
+
+class ZlibTests(unittest.TestCase):
+
+    def test_simple_decompress(self):
+        self.assertEquals(("tree 4ada885c9196b6b6fa08744b5862bf92896fc002\nparent None\nauthor Jelmer Vernooij <jelmer@samba.org> 1228980214 +0000\ncommitter Jelmer Vernooij <jelmer@samba.org> 1228980214 +0000\n\nProvide replacement for mmap()'s offset argument.", 158), 
+        read_zlib(TEST_COMP1, 0, 229))
+
new file mode 100644
--- /dev/null
+++ b/dulwich/tests/test_protocol.py
@@ -0,0 +1,82 @@
+# test_protocol.py -- Tests for the git protocol
+# Copyright (C) 2009 Jelmer Vernooij <jelmer@samba.org>
+# 
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; version 2
+# or (at your option) any later version of the License.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+# MA  02110-1301, USA.
+
+from cStringIO import StringIO
+from unittest import TestCase
+
+from dulwich.protocol import (
+    Protocol,
+    extract_capabilities,
+    )
+
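+# Each pkt-line starts with a four-byte hex length that counts the length
+# header itself, so "bla" is framed as "0007bla"; "0000" is the flush
+# packet, mapped to None.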
+class ProtocolTests(TestCase):
+
+    def setUp(self):
+        self.rout = StringIO()
+        self.rin = StringIO()
+        self.proto = Protocol(self.rin.read, self.rout.write)
+
+    def test_write_pkt_line_none(self):
+        self.proto.write_pkt_line(None)
+        self.assertEquals(self.rout.getvalue(), "0000")
+
+    def test_write_pkt_line(self):
+        self.proto.write_pkt_line("bla")
+        self.assertEquals(self.rout.getvalue(), "0007bla")
+
+    def test_read_pkt_line(self):
+        self.rin.write("0008cmd ")
+        self.rin.seek(0)
+        self.assertEquals("cmd ", self.proto.read_pkt_line())
+
+    def test_read_pkt_seq(self):
+        self.rin.write("0008cmd 0005l0000")
+        self.rin.seek(0)
+        self.assertEquals(["cmd ", "l"], list(self.proto.read_pkt_seq()))
+
+    def test_read_pkt_line_none(self):
+        self.rin.write("0000")
+        self.rin.seek(0)
+        self.assertEquals(None, self.proto.read_pkt_line())
+
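+    # Sideband packets prefix the payload with a one-byte channel number
+    # inside an ordinary pkt-line ("0009" = 4 length bytes + 1 + 4).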
+    def test_write_sideband(self):
+        self.proto.write_sideband(3, "bloe")
+        self.assertEquals(self.rout.getvalue(), "0009\x03bloe")
+
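+    # Commands travel as "<cmd> <arg1>\x00<arg2>\x00..." in one pkt-line;
+    # read_cmd raises if the trailing NUL terminator is missing.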
+    def test_send_cmd(self):
+        self.proto.send_cmd("fetch", "a", "b")
+        self.assertEquals(self.rout.getvalue(), "000efetch a\x00b\x00")
+
+    def test_read_cmd(self):
+        self.rin.write("0012cmd arg1\x00arg2\x00")
+        self.rin.seek(0)
+        self.assertEquals(("cmd", ["arg1", "arg2"]), self.proto.read_cmd())
+
+    def test_read_cmd_noend0(self):
+        self.rin.write("0011cmd arg1\x00arg2")
+        self.rin.seek(0)
+        self.assertRaises(AssertionError, self.proto.read_cmd)
+
+
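+# Capabilities are appended to the first ref line after a NUL byte;
+# extract_capabilities splits them off and returns None when absent.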
+class ExtractCapabilitiesTestCase(TestCase):
+
+    def test_plain(self):
+        self.assertEquals(("bla", None), extract_capabilities("bla"))
+
+    def test_caps(self):
+        self.assertEquals(("bla", ["la", "la"]), extract_capabilities("bla\0la\0la"))
new file mode 100644
--- /dev/null
+++ b/dulwich/tests/test_repository.py
@@ -0,0 +1,133 @@
+# test_repository.py -- tests for repository.py
+# Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
+# 
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; version 2
+# of the License or (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+# MA  02110-1301, USA.
+
+import os
+import unittest
+
+from dulwich import errors
+from dulwich.repo import Repo
+
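+# A syntactically valid SHA that is not present in any test repository.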
+missing_sha = 'b91fa4d900e17e99b433218e988c4eb4a3e9a097'
+
+class RepositoryTests(unittest.TestCase):
+  
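+    # Each named fixture is a small repository checked into the source
+    # tree under data/repos/<name>/.git.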
+    def open_repo(self, name):
+        return Repo(os.path.join(os.path.dirname(__file__),
+                          'data/repos', name, '.git'))
+  
+    def test_simple_props(self):
+        r = self.open_repo('a')
+        basedir = os.path.join(os.path.dirname(__file__), 'data/repos/a/.git')
+        self.assertEqual(r.controldir(), basedir)
+        self.assertEqual(r.object_dir(), os.path.join(basedir, 'objects'))
+  
+    def test_ref(self):
+        r = self.open_repo('a')
+        self.assertEqual(r.ref('master'),
+                         'a90fa2d900a17e99b433217e988c4eb4a2e9a097')
+  
+    def test_get_refs(self):
+        r = self.open_repo('a')
+        self.assertEquals({
+            'HEAD': 'a90fa2d900a17e99b433217e988c4eb4a2e9a097', 
+            'refs/heads/master': 'a90fa2d900a17e99b433217e988c4eb4a2e9a097'
+            }, r.get_refs())
+  
+    def test_head(self):
+        r = self.open_repo('a')
+        self.assertEqual(r.head(), 'a90fa2d900a17e99b433217e988c4eb4a2e9a097')
+  
+    def test_get_object(self):
+        r = self.open_repo('a')
+        obj = r.get_object(r.head())
+        self.assertEqual(obj._type, 'commit')
+  
+    def test_get_object_non_existent(self):
+        r = self.open_repo('a')
+        self.assertRaises(KeyError, r.get_object, missing_sha)
+  
+    def test_commit(self):
+        r = self.open_repo('a')
+        obj = r.commit(r.head())
+        self.assertEqual(obj._type, 'commit')
+  
+    def test_commit_not_commit(self):
+        r = self.open_repo('a')
+        self.assertRaises(errors.NotCommitError,
+                          r.commit, '4f2e6529203aa6d44b5af6e3292c837ceda003f9')
+  
+    def test_tree(self):
+        r = self.open_repo('a')
+        commit = r.commit(r.head())
+        tree = r.tree(commit.tree)
+        self.assertEqual(tree._type, 'tree')
+        self.assertEqual(tree.sha().hexdigest(), commit.tree)
+  
+    def test_tree_not_tree(self):
+        r = self.open_repo('a')
+        self.assertRaises(errors.NotTreeError, r.tree, r.head())
+  
+    def test_get_blob(self):
+        r = self.open_repo('a')
+        commit = r.commit(r.head())
+        tree = r.tree(commit.tree)
+        blob_sha = tree.entries()[0][2]
+        blob = r.get_blob(blob_sha)
+        self.assertEqual(blob._type, 'blob')
+        self.assertEqual(blob.sha().hexdigest(), blob_sha)
+  
+    def test_get_blob_not_blob(self):
+        r = self.open_repo('a')
+        self.assertRaises(errors.NotBlobError, r.get_blob, r.head())
+    
+    def test_linear_history(self):
+        r = self.open_repo('a')
+        history = r.revision_history(r.head())
+        shas = [c.sha().hexdigest() for c in history]
+        self.assertEqual(shas, [r.head(),
+                                '2a72d929692c41d8554c07f6301757ba18a65d91'])
+  
+    def test_merge_history(self):
+        r = self.open_repo('simple_merge')
+        history = r.revision_history(r.head())
+        shas = [c.sha().hexdigest() for c in history]
+        self.assertEqual(shas, ['5dac377bdded4c9aeb8dff595f0faeebcc8498cc',
+                                'ab64bbdcc51b170d21588e5c5d391ee5c0c96dfd',
+                                '4cffe90e0a41ad3f5190079d7c8f036bde29cbe6',
+                                '60dacdc733de308bb77bb76ce0fb0f9b44c9769e',
+                                '0d89f20333fbb1d2f3a94da77f4981373d8f4310'])
+  
+    def test_revision_history_missing_commit(self):
+        r = self.open_repo('simple_merge')
+        self.assertRaises(errors.MissingCommitError, r.revision_history,
+                          missing_sha)
+  
+    def test_out_of_order_merge(self):
+        """Test that revision history is ordered by date, not parent order."""
+        r = self.open_repo('ooo_merge')
+        history = r.revision_history(r.head())
+        shas = [c.sha().hexdigest() for c in history]
+        self.assertEqual(shas, ['7601d7f6231db6a57f7bbb79ee52e4d462fd44d1',
+                                'f507291b64138b875c28e03469025b1ea20bc614',
+                                'fb5b0425c7ce46959bec94d54b9a157645e114f5',
+                                'f9e39b120c68182a4ba35349f832d0e4e61f485c'])
+  
+    def test_get_tags_empty(self):
+        r = self.open_repo('ooo_merge')
+        self.assertEquals({}, r.get_tags())