changeset 851:81c55f8629ba

hg2git: audit path components during export (CVE-2014-9390). A user recently got confused and managed to track and export a .git directory, which confuses git and causes it to emit very odd errors. For example, cloning one such repository (which has a symlink for .git) produces this output from git: "Cloning into 'git'... done. error: Updating '.git' would lose untracked files in it", and another (which has a .git directory checked in) produces this: "Cloning into 'git'... done. error: Invalid path '.git/hooks/post-update'". If it ended there, that'd be fine, but this led to a line of investigation that ended with CVE-2014-9390, so git will now block checking these revisions out, and we should try to prevent foot-shooting on our end. Since some servers (notably GitHub) are blocking trees that contain these entries, default to refusing to export any path component that looks like it folds to .git. Since some histories probably already contain such paths, we offer an escape hatch via the config option git.blockdotgit that allows users to resume foot-shooting behavior.
author Augie Fackler <raf@durin42.com>
date Sun, 23 Nov 2014 19:06:21 -0500
parents 96c74ae74ccd
children e22ffc47337d
files Makefile hggit/compat.py hggit/hg2git.py tests/test-illegal-contents.t
diffstat 4 files changed, 165 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- a/Makefile
+++ b/Makefile
@@ -12,6 +12,7 @@
 	cd tests && $(PYTHON) run-tests.py --with-hg=`which hg` $(TESTFLAGS)
 
 test-%:
+	$(PYTHON) -m doctest hggit/hg2git.py
 	cd tests && $(PYTHON) run-tests.py --with-hg=`which hg` $(TESTFLAGS) $@
 
 tests-%:
new file mode 100644
--- /dev/null
+++ b/hggit/compat.py
@@ -0,0 +1,24 @@
+try:
+    from mercurial import encoding
+    hfsignoreclean = encoding.hfsignoreclean
+except AttributeError:
+    # private copy (verbatim from hg 3.2.2) for hg <= 3.2.1, which lacks it
+    _ignore = [unichr(int(cp, 16)).encode("utf-8") for cp in
+               "200c 200d 200e 200f 202a 202b 202c 202d 202e "
+               "206a 206b 206c 206d 206e 206f feff".split()]
+    # the lead-byte shortcut in hfsignoreclean depends on this holding
+    assert set(seq[0] for seq in _ignore) == set(["\xe2", "\xef"])
+
+    def hfsignoreclean(s):
+        """Strip every codepoint that HFS+ ignores out of s.
+
+        >>> hfsignoreclean(u'.h\u200cg'.encode('utf-8'))
+        '.hg'
+        >>> hfsignoreclean(u'.h\ufeffg'.encode('utf-8'))
+        '.hg'
+        """
+        if "\xe2" not in s and "\xef" not in s:
+            return s
+        for seq in _ignore:
+            s = s.replace(seq, '')
+        return s
--- a/hggit/hg2git.py
+++ b/hggit/hg2git.py
@@ -7,7 +7,9 @@
 
 import dulwich.objects as dulobjs
 from dulwich import diff_tree
+from mercurial import util as hgutil
 
+import compat
 import util
 
 def parse_subrepos(ctx):
@@ -20,6 +22,51 @@
             ctx['.hgsubstate'].data().splitlines())
     return sub, substate
 
+
+def audit_git_path(ui, path):
+    r"""Refuse (or warn about) paths with a component that can alias .git.
+
+    Flags '.git' hidden behind HFS+-ignored codepoints and NTFS short names
+    (GIT~<digits>); aborts unless '[git] blockdotgit' is false, else warns.
+
+    >>> class fakeui(object):
+    ...     def configbool(*args):
+    ...         return False
+    ...     def warn(self, s):
+    ...         print s
+    >>> u = fakeui()
+    >>> audit_git_path(u, 'foo/git~100/wat')
+    warning: path 'foo/git~100/wat' contains a potentially dangerous path component.
+    It may not be legal to check out in Git.
+    It may also be rejected by some git server configurations.
+    <BLANKLINE>
+    >>> audit_git_path(u, u'foo/.gi\u200ct'.encode('utf-8'))
+    warning: path 'foo/.gi\xe2\x80\x8ct' contains a potentially dangerous path component.
+    It may not be legal to check out in Git.
+    It may also be rejected by some git server configurations.
+    <BLANKLINE>
+    >>> audit_git_path(u, 'this/is/safe')
+    """
+    for c in path.split('/'):  # hg paths are '/'-separated on every OS
+        if compat.hfsignoreclean(c) == '.git':  # NOTE: '.GIT' is not caught
+            break
+        elif '~' in c:
+            base, tail = c.split('~', 1)
+            if tail.isdigit() and base.upper().startswith('GIT'):  # GIT~1
+                break
+    else:  # no break: nothing matched
+        return
+    if ui.configbool('git', 'blockdotgit', True):
+        raise hgutil.Abort(
+            ('Refusing to export likely-dangerous path %r' % path),
+            hint=("If you need to continue, read about CVE-2014-9390 and "
+                  "then set '[git] blockdotgit = false' in your hgrc."))
+    ui.warn('warning: path %r contains a potentially dangerous path '
+            'component.\nIt may not be legal to check out in Git.\n'
+            'It may also be rejected by some git server configurations.\n'
+            % path)
+
+
 class IncrementalChangesetExporter(object):
     """Incrementally export Mercurial changesets to Git trees.
 
@@ -172,6 +219,7 @@
         # corresponding Git blob and its tree entry. We emit the blob
         # immediately and update trees to be aware of its presence.
         for path in set(modified) | set(added):
+            audit_git_path(self._hg.ui, path)
             if path == '.hgsubstate' or path == '.hgsub':
                 continue
 
@@ -380,4 +428,3 @@
 
         return (dulobjs.TreeEntry(os.path.basename(fctx.path()), mode, blob_id),
                 blob)
-
new file mode 100644
--- /dev/null
+++ b/tests/test-illegal-contents.t
@@ -0,0 +1,92 @@
+Check for contents we should refuse to export to git repositories (or
+at least warn).
+
+Load commonly used test logic
+  $ . "$TESTDIR/testutil"
+
+  $ hg init hg
+  $ cd hg
+  $ mkdir -p .git/hooks
+  $ cat > .git/hooks/post-update <<EOF
+  > #!/bin/sh
+  > echo pwned
+  > EOF
+
+  $ hg addremove
+  adding .git/hooks/post-update
+  $ hg ci -m "we should refuse to export this"
+  $ hg book master
+  $ hg gexport
+  abort: Refusing to export likely-dangerous path '.git/hooks/post-update'
+  (If you need to continue, read about CVE-2014-9390 and then set '[git] blockdotgit = false' in your hgrc.)
+  [255]
+  $ cd ..
+
+  $ rm -rf hg
+  $ hg init hg
+  $ cd hg
+  $ mkdir -p nested/.git/hooks/
+  $ cat > nested/.git/hooks/post-update <<EOF
+  > #!/bin/sh
+  > echo pwnd
+  > EOF
+  $ chmod +x nested/.git/hooks/post-update
+  $ hg addremove
+  adding nested/.git/hooks/post-update
+  $ hg ci -m "also refuse to export this"
+  $ hg book master
+  $ hg gexport
+  abort: Refusing to export likely-dangerous path 'nested/.git/hooks/post-update'
+  (If you need to continue, read about CVE-2014-9390 and then set '[git] blockdotgit = false' in your hgrc.)
+  [255]
+We can override if needed:
+  $ hg --config git.blockdotgit=false gexport
+  warning: path 'nested/.git/hooks/post-update' contains a potentially dangerous path component.
+  It may not be legal to check out in Git.
+  It may also be rejected by some git server configurations.
+  $ cd ..
+  $ git clone hg/.hg/git git
+  Cloning into 'git'...
+  done.
+  error: Invalid path 'nested/.git/hooks/post-update'
+
+Something that case-folds to .git, which might let you own Mac users, is
+not caught yet and currently exports without complaint:
+
+  $ cd "$TESTTMP"
+  $ rm -rf hg
+  $ hg init hg
+  $ cd hg
+  $ mkdir -p .GIT/hooks/
+  $ cat > .GIT/hooks/post-checkout <<EOF
+  > #!/bin/sh
+  > echo pwnd
+  > EOF
+  $ chmod +x .GIT/hooks/post-checkout
+  $ hg addremove
+  adding .GIT/hooks/post-checkout
+  $ hg ci -m "also refuse to export this"
+  $ hg book master
+  $ hg gexport
+  $ cd ..
+
+And the NTFS case:
+  $ cd "$TESTTMP"
+  $ rm -rf hg
+  $ hg init hg
+  $ cd hg
+  $ mkdir -p GIT~1/hooks/
+  $ cat > GIT~1/hooks/post-checkout <<EOF
+  > #!/bin/sh
+  > echo pwnd
+  > EOF
+  $ chmod +x GIT~1/hooks/post-checkout
+  $ hg addremove
+  adding GIT~1/hooks/post-checkout
+  $ hg ci -m "also refuse to export this"
+  $ hg book master
+  $ hg gexport
+  abort: Refusing to export likely-dangerous path 'GIT~1/hooks/post-checkout'
+  (If you need to continue, read about CVE-2014-9390 and then set '[git] blockdotgit = false' in your hgrc.)
+  [255]
+  $ cd ..