changeset 4943:686726300104

New function quotearg_alloc. Treat { } = as special sometimes. Work around bug in multibyte handling in older shells.
author Paul Eggert <eggert@cs.ucla.edu>
date Thu, 05 Feb 2004 19:05:52 +0000
parents 332dda494493
children fae25d470beb
files ChangeLog lib/quotearg.c lib/quotearg.h
diffstat 3 files changed, 92 insertions(+), 15 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,15 @@
+2004-02-05  Paul Eggert  <eggert@twinsun.com>
+
+	Add comments and code, prompted by suggestions from Bruno Haible
+	for sh-quote.
+	* quotearg.h (quotearg_alloc): New decl.  Improve the comments
+	describing the enum quoting_style values.
+	* quotearg.c (quotearg_alloc): New function.
+	(quotearg_buffer_restyled): Treat lone { and } as special.
+	Treat = as special.  Work around bug with older shells
+	that "see" a '\' that is really the 2nd byte of a multibyte char.
+	Quote empty string with shell_quoting_style.
+
 2004-02-03  Bruno Haible  <bruno@clisp.org>
 
 	* modules/pipe: New file.
--- a/lib/quotearg.c
+++ b/lib/quotearg.c
@@ -1,5 +1,7 @@
 /* quotearg.c - quote arguments for output
-   Copyright (C) 1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+
+   Copyright (C) 1998, 1999, 2000, 2001, 2002, 2004 Free Software
+   Foundation, Inc.
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -174,7 +176,7 @@
    size of the output, not counting the terminating null.
    If BUFFERSIZE is too small to store the output string, return the
    value that would have been returned had BUFFERSIZE been large enough.
-   If ARGSIZE is -1, use the string length of the argument for ARGSIZE.
+   If ARGSIZE is SIZE_MAX, use the string length of the argument for ARGSIZE.
 
    This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG,
    ARGSIZE, O), except it uses QUOTING_STYLE instead of the quoting
@@ -326,6 +328,10 @@
 	    }
 	  break;
 
+	case '{': case '}': /* sometimes special if isolated */
+	  if (! (argsize == SIZE_MAX ? arg[1] == '\0' : argsize == 1))
+	    break;
+	  /* Fall through.  */
 	case '#': case '~':
 	  if (i != 0)
 	    break;
@@ -334,7 +340,9 @@
 	case '!': /* special in bash */
 	case '"': case '$': case '&':
 	case '(': case ')': case '*': case ';':
-	case '<': case '>': case '[':
+	case '<':
+	case '=': /* sometimes special in 0th or (with "set -k") later args */
+	case '>': case '[':
 	case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */
 	case '`': case '|':
 	  /* A shell special character.  In theory, '$' and '`' could
@@ -364,7 +372,7 @@
 
 	case '%': case '+': case ',': case '-': case '.': case '/':
 	case '0': case '1': case '2': case '3': case '4': case '5':
-	case '6': case '7': case '8': case '9': case ':': case '=':
+	case '6': case '7': case '8': case '9': case ':':
 	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
 	case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
 	case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
@@ -374,7 +382,6 @@
 	case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
 	case 'o': case 'p': case 'q': case 'r': case 's': case 't':
 	case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
-	case '{': case '}':
 	  /* These characters don't cause problems, no matter what the
 	     quoting style is.  They cannot start multibyte sequences.  */
 	  break;
@@ -427,6 +434,22 @@
 		      }
 		    else
 		      {
+			/* Work around a bug with older shells that "see" a '\'
+			   that is really the 2nd byte of a multibyte character.
+			   In practice the problem is limited to ASCII
+			   chars >= '@' that are shell special chars.  */
+			if ('[' == 0x5b && quoting_style == shell_quoting_style)
+			  {
+			    size_t j;
+			    for (j = 1; j < bytes; j++)
+			      switch (arg[i + m + j])
+				{
+				case '[': case '\\': case '^':
+				case '`': case '|':
+				  goto use_shell_always_quoting_style;
+				}
+			  }
+			    
 			if (! iswprint (w))
 			  printable = 0;
 			m += bytes;
@@ -472,6 +495,9 @@
       STORE (c);
     }
 
+  if (i == 0 && quoting_style == shell_quoting_style)
+    goto use_shell_always_quoting_style;
+
   if (quote_string)
     for (; *quote_string; quote_string++)
       STORE (*quote_string);
@@ -492,7 +518,8 @@
    size of the output, not counting the terminating null.
    If BUFFERSIZE is too small to store the output string, return the
    value that would have been returned had BUFFERSIZE been large enough.
-   If ARGSIZE is -1, use the string length of the argument for ARGSIZE.  */
+   If ARGSIZE is SIZE_MAX, use the string length of the argument for
+   ARGSIZE.  */
 size_t
 quotearg_buffer (char *buffer, size_t buffersize,
 		 char const *arg, size_t argsize,
@@ -506,8 +533,23 @@
   return r;
 }
 
+/* Like quotearg_buffer (..., ARG, ARGSIZE, O), except return newly
+   allocated storage containing the quoted string.  Preserve errno.  */
+char *
+quotearg_alloc (char const *arg, size_t argsize,
+		struct quoting_options const *o)
+{
+  int e = errno;  /* Saved here and restored just before returning.  */
+  size_t bufsize = quotearg_buffer (0, 0, arg, argsize, o) + 1;
+  char *buf = xmalloc (bufsize);  /* Output length + terminating null.  */
+  quotearg_buffer (buf, bufsize, arg, argsize, o);  /* Now store it.  */
+  errno = e;
+  return buf;
+}
+
 /* Use storage slot N to return a quoted version of argument ARG.
-   ARG is of size ARGSIZE, but if that is -1, ARG is a null-terminated string.
+   ARG is of size ARGSIZE, but if that is SIZE_MAX, ARG is a
+   null-terminated string.
    OPTIONS specifies the quoting options.
    The returned value points to static storage that can be
    reused by the next call to this function with the same value of N.
--- a/lib/quotearg.h
+++ b/lib/quotearg.h
@@ -1,6 +1,6 @@
 /* quotearg.h - quote arguments for output
 
-   Copyright (C) 1998, 1999, 2000, 2001, 2002 Free Software
+   Copyright (C) 1998, 1999, 2000, 2001, 2002, 2004 Free Software
    Foundation, Inc.
 
    This program is free software; you can redistribute it and/or modify
@@ -27,13 +27,31 @@
 /* Basic quoting styles.  */
 enum quoting_style
   {
-    literal_quoting_style,	/* --quoting-style=literal */
-    shell_quoting_style,	/* --quoting-style=shell */
-    shell_always_quoting_style,	/* --quoting-style=shell-always */
-    c_quoting_style,		/* --quoting-style=c */
-    escape_quoting_style,	/* --quoting-style=escape */
-    locale_quoting_style,	/* --quoting-style=locale */
-    clocale_quoting_style	/* --quoting-style=clocale */
+    /* Output names as-is (ls --quoting-style=literal).  */
+    literal_quoting_style,
+    
+    /* Quote names for the shell if they contain shell metacharacters
+       or would cause ambiguous output (ls --quoting-style=shell).  */
+    shell_quoting_style,
+
+    /* Quote names for the shell, even if they would normally not
+       require quoting (ls --quoting-style=shell-always).  */
+    shell_always_quoting_style,
+
+    /* Quote names as for a C language string (ls --quoting-style=c).  */
+    c_quoting_style,
+
+    /* Like c_quoting_style except omit the surrounding double-quote
+       characters (ls --quoting-style=escape).  */
+    escape_quoting_style,
+
+    /* Like clocale_quoting_style, but quote `like this' instead of
+       "like this" in the default C locale (ls --quoting-style=locale).  */
+    locale_quoting_style,
+
+    /* Like c_quoting_style except use quotation marks appropriate for
+       the locale (ls --quoting-style=clocale).  */
+    clocale_quoting_style
   };
 
 /* For now, --quoting-style=literal is the default, but this may change.  */
@@ -81,6 +99,11 @@
 			char const *arg, size_t argsize,
 			struct quoting_options const *o);
 
+/* Like quotearg_buffer, except return the result in a newly allocated
+   buffer.  It is the caller's responsibility to free the result.  */
+char *quotearg_alloc (char const *arg, size_t argsize,
+		      struct quoting_options const *o);
+
 /* Use storage slot N to return a quoted version of the string ARG.
    Use the default quoting options.
    The returned value points to static storage that can be